[ovs-dev] question about dp_packet lifetime

Alessandro Rosetti alessandro.rosetti at gmail.com
Wed Mar 28 08:50:28 UTC 2018


Hi Darrell, Ilya and everyone else,

I'm contacting you since you expressed interest.
I posted the patch that adds netmap support to OVS by attaching the file to
the mail; did I do it wrong?
https://mail.openvswitch.org/pipermail/ovs-dev/2018-March/345371.html

I'm posting it inline now, sorry for the mess!

Alessandro.

----------------------------------------------------------------------

diff --git a/acinclude.m4 b/acinclude.m4
index d61e37a5e..d9dd9fbd1 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -341,6 +341,36 @@ AC_DEFUN([OVS_CHECK_DPDK], [
   AM_CONDITIONAL([DPDK_NETDEV], test "$DPDKLIB_FOUND" = true)
 ])

+dnl OVS_CHECK_NETMAP
+dnl
+dnl Check netmap
+AC_DEFUN([OVS_CHECK_NETMAP], [
+  AC_ARG_WITH([netmap],
+              [AC_HELP_STRING([--with-netmap], [Enable NETMAP])],
+              [have_netmap=true])
+  AC_MSG_CHECKING([whether netmap datapath is enabled])
+
+  if test "$have_netmap" != true || test "$with_netmap" = no; then
+    AC_MSG_RESULT([no])
+  else
+    AC_MSG_RESULT([yes])
+    NETMAP_FOUND=false
+    AC_LINK_IFELSE(
+       [AC_LANG_PROGRAM([#include <net/if.h>
+                         #include <netinet/in.h>
+                         #include <net/netmap.h>
+                         #include <net/netmap_user.h>], [])],
+                        [NETMAP_FOUND=true])
+    if $NETMAP_FOUND; then
+        AC_DEFINE([NETMAP_NETDEV], [1], [NETMAP datapath is enabled.])
+    else
+        AC_MSG_ERROR([Could not find NETMAP headers])
+    fi
+  fi
+
+  AM_CONDITIONAL([NETMAP_NETDEV], test "$NETMAP_FOUND" = true)
+])
+
 dnl OVS_GREP_IFELSE(FILE, REGEX, [IF-MATCH], [IF-NO-MATCH])
 dnl
 dnl Greps FILE for REGEX.  If it matches, runs IF-MATCH, otherwise IF-NO-MATCH.
@@ -900,7 +930,7 @@ dnl with or without modifications, as long as this notice is preserved.

 AC_DEFUN([_OVS_CHECK_CC_OPTION], [dnl
   m4_define([ovs_cv_name], [ovs_cv_[]m4_translit([$1], [-= ], [__])])dnl
-  AC_CACHE_CHECK([whether $CC accepts $1], [ovs_cv_name],
+  AC_CACHE_CHECK([whether $CC accepts $1], [ovs_cv_name],
     [ovs_save_CFLAGS="$CFLAGS"
      dnl Include -Werror in the compiler options, because without -Werror
      dnl clang's GCC-compatible compiler driver does not return a failure
@@ -951,7 +981,7 @@ dnl OVS_ENABLE_OPTION([OPTION])
 dnl Check whether the given C compiler OPTION is accepted.
 dnl If so, add it to WARNING_FLAGS.
 dnl Example: OVS_ENABLE_OPTION([-Wdeclaration-after-statement])
-AC_DEFUN([OVS_ENABLE_OPTION],
+AC_DEFUN([OVS_ENABLE_OPTION],
   [OVS_CHECK_CC_OPTION([$1], [WARNING_FLAGS="$WARNING_FLAGS $1"])
    AC_SUBST([WARNING_FLAGS])])

diff --git a/configure.ac b/configure.ac
index 9940a1a45..24cd4718c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -180,6 +180,7 @@ AC_SUBST(KARCH)
 OVS_CHECK_LINUX
 OVS_CHECK_LINUX_TC
 OVS_CHECK_DPDK
+OVS_CHECK_NETMAP
 OVS_CHECK_PRAGMA_MESSAGE
 AC_SUBST([OVS_CFLAGS])
 AC_SUBST([OVS_LDFLAGS])
diff --git a/lib/automake.mk b/lib/automake.mk
index 5c26e0f33..4ccd9e22a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -134,12 +134,14 @@ lib_libopenvswitch_la_SOURCES = \
  lib/namemap.c \
  lib/netdev-dpdk.h \
  lib/netdev-dummy.c \
+ lib/netdev-netmap.h \
  lib/netdev-provider.h \
  lib/netdev-vport.c \
  lib/netdev-vport.h \
  lib/netdev-vport-private.h \
  lib/netdev.c \
  lib/netdev.h \
+ lib/netmap.h \
  lib/netflow.h \
  lib/netlink.c \
  lib/netlink.h \
@@ -403,6 +405,15 @@ lib_libopenvswitch_la_SOURCES += \
  lib/dpdk-stub.c
 endif

+if NETMAP_NETDEV
+lib_libopenvswitch_la_SOURCES += \
+ lib/netmap.c \
+ lib/netdev-netmap.c
+else
+lib_libopenvswitch_la_SOURCES += \
+ lib/netmap-stub.c
+endif
+
 if WIN32
 lib_libopenvswitch_la_SOURCES += \
  lib/dpif-netlink.c \
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 443c22504..e917e6d6a 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -92,6 +92,7 @@ dp_packet_use_const(struct dp_packet *b, const void *data, size_t size)
     dp_packet_set_size(b, size);
 }

+
 /* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes.
  * DPDK allocated dp_packet and *data is allocated from one continous memory
  * region as part of memory pool, so in memory data start right after
@@ -105,6 +106,19 @@ dp_packet_init_dpdk(struct dp_packet *b, size_t allocated)
     b->source = DPBUF_DPDK;
 }

+/* Initializes 'b' as a dp_packet whose data points to a netmap buffer of
+ * 'size' bytes. */
+#ifdef NETMAP_NETDEV
+void
+dp_packet_init_netmap(struct dp_packet *b, void *data, size_t size)
+{
+    b->source = DPBUF_NETMAP;
+    dp_packet_set_base(b, data);
+    dp_packet_set_data(b, data);
+    dp_packet_set_size(b, size);
+}
+#endif
+
 /* Initializes 'b' as an empty dp_packet with an initial capacity of 'size'
  * bytes. */
 void
@@ -125,6 +139,11 @@ dp_packet_uninit(struct dp_packet *b)
             /* If this dp_packet was allocated by DPDK it must have been
              * created as a dp_packet */
             free_dpdk_buf((struct dp_packet*) b);
+#endif
+        } else if (b->source == DPBUF_NETMAP) {
+#ifdef NETMAP_NETDEV
+            /* If this dp_packet was allocated by NETMAP, release it. */
+            netmap_free_packet(b);
 #endif
         }
     }
@@ -241,6 +260,9 @@ dp_packet_resize__(struct dp_packet *b, size_t new_headroom, size_t new_tailroom
     case DPBUF_DPDK:
         OVS_NOT_REACHED();

+    case DPBUF_NETMAP:
+        OVS_NOT_REACHED();
+
     case DPBUF_MALLOC:
         if (new_headroom == dp_packet_headroom(b)) {
             new_base = xrealloc(dp_packet_base(b), new_allocated);
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 21c8ca525..bd7832533 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -26,6 +26,7 @@
 #endif

 #include "netdev-dpdk.h"
+#include "netdev-netmap.h"
 #include "openvswitch/list.h"
 #include "packets.h"
 #include "util.h"
@@ -42,6 +43,7 @@ enum OVS_PACKED_ENUM dp_packet_source {
     DPBUF_DPDK,                /* buffer data is from DPDK allocated memory.
                                 * ref to dp_packet_init_dpdk() in dp-packet.c.
                                 */
+    DPBUF_NETMAP,              /* Buffers are from netmap allocated memory. */
 };

 #define DP_PACKET_CONTEXT_SIZE 64
@@ -60,6 +62,9 @@ struct dp_packet {
     uint32_t size_;             /* Number of bytes in use. */
     uint32_t rss_hash;          /* Packet hash. */
     bool rss_hash_valid;        /* Is the 'rss_hash' valid? */
+#endif
+#ifdef NETMAP_NETDEV
+    uint32_t buf_idx;             /* Netmap slot index. */
 #endif
     enum dp_packet_source source;  /* Source of memory allocated as 'base'. */

@@ -115,6 +120,7 @@ void dp_packet_use_stub(struct dp_packet *, void *, size_t);
 void dp_packet_use_const(struct dp_packet *, const void *, size_t);

 void dp_packet_init_dpdk(struct dp_packet *, size_t allocated);
+void dp_packet_init_netmap(struct dp_packet *, void *, size_t);

 void dp_packet_init(struct dp_packet *, size_t);
 void dp_packet_uninit(struct dp_packet *);
@@ -173,6 +179,13 @@ dp_packet_delete(struct dp_packet *b)
              * created as a dp_packet */
             free_dpdk_buf((struct dp_packet*) b);
             return;
+        } else if (b->source == DPBUF_NETMAP) {
+            /* It was allocated by a netdev_netmap, it will be marked
+             * for reuse. */
+#ifdef NETMAP_NETDEV
+            netmap_free_packet(b);
+#endif
+            return;
         }

         dp_packet_uninit(b);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b07fc6b8b..af81c992b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4119,11 +4119,14 @@ reload:

     /* List port/core affinity */
     for (i = 0; i < poll_cnt; i++) {
-       VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n",
-                pmd->core_id, netdev_rxq_get_name(poll_list[i].rxq->rx),
-                netdev_rxq_get_queue_id(poll_list[i].rxq->rx));
-       /* Reset the rxq current cycles counter. */
-       dp_netdev_rxq_set_cycles(poll_list[i].rxq, RXQ_CYCLES_PROC_CURR, 0);
+        VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n",
+                 pmd->core_id, netdev_rxq_get_name(poll_list[i].rxq->rx),
+                 netdev_rxq_get_queue_id(poll_list[i].rxq->rx));
+        /* Reset the rxq current cycles counter. */
+        dp_netdev_rxq_set_cycles(poll_list[i].rxq, RXQ_CYCLES_PROC_CURR, 0);
+#ifdef NETMAP_NETDEV
+        netmap_init_port(poll_list[i].rxq->rx);
+#endif
     }

     if (!poll_cnt) {
diff --git a/lib/netdev-netmap.c b/lib/netdev-netmap.c
new file mode 100644
index 000000000..87b292895
--- /dev/null
+++ b/lib/netdev-netmap.c
@@ -0,0 +1,1014 @@
+#include <config.h>
+
+#include <errno.h>
+#include <math.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <net/netmap.h>
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "dpif.h"
+#include "netdev.h"
+#include "netdev-provider.h"
+#include "netmap.h"
+#include "netdev-netmap.h"
+#include "openvswitch/list.h"
+#include "openvswitch/poll-loop.h"
+#include "openvswitch/vlog.h"
+#include "ovs-thread.h"
+#include "packets.h"
+#include "smap.h"
+
+#define DP_BLOCK_SIZE (NETDEV_MAX_BURST * 2)
+#define DEFAULT_RSYNC_INTVAL 5
+
+VLOG_DEFINE_THIS_MODULE(netdev_netmap);
+
+static struct vlog_rate_limit rl OVS_UNUSED = VLOG_RATE_LIMIT_INIT(5, 100);
+
+struct netdev_netmap {
+    struct netdev up;
+    struct nm_desc *nmd;
+
+    uint64_t timestamp;
+    uint32_t rxsync_intval;
+
+    struct ovs_list list_node;
+    long tid;
+    struct nm_alloc *nma;
+
+    struct ovs_mutex mutex OVS_ACQ_AFTER(netmap_mutex);
+    pthread_spinlock_t tx_lock;
+
+    struct netdev_stats stats;
+    struct eth_addr hwaddr;
+    enum netdev_flags flags;
+
+    int mtu;
+    int requested_mtu;
+};
+
+struct netdev_rxq_netmap {
+    struct netdev_rxq up;
+};
+
+static void netdev_netmap_destruct(struct netdev *netdev);
+
+static bool
+is_netmap_class(const struct netdev_class *class)
+{
+    return class->destruct == netdev_netmap_destruct;
+}
+
+static struct netdev_netmap *
+netdev_netmap_cast(const struct netdev *netdev)
+{
+    ovs_assert(is_netmap_class(netdev_get_class(netdev)));
+    return CONTAINER_OF(netdev, struct netdev_netmap, up);
+}
+
+static struct netdev_rxq_netmap *
+netdev_rxq_netmap_cast(const struct netdev_rxq *rx)
+{
+    ovs_assert(is_netmap_class(netdev_get_class(rx->netdev)));
+    return CONTAINER_OF(rx, struct netdev_rxq_netmap, up);
+}
+
+static struct ovs_mutex netmap_mutex = OVS_MUTEX_INITIALIZER;
+
+/* Blocks are used to store DP_BLOCK_SIZE preallocated netmap dp_packets.
+ * During receive operation, dp_packets are allocated by moving them from a
+ * block to a dp_batch. A block is refilled when packets are freed.
+ * Each netmap dp_packet has source type set to DPBUF_NETMAP, with buf_idx
+ * identifying a netmap buffer. Packets in the blocks (or in flight within OVS)
+ * are not attached to any netmap ring, i.e. their buf_idx is not stored in
+ * any netmap slot. On receive or transmit, the netmap buffer owned by a
+ * dp_packet is swapped with one attached to a receive/transmit ring slot,
+ * by simply swapping the buf_idx values. */
+struct nm_block {
+    struct ovs_list node;                     /* Blocks can be chained
+                                               * in a list. */
+    struct dp_packet* packets[DP_BLOCK_SIZE]; /* Array of dp_packets. */
+    uint16_t idx;                             /* Array index of the current
+                                               * packet. */
+};
+
+enum nm_block_type {
+    NM_BLOCK_TYPE_PUT = 0,
+    NM_BLOCK_TYPE_GET = 1,
+};
+
+/* Global data structures of the netmap dp_packet allocator. */
+static struct nm_runtime {
+    struct ovs_list port_list;     /* List of all netmap netdevs. */
+    struct ovs_list block_list[2]; /* Lists for dp_packet blocks: one for
+                                    * empty and one for full ones. */
+    void *mem;
+    uint16_t memid;
+    uint32_t memsize;
+    uint32_t nextrabufs;
+} nmr = { 0 };
+
+/* Each thread uses a pair of blocks for allocations and deallocations. */
+struct nm_alloc {
+    struct nm_block *block[2];  /* Blocks used by TX/RX to allocate/deallocate
+                                 * dp_packets. */
+};
+
+/* Thread-local allocators for packet allocations/deallocations. */
+DEFINE_STATIC_PER_THREAD_DATA(struct nm_alloc, nma, { 0 });
+#define NMA nma_get()
+#define PUTB nma_get()->block[NM_BLOCK_TYPE_PUT]
+#define GETB nma_get()->block[NM_BLOCK_TYPE_GET]
+
+/* Creates a new block.
+ * The block can be empty or initialized with new dp_packets associated to
+ * netmap buffers not attached to a netmap ring. */
+static struct nm_block*
+nm_block_new(struct nm_desc *nmd) {
+    struct nm_block *block;
+
+    block = xmalloc(sizeof(struct nm_block));
+    block->idx = 0;
+    ovs_list_init(&block->node);
+
+    if (nmd) {
+        struct dp_packet *packet;
+        struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, 0);
+        uint32_t idx = nmd->nifp->ni_bufs_head;
+
+        for (int i = 0; idx && i < DP_BLOCK_SIZE;
+            i++, idx = *(uint32_t *)NETMAP_BUF(ring, idx)) {
+            packet = dp_packet_new(0);
+            packet->buf_idx = idx;
+            packet->source = DPBUF_NETMAP;
+            block->packets[block->idx++] = packet;
+        }
+
+        nmd->nifp->ni_bufs_head = idx;
+    }
+
+    return block;
+}
+
+/* Swaps blocks from nm_runtime in order to replace the current block with
+ * an empty or full block.
+ * If we want GETB to be swapped with a block filled with dp_packets, we
+ * specify NM_BLOCK_TYPE_GET.
+ * If we want PUTB to be swapped with an empty block, we specify
+ * NM_BLOCK_TYPE_PUT. */
+static void
+nm_block_swap_global(enum nm_block_type type) {
+    struct nm_block **bselect = NULL;
+    struct nm_block *bswap = NULL, *btmp;
+
+    ovs_mutex_lock(&netmap_mutex);
+
+    bselect = &(NMA->block[type]);
+
+    /* Try to pop a block from the correct list. */
+    if (!ovs_list_is_empty(&nmr.block_list[type])) {
+        bswap = CONTAINER_OF(ovs_list_pop_front(&nmr.block_list[type]),
+                        struct nm_block, node);
+    } else {
+        bswap = nm_block_new(NULL);
+    }
+
+    /* Swap blocks. */
+    if (OVS_LIKELY(bswap)) {
+        btmp = *bselect;
+        *bselect = bswap;
+        /* If the current block is empty it will be pushed to the empty list,
+         * otherwise to the full one. */
+        type = btmp->idx ? NM_BLOCK_TYPE_GET : NM_BLOCK_TYPE_PUT;
+        ovs_list_push_back(&nmr.block_list[type], &btmp->node);
+    }
+
+    ovs_mutex_unlock(&netmap_mutex);
+}
+
+/* Swap the two blocks of the local allocator. */
+static void
+nm_block_swap_local(void) {
+    struct nm_block* block = GETB;
+    GETB = PUTB;
+    PUTB = block;
+}
+
+/* Frees a block from memory.
+ * If nmd is specified we will return extra buffers to this
+ * nm_desc if the block contains any dp_packet. */
+static void
+nm_block_free(struct nm_block* b, struct nm_desc *nmd) {
+    if (b) {
+        if (nmd) {
+            struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, 0);
+
+            for (int i = 0; i < b->idx; i++) {
+                struct dp_packet *packet = b->packets[i];
+                if (packet) {
+                    uint32_t *e =
+                        (uint32_t *) NETMAP_BUF(ring, packet->buf_idx);
+                    *e = nmd->nifp->ni_bufs_head;
+                    nmd->nifp->ni_bufs_head = packet->buf_idx;
+                    free(packet);
+                }
+            }
+        }
+
+        free(b);
+    }
+}
+
+/* Set up the port by checking if any other port has already been opened.
+ * Prepare blocks of dp_packets. */
+static int
+netmap_setup_port(struct nm_desc *nmd) {
+    ovs_mutex_lock(&netmap_mutex);
+
+    if (ovs_list_size(&nmr.port_list)) {
+        /* Netmap memory has already been set up, check if the new port uses
+         * the same memid. */
+        if (nmr.memid != nmd->req.nr_arg2) {
+            VLOG_WARN("unable to add this port, it has a new mem_id
(%x->%x)",
+                    nmr.memid, nmd->req.nr_arg2);
+            ovs_mutex_unlock(&netmap_mutex);
+            return 1;
+        }
+    } else {
+        /* We are initializing the first Netmap port: set up Netmap memory
+         * for this process. */
+        nmr.memid = nmd->req.nr_arg2;
+        nmr.memsize = nmd->req.nr_memsize;
+        nmr.mem = mmap(0, nmr.memsize, PROT_WRITE | PROT_READ,
+                        MAP_SHARED, nmd->fd, 0);
+
+        if (nmr.mem == MAP_FAILED) {
+            VLOG_WARN("mmap has failed!");
+            ovs_mutex_unlock(&netmap_mutex);
+            return 1;
+        }
+    }
+
+    /* Now we can set up the following nmd fields */
+    {
+        struct netmap_if *nifp;
+
+        nmd->memsize = nmr.memsize;
+        nmd->mem = nmr.mem;
+        nifp = NETMAP_IF(nmd->mem, nmd->req.nr_offset);
+        *(struct netmap_if **)(uintptr_t)&(nmd->nifp) = nifp;
+    }
+
+    /* Allocate a number of blocks containing dp_packets. The total number
+     * of extrabuffers to be used is a multiple of the block size. */
+    uint32_t nextrabufs = nmd->req.nr_arg3 & ~(DP_BLOCK_SIZE-1);
+    struct nm_block *block;
+    for (int i = 0 ; i < (nextrabufs/DP_BLOCK_SIZE); i++) {
+        block = nm_block_new(nmd);
+        ovs_list_push_back(&nmr.block_list[NM_BLOCK_TYPE_GET], &block->node);
+    }
+
+    ovs_mutex_unlock(&netmap_mutex);
+
+    return 0;
+}
+
+/* This function initializes per-thread state and has to be called from the
+ * pmd thread reload path.
+ * It sets up the thread-local blocks and records the polling thread id on
+ * the netdev, so that later we can tell which ports share a thread. */
+void
+netmap_init_port(struct netdev_rxq *rxq) {
+
+    ovs_mutex_lock(&netmap_mutex);
+
+    if (is_netmap_class(netdev_get_class(rxq->netdev))) {
+        struct netdev_netmap *dev = netdev_netmap_cast(rxq->netdev);
+        dev->tid = syscall(SYS_gettid);
+        dev->nma = NMA;
+    }
+
+    /* We need to initialize new blocks in the local allocator */
+    if (!GETB) {
+        GETB = nm_block_new(NULL);
+    }
+
+    if (!PUTB) {
+        PUTB = nm_block_new(NULL);
+    }
+
+    ovs_mutex_unlock(&netmap_mutex);
+}
+
+/* This function is called upon dp_packet deallocation. The pointer is not
+ * deallocated but saved in a nm_block that has free space. */
+void
+netmap_free_packet(struct dp_packet* packet) {
+    struct nm_block* block = PUTB;
+
+    if (OVS_UNLIKELY(block->idx == (DP_BLOCK_SIZE - 1))) {
+        block = GETB;
+        if (OVS_UNLIKELY(block->idx == (DP_BLOCK_SIZE - 1))) {
+            nm_block_swap_global(NM_BLOCK_TYPE_PUT);
+            block = PUTB;
+        }
+    }
+
+    block->packets[block->idx++] = packet;
+}
+
+/* Allocates 'n' dp_packets into the batch. This operation might require
+ * multiple memcpy operations. If no thread-local nm_block has data we need
+ * to request a new block from the nm_runtime. */
+static int
+netmap_alloc_packets(struct dp_packet_batch* b, size_t n) {
+    struct nm_block* block;
+    size_t step, tot = 0, s;
+
+    for (step = 0; step < 3; step++) {
+        block = GETB;
+        s = MIN(n, block->idx);
+        memcpy(&b->packets[tot], &block->packets[block->idx - s],
+                s * sizeof(struct dp_packet*));
+        block->idx -= s;
+        tot += s;
+        n -= s;
+
+        if (n == 0) {
+            break;
+        } else if (OVS_LIKELY(step == 0)) {
+            nm_block_swap_local();
+        } else {
+            nm_block_swap_global(NM_BLOCK_TYPE_GET);
+        }
+    }
+
+    return tot;
+}
+
+/* Set up some values from the configuration. */
+void
+netmap_init_config(const struct smap *ovs_other_config) {
+    nmr.nextrabufs = (uint32_t)
+        smap_get_int(ovs_other_config, "netmap-nextrabufs", DP_BLOCK_SIZE);
+
+    nmr.nextrabufs &= ~(DP_BLOCK_SIZE-1);
+
+    VLOG_INFO("nextrabufs: %d", nmr.nextrabufs);
+}
+
+static struct netdev_rxq *
+netdev_netmap_rxq_alloc(void)
+{
+    struct netdev_rxq_netmap *rx = xzalloc(sizeof *rx);
+    return &rx->up;
+}
+
+static int
+netdev_netmap_rxq_construct(struct netdev_rxq *rxq OVS_UNUSED)
+{
+    /* Nothing to do here */
+    return 0;
+}
+
+static void
+netdev_netmap_rxq_destruct(struct netdev_rxq *rxq OVS_UNUSED)
+{
+    /* Nothing to do here */
+    return;
+}
+
+static void
+netdev_netmap_rxq_dealloc(struct netdev_rxq *rxq)
+{
+    struct netdev_rxq_netmap *rx = netdev_rxq_netmap_cast(rxq);
+    free(rx);
+}
+
+static struct netdev *
+netdev_netmap_alloc(void)
+{
+    struct netdev_netmap *dev;
+
+    dev = (struct netdev_netmap *) xzalloc(sizeof *dev);
+    if (dev) {
+        return &dev->up;
+    }
+
+    return NULL;
+}
+
+static int
+netdev_netmap_construct(struct netdev *netdev)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+    const char *ifname = netdev_get_name(netdev);
+
+    struct nmreq req;
+    memset(&req, 0 , sizeof(req));
+    req.nr_arg3 = nmr.nextrabufs;
+
+    /* Open the Netmap port requesting a number of extrabuffers. We also
+     * avoid mmapping netmap memory here. */
+    dev->nmd = nm_open(ifname, &req, NM_OPEN_NO_MMAP, NULL);
+
+    if (!dev->nmd) {
+        if (!errno) {
+            VLOG_WARN("opening port \"%s\" failed: not a netmap port",
ifname);
+        } else {
+            VLOG_WARN("opening port \"%s\" failed: %s", ifname,
+                ovs_strerror(errno));
+        }
+        return EINVAL;
+    } else {
+        VLOG_INFO("opening port \"%s\"", ifname);
+    }
+
+    /* Check if we have enough extra buffers to create a nm_block. */
+    if (dev->nmd->req.nr_arg3 < DP_BLOCK_SIZE) {
+        VLOG_WARN("not enough extra buffers(%d/%d), closing port",
+                dev->nmd->req.nr_arg3, DP_BLOCK_SIZE);
+        nm_close(dev->nmd);
+        return EINVAL;
+    }
+
+    /* Possibly mmap netmap memory, initialize the nm_desc, nm_runtime.
+     * Allocate some nm_blocks using the extrabuffers given to this port. */
+    if (netmap_setup_port(dev->nmd)) {
+        VLOG_WARN("could not setup \"%s\" port", ifname);
+        nm_close(dev->nmd);
+        return EINVAL;
+    }
+
+    ovs_list_init(&dev->list_node);
+    ovs_mutex_lock(&netmap_mutex);
+    ovs_list_push_front(&nmr.port_list, &dev->list_node);
+    ovs_mutex_unlock(&netmap_mutex);
+
+    ovs_mutex_init(&dev->mutex);
+    pthread_spin_init(&dev->tx_lock, PTHREAD_PROCESS_SHARED);
+    eth_addr_random(&dev->hwaddr);
+    dev->flags = NETDEV_UP | NETDEV_PROMISC;
+    dev->timestamp = netmap_rdtsc();
+    dev->rxsync_intval = DEFAULT_RSYNC_INTVAL;
+    dev->requested_mtu = NETMAP_RXRING(dev->nmd->nifp, 0)->nr_buf_size;
+    netdev_request_reconfigure(netdev);
+
+    return 0;
+}
+
+static void
+netdev_netmap_destruct(struct netdev *netdev)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+    struct nm_block* b;
+
+    ovs_mutex_lock(&netmap_mutex);
+    VLOG_INFO("closing port \"%s\"", (const char*)
netdev_get_name(netdev));
+
+    ovs_list_remove(&dev->list_node);
+
+    /* A netmap netdev is being removed.
+     * If this is the last netmap port we remove all blocks. */
+    if (!ovs_list_size(&nmr.port_list)) {
+        LIST_FOR_EACH_POP(b, node, &nmr.block_list[NM_BLOCK_TYPE_PUT]) {
+            nm_block_free(b, dev->nmd);
+        }
+
+        LIST_FOR_EACH_POP(b, node, &nmr.block_list[NM_BLOCK_TYPE_GET]) {
+            nm_block_free(b, dev->nmd);
+        }
+    } else {
+        struct netdev_netmap *d;
+        enum nm_block_type type;
+        int last_thread_port = true;
+
+        /* Check if there are other netmap ports using the same thread id. */
+        LIST_FOR_EACH(d, list_node, &nmr.port_list) {
+            if (dev->tid == d->tid) {
+                last_thread_port = false;
+                break;
+            }
+        }
+
+        /* If no other port uses this thread id, return the thread-local
+         * blocks to the global allocator nm_runtime. */
+        if (last_thread_port) {
+            b = dev->nma->block[NM_BLOCK_TYPE_PUT];
+            type = b->idx ? NM_BLOCK_TYPE_GET : NM_BLOCK_TYPE_PUT;
+            ovs_list_push_front(&nmr.block_list[type], &b->node);
+            dev->nma->block[NM_BLOCK_TYPE_PUT] = NULL;
+
+            b = dev->nma->block[NM_BLOCK_TYPE_GET];
+            type = b->idx ? NM_BLOCK_TYPE_GET : NM_BLOCK_TYPE_PUT;
+            ovs_list_push_front(&nmr.block_list[type], &b->node);
+            dev->nma->block[NM_BLOCK_TYPE_GET] = NULL;
+        }
+
+        /* We will now try to free a number of blocks equal to the blocks
+         * allocated when the port was created.
+         * Each block is then freed, returning the extra bufs to the
+         * nm_desc. */
+        int nblocks = nmr.nextrabufs / DP_BLOCK_SIZE;
+        LIST_FOR_EACH_POP(b, node, &nmr.block_list[NM_BLOCK_TYPE_GET]) {
+            nm_block_free(b, dev->nmd);
+            if (!--nblocks) {
+                break;
+            }
+        }
+
+        if (!ovs_list_is_empty(&nmr.block_list[NM_BLOCK_TYPE_PUT])) {
+            struct ovs_list *list_node = ovs_list_pop_front(
+                                    &nmr.block_list[NM_BLOCK_TYPE_PUT]);
+            b = CONTAINER_OF(list_node, struct nm_block, node);
+            nm_block_free(b, dev->nmd);
+        }
+    }
+
+    ovs_mutex_unlock(&netmap_mutex);
+
+    /* Now we can close the port. */
+    nm_close(dev->nmd);
+}
+
+static void
+netdev_netmap_dealloc(struct netdev *netdev)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_destroy(&dev->mutex);
+    pthread_spin_destroy(&dev->tx_lock);
+
+    free(dev);
+}
+
+static int
+netdev_netmap_class_init(void)
+{
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+    if (ovsthread_once_start(&once)) {
+        ovs_list_init(&nmr.block_list[NM_BLOCK_TYPE_PUT]);
+        ovs_list_init(&nmr.block_list[NM_BLOCK_TYPE_GET]);
+        ovs_list_init(&nmr.port_list);
+        ovsthread_once_done(&once);
+    }
+
+    return 0;
+}
+
+static int
+netdev_netmap_reconfigure(struct netdev *netdev)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+    int err = 0;
+
+    ovs_mutex_lock(&dev->mutex);
+
+    if (dev->mtu == dev->requested_mtu) {
+        /* Reconfiguration is unnecessary */
+        goto out;
+    }
+
+    dev->mtu = dev->requested_mtu;
+    netdev_change_seq_changed(netdev);
+
+out:
+    ovs_mutex_unlock(&dev->mutex);
+    return err;
+}
+
+static int
+netdev_netmap_get_config(const struct netdev *netdev, struct smap *args)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    smap_add_format(args, "mtu", "%d", dev->mtu);
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static int
+netdev_netmap_set_config(struct netdev *netdev, const struct smap *args,
+                         char **errp OVS_UNUSED)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    dev->rxsync_intval = smap_get_int(args, "rxsync-intval",
+            DEFAULT_RSYNC_INTVAL);
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static inline void
+netmap_rxsync(struct netdev_netmap *dev)
+{
+    uint64_t now = netmap_rdtsc();
+    unsigned int diff = TSC2US(now - dev->timestamp);
+
+    if (diff < dev->rxsync_intval) {
+        /* skipping rxsync */
+        return;
+    }
+
+    ioctl(dev->nmd->fd, NIOCRXSYNC, NULL);
+
+    /* update current timestamp */
+    dev->timestamp = now;
+}
+
+static inline void
+netmap_swap_slot(struct dp_packet *packet, struct netmap_slot *s) {
+    uint32_t idx;
+
+    idx = s->buf_idx;
+    s->buf_idx = packet->buf_idx;
+    s->flags |= NS_BUF_CHANGED;
+    packet->buf_idx = idx;
+}
+
+static int
+netdev_netmap_send(struct netdev *netdev, int qid OVS_UNUSED,
+                     struct dp_packet_batch *batch, bool concurrent_txq)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+    struct nm_desc *nmd = dev->nmd;
+    uint16_t r, nrings = dev->nmd->nifp->ni_tx_rings;
+    uint32_t budget = batch->count, count = 0;
+    bool again = false;
+
+    if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
+        dp_packet_delete_batch(batch, true);
+        return 0;
+    }
+
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        pthread_spin_lock(&dev->tx_lock);
+    }
+
+try_again:
+    for (r = 0; r < nrings; r++) {
+        struct netmap_ring *ring;
+        uint32_t head, space;
+
+        ring = NETMAP_TXRING(nmd->nifp, nmd->cur_tx_ring);
+        space = nm_ring_space(ring); /* Available slots in this ring. */
+        head = ring->head;
+
+        if (space > budget) {
+            space = budget;
+        }
+        budget -= space;
+
+        /* Transmit as much as possible in this ring. */
+        while (space--) {
+            struct netmap_slot *ts = &ring->slot[head];
+            struct dp_packet *packet = batch->packets[count++];
+
+            ts->len = dp_packet_get_send_len(packet);
+
+            if (OVS_UNLIKELY(packet->source != DPBUF_NETMAP)) {
+                /* send packet copying data to the netmap slot */
+                memcpy(NETMAP_BUF(ring, ts->buf_idx),
+                        dp_packet_data(packet), ts->len);
+            } else {
+                /* send packet using zerocopy */
+                netmap_swap_slot(packet, ts);
+            }
+
+            head = nm_ring_next(ring, head);
+        }
+
+        ring->head = ring->cur = head;
+
+        /* We may have exhausted the budget */
+        if (OVS_LIKELY(!budget)) {
+            break;
+        }
+
+        /* We still have packets to send, select next ring. */
+        if (OVS_UNLIKELY(++dev->nmd->cur_tx_ring == nrings)) {
+            nmd->cur_tx_ring = 0;
+        }
+    }
+
+    ioctl(dev->nmd->fd, NIOCTXSYNC, NULL);
+
+    if (OVS_UNLIKELY(!count && !again)) {
+        again = true;
+        goto try_again;
+    }
+
+    dp_packet_delete_batch(batch, true);
+
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        pthread_spin_unlock(&dev->tx_lock);
+    }
+
+    return 0;
+}
+
+static int
+netdev_netmap_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(rxq->netdev);
+    struct nm_desc *nmd = dev->nmd;
+    uint16_t r, nrings = nmd->nifp->ni_rx_rings;
+    uint32_t budget = 0;
+
+    if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
+        return EAGAIN;
+    }
+
+    /* check how much we can receive */
+    for (r = nmd->first_rx_ring; r < nrings; r++) {
+        budget += nm_ring_space(NETMAP_RXRING(nmd->nifp, r));
+    }
+
+    /* sync if there is no packet */
+    if (budget == 0) {
+        netmap_rxsync(dev);
+        return EAGAIN;
+    }
+
+    /* allocate the batch */
+    budget = netmap_alloc_packets(batch, MIN(budget, NETDEV_MAX_BURST));
+
+    for (r = 0; r < nrings; r++) {
+        struct netmap_ring *ring;
+        uint32_t head, space;
+
+        ring = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+        head = ring->head;
+        space = nm_ring_space(ring);
+
+        if (space > budget) {
+            space = budget;
+        }
+        budget -= space;
+
+        /* Receive as much as possible from this ring. */
+        while (space--) {
+            struct netmap_slot *rs = &ring->slot[head];
+            struct dp_packet *packet = batch->packets[batch->count++];
+            dp_packet_init_netmap(packet, NETMAP_BUF(ring, rs->buf_idx),
+                                    rs->len);
+            /* Receiving from a netmap port we can always zero-copy here. */
+            netmap_swap_slot(packet, rs);
+            head = nm_ring_next(ring, head);
+        }
+
+        ring->cur = ring->head = head;
+
+        /* check if the batch has been filled. */
+        if (!budget) {
+            break;
+        }
+
+        /* batch isn't full, try to receive on other rings. */
+        if (OVS_UNLIKELY(++nmd->cur_rx_ring == nrings)) {
+            nmd->cur_rx_ring = 0;
+        }
+    }
+
+    dp_packet_batch_init_packet_fields(batch);
+
+    return 0;
+}
+
+static int
+netdev_netmap_get_ifindex(const struct netdev *netdev)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    /* Calculate hash from the netdev name. Ensure that ifindex is a 24-bit
+     * positive integer to meet RFC 2863 recommendations.
+     */
+    int ifindex = hash_string(netdev->name, 0) % 0xfffffe + 1;
+    ovs_mutex_unlock(&dev->mutex);
+
+    return ifindex;
+}
+
+static int
+netdev_netmap_get_mtu(const struct netdev *netdev, int *mtu)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    *mtu = dev->mtu;
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static int
+netdev_netmap_set_mtu(struct netdev *netdev, int mtu)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    if (mtu > NETMAP_RXRING(dev->nmd->nifp, 0)->nr_buf_size
+        || mtu < ETH_HEADER_LEN) {
+        VLOG_WARN("%s: unsupported MTU %d\n", dev->up.name, mtu);
+        return EINVAL;
+    }
+
+    ovs_mutex_lock(&dev->mutex);
+    if (dev->requested_mtu != mtu) {
+        dev->requested_mtu = mtu;
+        netdev_request_reconfigure(netdev);
+    }
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static int
+netdev_netmap_set_etheraddr(struct netdev *netdev, const struct eth_addr mac)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    dev->hwaddr = mac;
+    netdev_change_seq_changed(netdev);
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static int
+netdev_netmap_get_etheraddr(const struct netdev *netdev, struct eth_addr *mac)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    *mac = dev->hwaddr;
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static int
+netdev_netmap_update_flags(struct netdev *netdev,
+                          enum netdev_flags off, enum netdev_flags on,
+                          enum netdev_flags *old_flagsp)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) {
+        return EINVAL;
+    }
+
+    ovs_mutex_lock(&dev->mutex);
+
+    *old_flagsp = dev->flags;
+    dev->flags |= on;
+    dev->flags &= ~off;
+
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static int
+netdev_netmap_get_carrier(const struct netdev *netdev, bool *carrier)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    *carrier = true;
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static int
+netdev_netmap_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    stats->tx_packets = dev->stats.tx_packets;
+    stats->tx_bytes = dev->stats.tx_bytes;
+    stats->rx_packets = dev->stats.rx_packets;
+    stats->rx_bytes = dev->stats.rx_bytes;
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static int
+netdev_netmap_get_status(const struct netdev *netdev, struct smap *args)
+{
+    struct netdev_netmap *dev = netdev_netmap_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    smap_add_format(args, "mtu", "%d", dev->mtu);
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+#define NETDEV_NETMAP_CLASS(NAME, PMD, INIT, CONSTRUCT, DESTRUCT, SET_CONFIG, \
+        SET_TX_MULTIQ, SEND, SEND_WAIT, GET_CARRIER, GET_STATS, GET_FEATURES, \
+        GET_STATUS, RECONFIGURE, RXQ_RECV, RXQ_WAIT)        \
+{                                                           \
+    NAME,                                                   \
+    PMD,                        /* is_pmd */                \
+    INIT,                       /* init */                  \
+    NULL,                       /* netdev_netmap_run */     \
+    NULL,                       /* netdev_netmap_wait */    \
+    netdev_netmap_alloc,                                    \
+    CONSTRUCT,                                              \
+    DESTRUCT,                                               \
+    netdev_netmap_dealloc,                                  \
+    netdev_netmap_get_config,                               \
+    SET_CONFIG,                                             \
+    NULL,                       /* get_tunnel_config */     \
+    NULL,                       /* build header */          \
+    NULL,                       /* push header */           \
+    NULL,                       /* pop header */            \
+    NULL,                       /* get numa id */           \
+    SET_TX_MULTIQ,              /* tx multiq */             \
+    SEND,                       /* send */                  \
+    SEND_WAIT,                                              \
+    netdev_netmap_set_etheraddr,                            \
+    netdev_netmap_get_etheraddr,                            \
+    netdev_netmap_get_mtu,                                  \
+    netdev_netmap_set_mtu,                                  \
+    netdev_netmap_get_ifindex,                              \
+    GET_CARRIER,                                            \
+    NULL,                       /* get_carrier_resets */    \
+    NULL,                       /* get_miimon */            \
+    GET_STATS,                                              \
+    NULL,                       /* get_custom_stats */      \
+                                                            \
+    NULL,                       /* get_features */          \
+    NULL,                       /* set_advertisements */    \
+    NULL,                       /* get_pt_mode */           \
+                                                            \
+    NULL,                       /* set_policing */          \
+    NULL,                       /* get_qos_types */         \
+    NULL,                       /* get_qos_capabilities */  \
+    NULL,                       /* get_qos */               \
+    NULL,                       /* set_qos */               \
+    NULL,                       /* get_queue */             \
+    NULL,                       /* set_queue */             \
+    NULL,                       /* delete_queue */          \
+    NULL,                       /* get_queue_stats */       \
+    NULL,                       /* queue_dump_start */      \
+    NULL,                       /* queue_dump_next */       \
+    NULL,                       /* queue_dump_done */       \
+    NULL,                       /* dump_queue_stats */      \
+                                                            \
+    NULL,                       /* set_in4 */               \
+    NULL,                       /* get_addr_list */         \
+    NULL,                       /* add_router */            \
+    NULL,                       /* get_next_hop */          \
+    GET_STATUS,                                             \
+    NULL,                       /* arp_lookup */            \
+                                                            \
+    netdev_netmap_update_flags,                             \
+    RECONFIGURE,                                            \
+                                                            \
+    netdev_netmap_rxq_alloc,                                \
+    netdev_netmap_rxq_construct,                            \
+    netdev_netmap_rxq_destruct,                             \
+    netdev_netmap_rxq_dealloc,                              \
+    RXQ_RECV,                                               \
+    RXQ_WAIT,                                               \
+    NULL,                       /* rxq_drain */             \
+    NO_OFFLOAD_API                                          \
+}
+
+static const struct netdev_class netmap_class =
+    NETDEV_NETMAP_CLASS(
+        "netmap",
+        true,
+        netdev_netmap_class_init,
+        netdev_netmap_construct,
+        netdev_netmap_destruct,
+        netdev_netmap_set_config,
+        NULL,
+        netdev_netmap_send,
+        NULL,
+        netdev_netmap_get_carrier,
+        netdev_netmap_get_stats,
+        NULL,
+        netdev_netmap_get_status,
+        netdev_netmap_reconfigure,
+        netdev_netmap_rxq_recv,
+        NULL);
+
+void
+netdev_netmap_register(void)
+{
+    netdev_register_provider(&netmap_class);
+}
diff --git a/lib/netdev-netmap.h b/lib/netdev-netmap.h
new file mode 100644
index 000000000..49fe8c319
--- /dev/null
+++ b/lib/netdev-netmap.h
@@ -0,0 +1,13 @@
+#ifndef NETDEV_NETMAP_H
+#define NETDEV_NETMAP_H
+
+struct netdev_rxq;
+struct smap;
+struct dp_packet;
+
+void netmap_init_port(struct netdev_rxq *);
+void netmap_init_config(const struct smap *);
+void netmap_free_packet(struct dp_packet *);
+void netdev_netmap_register(void);
+
+#endif /* netdev-netmap.h */
diff --git a/lib/netmap-stub.c b/lib/netmap-stub.c
new file mode 100644
index 000000000..62f7a06b8
--- /dev/null
+++ b/lib/netmap-stub.c
@@ -0,0 +1,21 @@
+#include <config.h>
+#include "netmap.h"
+
+#include "smap.h"
+#include "ovs-thread.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(netmap);
+
+void
+netmap_init(const struct smap *ovs_other_config)
+{
+    if (smap_get_bool(ovs_other_config, "netmap-init", false)) {
+        static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+        if (ovsthread_once_start(&once)) {
+            VLOG_ERR("NETMAP not supported in this copy of Open vSwitch.");
+            ovsthread_once_done(&once);
+        }
+    }
+}
diff --git a/lib/netmap.c b/lib/netmap.c
new file mode 100644
index 000000000..b4147e0ad
--- /dev/null
+++ b/lib/netmap.c
@@ -0,0 +1,76 @@
+#include <config.h>
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <sys/time.h>   /* timersub */
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h> /* read() */
+
+#include "dirs.h"
+#include "netdev-netmap.h"
+#include "netmap.h"
+#include "openvswitch/vlog.h"
+#include "smap.h"
+
+VLOG_DEFINE_THIS_MODULE(netmap);
+
+/* initialize to avoid a division by 0 */
+uint64_t netmap_ticks_per_second = 1000000000; /* set by calibrate_tsc */
+
+/*
+ * Do an idle loop to compute the clock speed. We expect a constant TSC
+ * rate, synchronized across all CPUs.
+ * Returns ticks per second.
+ */
+static uint64_t
+netmap_calibrate_tsc(void)
+{
+    struct timeval a, b;
+    uint64_t ta_0, ta_1, tb_0, tb_1, dmax = ~0;
+    uint64_t da, db, cy = 0;
+    int i;
+    for (i = 0; i < 3; i++) {
+        ta_0 = netmap_rdtsc();
+        gettimeofday(&a, NULL);
+        ta_1 = netmap_rdtsc();
+        usleep(20000);
+        tb_0 = netmap_rdtsc();
+        gettimeofday(&b, NULL);
+        tb_1 = netmap_rdtsc();
+        da = ta_1 - ta_0;
+        db = tb_1 - tb_0;
+        if (da + db < dmax) {
+            cy = (b.tv_sec - a.tv_sec) * 1000000 + b.tv_usec - a.tv_usec;
+            cy = (double)(tb_0 - ta_1) * 1000000 / (double)cy;
+            dmax = da + db;
+        }
+    }
+    netmap_ticks_per_second = cy;
+    return cy;
+}
+
+void
+netmap_init(const struct smap *ovs_other_config)
+{
+    static bool enabled = false;
+
+    if (enabled || !ovs_other_config) {
+        return;
+    }
+
+    if (smap_get_bool(ovs_other_config, "netmap-init", false)) {
+        static struct ovsthread_once once_enable =
+            OVSTHREAD_ONCE_INITIALIZER;
+        if (ovsthread_once_start(&once_enable)) {
+            netmap_calibrate_tsc();
+            netmap_init_config(ovs_other_config);
+            netdev_netmap_register();
+            enabled = true;
+            ovsthread_once_done(&once_enable);
+            VLOG_INFO("NETMAP Enabled");
+        }
+    } else {
+        VLOG_INFO_ONCE("NETMAP Disabled - Use other_config:netmap-init "
+                       "to enable");
+    }
+}
diff --git a/lib/netmap.h b/lib/netmap.h
new file mode 100644
index 000000000..34ff7b7a2
--- /dev/null
+++ b/lib/netmap.h
@@ -0,0 +1,27 @@
+#ifndef NETMAP_H
+#define NETMAP_H
+
+#include <stdint.h>
+
+extern uint64_t netmap_ticks_per_second;
+#define US2TSC(x) ((x)*netmap_ticks_per_second/1000000UL)
+#define TSC2US(x) ((x)*1000000UL/netmap_ticks_per_second)
+
+#if 0 /* gcc intrinsic */
+#include <x86intrin.h>
+#define rdtsc __rdtsc
+#else
+static inline uint64_t
+netmap_rdtsc(void)
+{
+    uint32_t hi, lo;
+    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+    return (uint64_t)lo | ((uint64_t)hi << 32);
+}
+#endif
+
+struct smap;
+
+void netmap_init(const struct smap *ovs_other_config);
+
+#endif /* netmap.h */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index d90997e3a..2dfcbb7f6 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -38,6 +38,7 @@
 #include "mac-learning.h"
 #include "mcast-snooping.h"
 #include "netdev.h"
+#include "netmap.h"
 #include "nx-match.h"
 #include "ofproto/bond.h"
 #include "ofproto/ofproto.h"
@@ -2977,6 +2978,7 @@ bridge_run(void)
     if (cfg) {
         netdev_set_flow_api_enabled(&cfg->other_config);
         dpdk_init(&cfg->other_config);
+        netmap_init(&cfg->other_config);
     }

     /* Initialize the ofproto library.  This only needs to run once, but
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index f899a1976..f6dd6e7b6 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -217,6 +217,46 @@
         </p>
       </column>

+      <column name="other_config" key="netmap-init"
+              type='{"type": "boolean"}'>
+        <p>
+          Set this value to <code>true</code> to enable runtime support for
+          NETMAP ports. The vswitch must have compile-time support for NETMAP
+          as well.
+        </p>
+        <p>
+          The default value is <code>false</code>. Changing this value
+          requires restarting the daemon.
+        </p>
+        <p>
+          If this value is <code>false</code> at startup, any netmap ports
+          which are configured in the bridge will fail.
+        </p>
+      </column>
+
+      <column name="other_config" key="netmap-nextrabufs"
+              type='{"type": "integer", "minInteger": 32}'>
+        <p>
+            Specifies the number of extra buffers to be requested from netmap
+            when opening each netmap port.
+        </p>
+        <p>
+            Each packet received or transmitted by OVS from/to a netmap port
+            needs an extra buffer. The OVS netmap runtime needs at least a
+            batch worth of extra buffers (32 packets) for each port to
+            function properly. More extra buffers may be necessary if OVS
+            temporarily stores netmap buffers within its internal queues.
+        </p>
+      </column>
+
+      <column name="other_config" key="rxsync-intval"
+              type='{"type": "integer", "minInteger": 0}'>
+        <p>
+            Specifies the minimum time (in microseconds) between two
+            consecutive rxsync calls issued on a netmap port.
+        </p>
+      </column>
+
       <column name="other_config" key="dpdk-init"
               type='{"type": "boolean"}'>
         <p>

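For anyone who wants to give the patch a quick try, something like the
following should work (a sketch only: it assumes a userspace bridge, the port
name is whatever nm_open() accepts (e.g. netmap:eth1), the value 64 for
netmap-nextrabufs is just an example, and rxsync-intval is read per-interface
by set_config()):

  # Enable the netmap runtime (read once at startup, like dpdk-init) and
  # size the extra-buffer pool.
  ovs-vsctl set Open_vSwitch . other_config:netmap-init=true
  ovs-vsctl set Open_vSwitch . other_config:netmap-nextrabufs=64

  # Netmap ports need the userspace (netdev) datapath.
  ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev
  ovs-vsctl add-port br0 netmap:eth1 -- set Interface netmap:eth1 type=netmap
  ovs-vsctl set Interface netmap:eth1 options:rxsync-intval=5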

2018-03-20 15:07 GMT+01:00 Alessandro Rosetti <alessandro.rosetti at gmail.com>:

> Hi Darrell,
>
> I'm developing netmap support for my thesis and I hope it will make it for
> OVS 2.10.
> In the next few days I'm going to post the first prototype patch, which is
> almost ready.
>
> Thanks to you,
> Alessandro
>
> On 19 Mar 2018 9:26 pm, "Darrell Ball" <dlu998 at gmail.com> wrote:
>
>> Hi Alessandro
>>
>> I also think this would be interesting.
>> Is netmap integration actively being worked on for OVS 2.10?
>>
>> Thanks Darrell
>>
>> On Wed, Feb 7, 2018 at 9:19 AM, Ilya Maximets <i.maximets at samsung.com>
>> wrote:
>>
>>> > Hi,
>>>
>>> Hi, Alessandro.
>>>
>>> >
>>> >   My name is Alessandro Rosetti, and I'm currently adding netmap
>>> > support to ovs, following an approach similar to DPDK.
>>>
>>> Good to know that someone started to work on this. IMHO, it's a good
>>> idea.
>>> I also wanted to try to implement this someday, but didn't have much time.
>>>
>>> >
>>> > I've created a new netdev: netdev_netmap that uses the pmd
>>> > infrastructure.
>>> > The prototype I have seems to work fine (I still need to tune
>>> > performance, test optional features, and test more complex topologies.)
>>>
>>> Cool. Looking forward to your RFC patch-set.
>>>
>>> >
>>> > I have a question about the lifetime of dp_packets.
>>> > Is there any guarantee that the dp_packets allocated in a receive
>>> > callback (e.g. netdev_netmap_rxq_recv) are consumed by OVS (e.g.
>>> > dropped, cloned, or sent to other ports) **before** a subsequent call
>>> > to the receive callback (on the same port)?
>>> > Or is it possible for dp_packets to be stored somewhere (e.g. in an OVS
>>> > internal queue) and live across subsequent invocations of the receive
>>> > callback that allocated them?
>>>
>>> I think that there was never such a guarantee, but recent changes in
>>> userspace
>>> datapath completely ruined this assumption. I mean output packet
>>> batching support.
>>>
>>> Please refer to the following commits for details:
>>> 009e003 2017-12-14 | dpif-netdev: Output packet batching.
>>> c71ea3c 2018-01-15 | dpif-netdev: Time based output batching.
>>> 00adb8d 2018-01-15 | docs: Describe output packet batching in DPDK guide.
>>>
>>> >
>>> > I need to know if this is the case to check that my current prototype
>>> > is safe.
>>> > I use per-port pre-allocation of dp_packets, for maximum performance.
>>> > I've seen that DPDK uses its internal allocator to allocate and deallocate
>>> > dp_packets, but netmap does not expose one.
>>> > Each packet received with netmap is created as a new type dp_packet:
>>> > DPBUF_NETMAP. The data points to a netmap buffer (preallocated by the
>>> > kernel).
>>> > When I receive data (netdev_netmap_rxq_recv) I reuse the dp_packets,
>>> > updating the internal pointer and a couple of additional pieces of
>>> > information stored inside the dp_packet.
>>> > When I have to send data I use zero copy if dp_packet is DPBUF_NETMAP
>>> > and copy if it's not.
>>> >
>>> > Thanks for the help!
>>> > Alessandro.
>>>
>>>
>>> _______________________________________________
>>> dev mailing list
>>> dev at openvswitch.org
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>>
>>
>>

