[ovs-dev] [patch_v1 2/3] Userspace Datapath: Introduce NAT support.

Darrell Ball dlu998 at gmail.com
Mon Jan 23 03:07:45 UTC 2017


This patch introduces NAT support for the userspace datapath.

The per packet scope of lookups for NAT and un_NAT is at
the bucket level rather than global. One hash table is
introduced to support create/delete handling. The create/delete
events may be further optimized, if the need becomes clear.

Some NAT options with limited utility (persistent, random) are
not supported yet, but will be supported in a later patch.

Signed-off-by: Darrell Ball <dlu998 at gmail.com>
---
 lib/conntrack-private.h |  25 +-
 lib/conntrack.c         | 733 ++++++++++++++++++++++++++++++++++++++++++------
 lib/conntrack.h         |  81 +++++-
 lib/dpif-netdev.c       |  85 +++++-
 tests/test-conntrack.c  |   8 +-
 5 files changed, 828 insertions(+), 104 deletions(-)

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 013f19f..b71af37 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -29,15 +29,6 @@
 #include "packets.h"
 #include "unaligned.h"
 
-struct ct_addr {
-    union {
-        ovs_16aligned_be32 ipv4;
-        union ovs_16aligned_in6_addr ipv6;
-        ovs_be32 ipv4_aligned;
-        struct in6_addr ipv6_aligned;
-    };
-};
-
 struct ct_endpoint {
     struct ct_addr addr;
     union {
@@ -60,14 +51,23 @@ struct conn_key {
     uint16_t zone;
 };
 
+struct nat_conn_key_node {
+    struct hmap_node node;
+    struct conn_key key;
+    struct conn_key value;
+};
+
 struct conn {
     struct conn_key key;
     struct conn_key rev_key;
     long long expiration;
     struct ovs_list exp_node;
     struct hmap_node node;
-    uint32_t mark;
     ovs_u128 label;
+    /* XXX: consider flattening. */
+    struct nat_action_info_t *nat_info;
+    uint32_t mark;
+    uint8_t conn_type;
 };
 
 enum ct_update_res {
@@ -76,6 +76,11 @@ enum ct_update_res {
     CT_UPDATE_NEW,
 };
 
+enum ct_conn_type {
+    CT_CONN_TYPE_DEFAULT,
+    CT_CONN_TYPE_UN_NAT,
+};
+
 struct ct_l4_proto {
     struct conn *(*new_conn)(struct conntrack_bucket *, struct dp_packet *pkt,
                              long long now);
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 9bea3d9..6f6a869 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -76,6 +76,20 @@ static void set_label(struct dp_packet *, struct conn *,
                       const struct ovs_key_ct_labels *mask);
 static void *clean_thread_main(void *f_);
 
+static struct nat_conn_key_node *
+nat_conn_keys_lookup(struct hmap *nat_conn_keys,
+                     const struct conn *conn,
+                     uint32_t basis);
+
+static void
+nat_conn_keys_remove(struct hmap *nat_conn_keys,
+                    const struct conn *conn,
+                    uint32_t basis);
+
+static bool
+nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
+		               struct conn *rev_conn);
+
 static struct ct_l4_proto *l4_protos[] = {
     [IPPROTO_TCP] = &ct_proto_tcp,
     [IPPROTO_UDP] = &ct_proto_other,
@@ -90,7 +104,7 @@ long long ct_timeout_val[] = {
 };
 
 /* If the total number of connections goes above this value, no new connections
- * are accepted */
+ * are accepted; this is for CT_CONN_TYPE_DEFAULT connections. */
 #define DEFAULT_N_CONN_LIMIT 3000000
 
 /* Initializes the connection tracker 'ct'.  The caller is responsible for
@@ -101,6 +115,11 @@ conntrack_init(struct conntrack *ct)
     unsigned i, j;
     long long now = time_msec();
 
+    ct_rwlock_init(&ct->nat_resources_lock);
+    ct_rwlock_wrlock(&ct->nat_resources_lock);
+    hmap_init(&ct->nat_conn_keys);
+    ct_rwlock_unlock(&ct->nat_resources_lock);
+
     for (i = 0; i < CONNTRACK_BUCKETS; i++) {
         struct conntrack_bucket *ctb = &ct->buckets[i];
 
@@ -139,13 +158,24 @@ conntrack_destroy(struct conntrack *ct)
         ovs_mutex_destroy(&ctb->cleanup_mutex);
         ct_lock_lock(&ctb->lock);
         HMAP_FOR_EACH_POP(conn, node, &ctb->connections) {
-            atomic_count_dec(&ct->n_conn);
+            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+                atomic_count_dec(&ct->n_conn);
+            }
             delete_conn(conn);
         }
         hmap_destroy(&ctb->connections);
         ct_lock_unlock(&ctb->lock);
         ct_lock_destroy(&ctb->lock);
     }
+    ct_rwlock_wrlock(&ct->nat_resources_lock);
+    struct nat_conn_key_node *nat_conn_key_node;
+    HMAP_FOR_EACH_POP(nat_conn_key_node, node, &ct->nat_conn_keys) {
+        free(nat_conn_key_node);
+    }
+    hmap_destroy(&ct->nat_conn_keys);
+    ct_rwlock_unlock(&ct->nat_resources_lock);
+    ct_rwlock_destroy(&ct->nat_resources_lock);
+
 }
 
 static unsigned hash_to_bucket(uint32_t hash)
@@ -167,10 +197,188 @@ write_ct_md(struct dp_packet *pkt, uint16_t state, uint16_t zone,
     pkt->md.ct_label = label;
 }
 
+static void
+nat_packet(struct dp_packet *pkt, const struct conn *conn, uint16_t *state)
+{
+    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+        *state |= CS_SRC_NAT;
+        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
+            struct ip_header *nh = dp_packet_l3(pkt);
+            packet_set_ipv4_addr(pkt, &nh->ip_src,
+                conn->rev_key.dst.addr.ipv4_aligned);
+        } else if (conn->key.dl_type == htons(ETH_TYPE_IPV6)) {
+            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
+            struct in6_addr ipv6_addr;
+            memcpy(&ipv6_addr.s6_addr32, conn->rev_key.dst.addr.ipv6.be32,
+                   sizeof ipv6_addr);
+            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
+                                 nh6->ip6_src.be32,
+								 &ipv6_addr,
+                                 true);
+        }
+
+        if (conn->nat_info->nat_action & NAT_ACTION_SRC_PORT) {
+            if (conn->key.nw_proto == IPPROTO_TCP) {
+                struct tcp_header *th = dp_packet_l4(pkt);
+                packet_set_tcp_port(pkt, conn->rev_key.dst.port,
+									th->tcp_dst);
+            } else if (conn->key.nw_proto == IPPROTO_UDP) {
+                struct udp_header *uh = dp_packet_l4(pkt);
+                packet_set_udp_port(pkt, conn->rev_key.dst.port,
+									uh->udp_dst);
+            }
+        }
+    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+        *state |= CS_DST_NAT;
+
+        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
+            struct ip_header *nh = dp_packet_l3(pkt);
+            packet_set_ipv4_addr(pkt, &nh->ip_dst,
+                                 conn->rev_key.src.addr.ipv4_aligned);
+        } else if (conn->key.dl_type == htons(ETH_TYPE_IPV6)) {
+            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
+
+            struct in6_addr ipv6_addr;
+            memcpy(&ipv6_addr.s6_addr32, conn->rev_key.src.addr.ipv6.be32,
+                   sizeof ipv6_addr);
+            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
+                                 nh6->ip6_dst.be32,
+								 &ipv6_addr,
+								 true);
+        }
+
+        if (conn->nat_info->nat_action & NAT_ACTION_DST_PORT) {
+            if (conn->key.nw_proto == IPPROTO_TCP) {
+                struct tcp_header *th = dp_packet_l4(pkt);
+                packet_set_tcp_port(pkt, th->tcp_src, conn->rev_key.src.port);
+            } else if (conn->key.nw_proto == IPPROTO_UDP) {
+                struct udp_header *uh = dp_packet_l4(pkt);
+                packet_set_udp_port(pkt, uh->udp_src, conn->rev_key.src.port);
+            }
+        }
+    }
+}
+
+static void
+un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
+              uint16_t *state)
+{
+    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+        *state |= CS_SRC_NAT;
+        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
+            struct ip_header *nh = dp_packet_l3(pkt);
+            packet_set_ipv4_addr(pkt, &nh->ip_dst,
+                conn->key.src.addr.ipv4_aligned);
+        } else if (conn->key.dl_type == htons(ETH_TYPE_IPV6)) {
+            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
+            struct in6_addr ipv6_addr;
+            memcpy(&ipv6_addr, conn->key.src.addr.ipv6.be32,
+                   sizeof ipv6_addr);
+            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
+                                 nh6->ip6_dst.be32,
+								 &ipv6_addr,
+								 true);
+        }
+
+        if (conn->nat_info->nat_action & NAT_ACTION_SRC_PORT) {
+            if (conn->key.nw_proto == IPPROTO_TCP) {
+                struct tcp_header *th = dp_packet_l4(pkt);
+                packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
+            } else if (conn->key.nw_proto == IPPROTO_UDP) {
+                struct udp_header *uh = dp_packet_l4(pkt);
+                packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
+            }
+        }
+    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
+        *state |= CS_DST_NAT;
+        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
+            struct ip_header *nh = dp_packet_l3(pkt);
+            packet_set_ipv4_addr(pkt, &nh->ip_src,
+                conn->key.dst.addr.ipv4_aligned);
+        } else if (conn->key.dl_type == htons(ETH_TYPE_IPV6)) {
+            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
+            struct in6_addr ipv6_addr;
+            memcpy(&ipv6_addr, conn->key.dst.addr.ipv6.be32,
+                   sizeof ipv6_addr);
+            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
+                                 nh6->ip6_src.be32,
+								 &ipv6_addr,
+								 true);
+        }
+
+        if (conn->nat_info->nat_action & NAT_ACTION_DST_PORT) {
+            if (conn->key.nw_proto == IPPROTO_TCP) {
+                struct tcp_header *th = dp_packet_l4(pkt);
+                packet_set_tcp_port(pkt, conn->key.dst.port,
+									th->tcp_dst);
+            } else if (conn->key.nw_proto == IPPROTO_UDP) {
+                struct udp_header *uh = dp_packet_l4(pkt);
+                packet_set_udp_port(pkt, conn->key.dst.port,
+                                    uh->udp_dst);
+            }
+        }
+    }
+}
+
+/* Typical usage of this helper is in non per-packet code;
+ * this is because the bucket lock needs to be held for lookup
+ * and a hash would have already been needed. Hence, this function
+ * is just intended for code clarity. */
+static struct conn *
+conn_lookup(struct conntrack *ct, struct conn_key *key, long long now)
+{
+    struct conn_lookup_ctx ctx;
+    ctx.conn = NULL;
+    ctx.key = *key;
+    ctx.hash = conn_key_hash(key, ct->hash_basis);
+    unsigned bucket = hash_to_bucket(ctx.hash);
+    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
+    return ctx.conn;
+}
+
+static void
+nat_clean(struct conntrack *ct, struct conn *conn,
+          struct conntrack_bucket *ctb)
+    OVS_REQUIRES(ctb->lock)
+{
+    long long now = time_msec();
+    ct_rwlock_wrlock(&ct->nat_resources_lock);
+    nat_conn_keys_remove(&ct->nat_conn_keys, conn, ct->hash_basis);
+    ct_rwlock_unlock(&ct->nat_resources_lock);
+    ct_lock_unlock(&ctb->lock);
+
+    uint32_t hash_rev_conn = conn_key_hash(&conn->rev_key, ct->hash_basis);
+    unsigned bucket_rev_conn = hash_to_bucket(hash_rev_conn);
+
+    ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
+    ct_rwlock_wrlock(&ct->nat_resources_lock);
+
+    struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
+
+	struct nat_conn_key_node *nat_conn_key_node =
+        nat_conn_keys_lookup(&ct->nat_conn_keys, conn, ct->hash_basis);
+
+    /* In the unlikely event that rev_conn was recreated, skip its cleanup. */
+    if ((rev_conn) && (!nat_conn_key_node ||
+         memcmp(&nat_conn_key_node->value, &rev_conn->key,
+                sizeof nat_conn_key_node->value))) {
+        hmap_remove(&ct->buckets[bucket_rev_conn].connections,
+                    &rev_conn->node);
+        free(rev_conn);
+    }
+    delete_conn(conn);
+
+    ct_rwlock_unlock(&ct->nat_resources_lock);
+    ct_lock_unlock(&ct->buckets[bucket_rev_conn].lock);
+    ct_lock_lock(&ctb->lock);
+
+}
+
 static struct conn *
 conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
                struct conn_lookup_ctx *ctx, uint16_t *state, bool commit,
-               long long now)
+               long long now, const struct nat_action_info_t *nat_action_info,
+			   struct conn *conn_for_un_nat_copy)
 {
     unsigned bucket = hash_to_bucket(ctx->hash);
     struct conn *nc = NULL;
@@ -179,7 +387,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
         *state |= CS_INVALID;
         return nc;
     }
-
     *state |= CS_NEW;
 
     if (commit) {
@@ -193,71 +400,194 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
         }
 
         nc = new_conn(&ct->buckets[bucket], pkt, &ctx->key, now);
+        ctx->conn = nc;
+        memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
+        conn_key_reverse(&nc->rev_key);
 
-        memcpy(&nc->rev_key, &ctx->key, sizeof nc->rev_key);
+        if (nat_action_info && nat_action_info->nat_action & NAT_ACTION) {
+            nc->nat_info = xzalloc(sizeof *nat_action_info);
+            memcpy(nc->nat_info, nat_action_info, sizeof *nc->nat_info);
+            ct_rwlock_wrlock(&ct->nat_resources_lock);
 
-        conn_key_reverse(&nc->rev_key);
+            bool nat_res = nat_select_range_tuple(ct, nc, conn_for_un_nat_copy);
+
+            if (!nat_res) {
+                free(nc->nat_info);
+                ovs_list_remove(&nc->exp_node);
+                free(nc);
+                ct_rwlock_unlock(&ct->nat_resources_lock);
+                return NULL;
+            }
+
+            if (conn_for_un_nat_copy && nc->conn_type == CT_CONN_TYPE_DEFAULT) {
+                *nc = *conn_for_un_nat_copy;
+                conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT;
+            }
+            ct_rwlock_unlock(&ct->nat_resources_lock);
+
+            nat_packet(pkt, nc, state);
+        }
         hmap_insert(&ct->buckets[bucket].connections, &nc->node, ctx->hash);
         atomic_count_inc(&ct->n_conn);
     }
-
     return nc;
 }
 
-static struct conn *
-process_one(struct conntrack *ct, struct dp_packet *pkt,
-            struct conn_lookup_ctx *ctx, uint16_t zone,
-            bool commit, long long now)
+static void
+conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
+                  struct conn_lookup_ctx *ctx, uint16_t *state,
+                  struct conn **conn, long long now,
+                  unsigned bucket)
+    OVS_REQUIRES(ct->buckets[bucket].lock)
 {
-    unsigned bucket = hash_to_bucket(ctx->hash);
-    struct conn *conn = ctx->conn;
-    uint16_t state = 0;
+    if (ctx->related) {
+        *state |= CS_RELATED;
+        if (ctx->reply) {
+            *state |= CS_REPLY_DIR;
+        }
+    } else {
+        enum ct_update_res res = conn_update(*conn, &ct->buckets[bucket],
+                                             pkt, ctx->reply, now);
 
-    if (conn) {
-        if (ctx->related) {
-            state |= CS_RELATED;
+        switch (res) {
+        case CT_UPDATE_VALID:
+            *state |= CS_ESTABLISHED;
+            *state &= ~CS_NEW;
             if (ctx->reply) {
-                state |= CS_REPLY_DIR;
+                *state |= CS_REPLY_DIR;
+            }
+            break;
+        case CT_UPDATE_INVALID:
+            *state |= CS_INVALID;
+            break;
+        case CT_UPDATE_NEW:
+            ovs_list_remove(&(*conn)->exp_node);
+            hmap_remove(&ct->buckets[bucket].connections, &(*conn)->node);
+            atomic_count_dec(&ct->n_conn);
+            if ((*conn)->nat_info) {
+                nat_clean(ct, *conn, &ct->buckets[bucket]);
+            } else {
+                delete_conn(*conn);
             }
+            *conn = NULL;
+            break;
+        default:
+            OVS_NOT_REACHED();
+        }
+    }
+}
+
+static void
+create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
+                   long long now)
+{
+        struct conn *nc = xzalloc(sizeof *nc);
+        memcpy(nc, conn_for_un_nat_copy, sizeof *nc);
+        nc->key = conn_for_un_nat_copy->rev_key;
+        nc->rev_key = conn_for_un_nat_copy->key;
+        uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
+        unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
+        ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
+        ct_rwlock_rdlock(&ct->nat_resources_lock);
+
+        struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
+        struct nat_conn_key_node *nat_conn_key_node =
+            nat_conn_keys_lookup(&ct->nat_conn_keys, nc, ct->hash_basis);
+        if (nat_conn_key_node && !memcmp(&nat_conn_key_node->value,
+            &nc->rev_key, sizeof nat_conn_key_node->value) && !rev_conn) {
+
+            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections, &nc->node,
+                        un_nat_hash);
         } else {
-            enum ct_update_res res;
+            free(nc);
+        }
+        ct_rwlock_unlock(&ct->nat_resources_lock);
+        ct_lock_unlock(&ct->buckets[un_nat_conn_bucket].lock);
+}
+
+static void
+process_one(struct conntrack *ct, struct dp_packet *pkt,
+            struct conn_lookup_ctx *ctx, uint16_t zone,
+            bool commit, long long now, const uint32_t *setmark,
+            const struct ovs_key_ct_labels *setlabel,
+            const struct nat_action_info_t *nat_action_info)
+{
+    struct conn *conn;
+    uint16_t state = 0;
+    unsigned bucket = hash_to_bucket(ctx->hash);
+    ct_lock_lock(&ct->buckets[bucket].lock);
+    conn_key_lookup(&ct->buckets[bucket], ctx, now);
+    conn = ctx->conn;
+    struct conn conn_for_un_nat_copy;
+    memset(&conn_for_un_nat_copy, 0, sizeof conn_for_un_nat_copy);
+
+    if (OVS_LIKELY(conn)) {
+        if (conn->conn_type == CT_CONN_TYPE_UN_NAT){
+            ctx->reply = 1;
+
+            struct conn_lookup_ctx ctx2;
+            ctx2.conn = NULL;
+            ctx2.key = conn->rev_key;
+            ctx2.hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
+
+		    ct_lock_unlock(&ct->buckets[bucket].lock);
+            bucket = hash_to_bucket(ctx2.hash);
+
+            ct_lock_lock(&ct->buckets[bucket].lock);
+            conn_key_lookup(&ct->buckets[bucket], &ctx2, now);
+
+            if (ctx2.conn) {
+                conn = ctx2.conn;
+            } else {
+                /* It is a race condition where conn has timed out and removed
+                 * between unlock of the rev_conn and lock of the forward conn;
+                 * nothing to do. */
+                ct_lock_unlock(&ct->buckets[bucket].lock);
+                return;
+            }
+        }
 
-            res = conn_update(conn, &ct->buckets[bucket], pkt,
-                              ctx->reply, now);
+        if (nat_action_info && nat_action_info->nat_action &&
+            (conn->nat_info && conn->nat_info->nat_action & NAT_ACTION) &&
+            (!(pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) ||
+              (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT) &&
+               zone != pkt->md.ct_zone))){
 
-            switch (res) {
-            case CT_UPDATE_VALID:
-                state |= CS_ESTABLISHED;
-                if (ctx->reply) {
-                    state |= CS_REPLY_DIR;
-                }
-                break;
-            case CT_UPDATE_INVALID:
-                state |= CS_INVALID;
-                break;
-            case CT_UPDATE_NEW:
-                ovs_list_remove(&conn->exp_node);
-                hmap_remove(&ct->buckets[bucket].connections, &conn->node);
-                atomic_count_dec(&ct->n_conn);
-                delete_conn(conn);
-                conn = conn_not_found(ct, pkt, ctx, &state, commit, now);
-                break;
-            default:
-                OVS_NOT_REACHED();
+            if (ctx->reply) {
+                un_nat_packet(pkt, conn, &state);
+            } else {
+                nat_packet(pkt, conn, &state);
             }
         }
+    }
+
+    if (OVS_LIKELY(conn)) {
+        conn_update_state(ct, pkt, ctx, &state, &conn, now, bucket);
     } else {
         if (ctx->related) {
             state |= CS_INVALID;
         } else {
-            conn = conn_not_found(ct, pkt, ctx, &state, commit, now);
+            conn = conn_not_found(ct, pkt, ctx, &state, commit,
+                                  now, nat_action_info, &conn_for_un_nat_copy);
         }
     }
 
     write_ct_md(pkt, state, zone, conn ? conn->mark : 0,
                 conn ? conn->label : OVS_U128_ZERO);
 
-    return conn;
+    if (conn && setmark) {
+        set_mark(pkt, conn, setmark[0], setmark[1]);
+    }
+
+    if (conn && setlabel) {
+        set_label(pkt, conn, &setlabel[0], &setlabel[1]);
+    }
+
+    ct_lock_unlock(&ct->buckets[bucket].lock);
+
+    if (conn_for_un_nat_copy.conn_type == CT_CONN_TYPE_UN_NAT) {
+        create_un_nat_conn(ct, &conn_for_un_nat_copy, now);
+    }
 }
 
 /* Sends the packets in '*pkt_batch' through the connection tracker 'ct'.  All
@@ -273,7 +603,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
                   ovs_be16 dl_type, bool commit, uint16_t zone,
                   const uint32_t *setmark,
                   const struct ovs_key_ct_labels *setlabel,
-                  const char *helper)
+                  const char *helper,
+                  const struct nat_action_info_t *nat_action_info)
 {
     struct dp_packet **pkts = pkt_batch->packets;
     size_t cnt = pkt_batch->count;
@@ -323,27 +654,12 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
     }
 
     for (i = 0; i < arrcnt; i++) {
-        struct conntrack_bucket *ctb = &ct->buckets[arr[i].bucket];
         size_t j;
 
-        ct_lock_lock(&ctb->lock);
-
         ULLONG_FOR_EACH_1(j, arr[i].maps) {
-            struct conn *conn;
-
-            conn_key_lookup(ctb, &ctxs[j], now);
-
-            conn = process_one(ct, pkts[j], &ctxs[j], zone, commit, now);
-
-            if (conn && setmark) {
-                set_mark(pkts[j], conn, setmark[0], setmark[1]);
-            }
-
-            if (conn && setlabel) {
-                set_label(pkts[j], conn, &setlabel[0], &setlabel[1]);
-            }
+            process_one(ct, pkts[j], &ctxs[j], zone, commit,
+                               now, setmark, setlabel, nat_action_info);
         }
-        ct_lock_unlock(&ctb->lock);
     }
 
     return 0;
@@ -372,6 +688,7 @@ set_label(struct dp_packet *pkt, struct conn *conn,
                               | (pkt->md.ct_label.u64.hi & ~(m.u64.hi));
     conn->label = pkt->md.ct_label;
 }
+
 
 /* Delete the expired connections from 'ctb', up to 'limit'. Returns the
  * earliest expiration time among the remaining connections in 'ctb'.  Returns
@@ -389,20 +706,27 @@ sweep_bucket(struct conntrack *ct, struct conntrack_bucket *ctb, long long now,
 
     for (i = 0; i < N_CT_TM; i++) {
         LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
-            if (!conn_expired(conn, now) || count >= limit) {
-                min_expiration = MIN(min_expiration, conn->expiration);
-                if (count >= limit) {
-                    /* Do not check other lists. */
-                    COVERAGE_INC(conntrack_long_cleanup);
-                    return min_expiration;
+            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+                if (!conn_expired(conn, now) || count >= limit) {
+                    min_expiration = MIN(min_expiration, conn->expiration);
+                    if (count >= limit) {
+                        /* Do not check other lists. */
+                        COVERAGE_INC(conntrack_long_cleanup);
+                        return min_expiration;
+                    }
+                    break;
                 }
-                break;
+                ovs_list_remove(&conn->exp_node);
+                hmap_remove(&ctb->connections, &conn->node);
+                if (conn->nat_info) {
+                    nat_clean(ct, conn, ctb);
+                } else {
+                    delete_conn(conn);
+                }
+
+                atomic_count_dec(&ct->n_conn);
+                count++;
             }
-            ovs_list_remove(&conn->exp_node);
-            hmap_remove(&ctb->connections, &conn->node);
-            atomic_count_dec(&ct->n_conn);
-            delete_conn(conn);
-            count++;
         }
     }
 
@@ -773,7 +1097,6 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
             return false;
         }
 
-        /* pf doesn't do this, but it seems a good idea */
         if (inner_key.src.addr.ipv4_aligned != key->dst.addr.ipv4_aligned
             || inner_key.dst.addr.ipv4_aligned != key->src.addr.ipv4_aligned) {
             return false;
@@ -997,7 +1320,6 @@ conn_key_hash(const struct conn_key *key, uint32_t basis)
 
     hsrc = hdst = basis;
 
-    /* Hash the source and destination tuple */
     for (i = 0; i < sizeof(key->src) / sizeof(uint32_t); i++) {
         hsrc = hash_add(hsrc, ((uint32_t *) &key->src)[i]);
         hdst = hash_add(hdst, ((uint32_t *) &key->dst)[i]);
@@ -1024,6 +1346,239 @@ conn_key_reverse(struct conn_key *key)
     key->dst = tmp;
 }
 
+static uint32_t
+nat_ipv6_addrs_delta(struct in6_addr *ipv6_aligned_min, struct in6_addr *ipv6_aligned_max)
+{
+    uint64_t diff = 0;
+    ovs_u128 addr6_128_min;
+    memcpy(&addr6_128_min.u64.hi, &ipv6_aligned_min->s6_addr32[0],
+           sizeof addr6_128_min.u64.hi);
+    memcpy(&addr6_128_min.u64.lo, &ipv6_aligned_min->s6_addr32[2],
+           sizeof addr6_128_min.u64.lo);
+    ovs_u128 addr6_128_max;
+    memcpy(&addr6_128_max.u64.hi, &ipv6_aligned_max->s6_addr32[0],
+           sizeof addr6_128_max.u64.hi);
+    memcpy(&addr6_128_max.u64.lo, &ipv6_aligned_max->s6_addr32[2],
+           sizeof addr6_128_max.u64.lo);
+
+    if ((ntohll(addr6_128_min.u64.hi) == ntohll(addr6_128_max.u64.hi)) &&
+        (ntohll(addr6_128_min.u64.lo) <= ntohll(addr6_128_max.u64.lo))){
+
+        diff = ntohll(addr6_128_max.u64.lo) - ntohll(addr6_128_min.u64.lo);
+
+	} else if ((ntohll(addr6_128_min.u64.hi) + 1 == ntohll(addr6_128_max.u64.hi)) &&
+               (ntohll(addr6_128_min.u64.lo) > ntohll(addr6_128_max.u64.lo))) {
+        diff = 0xffffffffffffffff - (ntohll(addr6_128_min.u64.lo) -
+                   ntohll(addr6_128_max.u64.lo) - 1);
+	} else {
+        /* Limit address delta supported to 32 bits or 4 billion approximately.
+         * Possibly, this should be visible to the user through a datapath
+         * support check, however the practical impact is probably nil. */
+        diff = 0xfffffffe;
+	}
+    return (uint32_t)diff;
+}
+
+static void
+nat_ipv6_addr_increment(struct in6_addr *ipv6_aligned, uint32_t increment)
+{
+    ovs_u128 addr6_128;
+    memcpy(&addr6_128.u64.hi, &ipv6_aligned->s6_addr32[0], sizeof addr6_128.u64.hi);
+    memcpy(&addr6_128.u64.lo, &ipv6_aligned->s6_addr32[2], sizeof addr6_128.u64.lo);
+
+    if (0xffffffffffffffff - increment >= ntohll(addr6_128.u64.lo)) {
+        addr6_128.u64.lo = htonll(increment + ntohll(addr6_128.u64.lo));
+    } else if (addr6_128.u64.hi != 0xffffffffffffffff) {
+        addr6_128.u64.hi = htonll(1 + ntohll(addr6_128.u64.hi));
+        addr6_128.u64.lo =
+            htonll(increment - (0xffffffffffffffff -
+                   ntohll(addr6_128.u64.lo) + 1));
+    }
+
+    memcpy(&ipv6_aligned->s6_addr32[0], &addr6_128.u64.hi, sizeof addr6_128.u64.hi);
+    memcpy(&ipv6_aligned->s6_addr32[2], &addr6_128.u64.lo, sizeof addr6_128.u64.lo);
+
+	return;
+}
+
+static uint32_t
+nat_range_hash(const struct conn *conn, uint32_t basis)
+{
+    uint32_t hash = basis;
+    int i;
+    uint32_t port;
+
+    for (i = 0;
+         i < sizeof(conn->nat_info->min_addr) / sizeof(uint32_t);
+         i++) {
+        hash = hash_add(hash, ((uint32_t *) &conn->nat_info->min_addr)[i]);
+        hash = hash_add(hash, ((uint32_t *) &conn->nat_info->max_addr)[i]);
+    }
+
+    memcpy(&port, &conn->nat_info->min_port, sizeof port);
+    hash = hash_add(hash, port);
+
+    uint32_t dl_type_for_hash = (uint32_t) conn->key.dl_type;
+    hash = hash_add(hash,  dl_type_for_hash);
+    uint32_t nw_proto_for_hash = (uint32_t) conn->key.nw_proto;
+    hash = hash_add(hash,  nw_proto_for_hash);
+    uint32_t zone_for_hash = (uint32_t) conn->key.zone;
+    hash = hash_add(hash,  zone_for_hash);
+    return hash;
+}
+
+static bool
+nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
+		               struct conn *rev_conn)
+{
+#define MIN_NAT_EPHEMERAL_PORT 1024
+#define MAX_NAT_EPHEMERAL_PORT 65535
+
+    struct ct_addr ct_addr;
+    uint16_t min_port;
+    uint16_t max_port;
+    uint16_t first_port;
+
+
+    uint32_t hash = nat_range_hash(conn, ct->hash_basis);
+
+    if ((conn->nat_info->nat_action & NAT_ACTION_SRC) &&
+        (!(conn->nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
+        min_port = ntohs(conn->key.src.port);
+        max_port = ntohs(conn->key.src.port);
+        first_port = min_port;
+    } else if ((conn->nat_info->nat_action & NAT_ACTION_DST) &&
+               (!(conn->nat_info->nat_action & NAT_ACTION_DST_PORT))) {
+        min_port = ntohs(conn->key.dst.port);
+        max_port = ntohs(conn->key.dst.port);
+        first_port = min_port;
+    } else {
+        uint16_t deltap = conn->nat_info->max_port - conn->nat_info->min_port;
+        uint32_t port_index = hash % (deltap + 1);
+        first_port = conn->nat_info->min_port + port_index;
+        min_port = conn->nat_info->min_port;
+        max_port = conn->nat_info->max_port;
+    }
+
+    uint32_t deltaa = 0;
+    uint32_t address_index;
+    memset(&ct_addr, 0, sizeof ct_addr);
+
+    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
+        deltaa = ntohl(conn->nat_info->max_addr.ipv4_aligned) -
+                 ntohl(conn->nat_info->min_addr.ipv4_aligned);
+        address_index = hash % (deltaa + 1);
+        ct_addr.ipv4_aligned = htonl(ntohl(conn->nat_info->min_addr.ipv4_aligned) +
+                                           address_index);
+    } else {
+        deltaa = nat_ipv6_addrs_delta(&conn->nat_info->min_addr.ipv6_aligned,
+                                      &conn->nat_info->max_addr.ipv6_aligned);
+        /* deltaa must be within 32 bits for full hash coverage. A 64 or 128 bit hash is
+         * unnecessary and hence not used here. Most code is kept common with V4. */
+        address_index = hash % (deltaa + 1);
+        ct_addr.ipv6_aligned = conn->nat_info->min_addr.ipv6_aligned;
+        nat_ipv6_addr_increment(&ct_addr.ipv6_aligned, address_index);
+    }
+
+    uint16_t port = first_port;
+    bool all_ports_tried = false;
+    bool original_ports_tried = false;
+    bool all_addreses_tried = false;
+    struct ct_addr first_addr = ct_addr;
+    *rev_conn = *conn;
+    while (true) {
+        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
+            rev_conn->rev_key.dst.addr = ct_addr;
+            rev_conn->rev_key.dst.port = htons(port);
+        } else {
+            rev_conn->rev_key.src.addr = ct_addr;
+            rev_conn->rev_key.src.port = htons(port);
+        }
+        struct nat_conn_key_node *nat_conn_key_node =
+            nat_conn_keys_lookup(&ct->nat_conn_keys, rev_conn, ct->hash_basis);
+
+        if (!nat_conn_key_node) {
+            struct nat_conn_key_node *nat_conn_key = xzalloc(sizeof *nat_conn_key);
+            memcpy(&nat_conn_key->key, &rev_conn->rev_key, sizeof nat_conn_key->key);
+            memcpy(&nat_conn_key->value, &rev_conn->key, sizeof nat_conn_key->value);
+            uint32_t nat_conn_key_hash = conn_key_hash(&nat_conn_key->key, ct->hash_basis);
+            hmap_insert(&ct->nat_conn_keys, &nat_conn_key->node, nat_conn_key_hash);
+            return true;
+        } else if (!all_ports_tried) {
+            if (min_port == max_port) {
+                all_ports_tried = true;
+            } else if(port == max_port) {
+                port = min_port;
+            } else {
+                port++;
+            }
+            if (port == first_port) {
+                all_ports_tried = true;
+            }
+	    } else if (!all_addreses_tried) {
+            if(memcmp(&ct_addr, &conn->nat_info->max_addr, sizeof ct_addr)) {
+                if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
+                    ct_addr.ipv4_aligned = htonl(ntohl(ct_addr.ipv4_aligned) + 1);
+                } else {
+                    nat_ipv6_addr_increment(&ct_addr.ipv6_aligned, 1);
+                }
+            } else {
+                ct_addr = conn->nat_info->min_addr;
+            }
+            if (!memcmp(&ct_addr, &first_addr, sizeof ct_addr)) {
+                if (!original_ports_tried) {
+                    original_ports_tried = true;
+                    ct_addr = conn->nat_info->min_addr;
+                    min_port = MIN_NAT_EPHEMERAL_PORT;
+                    max_port = MAX_NAT_EPHEMERAL_PORT;
+                } else {
+                    break;
+                }
+            }
+            first_port = min_port;
+            port = first_port;
+            all_ports_tried = false;
+	    } else {
+            OVS_NOT_REACHED();
+	    }
+	}
+
+    return false;
+}
+
+static struct nat_conn_key_node *
+nat_conn_keys_lookup(struct hmap *nat_conn_keys,
+                     const struct conn *conn,
+                     uint32_t basis)
+{
+    struct nat_conn_key_node *nat_conn_key_node;
+    uint32_t nat_conn_key_hash = conn_key_hash(&conn->key, basis);
+
+    HMAP_FOR_EACH_WITH_HASH (nat_conn_key_node, node, nat_conn_key_hash, nat_conn_keys) {
+        if (!memcmp(&nat_conn_key_node->key, &conn->key, sizeof nat_conn_key_node->key)) {
+            return nat_conn_key_node;
+        }
+    }
+    return NULL;
+}
+
+static void
+nat_conn_keys_remove(struct hmap *nat_conn_keys,
+                    const struct conn *conn,
+                    uint32_t basis)
+{
+    struct nat_conn_key_node *nat_conn_key_node;
+    uint32_t nat_conn_key_hash = conn_key_hash(&conn->key, basis);
+
+    HMAP_FOR_EACH_WITH_HASH (nat_conn_key_node, node, nat_conn_key_hash, nat_conn_keys) {
+        if (!memcmp(&nat_conn_key_node->key, &conn->key, sizeof nat_conn_key_node->key)) {
+            hmap_remove(nat_conn_keys, &nat_conn_key_node->node);
+            free(nat_conn_key_node);
+            return;
+        }
+    }
+}
+
 static void
 conn_key_lookup(struct conntrack_bucket *ctb,
                 struct conn_lookup_ctx *ctx,
@@ -1035,13 +1590,13 @@ conn_key_lookup(struct conntrack_bucket *ctb,
     ctx->conn = NULL;
 
     HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
-        if (!memcmp(&conn->key, &ctx->key, sizeof(conn->key))
+        if (!memcmp(&conn->key, &ctx->key, sizeof conn->key)
                 && !conn_expired(conn, now)) {
             ctx->conn = conn;
             ctx->reply = false;
             break;
         }
-        if (!memcmp(&conn->rev_key, &ctx->key, sizeof(conn->rev_key))
+        if (!memcmp(&conn->rev_key, &ctx->key, sizeof conn->rev_key)
                 && !conn_expired(conn, now)) {
             ctx->conn = conn;
             ctx->reply = true;
@@ -1061,7 +1616,10 @@ conn_update(struct conn *conn, struct conntrack_bucket *ctb,
 static bool
 conn_expired(struct conn *conn, long long now)
 {
-    return now >= conn->expiration;
+	if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+        return now >= conn->expiration;
+	}
+	return false;
 }
 
 static bool
@@ -1088,6 +1646,7 @@ new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt,
 static void
 delete_conn(struct conn *conn)
 {
+	free(conn->nat_info);
     free(conn);
 }
 
@@ -1140,7 +1699,7 @@ conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry,
     entry->zone = conn->key.zone;
     entry->mark = conn->mark;
 
-    memcpy(&entry->labels, &conn->label, sizeof(entry->labels));
+    memcpy(&entry->labels, &conn->label, sizeof entry->labels);
     /* Not implemented yet */
     entry->timestamp.start = 0;
     entry->timestamp.stop = 0;
@@ -1187,7 +1746,8 @@ conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
                 break;
             }
             INIT_CONTAINER(conn, node, node);
-            if (!dump->filter_zone || conn->key.zone == dump->zone) {
+            if ((!dump->filter_zone || conn->key.zone == dump->zone) &&
+                 (conn->conn_type != CT_CONN_TYPE_UN_NAT)){
                 conn_to_ct_dpif_entry(conn, entry, now);
                 break;
             }
@@ -1223,14 +1783,21 @@ conntrack_flush(struct conntrack *ct, const uint16_t *zone)
         ct_lock_lock(&ct->buckets[i].lock);
         HMAP_FOR_EACH_SAFE(conn, next, node, &ct->buckets[i].connections) {
             if (!zone || *zone == conn->key.zone) {
-                ovs_list_remove(&conn->exp_node);
+                if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+                    ovs_list_remove(&conn->exp_node);
+                }
                 hmap_remove(&ct->buckets[i].connections, &conn->node);
-                atomic_count_dec(&ct->n_conn);
-                delete_conn(conn);
+                if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
+                    atomic_count_dec(&ct->n_conn);
+                }
+                if (conn->nat_info) {
+                    nat_clean(ct, conn, &ct->buckets[i]);
+                } else {
+                    delete_conn(conn);
+                }
             }
         }
         ct_lock_unlock(&ct->buckets[i].lock);
     }
-
     return 0;
 }
diff --git a/lib/conntrack.h b/lib/conntrack.h
index 254f61c..411efd5 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -26,6 +26,8 @@
 #include "openvswitch/thread.h"
 #include "openvswitch/types.h"
 #include "ovs-atomic.h"
+#include "ovs-thread.h"
+#include "packets.h"
 
 /* Userspace connection tracker
  * ============================
@@ -61,6 +63,32 @@ struct dp_packet_batch;
 
 struct conntrack;
 
+struct ct_addr {
+    union {
+        ovs_16aligned_be32 ipv4;
+        union ovs_16aligned_in6_addr ipv6;
+        ovs_be32 ipv4_aligned;
+        struct in6_addr ipv6_aligned;
+    };
+};
+
/* NAT action flags.  Both an address flag (NAT_ACTION_SRC/DST) and the
 * matching port flag (NAT_ACTION_*_PORT) can be set at the same time. */
enum nat_action_e {
    NAT_ACTION = 1 << 0,
    NAT_ACTION_SRC = 1 << 1,
    NAT_ACTION_SRC_PORT = 1 << 2,
    NAT_ACTION_DST = 1 << 3,
    NAT_ACTION_DST_PORT = 1 << 4,
};
+
+struct nat_action_info_t {
+	struct ct_addr min_addr;
+	struct ct_addr max_addr;
+	uint16_t min_port;
+	uint16_t max_port;
+    uint16_t nat_action;
+};
+
 void conntrack_init(struct conntrack *);
 void conntrack_destroy(struct conntrack *);
 
@@ -68,7 +96,8 @@ int conntrack_execute(struct conntrack *, struct dp_packet_batch *,
                       ovs_be16 dl_type, bool commit,
                       uint16_t zone, const uint32_t *setmark,
                       const struct ovs_key_ct_labels *setlabel,
-                      const char *helper);
+                      const char *helper,
+                      const struct nat_action_info_t *nat_action_info);
 
 struct conntrack_dump {
     struct conntrack *ct;
@@ -87,13 +116,17 @@ int conntrack_dump_done(struct conntrack_dump *);
 
 int conntrack_flush(struct conntrack *, const uint16_t *zone);
 
-/* 'struct ct_lock' is a wrapper for an adaptive mutex.  It's useful to try
- * different types of locks (e.g. spinlocks) */
+/* 'struct ct_lock' is a wrapper for an adaptive or regular  mutex.
+ * It's useful to try different types of locks (e.g. spinlocks) */
 
 struct OVS_LOCKABLE ct_lock {
     struct ovs_mutex lock;
 };
 
+struct OVS_LOCKABLE ct_rwlock {
+    struct ovs_rwlock lock;
+};
+
 static inline void ct_lock_init(struct ct_lock *lock)
 {
     ovs_mutex_init_adaptive(&lock->lock);
@@ -117,6 +150,38 @@ static inline void ct_lock_destroy(struct ct_lock *lock)
 {
     ovs_mutex_destroy(&lock->lock);
 }
+
+static inline void ct_rwlock_init(struct ct_rwlock *lock)
+{
+    ovs_rwlock_init(&lock->lock);
+}
+
+static inline void ct_rwlock_wrlock(struct ct_rwlock *lock)
+    OVS_ACQUIRES(lock)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+	ovs_rwlock_wrlock(&lock->lock);
+}
+
+static inline void ct_rwlock_rdlock(struct ct_rwlock *lock)
+    OVS_ACQUIRES(lock)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+	ovs_rwlock_rdlock(&lock->lock);
+}
+
+static inline void ct_rwlock_unlock(struct ct_rwlock *lock)
+    OVS_RELEASES(lock)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+	ovs_rwlock_unlock(&lock->lock);
+}
+
+static inline void ct_rwlock_destroy(struct ct_rwlock *lock)
+{
+	ovs_rwlock_destroy(&lock->lock);
+}
+
 
 /* Timeouts: all the possible timeout states passed to update_expiration()
  * are listed here. The name will be prefix by CT_TM_ and the value is in
@@ -162,8 +227,9 @@ enum ct_timeout {
  *
  * */
 struct conntrack_bucket {
-    /* Protects 'connections' and 'exp_lists'.  Used in the fast path */
+    /* 'lock' protects 'connections' and 'exp_lists'. */
     struct ct_lock lock;
+
     /* Contains the connections in the bucket, indexed by 'struct conn_key' */
     struct hmap connections OVS_GUARDED;
     /* For each possible timeout we have a list of connections. When the
@@ -176,6 +242,7 @@ struct conntrack_bucket {
      * performing the cleanup. */
     struct ovs_mutex cleanup_mutex;
     long long next_cleanup OVS_GUARDED;
+
 };
 
 #define CONNTRACK_BUCKETS_SHIFT 8
@@ -199,6 +266,12 @@ struct conntrack {
     /* Connections limit. When this limit is reached, no new connection
      * will be accepted. */
     atomic_uint n_conn_limit;
+
+    /* The following resources are referenced during nat connection
+     * creation and deletion */
+    struct hmap nat_conn_keys OVS_GUARDED;
+    struct ct_rwlock nat_resources_lock;
+
 };
 
 #endif /* conntrack.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 3901129..a71c766 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -97,7 +97,8 @@ static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
 static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600);
 
 #define DP_NETDEV_CS_SUPPORTED_MASK (CS_NEW | CS_ESTABLISHED | CS_RELATED \
-                                     | CS_INVALID | CS_REPLY_DIR | CS_TRACKED)
+                                     | CS_INVALID | CS_REPLY_DIR | CS_TRACKED \
+                                     | CS_SRC_NAT | CS_DST_NAT)
 #define DP_NETDEV_CS_UNSUPPORTED_MASK (~(uint32_t)DP_NETDEV_CS_SUPPORTED_MASK)
 
 static struct odp_support dp_netdev_support = {
@@ -4681,7 +4682,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         const char *helper = NULL;
         const uint32_t *setmark = NULL;
         const struct ovs_key_ct_labels *setlabel = NULL;
-
+        struct nat_action_info_t nat_action_info;
+        bool nat = false;
+        memset(&nat_action_info, 0, sizeof nat_action_info);
         NL_ATTR_FOR_EACH_UNSAFE (b, left, nl_attr_get(a),
                                  nl_attr_get_size(a)) {
             enum ovs_ct_attr sub_type = nl_attr_type(b);
@@ -4702,15 +4705,89 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
             case OVS_CT_ATTR_LABELS:
                 setlabel = nl_attr_get(b);
                 break;
-            case OVS_CT_ATTR_NAT:
+            case OVS_CT_ATTR_NAT: {
+                const struct nlattr *b_nest;
+                unsigned int left_nest;
+                bool ip_min_specified = false;
+                bool proto_num_min_specified = false;
+                bool ip_max_specified = false;
+                bool proto_num_max_specified = false;
+
+                NL_NESTED_FOR_EACH_UNSAFE (b_nest, left_nest, b) {
+                    enum ovs_nat_attr sub_type_nest = nl_attr_type(b_nest);
+
+                    switch(sub_type_nest) {
+                    case OVS_NAT_ATTR_SRC:
+                    case OVS_NAT_ATTR_DST:
+                        nat = true;
+                        nat_action_info.nat_action |= NAT_ACTION;
+                        nat_action_info.nat_action |=
+                            ((sub_type_nest == OVS_NAT_ATTR_SRC)
+                                ? NAT_ACTION_SRC : NAT_ACTION_DST);
+                        break;
+                    case OVS_NAT_ATTR_IP_MIN:
+                        memcpy(&nat_action_info.min_addr,
+                           (char *) b_nest + NLA_HDRLEN, b_nest->nla_len);
+                        ip_min_specified = true;
+                        break;
+                    case OVS_NAT_ATTR_IP_MAX:
+                        memcpy(&nat_action_info.max_addr,
+                            (char *) b_nest + NLA_HDRLEN, b_nest->nla_len);
+                        ip_max_specified = true;
+                        break;
+                    case OVS_NAT_ATTR_PROTO_MIN:
+                        nat_action_info.min_port = nl_attr_get_u16(b_nest);
+                        proto_num_min_specified = true;
+                        break;
+                    case OVS_NAT_ATTR_PROTO_MAX:
+                        nat_action_info.max_port = nl_attr_get_u16(b_nest);
+                        proto_num_max_specified = true;
+                        break;
+                    case OVS_NAT_ATTR_PERSISTENT:
+                    case OVS_NAT_ATTR_PROTO_HASH:
+                    case OVS_NAT_ATTR_PROTO_RANDOM:
+                        break;
+                    case OVS_NAT_ATTR_UNSPEC:
+                    case __OVS_NAT_ATTR_MAX:
+                        OVS_NOT_REACHED();
+                    }
+                }
+
+                if (!nat_action_info.nat_action) {
+                    nat_action_info.nat_action = NAT_ACTION;
+                }
+                if (ip_min_specified && !ip_max_specified) {
+                    memcpy(&nat_action_info.max_addr,
+                           &nat_action_info.min_addr,
+                           sizeof(nat_action_info.max_addr));
+                }
+                if (proto_num_min_specified && !proto_num_max_specified) {
+                    nat_action_info.max_port = nat_action_info.min_port;
+                }
+                if (proto_num_min_specified || proto_num_max_specified) {
+                    if (nat_action_info.nat_action & NAT_ACTION_SRC) {
+                        nat_action_info.nat_action |= NAT_ACTION_SRC_PORT;
+                    } else if (nat_action_info.nat_action & NAT_ACTION_DST) {
+                        nat_action_info.nat_action |= NAT_ACTION_DST_PORT;
+                    }
+                }
+
+                /* Additional sanity checks can be added. */
+                break;
+            }
+
             case OVS_CT_ATTR_UNSPEC:
             case __OVS_CT_ATTR_MAX:
                 OVS_NOT_REACHED();
             }
         }
 
+	    if (nat && !commit) {
+            VLOG_WARN("NAT specified without commit.");
+	    }
+
         conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, commit,
-                          zone, setmark, setlabel, helper);
+                          zone, setmark, setlabel, helper, &nat_action_info);
         break;
     }
 
diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
index 803e2b9..a8b2e48 100644
--- a/tests/test-conntrack.c
+++ b/tests/test-conntrack.c
@@ -91,7 +91,8 @@ ct_thread_main(void *aux_)
     pkt_batch = prepare_packets(batch_size, change_conn, aux->tid, &dl_type);
     ovs_barrier_block(&barrier);
     for (i = 0; i < n_pkts; i += batch_size) {
-        conntrack_execute(&ct, pkt_batch, dl_type, true, 0, NULL, NULL, NULL);
+        conntrack_execute(&ct, pkt_batch, dl_type, true, 0, NULL, NULL,
+                          NULL, NULL);
     }
     ovs_barrier_block(&barrier);
     destroy_packets(pkt_batch);
@@ -176,14 +177,15 @@ pcap_batch_execute_conntrack(struct conntrack *ct,
 
         if (flow.dl_type != dl_type) {
             conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL,
-                              NULL);
+                              NULL, NULL);
             dp_packet_batch_init(&new_batch);
         }
         new_batch.packets[new_batch.count++] = pkt;
     }
 
     if (new_batch.count) {
-        conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL, NULL);
+        conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL,
+                          NULL, NULL);
     }
 
 }
-- 
1.9.1



More information about the dev mailing list