[ovs-dev] [PATCH 1/4] conntrack: remove nat_conn
Aaron Conole
aconole at redhat.com
Wed Feb 24 17:37:13 UTC 2021
"hepeng.0320" <hepeng.0320 at bytedance.com> writes:
> From: hepeng <hepeng.0320 at bytedance.com>
>
> Currently, when doing NAT, the userspace conntrack will use an extra
> conn for the two directions in a flow. However, each conn has actually
> the two keys for both orig and rev flow directions. This patch
> introduces a conn_dir member in the conn and it consists of both rev and
> orig cmap_node for hash lookup. This saves extra allocation for
> nat_conn, and makes userspace code much cleaner.
If I'm understanding this correctly, you still re-insert the conn into
the conn list?
> We also introduces a conn_flags member to reduce the memory footprint of a
> conn.
We should split this out to a separate patch.
> Signed-off-by: Peng He <hepeng.0320 at bytedance.com>
> ---
> lib/conntrack-private.h | 20 +--
> lib/conntrack.c | 264 ++++++++++++++++------------------------
> 2 files changed, 121 insertions(+), 163 deletions(-)
>
> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> index 789af82ff..6bec43d3f 100644
> --- a/lib/conntrack-private.h
> +++ b/lib/conntrack-private.h
> @@ -83,21 +83,23 @@ struct alg_exp_node {
> bool nat_rpl_dst;
> };
>
> -enum OVS_PACKED_ENUM ct_conn_type {
> - CT_CONN_TYPE_DEFAULT,
> - CT_CONN_TYPE_UN_NAT,
> +#define CONN_FLAG_NAT_MASK 0xf
> +
> +struct conn_dir {
> + struct cmap_node cm_node;
> + bool orig;
This naming is confusing. We can have 'conn->orig->orig'. Consider
renaming one of these fields to distinguish what is going on. I would
prefer seeing 'fwd' used (since it's the forward direction).
> };
>
> struct conn {
> /* Immutable data. */
> struct conn_key key;
> + struct conn_dir orig;
> struct conn_key rev_key;
> + struct conn_dir rev;
I think this might be better if setup like:
+enum ct_direction {
+ CT_DIRECTION_FWD,
+ CT_DIRECTION_REV,
+ CT_DIRECTIONS
+}
+
+struct conn_data {
+ struct conn_key key;
+ struct cmap_node cm_node;
};
struct conn {
- struct conn_key key;
- struct conn_key rev_key;
+ struct conn_data cn_data[CT_DIRECTIONS];
...
Then in the code, we can always get orig and rev information:
conn->cn_data[CT_DIRECTION_FWD]
conn->cn_data[CT_DIRECTION_REV]
Did I miss something?
> struct conn_key parent_key; /* Only used for orig_tuple support. */
> struct ovs_list exp_node;
> - struct cmap_node cm_node;
> - struct nat_action_info_t *nat_info;
> + uint64_t conn_flags;
As mentioned, please separate this as a different patch. This is new
piece of functionality.
> char *alg;
> - struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */
>
> /* Mutable data. */
> struct ovs_mutex lock; /* Guards all mutable fields. */
> @@ -117,11 +119,15 @@ struct conn {
>
> /* Immutable data. */
> bool alg_related; /* True if alg data connection. */
> - enum ct_conn_type conn_type;
>
> uint32_t tp_id; /* Timeout policy ID. */
> };
>
> +static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
> + return conndir->orig ? CONTAINER_OF(conndir, struct conn, orig) : \
> + CONTAINER_OF(conndir, struct conn, rev);
> +}
> +
> enum ct_update_res {
> CT_UPDATE_INVALID,
> CT_UPDATE_VALID,
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 930ed0be6..73d3a2fb2 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -94,7 +94,6 @@ static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
> uint32_t tp_id);
> static void delete_conn_cmn(struct conn *);
> static void delete_conn(struct conn *);
> -static void delete_conn_one(struct conn *conn);
> static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn,
> struct dp_packet *pkt,
> struct conn_lookup_ctx *ctx,
> @@ -108,8 +107,8 @@ static void set_label(struct dp_packet *, struct conn *,
> static void *clean_thread_main(void *f_);
>
> static bool
> -nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> - struct conn *nat_conn);
> +nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
> + const struct nat_action_info_t *nat_action);
>
> static uint8_t
> reverse_icmp_type(uint8_t type);
> @@ -233,6 +232,7 @@ conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2)
> return 1;
> }
>
> +#if 0
Please remove functions which are no longer used.
> static void
> ct_print_conn_info(const struct conn *c, const char *log_msg,
> enum vlog_level vll, bool force, bool rl_on)
> @@ -287,6 +287,7 @@ ct_print_conn_info(const struct conn *c, const char *log_msg,
> }
> }
> }
> +#endif
>
> /* Initializes the connection tracker 'ct'. The caller is responsible for
> * calling 'conntrack_destroy()', when the instance is not needed anymore */
> @@ -437,7 +438,7 @@ conn_clean_cmn(struct conntrack *ct, struct conn *conn)
> }
>
> uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
> - cmap_remove(&ct->conns, &conn->cm_node, hash);
> + cmap_remove(&ct->conns, &conn->orig.cm_node, hash);
>
> struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone);
> if (zl && zl->czl.zone_limit_seq == conn->zone_limit_seq) {
> @@ -451,12 +452,10 @@ static void
> conn_clean(struct conntrack *ct, struct conn *conn)
> OVS_REQUIRES(ct->ct_lock)
> {
> - ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> -
> conn_clean_cmn(ct, conn);
> - if (conn->nat_conn) {
> - uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis);
> - cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
> + if (conn->conn_flags & CONN_FLAG_NAT_MASK) {
> + uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> + cmap_remove(&ct->conns, &conn->rev.cm_node, hash);
> }
> ovs_list_remove(&conn->exp_node);
> conn->cleaned = true;
> @@ -464,19 +463,6 @@ conn_clean(struct conntrack *ct, struct conn *conn)
> atomic_count_dec(&ct->n_conn);
> }
>
> -static void
> -conn_clean_one(struct conntrack *ct, struct conn *conn)
> - OVS_REQUIRES(ct->ct_lock)
> -{
> - conn_clean_cmn(ct, conn);
> - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> - ovs_list_remove(&conn->exp_node);
> - conn->cleaned = true;
> - atomic_count_dec(&ct->n_conn);
> - }
> - ovsrcu_postpone(delete_conn_one, conn);
> -}
> -
> /* Destroys the connection tracker 'ct' and frees all the allocated memory.
> * The caller of this function must already have shut down packet input
> * and PMD threads (which would have been quiesced). */
> @@ -488,9 +474,14 @@ conntrack_destroy(struct conntrack *ct)
> pthread_join(ct->clean_thread, NULL);
> latch_destroy(&ct->clean_thread_exit);
>
> + struct conn_dir *conndir;
> ovs_mutex_lock(&ct->ct_lock);
> - CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
> - conn_clean_one(ct, conn);
> + CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
> + if (!conndir->orig) {
> + continue;
> + }
> + conn = conn_from_conndir(conndir);
> + conn_clean(ct, conn);
> }
> cmap_destroy(&ct->conns);
>
> @@ -530,9 +521,11 @@ conn_key_lookup(struct conntrack *ct, const struct conn_key *key,
> bool *reply)
> {
> struct conn *conn;
> + struct conn_dir *conndir;
> bool found = false;
>
> - CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &ct->conns) {
> + CMAP_FOR_EACH_WITH_HASH (conndir, cm_node, hash, &ct->conns) {
> + conn = conn_from_conndir(conndir);
> if (!conn_key_cmp(&conn->key, key) && !conn_expired(conn, now)) {
> found = true;
> if (reply) {
> @@ -710,7 +703,7 @@ handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
> static void
> pat_packet(struct dp_packet *pkt, const struct conn *conn)
> {
> - if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> + if (conn->conn_flags & NAT_ACTION_SRC) {
> if (conn->key.nw_proto == IPPROTO_TCP) {
> struct tcp_header *th = dp_packet_l4(pkt);
> packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
> @@ -718,7 +711,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
> struct udp_header *uh = dp_packet_l4(pkt);
> packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
> }
> - } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> + } else if (conn->conn_flags & NAT_ACTION_DST) {
> if (conn->key.nw_proto == IPPROTO_TCP) {
> struct tcp_header *th = dp_packet_l4(pkt);
> packet_set_tcp_port(pkt, th->tcp_src, conn->rev_key.src.port);
> @@ -732,7 +725,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
> static void
> nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> {
> - if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> + if (conn->conn_flags & NAT_ACTION_SRC) {
> pkt->md.ct_state |= CS_SRC_NAT;
> if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> struct ip_header *nh = dp_packet_l3(pkt);
> @@ -747,7 +740,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> if (!related) {
> pat_packet(pkt, conn);
> }
> - } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> + } else if (conn->conn_flags & NAT_ACTION_DST) {
> pkt->md.ct_state |= CS_DST_NAT;
> if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> struct ip_header *nh = dp_packet_l3(pkt);
> @@ -768,7 +761,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> static void
> un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> {
> - if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> + if (conn->conn_flags & NAT_ACTION_SRC) {
> if (conn->key.nw_proto == IPPROTO_TCP) {
> struct tcp_header *th = dp_packet_l4(pkt);
> packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
> @@ -776,7 +769,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> struct udp_header *uh = dp_packet_l4(pkt);
> packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
> }
> - } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> + } else if (conn->conn_flags & NAT_ACTION_DST) {
> if (conn->key.nw_proto == IPPROTO_TCP) {
> struct tcp_header *th = dp_packet_l4(pkt);
> packet_set_tcp_port(pkt, conn->key.dst.port, th->tcp_dst);
> @@ -790,7 +783,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> static void
> reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> {
> - if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> + if (conn->conn_flags & NAT_ACTION_SRC) {
> if (conn->key.nw_proto == IPPROTO_TCP) {
> struct tcp_header *th_in = dp_packet_l4(pkt);
> packet_set_tcp_port(pkt, conn->key.src.port,
> @@ -800,7 +793,7 @@ reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> packet_set_udp_port(pkt, conn->key.src.port,
> uh_in->udp_dst);
> }
> - } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> + } else if (conn->conn_flags & NAT_ACTION_DST) {
> if (conn->key.nw_proto == IPPROTO_TCP) {
> struct tcp_header *th_in = dp_packet_l4(pkt);
> packet_set_tcp_port(pkt, th_in->tcp_src,
> @@ -834,10 +827,10 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
> pkt->l4_ofs += inner_l4 - (char *) icmp;
>
> - if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> + if (conn->conn_flags & NAT_ACTION_SRC) {
> packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
> conn->key.src.addr.ipv4);
> - } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> + } else if (conn->conn_flags & NAT_ACTION_DST) {
> packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
> conn->key.dst.addr.ipv4);
> }
> @@ -858,11 +851,11 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
> pkt->l4_ofs += inner_l4 - (char *) icmp6;
>
> - if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> + if (conn->conn_flags & NAT_ACTION_SRC) {
> packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> inner_l3_6->ip6_src.be32,
> &conn->key.src.addr.ipv6, true);
> - } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> + } else if (conn->conn_flags & NAT_ACTION_DST) {
> packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> inner_l3_6->ip6_dst.be32,
> &conn->key.dst.addr.ipv6, true);
> @@ -877,10 +870,9 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> }
>
> static void
> -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> - bool related)
> +un_nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> {
> - if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> + if (conn->conn_flags & NAT_ACTION_SRC) {
> pkt->md.ct_state |= CS_DST_NAT;
> if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> struct ip_header *nh = dp_packet_l3(pkt);
> @@ -898,7 +890,7 @@ un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> } else {
> un_pat_packet(pkt, conn);
> }
> - } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> + } else if (conn->conn_flags & NAT_ACTION_DST) {
> pkt->md.ct_state |= CS_SRC_NAT;
> if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> struct ip_header *nh = dp_packet_l3(pkt);
> @@ -964,7 +956,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> OVS_REQUIRES(ct->ct_lock)
> {
> struct conn *nc = NULL;
> - struct conn *nat_conn = NULL;
>
> if (!valid_new(pkt, &ctx->key)) {
> pkt->md.ct_state = CS_INVALID;
> @@ -995,6 +986,8 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> memcpy(&nc->key, &ctx->key, sizeof nc->key);
> memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
> conn_key_reverse(&nc->rev_key);
> + nc->orig.orig = true;
> + nc->rev.orig = !nc->orig.orig;
>
> if (ct_verify_helper(helper, ct_alg_ctl)) {
> nc->alg = nullable_xstrdup(helper);
> @@ -1008,45 +1001,29 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> }
>
> if (nat_action_info) {
> - nc->nat_info = xmemdup(nat_action_info, sizeof *nc->nat_info);
> - nat_conn = xzalloc(sizeof *nat_conn);
> -
> if (alg_exp) {
> if (alg_exp->nat_rpl_dst) {
> nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr;
> - nc->nat_info->nat_action = NAT_ACTION_SRC;
> + nc->conn_flags |= NAT_ACTION_SRC;
> } else {
> nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr;
> - nc->nat_info->nat_action = NAT_ACTION_DST;
> + nc->conn_flags |= NAT_ACTION_DST;
> }
> } else {
> - memcpy(nat_conn, nc, sizeof *nat_conn);
> - bool nat_res = nat_select_range_tuple(ct, nc, nat_conn);
> + bool nat_res = nat_select_range_tuple(ct, nc, nat_action_info);
>
> if (!nat_res) {
> goto nat_res_exhaustion;
> }
> -
> - /* Update nc with nat adjustments made to nat_conn by
> - * nat_select_range_tuple(). */
> - memcpy(nc, nat_conn, sizeof *nc);
> }
>
> nat_packet(pkt, nc, ctx->icmp_related);
> - memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
> - memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
> - nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
> - nat_conn->nat_info = NULL;
> - nat_conn->alg = NULL;
> - nat_conn->nat_conn = NULL;
> - uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis);
> - cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
> + uint32_t nat_hash = conn_key_hash(&nc->rev_key, ct->hash_basis);
> + cmap_insert(&ct->conns, &nc->rev.cm_node, nat_hash);
> }
>
> - nc->nat_conn = nat_conn;
> ovs_mutex_init_adaptive(&nc->lock);
> - nc->conn_type = CT_CONN_TYPE_DEFAULT;
> - cmap_insert(&ct->conns, &nc->cm_node, ctx->hash);
> + cmap_insert(&ct->conns, &nc->orig.cm_node, ctx->hash);
> atomic_count_inc(&ct->n_conn);
> ctx->conn = nc; /* For completeness. */
> if (zl) {
> @@ -1066,7 +1043,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> * firewall rules or a separate firewall. Also using zone partitioning
> * can limit DoS impact. */
> nat_res_exhaustion:
> - free(nat_conn);
> ovs_list_remove(&nc->exp_node);
> delete_conn_cmn(nc);
> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
> @@ -1080,7 +1056,6 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
> struct conn_lookup_ctx *ctx, struct conn *conn,
> long long now)
> {
> - ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> bool create_new_conn = false;
>
> if (ctx->icmp_related) {
> @@ -1128,7 +1103,7 @@ static void
> handle_nat(struct dp_packet *pkt, struct conn *conn,
> uint16_t zone, bool reply, bool related)
> {
> - if (conn->nat_info &&
> + if (conn->conn_flags & CONN_FLAG_NAT_MASK &&
> (!(pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) ||
> (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT) &&
> zone != pkt->md.ct_zone))) {
> @@ -1301,25 +1276,6 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
> conn = NULL;
> }
>
> - if (OVS_LIKELY(conn)) {
> - if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
> -
> - ctx->reply = true;
> - struct conn *rev_conn = conn; /* Save for debugging. */
> - uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> - conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply);
> -
> - if (!conn) {
> - pkt->md.ct_state |= CS_INVALID;
> - write_ct_md(pkt, zone, NULL, NULL, NULL);
> - char *log_msg = xasprintf("Missing parent conn %p", rev_conn);
> - ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true);
> - free(log_msg);
> - return;
> - }
> - }
> - }
> -
> enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst,
> helper);
>
> @@ -2114,12 +2070,13 @@ conn_key_reverse(struct conn_key *key)
> }
>
> static uint32_t
> -nat_ipv6_addrs_delta(struct in6_addr *ipv6_min, struct in6_addr *ipv6_max)
> +nat_ipv6_addrs_delta(const struct in6_addr *ipv6_min,
> + const struct in6_addr *ipv6_max)
> {
> - uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
> - uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] + sizeof(uint64_t);
> - uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
> - uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
> + const uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
> + const uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] + sizeof(uint64_t);
> + const uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
> + const uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
Why these const changes now? This should be a separate patch.
> ovs_be64 addr6_64_min_hi;
> ovs_be64 addr6_64_min_lo;
> @@ -2180,15 +2137,17 @@ nat_ipv6_addr_increment(struct in6_addr *ipv6, uint32_t increment)
> }
>
> static uint32_t
> -nat_range_hash(const struct conn *conn, uint32_t basis)
> +nat_range_hash(const struct conn *conn,
> + const struct nat_action_info_t *nat_info,
> + uint32_t basis)
> {
> uint32_t hash = basis;
>
> - hash = ct_addr_hash_add(hash, &conn->nat_info->min_addr);
> - hash = ct_addr_hash_add(hash, &conn->nat_info->max_addr);
> + hash = ct_addr_hash_add(hash, &nat_info->min_addr);
> + hash = ct_addr_hash_add(hash, &nat_info->max_addr);
> hash = hash_add(hash,
> - (conn->nat_info->max_port << 16)
> - | conn->nat_info->min_port);
> + (nat_info->max_port << 16)
> + | nat_info->min_port);
> hash = ct_endpoint_hash_add(hash, &conn->key.src);
> hash = ct_endpoint_hash_add(hash, &conn->key.dst);
> hash = hash_add(hash, (OVS_FORCE uint32_t) conn->key.dl_type);
> @@ -2202,8 +2161,8 @@ nat_range_hash(const struct conn *conn, uint32_t basis)
> }
>
> static bool
> -nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> - struct conn *nat_conn)
> +nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
> + const struct nat_action_info_t *nat_info)
> {
> enum { MIN_NAT_EPHEMERAL_PORT = 1024,
> MAX_NAT_EPHEMERAL_PORT = 65535 };
> @@ -2211,25 +2170,26 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> uint16_t min_port;
> uint16_t max_port;
> uint16_t first_port;
> - uint32_t hash = nat_range_hash(conn, ct->hash_basis);
> + uint32_t hash = nat_range_hash(conn, nat_info, ct->hash_basis);
>
> - if ((conn->nat_info->nat_action & NAT_ACTION_SRC) &&
> - (!(conn->nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
> + if ((nat_info->nat_action & NAT_ACTION_SRC) &&
> + (!(nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
> min_port = ntohs(conn->key.src.port);
> max_port = ntohs(conn->key.src.port);
> first_port = min_port;
> - } else if ((conn->nat_info->nat_action & NAT_ACTION_DST) &&
> - (!(conn->nat_info->nat_action & NAT_ACTION_DST_PORT))) {
> + } else if ((nat_info->nat_action & NAT_ACTION_DST) &&
> + (!(nat_info->nat_action & NAT_ACTION_DST_PORT))) {
> min_port = ntohs(conn->key.dst.port);
> max_port = ntohs(conn->key.dst.port);
> first_port = min_port;
> } else {
> - uint16_t deltap = conn->nat_info->max_port - conn->nat_info->min_port;
> + uint16_t deltap = nat_info->max_port - nat_info->min_port;
> uint32_t port_index = hash % (deltap + 1);
> - first_port = conn->nat_info->min_port + port_index;
> - min_port = conn->nat_info->min_port;
> - max_port = conn->nat_info->max_port;
> + first_port = nat_info->min_port + port_index;
> + min_port = nat_info->min_port;
> + max_port = nat_info->max_port;
> }
> + conn->conn_flags |= nat_info->nat_action;
>
> uint32_t deltaa = 0;
> uint32_t address_index;
> @@ -2237,25 +2197,25 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> memset(&ct_addr, 0, sizeof ct_addr);
> union ct_addr max_ct_addr;
> memset(&max_ct_addr, 0, sizeof max_ct_addr);
> - max_ct_addr = conn->nat_info->max_addr;
> + max_ct_addr = nat_info->max_addr;
>
> if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> - deltaa = ntohl(conn->nat_info->max_addr.ipv4) -
> - ntohl(conn->nat_info->min_addr.ipv4);
> + deltaa = ntohl(nat_info->max_addr.ipv4) -
> + ntohl(nat_info->min_addr.ipv4);
> address_index = hash % (deltaa + 1);
> ct_addr.ipv4 = htonl(
> - ntohl(conn->nat_info->min_addr.ipv4) + address_index);
> + ntohl(nat_info->min_addr.ipv4) + address_index);
> } else {
> - deltaa = nat_ipv6_addrs_delta(&conn->nat_info->min_addr.ipv6,
> - &conn->nat_info->max_addr.ipv6);
> + deltaa = nat_ipv6_addrs_delta(&nat_info->min_addr.ipv6,
> + &nat_info->max_addr.ipv6);
> /* deltaa must be within 32 bits for full hash coverage. A 64 or
> * 128 bit hash is unnecessary and hence not used here. Most code
> * is kept common with V4; nat_ipv6_addrs_delta() will do the
> * enforcement via max_ct_addr. */
> - max_ct_addr = conn->nat_info->min_addr;
> + max_ct_addr = nat_info->min_addr;
> nat_ipv6_addr_increment(&max_ct_addr.ipv6, deltaa);
> address_index = hash % (deltaa + 1);
> - ct_addr.ipv6 = conn->nat_info->min_addr.ipv6;
> + ct_addr.ipv6 = nat_info->min_addr.ipv6;
> nat_ipv6_addr_increment(&ct_addr.ipv6, address_index);
> }
>
> @@ -2263,27 +2223,27 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> bool all_ports_tried = false;
> /* For DNAT or for specified port ranges, we don't use ephemeral ports. */
> bool ephemeral_ports_tried
> - = conn->nat_info->nat_action & NAT_ACTION_DST ||
> - conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
> + = nat_info->nat_action & NAT_ACTION_DST ||
> + nat_info->nat_action & NAT_ACTION_SRC_PORT
> ? true : false;
> union ct_addr first_addr = ct_addr;
> bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
> conn->key.nw_proto != IPPROTO_ICMPV6;
>
> while (true) {
> - if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> - nat_conn->rev_key.dst.addr = ct_addr;
> + if (nat_info->nat_action & NAT_ACTION_SRC) {
> + conn->rev_key.dst.addr = ct_addr;
> if (pat_enabled) {
> - nat_conn->rev_key.dst.port = htons(port);
> + conn->rev_key.dst.port = htons(port);
> }
> } else {
> - nat_conn->rev_key.src.addr = ct_addr;
> + conn->rev_key.src.addr = ct_addr;
> if (pat_enabled) {
> - nat_conn->rev_key.src.port = htons(port);
> + conn->rev_key.src.port = htons(port);
> }
> }
>
> - bool found = conn_lookup(ct, &nat_conn->rev_key, time_msec(), NULL,
> + bool found = conn_lookup(ct, &conn->rev_key, time_msec(), NULL,
> NULL);
> if (!found) {
> return true;
> @@ -2306,12 +2266,12 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> nat_ipv6_addr_increment(&ct_addr.ipv6, 1);
> }
> } else {
> - ct_addr = conn->nat_info->min_addr;
> + ct_addr = nat_info->min_addr;
> }
> if (!memcmp(&ct_addr, &first_addr, sizeof ct_addr)) {
> if (pat_enabled && !ephemeral_ports_tried) {
> ephemeral_ports_tried = true;
> - ct_addr = conn->nat_info->min_addr;
> + ct_addr = nat_info->min_addr;
> first_addr = ct_addr;
> min_port = MIN_NAT_EPHEMERAL_PORT;
> max_port = MAX_NAT_EPHEMERAL_PORT;
> @@ -2324,6 +2284,7 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> all_ports_tried = false;
> }
> }
> + conn->conn_flags &= ~CONN_FLAG_NAT_MASK;
> return false;
> }
>
> @@ -2342,13 +2303,10 @@ conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
> static bool
> conn_expired(struct conn *conn, long long now)
> {
> - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> - ovs_mutex_lock(&conn->lock);
> - bool expired = now >= conn->expiration ? true : false;
> - ovs_mutex_unlock(&conn->lock);
> - return expired;
> - }
> - return false;
> + ovs_mutex_lock(&conn->lock);
> + bool expired = now >= conn->expiration ? true : false;
> + ovs_mutex_unlock(&conn->lock);
> + return expired;
> }
>
> static bool
> @@ -2367,7 +2325,6 @@ new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
> static void
> delete_conn_cmn(struct conn *conn)
> {
> - free(conn->nat_info);
> free(conn->alg);
> free(conn);
> }
> @@ -2375,22 +2332,10 @@ delete_conn_cmn(struct conn *conn)
> static void
> delete_conn(struct conn *conn)
> {
> - ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> ovs_mutex_destroy(&conn->lock);
> - free(conn->nat_conn);
> delete_conn_cmn(conn);
> }
>
> -/* Only used by conn_clean_one(). */
> -static void
> -delete_conn_one(struct conn *conn)
> -{
> - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> - ovs_mutex_destroy(&conn->lock);
> - }
> - delete_conn_cmn(conn);
> -}
> -
> /* Convert a conntrack address 'a' into an IP address 'b' based on 'dl_type'.
> *
> * Note that 'dl_type' should be either "ETH_TYPE_IP" or "ETH_TYPE_IPv6"
> @@ -2539,10 +2484,14 @@ conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
> if (!cm_node) {
> break;
> }
> - struct conn *conn;
> - INIT_CONTAINER(conn, cm_node, cm_node);
> - if ((!dump->filter_zone || conn->key.zone == dump->zone) &&
> - (conn->conn_type != CT_CONN_TYPE_UN_NAT)) {
> + struct conn_dir *conndir;
> + struct conn* conn;
> + INIT_CONTAINER(conndir, cm_node, cm_node);
> + if (!conndir->orig) {
> + continue;
> + }
> + conn = conn_from_conndir(conndir);
> + if ((!dump->filter_zone || conn->key.zone == dump->zone)) {
> conn_to_ct_dpif_entry(conn, entry, now);
> return 0;
> }
> @@ -2561,11 +2510,16 @@ int
> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> {
> struct conn *conn;
> + struct conn_dir *conndir;
>
> ovs_mutex_lock(&ct->ct_lock);
> - CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
> + CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
> + if (!conndir->orig) {
> + continue;
> + }
> + conn = conn_from_conndir(conndir);
> if (!zone || *zone == conn->key.zone) {
> - conn_clean_one(ct, conn);
> + conn_clean(ct, conn);
> }
> }
> ovs_mutex_unlock(&ct->ct_lock);
> @@ -2586,7 +2540,7 @@ conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
> ovs_mutex_lock(&ct->ct_lock);
> conn_lookup(ct, &key, time_msec(), &conn, NULL);
>
> - if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> + if (conn) {
> conn_clean(ct, conn);
> } else {
> VLOG_WARN("Must flush tuple using the original pre-NATed tuple");
> @@ -2744,8 +2698,7 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
> alg_exp_node->nat_rpl_dst = true;
> if (skip_nat) {
> alg_nat_repl_addr = dst_addr;
> - } else if (parent_conn->nat_info &&
> - parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
> + } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
> alg_nat_repl_addr = parent_conn->rev_key.src.addr;
> alg_exp_node->nat_rpl_dst = false;
> } else {
> @@ -2757,8 +2710,7 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
> alg_exp_node->nat_rpl_dst = false;
> if (skip_nat) {
> alg_nat_repl_addr = src_addr;
> - } else if (parent_conn->nat_info &&
> - parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
> + } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
> alg_nat_repl_addr = parent_conn->key.dst.addr;
> alg_exp_node->nat_rpl_dst = true;
> } else {
More information about the dev
mailing list