[ovs-dev] [PATCH 1/4] conntrack: remove nat_conn

贺鹏 xnhp0320 at gmail.com
Sat Feb 27 09:49:17 UTC 2021


Hi,

Thanks for the detailed reviews.
These patches are like RFC to see if the work is interesting enough.

Aaron Conole <aconole at redhat.com> 于2021年2月25日周四 上午1:37写道:
>
> "hepeng.0320" <hepeng.0320 at bytedance.com> writes:
>
> > From: hepeng <hepeng.0320 at bytedance.com>
> >
> > Currently, when doing NAT, the userspace conntrack will use an extra
> > conn for the two directions in a flow. However, each conn has actually
> > the two keys for both orig and rev flow directions. This patch
> > introduces a conn_dir member in the conn and it consists of both rev and
> > orig cmap_node for hash lookup. This saves extra allocation for
> > nat_conn, and makes userspace code much cleaner.
>
> If I'm understanding this correctly, you still re-insert the conn into
> the conn list?


Yes. This is to insert the rev key into the cmap also. So for the nat traffic,
both orig and rev key are in the cmap.

>
> > We also introduces a conn_flags member to reduce the memory footprint of a
> > conn.
>
> We should split this out to a separate patch.
>
> > Signed-off-by: Peng He <hepeng.0320 at bytedance.com>
> > ---
> >  lib/conntrack-private.h |  20 +--
> >  lib/conntrack.c         | 264 ++++++++++++++++------------------------
> >  2 files changed, 121 insertions(+), 163 deletions(-)
> >
> > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> > index 789af82ff..6bec43d3f 100644
> > --- a/lib/conntrack-private.h
> > +++ b/lib/conntrack-private.h
> > @@ -83,21 +83,23 @@ struct alg_exp_node {
> >      bool nat_rpl_dst;
> >  };
> >
> > -enum OVS_PACKED_ENUM ct_conn_type {
> > -    CT_CONN_TYPE_DEFAULT,
> > -    CT_CONN_TYPE_UN_NAT,
> > +#define CONN_FLAG_NAT_MASK 0xf
> > +
> > +struct conn_dir {
> > +    struct cmap_node cm_node;
> > +    bool orig;
>
> This naming is confusing.  We can have 'conn->orig->orig'.  Consider
> renaming one of these fields to distinguish what is going on.  I would
> prefer seeing 'fwd' used (since it's the forward direction).

Yes, agree.

>
> >  };
> >
> >  struct conn {
> >      /* Immutable data. */
> >      struct conn_key key;
> > +    struct conn_dir orig;
> >      struct conn_key rev_key;
> > +    struct conn_dir rev;
>
> I think this might be better if setup like:
> +enum ct_direction {
> +    CT_DIRECTION_FWD,
> +    CT_DIRECTION_REV,
> +    CT_DIRECTIONS
> +}
> +
> +struct conn_data {
> +    struct conn_key key;
> +    struct cmap_node cm_node;
>  };
>
>  struct conn {
> -    struct conn_key key;
> -    struct conn_key rev_key;
> +    struct conn_data cn_data[CT_DIRECTIONS];
> ...
>
> Then in the code, we can always get orig and rev information:
>
>   conn->cn_data[CT_DIRECTION_FWD]
>   conn->cn_data[CT_DIRECTION_REV]
>
> Did I miss something?

Since both origin and rev are in the cmap, when you lookup the hash table,
you get a pointer to the  'middle data structure', in the patch, it is
the conn_dir.

However, you are not sure what you get is the origin dir or the rev dir. The key
is to use a variable stored in the conn_dir, like 'fwd' or 'orig'.
Then you know the dir,
by knowing the dir, you know the offset between your pointer and the pointer to
the conn, just like the belowing code:

static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
    return conndir->orig ? CONTAINER_OF(conndir, struct conn, orig) : \
                           CONTAINER_OF(conndir, struct conn, rev);
}

In your case:

static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
    return conndir->orig ? CONTAINER_OF(conndir, struct conn, cn_data[FWD]) : \
                           CONTAINER_OF(conndir, struct conn, cn_data[REV]);
}

In the following patches I've changed the code of conn_dir as a part
of conn_key....



>
> >      struct conn_key parent_key; /* Only used for orig_tuple support. */
> >      struct ovs_list exp_node;
> > -    struct cmap_node cm_node;
> > -    struct nat_action_info_t *nat_info;
> > +    uint64_t conn_flags;
>
> As mentioned, please separate this as a different patch.  This is new
> piece of functionality.
>
> >      char *alg;
> > -    struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */
> >
> >      /* Mutable data. */
> >      struct ovs_mutex lock; /* Guards all mutable fields. */
> > @@ -117,11 +119,15 @@ struct conn {
> >
> >      /* Immutable data. */
> >      bool alg_related; /* True if alg data connection. */
> > -    enum ct_conn_type conn_type;
> >
> >      uint32_t tp_id; /* Timeout policy ID. */
> >  };
> >
> > +static inline struct conn * conn_from_conndir(struct conn_dir *conndir) {
> > +    return conndir->orig ? CONTAINER_OF(conndir, struct conn, orig) : \
> > +                           CONTAINER_OF(conndir, struct conn, rev);
> > +}
> > +
> >  enum ct_update_res {
> >      CT_UPDATE_INVALID,
> >      CT_UPDATE_VALID,
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index 930ed0be6..73d3a2fb2 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -94,7 +94,6 @@ static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
> >                               uint32_t tp_id);
> >  static void delete_conn_cmn(struct conn *);
> >  static void delete_conn(struct conn *);
> > -static void delete_conn_one(struct conn *conn);
> >  static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn,
> >                                        struct dp_packet *pkt,
> >                                        struct conn_lookup_ctx *ctx,
> > @@ -108,8 +107,8 @@ static void set_label(struct dp_packet *, struct conn *,
> >  static void *clean_thread_main(void *f_);
> >
> >  static bool
> > -nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> > -                       struct conn *nat_conn);
> > +nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
> > +                       const struct nat_action_info_t *nat_action);
> >
> >  static uint8_t
> >  reverse_icmp_type(uint8_t type);
> > @@ -233,6 +232,7 @@ conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2)
> >      return 1;
> >  }
> >
> > +#if 0
>
> Please remove functions which are no longer used.

OK.

>
> >  static void
> >  ct_print_conn_info(const struct conn *c, const char *log_msg,
> >                     enum vlog_level vll, bool force, bool rl_on)
> > @@ -287,6 +287,7 @@ ct_print_conn_info(const struct conn *c, const char *log_msg,
> >          }
> >      }
> >  }
> > +#endif
> >
> >  /* Initializes the connection tracker 'ct'.  The caller is responsible for
> >   * calling 'conntrack_destroy()', when the instance is not needed anymore */
> > @@ -437,7 +438,7 @@ conn_clean_cmn(struct conntrack *ct, struct conn *conn)
> >      }
> >
> >      uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
> > -    cmap_remove(&ct->conns, &conn->cm_node, hash);
> > +    cmap_remove(&ct->conns, &conn->orig.cm_node, hash);
> >
> >      struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone);
> >      if (zl && zl->czl.zone_limit_seq == conn->zone_limit_seq) {
> > @@ -451,12 +452,10 @@ static void
> >  conn_clean(struct conntrack *ct, struct conn *conn)
> >      OVS_REQUIRES(ct->ct_lock)
> >  {
> > -    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> > -
> >      conn_clean_cmn(ct, conn);
> > -    if (conn->nat_conn) {
> > -        uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis);
> > -        cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
> > +    if (conn->conn_flags & CONN_FLAG_NAT_MASK) {
> > +        uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> > +        cmap_remove(&ct->conns, &conn->rev.cm_node, hash);
> >      }
> >      ovs_list_remove(&conn->exp_node);
> >      conn->cleaned = true;
> > @@ -464,19 +463,6 @@ conn_clean(struct conntrack *ct, struct conn *conn)
> >      atomic_count_dec(&ct->n_conn);
> >  }
> >
> > -static void
> > -conn_clean_one(struct conntrack *ct, struct conn *conn)
> > -    OVS_REQUIRES(ct->ct_lock)
> > -{
> > -    conn_clean_cmn(ct, conn);
> > -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > -        ovs_list_remove(&conn->exp_node);
> > -        conn->cleaned = true;
> > -        atomic_count_dec(&ct->n_conn);
> > -    }
> > -    ovsrcu_postpone(delete_conn_one, conn);
> > -}
> > -
> >  /* Destroys the connection tracker 'ct' and frees all the allocated memory.
> >   * The caller of this function must already have shut down packet input
> >   * and PMD threads (which would have been quiesced).  */
> > @@ -488,9 +474,14 @@ conntrack_destroy(struct conntrack *ct)
> >      pthread_join(ct->clean_thread, NULL);
> >      latch_destroy(&ct->clean_thread_exit);
> >
> > +    struct conn_dir *conndir;
> >      ovs_mutex_lock(&ct->ct_lock);
> > -    CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
> > -        conn_clean_one(ct, conn);
> > +    CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
> > +        if (!conndir->orig) {
> > +            continue;
> > +        }
> > +        conn = conn_from_conndir(conndir);
> > +        conn_clean(ct, conn);
> >      }
> >      cmap_destroy(&ct->conns);
> >
> > @@ -530,9 +521,11 @@ conn_key_lookup(struct conntrack *ct, const struct conn_key *key,
> >                  bool *reply)
> >  {
> >      struct conn *conn;
> > +    struct conn_dir *conndir;
> >      bool found = false;
> >
> > -    CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &ct->conns) {
> > +    CMAP_FOR_EACH_WITH_HASH (conndir, cm_node, hash, &ct->conns) {
> > +        conn = conn_from_conndir(conndir);
> >          if (!conn_key_cmp(&conn->key, key) && !conn_expired(conn, now)) {
> >              found = true;
> >              if (reply) {
> > @@ -710,7 +703,7 @@ handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
> >  static void
> >  pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
> > @@ -718,7 +711,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >              struct udp_header *uh = dp_packet_l4(pkt);
> >              packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, th->tcp_src, conn->rev_key.src.port);
> > @@ -732,7 +725,7 @@ pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  static void
> >  nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          pkt->md.ct_state |= CS_SRC_NAT;
> >          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >              struct ip_header *nh = dp_packet_l3(pkt);
> > @@ -747,7 +740,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> >          if (!related) {
> >              pat_packet(pkt, conn);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          pkt->md.ct_state |= CS_DST_NAT;
> >          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >              struct ip_header *nh = dp_packet_l3(pkt);
> > @@ -768,7 +761,7 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> >  static void
> >  un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
> > @@ -776,7 +769,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >              struct udp_header *uh = dp_packet_l4(pkt);
> >              packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, conn->key.dst.port, th->tcp_dst);
> > @@ -790,7 +783,7 @@ un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  static void
> >  reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th_in = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, conn->key.src.port,
> > @@ -800,7 +793,7 @@ reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
> >              packet_set_udp_port(pkt, conn->key.src.port,
> >                                  uh_in->udp_dst);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          if (conn->key.nw_proto == IPPROTO_TCP) {
> >              struct tcp_header *th_in = dp_packet_l4(pkt);
> >              packet_set_tcp_port(pkt, th_in->tcp_src,
> > @@ -834,10 +827,10 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> >          pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
> >          pkt->l4_ofs += inner_l4 - (char *) icmp;
> >
> > -        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +        if (conn->conn_flags & NAT_ACTION_SRC) {
> >              packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
> >                                   conn->key.src.addr.ipv4);
> > -        } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +        } else if (conn->conn_flags & NAT_ACTION_DST) {
> >              packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
> >                                   conn->key.dst.addr.ipv4);
> >          }
> > @@ -858,11 +851,11 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> >          pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
> >          pkt->l4_ofs += inner_l4 - (char *) icmp6;
> >
> > -        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +        if (conn->conn_flags & NAT_ACTION_SRC) {
> >              packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >                                   inner_l3_6->ip6_src.be32,
> >                                   &conn->key.src.addr.ipv6, true);
> > -        } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +        } else if (conn->conn_flags & NAT_ACTION_DST) {
> >              packet_set_ipv6_addr(pkt, conn->key.nw_proto,
> >                                   inner_l3_6->ip6_dst.be32,
> >                                   &conn->key.dst.addr.ipv6, true);
> > @@ -877,10 +870,9 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
> >  }
> >
> >  static void
> > -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> > -              bool related)
> > +un_nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
> >  {
> > -    if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > +    if (conn->conn_flags & NAT_ACTION_SRC) {
> >          pkt->md.ct_state |= CS_DST_NAT;
> >          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >              struct ip_header *nh = dp_packet_l3(pkt);
> > @@ -898,7 +890,7 @@ un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
> >          } else {
> >              un_pat_packet(pkt, conn);
> >          }
> > -    } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +    } else if (conn->conn_flags & NAT_ACTION_DST) {
> >          pkt->md.ct_state |= CS_SRC_NAT;
> >          if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> >              struct ip_header *nh = dp_packet_l3(pkt);
> > @@ -964,7 +956,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >      OVS_REQUIRES(ct->ct_lock)
> >  {
> >      struct conn *nc = NULL;
> > -    struct conn *nat_conn = NULL;
> >
> >      if (!valid_new(pkt, &ctx->key)) {
> >          pkt->md.ct_state = CS_INVALID;
> > @@ -995,6 +986,8 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >          memcpy(&nc->key, &ctx->key, sizeof nc->key);
> >          memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
> >          conn_key_reverse(&nc->rev_key);
> > +        nc->orig.orig = true;
> > +        nc->rev.orig = !nc->orig.orig;
> >
> >          if (ct_verify_helper(helper, ct_alg_ctl)) {
> >              nc->alg = nullable_xstrdup(helper);
> > @@ -1008,45 +1001,29 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >          }
> >
> >          if (nat_action_info) {
> > -            nc->nat_info = xmemdup(nat_action_info, sizeof *nc->nat_info);
> > -            nat_conn = xzalloc(sizeof *nat_conn);
> > -
> >              if (alg_exp) {
> >                  if (alg_exp->nat_rpl_dst) {
> >                      nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr;
> > -                    nc->nat_info->nat_action = NAT_ACTION_SRC;
> > +                    nc->conn_flags |= NAT_ACTION_SRC;
> >                  } else {
> >                      nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr;
> > -                    nc->nat_info->nat_action = NAT_ACTION_DST;
> > +                    nc->conn_flags |= NAT_ACTION_DST;
> >                  }
> >              } else {
> > -                memcpy(nat_conn, nc, sizeof *nat_conn);
> > -                bool nat_res = nat_select_range_tuple(ct, nc, nat_conn);
> > +                bool nat_res = nat_select_range_tuple(ct, nc, nat_action_info);
> >
> >                  if (!nat_res) {
> >                      goto nat_res_exhaustion;
> >                  }
> > -
> > -                /* Update nc with nat adjustments made to nat_conn by
> > -                 * nat_select_range_tuple(). */
> > -                memcpy(nc, nat_conn, sizeof *nc);
> >              }
> >
> >              nat_packet(pkt, nc, ctx->icmp_related);
> > -            memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
> > -            memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
> > -            nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
> > -            nat_conn->nat_info = NULL;
> > -            nat_conn->alg = NULL;
> > -            nat_conn->nat_conn = NULL;
> > -            uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis);
> > -            cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
> > +            uint32_t nat_hash = conn_key_hash(&nc->rev_key, ct->hash_basis);
> > +            cmap_insert(&ct->conns, &nc->rev.cm_node, nat_hash);
> >          }
> >
> > -        nc->nat_conn = nat_conn;
> >          ovs_mutex_init_adaptive(&nc->lock);
> > -        nc->conn_type = CT_CONN_TYPE_DEFAULT;
> > -        cmap_insert(&ct->conns, &nc->cm_node, ctx->hash);
> > +        cmap_insert(&ct->conns, &nc->orig.cm_node, ctx->hash);
> >          atomic_count_inc(&ct->n_conn);
> >          ctx->conn = nc; /* For completeness. */
> >          if (zl) {
> > @@ -1066,7 +1043,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> >       * firewall rules or a separate firewall.  Also using zone partitioning
> >       * can limit DoS impact. */
> >  nat_res_exhaustion:
> > -    free(nat_conn);
> >      ovs_list_remove(&nc->exp_node);
> >      delete_conn_cmn(nc);
> >      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
> > @@ -1080,7 +1056,6 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
> >                    struct conn_lookup_ctx *ctx, struct conn *conn,
> >                    long long now)
> >  {
> > -    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> >      bool create_new_conn = false;
> >
> >      if (ctx->icmp_related) {
> > @@ -1128,7 +1103,7 @@ static void
> >  handle_nat(struct dp_packet *pkt, struct conn *conn,
> >             uint16_t zone, bool reply, bool related)
> >  {
> > -    if (conn->nat_info &&
> > +    if (conn->conn_flags & CONN_FLAG_NAT_MASK &&
> >          (!(pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) ||
> >            (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT) &&
> >             zone != pkt->md.ct_zone))) {
> > @@ -1301,25 +1276,6 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
> >          conn = NULL;
> >      }
> >
> > -    if (OVS_LIKELY(conn)) {
> > -        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
> > -
> > -            ctx->reply = true;
> > -            struct conn *rev_conn = conn;  /* Save for debugging. */
> > -            uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> > -            conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply);
> > -
> > -            if (!conn) {
> > -                pkt->md.ct_state |= CS_INVALID;
> > -                write_ct_md(pkt, zone, NULL, NULL, NULL);
> > -                char *log_msg = xasprintf("Missing parent conn %p", rev_conn);
> > -                ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true);
> > -                free(log_msg);
> > -                return;
> > -            }
> > -        }
> > -    }
> > -
> >      enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst,
> >                                                         helper);
> >
> > @@ -2114,12 +2070,13 @@ conn_key_reverse(struct conn_key *key)
> >  }
> >
> >  static uint32_t
> > -nat_ipv6_addrs_delta(struct in6_addr *ipv6_min, struct in6_addr *ipv6_max)
> > +nat_ipv6_addrs_delta(const struct in6_addr *ipv6_min,
> > +                     const struct in6_addr *ipv6_max)
> >  {
> > -    uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
> > -    uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] +  sizeof(uint64_t);
> > -    uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
> > -    uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
> > +    const uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
> > +    const uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] +  sizeof(uint64_t);
> > +    const uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
> > +    const uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
>
> Why these const changes now?  This should be a separate patch.
>
> >      ovs_be64 addr6_64_min_hi;
> >      ovs_be64 addr6_64_min_lo;
> > @@ -2180,15 +2137,17 @@ nat_ipv6_addr_increment(struct in6_addr *ipv6, uint32_t increment)
> >  }
> >
> >  static uint32_t
> > -nat_range_hash(const struct conn *conn, uint32_t basis)
> > +nat_range_hash(const struct conn *conn,
> > +               const struct nat_action_info_t *nat_info,
> > +               uint32_t basis)
> >  {
> >      uint32_t hash = basis;
> >
> > -    hash = ct_addr_hash_add(hash, &conn->nat_info->min_addr);
> > -    hash = ct_addr_hash_add(hash, &conn->nat_info->max_addr);
> > +    hash = ct_addr_hash_add(hash, &nat_info->min_addr);
> > +    hash = ct_addr_hash_add(hash, &nat_info->max_addr);
> >      hash = hash_add(hash,
> > -                    (conn->nat_info->max_port << 16)
> > -                    | conn->nat_info->min_port);
> > +                    (nat_info->max_port << 16)
> > +                    | nat_info->min_port);
> >      hash = ct_endpoint_hash_add(hash, &conn->key.src);
> >      hash = ct_endpoint_hash_add(hash, &conn->key.dst);
> >      hash = hash_add(hash, (OVS_FORCE uint32_t) conn->key.dl_type);
> > @@ -2202,8 +2161,8 @@ nat_range_hash(const struct conn *conn, uint32_t basis)
> >  }
> >
> >  static bool
> > -nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> > -                       struct conn *nat_conn)
> > +nat_select_range_tuple(struct conntrack *ct, struct conn *conn,
> > +                       const struct nat_action_info_t *nat_info)
> >  {
> >      enum { MIN_NAT_EPHEMERAL_PORT = 1024,
> >             MAX_NAT_EPHEMERAL_PORT = 65535 };
> > @@ -2211,25 +2170,26 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >      uint16_t min_port;
> >      uint16_t max_port;
> >      uint16_t first_port;
> > -    uint32_t hash = nat_range_hash(conn, ct->hash_basis);
> > +    uint32_t hash = nat_range_hash(conn, nat_info, ct->hash_basis);
> >
> > -    if ((conn->nat_info->nat_action & NAT_ACTION_SRC) &&
> > -        (!(conn->nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
> > +    if ((nat_info->nat_action & NAT_ACTION_SRC) &&
> > +        (!(nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
> >          min_port = ntohs(conn->key.src.port);
> >          max_port = ntohs(conn->key.src.port);
> >          first_port = min_port;
> > -    } else if ((conn->nat_info->nat_action & NAT_ACTION_DST) &&
> > -               (!(conn->nat_info->nat_action & NAT_ACTION_DST_PORT))) {
> > +    } else if ((nat_info->nat_action & NAT_ACTION_DST) &&
> > +               (!(nat_info->nat_action & NAT_ACTION_DST_PORT))) {
> >          min_port = ntohs(conn->key.dst.port);
> >          max_port = ntohs(conn->key.dst.port);
> >          first_port = min_port;
> >      } else {
> > -        uint16_t deltap = conn->nat_info->max_port - conn->nat_info->min_port;
> > +        uint16_t deltap = nat_info->max_port - nat_info->min_port;
> >          uint32_t port_index = hash % (deltap + 1);
> > -        first_port = conn->nat_info->min_port + port_index;
> > -        min_port = conn->nat_info->min_port;
> > -        max_port = conn->nat_info->max_port;
> > +        first_port = nat_info->min_port + port_index;
> > +        min_port = nat_info->min_port;
> > +        max_port = nat_info->max_port;
> >      }
> > +    conn->conn_flags |= nat_info->nat_action;
> >
> >      uint32_t deltaa = 0;
> >      uint32_t address_index;
> > @@ -2237,25 +2197,25 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >      memset(&ct_addr, 0, sizeof ct_addr);
> >      union ct_addr max_ct_addr;
> >      memset(&max_ct_addr, 0, sizeof max_ct_addr);
> > -    max_ct_addr = conn->nat_info->max_addr;
> > +    max_ct_addr = nat_info->max_addr;
> >
> >      if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> > -        deltaa = ntohl(conn->nat_info->max_addr.ipv4) -
> > -                 ntohl(conn->nat_info->min_addr.ipv4);
> > +        deltaa = ntohl(nat_info->max_addr.ipv4) -
> > +                 ntohl(nat_info->min_addr.ipv4);
> >          address_index = hash % (deltaa + 1);
> >          ct_addr.ipv4 = htonl(
> > -            ntohl(conn->nat_info->min_addr.ipv4) + address_index);
> > +            ntohl(nat_info->min_addr.ipv4) + address_index);
> >      } else {
> > -        deltaa = nat_ipv6_addrs_delta(&conn->nat_info->min_addr.ipv6,
> > -                                      &conn->nat_info->max_addr.ipv6);
> > +        deltaa = nat_ipv6_addrs_delta(&nat_info->min_addr.ipv6,
> > +                                      &nat_info->max_addr.ipv6);
> >          /* deltaa must be within 32 bits for full hash coverage. A 64 or
> >           * 128 bit hash is unnecessary and hence not used here. Most code
> >           * is kept common with V4; nat_ipv6_addrs_delta() will do the
> >           * enforcement via max_ct_addr. */
> > -        max_ct_addr = conn->nat_info->min_addr;
> > +        max_ct_addr = nat_info->min_addr;
> >          nat_ipv6_addr_increment(&max_ct_addr.ipv6, deltaa);
> >          address_index = hash % (deltaa + 1);
> > -        ct_addr.ipv6 = conn->nat_info->min_addr.ipv6;
> > +        ct_addr.ipv6 = nat_info->min_addr.ipv6;
> >          nat_ipv6_addr_increment(&ct_addr.ipv6, address_index);
> >      }
> >
> > @@ -2263,27 +2223,27 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >      bool all_ports_tried = false;
> >      /* For DNAT or for specified port ranges, we don't use ephemeral ports. */
> >      bool ephemeral_ports_tried
> > -        = conn->nat_info->nat_action & NAT_ACTION_DST ||
> > -              conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
> > +        = nat_info->nat_action & NAT_ACTION_DST ||
> > +              nat_info->nat_action & NAT_ACTION_SRC_PORT
> >            ? true : false;
> >      union ct_addr first_addr = ct_addr;
> >      bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
> >                         conn->key.nw_proto != IPPROTO_ICMPV6;
> >
> >      while (true) {
> > -        if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> > -            nat_conn->rev_key.dst.addr = ct_addr;
> > +        if (nat_info->nat_action & NAT_ACTION_SRC) {
> > +            conn->rev_key.dst.addr = ct_addr;
> >              if (pat_enabled) {
> > -                nat_conn->rev_key.dst.port = htons(port);
> > +                conn->rev_key.dst.port = htons(port);
> >              }
> >          } else {
> > -            nat_conn->rev_key.src.addr = ct_addr;
> > +            conn->rev_key.src.addr = ct_addr;
> >              if (pat_enabled) {
> > -                nat_conn->rev_key.src.port = htons(port);
> > +                conn->rev_key.src.port = htons(port);
> >              }
> >          }
> >
> > -        bool found = conn_lookup(ct, &nat_conn->rev_key, time_msec(), NULL,
> > +        bool found = conn_lookup(ct, &conn->rev_key, time_msec(), NULL,
> >                                   NULL);
> >          if (!found) {
> >              return true;
> > @@ -2306,12 +2266,12 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >                      nat_ipv6_addr_increment(&ct_addr.ipv6, 1);
> >                  }
> >              } else {
> > -                ct_addr = conn->nat_info->min_addr;
> > +                ct_addr = nat_info->min_addr;
> >              }
> >              if (!memcmp(&ct_addr, &first_addr, sizeof ct_addr)) {
> >                  if (pat_enabled && !ephemeral_ports_tried) {
> >                      ephemeral_ports_tried = true;
> > -                    ct_addr = conn->nat_info->min_addr;
> > +                    ct_addr = nat_info->min_addr;
> >                      first_addr = ct_addr;
> >                      min_port = MIN_NAT_EPHEMERAL_PORT;
> >                      max_port = MAX_NAT_EPHEMERAL_PORT;
> > @@ -2324,6 +2284,7 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
> >              all_ports_tried = false;
> >          }
> >      }
> > +    conn->conn_flags &= ~CONN_FLAG_NAT_MASK;
> >      return false;
> >  }
> >
> > @@ -2342,13 +2303,10 @@ conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
> >  static bool
> >  conn_expired(struct conn *conn, long long now)
> >  {
> > -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > -        ovs_mutex_lock(&conn->lock);
> > -        bool expired = now >= conn->expiration ? true : false;
> > -        ovs_mutex_unlock(&conn->lock);
> > -        return expired;
> > -    }
> > -    return false;
> > +    ovs_mutex_lock(&conn->lock);
> > +    bool expired = now >= conn->expiration ? true : false;
> > +    ovs_mutex_unlock(&conn->lock);
> > +    return expired;
> >  }
> >
> >  static bool
> > @@ -2367,7 +2325,6 @@ new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
> >  static void
> >  delete_conn_cmn(struct conn *conn)
> >  {
> > -    free(conn->nat_info);
> >      free(conn->alg);
> >      free(conn);
> >  }
> > @@ -2375,22 +2332,10 @@ delete_conn_cmn(struct conn *conn)
> >  static void
> >  delete_conn(struct conn *conn)
> >  {
> > -    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> >      ovs_mutex_destroy(&conn->lock);
> > -    free(conn->nat_conn);
> >      delete_conn_cmn(conn);
> >  }
> >
> > -/* Only used by conn_clean_one(). */
> > -static void
> > -delete_conn_one(struct conn *conn)
> > -{
> > -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > -        ovs_mutex_destroy(&conn->lock);
> > -    }
> > -    delete_conn_cmn(conn);
> > -}
> > -
> >  /* Convert a conntrack address 'a' into an IP address 'b' based on 'dl_type'.
> >   *
> >   * Note that 'dl_type' should be either "ETH_TYPE_IP" or "ETH_TYPE_IPv6"
> > @@ -2539,10 +2484,14 @@ conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
> >          if (!cm_node) {
> >              break;
> >          }
> > -        struct conn *conn;
> > -        INIT_CONTAINER(conn, cm_node, cm_node);
> > -        if ((!dump->filter_zone || conn->key.zone == dump->zone) &&
> > -            (conn->conn_type != CT_CONN_TYPE_UN_NAT)) {
> > +        struct conn_dir *conndir;
> > +        struct conn* conn;
> > +        INIT_CONTAINER(conndir, cm_node, cm_node);
> > +        if (!conndir->orig) {
> > +            continue;
> > +        }
> > +        conn = conn_from_conndir(conndir);
> > +        if ((!dump->filter_zone || conn->key.zone == dump->zone)) {
> >              conn_to_ct_dpif_entry(conn, entry, now);
> >              return 0;
> >          }
> > @@ -2561,11 +2510,16 @@ int
> >  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> >  {
> >      struct conn *conn;
> > +    struct conn_dir *conndir;
> >
> >      ovs_mutex_lock(&ct->ct_lock);
> > -    CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
> > +    CMAP_FOR_EACH (conndir, cm_node, &ct->conns) {
> > +        if (!conndir->orig) {
> > +            continue;
> > +        }
> > +        conn = conn_from_conndir(conndir);
> >          if (!zone || *zone == conn->key.zone) {
> > -            conn_clean_one(ct, conn);
> > +            conn_clean(ct, conn);
> >          }
> >      }
> >      ovs_mutex_unlock(&ct->ct_lock);
> > @@ -2586,7 +2540,7 @@ conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
> >      ovs_mutex_lock(&ct->ct_lock);
> >      conn_lookup(ct, &key, time_msec(), &conn, NULL);
> >
> > -    if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > +    if (conn) {
> >          conn_clean(ct, conn);
> >      } else {
> >          VLOG_WARN("Must flush tuple using the original pre-NATed tuple");
> > @@ -2744,8 +2698,7 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
> >          alg_exp_node->nat_rpl_dst = true;
> >          if (skip_nat) {
> >              alg_nat_repl_addr = dst_addr;
> > -        } else if (parent_conn->nat_info &&
> > -                   parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +        } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
> >              alg_nat_repl_addr = parent_conn->rev_key.src.addr;
> >              alg_exp_node->nat_rpl_dst = false;
> >          } else {
> > @@ -2757,8 +2710,7 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
> >          alg_exp_node->nat_rpl_dst = false;
> >          if (skip_nat) {
> >              alg_nat_repl_addr = src_addr;
> > -        } else if (parent_conn->nat_info &&
> > -                   parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
> > +        } else if (parent_conn->conn_flags & NAT_ACTION_DST) {
> >              alg_nat_repl_addr = parent_conn->key.dst.addr;
> >              alg_exp_node->nat_rpl_dst = true;
> >          } else {
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev



-- 
hepeng



More information about the dev mailing list