[ovs-dev] [RFC v2] ipv6 tunneling: Fix for performance drop introduced by ipv6 tunnel support.
Thadeu Lima de Souza Cascardo
cascardo at redhat.com
Thu Dec 24 19:16:07 UTC 2015
On Tue, Dec 22, 2015 at 01:59:00PM +0000, Sugesh Chandran wrote:
> Adding a new flag to verify the validity of tunnel metadata. This flag avoids
> the need of resetting and validating the entire ipv4/ipv6 tunnel destination
> address which caused a serious performance drop.
>
> Signed-off-by: Sugesh Chandran <sugesh.chandran at intel.com>
Hi, Sugesh.
This one looks to be in the right direction. Some comments below.
But before I go into that, Jiri has written a version of ipv6_addr_is_set that
should perform much better than the current one. Maybe that alone could give you
some improved performance. He was just ORing the words in the address.
> ---
> lib/dpif.c | 2 +-
> lib/flow.c | 4 ++--
> lib/match.c | 12 +++++-----
> lib/meta-flow.c | 12 ++++++++--
> lib/netdev-vport.c | 4 ++--
> lib/odp-util.c | 8 +++----
> lib/packets.c | 8 ++++++-
> lib/packets.h | 52 ++++++++++++++++++++++++++++++++++++++------
> ofproto/ofproto-dpif-ipfix.c | 3 ++-
> ofproto/ofproto-dpif-rid.c | 4 ++--
> ofproto/ofproto-dpif-sflow.c | 6 ++---
> ofproto/tunnel.c | 14 +++++++-----
> 12 files changed, 93 insertions(+), 36 deletions(-)
>
> diff --git a/lib/dpif.c b/lib/dpif.c
> index 38e40ba..9f08783 100644
> --- a/lib/dpif.c
> +++ b/lib/dpif.c
> @@ -1107,7 +1107,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet **packets, int cnt,
> struct ofpbuf execute_actions;
> uint64_t stub[256 / 8];
> struct pkt_metadata *md = &packet->md;
> - bool dst_set;
> + uint8_t dst_set;
I would still keep flow_tnl_dst_is_set returning bool, and just make it compare
the tunnel protocol with the invalid option. More on that below.
>
> dst_set = flow_tnl_dst_is_set(&md->tunnel);
> if (dst_set) {
> diff --git a/lib/flow.c b/lib/flow.c
> index 75f8285..6b82d95 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -818,13 +818,13 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
> if (flow->tunnel.ip_src) {
> match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
> }
> - if (flow->tunnel.ip_dst) {
> + if (flow->tunnel.flow_tnl_is_valid == FLOW_TUNNEL_VALID_IPV4_DST) {
> match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
> }
> if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
> match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
> }
> - if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
> + if (flow->tunnel.flow_tnl_is_valid == FLOW_TUNNEL_VALID_IPV6_DST) {
> match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
> }
> if (flow->tunnel.gbp_id != htons(0)) {
> diff --git a/lib/match.c b/lib/match.c
> index 95d34bc..97a623e 100644
> --- a/lib/match.c
> +++ b/lib/match.c
> @@ -186,8 +186,8 @@ match_set_tun_dst(struct match *match, ovs_be32 dst)
> void
> match_set_tun_dst_masked(struct match *match, ovs_be32 dst, ovs_be32 mask)
> {
> - match->wc.masks.tunnel.ip_dst = mask;
> - match->flow.tunnel.ip_dst = dst & mask;
> + set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask);
> + set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask);
> }
>
> void
> @@ -208,16 +208,16 @@ match_set_tun_ipv6_src_masked(struct match *match, const struct in6_addr *src,
> void
> match_set_tun_ipv6_dst(struct match *match, const struct in6_addr *dst)
> {
> - match->flow.tunnel.ipv6_dst = *dst;
> - match->wc.masks.tunnel.ipv6_dst = in6addr_exact;
> + set_ipv6_dst_tnl(&match->flow.tunnel, *dst);
> + set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact);
> }
>
> void
> match_set_tun_ipv6_dst_masked(struct match *match, const struct in6_addr *dst,
> const struct in6_addr *mask)
> {
> - match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask);
> - match->wc.masks.tunnel.ipv6_dst = *mask;
> + set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst, mask));
> + set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask);
> }
>
> void
> diff --git a/lib/meta-flow.c b/lib/meta-flow.c
> index 6bd0b99..4661f60 100644
> --- a/lib/meta-flow.c
> +++ b/lib/meta-flow.c
> @@ -1180,13 +1180,13 @@ mf_set_flow_value(const struct mf_field *mf,
> flow->tunnel.ip_src = value->be32;
> break;
> case MFF_TUN_DST:
> - flow->tunnel.ip_dst = value->be32;
> + set_ipv4_dst_tnl(&flow->tunnel, value->be32);
> break;
> case MFF_TUN_IPV6_SRC:
> flow->tunnel.ipv6_src = value->ipv6;
> break;
> case MFF_TUN_IPV6_DST:
> - flow->tunnel.ipv6_dst = value->ipv6;
> + set_ipv6_dst_tnl(&flow->tunnel, value->ipv6);
> break;
> case MFF_TUN_FLAGS:
> flow->tunnel.flags = (flow->tunnel.flags & ~FLOW_TNL_PUB_F_MASK) |
> @@ -1503,6 +1503,14 @@ mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str)
> sizeof match->wc.masks.tunnel.ipv6_dst);
> memset(&match->flow.tunnel.ipv6_dst, 0,
> sizeof match->flow.tunnel.ipv6_dst);
> + /* What if flow have a valid ipv4 tunnel address??
> + * Reset the flag only if thats not the case.
> + */
> + match->wc.masks.tunnel.flow_tnl_is_valid =
> + (match->wc.masks.tunnel.flow_tnl_is_valid ==
> + FLOW_TUNNEL_VALID_IPV4_DST)?
> + match->wc.masks.tunnel.flow_tnl_is_valid:
> + FLOW_TUNNEL_INVALID_DST;
> break;
> case MFF_TUN_FLAGS:
> match_set_tun_flags_masked(match, 0, 0);
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index 88f5022..f706f61 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -919,9 +919,9 @@ ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
> ip_dst = get_16aligned_be32(&ip->ip_dst);
>
> tnl->ip_src = ip_src;
> - tnl->ip_dst = ip_dst;
> tnl->ip_tos = ip->ip_tos;
> tnl->ip_ttl = ip->ip_ttl;
> + set_ipv4_dst_tnl(tnl, ip_dst);
>
> *hlen += IP_HEADER_LEN;
>
> @@ -931,7 +931,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
> memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6->ip6_dst);
> tnl->ip_tos = 0;
> tnl->ip_ttl = ip6->ip6_hlim;
> -
> + set_ipv6_dst_tnl(tnl, tnl->ipv6_dst);
Have you tried casting ip6->ip6_dst to struct in6_addr, or making
set_ipv6_dst_tnl accept the unaligned version? Maybe it's possible to avoid the
second copy here.
> *hlen += IPV6_HEADER_LEN;
>
> } else {
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index aa5bd6b..01e2a98 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -1860,13 +1860,13 @@ odp_tun_key_from_attr__(const struct nlattr *attr,
> tun->ip_src = nl_attr_get_be32(a);
> break;
> case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
> - tun->ip_dst = nl_attr_get_be32(a);
> + set_ipv4_dst_tnl(tun, nl_attr_get_be32(a));
> break;
> case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
> tun->ipv6_src = nl_attr_get_in6_addr(a);
> break;
> case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
> - tun->ipv6_dst = nl_attr_get_in6_addr(a);
> + set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a));
> break;
> case OVS_TUNNEL_KEY_ATTR_TOS:
> tun->ip_tos = nl_attr_get_u8(a);
> @@ -1957,13 +1957,13 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
> if (tun_key->ip_src) {
> nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ip_src);
> }
> - if (tun_key->ip_dst) {
> + if (tun_key->flow_tnl_is_valid == FLOW_TUNNEL_VALID_IPV4_DST) {
> nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ip_dst);
> }
Use an else if?
> if (ipv6_addr_is_set(&tun_key->ipv6_src)) {
> nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, &tun_key->ipv6_src);
> }
> - if (ipv6_addr_is_set(&tun_key->ipv6_dst)) {
> + if (tun_key->flow_tnl_is_valid == FLOW_TUNNEL_VALID_IPV6_DST) {
> nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST, &tun_key->ipv6_dst);
> }
> if (tun_key->ip_tos) {
> diff --git a/lib/packets.c b/lib/packets.c
> index d82341d..cae2f30 100644
> --- a/lib/packets.c
> +++ b/lib/packets.c
> @@ -39,7 +39,13 @@ const struct in6_addr in6addr_all_hosts = IN6ADDR_ALL_HOSTS_INIT;
> struct in6_addr
> flow_tnl_dst(const struct flow_tnl *tnl)
> {
> - return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) : tnl->ipv6_dst;
> + if(tnl->flow_tnl_is_valid == FLOW_TUNNEL_VALID_IPV4_DST) {
> + return in6_addr_mapped_ipv4(tnl->ip_dst);
> + }
> + else if(tnl->flow_tnl_is_valid == FLOW_TUNNEL_VALID_IPV6_DST) {
> + return tnl->ipv6_dst;
> + }
> + return in6addr_any;
> }
>
> struct in6_addr
> diff --git a/lib/packets.h b/lib/packets.h
> index 834e8a4..8f9724e 100644
> --- a/lib/packets.h
> +++ b/lib/packets.h
> @@ -35,8 +35,21 @@
> struct dp_packet;
> struct ds;
>
> +/* Each type tunnel uses one bitfield. all bits are unset for unconfigured
> + * tunnel address.*/
> +enum flow_tnl_dst_type {
> + FLOW_TUNNEL_INVALID_DST = 0,
> + FLOW_TUNNEL_VALID_IPV4_DST = 1, //1st bit for ipv4 tunnel
> + FLOW_TUNNEL_VALID_IPV6_DST = 2 //2nd bit for ipv6 tunnel
> +};
> +
> /* Tunnel information used in flow key and metadata. */
> struct flow_tnl {
> + /* The tunnel destination ip/ipv6 address are not initializing anywhere due to
> + * the performance constrains. the enum 'flow_tnl_is_valid' is used to
> + * validate the destination addresses.
> + */
> + enum flow_tnl_dst_type flow_tnl_is_valid;
OK. So this is using 4 bytes anyway, so why not make it a full protocol value?
Call it protocol, not tnl_is_valid, since this is not a bool anymore. I would
use either an address family (AF_UNSPEC, AF_INET, AF_INET6) or a dl/eth proto.
Probably an address family.
> ovs_be32 ip_dst;
> struct in6_addr ipv6_dst;
> ovs_be32 ip_src;
> @@ -49,7 +62,7 @@ struct flow_tnl {
> ovs_be16 tp_dst;
> ovs_be16 gbp_id;
> uint8_t gbp_flags;
> - uint8_t pad1[5]; /* Pad to 64 bits. */
> + uint8_t pad1[1]; /* Pad to 64 bits. */
> struct tun_metadata metadata;
> };
>
> @@ -76,10 +89,36 @@ struct flow_tnl {
>
> static inline bool ipv6_addr_is_set(const struct in6_addr *addr);
>
> -static inline bool
> +static inline void
> +set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst) {
> + if(tnl) {
I guess all users have tnl set, so I don't think this check is needed.
> + tnl->ip_dst = ip_dst;
> + if(tnl->ip_dst) {
> + tnl->flow_tnl_is_valid = FLOW_TUNNEL_VALID_IPV4_DST;
Set tnl->ip_dst only if you have ip_dst set.
> + }
> + else {
> + tnl->flow_tnl_is_valid = FLOW_TUNNEL_INVALID_DST;
> + }
> + }
> +}
> +
> +static inline void
> +set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst) {
> + if(tnl) {
> + tnl->ipv6_dst = ipv6_dst;
> + if(ipv6_addr_is_set(&tnl->ipv6_dst)) {
> + tnl->flow_tnl_is_valid = FLOW_TUNNEL_VALID_IPV6_DST;
> + }
> + else {
> + tnl->flow_tnl_is_valid = FLOW_TUNNEL_INVALID_DST;
> + }
> + }
> +}
Same here as set_ipv4_dst_tnl.
By the way, most cases have the destination addresses set. So maybe you should
treat the 0 case in another way.
> +
> +static inline enum flow_tnl_dst_type
> flow_tnl_dst_is_set(const struct flow_tnl *tnl)
> {
> - return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst);
> + return tnl->flow_tnl_is_valid;
> }
Just return a bool here and return tnl->protocol != AF_UNSPEC. Or if you'd
rather keep this way, tnl->flow_tnl_is_valid != FLOW_TUNNEL_INVALID_DST.
>
> struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl);
> @@ -90,8 +129,8 @@ static inline size_t
> flow_tnl_size(const struct flow_tnl *src)
> {
> if (!flow_tnl_dst_is_set(src)) {
> - /* Covers ip_dst and ipv6_dst only. */
> - return offsetof(struct flow_tnl, ip_src);
> + /* Covers flow_tnl_is_valid flag only. */
> + return offsetof(struct flow_tnl, ip_dst);
> }
> if (src->flags & FLOW_TNL_F_UDPIF) {
> /* Datapath format, cover all options we have. */
> @@ -157,8 +196,7 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
> * we can just zero out ip_dst and the rest of the data will never be
> * looked at. */
> memset(md, 0, offsetof(struct pkt_metadata, in_port));
> - md->tunnel.ip_dst = 0;
> - md->tunnel.ipv6_dst = in6addr_any;
> + md->tunnel.flow_tnl_is_valid = FLOW_TUNNEL_INVALID_DST;
>
> md->in_port.odp_port = port;
> }
> diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
> index a610c53..8403408 100644
> --- a/ofproto/ofproto-dpif-ipfix.c
> +++ b/ofproto/ofproto-dpif-ipfix.c
> @@ -1721,7 +1721,8 @@ dpif_ipfix_bridge_sample(struct dpif_ipfix *di, const struct dp_packet *packet,
> * of matched packets. */
> packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
> if (di->bridge_exporter.options->enable_tunnel_sampling) {
> - if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
> + if (output_odp_port == ODPP_NONE &&
> + flow->tunnel.flow_tnl_is_valid == FLOW_TUNNEL_VALID_IPV4_DST) {
> /* Input tunnel. */
> tunnel_key = &flow->tunnel;
> tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
> diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> index 30c1c94..7b8ef7f 100644
> --- a/ofproto/ofproto-dpif-rid.c
> +++ b/ofproto/ofproto-dpif-rid.c
> @@ -288,8 +288,8 @@ uint32_t
> recirc_alloc_id(struct ofproto_dpif *ofproto)
> {
> struct flow_tnl tunnel;
> - tunnel.ip_dst = htonl(0);
> - tunnel.ipv6_dst = in6addr_any;
> + set_ipv4_dst_tnl(&tunnel,htonl(0));
> + set_ipv6_dst_tnl(&tunnel, in6addr_any);
Should just be tunnel->protocol = AF_UNSPEC.
> struct recirc_state state = {
> .table_id = TBL_INTERNAL,
> .ofproto = ofproto,
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index f11699c..9cc4f14 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -899,7 +899,7 @@ sflow_read_tnl_push_action(const struct nlattr *attr,
> /* IPv4 */
> /* Cannot assume alignment so just use memcpy. */
> sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
> - sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
> + set_ipv4_dst_tnl(&sflow_actions->tunnel, get_16aligned_be32(&ip->ip_dst));
> sflow_actions->tunnel.ip_tos = ip->ip_tos;
> sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
> /* The tnl_push action can supply the ip_protocol too. */
> @@ -991,7 +991,7 @@ sflow_read_set_action(const struct nlattr *attr,
> sflow_actions->tunnel.ip_src = key->ipv4_src;
> }
> if (key->ipv4_dst) {
> - sflow_actions->tunnel.ip_dst = key->ipv4_dst;
> + set_ipv4_dst_tnl(&sflow_actions->tunnel, key->ipv4_dst);
> }
> if (key->ipv4_proto) {
> sflow_actions->tunnel_ipproto = key->ipv4_proto;
> @@ -1287,7 +1287,7 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
> fs.output = cookie->sflow.output;
>
> /* Input tunnel. */
> - if (flow->tunnel.ip_dst) {
> + if (flow->tunnel.flow_tnl_is_valid == FLOW_TUNNEL_VALID_IPV4_DST) {
> memset(&tnlInElem, 0, sizeof(tnlInElem));
> tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
> tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
> diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
> index 5cf6c75..655ba1e 100644
> --- a/ofproto/tunnel.c
> +++ b/ofproto/tunnel.c
> @@ -362,12 +362,13 @@ tnl_wc_init(struct flow *flow, struct flow_wildcards *wc)
> {
> if (tnl_port_should_receive(flow)) {
> wc->masks.tunnel.tun_id = OVS_BE64_MAX;
> - if (flow->tunnel.ip_dst) {
> + if (flow->tunnel.flow_tnl_is_valid == FLOW_TUNNEL_VALID_IPV4_DST) {
> wc->masks.tunnel.ip_src = OVS_BE32_MAX;
> - wc->masks.tunnel.ip_dst = OVS_BE32_MAX;
> - } else {
> + set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX);
> + } else if(flow->tunnel.flow_tnl_is_valid ==
> + FLOW_TUNNEL_VALID_IPV6_DST) {
> wc->masks.tunnel.ipv6_src = in6addr_exact;
> - wc->masks.tunnel.ipv6_dst = in6addr_exact;
> + set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact);
> }
> wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT |
> FLOW_TNL_F_CSUM |
> @@ -424,7 +425,10 @@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
> if (!cfg->ip_dst_flow) {
> flow->tunnel.ip_dst = in6_addr_get_mapped_ipv4(&tnl_port->match.ipv6_dst);
> if (!flow->tunnel.ip_dst) {
> - flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst;
> + set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst);
> + }
> + else {
> + set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst);
> }
> }
> flow->pkt_mark = tnl_port->match.pkt_mark;
> --
> 1.9.1
>
Thanks.
Cascardo.
More information about the dev
mailing list