[ovs-dev] [PATCH] ipv6 tunneling: Fix for performance drop introduced by ipv6 tunnel support.
Chandran, Sugesh
sugesh.chandran at intel.com
Wed Jan 13 12:18:02 UTC 2016
Hi Cascardo,
Thank you for looking into it. Please find my answers inline below
Regards
_Sugesh
> -----Original Message-----
> From: Thadeu Lima de Souza Cascardo [mailto:cascardo at redhat.com]
> Sent: Tuesday, January 12, 2016 10:26 PM
> To: Chandran, Sugesh <sugesh.chandran at intel.com>
> Cc: dev at openvswitch.org; jbenc at redhat.com
> Subject: Re: [PATCH] ipv6 tunneling: Fix for performance drop introduced by
> ipv6 tunnel support.
>
> On Tue, Jan 12, 2016 at 01:48:44PM +0000, Sugesh Chandran wrote:
> > Adding a new field called protocol in flow tunnel structure to verify
> > the validity of tunnel metadata. This field avoids the need of
> > resetting and validating the entire ipv4/ipv6 tunnel destination
> > address which caused a serious performance drop.
> >
> > Signed-off-by: Sugesh Chandran <sugesh.chandran at intel.com>
> > ---
> > lib/flow.c | 8 +++----
> > lib/match.c | 12 +++++------
> > lib/meta-flow.c | 11 ++++++++--
> > lib/netdev-vport.c | 5 ++---
> > lib/odp-util.c | 10 ++++-----
> > lib/packets.c | 8 ++++++-
> > lib/packets.h | 50
> ++++++++++++++++++++++++++++++++++++++------
> > ofproto/ofproto-dpif-ipfix.c | 3 ++-
> > ofproto/ofproto-dpif-rid.c | 3 +--
> > ofproto/ofproto-dpif-sflow.c | 6 +++---
> > ofproto/tunnel.c | 14 ++++++++-----
> > 11 files changed, 92 insertions(+), 38 deletions(-)
> >
>
> Hi, Sugesh.
>
> This doesn't build with clang for me. Here is a quick fix. More comments
> below.
>
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index d3477c3..f2b25d5
> 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -877,7 +877,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct
> flow_tnl *tnl, {
> void *nh;
> struct ip_header *ip;
> - struct ovs_16aligned_ip6_hdr *ip6;
> + struct ip6_hdr *ip6;
> void *l4;
> int l3_size;
>
[Sugesh] Do you want me to include this fix along with the patch set? Because I feel that the build issue might be there even without this patch.
> @@ -927,10 +927,10 @@ ip_extract_tnl_md(struct dp_packet *packet,
> struct flow_tnl *tnl,
>
> } else if (IP_VER(ip->ip_ihl_ver) == 6) {
>
> - memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6->ip6_src);
> + memcpy(tnl->ipv6_src.s6_addr, &ip6->ip6_src, sizeof
> + ip6->ip6_src);
> tnl->ip_tos = 0;
> tnl->ip_ttl = ip6->ip6_hlim;
> - set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst);
> + set_ipv6_dst_tnl(tnl, ip6->ip6_dst);
> *hlen += IPV6_HEADER_LEN;
>
> } else {
>
> > diff --git a/lib/flow.c b/lib/flow.c
> > index 5668d0c..34cee55 100644
> > --- a/lib/flow.c
> > +++ b/lib/flow.c
> > @@ -818,15 +818,15 @@ flow_get_metadata(const struct flow *flow,
> struct match *flow_metadata)
> > if (flow->tunnel.ip_src) {
> > match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
> > }
> > - if (flow->tunnel.ip_dst) {
> > + if (flow->tunnel.protocol == FLOW_TUNNEL_IPV4) {
> > match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
> > }
> > + else if (flow->tunnel.protocol == FLOW_TUNNEL_IPV6) {
> > + match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
> > + }
>
> You need to follow coding style here. In this case, the else comes after the
> closing brace at the same line.
[Sugesh] Will update the patch accordingly.
>
> > if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
> > match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
> > }
> > - if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
> > - match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
> > - }
> > if (flow->tunnel.gbp_id != htons(0)) {
> > match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
> > }
> > diff --git a/lib/match.c b/lib/match.c index 95d34bc..97a623e 100644
> > --- a/lib/match.c
> > +++ b/lib/match.c
> > @@ -186,8 +186,8 @@ match_set_tun_dst(struct match *match, ovs_be32
> > dst) void match_set_tun_dst_masked(struct match *match, ovs_be32
> > dst, ovs_be32 mask) {
> > - match->wc.masks.tunnel.ip_dst = mask;
> > - match->flow.tunnel.ip_dst = dst & mask;
> > + set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask);
> > + set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask);
> > }
> >
> > void
> > @@ -208,16 +208,16 @@ match_set_tun_ipv6_src_masked(struct match
> > *match, const struct in6_addr *src, void
> > match_set_tun_ipv6_dst(struct match *match, const struct in6_addr
> > *dst) {
> > - match->flow.tunnel.ipv6_dst = *dst;
> > - match->wc.masks.tunnel.ipv6_dst = in6addr_exact;
> > + set_ipv6_dst_tnl(&match->flow.tunnel, *dst);
> > + set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact);
> > }
> >
> > void
> > match_set_tun_ipv6_dst_masked(struct match *match, const struct
> in6_addr *dst,
> > const struct in6_addr *mask) {
> > - match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask);
> > - match->wc.masks.tunnel.ipv6_dst = *mask;
> > + set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst, mask));
> > + set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask);
> > }
> >
> > void
> > diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 6bd0b99..0826d1d
> > 100644
> > --- a/lib/meta-flow.c
> > +++ b/lib/meta-flow.c
> > @@ -1180,13 +1180,13 @@ mf_set_flow_value(const struct mf_field *mf,
> > flow->tunnel.ip_src = value->be32;
> > break;
> > case MFF_TUN_DST:
> > - flow->tunnel.ip_dst = value->be32;
> > + set_ipv4_dst_tnl(&flow->tunnel, value->be32);
> > break;
> > case MFF_TUN_IPV6_SRC:
> > flow->tunnel.ipv6_src = value->ipv6;
> > break;
> > case MFF_TUN_IPV6_DST:
> > - flow->tunnel.ipv6_dst = value->ipv6;
> > + set_ipv6_dst_tnl(&flow->tunnel, value->ipv6);
> > break;
> > case MFF_TUN_FLAGS:
> > flow->tunnel.flags = (flow->tunnel.flags &
> > ~FLOW_TNL_PUB_F_MASK) | @@ -1503,6 +1503,13 @@
> mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str)
> > sizeof match->wc.masks.tunnel.ipv6_dst);
> > memset(&match->flow.tunnel.ipv6_dst, 0,
> > sizeof match->flow.tunnel.ipv6_dst);
> > + /* What if flow have a valid ipv4 tunnel address??
> > + * Reset the flag only if thats not the case.
> > + */
> > + match->wc.masks.tunnel.protocol = (match-
> >wc.masks.tunnel.protocol ==
> > + FLOW_TUNNEL_IPV4)?
> > + match->wc.masks.tunnel.protocol:
> > + FLOW_TUNNEL_UNSPEC;
> > break;
> > case MFF_TUN_FLAGS:
> > match_set_tun_flags_masked(match, 0, 0); diff --git
> > a/lib/netdev-vport.c b/lib/netdev-vport.c index 88f5022..d3477c3
> > 100644
> > --- a/lib/netdev-vport.c
> > +++ b/lib/netdev-vport.c
> > @@ -919,19 +919,18 @@ ip_extract_tnl_md(struct dp_packet *packet,
> struct flow_tnl *tnl,
> > ip_dst = get_16aligned_be32(&ip->ip_dst);
> >
> > tnl->ip_src = ip_src;
> > - tnl->ip_dst = ip_dst;
> > tnl->ip_tos = ip->ip_tos;
> > tnl->ip_ttl = ip->ip_ttl;
> > + set_ipv4_dst_tnl(tnl, ip_dst);
> >
> > *hlen += IP_HEADER_LEN;
> >
> > } else if (IP_VER(ip->ip_ihl_ver) == 6) {
> >
> > memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6-
> >ip6_src);
> > - memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6-
> >ip6_dst);
> > tnl->ip_tos = 0;
> > tnl->ip_ttl = ip6->ip6_hlim;
> > -
> > + set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst);
> > *hlen += IPV6_HEADER_LEN;
> >
> > } else {
> > diff --git a/lib/odp-util.c b/lib/odp-util.c index f16e113..26c3727
> > 100644
> > --- a/lib/odp-util.c
> > +++ b/lib/odp-util.c
> > @@ -510,7 +510,7 @@ format_odp_tnl_push_header(struct ds *ds, struct
> ovs_action_push_tnl *data)
> > gnh->oam ? "oam," : "",
> > gnh->critical ? "crit," : "",
> > ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
> > -
> > +
> > if (gnh->opt_len) {
> > ds_put_cstr(ds, ",options(");
> > format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
> > @@ -1864,13 +1864,13 @@ odp_tun_key_from_attr__(const struct nlattr
> *attr,
> > tun->ip_src = nl_attr_get_be32(a);
> > break;
> > case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
> > - tun->ip_dst = nl_attr_get_be32(a);
> > + set_ipv4_dst_tnl(tun, nl_attr_get_be32(a));
> > break;
> > case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
> > tun->ipv6_src = nl_attr_get_in6_addr(a);
> > break;
> > case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
> > - tun->ipv6_dst = nl_attr_get_in6_addr(a);
> > + set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a));
> > break;
> > case OVS_TUNNEL_KEY_ATTR_TOS:
> > tun->ip_tos = nl_attr_get_u8(a); @@ -1961,13 +1961,13 @@
> > tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
> > if (tun_key->ip_src) {
> > nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key-
> >ip_src);
> > }
> > - if (tun_key->ip_dst) {
> > + if (tun_key->protocol == FLOW_TUNNEL_IPV4) {
> > nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key-
> >ip_dst);
> > }
> > if (ipv6_addr_is_set(&tun_key->ipv6_src)) {
> > nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
> &tun_key->ipv6_src);
> > }
> > - if (ipv6_addr_is_set(&tun_key->ipv6_dst)) {
> > + if (tun_key->protocol == FLOW_TUNNEL_IPV6) {
> > nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
> &tun_key->ipv6_dst);
> > }
> > if (tun_key->ip_tos) {
> > diff --git a/lib/packets.c b/lib/packets.c index d82341d..ed6c5ad
> > 100644
> > --- a/lib/packets.c
> > +++ b/lib/packets.c
> > @@ -39,7 +39,13 @@ const struct in6_addr in6addr_all_hosts =
> > IN6ADDR_ALL_HOSTS_INIT; struct in6_addr flow_tnl_dst(const struct
> > flow_tnl *tnl) {
> > - return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) : tnl->ipv6_dst;
> > + if(tnl->protocol == FLOW_TUNNEL_IPV4) {
> > + return in6_addr_mapped_ipv4(tnl->ip_dst);
> > + }
> > + else if(tnl->protocol == FLOW_TUNNEL_IPV6) {
> > + return tnl->ipv6_dst;
> > + }
> > + return in6addr_any;
> > }
> >
> > struct in6_addr
> > diff --git a/lib/packets.h b/lib/packets.h index 834e8a4..53a6ec0
> > 100644
> > --- a/lib/packets.h
> > +++ b/lib/packets.h
> > @@ -35,8 +35,21 @@
> > struct dp_packet;
> > struct ds;
> >
> > +/* Each type tunnel uses one bitfield. all bits are unset for
> > +unconfigured
> > + * tunnel address.*/
> > +enum flow_tnl_proto_type {
> > + FLOW_TUNNEL_UNSPEC = 0,
> > + FLOW_TUNNEL_IPV4 = 1, //1st bit for ipv4 tunnel
> > + FLOW_TUNNEL_IPV6 = 2 //2nd bit for ipv6 tunnel };
> > +
>
> You introduced an enum. My suggestion was that you don't do that and use
> 0, ETH_TYPE_IP and ETH_TYPE_IPV6 instead. Or AF_UNSPEC, AF_INET,
> AF_INET6.
>
[Sugesh] As I mentioned in the earlier mail communication, How do we specify the cases where the tunnel header data neither ipv6 nor ipv4. (Say for eg: ETH-NSH.). Just wondering what to do for the cases where the ETH/AF types are not defined/available. I am not sure about such a specific usecase at the moment. If you feel that the ETH_TYPE can handle all the cases, Will change accordingly.
> > /* Tunnel information used in flow key and metadata. */ struct
> > flow_tnl {
> > + /* The tunnel destination ip/ipv6 address are not initializing anywhere
> due to
> > + * the performance constrains. the enum 'flow_tnl_proto_type' is used
> to
> > + * validate the destination addresses.
> > + */
> > + enum flow_tnl_proto_type protocol;
> > ovs_be32 ip_dst;
> > struct in6_addr ipv6_dst;
> > ovs_be32 ip_src;
> > @@ -49,7 +62,7 @@ struct flow_tnl {
> > ovs_be16 tp_dst;
> > ovs_be16 gbp_id;
> > uint8_t gbp_flags;
> > - uint8_t pad1[5]; /* Pad to 64 bits. */
> > + uint8_t pad1[1]; /* Pad to 64 bits. */
> > struct tun_metadata metadata;
> > };
> >
> > @@ -76,10 +89,36 @@ struct flow_tnl {
> >
> > static inline bool ipv6_addr_is_set(const struct in6_addr *addr);
> >
> > +static inline void
> > +set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst) {
> > + if(tnl) {
> > + tnl->ip_dst = ip_dst;
> > + if(tnl->ip_dst) {
> > + tnl->protocol = FLOW_TUNNEL_IPV4;
> > + }
> > + else {
> > + tnl->protocol = FLOW_TUNNEL_UNSPEC;
> > + }
> > + }
> > +}
> > +
> > +static inline void
> > +set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst) {
> > + if(tnl) {
> > + tnl->ipv6_dst = ipv6_dst;
> > + if(ipv6_addr_is_set(&tnl->ipv6_dst)) {
> > + tnl->protocol = FLOW_TUNNEL_IPV6;
> > + }
> > + else {
> > + tnl->protocol = FLOW_TUNNEL_UNSPEC;
> > + }
> > + }
> > +}
> > +
> > static inline bool
> > flow_tnl_dst_is_set(const struct flow_tnl *tnl) {
> > - return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst);
> > + return (tnl->protocol != FLOW_TUNNEL_UNSPEC);
> > }
> >
> > struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl); @@ -90,8
> > +129,8 @@ static inline size_t flow_tnl_size(const struct flow_tnl
> > *src) {
> > if (!flow_tnl_dst_is_set(src)) {
> > - /* Covers ip_dst and ipv6_dst only. */
> > - return offsetof(struct flow_tnl, ip_src);
> > + /* Covers protocol flag only. */
> > + return offsetof(struct flow_tnl, ip_dst);
> > }
> > if (src->flags & FLOW_TNL_F_UDPIF) {
> > /* Datapath format, cover all options we have. */ @@ -157,8
> > +196,7 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
> > * we can just zero out ip_dst and the rest of the data will never be
> > * looked at. */
> > memset(md, 0, offsetof(struct pkt_metadata, in_port));
> > - md->tunnel.ip_dst = 0;
> > - md->tunnel.ipv6_dst = in6addr_any;
> > + md->tunnel.protocol = FLOW_TUNNEL_UNSPEC;
> >
> > md->in_port.odp_port = port;
> > }
> > diff --git a/ofproto/ofproto-dpif-ipfix.c
> > b/ofproto/ofproto-dpif-ipfix.c index a610c53..e00dac7 100644
> > --- a/ofproto/ofproto-dpif-ipfix.c
> > +++ b/ofproto/ofproto-dpif-ipfix.c
> > @@ -1721,7 +1721,8 @@ dpif_ipfix_bridge_sample(struct dpif_ipfix *di,
> const struct dp_packet *packet,
> > * of matched packets. */
> > packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
> > if (di->bridge_exporter.options->enable_tunnel_sampling) {
> > - if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
> > + if (output_odp_port == ODPP_NONE &&
> > + flow->tunnel.protocol == FLOW_TUNNEL_IPV4) {
> > /* Input tunnel. */
> > tunnel_key = &flow->tunnel;
> > tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
> > diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> > index d142933..396489c 100644
> > --- a/ofproto/ofproto-dpif-rid.c
> > +++ b/ofproto/ofproto-dpif-rid.c
> > @@ -297,8 +297,7 @@ uint32_t
> > recirc_alloc_id(struct ofproto_dpif *ofproto) {
> > struct flow_tnl tunnel;
> > - tunnel.ip_dst = htonl(0);
> > - tunnel.ipv6_dst = in6addr_any;
> > + tunnel.protocol = FLOW_TUNNEL_UNSPEC;
> > struct recirc_state state = {
> > .table_id = TBL_INTERNAL,
> > .ofproto = ofproto,
> > diff --git a/ofproto/ofproto-dpif-sflow.c
> > b/ofproto/ofproto-dpif-sflow.c index f11699c..66686de 100644
> > --- a/ofproto/ofproto-dpif-sflow.c
> > +++ b/ofproto/ofproto-dpif-sflow.c
> > @@ -899,7 +899,7 @@ sflow_read_tnl_push_action(const struct nlattr
> *attr,
> > /* IPv4 */
> > /* Cannot assume alignment so just use memcpy. */
> > sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
> > - sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
> > + set_ipv4_dst_tnl(&sflow_actions->tunnel,
> > + get_16aligned_be32(&ip->ip_dst));
> > sflow_actions->tunnel.ip_tos = ip->ip_tos;
> > sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
> > /* The tnl_push action can supply the ip_protocol too. */ @@
> > -991,7 +991,7 @@ sflow_read_set_action(const struct nlattr *attr,
> > sflow_actions->tunnel.ip_src = key->ipv4_src;
> > }
> > if (key->ipv4_dst) {
> > - sflow_actions->tunnel.ip_dst = key->ipv4_dst;
> > + set_ipv4_dst_tnl(&sflow_actions->tunnel,
> > + key->ipv4_dst);
> > }
> > if (key->ipv4_proto) {
> > sflow_actions->tunnel_ipproto = key->ipv4_proto; @@
> > -1287,7 +1287,7 @@ dpif_sflow_received(struct dpif_sflow *ds, const
> struct dp_packet *packet,
> > fs.output = cookie->sflow.output;
> >
> > /* Input tunnel. */
> > - if (flow->tunnel.ip_dst) {
> > + if (flow->tunnel.protocol == FLOW_TUNNEL_IPV4) {
> > memset(&tnlInElem, 0, sizeof(tnlInElem));
> > tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
> > tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
> > diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index
> > 5cf6c75..95f6df3 100644
> > --- a/ofproto/tunnel.c
> > +++ b/ofproto/tunnel.c
> > @@ -362,12 +362,13 @@ tnl_wc_init(struct flow *flow, struct
> > flow_wildcards *wc) {
> > if (tnl_port_should_receive(flow)) {
> > wc->masks.tunnel.tun_id = OVS_BE64_MAX;
> > - if (flow->tunnel.ip_dst) {
> > + if (flow->tunnel.protocol == FLOW_TUNNEL_IPV4) {
> > wc->masks.tunnel.ip_src = OVS_BE32_MAX;
> > - wc->masks.tunnel.ip_dst = OVS_BE32_MAX;
> > - } else {
> > + set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX);
> > + }
> > + else if(flow->tunnel.protocol == FLOW_TUNNEL_IPV6) {
> > wc->masks.tunnel.ipv6_src = in6addr_exact;
> > - wc->masks.tunnel.ipv6_dst = in6addr_exact;
> > + set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact);
> > }
> > wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT |
> > FLOW_TNL_F_CSUM | @@ -424,7 +425,10
> > @@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
> > if (!cfg->ip_dst_flow) {
> > flow->tunnel.ip_dst = in6_addr_get_mapped_ipv4(&tnl_port-
> >match.ipv6_dst);
> > if (!flow->tunnel.ip_dst) {
> > - flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst;
> > + set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst);
> > + }
> > + else {
> > + set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst);
> > }
> > }
> > flow->pkt_mark = tnl_port->match.pkt_mark;
> > --
> > 1.9.1
> >
>
> Regards.
> Cascardo.
More information about the dev
mailing list