[ovs-dev] [PATCH] ofproto-dpif-xlate: native tunnel using valid tun_src specified by flow or port options
Ben Pfaff
blp at ovn.org
Fri Apr 13 21:21:17 UTC 2018
OK. I think I understand the patch now. It makes OVS native tunneling
honor tunnel-specified source addresses, in the same way that Linux
kernel tunneling honors them.
I see one potential problem: it makes ovs-router.c #include
<linux/rtnetlink.h>. This is a problem because ovs-router.c is used on
Windows and BSD as well as on Linux, and those systems do not have
rtnetlink.h or RTN_LOCAL. Would you please change the patch to avoid
this problem?
Thank you for the contribution. It will make OVS native tunneling
better.
On Sat, Feb 03, 2018 at 08:47:41AM +0800, wenxu wrote:
> Hi ben,
>
>
> This patch can be a bugfix.
> The tunnel_src of packet maybe not the IP address set by gre port options:local_ip
> dpdk-br has two address 10.1.1.7/24 and 10.1.1.254/32
> Interface gre options:local_ip="10.1.1.254"
> But the tunnel_src of tunnel packet is always 10.1.1.7
>
>
> Also It can be a new feature.
> In the same case the kernel space Open vSwitch can send packet with tunnel_src 10.1.1.254.
>
>
> Why we need this?
> In the High-availability cluster
> server1 with IP 10.1.1.7/24 and a virtual IP 10.1.1.254
> server2 with IP 10.1.1.8/24 and a virtual IP 10.1.1.254
>
>
> 10.1.1.7 and 10.1.1.8 running for the bgp and 10.1.1.254 provide the dataplane forward
>
>
>
>
> At 2018-02-03 06:30:51, "Ben Pfaff" <blp at ovn.org> wrote:
> >Thank you for submitting a patch to improve Open vSwitch. I do not yet
> >understand the purpose of this patch. Is it a bug fix or a new feature?
> >
> >Thanks,
> >
> >Ben.
> >
> >On Fri, Feb 02, 2018 at 02:08:52PM +0800, wenxu at ucloud.cn wrote:
> >> From: wenxu <wenxu at ucloud.cn>
> >>
> >> native tunnel build tunnel with tun_src only from the route src and
> >> not care about the options:local_ip.
> >> Sometimes an virtual IP using for tun_src
> >> dpdk-br:
> >> inet 10.1.1.7/24 brd 10.1.1.255 scope global dpdk-br
> >> inet 10.1.1.254/32 scope global dpdk-br
> >>
> >> Interface: gre options: {key=flow, local_ip="10.1.1.254", remote_ip=flow}
> >>
> >> the native tunnel always using 10.1.1.7 as the tunnel_src but not 10.1.1.254.
> >>
> >> This patch made valid tun_src specified by flow-action or gre port options
> >> can be used for tunnel_src of packet. It stores the rtm_type for each route
> >> and improve the priority RTN_LOCAL type(higher then userdef route).
> >> Like the kernel space when lookup the route, if there are tun_src specified
> >> by flow-action or port options. Check the tun_src wheather is a local
> >> address, then lookup the route.
> >>
> >> Signed-off-by: wenxu <wenxu at ucloud.cn>
> >> Signed-off-by: frank.zeng <frank.zeng at ucloud.cn>
> >> ---
> >> lib/ovs-router.c | 38 +++++++++++++++++++++++++++++++-------
> >> lib/ovs-router.h | 2 +-
> >> lib/route-table.c | 10 ++++++++--
> >> ofproto/ofproto-dpif-sflow.c | 2 +-
> >> ofproto/ofproto-dpif-xlate.c | 4 ++++
> >> 5 files changed, 45 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/lib/ovs-router.c b/lib/ovs-router.c
> >> index 0f1103b..e1375a3 100644
> >> --- a/lib/ovs-router.c
> >> +++ b/lib/ovs-router.c
> >> @@ -29,6 +29,7 @@
> >> #include <stdlib.h>
> >> #include <string.h>
> >> #include <unistd.h>
> >> +#include <linux/rtnetlink.h>
> >>
> >> #include "classifier.h"
> >> #include "command-line.h"
> >> @@ -61,6 +62,7 @@ struct ovs_router_entry {
> >> struct in6_addr nw_addr;
> >> struct in6_addr src_addr;
> >> uint8_t plen;
> >> + uint8_t rtm_type;
> >> uint8_t priority;
> >> uint32_t mark;
> >> };
> >> @@ -97,13 +99,28 @@ ovs_router_lookup(uint32_t mark, const struct in6_addr *ip6_dst,
> >> const struct cls_rule *cr;
> >> struct flow flow = {.ipv6_dst = *ip6_dst, .pkt_mark = mark};
> >>
> >> + if (src && ipv6_addr_is_set(src)) {
> >> + const struct cls_rule *cr_src;
> >> + struct flow flow_src = {.ipv6_dst = *src, .pkt_mark = mark};
> >> +
> >> + cr_src = classifier_lookup(&cls, OVS_VERSION_MAX, &flow_src, NULL);
> >> + if (cr_src) {
> >> + struct ovs_router_entry *p_src = ovs_router_entry_cast(cr_src);
> >> + if (p_src->rtm_type != RTN_LOCAL) {
> >> + return false;
> >> + }
> >> + } else {
> >> + return false;
> >> + }
> >> + }
> >> +
> >> cr = classifier_lookup(&cls, OVS_VERSION_MAX, &flow, NULL);
> >> if (cr) {
> >> struct ovs_router_entry *p = ovs_router_entry_cast(cr);
> >>
> >> ovs_strlcpy(output_bridge, p->output_bridge, IFNAMSIZ);
> >> *gw = p->gw;
> >> - if (src) {
> >> + if (src && !ipv6_addr_is_set(src)) {
> >> *src = p->src_addr;
> >> }
> >> return true;
> >> @@ -184,7 +201,7 @@ out:
> >> }
> >>
> >> static int
> >> -ovs_router_insert__(uint32_t mark, uint8_t priority,
> >> +ovs_router_insert__(uint32_t mark, uint8_t priority, uint8_t rtm_type,
> >> const struct in6_addr *ip6_dst,
> >> uint8_t plen, const char output_bridge[],
> >> const struct in6_addr *gw)
> >> @@ -204,6 +221,7 @@ ovs_router_insert__(uint32_t mark, uint8_t priority,
> >> p->mark = mark;
> >> p->nw_addr = match.flow.ipv6_dst;
> >> p->plen = plen;
> >> + p->rtm_type = rtm_type;
> >> p->priority = priority;
> >> err = get_src_addr(ip6_dst, output_bridge, &p->src_addr);
> >> if (err && ipv6_addr_is_set(gw)) {
> >> @@ -236,9 +254,10 @@ ovs_router_insert__(uint32_t mark, uint8_t priority,
> >>
> >> void
> >> ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst, uint8_t plen,
> >> - const char output_bridge[], const struct in6_addr *gw)
> >> + uint8_t rtm_type, const char output_bridge[],
> >> + const struct in6_addr *gw)
> >> {
> >> - ovs_router_insert__(mark, plen, ip_dst, plen, output_bridge, gw);
> >> + ovs_router_insert__(mark, plen, rtm_type, ip_dst, plen, output_bridge, gw);
> >> }
> >>
> >> static void
> >> @@ -345,7 +364,7 @@ ovs_router_add(struct unixctl_conn *conn, int argc,
> >> }
> >> }
> >>
> >> - err = ovs_router_insert__(mark, plen + 32, &ip6, plen, argv[2], &gw6);
> >> + err = ovs_router_insert__(mark, plen + 32, RTN_UNICAST, &ip6, plen, argv[2], &gw6);
> >> if (err) {
> >> unixctl_command_reply_error(conn, "Error while inserting route.");
> >> } else {
> >> @@ -402,7 +421,12 @@ ovs_router_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
> >> ipv6_format_mapped(&rt->nw_addr, &ds);
> >> plen = rt->plen;
> >> if (IN6_IS_ADDR_V4MAPPED(&rt->nw_addr)) {
> >> - plen -= 96;
> >> + uint8_t plen_off = 96;
> >> +
> >> + if (rt->rtm_type == RTN_LOCAL) {
> >> + plen_off += 64;
> >> + }
> >> + plen -= plen_off;
> >> }
> >> ds_put_format(&ds, "/%"PRIu8, plen);
> >> if (rt->mark) {
> >> @@ -426,7 +450,7 @@ static void
> >> ovs_router_lookup_cmd(struct unixctl_conn *conn, int argc,
> >> const char *argv[], void *aux OVS_UNUSED)
> >> {
> >> - struct in6_addr gw, src;
> >> + struct in6_addr gw, src = in6addr_any;
> >> char iface[IFNAMSIZ];
> >> struct in6_addr ip6;
> >> unsigned int plen;
> >> diff --git a/lib/ovs-router.h b/lib/ovs-router.h
> >> index b55b1a5..f41771b 100644
> >> --- a/lib/ovs-router.h
> >> +++ b/lib/ovs-router.h
> >> @@ -31,7 +31,7 @@ bool ovs_router_lookup(uint32_t mark, const struct in6_addr *ip_dst,
> >> struct in6_addr *src, struct in6_addr *gw);
> >> void ovs_router_init(void);
> >> void ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst,
> >> - uint8_t plen,
> >> + uint8_t plen, uint8_t rtm_type,
> >> const char output_bridge[], const struct in6_addr *gw);
> >> void ovs_router_flush(void);
> >> #ifdef __cplusplus
> >> diff --git a/lib/route-table.c b/lib/route-table.c
> >> index 97a0be5..e75d24e 100644
> >> --- a/lib/route-table.c
> >> +++ b/lib/route-table.c
> >> @@ -47,6 +47,7 @@ VLOG_DEFINE_THIS_MODULE(route_table);
> >> struct route_data {
> >> /* Copied from struct rtmsg. */
> >> unsigned char rtm_dst_len;
> >> + unsigned char rtm_type;
> >>
> >> /* Extracted from Netlink attributes. */
> >> struct in6_addr rta_dst; /* 0 if missing. */
> >> @@ -228,6 +229,7 @@ route_table_parse(struct ofpbuf *buf, struct route_table_msg *change)
> >> if (parsed) {
> >> const struct nlmsghdr *nlmsg;
> >> int rta_oif; /* Output interface index. */
> >> + uint8_t dst_len_off = 96;
> >>
> >> nlmsg = buf->data;
> >>
> >> @@ -243,7 +245,11 @@ route_table_parse(struct ofpbuf *buf, struct route_table_msg *change)
> >> change->relevant = false;
> >> }
> >> change->nlmsg_type = nlmsg->nlmsg_type;
> >> - change->rd.rtm_dst_len = rtm->rtm_dst_len + (ipv4 ? 96 : 0);
> >> + if (rtm->rtm_type == RTN_LOCAL) {
> >> + dst_len_off += 64;
> >> + }
> >> + change->rd.rtm_dst_len = rtm->rtm_dst_len + (ipv4 ? dst_len_off : 0);
> >> + change->rd.rtm_type = rtm->rtm_type;
> >> if (attrs[RTA_OIF]) {
> >> rta_oif = nl_attr_get_u32(attrs[RTA_OIF]);
> >>
> >> @@ -306,7 +312,7 @@ route_table_handle_msg(const struct route_table_msg *change)
> >> const struct route_data *rd = &change->rd;
> >>
> >> ovs_router_insert(rd->mark, &rd->rta_dst, rd->rtm_dst_len,
> >> - rd->ifname, &rd->rta_gw);
> >> + rd->rtm_type, rd->ifname, &rd->rta_gw);
> >> }
> >> }
> >>
> >> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> >> index 60e1b4e..0a79623 100644
> >> --- a/ofproto/ofproto-dpif-sflow.c
> >> +++ b/ofproto/ofproto-dpif-sflow.c
> >> @@ -467,7 +467,7 @@ sflow_choose_agent_address(const char *agent_device,
> >>
> >> if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &sa.ss)
> >> && sa.ss.ss_family == AF_INET) {
> >> - struct in6_addr addr6, src, gw;
> >> + struct in6_addr addr6, gw, src = in6addr_any;
> >>
> >> in6_addr_set_mapped_ipv4(&addr6, sa.sin.sin_addr.s_addr);
> >> /* sFlow only supports target in default routing table with
> >> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> >> index cc450a8..4e62dab 100644
> >> --- a/ofproto/ofproto-dpif-xlate.c
> >> +++ b/ofproto/ofproto-dpif-xlate.c
> >> @@ -3319,6 +3319,10 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,
> >> memcpy(&old_base_flow, &ctx->base_flow, sizeof old_base_flow);
> >> memcpy(&old_flow, &ctx->xin->flow, sizeof old_flow);
> >>
> >> + if (flow->tunnel.ip_src) {
> >> + in6_addr_set_mapped_ipv4(&s_ip6, flow->tunnel.ip_src);
> >> + }
> >> +
> >> err = tnl_route_lookup_flow(ctx, flow, &d_ip6, &s_ip6, &out_dev);
> >> if (err) {
> >> xlate_report(ctx, OFT_WARN, "native tunnel routing failed");
> >> --
> >> 1.7.1
> >>
> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev at openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
More information about the dev
mailing list