[ovs-dev] [PATCH 3/3] ovs-router: introduce pkt-mark.

Jarno Rajahalme jarno at ovn.org
Mon Jan 23 17:04:52 UTC 2017


> On Dec 28, 2016, at 1:44 AM, Pravin B Shelar <pshelar at ovn.org> wrote:
> 
> OVS router is basically partial copy of linux kernel FIB.
> kernel routing table use skb-mark along with usual routing

“use”->”uses”

Could you elaborate a bit here, is the kernel doing a simple exact match of the whole mark? Is the value 0 special in any way?

> parameters. Following patch brings in support for skb-mark
> to ovs-router so that we can lookup route according to
> flow skb-mark.
> 

So by default we use the flow’s skb_mark, but if the ‘egress_pkt_mark’ is set, then we use that?

> Signed-off-by: Pravin B Shelar <pshelar at ovn.org>
> ---
> lib/netdev-vport.c           |   4 +-
> lib/ovs-router.c             | 192 ++++++++++++++++++++++++++++++++++---------
> lib/ovs-router.h             |   6 +-
> lib/route-table.c            |   2 +-
> ofproto/ofproto-dpif-sflow.c |   2 +-
> ofproto/ofproto-dpif-xlate.c |   2 +-
> tests/ovs-router.at          |  22 +++++
> tests/tunnel-push-pop.at     |   3 +
> 8 files changed, 188 insertions(+), 45 deletions(-)
> 
> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> index 25dd0e8..54db559 100644
> --- a/lib/netdev-vport.c
> +++ b/lib/netdev-vport.c
> @@ -259,10 +259,12 @@ tunnel_check_status_change__(struct netdev_vport *netdev)
>     bool status = false;
>     struct in6_addr *route;
>     struct in6_addr gw;
> +    uint32_t mark;
> 
>     iface[0] = '\0';
>     route = &netdev->tnl_cfg.ipv6_dst;
> -    if (ovs_router_lookup(route, iface, NULL, &gw)) {
> +    mark = netdev->tnl_cfg.egress_pkt_mark;
> +    if (ovs_router_lookup(mark, route, iface, NULL, &gw)) {
>         struct netdev *egress_netdev;
> 
>         if (!netdev_open(iface, NULL, &egress_netdev)) {
> diff --git a/lib/ovs-router.c b/lib/ovs-router.c
> index 935b60a..7300b36 100644
> --- a/lib/ovs-router.c
> +++ b/lib/ovs-router.c
> @@ -45,6 +45,11 @@
> #include "unaligned.h"
> #include "unixctl.h"
> #include "util.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(ovs_route);
> +

This should be ‘ovs_router’ instead.

> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> 
> static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
> static struct classifier cls;
> @@ -57,6 +62,7 @@ struct ovs_router_entry {
>     struct in6_addr src_addr;
>     uint8_t plen;
>     uint8_t priority;
> +    uint32_t mark;
> };
> 
> static struct ovs_router_entry *
> @@ -88,11 +94,12 @@ ovs_router_lookup_fallback(const struct in6_addr *ip6_dst, char output_bridge[],
> }
> 
> bool
> -ovs_router_lookup(const struct in6_addr *ip6_dst, char output_bridge[],
> +ovs_router_lookup(uint32_t mark, const struct in6_addr *ip6_dst,
> +                  char output_bridge[],
>                   struct in6_addr *src, struct in6_addr *gw)
> {
>     const struct cls_rule *cr;
> -    struct flow flow = {.ipv6_dst = *ip6_dst};
> +    struct flow flow = {.ipv6_dst = *ip6_dst, .pkt_mark = mark};
> 
>     cr = classifier_lookup(&cls, OVS_VERSION_MAX, &flow, NULL);
>     if (cr) {
> @@ -115,7 +122,8 @@ rt_entry_free(struct ovs_router_entry *p)
>     free(p);
> }
> 
> -static void rt_init_match(struct match *match, const struct in6_addr *ip6_dst,
> +static void rt_init_match(struct match *match, uint32_t mark,
> +                          const struct in6_addr *ip6_dst,
>                           uint8_t plen)
> {
>     struct in6_addr dst;
> @@ -127,6 +135,8 @@ static void rt_init_match(struct match *match, const struct in6_addr *ip6_dst,
>     memset(match, 0, sizeof *match);
>     match->flow.ipv6_dst = dst;
>     match->wc.masks.ipv6_dst = mask;
> +    match->wc.masks.pkt_mark = UINT32_MAX;
> +    match->flow.pkt_mark = mark;

So by default, when the route entry does not specify a mark, an exact match on zero is made?

> }
> 
> static int
> @@ -178,7 +188,8 @@ out:
> }
> 
> static int
> -ovs_router_insert__(uint8_t priority, const struct in6_addr *ip6_dst,
> +ovs_router_insert__(uint32_t mark, uint8_t priority,
> +                    const struct in6_addr *ip6_dst,
>                     uint8_t plen, const char output_bridge[],
>                     const struct in6_addr *gw)
> {
> @@ -187,13 +198,14 @@ ovs_router_insert__(uint8_t priority, const struct in6_addr *ip6_dst,
>     struct match match;
>     int err;
> 
> -    rt_init_match(&match, ip6_dst, plen);
> +    rt_init_match(&match, mark, ip6_dst, plen);
> 
>     p = xzalloc(sizeof *p);
>     ovs_strlcpy(p->output_bridge, output_bridge, sizeof p->output_bridge);
>     if (ipv6_addr_is_set(gw)) {
>         p->gw = *gw;
>     }
> +    p->mark = mark;
>     p->nw_addr = match.flow.ipv6_dst;
>     p->plen = plen;
>     p->priority = priority;
> @@ -202,7 +214,12 @@ ovs_router_insert__(uint8_t priority, const struct in6_addr *ip6_dst,
>         err = get_src_addr(gw, output_bridge, &p->src_addr);
>     }
>     if (err) {
> +        struct ds ds = DS_EMPTY_INITIALIZER;
> +
> +        ipv6_format_mapped(ip6_dst, &ds);
> +        VLOG_DBG_RL(&rl, "src addr not available for route %s", ds_cstr(&ds));
>         free(p);
> +        ds_destroy(&ds);
>         return err;
>     }
>     /* Longest prefix matches first. */
> @@ -222,13 +239,12 @@ ovs_router_insert__(uint8_t priority, const struct in6_addr *ip6_dst,
> }
> 
> void
> -ovs_router_insert(const struct in6_addr *ip_dst, uint8_t plen,
> +ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst, uint8_t plen,
>                   const char output_bridge[], const struct in6_addr *gw)
> {
> -    ovs_router_insert__(plen, ip_dst, plen, output_bridge, gw);
> +    ovs_router_insert__(mark, plen, ip_dst, plen, output_bridge, gw);
> }
> 
> -
> static bool
> __rt_entry_delete(const struct cls_rule *cr)
> {
> @@ -245,14 +261,15 @@ __rt_entry_delete(const struct cls_rule *cr)
> }
> 
> static bool
> -rt_entry_delete(uint8_t priority, const struct in6_addr *ip6_dst, uint8_t plen)
> +rt_entry_delete(uint32_t mark, uint8_t priority,
> +                const struct in6_addr *ip6_dst, uint8_t plen)
> {
>     const struct cls_rule *cr;
>     struct cls_rule rule;
>     struct match match;
>     bool res = false;
> 
> -    rt_init_match(&match, ip6_dst, plen);
> +    rt_init_match(&match, mark, ip6_dst, plen);
> 
>     cls_rule_init(&rule, &match, priority);
> 
> @@ -288,36 +305,118 @@ scan_ipv4_route(const char *s, ovs_be32 *addr, unsigned int *plen)
>     return true;
> }
> 
> +static bool
> +parse_ip_addr(const char *arg, struct in6_addr *ip6, unsigned int *plen)
> +{
> +    ovs_be32 ip;
> +
> +    if (scan_ipv4_route(arg, &ip, plen)) {
> +        in6_addr_set_mapped_ipv4(ip6, ip);
> +        *plen += 96;
> +        return true;
> +    } else if (scan_ipv6_route(arg, ip6, plen)) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static void
> +ovs_router2_add(struct unixctl_conn *conn, int argc,
> +                const char *argv[], void *aux OVS_UNUSED)
> +{
> +    struct in6_addr gw6 = in6addr_any;
> +    uint32_t plen = 0, mark = 0;
> +    const char *dev = NULL;
> +    bool req_arg = false;
> +    struct in6_addr ip6;
> +    int err;
> +    int i;
> +
> +    for (i = 1; i < argc; i++) {
> +        const char *arg = argv[i];
> +        unsigned int n;
> +
> +        if (ovs_scan(arg, "subnet=%n", &n)) {
> +            if (req_arg) {
> +                unixctl_command_reply_error(conn, "Multiple subnet not supported");
> +                return;
> +            }
> +            if (parse_ip_addr(&arg[n], &ip6, &plen)) {
> +                req_arg = true;
> +            } else {
> +                unixctl_command_reply_error(conn, "Invalid subnet");
> +                return;
> +            }
> +        } else if (ovs_scan(arg, "dev=%n", &n)) {
> +            dev = &arg[n];
> +        } else if (ovs_scan(arg, "gw=%n", &n)) {
> +            unsigned int plen_gw;
> +
> +            if (memcmp(&gw6, &in6addr_any, sizeof (gw6))) {
> +                unixctl_command_reply_error(conn, "Multiple Gateway not supported");
> +                return;
> +            }
> +            if (parse_ip_addr(&arg[n], &gw6, &plen_gw)) {
> +                req_arg = true;
> +                if (plen_gw != 128) {
> +                    unixctl_command_reply_error(conn, "gw mask is invalid");
> +                }
> +            } else {
> +                unixctl_command_reply_error(conn, "Invalid gateway");
> +                return;
> +            }
> +        } else if (ovs_scan(arg, "pkt_mark=%n", &n)) {
> +            if (mark) {
> +                unixctl_command_reply_error(conn, "multiple mark not supported");
> +                return;
> +            }
> +            ovs_scan(&arg[n], "%"SCNi32, &mark);
> +        } else {
> +            unixctl_command_reply_error(conn, "Invalid parameter");
> +            return;
> +        }
> +    }
> +
> +    if (!req_arg) {
> +        unixctl_command_reply_error(conn, "subnet is required parameter.");
> +        return;
> +    }
> +    if (!dev) {
> +        unixctl_command_reply_error(conn, "egress dev is required parameter.");
> +        return;
> +    }
> +
> +    err = ovs_router_insert__(mark, plen + 32, &ip6, plen, dev, &gw6);
> +    if (err) {
> +        unixctl_command_reply_error(conn, "Error while inserting route.");
> +    } else {
> +        unixctl_command_reply(conn, "OK");
> +    }
> +    return;
> +}
> +
> static void
> ovs_router_add(struct unixctl_conn *conn, int argc,
> -              const char *argv[], void *aux OVS_UNUSED)
> +               const char *argv[], void *aux OVS_UNUSED)
> {
> -    ovs_be32 ip;
> -    unsigned int plen;
>     struct in6_addr ip6;
> -    struct in6_addr gw6;
> +    struct in6_addr gw6 = in6addr_any;
> +    uint32_t plen;

parse_ip_addr() expects unsigned int pointer for ‘plen’, so why change it’s type?

>     int err;
> 
> -    if (scan_ipv4_route(argv[1], &ip, &plen)) {
> -        ovs_be32 gw = 0;
> -        if (argc > 3 && !ip_parse(argv[3], &gw)) {
> +    if (parse_ip_addr(argv[1], &ip6, &plen)) {
> +        uint32_t plen_gw;

same

> +
> +        if (argc > 3 && !parse_ip_addr(argv[3], &gw6, &plen_gw)) {
>             unixctl_command_reply_error(conn, "Invalid gateway");
>             return;
>         }

What if the subnet is IPv6 and gw is IPv4 or vice versa? Will the refactored code catch that error case?

> -        in6_addr_set_mapped_ipv4(&ip6, ip);
> -        in6_addr_set_mapped_ipv4(&gw6, gw);
> -        plen += 96;
> -    } else if (scan_ipv6_route(argv[1], &ip6, &plen)) {
> -        gw6 = in6addr_any;
> -        if (argc > 3 && !ipv6_parse(argv[3], &gw6)) {
> -            unixctl_command_reply_error(conn, "Invalid IPv6 gateway");
> -            return;
> -        }
>     } else {
> -        unixctl_command_reply_error(conn, "Invalid parameters");
> +        unixctl_command_reply_error(conn, "Error while parsing subnet");
>         return;
>     }
> -    err = ovs_router_insert__(plen + 32, &ip6, plen, argv[2], &gw6);
> +
> +    err = ovs_router_insert__(0, plen + 32, &ip6, plen, argv[2], &gw6);
>     if (err) {
>         unixctl_command_reply_error(conn, "Error while inserting route.");
>     } else {
> @@ -330,7 +429,7 @@ ovs_router_del(struct unixctl_conn *conn, int argc OVS_UNUSED,
>               const char *argv[], void *aux OVS_UNUSED)
> {
>     ovs_be32 ip;
> -    unsigned int plen;
> +    uint32_t plen, mark = 0;

Again, I’d rather keep ‘plen’ as ‘unsigned int’.

>     struct in6_addr ip6;
> 
>     if (scan_ipv4_route(argv[1], &ip, &plen)) {
> @@ -340,7 +439,11 @@ ovs_router_del(struct unixctl_conn *conn, int argc OVS_UNUSED,
>         unixctl_command_reply_error(conn, "Invalid parameters");
>         return;
>     }
> -    if (rt_entry_delete(plen + 32, &ip6, plen)) {
> +    if (argc > 2) {
> +        ovs_scan(argv[2], "%"SCNi32, &mark);
> +    }
> +
> +    if (rt_entry_delete(mark, plen + 32, &ip6, plen)) {
>         unixctl_command_reply(conn, "OK");
>         seq_change(tnl_conf_seq);
>     } else {
> @@ -368,7 +471,12 @@ ovs_router_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
>         if (IN6_IS_ADDR_V4MAPPED(&rt->nw_addr)) {
>             plen -= 96;
>         }
> -        ds_put_format(&ds, "/%"PRIu16" dev %s", plen, rt->output_bridge);
> +        ds_put_format(&ds, "/%"PRIu16, plen);
> +        if (rt->mark) {
> +            ds_put_format(&ds, " MARK %"PRIu32, rt->mark);
> +        }

Again, I think we should document somewhere that non-shown mark means matching on mark value 0.


> +
> +        ds_put_format(&ds, " dev %s", rt->output_bridge);
>         if (ipv6_addr_is_set(&rt->gw)) {
>             ds_put_format(&ds, " GW ");
>             ipv6_format_mapped(&rt->gw, &ds);
> @@ -382,12 +490,12 @@ ovs_router_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
> }
> 
> static void
> -ovs_router_lookup_cmd(struct unixctl_conn *conn, int argc OVS_UNUSED,
> +ovs_router_lookup_cmd(struct unixctl_conn *conn, int argc,
>                       const char *argv[], void *aux OVS_UNUSED)
> {
>     ovs_be32 ip;
>     struct in6_addr ip6;
> -    unsigned int plen;
> +    uint32_t plen, mark = 0;

ditto.

>     char iface[IFNAMSIZ];
>     struct in6_addr gw, src;
> 
> @@ -397,9 +505,12 @@ ovs_router_lookup_cmd(struct unixctl_conn *conn, int argc OVS_UNUSED,
>         unixctl_command_reply_error(conn, "Invalid parameters");
>         return;
>     }
> -
> -    if (ovs_router_lookup(&ip6, iface, &src, &gw)) {
> +    if (argc > 2) {
> +        ovs_scan(argv[2], "%"SCNi32, &mark);
> +    }
> +    if (ovs_router_lookup(mark, &ip6, iface, &src, &gw)) {
>         struct ds ds = DS_EMPTY_INITIALIZER;
> +
>         ds_put_format(&ds, "src ");
>         ipv6_format_mapped(&src, &ds);
>         ds_put_format(&ds, "\ngateway ");
> @@ -434,11 +545,14 @@ void
> ovs_router_init(void)
> {
>     classifier_init(&cls, NULL);
> -    unixctl_command_register("ovs/route/add", "ip_addr/prefix_len out_br_name gw", 2, 3,
> +    unixctl_command_register("ovs/route/add", "ip_addr/prefix_len out_br_name gw mark", 2, 3,

There is one more optional parameter, so should the “3” be changed to “4”? There was no test case for the optional mark here, so this was not caught.

>                              ovs_router_add, NULL);
> +    unixctl_command_register("ovs/route2/add",
> +                             "subnet=ip_addr/prefix_len dev=out_br_name gw=ipaddr pkt_mark=mark",
> +                             2, 4, ovs_router2_add, NULL);

I’d rather not add a new command, but extend the syntax of the old one, I.e., In addition to old syntax, allow naming any of the parameters, and maybe support mark without a gateway also in the unnamed syntax (a mark should not parse as an IP address, so it is possible to figure out a mark was given without a gw). If the parameter includes ‘=‘ parse accordingly, otherwise parse positionally as before?

>     unixctl_command_register("ovs/route/show", "", 0, 0, ovs_router_show, NULL);
> -    unixctl_command_register("ovs/route/del", "ip_addr/prefix_len", 1, 1, ovs_router_del,
> -                             NULL);
> -    unixctl_command_register("ovs/route/lookup", "ip_addr", 1, 1,
> +    unixctl_command_register("ovs/route/del", "ip_addr/prefix_len mark", 1, 2,
> +                             ovs_router_del, NULL);
> +    unixctl_command_register("ovs/route/lookup", "ip_addr mark", 1, 2,
>                              ovs_router_lookup_cmd, NULL);

IMO the mark parameter should be handled consistently across all the commands, i.e., allow either “mark=<number>”, or “<number>” in all commands, or require one or the other in all of them.

> }
> diff --git a/lib/ovs-router.h b/lib/ovs-router.h
> index 9cb7509..29c7c5f 100644
> --- a/lib/ovs-router.h
> +++ b/lib/ovs-router.h
> @@ -25,10 +25,12 @@
> extern "C" {
> #endif
> 
> -bool ovs_router_lookup(const struct in6_addr *ip_dst, char out_dev[],
> +bool ovs_router_lookup(uint32_t mark, const struct in6_addr *ip_dst,
> +                       char out_dev[],
>                        struct in6_addr *src, struct in6_addr *gw);
> void ovs_router_init(void);
> -void ovs_router_insert(const struct in6_addr *ip_dst, uint8_t plen,
> +void ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst,
> +                       uint8_t plen,
>                        const char output_bridge[], const struct in6_addr *gw);
> void ovs_router_flush(void);
> #ifdef  __cplusplus
> diff --git a/lib/route-table.c b/lib/route-table.c
> index 61c8cd8..ae8af43 100644
> --- a/lib/route-table.c
> +++ b/lib/route-table.c
> @@ -298,7 +298,7 @@ route_table_handle_msg(const struct route_table_msg *change)
>     if (change->relevant && change->nlmsg_type == RTM_NEWROUTE) {
>         const struct route_data *rd = &change->rd;
> 
> -        ovs_router_insert(&rd->rta_dst, rd->rtm_dst_len,
> +        ovs_router_insert(rd->mark, &rd->rta_dst, rd->rtm_dst_len,
>                           rd->ifname, &rd->rta_gw);
>     }
> }
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index 37992b4..d0ec1cd 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -449,7 +449,7 @@ sflow_choose_agent_address(const char *agent_device,
>             struct in6_addr addr6, src, gw;
> 
>             in6_addr_set_mapped_ipv4(&addr6, sa.sin.sin_addr.s_addr);
> -            if (ovs_router_lookup(&addr6, name, &src, &gw)) {
> +            if (ovs_router_lookup(0, &addr6, name, &src, &gw)) {
> 

Is it possible that the route we need here actually has a non-zero mark? Maybe add a comment either way?

>                 in4.s_addr = in6_addr_get_mapped_ipv4(&src);
>                 goto success;
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 2977be5..ad779c0 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -2715,7 +2715,7 @@ tnl_route_lookup_flow(const struct flow *oflow,
>     struct in6_addr dst;
> 
>     dst = flow_tnl_dst(&oflow->tunnel);
> -    if (!ovs_router_lookup(&dst, out_dev, src, &gw)) {
> +    if (!ovs_router_lookup(oflow->pkt_mark, &dst, out_dev, src, &gw)) {
>         return -ENOENT;
>     }
> 
> diff --git a/tests/ovs-router.at b/tests/ovs-router.at
> index 93a730a..9019e26 100644
> --- a/tests/ovs-router.at
> +++ b/tests/ovs-router.at
> @@ -20,10 +20,32 @@ AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 192.0.2.1/24], [0], [OK
> ])
> AT_CHECK([ovs-appctl ovs/route/add 198.51.100.0/24 br0 192.0.2.254], [0], [OK
> ])
> +AT_CHECK([ovs-appctl ovs/route2/add subnet=192.0.2.1/24 dev=br0 pkt_mark=123], [0], [OK
> +])
> +
> +AT_CHECK([ovs-appctl ovs/route2/add subnet=198.51.100.200/24 dev=br0 gw=192.0.2.250 pkt_mark=1234], [0], [OK
> +])
> +
> +AT_CHECK([ovs-appctl ovs/route/show | grep User | sort], [0], [User: 192.0.2.0/24 MARK 123 dev br0 SRC 192.0.2.1
> +User: 198.51.100.0/24 MARK 1234 dev br0 GW 192.0.2.250 SRC 192.0.2.1
> +User: 198.51.100.0/24 dev br0 GW 192.0.2.254 SRC 192.0.2.1
> +])
> +
> AT_CHECK([ovs-appctl ovs/route/lookup 198.51.100.1], [0], [src 192.0.2.1
> gateway 192.0.2.254
> dev br0
> ])
> +
> +AT_CHECK([ovs-appctl ovs/route/lookup 198.51.100.1 1234], [0], [src 192.0.2.1
> +gateway 192.0.2.250
> +dev br0
> +])
> +AT_CHECK([ovs-appctl ovs/route/del 198.51.100.0/24 1234], [0], [OK
> +])
> +AT_CHECK([ovs-appctl ovs/route/show | grep User | sort], [0], [User: 192.0.2.0/24 MARK 123 dev br0 SRC 192.0.2.1
> +User: 198.51.100.0/24 dev br0 GW 192.0.2.254 SRC 192.0.2.1
> +])
> +
> OVS_VSWITCHD_STOP
> AT_CLEANUP
> 
> diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
> index 4aaa669..60d5224 100644
> --- a/tests/tunnel-push-pop.at
> +++ b/tests/tunnel-push-pop.at
> @@ -40,6 +40,9 @@ AT_CHECK([ovs-appctl netdev-dummy/ip6addr br0 2001:cafe::88/24], [0], [OK
> AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK
> ])
> 
> +AT_CHECK([ovs-appctl ovs/route2/add subnet=1.1.2.92/24 pkt_mark=1234 dev=br0], [0], [OK
> +])
> +
> AT_CHECK([ovs-ofctl add-flow br0 action=normal])
> 
> dnl Check ARP request
> -- 
> 2.9.3
> 
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev



More information about the dev mailing list