[ovs-dev] [PATCH ovn v8 2/5] northd: make connected routes have higher priority than static
Vladislav Odintsov
odivlad at gmail.com
Wed Nov 17 14:23:41 UTC 2021
Two additions:
1. Regarding documentation for flow in lr_in_defrag section:
It seems to me that documentation for it is written in a wrong section (lr_in_defrag).
Since the flow is installed in lr_in_ip_routing, it should be documented there.
I’ll move it if you don’t mind.
2. Documentation for other flow changes was added, but I’ve committed it to a wrong patch (#3).
I’ll move documentation update between patches.
Regards,
Vladislav Odintsov
> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad at gmail.com> wrote:
>
> Hi Numan,
>
> Thanks for the review.
> Sure I will fix this. Should I wait for more comments or that’s all and I can send v9?
>
> Regards,
> Vladislav Odintsov
>
>> On 17 Nov 2021, at 05:17, Numan Siddique <numans at ovn.org> wrote:
>>
>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <odivlad at gmail.com <mailto:odivlad at gmail.com>> wrote:
>>>
>>> With this patch routes to connected networks have higher
>>> priority than static routes with same ip_prefix.
>>>
>>> This brings commonly-used behaviour for routes lookup order:
>>> 1: longest prefix match
>>> 2: metric
>>>
>>> The metric has next lookup order:
>>> 1: connected routes
>>> 2: static routes
>>>
>>> Earlier static and connected routes with same ip_prefix had
>>> the same priority, so it was impossible to predict which one
>>> is used for routing decision.
>>>
>>> Each route's prefix length has its own 'slot' in lflow prios.
>>> Now prefix length space is calculated using next information:
>>> to calculate route's priority prefixlen multiplied by 3
>>> + route origin offset (0 - source-based route; 1 - directly-
>>> connected route; 2 - static route).
>>>
>>> Also, enlarge prio for generic records in lr_in_ip_routing stage
>>> by 10000.
>>>
>>> Signed-off-by: Vladislav Odintsov <odivlad at gmail.com>
>>
>> Hi Vladislav,
>>
>> Thanks for the patch. Overall it looks good to me. I've one comment.
>> Looks like the documentation updated in ovn-northd.8.xml is not accurate.
>>
>> This patch modifies the flows in the lr_in_ip_routing stage but this
>> patch doesn't update the documentation.
>> Also the patch updates the documentation for the flow in lr_in_defrag
>> stage, which seems not correct.
>>
>> Can you please update the documentation accurately ?
>>
>> Numan
>>
>>> ---
>>> northd/northd.c | 50 ++++++++++++++++++++++++++++-------------
>>> northd/ovn-northd.8.xml | 12 +++++-----
>>> tests/ovn-northd.at | 8 +++----
>>> 3 files changed, 45 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/northd/northd.c b/northd/northd.c
>>> index 1e8a3457c..0d513f039 100644
>>> --- a/northd/northd.c
>>> +++ b/northd/northd.c
>>> @@ -305,6 +305,15 @@ enum ovn_stage {
>>> *
>>> */
>>>
>>> +/*
>>> + * Route offsets implement logic to prioritize traffic for routes with
>>> + * same ip_prefix values:
>>> + * - connected route overrides static one;
>>> + * - static route overrides connected route. */
>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>>> +
>>> /* Returns an "enum ovn_stage" built from the arguments. */
>>> static enum ovn_stage
>>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
>>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
>>> struct in6_addr prefix;
>>> unsigned int plen;
>>> bool is_src_route;
>>> + const char *origin;
>>> uint16_t route_count;
>>> struct ovs_list route_list; /* Contains ecmp_route_list_node */
>>> };
>>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
>>> eg->prefix = route->prefix;
>>> eg->plen = route->plen;
>>> eg->is_src_route = route->is_src_route;
>>> + eg->origin = smap_get_def(&route->route->options, "origin", "");
>>> ovs_list_init(&eg->route_list);
>>> ecmp_groups_add_route(eg, route);
>>>
>>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
>>> static void
>>> build_route_match(const struct ovn_port *op_inport, const char *network_s,
>>> int plen, bool is_src_route, bool is_ipv4, struct ds *match,
>>> - uint16_t *priority)
>>> + uint16_t *priority, int ofs)
>>> {
>>> const char *dir;
>>> /* The priority here is calculated to implement longest-prefix-match
>>> * routing. */
>>> if (is_src_route) {
>>> dir = "src";
>>> - *priority = plen * 2;
>>> + ofs = 0;
>>> } else {
>>> dir = "dst";
>>> - *priority = (plen * 2) + 1;
>>> }
>>>
>>> + *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
>>> +
>>> if (op_inport) {
>>> ds_put_format(match, "inport == %s && ", op_inport->json_key);
>>> }
>>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
>>> out_port->lrp_networks.ea_s,
>>> IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>>> port_ip, out_port->json_key);
>>> - ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
>>> + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
>>> ds_cstr(&match), ds_cstr(&actions),
>>> &st_route->header_);
>>>
>>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>> struct ds route_match = DS_EMPTY_INITIALIZER;
>>>
>>> char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
>>> + int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
>>> + ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
>>> build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
>>> - &route_match, &priority);
>>> + &route_match, &priority, ofs);
>>> free(prefix_s);
>>>
>>> struct ds actions = DS_EMPTY_INITIALIZER;
>>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>> const struct ovn_port *op, const char *lrp_addr_s,
>>> const char *network_s, int plen, const char *gateway,
>>> bool is_src_route, const struct ovsdb_idl_row *stage_hint,
>>> - bool is_discard_route)
>>> + bool is_discard_route, int ofs)
>>> {
>>> bool is_ipv4 = strchr(network_s, '.') ? true : false;
>>> struct ds match = DS_EMPTY_INITIALIZER;
>>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>> }
>>> }
>>> build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
>>> - &match, &priority);
>>> + &match, &priority, ofs);
>>>
>>> struct ds common_actions = DS_EMPTY_INITIALIZER;
>>> struct ds actions = DS_EMPTY_INITIALIZER;
>>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>> }
>>> }
>>>
>>> + int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
>>> + ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
>>> + : ROUTE_PRIO_OFFSET_STATIC;
>>> +
>>> char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
>>> add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
>>> lrp_addr_s, prefix_s, route_->plen, route->nexthop,
>>> - route_->is_src_route, &route->header_, route_->is_discard_route);
>>> + route_->is_src_route, &route->header_, route_->is_discard_route,
>>> + ofs);
>>>
>>> free(prefix_s);
>>> }
>>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>>> add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
>>> op->lrp_networks.ipv4_addrs[i].network_s,
>>> op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
>>> - &op->nbrp->header_, false);
>>> + &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>> }
>>>
>>> for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>>> add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
>>> op->lrp_networks.ipv6_addrs[i].network_s,
>>> op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
>>> - &op->nbrp->header_, false);
>>> + &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>> }
>>> } else if (lsp_is_router(op->nbsp)) {
>>> struct ovn_port *peer = ovn_port_get_peer(ports, op);
>>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>>> peer->lrp_networks.ipv4_addrs[0].addr_s,
>>> laddrs->ipv4_addrs[k].network_s,
>>> laddrs->ipv4_addrs[k].plen, NULL, false,
>>> - &peer->nbrp->header_, false);
>>> + &peer->nbrp->header_, false,
>>> + ROUTE_PRIO_OFFSET_CONNECTED);
>>> }
>>> }
>>> }
>>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>>> /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>>> * i.e., router solicitation and router advertisement.
>>> */
>>> - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
>>> + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>>> "nd_rs || nd_ra", "drop;");
>>> if (!od->mcast_info.rtr.relay) {
>>> return;
>>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>>> }
>>> ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
>>> igmp_group->mcgroup.name);
>>> - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
>>> + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
>>> ds_cstr(match), ds_cstr(actions));
>>> }
>>>
>>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>>> * ports. Otherwise drop any multicast traffic.
>>> */
>>> if (od->mcast_info.rtr.flood_static) {
>>> - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>>> + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>> "ip4.mcast || ip6.mcast",
>>> "clone { "
>>> "outport = \""MC_STATIC"\"; "
>>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>>> "next; "
>>> "};");
>>> } else {
>>> - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>>> + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>> "ip4.mcast || ip6.mcast", "drop;");
>>> }
>>> }
>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>>> index fb67395e3..4f3a9d5e3 100644
>>> --- a/northd/ovn-northd.8.xml
>>> +++ b/northd/ovn-northd.8.xml
>>> @@ -2945,12 +2945,12 @@ icmp6 {
>>>
>>> <p>
>>> If ECMP routes with symmetric reply are configured in the
>>> - <code>OVN_Northbound</code> database for a gateway router, a priority-300
>>> - flow is added for each router port on which symmetric replies are
>>> - configured. The matching logic for these ports essentially reverses the
>>> - configured logic of the ECMP route. So for instance, a route with a
>>> - destination routing policy will instead match if the source IP address
>>> - matches the static route's prefix. The flow uses the action
>>> + <code>OVN_Northbound</code> database for a gateway router, a
>>> + priority-10300 flow is added for each router port on which symmetric
>>> + replies are configured. The matching logic for these ports essentially
>>> + reverses the configured logic of the ECMP route. So for instance, a route
>>> + with a destination routing policy will instead match if the source IP
>>> + address matches the static route's prefix. The flow uses the action
>>> <code>ct_next</code> to send IP packets to the connection tracker for
>>> packet de-fragmentation and tracking before sending it to the next table.
>>> </p>
>>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
>>> index 85b47a18f..3c1a97f73 100644
>>> --- a/tests/ovn-northd.at
>>> +++ b/tests/ovn-northd.at
>>> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>>
>>> ovn-sbctl dump-flows lr0 > lr0flows
>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>> - table=??(lr_in_ip_routing ), priority=65 , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>> + table=??(lr_in_ip_routing ), priority=97 , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>> ])
>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>> table=??(lr_in_ip_routing_ecmp), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>>> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>>
>>> ovn-sbctl dump-flows lr0 > lr0flows
>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>> - table=??(lr_in_ip_routing ), priority=65 , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>> + table=??(lr_in_ip_routing ), priority=97 , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>> ])
>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>> table=??(lr_in_ip_routing_ecmp), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>>> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>
>>> AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>> - table=??(lr_in_ip_routing ), priority=49 , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>> + table=??(lr_in_ip_routing ), priority=73 , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>> ])
>>>
>>> check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
>>>
>>> ovn-sbctl dump-flows lr0 > lr0flows
>>> AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>> - table=??(lr_in_ip_routing ), priority=49 , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>> + table=??(lr_in_ip_routing ), priority=73 , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>> ])
>>>
>>> AT_CLEANUP
>>> --
>>> 2.30.0
>>>
>>> _______________________________________________
>>> dev mailing list
>>> dev at openvswitch.org <mailto:dev at openvswitch.org>
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
More information about the dev
mailing list