[ovs-dev] [PATCH ovn v2 1/2] ovn-northd: Don't send the pkt to conntrack if it is to be routed in egress stage.
Dumitru Ceara
dceara at redhat.com
Thu Aug 6 18:29:40 UTC 2020
On 8/4/20 9:19 AM, numans at ovn.org wrote:
> From: Numan Siddique <numans at ovn.org>
>
> If there is a logical port 'P1' with the IP - 10.0.0.3 and a logical port 'P2' with
> the IP 20.0.0.3 and if the logical switch of 'P1' has atleast one load balancer
> associated with it and atleast one ACL with allow-related action associated with it.
> Then for every packet from 'P1' to 'P2' after the TCP connection
> is established we see a total of 4 recirculations in the datapath on the chassis
> claiming 'P1'. This is because,
>
> In the ingress logical switch pipeline, below logical flows are hit
> - table=9 (ls_in_lb ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;)
> - table=10(ls_in_stateful ), priority=100 , match=(reg0[2] == 1), action=(ct_lb;)
>
> And in the egress logical switch pipeline, below logical flows are hit
> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[0] = 1; next;)
> - table=2 (ls_out_pre_stateful), priority=100 , match=(reg0[0] == 1), action=(ct_next;)
> - table=3 (ls_out_lb ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;)
> - table=7 (ls_out_stateful ), priority=100 , match=(reg0[2] == 1), action=(ct_lb;)
>
> In the above example, when the packet enters the egress pipeline and since it needs to
> enter the router pipeline, we can skip setting reg0[0] if outport is peer port of
> logical router port. There is no need to send the packet to conntrack in this case.
>
> This patch handles this case for router ports. Next patch in the series avoids sending to
> conntrack with the action - ct_lb if the packet is not destined to the LB VIP.
>
> With the present master for the above example, we see total of 4 recirculations on the
> chassis claiming the lport 'P1'. With this patch we see only 2 recirculations.
>
> Signed-off-by: Numan Siddique <numans at ovn.org>
Hi Numan,
Looks good to me:
Acked-by: Dumitru Ceara <dceara at redhat.com>
Thanks,
Dumitru
> ---
>
> v1 -> v2
> ----
> * No change.
>
> northd/ovn-northd.8.xml | 33 ++++++++++++++++++++++++++++++++-
> northd/ovn-northd.c | 39 ++++++++++++++++++++++++++++++---------
> 2 files changed, 62 insertions(+), 10 deletions(-)
>
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index ed1cd58e70..b741f49347 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -366,6 +366,15 @@
> db="OVN_Northbound"/> table.
> </p>
>
> + <p>
> + This table also has a priority-110 flow with the match
> + <code>inport == <var>I</var></code> for all logical switch
> + datapaths to move traffic to the next table. Where <var>I</var>
> + is the peer of a logical router port. This flow is added to
> + skip the connection tracking of packets which enter from
> + logical router datapath to logical switch datapath.
> + </p>
> +
> <h3>Ingress Table 5: Pre-stateful</h3>
>
> <p>
> @@ -533,7 +542,20 @@
>
> <p>
> It contains a priority-0 flow that simply moves traffic to the next
> - table. For established connections a priority 100 flow matches on
> + table.
> + </p>
> +
> + <p>
> + A priority-65535 flow with the match
> + <code>inport == <var>I</var></code> for all logical switch
> + datapaths to move traffic to the next table. Where <var>I</var>
> + is the peer of a logical router port. This flow is added to
> + skip the connection tracking of packets which enter from
> + logical router datapath to logical switch datapath.
> + </p>
> +
> + <p>
> + For established connections a priority 65534 flow matches on
> <code>ct.est && !ct.rel && !ct.new &&
> !ct.inv</code> and sets an action <code>reg0[2] = 1; next;</code> to act
> as a hint for table <code>Stateful</code> to send packets through
> @@ -1359,6 +1381,15 @@ output;
> db="OVN_Northbound"/> table.
> </p>
>
> + <p>
> + This table also has a priority-110 flow with the match
> + <code>outport == <var>I</var></code> for all logical switch
> + datapaths to move traffic to the next table. Where <var>I</var>
> + is the peer of a logical router port. This flow is added to
> + skip the connection tracking of packets which will be entering
> + logical router datapath from logical switch datapath for routing.
> + </p>
> +
> <h3>Egress Table 2: Pre-stateful</h3>
>
> <p>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 03c62bafaa..c7b1239adf 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -4850,8 +4850,9 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths,
> }
>
> static void
> -build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op,
> - struct hmap *lflows)
> +skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op,
> + enum ovn_stage in_stage, enum ovn_stage out_stage,
> + uint16_t priority, struct hmap *lflows)
> {
> /* Can't use ct() for router ports. Consider the following configuration:
> * lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
> @@ -4867,10 +4868,10 @@ build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op,
>
> ds_put_format(&match_in, "ip && inport == %s", op->json_key);
> ds_put_format(&match_out, "ip && outport == %s", op->json_key);
> - ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
> + ovn_lflow_add_with_hint(lflows, od, in_stage, priority,
> ds_cstr(&match_in), "next;",
> &op->nbsp->header_);
> - ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
> + ovn_lflow_add_with_hint(lflows, od, out_stage, priority,
> ds_cstr(&match_out), "next;",
> &op->nbsp->header_);
>
> @@ -4903,10 +4904,14 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
> * defragmentation, in order to match L4 headers. */
> if (has_stateful) {
> for (size_t i = 0; i < od->n_router_ports; i++) {
> - build_pre_acl_flows(od, od->router_ports[i], lflows);
> + skip_port_from_conntrack(od, od->router_ports[i],
> + S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
> + 110, lflows);
> }
> for (size_t i = 0; i < od->n_localnet_ports; i++) {
> - build_pre_acl_flows(od, od->localnet_ports[i], lflows);
> + skip_port_from_conntrack(od, od->localnet_ports[i],
> + S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
> + 110, lflows);
> }
>
> /* Ingress and Egress Pre-ACL Table (Priority 110).
> @@ -5050,6 +5055,17 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
> ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
> ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
>
> + for (size_t i = 0; i < od->n_router_ports; i++) {
> + skip_port_from_conntrack(od, od->router_ports[i],
> + S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB,
> + 110, lflows);
> + }
> + for (size_t i = 0; i < od->n_localnet_ports; i++) {
> + skip_port_from_conntrack(od, od->localnet_ports[i],
> + S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB,
> + 110, lflows);
> + }
> +
> struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
> struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
> bool vip_configured = false;
> @@ -5725,13 +5741,18 @@ build_lb(struct ovn_datapath *od, struct hmap *lflows)
> ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
>
> if (od->nbs->load_balancer) {
> - /* Ingress and Egress LB Table (Priority 65535).
> + for (size_t i = 0; i < od->n_router_ports; i++) {
> + skip_port_from_conntrack(od, od->router_ports[i],
> + S_SWITCH_IN_LB, S_SWITCH_OUT_LB,
> + UINT16_MAX, lflows);
> + }
> + /* Ingress and Egress LB Table (Priority 65534).
> *
> * Send established traffic through conntrack for just NAT. */
> - ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
> + ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX - 1,
> "ct.est && !ct.rel && !ct.new && !ct.inv",
> REGBIT_CONNTRACK_NAT" = 1; next;");
> - ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
> + ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX - 1,
> "ct.est && !ct.rel && !ct.new && !ct.inv",
> REGBIT_CONNTRACK_NAT" = 1; next;");
> }
>
More information about the dev
mailing list