[ovs-dev] [PATCH ovn v2 1/2] ovn-northd: Don't send the pkt to conntrack if it is to be routed in egress stage.

Dumitru Ceara dceara at redhat.com
Thu Aug 6 18:29:40 UTC 2020


On 8/4/20 9:19 AM, numans at ovn.org wrote:
> From: Numan Siddique <numans at ovn.org>
> 
> If there is a logical port 'P1' with the IP - 10.0.0.3 and a logical port 'P2' with
> the IP 20.0.0.3 and if the logical switch of 'P1' has atleast one load balancer
> associated with it and atleast one ACL with allow-related action associated with it.
> Then for every packet from 'P1' to 'P2' after the TCP connection
> is established we see a total of 4 recirculations in the datapath on the chassis
> claiming 'P1'. This is because,
> 
> In the ingress logical switch pipeline, below logical flows are hit
>   - table=9 (ls_in_lb           ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;)
>   - table=10(ls_in_stateful     ), priority=100  , match=(reg0[2] == 1), action=(ct_lb;)
> 
> And in the egress logical switch pipeline, below logical flows are hit
>  - table=0 (ls_out_pre_lb      ), priority=100  , match=(ip), action=(reg0[0] = 1; next;)
>  - table=2 (ls_out_pre_stateful), priority=100  , match=(reg0[0] == 1), action=(ct_next;)
>  - table=3 (ls_out_lb          ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;)
>  - table=7 (ls_out_stateful    ), priority=100  , match=(reg0[2] == 1), action=(ct_lb;)
> 
> In the above example, when the packet enters the egress pipeline and since it needs to
> enter the router pipeline, we can skip setting reg0[0] if outport is peer port of
> logical router port. There is no need to send the packet to conntrack in this case.
> 
> This patch handles this case for router ports. Next patch in the series avoids sending to
> conntrack with the action - ct_lb if the packet is not destined to the LB VIP.
> 
> With the present master for the above example, we see total of 4 recirculations on the
> chassis claiming the lport 'P1'. With this patch we see only 2 recirculations.
> 
> Signed-off-by: Numan Siddique <numans at ovn.org>

Hi Numan,

Looks good to me:

Acked-by: Dumitru Ceara <dceara at redhat.com>

Thanks,
Dumitru

> ---
> 
> v1 -> v2
> ----
>  * No change.
> 
>  northd/ovn-northd.8.xml | 33 ++++++++++++++++++++++++++++++++-
>  northd/ovn-northd.c     | 39 ++++++++++++++++++++++++++++++---------
>  2 files changed, 62 insertions(+), 10 deletions(-)
> 
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index ed1cd58e70..b741f49347 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -366,6 +366,15 @@
>        db="OVN_Northbound"/> table.
>      </p>
>  
> +    <p>
> +      This table also has a priority-110 flow with the match
> +      <code>inport == <var>I</var></code> for all logical switch
> +      datapaths to move traffic to the next table. Where <var>I</var>
> +      is the peer of a logical router port. This flow is added to
> +      skip the connection tracking of packets which enter from
> +      logical router datapath to logical switch datapath.
> +    </p>
> +
>      <h3>Ingress Table 5: Pre-stateful</h3>
>  
>      <p>
> @@ -533,7 +542,20 @@
>  
>      <p>
>        It contains a priority-0 flow that simply moves traffic to the next
> -      table.  For established connections a priority 100 flow matches on
> +      table.
> +    </p>
> +
> +    <p>
> +      A priority-65535 flow with the match
> +      <code>inport == <var>I</var></code> for all logical switch
> +      datapaths to move traffic to the next table. Where <var>I</var>
> +      is the peer of a logical router port. This flow is added to
> +      skip the connection tracking of packets which enter from
> +      logical router datapath to logical switch datapath.
> +    </p>
> +
> +    <p>
> +      For established connections a priority 65534 flow matches on
>        <code>ct.est && !ct.rel && !ct.new &&
>        !ct.inv</code> and sets an action <code>reg0[2] = 1; next;</code> to act
>        as a hint for table <code>Stateful</code> to send packets through
> @@ -1359,6 +1381,15 @@ output;
>        db="OVN_Northbound"/> table.
>      </p>
>  
> +    <p>
> +      This table also has a priority-110 flow with the match
> +      <code>outport == <var>I</var></code> for all logical switch
> +      datapaths to move traffic to the next table. Where <var>I</var>
> +      is the peer of a logical router port. This flow is added to
> +      skip the connection tracking of packets which will be entering
> +      logical router datapath from logical switch datapath for routing.
> +    </p>
> +
>      <h3>Egress Table 2: Pre-stateful</h3>
>  
>      <p>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 03c62bafaa..c7b1239adf 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -4850,8 +4850,9 @@ build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths,
>  }
>  
>  static void
> -build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op,
> -                    struct hmap *lflows)
> +skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op,
> +                         enum ovn_stage in_stage, enum ovn_stage out_stage,
> +                         uint16_t priority, struct hmap *lflows)
>  {
>      /* Can't use ct() for router ports. Consider the following configuration:
>       * lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
> @@ -4867,10 +4868,10 @@ build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op,
>  
>      ds_put_format(&match_in, "ip && inport == %s", op->json_key);
>      ds_put_format(&match_out, "ip && outport == %s", op->json_key);
> -    ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
> +    ovn_lflow_add_with_hint(lflows, od, in_stage, priority,
>                              ds_cstr(&match_in), "next;",
>                              &op->nbsp->header_);
> -    ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
> +    ovn_lflow_add_with_hint(lflows, od, out_stage, priority,
>                              ds_cstr(&match_out), "next;",
>                              &op->nbsp->header_);
>  
> @@ -4903,10 +4904,14 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
>       * defragmentation, in order to match L4 headers. */
>      if (has_stateful) {
>          for (size_t i = 0; i < od->n_router_ports; i++) {
> -            build_pre_acl_flows(od, od->router_ports[i], lflows);
> +            skip_port_from_conntrack(od, od->router_ports[i],
> +                                     S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
> +                                     110, lflows);
>          }
>          for (size_t i = 0; i < od->n_localnet_ports; i++) {
> -            build_pre_acl_flows(od, od->localnet_ports[i], lflows);
> +            skip_port_from_conntrack(od, od->localnet_ports[i],
> +                                     S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
> +                                     110, lflows);
>          }
>  
>          /* Ingress and Egress Pre-ACL Table (Priority 110).
> @@ -5050,6 +5055,17 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
>      ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
>      ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
>  
> +    for (size_t i = 0; i < od->n_router_ports; i++) {
> +        skip_port_from_conntrack(od, od->router_ports[i],
> +                                 S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB,
> +                                 110, lflows);
> +    }
> +    for (size_t i = 0; i < od->n_localnet_ports; i++) {
> +        skip_port_from_conntrack(od, od->localnet_ports[i],
> +                                 S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB,
> +                                 110, lflows);
> +    }
> +
>      struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
>      struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
>      bool vip_configured = false;
> @@ -5725,13 +5741,18 @@ build_lb(struct ovn_datapath *od, struct hmap *lflows)
>      ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
>  
>      if (od->nbs->load_balancer) {
> -        /* Ingress and Egress LB Table (Priority 65535).
> +        for (size_t i = 0; i < od->n_router_ports; i++) {
> +            skip_port_from_conntrack(od, od->router_ports[i],
> +                                     S_SWITCH_IN_LB, S_SWITCH_OUT_LB,
> +                                     UINT16_MAX, lflows);
> +        }
> +        /* Ingress and Egress LB Table (Priority 65534).
>           *
>           * Send established traffic through conntrack for just NAT. */
> -        ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
> +        ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX - 1,
>                        "ct.est && !ct.rel && !ct.new && !ct.inv",
>                        REGBIT_CONNTRACK_NAT" = 1; next;");
> -        ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
> +        ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX - 1,
>                        "ct.est && !ct.rel && !ct.new && !ct.inv",
>                        REGBIT_CONNTRACK_NAT" = 1; next;");
>      }
> 



More information about the dev mailing list