[ovs-dev] [PATCH ovn] northd: Add lflows to send all pkts to conntrack if LB is configured on a lswitch.

Dumitru Ceara dceara at redhat.com
Thu Sep 10 15:54:28 UTC 2020


On 9/7/20 2:43 PM, numans at ovn.org wrote:
> From: Numan Siddique <numans at ovn.org>
> 
> Prior to this patch, if a load balancer is configured on a logical switch but with no
> ACLs with allow-related configured, then in the ingress pipeline only the packets
> with ip.dst = VIP will be sent to conntrack using the zone id of the source logical port.
> 
> If the backend of the load balancer, sends an invalid packet (for example invalid tcp
> sequence number), then such packets will be delivered to the source logical port VIF
> without unDNATting. This causes the source to reset the connection.
> 
> This patch fixes this issue by sending all the packets to conntrack if a load balancer
> is configured on the logical switch. Because of this, any invalid (ct.inv) packets will
> be dropped in the ingress pipeline itself.
> 
> Unfortunately this impacts the performance as now there will be extra recirculations
> because of ct and ct_commit (for new connections) actions.
> 
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1870359
> Reported-by: Tim Rozet (trozet at redhat.com)
> Signed-off-by: Numan Siddique <numans at ovn.org>

Hi Numan,

Overall the patch looks OK to me, and sadly I think this is the only way
(send everything to conntrack) to fix the issue reported in the Bugzilla report.

I do have some minor comments below.

> ---
>  northd/ovn-northd.8.xml | 14 +++---
>  northd/ovn-northd.c     | 99 +++++++++++++++++++++++++++--------------
>  2 files changed, 72 insertions(+), 41 deletions(-)
> 
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index 989e3643b8..89711c5a6c 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -340,15 +340,12 @@
>        it contains a priority-110 flow to move IPv6 Neighbor Discovery traffic
>        to the next table. If load balancing rules with virtual IP addresses
>        (and ports) are configured in <code>OVN_Northbound</code> database for a
> -      logical switch datapath, a priority-100 flow is added for each configured
> -      virtual IP address <var>VIP</var>. For IPv4 <var>VIPs</var>, the match is
> -      <code>ip && ip4.dst == <var>VIP</var></code>. For IPv6
> -      <var>VIPs</var>, the match is <code>ip &&
> -      ip6.dst == <var>VIP</var></code>. The flow sets an action
> +      logical switch datapath, a priority-100 flow is added with the match
> +      <code>ip</code> to match on IP packets and sets the action
>        <code>reg0[0] = 1; next;</code> to act as a hint for table
>        <code>Pre-stateful</code> to send IP packets to the connection tracker
> -      for packet de-fragmentation before eventually advancing to ingress table
> -      <code>LB</code>.
> +      for packet de-fragmentation before eventually advancing to ingress
> +      table <code>LB</code>.
>        If controller_event has been enabled and load balancing rules with
>        empty backends have been added in <code>OVN_Northbound</code>, a 130 flow
>        is added to trigger ovn-controller events whenever the chassis receives a
> @@ -430,7 +427,8 @@
>      <p>
>        This table also contains a priority 0 flow with action
>        <code>next;</code>, so that ACLs allow packets by default.  If the
> -      logical datapath has a statetful ACL, the following flows will
> +      logical datapath has a statetful ACL or a load balancer with VIP
> +      configured, the following flows will
>        also be added:

Nit: this can go on the line above.

>      </p>
>  
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 3de71612b8..c322558051 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -5031,6 +5031,23 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows,
>      free(action);
>  }
>  
> +static bool
> +has_lb_vip(struct ovn_datapath *od, struct hmap *lbs)
> +{
> +    for (int i = 0; i < od->nbs->n_load_balancer; i++) {
> +        struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
> +        struct ovn_lb *lb =
> +            ovn_lb_find(lbs, &nb_lb->header_.uuid);
> +        ovs_assert(lb);
> +
> +        if (lb->n_vips) {
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> +
>  static void
>  build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
>               struct shash *meter_groups, struct hmap *lbs)
> @@ -5069,8 +5086,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
>                                   110, lflows);
>      }
>  
> -    struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
> -    struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
>      bool vip_configured = false;
>      for (int i = 0; i < od->nbs->n_load_balancer; i++) {
>          struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
> @@ -5080,12 +5095,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
>  
>          for (size_t j = 0; j < lb->n_vips; j++) {
>              struct lb_vip *lb_vip = &lb->vips[j];
> -            if (lb_vip->addr_family == AF_INET) {
> -                sset_add(&all_ips_v4, lb_vip->vip);
> -            } else {
> -                sset_add(&all_ips_v6, lb_vip->vip);
> -            }
> -
>              build_empty_lb_event_flow(od, lflows, lb_vip, nb_lb,
>                                        S_SWITCH_IN_PRE_LB, meter_groups);
>  
> @@ -5099,26 +5108,37 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
>      }
>  
>      /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
> -     * packet to conntrack for defragmentation. */
> -    const char *ip_address;
> -    SSET_FOR_EACH (ip_address, &all_ips_v4) {
> -        char *match = xasprintf("ip && ip4.dst == %s", ip_address);
> -        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> -                      100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
> -        free(match);
> -    }
> -
> -    SSET_FOR_EACH (ip_address, &all_ips_v6) {
> -        char *match = xasprintf("ip && ip6.dst == %s", ip_address);
> -        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> -                      100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
> -        free(match);
> -    }
> -
> -    sset_destroy(&all_ips_v4);
> -    sset_destroy(&all_ips_v6);
> -
> +     * packet to conntrack for defragmentation.
> +     *
> +     * Send all the packets to conntrack in the ingress pipeline if the
> +     * logical switch has a load balancer with VIP configured. Earlier
> +     * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress pipeline
> +     * if the IP destination matches the VIP. But this causes few issues when
> +     * a logical switch has no ACLs configured with allow-related.
> +     * To understand the issue, lets a take a TCP load balancer -
> +     * 10.0.0.10:80=10.0.0.3:80.
> +     * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection with
> +     * the VIP - 10.0.0.10, then the packet in the ingress pipeline of 'p1'
> +     * is sent to the p1's conntrack zone id and the packet is load balanced
> +     * to the backend - 10.0.0.3. For the reply packet from the backend lport,
> +     * it is not sent to the conntrack of backend lport's zone id. This is fine
> +     * as long as the packet is valid. Suppose the backend lport sends an
> +     *  invalid TCP packet (like incorrect sequence number), the packet gets
> +     * delivered to the lport 'p1' without unDNATing the packet to the
> +     * VIP - 10.0.0.10. And this causes the connection to be reset by the
> +     * lport p1's VIF.
> +     *

Thanks for thoroughly explaining the problem, it's really helpful!

However, I think this would be more useful somewhere else, maybe in the
man page where we describe the flow that sets REGBIT_CONNTRACK_DEFRAG?

> +     * We can't fix this issue by adding a logical flow to drop ct.inv packets
> +     * in the egress pipeline since it will drop all other connections not
> +     * destined to the load balancers.
> +     *
> +     * To fix this issue, we send all the packets to the conntrack in the
> +     * ingress pipeline if a load balancer is configured. We can now
> +     * add a lflow to drop ct.inv packets.
> +     */
>      if (vip_configured) {
> +        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
> +                      100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
>          ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
>                        100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
>      }
> @@ -5477,9 +5497,9 @@ build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs,
>  
>  static void
>  build_acls(struct ovn_datapath *od, struct hmap *lflows,
> -           struct hmap *port_groups)
> +           struct hmap *port_groups, struct hmap *lbs)
>  {
> -    bool has_stateful = has_stateful_acl(od);
> +    bool has_stateful = has_stateful_acl(od) || has_lb_vip(od, lbs);

Nit: I would add parentheses here:

bool has_stateful = (has_stateful_acl(od) || has_lb_vip(od, lbs));

>  
>      /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
>       * default.  A related rule at priority 1 is added below if there
> @@ -5736,19 +5756,32 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) {
>  }
>  
>  static void
> -build_lb(struct ovn_datapath *od, struct hmap *lflows)
> +build_lb(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
>  {
>      /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
>       * default.  */
>      ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
>      ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
>  
> -    if (od->nbs->load_balancer) {
> +    if (od->nbs->n_load_balancer) {
>          for (size_t i = 0; i < od->n_router_ports; i++) {
>              skip_port_from_conntrack(od, od->router_ports[i],
>                                       S_SWITCH_IN_LB, S_SWITCH_OUT_LB,
>                                       UINT16_MAX, lflows);
>          }
> +    }
> +
> +    bool vip_configured = false;
> +    for (int i = 0; i < od->nbs->n_load_balancer; i++) {
> +        struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
> +        struct ovn_lb *lb =
> +            ovn_lb_find(lbs, &nb_lb->header_.uuid);
> +        ovs_assert(lb);
> +
> +        vip_configured = (vip_configured || lb->n_vips);
> +    }
> +
> +    if (vip_configured) {

We should use the new function you added, "has_lb_vip()". And almost the
same code is duplicated in build_pre_lb() where we also call
build_empty_lb_event_flow(). It would be nice to unify this a bit.

Thanks,
Dumitru

>          /* Ingress and Egress LB Table (Priority 65534).
>           *
>           * Send established traffic through conntrack for just NAT. */
> @@ -6622,9 +6655,9 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
>          build_pre_acls(od, lflows);
>          build_pre_lb(od, lflows, meter_groups, lbs);
>          build_pre_stateful(od, lflows);
> -        build_acls(od, lflows, port_groups);
> +        build_acls(od, lflows, port_groups, lbs);
>          build_qos(od, lflows);
> -        build_lb(od, lflows);
> +        build_lb(od, lflows, lbs);
>          build_stateful(od, lflows, lbs);
>      }
>  
> 



More information about the dev mailing list