[ovs-dev] [PATCH v2] ovn: Fix IPv6 DAD failure for container ports

Guru Shetty guru at ovn.org
Fri Oct 5 19:53:15 UTC 2018


On Fri, 5 Oct 2018 at 07:35, <nusiddiq at redhat.com> wrote:

> From: Numan Siddique <nusiddiq at redhat.com>
>
> When a container port is created inside a VM, the below kernel message
> is seen and IPv6 doesn't work on that interface.
>
> [  138.000753] IPv6: vlan4: IPv6 duplicate address <IPv6 LLA> detected!
>
> When a container port sends a ethernet broadcast packet, OVN delivers the
> same
> packet back to the child port (and hence the DAD check fails).
>
> This is because
>  - 'MLF_ALLOW_LOOPBACK_BIT' is set in REG10 in table 0 for the packets
> received
>    from any child port.
>  - for ethernet broadcast packets, Table 33 (OFTABLE_LOCAL_OUTPUT) clones
> the
>    packet for every local port 'P' which belongs to the same datapath i.e
>    'P'->REG15, resubmit(,34)
>  - If REG14 and REG15 are same, Table 34 (OFTABLE_CHECK_LOOPBACK) drops
> the packet
>    if 'MLF_ALLOW_LOOPBACK_BIT' is not set.
>  - But in the case of container ports, this bit will be set and hence
> doesn't gets
>    dropped and eventually gets delivered to the source container port.
>  - The VM's kernel thinks its a DAD packet. The latest kernels (4.19)
> implements
>    the RFC -7527 (enhanced DAD), but it is still a problem for older
> kernels.
>
> This patch fixes the issue by using a new register bit
> (MLF_NESTED_CONTAINER_BIT)
> instead of 'MLF_ALLOW_LOOPBACK_BIT' and sets it in REG10 for the packets
> received
> from child ports so that Table 34 drops the packet for the source port.
>
> Signed-off-by: Numan Siddique <nusiddiq at redhat.com>
>

Thank you for the detailed comments in v2. Super helpful. One minor nit
below.
Acked-by: Gurucharan Shetty <guru at ovn.org>


> ---
>
> v1 -> v2
> -------
>   * Addressed the review comments from Guru - Updated the commit message
>     and added few comments in the code.
>
>  ovn/controller/ofctrl.c   | 29 +++++++++++------
>  ovn/controller/ofctrl.h   |  5 +++
>  ovn/controller/physical.c | 65 ++++++++++++++++++++++++++++++++++-----
>  ovn/lib/logical-fields.h  |  4 +++
>  tests/ovn.at              | 11 +++++++
>  5 files changed, 97 insertions(+), 17 deletions(-)
>
> diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
> index f5b53195d..218612787 100644
> --- a/ovn/controller/ofctrl.c
> +++ b/ovn/controller/ofctrl.c
> @@ -630,9 +630,11 @@ ofctrl_recv(const struct ofp_header *oh, enum ofptype
> type)
>   *
>   * The caller should initialize its own hmap to hold the flows. */
>  void
> -ofctrl_add_flow(struct hmap *desired_flows,
> -                uint8_t table_id, uint16_t priority, uint64_t cookie,
> -                const struct match *match, const struct ofpbuf *actions)
> +ofctrl_check_and_add_flow(struct hmap *desired_flows,
> +                          uint8_t table_id, uint16_t priority, uint64_t
> cookie,
> +                          const struct match *match,
> +                          const struct ofpbuf *actions,
> +                          bool log_duplicate_flow)
>  {
>      struct ovn_flow *f = xmalloc(sizeof *f);
>      f->table_id = table_id;
> @@ -644,13 +646,14 @@ ofctrl_add_flow(struct hmap *desired_flows,
>      f->cookie = cookie;
>
>      if (ovn_flow_lookup(desired_flows, f)) {
> -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
> -        if (!VLOG_DROP_DBG(&rl)) {
> -            char *s = ovn_flow_to_string(f);
> -            VLOG_DBG("dropping duplicate flow: %s", s);
> -            free(s);
> +        if (log_duplicate_flow) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
> +            if (!VLOG_DROP_DBG(&rl)) {
> +                char *s = ovn_flow_to_string(f);
> +                VLOG_DBG("dropping duplicate flow: %s", s);
> +                free(s);
> +            }
>          }
> -
>          ovn_flow_destroy(f);
>          return;
>      }
> @@ -658,6 +661,14 @@ ofctrl_add_flow(struct hmap *desired_flows,
>      hmap_insert(desired_flows, &f->hmap_node, f->hmap_node.hash);
>  }
>
> +void
> +ofctrl_add_flow(struct hmap *desired_flows,
> +                uint8_t table_id, uint16_t priority, uint64_t cookie,
> +                const struct match *match, const struct ofpbuf *actions)
> +{
> +    ofctrl_check_and_add_flow(desired_flows, table_id, priority, cookie,
> +                              match, actions, true);
> +}
>
>  /* ovn_flow. */
>
> diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h
> index e3fc95b05..ae0cfa513 100644
> --- a/ovn/controller/ofctrl.h
> +++ b/ovn/controller/ofctrl.h
> @@ -55,4 +55,9 @@ void ofctrl_add_flow(struct hmap *desired_flows, uint8_t
> table_id,
>                       uint16_t priority, uint64_t cookie,
>                       const struct match *, const struct ofpbuf *ofpacts);
>
> +void ofctrl_check_and_add_flow(struct hmap *desired_flows, uint8_t
> table_id,
> +                               uint16_t priority, uint64_t cookie,
> +                               const struct match *,
> +                               const struct ofpbuf *ofpacts,
> +                               bool log_duplicate_flow);
>  #endif /* ovn/ofctrl.h */
> diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
> index c38d7b09f..ab3b02ab1 100644
> --- a/ovn/controller/physical.c
> +++ b/ovn/controller/physical.c
> @@ -189,7 +189,8 @@ get_zone_ids(const struct sbrec_port_binding *binding,
>
>  static void
>  put_local_common_flows(uint32_t dp_key, uint32_t port_key,
> -                       bool nested_container, const struct zone_ids
> *zone_ids,
> +                       uint32_t parent_port_key,
> +                       const struct zone_ids *zone_ids,
>                         struct ofpbuf *ofpacts_p, struct hmap *flow_table)
>  {
>      struct match match;
> @@ -244,11 +245,19 @@ put_local_common_flows(uint32_t dp_key, uint32_t
> port_key,
>      /* Table 64, Priority 100.
>       * =======================
>       *
> -     * If the packet is supposed to hair-pin because the "loopback"
> -     * flag is set (or if the destination is a nested container),
> +     * If the packet is supposed to hair-pin because the
> +     *   - "loopback" flag is set
> +     *   - or if the destination is a nested container
> +     *   - or if "nested_container" flag is set and the destination is the
> +     *     parent port,
>       * temporarily set the in_port to zero, resubmit to
>       * table 65 for logical-to-physical translation, then restore
> -     * the port number. */
> +     * the port number.
> +     *
> +     * If 'parent_port_key' is set, then the 'port_key' represents a
> nested
> +     * container. */
> +
> +    bool nested_container = parent_port_key ? true: false;
>      match_init_catchall(&match);
>      ofpbuf_clear(ofpacts_p);
>      match_set_metadata(&match, htonll(dp_key));
> @@ -264,6 +273,38 @@ put_local_common_flows(uint32_t dp_key, uint32_t
> port_key,
>      put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
>      ofctrl_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0,
>                      &match, ofpacts_p);
> +
> +    if (nested_container) {
> +        /* It's a nested container and when the packet from the nested
> +         * container is to be sent to the parent port, "nested_container"
> +         * flag will be set. We need to temporarily set the in_port to
> zero
> +         * as mentioned in the comment above.
> +         *
> +         * If a parent port has multiple child ports, then this if
> condition
> +         * will be hit multiple times, but we want to add only one flow.
> +         * ofctrl_add_flow() logs a warning message for duplicate flows.
> +         * So use the function 'ofctrl_check_and_add_flow' which doesn't
> +         * log a warning.
> +         *
> +         * Other option is to add this flow for all the ports which are
> not
> +         * nested containers. In which case we will add this flow for all
> the
> +         * ports even if they don't have any child ports which is
> +         * unnecessary.
> +         */
> +        match_init_catchall(&match);
> +        ofpbuf_clear(ofpacts_p);
> +        match_set_metadata(&match, htonll(dp_key));
> +        match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0,
> parent_port_key);
> +        match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
> +                             MLF_NESTED_CONTAINER, MLF_NESTED_CONTAINER);
> +
> +        put_stack(MFF_IN_PORT, ofpact_put_STACK_PUSH(ofpacts_p));
> +        put_load(0, MFF_IN_PORT, 0, 16, ofpacts_p);
> +        put_resubmit(OFTABLE_LOG_TO_PHY, ofpacts_p);
> +        put_stack(MFF_IN_PORT, ofpact_put_STACK_POP(ofpacts_p));
> +        ofctrl_check_and_add_flow(flow_table, OFTABLE_SAVE_INPORT, 100, 0,
> +                                  &match, ofpacts_p, false);
> +    }
>  }
>
>  static void
> @@ -328,7 +369,7 @@ consider_port_binding(struct ovsdb_idl_index
> *sbrec_chassis_by_name,
>          }
>
>          struct zone_ids binding_zones = get_zone_ids(binding, ct_zones);
> -        put_local_common_flows(dp_key, port_key, false, &binding_zones,
> +        put_local_common_flows(dp_key, port_key, 0, &binding_zones,
>                                 ofpacts_p, flow_table);
>
>          match_init_catchall(&match);
> @@ -452,6 +493,7 @@ consider_port_binding(struct ovsdb_idl_index
> *sbrec_chassis_by_name,
>
>      int tag = 0;
>      bool nested_container = false;
> +    const struct sbrec_port_binding *parent_port = NULL;
>      ofp_port_t ofport;
>      bool is_remote = false;
>      if (binding->parent_port && *binding->parent_port) {
> @@ -463,6 +505,8 @@ consider_port_binding(struct ovsdb_idl_index
> *sbrec_chassis_by_name,
>          if (ofport) {
>              tag = *binding->tag;
>              nested_container = true;
> +            parent_port = lport_lookup_by_name(
> +                sbrec_port_binding_by_name, binding->parent_port);
>          }
>      } else {
>          ofport = u16_to_ofp(simap_get(&localvif_to_ofport,
> @@ -523,7 +567,10 @@ consider_port_binding(struct ovsdb_idl_index
> *sbrec_chassis_by_name,
>           */
>
>          struct zone_ids zone_ids = get_zone_ids(binding, ct_zones);
> -        put_local_common_flows(dp_key, port_key, nested_container,
> &zone_ids,
> +        uint32_t parent_port_key = parent_port ? parent_port->tunnel_key
> : 0;
> +        /* Pass the parent port tunnel key if the port is a nested
> +         * container. */
> +        put_local_common_flows(dp_key, port_key, parent_port_key,
> &zone_ids,
>                                 ofpacts_p, flow_table);
>
>          /* Table 0, Priority 150 and 100.
> @@ -553,8 +600,10 @@ consider_port_binding(struct ovsdb_idl_index
> *sbrec_chassis_by_name,
>              if (nested_container) {
>                  /* When a packet comes from a container sitting behind a
>                   * parent_port, we should let it loopback to other
> containers
> -                 * or the parent_port itself. */
> -                put_load(MLF_ALLOW_LOOPBACK, MFF_LOG_FLAGS, 0, 1,
> ofpacts_p);
> +                 * or the parent_port itself. Indicate this by setting the
> +                 * MLF_NESTED_CONTAINER_BIT in MFF_LOG_FLAGS.*/
> +                put_load(1, MFF_LOG_FLAGS, MLF_NESTED_CONTAINER_BIT, 1,
> +                         ofpacts_p);
>              }
>              ofpact_put_STRIP_VLAN(ofpacts_p);
>          }
> diff --git a/ovn/lib/logical-fields.h b/ovn/lib/logical-fields.h
> index b1dbb035a..ab31327e9 100644
> --- a/ovn/lib/logical-fields.h
> +++ b/ovn/lib/logical-fields.h
> @@ -50,6 +50,7 @@ enum mff_log_flags_bits {
>      MLF_FORCE_SNAT_FOR_DNAT_BIT = 2,
>      MLF_FORCE_SNAT_FOR_LB_BIT = 3,
>      MLF_LOCAL_ONLY_BIT = 4,
> +    MLF_NESTED_CONTAINER_BIT = 5,
>  };
>
>  /* MFF_LOG_FLAGS_REG flag assignments */
> @@ -75,6 +76,9 @@ enum mff_log_flags {
>       * hypervisors should instead only be output to local targets
>       */
>      MLF_LOCAL_ONLY = (1 << MLF_LOCAL_ONLY_BIT),
> +
> +    /* Indicate that a packet has received from a nested container. */

s/has/was

> +    MLF_NESTED_CONTAINER = (1 << MLF_NESTED_CONTAINER_BIT),
>  };
>
>  #endif /* ovn/lib/logical-fields.h */
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 769e09f81..a7aba5ccd 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -7038,6 +7038,17 @@
> packet=${dst_mac}${src_mac}8100000208004500001c000000003f110100${src_ip}${dst_ip
>  echo  $packet >> expected1
>  OVN_CHECK_PACKETS([hv1/vm1-tx.pcap], [expected1])
>
> +# Send broadcast packet from foo1. foo1 should not receive the same
> packet.
> +src_mac="f00000010205"
> +dst_mac="ffffffffffff"
> +src_ip=`ip_to_hex 192 168 1 2`
> +dst_ip=`ip_to_hex 255 255 255 255`
>
> +packet=${dst_mac}${src_mac}8100000108004500001c0000000040110000${src_ip}${dst_ip}0035111100080000
> +as hv1 ovs-appctl netdev-dummy/receive vm1 $packet
> +
> +# expected packet at VM1
> +OVN_CHECK_PACKETS([hv1/vm1-tx.pcap], [expected1])
> +
>  OVN_CLEANUP([hv1],[hv2])
>
>  AT_CLEANUP
> --
> 2.17.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list