[ovs-dev] [PATCH v4 ovn] Fix basic multicast flows for vxlan (non-vtep) tunnels

Numan Siddique numans at ovn.org
Tue Oct 5 01:20:19 UTC 2021


On Mon, Oct 4, 2021 at 12:37 PM Ihar Hrachyshka <ihrachys at redhat.com> wrote:
>
> The 15-bit port key range used for multicast groups can't be covered
> by 12-bit key space available for port keys in VXLAN. To make
> multicast keys work, we have to transform 16-bit multicast port keys
> to 12-bit keys before fanning out packets through VXLAN tunnels.
> Otherwise significant bits are not retained, and multicast / broadcast
> traffic does not reach ports located on other chassis.
>
> This patch introduces a mapping scheme between core 16-bit multicast
> port keys and 12-bit key range available in VXLAN. The scheme is as
> follows:
>
> 1) Before sending a packet through VXLAN tunnel, the most significant
>    bit of a 16-bit port key is copied into the most significant bit of
>    12-bit VXLAN key. The 11 least significant bits of a 16-bit port
>    key are copied to the least significant bits of 12-bit VXLAN key.
>
> 2) When receiving a packet through VXLAN tunnel, the most significant
>    bit of a VXLAN 12-bit port key is copied into the most significant
>    bit of 16-bit port key used in core. The 11 least significant bits
>    of a VXLAN 12-bit port key are copied into the least significant
>    bits of a 16-bit port key used in core.
>
> This change also implies that the range available for multicast port
> keys is more limited and fits into 11-bit space. The same rule should
> be enforced for unicast port keys, like we already do for tunnel keys
> when a VXLAN encap is present in a cluster. This enforcement is
> implied here but missing in master and will be implemented in a
> separate patch. (The missing enforcement is an oversight of the
> original patch that added support for VXLAN tunnels.)
>
> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
> Signed-off-by: Ihar Hrachyshka <ihrachys at redhat.com>

Thanks for fixing this.

The approach taken makes sense to me.  I applied this patch to the main branch
and branch-21.09.

If this needs to be backported to other branches, then please submit
backport patches
as it doesn't apply cleanly to branch-21.06.

Numan

>
> --
>
> v1: initial commit
> v2: updated docs
> v2: removed newly added but unused macros
> v3: rebase
> v4: fix memory leak in consider_mc_group
> ---
>  controller-vtep/gateway.c |   2 +
>  controller/physical.c     | 102 ++++++++++++++++++++++++++++++--------
>  lib/mcast-group-index.h   |   2 +
>  ovn-architecture.7.xml    |  13 +++--
>  tests/ovn.at              |  23 +++++++--
>  5 files changed, 112 insertions(+), 30 deletions(-)
>
> diff --git a/controller-vtep/gateway.c b/controller-vtep/gateway.c
> index e9419138b..288772dc4 100644
> --- a/controller-vtep/gateway.c
> +++ b/controller-vtep/gateway.c
> @@ -61,6 +61,8 @@ create_chassis_rec(struct ovsdb_idl_txn *txn, const char *name,
>      sbrec_encap_set_options(encap_rec, &options);
>      sbrec_encap_set_chassis_name(encap_rec, name);
>      sbrec_chassis_set_encaps(chassis_rec, &encap_rec, 1);
> +    const struct smap oc = SMAP_CONST1(&oc, "is-vtep", "true");
> +    sbrec_chassis_set_other_config(chassis_rec, &oc);
>
>      return chassis_rec;
>  }
> diff --git a/controller/physical.c b/controller/physical.c
> index 0cfb158c8..58e4157f1 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -37,6 +37,7 @@
>  #include "openvswitch/ofp-parse.h"
>  #include "ovn-controller.h"
>  #include "lib/chassis-index.h"
> +#include "lib/mcast-group-index.h"
>  #include "lib/ovn-sb-idl.h"
>  #include "lib/ovn-util.h"
>  #include "physical.h"
> @@ -63,6 +64,7 @@ static void
>  load_logical_ingress_metadata(const struct sbrec_port_binding *binding,
>                                const struct zone_ids *zone_ids,
>                                struct ofpbuf *ofpacts_p);
> +static int64_t get_vxlan_port_key(int64_t port_key);
>
>  /* UUID to identify OF flows not associated with ovsdb rows. */
>  static struct uuid *hc_uuid = NULL;
> @@ -160,8 +162,9 @@ put_encapsulation(enum mf_field_id mff_ovn_geneve,
>      } else if (tun->type == VXLAN) {
>          uint64_t vni = datapath->tunnel_key;
>          if (!is_ramp_switch) {
> -            /* Only some bits are used for regular tunnels. */
> -            vni |= (uint64_t) outport << 12;
> +            /* Map southbound 16-bit port key to limited 12-bit range
> +             * available for VXLAN, which differs for multicast groups. */
> +            vni |= get_vxlan_port_key(outport) << 12;
>          }
>          put_load(vni, MFF_TUN_ID, 0, 24, ofpacts);
>      } else {
> @@ -1372,6 +1375,43 @@ out:
>      }
>  }
>
> +static int64_t
> +get_vxlan_port_key(int64_t port_key)
> +{
> +    if (port_key >= OVN_MIN_MULTICAST) {
> +        /* 0b1<11 least significant bits> */
> +        return OVN_VXLAN_MIN_MULTICAST |
> +            (port_key & (OVN_VXLAN_MIN_MULTICAST - 1));
> +    }
> +    return port_key;
> +}
> +
> +static void
> +fanout_to_chassis(enum mf_field_id mff_ovn_geneve,
> +                  struct sset *remote_chassis,
> +                  const struct hmap *chassis_tunnels,
> +                  const struct sbrec_datapath_binding *datapath,
> +                  uint16_t outport, bool is_ramp_switch,
> +                  struct ofpbuf *remote_ofpacts)
> +{
> +    const char *chassis_name;
> +    const struct chassis_tunnel *prev = NULL;
> +    SSET_FOR_EACH (chassis_name, remote_chassis) {
> +        const struct chassis_tunnel *tun
> +            = chassis_tunnel_find(chassis_tunnels, chassis_name, NULL);
> +        if (!tun) {
> +            continue;
> +        }
> +
> +        if (!prev || tun->type != prev->type) {
> +            put_encapsulation(mff_ovn_geneve, tun, datapath,
> +                              outport, is_ramp_switch, remote_ofpacts);
> +            prev = tun;
> +        }
> +        ofpact_put_OUTPUT(remote_ofpacts)->port = tun->ofport;
> +    }
> +}
> +
>  static void
>  consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>                    enum mf_field_id mff_ovn_geneve,
> @@ -1390,6 +1430,7 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>      }
>
>      struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
> +    struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis);
>      struct match match;
>
>      match_init_catchall(&match);
> @@ -1398,7 +1439,8 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>
>      /* Go through all of the ports in the multicast group:
>       *
> -     *    - For remote ports, add the chassis to 'remote_chassis'.
> +     *    - For remote ports, add the chassis to 'remote_chassis' or
> +     *      'vtep_chassis'.
>       *
>       *    - For local ports (other than logical patch ports), add actions
>       *      to 'ofpacts' to set the output port and resubmit.
> @@ -1465,7 +1507,12 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>              /* Add remote chassis only when localnet port not exist,
>               * otherwise multicast will reach remote ports through localnet
>               * port. */
> -            sset_add(&remote_chassis, port->chassis->name);
> +            if (smap_get_bool(&port->chassis->other_config,
> +                              "is-vtep", false)) {
> +                sset_add(&vtep_chassis, port->chassis->name);
> +            } else {
> +                sset_add(&remote_chassis, port->chassis->name);
> +            }
>          }
>      }
>
> @@ -1490,7 +1537,8 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>       *
>       * Handle output to the remote chassis in the multicast group, if
>       * any. */
> -    if (!sset_is_empty(&remote_chassis) || remote_ofpacts.size > 0) {
> +    if (!sset_is_empty(&remote_chassis) ||
> +            !sset_is_empty(&vtep_chassis) || remote_ofpacts.size > 0) {
>          if (remote_ofpacts.size > 0) {
>              /* Following delivery to logical patch ports, restore the
>               * multicast group as the logical output port. */
> @@ -1498,22 +1546,12 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>                       &remote_ofpacts);
>          }
>
> -        const char *chassis_name;
> -        const struct chassis_tunnel *prev = NULL;
> -        SSET_FOR_EACH (chassis_name, &remote_chassis) {
> -            const struct chassis_tunnel *tun
> -                = chassis_tunnel_find(chassis_tunnels, chassis_name, NULL);
> -            if (!tun) {
> -                continue;
> -            }
> -
> -            if (!prev || tun->type != prev->type) {
> -                put_encapsulation(mff_ovn_geneve, tun, mc->datapath,
> -                                  mc->tunnel_key, true, &remote_ofpacts);
> -                prev = tun;
> -            }
> -            ofpact_put_OUTPUT(&remote_ofpacts)->port = tun->ofport;
> -        }
> +        fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels,
> +                          mc->datapath, mc->tunnel_key, false,
> +                          &remote_ofpacts);
> +        fanout_to_chassis(mff_ovn_geneve, &vtep_chassis, chassis_tunnels,
> +                          mc->datapath, mc->tunnel_key, true,
> +                          &remote_ofpacts);
>
>          if (remote_ofpacts.size) {
>              if (local_ports) {
> @@ -1527,6 +1565,7 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>      ofpbuf_uninit(&ofpacts);
>      ofpbuf_uninit(&remote_ofpacts);
>      sset_destroy(&remote_chassis);
> +    sset_destroy(&vtep_chassis);
>  }
>
>  bool
> @@ -1689,6 +1728,27 @@ physical_run(struct physical_ctx *p_ctx,
>                          &ofpacts, hc_uuid);
>      }
>
> +    /* Add VXLAN specific rules to transform port keys
> +     * from 12 bits to 16 bits used elsewhere. */
> +    HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) {
> +        if (tun->type == VXLAN) {
> +            ofpbuf_clear(&ofpacts);
> +
> +            struct match match = MATCH_CATCHALL_INITIALIZER;
> +            match_set_in_port(&match, tun->ofport);
> +            ovs_be64 mcast_bits = htonll((OVN_VXLAN_MIN_MULTICAST << 12));
> +            match_set_tun_id_masked(&match, mcast_bits, mcast_bits);
> +
> +            put_load(1, MFF_LOG_OUTPORT, 15, 1, &ofpacts);
> +            put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT,  0, 11, &ofpacts);
> +            put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, &ofpacts);
> +            put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);
> +
> +            ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 105, 0,
> +                            &match, &ofpacts, hc_uuid);
> +        }
> +    }
> +
>      /* Handle ramp switch encapsulations. */
>      HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) {
>          if (tun->type != VXLAN) {
> diff --git a/lib/mcast-group-index.h b/lib/mcast-group-index.h
> index 72af117a4..5bc725451 100644
> --- a/lib/mcast-group-index.h
> +++ b/lib/mcast-group-index.h
> @@ -23,6 +23,8 @@ struct sbrec_datapath_binding;
>  #define OVN_MIN_MULTICAST 32768
>  #define OVN_MAX_MULTICAST 65535
>
> +#define OVN_VXLAN_MIN_MULTICAST 2048
> +
>  enum ovn_mcast_tunnel_keys {
>
>      OVN_MCAST_FLOOD_TUNNEL_KEY = OVN_MIN_MULTICAST,
> diff --git a/ovn-architecture.7.xml b/ovn-architecture.7.xml
> index 3d2910358..5c5acfaab 100644
> --- a/ovn-architecture.7.xml
> +++ b/ovn-architecture.7.xml
> @@ -2797,11 +2797,14 @@
>      </li>
>
>      <li>
> -      12-bit logical egress port identifier.  IDs 0 through 32767 have the same
> -      meaning as for logical ingress ports.  IDs 32768 through 65535,
> -      inclusive, may be assigned to logical multicast groups (see the
> -      <code>tunnel_key</code> column in the OVN Southbound
> -      <code>Multicast_Group</code> table).
> +      12-bit logical egress port identifier. IDs 0 through 2047 are used for
> +      unicast output ports. IDs 2048 through 4095, inclusive, may be assigned
> +      to logical multicast groups (see the <code>tunnel_key</code> column in
> +      the OVN Southbound <code>Multicast_Group</code> table). For multicast
> +      group tunnel keys, a special mapping scheme is used to internally
> +      transform from internal OVN 16-bit keys to 12-bit values before sending
> +      packets through a VXLAN tunnel, and back from 12-bit tunnel keys to
> +      16-bit values when receiving packets from a VXLAN tunnel.
>      </li>
>
>      <li>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 172b5c713..3ae4ef752 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -3271,8 +3271,13 @@ AT_SETUP([VLAN transparency, passthru=true, ARP responder disabled])
>  ovn_start
>
>  net_add net
> -check ovs-vsctl add-br br-phys
> -ovn_attach net br-phys 192.168.0.1
> +for i in 1 2; do
> +    sim_add hv-$i
> +    as hv-$i
> +    check ovs-vsctl add-br br-phys
> +    check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +    ovn_attach net br-phys 192.168.0.$i 24 vxlan
> +done
>
>  check ovn-nbctl ls-add ls
>  check ovn-nbctl --wait=sb add Logical-Switch ls other_config vlan-passthru=true
> @@ -3283,6 +3288,7 @@ for i in 1 2; do
>  done
>
>  for i in 1 2; do
> +    as hv-$i
>      check ovs-vsctl add-port br-int vif$i -- set Interface vif$i external-ids:iface-id=lsp$i \
>                                    options:tx_pcap=vif$i-tx.pcap \
>                                    options:rxq_pcap=vif$i-rx.pcap \
> @@ -3298,18 +3304,27 @@ AT_CHECK([grep -w "ls_in_arp_rsp" lsflows | sort], [0], [dnl
>    table=16(ls_in_arp_rsp      ), priority=0    , match=(1), action=(next;)
>  ])
>
> +for i in 1 2; do
> +    : > $i.expected
> +done
> +
>  test_arp() {
>      local inport=$1 outport=$2 sha=$3 spa=$4 tpa=$5 reply_ha=$6
>      tag=8100fefe
>      local request=ffffffffffff${sha}${tag}08060001080006040001${sha}${spa}ffffffffffff${tpa}
> -    ovs-appctl netdev-dummy/receive vif$inport $request
> +    as hv-$inport ovs-appctl netdev-dummy/receive vif$inport $request
>      echo $request >> $outport.expected
>
>      local reply=${sha}${reply_ha}${tag}08060001080006040002${reply_ha}${tpa}${sha}${spa}
> -    ovs-appctl netdev-dummy/receive vif$outport $reply
> +    as hv-$outport ovs-appctl netdev-dummy/receive vif$outport $reply
>      echo $reply >> $inport.expected
>  }
>
> +# Pre-populate the hypervisors' ARP tables so that we don't lose any
> +# packets for ARP resolution (native tunneling doesn't queue packets
> +# for ARP resolution).
> +OVN_POPULATE_ARP
> +
>  test_arp 1 2 f00000000001 0a000001 0a000002 f00000000002
>  test_arp 2 1 f00000000002 0a000002 0a000001 f00000000001
>
> --
> 2.31.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list