[ovs-dev] [PATCH v5 ovn 1/5] controller: introduce BFD tx path in ovn-controller

Mark Michelson mmichels at redhat.com
Wed Dec 23 02:34:58 UTC 2020


Hi Lorenzo, see below for some findings.

On 12/22/20 3:54 PM, Lorenzo Bianconi wrote:
> Introduce the capability to transmit BFD packets in ovn-controller.
> Introduce BFD tables in nb/sb dbs in order to configure BFD parameters
> (e.g. min_tx, min_rx, ..) for ovn-controller.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi at redhat.com>
> ---
>   controller/ovn-controller.c |   1 +
>   controller/pinctrl.c        | 297 +++++++++++++++++++++++++++++++++++-
>   controller/pinctrl.h        |   2 +
>   lib/ovn-l7.h                |  19 +++
>   northd/ovn-northd.c         | 188 +++++++++++++++++++++++
>   ovn-nb.ovsschema            |  24 ++-
>   ovn-nb.xml                  |  66 ++++++++
>   ovn-sb.ovsschema            |  27 +++-
>   ovn-sb.xml                  |  78 ++++++++++
>   9 files changed, 697 insertions(+), 5 deletions(-)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 366fc9c06..75512871b 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -2837,6 +2837,7 @@ main(int argc, char *argv[])
>                                           ovnsb_idl_loop.idl),
>                                       sbrec_service_monitor_table_get(
>                                           ovnsb_idl_loop.idl),
> +                                    sbrec_bfd_table_get(ovnsb_idl_loop.idl),
>                                       br_int, chassis,
>                                       &runtime_data->local_datapaths,
>                                       &runtime_data->active_tunnels);
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 7e3abf0a4..c2d4e642a 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -323,6 +323,18 @@ put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
>   static void notify_pinctrl_main(void);
>   static void notify_pinctrl_handler(void);
>   
> +static bool bfd_monitor_should_inject(void);
> +static void bfd_monitor_wait(long long int timeout);
> +static void bfd_monitor_init(void);
> +static void bfd_monitor_destroy(void);
> +static void bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> +                                 OVS_REQUIRES(pinctrl_mutex);
> +static void bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> +                            struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +                            const struct sbrec_chassis *chassis,
> +                            const struct sset *active_tunnels)
> +                            OVS_REQUIRES(pinctrl_mutex);
> +
>   COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
>   COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
>   COVERAGE_DEFINE(pinctrl_drop_controller_event);
> @@ -487,6 +499,7 @@ pinctrl_init(void)
>       ip_mcast_snoop_init();
>       init_put_vport_bindings();
>       init_svc_monitors();
> +    bfd_monitor_init();
>       pinctrl.br_int_name = NULL;
>       pinctrl_handler_seq = seq_create();
>       pinctrl_main_seq = seq_create();
> @@ -3053,6 +3066,8 @@ pinctrl_handler(void *arg_)
>       swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP15_VERSION);
>   
>       while (!latch_is_set(&pctrl->pinctrl_thread_exit)) {
> +        long long int bfd_time = LLONG_MAX;
> +
>           ovs_mutex_lock(&pinctrl_mutex);
>           pinctrl_rconn_setup(swconn, pctrl->br_int_name);
>           ip_mcast_snoop_run();
> @@ -3085,6 +3100,7 @@ pinctrl_handler(void *arg_)
>                   send_ipv6_ras(swconn, &send_ipv6_ra_time);
>                   send_ipv6_prefixd(swconn, &send_prefixd_time);
>                   send_mac_binding_buffered_pkts(swconn);
> +                bfd_monitor_send_msg(swconn, &bfd_time);
>                   ovs_mutex_unlock(&pinctrl_mutex);
>   
>                   ip_mcast_querier_run(swconn, &send_mcast_query_time);
> @@ -3102,6 +3118,7 @@ pinctrl_handler(void *arg_)
>           ip_mcast_querier_wait(send_mcast_query_time);
>           svc_monitors_wait(svc_monitors_next_run_time);
>           ipv6_prefixd_wait(send_prefixd_time);
> +        bfd_monitor_wait(bfd_time);
>   
>           new_seq = seq_read(pinctrl_handler_seq);
>           seq_wait(pinctrl_handler_seq, new_seq);
> @@ -3149,6 +3166,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>               const struct sbrec_dns_table *dns_table,
>               const struct sbrec_controller_event_table *ce_table,
>               const struct sbrec_service_monitor_table *svc_mon_table,
> +            const struct sbrec_bfd_table *bfd_table,
>               const struct ovsrec_bridge *br_int,
>               const struct sbrec_chassis *chassis,
>               const struct hmap *local_datapaths,
> @@ -3179,6 +3197,8 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                            local_datapaths);
>       sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
>                         chassis);
> +    bfd_monitor_run(bfd_table, sbrec_port_binding_by_name, chassis,
> +                    active_tunnels);
>       ovs_mutex_unlock(&pinctrl_mutex);
>   }
>   
> @@ -3722,6 +3742,7 @@ pinctrl_destroy(void)
>       destroy_dns_cache();
>       ip_mcast_snoop_destroy();
>       destroy_svc_monitors();
> +    bfd_monitor_destroy();
>       seq_destroy(pinctrl_main_seq);
>       seq_destroy(pinctrl_handler_seq);
>   }
> @@ -5525,7 +5546,8 @@ may_inject_pkts(void)
>               !shash_is_empty(&send_garp_rarp_data) ||
>               ipv6_prefixd_should_inject() ||
>               !ovs_list_is_empty(&mcast_query_list) ||
> -            !ovs_list_is_empty(&buffered_mac_bindings));
> +            !ovs_list_is_empty(&buffered_mac_bindings) ||
> +            bfd_monitor_should_inject());
>   }
>   
>   static void
> @@ -6312,6 +6334,279 @@ sync_svc_monitors(struct ovsdb_idl_txn *ovnsb_idl_txn,
>   
>   }
>   
> +static struct hmap bfd_monitor_map;
> +
> +struct bfd_entry {
> +    struct hmap_node node;
> +    bool erase;
> +
> +    /* L2 source address */
> +    struct eth_addr src_mac;
> +    /* IPv4 source address */
> +    ovs_be32 ip_src;
> +    /* IPv4 destination address */
> +    ovs_be32 ip_dst;
> +    /* RFC 5881 section 4
> +     * The source port MUST be in the range 49152 through 65535.
> +     * The same UDP source port number MUST be used for all BFD
> +     * Control packets associated with a particular session.
> +     * The source port number SHOULD be unique among all BFD
> +     * sessions on the system
> +     */
> +    uint16_t udp_src;
> +    ovs_be32 disc;
> +
> +    int64_t port_key;
> +    int64_t metadata;
> +
> +    long long int next_tx;
> +};
> +
> +static void
> +bfd_monitor_init(void)
> +{
> +    hmap_init(&bfd_monitor_map);
> +}
> +
> +static void
> +bfd_monitor_destroy(void)
> +{
> +    struct bfd_entry *entry, *next_entry;
> +    HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) {
> +        hmap_remove(&bfd_monitor_map, &entry->node);
> +        free(entry);
> +    }
> +    hmap_destroy(&bfd_monitor_map);
> +}
> +
> +static struct bfd_entry *
> +pinctrl_find_bfd_monitor_entry_by_port(char *ip, uint16_t port)
> +{
> +    struct bfd_entry *entry;
> +    HMAP_FOR_EACH_WITH_HASH (entry, node, hash_string(ip, 0),
> +                             &bfd_monitor_map) {
> +        if (entry->udp_src == port) {
> +            return entry;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static bool
> +bfd_monitor_should_inject(void)
> +{
> +    long long int cur_time = time_msec();
> +    struct bfd_entry *entry;
> +
> +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> +        if (entry->next_tx < cur_time) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
> +static void
> +bfd_monitor_wait(long long int timeout)
> +{
> +    if (!hmap_is_empty(&bfd_monitor_map)) {
> +        poll_timer_wait_until(timeout);
> +    }
> +}
> +
> +static void
> +bfd_monitor_put_bfd_msg(struct bfd_entry *entry, struct dp_packet *packet)
> +{
> +    struct udp_header *udp;
> +    struct bfd_msg *msg;
> +
> +    /* Properly align after the ethernet header */
> +    dp_packet_reserve(packet, 2);
> +    struct eth_header *eth = dp_packet_put_uninit(packet, sizeof *eth);
> +    eth->eth_dst = eth_addr_broadcast;
> +    eth->eth_src = entry->src_mac;
> +    eth->eth_type = htons(ETH_TYPE_IP);
> +
> +    struct ip_header *ip = dp_packet_put_zeros(packet, sizeof *ip);
> +    ip->ip_ihl_ver = IP_IHL_VER(5, 4);
> +    ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
> +    ip->ip_ttl = MAXTTL;
> +    ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
> +    ip->ip_proto = IPPROTO_UDP;
> +    put_16aligned_be32(&ip->ip_src, entry->ip_src);
> +    put_16aligned_be32(&ip->ip_dst, entry->ip_dst);
> +    /* Checksum has already been zeroed by put_zeros call. */
> +    ip->ip_csum = csum(ip, sizeof *ip);
> +
> +    udp = dp_packet_put_zeros(packet, sizeof *udp);
> +    udp->udp_src = htons(entry->udp_src);
> +    udp->udp_dst = htons(BFD_DEST_PORT);
> +    udp->udp_len = htons(sizeof *udp + sizeof *msg);
> +
> +    msg = dp_packet_put_uninit(packet, sizeof *msg);
> +    msg->vers_diag = (BFD_VERSION << 5);
> +    msg->length = BFD_PACKET_LEN;
> +}
> +
> +static void
> +bfd_monitor_send_msg(struct rconn *swconn, long long int *bfd_time)
> +    OVS_REQUIRES(pinctrl_mutex)
> +{
> +    long long int cur_time = time_msec();
> +    struct bfd_entry *entry;
> +
> +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> +        if (cur_time < entry->next_tx) {
> +            goto next;
> +        }
> +
> +        uint64_t packet_stub[256 / 8];
> +        struct dp_packet packet;
> +        dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
> +        bfd_monitor_put_bfd_msg(entry, &packet);
> +
> +        uint64_t ofpacts_stub[4096 / 8];
> +        struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> +
> +        /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
> +        uint32_t dp_key = entry->metadata;
> +        uint32_t port_key = entry->port_key;
> +        put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> +        put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
> +        put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
> +        struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> +        resubmit->in_port = OFPP_CONTROLLER;
> +        resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
> +
> +        struct ofputil_packet_out po = {
> +            .packet = dp_packet_data(&packet),
> +            .packet_len = dp_packet_size(&packet),
> +            .buffer_id = UINT32_MAX,
> +            .ofpacts = ofpacts.data,
> +            .ofpacts_len = ofpacts.size,
> +        };
> +
> +        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> +        enum ofp_version version = rconn_get_version(swconn);
> +        enum ofputil_protocol proto =
> +            ofputil_protocol_from_ofp_version(version);
> +        queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
> +        dp_packet_uninit(&packet);
> +        ofpbuf_uninit(&ofpacts);
> +
> +        entry->next_tx = cur_time + 5000;
> +next:
> +        if (*bfd_time > entry->next_tx) {
> +            *bfd_time = entry->next_tx;
> +        }
> +    }
> +}
> +
> +static void
> +bfd_monitor_run(const struct sbrec_bfd_table *bfd_table,
> +                struct ovsdb_idl_index *sbrec_port_binding_by_name,
> +                const struct sbrec_chassis *chassis,
> +                const struct sset *active_tunnels)
> +    OVS_REQUIRES(pinctrl_mutex)
> +{
> +    struct bfd_entry *entry, *next_entry;
> +    long long int cur_time = time_msec();
> +    bool changed = false;
> +
> +    HMAP_FOR_EACH (entry, node, &bfd_monitor_map) {
> +        entry->erase = true;
> +    }
> +
> +    const struct sbrec_bfd *bt;
> +    SBREC_BFD_TABLE_FOR_EACH (bt, bfd_table) {
> +        const struct sbrec_port_binding *pb
> +            = lport_lookup_by_name(sbrec_port_binding_by_name,
> +                                   bt->logical_port);
> +        if (!pb) {
> +            continue;
> +        }
> +
> +        entry = pinctrl_find_bfd_monitor_entry_by_port(
> +                bt->dst_ip, bt->src_port);
> +        if (!entry) {
> +            ovs_be32 ip_dst, ip_src = htonl(BFD_DEFAULT_SRC_IP);
> +            struct eth_addr ea = eth_addr_zero;
> +            int i;
> +
> +            if (!ip_parse(bt->dst_ip, &ip_dst)) {
> +                continue;
> +            }
> +
> +            const char *peer_s = smap_get(&pb->options, "peer");
> +            if (!peer_s) {
> +                continue;
> +            }
> +
> +            const struct sbrec_port_binding *peer
> +                = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s);
> +            if (!peer) {
> +                continue;
> +            }
> +
> +            char *redirect_name = xasprintf("cr-%s", pb->logical_port);
> +            bool resident = lport_is_chassis_resident(
> +                    sbrec_port_binding_by_name, chassis, active_tunnels,
> +                    redirect_name);
> +            free(redirect_name);
> +            if (!resident && strcmp(pb->type, "l3gateway") &&
> +                pb->chassis != chassis) {
> +                continue;
> +            }
> +
> +            for (i = 0; i < pb->n_mac; i++) {
> +                struct lport_addresses laddrs;
> +
> +                if (!extract_lsp_addresses(pb->mac[i], &laddrs)) {
> +                    continue;
> +                }
> +
> +                ea = laddrs.ea;
> +                if (laddrs.n_ipv4_addrs > 0) {
> +                    ip_src = laddrs.ipv4_addrs[0].addr;
> +                    destroy_lport_addresses(&laddrs);
> +                    break;
> +                }
> +                destroy_lport_addresses(&laddrs);
> +            }
> +
> +            if (eth_addr_is_zero(ea)) {
> +                continue;
> +            }
> +
> +            entry = xzalloc(sizeof *entry);
> +            entry->src_mac = ea;
> +            entry->ip_src = ip_src;
> +            entry->ip_dst = ip_dst;
> +            entry->udp_src = bt->src_port;
> +            entry->disc = htonl(bt->disc);
> +            entry->next_tx = cur_time;
> +            entry->metadata = pb->datapath->tunnel_key;
> +            entry->port_key = pb->tunnel_key;
> +
> +            uint32_t hash = hash_string(bt->dst_ip, 0);
> +            hmap_insert(&bfd_monitor_map, &entry->node, hash);
> +            changed = true;
> +        }
> +        entry->erase = false;
> +    }
> +
> +    HMAP_FOR_EACH_SAFE (entry, next_entry, node, &bfd_monitor_map) {
> +        if (entry->erase) {
> +            hmap_remove(&bfd_monitor_map, &entry->node);
> +            free(entry);
> +        }
> +    }
> +
> +    if (changed) {
> +        notify_pinctrl_handler();
> +    }
> +}
> +
>   static uint16_t
>   get_random_src_port(void)
>   {
> diff --git a/controller/pinctrl.h b/controller/pinctrl.h
> index 4b101ec92..8555d983d 100644
> --- a/controller/pinctrl.h
> +++ b/controller/pinctrl.h
> @@ -31,6 +31,7 @@ struct sbrec_chassis;
>   struct sbrec_dns_table;
>   struct sbrec_controller_event_table;
>   struct sbrec_service_monitor_table;
> +struct sbrec_bfd_table;
>   
>   void pinctrl_init(void);
>   void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
> @@ -44,6 +45,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                    const struct sbrec_dns_table *,
>                    const struct sbrec_controller_event_table *,
>                    const struct sbrec_service_monitor_table *,
> +                 const struct sbrec_bfd_table *,
>                    const struct ovsrec_bridge *, const struct sbrec_chassis *,
>                    const struct hmap *local_datapaths,
>                    const struct sset *active_tunnels);
> diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
> index cdcb8d754..40b00643b 100644
> --- a/lib/ovn-l7.h
> +++ b/lib/ovn-l7.h
> @@ -26,6 +26,25 @@
>   #include "hash.h"
>   #include "ovn/logical-fields.h"
>   
> +#define BFD_PACKET_LEN  24
> +#define BFD_DEST_PORT   3784
> +#define BFD_VERSION     1
> +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */
> +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */
> +
> +struct bfd_msg {
> +    uint8_t vers_diag;
> +    uint8_t flags;
> +    uint8_t mult;
> +    uint8_t length;
> +    ovs_be32 my_disc;
> +    ovs_be32 your_disc;
> +    ovs_be32 min_tx;
> +    ovs_be32 min_rx;
> +    ovs_be32 min_rx_echo;
> +};
> +BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct bfd_msg));
> +
>   /* Generic options map which is used to store dhcpv4 opts and dhcpv6 opts. */
>   struct gen_opts_map {
>       struct hmap_node hmap_node;
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index f6acdc108..8b1ff37af 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -7471,6 +7471,178 @@ build_lswitch_lflows_admission_control(struct ovn_datapath *od,
>       }
>   }
>   
> +struct bfd_entry {
> +    struct hmap_node hmap_node;
> +
> +    const struct sbrec_bfd *sb_bt;
> +
> +    bool ref;
> +    bool found;
> +};
> +
> +static struct bfd_entry *
> +bfd_port_lookup(struct hmap *bfd_map, const char *logical_port,
> +                const char *dst_ip)
> +{
> +    struct bfd_entry *bfd_e;
> +    uint32_t hash;
> +
> +    hash = hash_string(dst_ip, 0);
> +    hash = hash_string(logical_port, hash);
> +    HMAP_FOR_EACH_WITH_HASH (bfd_e, hmap_node, hash, bfd_map) {
> +        if (!strcmp(bfd_e->sb_bt->logical_port, logical_port) &&
> +            !strcmp(bfd_e->sb_bt->dst_ip, dst_ip)) {
> +            return bfd_e;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +static void
> +bfd_cleanup_connections(struct northd_context *ctx, struct hmap *bfd_map)
> +{
> +    const struct nbrec_bfd *nb_bt;
> +    NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) {
> +        struct bfd_entry *bfd_e;
> +
> +        bfd_e = bfd_port_lookup(bfd_map, nb_bt->logical_port, nb_bt->dst_ip);
> +        if (!bfd_e) {
> +            continue;
> +        }
> +
> +        if (!bfd_e->ref && strcmp(nb_bt->status, "admin_down")) {
> +            /* no user for this bfd connection */
> +            nbrec_bfd_set_status(nb_bt, "admin_down");
> +        }
> +
> +        hmap_remove(bfd_map, &bfd_e->hmap_node);
> +        free(bfd_e);
> +    }
> +}
> +
> +static void
> +bfd_table_check_default(const struct nbrec_bfd *nb_bt)

All of these defaults should be mentioned in ovn-nb.xml
It's also probably worth creating constants for the default values.

> +{
> +    if (!nb_bt->status) {
> +        /* default state is admin_down */
> +        nbrec_bfd_set_status(nb_bt, "admin_down");
> +    }
> +
> +    if (!nb_bt->min_tx) {
> +        /* default value for min_tx is 1s */
> +        nbrec_bfd_set_min_tx(nb_bt, 1000);
> +    }
> +
> +    if (!nb_bt->min_rx) {
> +        /* default value for min_rx is 1s */
> +        nbrec_bfd_set_min_rx(nb_bt, 1000);
> +    }

ovn-nb.xml says "If this value is zero, the transmitting system does not 
want the remote system to send any periodic BFD Control packets." This 
seems to indicate that 0 is an allowed value. But then the code here 
sets min_rx to 1000 ms if you set it to 0.

> +
> +    if (!nb_bt->detect_mult) {
> +        /* default value for detect_mult is 5 */
> +        nbrec_bfd_set_detect_mult(nb_bt, 5);
> +    }
> +}
> +
> +static void
> +build_bfd_update_sb_conf(const struct nbrec_bfd *nb_bt,
> +                         const struct sbrec_bfd *sb_bt)
> +{
> +    if (strcmp(nb_bt->dst_ip, sb_bt->dst_ip)) {
> +        sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip);
> +    }
> +
> +    if (strcmp(nb_bt->logical_port, sb_bt->logical_port)) {
> +        sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port);
> +    }
> +
> +    if (strcmp(nb_bt->status, sb_bt->status)) {
> +        sbrec_bfd_set_status(sb_bt, nb_bt->status);
> +    }
> +
> +    if (nb_bt->detect_mult != sb_bt->detect_mult) {
> +        sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult);
> +    }
> +
> +    if (nb_bt->min_tx != sb_bt->min_tx) {
> +        sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx);
> +    }
> +
> +    if (nb_bt->min_rx != sb_bt->min_rx) {
> +        sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx);
> +    }
> +}
> +
> +static void
> +build_bfd_table(struct northd_context *ctx, struct hmap *bfd_connections)
> +{
> +    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
> +    struct bfd_entry *bfd_e, *next_bfd_e;
> +    const struct sbrec_bfd *sb_bt;
> +    uint32_t hash;
> +
> +    SBREC_BFD_FOR_EACH (sb_bt, ctx->ovnsb_idl) {
> +        bfd_e = xmalloc(sizeof *bfd_e);
> +        bfd_e->sb_bt = sb_bt;
> +        bfd_e->found = false;
> +        hash = hash_string(sb_bt->dst_ip, 0);
> +        hash = hash_string(sb_bt->logical_port, hash);
> +        hmap_insert(&sb_only, &bfd_e->hmap_node, hash);
> +    }
> +
> +    const struct nbrec_bfd *nb_bt;
> +    NBREC_BFD_FOR_EACH (nb_bt, ctx->ovnnb_idl) {
> +        bfd_table_check_default(nb_bt);
> +
> +        bfd_e = bfd_port_lookup(&sb_only, nb_bt->logical_port, nb_bt->dst_ip);
> +        if (!bfd_e) {
> +            sb_bt = sbrec_bfd_insert(ctx->ovnsb_txn);
> +            sbrec_bfd_set_logical_port(sb_bt, nb_bt->logical_port);
> +            sbrec_bfd_set_dst_ip(sb_bt, nb_bt->dst_ip);
> +            sbrec_bfd_set_disc(sb_bt, 1 + random_uint32());
> +            /* RFC 5881 section 4
> +             * The source port MUST be in the range 49152 through 65535.
> +             * The same UDP source port number MUST be used for all BFD
> +             * Control packets associated with a particular session.
> +             * The source port number SHOULD be unique among all BFD
> +             * sessions on the system
> +             */

The RFC says that the source UDP port should be unique among all BFD 
sessions on the system. With the code below, it's possible to select the 
same UDP source port for multiple BFD sessions. The likelihood grows 
depending on the number of BFD sessions. This wouldn't be a huge deal 
except that pinctrl uses the UDP source port and destination IP as a way 
to uniquely identify a bfd_entry.

1) Start assignments with port 49152 and then increase by 1 with each 
new BFD entry that is created.
2) Create a bitmap that holds all possible port assignments. Set the 
bits to 1 for ports in use and 0 for ports not in use. Find the first 0 
in the bitmap and then add 49152 to its index. This requires more memory 
than (1) (1000x more memory, in fact) but it also allows for you to 
reuse ports once they are no longer in use.

With both of these approaches, you lose the randomness of the port 
assignment, but you guarantee uniqueness.

> +            uint16_t udp_src = random_range(16383) + 49152;
> +            sbrec_bfd_set_src_port(sb_bt, udp_src);
> +            sbrec_bfd_set_status(sb_bt, nb_bt->status);
> +            sbrec_bfd_set_min_tx(sb_bt, nb_bt->min_tx);
> +            sbrec_bfd_set_min_rx(sb_bt, nb_bt->min_rx);
> +            sbrec_bfd_set_detect_mult(sb_bt, nb_bt->detect_mult);
> +        } else if (strcmp(bfd_e->sb_bt->status, nb_bt->status)) {
> +            if (!strcmp(nb_bt->status, "admin_down") ||
> +                !strcmp(bfd_e->sb_bt->status, "admin_down")) {
> +                sbrec_bfd_set_status(bfd_e->sb_bt, nb_bt->status);
> +            } else {
> +                nbrec_bfd_set_status(nb_bt, bfd_e->sb_bt->status);
> +            }
> +        } else {
> +            build_bfd_update_sb_conf(nb_bt, bfd_e->sb_bt);

Why is the SB BFD config only updated when the NB and SB BFD records 
have the same state?

> +        }
> +        if (bfd_e) {
> +            struct bfd_entry *bfd_conn = xmemdup(bfd_e, sizeof *bfd_e);
> +            hash = hash_string(bfd_e->sb_bt->dst_ip, 0);
> +            hash = hash_string(bfd_e->sb_bt->logical_port, hash);
> +            bfd_conn->ref = false;
> +            hmap_insert(bfd_connections, &bfd_conn->hmap_node, hash);
> +
> +            bfd_e->found = true;

I think the "found" field should be removed from the bfd_entry struct. 
It doesn't have any intrinsic value for the bfd_entry. Instead, its only 
relevance is with regards to the logic in this function.

A good alternative would be to remove bfd_e from the sb_only hmap right 
here. By doing so, all bfd_entries left in sb_only need to have their SB 
records removed.

> +        }
> +    }
> +
> +    HMAP_FOR_EACH_SAFE (bfd_e, next_bfd_e, hmap_node, &sb_only) {
> +        if (!bfd_e->found && bfd_e->sb_bt) {
> +            sbrec_bfd_delete(bfd_e->sb_bt);
> +        }
> +        hmap_remove(&sb_only, &bfd_e->hmap_node);
> +        free(bfd_e);
> +    }

If you take my above advice, this loop can be simplified to:

HMAP_FOR_EACH_POP(bfd_e, hmap_node, &sb_only) {
     if (bfd_e->sb_bt) {
         sbrec_bfd_delete(bfd_e->sb_bt);
     }
     free(bfd_e);
}

> +    hmap_destroy(&sb_only);
> +}
>   
>   /* Returns a string of the IP address of the router port 'op' that
>    * overlaps with 'ip_s".  If one is not found, returns NULL.
> @@ -12410,6 +12582,7 @@ ovnnb_db_run(struct northd_context *ctx,
>       struct hmap igmp_groups;
>       struct shash meter_groups = SHASH_INITIALIZER(&meter_groups);
>       struct hmap lbs;
> +    struct hmap bfd_connections = HMAP_INITIALIZER(&bfd_connections);
>   
>       /* Sync ipsec configuration.
>        * Copy nb_cfg from northbound to southbound database.
> @@ -12504,6 +12677,7 @@ ovnnb_db_run(struct northd_context *ctx,
>       build_ip_mcast(ctx, datapaths);
>       build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups);
>       build_meter_groups(ctx, &meter_groups);
> +    build_bfd_table(ctx, &bfd_connections);
>       build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups,
>                    &igmp_groups, &meter_groups, &lbs);
>       ovn_update_ipv6_prefix(ports);
> @@ -12529,9 +12703,13 @@ ovnnb_db_run(struct northd_context *ctx,
>       HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) {
>           ovn_port_group_destroy(&port_groups, pg);
>       }
> +
> +    bfd_cleanup_connections(ctx, &bfd_connections);
> +
>       hmap_destroy(&igmp_groups);
>       hmap_destroy(&mcast_groups);
>       hmap_destroy(&port_groups);
> +    hmap_destroy(&bfd_connections);
>   
>       struct shash_node *node, *next;
>       SHASH_FOR_EACH_SAFE (node, next, &meter_groups) {
> @@ -13465,6 +13643,16 @@ main(int argc, char *argv[])
>       add_column_noalert(ovnsb_idl_loop.idl,
>                          &sbrec_load_balancer_col_external_ids);
>   
> +    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_bfd);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_logical_port);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_dst_ip);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_status);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_min_tx);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_min_rx);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_detect_mult);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_disc);
> +    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_bfd_col_src_port);
> +
>       struct ovsdb_idl_index *sbrec_chassis_by_name
>           = chassis_index_create(ovnsb_idl_loop.idl);
>   
> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> index b77a2308c..a880482a4 100644
> --- a/ovn-nb.ovsschema
> +++ b/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>   {
>       "name": "OVN_Northbound",
> -    "version": "5.30.0",
> -    "cksum": "3273824429 27172",
> +    "version": "5.31.0",
> +    "cksum": "3663844734 28178",
>       "tables": {
>           "NB_Global": {
>               "columns": {
> @@ -526,5 +526,25 @@
>                       "type": {"key": "string", "value": "string",
>                                "min": 0, "max": "unlimited"}}},
>               "indexes": [["name"]],
> +            "isRoot": true},
> +        "BFD": {
> +            "columns": {
> +                "logical_port": {"type": "string"},
> +                "dst_ip": {"type": "string"},
> +                "min_tx": {"type": {"key": {"type": "integer"}}},
> +                "min_rx": {"type": {"key": {"type": "integer"}}},
> +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> +                "status": {
> +                    "type": {"key": {"type": "string",
> +                             "enum": ["set", ["down", "init", "up",
> +                                              "admin_down"]]},
> +                             "min": 0, "max": 1}},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}},
> +                "options": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["logical_port", "dst_ip"]],
>               "isRoot": true}}
>       }
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index ec6405ff5..8ba1c3c82 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -3741,4 +3741,70 @@
>         </column>
>       </group>
>     </table>
> +
> +  <table name="BFD">
> +    <p>
> +      Contains BFD parameter for ovn-controller bfd configuration
> +    </p>
> +
> +    <group title="Configuration">
> +      <column name="logical_port">
> +        OVN logical port when BFD engine is running.
> +      </column>
> +
> +      <column name="dst_ip">
> +        BFD peer IP address.
> +      </column>
> +
> +      <column name="min_tx">
> +        This is the minimum interval, in milliseconds, that the local
> +        system would like to use when transmitting BFD Control packets,
> +        less any jitter applied. The value zero is reserved.
> +      </column>
> +
> +      <column name="min_rx">
> +        This is the minimum interval, in milliseconds, between received
> +        BFD Control packets that this system is capable of supporting,
> +        less any jitter applied by the sender. If this value is zero,
> +        the transmitting system does not want the remote system to send
> +        any periodic BFD Control packets.
> +      </column>
> +
> +      <column name="detect_mult">
> +        Detection time multiplier.  The negotiated transmit interval,
> +        multiplied by this value, provides the Detection Time for the
> +        receiving system in Asynchronous mode.
> +      </column>
> +
> +      <column name="options">
> +        Reserved for future use.
> +      </column>
> +
> +      <column name="external_ids">
> +        See <em>External IDs</em> at the beginning of this document.
> +      </column>
> +    </group>
> +
> +    <group title="Status Reporting">
> +      <column name="status">
> +        <p>
> +          BFD port logical states. Possible values are:
> +          <ul>
> +            <li>
> +              <code>admin_down</code>
> +            </li>
> +            <li>
> +              <code>down</code>
> +            </li>
> +            <li>
> +              <code>init</code>
> +            </li>
> +            <li>
> +              <code>up</code>
> +            </li>
> +          </ul>
> +        </p>
> +      </column>
> +    </group>
> +  </table>
>   </database>
> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> index 5228839b8..97db6de39 100644
> --- a/ovn-sb.ovsschema
> +++ b/ovn-sb.ovsschema
> @@ -1,7 +1,7 @@
>   {
>       "name": "OVN_Southbound",
> -    "version": "20.12.0",
> -    "cksum": "3969471120 24441",
> +    "version": "20.13.0",
> +    "cksum": "3035725595 25676",
>       "tables": {
>           "SB_Global": {
>               "columns": {
> @@ -484,6 +484,29 @@
>                   "external_ids": {
>                       "type": {"key": "string", "value": "string",
>                                "min": 0, "max": "unlimited"}}},
> +            "isRoot": true},
> +        "BFD": {
> +            "columns": {
> +                "src_port": {"type": {"key": {"type": "integer",
> +                                          "minInteger": 49152,
> +                                          "maxInteger": 65535}}},
> +                "disc": {"type": {"key": {"type": "integer"}}},
> +                "logical_port": {"type": "string"},
> +                "dst_ip": {"type": "string"},
> +                "min_tx": {"type": {"key": {"type": "integer"}}},
> +                "min_rx": {"type": {"key": {"type": "integer"}}},
> +                "detect_mult": {"type": {"key": {"type": "integer"}}},
> +                "status": {
> +                    "type": {"key": {"type": "string",
> +                             "enum": ["set", ["down", "init", "up",
> +                                              "admin_down"]]}}},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}},
> +                "options": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["logical_port", "dst_ip", "src_port", "disc"]],
>               "isRoot": true}
>       }
>   }
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index c13994848..ccfe2e415 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -4231,4 +4231,82 @@ tcp.flags = RST;
>         </column>
>       </group>
>     </table>
> +
> +  <table name="BFD">
> +    <p>
> +      Contains BFD parameter for ovn-controller bfd configuration
> +    </p>
> +
> +    <group title="Configuration">
> +      <column name="src_port">
> +        udp source port used in bfd control packets.
> +        The source port MUST be in the range 49152 through 65535
> +        (RFC5881 section 4).
> +      </column>
> +
> +      <column name="disc">
> +        A unique, nonzero discriminator value generated by the transmitting
> +        system, used to demultiplex multiple BFD sessions between the same pair
> +        of systems.
> +      </column>
> +
> +      <column name="logical_port">
> +        OVN logical port when BFD engine is running.
> +      </column>
> +
> +      <column name="dst_ip">
> +        BFD peer IP address.
> +      </column>
> +
> +      <column name="min_tx">
> +        This is the minimum interval, in milliseconds, that the local
> +        system would like to use when transmitting BFD Control packets,
> +        less any jitter applied. The value zero is reserved.
> +      </column>
> +
> +      <column name="min_rx">
> +        This is the minimum interval, in milliseconds, between received
> +        BFD Control packets that this system is capable of supporting,
> +        less any jitter applied by the sender. If this value is zero,
> +        the transmitting system does not want the remote system to send
> +        any periodic BFD Control packets.
> +      </column>
> +
> +      <column name="detect_mult">
> +        Detection time multiplier.  The negotiated transmit interval,
> +        multiplied by this value, provides the Detection Time for the
> +        receiving system in Asynchronous mode.
> +      </column>
> +
> +      <column name="options">
> +        Reserved for future use.
> +      </column>
> +
> +      <column name="external_ids">
> +        See <em>External IDs</em> at the beginning of this document.
> +      </column>
> +    </group>
> +
> +    <group title="Status Reporting">
> +      <column name="status">
> +        <p>
> +          BFD port logical states. Possible values are:
> +          <ul>
> +            <li>
> +              <code>admin_down</code>
> +            </li>
> +            <li>
> +              <code>down</code>
> +            </li>
> +            <li>
> +              <code>init</code>
> +            </li>
> +            <li>
> +              <code>up</code>
> +            </li>
> +          </ul>
> +        </p>
> +      </column>
> +    </group>
> +  </table>
>   </database>
> 




More information about the dev mailing list