[ovs-dev] [PATCH v3 2/2] OVN: Add support for periodic router advertisements.
Mark Michelson
mmichels at redhat.com
Mon Nov 13 03:17:09 UTC 2017
On Fri, Nov 10, 2017 at 10:19 AM Jakub Sitnicki <jkbs at redhat.com> wrote:
> Hi Mark,
>
> A couple more things caught my attention. Otherwise than that it LGTM.
>
> On Fri, Nov 03, 2017 at 09:24 PM GMT, Mark Michelson wrote:
> > This change adds three new options to the Northbound
> > Logical_Router_Port's ipv6_ra_configs option:
> >
> > * send_periodic: If set to "true", then OVN will send periodic router
> > advertisements out of this router port.
> > * max_interval: The maximum amount of time to wait between sending
> > periodic router advertisements.
> > * min_interval: The minimum amount of time to wait between sending
> > periodic router advertisements.
> >
> > When send_periodic is true, then IPv6 RA configs, as well as some layer
> > 2 and layer 3 information about the router port, are copied to the
> > southbound database. From there, ovn-controller can use this information
> > to know when to send periodic RAs and what to send in them.
> >
> > Because periodic RAs originate from each ovn-controller, the new
> > keep-local flag is set on the packet so that ports don't receive an
> > overabundance of RAs.
> >
> > Signed-off-by: Mark Michelson <mmichels at redhat.com>
> > ---
> > lib/packets.h | 7 +
> > ovn/controller/pinctrl.c | 341
> +++++++++++++++++++++++++++++++++++++++++++++++
> > ovn/northd/ovn-northd.c | 65 +++++++++
> > ovn/ovn-nb.xml | 19 +++
> > tests/ovn-northd.at | 110 +++++++++++++++
> > tests/ovn.at | 150 +++++++++++++++++++++
> > 6 files changed, 692 insertions(+)
> >
> > diff --git a/lib/packets.h b/lib/packets.h
> > index 057935cbf..b8249047f 100644
> > --- a/lib/packets.h
> > +++ b/lib/packets.h
> > @@ -976,6 +976,7 @@ BUILD_ASSERT_DECL(ND_PREFIX_OPT_LEN == sizeof(struct
> ovs_nd_prefix_opt));
> >
> > /* Neighbor Discovery option: MTU. */
> > #define ND_MTU_OPT_LEN 8
> > +#define ND_MTU_DEFAULT 0
> > struct ovs_nd_mtu_opt {
> > uint8_t type; /* ND_OPT_MTU */
> > uint8_t len; /* Always 1. */
> > @@ -1015,6 +1016,12 @@ BUILD_ASSERT_DECL(RA_MSG_LEN == sizeof(struct
> ovs_ra_msg));
> > #define ND_RA_MANAGED_ADDRESS 0x80
> > #define ND_RA_OTHER_CONFIG 0x40
> >
> > +/* Defaults based on MaxRtrInterval and MinRtrInterval from RFC 4861
> section
> > + * 6.2.1
> > + */
> > +#define ND_RA_MAX_INTERVAL_DEFAULT 600
> > +#define ND_RA_MIN_INTERVAL_DEFAULT(max) ((max) >= 9 ? (max) / 3 : (max)
> * 3 / 4)
> > +
> > /*
> > * Use the same struct for MLD and MLD2, naming members as the defined
> fields in
> > * in the corresponding version of the protocol, though they are
> reserved in the
> > diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
> > index 29b2dde0c..428d5048f 100644
> > --- a/ovn/controller/pinctrl.c
> > +++ b/ovn/controller/pinctrl.c
> > @@ -48,6 +48,7 @@
> > #include "socket-util.h"
> > #include "timeval.h"
> > #include "vswitch-idl.h"
> > +#include "lflow.h"
> >
> > VLOG_DEFINE_THIS_MODULE(pinctrl);
> >
> > @@ -88,6 +89,11 @@ static void pinctrl_handle_put_nd_ra_opts(
> > static void pinctrl_handle_nd_ns(const struct flow *ip_flow,
> > const struct match *md,
> > struct ofpbuf *userdata);
> > +static void init_ipv6_ras(void);
> > +static void destroy_ipv6_ras(void);
> > +static void ipv6_ra_wait(void);
> > +static void send_ipv6_ras(const struct controller_ctx *ctx,
> > + struct hmap *local_datapaths);
> >
> > COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
> >
> > @@ -98,6 +104,7 @@ pinctrl_init(void)
> > conn_seq_no = 0;
> > init_put_mac_bindings();
> > init_send_garps();
> > + init_ipv6_ras();
> > }
> >
> > static ovs_be32
> > @@ -1083,8 +1090,340 @@ pinctrl_run(struct controller_ctx *ctx,
> > run_put_mac_bindings(ctx);
> > send_garp_run(ctx, br_int, chassis, chassis_index, local_datapaths,
> > active_tunnels);
> > + send_ipv6_ras(ctx, local_datapaths);
> > }
> >
> > +/* Table of ipv6_ra_state structures, keyed on logical port name */
> > +static struct shash ipv6_ras;
> > +
> > +/* Next IPV6 RA in seconds. */
> > +static long long int send_ipv6_ra_time;
> > +
> > +struct ipv6_network_prefix {
> > + struct in6_addr addr;
> > + unsigned int prefix_len;
> > +};
> > +
> > +struct ipv6_ra_config {
> > + time_t min_interval;
> > + time_t max_interval;
> > + struct eth_addr eth_src;
> > + struct eth_addr eth_dst;
> > + struct in6_addr ipv6_src;
> > + struct in6_addr ipv6_dst;
> > + ovs_be32 mtu;
> > + uint8_t mo_flags; /* Managed/Other flags for RAs */
> > + uint8_t la_flags; /* On-link/autonomous flags for address prefixes
> */
> > + struct ipv6_network_prefix *prefixes;
> > + int n_prefixes;
> > +};
> > +
> > +struct ipv6_ra_state {
> > + long long int next_announce;
> > + struct ipv6_ra_config *config;
> > + int64_t port_key;
> > + int64_t metadata;
> > + bool deleteme;
> > +};
> > +
> > +static void
> > +init_ipv6_ras(void)
> > +{
> > + shash_init(&ipv6_ras);
> > + send_ipv6_ra_time = LLONG_MAX;
> > +}
> > +
> > +static void
> > +ipv6_ra_config_delete(struct ipv6_ra_config *config)
> > +{
> > + if (config) {
> > + free(config->prefixes);
> > + }
> > + free(config);
> > +}
> > +
> > +static void
> > +ipv6_ra_delete(struct ipv6_ra_state *ra)
> > +{
> > + if (ra) {
> > + ipv6_ra_config_delete(ra->config);
> > + }
> > + free(ra);
> > +}
> > +
> > +static void
> > +destroy_ipv6_ras(void)
> > +{
> > + struct shash_node *iter, *next;
> > + SHASH_FOR_EACH_SAFE (iter, next, &ipv6_ras) {
> > + struct ipv6_ra_state *ra = iter->data;
> > + ipv6_ra_delete(ra);
> > + shash_delete(&ipv6_ras, iter);
> > + }
> > + shash_destroy(&ipv6_ras);
> > +}
> > +
> > +static struct ipv6_ra_config *
> > +ipv6_ra_update_config(const struct sbrec_port_binding *pb)
> > +{
> > + struct ipv6_ra_config *config;
> > +
> > + config = xzalloc(sizeof *config);
> > +
> > + config->max_interval = smap_get_int(&pb->options,
> "ipv6_ra_max_interval",
> > + ND_RA_MAX_INTERVAL_DEFAULT);
> > + config->min_interval = smap_get_int(&pb->options,
> "ipv6_ra_min_interval",
> > + ND_RA_MIN_INTERVAL_DEFAULT(config->max_interval));
> > + config->mtu = htonl(smap_get_int(&pb->options, "ipv6_ra_mtu",
> > + ND_MTU_DEFAULT));
> > + config->la_flags = ND_PREFIX_ON_LINK;
> > +
> > + const char *address_mode = smap_get(&pb->options,
> "ipv6_ra_address_mode");
> > + if (!address_mode) {
> > + VLOG_WARN("No address mode specified");
> > + goto fail;
> > + }
> > + if (!strcmp(address_mode, "dhcpv6_stateless")) {
> > + config->mo_flags = IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG;
> > + } else if (!strcmp(address_mode, "dhcpv6_stateful")) {
> > + config->mo_flags = IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG;
> > + } else if (!strcmp(address_mode, "slaac")) {
> > + config->la_flags |= ND_PREFIX_AUTONOMOUS_ADDRESS;
> > + } else {
> > + VLOG_WARN("Invalid address mode %s", address_mode);
> > + goto fail;
> > + }
> > +
> > + const char *prefixes = smap_get(&pb->options, "ipv6_ra_prefixes");
> > + if (prefixes) {
> > + char *prefixes_copy = xstrdup(prefixes);
> > + char *prefix;
> > +
> > + /* Prefixes are in the format:
> > + * addr/len, where
> > + * addr is the network prefix
> > + * and len is the prefix length
> > + */
> > + while ((prefix = strsep(&prefixes_copy, " "))) {
> > + unsigned int prefix_len;
> > + struct in6_addr prefix_addr;
> > + char *error;
> > +
> > + error = ipv6_parse_cidr(prefix, &prefix_addr, &prefix_len);
> > + if (error) {
> > + VLOG_WARN("Error parsing IPv6 prefix: %s", error);
> > + continue;
> > + }
> > +
> > + config->n_prefixes++;
> > + config->prefixes = xrealloc(config->prefixes,
> > + config->n_prefixes * sizeof *config->prefixes);
> > +
> > + struct ipv6_network_prefix *network;
> > + network = &config->prefixes[config->n_prefixes - 1];
> > + network->addr = prefix_addr;
> > + network->prefix_len = prefix_len;
> > + }
> > + free (prefixes_copy);
> > + }
> > +
> > + /* All nodes multicast addresses */
> > + ovs_assert(eth_addr_from_string("33:33:00:00:00:01",
> &config->eth_dst));
> > + ovs_assert(ipv6_parse("ff02::1", &config->ipv6_dst));
> > +
> > + const char *eth_addr = smap_get(&pb->options, "ipv6_ra_src_eth");
> > + if (!eth_addr || !eth_addr_from_string(eth_addr, &config->eth_src))
> {
> > + VLOG_WARN("Invalid ethernet source %s", eth_addr);
> > + goto fail;
> > + }
> > + const char *ip_addr = smap_get(&pb->options, "ipv6_ra_src_addr");
> > + if (!ip_addr || !ipv6_parse(ip_addr, &config->ipv6_src)) {
> > + VLOG_WARN("Invalid IP source %s", ip_addr);
> > + goto fail;
> > + }
> > +
> > + return config;
> > +
> > +fail:
> > + ipv6_ra_config_delete(config);
> > + return NULL;
> > +}
> > +
> > +static long long int
> > +ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval)
> > +{
> > + long long int min_interval_ms = min_interval * 1000;
> > + long long int max_interval_ms = max_interval * 1000;
> > +
> > + return time_msec() + min_interval_ms +
> > + random_range(max_interval_ms - min_interval_ms);
> > +}
> > +
> > +static void
> > +put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
> > + struct ofpbuf *ofpacts)
> > +{
> > + struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
> > + mf_from_id(dst),
> NULL,
> > + NULL);
> > + ovs_be64 n_value = htonll(value);
> > + bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs,
> n_bits);
> > + bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs,
> n_bits);
> > +}
> > +
> > +#define RA_DEFAULT_VALID_LIFETIME 2592000
> > +#define RA_DEFAULT_PREFERRED_LIFETIME 604800
>
> These defaults also come from RFC 4861:
> - AdvValidLifetime (30 days),
> - AdvPreferredLifetime (7 days)
>
> Maybe we should annotate them too? Otherwise they look like arbitrary
> values. Or just group all values that come from this RFC in one place?
>
I've added annotations. I defined these values inside of pinctrl.c since
they had no obvious use being in a header file.
>
> > +static time_t
> > +ipv6_ra_send(struct ipv6_ra_state *ra)
> > +{
> > + if (time_msec() < ra->next_announce) {
> > + return ra->next_announce;
> > + }
> > +
> > + uint64_t packet_stub[128 / 8];
> > + struct dp_packet packet;
> > + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
> > + compose_nd_ra(&packet, ra->config->eth_src, ra->config->eth_dst,
> > + &ra->config->ipv6_src, &ra->config->ipv6_dst,
> > + 255, ra->config->mo_flags, 0, 0, 0, ra->config->mtu);
> > +
> > + for (int i = 0; i < ra->config->n_prefixes; ++i) {
> > + ovs_be128 addr;
> > + memcpy(&addr, &ra->config->prefixes[i].addr, sizeof addr);
> > + packet_put_ra_prefix_opt(&packet,
> ra->config->prefixes[i].prefix_len,
> > + ra->config->la_flags, htonl(RA_DEFAULT_VALID_LIFETIME),
> > + htonl(RA_DEFAULT_PREFERRED_LIFETIME), addr);
> > + }
> > +
> > + uint64_t ofpacts_stub[4096 / 8];
> > + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> > + enum ofp_version version = rconn_get_version(swconn);
> > +
> > + /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
> > + uint32_t dp_key = ra->metadata;
> > + uint32_t port_key = ra->port_key;
> > + put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> > + put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
> > + put_load(1, MFF_LOG_FLAGS, MLF_KEEP_LOCAL_BIT, 1, &ofpacts);
> > + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> > + resubmit->in_port = OFPP_CONTROLLER;
> > + resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
> > +
> > + struct ofputil_packet_out po = {
> > + .packet = dp_packet_data(&packet),
> > + .packet_len = dp_packet_size(&packet),
> > + .buffer_id = UINT32_MAX,
> > + .ofpacts = ofpacts.data,
> > + .ofpacts_len = ofpacts.size,
> > + };
> > +
> > + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> > + enum ofputil_protocol proto =
> ofputil_protocol_from_ofp_version(version);
> > + queue_msg(ofputil_encode_packet_out(&po, proto));
> > + dp_packet_uninit(&packet);
> > + ofpbuf_uninit(&ofpacts);
> > +
> > + ra->next_announce =
> ipv6_ra_calc_next_announce(ra->config->min_interval,
> > + ra->config->max_interval);
> > +
> > + return ra->next_announce;
> > +}
> > +
> > +static void
> > +ipv6_ra_wait(void)
> > +{
> > + poll_timer_wait_until(send_ipv6_ra_time);
> > +}
> > +
> > +static void
> > +send_ipv6_ras(const struct controller_ctx *ctx OVS_UNUSED,
> > + struct hmap *local_datapaths)
> > +{
> > + struct shash_node *iter, *iter_next;
> > +
> > + send_ipv6_ra_time = LLONG_MAX;
> > +
> > + SHASH_FOR_EACH (iter, &ipv6_ras) {
> > + struct ipv6_ra_state *ra = iter->data;
> > + ra->deleteme = true;
> > + }
> > +
> > + const struct local_datapath *ld;
> > + HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> > +
> > + struct sbrec_port_binding *lpval;
> > + const struct sbrec_port_binding *pb;
> > + struct ovsdb_idl_index_cursor cursor;
> > +
> > + lpval = sbrec_port_binding_index_init_row(ctx->ovnsb_idl,
> > +
> &sbrec_table_port_binding);
> > + sbrec_port_binding_index_set_datapath(lpval, ld->datapath);
> > + ovsdb_idl_initialize_cursor(ctx->ovnsb_idl,
> &sbrec_table_port_binding,
> > + "lport-by-datapath", &cursor);
> > + SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
> > + if (!smap_get_bool(&pb->options, "ipv6_ra_send_periodic",
> false)) {
> > + continue;
> > + }
> > +
> > + const char *peer_s;
> > + peer_s = smap_get(&pb->options, "peer");
> > + if (!peer_s) {
> > + continue;
> > + }
> > +
> > + const struct sbrec_port_binding *peer;
> > + peer = lport_lookup_by_name(ctx->ovnsb_idl, peer_s);
> > + if (!peer) {
> > + continue;
> > + }
> > +
> > + struct ipv6_ra_config *config;
> > + config = ipv6_ra_update_config(pb);
> > + if (!config) {
> > + continue;
> > + }
> > +
> > + struct ipv6_ra_state *ra;
> > + ra = shash_find_data(&ipv6_ras, pb->logical_port);
> > + if (!ra) {
> > + ra = xzalloc(sizeof *ra);
> > + ra->config = config;
> > + ra->next_announce = ipv6_ra_calc_next_announce(
> > + ra->config->min_interval,
> > + ra->config->max_interval);
> > + shash_add(&ipv6_ras, pb->logical_port, ra);
> > + } else {
> > + ipv6_ra_config_delete(ra->config);
> > + ra->config = config;
> > + }
> > +
> > + /* Peer is the logical switch port that the logical
> > + * router port is connected to. The RA is injected
> > + * into that logical switch port.
> > + */
> > + ra->port_key = peer->tunnel_key;
> > + ra->metadata = peer->datapath->tunnel_key;
> > + ra->deleteme = false;
> > +
> > + long long int next_ra;
> > + next_ra = ipv6_ra_send(ra);
> > + if (send_ipv6_ra_time > next_ra) {
> > + send_ipv6_ra_time = next_ra;
> > + }
> > + }
> > + }
> > +
> > + /* Remove those that are no longer in the SB database */
> > + SHASH_FOR_EACH_SAFE (iter, iter_next, &ipv6_ras) {
> > + struct ipv6_ra_state *ra = iter->data;
> > + if (ra->deleteme) {
> > + shash_delete(&ipv6_ras, iter);
> > + ipv6_ra_delete(ra);
> > + }
> > + }
> > +}
> > +
> > +
> > void
> > pinctrl_wait(struct controller_ctx *ctx)
> > {
> > @@ -1092,6 +1431,7 @@ pinctrl_wait(struct controller_ctx *ctx)
> > rconn_run_wait(swconn);
> > rconn_recv_wait(swconn);
> > send_garp_wait();
> > + ipv6_ra_wait();
> > }
> >
> > void
> > @@ -1100,6 +1440,7 @@ pinctrl_destroy(void)
> > rconn_destroy(swconn);
> > destroy_put_mac_bindings();
> > destroy_send_garps();
> > + destroy_ipv6_ras();
> > }
> >
> > /* Implementation of the "put_arp" and "put_nd" OVN actions. These
> > diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> > index b4c971320..6dbd5ef15 100644
> > --- a/ovn/northd/ovn-northd.c
> > +++ b/ovn/northd/ovn-northd.c
> > @@ -4445,6 +4445,66 @@ add_router_lb_flow(struct hmap *lflows, struct
> ovn_datapath *od,
> > ds_destroy(&undnat_match);
> > }
> >
> > +#define ND_RA_MAX_INTERVAL_MAX 1800
> > +#define ND_RA_MAX_INTERVAL_MIN 4
> > +
> > +#define ND_RA_MIN_INTERVAL_MAX(max) ((max) * 3 / 4)
> > +#define ND_RA_MIN_INTERVAL_MIN 3
> > +
> > +static void
> > +copy_ra_to_sb(struct ovn_port *op, const char *address_mode)
> > +{
> > + struct smap options;
> > + smap_clone(&options, &op->sb->options);
> > +
> > + smap_add(&options, "ipv6_ra_send_periodic", "true");
> > + smap_add(&options, "ipv6_ra_address_mode", address_mode);
> > +
> > + int max_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
> > + "max_interval", ND_RA_MAX_INTERVAL_DEFAULT);
> > + if (max_interval > ND_RA_MAX_INTERVAL_MAX) {
> > + max_interval = ND_RA_MAX_INTERVAL_MAX;
> > + }
> > + if (max_interval < ND_RA_MAX_INTERVAL_MIN) {
> > + max_interval = ND_RA_MAX_INTERVAL_MIN;
> > + }
> > + smap_add_format(&options, "ipv6_ra_max_interval", "%d",
> max_interval);
> > +
> > + int min_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
> > + "min_interval", ND_RA_MIN_INTERVAL_DEFAULT(max_interval));
> > + if (min_interval > ND_RA_MIN_INTERVAL_MAX(max_interval)) {
> > + min_interval = ND_RA_MIN_INTERVAL_MAX(max_interval);
> > + }
> > + if (min_interval < ND_RA_MIN_INTERVAL_MIN) {
> > + min_interval = ND_RA_MIN_INTERVAL_MIN;
> > + }
> > + smap_add_format(&options, "ipv6_ra_min_interval", "%d",
> min_interval);
> > +
> > + int mtu = smap_get_int(&op->nbrp->ipv6_ra_configs, "mtu",
> ND_MTU_DEFAULT);
> > + if (mtu) {
> > + smap_add_format(&options, "ipv6_ra_mtu", "%d", mtu);
> > + }
> > +
> > + struct ds s = DS_EMPTY_INITIALIZER;
> > + for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; ++i) {
> > + struct ipv6_netaddr *addrs = &op->lrp_networks.ipv6_addrs[i];
> > + if (in6_is_lla(&addrs->network)) {
> > + smap_add(&options, "ipv6_ra_src_addr", addrs->addr_s);
> > + continue;
> > + }
> > + ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen);
> > + }
> > + /* Remove trailing space */
> > + ds_chomp(&s, ' ');
> > + smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s));
> > + ds_destroy(&s);
> > +
> > + smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s);
> > +
> > + sbrec_port_binding_set_options(op->sb, &options);
> > + smap_destroy(&options);
> > +}
> > +
> > static void
> > build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
> > struct hmap *lflows)
> > @@ -5428,6 +5488,11 @@ build_lrouter_flows(struct hmap *datapaths,
> struct hmap *ports,
> > continue;
> > }
> >
> > + if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
> > + false)) {
> > + copy_ra_to_sb(op, address_mode);
> > + }
> > +
> > ds_clear(&match);
> > ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 &&
> nd_rs",
> > op->json_key);
>
> Shouldn't we check if MTU is at least 1280, if present, before copying
> it to SB DB? Like it is done a few lines later in build_lrouter_flows()
> after this hunk.
>
Done!
>
> [...]
>
> Thanks,
> Jakub
>
More information about the dev
mailing list