[ovs-discuss] RFC: incremental computation for OVN with DDlog

Justin Pettit jpettit at ovn.org
Wed Nov 14 21:24:32 UTC 2018


> On Nov 13, 2018, at 5:25 PM, Russell Bryant <russell at ovn.org> wrote:
> 
> I think this is implied based on the description of how ovn-northd
> would work, but do you expect to make a completely seamless drop-in
> replacement (aside from build-time and run-time dependencies)?  All
> parameters would be identical, no new configuration, and requiring
> zero change to integration projects like ovn-kubernetes or the
> OpenStack OVN integration?

Correct.  There will be no external interface changes.  (Other than new runtime flags to indicate whether the C- or DDlog-based version should be used, of course.)  It will be invisible to the existing integrations.

At least for the 2.11 release, there won't even be a requirement to build the DDlog components at all.  The ovn-northd integration I've been doing checks to see whether DDlog and its dependencies exist, and if they don't, the DDlog hooks are #if'd out.

> In terms of "proven in practice", OVN is at a stage where it's being
> used in production, so ideally we set a very high bar for a switchover
> like this.  It sounds like you're planning for that by enabling
> implementations to work in parallel instead of forcing a hard cutover
> early.  I would hope for something like multiple releases of a new
> implementation in experimental state, allowing plenty of time for
> testing in realistic, larger scale environments, and relying on
> reports of significant successes before a cutover.

Yes, there's no rush to cut over, other than the desire to avoid maintaining two different versions, so we'll make sure it's right.  (The DDlog code should actually be easier to maintain than the C-based version.*)  What we've talked about doing is providing an option that runs DDlog in parallel and reports any differences between what it generated and what the C code generated.  In fact, when DDlog is built, I'm thinking that we'll run the unit tests in this mode, so that we catch any drift early.  I'm hoping that people will try this mode out with actual workloads and report any issues they see.

--Justin


* Below is an (admittedly) cherry-picked example of meter synchronization that was added to ovn-northd in OVS 2.10.

You can look at the current status of the ovn-northd DDlog implementation here:

	https://github.com/ryzhyk/differential-datalog/blob/northd/test/ovn/ovn_northd.dl

I think this represents most of ovn-northd other than the DNS tables, which should be straightforward, and logical flows, which we've just started adding.

-=-=-=-=- DDlog Version -=-=-=-=-

/* Meter_Band table
 *
 * For every row 'mb' of nb.Meter_Band, produce one sb.Out_Meter_Band row
 * whose 'action', 'rate' and 'burst_size' are copied unchanged from 'mb'.
 * 'uuid_name' is set to uuid2str() of the northbound row's '_uuid'
 * (presumably its string form -- confirm against the definition of
 * uuid2str). */
for (mb in nb.Meter_Band) {
    sb.Out_Meter_Band(.uuid_name = uuid2str(mb._uuid),
                      .action = mb.action,
                      .rate = mb.rate,
                      .burst_size = mb.burst_size)
}

/* Meter table
 *
 * For every row 'meter' of nb.Meter, produce one sb.Out_Meter row whose
 * 'name' and 'unit' are copied unchanged.  'bands' is the result of
 * set_map_uuid2str() applied to the northbound 'bands' set (presumably
 * uuid2str() applied to each member, so that the values line up with the
 * 'uuid_name' column of sb.Out_Meter_Band -- confirm against the
 * definition of set_map_uuid2str). */
for (meter in nb.Meter) {
    sb.Out_Meter(.name = meter.name,
                 .unit = meter.unit,
                 .bands = set_map_uuid2str(meter.bands))
}


-=-=-=-=- C Version -=-=-=-=-

/* Flattened copy of the fields of a meter band that take part in
 * comparisons.  'action' is borrowed from the database row, not owned. */
struct band_entry {
    int64_t rate;
    int64_t burst_size;
    const char *action;
};

/* qsort() comparator for 'struct band_entry'.
 *
 * Orders by 'rate' descending, then 'burst_size' descending, then 'action'
 * in ascending strcmp() order.  Returns a negative, zero, or positive value
 * when the first entry sorts before, equal to, or after the second. */
static int
band_cmp(const void *band1_, const void *band2_)
{
    const struct band_entry *lhs = band1_;
    const struct band_entry *rhs = band2_;

    /* Larger rate sorts first. */
    if (lhs->rate > rhs->rate) {
        return -1;
    }
    if (lhs->rate < rhs->rate) {
        return 1;
    }

    /* Rates tie: larger burst size sorts first. */
    if (lhs->burst_size > rhs->burst_size) {
        return -1;
    }
    if (lhs->burst_size < rhs->burst_size) {
        return 1;
    }

    /* Everything numeric ties: fall back to the action string. */
    return strcmp(lhs->action, rhs->action);
}

/* Returns true if the set of bands attached to 'sb_meter' differs from the
 * set attached to 'nb_meter', false if they match.
 *
 * Bands are compared by value on (rate, burst_size, action), independent of
 * the order in which they are stored in either row: both sides are copied
 * into temporary arrays, sorted with band_cmp(), and compared pairwise. */
static bool
bands_need_update(const struct nbrec_meter *nb_meter,
                  const struct sbrec_meter *sb_meter)
{
    if (nb_meter->n_bands != sb_meter->n_bands) {
        return true;
    }

    /* A single band is the most common scenario, so speed up that
     * check by avoiding the allocations and sorts below. */
    if (nb_meter->n_bands == 1) {
        struct nbrec_meter_band *nb_band = nb_meter->bands[0];
        struct sbrec_meter_band *sb_band = sb_meter->bands[0];

        return !(nb_band->rate == sb_band->rate
                 && nb_band->burst_size == sb_band->burst_size
                 && !strcmp(sb_band->action, nb_band->action));
    }

    /* Place the Northbound entries in sorted order. */
    struct band_entry *nb_bands;
    nb_bands = xmalloc(sizeof *nb_bands * nb_meter->n_bands);
    for (size_t i = 0; i < nb_meter->n_bands; i++) {
        struct nbrec_meter_band *nb_band = nb_meter->bands[i];

        nb_bands[i].rate = nb_band->rate;
        nb_bands[i].burst_size = nb_band->burst_size;
        nb_bands[i].action = nb_band->action;
    }
    qsort(nb_bands, nb_meter->n_bands, sizeof *nb_bands, band_cmp);

    /* Place the Southbound entries in sorted order. */
    struct band_entry *sb_bands;
    sb_bands = xmalloc(sizeof *sb_bands * sb_meter->n_bands);
    for (size_t i = 0; i < sb_meter->n_bands; i++) {
        struct sbrec_meter_band *sb_band = sb_meter->bands[i];

        sb_bands[i].rate = sb_band->rate;
        sb_bands[i].burst_size = sb_band->burst_size;
        sb_bands[i].action = sb_band->action;
    }
    qsort(sb_bands, sb_meter->n_bands, sizeof *sb_bands, band_cmp);

    /* Both arrays have the same length (checked above) and the same
     * ordering, so any positional mismatch means the sets differ. */
    bool need_update = false;
    for (size_t i = 0; i < nb_meter->n_bands; i++) {
        /* The action must be compared across the two sides; comparing a
         * Northbound action with itself would hide action-only changes. */
        if (nb_bands[i].rate != sb_bands[i].rate
            || nb_bands[i].burst_size != sb_bands[i].burst_size
            || strcmp(nb_bands[i].action, sb_bands[i].action)) {
            need_update = true;
            break;
        }
    }

    free(nb_bands);
    free(sb_bands);

    return need_update;
}

/* Each entry in the Meter and Meter_Band tables in OVN_Northbound have
 * a corresponding entries in the Meter and Meter_Band tables in
 * OVN_Southbound.
 */
static void
sync_meters(struct northd_context *ctx)
{
    struct shash sb_meters = SHASH_INITIALIZER(&sb_meters);

    const struct sbrec_meter *sb_meter;
    SBREC_METER_FOR_EACH (sb_meter, ctx->ovnsb_idl) {
        shash_add(&sb_meters, sb_meter->name, sb_meter);
    }

    const struct nbrec_meter *nb_meter;
    NBREC_METER_FOR_EACH (nb_meter, ctx->ovnnb_idl) {
        bool new_sb_meter = false;

        sb_meter = shash_find_and_delete(&sb_meters, nb_meter->name);
        if (!sb_meter) {
            sb_meter = sbrec_meter_insert(ctx->ovnsb_txn);
            sbrec_meter_set_name(sb_meter, nb_meter->name);
            new_sb_meter = true;
        }

        if (new_sb_meter || bands_need_update(nb_meter, sb_meter)) {
            struct sbrec_meter_band **sb_bands;
            sb_bands = xcalloc(nb_meter->n_bands, sizeof *sb_bands);
            for (size_t i = 0; i < nb_meter->n_bands; i++) {
                const struct nbrec_meter_band *nb_band = nb_meter->bands[i];

                sb_bands[i] = sbrec_meter_band_insert(ctx->ovnsb_txn);

                sbrec_meter_band_set_action(sb_bands[i], nb_band->action);
                sbrec_meter_band_set_rate(sb_bands[i], nb_band->rate);
                sbrec_meter_band_set_burst_size(sb_bands[i],
                                                nb_band->burst_size);
            }
            sbrec_meter_set_bands(sb_meter, sb_bands, nb_meter->n_bands);
            free(sb_bands);
        }

        sbrec_meter_set_unit(sb_meter, nb_meter->unit);
    }

    struct shash_node *node, *next;
    SHASH_FOR_EACH_SAFE (node, next, &sb_meters) {
        sbrec_meter_delete(node->data);
        shash_delete(&sb_meters, node);
    }
    shash_destroy(&sb_meters);
}



More information about the discuss mailing list