[ovs-dev] [PATCH V3 2/2] bridge: Remove the 'Instant' stats.

Alex Wang alexw at nicira.com
Fri Apr 11 18:24:05 UTC 2014


Hey Ben, Joe,

Could you review this V3 series?

I think it is closer, and better,~

besides, no backlogged transactions when flapping all 5K tunnels for the
entire morning ;D

Alex Wang,


On Fri, Apr 11, 2014 at 11:20 AM, Alex Wang <alexw at nicira.com> wrote:

> This commit removes the 'Instant' stats related logic in bridge.c.
> Instead, the corresponding status is updated immediately after the
> global connectivity sequence number changes.
>
> This change brings the following effects:
>
> 1. The status database update is slightly faster, since the 'Instant'
>    stats update period is 100 millisecond.
>
> 2. The main thread will no longer be waken up every 100 ms for 'Instant'
>    stats check.  Some logic (e.g. ofproto_dpif_send_packet_in()) relies
>    on this fact for function checking or I/O.  So, after this commit,
>    they should explicitly notify the main thread via either registering
>    a timeout in poll loop or using the global sequence number.
>
> 3. The update is more efficient, since the netdev's sequence number is
>    used to avoid updating unchanged netdev status.
>
> 4. When status is changing super fast, to prevent frequent database
>    transaction and backlogged jasonrpc message, logic is added to
>    always wait for unfinished status database transaction.  This also
>    helps batch multiple future status changes in one transaction.
>
> In the experiment with 5K internal ports, when one port is flapping
> at rate of every 0.3 second, there is no additional cpu consumption
> observed.  When all ports are flapping at the rate of every 0.3 second,
> there is no backlogged transactions observed.
>
> Signed-off-by: Alex Wang <alexw at nicira.com>
>
> ---
> V2 -> V3:
> - refine the commit log.
> - when the current status database transaction returns "TXN_INCOMPLETE",
>   do not allow future status updates until the hanging transaction
> finishes.
>   This is extremely helpful in that it can batch multiple future status
>   changes into one transaction.  And that is main reason for the
> elimination
>   of backlogged (jasonrpc) database transactions.
>
> PATCH -> V2:
> - refine the commit message.
> - refine the comments.
> ---
>  ofproto/ofproto-dpif.c |    2 +
>  vswitchd/bridge.c      |  264
> +++++++++++++++++++++---------------------------
>  2 files changed, 116 insertions(+), 150 deletions(-)
>
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 04d5454..7f327b9 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -375,6 +375,8 @@ ofproto_dpif_send_packet_in(struct ofproto_dpif
> *ofproto,
>          COVERAGE_INC(packet_in_overflow);
>          free(CONST_CAST(void *, pin->up.packet));
>          free(pin);
> +    } else {
> +        seq_change(connectivity_seq_get());
>      }
>  }
>
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index 44fdb4e..78ec94d 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -77,6 +77,7 @@ struct iface {
>      char *name;                 /* Host network device name. */
>      struct netdev *netdev;      /* Network device. */
>      ofp_port_t ofp_port;        /* OpenFlow port number. */
> +    uint64_t change_seq;
>
>      /* These members are valid only within bridge_reconfigure(). */
>      const char *type;           /* Usually same as cfg->type. */
> @@ -263,7 +264,8 @@ static void iface_configure_qos(struct iface *, const
> struct ovsrec_qos *);
>  static void iface_configure_cfm(struct iface *);
>  static void iface_refresh_cfm_stats(struct iface *);
>  static void iface_refresh_stats(struct iface *);
> -static void iface_refresh_status(struct iface *);
> +static void iface_refresh_netdev_status(struct iface *);
> +static void iface_refresh_ofproto_status(struct iface *);
>  static bool iface_is_synthetic(const struct iface *);
>  static ofp_port_t iface_get_requested_ofp_port(
>      const struct ovsrec_interface *);
> @@ -1506,7 +1508,7 @@ iface_create(struct bridge *br, const struct
> ovsrec_interface *iface_cfg,
>
>      /* Populate initial status in database. */
>      iface_refresh_stats(iface);
> -    iface_refresh_status(iface);
> +    iface_refresh_netdev_status(iface);
>
>      /* Add bond fake iface if necessary. */
>      if (port_is_bond_fake_iface(port)) {
> @@ -1764,22 +1766,27 @@ dpid_from_hash(const void *data, size_t n)
>  }
>
>  static void
> -iface_refresh_status(struct iface *iface)
> +iface_refresh_netdev_status(struct iface *iface)
>  {
>      struct smap smap;
>
>      enum netdev_features current;
> -    int64_t bps;
> -    int mtu;
> -    int64_t mtu_64;
> +    enum netdev_flags flags;
> +    const char *link_state;
>      uint8_t mac[ETH_ADDR_LEN];
> -    int64_t ifindex64;
> -    int error;
> +    int64_t bps, mtu_64, ifindex64, link_resets;
> +    int mtu, error;
>
>      if (iface_is_synthetic(iface)) {
>          return;
>      }
>
> +    if (iface->change_seq == netdev_get_change_seq(iface->netdev)) {
> +        return;
> +    }
> +
> +    iface->change_seq = netdev_get_change_seq(iface->netdev);
> +
>      smap_init(&smap);
>
>      if (!netdev_get_status(iface->netdev, &smap)) {
> @@ -1790,6 +1797,21 @@ iface_refresh_status(struct iface *iface)
>
>      smap_destroy(&smap);
>
> +    error = netdev_get_flags(iface->netdev, &flags);
> +    if (!error) {
> +        const char *state = flags & NETDEV_UP ? "up" : "down";
> +
> +        ovsrec_interface_set_admin_state(iface->cfg, state);
> +    } else {
> +        ovsrec_interface_set_admin_state(iface->cfg, NULL);
> +    }
> +
> +    link_state = netdev_get_carrier(iface->netdev) ? "up" : "down";
> +    ovsrec_interface_set_link_state(iface->cfg, link_state);
> +
> +    link_resets = netdev_get_carrier_resets(iface->netdev);
> +    ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1);
> +
>      error = netdev_get_features(iface->netdev, &current, NULL, NULL,
> NULL);
>      bps = !error ? netdev_features_to_bps(current, 0) : 0;
>      if (bps) {
> @@ -1829,6 +1851,38 @@ iface_refresh_status(struct iface *iface)
>      ovsrec_interface_set_ifindex(iface->cfg, &ifindex64, 1);
>  }
>
> +static void
> +iface_refresh_ofproto_status(struct iface *iface)
> +{
> +    struct smap smap;
> +    int current, error;
> +
> +    if (iface_is_synthetic(iface)) {
> +        return;
> +    }
> +
> +    current = ofproto_port_is_lacp_current(iface->port->bridge->ofproto,
> +                                           iface->ofp_port);
> +    if (current >= 0) {
> +        bool bl = current;
> +        ovsrec_interface_set_lacp_current(iface->cfg, &bl, 1);
> +    } else {
> +        ovsrec_interface_set_lacp_current(iface->cfg, NULL, 0);
> +    }
> +
> +    iface_refresh_cfm_stats(iface);
> +
> +    smap_init(&smap);
> +    error = ofproto_port_get_bfd_status(iface->port->bridge->ofproto,
> +                                        iface->ofp_port, &smap);
> +    /* No need to do the following work if there is no status change. */
> +    if (error < 0) {
> +        return;
> +    }
> +    ovsrec_interface_set_bfd_status(iface->cfg, &smap);
> +    smap_destroy(&smap);
> +}
> +
>  /* Writes 'iface''s CFM statistics to the database. 'iface' must not be
>   * synthetic. */
>  static void
> @@ -2143,145 +2197,6 @@ refresh_controller_status(void)
>      ofproto_free_ofproto_controller_info(&info);
>  }
>
> -/* "Instant" stats.
> - *
> - * Some information in the database must be kept as up-to-date as
> possible to
> - * allow controllers to respond rapidly to network outages.  We call these
> - * statistics "instant" stats.
> - *
> - * We wish to update these statistics every INSTANT_INTERVAL_MSEC
> milliseconds,
> - * assuming that they've changed.  The only means we have to determine
> whether
> - * they have changed are:
> - *
> - *   - Try to commit changes to the database.  If nothing changed, then
> - *     ovsdb_idl_txn_commit() returns TXN_UNCHANGED, otherwise some other
> - *     value.
> - *
> - *   - instant_stats_run() is called late in the run loop, after anything
> that
> - *     might change any of the instant stats.
> - *
> - * We use these two facts together to avoid waking the process up every
> - * INSTANT_INTERVAL_MSEC whether there is any change or not.
> - */
> -
> -/* Minimum interval between writing updates to the instant stats to the
> - * database. */
> -#define INSTANT_INTERVAL_MSEC 100
> -
> -/* Current instant stats database transaction, NULL if there is no ongoing
> - * transaction. */
> -static struct ovsdb_idl_txn *instant_txn;
> -
> -/* Next time (in msec on monotonic clock) at which we will update the
> instant
> - * stats.  */
> -static long long int instant_next_txn = LLONG_MIN;
> -
> -/* True if the run loop has run since we last saw that the instant stats
> were
> - * unchanged, that is, this is true if we need to wake up at
> 'instant_next_txn'
> - * to refresh the instant stats. */
> -static bool instant_stats_could_have_changed;
> -
> -static void
> -instant_stats_run(void)
> -{
> -    enum ovsdb_idl_txn_status status;
> -
> -    instant_stats_could_have_changed = true;
> -
> -    if (!instant_txn) {
> -        struct bridge *br;
> -        uint64_t seq;
> -
> -        if (time_msec() < instant_next_txn) {
> -            return;
> -        }
> -        instant_next_txn = time_msec() + INSTANT_INTERVAL_MSEC;
> -
> -        seq = seq_read(connectivity_seq_get());
> -        if (seq == connectivity_seqno) {
> -            return;
> -        }
> -        connectivity_seqno = seq;
> -
> -        instant_txn = ovsdb_idl_txn_create(idl);
> -        HMAP_FOR_EACH (br, node, &all_bridges) {
> -            struct iface *iface;
> -            struct port *port;
> -
> -            br_refresh_stp_status(br);
> -
> -            HMAP_FOR_EACH (port, hmap_node, &br->ports) {
> -                port_refresh_stp_status(port);
> -            }
> -
> -            HMAP_FOR_EACH (iface, name_node, &br->iface_by_name) {
> -                enum netdev_flags flags;
> -                struct smap smap;
> -                const char *link_state;
> -                int64_t link_resets;
> -                int current, error;
> -
> -                if (iface_is_synthetic(iface)) {
> -                    continue;
> -                }
> -
> -                current = ofproto_port_is_lacp_current(br->ofproto,
> -                                                       iface->ofp_port);
> -                if (current >= 0) {
> -                    bool bl = current;
> -                    ovsrec_interface_set_lacp_current(iface->cfg, &bl, 1);
> -                } else {
> -                    ovsrec_interface_set_lacp_current(iface->cfg, NULL,
> 0);
> -                }
> -
> -                error = netdev_get_flags(iface->netdev, &flags);
> -                if (!error) {
> -                    const char *state = flags & NETDEV_UP ? "up" : "down";
> -                    ovsrec_interface_set_admin_state(iface->cfg, state);
> -                } else {
> -                    ovsrec_interface_set_admin_state(iface->cfg, NULL);
> -                }
> -
> -                link_state = netdev_get_carrier(iface->netdev) ? "up" :
> "down";
> -                ovsrec_interface_set_link_state(iface->cfg, link_state);
> -
> -                link_resets = netdev_get_carrier_resets(iface->netdev);
> -                ovsrec_interface_set_link_resets(iface->cfg,
> &link_resets, 1);
> -
> -                iface_refresh_cfm_stats(iface);
> -
> -                smap_init(&smap);
> -                error = ofproto_port_get_bfd_status(br->ofproto,
> iface->ofp_port,
> -                                                    &smap);
> -                if (error < 0) {
> -                    continue;
> -                }
> -                ovsrec_interface_set_bfd_status(iface->cfg, &smap);
> -                smap_destroy(&smap);
> -            }
> -        }
> -    }
> -
> -    status = ovsdb_idl_txn_commit(instant_txn);
> -    if (status != TXN_INCOMPLETE) {
> -        ovsdb_idl_txn_destroy(instant_txn);
> -        instant_txn = NULL;
> -    }
> -    if (status == TXN_UNCHANGED) {
> -        instant_stats_could_have_changed = false;
> -    }
> -}
> -
> -static void
> -instant_stats_wait(void)
> -{
> -    if (instant_txn) {
> -        ovsdb_idl_txn_wait(instant_txn);
> -    } else if (instant_stats_could_have_changed) {
> -        poll_timer_wait_until(instant_next_txn);
> -    }
> -}
> -
>  static void
>  bridge_run__(void)
>  {
> @@ -2303,6 +2218,14 @@ bridge_run__(void)
>      }
>  }
>
> +/* When the status update commit returns "TXN_INCOMPLETE", should
> register a
> + * timeout in "STATUS_CHECK_AGAIN_MSEC" to check again. */
> +#define STATUS_CHECK_AGAIN_MSEC 500
> +
> +/* Current status database transaction, NULL if there is no ongoing
> + * transaction. */
> +static struct ovsdb_idl_txn *status_txn;
> +
>  void
>  bridge_run(void)
>  {
> @@ -2311,6 +2234,7 @@ bridge_run(void)
>
>      bool vlan_splinters_changed;
>      struct bridge *br;
> +    uint64_t seq;
>
>      ovsrec_open_vswitch_init(&null_cfg);
>
> @@ -2431,7 +2355,6 @@ bridge_run(void)
>
>                      LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
>                          iface_refresh_stats(iface);
> -                        iface_refresh_status(iface);
>                      }
>
>                      port_refresh_stp_stats(port);
> @@ -2450,8 +2373,40 @@ bridge_run(void)
>          iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL;
>      }
>
> +    /* Check the need to update status. */
> +    seq = seq_read(connectivity_seq_get());
> +    if (seq != connectivity_seqno) {
> +        enum ovsdb_idl_txn_status status;
> +
> +        if (!status_txn) {
> +            connectivity_seqno = seq;
> +            status_txn = ovsdb_idl_txn_create(idl);
> +            HMAP_FOR_EACH (br, node, &all_bridges) {
> +                struct port *port;
> +
> +                br_refresh_stp_status(br);
> +                HMAP_FOR_EACH (port, hmap_node, &br->ports) {
> +                    struct iface *iface;
> +
> +                    port_refresh_stp_status(port);
> +                    LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
> +                        iface_refresh_netdev_status(iface);
> +                        iface_refresh_ofproto_status(iface);
> +                    }
> +                }
> +            }
> +        }
> +
> +        status = ovsdb_idl_txn_commit(status_txn);
> +        /* Do not destroy "status_txn" if the transaction is
> +         * "TXN_INCOMPLETE". */
> +        if (status != TXN_INCOMPLETE) {
> +            ovsdb_idl_txn_destroy(status_txn);
> +            status_txn = NULL;
> +        }
> +    }
> +
>      run_system_stats();
> -    instant_stats_run();
>  }
>
>  void
> @@ -2481,8 +2436,17 @@ bridge_wait(void)
>          poll_timer_wait_until(iface_stats_timer);
>      }
>
> +    /* If the status database transaction is "TXN_INCOMPLETE" in this run,
> +     * register a timeout in "STATUS_CHECK_AGAIN_MSEC".  Else, wait on the
> +     * global connectivity sequence number.  Note, this also helps batch
> +     * multiple status changes into one transaction. */
> +    if (status_txn) {
> +        poll_timer_wait_until(time_msec() + STATUS_CHECK_AGAIN_MSEC);
> +    } else {
> +        seq_wait(connectivity_seq_get(), connectivity_seqno);
> +    }
> +
>      system_stats_wait();
> -    instant_stats_wait();
>  }
>
>  /* Adds some memory usage statistics for bridges into 'usage', for use
> with
> --
> 1.7.9.5
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20140411/1d4bd189/attachment-0005.html>


More information about the dev mailing list