[ovs-dev] [PATCH V3 2/2] bridge: Remove the 'Instant' stats.
Alex Wang
alexw at nicira.com
Fri Apr 11 18:24:05 UTC 2014
Hey Ben, Joe,
Could you review this V3 series?
I think it is closer, and better,~
besides, no backlogged transactions when flapping all 5K tunnels for the
entire morning ;D
Alex Wang,
On Fri, Apr 11, 2014 at 11:20 AM, Alex Wang <alexw at nicira.com> wrote:
> This commit removes the 'Instant' stats related logic in bridge.c.
> Instead, the corresponding status is updated immediately after the
> global connectivity sequence number changes.
>
> This change brings the following effects:
>
> 1. The status database update is slightly faster, since the 'Instant'
> stats update period is 100 millisecond.
>
> 2. The main thread will no longer be waken up every 100 ms for 'Instant'
> stats check. Some logic (e.g. ofproto_dpif_send_packet_in()) relies
> on this fact for function checking or I/O. So, after this commit,
> they should explicitly notify the main thread via either registering
> a timeout in poll loop or using the global sequence number.
>
> 3. The update is more efficient, since the netdev's sequence number is
> used to avoid updating unchanged netdev status.
>
> 4. When status is changing super fast, to prevent frequent database
> transaction and backlogged jasonrpc message, logic is added to
> always wait for unfinished status database transaction. This also
> helps batch multiple future status changes in one transaction.
>
> In the experiment with 5K internal ports, when one port is flapping
> at rate of every 0.3 second, there is no additional cpu consumption
> observed. When all ports are flapping at the rate of every 0.3 second,
> there is no backlogged transactions observed.
>
> Signed-off-by: Alex Wang <alexw at nicira.com>
>
> ---
> V2 -> V3:
> - refine the commit log.
> - when the current status database transaction returns "TXN_INCOMPLETE",
> do not allow future status updates until the hanging transaction
> finishes.
> This is extremely helpful in that it can batch multiple future status
> changes into one transaction. And that is main reason for the
> elimination
> of backlogged (jasonrpc) database transactions.
>
> PATCH -> V2:
> - refine the commit message.
> - refine the comments.
> ---
> ofproto/ofproto-dpif.c | 2 +
> vswitchd/bridge.c | 264
> +++++++++++++++++++++---------------------------
> 2 files changed, 116 insertions(+), 150 deletions(-)
>
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 04d5454..7f327b9 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -375,6 +375,8 @@ ofproto_dpif_send_packet_in(struct ofproto_dpif
> *ofproto,
> COVERAGE_INC(packet_in_overflow);
> free(CONST_CAST(void *, pin->up.packet));
> free(pin);
> + } else {
> + seq_change(connectivity_seq_get());
> }
> }
>
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index 44fdb4e..78ec94d 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -77,6 +77,7 @@ struct iface {
> char *name; /* Host network device name. */
> struct netdev *netdev; /* Network device. */
> ofp_port_t ofp_port; /* OpenFlow port number. */
> + uint64_t change_seq;
>
> /* These members are valid only within bridge_reconfigure(). */
> const char *type; /* Usually same as cfg->type. */
> @@ -263,7 +264,8 @@ static void iface_configure_qos(struct iface *, const
> struct ovsrec_qos *);
> static void iface_configure_cfm(struct iface *);
> static void iface_refresh_cfm_stats(struct iface *);
> static void iface_refresh_stats(struct iface *);
> -static void iface_refresh_status(struct iface *);
> +static void iface_refresh_netdev_status(struct iface *);
> +static void iface_refresh_ofproto_status(struct iface *);
> static bool iface_is_synthetic(const struct iface *);
> static ofp_port_t iface_get_requested_ofp_port(
> const struct ovsrec_interface *);
> @@ -1506,7 +1508,7 @@ iface_create(struct bridge *br, const struct
> ovsrec_interface *iface_cfg,
>
> /* Populate initial status in database. */
> iface_refresh_stats(iface);
> - iface_refresh_status(iface);
> + iface_refresh_netdev_status(iface);
>
> /* Add bond fake iface if necessary. */
> if (port_is_bond_fake_iface(port)) {
> @@ -1764,22 +1766,27 @@ dpid_from_hash(const void *data, size_t n)
> }
>
> static void
> -iface_refresh_status(struct iface *iface)
> +iface_refresh_netdev_status(struct iface *iface)
> {
> struct smap smap;
>
> enum netdev_features current;
> - int64_t bps;
> - int mtu;
> - int64_t mtu_64;
> + enum netdev_flags flags;
> + const char *link_state;
> uint8_t mac[ETH_ADDR_LEN];
> - int64_t ifindex64;
> - int error;
> + int64_t bps, mtu_64, ifindex64, link_resets;
> + int mtu, error;
>
> if (iface_is_synthetic(iface)) {
> return;
> }
>
> + if (iface->change_seq == netdev_get_change_seq(iface->netdev)) {
> + return;
> + }
> +
> + iface->change_seq = netdev_get_change_seq(iface->netdev);
> +
> smap_init(&smap);
>
> if (!netdev_get_status(iface->netdev, &smap)) {
> @@ -1790,6 +1797,21 @@ iface_refresh_status(struct iface *iface)
>
> smap_destroy(&smap);
>
> + error = netdev_get_flags(iface->netdev, &flags);
> + if (!error) {
> + const char *state = flags & NETDEV_UP ? "up" : "down";
> +
> + ovsrec_interface_set_admin_state(iface->cfg, state);
> + } else {
> + ovsrec_interface_set_admin_state(iface->cfg, NULL);
> + }
> +
> + link_state = netdev_get_carrier(iface->netdev) ? "up" : "down";
> + ovsrec_interface_set_link_state(iface->cfg, link_state);
> +
> + link_resets = netdev_get_carrier_resets(iface->netdev);
> + ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1);
> +
> error = netdev_get_features(iface->netdev, ¤t, NULL, NULL,
> NULL);
> bps = !error ? netdev_features_to_bps(current, 0) : 0;
> if (bps) {
> @@ -1829,6 +1851,38 @@ iface_refresh_status(struct iface *iface)
> ovsrec_interface_set_ifindex(iface->cfg, &ifindex64, 1);
> }
>
> +static void
> +iface_refresh_ofproto_status(struct iface *iface)
> +{
> + struct smap smap;
> + int current, error;
> +
> + if (iface_is_synthetic(iface)) {
> + return;
> + }
> +
> + current = ofproto_port_is_lacp_current(iface->port->bridge->ofproto,
> + iface->ofp_port);
> + if (current >= 0) {
> + bool bl = current;
> + ovsrec_interface_set_lacp_current(iface->cfg, &bl, 1);
> + } else {
> + ovsrec_interface_set_lacp_current(iface->cfg, NULL, 0);
> + }
> +
> + iface_refresh_cfm_stats(iface);
> +
> + smap_init(&smap);
> + error = ofproto_port_get_bfd_status(iface->port->bridge->ofproto,
> + iface->ofp_port, &smap);
> + /* No need to do the following work if there is no status change. */
> + if (error < 0) {
> + return;
> + }
> + ovsrec_interface_set_bfd_status(iface->cfg, &smap);
> + smap_destroy(&smap);
> +}
> +
> /* Writes 'iface''s CFM statistics to the database. 'iface' must not be
> * synthetic. */
> static void
> @@ -2143,145 +2197,6 @@ refresh_controller_status(void)
> ofproto_free_ofproto_controller_info(&info);
> }
>
> -/* "Instant" stats.
> - *
> - * Some information in the database must be kept as up-to-date as
> possible to
> - * allow controllers to respond rapidly to network outages. We call these
> - * statistics "instant" stats.
> - *
> - * We wish to update these statistics every INSTANT_INTERVAL_MSEC
> milliseconds,
> - * assuming that they've changed. The only means we have to determine
> whether
> - * they have changed are:
> - *
> - * - Try to commit changes to the database. If nothing changed, then
> - * ovsdb_idl_txn_commit() returns TXN_UNCHANGED, otherwise some other
> - * value.
> - *
> - * - instant_stats_run() is called late in the run loop, after anything
> that
> - * might change any of the instant stats.
> - *
> - * We use these two facts together to avoid waking the process up every
> - * INSTANT_INTERVAL_MSEC whether there is any change or not.
> - */
> -
> -/* Minimum interval between writing updates to the instant stats to the
> - * database. */
> -#define INSTANT_INTERVAL_MSEC 100
> -
> -/* Current instant stats database transaction, NULL if there is no ongoing
> - * transaction. */
> -static struct ovsdb_idl_txn *instant_txn;
> -
> -/* Next time (in msec on monotonic clock) at which we will update the
> instant
> - * stats. */
> -static long long int instant_next_txn = LLONG_MIN;
> -
> -/* True if the run loop has run since we last saw that the instant stats
> were
> - * unchanged, that is, this is true if we need to wake up at
> 'instant_next_txn'
> - * to refresh the instant stats. */
> -static bool instant_stats_could_have_changed;
> -
> -static void
> -instant_stats_run(void)
> -{
> - enum ovsdb_idl_txn_status status;
> -
> - instant_stats_could_have_changed = true;
> -
> - if (!instant_txn) {
> - struct bridge *br;
> - uint64_t seq;
> -
> - if (time_msec() < instant_next_txn) {
> - return;
> - }
> - instant_next_txn = time_msec() + INSTANT_INTERVAL_MSEC;
> -
> - seq = seq_read(connectivity_seq_get());
> - if (seq == connectivity_seqno) {
> - return;
> - }
> - connectivity_seqno = seq;
> -
> - instant_txn = ovsdb_idl_txn_create(idl);
> - HMAP_FOR_EACH (br, node, &all_bridges) {
> - struct iface *iface;
> - struct port *port;
> -
> - br_refresh_stp_status(br);
> -
> - HMAP_FOR_EACH (port, hmap_node, &br->ports) {
> - port_refresh_stp_status(port);
> - }
> -
> - HMAP_FOR_EACH (iface, name_node, &br->iface_by_name) {
> - enum netdev_flags flags;
> - struct smap smap;
> - const char *link_state;
> - int64_t link_resets;
> - int current, error;
> -
> - if (iface_is_synthetic(iface)) {
> - continue;
> - }
> -
> - current = ofproto_port_is_lacp_current(br->ofproto,
> - iface->ofp_port);
> - if (current >= 0) {
> - bool bl = current;
> - ovsrec_interface_set_lacp_current(iface->cfg, &bl, 1);
> - } else {
> - ovsrec_interface_set_lacp_current(iface->cfg, NULL,
> 0);
> - }
> -
> - error = netdev_get_flags(iface->netdev, &flags);
> - if (!error) {
> - const char *state = flags & NETDEV_UP ? "up" : "down";
> - ovsrec_interface_set_admin_state(iface->cfg, state);
> - } else {
> - ovsrec_interface_set_admin_state(iface->cfg, NULL);
> - }
> -
> - link_state = netdev_get_carrier(iface->netdev) ? "up" :
> "down";
> - ovsrec_interface_set_link_state(iface->cfg, link_state);
> -
> - link_resets = netdev_get_carrier_resets(iface->netdev);
> - ovsrec_interface_set_link_resets(iface->cfg,
> &link_resets, 1);
> -
> - iface_refresh_cfm_stats(iface);
> -
> - smap_init(&smap);
> - error = ofproto_port_get_bfd_status(br->ofproto,
> iface->ofp_port,
> - &smap);
> - if (error < 0) {
> - continue;
> - }
> - ovsrec_interface_set_bfd_status(iface->cfg, &smap);
> - smap_destroy(&smap);
> - }
> - }
> - }
> -
> - status = ovsdb_idl_txn_commit(instant_txn);
> - if (status != TXN_INCOMPLETE) {
> - ovsdb_idl_txn_destroy(instant_txn);
> - instant_txn = NULL;
> - }
> - if (status == TXN_UNCHANGED) {
> - instant_stats_could_have_changed = false;
> - }
> -}
> -
> -static void
> -instant_stats_wait(void)
> -{
> - if (instant_txn) {
> - ovsdb_idl_txn_wait(instant_txn);
> - } else if (instant_stats_could_have_changed) {
> - poll_timer_wait_until(instant_next_txn);
> - }
> -}
> -
> static void
> bridge_run__(void)
> {
> @@ -2303,6 +2218,14 @@ bridge_run__(void)
> }
> }
>
> +/* When the status update commit returns "TXN_INCOMPLETE", should
> register a
> + * timeout in "STATUS_CHECK_AGAIN_MSEC" to check again. */
> +#define STATUS_CHECK_AGAIN_MSEC 500
> +
> +/* Current status database transaction, NULL if there is no ongoing
> + * transaction. */
> +static struct ovsdb_idl_txn *status_txn;
> +
> void
> bridge_run(void)
> {
> @@ -2311,6 +2234,7 @@ bridge_run(void)
>
> bool vlan_splinters_changed;
> struct bridge *br;
> + uint64_t seq;
>
> ovsrec_open_vswitch_init(&null_cfg);
>
> @@ -2431,7 +2355,6 @@ bridge_run(void)
>
> LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
> iface_refresh_stats(iface);
> - iface_refresh_status(iface);
> }
>
> port_refresh_stp_stats(port);
> @@ -2450,8 +2373,40 @@ bridge_run(void)
> iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL;
> }
>
> + /* Check the need to update status. */
> + seq = seq_read(connectivity_seq_get());
> + if (seq != connectivity_seqno) {
> + enum ovsdb_idl_txn_status status;
> +
> + if (!status_txn) {
> + connectivity_seqno = seq;
> + status_txn = ovsdb_idl_txn_create(idl);
> + HMAP_FOR_EACH (br, node, &all_bridges) {
> + struct port *port;
> +
> + br_refresh_stp_status(br);
> + HMAP_FOR_EACH (port, hmap_node, &br->ports) {
> + struct iface *iface;
> +
> + port_refresh_stp_status(port);
> + LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
> + iface_refresh_netdev_status(iface);
> + iface_refresh_ofproto_status(iface);
> + }
> + }
> + }
> + }
> +
> + status = ovsdb_idl_txn_commit(status_txn);
> + /* Do not destroy "status_txn" if the transaction is
> + * "TXN_INCOMPLETE". */
> + if (status != TXN_INCOMPLETE) {
> + ovsdb_idl_txn_destroy(status_txn);
> + status_txn = NULL;
> + }
> + }
> +
> run_system_stats();
> - instant_stats_run();
> }
>
> void
> @@ -2481,8 +2436,17 @@ bridge_wait(void)
> poll_timer_wait_until(iface_stats_timer);
> }
>
> + /* If the status database transaction is "TXN_INCOMPLETE" in this run,
> + * register a timeout in "STATUS_CHECK_AGAIN_MSEC". Else, wait on the
> + * global connectivity sequence number. Note, this also helps batch
> + * multiple status changes into one transaction. */
> + if (status_txn) {
> + poll_timer_wait_until(time_msec() + STATUS_CHECK_AGAIN_MSEC);
> + } else {
> + seq_wait(connectivity_seq_get(), connectivity_seqno);
> + }
> +
> system_stats_wait();
> - instant_stats_wait();
> }
>
> /* Adds some memory usage statistics for bridges into 'usage', for use
> with
> --
> 1.7.9.5
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20140411/1d4bd189/attachment-0005.html>
More information about the dev
mailing list