[ovs-dev] [RFC 7/9] ofproto-dpif: Allow caching of xlate_actions() effects.
Joe Stringer
joestringer at nicira.com
Fri Mar 7 01:20:30 UTC 2014
This patch adds a new object called 'struct xlate_cache' which can be
set in 'struct xlate_in', and passed to xlate_actions() to cache the
modules affected by this flow translation. Subsequently, the caller can
pass the xcache to xlate_from_cache() to credit stats and perform side
effects for a lower cost than full flow translation.
Initial testing shows a mild TCP CRR performance increase (~5%) and a
drastic decrease in flow dump duration. This is expected to allow
significantly more flows to be maintained in the datapath.
Signed-off-by: Joe Stringer <joestringer at nicira.com>
---
ofproto/ofproto-dpif-upcall.c | 23 +++
ofproto/ofproto-dpif-xlate.c | 314 ++++++++++++++++++++++++++++++++++++++++-
ofproto/ofproto-dpif-xlate.h | 15 ++
3 files changed, 348 insertions(+), 4 deletions(-)
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 7dbd7f7..d8644de 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -168,6 +168,8 @@ struct udpif_key {
bool mark; /* Used by mark and sweep GC algorithm. */
struct odputil_keybuf key_buf; /* Memory for 'key'. */
+ struct xlate_cache *xc; /* Cache for xlate entries affected by this
+ ukey. Used for stats and learning. */
};
/* 'udpif_flow_dump's hold the state associated with one iteration in a flow
@@ -1289,6 +1291,7 @@ ukey_create(const struct nlattr *key, size_t key_len, long long int used)
ukey->mark = false;
ukey->created = used ? used : time_msec();
memset(&ukey->stats, 0, sizeof ukey->stats);
+ ukey->xc = NULL;
return ukey;
}
@@ -1297,6 +1300,7 @@ static void
ukey_delete(struct revalidator *revalidator, struct udpif_key *ukey)
{
hmap_remove(&revalidator->ukeys, &ukey->hmap_node);
+ xlate_cache_delete(ukey->xc);
free(ukey);
}
@@ -1346,14 +1350,28 @@ revalidate_ukey(struct udpif *udpif, struct udpif_flow_dump *udump,
goto exit;
}
+ if (ukey->xc && !udump->need_revalidate) {
+ xlate_from_cache(ukey->xc, &push);
+ ok = true;
+ goto exit;
+ }
+
error = xlate_receive(udpif->backer, NULL, ukey->key, ukey->key_len, &flow,
&ofproto, NULL, NULL, NULL, &odp_in_port);
if (error) {
goto exit;
}
+ if (udump->need_revalidate) {
+ xlate_cache_clear(ukey->xc);
+ }
+ if (!ukey->xc) {
+ ukey->xc = xlate_cache_new();
+ }
+
xlate_in_init(&xin, ofproto, &flow, NULL, push.tcp_flags, NULL);
xin.resubmit_stats = push.n_packets ? &push : NULL;
+ xin.xc = ukey->xc;
xin.may_learn = push.n_packets > 0;
xin.skip_wildcards = !udump->need_revalidate;
xlate_actions(&xin, &xout);
@@ -1454,6 +1472,11 @@ push_dump_ops(struct revalidator *revalidator,
struct netflow *netflow;
struct flow flow;
+ if (op->ukey && op->ukey->xc) {
+ xlate_from_cache(op->ukey->xc, push);
+ continue;
+ }
+
if (!xlate_receive(udpif->backer, NULL, op->op.u.flow_del.key,
op->op.u.flow_del.key_len, &flow, &ofproto,
NULL, NULL, &netflow, NULL)) {
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index eb4931e..ecc545f 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -215,6 +215,65 @@ struct skb_priority_to_dscp {
uint8_t dscp; /* DSCP bits to mark outgoing traffic with. */
};
+enum stats_type {
+ XC_RULE, /* Credit stats to a rule. */
+ XC_BOND, /* Account bytes to a bond by hash. */
+ XC_NETDEV, /* Credit tx/rx netdev stats and bfd rx. */
+ XC_NETFLOW, /* Update netflow with the cached flow. */
+ XC_MIRROR, /* Update mirror packet and byte stats. */
+ XC_LEARN, /* Refresh the rule cached for a 'learn' action. */
+ XC_NORMAL, /* Update the MAC learning table. */
+};
+
+/* xlate_cache entries hold enough information to perform the side effects
+ * of xlate_actions() for a rule, without needing to perform rule translation
+ * from scratch. The primary usage of these is to submit statistics to objects
+ * that a flow relates to, although they may be used for other effects as well
+ * (for instance, refreshing hard timeouts for learned flows). */
+struct xc_entry {
+ enum stats_type type; /* Selects which member of 'u' is in use. */
+ union {
+ struct rule_dpif *rule; /* XC_RULE: referenced rule. */
+ struct { /* XC_NETDEV: each handle is checked for null before use. */
+ struct netdev *tx;
+ struct netdev *rx;
+ struct bfd *bfd;
+ } dev;
+ struct { /* XC_NETFLOW. */
+ struct netflow *netflow;
+ struct flow *flow; /* XXX: Could this be replaced with a hash? */
+ ofp_port_t iface; /* Copied from xout->nf_output_iface. */
+ } nf;
+ struct { /* XC_MIRROR. */
+ struct mbridge *mbridge;
+ mirror_mask_t mirrors; /* Copied from xout->mirrors. */
+ } mirror;
+ struct { /* XC_BOND. */
+ struct bond *bond;
+ unsigned int hash; /* Result of bond_hash() for the flow. */
+ } bond;
+ struct { /* XC_LEARN. */
+ struct ofproto_dpif *ofproto;
+ struct rule_dpif *rule; /* Unreferenced by xlate_cache_clear(). */
+ } learn;
+ struct { /* XC_NORMAL. */
+ struct ofproto_dpif *ofproto; /* No reference is taken on this. */
+ struct flow *flow; /* Heap copy, freed by xlate_cache_clear(). */
+ int vlan;
+ } normal;
+ } u;
+};
+
+/* Iterates 'entry' over every 'struct xc_entry' stored in 'xc'.
+ *
+ * 'entries' is a caller-provided scratch 'struct ofpbuf'.  It receives a
+ * shallow copy of 'xc->entries' and is consumed as the loop advances.  The
+ * copy is made inside the 'for' initializer so that the macro expands to a
+ * single statement and stays correct after an unbraced 'if' or 'else'. */
+#define XC_ENTRY_FOR_EACH(entry, entries, xc)                        \
+    for ((entries) = (xc)->entries,                                   \
+         (entry) = ofpbuf_try_pull(&(entries), sizeof *(entry));      \
+         (entry);                                                     \
+         (entry) = ofpbuf_try_pull(&(entries), sizeof *(entry)))
+
+struct xlate_cache {
+ struct ofpbuf entries; /* Back-to-back 'struct xc_entry' records. */
+};
+
static struct hmap xbridges = HMAP_INITIALIZER(&xbridges);
static struct hmap xbundles = HMAP_INITIALIZER(&xbundles);
static struct hmap xports = HMAP_INITIALIZER(&xports);
@@ -244,6 +303,9 @@ static void clear_skb_priorities(struct xport *);
static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority,
uint8_t *dscp);
+static struct xc_entry *xlate_cache_add_entry(struct xlate_cache *xc,
+ enum stats_type type);
+
void
xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
struct dpif *dpif, struct rule_dpif *miss_rule,
@@ -1141,6 +1203,15 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
bond_account(out_xbundle->bond, &ctx->xin->flow, vid,
ctx->xin->resubmit_stats->n_bytes);
}
+ /* Not sure this is needed, but for completeness... */
+ if (ctx->xin->xc && bond_should_account(out_xbundle->bond)) {
+ struct xc_entry *entry;
+
+ entry = xlate_cache_add_entry(ctx->xin->xc, XC_BOND);
+ entry->u.bond.bond = bond_ref(out_xbundle->bond);
+ entry->u.bond.hash = bond_hash(out_xbundle->bond, &ctx->xin->flow,
+ vid);
+ }
}
old_tci = *flow_tci;
@@ -1427,6 +1498,17 @@ xlate_normal(struct xlate_ctx *ctx)
if (ctx->xin->may_learn) {
update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle);
}
+ if (ctx->xin->xc) {
+ struct xc_entry *entry;
+
+ /* Save enough info to update mac learning table later. */
+ entry = xlate_cache_add_entry(ctx->xin->xc, XC_NORMAL);
+ /* XXX: Do we need to ref this ofproto somehow? */
+ entry->u.normal.ofproto = ctx->xin->ofproto;
+ entry->u.normal.flow = xmalloc(sizeof *flow);
+ memcpy(entry->u.normal.flow, flow, sizeof *flow);
+ entry->u.normal.vlan = vlan;
+ }
/* Determine output bundle. */
ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
@@ -1744,6 +1826,14 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
bfd_account_rx(peer->bfd, ctx->xin->resubmit_stats);
}
}
+ if (ctx->xin->xc) {
+ struct xc_entry *entry;
+
+ entry = xlate_cache_add_entry(ctx->xin->xc, XC_NETDEV);
+ entry->u.dev.tx = netdev_ref(xport->netdev);
+ entry->u.dev.rx = netdev_ref(peer->netdev);
+ entry->u.dev.bfd = bfd_ref(peer->bfd);
+ }
return;
}
@@ -1776,6 +1866,12 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
if (ctx->xin->resubmit_stats) {
netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
}
+ if (ctx->xin->xc) {
+ struct xc_entry *entry;
+
+ entry = xlate_cache_add_entry(ctx->xin->xc, XC_NETDEV);
+ entry->u.dev.tx = netdev_ref(xport->netdev);
+ }
out_port = odp_port;
commit_odp_tunnel_action(flow, &ctx->base_flow,
&ctx->xout->odp_actions);
@@ -1830,6 +1926,13 @@ xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule)
if (ctx->xin->resubmit_stats) {
rule_dpif_credit_stats(rule, ctx->xin->resubmit_stats);
}
+ if (ctx->xin->xc) {
+ struct xc_entry *entry;
+
+ entry = xlate_cache_add_entry(ctx->xin->xc, XC_RULE);
+ rule_dpif_ref(rule);
+ entry->u.rule = rule;
+ }
ctx->resubmits++;
ctx->recurse++;
@@ -2430,6 +2533,15 @@ xlate_learn_action(struct xlate_ctx *ctx,
learn_execute(learn, &ctx->xin->flow, &fm, &ofpacts);
ofproto_dpif_flow_mod(ctx->xbridge->ofproto, &fm);
ofpbuf_uninit(&ofpacts);
+
+ if (ctx->xin->xc) {
+ struct xc_entry *entry;
+
+ entry = xlate_cache_add_entry(ctx->xin->xc, XC_LEARN);
+ entry->u.learn.ofproto = ctx->xin->ofproto;
+ rule_dpif_lookup(ctx->xbridge->ofproto, &ctx->xin->flow, NULL,
+ &entry->u.learn.rule);
+ }
}
static void
@@ -2809,6 +2921,7 @@ xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto,
xin->resubmit_hook = NULL;
xin->report_hook = NULL;
xin->resubmit_stats = NULL;
+ xin->xc = NULL;
xin->skip_wildcards = false;
}
@@ -3014,6 +3127,13 @@ xlate_actions__(struct xlate_in *xin, struct xlate_out *xout)
if (ctx.xin->resubmit_stats) {
rule_dpif_credit_stats(rule, ctx.xin->resubmit_stats);
}
+ if (ctx.xin->xc) {
+ struct xc_entry *entry;
+
+ entry = xlate_cache_add_entry(ctx.xin->xc, XC_RULE);
+ rule_dpif_ref(rule);
+ entry->u.rule = rule;
+ }
ctx.rule = rule;
}
xout->fail_open = ctx.rule && rule_dpif_is_fail_open(ctx.rule);
@@ -3063,10 +3183,19 @@ xlate_actions__(struct xlate_in *xin, struct xlate_out *xout)
}
in_port = get_ofp_port(ctx.xbridge, flow->in_port.ofp_port);
- if (in_port && in_port->is_tunnel && ctx.xin->resubmit_stats) {
- netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
- if (in_port->bfd) {
- bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);
+ if (in_port && in_port->is_tunnel) {
+ if (ctx.xin->resubmit_stats) {
+ netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
+ if (in_port->bfd) {
+ bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);
+ }
+ }
+ if (ctx.xin->xc) {
+ struct xc_entry *entry;
+
+ entry = xlate_cache_add_entry(ctx.xin->xc, XC_NETDEV);
+ entry->u.dev.rx = netdev_ref(in_port->netdev);
+ entry->u.dev.bfd = bfd_ref(in_port->bfd);
}
}
@@ -3148,6 +3277,23 @@ xlate_actions__(struct xlate_in *xin, struct xlate_out *xout)
}
}
+ if (ctx.xin->xc) {
+ struct xc_entry *entry;
+
+ if (mbridge_has_mirrors(ctx.xbridge->mbridge)) {
+ entry = xlate_cache_add_entry(ctx.xin->xc, XC_MIRROR);
+ entry->u.mirror.mbridge = mbridge_ref(ctx.xbridge->mbridge);
+ entry->u.mirror.mirrors = xout->mirrors;
+ }
+ if (ctx.xbridge->netflow) {
+ entry = xlate_cache_add_entry(ctx.xin->xc, XC_NETFLOW);
+ entry->u.nf.netflow = netflow_ref(ctx.xbridge->netflow);
+ entry->u.nf.flow = xmalloc(sizeof *flow);
+ memcpy(entry->u.nf.flow, flow, sizeof *flow);
+ entry->u.nf.iface = xout->nf_output_iface;
+ }
+ }
+
ofpbuf_uninit(&ctx.stack);
ofpbuf_uninit(&ctx.action_set);
@@ -3204,3 +3350,163 @@ xlate_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
&output.ofpact, sizeof output,
packet);
}
+
+/* Credits 'push' to whichever of the tx netdev, rx netdev and bfd handles
+ * are non-null in the XC_NETDEV entry 'entry'. */
+static void
+xlate_credit_stats_dev(struct xc_entry *entry, struct dpif_flow_stats *push)
+{
+    struct netdev *tx_dev = entry->u.dev.tx;
+    struct netdev *rx_dev = entry->u.dev.rx;
+    struct bfd *rx_bfd = entry->u.dev.bfd;
+
+    if (tx_dev) {
+        netdev_vport_inc_tx(tx_dev, push);
+    }
+    if (rx_dev) {
+        netdev_vport_inc_rx(rx_dev, push);
+    }
+    if (rx_bfd) {
+        bfd_account_rx(rx_bfd, push);
+    }
+}
+
+/* Allocates and returns an empty xlate_cache whose entry buffer starts with
+ * 512 bytes of capacity.  The caller owns the result and releases it with
+ * xlate_cache_delete(). */
+struct xlate_cache *
+xlate_cache_new(void)
+{
+    struct xlate_cache *xc;
+
+    xc = xmalloc(sizeof *xc);
+    ofpbuf_init(&xc->entries, 512);
+
+    return xc;
+}
+
+/* Appends a new entry of the given 'type' to 'xc' and returns it so that the
+ * caller can fill in the member of 'u' that matches 'type'.
+ *
+ * The entry is zero-filled.  Some XC_NETDEV producers set only a subset of
+ * the handles in 'u.dev', while xlate_credit_stats_dev() and
+ * xlate_dev_unref() treat a null handle as "not present", so the handles a
+ * caller leaves alone must read as null rather than as stale buffer bytes. */
+static struct xc_entry *
+xlate_cache_add_entry(struct xlate_cache *xc, enum stats_type type)
+{
+    struct xc_entry *entry;
+
+    entry = ofpbuf_put_zeros(&xc->entries, sizeof *entry);
+    entry->type = type;
+
+    return entry;
+}
+
+/* Replays the MAC-learning side effect of a NORMAL action for 'flow' on
+ * 'vlan'.  Does nothing when 'ofproto' has no xbridge or when no input
+ * bundle is found for the flow's in_port. */
+static void
+xlate_cache_normal(struct ofproto_dpif *ofproto, struct flow *flow, int vlan)
+{
+    /* Passed only because update_learning_table() takes a wildcards
+     * argument; it is never read back here. */
+    struct flow_wildcards scratch_wc;
+    struct xbridge *xbridge = xbridge_lookup(ofproto);
+
+    if (xbridge) {
+        struct xbundle *in_xbundle;
+
+        in_xbundle = lookup_input_bundle(xbridge, flow->in_port.ofp_port,
+                                         false, NULL);
+        if (in_xbundle) {
+            update_learning_table(xbridge, flow, &scratch_wc, vlan,
+                                  in_xbundle);
+        }
+    }
+}
+
+/* Replays the side effects recorded in 'xc', crediting 'push' to each cached
+ * object, without translating the flow again. */
+void
+xlate_from_cache(struct xlate_cache *xc, struct dpif_flow_stats *push)
+{
+    struct ofpbuf entries;
+    struct xc_entry *entry;
+
+    XC_ENTRY_FOR_EACH(entry, entries, xc) {
+        switch (entry->type) {
+        case XC_RULE:
+            rule_dpif_credit_stats(entry->u.rule, push);
+            break;
+
+        case XC_BOND:
+            bond_account_by_hash(entry->u.bond.bond, entry->u.bond.hash,
+                                 push->n_bytes);
+            break;
+
+        case XC_NETDEV:
+            xlate_credit_stats_dev(entry, push);
+            break;
+
+        case XC_NETFLOW:
+            netflow_flow_update(entry->u.nf.netflow, entry->u.nf.flow,
+                                entry->u.nf.iface, push);
+            break;
+
+        case XC_MIRROR:
+            mirror_update_stats(entry->u.mirror.mbridge,
+                                entry->u.mirror.mirrors,
+                                push->n_packets, push->n_bytes);
+            break;
+
+        case XC_LEARN:
+            /* Reset the modified time for a rule that is equivalent to the
+             * cached one.  The refresh may hand back a different rule, in
+             * which case the cache keeps the returned one from now on. */
+            entry->u.learn.rule =
+                ofproto_dpif_refresh_rule(entry->u.learn.rule);
+            break;
+
+        case XC_NORMAL:
+            xlate_cache_normal(entry->u.normal.ofproto,
+                               entry->u.normal.flow, entry->u.normal.vlan);
+            break;
+
+        default:
+            OVS_NOT_REACHED();
+        }
+    }
+}
+
+/* Releases whichever of the tx netdev, rx netdev and bfd handles are
+ * non-null in the XC_NETDEV entry 'entry'. */
+static void
+xlate_dev_unref(struct xc_entry *entry)
+{
+    struct netdev *tx_dev = entry->u.dev.tx;
+    struct netdev *rx_dev = entry->u.dev.rx;
+    struct bfd *rx_bfd = entry->u.dev.bfd;
+
+    if (tx_dev) {
+        netdev_close(tx_dev);
+    }
+    if (rx_dev) {
+        netdev_close(rx_dev);
+    }
+    if (rx_bfd) {
+        bfd_unref(rx_bfd);
+    }
+}
+
+/* Drops every reference and frees every flow copy recorded in 'xc', then
+ * empties the entry buffer so that 'xc' can be refilled.  A null 'xc' is a
+ * no-op. */
+void
+xlate_cache_clear(struct xlate_cache *xc)
+{
+    struct ofpbuf entries;
+    struct xc_entry *entry;
+
+    if (!xc) {
+        return;
+    }
+
+    XC_ENTRY_FOR_EACH(entry, entries, xc) {
+        switch (entry->type) {
+        case XC_RULE:
+            rule_dpif_unref(entry->u.rule);
+            break;
+
+        case XC_BOND:
+            bond_unref(entry->u.bond.bond);
+            break;
+
+        case XC_NETDEV:
+            xlate_dev_unref(entry);
+            break;
+
+        case XC_NETFLOW:
+            free(entry->u.nf.flow);
+            netflow_unref(entry->u.nf.netflow);
+            break;
+
+        case XC_MIRROR:
+            mbridge_unref(entry->u.mirror.mbridge);
+            break;
+
+        case XC_LEARN:
+            rule_dpif_unref(entry->u.learn.rule);
+            break;
+
+        case XC_NORMAL:
+            /* Only the flow copy is owned; no ofproto reference was taken. */
+            free(entry->u.normal.flow);
+            break;
+
+        default:
+            OVS_NOT_REACHED();
+        }
+    }
+
+    ofpbuf_clear(&xc->entries);
+}
+
+/* Releases every reference held by 'xc' and then frees 'xc' itself.
+ *
+ * Does nothing if 'xc' is null.  ukey_delete() passes 'ukey->xc'
+ * unconditionally, and that pointer stays null until revalidate_ukey() has
+ * attached a cache to the ukey. */
+void
+xlate_cache_delete(struct xlate_cache *xc)
+{
+    if (!xc) {
+        return;
+    }
+
+    xlate_cache_clear(xc);
+    ofpbuf_uninit(&xc->entries);
+    free(xc);
+}
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index 8b01d4e..da8369a 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -31,6 +31,7 @@ struct lacp;
struct dpif_ipfix;
struct dpif_sflow;
struct mac_learning;
+struct xlate_cache;
struct xlate_out {
/* Wildcards relevant in translation. Any fields that were used to
@@ -118,6 +119,15 @@ struct xlate_in {
* This is normally null so the client has to set it manually after
* calling xlate_in_init(). */
const struct dpif_flow_stats *resubmit_stats;
+
+ /* If nonnull, flow translation takes references to all modules that
+ * are affected by this translation. This 'xlate_cache' may be passed
+ * to xlate_from_cache() to perform the same function as xlate_actions()
+ * without the cost of translation.
+ *
+ * This is normally null so the client has to set it manually after
+ * calling xlate_in_init(). */
+ struct xlate_cache *xc;
};
extern struct ovs_rwlock xlate_rwlock;
@@ -169,4 +179,9 @@ void xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src);
int xlate_send_packet(const struct ofport_dpif *, struct ofpbuf *);
+struct xlate_cache *xlate_cache_new(void);
+void xlate_from_cache(struct xlate_cache *xc, struct dpif_flow_stats *push);
+void xlate_cache_clear(struct xlate_cache *);
+void xlate_cache_delete(struct xlate_cache *xc);
+
#endif /* ofproto-dpif-xlate.h */
--
1.7.9.5
More information about the dev
mailing list