[ovs-dev] [PATCH v5 5/6] ofproto-dpif-rid: Make lookups cheaper.

Jarno Rajahalme jrajahalme at nicira.com
Fri Aug 21 22:25:22 UTC 2015


Tunnel metadata has grown large since the addition of Geneve options.
Copying tunnel metadata for performing a lookup is not necessary.
Change recirc_metadata to use a pointer to struct flow_tnl, and only
copy the tunnel metadata when needed, and only copy as little of it as
possible.

Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
---
 lib/flow.h                 | 22 +---------------
 lib/packets.h              | 65 ++++++++++++++++++++++++++++++++++++++++++++--
 ofproto/ofproto-dpif-rid.c | 30 ++++++++++++++++-----
 ofproto/ofproto-dpif-rid.h | 64 +++++++++++++++++++++++++--------------------
 4 files changed, 124 insertions(+), 57 deletions(-)

diff --git a/lib/flow.h b/lib/flow.h
index 958e7cb..f9da6b3 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -65,28 +65,8 @@ BUILD_ASSERT_DECL(FLOW_N_REGS % 2 == 0); /* Even. */
 BUILD_ASSERT_DECL(FLOW_NW_FRAG_ANY == NX_IP_FRAG_ANY);
 BUILD_ASSERT_DECL(FLOW_NW_FRAG_LATER == NX_IP_FRAG_LATER);
 
-/* Some flags are exposed through OpenFlow while others are used only
- * internally. */
-
-/* Public flags */
-#define FLOW_TNL_F_OAM (1 << 0)
-
-#define FLOW_TNL_PUB_F_MASK ((1 << 1) - 1)
 BUILD_ASSERT_DECL(FLOW_TNL_F_OAM == NX_TUN_FLAG_OAM);
 
-/* Private flags */
-#define FLOW_TNL_F_DONT_FRAGMENT (1 << 1)
-#define FLOW_TNL_F_CSUM (1 << 2)
-#define FLOW_TNL_F_KEY (1 << 3)
-
-#define FLOW_TNL_F_MASK ((1 << 4) - 1)
-
-/* Purely internal to OVS userspace. These flags should never be exposed to
- * the outside world and so aren't included in the flags mask. */
-
-/* Tunnel information is in userspace datapath format. */
-#define FLOW_TNL_F_UDPIF (1 << 4)
-
 const char *flow_tun_flag_to_string(uint32_t flags);
 
 /* Maximum number of supported MPLS labels. */
@@ -987,7 +967,7 @@ pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
 {
     md->recirc_id = flow->recirc_id;
     md->dp_hash = flow->dp_hash;
-    md->tunnel = flow->tunnel;
+    flow_tnl_copy__(&md->tunnel, &flow->tunnel);
     md->skb_priority = flow->skb_priority;
     md->pkt_mark = flow->pkt_mark;
     md->in_port = flow->in_port;
diff --git a/lib/packets.h b/lib/packets.h
index 38af37b..7140c83 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -35,9 +35,9 @@ struct ds;
 
 /* Tunnel information used in flow key and metadata. */
 struct flow_tnl {
-    ovs_be64 tun_id;
     ovs_be32 ip_dst;
     ovs_be32 ip_src;
+    ovs_be64 tun_id;
     uint16_t flags;
     uint8_t ip_tos;
     uint8_t ip_ttl;
@@ -49,6 +49,65 @@ struct flow_tnl {
     struct tun_metadata metadata;
 };
 
+/* Some flags are exposed through OpenFlow while others are used only
+ * internally. */
+
+/* Public flags */
+#define FLOW_TNL_F_OAM (1 << 0)
+
+#define FLOW_TNL_PUB_F_MASK ((1 << 1) - 1)
+
+/* Private flags */
+#define FLOW_TNL_F_DONT_FRAGMENT (1 << 1)
+#define FLOW_TNL_F_CSUM (1 << 2)
+#define FLOW_TNL_F_KEY (1 << 3)
+
+#define FLOW_TNL_F_MASK ((1 << 4) - 1)
+
+/* Purely internal to OVS userspace. These flags should never be exposed to
+ * the outside world and so aren't included in the flags mask. */
+
+/* Tunnel information is in userspace datapath format. */
+#define FLOW_TNL_F_UDPIF (1 << 4)
+
+/* Returns an offset to 'src' covering all the meaningful fields in 'src'. */
+static inline size_t
+flow_tnl_size(const struct flow_tnl *src)
+{
+    if (!src->ip_dst) {
+        /* Covers ip_dst only. */
+        return offsetof(struct flow_tnl, ip_src);
+    }
+    if (src->flags & FLOW_TNL_F_UDPIF) {
+        /* Datapath format, cover all options we have. */
+        return offsetof(struct flow_tnl, metadata.opts)
+            + src->metadata.present.len;
+    }
+    if (!src->metadata.present.map) {
+        /* No TLVs, opts is irrelevant. */
+        return offsetof(struct flow_tnl, metadata.opts);
+    }
+    /* Have decoded TLVs, opts is relevant. */
+    return sizeof *src;
+}
+
+/* Copy flow_tnl, but avoid copying unused portions of tun_metadata.  Unused
+ * data in 'dst' is NOT cleared, so this must not be used in cases where the
+ * uninitialized portion may be hashed over. */
+static inline void
+flow_tnl_copy__(struct flow_tnl *dst, const struct flow_tnl *src)
+{
+    memcpy(dst, src, flow_tnl_size(src));
+}
+
+static inline bool
+flow_tnl_equal(const struct flow_tnl *a, const struct flow_tnl *b)
+{
+    size_t a_size = flow_tnl_size(a);
+
+    return a_size == flow_tnl_size(b) && !memcmp(a, b, a_size);
+}
+
 /* Unfortunately, a "struct flow" sometimes has to handle OpenFlow port
  * numbers and other times datapath (dpif) port numbers.  This union allows
  * access to both. */
@@ -67,7 +126,9 @@ struct pkt_metadata {
     uint32_t skb_priority;      /* Packet priority for QoS. */
     uint32_t pkt_mark;          /* Packet mark. */
     union flow_in_port in_port; /* Input port. */
-    struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. */
+    struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. Note that
+                                 * if 'ip_dst' == 0, the rest of the fields may
+                                 * be uninitialized. */
 };
 
 static inline void
diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
index b8207c4..5a02bb0 100644
--- a/ofproto/ofproto-dpif-rid.c
+++ b/ofproto/ofproto-dpif-rid.c
@@ -130,8 +130,16 @@ recirc_metadata_hash(const struct recirc_state *state)
 
     hash = hash_pointer(state->ofproto, 0);
     hash = hash_int(state->table_id, hash);
-    hash = hash_words64((const uint64_t *) &state->metadata,
-                        sizeof state->metadata / sizeof(uint64_t),
+    if (state->metadata.tunnel->ip_dst) {
+        /* We may leave remainder bytes unhashed, but that is unlikely as
+         * the tunnel is not in the datapath format. */
+        hash = hash_words64((const uint64_t *) state->metadata.tunnel,
+                            flow_tnl_size(state->metadata.tunnel)
+                            / sizeof(uint64_t), hash);
+    }
+    hash = hash_words64((const uint64_t *) &state->metadata.metadata,
+                        (sizeof state->metadata - sizeof state->metadata.tunnel)
+                        / sizeof(uint64_t),
                         hash);
     if (state->stack && state->stack->size != 0) {
         hash = hash_words64((const uint64_t *) state->stack->data,
@@ -153,7 +161,9 @@ recirc_metadata_equal(const struct recirc_state *a,
 {
     return (a->table_id == b->table_id
             && a->ofproto == b->ofproto
-            && !memcmp(&a->metadata, &b->metadata, sizeof a->metadata)
+            && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel)
+            && !memcmp(&a->metadata.metadata, &b->metadata.metadata,
+                       sizeof a->metadata - sizeof a->metadata.tunnel)
             && (((!a->stack || !a->stack->size) &&
                  (!b->stack || !b->stack->size))
                 || (a->stack && b->stack && ofpbuf_equal(a->stack, b->stack)))
@@ -193,9 +203,13 @@ recirc_ref_equal(const struct recirc_state *target, uint32_t hash)
 }
 
 static void
-recirc_state_clone(struct recirc_state *new, const struct recirc_state *old)
+recirc_state_clone(struct recirc_state *new, const struct recirc_state *old,
+                   struct flow_tnl *tunnel)
 {
     *new = *old;
+    flow_tnl_copy__(tunnel, old->metadata.tunnel);
+    new->metadata.tunnel = tunnel;
+
     if (new->stack) {
         new->stack = new->stack->size ? ofpbuf_clone(new->stack) : NULL;
     }
@@ -216,9 +230,11 @@ recirc_alloc_id__(const struct recirc_state *state, uint32_t hash)
     ovs_assert(state->action_set_len <= state->ofpacts_len);
 
     struct recirc_id_node *node = xzalloc(sizeof *node);
+
     node->hash = hash;
     ovs_refcount_init(&node->refcount);
-    recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state);
+    recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state,
+                       &node->state_metadata_tunnel);
 
     ovs_mutex_lock(&mutex);
     for (;;) {
@@ -269,10 +285,12 @@ recirc_alloc_id_ctx(const struct recirc_state *state)
 uint32_t
 recirc_alloc_id(struct ofproto_dpif *ofproto)
 {
+    struct flow_tnl tunnel;
+    tunnel.ip_dst = htonl(0);
     struct recirc_state state = {
         .table_id = TBL_INTERNAL,
         .ofproto = ofproto,
-        .metadata = { .in_port = OFPP_NONE },
+        .metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE },
     };
     return recirc_alloc_id__(&state, recirc_metadata_hash(&state))->id;
 }
diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
index 11ae486..e7d95bf 100644
--- a/ofproto/ofproto-dpif-rid.h
+++ b/ofproto/ofproto-dpif-rid.h
@@ -34,40 +34,40 @@ struct rule;
  * =============
  *
  * Recirculation is a technique to allow a frame to re-enter the datapath
- * packet processing path for one or multiple times to achieve more flexible
- * packet processing, such modifying header fields after MPLS POP action and
- * selecting bond a slave port for bond ports.
+ * packet processing path to achieve more flexible packet processing, such as
+ * modifying header fields after MPLS POP action and selecting a slave port for
+ * bond ports.
  *
  * Data path and user space interface
  * -----------------------------------
  *
  * Recirculation uses two uint32_t fields, recirc_id and dp_hash, and a RECIRC
- * action.  The value recirc_id is used to select the next packet processing
- * steps among multiple instances of recirculation.  When a packet initially
- * enters the data path it is assigned with recirc_id 0, which indicates no
- * recirculation.  Recirc_ids are managed by the user space, opaque to the
- * data path.
+ * action.  recirc_id is used to select the next packet processing steps among
+ * multiple instances of recirculation.  When a packet initially enters the
+ * datapath it is assigned with recirc_id 0, which indicates no recirculation.
+ * Recirc_ids are managed by the user space, opaque to the datapath.
  *
- * On the other hand, dp_hash can only be computed by the data path, opaque to
- * the user space.  In fact, user space may not able to recompute the hash
- * value.  The dp_hash value should be wildcarded for a newly received
- * packet.  HASH action specifies whether the hash is computed, and if
- * computed, how many fields are to be included in the hash computation.  The
- * computed hash value is stored into the dp_hash field prior to recirculation.
+ * On the other hand, dp_hash can only be computed by the datapath, opaque to
+ * the user space, as the datapath is free to choose the hashing algorithm
+ * without informing user space about it.  The dp_hash value should be
+ * wildcarded for newly received packets.  HASH action specifies whether the
+ * hash is computed, and if computed, how many fields are to be included in the
+ * hash computation.  The computed hash value is stored into the dp_hash field
+ * prior to recirculation.
  *
  * The RECIRC action sets the recirc_id field and then reprocesses the packet
- * as if it was received on the same input port.  RECIRC action works like a
- * function call; actions listed behind the RECIRC action will be executed
- * after its execution.  RECIRC action can be nested, data path implementation
- * limits the number of recirculation executed to prevent unreasonable nesting
- * depth or infinite loop.
+ * as if it was received again on the same input port.  RECIRC action works
+ * like a function call; actions listed after the RECIRC action will be
+ * executed after recirculation.  RECIRC action can be nested, but datapath
+ * implementation limits the number of nested recirculations to prevent
+ * unreasonable nesting depth or infinite loop.
  *
  * User space recirculation context
  * ---------------------------------
  *
- * Recirculation is hidden from the OpenFlow controllers.  Action translation
- * code deduces when recirculation is necessary and issues a data path
- * recirculation action.  All OpenFlow actions to be performed after
+ * Recirculation is usually hidden from the OpenFlow controllers.  Action
+ * translation code deduces when recirculation is necessary and issues a
+ * datapath recirculation action.  All OpenFlow actions to be performed after
  * recirculation are derived from the OpenFlow pipeline and are stored with the
  * recirculation ID.  When the OpenFlow tables are changed in a way affecting
  * the recirculation flows, new recirculation ID with new metadata and actions
@@ -76,9 +76,10 @@ struct rule;
  * Recirculation ID pool
  * ----------------------
  *
- * Recirculation ID needs to be unique for all data paths.  Recirculation ID
- * pool keeps track recirculation ids and stores OpenFlow pipeline translation
- * context so that flow processing may continue after recirculation.
+ * Recirculation ID needs to be unique for all datapaths.  Recirculation ID
+ * pool keeps track of recirculation ids and stores OpenFlow pipeline
+ * translation context so that flow processing may continue after
+ * recirculation.
  *
  * A Recirculation ID can be any uint32_t value, except for that the value 0 is
  * reserved for 'no recirculation' case.
@@ -96,7 +97,7 @@ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
 
 struct recirc_metadata {
     /* Metadata in struct flow. */
-    struct flow_tnl tunnel;       /* Encapsulating tunnel parameters. */
+    const struct flow_tnl *tunnel; /* Encapsulating tunnel parameters. */
     ovs_be64 metadata;            /* OpenFlow Metadata. */
     uint64_t regs[FLOW_N_XREGS];  /* Registers. */
     ofp_port_t in_port;           /* Incoming port. */
@@ -108,7 +109,7 @@ recirc_metadata_from_flow(struct recirc_metadata *md,
                           const struct flow *flow)
 {
     memset(md, 0, sizeof *md);
-    md->tunnel = flow->tunnel;
+    md->tunnel = &flow->tunnel;
     md->metadata = flow->metadata;
     memcpy(md->regs, flow->regs, sizeof md->regs);
     md->in_port = flow->in_port.ofp_port;
@@ -119,7 +120,11 @@ static inline void
 recirc_metadata_to_flow(const struct recirc_metadata *md,
                         struct flow *flow)
 {
-    flow->tunnel = md->tunnel;
+    if (md->tunnel && md->tunnel->ip_dst) {
+        flow->tunnel = *md->tunnel;
+    } else {
+        memset(&flow->tunnel, 0, sizeof flow->tunnel);
+    }
     flow->metadata = md->metadata;
     memcpy(flow->regs, md->regs, sizeof flow->regs);
     flow->in_port.ofp_port = md->in_port;
@@ -161,6 +166,9 @@ struct recirc_id_node {
      * This state should not be modified after inserting a node in the pool,
      * hence the 'const' to emphasize that. */
     const struct recirc_state state;
+
+    /* Storage for tunnel metadata. */
+    struct flow_tnl state_metadata_tunnel;
 };
 
 void recirc_init(void);
-- 
2.1.4




More information about the dev mailing list