[ovs-dev] [PATCH v3] nx-match: Only store significant bytes to stack.

Jarno Rajahalme jarno at ovn.org
Fri Jan 6 01:44:40 UTC 2017


Always storing the maximum mf_value size wastes about 120 bytes for
each stack entry.  This patch changes the stack from an mf_value array
to a string of value-length pairs.

The length is stored after the value so that the stack pop may first
read the length and then the appropriate number of bytes.

Signed-off-by: Jarno Rajahalme <jarno at ovn.org>
---
v3: Address Ben's comments and fold in a fix avoiding assert in OVS due to
    a controller error.

 include/openvswitch/ofp-util.h |  4 +--
 lib/nx-match.c                 | 82 +++++++++++++++++++++++++++++-------------
 lib/nx-match.h                 |  3 ++
 lib/ofp-print.c                | 23 ++++++++----
 lib/ofp-util.c                 | 42 ++++++++++------------
 ofproto/ofproto-dpif-rid.c     | 13 ++++---
 ofproto/ofproto-dpif-rid.h     |  4 +--
 ofproto/ofproto-dpif-xlate.c   | 20 +++++------
 8 files changed, 115 insertions(+), 76 deletions(-)

diff --git a/include/openvswitch/ofp-util.h b/include/openvswitch/ofp-util.h
index b197a9a..62530e8 100644
--- a/include/openvswitch/ofp-util.h
+++ b/include/openvswitch/ofp-util.h
@@ -481,8 +481,8 @@ struct ofputil_packet_in_private {
     struct uuid bridge;
 
     /* NXCPT_STACK. */
-    union mf_subvalue *stack;
-    size_t n_stack;
+    uint8_t *stack;
+    size_t stack_size;
 
     /* NXCPT_MIRRORS. */
     uint32_t mirrors;
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 2fd31b9..e990aed 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -1704,25 +1704,52 @@ nxm_stack_pop_check(const struct ofpact_stack *pop,
     return mf_check_dst(&pop->subfield, flow);
 }
 
-/* nxm_execute_stack_push(), nxm_execute_stack_pop(). */
-static void
-nx_stack_push(struct ofpbuf *stack, union mf_subvalue *v)
+/* nxm_execute_stack_push(), nxm_execute_stack_pop().
+ *
+ * A stack is an ofpbuf with 'data' pointing to the bottom of the stack and
+ * 'size' indexing the top of the stack.  Each value of some byte length is
+ * stored to the stack immediately followed by the length of the value as an
+ * unsigned byte.  This way a POP operation can first read the length byte, and
+ * then the appropriate number of bytes from the stack.  This also means that
+ * it is only possible to traverse the stack from top to bottom.  It is
+ * possible, however, to push values also to the bottom of the stack, which is
+ * useful when a stack has been serialized to a wire format in reverse order
+ * (topmost value first).
+ */
+
+/* Push value 'v' of length 'bytes' to the top of 'stack'. */
+void
+nx_stack_push(struct ofpbuf *stack, const void *v, uint8_t bytes)
 {
-    ofpbuf_put(stack, v, sizeof *v);
+    ofpbuf_put(stack, v, bytes);
+    ofpbuf_put(stack, &bytes, sizeof bytes);
 }
 
-static union mf_subvalue *
-nx_stack_pop(struct ofpbuf *stack)
+/* Push value 'v' of length 'bytes' to the bottom of 'stack'. */
+void
+nx_stack_push_bottom(struct ofpbuf *stack, const void *v, uint8_t bytes)
 {
-    union mf_subvalue *v = NULL;
-
-    if (stack->size) {
+    ofpbuf_push(stack, &bytes, sizeof bytes);
+    ofpbuf_push(stack, v, bytes);
+}
 
-        stack->size -= sizeof *v;
-        v = (union mf_subvalue *) ofpbuf_tail(stack);
+/* Pop the topmost value from 'stack', returning a pointer to the value in the
+ * stack and the length of the value in '*bytes'.  In case of underflow a NULL
+ * is returned and length is returned as zero via '*bytes'. */
+void *
+nx_stack_pop(struct ofpbuf *stack, uint8_t *bytes)
+{
+    if (!stack->size) {
+        *bytes = 0;
+        return NULL;
     }
 
-    return v;
+    stack->size -= sizeof *bytes;
+    memcpy(bytes, ofpbuf_tail(stack), sizeof *bytes);
+
+    ovs_assert(stack->size >= *bytes);
+    stack->size -= *bytes;
+    return ofpbuf_tail(stack);
 }
 
 void
@@ -1730,14 +1757,15 @@ nxm_execute_stack_push(const struct ofpact_stack *push,
                        const struct flow *flow, struct flow_wildcards *wc,
                        struct ofpbuf *stack)
 {
-    union mf_subvalue mask_value;
     union mf_subvalue dst_value;
 
-    memset(&mask_value, 0xff, sizeof mask_value);
-    mf_write_subfield_flow(&push->subfield, &mask_value, &wc->masks);
+    mf_write_subfield_flow(&push->subfield,
+                           (union mf_subvalue *)&exact_match_mask,
+                           &wc->masks);
 
     mf_read_subfield(&push->subfield, flow, &dst_value);
-    nx_stack_push(stack, &dst_value);
+    uint8_t bytes = DIV_ROUND_UP(push->subfield.n_bits, 8);
+    nx_stack_push(stack, &dst_value.u8[sizeof dst_value - bytes], bytes);
 }
 
 void
@@ -1745,17 +1773,23 @@ nxm_execute_stack_pop(const struct ofpact_stack *pop,
                       struct flow *flow, struct flow_wildcards *wc,
                       struct ofpbuf *stack)
 {
-    union mf_subvalue *src_value;
-
-    src_value = nx_stack_pop(stack);
+    uint8_t src_bytes;
+    const void *src = nx_stack_pop(stack, &src_bytes);
 
     /* Only pop if stack is not empty. Otherwise, give warning. */
-    if (src_value) {
-        union mf_subvalue mask_value;
+    if (src) {
+        union mf_subvalue src_value;
+        uint8_t dst_bytes = DIV_ROUND_UP(pop->subfield.n_bits, 8);
 
-        memset(&mask_value, 0xff, sizeof mask_value);
-        mf_write_subfield_flow(&pop->subfield, &mask_value, &wc->masks);
-        mf_write_subfield_flow(&pop->subfield, src_value, flow);
+        if (src_bytes < dst_bytes) {
+            memset(&src_value.u8[sizeof src_value - dst_bytes], 0,
+                   dst_bytes - src_bytes);
+        }
+        memcpy(&src_value.u8[sizeof src_value - src_bytes], src, src_bytes);
+        mf_write_subfield_flow(&pop->subfield,
+                               (union mf_subvalue *)&exact_match_mask,
+                               &wc->masks);
+        mf_write_subfield_flow(&pop->subfield, &src_value, flow);
     } else {
         if (!VLOG_DROP_WARN(&rl)) {
             char *flow_str = flow_to_string(flow);
diff --git a/lib/nx-match.h b/lib/nx-match.h
index a86cceb..ab093e1 100644
--- a/lib/nx-match.h
+++ b/lib/nx-match.h
@@ -125,6 +125,9 @@ enum ofperr nxm_stack_push_check(const struct ofpact_stack *,
                                  const  struct flow *);
 enum ofperr nxm_stack_pop_check(const struct ofpact_stack *,
                                const struct flow *);
+void nx_stack_push(struct ofpbuf *stack, const void *v, uint8_t bytes);
+void nx_stack_push_bottom(struct ofpbuf *stack, const void *v, uint8_t bytes);
+void *nx_stack_pop(struct ofpbuf *stack, uint8_t *bytes);
 
 void nxm_execute_stack_push(const struct ofpact_stack *,
                             const struct flow *, struct flow_wildcards *,
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index dacaa07..7719e5c 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -167,14 +167,23 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh,
                       UUID_ARGS(&pin.bridge));
     }
 
-    if (pin.n_stack) {
-        ds_put_cstr(string, " continuation.stack=");
-        for (size_t i = 0; i < pin.n_stack; i++) {
-            if (i) {
-                ds_put_char(string, ' ');
-            }
-            mf_subvalue_format(&pin.stack[i], string);
+    if (pin.stack_size) {
+        ds_put_cstr(string, " continuation.stack=(top)");
+
+        struct ofpbuf pin_stack;
+        ofpbuf_use_const(&pin_stack, pin.stack, pin.stack_size);
+
+        while (pin_stack.size) {
+            uint8_t len;
+            uint8_t *val = nx_stack_pop(&pin_stack, &len);
+            union mf_subvalue value;
+
+            ds_put_char(string, ' ');
+            memset(&value, 0, sizeof value - len);
+            memcpy(&value.u8[sizeof value - len], val, len);
+            mf_subvalue_format(&value, string);
         }
+        ds_put_cstr(string, " (bottom)\n");
     }
 
     if (pin.mirrors) {
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 156d8d2..305fa37 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -3668,16 +3668,13 @@ ofputil_put_packet_in_private(const struct ofputil_packet_in_private *pin,
         ofpprop_put_uuid(msg, NXCPT_BRIDGE, &pin->bridge);
     }
 
-    for (size_t i = 0; i < pin->n_stack; i++) {
-        const union mf_subvalue *s = &pin->stack[i];
-        size_t ofs;
-        for (ofs = 0; ofs < sizeof *s; ofs++) {
-            if (s->u8[ofs]) {
-                break;
-            }
-        }
+    struct ofpbuf pin_stack;
+    ofpbuf_use_const(&pin_stack, pin->stack, pin->stack_size);
 
-        ofpprop_put(msg, NXCPT_STACK, &s->u8[ofs], sizeof *s - ofs);
+    while (pin_stack.size) {
+        uint8_t len;
+        uint8_t *val = nx_stack_pop(&pin_stack, &len);
+        ofpprop_put(msg, NXCPT_STACK, val, len);
     }
 
     if (pin->mirrors) {
@@ -3796,7 +3793,7 @@ ofputil_encode_nx_packet_in2(const struct ofputil_packet_in_private *pin,
     /* 'extra' is just an estimate of the space required. */
     size_t extra = (pin->public.packet_len
                     + NXM_TYPICAL_LEN   /* flow_metadata */
-                    + pin->n_stack * 16
+                    + pin->stack_size * 4
                     + pin->actions_len
                     + pin->action_set_len
                     + 256);     /* fudge factor */
@@ -3997,16 +3994,15 @@ ofputil_encode_resume(const struct ofputil_packet_in *pin,
 }
 
 static enum ofperr
-parse_subvalue_prop(const struct ofpbuf *property, union mf_subvalue *sv)
+parse_stack_prop(const struct ofpbuf *property, struct ofpbuf *stack)
 {
     unsigned int len = ofpbuf_msgsize(property);
-    if (len > sizeof *sv) {
+    if (len > sizeof(union mf_subvalue)) {
         VLOG_WARN_RL(&bad_ofmsg_rl, "NXCPT_STACK property has bad length %u",
                      len);
         return OFPERR_OFPBPC_BAD_LEN;
     }
-    memset(sv, 0, sizeof *sv);
-    memcpy(&sv->u8[sizeof *sv - len], property->msg, len);
+    nx_stack_push_bottom(stack, property->msg, len);
     return 0;
 }
 
@@ -4054,14 +4050,17 @@ ofputil_decode_packet_in_private(const struct ofp_header *oh, bool loose,
     uint8_t table_id = 0;
     ovs_be64 cookie = 0;
 
-    size_t allocated_stack = 0;
+    struct ofpbuf stack;
+    ofpbuf_init(&stack, 0);
 
     while (continuation.size > 0) {
         struct ofpbuf payload;
         uint64_t type;
 
         error = ofpprop_pull(&continuation, &payload, &type);
-        ovs_assert(!error);
+        if (error) {
+            break;
+        }
 
         switch (type) {
         case NXCPT_BRIDGE:
@@ -4069,12 +4068,7 @@ ofputil_decode_packet_in_private(const struct ofp_header *oh, bool loose,
             break;
 
         case NXCPT_STACK:
-            if (pin->n_stack >= allocated_stack) {
-                pin->stack = x2nrealloc(pin->stack, &allocated_stack,
-                                           sizeof *pin->stack);
-            }
-            error = parse_subvalue_prop(&payload,
-                                        &pin->stack[pin->n_stack++]);
+            error = parse_stack_prop(&payload, &stack);
             break;
 
         case NXCPT_MIRRORS:
@@ -4119,12 +4113,14 @@ ofputil_decode_packet_in_private(const struct ofp_header *oh, bool loose,
     pin->actions = ofpbuf_steal_data(&actions);
     pin->action_set_len = action_set.size;
     pin->action_set = ofpbuf_steal_data(&action_set);
+    pin->stack_size = stack.size;
+    pin->stack = ofpbuf_steal_data(&stack);
 
     if (error) {
         ofputil_packet_in_private_destroy(pin);
     }
 
-    return 0;
+    return error;
 }
 
 /* Frees data in 'pin' that is dynamically allocated by
diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
index f2e451d..d27669e 100644
--- a/ofproto/ofproto-dpif-rid.c
+++ b/ofproto/ofproto-dpif-rid.c
@@ -124,9 +124,8 @@ frozen_state_hash(const struct frozen_state *state)
     hash = hash_bytes64((const uint64_t *) &state->metadata.metadata,
                         sizeof state->metadata - sizeof state->metadata.tunnel,
                         hash);
-    if (state->stack && state->n_stack) {
-        hash = hash_bytes64((const uint64_t *) state->stack,
-                            state->n_stack * sizeof *state->stack, hash);
+    if (state->stack && state->stack_size) {
+        hash = hash_bytes(state->stack, state->stack_size, hash);
     }
     hash = hash_int(state->mirrors, hash);
     hash = hash_int(state->action_set_len, hash);
@@ -149,8 +148,8 @@ frozen_state_equal(const struct frozen_state *a, const struct frozen_state *b)
             && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel)
             && !memcmp(&a->metadata.metadata, &b->metadata.metadata,
                        sizeof a->metadata - sizeof a->metadata.tunnel)
-            && a->n_stack == b->n_stack
-            && !memcmp(a->stack, b->stack, a->n_stack * sizeof *a->stack)
+            && a->stack_size == b->stack_size
+            && !memcmp(a->stack, b->stack, a->stack_size)
             && a->mirrors == b->mirrors
             && a->conntracked == b->conntracked
             && ofpacts_equal(a->ofpacts, a->ofpacts_len,
@@ -196,8 +195,8 @@ frozen_state_clone(struct frozen_state *new, const struct frozen_state *old,
     flow_tnl_copy__(tunnel, old->metadata.tunnel);
     new->metadata.tunnel = tunnel;
 
-    new->stack = (new->n_stack
-                  ? xmemdup(new->stack, new->n_stack * sizeof *new->stack)
+    new->stack = (new->stack_size
+                  ? xmemdup(new->stack, new->stack_size)
                   : NULL);
     new->ofpacts = (new->ofpacts_len
                     ? xmemdup(new->ofpacts, new->ofpacts_len)
diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
index 3bca817..c357591 100644
--- a/ofproto/ofproto-dpif-rid.h
+++ b/ofproto/ofproto-dpif-rid.h
@@ -143,8 +143,8 @@ struct frozen_state {
     /* Pipeline context for processing when thawing. */
     struct uuid ofproto_uuid;     /* Bridge to resume from. */
     struct frozen_metadata metadata; /* Flow metadata. */
-    union mf_subvalue *stack;     /* Stack if any. */
-    size_t n_stack;
+    uint8_t *stack;               /* Stack if any. */
+    size_t stack_size;
     mirror_mask_t mirrors;        /* Mirrors already output. */
     bool conntracked;             /* Conntrack occurred prior to freeze. */
 
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 371ced4..6b33089 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -183,8 +183,8 @@ struct xlate_ctx {
      * actually set the tun_dst field. */
     struct in6_addr orig_tunnel_ipv6_dst;
 
-    /* Stack for the push and pop actions.  Each stack element is of type
-     * "union mf_subvalue". */
+    /* Stack for the push and pop actions.  See comment above nx_stack_push()
+     * in nx-match.c for info on how the stack is stored. */
     struct ofpbuf stack;
 
     /* The rule that we are currently translating, or NULL. */
@@ -2964,7 +2964,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         bool old_was_mpls = ctx->was_mpls;
         ovs_version_t old_version = ctx->xin->tables_version;
         struct ofpbuf old_stack = ctx->stack;
-        union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
+        uint8_t new_stack[1024];
         struct ofpbuf old_action_set = ctx->action_set;
         uint64_t actset_stub[1024 / 8];
 
@@ -3722,9 +3722,8 @@ emit_continuation(struct xlate_ctx *ctx, const struct frozen_state *state)
                     .reason = ctx->pause->reason,
                 },
                 .bridge = ctx->xbridge->ofproto->uuid,
-                .stack = xmemdup(state->stack,
-                                 state->n_stack * sizeof *state->stack),
-                .n_stack = state->n_stack,
+                .stack = xmemdup(state->stack, state->stack_size),
+                .stack_size = state->stack_size,
                 .mirrors = state->mirrors,
                 .conntracked = state->conntracked,
                 .actions = xmemdup(state->ofpacts, state->ofpacts_len),
@@ -3760,7 +3759,7 @@ finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
         .table_id = table,
         .ofproto_uuid = ctx->xbridge->ofproto->uuid,
         .stack = ctx->stack.data,
-        .n_stack = ctx->stack.size / sizeof(union mf_subvalue),
+        .stack_size = ctx->stack.size,
         .mirrors = ctx->mirrors,
         .conntracked = ctx->conntracked,
         .ofpacts = ctx->frozen_actions.data,
@@ -5386,7 +5385,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
 
     struct flow *flow = &xin->flow;
 
-    union mf_subvalue stack_stub[1024 / sizeof(union mf_subvalue)];
+    uint8_t stack_stub[1024];
     uint64_t action_set_stub[1024 / 8];
     uint64_t frozen_actions_stub[1024 / 8];
     uint64_t actions_stub[256 / 8];
@@ -5497,8 +5496,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
 
         /* Restore stack, if any. */
         if (state->stack) {
-            ofpbuf_put(&ctx.stack, state->stack,
-                       state->n_stack * sizeof *state->stack);
+            ofpbuf_put(&ctx.stack, state->stack, state->stack_size);
         }
 
         /* Restore mirror state. */
@@ -5790,7 +5788,7 @@ xlate_resume(struct ofproto_dpif *ofproto,
         .table_id = 0,     /* Not the table where NXAST_PAUSE was executed. */
         .ofproto_uuid = pin->bridge,
         .stack = pin->stack,
-        .n_stack = pin->n_stack,
+        .stack_size = pin->stack_size,
         .mirrors = pin->mirrors,
         .conntracked = pin->conntracked,
 
-- 
2.1.4



More information about the dev mailing list