[ovs-dev] [multipath 7/7] Implement a new Nicira extension action for multipath link selection.

Ben Pfaff blp at nicira.com
Thu Dec 16 22:42:31 UTC 2010


---
 include/openflow/nicira-ext.h |  127 +++++++++++++++++
 lib/automake.mk               |    2 +
 lib/multipath.c               |  301 +++++++++++++++++++++++++++++++++++++++++
 lib/multipath.h               |   38 +++++
 lib/ofp-parse.c               |   64 ++++++++--
 lib/ofp-print.c               |   10 ++-
 lib/ofp-util.c                |    9 ++
 ofproto/ofproto.c             |    7 +
 tests/automake.mk             |    7 +
 tests/multipath.at            |  280 ++++++++++++++++++++++++++++++++++++++
 tests/ovs-ofctl.at            |   15 ++-
 tests/test-multipath.c        |  131 ++++++++++++++++++
 tests/testsuite.at            |    1 +
 utilities/ovs-ofctl.8.in      |   15 ++
 14 files changed, 990 insertions(+), 17 deletions(-)
 create mode 100644 lib/multipath.c
 create mode 100644 lib/multipath.h
 create mode 100644 tests/multipath.at
 create mode 100644 tests/test-multipath.c

diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index b18214f..d4184c7 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -230,6 +230,7 @@ enum nx_action_subtype {
     NXAST_REG_LOAD,             /* struct nx_action_reg_load */
     NXAST_NOTE,                 /* struct nx_action_note */
     NXAST_SET_TUNNEL64,         /* struct nx_action_set_tunnel64 */
+    NXAST_MULTIPATH             /* struct nx_action_multipath */
 };
 
 /* Header for Nicira-defined actions. */
@@ -477,6 +478,132 @@ struct nx_action_note {
 };
 OFP_ASSERT(sizeof(struct nx_action_note) == 16);
 
+/* Action structure for NXAST_MULTIPATH.
+ *
+ * This action performs the following steps in sequence:
+ *
+ *    1. Hashes the fields designated by 'fields', one of NX_MP_FIELDS_*.
+ *       Refer to the definition of "enum nx_mp_fields" for details.
+ *
+ *       The 'basis' value is used as a universal hash parameter, that is,
+ *       different values of 'basis' yield different hash functions.  The
+ *       particular universal hash function used is implementation-defined.
+ *
+ *       The hashed fields' values are drawn from the current state of the
+ *       flow, including all modifications that have been made by actions up to
+ *       this point.
+ *
+ *    2. Applies the multipath link choice algorithm specified by 'algorithm',
+ *       one of NX_MP_ALG_*.  Refer to the definition of "enum nx_mp_algorithm"
+ *       for details.
+ *
+ *       The output of the algorithm is 'link', an unsigned integer less than
+ *       or equal to 'max_link'.
+ *
+ *       Some algorithms use 'arg' as an additional argument.
+ *
+ *    3. Stores 'link' in dst[ofs:ofs+n_bits].  The format and semantics of
+ *       'dst' and 'ofs_nbits' are identical to those for the NXAST_REG_LOAD
+ *       action; refer to the description of that action for details.
+ *
+ * The switch will reject actions that have an unknown 'fields', or an unknown
+ * 'algorithm', or in which ofs+n_bits is greater than the width of 'dst', or
+ * in which 'max_link' is greater than or equal to 2**n_bits, with error type
+ * OFPET_BAD_ACTION, code OFPBAC_BAD_ARGUMENT.
+ */
+struct nx_action_multipath {
+    ovs_be16 type;              /* OFPAT_VENDOR. */
+    ovs_be16 len;               /* Length is 32. */
+    ovs_be32 vendor;            /* NX_VENDOR_ID. */
+    ovs_be16 subtype;           /* NXAST_MULTIPATH. */
+
+    /* What fields to hash and how. */
+    ovs_be16 fields;            /* One of NX_MP_FIELDS_*. */
+    ovs_be16 basis;             /* Universal hash parameter. */
+    ovs_be16 pad0;
+
+    /* Multipath link choice algorithm to apply to hash value. */
+    ovs_be16 algorithm;         /* One of NX_MP_ALG_*. */
+    ovs_be16 max_link;          /* Number of output links, minus 1. */
+    ovs_be32 arg;               /* Algorithm-specific argument. */
+    ovs_be16 pad1;
+
+    /* Where to store the result. */
+    ovs_be16 ofs_nbits;         /* (ofs << 6) | (n_bits - 1). */
+    ovs_be32 dst;               /* Destination register. */
+};
+OFP_ASSERT(sizeof(struct nx_action_multipath) == 32);
+
+/* NXAST_MULTIPATH: Fields to hash. */
+enum nx_mp_fields {
+    /* Ethernet source address (NXM_OF_ETH_SRC) only. */
+    NX_MP_FIELDS_ETH_SRC,
+
+    /* L2 through L4, symmetric across src/dst.  Specifically, each of the
+     * following fields, if present, is hashed (slashes separate symmetric
+     * pairs):
+     *
+     *  - NXM_OF_ETH_DST / NXM_OF_ETH_SRC
+     *  - NXM_OF_ETH_TYPE
+     *  - NXM_OF_VLAN_TCI (ignoring VLAN_CFI)
+     *  - NXM_OF_IP_PROTO
+     *  - NXM_OF_IP_SRC / NXM_OF_IP_DST
+     *  - NXM_OF_TCP_SRC / NXM_OF_TCP_DST
+     *  - NXM_OF_UDP_SRC / NXM_OF_UDP_DST
+     */
+    NX_MP_FIELDS_SYMMETRIC_L4
+};
+
+/* NXAST_MULTIPATH: Multipath link choice algorithm to apply.
+ *
+ * In the descriptions below, 'n_links' is max_link + 1. */
+enum nx_mp_algorithm {
+    /* link = hash(flow) % n_links.
+     *
+     * Redistributes all traffic when n_links changes.  O(1) performance.  See
+     * RFC 2992.
+     *
+     * Use UINT16_MAX for max_link to get a raw hash value. */
+    NX_MP_ALG_MODULO_N,
+
+    /* link = hash(flow) / (MAX_HASH / n_links).
+     *
+     * Redistributes between one-quarter and one-half of traffic when n_links
+     * changes.  O(1) performance.  See RFC 2992.
+     */
+    NX_MP_ALG_HASH_THRESHOLD,
+
+    /* for i in [0,n_links):
+     *   weights[i] = hash(flow, i)
+     * link = { i such that weights[i] >= weights[j] for all j != i }
+     *
+     * Redistributes 1/n_links of traffic when n_links changes.  O(n_links)
+     * performance.  If n_links is greater than a threshold (currently 64, but
+     * subject to change), Open vSwitch will substitute another algorithm
+     * automatically.  See RFC 2992. */
+    NX_MP_ALG_HRW,              /* Highest Random Weight. */
+
+    /* i = 0
+     * repeat:
+     *     i = i + 1
+     *     link = hash(flow, i) % arg
+     * while link > max_link
+     *
+     * Redistributes 1/n_links of traffic when n_links changes.  O(1)
+     * performance when arg/max_link is bounded by a constant.
+     *
+     * Redistributes all traffic when arg changes.
+     *
+     * arg must be greater than max_link and for best performance should be no
+     * more than approximately max_link * 2.  If arg is outside the acceptable
+     * range, Open vSwitch will automatically substitute the least power of 2
+     * greater than max_link.
+     *
+     * This algorithm is specific to Open vSwitch.
+     */
+    NX_MP_ALG_ITER_HASH         /* Iterative Hash. */
+};
+
 /* Wildcard for tunnel ID. */
 #define NXFW_TUN_ID  (1 << 25)
 
diff --git a/lib/automake.mk b/lib/automake.mk
index a23a569..f7d1499 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -68,6 +68,8 @@ lib_libopenvswitch_a_SOURCES = \
 	lib/lockfile.h \
 	lib/mac-learning.c \
 	lib/mac-learning.h \
+	lib/multipath.c \
+	lib/multipath.h \
 	lib/netdev-dummy.c \
 	lib/netdev-provider.h \
 	lib/netdev.c \
diff --git a/lib/multipath.c b/lib/multipath.c
new file mode 100644
index 0000000..b89ce5a
--- /dev/null
+++ b/lib/multipath.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "multipath.h"
+#include <inttypes.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include "dynamic-string.h"
+#include "nx-match.h"
+#include "ofp-util.h"
+#include "openflow/nicira-ext.h"
+#include "packets.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(multipath);
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+/* multipath_check(). */
+int
+multipath_check(const struct nx_action_multipath *mp)
+{
+    uint32_t dst = ntohl(mp->dst);
+    int ofs = nxm_decode_ofs(mp->ofs_nbits);
+    int n_bits = nxm_decode_n_bits(mp->ofs_nbits);
+
+    if (mp->fields != htons(NX_MP_FIELDS_ETH_SRC)
+        && mp->fields != htons(NX_MP_FIELDS_SYMMETRIC_L4)) {
+        VLOG_WARN_RL(&rl, "unsupported fields %"PRIu16, ntohs(mp->fields));
+    } else if (mp->algorithm != htons(NX_MP_ALG_MODULO_N)) {
+        VLOG_WARN_RL(&rl, "unsupported algorithm %"PRIu16,
+                     ntohs(mp->algorithm));
+    } else if (!NXM_IS_NX_REG(dst) || NXM_NX_REG_IDX(dst) >= FLOW_N_REGS) {
+        VLOG_WARN_RL(&rl, "unsupported destination field %#"PRIx32, dst);
+    } else if (ofs + n_bits > nxm_field_bits(dst)) {
+        VLOG_WARN_RL(&rl, "destination overflows output field");
+    } else if (n_bits < 16 && ntohs(mp->max_link) > (1u << n_bits)) {
+        VLOG_WARN_RL(&rl, "max_link overflows output field");
+    } else {
+        return 0;
+    }
+
+    return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_ARGUMENT);
+}
+
+/* multipath_execute(). */
+
+static uint32_t multipath_hash(const struct flow *, enum nx_mp_fields,
+                               uint16_t basis);
+static uint16_t multipath_algorithm(uint32_t hash, enum nx_mp_algorithm,
+                                    unsigned int n_links, unsigned int arg);
+
+void
+multipath_execute(const struct nx_action_multipath *mp, struct flow *flow)
+{
+    /* Calculate value to store. */
+    uint32_t hash = multipath_hash(flow, ntohs(mp->fields), ntohs(mp->basis));
+    uint16_t link = multipath_algorithm(hash, ntohs(mp->algorithm),
+                                        ntohs(mp->max_link) + 1,
+                                        ntohs(mp->arg));
+
+    /* Store it. */
+    uint32_t *reg = &flow->regs[NXM_NX_REG_IDX(ntohl(mp->dst))];
+    int ofs = nxm_decode_ofs(mp->ofs_nbits);
+    int n_bits = nxm_decode_n_bits(mp->ofs_nbits);
+    uint32_t mask = n_bits == 32 ? UINT32_MAX : (UINT32_C(1) << n_bits) - 1;
+    *reg = (*reg & ~(mask << ofs)) | (link << ofs);
+}
+
+static uint32_t
+hash_symmetric_l4(const struct flow *flow, uint16_t basis)
+{
+    struct {
+        ovs_be32 ip_addr;
+        ovs_be16 eth_type;
+        ovs_be16 vlan_tci;
+        ovs_be16 tp_addr;
+        uint8_t eth_addr[ETH_ADDR_LEN];
+        uint8_t ip_proto;
+    } fields;
+
+    int i;
+
+    for (i = 0; i < 6; i++) {
+        fields.eth_addr[i] = flow->dl_src[i] ^ flow->dl_dst[i];
+    }
+    fields.vlan_tci = flow->vlan_tci & ~htons(VLAN_CFI);
+    fields.eth_type = flow->dl_type;
+    if (fields.eth_type == htons(ETH_TYPE_IP)) {
+        fields.ip_addr = flow->nw_src ^ flow->nw_dst;
+        fields.ip_proto = flow->nw_proto;
+        if (fields.ip_proto == IP_TYPE_TCP || fields.ip_proto == IP_TYPE_UDP) {
+            fields.tp_addr = flow->tp_src ^ flow->tp_dst;
+        } else {
+            fields.tp_addr = htons(0);
+        }
+    } else {
+        fields.ip_addr = htonl(0);
+        fields.ip_proto = 0;
+        fields.tp_addr = htons(0);
+    }
+    return hash_bytes(&fields, sizeof fields, basis);
+}
+
+static uint32_t
+multipath_hash(const struct flow *flow, enum nx_mp_fields fields,
+               uint16_t basis)
+{
+    switch (fields) {
+    case NX_MP_FIELDS_ETH_SRC:
+        return hash_bytes(flow->dl_src, sizeof flow->dl_src, basis);
+
+    case NX_MP_FIELDS_SYMMETRIC_L4:
+        return hash_symmetric_l4(flow, basis);
+    }
+
+    NOT_REACHED();
+}
+
+static uint16_t
+algorithm_hrw(uint32_t hash, unsigned int n_links)
+{
+    uint32_t best_weight;
+    uint16_t best_link;
+    unsigned int link;
+
+    best_link = 0;
+    best_weight = hash_2words(hash, 0);
+    for (link = 1; link < n_links; link++) {
+        uint32_t weight = hash_2words(hash, link);
+        if (weight > best_weight) {
+            best_link = link;
+            best_weight = weight;
+        }
+    }
+    return best_link;
+}
+
+/* Works for 'x' in the range [1,65536], which is all we need.  */
+static unsigned int
+round_up_pow2(unsigned int x)
+{
+    x--;
+    x |= x >> 1;
+    x |= x >> 2;
+    x |= x >> 4;
+    x |= x >> 8;
+    return x + 1;
+}
+
+static uint16_t
+algorithm_iter_hash(uint32_t hash, unsigned int n_links, unsigned int modulo)
+{
+    uint16_t link;
+    int i;
+
+    if (modulo < n_links || modulo / 2 > n_links) {
+        modulo = round_up_pow2(n_links);
+    }
+
+    i = 0;
+    do {
+        link = hash_2words(hash, i++) % modulo;
+    } while (link >= n_links);
+
+    return link;
+}
+
+static uint16_t
+multipath_algorithm(uint32_t hash, enum nx_mp_algorithm algorithm,
+                    unsigned int n_links, unsigned int arg)
+{
+    switch (algorithm) {
+    case NX_MP_ALG_MODULO_N:
+        return hash % n_links;
+
+    case NX_MP_ALG_HASH_THRESHOLD:
+        return hash / (UINT32_MAX / n_links);
+
+    case NX_MP_ALG_HRW:
+        return (n_links <= 64
+                ? algorithm_hrw(hash, n_links)
+                : algorithm_iter_hash(hash, n_links, 0));
+
+    case NX_MP_ALG_ITER_HASH:
+        return algorithm_iter_hash(hash, n_links, arg);
+    }
+
+    NOT_REACHED();
+}
+
+/* multipath_parse(). */
+
+void
+multipath_parse(struct nx_action_multipath *mp, const char *s_)
+{
+    char *s = xstrdup(s_);
+    char *save_ptr = NULL;
+    char *fields, *basis, *algorithm, *n_links, *arg, *dst;
+    uint32_t header;
+    int ofs, n_bits;
+
+    fields = strtok_r(s, ", ", &save_ptr);
+    basis = strtok_r(NULL, ", ", &save_ptr);
+    algorithm = strtok_r(NULL, ", ", &save_ptr);
+    n_links = strtok_r(NULL, ", ", &save_ptr);
+    arg = strtok_r(NULL, ", ", &save_ptr);
+    dst = strtok_r(NULL, ", ", &save_ptr);
+    if (!dst) {
+        ovs_fatal(0, "%s: not enough arguments to multipath action", s);
+    }
+
+    memset(mp, 0, sizeof *mp);
+    mp->type = htons(OFPAT_VENDOR);
+    mp->len = htons(sizeof *mp);
+    mp->vendor = htonl(NX_VENDOR_ID);
+    mp->subtype = htons(NXAST_MULTIPATH);
+    if (!strcasecmp(fields, "eth_src")) {
+        mp->fields = htons(NX_MP_FIELDS_ETH_SRC);
+    } else if (!strcasecmp(fields, "symmetric_l4")) {
+        mp->fields = htons(NX_MP_FIELDS_SYMMETRIC_L4);
+    } else {
+        ovs_fatal(0, "%s: unknown fields `%s'", s, fields);
+    }
+    mp->basis = htons(atoi(basis));
+    if (!strcasecmp(algorithm, "modulo_n")) {
+        mp->algorithm = htons(NX_MP_ALG_MODULO_N);
+    } else if (!strcasecmp(algorithm, "hash_threshold")) {
+        mp->algorithm = htons(NX_MP_ALG_HASH_THRESHOLD);
+    } else if (!strcasecmp(algorithm, "hrw")) {
+        mp->algorithm = htons(NX_MP_ALG_HRW);
+    } else if (!strcasecmp(algorithm, "iter_hash")) {
+        mp->algorithm = htons(NX_MP_ALG_ITER_HASH);
+    } else {
+        ovs_fatal(0, "%s: unknown algorithm `%s'", s, algorithm);
+    }
+    mp->max_link = htons(atoi(n_links) - 1);
+    mp->arg = htons(atoi(arg));
+
+    nxm_parse_field_bits(dst, &header, &ofs, &n_bits);
+    mp->ofs_nbits = nxm_encode_ofs_nbits(ofs, n_bits);
+    mp->dst = htonl(header);
+
+    free(s);
+}
+
+void
+multipath_format(const struct nx_action_multipath *mp, struct ds *s)
+{
+    const char *fields, *algorithm;
+
+    switch ((enum nx_mp_fields) ntohs(mp->fields)) {
+    case NX_MP_FIELDS_ETH_SRC:
+        fields = "eth_src";
+        break;
+    case NX_MP_FIELDS_SYMMETRIC_L4:
+        fields = "symmetric_l4";
+        break;
+    default:
+        fields = "<unknown>";
+    }
+
+    switch ((enum nx_mp_algorithm) ntohs(mp->algorithm)) {
+    case NX_MP_ALG_MODULO_N:
+        algorithm = "modulo_n";
+        break;
+    case NX_MP_ALG_HASH_THRESHOLD:
+        algorithm = "hash_threshold";
+        break;
+    case NX_MP_ALG_HRW:
+        algorithm = "hrw";
+        break;
+    case NX_MP_ALG_ITER_HASH:
+        algorithm = "iter_hash";
+        break;
+    default:
+        algorithm = "<unknown>";
+    }
+
+    ds_put_format(s, "multipath(%s,%"PRIu16",%s,%d,%"PRIu16",",
+                  fields, ntohs(mp->basis), algorithm, ntohs(mp->max_link) + 1,
+                  ntohs(mp->arg));
+    nxm_format_field_bits(s, ntohl(mp->dst), nxm_decode_ofs(mp->ofs_nbits),
+                          nxm_decode_n_bits(mp->ofs_nbits));
+    ds_put_char(s, ')');
+}
diff --git a/lib/multipath.h b/lib/multipath.h
new file mode 100644
index 0000000..962602b
--- /dev/null
+++ b/lib/multipath.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MULTIPATH_H
+#define MULTIPATH_H 1
+
+#include <stdint.h>
+
+struct ds;
+struct flow;
+struct nx_action_multipath;
+struct nx_action_reg_move;
+
+/* NXAST_MULTIPATH helper functions.
+ *
+ * See include/openflow/nicira-ext.h for NXAST_MULTIPATH specification.
+ */
+
+int multipath_check(const struct nx_action_multipath *);
+void multipath_execute(const struct nx_action_multipath *, struct flow *);
+
+void multipath_parse(struct nx_action_multipath *, const char *);
+void multipath_format(const struct nx_action_multipath *, struct ds *);
+
+#endif /* multipath.h */
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index a72d1aa..7326429 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -25,6 +25,7 @@
 #include "byte-order.h"
 #include "dynamic-string.h"
 #include "netdev.h"
+#include "multipath.h"
 #include "nx-match.h"
 #include "ofp-util.h"
 #include "ofpbuf.h"
@@ -194,26 +195,65 @@ parse_port_name(const char *name, uint16_t *port)
 static void
 str_to_action(char *str, struct ofpbuf *b)
 {
-    char *act, *arg;
-    char *saveptr = NULL;
     bool drop = false;
     int n_actions;
+    char *pos;
 
-    for (act = strtok_r(str, ", \t\r\n", &saveptr), n_actions = 0; act;
-         act = strtok_r(NULL, ", \t\r\n", &saveptr), n_actions++)
-    {
+    pos = str;
+    for (;;) {
+        char *act, *arg;
+        size_t actlen;
         uint16_t port;
 
+        pos += strspn(pos, ", \t\r\n");
+        if (*pos == '\0') {
+            break;
+        }
+
         if (drop) {
             ovs_fatal(0, "Drop actions must not be followed by other actions");
         }
 
-        /* Arguments are separated by colons */
-        arg = strchr(act, ':');
-        if (arg) {
-            *arg = '\0';
-            arg++;
+        act = pos;
+        actlen = strcspn(pos, ":(, \t\r\n");
+        if (act[actlen] == ':') {
+            /* The argument can be separated by a colon. */
+            size_t arglen;
+
+            arg = act + actlen + 1;
+            arglen = strcspn(arg, ", \t\r\n");
+            pos = arg + arglen + (arg[arglen] != '\0');
+            arg[arglen] = '\0';
+        } else if (act[actlen] == '(') {
+            /* The argument can be surrounded by balanced parentheses.  The
+             * outemost set of parentheses is removed. */
+            int level = 1;
+            size_t arglen;
+
+            arg = act + actlen + 1;
+            for (arglen = 0; level > 0; arglen++) {
+                switch (arg[arglen]) {
+                case '\0':
+                    ovs_fatal(0, "unbalanced parentheses in argument to %s "
+                              "action", act);
+
+                case '(':
+                    level++;
+                    break;
+
+                case ')':
+                    level--;
+                    break;
+                }
+            }
+            arg[arglen - 1] = '\0';
+            pos = arg + arglen;
+        } else {
+            /* There might be no argument at all. */
+            arg = NULL;
+            pos = act + actlen + (act[actlen] != '\0');
         }
+        act[actlen] = '\0';
 
         if (!strcasecmp(act, "mod_vlan_vid")) {
             struct ofp_action_vlan_vid *va;
@@ -334,6 +374,10 @@ str_to_action(char *str, struct ofpbuf *b)
             struct nx_action_reg_load *load;
             load = ofpbuf_put_uninit(b, sizeof *load);
             nxm_parse_reg_load(load, arg);
+        } else if (!strcasecmp(act, "multipath")) {
+            struct nx_action_multipath *nam;
+            nam = ofpbuf_put_uninit(b, sizeof *nam);
+            multipath_parse(nam, arg);
         } else if (!strcasecmp(act, "output")) {
             put_output_action(b, str_to_u32(arg));
         } else if (!strcasecmp(act, "enqueue")) {
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 63edb79..d88a574 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -30,6 +30,7 @@
 #include "compiler.h"
 #include "dynamic-string.h"
 #include "flow.h"
+#include "multipath.h"
 #include "nx-match.h"
 #include "ofp-util.h"
 #include "ofpbuf.h"
@@ -216,6 +217,7 @@ nx_action_len(enum nx_action_subtype subtype)
     case NXAST_REG_LOAD: return sizeof(struct nx_action_reg_load);
     case NXAST_NOTE: return -1;
     case NXAST_SET_TUNNEL64: return sizeof(struct nx_action_set_tunnel64);
+    case NXAST_MULTIPATH: return sizeof(struct nx_action_multipath);
     default: return -1;
     }
 }
@@ -240,6 +242,7 @@ ofp_print_nx_action(struct ds *string, const struct nx_action_header *nah)
         const struct nx_action_resubmit *nar;
         const struct nx_action_reg_move *move;
         const struct nx_action_reg_load *load;
+        const struct nx_action_multipath *nam;
 
         switch ((enum nx_action_subtype) subtype) {
         case NXAST_RESUBMIT:
@@ -281,11 +284,16 @@ ofp_print_nx_action(struct ds *string, const struct nx_action_header *nah)
             return;
 
         case NXAST_SET_TUNNEL64:
-            nast64 = (struct nx_action_set_tunnel64 *) nah;
+            nast64 = (const struct nx_action_set_tunnel64 *) nah;
             ds_put_format(string, "set_tunnel64:%#"PRIx64,
                           ntohll(nast64->tun_id));
             return;
 
+        case NXAST_MULTIPATH:
+            nam = (const struct nx_action_multipath *) nah;
+            multipath_format(nam, string);
+            return;
+
         case NXAST_SNAT__OBSOLETE:
         default:
             break;
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index f99b2b6..8f28edb 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 #include "byte-order.h"
 #include "classifier.h"
+#include "multipath.h"
 #include "nx-match.h"
 #include "ofp-util.h"
 #include "ofpbuf.h"
@@ -1747,6 +1748,14 @@ check_nicira_action(const union ofp_action *a, unsigned int len,
         return check_action_exact_len(a, len,
                                       sizeof(struct nx_action_set_tunnel64));
 
+    case NXAST_MULTIPATH:
+        error = check_action_exact_len(a, len,
+                                       sizeof(struct nx_action_multipath));
+        if (error) {
+            return error;
+        }
+        return multipath_check((const struct nx_action_multipath *) a);
+
     case NXAST_SNAT__OBSOLETE:
     default:
         return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_VENDOR_TYPE);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 2435690..e4bc199 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -35,6 +35,7 @@
 #include "hmap.h"
 #include "in-band.h"
 #include "mac-learning.h"
+#include "multipath.h"
 #include "netdev.h"
 #include "netflow.h"
 #include "netlink.h"
@@ -2877,6 +2878,7 @@ xlate_nicira_action(struct action_xlate_ctx *ctx,
     const struct nx_action_resubmit *nar;
     const struct nx_action_set_tunnel *nast;
     const struct nx_action_set_queue *nasq;
+    const struct nx_action_multipath *nam;
     enum nx_action_subtype subtype = ntohs(nah->subtype);
     ovs_be64 tun_id;
 
@@ -2927,6 +2929,11 @@ xlate_nicira_action(struct action_xlate_ctx *ctx,
         ctx->flow.tun_id = tun_id;
         break;
 
+    case NXAST_MULTIPATH:
+        nam = (const struct nx_action_multipath *) nah;
+        multipath_execute(nam, &ctx->flow);
+        break;
+
     /* If you add a new action here that modifies flow data, don't forget to
      * update the flow key in ctx->flow at the same time. */
 
diff --git a/tests/automake.mk b/tests/automake.mk
index c97684f..c50b62e 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -13,6 +13,7 @@ TESTSUITE_AT = \
 	tests/daemon.at \
 	tests/daemon-py.at \
 	tests/ovs-ofctl.at \
+	tests/multipath.at \
 	tests/vconn.at \
 	tests/file_name.at \
 	tests/aes128.at \
@@ -74,6 +75,7 @@ lcov_wrappers = \
 	tests/lcov/test-jsonrpc \
 	tests/lcov/test-list \
 	tests/lcov/test-lockfile \
+	tests/lcov/test-multipath \
 	tests/lcov/test-ovsdb \
 	tests/lcov/test-random \
 	tests/lcov/test-reconnect \
@@ -125,6 +127,7 @@ valgrind_wrappers = \
 	tests/valgrind/test-jsonrpc \
 	tests/valgrind/test-list \
 	tests/valgrind/test-lockfile \
+	tests/valgrind/test-multipath \
 	tests/valgrind/test-ovsdb \
 	tests/valgrind/test-random \
 	tests/valgrind/test-reconnect \
@@ -219,6 +222,10 @@ noinst_PROGRAMS += tests/test-lockfile
 tests_test_lockfile_SOURCES = tests/test-lockfile.c
 tests_test_lockfile_LDADD = lib/libopenvswitch.a
 
+noinst_PROGRAMS += tests/test-multipath
+tests_test_multipath_SOURCES = tests/test-multipath.c
+tests_test_multipath_LDADD = lib/libopenvswitch.a
+
 noinst_PROGRAMS += tests/test-random
 tests_test_random_SOURCES = tests/test-random.c
 tests_test_random_LDADD = lib/libopenvswitch.a
diff --git a/tests/multipath.at b/tests/multipath.at
new file mode 100644
index 0000000..a5a1a7b
--- /dev/null
+++ b/tests/multipath.at
@@ -0,0 +1,280 @@
+AT_BANNER([multipath link selection])
+
+# The test-multipath program prints a lot of output on stdout, but each of the
+# tests below ignores it because it will vary a bit depending on endianness and
+# floating point precision.  test-multipath will output an error message on
+# stderr and return with exit code 1 if anything really goes wrong.  In each
+# case, we list the (approximate) expected output in a comment to aid debugging
+# if the test does fail.
+
+AT_SETUP([modulo_n multipath link selection])
+AT_CHECK([[test-multipath 'eth_src,50,modulo_n,0,0,NXM_NX_REG0[]']],
+  [0], [ignore])
+# 1 ->  2: disruption=0.50 (perfect=0.50); stddev/expected=0.0000
+# 2 ->  3: disruption=0.66 (perfect=0.33); stddev/expected=0.0023
+# 3 ->  4: disruption=0.75 (perfect=0.25); stddev/expected=0.0061
+# 4 ->  5: disruption=0.80 (perfect=0.20); stddev/expected=0.0082
+# 5 ->  6: disruption=0.83 (perfect=0.17); stddev/expected=0.0083
+# 6 ->  7: disruption=0.86 (perfect=0.14); stddev/expected=0.0061
+# 7 ->  8: disruption=0.88 (perfect=0.12); stddev/expected=0.0103
+# 8 ->  9: disruption=0.89 (perfect=0.11); stddev/expected=0.0129
+# 9 -> 10: disruption=0.90 (perfect=0.10); stddev/expected=0.0091
+#10 -> 11: disruption=0.91 (perfect=0.09); stddev/expected=0.0114
+#11 -> 12: disruption=0.91 (perfect=0.08); stddev/expected=0.0073
+#12 -> 13: disruption=0.92 (perfect=0.08); stddev/expected=0.0165
+#13 -> 14: disruption=0.93 (perfect=0.07); stddev/expected=0.0149
+#14 -> 15: disruption=0.93 (perfect=0.07); stddev/expected=0.0127
+#15 -> 16: disruption=0.94 (perfect=0.06); stddev/expected=0.0142
+#16 -> 17: disruption=0.94 (perfect=0.06); stddev/expected=0.0098
+#17 -> 18: disruption=0.94 (perfect=0.06); stddev/expected=0.0159
+#18 -> 19: disruption=0.95 (perfect=0.05); stddev/expected=0.0121
+#19 -> 20: disruption=0.95 (perfect=0.05); stddev/expected=0.0195
+#20 -> 21: disruption=0.95 (perfect=0.05); stddev/expected=0.0120
+#21 -> 22: disruption=0.95 (perfect=0.05); stddev/expected=0.0181
+#22 -> 23: disruption=0.96 (perfect=0.04); stddev/expected=0.0222
+#23 -> 24: disruption=0.96 (perfect=0.04); stddev/expected=0.0164
+#24 -> 25: disruption=0.96 (perfect=0.04); stddev/expected=0.0146
+#25 -> 26: disruption=0.96 (perfect=0.04); stddev/expected=0.0175
+#26 -> 27: disruption=0.96 (perfect=0.04); stddev/expected=0.0231
+#27 -> 28: disruption=0.96 (perfect=0.04); stddev/expected=0.0172
+#28 -> 29: disruption=0.97 (perfect=0.03); stddev/expected=0.0211
+#29 -> 30: disruption=0.97 (perfect=0.03); stddev/expected=0.0213
+#30 -> 31: disruption=0.97 (perfect=0.03); stddev/expected=0.0253
+#31 -> 32: disruption=0.97 (perfect=0.03); stddev/expected=0.0208
+#32 -> 33: disruption=0.97 (perfect=0.03); stddev/expected=0.0223
+#33 -> 34: disruption=0.97 (perfect=0.03); stddev/expected=0.0215
+#34 -> 35: disruption=0.97 (perfect=0.03); stddev/expected=0.0201
+#35 -> 36: disruption=0.97 (perfect=0.03); stddev/expected=0.0220
+#36 -> 37: disruption=0.97 (perfect=0.03); stddev/expected=0.0221
+#37 -> 38: disruption=0.97 (perfect=0.03); stddev/expected=0.0201
+#38 -> 39: disruption=0.97 (perfect=0.03); stddev/expected=0.0215
+#39 -> 40: disruption=0.97 (perfect=0.03); stddev/expected=0.0271
+#40 -> 41: disruption=0.98 (perfect=0.02); stddev/expected=0.0272
+#41 -> 42: disruption=0.98 (perfect=0.02); stddev/expected=0.0208
+#42 -> 43: disruption=0.98 (perfect=0.02); stddev/expected=0.0226
+#43 -> 44: disruption=0.98 (perfect=0.02); stddev/expected=0.0264
+#44 -> 45: disruption=0.98 (perfect=0.02); stddev/expected=0.0233
+#45 -> 46: disruption=0.98 (perfect=0.02); stddev/expected=0.0285
+#46 -> 47: disruption=0.98 (perfect=0.02); stddev/expected=0.0246
+#47 -> 48: disruption=0.98 (perfect=0.02); stddev/expected=0.0282
+#48 -> 49: disruption=0.98 (perfect=0.02); stddev/expected=0.0233
+#49 -> 50: disruption=0.98 (perfect=0.02); stddev/expected=0.0197
+#50 -> 51: disruption=0.98 (perfect=0.02); stddev/expected=0.0317
+#51 -> 52: disruption=0.98 (perfect=0.02); stddev/expected=0.0283
+#52 -> 53: disruption=0.98 (perfect=0.02); stddev/expected=0.0282
+#53 -> 54: disruption=0.98 (perfect=0.02); stddev/expected=0.0273
+#54 -> 55: disruption=0.98 (perfect=0.02); stddev/expected=0.0283
+#55 -> 56: disruption=0.98 (perfect=0.02); stddev/expected=0.0288
+#56 -> 57: disruption=0.98 (perfect=0.02); stddev/expected=0.0263
+#57 -> 58: disruption=0.98 (perfect=0.02); stddev/expected=0.0339
+#58 -> 59: disruption=0.98 (perfect=0.02); stddev/expected=0.0262
+#59 -> 60: disruption=0.98 (perfect=0.02); stddev/expected=0.0309
+#60 -> 61: disruption=0.98 (perfect=0.02); stddev/expected=0.0285
+#61 -> 62: disruption=0.98 (perfect=0.02); stddev/expected=0.0288
+#62 -> 63: disruption=0.98 (perfect=0.02); stddev/expected=0.0298
+#63 -> 64: disruption=0.98 (perfect=0.02); stddev/expected=0.0277
+AT_CLEANUP
+
+AT_SETUP([hash_threshold multipath link selection])
+AT_CHECK([[test-multipath 'eth_src,50,hash_threshold,0,0,NXM_NX_REG0[]']],
+  [0], [ignore])
+# 1 ->  2: disruption=0.50 (perfect=0.50); stddev/expected=0.0000
+# 2 ->  3: disruption=0.50 (perfect=0.33); stddev/expected=0.0056
+# 3 ->  4: disruption=0.50 (perfect=0.25); stddev/expected=0.0050
+# 4 ->  5: disruption=0.50 (perfect=0.20); stddev/expected=0.0074
+# 5 ->  6: disruption=0.50 (perfect=0.17); stddev/expected=0.0031
+# 6 ->  7: disruption=0.50 (perfect=0.14); stddev/expected=0.0078
+# 7 ->  8: disruption=0.50 (perfect=0.12); stddev/expected=0.0085
+# 8 ->  9: disruption=0.50 (perfect=0.11); stddev/expected=0.0093
+# 9 -> 10: disruption=0.50 (perfect=0.10); stddev/expected=0.0083
+#10 -> 11: disruption=0.51 (perfect=0.09); stddev/expected=0.0110
+#11 -> 12: disruption=0.50 (perfect=0.08); stddev/expected=0.0124
+#12 -> 13: disruption=0.50 (perfect=0.08); stddev/expected=0.0143
+#13 -> 14: disruption=0.50 (perfect=0.07); stddev/expected=0.0148
+#14 -> 15: disruption=0.50 (perfect=0.07); stddev/expected=0.0099
+#15 -> 16: disruption=0.50 (perfect=0.06); stddev/expected=0.0166
+#16 -> 17: disruption=0.50 (perfect=0.06); stddev/expected=0.0099
+#17 -> 18: disruption=0.50 (perfect=0.06); stddev/expected=0.0194
+#18 -> 19: disruption=0.50 (perfect=0.05); stddev/expected=0.0169
+#19 -> 20: disruption=0.50 (perfect=0.05); stddev/expected=0.0169
+#20 -> 21: disruption=0.50 (perfect=0.05); stddev/expected=0.0185
+#21 -> 22: disruption=0.50 (perfect=0.05); stddev/expected=0.0160
+#22 -> 23: disruption=0.50 (perfect=0.04); stddev/expected=0.0236
+#23 -> 24: disruption=0.50 (perfect=0.04); stddev/expected=0.0147
+#24 -> 25: disruption=0.50 (perfect=0.04); stddev/expected=0.0195
+#25 -> 26: disruption=0.50 (perfect=0.04); stddev/expected=0.0199
+#26 -> 27: disruption=0.50 (perfect=0.04); stddev/expected=0.0227
+#27 -> 28: disruption=0.50 (perfect=0.04); stddev/expected=0.0198
+#28 -> 29: disruption=0.50 (perfect=0.03); stddev/expected=0.0216
+#29 -> 30: disruption=0.50 (perfect=0.03); stddev/expected=0.0233
+#30 -> 31: disruption=0.50 (perfect=0.03); stddev/expected=0.0266
+#31 -> 32: disruption=0.51 (perfect=0.03); stddev/expected=0.0238
+#32 -> 33: disruption=0.50 (perfect=0.03); stddev/expected=0.0194
+#33 -> 34: disruption=0.50 (perfect=0.03); stddev/expected=0.0173
+#34 -> 35: disruption=0.50 (perfect=0.03); stddev/expected=0.0223
+#35 -> 36: disruption=0.50 (perfect=0.03); stddev/expected=0.0220
+#36 -> 37: disruption=0.50 (perfect=0.03); stddev/expected=0.0237
+#37 -> 38: disruption=0.50 (perfect=0.03); stddev/expected=0.0237
+#38 -> 39: disruption=0.50 (perfect=0.03); stddev/expected=0.0251
+#39 -> 40: disruption=0.50 (perfect=0.03); stddev/expected=0.0212
+#40 -> 41: disruption=0.50 (perfect=0.02); stddev/expected=0.0267
+#41 -> 42: disruption=0.50 (perfect=0.02); stddev/expected=0.0242
+#42 -> 43: disruption=0.50 (perfect=0.02); stddev/expected=0.0222
+#43 -> 44: disruption=0.50 (perfect=0.02); stddev/expected=0.0244
+#44 -> 45: disruption=0.50 (perfect=0.02); stddev/expected=0.0231
+#45 -> 46: disruption=0.50 (perfect=0.02); stddev/expected=0.0299
+#46 -> 47: disruption=0.50 (perfect=0.02); stddev/expected=0.0263
+#47 -> 48: disruption=0.50 (perfect=0.02); stddev/expected=0.0307
+#48 -> 49: disruption=0.50 (perfect=0.02); stddev/expected=0.0253
+#49 -> 50: disruption=0.50 (perfect=0.02); stddev/expected=0.0228
+#50 -> 51: disruption=0.50 (perfect=0.02); stddev/expected=0.0273
+#51 -> 52: disruption=0.50 (perfect=0.02); stddev/expected=0.0243
+#52 -> 53: disruption=0.50 (perfect=0.02); stddev/expected=0.0268
+#53 -> 54: disruption=0.50 (perfect=0.02); stddev/expected=0.0251
+#54 -> 55: disruption=0.50 (perfect=0.02); stddev/expected=0.0297
+#55 -> 56: disruption=0.50 (perfect=0.02); stddev/expected=0.0287
+#56 -> 57: disruption=0.50 (perfect=0.02); stddev/expected=0.0299
+#57 -> 58: disruption=0.50 (perfect=0.02); stddev/expected=0.0272
+#58 -> 59: disruption=0.50 (perfect=0.02); stddev/expected=0.0295
+#59 -> 60: disruption=0.50 (perfect=0.02); stddev/expected=0.0312
+#60 -> 61: disruption=0.50 (perfect=0.02); stddev/expected=0.0361
+#61 -> 62: disruption=0.50 (perfect=0.02); stddev/expected=0.0308
+#62 -> 63: disruption=0.50 (perfect=0.02); stddev/expected=0.0283
+#63 -> 64: disruption=0.50 (perfect=0.02); stddev/expected=0.0325
+AT_CLEANUP
+
+AT_SETUP([hrw multipath link selection])
+AT_CHECK([[test-multipath 'eth_src,50,hrw,0,0,NXM_NX_REG0[]']],
+  [0], [ignore])
+# 1 ->  2: disruption=0.50 (perfect=0.50); stddev/expected=0.0000
+# 2 ->  3: disruption=0.33 (perfect=0.33); stddev/expected=0.0033
+# 3 ->  4: disruption=0.25 (perfect=0.25); stddev/expected=0.0076
+# 4 ->  5: disruption=0.20 (perfect=0.20); stddev/expected=0.0059
+# 5 ->  6: disruption=0.17 (perfect=0.17); stddev/expected=0.0030
+# 6 ->  7: disruption=0.14 (perfect=0.14); stddev/expected=0.0124
+# 7 ->  8: disruption=0.13 (perfect=0.12); stddev/expected=0.0072
+# 8 ->  9: disruption=0.11 (perfect=0.11); stddev/expected=0.0074
+# 9 -> 10: disruption=0.10 (perfect=0.10); stddev/expected=0.0161
+#10 -> 11: disruption=0.09 (perfect=0.09); stddev/expected=0.0055
+#11 -> 12: disruption=0.08 (perfect=0.08); stddev/expected=0.0092
+#12 -> 13: disruption=0.08 (perfect=0.08); stddev/expected=0.0134
+#13 -> 14: disruption=0.07 (perfect=0.07); stddev/expected=0.0124
+#14 -> 15: disruption=0.07 (perfect=0.07); stddev/expected=0.0156
+#15 -> 16: disruption=0.06 (perfect=0.06); stddev/expected=0.0182
+#16 -> 17: disruption=0.06 (perfect=0.06); stddev/expected=0.0150
+#17 -> 18: disruption=0.06 (perfect=0.06); stddev/expected=0.0109
+#18 -> 19: disruption=0.05 (perfect=0.05); stddev/expected=0.0162
+#19 -> 20: disruption=0.05 (perfect=0.05); stddev/expected=0.0149
+#20 -> 21: disruption=0.05 (perfect=0.05); stddev/expected=0.0148
+#21 -> 22: disruption=0.05 (perfect=0.05); stddev/expected=0.0230
+#22 -> 23: disruption=0.04 (perfect=0.04); stddev/expected=0.0208
+#23 -> 24: disruption=0.04 (perfect=0.04); stddev/expected=0.0210
+#24 -> 25: disruption=0.04 (perfect=0.04); stddev/expected=0.0228
+#25 -> 26: disruption=0.04 (perfect=0.04); stddev/expected=0.0155
+#26 -> 27: disruption=0.04 (perfect=0.04); stddev/expected=0.0208
+#27 -> 28: disruption=0.04 (perfect=0.04); stddev/expected=0.0218
+#28 -> 29: disruption=0.03 (perfect=0.03); stddev/expected=0.0193
+#29 -> 30: disruption=0.03 (perfect=0.03); stddev/expected=0.0169
+#30 -> 31: disruption=0.03 (perfect=0.03); stddev/expected=0.0163
+#31 -> 32: disruption=0.03 (perfect=0.03); stddev/expected=0.0192
+#32 -> 33: disruption=0.03 (perfect=0.03); stddev/expected=0.0212
+#33 -> 34: disruption=0.03 (perfect=0.03); stddev/expected=0.0240
+#34 -> 35: disruption=0.03 (perfect=0.03); stddev/expected=0.0227
+#35 -> 36: disruption=0.03 (perfect=0.03); stddev/expected=0.0230
+#36 -> 37: disruption=0.03 (perfect=0.03); stddev/expected=0.0183
+#37 -> 38: disruption=0.03 (perfect=0.03); stddev/expected=0.0227
+#38 -> 39: disruption=0.03 (perfect=0.03); stddev/expected=0.0255
+#39 -> 40: disruption=0.03 (perfect=0.03); stddev/expected=0.0247
+#40 -> 41: disruption=0.02 (perfect=0.02); stddev/expected=0.0228
+#41 -> 42: disruption=0.02 (perfect=0.02); stddev/expected=0.0247
+#42 -> 43: disruption=0.02 (perfect=0.02); stddev/expected=0.0265
+#43 -> 44: disruption=0.02 (perfect=0.02); stddev/expected=0.0250
+#44 -> 45: disruption=0.02 (perfect=0.02); stddev/expected=0.0258
+#45 -> 46: disruption=0.02 (perfect=0.02); stddev/expected=0.0196
+#46 -> 47: disruption=0.02 (perfect=0.02); stddev/expected=0.0235
+#47 -> 48: disruption=0.02 (perfect=0.02); stddev/expected=0.0314
+#48 -> 49: disruption=0.02 (perfect=0.02); stddev/expected=0.0293
+#49 -> 50: disruption=0.02 (perfect=0.02); stddev/expected=0.0241
+#50 -> 51: disruption=0.02 (perfect=0.02); stddev/expected=0.0291
+#51 -> 52: disruption=0.02 (perfect=0.02); stddev/expected=0.0304
+#52 -> 53: disruption=0.02 (perfect=0.02); stddev/expected=0.0307
+#53 -> 54: disruption=0.02 (perfect=0.02); stddev/expected=0.0250
+#54 -> 55: disruption=0.02 (perfect=0.02); stddev/expected=0.0290
+#55 -> 56: disruption=0.02 (perfect=0.02); stddev/expected=0.0284
+#56 -> 57: disruption=0.02 (perfect=0.02); stddev/expected=0.0272
+#57 -> 58: disruption=0.02 (perfect=0.02); stddev/expected=0.0272
+#58 -> 59: disruption=0.02 (perfect=0.02); stddev/expected=0.0304
+#59 -> 60: disruption=0.02 (perfect=0.02); stddev/expected=0.0345
+#60 -> 61: disruption=0.02 (perfect=0.02); stddev/expected=0.0251
+#61 -> 62: disruption=0.02 (perfect=0.02); stddev/expected=0.0249
+#62 -> 63: disruption=0.02 (perfect=0.02); stddev/expected=0.0285
+#63 -> 64: disruption=0.02 (perfect=0.02); stddev/expected=0.0285
+AT_CLEANUP
+
+AT_SETUP([iter_hash multipath link selection])
+AT_CHECK([[test-multipath 'eth_src,50,iter_hash,0,0,NXM_NX_REG0[]']],
+  [0], [ignore])
+# 1 ->  2: disruption=0.50 (perfect=0.50); stddev/expected=0.0000
+# 2 ->  3: disruption=0.42 (perfect=0.33); stddev/expected=0.0034
+# 3 ->  4: disruption=0.25 (perfect=0.25); stddev/expected=0.0082
+# 4 ->  5: disruption=0.42 (perfect=0.20); stddev/expected=0.0073
+# 5 ->  6: disruption=0.17 (perfect=0.17); stddev/expected=0.0040
+# 6 ->  7: disruption=0.14 (perfect=0.14); stddev/expected=0.0069
+# 7 ->  8: disruption=0.13 (perfect=0.12); stddev/expected=0.0131
+# 8 ->  9: disruption=0.45 (perfect=0.11); stddev/expected=0.0093
+# 9 -> 10: disruption=0.10 (perfect=0.10); stddev/expected=0.0127
+#10 -> 11: disruption=0.09 (perfect=0.09); stddev/expected=0.0134
+#11 -> 12: disruption=0.08 (perfect=0.08); stddev/expected=0.0101
+#12 -> 13: disruption=0.08 (perfect=0.08); stddev/expected=0.0127
+#13 -> 14: disruption=0.07 (perfect=0.07); stddev/expected=0.0115
+#14 -> 15: disruption=0.07 (perfect=0.07); stddev/expected=0.0100
+#15 -> 16: disruption=0.06 (perfect=0.06); stddev/expected=0.0111
+#16 -> 17: disruption=0.47 (perfect=0.06); stddev/expected=0.0137
+#17 -> 18: disruption=0.05 (perfect=0.06); stddev/expected=0.0204
+#18 -> 19: disruption=0.05 (perfect=0.05); stddev/expected=0.0082
+#19 -> 20: disruption=0.05 (perfect=0.05); stddev/expected=0.0124
+#20 -> 21: disruption=0.05 (perfect=0.05); stddev/expected=0.0203
+#21 -> 22: disruption=0.05 (perfect=0.05); stddev/expected=0.0196
+#22 -> 23: disruption=0.04 (perfect=0.04); stddev/expected=0.0183
+#23 -> 24: disruption=0.04 (perfect=0.04); stddev/expected=0.0212
+#24 -> 25: disruption=0.04 (perfect=0.04); stddev/expected=0.0176
+#25 -> 26: disruption=0.04 (perfect=0.04); stddev/expected=0.0173
+#26 -> 27: disruption=0.04 (perfect=0.04); stddev/expected=0.0159
+#27 -> 28: disruption=0.03 (perfect=0.04); stddev/expected=0.0168
+#28 -> 29: disruption=0.03 (perfect=0.03); stddev/expected=0.0190
+#29 -> 30: disruption=0.03 (perfect=0.03); stddev/expected=0.0305
+#30 -> 31: disruption=0.03 (perfect=0.03); stddev/expected=0.0282
+#31 -> 32: disruption=0.03 (perfect=0.03); stddev/expected=0.0255
+#32 -> 33: disruption=0.49 (perfect=0.03); stddev/expected=0.0220
+#33 -> 34: disruption=0.03 (perfect=0.03); stddev/expected=0.0188
+#34 -> 35: disruption=0.03 (perfect=0.03); stddev/expected=0.0203
+#35 -> 36: disruption=0.03 (perfect=0.03); stddev/expected=0.0207
+#36 -> 37: disruption=0.03 (perfect=0.03); stddev/expected=0.0261
+#37 -> 38: disruption=0.03 (perfect=0.03); stddev/expected=0.0226
+#38 -> 39: disruption=0.03 (perfect=0.03); stddev/expected=0.0233
+#39 -> 40: disruption=0.03 (perfect=0.03); stddev/expected=0.0161
+#40 -> 41: disruption=0.03 (perfect=0.02); stddev/expected=0.0303
+#41 -> 42: disruption=0.02 (perfect=0.02); stddev/expected=0.0249
+#42 -> 43: disruption=0.02 (perfect=0.02); stddev/expected=0.0262
+#43 -> 44: disruption=0.02 (perfect=0.02); stddev/expected=0.0260
+#44 -> 45: disruption=0.02 (perfect=0.02); stddev/expected=0.0266
+#45 -> 46: disruption=0.02 (perfect=0.02); stddev/expected=0.0287
+#46 -> 47: disruption=0.02 (perfect=0.02); stddev/expected=0.0213
+#47 -> 48: disruption=0.02 (perfect=0.02); stddev/expected=0.0301
+#48 -> 49: disruption=0.02 (perfect=0.02); stddev/expected=0.0230
+#49 -> 50: disruption=0.02 (perfect=0.02); stddev/expected=0.0248
+#50 -> 51: disruption=0.02 (perfect=0.02); stddev/expected=0.0203
+#51 -> 52: disruption=0.02 (perfect=0.02); stddev/expected=0.0235
+#52 -> 53: disruption=0.02 (perfect=0.02); stddev/expected=0.0340
+#53 -> 54: disruption=0.02 (perfect=0.02); stddev/expected=0.0264
+#54 -> 55: disruption=0.02 (perfect=0.02); stddev/expected=0.0292
+#55 -> 56: disruption=0.02 (perfect=0.02); stddev/expected=0.0246
+#56 -> 57: disruption=0.02 (perfect=0.02); stddev/expected=0.0270
+#57 -> 58: disruption=0.02 (perfect=0.02); stddev/expected=0.0299
+#58 -> 59: disruption=0.02 (perfect=0.02); stddev/expected=0.0307
+#59 -> 60: disruption=0.02 (perfect=0.02); stddev/expected=0.0275
+#60 -> 61: disruption=0.02 (perfect=0.02); stddev/expected=0.0289
+#61 -> 62: disruption=0.02 (perfect=0.02); stddev/expected=0.0292
+#62 -> 63: disruption=0.02 (perfect=0.02); stddev/expected=0.0292
+#63 -> 64: disruption=0.02 (perfect=0.02); stddev/expected=0.0307
+AT_CLEANUP
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
index 563a7c6..66c8934 100644
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -1,7 +1,7 @@
 AT_BANNER([ovs-ofctl])
 
 AT_SETUP([ovs-ofctl parse-flows])
-AT_DATA([flows.txt], [
+AT_DATA([flows.txt], [[
 # comment
 tcp,tp_src=123,actions=flood
 in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop
@@ -13,11 +13,13 @@ cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller
 actions=note:41.42.43,note:00.01.02.03.04.05.06.07,note
 tun_id=0x1234,cookie=0x5678,actions=flood
 actions=set_tunnel:0x1234,set_tunnel64:0x9876,set_tunnel:0x123456789
+actions=multipath(eth_src, 50, hrw, 12, 0, NXM_NX_REG0[0..3]),multipath(symmetric_l4, 1024, iter_hash, 5000, 5050, NXM_NX_REG0[0..12])
 actions=drop
-])
-AT_CHECK([ovs-ofctl parse-flows flows.txt], [0], [stdout], [stderr])
-AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], [dnl
-OFPT_FLOW_MOD: ADD tcp,tp_src=123 actions=FLOOD
+]])
+AT_CHECK([ovs-ofctl parse-flows flows.txt
+], [0], [stdout], [stderr])
+AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], 
+[[OFPT_FLOW_MOD: ADD tcp,tp_src=123 actions=FLOOD
 OFPT_FLOW_MOD: ADD in_port=65534,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0 actions=drop
 OFPT_FLOW_MOD: ADD arp,nw_src=192.168.0.1 actions=drop_spoofed_arp,NORMAL
 OFPT_FLOW_MOD: ADD udp,dl_vlan_pcp=7 idle:5 actions=strip_vlan,output:0
@@ -28,8 +30,9 @@ OFPT_FLOW_MOD: ADD actions=note:41.42.43.00.00.00,note:00.01.02.03.04.05.06.07.0
 NXT_TUN_ID_FROM_COOKIE: set=1
 OFPT_FLOW_MOD: ADD cookie:0x123400005678 actions=FLOOD
 OFPT_FLOW_MOD: ADD actions=set_tunnel:0x1234,set_tunnel64:0x9876,set_tunnel64:0x123456789
+OFPT_FLOW_MOD: ADD actions=multipath(eth_src,50,hrw,12,0,NXM_NX_REG0[0..3]),multipath(symmetric_l4,1024,iter_hash,5000,5050,NXM_NX_REG0[0..12])
 OFPT_FLOW_MOD: ADD actions=drop
-])
+]])
 AT_CHECK([sed 's/.*|//' stderr], [0], [dnl
 normalization changed ofp_match, details:
  pre: wildcards=  0x3820f8  in_port=65534  dl_src=00:0a:e4:25:6b:b0  dl_dst=00:00:00:00:00:00  dl_vlan=    9  dl_vlan_pcp=  0  dl_type=     0  nw_tos=   0  nw_proto=   0  nw_src=         0  nw_dst=         0  tp_src=    0  tp_dst=    0
diff --git a/tests/test-multipath.c b/tests/test-multipath.c
new file mode 100644
index 0000000..03a666f
--- /dev/null
+++ b/tests/test-multipath.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "multipath.h"
+
+#include <assert.h>
+#include <getopt.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "flow.h"
+#include "random.h"
+#include "util.h"
+
+int
+main(int argc, char *argv[])
+{
+    enum { MP_MAX_LINKS = 63 };
+    struct nx_action_multipath mp;
+    bool ok = true;
+    int n;
+
+    set_program_name(argv[0]);
+    random_init();
+
+    if (argc != 2) {
+        ovs_fatal(0, "usage: %s multipath_action", program_name);
+    }
+
+    multipath_parse(&mp, argv[1]);
+    for (n = 1; n <= MP_MAX_LINKS; n++) {
+        enum { N_FLOWS = 65536 };
+        double disruption, perfect, distribution;
+        int histogram[MP_MAX_LINKS];
+        double sum_dev2, stddev;
+        int changed;
+        int i;
+
+        changed = 0;
+        memset(histogram, 0, sizeof histogram);
+        for (i = 0; i < N_FLOWS; i++) {
+            int old_link, new_link;
+            struct flow flow;
+
+            random_bytes(&flow, sizeof flow);
+
+            mp.max_link = htons(n - 1);
+            multipath_execute(&mp, &flow);
+            old_link = flow.regs[0];
+
+            mp.max_link = htons(n);
+            multipath_execute(&mp, &flow);
+            new_link = flow.regs[0];
+
+            assert(old_link >= 0 && old_link < n);
+            assert(new_link >= 0 && new_link < n + 1);
+
+            histogram[old_link]++;
+            changed += old_link != new_link;
+        }
+
+        sum_dev2 = 0.0;
+        for (i = 0; i < n; i++) {
+            double mean = (double) N_FLOWS / n;
+            double deviation = histogram[i] - mean;
+
+            sum_dev2 += deviation * deviation;
+        }
+        stddev = sqrt(sum_dev2 / n);
+
+        disruption = (double) changed / N_FLOWS;
+        perfect = 1.0 / (n + 1);
+        distribution = stddev / ((double) N_FLOWS / n);
+        printf("%2d -> %2d: disruption=%.2f (perfect=%.2f); "
+               "stddev/expected=%.4f\n",
+               n, n + 1, disruption, perfect, distribution);
+
+        switch (ntohs(mp.algorithm)) {
+        case NX_MP_ALG_MODULO_N:
+            if (disruption < (n < 2 ? .25 : .5)) {
+                fprintf(stderr, "%d -> %d: disruption=%.2f < .5\n",
+                        n, n + 1, disruption);
+                ok = false;
+            }
+            break;
+
+        case NX_MP_ALG_HASH_THRESHOLD:
+            if (disruption < .48 || disruption > .52) {
+                fprintf(stderr, "%d -> %d: disruption=%.2f not approximately "
+                        ".5\n", n, n + 1, disruption);
+                ok = false;
+            }
+            break;
+
+        case NX_MP_ALG_ITER_HASH:
+            if (!(n & (n - 1))) {
+                break;
+            }
+            /* Fall through. */
+        case NX_MP_ALG_HRW:
+            if (fabs(disruption - perfect) >= .01) {
+                fprintf(stderr, "%d -> %d: disruption=%.5f differs from "
+                        "perfect=%.5f by more than .01\n",
+                        n, n + 1, disruption, perfect);
+                ok = false;
+            }
+            break;
+
+        default:
+            NOT_REACHED();
+        }
+    }
+
+    return ok ? 0 : 1;
+}
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 35b02be..4642902 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -43,6 +43,7 @@ m4_include([tests/check-structs.at])
 m4_include([tests/daemon.at])
 m4_include([tests/daemon-py.at])
 m4_include([tests/ovs-ofctl.at])
+m4_include([tests/multipath.at])
 m4_include([tests/vconn.at])
 m4_include([tests/file_name.at])
 m4_include([tests/aes128.at])
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index c3cbbf1..383c6a7 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -549,6 +549,21 @@ in field \fBdst\fR.
 .IP
 Example: \fBload:55\->NXM_NX_REG2[0..5]\fR loads value 55 (bit pattern
 \fB110111\fR) into bits 0 through 5, inclusive, in register 2.
+.
+.IP "\fBmultipath(\fIfields\fB, \fIbasis\fB, \fIalgorithm\fB, \fIn_links\fB, \fIarg\fB, \fIdst\fB[\fIstart\fB..\fIend\fB])\fR"
+Hashes \fIfields\fR using \fIbasis\fR as a universal hash parameter,
+then the applies multipath link selection \fIalgorithm\fR (with
+parameter \fIarg\fR) to choose one of \fIn_links\fR output links
+numbered 0 through \fIn_links\fR minus 1, and stores the link into
+\fIdst\fB[\fIstart\fB..\fIend\fB]\fR, which must be an NXM register as
+described above.
+.IP
+Currently, \fIfields\fR must be either \fBeth_src\fR or
+\fBsymmetric_l4\fR and \fIalgorithm\fR must be one of \fBmodulo_n\fR,
+\fBhash_threshold\fR, \fBhrw\fR, and \fBiter_hash\fR.  Only
+the \BIiter_hash\fR algorithm uses \fIarg\fR.
+.IP
+Refer to \fBnicira\-ext.h\fR for more details.
 .RE
 .
 .IP
-- 
1.7.1





More information about the dev mailing list