[ovs-dev] [PATCH 5/7] datapath: Change listing flows to use an iterator concept.

Ben Pfaff blp at nicira.com
Fri Dec 24 01:15:39 UTC 2010


One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 datapath/datapath.c                     |  140 +++++++++++++------------------
 datapath/odp-compat.h                   |    7 ++-
 datapath/table.c                        |   43 ++++++++++
 datapath/table.h                        |    3 +
 include/openvswitch/datapath-protocol.h |   18 ++++-
 lib/dpif-linux.c                        |   19 +++--
 lib/dpif-netdev.c                       |   26 +++---
 lib/dpif-provider.h                     |   25 ++++--
 lib/dpif.c                              |  101 ++++++++---------------
 lib/dpif.h                              |   10 ++-
 lib/hmap.c                              |   40 +++++++++
 lib/hmap.h                              |    3 +
 ofproto/ofproto.c                       |   25 ++----
 utilities/ovs-dpctl.c                   |   26 +++---
 14 files changed, 277 insertions(+), 209 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index b95c8f2..7b55698 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -1037,48 +1037,6 @@ static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec
 	return flowvec->n_flows;
 }
 
-struct list_flows_cbdata {
-	struct datapath *dp;
-	struct odp_flow __user *uflows;
-	u32 n_flows;
-	u32 listed_flows;
-};
-
-static int list_flow(struct tbl_node *node, void *cbdata_)
-{
-	struct sw_flow *flow = flow_cast(node);
-	struct list_flows_cbdata *cbdata = cbdata_;
-	struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
-	int error;
-
-	if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
-		return -EFAULT;
-	error = answer_query(cbdata->dp, flow, 0, ufp);
-	if (error)
-		return error;
-
-	if (cbdata->listed_flows >= cbdata->n_flows)
-		return cbdata->listed_flows;
-	return 0;
-}
-
-static int do_list_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
-{
-	struct list_flows_cbdata cbdata;
-	int error;
-
-	if (!flowvec->n_flows)
-		return 0;
-
-	cbdata.dp = dp;
-	cbdata.uflows = (struct odp_flow __user *)flowvec->flows;
-	cbdata.n_flows = flowvec->n_flows;
-	cbdata.listed_flows = 0;
-
-	error = tbl_foreach(get_table_protected(dp), list_flow, &cbdata);
-	return error ? error : cbdata.listed_flows;
-}
-
 static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp,
 			    int (*function)(struct datapath *,
 					    const struct odp_flowvec *))
@@ -1100,6 +1058,44 @@ static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp,
 		: put_user(retval, &uflowvec->n_flows));
 }
 
+static struct sw_flow *do_dump_flow(struct datapath *dp, u32 __user *state)
+{
+	struct tbl *table = get_table_protected(dp);
+	struct tbl_node *tbl_node;
+	u32 bucket, obj;
+
+	if (get_user(bucket, &state[0]) || get_user(obj, &state[1]))
+		return ERR_PTR(-EFAULT);
+
+	tbl_node = tbl_next(table, bucket, obj, &bucket, &obj);
+
+	if (__put_user(bucket, &state[0]) || __put_user(obj, &state[1]))
+		return ERR_PTR(-EFAULT);
+
+	return tbl_node ? flow_cast(tbl_node) : NULL;
+}
+
+static int dump_flow(struct datapath *dp, struct odp_flow_dump __user *ufdp)
+{
+	struct odp_flow __user *ufp;
+	struct sw_flow *flow;
+
+	flow = do_dump_flow(dp, ufdp->state);
+	if (IS_ERR(flow))
+		return PTR_ERR(flow);
+
+	if (get_user(ufp, &ufdp->flow))
+		return -EFAULT;
+
+	if (!flow)
+		return put_user(ODPFF_EOF, &ufp->flags);
+
+	if (copy_to_user(&ufp->key, &flow->key, sizeof(struct odp_flow_key)) ||
+	    put_user(0, &ufp->flags))
+		return -EFAULT;
+	return answer_query(dp, flow, 0, ufp);
+}
+
 static int do_execute(struct datapath *dp, const struct odp_execute *execute)
 {
 	struct odp_flow_key key;
@@ -1489,8 +1485,8 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
 		err = do_flowvec_ioctl(dp, argp, do_query_flows);
 		break;
 
-	case ODP_FLOW_LIST:
-		err = do_flowvec_ioctl(dp, argp, do_list_flows);
+	case ODP_FLOW_DUMP:
+		err = dump_flow(dp, (struct odp_flow_dump __user *)argp);
 		break;
 
 	case ODP_EXECUTE:
@@ -1628,47 +1624,27 @@ static int compat_query_flows(struct datapath *dp,
 	return n_flows;
 }
 
-struct compat_list_flows_cbdata {
-	struct datapath *dp;
-	struct compat_odp_flow __user *uflows;
-	u32 n_flows;
-	u32 listed_flows;
-};
-
-static int compat_list_flow(struct tbl_node *node, void *cbdata_)
+static int compat_dump_flow(struct datapath *dp, struct compat_odp_flow_dump __user *ufdp)
 {
-	struct sw_flow *flow = flow_cast(node);
-	struct compat_list_flows_cbdata *cbdata = cbdata_;
-	struct compat_odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
-	int error;
-
-	if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
-		return -EFAULT;
-	error = compat_answer_query(cbdata->dp, flow, 0, ufp);
-	if (error)
-		return error;
-
-	if (cbdata->listed_flows >= cbdata->n_flows)
-		return cbdata->listed_flows;
-	return 0;
-}
+	struct compat_odp_flow __user *ufp;
+	compat_uptr_t compat_ufp;
+	struct sw_flow *flow;
 
-static int compat_list_flows(struct datapath *dp,
-			     struct compat_odp_flow __user *flows, u32 n_flows)
-{
-	struct compat_list_flows_cbdata cbdata;
-	int error;
+	flow = do_dump_flow(dp, ufdp->state);
+	if (IS_ERR(flow))
+		return PTR_ERR(flow);
 
-	if (!n_flows)
-		return 0;
+	if (get_user(compat_ufp, &ufdp->flow))
+		return -EFAULT;
+	ufp = compat_ptr(compat_ufp);
 
-	cbdata.dp = dp;
-	cbdata.uflows = flows;
-	cbdata.n_flows = n_flows;
-	cbdata.listed_flows = 0;
+	if (!flow)
+		return put_user(ODPFF_EOF, &ufp->flags);
 
-	error = tbl_foreach(get_table_protected(dp), compat_list_flow, &cbdata);
-	return error ? error : cbdata.listed_flows;
+	if (copy_to_user(&ufp->key, &flow->key, sizeof(struct odp_flow_key)) ||
+	    put_user(0, &ufp->flags))
+		return -EFAULT;
+	return compat_answer_query(dp, flow, 0, ufp);
 }
 
 static int compat_flowvec_ioctl(struct datapath *dp, unsigned long argp,
@@ -1775,8 +1751,8 @@ static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned
 		err = compat_flowvec_ioctl(dp, argp, compat_query_flows);
 		break;
 
-	case ODP_FLOW_LIST32:
-		err = compat_flowvec_ioctl(dp, argp, compat_list_flows);
+	case ODP_FLOW_DUMP32:
+		err = compat_dump_flow(dp, compat_ptr(argp));
 		break;
 
 	case ODP_EXECUTE32:
diff --git a/datapath/odp-compat.h b/datapath/odp-compat.h
index b03539a..f359a71 100644
--- a/datapath/odp-compat.h
+++ b/datapath/odp-compat.h
@@ -18,7 +18,7 @@
 #define ODP_VPORT_LIST32	_IOWR('O', 10, struct compat_odp_portvec)
 #define ODP_FLOW_GET32		_IOWR('O', 13, struct compat_odp_flowvec)
 #define ODP_FLOW_PUT32		_IOWR('O', 14, struct compat_odp_flow)
-#define ODP_FLOW_LIST32		_IOWR('O', 15, struct compat_odp_flowvec)
+#define ODP_FLOW_DUMP32		_IOWR('O', 15, struct compat_odp_flow_dump)
 #define ODP_FLOW_DEL32		_IOWR('O', 17, struct compat_odp_flow)
 #define ODP_EXECUTE32		_IOR('O', 18, struct compat_odp_execute)
 #define ODP_FLOW_DEL32		_IOWR('O', 17, struct compat_odp_flow)
@@ -41,6 +41,11 @@ struct compat_odp_flow_put {
 	u32 flags;
 };
 
+struct compat_odp_flow_dump {
+	compat_uptr_t flow;
+	uint32_t state[4];
+};
+
 struct compat_odp_flowvec {
 	compat_uptr_t flows;
 	u32 n_flows;
diff --git a/datapath/table.c b/datapath/table.c
index 2806308..ccdf45b 100644
--- a/datapath/table.c
+++ b/datapath/table.c
@@ -246,6 +246,49 @@ int tbl_foreach(struct tbl *table,
 	return 0;
 }
 
+/**
+ * tbl_next - find next node in hash table
+ * @table: table to iterate
+ * @s_bucket: hash value of bucket to start from
+ * @s_obj: index to start from within first bucket
+ * @e_bucket: returns bucket to pass as @s_bucket on next call
+ * @e_obj: returns index to pass as @e_bucket on next call
+ *
+ * Returns the next node in @table in hash order, using @s_bucket and @s_obj to
+ * determine where to begin iteration and storing the values to pass on the
+ * next iteration into @e_bucket and @e_obj.  Returns %NULL when no nodes
+ * remain in the hash table.
+ *
+ * Pass 0 for @s_bucket and @s_obj to begin a new iteration.
+ */
+struct tbl_node *tbl_next(struct tbl *table,
+			  u32 s_bucket, u32 s_obj,
+			  u32 *e_bucket, u32 *e_obj)
+{
+	unsigned int l2_size = table->n_buckets >> TBL_L1_BITS;
+	unsigned int i;
+
+	for (i = s_bucket >> TBL_L1_BITS; i < l2_size; i++) {
+		struct tbl_bucket __rcu **l2 = table->buckets[i];
+		unsigned int j;
+
+		for (j = s_bucket & (TBL_L1_SIZE - 1); j < TBL_L1_SIZE; j++) {
+			struct tbl_bucket *bucket = rcu_dereference(l2[j]);
+
+			if (bucket && s_obj < bucket->n_objs) {
+				*e_bucket = (i << TBL_L1_BITS) + j;
+				*e_obj = s_obj + 1;
+				return bucket->objs[s_obj];
+			}
+
+			s_bucket = s_obj = 0;
+		}
+	}
+	*e_bucket = 0;
+	*e_obj = 0;
+	return NULL;
+}
+
 static int insert_table_flow(struct tbl_node *node, void *new_table_)
 {
 	struct tbl *new_table = new_table_;
diff --git a/datapath/table.h b/datapath/table.h
index 609d9b1..8ca16f2 100644
--- a/datapath/table.h
+++ b/datapath/table.h
@@ -59,6 +59,9 @@ int tbl_remove(struct tbl *, struct tbl_node *);
 unsigned int tbl_count(struct tbl *);
 int tbl_foreach(struct tbl *,
 		int (*callback)(struct tbl_node *, void *aux), void *aux);
+struct tbl_node *tbl_next(struct tbl *,
+			  u32 s_bucket, u32 s_obj,
+			  u32 *e_bucket, u32 *e_obj);
 
 int tbl_n_buckets(struct tbl *);
 struct tbl *tbl_expand(struct tbl *);
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h
index b8f93de..2dc3f83 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -89,7 +89,7 @@
 
 #define ODP_FLOW_GET            _IOWR('O', 13, struct odp_flowvec)
 #define ODP_FLOW_PUT            _IOWR('O', 14, struct odp_flow)
-#define ODP_FLOW_LIST           _IOWR('O', 15, struct odp_flowvec)
+#define ODP_FLOW_DUMP           _IOWR('O', 15, struct odp_flow_dump)
 #define ODP_FLOW_FLUSH          _IO('O', 16)
 #define ODP_FLOW_DEL            _IOWR('O', 17, struct odp_flow)
 
@@ -238,6 +238,7 @@ struct odp_flow_key {
 
 /* Flags for ODP_FLOW. */
 #define ODPFF_ZERO_TCP_FLAGS (1 << 0) /* Zero the TCP flags. */
+#define ODPFF_EOF            (1 << 1) /* ODP_FLOW_DUMP: end of flow table. */
 
 struct odp_flow {
     struct odp_flow_stats stats;
@@ -263,6 +264,21 @@ struct odp_flowvec {
     uint32_t n_flows;
 };
 
+/* ODP_FLOW_DUMP argument.
+ *
+ * This is used to iterate through the flow table flow-by-flow.  Each
+ * ODP_FLOW_DUMP call either stores a new odp_flow into 'flow' or stores
+ * ODPFF_EOF into flow->flags to indicate that the end of the table has been
+ * reaches, and updates 'state' in-place.
+ *
+ * Before the first call, zero 'state'.  The format of 'state' is otherwise
+ * unspecified.
+ */
+struct odp_flow_dump {
+	struct odp_flow *flow;
+	uint32_t state[4];
+};
+
 /* Action types. */
 enum odp_action_type {
     ODPAT_UNSPEC,
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 870e03e..d753a99 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -379,15 +379,20 @@ dpif_linux_flow_del(struct dpif *dpif_, struct odp_flow *flow)
 }
 
 static int
-dpif_linux_flow_list(const struct dpif *dpif_, struct odp_flow flows[], int n)
+dpif_linux_flow_dump(const struct dpif *dpif, struct dpif_dump *dump,
+                     struct odp_flow *flow)
 {
-    struct odp_flowvec fv;
+    struct odp_flow_dump fd;
     int error;
 
-    fv.flows = flows;
-    fv.n_flows = n;
-    error = do_ioctl(dpif_, ODP_FLOW_LIST, &fv);
-    return error ? -error : fv.n_flows;
+    BUILD_ASSERT(sizeof fd.state == sizeof *dump);
+
+    fd.flow = flow;
+    memcpy(fd.state, dump, sizeof fd.state);
+    error = do_ioctl(dpif, ODP_FLOW_DUMP, &fd);
+    memcpy(dump, fd.state, sizeof fd.state);
+
+    return error ? error : fd.flow->flags & ODPFF_EOF ? EOF : 0;
 }
 
 static int
@@ -514,7 +519,7 @@ const struct dpif_class dpif_linux_class = {
     dpif_linux_flow_put,
     dpif_linux_flow_del,
     dpif_linux_flow_flush,
-    dpif_linux_flow_list,
+    dpif_linux_flow_dump,
     dpif_linux_execute,
     dpif_linux_recv_get_mask,
     dpif_linux_recv_set_mask,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 74cd0ca..18ed808 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -780,23 +780,23 @@ dpif_netdev_flow_del(struct dpif *dpif, struct odp_flow *odp_flow)
 }
 
 static int
-dpif_netdev_flow_list(const struct dpif *dpif, struct odp_flow flows[], int n)
+dpif_netdev_flow_dump(const struct dpif *dpif, struct dpif_dump *dump,
+                      struct odp_flow *odp_flow)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
-    int i;
-
-    i = 0;
-    HMAP_FOR_EACH (flow, node, &dp->flow_table) {
-        if (i >= n) {
-            break;
-        }
+    struct hmap_node *node;
 
-        odp_flow_key_from_flow(&flows[i].key, &flow->key);
-        answer_flow_query(flow, 0, &flows[i]);
-        i++;
+    node = hmap_at_position(&dp->flow_table, &dump->state[0], &dump->state[1]);
+    if (!node) {
+        return EOF;
     }
-    return hmap_count(&dp->flow_table);
+
+    flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
+    odp_flow_key_from_flow(&odp_flow->key, &flow->key);
+    answer_flow_query(flow, 0, odp_flow);
+
+    return 0;
 }
 
 static int
@@ -1275,7 +1275,7 @@ const struct dpif_class dpif_netdev_class = {
     dpif_netdev_flow_put,
     dpif_netdev_flow_del,
     dpif_netdev_flow_flush,
-    dpif_netdev_flow_list,
+    dpif_netdev_flow_dump,
     dpif_netdev_execute,
     dpif_netdev_recv_get_mask,
     dpif_netdev_recv_set_mask,
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index deb3bf2..dd79b8a 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -240,12 +240,25 @@ struct dpif_class {
      * packets. */
     int (*flow_flush)(struct dpif *dpif);
 
-    /* Stores up to 'n' flows in 'dpif' into 'flows', updating their statistics
-     * and actions as described under the flow_get member function.  If
-     * successful, returns the number of flows actually present in 'dpif',
-     * which might be greater than the number stored (if 'dpif' has more than
-     * 'n' flows).  On failure, returns a negative errno value. */
-    int (*flow_list)(const struct dpif *dpif, struct odp_flow flows[], int n);
+    /* Iterates through 'dpif''s flow table flow-by-flow.  Each call stores a
+     * new odp_flow into 'flow' and returns 0, or returns EOF if the end of the
+     * table has been reached, or returns a positive errno value to indicate an
+     * error.
+     *
+     * The caller provides an all-zero 'dump' on the first call for a given
+     * iteration.  The provider should update 'dump' in-place to keep its
+     * position.  The provider may use any desired format for 'dump'.
+     *
+     * If the caller does not want to dump flow actions, it initializes
+     * 'flow->actions' to NULL and 'flow->actions_len' to 0.  Otherwise, it
+     * points 'flow->actions' to an array of struct nlattr and initializes
+     * 'flow->actions_len' with the number of elements in the array.
+     * Regardless of 'flow->actions' or 'flow->actions_len', the provider
+     * should also store the actually required number of elements into
+     * 'flow->actions_len'.
+     */
+    int (*flow_dump)(const struct dpif *dpif, struct dpif_dump *dump,
+                     struct odp_flow *flow);
 
     /* Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet
      * frame specified in 'packet'. */
diff --git a/lib/dpif.c b/lib/dpif.c
index a0f638a..6143ae4 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -792,85 +792,52 @@ dpif_flow_del(struct dpif *dpif, struct odp_flow *flow)
     return error;
 }
 
-/* Stores up to 'n' flows in 'dpif' into 'flows', including their statistics
- * but not including any information about their actions.  If successful,
- * returns 0 and sets '*n_out' to the number of flows actually present in
- * 'dpif', which might be greater than the number stored (if 'dpif' has more
- * than 'n' flows).  On failure, returns a negative errno value and sets
- * '*n_out' to 0. */
-int
-dpif_flow_list(const struct dpif *dpif, struct odp_flow flows[], size_t n,
-               size_t *n_out)
+/* Initializes 'dump' to begin dumping the flows in a dpif. */
+void
+dpif_dump_init(struct dpif_dump *dump)
 {
-    uint32_t i;
-    int retval;
-
-    COVERAGE_INC(dpif_flow_query_list);
-    if (RUNNING_ON_VALGRIND) {
-        memset(flows, 0, n * sizeof *flows);
-    } else {
-        for (i = 0; i < n; i++) {
-            flows[i].actions = NULL;
-            flows[i].actions_len = 0;
-        }
-    }
-    retval = dpif->dpif_class->flow_list(dpif, flows, n);
-    if (retval < 0) {
-        *n_out = 0;
-        VLOG_WARN_RL(&error_rl, "%s: flow list failed (%s)",
-                     dpif_name(dpif), strerror(-retval));
-        return -retval;
-    } else {
-        COVERAGE_ADD(dpif_flow_query_list_n, retval);
-        *n_out = MIN(n, retval);
-        VLOG_DBG_RL(&dpmsg_rl, "%s: listed %zu flows (of %d)",
-                    dpif_name(dpif), *n_out, retval);
-        return 0;
-    }
+    memset(dump, 0, sizeof *dump);
 }
 
-/* Retrieves all of the flows in 'dpif'.
+/* Iterates through 'dpif''s flow table flow-by-flow.  Each call stores a
+ * new odp_flow into 'flow' and returns 0, or returns EOF if the end of the
+ * table has been reached, or returns a positive errno value to indicate an
+ * error.
  *
- * If successful, returns 0 and stores in '*flowsp' a pointer to a newly
- * allocated array of flows, including their statistics but not including any
- * information about their actions, and sets '*np' to the number of flows in
- * '*flowsp'.  The caller is responsible for freeing '*flowsp' by calling
- * free().
+ * The caller should initialize 'dump' with dpif_dump_init() before the first
+ * call.  'dpif' will update 'dump' in-place to keep its position.  The format
+ * of 'dump' is opaque to the caller.
  *
- * On failure, returns a positive errno value and sets '*flowsp' to NULL and
- * '*np' to 0. */
+ * Dumping flow actions is optional.  To avoid dumping actions initialize
+ * 'flow->actions' to NULL and 'flow->actions_len' to 0.  Otherwise, point
+ * 'flow->actions' to an array of struct nlattr and initialize
+ * 'flow->actions_len' with the number of elements in the array.
+ * dpif_dump_next() will fill in as many actions as will fit into the provided
+ * array.  Regardless of 'flow->actions' or 'flow->actions_len',
+ * dpif_dump_next() will also store the actually required number of elements
+ * into 'flow->actions_len'.
+ */
 int
-dpif_flow_list_all(const struct dpif *dpif,
-                   struct odp_flow **flowsp, size_t *np)
+dpif_dump_next(const struct dpif *dpif, struct dpif_dump *dump,
+               struct odp_flow *flow)
 {
-    struct odp_stats stats;
-    struct odp_flow *flows;
-    size_t n_flows;
     int error;
 
-    *flowsp = NULL;
-    *np = 0;
-
-    error = dpif_get_dp_stats(dpif, &stats);
-    if (error) {
-        return error;
-    }
+    check_rw_odp_flow(flow);
+    error = dpif->dpif_class->flow_dump(dpif, dump, flow);
 
-    flows = xmalloc(sizeof *flows * stats.n_flows);
-    error = dpif_flow_list(dpif, flows, stats.n_flows, &n_flows);
-    if (error) {
-        free(flows);
-        return error;
+    if (error == EOF) {
+        VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif));
+    } else {
+        if (error) {
+            flow->key_len = 0;
+        }
+        if (should_log_flow_message(error)) {
+            log_flow_operation(dpif, "dump flow", error, flow);
+        }
     }
 
-    if (stats.n_flows != n_flows) {
-        VLOG_WARN_RL(&error_rl, "%s: datapath stats reported %"PRIu32" "
-                     "flows but flow listing reported %zu",
-                     dpif_name(dpif), stats.n_flows, n_flows);
-    }
-    *flowsp = flows;
-    *np = n_flows;
-    return 0;
+    return error;
 }
 
 /* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on
diff --git a/lib/dpif.h b/lib/dpif.h
index dfd179b..483fcde 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -79,10 +79,12 @@ int dpif_flow_put(struct dpif *, struct odp_flow_put *);
 int dpif_flow_del(struct dpif *, struct odp_flow *);
 int dpif_flow_get(const struct dpif *, struct odp_flow *);
 int dpif_flow_get_multiple(const struct dpif *, struct odp_flow[], size_t n);
-int dpif_flow_list(const struct dpif *, struct odp_flow[], size_t n,
-                   size_t *n_out);
-int dpif_flow_list_all(const struct dpif *,
-                       struct odp_flow **flowsp, size_t *np);
+
+struct dpif_dump {
+    uint32_t state[4];
+};
+void dpif_dump_init(struct dpif_dump *);
+int dpif_dump_next(const struct dpif *, struct dpif_dump *, struct odp_flow *);
 
 int dpif_execute(struct dpif *, const struct nlattr *actions,
                  size_t actions_len, const struct ofpbuf *);
diff --git a/lib/hmap.c b/lib/hmap.c
index 6b850fd..f4ae786 100644
--- a/lib/hmap.c
+++ b/lib/hmap.c
@@ -218,3 +218,43 @@ hmap_random_node(const struct hmap *hmap)
     }
     return node;
 }
+
+/* Returns the next node in 'hmap' in hash order, or NULL if no nodes remain in
+ * 'hmap'.  Uses '*bucketp' and '*offsetp' to determine where to begin
+ * iteration, and stores new values to pass on the next iteration into them
+ * before returning.
+ *
+ * Before beginning iteration, store 0 into '*bucketp' and '*offsetp'.
+ */
+struct hmap_node *
+hmap_at_position(const struct hmap *hmap,
+                 uint32_t *bucketp, uint32_t *offsetp)
+{
+    size_t offset;
+    size_t b_idx;
+
+    offset = *offsetp;
+    for (b_idx = *bucketp & hmap->mask; b_idx <= hmap->mask; b_idx++) {
+        struct hmap_node *node;
+        size_t n_idx;
+
+        for (n_idx = 0, node = hmap->buckets[b_idx]; node != NULL;
+             n_idx++, node = node->next) {
+            if (n_idx == offset) {
+                if (node->next) {
+                    *bucketp = node->hash;
+                    *offsetp = offset + 1;
+                } else {
+                    *bucketp = node->hash + 1;
+                    *offsetp = 0;
+                }
+                return node;
+            }
+        }
+        offset = 0;
+    }
+
+    *bucketp = 0;
+    *offsetp = 0;
+    return NULL;
+}
diff --git a/lib/hmap.h b/lib/hmap.h
index f6d2827..a987fa4 100644
--- a/lib/hmap.h
+++ b/lib/hmap.h
@@ -157,6 +157,9 @@ static inline struct hmap_node *hmap_first(const struct hmap *);
 static inline struct hmap_node *hmap_next(const struct hmap *,
                                           const struct hmap_node *);
 
+struct hmap_node *hmap_at_position(const struct hmap *,
+                                   uint32_t *bucket, uint32_t *offset);
+
 /* Returns the number of nodes currently in 'hmap'. */
 static inline size_t
 hmap_count(const struct hmap *hmap)
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index e4bc199..f435690 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -4470,36 +4470,29 @@ ofproto_expire(struct ofproto *ofproto)
 static void
 ofproto_update_used(struct ofproto *p)
 {
-    struct odp_flow *flows;
-    size_t n_flows;
-    size_t i;
-    int error;
+    struct dpif_dump dump;
+    struct odp_flow f;
 
-    error = dpif_flow_list_all(p->dpif, &flows, &n_flows);
-    if (error) {
-        return;
-    }
+    memset(&f, 0, sizeof f);
 
-    for (i = 0; i < n_flows; i++) {
-        struct odp_flow *f = &flows[i];
+    dpif_dump_init(&dump);
+    while (!dpif_dump_next(p->dpif, &dump, &f)) {
         struct facet *facet;
         struct flow flow;
 
-        odp_flow_key_to_flow(&f->key, &flow);
+        odp_flow_key_to_flow(&f.key, &flow);
         facet = facet_find(p, &flow);
 
         if (facet && facet->installed) {
-            facet_update_time(p, facet, &f->stats);
-            facet_account(p, facet, f->stats.n_bytes);
+            facet_update_time(p, facet, &f.stats);
+            facet_account(p, facet, f.stats.n_bytes);
         } else {
             /* There's a flow in the datapath that we know nothing about.
              * Delete it. */
             COVERAGE_INC(ofproto_unexpected_rule);
-            dpif_flow_del(p->dpif, f);
+            dpif_flow_del(p->dpif, &f);
         }
-
     }
-    free(flows);
 }
 
 /* Calculates and returns the number of milliseconds of idle time after which
diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
index 8f2a2bc..ad8d98a 100644
--- a/utilities/ovs-dpctl.c
+++ b/utilities/ovs-dpctl.c
@@ -466,28 +466,30 @@ do_dump_dps(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
 static void
 do_dump_flows(int argc OVS_UNUSED, char *argv[])
 {
-    struct odp_flow *flows;
+    struct dpif_dump dump;
     struct dpif *dpif;
-    size_t n_flows;
     struct ds ds;
-    size_t i;
 
     run(parsed_dpif_open(argv[1], false, &dpif), "opening datapath");
-    run(dpif_flow_list_all(dpif, &flows, &n_flows), "listing all flows");
 
     ds_init(&ds);
-    for (i = 0; i < n_flows; i++) {
-        struct odp_flow *f = &flows[i];
+    dpif_dump_init(&dump);
+    for (;;) {
         enum { MAX_ACTIONS = 4096 }; /* An arbitrary but large number. */
         struct nlattr actions[MAX_ACTIONS];
+        struct odp_flow f;
+
+        memset(&f, 0, sizeof f);
+        f.actions = actions;
+        f.actions_len = sizeof actions;
 
-        f->actions = actions;
-        f->actions_len = sizeof actions;
-        if (!dpif_flow_get(dpif, f)) {
-            ds_clear(&ds);
-            format_odp_flow(&ds, f);
-            printf("%s\n", ds_cstr(&ds));
+        if (dpif_dump_next(dpif, &dump, &f)) {
+            break;
         }
+
+        ds_clear(&ds);
+        format_odp_flow(&ds, &f);
+        printf("%s\n", ds_cstr(&ds));
     }
     ds_destroy(&ds);
     dpif_close(dpif);
-- 
1.7.1





More information about the dev mailing list