[ovs-dev] [netlink v3 05/16] datapath: Change listing flows to use an iterator concept.

Ben Pfaff blp at nicira.com
Thu Dec 30 00:56:26 UTC 2010


One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 datapath/datapath.c                     |  140 +++++++++++++------------------
 datapath/odp-compat.h                   |    7 ++-
 datapath/table.c                        |   47 ++++++++++
 datapath/table.h                        |    1 +
 include/openvswitch/datapath-protocol.h |   18 ++++-
 lib/dpif-linux.c                        |   30 +++++--
 lib/dpif-netdev.c                       |   48 ++++++++---
 lib/dpif-provider.h                     |   33 ++++++--
 lib/dpif.c                              |  123 ++++++++++++---------------
 lib/dpif.h                              |   13 ++-
 ofproto/ofproto.c                       |   26 ++----
 utilities/ovs-dpctl.c                   |   27 ++++---
 12 files changed, 303 insertions(+), 210 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 8e76af2..a3b24e3 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -1043,48 +1043,6 @@ static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec
 	return flowvec->n_flows;
 }
 
-struct list_flows_cbdata {
-	struct datapath *dp;
-	struct odp_flow __user *uflows;
-	u32 n_flows;
-	u32 listed_flows;
-};
-
-static int list_flow(struct tbl_node *node, void *cbdata_)
-{
-	struct sw_flow *flow = flow_cast(node);
-	struct list_flows_cbdata *cbdata = cbdata_;
-	struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
-	int error;
-
-	if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
-		return -EFAULT;
-	error = answer_query(cbdata->dp, flow, 0, ufp);
-	if (error)
-		return error;
-
-	if (cbdata->listed_flows >= cbdata->n_flows)
-		return cbdata->listed_flows;
-	return 0;
-}
-
-static int do_list_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
-{
-	struct list_flows_cbdata cbdata;
-	int error;
-
-	if (!flowvec->n_flows)
-		return 0;
-
-	cbdata.dp = dp;
-	cbdata.uflows = (struct odp_flow __user __force*)flowvec->flows;
-	cbdata.n_flows = flowvec->n_flows;
-	cbdata.listed_flows = 0;
-
-	error = tbl_foreach(get_table_protected(dp), list_flow, &cbdata);
-	return error ? error : cbdata.listed_flows;
-}
-
 static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp,
 			    int (*function)(struct datapath *,
 					    const struct odp_flowvec *))
@@ -1106,6 +1064,44 @@ static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp,
 		: put_user(retval, &uflowvec->n_flows));
 }
 
+static struct sw_flow *do_dump_flow(struct datapath *dp, u32 __user *state)
+{
+	struct tbl *table = get_table_protected(dp);
+	struct tbl_node *tbl_node;
+	u32 bucket, obj;
+
+	if (get_user(bucket, &state[0]) || get_user(obj, &state[1]))
+		return ERR_PTR(-EFAULT);
+
+	tbl_node = tbl_next(table, &bucket, &obj);
+
+	if (put_user(bucket, &state[0]) || put_user(obj, &state[1]))
+		return ERR_PTR(-EFAULT);
+
+	return tbl_node ? flow_cast(tbl_node) : NULL;
+}
+
+static int dump_flow(struct datapath *dp, struct odp_flow_dump __user *udumpp)
+{
+	struct odp_flow __user *uflowp;
+	struct sw_flow *flow;
+
+	flow = do_dump_flow(dp, udumpp->state);
+	if (IS_ERR(flow))
+		return PTR_ERR(flow);
+
+	if (get_user(uflowp, (struct odp_flow __user *__user*)&udumpp->flow))
+		return -EFAULT;
+
+	if (!flow)
+		return put_user(ODPFF_EOF, &uflowp->flags);
+
+	if (copy_to_user(&uflowp->key, &flow->key, sizeof(struct odp_flow_key)) ||
+	    put_user(0, &uflowp->flags))
+		return -EFAULT;
+	return answer_query(dp, flow, 0, uflowp);
+}
+
 static int do_execute(struct datapath *dp, const struct odp_execute *execute)
 {
 	struct odp_flow_key key;
@@ -1483,8 +1479,8 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
 		err = do_flowvec_ioctl(dp, argp, do_query_flows);
 		break;
 
-	case ODP_FLOW_LIST:
-		err = do_flowvec_ioctl(dp, argp, do_list_flows);
+	case ODP_FLOW_DUMP:
+		err = dump_flow(dp, (struct odp_flow_dump __user *)argp);
 		break;
 
 	case ODP_EXECUTE:
@@ -1622,47 +1618,27 @@ static int compat_query_flows(struct datapath *dp,
 	return n_flows;
 }
 
-struct compat_list_flows_cbdata {
-	struct datapath *dp;
-	struct compat_odp_flow __user *uflows;
-	u32 n_flows;
-	u32 listed_flows;
-};
-
-static int compat_list_flow(struct tbl_node *node, void *cbdata_)
+static int compat_dump_flow(struct datapath *dp, struct compat_odp_flow_dump __user *udumpp)
 {
-	struct sw_flow *flow = flow_cast(node);
-	struct compat_list_flows_cbdata *cbdata = cbdata_;
-	struct compat_odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
-	int error;
-
-	if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
-		return -EFAULT;
-	error = compat_answer_query(cbdata->dp, flow, 0, ufp);
-	if (error)
-		return error;
-
-	if (cbdata->listed_flows >= cbdata->n_flows)
-		return cbdata->listed_flows;
-	return 0;
-}
+	struct compat_odp_flow __user *uflowp;
+	compat_uptr_t compat_ufp;
+	struct sw_flow *flow;
 
-static int compat_list_flows(struct datapath *dp,
-			     struct compat_odp_flow __user *flows, u32 n_flows)
-{
-	struct compat_list_flows_cbdata cbdata;
-	int error;
+	flow = do_dump_flow(dp, udumpp->state);
+	if (IS_ERR(flow))
+		return PTR_ERR(flow);
 
-	if (!n_flows)
-		return 0;
+	if (get_user(compat_ufp, &udumpp->flow))
+		return -EFAULT;
+	uflowp = compat_ptr(compat_ufp);
 
-	cbdata.dp = dp;
-	cbdata.uflows = flows;
-	cbdata.n_flows = n_flows;
-	cbdata.listed_flows = 0;
+	if (!flow)
+		return put_user(ODPFF_EOF, &uflowp->flags);
 
-	error = tbl_foreach(get_table_protected(dp), compat_list_flow, &cbdata);
-	return error ? error : cbdata.listed_flows;
+	if (copy_to_user(&uflowp->key, &flow->key, sizeof(struct odp_flow_key)) ||
+	    put_user(0, &uflowp->flags))
+		return -EFAULT;
+	return compat_answer_query(dp, flow, 0, uflowp);
 }
 
 static int compat_flowvec_ioctl(struct datapath *dp, unsigned long argp,
@@ -1769,8 +1745,8 @@ static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned
 		err = compat_flowvec_ioctl(dp, argp, compat_query_flows);
 		break;
 
-	case ODP_FLOW_LIST32:
-		err = compat_flowvec_ioctl(dp, argp, compat_list_flows);
+	case ODP_FLOW_DUMP32:
+		err = compat_dump_flow(dp, compat_ptr(argp));
 		break;
 
 	case ODP_EXECUTE32:
diff --git a/datapath/odp-compat.h b/datapath/odp-compat.h
index b03539a..ca2f170 100644
--- a/datapath/odp-compat.h
+++ b/datapath/odp-compat.h
@@ -18,7 +18,7 @@
 #define ODP_VPORT_LIST32	_IOWR('O', 10, struct compat_odp_portvec)
 #define ODP_FLOW_GET32		_IOWR('O', 13, struct compat_odp_flowvec)
 #define ODP_FLOW_PUT32		_IOWR('O', 14, struct compat_odp_flow)
-#define ODP_FLOW_LIST32		_IOWR('O', 15, struct compat_odp_flowvec)
+#define ODP_FLOW_DUMP32		_IOWR('O', 15, struct compat_odp_flow_dump)
 #define ODP_FLOW_DEL32		_IOWR('O', 17, struct compat_odp_flow)
 #define ODP_EXECUTE32		_IOR('O', 18, struct compat_odp_execute)
 #define ODP_FLOW_DEL32		_IOWR('O', 17, struct compat_odp_flow)
@@ -41,6 +41,11 @@ struct compat_odp_flow_put {
 	u32 flags;
 };
 
+struct compat_odp_flow_dump {
+	compat_uptr_t flow;
+	uint32_t state[2];
+};
+
 struct compat_odp_flowvec {
 	compat_uptr_t flows;
 	u32 n_flows;
diff --git a/datapath/table.c b/datapath/table.c
index 5c1b82a..f4e7f5b 100644
--- a/datapath/table.c
+++ b/datapath/table.c
@@ -251,6 +251,53 @@ int tbl_foreach(struct tbl *table,
 	return 0;
 }
 
+/**
+ * tbl_next - find next node in hash table
+ * @table: table to iterate
+ * @bucketp: On entry, hash value of bucket to start from.  On exit, updated
+ * to bucket to start from on next call.
+ * @objp: On entry, index to start from within first bucket.  On exit, updated
+ * to index to start from on next call.
+ *
+ * Returns the next node in @table in hash order, or %NULL when no nodes remain
+ * in the hash table.
+ *
+ * On entry, uses the values that @bucketp and @objp reference to determine
+ * where to begin iteration.  Use 0 for both values to begin a new iteration.
+ * On exit, stores the values to pass on the next iteration into @bucketp and
+ * @objp's referents.
+ */
+struct tbl_node *tbl_next(struct tbl *table, u32 *bucketp, u32 *objp)
+{
+	unsigned int n_l1 = table->n_buckets >> TBL_L1_SHIFT;
+	u32 s_l1_idx = *bucketp >> TBL_L1_SHIFT;
+	u32 s_l2_idx = *bucketp & (TBL_L2_SIZE - 1);
+	u32 s_obj = *objp;
+	unsigned int l1_idx;
+
+	for (l1_idx = s_l1_idx; l1_idx < n_l1; l1_idx++) {
+		struct tbl_bucket __rcu **l2 = table->buckets[l1_idx];
+		unsigned int l2_idx;
+
+		for (l2_idx = s_l2_idx; l2_idx < TBL_L2_SIZE; l2_idx++) {
+			struct tbl_bucket *bucket;
+
+			bucket = rcu_dereference(l2[l2_idx]);
+			if (bucket && s_obj < bucket->n_objs) {
+				*bucketp = (l1_idx << TBL_L1_SHIFT) + l2_idx;
+				*objp = s_obj + 1;
+				return bucket->objs[s_obj];
+			}
+
+			s_obj = 0;
+		}
+		s_l2_idx = 0;
+	}
+	*bucketp = 0;
+	*objp = 0;
+	return NULL;
+}
+
 static int insert_table_flow(struct tbl_node *node, void *new_table_)
 {
 	struct tbl *new_table = new_table_;
diff --git a/datapath/table.h b/datapath/table.h
index f186600..22574be 100644
--- a/datapath/table.h
+++ b/datapath/table.h
@@ -62,6 +62,7 @@ int tbl_remove(struct tbl *, struct tbl_node *);
 unsigned int tbl_count(struct tbl *);
 int tbl_foreach(struct tbl *,
 		int (*callback)(struct tbl_node *, void *aux), void *aux);
+struct tbl_node *tbl_next(struct tbl *, u32 *bucketp, u32 *objp);
 
 int tbl_n_buckets(struct tbl *);
 struct tbl *tbl_expand(struct tbl *);
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h
index 6bab4bc..730aa22 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -89,7 +89,7 @@
 
 #define ODP_FLOW_GET            _IOWR('O', 13, struct odp_flowvec)
 #define ODP_FLOW_PUT            _IOWR('O', 14, struct odp_flow)
-#define ODP_FLOW_LIST           _IOWR('O', 15, struct odp_flowvec)
+#define ODP_FLOW_DUMP           _IOWR('O', 15, struct odp_flow_dump)
 #define ODP_FLOW_FLUSH          _IO('O', 16)
 #define ODP_FLOW_DEL            _IOWR('O', 17, struct odp_flow)
 
@@ -237,6 +237,7 @@ struct odp_flow_key {
 
 /* Flags for ODP_FLOW. */
 #define ODPFF_ZERO_TCP_FLAGS (1 << 0) /* Zero the TCP flags. */
+#define ODPFF_EOF            (1 << 1) /* ODP_FLOW_DUMP: end of flow table. */
 
 struct odp_flow {
     struct odp_flow_stats stats;
@@ -262,6 +263,21 @@ struct odp_flowvec {
     uint32_t n_flows;
 };
 
+/* ODP_FLOW_DUMP argument.
+ *
+ * This is used to iterate through the flow table flow-by-flow.  Each
+ * ODP_FLOW_DUMP call either stores a new odp_flow into 'flow' or stores
+ * ODPFF_EOF into flow->flags to indicate that the end of the table has been
+ * reaches, and updates 'state' in-place.
+ *
+ * Before the first call, zero 'state'.  The format of 'state' is otherwise
+ * unspecified.
+ */
+struct odp_flow_dump {
+	struct odp_flow *flow;
+	uint32_t state[2];
+};
+
 /* Action types. */
 enum odp_action_type {
     ODPAT_UNSPEC,
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 66610cf..6664145 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -393,15 +393,29 @@ dpif_linux_flow_del(struct dpif *dpif_, struct odp_flow *flow)
 }
 
 static int
-dpif_linux_flow_list(const struct dpif *dpif_, struct odp_flow flows[], int n)
+dpif_linux_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
 {
-    struct odp_flowvec fv;
+    *statep = xzalloc(sizeof(struct odp_flow_dump));
+    return 0;
+}
+
+static int
+dpif_linux_flow_dump_next(const struct dpif *dpif, void *state,
+                          struct odp_flow *flow)
+{
+    struct odp_flow_dump *dump = state;
     int error;
 
-    fv.flows = flows;
-    fv.n_flows = n;
-    error = do_ioctl(dpif_, ODP_FLOW_LIST, &fv);
-    return error ? -error : fv.n_flows;
+    dump->flow = flow;
+    error = do_ioctl(dpif, ODP_FLOW_DUMP, dump);
+    return error ? error : flow->flags & ODPFF_EOF ? EOF : 0;
+}
+
+static int
+dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state)
+{
+    free(state);
+    return 0;
 }
 
 static int
@@ -528,7 +542,9 @@ const struct dpif_class dpif_linux_class = {
     dpif_linux_flow_put,
     dpif_linux_flow_del,
     dpif_linux_flow_flush,
-    dpif_linux_flow_list,
+    dpif_linux_flow_dump_start,
+    dpif_linux_flow_dump_next,
+    dpif_linux_flow_dump_done,
     dpif_linux_execute,
     dpif_linux_recv_get_mask,
     dpif_linux_recv_set_mask,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a52accc..8b5df36 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -779,24 +779,44 @@ dpif_netdev_flow_del(struct dpif *dpif, struct odp_flow *odp_flow)
     }
 }
 
+struct dp_netdev_flow_state {
+    uint32_t bucket;
+    uint32_t offset;
+};
+
 static int
-dpif_netdev_flow_list(const struct dpif *dpif, struct odp_flow flows[], int n)
+dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
 {
+    *statep = xzalloc(sizeof(struct dp_netdev_flow_state));
+    return 0;
+}
+
+static int
+dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_,
+                           struct odp_flow *odp_flow)
+{
+    struct dp_netdev_flow_state *state = state_;
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_flow *flow;
-    int i;
-
-    i = 0;
-    HMAP_FOR_EACH (flow, node, &dp->flow_table) {
-        if (i >= n) {
-            break;
-        }
+    struct hmap_node *node;
 
-        odp_flow_key_from_flow(&flows[i].key, &flow->key);
-        answer_flow_query(flow, 0, &flows[i]);
-        i++;
+    node = hmap_at_position(&dp->flow_table, &state->bucket, &state->offset);
+    if (!node) {
+        return EOF;
     }
-    return hmap_count(&dp->flow_table);
+
+    flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
+    odp_flow_key_from_flow(&odp_flow->key, &flow->key);
+    answer_flow_query(flow, 0, odp_flow);
+
+    return 0;
+}
+
+static int
+dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state)
+{
+    free(state);
+    return 0;
 }
 
 static int
@@ -1276,7 +1296,9 @@ const struct dpif_class dpif_netdev_class = {
     dpif_netdev_flow_put,
     dpif_netdev_flow_del,
     dpif_netdev_flow_flush,
-    dpif_netdev_flow_list,
+    dpif_netdev_flow_dump_start,
+    dpif_netdev_flow_dump_next,
+    dpif_netdev_flow_dump_done,
     dpif_netdev_execute,
     dpif_netdev_recv_get_mask,
     dpif_netdev_recv_set_mask,
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index deb3bf2..6b66a5d 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -240,12 +240,33 @@ struct dpif_class {
      * packets. */
     int (*flow_flush)(struct dpif *dpif);
 
-    /* Stores up to 'n' flows in 'dpif' into 'flows', updating their statistics
-     * and actions as described under the flow_get member function.  If
-     * successful, returns the number of flows actually present in 'dpif',
-     * which might be greater than the number stored (if 'dpif' has more than
-     * 'n' flows).  On failure, returns a negative errno value. */
-    int (*flow_list)(const struct dpif *dpif, struct odp_flow flows[], int n);
+    /* Attempts to begin dumping the flows in a dpif.  On success, returns 0
+     * and initializes '*statep' with any data needed for iteration.  On
+     * failure, returns a positive errno value. */
+    int (*flow_dump_start)(const struct dpif *dpif, void **statep);
+
+    /* Attempts to retrieve another flow from 'dpif' for 'state', which was
+     * initialized by a successful call to the 'flow_dump_start' function for
+     * 'dpif'.  On success, stores a new odp_flow into 'flow' and returns 0.
+     * Returns EOF if the end of the flow table has been reached, or a positive
+     * errno value on error.  This function will not be called again once it
+     * returns nonzero once for a given iteration (but the 'flow_dump_done'
+     * function will be called afterward).
+     *
+     * Dumping flow actions is optional.  If the caller does not want to dump
+     * actions it will initialize 'flow->actions' to NULL and
+     * 'flow->actions_len' to 0.  Otherwise, 'flow->actions' points to an array
+     * of struct nlattr and 'flow->actions_len' contains the number of bytes of
+     * Netlink attributes.  The implemention should fill in as many actions as
+     * will fit into the provided array and update 'flow->actions_len' with the
+     * number of bytes required (regardless of whether they fit in the provided
+     * space). */
+    int (*flow_dump_next)(const struct dpif *dpif, void *state,
+                          struct odp_flow *flow);
+
+    /* Releases resources from 'dpif' for 'state', which was initialized by a
+     * successful call to the 'flow_dump_start' function for 'dpif'.  */
+    int (*flow_dump_done)(const struct dpif *dpif, void *state);
 
     /* Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet
      * frame specified in 'packet'. */
diff --git a/lib/dpif.c b/lib/dpif.c
index a0f638a..1bf6e41 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -792,85 +792,72 @@ dpif_flow_del(struct dpif *dpif, struct odp_flow *flow)
     return error;
 }
 
-/* Stores up to 'n' flows in 'dpif' into 'flows', including their statistics
- * but not including any information about their actions.  If successful,
- * returns 0 and sets '*n_out' to the number of flows actually present in
- * 'dpif', which might be greater than the number stored (if 'dpif' has more
- * than 'n' flows).  On failure, returns a negative errno value and sets
- * '*n_out' to 0. */
-int
-dpif_flow_list(const struct dpif *dpif, struct odp_flow flows[], size_t n,
-               size_t *n_out)
+/* Initializes 'dump' to begin dumping the flows in a dpif.
+ *
+ * This function provides no status indication.  An error status for the entire
+ * dump operation is provided when it is completed by calling
+ * dpif_flow_dump_done().
+ */
+void
+dpif_flow_dump_start(struct dpif_flow_dump *dump, const struct dpif *dpif)
 {
-    uint32_t i;
-    int retval;
-
-    COVERAGE_INC(dpif_flow_query_list);
-    if (RUNNING_ON_VALGRIND) {
-        memset(flows, 0, n * sizeof *flows);
-    } else {
-        for (i = 0; i < n; i++) {
-            flows[i].actions = NULL;
-            flows[i].actions_len = 0;
-        }
-    }
-    retval = dpif->dpif_class->flow_list(dpif, flows, n);
-    if (retval < 0) {
-        *n_out = 0;
-        VLOG_WARN_RL(&error_rl, "%s: flow list failed (%s)",
-                     dpif_name(dpif), strerror(-retval));
-        return -retval;
-    } else {
-        COVERAGE_ADD(dpif_flow_query_list_n, retval);
-        *n_out = MIN(n, retval);
-        VLOG_DBG_RL(&dpmsg_rl, "%s: listed %zu flows (of %d)",
-                    dpif_name(dpif), *n_out, retval);
-        return 0;
-    }
+    dump->dpif = dpif;
+    dump->error = dpif->dpif_class->flow_dump_start(dpif, &dump->state);
+    log_operation(dpif, "flow_dump_start", dump->error);
 }
 
-/* Retrieves all of the flows in 'dpif'.
- *
- * If successful, returns 0 and stores in '*flowsp' a pointer to a newly
- * allocated array of flows, including their statistics but not including any
- * information about their actions, and sets '*np' to the number of flows in
- * '*flowsp'.  The caller is responsible for freeing '*flowsp' by calling
- * free().
+/* Attempts to retrieve another flow from 'dump', which must have been
+ * initialized with dpif_flow_dump_start().  On success, stores a new odp_flow
+ * into 'flow' and returns true.  Failure might indicate an actual error or
+ * merely the end of the flow table.  An error status for the entire dump
+ * operation is provided when it is completed by calling dpif_flow_dump_done().
  *
- * On failure, returns a positive errno value and sets '*flowsp' to NULL and
- * '*np' to 0. */
-int
-dpif_flow_list_all(const struct dpif *dpif,
-                   struct odp_flow **flowsp, size_t *np)
+ * Dumping flow actions is optional.  To avoid dumping actions initialize
+ * 'flow->actions' to NULL and 'flow->actions_len' to 0.  Otherwise, point
+ * 'flow->actions' to an array of struct nlattr and initialize
+ * 'flow->actions_len' with the number of bytes of Netlink attributes.
+ * dpif_flow_dump_next() will fill in as many actions as will fit into the
+ * provided array and update 'flow->actions_len' with the number of bytes
+ * required (regardless of whether they fit in the provided space). */
+bool
+dpif_flow_dump_next(struct dpif_flow_dump *dump, struct odp_flow *flow)
 {
-    struct odp_stats stats;
-    struct odp_flow *flows;
-    size_t n_flows;
-    int error;
+    const struct dpif *dpif = dump->dpif;
 
-    *flowsp = NULL;
-    *np = 0;
+    check_rw_odp_flow(flow);
 
-    error = dpif_get_dp_stats(dpif, &stats);
-    if (error) {
-        return error;
+    if (dump->error) {
+        return false;
     }
 
-    flows = xmalloc(sizeof *flows * stats.n_flows);
-    error = dpif_flow_list(dpif, flows, stats.n_flows, &n_flows);
-    if (error) {
-        free(flows);
-        return error;
+    dump->error = dpif->dpif_class->flow_dump_next(dpif, dump->state, flow);
+    if (dump->error == EOF) {
+        VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif));
+    } else {
+        if (should_log_flow_message(dump->error)) {
+            log_flow_operation(dpif, "flow_dump_next", dump->error, flow);
+        }
     }
 
-    if (stats.n_flows != n_flows) {
-        VLOG_WARN_RL(&error_rl, "%s: datapath stats reported %"PRIu32" "
-                     "flows but flow listing reported %zu",
-                     dpif_name(dpif), stats.n_flows, n_flows);
+    if (dump->error) {
+        dpif->dpif_class->flow_dump_done(dpif, dump->state);
+        return false;
     }
-    *flowsp = flows;
-    *np = n_flows;
-    return 0;
+    return true;
+}
+
+/* Completes flow table dump operation 'dump', which must have been initialized
+ * with dpif_flow_dump_start().  Returns 0 if the dump operation was
+ * error-free, otherwise a positive errno value describing the problem. */
+int
+dpif_flow_dump_done(struct dpif_flow_dump *dump)
+{
+    const struct dpif *dpif = dump->dpif;
+    if (!dump->error) {
+        dump->error = dpif->dpif_class->flow_dump_done(dpif, dump->state);
+        log_operation(dpif, "flow_dump_done", dump->error);
+    }
+    return dump->error == EOF ? 0 : dump->error;
 }
 
 /* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on
diff --git a/lib/dpif.h b/lib/dpif.h
index dfd179b..0a41b77 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -79,10 +79,15 @@ int dpif_flow_put(struct dpif *, struct odp_flow_put *);
 int dpif_flow_del(struct dpif *, struct odp_flow *);
 int dpif_flow_get(const struct dpif *, struct odp_flow *);
 int dpif_flow_get_multiple(const struct dpif *, struct odp_flow[], size_t n);
-int dpif_flow_list(const struct dpif *, struct odp_flow[], size_t n,
-                   size_t *n_out);
-int dpif_flow_list_all(const struct dpif *,
-                       struct odp_flow **flowsp, size_t *np);
+
+struct dpif_flow_dump {
+    const struct dpif *dpif;
+    int error;
+    void *state;
+};
+void dpif_flow_dump_start(struct dpif_flow_dump *, const struct dpif *);
+bool dpif_flow_dump_next(struct dpif_flow_dump *, struct odp_flow *);
+int dpif_flow_dump_done(struct dpif_flow_dump *);
 
 int dpif_execute(struct dpif *, const struct nlattr *actions,
                  size_t actions_len, const struct ofpbuf *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index e4057c2..5829702 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -4470,36 +4470,30 @@ ofproto_expire(struct ofproto *ofproto)
 static void
 ofproto_update_used(struct ofproto *p)
 {
-    struct odp_flow *flows;
-    size_t n_flows;
-    size_t i;
-    int error;
+    struct dpif_flow_dump dump;
+    struct odp_flow f;
 
-    error = dpif_flow_list_all(p->dpif, &flows, &n_flows);
-    if (error) {
-        return;
-    }
+    memset(&f, 0, sizeof f);
 
-    for (i = 0; i < n_flows; i++) {
-        struct odp_flow *f = &flows[i];
+    dpif_flow_dump_start(&dump, p->dpif);
+    while (dpif_flow_dump_next(&dump, &f)) {
         struct facet *facet;
         struct flow flow;
 
-        odp_flow_key_to_flow(&f->key, &flow);
+        odp_flow_key_to_flow(&f.key, &flow);
         facet = facet_find(p, &flow);
 
         if (facet && facet->installed) {
-            facet_update_time(p, facet, &f->stats);
-            facet_account(p, facet, f->stats.n_bytes);
+            facet_update_time(p, facet, &f.stats);
+            facet_account(p, facet, f.stats.n_bytes);
         } else {
             /* There's a flow in the datapath that we know nothing about.
              * Delete it. */
             COVERAGE_INC(ofproto_unexpected_rule);
-            dpif_flow_del(p->dpif, f);
+            dpif_flow_del(p->dpif, &f);
         }
-
     }
-    free(flows);
+    dpif_flow_dump_done(&dump);
 }
 
 /* Calculates and returns the number of milliseconds of idle time after which
diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
index ff31fb8..cd46d35 100644
--- a/utilities/ovs-dpctl.c
+++ b/utilities/ovs-dpctl.c
@@ -462,29 +462,32 @@ do_dump_dps(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
 static void
 do_dump_flows(int argc OVS_UNUSED, char *argv[])
 {
-    struct odp_flow *flows;
+    struct dpif_flow_dump dump;
     struct dpif *dpif;
-    size_t n_flows;
     struct ds ds;
-    size_t i;
 
     run(parsed_dpif_open(argv[1], false, &dpif), "opening datapath");
-    run(dpif_flow_list_all(dpif, &flows, &n_flows), "listing all flows");
 
     ds_init(&ds);
-    for (i = 0; i < n_flows; i++) {
-        struct odp_flow *f = &flows[i];
+    dpif_flow_dump_start(&dump, dpif);
+    for (;;) {
         enum { MAX_ACTIONS = 4096 }; /* An arbitrary but large number. */
         struct nlattr actions[MAX_ACTIONS];
+        struct odp_flow f;
+
+        memset(&f, 0, sizeof f);
+        f.actions = actions;
+        f.actions_len = sizeof actions;
 
-        f->actions = actions;
-        f->actions_len = sizeof actions;
-        if (!dpif_flow_get(dpif, f)) {
-            ds_clear(&ds);
-            format_odp_flow(&ds, f);
-            printf("%s\n", ds_cstr(&ds));
+        if (!dpif_flow_dump_next(&dump, &f)) {
+            break;
         }
+
+        ds_clear(&ds);
+        format_odp_flow(&ds, &f);
+        printf("%s\n", ds_cstr(&ds));
     }
+    dpif_flow_dump_done(&dump);
     ds_destroy(&ds);
     dpif_close(dpif);
 }
-- 
1.7.1





More information about the dev mailing list