[ovs-dev] [PATCH] datapath: Don't query time for every packet.

Jesse Gross jesse at nicira.com
Fri Jul 23 21:02:54 UTC 2010


On Fri, Jul 23, 2010 at 11:26 AM, Ben Pfaff <blp at nicira.com> wrote:

> On Thu, Jul 22, 2010 at 05:13:50PM -0700, Jesse Gross wrote:
> > On Thu, Jul 22, 2010 at 1:45 PM, Ben Pfaff <blp at nicira.com> wrote:
> > > OK, I see.  We're converting between two scales based on a fixed point
> > > and a conversion factor, like converting between Fahrenheit and Celsius
> > > by knowing that 32 F = 0 C and that there are 1.8 degF per degC.  But
> > > instead of using a *fixed* fixed point, like 32 F = 0 C, we're using a
> > > *variable* fixed point.  It's like, every time you want to convert
> > > between temperature scales, walking over to the thermometer and reading
> > > off that it's currently 72 degF and 22 degC and writing out a new
> > > equation based on those facts.
> > >
> > > So: can we used a *fixed* fixed point somehow?  It would be cheaper
> than
> > > obtaining the current time for every conversion, and it might be more
> > > accurate.
> > >
> >
> > I can think of a couple of ways that to do this but they assume that the
> > relationship between jiffies and time is fixed.  I haven't been able to
> > convince myself that this is true, especially across different kernel
> > versions and architectures.  It's probably fine most of the time but
> events
> > like suspend/resume and lost interrupts seem likely to cause problems.
>  In
> > any case, the two definitely come from different sources, so there is a
> > potential for drift.  We can still run into these types of problems with
> > this implementation but at least they will be limited to the duration of
> a
> > flow, rather than potentially accumulating over time.
> >
> > Do you know of something that would guarantee that the relationship is
> > fixed?
>
> I don't.  You are probably right.
>
> The only case where it really bothers me that we call this is where
> we're dumping a number of flows in a single system call.  Could we at
> least amortize obtaining the conversion factor over all the flows that
> we grab in a single system call?
>

That's reasonable.  What do you think of this?

commit ace5cd5b0ede62919b598133689db89764018013
Author: Jesse Gross <jesse at nicira.com>
Date:   Thu Jul 15 19:22:07 2010 -0700

    datapath: Don't query time for every packet.

    Rather than actually query the time every time a packet comes through,
    just store the current jiffies and convert it to actual time when
    requested.  GRE is the primary beneficiary of this because the traffic
    travels through the datapath twice.  This change reduces CPU utilization
    3-4% with GRE.

diff --git a/datapath/datapath.c b/datapath/datapath.c
index eb260e3..3785df0 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -903,15 +903,37 @@ error:
  return ERR_PTR(error);
 }

-static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats)
+static void get_time_offset(struct timespec *offset)
 {
- if (flow->used.tv_sec) {
- stats->used_sec = flow->used.tv_sec;
- stats->used_nsec = flow->used.tv_nsec;
+ struct timespec now_mono, now_jiffies;
+
+ ktime_get_ts(&now_mono);
+ jiffies_to_timespec(jiffies, &now_jiffies);
+ *offset = timespec_sub(now_mono, now_jiffies);
+}
+
+static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats,
+      struct timespec *time_offset)
+{
+ if (flow->used) {
+ struct timespec flow_ts, used, local_offset;
+
+ if (!time_offset) {
+ time_offset = &local_offset;
+ get_time_offset(time_offset);
+ }
+
+ jiffies_to_timespec(flow->used, &flow_ts);
+ set_normalized_timespec(&used, flow_ts.tv_sec + time_offset->tv_sec,
+ flow_ts.tv_nsec + time_offset->tv_nsec);
+
+ stats->used_sec = used.tv_sec;
+ stats->used_nsec = used.tv_nsec;
  } else {
  stats->used_sec = 0;
  stats->used_nsec = 0;
  }
+
  stats->n_packets = flow->packet_count;
  stats->n_bytes = flow->byte_count;
  stats->ip_tos = flow->ip_tos;
@@ -921,7 +943,7 @@ static void get_stats(struct sw_flow *flow, struct
odp_flow_stats *stats)

 static void clear_stats(struct sw_flow *flow)
 {
- flow->used.tv_sec = flow->used.tv_nsec = 0;
+ flow->used = 0;
  flow->tcp_flags = 0;
  flow->ip_tos = 0;
  flow->packet_count = 0;
@@ -1021,7 +1043,7 @@ static int do_put_flow(struct datapath *dp, struct
odp_flow_put *uf,

  /* Fetch stats, then clear them if necessary. */
  spin_lock_bh(&flow->lock);
- get_stats(flow, stats);
+ get_stats(flow, stats, NULL);
  if (uf->flags & ODPPF_ZERO_STATS)
  clear_stats(flow);
  spin_unlock_bh(&flow->lock);
@@ -1058,6 +1080,7 @@ static int put_flow(struct datapath *dp, struct
odp_flow_put __user *ufp)
 }

 static int do_answer_query(struct sw_flow *flow, u32 query_flags,
+   struct timespec *time_offset,
    struct odp_flow_stats __user *ustats,
    union odp_action __user *actions,
    u32 __user *n_actionsp)
@@ -1067,7 +1090,7 @@ static int do_answer_query(struct sw_flow *flow, u32
query_flags,
  u32 n_actions;

  spin_lock_bh(&flow->lock);
- get_stats(flow, &stats);
+ get_stats(flow, &stats, time_offset);
  if (query_flags & ODPFF_ZERO_TCP_FLAGS)
  flow->tcp_flags = 0;

@@ -1091,6 +1114,7 @@ static int do_answer_query(struct sw_flow *flow, u32
query_flags,
 }

 static int answer_query(struct sw_flow *flow, u32 query_flags,
+ struct timespec *time_offset,
  struct odp_flow __user *ufp)
 {
  union odp_action *actions;
@@ -1098,7 +1122,7 @@ static int answer_query(struct sw_flow *flow, u32
query_flags,
  if (get_user(actions, &ufp->actions))
  return -EFAULT;

- return do_answer_query(flow, query_flags,
+ return do_answer_query(flow, query_flags, time_offset,
        &ufp->stats, actions, &ufp->n_actions);
 }

@@ -1137,7 +1161,7 @@ static int del_flow(struct datapath *dp, struct
odp_flow __user *ufp)
  if (IS_ERR(flow))
  return PTR_ERR(flow);

- error = answer_query(flow, 0, ufp);
+ error = answer_query(flow, 0, NULL, ufp);
  flow_deferred_free(flow);
  return error;
 }
@@ -1145,8 +1169,11 @@ static int del_flow(struct datapath *dp, struct
odp_flow __user *ufp)
 static int do_query_flows(struct datapath *dp, const struct odp_flowvec
*flowvec)
 {
  struct tbl *table = rcu_dereference(dp->table);
+ struct timespec time_offset;
  u32 i;

+ get_time_offset(&time_offset);
+
  for (i = 0; i < flowvec->n_flows; i++) {
  struct odp_flow __user *ufp = &flowvec->flows[i];
  struct odp_flow uf;
@@ -1161,7 +1188,7 @@ static int do_query_flows(struct datapath *dp, const
struct odp_flowvec *flowvec
  if (!flow_node)
  error = put_user(ENOENT, &ufp->stats.error);
  else
- error = answer_query(flow_cast(flow_node), uf.flags, ufp);
+ error = answer_query(flow_cast(flow_node), uf.flags, &time_offset, ufp);
  if (error)
  return -EFAULT;
  }
@@ -1172,6 +1199,7 @@ struct list_flows_cbdata {
  struct odp_flow __user *uflows;
  u32 n_flows;
  u32 listed_flows;
+ struct timespec *time_offset;
 };

 static int list_flow(struct tbl_node *node, void *cbdata_)
@@ -1183,7 +1211,7 @@ static int list_flow(struct tbl_node *node, void
*cbdata_)

  if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
  return -EFAULT;
- error = answer_query(flow, 0, ufp);
+ error = answer_query(flow, 0, cbdata->time_offset, ufp);
  if (error)
  return error;

@@ -1195,14 +1223,19 @@ static int list_flow(struct tbl_node *node, void
*cbdata_)
 static int do_list_flows(struct datapath *dp, const struct odp_flowvec
*flowvec)
 {
  struct list_flows_cbdata cbdata;
+ struct timespec time_offset;
  int error;

  if (!flowvec->n_flows)
  return 0;

+ get_time_offset(&time_offset);
+
  cbdata.uflows = flowvec->flows;
  cbdata.n_flows = flowvec->n_flows;
  cbdata.listed_flows = 0;
+ cbdata.time_offset = &time_offset;
+
  error = tbl_foreach(rcu_dereference(dp->table), list_flow, &cbdata);
  return error ? error : cbdata.listed_flows;
 }
@@ -1811,6 +1844,7 @@ static int compat_put_flow(struct datapath *dp, struct
compat_odp_flow_put __use
 }

 static int compat_answer_query(struct sw_flow *flow, u32 query_flags,
+       struct timespec *time_offset,
        struct compat_odp_flow __user *ufp)
 {
  compat_uptr_t actions;
@@ -1818,7 +1852,7 @@ static int compat_answer_query(struct sw_flow *flow,
u32 query_flags,
  if (get_user(actions, &ufp->actions))
  return -EFAULT;

- return do_answer_query(flow, query_flags, &ufp->stats,
+ return do_answer_query(flow, query_flags, time_offset, &ufp->stats,
        compat_ptr(actions), &ufp->n_actions);
 }

@@ -1835,7 +1869,7 @@ static int compat_del_flow(struct datapath *dp, struct
compat_odp_flow __user *u
  if (IS_ERR(flow))
  return PTR_ERR(flow);

- error = compat_answer_query(flow, 0, ufp);
+ error = compat_answer_query(flow, 0, NULL, ufp);
  flow_deferred_free(flow);
  return error;
 }
@@ -1843,8 +1877,11 @@ static int compat_del_flow(struct datapath *dp,
struct compat_odp_flow __user *u
 static int compat_query_flows(struct datapath *dp, struct compat_odp_flow
*flows, u32 n_flows)
 {
  struct tbl *table = rcu_dereference(dp->table);
+ struct timespec time_offset;
  u32 i;

+ get_time_offset(&time_offset);
+
  for (i = 0; i < n_flows; i++) {
  struct compat_odp_flow __user *ufp = &flows[i];
  struct odp_flow uf;
@@ -1859,7 +1896,7 @@ static int compat_query_flows(struct datapath *dp,
struct compat_odp_flow *flows
  if (!flow_node)
  error = put_user(ENOENT, &ufp->stats.error);
  else
- error = compat_answer_query(flow_cast(flow_node), uf.flags, ufp);
+ error = compat_answer_query(flow_cast(flow_node), uf.flags, &time_offset,
ufp);
  if (error)
  return -EFAULT;
  }
@@ -1870,6 +1907,7 @@ struct compat_list_flows_cbdata {
  struct compat_odp_flow __user *uflows;
  u32 n_flows;
  u32 listed_flows;
+ struct timespec *time_offset;
 };

 static int compat_list_flow(struct tbl_node *node, void *cbdata_)
@@ -1881,7 +1919,7 @@ static int compat_list_flow(struct tbl_node *node,
void *cbdata_)

  if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
  return -EFAULT;
- error = compat_answer_query(flow, 0, ufp);
+ error = compat_answer_query(flow, 0, cbdata->time_offset, ufp);
  if (error)
  return error;

@@ -1893,14 +1931,19 @@ static int compat_list_flow(struct tbl_node *node,
void *cbdata_)
 static int compat_list_flows(struct datapath *dp, struct compat_odp_flow
*flows, u32 n_flows)
 {
  struct compat_list_flows_cbdata cbdata;
+ struct timespec time_offset;
  int error;

  if (!n_flows)
  return 0;

+ get_time_offset(&time_offset);
+
  cbdata.uflows = flows;
  cbdata.n_flows = n_flows;
  cbdata.listed_flows = 0;
+ cbdata.time_offset = &time_offset;
+
  error = tbl_foreach(rcu_dereference(dp->table), compat_list_flow,
&cbdata);
  return error ? error : cbdata.listed_flows;
 }
diff --git a/datapath/flow.c b/datapath/flow.c
index bf50e94..95cc042 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -111,7 +111,7 @@ void flow_used(struct sw_flow *flow, struct sk_buff
*skb)
  }

  spin_lock_bh(&flow->lock);
- ktime_get_ts(&flow->used);
+ flow->used = jiffies;
  flow->packet_count++;
  flow->byte_count += skb->len;
  flow->tcp_flags |= tcp_flags;
diff --git a/datapath/flow.h b/datapath/flow.h
index 9704489..29a1ac4 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/rcupdate.h>
 #include <linux/gfp.h>
+#include <linux/jiffies.h>
 #include <linux/time.h>

 #include "openvswitch/datapath-protocol.h"
@@ -34,11 +35,10 @@ struct sw_flow {
  struct odp_flow_key key;
  struct sw_flow_actions *sf_acts;

- struct timespec used; /* Last used time. */
-
  u8 ip_tos; /* IP TOS value. */

  spinlock_t lock; /* Lock for values below. */
+ unsigned long used; /* Last used time (in jiffies). */
  u64 packet_count; /* Number of packets matched. */
  u64 byte_count; /* Number of bytes matched. */
  u8 tcp_flags; /* Union of seen TCP flags. */
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20100723/0c9c53ce/attachment-0003.html>


More information about the dev mailing list