[ovs-dev] [PATCH 12/19] datapath: Allow inner_flows
Isaku Yamahata
yamahata at valinux.co.jp
Tue Dec 25 07:31:39 UTC 2012
Found one typo.
Minor comment. there are many
eth_type == ETH_P_MPLS_UC || eth_type == ETH_P_MPLS_UC.
So how about introducing helper function like bool eth_p_mpls(eth_type).
On Mon, Dec 24, 2012 at 11:35:18AM +0900, Simon Horman wrote:
> Allow a flow to be visible in two forms, an outer flow and an inner flow.
> This may occur if the actions allow further decoding of a packet to
> provide a more fine-grained match. In this case the first-pass
> coarse-grained match will hit the outer-flow and the second-pass
> fined-grained match will hit the inner-flow.
>
> Inner-flows are not visible to user-space. Rather, they are just an
> internal representation to handle the case of actions allowing further
> packet decoding. An inner-flow is associated with its outer-flow and
> deleted when the outer-flow is deleted. An outer-flow may have more than
> one inner-flow but an inner-flow may only have one outer-flow.
>
> For example:
>
> In the case of MPLS, L3 and L4 information may not initially be decoded
> from the frame as the ethernet type of the frame is an MPLS type and no
> information is known about the type of the inner frame.
>
> However, the type of the inner frame may be provided by an mpls_pop action
> in which case L3 and L4 information may be decoded providing a finer
> grained match than is otherwise possible.
>
> Signed-off-by: Simon Horman <horms at verge.net.au>
> ---
> datapath/datapath.c | 197 +++++++++++++++++++++++++++++++++++++--------------
> datapath/flow.c | 71 ++++++++++++++-----
> datapath/flow.h | 19 ++++-
> 3 files changed, 212 insertions(+), 75 deletions(-)
>
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index 9ed7c91..93a380f 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -341,7 +341,7 @@ void ovs_dp_detach_port(struct vport *p)
> void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
> {
> struct datapath *dp = p->dp;
> - struct sw_flow *flow;
> + struct sw_flow *flow, *outer_flow = NULL;
> struct dp_stats_percpu *stats;
> u64 *stats_counter;
> int error;
> @@ -377,6 +377,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
> kfree_skb(skb);
> return;
> }
> + outer_flow = flow;
> flow = ovs_flow_tbl_lookup(table, &key, key_len);
> }
> }
> @@ -398,6 +399,8 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
> }
>
> stats_counter = &stats->n_hit;
> + if (unlikely(outer_flow))
> + ovs_flow_used(outer_flow, skb);
> ovs_flow_used(OVS_CB(skb)->flow, skb);
> ovs_execute_actions(dp, skb);
>
> @@ -1115,43 +1118,24 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
> return skb;
> }
>
> -static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
> +enum flow_type {
> + flow_inner,
> + flow_outer,
> + flow_singleton,
> +};
> +
> +static int ovs_flow_cmd_new_or_set__(struct sk_buff *skb,
> + struct genl_info *info,
> + struct nlattr **a, struct datapath *dp,
> + struct sw_flow_key key, int key_len,
> + enum flow_type flow_type,
> + struct sw_flow **flowp)
> {
> - struct nlattr **a = info->attrs;
> - struct ovs_header *ovs_header = info->userhdr;
> - struct sw_flow_key key;
> - struct sw_flow *flow;
> - struct sk_buff *reply;
> - struct datapath *dp;
> - struct flow_table *table;
> int error;
> - int key_len;
> -
> - /* Extract key. */
> - error = -EINVAL;
> - if (!a[OVS_FLOW_ATTR_KEY])
> - goto error;
> - error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
> - if (error)
> - goto error;
> -
> - /* Validate actions. */
> - if (a[OVS_FLOW_ATTR_ACTIONS]) {
> - error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
> - if (error)
> - goto error;
> - } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
> - error = -EINVAL;
> - goto error;
> - }
> -
> - dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
> - error = -ENODEV;
> - if (!dp)
> - goto error;
> + struct sk_buff *reply = NULL;
> + struct flow_table *table = genl_dereference(dp->table);
> + struct sw_flow *flow = ovs_flow_tbl_lookup(table, &key, key_len);
>
> - table = genl_dereference(dp->table);
> - flow = ovs_flow_tbl_lookup(table, &key, key_len);
> if (!flow) {
> struct sw_flow_actions *acts;
>
> @@ -1179,6 +1163,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
> goto error;
> }
> clear_stats(flow);
> + if (flow_type == flow_inner) {
> + flow->flags = SW_FLOW_F_NO_DUMP;
> + *flowp = flow;
> + }
>
> /* Obtain actions. */
> acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
> @@ -1187,16 +1175,24 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
> goto error_free_flow;
> rcu_assign_pointer(flow->sf_acts, acts);
>
> + /* Add inner flow without locking */
> + if (flow_type == flow_outer && *flowp)
> + list_add_rcu(&(*flowp)->inner_flows,
> + &flow->inner_flows);
> +
> /* Put flow in bucket. */
> ovs_flow_tbl_insert(table, flow, &key, key_len);
>
> - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
> - info->snd_seq,
> - OVS_FLOW_CMD_NEW);
> + if (flow_type == flow_inner)
> + reply = ovs_flow_cmd_build_info(flow, dp,
> + info->snd_portid,
> + info->snd_seq,
> + OVS_FLOW_CMD_NEW);
> } else {
> /* We found a matching flow. */
> struct sw_flow_actions *old_acts;
> struct nlattr *acts_attrs;
> + bool may_clear_stats = true;
>
> /* Bail out if we're not allowed to modify an existing flow.
> * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
> @@ -1206,8 +1202,16 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
> */
> error = -EEXIST;
> if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
> - info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
> - goto error;
> + info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) {
> + /* If this is an outer_flow then it may duplicate
> + * the outer_flow of another flow's inner_flow.
> + * Just accept this and more on.
> + */
> + if (flow_type != flow_outer)
> + goto error;
> + may_clear_stats = false;
> + goto update;
> + }
>
> /* Update actions. */
> old_acts = rcu_dereference_protected(flow->sf_acts,
> @@ -1231,14 +1235,23 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
> reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
> info->snd_seq, OVS_FLOW_CMD_NEW);
>
> - /* Clear stats. */
> - if (a[OVS_FLOW_ATTR_CLEAR]) {
> +update:
> + if (a[OVS_FLOW_ATTR_CLEAR] || flow_type == flow_outer) {
> spin_lock_bh(&flow->lock);
> - clear_stats(flow);
> + /* Clear stats. */
> + if (a[OVS_FLOW_ATTR_CLEAR] && may_clear_stats)
> + clear_stats(flow);
> + /* Add inner flow */
> + if (flow_type == flow_outer)
> + list_add_rcu(&(*flowp)->inner_flows,
> + &flow->inner_flows);
> spin_unlock_bh(&flow->lock);
> }
> }
>
> + if (!reply)
> + return 0;
> +
> if (!IS_ERR(reply))
> genl_notify(reply, genl_info_net(info), info->snd_portid,
> ovs_dp_flow_multicast_group.id, info->nlhdr,
> @@ -1254,6 +1267,66 @@ error:
> return error;
> }
>
> +static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
> +{
> + struct nlattr **a = info->attrs;
> + struct ovs_header *ovs_header = info->userhdr;
> + struct sw_flow_key key;
> + struct sw_flow *inner_flow = NULL;
> + struct datapath *dp;
> + int error;
> + int key_len[2];
> +
> + /* Extract key. */
> + error = -EINVAL;
> + if (!a[OVS_FLOW_ATTR_KEY])
> + goto error;
> + error = ovs_flow_from_nlattrs(&key, key_len, a[OVS_FLOW_ATTR_KEY]);
> + if (error)
> + goto error;
> +
> + /* Validate actions. */
> + if (a[OVS_FLOW_ATTR_ACTIONS]) {
> + error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
> + if (error)
> + goto error;
> + } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
> + error = -EINVAL;
> + goto error;
> + }
> +
> + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
> + error = -ENODEV;
> + if (!dp)
> + goto error;
> +
> + if (key_len[1]) {
> + error = ovs_flow_cmd_new_or_set__(skb, info, a, dp,
> + key, key_len[1],
> + flow_inner,
> + &inner_flow);
> + if (error)
> + goto error;
> + }
> +
> + error = ovs_flow_cmd_new_or_set__(skb, info, a, dp,
> + key, key_len[0],
> + key_len[1] ? flow_outer :
> + flow_singleton,
> + &inner_flow);
> + if (error) {
> + if (key_len[1]) {
> + struct flow_table *table = genl_dereference(dp->table);
> + ovs_flow_tbl_remove(table, inner_flow);
> + ovs_flow_deferred_free(inner_flow);
> + }
> + goto error;
> + }
> +
> +error:
> + return error;
> +}
> +
> static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
> {
> struct nlattr **a = info->attrs;
> @@ -1264,11 +1337,11 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
> struct datapath *dp;
> struct flow_table *table;
> int err;
> - int key_len;
> + int key_len[2];
>
> if (!a[OVS_FLOW_ATTR_KEY])
> return -EINVAL;
> - err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
> + err = ovs_flow_from_nlattrs(&key, key_len, a[OVS_FLOW_ATTR_KEY]);
> if (err)
> return err;
>
> @@ -1277,7 +1350,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
> return -ENODEV;
>
> table = genl_dereference(dp->table);
> - flow = ovs_flow_tbl_lookup(table, &key, key_len);
> + flow = ovs_flow_tbl_lookup(table, &key,
> + key_len[1] ? key_len[1] : key_len[0]);
> if (!flow)
> return -ENOENT;
>
> @@ -1295,11 +1369,11 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
> struct ovs_header *ovs_header = info->userhdr;
> struct sw_flow_key key;
> struct sk_buff *reply;
> - struct sw_flow *flow;
> + struct sw_flow *notify_flow, *outer_flow, *inner_flow;
> struct datapath *dp;
> struct flow_table *table;
> int err;
> - int key_len;
> + int key_len[2];
>
> dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
> if (!dp)
> @@ -1308,26 +1382,39 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
> if (!a[OVS_FLOW_ATTR_KEY])
> return flush_flows(dp);
>
> - err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
> + err = ovs_flow_from_nlattrs(&key, key_len, a[OVS_FLOW_ATTR_KEY]);
> if (err)
> return err;
>
> table = genl_dereference(dp->table);
> - flow = ovs_flow_tbl_lookup(table, &key, key_len);
> - if (!flow)
> + notify_flow = outer_flow = ovs_flow_tbl_lookup(table, &key, key_len[0]);
> + if (!outer_flow)
> return -ENOENT;
>
> - reply = ovs_flow_cmd_alloc_info(flow);
> + if (key_len[1]) {
> + notify_flow = inner_flow = ovs_flow_tbl_lookup(table, &key,
> + key_len[1]);
> + if (!inner_flow)
> + return -ENOENT;
> + } else {
> + inner_flow = NULL;
> + }
> +
> + reply = ovs_flow_cmd_alloc_info(notify_flow);
> if (!reply)
> return -ENOMEM;
>
> - ovs_flow_tbl_remove(table, flow);
> + ovs_flow_tbl_remove(table, outer_flow);
> + if (inner_flow)
> + ovs_flow_tbl_remove(table, inner_flow);
>
> - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
> + err = ovs_flow_cmd_fill_info(notify_flow, dp, reply, info->snd_portid,
> info->snd_seq, 0, OVS_FLOW_CMD_DEL);
> BUG_ON(err < 0);
>
> - ovs_flow_deferred_free(flow);
> + ovs_flow_deferred_free(outer_flow);
> + if (inner_flow)
> + ovs_flow_deferred_free(inner_flow);
>
> genl_notify(reply, genl_info_net(info), info->snd_portid,
> ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
> diff --git a/datapath/flow.c b/datapath/flow.c
> index 7e7dc0a..13c67ca 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -226,7 +226,9 @@ struct sw_flow *ovs_flow_alloc(void)
> return ERR_PTR(-ENOMEM);
>
> spin_lock_init(&flow->lock);
> + INIT_LIST_HEAD(&flow->inner_flows);
> flow->sf_acts = NULL;
> + flow->flags = 0;
>
> return flow;
> }
> @@ -343,7 +345,7 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
> i = 0;
> head = flex_array_get(table->buckets, *bucket);
> hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) {
> - if (i < *last) {
> + if (i < *last || flow->flags | SW_FLOW_F_NO_DUMP) {
typo &
> i++;
> continue;
> }
> @@ -878,13 +880,25 @@ void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
> __flow_tbl_insert(table, flow);
> }
>
> -void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
> +static void __flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
> {
> hlist_del_rcu(&flow->hash_node[table->node_ver]);
> table->count--;
> BUG_ON(table->count < 0);
> }
>
> +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
> +{
> + struct sw_flow *inner_flow;
> +
> + __flow_tbl_remove(table, flow);
> +
> + list_for_each_entry_rcu(inner_flow, &flow->inner_flows, inner_flows) {
> + __flow_tbl_remove(table, inner_flow);
> + ovs_flow_deferred_free(inner_flow);
> + }
> +}
> +
> /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
> const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
> [OVS_KEY_ATTR_ENCAP] = -1,
> @@ -1051,7 +1065,7 @@ static int parse_flow_nlattrs(const struct nlattr *attr,
> * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
> * sequence.
> */
> -int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
> +int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int key_lenp[2],
> const struct nlattr *attr)
> {
> const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
> @@ -1059,6 +1073,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
> int key_len;
> u64 attrs;
> int err;
> + __be16 eth_type;
>
> memset(swkey, 0, sizeof(struct sw_flow_key));
> key_len = SW_FLOW_KEY_OFFSET(eth);
> @@ -1172,7 +1187,30 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
> swkey->eth.type = htons(ETH_P_802_2);
> }
>
> - if (swkey->eth.type == htons(ETH_P_IP)) {
> + eth_type = swkey->eth.type;
> + if (eth_type == htons(ETH_P_MPLS_UC) ||
> + eth_type == htons(ETH_P_MPLS_MC)) {
> + const struct ovs_key_mpls *mpls_key;
> +
> + if (!(attrs & (1 << OVS_KEY_ATTR_MPLS)))
> + return -EINVAL;
> + attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> +
> + key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
> + mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> + swkey->mpls.top_label = mpls_key->mpls_top_label;
> +
> + if (attrs & (1 << OVS_KEY_ATTR_IPV4))
> + eth_type = htons(ETH_P_IP);
> + if (attrs & (1 << OVS_KEY_ATTR_IPV6))
> + eth_type = htons(ETH_P_IPV6);
> + if (attrs & (1 << OVS_KEY_ATTR_ARP))
> + eth_type = htons(ETH_P_ARP);
> + }
> +
> + key_lenp[0] = key_len;
> +
> + if (eth_type == htons(ETH_P_IP)) {
> const struct ovs_key_ipv4 *ipv4_key;
>
> if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
> @@ -1195,7 +1233,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
> if (err)
> return err;
> }
> - } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
> + } else if (eth_type == htons(ETH_P_IPV6)) {
> const struct ovs_key_ipv6 *ipv6_key;
>
> if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
> @@ -1221,8 +1259,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
> if (err)
> return err;
> }
> - } else if (swkey->eth.type == htons(ETH_P_ARP) ||
> - swkey->eth.type == htons(ETH_P_RARP)) {
> + } else if (eth_type == htons(ETH_P_ARP) ||
> + eth_type == htons(ETH_P_RARP)) {
> const struct ovs_key_arp *arp_key;
>
> if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
> @@ -1238,22 +1276,17 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
> swkey->ip.proto = ntohs(arp_key->arp_op);
> memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
> memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
> - } else if (swkey->eth.type == htons(ETH_P_MPLS_UC) ||
> - swkey->eth.type == htons(ETH_P_MPLS_MC)) {
> - const struct ovs_key_mpls *mpls_key;
> -
> - if (!(attrs & (1 << OVS_KEY_ATTR_MPLS)))
> - return -EINVAL;
> - attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> -
> - key_len = SW_FLOW_KEY_OFFSET(mpls.top_label);
> - mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> - swkey->mpls.top_label = mpls_key->mpls_top_label;
> }
>
> if (attrs)
> return -EINVAL;
> - *key_lenp = key_len;
> +
> + if (eth_type == swkey->eth.type) {
> + key_lenp[0] = key_len;
> + key_lenp[1] = 0;
> + } else {
> + key_lenp[1] = key_len;
> + }
>
> return 0;
> }
> diff --git a/datapath/flow.h b/datapath/flow.h
> index c476e82..ebea079 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -98,6 +98,8 @@ struct sw_flow_key {
> };
> };
>
> +#define SW_FLOW_F_NO_DUMP 0x01
> +
> struct sw_flow {
> struct rcu_head rcu;
> struct hlist_node hash_node[2];
> @@ -107,10 +109,25 @@ struct sw_flow {
> struct sw_flow_actions __rcu *sf_acts;
>
> spinlock_t lock; /* Lock for values below. */
> +
> + struct list_head inner_flows; /* Inner flows.
> + * For an inner flow this is its
> + * entry in the hlist of its outer
> + * flow.
> + * For an outer flow this is the
> + * list of inner flows
> + * For a singleton flow this is
> + * unused
> + */
> +
> unsigned long used; /* Last used time (in jiffies). */
> u64 packet_count; /* Number of packets matched. */
> u64 byte_count; /* Number of bytes matched. */
> u8 tcp_flags; /* Union of seen TCP flags. */
> + /* End values that need loc*/
> +
> + u8 flags; /* SW_FLOW_F_* */
> +
> };
>
> struct arp_eth_header {
> @@ -142,7 +159,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *);
> void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
>
> int ovs_flow_extract_l3_onwards(struct sk_buff *, struct sw_flow_key *,
> - int *key_lenp, __be16 eth_type);
> + int key_lenp[2], __be16 eth_type);
> int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
> int *key_lenp);
> void ovs_flow_used(struct sw_flow *, struct sk_buff *);
> --
> 1.7.10.4
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
--
yamahata
More information about the dev
mailing list