[ovs-dev] [PATCH ovs V1 7/9] dpif-hw-acc: operate implementation
Paul Blakey
paulb at mellanox.com
Tue Nov 1 14:53:28 UTC 2016
Added operate implementation using tc for flow offload,
supporting flow get, flow put, and flow del.
Signed-off-by: Paul Blakey <paulb at mellanox.com>
Signed-off-by: Shahar Klein <shahark at mellanox.com>
---
lib/dpif-hw-acc.c | 821 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
lib/dpif-hw-acc.h | 10 +
2 files changed, 829 insertions(+), 2 deletions(-)
diff --git a/lib/dpif-hw-acc.c b/lib/dpif-hw-acc.c
index 52f5dd1..fba8ec4 100644
--- a/lib/dpif-hw-acc.c
+++ b/lib/dpif-hw-acc.c
@@ -48,6 +48,76 @@
VLOG_DEFINE_THIS_MODULE(dpif_hw_acc);
+extern bool SKIP_HW;
+
+/* Returns the address NLA_HDRLEN bytes past the start of 'nla'.  The
+ * const qualifier of 'nla' is dropped in the returned pointer. */
+static inline void *
+nla_data(const struct nlattr *nla)
+{
+    char *base = (char *) nla;
+
+    return base + NLA_HDRLEN;
+}
+
+/* Returns a printable name for the OVS key attribute 'type'.
+ *
+ * Known attribute types map to string literals.  Any other value is
+ * formatted into a function-local static buffer, so for unknown types the
+ * result is overwritten by the next such call and the function is not
+ * reentrant.  Callers must not modify or free the returned string. */
+static char *
+attrname(int type)
+{
+    static char unknowntype[64];
+
+    switch (type) {
+    case OVS_KEY_ATTR_ENCAP:
+        return "OVS_KEY_ATTR_ENCAP";
+    case OVS_KEY_ATTR_PRIORITY:
+        return "OVS_KEY_ATTR_PRIORITY";
+    case OVS_KEY_ATTR_CT_LABELS:
+        return "OVS_KEY_ATTR_CT_LABELS";
+    case OVS_KEY_ATTR_IN_PORT:
+        return "OVS_KEY_ATTR_IN_PORT";
+    case OVS_KEY_ATTR_ETHERNET:
+        return "OVS_KEY_ATTR_ETHERNET";
+    case OVS_KEY_ATTR_VLAN:
+        return "OVS_KEY_ATTR_VLAN";
+    case OVS_KEY_ATTR_ETHERTYPE:
+        return "OVS_KEY_ATTR_ETHERTYPE";
+    case OVS_KEY_ATTR_IPV4:
+        return "OVS_KEY_ATTR_IPV4";
+    case OVS_KEY_ATTR_IPV6:
+        return "OVS_KEY_ATTR_IPV6";
+    case OVS_KEY_ATTR_TCP:
+        return "OVS_KEY_ATTR_TCP";
+    case OVS_KEY_ATTR_UDP:
+        return "OVS_KEY_ATTR_UDP";
+    case OVS_KEY_ATTR_ICMP:
+        return "OVS_KEY_ATTR_ICMP";
+    case OVS_KEY_ATTR_ICMPV6:
+        return "OVS_KEY_ATTR_ICMPV6";
+    case OVS_KEY_ATTR_ARP:
+        return "OVS_KEY_ATTR_ARP";
+    case OVS_KEY_ATTR_ND:
+        return "OVS_KEY_ATTR_ND";
+    case OVS_KEY_ATTR_SKB_MARK:
+        return "OVS_KEY_ATTR_SKB_MARK";
+    case OVS_KEY_ATTR_TUNNEL:
+        return "OVS_KEY_ATTR_TUNNEL";
+    case OVS_KEY_ATTR_SCTP:
+        return "OVS_KEY_ATTR_SCTP";
+    case OVS_KEY_ATTR_TCP_FLAGS:
+        return "OVS_KEY_ATTR_TCP_FLAGS";
+    case OVS_KEY_ATTR_DP_HASH:
+        return "OVS_KEY_ATTR_DP_HASH";
+    case OVS_KEY_ATTR_RECIRC_ID:
+        return "OVS_KEY_ATTR_RECIRC_ID";
+    case OVS_KEY_ATTR_MPLS:
+        return "OVS_KEY_ATTR_MPLS";
+    case OVS_KEY_ATTR_CT_STATE:
+        return "OVS_KEY_ATTR_CT_STATE";
+    case OVS_KEY_ATTR_CT_ZONE:
+        return "OVS_KEY_ATTR_CT_ZONE";
+    case OVS_KEY_ATTR_CT_MARK:
+        return "OVS_KEY_ATTR_CT_MARK";
+    default:
+        /* No trailing newline: the name is interpolated into the middle of
+         * log messages by the callers. */
+        snprintf(unknowntype, sizeof unknowntype, "unknown_type(%d)", type);
+        return unknowntype;
+    }
+}
+
static char *
printufid(const ovs_u128 * ovs_ufid)
{
@@ -539,6 +609,7 @@ initmaps(struct dpif_hw_acc *dpif)
hmap_init(&dpif->port_to_netdev);
hmap_init(&dpif->ufid_to_handle);
hmap_init(&dpif->handle_to_ufid);
+ hmap_init(&dpif->mask_to_prio);
ovs_mutex_init(&dpif->hash_mutex);
return 0;
}
@@ -818,13 +889,759 @@ dpif_hw_acc_flow_dump_next(struct dpif_flow_dump_thread *thread_,
max_flows);
}
+/* Returns true if every payload byte of the mask attribute 'ma' is zero. */
+static bool
+odp_mask_attr_is_wildcard(const struct nlattr *ma)
+{
+    const void *payload = nl_attr_get(ma);
+    size_t payload_len = nl_attr_get_size(ma);
+
+    return is_all_zeros(payload, payload_len);
+}
+
+/* Returns true if 'mask' ('size' bytes, belonging to key attribute 'attr')
+ * is an exact-match mask.
+ *
+ * TCP flags, IPv6 and ARP get attribute-specific comparisons, tunnel masks
+ * are never reported as exact, and every other attribute is exact only when
+ * all 'size' bytes are ones. */
+static bool
+odp_mask_is_exact(enum ovs_key_attr attr, const void *mask, size_t size)
+{
+    switch (attr) {
+    case OVS_KEY_ATTR_TCP_FLAGS:
+        return TCP_FLAGS(*(ovs_be16 *) mask) == TCP_FLAGS(OVS_BE16_MAX);
+
+    case OVS_KEY_ATTR_IPV6: {
+        const struct ovs_key_ipv6 *ipv6_mask = mask;
+        const struct in6_addr *src_mask =
+            (const struct in6_addr *) ipv6_mask->ipv6_src;
+        const struct in6_addr *dst_mask =
+            (const struct in6_addr *) ipv6_mask->ipv6_dst;
+
+        /* Only the IPV6_LABEL_MASK bits of the label take part. */
+        return ((ipv6_mask->ipv6_label & htonl(IPV6_LABEL_MASK))
+                == htonl(IPV6_LABEL_MASK))
+            && ipv6_mask->ipv6_proto == UINT8_MAX
+            && ipv6_mask->ipv6_tclass == UINT8_MAX
+            && ipv6_mask->ipv6_hlimit == UINT8_MAX
+            && ipv6_mask->ipv6_frag == UINT8_MAX
+            && ipv6_mask_is_exact(src_mask)
+            && ipv6_mask_is_exact(dst_mask);
+    }
+
+    case OVS_KEY_ATTR_TUNNEL:
+        return false;
+
+    case OVS_KEY_ATTR_ARP: {
+        /* ARP key has padding, ignore it. */
+        BUILD_ASSERT_DECL(sizeof (struct ovs_key_arp) == 24);
+        BUILD_ASSERT_DECL(offsetof(struct ovs_key_arp, arp_tha) == 10 + 6);
+        size = offsetof(struct ovs_key_arp, arp_tha) + ETH_ADDR_LEN;
+
+        ovs_assert(((uint16_t *) mask)[size / 2] == 0);
+        break;
+    }
+
+    default:
+        break;
+    }
+
+    return is_all_ones(mask, size);
+}
+
+/* Returns true if the mask attribute 'ma' is an exact-match mask for its own
+ * attribute type.  Tunnel attributes are always reported as not exact. */
+static bool
+odp_mask_attr_is_exact(const struct nlattr *ma)
+{
+    enum ovs_key_attr type = nl_attr_type(ma);
+
+    if (type == OVS_KEY_ATTR_TUNNEL) {
+        return false;
+    }
+
+    return odp_mask_is_exact(type, nl_attr_get(ma), nl_attr_get_size(ma));
+}
+
+/* Translates the netlink flow 'key' ('key_len' bytes) and its optional
+ * 'key_mask' ('key_mask_len' bytes) into 'tc_flow'.
+ *
+ * 'dpif' is used to resolve the OVS in_port to a netdev and ifindex.  When
+ * no mask is supplied (NULL or zero length) every key attribute is treated
+ * as an exact match.  Only fields that are matched on are written, so the
+ * caller is expected to pass a zeroed 'tc_flow'.
+ *
+ * Returns 0 if the flow can be offloaded, 1 if it uses a match that is not
+ * supported. */
+static int
+parse_to_tc_flow(struct dpif_hw_acc *dpif, struct tc_flow *tc_flow,
+                 const struct nlattr *key, int key_len,
+                 const struct nlattr *key_mask, int key_mask_len)
+{
+    size_t left;
+    const struct nlattr *a;
+    const struct nlattr *mask[__OVS_KEY_ATTR_MAX] = { 0 };
+
+    VLOG_DBG("parsing mask:\n");
+    NL_ATTR_FOR_EACH_UNSAFE(a, left, key_mask, key_mask_len) {
+        unsigned int type = nl_attr_type(a);
+
+        /* Never index 'mask' with an attribute type it has no slot for. */
+        if (type < __OVS_KEY_ATTR_MAX) {
+            mask[type] = a;
+        }
+    }
+
+    VLOG_DBG("parsing key attributes:\n");
+    NL_ATTR_FOR_EACH_UNSAFE(a, left, key, key_len) {
+        unsigned int type = nl_attr_type(a);
+        /* Out-of-range types have no mask slot; they are rejected by the
+         * default case of the switch below. */
+        const struct nlattr *ma =
+            type < __OVS_KEY_ATTR_MAX ? mask[type] : NULL;
+        bool is_wildcard = false;
+        bool is_exact = true;
+
+        if (key_mask && key_mask_len) {
+            is_wildcard = ma ? odp_mask_attr_is_wildcard(ma) : true;
+            is_exact = ma ? odp_mask_attr_is_exact(ma) : false;
+        }
+
+        if (is_exact) {
+            VLOG_DBG("mask: %s exact: %p\n", attrname(type), ma);
+        } else if (is_wildcard) {
+            VLOG_DBG("mask: %s wildcard: %p\n", attrname(type), ma);
+        } else {
+            VLOG_DBG("mask %s is partial, ma: %p\n", attrname(type), ma);
+        }
+
+        switch (type) {
+        case OVS_KEY_ATTR_UNSPEC:
+        case OVS_KEY_ATTR_PRIORITY:
+        case OVS_KEY_ATTR_SKB_MARK:
+        case OVS_KEY_ATTR_CT_STATE:
+        case OVS_KEY_ATTR_CT_ZONE:
+        case OVS_KEY_ATTR_CT_MARK:
+        case OVS_KEY_ATTR_CT_LABELS:
+        case OVS_KEY_ATTR_ND:
+        case OVS_KEY_ATTR_MPLS:
+        case OVS_KEY_ATTR_DP_HASH:
+        case OVS_KEY_ATTR_TUNNEL:
+        case OVS_KEY_ATTR_SCTP:
+        case OVS_KEY_ATTR_ICMP:
+        case OVS_KEY_ATTR_ARP:
+        case OVS_KEY_ATTR_ICMPV6:
+            /* Unsupported attributes are tolerated only when fully
+             * wildcarded. */
+            if (is_wildcard) {
+                VLOG_DBG("unsupported key attribute: %s is wildcard\n",
+                         attrname(type));
+                break;
+            }
+            VLOG_ERR("unsupported key attribute: %s is not wildcard\n",
+                     attrname(type));
+            return 1;
+
+        case OVS_KEY_ATTR_TCP_FLAGS:
+        case OVS_KEY_ATTR_RECIRC_ID:
+            /* IGNORE this attributes for now, (might disable some of it in
+             * probe? */
+            VLOG_DBG
+                ("ignoring attribute %s -- fix me, exact: %s, wildcard: %s, partial: %s\n",
+                 attrname(type), is_exact ? "yes" : "no",
+                 is_wildcard ? "yes" : "no",
+                 (!is_wildcard && !is_exact) ? "yes" : "no");
+            break;
+
+        case OVS_KEY_ATTR_VLAN: {
+            ovs_be16 tci = nl_attr_get_be16(a);
+            ovs_be16 tci_mask = ma ? nl_attr_get_be16(ma) : OVS_BE16_MAX;
+
+            if (vlan_tci_to_vid(tci_mask) != VLAN_VID_MASK) {
+                /* Partially masked. */
+                VLOG_ERR("unsupported partial mask on vlan_vid attribute");
+                return 1;
+            }
+            VLOG_DBG("vid=%"PRIu16, vlan_tci_to_vid(tci));
+            tc_flow->vlan_id = vlan_tci_to_vid(tci);
+            if (vlan_tci_to_pcp(tci_mask)
+                != (VLAN_PCP_MASK >> VLAN_PCP_SHIFT)) {
+                /* Partially masked. */
+                VLOG_ERR("unsupported partial mask on vlan_pcp attribute");
+                return 1;
+            }
+            VLOG_DBG("pcp/prio=%"PRIu16, vlan_tci_to_pcp(tci));
+            tc_flow->vlan_prio = vlan_tci_to_pcp(tci);
+            /* A key TCI without the CFI bit set is rejected. */
+            if (!(tci & htons(VLAN_CFI))) {
+                VLOG_ERR("unsupported partial mask on vlan cfi=0 attribute");
+                return 1;
+            }
+            break;
+        }
+
+        case OVS_KEY_ATTR_ENCAP: {
+            const struct nlattr *nested_encap = nl_attr_get(a);
+            const size_t encap_len = nl_attr_get_size(a);
+            /* The mask may lack an ENCAP attribute; parse the nested key
+             * without a mask in that case instead of dereferencing NULL. */
+            const struct nlattr *nested_encap_mask =
+                ma ? nl_attr_get(ma) : NULL;
+            const size_t nested_encap_mask_len =
+                ma ? nl_attr_get_size(ma) : 0;
+            struct tc_flow encap_flow;
+            int nested_cant_offload;
+
+            VLOG_DBG("ENCAP!\n.");
+            /* The nested parse only fills in fields it matches on, so start
+             * from a zeroed flow to avoid copying indeterminate values. */
+            memset(&encap_flow, 0, sizeof encap_flow);
+            nested_cant_offload = parse_to_tc_flow(dpif, &encap_flow,
+                                                   nested_encap, encap_len,
+                                                   nested_encap_mask,
+                                                   nested_encap_mask_len);
+            VLOG_DBG("end of ENCAP!\n.");
+            if (nested_cant_offload) {
+                return 1;
+            }
+            tc_flow->encap_ip_proto = encap_flow.ip_proto;
+            tc_flow->encap_eth_type = encap_flow.eth_type;
+            /* NOTE(review): copies the larger of the ipv4/ipv6 members
+             * starting at 'ipv4'; this presumably relies on the two sharing
+             * storage in struct tc_flow -- confirm against its layout. */
+            memcpy(&tc_flow->encap_ipv4, &encap_flow.ipv4,
+                   (sizeof(encap_flow.ipv4) > sizeof(encap_flow.ipv6)?
+                    sizeof(encap_flow.ipv4) : sizeof(encap_flow.ipv6)));
+            VLOG_DBG("encap_eth_type(0x%x)", encap_flow.eth_type);
+            VLOG_DBG("encap ip proto (%d)", encap_flow.ip_proto);
+            break;
+        }
+
+        case OVS_KEY_ATTR_IN_PORT:
+            if (!is_exact) {
+                VLOG_ERR("%s isn't exact, can't offload!\n", attrname(type));
+                return 1;
+            }
+
+            VLOG_DBG("in_port(%d)\n", nl_attr_get_u32(a));
+            tc_flow->ovs_inport = nl_attr_get_u32(a);
+            tc_flow->indev = port_find(dpif, tc_flow->ovs_inport);
+            tc_flow->ifindex =
+                tc_flow->indev ? netdev_get_ifindex(tc_flow->indev) : 0;
+            if (!tc_flow->ovs_inport || !tc_flow->ifindex) {
+                VLOG_ERR
+                    ("RESULT: not found inport: %d or ifindex: %d for ovs in_port: %d\n",
+                     tc_flow->ovs_inport, tc_flow->ifindex,
+                     tc_flow->ovs_inport);
+                return 1;
+            }
+            break;
+
+        case OVS_KEY_ATTR_ETHERNET: {
+            const struct ovs_key_ethernet *eth_key = nla_data(a);
+            const struct ovs_key_ethernet *eth_key_mask;
+            struct ovs_key_ethernet full_mask;
+
+            memset(&full_mask, 0xFF, sizeof (full_mask));
+
+            /* TODO: fix masks on mac address (because of HW syndrome
+             * 0x3ad328).  Until then the supplied mask is deliberately
+             * discarded and MAC addresses are always matched exactly. */
+            ma = NULL;
+            eth_key_mask = ma ? nla_data(ma) : &full_mask;
+
+            const struct eth_addr *src = &eth_key->eth_src;
+            const struct eth_addr *src_mask = &eth_key_mask->eth_src;
+            const struct eth_addr *dst = &eth_key->eth_dst;
+            const struct eth_addr *dst_mask = &eth_key_mask->eth_dst;
+
+            memcpy(&tc_flow->src_mac, src, sizeof (tc_flow->src_mac));
+            memcpy(&tc_flow->src_mac_mask, src_mask,
+                   sizeof (tc_flow->src_mac_mask));
+            memcpy(&tc_flow->dst_mac, dst, sizeof (tc_flow->dst_mac));
+            memcpy(&tc_flow->dst_mac_mask, dst_mask,
+                   sizeof (tc_flow->dst_mac_mask));
+
+            VLOG_DBG("eth(src=" ETH_ADDR_FMT ", src_mask=" ETH_ADDR_FMT
+                     ", dst=" ETH_ADDR_FMT ", dst_mask=" ETH_ADDR_FMT "\n",
+                     ETH_ADDR_ARGS(tc_flow->src_mac),
+                     ETH_ADDR_ARGS(tc_flow->src_mac_mask),
+                     ETH_ADDR_ARGS(tc_flow->dst_mac),
+                     ETH_ADDR_ARGS(tc_flow->dst_mac_mask));
+            break;
+        }
+
+        case OVS_KEY_ATTR_ETHERTYPE:
+            if (!is_exact) {
+                VLOG_ERR("attribute %s isn't exact, can't offload!\n",
+                         attrname(type));
+                return 1;
+            }
+
+            tc_flow->eth_type = nl_attr_get_be16(a);
+            VLOG_DBG("eth_type(0x%04x)\n", ntohs(tc_flow->eth_type));
+            break;
+
+        case OVS_KEY_ATTR_IPV6: {
+            const struct ovs_key_ipv6 *ipv6 = nla_data(a);
+            const struct ovs_key_ipv6 *ipv6_mask;
+            struct ovs_key_ipv6 full_mask;
+
+            memset(&full_mask, 0xFF, sizeof (full_mask));
+            ipv6_mask = ma ? nla_data(ma) : &full_mask;
+
+            if (ipv6_mask->ipv6_frag) {
+                VLOG_WARN
+                    ("*** ignoring exact or partial mask on unsupported ipv6_frag, mask: %x",
+                     ipv6_mask->ipv6_frag);
+            }
+            if (ipv6_mask->ipv6_tclass || ipv6_mask->ipv6_hlimit
+                || ipv6_mask->ipv6_label) {
+                VLOG_ERR
+                    ("ipv6 mask exact or partial one of unsupported sub attributes (tclass: %x, hlimit: %x, label: %x)\n",
+                     ipv6_mask->ipv6_tclass, ipv6_mask->ipv6_hlimit,
+                     ipv6_mask->ipv6_label);
+                return 1;
+            }
+            if (ipv6_mask->ipv6_proto != 0
+                && ipv6_mask->ipv6_proto != 0xFF) {
+                /* Log the protocol value first and its mask second, as the
+                 * message text describes. */
+                VLOG_WARN
+                    ("*** ignoring partial mask on ipv6_proto, taking exact ip_proto: %d (%x)\n",
+                     ipv6->ipv6_proto, ipv6_mask->ipv6_proto);
+            }
+            /* If not wildcard out, take exact match for ipv6_proto
+             * (ignoring mask) */
+            if (ipv6_mask->ipv6_proto != 0) {
+                tc_flow->ip_proto = ipv6->ipv6_proto;
+            }
+
+            memcpy(tc_flow->ipv6.ipv6_src, ipv6->ipv6_src,
+                   sizeof(ipv6->ipv6_src));
+            memcpy(tc_flow->ipv6.ipv6_src_mask, ipv6_mask->ipv6_src,
+                   sizeof(ipv6_mask->ipv6_src));
+
+            memcpy(tc_flow->ipv6.ipv6_dst, ipv6->ipv6_dst,
+                   sizeof(ipv6->ipv6_dst));
+            memcpy(tc_flow->ipv6.ipv6_dst_mask, ipv6_mask->ipv6_dst,
+                   sizeof(ipv6_mask->ipv6_dst));
+            break;
+        }
+
+        case OVS_KEY_ATTR_IPV4: {
+            const struct ovs_key_ipv4 *ipv4 = nla_data(a);
+            const struct ovs_key_ipv4 *ipv4_mask;
+            struct ovs_key_ipv4 full_mask;
+
+            memset(&full_mask, 0xFF, sizeof (full_mask));
+            ipv4_mask = ma ? nla_data(ma) : &full_mask;
+
+            if (ipv4_mask->ipv4_frag) {
+                VLOG_WARN
+                    ("*** ignoring exact or partial mask on unsupported ipv4_frag, mask: %x",
+                     ipv4_mask->ipv4_frag);
+            }
+
+            if (ipv4_mask->ipv4_ttl || ipv4_mask->ipv4_tos) {
+                VLOG_ERR
+                    ("ipv4 mask exact or partial one of unsupported sub attributes (ttl: %x, tos: %x, frag: %x)\n",
+                     ipv4_mask->ipv4_ttl, ipv4_mask->ipv4_tos,
+                     ipv4_mask->ipv4_frag);
+                return 1;
+            }
+
+            if (ipv4_mask->ipv4_proto != 0
+                && ipv4_mask->ipv4_proto != 0xFF) {
+                /* Log the protocol value first and its mask second, as the
+                 * message text describes. */
+                VLOG_WARN
+                    ("*** ignoring partial mask on ipv4_proto, taking exact ip_proto: %d (%x)\n",
+                     ipv4->ipv4_proto, ipv4_mask->ipv4_proto);
+            }
+
+            /* If not wildcard out, take exact match for ipv4_proto
+             * (ignoring mask) */
+            if (ipv4_mask->ipv4_proto != 0) {
+                tc_flow->ip_proto = ipv4->ipv4_proto;
+            }
+
+            if (ipv4_mask->ipv4_src) {
+                tc_flow->ipv4.ipv4_src = ipv4->ipv4_src;
+                tc_flow->ipv4.ipv4_src_mask = ipv4_mask->ipv4_src;
+            }
+            if (ipv4_mask->ipv4_dst) {
+                tc_flow->ipv4.ipv4_dst = ipv4->ipv4_dst;
+                tc_flow->ipv4.ipv4_dst_mask = ipv4_mask->ipv4_dst;
+            }
+            break;
+        }
+
+        case OVS_KEY_ATTR_TCP: {
+            const struct ovs_key_tcp *tcp = nla_data(a);
+            const struct ovs_key_tcp *tcp_mask;
+            struct ovs_key_tcp full_mask;
+
+            memset(&full_mask, 0xFF, sizeof (full_mask));
+            tcp_mask = ma ? nla_data(ma) : &full_mask;
+
+            if (tcp_mask->tcp_src) {
+                tc_flow->src_port = tcp->tcp_src;
+                tc_flow->src_port_mask = tcp_mask->tcp_src;
+            }
+            if (tcp_mask->tcp_dst) {
+                tc_flow->dst_port = tcp->tcp_dst;
+                tc_flow->dst_port_mask = tcp_mask->tcp_dst;
+            }
+
+            /* Ports are converted to host order for logging only. */
+            VLOG_DBG("tcp(src=%d, msk: 0x%x, dst=%d, msk: 0x%x)\n",
+                     ntohs(tcp->tcp_src), ntohs(tcp_mask->tcp_src),
+                     ntohs(tcp->tcp_dst), ntohs(tcp_mask->tcp_dst));
+            break;
+        }
+
+        case OVS_KEY_ATTR_UDP: {
+            const struct ovs_key_udp *udp = nla_data(a);
+            const struct ovs_key_udp *udp_mask;
+            struct ovs_key_udp full_mask;
+
+            memset(&full_mask, 0xFF, sizeof (full_mask));
+            udp_mask = ma ? nla_data(ma) : &full_mask;
+
+            if (udp_mask->udp_src) {
+                tc_flow->src_port = udp->udp_src;
+                tc_flow->src_port_mask = udp_mask->udp_src;
+            }
+            if (udp_mask->udp_dst) {
+                tc_flow->dst_port = udp->udp_dst;
+                tc_flow->dst_port_mask = udp_mask->udp_dst;
+            }
+
+            /* Ports are converted to host order for logging only. */
+            VLOG_DBG("udp(src=%d/0x%x, dst=%d/0x%x)\n",
+                     ntohs(udp->udp_src), ntohs(udp_mask->udp_src),
+                     ntohs(udp->udp_dst), ntohs(udp_mask->udp_dst));
+            break;
+        }
+
+        case __OVS_KEY_ATTR_MAX:
+        default:
+            VLOG_ERR("unknown (default/max) key attribute: %s\n",
+                     attrname(type));
+            return 1;
+        }
+    }
+    VLOG_DBG("--- finished parsing attr - can offload!\n");
+    return 0;
+}
+
+/* Helper for get_new_prio(): folds 'var' into the running hash 'hash_mask'
+ * and appends its raw bytes to 'buf' at offset 'i'.  Relies on the locals
+ * 'hash_mask', 'buf' and 'i' of the enclosing function, and asserts that the
+ * bytes fit in 'buf' before writing them. */
+#define PRIO_ADD_TO_HASH(var) \
+do { \
+    ovs_assert(i + sizeof(var) <= sizeof buf); \
+    hash_mask = hash_bytes(&(var), sizeof(var), hash_mask); \
+    memcpy(&buf[i], &(var), sizeof(var)); \
+    i += sizeof(var); \
+} while (0)
+
+/* Returns the priority associated with the combination of masks (plus the
+ * eth_type) found in 'tc_flow'.
+ *
+ * The mask bytes are concatenated into a lookup key.  If an entry with the
+ * same key already exists in 'dpif->mask_to_prio' its priority is returned;
+ * otherwise a new entry is inserted with priority ++dpif->last_prio.  The
+ * lookup and the insertion are done under 'dpif->hash_mutex'.
+ *
+ * NOTE(review): 'last_prio' is a uint16_t and is incremented without a
+ * wrap-around check. */
+static uint16_t
+get_new_prio(struct dpif_hw_acc *dpif, struct tc_flow *tc_flow)
+{
+    struct mask_prio_data *data;
+    size_t hash_mask = 0;
+    char buf[128];
+    size_t i = 0;
+    uint16_t prio;
+
+    memset(buf, 0, sizeof(buf));
+
+    PRIO_ADD_TO_HASH(tc_flow->dst_mac_mask);
+    PRIO_ADD_TO_HASH(tc_flow->src_mac_mask);
+
+    PRIO_ADD_TO_HASH(tc_flow->src_port_mask);
+    PRIO_ADD_TO_HASH(tc_flow->dst_port_mask);
+
+    PRIO_ADD_TO_HASH(tc_flow->encap_ipv4.ipv4_src_mask);
+    PRIO_ADD_TO_HASH(tc_flow->encap_ipv4.ipv4_dst_mask);
+
+    PRIO_ADD_TO_HASH(tc_flow->encap_ipv6.ipv6_src_mask);
+    PRIO_ADD_TO_HASH(tc_flow->encap_ipv6.ipv6_dst_mask);
+
+    PRIO_ADD_TO_HASH(tc_flow->ipv4.ipv4_src_mask);
+    PRIO_ADD_TO_HASH(tc_flow->ipv4.ipv4_dst_mask);
+
+    PRIO_ADD_TO_HASH(tc_flow->ipv6.ipv6_src_mask);
+    PRIO_ADD_TO_HASH(tc_flow->ipv6.ipv6_dst_mask);
+
+    PRIO_ADD_TO_HASH(tc_flow->eth_type);
+
+    ovs_mutex_lock(&dpif->hash_mutex);
+    HMAP_FOR_EACH_WITH_HASH(data, node, hash_mask, &dpif->mask_to_prio) {
+        if (data->len == i && !memcmp(buf, data->data, i)) {
+            /* Read the priority while the map is still locked. */
+            prio = data->prio;
+            ovs_mutex_unlock(&dpif->hash_mutex);
+            return prio;
+        }
+    }
+
+    /* Not seen before: remember this mask set under a fresh priority.
+     * xmalloc() is the allocator used elsewhere in this file. */
+    data = xmalloc(sizeof *data);
+    memcpy(data->data, buf, i);
+    data->len = i;
+    data->prio = prio = ++dpif->last_prio;
+    hmap_insert(&dpif->mask_to_prio, &data->node, hash_mask);
+    ovs_mutex_unlock(&dpif->hash_mutex);
+
+    return prio;
+}
+
+/* Tries to offload the flow described by 'put' to hardware through tc.
+ *
+ * Returns DPIF_HW_OFFLOAD_ONLY when the flow was installed in hardware, in
+ * which case the caller does not forward the operation to the software
+ * datapath.  Returns DPIF_HW_NO_OFFLAOAD when the flow must be handled by
+ * the software datapath: probe requests, a policy that forbids offload, a
+ * missing key, a modify without actions, a match or action that is not
+ * supported, more than one output, or a tc failure.
+ *
+ * If this is a modify flow command for a flow that has no hardware handle
+ * yet, the flow is first deleted from the kernel netlink datapath and then
+ * installed in hardware.
+ *
+ * NOTE(review): that kernel delete happens before the hardware install is
+ * attempted; if the install then fails, the operation is passed on for a
+ * flow that was already removed.  Consider deleting only after success. */
+static enum dpif_hw_offload_policy
+parse_flow_put(struct dpif_hw_acc *dpif, struct dpif_flow_put *put)
+{
+    const struct nlattr *a;
+    size_t left;
+    struct netdev *in = 0;
+    enum dpif_hw_offload_policy policy;
+
+    int probe_feature = ((put->flags & DPIF_FP_PROBE) ? 1 : 0);
+
+    if (probe_feature) {
+        VLOG_DBG("\n.\nPROBE REQUEST!\n.\n");
+        /* see usage at dpif_probe_feature, we might want to intercept and
+         * disable some features */
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+    int cmd =
+        put->flags & DPIF_FP_CREATE ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET;
+    if (!put->ufid) {
+        VLOG_INFO
+            ("%s %d %s missing ufid for flow put, might be from dpctl add-flow.",
+             __FILE__, __LINE__, __func__);
+    }
+
+    policy = HW_offload_test_put(dpif, put);
+    uint16_t getprio = 0;
+    int handle = gethandle(dpif, put->ufid, &in, &getprio,
+                           "DPIF_OP_FLOW_PUT", 1);
+
+    if (policy == DPIF_HW_NO_OFFLAOAD) {
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    if (cmd == OVS_FLOW_CMD_NEW) {
+        VLOG_DBG("cmd is OVS_FLOW_CMD_NEW - create\n");
+    } else {
+        VLOG_DBG("cmd is OVS_FLOW_CMD_SET - modify\n");
+    }
+
+    if (put->flags & DPIF_FP_ZERO_STATS && cmd == OVS_FLOW_CMD_SET) {
+        VLOG_WARN
+            ("We need to zero the stats of a modified flow, not implemented, ignored\n");
+    }
+
+    if (put->stats) {
+        VLOG_WARN("FLOW PUT WANTS STATS\n");
+    }
+
+    /* if not present, and cmd == OVS_FLOW_CMD_SET, means don't modify ACTIONs
+     * (which we wrongly parse as a drop rule) see include/odp-netlink.h +:490
+     * to clear actions with OVS_FLOW_CMD_SET, actions will be present but
+     * empty */
+    if (!put->key) {
+        VLOG_ERR("%s %d %s error ,missing key, cmd: %d!", __FILE__, __LINE__,
+                 __func__, cmd);
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+    if (!put->actions) {
+        if (cmd == OVS_FLOW_CMD_SET) {
+            VLOG_WARN
+                ("%s %d %s missing actions on cmd modify, find and modify key only",
+                 __FILE__, __LINE__, __func__);
+            return DPIF_HW_NO_OFFLAOAD;
+        }
+    }
+
+    /* First pass over the actions: only count the outputs, so that flows
+     * with more than one output can be left to software. */
+    int outport_count = 0;
+
+    VLOG_DBG("parsing actions\n");
+    NL_ATTR_FOR_EACH_UNSAFE(a, left, put->actions, put->actions_len) {
+        if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT) {
+            VLOG_DBG("output to port: %d\n", nl_attr_get_u32(a));
+            outport_count++;
+        }
+    }
+    if (outport_count == 0) {
+        VLOG_DBG("output to port: drop\n");
+    }
+
+    /* Dump the whole flow (ufid, key/mask in both formats, actions) at
+     * debug level. */
+    struct ds ds;
+
+    ds_init(&ds);
+    ds_clear(&ds);
+    if (put->ufid) {
+        odp_format_ufid(put->ufid, &ds);
+        ds_put_cstr(&ds, ", ");
+    }
+
+    ds_put_cstr(&ds, "verbose: ");
+    odp_flow_format(put->key, put->key_len, put->mask, put->mask_len, 0, &ds,
+                    true);
+    ds_put_cstr(&ds, ", not_verbose: ");
+    odp_flow_format(put->key, put->key_len, put->mask, put->mask_len, 0, &ds,
+                    false);
+
+    /* can also use dpif_flow_stats_format(&f->stats, ds) to print stats */
+
+    ds_put_cstr(&ds, ", actions:");
+    format_odp_actions(&ds, put->actions, put->actions_len);
+    VLOG_DBG("%s\n", ds_cstr(&ds));
+    ds_destroy(&ds);
+
+    /* parse tc_flow */
+    struct tc_flow tc_flow;
+
+    memset(&tc_flow, 0, sizeof (tc_flow));
+    tc_flow.handle = handle;
+    int cant_offload =
+        parse_to_tc_flow(dpif, &tc_flow, put->key, put->key_len, put->mask,
+                         put->mask_len);
+
+    /* A zero handle means this ufid has no hardware rule yet. */
+    int new = handle ? 0 : 1;
+
+    VLOG_DBG
+        ("cant_offload: %d ifindex: %d, eth_type: %x, ip_proto: %d, outport_count: %d\n",
+         cant_offload, tc_flow.ifindex, ntohs(tc_flow.eth_type),
+         tc_flow.ip_proto, outport_count);
+
+    if (cant_offload || !tc_flow.ifindex || !tc_flow.eth_type
+        || outport_count > 1) {
+        VLOG_DBG("RESULT: SW\n");
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    uint16_t prio = get_new_prio(dpif, &tc_flow);
+
+    VLOG_DBG("RESULT: %p, ***** offloading (HW_ONLY!), prio: %d\n", dpif,
+             prio);
+    if (cmd != OVS_FLOW_CMD_NEW && !handle) {
+        /* modify and flow is now offloadable, remove from kernel netlink
+         * datapath */
+        int del_error =
+            dpif_flow_del(dpif->lp_dpif_netlink, put->key, put->key_len,
+                          put->ufid, PMD_ID_NULL, NULL);
+
+        if (!del_error) {
+            VLOG_DBG("modify, deleted old flow and offloading new\n");
+        } else {
+            VLOG_ERR("modify, error: %d\n", del_error);
+        }
+    }
+
+    /* Single error state for the whole install; it must not be shadowed
+     * inside the loop, or failures would be reported as success. */
+    int error = 0;
+
+    outport_count = 0;
+    /* TODO: actions_len = 0 <=> drop rule */
+    NL_ATTR_FOR_EACH_UNSAFE(a, left, put->actions, put->actions_len) {
+        if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT) {
+            outport_count++;
+
+            tc_flow.ovs_outport = nl_attr_get_u32(a);
+            tc_flow.outdev = port_find(dpif, tc_flow.ovs_outport);
+            tc_flow.ifindex_out =
+                tc_flow.outdev ? netdev_get_ifindex(tc_flow.outdev) : 0;
+            if (!tc_flow.ifindex_out) {
+                VLOG_ERR
+                    (" **** error! not found output port %d, ifindex: %d\n",
+                     tc_flow.ovs_outport, tc_flow.ifindex_out);
+                /* Nothing was installed; make sure the caller falls back
+                 * to software. */
+                error = ENODEV;
+                break;
+            }
+
+            VLOG_DBG
+                (" **** handle: %d, new? %d, adding %d -> %d (ifindex: %d -> %d)\n",
+                 tc_flow.handle, new, tc_flow.ovs_inport,
+                 tc_flow.ovs_outport, tc_flow.ifindex,
+                 tc_flow.ifindex_out);
+
+            error = tc_replace_flower(&tc_flow, prio);
+            if (error) {
+                VLOG_ERR
+                    (" **** error! adding fwd rule! tc error: %d\n",
+                     error);
+                break;
+            }
+
+            if (new) {
+                puthandle(dpif, put->ufid, tc_flow.indev,
+                          tc_flow.ovs_inport, tc_flow.handle,
+                          tc_flow.prio);
+            }
+
+            VLOG_DBG(" **** offloaded! handle: %d (%x)\n",
+                     tc_flow.handle, tc_flow.handle);
+        } else if (nl_attr_type(a) == OVS_ACTION_ATTR_PUSH_VLAN) {
+            const struct ovs_action_push_vlan *vlan_push = nl_attr_get(a);
+
+            tc_flow.vlan_push_id = vlan_tci_to_vid(vlan_push->vlan_tci);
+            tc_flow.vlan_push_prio = vlan_tci_to_pcp(vlan_push->vlan_tci);
+        } else if (nl_attr_type(a) == OVS_ACTION_ATTR_POP_VLAN) {
+            tc_flow.vlan_pop = 1;
+        } else {
+            VLOG_ERR("Unsupported output type!\n");
+            return DPIF_HW_NO_OFFLAOAD;
+        }
+    }
+
+    /* No output action at all: install the flow as a drop rule. */
+    if (!outport_count) {
+        VLOG_DBG
+            (" ***** handle: %d, new? %d, adding %d -> DROP (ifindex: %d -> DROP)\n",
+             tc_flow.handle, new, tc_flow.ovs_inport, tc_flow.ifindex);
+        error = tc_replace_flower(&tc_flow, prio);
+        if (!error) {
+            if (new) {
+                puthandle(dpif, put->ufid, tc_flow.indev,
+                          tc_flow.ovs_inport, tc_flow.handle,
+                          tc_flow.prio);
+            }
+
+            VLOG_DBG(" **** offloaded! handle: %d (%x)\n", tc_flow.handle,
+                     tc_flow.handle);
+        } else {
+            VLOG_ERR(" **** error adding drop rule! tc error: %d\n",
+                     error);
+        }
+    }
+
+    return error ? DPIF_HW_NO_OFFLAOAD : DPIF_HW_OFFLOAD_ONLY;
+}
+
+/* Serves the flow-get request 'get' from hardware when the ufid maps to an
+ * offloaded rule.
+ *
+ * Returns DPIF_HW_OFFLOAD_ONLY after filling 'get->flow' from the tc rule,
+ * or DPIF_HW_NO_OFFLAOAD when there is no handle/priority for the ufid, the
+ * ifindex has no OVS port, or the tc lookup fails. */
+static enum dpif_hw_offload_policy
+parse_flow_get(struct dpif_hw_acc *dpif, struct dpif_flow_get *get)
+{
+    struct netdev *in = 0;
+    uint16_t prio = 0;
+    int handle =
+        gethandle(dpif, get->ufid, &in, &prio, "DPIF_OP_FLOW_GET", 1);
+
+    if (!handle || !prio) {
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    struct tc_flow tc_flow;
+    int ifindex = netdev_get_ifindex(in);
+    int ovs_port = get_ovs_port(dpif, ifindex);
+
+    if (ovs_port == -1) {
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    if (tc_get_flower(ifindex, handle, prio, &tc_flow)) {
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    dpif_hw_tc_flow_to_dpif_flow(dpif, &tc_flow, get->flow, ovs_port,
+                                 get->buffer, in);
+    return DPIF_HW_OFFLOAD_ONLY;
+}
+
+/* Handles the flow-delete request 'del' for a flow that may be offloaded.
+ *
+ * The ufid's handle entry is always dropped.  When a handle and priority
+ * were found, the matching tc rule is deleted as well.  Returns
+ * DPIF_HW_OFFLOAD_ONLY if that tc delete succeeded and DPIF_HW_NO_OFFLAOAD
+ * otherwise. */
+static enum dpif_hw_offload_policy
+parse_flow_del(struct dpif_hw_acc *dpif, struct dpif_flow_del *del)
+{
+    struct netdev *in = 0;
+    uint16_t prio = 0;
+    int handle =
+        gethandle(dpif, del->ufid, &in, &prio, "DPIF_OP_FLOW_DEL", 1);
+
+    /* we delete the handle anyway (even if not deleted from tc) */
+    delhandle(dpif, del->ufid);
+
+    if (!handle || !prio) {
+        VLOG_DBG("del with no handle/ufid/prio, SW only\n");
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    int ifindex = netdev_get_ifindex(in);
+
+    VLOG_DBG("deleting ufid %s, handle %d, prio: %d, ifindex: %d\n",
+             printufid(del->ufid), handle, prio, ifindex);
+
+    int error = tc_del_flower(ifindex, handle, prio);
+
+    if (error) {
+        VLOG_ERR("DELETE FAILED: tc error: %d\n", error);
+        return DPIF_HW_NO_OFFLAOAD;
+    }
+
+    VLOG_DBG("DELETE SUCCESS!\n");
+    return DPIF_HW_OFFLOAD_ONLY;
+}
+
+/* Dispatches the datapath operation 'op' to the matching flow put/get/del
+ * handler and returns that handler's offload decision.  Every other
+ * operation type (including DPIF_OP_EXECUTE) yields DPIF_HW_NO_OFFLAOAD. */
+static enum dpif_hw_offload_policy
+parse_operate(struct dpif_hw_acc *dpif, struct dpif_op *op)
+{
+    enum dpif_hw_offload_policy policy = DPIF_HW_NO_OFFLAOAD;
+
+    if (op->type == DPIF_OP_FLOW_PUT) {
+        VLOG_DBG("DPIF_OP_FLOW_PUT");
+        policy = parse_flow_put(dpif, &op->u.flow_put);
+    } else if (op->type == DPIF_OP_FLOW_GET) {
+        VLOG_DBG("DPIF_OP_FLOW_GET");
+        policy = parse_flow_get(dpif, &op->u.flow_get);
+    } else if (op->type == DPIF_OP_FLOW_DEL) {
+        VLOG_DBG("DPIF_OP_FLOW_DEL");
+        policy = parse_flow_del(dpif, &op->u.flow_del);
+    }
+
+    return policy;
+}
+
static void
dpif_hw_acc_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
struct dpif_hw_acc *dpif = dpif_hw_acc_cast(dpif_);
- return dpif->lp_dpif_netlink->dpif_class->operate(dpif->lp_dpif_netlink,
- ops, n_ops);
+ struct dpif_op **new_ops = xmalloc(sizeof (struct dpif_op *) * n_ops);
+ int n_new_ops = 0;
+ int i = 0;
+
+ for (i = 0; i < n_ops; i++) {
+ if (parse_operate(dpif, ops[i]) == DPIF_HW_OFFLOAD_ONLY) {
+ ops[i]->error = 0;
+ } else
+ new_ops[n_new_ops++] = ops[i];
+ }
+ dpif->lp_dpif_netlink->dpif_class->operate(dpif->lp_dpif_netlink, new_ops,
+ n_new_ops);
+ free(new_ops);
}
static int
diff --git a/lib/dpif-hw-acc.h b/lib/dpif-hw-acc.h
index 23bd7ec..ea76865 100644
--- a/lib/dpif-hw-acc.h
+++ b/lib/dpif-hw-acc.h
@@ -14,6 +14,16 @@ struct dpif_hw_acc {
struct hmap port_to_netdev;
struct hmap ufid_to_handle;
struct hmap handle_to_ufid;
+ struct hmap mask_to_prio;
+
+ uint16_t last_prio;
+};
+
+struct mask_prio_data { /* One remembered mask combination and its tc priority. */
+ struct hmap_node node; /* Node inserted into dpif_hw_acc's 'mask_to_prio'. */
+ char data[128]; /* Concatenated mask bytes, copied from get_new_prio()'s buffer. */
+ size_t len; /* Number of bytes of 'data' that were copied in. */
+ uint16_t prio; /* Priority assigned on creation, taken from ++last_prio. */
};
struct port_netdev_hash_data {
--
1.8.3.1
More information about the dev
mailing list