[ovs-dev] [PATCHv2 RFC 1/3] afxdp: add ebpf code for afxdp and xskmap.

William Tu u9012063 at gmail.com
Fri Aug 31 23:24:14 UTC 2018


AF_XDP requires attaching an XDP program and an xskmap to
each netdev.  This patch provides these programs and maps,
along with the implementation for loading and attaching them.

Signed-off-by: William Tu <u9012063 at gmail.com>
---
 acinclude.m4      |  1 +
 bpf/api.h         |  6 ++++++
 bpf/helpers.h     |  2 ++
 bpf/maps.h        | 12 ++++++++++++
 bpf/xdp.h         | 42 +++++++++++++++++++++++++++++++++++++-----
 lib/bpf.c         | 41 +++++++++++++++++++++++++++++++++++++----
 lib/bpf.h         |  6 ++++--
 vswitchd/bridge.c |  1 +
 8 files changed, 100 insertions(+), 11 deletions(-)

diff --git a/acinclude.m4 b/acinclude.m4
index 257de4e178a8..badc1e564487 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -376,6 +376,7 @@ AC_DEFUN([OVS_CHECK_BPF], [
       AC_DEFINE([HAVE_BPF], [1],
                 [Define to 1 if BPF is available.])
       BPF_LDADD="-lbpf -lelf"
+      AC_DEFINE([AFXDP_NETDEV], [1], [System uses the AFXDP module.])
       AC_SUBST([BPF_LDADD])
   fi
 ])
diff --git a/bpf/api.h b/bpf/api.h
index f2db1f729157..15a44b744e6a 100644
--- a/bpf/api.h
+++ b/bpf/api.h
@@ -131,6 +131,12 @@
               sizeof(uint32_t), pin, __NR_CPUS__)
 #endif
 
+#ifndef BPF_XSKMAP
+# define BPF_XSKMAP(name, max_elem) \
+    __BPF_MAP(name, BPF_MAP_TYPE_XSKMAP, 0, sizeof(int), \
+              sizeof(int), 1, max_elem)
+#endif
+
 /** Classifier helper */
 
 #ifndef BPF_H_DEFAULT
diff --git a/bpf/helpers.h b/bpf/helpers.h
index fc4c4933e189..424cc06bd6aa 100644
--- a/bpf/helpers.h
+++ b/bpf/helpers.h
@@ -163,6 +163,8 @@ static int (*bpf_skb_change_tail)(void *ctx, int len, int flags) =
     (void *) BPF_FUNC_skb_change_tail;
 static int (*bpf_get_hash_recalc)(void *ctx) =
     (void *) BPF_FUNC_get_hash_recalc;
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+    (void *) BPF_FUNC_redirect_map;
 
 static int OVS_UNUSED vlan_push(void *ctx, ovs_be16 proto, u16 tci)
 {
diff --git a/bpf/maps.h b/bpf/maps.h
index d0e39c79a098..63953f3b045f 100644
--- a/bpf/maps.h
+++ b/bpf/maps.h
@@ -153,6 +153,18 @@ BPF_PERCPU_ARRAY(percpu_executing_key,
         1
 );
 
+/* af_xdp map:
+ * The key can be anything by our design.
+ * The value is the receive queue id that the userspace
+ * program receives from.
+ * We need one map per device.
+ * The only parameter is the number of queues.
+ */
+BPF_XSKMAP(xsks_map0, 4);
+BPF_XSKMAP(xsks_map1, 4);
+BPF_XSKMAP(xsks_map2, 4);
+BPF_XSKMAP(xsks_map3, 4);
+
 struct ebpf_headers_t;
 struct ebpf_metadata_t;
 
diff --git a/bpf/xdp.h b/bpf/xdp.h
index 15c379e7f43c..19c9021cf773 100644
--- a/bpf/xdp.h
+++ b/bpf/xdp.h
@@ -68,10 +68,42 @@ static int xdp_ingress(struct xdp_md *ctx OVS_UNUSED)
 #endif
 }
 
-__section("af_xdp")
-static int af_xdp_ingress(struct xdp_md *ctx OVS_UNUSED)
-{
-    /* TODO: see xdpsock_kern.c ans xdpsock_user.c */
-    return XDP_PASS;
+#ifdef DEBUG
+#define AFXDP_REDIRECT(xskmap) { \
+    int idx = 0; \
+    int flags = 0; \
+    int len = (long)ctx->data_end - (long)ctx->data; \
+    printt("ingress_ifindex %d rx_queue_index %d pkt len %d\n", \
+            ctx->ingress_ifindex, ctx->rx_queue_index, len); \
+    printt("send to queue xsk queue 0\n"); \
+    return bpf_redirect_map(xskmap, idx, flags); \
+}
+#else
+#define AFXDP_REDIRECT(xskmap) { \
+    int idx = 0; \
+    int flags = 0; \
+    return bpf_redirect_map(xskmap, idx, flags); \
 }
+#endif
 
+/* For AFXDP, we need one map and one afxdp program per netdev */
+__section("afxdp0")
+static int af_xdp_ingress0(struct xdp_md *ctx OVS_UNUSED)
+{
+    AFXDP_REDIRECT(&xsks_map0);
+}
+__section("afxdp1")
+static int af_xdp_ingress1(struct xdp_md *ctx OVS_UNUSED)
+{
+    AFXDP_REDIRECT(&xsks_map1);
+}
+__section("afxdp2")
+static int af_xdp_ingress2(struct xdp_md *ctx OVS_UNUSED)
+{
+    AFXDP_REDIRECT(&xsks_map2);
+}
+__section("afxdp3")
+static int af_xdp_ingress3(struct xdp_md *ctx OVS_UNUSED)
+{
+    AFXDP_REDIRECT(&xsks_map3);
+}
diff --git a/lib/bpf.c b/lib/bpf.c
index 48c677e54659..d59ed1bf1e65 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -174,6 +174,7 @@ bpf_format_state(struct ds *ds, struct bpf_state *state)
     bpf_format_prog(ds, &state->egress);
     bpf_format_prog(ds, &state->ingress);
     bpf_format_prog(ds, &state->xdp);
+    //bpf_format_prog(ds, &state->afxdp);
 }
 
 /* Populates 'state' with the standard set of programs and maps for openvswitch
@@ -194,6 +195,10 @@ bpf_get(struct bpf_state *state, bool verbose)
         {&state->egress.fd, "egress/0"},
         {&state->downcall.fd, "downcall/0"},
         {&state->xdp.fd, "xdp/0"},
+        {&state->afxdp[0].fd, "afxdp0/0"},
+        {&state->afxdp[1].fd, "afxdp1/0"},
+        {&state->afxdp[2].fd, "afxdp2/0"},
+        {&state->afxdp[3].fd, "afxdp3/0"},
         /* BPF Maps */
         {&state->upcalls.fd, "upcalls"},
         {&state->flow_table.fd, "flow_table"},
@@ -201,6 +206,10 @@ bpf_get(struct bpf_state *state, bool verbose)
         {&state->tailcalls.fd, "tailcalls"},
         {&state->execute_actions.fd, "execute_actions"},
         {&state->dp_flow_stats.fd, "dp_flow_stats"},
+        {&state->xsks_map[0].fd, "xsks_map0"},
+        {&state->xsks_map[1].fd, "xsks_map1"},
+        {&state->xsks_map[2].fd, "xsks_map2"},
+        {&state->xsks_map[3].fd, "xsks_map3"},
     };
     int i, k, error = 0;
     char buf[BUFSIZ];
@@ -217,7 +226,7 @@ bpf_get(struct bpf_state *state, bool verbose)
         }
         error = bpf_obj_get(buf);
         if (error > 0) {
-            VLOG_DBG("Loaded BPF object at %s fd %d", buf, error);
+            VLOG_INFO("Loaded BPF object at %s fd %d", buf, error);
             *objs[i].fd = error;
             error = 0;
             continue;
@@ -229,7 +238,7 @@ bpf_get(struct bpf_state *state, bool verbose)
 
     prog_array_fd = state->tailcalls.fd;
 
-    VLOG_DBG("start loading/pinning program array\n");
+    VLOG_INFO("start loading/pinning program array\n");
     for (k = 0; k < BPF_MAX_PROG_ARRAY; k++) {
         struct stat s;
         int prog_fd;
@@ -243,7 +252,7 @@ bpf_get(struct bpf_state *state, bool verbose)
 
         prog_fd = bpf_obj_get(buf);
         if (prog_fd > 0) {
-            VLOG_DBG("Loaded BPF object at %s", buf);
+            VLOG_INFO("Loaded BPF object at %s", buf);
             state->tailarray[k].fd = prog_fd;
             error = bpf_map_update_elem(prog_array_fd, &k, &prog_fd, BPF_ANY);
             if (error < 0) {
@@ -280,9 +289,17 @@ bpf_get(struct bpf_state *state, bool verbose)
         state->downcall.name = xstrdup("ovs_cls_downcall");
         state->upcalls.name = xstrdup("upcalls");
         state->xdp.name = xstrdup("xdp");
+        state->afxdp[0].name = xstrdup("afxdp0");
+        state->afxdp[1].name = xstrdup("afxdp1");
+        state->afxdp[2].name = xstrdup("afxdp2");
+        state->afxdp[3].name = xstrdup("afxdp3");
         state->flow_table.name = xstrdup("flow_table");
         state->datapath_stats.name = xstrdup("datapath_stats");
         state->dp_flow_stats.name = xstrdup("dp_flow_stats");
+        state->xsks_map[0].name = xstrdup("xsks_map0");
+        state->xsks_map[1].name = xstrdup("xsks_map1");
+        state->xsks_map[2].name = xstrdup("xsks_map2");
+        state->xsks_map[3].name = xstrdup("xsks_map3");
         // add parser, lookup, action, deparser
         state->tailcalls.name = xstrdup("tailcalls");
 
@@ -309,17 +326,33 @@ bpf_put(struct bpf_state *state)
     xclose(state->downcall.fd, state->downcall.name);
     xclose(state->upcalls.fd, state->upcalls.name);
     xclose(state->xdp.fd, state->xdp.name);
+    xclose(state->afxdp[0].fd, state->afxdp[0].name);
+    xclose(state->afxdp[1].fd, state->afxdp[1].name);
+    xclose(state->afxdp[2].fd, state->afxdp[2].name);
+    xclose(state->afxdp[3].fd, state->afxdp[3].name);
     xclose(state->flow_table.fd, "ovs_map_flow_table");
     xclose(state->datapath_stats.fd, "ovs_datapath_stats");
     xclose(state->dp_flow_stats.fd, state->dp_flow_stats.name);
+    xclose(state->xsks_map[0].fd, state->xsks_map[0].name);
+    xclose(state->xsks_map[1].fd, state->xsks_map[1].name);
+    xclose(state->xsks_map[2].fd, state->xsks_map[2].name);
+    xclose(state->xsks_map[3].fd, state->xsks_map[3].name);
     free((void *)state->ingress.name);
     free((void *)state->egress.name);
     free((void *)state->downcall.name);
     free((void *)state->upcalls.name);
     free((void *)state->xdp.name);
+    free((void *)state->afxdp[0].name);
+    free((void *)state->afxdp[1].name);
+    free((void *)state->afxdp[2].name);
+    free((void *)state->afxdp[3].name);
     free((void *)state->flow_table.name);
     free((void *)state->datapath_stats.name);
     free((void *)state->dp_flow_stats.name);
+    free((void *)state->xsks_map[0].name);
+    free((void *)state->xsks_map[1].name);
+    free((void *)state->xsks_map[2].name);
+    free((void *)state->xsks_map[3].name);
 }
 
 static void
@@ -335,7 +368,7 @@ process(struct bpf_object *obj)
         int error;
 
         VLOG_DBG(" - %s\n",  title);
-        if (strstr(title, "xdp")) {
+        if (strstr(title, "xdp")) { /* handle both xdp and afxdp */
             error = bpf_program__set_xdp(prog);
         } else {
             error = bpf_program__set_sched_cls(prog); // or sched_act?
diff --git a/lib/bpf.h b/lib/bpf.h
index 4b5afaf4f77f..69091aa640d3 100644
--- a/lib/bpf.h
+++ b/lib/bpf.h
@@ -38,6 +38,7 @@ struct bpf_map {
 struct bpf_state;
 struct ds;
 
+#define MAX_AFXDP_DEV 4 /* Max number of supported AFXDP netdev */
 #define BPF_MAX_PROG_ARRAY 64
 struct bpf_state {
     /* File descriptors for programs. */
@@ -46,14 +47,15 @@ struct bpf_state {
     struct bpf_prog downcall;           /* BPF_PROG_TYPE_SCHED_CLS */
     struct bpf_prog tailarray[BPF_MAX_PROG_ARRAY];
     struct bpf_prog xdp;                /* BPF_PROG_TYPE_XDP */
-    // william: struct bpf_prog parser, deparser, action,
-
+    struct bpf_prog afxdp[MAX_AFXDP_DEV];  /* BPF_PROG_TYPE_XDP:
+                                           each netdev needs one */
     struct bpf_map upcalls;             /* BPF_MAP_TYPE_PERF_ARRAY */
     struct bpf_map flow_table;          /* BPF_MAP_TYPE_HASH */
     struct bpf_map datapath_stats;      /* BPF_MAP_TYPE_ARRAY */
     struct bpf_map tailcalls;           /* BPF_PROG_TYPE_PROG_ARRARY */
     struct bpf_map execute_actions;     /* BPF_MAP_TYPE_ARRAY */
     struct bpf_map dp_flow_stats;       /* BPF_MAP_TYPE_HASH */
+    struct bpf_map xsks_map[MAX_AFXDP_DEV];     /* BPF_MAP_TYPE_XSKMAP */
 };
 
 int bpf_get(struct bpf_state *state, bool verbose);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index ca6d73810420..56711c657dd4 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -518,6 +518,7 @@ init_ebpf(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED)
     if (ovsthread_once_start(&once)) {
         char *bpf_elf = xasprintf("%s/bpf/datapath.o", ovs_pkgdatadir());
 
+        VLOG_DBG("%s bpf elf: %s", __func__, bpf_elf);
         error = bpf_init();
         if (!error) {
             error = bpf_load(bpf_elf);
-- 
2.7.4



More information about the dev mailing list