[ovs-dev] [RFC PATCH 04/11] lib/bpf: add support for managing bpf program/map.

William Tu u9012063 at gmail.com
Sat Jun 23 12:16:36 UTC 2018


From: Joe Stringer <joe at ovn.org>

Through libbpf, the patch adds support for loading bpf programs
and maps, pinning the programs and maps to /sys/fs/bpf/ovs/, managing
the file descriptor of each loaded map, and printing their state.

Signed-off-by: Joe Stringer <joe at ovn.org>
Co-authored-by: William Tu <u9012063 at gmail.com>
Co-authored-by: Yifeng Sun <pkusunyifeng at gmail.com>
---
 lib/bpf.c | 524 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/bpf.h |  69 +++++++++
 2 files changed, 593 insertions(+)
 create mode 100644 lib/bpf.c
 create mode 100644 lib/bpf.h

diff --git a/lib/bpf.c b/lib/bpf.c
new file mode 100644
index 000000000000..48c677e54659
--- /dev/null
+++ b/lib/bpf.c
@@ -0,0 +1,524 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <linux/bpf.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <iproute2/bpf_elf.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <sys/resource.h>
+
+#include "bpf.h"
+#include "bpf/odp-bpf.h"
+#include "util.h"
+#include "openvswitch/dynamic-string.h"
+#include "openvswitch/vlog.h"
+
+#define BPF_FS_PATH "/sys/fs/bpf/ovs/"
+static const char *ovs_bpf_path = BPF_FS_PATH; /* Where objects are pinned. */
+
+#define MAX_BPF_PROG_ARRAY 64 /* FIXME: unused; cf. BPF_MAX_PROG_ARRAY. */
+VLOG_DEFINE_THIS_MODULE(bpf);
+
+static void
+bpf_format_prog(struct ds *ds, const struct bpf_prog *prog)
+{   /* Appends the name of 'prog' and its tc handle (8 hex digits) to 'ds'. */
+    ds_put_format(ds, "    %s:\n", prog->name);
+    ds_put_format(ds, "        handle: %08"PRIx32"\n", prog->handle);
+}
+
+typedef void map_element_writer_t(struct ds *ds, uint64_t key, void *value);
+
+/* map_element_writer_t for datapath statistics: appends "<name>: <count>\n"
+ * to 'ds', where <name> is chosen by 'key' and <count> is the uint64_t that
+ * 'value_' points to.
+ *
+ * For OVS_DP_STATS_UNSPEC no name is written; trailing spaces already in 'ds'
+ * are removed instead.  The count is written only when 'key' is nonzero. */
+static void
+format_dp_stats(struct ds *ds, uint64_t key, void *value_)
+{
+    const uint64_t *count = value_;
+    const char *name = NULL;
+
+    switch (key) {
+    case OVS_DP_STATS_HIT:      name = "hit";       break;
+    case OVS_DP_STATS_MISSED:   name = "missed";    break;
+    case OVS_DP_STATS_LOST:     name = "lost";      break;
+    case OVS_DP_STATS_FLOWS:    name = "flows";     break;
+    case OVS_DP_STATS_MASK_HIT: name = "masks_hit"; break;
+    case OVS_DP_STATS_MASKS:    name = "masks";     break;
+    case OVS_DP_STATS_ERRORS:   name = "errors";    break;
+    case OVS_DP_STATS_UNSPEC:
+        /* Undo the indentation that the caller wrote before calling us. */
+        while (ds_chomp(ds, ' ')) {
+            continue;
+        }
+        break;
+    default:
+        ds_put_format(ds, "unknown-%"PRIu64, key);
+        break;
+    }
+
+    if (name) {
+        ds_put_cstr(ds, name);
+    }
+
+    /* Nothing more is printed for key 0. */
+    if (key) {
+        ds_put_format(ds, ": %"PRIu64"\n", *count);
+    }
+}
+
+static void
+format_upcalls(struct ds *ds, uint64_t key, void *value OVS_UNUSED)
+{   /* Appends "cpu-<key>\n" to 'ds'; the map value is not printed. */
+    ds_put_format(ds, "cpu-%"PRIu64"\n", key);
+}
+
+static void
+format_tailcalls(struct ds *ds, uint64_t key, void *value_)
+{   /* Appends slot 'key' and the uint32_t at 'value_', space-separated. */
+    const uint32_t *prog_fd = value_;
+    ds_put_format(ds, "index-%"PRIu64" prog_fd-%"PRIu32"\n", key, *prog_fd);
+}
+
+/* Looks up 'key' ('key_len' bytes) in BPF map 'fd', filling in 'value'.
+ * Returns bpf_map_lookup_elem()'s result; logs failures at debug level. */
+static int
+lookup_elem(int fd, void *key, size_t key_len, void *value)
+{
+    int err = bpf_map_lookup_elem(fd, key, value);
+    if (err) {
+        int saved_errno = errno;    /* ds_put_hex() may clobber errno. */
+        struct ds ds = DS_EMPTY_INITIALIZER;
+        ds_put_hex(&ds, key, key_len);
+        VLOG_DBG("error occurred looking up elem %s: %s", ds_cstr(&ds),
+                 ovs_strerror(saved_errno));
+        ds_destroy(&ds);
+    }
+    return err;
+}
+
+/* Defines NAME(ds, map, fmt): appends the name of 'map' to 'ds', then walks
+ * every key (type KTYPE), passing each entry that can be looked up (value type
+ * VTYPE) to 'fmt' if nonnull.  PRINT_COUNT also appends the number of keys. */
+#define MAP_FORMAT_FUNC(NAME, KTYPE, VTYPE, PRINT_COUNT)                \
+    static void NAME(struct ds *ds, const struct bpf_map *map,          \
+                     map_element_writer_t fmt)                          \
+    {                                                                   \
+        KTYPE key;                                                      \
+        KTYPE *prev = NULL;     /* NULL requests the first key. */      \
+        VTYPE value;                                                    \
+        int count = 0;                                                  \
+                                                                        \
+        VLOG_DBG("reading map %s", map->name);                          \
+        ds_put_format(ds, "    %s:\n", map->name);                      \
+        /* Iterate from the first key (NULL 'prev', Linux 4.12+), not   \
+         * from key 0: in a hash map, entries ordered before an         \
+         * existing key 0 would otherwise never be visited. */          \
+        while (!bpf_map_get_next_key(map->fd, prev, &key)) {            \
+            prev = &key;                                                \
+            count++;                                                    \
+            if (fmt                                                     \
+                && !lookup_elem(map->fd, &key, sizeof key, &value)) {   \
+                ds_put_cstr(ds, "        ");                            \
+                fmt(ds, key, &value);                                   \
+            }                                                           \
+        }                                                               \
+        if (PRINT_COUNT) {                                              \
+            ds_put_format(ds, "        count: %d\n", count);            \
+        }                                                               \
+    }
+
+MAP_FORMAT_FUNC(bpf_format_map_stats, uint64_t, uint64_t, false);
+MAP_FORMAT_FUNC(bpf_format_map_flows, uint64_t, struct bpf_flow, true);
+MAP_FORMAT_FUNC(bpf_format_map_upcalls, uint32_t, uint32_t, true);
+MAP_FORMAT_FUNC(bpf_format_map_tailcalls, uint32_t, uint32_t, true);//FIXME
+/* TODO: MAP_FORMAT_FUNC(bpf_format_map_dp_flow_stats, ...) is not defined. */
+
+/* Appends the pin path, maps and programs of 'state' to 'ds' as text. */
+void
+bpf_format_state(struct ds *ds, struct bpf_state *state)
+{
+    ds_put_format(ds, "path: %s\n", ovs_bpf_path);
+    ds_put_cstr(ds, "maps:\n");
+    bpf_format_map_stats(ds, &state->datapath_stats, format_dp_stats);
+    bpf_format_map_flows(ds, &state->flow_table, NULL);
+    bpf_format_map_upcalls(ds, &state->upcalls, format_upcalls);
+    bpf_format_map_tailcalls(ds, &state->tailcalls, format_tailcalls);
+    ds_put_cstr(ds, "programs:\n");
+    bpf_format_prog(ds, &state->downcall);
+    bpf_format_prog(ds, &state->egress);
+    bpf_format_prog(ds, &state->ingress);
+    bpf_format_prog(ds, &state->xdp);
+}
+
+/* Populates 'state' with the standard set of programs and maps for openvswitch
+ * datapath as sourced from pinned programs at ovs_bpf_path, then installs each
+ * pinned "tail-<k>/0" program into slot k of the tailcalls program array.
+ *
+ * Returns 0 on success, or positive errno on error. If successful, the caller
+ * is responsible for releasing the resources in 'state' via bpf_put().  On
+ * error, every file descriptor opened here has been closed again.
+ */
+int
+bpf_get(struct bpf_state *state, bool verbose)
+{
+    const struct {
+        int *fd;
+        const char *path;
+    } objs[] = {
+        /* BPF Programs */
+        {&state->ingress.fd, "ingress/0"},
+        {&state->egress.fd, "egress/0"},
+        {&state->downcall.fd, "downcall/0"},
+        {&state->xdp.fd, "xdp/0"},
+        /* BPF Maps */
+        {&state->upcalls.fd, "upcalls"},
+        {&state->flow_table.fd, "flow_table"},
+        {&state->datapath_stats.fd, "datapath_stats"},
+        {&state->tailcalls.fd, "tailcalls"},
+        {&state->execute_actions.fd, "execute_actions"},
+        {&state->dp_flow_stats.fd, "dp_flow_stats"},
+    };
+    size_t n_open = 0;          /* Number of leading 'objs' entries opened. */
+    int k, error = 0;
+    char buf[BUFSIZ];
+
+    /* 'state' may arrive uninitialized.  Mark every tail slot unused first so
+     * that the error path below never closes a garbage descriptor. */
+    for (k = 0; k < BPF_MAX_PROG_ARRAY; k++) {
+        state->tailarray[k].fd = 0;
+    }
+
+    for (; n_open < ARRAY_SIZE(objs); n_open++) {
+        struct stat s;
+        int fd;
+
+        snprintf(buf, sizeof buf, "%s/%s", ovs_bpf_path, objs[n_open].path);
+        if (stat(buf, &s)) {
+            error = errno;
+            break;
+        }
+        fd = bpf_obj_get(buf);
+        if (fd <= 0) {
+            error = errno;
+            break;
+        }
+        VLOG_DBG("Loaded BPF object at %s fd %d", buf, fd);
+        *objs[n_open].fd = fd;
+    }
+
+    /* Fill the program array only if its map was opened above; otherwise a
+     * later success here could hide the earlier failure. */
+    VLOG_DBG("start loading/pinning program array\n");
+    for (k = 0; !error && k < BPF_MAX_PROG_ARRAY; k++) {
+        struct stat s;
+        int prog_fd;
+
+        snprintf(buf, sizeof buf, "%s/tail-%d/0", ovs_bpf_path, k);
+        if (stat(buf, &s)) {
+            continue;           /* Nothing pinned for this slot. */
+        }
+
+        prog_fd = bpf_obj_get(buf);
+        if (prog_fd <= 0) {
+            error = errno;
+            break;
+        }
+        VLOG_DBG("Loaded BPF object at %s", buf);
+        state->tailarray[k].fd = prog_fd;
+        if (bpf_map_update_elem(state->tailcalls.fd, &k, &prog_fd,
+                                BPF_ANY) < 0) {
+            error = errno;      /* Report errno, not the negative return. */
+            VLOG_ERR("Can not add %s into BPF_MAP_PROG_ARRAY\n", buf);
+            break;
+        }
+    }
+
+    if (error) {
+        VLOG(verbose ? VLL_WARN : VLL_DBG, "Failed to load %s: %s",
+             buf, ovs_strerror(error));
+
+        for (size_t j = 0; j < n_open; j++) {
+            close(*objs[j].fd);
+            *objs[j].fd = 0;
+        }
+        for (k = 0; k < BPF_MAX_PROG_ARRAY; k++) {
+            if (state->tailarray[k].fd) {
+                close(state->tailarray[k].fd);
+                state->tailarray[k].fd = 0;
+            }
+        }
+        return error;
+    }
+
+    state->ingress.handle = INGRESS_HANDLE;
+    state->ingress.name = xstrdup("ovs_cls_ingress");
+    state->egress.handle = EGRESS_HANDLE;
+    state->egress.name = xstrdup("ovs_cls_egress");
+    state->downcall.handle = INGRESS_HANDLE;
+    state->downcall.name = xstrdup("ovs_cls_downcall");
+    state->upcalls.name = xstrdup("upcalls");
+    state->xdp.name = xstrdup("xdp");
+    state->flow_table.name = xstrdup("flow_table");
+    state->datapath_stats.name = xstrdup("datapath_stats");
+    state->dp_flow_stats.name = xstrdup("dp_flow_stats");
+    state->tailcalls.name = xstrdup("tailcalls");
+    return 0;
+}
+
+static void
+xclose(int fd, const char *name)
+{
+    if (fd && close(fd)) {      /* An fd of 0 means "never opened" here. */
+        VLOG_WARN("Failed to close BPF fd %s: %s", name, ovs_strerror(errno));
+    }
+}
+
+/* Frees resources allocated by bpf_get(), tail-call programs included. */
+void
+bpf_put(struct bpf_state *state)
+{
+    struct bpf_prog *progs[] = {&state->ingress, &state->egress,
+                                &state->downcall, &state->xdp};
+    struct bpf_map *maps[] = {&state->upcalls, &state->flow_table,
+                              &state->datapath_stats, &state->dp_flow_stats,
+                              &state->tailcalls};
+    for (size_t i = 0; i < ARRAY_SIZE(progs); i++) {
+        xclose(progs[i]->fd, progs[i]->name);
+        free(CONST_CAST(char *, progs[i]->name));
+    }
+    for (size_t i = 0; i < ARRAY_SIZE(maps); i++) {
+        xclose(maps[i]->fd, maps[i]->name);
+        free(CONST_CAST(char *, maps[i]->name));
+    }
+    for (int k = 0; k < BPF_MAX_PROG_ARRAY; k++) {
+        xclose(state->tailarray[k].fd, "tail");
+    }
+    xclose(state->execute_actions.fd, "execute_actions"); /* Never named. */
+}
+
+/* Sets each program in 'obj' to type XDP if its section title contains "xdp",
+ * else tc classifier, and logs the programs and maps of 'obj' at DBG level. */
+static void
+process(struct bpf_object *obj)
+{
+    struct bpf_program *prog;
+    struct bpf_map *map;
+
+    VLOG_DBG("Opened object '%s'\n", bpf_object__name(obj));
+    VLOG_DBG("Programs:\n");
+    bpf_object__for_each_program(prog, obj) {
+        const char *title = bpf_program__title(prog, false);
+        int error;
+        VLOG_DBG(" - %s\n",  title);
+        if (strstr(title, "xdp")) {
+            error = bpf_program__set_xdp(prog);
+        } else {
+            error = bpf_program__set_sched_cls(prog); /* Or sched_act? */
+        }
+        if (error) {
+            /* libbpf reports failures as negative errno values. */
+            VLOG_WARN("Failed to set '%s' prog type: %s\n", title,
+                      ovs_strerror(error < 0 ? -error : error));
+        }
+    }
+
+    if (VLOG_IS_DBG_ENABLED()) {
+        VLOG_DBG("Maps:\n");
+        bpf_map__for_each(map, obj) {
+            VLOG_DBG(" - %s\n", bpf_map__name(map));
+        }
+    }
+}
+
+/* Attempts to load the BPF datapath in the form of an ELF compiled for the BPF
+ * ISA in 'path', install it into the kernel, and pin it to the filesystem
+ * under ovs_bpf_path/{maps,progs}/foo.
+ *
+ * Returns 0 on success, or positive errno on error.
+ */
+int
+bpf_load(const char *path)
+{
+    struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+    const char *stage = NULL;
+    struct bpf_state state;
+    struct bpf_object *obj;
+    long error;
+
+    if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+        error = errno;          /* setrlimit() returns -1, not an errno. */
+        VLOG_ERR("Failed to set rlimit %s", ovs_strerror(error));
+        return error;
+    }
+
+    if (!bpf_get(&state, false)) {
+        /* XXX: Restart; Upgrade */
+        VLOG_INFO("Re-using preloaded BPF datapath");
+        bpf_put(&state);
+        return 0;
+    }
+
+    obj = bpf_object__open(path);
+    error = libbpf_get_error(obj);
+    if (error) {
+        stage = "open";
+        goto out;
+    }
+    process(obj);
+    error = bpf_object__load(obj);
+    if (error) {
+        stage = "load";
+        goto close;
+    }
+    error = bpf_object__pin(obj, ovs_bpf_path);
+    if (error) {
+        stage = "pin";
+        goto close;
+    }
+    error = bpf_object__unload(obj);
+    if (error) {
+        stage = "unload";
+    }
+
+close:
+    bpf_object__close(obj);
+out:
+    error = error < 0 ? -error : error; /* libbpf returns negative codes. */
+    if (!error) {
+        VLOG_DBG("Loaded BPF datapath from %s", path);
+    } else if (error >= __LIBBPF_ERRNO__START && error < __LIBBPF_ERRNO__END) {
+        char buf[BUFSIZ];
+        /* libbpf-private code: log its text and report EINVAL instead. */
+        libbpf_strerror(error, buf, sizeof buf);
+        VLOG_WARN("Failed to %s BPF datapath: %s\n", stage, buf);
+        error = EINVAL;
+    } else {
+        VLOG_WARN("Failed to %s BPF datapath: %s\n", stage,
+                  ovs_strerror(error));
+    }
+    return error;
+}
+
+#define PRINT_FN(NAME)                                  \
+static int                                              \
+print_##NAME(const char *fmt, ...)                      \
+{                                                       \
+    va_list args;                                       \
+    /* printf-style callback: logs at VLL_<NAME>. */    \
+    va_start(args, fmt);                                \
+    vlog_valist(&this_module, VLL_##NAME, fmt, args);   \
+    va_end(args);                                       \
+    return 0;                                           \
+}
+
+PRINT_FN(WARN);
+PRINT_FN(INFO);
+PRINT_FN(DBG);
+
+/* Makes sure that a BPF filesystem is mounted.  The mount point is the first
+ * "bpf" entry in /proc/mounts, or BPF_FS_PATH if there is none.  If this
+ * function has to mount the filesystem itself, it also creates an "ovs"
+ * directory inside it and points ovs_bpf_path there.
+ *
+ * Returns 0 on success, or positive errno on error. */
+static int OVS_UNUSED
+mount_bpf(void)
+{
+    static char ovs_dir[PATH_MAX];  /* Backs ovs_bpf_path; never freed. */
+    char path[PATH_MAX] = BPF_FS_PATH;
+    struct statfs st_fs;
+    bool found = false;
+    char *line = NULL;
+    size_t line_size = 0;
+    FILE *fp;
+    int err;
+
+    fp = fopen("/proc/mounts", "r");
+    if (fp) {
+        /* Each line reads "device mountpoint fstype options ...". */
+        while (!found && getline(&line, &line_size, fp) != -1) {
+            char *save = NULL, *dir, *type;
+
+            strtok_r(line, " ", &save);         /* Skip the device. */
+            dir = strtok_r(NULL, " ", &save);
+            type = strtok_r(NULL, " ", &save);
+            if (dir && type && !strcmp(type, "bpf")
+                && strlen(dir) < sizeof path) {
+                strcpy(path, dir);
+                found = true;
+            }
+        }
+        free(line);
+        if (fclose(fp)) {
+            VLOG_INFO("Failed to close /proc/mounts: %s", ovs_strerror(errno));
+        }
+    } else {
+        VLOG_INFO("Cannot open /proc/mounts: %s", ovs_strerror(errno));
+    }
+    if (!found) {
+        VLOG_DBG("Using %s for BPF filesystem mountpoint", BPF_FS_PATH);
+    }
+
+    if (!statfs(path, &st_fs) && st_fs.f_type == BPF_FS_MAGIC) {
+        VLOG_INFO("BPF filesystem already mounted to %s", path);
+        return 0;
+    }
+    if (mkdir(path, 0755) && errno != EEXIST) {
+        err = errno;            /* Logging below may change errno. */
+        VLOG_WARN("Failed to create %s: %s", path, ovs_strerror(err));
+        return err;
+    }
+    if (mount("bpf", path, "bpf", 0, NULL)) {
+        err = errno;
+        VLOG_WARN("Failed to mount BPF filesystem: %s", ovs_strerror(err));
+        return err;
+    }
+    if (strlen(path) + strlen("/ovs") >= sizeof path) {
+        VLOG_WARN("BPF filesystem path \"%s\" is too long.", path);
+        return ENAMETOOLONG;
+    }
+    strcat(path, "/ovs");
+    if (mkdir(path, 0755) && errno != EEXIST) {
+        err = errno;
+        VLOG_WARN("Failed to create %s: %s", path, ovs_strerror(err));
+        return err;
+    }
+
+    strcpy(ovs_dir, path);
+    ovs_bpf_path = ovs_dir;
+    return 0;
+}
+
+int
+bpf_init(void)
+{
+    libbpf_set_print(print_WARN, print_INFO, print_DBG);
+    /* libbpf now logs through vlog.  mount_bpf() is not used. */
+    return 0;
+}
diff --git a/lib/bpf.h b/lib/bpf.h
new file mode 100644
index 000000000000..4b5afaf4f77f
--- /dev/null
+++ b/lib/bpf.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIB_BPF_H
+#define LIB_BPF_H 1
+
+#include <errno.h>
+#include "openvswitch/compiler.h"
+
+#define INGRESS_HANDLE     0xFFFFFFF2
+#define EGRESS_HANDLE      0xFFFFFFF3
+
+struct bpf_prog {
+    const char *name;           /* Allocated by bpf_get(), if set at all. */
+    uint32_t handle;            /* tc handle */
+    int fd;                     /* From bpf_obj_get() on the pinned object. */
+};
+
+struct bpf_map {                /* NOTE(review): tag also used with libbpf. */
+    const char *name;           /* Allocated by bpf_get(), if set at all. */
+    int fd;                     /* From bpf_obj_get() on the pinned map. */
+};
+
+#if HAVE_BPF
+struct bpf_state;
+struct ds;
+
+#define BPF_MAX_PROG_ARRAY 64           /* Slots scanned as "tail-<k>/0". */
+struct bpf_state {
+    /* Programs.  bpf_get() fills in the fds from pinned objects. */
+    struct bpf_prog ingress;            /* BPF_PROG_TYPE_SCHED_CLS */
+    struct bpf_prog egress;             /* BPF_PROG_TYPE_SCHED_CLS */
+    struct bpf_prog downcall;           /* BPF_PROG_TYPE_SCHED_CLS */
+    struct bpf_prog tailarray[BPF_MAX_PROG_ARRAY]; /* fd is 0 if unused. */
+    struct bpf_prog xdp;                /* BPF_PROG_TYPE_XDP */
+    /* TODO (william): struct bpf_prog parser, deparser, action, ... */
+
+    struct bpf_map upcalls;             /* BPF_MAP_TYPE_PERF_EVENT_ARRAY */
+    struct bpf_map flow_table;          /* BPF_MAP_TYPE_HASH */
+    struct bpf_map datapath_stats;      /* BPF_MAP_TYPE_ARRAY */
+    struct bpf_map tailcalls;           /* BPF_MAP_TYPE_PROG_ARRAY */
+    struct bpf_map execute_actions;     /* BPF_MAP_TYPE_ARRAY */
+    struct bpf_map dp_flow_stats;       /* BPF_MAP_TYPE_HASH */
+};
+
+int bpf_get(struct bpf_state *state, bool verbose);    /* 0 or errno. */
+void bpf_put(struct bpf_state *state);         /* Releases bpf_get() state. */
+int bpf_load(const char *path);                /* 0 or errno. */
+int bpf_init(void);                            /* Returns 0. */
+void bpf_format_state(struct ds *ds, struct bpf_state *state);
+#else /* !HAVE_BPF */
+static inline int bpf_load(const char *path OVS_UNUSED) { return EOPNOTSUPP; }
+static inline int bpf_init(void) { return 0; }
+#endif /* HAVE_BPF */
+
+#endif /* LIB_BPF_H */
-- 
2.7.4



More information about the dev mailing list