[ovs-dev] [PATCH v2] lib: Introduce netlink-devlink library

Frode Nordahl frode.nordahl at canonical.com
Thu May 6 10:28:40 UTC 2021


The devlink interface was introduced [0] in the Linux 4.6 time
frame and has since gained traction among multiple hardware
vendors.

The devlink-port [1] and devlink-info [2] interfaces are
particularly useful for managing NICs connected to multiple
distinct CPUs such as SmartNICs.

In such a topology it would be useful to be able to offload
Open vSwitch and OVN onto the NIC SoC operating system and this
library will help with discovering and managing ports representing
resources made available on the host from the NIC SoC side.

The library will be consumed by upcoming proposed changes to OVN,
and I think it makes sense to maintain it together with the Open
vSwitch netlink library code. The proposed changes are also
referenced in specifications proposed to the OpenStack project [3][4].

0: https://lore.kernel.org/netdev/1456504351-18871-1-git-send-email-jiri@resnulli.us/
1: https://github.com/torvalds/linux/blob/master/Documentation/networking/devlink/devlink-port.rst
2: https://github.com/torvalds/linux/blob/master/Documentation/networking/devlink/devlink-info.rst
3: https://review.opendev.org/c/openstack/nova-specs/+/787458
4: https://review.opendev.org/c/openstack/neutron-specs/+/788821
Signed-off-by: Frode Nordahl <frode.nordahl at canonical.com>
---
 include/linux/automake.mk         |   1 +
 include/linux/devlink.h           | 625 ++++++++++++++++++++++++++++++
 include/openvswitch/types.h       |   8 +
 lib/automake.mk                   |   2 +
 lib/netlink-devlink.c             | 499 ++++++++++++++++++++++++
 lib/netlink-devlink.h             | 114 ++++++
 lib/netlink.c                     |  16 +
 lib/netlink.h                     |   5 +
 lib/packets.h                     |   1 +
 tests/.gitignore                  |   3 +
 tests/automake.mk                 |  16 +
 tests/system-devlink-info.at      |   9 +
 tests/system-devlink-port.at      |  12 +
 tests/system-devlink-testsuite.at |  38 ++
 utilities/.gitignore              |   1 +
 utilities/automake.mk             |   3 +
 utilities/devlink.c               | 246 ++++++++++++
 17 files changed, 1599 insertions(+)
 create mode 100644 include/linux/devlink.h
 create mode 100644 lib/netlink-devlink.c
 create mode 100644 lib/netlink-devlink.h
 create mode 100644 tests/system-devlink-info.at
 create mode 100644 tests/system-devlink-port.at
 create mode 100644 tests/system-devlink-testsuite.at
 create mode 100644 utilities/devlink.c

diff --git a/include/linux/automake.mk b/include/linux/automake.mk
index 8f063f482..8718f980d 100644
--- a/include/linux/automake.mk
+++ b/include/linux/automake.mk
@@ -1,4 +1,5 @@
 noinst_HEADERS += \
+	include/linux/devlink.h \
 	include/linux/netlink.h \
 	include/linux/netfilter/nf_conntrack_sctp.h \
 	include/linux/pkt_cls.h \
diff --git a/include/linux/devlink.h b/include/linux/devlink.h
new file mode 100644
index 000000000..28ea92b62
--- /dev/null
+++ b/include/linux/devlink.h
@@ -0,0 +1,625 @@
+/*
+ * The kernel devlink interface has gained a number of additions in recent
+ * kernel versions. To allow Open vSwitch to consume these interfaces in its
+ * runtime environment regardless of what kernel version was available at build
+ * time, and also avoiding an elaborate set of autoconf macros to check for
+ * presence of individual pieces, we include the entire file here.
+ *
+ * Source:
+ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/linux/devlink.h @ a556dded9c23c51c82654f1ebe389cbc0bc22057 */
+#if !defined(__KERNEL__)
+#ifndef __UAPI_LINUX_DEVLINK_WRAPPER_H
+#define __UAPI_LINUX_DEVLINK_WRAPPER_H 1
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * include/uapi/linux/devlink.h - Network physical device Netlink interface
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri at mellanox.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_DEVLINK_H_
+#define _UAPI_LINUX_DEVLINK_H_
+
+#include <linux/const.h>
+
+#define DEVLINK_GENL_NAME "devlink"
+#define DEVLINK_GENL_VERSION 0x1
+#define DEVLINK_GENL_MCGRP_CONFIG_NAME "config"
+
+enum devlink_command {
+	/* don't change the order or add anything between, this is ABI! */
+	DEVLINK_CMD_UNSPEC,
+
+	DEVLINK_CMD_GET,		/* can dump */
+	DEVLINK_CMD_SET,
+	DEVLINK_CMD_NEW,
+	DEVLINK_CMD_DEL,
+
+	DEVLINK_CMD_PORT_GET,		/* can dump */
+	DEVLINK_CMD_PORT_SET,
+	DEVLINK_CMD_PORT_NEW,
+	DEVLINK_CMD_PORT_DEL,
+
+	DEVLINK_CMD_PORT_SPLIT,
+	DEVLINK_CMD_PORT_UNSPLIT,
+
+	DEVLINK_CMD_SB_GET,		/* can dump */
+	DEVLINK_CMD_SB_SET,
+	DEVLINK_CMD_SB_NEW,
+	DEVLINK_CMD_SB_DEL,
+
+	DEVLINK_CMD_SB_POOL_GET,	/* can dump */
+	DEVLINK_CMD_SB_POOL_SET,
+	DEVLINK_CMD_SB_POOL_NEW,
+	DEVLINK_CMD_SB_POOL_DEL,
+
+	DEVLINK_CMD_SB_PORT_POOL_GET,	/* can dump */
+	DEVLINK_CMD_SB_PORT_POOL_SET,
+	DEVLINK_CMD_SB_PORT_POOL_NEW,
+	DEVLINK_CMD_SB_PORT_POOL_DEL,
+
+	DEVLINK_CMD_SB_TC_POOL_BIND_GET,	/* can dump */
+	DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+	DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+	DEVLINK_CMD_SB_TC_POOL_BIND_DEL,
+
+	/* Shared buffer occupancy monitoring commands */
+	DEVLINK_CMD_SB_OCC_SNAPSHOT,
+	DEVLINK_CMD_SB_OCC_MAX_CLEAR,
+
+	DEVLINK_CMD_ESWITCH_GET,
+#define DEVLINK_CMD_ESWITCH_MODE_GET /* obsolete, never use this! */ \
+	DEVLINK_CMD_ESWITCH_GET
+
+	DEVLINK_CMD_ESWITCH_SET,
+#define DEVLINK_CMD_ESWITCH_MODE_SET /* obsolete, never use this! */ \
+	DEVLINK_CMD_ESWITCH_SET
+
+	DEVLINK_CMD_DPIPE_TABLE_GET,
+	DEVLINK_CMD_DPIPE_ENTRIES_GET,
+	DEVLINK_CMD_DPIPE_HEADERS_GET,
+	DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+	DEVLINK_CMD_RESOURCE_SET,
+	DEVLINK_CMD_RESOURCE_DUMP,
+
+	/* Hot driver reload, makes configuration changes take place. The
+	 * devlink instance is not released during the process.
+	 */
+	DEVLINK_CMD_RELOAD,
+
+	DEVLINK_CMD_PARAM_GET,		/* can dump */
+	DEVLINK_CMD_PARAM_SET,
+	DEVLINK_CMD_PARAM_NEW,
+	DEVLINK_CMD_PARAM_DEL,
+
+	DEVLINK_CMD_REGION_GET,
+	DEVLINK_CMD_REGION_SET,
+	DEVLINK_CMD_REGION_NEW,
+	DEVLINK_CMD_REGION_DEL,
+	DEVLINK_CMD_REGION_READ,
+
+	DEVLINK_CMD_PORT_PARAM_GET,	/* can dump */
+	DEVLINK_CMD_PORT_PARAM_SET,
+	DEVLINK_CMD_PORT_PARAM_NEW,
+	DEVLINK_CMD_PORT_PARAM_DEL,
+
+	DEVLINK_CMD_INFO_GET,		/* can dump */
+
+	DEVLINK_CMD_HEALTH_REPORTER_GET,
+	DEVLINK_CMD_HEALTH_REPORTER_SET,
+	DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+	DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+
+	DEVLINK_CMD_FLASH_UPDATE,
+	DEVLINK_CMD_FLASH_UPDATE_END,		/* notification only */
+	DEVLINK_CMD_FLASH_UPDATE_STATUS,	/* notification only */
+
+	DEVLINK_CMD_TRAP_GET,		/* can dump */
+	DEVLINK_CMD_TRAP_SET,
+	DEVLINK_CMD_TRAP_NEW,
+	DEVLINK_CMD_TRAP_DEL,
+
+	DEVLINK_CMD_TRAP_GROUP_GET,	/* can dump */
+	DEVLINK_CMD_TRAP_GROUP_SET,
+	DEVLINK_CMD_TRAP_GROUP_NEW,
+	DEVLINK_CMD_TRAP_GROUP_DEL,
+
+	DEVLINK_CMD_TRAP_POLICER_GET,	/* can dump */
+	DEVLINK_CMD_TRAP_POLICER_SET,
+	DEVLINK_CMD_TRAP_POLICER_NEW,
+	DEVLINK_CMD_TRAP_POLICER_DEL,
+
+	DEVLINK_CMD_HEALTH_REPORTER_TEST,
+
+	/* add new commands above here */
+	__DEVLINK_CMD_MAX,
+	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
+};
+
+enum devlink_port_type {
+	DEVLINK_PORT_TYPE_NOTSET,
+	DEVLINK_PORT_TYPE_AUTO,
+	DEVLINK_PORT_TYPE_ETH,
+	DEVLINK_PORT_TYPE_IB,
+};
+
+enum devlink_sb_pool_type {
+	DEVLINK_SB_POOL_TYPE_INGRESS,
+	DEVLINK_SB_POOL_TYPE_EGRESS,
+};
+
+/* static threshold - limiting the maximum number of bytes.
+ * dynamic threshold - limiting the maximum number of bytes
+ *   based on the currently available free space in the shared buffer pool.
+ *   In this mode, the maximum quota is calculated based
+ *   on the following formula:
+ *     max_quota = alpha / (1 + alpha) * Free_Buffer
+ *   While Free_Buffer is the amount of none-occupied buffer associated to
+ *   the relevant pool.
+ *   The value range which can be passed is 0-20 and serves
+ *   for computation of alpha by following formula:
+ *     alpha = 2 ^ (passed_value - 10)
+ */
+
+enum devlink_sb_threshold_type {
+	DEVLINK_SB_THRESHOLD_TYPE_STATIC,
+	DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC,
+};
+
+#define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20
+
+enum devlink_eswitch_mode {
+	DEVLINK_ESWITCH_MODE_LEGACY,
+	DEVLINK_ESWITCH_MODE_SWITCHDEV,
+};
+
+enum devlink_eswitch_inline_mode {
+	DEVLINK_ESWITCH_INLINE_MODE_NONE,
+	DEVLINK_ESWITCH_INLINE_MODE_LINK,
+	DEVLINK_ESWITCH_INLINE_MODE_NETWORK,
+	DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT,
+};
+
+enum devlink_eswitch_encap_mode {
+	DEVLINK_ESWITCH_ENCAP_MODE_NONE,
+	DEVLINK_ESWITCH_ENCAP_MODE_BASIC,
+};
+
+enum devlink_port_flavour {
+	DEVLINK_PORT_FLAVOUR_PHYSICAL, /* Any kind of a port physically
+					* facing the user.
+					*/
+	DEVLINK_PORT_FLAVOUR_CPU, /* CPU port */
+	DEVLINK_PORT_FLAVOUR_DSA, /* Distributed switch architecture
+				   * interconnect port.
+				   */
+	DEVLINK_PORT_FLAVOUR_PCI_PF, /* Represents eswitch port for
+				      * the PCI PF. It is an internal
+				      * port that faces the PCI PF.
+				      */
+	DEVLINK_PORT_FLAVOUR_PCI_VF, /* Represents eswitch port
+				      * for the PCI VF. It is an internal
+				      * port that faces the PCI VF.
+				      */
+	DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. */
+	DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but
+				      * is not used in any way.
+				      */
+	DEVLINK_PORT_FLAVOUR_PCI_SF, /* Represents eswitch port
+				      * for the PCI SF. It is an internal
+				      * port that faces the PCI SF.
+				      */
+};
+
+enum devlink_param_cmode {
+	DEVLINK_PARAM_CMODE_RUNTIME,
+	DEVLINK_PARAM_CMODE_DRIVERINIT,
+	DEVLINK_PARAM_CMODE_PERMANENT,
+
+	/* Add new configuration modes above */
+	__DEVLINK_PARAM_CMODE_MAX,
+	DEVLINK_PARAM_CMODE_MAX = __DEVLINK_PARAM_CMODE_MAX - 1
+};
+
+enum devlink_param_fw_load_policy_value {
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER,
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_UNKNOWN,
+};
+
+enum devlink_param_reset_dev_on_drv_probe_value {
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN,
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS,
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER,
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK,
+};
+
+enum {
+	DEVLINK_ATTR_STATS_RX_PACKETS,		/* u64 */
+	DEVLINK_ATTR_STATS_RX_BYTES,		/* u64 */
+	DEVLINK_ATTR_STATS_RX_DROPPED,		/* u64 */
+
+	__DEVLINK_ATTR_STATS_MAX,
+	DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1
+};
+
+/* Specify what sections of a flash component can be overwritten when
+ * performing an update. Overwriting of firmware binary sections is always
+ * implicitly assumed to be allowed.
+ *
+ * Each section must be documented in
+ * Documentation/networking/devlink/devlink-flash.rst
+ *
+ */
+enum {
+	DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT,
+	DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT,
+
+	__DEVLINK_FLASH_OVERWRITE_MAX_BIT,
+	DEVLINK_FLASH_OVERWRITE_MAX_BIT = __DEVLINK_FLASH_OVERWRITE_MAX_BIT - 1
+};
+
+#define DEVLINK_FLASH_OVERWRITE_SETTINGS _BITUL(DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT)
+#define DEVLINK_FLASH_OVERWRITE_IDENTIFIERS _BITUL(DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT)
+
+#define DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS \
+	(_BITUL(__DEVLINK_FLASH_OVERWRITE_MAX_BIT) - 1)
+
+/**
+ * enum devlink_trap_action - Packet trap action.
+ * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not
+ *                            sent to the CPU.
+ * @DEVLINK_TRAP_ACTION_TRAP: The sole copy of the packet is sent to the CPU.
+ * @DEVLINK_TRAP_ACTION_MIRROR: Packet is forwarded by the device and a copy is
+ *                              sent to the CPU.
+ */
+enum devlink_trap_action {
+	DEVLINK_TRAP_ACTION_DROP,
+	DEVLINK_TRAP_ACTION_TRAP,
+	DEVLINK_TRAP_ACTION_MIRROR,
+};
+
+/**
+ * enum devlink_trap_type - Packet trap type.
+ * @DEVLINK_TRAP_TYPE_DROP: Trap reason is a drop. Trapped packets are only
+ *                          processed by devlink and not injected to the
+ *                          kernel's Rx path.
+ * @DEVLINK_TRAP_TYPE_EXCEPTION: Trap reason is an exception. Packet was not
+ *                               forwarded as intended due to an exception
+ *                               (e.g., missing neighbour entry) and trapped to
+ *                               control plane for resolution. Trapped packets
+ *                               are processed by devlink and injected to
+ *                               the kernel's Rx path.
+ * @DEVLINK_TRAP_TYPE_CONTROL: Packet was trapped because it is required for
+ *                             the correct functioning of the control plane.
+ *                             For example, an ARP request packet. Trapped
+ *                             packets are injected to the kernel's Rx path,
+ *                             but not reported to drop monitor.
+ */
+enum devlink_trap_type {
+	DEVLINK_TRAP_TYPE_DROP,
+	DEVLINK_TRAP_TYPE_EXCEPTION,
+	DEVLINK_TRAP_TYPE_CONTROL,
+};
+
+enum {
+	/* Trap can report input port as metadata */
+	DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT,
+	/* Trap can report flow action cookie as metadata */
+	DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE,
+};
+
+enum devlink_reload_action {
+	DEVLINK_RELOAD_ACTION_UNSPEC,
+	DEVLINK_RELOAD_ACTION_DRIVER_REINIT,	/* Driver entities re-instantiation */
+	DEVLINK_RELOAD_ACTION_FW_ACTIVATE,	/* FW activate */
+
+	/* Add new reload actions above */
+	__DEVLINK_RELOAD_ACTION_MAX,
+	DEVLINK_RELOAD_ACTION_MAX = __DEVLINK_RELOAD_ACTION_MAX - 1
+};
+
+enum devlink_reload_limit {
+	DEVLINK_RELOAD_LIMIT_UNSPEC,	/* unspecified, no constraints */
+	DEVLINK_RELOAD_LIMIT_NO_RESET,	/* No reset allowed, no down time allowed,
+					 * no link flap and no configuration is lost.
+					 */
+
+	/* Add new reload limit above */
+	__DEVLINK_RELOAD_LIMIT_MAX,
+	DEVLINK_RELOAD_LIMIT_MAX = __DEVLINK_RELOAD_LIMIT_MAX - 1
+};
+
+#define DEVLINK_RELOAD_LIMITS_VALID_MASK (_BITUL(__DEVLINK_RELOAD_LIMIT_MAX) - 1)
+
+enum devlink_attr {
+	/* don't change the order or add anything between, this is ABI! */
+	DEVLINK_ATTR_UNSPEC,
+
+	/* bus name + dev name together are a handle for devlink entity */
+	DEVLINK_ATTR_BUS_NAME,			/* string */
+	DEVLINK_ATTR_DEV_NAME,			/* string */
+
+	DEVLINK_ATTR_PORT_INDEX,		/* u32 */
+	DEVLINK_ATTR_PORT_TYPE,			/* u16 */
+	DEVLINK_ATTR_PORT_DESIRED_TYPE,		/* u16 */
+	DEVLINK_ATTR_PORT_NETDEV_IFINDEX,	/* u32 */
+	DEVLINK_ATTR_PORT_NETDEV_NAME,		/* string */
+	DEVLINK_ATTR_PORT_IBDEV_NAME,		/* string */
+	DEVLINK_ATTR_PORT_SPLIT_COUNT,		/* u32 */
+	DEVLINK_ATTR_PORT_SPLIT_GROUP,		/* u32 */
+	DEVLINK_ATTR_SB_INDEX,			/* u32 */
+	DEVLINK_ATTR_SB_SIZE,			/* u32 */
+	DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,	/* u16 */
+	DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,	/* u16 */
+	DEVLINK_ATTR_SB_INGRESS_TC_COUNT,	/* u16 */
+	DEVLINK_ATTR_SB_EGRESS_TC_COUNT,	/* u16 */
+	DEVLINK_ATTR_SB_POOL_INDEX,		/* u16 */
+	DEVLINK_ATTR_SB_POOL_TYPE,		/* u8 */
+	DEVLINK_ATTR_SB_POOL_SIZE,		/* u32 */
+	DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,	/* u8 */
+	DEVLINK_ATTR_SB_THRESHOLD,		/* u32 */
+	DEVLINK_ATTR_SB_TC_INDEX,		/* u16 */
+	DEVLINK_ATTR_SB_OCC_CUR,		/* u32 */
+	DEVLINK_ATTR_SB_OCC_MAX,		/* u32 */
+	DEVLINK_ATTR_ESWITCH_MODE,		/* u16 */
+	DEVLINK_ATTR_ESWITCH_INLINE_MODE,	/* u8 */
+
+	DEVLINK_ATTR_DPIPE_TABLES,		/* nested */
+	DEVLINK_ATTR_DPIPE_TABLE,		/* nested */
+	DEVLINK_ATTR_DPIPE_TABLE_NAME,		/* string */
+	DEVLINK_ATTR_DPIPE_TABLE_SIZE,		/* u64 */
+	DEVLINK_ATTR_DPIPE_TABLE_MATCHES,	/* nested */
+	DEVLINK_ATTR_DPIPE_TABLE_ACTIONS,	/* nested */
+	DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,	/* u8 */
+
+	DEVLINK_ATTR_DPIPE_ENTRIES,		/* nested */
+	DEVLINK_ATTR_DPIPE_ENTRY,		/* nested */
+	DEVLINK_ATTR_DPIPE_ENTRY_INDEX,		/* u64 */
+	DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES,	/* nested */
+	DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES,	/* nested */
+	DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,	/* u64 */
+
+	DEVLINK_ATTR_DPIPE_MATCH,		/* nested */
+	DEVLINK_ATTR_DPIPE_MATCH_VALUE,		/* nested */
+	DEVLINK_ATTR_DPIPE_MATCH_TYPE,		/* u32 */
+
+	DEVLINK_ATTR_DPIPE_ACTION,		/* nested */
+	DEVLINK_ATTR_DPIPE_ACTION_VALUE,	/* nested */
+	DEVLINK_ATTR_DPIPE_ACTION_TYPE,		/* u32 */
+
+	DEVLINK_ATTR_DPIPE_VALUE,
+	DEVLINK_ATTR_DPIPE_VALUE_MASK,
+	DEVLINK_ATTR_DPIPE_VALUE_MAPPING,	/* u32 */
+
+	DEVLINK_ATTR_DPIPE_HEADERS,		/* nested */
+	DEVLINK_ATTR_DPIPE_HEADER,		/* nested */
+	DEVLINK_ATTR_DPIPE_HEADER_NAME,		/* string */
+	DEVLINK_ATTR_DPIPE_HEADER_ID,		/* u32 */
+	DEVLINK_ATTR_DPIPE_HEADER_FIELDS,	/* nested */
+	DEVLINK_ATTR_DPIPE_HEADER_GLOBAL,	/* u8 */
+	DEVLINK_ATTR_DPIPE_HEADER_INDEX,	/* u32 */
+
+	DEVLINK_ATTR_DPIPE_FIELD,		/* nested */
+	DEVLINK_ATTR_DPIPE_FIELD_NAME,		/* string */
+	DEVLINK_ATTR_DPIPE_FIELD_ID,		/* u32 */
+	DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH,	/* u32 */
+	DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE,	/* u32 */
+
+	DEVLINK_ATTR_PAD,
+
+	DEVLINK_ATTR_ESWITCH_ENCAP_MODE,	/* u8 */
+	DEVLINK_ATTR_RESOURCE_LIST,		/* nested */
+	DEVLINK_ATTR_RESOURCE,			/* nested */
+	DEVLINK_ATTR_RESOURCE_NAME,		/* string */
+	DEVLINK_ATTR_RESOURCE_ID,		/* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE,		/* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE_NEW,		/* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE_VALID,	/* u8 */
+	DEVLINK_ATTR_RESOURCE_SIZE_MIN,		/* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE_MAX,		/* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE_GRAN,        /* u64 */
+	DEVLINK_ATTR_RESOURCE_UNIT,		/* u8 */
+	DEVLINK_ATTR_RESOURCE_OCC,		/* u64 */
+	DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,	/* u64 */
+	DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,/* u64 */
+
+	DEVLINK_ATTR_PORT_FLAVOUR,		/* u16 */
+	DEVLINK_ATTR_PORT_NUMBER,		/* u32 */
+	DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,	/* u32 */
+
+	DEVLINK_ATTR_PARAM,			/* nested */
+	DEVLINK_ATTR_PARAM_NAME,		/* string */
+	DEVLINK_ATTR_PARAM_GENERIC,		/* flag */
+	DEVLINK_ATTR_PARAM_TYPE,		/* u8 */
+	DEVLINK_ATTR_PARAM_VALUES_LIST,		/* nested */
+	DEVLINK_ATTR_PARAM_VALUE,		/* nested */
+	DEVLINK_ATTR_PARAM_VALUE_DATA,		/* dynamic */
+	DEVLINK_ATTR_PARAM_VALUE_CMODE,		/* u8 */
+
+	DEVLINK_ATTR_REGION_NAME,               /* string */
+	DEVLINK_ATTR_REGION_SIZE,               /* u64 */
+	DEVLINK_ATTR_REGION_SNAPSHOTS,          /* nested */
+	DEVLINK_ATTR_REGION_SNAPSHOT,           /* nested */
+	DEVLINK_ATTR_REGION_SNAPSHOT_ID,        /* u32 */
+
+	DEVLINK_ATTR_REGION_CHUNKS,             /* nested */
+	DEVLINK_ATTR_REGION_CHUNK,              /* nested */
+	DEVLINK_ATTR_REGION_CHUNK_DATA,         /* binary */
+	DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
+	DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */
+
+	DEVLINK_ATTR_INFO_DRIVER_NAME,		/* string */
+	DEVLINK_ATTR_INFO_SERIAL_NUMBER,	/* string */
+	DEVLINK_ATTR_INFO_VERSION_FIXED,	/* nested */
+	DEVLINK_ATTR_INFO_VERSION_RUNNING,	/* nested */
+	DEVLINK_ATTR_INFO_VERSION_STORED,	/* nested */
+	DEVLINK_ATTR_INFO_VERSION_NAME,		/* string */
+	DEVLINK_ATTR_INFO_VERSION_VALUE,	/* string */
+
+	DEVLINK_ATTR_SB_POOL_CELL_SIZE,		/* u32 */
+
+	DEVLINK_ATTR_FMSG,			/* nested */
+	DEVLINK_ATTR_FMSG_OBJ_NEST_START,	/* flag */
+	DEVLINK_ATTR_FMSG_PAIR_NEST_START,	/* flag */
+	DEVLINK_ATTR_FMSG_ARR_NEST_START,	/* flag */
+	DEVLINK_ATTR_FMSG_NEST_END,		/* flag */
+	DEVLINK_ATTR_FMSG_OBJ_NAME,		/* string */
+	DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,	/* u8 */
+	DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA,	/* dynamic */
+
+	DEVLINK_ATTR_HEALTH_REPORTER,			/* nested */
+	DEVLINK_ATTR_HEALTH_REPORTER_NAME,		/* string */
+	DEVLINK_ATTR_HEALTH_REPORTER_STATE,		/* u8 */
+	DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,	/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
+
+	DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME,	/* string */
+	DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,	/* string */
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,	/* string */
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,	/* u64 */
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,	/* u64 */
+
+	DEVLINK_ATTR_PORT_PCI_PF_NUMBER,	/* u16 */
+	DEVLINK_ATTR_PORT_PCI_VF_NUMBER,	/* u16 */
+
+	DEVLINK_ATTR_STATS,				/* nested */
+
+	DEVLINK_ATTR_TRAP_NAME,				/* string */
+	/* enum devlink_trap_action */
+	DEVLINK_ATTR_TRAP_ACTION,			/* u8 */
+	/* enum devlink_trap_type */
+	DEVLINK_ATTR_TRAP_TYPE,				/* u8 */
+	DEVLINK_ATTR_TRAP_GENERIC,			/* flag */
+	DEVLINK_ATTR_TRAP_METADATA,			/* nested */
+	DEVLINK_ATTR_TRAP_GROUP_NAME,			/* string */
+
+	DEVLINK_ATTR_RELOAD_FAILED,			/* u8 0 or 1 */
+
+	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,	/* u64 */
+
+	DEVLINK_ATTR_NETNS_FD,			/* u32 */
+	DEVLINK_ATTR_NETNS_PID,			/* u32 */
+	DEVLINK_ATTR_NETNS_ID,			/* u32 */
+
+	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,	/* u8 */
+
+	DEVLINK_ATTR_TRAP_POLICER_ID,			/* u32 */
+	DEVLINK_ATTR_TRAP_POLICER_RATE,			/* u64 */
+	DEVLINK_ATTR_TRAP_POLICER_BURST,		/* u64 */
+
+	DEVLINK_ATTR_PORT_FUNCTION,			/* nested */
+
+	DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,	/* string */
+
+	DEVLINK_ATTR_PORT_LANES,			/* u32 */
+	DEVLINK_ATTR_PORT_SPLITTABLE,			/* u8 */
+
+	DEVLINK_ATTR_PORT_EXTERNAL,		/* u8 */
+	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,	/* u32 */
+
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,	/* u64 */
+	DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK,	/* bitfield32 */
+
+	DEVLINK_ATTR_RELOAD_ACTION,		/* u8 */
+	DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED,	/* bitfield32 */
+	DEVLINK_ATTR_RELOAD_LIMITS,		/* bitfield32 */
+
+	DEVLINK_ATTR_DEV_STATS,			/* nested */
+	DEVLINK_ATTR_RELOAD_STATS,		/* nested */
+	DEVLINK_ATTR_RELOAD_STATS_ENTRY,	/* nested */
+	DEVLINK_ATTR_RELOAD_STATS_LIMIT,	/* u8 */
+	DEVLINK_ATTR_RELOAD_STATS_VALUE,	/* u32 */
+	DEVLINK_ATTR_REMOTE_RELOAD_STATS,	/* nested */
+	DEVLINK_ATTR_RELOAD_ACTION_INFO,        /* nested */
+	DEVLINK_ATTR_RELOAD_ACTION_STATS,       /* nested */
+
+	DEVLINK_ATTR_PORT_PCI_SF_NUMBER,	/* u32 */
+	/* add new attributes above here, update the policy in devlink.c */
+
+	__DEVLINK_ATTR_MAX,
+	DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1
+};
+
+/* Mapping between internal resource described by the field and system
+ * structure
+ */
+enum devlink_dpipe_field_mapping_type {
+	DEVLINK_DPIPE_FIELD_MAPPING_TYPE_NONE,
+	DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX,
+};
+
+/* Match type - specify the type of the match */
+enum devlink_dpipe_match_type {
+	DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT,
+};
+
+/* Action type - specify the action type */
+enum devlink_dpipe_action_type {
+	DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY,
+};
+
+enum devlink_dpipe_field_ethernet_id {
+	DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+};
+
+enum devlink_dpipe_field_ipv4_id {
+	DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+};
+
+enum devlink_dpipe_field_ipv6_id {
+	DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+};
+
+enum devlink_dpipe_header_id {
+	DEVLINK_DPIPE_HEADER_ETHERNET,
+	DEVLINK_DPIPE_HEADER_IPV4,
+	DEVLINK_DPIPE_HEADER_IPV6,
+};
+
+enum devlink_resource_unit {
+	DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+enum devlink_port_function_attr {
+	DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
+	DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR,	/* binary */
+	DEVLINK_PORT_FN_ATTR_STATE,	/* u8 */
+	DEVLINK_PORT_FN_ATTR_OPSTATE,	/* u8 */
+
+	__DEVLINK_PORT_FUNCTION_ATTR_MAX,
+	DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
+};
+
+enum devlink_port_fn_state {
+	DEVLINK_PORT_FN_STATE_INACTIVE,
+	DEVLINK_PORT_FN_STATE_ACTIVE,
+};
+
+/**
+ * enum devlink_port_fn_opstate - indicates operational state of the function
+ * @DEVLINK_PORT_FN_OPSTATE_ATTACHED: Driver is attached to the function.
+ * For graceful tear down of the function, after inactivation of the
+ * function, user should wait for operational state to turn DETACHED.
+ * @DEVLINK_PORT_FN_OPSTATE_DETACHED: Driver is detached from the function.
+ * It is safe to delete the port.
+ */
+enum devlink_port_fn_opstate {
+	DEVLINK_PORT_FN_OPSTATE_DETACHED,
+	DEVLINK_PORT_FN_OPSTATE_ATTACHED,
+};
+
+#endif /* _UAPI_LINUX_DEVLINK_H_ */
+#endif /* __UAPI_LINUX_DEVLINK_WRAPPER_H */
+#endif /* !__KERNEL__ */
diff --git a/include/openvswitch/types.h b/include/openvswitch/types.h
index 45e70790e..069e479ae 100644
--- a/include/openvswitch/types.h
+++ b/include/openvswitch/types.h
@@ -186,6 +186,14 @@ struct eth_addr64 {
 #define ETH_ADDR64_C(A,B,C,D,E,F,G,H) \
     { { { 0x##A, 0x##B, 0x##C, 0x##D, 0x##E, 0x##F, 0x##G, 0x##H } } }
 
+/* Similar to struct eth_addr, for InfiniBand addresses.
+ *
+ * The anonymous union exposes the same storage either as 20 individual bytes
+ * ('ea') or as 10 'ovs_be16' elements ('be16'). */
+struct ib_addr {
+    union {
+        uint8_t ea[20];
+        ovs_be16 be16[10];
+    };
+};
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/automake.mk b/lib/automake.mk
index 39901bd6d..dca2ad46d 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -440,6 +440,8 @@ lib_libopenvswitch_la_SOURCES += \
 	lib/netlink-protocol.h \
 	lib/netlink-socket.c \
 	lib/netlink-socket.h \
+	lib/netlink-devlink.c \
+	lib/netlink-devlink.h \
 	lib/rtnetlink.c \
 	lib/rtnetlink.h \
 	lib/route-table.c \
diff --git a/lib/netlink-devlink.c b/lib/netlink-devlink.c
new file mode 100644
index 000000000..6c6fc8c2c
--- /dev/null
+++ b/lib/netlink-devlink.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright (c) 2021 Canonical
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <config.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/devlink.h>
+#include <linux/genetlink.h>
+#include "netlink.h"
+#include "netlink-socket.h"
+#include "netlink-devlink.h"
+#include "openvswitch/vlog.h"
+#include "packets.h"
+
+VLOG_DEFINE_THIS_MODULE(netlink_devlink);
+
+/* Initialized by nl_devlink_init() */
+static int ovs_devlink_family;
+
+/* State of one devlink dump operation.  Allocated by nl_dl_dump_init() and
+ * released by nl_dl_dump_destroy(). */
+struct nl_dl_dump_state {
+    struct nl_dump dump;    /* Passed to nl_dump_start(), nl_dump_next() and
+                             * nl_dump_done(). */
+    struct ofpbuf buf;      /* Reply buffer: initialized by nl_dl_dump_start(),
+                             * released by nl_dl_dump_finish(). */
+    int error;              /* Return value of nl_devlink_init(), stored by
+                             * nl_dl_dump_init(). */
+};
+
+static int nl_devlink_init(void);
+
+const char *dl_str_not_present = "";
+
+/* Creates and returns a new devlink dump state object.
+ *
+ * As a side effect this calls nl_devlink_init(), which performs the one-time
+ * initialization and lookup of the devlink generic netlink family.  Its
+ * result is stored in the returned object; the caller should retrieve it with
+ * nl_dl_dump_init_error() and check it before attempting to dump devlink
+ * data.
+ *
+ * Only the error indicator of the returned object is initialized here; the
+ * dump and buffer members are set up later by nl_dl_dump_start().
+ *
+ * The caller owns the returned object and must release it with
+ * nl_dl_dump_destroy() when done. */
+struct nl_dl_dump_state *
+nl_dl_dump_init(void)
+{
+    struct nl_dl_dump_state *state = xmalloc(sizeof *state);
+
+    state->error = nl_devlink_init();
+    return state;
+}
+
+/* Returns the error indicator that nl_dl_dump_init() stored in 'dump_state',
+ * i.e. the return value of the devlink initialization performed when the
+ * object was created. */
+int
+nl_dl_dump_init_error(struct nl_dl_dump_state *dump_state)
+{
+    return dump_state->error;
+}
+
+/* Frees 'dump_state', which was previously allocated by a call to
+ * nl_dl_dump_init().
+ *
+ * Only the object itself is freed.  The caller is responsible for calling
+ * nl_dl_dump_finish() first to release the resources associated with any
+ * in-flight dump process before destroying the dump state object. */
+void
+nl_dl_dump_destroy(struct nl_dl_dump_state *dump_state)
+{
+    free(dump_state);
+}
+
+/* Appends a generic netlink message header for devlink to 'msg' by calling
+ * nl_msg_put_genlmsghdr() with 'expected_payload', 'family', 'flags' and 'cmd',
+ * always using DEVLINK_GENL_VERSION as the version.
+ *
+ * Note that this function takes 'cmd' before 'flags', while the underlying
+ * call is given 'flags' before 'cmd'. */
+void
+nl_msg_put_dlgenmsg(struct ofpbuf *msg, size_t expected_payload,
+                    int family, uint8_t cmd, uint32_t flags)
+{
+    nl_msg_put_genlmsghdr(msg, expected_payload, family,
+                          flags, cmd, DEVLINK_GENL_VERSION);
+}
+
+/* Begins a Netlink-devlink "dump" operation.
+ *
+ * Builds a devlink generic netlink request carrying command 'cmd' for the
+ * family stored in 'ovs_devlink_family', hands it to nl_dump_start() on a
+ * NETLINK_GENERIC socket, and initializes the reply buffer in 'state'.
+ *
+ * The caller must eventually call nl_dl_dump_finish() on 'state'. */
+void
+nl_dl_dump_start(uint8_t cmd, struct nl_dl_dump_state *state)
+{
+    struct ofpbuf *req = ofpbuf_new(NLMSG_HDRLEN + GENL_HDRLEN);
+
+    nl_msg_put_dlgenmsg(req, 0, ovs_devlink_family, cmd, NLM_F_REQUEST);
+    nl_dump_start(&state->dump, NETLINK_GENERIC, req);
+    ofpbuf_delete(req);
+
+    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
+}
+
+/* Fetches the next reply of the dump in 'state' and feeds it to
+ * 'parse_function' together with the caller supplied 'entry'.
+ *
+ * Returns true if a reply was retrieved and 'parse_function' accepted it.
+ * Returns false if nl_dump_next() produced no reply, or if 'parse_function'
+ * rejected the reply; in the latter case the status of the dump is set to
+ * EPROTO under the dump's mutex. */
+static bool
+nl_dl_dump_next__(struct nl_dl_dump_state *state,
+                  bool (*parse_function)(struct ofpbuf *, void *),
+                  void *entry)
+{
+    struct ofpbuf reply;
+    bool ok;
+
+    ok = nl_dump_next(&state->dump, &reply, &state->buf);
+    if (ok && !parse_function(&reply, entry)) {
+        /* Record the parse failure on the dump itself. */
+        ovs_mutex_lock(&state->dump.mutex);
+        state->dump.status = EPROTO;
+        ovs_mutex_unlock(&state->dump.mutex);
+        ok = false;
+    }
+    return ok;
+}
+
+/* Adapts nl_dl_parse_port_policy() to the generic parser signature expected
+ * by nl_dl_dump_next__().  Calling a function through a pointer that was cast
+ * to an incompatible function type is undefined behavior in C, so the
+ * 'void *' argument is converted here instead. */
+static bool
+nl_dl_parse_port_entry__(struct ofpbuf *msg, void *port_entry)
+{
+    return nl_dl_parse_port_policy(msg, port_entry);
+}
+
+/* Attempts to retrieve and parse another reply in on-going dump operation.
+ *
+ * If successful, returns true and assigns values or pointers to data in
+ * 'port_entry'.  The caller must not modify 'port_entry' (because it may
+ * contain pointers to data within the buffer which will be used by future
+ * calls to this function).
+ *
+ * On failure, returns false.  Failure might indicate an actual error or merely
+ * the end of replies.  An error status for the entire dump operation is
+ * provided when it is completed by calling nl_dl_dump_finish()
+ */
+bool
+nl_dl_port_dump_next(struct nl_dl_dump_state *state,
+                     struct dl_port *port_entry)
+{
+    return nl_dl_dump_next__(state, nl_dl_parse_port_entry__, port_entry);
+}
+
+/* Adapts nl_dl_parse_info_policy() to the generic parser signature expected
+ * by nl_dl_dump_next__().  Calling a function through a pointer that was cast
+ * to an incompatible function type is undefined behavior in C, so the
+ * 'void *' argument is converted here instead. */
+static bool
+nl_dl_parse_info_entry__(struct ofpbuf *msg, void *info_entry)
+{
+    return nl_dl_parse_info_policy(msg, info_entry);
+}
+
+/* Attempts to retrieve the next reply of the on-going dump operation in
+ * 'state' and parse it with nl_dl_parse_info_policy() into 'info_entry'.
+ *
+ * Returns true if a reply was retrieved and parsed.  Returns false if there
+ * was no further reply or if the reply could not be parsed; in the latter
+ * case the status of the dump is set to EPROTO. */
+bool
+nl_dl_info_dump_next(struct nl_dl_dump_state *state,
+                     struct dl_info *info_entry)
+{
+    return nl_dl_dump_next__(state, nl_dl_parse_info_entry__, info_entry);
+}
+
+/* Completes the dump operation in 'state': releases the reply buffer that
+ * nl_dl_dump_start() initialized and returns the value of nl_dump_done() for
+ * the underlying dump. */
+int
+nl_dl_dump_finish(struct nl_dl_dump_state *state)
+{
+    ofpbuf_uninit(&state->buf);
+    return nl_dump_done(&state->dump);
+}
+
+/* Returns the value of the unsigned integer attribute at index 'attr_idx' in
+ * 'attrs', widened to uint64_t.  The width to read (u8, u16, u32 or u64) is
+ * selected by the 'type' of the matching element of 'policy', which has
+ * 'policy_len' elements.
+ *
+ * If 'attr_idx' is outside of 'policy', or the attribute is not present in
+ * 'attrs', returns UINT64_MAX (all bits set).  A caller that stores the result
+ * in a narrower unsigned field therefore sees that field's maximum value.
+ *
+ * Any other policy type for a present attribute ends in OVS_NOT_REACHED(). */
+static uint64_t
+attr_get_up_to_u64(size_t attr_idx, struct nlattr *attrs[],
+                   const struct nl_policy policy[],
+                   size_t policy_len)
+{
+    uint64_t value = UINT64_MAX;
+
+    if (attr_idx < policy_len && attrs[attr_idx]) {
+        const struct nlattr *attr = attrs[attr_idx];
+
+        switch (policy[attr_idx].type) {
+        case NL_A_U8:
+            value = nl_attr_get_u8(attr);
+            break;
+        case NL_A_U16:
+            value = nl_attr_get_u16(attr);
+            break;
+        case NL_A_U32:
+            value = nl_attr_get_u32(attr);
+            break;
+        case NL_A_U64:
+            value = nl_attr_get_u64(attr);
+            break;
+        case NL_A_U128:
+        case NL_A_STRING:
+        case NL_A_NO_ATTR:
+        case NL_A_UNSPEC:
+        case NL_A_FLAG:
+        case NL_A_IPV6:
+        case NL_A_NESTED:
+        case NL_A_HW_ADDR:
+        case N_NL_ATTR_TYPES:
+        default:
+            OVS_NOT_REACHED();
+        }
+    }
+    return value;
+}
+
+/* Returns the string carried by the attribute at index 'attr_idx' in 'attrs'.
+ *
+ * If 'attr_idx' is outside of 'policy' (which has 'policy_len' elements), or
+ * the attribute is not present in 'attrs', returns 'dl_str_not_present'.
+ *
+ * Asserts that the policy type of a present attribute is NL_A_STRING. */
+static const char *
+attr_get_str(size_t attr_idx, struct nlattr *attrs[],
+             const struct nl_policy policy[],
+             size_t policy_len)
+{
+    if (attr_idx >= policy_len || !attrs[attr_idx]) {
+        return dl_str_not_present;
+    }
+
+    ovs_assert(policy[attr_idx].type == NL_A_STRING);
+    return nl_attr_get_string(attrs[attr_idx]);
+}
+
+/* Parses the nested devlink port function attribute 'nla' into 'port_fn'.
+ *
+ * All attributes are optional.  When a hardware address attribute is present
+ * its size selects the destination: a 'struct eth_addr' sized value is stored
+ * in 'port_fn->eth_addr' and a 'struct ib_addr' sized value is stored in
+ * 'port_fn->ib_addr'; the other member is not written in that case.  When no
+ * hardware address is present, both members are zeroed.
+ *
+ * 'port_fn->state' and 'port_fn->opstate' receive the value of the respective
+ * attribute, or the result of converting attr_get_up_to_u64()'s "not present"
+ * value to the member's type when the attribute is absent.
+ *
+ * Returns true on success.  Returns false if 'nla' does not match the policy
+ * or if the hardware address has an unexpected size; in the latter case
+ * 'state' and 'opstate' are not written. */
+bool
+nl_dl_parse_port_function(struct nlattr *nla, struct dl_port_function *port_fn)
+{
+    static const struct nl_policy policy[] = {
+        /* Appeared in Linux v5.9 */
+        [DEVLINK_PORT_FUNCTION_ATTR_UNSPEC] = { .type = NL_A_UNSPEC,
+                                                .optional = true, },
+        [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NL_A_HW_ADDR,
+                                                 .optional = true, },
+
+        /* Appeared in Linux v5.12 */
+        [DEVLINK_PORT_FN_ATTR_STATE] = { .type = NL_A_U8, .optional = true, },
+        [DEVLINK_PORT_FN_ATTR_OPSTATE] = { .type = NL_A_U8,
+                                           .optional = true, },
+    };
+    struct nlattr *attrs[ARRAY_SIZE(policy)];
+    struct nlattr *hw_addr;
+
+    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
+        return false;
+    }
+
+    hw_addr = attrs[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
+    if (!hw_addr) {
+        memset(&port_fn->eth_addr, 0, sizeof(port_fn->eth_addr));
+        memset(&port_fn->ib_addr, 0, sizeof(port_fn->ib_addr));
+    } else {
+        size_t hw_addr_size = nl_attr_get_size(hw_addr);
+
+        /* The payload size tells Ethernet and InfiniBand addresses apart. */
+        if (hw_addr_size == sizeof(struct eth_addr)) {
+            port_fn->eth_addr = nl_attr_get_eth_addr(hw_addr);
+        } else if (hw_addr_size == sizeof(struct ib_addr)) {
+            port_fn->ib_addr = nl_attr_get_ib_addr(hw_addr);
+        } else {
+            return false;
+        }
+    }
+
+    port_fn->state = attr_get_up_to_u64(DEVLINK_PORT_FN_ATTR_STATE,
+                                        attrs, policy, ARRAY_SIZE(policy));
+    port_fn->opstate = attr_get_up_to_u64(DEVLINK_PORT_FN_ATTR_OPSTATE,
+                                          attrs, policy, ARRAY_SIZE(policy));
+
+    return true;
+}
+
+/* Parses the devlink port message in 'msg' into 'port'.
+ *
+ * Attributes are looked up past the Netlink and Generic Netlink headers.
+ * Bus name, device name and port index are non-optional in the policy; all
+ * other attributes are optional.  An absent integer attribute is stored as
+ * the maximum value of its field.  The netdev/ibdev name is
+ * dl_str_not_present unless the name attribute matching the port type is
+ * present.  Without a port function attribute 'port->function' is zeroed
+ * and its 'state' and 'opstate' are set to UINT8_MAX.
+ *
+ * NOTE(review): the strings presumably point into 'msg' -- confirm lifetime.
+ * Returns true on success, false if 'msg' does not satisfy the policy or the
+ * nested port function attribute cannot be parsed. */
+bool
+nl_dl_parse_port_policy(struct ofpbuf *msg, struct dl_port *port)
+{
+    static const struct nl_policy policy[] = {
+        /* Appeared in Linux v4.6 */
+        [DEVLINK_ATTR_BUS_NAME] = { .type = NL_A_STRING, .optional = false, },
+        [DEVLINK_ATTR_DEV_NAME] = { .type = NL_A_STRING, .optional = false, },
+        [DEVLINK_ATTR_PORT_INDEX] = { .type = NL_A_U32, .optional = false, },
+
+        [DEVLINK_ATTR_PORT_TYPE] = { .type = NL_A_U16, .optional = true, },
+        [DEVLINK_ATTR_PORT_DESIRED_TYPE] = { .type = NL_A_U16,
+                                            .optional = true, },
+        [DEVLINK_ATTR_PORT_NETDEV_IFINDEX] = { .type = NL_A_U32,
+                                               .optional = true, },
+        [DEVLINK_ATTR_PORT_NETDEV_NAME] = { .type = NL_A_STRING,
+                                            .optional = true, },
+        [DEVLINK_ATTR_PORT_IBDEV_NAME] = { .type = NL_A_STRING,
+                                           .optional = true, },
+        [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NL_A_U32,
+                                            .optional = true, },
+        [DEVLINK_ATTR_PORT_SPLIT_GROUP] = { .type = NL_A_U32,
+                                            .optional = true, },
+
+        /* Appeared in Linux v4.18 */
+        [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NL_A_U16, .optional = true, },
+        [DEVLINK_ATTR_PORT_NUMBER] = { .type = NL_A_U32, .optional = true, },
+        [DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER] = { .type = NL_A_U32,
+                                                     .optional = true, },
+
+        /* Appeared in Linux v5.3 */
+        [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NL_A_U16,
+                                              .optional = true, },
+        [DEVLINK_ATTR_PORT_PCI_VF_NUMBER] = { .type = NL_A_U16,
+                                              .optional = true, },
+
+        /* Appeared in Linux v5.9 */
+        [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NL_A_NESTED,
+                                         .optional = true, },
+        [DEVLINK_ATTR_PORT_LANES] = { .type = NL_A_U32, .optional = true, },
+        [DEVLINK_ATTR_PORT_SPLITTABLE] = { .type = NL_A_U8,
+                                           .optional = true, },
+
+        /* Appeared in Linux v5.10 */
+        [DEVLINK_ATTR_PORT_EXTERNAL] = { .type = NL_A_U8, .optional = true },
+        [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NL_A_U32,
+                                                  .optional = true},
+
+        /* Appeared in Linux v5.12 */
+        [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NL_A_U32,
+                                              .optional = true },
+    };
+    const size_t n_attrs = ARRAY_SIZE(policy);
+    struct nlattr *attrs[ARRAY_SIZE(policy)];
+    struct nlattr *function;
+
+    if (!nl_policy_parse(msg, NLMSG_HDRLEN + GENL_HDRLEN, policy, attrs,
+                         n_attrs)) {
+        return false;
+    }
+
+    /* Non-optional attributes. */
+    port->bus_name = nl_attr_get_string(attrs[DEVLINK_ATTR_BUS_NAME]);
+    port->dev_name = nl_attr_get_string(attrs[DEVLINK_ATTR_DEV_NAME]);
+    port->index = nl_attr_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+    /* Optional attributes. */
+    port->type = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_TYPE, attrs, policy, n_attrs);
+    port->desired_type = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_DESIRED_TYPE, attrs, policy, n_attrs);
+    port->netdev_ifindex = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_NETDEV_IFINDEX, attrs, policy, n_attrs);
+
+    /* 'netdev_name' and 'ibdev_name' share storage; the type picks one. */
+    if (port->type == DEVLINK_PORT_TYPE_ETH &&
+            attrs[DEVLINK_ATTR_PORT_NETDEV_NAME]) {
+        port->netdev_name = nl_attr_get_string(
+            attrs[DEVLINK_ATTR_PORT_NETDEV_NAME]);
+    } else if (port->type == DEVLINK_PORT_TYPE_IB &&
+            attrs[DEVLINK_ATTR_PORT_IBDEV_NAME]) {
+        port->ibdev_name = nl_attr_get_string(
+            attrs[DEVLINK_ATTR_PORT_IBDEV_NAME]);
+    } else {
+        port->netdev_name = dl_str_not_present;
+    }
+
+    port->split_count = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_SPLIT_COUNT, attrs, policy, n_attrs);
+    port->split_group = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_SPLIT_GROUP, attrs, policy, n_attrs);
+    port->flavour = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_FLAVOUR, attrs, policy, n_attrs);
+    port->number = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_NUMBER, attrs, policy, n_attrs);
+    port->split_subport_number = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, attrs, policy, n_attrs);
+    port->pci_pf_number = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs, policy, n_attrs);
+    port->pci_vf_number = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs, policy, n_attrs);
+    port->lanes = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_LANES, attrs, policy, n_attrs);
+    port->splittable = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_SPLITTABLE, attrs, policy, n_attrs);
+    port->external = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_EXTERNAL, attrs, policy, n_attrs);
+    port->controller_number = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, attrs, policy, n_attrs);
+    port->pci_sf_number = attr_get_up_to_u64(
+        DEVLINK_ATTR_PORT_PCI_SF_NUMBER, attrs, policy, n_attrs);
+
+    /* Without a port function attribute, mark all of its members absent. */
+    function = attrs[DEVLINK_ATTR_PORT_FUNCTION];
+    if (!function) {
+        memset(&port->function, 0, sizeof port->function);
+        port->function.state = UINT8_MAX;
+        port->function.opstate = UINT8_MAX;
+        return true;
+    }
+    return nl_dl_parse_port_function(function, &port->function);
+}
+
+/* Parses the nested version attribute 'nla' into 'info_ver'.  'name' comes
+ * from DEVLINK_ATTR_INFO_VERSION_NAME and 'value' from
+ * DEVLINK_ATTR_INFO_VERSION_VALUE; a missing one is dl_str_not_present.
+ * Returns false if 'nla' does not satisfy the policy. */
+bool
+nl_dl_parse_info_version(struct nlattr *nla, struct dl_info_version *info_ver)
+{
+    static const struct nl_policy policy[] = {
+        /* Appeared in Linux v5.1 */
+        [DEVLINK_ATTR_INFO_VERSION_NAME] = { .type = NL_A_STRING,
+                                             .optional = true, },
+        [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .type = NL_A_STRING,
+                                              .optional = true, },
+    };
+    struct nlattr *attrs[ARRAY_SIZE(policy)];
+
+    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
+        return false;
+    }
+
+    info_ver->name = attr_get_str(DEVLINK_ATTR_INFO_VERSION_NAME, attrs,
+                                  policy, ARRAY_SIZE(policy));
+    info_ver->value = attr_get_str(DEVLINK_ATTR_INFO_VERSION_VALUE, attrs,
+                                   policy, ARRAY_SIZE(policy));
+    return true;
+}
+
+/* Fills 'version' from the nested version attribute 'attrs[attr_idx]'.
+ *
+ * When 'attr_idx' is not below 'attrs_len' or the attribute is absent, both
+ * members are set to dl_str_not_present and the call succeeds.  Otherwise
+ * returns whatever nl_dl_parse_info_version() returns. */
+static bool
+attr_fill_version(size_t attr_idx, struct nlattr *attrs[], size_t attrs_len,
+                  struct dl_info_version *version)
+{
+    if (attr_idx >= attrs_len || !attrs[attr_idx]) {
+        version->name = dl_str_not_present;
+        version->value = dl_str_not_present;
+        return true;
+    }
+
+    return nl_dl_parse_info_version(attrs[attr_idx], version);
+}
+
+/* Parses the devlink info message in 'msg' into 'info'.
+ *
+ * Only the driver name is non-optional in the policy.  Absent serial numbers
+ * are reported as dl_str_not_present, and so are the members of an absent
+ * fixed, running or stored version.  Returns false if 'msg' does not satisfy
+ * the policy or a nested version attribute cannot be parsed. */
+bool
+nl_dl_parse_info_policy(struct ofpbuf *msg, struct dl_info *info)
+{
+    static const struct nl_policy policy[] = {
+        /* Appeared in Linux v5.1 */
+        [DEVLINK_ATTR_INFO_DRIVER_NAME] = { .type = NL_A_STRING,
+                                            .optional = false, },
+        [DEVLINK_ATTR_INFO_SERIAL_NUMBER] = { .type = NL_A_STRING,
+                                              .optional = true, },
+        [DEVLINK_ATTR_INFO_VERSION_FIXED] = { .type = NL_A_NESTED,
+                                              .optional = true, },
+        [DEVLINK_ATTR_INFO_VERSION_RUNNING] = { .type = NL_A_NESTED,
+                                                .optional = true, },
+        [DEVLINK_ATTR_INFO_VERSION_STORED] = { .type = NL_A_NESTED,
+                                               .optional = true, },
+
+        /* Appeared in Linux v5.9 */
+        [DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER] = { .type = NL_A_STRING,
+                                                    .optional = true, },
+    };
+    const size_t n_attrs = ARRAY_SIZE(policy);
+    struct nlattr *attrs[ARRAY_SIZE(policy)];
+
+    if (!nl_policy_parse(msg, NLMSG_HDRLEN + GENL_HDRLEN, policy, attrs,
+                         n_attrs)) {
+        return false;
+    }
+
+    info->driver_name = attr_get_str(DEVLINK_ATTR_INFO_DRIVER_NAME, attrs,
+                                     policy, n_attrs);
+    info->serial_number = attr_get_str(DEVLINK_ATTR_INFO_SERIAL_NUMBER, attrs,
+                                       policy, n_attrs);
+    info->board_serial_number = attr_get_str(
+        DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, attrs, policy, n_attrs);
+
+    /* The versions are filled in order; the first failure stops the chain. */
+    return attr_fill_version(DEVLINK_ATTR_INFO_VERSION_FIXED, attrs, n_attrs,
+                             &info->version_fixed)
+           && attr_fill_version(DEVLINK_ATTR_INFO_VERSION_RUNNING, attrs,
+                                n_attrs, &info->version_running)
+           && attr_fill_version(DEVLINK_ATTR_INFO_VERSION_STORED, attrs,
+                                n_attrs, &info->version_stored);
+}
+
+static int
+nl_devlink_init(void)
+{
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+    static int error;           /* Static: kept across calls. */
+
+    if (ovsthread_once_start(&once)) {
+        error = nl_lookup_genl_family(DEVLINK_GENL_NAME, &ovs_devlink_family);
+        if (error) {
+            VLOG_INFO("Generic Netlink family '%s' does not exist. "
+                      "Linux version 4.6 or newer required.",
+                      DEVLINK_GENL_NAME);
+        }
+        ovsthread_once_done(&once);
+    }
+    return error;               /* Result of the guarded family lookup. */
+}
diff --git a/lib/netlink-devlink.h b/lib/netlink-devlink.h
new file mode 100644
index 000000000..afcb2d898
--- /dev/null
+++ b/lib/netlink-devlink.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2021 Canonical
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NETLINK_DEVLINK_H
+#define NETLINK_DEVLINK_H 1
+
+struct eth_addr;
+
+/* Presence of each individual value in these structs is determined at runtime
+ * and depends on which kernel version we are communicating with as well as
+ * which driver implementation is filling in the information for each
+ * individual device or port.
+ *
+ * To signal non-presence of values the library follows the following
+ * convention:
+ *
+ * - integer type values will be set to their maximum value
+ *   (e.g. UINT8_MAX for a uint8_t)
+ *
+ * - hardware address type values will be set to all zero
+ *
+ * - string type values will be set to a pointer to dl_str_not_present
+ *   (an empty string).
+ */
+
+extern const char *dl_str_not_present;
+
+struct dl_port_function {
+    struct eth_addr eth_addr;   /* Set from an Ethernet-sized HW address. */
+    struct ib_addr ib_addr;     /* Set from an IB-sized HW address. */
+    uint8_t state;              /* DEVLINK_PORT_FN_ATTR_STATE */
+    uint8_t opstate;            /* DEVLINK_PORT_FN_ATTR_OPSTATE */
+};
+
+struct dl_port {
+    const char *bus_name;          /* DEVLINK_ATTR_BUS_NAME (required) */
+    const char *dev_name;          /* DEVLINK_ATTR_DEV_NAME (required) */
+    uint32_t index;                /* DEVLINK_ATTR_PORT_INDEX (required) */
+    uint16_t type;                 /* DEVLINK_ATTR_PORT_TYPE */
+    uint16_t desired_type;         /* DEVLINK_ATTR_PORT_DESIRED_TYPE */
+    uint32_t netdev_ifindex;       /* DEVLINK_ATTR_PORT_NETDEV_IFINDEX */
+    union {
+        const char *netdev_name; /* type DEVLINK_PORT_TYPE_ETH */
+        const char *ibdev_name;  /* type DEVLINK_PORT_TYPE_IB */
+    };
+    uint32_t split_count;          /* DEVLINK_ATTR_PORT_SPLIT_COUNT */
+    uint32_t split_group;          /* DEVLINK_ATTR_PORT_SPLIT_GROUP */
+    uint16_t flavour;              /* DEVLINK_ATTR_PORT_FLAVOUR */
+    uint32_t number;               /* DEVLINK_ATTR_PORT_NUMBER */
+    uint32_t split_subport_number; /* DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER */
+    uint16_t pci_pf_number;        /* DEVLINK_ATTR_PORT_PCI_PF_NUMBER */
+    uint16_t pci_vf_number;        /* DEVLINK_ATTR_PORT_PCI_VF_NUMBER */
+    struct dl_port_function function; /* DEVLINK_ATTR_PORT_FUNCTION */
+    uint32_t lanes;                /* DEVLINK_ATTR_PORT_LANES */
+    uint8_t splittable;            /* DEVLINK_ATTR_PORT_SPLITTABLE */
+    uint8_t external;              /* DEVLINK_ATTR_PORT_EXTERNAL */
+    uint32_t controller_number;    /* DEVLINK_ATTR_PORT_CONTROLLER_NUMBER */
+    uint32_t pci_sf_number;        /* DEVLINK_ATTR_PORT_PCI_SF_NUMBER */
+};
+
+struct dl_info_version {
+    const char *name;   /* dl_str_not_present if absent. */
+    const char *value;  /* dl_str_not_present if absent. */
+};
+
+struct dl_info {
+    const char *driver_name;                /* Required attribute. */
+    const char *serial_number;              /* Optional attribute. */
+    const char *board_serial_number;        /* Optional attribute. */
+    struct dl_info_version version_fixed;   /* Optional nested attribute. */
+    struct dl_info_version version_running; /* Optional nested attribute. */
+    struct dl_info_version version_stored;  /* Optional nested attribute. */
+};
+
+/* The nl_dl_dump_state record declaration refers to types declared in
+ * netlink-socket.h, which requires OVS internal autoconf macros and
+ * definitions to be present for successful compilation.
+ *
+ * To enable friction free consumption of these interfaces from programs
+ * external to Open vSwitch, such as OVN, we keep the declaration of
+ * nl_dl_dump_state private.
+ *
+ * Use the nl_dl_dump_init function to allocate memory for and get a pointer to
+ * a devlink dump state object. The caller owns the allocated object and is
+ * responsible for freeing the allocated memory when done. */
+struct nl_dl_dump_state;
+
+struct nl_dl_dump_state * nl_dl_dump_init(void);
+int nl_dl_dump_init_error(struct nl_dl_dump_state *);
+void nl_dl_dump_destroy(struct nl_dl_dump_state *);
+void nl_msg_put_dlgenmsg(struct ofpbuf *, size_t, int, uint8_t, uint32_t);
+void nl_dl_dump_start(uint8_t, struct nl_dl_dump_state *);
+bool nl_dl_port_dump_next(struct nl_dl_dump_state *, struct dl_port *);
+bool nl_dl_info_dump_next(struct nl_dl_dump_state *, struct dl_info *);
+int nl_dl_dump_finish(struct nl_dl_dump_state *);
+bool nl_dl_parse_port_policy(struct ofpbuf *, struct dl_port *);
+bool nl_dl_parse_port_function(struct nlattr *, struct dl_port_function *);
+bool nl_dl_parse_info_policy(struct ofpbuf *, struct dl_info *);
+bool nl_dl_parse_info_version(struct nlattr *, struct dl_info_version *);
+
+#endif /* NETLINK_DEVLINK_H */
diff --git a/lib/netlink.c b/lib/netlink.c
index 26ab20bb4..efc126528 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -741,6 +741,20 @@ nl_attr_get_nested(const struct nlattr *nla, struct ofpbuf *nested)
     ofpbuf_use_const(nested, nl_attr_get(nla), nl_attr_get_size(nla));
 }
 
+/* Returns the Ethernet address held in 'nla''s payload, by value. */
+struct eth_addr
+nl_attr_get_eth_addr(const struct nlattr *nla)
+{
+    return NL_ATTR_GET_AS(nla, struct eth_addr);
+}
+
+/* Returns the Infiniband address held in 'nla''s payload, by value. */
+struct ib_addr
+nl_attr_get_ib_addr(const struct nlattr *nla)
+{
+    return NL_ATTR_GET_AS(nla, struct ib_addr);
+}
+
 /* Default minimum payload size for each type of attribute. */
 static size_t
 min_attr_len(enum nl_attr_type type)
@@ -757,6 +771,7 @@ min_attr_len(enum nl_attr_type type)
     case NL_A_FLAG: return 0;
     case NL_A_IPV6: return 16;
     case NL_A_NESTED: return 0;
+    case NL_A_HW_ADDR: return 6;
     case N_NL_ATTR_TYPES: default: OVS_NOT_REACHED();
     }
 }
@@ -777,6 +792,7 @@ max_attr_len(enum nl_attr_type type)
     case NL_A_FLAG: return SIZE_MAX;
     case NL_A_IPV6: return 16;
     case NL_A_NESTED: return SIZE_MAX;
+    case NL_A_HW_ADDR: return 20;
     case N_NL_ATTR_TYPES: default: OVS_NOT_REACHED();
     }
 }
diff --git a/lib/netlink.h b/lib/netlink.h
index 44b8e4d1a..8410235fb 100644
--- a/lib/netlink.h
+++ b/lib/netlink.h
@@ -126,6 +126,8 @@ struct nlmsghdr *nl_msg_next(struct ofpbuf *buffer, struct ofpbuf *msg);
 #define NL_A_BE128_SIZE NL_ATTR_SIZE(sizeof(ovs_be128))
 #define NL_A_FLAG_SIZE NL_ATTR_SIZE(0)
 #define NL_A_IPV6_SIZE NL_ATTR_SIZE(sizeof(struct in6_addr))
+#define NL_A_HW_ADDR_ETH_SIZE NL_ATTR_SIZE(sizeof(struct eth_addr))
+#define NL_A_HW_ADDR_IB_SIZE NL_ATTR_SIZE(sizeof(struct ib_addr))
 
 bool nl_attr_oversized(size_t payload_size);
 
@@ -147,6 +149,7 @@ enum nl_attr_type
     NL_A_FLAG,
     NL_A_IPV6,
     NL_A_NESTED,
+    NL_A_HW_ADDR,
     N_NL_ATTR_TYPES
 };
 
@@ -214,6 +217,8 @@ struct in6_addr nl_attr_get_in6_addr(const struct nlattr *nla);
 odp_port_t nl_attr_get_odp_port(const struct nlattr *);
 const char *nl_attr_get_string(const struct nlattr *);
 void nl_attr_get_nested(const struct nlattr *, struct ofpbuf *);
+struct eth_addr nl_attr_get_eth_addr(const struct nlattr *nla);
+struct ib_addr nl_attr_get_ib_addr(const struct nlattr *nla);
 
 /* Netlink attribute policy.
  *
diff --git a/lib/packets.h b/lib/packets.h
index 481bc22fa..a5e1a3dc9 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -197,6 +197,7 @@ pkt_metadata_prefetch_init(struct pkt_metadata *md)
 bool dpid_from_string(const char *s, uint64_t *dpidp);
 
 #define ETH_ADDR_LEN           6
+#define IB_ADDR_LEN           20
 
 static const struct eth_addr eth_addr_broadcast OVS_UNUSED
     = ETH_ADDR_C(ff,ff,ff,ff,ff,ff);
diff --git a/tests/.gitignore b/tests/.gitignore
index 45b4f67b2..aa7709534 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -30,6 +30,9 @@
 /system-offloads-testsuite
 /system-offloads-testsuite.dir/
 /system-offloads-testsuite.log
+/system-devlink-testsuite
+/system-devlink-testsuite.dir/
+/system-devlink-testsuite.log
 /test-aes128
 /test-atomic
 /test-bundle
diff --git a/tests/automake.mk b/tests/automake.mk
index 1a528aa39..ee70cbd59 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -8,6 +8,7 @@ EXTRA_DIST += \
 	$(SYSTEM_AFXDP_TESTSUITE_AT) \
 	$(SYSTEM_OFFLOADS_TESTSUITE_AT) \
 	$(SYSTEM_DPDK_TESTSUITE_AT) \
+	$(SYSTEM_DEVLINK_TESTSUITE_AT) \
 	$(OVSDB_CLUSTER_TESTSUITE_AT) \
 	$(TESTSUITE) \
 	$(SYSTEM_KMOD_TESTSUITE) \
@@ -16,6 +17,7 @@ EXTRA_DIST += \
 	$(SYSTEM_AFXDP_TESTSUITE) \
 	$(SYSTEM_OFFLOADS_TESTSUITE) \
 	$(SYSTEM_DPDK_TESTSUITE) \
+	$(SYSTEM_DEVLINK_TESTSUITE) \
 	$(OVSDB_CLUSTER_TESTSUITE) \
 	tests/atlocal.in \
 	$(srcdir)/package.m4 \
@@ -187,6 +189,11 @@ SYSTEM_DPDK_TESTSUITE_AT = \
 	tests/system-dpdk-testsuite.at \
 	tests/system-dpdk.at
 
+SYSTEM_DEVLINK_TESTSUITE_AT = \
+	tests/system-devlink-info.at \
+	tests/system-devlink-port.at \
+	tests/system-devlink-testsuite.at
+
 check_SCRIPTS += tests/atlocal
 
 TESTSUITE = $(srcdir)/tests/testsuite
@@ -198,6 +205,7 @@ SYSTEM_TSO_TESTSUITE = $(srcdir)/tests/system-tso-testsuite
 SYSTEM_AFXDP_TESTSUITE = $(srcdir)/tests/system-afxdp-testsuite
 SYSTEM_OFFLOADS_TESTSUITE = $(srcdir)/tests/system-offloads-testsuite
 SYSTEM_DPDK_TESTSUITE = $(srcdir)/tests/system-dpdk-testsuite
+SYSTEM_DEVLINK_TESTSUITE = $(srcdir)/tests/system-devlink-testsuite
 OVSDB_CLUSTER_TESTSUITE = $(srcdir)/tests/ovsdb-cluster-testsuite
 DISTCLEANFILES += tests/atconfig tests/atlocal
 
@@ -362,6 +370,10 @@ check-dpdk: all
 	set $(SHELL) '$(SYSTEM_DPDK_TESTSUITE)' -C tests  AUTOTEST_PATH='$(AUTOTEST_PATH)'; \
 	"$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck)
 
+check-system-devlink: all
+	set $(SHELL) '$(SYSTEM_DEVLINK_TESTSUITE)' -C tests  AUTOTEST_PATH='$(AUTOTEST_PATH)'; \
+	"$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck)
+
 clean-local:
 	test ! -f '$(TESTSUITE)' || $(SHELL) '$(TESTSUITE)' -C tests --clean
 
@@ -407,6 +419,10 @@ $(SYSTEM_DPDK_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_DPDK_TESTSU
 	$(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at
 	$(AM_V_at)mv $@.tmp $@
 
+$(SYSTEM_DEVLINK_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_DEVLINK_TESTSUITE_AT) $(COMMON_MACROS_AT)
+	$(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at
+	$(AM_V_at)mv $@.tmp $@
+
 $(OVSDB_CLUSTER_TESTSUITE): package.m4 $(OVSDB_CLUSTER_TESTSUITE_AT) $(COMMON_MACROS_AT)
 	$(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at
 	$(AM_V_at)mv $@.tmp $@
diff --git a/tests/system-devlink-info.at b/tests/system-devlink-info.at
new file mode 100644
index 000000000..9ce5b5bfd
--- /dev/null
+++ b/tests/system-devlink-info.at
@@ -0,0 +1,9 @@
+AT_BANNER([devlink info])
+
+AT_SETUP([devlink - dump info])
+NETDEVSIM_START()
+AT_CHECK([netdevsim_add_device 10 1])
+AT_CHECK([$abs_top_builddir/utilities/devlink dump > devlink-dump-info.out 2>&1])
+AT_SKIP_IF([ ! (grep -q driver_name devlink-dump-info.out)])
+AT_CHECK([grep -q driver_name.*netdevsim devlink-dump-info.out])
+AT_CLEANUP
diff --git a/tests/system-devlink-port.at b/tests/system-devlink-port.at
new file mode 100644
index 000000000..8f9bcfacc
--- /dev/null
+++ b/tests/system-devlink-port.at
@@ -0,0 +1,12 @@
+AT_BANNER([devlink port])
+
+AT_SETUP([devlink - dump port])
+NETDEVSIM_START()
+AT_CHECK([netdevsim_add_device 11 1])
+AT_CHECK([$abs_top_builddir/utilities/devlink dump > devlink-dump-port.out 2>&1])
+AT_CHECK([grep -q bus_name.*netdevsim devlink-dump-port.out])
+AT_CHECK([grep -q dev_name.*netdevsim11 devlink-dump-port.out])
+AT_CHECK([grep -q type.*ETH devlink-dump-port.out])
+AT_CHECK([grep -q flavour.*PHYSICAL devlink-dump-port.out])
+AT_CHECK([grep -q number.*1 devlink-dump-port.out])
+AT_CLEANUP
diff --git a/tests/system-devlink-testsuite.at b/tests/system-devlink-testsuite.at
new file mode 100644
index 000000000..5d1018dde
--- /dev/null
+++ b/tests/system-devlink-testsuite.at
@@ -0,0 +1,38 @@
+AT_INIT
+
+AT_COPYRIGHT([Copyright (c) 2021 Canonical
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at:
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.])
+
+m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
+
+m4_include([tests/ovs-macros.at])
+
+OVS_START_SHELL_HELPERS
+netdevsim_add_device () {
+    local slot=$1    # presumably the device id (tests expect netdevsim<slot>)
+    local n=$2       # presumably the port count -- TODO confirm
+    echo "$slot $n" > /sys/bus/netdevsim/new_device
+}
+OVS_END_SHELL_HELPERS
+
+# NETDEVSIM_START
+#
+# Loads the netdevsim kernel module and unloads it on exit; creates no device.
+m4_define([NETDEVSIM_START],
+  [AT_CHECK([modprobe netdevsim])
+   on_exit 'modprobe -r netdevsim'
+])
+
+m4_include([tests/system-devlink-info.at])
+m4_include([tests/system-devlink-port.at])
diff --git a/utilities/.gitignore b/utilities/.gitignore
index 0a11356d4..414bb8058 100644
--- a/utilities/.gitignore
+++ b/utilities/.gitignore
@@ -1,5 +1,6 @@
 /Makefile
 /Makefile.in
+/devlink
 /nlmon
 /ovs-appctl
 /ovs-appctl.8
diff --git a/utilities/automake.mk b/utilities/automake.mk
index e2e22c39a..feda1b419 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -122,6 +122,9 @@ if LINUX
 noinst_PROGRAMS += utilities/nlmon
 utilities_nlmon_SOURCES = utilities/nlmon.c
 utilities_nlmon_LDADD = lib/libopenvswitch.la
+noinst_PROGRAMS += utilities/devlink
+utilities_devlink_SOURCES = utilities/devlink.c
+utilities_devlink_LDADD = lib/libopenvswitch.la
 endif
 
 FLAKE8_PYFILES += utilities/ovs-pcap.in \
diff --git a/utilities/devlink.c b/utilities/devlink.c
new file mode 100644
index 000000000..c8d27108d
--- /dev/null
+++ b/utilities/devlink.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2021 Canonical
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <config.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <sysexits.h>
+#include <net/if.h>
+#include <poll.h>
+#include <stddef.h>
+#include <linux/devlink.h>
+#include "openvswitch/ofpbuf.h"
+#include "openvswitch/poll-loop.h"
+#include "openvswitch/vlog.h"
+#include "packets.h"
+#include "netlink-socket.h"
+#include "netlink-devlink.h"
+
+VLOG_DEFINE_THIS_MODULE(devlink);
+
+enum {                  /* Values index CMD_NAME[]. */
+    CMD_DUMP,
+    CMD_MONITOR,
+};
+
+static const char *CMD_NAME[] = {
+    "dump",             /* CMD_DUMP */
+    "monitor",          /* CMD_MONITOR */
+};
+
+/* Prints the usage text, naming the two accepted modes, to stdout. */
+static void
+usage(void)
+{
+    printf("usage: %s MODE\n"
+           "where MODE is one of 'dump' or 'monitor'.\n", program_name);
+}
+
+/* Logs every member of 'port_entry' at INFO level, one member per message.
+ * 'type' and 'flavour' are logged by name, 'splittable'/'external' as bools. */
+static void
+print_port(struct dl_port *port_entry) {
+    VLOG_INFO("bus_name: '%s'", port_entry->bus_name);
+    VLOG_INFO("dev_name: '%s'", port_entry->dev_name);
+    VLOG_INFO("index: %"PRIu32, port_entry->index);
+    VLOG_INFO("type: %s",
+        port_entry->type == DEVLINK_PORT_TYPE_AUTO ? "AUTO" :
+        port_entry->type == DEVLINK_PORT_TYPE_ETH ? "ETH" :
+        port_entry->type == DEVLINK_PORT_TYPE_IB ? "IB" :
+        "unknown");
+    VLOG_INFO("desired_type: %"PRIu16, port_entry->desired_type);
+    VLOG_INFO("netdev_ifindex: %"PRIu32, port_entry->netdev_ifindex);
+    VLOG_INFO("netdev_name: '%s'", port_entry->netdev_name);
+    VLOG_INFO("split_count: %"PRIu32, port_entry->split_count);
+    VLOG_INFO("split_group: %"PRIu32, port_entry->split_group);
+    VLOG_INFO("flavour: %s",
+        port_entry->flavour == DEVLINK_PORT_FLAVOUR_PHYSICAL ? "PHYSICAL" :
+        port_entry->flavour == DEVLINK_PORT_FLAVOUR_CPU ? "CPU" :
+        port_entry->flavour == DEVLINK_PORT_FLAVOUR_DSA ? "DSA" :
+        port_entry->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF ? "PCI_PF":
+        port_entry->flavour == DEVLINK_PORT_FLAVOUR_PCI_VF ? "PCI_VF":
+        port_entry->flavour == DEVLINK_PORT_FLAVOUR_VIRTUAL ? "VIRTUAL":
+        port_entry->flavour == DEVLINK_PORT_FLAVOUR_UNUSED ? "UNUSED":
+        port_entry->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF ? "PCI_SF":
+        "UNKNOWN");
+    VLOG_INFO("number: %"PRIu32, port_entry->number);
+    VLOG_INFO("split_subport_number: %"PRIu32,
+        port_entry->split_subport_number);
+    VLOG_INFO("pci_pf_number: %"PRIu16, port_entry->pci_pf_number);
+    VLOG_INFO("pci_vf_number: %"PRIu16, port_entry->pci_vf_number);
+    VLOG_INFO("function eth_addr: "ETH_ADDR_FMT,
+        ETH_ADDR_ARGS(port_entry->function.eth_addr));
+    VLOG_INFO("function state: %"PRIu8, port_entry->function.state);
+    VLOG_INFO("function opstate: %"PRIu8, port_entry->function.opstate);
+    VLOG_INFO("lanes: %"PRIu32, port_entry->lanes);
+    VLOG_INFO("splittable: %s",
+        port_entry->splittable == 0 ? "false" :
+        port_entry->splittable == 1 ? "true" : "unknown");
+    VLOG_INFO("external: %s",
+        port_entry->external == 0 ? "false" :
+        port_entry->external == 1 ? "true" : "unknown");
+    VLOG_INFO("controller_number: %"PRIu32, port_entry->controller_number);
+    VLOG_INFO("pci_sf_number: %"PRIu32, port_entry->pci_sf_number);
+}
+
+/* Logs 'version' prefixed by 'prefix', unless its name is absent. */
+static void
+print_version(const char *prefix, struct dl_info_version *version) {
+    if (version->name && version->name != dl_str_not_present) {
+        VLOG_INFO("%s %s: %s", prefix, version->name, version->value);
+    }
+}
+
+static void
+print_info(struct dl_info *info_entry) {
+    VLOG_INFO("driver_name: '%s'", info_entry->driver_name);
+    VLOG_INFO("serial_number: '%s'", info_entry->serial_number);
+    VLOG_INFO("board_serial_number: '%s'", info_entry->board_serial_number);
+    print_version("fixed", &info_entry->version_fixed);     /* Each version */
+    print_version("running", &info_entry->version_running); /* is skipped   */
+    print_version("stored", &info_entry->version_stored);   /* when absent. */
+}
+
+/* Dumps and logs all devlink ports, then all devlink info records.  A failed
+ * dump state initialization is fatal. */
+static void
+dump(void)
+{
+    struct nl_dl_dump_state *state;
+    struct dl_port port_entry;
+    struct dl_info info_entry;
+    int error;
+
+    printf("port dump\n");
+    state = nl_dl_dump_init();
+    if ((error = nl_dl_dump_init_error(state))) {
+        ovs_fatal(error, "error");
+    }
+    nl_dl_dump_start(DEVLINK_CMD_PORT_GET, state);
+    while (nl_dl_port_dump_next(state, &port_entry)) {
+        print_port(&port_entry);
+    }
+    nl_dl_dump_finish(state);
+    nl_dl_dump_destroy(state);
+
+    printf("info dump\n");
+    state = nl_dl_dump_init();
+    if ((error = nl_dl_dump_init_error(state))) {
+        ovs_fatal(error, "error");
+    }
+    nl_dl_dump_start(DEVLINK_CMD_INFO_GET, state);
+    while (nl_dl_info_dump_next(state, &info_entry)) {
+        print_info(&info_entry);
+    }
+    nl_dl_dump_finish(state);
+    nl_dl_dump_destroy(state);
+}
+
+/* Joins the devlink config multicast group and, in an endless loop, logs
+ * each port message received on it.  Setup failures are fatal. */
+static void
+monitor(void)
+{
+    uint64_t buf_stub[4096 / 64];
+    struct nl_sock *sock;
+    struct ofpbuf buf;
+    unsigned int devlink_mcgroup;
+    int error;
+
+    error = nl_lookup_genl_mcgroup(
+        DEVLINK_GENL_NAME, DEVLINK_GENL_MCGRP_CONFIG_NAME, &devlink_mcgroup);
+    if (error) {
+        ovs_fatal(error, "unable to lookup devlink genl multicast group");
+    }
+
+    error = nl_sock_create(NETLINK_GENERIC, &sock);
+    if (error) {
+        ovs_fatal(error, "could not create genlnetlink socket");
+    }
+
+    error = nl_sock_join_mcgroup(sock, devlink_mcgroup);
+    if (error) {
+        ovs_fatal(error, "could not join devlink config multicast group");
+    }
+
+    ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
+    for (;;) {
+        error = nl_sock_recv(sock, &buf, NULL, false);
+        if (error == EAGAIN) {
+            /* Nothing to do. */
+        } else if (error == ENOBUFS) {
+            ovs_error(0, "network monitor socket overflowed");
+        } else if (error) {
+            ovs_fatal(error, "error on network monitor socket");
+        } else {
+            struct genlmsghdr *genl;
+            struct dl_port port_entry;
+
+            genl = nl_msg_genlmsghdr(&buf);
+            printf("cmd=%"PRIu8",version=%"PRIu8"\n",
+                   genl->cmd, genl->version);
+            switch (genl->cmd) {
+            case DEVLINK_CMD_PORT_GET:
+            case DEVLINK_CMD_PORT_SET:
+            case DEVLINK_CMD_PORT_NEW:
+            case DEVLINK_CMD_PORT_DEL:
+                if (!nl_dl_parse_port_policy(&buf, &port_entry)) {
+                    VLOG_WARN("could not parse port entry");
+                    continue;
+                }
+                VLOG_INFO("%s",
+                    genl->cmd == DEVLINK_CMD_PORT_GET ? "DEVLINK_CMD_PORT_GET":
+                    genl->cmd == DEVLINK_CMD_PORT_SET ? "DEVLINK_CMD_PORT_SET":
+                    genl->cmd == DEVLINK_CMD_PORT_NEW ? "DEVLINK_CMD_PORT_NEW":
+                    genl->cmd == DEVLINK_CMD_PORT_DEL ? "DEVLINK_CMD_PORT_DEL":
+                    "UNKNOWN");
+                print_port(&port_entry);
+                break;
+            }
+        }
+
+        nl_sock_wait(sock, POLLIN);
+        poll_block();
+    }
+}
+
+/* Entry point: runs the mode named by argv[1], "dump" or "monitor".  With a
+ * missing or unknown mode, prints the usage text and returns EX_USAGE. */
+int
+main(int argc, char *argv[])
+{
+    int cmd = -1;
+
+    set_program_name(argv[0]);
+    vlog_set_levels(NULL, VLF_ANY_DESTINATION, VLL_DBG);
+
+    if (argc > 1) {
+        if (!strcmp(argv[1], CMD_NAME[CMD_DUMP])) {
+            cmd = CMD_DUMP;
+        } else if (!strcmp(argv[1], CMD_NAME[CMD_MONITOR])) {
+            cmd = CMD_MONITOR;
+        }
+    }
+
+    if (cmd == CMD_DUMP) {
+        dump();
+    } else if (cmd == CMD_MONITOR) {
+        monitor();
+    } else {
+        usage();
+        return EX_USAGE;
+    }
+    return 0;
+}
-- 
2.30.2



More information about the dev mailing list