[ovs-dev] 答复: [PATCH V1 3/4] Fix tap interface statistics issue
Yi Yang (杨燚)-云服务集团
yangyi01 at inspur.com
Wed Aug 26 00:53:26 UTC 2020
Aaron, thank you so much for review, yes, I should handle error. For setns, I will check if it can work as you commented. I'll send V2 to fix your comments.
-----邮件原件-----
发件人: dev [mailto:ovs-dev-bounces at openvswitch.org] 代表 Aaron Conole
发送时间: 2020年8月25日 23:06
收件人: yang_y_yi at 163.com
抄送: ovs-dev at openvswitch.org; i.maximets at ovn.org; fbl at sysclose.org
主题: Re: [ovs-dev] [PATCH V1 3/4] Fix tap interface statistics issue
yang_y_yi at 163.com writes:
> From: Yi Yang <yangyi01 at inspur.com>
>
> After tap interface is moved to network namespace, "ovs-vsctl list
> interface tapXXX" can get statistics info of tap interface, the root
> cause is OVS still gets statistics info in root namespace.
>
> With netns option help, OVS can get statistics info in tap interface
> netns.
>
> This patch added enter and exit netns helpers and change
> statistics-related functions for those tap interfaces which have been
> moved into netns and make sure "ovs-vsctl list interface tapXXX" can
> get statistics info correctly.
>
> Here is a result sample for reference:
> name : tap1
> ofport : 4
> ofport_request : []
> options : {netns=ns01}
> other_config : {}
> statistics : {rx_bytes=6228, rx_packets=68, tx_bytes=8310, tx_packets=95}
> status : {}
> type : tap
>
> Signed-off-by: Yi Yang <yangyi01 at inspur.com>
> ---
> lib/dpif-netlink.c | 51 ++++++++++++++++++
> lib/dpif-netlink.h | 3 ++
> lib/netdev-linux.c | 60 ++++++++++++++++++++-
> lib/netlink-socket.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++
> lib/netlink-socket.h | 2 +
> 5 files changed, 260 insertions(+), 2 deletions(-)
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index
> 7da4fb5..8ed37ed 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -4282,6 +4282,43 @@ dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
> return error;
> }
>
> +static int
> +dpif_netlink_vport_transact_nopool(const struct dpif_netlink_vport *request,
> + struct dpif_netlink_vport *reply,
> + struct ofpbuf **bufp) {
> + struct ofpbuf *request_buf;
> + int error;
> +
> + ovs_assert((reply != NULL) == (bufp != NULL));
> +
> + error = dpif_netlink_init();
> + if (error) {
> + if (reply) {
> + *bufp = NULL;
> + dpif_netlink_vport_init(reply);
> + }
> + return error;
> + }
> +
> + request_buf = ofpbuf_new(1024);
> + dpif_netlink_vport_to_ofpbuf(request, request_buf);
> + error = nl_transact_nopool(NETLINK_GENERIC, request_buf, bufp);
> + ofpbuf_delete(request_buf);
> +
> + if (reply) {
> + if (!error) {
> + error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
> + }
> + if (error) {
> + dpif_netlink_vport_init(reply);
> + ofpbuf_delete(*bufp);
> + *bufp = NULL;
> + }
> + }
> + return error;
> +}
> +
> /* Obtains information about the kernel vport named 'name' and stores it into
> * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
> * longer needed ('reply' will contain pointers into '*bufp'). */ @@
> -4298,6 +4335,20 @@ dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
> return dpif_netlink_vport_transact(&request, reply, bufp); }
>
> +int
> +dpif_netlink_vport_get_nopool(const char *name,
> + struct dpif_netlink_vport *reply,
> + struct ofpbuf **bufp) {
> + struct dpif_netlink_vport request;
> +
> + dpif_netlink_vport_init(&request);
> + request.cmd = OVS_VPORT_CMD_GET;
> + request.name = name;
> +
> + return dpif_netlink_vport_transact_nopool(&request, reply, bufp);
> +}
> +
> /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
> * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
> * positive errno value.
> diff --git a/lib/dpif-netlink.h b/lib/dpif-netlink.h index
> 24294bc..9372241 100644
> --- a/lib/dpif-netlink.h
> +++ b/lib/dpif-netlink.h
> @@ -55,6 +55,9 @@ int dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
> struct ofpbuf **bufp); int
> dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
> struct ofpbuf **bufp);
> +int dpif_netlink_vport_get_nopool(const char *name,
> + struct dpif_netlink_vport *reply,
> + struct ofpbuf **bufp);
>
> bool dpif_netlink_is_internal_device(const char *name);
>
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index
> bb3aa9b..2a7d5ec 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -520,8 +520,16 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> * changes in the device miimon status, so we can use atomic_count.
> */ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
>
> +struct netns_knob {
> + int main_fd;
> + int netns_fd;
> + char nspath[128];
> +};
> +
> static int netdev_linux_parse_vnet_hdr(struct dp_packet *b); static
> void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
> +static int enter_netns(struct netns_knob *netns_knob, const char
> +*netns); static void exit_netns(struct netns_knob *netns_knob);
> static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
> int cmd, const char *cmd_name);
> static int get_flags(const struct netdev *, unsigned int *flags); @@
> -2158,11 +2166,18 @@ netdev_stats_from_ovs_vport_stats(struct
> netdev_stats *dst, static int get_stats_via_vport__(const struct
> netdev *netdev, struct netdev_stats *stats) {
> + struct netdev_linux *netdev_linux = netdev_linux_cast(netdev);
> struct dpif_netlink_vport reply;
> struct ofpbuf *buf;
> int error;
>
> - error = dpif_netlink_vport_get(netdev_get_name(netdev), &reply, &buf);
> + if (is_tap_netdev(netdev) && (netdev_linux->netns != NULL)) {
> + error = dpif_netlink_vport_get_nopool(netdev_get_name(netdev),
> + &reply, &buf);
> + } else {
> + error = dpif_netlink_vport_get(netdev_get_name(netdev),
> + &reply, &buf);
> + }
> if (error) {
> return error;
> } else if (!reply.stats) {
> @@ -2237,6 +2252,33 @@ netdev_linux_get_stats(const struct netdev *netdev_,
> return error;
> }
>
> +static int
> +enter_netns(struct netns_knob *netns_knob, const char *netns) {
> + sprintf(netns_knob->nspath, "/proc/%d/ns/net", getpid());
> + netns_knob->main_fd = open(netns_knob->nspath, O_RDONLY);
> + if (netns_knob->main_fd < 0) {
> + return errno;
> + }
> +
> + sprintf(netns_knob->nspath, "/var/run/netns/%s", netns);
> + netns_knob->netns_fd = open(netns_knob->nspath, O_RDONLY);
> + if (netns_knob->netns_fd < 0) {
> + close(netns_knob->main_fd);
> + return errno;
> + }
> + setns(netns_knob->netns_fd, 0);
I think this reassociates the entire thread with the namespace, right?
So whatever thread calls this will be moved to the netns.
But actually, that 0 type could also change to a mount / pid namespace as well. I think it should just be CLONE_NEWNET here and in the exit_netns call. That way we don't get some additional changes that are unexpected (like cgroup or something). We expect that the FD should refer to a netns type.
> + return 0;
> +}
> +
> +static void
> +exit_netns(struct netns_knob *netns_knob) {
> + setns(netns_knob->main_fd, 0);
> + close(netns_knob->netns_fd);
> + close(netns_knob->main_fd);
> +}
> +
> /* Retrieves current device stats for 'netdev-tap' netdev or
> * netdev-internal. */
> static int
> @@ -2245,6 +2287,11 @@ netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
> struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> struct netdev_stats dev_stats;
> int error;
> + struct netns_knob netns_knob;
> +
> + if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> + error = enter_netns(&netns_knob, netdev->netns);
Shouldn't we handle this error? Otherwise why even do the assignment?
> + }
>
> ovs_mutex_lock(&netdev->mutex);
> get_stats_via_vport(netdev_, stats); @@ -2298,6 +2345,10 @@
> netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
> stats->rx_dropped += netdev->rx_dropped;
> ovs_mutex_unlock(&netdev->mutex);
>
> + if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> + exit_netns(&netns_knob);
Because if the above fails, this will call setns() on an invalid FD.
And then I think the thread will be in a bad state.
> + }
> +
> return error;
> }
>
> @@ -6245,6 +6296,7 @@ netdev_stats_from_rtnl_link_stats64(struct
> netdev_stats *dst, int get_stats_via_netlink(const struct netdev
> *netdev_, struct netdev_stats *stats) {
> + struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> struct ofpbuf request;
> struct ofpbuf *reply;
> int error;
> @@ -6258,7 +6310,11 @@ get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats)
> RTM_GETLINK, NLM_F_REQUEST);
> ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
> nl_msg_put_string(&request, IFLA_IFNAME, netdev_get_name(netdev_));
> - error = nl_transact(NETLINK_ROUTE, &request, &reply);
> + if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> + error = nl_transact_nopool(NETLINK_ROUTE, &request, &reply);
> + } else {
> + error = nl_transact(NETLINK_ROUTE, &request, &reply);
> + }
> ofpbuf_uninit(&request);
> if (error) {
> return error;
> diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index
> 47077e9..2377ca7 100644
> --- a/lib/netlink-socket.c
> +++ b/lib/netlink-socket.c
> @@ -1807,6 +1807,152 @@ nl_transact(int protocol, const struct ofpbuf *request,
> return error;
> }
>
> +static int
> +nl_sock_create_nopool(int protocol, struct nl_sock **sockp) {
> + struct nl_sock *sock;
> +#ifndef _WIN32
> + struct sockaddr_nl local, remote; #endif
> + socklen_t local_size;
> + int rcvbuf;
> + int retval = 0;
> +
> + *sockp = NULL;
> + sock = xmalloc(sizeof *sock);
> +
> +#ifdef _WIN32
> + sock->overlapped.hEvent = NULL;
> + sock->handle = CreateFile(OVS_DEVICE_NAME_USER,
> + GENERIC_READ | GENERIC_WRITE,
> + FILE_SHARE_READ | FILE_SHARE_WRITE,
> + NULL, OPEN_EXISTING,
> + FILE_FLAG_OVERLAPPED, NULL);
> +
> + if (sock->handle == INVALID_HANDLE_VALUE) {
> + VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
> + goto error;
> + }
> +
> + memset(&sock->overlapped, 0, sizeof sock->overlapped);
> + sock->overlapped.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
> + if (sock->overlapped.hEvent == NULL) {
> + VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
> + goto error;
> + }
> + /* Initialize the type/ioctl to Generic */
> + sock->read_ioctl = OVS_IOCTL_READ; #else
> + sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
> + if (sock->fd < 0) {
> + VLOG_ERR("fcntl: %s", ovs_strerror(errno));
> + goto error;
> + }
> +#endif
> +
> + sock->protocol = protocol;
> + sock->next_seq = 1;
> +
> + rcvbuf = 1024 * 1024;
> +#ifdef _WIN32
> + sock->rcvbuf = rcvbuf;
> + retval = get_sock_pid_from_kernel(sock);
> + if (retval != 0) {
> + goto error;
> + }
> + retval = set_sock_property(sock);
> + if (retval != 0) {
> + goto error;
> + }
> +#else
> + if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
> + &rcvbuf, sizeof rcvbuf)) {
> + /* Only root can use SO_RCVBUFFORCE. Everyone else gets EPERM.
> + * Warn only if the failure is therefore unexpected. */
> + if (errno != EPERM) {
> + VLOG_WARN_RL(&rl, "setting %d-byte socket receive buffer failed "
> + "(%s)", rcvbuf, ovs_strerror(errno));
> + }
> + }
> +
> + retval = get_socket_rcvbuf(sock->fd);
> + if (retval < 0) {
> + retval = -retval;
> + goto error;
> + }
> + sock->rcvbuf = retval;
> + retval = 0;
> +
> + /* Connect to kernel (pid 0) as remote address. */
> + memset(&remote, 0, sizeof remote);
> + remote.nl_family = AF_NETLINK;
> + remote.nl_pid = 0;
> + if (connect(sock->fd, (struct sockaddr *) &remote, sizeof remote) < 0) {
> + VLOG_ERR("connect(0): %s", ovs_strerror(errno));
> + goto error;
> + }
> +
> + /* Obtain pid assigned by kernel. */
> + local_size = sizeof local;
> + if (getsockname(sock->fd, (struct sockaddr *) &local, &local_size) < 0) {
> + VLOG_ERR("getsockname: %s", ovs_strerror(errno));
> + goto error;
> + }
> + if (local_size < sizeof local || local.nl_family != AF_NETLINK) {
> + VLOG_ERR("getsockname returned bad Netlink name");
> + retval = EINVAL;
> + goto error;
> + }
> + sock->pid = local.nl_pid;
> +#endif
> +
> + *sockp = sock;
> + return 0;
> +
> +error:
> + if (retval == 0) {
> + retval = errno;
> + if (retval == 0) {
> + retval = EINVAL;
> + }
> + }
> +#ifdef _WIN32
> + if (sock->overlapped.hEvent) {
> + CloseHandle(sock->overlapped.hEvent);
> + }
> + if (sock->handle != INVALID_HANDLE_VALUE) {
> + CloseHandle(sock->handle);
> + }
> +#else
> + if (sock->fd >= 0) {
> + close(sock->fd);
> + }
> +#endif
> + free(sock);
> + return retval;
> +}
> +
> +int
> +nl_transact_nopool(int protocol, const struct ofpbuf *request,
> + struct ofpbuf **replyp)
> +{
> + struct nl_sock *sock;
> + int error;
> +
> + error = nl_sock_create_nopool(protocol, &sock);
> + if (error) {
> + if (replyp) {
> + *replyp = NULL;
> + }
> + return error;
> + }
> +
> + error = nl_sock_transact(sock, request, replyp);
> + nl_sock_destroy(sock);
> +
> + return error;
> +}
> +
> /* Sends the 'request' member of the 'n' transactions in 'transactions' on a
> * Netlink socket for the given 'protocol' (e.g. NETLINK_ROUTE or
> * NETLINK_GENERIC), in order, and receives responses to all of them.
> Fills in diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
> index 7852ad0..08cbab8 100644
> --- a/lib/netlink-socket.h
> +++ b/lib/netlink-socket.h
> @@ -254,6 +254,8 @@ struct nl_transaction { int nl_transact(int
> protocol, const struct ofpbuf *request,
> struct ofpbuf **replyp); void
> nl_transact_multiple(int protocol, struct nl_transaction **, size_t
> n);
> +int nl_transact_nopool(int protocol, const struct ofpbuf *request,
> + struct ofpbuf **replyp);
>
> /* Table dumping. */
> #define NL_DUMP_BUFSIZE 4096
_______________________________________________
dev mailing list
dev at openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
More information about the dev
mailing list