[ovs-dev] 答复: [PATCH V1 3/4] Fix tap interface statistics issue

Yi Yang (杨燚)-云服务集团 yangyi01 at inspur.com
Wed Aug 26 00:53:26 UTC 2020


Aaron, thank you so much for review, yes, I should handle error. For setns, I will check if it can work as you commented. I'll send V2 to fix your comments.

-----邮件原件-----
发件人: dev [mailto:ovs-dev-bounces at openvswitch.org] 代表 Aaron Conole
发送时间: 2020年8月25日 23:06
收件人: yang_y_yi at 163.com
抄送: ovs-dev at openvswitch.org; i.maximets at ovn.org; fbl at sysclose.org
主题: Re: [ovs-dev] [PATCH V1 3/4] Fix tap interface statistics issue

yang_y_yi at 163.com writes:

> From: Yi Yang <yangyi01 at inspur.com>
>
> After tap interface is moved to network namespace, "ovs-vsctl list 
> interface tapXXX" can get statistics info of tap interface, the root 
> cause is OVS still gets statistics info in root namespace.
>
> With netns option help, OVS can get statistics info in tap interface 
> netns.
>
> This patch added enter and exit netns helpers and change 
> statistics-related functions for those tap interfaces which have been 
> moved into netns and make sure "ovs-vsctl list interface tapXXX" can 
> get statistics info correctly.
>
> Here is a result sample for reference:
> name                : tap1
> ofport              : 4
> ofport_request      : []
> options             : {netns=ns01}
> other_config        : {}
> statistics          : {rx_bytes=6228, rx_packets=68, tx_bytes=8310, tx_packets=95}
> status              : {}
> type                : tap
>
> Signed-off-by: Yi Yang <yangyi01 at inspur.com>
> ---
>  lib/dpif-netlink.c   |  51 ++++++++++++++++++
>  lib/dpif-netlink.h   |   3 ++
>  lib/netdev-linux.c   |  60 ++++++++++++++++++++-
>  lib/netlink-socket.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/netlink-socket.h |   2 +
>  5 files changed, 260 insertions(+), 2 deletions(-)
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 
> 7da4fb5..8ed37ed 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -4282,6 +4282,43 @@ dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
>      return error;
>  }
>  
> +static int
> +dpif_netlink_vport_transact_nopool(const struct dpif_netlink_vport *request,
> +                                   struct dpif_netlink_vport *reply,
> +                                   struct ofpbuf **bufp) {
> +    struct ofpbuf *request_buf;
> +    int error;
> +
> +    ovs_assert((reply != NULL) == (bufp != NULL));
> +
> +    error = dpif_netlink_init();
> +    if (error) {
> +        if (reply) {
> +            *bufp = NULL;
> +            dpif_netlink_vport_init(reply);
> +        }
> +        return error;
> +    }
> +
> +    request_buf = ofpbuf_new(1024);
> +    dpif_netlink_vport_to_ofpbuf(request, request_buf);
> +    error = nl_transact_nopool(NETLINK_GENERIC, request_buf, bufp);
> +    ofpbuf_delete(request_buf);
> +
> +    if (reply) {
> +        if (!error) {
> +            error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
> +        }
> +        if (error) {
> +            dpif_netlink_vport_init(reply);
> +            ofpbuf_delete(*bufp);
> +            *bufp = NULL;
> +        }
> +    }
> +    return error;
> +}
> +
>  /* Obtains information about the kernel vport named 'name' and stores it into
>   * '*reply' and '*bufp'.  The caller must free '*bufp' when the reply is no
>   * longer needed ('reply' will contain pointers into '*bufp').  */ @@ 
> -4298,6 +4335,20 @@ dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
>      return dpif_netlink_vport_transact(&request, reply, bufp);  }
>  
> +int
> +dpif_netlink_vport_get_nopool(const char *name,
> +                              struct dpif_netlink_vport *reply,
> +                              struct ofpbuf **bufp) {
> +    struct dpif_netlink_vport request;
> +
> +    dpif_netlink_vport_init(&request);
> +    request.cmd = OVS_VPORT_CMD_GET;
> +    request.name = name;
> +
> +    return dpif_netlink_vport_transact_nopool(&request, reply, bufp); 
> +}
> +
>  /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
>   * by Netlink attributes, into 'dp'.  Returns 0 if successful, otherwise a
>   * positive errno value.
> diff --git a/lib/dpif-netlink.h b/lib/dpif-netlink.h index 
> 24294bc..9372241 100644
> --- a/lib/dpif-netlink.h
> +++ b/lib/dpif-netlink.h
> @@ -55,6 +55,9 @@ int dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
>                                  struct ofpbuf **bufp);  int 
> dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
>                             struct ofpbuf **bufp);
> +int dpif_netlink_vport_get_nopool(const char *name,
> +                                  struct dpif_netlink_vport *reply,
> +                                  struct ofpbuf **bufp);
>  
>  bool dpif_netlink_is_internal_device(const char *name);
>  
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 
> bb3aa9b..2a7d5ec 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -520,8 +520,16 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>   * changes in the device miimon status, so we can use atomic_count. 
> */  static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
>  
> +struct netns_knob {
> +    int main_fd;
> +    int netns_fd;
> +    char nspath[128];
> +};
> +
>  static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);  static 
> void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
> +static int enter_netns(struct netns_knob *netns_knob, const char 
> +*netns); static void exit_netns(struct netns_knob *netns_knob);
>  static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
>                                     int cmd, const char *cmd_name);  
> static int get_flags(const struct netdev *, unsigned int *flags); @@ 
> -2158,11 +2166,18 @@ netdev_stats_from_ovs_vport_stats(struct 
> netdev_stats *dst,  static int  get_stats_via_vport__(const struct 
> netdev *netdev, struct netdev_stats *stats)  {
> +    struct netdev_linux *netdev_linux = netdev_linux_cast(netdev);
>      struct dpif_netlink_vport reply;
>      struct ofpbuf *buf;
>      int error;
>  
> -    error = dpif_netlink_vport_get(netdev_get_name(netdev), &reply, &buf);
> +    if (is_tap_netdev(netdev) && (netdev_linux->netns != NULL)) {
> +        error = dpif_netlink_vport_get_nopool(netdev_get_name(netdev),
> +                                              &reply, &buf);
> +    } else {
> +        error = dpif_netlink_vport_get(netdev_get_name(netdev),
> +                                              &reply, &buf);
> +    }
>      if (error) {
>          return error;
>      } else if (!reply.stats) {
> @@ -2237,6 +2252,33 @@ netdev_linux_get_stats(const struct netdev *netdev_,
>      return error;
>  }
>  
> +static int
> +enter_netns(struct netns_knob *netns_knob, const char *netns) {
> +    sprintf(netns_knob->nspath, "/proc/%d/ns/net", getpid());
> +    netns_knob->main_fd = open(netns_knob->nspath, O_RDONLY);
> +    if (netns_knob->main_fd < 0) {
> +        return errno;
> +    }
> +
> +    sprintf(netns_knob->nspath, "/var/run/netns/%s", netns);
> +    netns_knob->netns_fd = open(netns_knob->nspath, O_RDONLY);
> +    if (netns_knob->netns_fd < 0) {
> +        close(netns_knob->main_fd);
> +        return errno;
> +    }
> +    setns(netns_knob->netns_fd, 0);

I think this reassociates the entire thread with the namespace, right?
So whatever thread calls this will be moved to the netns.

But actually, that 0 type could also change to a mount / pid namespace as well.  I think it should just be CLONE_NEWNET here and in the exit_netns call.  That way we don't get some additional changes that are unexpected (like cgroup or something).  We expect that the FD should refer to a netns type.

> +    return 0;
> +}
> +
> +static void
> +exit_netns(struct netns_knob *netns_knob) {
> +    setns(netns_knob->main_fd, 0);
> +    close(netns_knob->netns_fd);
> +    close(netns_knob->main_fd);
> +}
> +
>  /* Retrieves current device stats for 'netdev-tap' netdev or
>   * netdev-internal. */
>  static int
> @@ -2245,6 +2287,11 @@ netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
>      struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      struct netdev_stats dev_stats;
>      int error;
> +    struct netns_knob netns_knob;
> +
> +    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> +        error = enter_netns(&netns_knob, netdev->netns);

Shouldn't we handle this error?  Otherwise why even do the assignment?

> +    }
>  
>      ovs_mutex_lock(&netdev->mutex);
>      get_stats_via_vport(netdev_, stats); @@ -2298,6 +2345,10 @@ 
> netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
>      stats->rx_dropped += netdev->rx_dropped;
>      ovs_mutex_unlock(&netdev->mutex);
>  
> +    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> +        exit_netns(&netns_knob);

Because if the above fails, this will call setns() on an invalid FD.
And then I think the thread will be in a bad state.

> +    }
> +
>      return error;
>  }
>  
> @@ -6245,6 +6296,7 @@ netdev_stats_from_rtnl_link_stats64(struct 
> netdev_stats *dst,  int  get_stats_via_netlink(const struct netdev 
> *netdev_, struct netdev_stats *stats)  {
> +    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      struct ofpbuf request;
>      struct ofpbuf *reply;
>      int error;
> @@ -6258,7 +6310,11 @@ get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats)
>                          RTM_GETLINK, NLM_F_REQUEST);
>      ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
>      nl_msg_put_string(&request, IFLA_IFNAME, netdev_get_name(netdev_));
> -    error = nl_transact(NETLINK_ROUTE, &request, &reply);
> +    if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) {
> +        error = nl_transact_nopool(NETLINK_ROUTE, &request, &reply);
> +    } else {
> +        error = nl_transact(NETLINK_ROUTE, &request, &reply);
> +    }
>      ofpbuf_uninit(&request);
>      if (error) {
>          return error;
> diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 
> 47077e9..2377ca7 100644
> --- a/lib/netlink-socket.c
> +++ b/lib/netlink-socket.c
> @@ -1807,6 +1807,152 @@ nl_transact(int protocol, const struct ofpbuf *request,
>      return error;
>  }
>  
> +static int
> +nl_sock_create_nopool(int protocol, struct nl_sock **sockp) {
> +    struct nl_sock *sock;
> +#ifndef _WIN32
> +    struct sockaddr_nl local, remote; #endif
> +    socklen_t local_size;
> +    int rcvbuf;
> +    int retval = 0;
> +
> +    *sockp = NULL;
> +    sock = xmalloc(sizeof *sock);
> +
> +#ifdef _WIN32
> +    sock->overlapped.hEvent = NULL;
> +    sock->handle = CreateFile(OVS_DEVICE_NAME_USER,
> +                              GENERIC_READ | GENERIC_WRITE,
> +                              FILE_SHARE_READ | FILE_SHARE_WRITE,
> +                              NULL, OPEN_EXISTING,
> +                              FILE_FLAG_OVERLAPPED, NULL);
> +
> +    if (sock->handle == INVALID_HANDLE_VALUE) {
> +        VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
> +        goto error;
> +    }
> +
> +    memset(&sock->overlapped, 0, sizeof sock->overlapped);
> +    sock->overlapped.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
> +    if (sock->overlapped.hEvent == NULL) {
> +        VLOG_ERR("fcntl: %s", ovs_lasterror_to_string());
> +        goto error;
> +    }
> +    /* Initialize the type/ioctl to Generic */
> +    sock->read_ioctl = OVS_IOCTL_READ; #else
> +    sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
> +    if (sock->fd < 0) {
> +        VLOG_ERR("fcntl: %s", ovs_strerror(errno));
> +        goto error;
> +    }
> +#endif
> +
> +    sock->protocol = protocol;
> +    sock->next_seq = 1;
> +
> +    rcvbuf = 1024 * 1024;
> +#ifdef _WIN32
> +    sock->rcvbuf = rcvbuf;
> +    retval = get_sock_pid_from_kernel(sock);
> +    if (retval != 0) {
> +        goto error;
> +    }
> +    retval = set_sock_property(sock);
> +    if (retval != 0) {
> +        goto error;
> +    }
> +#else
> +    if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
> +                   &rcvbuf, sizeof rcvbuf)) {
> +        /* Only root can use SO_RCVBUFFORCE.  Everyone else gets EPERM.
> +         * Warn only if the failure is therefore unexpected. */
> +        if (errno != EPERM) {
> +            VLOG_WARN_RL(&rl, "setting %d-byte socket receive buffer failed "
> +                         "(%s)", rcvbuf, ovs_strerror(errno));
> +        }
> +    }
> +
> +    retval = get_socket_rcvbuf(sock->fd);
> +    if (retval < 0) {
> +        retval = -retval;
> +        goto error;
> +    }
> +    sock->rcvbuf = retval;
> +    retval = 0;
> +
> +    /* Connect to kernel (pid 0) as remote address. */
> +    memset(&remote, 0, sizeof remote);
> +    remote.nl_family = AF_NETLINK;
> +    remote.nl_pid = 0;
> +    if (connect(sock->fd, (struct sockaddr *) &remote, sizeof remote) < 0) {
> +        VLOG_ERR("connect(0): %s", ovs_strerror(errno));
> +        goto error;
> +    }
> +
> +    /* Obtain pid assigned by kernel. */
> +    local_size = sizeof local;
> +    if (getsockname(sock->fd, (struct sockaddr *) &local, &local_size) < 0) {
> +        VLOG_ERR("getsockname: %s", ovs_strerror(errno));
> +        goto error;
> +    }
> +    if (local_size < sizeof local || local.nl_family != AF_NETLINK) {
> +        VLOG_ERR("getsockname returned bad Netlink name");
> +        retval = EINVAL;
> +        goto error;
> +    }
> +    sock->pid = local.nl_pid;
> +#endif
> +
> +    *sockp = sock;
> +    return 0;
> +
> +error:
> +    if (retval == 0) {
> +        retval = errno;
> +        if (retval == 0) {
> +            retval = EINVAL;
> +        }
> +    }
> +#ifdef _WIN32
> +    if (sock->overlapped.hEvent) {
> +        CloseHandle(sock->overlapped.hEvent);
> +    }
> +    if (sock->handle != INVALID_HANDLE_VALUE) {
> +        CloseHandle(sock->handle);
> +    }
> +#else
> +    if (sock->fd >= 0) {
> +        close(sock->fd);
> +    }
> +#endif
> +    free(sock);
> +    return retval;
> +}
> +
> +int
> +nl_transact_nopool(int protocol, const struct ofpbuf *request,
> +            struct ofpbuf **replyp)
> +{
> +    struct nl_sock *sock;
> +    int error;
> +
> +    error = nl_sock_create_nopool(protocol, &sock);
> +    if (error) {
> +        if (replyp) {
> +            *replyp = NULL;
> +        }
> +        return error;
> +    }
> +
> +    error = nl_sock_transact(sock, request, replyp);
> +    nl_sock_destroy(sock);
> +
> +    return error;
> +}
> +
>  /* Sends the 'request' member of the 'n' transactions in 'transactions' on a
>   * Netlink socket for the given 'protocol' (e.g. NETLINK_ROUTE or
>   * NETLINK_GENERIC), in order, and receives responses to all of them.  
> Fills in diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h 
> index 7852ad0..08cbab8 100644
> --- a/lib/netlink-socket.h
> +++ b/lib/netlink-socket.h
> @@ -254,6 +254,8 @@ struct nl_transaction {  int nl_transact(int 
> protocol, const struct ofpbuf *request,
>                  struct ofpbuf **replyp);  void 
> nl_transact_multiple(int protocol, struct nl_transaction **, size_t 
> n);
> +int nl_transact_nopool(int protocol, const struct ofpbuf *request,
> +                       struct ofpbuf **replyp);
>  
>  /* Table dumping. */
>  #define NL_DUMP_BUFSIZE         4096

_______________________________________________
dev mailing list
dev at openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list