[ovs-dev] [optimize 11/26] netlink: Postpone choosing sequence numbers until send time.
Ethan Jackson
ethan at nicira.com
Wed Apr 18 00:21:03 UTC 2012
Does nl_sock_allocate_seq() need to take 'n' as a parameter? It's
only caller passes in 1 currently. Perhaps a future patch will need
it?
It's probably worth adding a comment to nl_sock_allocate_seq()
explaining why it wraps around at (UINT32_MAX / 2) without a deep
understanding of netlink, it's not obvious to me why it's necessary.
Does the theoretical race condition described in the old
get_nlmsg_seq() function still exist? Just curious, may not be a big
deal. If it does, maybe it makes sense to use a random initial
sequence number instead of 1? Wouldn't solve the problem, but might
reduce the probability of a collision.
Ethan
On Mon, Apr 16, 2012 at 17:18, Ben Pfaff <blp at nicira.com> wrote:
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
> lib/netlink-socket.c | 30 +++++++++++++++++++++++-------
> lib/netlink.c | 29 +++++------------------------
> 2 files changed, 28 insertions(+), 31 deletions(-)
>
> diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
> index 15a800b..90734c7 100644
> --- a/lib/netlink-socket.c
> +++ b/lib/netlink-socket.c
> @@ -54,6 +54,7 @@ COVERAGE_DEFINE(netlink_sent);
> * information. Also, at high logging levels we log *all* Netlink messages. */
> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 600);
>
> +static uint32_t nl_sock_allocate_seq(struct nl_sock *, unsigned int n);
> static void log_nlmsg(const char *function, int error,
> const void *message, size_t size, int protocol);
>
> @@ -62,6 +63,7 @@ static void log_nlmsg(const char *function, int error,
> struct nl_sock
> {
> int fd;
> + uint32_t next_seq;
> uint32_t pid;
> int protocol;
> struct nl_dump *dump;
> @@ -122,6 +124,7 @@ nl_sock_create(int protocol, struct nl_sock **sockp)
> }
> sock->protocol = protocol;
> sock->dump = NULL;
> + sock->next_seq = 1;
>
> rcvbuf = 1024 * 1024;
> if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
> @@ -256,6 +259,7 @@ nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg, bool wait)
> int error;
>
> nlmsg->nlmsg_len = msg->size;
> + nlmsg->nlmsg_seq = nl_sock_allocate_seq(sock, 1);
> nlmsg->nlmsg_pid = sock->pid;
> do {
> int retval;
> @@ -522,7 +526,6 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
> *
> * Before sending each message, this function will finalize nlmsg_len in each
> * 'request' to match the ofpbuf's size, and set nlmsg_pid to 'sock''s pid.
> - * NLM_F_ACK will be added to some requests' nlmsg_flags.
> *
> * Bare Netlink is an unreliable transport protocol. This function layers
> * reliable delivery and reply semantics on top of bare Netlink. See
> @@ -597,9 +600,9 @@ nl_sock_transact_multiple(struct nl_sock *sock,
> * on failure '*replyp' is set to NULL. If 'replyp' is null, then the kernel's
> * reply, if any, is discarded.
> *
> - * nlmsg_len in 'msg' will be finalized to match msg->size, and nlmsg_pid will
> - * be set to 'sock''s pid, before the message is sent. NLM_F_ACK will be set
> - * in nlmsg_flags.
> + * Before the message is sent, nlmsg_len in 'request' will be finalized to
> + * match msg->size, nlmsg_pid will be set to 'sock''s pid, and nlmsg_seq will
> + * be initialized, NLM_F_ACK will be set in nlmsg_flags.
> *
> * The caller is responsible for destroying 'request'.
> *
> @@ -721,9 +724,6 @@ void
> nl_dump_start(struct nl_dump *dump,
> struct nl_sock *sock, const struct ofpbuf *request)
> {
> - struct nlmsghdr *nlmsghdr = nl_msg_nlmsghdr(request);
> - nlmsghdr->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK;
> - dump->seq = nlmsghdr->nlmsg_seq;
> dump->buffer = NULL;
> if (sock->dump) {
> /* 'sock' already has an ongoing dump. Clone the socket because
> @@ -737,7 +737,10 @@ nl_dump_start(struct nl_dump *dump,
> dump->sock = sock;
> dump->status = 0;
> }
> +
> + nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK;
> dump->status = nl_sock_send__(sock, request, true);
> + dump->seq = nl_msg_nlmsghdr(request)->nlmsg_seq;
> }
>
> /* Helper function for nl_dump_next(). */
> @@ -1057,6 +1060,19 @@ nl_lookup_genl_family(const char *name, int *number)
> return *number > 0 ? 0 : -*number;
> }
>
> +static uint32_t
> +nl_sock_allocate_seq(struct nl_sock *sock, unsigned int n)
> +{
> + uint32_t seq = sock->next_seq;
> +
> + sock->next_seq += n;
> + if (sock->next_seq >= UINT32_MAX / 2) {
> + sock->next_seq = 1;
> + }
> +
> + return seq;
> +}
> +
> static void
> nlmsghdr_to_string(const struct nlmsghdr *h, int protocol, struct ds *ds)
> {
> diff --git a/lib/netlink.c b/lib/netlink.c
> index 6445049..6e0701c 100644
> --- a/lib/netlink.c
> +++ b/lib/netlink.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira Networks.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -88,26 +88,6 @@ nl_msg_reserve(struct ofpbuf *msg, size_t size)
> ofpbuf_prealloc_tailroom(msg, NLMSG_ALIGN(size));
> }
>
> -static uint32_t
> -get_nlmsg_seq(void)
> -{
> - /* Next nlmsghdr sequence number.
> - *
> - * This implementation uses sequence numbers that are unique process-wide,
> - * to avoid a hypothetical race: send request, close socket, open new
> - * socket that reuses the old socket's PID value, send request on new
> - * socket, receive reply from kernel to old socket but with same PID and
> - * sequence number. (This race could be avoided other ways, e.g. by
> - * preventing PIDs from being quickly reused). */
> - static uint32_t next_seq;
> -
> - if (next_seq == 0) {
> - /* Pick initial sequence number. */
> - next_seq = getpid() ^ time_wall();
> - }
> - return next_seq++;
> -}
> -
> /* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty.
> * Uses the given 'type' and 'flags'. 'expected_payload' should be
> * an estimate of the number of payload bytes to be supplied; if the size of
> @@ -121,8 +101,9 @@ get_nlmsg_seq(void)
> * is often NLM_F_REQUEST indicating that a request is being made, commonly
> * or'd with NLM_F_ACK to request an acknowledgement.
> *
> - * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
> - * fill it in just before sending the message.
> + * Sets the new nlmsghdr's nlmsg_len, nlmsg_seq, and nlmsg_pid fields to 0 for
> + * now. Functions that send Netlink messages will fill these in just before
> + * sending the message.
> *
> * nl_msg_put_genlmsghdr() is more convenient for composing a Generic Netlink
> * message. */
> @@ -139,7 +120,7 @@ nl_msg_put_nlmsghdr(struct ofpbuf *msg,
> nlmsghdr->nlmsg_len = 0;
> nlmsghdr->nlmsg_type = type;
> nlmsghdr->nlmsg_flags = flags;
> - nlmsghdr->nlmsg_seq = get_nlmsg_seq();
> + nlmsghdr->nlmsg_seq = 0;
> nlmsghdr->nlmsg_pid = 0;
> }
>
> --
> 1.7.9
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
More information about the dev
mailing list