[ovs-dev] [optimize 11/26] netlink: Postpone choosing sequence numbers until send time.

Wed Apr 18 00:21:03 UTC 2012

Does nl_sock_allocate_seq() need to take 'n' as a parameter?  It's
only caller passes in 1 currently.  Perhaps a future patch will need
it?

It's probably worth adding a comment to nl_sock_allocate_seq()
explaining why it wraps around at (UINT32_MAX / 2) without a deep
understanding of netlink, it's not obvious to me why it's necessary.

Does the theoretical race condition described in the old
get_nlmsg_seq() function still exist? Just curious, may not be a big
deal.  If it does, maybe it makes sense to use a random initial
sequence number instead of 1?  Wouldn't solve the problem, but might
reduce the probability of a collision.

Ethan

On Mon, Apr 16, 2012 at 17:18, Ben Pfaff <blp at nicira.com> wrote:
> Signed-off-by: Ben Pfaff <blp at nicira.com>
> ---
>  lib/netlink-socket.c |   30 +++++++++++++++++++++++-------
>  lib/netlink.c        |   29 +++++------------------------
>  2 files changed, 28 insertions(+), 31 deletions(-)
>
> diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
> index 15a800b..90734c7 100644
> --- a/lib/netlink-socket.c
> +++ b/lib/netlink-socket.c
> @@ -54,6 +54,7 @@ COVERAGE_DEFINE(netlink_sent);
>  * information.  Also, at high logging levels we log *all* Netlink messages. */
>  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 600);
>
> +static uint32_t nl_sock_allocate_seq(struct nl_sock *, unsigned int n);
>  static void log_nlmsg(const char *function, int error,
>                       const void *message, size_t size, int protocol);
>
> @@ -62,6 +63,7 @@ static void log_nlmsg(const char *function, int error,
>  struct nl_sock
>  {
>     int fd;
> +    uint32_t next_seq;
>     uint32_t pid;
>     int protocol;
>     struct nl_dump *dump;
> @@ -122,6 +124,7 @@ nl_sock_create(int protocol, struct nl_sock **sockp)
>     }
>     sock->protocol = protocol;
>     sock->dump = NULL;
> +    sock->next_seq = 1;
>
>     rcvbuf = 1024 * 1024;
>     if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
> @@ -256,6 +259,7 @@ nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg, bool wait)
>     int error;
>
>     nlmsg->nlmsg_len = msg->size;
> +    nlmsg->nlmsg_seq = nl_sock_allocate_seq(sock, 1);
>     nlmsg->nlmsg_pid = sock->pid;
>     do {
>         int retval;
> @@ -522,7 +526,6 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
>  *
>  * Before sending each message, this function will finalize nlmsg_len in each
>  * 'request' to match the ofpbuf's size, and set nlmsg_pid to 'sock''s pid.
> - * NLM_F_ACK will be added to some requests' nlmsg_flags.
>  *
>  * Bare Netlink is an unreliable transport protocol.  This function layers
>  * reliable delivery and reply semantics on top of bare Netlink.  See
> @@ -597,9 +600,9 @@ nl_sock_transact_multiple(struct nl_sock *sock,
>  * on failure '*replyp' is set to NULL.  If 'replyp' is null, then the kernel's
>  * reply, if any, is discarded.
>  *
> - * nlmsg_len in 'msg' will be finalized to match msg->size, and nlmsg_pid will
> - * be set to 'sock''s pid, before the message is sent.  NLM_F_ACK will be set
> - * in nlmsg_flags.
> + * Before the message is sent, nlmsg_len in 'request' will be finalized to
> + * match msg->size, nlmsg_pid will be set to 'sock''s pid, and nlmsg_seq will
> + * be initialized, NLM_F_ACK will be set in nlmsg_flags.
>  *
>  * The caller is responsible for destroying 'request'.
>  *
> @@ -721,9 +724,6 @@ void
>  nl_dump_start(struct nl_dump *dump,
>               struct nl_sock *sock, const struct ofpbuf *request)
>  {
> -    struct nlmsghdr *nlmsghdr = nl_msg_nlmsghdr(request);
> -    nlmsghdr->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK;
> -    dump->seq = nlmsghdr->nlmsg_seq;
>     dump->buffer = NULL;
>     if (sock->dump) {
>         /* 'sock' already has an ongoing dump.  Clone the socket because
> @@ -737,7 +737,10 @@ nl_dump_start(struct nl_dump *dump,
>         dump->sock = sock;
>         dump->status = 0;
>     }
> +
> +    nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK;
>     dump->status = nl_sock_send__(sock, request, true);
> +    dump->seq = nl_msg_nlmsghdr(request)->nlmsg_seq;
>  }
>
>  /* Helper function for nl_dump_next(). */
> @@ -1057,6 +1060,19 @@ nl_lookup_genl_family(const char *name, int *number)
>     return *number > 0 ? 0 : -*number;
>  }
>
> +static uint32_t
> +nl_sock_allocate_seq(struct nl_sock *sock, unsigned int n)
> +{
> +    uint32_t seq = sock->next_seq;
> +
> +    sock->next_seq += n;
> +    if (sock->next_seq >= UINT32_MAX / 2) {
> +        sock->next_seq = 1;
> +    }
> +
> +    return seq;
> +}
> +
>  static void
>  nlmsghdr_to_string(const struct nlmsghdr *h, int protocol, struct ds *ds)
>  {
> diff --git a/lib/netlink.c b/lib/netlink.c
> index 6445049..6e0701c 100644
> --- a/lib/netlink.c
> +++ b/lib/netlink.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
> + * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira Networks.
>  *
>  * Licensed under the Apache License, Version 2.0 (the "License");
>  * you may not use this file except in compliance with the License.
> @@ -88,26 +88,6 @@ nl_msg_reserve(struct ofpbuf *msg, size_t size)
>     ofpbuf_prealloc_tailroom(msg, NLMSG_ALIGN(size));
>  }
>
> -static uint32_t
> -get_nlmsg_seq(void)
> -{
> -    /* Next nlmsghdr sequence number.
> -     *
> -     * This implementation uses sequence numbers that are unique process-wide,
> -     * to avoid a hypothetical race: send request, close socket, open new
> -     * socket that reuses the old socket's PID value, send request on new
> -     * socket, receive reply from kernel to old socket but with same PID and
> -     * sequence number.  (This race could be avoided other ways, e.g. by
> -     * preventing PIDs from being quickly reused). */
> -    static uint32_t next_seq;
> -
> -    if (next_seq == 0) {
> -        /* Pick initial sequence number. */
> -        next_seq = getpid() ^ time_wall();
> -    }
> -    return next_seq++;
> -}
> -
>  /* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty.
>  * Uses the given 'type' and 'flags'.  'expected_payload' should be
>  * an estimate of the number of payload bytes to be supplied; if the size of
> @@ -121,8 +101,9 @@ get_nlmsg_seq(void)
>  * is often NLM_F_REQUEST indicating that a request is being made, commonly
>  * or'd with NLM_F_ACK to request an acknowledgement.
>  *
> - * Sets the new nlmsghdr's nlmsg_pid field to 0 for now.  nl_sock_send() will
> - * fill it in just before sending the message.
> + * Sets the new nlmsghdr's nlmsg_len, nlmsg_seq, and nlmsg_pid fields to 0 for
> + * now.  Functions that send Netlink messages will fill these in just before
> + * sending the message.
>  *
>  * nl_msg_put_genlmsghdr() is more convenient for composing a Generic Netlink
>  * message. */
> @@ -139,7 +120,7 @@ nl_msg_put_nlmsghdr(struct ofpbuf *msg,
>     nlmsghdr->nlmsg_len = 0;
>     nlmsghdr->nlmsg_type = type;
>     nlmsghdr->nlmsg_flags = flags;
> -    nlmsghdr->nlmsg_seq = get_nlmsg_seq();
> +    nlmsghdr->nlmsg_seq = 0;
>     nlmsghdr->nlmsg_pid = 0;
>  }
>
> --
> 1.7.9
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev