[ovs-dev] [PATCH v4 1/2] Optimize the poll loop for poll_immediate_wake()

Anton Ivanov anton.ivanov at cambridgegreys.com
Tue Aug 3 09:33:50 UTC 2021


On 03/08/2021 00:29, Gaëtan Rivet wrote:
> On Tue, Jul 20, 2021, at 17:06, anton.ivanov at cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov at cambridgegreys.com>
>>
>> If we are not obtaining any useful information out of the poll(),
>> such as is a fd busy or not, we do not need to do a poll() if
>> an immediate_wake() has been requested.
>>
>> This cuts out all the pollfd hash additions, forming the poll
>> arguments and the actual poll() after a call to
>> poll_immediate_wake()
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov at cambridgegreys.com>
> Hi Anton,
>
> Patch generally looks good to me. A few nits below.
>
>> ---
>>   lib/poll-loop.c | 80 ++++++++++++++++++++++++++++++++-----------------
>>   lib/timeval.c   | 13 +++++++-
>>   2 files changed, 65 insertions(+), 28 deletions(-)
>>
>> diff --git a/lib/poll-loop.c b/lib/poll-loop.c
>> index 4e751ff2c..9452f6e22 100644
>> --- a/lib/poll-loop.c
>> +++ b/lib/poll-loop.c
>> @@ -107,6 +107,13 @@ poll_create_node(int fd, HANDLE wevent, short int
>> events, const char *where)
>>   
>>       COVERAGE_INC(poll_create_node);
>>   
>> +    if (loop->timeout_when == LLONG_MIN) {
>> +        /* There was a previous request to bail out of this poll loop.
>> +         * There is no point to engage in yack shaving a poll hmap.
>> +         */
>> +        return;
>> +    }
>> +
>>       /* Both 'fd' and 'wevent' cannot be set. */
>>       ovs_assert(!fd != !wevent);
>>   
>> @@ -181,8 +188,21 @@ poll_wevent_wait_at(HANDLE wevent, const char *where)
>>   void
>>   poll_timer_wait_at(long long int msec, const char *where)
>>   {
>> -    long long int now = time_msec();
>> +    long long int now;
>>       long long int when;
>> +    struct poll_loop *loop = poll_loop();
>> +
>> +    /* We have an outstanding request for an immediate wake -
>> +     * either explicit (from poll_immediate_wake()) or as a
>> +     * result of a previous timeout calculation which gave
>> +     * a negative timeout.
>> +     * No point trying to recalculate the timeout.
>> +     */
>> +    if (loop->timeout_when == LLONG_MIN) {
>> +        return;
>> +    }
>> +
>> +    now = time_msec();
>>   
>>       if (msec <= 0) {
>>           /* Wake up immediately. */
>> @@ -229,7 +249,7 @@ poll_timer_wait_until_at(long long int when, const
>> char *where)
>>   void
>>   poll_immediate_wake_at(const char *where)
>>   {
>> -    poll_timer_wait_at(0, where);
>> +    poll_timer_wait_at(LLONG_MIN, where);
>>   }
>>   
>>   /* Logs, if appropriate, that the poll loop was awakened by an event
>> @@ -320,49 +340,55 @@ poll_block(void)
>>   {
>>       struct poll_loop *loop = poll_loop();
>>       struct poll_node *node;
>> -    struct pollfd *pollfds;
>> +    struct pollfd *pollfds = NULL;
>>       HANDLE *wevents = NULL;
>>       int elapsed;
>> -    int retval;
>> +    int retval = 0;
>>       int i;
>>   
>> -    /* Register fatal signal events before actually doing any real work for
>> -     * poll_block. */
>> -    fatal_signal_wait();
>>   
>>       if (loop->timeout_when == LLONG_MIN) {
>>           COVERAGE_INC(poll_zero_timeout);
>> +    } else {
>> +        /* Register fatal signal events only if we intend to do real work for
>> +         * poll_block.
>> +         */
>> +        fatal_signal_wait();
> When should we avoid registering fatal signal events?
> Even for immediate wake, time_poll() will execute for general
> upkeep (coverage + RCU), should a fatal signal handler not
> be installed in that case?

The fatal signal events are handled in ALL cases in the fatal_signal_run() which is towards the end of the poll loop.

fatal_singal_wait only adds the pipe to the fds. If we are not polling on that fd anyway, no point adding it either.

>
>>       }
>>   
>>       timewarp_run();
>> -    pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
>> +    if (loop->timeout_when != LLONG_MIN) {
>> +        pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
>>   
>>   #ifdef _WIN32
>> -    wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
>> +        wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
>>   #endif
>>   
>> -    /* Populate with all the fds and events. */
>> -    i = 0;
>> -    HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
>> -        pollfds[i] = node->pollfd;
>> +        /* Populate with all the fds and events. */
>> +        i = 0;
>> +        HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
>> +            pollfds[i] = node->pollfd;
>>   #ifdef _WIN32
>> -        wevents[i] = node->wevent;
>> -        if (node->pollfd.fd && node->wevent) {
>> -            short int wsa_events = 0;
>> -            if (node->pollfd.events & POLLIN) {
>> -                wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
>> +            wevents[i] = node->wevent;
>> +            if (node->pollfd.fd && node->wevent) {
>> +                short int wsa_events = 0;
>> +                if (node->pollfd.events & POLLIN) {
>> +                    wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
>> +                }
>> +                if (node->pollfd.events & POLLOUT) {
>> +                    wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
>> +                }
>> +                WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
>>               }
>> -            if (node->pollfd.events & POLLOUT) {
>> -                wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
>> -            }
>> -            WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
>> -        }
>>   #endif
>> -        i++;
>> -    }
>> +            i++;
>> +        }
>>   
>> -    retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
>> -                       loop->timeout_when, &elapsed);
>> +        retval = time_poll(pollfds, hmap_count(&loop->poll_nodes),
>> wevents,
>> +                           loop->timeout_when, &elapsed);
>> +    } else {
>> +        retval = time_poll(NULL, 0, NULL, loop->timeout_when,
>> &elapsed);
>> +    }
>>       if (retval < 0) {
>>           static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>>           VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
>> diff --git a/lib/timeval.c b/lib/timeval.c
>> index 193c7bab1..ef09a15e0 100644
>> --- a/lib/timeval.c
>> +++ b/lib/timeval.c
>> @@ -305,6 +305,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
>> HANDLE *handles OVS_UNUSED,
>>       for (;;) {
>>           long long int now = time_msec();
>>           int time_left;
>> +        retval = 0;
>>   
>>           if (now >= timeout_when) {
>>               time_left = 0;
>> @@ -323,7 +324,14 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
>> HANDLE *handles OVS_UNUSED,
>>           }
>>   
>>   #ifndef _WIN32
>> -        retval = poll(pollfds, n_pollfds, time_left);
>> +        /* If timeout_when is LLONG_MIN, we should skip the poll().
>> +         * We do not skip on time_left == 0, because we may have
>> +         * ended up with time_left = 0 after a poll with valid
>> +         * pollfds was interrupted and restarted.
>> +         */
>> +        if (timeout_when != LLONG_MIN) {
> The _WIN32 case checks 'n_pollfds != 0' before calling.
> Should both be aligned? Maybe the _WIN32 should avoid the call if
> 'n_pollfds == 0 || timeout_when == LLONG_MIN'?
Sure. I try to avoid touching the WIN32 code as I do not have an environment to test it.
>
>> +            retval = poll(pollfds, n_pollfds, time_left);
>> +        }
>>           if (retval < 0) {
>>               retval = -errno;
>>           }
>> @@ -331,6 +339,9 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
>> HANDLE *handles OVS_UNUSED,
>>           if (n_pollfds > MAXIMUM_WAIT_OBJECTS) {
>>               VLOG_ERR("Cannot handle more than maximum wait objects\n");
>>           } else if (n_pollfds != 0) {
>> +            /* If we are doing an immediate_wake shortcut n_pollfds is
>> +             * zero, so we skip the actual call.
>> +             */
> I find this sentence hard to read.
> I would suggest instead:
>
>    /* n_pollfds can be zero on immediate_wake.
>     * In that case the call is not necessary. */
>
>>               retval = WaitForMultipleObjects(n_pollfds, handles, FALSE,
>>                                               time_left);
>>           }
>> -- 
>> 2.20.1
>>
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>
-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/



More information about the dev mailing list