[ovs-dev] [PATCH v4 1/2] Optimize the poll loop for poll_immediate_wake()
Anton Ivanov
anton.ivanov at cambridgegreys.com
Tue Aug 3 09:33:50 UTC 2021
On 03/08/2021 00:29, Gaëtan Rivet wrote:
> On Tue, Jul 20, 2021, at 17:06, anton.ivanov at cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov at cambridgegreys.com>
>>
>> If we are not obtaining any useful information out of the poll(),
>> such as is a fd busy or not, we do not need to do a poll() if
>> an immediate_wake() has been requested.
>>
>> This cuts out all the pollfd hash additions, forming the poll
>> arguments and the actual poll() after a call to
>> poll_immediate_wake()
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov at cambridgegreys.com>
> Hi Anton,
>
> Patch generally looks good to me. A few nits below.
>
>> ---
>> lib/poll-loop.c | 80 ++++++++++++++++++++++++++++++++-----------------
>> lib/timeval.c | 13 +++++++-
>> 2 files changed, 65 insertions(+), 28 deletions(-)
>>
>> diff --git a/lib/poll-loop.c b/lib/poll-loop.c
>> index 4e751ff2c..9452f6e22 100644
>> --- a/lib/poll-loop.c
>> +++ b/lib/poll-loop.c
>> @@ -107,6 +107,13 @@ poll_create_node(int fd, HANDLE wevent, short int
>> events, const char *where)
>>
>> COVERAGE_INC(poll_create_node);
>>
>> + if (loop->timeout_when == LLONG_MIN) {
>> + /* There was a previous request to bail out of this poll loop.
>> + * There is no point to engage in yack shaving a poll hmap.
>> + */
>> + return;
>> + }
>> +
>> /* Both 'fd' and 'wevent' cannot be set. */
>> ovs_assert(!fd != !wevent);
>>
>> @@ -181,8 +188,21 @@ poll_wevent_wait_at(HANDLE wevent, const char *where)
>> void
>> poll_timer_wait_at(long long int msec, const char *where)
>> {
>> - long long int now = time_msec();
>> + long long int now;
>> long long int when;
>> + struct poll_loop *loop = poll_loop();
>> +
>> + /* We have an outstanding request for an immediate wake -
>> + * either explicit (from poll_immediate_wake()) or as a
>> + * result of a previous timeout calculation which gave
>> + * a negative timeout.
>> + * No point trying to recalculate the timeout.
>> + */
>> + if (loop->timeout_when == LLONG_MIN) {
>> + return;
>> + }
>> +
>> + now = time_msec();
>>
>> if (msec <= 0) {
>> /* Wake up immediately. */
>> @@ -229,7 +249,7 @@ poll_timer_wait_until_at(long long int when, const
>> char *where)
>> void
>> poll_immediate_wake_at(const char *where)
>> {
>> - poll_timer_wait_at(0, where);
>> + poll_timer_wait_at(LLONG_MIN, where);
>> }
>>
>> /* Logs, if appropriate, that the poll loop was awakened by an event
>> @@ -320,49 +340,55 @@ poll_block(void)
>> {
>> struct poll_loop *loop = poll_loop();
>> struct poll_node *node;
>> - struct pollfd *pollfds;
>> + struct pollfd *pollfds = NULL;
>> HANDLE *wevents = NULL;
>> int elapsed;
>> - int retval;
>> + int retval = 0;
>> int i;
>>
>> - /* Register fatal signal events before actually doing any real work for
>> - * poll_block. */
>> - fatal_signal_wait();
>>
>> if (loop->timeout_when == LLONG_MIN) {
>> COVERAGE_INC(poll_zero_timeout);
>> + } else {
>> + /* Register fatal signal events only if we intend to do real work for
>> + * poll_block.
>> + */
>> + fatal_signal_wait();
> When should we avoid registering fatal signal events?
> Even for immediate wake, time_poll() will execute for general
> upkeep (coverage + RCU), should a fatal signal handler not
> be installed in that case?
The fatal signal events are handled in ALL cases in the fatal_signal_run() which is towards the end of the poll loop.
fatal_singal_wait only adds the pipe to the fds. If we are not polling on that fd anyway, no point adding it either.
>
>> }
>>
>> timewarp_run();
>> - pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
>> + if (loop->timeout_when != LLONG_MIN) {
>> + pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
>>
>> #ifdef _WIN32
>> - wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
>> + wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
>> #endif
>>
>> - /* Populate with all the fds and events. */
>> - i = 0;
>> - HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
>> - pollfds[i] = node->pollfd;
>> + /* Populate with all the fds and events. */
>> + i = 0;
>> + HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
>> + pollfds[i] = node->pollfd;
>> #ifdef _WIN32
>> - wevents[i] = node->wevent;
>> - if (node->pollfd.fd && node->wevent) {
>> - short int wsa_events = 0;
>> - if (node->pollfd.events & POLLIN) {
>> - wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
>> + wevents[i] = node->wevent;
>> + if (node->pollfd.fd && node->wevent) {
>> + short int wsa_events = 0;
>> + if (node->pollfd.events & POLLIN) {
>> + wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
>> + }
>> + if (node->pollfd.events & POLLOUT) {
>> + wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
>> + }
>> + WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
>> }
>> - if (node->pollfd.events & POLLOUT) {
>> - wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
>> - }
>> - WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
>> - }
>> #endif
>> - i++;
>> - }
>> + i++;
>> + }
>>
>> - retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
>> - loop->timeout_when, &elapsed);
>> + retval = time_poll(pollfds, hmap_count(&loop->poll_nodes),
>> wevents,
>> + loop->timeout_when, &elapsed);
>> + } else {
>> + retval = time_poll(NULL, 0, NULL, loop->timeout_when,
>> &elapsed);
>> + }
>> if (retval < 0) {
>> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
>> diff --git a/lib/timeval.c b/lib/timeval.c
>> index 193c7bab1..ef09a15e0 100644
>> --- a/lib/timeval.c
>> +++ b/lib/timeval.c
>> @@ -305,6 +305,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
>> HANDLE *handles OVS_UNUSED,
>> for (;;) {
>> long long int now = time_msec();
>> int time_left;
>> + retval = 0;
>>
>> if (now >= timeout_when) {
>> time_left = 0;
>> @@ -323,7 +324,14 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
>> HANDLE *handles OVS_UNUSED,
>> }
>>
>> #ifndef _WIN32
>> - retval = poll(pollfds, n_pollfds, time_left);
>> + /* If timeout_when is LLONG_MIN, we should skip the poll().
>> + * We do not skip on time_left == 0, because we may have
>> + * ended up with time_left = 0 after a poll with valid
>> + * pollfds was interrupted and restarted.
>> + */
>> + if (timeout_when != LLONG_MIN) {
> The _WIN32 case checks 'n_pollfds != 0' before calling.
> Should both be aligned? Maybe the _WIN32 should avoid the call if
> 'n_pollfds == 0 || timeout_when == LLONG_MIN'?
Sure. I try to avoid touching the WIN32 code as I do not have an environment to test it.
>
>> + retval = poll(pollfds, n_pollfds, time_left);
>> + }
>> if (retval < 0) {
>> retval = -errno;
>> }
>> @@ -331,6 +339,9 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
>> HANDLE *handles OVS_UNUSED,
>> if (n_pollfds > MAXIMUM_WAIT_OBJECTS) {
>> VLOG_ERR("Cannot handle more than maximum wait objects\n");
>> } else if (n_pollfds != 0) {
>> + /* If we are doing an immediate_wake shortcut n_pollfds is
>> + * zero, so we skip the actual call.
>> + */
> I find this sentence hard to read.
> I would suggest instead:
>
> /* n_pollfds can be zero on immediate_wake.
> * In that case the call is not necessary. */
>
>> retval = WaitForMultipleObjects(n_pollfds, handles, FALSE,
>> time_left);
>> }
>> --
>> 2.20.1
>>
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>
--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/
More information about the dev
mailing list