[ovs-dev] [PATCH] lib/util: Only define count_1bits_8 when needed.
Jarno Rajahalme
jrajahalme at nicira.com
Wed Dec 11 23:32:38 UTC 2013
Ben,
I like this, I’ll post a new patch ASAP,
Jarno
On Dec 11, 2013, at 11:33 AM, Ben Pfaff <blp at nicira.com> wrote:
> On Wed, Dec 11, 2013 at 10:54:00AM -0800, Jarno Rajahalme wrote:
>> util.h declares this only when needed; make sure the definition is
>> compiled in only in that case.
>>
>> Signed-off-by: Jarno Rajahalme <jrajahalme at nicira.com>
>
> With this, I still get:
> ../lib/util.c:921:15: warning: symbol 'count_1bits_8' was not declared. Should it be static?
> because the previous declaration of count_1bits_8[] is not in scope at
> the point of definition in util.c.
>
> How about something like this? (Tested only on a 32-bit build without
> __POPCNT__.)
>
> diff --git a/lib/util.c b/lib/util.c
> index 13d41a7..000504c 100644
> --- a/lib/util.c
> +++ b/lib/util.c
> @@ -901,7 +901,7 @@ raw_clz64(uint64_t n)
> }
> #endif
>
> -#if !(__GNUC__ >= 4 && defined(__corei7))
> +#if NEED_COUNT_1BITS_8
> #define INIT1(X) \
> ((((X) & (1 << 0)) != 0) + \
> (((X) & (1 << 1)) != 0) + \
> diff --git a/lib/util.h b/lib/util.h
> index 8d810c2..b158c2f 100644
> --- a/lib/util.h
> +++ b/lib/util.h
> @@ -371,49 +371,55 @@ log_2_ceil(uint64_t n)
> return log_2_floor(n) + !is_pow2(n);
> }
>
> -extern const uint8_t count_1bits_8[256];
> -
> -/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */
> +/* unsigned int count_1bits(uint64_t x):
> + *
> + * Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
> +#if UINTPTR_MAX == UINT64_MAX
> +static inline unsigned int
> +count_1bits(uint64_t x)
> +{
> +#if __GNUC__ >= 4 && __POPCNT__
> + return __builtin_popcountll(x);
> +#else
> + /* This portable implementation is the fastest one we know of for 64
> + * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
> + const uint64_t h55 = UINT64_C(0x5555555555555555);
> + const uint64_t h33 = UINT64_C(0x3333333333333333);
> + const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
> + const uint64_t h01 = UINT64_C(0x0101010101010101);
> + x -= (x >> 1) & h55; /* Count of each 2 bits in-place. */
> + x = (x & h33) + ((x >> 2) & h33); /* Count of each 4 bits in-place. */
> + x = (x + (x >> 4)) & h0F; /* Count of each 8 bits in-place. */
> + return (x * h01) >> 56; /* Sum of all bytes. */
> +#endif
> +}
> +#else /* not 64-bit */
> +#if __GNUC__ >= 4 && __POPCNT__
> static inline unsigned int
> count_1bits_32(uint32_t x)
> {
> -#if __GNUC__ >= 4 && defined(__corei7)
> - /* __builtin_popcount() is fast only when supported by the CPU. */
> return __builtin_popcount(x);
> +}
> #else
> +#define NEED_COUNT_1BITS_8 1
> +extern const uint8_t count_1bits_8[256];
> +static inline unsigned int
> +count_1bits_32(uint32_t x)
> +{
> /* This portable implementation is the fastest one we know of for 32 bits,
> * and faster than GCC __builtin_popcount(). */
> return (count_1bits_8[x & 0xff] +
> count_1bits_8[(x >> 8) & 0xff] +
> count_1bits_8[(x >> 16) & 0xff] +
> count_1bits_8[x >> 24]);
> -#endif
> }
> -
> -/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
> +#endif
> static inline unsigned int
> count_1bits(uint64_t x)
> {
> - if (sizeof(void *) == 8) { /* 64-bit CPU */
> -#if __GNUC__ >= 4 && defined(__corei7)
> - /* __builtin_popcountll() is fast only when supported by the CPU. */
> - return __builtin_popcountll(x);
> -#else
> - /* This portable implementation is the fastest one we know of for 64
> - * bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
> - const uint64_t h55 = UINT64_C(0x5555555555555555);
> - const uint64_t h33 = UINT64_C(0x3333333333333333);
> - const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
> - const uint64_t h01 = UINT64_C(0x0101010101010101);
> - x -= (x >> 1) & h55; /* Count of each 2 bits in-place. */
> - x = (x & h33) + ((x >> 2) & h33); /* Count of each 4 bits in-place. */
> - x = (x + (x >> 4)) & h0F; /* Count of each 8 bits in-place. */
> - return (x * h01) >> 56; /* Sum of all bytes. */
> -#endif
> - } else { /* 32-bit CPU */
> - return count_1bits_32(x) + count_1bits_32(x >> 32);
> - }
> + return count_1bits_32(x) + count_1bits_32(x >> 32);
> }
> +#endif
>
> /* Returns the rightmost 1-bit in 'x' (e.g. 01011000 => 00001000), or 0 if 'x'
> * is 0. */
More information about the dev mailing list