Bug#846996: The mini-if is really not needed
I just tested with the following test program. The result is as expected:
#include <x86intrin.h>
#include <cstdio>
#include <cstdint>
// Prints the 16-bit trailing-zero and leading-zero counts reported by the
// __tzcnt_u16 and __lzcnt16 intrinsics for 0 and for every single-bit
// 16-bit value. The first line prints 16 as a reference value to compare
// the zero-input rows against.
//
// Returns 0 unconditionally.
int main(void)
{
    // Reference line; %u matches the unsigned literal.
    printf("0 -> %u\n\n", 16U);

    puts("__tzcnt_u16:");
    printf("0x%x -> %d\n", 0U, __tzcnt_u16(0));
    // i takes the values 0x0001, 0x0002, ..., 0x8000. The following shift
    // does not fit in 16 bits, so i is truncated to 0 and the loop ends.
    for (uint16_t i = 1; i > 0; i <<= 1)
    {
        // %x expects unsigned int; i would otherwise be promoted to int.
        printf("0x%x -> %d\n", static_cast<unsigned>(i), __tzcnt_u16(i));
    }

    puts("\n__lzcnt16:");
    printf("0x%x -> %d\n", 0U, __lzcnt16(0));
    for (uint16_t i = 1; i > 0; i <<= 1)
    {
        printf("0x%x -> %d\n", static_cast<unsigned>(i), __lzcnt16(i));
    }

    return 0;
}
g++ -march=native -o intrins intrinsictest.cpp
./intrins
Attached is the patch for
/usr/include/x86_64-linux-gnu/qt5/QtCore/qalgorithms.h
--- qalgorithms.h.orig 2016-12-18 15:57:30.279325472 +0100
+++ qalgorithms.h 2016-12-18 23:09:33.129324260 +0100
@@ -47,6 +47,10 @@ QT_WARNING_PUSH
QT_WARNING_DISABLE_GCC("-Wdeprecated-declarations")
QT_WARNING_DISABLE_CLANG("-Wdeprecated-declarations")
+#if defined(__BMI__) && defined(__LZCNT__) && !QT_HAS_BUILTIN(__builtin_ctzs)
+# include <x86intrin.h>
+#endif
+
/*
Warning: The contents of QAlgorithmsPrivate is not a part of the public Qt API
and may be changed from version to version or even be completely removed.
@@ -626,8 +630,10 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCo
Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint16 v) Q_DECL_NOTHROW
{
#if defined(Q_CC_GNU)
-# if QT_HAS_BUILTIN(__builtin_ctzs) || (defined(__LZCNT__) && defined(__BMI__))
+# if QT_HAS_BUILTIN(__builtin_ctzs)
return v ? __builtin_ctzs(v) : 16U;
+# elif (defined(__LZCNT__) && defined(__BMI__))
+ return __tzcnt_u16(v);
# else
return v ? __builtin_ctz(v) : 16U;
# endif
@@ -689,8 +695,10 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCo
Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint16 v) Q_DECL_NOTHROW
{
#if defined(Q_CC_GNU)
-# if QT_HAS_BUILTIN(__builtin_clzs) || (defined(__LZCNT__) && defined(__BMI__))
+# if QT_HAS_BUILTIN(__builtin_clzs)
return v ? __builtin_clzs(v) : 16U;
+# elif (defined(__LZCNT__) && defined(__BMI__))
+ return __lzcnt16(v);
# else
return v ? __builtin_clz(v)-16U : 16U;
# endif
Reply to: