pixman: Changes to 'upstream-unstable'
configure.ac | 22 -
pixman/pixman-arm-simd-asm.S | 41 +++
pixman/pixman-arm-simd.c | 6
pixman/pixman-general.c | 18 -
pixman/pixman-implementation.c | 16 +
pixman/pixman-mmx.c | 64 -----
pixman/pixman-vmx.c | 492 ++++++++++++++++-------------------------
pixman/pixman.c | 17 -
test/Makefile.sources | 2
test/affine-bench.c | 24 +-
test/cover-test.c | 449 +++++++++++++++++++++++++++++++++++++
test/fence-image-self-test.c | 239 +++++++++++++++++++
test/lowlevel-blt-bench.c | 6
test/scaling-test.c | 66 +++--
test/utils.c | 133 ++++++++++-
test/utils.h | 21 +
16 files changed, 1199 insertions(+), 417 deletions(-)
New commits:
commit fa71d08a81c9bf3f2366ee45474ff868d9e10b8e
Author: Oded Gabbay <oded.gabbay@gmail.com>
Date: Fri Oct 23 17:58:49 2015 +0300
Pre-release version bump to 0.33.4
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
diff --git a/configure.ac b/configure.ac
index b04cc69..dcacff1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
m4_define([pixman_major], 0)
m4_define([pixman_minor], 33)
-m4_define([pixman_micro], 3)
+m4_define([pixman_micro], 4)
m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
commit 9728241bd098bc4260e6cd83997dfecc64adc356
Author: Andrea Canciani <ranma42@gmail.com>
Date: Tue Oct 13 13:35:59 2015 +0200
test: Fix fence-image-self-test on Mac
On MacOS X, according to the manpage of mprotect(), "When a program
violates the protections of a page, it gets a SIGBUS or SIGSEGV
signal.", but fence-image-self-test was only accepting a SIGSEGV as
notification of invalid access.
Fixes fence-image-self-test
Reviewed-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
diff --git a/test/fence-image-self-test.c b/test/fence-image-self-test.c
index c883038..c80b3cf 100644
--- a/test/fence-image-self-test.c
+++ b/test/fence-image-self-test.c
@@ -73,7 +73,7 @@ prinfo (const char *fmt, ...)
}
static void
-do_expect_segv (void (*fn)(void *), void *data)
+do_expect_signal (void (*fn)(void *), void *data)
{
struct sigaction sa;
@@ -82,6 +82,8 @@ do_expect_segv (void (*fn)(void *), void *data)
sa.sa_sigaction = segv_handler;
if (sigaction (SIGSEGV, &sa, NULL) == -1)
die ("sigaction failed", errno);
+ if (sigaction (SIGBUS, &sa, NULL) == -1)
+ die ("sigaction failed", errno);
(*fn)(data);
@@ -96,7 +98,7 @@ do_expect_segv (void (*fn)(void *), void *data)
* to exit with success, and return failure otherwise.
*/
static pixman_bool_t
-expect_segv (void (*fn)(void *), void *data)
+expect_signal (void (*fn)(void *), void *data)
{
pid_t pid, wp;
int status;
@@ -106,7 +108,7 @@ expect_segv (void (*fn)(void *), void *data)
die ("fork failed", errno);
if (pid == 0)
- do_expect_segv (fn, data); /* never returns */
+ do_expect_signal (fn, data); /* never returns */
wp = waitpid (pid, &status, 0);
if (wp != pid)
@@ -131,9 +133,9 @@ test_read_fault (uint8_t *p, int offset)
{
prinfo ("*(uint8_t *)(%p + %d)", p, offset);
- if (expect_segv (read_u8, p + offset))
+ if (expect_signal (read_u8, p + offset))
{
- prinfo ("\tSEGV OK\n");
+ prinfo ("\tsignal OK\n");
return TRUE;
}
diff --git a/test/utils.c b/test/utils.c
index 8657966..f8e42a5 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -471,9 +471,9 @@ fence_image_destroy (pixman_image_t *image, void *data)
* min_width is only a minimum width for the image. The width is aligned up
* for the row size to be divisible by both page size and pixel size.
*
- * If stride_fence is true, the additional page on each row will be armed
- * to cause SIGSEVG on all accesses. This should catch all accesses outside
- * the valid row pixels.
+ * If stride_fence is true, the additional page on each row will be
+ * armed to cause SIGSEGV or SIGBUS on all accesses. This should catch
+ * all accesses outside the valid row pixels.
*/
pixman_image_t *
fence_image_create_bits (pixman_format_code_t format,
commit 7de61d8d14e84623b6fa46506eb74f938287f536
Author: Matt Turner <mattst88@gmail.com>
Date: Sun Oct 11 14:44:46 2015 -0700
mmx: Use MMX2 intrinsics from xmmintrin.h directly.
We had lots of hacks to handle the inability to include xmmintrin.h
without compiling with -msse (lest SSE instructions be used in
pixman-mmx.c). Some recent version of gcc relaxed this restriction.
Change configure.ac to test that xmmintrin.h can be included and that we
can use some intrinsics from it, and remove the work-around code from
pixman-mmx.c.
This evidently allows gcc 4.9.3 to optimize better as well:
text data bss dec hex filename
657078 30848 680 688606 a81de libpixman-1.so.0.33.3 before
656710 30848 680 688238 a806e libpixman-1.so.0.33.3 after
Reviewed-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
Tested-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
Signed-off-by: Matt Turner <mattst88@gmail.com>
diff --git a/configure.ac b/configure.ac
index 424bfd3..b04cc69 100644
--- a/configure.ac
+++ b/configure.ac
@@ -347,21 +347,14 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#error "Need GCC >= 3.4 for MMX intrinsics"
#endif
#include <mmintrin.h>
+#include <xmmintrin.h>
int main () {
__m64 v = _mm_cvtsi32_si64 (1);
__m64 w;
- /* Some versions of clang will choke on K */
- asm ("pshufw %2, %1, %0\n\t"
- : "=y" (w)
- : "y" (v), "K" (5)
- );
-
- /* Some versions of clang will choke on this */
- asm ("pmulhuw %1, %0\n\t"
- : "+y" (w)
- : "y" (v)
- );
+ /* Test some intrinsics from xmmintrin.h */
+ w = _mm_shuffle_pi16(v, 5);
+ w = _mm_mulhi_pu16(w, w);
return _mm_cvtsi64_si32 (v);
}]])], have_mmx_intrinsics=yes)
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 05c48a4..88c3a39 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -40,6 +40,9 @@
#else
#include <mmintrin.h>
#endif
+#ifdef USE_X86_MMX
+#include <xmmintrin.h>
+#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
@@ -59,66 +62,7 @@ _mm_empty (void)
}
#endif
-#ifdef USE_X86_MMX
-# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
-# include <xmmintrin.h>
-# else
-/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
- * instructions to be generated that we don't want. Just duplicate the
- * functions we want to use. */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_pi8 (__m64 __A)
-{
- int ret;
-
- asm ("pmovmskb %1, %0\n\t"
- : "=r" (ret)
- : "y" (__A)
- );
-
- return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_pu16 (__m64 __A, __m64 __B)
-{
- asm ("pmulhuw %1, %0\n\t"
- : "+y" (__A)
- : "y" (__B)
- );
- return __A;
-}
-
-# ifdef __OPTIMIZE__
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
-{
- __m64 ret;
-
- asm ("pshufw %2, %1, %0\n\t"
- : "=y" (ret)
- : "y" (__A), "K" (__N)
- );
-
- return ret;
-}
-# else
-# define _mm_shuffle_pi16(A, N) \
- ({ \
- __m64 ret; \
- \
- asm ("pshufw %2, %1, %0\n\t" \
- : "=y" (ret) \
- : "y" (A), "K" ((const int8_t)N) \
- ); \
- \
- ret; \
- })
-# endif
-# endif
-#endif
-
-#ifndef _MSC_VER
+#ifndef _MM_SHUFFLE
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif
commit 90e62c086766afffd289a321c7de8ea4b5cac87d
Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
Date: Fri Sep 4 15:39:00 2015 +0300
vmx: implement fast path vmx_composite_over_n_8888
Running "lowlevel-blt-bench over_n_8888" on Playstation3 3.2GHz,
Gentoo ppc (32-bit userland) gave the following results:
before: over_n_8888 = L1: 147.47 L2: 205.86 M:121.07
after: over_n_8888 = L1: 287.27 L2: 261.09 M:133.48
Cairo non-trimmed benchmarks on POWER8, 3.4GHz 8 Cores:
ocitysmap 659.69 -> 611.71 : 1.08x speedup
xfce4-terminal-a1 2725.22 -> 2547.47 : 1.07x speedup
Signed-off-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 3eaa866..41efdcf 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -2628,6 +2628,58 @@ vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
}
static void
+vmx_composite_over_n_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t *dst_line, *dst;
+ uint32_t src, ia;
+ int i, w, dst_stride;
+ vector unsigned int vdst, vsrc, via;
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ vsrc = (vector unsigned int){src, src, src, src};
+ via = negate (splat_alpha (vsrc));
+ ia = ALPHA_8 (~src);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ w = width;
+
+ while (w && ((uintptr_t)dst & 15))
+ {
+ uint32_t d = *dst;
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
+ *dst++ = d;
+ w--;
+ }
+
+ for (i = w / 4; i > 0; i--)
+ {
+ vdst = pix_multiply (load_128_aligned (dst), via);
+ save_128_aligned (dst, pix_add (vsrc, vdst));
+ dst += 4;
+ }
+
+ for (i = w % 4; --i >= 0;)
+ {
+ uint32_t d = dst[i];
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
+ dst[i] = d;
+ }
+ }
+}
+
+static void
vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
@@ -2936,6 +2988,8 @@ FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER,
static const pixman_fast_path_t vmx_fast_paths[] =
{
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, vmx_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, vmx_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
commit 2876d8d3dd6a71cb9eb3ac93e5b9c18b71a452da
Author: Ben Avison <bavison@riscosopen.org>
Date: Fri Sep 4 03:09:20 2015 +0100
affine-bench: remove 8e margin from COVER area
Patch "Remove the 8e extra safety margin in COVER_CLIP analysis" reduced
the required image area for setting the COVER flags in
pixman.c:analyze_extent(). Do the same reduction in affine-bench.
Leaving the old calculations in place would be very confusing for anyone
reading the code.
Also add a comment that explains how affine-bench wants to hit the COVER
paths. This explains why the intricate extent calculations are copied
from pixman.c.
[Pekka: split patch, change comments, write commit message]
Signed-off-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
Reviewed-by: Ben Avison <bavison@riscosopen.org>
diff --git a/test/affine-bench.c b/test/affine-bench.c
index 9e0121e..86bf46e 100644
--- a/test/affine-bench.c
+++ b/test/affine-bench.c
@@ -395,14 +395,26 @@ main (int argc, char *argv[])
return EXIT_FAILURE;
}
+ /* Compute required extents for source and mask image so they qualify
+ * for COVER fast paths and get the flags in pixman.c:analyze_extent().
+ * These computations are for FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,
+ * but at the same time they also allow COVER_CLIP_NEAREST.
+ */
compute_transformed_extents (&binfo.transform, &dest_box, &transformed);
- /* The source area is expanded by a tiny bit (8/65536th pixel)
- * to match the calculation of the COVER_CLIP flags in analyze_extent()
+ xmin = pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2);
+ ymin = pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2);
+ xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2);
+ ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2);
+ /* Note:
+ * The upper limits can be reduced to the following when fetchers
+ * are guaranteed to not access pixels with zero weight. This concerns
+ * particularly all bilinear samplers.
+ *
+ * xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2 - pixman_fixed_e);
+ * ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2 - pixman_fixed_e);
+ * This is equivalent to subtracting 0.5 and rounding up, rather than
+ * subtracting 0.5, rounding down and adding 1.
*/
- xmin = pixman_fixed_to_int (transformed.x1 - 8 * pixman_fixed_e - pixman_fixed_1 / 2);
- ymin = pixman_fixed_to_int (transformed.y1 - 8 * pixman_fixed_e - pixman_fixed_1 / 2);
- xmax = pixman_fixed_to_int (transformed.x2 + 8 * pixman_fixed_e + pixman_fixed_1 / 2);
- ymax = pixman_fixed_to_int (transformed.y2 + 8 * pixman_fixed_e + pixman_fixed_1 / 2);
binfo.src_x = -xmin;
binfo.src_y = -ymin;
commit 0e2e9751282b19280c92be4a80c5ae476bae0ce4
Author: Ben Avison <bavison@riscosopen.org>
Date: Fri Sep 4 03:09:20 2015 +0100
Remove the 8e extra safety margin in COVER_CLIP analysis
As discussed in
http://lists.freedesktop.org/archives/pixman/2015-August/003905.html
the 8 * pixman_fixed_e (8e) adjustment which was applied to the transformed
coordinates is a legacy of rounding errors which used to occur in old
versions of Pixman, but which no longer apply. For any affine transform,
you are now guaranteed to get the same result by transforming the upper
coordinate as though you transform the lower coordinate and add (size-1)
steps of the increment in source coordinate space. No projective
transform routines use the COVER_CLIP flags, so they cannot be affected.
Proof by Siarhei Siamashka:
Let's take a look at the following affine transformation matrix (with 16.16
fixed point values) and two vectors:
    | a  b  c       |
M = | d  e  f       |
    | 0  0  0x10000 |

    | x_dst   |
P = | y_dst   |
    | 0x10000 |

        | 0x10000 |
ONE_X = | 0       |
        | 0       |
The current matrix multiplication code does the following calculations:
        | (a * x_dst + b * y_dst + 0x8000) / 0x10000 + c |
M * P = | (d * x_dst + e * y_dst + 0x8000) / 0x10000 + f |
        | 0x10000                                        |
These calculations are not perfectly exact, and rounding errors may
occur because the integer coordinates are adjusted by 0.5 (or 0x8000
in the 16.16 fixed point format) before doing matrix multiplication.
For example, if the 'a' coefficient is an odd number and 'b' is zero,
then we are losing some of the least significant bits when dividing by
0x10000.
So we need to strictly prove that the following expression is always
true even though we have to deal with rounding:
                                      | a |
M * (P + ONE_X) - M * P = M * ONE_X = | d |
                                      | 0 |
or
((a * (x_dst + 0x10000) + b * y_dst + 0x8000) / 0x10000 + c)
-
((a * x_dst + b * y_dst + 0x8000) / 0x10000 + c)
=
a
It's easy to see that this is equivalent to
a + ((a * x_dst + b * y_dst + 0x8000) / 0x10000 + c)
- ((a * x_dst + b * y_dst + 0x8000) / 0x10000 + c)
=
a
Which means that stepping exactly by one pixel horizontally in the
destination image space (advancing 'x_dst' by 0x10000) is the same as
changing the transformed 'x_src' coordinate in the source image space
exactly by 'a'. The same applies to the vertical direction too.
Repeating these steps, we can reach any pixel in the source image
space and get exactly the same fixed point coordinates as doing
matrix multiplications per each pixel.
By the way, the older matrix multiplication implementation, which was
relying on less accurate calculations with three intermediate roundings
"((a + 0x8000) >> 16) + ((b + 0x8000) >> 16) + ((c + 0x8000) >> 16)",
also has the same properties. However reverting
http://cgit.freedesktop.org/pixman/commit/?id=ed39992564beefe6b12f81e842caba11aff98a9c
and applying this "Remove the 8e extra safety margin in COVER_CLIP
analysis" patch makes the cover test fail. The real reason why it fails
is that the old pixman code was using "pixman_transform_point_3d()"
function
http://cgit.freedesktop.org/pixman/tree/pixman/pixman-matrix.c?id=pixman-0.28.2#n49
for getting the transformed coordinate of the top left corner pixel
in the image scaling code, but at the same time using a different
"pixman_transform_point()" function
http://cgit.freedesktop.org/pixman/tree/pixman/pixman-matrix.c?id=pixman-0.28.2#n82
in the extents calculation code for setting the cover flag. And these
functions did the intermediate rounding differently. That's why the 8e
safety margin was needed.
** proof ends
However, for COVER_CLIP_NEAREST, the actual margins added were not 8e.
Because the half-way cases round down, that is, coordinate 0 hits pixel
index -1 while coordinate e hits pixel index 0, the extra safety margins
were actually 7e to the left and up, and 9e to the right and down. This
patch removes the 7e and 9e margins and restores the -e adjustment
required for NEAREST sampling in Pixman. For reference, see
pixman/rounding.txt.
For COVER_CLIP_BILINEAR, the margins were exactly 8e as there are no
additional offsets to be restored, so simply removing the 8e additions
is enough.
Proof:
All implementations must give the same numerical results as
bits_image_fetch_pixel_nearest() / bits_image_fetch_pixel_bilinear().
The former does
int x0 = pixman_fixed_to_int (x - pixman_fixed_e);
which maps directly to the new test for the nearest flag, when you consider
that x0 must fall in the interval [0,width).
The latter does
x1 = x - pixman_fixed_1 / 2;
x1 = pixman_fixed_to_int (x1);
x2 = x1 + 1;
When you write a COVER path, you take advantage of the assumption that
both x1 and x2 fall in the interval [0, width).
As samplers are allowed to fetch the pixel at x2 unconditionally, we
require
x1 >= 0
x2 < width
so
x - pixman_fixed_1 / 2 >= 0
x - pixman_fixed_1 / 2 + pixman_fixed_1 < width * pixman_fixed_1
so
pixman_fixed_to_int (x - pixman_fixed_1 / 2) >= 0
pixman_fixed_to_int (x + pixman_fixed_1 / 2) < width
which matches the source code lines for the bilinear case, once you delete
the lines that add the 8e margin.
Signed-off-by: Ben Avison <bavison@riscosopen.org>
[Pekka: adjusted commit message, left affine-bench changes for another patch]
[Pekka: add commit message parts from Siarhei]
Signed-off-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
Reviewed-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
Reviewed-by: Ben Avison <bavison@riscosopen.org>
diff --git a/pixman/pixman.c b/pixman/pixman.c
index a07c577..f932eac 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -497,21 +497,12 @@ analyze_extent (pixman_image_t *image,
if (!compute_transformed_extents (transform, extents, &transformed))
return FALSE;
- /* Expand the source area by a tiny bit so account of different rounding that
- * may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
- * 0.5 so this won't cause the area computed to be overly pessimistic.
- */
- transformed.x1 -= 8 * pixman_fixed_e;
- transformed.y1 -= 8 * pixman_fixed_e;
- transformed.x2 += 8 * pixman_fixed_e;
- transformed.y2 += 8 * pixman_fixed_e;
-
if (image->common.type == BITS)
{
- if (pixman_fixed_to_int (transformed.x1) >= 0 &&
- pixman_fixed_to_int (transformed.y1) >= 0 &&
- pixman_fixed_to_int (transformed.x2) < image->bits.width &&
- pixman_fixed_to_int (transformed.y2) < image->bits.height)
+ if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_e) >= 0 &&
+ pixman_fixed_to_int (transformed.y1 - pixman_fixed_e) >= 0 &&
+ pixman_fixed_to_int (transformed.x2 - pixman_fixed_e) < image->bits.width &&
+ pixman_fixed_to_int (transformed.y2 - pixman_fixed_e) < image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
}
commit 23525b4ea5bc2dd67f8f65b90d023b6580ecbc36
Author: Ben Avison <bavison@riscosopen.org>
Date: Tue Sep 22 12:43:25 2015 +0100
pixman-general: Tighten up calculation of temporary buffer sizes
Each of the aligns can only add a maximum of 15 bytes to the space
requirement. This permits some edge cases to use the stack buffer where
previously it would have deduced that a heap buffer was required.
Reviewed-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index fa88463..6141cb0 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -158,9 +158,9 @@ general_composite_rect (pixman_implementation_t *imp,
if (width <= 0 || _pixman_multiply_overflows_int (width, Bpp * 3))
return;
- if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 32 * 3)
+ if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 15 * 3)
{
- scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 32 * 3);
+ scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 15 * 3);
if (!scanline_buffer)
return;
commit 8b49d4b6b460d0c9299bca4ccddd7cd00d8f8441
Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
Date: Tue Sep 22 04:25:40 2015 +0300
pixman-general: Fix stack related pointer arithmetic overflow
As https://bugs.freedesktop.org/show_bug.cgi?id=92027#c6 explains,
the stack is allocated at the very top of the process address space
in some configurations (32-bit x86 systems with ASLR disabled).
And the careless computations done with the 'dest_buffer' pointer
may overflow, failing the buffer upper limit check.
The problem can be reproduced using the 'stress-test' program,
which segfaults when executed via setarch:
export CFLAGS="-O2 -m32" && ./autogen.sh
./configure --disable-libpng --disable-gtk && make
setarch i686 -R test/stress-test
This patch introduces the required corrections. The extra check
for negative 'width' may be redundant (the invalid 'width' value
is not supposed to reach here), but it's better to play safe
when dealing with the buffers allocated on stack.
Reported-by: Ludovic Courtès <ludo@gnu.org>
Signed-off-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
Reviewed-by: soren.sandmann@gmail.com
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index 7cdea29..fa88463 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -155,23 +155,21 @@ general_composite_rect (pixman_implementation_t *imp,
#define ALIGN(addr) \
((uint8_t *)((((uintptr_t)(addr)) + 15) & (~15)))
- src_buffer = ALIGN (scanline_buffer);
- mask_buffer = ALIGN (src_buffer + width * Bpp);
- dest_buffer = ALIGN (mask_buffer + width * Bpp);
+ if (width <= 0 || _pixman_multiply_overflows_int (width, Bpp * 3))
+ return;
- if (ALIGN (dest_buffer + width * Bpp) >
- scanline_buffer + sizeof (stack_scanline_buffer))
+ if (width * Bpp * 3 > sizeof (stack_scanline_buffer) - 32 * 3)
{
scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 32 * 3);
if (!scanline_buffer)
return;
-
- src_buffer = ALIGN (scanline_buffer);
- mask_buffer = ALIGN (src_buffer + width * Bpp);
- dest_buffer = ALIGN (mask_buffer + width * Bpp);
}
+ src_buffer = ALIGN (scanline_buffer);
+ mask_buffer = ALIGN (src_buffer + width * Bpp);
+ dest_buffer = ALIGN (mask_buffer + width * Bpp);
+
if (width_flag == ITER_WIDE)
{
/* To make sure there aren't any NANs in the buffers */
commit 4297e9058d252cac653723fe0b1bee559fbac3a4
Author: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Thu Sep 17 15:43:27 2015 +0200
test: add a check for FE_DIVBYZERO
Some architectures, such as Microblaze and Nios2, currently do not
implement FE_DIVBYZERO, even though they have <fenv.h> and
feenableexcept(). This commit adds a configure.ac check to verify
whether FE_DIVBYZERO is defined or not, and if not, disables the
problematic code in test/utils.c.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
diff --git a/configure.ac b/configure.ac
index f93cc30..424bfd3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -891,6 +891,11 @@ if test x$have_feenableexcept = xyes; then
AC_DEFINE(HAVE_FEENABLEEXCEPT, 1, [Whether we have feenableexcept()])
fi
+AC_CHECK_DECL([FE_DIVBYZERO],
+ [AC_DEFINE(HAVE_FEDIVBYZERO, 1, [Whether we have FE_DIVBYZERO])],
+ [],
+ [[#include <fenv.h>]])
+
AC_CHECK_FUNC(gettimeofday, have_gettimeofday=yes, have_gettimeofday=no)
AC_CHECK_HEADER(sys/time.h, have_sys_time_h=yes, have_sys_time_h=no)
if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then
diff --git a/test/utils.c b/test/utils.c
index 222d4d5..8657966 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -966,9 +966,11 @@ enable_divbyzero_exceptions (void)
{
#ifdef HAVE_FENV_H
#ifdef HAVE_FEENABLEEXCEPT
+#ifdef HAVE_FEDIVBYZERO
feenableexcept (FE_DIVBYZERO);
#endif
#endif
+#endif
}
void
commit 8189fad9610981d5b4dcd8f8980ff169110fb33c
Author: Oded Gabbay <oded.gabbay@gmail.com>
Date: Sun Sep 6 11:45:20 2015 +0300
vmx: Remove unused expensive functions
Now that we replaced the expensive functions with better performing
alternatives, we should remove them so they will not be used again.
Running Cairo benchmark on trimmed traces gave the following results:
POWER8, 8 cores, 3.4GHz, RHEL 7.2 ppc64le.
Speedups
========
t-firefox-scrolling 1232.30 -> 1096.55 : 1.12x
t-gnome-terminal-vim 613.86 -> 553.10 : 1.11x
t-evolution 405.54 -> 371.02 : 1.09x
t-firefox-talos-gfx 919.31 -> 862.27 : 1.07x
t-gvim 653.02 -> 616.85 : 1.06x
t-firefox-canvas-alpha 941.29 -> 890.42 : 1.06x
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 7ef8bed..3eaa866 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -35,7 +35,6 @@
#define AVV(x...) {x}
-static vector unsigned int mask_00ff;
static vector unsigned int mask_ff000000;
static vector unsigned int mask_red;
static vector unsigned int mask_green;
@@ -280,20 +279,6 @@ save_128_aligned (uint32_t* data,
}
static force_inline vector unsigned int
-create_mask_16_128 (uint16_t mask)
-{
- uint16_t* src;
- vector unsigned short vsrc;
- DECLARE_SRC_MASK_VAR;
-
- src = &mask;
-
- COMPUTE_SHIFT_MASK (src);
- LOAD_VECTOR (src);
- return (vector unsigned int) vec_splat(vsrc, 0);
-}
-
-static force_inline vector unsigned int
create_mask_1x32_128 (const uint32_t *src)
{
vector unsigned int vsrc;
@@ -311,24 +296,6 @@ create_mask_32_128 (uint32_t mask)
}
static force_inline vector unsigned int
-unpack_32_1x128 (uint32_t data)
-{
- vector unsigned int vdata = {0, 0, 0, data};
- vector unsigned short lo;
-
- lo = (vector unsigned short)
-#ifdef WORDS_BIGENDIAN
- vec_mergel ((vector unsigned char) AVV(0),
- (vector unsigned char) vdata);
-#else
- vec_mergel ((vector unsigned char) vdata,
- (vector unsigned char) AVV(0));
-#endif
-
- return (vector unsigned int) lo;
-}
-
-static force_inline vector unsigned int
unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
{
vector unsigned char lo;
@@ -437,38 +404,6 @@ unpack_565_to_8888 (vector unsigned int lo)
return vec_or (rb, g);
}
-static force_inline uint32_t
-pack_1x128_32 (vector unsigned int data)
-{
- vector unsigned char vpack;
-
- vpack = vec_packsu((vector unsigned short) data,
- (vector unsigned short) AVV(0));
-
- return vec_extract((vector unsigned int) vpack, 1);
-}
-
-static force_inline vector unsigned int
-pack_2x128_128 (vector unsigned int lo, vector unsigned int hi)
-{
- vector unsigned char vpack;
-
- vpack = vec_packsu((vector unsigned short) hi,
- (vector unsigned short) lo);
-
- return (vector unsigned int) vpack;
-}
-
-static force_inline void
-negate_2x128 (vector unsigned int data_lo,
- vector unsigned int data_hi,
- vector unsigned int* neg_lo,
- vector unsigned int* neg_hi)
-{
- *neg_lo = vec_xor (data_lo, mask_00ff);
- *neg_hi = vec_xor (data_hi, mask_00ff);
-}
-
static force_inline int
is_opaque (vector unsigned int x)
{
@@ -499,136 +434,6 @@ is_transparent (vector unsigned int x)
return (cmp_result & 0x8888) == 0x8888;
}
-static force_inline vector unsigned int
-expand_pixel_8_1x128 (uint8_t data)
-{
- vector unsigned int vdata;
-
- vdata = unpack_32_1x128 ((uint32_t) data);
-
-#ifdef WORDS_BIGENDIAN
- return vec_perm (vdata, vdata,
- (vector unsigned char)AVV (
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
-#else
- return vec_perm (vdata, vdata,
- (vector unsigned char)AVV (
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
-#endif
-}
-
-static force_inline vector unsigned int
-expand_alpha_1x128 (vector unsigned int data)
-{
-#ifdef WORDS_BIGENDIAN
- return vec_perm (data, data,
- (vector unsigned char)AVV (
- 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
- 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
-#else
- return vec_perm (data, data,
- (vector unsigned char)AVV (
- 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
- 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
-#endif
-}
-
-static force_inline void
-expand_alpha_2x128 (vector unsigned int data_lo,
- vector unsigned int data_hi,
- vector unsigned int* alpha_lo,
- vector unsigned int* alpha_hi)
-{
-
- *alpha_lo = expand_alpha_1x128(data_lo);
- *alpha_hi = expand_alpha_1x128(data_hi);
-}
-
-static force_inline void
-expand_alpha_rev_2x128 (vector unsigned int data_lo,
- vector unsigned int data_hi,
- vector unsigned int* alpha_lo,
- vector unsigned int* alpha_hi)
-{
-#ifdef WORDS_BIGENDIAN
- *alpha_lo = vec_perm (data_lo, data_lo,
- (vector unsigned char)AVV (
- 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
- 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
-
- *alpha_hi = vec_perm (data_hi, data_hi,
- (vector unsigned char)AVV (
- 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
- 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
-#else
- *alpha_lo = vec_perm (data_lo, data_lo,
- (vector unsigned char)AVV (
- 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
- 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
-
- *alpha_hi = vec_perm (data_hi, data_hi,
- (vector unsigned char)AVV (
- 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
- 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
-#endif
-}
-
-static force_inline void
-pix_multiply_2x128 (vector unsigned int* data_lo,
- vector unsigned int* data_hi,
- vector unsigned int* alpha_lo,
- vector unsigned int* alpha_hi,
- vector unsigned int* ret_lo,
- vector unsigned int* ret_hi)
-{
- *ret_lo = pix_multiply(*data_lo, *alpha_lo);
- *ret_hi = pix_multiply(*data_hi, *alpha_hi);
-}
-
-static force_inline void
-over_2x128 (vector unsigned int* src_lo,
- vector unsigned int* src_hi,
- vector unsigned int* alpha_lo,
- vector unsigned int* alpha_hi,
- vector unsigned int* dst_lo,
- vector unsigned int* dst_hi)
-{
- vector unsigned int t1, t2;
-
- negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
-
- pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
-
- *dst_lo = (vector unsigned int)
- vec_adds ((vector unsigned char) *src_lo,
- (vector unsigned char) *dst_lo);
-
- *dst_hi = (vector unsigned int)
- vec_adds ((vector unsigned char) *src_hi,
- (vector unsigned char) *dst_hi);
-}
-
-static force_inline void
-in_over_2x128 (vector unsigned int* src_lo,
- vector unsigned int* src_hi,
- vector unsigned int* alpha_lo,
- vector unsigned int* alpha_hi,
- vector unsigned int* mask_lo,
- vector unsigned int* mask_hi,
- vector unsigned int* dst_lo,
- vector unsigned int* dst_hi)
-{
- vector unsigned int s_lo, s_hi;
- vector unsigned int a_lo, a_hi;
-
- pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
- pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
-
- over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
-}
-
static force_inline uint32_t
core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
{
@@ -3259,7 +3064,6 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
/* VMX constants */
Reply to: