[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

pixman: Changes to 'upstream-experimental'



 .gitignore                        |   13 
 COPYING                           |    1 
 RELEASING                         |   30 
 TODO                              |    5 
 autogen.sh                        |    2 
 configure.ac                      |  187 +-
 libcomp.pc                        |   11 
 pixman/Makefile.am                |   29 
 pixman/pixman-access.c            |  206 +-
 pixman/pixman-arm-common.h        |  273 +++
 pixman/pixman-arm-neon-asm.S      | 1713 ++++++++++++++++++++++
 pixman/pixman-arm-neon-asm.h      |  906 +++++++++++
 pixman/pixman-arm-neon.c          | 2892 +++-----------------------------------
 pixman/pixman-arm-simd-asm.S      |  330 ++++
 pixman/pixman-arm-simd.c          |  225 +-
 pixman/pixman-bits-image.c        |  413 ++++-
 pixman/pixman-compiler.h          |   60 
 pixman/pixman-conical-gradient.c  |    2 
 pixman/pixman-cpu.c               |   14 
 pixman/pixman-edge-imp.h          |   20 
 pixman/pixman-edge.c              |    5 
 pixman/pixman-fast-path.c         | 1004 ++++++++++---
 pixman/pixman-general.c           |   52 
 pixman/pixman-image.c             |  306 +++-
 pixman/pixman-implementation.c    |   55 
 pixman/pixman-mmx.c               |  290 +--
 pixman/pixman-private.h           |  277 ++-
 pixman/pixman-region.c            |  334 +++-
 pixman/pixman-solid-fill.c        |   35 
 pixman/pixman-sse2.c              |  710 +++++----
 pixman/pixman-trap.c              |   14 
 pixman/pixman-utils.c             |  620 --------
 pixman/pixman-vmx.c               |    7 
 pixman/pixman-x64-mmx-emulation.h |    8 
 pixman/pixman.c                   | 1012 +++++++++++--
 pixman/pixman.h                   |   81 -
 pixman/solaris-hwcap.mapfile      |   36 
 test/Makefile.am                  |   52 
 test/a1-trap-test.c               |   50 
 test/alpha-test.c                 |   12 
 test/alphamap.c                   |   49 
 test/blitters-test.c              |  315 +---
 test/clip-in.c                    |    2 
 test/clip-test.c                  |    4 
 test/composite-test.c             |    7 
 test/composite.c                  |  901 +++++++++++
 test/convolution-test.c           |    2 
 test/fetch-test.c                 |  104 -
 test/gradient-test.c              |   10 
 test/gtk-utils.c                  |  113 +
 test/gtk-utils.h                  |   13 
 test/region-test.c                |   49 
 test/scaling-test.c               |  207 --
 test/screen-test.c                |    2 
 test/trap-test.c                  |    2 
 test/utils.c                      |  289 ++-
 test/utils.h                      |   47 
 test/window-test.c                |    6 
 58 files changed, 8942 insertions(+), 5472 deletions(-)

New commits:
commit e9dc568d6f585a153c47e970168a9c71d3e45fde
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Thu Apr 1 05:23:31 2010 -0400

    Pre-release version bump to 0.18.0

diff --git a/configure.ac b/configure.ac
index 95ba1c9..d13b6e6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,8 +53,8 @@ AC_PREREQ([2.57])
 #
 
 m4_define([pixman_major], 0)
-m4_define([pixman_minor], 17)
-m4_define([pixman_micro], 15)
+m4_define([pixman_minor], 18)
+m4_define([pixman_micro], 0)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit efd41c62875d97c5127233cb6a4c353b4d495531
Author: Matthias Hopf <mhopf@suse.de>
Date:   Wed Mar 24 18:54:29 2010 +0100

    Revert "Improve PIXREGION_NIL to return true on degenerated regions."
    
    This reverts commit ebba1493136a5a0dd7667073165b2115de203eda.
    Scheduled for re-discussion after stable 0.18 has been released.

diff --git a/pixman/pixman-region.c b/pixman/pixman-region.c
index 179241d..a6a4005 100644
--- a/pixman/pixman-region.c
+++ b/pixman/pixman-region.c
@@ -69,11 +69,7 @@
 #include <stdio.h>
 #include "pixman-private.h"
 
-#define PIXREGION_NIL(reg) (((reg)->data && !(reg)->data->numRects) ||	\
-			    (! (reg)->data &&				\
-			     (reg)->extents.x1 == (reg)->extents.x2 &&	\
-			     (reg)->extents.y1 == (reg)->extents.y2))
-
+#define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects)
 /* not a region */
 #define PIXREGION_NAR(reg)      ((reg)->data == pixman_broken_data)
 #define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1)

commit ebba1493136a5a0dd7667073165b2115de203eda
Author: Matthias Hopf <mhopf@suse.de>
Date:   Wed Mar 24 12:00:21 2010 +0100

    Improve PIXREGION_NIL to return true on degenerated regions.
    
    Fixes Novell bug 568811.

diff --git a/pixman/pixman-region.c b/pixman/pixman-region.c
index a6a4005..179241d 100644
--- a/pixman/pixman-region.c
+++ b/pixman/pixman-region.c
@@ -69,7 +69,11 @@
 #include <stdio.h>
 #include "pixman-private.h"
 
-#define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects)
+#define PIXREGION_NIL(reg) (((reg)->data && !(reg)->data->numRects) ||	\
+			    (! (reg)->data &&				\
+			     (reg)->extents.x1 == (reg)->extents.x2 &&	\
+			     (reg)->extents.y1 == (reg)->extents.y2))
+
 /* not a region */
 #define PIXREGION_NAR(reg)      ((reg)->data == pixman_broken_data)
 #define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1)

commit c0f8d417b512b7d526fb6127954a50d14214f420
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Tue Mar 23 17:25:54 2010 -0400

    Post-release version bump to 0.17.15

diff --git a/configure.ac b/configure.ac
index 3edca54..95ba1c9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 17)
-m4_define([pixman_micro], 14)
+m4_define([pixman_micro], 15)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit b35f0b0158cd7aac388ba4c72c6c8aada77d2e22
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Tue Mar 23 16:52:02 2010 -0400

    Pre-release version bump to 0.17.14

diff --git a/configure.ac b/configure.ac
index ed7d16a..3edca54 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 17)
-m4_define([pixman_micro], 13)
+m4_define([pixman_micro], 14)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit 3ef203331f124bf137c6e0c8d5516b1209c92dd9
Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date:   Mon Mar 22 21:56:17 2010 +0200

    ARM: SIMD optimizations moved to a separate .S file
    
    This should be the last step in providing full armv4t compatibility
    with CPU features runtime autodetection in pixman.

diff --git a/configure.ac b/configure.ac
index 4668715..ed7d16a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -361,30 +361,24 @@ AC_SUBST(VMX_CFLAGS)
 
 AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
 
-dnl ===========================================================================
-dnl Check for ARM SIMD instructions
-ARM_SIMD_CFLAGS=""
-
+dnl ==========================================================================
+dnl Check if assembler is gas compatible and supports ARM SIMD instructions
 have_arm_simd=no
 AC_MSG_CHECKING(whether to use ARM SIMD assembler)
-# check with default CFLAGS in case the toolchain turns on a sufficiently recent -mcpu=
-AC_COMPILE_IFELSE([
-int main () {
-    asm("uqadd8 r1, r1, r2");
-    return 0;
-}], have_arm_simd=yes,
-    # check again with an explicit -mcpu= in case the toolchain defaults to an
-    # older one; note that uqadd8 isn't available in Thumb mode on arm1136j-s
-    # so we force ARM mode
-    ARM_SIMD_CFLAGS="-mcpu=arm1136j-s -marm"
-    xserver_save_CFLAGS=$CFLAGS
-    CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
-    AC_COMPILE_IFELSE([
-    int main () {
-        asm("uqadd8 r1, r1, r2");
-        return 0;
-    }], have_arm_simd=yes)
-    CFLAGS=$xserver_save_CFLAGS)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="-x assembler-with-cpp $CFLAGS"
+AC_COMPILE_IFELSE([[
+.text
+.arch armv6
+.object_arch armv4
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+uqadd8 r0, r0, r0]], have_arm_simd=yes)
+CFLAGS=$xserver_save_CFLAGS
 
 AC_ARG_ENABLE(arm-simd,
    [AC_HELP_STRING([--disable-arm-simd],
@@ -396,20 +390,16 @@ if test $enable_arm_simd = no ; then
 fi
 
 if test $have_arm_simd = yes ; then
-   AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD compiler intrinsics])
-else
-   ARM_SIMD_CFLAGS=
+   AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations])
 fi
 
+AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
+
 AC_MSG_RESULT($have_arm_simd)
 if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
    AC_MSG_ERROR([ARM SIMD intrinsics not detected])
 fi
 
-AC_SUBST(ARM_SIMD_CFLAGS)
-
-AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-
 dnl ==========================================================================
 dnl Check if assembler is gas compatible and supports NEON instructions
 have_arm_neon=no
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 5a0e7a9..66ad7f0 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -97,12 +97,14 @@ endif
 if USE_ARM_SIMD
 noinst_LTLIBRARIES += libpixman-arm-simd.la
 libpixman_arm_simd_la_SOURCES = \
-	pixman-arm-simd.c
-libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS)
+	pixman-arm-simd.c	\
+	pixman-arm-common.h	\
+	pixman-arm-simd-asm.S
+libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS)
 libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
 libpixman_1_la_LIBADD += libpixman-arm-simd.la
 
-ASM_CFLAGS_arm_simd=$(ARM_SIMD_CFLAGS)
+ASM_CFLAGS_arm_simd=
 endif
 
 # arm neon code
@@ -110,6 +112,7 @@ if USE_ARM_NEON
 noinst_LTLIBRARIES += libpixman-arm-neon.la
 libpixman_arm_neon_la_SOURCES = \
         pixman-arm-neon.c	\
+        pixman-arm-common.h	\
         pixman-arm-neon-asm.S	\
         pixman-arm-neon-asm.h
 libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
new file mode 100644
index 0000000..1a1a0d6
--- /dev/null
+++ b/pixman/pixman-arm-simd-asm.S
@@ -0,0 +1,330 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+
+/* Prevent the stack from becoming executable */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+	.text
+	.arch armv6
+	.object_arch armv4
+	.arm
+	.altmacro
+
+/* Supplementary macro for setting function attributes */
+.macro pixman_asm_function fname
+	.func fname
+	.global fname
+#ifdef __ELF__
+	.hidden fname
+	.type fname, %function
+#endif
+fname:
+.endm
+
+/*
+ * The code below was generated by gcc 4.3.4 from the commented out
+ * functions in 'pixman-arm-simd.c' file with the following optimization
+ * options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer"
+ *
+ * TODO: replace gcc generated code with hand tuned versions because
+ * the code quality is not very good, introduce symbolic register
+ * aliases for better readability and maintainability.
+ */
+
+pixman_asm_function pixman_composite_add_8000_8000_asm_armv6
+	push	{r4, r5, r6, r7, r8, r9, r10, r11}
+	mov	r10, r1
+	sub	sp, sp, #4
+	subs	r10, r10, #1
+	mov	r11, r0
+	mov	r8, r2
+	str	r3, [sp]
+	ldr	r7, [sp, #36]
+	bcc	0f
+6:	cmp	r11, #0
+	beq	1f
+	orr	r3, r8, r7
+	tst	r3, #3
+	beq	2f
+	mov	r1, r8
+	mov	r0, r7
+	mov	r12, r11
+	b	3f
+5:	tst	r3, #3
+	beq	4f
+3:	ldrb	r2, [r0], #1
+	subs	r12, r12, #1
+	ldrb	r3, [r1]
+	uqadd8	r3, r2, r3
+	strb	r3, [r1], #1
+	orr	r3, r1, r0
+	bne	5b
+1:	ldr	r3, [sp]
+	add	r8, r8, r3
+	ldr	r3, [sp, #40]
+	add	r7, r7, r3
+10:	subs	r10, r10, #1
+	bcs	6b
+0:	add	sp, sp, #4
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
+	bx	lr
+2:	mov	r12, r11
+	mov	r1, r8
+	mov	r0, r7
+4:	cmp	r12, #3
+	subgt	r6, r12, #4
+	movgt	r9, r12
+	lsrgt	r5, r6, #2
+	addgt	r3, r5, #1
+	movgt	r12, #0
+	lslgt	r4, r3, #2
+	ble	7f
+8:	ldr	r3, [r0, r12]
+	ldr	r2, [r1, r12]
+	uqadd8	r3, r3, r2
+	str	r3, [r1, r12]
+	add	r12, r12, #4
+	cmp	r12, r4
+	bne	8b
+	sub	r3, r9, #4
+	bic	r3, r3, #3
+	add	r3, r3, #4
+	subs	r12, r6, r5, lsl #2
+	add	r1, r1, r3
+	add	r0, r0, r3
+	beq	1b
+7:	mov	r4, #0
+9:	ldrb	r3, [r1, r4]
+	ldrb	r2, [r0, r4]
+	uqadd8	r3, r2, r3
+	strb	r3, [r1, r4]
+	add	r4, r4, #1
+	cmp	r4, r12
+	bne	9b
+	ldr	r3, [sp]
+	add	r8, r8, r3
+	ldr	r3, [sp, #40]
+	add	r7, r7, r3
+	b	10b
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_8888_asm_armv6
+	push	{r4, r5, r6, r7, r8, r9, r10, r11}
+	sub	sp, sp, #20
+	cmp	r1, #0
+	mov	r12, r2
+	str	r1, [sp, #12]
+	str	r0, [sp, #16]
+	ldr	r2, [sp, #52]
+	beq	0f
+	lsl	r3, r3, #2
+	str	r3, [sp]
+	ldr	r3, [sp, #56]
+	mov	r10, #0
+	lsl	r3, r3, #2
+	str	r3, [sp, #8]
+	mov	r11, r3
+	b	1f
+6:	ldr	r11, [sp, #8]
+1:	ldr	r9, [sp]
+	mov	r0, r12
+	add	r12, r12, r9
+	mov	r1, r2
+	str	r12, [sp, #4]
+	add	r2, r2, r11
+	ldr	r12, [sp, #16]
+	ldr	r3, =0x00800080
+	ldr	r9, =0xff00ff00
+	mov	r11, #255
+	cmp	r12, #0
+	beq	4f
+5:	ldr	r5, [r1], #4
+	ldr	r4, [r0]
+	sub	r8, r11, r5, lsr #24
+	uxtb16	r6, r4
+	uxtb16	r7, r4, ror #8
+	mla	r6, r6, r8, r3
+	mla	r7, r7, r8, r3
+	uxtab16	r6, r6, r6, ror #8
+	uxtab16	r7, r7, r7, ror #8
+	and	r7, r7, r9
+	uxtab16	r6, r7, r6, ror #8
+	uqadd8	r5, r6, r5
+	str	r5, [r0], #4
+	subs	r12, r12, #1
+	bne	5b
+4:	ldr	r3, [sp, #12]
+	add	r10, r10, #1
+	cmp	r10, r3
+	ldr	r12, [sp, #4]
+	bne	6b
+0:	add	sp, sp, #20
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
+	bx	lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6
+	push	{r4, r5, r6, r7, r8, r9, r10, r11}
+	sub	sp, sp, #28
+	cmp	r1, #0
+	str	r1, [sp, #12]
+	ldrb	r1, [sp, #71]
+	mov	r12, r2
+	str	r0, [sp, #16]
+	ldr	r2, [sp, #60]
+	str	r1, [sp, #24]
+	beq	0f
+	lsl	r3, r3, #2
+	str	r3, [sp, #20]
+	ldr	r3, [sp, #64]
+	mov	r10, #0
+	lsl	r3, r3, #2
+	str	r3, [sp, #8]
+	mov	r11, r3
+	b	1f
+5:	ldr	r11, [sp, #8]
+1:	ldr	r4, [sp, #20]
+	mov	r0, r12
+	mov	r1, r2
+	add	r12, r12, r4
+	add	r2, r2, r11
+	str	r12, [sp]
+	str	r2, [sp, #4]
+	ldr	r12, [sp, #16]
+	ldr	r2, =0x00800080
+	ldr	r3, [sp, #24]
+	mov	r11, #255
+	cmp	r12, #0
+	beq	3f
+4:	ldr	r5, [r1], #4
+	ldr	r4, [r0]
+	uxtb16	r6, r5
+	uxtb16	r7, r5, ror #8
+	mla	r6, r6, r3, r2
+	mla	r7, r7, r3, r2
+	uxtab16	r6, r6, r6, ror #8
+	uxtab16	r7, r7, r7, ror #8
+	uxtb16	r6, r6, ror #8
+	uxtb16	r7, r7, ror #8
+	orr	r5, r6, r7, lsl #8
+	uxtb16	r6, r4
+	uxtb16	r7, r4, ror #8
+	sub	r8, r11, r5, lsr #24
+	mla	r6, r6, r8, r2
+	mla	r7, r7, r8, r2
+	uxtab16	r6, r6, r6, ror #8
+	uxtab16	r7, r7, r7, ror #8
+	uxtb16	r6, r6, ror #8
+	uxtb16	r7, r7, ror #8
+	orr	r6, r6, r7, lsl #8
+	uqadd8	r5, r6, r5
+	str	r5, [r0], #4
+	subs	r12, r12, #1
+	bne	4b
+3:	ldr	r1, [sp, #12]
+	add	r10, r10, #1
+	cmp	r10, r1
+	ldr	r12, [sp]
+	ldr	r2, [sp, #4]
+	bne	5b
+0:	add	sp, sp, #28
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
+	bx	lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+	push	{r4, r5, r6, r7, r8, r9, r10, r11}
+	sub	sp, sp, #28
+	cmp	r1, #0
+	ldr	r9, [sp, #60]
+	str	r1, [sp, #12]
+	bic	r1, r9, #-16777216
+	str	r1, [sp, #20]
+	mov	r12, r2
+	lsr	r1, r9, #8
+	ldr	r2, [sp, #20]
+	bic	r1, r1, #-16777216
+	bic	r2, r2, #65280
+	bic	r1, r1, #65280
+	str	r2, [sp, #20]
+	str	r0, [sp, #16]
+	str	r1, [sp, #4]
+	ldr	r2, [sp, #68]
+	beq	0f
+	lsl	r3, r3, #2
+	str	r3, [sp, #24]
+	mov	r0, #0
+	b	1f
+5:	ldr	r3, [sp, #24]
+1:	ldr	r4, [sp, #72]
+	mov	r10, r12
+	mov	r1, r2
+	add	r12, r12, r3
+	add	r2, r2, r4
+	str	r12, [sp, #8]
+	str	r2, [sp]
+	ldr	r12, [sp, #16]
+	ldr	r11, =0x00800080
+	ldr	r2, [sp, #4]
+	ldr	r3, [sp, #20]
+	cmp	r12, #0
+	beq	3f
+4:	ldrb	r5, [r1], #1
+	ldr	r4, [r10]
+	mla	r6, r3, r5, r11
+	mla	r7, r2, r5, r11
+	uxtab16	r6, r6, r6, ror #8
+	uxtab16	r7, r7, r7, ror #8
+	uxtb16	r6, r6, ror #8
+	uxtb16	r7, r7, ror #8
+	orr	r5, r6, r7, lsl #8
+	uxtb16	r6, r4
+	uxtb16	r7, r4, ror #8
+	mvn	r8, r5
+	lsr	r8, r8, #24
+	mla	r6, r6, r8, r11
+	mla	r7, r7, r8, r11
+	uxtab16	r6, r6, r6, ror #8
+	uxtab16	r7, r7, r7, ror #8
+	uxtb16	r6, r6, ror #8
+	uxtb16	r7, r7, ror #8
+	orr	r6, r6, r7, lsl #8
+	uqadd8	r5, r6, r5
+	str	r5, [r10], #4
+	subs	r12, r12, #1
+	bne	4b
+3:	ldr	r4, [sp, #12]
+	add	r0, r0, #1
+	cmp	r0, r4
+	ldr	r12, [sp, #8]
+	ldr	r2, [sp]
+	bne	5b
+0:	add	sp, sp, #28
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
+	bx	lr
+.endfunc
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index f110753..389c9e0 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -30,6 +30,8 @@
 #include "pixman-private.h"
 #include "pixman-arm-common.h"
 
+#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
+
 void
 pixman_composite_add_8000_8000_asm_armv6 (int32_t  width,
                                           int32_t  height,
@@ -371,6 +373,8 @@ pixman_composite_over_n_8_8888_asm_armv6 (int32_t   width,
     }
 }
 
+#endif
+
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8000_8000,
                                    uint8_t, 1, uint8_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,

commit 0a0591c2f7abde8880f4aebd510c27517a414450
Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date:   Mon Mar 22 19:51:00 2010 +0200

    ARM: SIMD optimizations updated to use common assembly calling conventions

diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 09a2888..f110753 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -28,31 +28,20 @@
 #endif
 
 #include "pixman-private.h"
-
-static void
-arm_composite_add_8000_8000 (pixman_implementation_t * impl,
-			     pixman_op_t               op,
-			     pixman_image_t *          src_image,
-			     pixman_image_t *          mask_image,
-			     pixman_image_t *          dst_image,
-			     int32_t                   src_x,
-			     int32_t                   src_y,
-			     int32_t                   mask_x,
-			     int32_t                   mask_y,
-			     int32_t                   dest_x,
-			     int32_t                   dest_y,
-			     int32_t                   width,
-			     int32_t                   height)
+#include "pixman-arm-common.h"
+
+void
+pixman_composite_add_8000_8000_asm_armv6 (int32_t  width,
+                                          int32_t  height,
+                                          uint8_t *dst_line,
+                                          int32_t  dst_stride,
+                                          uint8_t *src_line,
+                                          int32_t  src_stride)
 {
-    uint8_t     *dst_line, *dst;
-    uint8_t     *src_line, *src;
-    int dst_stride, src_stride;
+    uint8_t *dst, *src;
     int32_t w;
     uint8_t s, d;
 
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
     while (height--)
     {
 	dst = dst_line;
@@ -101,32 +90,21 @@ arm_composite_add_8000_8000 (pixman_implementation_t * impl,
 
 }
 
-static void
-arm_composite_over_8888_8888 (pixman_implementation_t * impl,
-			      pixman_op_t               op,
-			      pixman_image_t *          src_image,
-			      pixman_image_t *          mask_image,
-			      pixman_image_t *          dst_image,
-			      int32_t                   src_x,
-			      int32_t                   src_y,
-			      int32_t                   mask_x,
-			      int32_t                   mask_y,
-			      int32_t                   dest_x,
-			      int32_t                   dest_y,
-			      int32_t                   width,
-			      int32_t                   height)
+void
+pixman_composite_over_8888_8888_asm_armv6 (int32_t   width,
+                                           int32_t   height,
+                                           uint32_t *dst_line,
+                                           int32_t   dst_stride,
+                                           uint32_t *src_line,
+                                           int32_t   src_stride)
 {
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
+    uint32_t    *dst;
+    uint32_t    *src;
     int32_t w;
     uint32_t component_half = 0x800080;
     uint32_t upper_component_mask = 0xff00ff00;
     uint32_t alpha_mask = 0xff;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
     while (height--)
     {
 	dst = dst_line;
@@ -194,33 +172,21 @@ arm_composite_over_8888_8888 (pixman_implementation_t * impl,
     }
 }
 
-static void
-arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
-				pixman_op_t               op,
-				pixman_image_t *          src_image,
-				pixman_image_t *          mask_image,
-				pixman_image_t *          dst_image,
-				int32_t                   src_x,
-				int32_t                   src_y,
-				int32_t                   mask_x,
-				int32_t                   mask_y,
-				int32_t                   dest_x,
-				int32_t                   dest_y,
-				int32_t                   width,
-				int32_t                   height)
+void
+pixman_composite_over_8888_n_8888_asm_armv6 (int32_t   width,
+                                             int32_t   height,
+                                             uint32_t *dst_line,
+                                             int32_t   dst_stride,
+                                             uint32_t *src_line,
+                                             int32_t   src_stride,
+                                             uint32_t  mask)
 {
-    uint32_t *dst_line, *dst;
-    uint32_t *src_line, *src;
-    uint32_t mask;
-    int dst_stride, src_stride;
+    uint32_t *dst;
+    uint32_t *src;
     int32_t w;
     uint32_t component_half = 0x800080;
     uint32_t alpha_mask = 0xff;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8);
     mask = (mask) >> 24;
 
     while (height--)
@@ -303,33 +269,22 @@ arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
     }
 }
 
-static void
-arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
-			     pixman_op_t               op,
-			     pixman_image_t *          src_image,
-			     pixman_image_t *          mask_image,
-			     pixman_image_t *          dst_image,
-			     int32_t                   src_x,
-			     int32_t                   src_y,
-			     int32_t                   mask_x,
-			     int32_t                   mask_y,
-			     int32_t                   dest_x,
-			     int32_t                   dest_y,
-			     int32_t                   width,
-			     int32_t                   height)
+void
+pixman_composite_over_n_8_8888_asm_armv6 (int32_t   width,
+                                          int32_t   height,
+                                          uint32_t *dst_line,
+                                          int32_t   dst_stride,
+                                          uint32_t  src,
+                                          int32_t   unused,
+                                          uint8_t  *mask_line,
+                                          int32_t   mask_stride)
 {
-    uint32_t src, srca;
-    uint32_t *dst_line, *dst;
-    uint8_t  *mask_line, *mask;
-    int dst_stride, mask_stride;
+    uint32_t  srca;
+    uint32_t *dst;
+    uint8_t  *mask;
     int32_t w;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    /* bail out if fully transparent */
     srca = src >> 24;
-    if (src == 0)
-	return;
 
     uint32_t component_mask = 0xff00ff;
     uint32_t component_half = 0x800080;
@@ -337,9 +292,6 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
     uint32_t src_hi = (src >> 8) & component_mask;
     uint32_t src_lo = src & component_mask;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
     while (height--)
     {
 	dst = dst_line;
@@ -419,21 +371,34 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
     }
 }
 
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8000_8000,
+                                   uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888,
+                                     uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888,
+                                      uint8_t, 1, uint32_t, 1)
+
 static const pixman_fast_path_t arm_simd_fast_paths[] =
 {
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, arm_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, arm_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, arm_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, arm_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, arm_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, arm_composite_over_8888_n_8888),
-
-    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, arm_composite_add_8000_8000),
-
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, arm_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, arm_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, arm_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, arm_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+
+    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8000_8000),
+
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
 
     { PIXMAN_OP_NONE },
 };

commit c1e8d4533aea3aa10c49465cf5e9a44d946f70bb
Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date:   Mon Mar 22 18:51:54 2010 +0200

    ARM: Helper ARM NEON assembly binding macros moved into a separate header
    
    This is needed for future reuse of the same macros for the other
    ARM assembly optimizations (armv4t, armv6)

diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
new file mode 100644
index 0000000..8d432b1
--- /dev/null
+++ b/pixman/pixman-arm-common.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+#ifndef PIXMAN_ARM_COMMON_H
+#define PIXMAN_ARM_COMMON_H
+
+/* Define some macros which can expand into proxy functions between
+ * ARM assembly optimized functions and the rest of pixman fast path API.
+ *
+ * All the low level ARM assembly functions have to use ARM EABI
+ * calling convention and take up to 8 arguments:
+ *    width, height, dst, dst_stride, src, src_stride, mask, mask_stride
+ *
+ * The arguments are ordered with the most important coming first (the
+ * first 4 arguments are passed to function in registers, the rest are
+ * on stack). The last arguments are optional, for example if the
+ * function is not using mask, then 'mask' and 'mask_stride' can be
+ * omitted when doing a function call.
+ *
+ * Arguments 'src' and 'mask' contain either a pointer to the top left
+ * pixel of the composited rectangle or a pixel color value depending
+ * on the function type. In the case of just a color value (solid source
+ * or mask), the corresponding stride argument is unused.
+ */
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name,                \
+                                          src_type, src_cnt,            \
+                                          dst_type, dst_cnt)            \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t   w,                   \
+                                         int32_t   h,                   \
+                                         dst_type *dst,                 \
+                                         int32_t   dst_stride,          \
+                                         src_type *src,                 \
+                                         int32_t   src_stride);         \
+                                                                        \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type *dst_line;                                                 \
+    src_type *src_line;                                                 \
+    int32_t dst_stride, src_stride;                                     \
+                                                                        \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
+                           src_stride, src_line, src_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+                                                                        \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src_line, src_stride);     \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name,                  \
+                                        dst_type, dst_cnt)              \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+                                         int32_t    h,                  \
+                                         dst_type  *dst,                \


Reply to: