[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

pixman: Changes to 'upstream-experimental'



 .gitignore                       |   23 +
 autogen.sh                       |    4 
 configure.ac                     |   78 ++++-
 demos/composite-test.c           |    2 
 demos/gradient-test.c            |   13 
 pixman/Makefile.am               |   16 +
 pixman/pixman-access.c           |    9 
 pixman/pixman-accessor.h         |   15 -
 pixman/pixman-bits-image.c       |   81 ++---
 pixman/pixman-combine.c.template |    6 
 pixman/pixman-combine.h.template |    4 
 pixman/pixman-compiler.h         |    9 
 pixman/pixman-cpu.c              |  207 ++++++++++++--
 pixman/pixman-fast-path.c        |    4 
 pixman/pixman-general.c          |   16 -
 pixman/pixman-gradient-walker.c  |    7 
 pixman/pixman-image.c            |   22 -
 pixman/pixman-implementation.c   |  119 +-------
 pixman/pixman-mips-dspr2-asm.S   |  310 +++++++++++++++++++++
 pixman/pixman-mips-dspr2-asm.h   |  206 ++++++++++++++
 pixman/pixman-mips-dspr2.c       |  247 +++++++++++++++++
 pixman/pixman-mips-dspr2.h       |   88 ++++++
 pixman/pixman-mips-memcpy-asm.S  |  382 ++++++++++++++++++++++++++
 pixman/pixman-mmx.c              |  563 ++++++++++++++++++++-------------------
 pixman/pixman-noop.c             |   27 +
 pixman/pixman-private.h          |   38 --
 pixman/pixman-region.c           |   19 -
 pixman/pixman-sse2.c             |    2 
 pixman/pixman-trap.c             |    2 
 pixman/pixman-utils.c            |   35 +-
 pixman/pixman.h                  |    2 
 test/Makefile.win32              |   21 +
 test/a1-trap-test.c              |    2 
 test/alphamap.c                  |  144 +++++++--
 test/blitters-test.c             |    7 
 test/composite-traps-test.c      |    1 
 test/composite.c                 |  334 ++++++-----------------
 test/fetch-test.c                |    8 
 test/gradient-crash-test.c       |    6 
 test/lowlevel-blt-bench.c        |   10 
 test/oob-test.c                  |    4 
 test/region-contains-test.c      |    5 
 test/region-translate-test.c     |    2 
 test/scaling-crash-test.c        |    7 
 test/scaling-test.c              |    1 
 test/stress-test.c               |    4 
 test/trap-crasher.c              |    4 
 test/utils.c                     |  176 ++++++++++++
 test/utils.h                     |   36 ++
 49 files changed, 2475 insertions(+), 853 deletions(-)

New commits:
commit f73f7985318bf0e7446941d9bea9a94b35580342
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Thu Mar 8 09:33:16 2012 -0500

    Pre-release version bump to 0.25.2

diff --git a/configure.ac b/configure.ac
index 5eeb6a5..5ca062a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 25)
-m4_define([pixman_micro], 1)
+m4_define([pixman_micro], 2)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit 62df04eb257d16fbb4449855a48f6fdaf567e201
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Thu Mar 8 09:29:46 2012 -0500

    mmx: Squash a warning by making the argument to ldl_u() const

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index fe31b08..f9efd73 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -336,7 +336,7 @@ static __inline__ __m64 ldq_u(uint64_t *p)
 #endif
 }
 
-static __inline__ uint32_t ldl_u(uint32_t *p)
+static __inline__ uint32_t ldl_u(const uint32_t *p)
 {
 #ifdef USE_X86_MMX
     /* x86's alignment restrictions are very relaxed. */

commit 85943733cbd7b62991ee962aa22f28bc5d1be353
Author: Alan Coopersmith <alan.coopersmith@oracle.com>
Date:   Fri Feb 24 18:02:56 2012 -0800

    Just use xmmintrin.h when building with Solaris Studio compilers
    
    Since the Solaris Studio compilers don't have a mode where MMX
    instructions are available and SSE instructions are not, we can
    just use the <xmmintrin.h> header directly.
    
    Fixes build failure due to Studio not supporting the __gnu_inline__
    or __artificial__ attributes.
    
    Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
    Acked-by: Matt Turner <mattst88@gmail.com>

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index bd44f63..fe31b08 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -57,6 +57,9 @@ _mm_empty (void)
 #endif
 
 #ifdef USE_X86_MMX
+# ifdef __SUNPRO_C
+#  include <xmmintrin.h>
+# else
 /* We have to compile with -msse to use xmmintrin.h, but that causes SSE
  * instructions to be generated that we don't want. Just duplicate the
  * functions we want to use.  */
@@ -82,6 +85,7 @@ _mm_shuffle_pi16 (__m64 __A, int8_t const __N)
 
     return ret;
 }
+# endif
 #endif
 
 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \

commit 304f57644ac6a991c6e538675de935356252c0a5
Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
Date:   Wed Feb 29 12:04:33 2012 +0100

    MIPS: DSPr2: Added mips_dspr2_blt and mips_dspr2_fill routines.
    
    Performance numbers before/after on MIPS-74kc @ 1GHz
    
    Reference (before):
    
    lowlevel-blt-bench:
                  src_n_0565 =  L1: 238.14  L2: 233.15  M: 57.88 ( 77.23%)  HT: 53.22  VT: 49.99  R: 47.73  RT: 24.79 (  91Kops/s)
                  src_n_8888 =  L1: 190.19  L2: 187.57  M: 28.94 ( 77.23%)  HT: 27.91  VT: 27.33  R: 26.64  RT: 14.68 (  77Kops/s)
    cairo-perf-trace:
    [ # ]  backend                         test   min(s) median(s) stddev. count
    [ # ]    image: pixman 0.25.1
    [  0]    image         gnome-system-monitor  268.460  269.712   0.22%    6/6
    
    Optimized:
    
    lowlevel-blt-bench:
                  src_n_0565 =  L1:1081.39  L2: 258.22  M:189.59 (252.91%)  HT: 60.23  VT: 55.01  R: 53.44  RT: 23.68 (  89Kops/s)
                  src_n_8888 =  L1: 653.46  L2: 113.55  M:135.26 (360.86%)  HT: 38.99  VT: 37.38  R: 34.95  RT: 18.67 (  84Kops/s)
    cairo-perf-trace:
    [ # ]  backend                         test   min(s) median(s) stddev. count
    [ # ]    image: pixman 0.25.1
    [  0]    image         gnome-system-monitor  246.565  246.706   0.04%    6/6

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 0a4c87e..f1087a7 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -31,6 +31,111 @@
 
 #include "pixman-mips-dspr2-asm.h"
 
+LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+    beqz     a1, 3f
+     andi    t1, a0, 0x0002
+    beqz     t1, 0f          /* check if address is 4-byte aligned */
+     nop
+    sh       a2, 0(a0)
+    addiu    a0, a0, 2
+    addiu    a1, a1, -2
+0:
+    srl      t1, a1, 5       /* t1 how many multiples of 32 bytes */
+    replv.ph a2, a2          /* replicate fill value (16bit) in a2 */
+    beqz     t1, 2f
+     nop
+1:
+    addiu    t1, t1, -1
+    beqz     t1, 11f
+     addiu   a1, a1, -32
+    pref     30, 32(a0)
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b
+     addiu   a0, a0, 32
+11:
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32
+2:
+    blez     a1, 3f
+     addiu   a1, a1, -2
+    sh       a2, 0(a0)
+    b        2b
+     addiu   a0, a0, 2
+3:
+    jr       ra
+     nop
+
+END(pixman_fill_buff16_mips)
+
+LEAF_MIPS32R2(pixman_fill_buff32_mips)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+    beqz     a1, 3f
+     nop
+    srl      t1, a1, 5 /* t1 how many multiples of 32 bytes */
+    beqz     t1, 2f
+     nop
+1:
+    addiu    t1, t1, -1
+    beqz     t1, 11f
+     addiu   a1, a1, -32
+    pref     30, 32(a0)
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b
+     addiu   a0, a0, 32
+11:
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32
+2:
+    blez     a1, 3f
+     addiu   a1, a1, -4
+    sw       a2, 0(a0)
+    b        2b
+     addiu   a0, a0, 4
+3:
+    jr       ra
+     nop
+
+END(pixman_fill_buff32_mips)
+
 LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
 /*
  * a0 - dst (r5g6b5)
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index e331853..2beada3 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -49,6 +49,119 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
                                     uint8_t, 3, uint8_t, 3)
 
+static pixman_bool_t
+pixman_fill_mips (uint32_t *bits,
+                  int       stride,
+                  int       bpp,
+                  int       x,
+                  int       y,
+                  int       width,
+                  int       height,
+                  uint32_t  _xor)
+{
+    uint8_t *byte_line;
+    uint32_t byte_width;
+    switch (bpp)
+    {
+    case 16:
+        stride = stride * (int) sizeof (uint32_t) / 2;
+        byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+        byte_width = width * 2;
+        stride *= 2;
+
+        while (height--)
+        {
+            uint8_t *dst = byte_line;
+            byte_line += stride;
+            pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff);
+        }
+        return TRUE;
+    case 32:
+        stride = stride * (int) sizeof (uint32_t) / 4;
+        byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+        byte_width = width * 4;
+        stride *= 4;
+
+        while (height--)
+        {
+            uint8_t *dst = byte_line;
+            byte_line += stride;
+            pixman_fill_buff32_mips (dst, byte_width, _xor);
+        }
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+static pixman_bool_t
+pixman_blt_mips (uint32_t *src_bits,
+                 uint32_t *dst_bits,
+                 int       src_stride,
+                 int       dst_stride,
+                 int       src_bpp,
+                 int       dst_bpp,
+                 int       src_x,
+                 int       src_y,
+                 int       dest_x,
+                 int       dest_y,
+                 int       width,
+                 int       height)
+{
+    if (src_bpp != dst_bpp)
+        return FALSE;
+
+    uint8_t *src_bytes;
+    uint8_t *dst_bytes;
+    uint32_t byte_width;
+
+    switch (src_bpp)
+    {
+    case 16:
+        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+        src_bytes =(uint8_t *)(((uint16_t *)src_bits)
+                                          + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint16_t *)dst_bits)
+                                           + dst_stride * (dest_y) + (dest_x));
+        byte_width = width * 2;
+        src_stride *= 2;
+        dst_stride *= 2;
+
+        while (height--)
+        {
+            uint8_t *src = src_bytes;
+            uint8_t *dst = dst_bytes;
+            src_bytes += src_stride;
+            dst_bytes += dst_stride;
+            pixman_mips_fast_memcpy (dst, src, byte_width);
+        }
+        return TRUE;
+    case 32:
+        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+        src_bytes = (uint8_t *)(((uint32_t *)src_bits)
+                                           + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint32_t *)dst_bits)
+                                           + dst_stride * (dest_y) + (dest_x));
+        byte_width = width * 4;
+        src_stride *= 4;
+        dst_stride *= 4;
+
+        while (height--)
+        {
+            uint8_t *src = src_bytes;
+            uint8_t *dst = dst_bytes;
+            src_bytes += src_stride;
+            dst_bytes += dst_stride;
+            pixman_mips_fast_memcpy (dst, src, byte_width);
+        }
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
 static const pixman_fast_path_t mips_dspr2_fast_paths[] =
 {
     PIXMAN_STD_FAST_PATH (SRC, r5g6b5,   null, r5g6b5,   mips_composite_src_0565_0565),
@@ -74,11 +187,61 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     { PIXMAN_OP_NONE },
 };
 
+static pixman_bool_t
+mips_dspr2_blt (pixman_implementation_t *imp,
+                uint32_t *               src_bits,
+                uint32_t *               dst_bits,
+                int                      src_stride,
+                int                      dst_stride,
+                int                      src_bpp,
+                int                      dst_bpp,
+                int                      src_x,
+                int                      src_y,
+                int                      dest_x,
+                int                      dest_y,
+                int                      width,
+                int                      height)
+{
+    if (!pixman_blt_mips (
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dest_x, dest_y, width, height))
+
+    {
+        return _pixman_implementation_blt (
+            imp->delegate,
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dest_x, dest_y, width, height);
+    }
+
+    return TRUE;
+}
+
+static pixman_bool_t
+mips_dspr2_fill (pixman_implementation_t *imp,
+                 uint32_t *               bits,
+                 int                      stride,
+                 int                      bpp,
+                 int                      x,
+                 int                      y,
+                 int                      width,
+                 int                      height,
+                 uint32_t xor)
+{
+    if (pixman_fill_mips (bits, stride, bpp, x, y, width, height, xor))
+        return TRUE;
+
+    return _pixman_implementation_fill (
+        imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+}
+
 pixman_implementation_t *
 _pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback)
 {
     pixman_implementation_t *imp =
         _pixman_implementation_create (fallback, mips_dspr2_fast_paths);
 
+    imp->blt = mips_dspr2_blt;
+    imp->fill = mips_dspr2_fill;
+
     return imp;
 }
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 449c42a..a40e7c8 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -41,6 +41,10 @@
 
 void
 pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes);
+void
+pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value);
 
 /****************************************************************/
 

commit 999e72b80bd5e3fab5f45b6ad19511389b58d9ab
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Thu Mar 1 02:24:54 2012 -0500

    pixman-access.c: Remove some unused macros
    
    The macros related to palette entries:
    
    RGB15_TO_ENTRY,
    RGB24_TO_ENTRY,
    RGB24_TO_ENTRY_Y
    
    are not used anywhere.

diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index 189b191..6743887 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -45,15 +45,6 @@
      (((s) >> 6) & 0x03e0) |                                            \
      (((s) >> 9) & 0x7c00))
 
-#define RGB15_TO_ENTRY(mif,rgb15)					\
-    ((mif)->ent[rgb15])
-
-#define RGB24_TO_ENTRY(mif,rgb24)					\
-    RGB15_TO_ENTRY (mif,CONVERT_RGB24_TO_RGB15 (rgb24))
-
-#define RGB24_TO_ENTRY_Y(mif,rgb24)					\
-    ((mif)->ent[CONVERT_RGB24_TO_Y15 (rgb24)])
-
 /* Fetch macros */
 
 #ifdef WORDS_BIGENDIAN

commit c0cb48aae0d09200a187965094138fbf488498cd
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Wed Feb 29 04:44:46 2012 -0500

    pixman-accessor.h: Delete unused macros
    
    The MEMCPY_WRAPPED and ACCESS macros are not used anymore.

diff --git a/pixman/pixman-accessor.h b/pixman/pixman-accessor.h
index 90c8ea7..8e0b036 100644
--- a/pixman/pixman-accessor.h
+++ b/pixman/pixman-accessor.h
@@ -1,21 +1,10 @@
 #ifdef PIXMAN_FB_ACCESSORS
 
-#define ACCESS(sym) sym##_accessors
-
 #define READ(img, ptr)							\
     (((bits_image_t *)(img))->read_func ((ptr), sizeof(*(ptr))))
 #define WRITE(img, ptr,val)						\
     (((bits_image_t *)(img))->write_func ((ptr), (val), sizeof (*(ptr))))
 
-#define MEMCPY_WRAPPED(img, dst, src, size)				\
-    do {								\
-	size_t _i;							\
-	uint8_t *_dst = (uint8_t*)(dst), *_src = (uint8_t*)(src);	\
-	for(_i = 0; _i < size; _i++) {					\
-	    WRITE((img), _dst +_i, READ((img), _src + _i));		\
-	}								\
-    } while (0)
-
 #define MEMSET_WRAPPED(img, dst, val, size)				\
     do {								\
 	size_t _i;							\
@@ -27,12 +16,8 @@
 
 #else
 
-#define ACCESS(sym) sym
-
 #define READ(img, ptr)		(*(ptr))
 #define WRITE(img, ptr, val)	(*(ptr) = (val))
-#define MEMCPY_WRAPPED(img, dst, src, size)				\
-    memcpy(dst, src, size)
 #define MEMSET_WRAPPED(img, dst, val, size)				\
     memset(dst, val, size)
 

commit 5adf569317f923cd5eb547209a8d927be0d81049
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Sun Feb 26 17:35:20 2012 -0500

    Move fetching for solid bits images to pixman-noop.c
    
    This should be a bit faster because it can reuse the scanline on each iteration.

diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 2f56de3..d105d2f 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -970,28 +970,6 @@ replicate_pixel_64 (bits_image_t *   bits,
 }
 
 static void
-bits_image_fetch_solid_32 (pixman_image_t * image,
-                           int              x,
-                           int              y,
-                           int              width,
-                           uint32_t *       buffer,
-                           const uint32_t * mask)
-{
-    replicate_pixel_32 (&image->bits, 0, 0, width, buffer);
-}
-
-static void
-bits_image_fetch_solid_64 (pixman_image_t * image,
-                           int              x,
-                           int              y,
-                           int              width,
-                           uint32_t *       b,
-                           const uint32_t * unused)
-{
-    replicate_pixel_64 (&image->bits, 0, 0, width, b);
-}
-
-static void
 bits_image_fetch_untransformed_repeat_none (bits_image_t *image,
                                             pixman_bool_t wide,
                                             int           x,
@@ -1131,12 +1109,6 @@ typedef struct
 
 static const fetcher_info_t fetcher_info[] =
 {
-    { PIXMAN_solid,
-      FAST_PATH_NO_ALPHA_MAP,
-      bits_image_fetch_solid_32,
-      bits_image_fetch_solid_64
-    },
-
     { PIXMAN_any,
       (FAST_PATH_NO_ALPHA_MAP			|
        FAST_PATH_ID_TRANSFORM			|
diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c
index f4012d8..d835de6 100644
--- a/pixman/pixman-noop.c
+++ b/pixman/pixman-noop.c
@@ -76,6 +76,33 @@ noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
     {
 	iter->get_scanline = _pixman_iter_get_scanline_noop;
     }
+    else if (image->common.extended_format_code == PIXMAN_solid		&&
+	     ((image->common.flags & (FAST_PATH_BITS_IMAGE | FAST_PATH_NO_ALPHA_MAP)) ==
+	      (FAST_PATH_BITS_IMAGE | FAST_PATH_NO_ALPHA_MAP)))
+    {
+	bits_image_t *bits = &image->bits;
+
+	if (iter->flags & ITER_NARROW)
+	{
+	    uint32_t color = bits->fetch_pixel_32 (bits, 0, 0);
+	    uint32_t *buffer = iter->buffer;
+	    uint32_t *end = buffer + iter->width;
+
+	    while (buffer < end)
+		*(buffer++) = color;
+	}
+	else
+	{
+	    uint64_t color = bits->fetch_pixel_64 (bits, 0, 0);
+	    uint64_t *buffer = (uint64_t *)iter->buffer;
+	    uint64_t *end = buffer + iter->width;
+
+	    while (buffer < end)
+		*(buffer++) = color;
+	}
+
+	iter->get_scanline = _pixman_iter_get_scanline_noop;
+    }
     else if (image->common.extended_format_code == PIXMAN_a8r8g8b8	&&
 	     (iter->flags & ITER_NARROW)				&&
 	     (image->common.flags & FLAGS) == FLAGS			&&

commit 3c3c70fa0b524569df0ec20c50d481626e518462
Author: Matt Turner <mattst88@gmail.com>
Date:   Fri Feb 24 20:11:11 2012 -0500

    lowlevel-blt-bench: add in_8_8 and in_n_8_8
    
    Signed-off-by: Matt Turner <mattst88@gmail.com>

diff --git a/test/lowlevel-blt-bench.c b/test/lowlevel-blt-bench.c
index 95513ba..8a39a46 100644
--- a/test/lowlevel-blt-bench.c
+++ b/test/lowlevel-blt-bench.c
@@ -582,6 +582,8 @@ tests_tbl[] =
     { "add_1555_1555",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
     { "add_0565_2x10",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
     { "add_2a10_2a10",         PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
+    { "in_n_8_8",              PIXMAN_a8r8g8b8,    1, PIXMAN_OP_IN,      PIXMAN_a8,       0, PIXMAN_a8 },
+    { "in_8_8",                PIXMAN_a8,          0, PIXMAN_OP_IN,      PIXMAN_null,     0, PIXMAN_a8 },
     { "src_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
     { "src_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
     { "src_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },

commit fcea053561893d116a79f41a113993f1f61b58cf
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Wed Jan 26 13:16:09 2011 -0500

    Disable implementations mentioned in the PIXMAN_DISABLE environment variable.
    
    With this, it becomes possible to do
    
         PIXMAN_DISABLE="sse2 mmx" some_app
    
    which will run some_app without SSE2 and MMX enabled. This is useful
    for benchmarking, testing and narrowing down bugs.
    
    The current list of implementations that can be disabled:
    
        fast
        mmx
        sse2
        arm-simd
        arm-iwmmxt
        arm-neon
        mips-dspr2
        vmx
    
    The general and noop implementations can't be disabled because pixman
    depends on those being available for correct operation.
    
    Reviewed-by: Matt Turner <mattst88@gmail.com>

diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index fcf591a..bb97ae3 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -24,6 +24,7 @@
 #endif
 
 #include <string.h>
+#include <stdlib.h>
 
 #if defined(USE_ARM_SIMD) && defined(_MSC_VER)
 /* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
@@ -328,7 +329,6 @@ pixman_arm_read_auxv_or_cpu_features ()
 
 #elif defined (__linux__) /* linux ELF */
 
-#include <stdlib.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -711,51 +711,84 @@ pixman_have_sse2 (void)
 #endif /* __amd64__ */
 #endif
 
+static pixman_bool_t
+disabled (const char *name)
+{
+    const char *env;
+
+    if ((env = getenv ("PIXMAN_DISABLE")))
+    {
+	do
+	{
+	    const char *end;
+	    int len;
+
+	    if ((end = strchr (env, ' ')))
+		len = end - env;
+	    else
+		len = strlen (env);
+
+	    if (strlen (name) == len && strncmp (name, env, len) == 0)
+	    {
+		printf ("pixman: Disabled %s implementation\n", name);
+		return TRUE;
+	    }
+
+	    env += len;
+	}
+	while (*env++);
+    }
+
+    return FALSE;
+}
+
 pixman_implementation_t *
 _pixman_choose_implementation (void)
 {
     pixman_implementation_t *imp;
 
     imp = _pixman_implementation_create_general();
-    imp = _pixman_implementation_create_fast_path (imp);
-    
+
+    if (!disabled ("fast"))
+	imp = _pixman_implementation_create_fast_path (imp);
+
 #ifdef USE_X86_MMX
-    if (pixman_have_mmx ())
+    if (!disabled ("mmx") && pixman_have_mmx ())
 	imp = _pixman_implementation_create_mmx (imp);
 #endif
 
 #ifdef USE_SSE2
-    if (pixman_have_sse2 ())
+    if (!disabled ("sse2") && pixman_have_sse2 ())
 	imp = _pixman_implementation_create_sse2 (imp);
 #endif
 
 #ifdef USE_ARM_SIMD
-    if (pixman_have_arm_simd ())
+    if (!disabled ("arm-simd") && pixman_have_arm_simd ())
 	imp = _pixman_implementation_create_arm_simd (imp);
 #endif
 
 #ifdef USE_ARM_IWMMXT
-    if (pixman_have_arm_iwmmxt ())
+    if (!disabled ("arm-iwmmxt") && pixman_have_arm_iwmmxt ())
 	imp = _pixman_implementation_create_mmx (imp);
 #endif
 
 #ifdef USE_ARM_NEON
-    if (pixman_have_arm_neon ())
+    if (!disabled ("arm-neon") && pixman_have_arm_neon ())
 	imp = _pixman_implementation_create_arm_neon (imp);
 #endif
 
 #ifdef USE_MIPS_DSPR2
-    if (pixman_have_mips_dspr2 ())
+    if (!disabled ("mips-dspr2") && pixman_have_mips_dspr2 ())
 	imp = _pixman_implementation_create_mips_dspr2 (imp);
 #endif
 
 #ifdef USE_VMX
-    if (pixman_have_vmx ())
+    if (!disabled ("vmx") && pixman_have_vmx ())
 	imp = _pixman_implementation_create_vmx (imp);
 #endif
 
     imp = _pixman_implementation_create_noop (imp);
-    
+
     return imp;
 }
 

commit e7574d336b7c812a888fac22f99f1b0e9a3518b0
Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
Date:   Wed Feb 22 14:23:48 2012 +0100

    MIPS: DSPr2: Added fast-paths for SRC operation.
    
    The following fast-path functions are implemented (routines 4, 5 and 6
    use the same fast-memcpy routine):
        1. src_x888_8888
        2. src_8888_0565
        3. src_0565_8888
        4. src_0565_0565
        5. src_8888_8888
        6. src_0888_0888
    
    Performance numbers before/after on MIPS-74kc @ 1GHz
    
    Reference (before):
    
    lowlevel-blt-bench:
            src_x888_8888 =  L1: 199.35  L2:  96.54  M: 18.87 (100.68%)  HT: 17.12  VT: 16.24  R: 15.43  RT:  9.33 (  61Kops/s)
            src_8888_0565 =  L1:  71.22  L2:  51.95  M: 24.19 ( 96.17%)  HT: 20.71  VT: 19.92  R: 18.15  RT:  9.92 (  63Kops/s)
            src_0565_8888 =  L1:  38.82  L2:  36.22  M: 18.60 ( 73.95%)  HT: 14.47  VT: 13.19  R: 12.97  RT:  6.61 (  49Kops/s)
            src_0565_0565 =  L1: 286.05  L2: 155.02  M: 37.68 (100.54%)  HT: 31.08  VT: 28.07  R: 26.26  RT: 11.93 (  68Kops/s)
            src_8888_8888 =  L1: 454.32  L2: 139.15  M: 19.30 (102.98%)  HT: 17.73  VT: 16.08  R: 16.62  RT: 10.45 (  64Kops/s)
            src_0888_0888 =  L1: 190.47  L2: 106.14  M: 25.26 (101.08%)  HT: 21.88  VT: 20.32  R: 18.83  RT: 10.10 (  63Kops/s)
    cairo-perf-trace:
    [ # ]  backend                         test   min(s) median(s) stddev. count
    [ # ]    image: pixman 0.25.1
    [  0]    image            firefox-asteroids  421.215  421.325   0.01%    4/6
    [  1]    image         firefox-planet-gnome  647.708  648.486   0.13%    6/6
    [  2]    image         gnome-system-monitor  276.073  277.506   0.38%    6/6
    [  3]    image           gnome-terminal-vim  263.866  265.229   0.39%    6/6
    [  4]    image                      poppler  123.576  124.003   0.15%    6/6
    
    Optimized (with these optimizations):
    
    lowlevel-blt-bench:
            src_x888_8888 =  L1: 369.50  L2:  99.37  M: 27.19 (145.07%)  HT: 20.24  VT: 19.48  R: 19.00  RT: 10.22 (  63Kops/s)
            src_8888_0565 =  L1: 105.65  L2:  67.87  M: 25.41 (101.00%)  HT: 20.78  VT: 19.84  R: 18.52  RT:  9.81 (  63Kops/s)
            src_0565_8888 =  L1:  77.10  L2:  63.04  M: 23.37 ( 92.90%)  HT: 20.29  VT: 19.37  R: 18.14  RT: 10.02 (  63Kops/s)
            src_0565_0565 =  L1: 519.02  L2: 241.32  M: 62.35 (166.34%)  HT: 33.74  VT: 27.63  R: 26.12  RT: 11.70 (  67Kops/s)
            src_8888_8888 =  L1: 390.48  L2: 113.99  M: 30.32 (161.77%)  HT: 19.55  VT: 17.05  R: 17.13  RT: 10.19 (  63Kops/s)
            src_0888_0888 =  L1: 349.74  L2: 156.68  M: 40.68 (162.78%)  HT: 25.58  VT: 20.57  R: 20.20  RT:  9.96 (  63Kops/s)
    cairo-perf-trace:
    [ # ]  backend                         test   min(s) median(s) stddev. count
    [ # ]    image: pixman 0.25.1
    [  0]    image            firefox-asteroids  400.050  400.308   0.04%    6/6
    [  1]    image         firefox-planet-gnome  628.978  629.364   0.07%    6/6
    [  2]    image         gnome-system-monitor  270.247  270.313   0.03%    6/6
    [  3]    image           gnome-terminal-vim  256.413  257.641   0.21%    6/6
    [  4]    image                      poppler  119.540  120.023   0.21%    6/6

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index a7fba33..fb7e047 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -107,7 +107,10 @@ if USE_MIPS_DSPR2
 noinst_LTLIBRARIES += libpixman-mips-dspr2.la
 libpixman_mips_dspr2_la_SOURCES = \
         pixman-mips-dspr2.c \
-        pixman-mips-dspr2.h
+        pixman-mips-dspr2.h \
+        pixman-mips-dspr2-asm.S \
+        pixman-mips-dspr2-asm.h \
+        pixman-mips-memcpy-asm.S
 libpixman_mips_dspr2_la_CFLAGS = $(DEP_CFLAGS)
 libpixman_mips_dspr2_la_LIBADD = $(DEP_LIBS)
 libpixman_1_la_LIBADD += libpixman-mips-dspr2.la
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
new file mode 100644
index 0000000..0a4c87e
--- /dev/null
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nemanja Lukic (nlukic@mips.com)
+ */
+
+#include "pixman-mips-dspr2-asm.h"
+
+LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
+/*
+ * a0 - dst (r5g6b5)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+    beqz     a2, 3f
+     nop
+    addiu    t1, a2, -1
+    beqz     t1, 2f
+     nop
+    li       t4, 0xf800f800
+    li       t5, 0x07e007e0
+    li       t6, 0x001f001f
+1:
+    lw       t0, 0(a1)
+    lw       t1, 4(a1)
+    addiu    a1, a1, 8
+    addiu    a2, a2, -2
+
+    CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8
+
+    sh       t2, 0(a0)
+    sh       t3, 2(a0)
+
+    addiu    t2, a2, -1
+    bgtz     t2, 1b
+     addiu   a0, a0, 4
+2:
+    beqz     a2, 3f
+     nop
+    lw       t0, 0(a1)
+
+    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+    sh       t1, 0(a0)
+3:
+    j        ra
+     nop
+
+END(pixman_composite_src_8888_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (r5g6b5)
+ * a2 - w
+ */
+
+    beqz     a2, 3f
+     nop
+    addiu    t1, a2, -1
+    beqz     t1, 2f
+     nop
+    li       t4, 0x07e007e0
+    li       t5, 0x001F001F
+1:
+    lhu      t0, 0(a1)
+    lhu      t1, 2(a1)
+    addiu    a1, a1, 4
+    addiu    a2, a2, -2
+
+    CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
+
+    sw       t2, 0(a0)
+    sw       t3, 4(a0)
+
+    addiu    t2, a2, -1
+    bgtz     t2, 1b
+     addiu   a0, a0, 8
+2:


Reply to: