[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

pixman: Changes to 'upstream-experimental'



 .gitignore                     |    2 
 Makefile.am                    |    4 
 configure.ac                   |  102 +
 demos/Makefile.am              |   13 
 demos/checkerboard.c           |   71 +
 demos/composite-test.c         |   55 -
 demos/gtk-utils.c              |   46 
 demos/parrot.c                 | 1079 ++++++++++++++++++++
 demos/parrot.jpg               |binary
 demos/quad2quad.c              | 2183 +++++++++++++++++++++++++++++++++++++++++
 pixman/Makefile.am             |   12 
 pixman/loongson-mmintrin.h     |  273 +++++
 pixman/pixman-bits-image.c     |   10 
 pixman/pixman-compiler.h       |    4 
 pixman/pixman-cpu.c            |   39 
 pixman/pixman-fast-path.c      |    6 
 pixman/pixman-mips-dspr2-asm.S |  443 ++++++++
 pixman/pixman-mips-dspr2-asm.h |  363 ++++++
 pixman/pixman-mips-dspr2.c     |   22 
 pixman/pixman-mips-dspr2.h     |   42 
 pixman/pixman-mmx.c            |  665 +++++++++---
 pixman/pixman-private.h        |    9 
 test/utils.c                   |   35 
 test/utils.h                   |    9 
 24 files changed, 5211 insertions(+), 276 deletions(-)

New commits:
commit 1e1a00e964a1d8ef43d6d75c1c3a0b5d518d1979
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Tue May 15 13:20:09 2012 -0400

    Pre-release version bump to 0.25.6
    
    Note that 0.25.4 was a botched release that doesn't have a tag and
    doesn't correspond to any commit ID. It was however uploaded and
    announced, so I'll just use the 0.25.6 version number.

diff --git a/configure.ac b/configure.ac
index d949839..502815e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 25)
-m4_define([pixman_micro], 3)
+m4_define([pixman_micro], 6)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit b2c16aaadfae64d2573abb537bfedd92c13b8d06
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Tue May 15 13:19:19 2012 -0400

    demos/Makefile.am: Add parrot.c to EXTRA_DIST
    
    To get 'make distcheck' to pass.

diff --git a/demos/Makefile.am b/demos/Makefile.am
index a664d93..8f734cf 100644
--- a/demos/Makefile.am
+++ b/demos/Makefile.am
@@ -22,6 +22,8 @@ DEMOS =				\
 	quad2quad		\
 	checkerboard
 
+EXTRA_DIST = parrot.c
+
 gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
 alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
 composite_test_SOURCES = composite-test.c $(GTK_UTILS)

commit 50d3088d7882e1054a35e917becb7752662da6f0
Author: Matt Turner <mattst88@gmail.com>
Date:   Fri May 11 21:59:13 2012 -0400

    configure.ac: Rename loongson -> loongson-mmi
    
    Make it match with the other fast paths, and the PIXMAN_DISABLE value is
    already loongson-mmi.

diff --git a/configure.ac b/configure.ac
index 57fd060..d949839 100644
--- a/configure.ac
+++ b/configure.ac
@@ -278,7 +278,7 @@ if test "x$LS_CFLAGS" = "x" ; then
 fi
 
 have_loongson_mmi=no
-AC_MSG_CHECKING(whether to use Loongson MMI)
+AC_MSG_CHECKING(whether to use Loongson MMI assembler)
 
 xserver_save_CFLAGS=$CFLAGS
 CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir"
@@ -301,12 +301,12 @@ int main () {
 }]])], have_loongson_mmi=yes)
 CFLAGS=$xserver_save_CFLAGS
 
-AC_ARG_ENABLE(loongson,
-   [AC_HELP_STRING([--disable-loongson],
-                   [disable Loongson fast paths])],
-   [enable_loongson=$enableval], [enable_loongson=auto])
+AC_ARG_ENABLE(loongson-mmi,
+   [AC_HELP_STRING([--disable-loongson-mmi],
+                   [disable Loongson MMI fast paths])],
+   [enable_loongson_mmi=$enableval], [enable_loongson_mmi=auto])
 
-if test $enable_loongson = no ; then
+if test $enable_loongson_mmi = no ; then
    have_loongson_mmi=disabled
 fi
 
@@ -317,7 +317,7 @@ else
 fi
 
 AC_MSG_RESULT($have_loongson_mmi)
-if test $enable_loongson = yes && test $have_loongson_mmi = no ; then
+if test $enable_loongson_mmi = yes && test $have_loongson_mmi = no ; then
    AC_MSG_ERROR([Loongson MMI not detected])
 fi
 

commit a0a40cb822bec52494c64e6750be50b734dc29df
Author: Matt Turner <mattst88@gmail.com>
Date:   Fri May 11 21:49:42 2012 -0400

    configure.ac: Fix loongson-mmi out-of-tree builds
    
    When building out-of-tree, gcc wasn't able to find loongson-mmintrin.h
    to compile the test program. Add -I$srcdir to CFLAGS to point gcc to it.

diff --git a/configure.ac b/configure.ac
index 345bc33..57fd060 100644
--- a/configure.ac
+++ b/configure.ac
@@ -281,7 +281,7 @@ have_loongson_mmi=no
 AC_MSG_CHECKING(whether to use Loongson MMI)
 
 xserver_save_CFLAGS=$CFLAGS
-CFLAGS=" $LS_CFLAGS $CFLAGS"
+CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir"
 AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 #ifndef __mips_loongson_vector_rev
 #error "Loongson Multimedia Instructions are only available on Loongson"

commit 618a08e6aa03b38e8dc71ac610f7fdd55e8a8558
Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
Date:   Thu May 3 00:03:42 2012 +0200

    MIPS: DSPr2: Added over_n_8_8888 and over_n_8_0565 fast paths.
    
    Performance numbers before/after on MIPS-74kc @ 1GHz
    
    Reference (before):
    
    lowlevel-blt-bench:
         over_n_8_8888 =  L1:  10.40  L2:   9.79  M:  8.47 ( 33.62%)  HT:  7.64  VT:  7.59  R:  7.48  RT:  5.30 (  40Kops/s)
         over_n_8_0565 =  L1:   7.40  L2:   7.23  M:  6.78 ( 17.94%)  HT:  6.23  VT:  6.17  R:  6.14  RT:  4.62 (  37Kops/s)
    
    Optimized:
    
    lowlevel-blt-bench:
         over_n_8_8888 =  L1:  27.25  L2:  26.24  M: 18.15 ( 72.12%)  HT: 14.52  VT: 14.31  R: 13.83  RT:  7.57 (  48Kops/s)
         over_n_8_0565 =  L1:  18.91  L2:  17.59  M: 15.06 ( 39.90%)  HT: 12.18  VT: 11.98  R: 11.83  RT:  6.80 (  46Kops/s)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 6a0fc18..68ad33f 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -527,3 +527,227 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
      nop
 
 END(pixman_composite_over_n_8888_0565_ca_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
+/*
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+    SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4
+    beqz      a3, 4f
+     nop
+    li        t4, 0x00ff00ff
+    li        t5, 0xff
+    addiu     t0, a3, -1
+    beqz      t0, 3f         /* last pixel */
+     srl      t6, a1, 24     /* t6 = srca */
+    not       s4, a1
+    beq       t5, t6, 2f     /* if (srca == 0xff) */
+     srl      s4, s4, 24
+1:
+                             /* a1 = src */
+    lbu       t0, 0(a2)      /* t0 = mask */
+    lbu       t1, 1(a2)      /* t1 = mask */
+    or        t2, t0, t1
+    beqz      t2, 111f       /* if (t0 == 0) && (t1 == 0) */
+     addiu    a2, a2, 2
+    and       t3, t0, t1
+
+    lw        t2, 0(a0)      /* t2 = dst */
+    beq       t3, t5, 11f    /* if (t0 == 0xff) && (t1 == 0xff) */
+     lw       t3, 4(a0)      /* t3 = dst */
+
+    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3
+    not       s2, s0
+    not       s3, s1
+    srl       s2, s2, 24
+    srl       s3, s3, 24
+    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9
+    addu_s.qb s2, t2, s0
+    addu_s.qb s3, t3, s1
+    sw        s2, 0(a0)
+    b         111f
+     sw       s3, 4(a0)
+11:
+    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9
+    addu_s.qb s2, t2, a1
+    addu_s.qb s3, t3, a1
+    sw        s2, 0(a0)
+    sw        s3, 4(a0)
+
+111:
+    addiu     a3, a3, -2
+    addiu     t0, a3, -1
+    bgtz      t0, 1b
+     addiu    a0, a0, 8
+    b         3f
+     nop
+2:
+                             /* a1 = src */
+    lbu       t0, 0(a2)      /* t0 = mask */
+    lbu       t1, 1(a2)      /* t1 = mask */
+    or        t2, t0, t1
+    beqz      t2, 222f       /* if (t0 == 0) && (t1 == 0) */
+     addiu    a2, a2, 2
+    and       t3, t0, t1
+    beq       t3, t5, 22f    /* if (t0 == 0xff) && (t1 == 0xff) */
+     nop
+    lw        t2, 0(a0)      /* t2 = dst */
+    lw        t3, 4(a0)      /* t3 = dst */
+
+    OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \
+                           t6, t7, t4, t8, t9, s0, s1, s2, s3
+    sw        t6, 0(a0)
+    b         222f
+     sw        t7, 4(a0)
+22:
+    sw        a1, 0(a0)
+    sw        a1, 4(a0)
+222:
+    addiu     a3, a3, -2
+    addiu     t0, a3, -1
+    bgtz      t0, 2b
+     addiu    a0, a0, 8
+3:
+    blez      a3, 4f
+     nop
+                             /* a1 = src */
+    lbu       t0, 0(a2)      /* t0 = mask */
+    beqz      t0, 4f         /* if (t0 == 0) */
+     addiu    a2, a2, 1
+    move      t3, a1
+    beq       t0, t5, 31f    /* if (t0 == 0xff) */
+     lw       t1, 0(a0)      /* t1 = dst */
+
+    MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8
+31:
+    not       t2, t3
+    srl       t2, t2, 24
+    MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8
+    addu_s.qb t2, t1, t3
+    sw        t2, 0(a0)
+4:
+    RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4
+    j         ra
+     nop
+
+END(pixman_composite_over_n_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
+/*
+ * a0 - dst  (r5g6b5)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+    beqz     a3, 4f
+     nop
+    li       t4, 0x00ff00ff
+    li       t5, 0xff
+    li       t6, 0xf800f800
+    li       t7, 0x07e007e0
+    li       t8, 0x001F001F
+    addiu    t1, a3, -1
+    beqz     t1, 3f         /* last pixel */
+     srl     t0, a1, 24     /* t0 = srca */
+    not      v0, a1
+    beq      t0, t5, 2f     /* if (srca == 0xff) */
+     srl     v0, v0, 24
+1:
+                            /* a1 = src */
+    lbu      t0, 0(a2)      /* t0 = mask */
+    lbu      t1, 1(a2)      /* t1 = mask */
+    or       t2, t0, t1
+    beqz     t2, 111f       /* if (t0 == 0) && (t1 == 0) */
+     addiu   a2, a2, 2
+    lhu      t2, 0(a0)      /* t2 = dst */
+    lhu      t3, 2(a0)      /* t3 = dst */
+    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4
+    and      t9, t0, t1
+    beq      t9, t5, 11f    /* if (t0 == 0xff) && (t1 == 0xff) */
+     nop
+
+    MIPS_2xUN8x4_MUL_2xUN8   a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8
+    not      s4, s2
+    not      s5, s3
+    srl      s4, s4, 24
+    srl      s5, s5, 24
+    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8
+    addu_s.qb                s4, s2, s0
+    addu_s.qb                s5, s3, s1
+    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
+    sh       t2, 0(a0)
+    b        111f
+     sh      t3, 2(a0)
+11:
+    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8
+    addu_s.qb                s4, a1, s0
+    addu_s.qb                s5, a1, s1
+    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
+    sh       t2, 0(a0)
+    sh       t3, 2(a0)
+111:
+    addiu    a3, a3, -2
+    addiu    t0, a3, -1
+    bgtz     t0, 1b
+     addiu   a0, a0, 4
+    b        3f
+     nop
+2:
+    CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2
+21:
+                            /* a1 = src */
+    lbu      t0, 0(a2)      /* t0 = mask */
+    lbu      t1, 1(a2)      /* t1 = mask */
+    or       t2, t0, t1
+    beqz     t2, 222f       /* if (t0 == 0) && (t1 == 0) */
+     addiu   a2, a2, 2
+    and      t9, t0, t1
+    move     s2, s0
+    beq      t9, t5, 22f    /* if (t0 == 0xff) && (t1 == 0xff) */
+     move    s3, s0
+    lhu      t2, 0(a0)      /* t2 = dst */
+    lhu      t3, 2(a0)      /* t3 = dst */
+
+    CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7
+    OVER_2x8888_2x8_2x8888   a1, a1, t0, t1, s2, s3, \
+                             t2, t3, t4, t9, s4, s5, s6, s7, s8
+    CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5
+22:
+    sh       s2, 0(a0)
+    sh       s3, 2(a0)
+222:
+    addiu    a3, a3, -2
+    addiu    t0, a3, -1
+    bgtz     t0, 21b
+     addiu   a0, a0, 4
+3:
+    blez      a3, 4f
+     nop
+                            /* a1 = src */
+    lbu      t0, 0(a2)      /* t0 = mask */
+    beqz     t0, 4f         /* if (t0 == 0) */
+     nop
+    lhu      t1, 0(a0)      /* t1 = dst */
+    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7
+    beq      t0, t5, 31f    /* if (t0 == 0xff) */
+     move    t3, a1
+
+    MIPS_UN8x4_MUL_UN8       a1, t0, t3, t4, t7, t8, t9
+31:
+    not      t6, t3
+    srl      t6, t6, 24
+    MIPS_UN8x4_MUL_UN8       t2, t6, t2, t4, t7, t8, t9
+    addu_s.qb                t1, t2, t3
+    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7
+    sh       t2, 0(a0)
+4:
+    RESTORE_REGS_FROM_STACK  24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+    j        ra
+     nop
+
+END(pixman_composite_over_n_8_0565_asm_mips)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 12ff42c..8383060 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -499,4 +499,71 @@ LEAF_MIPS32R2(symbol)                                   \
     precr.qb.ph       \d2_8888,  \scratch5, \scratch6
 .endm
 
+/*
+ * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
+ * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR
+ * needed for rounding process. maskLSR must have following value:
+ *   li       maskLSR, 0x00ff00ff
+ */
+.macro OVER_8888_8_8888 s_8888,   \
+                        m_8,      \
+                        d_8888,   \
+                        out_8888, \
+                        maskLSR,  \
+                        scratch1, scratch2, scratch3, scratch4
+    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8, \
+                       \scratch1, \maskLSR, \
+                       \scratch2, \scratch3, \scratch4
+
+    not                \scratch2, \scratch1
+    srl                \scratch2, \scratch2, 24
+
+    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch2, \
+                       \d_8888,   \maskLSR,  \
+                       \scratch3, \scratch4, \out_8888
+
+    addu_s.qb          \out_8888, \d_8888,   \scratch1
+.endm
+
+/*
+ * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
+ * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and
+ * m2_8). It also requires maskLSR needed for rounding process. maskLSR must
+ * have following value:
+ *   li       maskLSR, 0x00ff00ff
+ */
+.macro OVER_2x8888_2x8_2x8888 s1_8888,   \
+                              s2_8888,   \
+                              m1_8,      \
+                              m2_8,      \
+                              d1_8888,   \
+                              d2_8888,   \
+                              out1_8888, \
+                              out2_8888, \
+                              maskLSR,   \
+                              scratch1, scratch2, scratch3, \
+                              scratch4, scratch5, scratch6
+    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888, \
+                           \m1_8,      \m2_8, \
+                           \scratch1,  \scratch2, \
+                           \maskLSR, \
+                           \scratch3,  \scratch4, \out1_8888, \
+                           \out2_8888, \scratch5, \scratch6
+
+    not                    \scratch3,  \scratch1
+    srl                    \scratch3,  \scratch3, 24
+    not                    \scratch4,  \scratch2
+    srl                    \scratch4,  \scratch4, 24
+
+    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888, \
+                           \scratch3,  \scratch4, \
+                           \d1_8888,   \d2_8888, \
+                           \maskLSR, \
+                           \scratch5,  \scratch6, \out1_8888, \
+                           \out2_8888, \scratch3, \scratch4
+
+    addu_s.qb              \out1_8888, \d1_8888,  \scratch1
+    addu_s.qb              \out2_8888, \d2_8888,  \scratch2
+.endm
+
 #endif //PIXMAN_MIPS_DSPR2_ASM_H
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 018770a..7081734 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -53,6 +53,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
                                        uint32_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
                                        uint32_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
+                                       uint8_t, 1, uint16_t, 1)
 
 static pixman_bool_t
 pixman_fill_mips (uint32_t *bits,
@@ -195,6 +199,12 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5,   mips_composite_over_n_8888_0565_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5,   mips_composite_over_n_8888_0565_ca),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, mips_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, mips_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, mips_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, mips_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
 
     { PIXMAN_OP_NONE },
 };

commit 7d4beedc612a32b73d7673bbf6447de0f3fca298
Author: Matt Turner <mattst88@gmail.com>
Date:   Wed May 9 19:20:55 2012 -0400

    mmx: add and use pack_4x565 function
    
    The pack_4x565 function makes use of the pack_4xpacked565 function, which uses pmadd.
    
    Some of the speed up is probably attributable to removing the artificial
    serialization imposed by the
    	vdest = pack_565 (..., vdest, 0);
    	vdest = pack_565 (..., vdest, 1);
    	...
    pattern.
    
    Loongson:
            over_n_0565 =  L1:  16.44  L2:  16.42  M: 13.83 (  9.85%)  HT: 12.83  VT: 12.61  R: 12.34  RT:  8.90 (  93Kops/s)
            over_n_0565 =  L1:  42.48  L2:  42.53  M: 29.83 ( 21.20%)  HT: 23.39  VT: 23.72  R: 21.80  RT: 11.60 ( 113Kops/s)
    
         over_8888_0565 =  L1:  15.61  L2:  15.42  M: 12.11 ( 25.79%)  HT: 11.07  VT: 10.70  R: 10.37  RT:  7.25 (  82Kops/s)
         over_8888_0565 =  L1:  35.01  L2:  35.20  M: 21.42 ( 45.57%)  HT: 18.12  VT: 17.61  R: 16.09  RT:  9.01 (  97Kops/s)
    
          over_n_8_0565 =  L1:  15.17  L2:  14.94  M: 12.57 ( 17.86%)  HT: 11.96  VT: 11.52  R: 10.79  RT:  7.31 (  79Kops/s)
          over_n_8_0565 =  L1:  29.83  L2:  29.79  M: 21.85 ( 30.94%)  HT: 18.82  VT: 18.25  R: 16.15  RT:  8.72 (  91Kops/s)
    
    over_n_8888_0565_ca =  L1:  15.25  L2:  15.02  M: 11.64 ( 41.39%)  HT: 11.08  VT: 10.72  R: 10.02  RT:  7.00 (  77Kops/s)
    over_n_8888_0565_ca =  L1:  30.12  L2:  29.99  M: 19.47 ( 68.99%)  HT: 17.05  VT: 16.55  R: 14.67  RT:  8.38 (  88Kops/s)
    
    ARM/iwMMXt:
            over_n_0565 =  L1:  19.29  L2:  19.88  M: 17.38 ( 10.54%)  HT: 15.53  VT: 16.11  R: 13.69  RT: 11.00 (  96Kops/s)
            over_n_0565 =  L1:  36.02  L2:  34.85  M: 28.04 ( 16.97%)  HT: 22.12  VT: 24.21  R: 22.36  RT: 12.22 ( 103Kops/s)
    
         over_8888_0565 =  L1:  18.38  L2:  16.59  M: 12.34 ( 22.29%)  HT: 11.67  VT: 11.71  R: 11.02  RT:  6.89 (  72Kops/s)
         over_8888_0565 =  L1:  24.96  L2:  22.17  M: 15.11 ( 26.81%)  HT: 14.14  VT: 13.71  R: 13.18  RT:  8.13 (  78Kops/s)
    
          over_n_8_0565 =  L1:  14.65  L2:  12.44  M: 11.56 ( 14.50%)  HT: 10.93  VT: 10.39  R: 10.06  RT:  7.05 (  70Kops/s)
          over_n_8_0565 =  L1:  18.37  L2:  14.98  M: 13.97 ( 16.51%)  HT: 12.67  VT: 10.35  R: 11.80  RT:  8.14 (  74Kops/s)
    
    over_n_8888_0565_ca =  L1:  14.27  L2:  12.93  M: 10.52 ( 33.23%)  HT:  9.70  VT:  9.90  R:  9.31  RT:  6.34 (  65Kops/s)
    over_n_8888_0565_ca =  L1:  19.69  L2:  17.58  M: 13.40 ( 42.35%)  HT: 11.75  VT: 11.33  R: 11.17  RT:  7.49 (  73Kops/s)

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index b14201a..01a2bc9 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -598,6 +598,12 @@ pack_4xpacked565 (__m64 a, __m64 b)
 #endif
 }
 
+static force_inline __m64
+pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3)
+{
+    return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3));
+}
+
 #ifndef _MSC_VER
 
 static force_inline __m64
@@ -1396,16 +1402,14 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
 
 	while (w >= 4)
 	{
-	    __m64 vdest;
+	    __m64 vdest = *(__m64 *)dst;
 
-	    vdest = *(__m64 *)dst;
-
-	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0);
-	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1);
-	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2);
-	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3);
+	    __m64 v0 = over (vsrc, vsrca, expand565 (vdest, 0));
+	    __m64 v1 = over (vsrc, vsrca, expand565 (vdest, 1));
+	    __m64 v2 = over (vsrc, vsrca, expand565 (vdest, 2));
+	    __m64 v3 = over (vsrc, vsrca, expand565 (vdest, 3));
 
-	    *(__m64 *)dst = vdest;
+	    *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
 
 	    dst += 4;
 	    w -= 4;
@@ -1818,22 +1822,19 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
 
 	while (w >= 4)
 	{
-	    __m64 vsrc0, vsrc1, vsrc2, vsrc3;
-	    __m64 vdest;
+	    __m64 vdest = *(__m64 *)dst;
 
-	    vsrc0 = load8888 ((src + 0));
-	    vsrc1 = load8888 ((src + 1));
-	    vsrc2 = load8888 ((src + 2));
-	    vsrc3 = load8888 ((src + 3));
+	    __m64 vsrc0 = load8888 ((src + 0));
+	    __m64 vsrc1 = load8888 ((src + 1));
+	    __m64 vsrc2 = load8888 ((src + 2));
+	    __m64 vsrc3 = load8888 ((src + 3));
 
-	    vdest = *(__m64 *)dst;
-
-	    vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0);
-	    vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1);
-	    vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2);
-	    vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3);
+	    __m64 v0 = over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0));
+	    __m64 v1 = over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1));
+	    __m64 v2 = over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2));
+	    __m64 v3 = over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3));
 
-	    *(__m64 *)dst = vdest;
+	    *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
 
 	    w -= 4;
 	    dst += 4;
@@ -2368,25 +2369,22 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	    }
 	    else if (m0 | m1 | m2 | m3)
 	    {
-		__m64 vdest;
-		__m64 vm0, vm1, vm2, vm3;
-
-		vdest = *(__m64 *)dst;
+		__m64 vdest = *(__m64 *)dst;
 
-		vm0 = to_m64 (m0);
-		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0),
-					   expand565 (vdest, 0)), vdest, 0);
-		vm1 = to_m64 (m1);
-		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm1),
-					   expand565 (vdest, 1)), vdest, 1);
-		vm2 = to_m64 (m2);
-		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2),
-					   expand565 (vdest, 2)), vdest, 2);
-		vm3 = to_m64 (m3);
-		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3),
-					   expand565 (vdest, 3)), vdest, 3);
-
-		*(__m64 *)dst = vdest;
+		__m64 vm0 = to_m64 (m0);
+		__m64 v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0),
+					   expand565 (vdest, 0));
+		__m64 vm1 = to_m64 (m1);
+		__m64 v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1),
+					   expand565 (vdest, 1));
+		__m64 vm2 = to_m64 (m2);
+		__m64 v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2),
+					   expand565 (vdest, 2));
+		__m64 vm3 = to_m64 (m3);
+		__m64 v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3),
+					   expand565 (vdest, 3));
+
+		*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);;
 	    }
 
 	    w -= 4;
@@ -2483,24 +2481,23 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 
 	    if ((a0 & a1 & a2 & a3) == 0xFF)
 	    {
-		__m64 vdest;
-		vdest = pack_565 (invert_colors (load8888 (&s0)), _mm_setzero_si64 (), 0);
-		vdest = pack_565 (invert_colors (load8888 (&s1)), vdest, 1);
-		vdest = pack_565 (invert_colors (load8888 (&s2)), vdest, 2);
-		vdest = pack_565 (invert_colors (load8888 (&s3)), vdest, 3);
+		__m64 v0 = invert_colors (load8888 (&s0));
+		__m64 v1 = invert_colors (load8888 (&s1));
+		__m64 v2 = invert_colors (load8888 (&s2));
+		__m64 v3 = invert_colors (load8888 (&s3));
 
-		*(__m64 *)dst = vdest;
+		*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
 	    }
 	    else if (s0 | s1 | s2 | s3)
 	    {
 		__m64 vdest = *(__m64 *)dst;
 
-		vdest = pack_565 (over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0)), vdest, 0);
-		vdest = pack_565 (over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1)), vdest, 1);
-		vdest = pack_565 (over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2)), vdest, 2);
-		vdest = pack_565 (over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3)), vdest, 3);
+		__m64 v0 = over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0));
+		__m64 v1 = over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1));
+		__m64 v2 = over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2));
+		__m64 v3 = over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3));
 
-		*(__m64 *)dst = vdest;
+		*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
 	    }
 
 	    w -= 4;
@@ -2675,12 +2672,12 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	    {
 		__m64 vdest = *(__m64 *)q;
 
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0)), vdest, 0);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1)), vdest, 1);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2)), vdest, 2);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3)), vdest, 3);
+		__m64 v0 = in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0));
+		__m64 v1 = in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1));
+		__m64 v2 = in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2));
+		__m64 v3 = in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3));
 
-		*(__m64 *)q = vdest;
+		*(__m64 *)q = pack_4x565 (v0, v1, v2, v3);
 	    }
 	    twidth -= 4;
 	    p += 4;

commit 2beabd9fed76de0023eb36b0c938b8803aa8d129
Author: Matt Turner <mattst88@gmail.com>
Date:   Thu May 10 16:15:34 2012 -0400

    configure.ac: make -march=loongson2f come before CFLAGS
    
    Otherwise we'd have -march=loongson2f being overridden by automake's
    CFLAGS ordering which causes build failures when -march=<not loongson2f>
    is specified by the user.

diff --git a/configure.ac b/configure.ac
index 5478734..345bc33 100644
--- a/configure.ac
+++ b/configure.ac
@@ -281,7 +281,7 @@ have_loongson_mmi=no
 AC_MSG_CHECKING(whether to use Loongson MMI)
 
 xserver_save_CFLAGS=$CFLAGS
-CFLAGS=" $CFLAGS $LS_CFLAGS"
+CFLAGS=" $LS_CFLAGS $CFLAGS"
 AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 #ifndef __mips_loongson_vector_rev
 #error "Loongson Multimedia Instructions are only available on Loongson"

commit dadb9a318b8ca10c65e31e7278f4335a6968d246
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Tue May 8 10:05:18 2012 -0400

    Add Makefile.win32 and Makefile.win32.common to EXTRA_DIST
    
    https://bugs.freedesktop.org/show_bug.cgi?id=46905

diff --git a/Makefile.am b/Makefile.am
index df8677a..88ff897 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -21,6 +21,10 @@ RELEASE_XORG_HOST =	$(USERNAME)@xorg.freedesktop.org
 RELEASE_XORG_DIR =	/srv/xorg.freedesktop.org/archive/individual/lib
 RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org, pixman@lists.freedesktop.org
 
+EXTRA_DIST =				\
+	Makefile.win32			\
+	Makefile.win32.common
+
 tar_gz = $(PACKAGE)-$(VERSION).tar.gz
 tar_bz2 = $(PACKAGE)-$(VERSION).tar.bz2
 

commit 3c57ec471e1aacc863747b82bbe0a84c6d776ab7
Author: Matt Turner <mattst88@gmail.com>
Date:   Wed May 9 22:50:50 2012 -0400

    .gitignore: add demos/checkerboard and demos/quad2quad

diff --git a/.gitignore b/.gitignore
index 60b5bb4..98612c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,11 +27,13 @@ config.h
 config.h.in
 .*.swp
 demos/alpha-test
+demos/checkerboard
 demos/clip-in
 demos/clip-test
 demos/composite-test
 demos/convolution-test
 demos/gradient-test
+demos/quad2quad
 demos/radial-test
 demos/screen-test
 demos/trap-test

commit 2d431b53d3cdbf1997e2d3b8e17408c12220c3a1
Author: Matt Turner <mattst88@gmail.com>
Date:   Fri Apr 27 14:12:56 2012 -0400

    mmx: Use wpackhus in src_x888_0565 on iwMMXt
    
    iwMMXt has an unsigned saturation pack instruction, while MMX/EXT
    and Loongson don't.
    
    ARM/iwMMXt:
    src_8888_0565 =  L1: 110.38  L2:  82.33  M: 40.92 ( 73.22%)  HT: 35.63  VT: 32.22  R: 30.07  RT: 18.40 ( 132Kops/s)
    src_8888_0565 =  L1: 117.91  L2:  83.05  M: 41.52 ( 75.58%)  HT: 37.63  VT: 35.40  R: 29.37  RT: 19.39 ( 134Kops/s)

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 7fe19d5..b14201a 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -589,9 +589,13 @@ pack_4xpacked565 (__m64 a, __m64 b)
     t1 = _mm_or_si64 (t1, g1);
 
     t0 = shift(t0, -5);
+#ifdef USE_ARM_IWMMXT
+    t1 = shift(t1, -5);
+    return _mm_packs_pu32 (t0, t1);
+#else
     t1 = shift(t1, -5 + 16);
-
     return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0));
+#endif
 }
 
 #ifndef _MSC_VER

commit 2ddd1c498b723e8e48a38eef01d5befba30b5259
Author: Matt Turner <mattst88@gmail.com>
Date:   Thu Apr 19 17:33:27 2012 -0400

    mmx: add src_8888_0565
    
    Uses the pmadd technique described in
    http://software.intel.com/sites/landingpage/legacy/mmx/MMX_App_24-16_Bit_Conversion.pdf
    
    The technique uses the packssdw instruction which uses signed
    saturation. This works in their example because they pack 888 to 555
    leaving the high bit as zero. For packing to 565, it is unsuitable, so
    we replace it with an or+shuffle.
    
    Loongson:
    src_8888_0565 =  L1: 106.13  L2:  83.57  M: 33.46 ( 68.90%)  HT: 30.29  VT: 27.67  R: 26.11  RT: 15.06 ( 135Kops/s)
    src_8888_0565 =  L1: 122.10  L2: 117.53  M: 37.97 ( 78.58%)  HT: 33.14  VT: 30.09  R: 29.01  RT: 15.76 ( 139Kops/s)
    
    ARM/iwMMXt:
    src_8888_0565 =  L1:  67.88  L2:  56.61  M: 31.20 ( 56.74%)  HT: 29.22  VT: 27.01  R: 25.39  RT: 19.29 ( 130Kops/s)
    src_8888_0565 =  L1: 110.38  L2:  82.33  M: 40.92 ( 73.22%)  HT: 35.63  VT: 32.22  R: 30.07  RT: 18.40 ( 132Kops/s)

diff --git a/pixman/loongson-mmintrin.h b/pixman/loongson-mmintrin.h
index 76ae892..8295ba0 100644
--- a/pixman/loongson-mmintrin.h
+++ b/pixman/loongson-mmintrin.h
@@ -84,6 +84,17 @@ _mm_empty (void)
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd_pi16 (__m64 __m1, __m64 __m2)
+{
+	__m64 ret;
+	asm("pmaddhw %0, %1, %2\n\t"
+	   : "=f" (ret)
+	   : "f" (__m1), "f" (__m2)
+	);
+	return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mulhi_pu16 (__m64 __m1, __m64 __m2)
 {
 	__m64 ret;
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 320e20a..7fe19d5 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -179,9 +179,12 @@ typedef struct
     mmxdatafield mmx_4x0080;
     mmxdatafield mmx_565_rgb;
     mmxdatafield mmx_565_unpack_multiplier;
+    mmxdatafield mmx_565_pack_multiplier;
     mmxdatafield mmx_565_r;
     mmxdatafield mmx_565_g;
     mmxdatafield mmx_565_b;
+    mmxdatafield mmx_packed_565_rb;
+    mmxdatafield mmx_packed_565_g;
 #ifndef USE_LOONGSON_MMI
     mmxdatafield mmx_mask_0;
     mmxdatafield mmx_mask_1;
@@ -207,9 +210,12 @@ static const mmx_data_t c =
     MMXDATA_INIT (.mmx_4x0080,                   0x0080008000800080),
     MMXDATA_INIT (.mmx_565_rgb,                  0x000001f0003f001f),
     MMXDATA_INIT (.mmx_565_unpack_multiplier,    0x0000008404100840),
+    MMXDATA_INIT (.mmx_565_pack_multiplier,      0x2000000420000004),
     MMXDATA_INIT (.mmx_565_r,                    0x000000f800000000),
     MMXDATA_INIT (.mmx_565_g,                    0x0000000000fc0000),
     MMXDATA_INIT (.mmx_565_b,                    0x00000000000000f8),
+    MMXDATA_INIT (.mmx_packed_565_rb,            0x00f800f800f800f8),
+    MMXDATA_INIT (.mmx_packed_565_g,             0x0000fc000000fc00),
 #ifndef USE_LOONGSON_MMI
     MMXDATA_INIT (.mmx_mask_0,                   0xffffffffffff0000),
     MMXDATA_INIT (.mmx_mask_1,                   0xffffffff0000ffff),
@@ -567,6 +573,27 @@ pack_565 (__m64 pixel, __m64 target, int pos)
 #endif
 }
 
+static force_inline __m64
+pack_4xpacked565 (__m64 a, __m64 b)
+{
+    __m64 rb0 = _mm_and_si64 (a, MC (packed_565_rb));
+    __m64 rb1 = _mm_and_si64 (b, MC (packed_565_rb));
+
+    __m64 t0 = _mm_madd_pi16 (rb0, MC (565_pack_multiplier));
+    __m64 t1 = _mm_madd_pi16 (rb1, MC (565_pack_multiplier));
+
+    __m64 g0 = _mm_and_si64 (a, MC (packed_565_g));
+    __m64 g1 = _mm_and_si64 (b, MC (packed_565_g));
+
+    t0 = _mm_or_si64 (t0, g0);
+    t1 = _mm_or_si64 (t1, g1);
+
+    t0 = shift(t0, -5);
+    t1 = shift(t1, -5 + 16);
+
+    return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0));
+}
+
 #ifndef _MSC_VER
 
 static force_inline __m64
@@ -2091,6 +2118,60 @@ pixman_fill_mmx (uint32_t *bits,
 }
 
 static void
+mmx_composite_src_x888_0565 (pixman_implementation_t *imp,
+                             pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint16_t    *dst_line, *dst;
+    uint32_t    *src_line, *src, s;
+    int dst_stride, src_stride;
+    int32_t w;
+
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	while (w && (unsigned long)dst & 7)
+	{
+	    s = *src++;
+	    *dst = CONVERT_8888_TO_0565 (s);
+	    dst++;
+	    w--;
+	}
+
+	while (w >= 4)
+	{
+	    __m64 vdest;
+	    __m64 vsrc0 = ldq_u ((__m64 *)(src + 0));
+	    __m64 vsrc1 = ldq_u ((__m64 *)(src + 2));
+
+	    vdest = pack_4xpacked565 (vsrc0, vsrc1);
+
+	    *(__m64 *)dst = vdest;
+
+	    w -= 4;
+	    src += 4;
+	    dst += 4;
+	}
+
+	while (w)
+	{
+	    s = *src++;
+	    *dst = CONVERT_8888_TO_0565 (s);
+	    dst++;
+	    w--;
+	}
+    }
+}
+
+static void
 mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
                             pixman_composite_info_t *info)
 {
@@ -3433,6 +3514,10 @@ static const pixman_fast_path_t mmx_fast_paths[] =
     PIXMAN_STD_FAST_PATH    (ADD,  a8,       null,     a8,       mmx_composite_add_8_8		   ),
     PIXMAN_STD_FAST_PATH    (ADD,  solid,    a8,       a8,       mmx_composite_add_n_8_8           ),
 
+    PIXMAN_STD_FAST_PATH    (SRC,  a8r8g8b8, null,     r5g6b5,   mmx_composite_src_x888_0565       ),
+    PIXMAN_STD_FAST_PATH    (SRC,  a8b8g8r8, null,     b5g6r5,   mmx_composite_src_x888_0565       ),
+    PIXMAN_STD_FAST_PATH    (SRC,  x8r8g8b8, null,     r5g6b5,   mmx_composite_src_x888_0565       ),
+    PIXMAN_STD_FAST_PATH    (SRC,  x8b8g8r8, null,     b5g6r5,   mmx_composite_src_x888_0565       ),
     PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8r8g8b8, mmx_composite_src_n_8_8888        ),
     PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       x8r8g8b8, mmx_composite_src_n_8_8888        ),
     PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8b8g8r8, mmx_composite_src_n_8_8888        ),

commit 3e8fe65a0893fcd82bdea205de49f53be32bb074
Author: Matt Turner <mattst88@gmail.com>
Date:   Wed Apr 18 16:24:28 2012 -0400

    mmx: add x8r8g8b8 fetcher
    
    Loongson:


Reply to: