pixman: Changes to 'debian-unstable'

To: debian-x@lists.debian.org
Subject: pixman: Changes to 'debian-unstable'
From: Cyril Brulebois <kibi@alioth.debian.org>
Date: Fri, 29 Apr 2011 16:07:23 +0000
Message-id: <E1QFqDz-0006wS-S9@alioth.debian.org>
 ChangeLog                             |  579 ++++++++++++++++++++
 Makefile.am                           |    7 
 configure.ac                          |   51 +
 debian/changelog                      |   11 
 demos/tri-test.c                      |    2 
 pixman/Makefile.am                    |    1 
 pixman/Makefile.win32                 |    6 
 pixman/pixman-access.c                |   97 +++
 pixman/pixman-arm-common.h            |   90 +++
 pixman/pixman-arm-neon-asm-bilinear.S |  768 ++++++++++++++++++++++++++
 pixman/pixman-arm-neon-asm.S          |  982 +++++++++++++++++++++++++++++++++-
 pixman/pixman-arm-neon-asm.h          |   17 
 pixman/pixman-arm-neon.c              |   62 ++
 pixman/pixman-arm-simd-asm.S          |   66 +-
 pixman/pixman-arm-simd.c              |    9 
 pixman/pixman-bits-image.c            |   20 
 pixman/pixman-conical-gradient.c      |    7 
 pixman/pixman-fast-path.h             |  432 ++++++++++++++
 pixman/pixman-general.c               |   58 --
 pixman/pixman-image.c                 |    1 
 pixman/pixman-implementation.c        |   46 -
 pixman/pixman-linear-gradient.c       |   16 
 pixman/pixman-private.h               |   51 -
 pixman/pixman-radial-gradient.c       |    7 
 pixman/pixman-solid-fill.c            |   17 
 pixman/pixman-sse2.c                  |  139 ++++
 pixman/pixman-trap.c                  |   23 
 pixman/pixman.c                       |    6 
 pixman/pixman.h                       |    6 
 test/Makefile.am                      |    2 
 test/Makefile.win32                   |   73 ++
 test/affine-test.c                    |    6 
 test/blitters-test.c                  |   13 
 test/composite-traps-test.c           |    8 
 test/composite.c                      |   60 +-
 test/fetch-test.c                     |   63 +-
 test/scaling-helpers-test.c           |   93 +++
 test/scaling-test.c                   |    6 
 test/stress-test.c                    |   41 +
 test/trap-crasher.c                   |   20 
 test/utils.c                          |   19 
 test/utils.h                          |    5 
 42 files changed, 3679 insertions(+), 307 deletions(-)

New commits:
commit 2296b15c9d4d5002f354695992e12ac5d912677d
Author: Cyril Brulebois <kibi@debian.org>
Date:   Fri Apr 29 17:53:20 2011 +0200

    Upload to unstable.

diff --git a/debian/changelog b/debian/changelog
index a2680f6..b14d5e2 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-pixman (0.21.8-1) UNRELEASED; urgency=low
+pixman (0.21.8-1) unstable; urgency=low
 
   * New upstream release.
   * As seen in the upstream announcement: “When this version of pixman is
@@ -7,7 +7,7 @@ pixman (0.21.8-1) UNRELEASED; urgency=low
   * This new release should fix the FTBFS on big endian machines, tests
     were failing due to missing swapping (Closes: #622211).
 
- -- Cyril Brulebois <kibi@debian.org>  Fri, 29 Apr 2011 17:52:08 +0200
+ -- Cyril Brulebois <kibi@debian.org>  Fri, 29 Apr 2011 17:53:12 +0200
 
 pixman (0.21.6-2) unstable; urgency=low
 

commit c48a9b803597eebd63b3a77f5cc65c7eb2f98fdf
Author: Cyril Brulebois <kibi@debian.org>
Date:   Fri Apr 29 17:53:09 2011 +0200

    Mention endianness-related FTBFS fix (Closes: #622211).

diff --git a/debian/changelog b/debian/changelog
index a5fdd88..a2680f6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -4,6 +4,8 @@ pixman (0.21.8-1) UNRELEASED; urgency=low
   * As seen in the upstream announcement: “When this version of pixman is
     used with the git version of the X server, trapezoid rendering will be
     corrupted. This is a known bug in the X server.”
+  * This new release should fix the FTBFS on big endian machines, tests
+    were failing due to missing swapping (Closes: #622211).
 
  -- Cyril Brulebois <kibi@debian.org>  Fri, 29 Apr 2011 17:52:08 +0200
 

commit fa956ebd6b28216e5144cfdc87f44660256e1b1a
Author: Cyril Brulebois <kibi@debian.org>
Date:   Fri Apr 29 17:52:36 2011 +0200

    Bump changelogs.

diff --git a/ChangeLog b/ChangeLog
index 17896a2..69d93cb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,582 @@
+commit 89868e93bd8d66f0fac0f0b42cf7718756992e4e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Apr 19 00:00:37 2011 -0400
+
+    Pre-release version bump to 0.21.8
+
+commit 33f1652b953467f3910605b3be723e21b3ebe078
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Apr 13 11:57:35 2011 +0900
+
+    ARM: Enable bilinear fast paths using scanline functions in pixman-arm-neon-asm-bilinear.S
+    
+    Enable fast paths which are supported by scanline functions in
+    pixman-arm-neon-asm-bilinear.S
+
+commit e8185f1cb43417d9f7b1d2856bb899f1b84fde81
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Apr 13 11:48:40 2011 +0900
+
+    ARM: NEON scanline functions for bilinear scaling
+    
+    General fetch->combine->store based bilinear scanline functions.
+    Need further optimizations and eventually will be replaced with optimal
+    functions one by one.
+    General functions should be located in pixman-arm-neon-asm-bilinear.S and
+    optimal functions in pixman-arm-neon-asm.S
+    
+    Following general bilinear scanline functions are implemented
+        over_8888_8888
+        add_8888_8888
+        src_8888_8_8888
+        src_8888_8_0565
+        src_0565_8_x888
+        src_0565_8_0565
+        over_8888_8_8888
+        add_8888_8_8888
+
+commit 00939d35628e733fab63606cfb1d7fcb667860d3
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Apr 13 11:43:44 2011 +0900
+
+    ARM: Common macro for scaled bilinear scanline function with A8 mask
+    
+    Defining PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST macro for declaration of
+    scaled bilinear scanline functions in common header.
+
+commit b455496890f7f941d561c284aca14783300bedd6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 11 07:52:57 2011 -0500
+
+    Offset rendering in pixman_composite_trapezoids() by (x_dst, y_dst)
+    
+    Previously, this function would do coordinate calculations in such a
+    way that (x_dst, y_dst) would only affect the alignment of the source
+    image, but not of the traps, which would always be considered to be in
+    absolute destination coordinates. This is unlike the
+    pixman_image_composite() function which also registers the mask to the
+    destination.
+    
+    This patch makes it so that traps are also offset by (x_dst, y_dst).
+    
+    Also add a comment explaining how this function is supposed to
+    operate, and update tri-test.c and composite-traps-test.c to deal with
+    the new semantics.
+
+commit e75e6a4ef5c5a8ac8b0e8464f08f83fd2b6e86ed
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 2 23:24:48 2011 -0400
+
+    ARM: Add 'neon_composite_over_n_8888_0565_ca' fast path
+    
+    This improves the performance of the firefox-talos-gfx benchmark with
+    the image16 backend. Benchmark on an 800 MHz ARM Cortex A8:
+    
+    Before:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]  image16            firefox-talos-gfx  121.773  122.218   0.15%    6/6
+    
+    After:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]  image16            firefox-talos-gfx   85.247   85.563   0.22%    6/6
+    
+    V2: Slightly better instruction scheduling based on comments from Taekyun Kim.
+    V3: Eliminate all stalls from the inner loop. Also based on comments from Taekyun Kim.
+
+commit 1670b952143284f480c39ff087b5694a64eb7db3
+Author: Gilles Espinasse <g.esp@free.fr>
+Date:   Tue Apr 12 22:44:56 2011 +0200
+
+    Fix OpenMP not supported case
+    
+    PIXMAN_LINK_WITH_ENV did not fail unless -Wall -Werror is used.
+    So even when the compiler did not support OpenMP, USE_OPENMP was defined.
+    Fix that by running the second OpenMP test only when the first AC_OPENMP check finds support
+    
+    configure tested in the cases :
+    gcc without libgomp support, no openmp option, --enable-openmp and --disable-openmp
+    gcc with libgomp support, no openmp option, --enable-openmp and --disable-openmp
+    
+    Not tested with autoconf version not knowing openmp (<2.62)
+    
+    Warn when --enable-openmp is requested but no support is found
+    
+    Signed-off-by: Gilles Espinasse <g.esp@free.fr>
+
+commit b9e8f7fb7494e4ee4be56d1555632233a494b28e
+Author: Gilles Espinasse <g.esp@free.fr>
+Date:   Tue Apr 12 22:44:25 2011 +0200
+
+    Fix missing AC_MSG_RESULT value from Werror test
+    
+    Use the correct variable name
+    
+    Signed-off-by: Gilles Espinasse <g.esp@free.fr>
+
+commit caae4e82ffdeebfb9aa98a6c49dd563e065c0959
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 21 20:25:27 2011 +0200
+
+    ARM: pipelined NEON implementation of bilinear scaled 'src_8888_0565'
+    
+    Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=10020565, speed=33.59 MPix/s
+      after:  op=1, src=20028888, dst=10020565, speed=46.25 MPix/s
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=10020565, speed=63.86 MPix/s
+      after:  op=1, src=20028888, dst=10020565, speed=84.22 MPix/s
+
+commit d080d59b802c351daed84b92bd4eb20c775b81c7
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 16 17:24:49 2011 +0200
+
+    ARM: pipelined NEON implementation of bilinear scaled 'src_8888_8888'
+    
+    Performance of the inner loop when working with the data in L1 cache:
+        ARM Cortex-A8: 41 cycles per 4 pixels (no stalls and partial dual issue)
+        ARM Cortex-A9: 48 cycles per 4 pixels (no stalls)
+    
+    It might be still possible to improve performance even more on ARM Cortex-A8
+    with a better use of dual issue.
+    
+    Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=40.38 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=48.47 MPix/s
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=79.68 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=93.11 MPix/s
+
+commit b496a8b279baebb8b9ab4fbcb2101583be08fe3b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Mar 17 19:42:01 2011 +0200
+
+    ARM: support different levels of loop unrolling in bilinear scaler
+    
+    Now an extra 'flag' parameter is supported in bilinear scanline scaling
+    function generation macro. It can be used to enable 4 or 8 pixels per
+    loop iteration unrolling and provide save/restore code for d8-d15
+    registers.
+
+commit 34ca9cf03fa897cd377cdb19acc22e876b2f4b0e
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 21 18:41:53 2011 +0200
+
+    ARM: use less ARM instructions in NEON bilinear scaling code
+    
+    This reduces code size and also puts less pressure on the
+    instruction decoder.
+
+commit 0f7be9f72ef6bfe2555b7f2cc29297c4f4762740
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 16 16:33:41 2011 +0200
+
+    ARM: support for software pipelining in bilinear macros
+    
+    Now it's possible to override the main loop of bilinear scaling code
+    with optimized pipelined implementation.
+
+commit 9638af95832563040d6bd861cf4c20ab632058df
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Mar 10 16:12:23 2011 +0200
+
+    ARM: use aligned memory writes in NEON bilinear scaling code
+
+commit 8bba3a0e1e54f03ea78fb44314f3bfa57ec8da31
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Mar 10 15:34:10 2011 +0200
+
+    ARM: tweaked horizontal weights update in NEON bilinear scaling code
+    
+    Moving horizontal interpolation weights update instructions from the
+    beginning of loop to its end allows to hide some pipeline stalls and
+    improve performance.
+
+commit a2153222677327be43251012f462d19a7e98ce14
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Apr 3 20:32:30 2011 -0400
+
+    ARM: Tiny improvement in over_n_8888_8888_ca_process_pixblock_head
+    
+    Instead of two
+    
+    	mvn d24, d24
+    	mvn d25, d25
+    
+    use just one
+    
+    	mvn q12, q12
+    
+    Also move another vmvn instruction into the created pipeline bubble,
+    as pointed out by Siarhei.
+
+commit 44f99735d9c6a897078db12172d9d2d07b204f37
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 2 14:12:12 2011 -0400
+
+    Makefile.am: Put development releases in "snapshots" directory
+    
+    Up until now, all pixman releases, both snapshots and releases, were
+    uploaded to the "releases" directory on www.cairographics.org, but
+    it's better to put development snapshots in the "snapshots" directory.
+    
+    This patch changes Makefile.am to do that.
+
+commit ad3cbfb073fc325e1b3152898ca71b8255675957
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Mar 22 13:42:05 2011 -0400
+
+    test: Fix infinite loop in composite
+    
+    When run in PIXMAN_RANDOMIZE_TESTS mode, this test would go into an
+    infinite loop because the loop started at 'seed' but the stop
+    condition was still N_TESTS.
+
+commit b514e63cfc58af21f7097db5a1b04292a758782a
+Author: Alexandros Frantzis <alexandros.frantzis@linaro.org>
+Date:   Fri Mar 18 14:37:27 2011 +0200
+
+    Add support for the r8g8b8a8 and r8g8b8x8 formats to the tests.
+
+commit f05a90e5f8d1d0af60e2c684cbe9f1327c33135a
+Author: Alexandros Frantzis <alexandros.frantzis@linaro.org>
+Date:   Fri Mar 18 14:36:15 2011 +0200
+
+    Add simple support for the r8g8b8a8 and r8g8b8x8 formats.
+    
+    This format is particularly useful on big-endian architectures, where RGBA in
+    memory/file order corresponds to r8g8b8a8 as an uint32_t. This is important
+    because RGBA is in some cases the only available choice (for example as a pixel
+    format in OpenGL ES 2.0).
+
+commit 7eb0abb5e819046537b9f809c7ec332c6679c557
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Mar 14 14:56:22 2011 -0400
+
+    test: Randomize some tests if PIXMAN_RANDOMIZE_TESTS is set
+    
+    This patch makes it so that composite and stress-test will start from a
+    random seed if the PIXMAN_RANDOMIZE_TESTS environment variable is
+    set. Running the test suite in this mode is useful to get more test
+    coverage.
+    
+    Also, in stress-test.c make it so that setting the initial seed causes
+    threads to be turned off. This makes it much easier to see when
+    something fails.
+
+commit 6b27768d81c254a4f1d05473157328d5a5d99b9c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Mar 12 19:42:58 2011 -0500
+
+    Simplify the prototype for iterator initializers.
+    
+    All of the information previously passed to the iterator initializers
+    is now available in the iterator itself, so there is no need to pass
+    it as arguments anymore.
+
+commit 74d0f44b6d6d613d24541b849835da0464cc6fd0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Mar 12 19:12:35 2011 -0500
+
+    Fill out parts of iters in _pixman_implementation_{src,dest}_iter_init()
+    
+    This makes _pixman_implementation_{src,dest}_iter_init() responsible
+    for filling parts of the information in the iterators. Specifically,
+    the information passed as arguments is stored in the iterator.
+    
+    Also add a height field to pixman_iter_t().
+
+commit be4eaa0e4f79af38b7b89c5b09ca88d3a88d9396
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Mar 12 19:06:02 2011 -0500
+
+    In delegate_{src,dest}_iter_init() call delegate directly.
+    
+    There is no reason to go through
+    _pixman_implementation_{src,dest}_iter_init(), especially since
+    _pixman_implementation_src_iter_init() is doing various other checks
+    that only need to be done once.
+    
+    Also call delegate->src_iter_init() directly in pixman-sse2.c
+
+commit 70a923882ca24664344ba91a649e7aa12c3063f7
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 13:55:48 2011 +0200
+
+    ARM: a bit faster NEON bilinear scaling for r5g6b5 source images
+    
+    Instructions scheduling improved in the code responsible for fetching r5g6b5
+    pixels and converting them to the intermediate x8r8g8b8 color format used in
+    the interpolation part of code. Still a lot of NEON stalls are remaining,
+    which can be resolved later by the use of pipelining.
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+              op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+      after:  op=1, src=10020565, dst=10020565, speed=41.35 MPix/s
+              op=1, src=10020565, dst=20020888, speed=49.16 MPix/s
+
+commit fe99673719091d4a880d031add1369332a75731b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 13:27:41 2011 +0200
+
+    ARM: NEON optimization for bilinear scaled 'src_0565_0565'
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=10020565, speed=3.30 MPix/s
+      after:  op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+
+commit 29003c3befe2159396d181ef9ac1caaadcabf382
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 13:21:53 2011 +0200
+
+    ARM: NEON optimization for bilinear scaled 'src_0565_x888'
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=20020888, speed=3.39 MPix/s
+      after:  op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+
+commit 2ee27e7d79637da9173ee1bf3423e5a81534ccb4
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 11:53:04 2011 +0200
+
+    ARM: NEON optimization for bilinear scaled 'src_8888_0565'
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=10020565, speed=6.56 MPix/s
+      after:  op=1, src=20028888, dst=10020565, speed=61.65 MPix/s
+
+commit 11a0c5badbc59ce967707ef836313cc98f8aec4e
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 11:46:48 2011 +0200
+
+    ARM: use common macro template for bilinear scaled 'src_8888_8888'
+    
+    This is a cleanup for old and now duplicated code. The performance improvement
+    is mostly coming from the enabled use of software prefetch, but instructions
+    scheduling is also slightly better.
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=53.24 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=74.36 MPix/s
+
+commit 34098dba6763afd3636a14f9c2a079ab08f23b2d
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 11:34:15 2011 +0200
+
+    ARM: NEON: common macro template for bilinear scanline scalers
+    
+    This allows to generate bilinear scanline scaling functions targeting
+    various source and destination color formats. Right now a8r8g8b8/x8r8g8b8
+    and r5g6b5 color formats are supported. More formats can be added if needed.
+
+commit 66f4ee1b3bccf4516433d61dbf2035551a712fa2
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 10:59:46 2011 +0200
+
+    ARM: new bilinear fast path template macro in 'pixman-arm-common.h'
+    
+    It can be reused in different ARM NEON bilinear scaling fast path functions.
+
+commit 5921c17639fe5fdc595c850e3347281c1c8746ba
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun Mar 6 22:16:32 2011 +0200
+
+    ARM: assembly optimized nearest scaled 'src_8888_8888'
+    
+    Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=44.36 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=39.79 MPix/s
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=102.36 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=163.12 MPix/s
+
+commit f3e17872f5522e25da8e32de83e62bee8cc198d7
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 7 03:10:43 2011 +0200
+
+    ARM: common macro for nearest scaling fast paths
+    
+    The code of nearest scaled 'src_0565_0565' function was generalized
+    and moved to a common macro, so that it can be reused for other
+    fast paths.
+
+commit bb3d1b67fd0f42ae00af811c624ea1c44541034d
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun Mar 6 16:17:12 2011 +0200
+
+    ARM: use prefetch in nearest scaled 'src_0565_0565'
+    
+    Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=10020565, speed=75.02 MPix/s
+      after:  op=1, src=10020565, dst=10020565, speed=73.63 MPix/s
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=10020565, speed=176.12 MPix/s
+      after:  op=1, src=10020565, dst=10020565, speed=267.50 MPix/s
+
+commit 84e361c8e357e26f299213fbeefe64c73447b116
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 4 15:51:18 2011 -0500
+
+    test: Do endian swapping of the source and destination images.
+    
+    Otherwise the test fails on big endian. Fix for bug 34767, reported by
+    Siarhei Siamashka.
+
+commit 84f3c5a71a2de1a96dcf0c7f9ab0a8ee1b1b158f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Mar 7 13:45:54 2011 -0500
+
+    test: In image_endian_swap() use pixman_image_get_format() to get the bpp.
+    
+    There is no reason to pass in the bpp as an argument; it can be gotten
+    directly from the image.
+
+commit 17feaa9c50bb8521b0366345efe181bd99754957
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Feb 22 18:45:03 2011 +0200
+
+    ARM: NEON optimization for bilinear scaled 'src_8888_8888'
+    
+    Initial NEON optimization for bilinear scaling. Can be probably
+    improved more.
+    
+    Benchmark on ARM Cortex-A8:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=44.27 MPix/s
+
+commit 350029396d911941591149cc82b5e68a78ad6747
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Feb 21 20:18:02 2011 +0200
+
+    SSE2 optimization for bilinear scaled 'src_8888_8888'
+    
+    A primitive naive implementation of bilinear scaling using SSE2 intrinsics,
+    which only handles one pixel at a time. It is approximately 2x faster than
+    pixman general compositing path. Single pass processing without intermediate
+    temporary buffer contributes to ~15% and loop unrolling contributes to ~20%
+    of this speedup.
+    
+    Benchmark on Intel Core i7 (x86-64):
+     Using cairo-perf-trace:
+      before: image        firefox-planet-gnome   12.566   12.610   0.23%    6/6
+      after:  image        firefox-planet-gnome   10.961   11.013   0.19%    5/6
+    
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=165.38 MPix/s
+
+commit 0df43b8ae5031dd83775d00b57b6bed809db0e89
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Feb 21 02:07:09 2011 +0200
+
+    test: check correctness of 'bilinear_pad_repeat_get_scanline_bounds'
+    
+    Individual correctness check for the new bilinear scaling related
+    supplementary function. This test program uses a bit wider range
+    of input arguments, not covered by other tests.
+
+commit d506bf68fd0e9a1c5dd484daee70631699918387
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Feb 21 01:29:02 2011 +0200
+
+    Main loop template for fast single pass bilinear scaling
+    
+    Can be used for implementing SIMD optimized fast path
+    functions which work with bilinear scaled source images.
+    
+    Similar to the template for nearest scaling main loop, the
+    following types of mask are supported:
+    1. no mask
+    2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
+    3. solid mask
+    
+    PAD repeat is fully supported. NONE repeat is partially
+    supported (right now only works if source image has alpha
+    channel or when alpha channel of the source image does not
+    have any effect on the compositing operation).
+
+commit 9ebde285fa990bfa1524f166fbfb1368c346b14a
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Feb 24 12:53:39 2011 +0100
+
+    test: Silence MSVC warnings
+    
+    MSVC does not notice non-returning functions (abort() / assert(0))
+    and warns about paths which end with them in non-void functions:
+    
+    c:\cygwin\home\ranma42\code\fdo\pixman\test\fetch-test.c(114) :
+    warning C4715: 'reader' : not all control paths return a value
+    c:\cygwin\home\ranma42\code\fdo\pixman\test\stress-test.c(133) :
+    warning C4715: 'real_reader' : not all control paths return a value
+    c:\cygwin\home\ranma42\code\fdo\pixman\test\composite.c(431) :
+    warning C4715: 'calc_op' : not all control paths return a value
+    
+    These warnings can be silenced by adding a return after the
+    termination call.
+
+commit 8868778ea1fdc8e70da76b3b00ea78106c5840d8
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Feb 22 22:43:48 2011 +0100
+
+    Do not include unused headers
+    
+    pixman-combine32.h is included without being used both in
+    pixman-image.c and in pixman-general.c.
+
+commit 72f5e5f608506c18c484bc5bc3e58bd83aeb7691
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Feb 22 22:04:49 2011 +0100
+
+    test: Add Makefile for Win32
+
+commit 11305b4ecdd36a17592c5c75de9157874853ab20
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Feb 22 21:46:37 2011 +0100
+
+    test: Fix tests for compilation on Windows
+    
+    The Microsoft C compiler cannot handle subobject initialization and
+    Win32 does not provide snprintf.
+    
+    Work around these limitations by using normal struct initialization
+    and using sprintf (a manual check shows that the buffer size is
+    sufficient).
+
+commit 20ed723a5a42fb8636bc9a5f32974dec1b66a785
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Feb 24 10:44:04 2011 +0100
+
+    Fix compilation on Win32
+    
+    Makefile.win32 contained a typo and was missing the dependency from
+    the built sources.
+
+commit 48e951000c7ff14f40c671f3efb6abb18162c840
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 22 16:13:32 2011 -0500
+
+    Post-release version bump to 0.21.7
+
 commit 8b3332166094db657e96c365a524b2cd7513359b
 Author: Søren Sandmann Pedersen <ssp@redhat.com>
 Date:   Tue Feb 22 15:43:41 2011 -0500
diff --git a/debian/changelog b/debian/changelog
index e26a43b..a5fdd88 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,12 @@
+pixman (0.21.8-1) UNRELEASED; urgency=low
+
+  * New upstream release.
+  * As seen in the upstream announcement: “When this version of pixman is
+    used with the git version of the X server, trapezoid rendering will be
+    corrupted. This is a known bug in the X server.”
+
+ -- Cyril Brulebois <kibi@debian.org>  Fri, 29 Apr 2011 17:52:08 +0200
+
 pixman (0.21.6-2) unstable; urgency=low
 
   * Upload to unstable.

commit 89868e93bd8d66f0fac0f0b42cf7718756992e4e
Author: Søren Sandmann Pedersen <ssp@redhat.com>
Date:   Tue Apr 19 00:00:37 2011 -0400

    Pre-release version bump to 0.21.8

diff --git a/configure.ac b/configure.ac
index 09a4948..0d51bd0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 21)
-m4_define([pixman_micro], 7)
+m4_define([pixman_micro], 8)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit 33f1652b953467f3910605b3be723e21b3ebe078
Author: Taekyun Kim <tkq.kim@samsung.com>
Date:   Wed Apr 13 11:57:35 2011 +0900

    ARM: Enable bilinear fast paths using scanline functions in pixman-arm-neon-asm-bilinear.S
    
    Enable fast paths which are supported by scanline functions in
    pixman-arm-neon-asm-bilinear.S

diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 77875ad..e5127a6 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -137,6 +137,23 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
                                          uint16_t, uint32_t)
 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
                                          uint16_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
+                                         uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
+                                         uint32_t, uint32_t)
+
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC,
+                                            uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC,
+                                            uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC,
+                                            uint16_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC,
+                                            uint16_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER,
+                                            uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD,
+                                            uint32_t, uint32_t)
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
@@ -366,6 +383,28 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
     SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
 
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+
+    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+
     { PIXMAN_OP_NONE },
 };
 

commit e8185f1cb43417d9f7b1d2856bb899f1b84fde81
Author: Taekyun Kim <tkq.kim@samsung.com>
Date:   Wed Apr 13 11:48:40 2011 +0900

    ARM: NEON scanline functions for bilinear scaling
    
    General fetch->combine->store based bilinear scanline functions.
    These need further optimization and will eventually be replaced with
    optimized functions one by one.
    General functions should be located in pixman-arm-neon-asm-bilinear.S and
    optimized functions in pixman-arm-neon-asm.S
    
    The following general bilinear scanline functions are implemented:
        over_8888_8888
        add_8888_8888
        src_8888_8_8888
        src_8888_8_0565
        src_0565_8_x888
        src_0565_8_0565
        over_8888_8_8888
        add_8888_8_8888

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index d016e9f..be08266 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -115,6 +115,7 @@ libpixman_arm_neon_la_SOURCES = \
         pixman-arm-neon.c	\
         pixman-arm-common.h	\
         pixman-arm-neon-asm.S	\
+		pixman-arm-neon-asm-bilinear.S \
         pixman-arm-neon-asm.h
 libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
 libpixman_arm_neon_la_LIBADD = $(DEP_LIBS)
diff --git a/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman-arm-neon-asm-bilinear.S
new file mode 100644
index 0000000..9a4a1ff
--- /dev/null
+++ b/pixman/pixman-arm-neon-asm-bilinear.S
@@ -0,0 +1,768 @@
+/*
+ * Copyright © 2011 SCore Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ * Author:  Taekyun Kim (tkq.kim@samsung.com)
+ */
+
+/*
+ * This file contains scaled bilinear scanline functions implemented
+ * using Siarhei's older bilinear macro template.
+ *
+ * << General scanline function procedures >>
+ *  1. bilinear interpolate source pixels
+ *  2. load mask pixels
+ *  3. load destination pixels
+ *  4. duplicate mask to fill whole register
+ *  5. interleave source & destination pixels
+ *  6. apply mask to source pixels
+ *  7. combine source & destination pixels
+ *  8. deinterleave final result
+ *  9. store destination pixels
+ *
+ * All registers with a single number (e.g. src0, tmp0) are 64-bit registers.
+ * Registers with double numbers (e.g. src01, dst01) are 128-bit registers.
+ * All temp registers can be used freely outside the code block.
+ * The symbols OUT and MASK (register .req aliases) are assumed to be defined by the caller of these macro blocks.
+ *
+ * TODOs
+ *  Support 0565 pixel format
+ *  Optimization for two and last pixel cases
+ *
+ * Remarks
+ *  There can be lots of pipeline stalls inside a code block and between code blocks.
+ *  Further optimizations will be done by new macro templates using the head/tail_head/tail scheme.
+ */
+
+/* Prevent the stack from becoming executable for no reason... */
+#if defined(__linux__) && defined (__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+/*
+ * Assembler setup for the rest of the file.  The code is assembled as
+ * ARMv7-A with NEON enabled, while the attributes recorded in the object
+ * file are overridden to describe something more modest.
+ * NOTE(review): presumably this lets the object link into builds that
+ * target CPUs without NEON, with NEON use decided at run time - confirm.
+ */
+.text
+.fpu neon                 /* accept NEON instructions */
+.arch armv7a              /* assemble for ARMv7-A */
+.object_arch armv4        /* but record ARMv4 as the object's architecture */
+.eabi_attribute 10, 0     /* build attribute tag 10 (Tag_FP_arch) = 0 */
+.eabi_attribute 12, 0     /* build attribute tag 12 (Tag_Advanced_SIMD_arch) = 0 */
+.arm                      /* ARM (not Thumb) instruction encoding */
+.altmacro                 /* enable the assembler's alternate macro syntax */
+
+/*
+ * NOTE(review): presumably provides the conversion macros used below
+ * (convert_0565_to_x888, convert_four_0565_to_x888_packed), which are not
+ * defined in this file's visible part - confirm.
+ */
+#include "pixman-arm-neon-asm.h"
+
+/*
+ * Bilinear macros from pixman-arm-neon-asm.S
+ */
+
+/*
+ * Supplementary macro for setting function attributes
+ *
+ * pixman_asm_function fname
+ *
+ * Starts the definition of the function `fname`:
+ *   - opens a .func block for it,
+ *   - exports the symbol with .global,
+ *   - on ELF targets additionally marks it .hidden and gives it the
+ *     %function symbol type,
+ *   - and finally places the `fname:` label, so the function body is
+ *     expected to follow the macro invocation directly.
+ *
+ * The macro does not emit a matching .endfunc; that is left to the code
+ * that uses it (not visible in this part of the file).
+ */
+.macro pixman_asm_function fname
+    .func fname
+    .global fname
+#ifdef __ELF__
+    .hidden fname
+    .type fname, %function
+#endif
+fname:
+.endm
+
+/*
+ * Bilinear scaling support code which tries to provide pixel fetching, color
+ * format conversion, and interpolation as separate macros which can be used
+ * as the basic building blocks for constructing bilinear scanline functions.
+ */
+
+/*
+ * bilinear_load_8888 reg1, reg2, tmp
+ *
+ * Loads source data for one horizontal position from both scanlines of a
+ * 4-bytes-per-pixel source and then steps to the next position.
+ *
+ *   reg1 - receives the data read from the TOP scanline
+ *   reg2 - receives the data read from the BOTTOM scanline
+ *   tmp  - not used by this macro
+ *
+ * Uses the symbols X, UX, TOP, BOTTOM, TMP1 and TMP2, which are not defined
+ * here and must be provided by the including code.  X is advanced by UX;
+ * TMP1 and TMP2 are clobbered.
+ * NOTE(review): X looks like a 16.16 fixed-point coordinate (only the bits
+ * above the low 16 are used as the pixel index) - confirm against caller.
+ * NOTE(review): with 64-bit D registers for reg1/reg2 (as the file header
+ * describes single-number registers) each load fetches 8 bytes, i.e. the
+ * indexed pixel and its right-hand neighbour - confirm.
+ */
+.macro bilinear_load_8888 reg1, reg2, tmp
+    mov       TMP2, X, asr #16              /* TMP2 = pixel index (X shifted right by 16) */
+    add       X, X, UX                      /* step X for the next invocation */
+    add       TMP1, TOP, TMP2, asl #2       /* TMP1 = TOP + index * 4 */
+    add       TMP2, BOTTOM, TMP2, asl #2    /* TMP2 = BOTTOM + index * 4 */
+    vld1.32   {reg1}, [TMP1]
+    vld1.32   {reg2}, [TMP2]
+.endm
+
+/*
+ * bilinear_load_0565 reg1, reg2, tmp
+ *
+ * Counterpart of bilinear_load_8888 for a 2-bytes-per-pixel source.
+ * One 32-bit word is read from the TOP scanline into lane 0 of reg2 and one
+ * from the BOTTOM scanline into lane 1 of reg2; each word covers the indexed
+ * 16-bit pixel and the one following it.  The four packed pixels are then
+ * handed to convert_four_0565_to_x888_packed.
+ *
+ *   reg1, reg2 - passed to the conversion macro as (in=reg2, out=reg1, reg2)
+ *   tmp        - passed to the conversion macro as its last argument
+ *
+ * NOTE(review): convert_four_0565_to_x888_packed is defined outside this
+ * part of the file; presumably it leaves the top pixels in reg1 and the
+ * bottom pixels in reg2 in x888 form, using tmp as scratch - confirm.
+ *
+ * Uses the externally defined symbols X, UX, TOP, BOTTOM, TMP1 and TMP2.
+ * X is advanced by UX; TMP1 and TMP2 are clobbered.
+ */
+.macro bilinear_load_0565 reg1, reg2, tmp
+    mov       TMP2, X, asr #16              /* TMP2 = pixel index (X shifted right by 16) */
+    add       X, X, UX                      /* step X for the next invocation */
+    add       TMP1, TOP, TMP2, asl #1       /* TMP1 = TOP + index * 2 */
+    add       TMP2, BOTTOM, TMP2, asl #1    /* TMP2 = BOTTOM + index * 2 */
+    vld1.32   {reg2[0]}, [TMP1]
+    vld1.32   {reg2[1]}, [TMP2]
+    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
+.endm
+
+/*
+ * bilinear_load_and_vertical_interpolate_two_8888
+ *         acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
+ *
+ * Runs bilinear_load_8888 for two consecutive horizontal positions and
+ * blends the top and bottom data of each position into a widened
+ * accumulator:
+ *
+ *   acc1 = reg1 * d28 + reg2 * d29     (first position)
+ *   acc2 = reg3 * d28 + reg4 * d29     (second position)
+ *
+ * The multiplies are unsigned 8-bit x 8-bit with 16-bit results per lane.
+ * tmp1 and tmp2 are only forwarded to bilinear_load_8888, which does not
+ * use them.
+ *
+ * NOTE(review): d28 and d29 are hard-coded here and not set up in this
+ * macro; presumably they hold the top and bottom vertical weights prepared
+ * by the caller - confirm.
+ *
+ * Side effects are those of two bilinear_load_8888 invocations: X is
+ * advanced by UX twice, TMP1 and TMP2 are clobbered.
+ */
+.macro bilinear_load_and_vertical_interpolate_two_8888 \
+                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
+
+    bilinear_load_8888 reg1, reg2, tmp1
+    vmull.u8  acc1, reg1, d28
+    vmlal.u8  acc1, reg2, d29
+    bilinear_load_8888 reg3, reg4, tmp2
+    vmull.u8  acc2, reg3, d28
+    vmlal.u8  acc2, reg4, d29
+.endm
+
+/*
+ * bilinear_load_and_vertical_interpolate_four_8888
+ *         xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
+ *         yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+ *
+ * Handles four consecutive horizontal positions by expanding
+ * bilinear_load_and_vertical_interpolate_two_8888 twice: first with the
+ * x* argument set, then with the y* argument set.  The vertically blended
+ * results end up in xacc1, xacc2, yacc1 and yacc2, in that order.
+ *
+ * The *acc2lo / *acc2hi arguments are forwarded in the tmp1/tmp2 positions
+ * of the two-pixel macro, where they are not used for 8888 data.
+ */
+.macro bilinear_load_and_vertical_interpolate_four_8888 \
+                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+
+    bilinear_load_and_vertical_interpolate_two_8888 \
+                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
+    bilinear_load_and_vertical_interpolate_two_8888 \
+                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+.endm
+
+/*
+ * bilinear_load_and_vertical_interpolate_two_0565
+ *         acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
+ *
+ * 2-bytes-per-pixel counterpart of the two_8888 macro.  For two consecutive
+ * horizontal positions it reads one 32-bit word (two 16-bit pixels) from
+ * each of the TOP and BOTTOM scanlines, converts the packed data with
+ * convert_0565_to_x888, rearranges the bytes with four vzip.u8 steps and
+ * finally blends vertically:
+ *
+ *   acc1 = reg1 * d28 + reg2 * d29
+ *   acc2 = reg3 * d28 + reg4 * d29
+ *
+ * Load layout before conversion:
+ *   acc2lo[0] = TOP    word at first position
+ *   acc2lo[1] = BOTTOM word at first position
+ *   acc2hi[0] = TOP    word at second position
+ *   acc2hi[1] = BOTTOM word at second position
+ *
+ * NOTE(review): acc2lo/acc2hi are presumably the low and high halves of
+ * acc2, since acc2 is what gets passed to the conversion - confirm at the
+ * call sites.
+ * NOTE(review): convert_0565_to_x888 is defined outside this part of the
+ * file; it is given reg3, reg2, reg1 but not reg4, so reg4 appears to be
+ * read by vzip before anything in this macro writes it - presumably it
+ * only feeds the unused x channel; confirm.
+ * NOTE(review): d28/d29 are presumably the vertical weights set up by the
+ * caller - confirm.
+ *
+ * Uses the externally defined symbols X, UX, TOP, BOTTOM and TMP1..TMP4.
+ * X is advanced by UX twice; TMP1..TMP4 are clobbered.
+ */
+.macro bilinear_load_and_vertical_interpolate_two_0565 \
+                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
+
+    mov       TMP2, X, asr #16              /* index of first position */
+    add       X, X, UX
+    mov       TMP4, X, asr #16              /* index of second position */
+    add       X, X, UX
+    add       TMP1, TOP, TMP2, asl #1       /* TOP    + first index * 2 */
+    add       TMP2, BOTTOM, TMP2, asl #1    /* BOTTOM + first index * 2 */
+    add       TMP3, TOP, TMP4, asl #1       /* TOP    + second index * 2 */
+    add       TMP4, BOTTOM, TMP4, asl #1    /* BOTTOM + second index * 2 */
+    vld1.32   {acc2lo[0]}, [TMP1]
+    vld1.32   {acc2hi[0]}, [TMP3]
+    vld1.32   {acc2lo[1]}, [TMP2]
+    vld1.32   {acc2hi[1]}, [TMP4]
+    convert_0565_to_x888 acc2, reg3, reg2, reg1
+    vzip.u8   reg1, reg3
+    vzip.u8   reg2, reg4
+    vzip.u8   reg3, reg4
+    vzip.u8   reg1, reg2
+    vmull.u8  acc1, reg1, d28
+    vmlal.u8  acc1, reg2, d29
+    vmull.u8  acc2, reg3, d28
+    vmlal.u8  acc2, reg4, d29
+.endm
+
+.macro bilinear_load_and_vertical_interpolate_four_0565 \
+                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+
+    mov       TMP2, X, asr #16
+    add       X, X, UX
+    mov       TMP4, X, asr #16
+    add       X, X, UX
+    add       TMP1, TOP, TMP2, asl #1
+    add       TMP2, BOTTOM, TMP2, asl #1
+    add       TMP3, TOP, TMP4, asl #1
+    add       TMP4, BOTTOM, TMP4, asl #1
+    vld1.32   {xacc2lo[0]}, [TMP1]
Reply to:
Prev by Date: Comprar da China, Aprenda como e Ganhe Dinheiro
Next by Date: pixman: Changes to 'upstream-unstable'
Previous by thread: pixman: Changes to 'debian-unstable'
Next by thread: pixman: Changes to 'upstream-unstable'
Index(es):
- Date
- Thread