[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

pixman: Changes to 'upstream-experimental'



 .gitignore                            |    9 
 Makefile.am                           |  121 
 README                                |   26 
 RELEASING                             |   49 
 TODO                                  |  124 
 configure.ac                          |  233 +
 pixman/Makefile.am                    |   78 
 pixman/Makefile.win32                 |   92 
 pixman/combine.h.inc                  |  215 +
 pixman/combine.inc                    | 1269 +++++++++
 pixman/combine.pl                     |   81 
 pixman/pixman-access-accessors.c      |    3 
 pixman/pixman-access.c                | 1968 ++++++++++++++
 pixman/pixman-combine.c               | 1455 ++++++++++
 pixman/pixman-compose-accessors.c     |    4 
 pixman/pixman-compose.c               | 4285 +------------------------------
 pixman/pixman-compute-region.c        |  108 
 pixman/pixman-edge-accessors.c        |    4 
 pixman/pixman-edge-imp.h              |   64 
 pixman/pixman-edge.c                  |   72 
 pixman/pixman-image.c                 |  298 +-
 pixman/pixman-mmx.c                   |  724 ++---
 pixman/pixman-mmx.h                   |    5 
 pixman/pixman-pict.c                  | 1408 ++++------
 pixman/pixman-private.h               |  466 +--
 pixman/pixman-region.c                |  717 +----
 pixman/pixman-region16.c              |   75 
 pixman/pixman-region32.c              |   73 
 pixman/pixman-source.c                |  709 +++++
 pixman/pixman-sse.c                   | 4653 ++++++++++++++++++++++++++++++++++
 pixman/pixman-sse.h                   |  358 ++
 pixman/pixman-timer.c                 |    7 
 pixman/pixman-transformed-accessors.c |    3 
 pixman/pixman-transformed.c           |  814 +++++
 pixman/pixman-trap.c                  |   25 
 pixman/pixman-utils.c                 |  253 +
 pixman/pixman-version.h.in            |   50 
 pixman/pixman-vmx.c                   | 1068 +++++++
 pixman/pixman-vmx.h                   |  308 ++
 pixman/pixman.h                       |  390 +-
 test/Makefile.am                      |   10 
 test/fetch-test.c                     |  163 +
 test/region-test.c                    |   23 
 43 files changed, 16538 insertions(+), 6322 deletions(-)

New commits:
commit d8e5ff20f12c52a32dcf0543ab436eb7194b794c
Author: Søren Sandmann Pedersen <sandmann@redhat.com>
Date:   Wed Jun 25 07:58:21 2008 -0400

    Pre-release version bump

diff --git a/configure.ac b/configure.ac
index 1bfa2a5..0997d64 100644
--- a/configure.ac
+++ b/configure.ac
@@ -57,7 +57,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 11)
-m4_define([pixman_micro], 5)
+m4_define([pixman_micro], 6)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit 4a9df4be7e384cf18e3d7a65d1e0023d2e2a280f
Author: Søren Sandmann Pedersen <sandmann@redhat.com>
Date:   Sun Jun 22 13:44:23 2008 -0400

    TODO

diff --git a/TODO b/TODO
index 01b3ddf..11b276d 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,5 @@
+  - Rename "SSE" to "MMX_EXTENSIONS"
+
   - Behdad's MMX issue - see list
 
   - SSE 2 issues:

commit a766b62880108f278478888f5167a5fbf2819a97
Author: Søren Sandmann Pedersen <sandmann@redhat.com>
Date:   Sun Jun 22 13:42:21 2008 -0400

    Add configure time options to disable mmx/sse2/vmx

diff --git a/configure.ac b/configure.ac
index 52ecb0d..1bfa2a5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -149,7 +149,15 @@ int main () {
     return _mm_cvtsi64_si32 (v);
 }], have_mmx_intrinsics=yes)
 CFLAGS=$xserver_save_CFLAGS
-AC_MSG_RESULT($have_mmx_intrinsics)
+
+AC_ARG_ENABLE(mmx,
+   [AC_HELP_STRING([--disable-mmx],
+                   [disable MMX fast paths])],
+   [disable_mmx=yes], [disable_mmx=no])
+
+if test $disable_mmx = yes ; then
+   have_mmx_intrinsics=disabled
+fi
 
 if test $have_mmx_intrinsics = yes ; then
    AC_DEFINE(USE_MMX, 1, [use MMX compiler intrinsics])
@@ -157,6 +165,8 @@ else
    MMX_CFLAGS=
 fi
 
+AC_MSG_RESULT($have_mmx_intrinsics)
+
 AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
 
 dnl =======================================================
@@ -225,12 +235,22 @@ int main () {
     return 0;
 }], have_sse2_intrinsics=yes)
 CFLAGS=$xserver_save_CFLAGS
-AC_MSG_RESULT($have_sse2_intrinsics)
+
+AC_ARG_ENABLE(sse2,
+   [AC_HELP_STRING([--disable-sse2],
+                   [disable SSE2 fast paths])],
+   [disable_sse2=yes], [disable_sse2=no])
+
+if test $disable_sse2 = yes ; then
+   have_sse2_intrinsics=disabled
+fi
 
 if test $have_sse2_intrinsics = yes ; then
-   AC_DEFINE(USE_SSE2, 1, [use SSE compiler intrinsics])
+   AC_DEFINE(USE_SSE2, 1, [use SSE2 compiler intrinsics])
 fi
 
+AC_MSG_RESULT($have_sse2_intrinsics)
+
 AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes)
 
 dnl ========================================================
@@ -259,13 +279,24 @@ int main () {
     return 0;
 }], have_vmx_intrinsics=yes)
 CFLAGS=$xserver_save_CFLAGS
-AC_MSG_RESULT($have_vmx_intrinsics)
+
+AC_ARG_ENABLE(vmx,
+   [AC_HELP_STRING([--disable-vmx],
+                   [disable VMX fast paths])],
+   [disable_vmx=yes], [disable_vmx=no])
+
+if test $disable_vmx = yes ; then
+   have_vmx_intrinsics=disabled
+fi
 
 if test $have_vmx_intrinsics = yes ; then
    AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
 else
    VMX_CFLAGS=
 fi
+
+AC_MSG_RESULT($have_vmx_intrinsics)
+
 AC_SUBST(VMX_CFLAGS)
 
 AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)

commit 02268903e4311709744c11e495f9b17f171ec5e9
Author: David Sharp <whereami@gmail.com>
Date:   Thu Jun 19 20:23:33 2008 -0700

    pixman-sse.c: silence pointer-cast compiler warnings.
    
    Cast pointers to words of the same size, not 32-bits.
    
    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>

diff --git a/pixman/pixman-sse.c b/pixman/pixman-sse.c
index 13bfe1c..6f7876c 100644
--- a/pixman/pixman-sse.c
+++ b/pixman/pixman-sse.c
@@ -643,7 +643,7 @@ coreCombineInUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
 
-    while (w && ((uint32_t) pd & 15))
+    while (w && ((unsigned long) pd & 15))
     {
         s = *ps++;
         d = *pd;
@@ -700,7 +700,7 @@ coreCombineReverseInUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
 
-    while (w && ((uint32_t) pd & 15))
+    while (w && ((unsigned long) pd & 15))
     {
         s = *ps++;
         d = *pd;
@@ -752,7 +752,7 @@ coreCombineReverseOutUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
 
-    while (w && ((uint32_t) pd & 15))
+    while (w && ((unsigned long) pd & 15))
     {
         uint32_t s = *ps++;
         uint32_t d = *pd;
@@ -809,7 +809,7 @@ coreCombineOutUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
 
-    while (w && ((uint32_t) pd & 15))
+    while (w && ((unsigned long) pd & 15))
     {
         uint32_t s = *ps++;
         uint32_t d = *pd;
@@ -885,7 +885,7 @@ coreCombineAtopUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
 
-    while (w && ((uint32_t) pd & 15))
+    while (w && ((unsigned long) pd & 15))
     {
         s = *ps++;
         d = *pd;
@@ -962,7 +962,7 @@ coreCombineReverseAtopUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
 
-    while (w && ((uint32_t) pd & 15))
+    while (w && ((unsigned long) pd & 15))
     {
         s = *ps++;
         d = *pd;
@@ -1039,7 +1039,7 @@ coreCombineXorUsse2 (uint32_t* dst, const uint32_t* src, int width)
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
 
-    while (w && ((uint32_t) pd & 15))
+    while (w && ((unsigned long) pd & 15))
     {
         s = *ps++;
         d = *pd;

commit 534e65d54831018b47c169932a04224e5ba53cb8
Author: Søren Sandmann Pedersen <sandmann@redhat.com>
Date:   Fri Jun 13 16:16:59 2008 -0400

    Update TODO

diff --git a/TODO b/TODO
index 4f8f9c4..01b3ddf 100644
--- a/TODO
+++ b/TODO
@@ -42,7 +42,7 @@
   - Make pixman_region_point_in() survive a NULL box, then fix up
     pixman-compose.c
 
-    - Possibly look into inlining the fetch functions
+      - Possibly look into inlining the fetch functions
 
   - Test suite
 
@@ -93,6 +93,37 @@
     (0, 0). Cairo would have to make sure that the delta *within* a
     batch of trapezoids does not exceed 16 bit.
 
+  - Consider adding actual backends. Brain dump:
+
+    A backend is something that knows how to
+
+      - Create images
+      - Composite three images
+      - Rasterize trapezoids
+      - Do solid fills and blits
+
+    These operations are provided by a vtable that the backend will
+    create when it is initialized. Initial backends:
+
+      - VMX
+      - SSE2
+      - MMX
+      - Plain Old C
+
+    When the SIMD backends are initialized, they will be passed a
+    pointer to the Plain Old C backend that they can use for fallback
+    purposes.
+
+    Images would gain a vtable as well that would contain things like
+
+      - Read scanline
+      - Write scanline
+
+    (Or even read_patch/write_patch as suggested by Keith a while
+    back).
+
+    This could simplify the compositing code considerably.
+
   - Review the pixman_format_code_t enum to make sure it will support
     future formats. Some formats we will probably need:
 

commit eb2d95de98683a387153f010077ad9c3c3b1b01d
Author: Luo Jinghua <sunmoon1997@gmail.com>
Date:   Sat Jun 14 09:07:22 2008 +0800

    Fix implicit declaration of function 'free'.
    
    Otherwise pointers will be truncated on 64-bit architectures and your programs will crash.

diff --git a/pixman/pixman-region16.c b/pixman/pixman-region16.c
index 869e18d..e0dc008 100644
--- a/pixman/pixman-region16.c
+++ b/pixman/pixman-region16.c
@@ -30,6 +30,8 @@
 
 #include "pixman-private.h"
 
+#include <stdlib.h>
+
 typedef pixman_box16_t		box_type_t;
 typedef pixman_region16_data_t	region_data_type_t;
 typedef pixman_region16_t	region_type_t;
diff --git a/pixman/pixman-region32.c b/pixman/pixman-region32.c
index 6e083b5..8a30d1d 100644
--- a/pixman/pixman-region32.c
+++ b/pixman/pixman-region32.c
@@ -28,6 +28,8 @@
 
 #include "pixman-private.h"
 
+#include <stdlib.h>
+
 typedef pixman_box32_t		box_type_t;
 typedef pixman_region32_data_t	region_data_type_t;
 typedef pixman_region32_t	region_type_t;

commit 29d144712e558aaeb49f4384028dd669d76a410b
Author: Maximilian Grothusmann <maxi@own-hero.net>
Date:   Fri Jun 13 12:44:50 2008 -0700

    Fix memory leak by freeing boxes{16,32}.
    
    After calling pixman_region_init_rects() or
    pixman_region32_init_rects(), boxes{16,32} were not freed before
    returning. Fixes bug 16312.

diff --git a/pixman/pixman-region16.c b/pixman/pixman-region16.c
index 1a0edfe..869e18d 100644
--- a/pixman/pixman-region16.c
+++ b/pixman/pixman-region16.c
@@ -47,6 +47,7 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
     int n_boxes, i;
     pixman_box32_t *boxes32;
     pixman_box16_t *boxes16;
+    pixman_bool_t retval;
     
     boxes32 = pixman_region32_rectangles (src, &n_boxes);
 
@@ -64,7 +65,9 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
     }
 
     pixman_region_fini (dst);
-    return pixman_region_init_rects (dst, boxes16, n_boxes);
+    retval = pixman_region_init_rects (dst, boxes16, n_boxes);
+    free (boxes16);
+    return retval;
 }
 
 #include "pixman-region.c"
diff --git a/pixman/pixman-region32.c b/pixman/pixman-region32.c
index 4b5598d..6e083b5 100644
--- a/pixman/pixman-region32.c
+++ b/pixman/pixman-region32.c
@@ -45,6 +45,7 @@ pixman_region32_copy_from_region16 (pixman_region32_t *dst,
     int n_boxes, i;
     pixman_box16_t *boxes16;
     pixman_box32_t *boxes32;
+    pixman_bool_t retval;
     
     boxes16 = pixman_region_rectangles (src, &n_boxes);
 
@@ -62,7 +63,9 @@ pixman_region32_copy_from_region16 (pixman_region32_t *dst,
     }
 
     pixman_region32_fini (dst);
-    return pixman_region32_init_rects (dst, boxes32, n_boxes);
+    retval = pixman_region32_init_rects (dst, boxes32, n_boxes);
+    free (boxes32);
+    return retval;
 }
 
 #include "pixman-region.c"

commit 5d32519316b40b35113c6df9e15d955a16709ba2
Author: Aaron Plattner <aplattner@nvidia.com>
Date:   Fri Jun 13 09:52:53 2008 -0700

    Use pixman_malloc_ab instead of plain malloc for the fbStore64_generic scratch buffer.

diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index ec187d3..ca9619c 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -1857,7 +1857,7 @@ fbStore64_generic (pixman_image_t *image,
     assert(image->common.type == BITS);
     assert(store32);
 
-    argb8Pixels = malloc(sizeof(uint32_t) * width);
+    argb8Pixels = pixman_malloc_ab (width, sizeof(uint32_t));
     if (!argb8Pixels) return;
 
     // Contract the scanline.  We could do this in place if values weren't

commit b1c70c4e6435d7f15751111828c381feb1d139cf
Author: Søren Sandmann Pedersen <sandmann@redhat.com>
Date:   Fri Jun 13 00:25:45 2008 -0400

    Move PIXMAN_FORMAT_16BPC to pixman-private.h

diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index fa1311d..3bbf641 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -647,6 +647,10 @@ union pixman_image
     } while (0)
 
 
+#define PIXMAN_FORMAT_16BPC(f)	(PIXMAN_FORMAT_A(f) > 8 || \
+				 PIXMAN_FORMAT_R(f) > 8 || \
+				 PIXMAN_FORMAT_G(f) > 8 || \
+				 PIXMAN_FORMAT_B(f) > 8)
 /*
  * Edges
  */
diff --git a/pixman/pixman.h b/pixman/pixman.h
index 10871a3..f60534b 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -495,10 +495,6 @@ struct pixman_indexed
 				 PIXMAN_FORMAT_R(f) +	\
 				 PIXMAN_FORMAT_G(f) +	\
 				 PIXMAN_FORMAT_B(f))
-#define PIXMAN_FORMAT_16BPC(f)	(PIXMAN_FORMAT_A(f) > 8 || \
-				 PIXMAN_FORMAT_R(f) > 8 || \
-				 PIXMAN_FORMAT_G(f) > 8 || \
-				 PIXMAN_FORMAT_B(f) > 8)
 
 #define PIXMAN_TYPE_OTHER	0
 #define PIXMAN_TYPE_A		1

commit 7fa966df0ee781fa486715710f389b148c11d36e
Author: Aaron Plattner <aplattner@nvidia.com>
Date:   Fri Jun 6 23:02:51 2008 -0700

    Decide based on the image formats whether we need wide compositing.
    
    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 812e70d..c583ea6 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -549,7 +549,13 @@ void
 pixman_composite_rect_general (const FbComposeData *data)
 {
     uint32_t _scanline_buffer[SCANLINE_BUFFER_LENGTH * 3];
-    const int wide = 0;
+    const pixman_format_code_t srcFormat = data->src->type == BITS ? data->src->bits.format : 0;
+    const pixman_format_code_t maskFormat = data->mask && data->mask->type == BITS ? data->mask->bits.format : 0;
+    const pixman_format_code_t destFormat = data->dest->type == BITS ? data->dest->bits.format : 0;
+    const int srcWide = PIXMAN_FORMAT_16BPC(srcFormat);
+    const int maskWide = data->mask && PIXMAN_FORMAT_16BPC(maskFormat);
+    const int destWide = PIXMAN_FORMAT_16BPC(destFormat);
+    const int wide = srcWide || maskWide || destWide;
     const int Bpp = wide ? 8 : 4;
     uint8_t *scanline_buffer = (uint8_t*)_scanline_buffer;
     uint8_t *src_buffer, *mask_buffer, *dest_buffer;

commit 7cb735c9c0fa55ae1f4d8d13da9f33e3da2ae8fe
Author: Aaron Plattner <aplattner@nvidia.com>
Date:   Fri Jun 6 19:40:25 2008 -0700

    Take the source format into account in pixman_expand.
    
    Extract the original bits of the source image for each component and then
    replicate up to 16 bits to fill the wide components.  Make sure to hard-code the
    alpha value to 1 if the source format didn't have alpha.
    
    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>

diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index 29ddcb3..ec187d3 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -1879,30 +1879,70 @@ storeProc64 ACCESS(pixman_storeProcForPicture64) (bits_image_t * pict)
 
 #ifndef PIXMAN_FB_ACCESSORS
 /*
+ * Helper routine to expand a color component from 0 < n <= 8 bits to 16 bits by
+ * replication.
+ */
+static inline uint64_t expand16(const uint8_t val, int nbits)
+{
+    // Start out with the high bit of val in the high bit of result.
+    uint16_t result = (uint16_t)val << (16 - nbits);
+
+    if (nbits == 0)
+        return 0;
+
+    // Copy the bits in result, doubling the number of bits each time, until we
+    // fill all 16 bits.
+    while (nbits < 16) {
+        result |= result >> nbits;
+        nbits *= 2;
+    }
+
+    return result;
+}
+
+/*
  * This function expands images from ARGB8 format to ARGB16.  To preserve
  * precision, it needs to know the original source format.  For example, if the
  * source was PIXMAN_x1r5g5b5 and the red component contained bits 12345, then
  * the expanded value is 12345123.  To correctly expand this to 16 bits, it
  * should be 1234512345123451 and not 1234512312345123.
- *
- * XXX[AGP]: For now, this just does naïve byte replication.
  */
 void pixman_expand(uint64_t *dst, const uint32_t *src,
                    pixman_format_code_t format, int width)
 {
+    /*
+     * Determine the sizes of each component and the masks and shifts required
+     * to extract them from the source pixel.
+     */
+    const int a_size = PIXMAN_FORMAT_A(format),
+              r_size = PIXMAN_FORMAT_R(format),
+              g_size = PIXMAN_FORMAT_G(format),
+              b_size = PIXMAN_FORMAT_B(format);
+    const int a_shift = 32 - a_size,
+              r_shift = 24 - r_size,
+              g_shift = 16 - g_size,
+              b_shift =  8 - b_size;
+    const uint8_t a_mask = ~(~0 << a_size),
+                  r_mask = ~(~0 << r_size),
+                  g_mask = ~(~0 << g_size),
+                  b_mask = ~(~0 << b_size);
     int i;
 
     /* Start at the end so that we can do the expansion in place when src == dst */
     for (i = width - 1; i >= 0; i--)
     {
-        const uint8_t a = src[i] >> 24,
-                      r = src[i] >> 16,
-                      g = src[i] >> 8,
-                      b = src[i];
-        dst[i] = (uint64_t)a << 56 | (uint64_t) a << 48 |
-                 (uint64_t)r << 40 | (uint64_t) r << 32 |
-                 (uint64_t)g << 24 | (uint64_t) g << 16 |
-                 (uint64_t)b << 8 | (uint64_t)b;
+        const uint32_t pixel = src[i];
+        // Extract the components.
+        const uint8_t a = (pixel >> a_shift) & a_mask,
+                      r = (pixel >> r_shift) & r_mask,
+                      g = (pixel >> g_shift) & g_mask,
+                      b = (pixel >> b_shift) & b_mask;
+        const uint64_t a16 = a_size ? expand16(a, a_size) : 0xffff,
+                       r16 = expand16(r, r_size),
+                       g16 = expand16(g, g_size),
+                       b16 = expand16(b, b_size);
+
+        dst[i] = a16 << 48 | r16 << 32 | g16 << 16 | b16;
     }
 }
 

commit c0d98e96605c6d03f4b02f337f2f5827165bb092
Author: Aaron Plattner <aplattner@nvidia.com>
Date:   Fri Jun 6 18:51:48 2008 -0700

    Make expansion and contraction loops clearer.
    
    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>

diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index d5d3952..29ddcb3 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -1890,17 +1890,19 @@ storeProc64 ACCESS(pixman_storeProcForPicture64) (bits_image_t * pict)
 void pixman_expand(uint64_t *dst, const uint32_t *src,
                    pixman_format_code_t format, int width)
 {
+    int i;
+
     /* Start at the end so that we can do the expansion in place when src == dst */
-    for (width--; width >= 0; width--)
+    for (i = width - 1; i >= 0; i--)
     {
-        const uint8_t a = src[width] >> 24,
-                      r = src[width] >> 16,
-                      g = src[width] >> 8,
-                      b = src[width];
-        dst[width] = (uint64_t)a << 56 | (uint64_t) a << 48 |
-                     (uint64_t)r << 40 | (uint64_t) r << 32 |
-                     (uint64_t)g << 24 | (uint64_t) g << 16 |
-                     (uint64_t)b << 8 | (uint64_t)b;
+        const uint8_t a = src[i] >> 24,
+                      r = src[i] >> 16,
+                      g = src[i] >> 8,
+                      b = src[i];
+        dst[i] = (uint64_t)a << 56 | (uint64_t) a << 48 |
+                 (uint64_t)r << 40 | (uint64_t) r << 32 |
+                 (uint64_t)g << 24 | (uint64_t) g << 16 |
+                 (uint64_t)b << 8 | (uint64_t)b;
     }
 }
 
@@ -1910,15 +1912,17 @@ void pixman_expand(uint64_t *dst, const uint32_t *src,
  */
 void pixman_contract(uint32_t *dst, const uint64_t *src, int width)
 {
+    int i;
+
     /* Start at the beginning so that we can do the contraction in place when
      * src == dst */
-    for (width--; width >= 0; width--, src++, dst++)
+    for (i = 0; i < width; i++)
     {
-        const uint8_t a = *src >> 56,
-                      r = *src >> 40,
-                      g = *src >> 24,
-                      b = *src >> 8;
-        *dst = a << 24 | r << 16 | g << 8 | b;
+        const uint8_t a = src[i] >> 56,
+                      r = src[i] >> 40,
+                      g = src[i] >> 24,
+                      b = src[i] >> 8;
+        dst[i] = a << 24 | r << 16 | g << 8 | b;
     }
 }
 #endif // PIXMAN_FB_ACCESSORS

commit fc0b28bf6af81428b7ac045614eea97fbf9c4a70
Author: Aaron Plattner <aplattner@nvidia.com>
Date:   Fri Jun 6 18:05:15 2008 -0700

    Add wide source picture, external alpha, and transformed image routines.
    
    The wide external alpha path should work correctly with wide formats.  The wide
    transformed fetch code for now just does a 32-bit fetch and then expands, which
    will lose precision.  Source pictures, for now, are evaluated at depth 32 and
    then are expanded to depth 64.  We could get higher precision by evaluating them
    directly at depth 64, but this should be good enough for now.
    
    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index c5773eb..812e70d 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -174,6 +174,62 @@ typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);
 typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *,
 			      uint32_t *, uint32_t);
 
+static inline scanFetchProc get_fetch_source_pict(const int wide)
+{
+    if (wide)
+	return (scanFetchProc)pixmanFetchSourcePict64;
+    else
+	return (scanFetchProc)pixmanFetchSourcePict;
+}
+
+static inline scanFetchProc get_fetch_solid(const int wide)
+{
+    if (wide)
+	return (scanFetchProc)fbFetchSolid64;
+    else
+	return (scanFetchProc)fbFetchSolid;
+}
+
+static inline scanFetchProc get_fetch(const int wide)
+{
+    if (wide)
+	return (scanFetchProc)fbFetch64;
+    else
+	return (scanFetchProc)fbFetch;
+}
+
+static inline scanFetchProc get_fetch_external_alpha(const int wide)
+{
+    if (wide)
+	return (scanFetchProc)ACCESS(fbFetchExternalAlpha64);
+    else
+	return (scanFetchProc)ACCESS(fbFetchExternalAlpha);
+}
+
+static inline scanFetchProc get_fetch_transformed(const int wide)
+{
+    if (wide)
+	return (scanFetchProc)ACCESS(fbFetchTransformed64);
+    else
+	return (scanFetchProc)ACCESS(fbFetchTransformed);
+}
+
+static inline scanStoreProc get_store(const int wide)
+{
+    if (wide)
+	return (scanStoreProc)fbStore64;
+    else
+	return (scanStoreProc)fbStore;
+}
+
+static inline scanStoreProc get_store_external_alpha(const int wide)
+{
+    if (wide)
+	return (scanStoreProc)ACCESS(fbStoreExternalAlpha64);
+    else
+	return (scanStoreProc)ACCESS(fbStoreExternalAlpha);
+}
+
 #ifndef PIXMAN_FB_ACCESSORS
 static
 #endif
@@ -195,7 +251,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
         fetchSrc = NULL;
     else if (IS_SOURCE_IMAGE (data->src))
     {
-	fetchSrc = (scanFetchProc)pixmanFetchSourcePict;
+	fetchSrc = get_fetch_source_pict(wide);
 	srcClass = SourcePictureClassify ((source_image_t *)data->src,
 					  data->xSrc, data->ySrc,
 					  data->width, data->height);
@@ -206,25 +262,23 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 
 	if (bits->common.alpha_map)
 	{
-	    // TODO: Need wide external alpha routine.
-	    fetchSrc = (scanFetchProc)ACCESS(fbFetchExternalAlpha);
+	    fetchSrc = get_fetch_external_alpha(wide);
 	}
 	else if ((bits->common.repeat == PIXMAN_REPEAT_NORMAL || bits->common.repeat == PIXMAN_REPEAT_PAD) &&
 		 bits->width == 1 &&
 		 bits->height == 1)
 	{
-	    fetchSrc = wide ? (scanFetchProc)fbFetchSolid64 : (scanFetchProc)fbFetchSolid;
+	    fetchSrc = get_fetch_solid(wide);
 	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 	}
 	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
                 && bits->common.repeat != PIXMAN_REPEAT_PAD)
 	{
-	    fetchSrc = wide ? (scanFetchProc)fbFetch64 : (scanFetchProc)fbFetch;
+	    fetchSrc = get_fetch(wide);
 	}
 	else
 	{
-	    // TODO: Need wide transformed fetch.
-	    fetchSrc = (scanFetchProc)ACCESS(fbFetchTransformed);
+	    fetchSrc = get_fetch_transformed(wide);
 	}
     }
 
@@ -247,37 +301,34 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 
 	    if (bits->common.alpha_map)
 	    {
-		// TODO: Need wide external alpha routine.
-		fetchMask = (scanFetchProc)ACCESS(fbFetchExternalAlpha);
+		fetchMask = get_fetch_external_alpha(wide);
 	    }
 	    else if ((bits->common.repeat == PIXMAN_REPEAT_NORMAL || bits->common.repeat == PIXMAN_REPEAT_PAD) &&
 		     bits->width == 1 && bits->height == 1)
 	    {
-		fetchMask = wide ? (scanFetchProc)fbFetchSolid64 : (scanFetchProc)fbFetchSolid;
+		fetchMask = get_fetch_solid(wide);
 		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 	    }
 	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
                     && bits->common.repeat != PIXMAN_REPEAT_PAD)
-		fetchMask = wide ? (scanFetchProc)fbFetch64 : (scanFetchProc)fbFetch;
+		fetchMask = get_fetch(wide);
 	    else
-		// TODO: Need wide transformed fetch.
-		fetchMask = (scanFetchProc)ACCESS(fbFetchTransformed);
+		fetchMask = get_fetch_transformed(wide);
 	}
     }
 
     if (data->dest->common.alpha_map)
     {
-	// TODO: Need wide external alpha routine.
-	fetchDest = (scanFetchProc)ACCESS(fbFetchExternalAlpha);
-	store = (scanStoreProc)ACCESS(fbStoreExternalAlpha);
+	fetchDest = get_fetch_external_alpha(wide);
+	store = get_store_external_alpha(wide);
 
 	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 	    fetchDest = NULL;
     }
     else
     {
-	fetchDest = wide ? (scanFetchProc)fbFetch64 : (scanFetchProc)fbFetch;
-	store = wide ? (scanStoreProc)fbStore64 : (scanStoreProc)fbStore;
+	fetchDest = get_fetch(wide);
+	store = get_store(wide);
 
 	switch (data->op)
 	{
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 5a2f89e..fa1311d 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -219,6 +219,8 @@ void pixman_contract(uint32_t *dst, const uint64_t *src, int width);
 
 void pixmanFetchSourcePict(source_image_t *, int x, int y, int width,
                            uint32_t *buffer, uint32_t *mask, uint32_t maskBits);
+void pixmanFetchSourcePict64(source_image_t *, int x, int y, int width,
+                             uint64_t *buffer, uint64_t *mask, uint32_t maskBits);
 
 void fbFetchTransformed(bits_image_t *, int x, int y, int width,
                         uint32_t *buffer, uint32_t *mask, uint32_t maskBits);
@@ -236,6 +238,22 @@ void fbFetchExternalAlpha_accessors(bits_image_t *, int x, int y, int width,
                                     uint32_t *buffer, uint32_t *mask,
                                     uint32_t maskBits);
 
+void fbFetchTransformed64(bits_image_t *, int x, int y, int width,
+                          uint64_t *buffer, uint64_t *mask, uint32_t maskBits);
+void fbStoreExternalAlpha64(bits_image_t *, int x, int y, int width,
+                            uint64_t *buffer);
+void fbFetchExternalAlpha64(bits_image_t *, int x, int y, int width,
+                            uint64_t *buffer, uint64_t *mask, uint32_t maskBits);
+
+void fbFetchTransformed64_accessors(bits_image_t *, int x, int y, int width,
+                                    uint64_t *buffer, uint64_t *mask,
+                                    uint32_t maskBits);
+void fbStoreExternalAlpha64_accessors(bits_image_t *, int x, int y, int width,
+                                      uint64_t *buffer);
+void fbFetchExternalAlpha64_accessors(bits_image_t *, int x, int y, int width,
+                                      uint64_t *buffer, uint64_t *mask,
+                                      uint32_t maskBits);
+
 /* end */
 
 typedef enum
@@ -469,6 +487,7 @@ union pixman_image
 			 (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> 8)))) << (i))
 
 #define div_255(x) (((x) + 0x80 + (((x) + 0x80) >> 8)) >> 8)
+#define div_65535(x) (((x) + 0x8000 + (((x) + 0x8000) >> 16)) >> 16)
 
 #define MOD(a,b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b))
 
diff --git a/pixman/pixman-source.c b/pixman/pixman-source.c
index a5a4235..6a640fa 100644
--- a/pixman/pixman-source.c
+++ b/pixman/pixman-source.c
@@ -27,6 +27,7 @@
 #include <config.h>
 #endif
 
+#include <stdlib.h>
 #include <math.h>
 
 #include "pixman-private.h"
@@ -679,3 +680,30 @@ void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width,
         }
     }
 }
+
+/*
+ * For now, just evaluate the source picture at 32bpp and expand.  We could
+ * produce smoother gradients by evaluating them at higher color depth, but
+ * that's a project for the future.
+ */
+void pixmanFetchSourcePict64(source_image_t * pict, int x, int y, int width,
+                             uint64_t *buffer, uint64_t *mask, uint32_t maskBits)
+{
+    uint32_t *mask8 = NULL;
+
+    // Contract the mask image, if one exists, so that the 32-bit fetch function
+    // can use it.
+    if (mask) {
+        mask8 = pixman_malloc_ab(width, sizeof(uint32_t));
+        pixman_contract(mask8, mask, width);
+    }
+
+    // Fetch the source image into the first half of buffer.
+    pixmanFetchSourcePict(pict, x, y, width, (uint32_t*)buffer, mask8,
+                          maskBits);
+
+    // Expand from 32bpp to 64bpp in place.
+    pixman_expand(buffer, (uint32_t*)buffer, PIXMAN_a8r8g8b8, width);
+
+    free(mask8);
+}
diff --git a/pixman/pixman-transformed.c b/pixman/pixman-transformed.c
index cff1ba2..9f566bf 100644
--- a/pixman/pixman-transformed.c
+++ b/pixman/pixman-transformed.c
@@ -2,6 +2,7 @@
  *
  * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
  *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *             2008 Aaron Plattner, NVIDIA Corporation
  *
  * Permission to use, copy, modify, distribute, and sell this software and its
  * documentation for any purpose is hereby granted without fee, provided that
@@ -36,6 +37,11 @@
 #define Green(x) (((x) >> 8) & 0xff)
 #define Blue(x) ((x) & 0xff)
 
+#define Alpha64(x) ((x) >> 48)
+#define Red64(x) (((x) >> 32) & 0xffff)
+#define Green64(x) (((x) >> 16) & 0xffff)
+#define Blue64(x) ((x) & 0xffff)
+
 /*
  * Fetch from region strategies
  */
@@ -632,6 +638,30 @@ ACCESS(fbFetchTransformed)(bits_image_t * pict, int x, int y, int width,
     }
 }
 
+void
+ACCESS(fbFetchTransformed64)(bits_image_t * pict, int x, int y, int width,
+                             uint64_t *buffer, uint64_t *mask, uint32_t maskBits)
+{
+    // TODO: Don't lose precision for wide pictures!
+    uint32_t *mask8 = NULL;
+
+    // Contract the mask image, if one exists, so that the 32-bit fetch function
+    // can use it.
+    if (mask) {
+        mask8 = pixman_malloc_ab(width, sizeof(uint32_t));
+        pixman_contract(mask8, mask, width);
+    }
+
+    // Fetch the image into the first half of buffer.
+    ACCESS(fbFetchTransformed)(pict, x, y, width, (uint32_t*)buffer, mask8,
+                               maskBits);
+
+    // Expand from 32bpp to 64bpp in place.
+    pixman_expand(buffer, (uint32_t*)buffer, pict->format, width);
+
+    free(mask8);
+}
+
 #define SCANLINE_BUFFER_LENGTH 2048
 
 void
@@ -670,6 +700,45 @@ ACCESS(fbFetchExternalAlpha)(bits_image_t * pict, int x, int y, int width,
 }
 
 void
+ACCESS(fbFetchExternalAlpha64)(bits_image_t * pict, int x, int y, int width,
+                               uint64_t *buffer, uint64_t *mask,
+                               uint32_t maskBits)
+{
+    int i;
+    uint64_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
+    uint64_t *alpha_buffer = _alpha_buffer;
+    uint64_t maskBits64;
+
+    if (!pict->common.alpha_map) {
+        ACCESS(fbFetchTransformed64) (pict, x, y, width, buffer, mask, maskBits);
+	return;
+    }
+    if (width > SCANLINE_BUFFER_LENGTH)
+        alpha_buffer = (uint64_t *) pixman_malloc_ab (width, sizeof(uint64_t));
+
+    ACCESS(fbFetchTransformed64)(pict, x, y, width, buffer, mask, maskBits);
+    ACCESS(fbFetchTransformed64)((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
+                                 y - pict->common.alpha_origin.y, width,
+                                 alpha_buffer, mask, maskBits);
+
+    pixman_expand(&maskBits64, &maskBits, PIXMAN_a8r8g8b8, 1);
+
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits64)
+	{
+	    int64_t a = alpha_buffer[i]>>48;
+	    *(buffer + i) = (a << 48)
+		| (div_65535(Red64(*(buffer + i)) * a) << 32)
+		| (div_65535(Green64(*(buffer + i)) * a) << 16)
+		| (div_65535(Blue64(*(buffer + i)) * a));
+	}
+    }
+
+    if (alpha_buffer != _alpha_buffer)
+        free(alpha_buffer);
+}
+
+void
 ACCESS(fbStoreExternalAlpha)(bits_image_t * pict, int x, int y, int width,


Reply to: