xserver-xorg-video-intel: Changes to 'ubuntu'
ChangeLog | 396 +++++++++++++++++++++++++++
NEWS | 28 +
configure.ac | 8
debian/changelog | 17 -
src/intel_module.c | 9
src/render_program/exa_wm_src_projective.g7a | 4
src/render_program/exa_wm_src_projective.g7b | 2
src/sna/compiler.h | 2
src/sna/gen4_render.c | 17 -
src/sna/kgem.c | 22 +
src/sna/sna.h | 13
src/sna/sna_accel.c | 32 +-
src/sna/sna_composite.c | 1
src/sna/sna_display.c | 1
src/sna/sna_dri.c | 157 +++++++---
src/sna/sna_render.c | 13
src/sna/sna_trapezoids.c | 241 ++++------------
src/sna/sna_video.c | 10
test/.gitignore | 1
19 files changed, 703 insertions(+), 271 deletions(-)
New commits:
commit cab8e66343917b848191e77ae0dcdc828d547033
Author: Timo Aaltonen <tjaalton@ubuntu.com>
Date: Mon Feb 25 10:51:23 2013 +0200
release to raring
diff --git a/debian/changelog b/debian/changelog
index 60527f1..fb3194a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+xserver-xorg-video-intel (2:2.21.3-0ubuntu1) raring; urgency=low
+
+ * Merge from unreleased debian git
+ - new upstream release
+
+ -- Timo Aaltonen <tjaalton@ubuntu.com> Mon, 25 Feb 2013 10:32:15 +0200
+
xserver-xorg-video-intel (2:2.21.3-1) UNRELEASED; urgency=low
* New upstream release.
commit 85bd3c938a63a66947bf237be134f5fdb1d38e63
Author: Timo Aaltonen <tjaalton@ubuntu.com>
Date: Mon Feb 25 10:17:08 2013 +0200
bump the changelogs
diff --git a/ChangeLog b/ChangeLog
index fe63739..27b8f4b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,399 @@
+commit d2442c74b8d41018f260f1da13f3fe5d2795792f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 20 10:53:57 2013 +0000
+
+ 2.21.3 release
+
+commit 2cab7e80eb6955a7f8ea051633f6975a12248b69
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 20 12:36:22 2013 +0000
+
+ sna/trapezoids: Clamp cells to valid range
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 14de90b251dd8a6ff106e989580ef01cf5c2944d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 20 12:00:54 2013 +0000
+
+ sna/trapezoids: Embed a few cells into the stack
+
+ Avoid an allocation in the common case where the set of trapezoids is
+ fairly narrow.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3eca4ea1a5d8ce04598b8d42e93e0dcb93e42e9a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 20 11:20:54 2013 +0000
+
+ sna/trapezoids: Perform the cell allocation upfront
+
+ As we know the maximum extents of the trapezoids, we know the maximum
+ number of cells we will need and so can preallocate them.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9df5e48c582e5c4edffdece75b5395c230a50b09
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 19 10:08:29 2013 +0000
+
+ sna/video: Only setup XvMC if we first setup Xv
+
+ Under certain circumstances, XvScreenInit can indeed fail, so do not
+ bother with creating XvMC (as it triggers internal assertions if it
+ cannot find our adaptor amongst Xv's).
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b6588c48077600a3e015b6d37b101393a806ae1a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 19 08:59:12 2013 +0000
+
+ test: Add generated vsync.avi to gitignore
+
+commit 1e2fd66ade6bdbf1e6011f3d59e423fada3f12f6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 19 08:57:47 2013 +0000
+
+ sna: Assert that the GPU damage is NULL before destroying a proxy
+
+ If the GPU bo is a proxy, then it really is a pointer into a upload
+ buffer for CPU data. In these cases, there should never be any GPU
+ damage lying around.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d90a123db7ac99cf017167bf89df31c635df7e1e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 18 20:06:45 2013 +0000
+
+ sna: Assert that the gpu_bo exists if it is entirely damaged
+
+ This should help catch the error slightly earlier.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a03aba6f7f55577e29c6a3267528e2614016222d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 18 16:12:14 2013 +0000
+
+ sna: A couple more assertions that we forcibly attach pixmaps correctly
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1f16d854264ea923303b79379266bd789fd9dd4d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 18 14:30:55 2013 +0000
+
+ sna/dri: Prevent swapping a decoupled DRI2Buffer
+
+ If the DRI2Buffer is no longer valid for the Drawable, for example the
+ window had just been reparented, just complete the swap without triggering
+ any assertions.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b3ba758a0186c9abc6c0583f52775ea714165134
+Author: Damien Lespiau <damien.lespiau@intel.com>
+Date: Thu Feb 14 14:20:19 2013 +0000
+
+ uxa/gen7: Don't use a message register to store vl
+
+ Turns out the "new" assembler that uses mesa's opcode emission hits the
+ path that automatically transforms MRF registers into GRF ones in the
+ exa_wm_src_projective shader.
+
+ The diff with the new assembler is:
+
+ $ intel-gen4disasm -g7 -
+ - { 0x00600041, 0x208077be, 0x008d03c0, 0x008d0180 },
+ + { 0x00600041, 0x2e8077bd, 0x008d03c0, 0x008d0180 },
+ mul(8) m4<1>F g30<8,8,1>F g12<8,8,1>F { align1 };
+ mul(8) g116<1>F g30<8,8,1>F g12<8,8,1>F { align1 };
+
+ Of course, message registers are no more in gen7, so the shader is
+ trying to do something shaddy (ahem!).
+
+ Instead of using m4, let's make exa_wm_src_projective use g68 for v (aka
+ vl) which makes sense since:
+
+ 1/ vh is g69
+ 2/ exa_wm_src_affine uses g68 for vl already
+
+ This commit changes the generated assembly, here's the decoded diff:
+
+ $ intel-gen4disasm -g7 -
+ - { 0x00600041, 0x208077be, 0x008d03c0, 0x008d0180 },
+ + { 0x00600041, 0x288077bd, 0x008d03c0, 0x008d0180 },
+ mul(8) m4<1>F g30<8,8,1>F g12<8,8,1>F { align1 };
+ mul(8) g68<1>F g30<8,8,1>F g12<8,8,1>F { align1 };
+
+ Cc: Kenneth Graunke <kenneth@whitecape.org>
+ Reported-by: Xiang, Haihao <haihao.xiang@intel.com>
+ Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
+ Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 43ba22ef4a4142f334e9ae2d926250988ecbe8bc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Feb 17 09:25:38 2013 +0000
+
+ Confirm that i915.ko has KMS enabled before binding to the device
+
+ If the kernel has the module, but the KMS module option is not enabled,
+ we cannot function. So after checking to see if the i915.ko is bound,
+ we then query whether it provides any KMS resources. If it has no CRTCs
+ attached, then we need to failover to the VESA/fbdev drivers. Note that
+ this should have been detected by drmCheckModesettingSupported()
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=60987
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5a5943e2374a674067e0c48b0fe8a1f2eeff2177
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 15 12:05:14 2013 +0000
+
+ NEWS: Xserver 1.6 was not packaged in Xorg 7.7
+
+ I misinterpreted Sedat Dilek's advice on how to fix my confusing
+ changelog, so drop the Xorg 7.7 confusion and just refer to the version
+ of Xserver the driver is first compatible with.
+
+commit 15e1050ea505c40a288ec197d817d3c6d51693d5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Feb 14 21:59:36 2013 +0000
+
+ NEWS: Fix a couple of typos and factual errors
+
+ Sedat Dilek corrected my spelling and pointed out that what is known as
+ Xorg 1.6 in the log file is actually better known as releases of
+ Xserver 1.6 and Xorg 7.7.
+
+commit 86e025a1842f3c2f319676818b1f4624cd94ebc5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 13 22:05:34 2013 +0000
+
+ sna/gen4: Restore nonrectilinear spans workaround
+
+ Instead of relying on the macro, the intention was simply to use the
+ prefer_gpu hint. However, I dropped it whilst refactoring ideas from
+ later generations. So restore both the debug control to force spans as
+ well as the intended workaround.
+
+ Reported-by: Edward Sheldrake <ejsheldrake@gmail.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f2a64dbdfceac985d235c4873f52013633d6cfd8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 13 17:15:46 2013 +0000
+
+ sna: Avoid stalling on a SHM upload when otherwise idle
+
+ Fixes regression from
+
+ commit 09ea1f4402b3bd0e411b90eb5575b3ff066d7356
+ Author: Chris Wilson <chris@chris-wilson.co.uk>
+ Date: Thu Jan 10 16:26:24 2013 +0000
+
+ sna: Prefer to use the GPU for copies from SHM onto tiled destinations
+
+ As the stalls on IVB 64-bit machines at least greatly offset the
+ benefits. As those earlier measurements were made on the same IVB
+ machine but running in 32-bit mode, I need to double-check whether or
+ not this is another 32-bit peculiarity.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4bf51ec7bcc7818fbee8643a88aebee0362691b2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 13 16:32:18 2013 +0000
+
+ Revert "sna: Avoid promoting SHM CPU bo to GPU to maintain coherence with SHM clients"
+
+ This reverts commit f743cd5734ca502aa8bdb0e1327fe84d6ce82755.
+
+commit 9861423a76402c260724a752ada293a03ce1a79b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 13 15:11:45 2013 +0000
+
+ sna/dri: Fix typo limiting gen4 to TILING_X depth buffers
+
+ Fixes regression from
+
+ commit 98b312e579385e6e4adf6bf0abe20f8ca84592af
+ Author: Chris Wilson <chris@chris-wilson.co.uk>
+ Date: Wed Jan 23 20:51:35 2013 +0000
+
+ sna/dri: Stop feeding I915_TILING_Y to mesa i915c
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=60178
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 38a3506aa1c9c2e07125b54fc319b6de89febff9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 13 14:09:38 2013 +0000
+
+ sna: Handle scanouts still active at the time of destruction
+
+ Basically remove the bogus assert, and reorder the list to
+ preferentially reuse the still active scanouts.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3b82d93162b9e127c6d055d5007e75fcc31f4af8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 12 21:53:38 2013 +0000
+
+ sna/dri: Validate that the scanout flag is set
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 04e4805f57fdf0581e5e75c29c275c15c79852a3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 12 21:52:35 2013 +0000
+
+ sna: Prefer inplace readback if the GPU doesn't support cacheable BLTs
+
+ As we must perform the GTT reads anyway, first see if we can copy
+ directly to the destination.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2c569d22d2d243badc15d5dbcc09e6759f4ff01a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 12 20:15:33 2013 +0000
+
+ sna: Only use the inplace read fallback if we have no CPU bo support
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit aeaed0b7810e9c794749a4f59c72790063cddc86
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 12 19:30:44 2013 +0000
+
+ sna/dri: Assert that we are not passed deceased DRI2Buffers
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5fb165b02d47634ccf9fb72ab6fb4d65b89f78d0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 22:15:17 2013 +0000
+
+ sna/dri: Assert that the old scanout is clear before overwriting
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2667b821e193c0badf16420f82f8a99a93c27a46
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 22:06:00 2013 +0000
+
+ sna/dri: Update flush status for tear-free exported GPU bo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a4442045df9164651702b166e0de2bfd25e8a76f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 21:16:19 2013 +0000
+
+ sna/dri: Harden the replacement of the back buffer against alloc failure
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 75406775b83ca5c320e4bc2d2dbd8f1eb0d1677d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 13:00:13 2013 +0000
+
+ NEWS: Fix marketing name for 945gm
+
+ The GMA3100 was the desktop G31/G33 which whilst almost identical to the
+ GMA950, notably does not require the extra unfenced alignment.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0b116a1d023ad7cc148d91f268dbf71452dbd9dd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 12:36:43 2013 +0000
+
+ sna: Fix inaccurate use of __attribute__((const))
+
+ 'const' is only allowed to use the function parameters and not allowed
+ to access global memory - that includes not allowed to dereference its
+ arguments...
+
+ Thanks to Jiri Slaby for spotting my mistake.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 47657e5355103b2e61da6f059c560e63fe13b0ed
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 12:31:18 2013 +0000
+
+ sna/dri: Ignore a compiler barf breaking an assertion
+
+ Manual confirmation with gdb says all is fine and things work again
+ after printing the variables responsible. The curious reader is welcome
+ to read the disassembly to find where the compiler goofed.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3540554e5a74ccd2ca746482700a9f5657954227
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 11:14:17 2013 +0000
+
+ sna/dri: Couple up the flush pixmap after checking for allocation failures
+
+ Tidy up the error path not to leave the GPU bo pinned and marked for
+ flushing.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 58b61bcd02467d2b08b2dd194ab6884437b83fa6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 11:05:02 2013 +0000
+
+ sna/dri: Tighten the assertions that we have one DRI2Buffer per Pixmap
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a13f806ae28459a5cf671ccbf9cc8725f541a4ba
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 10:50:20 2013 +0000
+
+ sna: Assert that we do not destroy a GPU bo for an active DRI pixmap
+
+ This is an extra paranoid check that the bo is still pinned.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c2eb2db83280ae45b8df8a3b6e488b6a1404391a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 10:47:13 2013 +0000
+
+ sna: Avoid stalling for changing cache levels on an active scanout
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 696347a6260f3741ad3e6238db0fd285384bb753
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Feb 11 09:58:58 2013 +0000
+
+ sna/dri: Only clear the GPU bo flush flag when the last export is destroyed
+
+ Add various assertions around DRI to be sure that we don't change the
+ GPU bo for a pixmap that does not match our flush bookkeeping. Then be
+ more careful during the DRI2Buffer destroy that we don't accidentally
+ remove the flush flag from a shared bo.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 301aebfa71143fb610eb7b816376a8c1f3e07208
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Feb 10 18:01:33 2013 +0000
+
+ configure: Add -lX11 required to link the legacy libI810XvMC.so
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
commit 65c320e677b8ab4d745568b33f381d5865bf8b73
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun Feb 10 14:20:59 2013 +0000
diff --git a/debian/changelog b/debian/changelog
index 3e521d6..fbb4834 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-xserver-xorg-video-intel (2:2.21.2-1) UNRELEASED; urgency=low
+xserver-xorg-video-intel (2:2.21.3-1) UNRELEASED; urgency=low
* New upstream release.
commit d2442c74b8d41018f260f1da13f3fe5d2795792f
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed Feb 20 10:53:57 2013 +0000
2.21.3 release
diff --git a/NEWS b/NEWS
index f1951a7..4c088bd 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,25 @@
+Release 2.21.3 (2013-02-20)
+===========================
+A few minor bugfixes, another point release.
+
+ * Fix tracking of DRI pixmaps and their backing bo across reparenting. If
+ we tried to execute a SwapBuffers after a Window was reparented, but
+ before the DRI client has updated its references, then we would end up
+ manipulating an exported pixmap without a flush flag set. In the worst
+ case, this would culminate in a segfault in the driver.
+ https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1127497
+
+ * Restore the gen4 workarounds for flickering rendering - a few cases still
+ remain, as the root cause persists.
+ https://bugs.freedesktop.org/show_bug.cgi?id=60402
+
+ * Double check that the device has KMS enabled before claiming. This allows
+ X to gracefully fallback to VESA/fbdev rather than bailing out.
+ https://bugs.freedesktop.org/show_bug.cgi?id=60987
+
+ * Fix the UXA render programs for projective transforms on Ivybridge.
+
+
Release 2.21.2 (2013-02-10)
===========================
Pass the brown paper bags, I need half a dozen or so. That seemingly
diff --git a/configure.ac b/configure.ac
index 6aa0e6c..97daee6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
# Initialize Autoconf
AC_PREREQ([2.60])
AC_INIT([xf86-video-intel],
- [2.21.2],
+ [2.21.3],
[https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
[xf86-video-intel])
AC_CONFIG_SRCDIR([Makefile.am])
commit 2cab7e80eb6955a7f8ea051633f6975a12248b69
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed Feb 20 12:36:22 2013 +0000
sna/trapezoids: Clamp cells to valid range
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index d0e1bd1..9396baf 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -262,6 +262,7 @@ struct cell_list {
/* Points to the left-most cell in the scan line. */
struct cell head, tail;
+ int16_t x1, x2;
int16_t count, size;
struct cell *cells;
struct cell embedded[256];
@@ -331,7 +332,7 @@ cell_list_rewind(struct cell_list *cells)
}
static bool
-cell_list_init(struct cell_list *cells, int width)
+cell_list_init(struct cell_list *cells, int x1, int x2)
{
cells->tail.next = NULL;
cells->tail.x = INT_MAX;
@@ -339,7 +340,9 @@ cell_list_init(struct cell_list *cells, int width)
cells->head.next = &cells->tail;
cell_list_rewind(cells);
cells->count = 0;
- cells->size = width+1;
+ cells->x1 = x1;
+ cells->x2 = x2;
+ cells->size = x2 - x1 + 1;
cells->cells = cells->embedded;
if (cells->size > ARRAY_SIZE(cells->embedded))
cells->cells = malloc(cells->size * sizeof(struct cell));
@@ -392,6 +395,15 @@ cell_list_find(struct cell_list *cells, int x)
if (tail->x == x)
return tail;
+ if (x >= cells->x2)
+ return &cells->tail;
+
+ if (x < cells->x1)
+ x = cells->x1;
+
+ if (tail->x == x)
+ return tail;
+
do {
if (tail->next->x > x)
break;
@@ -980,7 +992,7 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges)
converter->xmax = box->x2;
converter->ymax = box->y2;
- if (!cell_list_init(converter->coverages, box->x2 - box->x1))
+ if (!cell_list_init(converter->coverages, box->x1, box->x2))
return false;
active_list_reset(converter->active);
@@ -1135,32 +1147,21 @@ tor_blt(struct sna *sna,
int xmin, int xmax,
int unbounded)
{
- struct cell *cell = cells->head.next;
+ struct cell *cell;
BoxRec box;
- int cover = 0;
-
- /* Skip cells to the left of the clip region. */
- while (cell->x < xmin) {
- __DBG(("%s: skipping cell (%d, %d, %d)\n",
- __FUNCTION__,
- cell->x, cell->covered_height, cell->uncovered_area));
-
- cover += cell->covered_height;
- cell = cell->next;
- }
- cover *= FAST_SAMPLES_X*2;
+ int cover;
box.y1 = y;
box.y2 = y + height;
box.x1 = xmin;
/* Form the spans from the coverages and areas. */
- for (; cell != NULL; cell = cell->next) {
+ cover = 0;
+ for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
int x = cell->x;
- if (x >= xmax)
- break;
-
+ assert(x >= xmin);
+ assert(x < xmax);
__DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__,
cell->x, cell->covered_height, cell->uncovered_area,
cover, xmax));
commit 14de90b251dd8a6ff106e989580ef01cf5c2944d
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed Feb 20 12:00:54 2013 +0000
sna/trapezoids: Embed a few cells into the stack
Avoid an allocation in the common case where the set of trapezoids is
fairly narrow.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 4ac8b8b..d0e1bd1 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -264,6 +264,7 @@ struct cell_list {
int16_t count, size;
struct cell *cells;
+ struct cell embedded[256];
};
/* The active list contains edges in the current scan line ordered by
@@ -339,14 +340,17 @@ cell_list_init(struct cell_list *cells, int width)
cell_list_rewind(cells);
cells->count = 0;
cells->size = width+1;
- cells->cells = malloc(cells->size * sizeof(struct cell));
+ cells->cells = cells->embedded;
+ if (cells->size > ARRAY_SIZE(cells->embedded))
+ cells->cells = malloc(cells->size * sizeof(struct cell));
return cells->cells != NULL;
}
static void
cell_list_fini(struct cell_list *cells)
{
- free(cells->cells);
+ if (cells->cells != cells->embedded)
+ free(cells->cells);
}
inline static void
commit 3eca4ea1a5d8ce04598b8d42e93e0dcb93e42e9a
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed Feb 20 11:20:54 2013 +0000
sna/trapezoids: Perform the cell allocation upfront
As we know the maximum extents of the trapezoids, we know the maximum
number of cells we will need and so can preallocate them.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index c547fb5..4ac8b8b 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -160,26 +160,6 @@ struct quorem {
int32_t rem;
};
-struct _pool_chunk {
- size_t size;
- struct _pool_chunk *prev_chunk;
- /* Actual data starts here. Well aligned for pointers. */
-};
-
-/* A memory pool. This is supposed to be embedded on the stack or
- * within some other structure. It may optionally be followed by an
- * embedded array from which requests are fulfilled until
- * malloc needs to be called to allocate a first real chunk. */
-struct pool {
- struct _pool_chunk *current;
- struct _pool_chunk *first_free;
-
- /* Header for the sentinel chunk. Directly following the pool
- * struct should be some space for embedded elements from which
- * the sentinel chunk allocates from. */
- struct _pool_chunk sentinel[1];
-};
-
struct edge {
struct edge *next, *prev;
@@ -277,17 +257,13 @@ struct cell {
* ascending x. It is geared towards scanning the cells in order
* using an internal cursor. */
struct cell_list {
+ struct cell *cursor;
+
/* Points to the left-most cell in the scan line. */
struct cell head, tail;
- struct cell *cursor;
-
- /* Cells in the cell list are owned by the cell list and are
- * allocated from this pool. */
- struct {
- struct pool base[1];
- struct cell embedded[256];
- } cell_pool;
+ int16_t count, size;
+ struct cell *cells;
};
/* The active list contains edges in the current scan line ordered by
@@ -345,103 +321,6 @@ floored_muldivrem(int32_t x, int32_t a, int32_t b)
return qr;
}
-static inline void
-_pool_chunk_init(struct _pool_chunk *p,
- struct _pool_chunk *prev_chunk)
-{
- p->prev_chunk = prev_chunk;
- p->size = sizeof(*p);
-}
-
-static struct _pool_chunk *
-_pool_chunk_create(struct _pool_chunk *prev_chunk)
-{
- size_t size = 256*sizeof(struct cell);
- struct _pool_chunk *p;
-
- p = malloc(size + sizeof(struct _pool_chunk));
- if (unlikely (p == NULL))
- abort();
-
- _pool_chunk_init(p, prev_chunk);
- return p;
-}
-
-static void
-pool_init(struct pool *pool)
-{
- pool->current = pool->sentinel;
- pool->first_free = NULL;
- _pool_chunk_init(pool->sentinel, NULL);
-}
-
-static void
-pool_fini(struct pool *pool)
-{
- struct _pool_chunk *p = pool->current;
- do {
- while (NULL != p) {
- struct _pool_chunk *prev = p->prev_chunk;
- if (p != pool->sentinel)
- free(p);
- p = prev;
- }
- p = pool->first_free;
- pool->first_free = NULL;
- } while (NULL != p);
-}
-
-static void *
-_pool_alloc_from_new_chunk(struct pool *pool)
-{
- struct _pool_chunk *chunk;
- void *obj;
-
- chunk = pool->first_free;
- if (chunk) {
- pool->first_free = chunk->prev_chunk;
- _pool_chunk_init(chunk, pool->current);
- } else {
- chunk = _pool_chunk_create(pool->current);
- }
- pool->current = chunk;
-
- obj = (unsigned char*)chunk + chunk->size;
- chunk->size += sizeof(struct cell);
- return obj;
-}
-
-inline static void *
-pool_alloc(struct pool *pool)
-{
- struct _pool_chunk *chunk = pool->current;
-
- if (chunk->size < 256*sizeof(struct cell)+sizeof(*chunk)) {
- void *obj = (unsigned char*)chunk + chunk->size;
- chunk->size += sizeof(struct cell);
- return obj;
- } else
- return _pool_alloc_from_new_chunk(pool);
-}
-
-static void
-pool_reset(struct pool *pool)
-{
- /* Transfer all used chunks to the chunk free list. */
- struct _pool_chunk *chunk = pool->current;
- if (chunk != pool->sentinel) {
- while (chunk->prev_chunk != pool->sentinel)
- chunk = chunk->prev_chunk;
-
- chunk->prev_chunk = pool->first_free;
- pool->first_free = pool->current;
- }
-
- /* Reset the sentinel as the current chunk. */
- pool->current = pool->sentinel;
- pool->sentinel->size = sizeof(*chunk);
-}
-
/* Rewinds the cell list's cursor to the beginning. After rewinding
* we're good to cell_list_find() the cell any x coordinate. */
inline static void
@@ -450,21 +329,24 @@ cell_list_rewind(struct cell_list *cells)
cells->cursor = &cells->head;
}
-static void
-cell_list_init(struct cell_list *cells)
+static bool
+cell_list_init(struct cell_list *cells, int width)
{
- pool_init(cells->cell_pool.base);
cells->tail.next = NULL;
cells->tail.x = INT_MAX;
cells->head.x = INT_MIN;
cells->head.next = &cells->tail;
cell_list_rewind(cells);
+ cells->count = 0;
+ cells->size = width+1;
+ cells->cells = malloc(cells->size * sizeof(struct cell));
+ return cells->cells != NULL;
}
static void
cell_list_fini(struct cell_list *cells)
{
- pool_fini(cells->cell_pool.base);
+ free(cells->cells);
}
inline static void
@@ -472,7 +354,7 @@ cell_list_reset(struct cell_list *cells)
{
cell_list_rewind(cells);
cells->head.next = &cells->tail;
- pool_reset(cells->cell_pool.base);
+ cells->count = 0;
}
inline static struct cell *
@@ -482,10 +364,11 @@ cell_list_alloc(struct cell_list *cells,
{
struct cell *cell;
- cell = pool_alloc(cells->cell_pool.base);
-
+ assert(cells->count < cells->size);
+ cell = cells->cells + cells->count++;
cell->next = tail->next;
tail->next = cell;
+
cell->x = x;
cell->uncovered_area = 0;
cell->covered_height = 0;
@@ -594,7 +477,7 @@ polygon_fini(struct polygon *polygon)
free(polygon->edges);
}
-static int
+static bool
polygon_init(struct polygon *polygon,
int num_edges,
grid_scaled_y_t ymin,
@@ -627,11 +510,11 @@ polygon_init(struct polygon *polygon,
polygon->ymin = ymin;
polygon->ymax = ymax;
- return 0;
+ return true;
bail_no_mem:
polygon_fini(polygon);
- return -1;
+ return false;
}
static void
@@ -1079,7 +962,7 @@ tor_fini(struct tor *converter)
cell_list_fini(converter->coverages);
}
-static int
+static bool
tor_init(struct tor *converter, const BoxRec *box, int num_edges)
{
__DBG(("%s: (%d, %d),(%d, %d) x (%d, %d), num_edges=%d\n",
@@ -1093,12 +976,19 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges)
converter->xmax = box->x2;
converter->ymax = box->y2;
- cell_list_init(converter->coverages);
+ if (!cell_list_init(converter->coverages, box->x2 - box->x1))
+ return false;
+
active_list_reset(converter->active);
- return polygon_init(converter->polygon,
+ if (!polygon_init(converter->polygon,
num_edges,
box->y1 * FAST_SAMPLES_Y,
- box->y2 * FAST_SAMPLES_Y);
+ box->y2 * FAST_SAMPLES_Y)) {
+ cell_list_fini(converter->coverages);
+ return false;
+ }
+
+ return true;
}
static void
@@ -4591,7 +4481,7 @@ span_thread(void *arg)
const xTrapezoid *t;
int n, y1, y2;
- if (tor_init(&tor, &thread->extents, 2*thread->ntrap))
+ if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
return;
boxes.op = thread->op;
@@ -4753,7 +4643,7 @@ trapezoid_span_converter(struct sna *sna,
if (num_threads == 1) {
struct tor tor;
- if (tor_init(&tor, &extents, 2*ntrap))
+ if (!tor_init(&tor, &extents, 2*ntrap))
goto skip;
for (n = 0; n < ntrap; n++) {
@@ -4774,7 +4664,6 @@ trapezoid_span_converter(struct sna *sna,
choose_span(&tmp, dst, maskFormat, &clip),
!was_clear && maskFormat && !operator_is_bounded(op));
-skip:
tor_fini(&tor);
} else {
struct span_thread threads[num_threads];
@@ -4815,6 +4704,7 @@ skip:
sna_threads_wait();
}
+skip:
tmp.done(sna, &tmp);
REGION_UNINIT(NULL, &clip);
@@ -4938,7 +4828,7 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
DBG(("%s: created buffer %p, stride %d\n",
__FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
- if (tor_init(&tor, &extents, 2*ntrap)) {
+ if (!tor_init(&tor, &extents, 2*ntrap)) {
sna_pixmap_destroy(scratch);
return true;
}
@@ -5690,7 +5580,7 @@ static void inplace_x8r8g8b8_thread(void *arg)
RegionPtr clip;
int y1, y2, n;
- if (tor_init(&tor, &thread->extents, 2*thread->ntrap))
+ if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
return;
y1 = thread->extents.y1 - thread->dst->pDrawable->y;
@@ -5884,7 +5774,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
struct tor tor;
span_func_t span;
- if (tor_init(&tor, &region.extents, 2*ntrap))
+ if (!tor_init(&tor, &region.extents, 2*ntrap))
return true;
for (n = 0; n < ntrap; n++) {
Reply to: