[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-intel: Changes to 'upstream-experimental'



 COPYING                |  169 +++++
 NEWS                   |  162 +++++
 configure.ac           |    4 
 src/Makefile.am        |    6 
 src/common.h           |    3 
 src/drmmode_display.c  |   87 +-
 src/i810_dga.c         |    4 
 src/i810_dri.c         |   91 +-
 src/i810_driver.c      |   12 
 src/i810_hwmc.c        |   10 
 src/i810_video.c       |   26 
 src/i830.h             |  135 +++-
 src/i830_3d.c          |    4 
 src/i830_accel.c       |    2 
 src/i830_batchbuffer.c |  172 +++--
 src/i830_batchbuffer.h |   93 +-
 src/i830_dri.c         |  265 ++++----
 src/i830_driver.c      |  176 +----
 src/i830_hwmc.c        |  239 +++++--
 src/i830_hwmc.h        |   54 -
 src/i830_memory.c      |  172 ++---
 src/i830_render.c      |  294 +++++----
 src/i830_uxa.c         |  680 +++++++++++++--------
 src/i830_video.c       |   54 -
 src/i830_video.h       |    2 
 src/i915_3d.c          |   11 
 src/i915_3d.h          |  860 ++++++++++++++++-----------
 src/i915_hwmc.c        |  301 ---------
 src/i915_hwmc.h        |   47 -
 src/i915_reg.h         |   27 
 src/i915_render.c      | 1152 +++++++++++++++++++++++-------------
 src/i915_video.c       |  160 ++---
 src/i965_hwmc.c        |  280 --------
 src/i965_hwmc.h        |   16 
 src/i965_render.c      |  191 +++---
 src/i965_video.c       |   26 
 src/xvmc/Makefile.am   |    2 
 src/xvmc/i915_xvmc.c   |  239 +------
 src/xvmc/i915_xvmc.h   |   34 -
 src/xvmc/i965_xvmc.c   |   90 --
 src/xvmc/i965_xvmc.h   |    1 
 src/xvmc/intel_xvmc.c  |  233 +------
 src/xvmc/intel_xvmc.h  |   49 -
 src/xvmc/xvmc_vld.c    |  139 +---
 src/xvmc/xvmc_vld.h    |    1 
 uxa/uxa-accel.c        |  543 ++++++++++++-----
 uxa/uxa-glyphs.c       | 1526 +++++++++++++++++++++++++++++--------------------
 uxa/uxa-priv.h         |  118 +--
 uxa/uxa-render.c       | 1359 ++++++++++++++++++++++++++++++++-----------
 uxa/uxa.c              |   82 +-
 uxa/uxa.h              |   46 +
 51 files changed, 5833 insertions(+), 4616 deletions(-)

New commits:
commit ca16e3133658504b68a6b5c83bb8712899016544
Author: Carl Worth <cworth@cworth.org>
Date:   Thu Jun 24 14:07:15 2010 -0700

    Update version to 2.12.0
    
    For the imminent 2.12.0 release.

diff --git a/configure.ac b/configure.ac
index 624d940..a5d0617 100644
--- a/configure.ac
+++ b/configure.ac
@@ -22,7 +22,7 @@
 
 AC_PREREQ(2.57)
 AC_INIT([xf86-video-intel],
-        2.11.901,
+        2.12.0,
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         xf86-video-intel)
 

commit 8caf1564a75d65636d9b7a4da03219bb302bb37b
Author: Carl Worth <cworth@cworth.org>
Date:   Thu Jun 24 14:06:43 2010 -0700

    NEWS: Add notes for the 2.12.0 release.
    
    Let it ship!

diff --git a/NEWS b/NEWS
index ffccffd..aee6a21 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,97 @@
+Snapshot 2.12.0 (2010-06-24)
+=============================
+We are pleased to announce this major release of the xf86-video-intel
+2D driver. It contains many correctness and performance improvements
+compared to the 2.11 releases. We encourage everyone using 2.11 to
+upgrade to 2.12.
+
+[This release contains several bug fixes since the earlier 2.11.901
+release candidate---see below for details.]
+
+New requirements compared to 2.11
+---------------------------------
+ * Libdrm >= 2.4.21
+
+Major performance improvements
+------------------------------
+There have been many performance improvements targeting the
+915G/945G/Pineview graphics chipsets (thanks to Chris Wilson!). Areas
+improved include:
+
+ * Major improvements to glyph rendering
+
+ * Avoid software fallback when using extended desktops, (for example,
+   video playback)
+
+ * Better performance when using large windows, (for example with
+   firefox scrolling)
+
+ * Avoiding migration ping-pong with gigantic drawing
+
+The following speedups (measured with cairo-perf-trace/cairo-traces)
+capture some of the improvements. These were measured on a Pineview
+system with X server 1.8.1. This compares how this 2.12 release
+improves performance compared to 2.11.0:
+
+	Trace			Speedup
+	-----			-------
+	ocitysmap		5.41x speedup
+	firefox-talos-gfx       2.41x speedup
+	gnome-terminal-vim      2.15x speedup
+	poppler                 1.86x speedup
+	firefox-planet-gnome	1.43x speedup
+	midori-zoomed		1.39x speedup
+	swfdec-giant-steps      1.36x speedup
+	xfce4-terminal-a1	1.17x speedup
+	evolution		1.09x speedup
+	swfdec-youtube		1.06x speedup
+
+Other improvements
+------------------
+Daniel Vetter improved Xvmc and overlay, adding GEM support.
+
+Bug fixes
+---------
+There have been many conformance improvements targeting the
+915G/945G/Pineview graphics chipsets. The rendercheck test suite
+passes once again and there has been a significant reduction in the
+number of errors detected by the cairo test suite (notably center
+sampling and EXTEND_NONE behaviors are fixed).
+
+Other notable bug fixes include:
+
+ * Fix for bug 28446 Garbled fonts with Mathematica
+   https://bugs.freedesktop.org/show_bug.cgi?id=28446
+
+ * Fixes for page-flipping and other DRI2-handling bugs, (Thanks Jesse
+   Barnes, and Kristian Høgsberg)
+
+ * Avoid corruption when using extended desktops, (for example, video
+   playback)
+
+(The bug fixes below are new since the 2.11.901 release candidate)
+
+ * Fix corrupted output when screen rotated
+   https://bugs.freedesktop.org/show_bug.cgi?id=28461
+
+ * Fix corrupted rendering in KDE due to missing flush.
+
+ * Fixes i830_uxa_put_image for the following issues:
+
+      Bug 28569 - [i965] IGN's flash-based video player crashes X
+      https://bugs.freedesktop.org/show_bug.cgi?id=28569
+    
+      Bug 28573 - [i965] Fullscreen flash and windowed SDL games fail to
+                  update the screen
+      https://bugs.freedesktop.org/show_bug.cgi?id=28573
+
+ * Fix visual corruption of scrollbar in Chromium
+
+ * Fix tiling limits to resolve this issue:
+
+      Bug 28497 - Graphics corruption after opening a specific website
+      https://bugs.freedesktop.org/show_bug.cgi?id=28497
+
 Snapshot 2.11.901 (2010-06-14)
 ==============================
 This is the first release candidate in preparation for the upcoming

commit b58a6a39c1568800938eb0e3ebc7664683b61200
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jun 23 23:56:44 2010 +0100

    uxa: Fallback to pixman if source is out-of-bounds
    
    If the source is outside the drawable, then CopyArea will fail to
    initialise the source correctly. The simplest fix in this case is to
    fallback to pixman to generate the source texture.
    
    Fixes:
    
      Bug 28497 - Graphics corruption after opening a specific website
      https://bugs.freedesktop.org/show_bug.cgi?id=28497
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
index a4cebe3..9709a14 100644
--- a/uxa/uxa-render.c
+++ b/uxa/uxa-render.c
@@ -761,6 +761,21 @@ uxa_render_picture(ScreenPtr screen,
 	return picture;
 }
 
+static int
+drawable_contains (DrawablePtr drawable, int x, int y, int w, int h)
+{
+	if (x < 0 || y < 0)
+		return FALSE;
+
+	if (x + w > drawable->width)
+		return FALSE;
+
+	if (y + h > drawable->height)
+		return FALSE;
+
+	return TRUE;
+}
+
 PicturePtr
 uxa_acquire_drawable(ScreenPtr pScreen,
 		     PicturePtr pSrc,
@@ -770,14 +785,15 @@ uxa_acquire_drawable(ScreenPtr pScreen,
 {
 	PixmapPtr pPixmap;
 	PicturePtr pDst;
-	GCPtr pGC;
 	int depth, error;
 	int tx, ty;
+	GCPtr pGC;
 
 	depth = pSrc->pDrawable->depth;
-	if (depth == 1 ||
-	    pSrc->filter == PictFilterConvolution || /* XXX */
-	    !transform_is_integer_translation(pSrc->transform, &tx, &ty)) {
+	if (!transform_is_integer_translation(pSrc->transform, &tx, &ty) ||
+	    !drawable_contains(pSrc->pDrawable, x + tx, y + ty, width, height) ||
+	    depth == 1 ||
+	    pSrc->filter == PictFilterConvolution) {
 		/* XXX extract the sample extents and do the transformation on the GPU */
 		pDst = uxa_render_picture(pScreen, pSrc,
 					  pSrc->format | (BitsPerPixel(pSrc->pDrawable->depth) << 24),
@@ -785,7 +801,7 @@ uxa_acquire_drawable(ScreenPtr pScreen,
 
 		goto done;
 	} else {
-		if (width == pSrc->pDrawable->width && height == pSrc->pDrawable->depth) {
+		if (width == pSrc->pDrawable->width && height == pSrc->pDrawable->height) {
 			*out_x = x + pSrc->pDrawable->x;
 			*out_y = y + pSrc->pDrawable->y;
 			return pSrc;
@@ -799,7 +815,7 @@ uxa_acquire_drawable(ScreenPtr pScreen,
 		return 0;
 
 	/* Skip the copy if the result remains in memory and not a bo */
-	if (!uxa_drawable_is_offscreen(&pPixmap->drawable)) {
+	if (!uxa_pixmap_is_offscreen(pPixmap)) {
 		pScreen->DestroyPixmap(pPixmap);
 		return 0;
 	}
@@ -816,15 +832,15 @@ uxa_acquire_drawable(ScreenPtr pScreen,
 	FreeScratchGC(pGC);
 
 	pDst = CreatePicture(0, &pPixmap->drawable,
-				 PictureMatchFormat(pScreen, depth, pSrc->format),
-				 0, 0, serverClient, &error);
+			     PictureMatchFormat(pScreen, depth, pSrc->format),
+			     0, 0, serverClient, &error);
 	pScreen->DestroyPixmap(pPixmap);
 	ValidatePicture(pDst);
 
 done:
 	pDst->componentAlpha = pSrc->componentAlpha;
-	*out_x = x;
-	*out_y = y;
+	*out_x = 0;
+	*out_y = 0;
 	return pDst;
 }
 
@@ -844,8 +860,8 @@ uxa_acquire_picture(ScreenPtr screen,
 			*out_x = x + src->pDrawable->x;
 			*out_y = y + src->pDrawable->y;
 		} else {
-			*out_x = 0;
-			*out_y = 0;
+			*out_x = x;
+			*out_y = y;
 		}
 		return src;
 	}
@@ -1477,21 +1493,6 @@ compatible_formats (CARD8 op, PicturePtr dst, PicturePtr src)
 	return 0;
 }
 
-static int
-drawable_contains (DrawablePtr drawable, int x, int y, int w, int h)
-{
-	if (x < 0 || y < 0)
-		return FALSE;
-
-	if (x + w > drawable->width)
-		return FALSE;
-
-	if (y + h > drawable->height)
-		return FALSE;
-
-	return TRUE;
-}
-
 void
 uxa_composite(CARD8 op,
 	      PicturePtr pSrc,

commit 6d33e578de4e23336ac69cc3c5d0935a65d4dda1
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jun 23 21:28:50 2010 +0100

    Limit maximum tiled stride to 8k and untiled to 32k.
    
    Tiling on gen 2/3 hardware is only supported for pitches up to 8192
    bytes, so above this limit the surface will be untiled and we will no
    longer have to comply with the power-of-two pitch alignment. So
    disabling tiling for these too-wide surfaces should ~halve the memory
    requirement for the full surface.
    
    Also the absolute limit for the 2D blitter is 32,768 bytes. The
    documentation says "up to 32,768 bytes" and my PineView box was
    malfunctioning with a surface stride of 32,768, so set the limit to be
    32,767.
    
    References:
    
      Bug 28497 - Graphics corruption after opening a specific website
      https://bugs.freedesktop.org/show_bug.cgi?id=28497
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 1ec7ab8..61e857f 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -152,14 +152,13 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
 {
 	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
 	intel_screen_private *intel = intel_get_screen_private(scrn);
-	int pitch, pitch_align;
-	int size;
+	int pitch, size;
 
 	if (*tiling != I915_TILING_NONE) {
 		/* First check whether tiling is necessary. */
-		pitch_align = intel->accel_pixmap_pitch_alignment;
-		size = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8,
-				pitch_align) * ALIGN (h, 2);
+		pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8;
+		pitch = ROUND_TO(pitch, intel->accel_pixmap_pitch_alignment);
+		size = pitch * ALIGN (h, 2);
 		if (!IS_I965G(intel)) {
 			/* Older hardware requires fences to be pot size
 			 * aligned with a minimum of 1 MiB, so causes
@@ -167,6 +166,12 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
 			 */
 			if (size < 1024*1024/2)
 				*tiling = I915_TILING_NONE;
+
+			/* Gen 2/3 has a maximum stride for tiling of
+			 * 8192 bytes.
+			 */
+			if (pitch > KB(8))
+				*tiling = I915_TILING_NONE;
 		} else if (size <= 4096) {
 			/* Disable tiling beneath a page size, we will not see
 			 * any benefit from reducing TLB misses and instead
@@ -179,29 +184,19 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
 	pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8;
 	if (pitch <= 256)
 		*tiling = I915_TILING_NONE;
-  repeat:
-	if (*tiling == I915_TILING_NONE) {
-		pitch_align = intel->accel_pixmap_pitch_alignment;
-	} else {
-		pitch_align = 512;
-	}
-
-	*stride = ROUND_TO(pitch, pitch_align);
 
-	if (*tiling == I915_TILING_NONE) {
-		/* Round the height up so that the GPU's access to a 2x2 aligned
-		 * subspan doesn't address an invalid page offset beyond the
-		 * end of the GTT.
-		 */
-		size = *stride * ALIGN(h, 2);
-	} else {
+	if (*tiling != I915_TILING_NONE) {
 		int aligned_h;
+
 		if (*tiling == I915_TILING_X)
 			aligned_h = ALIGN(h, 8);
 		else
 			aligned_h = ALIGN(h, 32);
 
-		*stride = i830_get_fence_pitch(intel, *stride, *tiling);
+		*stride = i830_get_fence_pitch(intel,
+					       ROUND_TO(pitch, 512),
+					       *tiling);
+
 		/* Round the object up to the size of the fence it will live in
 		 * if necessary.  We could potentially make the kernel allocate
 		 * a larger aperture space and just bind the subset of pages in,
@@ -209,12 +204,18 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
 		 * with drm_intel_bufmgr_check_aperture().
 		 */
 		size = i830_get_fence_size(intel, *stride * aligned_h);
-		assert(size >= *stride * aligned_h);
+
+		if (size > intel->max_tiling_size)
+			*tiling = I915_TILING_NONE;
 	}
 
-	if (*tiling != I915_TILING_NONE && size > intel->max_tiling_size) {
-		*tiling = I915_TILING_NONE;
-		goto repeat;
+	if (*tiling == I915_TILING_NONE) {
+		/* Round the height up so that the GPU's access to a 2x2 aligned
+		 * subspan doesn't address an invalid page offset beyond the
+		 * end of the GTT.
+		 */
+		*stride = ROUND_TO(pitch, intel->accel_pixmap_pitch_alignment);
+		size = *stride * ALIGN(h, 2);
 	}
 
 	return size;
@@ -987,7 +988,7 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 		 * frequently, and also will tend to fail to successfully map when doing
 		 * SW fallbacks because we overcommit address space for BO access.
 		 */
-		if (size > intel->max_bo_size) {
+		if (size > intel->max_bo_size || stride >= KB(32)) {
 			fbDestroyPixmap(pixmap);
 			return fbCreatePixmap(screen, w, h, depth, usage);
 		}

commit 5bf470bd38b1f6a7a540585186a54c9dbbca98f9
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jun 21 22:28:58 2010 +0100

    i965: Compile fix.
    
    Oops, I spent more time discussing these flushing bugs than I spent
    paying attention to what I was actually doing.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/i830_driver.c b/src/i830_driver.c
index 91e5ac9..b8e0c0b 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -870,7 +870,7 @@ I830BlockHandler(int i, pointer blockData, pointer pTimeout, pointer pReadmask)
 		 */
 		intel_batch_submit(scrn,
 				   intel->need_mi_flush ||
-				   !list_is_empty(intel->flush_pixmaps));
+				   !list_is_empty(&intel->flush_pixmaps));
 		drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE);
 	}
 
diff --git a/src/i965_render.c b/src/i965_render.c
index e5b0916..9ba6105 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1162,7 +1162,7 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
 
 	/* Mark the destination dirty within this batch */
 	intel_batch_mark_pixmap_domains(intel,
-					i830_uxa_get_pixmap_intel(dest_picture),
+					i830_get_pixmap_intel(dest),
 					I915_GEM_DOMAIN_RENDER,
 					I915_GEM_DOMAIN_RENDER);
 

commit 0203cf91b54ee8a7cea8560e559288ee9b6e8554
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jun 21 22:25:08 2010 +0100

    Do not clear need_mi_flush within the batch.
    
    This is a situation that should not be possible, need_mi_flush being
    true but the list of pending flush pixmaps being clear. However, an
    earlier bug in doing just that revealed this minor bug. So for
    correctness, be careful not to clear need_mi_flush without emitting a
    MI_FLUSH.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/i830_batchbuffer.c b/src/i830_batchbuffer.c
index b2ee639..2b7227b 100644
--- a/src/i830_batchbuffer.c
+++ b/src/i830_batchbuffer.c
@@ -223,7 +223,7 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush)
 		list_del(&entry->batch);
 	}
 
-	intel->need_mi_flush = !list_is_empty(&intel->flush_pixmaps);
+	intel->need_mi_flush |= !list_is_empty(&intel->flush_pixmaps);
 	while (!list_is_empty(&intel->flush_pixmaps))
 		list_del(intel->flush_pixmaps.next);
 

commit 5107b6fa26ecfdbdd60b869a86765c9c484db3a2
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jun 21 22:21:58 2010 +0100

    i965: Mark the render target as dirty within composite_setup()
    
    The key difference between i965 and earlier is that the surfaces are passed
    to the samplers through an indirect table and so the batch and render
    target was not being marked dirty by the relocation (since the
    relocation only happens within prepare_composite() which may have been
    in another batch.) Simply call intel_pixmap_mark_dirty() when binding
    the sampler table into the batch to ensure that the dirty is tracked
    appropriately.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index 7328b6c..e5b0916 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1160,6 +1160,12 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
 	IntelEmitInvarientState(scrn);
 	intel->last_3d = LAST_3D_RENDER;
 
+	/* Mark the destination dirty within this batch */
+	intel_batch_mark_pixmap_domains(intel,
+					i830_uxa_get_pixmap_intel(dest_picture),
+					I915_GEM_DOMAIN_RENDER,
+					I915_GEM_DOMAIN_RENDER);
+
 	urb_vs_start = 0;
 	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
 	urb_gs_start = urb_vs_start + urb_vs_size;
@@ -1544,6 +1550,12 @@ i965_prepare_composite(int op, PicturePtr source_picture,
 		composite_op->mask_extend = SAMPLER_STATE_EXTEND_NONE;
 	}
 
+	/* Flush any pending writes prior to relocating the textures. */
+	if(i830_uxa_pixmap_is_dirty(source) ||
+	   (mask && i830_uxa_pixmap_is_dirty(mask)))
+		intel_batch_emit_flush(scrn);
+
+
 	/* Set up the surface states. */
 	surface_state_bo = dri_bo_alloc(intel->bufmgr, "surface_state",
 					3 * sizeof(brw_surface_state_padded),
@@ -1676,10 +1688,6 @@ i965_prepare_composite(int op, PicturePtr source_picture,
 		}
 	}
 
-	if(i830_uxa_pixmap_is_dirty(source) ||
-	   (mask && i830_uxa_pixmap_is_dirty(mask)))
-		intel_batch_emit_flush(scrn);
-
 	intel->needs_render_state_emit = TRUE;
 
 	return TRUE;

commit bebd64d82121d2b61893e1d0adebda03f4faea61
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jun 21 22:05:19 2010 +0100

    Also submit any pending flush for *this* batch in the BlockHandler.
    
    We still need to submit an additional flush if we have further writes
    since the last flush.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/i830_driver.c b/src/i830_driver.c
index 5d5a3c5..91e5ac9 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -868,7 +868,9 @@ I830BlockHandler(int i, pointer blockData, pointer pTimeout, pointer pReadmask)
 		 * rendering results may not hit the framebuffer until significantly
 		 * later.
 		 */
-		intel_batch_submit(scrn, intel->need_mi_flush);
+		intel_batch_submit(scrn,
+				   intel->need_mi_flush ||
+				   !list_is_empty(intel->flush_pixmaps));
 		drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE);
 	}
 

commit c4d200517755e6b0f48d6c6a6ba8dd0ddc883d8b
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jun 21 21:57:21 2010 +0100

    Only append the pixmap to the flushing list if we are writing to it.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/i830_batchbuffer.h b/src/i830_batchbuffer.h
index ba03f6a..4315bba 100644
--- a/src/i830_batchbuffer.h
+++ b/src/i830_batchbuffer.h
@@ -126,7 +126,7 @@ intel_batch_mark_pixmap_domains(intel_screen_private *intel,
 
 	if (list_is_empty(&priv->batch))
 		list_add(&priv->batch, &intel->batch_pixmaps);
-	if (list_is_empty(&priv->flush))
+	if (write_domain && list_is_empty(&priv->flush))
 		list_add(&priv->flush, &intel->flush_pixmaps);
 
 	priv->batch_write |= write_domain != 0;

commit c942585098ac84ae461821cbb8f52dedce7a0da1
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jun 21 21:45:04 2010 +0100

    Emit the flush after a potential draw from the BlockHandler.
    
    As the batch submit may not trigger further drawing through flushing the
    vertices, pass the requirement to emit the flush down to the submission
    routine so that the flush can be appended after the final commands.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/i830_accel.c b/src/i830_accel.c
index da7e773..9e8a5ed 100644
--- a/src/i830_accel.c
+++ b/src/i830_accel.c
@@ -58,7 +58,7 @@ void i830_debug_flush(ScrnInfoPtr scrn)
 		intel_batch_emit_flush(scrn);
 
 	if (intel->debug_flush & DEBUG_FLUSH_BATCHES)
-		intel_batch_submit(scrn);
+		intel_batch_submit(scrn, FALSE);
 }
 
 /* The following function sets up the supported acceleration. Call it
diff --git a/src/i830_batchbuffer.c b/src/i830_batchbuffer.c
index 6da20d7..b2ee639 100644
--- a/src/i830_batchbuffer.c
+++ b/src/i830_batchbuffer.c
@@ -157,20 +157,23 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn)
 	intel_batch_do_flush(scrn);
 }
 
-void intel_batch_submit(ScrnInfoPtr scrn)
+void intel_batch_submit(ScrnInfoPtr scrn, int flush)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 	int ret;
 
 	assert (!intel->in_batch_atomic);
 
-	if (intel->batch_used == 0)
-		return;
-
 	if (intel->vertex_flush)
 		intel->vertex_flush(intel);
 	intel_end_vertex(intel);
 
+	if (flush)
+		intel_batch_emit_flush(scrn);
+
+	if (intel->batch_used == 0)
+		return;
+
 	/* Mark the end of the batchbuffer. */
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 	/* Emit a padding dword if we aren't going to be quad-word aligned. */
@@ -220,10 +223,6 @@ void intel_batch_submit(ScrnInfoPtr scrn)
 		list_del(&entry->batch);
 	}
 
-	/* Mark that we need to flush whatever potential rendering we've done in the
-	 * blockhandler.  We could set this less often, but it's probably not worth
-	 * the work.
-	 */
 	intel->need_mi_flush = !list_is_empty(&intel->flush_pixmaps);
 	while (!list_is_empty(&intel->flush_pixmaps))
 		list_del(intel->flush_pixmaps.next);
@@ -249,7 +248,6 @@ void intel_batch_submit(ScrnInfoPtr scrn)
 
 	intel_next_batch(scrn);
 
-
 	if (intel->debug_flush & DEBUG_FLUSH_WAIT)
 		intel_batch_wait_last(scrn);
 
diff --git a/src/i830_batchbuffer.h b/src/i830_batchbuffer.h
index 5375d2c..ba03f6a 100644
--- a/src/i830_batchbuffer.h
+++ b/src/i830_batchbuffer.h
@@ -37,7 +37,7 @@ void intel_batch_init(ScrnInfoPtr scrn);
 void intel_batch_teardown(ScrnInfoPtr scrn);
 void intel_batch_emit_flush(ScrnInfoPtr scrn);
 void intel_batch_do_flush(ScrnInfoPtr scrn);
-void intel_batch_submit(ScrnInfoPtr scrn);
+void intel_batch_submit(ScrnInfoPtr scrn, int flush);
 void intel_batch_wait_last(ScrnInfoPtr scrn);
 
 static inline int intel_batch_space(intel_screen_private *intel)
@@ -55,7 +55,7 @@ intel_batch_require_space(ScrnInfoPtr scrn, intel_screen_private *intel, GLuint
 {
 	assert(sz < intel->batch_bo->size - 8);
 	if (intel_batch_space(intel) < sz)
-		intel_batch_submit(scrn);
+		intel_batch_submit(scrn, FALSE);
 }
 
 static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, unsigned int sz)
@@ -200,7 +200,7 @@ do {									\
 	if ((intel->batch_emitting > 8) &&				\
 	    (I810_DEBUG & DEBUG_ALWAYS_SYNC)) {				\
 		/* Note: not actually syncing, just flushing each batch. */ \
-		intel_batch_submit(scrn);			\
+		intel_batch_submit(scrn, FALSE);			\
 	}								\
 	intel->batch_emitting = 0;					\
 } while (0)
diff --git a/src/i830_dri.c b/src/i830_dri.c
index 87865fe..34e2336 100644
--- a/src/i830_dri.c
+++ b/src/i830_dri.c
@@ -387,9 +387,9 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion,
 	 * later.
 	 *
 	 * We can't rely on getting into the block handler before the DRI
-	 * client gets to run again so flush now. */
-	intel_batch_emit_flush(scrn);
-	intel_batch_submit(scrn);
+	 * client gets to run again so flush now.
+	 */
+	intel_batch_submit(scrn, TRUE);
 	drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE);
 }
 
diff --git a/src/i830_driver.c b/src/i830_driver.c
index eed755c..5d5a3c5 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -867,16 +867,8 @@ I830BlockHandler(int i, pointer blockData, pointer pTimeout, pointer pReadmask)
 		/* Emit a flush of the rendering cache, or on the 965 and beyond
 		 * rendering results may not hit the framebuffer until significantly
 		 * later.
-		 *
-		 * XXX Under KMS this is only required because tfp does not have
-		 * the appropriate synchronisation points, so that outstanding updates
-		 * to the pixmap are flushed prior to use as a texture. The framebuffer
-		 * should be handled by the kernel domain management...
 		 */
-		if (intel->need_mi_flush || !list_is_empty(&intel->flush_pixmaps))
-			intel_batch_emit_flush(scrn);
-
-		intel_batch_submit(scrn);
+		intel_batch_submit(scrn, intel->need_mi_flush);
 		drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE);
 	}
 
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 7946f91..1ec7ab8 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -118,7 +118,7 @@ i830_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table,
 
 	bo_table[0] = intel->batch_bo;
 	if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) != 0) {
-		intel_batch_submit(scrn);
+		intel_batch_submit(scrn, FALSE);
 		bo_table[0] = intel->batch_bo;
 		if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) !=
 		    0) {
@@ -677,7 +677,7 @@ static Bool i830_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access)
 
 	if (!list_is_empty(&priv->batch) &&
 	    (access == UXA_ACCESS_RW || priv->batch_write))
-		intel_batch_submit(scrn);
+		intel_batch_submit(scrn, FALSE);
 
 	if (bo->size > intel->max_gtt_map_size) {
 		ret = dri_bo_map(bo, access == UXA_ACCESS_RW);
@@ -904,7 +904,7 @@ static Bool i830_uxa_get_image(PixmapPtr pixmap,
 
 		FreeScratchGC(gc);
 
-		intel_batch_submit(xf86Screens[screen->myNum]);
+		intel_batch_submit(xf86Screens[screen->myNum], FALSE);
 
 		x = y = 0;
 		pixmap = scratch;
diff --git a/src/i965_render.c b/src/i965_render.c
index f5904cc..7328b6c 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1667,7 +1667,7 @@ i965_prepare_composite(int op, PicturePtr source_picture,
 	}
 
 	if (!i965_composite_check_aperture(scrn)) {
-		intel_batch_submit(scrn);
+		intel_batch_submit(scrn, FALSE);
 		if (!i965_composite_check_aperture(scrn)) {
 			intel_debug_fallback(scrn,
 					     "Couldn't fit render operation "
@@ -1844,7 +1844,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb);
 
 	if (!i965_composite_check_aperture(scrn))
-		intel_batch_submit(scrn);
+		intel_batch_submit(scrn, FALSE);
 
 	intel_batch_start_atomic(scrn, 200);
 	if (intel->needs_render_state_emit)
diff --git a/src/i965_video.c b/src/i965_video.c
index 855f0b5..a5136e0 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1196,7 +1196,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
 		if (drm_intel_bufmgr_check_aperture_space(bo_table,
 							  ARRAY_SIZE(bo_table))
 		    < 0) {
-			intel_batch_submit(scrn);
+			intel_batch_submit(scrn, FALSE);
 		}
 
 		intel_batch_start_atomic(scrn, 100);

commit e8783869ad55d337601b6f6a51c02f6576c64f38
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jun 20 00:09:20 2010 +0100

    uxa: Apply the source offsets to the pixmap source, not target.
    
    A slight confusion in computing the correct image location resulted
    in the application of the source offsets to the pixel location in the
    target and not in the source as intended.
    
    Fixes the visual corruption of the scrollbar in Chromium, and hopefully
    the crash reported by Robert Hooker when starting gdm after plymouth.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/uxa/uxa-accel.c b/uxa/uxa-accel.c
index 00a43b1..ded66a1 100644
--- a/uxa/uxa-accel.c
+++ b/uxa/uxa-accel.c
@@ -562,13 +562,13 @@ uxa_copy_n_to_n(DrawablePtr pSrcDrawable,
 		bpp /= 8;
 		while (nbox--) {
 			if (!uxa_screen->info->get_image(pSrcPixmap,
-							 pbox->x1 + src_off_x,
-							 pbox->y1 + src_off_y,
+							 pbox->x1 + dx + src_off_x,
+							 pbox->y1 + dy + src_off_y,
 							 pbox->x2 - pbox->x1,
 							 pbox->y2 - pbox->y1,
 							 (char *) dst +
-							 (pbox->y1 + dy + dst_off_y) * stride +
-							 (pbox->x1 + dx + dst_off_x) * bpp,
+							 (pbox->y1 + dst_off_y) * stride +
+							 (pbox->x1 + dst_off_x) * bpp,
 							 stride))
 				goto fallback;
 

commit 4b7142baa0b3bf6f38843d06aadc579d8624cefc
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jun 19 12:30:23 2010 +0100

    uxa: Enable SHM pixmaps
    
    Now with streaming uploads and downloads for composite operations in
    place, shared memory pixmaps are no longer that dire performance wise.
    With careful use these can in fact be the most efficient means of
    transfer between a wholly software renderer in the client and a backing
    store. For instance, Chromium renders internally to an ARGB32 image
    buffer and uses a shared pixmap to composite dirty regions into the
    backing store. Thereby using the GPU to either perform the blit or the
    format conversion. Enabling shared pixmaps, reduces our CPU overhead
    whilst scrolling by a factor of 5 or so.
    
    And this is achieved simply by deleting obsolete code!
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/uxa/uxa-accel.c b/uxa/uxa-accel.c
index acbc69f..00a43b1 100644
--- a/uxa/uxa-accel.c
+++ b/uxa/uxa-accel.c
@@ -361,92 +361,12 @@ uxa_do_put_image(DrawablePtr pDrawable, GCPtr pGC, int depth, int x, int y,
 	return TRUE;
 }
 
-#ifdef MITSHM
-
-#include "xorgVersion.h"
-
-static Bool
-uxa_do_shm_put_image(DrawablePtr pDrawable, GCPtr pGC, int depth,
-		     unsigned int format, int w, int h, int sx, int sy, int sw,
-		     int sh, int dx, int dy, char *data)
-{
-	int src_stride = PixmapBytePad(w, depth);
-
-	if (uxa_do_put_image
-	    (pDrawable, pGC, depth, dx, dy, sw, sh, format,
-	     data + sy * src_stride + sx * BitsPerPixel(depth) / 8, src_stride))
-		return TRUE;
-
-	if (format == ZPixmap) {
-		PixmapPtr pPixmap;
-
-		pPixmap =
-		    GetScratchPixmapHeader(pDrawable->pScreen, w, h, depth,
-					   BitsPerPixel(depth), PixmapBytePad(w,
-									      depth),
-					   (pointer) data);
-		if (!pPixmap)
-			return FALSE;
-
-		if (!uxa_prepare_access(pDrawable, UXA_ACCESS_RW)) {
-			FreeScratchPixmapHeader(pPixmap);
-			return FALSE;
-		}
-
-		fbCopyArea((DrawablePtr) pPixmap, pDrawable, pGC, sx, sy, sw,
-			   sh, dx, dy);
-		uxa_finish_access(pDrawable);
-
-		FreeScratchPixmapHeader(pPixmap);
-
-		return TRUE;
-	}
-
-	return FALSE;
-}
-
-#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,5,99,0,0)
-
-/* The actual ShmPutImage isn't wrapped by the damage layer, so we need to
- * inform any interested parties of the damage incurred to the drawable.
- *
- * We also need to set the pending damage to ensure correct migration in all
- * cases.
- */
-void
-uxa_shm_put_image(DrawablePtr pDrawable, GCPtr pGC, int depth,
-		  unsigned int format, int w, int h, int sx, int sy, int sw,
-		  int sh, int dx, int dy, char *data)
-{
-	if (!uxa_do_shm_put_image
-	    (pDrawable, pGC, depth, format, w, h, sx, sy, sw, sh, dx, dy,
-	     data)) {
-		if (!uxa_prepare_access(pDrawable, UXA_ACCESS_RW))
-			return;
-		fbShmPutImage(pDrawable, pGC, depth, format, w, h, sx, sy, sw,
-			      sh, dx, dy, data);
-		uxa_finish_access(pDrawable);
-	}
-}
-#else
-#define uxa_shm_put_image NULL
-#endif
-
-ShmFuncs uxa_shm_funcs = { NULL, uxa_shm_put_image };
-
-#endif
-
 static void
 uxa_put_image(DrawablePtr pDrawable, GCPtr pGC, int depth, int x, int y,
 	      int w, int h, int leftPad, int format, char *bits)
 {
-#ifdef MITSHM
-	if (!uxa_do_shm_put_image
-	    (pDrawable, pGC, depth, format, w, h, 0, 0, w, h, x, y, bits))
-#else
 	if (!uxa_do_put_image(pDrawable, pGC, depth, x, y, w, h, format, bits,
 			      PixmapBytePad(w, pDrawable->depth)))
-#endif
 		uxa_check_put_image(pDrawable, pGC, depth, x, y, w, h, leftPad,
 				    format, bits);
 }
diff --git a/uxa/uxa-priv.h b/uxa/uxa-priv.h
index 626823f..b107928 100644
--- a/uxa/uxa-priv.h
+++ b/uxa/uxa-priv.h
@@ -41,14 +41,6 @@
 #include <X11/X.h>
 #define NEED_EVENTS
 #include <X11/Xproto.h>


Reply to: