[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-intel: Changes to 'upstream-experimental'



 NEWS                          |   40 +
 configure.ac                  |   28 -
 man/intel.man                 |   13 
 src/Makefile.am               |    1 
 src/intel.h                   |   17 
 src/intel_display.c           |    2 
 src/intel_dri.c               |  146 ------
 src/intel_driver.c            |   34 -
 src/intel_options.c           |    1 
 src/intel_options.h           |    1 
 src/intel_shadow.c            |  200 --------
 src/intel_uxa.c               |   34 -
 src/intel_video.c             |    3 
 src/legacy/i810/Makefile.am   |    3 
 src/legacy/i810/i810.h        |    8 
 src/legacy/i810/i810_accel.c  |  343 +-------------
 src/legacy/i810/i810_dri.c    |    2 
 src/legacy/i810/i810_driver.c |   35 -
 src/legacy/i810/i810_xaa.c    |  320 +++++++++++++
 src/sna/fb/fbbitmap.c         |   11 
 src/sna/fb/fbblt.c            |   44 -
 src/sna/fb/fbclip.c           |   13 
 src/sna/fb/fbclip.h           |   25 -
 src/sna/fb/fbcopy.c           |   41 -
 src/sna/fb/fbfill.c           |   30 -
 src/sna/fb/fbtile.c           |   27 -
 src/sna/gen2_render.c         |   48 +-
 src/sna/gen3_render.c         |   47 +-
 src/sna/gen4_render.c         |  334 ++++++++++++++
 src/sna/gen5_render.c         |   50 +-
 src/sna/gen6_render.c         |  780 +++++++++++++++++----------------
 src/sna/gen6_render.h         |    4 
 src/sna/gen7_render.c         |  640 +++++++++++++--------------
 src/sna/kgem.c                |  911 ++++++++++++++++++++++++---------------
 src/sna/kgem.h                |   55 +-
 src/sna/sna.h                 |   16 
 src/sna/sna_accel.c           |  977 +++++++++++++++++++++++++++++++-----------
 src/sna/sna_blt.c             |  309 ++++++-------
 src/sna/sna_display.c         |   54 +-
 src/sna/sna_dri.c             |   62 +-
 src/sna/sna_driver.c          |   12 
 src/sna/sna_glyphs.c          |   78 ++-
 src/sna/sna_io.c              |   14 
 src/sna/sna_render.c          |  200 +++++---
 src/sna/sna_render.h          |   21 
 src/sna/sna_render_inline.h   |    3 
 src/sna/sna_trapezoids.c      |  366 ++++++++++-----
 src/sna/sna_video.c           |    2 
 48 files changed, 3666 insertions(+), 2739 deletions(-)

New commits:
commit 83f683b47063eab8cfb5037d02133dd977c3fc25
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jul 22 23:20:23 2012 +0100

    2.20.1 release
    
    A good brown paper bag bug release for SNA.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/NEWS b/NEWS
index 8e30d9e..a6819d4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,43 @@
+Release 2.20.1 (2012-07-22)
+===========================
+A week in, grab the brown paper bags, for it is time to reveal a couple
+of critical bugs that spoilt the 2.20.0 release.
+
+Firstly we have the restoration of DRI for i810. I am sure that the
+solitary user will be overjoyed in a couple of years when a new xserver
+is forced upon him. That enjoyment will be short-lived, as no actual
+acceleration remains, not even shadow, for the chipset.
+
+Perhaps a little more widely felt, I hope!, will be that the SNA
+fallbacks were broken on 64-bit machines if they required clipping. One
+little misplaced cast of a pointer, and the screen is filled with
+corruption.
+
+Among the other tweaks this week:
+
+* A bug affecting gen4 handling of trapezoids was fixed, and CPU
+  overhead reduced.
+  https://bugs.freedesktop.org/show_bug.cgi?id=52158
+
+* A fix for a bug causing corruption of a DRI2 unredirected client
+  window that was resized whilst under a compositor.
+
+* Support for snoopable buffers on non-LLC architectures, coming to
+  a future kernel. The aim is to accelerate transfers between the CPU
+  and the GPU, in particular to dramatically improve readback
+  performance, and to further minimise clflushes.
+
+* Improvement to the composite performance on GT2 SandyBridge and
+  IvyBridge devices, in particular the render copy is significantly
+  improved.
+
+* Improved handling for when acceleration is disabled, including
+  permitting DRI2 to remain supported even if the X server believes
+  the GPU wedged.
+
+* Shadow support was dropped from UXA as it was neither complete nor
+  correct, use SNA instead.
+
 Release 2.12.0 (2012-07-15)
 ===========================
 First the big news, a new acceleration method that aims to be faster and
diff --git a/configure.ac b/configure.ac
index 45157a8..3cdacdd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.20.0],
+        [2.20.1],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit 9402bdcc13f7e96dfe527ff4a3da8d13a7870a02
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jul 22 22:14:52 2012 +0100

    sna/glyphs: Also discard the glyph mask for bitmaps with an opaque source
    
    Though I expect all such glyphs to be caught by the non-overlapping
    checks...
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index 46fbf8d..918b51c 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -1248,6 +1248,44 @@ out:
 	return format;
 }
 
+static bool can_discard_mask(uint8_t op, PicturePtr src, PictFormatPtr mask,
+			     int nlist, GlyphListPtr list, GlyphPtr *glyphs)
+{
+	PictFormatPtr g;
+	uint32_t color;
+
+	if (nlist == 1 && list->len == 1)
+		return true;
+
+	if (!op_is_bounded(op))
+		return false;
+
+	/* No glyphs overlap and we are not performing a mask conversion. */
+	g = glyphs_format(nlist, list, glyphs);
+	if (mask == g)
+		return true;
+
+	/* Otherwise if the glyphs are all bitmaps and we have an
+	 * opaque source we can also render directly to the dst.
+	 */
+	if (g == NULL) {
+		while (nlist--) {
+			if (list->format->depth != 1)
+				return false;
+
+			list++;
+		}
+	} else {
+		if (g->depth != 1)
+			return false;
+	}
+
+	if (!sna_picture_is_solid(src, &color))
+		return false;
+
+	return color >> 24 == 0xff;
+}
+
 #if HAS_PIXMAN_GLYPHS
 static void
 glyphs_fallback(CARD8 op,
@@ -1309,8 +1347,7 @@ glyphs_fallback(CARD8 op,
 	RegionTranslate(&region, -dst->pDrawable->x, -dst->pDrawable->y);
 
 	if (mask_format &&
-	    (op_is_bounded(op) || (nlist == 1 && list->len == 1)) &&
-	    mask_format == glyphs_format(nlist, list, glyphs))
+	    can_discard_mask(op, src, mask_format, nlist, list, glyphs))
 		mask_format = NULL;
 
 	cache = sna->render.glyph_cache;
@@ -1674,8 +1711,7 @@ sna_glyphs(CARD8 op,
 
 	/* Try to discard the mask for non-overlapping glyphs */
 	if (mask && dst->pCompositeClip->data == NULL &&
-	    (op_is_bounded(op) || (nlist == 1 && list->len == 1)) &&
-	    mask == glyphs_format(nlist, list, glyphs)) {
+	    can_discard_mask(op, src, mask, nlist, list, glyphs)) {
 		DBG(("%s: discarding mask\n", __FUNCTION__));
 		if (glyphs_to_dst(sna, op,
 				  src, dst,

commit b315e0ebb75d8391ebef7ebe53741a5e33c968bb
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jul 22 20:07:38 2012 +0100

    sna: Tweak the fallback hints for XYPixmap PutImage
    
    As the fallback uses a multiple-pass algorithm updating one plane at a
    time, we wish to prepare the fallback surface for reads.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index e881051..5f14f4e 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3698,6 +3698,7 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
 	struct sna_pixmap *priv = sna_pixmap(pixmap);
 	RegionRec region;
 	int16_t dx, dy;
+	unsigned hint;
 
 	DBG(("%s((%d, %d)x(%d, %d), depth=%d, format=%d)\n",
 	     __FUNCTION__, x, y, w, h, depth, format));
@@ -3726,39 +3727,47 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
 	if (priv == NULL) {
 		DBG(("%s: fallback -- unattached(%d, %d, %d, %d)\n",
 		     __FUNCTION__, x, y, w, h));
+hint_and_fallback:
+		hint = (format == XYPixmap ?
+			MOVE_READ | MOVE_WRITE :
+			drawable_gc_flags(drawable, gc, false));
 		goto fallback;
 	}
 
 	RegionTranslate(&region, dx, dy);
 
 	if (FORCE_FALLBACK)
-		goto fallback;
+		goto hint_and_fallback;
 
 	if (wedged(sna))
-		goto fallback;
+		goto hint_and_fallback;
 
 	if (!ACCEL_PUT_IMAGE)
-		goto fallback;
+		goto hint_and_fallback;
 
 	switch (format) {
 	case ZPixmap:
 		if (!PM_IS_SOLID(drawable, gc->planemask))
-			goto fallback;
+			goto hint_and_fallback;
 
 		if (sna_put_zpixmap_blt(drawable, gc, &region,
 					x, y, w, h,
 					bits, PixmapBytePad(w, depth)))
 			return;
+
+		hint = drawable_gc_flags(drawable, gc, false);
 		break;
 
 	case XYBitmap:
 		if (!PM_IS_SOLID(drawable, gc->planemask))
-			goto fallback;
+			goto hint_and_fallback;
 
 		if (sna_put_xybitmap_blt(drawable, gc, &region,
 					 x, y, w, h,
 					 bits))
 			return;
+
+		hint = drawable_gc_flags(drawable, gc, false);
 		break;
 
 	case XYPixmap:
@@ -3766,10 +3775,12 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
 					 x, y, w, h, left,
 					 bits))
 			return;
+
+		hint = MOVE_READ | MOVE_WRITE;
 		break;
 
 	default:
-		break;
+		return;
 	}
 
 fallback:
@@ -3778,8 +3789,7 @@ fallback:
 
 	if (!sna_gc_move_to_cpu(gc, drawable, &region))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region,
-					     drawable_gc_flags(drawable, gc, false)))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region, hint))
 		goto out_gc;
 
 	DBG(("%s: fbPutImage(%d, %d, %d, %d)\n",

commit 8acaf2693e176a92993a498683f121cfe0343fd4
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jul 22 12:28:34 2012 +0100

    sna: Promote tiled operations to the GPU if the tile is already on the GPU
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index f39052f..e881051 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3993,6 +3993,21 @@ out:
 		free(box);
 }
 
+static inline bool
+sna_pixmap_is_gpu(PixmapPtr pixmap)
+{
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+
+	if (priv == NULL || priv->clear)
+		return false;
+
+	if (DAMAGE_IS_ALL(priv->gpu_damage) ||
+	    (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo) && !priv->gpu_bo->proxy))
+		return true;
+
+	return priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo);
+}
+
 static int
 source_prefer_gpu(struct sna_pixmap *priv)
 {
@@ -4001,6 +4016,11 @@ source_prefer_gpu(struct sna_pixmap *priv)
 		return 0;
 	}
 
+	if (priv->clear) {
+		DBG(("%s: source is clear, don't force use of GPU\n", __FUNCTION__));
+		return 0;
+	}
+
 	if (priv->gpu_damage) {
 		DBG(("%s: source has gpu damage, force gpu\n", __FUNCTION__));
 		return PREFER_GPU | FORCE_GPU;
@@ -11014,6 +11034,12 @@ sna_poly_fill_rect(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect)
 			priv->cpu = false;
 	}
 
+	/* If the source is already on the GPU, keep the operation on the GPU */
+	if (gc->fillStyle == FillTiled) {
+		if (!gc->tileIsPixel && sna_pixmap_is_gpu(gc->tile.pixmap))
+			hint |= PREFER_GPU | FORCE_GPU;
+	}
+
 	bo = sna_drawable_use_bo(draw, hint, &region.extents, &damage);
 	if (bo == NULL)
 		goto fallback;

commit 7d4a3e371beea65bf66e54ae13789d6d5ca91f8b
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jul 22 11:19:13 2012 +0100

    sna: Use an upload buffer for large stipples
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 0b0a304..f39052f 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3519,9 +3519,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 		} while (--bh);
 
 		b = sna->kgem.batch + sna->kgem.nbatch;
-		b[0] = XY_MONO_SRC_COPY;
-		if (drawable->bitsPerPixel == 32)
-			b[0] |= 3 << 20;
+		b[0] = XY_MONO_SRC_COPY | 3 << 20;
 		b[0] |= ((box->x1 - x) & 7) << 17;
 		b[1] = bo->pitch;
 		if (sna->kgem.gen >= 40 && bo->tiling) {
@@ -3649,9 +3647,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 			} while (--bh);
 
 			b = sna->kgem.batch + sna->kgem.nbatch;
-			b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT;
-			if (drawable->bitsPerPixel == 32)
-				b[0] |= 3 << 20;
+			b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT | 3 << 20;
 			b[0] |= ((box->x1 - x) & 7) << 17;
 			b[1] = bo->pitch;
 			if (sna->kgem.gen >= 40 && bo->tiling) {
@@ -5852,9 +5848,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
 	get_drawable_deltas(drawable, dst_pixmap, &dx, &dy);
 	assert_pixmap_contains_boxes(dst_pixmap, box, n, dx, dy);
 
-	br00 = XY_MONO_SRC_COPY;
-	if (drawable->bitsPerPixel == 32)
-		br00 |= 3 << 20;
+	br00 = XY_MONO_SRC_COPY | 3 << 20;
 	br13 = arg->bo->pitch;
 	if (sna->kgem.gen >= 40 && arg->bo->tiling) {
 		br00 |= BLT_DST_TILED;
@@ -9667,9 +9661,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
 		unsigned px = (0 - gc->patOrg.x - dx) & 7;
 		unsigned py = (0 - gc->patOrg.y - dy) & 7;
 		DBG(("%s: pat offset (%d, %d)\n", __FUNCTION__ ,px, py));
-		br00 = XY_MONO_PAT | px << 12 | py << 8;
-		if (drawable->bitsPerPixel == 32)
-			br00 |= 3 << 20;
+		br00 = XY_MONO_PAT | px << 12 | py << 8 | 3 << 20;
 
 		br13 = bo->pitch;
 		if (sna->kgem.gen >= 40 && bo->tiling) {
@@ -10327,12 +10319,12 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
 }
 
 static void
-sna_poly_fill_rect_stippled_n_box(struct sna *sna,
-				  struct kgem_bo *bo,
-				  uint32_t br00, uint32_t br13,
-				  GCPtr gc,
-				  const BoxRec *box,
-				  const DDXPointRec *origin)
+sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna,
+				       struct kgem_bo *bo,
+				       uint32_t br00, uint32_t br13,
+				       const GC *gc,
+				       const BoxRec *box,
+				       const DDXPointRec *origin)
 {
 	int x1, x2, y1, y2;
 	uint32_t *b;
@@ -10415,6 +10407,299 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna,
 	}
 }
 
+static void
+sna_poly_fill_rect_stippled_n_box(struct sna *sna,
+				  struct kgem_bo *bo,
+				  uint32_t br00, uint32_t br13,
+				  const GC *gc,
+				  const BoxRec *box,
+				  const DDXPointRec *origin)
+{
+	int x1, x2, y1, y2;
+	int w = gc->stipple->drawable.width;
+	int h = gc->stipple->drawable.height;
+	int stride = gc->stipple->devKind;
+	uint32_t *b;
+
+	if ((((box->y2-box->y1) | (box->x2-box->x1)) & ~31) == 0) {
+		br00 = XY_MONO_SRC_COPY_IMM |(br00 & (BLT_DST_TILED | 3 << 20));
+		sna_poly_fill_rect_stippled_n_box__imm(sna, bo,
+						       br00, br13, gc,
+						       box, origin);
+		return;
+	}
+
+	for (y1 = box->y1; y1 < box->y2; y1 = y2) {
+		int row, oy = (y1 - origin->y) % gc->stipple->drawable.height;
+		if (oy < 0)
+			oy += h;
+
+		y2 = box->y2;
+		if (y2 - y1 > h - oy)
+			y2 = y1 + h - oy;
+
+		row = oy * stride;
+		for (x1 = box->x1; x1 < box->x2; x1 = x2) {
+			int bx1, bx2, bw, bh, len, ox;
+
+			x2 = box->x2;
+			ox = (x1 - origin->x) % w;
+			if (ox < 0)
+				ox += w;
+			bx1 = ox & ~7;
+			bx2 = ox + (x2 - x1);
+			if (bx2 > w) {
+				bx2 = w;
+				x2 = x1 + bx2-ox;
+			}
+
+			DBG(("%s: box((%d, %d)x(%d, %d)) origin=(%d, %d), pat=(%d, %d), up=(%d, %d), stipple=%dx%d\n",
+			     __FUNCTION__,
+			     x1, y1, x2-x1, y2-y1,
+			     origin->x, origin->y,
+			     ox, oy, bx1, bx2, w, h));
+
+			bw = (bx2 - bx1 + 7)/8;
+			bw = ALIGN(bw, 2);
+			bh = y2 - y1;
+
+			len = bw*bh;
+			len = ALIGN(len, 8) / 4;
+			if (!kgem_check_batch(&sna->kgem, 7+len) ||
+			    !kgem_check_bo_fenced(&sna->kgem, bo) ||
+			    !kgem_check_reloc(&sna->kgem, 2)) {
+				_kgem_submit(&sna->kgem);
+				_kgem_set_mode(&sna->kgem, KGEM_BLT);
+			}
+
+			b = sna->kgem.batch + sna->kgem.nbatch;
+
+			if (len <= 128) {
+				uint8_t *dst, *src;
+
+				b[0] = XY_MONO_SRC_COPY_IMM;
+				b[0] |= (br00 & (BLT_DST_TILED | 3 << 20));
+				b[0] |= (ox & 7) << 17;
+				b[0] |= (5 + len);
+				b[1] = br13;
+				b[2] = y1 << 16 | x1;
+				b[3] = y2 << 16 | x2;
+				b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
+						      bo,
+						      I915_GEM_DOMAIN_RENDER << 16 |
+						      I915_GEM_DOMAIN_RENDER |
+						      KGEM_RELOC_FENCED,
+						      0);
+				b[5] = gc->bgPixel;
+				b[6] = gc->fgPixel;
+
+				sna->kgem.nbatch += 7 + len;
+
+				dst = (uint8_t *)&b[7];
+				len = gc->stipple->devKind;
+				src = gc->stipple->devPrivate.ptr;
+				src += oy*len + ox/8;
+				len -= bw;
+				do {
+					int i = bw;
+					do {
+						*dst++ = byte_reverse(*src++);
+						*dst++ = byte_reverse(*src++);
+						i -= 2;
+					} while (i);
+					src += len;
+				} while (--bh);
+			} else {
+				struct kgem_bo *upload;
+				uint8_t *dst, *src;
+				void *ptr;
+
+				upload = kgem_create_buffer(&sna->kgem, bw*bh,
+							    KGEM_BUFFER_WRITE_INPLACE,
+							    &ptr);
+				if (!upload)
+					return;
+
+				b = sna->kgem.batch + sna->kgem.nbatch;
+				b[0] = br00 | (ox & 7) << 17;
+				b[1] = br13;
+				b[2] = y1 << 16 | x1;
+				b[3] = y2 << 16 | x2;
+				b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
+						      bo,
+						      I915_GEM_DOMAIN_RENDER << 16 |
+						      I915_GEM_DOMAIN_RENDER |
+						      KGEM_RELOC_FENCED,
+						      0);
+				b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
+						      upload,
+						      I915_GEM_DOMAIN_RENDER << 16 |
+						      KGEM_RELOC_FENCED,
+						      0);
+				b[6] = gc->bgPixel;
+				b[7] = gc->fgPixel;
+
+				sna->kgem.nbatch += 8;
+
+				dst = ptr;
+				len = stride;
+				src = gc->stipple->devPrivate.ptr;
+				src += row + (ox >> 3);
+				len -= bw;
+				do {
+					int i = bw;
+					do {
+						*dst++ = byte_reverse(*src++);
+						*dst++ = byte_reverse(*src++);
+						i -= 2;
+					} while (i);
+					src += len;
+				} while (--bh);
+
+				kgem_bo_destroy(&sna->kgem, upload);
+			}
+		}
+	}
+}
+
+static bool
+sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable,
+				       struct kgem_bo *bo,
+				       struct sna_damage **damage,
+				       GCPtr gc, int n, xRectangle *r,
+				       const BoxRec *extents, unsigned clipped)
+{
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	struct sna *sna = to_sna_from_pixmap(pixmap);
+	DDXPointRec origin = gc->patOrg;
+	int16_t dx, dy;
+	uint32_t br00, br13;
+
+	DBG(("%s: upload (%d, %d), (%d, %d), origin (%d, %d), clipped=%d, alu=%d, opaque=%d\n", __FUNCTION__,
+	     extents->x1, extents->y1,
+	     extents->x2, extents->y2,
+	     origin.x, origin.y,
+	     clipped, gc->alu, gc->fillStyle == FillOpaqueStippled));
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+
+	br00 = XY_MONO_SRC_COPY_IMM | 3 << 20;
+	br13 = bo->pitch;
+	if (sna->kgem.gen >= 40 && bo->tiling) {
+		br00 |= BLT_DST_TILED;
+		br13 >>= 2;
+	}
+	br13 |= (gc->fillStyle == FillStippled) << 29;
+	br13 |= blt_depth(drawable->depth) << 24;
+	br13 |= copy_ROP[gc->alu] << 16;
+
+	origin.x += dx + drawable->x;
+	origin.y += dy + drawable->y;
+
+	if (!clipped) {
+		dx += drawable->x;
+		dy += drawable->y;
+
+		sna_damage_add_rectangles(damage, r, n, dx, dy);
+		do {
+			BoxRec box;
+
+			box.x1 = r->x + dx;
+			box.y1 = r->y + dy;
+			box.x2 = box.x1 + r->width;
+			box.y2 = box.y1 + r->height;
+
+			sna_poly_fill_rect_stippled_n_box__imm(sna, bo,
+							       br00, br13, gc,
+							       &box, &origin);
+			r++;
+		} while (--n);
+	} else {
+		RegionRec clip;
+
+		region_set(&clip, extents);
+		region_maybe_clip(&clip, gc->pCompositeClip);
+		if (!RegionNotEmpty(&clip)) {
+			DBG(("%s: all clipped\n", __FUNCTION__));
+			return true;
+		}
+
+		if (clip.data == NULL) {
+			DBG(("%s: clipped to extents ((%d, %d), (%d, %d))\n",
+			     __FUNCTION__,
+			     clip.extents.x1, clip.extents.y1,
+			     clip.extents.x2, clip.extents.y2));
+			do {
+				BoxRec box;
+
+				box.x1 = r->x + drawable->x;
+				box.x2 = bound(box.x1, r->width);
+				box.y1 = r->y + drawable->y;
+				box.y2 = bound(box.y1, r->height);
+				r++;
+
+				DBG(("%s: box (%d, %d), (%d, %d)\n",
+				     __FUNCTION__,
+				     box.x1, box.y1, box.x2, box.y2));
+				if (!box_intersect(&box, &clip.extents))
+					continue;
+
+				box.x1 += dx; box.x2 += dx;
+				box.y1 += dy; box.y2 += dy;
+
+				sna_poly_fill_rect_stippled_n_box__imm(sna, bo,
+								       br00, br13, gc,
+								       &box, &origin);
+			} while (--n);
+		} else {
+			const BoxRec * const clip_start = RegionBoxptr(&clip);
+			const BoxRec * const clip_end = clip_start + clip.data->numRects;
+			const BoxRec *c;
+
+			DBG(("%s: clipped to boxes: start((%d, %d), (%d, %d)); end=((%d, %d), (%d, %d))\n", __FUNCTION__,
+			     clip_start->x1, clip_start->y1,
+			     clip_start->x2, clip_start->y2,
+			     clip_end->x1, clip_end->y1,
+			     clip_end->x2, clip_end->y2));
+			do {
+				BoxRec unclipped;
+
+				unclipped.x1 = r->x + drawable->x;
+				unclipped.x2 = bound(unclipped.x1, r->width);
+				unclipped.y1 = r->y + drawable->y;
+				unclipped.y2 = bound(unclipped.y1, r->height);
+				r++;
+
+				c = find_clip_box_for_y(clip_start,
+							clip_end,
+							unclipped.y1);
+				while (c != clip_end) {
+					BoxRec box;
+
+					if (unclipped.y2 <= c->y1)
+						break;
+
+					box = unclipped;
+					if (!box_intersect(&box, c++))
+						continue;
+
+					box.x1 += dx; box.x2 += dx;
+					box.y1 += dy; box.y2 += dy;
+
+					sna_poly_fill_rect_stippled_n_box__imm(sna, bo,
+									       br00, br13, gc,
+									       &box, &origin);
+				}
+			} while (--n);
+		}
+	}
+
+	assert_pixmap_damage(pixmap);
+	sna->blt_state.fill_bo = 0;
+	return true;
+}
+
 static bool
 sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable,
 				  struct kgem_bo *bo,
@@ -10434,14 +10719,16 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable,
 	     origin.x, origin.y,
 	     clipped, gc->alu, gc->fillStyle == FillOpaqueStippled));
 
-	if (gc->stipple->drawable.width > 32 ||
-	    gc->stipple->drawable.height > 32)
-		return false;
+	if (((gc->stipple->drawable.width | gc->stipple->drawable.height) & ~31) == 0)
+		return sna_poly_fill_rect_stippled_n_blt__imm(drawable,
+							      bo, damage,
+							      gc, n, r,
+							      extents, clipped);
 
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
 	kgem_set_mode(&sna->kgem, KGEM_BLT);
 
-	br00 = XY_MONO_SRC_COPY_IMM | 3 << 20;
+	br00 = XY_MONO_SRC_COPY | 3 << 20;
 	br13 = bo->pitch;
 	if (sna->kgem.gen >= 40 && bo->tiling) {
 		br00 |= BLT_DST_TILED;
@@ -12165,9 +12452,7 @@ sna_push_pixels_solid_blt(GCPtr gc,
 		} while (--bh);
 
 		b = sna->kgem.batch + sna->kgem.nbatch;
-		b[0] = XY_MONO_SRC_COPY;
-		if (drawable->bitsPerPixel == 32)
-			b[0] |= 3 << 20;
+		b[0] = XY_MONO_SRC_COPY | 3 << 20;
 		b[0] |= ((box->x1 - region->extents.x1) & 7) << 17;
 		b[1] = bo->pitch;
 		if (sna->kgem.gen >= 40 && bo->tiling) {

commit 40e0cf32a25e43e16184b2af87a1e1abeb8e4052
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jul 21 19:43:23 2012 +0100

    sna/dri: We fail at predicting the flip frame
    
    Simply report the values from the kernel, and transfer the blame...
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index cfb9d98..9698247 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -71,7 +71,6 @@ struct sna_dri_frame_event {
 	DrawablePtr draw;
 	ClientPtr client;
 	enum frame_event_type type;
-	unsigned frame;
 	int pipe;
 	int count;
 
@@ -1300,18 +1299,6 @@ static void sna_dri_flip_event(struct sna *sna,
 	/* We assume our flips arrive in order, so we don't check the frame */
 	switch (flip->type) {
 	case DRI2_FLIP:
-		/* Deliver cached msc, ust from reference crtc */
-		/* Check for too small vblank count of pageflip completion,
-		 * taking wraparound * into account. This usually means some
-		 * defective kms pageflip completion, causing wrong (msc, ust)
-		 * return values and possible visual corruption.
-		 */
-		if (flip->fe_frame < flip->frame &&
-		    flip->frame - flip->fe_frame < 5) {
-			/* All-0 values signal timestamping failure. */
-			flip->fe_frame = flip->fe_tv_sec = flip->fe_tv_usec = 0;
-		}
-
 		DBG(("%s: flip complete\n", __FUNCTION__));
 		DRI2SwapComplete(flip->client, draw,
 				 flip->fe_frame,
@@ -1596,8 +1583,6 @@ sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 			sna_dri_frame_event_info_free(sna, draw, info);
 			return false;
 		}
-
-		info->frame = *target_msc;
 	}
 
 	return true;
@@ -1812,7 +1797,6 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 		     (int)*target_msc,
 		     (int)divisor));
 
-		info->frame = *target_msc;
 		info->type = DRI2_SWAP;
 
 		vbl.request.type =
@@ -1861,7 +1845,6 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 		goto blit_fallback;
 
 	*target_msc = vbl.reply.sequence;
-	info->frame = *target_msc;
 	return TRUE;
 
 blit_fallback:
@@ -2102,7 +2085,6 @@ sna_dri_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
 		if (sna_wait_vblank(sna, &vbl))
 			goto out_free_info;
 
-		info->frame = vbl.reply.sequence;
 		DRI2BlockClient(client, draw);
 		return TRUE;
 	}
@@ -2130,7 +2112,6 @@ sna_dri_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
 	if (sna_wait_vblank(sna, &vbl))
 		goto out_free_info;
 
-	info->frame = vbl.reply.sequence;
 	DRI2BlockClient(client, draw);
 	return TRUE;
 

commit c6e316eeba3008b351f2cd63829154f4672c5417
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jul 21 18:18:32 2012 +0100

    sna: Adjust hints to prefer rendering convex polygon with the GPU
    
    Keep the general polygons as only using the GPU if necessary, until the
    cost of the routines is analysed.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 5bec59a..0b0a304 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -9274,7 +9274,7 @@ sna_poly_fill_polygon(DrawablePtr draw, GCPtr gc,
 		goto fallback;
 
 	if ((data.bo = sna_drawable_use_bo(draw,
-					   use_wide_spans(draw, gc, &data.region.extents),
+					   (shape == Convex ? use_zero_spans : use_wide_spans)(draw, gc, &data.region.extents),
 					   &data.region.extents,
 					   &data.damage))) {
 		uint32_t color;

commit f1e7248cb353d634f27d297059911168ce1a0762
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jul 21 16:08:31 2012 +0100

    sna: Expand the heuristic for predicting when to use CPU bo for readback
    
    For tiny transfers, the cost of setting up the GPU operation outweighs
    the actual savings through increased throughput. So we try to guess when
    it will be preferable to simply read from the GPU bo directly.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 5a9eb1e..5bec59a 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1093,12 +1093,31 @@ sna_pixmap_create_mappable_gpu(PixmapPtr pixmap)
 }
 
 static inline bool use_cpu_bo_for_download(struct sna *sna,
-					   struct sna_pixmap *priv)
+					   struct sna_pixmap *priv,
+					   const BoxRec *box)
 {
 	if (DBG_NO_CPU_DOWNLOAD)
 		return false;
 
-	return priv->cpu_bo != NULL && sna->kgem.can_blt_cpu;
+	if (priv->cpu_bo == NULL || !sna->kgem.can_blt_cpu)
+		return false;
+
+	if (kgem_bo_is_busy(priv->gpu_bo) || kgem_bo_is_busy(priv->cpu_bo)) {
+		DBG(("%s: yes, either bo is busy, so use GPU for readback\n",
+		     __FUNCTION__));
+		return true;
+	}
+
+	/* Is it worth detiling? */
+	if (kgem_bo_is_mappable(&sna->kgem, priv->gpu_bo) &&
+	    (box->y2 - box->y1 - 1) * priv->gpu_bo->pitch < 4096) {
+		DBG(("%s: no, tiny transfer, expect to read inplace\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	DBG(("%s: yes, default action\n", __FUNCTION__));
+	return true;
 }
 
 static inline bool use_cpu_bo_for_upload(struct sna_pixmap *priv,
@@ -1329,7 +1348,7 @@ skip_inplace_map:
 		if (n) {
 			bool ok = false;
 
-			if (use_cpu_bo_for_download(sna, priv)) {
+			if (use_cpu_bo_for_download(sna, priv, &priv->gpu_damage->extents)) {
 				DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
 				ok = sna->render.copy_boxes(sna, GXcopy,
 							    pixmap, priv->gpu_bo, 0, 0,
@@ -1794,7 +1813,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 			assert(pixmap_contains_damage(pixmap, priv->gpu_damage));
 
 			ok = false;
-			if (use_cpu_bo_for_download(sna, priv)) {
+			if (use_cpu_bo_for_download(sna, priv, &priv->gpu_damage->extents)) {
 				DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
 				ok = sna->render.copy_boxes(sna, GXcopy,
 							    pixmap, priv->gpu_bo, 0, 0,
@@ -1904,7 +1923,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 				if (n) {
 					bool ok = false;
 
-					if (use_cpu_bo_for_download(sna, priv)) {
+					if (use_cpu_bo_for_download(sna, priv, &priv->gpu_damage->extents)) {
 						DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
 						ok = sna->render.copy_boxes(sna, GXcopy,
 									    pixmap, priv->gpu_bo, 0, 0,
@@ -1931,7 +1950,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 				DBG(("%s: region wholly inside damage\n",
 				     __FUNCTION__));
 
-				if (use_cpu_bo_for_download(sna, priv)) {
+				if (use_cpu_bo_for_download(sna, priv, &r->extents)) {
 					DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
 					ok = sna->render.copy_boxes(sna, GXcopy,
 								    pixmap, priv->gpu_bo, 0, 0,
@@ -1958,7 +1977,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 					DBG(("%s: region intersects damage\n",
 					     __FUNCTION__));
 
-					if (use_cpu_bo_for_download(sna, priv)) {
+					if (use_cpu_bo_for_download(sna, priv, &need.extents)) {
 						DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
 						ok = sna->render.copy_boxes(sna, GXcopy,
 									    pixmap, priv->gpu_bo, 0, 0,

commit 06db69c2c7023f702f9773be90144fdf7a1159e4
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jul 21 15:28:10 2012 +0100

    sna: Update assertion for cached io buffers
    
    As kgem_buffers may be reused and repurposed through the snoop cache it
    is no longer true that only proxies will have the io flag set.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 7b388fb..d7c3812 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -417,7 +417,7 @@ void kgem_get_tile_size(struct kgem *kgem, int tiling,
 
 static inline int __kgem_buffer_size(struct kgem_bo *bo)
 {
-	assert(bo->proxy && bo->io);
+	assert(bo->proxy != NULL);
 	return bo->size.bytes;
 }


Reply to: