xserver-xorg-video-intel: Changes to 'upstream-experimental'

To: debian-x@lists.debian.org
Subject: xserver-xorg-video-intel: Changes to 'upstream-experimental'
From: Timo Aaltonen <tjaalton-guest@alioth.debian.org>
Date: Thu, 03 Jan 2013 12:22:39 +0000
Message-id: <[🔎] E1Tqjol-0000tD-61@vasks.debian.org>
 NEWS                         |   20 
 configure.ac                 |    4 
 man/intel.man                |   18 
 src/intel_dri.c              |    2 
 src/intel_driver.c           |    3 
 src/intel_module.c           |    1 
 src/intel_uxa.c              |   14 
 src/sna/Makefile.am          |    2 
 src/sna/gen2_render.c        |    2 
 src/sna/gen3_render.c        |   30 -
 src/sna/gen4_render.c        | 1279 +++++++++++++------------------------------
 src/sna/gen4_render.h        |   73 ++
 src/sna/gen4_vertex.c        |  893 ++++++++++++++++++++++++++++++
 src/sna/gen4_vertex.h        |   39 +
 src/sna/gen5_render.c        | 1113 ++++++++-----------------------------
 src/sna/gen5_render.h        |   17 
 src/sna/gen6_render.c        |  812 ++-------------------------
 src/sna/gen7_render.c        |  770 ++-----------------------
 src/sna/kgem.c               |  285 +++++----
 src/sna/kgem.h               |   17 
 src/sna/kgem_debug.c         |    9 
 src/sna/kgem_debug.h         |    2 
 src/sna/kgem_debug_gen5.c    |   21 
 src/sna/sna.h                |    7 
 src/sna/sna_accel.c          |   33 -
 src/sna/sna_display.c        |    4 
 src/sna/sna_dri.c            |   58 +
 src/sna/sna_driver.c         |    3 
 src/sna/sna_render.c         |   11 
 src/sna/sna_render.h         |   14 
 src/sna/sna_render_inline.h  |   40 -
 src/sna/sna_trapezoids.c     |    4 
 src/sna/sna_video.c          |  207 +++---
 src/sna/sna_video.h          |    6 
 src/sna/sna_video_hwmc.c     |  150 ++---
 src/sna/sna_video_hwmc.h     |   26 
 src/sna/sna_video_overlay.c  |   12 
 src/sna/sna_video_sprite.c   |   57 +
 src/sna/sna_video_textured.c |   13 
 uxa/uxa-accel.c              |    2 
 40 files changed, 2444 insertions(+), 3629 deletions(-)

New commits:
commit 90b1b220ee7a3c543301956b01c54a4a04632db4
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Dec 26 12:51:58 2012 +0000

    2.20.17 release
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/NEWS b/NEWS
index 36c81d7..2e0f021 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,23 @@
+Release 2.20.17 (2012-12-26)
+============================
+A minor update to prepare for co-operating with the kernel over managing
+stability on 830gm/845g. On this pair of chipsets, the kernel will perform
+an extra copy of the batchbuffer into reserved memory, which prevents them
+from randomly dying. However, that extra copy does have a noticeable
+impact upon throughput, so we also have a mechanism for userspace to
+opt-out of the kernel workaround and take responsibility for ensuring its
+batches are coherent.
+
+ * Build fixes against xorg-1.14
+   https://bugs.freedesktop.org/show_bug.cgi?id=58552
+   https://bugs.freedesktop.org/show_bug.cgi?id=58406
+
+ * Fixed the origin of cropped (textured) video windows (Xv and XvMC)
+   https://bugs.freedesktop.org/show_bug.cgi?id=23033
+
+ * Fix potential corruption when using images larger than ~1GiB
+
+
 Release 2.20.16 (2012-12-15)
 ============================
 Rejoice! We have found a trick to make 830gm/845g stable at long last.
diff --git a/configure.ac b/configure.ac
index 52db4e4..afe79ab 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.20.16],
+        [2.20.17],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit 52fd223fc970118cbdcb31f9574414debc905e9c
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Dec 21 21:36:30 2012 +0000

    sna/video: Initialise alignment for video ports > 0
    
    We repeatedly set the alignment value on the first port, rather than
    once for each.
    
    Reported-by: Jiri Slaby <jirislaby@gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47597
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index ebcf48d..d5b9c0f 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -200,6 +200,10 @@ sna_video_frame_init(struct sna *sna,
 {
 	int align;
 
+	DBG(("%s: id=%d [planar? %d], width=%d, height=%d, align=%d\n",
+	     __FUNCTION__, id, is_planar_fourcc(id), width, height, video->alignment));
+	assert(width && height);
+
 	frame->bo = NULL;
 	frame->id = id;
 	frame->width = width;
@@ -212,7 +216,6 @@ sna_video_frame_init(struct sna *sna,
 		align = 1024;
 #endif
 
-
 	/* Determine the desired destination pitch (representing the chroma's pitch,
 	 * in the planar case.
 	 */
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index c5947ed..e5cae85 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -453,7 +453,7 @@ XF86VideoAdaptorPtr sna_video_textured_setup(struct sna *sna,
 		struct sna_video *v = &video[i];
 
 		v->textured = true;
-		video->alignment = 4;
+		v->alignment = 4;
 		v->rotation = RR_Rotate_0;
 		v->SyncToVblank = 1;
 

commit 3793ccf7804cfc870b46c623dfeefbe0c381c1d4
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Dec 21 14:48:07 2012 +0000

    sna: Remove assertions that the pixmap is wholly defined when uploading
    
    As the user may only write to a portion of a pixmap (thus only creating
    a small amount of damage) and then attempt to use the whole as a source,
    we run the risk of triggering an assertion that the whole was defined.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 336a423..d7fa5cb 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -528,7 +528,6 @@ static struct kgem_bo *upload(struct sna *sna,
 		    channel->width  == pixmap->drawable.width &&
 		    channel->height == pixmap->drawable.height) {
 			assert(priv->gpu_damage == NULL);
-			assert(DAMAGE_IS_ALL(priv->cpu_damage));
 			assert(priv->gpu_bo == NULL);
 			kgem_proxy_bo_attach(bo, &priv->gpu_bo);
 		}
@@ -1164,7 +1163,6 @@ sna_render_picture_extract(struct sna *sna,
 				struct sna_pixmap *priv = sna_pixmap(pixmap);
 				if (priv) {
 					assert(priv->gpu_damage == NULL);
-					assert(DAMAGE_IS_ALL(priv->cpu_damage));
 					assert(priv->gpu_bo == NULL);
 					kgem_proxy_bo_attach(bo, &priv->gpu_bo);
 				}

commit 07dde33a4f51941b4f612823ea6ea7ca01a6efbc
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Dec 21 14:35:32 2012 +0000

    sna: Remove a pair of stale assertions
    
    For gen2-5, it does not matter what mode the batch is in when we
    insert the scanline wait. With the more aggressive batch flushing, and
    relaxed assignment of mode for those generations, we are likely to see
    that the batch is idle when we go to insert the waits.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index c150078..0736cfa 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -2823,8 +2823,6 @@ static bool sna_emit_wait_for_scanline_gen4(struct sna *sna,
 	uint32_t event;
 	uint32_t *b;
 
-	assert(sna->kgem.mode != KGEM_NONE);
-
 	if (pipe == 0) {
 		if (full_height)
 			event = MI_WAIT_FOR_PIPEA_SVBLANK;
@@ -2854,8 +2852,6 @@ static bool sna_emit_wait_for_scanline_gen2(struct sna *sna,
 {
 	uint32_t *b;
 
-	assert(sna->kgem.mode != KGEM_NONE);
-
 	/*
 	 * Pre-965 doesn't have SVBLANK, so we need a bit
 	 * of extra time for the blitter to start up and

commit bdd0cca4e1192df0038621925c4e6243ba419a81
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Dec 21 14:20:23 2012 +0000

    sna: Refactor test for a rotation matrix
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index b302233..a70c3c8 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -691,8 +691,7 @@ void gen4_choose_composite_emitter(struct sna_composite_op *tmp)
 				DBG(("%s: identity source, identity mask\n", __FUNCTION__));
 				tmp->prim_emit = emit_primitive_identity_source_mask;
 			} else if (tmp->src.is_affine) {
-				if (tmp->src.transform->matrix[0][1] == 0 &&
-				    tmp->src.transform->matrix[1][0] == 0) {
+				if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
 					DBG(("%s: simple src, identity mask\n", __FUNCTION__));
 					tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
 					tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
@@ -713,8 +712,7 @@ void gen4_choose_composite_emitter(struct sna_composite_op *tmp)
 			DBG(("%s: identity src, no mask\n", __FUNCTION__));
 			tmp->prim_emit = emit_primitive_identity_source;
 		} else if (tmp->src.is_affine) {
-			if (tmp->src.transform->matrix[0][1] == 0 &&
-			    tmp->src.transform->matrix[1][0] == 0) {
+			if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
 				DBG(("%s: simple src, no mask\n", __FUNCTION__));
 				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
 				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
@@ -883,8 +881,7 @@ void gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp)
 	} else if (tmp->base.src.transform == NULL) {
 		tmp->prim_emit = emit_spans_identity;
 	} else if (tmp->base.is_affine) {
-		if (tmp->base.src.transform->matrix[0][1] == 0 &&
-		    tmp->base.src.transform->matrix[1][0] == 0) {
+		if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) {
 			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
 			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
 			tmp->prim_emit = emit_spans_simple;
diff --git a/src/sna/sna.h b/src/sna/sna.h
index c7ebbd9..3abe36d 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -619,6 +619,12 @@ bool sna_transform_is_integer_translation(const PictTransform *t,
 					  int16_t *tx, int16_t *ty);
 bool sna_transform_is_translation(const PictTransform *t,
 				  pixman_fixed_t *tx, pixman_fixed_t *ty);
+static inline bool
+sna_affine_transform_is_rotation(const PictTransform *t)
+{
+	assert(sna_transform_is_affine(t));
+	return t->matrix[0][1] | t->matrix[1][0];
+}
 
 static inline bool
 sna_transform_equal(const PictTransform *a, const PictTransform *b)

commit 347c5a7b33729f1bedd408d2ef24756d51b66f1d
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Dec 21 10:40:47 2012 +0000

    sna/dri: Refactor get_current_msc between blit/flip paths
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index fd5e3de..10b6360 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -1710,6 +1710,24 @@ sna_dri_page_flip_handler(struct sna *sna,
 	sna_dri_flip_event(sna, info);
 }
 
+static CARD64
+get_current_msc_for_target(struct sna *sna, CARD64 target_msc, int pipe)
+{
+	CARD64 ret = -1;
+
+	if (target_msc && (sna->flags & SNA_NO_WAIT) == 0) {
+		drmVBlank vbl;
+
+		VG_CLEAR(vbl);
+		vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
+		vbl.request.sequence = 0;
+		if (sna_wait_vblank(sna, &vbl) == 0)
+			ret = vbl.reply.sequence;
+	}
+
+	return ret;
+}
+
 static bool
 sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 		      DRI2BufferPtr back, CARD64 *target_msc, CARD64 divisor,
@@ -1741,19 +1759,11 @@ sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 			return false;
 	}
 
-	/* Get current count */
-	if (*target_msc) {
-		vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
-		vbl.request.sequence = 0;
-		if (sna_wait_vblank(sna, &vbl))
-			return false;
-		current_msc = vbl.reply.sequence;
-	} else
-		current_msc = 0;
+	current_msc = get_current_msc_for_target(sna, *target_msc, pipe);
+
+	DBG(("%s: target_msc=%u, current_msc=%u, divisor=%u\n", __FUNCTION__,
+	     (uint32_t)*target_msc, (uint32_t)current_msc, (uint32_t)divisor));
 
-	/* Truncate to match kernel interfaces; means occasional overflow
-	 * misses, but that's generally not a big deal */
-	divisor &= 0xffffffff;
 	if (divisor == 0 && current_msc >= *target_msc) {
 		DBG(("%s: performing immediate swap on pipe %d, pending? %d\n",
 		     __FUNCTION__, pipe, sna->dri.flip_pending != NULL));
@@ -2032,6 +2042,12 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 	     (long long)divisor,
 	     (long long)remainder));
 
+	/* Truncate to match kernel interfaces; means occasional overflow
+	 * misses, but that's generally not a big deal */
+	*target_msc &= 0xffffffff;
+	divisor &= 0xffffffff;
+	remainder &= 0xffffffff;
+
 	if (can_flip(sna, draw, front, back)) {
 		DBG(("%s: try flip\n", __FUNCTION__));
 		if (sna_dri_schedule_flip(client, draw, front, back,
@@ -2058,12 +2074,6 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 
 	VG_CLEAR(vbl);
 
-	/* Truncate to match kernel interfaces; means occasional overflow
-	 * misses, but that's generally not a big deal */
-	*target_msc &= 0xffffffff;
-	divisor &= 0xffffffff;
-	remainder &= 0xffffffff;
-
 	info = calloc(1, sizeof(struct sna_dri_frame_event));
 	if (!info)
 		goto blit_fallback;
@@ -2082,16 +2092,7 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 
 	info->type = swap_type;
 
-	if (*target_msc && (sna->flags & SNA_NO_WAIT) == 0) {
-		/* Get current count */
-		vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
-		vbl.request.sequence = 0;
-		if (sna_wait_vblank(sna, &vbl))
-			goto blit_fallback;
-		current_msc = vbl.reply.sequence;
-	} else
-		current_msc = -1;
-
+	current_msc = get_current_msc_for_target(sna, *target_msc, pipe);
 	DBG(("%s: target_msc=%u, current_msc=%u, divisor=%u\n", __FUNCTION__,
 	     (uint32_t)*target_msc, (uint32_t)current_msc, (uint32_t)divisor));
 

commit 8a67d3f808fcc7c8c51553b1703e8312f28b87a1
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Dec 21 10:21:06 2012 +0000

    sna/dri: Set the correct current_msc for the no readback path
    
    If we are asked to render immediately, then in order to pass the tests
    when comparing it to target, we need to set the current_msc to the
    ultimate future value, -1.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 72244f8..fd5e3de 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -2090,10 +2090,10 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 			goto blit_fallback;
 		current_msc = vbl.reply.sequence;
 	} else
-		current_msc = 0;
+		current_msc = -1;
 
-	DBG(("%s: target_msc=%u, current_msc=%u, divisor=%u\n",
-	     __FUNCTION__, *target_msc, current_msc, divisor));
+	DBG(("%s: target_msc=%u, current_msc=%u, divisor=%u\n", __FUNCTION__,
+	     (uint32_t)*target_msc, (uint32_t)current_msc, (uint32_t)divisor));
 
 	if (divisor == 0 && current_msc >= *target_msc) {
 		if (can_exchange(sna, draw, front, back)) {

commit 48e4dc4bd4b2980f0f804f572d0e3fc1bb4bc21e
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Dec 20 21:54:25 2012 +0000

    sna/gen4: Backport tight vertex packing of renderblits
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index d899ad3..21c860e 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -48,7 +48,6 @@
  * after every rectangle... So until that is resolved, prefer
  * the BLT engine.
  */
-#define PREFER_BLT 1
 #define FORCE_SPANS 0
 
 #define NO_COMPOSITE 0
@@ -172,6 +171,8 @@ static const struct blendinfo {
 #define SAMPLER_OFFSET(sf, se, mf, me, k) \
 	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
 
+#define VERTEX_2s2s 0
+
 static void
 gen4_emit_pipelined_pointers(struct sna *sna,
 			     const struct sna_composite_op *op,
@@ -882,6 +883,44 @@ gen4_emit_vertex_elements(struct sna *sna,
 		return;
 	render->ve_id = id;
 
+	if (id == VERTEX_2s2s) {
+		DBG(("%s: setup COPY\n", __FUNCTION__));
+		assert(op->floats_per_rect == 6);
+
+		OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | ((2 * (1 + 2)) + 1 - 2));
+
+		/* x,y */
+		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
+			  0 << VE0_OFFSET_SHIFT);
+		OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+			  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
+			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+			  4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
+
+		/* s,t */
+		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
+			  4 << VE0_OFFSET_SHIFT);
+		OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+			  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
+			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+			  8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
+
+		/* magic */
+		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+			  GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
+			  0 << VE0_OFFSET_SHIFT);
+		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
+			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
+			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
+			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT |
+			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
+		return;
+	}
+
 	/* The VUE layout
 	 *    dword 0-3: position (x, y, 1.0, 1.0),
 	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
@@ -2272,39 +2311,6 @@ gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
 	gen4_emit_state(sna, op, offset | dirty);
 }
 
-static void
-gen4_render_copy_one(struct sna *sna,
-		     const struct sna_composite_op *op,
-		     int sx, int sy,
-		     int w, int h,
-		     int dx, int dy)
-{
-	gen4_get_rectangles(sna, op, 1, gen4_copy_bind_surfaces);
-
-	OUT_VERTEX(dx+w, dy+h);
-	OUT_VERTEX_F((sx+w)*op->src.scale[0]);
-	OUT_VERTEX_F((sy+h)*op->src.scale[1]);
-
-	OUT_VERTEX(dx, dy+h);
-	OUT_VERTEX_F(sx*op->src.scale[0]);
-	OUT_VERTEX_F((sy+h)*op->src.scale[1]);
-
-	OUT_VERTEX(dx, dy);
-	OUT_VERTEX_F(sx*op->src.scale[0]);
-	OUT_VERTEX_F(sy*op->src.scale[1]);
-}
-
-static inline bool prefer_blt_copy(struct sna *sna, unsigned flags)
-{
-#if PREFER_BLT
-	return true;
-	(void)sna;
-#else
-	return sna->kgem.mode != KGEM_RENDER;
-#endif
-	(void)flags;
-}
-
 static bool
 gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
 		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
@@ -2315,8 +2321,7 @@ gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
 
 	DBG(("%s x %d\n", __FUNCTION__, n));
 
-	if (prefer_blt_copy(sna, flags) &&
-	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy_boxes(sna, alu,
 			       src_bo, src_dx, src_dy,
 			       dst_bo, dst_dx, dst_dy,
@@ -2408,20 +2413,20 @@ fallback_blt:
 					       extents.x2 - extents.x1,
 					       extents.y2 - extents.y1))
 			goto fallback_tiled_dst;
+
+		src_dx += tmp.src.offset[0];
+		src_dy += tmp.src.offset[1];
 	} else {
 		tmp.src.bo = kgem_bo_reference(src_bo);
 		tmp.src.width  = src->drawable.width;
 		tmp.src.height = src->drawable.height;
-		tmp.src.offset[0] = tmp.src.offset[1] = 0;
-		tmp.src.scale[0] = 1.f/src->drawable.width;
-		tmp.src.scale[1] = 1.f/src->drawable.height;
 	}
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 3;
-	tmp.floats_per_rect = 9;
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
 	tmp.u.gen4.wm_kernel = WM_KERNEL;
-	tmp.u.gen4.ve_id = 2;
+	tmp.u.gen4.ve_id = VERTEX_2s2s;
 	tmp.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
@@ -2434,19 +2439,33 @@ fallback_blt:
 	dst_dy += tmp.dst.y;
 	tmp.dst.x = tmp.dst.y = 0;
 
-	src_dx += tmp.src.offset[0];
-	src_dy += tmp.src.offset[1];
-
 	gen4_copy_bind_surfaces(sna, &tmp);
 	gen4_align_vertex(sna, &tmp);
 
 	do {
-		gen4_render_copy_one(sna, &tmp,
-				     box->x1 + src_dx, box->y1 + src_dy,
-				     box->x2 - box->x1, box->y2 - box->y1,
-				     box->x1 + dst_dx, box->y1 + dst_dy);
-		box++;
-	} while (--n);
+		int n_this_time;
+
+		n_this_time = gen4_get_rectangles(sna, &tmp, n,
+						  gen4_copy_bind_surfaces);
+		n -= n_this_time;
+
+		do {
+			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
+			     box->x1 + src_dx, box->y1 + src_dy,
+			     box->x1 + dst_dx, box->y1 + dst_dy,
+			     box->x2 - box->x1, box->y2 - box->y1));
+			OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
+			OUT_VERTEX(box->x2 + src_dx, box->y2 + src_dy);
+
+			OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
+			OUT_VERTEX(box->x1 + src_dx, box->y2 + src_dy);
+
+			OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
+			OUT_VERTEX(box->x1 + src_dx, box->y1 + src_dy);
+
+			box++;
+		} while (--n_this_time);
+	} while (n);
 
 	gen4_vertex_flush(sna);
 	sna_render_composite_redirect_done(sna, &tmp);
@@ -2472,7 +2491,19 @@ gen4_render_copy_blt(struct sna *sna,
 		     int16_t w,  int16_t h,
 		     int16_t dx, int16_t dy)
 {
-	gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy);
+	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", __FUNCTION__,
+	     sx, sy, dx, dy, w, h));
+
+	gen4_get_rectangles(sna, &op->base, 1, gen4_copy_bind_surfaces);
+
+	OUT_VERTEX(dx+w, dy+h);
+	OUT_VERTEX(sx+w, sy+h);
+
+	OUT_VERTEX(dx, dy+h);
+	OUT_VERTEX(sx, sy+h);
+
+	OUT_VERTEX(dx, dy);
+	OUT_VERTEX(sx, sy);
 }
 
 static void
@@ -2480,16 +2511,8 @@ gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
 {
 	if (sna->render.vertex_offset)
 		gen4_vertex_flush(sna);
-}
 
-static inline bool prefer_blt_fill(struct sna *sna)
-{
-#if PREFER_BLT
-	return true;
-	(void)sna;
-#else
-	return sna->kgem.mode != KGEM_RENDER;
-#endif
+	DBG(("%s()\n", __FUNCTION__));
 }
 
 static bool
@@ -2504,8 +2527,7 @@ gen4_render_copy(struct sna *sna, uint8_t alu,
 	     dst->drawable.serialNumber,
 	     alu));
 
-	if (prefer_blt_fill(sna) &&
-	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy(sna, alu,
 			 src_bo, dst_bo,
 			 dst->drawable.bitsPerPixel,
@@ -2546,16 +2568,14 @@ fallback:
 		gen4_get_card_format(op->base.src.pict_format);
 	op->base.src.width  = src->drawable.width;
 	op->base.src.height = src->drawable.height;
-	op->base.src.scale[0] = 1.f/src->drawable.width;
-	op->base.src.scale[1] = 1.f/src->drawable.height;
 	op->base.src.filter = SAMPLER_FILTER_NEAREST;
 	op->base.src.repeat = SAMPLER_EXTEND_NONE;
 
 	op->base.is_affine = true;
-	op->base.floats_per_vertex = 3;
-	op->base.floats_per_rect = 9;
+	op->base.floats_per_vertex = 2;
+	op->base.floats_per_rect = 6;
 	op->base.u.gen4.wm_kernel = WM_KERNEL;
-	op->base.u.gen4.ve_id = 2;
+	op->base.u.gen4.ve_id = VERTEX_2s2s;
 	op->base.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
@@ -2581,26 +2601,6 @@ fallback:
 	return true;
 }
 
-static void
-gen4_render_fill_rectangle(struct sna *sna,
-			   const struct sna_composite_op *op,
-			   int x, int y, int w, int h)
-{
-	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
-
-	OUT_VERTEX(x+w, y+h);
-	OUT_VERTEX_F(1);
-	OUT_VERTEX_F(1);
-
-	OUT_VERTEX(x, y+h);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(1);
-
-	OUT_VERTEX(x, y);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(0);
-}
-
 static bool
 gen4_render_fill_boxes(struct sna *sna,
 		       CARD8 op,
@@ -2618,10 +2618,7 @@ gen4_render_fill_boxes(struct sna *sna,
 		return false;
 	}
 
-	if (op <= PictOpSrc &&
-	    (prefer_blt_fill(sna) ||
-	     too_large(dst->drawable.width, dst->drawable.height) ||
-	     !gen4_check_dst_format(format))) {
+	if (op <= PictOpSrc) {
 		uint8_t alu = GXinvalid;
 
 		pixel = 0;
@@ -2675,10 +2672,10 @@ gen4_render_fill_boxes(struct sna *sna,
 	gen4_composite_solid_init(sna, &tmp.src, pixel);
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 3;
-	tmp.floats_per_rect = 9;
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
 	tmp.u.gen4.wm_kernel = WM_KERNEL;
-	tmp.u.gen4.ve_id = 2;
+	tmp.u.gen4.ve_id = VERTEX_2s2s;
 	tmp.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -2690,12 +2687,27 @@ gen4_render_fill_boxes(struct sna *sna,
 	gen4_align_vertex(sna, &tmp);
 
 	do {
-		gen4_render_fill_rectangle(sna, &tmp,
-					   box->x1, box->y1,
-					   box->x2 - box->x1,
-					   box->y2 - box->y1);
-		box++;
-	} while (--n);
+		int n_this_time;
+
+		n_this_time = gen4_get_rectangles(sna, &tmp, n,
+						  gen4_bind_surfaces);
+		n -= n_this_time;
+
+		do {
+			DBG(("	(%d, %d), (%d, %d)\n",
+			     box->x1, box->y1, box->x2, box->y2));
+			OUT_VERTEX(box->x2, box->y2);
+			OUT_VERTEX(1, 1);
+
+			OUT_VERTEX(box->x1, box->y2);
+			OUT_VERTEX(0, 1);
+
+			OUT_VERTEX(box->x1, box->y1);
+			OUT_VERTEX(0, 0);
+
+			box++;
+		} while (--n_this_time);
+	} while (n);
 
 	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
@@ -2703,10 +2715,22 @@ gen4_render_fill_boxes(struct sna *sna,
 }
 
 static void
-gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op,
+gen4_render_fill_op_blt(struct sna *sna,
+			const struct sna_fill_op *op,
 			int16_t x, int16_t y, int16_t w, int16_t h)
 {
-	gen4_render_fill_rectangle(sna, &op->base, x, y, w, h);
+	DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x,y,w,h));
+
+	gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces);
+
+	OUT_VERTEX(x+w, y+h);
+	OUT_VERTEX(1, 1);
+
+	OUT_VERTEX(x, y+h);
+	OUT_VERTEX(0, 1);
+
+	OUT_VERTEX(x, y);
+	OUT_VERTEX(0, 0);
 }
 
 fastcall static void
@@ -2714,9 +2738,19 @@ gen4_render_fill_op_box(struct sna *sna,
 			const struct sna_fill_op *op,
 			const BoxRec *box)
 {
-	gen4_render_fill_rectangle(sna, &op->base,
-				   box->x1, box->y1,
-				   box->x2-box->x1, box->y2-box->y1);
+	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
+	     box->x1, box->y1, box->x2, box->y2));
+
+	gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces);
+
+	OUT_VERTEX(box->x2, box->y2);
+	OUT_VERTEX(1, 1);
+
+	OUT_VERTEX(box->x1, box->y2);
+	OUT_VERTEX(0, 1);
+
+	OUT_VERTEX(box->x1, box->y1);
+	OUT_VERTEX(0, 0);
 }
 
 fastcall static void
@@ -2725,12 +2759,28 @@ gen4_render_fill_op_boxes(struct sna *sna,
 			  const BoxRec *box,
 			  int nbox)
 {
+	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
+	     box->x1, box->y1, box->x2, box->y2, nbox));
+
 	do {
-		gen4_render_fill_rectangle(sna, &op->base,
-					   box->x1, box->y1,
-					   box->x2-box->x1, box->y2-box->y1);
-		box++;
-	} while (--nbox);
+		int nbox_this_time;
+
+		nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
+						     gen4_bind_surfaces);
+		nbox -= nbox_this_time;
+
+		do {
+			OUT_VERTEX(box->x2, box->y2);
+			OUT_VERTEX(1, 1);
+
+			OUT_VERTEX(box->x1, box->y2);
+			OUT_VERTEX(0, 1);
+
+			OUT_VERTEX(box->x1, box->y1);
+			OUT_VERTEX(0, 0);
+			box++;
+		} while (--nbox_this_time);
+	} while (nbox);
 }
 
 static void
@@ -2739,6 +2789,8 @@ gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
 	if (sna->render.vertex_offset)
 		gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
+
+	DBG(("%s()\n", __FUNCTION__));
 }
 
 static bool
@@ -2747,8 +2799,7 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
 		 uint32_t color,
 		 struct sna_fill_op *op)
 {
-	if (prefer_blt_fill(sna) &&
-	    sna_blt_fill(sna, alu,
+	if (sna_blt_fill(sna, alu,
 			 dst_bo, dst->drawable.bitsPerPixel,
 			 color,
 			 op))
@@ -2782,10 +2833,10 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
 	op->base.mask.bo = NULL;
 
 	op->base.is_affine = true;
-	op->base.floats_per_vertex = 3;
-	op->base.floats_per_rect = 9;
+	op->base.floats_per_vertex = 2;
+	op->base.floats_per_rect = 6;
 	op->base.u.gen4.wm_kernel = WM_KERNEL;
-	op->base.u.gen4.ve_id = 2;
+	op->base.u.gen4.ve_id = VERTEX_2s2s;
 	op->base.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -2859,13 +2910,13 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.mask.bo = NULL;
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 3;
-	tmp.floats_per_rect = 9;
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
 	tmp.has_component_alpha = false;
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen4.wm_kernel = WM_KERNEL;
-	tmp.u.gen4.ve_id = 2;
+	tmp.u.gen4.ve_id = VERTEX_2s2s;
 	tmp.u.gen4.sf = 0;
 
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
@@ -2876,7 +2927,17 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	gen4_bind_surfaces(sna, &tmp);
 	gen4_align_vertex(sna, &tmp);
 
-	gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1);
+	gen4_get_rectangles(sna, &tmp, 1, gen4_bind_surfaces);
+
+	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));
+	OUT_VERTEX(x2, y2);
+	OUT_VERTEX(1, 1);
+
+	OUT_VERTEX(x1, y2);
+	OUT_VERTEX(0, 1);
+
+	OUT_VERTEX(x1, y1);
+	OUT_VERTEX(0, 0);
 
 	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

commit 08d2b073692836aa22f65f8ba30db5d14550c03e
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Dec 20 21:30:32 2012 +0000

    sna/gen4: Backport more recent state tracking tweaks
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 8f4f1d4..d899ad3 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -215,6 +215,8 @@ static void gen4_magic_ca_pass(struct sna *sna,
 	if (!op->need_magic_ca_pass)
 		return;
 
+	assert(sna->render.vertex_index > sna->render.vertex_start);
+
 	DBG(("%s: CA fixup\n", __FUNCTION__));
 	assert(op->mask.bo != NULL);
 	assert(op->has_component_alpha);
@@ -468,6 +470,17 @@ static bool gen4_check_repeat(PicturePtr picture)
 	}
 }
 
+static uint32_t
+gen4_tiling_bits(uint32_t tiling)
+{
+	switch (tiling) {
+	default: assert(0);
+	case I915_TILING_NONE: return 0;
+	case I915_TILING_X: return GEN4_SURFACE_TILED;
+	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
+	}
+}
+
 /**
  * Sets up the common fields for a surface state buffer for the given
  * picture in the given surface state buffer.
@@ -480,9 +493,9 @@ gen4_bind_bo(struct sna *sna,
 	     uint32_t format,
 	     bool is_dst)
 {
-	struct gen4_surface_state *ss;
 	uint32_t domains;
 	uint16_t offset;
+	uint32_t *ss;
 
 	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));
 
@@ -496,32 +509,30 @@ gen4_bind_bo(struct sna *sna,
 
 	offset = sna->kgem.surface -=
 		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
-	ss = memset(sna->kgem.batch + offset, 0, sizeof(*ss));
+	ss = sna->kgem.batch + offset;
 
-	ss->ss0.surface_type = GEN4_SURFACE_2D;
-	ss->ss0.surface_format = format;
+	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
+		 GEN4_SURFACE_BLEND_ENABLED |
+		 format << GEN4_SURFACE_FORMAT_SHIFT);
 
 	if (is_dst)
 		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
 	else
 		domains = I915_GEM_DOMAIN_SAMPLER << 16;
+	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
-	ss->ss0.data_return_format = GEN4_SURFACERETURNFORMAT_FLOAT32;
-	ss->ss0.color_blend = 1;
-	ss->ss1.base_addr =
-		kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
-
-	ss->ss2.height = height - 1;
-	ss->ss2.width  = width - 1;
-	ss->ss3.pitch  = bo->pitch - 1;
-	ss->ss3.tiled_surface = bo->tiling != I915_TILING_NONE;
-	ss->ss3.tile_walk     = bo->tiling == I915_TILING_Y;
+	ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
+		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
+	ss[3] = (gen4_tiling_bits(bo->tiling) |
+		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
+	ss[4] = 0;
+	ss[5] = 0;
 
 	kgem_bo_set_binding(bo, format, offset);
 
 	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
Reply to:
Prev by Date: xserver-xorg-video-intel: Changes to 'debian-experimental'
Next by Date: xorg: Changes to 'ubuntu-precise'
Previous by thread: xserver-xorg-video-intel: Changes to 'debian-experimental'
Next by thread: xserver-xorg-video-intel: Changes to 'upstream-experimental'
Index(es):
- Date
- Thread