xserver-xorg-video-intel: Changes to 'upstream-experimental'



 NEWS                      |   46 +
 configure.ac              |   54 +-
 src/intel_batchbuffer.c   |   25 
 src/sna/Makefile.am       |    4 
 src/sna/atomic.h          |   89 +++
 src/sna/compiler.h        |    2 
 src/sna/gen3_render.c     |  619 +++++++++++++++++++++--
 src/sna/gen4_render.c     |  135 ++++-
 src/sna/gen4_vertex.c     |  475 ++++++++++++++++-
 src/sna/gen5_render.c     |  136 ++++-
 src/sna/gen6_render.c     |  146 +++++
 src/sna/gen7_render.c     |  166 +++++-
 src/sna/kgem.c            |  298 +++++------
 src/sna/kgem.h            |   44 +
 src/sna/kgem_debug_gen6.c |    4 
 src/sna/sna.h             |   31 +
 src/sna/sna_accel.c       |  225 ++++----
 src/sna/sna_blt.c         |  306 +++++++++++
 src/sna/sna_composite.c   |   86 +++
 src/sna/sna_display.c     |  114 +++-
 src/sna/sna_dri.c         |   62 +-
 src/sna/sna_driver.c      |    2 
 src/sna/sna_io.c          |    3 
 src/sna/sna_render.c      |   51 +
 src/sna/sna_render.h      |   57 ++
 src/sna/sna_threads.c     |  306 +++++++++++
 src/sna/sna_trapezoids.c  | 1236 ++++++++++++++++++++++++++++++++++++++--------
 src/sna/sna_vertex.c      |   37 +
 src/sna/sna_video.c       |   11 
 29 files changed, 4078 insertions(+), 692 deletions(-)

New commits:
commit d3ff1cb9d7f788002337b1e6c4c81c58112b85b1
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Feb 1 13:46:33 2013 +0000

    2.21.0 release

diff --git a/NEWS b/NEWS
index 9fa9009..fff55ad 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,49 @@
+Release 2.21.0 (2013-02-01)
+===========================
+A few new features:
+
+ * Enable render acceleration for Haswell GT1/GT2.
+
+ * Enable multi-threaded rasterisation of trapezoids and fallback composition
+
+ * Utilise a new kernel interface (v3.9) for processing relocations
+
+along with a few older features from the 2.20.x series:
+
+ * PRIME support for hotplug GPUs and hybrid systems
+
+ * Support for IvyBridge GT1 machines, aka HD2500 graphics.
+
+ * Stable 830gm/845g support, at last!
+
+As usual we have a large number of bug fixes since the last release:
+
+ * Prevent a stray relocation being left after a buffer is removed from
+   a batch, leading to GPU hangs.
+
+ * Make the driver more robust against its own failures to submit batches
+   by falling back to software rendering.
+
+ * Fix emission of scanline waits for secondary pipes on gen6/7. Otherwise
+   you may encounter GPU hangs in MI_WAIT_FOR_EVENT.
+
+ * Fix a missing corner pixel when drawing rectangles with PolyLines
+   https://bugs.freedesktop.org/show_bug.cgi?id=55484
+
+ * Don't try to use Y-tiling colour buffers with mesa/i915c as mesa
+   doesn't support them and will fall back to software rendering
+
+ * Ensure that any cached mmaps are invalidated for a SwapBuffers
+   https://bugs.freedesktop.org/show_bug.cgi?id=60042
+
+ * Correctly handle the composition of rotated displays too large for the
+   3D pipeline
+   https://bugs.freedesktop.org/show_bug.cgi?id=60124
+
+ * Fix the computation of the planar video frame size
+   https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1104180
+
+
 Release 2.20.19 (2013-01-20)
 ============================
 A quick release as the last broke USB DisplayLink slave outputs badly. The
diff --git a/configure.ac b/configure.ac
index 46affdc..5135ecc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.20.19],
+        [2.21.0],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit 008f8230a7c47f1249eb51e53b3abf158f2a42bf
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Feb 1 01:54:52 2013 +0000

    sna: Assert that if we have GPU damage we have a GPU bo
    
    Scatter the asserts around the migration points to catch where this
    invariant may be untrue.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
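
As a sketch, the asserts scattered through the diff below all reduce to the
same invariant check; a helper like the following (name hypothetical, not
part of the patch) captures it:

    static inline void assert_gpu_damage_has_bo(struct sna_pixmap *priv)
    {
    	/* damage recorded against the GPU implies a GPU bo exists */
    	assert(priv->gpu_damage == NULL || priv->gpu_bo != NULL);
    }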

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 9b1a385..bb2ecb5 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -323,6 +323,8 @@ static void assert_pixmap_damage(PixmapPtr p)
 	if (priv == NULL)
 		return;
 
+	assert(priv->gpu_damage == NULL || priv->gpu_bo);
+
 	if (priv->clear) {
 		assert(DAMAGE_IS_ALL(priv->gpu_damage));
 		assert(priv->cpu_damage == NULL);
@@ -1371,6 +1373,7 @@ sna_pixmap_create_mappable_gpu(PixmapPtr pixmap)
 
 	assert_pixmap_damage(pixmap);
 
+	assert(priv->gpu_damage == NULL);
 	assert(priv->gpu_bo == NULL);
 	priv->gpu_bo =
 		kgem_create_2d(&sna->kgem,
@@ -1496,6 +1499,8 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 	     priv->gpu_bo ? priv->gpu_bo->handle : 0,
 	     priv->gpu_damage, priv->cpu_damage, priv->clear));
 
+	assert(priv->gpu_damage == NULL || priv->gpu_bo);
+
 	if (USE_INPLACE && (flags & MOVE_READ) == 0) {
 		assert(flags & MOVE_WRITE);
 		DBG(("%s: no readback, discarding gpu damage [%d], pending clear[%d]\n",
@@ -1690,6 +1695,7 @@ skip_inplace_map:
 		int n;
 
 		DBG(("%s: flushing GPU damage\n", __FUNCTION__));
+		assert(priv->gpu_bo);
 
 		n = sna_damage_get_boxes(priv->gpu_damage, &box);
 		if (n) {
@@ -1835,6 +1841,7 @@ static inline bool region_inplace(struct sna *sna,
 
 	if (DAMAGE_IS_ALL(priv->gpu_damage)) {
 		DBG(("%s: yes, already wholly damaged on the GPU\n", __FUNCTION__));
+		assert(priv->gpu_bo);
 		return true;
 	}
 
@@ -1880,6 +1887,8 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		return true;
 	}
 
+	assert(priv->gpu_damage == NULL || priv->gpu_bo);
+
 	if (sna_damage_is_all(&priv->cpu_damage,
 			      pixmap->drawable.width,
 			      pixmap->drawable.height)) {
@@ -2046,6 +2055,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 			DBG(("%s: forced migration\n", __FUNCTION__));
 
 			assert(pixmap_contains_damage(pixmap, priv->gpu_damage));
+			assert(priv->gpu_bo);
 
 			ok = false;
 			if (use_cpu_bo_for_download(sna, priv, &priv->gpu_damage->extents)) {
@@ -2071,6 +2081,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		     __FUNCTION__,
 		     region->extents.x2 - region->extents.x1,
 		     region->extents.y2 - region->extents.y1));
+		assert(priv->gpu_bo);
 
 		if (priv->cpu_damage == NULL) {
 			if ((flags & MOVE_WRITE) == 0 &&
@@ -2391,6 +2402,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
 	assert_pixmap_damage(pixmap);
 	assert_pixmap_contains_box(pixmap, box);
 	assert(!wedged(sna));
+	assert(priv->gpu_damage == NULL || priv->gpu_bo);
 
 	if (sna_damage_is_all(&priv->gpu_damage,
 			      pixmap->drawable.width,
@@ -2414,6 +2426,8 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
 	if (priv->gpu_bo == NULL) {
 		unsigned create, tiling;
 
+		assert(priv->gpu_damage == NULL);
+
 		create = CREATE_INACTIVE;
 		if (pixmap->usage_hint == SNA_CREATE_FB)
 			create |= CREATE_EXACT | CREATE_SCANOUT;
@@ -2626,6 +2640,7 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box,
 	if (DAMAGE_IS_ALL(priv->gpu_damage)) {
 		DBG(("%s: use GPU fast path (all-damaged)\n", __FUNCTION__));
 		assert(priv->cpu_damage == NULL);
+		assert(priv->gpu_bo);
 		goto use_gpu_bo;
 	}
 
@@ -2727,6 +2742,7 @@ create_gpu_bo:
 	     region.extents.x2, region.extents.y2));
 
 	if (priv->gpu_damage) {
+		assert(priv->gpu_bo);
 		if (!priv->cpu_damage) {
 			if (sna_damage_contains_box__no_reduce(priv->gpu_damage,
 							       &region.extents)) {
@@ -3021,6 +3037,8 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 			       pixmap->drawable.height);
 	}
 
+	assert(priv->gpu_damage == NULL || priv->gpu_bo);
+
 	if (sna_damage_is_all(&priv->gpu_damage,
 			      pixmap->drawable.width,
 			      pixmap->drawable.height)) {
@@ -3077,6 +3095,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 		}
 		if (priv->gpu_bo == NULL) {
 			DBG(("%s: not creating GPU bo\n", __FUNCTION__));
+			assert(priv->gpu_damage == NULL);
 			assert(list_is_empty(&priv->list));
 			return NULL;
 		}
@@ -3845,8 +3864,10 @@ move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
 	int h = box->y2 - box->y1;
 	int count;
 
-	if (DAMAGE_IS_ALL(priv->gpu_damage))
+	if (DAMAGE_IS_ALL(priv->gpu_damage)) {
+		assert(priv->gpu_bo);
 		return true;
+	}
 
 	if (priv->gpu_bo) {
 		if (alu != GXcopy)
@@ -3977,6 +3998,8 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 		goto fallback;
 
 	if (priv->gpu_damage) {
+		assert(priv->gpu_bo);
+
 		if (alu == GXcopy && priv->clear)
 			goto out;
 
@@ -4073,6 +4096,7 @@ source_prefer_gpu(struct sna *sna, struct sna_pixmap *priv)
 
 	if (priv->gpu_damage) {
 		DBG(("%s: source has gpu damage, force gpu\n", __FUNCTION__));
+		assert(priv->gpu_bo);
 		return PREFER_GPU | FORCE_GPU;
 	}
 

commit cf0576f87102b1535268691e7e29661b0f9ee73b
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Feb 1 00:19:21 2013 +0000

    sna/video: Correct computation of planar frame size
    
    The total frame size is less than 3 times the size of the subsampled
    chroma planes, due to the additional alignment bytes in each pitch.
    
    Bugzilla: https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1104180
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
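
A sketch of the corrected arithmetic, assuming the layout implied by the
patch: one full-height luma plane plus two half-height chroma planes, each
pitch aligned separately (width takes the place of height for 90/270
rotations):

    /* luma plane:        pitch[1] = ALIGN(width, align), full height
     * each chroma plane: pitch[0] = ALIGN(width / 2, align), height / 2
     *
     * total = pitch[1] * height + 2 * pitch[0] * (height / 2)
     *       = (pitch[0] + pitch[1]) * height
     *
     * which is less than the old 3 * pitch[0] * height whenever
     * ALIGN(width, align) < 2 * ALIGN(width / 2, align). */
    frame->size = (frame->pitch[0] + frame->pitch[1]) * height;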

diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index 05d76dd..07fa829 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -215,19 +215,22 @@ sna_video_frame_init(struct sna *sna,
 		align = 1024;
 #endif
 
-	/* Determine the desired destination pitch (representing the chroma's pitch,
-	 * in the planar case.
+	/* Determine the desired destination pitch (representing the
+	 * chroma's pitch in the planar case).
 	 */
 	if (is_planar_fourcc(id)) {
+		assert((width & 1) == 0);
+		assert((height & 1) == 0);
 		if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
 			frame->pitch[0] = ALIGN((height / 2), align);
 			frame->pitch[1] = ALIGN(height, align);
-			frame->size = 3U * frame->pitch[0] * width;
+			frame->size = width;
 		} else {
 			frame->pitch[0] = ALIGN((width / 2), align);
 			frame->pitch[1] = ALIGN(width, align);
-			frame->size = 3U * frame->pitch[0] * height;
+			frame->size = height;
 		}
+		frame->size *= frame->pitch[0] + frame->pitch[1];
 	} else {
 		if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
 			frame->pitch[0] = ALIGN((height << 1), align);

commit 268285d9a64fc47fe81fe5bfbfbd1890dad53e1e
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Jan 31 21:57:41 2013 +0000

    sna/gen3+: Flush vertex threads before touching global state
    
    We need to be careful not just when finishing the current vbo to
    synchronize with the sharing threads, but also before we emit the batch
    state that no other thread will try and do the same.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
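
For reference, a self-contained sketch of the wait primitive as modified by
the sna_render.h hunk below (struct trimmed to the relevant fields, names
abbreviated):

    #include <pthread.h>
    #include <stdbool.h>

    struct render {
    	pthread_mutex_t lock;
    	pthread_cond_t wait;
    	int active;	/* threads still filling the shared vbo */
    };

    /* Called with lock held. Returns true if we actually slept, in which
     * case another thread may have replenished the vbo; callers therefore
     * recheck vertex_space() before flushing or emitting global state. */
    static bool vertex_wait__locked(struct render *r)
    {
    	bool was_active = r->active != 0;
    	while (r->active)
    		pthread_cond_wait(&r->wait, &r->lock);
    	return was_active;
    }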

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 06c4e78..6b9a2f8 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1900,6 +1900,9 @@ static bool gen3_rectangle_begin(struct sna *sna,
 	struct gen3_render_state *state = &sna->render_state.gen3;
 	int ndwords, i1_cmd = 0, i1_len = 0;
 
+	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+		return true;
+
 	ndwords = 2;
 	if (op->need_magic_ca_pass)
 		ndwords += 100;
@@ -1939,6 +1942,13 @@ static bool gen3_rectangle_begin(struct sna *sna,
 static int gen3_get_rectangles__flush(struct sna *sna,
 				      const struct sna_composite_op *op)
 {
+	/* Prevent discarding the new vbo after lock contention */
+	if (sna_vertex_wait__locked(&sna->render)) {
+		int rem = vertex_space(sna);
+		if (rem > op->floats_per_rect)
+			return rem;
+	}
+
 	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
 		return 0;
 	if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
@@ -1955,17 +1965,6 @@ static int gen3_get_rectangles__flush(struct sna *sna,
 		}
 	}
 
-	/* Preventing discarding new vbo after lock contention */
-	if (sna->render.active) {
-		int rem;
-
-		sna_vertex_wait__locked(&sna->render);
-
-		rem = vertex_space(sna);
-		if (rem > op->floats_per_rect)
-			return rem;
-	}
-
 	return gen3_vertex_finish(sna);
 }
 
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 829f4e9..7fc8520 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -587,6 +587,9 @@ static bool gen4_rectangle_begin(struct sna *sna,
 	int id = op->u.gen4.ve_id;
 	int ndwords;
 
+	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+		return true;
+
 	/* 7xpipelined pointers + 6xprimitive + 1xflush */
 	ndwords = op->need_magic_ca_pass? 20 : 6;
 	if ((sna->render.vb_id & (1 << id)) == 0)
@@ -606,6 +609,13 @@ static bool gen4_rectangle_begin(struct sna *sna,
 static int gen4_get_rectangles__flush(struct sna *sna,
 				      const struct sna_composite_op *op)
 {
+	/* Prevent discarding the new vbo after lock contention */
+	if (sna_vertex_wait__locked(&sna->render)) {
+		int rem = vertex_space(sna);
+		if (rem > op->floats_per_rect)
+			return rem;
+	}
+
 	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6))
 		return 0;
 	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
@@ -621,17 +631,6 @@ static int gen4_get_rectangles__flush(struct sna *sna,
 						     op->u.gen4.wm_kernel);
 	}
 
-	/* Preventing discarding new vbo after lock contention */
-	if (sna->render.active) {
-		int rem;
-
-		sna_vertex_wait__locked(&sna->render);
-
-		rem = vertex_space(sna);
-		if (rem > op->floats_per_rect)
-			return rem;
-	}
-
 	return gen4_vertex_finish(sna);
 }
 
@@ -644,7 +643,7 @@ inline static int gen4_get_rectangles(struct sna *sna,
 
 start:
 	rem = vertex_space(sna);
-	if (rem < op->floats_per_rect) {
+	if (unlikely(rem < op->floats_per_rect)) {
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen4_get_rectangles__flush(sna, op);
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 07e9571..35f70f8 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -577,6 +577,9 @@ static bool gen5_rectangle_begin(struct sna *sna,
 	int id = op->u.gen5.ve_id;
 	int ndwords;
 
+	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+		return true;
+
 	ndwords = op->need_magic_ca_pass ? 20 : 6;
 	if ((sna->render.vb_id & (1 << id)) == 0)
 		ndwords += 5;
@@ -595,6 +598,13 @@ static bool gen5_rectangle_begin(struct sna *sna,
 static int gen5_get_rectangles__flush(struct sna *sna,
 				      const struct sna_composite_op *op)
 {
+	/* Prevent discarding the new vbo after lock contention */
+	if (sna_vertex_wait__locked(&sna->render)) {
+		int rem = vertex_space(sna);
+		if (rem > op->floats_per_rect)
+			return rem;
+	}
+
 	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 20 : 6))
 		return 0;
 	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
@@ -607,17 +617,6 @@ static int gen5_get_rectangles__flush(struct sna *sna,
 						     op->u.gen5.wm_kernel);
 	}
 
-	/* Preventing discarding new vbo after lock contention */
-	if (sna->render.active) {
-		int rem;
-
-		sna_vertex_wait__locked(&sna->render);
-
-		rem = vertex_space(sna);
-		if (rem > op->floats_per_rect)
-			return rem;
-	}
-
 	return gen4_vertex_finish(sna);
 }
 
@@ -631,7 +630,7 @@ inline static int gen5_get_rectangles(struct sna *sna,
 
 start:
 	rem = vertex_space(sna);
-	if (rem < op->floats_per_rect) {
+	if (unlikely(rem < op->floats_per_rect)) {
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen5_get_rectangles__flush(sna, op);
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index a9d8111..ec64250 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1125,6 +1125,9 @@ static bool gen6_rectangle_begin(struct sna *sna,
 	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
 	int ndwords;
 
+	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+		return true;
+
 	ndwords = op->need_magic_ca_pass ? 60 : 6;
 	if ((sna->render.vb_id & id) == 0)
 		ndwords += 5;
@@ -1141,6 +1144,13 @@ static bool gen6_rectangle_begin(struct sna *sna,
 static int gen6_get_rectangles__flush(struct sna *sna,
 				      const struct sna_composite_op *op)
 {
+	/* Prevent discarding the new vbo after lock contention */
+	if (sna_vertex_wait__locked(&sna->render)) {
+		int rem = vertex_space(sna);
+		if (rem > op->floats_per_rect)
+			return rem;
+	}
+
 	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
 		return 0;
 	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
@@ -1157,17 +1167,6 @@ static int gen6_get_rectangles__flush(struct sna *sna,
 		}
 	}
 
-	/* Preventing discarding new vbo after lock contention */
-	if (sna->render.active) {
-		int rem;
-
-		sna_vertex_wait__locked(&sna->render);
-
-		rem = vertex_space(sna);
-		if (rem > op->floats_per_rect)
-			return rem;
-	}
-
 	return gen4_vertex_finish(sna);
 }
 
@@ -1180,7 +1179,7 @@ inline static int gen6_get_rectangles(struct sna *sna,
 
 start:
 	rem = vertex_space(sna);
-	if (rem < op->floats_per_rect) {
+	if (unlikely(rem < op->floats_per_rect)) {
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen6_get_rectangles__flush(sna, op);
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index dc704dd..59b38f6 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1270,6 +1270,9 @@ static bool gen7_rectangle_begin(struct sna *sna,
 	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
 	int ndwords;
 
+	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+		return true;
+
 	ndwords = op->need_magic_ca_pass ? 60 : 6;
 	if ((sna->render.vb_id & id) == 0)
 		ndwords += 5;
@@ -1286,6 +1289,13 @@ static bool gen7_rectangle_begin(struct sna *sna,
 static int gen7_get_rectangles__flush(struct sna *sna,
 				      const struct sna_composite_op *op)
 {
+	/* Prevent discarding the new vbo after lock contention */
+	if (sna_vertex_wait__locked(&sna->render)) {
+		int rem = vertex_space(sna);
+		if (rem > op->floats_per_rect)
+			return rem;
+	}
+
 	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
 		return 0;
 	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
@@ -1300,17 +1310,6 @@ static int gen7_get_rectangles__flush(struct sna *sna,
 		}
 	}
 
-	/* Preventing discarding new vbo after lock contention */
-	if (sna->render.active) {
-		int rem;
-
-		sna_vertex_wait__locked(&sna->render);
-
-		rem = vertex_space(sna);
-		if (rem > op->floats_per_rect)
-			return rem;
-	}
-
 	return gen4_vertex_finish(sna);
 }
 
@@ -1323,7 +1322,7 @@ inline static int gen7_get_rectangles(struct sna *sna,
 
 start:
 	rem = vertex_space(sna);
-	if (rem < op->floats_per_rect) {
+	if (unlikely(rem < op->floats_per_rect)) {
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen7_get_rectangles__flush(sna, op);
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index c953e50..6cf90eb 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -763,10 +763,12 @@ static inline void sna_vertex_release__locked(struct sna_render *r)
 		pthread_cond_signal(&r->wait);
 }
 
-static inline void sna_vertex_wait__locked(struct sna_render *r)
+static inline bool sna_vertex_wait__locked(struct sna_render *r)
 {
+	bool was_active = r->active;
 	while (r->active)
 		pthread_cond_wait(&r->wait, &r->lock);
+	return was_active;
 }
 
 #endif /* SNA_RENDER_H */

commit 1239e012ae6d4f00ce73f32d7244905a601170ea
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Jan 31 19:18:17 2013 +0000

    sna: Make sure the needs_flush is always accompanied by a tracking request
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
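
The patch leans on __kgem_bo_clear_busy(), whose definition lands in kgem.h
and is not shown in this diff; judging by the open-coded sequences it
replaces (e.g. in kgem_retire__flushing), it is presumably equivalent to:

    static inline void __kgem_bo_clear_busy(struct kgem_bo *bo)
    {
    	/* drop the tracking request and reset the flush/domain state
    	 * together, so that needs_flush never outlives its request */
    	bo->rq = NULL;
    	list_del(&bo->request);
    	bo->domain = DOMAIN_NONE;
    	bo->needs_flush = false;
    }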

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 4f9306a..c050a52 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -82,8 +82,8 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 #define SHOW_BATCH 0
 
 #if 0
-#define ASSERT_IDLE(kgem__, handle__) assert(!kgem_busy(kgem__, handle__))
-#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !kgem_busy(kgem__, handle__))
+#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
+#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
 #else
 #define ASSERT_IDLE(kgem__, handle__)
 #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
@@ -375,8 +375,7 @@ static int gem_read(int fd, uint32_t handle, const void *dst,
 	return 0;
 }
 
-static bool
-kgem_busy(struct kgem *kgem, int handle)
+bool __kgem_busy(struct kgem *kgem, int handle)
 {
 	struct drm_i915_gem_busy busy;
 
@@ -394,19 +393,14 @@ static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
 {
 	DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
 	     __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
-	     kgem_busy(kgem, bo->handle)));
+	     __kgem_busy(kgem, bo->handle)));
+	assert(bo->refcnt);
 	assert(bo->exec == NULL);
 	assert(list_is_empty(&bo->vma));
 
 	if (bo->rq) {
-		if (bo->needs_flush)
-			bo->needs_flush = kgem_busy(kgem, bo->handle);
-		if (bo->needs_flush) {
-			list_move(&bo->request, &kgem->flushing);
-			bo->rq = (void *)kgem;
-		} else {
-			bo->rq = NULL;
-			list_del(&bo->request);
+		if (!__kgem_busy(kgem, bo->handle)) {
+			__kgem_bo_clear_busy(bo);
 			kgem_retire(kgem);
 		}
 	} else {
@@ -1575,7 +1569,6 @@ static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
 	}
 
 	bo->scanout = false;
-	bo->needs_flush = true;
 	bo->flush = false;
 	bo->reusable = true;
 
@@ -1702,16 +1695,12 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
 		assert(!bo->flush);
 		assert(list_is_empty(&bo->list));
+		if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
+			__kgem_bo_clear_busy(bo);
 		if (bo->rq == NULL) {
-			if (bo->needs_flush && kgem_busy(kgem, bo->handle)) {
-				DBG(("%s: handle=%d is snooped, tracking until free\n",
-				     __FUNCTION__, bo->handle));
-				list_add(&bo->request, &kgem->flushing);
-				bo->rq = (void *)kgem;
-			}
-		}
-		if (bo->rq == NULL)
+			assert(!bo->needs_flush);
 			kgem_bo_move_to_snoop(kgem, bo);
+		}
 		return;
 	}
 
@@ -1766,6 +1755,9 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		bo->refcnt = 0;
 	}
 
+	if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
+		__kgem_bo_clear_busy(bo);
+
 	if (bo->rq) {
 		struct list *cache;
 
@@ -1781,27 +1773,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	assert(bo->exec == NULL);
 	assert(list_is_empty(&bo->request));
 
-	if (bo->needs_flush) {
-		if ((bo->needs_flush = kgem_busy(kgem, bo->handle))) {
-			struct list *cache;
-
-			DBG(("%s: handle=%d -> flushing\n",
-			     __FUNCTION__, bo->handle));
-
-			assert(bo->reusable);
-			list_add(&bo->request, &kgem->flushing);
-			if (bucket(bo) < NUM_CACHE_BUCKETS)
-				cache = &kgem->active[bucket(bo)][bo->tiling];
-			else
-				cache = &kgem->large;
-			list_add(&bo->list, cache);
-			bo->rq = (void *)kgem;
-			return;
-		}
-
-		bo->domain = DOMAIN_NONE;
-	}
-
 	if (!IS_CPU_MAP(bo->map)) {
 		if (!kgem_bo_set_purgeable(kgem, bo))
 			goto destroy;
@@ -1878,26 +1849,23 @@ static bool kgem_retire__flushing(struct kgem *kgem)
 		assert(bo->rq == (void *)kgem);
 		assert(bo->exec == NULL);
 
-		if (kgem_busy(kgem, bo->handle))
+		if (__kgem_busy(kgem, bo->handle))
 			break;
 
-		bo->needs_flush = false;
-		bo->domain = DOMAIN_NONE;
-		bo->rq = NULL;
-		list_del(&bo->request);
+		__kgem_bo_clear_busy(bo);
 
-		if (!bo->refcnt) {
-			if (bo->snoop) {
-				kgem_bo_move_to_snoop(kgem, bo);
-			} else if (bo->scanout) {
-				kgem_bo_move_to_scanout(kgem, bo);
-			} else if (bo->reusable &&
-				   kgem_bo_set_purgeable(kgem, bo)) {
-				kgem_bo_move_to_inactive(kgem, bo);
-				retired = true;
-			} else
-				kgem_bo_free(kgem, bo);
-		}
+		if (bo->refcnt)
+			continue;
+
+		if (bo->snoop) {
+			kgem_bo_move_to_snoop(kgem, bo);
+		} else if (bo->scanout) {
+			kgem_bo_move_to_scanout(kgem, bo);
+		} else if (bo->reusable && kgem_bo_set_purgeable(kgem, bo)) {
+			kgem_bo_move_to_inactive(kgem, bo);
+			retired = true;
+		} else
+			kgem_bo_free(kgem, bo);
 	}
 #if HAS_DEBUG_FULL
 	{
@@ -1935,44 +1903,31 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
 		list_del(&bo->request);
 
 		if (bo->needs_flush)
-			bo->needs_flush = kgem_busy(kgem, bo->handle);
+			bo->needs_flush = __kgem_busy(kgem, bo->handle);
 		if (bo->needs_flush) {
 			DBG(("%s: moving %d to flushing\n",
 			     __FUNCTION__, bo->handle));
 			list_add(&bo->request, &kgem->flushing);
 			bo->rq = (void *)kgem;
-		} else {
-			bo->domain = DOMAIN_NONE;
-			bo->rq = NULL;
+			continue;
 		}
 
+		bo->domain = DOMAIN_NONE;
+		bo->rq = NULL;
 		if (bo->refcnt)
 			continue;
 
 		if (bo->snoop) {
-			if (!bo->needs_flush)
-				kgem_bo_move_to_snoop(kgem, bo);
-			continue;
-		}
-
-		if (!bo->reusable) {
+			kgem_bo_move_to_snoop(kgem, bo);
+		} else if (bo->scanout) {
+			kgem_bo_move_to_scanout(kgem, bo);
+		} else if (bo->reusable && kgem_bo_set_purgeable(kgem, bo)) {
+			kgem_bo_move_to_inactive(kgem, bo);
+			retired = true;
+		} else {
 			DBG(("%s: closing %d\n",
 			     __FUNCTION__, bo->handle));
 			kgem_bo_free(kgem, bo);
-			continue;
-		}
-
-		if (!bo->needs_flush) {
-			if (bo->scanout) {
-				kgem_bo_move_to_scanout(kgem, bo);
-			} else if (kgem_bo_set_purgeable(kgem, bo)) {
-				kgem_bo_move_to_inactive(kgem, bo);
-				retired = true;
-			} else {
-				DBG(("%s: closing %d\n",
-				     __FUNCTION__, bo->handle));
-				kgem_bo_free(kgem, bo);
-			}
 		}
 	}
 
@@ -2004,7 +1959,7 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
 		rq = list_first_entry(&kgem->requests[ring],
 				      struct kgem_request,
 				      list);
-		if (kgem_busy(kgem, rq->bo->handle))
+		if (__kgem_busy(kgem, rq->bo->handle))
 			break;
 
 		retired |= __kgem_retire_rq(kgem, rq);
@@ -2073,7 +2028,7 @@ bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
 
 	rq = list_last_entry(&kgem->requests[ring],
 			     struct kgem_request, list);
-	if (kgem_busy(kgem, rq->bo->handle)) {
+	if (__kgem_busy(kgem, rq->bo->handle)) {
 		DBG(("%s: last requests handle=%d still busy\n",
 		     __FUNCTION__, rq->bo->handle));
 		return false;
@@ -2121,10 +2076,8 @@ static void kgem_commit(struct kgem *kgem)
 
 		if (bo->proxy) {
 			/* proxies are not used for domain tracking */
-			list_del(&bo->request);
-			bo->rq = NULL;
 			bo->exec = NULL;
-			bo->needs_flush = false;
+			__kgem_bo_clear_busy(bo);
 		}
 
 		kgem->scanout_busy |= bo->scanout;
@@ -2272,6 +2225,7 @@ static void kgem_finish_buffers(struct kgem *kgem)
 					list_init(&bo->base.request);
 					shrink->needs_flush = bo->base.dirty;
 
+					assert(bo->base.domain == DOMAIN_NONE);
 					bo->base.exec = NULL;
 					bo->base.rq = NULL;
 					bo->base.dirty = false;
@@ -2317,6 +2271,7 @@ static void kgem_finish_buffers(struct kgem *kgem)
 					list_init(&bo->base.request);
 					shrink->needs_flush = bo->base.dirty;
 
+					assert(bo->base.domain == DOMAIN_NONE);
 					bo->base.exec = NULL;
 					bo->base.rq = NULL;
 					bo->base.dirty = false;
@@ -2364,12 +2319,9 @@ static void kgem_cleanup(struct kgem *kgem)
 						      struct kgem_bo,
 						      request);
 
-				list_del(&bo->request);
-				bo->rq = NULL;
 				bo->exec = NULL;
-				bo->domain = DOMAIN_NONE;
 				bo->dirty = false;
-				bo->needs_flush = false;
+				__kgem_bo_clear_busy(bo);
 				if (bo->refcnt == 0)
 					kgem_bo_free(kgem, bo);
 			}
@@ -2434,15 +2386,12 @@ void kgem_reset(struct kgem *kgem)
 			bo->exec = NULL;
 			bo->target_handle = -1;
 			bo->dirty = false;
-			bo->domain = DOMAIN_NONE;
 
-			if (bo->needs_flush && kgem_busy(kgem, bo->handle)) {
+			if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
 				list_add(&bo->request, &kgem->flushing);
 				bo->rq = (void *)kgem;
-			} else {
-				bo->rq = NULL;
-				bo->needs_flush = false;
-			}
+			} else
+				__kgem_bo_clear_busy(bo);
 
 			if (!bo->refcnt && !bo->reusable) {
 				assert(!bo->snoop);
@@ -2521,7 +2470,7 @@ out_4096:
 			return kgem_bo_reference(bo);
 		}
 
-		if (!kgem_busy(kgem, bo->handle)) {
+		if (!__kgem_busy(kgem, bo->handle)) {
 			assert(RQ(bo->rq)->bo == bo);
 			__kgem_retire_rq(kgem, RQ(bo->rq));
 			goto out_4096;
@@ -2538,7 +2487,7 @@ out_16384:
 			return kgem_bo_reference(bo);
 		}
 
-		if (!kgem_busy(kgem, bo->handle)) {
+		if (!__kgem_busy(kgem, bo->handle)) {
 			assert(RQ(bo->rq)->bo == bo);
 			__kgem_retire_rq(kgem, RQ(bo->rq));
 			goto out_16384;
@@ -4078,16 +4027,18 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	__kgem_bo_destroy(kgem, bo);
 }
 
-bool __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
+void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
 {
+	assert(bo->rq);
+	assert(bo->exec == NULL);
+	assert(bo->needs_flush);
+
 	/* The kernel will emit a flush *and* update its own flushing lists. */
-	if (!bo->needs_flush)
-		return false;
+	if (!__kgem_busy(kgem, bo->handle))
+		__kgem_bo_clear_busy(bo);
 
-	bo->needs_flush = kgem_busy(kgem, bo->handle);
 	DBG(("%s: handle=%d, busy?=%d\n",
-	     __FUNCTION__, bo->handle, bo->needs_flush));
-	return bo->needs_flush;
+	     __FUNCTION__, bo->handle, bo->rq != NULL));
 }
 
 inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
@@ -4505,7 +4456,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
 		struct drm_i915_gem_set_domain set_domain;
 
 		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
-		     bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
+		     bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
 		/* XXX use PROT_READ to avoid the write flush? */
 
@@ -4680,10 +4631,7 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
 	 */
 	bo->reusable = false;
 
-	/* The bo is outside of our control, so presume it is written to */
-	bo->needs_flush = true;
-	if (bo->domain != DOMAIN_GPU)
-		bo->domain = DOMAIN_NONE;
+	kgem_bo_unclean(kgem, bo);
 
 	/* Henceforth, we need to broadcast all updates to clients and
 	 * flush our rendering before doing so.
@@ -4732,7 +4680,7 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 		struct drm_i915_gem_set_domain set_domain;
 
 		DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
-		     bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
+		     bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
 		VG_CLEAR(set_domain);
 		set_domain.handle = bo->handle;
@@ -4755,7 +4703,7 @@ void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write)
 		struct drm_i915_gem_set_domain set_domain;
 
 		DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
-		     bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
+		     bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
 		VG_CLEAR(set_domain);
 		set_domain.handle = bo->handle;
@@ -4779,7 +4727,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
 		struct drm_i915_gem_set_domain set_domain;
 
 		DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
-		     bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
+		     bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
 		VG_CLEAR(set_domain);

