xserver-xorg-video-intel: Changes to 'ubuntu'



 debian/changelog                                |    7 
 debian/patches/series                           |    1 
 debian/patches/upstream-snapshot-778dba90c.diff |  934 ++++++++++++++++++++++++
 3 files changed, 942 insertions(+)

New commits:
commit fe1e9e7d613809a2b023b45bd0fb05e3bc0cbc18
Author: Timo Aaltonen <tjaalton@ubuntu.com>
Date:   Thu Jan 24 10:25:28 2013 +0200

    upstream-snapshot-778dba90c.diff: Patch to update to current git master (LP: #1102390).

diff --git a/debian/changelog b/debian/changelog
index 2eadae2..527e9ea 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+xserver-xorg-video-intel (2:2.20.19-0ubuntu2) raring; urgency=low
+
+  * upstream-snapshot-778dba90c.diff: Patch to update to current git
+    master (LP: #1102390).
+
+ -- Timo Aaltonen <tjaalton@ubuntu.com>  Thu, 24 Jan 2013 10:22:41 +0200
+
 xserver-xorg-video-intel (2:2.20.19-0ubuntu1) raring; urgency=low
 
   * Merge from unreleased debian git
diff --git a/debian/patches/series b/debian/patches/series
index 23ed8a4..d550fdb 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1 +1,2 @@
 0002-Update-manpage-for-new-accelmethod-option.patch
+upstream-snapshot-778dba90c.diff
diff --git a/debian/patches/upstream-snapshot-778dba90c.diff b/debian/patches/upstream-snapshot-778dba90c.diff
new file mode 100644
index 0000000..bd47852
--- /dev/null
+++ b/debian/patches/upstream-snapshot-778dba90c.diff
@@ -0,0 +1,934 @@
+diff --git a/configure.ac b/configure.ac
+index 317bc6d..cb1496b 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -291,24 +291,6 @@ if test "x$USERPTR" = xyes; then
+ 	AC_DEFINE(USE_USERPTR,1,[Assume USERPTR support])
+ fi
+ 
+-AC_ARG_ENABLE(fast-reloc,
+-	      AS_HELP_STRING([--enable-fast-reloc],
+-			     [Enable use of "fast reloc" (experimental) [default=no]]),
+-	      [FASTRELOC="$enableval"],
+-	      [FASTRELOC=no])
+-if test "x$FASTRELOC" = xyes; then
+-	AC_DEFINE(USE_FASTRELOC,1,[Assume "fast reloc" support])
+-fi
+-
+-AC_ARG_ENABLE(handle-lut,
+-	      AS_HELP_STRING([--enable-handle-lut],
+-			     [Enable use of "handle LUT" (experimental) [default=no]]),
+-	      [HANDLE_LUT="$enableval"],
+-	      [HANDLE_LUT=no])
+-if test "x$HANDLE_LUT" = xyes; then
+-	AC_DEFINE(USE_HANDLE_LUT,1,[Assume "handle LUT" support])
+-fi
+-
+ AC_ARG_ENABLE(async-swap,
+ 	      AS_HELP_STRING([--enable-async-swap],
+ 			     [Enable use of asynchronous swaps (experimental) [default=no]]),
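
Note on the configure.ac hunk above: the two --enable switches are removed because fast-reloc and handle-LUT support is now decided at runtime rather than baked in at build time (the kgem.c hunk below drops the matching USE_FASTRELOC/USE_HANDLE_LUT compile-time guards). A minimal sketch of such a runtime probe, assuming the kernel's I915_PARAM_HAS_EXEC_NO_RELOC and I915_PARAM_HAS_EXEC_HANDLE_LUT parameters; the helper name is illustrative, not from this patch:

    #include <string.h>
    #include <xf86drm.h>
    #include <i915_drm.h>

    /* Ask the i915 kernel driver whether a feature is available,
     * instead of hard-wiring it with --enable-fast-reloc or
     * --enable-handle-lut at configure time. */
    static int probe_i915_param(int fd, int param)
    {
        drm_i915_getparam_t gp;
        int value = 0;

        memset(&gp, 0, sizeof(gp));
        gp.param = param;         /* e.g. I915_PARAM_HAS_EXEC_NO_RELOC */
        gp.value = &value;
        if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
            return 0;             /* older kernel: treat as unsupported */
        return value;
    }
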
+diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
+index 42e4cdd..01c0aee 100644
+--- a/src/sna/gen3_render.c
++++ b/src/sna/gen3_render.c
+@@ -1569,11 +1569,11 @@ static void gen3_emit_composite_state(struct sna *sna,
+ 	gen3_composite_emit_shader(sna, op, op->op);
+ }
+ 
+-static void gen3_magic_ca_pass(struct sna *sna,
++static bool gen3_magic_ca_pass(struct sna *sna,
+ 			       const struct sna_composite_op *op)
+ {
+ 	if (!op->need_magic_ca_pass)
+-		return;
++		return false;
+ 
+ 	DBG(("%s(%d)\n", __FUNCTION__,
+ 	     sna->render.vertex_index - sna->render.vertex_start));
+@@ -1587,6 +1587,7 @@ static void gen3_magic_ca_pass(struct sna *sna,
+ 	OUT_BATCH(sna->render.vertex_start);
+ 
+ 	sna->render_state.gen3.last_blend = 0;
++	return true;
+ }
+ 
+ static void gen3_vertex_flush(struct sna *sna)
+@@ -1613,22 +1614,22 @@ static int gen3_vertex_finish(struct sna *sna)
+ 	DBG(("%s: used=%d/%d, vbo active? %d\n",
+ 	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
+ 	     sna->render.vbo ? sna->render.vbo->handle : 0));
++	assert(sna->render.vertex_offset == 0);
+ 	assert(sna->render.vertex_used);
+ 	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	bo = sna->render.vbo;
+ 	if (bo) {
+-		if (sna->render.vertex_offset)
+-			gen3_vertex_flush(sna);
+-
+ 		DBG(("%s: reloc = %d\n", __FUNCTION__,
+ 		     sna->render.vertex_reloc[0]));
+ 
+-		sna->kgem.batch[sna->render.vertex_reloc[0]] =
+-			kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+-				       bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
++		if (sna->render.vertex_reloc[0]) {
++			sna->kgem.batch[sna->render.vertex_reloc[0]] =
++				kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
++					       bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
+ 
+-		sna->render.vertex_reloc[0] = 0;
++			sna->render.vertex_reloc[0] = 0;
++		}
+ 		sna->render.vertex_used = 0;
+ 		sna->render.vertex_index = 0;
+ 		sna->render.vbo = NULL;
+@@ -1664,14 +1665,13 @@ static void gen3_vertex_close(struct sna *sna)
+ 	unsigned int delta = 0;
+ 
+ 	assert(sna->render.vertex_offset == 0);
++	if (sna->render.vertex_reloc[0] == 0)
++		return;
+ 
+ 	DBG(("%s: used=%d/%d, vbo active? %d\n",
+ 	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
+ 	     sna->render.vbo ? sna->render.vbo->handle : 0));
+ 
+-	if (sna->render.vertex_used == 0)
+-		return;
+-
+ 	bo = sna->render.vbo;
+ 	if (bo) {
+ 		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
+@@ -1717,15 +1717,11 @@ static void gen3_vertex_close(struct sna *sna)
+ 		}
+ 	}
+ 
+-	DBG(("%s: reloc = %d\n", __FUNCTION__,
+-	     sna->render.vertex_reloc[0]));
+-
+-	if (sna->render.vertex_reloc[0]) {
+-		sna->kgem.batch[sna->render.vertex_reloc[0]] =
+-			kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+-				       bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
+-		sna->render.vertex_reloc[0] = 0;
+-	}
++	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
++	sna->kgem.batch[sna->render.vertex_reloc[0]] =
++		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
++			       bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
++	sna->render.vertex_reloc[0] = 0;
+ 
+ 	if (sna->render.vbo == NULL) {
+ 		DBG(("%s: resetting vbo\n", __FUNCTION__));
+@@ -1789,8 +1785,16 @@ static int gen3_get_rectangles__flush(struct sna *sna,
+ 	if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
+ 		return 0;
+ 
+-	if (op->need_magic_ca_pass && sna->render.vbo)
+-		return 0;
++	if (sna->render.vertex_offset) {
++		gen3_vertex_flush(sna);
++		if (gen3_magic_ca_pass(sna, op)) {
++			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
++			OUT_BATCH(gen3_get_blend_cntl(op->op,
++						      op->has_component_alpha,
++						      op->dst.format));
++			gen3_composite_emit_shader(sna, op, op->op);
++		}
++	}
+ 
+ 	return gen3_vertex_finish(sna);
+ }
+@@ -1836,6 +1840,8 @@ flush:
+ 	}
+ 	_kgem_submit(&sna->kgem);
+ 	gen3_emit_composite_state(sna, op);
++	assert(sna->render.vertex_offset == 0);
++	assert(sna->render.vertex_reloc[0] == 0);
+ 	goto start;
+ }
+ 
+@@ -1972,6 +1978,9 @@ gen3_render_reset(struct sna *sna)
+ 		     __FUNCTION__, sna->render.vbo->presumed_offset));
+ 		discard_vbo(sna);
+ 	}
++
++	sna->render.vertex_reloc[0] = 0;
++	sna->render.vertex_offset = 0;
+ }
+ 
+ static void
+@@ -4647,6 +4656,9 @@ gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
+ static void gen3_render_flush(struct sna *sna)
+ {
+ 	gen3_vertex_close(sna);
++
++	assert(sna->render.vertex_reloc[0] == 0);
++	assert(sna->render.vertex_offset == 0);
+ }
+ 
+ static void
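
The same refactor recurs for every generation in this patch: the magic component-alpha pass now reports whether it actually emitted the second pass. That lets gen3_get_rectangles__flush() flush a pending vertex run in place and re-emit only the state the CA pass clobbered, where the old code simply returned 0 to force a fresh batch whenever a CA pass was outstanding. Reassembled from the hunks above (all names are from the patch), the new caller reads:

    if (sna->render.vertex_offset) {
        gen3_vertex_flush(sna);
        if (gen3_magic_ca_pass(sna, op)) {
            /* the CA pass switched blend mode and shader;
             * restore them before emitting more rectangles */
            OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
            OUT_BATCH(gen3_get_blend_cntl(op->op,
                                          op->has_component_alpha,
                                          op->dst.format));
            gen3_composite_emit_shader(sna, op, op->op);
        }
    }

gen4 through gen7 below follow the same pattern, each re-emitting its own generation-specific state.
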
+diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
+index 6b3f864..d2f3fff 100644
+--- a/src/sna/gen4_render.c
++++ b/src/sna/gen4_render.c
+@@ -207,13 +207,13 @@ gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
+ 	return base + !is_affine;
+ }
+ 
+-static void gen4_magic_ca_pass(struct sna *sna,
++static bool gen4_magic_ca_pass(struct sna *sna,
+ 			       const struct sna_composite_op *op)
+ {
+ 	struct gen4_render_state *state = &sna->render_state.gen4;
+ 
+ 	if (!op->need_magic_ca_pass)
+-		return;
++		return false;
+ 
+ 	assert(sna->render.vertex_index > sna->render.vertex_start);
+ 
+@@ -237,6 +237,7 @@ static void gen4_magic_ca_pass(struct sna *sna,
+ 	OUT_BATCH(0);	/* index buffer offset, ignored */
+ 
+ 	state->last_primitive = sna->kgem.nbatch;
++	return true;
+ }
+ 
+ static uint32_t gen4_get_blend(int op,
+@@ -613,6 +614,13 @@ static int gen4_get_rectangles__flush(struct sna *sna,
+ 	if (op->need_magic_ca_pass && sna->render.vbo)
+ 		return 0;
+ 
++	if (sna->render.vertex_offset) {
++		gen4_vertex_flush(sna);
++		if (gen4_magic_ca_pass(sna, op))
++			gen4_emit_pipelined_pointers(sna, op, op->op,
++						     op->u.gen4.wm_kernel);
++	}
++
+ 	return gen4_vertex_finish(sna);
+ }
+ 
+@@ -2784,6 +2792,10 @@ static void gen4_render_reset(struct sna *sna)
+ 		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
+ 		discard_vbo(sna);
+ 	}
++
++	sna->render.vertex_offset = 0;
++	sna->render.nvertex_reloc = 0;
++	sna->render.vb_id = 0;
+ }
+ 
+ static void gen4_render_fini(struct sna *sna)
+diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
+index 9891453..4e40467 100644
+--- a/src/sna/gen4_vertex.c
++++ b/src/sna/gen4_vertex.c
+@@ -55,16 +55,13 @@ int gen4_vertex_finish(struct sna *sna)
+ 
+ 	DBG(("%s: used=%d / %d\n", __FUNCTION__,
+ 	     sna->render.vertex_used, sna->render.vertex_size));
++	assert(sna->render.vertex_offset == 0);
+ 	assert(sna->render.vertex_used);
+-	assert(sna->render.nvertex_reloc);
+ 
+ 	/* Note: we only need dword alignment (currently) */
+ 
+ 	bo = sna->render.vbo;
+ 	if (bo) {
+-		if (sna->render.vertex_offset)
+-			gen4_vertex_flush(sna);
+-
+ 		for (i = 0; i < sna->render.nvertex_reloc; i++) {
+ 			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+ 			     i, sna->render.vertex_reloc[i]));
+diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
+index eec2f83..5995d1d 100644
+--- a/src/sna/gen5_render.c
++++ b/src/sna/gen5_render.c
+@@ -199,13 +199,13 @@ gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
+ 	return base + !is_affine;
+ }
+ 
+-static void gen5_magic_ca_pass(struct sna *sna,
++static bool gen5_magic_ca_pass(struct sna *sna,
+ 			       const struct sna_composite_op *op)
+ {
+ 	struct gen5_render_state *state = &sna->render_state.gen5;
+ 
+ 	if (!op->need_magic_ca_pass)
+-		return;
++		return false;
+ 
+ 	assert(sna->render.vertex_index > sna->render.vertex_start);
+ 
+@@ -230,6 +230,7 @@ static void gen5_magic_ca_pass(struct sna *sna,
+ 	OUT_BATCH(0);	/* index buffer offset, ignored */
+ 
+ 	state->last_primitive = sna->kgem.nbatch;
++	return true;
+ }
+ 
+ static uint32_t gen5_get_blend(int op,
+@@ -599,8 +600,12 @@ static int gen5_get_rectangles__flush(struct sna *sna,
+ 	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
+ 		return 0;
+ 
+-	if (op->need_magic_ca_pass && sna->render.vbo)
+-		return 0;
++	if (sna->render.vertex_offset) {
++		gen4_vertex_flush(sna);
++		if (gen5_magic_ca_pass(sna, op))
++			gen5_emit_pipelined_pointers(sna, op, op->op,
++						     op->u.gen5.wm_kernel);
++	}
+ 
+ 	return gen4_vertex_finish(sna);
+ }
+@@ -2914,6 +2919,10 @@ static void gen5_render_reset(struct sna *sna)
+ 		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
+ 		discard_vbo(sna);
+ 	}
++
++	sna->render.vertex_offset = 0;
++	sna->render.nvertex_reloc = 0;
++	sna->render.vb_id = 0;
+ }
+ 
+ static void gen5_render_fini(struct sna *sna)
+diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
+index 7af59ae..35ff862 100644
+--- a/src/sna/gen6_render.c
++++ b/src/sna/gen6_render.c
+@@ -886,13 +886,13 @@ gen6_emit_state(struct sna *sna,
+ 	sna->render_state.gen6.first_state_packet = false;
+ }
+ 
+-static void gen6_magic_ca_pass(struct sna *sna,
++static bool gen6_magic_ca_pass(struct sna *sna,
+ 			       const struct sna_composite_op *op)
+ {
+ 	struct gen6_render_state *state = &sna->render_state.gen6;
+ 
+ 	if (!op->need_magic_ca_pass)
+-		return;
++		return false;
+ 
+ 	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
+ 	     sna->render.vertex_start, sna->render.vertex_index));
+@@ -918,6 +918,7 @@ static void gen6_magic_ca_pass(struct sna *sna,
+ 	OUT_BATCH(0);	/* index buffer offset, ignored */
+ 
+ 	state->last_primitive = sna->kgem.nbatch;
++	return true;
+ }
+ 
+ typedef struct gen6_surface_state_padded {
+@@ -1145,8 +1146,16 @@ static int gen6_get_rectangles__flush(struct sna *sna,
+ 	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
+ 		return 0;
+ 
+-	if (op->need_magic_ca_pass && sna->render.vbo)
+-		return 0;
++	if (sna->render.vertex_offset) {
++		gen4_vertex_flush(sna);
++		if (gen6_magic_ca_pass(sna, op)) {
++			gen6_emit_flush(sna);
++			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
++			gen6_emit_wm(sna,
++				     GEN6_KERNEL(op->u.gen6.flags),
++				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
++		}
++	}
+ 
+ 	return gen4_vertex_finish(sna);
+ }
+@@ -3422,6 +3431,10 @@ static void gen6_render_reset(struct sna *sna)
+ 	sna->render_state.gen6.drawrect_offset = -1;
+ 	sna->render_state.gen6.drawrect_limit = -1;
+ 	sna->render_state.gen6.surface_table = -1;
++
++	sna->render.vertex_offset = 0;
++	sna->render.nvertex_reloc = 0;
++	sna->render.vb_id = 0;
+ }
+ 
+ static void gen6_render_fini(struct sna *sna)
+diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
+index 5880e7a..fa36ce6 100644
+--- a/src/sna/gen7_render.c
++++ b/src/sna/gen7_render.c
+@@ -1034,13 +1034,13 @@ gen7_emit_state(struct sna *sna,
+ 	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
+ }
+ 
+-static void gen7_magic_ca_pass(struct sna *sna,
++static bool gen7_magic_ca_pass(struct sna *sna,
+ 			       const struct sna_composite_op *op)
+ {
+ 	struct gen7_render_state *state = &sna->render_state.gen7;
+ 
+ 	if (!op->need_magic_ca_pass)
+-		return;
++		return false;
+ 
+ 	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
+ 	     sna->render.vertex_start, sna->render.vertex_index));
+@@ -1064,6 +1064,7 @@ static void gen7_magic_ca_pass(struct sna *sna,
+ 	OUT_BATCH(0);	/* index buffer offset, ignored */
+ 
+ 	state->last_primitive = sna->kgem.nbatch;
++	return true;
+ }
+ 
+ static void null_create(struct sna_static_stream *stream)
+@@ -1272,8 +1273,14 @@ static int gen7_get_rectangles__flush(struct sna *sna,
+ 	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
+ 		return 0;
+ 
+-	if (op->need_magic_ca_pass && sna->render.vbo)
+-		return 0;
++	if (sna->render.vertex_offset) {
++		gen4_vertex_flush(sna);
++		if (gen7_magic_ca_pass(sna, op)) {
++			gen7_emit_pipe_invalidate(sna);
++			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
++			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
++		}
++	}
+ 
+ 	return gen4_vertex_finish(sna);
+ }
+@@ -2557,12 +2564,11 @@ static inline bool prefer_blt_copy(struct sna *sna,
+ 				   struct kgem_bo *dst_bo,
+ 				   unsigned flags)
+ {
+-	if (flags & COPY_SYNC)
+-		return false;
+-
+ 	if (sna->kgem.ring == KGEM_BLT)
+ 		return true;
+ 
++	assert((flags & COPY_SYNC) == 0);
++
+ 	if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
+ 		return true;
+ 
+@@ -3503,6 +3509,10 @@ static void gen7_render_reset(struct sna *sna)
+ 	sna->render_state.gen7.drawrect_offset = -1;
+ 	sna->render_state.gen7.drawrect_limit = -1;
+ 	sna->render_state.gen7.surface_table = -1;
++
++	sna->render.vertex_offset = 0;
++	sna->render.nvertex_reloc = 0;
++	sna->render.vb_id = 0;
+ }
+ 
+ static void gen7_render_fini(struct sna *sna)
+diff --git a/src/sna/kgem.c b/src/sna/kgem.c
+index 49815e7..6fa8ce4 100644
+--- a/src/sna/kgem.c
++++ b/src/sna/kgem.c
+@@ -81,16 +81,6 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
+ 
+ #define SHOW_BATCH 0
+ 
+-#ifndef USE_FASTRELOC
+-#undef DBG_NO_FAST_RELOC
+-#define DBG_NO_FAST_RELOC 1
+-#endif
+-
+-#ifndef USE_HANDLE_LUT
+-#undef DBG_NO_HANDLE_LUT
+-#define DBG_NO_HANDLE_LUT 1
+-#endif
+-
+ /* Worst case seems to be 965gm where we cannot write within a cacheline that
+  * is being simultaneously being read by the GPU, or within the sampler
+  * prefetch. In general, the chipsets seem to have a requirement that sampler
+@@ -1560,9 +1550,7 @@ inline static void kgem_bo_remove_from_active(struct kgem *kgem,
+ 
+ static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
+ {
+-	if (!bo->scanout)
+-		return;
+-
++	assert(bo->scanout);
+ 	assert(bo->proxy == NULL);
+ 
+ 	DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
+@@ -1722,7 +1710,9 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
+ 	}
+ 
+ 	if (bo->scanout) {
+-		DBG(("%s: handle=%d -> scanout\n", __FUNCTION__, bo->handle));
++		assert (bo->delta);
++		DBG(("%s: handle=%d -> scanout\n",
++		     __FUNCTION__, bo->handle));
+ 		list_add(&bo->list, &kgem->scanout);
+ 		return;
+ 	}
+@@ -1776,6 +1766,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
+ 			DBG(("%s: handle=%d -> flushing\n",
+ 			     __FUNCTION__, bo->handle));
+ 
++			assert(bo->reusable);
+ 			list_add(&bo->request, &kgem->flushing);
+ 			if (bucket(bo) < NUM_CACHE_BUCKETS)
+ 				cache = &kgem->active[bucket(bo)][bo->tiling];
+@@ -1876,8 +1867,8 @@ static bool kgem_retire__flushing(struct kgem *kgem)
+ 		if (!bo->refcnt) {
+ 			if (bo->snoop) {
+ 				kgem_bo_move_to_snoop(kgem, bo);
+-			} else if (kgem_bo_set_purgeable(kgem, bo)) {
+-				assert(bo->reusable);
++			} else if (bo->reusable &&
++				   kgem_bo_set_purgeable(kgem, bo)) {
+ 				kgem_bo_move_to_inactive(kgem, bo);
+ 				retired = true;
+ 			} else
+@@ -1935,12 +1926,8 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
+ 			continue;
+ 
+ 		if (bo->snoop) {
+-			if (bo->needs_flush) {
+-				list_add(&bo->request, &kgem->flushing);
+-				bo->rq = (void *)kgem;
+-			} else {
++			if (!bo->needs_flush)
+ 				kgem_bo_move_to_snoop(kgem, bo);
+-			}
+ 			continue;
+ 		}
+ 
+@@ -2422,7 +2409,8 @@ void kgem_reset(struct kgem *kgem)
+ 			bo->rq = NULL;
+ 			bo->domain = DOMAIN_NONE;
+ 
+-			if (!bo->refcnt) {
++			if (!bo->refcnt && !bo->reusable) {
++				assert(!bo->snoop);
+ 				DBG(("%s: discarding handle=%d\n",
+ 				     __FUNCTION__, bo->handle));
+ 				kgem_bo_free(kgem, bo);
+@@ -2648,33 +2636,23 @@ void _kgem_submit(struct kgem *kgem)
+ 					       DRM_IOCTL_I915_GEM_EXECBUFFER2,
+ 					       &execbuf);
+ 			}
+-			if (ret == -1 && (errno == EIO || errno == EBUSY)) {
+-				DBG(("%s: GPU hang detected\n", __FUNCTION__));
+-				kgem_throttle(kgem);
+-				ret = 0;
+-			}
+-
+ 			if (DEBUG_SYNC && ret == 0) {
+ 				struct drm_i915_gem_set_domain set_domain;
+ 
+-				DBG(("%s: debug sync, starting\n", __FUNCTION__));
+-
+ 				VG_CLEAR(set_domain);
+ 				set_domain.handle = handle;
+ 				set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+ 				set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+ 
+ 				ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+-				if (ret == -1) {
+-					DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
+-					kgem_throttle(kgem);
+-				}
+-
+-				DBG(("%s: debug sync, completed\n", __FUNCTION__));
+ 			}
++			if (ret == -1) {
++				DBG(("%s: GPU hang detected [%d]\n",
++				     __FUNCTION__, errno));
++				kgem_throttle(kgem);
++				kgem->wedged = true;
+ 
+ #if !NDEBUG
+-			if (ret < 0) {
+ 				ret = errno;
+ 				ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
+ 				       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
+@@ -2710,15 +2688,17 @@ void _kgem_submit(struct kgem *kgem)
+ 					       (int)kgem->reloc[i].presumed_offset);
+ 				}
+ 
+-				i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
+-				if (i != -1) {
+-					i = write(i, kgem->batch, batch_end*sizeof(uint32_t));
+-					(void)i;
+-				}
++				if (DEBUG_SYNC) {
++					int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
++					if (fd != -1) {
++						write(fd, kgem->batch, batch_end*sizeof(uint32_t));
++						close(fd);
++					}
+ 
+-				FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
+-			}
++					FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
++				}
+ #endif
++			}
+ 		}
+ 
+ 		kgem_commit(kgem);
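
Worth noting in the _kgem_submit() hunk above: the separate EIO/EBUSY branch and the debug-sync error branch are folded into a single failure path. Reconstructed from the diff, any failing execbuffer now ends up here:

    ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
    if (ret == -1) {
        DBG(("%s: GPU hang detected [%d]\n", __FUNCTION__, errno));
        kgem_throttle(kgem);     /* wait out the hang */
        kgem->wedged = true;     /* subsequent work falls back */
        /* debug builds (DEBUG_SYNC) additionally dump the batch to
         * /tmp/batchbuffer and abort via FatalError() */
    }
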
+diff --git a/src/sna/kgem.h b/src/sna/kgem.h
+index d2b89f5..b345b49 100644
+--- a/src/sna/kgem.h
++++ b/src/sna/kgem.h
+@@ -420,9 +420,9 @@ static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem,
+ 		kgem_check_exec(kgem, num_surfaces);
+ }
+ 
+-static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords)
++static inline uint32_t *kgem_get_batch(struct kgem *kgem)
+ {
+-	if (!kgem_check_batch(kgem, num_dwords)) {
++	if (kgem->nreloc) {
+ 		unsigned mode = kgem->mode;
+ 		_kgem_submit(kgem);
+ 		_kgem_set_mode(kgem, mode);
+@@ -431,11 +431,6 @@ static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords)
+ 	return kgem->batch + kgem->nbatch;
+ }
+ 
+-static inline void kgem_advance_batch(struct kgem *kgem, int num_dwords)
+-{
+-	kgem->nbatch += num_dwords;
+-}
+-
+ bool kgem_check_bo(struct kgem *kgem, ...) __attribute__((sentinel(0)));
+ bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo);
+ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(0)));
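
kgem_get_batch() loses its dword count and kgem_advance_batch() is removed outright: callers are now expected to have checked for batch space themselves, write through the returned pointer, and advance nbatch by hand (note the new precondition: if any relocations are pending, the batch is submitted first). The new idiom, exactly as used by the scanline-wait emitters in the sna_display.c hunks below:

    uint32_t *b = kgem_get_batch(&sna->kgem);
    sna->kgem.nbatch += 5;                /* account for b[0]..b[4] */

    b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
    b[3] = b[1] = (y1 << 16) | (y2 - 1);
    b[4] = MI_WAIT_FOR_EVENT | event;
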
+diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
+index ba9a3cb..e388d80 100644
+--- a/src/sna/sna_accel.c
++++ b/src/sna/sna_accel.c
+@@ -7006,6 +7006,8 @@ sna_poly_line_blt(DrawablePtr drawable,
+ 				b->y1 = p.y;
+ 				b->y2 = last.y;
+ 			}
++			b->y2 += last.x == p.x;
++			b->x2 += last.y == p.y;
+ 			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+ 			     __FUNCTION__,
+ 			     b->x1, b->y1, b->x2, b->y2));
+@@ -7063,6 +7065,8 @@ sna_poly_line_blt(DrawablePtr drawable,
+ 					b->y1 = p.y;
+ 					b->y2 = last.y;
+ 				}
++				b->y2 += last.x == p.x;
++				b->x2 += last.y == p.y;
+ 				DBG(("%s: blt (%d, %d), (%d, %d)\n",
+ 				     __FUNCTION__,
+ 				     b->x1, b->y1, b->x2, b->y2));
+@@ -7119,6 +7123,8 @@ sna_poly_line_blt(DrawablePtr drawable,
+ 					box.y1 = p.y;
+ 					box.y2 = last.y;
+ 				}
++				b->y2 += last.x == p.x;
++				b->x2 += last.y == p.y;
+ 				DBG(("%s: blt (%d, %d), (%d, %d)\n",
+ 				     __FUNCTION__,
+ 				     box.x1, box.y1, box.x2, box.y2));
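
The two added lines in each branch lean on C's comparison operators evaluating to 0 or 1: for a strictly vertical segment (last.x == p.x) the half-open box gains one extra row, for a strictly horizontal one (last.y == p.y) one extra column, with no branch. (In the third hunk the new lines adjust b-> while the surrounding code fills a local box; that mismatch is as it appears in the upstream commit.) A standalone illustration:

    #include <assert.h>

    int main(void)
    {
        struct { int x1, y1, x2, y2; } b = { 3, 1, 4, 7 };
        int last_x = 3, p_x = 3;     /* a vertical segment */

        /* last_x == p_x evaluates to the int 1 here, so y2 is
         * bumped by exactly one pixel, branch-free */
        b.y2 += last_x == p_x;
        assert(b.y2 == 8);
        return 0;
    }
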
+diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
+index 0a581da..307e763 100644
+--- a/src/sna/sna_display.c
++++ b/src/sna/sna_display.c
+@@ -197,13 +197,15 @@ sna_output_backlight_set(xf86OutputPtr output, int level)
+ 	char path[1024], val[BACKLIGHT_VALUE_LEN];
+ 	int fd, len, ret;
+ 
+-	DBG(("%s: level=%d\n", __FUNCTION__, level));
++	DBG(("%s: level=%d, max=%d\n", __FUNCTION__,
++	     level, sna_output->backlight_max));
+ 
+-	if (level > sna_output->backlight_max)
+-		level = sna_output->backlight_max;
+-	if (!sna_output->backlight_iface || level < 0)
++	if (!sna_output->backlight_iface)
+ 		return;
+ 
++	if ((unsigned)level > sna_output->backlight_max)
++		level = sna_output->backlight_max;
++
+ 	len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level);
+ 	sprintf(path, "%s/%s/brightness",
+ 		BACKLIGHT_CLASS, sna_output->backlight_iface);
+@@ -2768,10 +2770,12 @@ sna_covering_crtc(ScrnInfoPtr scrn,
+ #define MI_LOAD_REGISTER_IMM			(0x22<<23)
+ 
+ static bool sna_emit_wait_for_scanline_gen7(struct sna *sna,
++					    xf86CrtcPtr crtc,
+ 					    int pipe, int y1, int y2,
+ 					    bool full_height)
+ {
+ 	uint32_t *b;
++	uint32_t event;
+ 
+ 	if (!sna->kgem.has_secure_batches)
+ 		return false;
+@@ -2780,60 +2784,106 @@ static bool sna_emit_wait_for_scanline_gen7(struct sna *sna,
+ 	assert(y2 > y1);
+ 	assert(sna->kgem.mode);
+ 
+-	b = kgem_get_batch(&sna->kgem, 16);
++	/* Always program one less than the desired value */
++	if (--y1 < 0)
++		y1 = crtc->bounds.y2;
++	y2--;
++
++	switch (pipe) {
++	default:
++		assert(0);
++	case 0:
++		event = 1 << (full_height ? 3 : 0);
++		break;
++	case 1:
++		event = 1 << (full_height ? 11 : 8);
++		break;
++	case 2:
++		event = 1 << (full_height ? 21 : 14);
++		break;
++	}
++
++	b = kgem_get_batch(&sna->kgem);
++
++	/* Both the LRI and WAIT_FOR_EVENT must be in the same cacheline */
++	if (((sna->kgem.nbatch + 6) >> 4) != (sna->kgem.nbatch + 10) >> 4) {
++		int dw = sna->kgem.nbatch + 6;
++		dw = ALIGN(dw, 16) - dw;
++		while (dw--)
++			*b++ = MI_NOOP;
++	}
++
+ 	b[0] = MI_LOAD_REGISTER_IMM | 1;
+ 	b[1] = 0x44050; /* DERRMR */
+-	b[2] = ~(1 << (3*full_height + pipe*8));
++	b[2] = ~event;
+ 	b[3] = MI_LOAD_REGISTER_IMM | 1;
+ 	b[4] = 0xa188; /* FORCEWAKE_MT */
+ 	b[5] = 2 << 16 | 2;
+ 	b[6] = MI_LOAD_REGISTER_IMM | 1;
+ 	b[7] = 0x70068 + 0x1000 * pipe;
+-	b[8] = (1 << 31) | (1 << 30) | (y1 << 16) | (y2 - 1);
+-	b[9] = MI_WAIT_FOR_EVENT | 1 << (3*full_height + pipe*5);
++	b[8] = (1 << 31) | (1 << 30) | (y1 << 16) | y2;
++	b[9] = MI_WAIT_FOR_EVENT | event;
+ 	b[10] = MI_LOAD_REGISTER_IMM | 1;
+ 	b[11] = 0xa188; /* FORCEWAKE_MT */
+ 	b[12] = 2 << 16;
+ 	b[13] = MI_LOAD_REGISTER_IMM | 1;
+ 	b[14] = 0x44050; /* DERRMR */
+ 	b[15] = ~0;
+-	kgem_advance_batch(&sna->kgem, 16);
++
++	sna->kgem.nbatch = b - sna->kgem.batch + 16;
+ 
+ 	sna->kgem.batch_flags |= I915_EXEC_SECURE;
+ 	return true;
+ }
+ 
+ static bool sna_emit_wait_for_scanline_gen6(struct sna *sna,
++					    xf86CrtcPtr crtc,
+ 					    int pipe, int y1, int y2,
+ 					    bool full_height)
+ {
+ 	uint32_t *b;
++	uint32_t event;
+ 
+ 	if (!sna->kgem.has_secure_batches)
+ 		return false;
+ 
+ 	assert(y1 >= 0);
+ 	assert(y2 > y1);
+-	assert(sna->kgem.mode);
++	assert(sna->kgem.mode == KGEM_RENDER);
++
++	/* Always program one less than the desired value */
++	if (--y1 < 0)
++		y1 = crtc->bounds.y2;
++	y2--;
++
++	/* The scanline granularity is 3 bits */
++	y1 &= ~7;
++	y2 &= ~7;
++	if (y2 == y1)
++		return false;
++
++	event = 1 << (3*full_height + pipe*8);
++
++	b = kgem_get_batch(&sna->kgem);
++	sna->kgem.nbatch += 10;
+ 
+-	b = kgem_get_batch(&sna->kgem, 10);
+ 	b[0] = MI_LOAD_REGISTER_IMM | 1;
+ 	b[1] = 0x44050; /* DERRMR */
+-	b[2] = ~(1 << (3*full_height + pipe*8));
++	b[2] = ~event;
+ 	b[3] = MI_LOAD_REGISTER_IMM | 1;
+ 	b[4] = 0x4f100; /* magic */
+-	b[5] = (1 << 31) | (1 << 30) | pipe << 29 | (y1 << 16) | (y2 - 1);
+-	b[6] = MI_WAIT_FOR_EVENT | 1 << (3*full_height + pipe*5);
++	b[5] = (1 << 31) | (1 << 30) | pipe << 29 | (y1 << 16) | y2;
++	b[6] = MI_WAIT_FOR_EVENT | event;
+ 	b[7] = MI_LOAD_REGISTER_IMM | 1;
+ 	b[8] = 0x44050; /* DERRMR */
+ 	b[9] = ~0;
+-	kgem_advance_batch(&sna->kgem, 10);
+ 
+ 	sna->kgem.batch_flags |= I915_EXEC_SECURE;
+ 	return true;
+ }
+ 
+ static bool sna_emit_wait_for_scanline_gen4(struct sna *sna,
++					    xf86CrtcPtr crtc,
+ 					    int pipe, int y1, int y2,
+ 					    bool full_height)
+ {
+@@ -2852,18 +2902,20 @@ static bool sna_emit_wait_for_scanline_gen4(struct sna *sna,
+ 			event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
+ 	}
+ 
+-	b = kgem_get_batch(&sna->kgem, 5);
++	b = kgem_get_batch(&sna->kgem);
++	sna->kgem.nbatch += 5;
++
+ 	/* The documentation says that the LOAD_SCAN_LINES command
+ 	 * always comes in pairs. Don't ask me why. */
+ 	b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
+ 	b[3] = b[1] = (y1 << 16) | (y2-1);
+ 	b[4] = MI_WAIT_FOR_EVENT | event;
+-	kgem_advance_batch(&sna->kgem, 5);
+ 
+ 	return true;
+ }
+ 
+ static bool sna_emit_wait_for_scanline_gen2(struct sna *sna,
++					    xf86CrtcPtr crtc,
+ 					    int pipe, int y1, int y2,
+ 					    bool full_height)
+ {
+@@ -2877,16 +2929,14 @@ static bool sna_emit_wait_for_scanline_gen2(struct sna *sna,
+ 	if (full_height)
+ 		y2 -= 2;
+ 
+-	b = kgem_get_batch(&sna->kgem, 5);
++	b = kgem_get_batch(&sna->kgem);
++	sna->kgem.nbatch += 5;
++
+ 	/* The documentation says that the LOAD_SCAN_LINES command
+ 	 * always comes in pairs. Don't ask me why. */
+ 	b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
+ 	b[3] = b[1] = (y1 << 16) | (y2-1);
+-	if (pipe == 0)
+-		b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
+-	else
+-		b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
+-	kgem_advance_batch(&sna->kgem, 5);
++	b[4] = MI_WAIT_FOR_EVENT | 1 << (1 + 4*pipe);
+ 
+ 	return true;
+ }
+@@ -2934,13 +2984,13 @@ sna_wait_for_scanline(struct sna *sna,
+ 	if (sna->kgem.gen >= 0100)
+ 		ret = false;
+ 	else if (sna->kgem.gen >= 070)
+-		ret = sna_emit_wait_for_scanline_gen7(sna, pipe, y1, y2, full_height);
++		ret = sna_emit_wait_for_scanline_gen7(sna, crtc, pipe, y1, y2, full_height);
+ 	else if (sna->kgem.gen >= 060)
+-		ret =sna_emit_wait_for_scanline_gen6(sna, pipe, y1, y2, full_height);
++		ret =sna_emit_wait_for_scanline_gen6(sna, crtc, pipe, y1, y2, full_height);
+ 	else if (sna->kgem.gen >= 040)
+-		ret = sna_emit_wait_for_scanline_gen4(sna, pipe, y1, y2, full_height);
++		ret = sna_emit_wait_for_scanline_gen4(sna, crtc, pipe, y1, y2, full_height);
+ 	else
+-		ret = sna_emit_wait_for_scanline_gen2(sna, pipe, y1, y2, full_height);
++		ret = sna_emit_wait_for_scanline_gen2(sna, crtc, pipe, y1, y2, full_height);
+ 
+ 	return ret;
+ }
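
Two details in the reworked gen7 scanline wait deserve a note. First, the DERRMR/WAIT event bit is now taken from an explicit per-pipe table; the old shift arithmetic and that table disagree for pipe 2, so pipe C appears to have been mishandled before. Second, the new comment records a hardware constraint that the arming LOAD_REGISTER_IMM and the MI_WAIT_FOR_EVENT must share one 64-byte cacheline; batch entries are 4-byte dwords, so 16 of them span a cacheline, which explains the padding logic:

    /* b[6]..b[10] hold the arming LRI and the WAIT_FOR_EVENT; if they
     * would straddle a 16-dword (64-byte) cacheline boundary, pad with
     * MI_NOOP until the LRI starts on a 16-dword alignment. */
    if (((sna->kgem.nbatch + 6) >> 4) != (sna->kgem.nbatch + 10) >> 4) {
        int dw = sna->kgem.nbatch + 6;
        dw = ALIGN(dw, 16) - dw;
        while (dw--)
            *b++ = MI_NOOP;
    }
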
+diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
+index b48894e..9d249e3 100644
+--- a/src/sna/sna_dri.c
++++ b/src/sna/sna_dri.c
+@@ -146,7 +146,8 @@ static uint32_t color_tiling(struct sna *sna, DrawablePtr draw)
+ static uint32_t other_tiling(struct sna *sna, DrawablePtr draw)
+ {
+ 	/* XXX Can mix color X / depth Y? */
+-	return kgem_choose_tiling(&sna->kgem, -I915_TILING_Y,
++	return kgem_choose_tiling(&sna->kgem,
++				  sna->kgem.gen >=40 ? -I915_TILING_Y : -I915_TILING_X,
+ 				  draw->width,
+ 				  draw->height,
+ 				  draw->bitsPerPixel);
+@@ -513,8 +514,11 @@ static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *dst, struct kge
+ 		return;
+ 
+ 	if (sync) {
+-		DBG(("%s: sync, force RENDER ring\n", __FUNCTION__));
+-		kgem_set_mode(&sna->kgem, KGEM_RENDER, dst);
++		DBG(("%s: sync, force %s ring\n", __FUNCTION__,
++		     sna->kgem.gen >= 070 ? "BLT" : "RENDER"));
++		kgem_set_mode(&sna->kgem,
++			      sna->kgem.gen >= 070 ? KGEM_BLT : KGEM_RENDER,
++			      dst);
+ 		return;
+ 	}
+ 
+@@ -837,14 +841,23 @@ can_blit(struct sna * sna,
+ 	 DRI2BufferPtr front,
+ 	 DRI2BufferPtr back)
+ {
+-	uint32_t f, b;
++	RegionPtr clip;
++	uint32_t s;
+ 
+ 	if (draw->type == DRAWABLE_PIXMAP)
+ 		return true;
+ 
+-	f = get_private(front)->size;
+-	b = get_private(back)->size;
+-	return (f >> 16) >= (b >> 16) && (f & 0xffff) >= (b & 0xffff);
++	clip = &((WindowPtr)draw)->clipList;
++
++	s = get_private(front)->size;
++	if ((s>>16) < clip->extents.y2 || (s&0xffff) < clip->extents.x2)
++		return false;
++
++	s = get_private(back)->size;
++	if ((s>>16) < clip->extents.y2 || (s&0xffff) < clip->extents.x2)
++		return false;
++
++	return true;
+ }
+ 
+ static void
+@@ -2069,18 +2082,17 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
+ 	     (uint32_t)*target_msc, (uint32_t)current_msc, (uint32_t)divisor));
+ 
+ 	if (divisor == 0 && current_msc >= *target_msc - 1) {
++		bool sync = current_msc < *target_msc;
+ 		if (can_exchange(sna, draw, front, back)) {
+-			sna_dri_immediate_xchg(sna, draw, info,
+-					       current_msc < *target_msc);
++			sna_dri_immediate_xchg(sna, draw, info, sync);
+ 		} else if (can_blit(sna, draw, front, back)) {
+-			sna_dri_immediate_blit(sna, draw, info,
+-					       current_msc < *target_msc);
++			sna_dri_immediate_blit(sna, draw, info, sync);
+ 		} else {
+ 			DRI2SwapComplete(client, draw, 0, 0, 0,
+ 					 DRI2_BLIT_COMPLETE, func, data);
+ 			sna_dri_frame_event_info_free(sna, draw, info);
+ 		}
+-		*target_msc = current_msc + 1;
++		*target_msc = current_msc + sync;
+ 		return TRUE;
+ 	}
+ 
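
Finally, can_blit() no longer compares the front and back buffer sizes against each other; it validates both against the window's clip extents, since a blit only needs each buffer to cover the visible region. The private size field packs height into the upper 16 bits and width into the lower 16, so the test factors into a small predicate; a sketch (the helper name is illustrative, not from the patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* Does a DRI2 buffer whose size is packed as (height << 16) | width
     * cover clip extents reaching (x2, y2)? Mirrors the new can_blit(). */
    static bool buffer_covers_clip(uint32_t packed_size, int x2, int y2)
    {
        return (int)(packed_size >> 16) >= y2 &&
               (int)(packed_size & 0xffff) >= x2;
    }
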

