xserver-xorg-video-intel: Changes to 'ubuntu'
debian/changelog                                |    7 +++++++
debian/patches/series                           |    1 +
debian/patches/upstream-snapshot-778dba90c.diff |  934 ++++++++++++++++++++++++
3 files changed, 942 insertions(+)
New commits:
commit fe1e9e7d613809a2b023b45bd0fb05e3bc0cbc18
Author: Timo Aaltonen <tjaalton@ubuntu.com>
Date: Thu Jan 24 10:25:28 2013 +0200
upstream-snapshot-778dba90c.diff: Patch to update to current git master (LP: #1102390).
diff --git a/debian/changelog b/debian/changelog
index 2eadae2..527e9ea 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+xserver-xorg-video-intel (2:2.20.19-0ubuntu2) raring; urgency=low
+
+ * upstream-snapshot-778dba90c.diff: Patch to update to current git
+ master (LP: #1102390).
+
+ -- Timo Aaltonen <tjaalton@ubuntu.com> Thu, 24 Jan 2013 10:22:41 +0200
+
xserver-xorg-video-intel (2:2.20.19-0ubuntu1) raring; urgency=low
* Merge from unreleased debian git
diff --git a/debian/patches/series b/debian/patches/series
index 23ed8a4..d550fdb 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1 +1,2 @@
0002-Update-manpage-for-new-accelmethod-option.patch
+upstream-snapshot-778dba90c.diff
diff --git a/debian/patches/upstream-snapshot-778dba90c.diff b/debian/patches/upstream-snapshot-778dba90c.diff
new file mode 100644
index 0000000..bd47852
--- /dev/null
+++ b/debian/patches/upstream-snapshot-778dba90c.diff
@@ -0,0 +1,934 @@
+diff --git a/configure.ac b/configure.ac
+index 317bc6d..cb1496b 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -291,24 +291,6 @@ if test "x$USERPTR" = xyes; then
+ AC_DEFINE(USE_USERPTR,1,[Assume USERPTR support])
+ fi
+
+-AC_ARG_ENABLE(fast-reloc,
+- AS_HELP_STRING([--enable-fast-reloc],
+- [Enable use of "fast reloc" (experimental) [default=no]]),
+- [FASTRELOC="$enableval"],
+- [FASTRELOC=no])
+-if test "x$FASTRELOC" = xyes; then
+- AC_DEFINE(USE_FASTRELOC,1,[Assume "fast reloc" support])
+-fi
+-
+-AC_ARG_ENABLE(handle-lut,
+- AS_HELP_STRING([--enable-handle-lut],
+- [Enable use of "handle LUT" (experimental) [default=no]]),
+- [HANDLE_LUT="$enableval"],
+- [HANDLE_LUT=no])
+-if test "x$HANDLE_LUT" = xyes; then
+- AC_DEFINE(USE_HANDLE_LUT,1,[Assume "handle LUT" support])
+-fi
+-
+ AC_ARG_ENABLE(async-swap,
+ AS_HELP_STRING([--enable-async-swap],
+ [Enable use of asynchronous swaps (experimental) [default=no]]),
+diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
+index 42e4cdd..01c0aee 100644
+--- a/src/sna/gen3_render.c
++++ b/src/sna/gen3_render.c
+@@ -1569,11 +1569,11 @@ static void gen3_emit_composite_state(struct sna *sna,
+ gen3_composite_emit_shader(sna, op, op->op);
+ }
+
+-static void gen3_magic_ca_pass(struct sna *sna,
++static bool gen3_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+ {
+ if (!op->need_magic_ca_pass)
+- return;
++ return false;
+
+ DBG(("%s(%d)\n", __FUNCTION__,
+ sna->render.vertex_index - sna->render.vertex_start));
+@@ -1587,6 +1587,7 @@ static void gen3_magic_ca_pass(struct sna *sna,
+ OUT_BATCH(sna->render.vertex_start);
+
+ sna->render_state.gen3.last_blend = 0;
++ return true;
+ }
+
+ static void gen3_vertex_flush(struct sna *sna)
+@@ -1613,22 +1614,22 @@ static int gen3_vertex_finish(struct sna *sna)
+ DBG(("%s: used=%d/%d, vbo active? %d\n",
+ __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
+ sna->render.vbo ? sna->render.vbo->handle : 0));
++ assert(sna->render.vertex_offset == 0);
+ assert(sna->render.vertex_used);
+ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ bo = sna->render.vbo;
+ if (bo) {
+- if (sna->render.vertex_offset)
+- gen3_vertex_flush(sna);
+-
+ DBG(("%s: reloc = %d\n", __FUNCTION__,
+ sna->render.vertex_reloc[0]));
+
+- sna->kgem.batch[sna->render.vertex_reloc[0]] =
+- kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+- bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
++ if (sna->render.vertex_reloc[0]) {
++ sna->kgem.batch[sna->render.vertex_reloc[0]] =
++ kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
++ bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
+
+- sna->render.vertex_reloc[0] = 0;
++ sna->render.vertex_reloc[0] = 0;
++ }
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ sna->render.vbo = NULL;
+@@ -1664,14 +1665,13 @@ static void gen3_vertex_close(struct sna *sna)
+ unsigned int delta = 0;
+
+ assert(sna->render.vertex_offset == 0);
++ if (sna->render.vertex_reloc[0] == 0)
++ return;
+
+ DBG(("%s: used=%d/%d, vbo active? %d\n",
+ __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
+ sna->render.vbo ? sna->render.vbo->handle : 0));
+
+- if (sna->render.vertex_used == 0)
+- return;
+-
+ bo = sna->render.vbo;
+ if (bo) {
+ if (sna->render.vertex_size - sna->render.vertex_used < 64) {
+@@ -1717,15 +1717,11 @@ static void gen3_vertex_close(struct sna *sna)
+ }
+ }
+
+- DBG(("%s: reloc = %d\n", __FUNCTION__,
+- sna->render.vertex_reloc[0]));
+-
+- if (sna->render.vertex_reloc[0]) {
+- sna->kgem.batch[sna->render.vertex_reloc[0]] =
+- kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+- bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
+- sna->render.vertex_reloc[0] = 0;
+- }
++ DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
++ sna->kgem.batch[sna->render.vertex_reloc[0]] =
++ kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
++ bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
++ sna->render.vertex_reloc[0] = 0;
+
+ if (sna->render.vbo == NULL) {
+ DBG(("%s: resetting vbo\n", __FUNCTION__));
+@@ -1789,8 +1785,16 @@ static int gen3_get_rectangles__flush(struct sna *sna,
+ if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
+ return 0;
+
+- if (op->need_magic_ca_pass && sna->render.vbo)
+- return 0;
++ if (sna->render.vertex_offset) {
++ gen3_vertex_flush(sna);
++ if (gen3_magic_ca_pass(sna, op)) {
++ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
++ OUT_BATCH(gen3_get_blend_cntl(op->op,
++ op->has_component_alpha,
++ op->dst.format));
++ gen3_composite_emit_shader(sna, op, op->op);
++ }
++ }
+
+ return gen3_vertex_finish(sna);
+ }
+@@ -1836,6 +1840,8 @@ flush:
+ }
+ _kgem_submit(&sna->kgem);
+ gen3_emit_composite_state(sna, op);
++ assert(sna->render.vertex_offset == 0);
++ assert(sna->render.vertex_reloc[0] == 0);
+ goto start;
+ }
+
+@@ -1972,6 +1978,9 @@ gen3_render_reset(struct sna *sna)
+ __FUNCTION__, sna->render.vbo->presumed_offset));
+ discard_vbo(sna);
+ }
++
++ sna->render.vertex_reloc[0] = 0;
++ sna->render.vertex_offset = 0;
+ }
+
+ static void
+@@ -4647,6 +4656,9 @@ gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
+ static void gen3_render_flush(struct sna *sna)
+ {
+ gen3_vertex_close(sna);
++
++ assert(sna->render.vertex_reloc[0] == 0);
++ assert(sna->render.vertex_offset == 0);
+ }
+
+ static void
+diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
+index 6b3f864..d2f3fff 100644
+--- a/src/sna/gen4_render.c
++++ b/src/sna/gen4_render.c
+@@ -207,13 +207,13 @@ gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
+ return base + !is_affine;
+ }
+
+-static void gen4_magic_ca_pass(struct sna *sna,
++static bool gen4_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+ {
+ struct gen4_render_state *state = &sna->render_state.gen4;
+
+ if (!op->need_magic_ca_pass)
+- return;
++ return false;
+
+ assert(sna->render.vertex_index > sna->render.vertex_start);
+
+@@ -237,6 +237,7 @@ static void gen4_magic_ca_pass(struct sna *sna,
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ state->last_primitive = sna->kgem.nbatch;
++ return true;
+ }
+
+ static uint32_t gen4_get_blend(int op,
+@@ -613,6 +614,13 @@ static int gen4_get_rectangles__flush(struct sna *sna,
+ if (op->need_magic_ca_pass && sna->render.vbo)
+ return 0;
+
++ if (sna->render.vertex_offset) {
++ gen4_vertex_flush(sna);
++ if (gen4_magic_ca_pass(sna, op))
++ gen4_emit_pipelined_pointers(sna, op, op->op,
++ op->u.gen4.wm_kernel);
++ }
++
+ return gen4_vertex_finish(sna);
+ }
+
+@@ -2784,6 +2792,10 @@ static void gen4_render_reset(struct sna *sna)
+ DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
+ discard_vbo(sna);
+ }
++
++ sna->render.vertex_offset = 0;
++ sna->render.nvertex_reloc = 0;
++ sna->render.vb_id = 0;
+ }
+
+ static void gen4_render_fini(struct sna *sna)
+diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
+index 9891453..4e40467 100644
+--- a/src/sna/gen4_vertex.c
++++ b/src/sna/gen4_vertex.c
+@@ -55,16 +55,13 @@ int gen4_vertex_finish(struct sna *sna)
+
+ DBG(("%s: used=%d / %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->render.vertex_size));
++ assert(sna->render.vertex_offset == 0);
+ assert(sna->render.vertex_used);
+- assert(sna->render.nvertex_reloc);
+
+ /* Note: we only need dword alignment (currently) */
+
+ bo = sna->render.vbo;
+ if (bo) {
+- if (sna->render.vertex_offset)
+- gen4_vertex_flush(sna);
+-
+ for (i = 0; i < sna->render.nvertex_reloc; i++) {
+ DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+ i, sna->render.vertex_reloc[i]));
+diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
+index eec2f83..5995d1d 100644
+--- a/src/sna/gen5_render.c
++++ b/src/sna/gen5_render.c
+@@ -199,13 +199,13 @@ gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
+ return base + !is_affine;
+ }
+
+-static void gen5_magic_ca_pass(struct sna *sna,
++static bool gen5_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+ {
+ struct gen5_render_state *state = &sna->render_state.gen5;
+
+ if (!op->need_magic_ca_pass)
+- return;
++ return false;
+
+ assert(sna->render.vertex_index > sna->render.vertex_start);
+
+@@ -230,6 +230,7 @@ static void gen5_magic_ca_pass(struct sna *sna,
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ state->last_primitive = sna->kgem.nbatch;
++ return true;
+ }
+
+ static uint32_t gen5_get_blend(int op,
+@@ -599,8 +600,12 @@ static int gen5_get_rectangles__flush(struct sna *sna,
+ if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
+ return 0;
+
+- if (op->need_magic_ca_pass && sna->render.vbo)
+- return 0;
++ if (sna->render.vertex_offset) {
++ gen4_vertex_flush(sna);
++ if (gen5_magic_ca_pass(sna, op))
++ gen5_emit_pipelined_pointers(sna, op, op->op,
++ op->u.gen5.wm_kernel);
++ }
+
+ return gen4_vertex_finish(sna);
+ }
+@@ -2914,6 +2919,10 @@ static void gen5_render_reset(struct sna *sna)
+ DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
+ discard_vbo(sna);
+ }
++
++ sna->render.vertex_offset = 0;
++ sna->render.nvertex_reloc = 0;
++ sna->render.vb_id = 0;
+ }
+
+ static void gen5_render_fini(struct sna *sna)
+diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
+index 7af59ae..35ff862 100644
+--- a/src/sna/gen6_render.c
++++ b/src/sna/gen6_render.c
+@@ -886,13 +886,13 @@ gen6_emit_state(struct sna *sna,
+ sna->render_state.gen6.first_state_packet = false;
+ }
+
+-static void gen6_magic_ca_pass(struct sna *sna,
++static bool gen6_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+ {
+ struct gen6_render_state *state = &sna->render_state.gen6;
+
+ if (!op->need_magic_ca_pass)
+- return;
++ return false;
+
+ DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
+ sna->render.vertex_start, sna->render.vertex_index));
+@@ -918,6 +918,7 @@ static void gen6_magic_ca_pass(struct sna *sna,
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ state->last_primitive = sna->kgem.nbatch;
++ return true;
+ }
+
+ typedef struct gen6_surface_state_padded {
+@@ -1145,8 +1146,16 @@ static int gen6_get_rectangles__flush(struct sna *sna,
+ if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
+ return 0;
+
+- if (op->need_magic_ca_pass && sna->render.vbo)
+- return 0;
++ if (sna->render.vertex_offset) {
++ gen4_vertex_flush(sna);
++ if (gen6_magic_ca_pass(sna, op)) {
++ gen6_emit_flush(sna);
++ gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
++ gen6_emit_wm(sna,
++ GEN6_KERNEL(op->u.gen6.flags),
++ GEN6_VERTEX(op->u.gen6.flags) >> 2);
++ }
++ }
+
+ return gen4_vertex_finish(sna);
+ }
+@@ -3422,6 +3431,10 @@ static void gen6_render_reset(struct sna *sna)
+ sna->render_state.gen6.drawrect_offset = -1;
+ sna->render_state.gen6.drawrect_limit = -1;
+ sna->render_state.gen6.surface_table = -1;
++
++ sna->render.vertex_offset = 0;
++ sna->render.nvertex_reloc = 0;
++ sna->render.vb_id = 0;
+ }
+
+ static void gen6_render_fini(struct sna *sna)
+diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
+index 5880e7a..fa36ce6 100644
+--- a/src/sna/gen7_render.c
++++ b/src/sna/gen7_render.c
+@@ -1034,13 +1034,13 @@ gen7_emit_state(struct sna *sna,
+ sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
+ }
+
+-static void gen7_magic_ca_pass(struct sna *sna,
++static bool gen7_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+ {
+ struct gen7_render_state *state = &sna->render_state.gen7;
+
+ if (!op->need_magic_ca_pass)
+- return;
++ return false;
+
+ DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
+ sna->render.vertex_start, sna->render.vertex_index));
+@@ -1064,6 +1064,7 @@ static void gen7_magic_ca_pass(struct sna *sna,
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ state->last_primitive = sna->kgem.nbatch;
++ return true;
+ }
+
+ static void null_create(struct sna_static_stream *stream)
+@@ -1272,8 +1273,14 @@ static int gen7_get_rectangles__flush(struct sna *sna,
+ if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
+ return 0;
+
+- if (op->need_magic_ca_pass && sna->render.vbo)
+- return 0;
++ if (sna->render.vertex_offset) {
++ gen4_vertex_flush(sna);
++ if (gen7_magic_ca_pass(sna, op)) {
++ gen7_emit_pipe_invalidate(sna);
++ gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
++ gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
++ }
++ }
+
+ return gen4_vertex_finish(sna);
+ }
+@@ -2557,12 +2564,11 @@ static inline bool prefer_blt_copy(struct sna *sna,
+ struct kgem_bo *dst_bo,
+ unsigned flags)
+ {
+- if (flags & COPY_SYNC)
+- return false;
+-
+ if (sna->kgem.ring == KGEM_BLT)
+ return true;
+
++ assert((flags & COPY_SYNC) == 0);
++
+ if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
+ return true;
+
+@@ -3503,6 +3509,10 @@ static void gen7_render_reset(struct sna *sna)
+ sna->render_state.gen7.drawrect_offset = -1;
+ sna->render_state.gen7.drawrect_limit = -1;
+ sna->render_state.gen7.surface_table = -1;
++
++ sna->render.vertex_offset = 0;
++ sna->render.nvertex_reloc = 0;
++ sna->render.vb_id = 0;
+ }
+
+ static void gen7_render_fini(struct sna *sna)
+diff --git a/src/sna/kgem.c b/src/sna/kgem.c
+index 49815e7..6fa8ce4 100644
+--- a/src/sna/kgem.c
++++ b/src/sna/kgem.c
+@@ -81,16 +81,6 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
+
+ #define SHOW_BATCH 0
+
+-#ifndef USE_FASTRELOC
+-#undef DBG_NO_FAST_RELOC
+-#define DBG_NO_FAST_RELOC 1
+-#endif
+-
+-#ifndef USE_HANDLE_LUT
+-#undef DBG_NO_HANDLE_LUT
+-#define DBG_NO_HANDLE_LUT 1
+-#endif
+-
+ /* Worst case seems to be 965gm where we cannot write within a cacheline that
+ * is being simultaneously being read by the GPU, or within the sampler
+ * prefetch. In general, the chipsets seem to have a requirement that sampler
+@@ -1560,9 +1550,7 @@ inline static void kgem_bo_remove_from_active(struct kgem *kgem,
+
+ static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
+ {
+- if (!bo->scanout)
+- return;
+-
++ assert(bo->scanout);
+ assert(bo->proxy == NULL);
+
+ DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
+@@ -1722,7 +1710,9 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
+ }
+
+ if (bo->scanout) {
+- DBG(("%s: handle=%d -> scanout\n", __FUNCTION__, bo->handle));
++ assert (bo->delta);
++ DBG(("%s: handle=%d -> scanout\n",
++ __FUNCTION__, bo->handle));
+ list_add(&bo->list, &kgem->scanout);
+ return;
+ }
+@@ -1776,6 +1766,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
+ DBG(("%s: handle=%d -> flushing\n",
+ __FUNCTION__, bo->handle));
+
++ assert(bo->reusable);
+ list_add(&bo->request, &kgem->flushing);
+ if (bucket(bo) < NUM_CACHE_BUCKETS)
+ cache = &kgem->active[bucket(bo)][bo->tiling];
+@@ -1876,8 +1867,8 @@ static bool kgem_retire__flushing(struct kgem *kgem)
+ if (!bo->refcnt) {
+ if (bo->snoop) {
+ kgem_bo_move_to_snoop(kgem, bo);
+- } else if (kgem_bo_set_purgeable(kgem, bo)) {
+- assert(bo->reusable);
++ } else if (bo->reusable &&
++ kgem_bo_set_purgeable(kgem, bo)) {
+ kgem_bo_move_to_inactive(kgem, bo);
+ retired = true;
+ } else
+@@ -1935,12 +1926,8 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
+ continue;
+
+ if (bo->snoop) {
+- if (bo->needs_flush) {
+- list_add(&bo->request, &kgem->flushing);
+- bo->rq = (void *)kgem;
+- } else {
++ if (!bo->needs_flush)
+ kgem_bo_move_to_snoop(kgem, bo);
+- }
+ continue;
+ }
+
+@@ -2422,7 +2409,8 @@ void kgem_reset(struct kgem *kgem)
+ bo->rq = NULL;
+ bo->domain = DOMAIN_NONE;
+
+- if (!bo->refcnt) {
++ if (!bo->refcnt && !bo->reusable) {
++ assert(!bo->snoop);
+ DBG(("%s: discarding handle=%d\n",
+ __FUNCTION__, bo->handle));
+ kgem_bo_free(kgem, bo);
+@@ -2648,33 +2636,23 @@ void _kgem_submit(struct kgem *kgem)
+ DRM_IOCTL_I915_GEM_EXECBUFFER2,
+ &execbuf);
+ }
+- if (ret == -1 && (errno == EIO || errno == EBUSY)) {
+- DBG(("%s: GPU hang detected\n", __FUNCTION__));
+- kgem_throttle(kgem);
+- ret = 0;
+- }
+-
+ if (DEBUG_SYNC && ret == 0) {
+ struct drm_i915_gem_set_domain set_domain;
+
+- DBG(("%s: debug sync, starting\n", __FUNCTION__));
+-
+ VG_CLEAR(set_domain);
+ set_domain.handle = handle;
+ set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+ set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+
+ ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+- if (ret == -1) {
+- DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
+- kgem_throttle(kgem);
+- }
+-
+- DBG(("%s: debug sync, completed\n", __FUNCTION__));
+ }
++ if (ret == -1) {
++ DBG(("%s: GPU hang detected [%d]\n",
++ __FUNCTION__, errno));
++ kgem_throttle(kgem);
++ kgem->wedged = true;
+
+ #if !NDEBUG
+- if (ret < 0) {
+ ret = errno;
+ ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
+ kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
+@@ -2710,15 +2688,17 @@ void _kgem_submit(struct kgem *kgem)
+ (int)kgem->reloc[i].presumed_offset);
+ }
+
+- i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
+- if (i != -1) {
+- i = write(i, kgem->batch, batch_end*sizeof(uint32_t));
+- (void)i;
+- }
++ if (DEBUG_SYNC) {
++ int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
++ if (fd != -1) {
++ write(fd, kgem->batch, batch_end*sizeof(uint32_t));
++ close(fd);
++ }
+
+- FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
+- }
++ FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
++ }
+ #endif
++ }
+ }
+
+ kgem_commit(kgem);
+diff --git a/src/sna/kgem.h b/src/sna/kgem.h
+index d2b89f5..b345b49 100644
+--- a/src/sna/kgem.h
++++ b/src/sna/kgem.h
+@@ -420,9 +420,9 @@ static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem,
+ kgem_check_exec(kgem, num_surfaces);
+ }
+
+-static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords)
++static inline uint32_t *kgem_get_batch(struct kgem *kgem)
+ {
+- if (!kgem_check_batch(kgem, num_dwords)) {
++ if (kgem->nreloc) {
+ unsigned mode = kgem->mode;
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, mode);
+@@ -431,11 +431,6 @@ static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords)
+ return kgem->batch + kgem->nbatch;
+ }
+
+-static inline void kgem_advance_batch(struct kgem *kgem, int num_dwords)
+-{
+- kgem->nbatch += num_dwords;
+-}
+-
+ bool kgem_check_bo(struct kgem *kgem, ...) __attribute__((sentinel(0)));
+ bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo);
+ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(0)));
+diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
+index ba9a3cb..e388d80 100644
+--- a/src/sna/sna_accel.c
++++ b/src/sna/sna_accel.c
+@@ -7006,6 +7006,8 @@ sna_poly_line_blt(DrawablePtr drawable,
+ b->y1 = p.y;
+ b->y2 = last.y;
+ }
++ b->y2 += last.x == p.x;
++ b->x2 += last.y == p.y;
+ DBG(("%s: blt (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ b->x1, b->y1, b->x2, b->y2));
+@@ -7063,6 +7065,8 @@ sna_poly_line_blt(DrawablePtr drawable,
+ b->y1 = p.y;
+ b->y2 = last.y;
+ }
++ b->y2 += last.x == p.x;
++ b->x2 += last.y == p.y;
+ DBG(("%s: blt (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ b->x1, b->y1, b->x2, b->y2));
+@@ -7119,6 +7123,8 @@ sna_poly_line_blt(DrawablePtr drawable,
+ box.y1 = p.y;
+ box.y2 = last.y;
+ }
++ b->y2 += last.x == p.x;
++ b->x2 += last.y == p.y;
+ DBG(("%s: blt (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ box.x1, box.y1, box.x2, box.y2));
+diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
+index 0a581da..307e763 100644
+--- a/src/sna/sna_display.c
++++ b/src/sna/sna_display.c
+@@ -197,13 +197,15 @@ sna_output_backlight_set(xf86OutputPtr output, int level)
+ char path[1024], val[BACKLIGHT_VALUE_LEN];
+ int fd, len, ret;
+
+- DBG(("%s: level=%d\n", __FUNCTION__, level));
++ DBG(("%s: level=%d, max=%d\n", __FUNCTION__,
++ level, sna_output->backlight_max));
+
+- if (level > sna_output->backlight_max)
+- level = sna_output->backlight_max;
+- if (!sna_output->backlight_iface || level < 0)
++ if (!sna_output->backlight_iface)
+ return;
+
++ if ((unsigned)level > sna_output->backlight_max)
++ level = sna_output->backlight_max;
++
+ len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level);
+ sprintf(path, "%s/%s/brightness",
+ BACKLIGHT_CLASS, sna_output->backlight_iface);
+@@ -2768,10 +2770,12 @@ sna_covering_crtc(ScrnInfoPtr scrn,
+ #define MI_LOAD_REGISTER_IMM (0x22<<23)
+
+ static bool sna_emit_wait_for_scanline_gen7(struct sna *sna,
++ xf86CrtcPtr crtc,
+ int pipe, int y1, int y2,
+ bool full_height)
+ {
+ uint32_t *b;
++ uint32_t event;
+
+ if (!sna->kgem.has_secure_batches)
+ return false;
+@@ -2780,60 +2784,106 @@ static bool sna_emit_wait_for_scanline_gen7(struct sna *sna,
+ assert(y2 > y1);
+ assert(sna->kgem.mode);
+
+- b = kgem_get_batch(&sna->kgem, 16);
++ /* Always program one less than the desired value */
++ if (--y1 < 0)
++ y1 = crtc->bounds.y2;
++ y2--;
++
++ switch (pipe) {
++ default:
++ assert(0);
++ case 0:
++ event = 1 << (full_height ? 3 : 0);
++ break;
++ case 1:
++ event = 1 << (full_height ? 11 : 8);
++ break;
++ case 2:
++ event = 1 << (full_height ? 21 : 14);
++ break;
++ }
++
++ b = kgem_get_batch(&sna->kgem);
++
++ /* Both the LRI and WAIT_FOR_EVENT must be in the same cacheline */
++ if (((sna->kgem.nbatch + 6) >> 4) != (sna->kgem.nbatch + 10) >> 4) {
++ int dw = sna->kgem.nbatch + 6;
++ dw = ALIGN(dw, 16) - dw;
++ while (dw--)
++ *b++ = MI_NOOP;
++ }
++
+ b[0] = MI_LOAD_REGISTER_IMM | 1;
+ b[1] = 0x44050; /* DERRMR */
+- b[2] = ~(1 << (3*full_height + pipe*8));
++ b[2] = ~event;
+ b[3] = MI_LOAD_REGISTER_IMM | 1;
+ b[4] = 0xa188; /* FORCEWAKE_MT */
+ b[5] = 2 << 16 | 2;
+ b[6] = MI_LOAD_REGISTER_IMM | 1;
+ b[7] = 0x70068 + 0x1000 * pipe;
+- b[8] = (1 << 31) | (1 << 30) | (y1 << 16) | (y2 - 1);
+- b[9] = MI_WAIT_FOR_EVENT | 1 << (3*full_height + pipe*5);
++ b[8] = (1 << 31) | (1 << 30) | (y1 << 16) | y2;
++ b[9] = MI_WAIT_FOR_EVENT | event;
+ b[10] = MI_LOAD_REGISTER_IMM | 1;
+ b[11] = 0xa188; /* FORCEWAKE_MT */
+ b[12] = 2 << 16;
+ b[13] = MI_LOAD_REGISTER_IMM | 1;
+ b[14] = 0x44050; /* DERRMR */
+ b[15] = ~0;
+- kgem_advance_batch(&sna->kgem, 16);
++
++ sna->kgem.nbatch = b - sna->kgem.batch + 16;
+
+ sna->kgem.batch_flags |= I915_EXEC_SECURE;
+ return true;
+ }
+
+ static bool sna_emit_wait_for_scanline_gen6(struct sna *sna,
++ xf86CrtcPtr crtc,
+ int pipe, int y1, int y2,
+ bool full_height)
+ {
+ uint32_t *b;
++ uint32_t event;
+
+ if (!sna->kgem.has_secure_batches)
+ return false;
+
+ assert(y1 >= 0);
+ assert(y2 > y1);
+- assert(sna->kgem.mode);
++ assert(sna->kgem.mode == KGEM_RENDER);
++
++ /* Always program one less than the desired value */
++ if (--y1 < 0)
++ y1 = crtc->bounds.y2;
++ y2--;
++
++ /* The scanline granularity is 3 bits */
++ y1 &= ~7;
++ y2 &= ~7;
++ if (y2 == y1)
++ return false;
++
++ event = 1 << (3*full_height + pipe*8);
++
++ b = kgem_get_batch(&sna->kgem);
++ sna->kgem.nbatch += 10;
+
+- b = kgem_get_batch(&sna->kgem, 10);
+ b[0] = MI_LOAD_REGISTER_IMM | 1;
+ b[1] = 0x44050; /* DERRMR */
+- b[2] = ~(1 << (3*full_height + pipe*8));
++ b[2] = ~event;
+ b[3] = MI_LOAD_REGISTER_IMM | 1;
+ b[4] = 0x4f100; /* magic */
+- b[5] = (1 << 31) | (1 << 30) | pipe << 29 | (y1 << 16) | (y2 - 1);
+- b[6] = MI_WAIT_FOR_EVENT | 1 << (3*full_height + pipe*5);
++ b[5] = (1 << 31) | (1 << 30) | pipe << 29 | (y1 << 16) | y2;
++ b[6] = MI_WAIT_FOR_EVENT | event;
+ b[7] = MI_LOAD_REGISTER_IMM | 1;
+ b[8] = 0x44050; /* DERRMR */
+ b[9] = ~0;
+- kgem_advance_batch(&sna->kgem, 10);
+
+ sna->kgem.batch_flags |= I915_EXEC_SECURE;
+ return true;
+ }
+
+ static bool sna_emit_wait_for_scanline_gen4(struct sna *sna,
++ xf86CrtcPtr crtc,
+ int pipe, int y1, int y2,
+ bool full_height)
+ {
+@@ -2852,18 +2902,20 @@ static bool sna_emit_wait_for_scanline_gen4(struct sna *sna,
+ event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
+ }
+
+- b = kgem_get_batch(&sna->kgem, 5);
++ b = kgem_get_batch(&sna->kgem);
++ sna->kgem.nbatch += 5;
++
+ /* The documentation says that the LOAD_SCAN_LINES command
+ * always comes in pairs. Don't ask me why. */
+ b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
+ b[3] = b[1] = (y1 << 16) | (y2-1);
+ b[4] = MI_WAIT_FOR_EVENT | event;
+- kgem_advance_batch(&sna->kgem, 5);
+
+ return true;
+ }
+
+ static bool sna_emit_wait_for_scanline_gen2(struct sna *sna,
++ xf86CrtcPtr crtc,
+ int pipe, int y1, int y2,
+ bool full_height)
+ {
+@@ -2877,16 +2929,14 @@ static bool sna_emit_wait_for_scanline_gen2(struct sna *sna,
+ if (full_height)
+ y2 -= 2;
+
+- b = kgem_get_batch(&sna->kgem, 5);
++ b = kgem_get_batch(&sna->kgem);
++ sna->kgem.nbatch += 5;
++
+ /* The documentation says that the LOAD_SCAN_LINES command
+ * always comes in pairs. Don't ask me why. */
+ b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
+ b[3] = b[1] = (y1 << 16) | (y2-1);
+- if (pipe == 0)
+- b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
+- else
+- b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
+- kgem_advance_batch(&sna->kgem, 5);
++ b[4] = MI_WAIT_FOR_EVENT | 1 << (1 + 4*pipe);
+
+ return true;
+ }
+@@ -2934,13 +2984,13 @@ sna_wait_for_scanline(struct sna *sna,
+ if (sna->kgem.gen >= 0100)
+ ret = false;
+ else if (sna->kgem.gen >= 070)
+- ret = sna_emit_wait_for_scanline_gen7(sna, pipe, y1, y2, full_height);
++ ret = sna_emit_wait_for_scanline_gen7(sna, crtc, pipe, y1, y2, full_height);
+ else if (sna->kgem.gen >= 060)
+- ret =sna_emit_wait_for_scanline_gen6(sna, pipe, y1, y2, full_height);
++ ret =sna_emit_wait_for_scanline_gen6(sna, crtc, pipe, y1, y2, full_height);
+ else if (sna->kgem.gen >= 040)
+- ret = sna_emit_wait_for_scanline_gen4(sna, pipe, y1, y2, full_height);
++ ret = sna_emit_wait_for_scanline_gen4(sna, crtc, pipe, y1, y2, full_height);
+ else
+- ret = sna_emit_wait_for_scanline_gen2(sna, pipe, y1, y2, full_height);
++ ret = sna_emit_wait_for_scanline_gen2(sna, crtc, pipe, y1, y2, full_height);
+
+ return ret;
+ }
+diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
+index b48894e..9d249e3 100644
+--- a/src/sna/sna_dri.c
++++ b/src/sna/sna_dri.c
+@@ -146,7 +146,8 @@ static uint32_t color_tiling(struct sna *sna, DrawablePtr draw)
+ static uint32_t other_tiling(struct sna *sna, DrawablePtr draw)
+ {
+ /* XXX Can mix color X / depth Y? */
+- return kgem_choose_tiling(&sna->kgem, -I915_TILING_Y,
++ return kgem_choose_tiling(&sna->kgem,
++ sna->kgem.gen >=40 ? -I915_TILING_Y : -I915_TILING_X,
+ draw->width,
+ draw->height,
+ draw->bitsPerPixel);
+@@ -513,8 +514,11 @@ static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *dst, struct kge
+ return;
+
+ if (sync) {
+- DBG(("%s: sync, force RENDER ring\n", __FUNCTION__));
+- kgem_set_mode(&sna->kgem, KGEM_RENDER, dst);
++ DBG(("%s: sync, force %s ring\n", __FUNCTION__,
++ sna->kgem.gen >= 070 ? "BLT" : "RENDER"));
++ kgem_set_mode(&sna->kgem,
++ sna->kgem.gen >= 070 ? KGEM_BLT : KGEM_RENDER,
++ dst);
+ return;
+ }
+
+@@ -837,14 +841,23 @@ can_blit(struct sna * sna,
+ DRI2BufferPtr front,
+ DRI2BufferPtr back)
+ {
+- uint32_t f, b;
++ RegionPtr clip;
++ uint32_t s;
+
+ if (draw->type == DRAWABLE_PIXMAP)
+ return true;
+
+- f = get_private(front)->size;
+- b = get_private(back)->size;
+- return (f >> 16) >= (b >> 16) && (f & 0xffff) >= (b & 0xffff);
++ clip = &((WindowPtr)draw)->clipList;
++
++ s = get_private(front)->size;
++ if ((s>>16) < clip->extents.y2 || (s&0xffff) < clip->extents.x2)
++ return false;
++
++ s = get_private(back)->size;
++ if ((s>>16) < clip->extents.y2 || (s&0xffff) < clip->extents.x2)
++ return false;
++
++ return true;
+ }
+
+ static void
+@@ -2069,18 +2082,17 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
+ (uint32_t)*target_msc, (uint32_t)current_msc, (uint32_t)divisor));
+
+ if (divisor == 0 && current_msc >= *target_msc - 1) {
++ bool sync = current_msc < *target_msc;
+ if (can_exchange(sna, draw, front, back)) {
+- sna_dri_immediate_xchg(sna, draw, info,
+- current_msc < *target_msc);
++ sna_dri_immediate_xchg(sna, draw, info, sync);
+ } else if (can_blit(sna, draw, front, back)) {
+- sna_dri_immediate_blit(sna, draw, info,
+- current_msc < *target_msc);
++ sna_dri_immediate_blit(sna, draw, info, sync);
+ } else {
+ DRI2SwapComplete(client, draw, 0, 0, 0,
+ DRI2_BLIT_COMPLETE, func, data);
+ sna_dri_frame_event_info_free(sna, draw, info);
+ }
+- *target_msc = current_msc + 1;
++ *target_msc = current_msc + sync;
+ return TRUE;
+ }
+