xserver-xorg-video-intel: Changes to 'upstream-experimental'
NEWS | 66 ++++++++
configure.ac | 20 ++
src/intel.h | 1
src/intel_display.c | 29 +++
src/intel_dri.c | 13 +
src/intel_driver.c | 12 +
src/intel_module.c | 6
src/intel_options.c | 2
src/sna/fb/fbblt.c | 2
src/sna/gen2_render.c | 27 ++-
src/sna/gen3_render.c | 6
src/sna/gen4_render.c | 70 +++++----
src/sna/gen5_render.c | 36 +++-
src/sna/gen6_render.c | 41 +++--
src/sna/gen7_render.c | 12 -
src/sna/kgem.c | 124 +++++++++++++---
src/sna/kgem.h | 17 +-
src/sna/sna.h | 22 ++
src/sna/sna_accel.c | 330 ++++++++++---------------------------------
src/sna/sna_blt.c | 11 +
src/sna/sna_composite.c | 16 --
src/sna/sna_damage.c | 1
src/sna/sna_display.c | 161 ++++++++++++--------
src/sna/sna_dri.c | 43 +++++
src/sna/sna_driver.c | 58 ++++---
src/sna/sna_render.c | 47 +++---
src/sna/sna_render_inline.h | 25 ++-
src/sna/sna_trapezoids.c | 38 ++--
src/sna/sna_video_overlay.c | 1
src/sna/sna_video_sprite.c | 12 +
src/sna/sna_video_textured.c | 1
src/xvmc/Makefile.am | 2
32 files changed, 754 insertions(+), 498 deletions(-)
New commits:
commit 8f1afde57dca27e6542b0b8e7c87750f3d6367bf
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun Nov 11 16:16:20 2012 +0000
2.20.13 release
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/NEWS b/NEWS
index 014921d..3d29cfe 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,45 @@
+Release 2.20.13 (2012-11-11)
+============================
+Nothing but bug fixes. Many thanks to everyone who took the time to
+report their issues, and for their help in improving the driver.
+
+ * Sanity check the platform probe points to our expected i915 device
+ https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1069031
+
+ * Prevent 16-bit overflow for computing the sample area to upload of
+ sources for render operations
+ https://bugs.freedesktop.org/show_bug.cgi?id=56324
+
+ * Clamp the drawable box for migration to prevent 16-bit overflow
+ https://bugs.freedesktop.org/show_bug.cgi?id=56591
+
+ * Disable RandR hotplug events if Xinerama is enabled and thereby prevent
+ a crash upon hotplug
+ https://bugs.freedesktop.org/show_bug.cgi?id=55260
+
+ * Call ValidatePicture before attempting to flatten the alphamaps
+ https://bugs.freedesktop.org/show_bug.cgi?id=56367
+
+ * Clip the trapezoid correctly if it ends on the boundary pixel
+ https://bugs.freedesktop.org/show_bug.cgi?id=56395
+
+ * Make sure the pipeline choice is propagated to the scanline wait
+ across a batch flush
+ https://bugs.freedesktop.org/show_bug.cgi?id=47597
+
+ * Set the valid drawable box when choosing placement of BLT composite ops
+ https://bugs.freedesktop.org/show_bug.cgi?id=47597
+
+ * Prevent use-after-free when promoting a partial-GPU bo to a full-GPU bo
+ https://bugs.freedesktop.org/show_bug.cgi?id=56591
+
+ * gen4 opacity spans require the per-rectangle workaround
+ https://bugs.freedesktop.org/show_bug.cgi?id=55500
+
+ * Prevent use of invalid damage pointers when redirecting rendering
+ https://bugs.freedesktop.org/show_bug.cgi?id=56785
+
+
Release 2.20.12 (2012-10-20)
============================
More bug reports, more bug fixes! Perhaps the headline feature is
diff --git a/configure.ac b/configure.ac
index ce3b007..d92269f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
# Initialize Autoconf
AC_PREREQ([2.60])
AC_INIT([xf86-video-intel],
- [2.20.12],
+ [2.20.13],
[https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
[xf86-video-intel])
AC_CONFIG_SRCDIR([Makefile.am])
commit b16219a19f48b52dda91f26fcbbbbeda056589ab
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun Nov 11 11:05:35 2012 +0000
sna: Filter out the full-damage marker when undoing redirection
==25902== Invalid read of size 4
==25902== at 0x4980E13: _list_del (intel_list.h:218)
==25902== by 0x4980EAB: list_del (intel_list.h:240)
==25902== by 0x4981F4B: free_list (sna_damage.c:403)
==25902== by 0x4985131: __sna_damage_destroy (sna_damage.c:1467)
==25902== by 0x49A5276: sna_render_composite_redirect_done (sna_render.c:1921)
==25902== by 0x49C68FC: gen2_render_composite_done (gen2_render.c:1136)
==25902== by 0x497F90F: sna_composite (sna_composite.c:567)
==25902== by 0x4994725: glyphs_via_mask (sna_glyphs.c:1139)
==25902== by 0x4995FB7: sna_glyphs (sna_glyphs.c:1688)
==25902== by 0x8150EB4: ??? (in /usr/bin/Xorg)
==25902== by 0x813CA38: CompositeGlyphs (in /usr/bin/Xorg)
==25902== by 0x8146DE1: ??? (in /usr/bin/Xorg)
==25902== Address 0x7c079ac2 is not stack'd, malloc'd or (recently) free'd
Reported-by: bonbons67@internet.lu
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 3cb1449..34c795b 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -1914,11 +1914,13 @@ sna_render_composite_redirect_done(struct sna *sna,
assert(ok);
}
if (t->damage) {
- DBG(("%s: combining damage, offset=(%d, %d)\n",
- __FUNCTION__, t->box.x1, t->box.y1));
- sna_damage_combine(t->real_damage, t->damage,
+ DBG(("%s: combining damage (all? %d), offset=(%d, %d)\n",
+ __FUNCTION__, DAMAGE_IS_ALL(t->damage),
+ t->box.x1, t->box.y1));
+ sna_damage_combine(t->real_damage,
+ DAMAGE_PTR(t->damage),
t->box.x1, t->box.y1);
- __sna_damage_destroy(t->damage);
+ __sna_damage_destroy(DAMAGE_PTR(t->damage));
}
kgem_bo_destroy(&sna->kgem, op->dst.bo);
commit 69acbb77e8aad3370d5e8d9a9e067c54872d7082
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun Nov 11 10:49:59 2012 +0000
sna: Fix printing of uninitialised value in DBG
==25902== Use of uninitialised value of size 4
==25902== at 0x423098E: _itoa_word (_itoa.c:196)
==25902== by 0x4233F7F: vfprintf (vfprintf.c:1602)
==25902== by 0x42FAFAD: __vsnprintf_chk (vsnprintf_chk.c:65)
==25902== by 0x81DBE8E: Xvscnprintf (in /usr/bin/Xorg)
==25902== by 0x81DC8FB: LogVMessageVerb (in /usr/bin/Xorg)
==25902== by 0x81DCA62: LogVWrite (in /usr/bin/Xorg)
==25902== by 0x81DCA9B: VErrorF (in /usr/bin/Xorg)
==25902== by 0x81DC333: ErrorF (in /usr/bin/Xorg)
==25902== by 0x49B2FA8: trapezoid_span_inplace__x8r8g8b8 (sna_trapezoids.c:5069)
==25902== by 0x49B3407: trapezoid_span_inplace (sna_trapezoids.c:5166)
==25902== by 0x49B4C96: sna_composite_trapezoids (sna_trapezoids.c:5619)
Reported-by: bonbons67@internet.lu
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 008ba2e..8f2ea34 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -5066,8 +5066,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
pixmap = get_drawable_pixmap(dst->pDrawable);
get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y);
- DBG(("%s: format=%x, op=%d, color=%x\n",
- __FUNCTION__, dst->format, op, color));
+ DBG(("%s: format=%x, op=%d, lerp?=%d\n",
+ __FUNCTION__, dst->format, op, lerp));
if (lerp) {
struct inplace inplace;
commit 66e4c8ff40ab8cf722efa4293bb17b0d8f2dfa88
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun Nov 11 09:40:09 2012 +0000
sna: Flush pending rendering before enabling an output
This is to prevent falling in the trap of the rendering being delayed
until the next client renders some new content.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 87acb5d..d384bb2 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -1251,6 +1251,8 @@ retry: /* Attach per-crtc pixmap or direct */
if (bo == NULL)
return FALSE;
+ kgem_bo_submit(&sna->kgem, bo);
+
sna_crtc->bo = bo;
mode_to_kmode(&sna_crtc->kmode, mode);
if (!sna_crtc_apply(crtc)) {
commit 94dd0b9ee9f55e7c09b8c0ee18939fa69ce66da2
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat Nov 10 16:52:09 2012 +0000
sna/gen2: Fix use of uninitialised redirection
==29553== Invalid read of size 4
==29553== at 0x4980E1B: _list_del (intel_list.h:218)
==29553== by 0x4980EB3: list_del (intel_list.h:240)
==29553== by 0x4981F53: free_list (sna_damage.c:403)
==29553== by 0x4985139: __sna_damage_destroy (sna_damage.c:1467)
==29553== by 0x49A527E: sna_render_composite_redirect_done (sna_render.c:1921)
==29553== by 0x49C6904: gen2_render_composite_done (gen2_render.c:1136)
==29553== by 0x497F917: sna_composite (sna_composite.c:567)
==29553== by 0x8150C41: ??? (in /usr/bin/Xorg)
==29553== by 0x8142F13: CompositePicture (in /usr/bin/Xorg)
==29553== by 0x8145F58: ??? (in /usr/bin/Xorg)
==29553== by 0x81436F2: ??? (in /usr/bin/Xorg)
==29553== by 0x807965C: ??? (in /usr/bin/Xorg)
==29553== Address 0x9407e188 is not stack'd, malloc'd or (recently) free'd
Reported-by: bonbons67@internet.lu
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 6e51c18..9663dff 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1803,6 +1803,8 @@ gen2_render_composite(struct sna *sna,
}
tmp->op = op;
+
+ sna_render_composite_redirect_init(tmp);
if (too_large(tmp->dst.width, tmp->dst.height) ||
tmp->dst.bo->pitch > MAX_3D_PITCH) {
if (!sna_render_composite_redirect(sna, tmp,
@@ -2298,6 +2300,8 @@ gen2_render_composite_spans(struct sna *sna,
}
tmp->base.op = op;
+
+ sna_render_composite_redirect_init(&tmp->base);
if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
tmp->base.dst.bo->pitch > MAX_3D_PITCH) {
if (!sna_render_composite_redirect(sna, &tmp->base,
commit 0f1c30818c9d782b066147448bbcc9ac95ac834f
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat Nov 10 16:52:09 2012 +0000
sna: Fix use of uninitialised value in DBG
==29553== Use of uninitialised value of size 4
==29553== at 0x4230964: _itoa_word (_itoa.c:195)
==29553== by 0x4233F7F: vfprintf (vfprintf.c:1602)
==29553== by 0x42FAFAD: __vsnprintf_chk (vsnprintf_chk.c:65)
==29553== by 0x81DBE8E: Xvscnprintf (in /usr/bin/Xorg)
==29553== by 0x81DC8FB: LogVMessageVerb (in /usr/bin/Xorg)
==29553== by 0x81DCA62: LogVWrite (in /usr/bin/Xorg)
==29553== by 0x81DCA9B: VErrorF (in /usr/bin/Xorg)
==29553== by 0x81DC333: ErrorF (in /usr/bin/Xorg)
==29553== by 0x49434F0: kgem_create_buffer (kgem.c:4887)
==29553== by 0x4943B09: kgem_create_buffer_2d (kgem.c:4969)
==29553== by 0x4943E19: kgem_upload_source_image (kgem.c:5021)
==29553== by 0x49A0567: upload (sna_render.c:505)
==29553==
Reported-by: bonbons67@internet.lu
References: https://bugs.freedesktop.org/show_bug.cgi?id=56785
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 28e69c3..4fb8a6f 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -4885,7 +4885,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
}
DBG(("%s: created handle=%d for buffer\n",
- __FUNCTION__, bo->base.handle));
+ __FUNCTION__, handle));
__kgem_bo_init(&bo->base, handle, alloc);
debug_alloc(kgem, alloc * PAGE_SIZE);
commit cc2b13c9c05e57dc5004d93b56f332ea95f0a4ef
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat Nov 10 11:50:15 2012 +0000
sna: Specify read/write domains for no-relocation fastpath
On review (read triggering BUGs), we do need to supply the domain tracking
of the buffers that is being replaced from the relocation path.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index ea56adf..28e69c3 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -114,8 +114,6 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define LOCAL_I915_EXEC_NO_RELOC (1<<10)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<11)
-#define LOCAL_EXEC_OBJECT_WRITE (1<<1)
-
#define LOCAL_I915_GEM_USERPTR 0x32
#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
struct local_i915_gem_userptr {
@@ -2315,7 +2313,9 @@ void _kgem_submit(struct kgem *kgem)
kgem->exec[i].alignment = 0;
kgem->exec[i].offset = rq->bo->presumed_offset;
kgem->exec[i].flags = 0;
- kgem->exec[i].rsvd1 = 0;
+ kgem->exec[i].rsvd1 = (I915_GEM_DOMAIN_COMMAND |
+ I915_GEM_DOMAIN_INSTRUCTION |
+ I915_GEM_DOMAIN_VERTEX);
kgem->exec[i].rsvd2 = 0;
rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
@@ -3925,9 +3925,10 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;
- if (read_write_domain & 0x7ff) {
+ bo->exec->rsvd1 |= read_write_domain >> 16;
+ if (read_write_domain & 0x7fff) {
assert(!bo->snoop || kgem->can_blt_cpu);
- bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE;
+ bo->exec->rsvd1 |= (uint64_t)(read_write_domain & 0x7fff) << 32;
kgem_bo_mark_dirty(bo);
}
@@ -4353,10 +4354,10 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
void kgem_clear_dirty(struct kgem *kgem)
{
- struct kgem_request *rq = kgem->next_request;
+ struct list * const buffers = &kgem->next_request->buffers;
struct kgem_bo *bo;
- list_for_each_entry(bo, &rq->buffers, request) {
+ list_for_each_entry(bo, buffers, request) {
if (!bo->dirty)
break;
commit 0c4a2bcc3d63ecc02e3a940e38e9a416b51ad0c8
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat Nov 10 12:34:52 2012 +0000
sna: Allow snooped buffers to be retained (and reused) between batches
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index f22febd..ea56adf 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2023,10 +2023,9 @@ static void kgem_finish_buffers(struct kgem *kgem)
used = ALIGN(bo->used + PAGE_SIZE-1, PAGE_SIZE);
if (!DBG_NO_UPLOAD_ACTIVE &&
used + PAGE_SIZE <= bytes(&bo->base) &&
- (kgem->has_llc || !IS_CPU_MAP(bo->base.map))) {
+ (kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) {
DBG(("%s: retaining upload buffer (%d/%d)\n",
__FUNCTION__, bo->used, bytes(&bo->base)));
- assert(!bo->base.snoop);
bo->used = used;
list_move(&bo->base.list,
&kgem->active_buffers);
@@ -4663,8 +4662,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
assert(bo->mmapped);
- assert(!bo->base.snoop);
- assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc);
+ assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc || bo->base.snoop);
if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE) {
DBG(("%s: skip write %x buffer, need %x\n",
commit f5d79b202dd448e61ab6ffce26fe9cbf9051d770
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat Nov 10 10:30:04 2012 +0000
sna/gen2: Add a modicum of fallback DBG
References: https://bugs.freedesktop.org/show_bug.cgi?id=56785
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 024b107..6e51c18 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1816,6 +1816,8 @@ gen2_render_composite(struct sna *sna,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
+ DBG(("%s: fallback -- unable to prepare source\n",
+ __FUNCTION__));
goto cleanup_dst;
case 0:
gen2_composite_solid_init(sna, &tmp->src, 0);
@@ -1839,6 +1841,8 @@ gen2_render_composite(struct sna *sna,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
+ DBG(("%s: fallback -- unable to prepare mask\n",
+ __FUNCTION__));
goto cleanup_src;
case 0:
gen2_composite_solid_init(sna, &tmp->mask, 0);
@@ -1855,8 +1859,12 @@ gen2_render_composite(struct sna *sna,
tmp->has_component_alpha = true;
if (gen2_blend_op[op].src_alpha &&
(gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
- if (op != PictOpOver)
- return false;
+ if (op != PictOpOver) {
+ DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n",
+ __FUNCTION__,
+ gen2_blend_op[op].src_blend));
+ goto cleanup_dst;
+ }
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
@@ -1903,8 +1911,11 @@ gen2_render_composite(struct sna *sna,
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
- NULL))
+ NULL)) {
+ DBG(("%s: fallback, operation does not fit into GTT\n",
+ __FUNCTION__));
goto cleanup_mask;
+ }
}
gen2_emit_composite_state(sna, tmp);
commit 27327633138dce159ca2e91fe5eac1565bd45e1c
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri Nov 9 17:08:01 2012 +0000
sna/gen4: Only 965gm suffers the !snoop restriction
So fixup the bogus assertion for g4x
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 6d44a4a..be97458 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -649,7 +649,7 @@ gen4_bind_bo(struct sna *sna,
uint32_t domains;
uint16_t offset;
- assert(!kgem_bo_is_snoop(bo));
+ assert(sna->kgem.gen != 40 || !kgem_bo_is_snoop(bo));
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format);
commit 8d3b5ea135fd8f16da2cbfb98041e32c7001a38f
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri Nov 9 15:31:03 2012 +0000
xvmc: Use DRMINTEL_LIBS instead of hardcoding -ldrm_intel
Reported-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/xvmc/Makefile.am b/src/xvmc/Makefile.am
index d3ed449..36a939b 100644
--- a/src/xvmc/Makefile.am
+++ b/src/xvmc/Makefile.am
@@ -20,4 +20,4 @@ AM_CFLAGS = @XORG_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \
@XVMCLIB_CFLAGS@ -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0
libIntelXvMC_la_LDFLAGS = -version-number 1:0:0
-libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ -lpthread -ldrm_intel
+libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ @DRMINTEL_LIBS@ -lpthread
commit f040b97b01495aa43f7771ebb8ca5c0d44038bc1
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu Nov 8 23:42:10 2012 +0000
sna: Mark no-reloc write buffers
If we bypass the relocation processing, we also then bypass the
pending-write analysis, so we need to supply those to the kernel
ourselves (to maintain gpu-cpu coherency).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index e2c5da8..f22febd 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -114,6 +114,8 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define LOCAL_I915_EXEC_NO_RELOC (1<<10)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<11)
+#define LOCAL_EXEC_OBJECT_WRITE (1<<1)
+
#define LOCAL_I915_GEM_USERPTR 0x32
#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
struct local_i915_gem_userptr {
@@ -3926,6 +3928,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
if (read_write_domain & 0x7ff) {
assert(!bo->snoop || kgem->can_blt_cpu);
+ bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE;
kgem_bo_mark_dirty(bo);
}
commit 85ba7e96268dbb8da4bb34078333695a451c6570
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu Nov 8 15:56:13 2012 +0000
sna: Experiment with using reloc.handle as an index into the execbuffer
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/configure.ac b/configure.ac
index 9ea1e3c..ce3b007 100644
--- a/configure.ac
+++ b/configure.ac
@@ -301,6 +301,15 @@ if test "x$FASTRELOC" = xyes; then
AC_DEFINE(USE_FASTRELOC,1,[Assume "fast reloc" support])
fi
+AC_ARG_ENABLE(handle-lut,
+ AS_HELP_STRING([--enable-handle-lut],
+ [Enable use of "handle LUT" (experimental) [default=no]]),
+ [HANDLE_LUT="$enableval"],
+ [HANDLE_LUT=no])
+if test "x$HANDLE_LUT" = xyes; then
+ AC_DEFINE(USE_HANDLE_LUT,1,[Assume "handle LUT" support])
+fi
+
AC_ARG_ENABLE(async-swap,
AS_HELP_STRING([--enable-async-swap],
[Enable use of asynchronous swaps (experimental) [default=no]]),
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index e643b85..e2c5da8 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -71,6 +71,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_FAST_RELOC 0
+#define DBG_NO_HANDLE_LUT 0
#define DBG_DUMP 0
#define SHOW_BATCH 0
@@ -80,6 +81,11 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_FAST_RELOC 1
#endif
+#ifndef USE_HANDLE_LUT
+#undef DBG_NO_HANDLE_LUT
+#define DBG_NO_HANDLE_LUT 1
+#endif
+
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is being simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
@@ -103,8 +109,10 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_NO_RELOC 24
+#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 25
#define LOCAL_I915_EXEC_NO_RELOC (1<<10)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<11)
#define LOCAL_I915_GEM_USERPTR 0x32
#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
@@ -668,6 +676,14 @@ static bool test_has_no_reloc(struct kgem *kgem)
return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}
+static bool test_has_handle_lut(struct kgem *kgem)
+{
+ if (DBG_NO_HANDLE_LUT)
+ return false;
+
+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
+}
+
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
FILE *file;
@@ -859,6 +875,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
kgem->has_no_reloc));
+ kgem->has_handle_lut = test_has_handle_lut(kgem);
+ DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
+ kgem->has_handle_lut));
+
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
@@ -1212,6 +1232,7 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
__FUNCTION__, bo->handle, kgem->nexec));
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
+ bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
exec->handle = bo->handle;
exec->offset = bo->presumed_offset;
@@ -1246,8 +1267,8 @@ static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
int n;
for (n = 0; n < kgem->nreloc; n++) {
- if (kgem->reloc[n].target_handle == 0) {
- kgem->reloc[n].target_handle = bo->handle;
+ if (kgem->reloc[n].target_handle == ~0U) {
+ kgem->reloc[n].target_handle = bo->target_handle;
kgem->reloc[n].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + bo->presumed_offset;
@@ -2047,9 +2068,11 @@ static void kgem_finish_buffers(struct kgem *kgem)
gem_write(kgem->fd, shrink->handle,
0, bo->used, bo->mem);
+ shrink->target_handle =
+ kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
- if (kgem->reloc[n].target_handle == bo->base.handle) {
- kgem->reloc[n].target_handle = shrink->handle;
+ if (kgem->reloc[n].target_handle == bo->base.target_handle) {
+ kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
@@ -2202,6 +2225,8 @@ void kgem_reset(struct kgem *kgem)
kgem->batch_flags = 0;
if (kgem->has_no_reloc)
kgem->batch_flags |= LOCAL_I915_EXEC_NO_RELOC;
+ if (kgem->has_handle_lut)
+ kgem->batch_flags |= LOCAL_I915_EXEC_HANDLE_LUT;
kgem->next_request = __kgem_request_alloc();
@@ -2227,7 +2252,7 @@ static int compact_batch_surface(struct kgem *kgem)
shrink *= sizeof(uint32_t);
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
- kgem->reloc[n].target_handle == 0)
+ kgem->reloc[n].target_handle == ~0U)
kgem->reloc[n].delta -= shrink;
if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
@@ -2292,6 +2317,7 @@ void _kgem_submit(struct kgem *kgem)
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;
+ rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
rq->bo->exec = &kgem->exec[i];
rq->bo->rq = rq; /* useful sanity check */
list_add(&rq->bo->request, &rq->buffers);
@@ -3895,7 +3921,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
}
kgem->reloc[index].delta = delta;
- kgem->reloc[index].target_handle = bo->handle;
+ kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;
if (read_write_domain & 0x7ff) {
@@ -3906,7 +3932,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
delta += bo->presumed_offset;
} else {
kgem->reloc[index].delta = delta;
- kgem->reloc[index].target_handle = 0;
+ kgem->reloc[index].target_handle = ~0U;
kgem->reloc[index].presumed_offset = 0;
}
kgem->reloc[index].read_domains = read_write_domain >> 16;
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 8789b55..b42a8e0 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -64,6 +64,7 @@ struct kgem_bo {
uint32_t unique_id;
uint32_t refcnt;
uint32_t handle;
+ uint32_t target_handle;
uint32_t presumed_offset;
uint32_t delta;
union {
@@ -165,6 +166,7 @@ struct kgem {
uint32_t has_cacheing :1;
uint32_t has_llc :1;
uint32_t has_no_reloc :1;
+ uint32_t has_handle_lut :1;
uint32_t can_blt_cpu :1;
commit 93d8dddbb92431d6e2c48a17b71cac9f7047902e
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu Nov 8 09:41:21 2012 +0000
sna: Set the known offset for the batch as well
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 131a209..e643b85 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2287,7 +2287,7 @@ void _kgem_submit(struct kgem *kgem)
kgem->exec[i].relocation_count = kgem->nreloc;
kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
kgem->exec[i].alignment = 0;
- kgem->exec[i].offset = 0;
+ kgem->exec[i].offset = rq->bo->presumed_offset;
kgem->exec[i].flags = 0;
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;
commit 120fa0ef8d04f5e82e5f7a0636033d3d96efa1e8
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed Nov 7 17:41:20 2012 +0000
sna: Support a fast no relocation changed path
x11perf -copywinwin10 on gm45 with c2d L9400:
before: 553,000 op/s
after: 565,000 op/s
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/configure.ac b/configure.ac
index 8ddf40b..9ea1e3c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -292,6 +292,15 @@ if test "x$USERPTR" = xyes; then
AC_DEFINE(USE_USERPTR,1,[Assume USERPTR support])
fi
+AC_ARG_ENABLE(fast-reloc,
+ AS_HELP_STRING([--enable-fast-reloc],
+ [Enable use of "fast reloc" (experimental) [default=no]]),
+ [FASTRELOC="$enableval"],
+ [FASTRELOC=no])
+if test "x$FASTRELOC" = xyes; then
+ AC_DEFINE(USE_FASTRELOC,1,[Assume "fast reloc" support])
+fi
+
AC_ARG_ENABLE(async-swap,
AS_HELP_STRING([--enable-async-swap],
[Enable use of asynchronous swaps (experimental) [default=no]]),
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 9c01694..131a209 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -70,10 +70,16 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
+#define DBG_NO_FAST_RELOC 0
#define DBG_DUMP 0
#define SHOW_BATCH 0
+#ifndef USE_FASTRELOC
+#undef DBG_NO_FAST_RELOC
+#define DBG_NO_FAST_RELOC 1
+#endif
+
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is being simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
@@ -96,6 +102,9 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
+#define LOCAL_I915_PARAM_HAS_NO_RELOC 24
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<10)
#define LOCAL_I915_GEM_USERPTR 0x32
#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
@@ -651,6 +660,14 @@ static bool test_has_execbuffer2(struct kgem *kgem)
errno == EFAULT);
}
+static bool test_has_no_reloc(struct kgem *kgem)
+{
+ if (DBG_NO_FAST_RELOC)
+ return false;
+
+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
+}
+
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
FILE *file;
@@ -838,6 +855,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
DBG(("%s: has userptr? %d\n", __FUNCTION__,
kgem->has_userptr));
+ kgem->has_no_reloc = test_has_no_reloc(kgem);
+ DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
+ kgem->has_no_reloc));
+
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
@@ -2177,8 +2198,10 @@ void kgem_reset(struct kgem *kgem)
kgem->nbatch = 0;
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
- kgem->batch_flags = 0;
kgem->flush = 0;
+ kgem->batch_flags = 0;
+ if (kgem->has_no_reloc)
+ kgem->batch_flags |= LOCAL_I915_EXEC_NO_RELOC;
kgem->next_request = __kgem_request_alloc();
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 7e48db5..8789b55 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -164,6 +164,7 @@ struct kgem {
uint32_t has_secure_batches :1;
uint32_t has_cacheing :1;
uint32_t has_llc :1;
+ uint32_t has_no_reloc :1;
uint32_t can_blt_cpu :1;
commit b7d2fcf47a9569d0944097a8be60ca3be72b42f6
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu Nov 8 08:55:25 2012 +0000
Remove reliance on hard-coded DRI name
This provides for using the existing DDX with future DRI drivers which
may break from the traditional names - but only with the help of the
user/packager. This scheme needs to be replaced with a robust mechanism
for driver loading if AIGLX and co are to be kept.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/src/intel_dri.c b/src/intel_dri.c
index 867a465..17d9d50 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -1515,6 +1515,17 @@ out_complete:
static int dri2_server_generation;
#endif
+static const char *dri_driver_name(intel_screen_private *intel)
+{
+ const char *s = xf86GetOptValString(intel->Options, OPTION_DRI);
+ Bool dummy;
+
+ if (s == NULL || xf86getBoolValue(&dummy, s))
+ return INTEL_INFO(intel)->gen < 40 ? "i915" : "i965";
+
+ return s;
+}
+
Bool I830DRI2ScreenInit(ScreenPtr screen)
{
ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
@@ -1564,7 +1575,7 @@ Bool I830DRI2ScreenInit(ScreenPtr screen)
intel->deviceName = drmGetDeviceNameFromFd(intel->drmSubFD);
memset(&info, '\0', sizeof(info));
info.fd = intel->drmSubFD;
- info.driverName = INTEL_INFO(intel)->gen < 40 ? "i915" : "i965";
+ info.driverName = dri_driver_name(intel);
info.deviceName = intel->deviceName;
#if DRI2INFOREC_VERSION == 1
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 3029b22..254aafa 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -221,11 +221,19 @@ static Bool I830GetEarlyOptions(ScrnInfoPtr scrn)
return TRUE;
}
+static Bool intel_option_cast_string_to_bool(intel_screen_private *intel,
+ int id, Bool val)
+{
+ xf86getBoolValue(&val, xf86GetOptValString(intel->Options, id));
+ return val;
+}
+
static void intel_check_dri_option(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
+
intel->directRenderingType = DRI_NONE;
- if (!xf86ReturnOptValBool(intel->Options, OPTION_DRI, TRUE))
+ if (!intel_option_cast_string_to_bool(intel, OPTION_DRI, TRUE))
intel->directRenderingType = DRI_DISABLED;
if (scrn->depth != 16 && scrn->depth != 24 && scrn->depth != 30) {
diff --git a/src/intel_options.c b/src/intel_options.c
index dcab9e7..443e84d 100644
--- a/src/intel_options.c
+++ b/src/intel_options.c
@@ -8,7 +8,7 @@ const OptionInfoRec intel_options[] = {
{OPTION_ACCEL_DISABLE, "NoAccel", OPTV_BOOLEAN, {0}, 0},
{OPTION_ACCEL_METHOD, "AccelMethod", OPTV_STRING, {0}, 0},
{OPTION_BACKLIGHT, "Backlight", OPTV_STRING, {0}, 0},
- {OPTION_DRI, "DRI", OPTV_BOOLEAN, {0}, 1},
+ {OPTION_DRI, "DRI", OPTV_STRING, {0}, 0},
{OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, 0},
{OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, 0},
{OPTION_TILING_2D, "Tiling", OPTV_BOOLEAN, {0}, 1},
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 83c79c1..23d9572 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -41,6 +41,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "sna.h"
#include "sna_reg.h"
+#include "intel_options.h"
#include <xf86drm.h>
#include <i915_drm.h>
@@ -2317,6 +2318,17 @@ out_complete:
}
#endif
+static const char *dri_driver_name(struct sna *sna)
+{
+ const char *s = xf86GetOptValString(sna->Options, OPTION_DRI);
+ Bool dummy;
+
+ if (s == NULL || xf86getBoolValue(&dummy, s))
+ return (sna->kgem.gen && sna->kgem.gen < 40) ? "i915" : "i965";
+
+ return s;
+}
+
bool sna_dri_open(struct sna *sna, ScreenPtr screen)
{
DRI2InfoRec info;
@@ -2344,8 +2356,7 @@ bool sna_dri_open(struct sna *sna, ScreenPtr screen)
sna->deviceName = drmGetDeviceNameFromFd(sna->kgem.fd);
memset(&info, '\0', sizeof(info));
info.fd = sna->kgem.fd;
- info.driverName =
- (sna->kgem.gen && sna->kgem.gen < 40) ? "i915" : "i965";
+ info.driverName = dri_driver_name(sna);
info.deviceName = sna->deviceName;
DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n",
diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c
index 055c71c..f214b90 100644
--- a/src/sna/sna_driver.c
+++ b/src/sna/sna_driver.c
@@ -363,6 +363,12 @@ static void sna_setup_capabilities(ScrnInfoPtr scrn, int fd)
#endif
}
+static Bool sna_option_cast_to_bool(struct sna *sna, int id, Bool val)
+{
+ xf86getBoolValue(&val, xf86GetOptValString(sna->Options, id));
+ return val;
+}
+
Reply to: