[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-intel: Changes to 'upstream-experimental'



 NEWS                         |   30 +++
 configure.ac                 |   22 ++
 src/intel_display.c          |   77 ++++++++-
 src/intel_dri.c              |   14 +
 src/sna/blt.c                |    4 
 src/sna/compiler.h           |    6 
 src/sna/gen4_render.c        |   70 +++++---
 src/sna/gen4_vertex.c        |  354 ++++++++++++++++++++++++++++++++++++++++++-
 src/sna/gen5_render.c        |    9 -
 src/sna/gen6_render.c        |    9 -
 src/sna/gen7_render.c        |   25 +--
 src/sna/gen7_render.h        |    2 
 src/sna/kgem.c               |   47 +++--
 src/sna/kgem.h               |    2 
 src/sna/sna_accel.c          |   54 ++++--
 src/sna/sna_cpu.c            |   24 ++
 src/sna/sna_display.c        |   78 +++++++++
 src/sna/sna_dri.c            |   45 +++--
 src/sna/sna_io.c             |   25 ---
 src/sna/sna_render.c         |    2 
 src/sna/sna_video_textured.c |    5 
 21 files changed, 767 insertions(+), 137 deletions(-)

New commits:
commit 678279eb373310f1a71a3d74e5a500b343e98830
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Apr 6 15:58:50 2013 +0100

    2.21.6 release

diff --git a/NEWS b/NEWS
index 05a20fa..4fc9a6b 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,33 @@
+Release 2.21.6 (2013-04-06)
+===========================
+A surprising highlight of this release is a little refresh to the KMS
+support for OpenBSD. OpenBSD now has its own KMS implementation which is
+mostly compatible with the interface in Linux, with one or two tweaks
+supplied by Mark Kettenis. This release continues to clean up behaviour
+for Haswell.
+
+ * Workaround a failure by the xserver to invalidate DRI buffers
+   following a pixmap change for XComposite redirection.
+   https://bugs.freedesktop.org/show_bug.cgi?id=62614
+
+ * Fix computation of clip extents for stippling
+   https://bugs.freedesktop.org/show_bug.cgi?id=62618
+
+ * Support KMS on OpenBSD, by Mark Kettenis
+
+ * Clean up sockets upon CloseScreen (making ourselves better behaved
+   for muxed setups).
+
+ * Fix the tests for AVX/AVX2 support in CPUID and remember to check for
+   OS support as well.
+
+ * Report a monotonic UST value for undisplayed drawables rather than 0
+   by Daniel Kurtz
+
+ * Fix video playback on gen4 through a complex clip (more gen4 GPU woes)
+   https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1162046
+
+
 Release 2.21.5 (2013-03-21)
 ===========================
 Haswell reintroduces a command to load the scanline window from the
diff --git a/configure.ac b/configure.ac
index 794b383..fa82507 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.21.5],
+        [2.21.6],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit 5332d5a7e055042233e279385bfe1388adfe15fa
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Apr 6 15:42:23 2013 +0100

    configure: Allow valgrind support to be manually enabled
    
    Irrespective of the DDX debug settings, some people wish to run Xorg
    under valgrind and so prefer to have the cleaner output by making the
    DDX valgrind aware.
    
    (Actually Maarten wants valgrind support enabled by default...)
    
    Suggested-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/configure.ac b/configure.ac
index 8b12d01..794b383 100644
--- a/configure.ac
+++ b/configure.ac
@@ -353,6 +353,12 @@ AC_ARG_ENABLE(debug,
 			     [Enables internal debugging [default=no]]),
               [DEBUG="$enableval"],
               [DEBUG=no])
+AC_ARG_ENABLE(valgrind,
+	      AS_HELP_STRING([--enable-valgrind],
+			     [Enables valgrindified ioctls for debugging [default=no]]),
+              [VG="$enableval"],
+              [VG=no])
+
 # Store the list of server defined optional extensions in REQUIRED_MODULES
 XORG_DRIVER_CHECK_EXT(RANDR, randrproto)
 XORG_DRIVER_CHECK_EXT(RENDER, renderproto)
@@ -439,12 +445,22 @@ AM_CONDITIONAL(DEBUG, test x$DEBUG != xno)
 AM_CONDITIONAL(FULL_DEBUG, test x$DEBUG = xfull)
 if test "x$DEBUG" = xno; then
 	AC_DEFINE(NDEBUG,1,[Disable internal debugging])
+else
+	if test "x$VG" != xyes; then
+		VG=auto
+	fi
 fi
-if test "x$DEBUG" != xno; then
+if test "x$VG" != xno; then
 	PKG_CHECK_MODULES(VALGRIND, [valgrind], have_valgrind=yes, have_valgrind=no)
+	AC_MSG_CHECKING([whether to include valgrind support])
 	if test x$have_valgrind = xyes; then
 		AC_DEFINE([HAVE_VALGRIND], 1, [Use valgrind intrinsics to suppress false warnings])
+	else
+		if test "x$VG" = xyes; then
+			AC_MSG_ERROR([valgrind support requested, but valgrind-dev headers not found])
+		fi
 	fi
+	AC_MSG_RESULT([$have_valgrind ($VG)])
 fi
 if test "x$DEBUG" = xsync; then
 	AC_DEFINE(DEBUG_SYNC,1,[Enable synchronous rendering for debugging])

commit 091cf6f0477824c5826547c02394752b6dc944ce
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Apr 6 09:30:57 2013 +0100

    sna: Improve assertions to detect rogue priv->cpu status
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 9bc6fe3..217a4a2 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -340,6 +340,10 @@ static void assert_pixmap_damage(PixmapPtr p)
 		return;
 	}
 
+	if (DAMAGE_IS_ALL(priv->gpu_damage)) {
+		assert(priv->cpu == false || (priv->mapped && IS_CPU_MAP(priv->gpu_bo->map)));
+	}
+
 	assert(!DAMAGE_IS_ALL(priv->gpu_damage) || priv->cpu_damage == NULL);
 	assert(!DAMAGE_IS_ALL(priv->cpu_damage) || priv->gpu_damage == NULL);
 
@@ -1788,6 +1792,7 @@ mark_damage:
 	}
 
 done:
+	assert(priv->gpu_damage == NULL);
 	if (flags & MOVE_WRITE) {
 		assert(DAMAGE_IS_ALL(priv->cpu_damage));
 		priv->source_count = SOURCE_BIAS;
@@ -1898,6 +1903,7 @@ static inline bool region_inplace(struct sna *sna,
 	if (DAMAGE_IS_ALL(priv->gpu_damage)) {
 		DBG(("%s: yes, already wholly damaged on the GPU\n", __FUNCTION__));
 		assert(priv->gpu_bo);
+		assert(priv->cpu == false || (priv->mapped && IS_CPU_MAP(priv->gpu_bo->map)));
 		return true;
 	}
 
@@ -1915,6 +1921,11 @@ static inline bool region_inplace(struct sna *sna,
 		>= sna->kgem.half_cpu_cache_pages;
 }
 
+static inline bool box_empty(const BoxRec *box)
+{
+	return box->x2 <= box->x1 || box->y2 <= box->y1;
+}
+
 bool
 sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 				RegionPtr region,
@@ -1937,6 +1948,9 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		assert_drawable_contains_box(drawable, &region->extents);
 	}
 
+	if (box_empty(&region->extents))
+		return true;
+
 	priv = sna_pixmap(pixmap);
 	if (priv == NULL) {
 		DBG(("%s: not attached to %p\n", __FUNCTION__, pixmap));
@@ -2012,6 +2026,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 							      pixmap->drawable.height)) {
 						DBG(("%s: replaced entire pixmap, destroying CPU shadow\n",
 						     __FUNCTION__));
+						assert(priv->cpu == false || (priv->mapped && IS_CPU_MAP(priv->gpu_bo->map)));
 						sna_damage_destroy(&priv->cpu_damage);
 						list_del(&priv->list);
 					} else
@@ -2348,11 +2363,6 @@ out:
 	return true;
 }
 
-static inline bool box_empty(const BoxRec *box)
-{
-	return box->x2 <= box->x1 || box->y2 <= box->y1;
-}
-
 bool
 sna_drawable_move_to_cpu(DrawablePtr drawable, unsigned flags)
 {
@@ -2474,6 +2484,8 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
 			      pixmap->drawable.width,
 			      pixmap->drawable.height)) {
 		assert(priv->gpu_bo);
+		assert(priv->gpu_bo->proxy == NULL);
+		assert(priv->cpu == false || (priv->mapped && IS_CPU_MAP(priv->gpu_bo->map)));
 		sna_damage_destroy(&priv->cpu_damage);
 		list_del(&priv->list);
 		goto done;
@@ -2715,6 +2727,8 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box,
 		DBG(("%s: use GPU fast path (all-damaged)\n", __FUNCTION__));
 		assert(priv->cpu_damage == NULL);
 		assert(priv->gpu_bo);
+		assert(priv->gpu_bo->proxy == NULL);
+		assert(priv->cpu == false || (priv->mapped && IS_CPU_MAP(priv->gpu_bo->map)));
 		goto use_gpu_bo;
 	}
 
@@ -2879,6 +2893,7 @@ done:
 	if (sna_damage_is_all(&priv->gpu_damage,
 			      pixmap->drawable.width,
 			      pixmap->drawable.height)) {
+		assert(priv->cpu == false || (priv->mapped && IS_CPU_MAP(priv->gpu_bo->map)));
 		sna_damage_destroy(&priv->cpu_damage);
 		list_del(&priv->list);
 		*damage = NULL;
@@ -3121,9 +3136,10 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 			      pixmap->drawable.height)) {
 		DBG(("%s: already all-damaged\n", __FUNCTION__));
 		assert(priv->gpu_bo);
+		assert(priv->gpu_bo->proxy == NULL);
+		assert(priv->cpu == false || (priv->mapped && IS_CPU_MAP(priv->gpu_bo->map)));
 		sna_damage_destroy(&priv->cpu_damage);
 		list_del(&priv->list);
-		assert(priv->cpu == false || IS_CPU_MAP(priv->gpu_bo->map));
 		goto active;
 	}
 
@@ -3279,8 +3295,10 @@ done:
 	sna_damage_reduce_all(&priv->gpu_damage,
 			      pixmap->drawable.width,
 			      pixmap->drawable.height);
-	if (DAMAGE_IS_ALL(priv->gpu_damage))
+	if (DAMAGE_IS_ALL(priv->gpu_damage)) {
+		assert(priv->cpu == false || (priv->mapped && IS_CPU_MAP(priv->gpu_bo->map)));
 		sna_pixmap_free_cpu(sna, priv);
+	}
 
 active:
 	if (flags & MOVE_WRITE)
@@ -3945,6 +3963,7 @@ move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
 
 	if (DAMAGE_IS_ALL(priv->gpu_damage)) {
 		assert(priv->gpu_bo);
+		assert(priv->cpu == false || (priv->mapped && IS_CPU_MAP(priv->gpu_bo->map)));
 		return true;
 	}
 

commit 4a43aa81e27e8a651fde8a4761fd14bd8824d90c
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Apr 4 10:53:55 2013 +0100

    sna: Restore bo->flush status for large bo
    
    Since we started discarding the flush flags on cached bo (in order to
    prevent DRI flush states leaking), we failed to preserve the flush flag
    for large bo (which uses it to keep batches trim and other hints).
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index c670dbb..4136ce9 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3606,7 +3606,6 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 			assert(!bo->scanout);
 			assert(bo->refcnt == 0);
 			assert(bo->reusable);
-			assert(bo->flush == true);
 
 			if (kgem->gen < 040) {
 				if (bo->pitch < pitch) {
@@ -3640,6 +3639,7 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
 			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
 			bo->refcnt = 1;
+			bo->flush = true;
 			return bo;
 		}
 

commit ed3dab44a717a1a88470228b5e33f20de1e4ad0d
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Apr 2 15:20:52 2013 +0100

    sna: Adjust userptr structure for implicit padding
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 9013e68..c670dbb 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -128,7 +128,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
 struct local_i915_gem_userptr {
 	uint64_t user_ptr;
-	uint32_t user_size;
+	uint64_t user_size;
 	uint32_t flags;
 #define I915_USERPTR_READ_ONLY (1<<0)
 #define I915_USERPTR_UNSYNCHRONIZED (1<<31)
@@ -1482,6 +1482,7 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 
 	if (IS_USER_MAP(bo->map)) {
 		assert(bo->rq == NULL);
+		assert(!__kgem_busy(kgem, bo->handle));
 		assert(MAP(bo->map) != bo || bo->io || bo->flush);
 		if (!(bo->io || bo->flush)) {
 			DBG(("%s: freeing snooped base\n", __FUNCTION__));

commit 4e2fc5aee035c3059ca33dbcafc71dc5988d6b09
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Apr 2 14:29:29 2013 +0100

    sna: Relax scanline waits on HSW to be emittable from either ring
    
    My overzealous reading of the bspec led me to the conclusion that the
    MI_LOAD_SCANLINES command was only available on the blitter ring. This
    is false, thankfully, and allows us to do vsync'ed Xv.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 6d61650..0d32086 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -2887,15 +2887,13 @@ static bool sna_emit_wait_for_scanline_hsw(struct sna *sna,
 	uint32_t event;
 	uint32_t *b;
 
-	if (sna->kgem.mode != KGEM_BLT)
-		return false;
-
 	b = kgem_get_batch(&sna->kgem);
 	sna->kgem.nbatch += 5;
 
 	/* The documentation says that the LOAD_SCAN_LINES command
 	 * always comes in pairs. Don't ask me why. */
 	switch (pipe) {
+	default: assert(0);
 	case 0: event = 0; break;
 	case 1: event = 1 << 19; break;
 	case 2: event = 4 << 19; break;
@@ -2904,6 +2902,7 @@ static bool sna_emit_wait_for_scanline_hsw(struct sna *sna,
 	b[3] = b[1] = (y1 << 16) | (y2-1);
 
 	switch (pipe) {
+	default: assert(0);
 	case 0: event = 0; break;
 	case 1: event = 1 << 8; break;
 	case 2: event = 1 << 14; break;

commit 5a36fdcee769195d5c6e642e84a8976114e7c6de
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Apr 2 11:01:53 2013 +0100

    sna/gen4: Kill stray debugging ErrorF from previous commit
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index c05b37b..69a5c77 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1392,7 +1392,6 @@ gen4_render_video(struct sna *sna,
 
 		n = gen4_get_rectangles(sna, &tmp, min(nbox, 16),
 					gen4_video_bind_surfaces);
-		ErrorF("n=%d/%d\n", n, nbox);
 		assert(n);
 		nbox -= n;
 

commit 3d7e16addb2fb5f35936aafe8e16685a91d30f59
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Apr 2 10:58:52 2013 +0100

    sna/gen4: Break the Video rendering loop into 16 rectangle chunks
    
    If we feed more than 16 rectangles into the video rendering pipeline,
    the GPU goes crazy and starts emitting corruption. Lalalala.
    
    Bugzilla: https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1162046
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 1bf5ad2..c05b37b 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1387,37 +1387,51 @@ gen4_render_video(struct sna *sna,
 
 	box = REGION_RECTS(dstRegion);
 	nbox = REGION_NUM_RECTS(dstRegion);
-	while (nbox--) {
-		BoxRec r;
+	do {
+		int n;
 
-		r.x1 = box->x1 + pix_xoff;
-		r.x2 = box->x2 + pix_xoff;
-		r.y1 = box->y1 + pix_yoff;
-		r.y2 = box->y2 + pix_yoff;
+		n = gen4_get_rectangles(sna, &tmp, min(nbox, 16),
+					gen4_video_bind_surfaces);
+		ErrorF("n=%d/%d\n", n, nbox);
+		assert(n);
+		nbox -= n;
 
-		gen4_get_rectangles(sna, &tmp, 1, gen4_video_bind_surfaces);
+		do {
+			BoxRec r;
 
-		OUT_VERTEX(r.x2, r.y2);
-		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
-		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
+			r.x1 = box->x1 + pix_xoff;
+			r.x2 = box->x2 + pix_xoff;
+			r.y1 = box->y1 + pix_yoff;
+			r.y2 = box->y2 + pix_yoff;
 
-		OUT_VERTEX(r.x1, r.y2);
-		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
-		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
+			OUT_VERTEX(r.x2, r.y2);
+			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
+			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
-		OUT_VERTEX(r.x1, r.y1);
-		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
-		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
+			OUT_VERTEX(r.x1, r.y2);
+			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
+			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
-		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
-			sna_damage_add_box(&priv->gpu_damage, &r);
-			sna_damage_subtract_box(&priv->cpu_damage, &r);
-		}
-		box++;
-	}
-	priv->clear = false;
+			OUT_VERTEX(r.x1, r.y1);
+			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
+			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
-	gen4_vertex_flush(sna);
+			if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
+				sna_damage_add_box(&priv->gpu_damage, &r);
+				sna_damage_subtract_box(&priv->cpu_damage, &r);
+			}
+			box++;
+		} while (--n);
+
+		gen4_vertex_flush(sna);
+		if (!nbox)
+			break;
+
+		/* VUE corruption strikes again */
+		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
+	} while (1);
+
+	priv->clear = false;
 	return true;
 }
 
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index bd20325..d94dbd8 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -230,6 +230,11 @@ sna_video_textured_put_image(ScrnInfoPtr scrn,
 	     drw_x, drw_y, drw_w, drw_h,
 	     id, width, height, sync));
 
+	DBG(("%s: region %d:(%d, %d), (%d, %d)\n", __FUNCTION__,
+	     RegionNumRects(clip),
+	     clip->extents.x1, clip->extents.y1,
+	     clip->extents.x2, clip->extents.y2));
+
 	if (buf == 0) {
 		DBG(("%s: garbage video buffer\n", __FUNCTION__));
 		return BadAlloc;

commit f09aa788d79d36688bcfdd3b49b92367590c5f16
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Apr 2 10:01:21 2013 +0100

    DRI2GetMSC: Do not send a bogus ust for when the drawable is not displayed
    
    According to the OpenGL glx_sync_control spec, the Unadjusted System Time
    (or UST) is a 64-bit monotonically increasing counter that is available
    throughout the system:
    http://www.opengl.org/registry/specs/OML/glx_sync_control.txt
    
    Therefore, sending 0, even in this corner case, is out of spec. However,
    we cannot just return FALSE here, as that causes a BadDrawable error to
    be sent to the client, which often mishandles it. This results in a
    certain compositor terminating, for example.
    
    As an alternative we can use the monotonic system timestamp which in
    theory should also be monotonic with the previous and subsequent vblank
    times.
    
    Based on a patch by Daniel Kurtz.
    
    Reported-by: Daniel Kurtz <djkurtz@chromium.org>
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_dri.c b/src/intel_dri.c
index f351203..8f27921 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -1326,6 +1326,16 @@ blit_fallback:
 	return TRUE;
 }
 
+static uint64_t gettime_us(void)
+{
+	struct timespec tv;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &tv))
+		return 0;
+
+	return (uint64_t)tv.tv_sec * 1000000 + tv.tv_nsec / 1000;
+}
+
 /*
  * Get current frame count and frame count timestamp, based on drawable's
  * crtc.
@@ -1339,9 +1349,9 @@ I830DRI2GetMSC(DrawablePtr draw, CARD64 *ust, CARD64 *msc)
 	drmVBlank vbl;
 	int ret, pipe = I830DRI2DrawablePipe(draw);
 
-	/* Drawable not displayed, make up a value */
+	/* Drawable not displayed, make up a *monotonic* value */
 	if (pipe == -1) {
-		*ust = 0;
+		*ust = gettime_us();
 		*msc = 0;
 		return TRUE;
 	}
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 0962e25..5fb1662 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -37,6 +37,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #endif
 
 #include <errno.h>
+#include <time.h>
 #include <string.h>
 
 #include "sna.h"
@@ -2216,6 +2217,16 @@ sna_dri_async_swap(ClientPtr client, DrawablePtr draw,
 }
 #endif
 
+static uint64_t gettime_us(void)
+{
+	struct timespec tv;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &tv))
+		return 0;
+
+	return (uint64_t)tv.tv_sec * 1000000 + tv.tv_nsec / 1000;
+}
+
 /*
  * Get current frame count and frame count timestamp, based on drawable's
  * crtc.
@@ -2227,13 +2238,16 @@ sna_dri_get_msc(DrawablePtr draw, CARD64 *ust, CARD64 *msc)
 	drmVBlank vbl;
 	int pipe;
 
-	/* Drawable not displayed, make up a value */
-	*ust = *msc = 0;
 
 	pipe = sna_dri_get_pipe(draw);
 	DBG(("%s(pipe=%d)\n", __FUNCTION__, pipe));
-	if (pipe == -1)
+	if (pipe == -1) {
+fail:
+		/* Drawable not displayed, make up a *monotonic* value */
+		*ust = gettime_us();
+		*msc = 0;
 		return TRUE;
+	}
 
 	VG_CLEAR(vbl);
 	vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
@@ -2246,6 +2260,7 @@ sna_dri_get_msc(DrawablePtr draw, CARD64 *ust, CARD64 *msc)
 	} else {
 		DBG(("%s: query failed on pipe %d, ret=%d\n",
 		     __FUNCTION__, pipe, errno));
+		goto fail;
 	}
 
 	return TRUE;

commit 4af622edfc18af523e1fa9063379f68374e19b04
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Apr 1 22:44:13 2013 +0100

    sna: Try to eliminate pending operations to the bo being replaced
    
    When we are replacing a bo with fresh data, we can drop pending
    operations to it and thereby reduce the complexity of the replacement.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 231dc8e..9013e68 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1732,6 +1732,23 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
 	return NULL;
 }
 
+void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo)
+{
+	if (kgem->nexec != 1 || bo->exec == NULL)
+		return;
+
+	DBG(("%s: only handle in batch, discarding last operations\n",
+	     __FUNCTION__));
+
+	assert(bo->exec == &kgem->exec[0]);
+	assert(kgem->exec[0].handle == bo->handle);
+	assert(RQ(bo->rq) == kgem->next_request);
+
+	bo->refcnt++;
+	kgem_reset(kgem);
+	bo->refcnt--;
+}
+
 static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 {
 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
@@ -1782,16 +1799,8 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	assert(bo->io == false);
 	assert(bo->scanout == false);
 
-	if (bo->exec && kgem->nexec == 1) {
-		DBG(("%s: only handle in batch, discarding last operations\n",
-		     __FUNCTION__));
-		assert(bo->exec == &kgem->exec[0]);
-		assert(kgem->exec[0].handle == bo->handle);
-		assert(RQ(bo->rq) == kgem->next_request);
-		bo->refcnt = 1;
-		kgem_reset(kgem);
-		bo->refcnt = 0;
-	}
+	kgem_bo_undo(kgem, bo);
+	assert(bo->refcnt == 0);
 
 	if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
 		__kgem_bo_clear_busy(bo);
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 82f9b52..f2b1c98 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -573,6 +573,8 @@ static inline bool kgem_bo_is_snoop(struct kgem_bo *bo)
 	return bo->snoop;
 }
 
+void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo);
+
 bool __kgem_busy(struct kgem *kgem, int handle);
 
 static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring)
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 540f3a6..14c0d8c 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -1362,38 +1362,25 @@ bool sna_replace(struct sna *sna,
 {
 	struct kgem_bo *bo = *_bo;
 	struct kgem *kgem = &sna->kgem;
-	bool busy;
 	void *dst;
 
-	busy = __kgem_bo_is_busy(kgem, bo);
 	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d) busy?=%d\n",
 	     __FUNCTION__, bo->handle,
 	     pixmap->drawable.width,
 	     pixmap->drawable.height,
 	     pixmap->drawable.bitsPerPixel,
-	     bo->tiling, busy));
+	     bo->tiling,
+	     __kgem_bo_is_busy(kgem, bo)));
 
 	assert(!sna_pixmap(pixmap)->pinned);
 
-	if (!busy && upload_inplace__tiled(kgem, bo)) {
-		BoxRec box;
+	kgem_bo_undo(kgem, bo);
 
-		box.x1 = box.y1 = 0;
-		box.x2 = pixmap->drawable.width;
-		box.y2 = pixmap->drawable.height;
+	if (__kgem_bo_is_busy(kgem, bo)) {
+		struct kgem_bo *new_bo;
 
-		if (write_boxes_inplace__tiled(kgem, src,
-					       stride, pixmap->drawable.bitsPerPixel, 0, 0,
-					       bo, 0, 0, &box, 1))
+		if (indirect_replace(sna, pixmap, bo, src, stride))
 			return true;
-	}
-
-	if ((busy || !kgem_bo_can_map(kgem, bo)) &&
-	    indirect_replace(sna, pixmap, bo, src, stride))
-		return true;
-
-	if (busy) {
-		struct kgem_bo *new_bo;
 
 		new_bo = kgem_create_2d(kgem,
 					pixmap->drawable.width,

commit ef0038d358e613381e03c077e06a87fc49108d87
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Apr 1 22:43:48 2013 +0100

    sna: Allow the compiler to inline memcpy for the bitblt routines
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/blt.c b/src/sna/blt.c
index 4735d14..af87667 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -138,7 +138,7 @@ xmm_save_128(__m128i *dst, __m128i data)
 }
 #endif
 
-void
+fast_memcpy void
 memcpy_blt(const void *src, void *dst, int bpp,
 	   int32_t src_stride, int32_t dst_stride,
 	   int16_t src_x, int16_t src_y,
@@ -213,7 +213,7 @@ memcpy_blt(const void *src, void *dst, int bpp,
 	}
 }
 
-void
+fast_memcpy void
 memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
 		  int32_t src_stride, int32_t dst_stride,
 		  int16_t src_x, int16_t src_y,
diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index b5c9ac2..62f51f0 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -63,6 +63,12 @@
 #define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse")))
 #endif
 
+#if HAS_GCC(4, 5) && defined(__OPTIMIZE__)
+#define fast_memcpy __attribute__((target("inline-all-stringops")))
+#else
+#define fast_memcpy
+#endif
+
 #ifdef HAVE_VALGRIND
 #define VG(x) x
 #else

commit 43181692f752f0a552d2e2c76d8379fe16e521cf
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Mar 28 15:41:38 2013 +0000

    sna/gen7: Refine is_gt2() for Haswell versus Ivybridge
    
    The two similar chipsets do not use the same PCI-ID encoding schema.
    
    Fixes regression from
    commit 235a3981ea9759317b392302a2b2b8f4fafab410
    Author: Chris Wilson <chris@chris-wilson.co.uk>
    Date:   Tue Mar 26 20:37:14 2013 +0000
    
        sna/gen7: Use GT2 values for GT2 variants
    
    Reported-by: zaverel@free.fr
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index e3f80d4..74b0cba 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -3687,7 +3687,7 @@ static void gen7_render_fini(struct sna *sna)
 
 static bool is_gt2(struct sna *sna)
 {
-	return DEVICE_ID(sna->PciInfo) & 0x30;
+	return DEVICE_ID(sna->PciInfo) & (sna->kgem.gen == 075 ? 0x30 : 0x20);
 }
 
 static bool is_mobile(struct sna *sna)

commit 96c10bdff95a3f8a68c6623446655c4c3dbf738a
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Mar 27 22:10:37 2013 +0000

    sna/gen7: Resist the temptation to overprogram the number of PS threads for HSW
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 773d2f3..e3f80d4 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -125,10 +125,10 @@ static const struct gt_info hsw_gt1_info = {
 };
 
 static const struct gt_info hsw_gt2_info = {
-	.max_vs_threads = 280,
-	.max_gs_threads = 280,
+	.max_vs_threads = 140,
+	.max_gs_threads = 140,
 	.max_wm_threads =
-		(204 - 1) << HSW_PS_MAX_THREADS_SHIFT |
+		(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
 		1 << HSW_PS_SAMPLE_MASK_SHIFT,
 	.urb = { 256, 1664, 640 },
 };

commit 19dfa72c28c6dc677dbfec3a538d4481985195e5
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Mar 27 16:56:10 2013 +0000

    sna/gen4+: Set read-write allocation mode for the target render cache
    
    As we often first clear the destination before performing a blend, we
    get a performance boost if that first write populates the render cache.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index e40a1b7..1bf5ad2 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -502,7 +502,7 @@ gen4_bind_bo(struct sna *sna,
 	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));
 
 	/* After the first bind, we manage the cache domains within the batch */
-	offset = kgem_bo_get_binding(bo, format);
+	offset = kgem_bo_get_binding(bo, format | is_dst << 31);
 	if (offset) {
 		if (is_dst)
 			kgem_bo_mark_dirty(bo);
@@ -517,9 +517,10 @@ gen4_bind_bo(struct sna *sna,
 		 GEN4_SURFACE_BLEND_ENABLED |
 		 format << GEN4_SURFACE_FORMAT_SHIFT);
 
-	if (is_dst)
+	if (is_dst) {
+		ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
 		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
-	else
+	} else
 		domains = I915_GEM_DOMAIN_SAMPLER << 16;
 	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
@@ -530,7 +531,7 @@ gen4_bind_bo(struct sna *sna,
 	ss[4] = 0;
 	ss[5] = 0;
 
-	kgem_bo_set_binding(bo, format, offset);
+	kgem_bo_set_binding(bo, format | is_dst << 31, offset);
 
 	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
 	     offset, bo->handle, ss[1],
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 8b50d22..7038444 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -490,7 +490,7 @@ gen5_bind_bo(struct sna *sna,
 
 	/* After the first bind, we manage the cache domains within the batch */
 	if (!DBG_NO_SURFACE_CACHE) {
-		offset = kgem_bo_get_binding(bo, format);
+		offset = kgem_bo_get_binding(bo, format | is_dst << 31);
 		if (offset) {
 			if (is_dst)
 				kgem_bo_mark_dirty(bo);
@@ -506,9 +506,10 @@ gen5_bind_bo(struct sna *sna,
 		 GEN5_SURFACE_BLEND_ENABLED |
 		 format << GEN5_SURFACE_FORMAT_SHIFT);
 
-	if (is_dst)
+	if (is_dst) {
+		ss[0] |= GEN5_SURFACE_RC_READ_WRITE;
 		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
-	else
+	} else
 		domains = I915_GEM_DOMAIN_SAMPLER << 16;
 	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
@@ -519,7 +520,7 @@ gen5_bind_bo(struct sna *sna,
 	ss[4] = 0;
 	ss[5] = 0;
 
-	kgem_bo_set_binding(bo, format, offset);
+	kgem_bo_set_binding(bo, format | is_dst << 31, offset);
 
 	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
 	     offset, bo->handle, ss[1],
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 64eccc5..8101faf 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1035,7 +1035,7 @@ gen6_bind_bo(struct sna *sna,
 	uint32_t is_scanout = is_dst && bo->scanout;
 
 	/* After the first bind, we manage the cache domains within the batch */
-	offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
+	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
 	if (offset) {
 		DBG(("[%x]  bo(handle=%d), format=%d, reuse %s binding\n",
 		     offset, bo->handle, format,
@@ -1051,9 +1051,10 @@ gen6_bind_bo(struct sna *sna,
 	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
 		 GEN6_SURFACE_BLEND_ENABLED |
 		 format << GEN6_SURFACE_FORMAT_SHIFT);
-	if (is_dst)
+	if (is_dst) {
+		ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
 		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
-	else
+	} else
 		domains = I915_GEM_DOMAIN_SAMPLER << 16;
 	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 	ss[2] = ((width - 1)  << GEN6_SURFACE_WIDTH_SHIFT |
@@ -1064,7 +1065,7 @@ gen6_bind_bo(struct sna *sna,
 	ss[4] = 0;
 	ss[5] = is_scanout ? 0 : 3 << 16;
 
-	kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
+	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
 
 	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
 	     offset, bo->handle, ss[1],

commit d9b8c2039d1be17af8c56364341fc3e10795f200
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Mar 27 14:49:15 2013 +0000

    sna/gen7: Fix MOCS for Haswell
    
    The memory attributes changed slightly, and in particular there is now
    an explicit uncached setting - which of course happened to be the value
    currently selected.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 9e40860..773d2f3 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1190,7 +1190,7 @@ gen7_bind_bo(struct sna *sna,
 	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
 
 	/* After the first bind, we manage the cache domains within the batch */
-	offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
+	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
 	if (offset) {
 		if (is_dst)
 			kgem_bo_mark_dirty(bo);


Reply to: