[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-intel: Changes to 'debian-unstable'



 configure.ac                                                 |    3 
 debian/changelog                                             |    8 
 debian/patches/fix-fd-leak-when-intel-scrn-create-fails.diff |  117 
 debian/patches/series                                        |    1 
 src/sna/Makefile.am                                          |    2 
 src/sna/blt.c                                                |  693 +
 src/sna/compiler.h                                           |    9 
 src/sna/gen6_common.h                                        |   11 
 src/sna/gen7_render.c                                        |   34 
 src/sna/gen8_render.c                                        |   40 
 src/sna/gen8_render.h                                        |    1 
 src/sna/gen9_render.c                                        | 4101 +++++++++++
 src/sna/gen9_render.h                                        | 1130 +++
 src/sna/kgem.c                                               |    4 
 src/sna/kgem.h                                               |   23 
 src/sna/sna.h                                                |    9 
 src/sna/sna_accel.c                                          |    5 
 src/sna/sna_blt.c                                            |   19 
 src/sna/sna_display.c                                        |  295 
 src/sna/sna_dri2.c                                           |   64 
 src/sna/sna_dri3.c                                           |    1 
 src/sna/sna_present.c                                        |  152 
 src/sna/sna_render.c                                         |   79 
 src/sna/sna_render.h                                         |   58 
 src/sna/sna_video_sprite.c                                   |   65 
 src/uxa/i830_reg.h                                           |    6 
 src/uxa/intel.h                                              |    1 
 src/uxa/intel_batchbuffer.c                                  |   11 
 src/uxa/intel_batchbuffer.h                                  |   19 
 src/uxa/intel_present.c                                      |    7 
 src/uxa/intel_uxa.c                                          |   29 
 test/Makefile.am                                             |    6 
 test/dri2-race.c                                             |   30 
 test/xvidmode.c                                              |   54 
 34 files changed, 6778 insertions(+), 309 deletions(-)

New commits:
commit a1ca6219097af9cefd7561d7cb20349ac4314d6b
Author: Timo Aaltonen <tjaalton@debian.org>
Date:   Tue May 24 11:40:18 2016 +0300

    upload to unstable

diff --git a/debian/changelog b/debian/changelog
index c3c282b..98ceac2 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,9 +1,10 @@
-xserver-xorg-video-intel (2:2.99.917+git20160325-2) UNRELEASED; urgency=medium
+xserver-xorg-video-intel (2:2.99.917+git20160522-1) unstable; urgency=medium
 
+  * New upstream snapshot. (Closes: #823116)
   * fix-fd-leak-when-intel-scrn-create-fails.diff: Fix a failure falling back
     on another driver when scrn create fails.
 
- -- Timo Aaltonen <tjaalton@debian.org>  Fri, 22 Apr 2016 15:58:10 +0300
+ -- Timo Aaltonen <tjaalton@debian.org>  Tue, 24 May 2016 11:40:02 +0300
 
 xserver-xorg-video-intel (2:2.99.917+git20160325-1) unstable; urgency=medium
 

commit 8477615ae1bd284aca1221185ffefe0630d3f7ab
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun May 22 14:18:20 2016 +0100

    sna: Allow disconnected outputs to retain state without EDID checks
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 79c660f..4ddb20e 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -5056,6 +5056,9 @@ output_check_status(struct sna *sna, struct sna_output *output)
 	if (output->status != status)
 		return false;
 
+	if (status != XF86OutputStatusConnected)
+		return true;
+
 	if (output->edid_len == 0)
 		return false;
 

commit 25d2c2d049a8c9f2b2ef0895d6079c8b273ad121
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed May 18 23:26:12 2016 +0100

    sna: Confirm the EDID is the same after a hotplug before ignoring
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 16d0321..79c660f 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -5021,19 +5021,25 @@ output_check_status(struct sna *sna, struct sna_output *output)
 {
 	union compat_mode_get_connector compat_conn;
 	struct drm_mode_modeinfo dummy;
+	struct drm_mode_get_blob blob;
 	xf86OutputStatus status;
+	char *edid;
 
 	VG_CLEAR(compat_conn);
 
+	compat_conn.conn.connection = -1;
 	compat_conn.conn.connector_id = output->id;
 	compat_conn.conn.count_modes = 1; /* skip detect */
 	compat_conn.conn.modes_ptr = (uintptr_t)&dummy;
 	compat_conn.conn.count_encoders = 0;
-	compat_conn.conn.count_props = 0;
+	compat_conn.conn.props_ptr = (uintptr_t)output->prop_ids;
+	compat_conn.conn.prop_values_ptr = (uintptr_t)output->prop_values;
+	compat_conn.conn.count_props = output->num_props;
 
-	(void)drmIoctl(sna->kgem.fd,
-		       DRM_IOCTL_MODE_GETCONNECTOR,
-		       &compat_conn.conn);
+	if (drmIoctl(sna->kgem.fd,
+		     DRM_IOCTL_MODE_GETCONNECTOR,
+		     &compat_conn.conn) == 0)
+		output->update_properties = false;
 
 	switch (compat_conn.conn.connection) {
 	case DRM_MODE_CONNECTED:
@@ -5047,7 +5053,25 @@ output_check_status(struct sna *sna, struct sna_output *output)
 		status = XF86OutputStatusUnknown;
 		break;
 	}
-	return output->status == status;
+	if (output->status != status)
+		return false;
+
+	if (output->edid_len == 0)
+		return false;
+
+	edid = alloca(output->edid_len);
+
+	VG_CLEAR(blob);
+	blob.blob_id = output->prop_values[output->edid_idx];
+	blob.length = output->edid_len;
+	blob.data = (uintptr_t)edid;
+	if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob))
+		return false;
+
+	if (blob.length != output->edid_len)
+		return false;
+
+	return memcmp(edid, output->edid_raw, output->edid_len) == 0;
 }
 
 void sna_mode_discover(struct sna *sna, bool tell)

commit a508b11bde9f3119b49b3e0f652587efb9e037af
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue May 17 07:55:03 2016 +0100

    sna: Don't skip migration-to-GPU for TearFree
    
    In 46caee86db0f ("sna: Fix reporting of errno after setcrtc failure"),
    the intention was to avoid reporting a failure to migrate whilst wedged for
    a simple copy from the frontbuffer to TearFree's shadow buffer. However,
    by skipping the migration, we never flushed any dirt from the CPU buffer
    prior to doing the TearFree flip.
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=95401#c7
    References: https://bugs.freedesktop.org/show_bug.cgi?id=95414#c4
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 7976171..16d0321 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -8519,11 +8519,9 @@ static bool move_crtc_to_gpu(struct sna *sna)
 	xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn);
 	int i;
 
-	if (sna->flags & SNA_TEAR_FREE)
-		return true;
-
 	for (i = 0; i < sna->mode.num_real_crtc; i++) {
 		struct sna_crtc *crtc = to_sna_crtc(config->crtc[i]);
+		unsigned hint;
 
 		assert(crtc);
 
@@ -8539,10 +8537,13 @@ static bool move_crtc_to_gpu(struct sna *sna)
 		if (crtc->shadow_bo)
 			continue;
 
+		hint = MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT;
+		if (sna->flags & SNA_TEAR_FREE)
+			hint |= __MOVE_FORCE;
+
 		DBG(("%s: CRTC %d [pipe=%d] requires frontbuffer\n",
 		     __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc)));
-		return sna_pixmap_move_to_gpu(sna->front,
-					      MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT);
+		return sna_pixmap_move_to_gpu(sna->front, hint);
 	}
 
 	return true;

commit 48569eb18d125e20aa817549506fc4c1609829c9
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue May 17 07:45:46 2016 +0100

    sna: Track the minimum damage when doing CRTC-local TearFree
    
    We avoid having to redraw the entire CRTC's buffer on every flip as we
    know the contents from the previous flip are still available and only
    need to invalidate the dirty region.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index d01e6a4..7976171 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -210,6 +210,7 @@ struct sna_crtc {
 
 	struct pict_f_transform cursor_to_fb, fb_to_cursor;
 
+	RegionRec crtc_damage;
 	uint16_t shadow_bo_width, shadow_bo_height;
 
 	uint32_t rotation;
@@ -2749,6 +2750,7 @@ sna_crtc_damage(xf86CrtcPtr crtc)
 	assert(sna->mode.shadow_damage && sna->mode.shadow_active);
 	damage = DamageRegion(sna->mode.shadow_damage);
 	RegionUnion(damage, damage, &region);
+	to_sna_crtc(crtc)->crtc_damage = region;
 
 	DBG(("%s: damage now %dx[(%d, %d), (%d, %d)]\n",
 	     __FUNCTION__,
@@ -8693,23 +8695,28 @@ void sna_mode_redisplay(struct sna *sna)
 		sigio = sigio_block();
 		if (!box_empty(&damage.extents)) {
 			if (sna->flags & SNA_TEAR_FREE) {
+				RegionRec new_damage;
 				struct drm_mode_crtc_page_flip arg;
 				struct kgem_bo *bo;
 
-				RegionUninit(&damage);
-				damage.extents = crtc->bounds;
-				damage.data = NULL;
+				RegionNull(&new_damage);
+				RegionCopy(&new_damage, &damage);
 
 				bo = sna_crtc->cache_bo;
-				if (bo == NULL)
+				if (bo == NULL) {
+					damage.extents = crtc->bounds;
+					damage.data = NULL;
 					bo = kgem_create_2d(&sna->kgem,
 							    crtc->mode.HDisplay,
 							    crtc->mode.VDisplay,
 							    crtc->scrn->bitsPerPixel,
 							    sna_crtc->bo->tiling,
 							    CREATE_SCANOUT);
-				if (bo == NULL)
-					continue;
+					if (bo == NULL)
+						continue;
+				} else
+					RegionUnion(&damage, &damage, &sna_crtc->crtc_damage);
+				sna_crtc->crtc_damage = new_damage;
 
 				sna_crtc_redisplay(crtc, &damage, bo);
 				kgem_bo_submit(&sna->kgem, bo);

commit ab3ab412a459a95e94707a4e39a18790ee04e6e3
Author: Chris Bainbridge <chris.bainbridge@gmail.com>
Date:   Tue May 17 07:26:10 2016 +0100

    sna: Fix increment of modeset serial after applying CRTC
    
    We track how many mode sets have been made in order to detect stale
    flips (i.e. a sequence that crosses a mode change). This was broken by
    the logic inversion in setcrtc in 46caee86db0f ("sna: Fix reporting of
    errno after setcrtc failure")
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=95401
    Signed-off-by: Chris Bainbridge <chris.bainbridge@gmail.com>
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 867a10b..d01e6a4 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -1447,9 +1447,12 @@ sna_crtc_apply(xf86CrtcPtr crtc)
 	ret = 0;
 	if (unlikely(drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg))) {
 		ret = errno;
-		sna_crtc->mode_serial++;
-		sna_crtc_force_outputs_on(crtc);
+		goto unblock;
 	}
+
+	sna_crtc->mode_serial++;
+	sna_crtc_force_outputs_on(crtc);
+
 unblock:
 	kmsg_close(&kmsg, ret);
 	sigio_unblock(sigio);

commit 34f63f28c8f39e1d64cd159482f86cacbbff05b4
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon May 16 10:14:04 2016 +0100

    sna: Ensure we only cache the local CRTC scanout buffer
    
    Double check that we are not about to cache the common, untransformed,
    shadow buffer for the per-CRTC transformed buffer.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 080efbe..867a10b 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -8777,9 +8777,11 @@ disable1:
 				sna_crtc->flip_serial = sna_crtc->mode_serial;
 				sna_crtc->flip_pending = true;
 
-				assert_scanout(&sna->kgem, sna_crtc->bo,
-					       crtc->mode.HDisplay, crtc->mode.VDisplay);
-				sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo);
+				if (sna_crtc->bo != sna->mode.shadow) {
+					assert_scanout(&sna->kgem, sna_crtc->bo,
+						       crtc->mode.HDisplay, crtc->mode.VDisplay);
+					sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo);
+				}
 				DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n",
 				     __FUNCTION__, __sna_crtc_id(sna_crtc), sna_crtc->flip_bo->handle, sna_crtc->flip_bo->active_scanout, sna_crtc->flip_serial));
 			} else {

commit 3a7d6afd85f85b8b10bf0c08b7b5fa5265624850
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat May 14 16:13:33 2016 +0100

    sna: Do not cache the current crtc bo after performing the setcrtc
    
    When doing a SETCRTC as a fallback for a failed pageflip, do not use the
    then current CRTC bo as the next bo for pageflipping - as then we will
    render into it prior to flipping and so cause tearing.
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=95401
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 2c6059d..080efbe 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -8734,7 +8734,6 @@ void sna_mode_redisplay(struct sna *sna)
 
 						sna_crtc->bo = kgem_bo_reference(bo);
 						sna_crtc->bo->active_scanout++;
-						sna_crtc->cache_bo = kgem_bo_reference(bo);
 					} else {
 						BoxRec box;
 						DrawableRec tmp;
@@ -8762,10 +8761,10 @@ disable1:
 								   __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc));
 							sna_crtc_disable(crtc, false);
 						}
-
-						kgem_bo_destroy(&sna->kgem, bo);
-						sna_crtc->cache_bo = NULL;
 					}
+
+					kgem_bo_destroy(&sna->kgem, bo);
+					sna_crtc->cache_bo = NULL;
 					continue;
 				}
 				sna->mode.flip_active++;
@@ -8778,6 +8777,8 @@ disable1:
 				sna_crtc->flip_serial = sna_crtc->mode_serial;
 				sna_crtc->flip_pending = true;
 
+				assert_scanout(&sna->kgem, sna_crtc->bo,
+					       crtc->mode.HDisplay, crtc->mode.VDisplay);
 				sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo);
 				DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n",
 				     __FUNCTION__, __sna_crtc_id(sna_crtc), sna_crtc->flip_bo->handle, sna_crtc->flip_bo->active_scanout, sna_crtc->flip_serial));

commit f71447998cc8e22570cd5641bcf008cb68e9f4a3
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat May 14 09:00:08 2016 +0100

    sna/dri2: Refactor open-coded __kgem_bo_is_busy
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c
index ca9251a..59877e9 100644
--- a/src/sna/sna_dri2.c
+++ b/src/sna/sna_dri2.c
@@ -2553,22 +2553,15 @@ static inline bool rq_is_busy(struct kgem *kgem, struct kgem_bo *bo)
 	if (bo == NULL)
 		return false;
 
-	DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
-	     bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
-	assert(bo->refcnt);
-
-	if (bo->exec)
-		return true;
-
-	if (bo->rq == NULL)
-		return false;
-
-	return __kgem_busy(kgem, bo->handle);
+	return __kgem_bo_is_busy(kgem, bo);
 }
 
 static bool sna_dri2_blit_complete(struct sna_dri2_event *info)
 {
-	if (rq_is_busy(&info->sna->kgem, info->bo)) {
+	if (!info->bo)
+		return true;
+
+	if (__kgem_bo_is_busy(&info->sna->kgem, info->bo)) {
 		DBG(("%s: vsync'ed blit is still busy, postponing\n",
 		     __FUNCTION__));
 		if (sna_next_vblank(info))
@@ -2578,10 +2571,9 @@ static bool sna_dri2_blit_complete(struct sna_dri2_event *info)
 	}
 
 	DBG(("%s: blit finished\n", __FUNCTION__));
-	if (info->bo) {
-		kgem_bo_destroy(&info->sna->kgem, info->bo);
-		info->bo = NULL;
-	}
+	kgem_bo_destroy(&info->sna->kgem, info->bo);
+	info->bo = NULL;
+
 	return true;
 }
 

commit 3c95efe5f7989d95153f527eb7d2946d3bbc2af1
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat May 14 08:49:46 2016 +0100

    sna/dri2: Force blocking wait if vblank queue fails
    
    Whilst waiting for the previous blit to complete, if we fail to queue
    the vblank to wake up on the next frame, block before replying that the
    blit is complete.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c
index 4ffa7c3..ca9251a 100644
--- a/src/sna/sna_dri2.c
+++ b/src/sna/sna_dri2.c
@@ -2573,6 +2573,8 @@ static bool sna_dri2_blit_complete(struct sna_dri2_event *info)
 		     __FUNCTION__));
 		if (sna_next_vblank(info))
 			return false;
+
+		kgem_bo_sync__gtt(&info->sna->kgem, info->bo);
 	}
 
 	DBG(("%s: blit finished\n", __FUNCTION__));

commit 1486cfdf04b070787074493a30cd698455b016fe
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri May 13 22:53:26 2016 +0100

    sna/gen6+: Don't force a switch to BLT if the target bo cannot be blitted
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen6_common.h b/src/sna/gen6_common.h
index 83adc7d..b53ec0c 100644
--- a/src/sna/gen6_common.h
+++ b/src/sna/gen6_common.h
@@ -52,6 +52,9 @@ inline static bool can_switch_to_blt(struct sna *sna,
 	if (bo && bo->tiling == I915_TILING_Y)
 		return false;
 
+	if (bo && !kgem_bo_can_blt(&sna->kgem, bo))
+		return false;
+
 	if (sna->render_state.gt < 2)
 		return true;
 

commit 512284fd47bc225236e403920647703ea4842666
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon May 9 21:11:13 2016 +0100

    sna/dri2: Enforce swap-limits on stale buffers
    
    If the client sends an out-of-date swap request, first make sure that we
    don't cause an error by chasing a NULL CRTC and secondly force them to
    wait for a whole vblank before the next swap.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c
index d3fe18b..4ffa7c3 100644
--- a/src/sna/sna_dri2.c
+++ b/src/sna/sna_dri2.c
@@ -1617,6 +1617,9 @@ static void fake_swap_complete(struct sna *sna, ClientPtr client,
 
 	assert(draw);
 
+	if (crtc == NULL)
+		crtc = sna_primary_crtc(sna);
+
 	swap = sna_crtc_last_swap(crtc);
 	DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc %lld], tv=%d.%06d\n",
 	     __FUNCTION__, type, (long)draw->id, crtc ? sna_crtc_pipe(crtc) : -1,
@@ -3340,7 +3343,7 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 		     __FUNCTION__,
 		     get_private(front)->pixmap->drawable.serialNumber,
 		     get_drawable_pixmap(draw)->drawable.serialNumber));
-		goto fake;
+		goto skip;
 	}
 
 	if (get_private(back)->stale) {
@@ -3484,7 +3487,7 @@ skip:
 		if (!sna_next_vblank(info))
 			goto fake;
 
-		swap_limit(draw, 2);
+		swap_limit(draw, 1);
 	} else {
 fake:
 		/* XXX Use a Timer to throttle the client? */

commit 88733a7874f7c9b45da5d612802947a9de12893a
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat May 7 15:36:53 2016 +0100

    sna/dri2: Force consideration of the DRI2CopyRegion source as unclean
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c
index bb7070e..d3fe18b 100644
--- a/src/sna/sna_dri2.c
+++ b/src/sna/sna_dri2.c
@@ -1246,6 +1246,7 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region,
 
 	src_bo = src_priv->bo;
 	assert(src_bo->refcnt);
+	kgem_bo_unclean(&sna->kgem, src_bo);
 	if (is_front(src->attachment)) {
 		struct sna_pixmap *priv;
 

commit 08865b0af288e0460c38c2e3ca20a7f9d0311f27
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat May 7 15:24:28 2016 +0100

    sna: Add a special case for fast DRI2CopyRegion and NoAccel
    
    Enable copying onto a scanout buffer using a WC mmap - so long as it is
    X-tiled and no swizzling.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/blt.c b/src/sna/blt.c
index eced971..ab7bd22 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -349,6 +349,71 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
 	}
 }
 
+static fast_memcpy void
+memcpy_between_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
+				  int32_t src_stride, int32_t dst_stride,
+				  int16_t src_x, int16_t src_y,
+				  int16_t dst_x, int16_t dst_y,
+				  uint16_t width, uint16_t height)
+{
+	const unsigned tile_width = 512;
+	const unsigned tile_height = 8;
+	const unsigned tile_size = 4096;
+
+	const unsigned cpp = bpp / 8;
+	const unsigned tile_pixels = tile_width / cpp;
+	const unsigned tile_shift = ffs(tile_pixels) - 1;
+	const unsigned tile_mask = tile_pixels - 1;
+
+	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+	assert(src != dst);
+	assert((dst_x & tile_mask) == (src_x & tile_mask));
+
+	while (height--) {
+		unsigned w = width * cpp;
+		uint8_t *dst_row = dst;
+		const uint8_t *src_row = src;
+
+		dst_row += dst_y / tile_height * dst_stride * tile_height;
+		dst_row += (dst_y & (tile_height-1)) * tile_width;
+		if (dst_x)
+			dst_row += (dst_x >> tile_shift) * tile_size;
+		dst_y++;
+
+		src_row += src_y / tile_height * src_stride * tile_height;
+		src_row += (src_y & (tile_height-1)) * tile_width;
+		if (src_x)
+			src_row += (src_x >> tile_shift) * tile_size;
+		src_y++;
+
+		if (dst_x & tile_mask) {
+			const unsigned x = (dst_x & tile_mask) * cpp;
+			const unsigned len = min(tile_width - x, w);
+
+			memcpy(assume_misaligned(dst_row + x, tile_width, x),
+			       assume_misaligned(src_row + x, tile_width, x),
+			       len);
+
+			dst_row += tile_size;
+			src_row += tile_size;
+			w -= len;
+		}
+
+		while (w >= tile_width) {
+			memcpy(assume_aligned(dst_row, tile_width),
+			       assume_aligned(src_row, tile_width),
+			       tile_width);
+			dst_row += tile_size;
+			src_row += tile_size;
+			w -= tile_width;
+		}
+		memcpy(assume_aligned(dst_row, tile_width),
+		       assume_aligned(src_row, tile_width),
+		       w);
+	}
+}
+
 #if defined(sse2) && defined(__x86_64__)
 
 sse2 static force_inline void
@@ -461,7 +526,7 @@ sse2 static void to_memcpy(uint8_t *dst, const uint8_t *src, unsigned len)
 	while (len >= 64) {
 		to_sse64(dst, src);
 		dst += 64;
-		src = (const uint8_t *)src + 64;
+		src += 64;
 		len -= 64;
 	}
 	if (len == 0)
@@ -470,22 +535,22 @@ sse2 static void to_memcpy(uint8_t *dst, const uint8_t *src, unsigned len)
 	if (len & 32) {
 		to_sse32(dst, src);
 		dst += 32;
-		src = (const uint8_t *)src + 32;
+		src += 32;
 	}
 	if (len & 16) {
 		to_sse16(dst, src);
 		dst += 16;
-		src = (const uint8_t *)src + 16;
+		src += 16;
 	}
 	if (len & 8) {
 		*(uint64_t *)dst = *(uint64_t *)src;
 		dst += 8;
-		src = (const uint8_t *)src + 8;
+		src += 8;
 	}
 	if (len & 4) {
 		*(uint32_t *)dst = *(uint32_t *)src;
 		dst += 4;
-		src = (const uint8_t *)src + 4;
+		src += 4;
 	}
 	memcpy(dst, src, len & 3);
 }
@@ -820,6 +885,86 @@ memcpy_from_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp,
 	}
 }
 
+sse2 static fast_memcpy void
+memcpy_between_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp,
+					int32_t src_stride, int32_t dst_stride,
+					int16_t src_x, int16_t src_y,
+					int16_t dst_x, int16_t dst_y,
+					uint16_t width, uint16_t height)
+{
+	const unsigned tile_width = 512;
+	const unsigned tile_height = 8;
+	const unsigned tile_size = 4096;
+
+	const unsigned cpp = bpp / 8;
+	const unsigned tile_pixels = tile_width / cpp;
+	const unsigned tile_shift = ffs(tile_pixels) - 1;
+	const unsigned tile_mask = tile_pixels - 1;
+
+	unsigned ox, lx;
+
+	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+	assert(src != dst);
+
+	width *= cpp;
+	dst_stride *= tile_height;
+	src_stride *= tile_height;
+
+	assert((dst_x & tile_mask) == (src_x & tile_mask));
+	if (dst_x & tile_mask) {
+		ox = (dst_x & tile_mask) * cpp;
+		lx = min(tile_width - ox, width);
+		assert(lx != 0);
+	} else
+		lx = 0;
+
+	if (dst_x)
+		dst = (uint8_t *)dst + (dst_x >> tile_shift) * tile_size;
+	if (src_x)
+		src = (const uint8_t *)src + (src_x >> tile_shift) * tile_size;
+
+	while (height--) {
+		const uint8_t *src_row;
+		uint8_t *dst_row;
+		unsigned w = width;
+
+		dst_row = dst;
+		dst_row += dst_y / tile_height * dst_stride;
+		dst_row += (dst_y & (tile_height-1)) * tile_width;
+		dst_y++;
+
+		src_row = src;
+		src_row += src_y / tile_height * src_stride;
+		src_row += (src_y & (tile_height-1)) * tile_width;
+		src_y++;
+
+		if (lx) {
+			to_memcpy(dst_row + ox, src_row + ox, lx);
+			dst_row += tile_size;
+			src_row += tile_size;
+			w -= lx;
+		}
+		while (w >= tile_width) {
+			assert(((uintptr_t)dst_row & (tile_width - 1)) == 0);
+			assert(((uintptr_t)src_row & (tile_width - 1)) == 0);
+			to_sse128xN(assume_aligned(dst_row, tile_width),
+				    assume_aligned(src_row, tile_width),
+				    tile_width);
+			dst_row += tile_size;
+			src_row += tile_size;
+			w -= tile_width;
+		}
+		if (w) {
+			assert(((uintptr_t)dst_row & (tile_width - 1)) == 0);
+			assert(((uintptr_t)src_row & (tile_width - 1)) == 0);
+			to_memcpy(assume_aligned(dst_row, tile_width),
+				  assume_aligned(src_row, tile_width),
+				  w);
+		}
+	}
+}
+
 #endif
 
 #define memcpy_to_tiled_x(swizzle) \
@@ -1100,11 +1245,13 @@ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling, unsigned cpu)
 		if (cpu & SSE2) {
 			kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0__sse2;
 			kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0__sse2;
+			kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0__sse2;
 		} else
 #endif
 	       	{
 			kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0;
 			kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0;
+			kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0;
 		}
 		break;
 	case I915_BIT_6_SWIZZLE_9:
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index cd07756..ded8f78 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -113,6 +113,12 @@ enum {
 	NUM_MAP_TYPES,
 };
 
+typedef void (*memcpy_box_func)(const void *src, void *dst, int bpp,
+				int32_t src_stride, int32_t dst_stride,
+				int16_t src_x, int16_t src_y,
+				int16_t dst_x, int16_t dst_y,
+				uint16_t width, uint16_t height);
+
 struct kgem {
 	unsigned wedged;
 	int fd;
@@ -212,16 +218,9 @@ struct kgem {
 	void (*retire)(struct kgem *kgem);
 	void (*expire)(struct kgem *kgem);
 
-	void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp,
-				  int32_t src_stride, int32_t dst_stride,
-				  int16_t src_x, int16_t src_y,
-				  int16_t dst_x, int16_t dst_y,
-				  uint16_t width, uint16_t height);
-	void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp,
-				    int32_t src_stride, int32_t dst_stride,
-				    int16_t src_x, int16_t src_y,
-				    int16_t dst_x, int16_t dst_y,
-				    uint16_t width, uint16_t height);
+	memcpy_box_func memcpy_to_tiled_x;
+	memcpy_box_func memcpy_from_tiled_x;
+	memcpy_box_func memcpy_between_tiled_x;
 
 	struct kgem_bo *batch_bo;
 
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 5a8df06..f8281e9 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -2298,16 +2298,22 @@ static bool can_copy_cpu(struct sna *sna,
 			 struct kgem_bo *src,
 			 struct kgem_bo *dst)
 {
-	if (src->tiling != dst->tiling)
-		return false;
+	DBG(("%s: tiling=%d:%d, pitch=%d:%d, can_map=%d:%d[%d]\n",
+	     __FUNCTION__,
+	     src->tiling, dst->tiling,
+	     src->pitch, dst->pitch,
+	     kgem_bo_can_map__cpu(&sna->kgem, src, false),
+	     kgem_bo_can_map__cpu(&sna->kgem, dst, true),
+	     sna->kgem.has_wc_mmap));
 
-	if (src->pitch != dst->pitch)
+	if (src->tiling != dst->tiling)
 		return false;
 
 	if (!kgem_bo_can_map__cpu(&sna->kgem, src, false))
 		return false;
 
-	if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true))
+	if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true) &&
+	    !sna->kgem.has_wc_mmap)
 		return false;
 
 	DBG(("%s -- yes, src handle=%d, dst handle=%d\n", __FUNCTION__, src->handle, dst->handle));
@@ -2320,8 +2326,8 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op,
 		  const DrawableRec *dst_draw, struct kgem_bo *dst_bo, int16_t dx, int16_t dy,
 		  const BoxRec *box, int n, unsigned flags)
 {
+	memcpy_box_func detile = NULL;
 	void *dst, *src;
-	bool clipped;
 
 	if (op != GXcopy)
 		return false;
@@ -2329,25 +2335,53 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op,
 	if (src_draw->depth != dst_draw->depth)
 		return false;
 
-	clipped = (n > 1 ||
-		   box->x1 + dx > 0 ||
-		   box->y1 + dy > 0 ||
-		   box->x2 + dx < dst_draw->width ||
-		   box->y2 + dy < dst_draw->height);
-
 	dst = src = NULL;
-	if (!clipped && can_copy_cpu(sna, src_bo, dst_bo)) {
-		dst = kgem_bo_map__cpu(&sna->kgem, dst_bo);
+	if (can_copy_cpu(sna, src_bo, dst_bo)) {
+		if (src_bo->pitch != dst_bo->pitch ||
+		    dx != sx || dy != sy || n > 1 ||
+		    box->x1 + dx > 0 ||
+		    box->y1 + dy > 0 ||
+		    box->x2 + dx < dst_draw->width ||
+		    box->y2 + dy < dst_draw->height) {
+			if (dx != sx) /* not implemented in memcpy yet */
+				goto use_gtt;
+
+			switch (dst_bo->tiling) {
+			default:
+			case I915_TILING_Y:
+				goto use_gtt;
+
+			case I915_TILING_X:
+				detile = sna->kgem.memcpy_between_tiled_x;
+				if (detile == NULL)
+					goto use_gtt;
+				break;
+
+			case I915_TILING_NONE:
+				break;
+			}
+		}
+
+		if (kgem_bo_can_map__cpu(&sna->kgem, dst_bo, true))
+			dst = kgem_bo_map__cpu(&sna->kgem, dst_bo);
+		else
+			dst = kgem_bo_map__wc(&sna->kgem, dst_bo);
 		src = kgem_bo_map__cpu(&sna->kgem, src_bo);
 	}
 
 	if (dst == NULL || src == NULL) {
+use_gtt:
 		dst = kgem_bo_map__gtt(&sna->kgem, dst_bo);
 		src = kgem_bo_map__gtt(&sna->kgem, src_bo);
 		if (dst == NULL || src == NULL)
 			return false;
+
+		detile = NULL;
 	} else {
-		kgem_bo_sync__cpu_full(&sna->kgem, dst_bo, true);
+		if (dst == dst_bo->map__wc)
+			kgem_bo_sync__gtt(&sna->kgem, dst_bo);
+		else
+			kgem_bo_sync__cpu_full(&sna->kgem, dst_bo, true);
 		kgem_bo_sync__cpu_full(&sna->kgem, src_bo, false);
 	}
 
@@ -2355,7 +2389,16 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op,
 	     __FUNCTION__, sx, sy, dx, dy, n));
 
 	if (sigtrap_get() == 0) {
-		do {
+		if (detile) {
+			do {
+				detile(src, dst, dst_draw->bitsPerPixel,
+				       src_bo->pitch, dst_bo->pitch,
+				       box->x1 + sx, box->y1 + sy,
+				       box->x1 + dx, box->y1 + dy,
+				       box->x2 - box->x1, box->y2 - box->y1);
+				box++;
+			} while (--n);
+		} else do {
 			memcpy_blt(src, dst, dst_draw->bitsPerPixel,
 				   src_bo->pitch, dst_bo->pitch,
 				   box->x1 + sx, box->y1 + sy,

commit b89f203b0d65b607bc906b9a1ac184ebef7b41df
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat May 7 15:20:32 2016 +0100

    sna: Do not force ping-pong migration for TearFree + NoAccel
    
    If acceleration is disabled, but we are using TearFree, then ideally we
    want to flip the shadow buffer onto the scanout. If the shadow buffer is
    already on the GPU, e.g. having been swapped in by a compositor, then we
    do not want to move it to the CPU domain only to copy it back to a new
    buffer and then flipped for a TearFree update.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 759659d..2c6059d 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -210,8 +210,6 @@ struct sna_crtc {
 
 	struct pict_f_transform cursor_to_fb, fb_to_cursor;
 
-	RegionRec client_damage; /* XXX overlap with shadow damage? */
-
 	uint16_t shadow_bo_width, shadow_bo_height;
 
 	uint32_t rotation;
@@ -1665,12 +1663,13 @@ static bool wait_for_shadow(struct sna *sna,
 		     sna->mode.shadow_region.extents.y1,
 		     sna->mode.shadow_region.extents.x2,
 		     sna->mode.shadow_region.extents.y2));
-		ret = sna->render.copy_boxes(sna, GXcopy,
-					     &pixmap->drawable, priv->gpu_bo, 0, 0,
-					     &pixmap->drawable, bo, 0, 0,
-					     region_rects(&sna->mode.shadow_region),
-					     region_num_rects(&sna->mode.shadow_region),
-					     0);
+		if (!sna->render.copy_boxes(sna, GXcopy,
+					    &pixmap->drawable, priv->gpu_bo, 0, 0,
+					    &pixmap->drawable, bo, 0, 0,
+					    region_rects(&sna->mode.shadow_region),
+					    region_num_rects(&sna->mode.shadow_region),
+					    0))
+			ERR(("%s: copy failed\n", __FUNCTION__));
 	}
 
 	if (priv->cow)
@@ -2552,7 +2551,7 @@ out_shadow:
 				return NULL;
 			}
 
-			if (sna->mode.shadow == NULL && !wedged(sna)) {
+			if (sna->mode.shadow == NULL) {
 				struct kgem_bo *shadow;
 
 				DBG(("%s: creating TearFree shadow bo\n", __FUNCTION__));
@@ -2743,7 +2742,6 @@ sna_crtc_damage(xf86CrtcPtr crtc)
 	     __FUNCTION__, sna_crtc_id(crtc),
 	     region.extents.x1, region.extents.y1,


Reply to: