[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-intel: Changes to 'upstream-experimental'



 NEWS                        |   27 +
 configure.ac                |    2 
 src/intel.h                 |    2 
 src/intel_batchbuffer.c     |   41 +-
 src/intel_display.c         |    1 
 src/intel_driver.c          |    9 
 src/intel_options.c         |    2 
 src/intel_options.h         |    2 
 src/intel_uxa.c             |    2 
 src/sna/Makefile.am         |    2 
 src/sna/gen2_render.c       |  226 +++--------
 src/sna/gen3_render.c       |  127 ++----
 src/sna/gen4_render.c       |  491 ++++++------------------
 src/sna/gen4_source.c       |  179 ++++++++
 src/sna/gen4_source.h       |   22 +
 src/sna/gen4_vertex.c       |  401 +++++++++++++++----
 src/sna/gen4_vertex.h       |   27 -
 src/sna/gen5_render.c       |  328 ++++------------
 src/sna/gen5_render.h       |   63 +--
 src/sna/gen6_render.c       |  318 +++------------
 src/sna/gen7_render.c       |  324 +++------------
 src/sna/kgem.c              |  446 +++++++++++++++------
 src/sna/kgem.h              |   42 +-
 src/sna/sna.h               |   26 -
 src/sna/sna_accel.c         |  900 +++++++++++++++++++-------------------------
 src/sna/sna_blt.c           |   28 -
 src/sna/sna_damage.h        |   28 -
 src/sna/sna_display.c       |   30 +
 src/sna/sna_dri.c           |  827 +++++++++++++++++-----------------------
 src/sna/sna_driver.c        |  173 ++++----
 src/sna/sna_glyphs.c        |   53 +-
 src/sna/sna_gradient.c      |  100 +++-
 src/sna/sna_io.c            |   33 +
 src/sna/sna_render.c        |    2 
 src/sna/sna_render.h        |   18 
 src/sna/sna_render_inline.h |    9 
 src/sna/sna_trapezoids.c    |  146 ++++++-
 src/sna/sna_video.c         |    5 
 38 files changed, 2683 insertions(+), 2779 deletions(-)

New commits:
commit dbf1cfec9cd4e9efe7650f2940c92b4e51214288
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jan 16 12:20:48 2013 +0000

    2.20.18 release
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/NEWS b/NEWS
index 2e0f021..e9dd6e4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,30 @@
+Release 2.20.18 (2013-01-16)
+============================
+A bunch of miscellaneous fixes for assertion failures and various
+performance regressions when mixing new methods for offloads, along with
+a couple of improvements for rendering with gen4.
+
+ * Remove use of packed unnormalized texture coordinates on gen4/5 as
+   these GPUs do not support unnormalized coordinates in the sampler.
+
+ * Remove dependency upon x86 asm for cross-building to unsupported
+   architectures.
+   https://bugs.gentoo.org/show_bug.cgi?id=448570
+
+ * Apply damage around PRIME updates in the correct order.
+
+ * Correctly read the initial backlight level for when the user
+   overrides UXA's choice of backlight controller.
+
+ * Throttle UXA and prevent it queuing work much faster than the GPU can
+   complete it. This realised itself in impossible performance figures and
+   the entire display freezing for several seconds whilst the GPU caught
+   up. One side effect is that it also caused the DDX to consume more
+   memory than was required as it could not recycle buffers quickly
+   enough, and in some cases this produces a marked improvement in
+   performance. Also note on gen2/3 this requires a new libdrm [2.4.41]
+   in order to prevent a bug causing the DDX to fallback to swrast.
+
 Release 2.20.17 (2012-12-26)
 ============================
 A minor update to prepare for co-operating with the kernel over managing
diff --git a/configure.ac b/configure.ac
index afe79ab..b3cf50f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.20.17],
+        [2.20.18],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit 47caffc50b5cdd288ad868fa9a697f0d4e2d28dc
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jan 16 10:49:24 2013 +0000

    sna: Restrict upload buffers to reduce sampler TLB misses
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 368d880..f672c3e 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -691,14 +691,18 @@ fallback:
 		     sna->render.max_3d_size, sna->render.max_3d_size));
 		if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) {
 			BoxRec tile, stack[64], *clipped, *c;
-			int step;
+			int cpp, step;
 
 tile:
-			step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel,
-				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
-			while (step * step * 4 > sna->kgem.max_upload_tile_size)
+			cpp = dst->drawable.bitsPerPixel / 8;
+			step = MIN(sna->render.max_3d_size,
+				   (MAXSHORT&~63) / cpp);
+			while (step * step * cpp > sna->kgem.max_upload_tile_size)
 				step /= 2;
 
+			if (step * cpp > 4096)
+				step = 4096 / cpp;
+
 			DBG(("%s: tiling upload, using %dx%d tiles\n",
 			     __FUNCTION__, step, step));
 

commit ab36300a22222086b94857f356612106ffbeb480
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jan 16 09:17:59 2013 +0000

    sna: Correct DBG to refer to the actual tiling mode forced
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 74a7c24..98f3ec9 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3293,12 +3293,12 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 			tiling = I915_TILING_X;
 
 		if (width*bpp > (MAXSHORT-512) * 8) {
-			DBG(("%s: large pitch [%d], forcing TILING_X\n",
-			     __FUNCTION__, width*bpp/8));
 			if (tiling > 0)
 				tiling = -tiling;
 			else if (tiling == 0)
 				tiling = -I915_TILING_X;
+			DBG(("%s: large pitch [%d], forcing TILING [%d]\n",
+			     __FUNCTION__, width*bpp/8, tiling));
 		} else if (tiling && (width|height) > 8192) {
 			DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
 			     __FUNCTION__, width, height));

commit a6ecb6d31d8c543f38fca0be6b0ec82e59dcd8d2
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jan 16 09:14:40 2013 +0000

    sna: Discard the batch if we are discarding the only buffer in it
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index af5702a..74a7c24 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1738,6 +1738,18 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	assert(bo->io == false);
 	assert(bo->scanout == false);
 
+	if (bo->exec && kgem->nexec == 1) {
+		DBG(("%s: only handle in batch, discarding last operations\n",
+		     __FUNCTION__));
+		assert(bo->exec == &kgem->exec[0]);
+		assert(kgem->exec[0].handle == bo->handle);
+		assert(RQ(bo->rq) == kgem->next_request);
+		bo->refcnt = 1;
+		kgem_reset(kgem);
+		assert(bo->rq == NULL);
+		bo->refcnt = 0;
+	}
+
 	if (bo->rq) {
 		struct list *cache;
 
@@ -2397,6 +2409,8 @@ void kgem_reset(struct kgem *kgem)
 						 request);
 			list_del(&bo->request);
 
+			assert(RQ(bo->rq) == rq);
+
 			bo->binding.offset = 0;
 			bo->exec = NULL;
 			bo->target_handle = -1;
@@ -2411,8 +2425,10 @@ void kgem_reset(struct kgem *kgem)
 			}
 		}
 
-		if (kgem->next_request != &kgem->static_request)
-			free(kgem->next_request);
+		if (rq != &kgem->static_request) {
+			list_init(&rq->list);
+			__kgem_request_free(rq);
+		}
 	}
 
 	kgem->nfence = 0;

commit 26db2438e34feb8f28444bf7418869b4ecd870da
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jan 16 09:00:21 2013 +0000

    sna: Fix computation of large object sizes to prevent overflow
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index fdba699..af5702a 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1075,6 +1075,11 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 	if (aperture.aper_size == 0)
 		aperture.aper_size = 64*1024*1024;
 
+	DBG(("%s: aperture size %lld, available now %lld\n",
+	     __FUNCTION__,
+	     (long long)aperture.aper_size,
+	     (long long)aperture.aper_available_size));
+
 	kgem->aperture_total = aperture.aper_size;
 	kgem->aperture_high = aperture.aper_size * 3/4;
 	kgem->aperture_low = aperture.aper_size * 1/3;
@@ -1102,7 +1107,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 	DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
 	     kgem->buffer_size, kgem->buffer_size / 1024));
 
-	kgem->max_object_size = 3 * kgem->aperture_high / 4;
+	kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
 	kgem->max_gpu_size = kgem->max_object_size;
 	if (!kgem->has_llc)
 		kgem->max_gpu_size = MAX_CACHE_SIZE;
@@ -1119,9 +1124,9 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 	if (kgem->max_gpu_size > totalram / 4)
 		kgem->max_gpu_size = totalram / 4;
 
-	half_gpu_max = kgem->max_gpu_size / 2;
-	kgem->max_cpu_size = half_gpu_max;
+	kgem->max_cpu_size = kgem->max_object_size;
 
+	half_gpu_max = kgem->max_gpu_size / 2;
 	kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
 	if (kgem->max_copy_tile_size > half_gpu_max)
 		kgem->max_copy_tile_size = half_gpu_max;

commit 54c1d97d5ab325874e1c7b2639e58111d7a6b93f
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jan 16 09:00:04 2013 +0000

    sna: Add DBG for when we add the inplace hint
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 2d947a3..4bb50cd 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1906,8 +1906,11 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 
 	if (USE_INPLACE &&
 	    (flags & MOVE_READ) == 0 &&
-	    (priv->flush || box_inplace(pixmap, &region->extents)))
+	    (priv->flush || box_inplace(pixmap, &region->extents))) {
+		DBG(("%s: marking for inplace hint (%d, %d)\n",
+		     __FUNCTION__, priv->flush, box_inplace(pixmap, &region->extents)));
 		flags |= MOVE_INPLACE_HINT;
+	}
 
 	if (flags & MOVE_WHOLE_HINT)
 		return _sna_pixmap_move_to_cpu(pixmap, flags);

commit 588c5aa6bca441d7c9305fe2fcf268e89b6b617d
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 22:21:56 2013 +0000

    sna: Revert use of a separate CAN_CREATE_SMALL flag
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 2f8d696..fdba699 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1102,7 +1102,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 	DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
 	     kgem->buffer_size, kgem->buffer_size / 1024));
 
-	kgem->max_object_size = 2 * kgem->aperture_high / 3;
+	kgem->max_object_size = 3 * kgem->aperture_high / 4;
 	kgem->max_gpu_size = kgem->max_object_size;
 	if (!kgem->has_llc)
 		kgem->max_gpu_size = MAX_CACHE_SIZE;
@@ -3387,8 +3387,6 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
 				 I915_TILING_NONE, &pitch);
 	DBG(("%s: untiled size=%d\n", __FUNCTION__, size));
 	if (size > 0) {
-		if (size < 4096)
-			flags |= KGEM_CAN_CREATE_SMALL;
 		if (size <= kgem->max_cpu_size)
 			flags |= KGEM_CAN_CREATE_CPU;
 		if (size <= kgem->max_gpu_size)
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 372bfdb..d2b89f5 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -248,7 +248,6 @@ unsigned kgem_can_create_2d(struct kgem *kgem, int width, int height, int depth)
 #define KGEM_CAN_CREATE_CPU	0x2
 #define KGEM_CAN_CREATE_LARGE	0x4
 #define KGEM_CAN_CREATE_GTT	0x8
-#define KGEM_CAN_CREATE_SMALL	0x10
 
 struct kgem_bo *
 kgem_replace_bo(struct kgem *kgem,
diff --git a/src/sna/sna.h b/src/sna/sna.h
index bddeed4..112af35 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -126,7 +126,7 @@ struct sna_pixmap {
 #define PIN_SCANOUT 0x1
 #define PIN_DRI 0x2
 #define PIN_PRIME 0x4
-	uint8_t create :5;
+	uint8_t create :4;
 	uint8_t mapped :1;
 	uint8_t shm :1;
 	uint8_t clear :1;
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index a168925..2d947a3 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1224,7 +1224,7 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
 
 		ptr = MAKE_STATIC_PTR(pixmap->devPrivate.ptr);
 		pad = pixmap->devKind;
-		flags |= KGEM_CAN_CREATE_SMALL;
+		flags &= ~(KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_CPU);
 	} else {
 		DBG(("%s: creating GPU pixmap %dx%d, stride=%d, flags=%x\n",
 		     __FUNCTION__, width, height, pad, flags));
diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
index a329707..432201f 100644
--- a/src/sna/sna_render_inline.h
+++ b/src/sna/sna_render_inline.h
@@ -97,7 +97,7 @@ too_small(struct sna_pixmap *priv)
 	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
 		return false;
 
-	return priv->create & KGEM_CAN_CREATE_SMALL;
+	return (priv->create & KGEM_CAN_CREATE_GPU) == 0;
 }
 
 static inline bool

commit af85ffdec7047efa452d6bab3a0ee3889dd4f046
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 20:37:11 2013 +0000

    sna: Avoid serialising on a move-to-cpu for an async operation
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 0b46218..a168925 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1615,6 +1615,8 @@ skip_inplace_map:
 	}
 
 	if (priv->gpu_damage &&
+	    ((flags & MOVE_ASYNC_HINT) == 0 ||
+	     !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) &&
 	    priv->gpu_bo->tiling == I915_TILING_NONE &&
 	    sna_pixmap_move_to_gpu(pixmap, MOVE_READ)) {
 		kgem_bo_submit(&sna->kgem, priv->gpu_bo);

commit d70be85dc723168a481c1955444afd951c4817bf
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 20:16:45 2013 +0000

    sna: Assert that we never try to mix INPLACE / ASYNC hints for move-to-cpu
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index a6affcf..0b46218 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1444,13 +1444,15 @@ static inline bool use_cpu_bo_for_upload(struct sna *sna,
 
 static inline bool operate_inplace(struct sna_pixmap *priv, unsigned flags)
 {
-	if ((priv->create & KGEM_CAN_CREATE_GTT) == 0) {
-		DBG(("%s: no, not accessible via GTT\n", __FUNCTION__));
+	if ((flags & MOVE_INPLACE_HINT) == 0) {
+		DBG(("%s: no, inplace operation not suitable\n", __FUNCTION__));
 		return false;
 	}
 
-	if ((flags & MOVE_INPLACE_HINT) == 0) {
-		DBG(("%s: no, inplace operation not suitable\n", __FUNCTION__));
+	assert((flags & MOVE_ASYNC_HINT) == 0);
+
+	if ((priv->create & KGEM_CAN_CREATE_GTT) == 0) {
+		DBG(("%s: no, not accessible via GTT\n", __FUNCTION__));
 		return false;
 	}
 

commit 1287c3a24c277cb42930d8af2943b9f7b016f31d
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 18:59:15 2013 +0000

    sna: Specialise sna_get_image_blt for clears to avoid sync readback
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 27f2920..a6affcf 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -13411,6 +13411,26 @@ sna_get_image_blt(DrawablePtr drawable,
 	if (priv == NULL)
 		return false;
 
+	if (priv->clear) {
+		int w = region->extents.x2 - region->extents.x1;
+		int h = region->extents.y2 - region->extents.y1;
+
+		pitch = PixmapBytePad(w, pixmap->drawable.depth);
+		if (priv->clear_color == 0 ||
+		    pixmap->drawable.bitsPerPixel == 8) {
+			memset(dst, priv->clear_color, pitch * h);
+		} else {
+			pixman_fill((uint32_t *)dst,
+				    pitch/sizeof(uint32_t),
+				    pixmap->drawable.bitsPerPixel,
+				    0, 0,
+				    w, h,
+				    priv->clear_color);
+		}
+
+		return true;
+	}
+
 	if (!sna->kgem.has_userptr)
 		return false;
 

commit da4972eec57e662b98a7abced6338ceb8a533a48
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 18:34:07 2013 +0000

    sna/trapezoids: Avoid the multiply for an opaque source
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 1a4b109..95e7d4f 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -4417,7 +4417,8 @@ struct inplace {
 static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity)
 {
 	coverage = coverage * 256 / FAST_SAMPLES_XY;
-	return mul_8_8(coverage - (coverage >> 8), opacity);
+	coverage -= coverage >> 8;
+	return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
 }
 
 static void

commit 7f968c8c991cff751459939bdb42e14255f529b7
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 18:41:00 2013 +0000

    sna: Add DBG to use_shm_bo()
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index cd36c95..2269b3a 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2617,8 +2617,8 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
 	struct sna_composite_op tmp;
 	BoxRec extents;
 
-	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
-	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
+	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
+	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
 	     src_bo == dst_bo,
 	     overlaps(sna,
 		      src_bo, src_dx, src_dy,
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 2a0955a..27f2920 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4134,19 +4134,36 @@ static bool use_shm_bo(struct sna *sna,
 		       struct sna_pixmap *priv,
 		       int alu)
 {
-	if (priv == NULL || priv->cpu_bo == NULL)
+	if (priv == NULL || priv->cpu_bo == NULL) {
+		DBG(("%s: no, not attached\n", __FUNCTION__));
 		return false;
+	}
 
-	if (!priv->shm)
+	if (!priv->shm) {
+		DBG(("%s: yes, ordinary CPU bo\n", __FUNCTION__));
 		return true;
+	}
 
-	if (alu != GXcopy)
+	if (alu != GXcopy) {
+		DBG(("%s: yes, complex alu=%d\n", __FUNCTION__, alu));
+		return true;
+	}
+	if (bo->tiling) {
+		DBG(("%s:, yes, dst tiled=%d\n", __FUNCTION__, bo->tiling));
 		return true;
+	}
 
-	if (kgem_bo_is_busy(bo))
+	if (__kgem_bo_is_busy(&sna->kgem, bo)) {
+		DBG(("%s: yes, dst is busy\n", __FUNCTION__));
 		return true;
+	}
 
-	return bo->tiling || __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo);
+	if (__kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
+		DBG(("%s: yes, src is busy\n", __FUNCTION__));
+		return true;
+	}
+
+	return false;
 }
 
 static void
@@ -4335,8 +4352,8 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 		if (use_shm_bo(sna, bo, src_priv, alu)) {
 			bool ret;
 
-			DBG(("%s: region overlaps CPU damage, copy from CPU bo\n",
-			     __FUNCTION__));
+			DBG(("%s: region overlaps CPU damage, copy from CPU bo (shm? %d)\n",
+			     __FUNCTION__, src_priv->shm));
 
 			assert(bo != dst_priv->cpu_bo);
 

commit af63fab5047a43716c5df875ddc50f7c877f8a83
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 18:21:11 2013 +0000

    sna: Hint that a copy from a SHM bo will likely be the last in a batch
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index ea0b786..2a0955a 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4351,7 +4351,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 			if (!sna->render.copy_boxes(sna, alu,
 						    src_pixmap, src_priv->cpu_bo, src_dx, src_dy,
 						    dst_pixmap, bo, 0, 0,
-						    box, n, 0)) {
+						    box, n, src_priv->shm ? COPY_LAST : 0)) {
 				DBG(("%s: fallback - accelerated copy boxes failed\n",
 				     __FUNCTION__));
 				goto fallback;

commit 1be436409222c00ff66c6d747487b77f1037b27a
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 18:20:29 2013 +0000

    sna: Pass the async hint for the upload into the GPU
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index fc2ef07..ea0b786 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4312,7 +4312,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 			area.y2 += src_dy;
 
 			if (!sna_pixmap_move_area_to_gpu(src_pixmap, &area,
-							 MOVE_READ))
+							 MOVE_READ | MOVE_ASYNC_HINT))
 				goto fallback;
 
 			if (!sna->render.copy_boxes(sna, alu,

commit 2113f7f440dd2f10e80f0bb3bd5cd155f7e19098
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 09:33:03 2013 +0000

    sna: Free the SHM pixmaps after b266ae6f6f
    
    Since b266ae6f6f protected the static allocations from being reaped in
    the normal course of events, we need to penetrate those defenses in
    order to finally free the SHM mappings.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 595d140..fc2ef07 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -468,14 +468,8 @@ done:
 	return priv->ptr != NULL;
 }
 
-static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
+static void __sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
 {
-	assert(priv->cpu_damage == NULL);
-	assert(list_is_empty(&priv->list));
-
-	if (IS_STATIC_PTR(priv->ptr))
-		return;
-
 	if (priv->cpu_bo) {
 		DBG(("%s: discarding CPU buffer, handle=%d, size=%d\n",
 		     __FUNCTION__, priv->cpu_bo->handle, kgem_bo_size(priv->cpu_bo)));
@@ -489,9 +483,21 @@ static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
 			sna_accel_watch_flush(sna, -1);
 		}
 		kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
-		priv->cpu_bo = NULL;
-	} else
+	} else if (!IS_STATIC_PTR(priv->ptr))
 		free(priv->ptr);
+}
+
+static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
+{
+	assert(priv->cpu_damage == NULL);
+	assert(list_is_empty(&priv->list));
+
+	if (IS_STATIC_PTR(priv->ptr))
+		return;
+
+	__sna_pixmap_free_cpu(sna, priv);
+
+	priv->cpu_bo = NULL;
 	priv->ptr = NULL;
 
 	if (!priv->mapped)
@@ -1275,7 +1281,7 @@ static void __sna_free_pixmap(struct sna *sna,
 	sna_damage_destroy(&priv->gpu_damage);
 	sna_damage_destroy(&priv->cpu_damage);
 
-	sna_pixmap_free_cpu(sna, priv);
+	__sna_pixmap_free_cpu(sna, priv);
 
 	if (priv->header) {
 		assert(!priv->shm);
@@ -13585,8 +13591,10 @@ sna_accel_flush_callback(CallbackListPtr *list,
 
 		list_del(&priv->list);
 		if (priv->shm) {
-			DBG(("%s: syncing SHM pixmap=%ld\n", __FUNCTION__,
-			     priv->pixmap->drawable.serialNumber));
+			DBG(("%s: syncing SHM pixmap=%ld (refcnt=%d)\n",
+			     __FUNCTION__,
+			     priv->pixmap->drawable.serialNumber,
+			     priv->pixmap->refcnt));
 			ret = sna_pixmap_move_to_cpu(priv->pixmap,
 						     MOVE_READ | MOVE_WRITE);
 			assert(!ret || priv->gpu_bo == NULL);

commit 441c481630a5cf09a7eb26d5db80b1e60cb2b10f
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 01:26:19 2013 +0000

    sna: Mark uploads with async hints when appropriate
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 0562641..595d140 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1974,8 +1974,8 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	if ((priv->clear || (flags & MOVE_READ) == 0) &&
 	    priv->cpu_bo && !priv->cpu_bo->flush &&
 	    __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
-		sna_damage_subtract(&priv->gpu_damage, region);
-		if (sna_pixmap_move_to_gpu(pixmap, MOVE_READ)) {
+		sna_damage_subtract(&priv->cpu_damage, region);
+		if (sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT)) {
 			sna_damage_all(&priv->gpu_damage,
 				       pixmap->drawable.width,
 				       pixmap->drawable.height);
@@ -4015,7 +4015,7 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 			goto out;
 
 		assert(priv->gpu_bo->proxy == NULL);
-		if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE | MOVE_READ)) {
+		if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE | MOVE_READ | MOVE_ASYNC_HINT)) {
 			DBG(("%s: fallback - not a pure copy and failed to move dst to GPU\n",
 			     __FUNCTION__));
 			goto fallback;
@@ -4274,7 +4274,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 		if (src_priv &&
 		    move_to_gpu(src_pixmap, src_priv, &region->extents, alu) &&
-		    sna_pixmap_move_to_gpu(src_pixmap, MOVE_READ)) {
+		    sna_pixmap_move_to_gpu(src_pixmap, MOVE_READ | MOVE_ASYNC_HINT)) {
 			DBG(("%s: move whole src_pixmap to GPU and copy\n",
 			     __FUNCTION__));
 			if (!sna->render.copy_boxes(sna, alu,
@@ -9629,7 +9629,8 @@ sna_pixmap_get_source_bo(PixmapPtr pixmap)
 		return upload;
 	}
 
-	if (priv->gpu_damage && !sna_pixmap_move_to_gpu(pixmap, MOVE_READ))
+	if (priv->gpu_damage &&
+	    !sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT))
 		return NULL;
 
 	if (priv->cpu_damage && priv->cpu_bo)
@@ -13853,10 +13854,10 @@ fallback:
 				box++;
 			} while (--n);
 		} else {
-			if (!sna_pixmap_move_to_gpu(src, MOVE_READ | __MOVE_FORCE))
+			if (!sna_pixmap_move_to_gpu(src, MOVE_READ | MOVE_ASYNC_HINT | __MOVE_FORCE))
 				goto fallback;
 
-			if (!sna_pixmap_move_to_gpu(dst, MOVE_READ | MOVE_WRITE | __MOVE_FORCE))
+			if (!sna_pixmap_move_to_gpu(dst, MOVE_READ | MOVE_WRITE | MOVE_ASYNC_HINT | __MOVE_FORCE))
 				goto fallback;
 
 			if (!sna->render.copy_boxes(sna, GXcopy,

commit 6abd442279fd32d1ce9b33a72eabbeb922316151
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Jan 15 00:15:23 2013 +0000

    sna: Avoid allocating an active CPU bo unnecessarily
    
    If we will not write back the GPU damage to the bo as we intend to
    overwrite it for the next operation, we can forgo allocating the active
    CPU bo and skip the synchronisation overhead.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 9125f9c..0562641 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1645,7 +1645,8 @@ skip_inplace_map:
 	}
 
 	if (pixmap->devPrivate.ptr == NULL &&
-	    !sna_pixmap_alloc_cpu(sna, pixmap, priv, priv->gpu_damage != NULL && !priv->clear))
+	    !sna_pixmap_alloc_cpu(sna, pixmap, priv,
+				  flags & MOVE_READ ? priv->gpu_damage && !priv->clear : 0))
 		return false;
 
 	if (priv->clear) {
@@ -1985,7 +1986,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 
 	if (pixmap->devPrivate.ptr == NULL &&
 	    !sna_pixmap_alloc_cpu(sna, pixmap, priv,
-				  priv->gpu_damage && !priv->clear)) {
+				  flags & MOVE_READ ? priv->gpu_damage && !priv->clear : 0)) {
 		if (dx | dy)
 			RegionTranslate(region, -dx, -dy);
 		return false;

commit f235c74cd661970c76e152777e9a2c314a368a56
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jan 14 15:49:42 2013 +0000

    sna: Tweak consideration of last-cpu placement for inplace regions
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 9c33bb4..9125f9c 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1806,12 +1806,7 @@ static inline bool region_inplace(struct sna *sna,
 	if (wedged(sna) && !priv->pinned)
 		return false;
 
-	if (priv->cpu) {
-		DBG(("%s: no, preferring last action of CPU\n", __FUNCTION__));
-		return false;
-	}
-
-	if (!write_only &&
+	if ((priv->cpu || !write_only) &&
 	    region_overlaps_damage(region, priv->cpu_damage, 0, 0)) {
 		DBG(("%s: no, uncovered CPU damage pending\n", __FUNCTION__));
 		return false;
@@ -1822,6 +1817,11 @@ static inline bool region_inplace(struct sna *sna,
 		return true;
 	}
 
+	if (priv->cpu) {
+		DBG(("%s: no, preferring last action of CPU\n", __FUNCTION__));
+		return false;
+	}
+
 	if (priv->mapped) {
 		DBG(("%s: yes, already mapped, continuiung\n", __FUNCTION__));
 		return !IS_CPU_MAP(priv->gpu_bo->map);

commit 70c5e41b519e44e620948d683d3b1111494d2f48
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jan 14 15:03:59 2013 +0000

    sna: Limit temporary userptr uploads to large busy targets or LLC machines
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index c80f51d..9c33bb4 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4363,8 +4363,9 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 		if (USE_USERPTR_UPLOADS &&
 		    src_priv == NULL &&
 		    sna->kgem.has_userptr &&
-		    ((bo->tiling && !bo->scanout) || __kgem_bo_is_busy(&sna->kgem, bo)) &&
-		    box_inplace(src_pixmap, &region->extents)) {
+		    box_inplace(src_pixmap, &region->extents) &&
+		    ((sna->kgem.has_llc && bo->tiling && !bo->scanout) ||
+		     __kgem_bo_is_busy(&sna->kgem, bo))) {
 			struct kgem_bo *src_bo;
 			bool ok = false;
 

commit cf860da1c78244036c59edf934b312cc1367e8aa
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jan 14 12:50:54 2013 +0000

    sna: Apply PutImage optimisations to move-to-cpu
    
    We can replace the custom heuristics for PutImage by applying them to
    the common path, where hopefully they are equally valid.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 4d1d46a..2f8d696 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3385,6 +3385,7 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
 	size = kgem_surface_size(kgem, false, 0,
 				 width, height, bpp,
 				 I915_TILING_NONE, &pitch);
+	DBG(("%s: untiled size=%d\n", __FUNCTION__, size));
 	if (size > 0) {
 		if (size < 4096)
 			flags |= KGEM_CAN_CREATE_SMALL;
@@ -3409,6 +3410,7 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
 		size = kgem_surface_size(kgem, false, 0,
 					 width, height, bpp, tiling,
 					 &pitch);
+		DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size));
 		if (size > 0 && size <= kgem->max_gpu_size)
 			flags |= KGEM_CAN_CREATE_GPU;
 		if (size > 0 && size <= kgem->aperture_mappable/4)
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 6fe9e5e..bddeed4 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -111,6 +111,7 @@ struct sna_pixmap {
 	struct kgem_bo *gpu_bo, *cpu_bo;
 	struct sna_damage *gpu_damage, *cpu_damage;
 	void *ptr;
+#define PTR(ptr) ((void*)((uintptr_t)(ptr) & ~1))
 
 	struct list list;
 
@@ -125,11 +126,11 @@ struct sna_pixmap {
 #define PIN_SCANOUT 0x1
 #define PIN_DRI 0x2
 #define PIN_PRIME 0x4
+	uint8_t create :5;
 	uint8_t mapped :1;
 	uint8_t shm :1;
 	uint8_t clear :1;
 	uint8_t undamaged :1;
-	uint8_t create :3;
 	uint8_t header :1;
 	uint8_t cpu :1;
 };
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 636411b..c80f51d 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -96,7 +96,6 @@
 
 #define IS_STATIC_PTR(ptr) ((uintptr_t)(ptr) & 1)
 #define MAKE_STATIC_PTR(ptr) ((void*)((uintptr_t)(ptr) | 1))
-#define PTR(ptr) ((void*)((uintptr_t)(ptr) & ~1))
 
 #if 0
 static void __sna_fallback_flush(DrawablePtr d)
@@ -1331,7 +1330,8 @@ void sna_pixmap_destroy(PixmapPtr pixmap)
 
 static inline bool pixmap_inplace(struct sna *sna,
 				  PixmapPtr pixmap,
-				  struct sna_pixmap *priv)
+				  struct sna_pixmap *priv,
+				  bool write_only)
 {
 	if (FORCE_INPLACE)
 		return FORCE_INPLACE > 0;
@@ -1342,6 +1342,9 @@ static inline bool pixmap_inplace(struct sna *sna,
 	if (priv->mapped)
 		return !IS_CPU_MAP(priv->gpu_bo->map);
 
+	if (!write_only && priv->cpu_damage)
+		return false;
+
 	return (pixmap->devKind * pixmap->drawable.height >> 12) >
 		sna->kgem.half_cpu_cache_pages;
 }
@@ -1355,6 +1358,9 @@ sna_pixmap_create_mappable_gpu(PixmapPtr pixmap)
 	if (wedged(sna))
 		return false;
 
+	if ((priv->create & KGEM_CAN_CREATE_GTT) == 0)
+		return false;
+
 	assert_pixmap_damage(pixmap);
 
 	assert(priv->gpu_bo == NULL);
@@ -1432,14 +1438,25 @@ static inline bool use_cpu_bo_for_upload(struct sna *sna,
 
 static inline bool operate_inplace(struct sna_pixmap *priv, unsigned flags)
 {
-	if ((priv->create & KGEM_CAN_CREATE_GTT) == 0)
+	if ((priv->create & KGEM_CAN_CREATE_GTT) == 0) {
+		DBG(("%s: no, not accessible via GTT\n", __FUNCTION__));
 		return false;
+	}
 
-	if ((flags & MOVE_INPLACE_HINT) == 0)
+	if ((flags & MOVE_INPLACE_HINT) == 0) {
+		DBG(("%s: no, inplace operation not suitable\n", __FUNCTION__));
 		return false;
+	}
+
+	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) {
+		DBG(("%s: yes, CPU is busy\n", __FUNCTION__));
+		return true;
+	}
 
-	if (priv->gpu_damage && kgem_bo_is_busy(priv->gpu_bo))
+	if (flags & MOVE_WRITE && priv->gpu_bo&&kgem_bo_is_busy(priv->gpu_bo)) {
+		DBG(("%s: no, GPU is busy, so stage write\n", __FUNCTION__));
 		return false;
+	}
 
 	return true;
 }
@@ -1475,7 +1492,7 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 		     __FUNCTION__, priv->gpu_damage != NULL, priv->clear));
 
 		if (priv->create & KGEM_CAN_CREATE_GPU &&
-		    pixmap_inplace(sna, pixmap, priv)) {
+		    pixmap_inplace(sna, pixmap, priv, true)) {
 			assert(!priv->shm);
 			DBG(("%s: write inplace\n", __FUNCTION__));
 			if (priv->gpu_bo) {
@@ -1533,6 +1550,13 @@ skip_inplace_map:
 
 			sna_pixmap_free_gpu(sna, priv);
 			sna_pixmap_free_cpu(sna, priv);
+
+			if (!sna_pixmap_alloc_cpu(sna, pixmap, priv, false))
+				return false;


Reply to: