[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-intel: Changes to 'upstream-experimental'



 NEWS                         |   80 ++++
 configure.ac                 |    9 
 src/Makefile.am              |   63 +--
 src/intel_device.c           |  279 ++++++++++++++
 src/intel_display.c          |    2 
 src/intel_driver.c           |    2 
 src/intel_driver.h           |    8 
 src/intel_module.c           |   75 +--
 src/sna/Makefile.am          |    2 
 src/sna/blt.c                |  604 ++++++++++++++++++++++++++----
 src/sna/brw/brw_eu.h         |    2 
 src/sna/brw/brw_eu_debug.c   |   95 ----
 src/sna/compiler.h           |    2 
 src/sna/gen2_render.c        |   14 
 src/sna/gen3_render.c        |   29 +
 src/sna/gen4_render.c        |   25 -
 src/sna/gen5_render.c        |   63 +--
 src/sna/gen6_render.c        |   31 -
 src/sna/gen7_render.c        |   79 ++--
 src/sna/kgem.c               |  226 +++++++++--
 src/sna/kgem.h               |   98 +++--
 src/sna/sna.h                |    9 
 src/sna/sna_accel.c          |  842 ++++++++++++++++++++++++++++++++++---------
 src/sna/sna_blt.c            |  182 +++++----
 src/sna/sna_composite.c      |    2 
 src/sna/sna_damage.c         |   54 ++
 src/sna/sna_display.c        |  747 ++++++++++++++++++++++++++------------
 src/sna/sna_dri.c            |  170 ++++----
 src/sna/sna_driver.c         |  130 ------
 src/sna/sna_gradient.c       |   13 
 src/sna/sna_io.c             |  150 ++++---
 src/sna/sna_render.c         |   78 ++-
 src/sna/sna_render.h         |    3 
 src/sna/sna_tiling.c         |   71 ++-
 src/sna/sna_trapezoids.c     |    7 
 src/sna/sna_video.c          |   44 ++
 src/sna/sna_video.h          |    3 
 src/sna/sna_video_overlay.c  |   10 
 src/sna/sna_video_sprite.c   |    5 
 src/sna/sna_video_textured.c |   20 -
 test/Makefile.am             |    2 
 41 files changed, 3105 insertions(+), 1225 deletions(-)

New commits:
commit 7d9163983ea2e960c0a7b55266fcc532b9c6e382
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jun 30 15:26:57 2013 +0100

    2.21.11 release

diff --git a/NEWS b/NEWS
index 0455c87..576ef86 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,55 @@
+Release 2.21.11 (2013-06-30)
+============================
+An eventful week. What started with a regression with some builds of
+firefox on some machines led ultimately to the discovery of an older
+kernel bug. Aside from the work to fix the image bug and a few other
+older bugs that were reported and resolved this week, there is also a
+(hopefully) subtle change to the initial configuration of displays. In
+the absence of user overrides in xorg.conf, the DDX will try to preserve
+the same display configuration as used by the kernel, which hopefully
+will be the same configuration as set up by the BIOS. The result should
+be a boot sequence that does not resize at all (aka fastboot) - until
+the display manager takes over and loads a completely different
+configuration!
+
+ * Add reference counting of drmMaster for ZaphodHeads
+   https://bugs.freedesktop.org/show_bug.cgi?id=66041
+
+ * Add a GPU flush before changing blend modes on Ironlake
+   https://bugs.freedesktop.org/show_bug.cgi?id=51422
+
+ * Fix occasional missing images for inplace uploads
+   [regression from 2.21.10]
+   https://bugs.freedesktop.org/show_bug.cgi?id=66059
+
+ * Add missing utility files to the tarball and remove a few unused ones
+
+ * Initialise PolyPoint operand state before calling miWideDash
+   https://bugs.freedesktop.org/show_bug.cgi?id=66104
+
+ * Fix redirection handling for rendering into large surfaces
+   https://bugs.freedesktop.org/show_bug.cgi?id=66168
+   https://bugs.freedesktop.org/show_bug.cgi?id=66249
+
+ * Fix compilation of UXA with xorg-xserver < 1.10
+   [regression from 2.20.0]
+
+ * Fix consideration of gradients for deciding when to migrate render
+   operations
+   [performance regression from 2.21.10, the bug itself is older]
+   https://bugs.freedesktop.org/show_bug.cgi?id=66297
+
+Also fixed this week was:
+
+commit 22fd5ca947b58901927d100d2b1aa0f1672b3435
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jun 28 16:54:08 2013 +0100
+
+    drm/i915: Only clear write-domains after a successful wait-seqno
+
+which affects kernels 3.7 - 3.10, coming to a stable kernel near you soon.
+
+
 Release 2.21.10 (2013-06-22)
 ============================
 Fixes missing support for Xv (with the textured video adaptor) on
diff --git a/configure.ac b/configure.ac
index 7425fda..79018d5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.21.10],
+        [2.21.11],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit 5005bd2d52ab64cbeae099d512d0b65be6c4abaa
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jun 30 15:02:19 2013 +0100

    intel: Fix failure code for reporting !drmCheckModesettingSupported
    
    The new function returns the fd, not a Bool, so the error code must now
    be -1.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_device.c b/src/intel_device.c
index cb48c34..da4d21e 100644
--- a/src/intel_device.c
+++ b/src/intel_device.c
@@ -98,7 +98,7 @@ static int __intel_open_device(const struct pci_device *pci, char **path)
 			if (xf86LoadKernelModule("i915"))
 				ret = drmCheckModesettingSupported(id);
 			if (ret)
-				return FALSE;
+				return -1;
 			/* Be nice to the user and load fbcon too */
 			(void)xf86LoadKernelModule("fbcon");
 		}

commit f8738d7b4cc1c624d4390ef9ce7426ba457d7dd3
Author: Jonathan Gray <jsg@jsg.id.au>
Date:   Sun Jun 30 19:37:45 2013 +1000

    intel: replace direct ioctl use with drm{Set, Drop}Master
    
    Use drmSetMaster/drmDropMaster instead of calling the ioctls
    directly.  Fixes compilation on OpenBSD where these ioctls
    aren't defined.
    
    Signed-off-by: Jonathan Gray <jsg@jsg.id.au>

diff --git a/src/intel_device.c b/src/intel_device.c
index 5c36935..cb48c34 100644
--- a/src/intel_device.c
+++ b/src/intel_device.c
@@ -222,7 +222,7 @@ int intel_get_master(ScrnInfoPtr scrn)
 		int retry = 2000;
 
 		do {
-			ret = ioctl(dev->fd, DRM_IOCTL_SET_MASTER);
+			ret = drmSetMaster(dev->fd);
 			if (ret == 0)
 				break;
 			usleep(1000);
@@ -242,8 +242,8 @@ int intel_put_master(ScrnInfoPtr scrn)
 	ret = 0;
 	assert(dev->master_count);
 	if (--dev->master_count == 0) {
-		assert(ioctl(dev->fd, DRM_IOCTL_SET_MASTER) == 0);
-		ret = ioctl(dev->fd, DRM_IOCTL_DROP_MASTER);
+		assert(drmSetMaster(dev->fd) == 0);
+		ret = drmDropMaster(dev->fd);
 	}
 
 	return ret;

commit 40301e6d03f6e8d2d2d01e6bb9f1754a7e543a08
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jun 30 11:12:34 2013 +0100

    sna: Store the path used to open the device and pass to DRI
    
    Avoid having to search the device tree once again in order to simply
    recover the path we used to open the device.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_device.c b/src/intel_device.c
index 5c49db0..5c36935 100644
--- a/src/intel_device.c
+++ b/src/intel_device.c
@@ -42,6 +42,7 @@
 #include "intel_driver.h"
 
 struct intel_device {
+	char *path;
 	int fd;
 	int open_count;
 	int master_count;
@@ -80,11 +81,11 @@ static int fd_set_cloexec(int fd)
 	return fd;
 }
 
-static int __intel_open_device(const struct pci_device *pci, const char *path)
+static int __intel_open_device(const struct pci_device *pci, char **path)
 {
 	int fd;
 
-	if (path == NULL) {
+	if (*path == NULL) {
 		char id[20];
 		int ret;
 
@@ -103,14 +104,21 @@ static int __intel_open_device(const struct pci_device *pci, const char *path)
 		}
 
 		fd = drmOpen(NULL, id);
+		if (fd != -1) {
+			*path = drmGetDeviceNameFromFd(fd);
+			if (*path == NULL) {
+				close(fd);
+				fd = -1;
+			}
+		}
 	} else {
 #ifdef O_CLOEXEC
-		fd = open(path, O_RDWR | O_CLOEXEC);
+		fd = open(*path, O_RDWR | O_CLOEXEC);
 #else
 		fd = -1;
 #endif
 		if (fd == -1)
-			fd = fd_set_cloexec(open(path, O_RDWR));
+			fd = fd_set_cloexec(open(*path, O_RDWR));
 	}
 
 	return fd;
@@ -121,6 +129,7 @@ int intel_open_device(int entity_num,
 		      const char *path)
 {
 	struct intel_device *dev;
+	char *local_path;
 	int fd;
 
 	if (intel_device_key == -1)
@@ -132,16 +141,20 @@ int intel_open_device(int entity_num,
 	if (dev)
 		return dev->fd;
 
-	fd = __intel_open_device(pci, path);
+	local_path = path ? strdup(path) : NULL;
+
+	fd = __intel_open_device(pci, &local_path);
 	if (fd == -1)
 		return -1;
 
 	dev = malloc(sizeof(*dev));
 	if (dev == NULL) {
+		free(local_path);
 		close(fd);
 		return -1;
 	}
 
+	dev->path = local_path;
 	dev->fd = fd;
 	dev->open_count = 0;
 	dev->master_count = 0;
@@ -190,6 +203,13 @@ int intel_get_device(ScrnInfoPtr scrn)
 	return dev->fd;
 }
 
+const char *intel_get_device_name(ScrnInfoPtr scrn)
+{
+	struct intel_device *dev = intel_device(scrn);
+	assert(dev && dev->path);
+	return dev->path;
+}
+
 int intel_get_master(ScrnInfoPtr scrn)
 {
 	struct intel_device *dev = intel_device(scrn);
@@ -236,6 +256,7 @@ void __intel_uxa_release_device(ScrnInfoPtr scrn)
 		intel_set_device(scrn, NULL);
 
 		drmClose(dev->fd);
+		free(dev->path);
 		free(dev);
 	}
 }
@@ -253,5 +274,6 @@ void intel_put_device(ScrnInfoPtr scrn)
 	intel_set_device(scrn, NULL);
 
 	drmClose(dev->fd);
+	free(dev->path);
 	free(dev);
 }
diff --git a/src/intel_driver.h b/src/intel_driver.h
index ed58444..22b623f 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -313,6 +313,7 @@ void intel_detect_chipset(ScrnInfoPtr scrn,
 
 int intel_open_device(int entity_num, const struct pci_device *pci, const char *path);
 int intel_get_device(ScrnInfoPtr scrn);
+const char *intel_get_device_name(ScrnInfoPtr scrn);
 int intel_get_master(ScrnInfoPtr scrn);
 int intel_put_master(ScrnInfoPtr scrn);
 void intel_put_device(ScrnInfoPtr scrn);
diff --git a/src/sna/sna.h b/src/sna/sna.h
index f720c64..7fe7359 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -293,7 +293,6 @@ struct sna {
 
 	bool dri_available;
 	bool dri_open;
-	char *deviceName;
 
 	/* Broken-out options. */
 	OptionInfoPtr Options;
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index e610d52..ca5f088 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -2445,11 +2445,10 @@ bool sna_dri_open(struct sna *sna, ScreenPtr screen)
 		return false;
 	}
 
-	sna->deviceName = drmGetDeviceNameFromFd(sna->kgem.fd);
 	memset(&info, '\0', sizeof(info));
 	info.fd = sna->kgem.fd;
 	info.driverName = dri_driver_name(sna);
-	info.deviceName = sna->deviceName;
+	info.deviceName = intel_get_device_name(sna->scrn);
 
 	DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n",
 	     __FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName));
@@ -2487,5 +2486,4 @@ void sna_dri_close(struct sna *sna, ScreenPtr screen)
 {
 	DBG(("%s()\n", __FUNCTION__));
 	DRI2CloseScreen(screen);
-	drmFree(sna->deviceName);
 }

commit 17da58f904e75d434aaf71e297e15d41153ba954
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Jun 30 11:01:49 2013 +0100

    sna: Replace conflicting drmDropMaster
    
    Calling drmDropMaster twice along the CloseScreen path is not a good
    idea.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c
index 3428d5b..7092edc 100644
--- a/src/sna/sna_driver.c
+++ b/src/sna/sna_driver.c
@@ -734,6 +734,8 @@ static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL)
 
 	DBG(("%s\n", __FUNCTION__));
 
+	/* XXX Note that we will leak kernel resources if !vtSema */
+
 	xf86_hide_cursors(scrn);
 	sna_uevent_fini(scrn);
 
@@ -749,8 +751,10 @@ static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL)
 		sna->front = NULL;
 	}
 
-	drmDropMaster(sna->kgem.fd);
-	scrn->vtSema = FALSE;
+	if (scrn->vtSema) {
+		intel_put_master(scrn);
+		scrn->vtSema = FALSE;
+	}
 
 	xf86_cursors_fini(screen);
 
@@ -773,7 +777,6 @@ static Bool sna_late_close_screen(CLOSE_SCREEN_ARGS_DECL)
 	free(depths);
 
 	free(screen->visuals);
-	intel_put_master(xf86ScreenToScrn(screen));
 
 	return TRUE;
 }

commit 3a787da7e888da7e9943be94bd1cb177fe1495ab
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jun 29 21:57:20 2013 +0100

    sna: Allow tiled memcpy on i386
    
    With the split into per-swizzle functions, and with the forced
    optimisation levels, it appears that i386 doesn't suffer so badly and
    the tiled memcpy routines are a viable method.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 5c029ad..aadc5f2 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -985,15 +985,6 @@ static void kgem_init_swizzling(struct kgem *kgem)
 {
 	struct drm_i915_gem_get_tiling tiling;
 
-#ifndef __x86_64__
-	/* Between a register starved compiler emitting attrocious code
-	 * and the extra overhead in the kernel for managing the tight
-	 * 32-bit address space, unless we have a 64-bit system,
-	 * using memcpy_to_tiled_x() is extremely slow.
-	 */
-	return;
-#endif
-
 	if (kgem->gen < 050) /* bit17 swizzling :( */
 		return;
 

commit 1d9941a7c003587c0bd732fb8b21fee5cefa6f87
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jun 29 21:56:13 2013 +0100

    sna: Add the Ofast option to the critical memcpy routines
    
    Always enable gcc to fully optimize the core memcpy routines (provided
    that optimisations are not entirely disabled, for instance for
    debugging).
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index 75e691a..641b490 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -66,7 +66,7 @@
 #endif
 
 #if HAS_GCC(4, 5) && defined(__OPTIMIZE__)
-#define fast_memcpy __attribute__((target("inline-all-stringops")))
+#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops")))
 #else
 #define fast_memcpy
 #endif

commit 84c190db33142f3c1ec347ec0bf87f77ce132d36
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jun 29 19:06:40 2013 +0100

    sna: Fix get_image_inplace to use the pixmap offset
    
    The inplace routine assumed that the region to be read was already in
    pixmap coordinates. Making it so makes the code easier, so do it.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 2ed5007..80bc198 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -14279,12 +14279,11 @@ static int sna_create_gc(GCPtr gc)
 }
 
 static bool
-sna_get_image_blt(DrawablePtr drawable,
+sna_get_image_blt(PixmapPtr pixmap,
 		  RegionPtr region,
 		  char *dst,
 		  unsigned flags)
 {
-	PixmapPtr pixmap = get_drawable_pixmap(drawable);
 	struct sna_pixmap *priv = sna_pixmap(pixmap);
 	struct sna *sna = to_sna_from_pixmap(pixmap);
 	struct kgem_bo *dst_bo;
@@ -14343,21 +14342,17 @@ sna_get_image_blt(DrawablePtr drawable,
 	DBG(("%s: download through a temporary map\n", __FUNCTION__));
 
 	pitch = PixmapBytePad(region->extents.x2 - region->extents.x1,
-			      drawable->depth);
+			      pixmap->drawable.depth);
 	dst_bo = kgem_create_map(&sna->kgem, dst,
 				 pitch * (region->extents.y2 - region->extents.y1),
 				 false);
 	if (dst_bo) {
-		int16_t dx, dy;
-
 		dst_bo->flush = true;
 		dst_bo->pitch = pitch;
 		kgem_bo_mark_unreusable(dst_bo);
 
-		get_drawable_deltas(drawable, pixmap, &dx, &dy);
-
 		ok = sna->render.copy_boxes(sna, GXcopy,
-					    pixmap, priv->gpu_bo, dx, dy,
+					    pixmap, priv->gpu_bo, 0, 0,
 					    pixmap, dst_bo,
 					    -region->extents.x1,
 					    -region->extents.y1,
@@ -14373,12 +14368,11 @@ sna_get_image_blt(DrawablePtr drawable,
 }
 
 static bool
-sna_get_image_inplace(DrawablePtr drawable,
+sna_get_image_inplace(PixmapPtr pixmap,
 		      RegionPtr region,
 		      char *dst,
 		      unsigned flags)
 {
-	PixmapPtr pixmap = get_drawable_pixmap(drawable);
 	struct sna_pixmap *priv = sna_pixmap(pixmap);
 	struct sna *sna = to_sna_from_pixmap(pixmap);
 	char *src;
@@ -14411,14 +14405,13 @@ sna_get_image_inplace(DrawablePtr drawable,
 
 	kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC);
 
-
 	if (priv->gpu_bo->tiling) {
 		DBG(("%s: download through a tiled CPU map\n", __FUNCTION__));
 		memcpy_from_tiled_x(&sna->kgem, src, dst,
 				    pixmap->drawable.bitsPerPixel,
 				    priv->gpu_bo->pitch,
 				    PixmapBytePad(region->extents.x2 - region->extents.x1,
-						  drawable->depth),
+						  pixmap->drawable.depth),
 				    region->extents.x1, region->extents.y1,
 				    0, 0,
 				    region->extents.x2 - region->extents.x1,
@@ -14429,7 +14422,7 @@ sna_get_image_inplace(DrawablePtr drawable,
 			   pixmap->drawable.bitsPerPixel,
 			   priv->gpu_bo->pitch,
 			   PixmapBytePad(region->extents.x2 - region->extents.x1,
-					 drawable->depth),
+					 pixmap->drawable.depth),
 			   region->extents.x1, region->extents.y1,
 			   0, 0,
 			   region->extents.x2 - region->extents.x1,
@@ -14447,7 +14440,6 @@ sna_get_image(DrawablePtr drawable,
 {
 	RegionRec region;
 	unsigned int flags;
-	bool can_blt;
 
 	if (!fbDrawableEnabled(drawable))
 		return;
@@ -14457,50 +14449,55 @@ sna_get_image(DrawablePtr drawable,
 	     (long)get_drawable_pixmap(drawable)->drawable.serialNumber,
 	     x, y, w, h, format, mask, drawable->depth));
 
-	region.extents.x1 = x + drawable->x;
-	region.extents.y1 = y + drawable->y;
-	region.extents.x2 = region.extents.x1 + w;
-	region.extents.y2 = region.extents.y1 + h;
-	region.data = NULL;
-
-	can_blt = (ACCEL_GET_IMAGE &&
-		   !FORCE_FALLBACK &&
-		   format == ZPixmap &&
-		   drawable->bitsPerPixel >= 8 &&
-		   PM_IS_SOLID(drawable, mask));
-
 	flags = MOVE_READ;
 	if ((w | h) == 1)
 		flags |= MOVE_INPLACE_HINT;
 	if (w == drawable->width)
 		flags |= MOVE_WHOLE_HINT;
 
-	if (can_blt && sna_get_image_blt(drawable, &region, dst, flags))
-		return;
+	if (ACCEL_GET_IMAGE &&
+	    !FORCE_FALLBACK &&
+	    format == ZPixmap &&
+	    drawable->bitsPerPixel >= 8 &&
+	    PM_IS_SOLID(drawable, mask)) {
+		PixmapPtr pixmap = get_drawable_pixmap(drawable);
+		int16_t dx, dy;
 
-	if (can_blt && sna_get_image_inplace(drawable, &region, dst, flags))
-		return;
+		get_drawable_deltas(drawable, pixmap, &dx, &dy);
+		region.extents.x1 = x + drawable->x + dx;
+		region.extents.y1 = y + drawable->y + dy;
+		region.extents.x2 = region.extents.x1 + w;
+		region.extents.y2 = region.extents.y1 + h;
+		region.data = NULL;
 
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, flags))
-		return;
+		if (sna_get_image_blt(pixmap, &region, dst, flags))
+			return;
 
-	if (can_blt) {
-		PixmapPtr pixmap = get_drawable_pixmap(drawable);
-		int16_t dx, dy;
+		if (sna_get_image_inplace(pixmap, &region, dst, flags))
+			return;
+
+		if (!sna_drawable_move_region_to_cpu(&pixmap->drawable,
+						     &region, flags))
+			return;
 
 		DBG(("%s: copy box (%d, %d), (%d, %d)\n",
 		     __FUNCTION__,
 		     region.extents.x1, region.extents.y1,
 		     region.extents.x2, region.extents.y2));
-		get_drawable_deltas(drawable, pixmap, &dx, &dy);
 		assert(has_coherent_ptr(sna_pixmap(pixmap)));
 		memcpy_blt(pixmap->devPrivate.ptr, dst, drawable->bitsPerPixel,
 			   pixmap->devKind, PixmapBytePad(w, drawable->depth),
-			   region.extents.x1 + dx,
-			   region.extents.y1 + dy,
-			   0, 0, w, h);
-	} else
-		fbGetImage(drawable, x, y, w, h, format, mask, dst);
+			   region.extents.x1, region.extents.y1, 0, 0, w, h);
+	} else {
+		region.extents.x1 = x + drawable->x;
+		region.extents.y1 = y + drawable->y;
+		region.extents.x2 = region.extents.x1 + w;
+		region.extents.y2 = region.extents.y1 + h;
+		region.data = NULL;
+
+		if (sna_drawable_move_region_to_cpu(drawable, &region, flags))
+			fbGetImage(drawable, x, y, w, h, format, mask, dst);
+	}
 }
 
 static void

commit c7d246ba6f750ee080c38ccc5603d01fcf7fce92
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jun 29 16:31:34 2013 +0100

    sna: Move the clone discard into free-gpu
    
    Rather than peppering the discard manually before the call to free the
    GPU bo, always discard the COW when we actually free the GPU bo.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 2666798..2ed5007 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -423,6 +423,9 @@ sna_copy_init_blt(struct sna_copy_op *copy,
 static void sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv)
 {
 	assert(priv->gpu_damage == NULL || priv->gpu_bo);
+
+	if (priv->cow)
+		sna_pixmap_undo_cow(sna, priv, 0);
 	assert(priv->cow == NULL);
 
 	sna_damage_destroy(&priv->gpu_damage);
@@ -2069,8 +2072,6 @@ mark_damage:
 			       pixmap->drawable.width,
 			       pixmap->drawable.height);
 		assert(priv->gpu_damage == NULL);
-		if (priv->cow)
-			sna_pixmap_undo_cow(sna, priv, 0);
 		sna_pixmap_free_gpu(sna, priv);
 
 		if (priv->flush) {
@@ -2083,14 +2084,14 @@ done:
 	if (flags & MOVE_WRITE) {
 		assert(DAMAGE_IS_ALL(priv->cpu_damage));
 		assert(priv->gpu_damage == NULL);
+		assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL);
 		if (priv->cow)
 			sna_pixmap_undo_cow(sna, priv, 0);
-		priv->source_count = SOURCE_BIAS;
-		assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL);
-		if (priv->gpu_bo && priv->gpu_bo->domain != DOMAIN_GPU) {
-			DBG(("%s: discarding inactive GPU bo\n", __FUNCTION__));
+		if (priv->gpu_bo && priv->gpu_bo->rq == NULL) {
+			DBG(("%s: discarding idle GPU bo\n", __FUNCTION__));
 			sna_pixmap_free_gpu(sna, priv);
 		}
+		priv->source_count = SOURCE_BIAS;
 	}
 
 	if (priv->cpu_bo) {
@@ -2102,10 +2103,6 @@ done:
 			assert(pixmap->devPrivate.ptr == (void *)((unsigned long)priv->cpu_bo->map & ~3));
 			assert((flags & MOVE_WRITE) == 0 || !kgem_bo_is_busy(priv->cpu_bo));
 		}
-		if (flags & MOVE_WRITE) {
-			DBG(("%s: discarding GPU bo in favour of CPU bo\n", __FUNCTION__));
-			sna_pixmap_free_gpu(sna, priv);
-		}
 	}
 	priv->cpu =
 		(flags & (MOVE_INPLACE_HINT | MOVE_ASYNC_HINT)) == 0 &&
@@ -2691,11 +2688,8 @@ done:
 				      pixmap->drawable.width,
 				      pixmap->drawable.height);
 		if (DAMAGE_IS_ALL(priv->cpu_damage)) {
-			if (priv->gpu_bo) {
-				DBG(("%s: replaced entire pixmap\n",
-				     __FUNCTION__));
-				sna_pixmap_free_gpu(sna, priv);
-			}
+			DBG(("%s: replaced entire pixmap\n", __FUNCTION__));
+			sna_pixmap_free_gpu(sna, priv);
 		}
 		if (priv->flush) {
 			assert(!priv->shm);
@@ -4032,8 +4026,6 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
 
 	if (priv->gpu_bo && (replaces || priv->gpu_bo->proxy)) {
 		DBG(("%s: discarding cached upload proxy\n", __FUNCTION__));
-		if (priv->cow)
-			sna_pixmap_undo_cow(sna, priv, 0);
 		sna_pixmap_free_gpu(sna, priv);
 	}
 

commit 6ab2a3acf71b5204c399c7649e5601c93a99f25f
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Jun 29 15:04:09 2013 +0100

    sna: Improve checks for coherent access through CPU mappings
    
    Refactor the CPU mapping tests to a single function, and remember to
    test for a pending GPU write (i.e. bo->exec).
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index d1a391a..c7c7fce 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -564,6 +564,22 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
 	return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
 }
 
+static inline bool kgem_bo_can_map__cpu(struct kgem *kgem,
+					struct kgem_bo *bo,
+					bool write)
+{
+	if (bo->scanout)
+		return false;
+
+	if (kgem->has_llc)
+		return true;
+
+	if (bo->domain != DOMAIN_CPU)
+		return false;
+
+	return !write || bo->exec == NULL;
+}
+
 static inline bool kgem_bo_is_snoop(struct kgem_bo *bo)
 {
 	assert(bo->refcnt);
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 18836c8..2666798 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1403,7 +1403,8 @@ void sna_pixmap_destroy(PixmapPtr pixmap)
 }
 
 static inline bool has_coherent_map(struct sna *sna,
-				    struct kgem_bo *bo)
+				    struct kgem_bo *bo,
+				    unsigned flags)
 {
 	assert(bo->map);
 
@@ -1413,7 +1414,7 @@ static inline bool has_coherent_map(struct sna *sna,
 	if (bo->tiling != I915_TILING_NONE)
 		return false;
 
-	return bo->domain == DOMAIN_CPU || sna->kgem.has_llc;
+	return kgem_bo_can_map__cpu(&sna->kgem, bo, flags & MOVE_WRITE);
 }
 
 static inline bool has_coherent_ptr(struct sna_pixmap *priv)
@@ -1437,7 +1438,7 @@ static inline bool has_coherent_ptr(struct sna_pixmap *priv)
 static inline bool pixmap_inplace(struct sna *sna,
 				  PixmapPtr pixmap,
 				  struct sna_pixmap *priv,
-				  bool write_only)
+				  unsigned flags)
 {
 	if (FORCE_INPLACE)
 		return FORCE_INPLACE > 0;
@@ -1446,9 +1447,9 @@ static inline bool pixmap_inplace(struct sna *sna,
 		return false;
 
 	if (priv->mapped)
-		return has_coherent_map(sna, priv->gpu_bo);
+		return has_coherent_map(sna, priv->gpu_bo, flags);
 
-	if (!write_only && priv->cpu_damage)
+	if (flags & MOVE_READ && priv->cpu_damage)
 		return false;
 
 	return (pixmap->devKind * pixmap->drawable.height >> 12) >
@@ -1858,7 +1859,7 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 			if (!priv->mapped)
 				goto skip_inplace_map;
 
-			assert(has_coherent_map(sna, priv->gpu_bo));
+			assert(has_coherent_map(sna, priv->gpu_bo, flags));
 			pixmap->devKind = priv->gpu_bo->pitch;
 
 			assert(priv->gpu_bo->proxy == NULL);
@@ -1906,7 +1907,7 @@ skip_inplace_map:
 	assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL);
 
 	if (operate_inplace(priv, flags) &&
-	    pixmap_inplace(sna, pixmap, priv, (flags & MOVE_READ) == 0) &&
+	    pixmap_inplace(sna, pixmap, priv, flags) &&
 	     sna_pixmap_create_mappable_gpu(pixmap, (flags & MOVE_READ) == 0)) {
 		DBG(("%s: try to operate inplace (GTT)\n", __FUNCTION__));
 		assert(priv->cow == NULL || (flags & MOVE_WRITE) == 0);
@@ -1918,7 +1919,7 @@ skip_inplace_map:
 		pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo);
 		priv->mapped = pixmap->devPrivate.ptr != NULL;
 		if (priv->mapped) {
-			assert(has_coherent_map(sna, priv->gpu_bo));
+			assert(has_coherent_map(sna, priv->gpu_bo, flags));
 			pixmap->devKind = priv->gpu_bo->pitch;
 			if (flags & MOVE_WRITE) {
 				assert(priv->gpu_bo->proxy == NULL);
@@ -1946,7 +1947,7 @@ skip_inplace_map:
 	}
 
 	if (priv->gpu_damage && priv->cpu_damage == NULL && !priv->cow &&
-	    (flags & MOVE_READ || priv->gpu_bo->domain == DOMAIN_CPU || sna->kgem.has_llc) &&
+	    (flags & MOVE_READ || kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, flags & MOVE_WRITE)) &&
 	    priv->gpu_bo->tiling == I915_TILING_NONE &&
 	    ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == 0 ||
 	     !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))) {
@@ -2161,7 +2162,7 @@ static inline bool region_inplace(struct sna *sna,
 				  PixmapPtr pixmap,
 				  RegionPtr region,
 				  struct sna_pixmap *priv,
-				  bool write_only)
+				  unsigned flags)
 {
 	assert_pixmap_damage(pixmap);
 
@@ -2171,7 +2172,7 @@ static inline bool region_inplace(struct sna *sna,
 	if (wedged(sna) && !priv->pinned)
 		return false;
 
-	if ((priv->cpu || !write_only) &&
+	if ((priv->cpu || flags & MOVE_READ) &&
 	    region_overlaps_damage(region, priv->cpu_damage, 0, 0)) {
 		DBG(("%s: no, uncovered CPU damage pending\n", __FUNCTION__));
 		return false;
@@ -2184,7 +2185,7 @@ static inline bool region_inplace(struct sna *sna,
 
 	if (priv->mapped) {
 		DBG(("%s: yes, already mapped, continuiung\n", __FUNCTION__));
-		return has_coherent_map(sna, priv->gpu_bo);
+		return has_coherent_map(sna, priv->gpu_bo, flags);
 	}
 
 	if (priv->flush) {
@@ -2301,7 +2302,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	}
 
 	if (operate_inplace(priv, flags) &&
-	    region_inplace(sna, pixmap, region, priv, (flags & MOVE_READ) == 0) &&
+	    region_inplace(sna, pixmap, region, priv, flags) &&
 	     sna_pixmap_create_mappable_gpu(pixmap, false)) {
 		DBG(("%s: try to operate inplace\n", __FUNCTION__));
 		assert(priv->cow == NULL || (flags & MOVE_WRITE) == 0);
@@ -2312,7 +2313,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo);
 		priv->mapped = pixmap->devPrivate.ptr != NULL;
 		if (priv->mapped) {
-			assert(has_coherent_map(sna, priv->gpu_bo));
+			assert(has_coherent_map(sna, priv->gpu_bo, flags));
 			pixmap->devKind = priv->gpu_bo->pitch;
 			if (flags & MOVE_WRITE) {
 				if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
@@ -2359,7 +2360,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	     sna_damage_contains_box__no_reduce(priv->gpu_damage,
 						&region->extents)) &&
 	    priv->gpu_bo->tiling == I915_TILING_NONE &&
-	    (priv->gpu_bo->domain == DOMAIN_CPU || sna->kgem.has_llc) &&
+	    kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, flags & MOVE_WRITE) &&
 	    ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == 0 ||
 	     !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))) {
 		DBG(("%s: try to operate inplace (CPU), read? %d, write? %d\n",
@@ -2370,7 +2371,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		pixmap->devPrivate.ptr =
 			kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
 		if (pixmap->devPrivate.ptr != NULL) {
-			assert(has_coherent_map(sna, priv->gpu_bo));
+			assert(has_coherent_map(sna, priv->gpu_bo, flags));
 			assert(IS_CPU_MAP(priv->gpu_bo->map));
 			pixmap->devKind = priv->gpu_bo->pitch;
 			priv->cpu = true;
@@ -3966,13 +3967,7 @@ static bool can_upload_tiled_x(struct kgem *kgem, struct sna_pixmap *priv)
 		return false;
 	}
 
-	if (bo->scanout) {
-		DBG(("%s: no, is scanout\n", __FUNCTION__, bo->scanout));
-		return false;
-	}
-
-	DBG(("%s? domain=%d, has_llc=%d\n", __FUNCTION__, bo->domain, kgem->has_llc));
-	return bo->domain == DOMAIN_CPU || kgem->has_llc;
+	return kgem_bo_can_map__cpu(kgem, bo, true);
 }
 
 static bool
@@ -4972,7 +4967,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 	hint = source_prefer_gpu(sna, src_priv, region, src_dx, src_dy) ?:
 		region_inplace(sna, dst_pixmap, region,
-			       dst_priv, alu_overwrites(alu));
+			       dst_priv, alu_overwrites(alu) ? MOVE_WRITE : MOVE_READ | MOVE_WRITE);
 	if (dst_priv->cpu_damage && alu_overwrites(alu)) {
 		DBG(("%s: overwritting CPU damage\n", __FUNCTION__));
 		if (region_subsumes_damage(region, dst_priv->cpu_damage)) {
@@ -14409,10 +14404,7 @@ sna_get_image_inplace(DrawablePtr drawable,
 		break;
 	}
 
-	if (priv->gpu_bo->scanout)
-		return false;
-
-	if (!sna->kgem.has_llc && priv->gpu_bo->domain != DOMAIN_CPU)
+	if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, false))
 		return false;
 
 	if (priv->gpu_damage == NULL ||
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 6ab907f..a4932b8 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -53,15 +53,12 @@ static inline bool must_tile(struct sna *sna, int width, int height)
 		upload_too_large(sna, width, height));
 }
 
-static bool bo_inplace_tiled(struct kgem *kgem, struct kgem_bo *bo)
+static bool bo_inplace_tiled(struct kgem *kgem, struct kgem_bo *bo, bool write)
 {
 	if (bo->tiling != I915_TILING_X)
 		return false;
 
-	if (bo->scanout)
-		return false;
-
-	return bo->domain == DOMAIN_CPU || kgem->has_llc;
+	return kgem_bo_can_map__cpu(kgem, bo, write);
 }
 
 static bool download_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
@@ -69,7 +66,7 @@ static bool download_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
 	if (!kgem->memcpy_from_tiled_x)
 		return false;
 
-	return bo_inplace_tiled(kgem, bo);
+	return bo_inplace_tiled(kgem, bo, false);
 }
 
 static bool
@@ -537,7 +534,7 @@ static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
 	if (!kgem->memcpy_to_tiled_x)
 		return false;
 
-	return bo_inplace_tiled(kgem, bo);
+	return bo_inplace_tiled(kgem, bo, true);
 }
 
 static bool

commit 9026bb954646c0425360c2236e26c79d097142cd
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Jun 28 15:59:17 2013 +0100

    sna: Inspect the dirty boxes when querying whether damage contains a rectangle
    
    This helps in the cases where we have subtracted a small number of
    rectangles from an all-damage pixmap (such as a number of successive
    GetImage, PutImage operations). The danger is that we end up searching a
    long list of dirty boxes - maybe just search the first chunk if that
    becomes noticeable?


Reply to: