[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-intel: Changes to 'upstream-experimental'



 .gitignore                    |    1 
 NEWS                          |   29 
 configure.ac                  |    3 
 src/brw_defines.h             |    7 
 src/brw_structs.h             |    6 
 src/i965_3d.c                 |    7 
 src/i965_reg.h                |    4 
 src/i965_render.c             |   16 
 src/i965_video.c              |   24 
 src/intel_dri.c               |   29 
 src/intel_driver.c            |   24 
 src/intel_driver.h            |    8 
 src/intel_module.c            |   17 
 src/intel_options.c           |    1 
 src/intel_options.h           |    1 
 src/legacy/i810/i810_driver.c |    2 
 src/sna/Makefile.am           |    4 
 src/sna/brw/Makefile.am       |   59 +
 src/sna/brw/brw.h             |   17 
 src/sna/brw/brw_disasm.c      | 1101 ++++++++++++++++++++
 src/sna/brw/brw_eu.c          |  150 ++
 src/sna/brw/brw_eu.h          | 2266 ++++++++++++++++++++++++++++++++++++++++++
 src/sna/brw/brw_eu_debug.c    |   95 +
 src/sna/brw/brw_eu_emit.c     | 2002 +++++++++++++++++++++++++++++++++++++
 src/sna/brw/brw_eu_util.c     |  126 ++
 src/sna/brw/brw_sf.c          |   54 +
 src/sna/brw/brw_test.c        |   60 +
 src/sna/brw/brw_test.h        |   46 
 src/sna/brw/brw_test_gen4.c   |  199 +++
 src/sna/brw/brw_test_gen5.c   |  208 +++
 src/sna/brw/brw_test_gen6.c   |  209 +++
 src/sna/brw/brw_test_gen7.c   |  191 +++
 src/sna/brw/brw_wm.c          |  681 ++++++++++++
 src/sna/gen4_render.c         |  409 ++-----
 src/sna/gen4_render.h         |   13 
 src/sna/gen5_render.c         |  249 +---
 src/sna/gen5_render.h         |   13 
 src/sna/gen6_render.c         |  471 ++++----
 src/sna/gen6_render.h         |   32 
 src/sna/gen7_render.c         |  510 ++++-----
 src/sna/gen7_render.h         |   15 
 src/sna/kgem.c                |   21 
 src/sna/sna.h                 |   64 +
 src/sna/sna_accel.c           |  120 --
 src/sna/sna_composite.c       |   87 +
 src/sna/sna_display.c         |   17 
 src/sna/sna_driver.c          |   18 
 src/sna/sna_glyphs.c          |    6 
 src/sna/sna_render.h          |   42 
 src/sna/sna_stream.c          |   46 
 50 files changed, 8574 insertions(+), 1206 deletions(-)

New commits:
commit 5833ef173a01afb710acf10e806b83c5ca6efc09
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Aug 4 09:31:41 2012 +0100

    2.20.3 release
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/NEWS b/NEWS
index 7e267a6..5a9c495 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,32 @@
+Release 2.20.3 (2012-08-04)
+===========================
+Just a minor bugfix for gen4 chipsets (965gm, gm45 and friends) that
+crept into 2.20.2. As an added bonus, the pessimistic workaround for a
+GPU hang on gen4 has been relaxed and the shaders have been overhauled
+which should pave the way to eliminating the last of the uncommon CPU
+operations, along with immediately realising a small performance
+improvement.
+
+Bugs fixed since 2.20.2:
+
+ * Update DPMS bookkeeping after modeset
+   https://bugs.freedesktop.org/show_bug.cgi?id=52142
+
+ * Avoid overlapping gpu/cpu damage after ignoring cpu damage in the
+   consideration of placement for the operation.
+
+ * Enable acceleration by default on 830gm/845g. The GMCH on this pair
+   of chipsets is notoriously incoherent, so the GPU is almost certainly
+   going to hang at some point, though unlikely to hang the system and
+   should automatically disable acceleration (and thence behave
+   identically as if the acceleration was disabled from the start).
+   Option "NoAccel" can be used to disable all 2D acceleration and
+   Option "DRI" can be used to disable all 3D acceleration.
+   https://bugs.freedesktop.org/show_bug.cgi?id=52624
+
+ * Fix vertex bookkeeping for gen4 that was causing corruption in the
+   command stream.
+
 Release 2.20.2 (2012-07-27)
 ===========================
 For the last 9 months, since 2.16.901, we have been shipping a driver that
diff --git a/configure.ac b/configure.ac
index 2a8d08b..7ffbb75 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.20.2],
+        [2.20.3],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit 036b90f099af21e60fb4c3684616daf1927f705e
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Aug 3 21:41:59 2012 +0100

    sna/gen7: Correct number of texture coordinates used for video
    
    Fixes regression from
    
    commit 33c028f8be829caa4fdb9416ff177dc71f24b68e
    Author: Chris Wilson <chris@chris-wilson.co.uk>
    Date:   Wed Aug 1 01:17:50 2012 +0100
    
        sna/gen6+: Reduce floats-per-vertex for spans
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index b4a9223..9ef5e17 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2086,7 +2086,7 @@ gen7_render_video(struct sna *sna,
 			       is_planar_fourcc(frame->id) ?
 			       GEN7_WM_KERNEL_VIDEO_PLANAR :
 			       GEN7_WM_KERNEL_VIDEO_PACKED,
-			       1);
+			       2);
 	tmp.priv = frame;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER);

commit 05dcc5f1699ba90fc14c50882e8d4be89bc4a4f9
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Aug 3 15:08:45 2012 +0100

    Pass the chipset info through driverPrivate rather than a global pointer
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_driver.c b/src/intel_driver.c
index 0e27c48..1ef06fb 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -186,7 +186,7 @@ static void PreInitCleanup(ScrnInfoPtr scrn)
 static void intel_check_chipset_option(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
-	intel->info = intel_detect_chipset(scrn, intel->pEnt, intel->PciInfo);
+	intel_detect_chipset(scrn, intel->pEnt, intel->PciInfo);
 }
 
 static Bool I830GetEarlyOptions(ScrnInfoPtr scrn)
@@ -458,14 +458,15 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
 	if (flags & PROBE_DETECT)
 		return TRUE;
 
-	intel = intel_get_screen_private(scrn);
-	if (intel == NULL) {
-		intel = xnfcalloc(sizeof(intel_screen_private), 1);
+	if (((uintptr_t)scrn->driverPrivate) & 1) {
+		intel = xnfcalloc(sizeof(*intel), 1);
 		if (intel == NULL)
 			return FALSE;
 
+		intel->info = (void *)((uintptr_t)scrn->driverPrivate & ~1);
 		scrn->driverPrivate = intel;
 	}
+	intel = intel_get_screen_private(scrn);
 	intel->scrn = scrn;
 	intel->pEnt = pEnt;
 
diff --git a/src/intel_driver.h b/src/intel_driver.h
index d760cb4..882d889 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -238,9 +238,9 @@ struct intel_device_info {
 	int gen;
 };
 
-const struct intel_device_info *
-intel_detect_chipset(ScrnInfoPtr scrn,
-		     EntityInfoPtr ent, struct pci_device *pci);
+void intel_detect_chipset(ScrnInfoPtr scrn,
+			  EntityInfoPtr ent,
+			  struct pci_device *pci);
 
 
 #endif /* INTEL_DRIVER_H */
diff --git a/src/intel_module.c b/src/intel_module.c
index f1d9fc0..ae19f75 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -44,8 +44,6 @@
 #include "legacy/legacy.h"
 #include "sna/sna_module.h"
 
-static struct intel_device_info *chipset_info;
-
 static const struct intel_device_info intel_generic_info = {
 	.gen = -1,
 };
@@ -227,9 +225,10 @@ static const struct pci_id_match intel_device_match[] = {
 	{ 0, 0, 0 },
 };
 
-const struct intel_device_info *
+void
 intel_detect_chipset(ScrnInfoPtr scrn,
-		     EntityInfoPtr ent, struct pci_device *pci)
+		     EntityInfoPtr ent,
+		     struct pci_device *pci)
 {
 	MessageType from = X_PROBED;
 	const char *name = NULL;
@@ -258,7 +257,6 @@ intel_detect_chipset(ScrnInfoPtr scrn,
 	}
 
 	scrn->chipset = name;
-	return chipset_info;
 }
 
 /*
@@ -368,8 +366,6 @@ static Bool intel_pci_probe(DriverPtr		driver,
 	PciChipsets intel_pci_chipsets[NUM_CHIPSETS];
 	unsigned i;
 
-	chipset_info = (void *)match_data;
-
 	if (!has_kernel_mode_setting(device)) {
 #if KMS_ONLY
 		return FALSE;
@@ -404,6 +400,7 @@ static Bool intel_pci_probe(DriverPtr		driver,
 	scrn->driverVersion = INTEL_VERSION;
 	scrn->driverName = INTEL_DRIVER_NAME;
 	scrn->name = INTEL_NAME;
+	scrn->driverPrivate = (void *)(match_data | 1);
 	scrn->Probe = NULL;
 
 #if !KMS_ONLY
diff --git a/src/legacy/i810/i810_driver.c b/src/legacy/i810/i810_driver.c
index 949fd27..fc6369e 100644
--- a/src/legacy/i810/i810_driver.c
+++ b/src/legacy/i810/i810_driver.c
@@ -152,7 +152,7 @@ static int i810_pitches[] = {
 static Bool
 I810GetRec(ScrnInfoPtr scrn)
 {
-   if (scrn->driverPrivate)
+   if (((uintptr_t)scrn->driverPrivate & 1) == 0)
       return TRUE;
 
    scrn->driverPrivate = xnfcalloc(sizeof(I810Rec), 1);
diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c
index bd31996..2ccad59 100644
--- a/src/sna/sna_driver.c
+++ b/src/sna/sna_driver.c
@@ -383,14 +383,15 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
 
 	sna_selftest();
 
-	sna = to_sna(scrn);
-	if (sna == NULL) {
+	if (((uintptr_t)scrn->driverPrivate) & 1) {
 		sna = xnfcalloc(sizeof(struct sna), 1);
 		if (sna == NULL)
 			return FALSE;
 
+		sna->info = (void *)((uintptr_t)scrn->driverPrivate & ~1);
 		scrn->driverPrivate = sna;
 	}
+	sna = to_sna(scrn);
 	sna->scrn = scrn;
 	sna->pEnt = pEnt;
 
@@ -438,7 +439,7 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
 	if (sna->Options == NULL)
 		return FALSE;
 
-	sna->info = intel_detect_chipset(scrn, sna->pEnt, sna->PciInfo);
+	intel_detect_chipset(scrn, sna->pEnt, sna->PciInfo);
 
 	kgem_init(&sna->kgem, fd, sna->PciInfo, sna->info->gen);
 	if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE)) {

commit 2b3f4ca33a00440a7005fef69099f8dbaddbbad1
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Aug 3 14:27:51 2012 +0100

    Unexport intel_chipsets
    
    Only used by the core module code, so make it static.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_driver.h b/src/intel_driver.h
index d88f225..d760cb4 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -234,7 +234,6 @@
 #define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 40)
 #define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 60)
 
-extern SymTabRec *intel_chipsets;
 struct intel_device_info {
 	int gen;
 };
diff --git a/src/intel_module.c b/src/intel_module.c
index 7640916..f1d9fc0 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -155,7 +155,7 @@ static const SymTabRec _intel_chipsets[] = {
 };
 #define NUM_CHIPSETS (sizeof(_intel_chipsets) / sizeof(_intel_chipsets[0]))
 
-SymTabRec *intel_chipsets = (SymTabRec *) _intel_chipsets;
+static SymTabRec *intel_chipsets = (SymTabRec *) _intel_chipsets;
 
 #define INTEL_DEVICE_MATCH(d,i) \
     { 0x8086, (d), PCI_MATCH_ANY, PCI_MATCH_ANY, 0x3 << 16, 0xff << 16, (intptr_t)(i) }

commit 5ff749727d3590368806508ac0d0fa8efd1d1d51
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jul 25 22:21:29 2012 +0100

    sna/gen7: Add constant variations and hookup a basic GT descriptor for Haswell
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 193de00..b4a9223 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -83,20 +83,29 @@ struct gt_info {
 	} urb;
 };
 
-static const struct gt_info gt1_info = {
+static const struct gt_info ivb_gt1_info = {
 	.max_vs_threads = 36,
 	.max_gs_threads = 36,
-	.max_wm_threads = (48-1) << GEN7_PS_MAX_THREADS_SHIFT,
+	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
 	.urb = { 128, 512, 192 },
 };
 
-static const struct gt_info gt2_info = {
+static const struct gt_info ivb_gt2_info = {
 	.max_vs_threads = 128,
 	.max_gs_threads = 128,
-	.max_wm_threads = (172-1) << GEN7_PS_MAX_THREADS_SHIFT,
+	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
 	.urb = { 256, 704, 320 },
 };
 
+static const struct gt_info hsw_gt_info = {
+	.max_vs_threads = 8,
+	.max_gs_threads = 8,
+	.max_wm_threads =
+		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
+		1 << HSW_PS_SAMPLE_MASK_SHIFT,
+	.urb = { 128, 64, 64 },
+};
+
 static const uint32_t ps_kernel_packed[][4] = {
 #include "exa_wm_src_affine.g7b"
 #include "exa_wm_src_sample_argb.g7b"
@@ -1363,6 +1372,8 @@ gen7_bind_bo(struct sna *sna,
 	ss[5] = 0;
 	ss[6] = 0;
 	ss[7] = 0;
+	if (sna->kgem.gen == 75)
+		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
 	kgem_bo_set_binding(bo, format, offset);
 
@@ -4234,9 +4245,14 @@ static bool gen7_render_setup(struct sna *sna)
 	struct gen7_sampler_state *ss;
 	int i, j, k, l, m;
 
-	state->info = &gt1_info;
-	if (DEVICE_ID(sna->PciInfo) & 0x20)
-		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
+	if (sna->kgem.gen == 70) {
+		state->info = &ivb_gt1_info;
+		if (DEVICE_ID(sna->PciInfo) & 0x20)
+			state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
+	} else if (sna->kgem.gen == 75) {
+		state->info = &hsw_gt_info;
+	} else
+		return false;
 
 	sna_static_stream_init(&general);
 
diff --git a/src/sna/gen7_render.h b/src/sna/gen7_render.h
index 8de52a4..1661d4c 100644
--- a/src/sna/gen7_render.h
+++ b/src/sna/gen7_render.h
@@ -1237,6 +1237,17 @@ struct gen7_sampler_state {
 #define GEN7_SURFACE_DEPTH_SHIFT         21
 #define GEN7_SURFACE_PITCH_SHIFT         0
 
+#define HSW_SWIZZLE_ZERO		0
+#define HSW_SWIZZLE_ONE			1
+#define HSW_SWIZZLE_RED			4
+#define HSW_SWIZZLE_GREEN		5
+#define HSW_SWIZZLE_BLUE		6
+#define HSW_SWIZZLE_ALPHA		7
+#define __HSW_SURFACE_SWIZZLE(r,g,b,a) \
+	((a) << 16 | (b) << 19 | (g) << 22 | (r) << 25)
+#define HSW_SURFACE_SWIZZLE(r,g,b,a) \
+	__HSW_SURFACE_SWIZZLE(HSW_SWIZZLE_##r, HSW_SWIZZLE_##g, HSW_SWIZZLE_##b, HSW_SWIZZLE_##a)
+
 /* _3DSTATE_VERTEX_BUFFERS on GEN7*/
 /* DW1 */
 #define GEN7_VB0_ADDRESS_MODIFYENABLE   (1 << 14)
@@ -1281,7 +1292,9 @@ struct gen7_sampler_state {
 # define GEN7_PS_FLOATING_POINT_MODE_ALT                (1 << 16)
 /* DW3: scratch space */
 /* DW4 */
-# define GEN7_PS_MAX_THREADS_SHIFT                      24
+# define IVB_PS_MAX_THREADS_SHIFT                      24
+# define HSW_PS_MAX_THREADS_SHIFT                      23
+# define HSW_PS_SAMPLE_MASK_SHIFT                      12
 # define GEN7_PS_PUSH_CONSTANT_ENABLE                   (1 << 11)
 # define GEN7_PS_ATTRIBUTE_ENABLE                       (1 << 10)
 # define GEN7_PS_OMASK_TO_RENDER_TARGET                 (1 << 9)

commit cd028cad3dc9b059a3d83b818d581f86e16ec317
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Jul 26 13:17:11 2012 +0100

    sna: Limit the batch size on all gen7 variants
    
    Seems the limit on the surface state size is common across the family
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 635dd24..d7458ec 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -811,7 +811,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	if (gen == 22)
 		/* 865g cannot handle a batch spanning multiple pages */
 		kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
-	if (gen == 70)
+	if (gen >= 70 && gen < 80)
 		kgem->batch_size = 16*1024;
 	if (!kgem->has_relaxed_delta)
 		kgem->batch_size = 4*1024;

commit 4cd9ec9d404d934268952a1058afa07741b09efe
Author: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
Date:   Fri May 4 18:26:46 2012 +0200

    uxa: fix 3DSTATE_PS to fill in number of samples for Haswell
    
    The sample mask value must match what is set for 3DSTATE_SAMPLE_MASK,
    through gen6_upload_invariant_states().
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>

diff --git a/src/i965_reg.h b/src/i965_reg.h
index 45b6d08..4bb5e4d 100644
--- a/src/i965_reg.h
+++ b/src/i965_reg.h
@@ -221,6 +221,7 @@
 /* DW4 */
 # define GEN7_PS_MAX_THREADS_SHIFT_IVB                  24
 # define GEN7_PS_MAX_THREADS_SHIFT_HSW                  23
+# define GEN7_PS_SAMPLE_MASK_SHIFT_HSW			12
 # define GEN7_PS_PUSH_CONSTANT_ENABLE                   (1 << 11)
 # define GEN7_PS_ATTRIBUTE_ENABLE                       (1 << 10)
 # define GEN7_PS_OMASK_TO_RENDER_TARGET                 (1 << 9)
diff --git a/src/i965_render.c b/src/i965_render.c
index f7b21c8..42b1959 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2695,9 +2695,11 @@ gen7_composite_wm_state(intel_screen_private *intel,
 {
 	int num_surfaces = has_mask ? 3 : 2;
 	unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+	unsigned int num_samples = 0;
 
 	if (IS_HSW(intel)) {
 		max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+		num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
 	}
 
 	if (intel->gen6_render_state.kernel == bo)
@@ -2715,7 +2717,7 @@ gen7_composite_wm_state(intel_screen_private *intel,
 	OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
 		  (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
 	OUT_BATCH(0); /* scratch space base offset */
-	OUT_BATCH(((48 - 1) << max_threads_shift) |
+	OUT_BATCH(((48 - 1) << max_threads_shift) | num_samples |
 		  GEN7_PS_ATTRIBUTE_ENABLE |
 		  GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
diff --git a/src/i965_video.c b/src/i965_video.c
index 58b6222..3276788 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1641,9 +1641,11 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 	unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+	unsigned int num_samples = 0;
 
 	if (IS_HSW(intel)) {
 		max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+		num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
 	}
 
 	/* disable WM constant buffer */
@@ -1678,7 +1680,7 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 
 	OUT_BATCH(0); /* scratch space base offset */
 	OUT_BATCH(
-		((48 - 1) << max_threads_shift) |
+		((48 - 1) << max_threads_shift) | num_samples |
 		GEN7_PS_ATTRIBUTE_ENABLE |
 		GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH(

commit 412668464cf9505629eac20001701af3402dc6e8
Author: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
Date:   Fri May 4 17:55:10 2012 +0200

    uxa: set "Shader Channel Select" fields in surface state for Haswell
    
    For normal behaviour, each Shader Channel Select should be set to the
    value indicating that same channel. i.e. Shader Channel Select Red is
    set to SCS_RED, Shader Channel Select Green is set to SCS_GREEN, etc.
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>

diff --git a/src/brw_defines.h b/src/brw_defines.h
index 0df2491..e580a8f 100644
--- a/src/brw_defines.h
+++ b/src/brw_defines.h
@@ -469,6 +469,13 @@
 #define BRW_BORDER_COLOR_MODE_DEFAULT	0
 #define BRW_BORDER_COLOR_MODE_LEGACY	1
 
+#define HSW_SCS_ZERO					0
+#define HSW_SCS_ONE						1
+#define HSW_SCS_RED						4
+#define HSW_SCS_GREEN					5
+#define HSW_SCS_BLUE					6
+#define HSW_SCS_ALPHA					7
+
 #define BRW_TEXCOORDMODE_WRAP            0
 #define BRW_TEXCOORDMODE_MIRROR          1
 #define BRW_TEXCOORDMODE_CLAMP           2
diff --git a/src/brw_structs.h b/src/brw_structs.h
index f4dc927..20c2f85 100644
--- a/src/brw_structs.h
+++ b/src/brw_structs.h
@@ -1659,7 +1659,11 @@ struct gen7_surface_state
 
 	struct {
 		unsigned int resource_min_lod:12;
-		unsigned int pad0:16;
+		unsigned int pad0:4;
+		unsigned int shader_chanel_select_a:3;
+		unsigned int shader_chanel_select_b:3;
+		unsigned int shader_chanel_select_g:3;
+		unsigned int shader_chanel_select_r:3;
 		unsigned int alpha_clear_color:1;
 		unsigned int blue_clear_color:1;
 		unsigned int green_clear_color:1;
diff --git a/src/i965_render.c b/src/i965_render.c
index 30fef57..f7b21c8 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1392,6 +1392,13 @@ gen7_set_picture_surface_state(intel_screen_private *intel,
 	ss->ss2.width = pixmap->drawable.width - 1;
 	ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1;
 
+	if (IS_HSW(intel)) {
+		ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+		ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+		ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+		ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+	}
+
 	dri_bo_emit_reloc(intel->surface_bo,
 			  read_domains, write_domain,
 			  0,
diff --git a/src/i965_video.c b/src/i965_video.c
index bba282d..58b6222 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -510,6 +510,13 @@ static void gen7_create_dst_surface_state(ScrnInfoPtr scrn,
 
 	dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1;
 
+	if (IS_HSW(intel)) {
+		dest_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED;
+		dest_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+		dest_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+		dest_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+	}
+
 	dri_bo_subdata(surf_bo,
 		       offset, sizeof(dest_surf_state),
 		       &dest_surf_state);
@@ -525,6 +532,7 @@ static void gen7_create_src_surface_state(ScrnInfoPtr scrn,
 					drm_intel_bo *surface_bo,
 					uint32_t offset)
 {
+	intel_screen_private * const intel = intel_get_screen_private(scrn);
 	struct gen7_surface_state src_surf_state;
 
 	memset(&src_surf_state, 0, sizeof(src_surf_state));
@@ -547,6 +555,13 @@ static void gen7_create_src_surface_state(ScrnInfoPtr scrn,
 
 	src_surf_state.ss3.pitch = src_pitch - 1;
 
+	if (IS_HSW(intel)) {
+		src_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED;
+		src_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+		src_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+		src_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+	}
+
 	dri_bo_subdata(surface_bo,
 		       offset, sizeof(src_surf_state),
 		       &src_surf_state);

commit a47ba68996f117fabcb601d35bcc5f99cbcd6122
Author: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
Date:   Fri May 4 17:17:22 2012 +0200

    uxa: fix max PS threads shift value for Haswell
    
    The maximum number of threads is now a 9-bit value. Thus, one more bit
    towards LSB was re-used. i.e. bit position is now 23 instead of 24 on
    Ivy Bridge.
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>

diff --git a/src/i965_reg.h b/src/i965_reg.h
index e7b0d15..45b6d08 100644
--- a/src/i965_reg.h
+++ b/src/i965_reg.h
@@ -219,7 +219,8 @@
 # define GEN7_PS_FLOATING_POINT_MODE_ALT                (1 << 16)
 /* DW3: scratch space */
 /* DW4 */
-# define GEN7_PS_MAX_THREADS_SHIFT                      24
+# define GEN7_PS_MAX_THREADS_SHIFT_IVB                  24
+# define GEN7_PS_MAX_THREADS_SHIFT_HSW                  23
 # define GEN7_PS_PUSH_CONSTANT_ENABLE                   (1 << 11)
 # define GEN7_PS_ATTRIBUTE_ENABLE                       (1 << 10)
 # define GEN7_PS_OMASK_TO_RENDER_TARGET                 (1 << 9)
diff --git a/src/i965_render.c b/src/i965_render.c
index 2182df8..30fef57 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2687,6 +2687,11 @@ gen7_composite_wm_state(intel_screen_private *intel,
 			drm_intel_bo *bo)
 {
 	int num_surfaces = has_mask ? 3 : 2;
+	unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+
+	if (IS_HSW(intel)) {
+		max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+	}
 
 	if (intel->gen6_render_state.kernel == bo)
 		return;
@@ -2703,7 +2708,7 @@ gen7_composite_wm_state(intel_screen_private *intel,
 	OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
 		  (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
 	OUT_BATCH(0); /* scratch space base offset */
-	OUT_BATCH(((48 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+	OUT_BATCH(((48 - 1) << max_threads_shift) |
 		  GEN7_PS_ATTRIBUTE_ENABLE |
 		  GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
diff --git a/src/i965_video.c b/src/i965_video.c
index bcd6063..bba282d 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1625,6 +1625,11 @@ static void
 gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
+	unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+
+	if (IS_HSW(intel)) {
+		max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+	}
 
 	/* disable WM constant buffer */
 	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
@@ -1658,7 +1663,7 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 
 	OUT_BATCH(0); /* scratch space base offset */
 	OUT_BATCH(
-		((48 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+		((48 - 1) << max_threads_shift) |
 		GEN7_PS_ATTRIBUTE_ENABLE |
 		GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH(

commit ce4421e175ceb9259208c7c223af8d66282c3db3
Author: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
Date:   Fri May 4 17:09:19 2012 +0200

    uxa: use at least 64 URB entries for Haswell
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>

diff --git a/src/i965_3d.c b/src/i965_3d.c
index d4d38e5..a18db12 100644
--- a/src/i965_3d.c
+++ b/src/i965_3d.c
@@ -104,12 +104,17 @@ gen6_upload_urb(intel_screen_private *intel)
 void
 gen7_upload_urb(intel_screen_private *intel)
 {
+	unsigned int num_urb_entries = 32;
+
+	if (IS_HSW(intel))
+		num_urb_entries = 64;
+
 	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
 	OUT_BATCH(8); /* in 1KBs */
 
 	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
 	OUT_BATCH(
-		(32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
+		(num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
 		(2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
 		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
 

commit 8c880aa34c522b0d67cbb932771f00c947d00dec
Author: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
Date:   Fri May 4 17:43:19 2012 +0200

    uxa: add IS_HSW() macro to distinguish Haswell from Ivybridge
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne@intel.com>

diff --git a/src/intel_driver.h b/src/intel_driver.h
index 31c11f6..d88f225 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -218,6 +218,7 @@
 #define IS_GEN5(intel) IS_GENx(intel, 5)
 #define IS_GEN6(intel) IS_GENx(intel, 6)
 #define IS_GEN7(intel) IS_GENx(intel, 7)
+#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 75)
 
 /* Some chips have specific errata (or limits) that we need to workaround. */
 #define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M)

commit 0c0d1d956a8ba37d9e6f4a5e4f52018c8ce498e5
Author: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
Date:   Fri Aug 3 12:03:00 2012 +0100

    Introduce a chipset identifier for Haswell (Ivybridge successor)
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne@intel.com>

diff --git a/src/intel_module.c b/src/intel_module.c
index e1755ff..7640916 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -98,6 +98,10 @@ static const struct intel_device_info intel_ivybridge_info = {
 	.gen = 70,
 };
 
+static const struct intel_device_info intel_haswell_info = {
+	.gen = 75,
+};
+
 static const SymTabRec _intel_chipsets[] = {
 	{PCI_CHIP_I810,				"i810"},
 	{PCI_CHIP_I810_DC100,			"i810-dc100"},

commit 146959dd5ef28384a3db4fce4bf7840f2b3ec58c
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Aug 1 23:43:15 2012 +0100

    sna: Drop the clear flag as we discard the GPU damage
    
    Hopefully only to keep the sanity checks happy...
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 06c9fcc..496f57b 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -388,6 +388,7 @@ sna_copy_init_blt(struct sna_copy_op *copy,
 static void sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv)
 {
 	sna_damage_destroy(&priv->gpu_damage);
+	priv->clear = false;
 
 	if (priv->gpu_bo && !priv->pinned) {
 		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);

commit 7404e3085b2ee36fa24f77a02d156b4b1d2dff60
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Aug 1 23:37:35 2012 +0100

    sna: Ensure we only mark a clear for a fill on the GPU bo
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 8760f91..06c9fcc 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -9035,7 +9035,6 @@ sna_poly_fill_rect_blt(DrawablePtr drawable,
 						sna_damage_all(damage,
 							       pixmap->drawable.width,
 							       pixmap->drawable.height);
-						sna_pixmap(pixmap)->undamaged = false;
 					} else
 						sna_damage_add_box(damage, &r);
 				}
@@ -9045,12 +9044,19 @@ sna_poly_fill_rect_blt(DrawablePtr drawable,
 				    r.x2 - r.x1 == pixmap->drawable.width &&
 				    r.y2 - r.y1 == pixmap->drawable.height) {
 					struct sna_pixmap *priv = sna_pixmap(pixmap);
+					if (bo == priv->gpu_bo) {
+						sna_damage_all(&priv->gpu_damage,
+							       pixmap->drawable.width,
+							       pixmap->drawable.height);
+						sna_damage_destroy(&priv->cpu_damage);
+						list_del(&priv->list);
+						priv->undamaged = false;
+						priv->clear = true;
+						priv->clear_color = gc->alu == GXcopy ? pixel : 0;
 
-					priv->clear = true;
-					priv->clear_color = gc->alu == GXcopy ? pixel : 0;
-
-					DBG(("%s: pixmap=%ld, marking clear [%08x]\n",
-					     __FUNCTION__, pixmap->drawable.serialNumber, priv->clear_color));
+						DBG(("%s: pixmap=%ld, marking clear [%08x]\n",
+						     __FUNCTION__, pixmap->drawable.serialNumber, priv->clear_color));
+					}
 				}
 			} else
 				success = false;

commit ca46d1c7a18596ea9fe2b0577ccf1d110e3e42ac
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Aug 1 20:20:29 2012 +0100

    sna/gen7: Prefer the BLT for self-copies
    
    Looking at the test results for a third time, gives the edge to the BLT
    again.

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 96eb86a..193de00 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2470,6 +2470,13 @@ try_blt(struct sna *sna,
 	if (can_switch_rings(sna)) {
 		if (sna_picture_is_solid(src, NULL))
 			return true;
+
+		if (dst->pDrawable == src->pDrawable)
+			return true;
+
+		if (src->pDrawable &&
+		    get_drawable_pixmap(dst->pDrawable) == get_drawable_pixmap(src->pDrawable))
+			return true;
 	}
 
 	return false;
@@ -3311,7 +3318,8 @@ static inline bool prefer_blt_copy(struct sna *sna,
 }
 
 static inline bool
-overlaps(struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+overlaps(struct sna *sna,
+	 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
 	 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
 	 const BoxRec *box, int n)
 {
@@ -3320,6 +3328,9 @@ overlaps(struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
 	if (src_bo != dst_bo)
 		return false;
 
+	if (can_switch_rings(sna))
+		return true;
+
 	extents = box[0];
 	while (--n) {
 		box++;
@@ -3352,7 +3363,8 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
 	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
 	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
 	     src_bo == dst_bo,
-	     overlaps(src_bo, src_dx, src_dy,
+	     overlaps(sna,
+		      src_bo, src_dx, src_dy,
 		      dst_bo, dst_dx, dst_dy,
 		      box, n)));
 
@@ -3391,7 +3403,8 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
 	}
 
 	if (!(alu == GXcopy || alu == GXclear) ||
-	    overlaps(src_bo, src_dx, src_dy,
+	    overlaps(sna,
+		     src_bo, src_dx, src_dy,
 		     dst_bo, dst_dx, dst_dy,
 		     box, n)) {
 fallback_blt:

commit e4a3cd3d16447b5d83d1c8c63c342f1240935267
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Aug 1 17:37:33 2012 +0100

    sna: Add validation of the clear flag to pixmap debugging
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index a4287f7..8760f91 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -314,6 +314,11 @@ static void assert_pixmap_damage(PixmapPtr p)
 	if (priv == NULL)
 		return;
 
+	if (priv->clear) {
+		assert(DAMAGE_IS_ALL(priv->gpu_damage));
+		assert(priv->cpu_damage == NULL);
+	}
+
 	if (DAMAGE_IS_ALL(priv->gpu_damage) && DAMAGE_IS_ALL(priv->cpu_damage)) {
 		/* special upload buffer */
 		assert(priv->gpu_bo && priv->gpu_bo->proxy);

commit eaeda34bef711cc566f51dee092a19a3c4ac1a16
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Aug 1 16:23:05 2012 +0100

    sna: Fix computation of st values for SIMD8 dispatch
    
    Fixes regression with enabling 8-pixels.
    
    Reported-by: Mehran Kholdi <semekh.dev@gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=53044
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c
index bd4003d..f54e55e 100644
--- a/src/sna/brw/brw_wm.c
+++ b/src/sna/brw/brw_wm.c
@@ -42,12 +42,11 @@ static void brw_wm_affine_st(struct brw_compile *p, int dw,
 	if (dw == 16) {
 		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 		uv = p->gen >= 60 ? 6 : 3;
-		uv += 2*channel;
 	} else {
 		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 		uv = p->gen >= 60 ? 4 : 3;
-		uv += channel;
 	}
+	uv += 2*channel;
 
 	msg++;
 	if (p->gen >= 60) {
@@ -462,9 +461,6 @@ brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
 {
 	int src, mask;
 
-	if (dispatch == 8)
-		return false; /* XXX sampler alpha retuns all 0 */
-
 	if (p->gen < 60)


Reply to: