
xserver-xorg-video-intel: Changes to 'upstream-experimental'



 NEWS                                                 |   63 +
 configure.ac                                         |    9 
 man/intel.man                                        |    6 
 src/Makefile.am                                      |    5 
 src/brw_structs.h                                    |  100 ++
 src/common.h                                         |    7 
 src/i830_reg.h                                       |    2 
 src/i830_render.c                                    |    2 
 src/i965_reg.h                                       |   98 ++
 src/i965_render.c                                    |  789 ++++++++++++++++--
 src/i965_video.c                                     |  794 ++++++++++++++++---
 src/intel.h                                          |   31 
 src/intel_batchbuffer.c                              |   47 -
 src/intel_batchbuffer.h                              |   13 
 src/intel_display.c                                  |    8 
 src/intel_dri.c                                      |  247 ++++-
 src/intel_driver.c                                   |  206 +++-
 src/intel_driver.h                                   |   78 -
 src/intel_hwmc.c                                     |   14 
 src/intel_memory.c                                   |   40 
 src/intel_module.c                                   |  113 ++
 src/intel_shadow.c                                   |  204 ++++
 src/intel_uxa.c                                      |  334 +------
 src/intel_video.c                                    |   53 -
 src/intel_video.h                                    |    7 
 src/legacy/i810/i810.h                               |    3 
 src/legacy/i810/i810_dri.c                           |    4 
 src/render_program/Makefile.am                       |   45 +
 src/render_program/exa_wm_ca.g6a                     |    1 
 src/render_program/exa_wm_ca.g6b                     |    4 
 src/render_program/exa_wm_ca_srcalpha.g6a            |    1 
 src/render_program/exa_wm_ca_srcalpha.g6b            |    4 
 src/render_program/exa_wm_mask_affine.g6a            |   47 +
 src/render_program/exa_wm_mask_affine.g6b            |    4 
 src/render_program/exa_wm_mask_projective.g6a        |   63 +
 src/render_program/exa_wm_mask_projective.g6b        |   12 
 src/render_program/exa_wm_mask_sample_a.g4a          |    3 
 src/render_program/exa_wm_mask_sample_a.g4b          |    3 
 src/render_program/exa_wm_mask_sample_a.g4b.gen5     |    3 
 src/render_program/exa_wm_mask_sample_a.g6a          |    1 
 src/render_program/exa_wm_mask_sample_a.g6b          |    3 
 src/render_program/exa_wm_mask_sample_argb.g4a       |    3 
 src/render_program/exa_wm_mask_sample_argb.g4b       |    3 
 src/render_program/exa_wm_mask_sample_argb.g4b.gen5  |    3 
 src/render_program/exa_wm_mask_sample_argb.g6a       |    1 
 src/render_program/exa_wm_mask_sample_argb.g6b       |    3 
 src/render_program/exa_wm_noca.g6a                   |    1 
 src/render_program/exa_wm_noca.g6b                   |    4 
 src/render_program/exa_wm_src_affine.g6a             |   47 +
 src/render_program/exa_wm_src_affine.g6b             |    4 
 src/render_program/exa_wm_src_projective.g6a         |   63 +
 src/render_program/exa_wm_src_projective.g6b         |   12 
 src/render_program/exa_wm_src_sample_a.g4a           |    3 
 src/render_program/exa_wm_src_sample_a.g4b           |    3 
 src/render_program/exa_wm_src_sample_a.g4b.gen5      |    3 
 src/render_program/exa_wm_src_sample_a.g6a           |    1 
 src/render_program/exa_wm_src_sample_a.g6b           |    3 
 src/render_program/exa_wm_src_sample_argb.g4a        |    3 
 src/render_program/exa_wm_src_sample_argb.g4b        |    3 
 src/render_program/exa_wm_src_sample_argb.g4b.gen5   |    3 
 src/render_program/exa_wm_src_sample_argb.g6a        |    1 
 src/render_program/exa_wm_src_sample_argb.g6b        |    3 
 src/render_program/exa_wm_src_sample_planar.g4a      |    7 
 src/render_program/exa_wm_src_sample_planar.g4b      |    7 
 src/render_program/exa_wm_src_sample_planar.g4b.gen5 |    7 
 src/render_program/exa_wm_src_sample_planar.g6a      |    1 
 src/render_program/exa_wm_src_sample_planar.g6b      |    5 
 src/render_program/exa_wm_write.g6a                  |   77 +
 src/render_program/exa_wm_write.g6b                  |   17 
 src/render_program/exa_wm_yuv_rgb.g6a                |    1 
 src/render_program/exa_wm_yuv_rgb.g6b                |   12 
 uxa/uxa-glyphs.c                                     |    7 
 uxa/uxa.c                                            |    3 
 73 files changed, 3083 insertions(+), 702 deletions(-)

New commits:
commit 861ee17ae662ffec2269292f92da7833df508b1a
Author: Carl Worth <cworth@cworth.org>
Date:   Fri Nov 5 12:55:02 2010 -0700

    Update version number to 2.13.901
    
    In preparation for a snapshot.

diff --git a/configure.ac b/configure.ac
index be9b473..054ab12 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.13.0],
+        [2.13.901],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit aaa436b437839a80d340cc5ad598ec4b47ed2eff
Author: Carl Worth <cworth@cworth.org>
Date:   Fri Nov 5 12:54:16 2010 -0700

    NEWS: Add release notes for 2.13.901 snapshot.
    
    An intermediate snapshot to capture recent developments.

diff --git a/NEWS b/NEWS
index 05d3849..db7778d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
+Snapshot 2.13.901 (2010-11-05)
+==============================
+This is an intermediate snapshot of ongoing driver development. The
+primary purpose of this snapshot is to capture some recent
+improvements, (particularly in Sandybridge support), for further
+testing.
+
 Release 2.13.0 (2010-09-30)
 ===========================
 We are pleased to announce this major release of the xf86-video-intel

commit a44a63d2ff6c01c3dc61de6f736dd441ddd25e52
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Nov 5 09:58:45 2010 +0000

    Wait for any pending rendering before switching modes.
    
    A perennial problem we have is the accursed WAIT_FOR_EVENT hangs, which
    occur when we switch the framebuffer before the WAIT_FOR_EVENT
    completes, upsetting the GPU.
    
    We have tried more subtle approaches to detect these and fix them up in
    the kernel, to no avail. What we need to do is to delay the framebuffer
    flip until the WAIT completes, which is quite tricky in the kernel
    without new ioctls and round-trips. Instead, apply the big hammer from
    userspace and synchronise all rendering before changing the framebuffer.
    I expect this not to cause noticeable latency when switching modes (far
    less than the actual modeswitch itself) and to stop these hangs once
    and for all.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=31401 (...)
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index fec5281..9f8ca6f 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -283,6 +283,9 @@ void intel_batch_wait_last(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
+	if (intel->last_batch_bo == NULL)
+		return;
+
 	/* Map it CPU write, which guarantees it's done.  This is a completely
 	 * non performance path, so we don't need anything better.
 	 */
diff --git a/src/intel_display.c b/src/intel_display.c
index d32224e..7eef80b 100644
--- a/src/intel_display.c
+++ b/src/intel_display.c
@@ -359,6 +359,8 @@ intel_crtc_apply(xf86CrtcPtr crtc)
 			       crtc->gamma_blue, crtc->gamma_size);
 #endif
 
+	/* drain any pending waits on the current framebuffer */
+	intel_batch_wait_last(crtc->scrn);
 
 	x = crtc->x;
 	y = crtc->y;

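The fix relies on the libdrm idiom of mapping a buffer object with write
intent, which blocks until the GPU has finished rendering to it. A minimal
standalone sketch of that idiom, assuming libdrm_intel and a hypothetical
helper name rather than the driver's own intel_batch_wait_last(), might
look like:

/*
 * Sketch only: wait for the GPU to finish with a buffer object by
 * mapping it for CPU write.  Not a performance path, so the simple
 * approach is sufficient.
 */
#include <stddef.h>
#include <intel_bufmgr.h>

static void
wait_for_bo_idle(drm_intel_bo *bo)
{
	if (bo == NULL)
		return;

	/* A write mapping stalls until all pending GPU rendering to the
	 * buffer has completed. */
	drm_intel_bo_map(bo, 1);
	drm_intel_bo_unmap(bo);
}
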
commit 52b32436b9e14a3e13818f80102150ff5bc3c002
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Nov 3 19:42:26 2010 +0000

    Downgrade tiling allocation failure to a warning
    
    We emitted this message as an error even though we fall back and attempt
    to allocate a non-tiled framebuffer before failing (with an appropriate
    error message).
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_memory.c b/src/intel_memory.c
index 4daf036..e9ea58d 100644
--- a/src/intel_memory.c
+++ b/src/intel_memory.c
@@ -232,7 +232,7 @@ retry:
 	}
 
 	if (intel->tiling && tiling_mode != I915_TILING_X) {
-		xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+		xf86DrvMsg(scrn->scrnIndex, X_WARNING,
 			   "Failed to set tiling on frontbuffer.\n");
 	}
 

commit 18839aaec505f8bbdb0690fe694162bf09a87d5c
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Nov 3 18:14:29 2010 +0000

    Fallback to shadow for Sandybridge if we don't have access to the BLT
    
    If we attempt to emit BLT batches without kernel support, we just end up
    with EINVAL and no rendering. Prevent this, and avoid uncached
    rendering, by restoring the shadow fallback paths if there is no BLT
    support.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_driver.c b/src/intel_driver.c
index fd51899..926c7e3 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -468,6 +468,24 @@ static void I830XvInit(ScrnInfoPtr scrn)
 		   intel->colorKey);
 }
 
+static Bool can_accelerate_2d(struct intel_screen_private *intel)
+{
+	if (INTEL_INFO(intel)->gen >= 60) {
+		drm_i915_getparam_t gp;
+		int value;
+
+		/* On Sandybridge we need the BLT in order to do anything since
+		 * it so frequently used in the acceleration code paths.
+		 */
+		gp.value = &value;
+		gp.param = I915_PARAM_HAS_BLT;
+		if (drmIoctl(intel->drmSubFD, DRM_IOCTL_I915_GETPARAM, &gp))
+			return FALSE;
+	}
+
+	return TRUE;
+}
+
 /**
  * This is called before ScreenInit to do any require probing of screen
  * configuration.
@@ -575,6 +593,8 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
 	}
 
 	intel->use_shadow = FALSE;
+	if (!can_accelerate_2d(intel))
+		intel->use_shadow = TRUE;
 
 	if (xf86IsOptionSet(intel->Options, OPTION_SHADOW)) {
 		intel->use_shadow =

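The probe itself is a single GETPARAM ioctl on the DRM file descriptor. A
self-contained sketch of the same check, under the assumption of an
already-open DRM fd and with a hypothetical helper name (and, unlike the
driver, also inspecting the value the kernel reports), could look like:

/*
 * Sketch only: ask the i915 kernel driver whether the BLT ring is
 * available.  Pre-BLT kernels reject the parameter with EINVAL, in
 * which case acceleration via the BLT must be assumed unavailable.
 */
#include <xf86drm.h>
#include <i915_drm.h>

static int
kernel_has_blt(int fd)
{
	drm_i915_getparam_t gp;
	int value = 0;

	gp.param = I915_PARAM_HAS_BLT;
	gp.value = &value;

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;

	return value;
}
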
commit 8ff37667bf864b771d16a58fc5041cb48408b6a8
Author: Eric Anholt <eric@anholt.net>
Date:   Tue Nov 2 10:36:03 2010 -0700

    Remove the intermittent GEM_THROTTLE call.
    
    This is a holdover from early GEM work when we weren't syncing on the
    DRI client side.  It would keep clients from getting too far ahead and
    killing their interactivity, by bringing everyone to a halt when
    anyone was too far ahead.
    
    Now, GL clients throttle themselves to avoid the problem, and it turns
    out that in the case that they don't (long rendering to buffers with
    no swap), this actually reduces X Server interactivity: instead of X
    rendering lagging behind input, you get no response for seconds at a
    time, then a burst of rendering, then nothing again.
    
    Reported by ajax.  Tested by moving a window while running
    cairo-perf-trace on the GL backend (improvement) and X backend (no
    significant change in responsiveness).

diff --git a/src/intel_driver.c b/src/intel_driver.c
index 01c8c1b..fd51899 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -688,9 +688,6 @@ I830BlockHandler(int i, pointer blockData, pointer pTimeout, pointer pReadmask)
 	intel->BlockHandler = screen->BlockHandler;
 	screen->BlockHandler = I830BlockHandler;
 
-	if (scrn->vtSema == TRUE)
-		drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE);
-
 	intel_uxa_block_handler(intel);
 	intel_video_block_handler(intel);
 }

commit 540c5742186c26c3aeccb7b5d3ff0f374722a20c
Author: Xiang, Haihao <haihao.xiang@intel.com>
Date:   Tue Nov 2 11:05:32 2010 +0800

    render: use headerless render target write
    
    It is weird that some rendercheck cases only work correctly with a
    headerless write. intel-gen4asm needs to be updated to support
    headerless writes.
    
    Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>

diff --git a/src/render_program/exa_wm_write.g6a b/src/render_program/exa_wm_write.g6a
index 27f91b5..c0f3cc1 100644
--- a/src/render_program/exa_wm_write.g6a
+++ b/src/render_program/exa_wm_write.g6a
@@ -36,6 +36,7 @@ define(`slot_b_00',     `m6')
 define(`slot_b_01',     `m7')
 define(`slot_a_00',     `m8')
 define(`slot_a_01',     `m9')
+define(`data_port_msg_2_ind',	`2')
 
 mov (8) slot_r_00<1>F     src_sample_r_01<8,8,1>F { align1 };
 mov (8) slot_r_01<1>F     src_sample_r_23<8,8,1>F { align1 };
@@ -49,22 +50,19 @@ mov (8) slot_b_01<1>F     src_sample_b_23<8,8,1>F { align1 };
 mov (8) slot_a_00<1>F     src_sample_a_01<8,8,1>F { align1 };
 mov (8) slot_a_01<1>F     src_sample_a_23<8,8,1>F { align1 };
 
-/* pass payload in m0-m1 */
-mov (8) data_port_msg_0<1>UD    g0<8,8,1>UD { align1 };
-mov (8) data_port_msg_1<1>UD    g1<8,8,1>UD { align1 };
-
 /* write */
 send (16) 
-	data_port_msg_0_ind 
+	data_port_msg_2_ind 
 	acc0<1>UW 
 	null
 	write (
 	       0,  /* binding_table */
 	       16,  /* pixel scordboard clear, msg type simd16 single source */
 	       12,  /* render target write */
-	       0   /* no write commit message */
+	       0,   /* no write commit message */
+	       0  /* headerless render target write */
 	) 
-	mlen 10
+	mlen 8
 	rlen 0
 	{ align1 EOT };
 
diff --git a/src/render_program/exa_wm_write.g6b b/src/render_program/exa_wm_write.g6b
index 9db2129..3cb6bff 100644
--- a/src/render_program/exa_wm_write.g6b
+++ b/src/render_program/exa_wm_write.g6b
@@ -6,9 +6,7 @@
    { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
    { 0x00600001, 0x210003be, 0x008d0280, 0x00000000 },
    { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
-   { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
-   { 0x00600001, 0x20200022, 0x008d0020, 0x00000000 },
-   { 0x05800031, 0x24001cc8, 0x00000000, 0x94099000 },
+   { 0x05800031, 0x24001cc8, 0x00000040, 0x90019000 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },

commit 7a3109312e43ce4c2f600a65fe6bc821a8f0ebde
Author: Xiang, Haihao <haihao.xiang@intel.com>
Date:   Wed Oct 27 16:51:28 2010 +0800

    render: acceleration for composite on Sandybridge
    
    Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>

diff --git a/src/i965_render.c b/src/i965_render.c
index 885889e..e2b67c3 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -208,14 +208,8 @@ i965_check_composite(int op,
 		     int width, int height)
 {
 	ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum];
-	intel_screen_private *intel = intel_get_screen_private(scrn);
 	uint32_t tmp1;
 
-	if (IS_GEN6(intel)) {
-		intel_debug_fallback(scrn, "Unsupported hardware\n");
-		return FALSE;
-	}
-
 	/* Check for unsupported compositing operations. */
 	if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) {
 		intel_debug_fallback(scrn,
@@ -522,6 +516,73 @@ static const uint32_t ps_kernel_masknoca_projective_static_gen5[][4] = {
 #include "exa_wm_write.g4b.gen5"
 };
 
+/* programs for GEN6 */
+static const uint32_t ps_kernel_nomask_affine_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_nomask_projective_static_gen6[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_affine_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_projective_static_gen6[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca.g4b.gen5"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_a.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen6[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_a.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_a.g6b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_a.g6b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g6b"
+};
+
 #define WM_STATE_DECL(kernel) \
     struct brw_wm_unit_state wm_state_ ## kernel[SAMPLER_STATE_FILTER_COUNT] \
 						[SAMPLER_STATE_EXTEND_COUNT] \
@@ -607,6 +668,25 @@ static struct wm_kernel_info wm_kernels_gen5[] = {
 	       ps_kernel_masknoca_projective_static_gen5, TRUE),
 };
 
+static struct wm_kernel_info wm_kernels_gen6[] = {
+	KERNEL(WM_KERNEL_NOMASK_AFFINE,
+	       ps_kernel_nomask_affine_static_gen6, FALSE),
+	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
+	       ps_kernel_nomask_projective_static_gen6, FALSE),
+	KERNEL(WM_KERNEL_MASKCA_AFFINE,
+	       ps_kernel_maskca_affine_static_gen6, TRUE),
+	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
+	       ps_kernel_maskca_projective_static_gen6, TRUE),
+	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
+	       ps_kernel_maskca_srcalpha_affine_static_gen6, TRUE),
+	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+	       ps_kernel_maskca_srcalpha_projective_static_gen6, TRUE),
+	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
+	       ps_kernel_masknoca_affine_static_gen6, TRUE),
+	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
+	       ps_kernel_masknoca_projective_static_gen6, TRUE),
+};
+
 #undef KERNEL
 
 typedef struct _brw_cc_unit_state_padded {
@@ -656,12 +736,22 @@ struct gen4_render_state {
 	drm_intel_bo *sip_kernel_bo;
 	dri_bo *vertex_buffer_bo;
 
+	drm_intel_bo *cc_vp_bo;
+	drm_intel_bo *gen6_blend_bo;
+	drm_intel_bo *gen6_depth_stencil_bo;
+	drm_intel_bo *ps_sampler_state_bo[SAMPLER_STATE_FILTER_COUNT]
+	    [SAMPLER_STATE_EXTEND_COUNT]
+	    [SAMPLER_STATE_FILTER_COUNT]
+	    [SAMPLER_STATE_EXTEND_COUNT];
 	gen4_composite_op composite_op;
 
 	int vb_offset;
 	int vertex_size;
 };
 
+static void gen6_emit_composite_state(ScrnInfoPtr scrn);
+static void gen6_render_state_init(ScrnInfoPtr scrn);
+
 /**
  * Sets up the SF state pointing at an SF kernel.
  *
@@ -1489,9 +1579,27 @@ static Bool i965_composite_check_aperture(ScrnInfoPtr scrn)
 		render_state->cc_state_bo,
 		render_state->sip_kernel_bo,
 	};
-
-	return drm_intel_bufmgr_check_aperture_space(bo_table,
-						     ARRAY_SIZE(bo_table)) == 0;
+	drm_intel_bo *gen6_bo_table[] = {
+		intel->batch_bo,
+		composite_op->surface_state_binding_table_bo,
+		render_state->vertex_buffer_bo,
+		render_state->wm_kernel_bo[composite_op->wm_kernel],
+		render_state->ps_sampler_state_bo[composite_op->src_filter]
+		    [composite_op->src_extend]
+		    [composite_op->mask_filter]
+		    [composite_op->mask_extend],
+		render_state->cc_vp_bo,
+		render_state->cc_state_bo,
+		render_state->gen6_blend_bo,
+		render_state->gen6_depth_stencil_bo,
+	};
+	
+	if (INTEL_INFO(intel)->gen >= 60)
+		return drm_intel_bufmgr_check_aperture_space(gen6_bo_table,
+							ARRAY_SIZE(gen6_bo_table)) == 0;
+	else
+		return drm_intel_bufmgr_check_aperture_space(bo_table,
+							ARRAY_SIZE(bo_table)) == 0;
 }
 
 Bool
@@ -1833,19 +1941,32 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 		intel_batch_submit(scrn, FALSE);
 
 	intel_batch_start_atomic(scrn, 200);
-	if (intel->needs_render_state_emit)
-		i965_emit_composite_state(scrn);
+	if (intel->needs_render_state_emit) {
+		if (INTEL_INFO(intel)->gen >= 60)
+			gen6_emit_composite_state(scrn);
+		else
+			i965_emit_composite_state(scrn);
+	} else {
+		OUT_BATCH(MI_FLUSH);
+	}
 
-	OUT_BATCH(MI_FLUSH);
 	/* Set up the pointer to our (single) vertex buffer */
 	OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
-	OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
-		  VB0_VERTEXDATA |
-		  (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
+
+	if (INTEL_INFO(intel)->gen >= 60) {
+		OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+			GEN6_VB0_VERTEXDATA |
+			(render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
+	} else {
+		OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
+			VB0_VERTEXDATA |
+			(render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
+	}
+
 	OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
 		  render_state->vb_offset * 4);
 
-	if (IS_GEN5(intel))
+	if (INTEL_INFO(intel)->gen >= 50)
 		OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
 			  render_state->vb_offset * 4 + i * 4);
 	else
@@ -1894,6 +2015,9 @@ void gen4_render_state_init(ScrnInfoPtr scrn)
 	drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo;
 	drm_intel_bo *border_color_bo;
 
+	if (INTEL_INFO(intel)->gen >= 60)
+		return gen6_render_state_init(scrn);
+
 	if (intel->gen4_render_state == NULL)
 		intel->gen4_render_state = calloc(sizeof(*render_state), 1);
 
@@ -2031,9 +2155,539 @@ void gen4_render_state_cleanup(ScrnInfoPtr scrn)
 						     wm_state_bo[m][i][j][k]
 						     [l]);
 
+	for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++)
+		for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++)
+			for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++)
+				for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++)
+					drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]);
+
 	drm_intel_bo_unreference(render_state->cc_state_bo);
 	drm_intel_bo_unreference(render_state->sip_kernel_bo);
 
+	drm_intel_bo_unreference(render_state->cc_vp_bo);
+	drm_intel_bo_unreference(render_state->gen6_blend_bo);
+	drm_intel_bo_unreference(render_state->gen6_depth_stencil_bo);
+
 	free(intel->gen4_render_state);
 	intel->gen4_render_state = NULL;
 }
+
+/*
+ * for GEN6+
+ */
+#define GEN6_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen6_blend_state), 64)
+
+static drm_intel_bo *
+gen6_composite_create_cc_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_color_calc_state *cc_state;
+	drm_intel_bo *cc_bo;
+
+	cc_bo = drm_intel_bo_alloc(intel->bufmgr,
+				"gen6 CC state",
+				sizeof(*cc_state), 
+				4096);
+	drm_intel_bo_map(cc_bo, TRUE);
+	cc_state = cc_bo->virtual;
+	memset(cc_state, 0, sizeof(*cc_state));
+	cc_state->constant_r = 1.0;
+	cc_state->constant_g = 0.0;
+	cc_state->constant_b = 1.0;
+	cc_state->constant_a = 1.0;
+	drm_intel_bo_unmap(cc_bo);
+
+	return cc_bo;
+}
+
+static drm_intel_bo *
+gen6_composite_create_blend_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_blend_state *blend_state;
+	drm_intel_bo *blend_bo;
+	int src_blend, dst_blend;
+
+	blend_bo = drm_intel_bo_alloc(intel->bufmgr,
+				"gen6 BLEND state",
+				BRW_BLENDFACTOR_COUNT * BRW_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
+				4096);
+	drm_intel_bo_map(blend_bo, TRUE);
+	memset(blend_bo->virtual, 0, blend_bo->size);
+
+	for (src_blend = 0; src_blend < BRW_BLENDFACTOR_COUNT; src_blend++) {
+		for (dst_blend = 0; dst_blend < BRW_BLENDFACTOR_COUNT; dst_blend++) {
+			uint32_t blend_state_offset = ((src_blend * BRW_BLENDFACTOR_COUNT) + dst_blend) * GEN6_BLEND_STATE_PADDED_SIZE;
+
+			blend_state = (struct gen6_blend_state *)((char *)blend_bo->virtual + blend_state_offset);
+			blend_state->blend0.dest_blend_factor = dst_blend;
+			blend_state->blend0.source_blend_factor = src_blend;
+			blend_state->blend0.blend_func = BRW_BLENDFUNCTION_ADD;
+			blend_state->blend0.ia_blend_enable = 0;
+			blend_state->blend0.blend_enable = 1;
+
+			blend_state->blend1.post_blend_clamp_enable = 1;
+			blend_state->blend1.pre_blend_clamp_enable = 1;
+			blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
+			blend_state->blend1.dither_enable = 0;
+			blend_state->blend1.logic_op_enable = 0;
+			blend_state->blend1.alpha_test_enable = 0;
+		}
+	}
+
+	drm_intel_bo_unmap(blend_bo);
+	return blend_bo;
+}
+
+static drm_intel_bo *
+gen6_composite_create_depth_stencil_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_depth_stencil_state *depth_stencil_state;
+	drm_intel_bo *depth_stencil_bo;
+
+	depth_stencil_bo = drm_intel_bo_alloc(intel->bufmgr,
+					"gen6 DEPTH_STENCIL state",
+					sizeof(*depth_stencil_state),
+					4096);
+	drm_intel_bo_map(depth_stencil_bo, TRUE);
+	depth_stencil_state = depth_stencil_bo->virtual;
+	memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
+	drm_intel_bo_unmap(depth_stencil_bo);
+
+	return depth_stencil_bo;
+}
+
+static void
+gen6_composite_invarient_states(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(MI_FLUSH |
+		MI_STATE_INSTRUCTION_CACHE_FLUSH |
+		BRW_MI_GLOBAL_SNAPSHOT_RESET);
+	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+		GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+	OUT_BATCH(1);
+
+	/* Set system instruction pointer */
+	OUT_BATCH(BRW_STATE_SIP | 0);
+	OUT_BATCH(0);
+}
+
+static void
+gen6_composite_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */
+	OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
+	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+}
+
+static void
+gen6_composite_viewport_state_pointers(ScrnInfoPtr scrn, drm_intel_bo *cc_vp_bo)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+		GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+		(4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+static void
+gen6_composite_urb(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
+	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+		(24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+
+static void
+gen6_composite_cc_state_pointers(ScrnInfoPtr scrn,
+				drm_intel_bo *blend_state_bo,
+				uint32_t blend_state_offset,
+				drm_intel_bo *depth_stencil_state_bo,
+				uint32_t depth_stencil_state_offset,
+				drm_intel_bo *cc_state_bo,
+				uint32_t cc_state_offset)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+	OUT_RELOC(blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, blend_state_offset | 1);
+	OUT_RELOC(depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, depth_stencil_state_offset | 1);
+	OUT_RELOC(cc_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, cc_state_offset | 1);
+}
+
+static void
+gen6_composite_sampler_state_pointers(ScrnInfoPtr scrn, drm_intel_bo *ps_sampler_state_bo)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
+		GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
+		(4 - 2));
+	OUT_BATCH(0); /* VS */
+	OUT_BATCH(0); /* GS */
+	OUT_RELOC(ps_sampler_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+static void 
+gen6_composite_vs_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	/* disable VS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	
+	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
+	OUT_BATCH(0); /* without VS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+static void 
+gen6_composite_gs_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	/* disable GS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	
+	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+	OUT_BATCH(0); /* without GS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+static void 
+gen6_composite_clip_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+	OUT_BATCH(0);
+}
+
+static void 
+gen6_composite_sf_state(ScrnInfoPtr scrn, Bool has_mask)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	int num_sf_outputs = has_mask ? 2 : 1;
+
+	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
+	OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
+		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
+	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW9 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW14 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW19 */
+}
+
+static void 
+gen6_composite_wm_state(ScrnInfoPtr scrn, Bool has_mask, drm_intel_bo *kernel_bo)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	int num_surfaces = has_mask ? 3 : 2;
+	int num_sf_outputs = has_mask ? 2 : 1;
+
+	/* disable WM constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
+	OUT_RELOC(kernel_bo,
+		I915_GEM_DOMAIN_INSTRUCTION, 0,
+		0);
+	OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
+		(num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
+	OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
+		GEN6_3DSTATE_WM_DISPATCH_ENABLE |
+		GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+	OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
+		GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static void
+gen6_composite_binding_table_pointers(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	/* Binding table pointers */
+	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
+		GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
+		(4 - 2));
+	OUT_BATCH(0);		/* vs */
+	OUT_BATCH(0);		/* gs */
+	/* Only the PS uses the binding table */
+	OUT_BATCH(PS_BINDING_TABLE_OFFSET);
+}
+
+static void
+gen6_composite_depth_buffer_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
+	OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
+		(BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
+	OUT_BATCH(0);
+}
+
+static void
+gen6_composite_drawing_rectangle(ScrnInfoPtr scrn, PixmapPtr dest)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
+	OUT_BATCH(0x00000000);	/* ymin, xmin */
+	OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | DRAW_XMAX(dest->drawable.width - 1));	/* ymax, xmax */
+	OUT_BATCH(0x00000000);	/* yorigin, xorigin */
+}
+
+static void
+gen6_composite_vertex_element_state(ScrnInfoPtr scrn, Bool has_mask, Bool is_affine)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen4_render_state *render_state = intel->gen4_render_state;
+	/*
+	 * vertex data in vertex buffer
+	 *    position: (x, y)
+	 *    texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
+	 *    texture coordinate 1 if (has_mask is TRUE): same as above
+	 */
+	int nelem = has_mask ? 2 : 1;
+	int selem = is_affine ? 2 : 3;
+	uint32_t w_component;
+	uint32_t src_format;
+
+	render_state->vertex_size = 4 * (2 + nelem * selem);
+
+	if (is_affine) {
+		src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
+		w_component = BRW_VFCOMPONENT_STORE_1_FLT;
+	} else {
+		src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+		w_component = BRW_VFCOMPONENT_STORE_SRC;
+	}
+
+	/* The VUE layout
+	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
+	 *    dword 4-7: position (x, y, 1.0, 1.0),
+	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
+	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
+	 *
+	 * dword 4-15 are fetched from vertex buffer
+	 */
+	OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
+		((2 * (2 + nelem)) + 1 - 2));
+	
+	OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+		GEN6_VE0_VALID | 
+		(BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 
+		(0 << VE0_OFFSET_SHIFT));
+	OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+
+	/* x,y */
+	OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+		GEN6_VE0_VALID |
+		(BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+		(0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
+	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+
+	/* u0, v0, w0 */
+	OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |

