[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-intel: Changes to 'upstream-unstable'



 NEWS                    |   53 +
 configure.ac            |    2 
 man/intel.man           |    9 
 src/brw_structs.h       |    5 
 src/i915_render.c       |   30 -
 src/i965_reg.h          |    1 
 src/i965_render.c       | 1320 ++++++++++++++++++++++++------------------------
 src/i965_video.c        |    6 
 src/intel.h             |   25 
 src/intel_batchbuffer.c |   24 
 src/intel_batchbuffer.h |    8 
 src/intel_display.c     |    4 
 src/intel_dri.c         |   24 
 src/intel_driver.c      |   55 +-
 src/intel_memory.c      |    7 
 src/intel_module.c      |    2 
 src/intel_uxa.c         |   46 +
 src/intel_video.c       |    4 
 18 files changed, 883 insertions(+), 742 deletions(-)

New commits:
commit c9fb69cb2502917dfb2828c90802de7766072899
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Apr 17 10:42:05 2011 +0100

    i965/video: We need 150 dwords of space for video state emission
    
    (Actually around 131, with additional 10% just for safety.)
    
    Reported-by: Modestas Vainius <geromanas@mailas.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36319
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/i965_video.c b/src/i965_video.c
index c757681..53a9394 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1210,7 +1210,7 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
 			intel_batch_submit(scrn);
 		}
 
-		intel_batch_start_atomic(scrn, 100);
+		intel_batch_start_atomic(scrn, 150);
 
 		i965_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf, pixmap);
 

commit a51cd83d25f2f9f2107219d5671194f931601244
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sun Apr 17 10:36:26 2011 +0100

    intel: Beware the unsigned promotion when checking for batch overflows
    
    Reported-by: Modestas Vainius <geromanas@mailas.com>
    References: https://bugs.freedesktop.org/show_bug.cgi?id=36319
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index 605932a..2403a38 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -50,14 +50,14 @@ static inline int intel_vertex_space(intel_screen_private *intel)
 }
 
 static inline void
-intel_batch_require_space(ScrnInfoPtr scrn, intel_screen_private *intel, unsigned int sz)
+intel_batch_require_space(ScrnInfoPtr scrn, intel_screen_private *intel, int sz)
 {
 	assert(sz < intel->batch_bo->size - 8);
 	if (intel_batch_space(intel) < sz)
 		intel_batch_submit(scrn);
 }
 
-static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, unsigned int sz)
+static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, int sz)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 

commit 030aa3d13643a0acb7126fa1fa6c5a8eac134639
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Apr 14 10:25:21 2011 +0100

    NEWS: typo.
    
    Spotted too late...
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/NEWS b/NEWS
index ea92bcf..bf55b4b 100644
--- a/NEWS
+++ b/NEWS
@@ -4,7 +4,7 @@ We are pleased to announce this major release of the xf86-video-intel
 driver, roughly on schedule at 3 months since 2.14.0. With the many bug
 fixes in this release, we encourage everyone to upgrade to 2.14.
 
-The priority for this quarter has been simply to unexciting and stabilise
+The priority for this quarter has been simply to be unexciting and stabilise
 the driver further, seeking to capitalise upon the improvements elsewhere
 in the stack.
 

commit 0e425b30e12b11dbf51eea1d3366e3372f8b93af
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Apr 14 10:16:03 2011 +0100

    configure,NEWS: 2.15.0 release
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/NEWS b/NEWS
index d292704..ea92bcf 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,30 @@
+Release 2.15.0 (2011-04-14)
+==============================
+We are pleased to announce this major release of the xf86-video-intel
+driver, roughly on schedule at 3 months since 2.14.0. With the many bug
+fixes in this release, we encourage everyone to upgrade to 2.14.
+
+The priority for this quarter has been simply to unexciting and stabilise
+the driver further, seeking to capitalise upon the improvements elsewhere
+in the stack.
+
+Bugs fixed in this snapshot (compared to 2.14.903)
+--------------------------------------------------
+
+* Turn off relaxed fencing by default for older chipsets
+  This was continuing to destabilize those systems, so for the release
+  we disabled the feature. If you wish to help us debug this, you can
+  re-enable the optimisation with Option "RelaxedFencing" "True".
+  Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36147
+
+* Build fix for xserver-1.7.7
+
+* KDE glitches on SNB
+  [Technically fixed in the previous snapshot, but I'm really pleased
+   that this got fixed in time for the release!]
+  https://bugs.freedesktop.org/show_bug.cgi?id=35808
+
+
 Snapshot 2.14.903 (2011-04-11)
 ==============================
 This is the third release candidate in preparation for the upcoming
diff --git a/configure.ac b/configure.ac
index bc9a868..fd5a3cf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.63])
 AC_INIT([xf86-video-intel],
-        [2.14.903],
+        [2.15.0],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit 686018f283f1d131073ef5917213e6a8ac013f26
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Tue Apr 12 08:23:04 2011 +0100

    Turn relaxed-fencing off by default for older (pre-G33) chipsets
    
    There are still too many unresolved bugs, typically GPU hangs, that are
    related to using relaxed fencing (i.e. only allocating the minimal
    amount of memory required for a buffer) on older hardware, so turn off
    the feature by default for the release.
    
    Reported-and-tested-by: Knut Petersen <Knut_Petersen@t-online.de>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36147
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
    Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

diff --git a/man/intel.man b/man/intel.man
index 4fd0ce7..85e2b2e 100644
--- a/man/intel.man
+++ b/man/intel.man
@@ -195,6 +195,15 @@ you may wish to override this and force a linear layout.
 .IP
 Default: disabled
 .TP
+.BI "Option \*qRelaxedFencing\*q \*q" boolean \*q
+This option controls whether we attempt to allocate the minimal amount of
+memory required for the buffers. The reduction in working set has a substantial
+improvement on system performance. However, this has been demonstrated to be
+buggy on older hardware (845-865 and 915-945, but ok on PineView and later)
+so on those chipsets defaults to off.
+.IP
+Default: Enabled for G33 (includes PineView), and later, class machines.
+.TP
 .BI "Option \*qXvMC\*q \*q" boolean \*q
 Enable XvMC driver. Current support MPEG2 MC on 915/945 and G33 series.
 User should provide absolute path to libIntelXvMC.so in XvMCConfig file.
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 1b0d740..e867351 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -101,6 +101,7 @@ typedef enum {
    OPTION_DEBUG_FLUSH_CACHES,
    OPTION_DEBUG_WAIT,
    OPTION_HOTPLUG,
+   OPTION_RELAXED_FENCING,
 } I830Opts;
 
 static OptionInfoRec I830Options[] = {
@@ -121,6 +122,7 @@ static OptionInfoRec I830Options[] = {
    {OPTION_DEBUG_FLUSH_CACHES, "DebugFlushCaches", OPTV_BOOLEAN, {0}, FALSE},
    {OPTION_DEBUG_WAIT, "DebugWait", OPTV_BOOLEAN, {0}, FALSE},
    {OPTION_HOTPLUG,	"HotPlug",	OPTV_BOOLEAN,	{0},	TRUE},
+   {OPTION_RELAXED_FENCING,	"RelaxedFencing",	OPTV_BOOLEAN,	{0},	TRUE},
    {-1,			NULL,		OPTV_NONE,	{0},	FALSE}
 };
 /* *INDENT-ON* */
@@ -448,23 +450,33 @@ static void I830XvInit(ScrnInfoPtr scrn)
 		   intel->colorKey);
 }
 
-static Bool has_kernel_flush(struct intel_screen_private *intel)
+static Bool drm_has_boolean_param(struct intel_screen_private *intel,
+				  int param)
 {
 	drm_i915_getparam_t gp;
 	int value;
 
-	/* The BLT ring was introduced at the same time as the
-	 * automatic flush for the busy-ioctl.
-	 */
-
 	gp.value = &value;
-	gp.param = I915_PARAM_HAS_BLT;
+	gp.param = param;
 	if (drmIoctl(intel->drmSubFD, DRM_IOCTL_I915_GETPARAM, &gp))
 		return FALSE;
 
 	return value;
 }
 
+static Bool has_kernel_flush(struct intel_screen_private *intel)
+{
+	/* The BLT ring was introduced at the same time as the
+	 * automatic flush for the busy-ioctl.
+	 */
+	return drm_has_boolean_param(intel, I915_PARAM_HAS_BLT);
+}
+
+static Bool has_relaxed_fencing(struct intel_screen_private *intel)
+{
+	return drm_has_boolean_param(intel, I915_PARAM_HAS_RELAXED_FENCING);
+}
+
 static Bool can_accelerate_blt(struct intel_screen_private *intel)
 {
 	if (0 && (IS_I830(intel) || IS_845G(intel))) {
@@ -630,6 +642,18 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
 			   " 2D GPU acceleration disabled.\n");
 	}
 
+	intel->has_relaxed_fencing =
+		xf86ReturnOptValBool(intel->Options,
+				     OPTION_RELAXED_FENCING,
+				     INTEL_INFO(intel)->gen >= 33);
+	/* And override the user if there is no kernel support */
+	if (intel->has_relaxed_fencing)
+		intel->has_relaxed_fencing = has_relaxed_fencing(intel);
+
+	xf86DrvMsg(scrn->scrnIndex, X_CONFIG,
+		   "Relaxed fencing %s\n",
+		   intel->has_relaxed_fencing ? "enabled" : "disabled");
+
 	/* SwapBuffers delays to avoid tearing */
 	intel->swapbuffers_wait = xf86ReturnOptValBool(intel->Options,
 						       OPTION_SWAPBUFFERS_WAIT,
diff --git a/src/intel_memory.c b/src/intel_memory.c
index 64dfd8e..763a6ad 100644
--- a/src/intel_memory.c
+++ b/src/intel_memory.c
@@ -294,8 +294,6 @@ void intel_set_gem_max_sizes(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 	struct drm_i915_gem_get_aperture aperture;
-	drm_i915_getparam_t gp;
-	int ret, value;
 
 	aperture.aper_available_size = 0;
 	drmIoctl(intel->drmSubFD, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
@@ -303,9 +301,4 @@ void intel_set_gem_max_sizes(ScrnInfoPtr scrn)
 	intel_set_max_bo_size(intel, &aperture);
 	intel_set_max_gtt_map_size(intel, &aperture);
 	intel_set_max_tiling_size(intel, &aperture);
-
-	gp.value = &value;
-	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
-	ret = drmIoctl(intel->drmSubFD, DRM_IOCTL_I915_GETPARAM, &gp);
-	intel->has_relaxed_fencing = ret == 0;
 }

commit 3d2b79098ccb3fed86be66f619f4ed338741d454
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Apr 11 15:20:16 2011 +0100

    dri: Rearrange code to compile against xorg-server-1.7
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel_dri.c b/src/intel_dri.c
index 720820f..a39b512 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -398,14 +398,6 @@ static void I830DRI2DestroyBuffer(DrawablePtr drawable, DRI2Buffer2Ptr buffer)
 
 #endif
 
-static void I830DRI2ReferenceBuffer(DRI2Buffer2Ptr buffer)
-{
-	if (buffer) {
-		I830DRI2BufferPrivatePtr private = buffer->driverPrivate;
-		private->refcnt++;
-	}
-}
-
 static void
 I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion,
 		   DRI2BufferPtr destBuffer, DRI2BufferPtr sourceBuffer)
@@ -553,6 +545,13 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion,
 
 #if DRI2INFOREC_VERSION >= 4
 
+static void I830DRI2ReferenceBuffer(DRI2Buffer2Ptr buffer)
+{
+	if (buffer) {
+		I830DRI2BufferPrivatePtr private = buffer->driverPrivate;
+		private->refcnt++;
+	}
+}
 
 static int
 I830DRI2DrawablePipe(DrawablePtr pDraw)
@@ -1291,9 +1290,9 @@ out_complete:
 	DRI2WaitMSCComplete(client, draw, target_msc, 0, 0);
 	return TRUE;
 }
-#endif
 
 static int dri2_server_generation;
+#endif
 
 Bool I830DRI2ScreenInit(ScreenPtr screen)
 {
@@ -1321,6 +1320,7 @@ Bool I830DRI2ScreenInit(ScreenPtr screen)
 		return FALSE;
 	}
 
+#if DRI2INFOREC_VERSION >= 4
 	if (serverGeneration != dri2_server_generation) {
 	    dri2_server_generation = serverGeneration;
 	    if (!i830_dri2_register_frame_event_resource_types()) {
@@ -1329,6 +1329,8 @@ Bool I830DRI2ScreenInit(ScreenPtr screen)
 		return FALSE;
 	    }
 	}
+#endif
+
 	intel->deviceName = drmGetDeviceNameFromFd(intel->drmSubFD);
 	memset(&info, '\0', sizeof(info));
 	info.fd = intel->drmSubFD;

commit 4fa35dd5e13a58070220f787fc0678f1c679808d
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Apr 11 10:44:55 2011 +0100

    NEWS: version bump for 2.14.903 snapshot
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/NEWS b/NEWS
index d163097..d292704 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,29 @@
+Snapshot 2.14.903 (2011-04-11)
+==============================
+This is the third release candidate in preparation for the upcoming
+2.15.0 release. We will appreciate any feedback we can get from
+testing of this snapshot.
+
+There was a bit of churn since 2.14.902 as a potential fix for a
+performance regression was tried but had to be reverted when it was found to
+cause glitches running Compiz on SandyBridge. Otherwise, there were just a
+couple of fixes for building against old xservers and running on an
+obscure chipset.
+
+Bugs fixed in this snapshot (compared to 2.14.902)
+--------------------------------------------------
+
+* Prevent issuing an invalid scanline wait command
+  https://bugs.freedesktop.org/show_bug.cgi?id=35576
+
+* The 946GZ is not a 945, but a 965.
+  https://bugs.freedesktop.org/show_bug.cgi?id=35854
+
+* Fix tile sizes for gen2 (finally).
+
+* Allow building of recent dri2 changes against old xservers.
+
+
 Snapshot 2.14.902 (2011-03-29)
 ==============================
 This is the second release candidate in preparation for the upcoming
diff --git a/configure.ac b/configure.ac
index b90e071..bc9a868 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.63])
 AC_INIT([xf86-video-intel],
-        [2.14.902],
+        [2.14.903],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit 97e9557619e58ef769eb7cbf1a03fbd52be7f2ed
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Apr 8 13:38:48 2011 +0100

    intel: Restore manual flush for old kernels
    
    Daniel Vetter pointed out that the automagic flush by the kernel for the
    busy-ioctl was only introduced upstream in 2.6.37. So we still need to
    manually emit a flush on old kernels.
    
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/intel.h b/src/intel.h
index 48a7cf9..2b114c3 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -425,6 +425,8 @@ typedef struct intel_screen_private {
 	Bool use_pageflipping;
 	Bool force_fallback;
 	Bool can_blt;
+	Bool has_kernel_flush;
+	Bool needs_flush;
 	Bool use_shadow;
 
 	/* Broken-out options. */
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 95eca43..289ed2b 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -175,6 +175,13 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn)
 	intel_batch_do_flush(scrn);
 }
 
+static Bool intel_batch_needs_flush(intel_screen_private *intel)
+{
+	ScreenPtr screen = intel->scrn->pScreen;
+	PixmapPtr pixmap = screen->GetScreenPixmap(screen);
+	return intel_get_pixmap_private(pixmap)->batch_write;
+}
+
 void intel_batch_submit(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -234,6 +241,8 @@ void intel_batch_submit(ScrnInfoPtr scrn)
 		}
 	}
 
+	intel->needs_flush |= intel_batch_needs_flush(intel);
+
 	while (!list_is_empty(&intel->batch_pixmaps)) {
 		struct intel_pixmap *entry;
 
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 9e01836..1b0d740 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -448,6 +448,23 @@ static void I830XvInit(ScrnInfoPtr scrn)
 		   intel->colorKey);
 }
 
+static Bool has_kernel_flush(struct intel_screen_private *intel)
+{
+	drm_i915_getparam_t gp;
+	int value;
+
+	/* The BLT ring was introduced at the same time as the
+	 * automatic flush for the busy-ioctl.
+	 */
+
+	gp.value = &value;
+	gp.param = I915_PARAM_HAS_BLT;
+	if (drmIoctl(intel->drmSubFD, DRM_IOCTL_I915_GETPARAM, &gp))
+		return FALSE;
+
+	return value;
+}
+
 static Bool can_accelerate_blt(struct intel_screen_private *intel)
 {
 	if (0 && (IS_I830(intel) || IS_845G(intel))) {
@@ -597,6 +614,7 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
 		intel->tiling &= ~INTEL_TILING_FB;
 
 	intel->can_blt = can_accelerate_blt(intel);
+	intel->has_kernel_flush = has_kernel_flush(intel);
 	intel->use_shadow = !intel->can_blt;
 
 	if (xf86IsOptionSet(intel->Options, OPTION_SHADOW)) {
diff --git a/src/intel_uxa.c b/src/intel_uxa.c
index 24ef6fa..df3adcb 100644
--- a/src/intel_uxa.c
+++ b/src/intel_uxa.c
@@ -937,7 +937,18 @@ static Bool intel_uxa_get_image(PixmapPtr pixmap,
 
 static void intel_flush_rendering(intel_screen_private *intel)
 {
-    drm_intel_bo_busy(intel->front_buffer);
+	if (intel->needs_flush == 0)
+		return;
+
+	if (intel->has_kernel_flush) {
+		intel_batch_submit(intel->scrn);
+		drm_intel_bo_busy(intel->front_buffer);
+	} else {
+		intel_batch_emit_flush(intel->scrn);
+		intel_batch_submit(intel->scrn);
+	}
+
+	intel->needs_flush = 0;
 }
 
 void intel_uxa_block_handler(intel_screen_private *intel)

commit fb40bf2b33a6d26f0e6a4e5798d10c905faa8aad
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Fri Apr 8 12:37:35 2011 +0200

    Tell users to grab i915_error_state on gpu hangs
    
    Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
    Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 282d8ab..95eca43 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -221,6 +221,7 @@ void intel_batch_submit(ScrnInfoPtr scrn)
 			/* The GPU has hung and unlikely to recover by this point. */
 			if (!once) {
 				xf86DrvMsg(scrn->scrnIndex, X_ERROR, "Detected a hung GPU, disabling acceleration.\n");
+				xf86DrvMsg(scrn->scrnIndex, X_ERROR, "When reporting this, please include i915_error_state from debugfs and the full dmesg.\n");
 				uxa_set_force_fallback(screenInfo.screens[scrn->scrnIndex], TRUE);
 				intel->force_fallback = TRUE;
 				once = 1;

commit 59ed6b05db99d7e5144f4f8499c67cee9b833673
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Apr 7 15:28:46 2011 +0100

    Revert "i965: Convert to relative relocations for state"
    
    This reverts commit d2106384be6f9df498392127c3ff64d0a2b17457.
    
    Breaks compiz (but not mutter/gnome-shell) on gen6. Not sure if this is
    not seem deep interaction issue with multiple clients sharing the GPU or
    just with compiz, but for now we have to revert and suffer the inane
    performance hit. It looks suspiciously like another deferred damage
    issue...
    
    Bugzilla: 51a27e88b073cff229fff4362cb6ac22835c4044
    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index 718e2f6..bfcd3f2 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -147,7 +147,6 @@ static void i965_get_blend_cntl(int op, PicturePtr mask, uint32_t dst_format,
 
 static uint32_t i965_get_dest_format(PicturePtr dest_picture)
 {
-
 	switch (dest_picture->format) {
 	case PICT_a8r8g8b8:
 	case PICT_x8r8g8b8:
@@ -628,79 +627,6 @@ static const struct wm_kernel_info wm_kernels_gen6[] = {
 
 #undef KERNEL
 
-struct i965_static_stream {
-	uint32_t size, used;
-	uint8_t *data;
-};
-
-static int i965_static_stream_init(struct i965_static_stream *stream)
-{
-	stream->used = 0;
-	stream->size = 64*1024;
-
-	stream->data = malloc(stream->size);
-	return stream->data != NULL;
-}
-
-static uint32_t i965_static_stream_add(struct i965_static_stream *stream,
-				       const void *data, uint32_t len, uint32_t align)
-{
-	uint32_t offset = ALIGN(stream->used, align);
-	if (offset + len > stream->size) {
-		do
-			stream->size *= 2;
-		while (stream->size < offset + len);
-
-		stream->data = realloc(stream->data, stream->size);
-	}
-
-	memcpy(stream->data + offset, data, len);
-	stream->used = offset + len;
-	return offset;
-}
-
-static void *i965_static_stream_map(struct i965_static_stream *stream,
-				    uint32_t len, uint32_t align)
-{
-	uint32_t offset = ALIGN(stream->used, align);
-	if (offset + len > stream->size) {
-		do
-			stream->size *= 2;
-		while (stream->size < offset + len);
-
-		stream->data = realloc(stream->data, stream->size);
-	}
-
-	stream->used = offset + len;
-	return memset(stream->data + offset, 0, len);
-}
-
-static uint32_t i965_static_stream_offsetof(struct i965_static_stream *stream, void *ptr)
-{
-	return (uint8_t *)ptr - stream->data;
-}
-
-static drm_intel_bo *i965_static_stream_fini(struct intel_screen_private *intel,
-					     struct i965_static_stream *stream)
-{
-	drm_intel_bo *bo = NULL;
-
-	if (stream->used) {
-		bo = drm_intel_bo_alloc(intel->bufmgr, "stream", stream->used, 0);
-		if (bo) {
-			if (drm_intel_bo_subdata(bo, 0, stream->used, stream->data)) {
-				drm_intel_bo_unreference(bo);
-				bo = NULL;
-			}
-		}
-	}
-
-	free(stream->data);
-	memset(stream, 0, sizeof(*stream));
-
-	return bo;
-}
-
 typedef struct _brw_cc_unit_state_padded {
 	struct brw_cc_unit_state state;
 	char pad[64 - sizeof(struct brw_cc_unit_state)];
@@ -729,20 +655,22 @@ typedef struct gen4_composite_op {
 
 /** Private data for gen4 render accel implementation. */
 struct gen4_render_state {
-	drm_intel_bo *general_bo;
-	drm_intel_bo *instruction_bo;
-
-	uint32_t vs_state;
-	uint32_t sf_state;
-	uint32_t sf_mask_state;
-	uint32_t cc_state;
-	uint32_t wm_state;
-	uint32_t wm_kernel[KERNEL_COUNT];
-
-	uint32_t gen6_cc_state;
-	uint32_t gen6_cc_vp;
-	uint32_t gen6_cc_blend;
-	uint32_t gen6_cc_depth_stencil;
+	drm_intel_bo *vs_state_bo;
+	drm_intel_bo *sf_state_bo;
+	drm_intel_bo *sf_mask_state_bo;
+	drm_intel_bo *cc_state_bo;
+	drm_intel_bo *wm_state_bo[KERNEL_COUNT]
+	    [FILTER_COUNT] [EXTEND_COUNT]
+	    [FILTER_COUNT] [EXTEND_COUNT];
+	drm_intel_bo *wm_kernel_bo[KERNEL_COUNT];
+
+	drm_intel_bo *cc_vp_bo;
+	drm_intel_bo *gen6_blend_bo;
+	drm_intel_bo *gen6_depth_stencil_bo;
+	drm_intel_bo *ps_sampler_state_bo[FILTER_COUNT]
+	    [EXTEND_COUNT]
+	    [FILTER_COUNT]
+	    [EXTEND_COUNT];
 	gen4_composite_op composite_op;
 };
 
@@ -756,16 +684,24 @@ static void gen6_render_state_init(ScrnInfoPtr scrn);
  * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
  * back to SF which then hands pixels off to WM.
  */
-static uint32_t gen4_create_sf_state(struct intel_screen_private *intel,
-				     struct i965_static_stream *stream,
-				     uint32_t kernel)
+static drm_intel_bo *gen4_create_sf_state(intel_screen_private *intel,
+					  drm_intel_bo * kernel_bo)
 {
 	struct brw_sf_unit_state *sf_state;
+	drm_intel_bo *sf_state_bo;
 
-	sf_state = i965_static_stream_map(stream, sizeof(*sf_state), 32);
+	sf_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 SF state",
+					 sizeof(*sf_state), 4096);
+	drm_intel_bo_map(sf_state_bo, TRUE);
+	sf_state = sf_state_bo->virtual;
 
+	memset(sf_state, 0, sizeof(*sf_state));
 	sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
-	sf_state->thread0.kernel_start_pointer = kernel >> 6;
+	sf_state->thread0.kernel_start_pointer =
+	    intel_emit_reloc(sf_state_bo,
+			     offsetof(struct brw_sf_unit_state, thread0),
+			     kernel_bo, sf_state->thread0.grf_reg_count << 1,
+			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
 	sf_state->sf1.single_program_flow = 1;
 	sf_state->sf1.binding_table_entry_count = 0;
 	sf_state->sf1.thread_priority = 0;
@@ -785,7 +721,6 @@ static uint32_t gen4_create_sf_state(struct intel_screen_private *intel,
 	sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
 	sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
 	sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
-	sf_state->thread4.stats_enable = 1;
 	sf_state->sf5.viewport_transform = FALSE;	/* skip viewport */
 	sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
 	sf_state->sf6.scissor = 0;
@@ -793,25 +728,43 @@ static uint32_t gen4_create_sf_state(struct intel_screen_private *intel,
 	sf_state->sf6.dest_org_vbias = 0x8;
 	sf_state->sf6.dest_org_hbias = 0x8;
 
-	return i965_static_stream_offsetof(stream, sf_state);
+	drm_intel_bo_unmap(sf_state_bo);
+
+	return sf_state_bo;
 }
 
-static uint32_t sampler_border_color_create(struct i965_static_stream *stream)
+static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel)
 {
-	struct brw_sampler_legacy_border_color *color;
+	struct brw_sampler_legacy_border_color sampler_border_color;
 
 	/* Set up the sampler border color (always transparent black) */
-	color = i965_static_stream_map(stream, sizeof(*color), 32);
-
-	return i965_static_stream_offsetof(stream, color);
+	memset(&sampler_border_color, 0, sizeof(sampler_border_color));
+	sampler_border_color.color[0] = 0;	/* R */
+	sampler_border_color.color[1] = 0;	/* G */
+	sampler_border_color.color[2] = 0;	/* B */
+	sampler_border_color.color[3] = 0;	/* A */
+
+	return intel_bo_alloc_for_data(intel,
+				       &sampler_border_color,
+				       sizeof(sampler_border_color),
+				       "gen4 render sampler border color");
 }
 
 static void
-sampler_state_init(struct brw_sampler_state *sampler_state,
+sampler_state_init(drm_intel_bo * sampler_state_bo,
+		   struct brw_sampler_state *sampler_state,
 		   sampler_state_filter_t filter,
 		   sampler_state_extend_t extend,
-		   uint32_t border_color)
+		   drm_intel_bo * border_color_bo)
 {
+	uint32_t sampler_state_offset;
+
+	sampler_state_offset = (char *)sampler_state -
+	    (char *)sampler_state_bo->virtual;
+
+	/* PS kernel use this sampler */
+	memset(sampler_state, 0, sizeof(*sampler_state));
+
 	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
 
 	/* We use the legacy mode to get the semantics specified by
@@ -854,36 +807,104 @@ sampler_state_init(struct brw_sampler_state *sampler_state,
 		break;
 	}
 
-	sampler_state->ss2.border_color_pointer = border_color >> 5;
+	sampler_state->ss2.border_color_pointer =
+	    intel_emit_reloc(sampler_state_bo, sampler_state_offset +
+			     offsetof(struct brw_sampler_state, ss2),
+			     border_color_bo, 0,
+			     I915_GEM_DOMAIN_SAMPLER, 0) >> 5;
+
 	sampler_state->ss3.chroma_key_enable = 0;	/* disable chromakey */
 }
 
-static uint32_t gen4_create_sampler_state(struct i965_static_stream *stream,
-					  sampler_state_filter_t src_filter,
-					  sampler_state_extend_t src_extend,
-					  sampler_state_filter_t mask_filter,
-					  sampler_state_extend_t mask_extend,
-					  uint32_t border_color)
+static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel,
+					       sampler_state_filter_t src_filter,
+					       sampler_state_extend_t src_extend,
+					       sampler_state_filter_t mask_filter,
+					       sampler_state_extend_t mask_extend,
+					       drm_intel_bo * border_color_bo)
 {
+	drm_intel_bo *sampler_state_bo;
 	struct brw_sampler_state *sampler_state;
 
-	sampler_state = i965_static_stream_map(stream,
-					       sizeof(struct brw_sampler_state) * 2,
-					       32);
-	sampler_state_init(&sampler_state[0], src_filter, src_extend, border_color);
-	sampler_state_init(&sampler_state[1], mask_filter, mask_extend, border_color);
+	sampler_state_bo =
+	    drm_intel_bo_alloc(intel->bufmgr, "gen4 sampler state",
+			       sizeof(struct brw_sampler_state) * 2, 4096);
+	drm_intel_bo_map(sampler_state_bo, TRUE);
+	sampler_state = sampler_state_bo->virtual;
+
+	sampler_state_init(sampler_state_bo,
+			   &sampler_state[0],
+			   src_filter, src_extend, border_color_bo);
+	sampler_state_init(sampler_state_bo,
+			   &sampler_state[1],
+			   mask_filter, mask_extend, border_color_bo);
 
-	return i965_static_stream_offsetof(stream, sampler_state);
+	drm_intel_bo_unmap(sampler_state_bo);
+
+	return sampler_state_bo;
 }
 
-static void gen4_init_wm_state(struct intel_screen_private *intel,
-			       struct brw_wm_unit_state *state,
-			       Bool has_mask,
-			       uint32_t kernel,
-			       uint32_t sampler)
+static void
+cc_state_init(drm_intel_bo * cc_state_bo,
+	      uint32_t cc_state_offset,
+	      int src_blend, int dst_blend, drm_intel_bo * cc_vp_bo)
 {
+	struct brw_cc_unit_state *cc_state;
+
+	cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual +
+						cc_state_offset);
+
+	memset(cc_state, 0, sizeof(*cc_state));
+	cc_state->cc0.stencil_enable = 0;	/* disable stencil */
+	cc_state->cc2.depth_test = 0;	/* disable depth test */
+	cc_state->cc2.logicop_enable = 0;	/* disable logic op */
+	cc_state->cc3.ia_blend_enable = 0;	/* blend alpha same as colors */
+	cc_state->cc3.blend_enable = 1;	/* enable color blend */
+	cc_state->cc3.alpha_test = 0;	/* disable alpha test */
+
+	cc_state->cc4.cc_viewport_state_offset =
+	    intel_emit_reloc(cc_state_bo, cc_state_offset +
+			     offsetof(struct brw_cc_unit_state, cc4),
+			     cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
+
+	cc_state->cc5.dither_enable = 0;	/* disable dither */
+	cc_state->cc5.logicop_func = 0xc;	/* COPY */
+	cc_state->cc5.statistics_enable = 1;
+	cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
+
+	/* Fill in alpha blend factors same as color, for the future. */
+	cc_state->cc5.ia_src_blend_factor = src_blend;
+	cc_state->cc5.ia_dest_blend_factor = dst_blend;
+
+	cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
+	cc_state->cc6.clamp_post_alpha_blend = 1;
+	cc_state->cc6.clamp_pre_alpha_blend = 1;
+	cc_state->cc6.clamp_range = 0;	/* clamp range [0,1] */
+
+	cc_state->cc6.src_blend_factor = src_blend;
+	cc_state->cc6.dest_blend_factor = dst_blend;
+}
+
+static drm_intel_bo *gen4_create_wm_state(intel_screen_private *intel,
+					  Bool has_mask,
+					  drm_intel_bo * kernel_bo,
+					  drm_intel_bo * sampler_bo)
+{
+	struct brw_wm_unit_state *state;
+	drm_intel_bo *wm_state_bo;
+
+	wm_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 WM state",
+					 sizeof(*state), 4096);
+	drm_intel_bo_map(wm_state_bo, TRUE);
+	state = wm_state_bo->virtual;
+
+	memset(state, 0, sizeof(*state));
 	state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
-	state->thread0.kernel_start_pointer = kernel >> 6;
+	state->thread0.kernel_start_pointer =
+	    intel_emit_reloc(wm_state_bo,
+			     offsetof(struct brw_wm_unit_state, thread0),
+			     kernel_bo, state->thread0.grf_reg_count << 1,
+			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
 
 	state->thread1.single_program_flow = 0;
 
@@ -903,7 +924,12 @@ static void gen4_init_wm_state(struct intel_screen_private *intel,
 	else
 		state->wm4.sampler_count = 1;	/* 1-4 samplers used */
 
-	state->wm4.sampler_state_pointer = sampler >> 5;
+	state->wm4.sampler_state_pointer =
+	    intel_emit_reloc(wm_state_bo,
+			     offsetof(struct brw_wm_unit_state, wm4),
+			     sampler_bo,
+			     state->wm4.sampler_count << 2,
+			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
 	state->wm5.max_threads = PS_MAX_THREADS - 1;
 	state->wm5.transposed_urb_read = 0;
 	state->wm5.thread_dispatch_enable = 1;
@@ -928,87 +954,73 @@ static void gen4_init_wm_state(struct intel_screen_private *intel,
 	 */
 	if (IS_GEN5(intel))
 		state->thread1.binding_table_entry_count = 0;
-}
-
-static uint32_t gen4_create_vs_unit_state(struct intel_screen_private *intel,
-					  struct i965_static_stream *stream)
-{
-	struct brw_vs_unit_state *vs = i965_static_stream_map(stream, sizeof(*vs), 32);
 
-	/* Set up the vertex shader to be disabled (passthrough) */
-	if (IS_GEN5(intel))
-		vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;	/* hardware requirement */
-	else
-		vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
-	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
-	vs->vs6.vs_enable = 0;
-	vs->vs6.vert_cache_disable = 1;
+	drm_intel_bo_unmap(wm_state_bo);
 
-	return i965_static_stream_offsetof(stream, vs);
+	return wm_state_bo;
 }
 
-/**
- * Set up all combinations of cc state: each blendfactor for source and
- * dest.
- */
-static void
-cc_state_init(struct brw_cc_unit_state *state,
-	      int src_blend, int dst_blend,
-	      uint32_t vp)
+static drm_intel_bo *gen4_create_cc_viewport(intel_screen_private *intel)
 {
-	state->cc0.stencil_enable = 0;	/* disable stencil */
-	state->cc2.depth_test = 0;	/* disable depth test */
-	state->cc2.logicop_enable = 0;	/* disable logic op */
-	state->cc3.ia_blend_enable = 0;	/* blend alpha same as colors */
-	state->cc3.blend_enable = 1;	/* enable color blend */
-	state->cc3.alpha_test = 0;	/* disable alpha test */
-
-	state->cc4.cc_viewport_state_offset = vp >> 5;
-
-	state->cc5.dither_enable = 0;	/* disable dither */
-	state->cc5.logicop_func = 0xc;	/* COPY */
-	state->cc5.statistics_enable = 1;
-	state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
+	drm_intel_bo *bo;
+	struct brw_cc_viewport vp;
 
-	/* Fill in alpha blend factors same as color, for the future. */
-	state->cc5.ia_src_blend_factor = src_blend;
-	state->cc5.ia_dest_blend_factor = dst_blend;
+	vp.min_depth = -1.e35;
+	vp.max_depth = 1.e35;
 
-	state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
-	state->cc6.clamp_post_alpha_blend = 1;
-	state->cc6.clamp_pre_alpha_blend = 1;
-	state->cc6.clamp_range = 0;	/* clamp range [0,1] */
+	bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 render unit state",
+				sizeof(vp), 4096);
+	drm_intel_bo_subdata(bo, 0, sizeof(vp), &vp);
 
-	state->cc6.src_blend_factor = src_blend;


Reply to: