[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'upstream-unstable'



 Makefile                                             |   12 +
 docs/contents.html                                   |    1 
 docs/llvmpipe.html                                   |  204 +++++++++++++++++++
 docs/relnotes-7.10.2.html                            |    9 
 docs/relnotes.html                                   |    1 
 src/gallium/SConscript                               |    1 
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp        |    2 
 src/gallium/drivers/llvmpipe/README                  |  153 --------------
 src/gallium/drivers/llvmpipe/lp_fence.c              |    3 
 src/glsl/ast_to_hir.cpp                              |    2 
 src/glsl/glcpp/glcpp-parse.c                         |   82 ++++---
 src/glsl/glcpp/glcpp-parse.y                         |   82 ++++---
 src/mesa/drivers/dri/i965/brw_context.c              |   18 +
 src/mesa/drivers/dri/i965/brw_context.h              |    8 
 src/mesa/drivers/dri/i965/brw_eu_emit.c              |   71 +++---
 src/mesa/drivers/dri/i965/brw_fs.cpp                 |    8 
 src/mesa/drivers/dri/i965/brw_vs_emit.c              |   10 
 src/mesa/drivers/dri/i965/gen6_urb.c                 |   27 +-
 src/mesa/drivers/dri/i965/gen6_vs_state.c            |    3 
 src/mesa/drivers/dri/i965/gen6_wm_state.c            |    2 
 src/mesa/drivers/dri/intel/intel_chipset.h           |   13 -
 src/mesa/drivers/dri/intel/intel_context.h           |   30 ++
 src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c |    7 
 src/mesa/drivers/dri/r300/compiler/radeon_optimize.c |   24 ++
 src/mesa/main/get.c                                  |    2 
 src/mesa/program/ir_to_mesa.cpp                      |   28 ++
 26 files changed, 496 insertions(+), 307 deletions(-)

New commits:
commit b8c6362389e56b7fbe72147727995ae45b12e2a0
Author: José Fonseca <jose.r.fonseca@gmail.com>
Date:   Sat Apr 30 22:36:18 2011 +0100

    mesa: GL_PROVOKING_VERTEX_EXT is a GLenum, not GLboolean.

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index d5eef49..b6d2a99 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1155,7 +1155,7 @@ static const struct value_desc values[] = {
 
    /* GL_EXT_provoking_vertex */
    { GL_PROVOKING_VERTEX_EXT,
-     CONTEXT_BOOL(Light.ProvokingVertex), extra_EXT_provoking_vertex },
+     CONTEXT_ENUM(Light.ProvokingVertex), extra_EXT_provoking_vertex },
    { GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION_EXT,
      CONTEXT_BOOL(Const.QuadsFollowProvokingVertexConvention),
      extra_EXT_provoking_vertex },

commit 73f4273b8647abe0020ad3c2d593e009092d2be5
Author: Ian Romanick <ian.d.romanick@intel.com>
Date:   Thu Apr 21 17:26:39 2011 -0700

    glsl: Regenerate compiler and glcpp files from cherry picks

diff --git a/src/glsl/glcpp/glcpp-parse.c b/src/glsl/glcpp/glcpp-parse.c
index 46e690d..78dd351 100644
--- a/src/glsl/glcpp/glcpp-parse.c
+++ b/src/glsl/glcpp/glcpp-parse.c
@@ -165,16 +165,16 @@ _token_list_append_list (token_list_t *list, token_list_t *tail);
 static int
 _token_list_equal_ignoring_space (token_list_t *a, token_list_t *b);
 
-static active_list_t *
-_active_list_push (active_list_t *list,
-		   const char *identifier,
-		   token_node_t *marker);
+static void
+_parser_active_list_push (glcpp_parser_t *parser,
+			  const char *identifier,
+			  token_node_t *marker);
 
-static active_list_t *
-_active_list_pop (active_list_t *list);
+static void
+_parser_active_list_pop (glcpp_parser_t *parser);
 
-int
-_active_list_contains (active_list_t *list, const char *identifier);
+static int
+_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier);
 
 static void
 _glcpp_parser_expand_if (glcpp_parser_t *parser, int type, token_list_t *list);
@@ -3791,7 +3791,7 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser,
 
 	/* Finally, don't expand this macro if we're already actively
 	 * expanding it, (to avoid infinite recursion). */
-	if (_active_list_contains (parser->active, identifier)) {
+	if (_parser_active_list_contains (parser, identifier)) {
 		/* We change the token type here from IDENTIFIER to
 		 * OTHER to prevent any future expansion of this
 		 * unexpanded token. */
@@ -3821,51 +3821,53 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser,
 	return _glcpp_parser_expand_function (parser, node, last);
 }
 
-/* Push a new identifier onto the active list, returning the new list.
+/* Push a new identifier onto the parser's active list.
  *
  * Here, 'marker' is the token node that appears in the list after the
  * expansion of 'identifier'. That is, when the list iterator begins
- * examinging 'marker', then it is time to pop this node from the
+ * examining 'marker', then it is time to pop this node from the
  * active stack.
  */
-active_list_t *
-_active_list_push (active_list_t *list,
-		   const char *identifier,
-		   token_node_t *marker)
+static void
+_parser_active_list_push (glcpp_parser_t *parser,
+			  const char *identifier,
+			  token_node_t *marker)
 {
 	active_list_t *node;
 
-	node = ralloc (list, active_list_t);
+	node = ralloc (parser->active, active_list_t);
 	node->identifier = ralloc_strdup (node, identifier);
 	node->marker = marker;
-	node->next = list;
+	node->next = parser->active;
 
-	return node;
+	parser->active = node;
 }
 
-active_list_t *
-_active_list_pop (active_list_t *list)
+static void
+_parser_active_list_pop (glcpp_parser_t *parser)
 {
-	active_list_t *node = list;
+	active_list_t *node = parser->active;
 
-	if (node == NULL)
-		return NULL;
+	if (node == NULL) {
+		parser->active = NULL;
+		return;
+	}
 
-	node = list->next;
-	ralloc_free (list);
+	node = parser->active->next;
+	ralloc_free (parser->active);
 
-	return node;
+	parser->active = node;
 }
 
-int
-_active_list_contains (active_list_t *list, const char *identifier)
+static int
+_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier)
 {
 	active_list_t *node;
 
-	if (list == NULL)
+	if (parser->active == NULL)
 		return 0;
 
-	for (node = list; node; node = node->next)
+	for (node = parser->active; node; node = node->next)
 		if (strcmp (node->identifier, identifier) == 0)
 			return 1;
 
@@ -3884,6 +3886,7 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser,
 	token_node_t *node_prev;
 	token_node_t *node, *last = NULL;
 	token_list_t *expansion;
+	active_list_t *active_initial = parser->active;
 
 	if (list == NULL)
 		return;
@@ -3896,10 +3899,8 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser,
 	while (node) {
 
 		while (parser->active && parser->active->marker == node)
-			parser->active = _active_list_pop (parser->active);
+			_parser_active_list_pop (parser);
 
-		/* Find the expansion for node, which will replace all
-		 * nodes from node to last, inclusive. */
 		expansion = _glcpp_parser_expand_node (parser, node, &last);
 		if (expansion) {
 			token_node_t *n;
@@ -3908,12 +3909,12 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser,
 				while (parser->active &&
 				       parser->active->marker == n)
 				{
-					parser->active = _active_list_pop (parser->active);
+					_parser_active_list_pop (parser);
 				}
 
-			parser->active = _active_list_push (parser->active,
-							    node->token->value.str,
-							    last->next);
+			_parser_active_list_push (parser,
+						  node->token->value.str,
+						  last->next);
 			
 			/* Splice expansion into list, supporting a
 			 * simple deletion if the expansion is
@@ -3940,8 +3941,11 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser,
 		node = node_prev ? node_prev->next : list->head;
 	}
 
-	while (parser->active)
-		parser->active = _active_list_pop (parser->active);
+	/* Remove any lingering effects of this invocation on the
+	 * active list. That is, pop until the list looks like it did
+	 * at the beginning of this function. */
+	while (parser->active && parser->active != active_initial)
+		_parser_active_list_pop (parser);
 
 	list->non_space_tail = list->tail;
 }

commit 6d35d0bda67d528dfca4897d57ec61f6839c9a6b
Author: Carl Worth <cworth@cworth.org>
Date:   Fri Apr 15 12:03:25 2011 -0700

    glcpp: Fix attempts to expand recursive macros infinitely (bug #32835).
    
    The 095-recursive-define test case was triggering infinite recursion
    with the following test case:
    
    	#define A(a, b) B(a, b)
    	#define C A(0, C)
    	C
    
    Here's what was happening:
    
      1. "C" was pushed onto the active list to expand the C node
    
      2. While expanding the "0" argument, the active list would be
         emptied by the code at the end of _glcpp_parser_expand_token_list
    
      3. When expanding the "C" argument, the active list was now empty,
         so lather, rinse, repeat.
    
    We fix this by adjusting the final popping at the end of
    _glcpp_parser_expand_token_list to never pop more nodes than this
    particular invocation had pushed itself. This is as simple as saving
    the original state of the active list, and then interrupting the
    popping when we reach this same state.
    
    With this fix, all of the glcpp-test tests now pass.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=32835
    Signed-off-by: Carl Worth <cworth@cworth.org>
    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
    Reviewed-and-tested-by: Kenneth Graunke <kenneth@whitecape.org>
    (cherry picked from commit 9dacbe222641443af000a82161922a5ade206340)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 5eed9c0..bf94592 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -1561,6 +1561,7 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser,
 	token_node_t *node_prev;
 	token_node_t *node, *last = NULL;
 	token_list_t *expansion;
+	active_list_t *active_initial = parser->active;
 
 	if (list == NULL)
 		return;
@@ -1615,7 +1616,10 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser,
 		node = node_prev ? node_prev->next : list->head;
 	}
 
-	while (parser->active)
+	/* Remove any lingering effects of this invocation on the
+	 * active list. That is, pop until the list looks like it did
+	 * at the beginning of this function. */
+	while (parser->active && parser->active != active_initial)
 		_parser_active_list_pop (parser);
 
 	list->non_space_tail = list->tail;

commit 29c2e1f3f7c0a76db97afb8adb64d2d98e57ca3b
Author: Carl Worth <cworth@cworth.org>
Date:   Thu Apr 14 15:35:41 2011 -0700

    glcpp: Simplify calling convention of parser's active_list functions
    
    These were all written as generic list functions, (accepting and returning
    a list to act upon). But they were only ever used with parser->active as
    the list. By simply accepting the parser itself, these functions can update
    parser->active and now return nothing at all. This makes the code a bit
    more compact.
    
    And hopefully the code is no less readable since the functions are also
    now renamed to have "_parser_active" in the name for better correlation
    with nearby tests of the parser->active field.
    (cherry picked from commit 02d293c08ee2375fc43b343bfc9b074f33a9063c)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 7c51299..5eed9c0 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -95,16 +95,16 @@ _token_list_append_list (token_list_t *list, token_list_t *tail);
 static int
 _token_list_equal_ignoring_space (token_list_t *a, token_list_t *b);
 
-static active_list_t *
-_active_list_push (active_list_t *list,
-		   const char *identifier,
-		   token_node_t *marker);
+static void
+_parser_active_list_push (glcpp_parser_t *parser,
+			  const char *identifier,
+			  token_node_t *marker);
 
-static active_list_t *
-_active_list_pop (active_list_t *list);
+static void
+_parser_active_list_pop (glcpp_parser_t *parser);
 
-int
-_active_list_contains (active_list_t *list, const char *identifier);
+static int
+_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier);
 
 static void
 _glcpp_parser_expand_if (glcpp_parser_t *parser, int type, token_list_t *list);
@@ -1466,7 +1466,7 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser,
 
 	/* Finally, don't expand this macro if we're already actively
 	 * expanding it, (to avoid infinite recursion). */
-	if (_active_list_contains (parser->active, identifier)) {
+	if (_parser_active_list_contains (parser, identifier)) {
 		/* We change the token type here from IDENTIFIER to
 		 * OTHER to prevent any future expansion of this
 		 * unexpanded token. */
@@ -1496,51 +1496,53 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser,
 	return _glcpp_parser_expand_function (parser, node, last);
 }
 
-/* Push a new identifier onto the active list, returning the new list.
+/* Push a new identifier onto the parser's active list.
  *
  * Here, 'marker' is the token node that appears in the list after the
  * expansion of 'identifier'. That is, when the list iterator begins
- * examinging 'marker', then it is time to pop this node from the
+ * examining 'marker', then it is time to pop this node from the
  * active stack.
  */
-active_list_t *
-_active_list_push (active_list_t *list,
-		   const char *identifier,
-		   token_node_t *marker)
+static void
+_parser_active_list_push (glcpp_parser_t *parser,
+			  const char *identifier,
+			  token_node_t *marker)
 {
 	active_list_t *node;
 
-	node = ralloc (list, active_list_t);
+	node = ralloc (parser->active, active_list_t);
 	node->identifier = ralloc_strdup (node, identifier);
 	node->marker = marker;
-	node->next = list;
+	node->next = parser->active;
 
-	return node;
+	parser->active = node;
 }
 
-active_list_t *
-_active_list_pop (active_list_t *list)
+static void
+_parser_active_list_pop (glcpp_parser_t *parser)
 {
-	active_list_t *node = list;
+	active_list_t *node = parser->active;
 
-	if (node == NULL)
-		return NULL;
+	if (node == NULL) {
+		parser->active = NULL;
+		return;
+	}
 
-	node = list->next;
-	ralloc_free (list);
+	node = parser->active->next;
+	ralloc_free (parser->active);
 
-	return node;
+	parser->active = node;
 }
 
-int
-_active_list_contains (active_list_t *list, const char *identifier)
+static int
+_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier)
 {
 	active_list_t *node;
 
-	if (list == NULL)
+	if (parser->active == NULL)
 		return 0;
 
-	for (node = list; node; node = node->next)
+	for (node = parser->active; node; node = node->next)
 		if (strcmp (node->identifier, identifier) == 0)
 			return 1;
 
@@ -1571,10 +1573,8 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser,
 	while (node) {
 
 		while (parser->active && parser->active->marker == node)
-			parser->active = _active_list_pop (parser->active);
+			_parser_active_list_pop (parser);
 
-		/* Find the expansion for node, which will replace all
-		 * nodes from node to last, inclusive. */
 		expansion = _glcpp_parser_expand_node (parser, node, &last);
 		if (expansion) {
 			token_node_t *n;
@@ -1583,12 +1583,12 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser,
 				while (parser->active &&
 				       parser->active->marker == n)
 				{
-					parser->active = _active_list_pop (parser->active);
+					_parser_active_list_pop (parser);
 				}
 
-			parser->active = _active_list_push (parser->active,
-							    node->token->value.str,
-							    last->next);
+			_parser_active_list_push (parser,
+						  node->token->value.str,
+						  last->next);
 			
 			/* Splice expansion into list, supporting a
 			 * simple deletion if the expansion is
@@ -1616,7 +1616,7 @@ _glcpp_parser_expand_token_list (glcpp_parser_t *parser,
 	}
 
 	while (parser->active)
-		parser->active = _active_list_pop (parser->active);
+		_parser_active_list_pop (parser);
 
 	list->non_space_tail = list->tail;
 }

commit dca5ddf4718cf05858c9da5cc29417228cf8ca73
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Tue Apr 12 15:42:06 2011 -0700

    i965: Allocate the whole URB to the VS and fix calculations for Gen6.
    
    Since we never enable the GS on Sandybridge, there's no need to allocate
    it any URB space.
    
    Furthermore, the previous calculation was incorrect: it neglected to
    multiply by nr_vs_entries, instead comparing whether twice the size of
    a single VS URB entry was bigger than the entire URB space.  It also
    neglected to take into account that vs_size is in units of 128 byte
    blocks, while urb_size is in bytes.
    
    Despite the above problems, the calculations resulted in an acceptable
    programming of the URB in most cases, at least on GT2.
    
    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Eric Anholt <eric@anholt.net>
    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
    
    (cherry picked from commit 42a805700039e81a9245f46f153e2cd9705cd0d7)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index a74ba5c..230d326 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -182,7 +182,6 @@ GLboolean brwCreateContext( int api,
 
    /* WM maximum threads is number of EUs times number of threads per EU. */
    if (intel->gen >= 6) {
-      brw->urb.size = 1024;
       if (IS_GT2(intel->intelScreen->deviceID)) {
 	 /* This could possibly be 80, but is supposed to require
 	  * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
@@ -190,9 +189,13 @@ GLboolean brwCreateContext( int api,
 	  */
 	 brw->wm_max_threads = 40;
 	 brw->vs_max_threads = 60;
+	 brw->urb.size = 64;            /* volume 5c.5 section 5.1 */
+	 brw->urb.max_vs_handles = 128; /* volume 2a (see 3DSTATE_URB) */
       } else {
 	 brw->wm_max_threads = 40;
 	 brw->vs_max_threads = 24;
+	 brw->urb.size = 32;            /* volume 5c.5 section 5.1 */
+	 brw->urb.max_vs_handles = 256; /* volume 2a (see 3DSTATE_URB) */
       }
    } else if (intel->gen == 5) {
       brw->urb.size = 1024;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7069724..083e79f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -549,6 +549,9 @@ struct brw_context
 
       GLboolean constrained;
 
+      GLuint max_vs_handles;	/* Maximum number of VS handles */
+      GLuint max_gs_handles;	/* Maximum number of GS handles */
+
       GLuint nr_vs_entries;
       GLuint nr_gs_entries;
       GLuint nr_clip_entries;
@@ -557,10 +560,7 @@ struct brw_context
 
       /* gen6 */
       GLuint vs_size;
-/*       GLuint gs_size; */
-/*       GLuint clip_size; */
-/*       GLuint sf_size; */
-/*       GLuint cs_size; */
+      GLuint gs_size;
 
       GLuint vs_start;
       GLuint gs_start;
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index de97fd3..91d7b01 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -34,19 +34,25 @@
 static void
 prepare_urb( struct brw_context *brw )
 {
-   brw->urb.nr_vs_entries = 24;
-   if (brw->gs.prog_bo)
-      brw->urb.nr_gs_entries = 4;
-   else
-      brw->urb.nr_gs_entries = 0;
+   int nr_vs_entries;
+
    /* CACHE_NEW_VS_PROG */
    brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
 
-   /* Check that the number of URB rows (8 floats each) allocated is less
-    * than the URB space.
+   /* Calculate how many VS URB entries fit in the total URB size */
+   nr_vs_entries = (brw->urb.size * 1024) / (brw->urb.vs_size * 128);
+
+   if (nr_vs_entries > brw->urb.max_vs_handles)
+      nr_vs_entries = brw->urb.max_vs_handles;
+
+   /* According to volume 2a, nr_vs_entries must be a multiple of 4. */
+   brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
+
+   /* Since we currently don't support Geometry Shaders, we always put the
+    * GS unit in passthrough mode and don't allocate it any URB space.
     */
-   assert((brw->urb.nr_vs_entries +
-	   brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024);
+   brw->urb.nr_gs_entries = 0;
+   brw->urb.gs_size = 1; /* Incorrect, but with 0 GS entries it doesn't matter. */
 }
 
 static void
@@ -54,6 +60,7 @@ upload_urb(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
 
+   assert(brw->urb.nr_vs_entries >= 24);
    assert(brw->urb.nr_vs_entries % 4 == 0);
    assert(brw->urb.nr_gs_entries % 4 == 0);
    /* GS requirement */
@@ -63,7 +70,7 @@ upload_urb(struct brw_context *brw)
    OUT_BATCH(CMD_URB << 16 | (3 - 2));
    OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
 	     ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
-   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
+   OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
 	     ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
    ADVANCE_BATCH();
 }

commit ff77a69ae33f04b2fbfdc9a228214ef373d54f61
Author: Ian Romanick <ian.d.romanick@intel.com>
Date:   Fri Apr 8 16:31:22 2011 -0700

    intel: Fix ROUND_DOWN_TO macro
    
    Previously the macro would expand to
    (ALIGN(value - alignment - 1, alignment)).  At the very least, this
    was missing parentheses around "alignment - 1".  As a result, if value
    was already aligned, it would be reduced by alignment.  Consider:
    
         x = ROUND_DOWN_TO(256, 128);
    
    This becomes:
    
        x = ALIGN(256 - 128 - 1, 128);
    
    Or:
    
        x = ALIGN(127, 128);
    
    Which becomes:
    
        x = 128;
    
    This macro is currently only used in brw_state_batch
    (brw_state_batch.c).  It looks like the original version of this macro
    would just use too much space in the batch buffer.  It's possible, but
    not at all clear to me from the code, that the original behavior is
    actually desired.
    
    In any case, this patch does not cause any piglit regressions on my
    Ironlake system.
    
    I also think that ALIGN_FLOOR would be a better name for this macro,
    but ROUND_DOWN_TO matches rounddown in the Linux kernel.
    
    Reviewed-by: Eric Anholt <eric@anholt.net>
    Reviewed-by: Keith Whitwell <keithw@vmware.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    (cherry picked from commit 7e809f0b8d635c8d5519b3d0fdaf11ac0ddda7eb)

diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 53a11ba..f3e856d 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -278,9 +278,33 @@ extern char *__progname;
 #define SUBPIXEL_Y 0.125
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
-#define ALIGN(value, alignment)  ((value + alignment - 1) & ~(alignment - 1))
-#define ROUND_DOWN_TO(value, alignment) (ALIGN(value - alignment - 1, \
-					       alignment))
+
+/**
+ * Align a value up to an alignment value
+ *
+ * If \c value is not already aligned to the requested alignment value, it
+ * will be rounded up.
+ *
+ * \param value  Value to be rounded
+ * \param alignment  Alignment value to be used.  This must be a power of two.
+ *
+ * \sa ROUND_DOWN_TO()
+ */
+#define ALIGN(value, alignment)  (((value) + alignment - 1) & ~(alignment - 1))
+
+/**
+ * Align a value down to an alignment value
+ *
+ * If \c value is not already aligned to the requested alignment value, it
+ * will be rounded down.
+ *
+ * \param value  Value to be rounded
+ * \param alignment  Alignment value to be used.  This must be a power of two.
+ *
+ * \sa ALIGN()
+ */
+#define ROUND_DOWN_TO(value, alignment) ((value) & ~(alignment - 1))
+
 #define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0)
 
 static INLINE uint32_t

commit 510fb3269e5d66c03497b647aed3178290c7624d
Author: Eric Anholt <eric@anholt.net>
Date:   Tue Mar 29 13:22:13 2011 -0700

    i965: Fix the VS thread limits for GT1, and clarify the WM limits on both.
    
    (cherry picked from commit 904b8ba1bb604b2eaaa22f7f074d236011fe213f)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 9483ec6..a74ba5c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -183,8 +183,17 @@ GLboolean brwCreateContext( int api,
    /* WM maximum threads is number of EUs times number of threads per EU. */
    if (intel->gen >= 6) {
       brw->urb.size = 1024;
-      brw->vs_max_threads = 60;
-      brw->wm_max_threads = 80;
+      if (IS_GT2(intel->intelScreen->deviceID)) {
+	 /* This could possibly be 80, but is supposed to require
+	  * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
+	  * GPU reset to change.
+	  */
+	 brw->wm_max_threads = 40;
+	 brw->vs_max_threads = 60;
+      } else {
+	 brw->wm_max_threads = 40;
+	 brw->vs_max_threads = 24;
+      }
    } else if (intel->gen == 5) {
       brw->urb.size = 1024;
       brw->vs_max_threads = 72;
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index ed132bd..4deba46 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -136,7 +136,8 @@ upload_vs_state(struct brw_context *brw)
    OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
 	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
-   OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
+
+   OUT_BATCH(((brw->vs_max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
 	     GEN6_VS_STATISTICS_ENABLE |
 	     GEN6_VS_ENABLE);
    ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 2ae0c09..c8a7134 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -144,7 +144,7 @@ upload_wm_state(struct brw_context *brw)
    dw4 |= (brw->wm.prog_data->first_curbe_grf <<
 	   GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
 
-   dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
+   dw5 |= (brw->wm_max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
 
    /* CACHE_NEW_WM_PROG */
    if (brw->wm.prog_data->dispatch_width == 8)

commit 62fad6cb302cfb2cb4e0af102486ae8e567047d1
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Tue Mar 29 17:03:22 2011 -0700

    intel: Add IS_GT2 macro for recognizing Sandybridge GT2 systems.
    
    Also, refactor IS_GEN6 to use the IS_GT1 and IS_GT2 macros.
    
    (cherry picked from commit ee8d182426d4ecda7b9f5089d19d36f7de2a4dfe)

diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index 4fecdbe..a3f40ef 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -125,14 +125,17 @@
 /* Compat macro for intel_decode.c */
 #define IS_IRONLAKE(devid)	IS_GEN5(devid)
 
-#define IS_GEN6(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
-				 devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
-				 devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS	|| \
+#define IS_GT1(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
-				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
-				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
 				 devid == PCI_CHIP_SANDYBRIDGE_S)
 
+#define IS_GT2(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+				 devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS	|| \
+				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
+				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)
+
+#define IS_GEN6(devid)		(IS_GT1(devid) || IS_GT2(devid))
+
 #define IS_965(devid)		(IS_GEN4(devid) || \
 				 IS_G4X(devid) || \
 				 IS_GEN5(devid) || \

commit 0f02b4253d5b3a3bbdfc72711bb9a7c0781061f4
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Tue Mar 29 05:49:40 2011 -0700

    i965: Resolve implied moves in brw_dp_READ_4_vs_relative.
    
    Fixes piglit test glsl-vs-arrays-3 on Sandybridge, as well as garbage
    rendering in 3DMarkMobileES 2.0's Taiji demo and GLBenchmark 2.0's
    Egypt and PRO demos.
    
    NOTE: This is a candidate for stable release branches.  It depends on
    commit 9a21bc640188e4078075b9f8e6701853a4f0bbe4.
    (cherry picked from commit 9d60a7ce08a67eb8b79c60f829d090ba4a37ed7e)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 33b6412..f6838ff 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1799,6 +1799,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
 			       GLuint bind_table_index)
 {
    struct intel_context *intel = &p->brw->intel;
+   struct brw_reg src = brw_vec8_grf(0, 0);
    int msg_type;
 
    /* Setup MRF[1] with offset into const buffer */
@@ -1815,6 +1816,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
 	   addr_reg, brw_imm_d(offset));
    brw_pop_insn_state(p);
 
+   gen6_resolve_implied_move(p, &src, 0);
    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
 
    insn->header.predicate_control = BRW_PREDICATE_NONE;
@@ -1823,7 +1825,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
    insn->header.mask_control = BRW_MASK_DISABLE;
 
    brw_set_dest(p, insn, dest);
-   brw_set_src0(insn, brw_vec8_grf(0, 0));
+   brw_set_src0(insn, src);
 
    if (intel->gen == 6)
       msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

commit 773ea1a2345b5d5f8291339a02d7cacb5ea7302e
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Wed Mar 16 14:09:17 2011 -0700

    i965: Refactor Sandybridge implied move handling.
    
    This is actually a squash of the following two commits.  The first
    caused a regression, and the second fixes it.  The refactor of the
    first is needed for another patch that fixes an SNB bug.
    
        i965: Refactor Sandybridge implied move handling.
    
        This was open-coded in three different places, and more are necessary.
        Extract this into a function so it can be reused.
    
        Unfortunately, not all variations were the same: in particular, one set
        compression control and checked that the source register was not
        ARF_NULL.  This seemed like a good idea, so all cases now do so.
        (cherry picked from commit 9a21bc640188e4078075b9f8e6701853a4f0bbe4)
    
        i965: Fix null register use in Sandybridge implied move resolution.
    
        Fixes regressions caused by commit 9a21bc6401, namely GPU hangs when
        running gnome-shell or compiz (Mesa bugs #35820 and #35853).
    
        I incorrectly refactored the case that dealt with ARF_NULL; even in that
        case, the source register needs to be changed to the MRF.
    
        NOTE: This is a candidate for the 7.10 branch (if 9a21bc6401 is
        cherry-picked, take this one too).
        (cherry picked from commit a019dd0d6e5bba00e8ee7818e004ee42ca507102)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 88131c4..33b6412 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -52,6 +52,34 @@ static void guess_execution_size(struct brw_compile *p,
 }
 
 
+/**
+ * Prior to Sandybridge, the SEND instruction accepted non-MRF source
+ * registers, implicitly moving the operand to a message register.
+ *
+ * On Sandybridge, this is no longer the case.  This function performs the
+ * explicit move; it should be called before emitting a SEND instruction.
+ */
+static void
+gen6_resolve_implied_move(struct brw_compile *p,
+			  struct brw_reg *src,
+			  GLuint msg_reg_nr)
+{
+   struct intel_context *intel = &p->brw->intel;
+   if (intel->gen != 6)
+      return;
+
+   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
+	      retype(*src, BRW_REGISTER_TYPE_UD));
+      brw_pop_insn_state(p);
+   }
+   *src = brw_message_reg(msg_reg_nr);
+}
+
+
 static void brw_set_dest(struct brw_compile *p,
 			 struct brw_instruction *insn,
 			 struct brw_reg dest)
@@ -1966,20 +1994,7 @@ void brw_SAMPLE(struct brw_compile *p,
    {
       struct brw_instruction *insn;
    
-      /* Sandybridge doesn't have the implied move for SENDs,
-       * and the first message register index comes from src0.
-       */
-      if (intel->gen >= 6) {
-	 if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
-	     src0.nr != BRW_ARF_NULL) {
-	    brw_push_insn_state(p);
-	    brw_set_mask_control( p, BRW_MASK_DISABLE );
-	    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-	    brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0);
-	    brw_pop_insn_state(p);
-	 }
-	 src0 = brw_message_reg(msg_reg_nr);
-      }
+      gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
       insn = next_insn(p, BRW_OPCODE_SEND);
       insn->header.predicate_control = 0; /* XXX */
@@ -2034,17 +2049,7 @@ void brw_urb_WRITE(struct brw_compile *p,
    struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *insn;
 
-   /* Sandybridge doesn't have the implied move for SENDs,
-    * and the first message register index comes from src0.
-    */
-   if (intel->gen >= 6) {
-      brw_push_insn_state(p);
-      brw_set_mask_control( p, BRW_MASK_DISABLE );
-      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
-	      retype(src0, BRW_REGISTER_TYPE_UD));
-      brw_pop_insn_state(p);
-      src0 = brw_message_reg(msg_reg_nr);
-   }
+   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
    insn = next_insn(p, BRW_OPCODE_SEND);
 
@@ -2154,17 +2159,7 @@ void brw_ff_sync(struct brw_compile *p,
    struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *insn;
 
-   /* Sandybridge doesn't have the implied move for SENDs,
-    * and the first message register index comes from src0.
-    */
-   if (intel->gen >= 6) {
-      brw_push_insn_state(p);
-      brw_set_mask_control( p, BRW_MASK_DISABLE );
-      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
-	      retype(src0, BRW_REGISTER_TYPE_UD));
-      brw_pop_insn_state(p);
-      src0 = brw_message_reg(msg_reg_nr);
-   }
+   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
    insn = next_insn(p, BRW_OPCODE_SEND);
    brw_set_dest(p, insn, dest);

commit 41d5dd4a6e10ae5ae6ac701cbb739fd89a449044
Author: Zou Nan hai <nanhai.zou@intel.com>
Date:   Tue Mar 1 10:39:35 2011 +0800

    i965: Align interleaved URB write length to 2
    
    The BSpec says that interleaved URB writes must be aligned, so this
    patch fulfills that requirement.
    
    This is half of patch 6c32477 from master.
    
    Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 0411ce0..6ec6255 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1561,6 +1561,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
    int eot;
    GLuint len_vertex_header = 2;
    int next_mrf, i;
+   int msg_len;
 
    if (c->key.copy_edgeflag) {
       brw_MOV(p, 
@@ -1727,13 +1728,20 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 
    eot = (c->first_overflow_output == 0);
 
+   msg_len = c->nr_outputs + 2 + len_vertex_header; 
+   if (intel->gen >= 6) {
+	   /* interleaved urb write message length for gen6 should be multiple of 2 */
+	   if ((msg_len % 2) != 0)
+		msg_len++;
+   }
+
    brw_urb_WRITE(p, 
 		 brw_null_reg(), /* dest */
 		 0,		/* starting mrf reg nr */
 		 c->r0,		/* src */
 		 0,		/* allocate */
 		 1,		/* used */
-		 MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */
+		 MIN2(msg_len - 1, (BRW_MAX_MRF - 1)), /* msg len */
 		 0,		/* response len */
 		 eot, 		/* eot */
 		 eot, 		/* writes complete */

commit 2c83c2860886ad881490bbb7708bb974792d2f0a
Author: Brian Paul <brianp@vmware.com>
Date:   Thu Apr 7 13:56:45 2011 -0600

    docs: replace llvmpipe/README with docs/llvmpipe.html

diff --git a/docs/contents.html b/docs/contents.html
index cf1661e..8fc2ac0 100644
--- a/docs/contents.html


Reply to: