[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'upstream-unstable'



Rebased ref, commits from common ancestor:
commit 4a86465f4754a0d79ed98f2d1ce425864854418b
Author: Ian Romanick <ian.d.romanick@intel.com>
Date:   Wed Mar 5 08:59:46 2014 +0200

    mesa: Bump version to 10.1 (final)
    
    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>

diff --git a/VERSION b/VERSION
index 4b9caf0..4149c39 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-10.1.0-rc3
+10.1.0

commit 03d0c9fd308db2124efbe240eda24a2a67eb67d2
Author: Julien Cristau <jcristau@debian.org>
Date:   Sat Mar 1 10:11:11 2014 +0100

    glx/dri2: fix build failure on HURD
    
    Patch from Debian package.
    
    Cc: "10.0 10.1" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
    (cherry picked from commit 6f0e2731e862d1c2d8d21927040bead5049a2d68)

diff --git a/src/glx/dri2_query_renderer.c b/src/glx/dri2_query_renderer.c
index b50a202..95560cb 100644
--- a/src/glx/dri2_query_renderer.c
+++ b/src/glx/dri2_query_renderer.c
@@ -20,6 +20,9 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+
+#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
+
 #include "glxclient.h"
 #include "glx_error.h"
 #include "xf86drm.h"
@@ -95,3 +98,5 @@ dri2_query_renderer_string(struct glx_screen *base, int attribute,
 
    return psc->rendererQuery->queryString(psc->driScreen, dri_attribute, value);
 }
+
+#endif /* GLX_DIRECT_RENDERING */

commit 4c0702b05ca002e42dbf05c6c4451113df0c3799
Author: Chris Forbes <chrisf@ijw.co.nz>
Date:   Sat Feb 22 18:09:31 2014 +1300

    i965: Validate (and resolve) all the bound textures.
    
    BRW_MAX_TEX_UNIT is the static limit on the number of textures we
    support per-stage, not in total.
    
    Core's `Unit` array is sized by MAX_COMBINED_TEXTURE_IMAGE_UNITS, which
    is significantly larger, and across the various shader stages, up to
    ctx->Const.MaxCombinedTextureImageUnits elements of it may be actually
    used.
    
    Fixes invisible bad behavior in piglit's max-samplers test (although
    this escalated to an assertion failure on HSW with texture_view, since
    non-immutable textures only have _Format set by validation.)
    
    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
    Cc: "9.2 10.0 10.1" <mesa-stable@lists.freedesktop.org>
    Cc: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    (cherry picked from commit befbda56a246f77797bdf13fc005353441db2879)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 39da953..1e018bb 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -326,7 +326,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
    /* Resolve depth buffer of each enabled depth texture, and color buffer of
     * each fast-clear-enabled color texture.
     */
-   for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+   for (int i = 0; i < ctx->Const.MaxCombinedTextureImageUnits; i++) {
       if (!ctx->Texture.Unit[i]._ReallyEnabled)
 	 continue;
       tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c
index 9234e3a..b1f4de0 100644
--- a/src/mesa/drivers/dri/i965/brw_tex.c
+++ b/src/mesa/drivers/dri/i965/brw_tex.c
@@ -47,7 +47,7 @@ void brw_validate_textures( struct brw_context *brw )
    struct gl_context *ctx = &brw->ctx;
    int i;
 
-   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+   for (i = 0; i < ctx->Const.MaxCombinedTextureImageUnits; i++) {
       struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
 
       if (texUnit->_ReallyEnabled) {

commit 5fbd6494517be0bc2f2ead4d6966776fcb0d426d
Author: Chris Forbes <chrisf@ijw.co.nz>
Date:   Thu Feb 27 07:28:05 2014 +1300

    i965: Widen sampler key bitfields for 32 samplers
    
    Previously the `high` 16 samplers on Haswell+ would not get sampler
    workarounds applied.
    
    Don't bother widening YUV fields, since they're ignored and going away
    soon anyway.
    
    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    Cc: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    (cherry picked from commit 590920f93e227f1fb4258dd01b662e8bda8b3af4)

diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index 51182ea..f4dedf8 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -33,7 +33,7 @@ struct brw_sampler_prog_key_data {
     */
    uint16_t swizzles[MAX_SAMPLERS];
 
-   uint16_t gl_clamp_mask[3];
+   uint32_t gl_clamp_mask[3];
 
    /**
     * YUV conversions, needed for the GL_MESA_ycbcr extension.
@@ -44,12 +44,12 @@ struct brw_sampler_prog_key_data {
    /**
     * For RG32F, gather4's channel select is broken.
     */
-   uint16_t gather_channel_quirk_mask;
+   uint32_t gather_channel_quirk_mask;
 
    /**
     * Whether this sampler uses the compressed multisample surface layout.
     */
-   uint16_t compressed_multisample_layout_mask;
+   uint32_t compressed_multisample_layout_mask;
 };
 
 #ifdef __cplusplus

commit 05b9e6a96359aeafadbf06cb202e044626c19d2f
Author: Ian Romanick <ian.d.romanick@intel.com>
Date:   Sat Mar 1 08:31:24 2014 -0800

    mesa: Bump version to 10.1-rc3
    
    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>

diff --git a/VERSION b/VERSION
index 5364038..4b9caf0 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-10.1.0-rc2
+10.1.0-rc3

commit 92e8c52340d38054494e5de25a73470f0d8de110
Author: Emil Velikov <emil.l.velikov@gmail.com>
Date:   Sat Feb 22 03:04:02 2014 +0000

    dri/i9*5: correctly calculate the amount of system memory
    
    The variable name states megabytes, while we calculate the amount in
    kilobytes. Correct this by dividing by the correct amount (1024 * 1024).
    
    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
    Cc: "10.0 10.1" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
    (cherry picked from commit fc25956badb8e1932cc19d8c97b4be16e92dfc65)

diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c
index 296df16..884fdb5 100644
--- a/src/mesa/drivers/dri/i915/intel_screen.c
+++ b/src/mesa/drivers/dri/i915/intel_screen.c
@@ -744,7 +744,7 @@ i915_query_renderer_integer(__DRIscreen *psp, int param, unsigned int *value)
          * (uint64_t) system_page_size;
 
       const unsigned system_memory_megabytes =
-         (unsigned) (system_memory_bytes / 1024);
+         (unsigned) (system_memory_bytes / (1024 * 1024));
 
       value[0] = MIN2(system_memory_megabytes, gpu_mappable_megabytes);
       return 0;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 6ab3609..736e8fa 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -846,7 +846,7 @@ brw_query_renderer_integer(__DRIscreen *psp, int param, unsigned int *value)
          * (uint64_t) system_page_size;
 
       const unsigned system_memory_megabytes =
-         (unsigned) (system_memory_bytes / 1024);
+         (unsigned) (system_memory_bytes / (1024 * 1024));
 
       value[0] = MIN2(system_memory_megabytes, gpu_mappable_megabytes);
       return 0;

commit 3f0011edfd7a658c1cfce60a259d075f08493f42
Author: Brian Paul <brianp@vmware.com>
Date:   Fri Feb 28 07:55:04 2014 -0700

    mesa: add unpacking code for MESA_FORMAT_Z32_FLOAT_S8X24_UINT
    
    Fixes glGetTexImage() when converting from MESA_FORMAT_Z32_FLOAT_S8X24_UINT
    to GL_UNSIGNED_INT_24_8.  Hit by the piglit
    ext_packed_depth_stencil-getteximage test.
    
    Cc: "10.0" "10.1" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    (cherry picked from commit a12d4d0398c437911720069d293b469c60c4488c)

diff --git a/src/mesa/main/format_unpack.c b/src/mesa/main/format_unpack.c
index c948a9d..276ba55 100644
--- a/src/mesa/main/format_unpack.c
+++ b/src/mesa/main/format_unpack.c
@@ -4189,11 +4189,30 @@ unpack_uint_24_8_depth_stencil_S8_Z24(const GLuint *src, GLuint *dst, GLuint n)
 }
 
 static void
+unpack_uint_24_8_depth_stencil_Z32_S8X24(const GLuint *src,
+                                         GLuint *dst, GLuint n)
+{
+   GLuint i;
+
+   for (i = 0; i < n; i++) {
+      /* 8 bytes per pixel (float + uint32) */
+      GLfloat zf = ((GLfloat *) src)[i * 2 + 0];
+      GLuint z24 = (GLuint) (zf * (GLfloat) 0xffffff);
+      GLuint s = src[i * 2 + 1] & 0xff;
+      dst[i] = (z24 << 8) | s;
+   }
+}
+
+static void
 unpack_uint_24_8_depth_stencil_Z24_S8(const GLuint *src, GLuint *dst, GLuint n)
 {
    memcpy(dst, src, n * 4);
 }
 
+/**
+ * Unpack depth/stencil returning as GL_UNSIGNED_INT_24_8.
+ * \param format  the source data format
+ */
 void
 _mesa_unpack_uint_24_8_depth_stencil_row(mesa_format format, GLuint n,
 					 const void *src, GLuint *dst)
@@ -4205,6 +4224,9 @@ _mesa_unpack_uint_24_8_depth_stencil_row(mesa_format format, GLuint n,
    case MESA_FORMAT_Z24_UNORM_S8_UINT:
       unpack_uint_24_8_depth_stencil_S8_Z24(src, dst, n);
       break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      unpack_uint_24_8_depth_stencil_Z32_S8X24(src, dst, n);
+      break;
    default:
       _mesa_problem(NULL,
                     "bad format %s in _mesa_unpack_uint_24_8_depth_stencil_row",

commit 6e3ce7997ac9adec4cc9aba62c0aea611460bbbb
Author: Ian Romanick <ian.d.romanick@intel.com>
Date:   Wed Feb 26 12:48:56 2014 -0800

    i915: Allocate the sys_buffer using _mesa_align_malloc
    
    Though it won't matter on Linux, use _mesa_align_free to release it.
    Since i965 doesn't have sys_buffer, I overlooked this in the
    GL_ARB_map_buffer_alignment work a few months ago.  Fixes i915 (and
    presumably i830) regressions in ARB_map_buffer_range tests and the
    failure in arb_map_buffer_alignment-sanity_test.
    
    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74960
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    (cherry picked from commit ff2cbf9e0ca0ee46a15add1a42cd48705c84e0de)

diff --git a/src/mesa/drivers/dri/i915/intel_buffer_objects.c b/src/mesa/drivers/dri/i915/intel_buffer_objects.c
index 345db6d..2140315 100644
--- a/src/mesa/drivers/dri/i915/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i915/intel_buffer_objects.c
@@ -96,7 +96,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
    if (obj->Pointer)
       intel_bufferobj_unmap(ctx, obj);
 
-   free(intel_obj->sys_buffer);
+   _mesa_align_free(intel_obj->sys_buffer);
 
    drm_intel_bo_unreference(intel_obj->buffer);
    free(intel_obj);
@@ -129,7 +129,7 @@ intel_bufferobj_data(struct gl_context * ctx,
    if (intel_obj->buffer != NULL)
       release_buffer(intel_obj);
 
-   free(intel_obj->sys_buffer);
+   _mesa_align_free(intel_obj->sys_buffer);
    intel_obj->sys_buffer = NULL;
 
    if (size != 0) {
@@ -137,7 +137,8 @@ intel_bufferobj_data(struct gl_context * ctx,
        * contents anyway.
        */
       if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) {
-	 intel_obj->sys_buffer = malloc(size);
+	 intel_obj->sys_buffer =
+            _mesa_align_malloc(size, ctx->Const.MinMapBufferAlignment);
 	 if (intel_obj->sys_buffer != NULL) {
 	    if (data != NULL)
 	       memcpy(intel_obj->sys_buffer, data, size);
@@ -188,7 +189,7 @@ intel_bufferobj_subdata(struct gl_context * ctx,
 	 return;
       }
 
-      free(intel_obj->sys_buffer);
+      _mesa_align_free(intel_obj->sys_buffer);
       intel_obj->sys_buffer = NULL;
    }
 
@@ -295,7 +296,7 @@ intel_bufferobj_map_range(struct gl_context * ctx,
 	 return obj->Pointer;
       }
 
-      free(intel_obj->sys_buffer);
+      _mesa_align_free(intel_obj->sys_buffer);
       intel_obj->sys_buffer = NULL;
    }
 
@@ -478,7 +479,7 @@ intel_bufferobj_buffer(struct intel_context *intel,
 			   0, intel_obj->Base.Size,
 			   intel_obj->sys_buffer);
 
-      free(intel_obj->sys_buffer);
+      _mesa_align_free(intel_obj->sys_buffer);
       intel_obj->sys_buffer = NULL;
       intel_obj->offset = 0;
    }
@@ -663,7 +664,7 @@ intel_buffer_object_purgeable(struct gl_context * ctx,
       return intel_buffer_purgeable(intel_obj->buffer);
 
    if (option == GL_RELEASED_APPLE) {
-      free(intel_obj->sys_buffer);
+      _mesa_align_free(intel_obj->sys_buffer);
       intel_obj->sys_buffer = NULL;
 
       return GL_RELEASED_APPLE;

commit 1b6aad2234f277da1c4e0b2cd6634e8303526d9d
Author: Ian Romanick <ian.d.romanick@intel.com>
Date:   Wed Feb 26 12:32:29 2014 -0800

    i915: Only allow 8 vertex texture units
    
    There's no reason to have more vertex texture units than fragment
    texture units on this hardware.  Since increasing the default maximum
    number of texture units from 16 to 32, this has triggered some segfaults
    in the i915 driver.  There's probably some array or bitfield that isn't
    properly sized now.  This really papers over the bug, but I don't think
    I'll lose any sleep over that.
    
    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74071
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    (cherry picked from commit 8ba157006fe98bca96b79bea1394b0c33ddf4ad3)

diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index 7378fc3..ebcab94 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -193,6 +193,7 @@ i915CreateContext(int api,
 
    ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = I915_TEX_UNITS;
+   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = I915_TEX_UNITS;
    ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
    ctx->Const.MaxVarying = I915_TEX_UNITS;
    ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents =

commit b34f05f6a778d204d906531837bba53ad245eab6
Author: Petri Latvala <petri.latvala@intel.com>
Date:   Thu Feb 27 16:15:04 2014 +0200

    i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.
    
    v2: Don't add function parameters, pass the required size in
    prog_data->nr_params.
    
    v3:
    - Use the name uniform_array_size instead of uniform_param_count.
    - Round up when dividing param_count by 4.
    - Use MAX2() instead of taking the maximum by hand.
    - Don't crash if prog_data passed to vec4_visitor constructor is NULL
    
    v4: Rebase for current master
    
    v5 (idr): Trivial whitespace change.
    
    Signed-off-by: Petri Latvala <petri.latvala@intel.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71254
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
    (cherry picked from commit 7189fce237cc7f4bc76a85cca8bcf75756d9affc)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 9b38c32..a944616 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -353,8 +353,9 @@ public:
     */
    dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
    const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-   int uniform_size[MAX_UNIFORMS];
-   int uniform_vector_size[MAX_UNIFORMS];
+   int *uniform_size;
+   int *uniform_vector_size;
+   int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
    int uniforms;
 
    src_reg shader_start_time;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index abc181b..27748ce 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -64,6 +64,11 @@ do_gs_prog(struct brw_context *brw,
 
    c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
    c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
+   /* Setting nr_params here NOT to the size of the param and pull_param
+    * arrays, but to the number of uniform components vec4_visitor
+    * needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+    */
+   c.prog_data.base.nr_params = ALIGN(param_count, 4) / 4 + gs->num_samplers;
 
    if (gp->program.OutputType == GL_POINTS) {
       /* When the output type is points, the geometry shader may output data
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3089ef6..b5b8f36 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3365,6 +3365,17 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
    this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
    this->uniforms = 0;
+
+   /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
+    * at least one. See setup_uniforms() in brw_vec4.cpp.
+    */
+   this->uniform_array_size = 1;
+   if (prog_data) {
+      this->uniform_array_size = MAX2(prog_data->nr_params, 1);
+   }
+
+   this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
+   this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
 }
 
 vec4_visitor::~vec4_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index a4f2ac6..e1b6eda 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -244,6 +244,15 @@ do_vs_prog(struct brw_context *brw,
    prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
    prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
 
+   /* Setting nr_params here NOT to the size of the param and pull_param
+    * arrays, but to the number of uniform components vec4_visitor
+    * needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+    */
+   prog_data.base.nr_params = ALIGN(param_count, 4) / 4;
+   if (vs) {
+      prog_data.base.nr_params += vs->num_samplers;
+   }
+
    GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
    prog_data.inputs_read = vp->program.Base.InputsRead;
 

commit 677fde5ca08b4ad31656458eef0c460fac689c19
Author: Tom Stellard <thomas.stellard@amd.com>
Date:   Mon Feb 24 16:51:05 2014 -0500

    r600g/compute: PIPE_CAP_COMPUTE should be false for pre-evergreen GPUs
    
    This prevents clover from using unsupported devices.
    
    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
    
    CC: "10.0 10.1" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit f61e382f0afc48bc09f21c50639f760acca85bc5)

diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 4ca6a22..d88284b 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -347,7 +347,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
 	case PIPE_CAP_USER_INDEX_BUFFERS:
 	case PIPE_CAP_USER_CONSTANT_BUFFERS:
-	case PIPE_CAP_COMPUTE:
 	case PIPE_CAP_START_INSTANCE:
 	case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
 	case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
@@ -356,6 +355,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 		return 1;
 
+	case PIPE_CAP_COMPUTE:
+		return rscreen->b.chip_class > R700;
+
 	case PIPE_CAP_TGSI_TEXCOORD:
 		return 0;
 

commit 3305b9c96b8c1f00e66bda59aa69b44a5ff861db
Author: Matt Turner <mattst88@gmail.com>
Date:   Sat Feb 22 16:35:15 2014 -0800

    glsl: Don't vectorize horizontal expressions.
    
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75224
    (cherry picked from commit 4bd7f1d044eee17587d6523322303a61aeb8d660)

diff --git a/src/glsl/opt_vectorize.cpp b/src/glsl/opt_vectorize.cpp
index dba303d..13faac0 100644
--- a/src/glsl/opt_vectorize.cpp
+++ b/src/glsl/opt_vectorize.cpp
@@ -83,6 +83,7 @@ public:
    virtual ir_visitor_status visit_enter(ir_assignment *);
    virtual ir_visitor_status visit_enter(ir_swizzle *);
    virtual ir_visitor_status visit_enter(ir_dereference_array *);
+   virtual ir_visitor_status visit_enter(ir_expression *);
    virtual ir_visitor_status visit_enter(ir_if *);
    virtual ir_visitor_status visit_enter(ir_loop *);
 
@@ -303,6 +304,20 @@ ir_vectorize_visitor::visit_enter(ir_dereference_array *ir)
    return visit_continue_with_parent;
 }
 
+/**
+ * Upon entering an ir_expression, remove the current assignment from further
+ * consideration if the expression operates horizontally on vectors.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_expression *ir)
+{
+   if (ir->is_horizontal()) {
+      this->current_assignment = NULL;
+      return visit_continue_with_parent;
+   }
+   return visit_continue;
+}
+
 /* Since there is no statement to visit between the "then" and "else"
  * instructions try to vectorize before, in between, and after them to avoid
  * combining statements from different basic blocks.

commit a43b8bfa780baf8974a1c949459c8781449af238
Author: Matt Turner <mattst88@gmail.com>
Date:   Sat Feb 22 16:35:14 2014 -0800

    glsl: Add is_horizontal() method to ir_expression.
    
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 5eff8576ba274858a0b242ead97b8b5fc2b4f8ff)

diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index e266328..d446292 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1444,6 +1444,18 @@ public:
    }
 
    /**
+    * Return whether the expression operates on vectors horizontally.
+    */
+   bool is_horizontal() const
+   {
+      return operation == ir_binop_all_equal ||
+             operation == ir_binop_any_nequal ||
+             operation == ir_unop_any ||
+             operation == ir_binop_dot ||
+             operation == ir_quadop_vector;
+   }
+
+   /**
     * Return a string representing this expression's operator.
     */
    const char *operator_string();

commit 862572b205acbfdb40f945ccf4add3310fb6bc18
Author: Brian Paul <brianp@vmware.com>
Date:   Thu Feb 27 08:36:13 2014 -0700

    mesa: do depth/stencil format conversion in glGetTexImage
    
    glGetTexImage(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8) was just
    using memcpy() instead of _mesa_unpack_uint_24_8_depth_stencil_row()
    to convert texels from the hardware format to the GL format.
    
    Fixes issue reported by David Meng at Intel.  The new piglit
    ext_packed_depth_stencil-getteximage test checks for this bug.
    
    Also, add some format/type assertions.  We don't yet handle the
    GL_FLOAT_32_UNSIGNED_INT_24_8_REV type.  That should be fixed in
    a follow-on patch.
    
    Reviewed-by: Eric Anholt <eric@anholt.net>
    Cc: "10.0" "10.1" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 43dee0295e5da42425f1a3b6a3b3108173f4b676)

diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 133fa53..63da027 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -130,6 +130,10 @@ get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions,
    const GLint depth = texImage->Depth;
    GLint img, row;
 
+   assert(format == GL_DEPTH_STENCIL);
+   assert(type == GL_UNSIGNED_INT_24_8);
+   /* XXX type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV is not handled yet */
+
    for (img = 0; img < depth; img++) {
       GLubyte *srcMap;
       GLint rowstride;
@@ -145,8 +149,11 @@ get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions,
             void *dest = _mesa_image_address(dimensions, &ctx->Pack, pixels,
                                              width, height, format, type,
                                              img, row, 0);
-            /* XXX Z24_S8 vs. S8_Z24??? */
-            memcpy(dest, src, width * sizeof(GLuint));
+            /* Unpack from texture's format to GL's z24_s8 layout */
+            _mesa_unpack_uint_24_8_depth_stencil_row(texImage->TexFormat,
+                                                     width,
+                                                     (const GLuint *) src,
+                                                     dest);
             if (ctx->Pack.SwapBytes) {
                _mesa_swap4((GLuint *) dest, width);
             }

commit 037f357564360d1eaf819afb1d5179c6d0d6cac0
Author: Thomas Hellstrom <thellstrom@vmware.com>
Date:   Thu Feb 20 14:32:07 2014 +0100

    winsys/svga: Avoid calling drm getparam for max surface size on older kernels
    
    This avoids the kernel driver spewing out errors about the param not being
    supported.
    
    Also correct the max surface size used when the kernel does not support the
    query.
    
    Reported-by: Brian Paul <brianp@vmware.com>
    Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
    Reviewed-by: Charmaine Lee <charmainel@vmware.com>
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit f5e681f3fae6b95b76bb068b367f5f96c58073c8)

diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
index b7bedb1..2866a29 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
@@ -762,7 +762,7 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
    int ret;
    uint32_t *cap_buffer;
    drmVersionPtr version;
-   boolean drm_gb_capable;
+   boolean have_drm_2_5;
 
    VMW_FUNC;
 
@@ -770,7 +770,7 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
    if (!version)
       goto out_no_version;
 
-   drm_gb_capable = version->version_major > 2 ||
+   have_drm_2_5 = version->version_major > 2 ||
       (version->version_major == 2 && version->version_minor > 4);
 
    memset(&gp_arg, 0, sizeof(gp_arg));
@@ -803,7 +803,7 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
       vws->base.have_gb_objects =
          !!(gp_arg.value & (uint64_t) SVGA_CAP_GBOBJECTS);
    
-   if (vws->base.have_gb_objects && !drm_gb_capable)
+   if (vws->base.have_gb_objects && !have_drm_2_5)
       goto out_no_3d;
 
    if (vws->base.have_gb_objects) {
@@ -839,11 +839,12 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
 
       memset(&gp_arg, 0, sizeof(gp_arg));
       gp_arg.param = DRM_VMW_PARAM_MAX_SURF_MEMORY;
-      ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
-                                &gp_arg, sizeof(gp_arg));
-      if (ret) {
+      if (have_drm_2_5)
+         ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                   &gp_arg, sizeof(gp_arg));
+      if (!have_drm_2_5 || ret) {
          /* Just guess a large enough value, around 800mb. */
-         vws->ioctl.max_surface_memory = 0x300000000;
+         vws->ioctl.max_surface_memory = 0x30000000;
       } else {
          vws->ioctl.max_surface_memory = gp_arg.value;
       }

commit bef555409256346dba3e74f73c18d900c82572ad
Author: Anuj Phogat <anuj.phogat@gmail.com>
Date:   Tue Jan 7 17:46:45 2014 -0800

    i965: Fix the region's pitch condition to use blitter
    
    intelEmitCopyBlit uses a signed 16-bit integer to represent
    buffer pitch, so it can only handle buffer pitches < 32k.
    
    Cc: mesa-stable@lists.freedesktop.org
    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
    (cherry picked from commit b3094d9927fe7aa5a84892262404aaad4d728724)

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index b12ecca..7b36a63 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -200,9 +200,9 @@ intel_miptree_blit(struct brw_context *brw,
     * As a result of these two limitations, we can only use the blitter to do
     * this copy when the region's pitch is less than 32k.
     */
-   if (src_mt->region->pitch > 32768 ||
-       dst_mt->region->pitch > 32768) {
-      perf_debug("Falling back due to >32k pitch\n");
+   if (src_mt->region->pitch >= 32768 ||
+       dst_mt->region->pitch >= 32768) {
+      perf_debug("Falling back due to >=32k pitch\n");
       return false;
    }
 

commit 09b03dcee6c27af5bf5ef05f50773f2be0769892
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Tue Feb 25 22:15:30 2014 -0800

    i965: Don't try to dump shader source for fixed-function FS programs.
    
    sh->Source is NULL and this will segfault.
    
    Fixes MESA_GLSL=dump with "The Swapper".
    
    Cc: mesa-stable@lists.freedesktop.org
    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Eric Anholt <eric@anholt.net>
    (cherry picked from commit f896e82301255177894a6c51883e18d32c36b307)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 739c516..43ae560 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -258,7 +258,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
       }
    }
 
-   if (ctx->Shader.Flags & GLSL_DUMP) {
+   if ((ctx->Shader.Flags & GLSL_DUMP) && shProg->Name != 0) {
       for (unsigned i = 0; i < shProg->NumShaders; i++) {
          const struct gl_shader *sh = shProg->Shaders[i];
          if (!sh)

commit 45cb6063e7535b4aac94d81bbbfe6f835117d377
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Sun Feb 23 16:34:04 2014 -0800

    glsl: Delete LRP_TO_ARITH lowering pass flag.
    
    It's kind of a trap---calling do_common_optimization() after
    lower_instructions() may cause opt_algebraic() to reintroduce
    ir_triop_lrp expressions that were lowered, effectively defeating the
    point.  Because of this, nobody uses it.
    
    v2: Delete more code (caught by Ian Romanick).
    
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Acked-by: Eric Anholt <eric@anholt.net>
    (cherry picked from commit ac0a8b9540b29eb6faa55e4c77ba8fa99478884a)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 055d655..1777b84 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -36,9 +36,8 @@
 #define LOG_TO_LOG2        0x10
 #define MOD_TO_FRACT       0x20
 #define INT_DIV_TO_MUL_RCP 0x40
-#define LRP_TO_ARITH       0x80
-#define BITFIELD_INSERT_TO_BFM_BFI 0x100
-#define LDEXP_TO_ARITH     0x200
+#define BITFIELD_INSERT_TO_BFM_BFI 0x80
+#define LDEXP_TO_ARITH     0x100
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 44a6e80..01ea0f0 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -38,7 +38,6 @@
  * - LOG_TO_LOG2
  * - MOD_TO_FRACT
  * - LDEXP_TO_ARITH
- * - LRP_TO_ARITH
  * - BITFIELD_INSERT_TO_BFM_BFI
  *
  * SUB_TO_ADD_NEG:
@@ -87,10 +86,6 @@
  * -------------
  * Converts ir_binop_ldexp to arithmetic and bit operations.
  *
- * LRP_TO_ARITH:
- * -------------
- * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
- *
  * BITFIELD_INSERT_TO_BFM_BFI:
  * ---------------------------
  * Breaks ir_quadop_bitfield_insert into ir_binop_bfm (bitfield mask) and
@@ -130,7 +125,6 @@ private:
    void exp_to_exp2(ir_expression *);
    void pow_to_exp2(ir_expression *);
    void log_to_log2(ir_expression *);
-   void lrp_to_arith(ir_expression *);
    void bitfield_insert_to_bfm_bfi(ir_expression *);
    void ldexp_to_arith(ir_expression *);
 };
@@ -299,27 +293,6 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
 }
 
 void
-lower_instructions_visitor::lrp_to_arith(ir_expression *ir)
-{
-   /* (lrp x y a) -> x*(1-a) + y*a */
-
-   /* Save op2 */
-   ir_variable *temp = new(ir) ir_variable(ir->operands[2]->type, "lrp_factor",
-					   ir_var_temporary);
-   this->base_ir->insert_before(temp);
-   this->base_ir->insert_before(assign(temp, ir->operands[2]));
-
-   ir_constant *one = new(ir) ir_constant(1.0f);
-
-   ir->operation = ir_binop_add;
-   ir->operands[0] = mul(ir->operands[0], sub(one, temp));
-   ir->operands[1] = mul(ir->operands[1], temp);
-   ir->operands[2] = NULL;
-
-   this->progress = true;
-}
-
-void
 lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)
 {
    /* Translates
@@ -499,11 +472,6 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
 	 pow_to_exp2(ir);
       break;
 
-   case ir_triop_lrp:
-      if (lowering(LRP_TO_ARITH))
-	 lrp_to_arith(ir);
-      break;
-
    case ir_quadop_bitfield_insert:
       if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
          bitfield_insert_to_bfm_bfi(ir);

commit 9cc1bbcaf4d1dbff6746657e07de122db9c082ef
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Sun Feb 23 16:32:39 2014 -0800

    i965: Stop lowering ir_triop_lrp.
    
    Both the vector and scalar backends now support it natively, so there's
    no point in lowering it.
    
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Acked-by: Eric Anholt <eric@anholt.net>
    (cherry picked from commit 2fdea48e21c48f4543e1239787d34cf84ab96959)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6cc2595..739c516 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -145,7 +145,6 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
       const int bitfield_insert = brw->gen >= 7
                                   ? BITFIELD_INSERT_TO_BFM_BFI
                                   : 0;
-      const int lrp_to_arith = brw->gen < 6 ? LRP_TO_ARITH : 0;
       lower_instructions(shader->base.ir,
 			 MOD_TO_FRACT |
 			 DIV_TO_MUL_RCP |
@@ -153,7 +152,6 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 			 EXP_TO_EXP2 |
 			 LOG_TO_LOG2 |
                          bitfield_insert |
-                         lrp_to_arith |
                          LDEXP_TO_ARITH);
 
       /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,

commit 24abd48ac0ac9561407662ee7c59e85d0e1375a5
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Sun Feb 23 16:29:46 2014 -0800

    i965/vec4: Handle ir_triop_lrp on Gen4-5 as well.
    
    When the vec4 backend encountered an ir_triop_lrp, it always emitted an
    actual LRP instruction, which only exists on Gen6+.  Gen4-5 used
    lower_instructions() to decompose ir_triop_lrp at the IR level.
    
    Since commit 8d37e9915a3b21 ("glsl: Optimize open-coded lrp into lrp."),
    we've had a bug where lower_instructions translates ir_triop_lrp into
    arithmetic, but opt_algebraic reassembles it back into a lrp.
    
    To avoid this ordering concern, just handle ir_triop_lrp in the backend.
    The FS backend already does this, so we may as well do likewise.
    
    v2: Add a comment reminding us that we could emit better assembly if we
        implemented the infrastructure necessary to support using MAC.
        (Assembly code provided by Eric Anholt).
    
    Cc: "10.1" <mesa-stable@lists.freedesktop.org>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75253
    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Acked-by: Eric Anholt <eric@anholt.net>
    (cherry picked from commit 56879a7ac41b8c7513a97cc02921f76a2ec8407c)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 2a1d992..9b38c32 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -472,6 +472,9 @@ public:
 
    void emit_minmax(uint32_t condmod, dst_reg dst, src_reg src0, src_reg src1);
 
+   void emit_lrp(const dst_reg &dst,
+                 const src_reg &x, const src_reg &y, const src_reg &a);
+
    void emit_block_move(dst_reg *dst, src_reg *src,
 			const struct glsl_type *type, uint32_t predicate);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 10e794b..3089ef6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1140,6 +1140,40 @@ vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
    }
 }
 
+void
+vec4_visitor::emit_lrp(const dst_reg &dst,
+                       const src_reg &x, const src_reg &y, const src_reg &a)
+{
+   if (brw->gen >= 6) {
+      /* Note that the instruction's argument order is reversed from GLSL
+       * and the IR.
+       */
+      emit(LRP(dst,
+               fix_3src_operand(a), fix_3src_operand(y), fix_3src_operand(x)));
+   } else {
+      /* Earlier generations don't support three source operations, so we
+       * need to emit x*(1-a) + y*a.
+       *
+       * A better way to do this would be:
+       *    ADD one_minus_a, negate(a), 1.0f
+       *    MUL null, y, a
+       *    MAC dst, x, one_minus_a
+       * but we would need to support MAC and implicit accumulator.
+       */
+      dst_reg y_times_a           = dst_reg(this, glsl_type::vec4_type);
+      dst_reg one_minus_a         = dst_reg(this, glsl_type::vec4_type);
+      dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
+      y_times_a.writemask           = dst.writemask;
+      one_minus_a.writemask         = dst.writemask;
+      x_times_one_minus_a.writemask = dst.writemask;
+
+      emit(MUL(y_times_a, y, a));
+      emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
+      emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));


Reply to: