[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'upstream-experimental'



 VERSION                                                       |    2 
 src/gallium/auxiliary/tgsi/tgsi_strings.c                     |    3 
 src/gallium/drivers/freedreno/Makefile.sources                |    3 
 src/gallium/drivers/freedreno/a2xx/a2xx.xml.h                 |   10 
 src/gallium/drivers/freedreno/a2xx/fd2_emit.c                 |    2 
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h                 |  158 ++-
 src/gallium/drivers/freedreno/a3xx/fd3_compiler.c             |  394 +++++++-
 src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c         |    3 
 src/gallium/drivers/freedreno/a3xx/fd3_context.c              |    3 
 src/gallium/drivers/freedreno/a3xx/fd3_program.c              |    4 
 src/gallium/drivers/freedreno/a3xx/fd3_program.h              |    4 
 src/gallium/drivers/freedreno/a3xx/fd3_query.c                |  139 ++
 src/gallium/drivers/freedreno/a3xx/fd3_query.h                |   36 
 src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c           |   18 
 src/gallium/drivers/freedreno/a3xx/fd3_texture.c              |    8 
 src/gallium/drivers/freedreno/a3xx/fd3_util.c                 |   28 
 src/gallium/drivers/freedreno/adreno_common.xml.h             |   43 
 src/gallium/drivers/freedreno/adreno_pm4.xml.h                |  102 ++
 src/gallium/drivers/freedreno/freedreno_context.c             |    3 
 src/gallium/drivers/freedreno/freedreno_context.h             |   65 +
 src/gallium/drivers/freedreno/freedreno_draw.c                |    6 
 src/gallium/drivers/freedreno/freedreno_gmem.c                |   19 
 src/gallium/drivers/freedreno/freedreno_query.c               |  117 --
 src/gallium/drivers/freedreno/freedreno_query.h               |   33 
 src/gallium/drivers/freedreno/freedreno_query_hw.c            |  465 ++++++++++
 src/gallium/drivers/freedreno/freedreno_query_hw.h            |  164 +++
 src/gallium/drivers/freedreno/freedreno_query_sw.c            |  165 +++
 src/gallium/drivers/freedreno/freedreno_query_sw.h            |   55 +
 src/gallium/drivers/freedreno/freedreno_resource.c            |    3 
 src/gallium/drivers/freedreno/freedreno_screen.c              |   17 
 src/gallium/drivers/freedreno/freedreno_util.h                |    9 
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp |   94 +-
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp      |   47 -
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp            |    2 
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp   |    2 
 src/gallium/drivers/nouveau/nv50/nv50_surface.c               |   16 
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c               |   14 
 src/gallium/targets/egl-static/Makefile.am                    |    2 
 src/glx/apple/apple_visual.c                                  |   14 
 src/mapi/glapi/gen/gl_gentable.py                             |    2 
 src/mesa/drivers/common/meta.c                                |  106 +-
 src/mesa/drivers/common/meta.h                                |   10 
 src/mesa/drivers/common/meta_blit.c                           |   50 -
 src/mesa/drivers/dri/i915/intel_fbo.c                         |    8 
 src/mesa/drivers/dri/i965/brw_reset.c                         |   18 
 src/mesa/drivers/dri/i965/brw_wm_state.c                      |    1 
 src/mesa/drivers/dri/i965/gen8_blend_state.c                  |    4 
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp               |   13 
 src/mesa/drivers/dri/i965/intel_extensions.c                  |    3 
 src/mesa/drivers/dri/i965/intel_fbo.c                         |   20 
 src/mesa/drivers/dri/nouveau/nouveau_driver.c                 |    2 
 src/mesa/drivers/dri/radeon/radeon_fbo.c                      |    2 
 src/mesa/drivers/x11/xm_dd.c                                  |    2 
 src/mesa/main/fbobject.c                                      |    6 
 54 files changed, 2150 insertions(+), 369 deletions(-)

New commits:
commit 697316fe0667fcf44dd52f69906a920450cbd1e4
Author: Ian Romanick <ian.d.romanick@intel.com>
Date:   Fri May 23 17:36:42 2014 -0700

    Bump version to 10.2-rc4
    
    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>

diff --git a/VERSION b/VERSION
index 24ee34e..01f7126 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-10.2.0-rc3
+10.2.0-rc4

commit 9a8f12ae034feefc7ce189485dfc8f387945eee0
Author: Pavel Popov <pavel.e.popov@intel.com>
Date:   Fri May 16 12:00:02 2014 +0700

    i965: Properly return *RESET* status in glGetGraphicsResetStatusARB
    
    glGetGraphicsResetStatusARB from the ARB_robustness extension always
    returns GUILTY_CONTEXT_RESET_ARB and never returns NO_ERROR for a guilty
    context with the LOSE_CONTEXT_ON_RESET_ARB strategy.  This is because Mesa
    returns GUILTY_CONTEXT_RESET_ARB if batch_active != 0, whereas the kernel
    driver never resets batch_active, so this variable is always > 0 for a
    guilty context.  The same behaviour can also be observed for batch_pending
    and INNOCENT_CONTEXT_RESET_ARB.
    
    But ARB_robustness spec says:
    
      If a reset status other than NO_ERROR is returned and subsequent calls
      return NO_ERROR, the context reset was encountered and completed. If a
      reset status is repeatedly returned, the context may be in the process
      of resetting.
    
      8. How should the application react to a reset context event?
      RESOLVED: For this extension, the application is expected to query the
      reset status until NO_ERROR is returned. If a reset is encountered, at
      least one *RESET* status will be returned. Once NO_ERROR is
      encountered, the application can safely destroy the old context and
      create a new one.
    
    The main problem is that the context may be in the process of resetting,
    and in this case a reset status should be repeatedly returned.  But it
    looks like the kernel driver returns nonzero active/pending only if the
    context reset has already been encountered and completed.  For this
    reason the *RESET* status cannot be repeatedly returned and should be
    returned only once.
    
    The reset_count and brw->reset_count variables can be used to ensure
    that glGetGraphicsResetStatusARB returns a *RESET* status only once for
    each context.  Note that i915 triggers reset_count twice, which allows
    returning the correct reset count immediately after active/pending have
    been incremented.
    
    v2 (idr): Trivial reformatting of comments.
    
    Signed-off-by: Pavel Popov <pavel.e.popov@intel.com>
    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
    Cc: "10.1 10.2" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 8dc4a98c44a824630f3cc234136833dbac9a1f4c)

diff --git a/src/mesa/drivers/dri/i965/brw_reset.c b/src/mesa/drivers/dri/i965/brw_reset.c
index 7eca1bc..e3182b1 100644
--- a/src/mesa/drivers/dri/i965/brw_reset.c
+++ b/src/mesa/drivers/dri/i965/brw_reset.c
@@ -42,6 +42,13 @@ brw_get_graphics_reset_status(struct gl_context *ctx)
     */
    assert(brw->hw_ctx != NULL);
 
+   /* A reset status other than NO_ERROR was returned last time. I915 returns
+    * nonzero active/pending only if reset has been encountered and completed.
+    * Return NO_ERROR from now on.
+    */
+   if (brw->reset_count != 0)
+      return GL_NO_ERROR;
+
    err = drm_intel_get_reset_stats(brw->hw_ctx, &reset_count, &active,
                                    &pending);
    if (err)
@@ -50,18 +57,19 @@ brw_get_graphics_reset_status(struct gl_context *ctx)
    /* A reset was observed while a batch from this context was executing.
     * Assume that this context was at fault.
     */
-   if (active != 0)
+   if (active != 0) {
+      brw->reset_count = reset_count;
       return GL_GUILTY_CONTEXT_RESET_ARB;
+   }
 
    /* A reset was observed while a batch from this context was in progress,
     * but the batch was not executing.  In this case, assume that the context
     * was not at fault.
     */
-   if (pending != 0)
+   if (pending != 0) {
+      brw->reset_count = reset_count;
       return GL_INNOCENT_CONTEXT_RESET_ARB;
-
-   /* FINISHME: Should we report anything if reset_count > brw->reset_count?
-    */
+   }
 
    return GL_NO_ERROR;
 }

commit a31062fcb3270ea0d90a9cf824fd1df8913a1347
Author: Emil Velikov <emil.l.velikov@gmail.com>
Date:   Sun May 18 08:07:24 2014 +0100

    targets/egl-static: add missing line break in ldflags
    
    Accidentally omitted by commit 7b7944ee1cedeaf.
    
    Cc: "10.2" <mesa-stable@lists.freedesktop.org>
    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
    Reviewed-by: Jon TURNEY <jon.turney@dronecode.org.uk>
    (cherry picked from commit e0372239a5b716de21b5f4c7a4a7d8b4a74a51cb)

diff --git a/src/gallium/targets/egl-static/Makefile.am b/src/gallium/targets/egl-static/Makefile.am
index 1aa5d37..e64de4d 100644
--- a/src/gallium/targets/egl-static/Makefile.am
+++ b/src/gallium/targets/egl-static/Makefile.am
@@ -48,7 +48,7 @@ AM_LDFLAGS = \
 	-module \
 	-no-undefined \
 	-avoid-version \
-	-Wl,--version-script=$(top_srcdir)/src/gallium/targets/egl-static/egl.sym
+	-Wl,--version-script=$(top_srcdir)/src/gallium/targets/egl-static/egl.sym \
 	$(GC_SECTIONS) \
 	$(LD_NO_UNDEFINED)
 

commit a1fff38c9688359c9eaa119e14f84b0609a9225b
Author: James Legg <jlegg@feralinteractive.com>
Date:   Fri May 23 12:25:37 2014 +0100

    mesa: Fix unbinding GL_DEPTH_STENCIL_ATTACHMENT
    
    glFramebufferRenderbuffer(..., GL_DEPTH_STENCIL_ATTACHMENT, ..., 0) only
    detached the depth buffer and not the stencil buffer.
    
    Bugzilla: http://bugs.freedesktop.org/show_bug.cgi?id=79115
    Reviewed-by: Brian Paul <brianp@vmware.com>
    Cc: "10.1 10.2" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 846c715abb17e13a5d39c565d0995404b6178d98)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 97538bc..ae3a418 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -500,6 +500,12 @@ _mesa_framebuffer_renderbuffer(struct gl_context *ctx,
    }
    else {
       remove_attachment(ctx, att);
+      if (attachment == GL_DEPTH_STENCIL_ATTACHMENT) {
+         /* detach stencil (depth was detached above) */
+         att = get_attachment(ctx, fb, GL_STENCIL_ATTACHMENT_EXT);
+         assert(att);
+         remove_attachment(ctx, att);
+      }
    }
 
    invalidate_framebuffer(fb);

commit 1db3ebd8a582e490a2ec32df14009125bddd7d4d
Author: Jordan Justen <jordan.l.justen@intel.com>
Date:   Wed May 21 22:34:26 2014 +0000

    meta blit: Set Z texcoord during meta blit to sample the correct layer
    
    If the source renderbuffer has a depth > 1, then send a Z texcoord
    which is set to the source attachment Z offset.
    
    This fixes piglit's gl-3.2-layered-rendering-gl-layer-render with the
    GL_TEXTURE_2D_MULTISAMPLE_ARRAY case test on i965/gen8.
    
    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
    Cc: "10.2" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 57876fee3872f4f224feeeb56eea6d2e10858fe9)

diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index e10a181..753afff 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -328,7 +328,10 @@ setup_glsl_blit_framebuffer(struct gl_context *ctx,
    /* target = GL_TEXTURE_RECTANGLE is not supported in GLES 3.0 */
    assert(_mesa_is_desktop_gl(ctx) || target == GL_TEXTURE_2D);
 
-   _mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true, 2, 2, 0);
+   unsigned texcoord_size = 2 + (src_rb->Depth > 1 ? 1 : 0);
+
+   _mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true,
+                                   2, texcoord_size, 0);
 
    if (target == GL_TEXTURE_2D_MULTISAMPLE ||
        target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
@@ -533,12 +536,16 @@ blitframebuffer_texture(struct gl_context *ctx,
 
       verts[0].tex[0] = s0;
       verts[0].tex[1] = t0;
+      verts[0].tex[2] = readAtt->Zoffset;
       verts[1].tex[0] = s1;
       verts[1].tex[1] = t0;
+      verts[1].tex[2] = readAtt->Zoffset;
       verts[2].tex[0] = s1;
       verts[2].tex[1] = t1;
+      verts[2].tex[2] = readAtt->Zoffset;
       verts[3].tex[0] = s0;
       verts[3].tex[1] = t1;
+      verts[3].tex[2] = readAtt->Zoffset;
 
       _mesa_BufferSubData(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
    }

commit 7cf3a674ea23e4a3a0beed7faf51c2a930f52264
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Tue May 20 14:52:40 2014 -0700

    i965: Listen to BRW_NEW_FRAGMENT_PROGRAM for 3DSTATE_PS_BLEND.
    
    brw_color_buffer_write_enabled depends on brw->fragment_program, which
    means we have to listen to BRW_NEW_FRAGMENT_PROGRAM.
    
    On most generations, this was only called from a function that already
    subscribed.  However, on Broadwell, we failed to listen to the necessary
    event in the atom that emits 3DSTATE_PS_BLEND.
    
    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Reviewed-by: Eric Anholt <eric@anholt.net>
    Cc: "10.2" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 746921cbb474f2df85232a8f0c10cde8df3349e9)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index fbd605c..98b5185 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -45,6 +45,7 @@ bool
 brw_color_buffer_write_enabled(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
+   /* BRW_NEW_FRAGMENT_PROGRAM */
    const struct gl_fragment_program *fp = brw->fragment_program;
    int i;
 
diff --git a/src/mesa/drivers/dri/i965/gen8_blend_state.c b/src/mesa/drivers/dri/i965/gen8_blend_state.c
index a02b765..9c5eedb 100644
--- a/src/mesa/drivers/dri/i965/gen8_blend_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_blend_state.c
@@ -215,7 +215,7 @@ gen8_upload_ps_blend(struct brw_context *brw)
    /* _NEW_BUFFERS */
    struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
 
-   /* _NEW_BUFFERS | _NEW_COLOR */
+   /* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */
    if (brw_color_buffer_write_enabled(brw))
       dw1 |= GEN8_PS_BLEND_HAS_WRITEABLE_RT;
 
@@ -290,7 +290,7 @@ gen8_upload_ps_blend(struct brw_context *brw)
 const struct brw_tracked_state gen8_ps_blend = {
    .dirty = {
       .mesa = _NEW_BUFFERS | _NEW_COLOR | _NEW_MULTISAMPLE,
-      .brw = BRW_NEW_CONTEXT,
+      .brw = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM,
       .cache = 0,
    },
    .emit = gen8_upload_ps_blend

commit d2521a44af66af5c99090eb30487798f8b6dde1c
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Tue May 20 14:52:39 2014 -0700

    i965: Use WE_all for FB write header setup on Broadwell.
    
    I forgot to disable writemasking on the OR and MOV which set the render
    target index and "source 0 alpha present to render target" bit.
    
    Using get_element_ud is equivalent and avoids a line-wrap.
    
    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Reviewed-by: Eric Anholt <eric@anholt.net>
    Cc: "10.2" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 7d3985ca6cdd5f2f7ff68b269798d69394164dec)

diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
index de06a97..26cb991 100644
--- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
@@ -73,16 +73,17 @@ gen8_fs_generator::generate_fb_write(fs_inst *ir)
 
       if (ir->target > 0 && c->key.replicate_alpha) {
          /* Set "Source0 Alpha Present to RenderTarget" bit in the header. */
-         OR(vec1(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD)),
-            vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
-            brw_imm_ud(1 << 11));
+         gen8_instruction *inst =
+            OR(get_element_ud(brw_message_reg(ir->base_mrf), 0),
+               vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+               brw_imm_ud(1 << 11));
+         gen8_set_mask_control(inst, BRW_MASK_DISABLE);
       }
 
       if (ir->target > 0) {
          /* Set the render target index for choosing BLEND_STATE. */
-         MOV(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, ir->base_mrf, 2),
-                    BRW_REGISTER_TYPE_UD),
-             brw_imm_ud(ir->target));
+         MOV_RAW(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, ir->base_mrf, 2),
+                 brw_imm_ud(ir->target));
       }
    }
 

commit 00f2dcb791bf108669d4d3c809613f81510bf8ba
Author: Anuj Phogat <anuj.phogat@gmail.com>
Date:   Mon May 19 11:55:01 2014 -0700

    meta: Use gl_FragColor to output color values to all the draw buffers
    
    _mesa_meta_setup_blit_shader() currently generates a fragment shader
    which, irrespective of the number of draw buffers, writes the color
    to only one 'out' variable. The current shader relies on undefined
    behavior and possibly works by chance.
    
    From OpenGL 4.0  spec, page 256:
      "If a fragment shader writes to gl_FragColor, DrawBuffers specifies a
       set of draw buffers into which the single fragment color defined by
       gl_FragColor is written. If a fragment shader writes to gl_FragData,
       or a user-defined varying out variable, DrawBuffers specifies a set
       of draw buffers into which each of the multiple output colors defined
       by these variables are separately written. If a fragment shader writes
       to none of gl_FragColor, gl_FragData, nor any user defined varying out
       variables, the values of the fragment colors following shader execution
       are undefined, and may differ for each fragment color."
    
    OpenGL 4.4 spec, page 463, added an additional line in this section:
      "If some, but not all user-defined output variables are written, the
       values of fragment colors corresponding to unwritten variables are
       similarly undefined."
    
    V2: Write color output to gl_FragColor instead of writing to multiple
        'out' variables. This'll avoid recompiling the shader every time
        the draw buffer count is updated.
    
    Cc: <mesa-stable@lists.freedesktop.org>
    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    (cherry picked from commit 46737cebd337200f8b88ba9081796fa0f94e9143)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 1b2fe8a..fec0d2b 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -246,7 +246,6 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx,
    void *const mem_ctx = ralloc_context(NULL);
    struct blit_shader *shader = choose_blit_shader(target, table);
    const char *vs_input, *vs_output, *fs_input, *vs_preprocess, *fs_preprocess;
-   const char *fs_output_var, *fs_output_var_decl;
 
    if (ctx->Const.GLSLVersion < 130) {
       vs_preprocess = "";
@@ -254,16 +253,12 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx,
       vs_output = "varying";
       fs_preprocess = "#extension GL_EXT_texture_array : enable";
       fs_input = "varying";
-      fs_output_var_decl = "";
-      fs_output_var = "gl_FragColor";
    } else {
       vs_preprocess = "#version 130";
       vs_input = "in";
       vs_output = "out";
       fs_preprocess = "#version 130";
       fs_input = "in";
-      fs_output_var_decl = "out vec4 out_color;";
-      fs_output_var = "out_color";
       shader->func = "texture";
    }
 
@@ -291,15 +286,13 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx,
                 "#extension GL_ARB_texture_cube_map_array: enable\n"
                 "uniform %s texSampler;\n"
                 "%s vec4 texCoords;\n"
-                "%s\n"
                 "void main()\n"
                 "{\n"
-                "   vec4 color = %s(texSampler, %s);\n"
-                "   %s = color;\n"
-                "   gl_FragDepth = color.x;\n"
+                "   gl_FragColor = %s(texSampler, %s);\n"
+                "   gl_FragDepth = gl_FragColor.x;\n"
                 "}\n",
-                fs_preprocess, shader->type, fs_input, fs_output_var_decl,
-                shader->func, shader->texcoords, fs_output_var);
+                fs_preprocess, shader->type, fs_input,
+                shader->func, shader->texcoords);
 
    _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source,
                                        ralloc_asprintf(mem_ctx, "%s blit",

commit ed1ffa0197382253a348e1defcc0b84b0f5865c5
Author: Anuj Phogat <anuj.phogat@gmail.com>
Date:   Mon May 19 11:47:46 2014 -0700

    meta: Refactor _mesa_meta_setup_blit_shader() to avoid duplicate shader code
    
    Cc: <mesa-stable@lists.freedesktop.org>
    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    (cherry picked from commit bee2915210e53585bca27c9c5f7d7407f61bcc0f)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index b194b6e..1b2fe8a 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -242,10 +242,30 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx,
                              GLenum target,
                              struct blit_shader_table *table)
 {
-   const char *vs_source;
-   char *fs_source;
+   char *vs_source, *fs_source;
    void *const mem_ctx = ralloc_context(NULL);
    struct blit_shader *shader = choose_blit_shader(target, table);
+   const char *vs_input, *vs_output, *fs_input, *vs_preprocess, *fs_preprocess;
+   const char *fs_output_var, *fs_output_var_decl;
+
+   if (ctx->Const.GLSLVersion < 130) {
+      vs_preprocess = "";
+      vs_input = "attribute";
+      vs_output = "varying";
+      fs_preprocess = "#extension GL_EXT_texture_array : enable";
+      fs_input = "varying";
+      fs_output_var_decl = "";
+      fs_output_var = "gl_FragColor";
+   } else {
+      vs_preprocess = "#version 130";
+      vs_input = "in";
+      vs_output = "out";
+      fs_preprocess = "#version 130";
+      fs_input = "in";
+      fs_output_var_decl = "out vec4 out_color;";
+      fs_output_var = "out_color";
+      shader->func = "texture";
+   }
 
    assert(shader != NULL);
 
@@ -254,57 +274,32 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx,
       return;
    }
 
-   if (ctx->Const.GLSLVersion < 130) {
-      vs_source =
-         "attribute vec2 position;\n"
-         "attribute vec4 textureCoords;\n"
-         "varying vec4 texCoords;\n"
-         "void main()\n"
-         "{\n"
-         "   texCoords = textureCoords;\n"
-         "   gl_Position = vec4(position, 0.0, 1.0);\n"
-         "}\n";
-
-      fs_source = ralloc_asprintf(mem_ctx,
-                                  "#extension GL_EXT_texture_array : enable\n"
-                                  "#extension GL_ARB_texture_cube_map_array: enable\n"
-                                  "uniform %s texSampler;\n"
-                                  "varying vec4 texCoords;\n"
-                                  "void main()\n"
-                                  "{\n"
-                                  "   gl_FragColor = %s(texSampler, %s);\n"
-                                  "   gl_FragDepth = gl_FragColor.x;\n"
-                                  "}\n",
-                                  shader->type,
-                                  shader->func, shader->texcoords);
-   }
-   else {
-      vs_source = ralloc_asprintf(mem_ctx,
-                                  "#version 130\n"
-                                  "in vec2 position;\n"
-                                  "in vec4 textureCoords;\n"
-                                  "out vec4 texCoords;\n"
-                                  "void main()\n"
-                                  "{\n"
-                                  "   texCoords = textureCoords;\n"
-                                  "   gl_Position = vec4(position, 0.0, 1.0);\n"
-                                  "}\n");
-      fs_source = ralloc_asprintf(mem_ctx,
-                                  "#version 130\n"
-                                  "#extension GL_ARB_texture_cube_map_array: enable\n"
-                                  "uniform %s texSampler;\n"
-                                  "in vec4 texCoords;\n"
-                                  "out vec4 out_color;\n"
-                                  "\n"
-                                  "void main()\n"
-                                  "{\n"
-                                  "   out_color = texture(texSampler, %s);\n"
-                                  "   gl_FragDepth = out_color.x;\n"
-                                  "}\n",
-                                  shader->type,
-                                  shader->texcoords);
-   }
-
+   vs_source = ralloc_asprintf(mem_ctx,
+                "%s\n"
+                "%s vec2 position;\n"
+                "%s vec4 textureCoords;\n"
+                "%s vec4 texCoords;\n"
+                "void main()\n"
+                "{\n"
+                "   texCoords = textureCoords;\n"
+                "   gl_Position = vec4(position, 0.0, 1.0);\n"
+                "}\n",
+                vs_preprocess, vs_input, vs_input, vs_output);
+
+   fs_source = ralloc_asprintf(mem_ctx,
+                "%s\n"
+                "#extension GL_ARB_texture_cube_map_array: enable\n"
+                "uniform %s texSampler;\n"
+                "%s vec4 texCoords;\n"
+                "%s\n"
+                "void main()\n"
+                "{\n"
+                "   vec4 color = %s(texSampler, %s);\n"
+                "   %s = color;\n"
+                "   gl_FragDepth = color.x;\n"
+                "}\n",
+                fs_preprocess, shader->type, fs_input, fs_output_var_decl,
+                shader->func, shader->texcoords, fs_output_var);
 
    _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source,
                                        ralloc_asprintf(mem_ctx, "%s blit",

commit 5d056f51abcc7d79158a744582ead6f1ce1276c8
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Mon May 19 23:54:40 2014 -0400

    tgsi: add GS_INVOCATIONS to property names array
    
    In commit 4be146b1, I neglected to add the new property to the strings
    array. This leads to the string '(null)' being printed instead when
    converting a GS shader to text.
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Cc: "10.2" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
    (cherry picked from commit cdeb7004e03afbd5305913b5ae35d72529623507)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 5b6e47f..34dec4f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -120,7 +120,8 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
    "FS_COORD_PIXEL_CENTER",
    "FS_COLOR0_WRITES_ALL_CBUFS",
    "FS_DEPTH_LAYOUT",
-   "VS_PROHIBIT_UCPS"
+   "VS_PROHIBIT_UCPS",
+   "GS_INVOCATIONS",
 };
 
 const char *tgsi_type_names[5] =

commit 6be7789e116b5cc351da0a8e6f37ab88bcce069a
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Sat May 17 22:48:58 2014 -0400

    nv50,nvc0: fix 3d blits with mipmap levels
    
    Make sure to normalize the z coordinates as well as the x/y ones when
    there are mipmaps present. Fixes 3d mipmap generation, which now uses
    the blit path.
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Cc: "10.2" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
    (cherry picked from commit 28360fcad75a6917db6af42fb17b81572850ec0d)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index d02f5fe..1f37527 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -1142,6 +1142,12 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
    y0 *= (float)(1 << nv50_miptree(src)->ms_y);
    y1 *= (float)(1 << nv50_miptree(src)->ms_y);
 
+   /* XXX: multiply by 6 for cube arrays ? */
+   dz = (float)info->src.box.depth / (float)info->dst.box.depth;
+   z = (float)info->src.box.z;
+   if (nv50_miptree(src)->layout_3d)
+      z += 0.5f * dz;
+
    if (src->last_level > 0) {
       /* If there are mip maps, GPU always assumes normalized coordinates. */
       const unsigned l = info->src.level;
@@ -1151,14 +1157,12 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
       x1 /= fh;
       y0 /= fv;
       y1 /= fv;
+      if (nv50_miptree(src)->layout_3d) {
+         z /= u_minify(src->depth0, l);
+         dz /= u_minify(src->depth0, l);
+      }
    }
 
-   /* XXX: multiply by 6 for cube arrays ? */
-   dz = (float)info->src.box.depth / (float)info->dst.box.depth;
-   z = (float)info->src.box.z;
-   if (nv50_miptree(src)->layout_3d)
-      z += 0.5f * dz;
-
    BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
    PUSH_DATA (push, 0);
    BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index acadb2c..e15806e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -896,6 +896,11 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
    y0 *= (float)(1 << nv50_miptree(src)->ms_y);
    y1 *= (float)(1 << nv50_miptree(src)->ms_y);
 
+   dz = (float)info->src.box.depth / (float)info->dst.box.depth;
+   z = (float)info->src.box.z;
+   if (nv50_miptree(src)->layout_3d)
+      z += 0.5f * dz;
+
    if (src->last_level > 0) {
       /* If there are mip maps, GPU always assumes normalized coordinates. */
       const unsigned l = info->src.level;
@@ -905,13 +910,12 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
       x1 /= fh;
       y0 /= fv;
       y1 /= fv;
+      if (nv50_miptree(src)->layout_3d) {
+         z /= u_minify(src->depth0, l);
+         dz /= u_minify(src->depth0, l);
+      }
    }
 
-   dz = (float)info->src.box.depth / (float)info->dst.box.depth;
-   z = (float)info->src.box.z;
-   if (nv50_miptree(src)->layout_3d)
-      z += 0.5f * dz;
-
    IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
    IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
               NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);

commit d6a4c3c29c789857eb60016a61f5db0716e463ef
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Wed May 14 23:22:32 2014 -0400

    nv50/ir: fix constant folding for OP_MUL subop HIGH
    
    These instructions can come in either through IMUL_HI/UMUL_HI TGSI
    opcodes, or from OP_DIV constant folding.
    
    Also make sure that the constant foldings which delete the original
    instruction still get counted as having done something.
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Cc: "10.1 10.2" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
    (cherry picked from commit d2a3de19c6aa5881228734c73df706483a4aecf9)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index cdae3c8..bb88b18 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -425,7 +425,17 @@ ConstantFolding::expr(Instruction *i,
       case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
       case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
       case TYPE_S32:
-      case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break;
+         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+            res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32;
+            break;
+         }
+         /* fallthrough */
+      case TYPE_U32:
+         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+            res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32;
+            break;
+         }
+         res.data.u32 = a->data.u32 * b->data.u32; break;
       default:
          return;
       }
@@ -691,12 +701,41 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
 {
    const int t = !s;
    const operation op = i->op;
+   Instruction *newi = i;
 
    switch (i->op) {
    case OP_MUL:
       if (i->dType == TYPE_F32)
          tryCollapseChainedMULs(i, s, imm0);
 
+      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+         assert(!isFloatType(i->sType));
+         if (imm0.isInteger(1) && i->dType == TYPE_S32) {
+            bld.setPosition(i, false);
+            // Need to set to the sign value, which is a compare.
+            newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
+                             TYPE_S32, i->getSrc(t), bld.mkImm(0));
+            delete_Instruction(prog, i);
+         } else if (imm0.isInteger(0) || imm0.isInteger(1)) {
+            // The high bits can't be set in this case (either mul by 0 or
+            // unsigned by 1)
+            i->op = OP_MOV;
+            i->subOp = 0;
+            i->setSrc(0, new_ImmediateValue(prog, 0u));
+            i->src(0).mod = Modifier(0);
+            i->setSrc(1, NULL);
+         } else if (!imm0.isNegative() && imm0.isPow2()) {
+            // Translate into a shift
+            imm0.applyLog2();
+            i->op = OP_SHR;
+            i->subOp = 0;
+            imm0.reg.data.u32 = 32 - imm0.reg.data.u32;
+            i->setSrc(0, i->getSrc(t));
+            i->src(0).mod = i->src(t).mod;
+            i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
+            i->src(1).mod = 0;
+         }
+      } else
       if (imm0.isInteger(0)) {
          i->op = OP_MOV;
          i->setSrc(0, new_ImmediateValue(prog, 0u));
@@ -787,7 +826,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
          else
             tA = tB;
          tB = s ? bld.getSSA() : i->getDef(0);
-         bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
+         newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
          if (s)
             bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
 
@@ -819,7 +858,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
          tA = bld.getSSA();
          bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0));
          tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
-         bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
+         newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
          if (d < 0)
             bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
 
@@ -897,7 +936,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
    default:
       return;
    }
-   if (i->op != op)
+   if (newi->op != op)
       foldCount++;
 }
 

commit 9028b946703da1d22de91fbfc55932455b482c35
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Wed May 14 23:30:16 2014 -0400

    nv50/ir: fix s32 x s32 -> high s32 multiply logic
    
    Retrieving the high 32 bits of a signed multiply is rather annoying. It
    appears that the simplest way to do this is to compute the absolute
    value of the arguments, and perform a u32 x u32 -> u64 operation. If the
    arguments' signs differ, then negate the result. Since there is no u64
    support in the cvt instruction, we have to perform the 2's complement
    negation "by hand".
    
    This logic can come into use by the IMUL_HI instruction (very unlikely
    to be seen), as well as from constant folding of division by a constant.
    Fixes dolphin's divisions by 255.
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Cc: "10.1 10.2" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
    (cherry picked from commit d3a5cf052c38087b395871b5b46776e2a7d4a7d7)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index b17d57d..0fb7666 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -37,18 +37,25 @@ namespace nv50_ir {
 //    ah*bl 00
 //
 // fffe0001 + fffe0001
+//
+// Note that this sort of splitting doesn't work for signed values, so we
+// compute the sign on those manually and then perform an unsigned multiply.
 static bool
 expandIntegerMUL(BuildUtil *bld, Instruction *mul)
 {
    const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;
 
-   DataType fTy = mul->sType; // full type
-   DataType hTy;
+   DataType fTy; // full type
+   switch (mul->sType) {
+   case TYPE_S32: fTy = TYPE_U32; break;
+   case TYPE_S64: fTy = TYPE_U64; break;
+   default: fTy = mul->sType; break;
+   }
+
+   DataType hTy; // half type
    switch (fTy) {
-   case TYPE_S32: hTy = TYPE_S16; break;
    case TYPE_U32: hTy = TYPE_U16; break;
    case TYPE_U64: hTy = TYPE_U32; break;
-   case TYPE_S64: hTy = TYPE_S32; break;
    default:
       return false;
    }
@@ -59,15 +66,25 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
 
    bld->setPosition(mul, true);
 
+   Value *s[2];
    Value *a[2], *b[2];
-   Value *c[2];
    Value *t[4];
    for (int j = 0; j < 4; ++j)
       t[j] = bld->getSSA(fullSize);
 
+   s[0] = mul->getSrc(0);
+   s[1] = mul->getSrc(1);
+
+   if (isSignedType(mul->sType)) {
+      s[0] = bld->getSSA(fullSize);
+      s[1] = bld->getSSA(fullSize);
+      bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
+      bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1));
+   }
+
    // split sources into halves
-   i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0));
-   i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1));
+   i[0] = bld->mkSplit(a, halfSize, s[0]);
+   i[1] = bld->mkSplit(b, halfSize, s[1]);
 
    i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
    i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
@@ -75,24 +92,76 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
    i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
 
    if (highResult) {
-      Value *r[4];
+      Value *c[2];
+      Value *r[5];
       Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8));
       c[0] = bld->getSSA(1, FILE_FLAGS);
       c[1] = bld->getSSA(1, FILE_FLAGS);
-      for (int j = 0; j < 4; ++j)
+      for (int j = 0; j < 5; ++j)
          r[j] = bld->getSSA(fullSize);
 
       i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8));
       i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm);
       bld->mkMov(r[3], r[0])->setPredicate(CC_NC, c[0]);
       bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[3]);
-      i[5] = bld->mkOp3(OP_MAD, fTy, mul->getDef(0), a[1], b[1], r[2]);
+      i[5] = bld->mkOp3(OP_MAD, fTy, r[4], a[1], b[1], r[2]);
 
       // set carry defs / sources
       i[3]->setFlagsDef(1, c[0]);
-      i[4]->setFlagsDef(0, c[1]); // actual result not required, just the carry
+      // The actual result is required in the signed case, but ignored for
+      // unsigned. For some reason the compiler ends up dropping the whole
+      // instruction if the destination is unused, even though the flags are used.
+      if (isSignedType(mul->sType))
+         i[4]->setFlagsDef(1, c[1]);
+      else
+         i[4]->setFlagsDef(0, c[1]);
       i[6]->setPredicate(CC_C, c[0]);
       i[5]->setFlagsSrc(3, c[1]);
+
+      if (isSignedType(mul->sType)) {
+         Value *cc[2];
+         Value *rr[7];
+         Value *one = bld->getSSA(fullSize);
+         bld->loadImm(one, 1);
+         for (int j = 0; j < 7; j++)
+            rr[j] = bld->getSSA(fullSize);
+
+         // NOTE: this logic uses predicates because splitting basic blocks is
+         // ~impossible during the SSA phase. The RA relies on a correlation
+         // between edge order and phi node sources.
+
+         // Set the sign of the result based on the inputs
+         bld->mkOp2(OP_XOR, fTy, NULL, mul->getSrc(0), mul->getSrc(1))
+            ->setFlagsDef(0, (cc[0] = bld->getSSA(1, FILE_FLAGS)));
+
+         // 1s complement of 64-bit value
+         bld->mkOp1(OP_NOT, fTy, rr[0], r[4])
+            ->setPredicate(CC_S, cc[0]);
+         bld->mkOp1(OP_NOT, fTy, rr[1], t[3])
+            ->setPredicate(CC_S, cc[0]);
+
+         // add to low 32-bits, keep track of the carry
+         Instruction *n = bld->mkOp2(OP_ADD, fTy, NULL, rr[1], one);
+         n->setPredicate(CC_S, cc[0]);
+         n->setFlagsDef(0, (cc[1] = bld->getSSA(1, FILE_FLAGS)));
+
+         // If there was a carry, add 1 to the upper 32 bits
+         // XXX: These get executed even if they shouldn't be
+         bld->mkOp2(OP_ADD, fTy, rr[2], rr[0], one)
+            ->setPredicate(CC_C, cc[1]);
+         bld->mkMov(rr[3], rr[0])
+            ->setPredicate(CC_NC, cc[1]);
+         bld->mkOp2(OP_UNION, fTy, rr[4], rr[2], rr[3]);
+
+         // Merge the results from the negative and non-negative paths
+         bld->mkMov(rr[5], rr[4])
+            ->setPredicate(CC_S, cc[0]);
+         bld->mkMov(rr[6], r[4])
+            ->setPredicate(CC_NS, cc[0]);
+         bld->mkOp2(OP_UNION, mul->sType, mul->getDef(0), rr[5], rr[6]);
+      } else {
+         bld->mkMov(mul->getDef(0), r[4]);
+      }
    } else {
       bld->mkMov(mul->getDef(0), t[3]);
    }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index 0b2f27a..c844fa4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -331,6 +331,8 @@ TargetNV50::insnCanLoad(const Instruction *i, int s,
          return false;
       if (sf == FILE_IMMEDIATE)
          return false;
+      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH && sf == FILE_MEMORY_CONST)
+         return false;
       ldSize = 2;
    } else {


Reply to: