mesa: Changes to 'debian-experimental'

To: debian-x@lists.debian.org
Subject: mesa: Changes to 'debian-experimental'
From: Timo Aaltonen <tjaalton@moszumanska.debian.org>
Date: Mon, 31 Jul 2017 12:09:10 +0000
Message-id: <E1dc9VW-0008M4-5I@moszumanska.debian.org>
 VERSION                                                            |    2 
 debian/changelog                                                   |    2 
 src/amd/common/ac_nir_to_llvm.c                                    |   13 -
 src/amd/common/ac_surface.c                                        |   12 +
 src/amd/vulkan/radv_device.c                                       |   17 +
 src/amd/vulkan/radv_image.c                                        |    5 
 src/egl/drivers/dri2/platform_wayland.c                            |    4 
 src/egl/main/eglcontext.c                                          |   21 -
 src/gallium/drivers/r600/r600_pipe.h                               |    4 
 src/gallium/drivers/radeon/r600_pipe_common.c                      |   39 ++-
 src/gallium/drivers/radeon/r600_pipe_common.h                      |    5 
 src/gallium/drivers/radeonsi/si_blit.c                             |   31 +-
 src/gallium/drivers/radeonsi/si_pipe.c                             |    6 
 src/gallium/drivers/radeonsi/si_pipe.h                             |   10 
 src/gallium/drivers/radeonsi/si_shader.c                           |  116 ++++++---
 src/gallium/drivers/radeonsi/si_shader_internal.h                  |    3 
 src/gallium/drivers/radeonsi/si_state.c                            |   43 +++
 src/gallium/drivers/radeonsi/si_state.h                            |    1 
 src/gallium/drivers/radeonsi/si_state_draw.c                       |   31 --
 src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp |   21 +
 src/gallium/drivers/swr/swr_draw.cpp                               |   39 ++-
 src/gallium/drivers/swr/swr_shader.cpp                             |   32 ++
 src/gallium/drivers/swr/swr_shader.h                               |    3 
 src/gallium/drivers/swr/swr_state.cpp                              |    5 
 src/gallium/drivers/vc4/vc4_blit.c                                 |   14 -
 src/gallium/drivers/virgl/virgl_encode.c                           |    2 
 src/gallium/drivers/virgl/virgl_tgsi.c                             |   10 
 src/gallium/include/state_tracker/st_api.h                         |   21 +
 src/gallium/state_trackers/dri/dri2.c                              |   36 ++-
 src/gallium/state_trackers/dri/dri_drawable.c                      |    1 
 src/gallium/state_trackers/dri/dri_screen.c                        |    3 
 src/gallium/state_trackers/glx/xlib/xm_api.c                       |    3 
 src/gallium/state_trackers/glx/xlib/xm_st.c                        |    1 
 src/gallium/state_trackers/wgl/stw_device.c                        |    3 
 src/gallium/state_trackers/wgl/stw_st.c                            |    1 
 src/intel/vulkan/anv_cmd_buffer.c                                  |    2 
 src/intel/vulkan/anv_private.h                                     |    2 
 src/mesa/drivers/dri/i965/brw_blorp.c                              |    2 
 src/mesa/drivers/dri/i965/brw_performance_query.c                  |   16 -
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c                   |    6 
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp                         |    2 
 src/mesa/state_tracker/st_manager.c                                |  119 +++++++---
 42 files changed, 511 insertions(+), 198 deletions(-)

New commits:
commit 7923abfe8c688cb539c4afa6de84aa7255b1ac4d
Author: Timo Aaltonen <tjaalton@debian.org>
Date:   Mon Jul 31 15:00:08 2017 +0300

    bump the version

diff --git a/debian/changelog b/debian/changelog
index 9ac5035..43d5e0d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-mesa (17.2.0~rc1-1) UNRELEASED; urgency=medium
+mesa (17.2.0~rc2-1) UNRELEASED; urgency=medium
 
   * New upstream release candidate.
   * control: Add wayland-protocols (>= 1.9) to Build-Depends.

commit 58fe86a6d69c1169cff0c0b3084c906dbda7e266
Author: Emil Velikov <emil.velikov@collabora.com>
Date:   Mon Jul 31 10:52:13 2017 +0100

    Update version to 17.2.0-rc2
    
    Signed-off-by: Emil Velikov <emil.velikov@collabora.com>

diff --git a/VERSION b/VERSION
index 8530591..f309d4a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-17.2.0-rc1
+17.2.0-rc2

commit d466a7053247fb76f23d7cdaf1745aa9c9bd7fd5
Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date:   Thu Jul 27 17:12:09 2017 +0200

    st/glsl_to_tgsi: fix getting the image type for array of structs
    
    Since array splitting for AoA is disabled, we have to retrieve
    the type of the first non-array type when an array of images is
    declared inside a structure. Otherwise, it will hit an assert
    in glsl_type::sampler_index() because it expects either a sampler
    or an image type.
    
    This fixes a regression in the following piglit test:
    arb_bindless_texture/compiler/images/arrays-of-struct.frag
    
    Fixes: 57165f2ef8 ("glsl: disable array splitting for AoA")
    Cc: 17.2 <mesa-stable@lists.freedesktop.org>
    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
    (cherry picked from commit f99e9335e2ca46ec8ead00b63e79f800fec75592)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index baa8359..3983fe7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3790,7 +3790,7 @@ get_image_qualifiers(ir_dereference *ir, const glsl_type **type,
       for (unsigned i = 0; i < struct_type->length; i++) {
          if (!strcmp(struct_type->fields.structure[i].name,
                      deref_record->field)) {
-            *type = struct_type->fields.structure[i].type;
+            *type = struct_type->fields.structure[i].type->without_array();
             *memory_coherent =
                struct_type->fields.structure[i].memory_coherent;
             *memory_volatile =

commit e62eddcdbe3702ba95a833e0565cc3fdd5e23296
Author: Marek Olšák <marek.olsak@amd.com>
Date:   Fri Jul 28 01:52:13 2017 +0200

    st/mesa: release sampler views when redefining a texture in st_context_teximage
    
    Noticed randomly.
    
    Cc: 17.2 <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
    (cherry picked from commit 5c1241268ba9b240cb79ab9a30c5255b176c83c9)

diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 62924b0..4b4d4ac 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -48,6 +48,7 @@
 #include "st_cb_fbo.h"
 #include "st_cb_flush.h"
 #include "st_manager.h"
+#include "st_sampler_view.h"
 
 #include "state_tracker/st_gl_api.h"
 
@@ -735,6 +736,7 @@ st_context_teximage(struct st_context_iface *stctxi,
    pipe_resource_reference(&stImage->pt, tex);
    stObj->surface_format = pipe_format;
 
+   st_texture_release_all_sampler_views(st, stObj);
    stObj->needs_validation = true;
 
    _mesa_dirty_texobj(ctx, texObj);

commit 6d07e58afb6bcbeec601391d936ddbd46a8c0622
Author: Dave Airlie <airlied@redhat.com>
Date:   Thu Jul 27 04:51:48 2017 +0100

    radv: for stencil only set Z tile mode index to same value
    
    On SI this was causing a hang in
    dEQP-VK.pipeline.render_to_image.core.2d_array.mipmap.r16g16_sint_s8_uint
    
    This was due to not handling the tile mode index for depth like
    I fixed previously for new GPUs.
    
    Fixes: 01d0c5a9 (radv: fix stencil regression since new addrlib import)
    Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
    Signed-off-by: Dave Airlie <airlied@redhat.com>
    (cherry picked from commit 800d1622096ca52b955bdfc20eb770b80ef15221)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 5244c15..0de7b8b 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3246,6 +3246,8 @@ radv_initialise_ds_surface(struct radv_device *device,
 			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
 			tile_mode_index = si_tile_mode_index(iview->image, level, true);
 			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
+			if (stencil_only)
+				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
 		}
 
 		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |

commit f9e563597d670083145359f393d31ce69a19ab9b
Author: Dave Airlie <airlied@redhat.com>
Date:   Fri Jul 28 11:04:35 2017 +1000

    virgl: drop precise modifier.
    
    The host doesn't understand this yet, so drop it for now.
    
    Fixes: virgl regressions.
    
    Fixes: af22adee4f (tgsi: add precise flag to tgsi_instruction)
    Signed-off-by: Dave Airlie <airlied@redhat.com>
    (cherry picked from commit 554aa094406f3f5a935c4adbe77569cc9beb4312)

diff --git a/src/gallium/drivers/virgl/virgl_tgsi.c b/src/gallium/drivers/virgl/virgl_tgsi.c
index 4a2271f..7ad1cbd 100644
--- a/src/gallium/drivers/virgl/virgl_tgsi.c
+++ b/src/gallium/drivers/virgl/virgl_tgsi.c
@@ -48,6 +48,15 @@ virgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
    }
 }
 
+static void
+virgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
+				 struct tgsi_full_instruction *inst)
+{
+   if (inst->Instruction.Precise)
+      inst->Instruction.Precise = 0;
+   ctx->emit_instruction(ctx, inst);
+}
+
 struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in)
 {
 
@@ -61,6 +70,7 @@ struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in)
 
    memset(&transform, 0, sizeof(transform));
    transform.base.transform_property = virgl_tgsi_transform_property;
+   transform.base.transform_instruction = virgl_tgsi_transform_instruction;
    tgsi_transform_shader(tokens_in, new_tokens, newLen, &transform.base);
 
    return new_tokens;

commit 5b61ba4432ec272b74d9eb1b16d18e6a43879b3a
Author: Marek Olšák <marek.olsak@amd.com>
Date:   Tue Jul 4 16:11:16 2017 +0200

    radeonsi: update dirty_level_mask only when flushing or unbinding framebuffer
    
    This fixes corruption with bindless textures in Dawn Of War 3.
    
    The do_update_surf_dirtiness mechanism was complicated and dirty_level_mask
    was only updated after the first draw call. The problem is bindless textures
    are checked for decompression every draw call and we would only decompress
    after the first draw call. The solution is to set dirtiness after the last
    draw call to the framebuffer, so the (unconditional) decompression of
    bindless textures happens at the right time.
    
    Cc: 17.2 <mesa-stable@lists.freedesktop.org>
    Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
    (cherry picked from commit f4d095cc651af005d5760aa9dd06e6ae7007fab6)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 631676b..caa4c3c 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -121,9 +121,7 @@ si_blit_dbcb_copy(struct si_context *sctx,
 
 	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
 
-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
@@ -169,7 +167,6 @@ si_blit_dbcb_copy(struct si_context *sctx,
 	}
 
 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 	sctx->dbcb_depth_copy_enabled = false;
 	sctx->dbcb_stencil_copy_enabled = false;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
@@ -225,9 +222,7 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 
 	surf_tmpl.format = texture->resource.b.b.format;
 
-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
@@ -267,7 +262,6 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
 
 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 	sctx->db_flush_depth_inplace = false;
 	sctx->db_flush_stencil_inplace = false;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
@@ -474,9 +468,7 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 		custom_blend = sctx->custom_blend_eliminate_fastclear;
 	}
 
-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
@@ -519,7 +511,6 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 	}
 
 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 
 	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
 			 SI_CONTEXT_INV_GLOBAL_L2 |
@@ -971,10 +962,32 @@ static void si_decompress_subresource(struct pipe_context *ctx,
 		if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
 			planes &= ~PIPE_MASK_S;
 
+		/* If we've rendered into the framebuffer and it's a blitting
+		 * source, make sure the decompression pass is invoked
+		 * by dirtying the framebuffer.
+		 */
+		if (sctx->framebuffer.state.zsbuf &&
+		    sctx->framebuffer.state.zsbuf->u.tex.level == level &&
+		    sctx->framebuffer.state.zsbuf->texture == tex)
+			si_update_fb_dirtiness_after_rendering(sctx);
+
 		si_decompress_depth(sctx, rtex, planes,
 				    level, level,
 				    first_layer, last_layer);
 	} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
+		/* If we've rendered into the framebuffer and it's a blitting
+		 * source, make sure the decompression pass is invoked
+		 * by dirtying the framebuffer.
+		 */
+		for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+			if (sctx->framebuffer.state.cbufs[i] &&
+			    sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
+			    sctx->framebuffer.state.cbufs[i]->texture == tex) {
+				si_update_fb_dirtiness_after_rendering(sctx);
+				break;
+			}
+		}
+
 		si_blit_decompress_color(ctx, rtex, level, level,
 					 first_layer, last_layer, false);
 	}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index d25705b..2e8a3bf 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -187,7 +187,6 @@ struct si_framebuffer {
 	ubyte				dirty_cbufs;
 	bool				dirty_zsbuf;
 	bool				any_dst_linear;
-	bool				do_update_surf_dirtiness;
 };
 
 struct si_clip_state {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 42d81e7..b7f5566 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2452,6 +2452,38 @@ static void si_init_depth_surface(struct si_context *sctx,
 	surf->depth_initialized = true;
 }
 
+void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
+{
+	if (sctx->decompression_enabled)
+		return;
+
+	if (sctx->framebuffer.state.zsbuf) {
+		struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
+		struct r600_texture *rtex = (struct r600_texture *)surf->texture;
+
+		rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+
+		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+			rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
+	}
+	if (sctx->framebuffer.compressed_cb_mask) {
+		struct pipe_surface *surf;
+		struct r600_texture *rtex;
+		unsigned mask = sctx->framebuffer.compressed_cb_mask;
+
+		do {
+			unsigned i = u_bit_scan(&mask);
+			surf = sctx->framebuffer.state.cbufs[i];
+			rtex = (struct r600_texture*)surf->texture;
+
+			if (rtex->fmask.size)
+				rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+			if (rtex->dcc_gather_statistics)
+				rtex->separate_dcc_dirty = true;
+		} while (mask);
+	}
+}
+
 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
 {
 	for (int i = 0; i < state->nr_cbufs; ++i) {
@@ -2479,6 +2511,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	bool unbound = false;
 	int i;
 
+	si_update_fb_dirtiness_after_rendering(sctx);
+
 	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
 		if (!sctx->framebuffer.state.cbufs[i])
 			continue;
@@ -2676,7 +2710,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		 * changes come from the decompression passes themselves.
 		 */
 		sctx->need_check_render_feedback = true;
-		sctx->framebuffer.do_update_surf_dirtiness = true;
 	}
 }
 
@@ -3984,6 +4017,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
+	si_update_fb_dirtiness_after_rendering(sctx);
+
 	/* Multisample surfaces are flushed in si_decompress_textures. */
 	if (sctx->framebuffer.nr_samples <= 1 &&
 	    sctx->framebuffer.state.nr_cbufs) {
@@ -3991,7 +4026,6 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 				 SI_CONTEXT_INV_GLOBAL_L2 |
 				 SI_CONTEXT_FLUSH_AND_INV_CB;
 	}
-	sctx->framebuffer.do_update_surf_dirtiness = true;
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index ec28aba..acc8fb7 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -384,6 +384,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 			      const struct pipe_sampler_view *state,
 			      unsigned width0, unsigned height0,
 			      unsigned force_level);
+void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
 
 /* si_state_shader.c */
 bool si_update_shaders(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 332e0c4..c1edf7f 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1207,7 +1207,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		sctx->framebuffer.dirty_cbufs |=
 			((1 << sctx->framebuffer.state.nr_cbufs) - 1);
 		sctx->framebuffer.dirty_zsbuf = true;
-		sctx->framebuffer.do_update_surf_dirtiness = true;
 		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 		si_update_all_texture_descriptors(sctx);
 	}
@@ -1392,36 +1391,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
 	}
 
-	if (sctx->framebuffer.do_update_surf_dirtiness) {
-		/* Set the depth buffer as dirty. */
-		if (sctx->framebuffer.state.zsbuf) {
-			struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
-			struct r600_texture *rtex = (struct r600_texture *)surf->texture;
-
-			rtex->dirty_level_mask |= 1 << surf->u.tex.level;
-
-			if (rtex->surface.flags & RADEON_SURF_SBUFFER)
-				rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
-		}
-		if (sctx->framebuffer.compressed_cb_mask) {
-			struct pipe_surface *surf;
-			struct r600_texture *rtex;
-			unsigned mask = sctx->framebuffer.compressed_cb_mask;
-
-			do {
-				unsigned i = u_bit_scan(&mask);
-				surf = sctx->framebuffer.state.cbufs[i];
-				rtex = (struct r600_texture*)surf->texture;
-
-				if (rtex->fmask.size)
-					rtex->dirty_level_mask |= 1 << surf->u.tex.level;
-				if (rtex->dcc_gather_statistics)
-					rtex->separate_dcc_dirty = true;
-			} while (mask);
-		}
-		sctx->framebuffer.do_update_surf_dirtiness = false;
-	}
-
 	sctx->b.num_draw_calls++;
 	if (info->primitive_restart)
 		sctx->b.num_prim_restart_calls++;

commit 6625382b1c036335972b53620ac0a5144c63fa10
Author: Marek Olšák <marek.olsak@amd.com>
Date:   Tue Jul 25 17:34:52 2017 +0200

    st/mesa: always unconditionally revalidate main framebuffer after SwapBuffers
    
    This fixes the black Feral launcher window.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101867
    
    Cc: 17.2 <mesa-stable@lists.freedesktop.org>
    Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com>
    (cherry picked from commit 7257c171e9eadc05903140cffa26a253f0d0178a)

diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 97bf89f..62924b0 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -642,6 +642,16 @@ st_context_flush(struct st_context_iface *stctxi, unsigned flags,
 
    if (flags & ST_FLUSH_FRONT)
       st_manager_flush_frontbuffer(st);
+
+   /* DRI3 changes the framebuffer after SwapBuffers, but we need to invoke
+    * st_manager_validate_framebuffers to notice that.
+    *
+    * Set gfx_shaders_may_be_dirty to invoke st_validate_state in the next
+    * draw call, which will invoke st_manager_validate_framebuffers, but it
+    * won't dirty states if there is no change.
+    */
+   if (flags & ST_FLUSH_END_OF_FRAME)
+      st->gfx_shaders_may_be_dirty = true;
 }
 
 static boolean

commit 2bca74253da64e15e49fa6ebaa11c99023a34e2c
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date:   Fri Jul 14 14:33:37 2017 +0200

    radeonsi/gfx9: always wrap GS and TCS in an if-block (v2)
    
    With merged ESGS shaders, the GS part of a wave may be empty, and the
    hardware gets confused if any GS messages are sent from that wave. Since
    S_SENDMSG is executed even when EXEC = 0, we have to wrap even
    non-monolithic GS shaders in an if-block, so that the entire shader and
    hence the S_SENDMSG instructions are skipped in empty waves.
    
    This change is not required for TCS/HS, but applying it there as well
    simplifies the logic a bit.
    
    Fixes GL45-CTS.geometry_shader.rendering.rendering.*
    
    v2: ensure that the TCS epilog doesn't run for non-existing patches
    
    Cc: mesa-stable@lists.freedesktop.org
    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
    (cherry picked from commit 081ac6e5c6d2ef3931b27eb755d1a38827582a45)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a153cb7..9376d90 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -175,6 +175,20 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
 }
 
 /**
+ * Helper function that builds an LLVM IR PHI node and immediately adds
+ * incoming edges.
+ */
+static LLVMValueRef
+build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+	  unsigned count_incoming, LLVMValueRef *values,
+	  LLVMBasicBlockRef *blocks)
+{
+	LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+	LLVMAddIncoming(phi, values, blocks, count_incoming);
+	return phi;
+}
+
+/**
  * Get the value of a shader input parameter and extract a bitfield.
  */
 static LLVMValueRef unpack_param(struct si_shader_context *ctx,
@@ -2698,6 +2712,7 @@ si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret,
 static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
+	LLVMBuilderRef builder = ctx->gallivm.builder;
 	LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
 
 	si_copy_tcs_inputs(bld_base);
@@ -2706,8 +2721,29 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 	invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
 	tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
 
+	if (ctx->screen->b.chip_class >= GFX9) {
+		LLVMBasicBlockRef blocks[2] = {
+			LLVMGetInsertBlock(builder),
+			ctx->merged_wrap_if_state.entry_block
+		};
+		LLVMValueRef values[2];
+
+		lp_build_endif(&ctx->merged_wrap_if_state);
+
+		values[0] = rel_patch_id;
+		values[1] = LLVMGetUndef(ctx->i32);
+		rel_patch_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+
+		values[0] = tf_lds_offset;
+		values[1] = LLVMGetUndef(ctx->i32);
+		tf_lds_offset = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+
+		values[0] = invocation_id;
+		values[1] = ctx->i32_1; /* cause the epilog to skip threads */
+		invocation_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+	}
+
 	/* Return epilog parameters from this function. */
-	LLVMBuilderRef builder = ctx->gallivm.builder;
 	LLVMValueRef ret = ctx->return_value;
 	unsigned vgpr;
 
@@ -2935,6 +2971,9 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
 
 	ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
 			 si_get_gs_wave_id(ctx));
+
+	if (ctx->screen->b.chip_class >= GFX9)
+		lp_build_endif(&ctx->merged_wrap_if_state);
 }
 
 static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
@@ -5502,14 +5541,20 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 	preload_ring_buffers(ctx);
 
 	/* For GFX9 merged shaders:
-	 * - Set EXEC. If the prolog is present, set EXEC there instead.
+	 * - Set EXEC for the first shader. If the prolog is present, set
+	 *   EXEC there instead.
 	 * - Add a barrier before the second shader.
+	 * - In the second shader, reset EXEC to ~0 and wrap the main part in
+	 *   an if-statement. This is required for correctness in geometry
+	 *   shaders, to ensure that empty GS waves do not send GS_EMIT and
+	 *   GS_CUT messages.
 	 *
-	 * The same thing for monolithic shaders is done in
-	 * si_build_wrapper_function.
+	 * For monolithic merged shaders, the first shader is wrapped in an
+	 * if-block together with its prolog in si_build_wrapper_function.
 	 */
-	if (ctx->screen->b.chip_class >= GFX9 && !is_monolithic) {
-		if (sel->info.num_instructions > 1 && /* not empty shader */
+	if (ctx->screen->b.chip_class >= GFX9) {
+		if (!is_monolithic &&
+		    sel->info.num_instructions > 1 && /* not empty shader */
 		    (shader->key.as_es || shader->key.as_ls) &&
 		    (ctx->type == PIPE_SHADER_TESS_EVAL ||
 		     (ctx->type == PIPE_SHADER_VERTEX &&
@@ -5518,9 +5563,19 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 						ctx->param_merged_wave_info, 0);
 		} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
 			   ctx->type == PIPE_SHADER_GEOMETRY) {
-			si_init_exec_from_input(ctx,
-						ctx->param_merged_wave_info, 8);
+			if (!is_monolithic)
+				si_init_exec_full_mask(ctx);
+
+			/* The barrier must execute for all shaders in a
+			 * threadgroup.
+			 */
 			si_llvm_emit_barrier(NULL, bld_base, NULL);
+
+			LLVMValueRef num_threads = unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
+			LLVMValueRef ena =
+				LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+					    ac_get_thread_id(&ctx->ac), num_threads, "");
+			lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);
 		}
 	}
 
@@ -5991,15 +6046,9 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
 
 		/* Merged shaders are executed conditionally depending
 		 * on the number of enabled threads passed in the input SGPRs. */
-		if (is_merged_shader(ctx->shader) &&
-		    (part == 0 || part == next_shader_first_part)) {
+		if (is_merged_shader(ctx->shader) && part == 0) {
 			LLVMValueRef ena, count = initial[3];
 
-			/* The thread count for the 2nd shader is at bit-offset 8. */
-			if (part == next_shader_first_part) {
-				count = LLVMBuildLShr(builder, count,
-						      LLVMConstInt(ctx->i32, 8, 0), "");
-			}
 			count = LLVMBuildAnd(builder, count,
 					     LLVMConstInt(ctx->i32, 0x7f, 0), "");
 			ena = LLVMBuildICmp(builder, LLVMIntULT,
@@ -6056,26 +6105,20 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
 		ret = LLVMBuildCall(builder, parts[part], in, num_params, "");
 
 		if (is_merged_shader(ctx->shader) &&
-		    (part + 1 == next_shader_first_part ||
-		     part + 1 == num_parts)) {
+		    part + 1 == next_shader_first_part) {
 			lp_build_endif(&if_state);
 
-			if (part + 1 == next_shader_first_part) {
-				/* A barrier is required between 2 merged shaders. */
-				si_llvm_emit_barrier(NULL, &ctx->bld_base, NULL);
-
-				/* The second half of the merged shader should use
-				 * the inputs from the toplevel (wrapper) function,
-				 * not the return value from the last call.
-				 *
-				 * That's because the last call was executed condi-
-				 * tionally, so we can't consume it in the main
-				 * block.
-				 */
-				memcpy(out, initial, sizeof(initial));
-				num_out = initial_num_out;
-				num_out_sgpr = initial_num_out_sgpr;
-			}
+			/* The second half of the merged shader should use
+			 * the inputs from the toplevel (wrapper) function,
+			 * not the return value from the last call.
+			 *
+			 * That's because the last call was executed condi-
+			 * tionally, so we can't consume it in the main
+			 * block.
+			 */
+			memcpy(out, initial, sizeof(initial));
+			num_out = initial_num_out;
+			num_out_sgpr = initial_num_out_sgpr;
 			continue;
 		}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 6e86e0b..6b98bca 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -25,6 +25,7 @@
 #define SI_SHADER_PRIVATE_H
 
 #include "si_shader.h"
+#include "gallivm/lp_bld_flow.h"
 #include "gallivm/lp_bld_init.h"
 #include "gallivm/lp_bld_tgsi.h"
 #include "tgsi/tgsi_parse.h"
@@ -105,6 +106,8 @@ struct si_shader_context {
 	unsigned flow_depth;
 	unsigned flow_depth_max;
 
+	struct lp_build_if_state merged_wrap_if_state;
+
 	struct tgsi_array_info *temp_arrays;
 	LLVMValueRef *temp_array_allocas;
 

commit b36ff2d1f207656114701f83af513e382dc0e870
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date:   Fri Jul 14 13:31:49 2017 +0200

    radeonsi/gfx9: fix vertex idx in ES with multiple waves per threadgroup
    
    Cc: mesa-stable@lists.freedesktop.org
    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
    (cherry picked from commit 873789002f5d1c7c6c39231a8c8d541f4f61e65c)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a5baf71..a153cb7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2879,7 +2879,12 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
 
 	if (ctx->screen->b.chip_class >= GFX9 && info->num_outputs) {
 		unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
-		lds_base = LLVMBuildMul(gallivm->builder, ac_get_thread_id(&ctx->ac),
+		LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
+		LLVMValueRef wave_idx = unpack_param(ctx, ctx->param_merged_wave_info, 24, 4);
+		vertex_idx = LLVMBuildOr(gallivm->builder, vertex_idx,
+					 LLVMBuildMul(gallivm->builder, wave_idx,
+						      LLVMConstInt(ctx->i32, 64, false), ""), "");
+		lds_base = LLVMBuildMul(gallivm->builder, vertex_idx,
 					LLVMConstInt(ctx->i32, itemsize_dw, 0), "");
 	}
 

commit 99b2613ce15c4391949ec221694b054c0910e68a
Author: George Kyriazis <george.kyriazis@intel.com>
Date:   Fri Jul 21 23:23:04 2017 -0500

    swr: fix transform feedback logic
    
    The shader that is used to copy vertex data out of the vs/gs shaders to
    the user-specified buffer (streamout or SO shader) was not using the
    correct offsets.
    
    Adjust the offsets that are used just for the SO shader:
    - Make sure that position is handled in the same special way
      as in the vs/gs shaders
    - Use the correct offset to be passed in the core
    - consolidate register slot mapping logic into one function, since it's
      been calculated in 2 different places (one for calculating the slot mask,
      and one for the register offsets themselves)
    
    Also make room for all attributes in the backend vertex area.
    
    Fixes:
    - all vtk GL2PS tests
    - 18 piglit tests (16 ext_transform_feedback tests,
      arb-quads-follow-provoking-vertex and primitive-type gl_points)
    
    v2:
    
    - take care of more SGV slots in slot mapping logic
    - trim feState.vsVertexSize
    - fix GS interface and incorporate GS while calculating vsVertexSize
    
    Note that vsVertexSize is used in the core as the one parameter that
    controls vertex size between all stages, so it has to be adjusted appropriately
    for the whole vs/gs/fs pipeline.
    
    Also note that GS and SO is not fully implemented.  This will be addressed
    later.
    
    fixes:
    - fixes total of 20 piglit tests
    
    CC: 17.2 <mesa-stable@lists.freedesktop.org>
    
    Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
    (cherry picked from commit 194ff5eed18f310bece0899595f678699badd32e)

diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
index 62ad3f7..df1c11a 100644
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -81,8 +81,11 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                offsets[output_buffer] = so->output[i].dst_offset;
             }
 
+            unsigned attrib_slot = so->output[i].register_index;
+            attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
+
             state.stream.decl[num].bufferIndex = output_buffer;
-            state.stream.decl[num].attribSlot = so->output[i].register_index - 1;
+            state.stream.decl[num].attribSlot = attrib_slot;
             state.stream.decl[num].componentMask =
                ((1 << so->output[i].num_components) - 1)
                << so->output[i].start_component;
@@ -129,10 +132,36 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
     * XXX setup provokingVertex & topologyProvokingVertex */
    SWR_FRONTEND_STATE feState = {0};
 
-   feState.vsVertexSize =
-      VERTEX_ATTRIB_START_SLOT +
-      + ctx->vs->info.base.num_outputs
-      - (ctx->vs->info.base.writes_position ? 1 : 0);
+   // feState.vsVertexSize seeds the PA size that is used as an interface
+   // between all the shader stages, so it has to be large enough to
+   // incorporate all interfaces between stages
+
+   // max of gs and vs num_outputs
+   feState.vsVertexSize = ctx->vs->info.base.num_outputs;
+   if (ctx->gs &&
+       ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
+      feState.vsVertexSize = ctx->gs->info.base.num_outputs;
+   }
+
+   if (ctx->vs->info.base.num_outputs) {
+      // gs does not adjust for position in SGV slot at input from vs
+      if (!ctx->gs)
+         feState.vsVertexSize--;
+   }
+
+   // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
+   feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
+
+   // The PA in the clipper does not handle BE vertex sizes
+   // different from FE. Increase vertexsize only for the cases that needed it
+
+   // primid needs a slot
+   if (ctx->fs->info.base.uses_primid)
+      feState.vsVertexSize++;
+   // sprite coord enable
+   if (ctx->rasterizer->sprite_coord_enable)
+      feState.vsVertexSize++;
+
 
    if (ctx->rasterizer->flatshade_first) {
       feState.provokingVertex = {1, 0, 0};
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index 83b49c4..0a81eaa 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -414,7 +414,10 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
        } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
           attribSlot = VERTEX_POSITION_SLOT;
        } else {
-          attribSlot = VERTEX_ATTRIB_START_SLOT + attrib - 1;
+          attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
+          if (iface->info->writes_position) {
+             attribSlot--;
+          }
        }
 
 #if USE_SIMD16_FRONTEND
@@ -923,6 +926,33 @@ swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
    return func;
 }
 
+unsigned
+swr_so_adjust_attrib(unsigned in_attrib,
+                     swr_vertex_shader *swr_vs)
+{
+   ubyte semantic_name;
+   unsigned attrib;
+
+   attrib = in_attrib + VERTEX_ATTRIB_START_SLOT;
+
+   if (swr_vs) {
+      semantic_name = swr_vs->info.base.output_semantic_name[in_attrib];
+      if (semantic_name == TGSI_SEMANTIC_POSITION) {
+         attrib = VERTEX_POSITION_SLOT;
+      } else if (semantic_name == TGSI_SEMANTIC_PSIZE) {
+         attrib = VERTEX_SGV_SLOT;
+      } else if (semantic_name == TGSI_SEMANTIC_LAYER) {
+         attrib = VERTEX_SGV_SLOT;
+      } else {
+         if (swr_vs->info.base.writes_position) {
+               attrib--;
+         }
+      }
+   }
+
+   return attrib;
+}
+
 static unsigned
 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
 {
diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
index 1ab6846..6468874 100644
--- a/src/gallium/drivers/swr/swr_shader.h
+++ b/src/gallium/drivers/swr/swr_shader.h
@@ -30,6 +30,9 @@ struct swr_jit_fs_key;
 struct swr_jit_vs_key;
 struct swr_jit_gs_key;
 
+unsigned swr_so_adjust_attrib(unsigned in_attrib,
+                              swr_vertex_shader *swr_vs);
+
 PFN_VERTEX_FUNC
 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key);
 
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 501fdea..47ab445 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -345,13 +345,14 @@ swr_create_vs_state(struct pipe_context *pipe,
       // soState.streamToRasterizer not used
 
       for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
+         unsigned attrib_slot = stream_output->output[i].register_index;
+         attrib_slot = swr_so_adjust_attrib(attrib_slot, swr_vs);
          swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
-            1 << (stream_output->output[i].register_index - 1);
+            (1 << attrib_slot);
       }
       for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
         swr_vs->soState.streamNumEntries[i] =
              _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
-        swr_vs->soState.vertexAttribOffset[i] = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
        }
    }
 

commit f9c7549605e412e3b77c727a0d86442260f8f46a
Author: Dave Airlie <airlied@redhat.com>
Date:   Wed Jul 26 02:34:54 2017 +0100

    radv/ac: port SI TC L1 write corruption fix.
    
    This ports 72e46c988 to radv.
        radeonsi: apply a TC L1 write corruption workaround for SI
    
    Fixes: f4e499ec7 (radv: add initial non-conformant radv vulkan driver)
    Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
    Signed-off-by: Dave Airlie <airlied@redhat.com>
    (cherry picked from commit e77ff11ffe1a52b8e17a847f263746c849db3f11)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index a427f48..d6b450f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3400,7 +3400,10 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 	char intrinsic_name[64];
 	const nir_variable *var = instr->variables[0]->var;
 	const struct glsl_type *type = glsl_without_array(var->type);
-
+	LLVMValueRef glc = ctx->i1false;
+	bool force_glc = ctx->options->chip_class == SI;
+	if (force_glc)
+		glc = ctx->i1true;
 	if (ctx->stage == MESA_SHADER_FRAGMENT)
 		ctx->shader_info->fs.writes_memory = true;
 
@@ -3410,7 +3413,7 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 		params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
 						    LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
 		params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
-		params[4] = ctx->i1false;  /* glc */
+		params[4] = glc;  /* glc */
 		params[5] = ctx->i1false;  /* slc */
 		ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
 				   params, 6, 0);
@@ -3418,7 +3421,6 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 		bool is_da = glsl_sampler_type_is_array(type) ||
Reply to:
Prev by Date: mesa: Changes to 'debian-experimental'
Next by Date: mesa: Changes to 'upstream-experimental'
Previous by thread: mesa: Changes to 'debian-experimental'
Next by thread: libdrm_2.4.82-1_amd64.changes ACCEPTED into unstable, unstable
Index(es):
- Date
- Thread