mesa: Changes to 'debian-experimental'
VERSION | 2
debian/changelog | 2
src/compiler/Makefile.sources | 1
src/compiler/glsl/ast_to_hir.cpp | 17
src/compiler/glsl/builtin_variables.cpp | 13
src/compiler/glsl/linker.cpp | 20 -
src/compiler/nir/nir.h | 2
src/compiler/nir/nir_lower_alu_to_scalar.c | 2
src/compiler/nir/nir_opt_algebraic.py | 2
src/compiler/nir/nir_propagate_invariant.c | 196 ++++++++++
src/gallium/auxiliary/gallivm/lp_bld_arit.c | 6
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 34 +
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h | 5
src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 4
src/gallium/drivers/radeonsi/si_pm4.h | 2
src/gallium/drivers/radeonsi/si_state.c | 13
src/intel/vulkan/anv_cmd_buffer.c | 2
src/intel/vulkan/anv_device.c | 2
src/intel/vulkan/anv_meta_clear.c | 1
src/intel/vulkan/anv_pipeline.c | 5
src/intel/vulkan/anv_private.h | 4
src/intel/vulkan/gen7_pipeline.c | 1
src/intel/vulkan/gen8_cmd_buffer.c | 41 +-
src/intel/vulkan/gen8_pipeline.c | 6
src/intel/vulkan/genX_cmd_buffer.c | 16
src/mesa/drivers/dri/i965/brw_context.c | 2
src/mesa/drivers/dri/i965/brw_fs.cpp | 5
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 53 ++
src/mesa/drivers/dri/i965/brw_tcs.c | 6
src/mesa/main/image.c | 8
src/mesa/main/mtypes.h | 2
src/mesa/main/pipelineobj.c | 17
src/mesa/program/prog_statevars.c | 19
src/mesa/program/prog_statevars.h | 2
src/mesa/state_tracker/st_cb_compute.c | 3
src/mesa/state_tracker/st_cb_copyimage.c | 3
src/mesa/state_tracker/st_cb_texture.c | 9
src/mesa/state_tracker/st_gen_mipmap.c | 3
38 files changed, 454 insertions(+), 77 deletions(-)
New commits:
commit 5ee64a01c106975169727603b64129e8ed460003
Author: Timo Aaltonen <tjaalton@debian.org>
Date: Wed Jun 22 15:07:20 2016 +0300
bump version
diff --git a/debian/changelog b/debian/changelog
index b5ce92a..ed2141c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-mesa (12.0.0~rc3-1) UNRELEASED; urgency=medium
+mesa (12.0.0~rc4-1) UNRELEASED; urgency=medium
* New upstream release candidate.
* symbols: Updated.
commit 5e0b11cb6dbeab7ca6a1ba2edca56701cdfde96c
Author: Emil Velikov <emil.velikov@collabora.com>
Date: Tue Jun 21 13:32:04 2016 +0100
Update version to 12.0.0-rc4
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
diff --git a/VERSION b/VERSION
index 6ef7d92..076ead7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-12.0.0-rc3
+12.0.0-rc4
commit 6306930c3f0cd790e9d8995468559453b6c75b2c
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date: Fri Jun 10 15:59:58 2016 +0200
st/mesa: flush bitmap cache before CopyImageSubData
Found by inspection.
Cc: 11.2 12.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
(cherry picked from commit f9ddd52317caf14a21ec7c040fd4bb944f9842e4)
diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c
index 617e470..8afb861 100644
--- a/src/mesa/state_tracker/st_cb_copyimage.c
+++ b/src/mesa/state_tracker/st_cb_copyimage.c
@@ -23,6 +23,7 @@
*/
#include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_bitmap.h"
#include "state_tracker/st_cb_copyimage.h"
#include "state_tracker/st_cb_fbo.h"
#include "state_tracker/st_texture.h"
@@ -547,6 +548,8 @@ st_CopyImageSubData(struct gl_context *ctx,
struct pipe_box box;
int src_level, dst_level;
+ st_flush_bitmap_cache(st);
+
if (src_image) {
struct st_texture_image *src = st_texture_image(src_image);
src_res = src->pt;
commit 76377387c2d44d0fbae21763386ac86ffb54c635
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date: Thu Jun 9 12:22:31 2016 +0200
st/mesa: flush bitmap cache before texture functions
As far as I can tell, a sequence of glBitmap followed by texture functions
that refer to a texture bound as the framebuffer is well within what should
be allowed.
Found by inspection.
Cc: 11.2 12.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
(cherry picked from commit e7fff3cfe156e13198107e5e76a77fb79ed02173)
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index cfe9c4b..cf1c351 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -48,6 +48,7 @@
#include "state_tracker/st_debug.h"
#include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_bitmap.h"
#include "state_tracker/st_cb_fbo.h"
#include "state_tracker/st_cb_flush.h"
#include "state_tracker/st_cb_texture.h"
@@ -1716,6 +1717,8 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
unsigned dstz = texImage->Face + texImage->TexObject->MinLayer;
unsigned dst_level = 0;
+ st_flush_bitmap_cache(st);
+
if (stObj->pt == stImage->pt)
dst_level = texImage->TexObject->MinLevel + texImage->Level;
@@ -2181,6 +2184,8 @@ st_GetTexSubImage(struct gl_context * ctx,
assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
+ st_flush_bitmap_cache(st);
+
if (!st->prefer_blit_based_texture_transfer &&
!_mesa_is_format_compressed(texImage->TexFormat)) {
/* Try to avoid the fallback if we're doing texture decompression here */
@@ -2644,6 +2649,8 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims,
unsigned bind;
GLint srcY0, srcY1;
+ st_flush_bitmap_cache(st);
+
assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
@@ -3166,6 +3173,8 @@ st_ClearTexSubImage(struct gl_context *ctx,
if (!pt)
return;
+ st_flush_bitmap_cache(st);
+
u_box_3d(xoffset, yoffset, zoffset + texImage->Face,
width, height, depth, &box);
if (texImage->TexObject->Immutable) {
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index a14bbfa..adf02e7 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -40,6 +40,7 @@
#include "st_context.h"
#include "st_texture.h"
#include "st_gen_mipmap.h"
+#include "st_cb_bitmap.h"
#include "st_cb_texture.h"
@@ -96,6 +97,8 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
if (lastLevel == 0)
return;
+ st_flush_bitmap_cache(st);
+
/* The texture isn't in a "complete" state yet so set the expected
* lastLevel here, since it won't get done in st_finalize_texture().
*/
commit 6775b169cdffecd373d57847e5d71db3fe39409a
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date: Thu Jun 9 12:12:34 2016 +0200
st/mesa: flush bitmap cache before compute dispatch
This covers the unlikely case that a program uses glBitmap to render to a
framebuffer whose texture is bound in a compute shader.
Found by inspection.
Cc: 11.2 12.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
(cherry picked from commit c542b7e43d3a504456518c9f407e21c4e7e5fa88)
diff --git a/src/mesa/state_tracker/st_cb_compute.c b/src/mesa/state_tracker/st_cb_compute.c
index bfc6d96..063d750 100644
--- a/src/mesa/state_tracker/st_cb_compute.c
+++ b/src/mesa/state_tracker/st_cb_compute.c
@@ -28,6 +28,7 @@
#include "main/state.h"
#include "st_atom.h"
#include "st_context.h"
+#include "st_cb_bitmap.h"
#include "st_cb_bufferobjects.h"
#include "st_cb_compute.h"
@@ -44,6 +45,8 @@ static void st_dispatch_compute_common(struct gl_context *ctx,
struct pipe_context *pipe = st->pipe;
struct pipe_grid_info info = { 0 };
+ st_flush_bitmap_cache(st);
+
if (ctx->NewState)
_mesa_update_state(ctx);
commit a0235eb0f716e05c290cad66292b703c2178af91
Author: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed Jun 8 16:09:02 2016 -0700
i965: Fix multiplication of immediates on Cherryview/Broxton.
Cherryview and Broxton don't support DW x DW multiplication. We have
piles of code to handle this, but apparently weren't retyping in the
immediate case.
For example,
tests/spec/arb_tessellation_shader/execution/dvec3-vs-tcs-tes
makes the simulator angry about instructions such as:
mul(8) r18<1>:D r10.0<8;8,1>:D 0x00000003:D
Just retype to W or UW. It should be safe on all platforms.
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
(cherry picked from commit cd89c834a8b3b4e5f5874c8e1f90c9b01d541181)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0347b0a..8337774 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3564,7 +3564,10 @@ fs_visitor::lower_integer_multiplication()
ibld.MOV(imm, inst->src[1]);
ibld.MUL(inst->dst, imm, inst->src[0]);
} else {
- ibld.MUL(inst->dst, inst->src[0], inst->src[1]);
+ const bool ud = (inst->src[1].type == BRW_REGISTER_TYPE_UD);
+ ibld.MUL(inst->dst, inst->src[0],
+ ud ? brw_imm_uw(inst->src[1].ud)
+ : brw_imm_w(inst->src[1].d));
}
} else {
/* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot
commit 09a098bdeb89baacd6bbadc4180daf9c2ffaa840
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue Jun 14 08:40:49 2016 -0700
anv: Add proper support for depth clamping
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
(cherry picked from commit eb6764c4a73006eee32e19e3afc6eab100a2ce16)
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index f864248..97300c3 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -380,7 +380,7 @@ void anv_GetPhysicalDeviceFeatures(
.logicOp = true,
.multiDrawIndirect = false,
.drawIndirectFirstInstance = false,
- .depthClamp = false,
+ .depthClamp = true,
.depthBiasClamp = false,
.fillModeNonSolid = true,
.depthBounds = false,
diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c
index fe750c8..7ec0608 100644
--- a/src/intel/vulkan/anv_meta_clear.c
+++ b/src/intel/vulkan/anv_meta_clear.c
@@ -173,6 +173,7 @@ create_pipeline(struct anv_device *device,
.cullMode = VK_CULL_MODE_NONE,
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
.depthBiasEnable = false,
+ .depthClampEnable = true,
},
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index e41f623..32594f7 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1165,6 +1165,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
pipeline->batch.relocs = &pipeline->batch_relocs;
copy_non_dynamic_state(pipeline, pCreateInfo);
+ pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
+ pCreateInfo->pRasterizationState->depthClampEnable;
pipeline->use_repclear = extra && extra->use_repclear;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f5500c5..052ced4 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1360,7 +1360,8 @@ VkResult
anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
-void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer);
+void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
+ bool depth_clamp_enable);
void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
@@ -1485,6 +1486,8 @@ struct anv_pipeline {
uint32_t cs_right_mask;
+ bool depth_clamp_enable;
+
struct {
uint32_t sf[7];
uint32_t depth_stencil_state[3];
diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index f069db9..dd34d71 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -196,6 +196,7 @@ genX(graphics_pipeline_create)(
clip.ClipEnable = !(extra && extra->use_rectlist),
clip.APIMode = APIMODE_OGL,
clip.ViewportXYClipTestEnable = true,
+ clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable,
clip.ClipMode = CLIPMODE_NORMAL,
clip.TriangleStripListProvokingVertexSelect = 0,
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 395d0da..e22b4e2 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -77,7 +77,8 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
}
void
-gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
+gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
+ bool depth_clamp_enable)
{
uint32_t count = cmd_buffer->state.dynamic.viewport.count;
const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
@@ -88,8 +89,8 @@ gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
const VkViewport *vp = &viewports[i];
struct GENX(CC_VIEWPORT) cc_viewport = {
- .MinimumDepth = vp->minDepth,
- .MaximumDepth = vp->maxDepth,
+ .MinimumDepth = depth_clamp_enable ? vp->minDepth : 0.0f,
+ .MaximumDepth = depth_clamp_enable ? vp->maxDepth : 1.0f,
};
GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index 54585c3..2a96be0 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -85,11 +85,11 @@ emit_rs_state(struct anv_pipeline *pipeline,
.BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
.ScissorRectangleEnable = !(extra && extra->use_rectlist),
#if GEN_GEN == 8
- .ViewportZClipTestEnable = true,
+ .ViewportZClipTestEnable = !pipeline->depth_clamp_enable,
#else
/* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
- .ViewportZFarClipTestEnable = true,
- .ViewportZNearClipTestEnable = true,
+ .ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable,
+ .ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable,
#endif
.GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
.GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 58f5e0b..3d628df 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -525,9 +525,13 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
if (dirty)
gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
- if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
+ if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
gen8_cmd_buffer_emit_viewport(cmd_buffer);
- gen8_cmd_buffer_emit_depth_viewport(cmd_buffer);
+
+ if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |
+ ANV_CMD_DIRTY_PIPELINE)) {
+ gen8_cmd_buffer_emit_depth_viewport(cmd_buffer,
+ pipeline->depth_clamp_enable);
}
if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
commit f3c8dde2e4cac98ab190c0378e20424f0b59d9ef
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue Jun 14 08:15:34 2016 -0700
anv/cmd_buffer: Split emit_viewport in two
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 8a46b505cb2c7255ad430b56c1ce0dfa9c13c559)
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index cd3588a..f5500c5 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1360,6 +1360,7 @@ VkResult
anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
+void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer);
void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index df4036a..395d0da 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -40,8 +40,6 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
struct anv_state sf_clip_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
- struct anv_state cc_state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
for (uint32_t i = 0; i < count; i++) {
const VkViewport *vp = &viewports[i];
@@ -65,29 +63,45 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
.YMaxViewPort = vp->y + vp->height - 1,
};
+ GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
+ &sf_clip_viewport);
+ }
+
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(sf_clip_state);
+
+ anv_batch_emit(&cmd_buffer->batch,
+ GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
+ clip.SFClipViewportPointer = sf_clip_state.offset;
+ }
+}
+
+void
+gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
+{
+ uint32_t count = cmd_buffer->state.dynamic.viewport.count;
+ const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
+ struct anv_state cc_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
+
+ for (uint32_t i = 0; i < count; i++) {
+ const VkViewport *vp = &viewports[i];
+
struct GENX(CC_VIEWPORT) cc_viewport = {
.MinimumDepth = vp->minDepth,
- .MaximumDepth = vp->maxDepth
+ .MaximumDepth = vp->maxDepth,
};
- GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
- &sf_clip_viewport);
GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
}
- if (!cmd_buffer->device->info.has_llc) {
- anv_state_clflush(sf_clip_state);
+ if (!cmd_buffer->device->info.has_llc)
anv_state_clflush(cc_state);
- }
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
cc.CCViewportPointer = cc_state.offset;
}
- anv_batch_emit(&cmd_buffer->batch,
- GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
- clip.SFClipViewportPointer = sf_clip_state.offset;
- }
}
#endif
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index c62bed4..58f5e0b 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -525,8 +525,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
if (dirty)
gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
- if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
+ if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
gen8_cmd_buffer_emit_viewport(cmd_buffer);
+ gen8_cmd_buffer_emit_depth_viewport(cmd_buffer);
+ }
if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
gen7_cmd_buffer_emit_scissor(cmd_buffer);
commit 3fddb9fd46a6066d8ecf0bd19a370acbbbc05b2b
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon Jun 13 17:09:37 2016 -0700
anv/cmd_buffer: Set depth/stencil extent based on the image
It used to be based on the framebuffer which isn't quite right.
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 20e95a746df34923eb4aac5e7f1ab6d722432d89)
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index d9acf58..c62bed4 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1024,11 +1024,11 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
db.DepthBufferObjectControlState = GENX(MOCS),
db.SurfacePitch = image->depth_surface.isl.row_pitch - 1;
- db.Height = fb->height - 1;
- db.Width = fb->width - 1;
- db.LOD = 0;
- db.Depth = 1 - 1;
- db.MinimumArrayElement = 0;
+ db.Height = image->extent.height - 1;
+ db.Width = image->extent.width - 1;
+ db.LOD = iview->base_mip;
+ db.Depth = image->array_size - 1; /* FIXME: 3-D */
+ db.MinimumArrayElement = iview->base_layer;
#if GEN_GEN >= 8
db.SurfaceQPitch =
commit f614a1f4d88d02f429f29a4e95596e4a40ba7cce
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed Jun 15 14:30:33 2016 -0700
anv/cmd_buffer: Don't crash if push constants are provided for missing stages
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
(cherry picked from commit b65f2e4163c9180e6a022c0afec018b08e4c5aa5)
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 5be5f3e..24c18fe 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -1038,7 +1038,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer->state.pipeline->prog_data[stage];
/* If we don't actually have any push constants, bail. */
- if (data == NULL || prog_data->nr_params == 0)
+ if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
return (struct anv_state) { .offset = 0 };
struct anv_state state =
commit f4bc7218d59d55825c4ab2b76e6134827f10d401
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu Jun 16 10:57:39 2016 -0700
anv/pipeline: Do invariance propagation on SPIR-V shaders
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
(cherry picked from commit e6c2fe451962e364f30f689dc48c34e2b6161b25)
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 60b7c6b..e41f623 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -165,6 +165,9 @@ anv_shader_compile_to_nir(struct anv_device *device,
nir_remove_dead_variables(nir, nir_var_system_value);
nir_validate_shader(nir);
+ nir_propagate_invariant(nir);
+ nir_validate_shader(nir);
+
nir_lower_io_to_temporaries(entry_point->shader, entry_point, true, false);
nir_lower_system_values(nir);
commit 77f241bd37e7d0a76a0ac9223bc4cebba322994c
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon Jun 13 14:41:05 2016 -0700
nir/alu_to_scalar: Respect the exact ALU operation qualifier
Just setting builder->exact isn't sufficient because that only applies to
instructions that are built with the builder but instructions created
manually and only inserted using the builder are left alone.
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
(cherry picked from commit bec07b729242f6a2dcf5a12ce75bf8b07ea658e0)
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index b491791..4f72cf7 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -56,6 +56,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
}
+ chan->exact = instr->exact;
nir_builder_instr_insert(builder, &chan->instr);
@@ -229,6 +230,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size);
lower->dest.saturate = instr->dest.saturate;
comps[chan] = &lower->dest.dest.ssa;
+ lower->exact = instr->exact;
nir_builder_instr_insert(b, &lower->instr);
}
commit deedb368de7dc50f7196af440c338dcf6a361564
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Mon Jun 13 12:47:19 2016 -0700
nir: Add a pass for propagating invariant decorations
This pass is similar to propagate_invariance in the GLSL compiler. The
real "output" of this pass is that any algebraic operations which are
eventually consumed by an invariant variable get marked as "exact".
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 202751fbb7e3d35c1aa84f325f862245dab67f6c)
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 09a756b..bbd5d14 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -231,6 +231,7 @@ NIR_FILES = \
nir/nir_phi_builder.c \
nir/nir_phi_builder.h \
nir/nir_print.c \
+ nir/nir_propagate_invariant.c \
nir/nir_remove_dead_variables.c \
nir/nir_repair_ssa.c \
nir/nir_search.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 20f6520..9816ed6 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2290,6 +2290,8 @@ bool nir_lower_returns(nir_shader *shader);
bool nir_inline_functions(nir_shader *shader);
+bool nir_propagate_invariant(nir_shader *shader);
+
void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
void nir_lower_var_copies(nir_shader *shader);
diff --git a/src/compiler/nir/nir_propagate_invariant.c b/src/compiler/nir/nir_propagate_invariant.c
new file mode 100644
index 0000000..7b5bd6c
--- /dev/null
+++ b/src/compiler/nir/nir_propagate_invariant.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+static void
+add_src(nir_src *src, struct set *invariants)
+{
+ if (src->is_ssa) {
+ _mesa_set_add(invariants, src->ssa);
+ } else {
+ _mesa_set_add(invariants, src->reg.reg);
+ }
+}
+
+static bool
+add_src_cb(nir_src *src, void *state)
+{
+ add_src(src, state);
+ return true;
+}
+
+static bool
+dest_is_invariant(nir_dest *dest, struct set *invariants)
+{
+ if (dest->is_ssa) {
+ return _mesa_set_search(invariants, &dest->ssa);
+ } else {
+ return _mesa_set_search(invariants, dest->reg.reg);
+ }
+}
+
+static void
+add_cf_node(nir_cf_node *cf, struct set *invariants)
+{
+ if (cf->type == nir_cf_node_if) {
+ nir_if *if_stmt = nir_cf_node_as_if(cf);
+ add_src(&if_stmt->condition, invariants);
+ }
+
+ if (cf->parent)
+ add_cf_node(cf->parent, invariants);
+}
+
+static void
+add_var(nir_variable *var, struct set *invariants)
+{
+ _mesa_set_add(invariants, var);
+}
+
+static bool
+var_is_invariant(nir_variable *var, struct set * invariants)
+{
+ return var->data.invariant || _mesa_set_search(invariants, var);
+}
+
+static void
+propagate_invariant_instr(nir_instr *instr, struct set *invariants)
+{
+ switch (instr->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ if (!dest_is_invariant(&alu->dest.dest, invariants))
+ break;
+
+ alu->exact = true;
+ nir_foreach_src(instr, add_src_cb, invariants);
+ break;
+ }
+
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (dest_is_invariant(&tex->dest, invariants))
+ nir_foreach_src(instr, add_src_cb, invariants);
+ break;
+ }
+
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_copy_var:
+ /* If the destination is invariant then so is the source */
+ if (var_is_invariant(intrin->variables[0]->var, invariants))
+ add_var(intrin->variables[1]->var, invariants);
+ break;
+
+ case nir_intrinsic_load_var:
+ if (dest_is_invariant(&intrin->dest, invariants))
+ add_var(intrin->variables[0]->var, invariants);
+ break;
+
+ case nir_intrinsic_store_var:
+ if (var_is_invariant(intrin->variables[0]->var, invariants))
+ add_src(&intrin->src[0], invariants);
+ break;
+
+ default:
+ /* Nothing to do */
+ break;
+ }
+ }
+
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ case nir_instr_type_load_const:
+ break; /* Nothing to do */
+
+ case nir_instr_type_phi: {
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ if (!dest_is_invariant(&phi->dest, invariants))
+ break;
+
+ nir_foreach_phi_src(src, phi) {
+ add_src(&src->src, invariants);
+ add_cf_node(&src->pred->cf_node, invariants);
+ }
+ break;
+ }
+
+ case nir_instr_type_call:
+ unreachable("This pass must be run after function inlining");
+
+ case nir_instr_type_parallel_copy:
+ default:
+ unreachable("Cannot have this instruction type");
+ }
+}
+
+static bool
+propagate_invariant_impl(nir_function_impl *impl, struct set *invariants)
+{
+ bool progress = false;
+
+ while (true) {
+ uint32_t prev_entries = invariants->entries;
+
+ nir_foreach_block_reverse(block, impl) {
+ nir_foreach_instr_reverse(instr, block)
+ propagate_invariant_instr(instr, invariants);
+ }
+
+ /* Keep running until we make no more progress. */
+ if (invariants->entries > prev_entries) {
+ progress = true;
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ if (progress) {
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance |
+ nir_metadata_live_ssa_defs);
+ }
+
+ return progress;
+}
+
+bool
+nir_propagate_invariant(nir_shader *shader)
+{
+ /* Hash set of invariant things */
+ struct set *invariants = _mesa_set_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ bool progress = false;
+ nir_foreach_function(function, shader) {
+ if (function->impl && propagate_invariant_impl(function->impl, invariants))
+ progress = true;
+ }
+
+ _mesa_set_destroy(invariants, NULL);
+
+ return progress;
+}
commit bac23b13eb75a7bacdec439eb4c239a8dedb24e7
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Sat Jun 18 12:30:36 2016 -0700
nir/algebraic: Remove imprecise flog2 optimizations
While mathematically correct, these two optimizations result in an
expression with substantially lower precision than the original. For any
positive finite floating-point value, log2(x) is well-defined and finite.
More precisely, it is in the range [-150, 150] so any sum of logarithms
log2(a) + log2(b) is also well-defined and finite as long as a and b are
both positive and finite. However, if a and b are either very small or
very large, their product may get flushed to infinity or zero causing
log2(a * b) to be nowhere close to log2(a) + log2(b).
This imprecision was causing incorrect rendering in Talos Principle because
part of its HDR rendering process involves doing 8 texture operations,
clamping the result to [0, 65000], taking a dot-product with a constant,
and then taking the log2. This is done 6 or 8 times and summed to produce
the final result which is written to a red texture. In cases where you
have a region of the screen that is very dark, it can end up getting a
result value of -inf which is not what is intended.
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96425
Cc: "11.1 11.2 12.0" <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 68e308d85355079ad93bd4e16cba164784740fdf)
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index f8db2b6..a7a541a 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -224,8 +224,6 @@ optimizations = [
(('~flog2', ('frcp', a)), ('fneg', ('flog2', a))),
(('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
(('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
- (('~fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
- (('~fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
(('~fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
# Division and reciprocal
(('~fdiv', 1.0, a), ('frcp', a)),
commit b03b256e921c3f7cebfcf9efd5bdf7b403b9c961
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date: Fri Jun 17 10:48:53 2016 +0200
radeonsi: fix calculation of valid RB mask per SE
The old calculation treated too many RBs as disabled.
Cc: 11.0 11.1 11.2 12.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
(cherry picked from commit c95175581e983642dc4b23d059e6eaff5b79d2db)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 47af9c8..96da179 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3478,10 +3478,15 @@ si_write_harvested_raster_configs(struct si_context *sctx,
unsigned se_mask[4];
unsigned se;
- se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
- se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
- se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
- se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+ se_mask[0] = ((1 << rb_per_se) - 1);
+ se_mask[1] = (se_mask[0] << rb_per_se);
+ se_mask[2] = (se_mask[1] << rb_per_se);
+ se_mask[3] = (se_mask[2] << rb_per_se);
+
+ se_mask[0] &= rb_mask;
+ se_mask[1] &= rb_mask;
+ se_mask[2] &= rb_mask;
+ se_mask[3] &= rb_mask;
assert(num_se == 1 || num_se == 2 || num_se == 4);
assert(sh_per_se == 1 || sh_per_se == 2);
commit 52ae654569cb1e0e42f12b492fdd62d7cf20dc12
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date: Fri Jun 17 10:30:44 2016 +0200
radeonsi: raise SI_PM4_MAX_DW
The old limit, introduced in commit afa752d3f03ac6697581ff5d324e8ac0512ef513,
was exceeded by 4 SE configurations which hit si_write_harvested_raster_configs.
Cc: 11.1 11.2 12.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
(cherry picked from commit 6c2e63698290d3ea868eefcc3e4dd51dc1e16c64)
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 309a596..35fa6c3 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -29,7 +29,7 @@
#include "radeon/radeon_winsys.h"
-#define SI_PM4_MAX_DW 160
Reply to: