[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'debian-experimental'



 VERSION                                                                   |    2 
 debian/changelog                                                          |   10 
 debian/compat                                                             |    2 
 debian/control                                                            |    3 
 debian/patches/configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch |   52 +++
 debian/patches/series                                                     |    1 
 debian/rules                                                              |    6 
 src/amd/vulkan/Makefile.am                                                |    8 
 src/amd/vulkan/radv_device.c                                              |    3 
 src/compiler/glsl/ir_optimization.h                                       |    4 
 src/compiler/glsl/lower_instructions.cpp                                  |   19 -
 src/compiler/nir/nir_search.c                                             |   48 +--
 src/compiler/spirv/vtn_variables.c                                        |    3 
 src/gallium/auxiliary/gallivm/lp_bld_gather.c                             |    2 
 src/gallium/auxiliary/hud/hud_cpufreq.c                                   |    1 
 src/gallium/drivers/freedreno/a2xx/a2xx.xml.h                             |    2 
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h                             |    2 
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h                             |    2 
 src/gallium/drivers/freedreno/a5xx/a5xx.xml.h                             |   39 +-
 src/gallium/drivers/freedreno/a5xx/fd5_draw.c                             |   53 +--
 src/gallium/drivers/freedreno/a5xx/fd5_emit.c                             |    4 
 src/gallium/drivers/freedreno/a5xx/fd5_gmem.c                             |    3 
 src/gallium/drivers/freedreno/a5xx/fd5_program.c                          |   18 -
 src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c                       |   10 
 src/gallium/drivers/freedreno/adreno_common.xml.h                         |    2 
 src/gallium/drivers/freedreno/adreno_pm4.xml.h                            |    2 
 src/gallium/drivers/r600/r600_shader.c                                    |  157 +++++++---
 src/gallium/drivers/radeonsi/si_descriptors.c                             |   19 -
 src/gallium/drivers/radeonsi/si_state.c                                   |   22 +
 src/gallium/drivers/radeonsi/si_state_draw.c                              |    3 
 src/gallium/drivers/swr/rasterizer/core/threads.cpp                       |    9 
 src/gallium/drivers/swr/swr_query.cpp                                     |    7 
 src/gallium/drivers/swr/swr_query.h                                       |    2 
 src/intel/blorp/blorp_blit.c                                              |   82 +++++
 src/intel/vulkan/anv_image.c                                              |    7 
 src/mesa/drivers/dri/i965/brw_blorp.c                                     |   22 +
 src/mesa/drivers/dri/i965/brw_clear.c                                     |   56 ++-
 src/mesa/drivers/dri/i965/gen8_depth_state.c                              |   16 +
 src/mesa/drivers/dri/i965/genX_blorp_exec.c                               |   11 
 src/mesa/main/attrib.c                                                    |    3 
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp                                |    9 
 src/vulkan/wsi/wsi_common_x11.c                                           |    6 
 42 files changed, 553 insertions(+), 179 deletions(-)

New commits:
commit abb2f18588e473a784d4c13526cebdc984f917cf
Author: Andreas Boll <andreas.boll.dev@gmail.com>
Date:   Wed Jan 25 16:46:18 2017 +0100

    Upload to experimental.

diff --git a/debian/changelog b/debian/changelog
index e0eaede..2af4f69 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-mesa (17.0.0~rc2-1) UNRELEASED; urgency=medium
+mesa (17.0.0~rc2-1) experimental; urgency=medium
 
   * New upstream release candidate.
   * Add configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch:
@@ -6,7 +6,7 @@ mesa (17.0.0~rc2-1) UNRELEASED; urgency=medium
   * rules: Explicitly enable/disable gbm. Should fix FTBFS on hurd.
   * Revert to debhelper compat 9 (Workaround for #851130).
 
- -- Andreas Boll <andreas.boll.dev@gmail.com>  Wed, 25 Jan 2017 15:14:24 +0100
+ -- Andreas Boll <andreas.boll.dev@gmail.com>  Wed, 25 Jan 2017 16:45:53 +0100
 
 mesa (17.0.0~rc1-1) experimental; urgency=medium
 

commit b6a0489e6378a75743f10c6a71085ec04563b850
Author: Andreas Boll <andreas.boll.dev@gmail.com>
Date:   Wed Jan 25 16:00:46 2017 +0100

    Revert "Bump debhelper compat to 10."
    
    This reverts commit 181d812f4893be697cef128f407ff129331d0fc4.
    
    Conflicts:
    	debian/changelog

diff --git a/debian/changelog b/debian/changelog
index d1efeff..e0eaede 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -4,6 +4,7 @@ mesa (17.0.0~rc2-1) UNRELEASED; urgency=medium
   * Add configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch:
     Should fix FTBFS on some arches.
   * rules: Explicitly enable/disable gbm. Should fix FTBFS on hurd.
+  * Revert to debhelper compat 9 (Workaround for #851130).
 
  -- Andreas Boll <andreas.boll.dev@gmail.com>  Wed, 25 Jan 2017 15:14:24 +0100
 
diff --git a/debian/compat b/debian/compat
index f599e28..ec63514 100644
--- a/debian/compat
+++ b/debian/compat
@@ -1 +1 @@
-10
+9
diff --git a/debian/control b/debian/control
index 8f6f2ca..431bea2 100644
--- a/debian/control
+++ b/debian/control
@@ -5,7 +5,8 @@ Maintainer: Debian X Strike Force <debian-x@lists.debian.org>
 Uploaders: Andreas Boll <andreas.boll.dev@gmail.com>
 Standards-Version: 3.9.8
 Build-Depends:
- debhelper (>= 10),
+ debhelper (>= 9),
+ dh-autoreconf,
  quilt (>= 0.40),
  pkg-config,
  libdrm-dev (>= 2.4.74) [!hurd-any],
diff --git a/debian/rules b/debian/rules
index 30ecb42..8046c6c 100755
--- a/debian/rules
+++ b/debian/rules
@@ -219,7 +219,8 @@ override_dh_makeshlibs:
 	dh_makeshlibs -a -- -c4
 
 %:
-	dh $@ --with quilt \
+	dh $@ --with quilt,autoreconf \
+		--parallel \
 		--builddirectory=build/
 
 # For maintainer use only, generate a tarball:

commit f52f18f9e68d8082ab2fd6d22c3e79e38714d07e
Author: Andreas Boll <andreas.boll.dev@gmail.com>
Date:   Wed Jan 25 15:53:55 2017 +0100

    rules: Explicitly enable/disable gbm. Should fix FTBFS on hurd.

diff --git a/debian/changelog b/debian/changelog
index 17c8ee0..d1efeff 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,7 @@ mesa (17.0.0~rc2-1) UNRELEASED; urgency=medium
   * New upstream release candidate.
   * Add configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch:
     Should fix FTBFS on some arches.
+  * rules: Explicitly enable/disable gbm. Should fix FTBFS on hurd.
 
  -- Andreas Boll <andreas.boll.dev@gmail.com>  Wed, 25 Jan 2017 15:14:24 +0100
 
diff --git a/debian/rules b/debian/rules
index 685453c..30ecb42 100755
--- a/debian/rules
+++ b/debian/rules
@@ -33,6 +33,7 @@ confflags_DRI3 = --disable-dri3
 # hurd doesn't do direct rendering
 ifeq ($(DEB_HOST_ARCH_OS), hurd)
 	confflags_DIRECT_RENDERING = --disable-driglx-direct
+	confflags_GBM = --disable-gbm
 	DRI_DRIVERS = swrast
 else
   ifeq ($(DEB_HOST_ARCH_OS), linux)
@@ -98,6 +99,7 @@ else
   endif
 
 	confflags_DIRECT_RENDERING = --enable-driglx-direct
+	confflags_GBM = --enable-gbm
 	confflags_GALLIUM += --enable-vdpau
 	confflags_GALLIUM += --enable-va
 	confflags_GALLIUM += --enable-gallium-extra-hud
@@ -127,6 +129,7 @@ confflags += \
 	--disable-xvmc \
 	--disable-omx \
 	$(confflags_DIRECT_RENDERING) \
+	$(confflags_GBM) \
 	$(confflags_DRI3) \
 	$(confflags_EGL) \
 	$(confflags_GALLIUM) \

commit f84deee664dc925c52a2b28ee2e9fb11235020b7
Author: Andreas Boll <andreas.boll.dev@gmail.com>
Date:   Wed Jan 25 15:17:19 2017 +0100

    Add configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch
    
    Should fix FTBFS on some arches.

diff --git a/debian/changelog b/debian/changelog
index 21eeea8..17c8ee0 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,8 @@
 mesa (17.0.0~rc2-1) UNRELEASED; urgency=medium
 
   * New upstream release candidate.
+  * Add configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch:
+    Should fix FTBFS on some arches.
 
  -- Andreas Boll <andreas.boll.dev@gmail.com>  Wed, 25 Jan 2017 15:14:24 +0100
 
diff --git a/debian/patches/configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch b/debian/patches/configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch
new file mode 100644
index 0000000..dd286c4
--- /dev/null
+++ b/debian/patches/configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch
@@ -0,0 +1,52 @@
+From: Andreas Boll <andreas.boll.dev@gmail.com>
+Date: Tue, 24 Jan 2017 16:44:12 +0100
+Subject: [PATCH] configure.ac: Require LLVM for r300 only on x86 and x86_64
+
+b3119a3 introduced a strict LLVM requirement for r300 on all
+architectures and thus configure fails on architectures where LLVM is
+not available or buggy.
+
+r300 doesn't strictly require LLVM, but for performance reasons we
+highly recommend LLVM usage. So require it at least on x86 and x86_64
+architectures as we have done before b3119a3.
+
+Fixes: b3119a3 ("configure.ac: Check gallium LLVM version in gallium_require_llvm")
+Cc: 17.0 <mesa-stable@lists.freedesktop.org>
+Signed-off-by: Andreas Boll <andreas.boll.dev@gmail.com>
+---
+ configure.ac | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/configure.ac b/configure.ac
+index de8af87..7410a50 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -2213,6 +2213,19 @@ gallium_require_llvm() {
+ }
+ 
+ dnl
++dnl r300 doesn't strictly require LLVM, but for performance reasons we
++dnl highly recommend LLVM usage. So require it at least on x86 and x86_64
++dnl architectures.
++dnl
++r300_require_llvm() {
++    case "$host" in *gnux32) return;; esac
++    case "$host_cpu" in
++    i*86|x86_64|amd64) gallium_require_llvm $1
++        ;;
++    esac
++}
++
++dnl
+ dnl DRM is needed by X, Wayland, and offscreen rendering.
+ dnl Surfaceless is an alternative for the last one.
+ dnl
+@@ -2298,7 +2311,7 @@ if test -n "$with_gallium_drivers"; then
+             HAVE_GALLIUM_R300=yes
+             PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
+             require_libdrm "r300"
+-            gallium_require_llvm "r300"
++            r300_require_llvm "r300"
+             ;;
+         xr600)
+             HAVE_GALLIUM_R600=yes
diff --git a/debian/patches/series b/debian/patches/series
index 9f0749f..3ea8e9a 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1 +1,2 @@
 07_gallium-fix-build-failure-on-powerpcspe.diff
+configure.ac-Require-LLVM-for-r300-only-on-x86-and-x.patch

commit 66ce86877888a3dbaa664aab9afa72ee9ad6d047
Author: Andreas Boll <andreas.boll.dev@gmail.com>
Date:   Wed Jan 25 15:14:38 2017 +0100

    Bump changelog

diff --git a/debian/changelog b/debian/changelog
index 2739c46..21eeea8 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+mesa (17.0.0~rc2-1) UNRELEASED; urgency=medium
+
+  * New upstream release candidate.
+
+ -- Andreas Boll <andreas.boll.dev@gmail.com>  Wed, 25 Jan 2017 15:14:24 +0100
+
 mesa (17.0.0~rc1-1) experimental; urgency=medium
 
   * New upstream release candidate.

commit d283ec0a7b61bc33d970f5cb4c2bfbd63d255c2c
Author: Emil Velikov <emil.velikov@collabora.com>
Date:   Wed Jan 25 13:24:27 2017 +0000

    Update version to 17.0.0-rc2
    
    Signed-off-by: Emil Velikov <emil.velikov@collabora.com>

diff --git a/VERSION b/VERSION
index 964d0ff..032c50c 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-17.0.0-rc1
+17.0.0-rc2

commit 9577977266068b1b3666e0265bd0d6dcb1563572
Author: Topi Pohjolainen <topi.pohjolainen@intel.com>
Date:   Tue Jan 17 12:00:37 2017 +0200

    i965/blorp: Make post draw flush more explicit
    
    Blits do not need any special treatment as the target buffer
    object is added to render cache just as one does for normal draw.
    Color clears and resolves in turn require explicit "end of pipe
    synchronization". It is not clear what this means exactly but the
    assumption is that render cache flush with command stream stall
    should be sufficient.
    
    Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    (cherry picked from commit 180653c357d19ca88f7895f59874a58fac99cc53)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
index 8d58616..d79f529 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -908,6 +908,17 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
       blorp_batch_finish(&batch);
    }
 
+   /*
+    * Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
+    *
+    *  Any transition from any value in {Clear, Render, Resolve} to a
+    *  different value in {Clear, Render, Resolve} requires end of pipe
+    *  synchronization.
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                               PIPE_CONTROL_CS_STALL);
+
    return true;
 }
 
@@ -975,6 +986,17 @@ brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
                      brw_blorp_to_isl_format(brw, format, true),
                      resolve_op);
    blorp_batch_finish(&batch);
+
+   /*
+    * Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
+    *
+    *  Any transition from any value in {Clear, Render, Resolve} to a
+    *  different value in {Clear, Render, Resolve} requires end of pipe
+    *  synchronization.
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                               PIPE_CONTROL_CS_STALL);
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index b72ecb6..647a362 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -261,9 +261,4 @@ retry:
 
    if (params->dst.enabled)
       brw_render_cache_set_add_bo(brw, params->dst.addr.buffer);
-
-   /* Flush the sampler cache so any texturing from the destination is
-    * coherent.
-    */
-   brw_emit_mi_flush(brw);
 }

commit 8621961d4334bbfd51cb9e1934c6dcbc741699a9
Author: Topi Pohjolainen <topi.pohjolainen@intel.com>
Date:   Tue Jan 17 11:48:49 2017 +0200

    i965/gen6: Issue direct depth stall and flush after depth clear
    
    instead of calling unconditionally brw_emit_mi_flush() which
    does:
    
       brw_emit_pipe_control_flush(brw,
                                    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                    PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                    PIPE_CONTROL_CS_STALL);
    
       brw_emit_pipe_control_flush(brw,
                                    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                    PIPE_CONTROL_CONST_CACHE_INVALIDATE);
    
    Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    (cherry picked from commit 46b346899d98e29943f8cd74c25bcb8d2f868a49)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 7fcde6c..ba9aa4b 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -234,7 +234,12 @@ brw_fast_clear_depth(struct gl_context *ctx)
        *      by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
        *      followed by Depth FLUSH'
       */
-      brw_emit_mi_flush(brw);
+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_DEPTH_STALL);
+
+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                  PIPE_CONTROL_CS_STALL);
    }
 
    /* Now, the HiZ buffer contains data that needs to be resolved to the depth

commit 7d5a98f106b0695aca305bac8eb8833324bb8fe3
Author: Topi Pohjolainen <topi.pohjolainen@intel.com>
Date:   Tue Jan 17 11:44:52 2017 +0200

    i965: Make depth clear flushing more explicit
    
    Current blorp logic issues unconditional "flush everything"
    (see brw_emit_mi_flush()) after each render. For example, all
    blits issue this unconditionally which shouldn't be needed if
    they set render cache properly so that subsequent renders do
    necessary flushing before drawing.
    
    In case of piglit:
    
    ext_framebuffer_multisample-accuracy all_samples depth_draw small
    
    intel_hiz_exec() is always preceded by blorp blit and the
    unconditional flush looks to hide the lack of stall and flushes
    in depth clears. By removing the brw_emit_mi_flush() I get gpu
    hangs.
    
    This patch adds the stalls and flushes mandated by the spec
    and gets rid of those hangs.
    
    v2 (Jason, Ken): Document the rationale for separating
                     depth cache flush and stall on Gen7.
    
    Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    (cherry picked from commit e6da6943fed1228c551af1f0e1a405b6d67b41ae)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 488732c..7fcde6c 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -36,6 +36,7 @@
 
 #include "brw_context.h"
 #include "brw_blorp.h"
+#include "brw_defines.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BLIT
 
@@ -174,14 +175,46 @@ brw_fast_clear_depth(struct gl_context *ctx)
       mt->depth_clear_value = depth_clear_value;
    }
 
-   /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
-    *
-    *     "If other rendering operations have preceded this clear, a
-    *      PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
-    *      must be issued before the rectangle primitive used for the depth
-    *      buffer clear operation.
-    */
-   brw_emit_mi_flush(brw);
+   if (brw->gen == 6) {
+      /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
+       *
+       *   "If other rendering operations have preceded this clear, a
+       *    PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
+       *    must be issued before the rectangle primitive used for the depth
+       *    buffer clear operation.
+       */
+       brw_emit_pipe_control_flush(brw,
+                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                   PIPE_CONTROL_CS_STALL);
+   } else if (brw->gen >= 7) {
+      /*
+       * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
+       *
+       *   If other rendering operations have preceded this clear, a
+       *   PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
+       *   enabled must be issued before the rectangle primitive used for the
+       *   depth buffer clear operation.
+       *
+       * Same applies for Gen8 and Gen9.
+       *
+       * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1 PIPE_CONTROL,
+       * Depth Cache Flush Enable:
+       *
+       *   This bit must not be set when Depth Stall Enable bit is set in
+       *   this packet.
+       *
+       * This is confirmed to hold for real, HSW gets immediate gpu hangs.
+       *
+       * Therefore issue two pipe control flushes, one for cache flush and
+       * another for depth stall.
+       */
+       brw_emit_pipe_control_flush(brw,
+                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                   PIPE_CONTROL_CS_STALL);
+
+       brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
+   }
 
    if (fb->MaxNumLayers > 0) {
       for (unsigned layer = 0; layer < depth_irb->layer_count; layer++) {
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 14689f4..ec29669 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -511,6 +511,22 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
    OUT_BATCH(0);
    ADVANCE_BATCH();
 
+   /*
+    * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
+    *
+    *  Depth buffer clear pass using any of the methods (WM_STATE, 3DSTATE_WM
+    *  or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command with
+    *  DEPTH_STALL bit and Depth FLUSH bits "set" before starting to render.
+    *  DepthStall and DepthFlush are not needed between consecutive depth
+    *  clear passes nor is it required if the depth clear pass was done with
+    *  "full_surf_clear" bit set in the 3DSTATE_WM_HZ_OP.
+    *
+    *  TODO: As the spec says, this could be conditional.
+    */
+   brw_emit_pipe_control_flush(brw, 
+                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                               PIPE_CONTROL_DEPTH_STALL);
+
    /* Mark this buffer as needing a TC flush, as we've rendered to it. */
    brw_render_cache_set_add_bo(brw, mt->bo);
 

commit 4e6445caa96f66736b47b257a59dc922e31b7cf6
Author: Topi Pohjolainen <topi.pohjolainen@intel.com>
Date:   Tue Jan 17 11:04:22 2017 +0200

    i965/blorp: Use the render cache mechanism instead of explicit flushing
    
    by replacing brw_emit_mi_flush() with brw_render_cache_set_check_flush().
    The latter splits the flush in two:
    
       brw_emit_pipe_control_flush(brw,
                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                   PIPE_CONTROL_CS_STALL);
    
       brw_emit_pipe_control_flush(brw,
                                   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                   PIPE_CONTROL_CONST_CACHE_INVALIDATE);
    
    instead of
    
       int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
       if (brw->gen >= 6) {
          flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                   PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
                   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                   PIPE_CONTROL_CS_STALL;
       }
       brw_emit_pipe_control_flush(brw, flags);
    
    v2 (Jason): Check that destination exists before trying to add to
                render cache. Depth clears and resolves don't have it.
    
    Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    (cherry picked from commit 4840a53e902b0f2b9841d9dbb90e479a3688153d)

diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index bb1dfa9..b72ecb6 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -25,6 +25,7 @@
 
 #include "intel_batchbuffer.h"
 #include "intel_mipmap_tree.h"
+#include "intel_fbo.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
@@ -179,7 +180,9 @@ genX(blorp_exec)(struct blorp_batch *batch,
     * data with different formats, which blorp does for stencil and depth
     * data.
     */
-   brw_emit_mi_flush(brw);
+   if (params->src.enabled)
+      brw_render_cache_set_check_flush(brw, params->src.addr.buffer);
+   brw_render_cache_set_check_flush(brw, params->dst.addr.buffer);
 
    brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
 
@@ -256,6 +259,9 @@ retry:
    brw->no_depth_or_stencil = false;
    brw->ib.type = -1;
 
+   if (params->dst.enabled)
+      brw_render_cache_set_add_bo(brw, params->dst.addr.buffer);
+
    /* Flush the sampler cache so any texturing from the destination is
     * coherent.
     */

commit e405d0d3c653dc00049e2c713dbc158014055529
Author: Marek Olšák <marek.olsak@amd.com>
Date:   Fri Jan 20 01:13:39 2017 +0100

    radeonsi: always set the TCL1_ACTION_ENA when invalidating L2
    
    Some CIK-VI docs say this is the default behavior on SI. That doesn't
    answer whether it's also the default behavior on CIK-VI.
    
    Cc: 17.0 13.0 <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
    (cherry picked from commit 573bf0940a08e18a511e338de478f30fd95a1590)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 837c025..d296874 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -850,11 +850,12 @@ void si_emit_cache_flush(struct si_context *sctx)
 	if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 ||
 	    (rctx->chip_class <= CIK &&
 	     (rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
-		/* Invalidate L1 & L2. (L1 is always invalidated)
+		/* Invalidate L1 & L2. (L1 is always invalidated on SI)
 		 * WB must be set on VI+ when TC_ACTION is set.
 		 */
 		si_emit_surface_sync(rctx, cp_coher_cntl |
 				     S_0085F0_TC_ACTION_ENA(1) |
+				     S_0085F0_TCL1_ACTION_ENA(1) |
 				     S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI));
 		cp_coher_cntl = 0;
 		sctx->b.num_L2_invalidates++;

commit 0c4b8c75e2df340db306a5193eb6bc03babfd8c1
Author: Grazvydas Ignotas <notasas@gmail.com>
Date:   Mon Jan 23 23:16:42 2017 +0200

    radv: don't resubmit the same cs over and over while tracing
    
    Fixes: 97dfff54 ("radv: Dump command buffer on hang.")
    Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
    Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
    CC: <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit f65b3641c3233f1697b96ea8126b578dae6de4f1)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 9371536..4aa6af2 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -989,8 +989,7 @@ VkResult radv_QueueSubmit(
 			if (queue->device->trace_bo)
 				*queue->device->trace_id_ptr = 0;
 
-			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
-							pSubmits[i].commandBufferCount,
+			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance,
 							(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
 							b ? pSubmits[i].waitSemaphoreCount : 0,
 							(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,

commit e35cfa15cf2aa894dd267309eed250f6bc3c68c6
Author: George Kyriazis <george.kyriazis@intel.com>
Date:   Wed Jan 18 17:09:08 2017 -0600

    swr: Align query results allocation
    
    Some query results struct contents are declared as cache line aligned.
    Use aligned malloc, and align the whole struct, to be safe.
    
    Fixes crash when compiling with clang.
    
    CC: <mesa-stable@lists.freedesktop.org>
    
    Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
    (cherry picked from commit 00847e4f14dd237dfcdb2c3d15be1325a08ccf5a)

diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
index 6eb0781..e097790 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -29,7 +29,7 @@
 #include "swr_query.h"
 #include "swr_screen.h"
 #include "swr_state.h"
-
+#include "common/os.h"
 
 static struct swr_query *
 swr_query(struct pipe_query *p)
@@ -45,7 +45,8 @@ swr_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
    assert(type < PIPE_QUERY_TYPES);
    assert(index < MAX_SO_STREAMS);
 
-   pq = CALLOC_STRUCT(swr_query);
+   pq = (struct swr_query *) AlignedMalloc(sizeof(struct swr_query), 64);
+   memset(pq, 0, sizeof(*pq));
 
    if (pq) {
       pq->type = type;
@@ -67,7 +68,7 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
       swr_fence_reference(pipe->screen, &pq->fence, NULL);
    }
 
-   FREE(pq);
+   AlignedFree(pq);
 }
 
 
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
index c5160ce..1c736e4 100644
--- a/src/gallium/drivers/swr/swr_query.h
+++ b/src/gallium/drivers/swr/swr_query.h
@@ -34,7 +34,7 @@ struct swr_query_result {
    uint64_t timestamp_end;
 };
 
-struct swr_query {
+OSALIGNLINE(struct) swr_query {
    unsigned type; /* PIPE_QUERY_* */
    unsigned index;
 

commit 34f902e17efb2f3265d5629f387fdc9a8f08091d
Author: Bruce Cherniak <bruce.cherniak@intel.com>
Date:   Thu Jan 19 15:44:52 2017 -0600

    swr: Prune empty nodes in CalculateProcessorTopology.
    
    CalculateProcessorTopology tries to figure out system topology by
    parsing /proc/cpuinfo to determine the number of threads, cores, and
    NUMA nodes.  There are some architectures where the "physical id" begins
    with 1 rather than 0, which was creating an empty "0" node and causing a
    crash in CreateThreadPool.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97102
    Reviewed-By: George Kyriazis <george.kyriazis@intel.com>
    CC: <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit b829206b0739925501bcc68233437d6d03b79795)

diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index ee12612..f1c3030 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -217,6 +217,15 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
         out_numThreadsPerProcGroup++;
     }
 
+    /* Prune empty numa nodes */
+    for (auto it = out_nodes.begin(); it != out_nodes.end(); ) {
+       if ((*it).cores.size() == 0)
+          it = out_nodes.erase(it);
+       else
+          ++it;
+    }
+
+    /* Prune empty core nodes */
     for (uint32_t node = 0; node < out_nodes.size(); node++) {
         auto& numaNode = out_nodes[node];
         auto it = numaNode.cores.begin();

commit e4cf4690d1b20a8eab7296c79051ad05a9a5cc25
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date:   Mon Jan 16 16:43:54 2017 +0100

    st/glsl_to_tgsi: use DDIV instead of DRCP + DMUL
    
    Fixes GL45-CTS.gpu_shader_fp64.built_in_functions.
    
    v2: use DDIV unconditionally (Roland)
    
    Reviewed-by: Roland Scheidegger <sroland@vmware.com> (v1)
    Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v1)
    Tested-by: Glenn Kennard <glenn.kennard@gmail.com>
    Tested-by: James Harvey <lothmordor@gmail.com>
    Cc: 17.0 <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit cfabbbcfd778cc404813c9f05a9ef79efe531980)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 4bdb3a6..f9dc9b1 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -955,7 +955,7 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op,
       case3fid(MUL, UMUL, DMUL);
       case3fid(MAD, UMAD, DMAD);
       case3fid(FMA, UMAD, DFMA);
-      case3(DIV, IDIV, UDIV);
+      case4d(DIV, IDIV, UDIV, DDIV);
       case4d(MAX, IMAX, UMAX, DMAX);
       case4d(MIN, IMIN, UMIN, DMIN);
       case2iu(MOD, UMOD);
@@ -1710,10 +1710,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
       emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
    case ir_binop_div:
-      if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
-         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
-      else
-         emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
       break;
    case ir_binop_mod:
       if (result_dst.type == GLSL_TYPE_FLOAT)
@@ -6918,7 +6915,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
       lower_instructions(ir,
                          MOD_TO_FLOOR |
-                         DIV_TO_MUL_RCP |
+                         FDIV_TO_MUL_RCP |
                          EXP_TO_EXP2 |
                          LOG_TO_LOG2 |
                          LDEXP_TO_ARITH |

commit 7f6c6b910156629f988277ef0c177a2e97ca17e0
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date:   Mon Jan 16 16:39:06 2017 +0100

    glsl: split DIV_TO_MUL_RCP into single- and double-precision flags
    
    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
    Tested-by: Glenn Kennard <glenn.kennard@gmail.com>
    Tested-by: James Harvey <lothmordor@gmail.com>
    Cc: 17.0 <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit b71c415c3d288da4b5f533ece42f50f4f20a8c33)

diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 0d6c4e6..01e5270 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -30,7 +30,7 @@
 
 /* Operations for lower_instructions() */
 #define SUB_TO_ADD_NEG     0x01
-#define DIV_TO_MUL_RCP     0x02
+#define FDIV_TO_MUL_RCP    0x02
 #define EXP_TO_EXP2        0x04
 #define POW_TO_EXP2        0x08
 #define LOG_TO_LOG2        0x10
@@ -49,6 +49,8 @@
 #define FIND_LSB_TO_FLOAT_CAST    0x20000
 #define FIND_MSB_TO_FLOAT_CAST    0x40000
 #define IMUL_HIGH_TO_MUL          0x80000
+#define DDIV_TO_MUL_RCP           0x100000
+#define DIV_TO_MUL_RCP            (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 9fc83d1..729cb13 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -54,8 +54,8 @@
  * want to recognize add(op0, neg(op1)) or the other way around to
  * produce a subtract anyway.
  *
- * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
- * --------------------------------------
+ * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
+ * ---------------------------------------------------------
  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  *
  * Many GPUs don't have a divide instruction (945 and 965 included),
@@ -63,9 +63,11 @@
  * reciprocal.  By breaking the operation down, constant reciprocals
  * can get constant folded.
  *
- * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
- * handles the integer case, converting to and from floating point so that
- * RCP is possible.
+ * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
+ * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
+ * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
+ * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
+ * point so that RCP is possible.
  *
  * EXP_TO_EXP2 and LOG_TO_LOG2:
  * ----------------------------
@@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
    /* Don't generate new IR that would need to be lowered in an additional
     * pass.
     */
-   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
+   if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
+       (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
       div_to_mul_rcp(div_expr);
 
    ir_expression *const floor_expr =
@@ -1599,8 +1602,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
    case ir_binop_div:
       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
 	 int_div_to_mul_rcp(ir);
-      else if ((ir->operands[1]->type->is_float() ||
-                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
+      else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
+               (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
 	 div_to_mul_rcp(ir);
       break;
 

commit 23ead4c7b22aaf60f1a902fbea268b57b88d3a78
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date:   Thu Jan 19 14:44:57 2017 +0100

    r600: implement DDIV
    
    Tested-by: Glenn Kennard <glenn.kennard@gmail.com>
    Tested-by: James Harvey <lothmordor@gmail.com>
    Cc: 17.0 <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit e4f8f9a638c1ffb9b76840b088290f11f0f91813)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 5c4bc91..eaabb04 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4391,6 +4391,63 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
 }
 
 /*
+ * Emit RECIP_64 + MUL_64 to implement division.
+ */
+static int cayman_ddiv_instr(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	int r;
+	struct r600_bytecode_alu alu;
+	int t1 = ctx->temp_reg;
+	int k;
+
+	/* Only support one double at a time. This is the same constraint as
+	 * in DMUL lowering. */
+	assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
+	       inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
+
+	k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
+
+	r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false);
+	if (r)
+		return r;
+
+	for (int i = 0; i < 4; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP2_MUL_64;
+
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1));
+
+		alu.src[1].sel = t1;
+		alu.src[1].chan = (i == 3) ? 0 : 1;
+
+		alu.dst.sel = t1;
+		alu.dst.chan = i;
+		alu.dst.write = 1;
+		if (i == 3)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	for (int i = 0; i < 2; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.src[0].sel = t1;
+		alu.src[0].chan = i;
+		tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst);
+		alu.dst.write = 1;
+		if (i == 1)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+/*
  * r600 - trunc to -PI..PI range
  * r700 - normalize by dividing by 2PI
  * see fdo bug 27901
@@ -9400,6 +9457,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
 	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
 	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
+	[TGSI_OPCODE_DDIV]	= { 0, cayman_ddiv_instr },
 	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
 	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},


Reply to: