[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'ubuntu'



 VERSION                                                  |    2 
 bin/.cherry-ignore                                       |    4 
 debian/changelog                                         |   20 +
 debian/control                                           |    2 
 debian/libgl1-mesa-glx.symbols                           |    4 
 debian/rules                                             |    2 
 docs/relnotes/11.1.1.html                                |    3 
 docs/relnotes/11.1.2.html                                |  181 +++++++++++++++
 src/egl/drivers/dri2/egl_dri2.c                          |    2 
 src/gallium/auxiliary/pipe-loader/SConscript             |    9 
 src/gallium/auxiliary/pipe-loader/pipe_loader.c          |    5 
 src/gallium/auxiliary/util/u_cpu_detect.c                |    2 
 src/gallium/auxiliary/util/u_pstipple.c                  |    1 
 src/gallium/auxiliary/vl/vl_zscan.c                      |    7 
 src/gallium/auxiliary/vl/vl_zscan.h                      |    1 
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp |    2 
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp       |    3 
 src/gallium/drivers/nouveau/nv50/nv50_surface.c          |  120 ++++++++-
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c          |  179 ++++++++++++--
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c              |   16 +
 src/gallium/drivers/r600/r600_pipe.c                     |    6 
 src/gallium/drivers/radeonsi/si_descriptors.c            |   33 +-
 src/gallium/drivers/radeonsi/si_shader.c                 |    3 
 src/gallium/drivers/radeonsi/si_state_shaders.c          |   85 ++++---
 src/gallium/drivers/vc4/vc4_job.c                        |   11 
 src/gallium/state_trackers/omx/vid_dec_h264.c            |    7 
 src/gallium/targets/dri/Android.mk                       |    4 
 src/glsl/ast_to_hir.cpp                                  |   48 +++
 src/glsl/ir.h                                            |    7 
 src/glsl/link_varyings.cpp                               |   43 ++-
 src/glsl/linker.cpp                                      |    2 
 src/glsl/lower_subroutine.cpp                            |   24 +
 src/mesa/drivers/common/meta.c                           |   12 
 src/mesa/drivers/common/meta.h                           |    2 
 src/mesa/drivers/common/meta_blit.c                      |   35 +-
 src/mesa/drivers/common/meta_generate_mipmap.c           |   26 +-
 src/mesa/drivers/dri/i915/intel_buffer_objects.c         |    2 
 src/mesa/drivers/dri/i915/intel_context.c                |    2 
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp          |    2 
 src/mesa/drivers/dri/i965/brw_device_info.c              |    2 
 src/mesa/drivers/dri/i965/brw_fs.cpp                     |    6 
 src/mesa/drivers/dri/i965/brw_fs.h                       |    4 
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp           |   16 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp                   |    2 
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp        |    2 
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp               |    6 
 src/mesa/drivers/dri/i965/intel_buffer_objects.c         |    2 
 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c      |    2 
 src/mesa/main/bufferobj.c                                |    2 
 src/mesa/main/bufferobj.h                                |    4 
 src/mesa/main/shaderapi.c                                |   10 
 src/mesa/state_tracker/st_cb_bufferobjects.c             |    4 
 src/mesa/state_tracker/st_cb_texture.c                   |   10 
 src/mesa/state_tracker/st_gen_mipmap.c                   |    8 
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp               |    5 
 55 files changed, 807 insertions(+), 197 deletions(-)

New commits:
commit 583578934ac6bf132293bac13924510be5f8c00a
Author: Timo Aaltonen <tjaalton@debian.org>
Date:   Thu Feb 11 13:36:17 2016 +0200

    release to xenial

diff --git a/debian/changelog b/debian/changelog
index 295bccc..0c201cd 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+mesa (11.1.2-1ubuntu1) xenial; urgency=medium
+
+  * Merge from Debian.
+  * control, rules: Use llvm-3.8 for OpenGL 4.1 support. (LP: #1535500)
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Tue, 09 Feb 2016 13:40:00 +0200
+
 mesa (11.1.2-1) unstable; urgency=medium
 
   * New upstream release.
@@ -11,12 +18,6 @@ mesa (11.1.1-2) unstable; urgency=medium
 
  -- Andreas Boll <andreas.boll.dev@gmail.com>  Fri, 15 Jan 2016 18:44:52 +0100
 
-mesa (11.1.1-1ubuntu3) xenial; urgency=medium
-
-  * control, rules: Use llvm-3.8 for OpenGL 4.1 support. (LP: #1535500)
-
- -- Timo Aaltonen <tjaalton@debian.org>  Tue, 09 Feb 2016 13:40:00 +0200
-
 mesa (11.1.1-1ubuntu2) xenial; urgency=medium
 
   * Remove architecture restrictions on Mir EGL platform;

commit 487e085acd106f1c208cdf5eb1ed28f15c3fd083
Author: Timo Aaltonen <tjaalton@debian.org>
Date:   Thu Feb 11 13:25:27 2016 +0200

    release to unstable

diff --git a/debian/changelog b/debian/changelog
index 0ec2a1e..c9c87e3 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+mesa (11.1.2-1) unstable; urgency=medium
+
+  * New upstream release.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Thu, 11 Feb 2016 13:14:08 +0200
+
 mesa (11.1.1-2) unstable; urgency=medium
 
   * Limit the symbols glXGetDriverConfig and glXGetScreenDriver to

commit af53126479492d68c00c65a98a10ff035b4ef456
Author: Timo Aaltonen <tjaalton@debian.org>
Date:   Thu Feb 11 13:12:35 2016 +0200

    control, rules: Use llvm-3.8 for OpenGL 4.1 support. (LP: #1535500)

diff --git a/debian/changelog b/debian/changelog
index 87204ed..88f3809 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+mesa (11.1.1-1ubuntu3) xenial; urgency=medium
+
+  * control, rules: Use llvm-3.8 for OpenGL 4.1 support. (LP: #1535500)
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Tue, 09 Feb 2016 13:40:00 +0200
+
 mesa (11.1.1-1ubuntu2) xenial; urgency=medium
 
   * Remove architecture restrictions on Mir EGL platform;
diff --git a/debian/control b/debian/control
index c63a749..473f5fd 100644
--- a/debian/control
+++ b/debian/control
@@ -40,7 +40,7 @@ Build-Depends:
  libudev-dev [linux-any],
  flex,
  bison,
- llvm-3.6-dev (>= 1:3.6) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf],
+ llvm-3.8-dev (>= 1:3.8~) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf],
  libelf-dev [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf],
  libwayland-dev (>= 1.2.0) [linux-any],
 # libclang-3.7-dev (>= 1:3.7~+rc2) [amd64 i386 armhf],
diff --git a/debian/rules b/debian/rules
index ffc5934..932270c 100755
--- a/debian/rules
+++ b/debian/rules
@@ -101,7 +101,7 @@ else
   ifneq (,$(filter $(DEB_HOST_ARCH),amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf))
 	GALLIUM_DRIVERS += radeonsi
 	confflags_GALLIUM += --enable-gallium-llvm
-	confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-3.6
+	confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-3.8
 	GALLIUM_DRIVERS += swrast
   else
 	DRI_DRIVERS += swrast

commit 7bcd827806b0816d61122ba3d37dd40178d96d98
Author: Emil Velikov <emil.velikov@collabora.com>
Date:   Thu Feb 11 00:03:22 2016 +0000

    docs: add release notes for 11.1.2
    
    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>

diff --git a/docs/relnotes/11.1.2.html b/docs/relnotes/11.1.2.html
new file mode 100644
index 0000000..947dcbf
--- /dev/null
+++ b/docs/relnotes/11.1.2.html
@@ -0,0 +1,181 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.1.2 Release Notes / February 10, 2016</h1>
+
+<p>
+Mesa 11.1.2 is a bug fix release which fixes bugs found since the 11.1.1 release.
+</p>
+<p>
+Mesa 11.1.2 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93628">Bug 93628</a> - Exception: attempt to use unavailable module DRM when building MesaGL 11.1.0 on windows</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93648">Bug 93648</a> - Random lines being rendered when playing Dolphin (geometry shaders related, w/ apitrace)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93650">Bug 93650</a> - GL_ARB_separate_shader_objects is buggy (PCSX2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93717">Bug 93717</a> - Meta mipmap generation can corrupt texture state</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93722">Bug 93722</a> - Segfault when compiling shader with a subroutine that takes a parameter</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93731">Bug 93731</a> - glUniformSubroutinesuiv segfaults when subroutine uniform is bound to a specific location</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93761">Bug 93761</a> - A conditional discard in a fragment shader causes no depth writing at all</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Ben Widawsky (1):</p>
+<ul>
+  <li>i965/bxt: Fix conservative wm thread counts.</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>glsl: fix subroutine lowering reusing actual parmaters</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.1.1</li>
+  <li>cherry-ignore: drop the i965/kbl .num_slices patch</li>
+  <li>i915: correctly parse/set the context flags</li>
+  <li>targets/dri: android: use WHOLE static libraries</li>
+  <li>egl/dri2: expose srgb configs when KHR_gl_colorspace is available</li>
+  <li>Update version to 11.1.2</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>vc4: Don't record the seqno of a failed job submit.</li>
+  <li>vc4: Throttle outstanding rendering after submission.</li>
+</ul>
+
+<p>François Tigeot (1):</p>
+<ul>
+  <li>gallium: Add DragonFly support</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>r600g: don't leak driver const buffers</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>meta/blit: Restore GL_DEPTH_STENCIL_TEXTURE_MODE state for GL_TEXTURE_RECTANGLE</li>
+  <li>meta: Use internal functions to set texture parameters</li>
+</ul>
+
+<p>Ilia Mirkin (6):</p>
+<ul>
+  <li>st/mesa: use surface format to generate mipmaps when available</li>
+  <li>glsl: always compute proper varying type, irrespective of varying packing</li>
+  <li>nvc0: avoid crashing when there are holes in vertex array bindings</li>
+  <li>nv50,nvc0: fix buffer clearing to respect engine alignment requirements</li>
+  <li>nv50/ir: fix false global CSE on instructions with multiple defs</li>
+  <li>st/mesa: treat a write as a read for range purposes</li>
+</ul>
+
+<p>Jason Ekstrand (3):</p>
+<ul>
+  <li>i965/vec4: Use UW type for multiply into accumulator on GEN8+</li>
+  <li>i965/fs/generator: Take an actual shader stage rather than a string</li>
+  <li>i965/fs: Always set channel 2 of texture headers in some stages</li>
+</ul>
+
+<p>Jose Fonseca (2):</p>
+<ul>
+  <li>scons: Conditionally use DRM module on pipe-loader.</li>
+  <li>pipe-loader: Fix PATH_MAX define on MSVC.</li>
+</ul>
+
+<p>Karol Herbst (1):</p>
+<ul>
+  <li>nv50/ir: fix memory corruption when spilling and redoing RA</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>glsl: Make bitfield_insert/extract and bfi/bfm non-vectorizable.</li>
+  <li>glsl: Allow implicit int -&gt; uint conversions for bitwise operators (&amp;, ^, |).</li>
+</ul>
+
+<p>Leo Liu (2):</p>
+<ul>
+  <li>vl: add zig zag scan for list 4x4</li>
+  <li>st/omx/dec/h264: fix corruption when scaling matrix present flag set</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: don't miss changes to SPI_TMPRING_SIZE</li>
+</ul>
+
+<p>Nicolai Hähnle (11):</p>
+<ul>
+  <li>mesa/bufferobj: make _mesa_delete_buffer_object externally accessible</li>
+  <li>st/mesa: use _mesa_delete_buffer_object</li>
+  <li>radeon: use _mesa_delete_buffer_object</li>
+  <li>i915: use _mesa_delete_buffer_object</li>
+  <li>i965: use _mesa_delete_buffer_object</li>
+  <li>util/u_pstipple.c: copy immediates during transformation</li>
+  <li>radeonsi: extract the VGT_GS_MODE calculation into its own function</li>
+  <li>radeonsi: ensure that VGT_GS_MODE is sent when necessary</li>
+  <li>radeonsi: add DCC buffer for sampler views on new CS</li>
+  <li>st/mesa: use the correct address generation functions in st_TexSubImage blit</li>
+  <li>radeonsi: fix discard-only fragment shaders (11.1 version)</li>
+</ul>
+
+<p>Timothy Arceri (4):</p>
+<ul>
+  <li>glsl: fix segfault linking subroutine uniform with explicit location</li>
+  <li>mesa: fix segfault in glUniformSubroutinesuiv()</li>
+  <li>glsl: fix interface block error message</li>
+  <li>glsl: create helper to remove outer vertex index array used by some stages</li>
+</ul>
+
+
+</div>
+</body>
+</html>

commit ac65994a8ed4b5cf369a094e21876a08cf63dda7
Author: Emil Velikov <emil.velikov@collabora.com>
Date:   Wed Feb 10 23:59:40 2016 +0000

    Update version to 11.1.2
    
    Signed-off-by: Emil Velikov <emil.velikov@collabora.com>

diff --git a/VERSION b/VERSION
index 668182d..e9ac13b 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-11.1.1
+11.1.2

commit 185ee6b612aca567dfaa31ae1be93e3385af4be4
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date:   Tue Jan 19 15:18:31 2016 -0500

    radeonsi: fix discard-only fragment shaders (11.1 version)
    
    When a fragment shader is used that has no outputs but does conditional
    discard (KILL_IF), all fragments are killed without this patch.
    
    By comparing various register settings, my conclusion is that the exec mask
    is either not properly forwarded to the DB by NULL exports or ends up being
    unused, at least when there is _only_ a NULL export (the ISA documentation
    claims that NULL exports can be used to override a previously exported exec
    mask).
    
    Of the various approaches I have tried to work around the problem, this one
    seems to be the least invasive one.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93761
    Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Marek Olšák <marek.olsak@amd.com>

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 1baa2eb..1c1aaa0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2291,6 +2291,9 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
 		last_args[6]= uint->zero;
 		last_args[7]= uint->zero;
 		last_args[8]= uint->zero;
+
+		if (info->uses_kill)
+			si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_R;
 	}
 
 	/* Specify whether the EXEC mask represents the valid mask */

commit 25fc54992e96886d71e5f266069ca21ac4828a4b
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date:   Sat Jan 16 15:15:13 2016 -0500

    st/mesa: use the correct address generation functions in st_TexSubImage blit
    
    We need to tell the address generation functions about the dimensionality of
    the texture to correctly implement the part of Section 3.8.1 (Texture Image
    Specification) of the OpenGL 2.1 specification which says:
    
        "For the purposes of decoding the texture image, TexImage2D is
        equivalent to calling TexImage3D with corresponding arguments
        and depth of 1, except that
          ...
          * UNPACK SKIP IMAGES is ignored."
    
    Fixes a low impact bug that was found by chance while browsing the spec and
    extending piglit tests.
    
    Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
    (cherry picked from commit 4a448a63adbbece1d9bddacd9428aad7cc68a628)

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 62f149a..a76410f 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -805,18 +805,18 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
             /* 1D array textures.
              * We need to convert gallium coords to GL coords.
              */
-            GLvoid *src = _mesa_image_address3d(unpack, pixels,
+            GLvoid *src = _mesa_image_address2d(unpack, pixels,
                                                 width, depth, format,
-                                                type, 0, slice, 0);
+                                                type, slice, 0);
             memcpy(map, src, bytesPerRow);
          }
          else {
             ubyte *slice_map = map;
 
             for (row = 0; row < (unsigned) height; row++) {
-               GLvoid *src = _mesa_image_address3d(unpack, pixels,
-                                                   width, height, format,
-                                                   type, slice, row, 0);
+               GLvoid *src = _mesa_image_address(dims, unpack, pixels,
+                                                 width, height, format,
+                                                 type, slice, row, 0);
                memcpy(slice_map, src, bytesPerRow);
                slice_map += transfer->stride;
             }

commit 37aed859693d5eee5e108d09deda249478cc07ec
Author: Leo Liu <leo.liu@amd.com>
Date:   Mon Feb 1 13:32:31 2016 -0500

    st/omx/dec/h264: fix corruption when scaling matrix present flag set
    
    The scaling list should be filled out with zig zag scan
    
    v2: integrate zig zag scan for list 4x4 to vl(Christian)
    v3: move list determination out from the loop(Ilia)
    
    Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
    Signed-off-by: Leo Liu <leo.liu@amd.com>
    Reviewed-by: Christian König <christian.koenig@amd.com>
    (cherry picked from commit 6ad2e55a1405ac3757439dae55ed86425bb65806)

diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c
index f66ed89..b453682 100644
--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
+++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
@@ -35,6 +35,7 @@
 #include "util/u_memory.h"
 #include "util/u_video.h"
 #include "vl/vl_rbsp.h"
+#include "vl/vl_zscan.h"
 
 #include "entrypoint.h"
 #include "vid_dec.h"
@@ -205,6 +206,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si
                          const uint8_t *defaultList, const uint8_t *fallbackList)
 {
    unsigned lastScale = 8, nextScale = 8;
+   const int *list;
    unsigned i;
 
    /* (pic|seq)_scaling_list_present_flag[i] */
@@ -214,6 +216,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si
       return;
    }
 
+   list = (sizeOfScalingList == 16) ? vl_zscan_normal_16 : vl_zscan_normal;
    for (i = 0; i < sizeOfScalingList; ++i ) {
 
       if (nextScale != 0) {
@@ -224,8 +227,8 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si
             return;
          }
       }
-      scalingList[i] = nextScale == 0 ? lastScale : nextScale;
-      lastScale = scalingList[i];
+      scalingList[list[i]] = nextScale == 0 ? lastScale : nextScale;
+      lastScale = scalingList[list[i]];
    }
 }
 

commit 3adf11182110f0e623c173d0658ae08b1012d6f6
Author: Leo Liu <leo.liu@amd.com>
Date:   Mon Feb 1 12:04:34 2016 -0500

    vl: add zig zag scan for list 4x4
    
    Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
    Signed-off-by: Leo Liu <leo.liu@amd.com>
    Reviewed-by: Christian König <christian.koenig@amd.com>
    (cherry picked from commit 4f598f2173c6555a52aad942ce6ea75c65afe21a)

diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index 1c6cdd4..5241471 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -49,6 +49,13 @@ enum VS_OUTPUT
    VS_O_VTEX = 0
 };
 
+const int vl_zscan_normal_16[] =
+{
+   /* Zig-Zag scan pattern */
+    0, 1, 4, 8, 5, 2, 3, 6,
+    9,12,13,10, 7,11,14,15
+};
+
 const int vl_zscan_linear[] =
 {
    /* Linear scan pattern */
diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h
index eacee2d..268cf0a 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.h
+++ b/src/gallium/auxiliary/vl/vl_zscan.h
@@ -64,6 +64,7 @@ struct vl_zscan_buffer
    struct pipe_surface *dst;
 };
 
+extern const int vl_zscan_normal_16[];
 extern const int vl_zscan_linear[];
 extern const int vl_zscan_normal[];
 extern const int vl_zscan_alternate[];

commit f5f021ecc599fae8b668da76cd68a2b0e8c68cb2
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Fri Jan 29 14:45:38 2016 -0500

    st/mesa: treat a write as a read for range purposes
    
    We use this logic to detect live ranges and then do plain renaming
    across the whole codebase. As such, to prevent WaW hazards, we have to
    treat a write as if it were also a read.
    
    For example, the following sequence was observed before this patch:
    
     13: UIF TEMP[6].xxxx :0
     14:   ADD TEMP[6].x, CONST[6].xxxx, -IN[3].yyyy
     15:   RCP TEMP[7].x, TEMP[3].xxxx
     16:   MUL TEMP[3].x, TEMP[6].xxxx, TEMP[7].xxxx
     17:   ADD TEMP[6].x, CONST[7].xxxx, -IN[3].yyyy
     18:   RCP TEMP[7].x, TEMP[3].xxxx
     19:   MUL TEMP[4].x, TEMP[6].xxxx, TEMP[7].xxxx
    
    While after this patch it becomes:
    
     13: UIF TEMP[7].xxxx :0
     14:   ADD TEMP[7].x, CONST[6].xxxx, -IN[3].yyyy
     15:   RCP TEMP[8].x, TEMP[3].xxxx
     16:   MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].xxxx
     17:   ADD TEMP[7].x, CONST[7].xxxx, -IN[3].yyyy
     18:   RCP TEMP[8].x, TEMP[3].xxxx
     19:   MUL TEMP[5].x, TEMP[7].xxxx, TEMP[8].xxxx
    
    Most importantly note that in the first example, the second RCP is done
    on the result of the MUL while in the second, the second RCP should have
    the same value as the first. Looking at the GLSL source, it is apparent
    that both of the RCP's should have had the same source.
    
    Looking at what's going on, the GLSL looks something like
    
      float tmin_8;
      float tmin_10;
      tmin_10 = tmin_8;
    ... lots of code ...
      tmin_8 = tmpvar_17;
    ... more code that never looks at tmin_8 ...
    
    And so we end up with a last_read somewhere at the beginning, and a
    first_write somewhere at the bottom. For some reason DCE doesn't remove
    it, but even if that were fixed, DCE doesn't handle 100% of cases, esp
    including loops.
    
    With the last_read somewhere high up, we overwrite the previously
    correct (and large) last_read with a low one, and then proceed to decide
    to merge all kinds of junk onto this temp. Even if that weren't the
    case, and there were just some writes after the last read, then we might
    still overwrite a merged value with one of those.
    
    As a result, we should treat a write as a last_read for the purpose of
    determining the live range.
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Reviewed-by: Dave Airlie <airlied@redhat.com>
    Cc: mesa-stable@lists.freedesktop.org
    (cherry picked from commit 047b91771845453826dcdd0019adc7333348b158)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index a32c4cf..a21e229 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3776,9 +3776,11 @@ glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *
             last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
       }
       for (j = 0; j < num_inst_dst_regs(inst); j++) {
-         if (inst->dst[j].file == PROGRAM_TEMPORARY)
+         if (inst->dst[j].file == PROGRAM_TEMPORARY) {
             if (first_writes[inst->dst[j].index] == -1)
                first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
+            last_reads[inst->dst[j].index] = (depth == 0) ? i : -2;
+         }
       }
       for (j = 0; j < inst->tex_offset_num_offset; j++) {
          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
@@ -4292,6 +4294,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
             /* Update the first_writes and last_reads arrays with the new
              * values for the merged register index, and mark the newly unused
              * register index as such. */
+            assert(last_reads[j] >= last_reads[i]);
             last_reads[i] = last_reads[j];
             first_writes[j] = -1;
             last_reads[j] = -1;

commit 3ef2a4bb2eb4cec7ee00a7f78da7edc24ceb2a43
Author: François Tigeot <ftigeot@wolfpond.org>
Date:   Sun Jan 17 10:10:21 2016 +0100

    gallium: Add DragonFly support
    
    Cc: mesa-stable@lists.freedesktop.org
    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
    (cherry picked from commit a48afb92ffda6e149c553ec82a05fee9a17441f8)

diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index d1f9e97..51f2766 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -52,7 +52,7 @@
 #include <machine/cpu.h>
 #endif
 
-#if defined(PIPE_OS_FREEBSD)
+#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY)
 #include <sys/types.h>
 #include <sys/sysctl.h>
 #endif

commit 12888ad942a3e5580d153ed37bbcb4e48ced6a04
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Sat Jan 30 17:13:33 2016 -0500

    nv50/ir: fix false global CSE on instructions with multiple defs
    
    If an instruction has multiple defs, we have to do a lot more checks to
    make sure that we can move it forward. Among other things, various code
    likes to do
    
        a, b = tex()
        if () c = a
        else c = b
    
    which means that a single phi node will have results pointing at the
    same instruction. We obviously can't propagate the tex in this case, but
    properly accounting for this situation is tricky. Just don't try for
    instructions with multiple defs.
    
    This fixes about 20 shaders in shader-db, including the dolphin efb2ram
    shader.
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Cc: mesa-stable@lists.freedesktop.org
    (cherry picked from commit 3ca941d60ed38800038cd545842e0ed3a69946da)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 8a2516b..881836e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2814,6 +2814,8 @@ GlobalCSE::visit(BasicBlock *bb)
       ik = phi->getSrc(0)->getInsn();
       if (!ik)
          continue; // probably a function input
+      if (ik->defCount(0xff) > 1)
+         continue; // too painful to check if we can really push this forward
       for (s = 1; phi->srcExists(s); ++s) {
          if (phi->getSrc(s)->refCount() > 1)
             break;

commit 0f7d3d661d1cc3a0f0c57468bc4a378cacf88b0b
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Sat Jan 30 10:02:43 2016 -0500

    nv50,nvc0: fix buffer clearing to respect engine alignment requirements
    
    It appears that the nvidia render engine is quite picky when it comes to
    linear surfaces. It doesn't like non-256-byte aligned offsets, and
    apparently doesn't even do non-256-byte strides.
    
    This makes arb_clear_buffer_object-unaligned pass on both nv50 and nvc0.
    
    As a side-effect this also allows RGB32 clears to work via GPU data
    upload instead of synchronizing the buffer to the CPU (nvc0 only).
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu> # tested on GF108, GT215
    Tested-by: Nick Sarnie <commendsarnex@gmail.com> # GK208
    Cc: mesa-stable@lists.freedesktop.org
    (cherry picked from commit 3ca2001b537a2709e7ef60410e7dfad5d38663f4)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 8ba19d2..58bbcd0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -592,6 +592,82 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
 }
 
 static void
+nv50_clear_buffer_push(struct pipe_context *pipe,
+                       struct pipe_resource *res,
+                       unsigned offset, unsigned size,
+                       const void *data, int data_size)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv04_resource *buf = nv04_resource(res);
+   unsigned count = (size + 3) / 4;
+   unsigned xcoord = offset & 0xff;
+   unsigned tmp, i;
+
+   if (data_size == 1) {
+      tmp = *(unsigned char *)data;
+      tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
+      data = &tmp;
+      data_size = 4;
+   } else if (data_size == 2) {
+      tmp = *(unsigned short *)data;
+      tmp = (tmp << 16) | tmp;
+      data = &tmp;
+      data_size = 4;
+   }
+
+   unsigned data_words = data_size / 4;
+
+   nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+   nouveau_pushbuf_bufctx(push, nv50->bufctx);
+   nouveau_pushbuf_validate(push);
+
+   offset &= ~0xff;
+
+   BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
+   PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
+   PUSH_DATA (push, 262144);
+   PUSH_DATA (push, 65536);
+   PUSH_DATA (push, 1);
+   PUSH_DATAh(push, buf->address + offset);
+   PUSH_DATA (push, buf->address + offset);
+   BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+   BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
+   PUSH_DATA (push, size);
+   PUSH_DATA (push, 1);
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 1);
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 1);
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, xcoord);
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 0);
+
+   while (count) {
+      unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+      unsigned nr = nr_data * data_words;
+
+      BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
+      for (i = 0; i < nr_data; i++)
+         PUSH_DATAp(push, data, data_words);
+
+      count -= nr;
+   }
+
+   if (buf->mm) {
+      nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+      nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+   }
+
+   nouveau_bufctx_reset(nv50->bufctx, 0);
+}
+
+static void
 nv50_clear_buffer(struct pipe_context *pipe,
                   struct pipe_resource *res,
                   unsigned offset, unsigned size,
@@ -640,9 +716,22 @@ nv50_clear_buffer(struct pipe_context *pipe,
 
    assert(size % data_size == 0);
 
+   if (offset & 0xff) {
+      unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
+      assert(fixup_size % data_size == 0);
+      nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
+      offset += fixup_size;
+      size -= fixup_size;
+      if (!size)
+         return;
+   }
+
    elements = size / data_size;
    height = (elements + 8191) / 8192;
    width = elements / height;
+   if (height > 1)
+      width &= ~0xff;
+   assert(width > 0);
 
    BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
    PUSH_DATAf(push, color.f[0]);
@@ -666,13 +755,13 @@ nv50_clear_buffer(struct pipe_context *pipe,
    BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
    PUSH_DATA (push, 1);
    BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
-   PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
-   PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
+   PUSH_DATAh(push, buf->address + offset);
+   PUSH_DATA (push, buf->address + offset);
    PUSH_DATA (push, nv50_format_table[dst_fmt].rt);
    PUSH_DATA (push, 0);
    PUSH_DATA (push, 0);
    BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
-   PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
+   PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | align(width * data_size, 0x100));
    PUSH_DATA (push, height);
    BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
    PUSH_DATA (push, 0);
@@ -691,26 +780,21 @@ nv50_clear_buffer(struct pipe_context *pipe,
    BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
    PUSH_DATA (push, 0x3c);
 
+   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+   PUSH_DATA (push, nv50->cond_condmode);
+
+   if (buf->mm) {
+      nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+      nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+   }
+
    if (width * height != elements) {
       offset += width * height * data_size;
       width = elements - width * height;
-      height = 1;
-      BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2);
-      PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
-      PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
-      BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
-      PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
-      PUSH_DATA (push, height);
-      BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
-      PUSH_DATA (push, 0x3c);
+      nv50_clear_buffer_push(pipe, res, offset, width * data_size,
+                             data, data_size);
    }
 
-   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
-   PUSH_DATA (push, nv50->cond_condmode);
-
-   nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
-   nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
-
    nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
 }
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index aa3e4f2..5b3a153 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -355,27 +355,132 @@ nvc0_clear_render_target(struct pipe_context *pipe,
 }
 
 static void
-nvc0_clear_buffer_cpu(struct pipe_context *pipe,
-                      struct pipe_resource *res,
-                      unsigned offset, unsigned size,
-                      const void *data, int data_size)
+nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe, /* fill size bytes at offset in res with repeated data */
+                            struct pipe_resource *res,
+                            unsigned offset, unsigned size,
+                            const void *data, int data_size)
 {
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nv04_resource *buf = nv04_resource(res);
-   struct pipe_transfer *pt;
-   struct pipe_box box;
-   unsigned elements, i;
+   unsigned i;
 
-   elements = size / data_size;
+   nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+   nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+   nouveau_pushbuf_validate(push);
 
-   u_box_1d(offset, size, &box);
+   unsigned count = (size + 3) / 4; /* bytes -> 4-byte words, rounded up */
+   unsigned data_words = data_size / 4; /* NOTE(review): 0 when data_size < 4, and used as a divisor below - confirm callers */
 
-   uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, PIPE_TRANSFER_WRITE,
-                                          &box, &pt);
+   while (count) {
+      unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+      unsigned nr = nr_data * data_words; /* words in this packet: whole patterns only */
 
-   for (i = 0; i < elements; ++i)
-      memcpy(&map[i*data_size], data, data_size);
+      if (!PUSH_SPACE(push, nr + 9)) /* NOTE(review): 9 presumably = non-payload words emitted below - confirm */
+         break; /* leaves the loop early; the remaining range is not written */
 
-   buf->vtbl->transfer_unmap(pipe, pt);
+      BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+      PUSH_DATAh(push, buf->address + offset);
+      PUSH_DATA (push, buf->address + offset);
+      BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+      PUSH_DATA (push, MIN2(size, nr * 4)); /* bytes in this chunk, capped at the bytes left */
+      PUSH_DATA (push, 1);
+      BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+      PUSH_DATA (push, 0x100111);
+
+      /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+      BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
+      for (i = 0; i < nr_data; i++)
+         PUSH_DATAp(push, data, data_words); /* one copy of the pattern per iteration */
+
+      count -= nr;
+      offset += nr * 4; /* advance by the words just pushed, in bytes */
+      size -= nr * 4;
+   }
+
+   if (buf->mm) {
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+   }
+
+   nouveau_bufctx_reset(nvc0->bufctx, 0); /* presumably drops the ref added by nouveau_bufctx_refn above - confirm */
+}
+
+static void
+nvc0_clear_buffer_push_nve4(struct pipe_context *pipe,
+                            struct pipe_resource *res,
+                            unsigned offset, unsigned size,
+                            const void *data, int data_size)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nv04_resource *buf = nv04_resource(res);
+   unsigned i;
+
+   nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+   nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+   nouveau_pushbuf_validate(push);
+
+   unsigned count = (size + 3) / 4;
+   unsigned data_words = data_size / 4;
+
+   while (count) {
+      unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+      unsigned nr = nr_data * data_words;
+
+      if (!PUSH_SPACE(push, nr + 10))
+         break;
+
+      BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
+      PUSH_DATAh(push, buf->address + offset);
+      PUSH_DATA (push, buf->address + offset);
+      BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
+      PUSH_DATA (push, MIN2(size, nr * 4));


Reply to: