[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'upstream-experimental'



 VERSION                                                       |    2 
 bin/.cherry-ignore                                            |    5 
 configure.ac                                                  |   19 
 docs/relnotes/11.1.0.html                                     |    3 
 docs/relnotes/11.1.1.html                                     |  196 ++++++++++
 include/pci_ids/i965_pci_ids.h                                |   22 +
 src/gallium/auxiliary/tgsi/tgsi_scan.c                        |    3 
 src/gallium/auxiliary/tgsi/tgsi_scan.h                        |    1 
 src/gallium/auxiliary/util/u_helpers.c                        |    8 
 src/gallium/drivers/freedreno/ir3/ir3_print.c                 |    2 
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp |  129 ++----
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h   |   10 
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp      |    6 
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c              |    6 
 src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c       |    3 
 src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c           |    4 
 src/gallium/drivers/nouveau/nv50/nv50_screen.c                |    5 
 src/gallium/drivers/nouveau/nv50/nv50_state.c                 |    7 
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c                   |    2 
 src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c             |    4 
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c               |    6 
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c              |    7 
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c       |    3 
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c           |    4 
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c                |    1 
 src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c          |    1 
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c                 |    6 
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c               |    2 
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c                   |    7 
 src/gallium/drivers/nouveau/nvc0/nvc0_video.c                 |    7 
 src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c             |    4 
 src/gallium/drivers/r600/evergreen_state.c                    |   11 
 src/gallium/drivers/r600/r600_shader.c                        |    3 
 src/gallium/drivers/r600/r600_state.c                         |    2 
 src/gallium/drivers/radeon/Makefile.am                        |    5 
 src/gallium/drivers/radeon/cayman_msaa.c                      |   12 
 src/gallium/drivers/radeon/r600_query.c                       |    6 
 src/gallium/drivers/radeon/r600d_common.h                     |    2 
 src/gallium/drivers/radeon/radeon_llvm_emit.c                 |    5 
 src/gallium/drivers/radeonsi/si_state_draw.c                  |   18 
 src/gallium/drivers/vc4/vc4_job.c                             |    8 
 src/gallium/drivers/vc4/vc4_qpu_schedule.c                    |    3 
 src/gallium/targets/opencl/Makefile.am                        |    5 
 src/glsl/ir_set_program_inouts.cpp                            |   28 -
 src/glsl/link_varyings.cpp                                    |   19 
 src/glsl/linker.cpp                                           |   13 
 src/glsl/nir/glsl_types.cpp                                   |   18 
 src/glsl/nir/glsl_types.h                                     |   13 
 src/glsl/nir/nir.h                                            |    1 
 src/glsl/nir/nir_opt_algebraic.py                             |    1 
 src/glx/dri3_glx.c                                            |    6 
 src/mesa/drivers/common/meta_generate_mipmap.c                |   17 
 src/mesa/drivers/dri/common/drirc                             |    8 
 src/mesa/drivers/dri/i965/brw_context.c                       |    6 
 src/mesa/drivers/dri/i965/brw_device_info.c                   |   60 +++
 src/mesa/drivers/dri/i965/brw_shader.cpp                      |    1 
 src/mesa/drivers/dri/i965/brw_surface_formats.c               |    5 
 src/mesa/main/atifragshader.c                                 |    2 
 src/mesa/main/shader_query.cpp                                |   11 
 src/mesa/main/varray.c                                        |    2 
 src/mesa/program/prog_parameter.c                             |   47 +-
 src/mesa/program/prog_parameter.h                             |    4 
 src/mesa/state_tracker/st_atom.c                              |    2 
 src/mesa/state_tracker/st_cb_bitmap.c                         |    6 
 src/mesa/state_tracker/st_cb_drawpixels.c                     |   14 
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp                    |    6 
 src/mesa/state_tracker/st_program.c                           |   17 
 src/mesa/state_tracker/st_program.h                           |    1 
 src/util/ralloc.c                                             |    1 
 69 files changed, 647 insertions(+), 227 deletions(-)

New commits:
commit 330aa44a0da7548000a6b2fc2bb580e9c8e733cc
Author: Emil Velikov <emil.velikov@collabora.com>
Date:   Wed Jan 13 12:11:33 2016 +0200

    docs: add release notes for 11.1.1
    
    Signed-off-by: Emil Velikov <emil.velikov@collabora.com>

diff --git a/docs/relnotes/11.1.1.html b/docs/relnotes/11.1.1.html
new file mode 100644
index 0000000..94e8660
--- /dev/null
+++ b/docs/relnotes/11.1.1.html
@@ -0,0 +1,196 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.1.1 Release Notes / January 13, 2016</h1>
+
+<p>
+Mesa 11.1.1 is a bug fix release which fixes bugs found since the 11.1.0 release.
+</p>
+<p>
+Mesa 11.1.1 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91806">Bug 91806</a> - configure does not test whether assembler supports sse4.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92229">Bug 92229</a> - [APITRACE] SOMA have serious graphical errors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92233">Bug 92233</a> - Unigine Heaven 4.0 silhuette run</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93004">Bug 93004</a> - Guild Wars 2 crash on nouveau DX11 cards</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93215">Bug 93215</a> - [Regression bisected] Ogles1conform Automatic mipmap generation test is fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93257">Bug 93257</a> - [SKL, bisected] ASTC dEQP tests segfault</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: check state-&gt;mesa in early return check in st_validate_state()</li>
+</ul>
+
+<p>Dave Airlie (6):</p>
+<ul>
+  <li>mesa/varray: set double arrays to non-normalised.</li>
+  <li>mesa/shader: return correct attribute location for double matrix arrays</li>
+  <li>glsl: pass stage into mark function</li>
+  <li>glsl/fp64: add helper for dual slot double detection.</li>
+  <li>glsl: fix count_attribute_slots to allow for different 64-bit handling</li>
+  <li>glsl: only update doubles inputs for vertex inputs.</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.1</li>
+  <li>cherry-ignore: drop the "re-enable" DCC on Stoney</li>
+  <li>cherry-ignore: don't pick a specific i965 formats patch</li>
+  <li>Update version to 11.1.1</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>vc4: Warn instead of abort()ing on exec ioctl failures.</li>
+  <li>vc4: Keep sample mask writes from being reordered after TLB writes</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>r600: fix constant buffer size programming</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>meta/generate_mipmap: Work-around GLES 1.x problem with GL_DRAW_FRAMEBUFFER</li>
+</ul>
+
+<p>Ilia Mirkin (9):</p>
+<ul>
+  <li>nv50/ir: can't have predication and immediates</li>
+  <li>gk104/ir: simplify and fool-proof texbar algorithm</li>
+  <li>glsl: assign varying locations to tess shaders when doing SSO</li>
+  <li>glx/dri3: a drawable might not be bound at wait time</li>
+  <li>nvc0: don't forget to reset VTX_TMP bufctx slot after blit completion</li>
+  <li>nv50/ir: float(s32 &amp; 0xff) = float(u8), not s8</li>
+  <li>nv50,nvc0: make sure there's pushbuf space and that we ref the bo early</li>
+  <li>nv50,nvc0: fix crash when increasing bsp bo size for h264</li>
+  <li>nvc0: scale up inter_bo size so that it's 16M for a 4K video</li>
+</ul>
+
+<p>Jonathan Gray (2):</p>
+<ul>
+  <li>configure.ac: use pkg-config for libelf</li>
+  <li>configure: check for python2.7 for PYTHON2</li>
+</ul>
+
+<p>Kenneth Graunke (5):</p>
+<ul>
+  <li>ralloc: Fix ralloc_adopt() to the old context's last child's parent.</li>
+  <li>drirc: Disable ARB_blend_func_extended for Heaven 4.0/Valley 1.0.</li>
+  <li>glsl: Fix varying struct locations when varying packing is disabled.</li>
+  <li>nvc0: Set winding order regardless of domain.</li>
+  <li>nir: Add a lower_fdiv option, turn fdiv into fmul/frcp.</li>
+</ul>
+
+<p>Marek Olšák (7):</p>
+<ul>
+  <li>tgsi/scan: add flag colors_written</li>
+  <li>r600g: write all MRTs only if there is exactly one output (fixes a hang)</li>
+  <li>radeonsi: don't call of u_prims_for_vertices for patches and rectangles</li>
+  <li>radeonsi: apply the streamout workaround to Fiji as well</li>
+  <li>gallium/radeon: fix Hyper-Z hangs by programming PA_SC_MODE_CNTL_1 correctly</li>
+  <li>program: add _mesa_reserve_parameter_storage</li>
+  <li>st/mesa: fix GLSL uniform updates for glBitmap &amp; glDrawPixels (v2)</li>
+</ul>
+
+<p>Mark Janes (1):</p>
+<ul>
+  <li>Add missing platform information for KBL</li>
+</ul>
+
+<p>Miklós Máté (1):</p>
+<ul>
+  <li>mesa: Don't leak ATIfs instructions in DeleteFragmentShader</li>
+</ul>
+
+<p>Neil Roberts (3):</p>
+<ul>
+  <li>i965: Add MESA_FORMAT_B8G8R8X8_SRGB to brw_format_for_mesa_format</li>
+  <li>i965: Add B8G8R8X8_SRGB to the alpha format override</li>
+  <li>i965: Fix crash when calling glViewport with no surface bound</li>
+</ul>
+
+<p>Nicolai Hähnle (2):</p>
+<ul>
+  <li>gallium/radeon: only dispose locally created target machine in radeon_llvm_compile</li>
+  <li>gallium/radeon: fix regression in a number of driver queries</li>
+</ul>
+
+<p>Oded Gabbay (1):</p>
+<ul>
+  <li>configura.ac: fix test for SSE4.1 assembler support</li>
+</ul>
+
+<p>Patrick Rudolph (2):</p>
+<ul>
+  <li>nv50,nvc0: fix use-after-free when vertex buffers are unbound</li>
+  <li>gallium/util: return correct number of bound vertex buffers</li>
+</ul>
+
+<p>Rob Herring (1):</p>
+<ul>
+  <li>freedreno/ir3: fix 32-bit builds with pointer-to-int-cast error enabled</li>
+</ul>
+
+<p>Samuel Pitoiset (3):</p>
+<ul>
+  <li>nvc0: free memory allocated by the prog which reads MP perf counters</li>
+  <li>nv50,nvc0: free memory allocated by performance metrics</li>
+  <li>nv50: free memory allocated by the prog which reads MP perf counters</li>
+</ul>
+
+<p>Sarah Sharp (1):</p>
+<ul>
+  <li>mesa: Add KBL PCI IDs and platform information.</li>
+</ul>
+
+
+</div>
+</body>
+</html>

commit e429500dd16df6b2cb611fbb25e4cd528083f85f
Author: Emil Velikov <emil.velikov@collabora.com>
Date:   Wed Jan 13 12:00:19 2016 +0200

    Update version to 11.1.1
    
    Signed-off-by: Emil Velikov <emil.velikov@collabora.com>

diff --git a/VERSION b/VERSION
index 68d8f15..668182d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-11.1.0
+11.1.1

commit bf00b36c9df72758ce8a89399d5575d0725da315
Author: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date:   Mon Sep 21 14:22:53 2015 -0700

    mesa: Add KBL PCI IDs and platform information.
    
    Add PCI IDs for the Intel Kabylake platforms.  The IDs are taken
    directly from the Linux kernel patches, which are under review:
    
    http://lists.freedesktop.org/archives/intel-gfx/2015-October/078967.html
    http://cgit.freedesktop.org/~vivijim/drm-intel/log/?h=kbl-upstream-v2
    
    The Kabylake PCI IDs taken from the kernel are rearranged to be in order
    of GT type, then PCI ID.
    
    Please note that if this patch is backported, the following fixes will
    need to be added before this patch:
    
    commit 28ed1e08e8ba98e "i965/skl: Remove early platform support"
    commit c1e38ad37042b0e "i965/skl: Use larger URB size where available."
    
    Thanks to Ben for fixing a bug around setting urb.size, and being
    patient with my questions about what the various fields mean.
    
    Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
    Suggested-by: Ben Widawsky <benjamin.widawsky@intel.com>
    Tested-by: Rodrigo Vivi <rodrigo.vivi@intel.com> (KBL-GT2)
    Cc: "11.1" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 39c41be50d9474dde4c0dcf23a546d14b212e80a)

diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 5891ba6..5139e27 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -132,6 +132,28 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
+CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
+CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
+CHIPSET(0x590B, kbl_gt1, "Intel(R) Kabylake GT1")
+CHIPSET(0x590E, kbl_gt1, "Intel(R) Kabylake GT1")
+CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
+CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
+CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
+CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F")
+CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3")
+CHIPSET(0x592A, kbl_gt3, "Intel(R) Kabylake GT3")
+CHIPSET(0x592B, kbl_gt3, "Intel(R) Kabylake GT3")
+CHIPSET(0x5932, kbl_gt4, "Intel(R) Kabylake GT4")
+CHIPSET(0x593A, kbl_gt4, "Intel(R) Kabylake GT4")
+CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
+CHIPSET(0x593D, kbl_gt4, "Intel(R) Kabylake GT4")
 CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 4550550..c58e4a5 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -371,6 +371,66 @@ static const struct brw_device_info brw_device_info_bxt = {
    }
 };
 
+/*
+ * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
+ * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
+ */
+
+/*
+ * Both SKL and KBL support a maximum of 64 threads per
+ * Pixel Shader Dispatch (PSD) unit.
+ */
+#define  KBL_MAX_THREADS_PER_PSD 64
+
+static const struct brw_device_info brw_device_info_kbl_gt1 = {
+   GEN9_FEATURES,
+   .gt = 1,
+
+   .max_cs_threads = 7 * 6,
+   .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 2,
+   .urb.size = 192,
+};
+
+static const struct brw_device_info brw_device_info_kbl_gt1_5 = {
+   GEN9_FEATURES,
+   .gt = 1,
+
+   .max_cs_threads = 7 * 6,
+   .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3,
+};
+
+static const struct brw_device_info brw_device_info_kbl_gt2 = {
+   GEN9_FEATURES,
+   .gt = 2,
+
+   .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3,
+};
+
+static const struct brw_device_info brw_device_info_kbl_gt3 = {
+   GEN9_FEATURES,
+   .gt = 3,
+
+   .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 6,
+};
+
+static const struct brw_device_info brw_device_info_kbl_gt4 = {
+   GEN9_FEATURES,
+   .gt = 4,
+
+   .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 9,
+   /*
+    * From the "L3 Allocation and Programming" documentation:
+    *
+    * "URB is limited to 1008KB due to programming restrictions.  This
+    *  is not a restriction of the L3 implementation, but of the FF and
+    *  other clients.  Therefore, in a GT4 implementation it is
+    *  possible for the programmed allocation of the L3 data array to
+    *  provide 3*384KB=1152KB for URB, but only 1008KB of this
+    *  will be used."
+    */
+   .urb.size = 1008 / 3,
+};
+
 const struct brw_device_info *
 brw_get_device_info(int devid)
 {

commit fab2039588be1e5f6bbd53d8b1422776e213494a
Author: Brian Paul <brianp@vmware.com>
Date:   Tue Jan 5 17:10:12 2016 -0700

    st/mesa: check state->mesa in early return check in st_validate_state()
    
    We were checking the dirty->st flags but not the dirty->mesa flags.
    When we took the early return, we didn't clear the dirty->mesa flags
    so the next time we called st_validate_state() we'd often flush the
    glBitmap cache.  And since st_validate_state() is called from
    st_Bitmap(), it meant we flushed the bitmap cache for every glBitmap()
    call.
    
    This change seems to recover most of the performance loss observed
    with the ipers demo on llvmpipe since commit 36c93a6fae27561.
    
    Cc: mesa-stable@lists.freedesktop.org
    Reviewed-by: José Fonseca <jfonseca@vmware.com>
    (cherry picked from commit c28d72a3473ad0127c82c1244b6688dcc184e85e)

diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 43dbadd..c1a9d00 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -188,7 +188,7 @@ void st_validate_state( struct st_context *st )
 
    st_manager_validate_framebuffers(st);
 
-   if (state->st == 0)
+   if (state->st == 0 && state->mesa == 0)
       return;
 
    /*printf("%s %x/%x\n", __func__, state->mesa, state->st);*/

commit 536c8cbcd3326fc7207f8a2c166c380135924589
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Tue Jan 5 05:09:46 2016 -0800

    nir: Add a lower_fdiv option, turn fdiv into fmul/frcp.
    
    The nir_opt_algebraic rule
    
    (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
    
    can produce new fdiv operations, which need to be lowered on i965,
    as we don't actually implement fdiv.  (Normally, we handle this in
    GLSL IR's lower_instructions pass, but in the above case we introduce
    an fdiv after that point.  So, make NIR do it for us.)
    
    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Cc: mesa-stable@lists.freedesktop.org
    (cherry picked from commit 7295f4fcc2b2dd1bc6a8d1d834774b8152a029cf)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 087b453..09caf34 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1435,6 +1435,7 @@ typedef struct nir_function {
                   exec_list_get_head(&(func)->overload_list), node)
 
 typedef struct nir_shader_compiler_options {
+   bool lower_fdiv;
    bool lower_ffma;
    bool lower_flrp;
    bool lower_fpow;
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 6aa8b1f..fb36acb 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -179,6 +179,7 @@ optimizations = [
    (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
    # Division and reciprocal
    (('fdiv', 1.0, a), ('frcp', a)),
+   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
    (('frcp', ('frcp', a)), a),
    (('frcp', ('fsqrt', a)), ('frsq', a)),
    (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 7a236cd..3dc57e4 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -94,6 +94,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
    nir_shader_compiler_options *nir_options =
       rzalloc(compiler, nir_shader_compiler_options);
    nir_options->native_integers = true;
+   nir_options->lower_fdiv = true;
    /* In order to help allow for better CSE at the NIR level we tell NIR
     * to split all ffma instructions during opt_algebraic and we then
     * re-combine them as a later step.

commit 978480d69f0b40ae45766bbc65c4a6c5bb805b98
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Mon Jan 4 11:26:27 2016 -0500

    nvc0: scale up inter_bo size so that it's 16M for a 4K video
    
    Experimentally, 4M causes corruption and slowness, try to ramp it up
    with size instead.
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit b16c9be4a5561bd825176a228c300331f989e837)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
index 48ffac1..5a946ca 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
@@ -169,9 +169,12 @@ nvc0_create_decoder(struct pipe_context *context,
    for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i)
       ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
                            0, 1 << 20, &cfg, &dec->bsp_bo[i]);
-   if (!ret)
+   if (!ret) {
+      /* total fudge factor... just has to be bigger for higher bitrates? */
+      unsigned inter_size = align(templ->width * templ->height * 2, 4 << 20);
       ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
-                           0x100, 4 << 20, &cfg, &dec->inter_bo[0]);
+                           0x100, inter_size, &cfg, &dec->inter_bo[0]);
+   }
    if (!ret) {
       ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
                            0x100, dec->inter_bo[0]->size, &cfg,

commit c2be35d30907fc110df8de93b8fb344a7968c7b3
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Mon Jan 4 11:16:45 2016 -0500

    nv50,nvc0: fix crash when increasing bsp bo size for h264
    
    H264 doesn't have a bitplane bo. We just need a device reference, so use
    the one from the client.
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit b5f2f7073f047b4e4128cf05af8dddf356f9b48c)

diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
index 6058c22..7668cc0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
@@ -77,7 +77,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
       bsp_size += (1 << 20) - 1;
       bsp_size &= ~((1 << 20) - 1);
 
-      ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_size, NULL, &tmp_bo);
+      ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_size, NULL, &tmp_bo);
       if (ret) {
          debug_printf("reallocating bsp %u -> %u failed with %i\n",
                       bsp_bo ? (unsigned)bsp_bo->size : 0, bsp_size, ret);
@@ -90,7 +90,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
    if (!inter_bo || bsp_bo->size * 4 > inter_bo->size) {
       struct nouveau_bo *tmp_bo = NULL;
 
-      ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, NULL, &tmp_bo);
+      ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, NULL, &tmp_bo);
       if (ret) {
          debug_printf("reallocating inter %u -> %u failed with %i\n",
                       inter_bo ? (unsigned)inter_bo->size : 0, (unsigned)bsp_bo->size * 4, ret);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
index 9139bc1..8c9662b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
@@ -81,7 +81,7 @@ nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
       bsp_size += (1 << 20) - 1;
       bsp_size &= ~((1 << 20) - 1);
 
-      ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_size, &cfg, &tmp_bo);
+      ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_size, &cfg, &tmp_bo);
       if (ret) {
          debug_printf("reallocating bsp %u -> %u failed with %i\n",
                       bsp_bo ? (unsigned)bsp_bo->size : 0, bsp_size, ret);
@@ -98,7 +98,7 @@ nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
       cfg.nvc0.tile_mode = 0x10;
       cfg.nvc0.memtype = 0xfe;
 
-      ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, &cfg, &tmp_bo);
+      ret = nouveau_bo_new(dec->client->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, &cfg, &tmp_bo);
       if (ret) {
          debug_printf("reallocating inter %u -> %u failed with %i\n",
                       inter_bo ? (unsigned)inter_bo->size : 0, (unsigned)bsp_bo->size * 4, ret);

commit df47b1e07844f6a8a9dc9b71868b92c18648e3b7
Author: Marek Olšák <marek.olsak@amd.com>
Date:   Sun Dec 6 13:36:57 2015 +0100

    st/mesa: fix GLSL uniform updates for glBitmap & glDrawPixels (v2)
    
    Spotted by luck. The GLSL uniform storage is only associated once
    in LinkShader and can't be reallocated afterwards, because that would
    break the association.
    
    v2: don't remove st_upload_constants calls, clarify why they're needed
    
    Cc: 11.0 11.1 <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 36c93a6fae275614b6004ec5ab085774d527e1bc)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index cbc6845..a4a48a6 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -287,7 +287,8 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
       GLfloat colorSave[4];
       COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
       COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
-      st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+      st_upload_constants(st, st->fp->Base.Base.Parameters,
+                          PIPE_SHADER_FRAGMENT);
       COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
    }
 
@@ -404,6 +405,9 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    cso_restore_stream_outputs(cso);
 
    pipe_resource_reference(&vbuf, NULL);
+
+   /* We uploaded modified constants, need to invalidate them. */
+   st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
 }
 
 
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 262ad80..a125d1f 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -1110,8 +1110,11 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
          num_sampler_view++;
       }
 
-      /* update fragment program constants */
-      st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+      /* compiling a new fragment shader variant added new state constants
+       * into the constant buffer, we need to update them
+       */
+      st_upload_constants(st, st->fp->Base.Base.Parameters,
+                          PIPE_SHADER_FRAGMENT);
    }
 
    /* Put glDrawPixels image into a texture */
@@ -1463,8 +1466,11 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
          num_sampler_view++;
       }
 
-      /* update fragment program constants */
-      st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+      /* compiling a new fragment shader variant added new state constants
+       * into the constant buffer, we need to update them
+       */
+      st_upload_constants(st, st->fp->Base.Base.Parameters,
+                          PIPE_SHADER_FRAGMENT);
    }
    else {
       assert(type == GL_DEPTH);
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 40c7725..a32c4cf 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5640,6 +5640,12 @@ get_mesa_program(struct gl_context *ctx,
 
    _mesa_reference_program(ctx, &shader->Program, prog);
 
+   /* Avoid reallocation of the program parameter list, because the uniform
+    * storage is only associated with the original parameter list.
+    * This should be enough for Bitmap and DrawPixels constants.
+    */
+   _mesa_reserve_parameter_storage(prog->Parameters, 8);
+
    /* This has to be done last.  Any operation the can cause
     * prog->ParameterValues to get reallocated (e.g., anything that adds a
     * program constant) has to happen before creating this linkage.
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 75ccaf2..39c54c2 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -112,8 +112,6 @@ delete_fp_variant(struct st_context *st, struct st_fp_variant *fpv)
 {
    if (fpv->driver_shader) 
       cso_delete_fragment_shader(st->cso_context, fpv->driver_shader);
-   if (fpv->parameters)
-      _mesa_free_parameter_list(fpv->parameters);
    free(fpv);
 }
 
@@ -914,8 +912,6 @@ st_create_fp_variant(struct st_context *st,
          if (tgsi.tokens != stfp->tgsi.tokens)
             tgsi_free_tokens(tgsi.tokens);
          tgsi.tokens = tokens;
-         variant->parameters =
-            _mesa_clone_parameter_list(stfp->Base.Base.Parameters);
       } else
          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
    }
@@ -924,6 +920,7 @@ st_create_fp_variant(struct st_context *st,
    if (key->drawpixels) {
       const struct tgsi_token *tokens;
       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
+      struct gl_program_parameter_list *params = stfp->Base.Base.Parameters;
 
       /* Find the first unused slot. */
       variant->drawpix_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1;
@@ -935,27 +932,21 @@ st_create_fp_variant(struct st_context *st,
          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
       }
 
-      variant->parameters =
-         _mesa_clone_parameter_list(stfp->Base.Base.Parameters);
-
       if (key->scaleAndBias) {
          static const gl_state_index scale_state[STATE_LENGTH] =
             { STATE_INTERNAL, STATE_PT_SCALE };
          static const gl_state_index bias_state[STATE_LENGTH] =
             { STATE_INTERNAL, STATE_PT_BIAS };
 
-         scale_const = _mesa_add_state_reference(variant->parameters,
-                                                 scale_state);
-         bias_const = _mesa_add_state_reference(variant->parameters,
-                                                bias_state);
+         scale_const = _mesa_add_state_reference(params, scale_state);
+         bias_const = _mesa_add_state_reference(params, bias_state);
       }
 
       {
          static const gl_state_index state[STATE_LENGTH] =
             { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
 
-         texcoord_const = _mesa_add_state_reference(variant->parameters,
-                                                    state);
+         texcoord_const = _mesa_add_state_reference(params, state);
       }
 
       tokens = st_get_drawpix_shader(tgsi.tokens,
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index d9b53ac..a8571f0 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -80,7 +80,6 @@ struct st_fp_variant
    void *driver_shader;
 
    /** For glBitmap variants */
-   struct gl_program_parameter_list *parameters;
    uint bitmap_sampler;
 
    /** For glDrawPixels variants */

commit bb3581ca3d60366f5e3621e62f5300f4f0c127c4
Author: Marek Olšák <marek.olsak@amd.com>
Date:   Sun Dec 6 13:31:25 2015 +0100

    program: add _mesa_reserve_parameter_storage
    
    The next commit will use this.
    
    Reviewed-by: Brian Paul <brianp@vmware.com>
    Cc: 11.0 11.1 <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 294ed5cd13e878ec43126a2070343d6d99ef5669)

diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index 53e9813..e98946b 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -89,6 +89,37 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList)
 
 
 /**
+ * Make sure there are enough unused parameter slots. Reallocate the list
+ * if needed.
+ *
+ * \param paramList        where to reserve parameter slots
+ * \param reserve_slots    number of slots to reserve
+ */
+void
+_mesa_reserve_parameter_storage(struct gl_program_parameter_list *paramList,
+                                unsigned reserve_slots)
+{
+   const GLuint oldNum = paramList->NumParameters;
+
+   if (oldNum + reserve_slots > paramList->Size) {
+      /* Need to grow the parameter list array (alloc some extra) */
+      paramList->Size = paramList->Size + 4 * reserve_slots;
+
+      /* realloc arrays */
+      paramList->Parameters =
+         realloc(paramList->Parameters,
+                 paramList->Size * sizeof(struct gl_program_parameter));
+
+      paramList->ParameterValues = (gl_constant_value (*)[4])
+         _mesa_align_realloc(paramList->ParameterValues,         /* old buf */
+                             oldNum * 4 * sizeof(gl_constant_value),/* old sz */
+                             paramList->Size*4*sizeof(gl_constant_value),/*new*/
+                             16);
+   }
+}
+
+
+/**
  * Add a new parameter to a parameter list.
  * Note that parameter values are usually 4-element GLfloat vectors.
  * When size > 4 we'll allocate a sequential block of parameters to
@@ -115,21 +146,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
 
    assert(size > 0);
 
-   if (oldNum + sz4 > paramList->Size) {
-      /* Need to grow the parameter list array (alloc some extra) */
-      paramList->Size = paramList->Size + 4 * sz4;
-
-      /* realloc arrays */
-      paramList->Parameters =
-         realloc(paramList->Parameters,
-                 paramList->Size * sizeof(struct gl_program_parameter));
-
-      paramList->ParameterValues = (gl_constant_value (*)[4])
-         _mesa_align_realloc(paramList->ParameterValues,         /* old buf */
-                             oldNum * 4 * sizeof(gl_constant_value),/* old sz */
-                             paramList->Size*4*sizeof(gl_constant_value),/*new*/
-                             16);
-   }
+   _mesa_reserve_parameter_storage(paramList, sz4);
 
    if (!paramList->Parameters ||
        !paramList->ParameterValues) {
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index 74a5fd9..44700b7 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -112,6 +112,10 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list)
    return list ? list->NumParameters : 0;
 }
 
+extern void
+_mesa_reserve_parameter_storage(struct gl_program_parameter_list *paramList,
+                                unsigned reserve_slots);
+
 extern GLint
 _mesa_add_parameter(struct gl_program_parameter_list *paramList,
                     gl_register_file type, const char *name,

commit 132131af6b78a887dac606ccf640bb901359d289
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date:   Thu Dec 31 20:33:15 2015 -0500

    nv50,nvc0: make sure there's pushbuf space and that we ref the bo early
    
    First off, we can't flush in the middle of a command. Secondly,
    requesting the extra push space might cause a flush to happen. If that
    flush happens, we'd have to do the PUSH_REFN again. So instead do
    PUSH_REFN after the push space request. This helps avoid rare crashes
    with supertuxkart in libdrm due to assertion failures.
    
    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit c1d14c6817e3fa9a1c04f9b6c51b4ca601637843)
    [Emil Velikov: resolve trivial conflict]
    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
    
    Conflicts:
    	src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 85878d5..c35637c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -633,8 +633,8 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
          BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
          PUSH_DATA (push, prim);
 
-         PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
          nouveau_pushbuf_space(push, 8, 0, 1);
+         PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
 
          switch (index_size) {
          case 4:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index a70d524..8021a65 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -473,7 +473,6 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
 #define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
 
    PUSH_REFN(push, hq->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
-   nouveau_pushbuf_space(push, 0, 0, 1);
    nouveau_pushbuf_data(push, hq->bo, hq->offset + result_offset, 4 |
                         NVC0_IB_ENTRY_1_NO_PREFETCH);
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 7e2e999..09c4358 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -320,6 +320,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
 
       if (!targ->clean)
          nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
+      nouveau_pushbuf_space(push, 0, 0, 1);
       BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
       PUSH_DATA (push, 1);
       PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index c464904..00a8143 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -784,7 +784,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
    }
 
    while (num_instances--) {
-      PUSH_SPACE(push, 8);
+      nouveau_pushbuf_space(push, 9, 0, 1);
       BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
       PUSH_DATA (push, mode);
       BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1);
@@ -811,7 +811,8 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
    if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
       IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
 
-   PUSH_SPACE(push, 8);
+   nouveau_pushbuf_space(push, 8, 0, 1);
+   PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
    if (info->indexed) {
       assert(nvc0->idxbuf.buffer);
       assert(nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer));
@@ -829,8 +830,6 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
    }
    PUSH_DATA(push, nvc0_prim_gl(info->mode));
 #define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
-   PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
-   nouveau_pushbuf_space(push, 0, 0, 1);
    nouveau_pushbuf_data(push,
                         buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size);
 }

commit f4977656c1d4732022946f4e3642fa128979e60d
Author: Kenneth Graunke <kenneth@whitecape.org>
Date:   Wed Dec 30 02:53:08 2015 -0800

    nvc0: Set winding order regardless of domain.
    
    Quads need to respect winding order, too - not just triangles.
    
    Fixes rendering in GFXBench 4.0's tessellation benchmark.
    
    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
    Cc: "11.0 11.1" <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 65d3f85eb3efb326a826c2db0225340d5421a389)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 43d7c7b..527712e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -285,8 +285,6 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
       break;
    case PIPE_PRIM_TRIANGLES:
       tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES;
-      if (info->prop.tp.winding > 0)
-         tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
       break;
    case PIPE_PRIM_QUADS:
       tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS;
@@ -295,6 +293,10 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
       tp->tp.tess_mode = ~0;
       return;
    }
+
+   if (info->prop.tp.winding > 0)
+      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
+


Reply to: