[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'ubuntu'



 debian/changelog                         |    7 
 debian/control                           |    2 
 debian/patches/i915-fix-gen4-hangs.patch |   59 ++
 debian/patches/llvm-3.6-fixes.patch      |  616 +++++++++++++++++++++++++++++++
 debian/patches/series                    |    2 
 debian/rules                             |    2 
 6 files changed, 686 insertions(+), 2 deletions(-)

New commits:
commit 710c202ec80cae4bb9fb49ab506739586aba0ca7
Author: Maarten Lankhorst <maarten.lankhorst@ubuntu.com>
Date:   Wed Jan 28 16:50:29 2015 +0100

    Add upstream workaround for hangs on gen4.

diff --git a/debian/changelog b/debian/changelog
index da98ae7..d5e6530 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,7 @@
 mesa (10.4.2-2ubuntu2) UNRELEASED; urgency=medium
 
   * Flip the switch to llvm 3.6 with patches backported from upstream.
+  * Add upstream workaround for hangs on gen4.
 
  -- Maarten Lankhorst <maarten.lankhorst@ubuntu.com>  Wed, 28 Jan 2015 14:08:49 +0100
 
diff --git a/debian/patches/i915-fix-gen4-hangs.patch b/debian/patches/i915-fix-gen4-hangs.patch
new file mode 100644
index 0000000..34e8a5c
--- /dev/null
+++ b/debian/patches/i915-fix-gen4-hangs.patch
@@ -0,0 +1,59 @@
+commit 882f702441c6601589bdef805a9157cb113b91dd
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sat Jan 17 23:21:15 2015 -0800
+
+    i965: Work around mysterious Gen4 GPU hangs with minimal state changes.
+    
+    Gen4 hardware appears to GPU hang frequently when using Chromium, and
+    also when running 'glmark2 -b ideas'.  Most of the error states contain
+    3DPRIMITIVE commands in quick succession, with very few state packets
+    between them - usually VERTEX_BUFFERS/ELEMENTS and CONSTANT_BUFFER.
+    
+    I trimmed an apitrace of the glmark2 hang down to two draw calls with a
+    glUniformMatrix4fv call between the two.  Either draw by itself works
+    fine, but together, they hang the GPU.  Removing the glUniform call
+    makes the hangs disappear.  In the hardware state, this translates to
+    removing the CONSTANT_BUFFER packet between the two 3DPRIMITIVE packets.
+    
+    Flushing before emitting CONSTANT_BUFFER packets also appears to make
+    the hangs disappear.  I observed a slowdown in glxgears by doing it all
+    the time, so I've chosen to only do it when BRW_NEW_BATCH and
+    BRW_NEW_PSP are unset (i.e. we haven't done a CS_URB_STATE change or
+    already flushed the whole pipeline).
+    
+    I'd much rather understand the problem, but at this point, I don't see
+    how we'd ever be able to track it down further.  We have no real tools,
+    and the hardware people moved on years ago.  I've analyzed 20+ error
+    states and read every scrap of documentation I could find.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80568
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85367
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+    Cc: "10.4 10.3" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit c4fd0c9052dd391d6f2e9bb8e6da209dfc7ef35b)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
+index 1a828ed..718d87c 100644
+--- a/src/mesa/drivers/dri/i965/brw_curbe.c
++++ b/src/mesa/drivers/dri/i965/brw_curbe.c
+@@ -280,6 +280,19 @@ brw_upload_constant_buffer(struct brw_context *brw)
+     */
+ 
+ emit:
++   /* Work around mysterious 965 hangs that appear to happen if you do
++    * two 3DPRIMITIVEs with only a CONSTANT_BUFFER in between.  If we
++    * haven't already flushed for some other reason, explicitly do so.
++    *
++    * We've found no documented reason why this should be necessary.
++    */
++   if (brw->gen == 4 && !brw->is_g4x &&
++       (brw->state.dirty.brw & (BRW_NEW_BATCH | BRW_NEW_PSP)) == 0) {
++      BEGIN_BATCH(1);
++      OUT_BATCH(MI_FLUSH);
++      ADVANCE_BATCH();
++   }
++
+    /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8
+     * (CONSTANT_BUFFER (CURBE Load)):
+     *
diff --git a/debian/patches/series b/debian/patches/series
index 494cdfb..0d106a1 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -3,4 +3,5 @@
 # Ubuntu patches.
 egl-platform-mir.patch
 i915-dont-default-to-2.1.patch
-llvm-3.6-fixes.patch
+i915-fix-gen4-hangs.patch
+llvm-3.6-fixes.patch
\ No newline at end of file

commit f09055c78217196cfa377038a99fb7516add34ce
Author: Maarten Lankhorst <maarten.lankhorst@ubuntu.com>
Date:   Wed Jan 28 16:48:55 2015 +0100

    Flip the switch to llvm 3.6 with patches backported from upstream.

diff --git a/debian/changelog b/debian/changelog
index 7e1102c..da98ae7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+mesa (10.4.2-2ubuntu2) UNRELEASED; urgency=medium
+
+  * Flip the switch to llvm 3.6 with patches backported from upstream.
+
+ -- Maarten Lankhorst <maarten.lankhorst@ubuntu.com>  Wed, 28 Jan 2015 14:08:49 +0100
+
 mesa (10.4.2-2ubuntu1) vivid; urgency=medium
 
   [ Timo Aaltonen ]
diff --git a/debian/control b/debian/control
index 144382e..4c60ceb 100644
--- a/debian/control
+++ b/debian/control
@@ -38,7 +38,7 @@ Build-Depends:
  libudev-dev [linux-any],
  flex,
  bison,
- llvm-3.5-dev (>= 1:3.5-1) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf],
+ llvm-3.6-dev (>= 1:3.5-1) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf],
  libelf-dev [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf],
  libwayland-dev (>= 1.2.0) [linux-any],
  libmirclient-dev [!arm64 !powerpc !ppc64 !ppc64el],
diff --git a/debian/patches/llvm-3.6-fixes.patch b/debian/patches/llvm-3.6-fixes.patch
new file mode 100644
index 0000000..e518f51
--- /dev/null
+++ b/debian/patches/llvm-3.6-fixes.patch
@@ -0,0 +1,616 @@
+diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
+index 14c802b..869abb0 100644
+--- a/src/gallium/auxiliary/draw/draw_llvm.c
++++ b/src/gallium/auxiliary/draw/draw_llvm.c
+@@ -742,6 +742,7 @@ generate_fetch(struct gallivm_state *gallivm,
+       val = lp_build_fetch_rgba_aos(gallivm,
+                                     format_desc,
+                                     lp_float32_vec4_type(),
++                                    FALSE,
+                                     map_ptr,
+                                     zero, zero, zero);
+       LLVMBuildStore(builder, val, temp_ptr);
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
+index 1177fb2..969f1f6 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
++++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
+@@ -62,6 +62,7 @@ LLVMValueRef
+ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
+                         const struct util_format_description *format_desc,
+                         struct lp_type type,
++                        boolean aligned,
+                         LLVMValueRef base_ptr,
+                         LLVMValueRef offset,
+                         LLVMValueRef i,
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+index af755d4..3c25c32 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
++++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+@@ -356,6 +356,7 @@ lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
+  * Fetch a pixel into a 4 float AoS.
+  *
+  * \param format_desc  describes format of the image we're fetching from
++ * \param aligned  whether the data is guaranteed to be aligned
+  * \param ptr  address of the pixel block (or the texel if uncompressed)
+  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
+  *              these will always be (0, 0).
+@@ -365,6 +366,7 @@ LLVMValueRef
+ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
+                         const struct util_format_description *format_desc,
+                         struct lp_type type,
++                        boolean aligned,
+                         LLVMValueRef base_ptr,
+                         LLVMValueRef offset,
+                         LLVMValueRef i,
+@@ -400,7 +402,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
+ 
+       packed = lp_build_gather(gallivm, type.length/4,
+                                format_desc->block.bits, type.width*4,
+-                               base_ptr, offset, TRUE);
++                               aligned, base_ptr, offset, TRUE);
+ 
+       assert(format_desc->block.bits <= vec_len);
+ 
+@@ -437,7 +439,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
+          LLVMValueRef packed;
+ 
+          packed = lp_build_gather_elem(gallivm, num_pixels,
+-                                       format_desc->block.bits, 32,
++                                       format_desc->block.bits, 32, aligned,
+                                        base_ptr, offset, k, FALSE);
+ 
+          tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+index ff2887e..afaabc0 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
++++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+@@ -386,6 +386,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
+                                type.length,
+                                format_desc->block.bits,
+                                type.width,
++                               TRUE,
+                                base_ptr, offset, FALSE);
+ 
+       /*
+@@ -411,8 +412,8 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
+ 
+       packed = lp_build_gather(gallivm, type.length,
+                                format_desc->block.bits,
+-                               type.width, base_ptr, offset,
+-                               FALSE);
++                               type.width, TRUE,
++                               base_ptr, offset, FALSE);
+       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
+       }
+@@ -438,15 +439,15 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
+          unsigned mask = (1 << 8) - 1;
+          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
+          offset = LLVMBuildAdd(builder, offset, s_offset, "");
+-         packed = lp_build_gather(gallivm, type.length,
+-                                  32, type.width, base_ptr, offset, FALSE);
++         packed = lp_build_gather(gallivm, type.length, 32, type.width,
++                                  TRUE, base_ptr, offset, FALSE);
+          packed = LLVMBuildAnd(builder, packed,
+                                lp_build_const_int_vec(gallivm, type, mask), "");
+       }
+       else {
+          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
+-         packed = lp_build_gather(gallivm, type.length,
+-                                  32, type.width, base_ptr, offset, TRUE);
++         packed = lp_build_gather(gallivm, type.length, 32, type.width,
++                                  TRUE, base_ptr, offset, TRUE);
+          packed = LLVMBuildBitCast(builder, packed,
+                                    lp_build_vec_type(gallivm, type), "");
+       }
+@@ -472,7 +473,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
+       tmp_type.norm = TRUE;
+ 
+       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
+-                                    base_ptr, offset, i, j);
++                                    TRUE, base_ptr, offset, i, j);
+ 
+       lp_build_rgba8_to_fi32_soa(gallivm,
+                                 type,
+@@ -522,7 +523,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
+ 
+          /* Get a single float[4]={R,G,B,A} pixel */
+          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
+-                                       base_ptr, offset_elem,
++                                       TRUE, base_ptr, offset_elem,
+                                        i_elem, j_elem);
+ 
+          /*
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+index 873f354..4f5a45c 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
++++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+@@ -497,7 +497,7 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
+    assert(format_desc->block.width == 2);
+    assert(format_desc->block.height == 1);
+ 
+-   packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset, FALSE);
++   packed = lp_build_gather(gallivm, n, 32, 32, TRUE, base_ptr, offset, FALSE);
+ 
+    (void)j;
+ 
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
+index 9155d81..d026020 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c
++++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
+@@ -76,6 +76,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
+                      unsigned length,
+                      unsigned src_width,
+                      unsigned dst_width,
++                     boolean aligned,
+                      LLVMValueRef base_ptr,
+                      LLVMValueRef offsets,
+                      unsigned i,
+@@ -93,6 +94,27 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
+    ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
+    res = LLVMBuildLoad(gallivm->builder, ptr, "");
+ 
++   /* XXX
++    * On some archs we probably really want to avoid having to deal
++    * with alignments lower than 4 bytes (if fetch size is a power of
++    * two >= 32). On x86 it doesn't matter, however.
++    * We should be able to guarantee full alignment for any kind of texture
++    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
++    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
++    * but I don't think that's quite what we wanted).
++    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
++    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
++    * enforcing what we want (which is what d3d10 does, the offset needs to
++    * be aligned to element size, but GL has bytes regardless of element
++    * size which would only leave us with minimum alignment restriction of 16
++    * which doesn't make much sense if the type isn't 4x32bit). Due to
++    * translation of offsets to first_elem in sampler_views it actually seems
++    * gallium could not do anything else except 16 no matter what...
++    */
++  if (!aligned) {
++      lp_set_load_alignment(res, 1);
++   }
++
+    assert(src_width <= dst_width);
+    if (src_width > dst_width) {
+       res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
+@@ -126,6 +148,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
+  * @param length length of the offsets
+  * @param src_width src element width in bits
+  * @param dst_width result element width in bits (src will be expanded to fit)
++ * @param aligned whether the data is guaranteed to be aligned (to src_width)
+  * @param base_ptr base pointer, should be a i8 pointer type.
+  * @param offsets vector with offsets
+  * @param vector_justify select vector rather than integer justification
+@@ -135,6 +158,7 @@ lp_build_gather(struct gallivm_state *gallivm,
+                 unsigned length,
+                 unsigned src_width,
+                 unsigned dst_width,
++                boolean aligned,
+                 LLVMValueRef base_ptr,
+                 LLVMValueRef offsets,
+                 boolean vector_justify)
+@@ -144,7 +168,7 @@ lp_build_gather(struct gallivm_state *gallivm,
+    if (length == 1) {
+       /* Scalar */
+       return lp_build_gather_elem(gallivm, length,
+-                                  src_width, dst_width,
++                                  src_width, dst_width, aligned,
+                                   base_ptr, offsets, 0, vector_justify);
+    } else {
+       /* Vector */
+@@ -158,7 +182,7 @@ lp_build_gather(struct gallivm_state *gallivm,
+          LLVMValueRef index = lp_build_const_int32(gallivm, i);
+          LLVMValueRef elem;
+          elem = lp_build_gather_elem(gallivm, length,
+-                                     src_width, dst_width,
++                                     src_width, dst_width, aligned,
+                                      base_ptr, offsets, i, vector_justify);
+          res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
+       }
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
+index ee69473..3ede476 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.h
++++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
+@@ -45,6 +45,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
+                      unsigned length,
+                      unsigned src_width,
+                      unsigned dst_width,
++                     boolean aligned,
+                      LLVMValueRef base_ptr,
+                      LLVMValueRef offsets,
+                      unsigned i,
+@@ -55,6 +56,7 @@ lp_build_gather(struct gallivm_state *gallivm,
+                 unsigned length,
+                 unsigned src_width,
+                 unsigned dst_width,
++                boolean aligned,
+                 LLVMValueRef base_ptr,
+                 LLVMValueRef offsets,
+                 boolean vector_justify);
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+index fe3c754..5210acc 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
++++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+@@ -500,8 +500,12 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
+        MM = new ShaderMemoryManager(JMM);
+        *OutCode = MM->getGeneratedCode();
+ 
++#if HAVE_LLVM >= 0x0306
++       builder.setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager>(MM));
++#else
+        builder.setMCJITMemoryManager(MM);
+ #endif
++#endif
+    } else {
+ #if HAVE_LLVM < 0x0306
+        BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+index 394521d..d7fde81 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
++++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+@@ -581,6 +581,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
+                               bld->texel_type.length,
+                               bld->format_desc->block.bits,
+                               bld->texel_type.width,
++                              TRUE,
+                               data_ptr, offset, TRUE);
+ 
+       rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+@@ -589,6 +590,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
+       rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
+                                       bld->format_desc,
+                                       u8n.type,
++                                      TRUE,
+                                       data_ptr, offset,
+                                       x_subcoord,
+                                       y_subcoord);
+@@ -919,6 +921,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
+                                        bld->texel_type.length,
+                                        bld->format_desc->block.bits,
+                                        bld->texel_type.width,
++                                       TRUE,
+                                        data_ptr, offset[k][j][i], TRUE);
+ 
+                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+@@ -927,6 +930,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
+                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
+                                                bld->format_desc,
+                                                u8n.type,
++                                               TRUE,
+                                                data_ptr, offset[k][j][i],
+                                                x_subcoord[i],
+                                                y_subcoord[j]);
+diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
+index 48bf06e..d9abd1a 100644
+--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
++++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
+@@ -112,7 +112,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
+    block = LLVMAppendBasicBlockInContext(context, func, "entry");
+    LLVMPositionBuilderAtEnd(builder, block);
+ 
+-   rgba = lp_build_fetch_rgba_aos(gallivm, desc, type,
++   rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE,
+                                   packed_ptr, offset, i, j);
+ 
+    LLVMBuildStore(builder, rgba, rgba_ptr);
+@@ -252,6 +252,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
+          }
+ 
+          /* To ensure it's 16-byte aligned */
++         /* Could skip this and use unaligned lp_build_fetch_rgba_aos */
+          memcpy(packed, test->packed, sizeof packed);
+ 
+          for (i = 0; i < desc->block.height; ++i) {
+diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
+index e8cae00..bca934e 100644
+--- a/src/gallium/drivers/r600/r600_llvm.c
++++ b/src/gallium/drivers/r600/r600_llvm.c
+@@ -881,7 +881,7 @@ unsigned r600_llvm_compile(
+ 	const char * gpu_family = r600_get_llvm_processor_name(family);
+ 
+ 	memset(&binary, 0, sizeof(struct radeon_shader_binary));
+-	r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
++	r = radeon_llvm_compile(mod, &binary, gpu_family, dump, NULL);
+ 
+ 	r = r600_create_shader(bc, &binary, use_kill);
+ 
+diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
+index f9393e6..b349123 100644
+--- a/src/gallium/drivers/radeon/r600_pipe_common.c
++++ b/src/gallium/drivers/radeon/r600_pipe_common.c
+@@ -36,6 +36,10 @@
+ #include "radeon/radeon_video.h"
+ #include <inttypes.h>
+ 
++#ifndef HAVE_LLVM
++#define HAVE_LLVM 0
++#endif
++
+ /*
+  * pipe_context
+  */
+@@ -501,6 +505,12 @@ static int r600_get_compute_param(struct pipe_screen *screen,
+ 	switch (param) {
+ 	case PIPE_COMPUTE_CAP_IR_TARGET: {
+ 		const char *gpu;
++		const char *triple;
++		if (rscreen->family <= CHIP_ARUBA || HAVE_LLVM < 0x0306) {
++			triple = "r600--";
++		} else {
++			triple = "amdgcn--";
++		}
+ 		switch(rscreen->family) {
+ 		/* Clang < 3.6 is missing Hainan in its list of
+ 		 * GPUs, so we need to use the name of a similar GPU.
+@@ -515,9 +525,10 @@ static int r600_get_compute_param(struct pipe_screen *screen,
+ 			break;
+ 		}
+ 		if (ret) {
+-			sprintf(ret, "%s-r600--", gpu);
++			sprintf(ret, "%s-%s", gpu, triple);
++
+ 		}
+-		return (8 + strlen(gpu)) * sizeof(char);
++		return (strlen(triple) + strlen(gpu)) * sizeof(char);
+ 	}
+ 	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ 		if (ret) {
+diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
+index dc871d7..b98afb2 100644
+--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
++++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
+@@ -98,19 +98,19 @@ static void init_r600_target()
+ 	}
+ }
+ 
+-static LLVMTargetRef get_r600_target()
++LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
+ {
+ 	LLVMTargetRef target = NULL;
++	char *err_message = NULL;
+ 
+-	for (target = LLVMGetFirstTarget(); target;
+-					target = LLVMGetNextTarget(target)) {
+-		if (!strncmp(LLVMGetTargetName(target), "r600", 4)) {
+-			break;
+-		}
+-	}
++	init_r600_target();
+ 
+-	if (!target) {
+-		fprintf(stderr, "Can't find target r600\n");
++	if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
++		fprintf(stderr, "Cannot find target for triple %s ", triple);
++		if (err_message) {
++			fprintf(stderr, "%s\n", err_message);
++		}
++		LLVMDisposeMessage(err_message);
+ 		return NULL;
+ 	}
+ 	return target;
+@@ -138,14 +138,13 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
+  * @returns 0 for success, 1 for failure
+  */
+ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
+-					  const char *gpu_family, unsigned dump)
++			  const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm)
+ {
+ 
+-	LLVMTargetRef target;
+-	LLVMTargetMachineRef tm;
+ 	char cpu[CPU_STRING_LEN];
+ 	char fs[FS_STRING_LEN];
+ 	char *err;
++	bool dispose_tm = false;
+ 	LLVMContextRef llvm_ctx;
+ 	unsigned rval = 0;
+ 	LLVMMemoryBufferRef out_buffer;
+@@ -154,22 +153,23 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
+ 	char triple[TRIPLE_STRING_LEN];
+ 	LLVMBool mem_err;
+ 
+-	/* initialise */
+-	init_r600_target();
+-
+-	target = get_r600_target();
+-	if (!target) {
+-		return 1;
+-	}
+-
+-	strncpy(cpu, gpu_family, CPU_STRING_LEN);
+-	memset(fs, 0, sizeof(fs));
+-	if (dump) {
+-		LLVMDumpModule(M);
+-		strncpy(fs, "+DumpCode", FS_STRING_LEN);
++	if (!tm) {
++		strncpy(triple, "r600--", TRIPLE_STRING_LEN);
++		LLVMTargetRef target = radeon_llvm_get_r600_target(triple);
++		if (!target) {
++			return 1;
++		}
++		strncpy(cpu, gpu_family, CPU_STRING_LEN);
++		memset(fs, 0, sizeof(fs));
++		if (dump) {
++			LLVMDumpModule(M);
++			strncpy(fs, "+DumpCode", FS_STRING_LEN);
++		}
++		tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
++				  LLVMCodeGenLevelDefault, LLVMRelocDefault,
++						  LLVMCodeModelDefault);
++		dispose_tm = true;
+ 	}
+-	strncpy(triple, "r600--", TRIPLE_STRING_LEN);
+-
+ 	/* Setup Diagnostic Handler*/
+ 	llvm_ctx = LLVMGetModuleContext(M);
+ 
+@@ -179,9 +179,6 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
+ 	rval = 0;
+ 
+ 	/* Compile IR*/
+-	tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
+-				  LLVMCodeGenLevelDefault, LLVMRelocDefault,
+-						  LLVMCodeModelDefault);
+ 	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
+ 								 &out_buffer);
+ 
+@@ -205,6 +202,9 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
+ 
+ 	/* Clean up */
+ 	LLVMDisposeMemoryBuffer(out_buffer);
+-	LLVMDisposeTargetMachine(tm);
++
++	if (dispose_tm) {
++		LLVMDisposeTargetMachine(tm);
++	}
+ 	return rval;
+ }
+diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h b/src/gallium/drivers/radeon/radeon_llvm_emit.h
+index 780ff5f..3ccef78 100644
+--- a/src/gallium/drivers/radeon/radeon_llvm_emit.h
++++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h
+@@ -28,15 +28,19 @@
+ #define RADEON_LLVM_EMIT_H
+ 
+ #include <llvm-c/Core.h>
++#include <llvm-c/TargetMachine.h>
+ 
+ struct radeon_shader_binary;
+ 
+ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
+ 
++LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
++
+ unsigned  radeon_llvm_compile(
+ 	LLVMModuleRef M,
+ 	struct radeon_shader_binary *binary,
+ 	const char * gpu_family,
+-	unsigned dump);
++	unsigned dump,
++	LLVMTargetMachineRef tm);
+ 
+ #endif /* RADEON_LLVM_EMIT_H */
+diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
+index 53c83ba..f99bf76 100644
+--- a/src/gallium/drivers/radeonsi/si_pipe.c
++++ b/src/gallium/drivers/radeonsi/si_pipe.c
+@@ -25,10 +25,14 @@
+ #include "si_public.h"
+ #include "sid.h"
+ 
++#include "radeon/radeon_llvm_emit.h"
+ #include "radeon/radeon_uvd.h"
+ #include "util/u_memory.h"
+ #include "vl/vl_decoder.h"
+ 
++#include <llvm-c/Target.h>
++#include <llvm-c/TargetMachine.h>
++
+ /*
+  * pipe_context
+  */
+@@ -417,6 +421,12 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
+ 	if (!sscreen->b.ws->unref(sscreen->b.ws))
+ 		return;
+ 
++#if HAVE_LLVM >= 0x0306
++	// r600_destroy_common_screen() frees sscreen, so we need to make
++	// sure to dispose the TargetMachine before we call it.
++	LLVMDisposeTargetMachine(sscreen->tm);
++#endif
++
+ 	r600_destroy_common_screen(&sscreen->b);
+ }
+ 
+@@ -474,6 +484,12 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen)
+ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
+ {
+ 	struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
++	LLVMTargetRef r600_target;
++#if HAVE_LLVM >= 0x0306
++	const char *triple = "amdgcn--";
++#else
++	const char *triple = "r600--";
++#endif
+ 	if (sscreen == NULL) {
+ 		return NULL;
+ 	}
+@@ -501,5 +517,13 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
+ 	/* Create the auxiliary context. This must be done last. */
+ 	sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL);
+ 
++#if HAVE_LLVM >= 0x0306
++	/* Initialize LLVM TargetMachine */
++	r600_target = radeon_llvm_get_r600_target(triple);
++	sscreen->tm = LLVMCreateTargetMachine(r600_target, triple,
++				r600_get_llvm_processor_name(sscreen->b.family),
++				"+DumpCode", LLVMCodeGenLevelDefault, LLVMRelocDefault,
++				LLVMCodeModelDefault);
++#endif
+ 	return &sscreen->b.b;
+ }
+diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
+index 5f5404d..597949d 100644
+--- a/src/gallium/drivers/radeonsi/si_pipe.h
++++ b/src/gallium/drivers/radeonsi/si_pipe.h
+@@ -28,6 +28,8 @@
+ 
+ #include "si_state.h"
+ 
++#include <llvm-c/TargetMachine.h>
++
+ #ifdef PIPE_ARCH_BIG_ENDIAN
+ #define SI_BIG_ENDIAN 1
+ #else
+@@ -43,6 +45,7 @@ struct si_compute;
+ 
+ struct si_screen {
+ 	struct r600_common_screen	b;
++	LLVMTargetMachineRef		tm;
+ };
+ 
+ struct si_sampler_view {
+diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
+index 541e733..afc6aad 100644
+--- a/src/gallium/drivers/radeonsi/si_shader.c
++++ b/src/gallium/drivers/radeonsi/si_shader.c
+@@ -2602,7 +2602,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
+ 			shader->selector ? shader->selector->tokens : NULL);
+ 	memset(&binary, 0, sizeof(binary));
+ 	r = radeon_llvm_compile(mod, &binary,
+-		r600_get_llvm_processor_name(sscreen->b.family), dump);
++		r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm);
+ 
+ 	if (r) {
+ 		return r;
+@@ -2740,6 +2740,13 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
+ 	bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
+ 	bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
+ 
++	if (HAVE_LLVM >= 0x0306) {
++		bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
++		bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
++		bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
++		bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
++	}
++
+ 	si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
+ 	si_shader_ctx.tokens = sel->tokens;
+ 	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
+diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
+index e953822..f2b6f59 100644
+--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
++++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
+@@ -281,7 +281,11 @@ namespace {
+       }
+ 
+       for (unsigned i = 0; i < kernel_node->getNumOperands(); ++i) {
++#if HAVE_LLVM >= 0x0306
++         kernels.push_back(llvm::mdconst::dyn_extract<llvm::Function>(
++#else
+          kernels.push_back(llvm::dyn_cast<llvm::Function>(
++#endif
+                                     kernel_node->getOperand(i)->getOperand(0)));
+       }
+    }
diff --git a/debian/patches/series b/debian/patches/series
index af4c631..494cdfb 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -3,3 +3,4 @@
 # Ubuntu patches.
 egl-platform-mir.patch
 i915-dont-default-to-2.1.patch
+llvm-3.6-fixes.patch
diff --git a/debian/rules b/debian/rules
index 6d89ebe..9c50a17 100755
--- a/debian/rules
+++ b/debian/rules
@@ -109,7 +109,7 @@ else
   ifneq (,$(filter $(DEB_HOST_ARCH),amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf))
 	GALLIUM_DRIVERS += radeonsi
 	confflags_GALLIUM += --enable-gallium-llvm
-	confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-3.5
+	confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-3.6
   endif
 
 	confflags_DIRECT_RENDERING = --enable-driglx-direct


Reply to: