[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'ubuntu+1'



 debian/changelog                            |    1 
 debian/patches/fix-altivec-intrinsics.patch |  111 ++++++++++++++++++++++++++++
 debian/patches/series                       |    1 
 3 files changed, 113 insertions(+)

New commits:
commit 3d2d673b8719363a982a30185d1083c97000f6f9
Author: Maarten Lankhorst <dev@mblankhorst.nl>
Date:   Thu Sep 18 08:32:58 2014 +0200

    Import upstream fix for altivec little endian instructions.

diff --git a/debian/changelog b/debian/changelog
index 09fb41a..676e8a3 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,7 @@
 mesa (10.3.0~rc3-2ubuntu1) UNRELEASED; urgency=low
 
   * Merge from released debian-experimental.
+  * Import upstream fix for altivec little endian instructions.
 
  -- Maarten Lankhorst <maarten.lankhorst@ubuntu.com>  Tue, 12 Aug 2014 09:38:49 +0200
 
diff --git a/debian/patches/fix-altivec-intrinsics.patch b/debian/patches/fix-altivec-intrinsics.patch
new file mode 100644
index 0000000..c67784a
--- /dev/null
+++ b/debian/patches/fix-altivec-intrinsics.patch
@@ -0,0 +1,111 @@
+commit 0feb977bbfb0d6bb2c8d3178246acb035a739f37
+Author: Ulrich Weigand <uweigand@de.ibm.com>
+Date:   Mon Aug 4 18:41:00 2014 +0200
+
+    gallivm: Fix Altivec pack intrinsics for little-endian
+    
+    This patch fixes use of Altivec pack intrinsics on little-endian PowerPC
+    systems.  Since little-endian operation only affects the load and store
+    instructions, the semantics of pack (and other) instructions that take
+    two input vectors implicitly change: the pack instructions still fill
+    a register placing values from the first operand into the "high" parts
+    of the register, and values from the second operand into the "low" parts
+    of the register, but since vector loads and stores perform an endian swap,
+    the high parts end up at high memory addresses.
+    
+    To still achieve the desired effect, we have to swap the two inputs to
+    the pack instruction on little-endian systems.  This is done automatically
+    by the back-end for instructions generated by LLVM, but needs to be done
+    manually when emitting intrinsics (which still result in that instruction
+    being emitted directly).
+    
+    Signed-off-by: Ulrich Weigand <ulrich.weigand@de.ibm.com>
+    Signed-off-by: Maarten Lankhorst <dev@mblankhorst.nl>
+
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+index a48a922..cdf6d80 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
++++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+@@ -464,6 +464,7 @@ lp_build_pack2(struct gallivm_state *gallivm,
+    if((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) &&
+        src_type.width * src_type.length >= 128) {
+       const char *intrinsic = NULL;
++      boolean swap_intrinsic_operands = FALSE;
+ 
+       switch(src_type.width) {
+       case 32:
+@@ -482,6 +483,9 @@ lp_build_pack2(struct gallivm_state *gallivm,
+            } else {
+               intrinsic = "llvm.ppc.altivec.vpkuwus";
+            }
++#ifdef PIPE_ARCH_LITTLE_ENDIAN
++           swap_intrinsic_operands = TRUE;
++#endif
+          }
+          break;
+       case 16:
+@@ -490,12 +494,18 @@ lp_build_pack2(struct gallivm_state *gallivm,
+               intrinsic = "llvm.x86.sse2.packsswb.128";
+             } else if (util_cpu_caps.has_altivec) {
+               intrinsic = "llvm.ppc.altivec.vpkshss";
++#ifdef PIPE_ARCH_LITTLE_ENDIAN
++              swap_intrinsic_operands = TRUE;
++#endif
+             }
+          } else {
+             if (util_cpu_caps.has_sse2) {
+               intrinsic = "llvm.x86.sse2.packuswb.128";
+             } else if (util_cpu_caps.has_altivec) {
+ 	      intrinsic = "llvm.ppc.altivec.vpkshus";
++#ifdef PIPE_ARCH_LITTLE_ENDIAN
++              swap_intrinsic_operands = TRUE;
++#endif
+             }
+          }
+          break;
+@@ -504,7 +514,11 @@ lp_build_pack2(struct gallivm_state *gallivm,
+       if (intrinsic) {
+          if (src_type.width * src_type.length == 128) {
+             LLVMTypeRef intr_vec_type = lp_build_vec_type(gallivm, intr_type);
+-            res = lp_build_intrinsic_binary(builder, intrinsic, intr_vec_type, lo, hi);
++            if (swap_intrinsic_operands) {
++               res = lp_build_intrinsic_binary(builder, intrinsic, intr_vec_type, hi, lo);
++            } else {
++               res = lp_build_intrinsic_binary(builder, intrinsic, intr_vec_type, lo, hi);
++            }
+             if (dst_vec_type != intr_vec_type) {
+                res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+             }
+@@ -513,6 +527,8 @@ lp_build_pack2(struct gallivm_state *gallivm,
+             int num_split = src_type.width * src_type.length / 128;
+             int i;
+             int nlen = 128 / src_type.width;
++            int lo_off = swap_intrinsic_operands ? nlen : 0;
++            int hi_off = swap_intrinsic_operands ? 0 : nlen;
+             struct lp_type ndst_type = lp_type_unorm(dst_type.width, 128);
+             struct lp_type nintr_type = lp_type_unorm(intr_type.width, 128);
+             LLVMValueRef tmpres[LP_MAX_VECTOR_WIDTH / 128];
+@@ -524,9 +540,9 @@ lp_build_pack2(struct gallivm_state *gallivm,
+ 
+             for (i = 0; i < num_split / 2; i++) {
+                tmplo = lp_build_extract_range(gallivm,
+-                                              lo, i*nlen*2, nlen);
++                                              lo, i*nlen*2 + lo_off, nlen);
+                tmphi = lp_build_extract_range(gallivm,
+-                                              lo, i*nlen*2 + nlen, nlen);
++                                              lo, i*nlen*2 + hi_off, nlen);
+                tmpres[i] = lp_build_intrinsic_binary(builder, intrinsic,
+                                                      nintr_vec_type, tmplo, tmphi);
+                if (ndst_vec_type != nintr_vec_type) {
+@@ -535,9 +551,9 @@ lp_build_pack2(struct gallivm_state *gallivm,
+             }
+             for (i = 0; i < num_split / 2; i++) {
+                tmplo = lp_build_extract_range(gallivm,
+-                                              hi, i*nlen*2, nlen);
++                                              hi, i*nlen*2 + lo_off, nlen);
+                tmphi = lp_build_extract_range(gallivm,
+-                                              hi, i*nlen*2 + nlen, nlen);
++                                              hi, i*nlen*2 + hi_off, nlen);
+                tmpres[i+num_split/2] = lp_build_intrinsic_binary(builder, intrinsic,
+                                                                  nintr_vec_type,
+                                                                  tmplo, tmphi);
diff --git a/debian/patches/series b/debian/patches/series
index 8a081b6..236f10d 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -4,3 +4,4 @@
 # Ubuntu patches.
 egl-platform-mir.patch
 i915-dont-default-to-2.1.patch
+fix-altivec-intrinsics.patch


Reply to: