mesa: Changes to 'debian-experimental'
VERSION | 2
configure.ac | 4
debian/changelog | 7
debian/control | 4
debian/rules | 2
docs/relnotes/11.2.0.html | 3
docs/relnotes/11.2.1.html | 118 ++++++++
include/D3D9/d3d9types.h | 16 -
src/compiler/glsl/glsl_lexer.ll | 2
src/compiler/glsl/lower_variable_index_to_cond_assign.cpp | 20 +
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2
src/gallium/drivers/radeonsi/si_state.c | 2
src/gallium/state_trackers/va/context.c | 4
src/gallium/state_trackers/va/image.c | 4
src/mesa/drivers/dri/common/drirc | 8
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14
src/mesa/drivers/dri/i965/gen7_blorp.cpp | 5
src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 198 ++++++--------
src/mesa/state_tracker/st_cb_drawpixels.c | 23 +
src/mesa/state_tracker/st_cb_fbo.c | 2
21 files changed, 319 insertions(+), 131 deletions(-)
New commits:
commit abf028947440577413c49feffbb15192701d991e
Author: Timo Aaltonen <tjaalton@debian.org>
Date: Mon Apr 18 11:25:26 2016 +0300
release to experimental
diff --git a/debian/changelog b/debian/changelog
index 0328b2c..9c11bba 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,9 +1,9 @@
-mesa (11.2.1-1) UNRELEASED; urgency=medium
+mesa (11.2.1-1) experimental; urgency=medium
* New upstream release.
* control, rules: Use llvm/clang 3.8.
- -- Timo Aaltonen <tjaalton@debian.org> Mon, 18 Apr 2016 11:20:48 +0300
+ -- Timo Aaltonen <tjaalton@debian.org> Mon, 18 Apr 2016 11:25:00 +0300
mesa (11.2.0-1) experimental; urgency=medium
commit 436b3472adde14b22e9ce204820dab417cfe00c6
Author: Timo Aaltonen <tjaalton@debian.org>
Date: Mon Apr 18 11:24:58 2016 +0300
control, rules: Use llvm/clang 3.8.
diff --git a/debian/changelog b/debian/changelog
index cf98bea..0328b2c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,7 @@
mesa (11.2.1-1) UNRELEASED; urgency=medium
* New upstream release.
+ * control, rules: Use llvm/clang 3.8.
-- Timo Aaltonen <tjaalton@debian.org> Mon, 18 Apr 2016 11:20:48 +0300
diff --git a/debian/control b/debian/control
index e495555..19c802f 100644
--- a/debian/control
+++ b/debian/control
@@ -41,10 +41,10 @@ Build-Depends:
libudev-dev [linux-any],
flex,
bison,
- llvm-3.7-dev (>= 1:3.7~+rc2) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf ppc64el],
+ llvm-3.8-dev (>= 1:3.8) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf ppc64el],
libelf-dev [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf ppc64el],
libwayland-dev (>= 1.2.0) [linux-any],
- libclang-3.7-dev (>= 1:3.7~+rc2) [amd64 i386 armhf],
+ libclang-3.8-dev (>= 1:3.8) [amd64 i386 armhf],
libclc-dev (>= 0.2.0+git20150813) [amd64 i386 armhf],
Vcs-Git: https://anonscm.debian.org/git/pkg-xorg/lib/mesa.git
Vcs-Browser: https://anonscm.debian.org/cgit/pkg-xorg/lib/mesa.git
diff --git a/debian/rules b/debian/rules
index cf40811..ff216dd 100755
--- a/debian/rules
+++ b/debian/rules
@@ -89,7 +89,7 @@ else
ifneq (,$(filter $(DEB_HOST_ARCH),amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf ppc64el))
GALLIUM_DRIVERS += radeonsi
confflags_GALLIUM += --enable-gallium-llvm
- confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-3.7
+ confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-3.8
GALLIUM_DRIVERS += swrast
else
DRI_DRIVERS += swrast
commit 6faab1eb3ae95da1b71883be526ba520d2ad36f9
Author: Timo Aaltonen <tjaalton@debian.org>
Date: Mon Apr 18 11:21:58 2016 +0300
bump changelog
diff --git a/debian/changelog b/debian/changelog
index 76bb0a3..cf98bea 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+mesa (11.2.1-1) UNRELEASED; urgency=medium
+
+ * New upstream release.
+
+ -- Timo Aaltonen <tjaalton@debian.org> Mon, 18 Apr 2016 11:20:48 +0300
+
mesa (11.2.0-1) experimental; urgency=medium
[ Andreas Boll ]
commit 21e6440e82808364a6c2cc38ea92651c99b69aad
Author: Emil Velikov <emil.velikov@collabora.com>
Date: Sun Apr 17 16:03:34 2016 +0100
docs: add release notes for 11.2.1
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
diff --git a/docs/relnotes/11.2.1.html b/docs/relnotes/11.2.1.html
new file mode 100644
index 0000000..3100ebb
--- /dev/null
+++ b/docs/relnotes/11.2.1.html
@@ -0,0 +1,118 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.2.1 Release Notes / April 17, 2016</h1>
+
+<p>
+Mesa 11.2.1 is a bug fix release which fixes bugs found since the 11.2.0 release.
+</p>
+<p>
+Mesa 11.2.1 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1. OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93962">Bug 93962</a> - [HSW, regression, bisected, CTS] ES2-CTS.gtf.GL2FixedTests.scissor.scissor - segfault/asserts</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (2):</p>
+<ul>
+ <li>st/mesa: fix glReadBuffer() assertion failure</li>
+ <li>st/mesa: fix memleak in glDrawPixels cache code</li>
+</ul>
+
+<p>Christian Schmidbauer (1):</p>
+<ul>
+ <li>st/nine: specify WINAPI only for i386 and amd64</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+ <li>docs: add sha256 checksums for 11.2.0</li>
+ <li>configure.ac: update the path of the generated files</li>
+ <li>Update version to 11.2.1</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+ <li>glsl: allow usage of the keyword buffer before GLSL 430 / ESSL 310</li>
+</ul>
+
+<p>Iurie Salomov (1):</p>
+<ul>
+ <li>va: check null context in vlVaDestroyContext</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+ <li>i965/tiled_memcopy: Add aligned mem_copy parameters to the [de]tiling functions</li>
+ <li>i965/tiled_memcpy: Rework the RGBA -> BGRA mem_copy functions</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+ <li>i965: Fix textureSize() depth value for 1 layer surfaces on Gen4-6.</li>
+ <li>i965: Use brw->urb.min_vs_urb_entries instead of 32 for BLORP.</li>
+ <li>glsl: Lower variable indexing of system value arrays unconditionally.</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+ <li>drirc: add a workaround for blackness in Warsow</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+ <li>radeonsi: fix bounds check in si_create_vertex_elements</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+ <li>nv50/ir: do not try to attach JOIN ops to ATOM</li>
+</ul>
+
+<p>Thomas Hindoe Paaboel Andersen (1):</p>
+<ul>
+ <li>st/va: avoid dereference after free in vlVaDestroyImage</li>
+</ul>
+
+
+</div>
+</body>
+</html>
commit 696614cbff849dc3644a35307d3f82ea1ec66266
Author: Emil Velikov <emil.velikov@collabora.com>
Date: Sun Apr 17 14:51:55 2016 +0100
Update version to 11.2.1
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
diff --git a/VERSION b/VERSION
index b85c6c7..dc170a1 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-11.2.0
+11.2.1
commit 106c1facae8f8e2e27d670397d1276958f0143c4
Author: Iurie Salomov <iurcic@gmail.com>
Date: Tue Apr 12 23:24:30 2016 +0100
va: check null context in vlVaDestroyContext
Signed-off-by: Iurie Salomov <iurcic@gmail.com>
Reviewed-by: Julien Isorce <j.isorce@samsung.com>
(cherry picked from commit 047e3264f67bc54365be7b0e163b6910a9e9de3a)
Nominated-by: Emil Velikov <emil.velikov@collabora.com>
diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
index b25c381..25d587a 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -283,6 +283,10 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID context_id)
drv = VL_VA_DRIVER(ctx);
pipe_mutex_lock(drv->mutex);
context = handle_table_get(drv->htab, context_id);
+ if (!context) {
+ pipe_mutex_unlock(drv->mutex);
+ return VA_STATUS_ERROR_INVALID_CONTEXT;
+ }
if (context->decoder) {
if (u_reduce_video_profile(context->decoder->profile) ==
commit fbdd845ed213871c221f1076ea83621ebe779f4e
Author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Date: Tue Apr 12 12:23:31 2016 -0500
radeonsi: fix bounds check in si_create_vertex_elements
This was triggered by
dEQP-GLES3.functional.vertex_array_objects.all_attributes
Cc: "11.1 11.2" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
(cherry picked from commit a191e6b719848a17963f185954f1696fa5a2bcb1)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 663ebb3..58573ac 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3284,7 +3284,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
int i;
- assert(count < SI_MAX_ATTRIBS);
+ assert(count <= SI_MAX_ATTRIBS);
if (!v)
return NULL;
commit 389d5bbc98f04740bb64ec2e9c94dcab7adcbd6b
Author: Brian Paul <brianp@vmware.com>
Date: Mon Apr 11 18:54:28 2016 -0600
st/mesa: fix memleak in glDrawPixels cache code
If the glDrawPixels size changed, we leaked the previously cached
texture, if there was one. This patch fixes the reference counting,
adds a refcount assertion check, and better handles potential malloc()
failures.
Tested with a modified version of the drawpix Mesa demo which changed
the image size for each glDrawPixels call.
Cc: "11.2" <mesa-stable@lists.freedesktop.org>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
(cherry picked from commit 6c014782138634d5d36e1484bf498cef2b2d888f)
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 51d4ae5..4f39ed7 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -374,7 +374,7 @@ make_texture(struct st_context *st,
struct gl_context *ctx = st->ctx;
struct pipe_context *pipe = st->pipe;
mesa_format mformat;
- struct pipe_resource *pt;
+ struct pipe_resource *pt = NULL;
enum pipe_format pipeFormat;
GLenum baseInternalFormat;
@@ -393,10 +393,18 @@ make_texture(struct st_context *st,
unpack->SkipRows == 0 &&
unpack->SwapBytes == GL_FALSE &&
st->drawpix_cache.image) {
+ assert(st->drawpix_cache.texture);
+
/* check if the pixel data is the same */
if (memcmp(pixels, st->drawpix_cache.image, width * height * bpp) == 0) {
/* OK, re-use the cached texture */
- return st->drawpix_cache.texture;
+ pipe_resource_reference(&pt, st->drawpix_cache.texture);
+ /* refcount of returned texture should be at least two here. One
+ * reference for the cache to hold on to, one for the caller (which
+ * it will release), and possibly more held by the driver.
+ */
+ assert(pt->reference.count >= 2);
+ return pt;
}
}
@@ -515,8 +523,14 @@ make_texture(struct st_context *st,
st->drawpix_cache.image = malloc(width * height * bpp);
if (st->drawpix_cache.image) {
memcpy(st->drawpix_cache.image, pixels, width * height * bpp);
+ pipe_resource_reference(&st->drawpix_cache.texture, pt);
+ }
+ else {
+ /* out of memory, free/disable cached texture */
+ st->drawpix_cache.width = 0;
+ st->drawpix_cache.height = 0;
+ pipe_resource_reference(&st->drawpix_cache.texture, NULL);
}
- st->drawpix_cache.texture = pt;
}
#endif
@@ -1150,9 +1164,8 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
if (num_sampler_view > 1)
pipe_sampler_view_reference(&sv[1], NULL);
-#if !USE_DRAWPIXELS_CACHE
+ /* free the texture (but may persist in the cache) */
pipe_resource_reference(&pt, NULL);
-#endif
}
commit 3840e67bf5a5c89fc644816e721d4527557ab3ef
Author: Emil Velikov <emil.velikov@collabora.com>
Date: Tue Apr 5 14:29:41 2016 +0100
configure.ac: update the path of the generated files
... in order to determine if we need bison/flex. Failing to locate the
files will lead to mandating bison/flex even when building from a
release tarball.
CC: "11.2" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
(cherry picked from commit c481c8f7f135d4cf17e35bb5126bdcf6b5611940)
diff --git a/configure.ac b/configure.ac
index d4ce99a..8109ac4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -110,10 +110,10 @@ LT_INIT([disable-static])
AC_CHECK_PROG(RM, rm, [rm -f])
AX_PROG_BISON([],
- AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"],
+ AS_IF([test ! -f "$srcdir/src/compiler/glsl/glcpp/glcpp-parse.c"],
[AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])]))
AX_PROG_FLEX([],
- AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-lex.c"],
+ AS_IF([test ! -f "$srcdir/src/compiler/glsl/glcpp/glcpp-lex.c"],
[AC_MSG_ERROR([flex not found - unable to compile glcpp-lex.l])]))
AC_CHECK_PROG(INDENT, indent, indent, cat)
commit 5b644f3a06da76cdd6d404c98696e6d78c39c0cc
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat Apr 9 13:11:42 2016 -0400
glsl: allow usage of the keyword buffer before GLSL 430 / ESSL 310
The GLSL 4.20 and ESSL 3.00 specs don't list 'buffer' as a reserved
keyword. Make the parser ignore it unless GLSL 4.30 / ESSL 3.10 are
used, or ARB_shader_storage_buffer_objects is enabled.
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit 9b5bd20eb2d09e1ec2319b55c83ad7f28b6fefee)
diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index 9704fc7..50e862a 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -304,7 +304,7 @@ in return IN_TOK;
out return OUT_TOK;
inout return INOUT_TOK;
uniform return UNIFORM;
-buffer return BUFFER;
+buffer KEYWORD_WITH_ALT(0, 0, 430, 310, yyextra->ARB_shader_storage_buffer_object_enable, BUFFER);
varying DEPRECATED_ES_KEYWORD(VARYING);
centroid KEYWORD(120, 300, 120, 300, CENTROID);
invariant KEYWORD(120, 100, 120, 100, INVARIANT);
commit bbbcad4f0d915c31f25b1beee19cea216ced4796
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu Apr 7 11:21:19 2016 -0700
i965/tiled_memcpy: Rework the RGBA -> BGRA mem_copy functions
This splits the two copy functions into three: One for unaligned copies,
one for aligned sources, and one for aligned destinations. Thanks to the
previous commit, we are now guaranteed that the aligned ones will *only*
operate on aligned memory so they should be safe.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93962
Cc: "11.1 11.2" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
(cherry picked from commit d2b32656e18607f5807b3f4d4dde02568370b9bf)
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 89de594..8082b9d 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -56,21 +56,32 @@ static const uint32_t ytile_width = 128;
static const uint32_t ytile_height = 32;
static const uint32_t ytile_span = 16;
+/**
+ * Copy RGBA to BGRA - swap R and B.
+ */
+static inline void *
+rgba8_copy(void *dst, const void *src, size_t bytes)
+{
+ uint8_t *d = dst;
+ uint8_t const *s = src;
+
+ assert(bytes % 4 == 0);
+
+ while (bytes >= 4) {
+ d[0] = s[2];
+ d[1] = s[1];
+ d[2] = s[0];
+ d[3] = s[3];
+ d += 4;
+ s += 4;
+ bytes -= 4;
+ }
+ return dst;
+}
+
#ifdef __SSSE3__
static const uint8_t rgba8_permutation[16] =
{ 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };
-
-/* NOTE: dst must be 16-byte aligned. src may be unaligned. */
-#define rgba8_copy_16_aligned_dst(dst, src) \
- _mm_store_si128((__m128i *)(dst), \
- _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(src)), \
- *(__m128i *) rgba8_permutation))
-
-/* NOTE: src must be 16-byte aligned. dst may be unaligned. */
-#define rgba8_copy_16_aligned_src(dst, src) \
- _mm_storeu_si128((__m128i *)(dst), \
- _mm_shuffle_epi8(_mm_load_si128((__m128i *)(src)), \
- *(__m128i *) rgba8_permutation))
#endif
/**
@@ -82,32 +93,21 @@ rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
uint8_t *d = dst;
uint8_t const *s = src;
-#ifdef __SSSE3__
- if (bytes == 16) {
- assert(!(((uintptr_t)dst) & 0xf));
- rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
- return dst;
- }
+ assert(bytes == 0 || !(((uintptr_t)dst) & 0xf));
- if (bytes == 64) {
- assert(!(((uintptr_t)dst) & 0xf));
- rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
- rgba8_copy_16_aligned_dst(d+16, s+16);
- rgba8_copy_16_aligned_dst(d+32, s+32);
- rgba8_copy_16_aligned_dst(d+48, s+48);
- return dst;
+#ifdef __SSSE3__
+ while (bytes >= 16) {
+ _mm_store_si128((__m128i *)d,
+ _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)s),
+ *(__m128i *) rgba8_permutation));
+ s += 16;
+ d += 16;
+ bytes -= 16;
}
#endif
- while (bytes >= 4) {
- d[0] = s[2];
- d[1] = s[1];
- d[2] = s[0];
- d[3] = s[3];
- d += 4;
- s += 4;
- bytes -= 4;
- }
+ rgba8_copy(d, s, bytes);
+
return dst;
}
@@ -120,32 +120,21 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
uint8_t *d = dst;
uint8_t const *s = src;
-#ifdef __SSSE3__
- if (bytes == 16) {
- assert(!(((uintptr_t)src) & 0xf));
- rgba8_copy_16_aligned_src(d+ 0, s+ 0);
- return dst;
- }
+ assert(bytes == 0 || !(((uintptr_t)src) & 0xf));
- if (bytes == 64) {
- assert(!(((uintptr_t)src) & 0xf));
- rgba8_copy_16_aligned_src(d+ 0, s+ 0);
- rgba8_copy_16_aligned_src(d+16, s+16);
- rgba8_copy_16_aligned_src(d+32, s+32);
- rgba8_copy_16_aligned_src(d+48, s+48);
- return dst;
+#ifdef __SSSE3__
+ while (bytes >= 16) {
+ _mm_storeu_si128((__m128i *)d,
+ _mm_shuffle_epi8(_mm_load_si128((__m128i *)s),
+ *(__m128i *) rgba8_permutation));
+ s += 16;
+ d += 16;
+ bytes -= 16;
}
#endif
- while (bytes >= 4) {
- d[0] = s[2];
- d[1] = s[1];
- d[2] = s[0];
- d[3] = s[3];
- d += 4;
- s += 4;
- bytes -= 4;
- }
+ rgba8_copy(d, s, bytes);
+
return dst;
}
@@ -404,10 +393,10 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (mem_copy == memcpy)
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_dst)
+ else if (mem_copy == rgba8_copy)
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
+ rgba8_copy, rgba8_copy_aligned_dst);
else
unreachable("not reached");
} else {
@@ -415,10 +404,10 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_dst)
+ else if (mem_copy == rgba8_copy)
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
+ rgba8_copy, rgba8_copy_aligned_dst);
else
unreachable("not reached");
}
@@ -447,20 +436,20 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (mem_copy == memcpy)
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_dst)
+ else if (mem_copy == rgba8_copy)
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
+ rgba8_copy, rgba8_copy_aligned_dst);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_dst)
+ else if (mem_copy == rgba8_copy)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
+ rgba8_copy, rgba8_copy_aligned_dst);
else
unreachable("not reached");
}
@@ -489,20 +478,20 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (mem_copy == memcpy)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_src)
+ else if (mem_copy == rgba8_copy)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src, rgba8_copy_aligned_src);
+ rgba8_copy, rgba8_copy_aligned_src);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_src)
+ else if (mem_copy == rgba8_copy)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src, rgba8_copy_aligned_src);
+ rgba8_copy, rgba8_copy_aligned_src);
else
unreachable("not reached");
}
@@ -531,20 +520,20 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (mem_copy == memcpy)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_src)
+ else if (mem_copy == rgba8_copy)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src, rgba8_copy_aligned_src);
+ rgba8_copy, rgba8_copy_aligned_src);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
- else if (mem_copy == rgba8_copy_aligned_src)
+ else if (mem_copy == rgba8_copy)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src, rgba8_copy_aligned_src);
+ rgba8_copy, rgba8_copy_aligned_src);
else
unreachable("not reached");
}
@@ -773,8 +762,7 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
if (format == GL_BGRA) {
*mem_copy = memcpy;
} else if (format == GL_RGBA) {
- *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst
- : rgba8_copy_aligned_src;
+ *mem_copy = rgba8_copy;
}
} else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) ||
(tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM)) {
@@ -783,8 +771,7 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
/* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can
* use the same function.
*/
- *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst
- : rgba8_copy_aligned_src;
+ *mem_copy = rgba8_copy;
} else if (format == GL_RGBA) {
*mem_copy = memcpy;
}
commit b7bd6944c8ccfe2b6c0b1c68e16980fcac5cf7f0
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu Apr 7 10:52:28 2016 -0700
i965/tiled_memcopy: Add aligned mem_copy parameters to the [de]tiling functions
Each of the [de]tiling functions has three mem_copy calls:
1) Left edge to tile boundary
2) Tile boundary to tile boundary in a loop
3) Tile boundary to right edge
Copies 2 and 3 start at a tile edge so the pointer to tiled memory is
guaranteed to be at least 16-byte aligned. Copy 1, on the other hand,
starts at some arbitrary place in the tile so it doesn't have any such
alignment guarantees.
Cc: "11.1 11.2" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
(cherry picked from commit f6f54a29ca9bc8c1a1a994ff4e3ee09772de78e4)
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 2383401..89de594 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -172,6 +172,12 @@ typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
* Copy texture data from linear to X tile layout.
*
* \copydoc tile_copy_fn
+ *
+ * The mem_copy parameters allow the user to specify an alternative mem_copy
+ * function that, for instance, may do RGBA -> BGRA swizzling. The first
+ * function must handle any memory alignment while the second function must
+ * only handle 16-byte alignment in whichever side (source or destination) is
+ * tiled.
*/
static inline void
linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
@@ -179,7 +185,8 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
char *dst, const char *src,
int32_t src_pitch,
uint32_t swizzle_bit,
- mem_copy_fn mem_copy)
+ mem_copy_fn mem_copy,
+ mem_copy_fn mem_copy_align16)
{
/* The copy destination offset for each range copied is the sum of
* an X offset 'x0' or 'xo' and a Y offset 'yo.'
@@ -200,10 +207,10 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0);
for (xo = x1; xo < x2; xo += xtile_span) {
- mem_copy(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span);
+ mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span);
}
- mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
+ mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
src += src_pitch;
}
@@ -220,7 +227,8 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
char *dst, const char *src,
int32_t src_pitch,
uint32_t swizzle_bit,
- mem_copy_fn mem_copy)
+ mem_copy_fn mem_copy,
+ mem_copy_fn mem_copy_align16)
{
/* Y tiles consist of columns that are 'ytile_span' wide (and the same height
* as the tile). Thus the destination offset for (x,y) is the sum of:
@@ -259,12 +267,12 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
* at each step so we don't need to calculate it explicitly.
*/
for (x = x1; x < x2; x += ytile_span) {
- mem_copy(dst + ((xo + yo) ^ swizzle), src + x, ytile_span);
+ mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span);
xo += bytes_per_column;
swizzle ^= swizzle_bit;
}
- mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
+ mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
src += src_pitch;
}
@@ -281,7 +289,8 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
char *dst, const char *src,
int32_t dst_pitch,
uint32_t swizzle_bit,
- mem_copy_fn mem_copy)
+ mem_copy_fn mem_copy,
+ mem_copy_fn mem_copy_align16)
{
/* The copy destination offset for each range copied is the sum of
* an X offset 'x0' or 'xo' and a Y offset 'yo.'
@@ -302,10 +311,10 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0);
for (xo = x1; xo < x2; xo += xtile_span) {
- mem_copy(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span);
+ mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span);
}
- mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
+ mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
dst += dst_pitch;
}
@@ -322,7 +331,8 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
char *dst, const char *src,
int32_t dst_pitch,
uint32_t swizzle_bit,
- mem_copy_fn mem_copy)
+ mem_copy_fn mem_copy,
+ mem_copy_fn mem_copy_align16)
{
/* Y tiles consist of columns that are 'ytile_span' wide (and the same height
* as the tile). Thus the destination offset for (x,y) is the sum of:
@@ -361,12 +371,12 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
* at each step so we don't need to calculate it explicitly.
*/
for (x = x1; x < x2; x += ytile_span) {
- mem_copy(dst + x, src + ((xo + yo) ^ swizzle), ytile_span);
+ mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span);
xo += bytes_per_column;
swizzle ^= swizzle_bit;
}
- mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
+ mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
dst += dst_pitch;
}
@@ -393,26 +403,27 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) {
if (mem_copy == memcpy)
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
- dst, src, src_pitch, swizzle_bit, memcpy);
+ dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_dst)
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst);
+ rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, memcpy);
+ dst, src, src_pitch, swizzle_bit,
+ memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_dst)
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst);
+ rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
else
unreachable("not reached");
}
linear_to_xtiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, mem_copy);
+ dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
}
/**
@@ -435,26 +446,26 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) {
if (mem_copy == memcpy)
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
- dst, src, src_pitch, swizzle_bit, memcpy);
+ dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_dst)
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst);
+ rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, memcpy);
+ dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_dst)
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
dst, src, src_pitch, swizzle_bit,
- rgba8_copy_aligned_dst);
+ rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
else
unreachable("not reached");
}
linear_to_ytiled(x0, x1, x2, x3, y0, y1,
- dst, src, src_pitch, swizzle_bit, mem_copy);
+ dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
}
/**
@@ -477,26 +488,26 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) {
if (mem_copy == memcpy)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
- dst, src, dst_pitch, swizzle_bit, memcpy);
+ dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_src)
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src);
+ rgba8_copy_aligned_src, rgba8_copy_aligned_src);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, memcpy);
+ dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_src)
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src);
+ rgba8_copy_aligned_src, rgba8_copy_aligned_src);
else
unreachable("not reached");
}
xtiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, mem_copy);
+ dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
}
/**
@@ -519,26 +530,26 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) {
if (mem_copy == memcpy)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
- dst, src, dst_pitch, swizzle_bit, memcpy);
+ dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_src)
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src);
+ rgba8_copy_aligned_src, rgba8_copy_aligned_src);
else
unreachable("not reached");
} else {
if (mem_copy == memcpy)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
- dst, src, dst_pitch, swizzle_bit, memcpy);
+ dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
else if (mem_copy == rgba8_copy_aligned_src)
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
dst, src, dst_pitch, swizzle_bit,
- rgba8_copy_aligned_src);
+ rgba8_copy_aligned_src, rgba8_copy_aligned_src);
else
unreachable("not reached");
}
Reply to: