libdrm: Changes to 'upstream-unstable'
configure.ac | 17
include/drm/drm_fourcc.h | 6
intel/Makefile.am | 2
intel/intel_aub.h | 123 +++
intel/intel_bufmgr.h | 19
intel/intel_bufmgr_gem.c | 514 ++++++++++++++-
intel/intel_decode.c | 253 +++++--
intel/tests/gen7-3d.batch |binary
intel/tests/gen7-3d.batch-ref.txt | 1290 ++++----------------------------------
radeon/radeon_cs_space.c | 15
radeon/radeon_surface.c | 14
xf86drmMode.h | 5
12 files changed, 1025 insertions(+), 1233 deletions(-)
New commits:
commit 51c3e7d7335ecdf572968db7d3eed661e8a61810
Author: Eric Anholt <eric@anholt.net>
Date: Fri Mar 16 16:11:10 2012 -0700
configure: Bump version for 2.4.32.
diff --git a/configure.ac b/configure.ac
index 71a596c..ff2c840 100644
--- a/configure.ac
+++ b/configure.ac
@@ -20,7 +20,7 @@
AC_PREREQ([2.63])
AC_INIT([libdrm],
- [2.4.31],
+ [2.4.32],
[https://bugs.freedesktop.org/enter_bug.cgi?product=DRI],
[libdrm])
commit 5de5b7484a3a41554e16c02a544a45db5516b031
Author: Eric Anholt <eric@anholt.net>
Date: Tue Mar 13 16:49:53 2012 -0700
intel: Quiet two more valgrind complaints with recent changes.
These are more cases where valgrind doesn't understand what gets read
or written by our ioctls.
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 51b963f..3c91090 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -2647,6 +2647,7 @@ get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
}
}
+ VG_CLEAR(devid);
VG_CLEAR(gp);
gp.param = I915_PARAM_CHIPSET_ID;
gp.value = &devid;
@@ -2790,6 +2791,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
bufmgr_gem->gtt_size -= 256*1024*1024;
}
+ VG_CLEAR(gp);
gp.value = &tmp;
gp.param = I915_PARAM_HAS_EXECBUF2;
commit 9d18ad254afc2afc41a919b86cd51ea40cfd8f0b
Author: Eric Anholt <eric@anholt.net>
Date: Fri Mar 2 10:27:55 2012 -0800
intel: Add per-dword decode of gen7 3DPRIMITIVE.
diff --git a/intel/intel_decode.c b/intel/intel_decode.c
index af621d4..df9b704 100644
--- a/intel/intel_decode.c
+++ b/intel/intel_decode.c
@@ -2577,10 +2577,8 @@ static const char *get_965_element_component(uint32_t data, int component)
}
}
-static const char *get_965_prim_type(uint32_t data)
+static const char *get_965_prim_type(uint32_t primtype)
{
- uint32_t primtype = (data >> 10) & 0x1f;
-
switch (primtype) {
case 0x01:
return "point list";
@@ -3009,7 +3007,7 @@ gen4_3DPRIMITIVE(struct drm_intel_decode *ctx)
{
instr_out(ctx, 0,
"3DPRIMITIVE: %s %s\n",
- get_965_prim_type(ctx->data[0]),
+ get_965_prim_type((ctx->data[0] >> 10) & 0x1f),
(ctx->data[0] & (1 << 15)) ? "random" : "sequential");
instr_out(ctx, 1, "vertex count\n");
instr_out(ctx, 2, "start vertex\n");
@@ -3021,6 +3019,27 @@ gen4_3DPRIMITIVE(struct drm_intel_decode *ctx)
}
+/**
+ * Decodes the seven-dword gen7 3DPRIMITIVE packet, one output line per
+ * dword.
+ *
+ * Dword 0 is reported with its "indirect" (bit 10) and "predicated"
+ * (bit 8) flags, dword 1 with the primitive type taken from its low six
+ * bits and the random/sequential flag in bit 8.  Dwords 2-6 are labelled
+ * with the draw parameter they hold, or "ignored" for an indirect draw.
+ *
+ * Returns the packet length in dwords, which is always 7.
+ */
static int
+gen7_3DPRIMITIVE(struct drm_intel_decode *ctx)
+{
+	bool indirect = !!(ctx->data[0] & (1 << 10));
+
+	instr_out(ctx, 0,
+		  "3DPRIMITIVE: %s%s\n",
+		  indirect ? " indirect" : "",
+		  (ctx->data[0] & (1 << 8)) ? " predicated" : "");
+	instr_out(ctx, 1, "%s %s\n",
+		  get_965_prim_type(ctx->data[1] & 0x3f),
+		  (ctx->data[1] & (1 << 8)) ? "random" : "sequential");
+
+	/* Each label carries its own newline: the "ignored" variant must
+	 * end the line too, or the five dwords of an indirect draw run
+	 * together on one output line.
+	 */
+	instr_out(ctx, 2, indirect ? "ignored\n" : "vertex count\n");
+	instr_out(ctx, 3, indirect ? "ignored\n" : "start vertex\n");
+	instr_out(ctx, 4, indirect ? "ignored\n" : "instance count\n");
+	instr_out(ctx, 5, indirect ? "ignored\n" : "start instance\n");
+	instr_out(ctx, 6, indirect ? "ignored\n" : "index bias\n");
+
+	return 7;
+}
+
+static int
decode_3d_965(struct drm_intel_decode *ctx)
{
uint32_t opcode;
@@ -3120,7 +3139,7 @@ decode_3d_965(struct drm_intel_decode *ctx)
{ 0x7917, 0x00ff, 2, 2+128*2, "3DSTATE_SO_DECL_LIST" },
{ 0x7918, 0x00ff, 4, 4, "3DSTATE_SO_BUFFER" },
{ 0x7a00, 0x00ff, 4, 6, "PIPE_CONTROL" },
- { 0x7b00, 0x00ff, 7, 7, "3DPRIMITIVE", 7 },
+ { 0x7b00, 0x00ff, 7, 7, NULL, 7, gen7_3DPRIMITIVE },
{ 0x7b00, 0x00ff, 6, 6, NULL, 0, gen4_3DPRIMITIVE },
}, *opcode_3d = NULL;
diff --git a/intel/tests/gen7-3d.batch-ref.txt b/intel/tests/gen7-3d.batch-ref.txt
index 1488ca5..be3c85e 100644
--- a/intel/tests/gen7-3d.batch-ref.txt
+++ b/intel/tests/gen7-3d.batch-ref.txt
@@ -202,11 +202,11 @@
0x12300324: 0x11230000: (X, Y, 0.0, 1.0), dst offset 0x00 bytes
0x12300328: 0x02400008: buffer 0: invalid, type 0x0040, src offset 0x0008 bytes
0x1230032c: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes
-0x12300330: 0x7b000005: 3DPRIMITIVE
-0x12300334: 0x00000007: dword 1
-0x12300338: 0x00000004: dword 2
-0x1230033c: 0x00000000: dword 3
-0x12300340: 0x00000001: dword 4
-0x12300344: 0x00000000: dword 5
-0x12300348: 0x00000000: dword 6
+0x12300330: 0x7b000005: 3DPRIMITIVE:
+0x12300334: 0x00000007: quad list sequential
+0x12300338: 0x00000004: vertex count
+0x1230033c: 0x00000000: start vertex
+0x12300340: 0x00000001: instance count
+0x12300344: 0x00000000: start instance
+0x12300348: 0x00000000: index bias
0x1230034c: 0x05000000: MI_BATCH_BUFFER_END
commit 9b87fd9a3df8c59461bc90b4620526d10f9b5771
Author: Eric Anholt <eric@anholt.net>
Date: Fri Mar 2 10:18:51 2012 -0800
intel: Move the gen4-6 3DPRIMITIVE handling out of the switch statement.
diff --git a/intel/intel_decode.c b/intel/intel_decode.c
index 2ea8f67..af621d4 100644
--- a/intel/intel_decode.c
+++ b/intel/intel_decode.c
@@ -3005,6 +3005,22 @@ gen7_3DSTATE_WM(struct drm_intel_decode *ctx)
}
+/**
+ * Decodes the six-dword 3DPRIMITIVE packet: the header dword is printed
+ * with its primitive type and random/sequential flag (bit 15), followed
+ * by one labelled line for each of the five draw parameters.
+ *
+ * Returns the packet length in dwords, which is always 6.
+ */
static int
+gen4_3DPRIMITIVE(struct drm_intel_decode *ctx)
+{
+	const uint32_t header = ctx->data[0];
+	const char *access;
+
+	if (header & (1 << 15))
+		access = "random";
+	else
+		access = "sequential";
+
+	instr_out(ctx, 0, "3DPRIMITIVE: %s %s\n",
+		  get_965_prim_type(header), access);
+
+	/* Dwords 1-5: the draw parameters, in packet order. */
+	instr_out(ctx, 1, "vertex count\n");
+	instr_out(ctx, 2, "start vertex\n");
+	instr_out(ctx, 3, "instance count\n");
+	instr_out(ctx, 4, "start instance\n");
+	instr_out(ctx, 5, "index bias\n");
+
+	return 6;
+}
+
+static int
decode_3d_965(struct drm_intel_decode *ctx)
{
uint32_t opcode;
@@ -3105,7 +3121,7 @@ decode_3d_965(struct drm_intel_decode *ctx)
{ 0x7918, 0x00ff, 4, 4, "3DSTATE_SO_BUFFER" },
{ 0x7a00, 0x00ff, 4, 6, "PIPE_CONTROL" },
{ 0x7b00, 0x00ff, 7, 7, "3DPRIMITIVE", 7 },
- { 0x7b00, 0x00ff, 6, 6, "3DPRIMITIVE" },
+ { 0x7b00, 0x00ff, 6, 6, NULL, 0, gen4_3DPRIMITIVE },
}, *opcode_3d = NULL;
opcode = (data[0] & 0xffff0000) >> 16;
@@ -3593,20 +3609,6 @@ decode_3d_965(struct drm_intel_decode *ctx)
instr_out(ctx, 3, "immediate dword high\n");
return len;
}
- case 0x7b00:
- if (ctx->gen == 7)
- break;
-
- instr_out(ctx, 0,
- "3DPRIMITIVE: %s %s\n",
- get_965_prim_type(data[0]),
- (data[0] & (1 << 15)) ? "random" : "sequential");
- instr_out(ctx, 1, "vertex count\n");
- instr_out(ctx, 2, "start vertex\n");
- instr_out(ctx, 3, "instance count\n");
- instr_out(ctx, 4, "start instance\n");
- instr_out(ctx, 5, "index bias\n");
- return len;
}
if (opcode_3d) {
commit 99c73378a1b440bcf594742445dfe14ab1e89128
Author: Eric Anholt <eric@anholt.net>
Date: Fri Feb 10 04:12:15 2012 -0800
intel: Add support for (possibly) unsynchronized maps.
This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path
in GL_ARB_map_buffer_range. Improves Unigine Tropics performance at
1024x768 by 2.30482% +/- 0.0492146% (n=61)
v2: Fix comment grammar.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index a8062c5..45389e1 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -156,8 +156,10 @@ void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr);
void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr);
void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr,
int limit);
+int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo);
int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
+
int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo);
void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start);
void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 0eb57c4..51b963f 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -1189,15 +1189,13 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
return 0;
}
-int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+static int
+map_gtt(drm_intel_bo *bo)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
- struct drm_i915_gem_set_domain set_domain;
int ret;
- pthread_mutex_lock(&bufmgr_gem->lock);
-
if (bo_gem->map_count++ == 0)
drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
@@ -1223,7 +1221,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
strerror(errno));
if (--bo_gem->map_count == 0)
drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
- pthread_mutex_unlock(&bufmgr_gem->lock);
return ret;
}
@@ -1240,7 +1237,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
strerror(errno));
if (--bo_gem->map_count == 0)
drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
- pthread_mutex_unlock(&bufmgr_gem->lock);
return ret;
}
}
@@ -1250,7 +1246,33 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
bo_gem->gtt_virtual);
- /* Now move it to the GTT domain so that the CPU caches are flushed */
+ return 0;
+}
+
+int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+{
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+ drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+ struct drm_i915_gem_set_domain set_domain;
+ int ret;
+
+ pthread_mutex_lock(&bufmgr_gem->lock);
+
+ ret = map_gtt(bo);
+ if (ret) {
+ pthread_mutex_unlock(&bufmgr_gem->lock);
+ return ret;
+ }
+
+ /* Now move it to the GTT domain so that the GPU and CPU
+ * caches are flushed and the GPU isn't actively using the
+ * buffer.
+ *
+ * The pagefault handler does this domain change for us when
+ * it has unbound the BO from the GTT, but it's up to us to
+ * tell it when we're about to use things if we had done
+ * rendering and it still happens to be bound to the GTT.
+ */
VG_CLEAR(set_domain);
set_domain.handle = bo_gem->gem_handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
@@ -1271,6 +1293,42 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
return 0;
}
+/**
+ * Maps the buffer object through the GTT like the normal GTT mapping,
+ * but without waiting for the GPU to finish reading from or rendering
+ * to the buffer.
+ *
+ * This exists for GL_ARB_map_buffer_range: the user creates a buffer,
+ * maps it, fills some space, issues a draw, then maps it again without
+ * synchronizing, promising not to overwrite data the GPU is still
+ * using (or accepting that rendering is undefined if it does).
+ *
+ * Returns 0 on success, otherwise the error from the underlying map.
+ */
+int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	int ret;
+
+	if (bufmgr_gem->has_llc) {
+		pthread_mutex_lock(&bufmgr_gem->lock);
+		ret = map_gtt(bo);
+		pthread_mutex_unlock(&bufmgr_gem->lock);
+	} else {
+		/* The CPU cache isn't coherent with the GTT here, so take
+		 * the regular synchronized mapping.  We don't track
+		 * whether the buffer was last used on the CPU side via
+		 * drm_intel_bo_map or drm_intel_gem_bo_map_gtt, so
+		 * skipping the synchronization could corrupt the buffer
+		 * even when the user does reasonable things.
+		 */
+		ret = drm_intel_gem_bo_map_gtt(bo);
+	}
+
+	return ret;
+}
+
static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
commit 3a8884851b72af012a8cb2beea320f094a58e5eb
Author: Eric Anholt <eric@anholt.net>
Date: Mon Feb 27 17:26:05 2012 -0800
intel: Fix error check for I915_PARAM_HAS_LLC.
drmIoctl returns -1 on error with errno set to the error value. Other
users of it in this file just check for != 0, and only use errno when
they need to send an error value on to the caller of the API.
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index e87690d..0eb57c4 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -2753,7 +2753,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
gp.param = I915_PARAM_HAS_LLC;
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
- if (ret == -EINVAL) {
+ if (ret != 0) {
/* Kernel does not support the HAS_LLC query, fall back to GPU
* generation detection and assume that we have LLC on GEN6/7
*/
commit c9ce2edfc8d33e760667529250e86e93ff656c3d
Author: Eric Anholt <eric@anholt.net>
Date: Fri Mar 9 16:08:23 2012 -0800
intel: Bump the copyright dates on the bufmgr files.
We've been hacking these constantly.
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index fa6f2b8..a8062c5 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -1,5 +1,5 @@
/*
- * Copyright © 2008 Intel Corporation
+ * Copyright © 2008-2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index d56593a..e87690d 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -1,7 +1,7 @@
/**************************************************************************
*
* Copyright © 2007 Red Hat Inc.
- * Copyright © 2007 Intel Corporation
+ * Copyright © 2007-2012 Intel Corporation
* Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
* All Rights Reserved.
*
commit 4db16a9480af2c4f36eb8023193cd54545efbe54
Author: Eric Anholt <eric@anholt.net>
Date: Tue Oct 11 15:59:03 2011 -0700
intel: Add .aub file output support.
This will allow the driver to capture all of its execution state to a
file for later debugging. intel_gpu_dump is limited in that it only
captures batchbuffers, and Mesa's captures, while more complete, still
capture only a portion of the state involved in execution.
This is a squash commit of a long series of hacking as we tried to get
the resulting traces to work in the internal simulator. It contains
contributions by Yuanhan Liu and Kenneth Graunke.
v2: Drop the MI_FLUSH_ENABLE setup.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
diff --git a/intel/Makefile.am b/intel/Makefile.am
index 06362b6..dc01a96 100644
--- a/intel/Makefile.am
+++ b/intel/Makefile.am
@@ -53,6 +53,7 @@ intel_bufmgr_gem_o_CFLAGS = $(AM_CFLAGS) -c99
libdrm_intelincludedir = ${includedir}/libdrm
libdrm_intelinclude_HEADERS = intel_bufmgr.h \
+ intel_aub.h \
intel_debug.h
# This may be interesting even outside of "make check", due to the -dump option.
diff --git a/intel/intel_aub.h b/intel/intel_aub.h
new file mode 100644
index 0000000..a36fd53
--- /dev/null
+++ b/intel/intel_aub.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file intel_aub.h
+ *
+ * The AUB file is a file format used by Intel's internal simulation
+ * and other validation tools. It can be used at various levels by a
+ * driver to input state to the simulated hardware or a replaying
+ * debugger.
+ *
+ * We choose to dump AUB files using the trace block format for ease
+ * of implementation -- dump out the blocks of memory as plain blobs
+ * and insert ring commands to execute the batchbuffer blob.
+ */
+
+#ifndef _INTEL_AUB_H
+#define _INTEL_AUB_H
+
+/* GPU command dwords (as opposed to AUB packet headers). */
+#define AUB_MI_NOOP			(0)
+#define AUB_MI_BATCH_BUFFER_START	(0x31 << 23)
+#define AUB_PIPE_CONTROL		(0x7a000002)
+
+/* DW0: instruction type. */
+
+/* Unsigned constant: shifting a signed 7 into bit 31 would overflow int. */
+#define CMD_AUB			(7u << 29)
+
+#define CMD_AUB_HEADER		(CMD_AUB | (1 << 23) | (0x05 << 16))
+/* DW1 */
+# define AUB_HEADER_MAJOR_SHIFT		24
+# define AUB_HEADER_MINOR_SHIFT		16
+
+#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16))
+#define CMD_AUB_DUMP_BMP	(CMD_AUB | (1 << 23) | (0x9e << 16))
+
+/* DW1 */
+/* Operation: bits 7:0. */
+#define AUB_TRACE_OPERATION_MASK	0x000000ff
+#define AUB_TRACE_OP_COMMENT		0x00000000
+#define AUB_TRACE_OP_DATA_WRITE		0x00000001
+#define AUB_TRACE_OP_COMMAND_WRITE	0x00000002
+#define AUB_TRACE_OP_MMIO_WRITE		0x00000003
+/* operation = TRACE_DATA_WRITE, Type (bits 15:8) */
+#define AUB_TRACE_TYPE_MASK		0x0000ff00
+#define AUB_TRACE_TYPE_NOTYPE		(0 << 8)
+#define AUB_TRACE_TYPE_BATCH		(1 << 8)
+#define AUB_TRACE_TYPE_VERTEX_BUFFER	(5 << 8)
+#define AUB_TRACE_TYPE_2D_MAP		(6 << 8)
+#define AUB_TRACE_TYPE_CUBE_MAP		(7 << 8)
+#define AUB_TRACE_TYPE_VOLUME_MAP	(9 << 8)
+#define AUB_TRACE_TYPE_1D_MAP		(10 << 8)
+#define AUB_TRACE_TYPE_CONSTANT_BUFFER	(11 << 8)
+#define AUB_TRACE_TYPE_CONSTANT_URB	(12 << 8)
+#define AUB_TRACE_TYPE_INDEX_BUFFER	(13 << 8)
+#define AUB_TRACE_TYPE_GENERAL		(14 << 8)
+#define AUB_TRACE_TYPE_SURFACE		(15 << 8)
+
+
+/* operation = TRACE_COMMAND_WRITE, Type = (same bits 15:8 as above) */
+#define AUB_TRACE_TYPE_RING_HWB		(1 << 8)
+#define AUB_TRACE_TYPE_RING_PRB0	(2 << 8)
+#define AUB_TRACE_TYPE_RING_PRB1	(3 << 8)
+#define AUB_TRACE_TYPE_RING_PRB2	(4 << 8)
+
+/* Address space (bits 23:16) */
+#define AUB_TRACE_ADDRESS_SPACE_MASK	0x00ff0000
+#define AUB_TRACE_MEMTYPE_GTT		(0 << 16)
+#define AUB_TRACE_MEMTYPE_LOCAL		(1 << 16)
+#define AUB_TRACE_MEMTYPE_NONLOCAL	(2 << 16)
+#define AUB_TRACE_MEMTYPE_PCI		(3 << 16)
+#define AUB_TRACE_MEMTYPE_GTT_ENTRY     (4 << 16)
+
+/* DW2 */
+/* operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_GENERAL_STATE */
+#define AUB_TRACE_GENERAL_STATE_MASK	0x000000ff
+
+#define AUB_TRACE_VS_STATE		0x00000001
+#define AUB_TRACE_GS_STATE		0x00000002
+#define AUB_TRACE_CL_STATE		0x00000003
+#define AUB_TRACE_SF_STATE		0x00000004
+#define AUB_TRACE_WM_STATE		0x00000005
+#define AUB_TRACE_CC_STATE		0x00000006
+#define AUB_TRACE_CL_VP			0x00000007
+#define AUB_TRACE_SF_VP			0x00000008
+#define AUB_TRACE_CC_VP			0x00000009
+#define AUB_TRACE_SAMPLER_STATE		0x0000000a
+#define AUB_TRACE_KERNEL		0x0000000b
+#define AUB_TRACE_SCRATCH		0x0000000c
+#define AUB_TRACE_SDC			0x0000000d
+#define AUB_TRACE_BLEND_STATE		0x00000016
+#define AUB_TRACE_DEPTH_STENCIL_STATE	0x00000017
+
+/* operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_SURFACE_STATE */
+#define AUB_TRACE_SURFACE_STATE_MASK	0x00000ff00
+#define AUB_TRACE_BINDING_TABLE		0x000000100
+#define AUB_TRACE_SURFACE_STATE		0x000000200
+
+/* DW3: address */
+/* DW4: len */
+
+#endif /* _INTEL_AUB_H */
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 8036031..fa6f2b8 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -36,6 +36,7 @@
#include <stdio.h>
#include <stdint.h>
+#include <stdio.h>
struct drm_clip_rect;
@@ -84,6 +85,13 @@ struct _drm_intel_bo {
int handle;
};
+/**
+ * Pixel formats accepted by drm_intel_gem_bo_aub_dump_bmp().  The
+ * numeric value is what gets written into the BMP dump packet.
+ */
+enum aub_dump_bmp_format {
+ AUB_DUMP_BMP_FORMAT_8BIT = 1, /* dumped with 1 byte per pixel */
+ AUB_DUMP_BMP_FORMAT_ARGB_4444 = 4, /* dumped with 2 bytes per pixel */
+ AUB_DUMP_BMP_FORMAT_ARGB_0888 = 6, /* dumped with 4 bytes per pixel */
+ AUB_DUMP_BMP_FORMAT_ARGB_8888 = 7, /* dumped with 4 bytes per pixel */
+};
+
#define BO_ALLOC_FOR_RENDER (1<<0)
drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
@@ -154,6 +162,12 @@ int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo);
void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start);
void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
+void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable);
+void drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
+ int x1, int y1, int width, int height,
+ enum aub_dump_bmp_format format,
+ int pitch, int offset);
+
int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index ba38e50..d56593a 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -58,6 +58,7 @@
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
+#include "intel_aub.h"
#include "string.h"
#include "i915_drm.h"
@@ -121,6 +122,9 @@ typedef struct _drm_intel_bufmgr_gem {
unsigned int bo_reuse : 1;
unsigned int no_exec : 1;
bool fenced_relocs;
+
+ FILE *aub_file;
+ uint32_t aub_offset;
} drm_intel_bufmgr_gem;
#define DRM_INTEL_RELOC_FENCE (1<<0)
@@ -215,6 +219,8 @@ struct _drm_intel_bo_gem {
/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
bool mapped_cpu_write;
+
+ uint32_t aub_offset;
};
static unsigned int
@@ -1715,6 +1721,247 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
}
}
+/** Appends one 32-bit dword to the AUB file, in host byte order. */
+static void
+aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
+{
+	FILE *aub = bufmgr_gem->aub_file;
+
+	fwrite(&data, 1, sizeof(data), aub);
+}
+
+/** Appends @size raw bytes starting at @data to the AUB file. */
+static void
+aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
+{
+	FILE *aub = bufmgr_gem->aub_file;
+
+	fwrite(data, 1, size, aub);
+}
+
+/**
+ * Writes @size bytes of @bo's contents, starting at @offset, to the AUB
+ * file.  aub_write_trace_block() calls this right after emitting the
+ * block header that announces those @size bytes.
+ *
+ * A dword that one of the buffer's relocations points at is replaced
+ * by the relocation delta plus the target buffer's AUB address, so the
+ * dump refers to AUB addresses rather than to whatever value the
+ * buffer currently holds there.
+ */
+static void
+aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	uint32_t *data;
+	unsigned int i;
+
+	/* Zero-initialized so that a failed read below dumps zeros
+	 * rather than uninitialized heap contents.
+	 */
+	data = calloc(1, bo->size);
+	if (!data) {
+		/* The block header has already promised size bytes, so
+		 * pad with zeros to keep the packet stream parseable
+		 * instead of dereferencing NULL.
+		 */
+		const unsigned char zero = 0;
+
+		for (i = 0; i < size; i++)
+			fwrite(&zero, 1, 1, bufmgr_gem->aub_file);
+		return;
+	}
+	drm_intel_bo_get_subdata(bo, offset, size, data);
+
+	/* Easy mode: write out bo with no relocations */
+	if (!bo_gem->reloc_count) {
+		aub_out_data(bufmgr_gem, data, size);
+		free(data);
+		return;
+	}
+
+	/* Otherwise, handle the relocations while writing. */
+	for (i = 0; i < size / 4; i++) {
+		int r;
+		for (r = 0; r < bo_gem->reloc_count; r++) {
+			struct drm_i915_gem_relocation_entry *reloc;
+			drm_intel_reloc_target *info;
+
+			reloc = &bo_gem->relocs[r];
+			info = &bo_gem->reloc_target_info[r];
+
+			/* Relocation offsets are relative to the start
+			 * of the buffer, not of this chunk.
+			 */
+			if (reloc->offset == offset + i * 4) {
+				drm_intel_bo_gem *target_gem;
+				uint32_t val;
+
+				target_gem = (drm_intel_bo_gem *)info->bo;
+
+				val = reloc->delta;
+				val += target_gem->aub_offset;
+
+				aub_out(bufmgr_gem, val);
+				break;
+			}
+		}
+		if (r == bo_gem->reloc_count) {
+			/* no relocation, just the data */
+			aub_out(bufmgr_gem, data[i]);
+		}
+	}
+
+	free(data);
+}
+
+/**
+ * Assigns @bo the next free graphics address in the AUB file and
+ * advances the buffer manager's allocation cursor past it.
+ *
+ * The real GEM object address is deliberately not used: AUB dumping
+ * happens before execution, so that a dump is still produced when the
+ * hardware might hang, or even when the kernel exec call is disabled
+ * because the driver targets a different hardware generation.
+ */
+static void
+aub_bo_get_address(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	const uint32_t assigned = bufmgr_gem->aub_offset;
+
+	bo_gem->aub_offset = assigned;
+	bufmgr_gem->aub_offset = assigned + bo->size;
+
+	/* XXX: Handle aperture overflow. */
+	assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
+}
+
+/**
+ * Emits one data-write trace block for @bo: a five-dword header
+ * (command, memtype/type/operation, subtype, AUB address, byte count)
+ * followed by @size bytes of the buffer starting at @offset.
+ */
+static void
+aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
+		      uint32_t offset, uint32_t size)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	const uint32_t header[5] = {
+		CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2),
+		AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE,
+		subtype,
+		bo_gem->aub_offset + offset,
+		size,
+	};
+	unsigned int dw;
+
+	for (dw = 0; dw < 5; dw++)
+		aub_out(bufmgr_gem, header[dw]);
+
+	aub_write_bo_data(bo, offset, size);
+}
+
+/**
+ * Assigns @bo an AUB address and dumps its entire contents as untyped
+ * data.
+ */
+static void
+aub_write_bo(drm_intel_bo *bo)
+{
+	/* Large objects are split into several trace blocks: a 128kb
+	 * VBO would otherwise overflow the 16 bits of size field in the
+	 * packet header, and everything goes badly after that.
+	 */
+	const uint32_t max_block = 8 * 4096;
+	uint32_t offset = 0;
+
+	aub_bo_get_address(bo);
+
+	while (offset < bo->size) {
+		uint32_t chunk = bo->size - offset;
+
+		if (chunk > max_block)
+			chunk = max_block;
+
+		aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
+				      offset, chunk);
+		offset += chunk;
+	}
+}
+
+/*
+ * Builds a minimal ring buffer on the fly -- a single
+ * MI_BATCH_BUFFER_START pointing at batch_buffer -- and dumps it as a
+ * command-write trace block at the current AUB offset.
+ */
+static void
+aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
+			  uint32_t batch_buffer, int ring_flag)
+{
+	/* The ring that executes our batchbuffer; the rest stays zero. */
+	uint32_t ringbuffer[4096] = {
+		AUB_MI_BATCH_BUFFER_START,
+		batch_buffer,
+	};
+	const int ring_count = 2;
+	int ring;
+
+	/* PRB0 is the default ring; BSD submissions go to PRB1. */
+	if (ring_flag == I915_EXEC_BSD)
+		ring = AUB_TRACE_TYPE_RING_PRB1;
+	else
+		ring = AUB_TRACE_TYPE_RING_PRB0;
+
+	/* Write out the ring.  This appears to trigger execution of
+	 * the ring in the simulator.
+	 */
+	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2));
+	aub_out(bufmgr_gem,
+		AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
+	aub_out(bufmgr_gem, 0); /* general/surface subtype */
+	aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
+	aub_out(bufmgr_gem, ring_count * 4);
+
+	/* FIXME: Need some flush operations here? */
+	aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4);
+
+	/* Update offset pointer */
+	bufmgr_gem->aub_offset += 4096;
+}
+
+/**
+ * Emits an AUB "dump BMP" packet describing a rectangle of @bo, so
+ * that a bitmap of it can be produced from the trace.
+ *
+ * An unrecognized @format is reported on stdout and nothing is
+ * written; the call is otherwise a no-op when AUB dumping is off.
+ */
+void
+drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
+			      int x1, int y1, int width, int height,
+			      enum aub_dump_bmp_format format,
+			      int pitch, int offset)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+	uint32_t tiling_bits = 0;
+	uint32_t cpp;
+
+	/* Bytes per pixel for the requested format. */
+	if (format == AUB_DUMP_BMP_FORMAT_8BIT) {
+		cpp = 1;
+	} else if (format == AUB_DUMP_BMP_FORMAT_ARGB_4444) {
+		cpp = 2;
+	} else if (format == AUB_DUMP_BMP_FORMAT_ARGB_0888 ||
+		   format == AUB_DUMP_BMP_FORMAT_ARGB_8888) {
+		cpp = 4;
+	} else {
+		printf("Unknown AUB dump format %d\n", format);
+		return;
+	}
+
+	if (!bufmgr_gem->aub_file)
+		return;
+
+	/* Bit 2: surface is tiled; bit 3: the tiling is Y. */
+	if (bo_gem->tiling_mode != I915_TILING_NONE)
+		tiling_bits |= 1 << 2;
+	if (bo_gem->tiling_mode == I915_TILING_Y)
+		tiling_bits |= 1 << 3;
+
+	aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
+	aub_out(bufmgr_gem, (y1 << 16) | x1);
+	aub_out(bufmgr_gem, (format << 24) | (cpp << 19) | pitch / 4);
+	aub_out(bufmgr_gem, (height << 16) | width);
+	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
+	aub_out(bufmgr_gem, tiling_bits);
+}
+
+/**
+ * Records one batchbuffer submission in the AUB file: every other
+ * buffer on the exec list, then the batch itself, then a ring that
+ * starts the batch.  Does nothing when AUB dumping is off.
+ */
+static void
+aub_exec(drm_intel_bo *bo, int ring_flag, int used)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	int idx;
+
+	if (!bufmgr_gem->aub_file)
+		return;
+
+	/* Write out all but the batchbuffer to AUB memory */
+	for (idx = 0; idx < bufmgr_gem->exec_count - 1; idx++) {
+		drm_intel_bo *ref = bufmgr_gem->exec_bos[idx];
+
+		if (ref != bo)
+			aub_write_bo(ref);
+	}
+
+	aub_bo_get_address(bo);
+
+	/* Dump the batchbuffer: the used part typed as a batch, the
+	 * unused tail as untyped data.
+	 */
+	aub_write_trace_block(bo, AUB_TRACE_TYPE_BATCH, 0, 0, used);
+	aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
+			      used, bo->size - used);
+
+	/* Dump ring buffer */
+	aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
+
+	fflush(bufmgr_gem->aub_file);
+
+	/*
+	 * One frame has been dumped. So reset the aub_offset for the next frame.
+	 *
+	 * FIXME: Can we do this?
+	 */
+	bufmgr_gem->aub_offset = 0x10000;
+}
+
static int
drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
@@ -1830,6 +2077,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
execbuf.rsvd1 = 0;
execbuf.rsvd2 = 0;
+ aub_exec(bo, flags, used);
+
if (bufmgr_gem->no_exec)
goto skip_execution;
@@ -2360,6 +2609,62 @@ drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
}
/**
+ * Enables or disables AUB dumping for this buffer manager.
+ *
+ * AUB is a trace file format that can be used with the simulator.
+ * Packets are emitted in a format somewhat like GPU command packets.
+ * You can set up a GTT and upload your objects into the referenced
+ * space, then send off batchbuffers and get BMPs out the other end.
+ *
+ * With a non-zero @enable, "intel.aub" is created (truncated) in the
+ * current directory and the version header and GTT entries are written
+ * to it.  With a zero @enable, any open trace file is closed and
+ * nothing else happens.
+ */
+void
+drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
+	int entry = 0x200003;
+	int i;
+	int gtt_size = 0x10000;
+
+	if (!enable) {
+		if (bufmgr_gem->aub_file) {
+			fclose(bufmgr_gem->aub_file);
+			bufmgr_gem->aub_file = NULL;
+		}
+		/* Disabling must stop here; falling through would
+		 * immediately reopen and truncate the trace file.
+		 */
+		return;
+	}
+
+	/* Refuse to create the file when the effective and real user
+	 * IDs differ (e.g. when running setuid).
+	 */
+	if (geteuid() != getuid())
+		return;
+
+	/* Enabling twice restarts the trace; close the previous stream
+	 * first so it is not leaked.
+	 */
+	if (bufmgr_gem->aub_file)
+		fclose(bufmgr_gem->aub_file);
+
+	bufmgr_gem->aub_file = fopen("intel.aub", "w+");
+	if (!bufmgr_gem->aub_file)
+		return;
+
+	/* Start allocating objects from just after the GTT. */
+	bufmgr_gem->aub_offset = gtt_size;
+
+	/* Start with a (required) version packet. */
+	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
+	aub_out(bufmgr_gem,
+		(4 << AUB_HEADER_MAJOR_SHIFT) |
+		(0 << AUB_HEADER_MINOR_SHIFT));
+	for (i = 0; i < 8; i++) {
+		aub_out(bufmgr_gem, 0); /* app name */
+	}
+	aub_out(bufmgr_gem, 0); /* timestamp */
+	aub_out(bufmgr_gem, 0); /* timestamp */
+	aub_out(bufmgr_gem, 0); /* comment len */
+
+	/* Set up the GTT. The max we can handle is 256M */
+	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2));
+	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE);
+	aub_out(bufmgr_gem, 0); /* subtype */
+	aub_out(bufmgr_gem, 0); /* offset */
+	aub_out(bufmgr_gem, gtt_size); /* size */
+	/* One dword per GTT entry; each entry advances by one 4KB page. */
+	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
+		aub_out(bufmgr_gem, entry);
+	}
+}
+
+/**
* Initializes the GEM buffer manager, which uses the kernel to allocate, map,
* and manage buffer objects.
*
commit 6e642db7f4a5628ed63ca3c479f06bd6f2ca3893
Author: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue Oct 11 14:38:34 2011 -0700
intel: Add support for overriding the PCI ID via an environment variable
For example:
export INTEL_DEVID_OVERRIDE=0x162
If this variable is set, don't actually submit the batchbuffer to the
GPU, it probably contains commands for the wrong generation of hardware.
v2: Introduce a getter for the overridden devid, and avoid getenv per exec.
Reviewed-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 85da8b9..8036031 100644
--- a/intel/intel_bufmgr.h
Reply to: