libdrm: Changes to 'upstream-experimental'
amdgpu/amdgpu_device.c | 24 --
amdgpu/amdgpu_gpu_info.c | 72 +++----
amdgpu/amdgpu_internal.h | 4
amdgpu/amdgpu_vamgr.c | 6
autogen.sh | 10
configure.ac | 3
freedreno/Makefile.am | 1
freedreno/freedreno_bo.c | 12 +
freedreno/freedreno_bo_cache.c | 4
freedreno/freedreno_device.c | 3
freedreno/freedreno_priv.h | 56 +++++
freedreno/kgsl/kgsl_device.c | 2
freedreno/msm/msm_device.c | 2
freedreno/msm/msm_ringbuffer.c | 7
include/drm/README | 2
include/drm/amdgpu_drm.h | 409 +++++++++++++++++++++++++---------------
intel/intel_bufmgr_gem.c | 58 +----
intel/intel_decode.c | 3
tests/amdgpu/basic_tests.c | 27 ++
tests/amdgpu/cs_tests.c | 41 ++--
tests/amdgpu/vce_tests.c | 54 ++++-
tests/drmstat.c | 419 -----------------------------------------
22 files changed, 518 insertions(+), 701 deletions(-)
New commits:
commit 8d61a9a923c1ced974180609611ef615034fd484
Author: Marek Olšák <marek.olsak@amd.com>
Date: Wed Mar 29 20:06:22 2017 +0200
configure.ac: bump version for release
diff --git a/configure.ac b/configure.ac
index 2e50d3e..6a60ffc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -20,7 +20,7 @@
AC_PREREQ([2.63])
AC_INIT([libdrm],
- [2.4.75],
+ [2.4.76],
[https://bugs.freedesktop.org/enter_bug.cgi?product=DRI],
[libdrm])
commit c7b5aaeb1f77a53a46c091b1ba999a76baa6c3fb
Author: Leo Liu <leo.liu@amd.com>
Date: Thu Mar 23 10:43:40 2017 -0400
amdgpu_drm: add AMDGPU_HW_IP_UVD_ENC
Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 1e25a87..fa56499 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -388,7 +388,8 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_HW_IP_DMA 2
#define AMDGPU_HW_IP_UVD 3
#define AMDGPU_HW_IP_VCE 4
-#define AMDGPU_HW_IP_NUM 5
+#define AMDGPU_HW_IP_UVD_ENC 5
+#define AMDGPU_HW_IP_NUM 6
#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1
commit f684bb109fcdb85faa1b212bb6efcc352d8cbcdc
Author: Christian König <christian.koenig@amd.com>
Date: Mon Mar 27 15:44:14 2017 +0200
amdgpu: stop reading CC_RB_BACKEND_DISABLE on Vega10
Follow up to 'drm: don't access deprecated register on Vega10'.
The same information is available in enabled_rb_pipes_mask and reading that
register can cause GRBM bus problems.
Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c
index cd31e1b..c5f5f6f 100644
--- a/amdgpu/amdgpu_gpu_info.c
+++ b/amdgpu/amdgpu_gpu_info.c
@@ -169,20 +169,20 @@ drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
dev->info.vce_harvest_config = dev->dev_info.vce_harvest_config;
dev->info.pci_rev_id = dev->dev_info.pci_rev;
- for (i = 0; i < (int)dev->info.num_shader_engines; i++) {
- unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) |
- (AMDGPU_INFO_MMR_SH_INDEX_MASK <<
- AMDGPU_INFO_MMR_SH_INDEX_SHIFT);
+ if (dev->info.family_id < AMDGPU_FAMILY_AI) {
+ for (i = 0; i < (int)dev->info.num_shader_engines; i++) {
+ unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) |
+ (AMDGPU_INFO_MMR_SH_INDEX_MASK <<
+ AMDGPU_INFO_MMR_SH_INDEX_SHIFT);
- r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0,
- &dev->info.backend_disable[i]);
- if (r)
- return r;
- /* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */
- dev->info.backend_disable[i] =
- (dev->info.backend_disable[i] >> 16) & 0xff;
+ r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0,
+ &dev->info.backend_disable[i]);
+ if (r)
+ return r;
+ /* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */
+ dev->info.backend_disable[i] =
+ (dev->info.backend_disable[i] >> 16) & 0xff;
- if (dev->info.family_id < AMDGPU_FAMILY_AI) {
r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
&dev->info.pa_sc_raster_cfg[i]);
if (r)
commit a784c38af77714b9e878a7ff97ba18553697304c
Author: Junwei Zhang <Jerry.Zhang@amd.com>
Date: Tue Dec 20 09:52:32 2016 +0800
tests/amdgpu: add Polaris12 support for cs test
Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/tests/amdgpu/cs_tests.c b/tests/amdgpu/cs_tests.c
index 0885d97..342815d 100644
--- a/tests/amdgpu/cs_tests.c
+++ b/tests/amdgpu/cs_tests.c
@@ -216,7 +216,8 @@ static void amdgpu_cs_uvd_create(void)
((uint8_t*)msg)[0x10] = 7;
/* chip beyond polaris 10/11 */
if ((family_id == AMDGPU_FAMILY_AI) ||
- (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+ (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
+ chip_id == chip_rev+0x64)) {
/* dpb size */
((uint8_t*)msg)[0x28] = 0x00;
((uint8_t*)msg)[0x29] = 0x94;
@@ -296,7 +297,8 @@ static void amdgpu_cs_uvd_decode(void)
ptr[0x99] = 0x02;
/* chip beyond polaris10/11 */
if ((family_id == AMDGPU_FAMILY_AI) ||
- (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+ (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
+ chip_id == chip_rev+0x64)) {
/* dpb size */
ptr[0x24] = 0x00;
ptr[0x25] = 0x94;
@@ -341,7 +343,8 @@ static void amdgpu_cs_uvd_decode(void)
if (family_id >= AMDGPU_FAMILY_VI) {
if ((family_id == AMDGPU_FAMILY_AI) ||
- (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+ (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
+ chip_id == chip_rev+0x64)) {
ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
}
}
@@ -358,7 +361,8 @@ static void amdgpu_cs_uvd_decode(void)
if (family_id >= AMDGPU_FAMILY_VI) {
uvd_cmd(it_addr, 0x204, &i);
if ((family_id == AMDGPU_FAMILY_AI) ||
- (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A))
+ (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
+ chip_id == chip_rev+0x64))
uvd_cmd(ctx_addr, 0x206, &i);
}
commit f810e31bcf686a156b7b5be6298cd52247a98189
Author: Leo Liu <leo.liu@amd.com>
Date: Tue Dec 13 15:24:05 2016 -0500
tests/amdgpu: add vce unit test support for vega10
swizzle mode needs reference and input picture luma and
chroma pitch aligned with 256
Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/tests/amdgpu/vce_tests.c b/tests/amdgpu/vce_tests.c
index de63aa1..b03807b 100644
--- a/tests/amdgpu/vce_tests.c
+++ b/tests/amdgpu/vce_tests.c
@@ -234,6 +234,7 @@ static void free_resource(struct amdgpu_vce_bo *vce_bo)
static void amdgpu_cs_vce_create(void)
{
+ unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
int len, r;
enc.width = vce_create[6];
@@ -250,6 +251,8 @@ static void amdgpu_cs_vce_create(void)
memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
len += sizeof(vce_taskinfo) / 4;
memcpy((ib_cpu + len), vce_create, sizeof(vce_create));
+ ib_cpu[len + 8] = ALIGN(enc.width, align);
+ ib_cpu[len + 9] = ALIGN(enc.width, align);
len += sizeof(vce_create) / 4;
memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
ib_cpu[len + 2] = enc.fb[0].addr >> 32;
@@ -291,10 +294,12 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
{
uint64_t luma_offset, chroma_offset;
- int len = 0, r;
+ unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+ unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
+ int len = 0, i, r;
luma_offset = enc->vbuf.addr;
- chroma_offset = luma_offset + enc->width * enc->height;
+ chroma_offset = luma_offset + luma_size;
memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
len += sizeof(vce_session) / 4;
@@ -309,6 +314,10 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
ib_cpu[len + 3] = enc->cpb.addr;
len += sizeof(vce_context_buffer) / 4;
memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
+ for (i = 0; i < 8; ++i)
+ ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
+ for (i = 0; i < 8; ++i)
+ ib_cpu[len + 10 + i] = luma_size * 1.5;
len += sizeof(vce_aux_buffer) / 4;
memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
ib_cpu[len + 2] = enc->fb[0].addr >> 32;
@@ -319,8 +328,10 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
ib_cpu[len + 10] = luma_offset;
ib_cpu[len + 11] = chroma_offset >> 32;
ib_cpu[len + 12] = chroma_offset;
- ib_cpu[len + 73] = 0x7800;
- ib_cpu[len + 74] = 0x7800 + 0x5000;
+ ib_cpu[len + 14] = ALIGN(enc->width, align);
+ ib_cpu[len + 15] = ALIGN(enc->width, align);
+ ib_cpu[len + 73] = luma_size * 1.5;
+ ib_cpu[len + 74] = luma_size * 2.5;
len += sizeof(vce_encode) / 4;
enc->ib_len = len;
if (!enc->two_instance) {
@@ -332,11 +343,13 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
{
uint64_t luma_offset, chroma_offset;
- int len, r;
+ int len, i, r;
+ unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+ unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
len = (enc->two_instance) ? enc->ib_len : 0;
luma_offset = enc->vbuf.addr;
- chroma_offset = luma_offset + enc->width * enc->height;
+ chroma_offset = luma_offset + luma_size;
if (!enc->two_instance) {
memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
@@ -353,6 +366,10 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
ib_cpu[len + 3] = enc->cpb.addr;
len += sizeof(vce_context_buffer) / 4;
memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
+ for (i = 0; i < 8; ++i)
+ ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
+ for (i = 0; i < 8; ++i)
+ ib_cpu[len + 10 + i] = luma_size * 1.5;
len += sizeof(vce_aux_buffer) / 4;
memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
ib_cpu[len + 2] = enc->fb[1].addr >> 32;
@@ -364,15 +381,17 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
ib_cpu[len + 10] = luma_offset;
ib_cpu[len + 11] = chroma_offset >> 32;
ib_cpu[len + 12] = chroma_offset;
+ ib_cpu[len + 14] = ALIGN(enc->width, align);
+ ib_cpu[len + 15] = ALIGN(enc->width, align);
ib_cpu[len + 18] = 0;
ib_cpu[len + 19] = 0;
ib_cpu[len + 56] = 3;
ib_cpu[len + 57] = 0;
ib_cpu[len + 58] = 0;
- ib_cpu[len + 59] = 0x7800;
- ib_cpu[len + 60] = 0x7800 + 0x5000;
+ ib_cpu[len + 59] = luma_size * 1.5;
+ ib_cpu[len + 60] = luma_size * 2.5;
ib_cpu[len + 73] = 0;
- ib_cpu[len + 74] = 0x5000;
+ ib_cpu[len + 74] = luma_size;
ib_cpu[len + 81] = 1;
ib_cpu[len + 82] = 1;
len += sizeof(vce_encode) / 4;
@@ -408,9 +427,10 @@ static void check_result(struct amdgpu_vce_encode *enc)
static void amdgpu_cs_vce_encode(void)
{
uint32_t vbuf_size, bs_size = 0x154000, cpb_size;
- int r;
+ unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+ int i, r;
- vbuf_size = enc.width * enc.height * 1.5;
+ vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5;
cpb_size = vbuf_size * 10;
num_resources = 0;
alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
@@ -429,7 +449,17 @@ static void amdgpu_cs_vce_encode(void)
r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr);
CU_ASSERT_EQUAL(r, 0);
- memcpy(enc.vbuf.ptr, frame, sizeof(frame));
+
+ memset(enc.vbuf.ptr, 0, vbuf_size);
+ for (i = 0; i < enc.height; ++i) {
+ memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width);
+ enc.vbuf.ptr += ALIGN(enc.width, align);
+ }
+ for (i = 0; i < enc.height / 2; ++i) {
+ memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width);
+ enc.vbuf.ptr += ALIGN(enc.width, align);
+ }
+
r = amdgpu_bo_cpu_unmap(enc.vbuf.handle);
CU_ASSERT_EQUAL(r, 0);
commit 5a44f9e6c6a460a5ea0b698fb64d02b359927999
Author: Leo Liu <leo.liu@amd.com>
Date: Mon Dec 5 11:18:09 2016 -0500
tests/amdgpu: add uvd unit test support for vega10
Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/tests/amdgpu/cs_tests.c b/tests/amdgpu/cs_tests.c
index 82c55aa..0885d97 100644
--- a/tests/amdgpu/cs_tests.c
+++ b/tests/amdgpu/cs_tests.c
@@ -175,11 +175,11 @@ static int submit(unsigned ndw, unsigned ip)
static void uvd_cmd(uint64_t addr, unsigned cmd, int *idx)
{
- ib_cpu[(*idx)++] = 0x3BC4;
+ ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC4 : 0x81C4;
ib_cpu[(*idx)++] = addr;
- ib_cpu[(*idx)++] = 0x3BC5;
+ ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC5 : 0x81C5;
ib_cpu[(*idx)++] = addr >> 32;
- ib_cpu[(*idx)++] = 0x3BC3;
+ ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC3 : 0x81C3;
ib_cpu[(*idx)++] = cmd << 1;
}
@@ -211,10 +211,12 @@ static void amdgpu_cs_uvd_create(void)
CU_ASSERT_EQUAL(r, 0);
memcpy(msg, uvd_create_msg, sizeof(uvd_create_msg));
+
if (family_id >= AMDGPU_FAMILY_VI) {
((uint8_t*)msg)[0x10] = 7;
- /* chip polaris 10/11 */
- if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) {
+ /* chip beyond polaris 10/11 */
+ if ((family_id == AMDGPU_FAMILY_AI) ||
+ (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
/* dpb size */
((uint8_t*)msg)[0x28] = 0x00;
((uint8_t*)msg)[0x29] = 0x94;
@@ -287,13 +289,15 @@ static void amdgpu_cs_uvd_decode(void)
CU_ASSERT_EQUAL(r, 0);
memcpy(ptr, uvd_decode_msg, sizeof(uvd_create_msg));
+
if (family_id >= AMDGPU_FAMILY_VI) {
ptr[0x10] = 7;
ptr[0x98] = 0x00;
ptr[0x99] = 0x02;
- /* chip polaris10/11 */
- if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) {
- /*dpb size */
+ /* chip beyond polaris10/11 */
+ if ((family_id == AMDGPU_FAMILY_AI) ||
+ (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+ /* dpb size */
ptr[0x24] = 0x00;
ptr[0x25] = 0x94;
ptr[0x26] = 0x6B;
@@ -335,9 +339,11 @@ static void amdgpu_cs_uvd_decode(void)
bs_addr = fb_addr + 4*1024;
dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024);
- if ((family_id >= AMDGPU_FAMILY_VI) &&
- (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
- ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
+ if (family_id >= AMDGPU_FAMILY_VI) {
+ if ((family_id == AMDGPU_FAMILY_AI) ||
+ (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+ ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
+ }
}
dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024);
@@ -348,12 +354,15 @@ static void amdgpu_cs_uvd_decode(void)
uvd_cmd(dt_addr, 0x2, &i);
uvd_cmd(fb_addr, 0x3, &i);
uvd_cmd(bs_addr, 0x100, &i);
+
if (family_id >= AMDGPU_FAMILY_VI) {
uvd_cmd(it_addr, 0x204, &i);
- if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)
+ if ((family_id == AMDGPU_FAMILY_AI) ||
+ (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A))
uvd_cmd(ctx_addr, 0x206, &i);
-}
- ib_cpu[i++] = 0x3BC6;
+ }
+
+ ib_cpu[i++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC6 : 0x81C6;
ib_cpu[i++] = 0x1;
for (; i % 16; ++i)
ib_cpu[i] = 0x80000000;
commit fee173dc77295c8624291a4336075361d5dafd67
Author: Huang Rui <ray.huang@amd.com>
Date: Wed Nov 9 11:28:45 2016 +0800
tests/amdgpu: fix the count number for vega10
Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index bfda21b..4dce67e 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -803,12 +803,16 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
uint32_t *pm4;
struct amdgpu_cs_ib_info *ib_info;
struct amdgpu_cs_request *ibs_request;
+ struct amdgpu_gpu_info gpu_info = {0};
uint64_t bo_mc;
volatile uint32_t *bo_cpu;
int i, j, r, loop;
uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
amdgpu_va_handle va_handle;
+ r = amdgpu_query_gpu_info(device_handle, &gpu_info);
+ CU_ASSERT_EQUAL(r, 0);
+
pm4 = calloc(pm4_dw, sizeof(*pm4));
CU_ASSERT_NOT_EQUAL(pm4, NULL);
@@ -848,7 +852,10 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
pm4[i++] = 0xffffffff & bo_mc;
pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
- pm4[i++] = sdma_write_length;
+ if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
+ pm4[i++] = sdma_write_length - 1;
+ else
+ pm4[i++] = sdma_write_length;
while(j++ < sdma_write_length)
pm4[i++] = 0xdeadbeaf;
} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
@@ -904,12 +911,16 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
uint32_t *pm4;
struct amdgpu_cs_ib_info *ib_info;
struct amdgpu_cs_request *ibs_request;
+ struct amdgpu_gpu_info gpu_info = {0};
uint64_t bo_mc;
volatile uint32_t *bo_cpu;
int i, j, r, loop;
uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
amdgpu_va_handle va_handle;
+ r = amdgpu_query_gpu_info(device_handle, &gpu_info);
+ CU_ASSERT_EQUAL(r, 0);
+
pm4 = calloc(pm4_dw, sizeof(*pm4));
CU_ASSERT_NOT_EQUAL(pm4, NULL);
@@ -949,7 +960,10 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
pm4[i++] = 0xffffffff & bo_mc;
pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
pm4[i++] = 0xdeadbeaf;
- pm4[i++] = sdma_write_length;
+ if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
+ pm4[i++] = sdma_write_length - 1;
+ else
+ pm4[i++] = sdma_write_length;
} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
(ip_type == AMDGPU_HW_IP_COMPUTE)) {
pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
@@ -1007,12 +1021,16 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
uint32_t *pm4;
struct amdgpu_cs_ib_info *ib_info;
struct amdgpu_cs_request *ibs_request;
+ struct amdgpu_gpu_info gpu_info = {0};
uint64_t bo1_mc, bo2_mc;
volatile unsigned char *bo1_cpu, *bo2_cpu;
int i, j, r, loop1, loop2;
uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
amdgpu_va_handle bo1_va_handle, bo2_va_handle;
+ r = amdgpu_query_gpu_info(device_handle, &gpu_info);
+ CU_ASSERT_EQUAL(r, 0);
+
pm4 = calloc(pm4_dw, sizeof(*pm4));
CU_ASSERT_NOT_EQUAL(pm4, NULL);
@@ -1064,7 +1082,10 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
i = j = 0;
if (ip_type == AMDGPU_HW_IP_DMA) {
pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
- pm4[i++] = sdma_write_length;
+ if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
+ pm4[i++] = sdma_write_length - 1;
+ else
+ pm4[i++] = sdma_write_length;
pm4[i++] = 0;
pm4[i++] = 0xffffffff & bo1_mc;
pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
commit 99908bfd4ce3132e99aabc96c1ee4946b1246fa5
Author: Huang Rui <ray.huang@amd.com>
Date: Tue Nov 8 14:00:45 2016 +0800
amdgpu: don't read registers not present on Vega10
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c
index 66c7e0e..cd31e1b 100644
--- a/amdgpu/amdgpu_gpu_info.c
+++ b/amdgpu/amdgpu_gpu_info.c
@@ -182,40 +182,44 @@ drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
dev->info.backend_disable[i] =
(dev->info.backend_disable[i] >> 16) & 0xff;
- r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
- &dev->info.pa_sc_raster_cfg[i]);
- if (r)
- return r;
-
- if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
- r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0,
- &dev->info.pa_sc_raster_cfg1[i]);
+ if (dev->info.family_id < AMDGPU_FAMILY_AI) {
+ r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
+ &dev->info.pa_sc_raster_cfg[i]);
if (r)
return r;
+
+ if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
+ r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0,
+ &dev->info.pa_sc_raster_cfg1[i]);
+ if (r)
+ return r;
+ }
}
}
- r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0,
- dev->info.gb_tile_mode);
+ r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0,
+ &dev->info.gb_addr_cfg);
if (r)
return r;
- if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
- r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0,
- dev->info.gb_macro_tile_mode);
+ if (dev->info.family_id < AMDGPU_FAMILY_AI) {
+ r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0,
+ dev->info.gb_tile_mode);
if (r)
return r;
- }
- r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0,
- &dev->info.gb_addr_cfg);
- if (r)
- return r;
+ if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
+ r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0,
+ dev->info.gb_macro_tile_mode);
+ if (r)
+ return r;
+ }
- r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0,
- &dev->info.mc_arb_ramcfg);
- if (r)
- return r;
+ r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0,
+ &dev->info.mc_arb_ramcfg);
+ if (r)
+ return r;
+ }
dev->info.cu_active_number = dev->dev_info.cu_active_number;
dev->info.cu_ao_mask = dev->dev_info.cu_ao_mask;
commit c34b28ae9bac7a20e60482a2bf72f16ad5e28c67
Author: Marek Olšák <marek.olsak@amd.com>
Date: Tue Mar 21 20:14:45 2017 +0100
amdgpu: update amdgpu_drm.h for Vega10
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 5797283..1e25a87 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -209,6 +209,7 @@ struct drm_amdgpu_gem_userptr {
__u32 handle;
};
+/* SI-CI-VI: */
/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0
#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf
@@ -227,10 +228,14 @@ struct drm_amdgpu_gem_userptr {
#define AMDGPU_TILING_NUM_BANKS_SHIFT 21
#define AMDGPU_TILING_NUM_BANKS_MASK 0x3
+/* GFX9 and later: */
+#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0
+#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f
+
#define AMDGPU_TILING_SET(field, value) \
- (((value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
+ (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
#define AMDGPU_TILING_GET(value, field) \
- (((value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
+ (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1
#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2
@@ -755,6 +760,7 @@ struct drm_amdgpu_info_vce_clock_table {
#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */
#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */
#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */
+#define AMDGPU_FAMILY_AI 141 /* Vega10 */
#if defined(__cplusplus)
}
commit 3dc002df3e5607a3ae0a194b35e1f2fb2cd36697
Author: Marek Olšák <marek.olsak@amd.com>
Date: Tue Mar 21 20:31:53 2017 +0100
amdgpu: sync amdgpu_drm.h with kernel 4.11-rc2
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index d8f2497..5797283 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -50,6 +50,7 @@ extern "C" {
#define DRM_AMDGPU_WAIT_CS 0x09
#define DRM_AMDGPU_GEM_OP 0x10
#define DRM_AMDGPU_GEM_USERPTR 0x11
+#define DRM_AMDGPU_WAIT_FENCES 0x12
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -63,6 +64,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs)
#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
+#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
#define AMDGPU_GEM_DOMAIN_CPU 0x1
#define AMDGPU_GEM_DOMAIN_GTT 0x2
@@ -79,22 +81,26 @@ extern "C" {
#define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2)
/* Flag that the memory should be in VRAM and cleared */
#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3)
+/* Flag that create shadow bo(GTT) while allocating vram bo */
+#define AMDGPU_GEM_CREATE_SHADOW (1 << 4)
+/* Flag that allocating the BO should use linear VRAM */
+#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5)
struct drm_amdgpu_gem_create_in {
/** the requested memory size */
- uint64_t bo_size;
+ __u64 bo_size;
/** physical start_addr alignment in bytes for some HW requirements */
- uint64_t alignment;
+ __u64 alignment;
/** the requested memory domains */
- uint64_t domains;
+ __u64 domains;
/** allocation flags */
- uint64_t domain_flags;
+ __u64 domain_flags;
};
struct drm_amdgpu_gem_create_out {
/** returned GEM object handle */
- uint32_t handle;
- uint32_t _pad;
+ __u32 handle;
+ __u32 _pad;
};
union drm_amdgpu_gem_create {
@@ -111,28 +117,28 @@ union drm_amdgpu_gem_create {
struct drm_amdgpu_bo_list_in {
/** Type of operation */
- uint32_t operation;
+ __u32 operation;
/** Handle of list or 0 if we want to create one */
- uint32_t list_handle;
+ __u32 list_handle;
/** Number of BOs in list */
- uint32_t bo_number;
+ __u32 bo_number;
/** Size of each element describing BO */
- uint32_t bo_info_size;
+ __u32 bo_info_size;
/** Pointer to array describing BOs */
- uint64_t bo_info_ptr;
+ __u64 bo_info_ptr;
};
struct drm_amdgpu_bo_list_entry {
/** Handle of BO */
- uint32_t bo_handle;
+ __u32 bo_handle;
/** New (if specified) BO priority to be used during migration */
- uint32_t bo_priority;
+ __u32 bo_priority;
};
struct drm_amdgpu_bo_list_out {
/** Handle of resource list */
- uint32_t list_handle;
- uint32_t _pad;
+ __u32 list_handle;
+ __u32 _pad;
};
union drm_amdgpu_bo_list {
@@ -156,26 +162,26 @@ union drm_amdgpu_bo_list {
struct drm_amdgpu_ctx_in {
/** AMDGPU_CTX_OP_* */
- uint32_t op;
+ __u32 op;
/** For future use, no flags defined so far */
- uint32_t flags;
- uint32_t ctx_id;
- uint32_t _pad;
+ __u32 flags;
+ __u32 ctx_id;
+ __u32 _pad;
};
union drm_amdgpu_ctx_out {
struct {
- uint32_t ctx_id;
- uint32_t _pad;
+ __u32 ctx_id;
+ __u32 _pad;
} alloc;
struct {
/** For future use, no flags defined so far */
- uint64_t flags;
+ __u64 flags;
/** Number of resets caused by this context so far. */
- uint32_t hangs;
+ __u32 hangs;
/** Reset status since the last call of the ioctl. */
- uint32_t reset_status;
+ __u32 reset_status;
} state;
};
@@ -195,12 +201,12 @@ union drm_amdgpu_ctx {
#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3)
struct drm_amdgpu_gem_userptr {
- uint64_t addr;
- uint64_t size;
+ __u64 addr;
+ __u64 size;
/* AMDGPU_GEM_USERPTR_* */
- uint32_t flags;
+ __u32 flags;
/* Resulting GEM handle */
- uint32_t handle;
+ __u32 handle;
};
/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
@@ -232,28 +238,28 @@ struct drm_amdgpu_gem_userptr {
/** The same structure is shared for input/output */
struct drm_amdgpu_gem_metadata {
/** GEM Object handle */
- uint32_t handle;
+ __u32 handle;
/** Do we want get or set metadata */
- uint32_t op;
+ __u32 op;
struct {
/** For future use, no flags defined so far */
- uint64_t flags;
+ __u64 flags;
/** family specific tiling info */
- uint64_t tiling_info;
- uint32_t data_size_bytes;
- uint32_t data[64];
+ __u64 tiling_info;
+ __u32 data_size_bytes;
+ __u32 data[64];
} data;
};
struct drm_amdgpu_gem_mmap_in {
/** the GEM object handle */
- uint32_t handle;
- uint32_t _pad;
+ __u32 handle;
+ __u32 _pad;
};
struct drm_amdgpu_gem_mmap_out {
/** mmap offset from the vma offset manager */
- uint64_t addr_ptr;
+ __u64 addr_ptr;
};
union drm_amdgpu_gem_mmap {
@@ -263,18 +269,18 @@ union drm_amdgpu_gem_mmap {
struct drm_amdgpu_gem_wait_idle_in {
/** GEM object handle */
- uint32_t handle;
+ __u32 handle;
/** For future use, no flags defined so far */
- uint32_t flags;
+ __u32 flags;
/** Absolute timeout to wait */
- uint64_t timeout;
+ __u64 timeout;
};
struct drm_amdgpu_gem_wait_idle_out {
/** BO status: 0 - BO is idle, 1 - BO is busy */
- uint32_t status;
+ __u32 status;
/** Returned current memory domain */
- uint32_t domain;
+ __u32 domain;
};
union drm_amdgpu_gem_wait_idle {
@@ -284,18 +290,18 @@ union drm_amdgpu_gem_wait_idle {
struct drm_amdgpu_wait_cs_in {
/** Command submission handle */
- uint64_t handle;
+ __u64 handle;
/** Absolute timeout to wait */
- uint64_t timeout;
- uint32_t ip_type;
- uint32_t ip_instance;
- uint32_t ring;
- uint32_t ctx_id;
+ __u64 timeout;
+ __u32 ip_type;
+ __u32 ip_instance;
+ __u32 ring;
+ __u32 ctx_id;
};
struct drm_amdgpu_wait_cs_out {
/** CS status: 0 - CS completed, 1 - CS still busy */
- uint64_t status;
+ __u64 status;
};
union drm_amdgpu_wait_cs {
@@ -303,17 +309,43 @@ union drm_amdgpu_wait_cs {
struct drm_amdgpu_wait_cs_out out;
};
+struct drm_amdgpu_fence {
+ __u32 ctx_id;
+ __u32 ip_type;
+ __u32 ip_instance;
+ __u32 ring;
+ __u64 seq_no;
+};
+
+struct drm_amdgpu_wait_fences_in {
+ /** This points to uint64_t * which points to fences */
+ __u64 fences;
+ __u32 fence_count;
+ __u32 wait_all;
+ __u64 timeout_ns;
+};
+
+struct drm_amdgpu_wait_fences_out {
+ __u32 status;
+ __u32 first_signaled;
+};
+
+union drm_amdgpu_wait_fences {
+ struct drm_amdgpu_wait_fences_in in;
+ struct drm_amdgpu_wait_fences_out out;
+};
+
#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0
#define AMDGPU_GEM_OP_SET_PLACEMENT 1
/* Sets or returns a value associated with a buffer. */
struct drm_amdgpu_gem_op {
/** GEM object handle */
- uint32_t handle;
+ __u32 handle;
/** AMDGPU_GEM_OP_* */
- uint32_t op;
+ __u32 op;
/** Input or return value */
- uint64_t value;
+ __u64 value;
};
#define AMDGPU_VA_OP_MAP 1
@@ -332,18 +364,18 @@ struct drm_amdgpu_gem_op {
struct drm_amdgpu_gem_va {
/** GEM object handle */
- uint32_t handle;
- uint32_t _pad;
+ __u32 handle;
+ __u32 _pad;
/** AMDGPU_VA_OP_* */
- uint32_t operation;
+ __u32 operation;
/** AMDGPU_VM_PAGE_* */
- uint32_t flags;
+ __u32 flags;
/** va address to assign . Must be correctly aligned.*/
- uint64_t va_address;
+ __u64 va_address;
/** Specify offset inside of BO to assign. Must be correctly aligned.*/
- uint64_t offset_in_bo;
+ __u64 offset_in_bo;
/** Specify mapping size. Must be correctly aligned. */
- uint64_t map_size;
+ __u64 map_size;
};
#define AMDGPU_HW_IP_GFX 0
@@ -360,24 +392,24 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
struct drm_amdgpu_cs_chunk {
- uint32_t chunk_id;
- uint32_t length_dw;
- uint64_t chunk_data;
+ __u32 chunk_id;
+ __u32 length_dw;
+ __u64 chunk_data;
};
struct drm_amdgpu_cs_in {
/** Rendering context id */
- uint32_t ctx_id;
+ __u32 ctx_id;
/** Handle of resource list associated with CS */
- uint32_t bo_list_handle;
- uint32_t num_chunks;
- uint32_t _pad;
- /** this points to uint64_t * which point to cs chunks */
- uint64_t chunks;
+ __u32 bo_list_handle;
+ __u32 num_chunks;
+ __u32 _pad;
+ /** this points to __u64 * which point to cs chunks */
+ __u64 chunks;
};
struct drm_amdgpu_cs_out {
- uint64_t handle;
Reply to: