[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

libdrm: Changes to 'upstream-experimental'



 amdgpu/amdgpu_device.c         |   24 --
 amdgpu/amdgpu_gpu_info.c       |   72 +++----
 amdgpu/amdgpu_internal.h       |    4 
 amdgpu/amdgpu_vamgr.c          |    6 
 autogen.sh                     |   10 
 configure.ac                   |    3 
 freedreno/Makefile.am          |    1 
 freedreno/freedreno_bo.c       |   12 +
 freedreno/freedreno_bo_cache.c |    4 
 freedreno/freedreno_device.c   |    3 
 freedreno/freedreno_priv.h     |   56 +++++
 freedreno/kgsl/kgsl_device.c   |    2 
 freedreno/msm/msm_device.c     |    2 
 freedreno/msm/msm_ringbuffer.c |    7 
 include/drm/README             |    2 
 include/drm/amdgpu_drm.h       |  409 +++++++++++++++++++++++++---------------
 intel/intel_bufmgr_gem.c       |   58 +----
 intel/intel_decode.c           |    3 
 tests/amdgpu/basic_tests.c     |   27 ++
 tests/amdgpu/cs_tests.c        |   41 ++--
 tests/amdgpu/vce_tests.c       |   54 ++++-
 tests/drmstat.c                |  419 -----------------------------------------
 22 files changed, 518 insertions(+), 701 deletions(-)

New commits:
commit 8d61a9a923c1ced974180609611ef615034fd484
Author: Marek Olšák <marek.olsak@amd.com>
Date:   Wed Mar 29 20:06:22 2017 +0200

    configure.ac: bump version for release

diff --git a/configure.ac b/configure.ac
index 2e50d3e..6a60ffc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -20,7 +20,7 @@
 
 AC_PREREQ([2.63])
 AC_INIT([libdrm],
-        [2.4.75],
+        [2.4.76],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=DRI],
         [libdrm])
 

commit c7b5aaeb1f77a53a46c091b1ba999a76baa6c3fb
Author: Leo Liu <leo.liu@amd.com>
Date:   Thu Mar 23 10:43:40 2017 -0400

    amdgpu_drm: add AMDGPU_HW_IP_UVD_ENC
    
    Signed-off-by: Leo Liu <leo.liu@amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
    Signed-off-by: Marek Olšák <marek.olsak@amd.com>

diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 1e25a87..fa56499 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -388,7 +388,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_HW_IP_DMA          2
 #define AMDGPU_HW_IP_UVD          3
 #define AMDGPU_HW_IP_VCE          4
-#define AMDGPU_HW_IP_NUM          5
+#define AMDGPU_HW_IP_UVD_ENC      5
+#define AMDGPU_HW_IP_NUM          6
 
 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1
 

commit f684bb109fcdb85faa1b212bb6efcc352d8cbcdc
Author: Christian König <christian.koenig@amd.com>
Date:   Mon Mar 27 15:44:14 2017 +0200

    amdgpu: stop reading CC_RB_BACKEND_DISABLE on Vega10
    
    Follow up to 'drm: don't access deprecated register on Vega10'.
    
    The same information is available in enabled_rb_pipes_mask and reading that
    register can cause GRBM bus problems.
    
    Signed-off-by: Christian König <christian.koenig@amd.com>
    Signed-off-by: Marek Olšák <marek.olsak@amd.com>

diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c
index cd31e1b..c5f5f6f 100644
--- a/amdgpu/amdgpu_gpu_info.c
+++ b/amdgpu/amdgpu_gpu_info.c
@@ -169,20 +169,20 @@ drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
 	dev->info.vce_harvest_config = dev->dev_info.vce_harvest_config;
 	dev->info.pci_rev_id = dev->dev_info.pci_rev;
 
-	for (i = 0; i < (int)dev->info.num_shader_engines; i++) {
-		unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) |
-				    (AMDGPU_INFO_MMR_SH_INDEX_MASK <<
-				     AMDGPU_INFO_MMR_SH_INDEX_SHIFT);
+	if (dev->info.family_id < AMDGPU_FAMILY_AI) {
+		for (i = 0; i < (int)dev->info.num_shader_engines; i++) {
+			unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) |
+					    (AMDGPU_INFO_MMR_SH_INDEX_MASK <<
+					     AMDGPU_INFO_MMR_SH_INDEX_SHIFT);
 
-		r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0,
-					     &dev->info.backend_disable[i]);
-		if (r)
-			return r;
-		/* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */
-		dev->info.backend_disable[i] =
-			(dev->info.backend_disable[i] >> 16) & 0xff;
+			r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0,
+						     &dev->info.backend_disable[i]);
+			if (r)
+				return r;
+			/* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */
+			dev->info.backend_disable[i] =
+				(dev->info.backend_disable[i] >> 16) & 0xff;
 
-		if (dev->info.family_id < AMDGPU_FAMILY_AI) {
 			r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
 						     &dev->info.pa_sc_raster_cfg[i]);
 			if (r)

commit a784c38af77714b9e878a7ff97ba18553697304c
Author: Junwei Zhang <Jerry.Zhang@amd.com>
Date:   Tue Dec 20 09:52:32 2016 +0800

    tests/amdgpu: add Polaris12 support for cs test
    
    Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
    Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/tests/amdgpu/cs_tests.c b/tests/amdgpu/cs_tests.c
index 0885d97..342815d 100644
--- a/tests/amdgpu/cs_tests.c
+++ b/tests/amdgpu/cs_tests.c
@@ -216,7 +216,8 @@ static void amdgpu_cs_uvd_create(void)
 		((uint8_t*)msg)[0x10] = 7;
 		/* chip beyond polaris 10/11 */
 		if ((family_id == AMDGPU_FAMILY_AI) ||
-		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
+		     chip_id == chip_rev+0x64)) {
 			/* dpb size */
 			((uint8_t*)msg)[0x28] = 0x00;
 			((uint8_t*)msg)[0x29] = 0x94;
@@ -296,7 +297,8 @@ static void amdgpu_cs_uvd_decode(void)
 		ptr[0x99] = 0x02;
 		/* chip beyond polaris10/11 */
 		if ((family_id == AMDGPU_FAMILY_AI) ||
-		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
+		     chip_id == chip_rev+0x64)) {
 			/* dpb size */
 			ptr[0x24] = 0x00;
 			ptr[0x25] = 0x94;
@@ -341,7 +343,8 @@ static void amdgpu_cs_uvd_decode(void)
 
 	if (family_id >= AMDGPU_FAMILY_VI) {
 		if ((family_id == AMDGPU_FAMILY_AI) ||
-		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
+		     chip_id == chip_rev+0x64)) {
 			ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
 		}
 	}
@@ -358,7 +361,8 @@ static void amdgpu_cs_uvd_decode(void)
 	if (family_id >= AMDGPU_FAMILY_VI) {
 		uvd_cmd(it_addr, 0x204, &i);
 		if ((family_id == AMDGPU_FAMILY_AI) ||
-		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A))
+		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A ||
+		     chip_id == chip_rev+0x64))
 			uvd_cmd(ctx_addr, 0x206, &i);
 	}
 

commit f810e31bcf686a156b7b5be6298cd52247a98189
Author: Leo Liu <leo.liu@amd.com>
Date:   Tue Dec 13 15:24:05 2016 -0500

    tests/amdgpu: add vce unit test support for vega10
    
    swizzle mode needs reference and input picture luma and
    chroma pitch aligned with 256
    
    Signed-off-by: Leo Liu <leo.liu@amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/tests/amdgpu/vce_tests.c b/tests/amdgpu/vce_tests.c
index de63aa1..b03807b 100644
--- a/tests/amdgpu/vce_tests.c
+++ b/tests/amdgpu/vce_tests.c
@@ -234,6 +234,7 @@ static void free_resource(struct amdgpu_vce_bo *vce_bo)
 
 static void amdgpu_cs_vce_create(void)
 {
+	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
 	int len, r;
 
 	enc.width = vce_create[6];
@@ -250,6 +251,8 @@ static void amdgpu_cs_vce_create(void)
 	memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
 	len += sizeof(vce_taskinfo) / 4;
 	memcpy((ib_cpu + len), vce_create, sizeof(vce_create));
+	ib_cpu[len + 8] = ALIGN(enc.width, align);
+	ib_cpu[len + 9] = ALIGN(enc.width, align);
 	len += sizeof(vce_create) / 4;
 	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
 	ib_cpu[len + 2] = enc.fb[0].addr >> 32;
@@ -291,10 +294,12 @@ static  void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
 {
 
 	uint64_t luma_offset, chroma_offset;
-	int len = 0, r;
+	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+	unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
+	int len = 0, i, r;
 
 	luma_offset = enc->vbuf.addr;
-	chroma_offset = luma_offset + enc->width * enc->height;
+	chroma_offset = luma_offset + luma_size;
 
 	memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
 	len += sizeof(vce_session) / 4;
@@ -309,6 +314,10 @@ static  void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
 	ib_cpu[len + 3] = enc->cpb.addr;
 	len += sizeof(vce_context_buffer) / 4;
 	memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
+	for (i = 0; i <  8; ++i)
+		ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
+	for (i = 0; i <  8; ++i)
+		ib_cpu[len + 10 + i] = luma_size * 1.5;
 	len += sizeof(vce_aux_buffer) / 4;
 	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
 	ib_cpu[len + 2] = enc->fb[0].addr >> 32;
@@ -319,8 +328,10 @@ static  void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
 	ib_cpu[len + 10] = luma_offset;
 	ib_cpu[len + 11] = chroma_offset >> 32;
 	ib_cpu[len + 12] = chroma_offset;
-	ib_cpu[len + 73] = 0x7800;
-	ib_cpu[len + 74] = 0x7800 + 0x5000;
+	ib_cpu[len + 14] = ALIGN(enc->width, align);
+	ib_cpu[len + 15] = ALIGN(enc->width, align);
+	ib_cpu[len + 73] = luma_size * 1.5;
+	ib_cpu[len + 74] = luma_size * 2.5;
 	len += sizeof(vce_encode) / 4;
 	enc->ib_len = len;
 	if (!enc->two_instance) {
@@ -332,11 +343,13 @@ static  void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
 static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
 {
 	uint64_t luma_offset, chroma_offset;
-	int len, r;
+	int len, i, r;
+	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+	unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
 
 	len = (enc->two_instance) ? enc->ib_len : 0;
 	luma_offset = enc->vbuf.addr;
-	chroma_offset = luma_offset + enc->width * enc->height;
+	chroma_offset = luma_offset + luma_size;
 
 	if (!enc->two_instance) {
 		memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
@@ -353,6 +366,10 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
 	ib_cpu[len + 3] = enc->cpb.addr;
 	len += sizeof(vce_context_buffer) / 4;
 	memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
+	for (i = 0; i <  8; ++i)
+		ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
+	for (i = 0; i <  8; ++i)
+		ib_cpu[len + 10 + i] = luma_size * 1.5;
 	len += sizeof(vce_aux_buffer) / 4;
 	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
 	ib_cpu[len + 2] = enc->fb[1].addr >> 32;
@@ -364,15 +381,17 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
 	ib_cpu[len + 10] = luma_offset;
 	ib_cpu[len + 11] = chroma_offset >> 32;
 	ib_cpu[len + 12] = chroma_offset;
+	ib_cpu[len + 14] = ALIGN(enc->width, align);
+	ib_cpu[len + 15] = ALIGN(enc->width, align);
 	ib_cpu[len + 18] = 0;
 	ib_cpu[len + 19] = 0;
 	ib_cpu[len + 56] = 3;
 	ib_cpu[len + 57] = 0;
 	ib_cpu[len + 58] = 0;
-	ib_cpu[len + 59] = 0x7800;
-	ib_cpu[len + 60] = 0x7800 + 0x5000;
+	ib_cpu[len + 59] = luma_size * 1.5;
+	ib_cpu[len + 60] = luma_size * 2.5;
 	ib_cpu[len + 73] = 0;
-	ib_cpu[len + 74] = 0x5000;
+	ib_cpu[len + 74] = luma_size;
 	ib_cpu[len + 81] = 1;
 	ib_cpu[len + 82] = 1;
 	len += sizeof(vce_encode) / 4;
@@ -408,9 +427,10 @@ static void check_result(struct amdgpu_vce_encode *enc)
 static void amdgpu_cs_vce_encode(void)
 {
 	uint32_t vbuf_size, bs_size = 0x154000, cpb_size;
-	int r;
+	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+	int i, r;
 
-	vbuf_size = enc.width * enc.height * 1.5;
+	vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5;
 	cpb_size = vbuf_size * 10;
 	num_resources = 0;
 	alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
@@ -429,7 +449,17 @@ static void amdgpu_cs_vce_encode(void)
 
 	r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr);
 	CU_ASSERT_EQUAL(r, 0);
-	memcpy(enc.vbuf.ptr, frame, sizeof(frame));
+
+	memset(enc.vbuf.ptr, 0, vbuf_size);
+	for (i = 0; i < enc.height; ++i) {
+		memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width);
+		enc.vbuf.ptr += ALIGN(enc.width, align);
+	}
+	for (i = 0; i < enc.height / 2; ++i) {
+		memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width);
+		enc.vbuf.ptr += ALIGN(enc.width, align);
+	}
+
 	r = amdgpu_bo_cpu_unmap(enc.vbuf.handle);
 	CU_ASSERT_EQUAL(r, 0);
 

commit 5a44f9e6c6a460a5ea0b698fb64d02b359927999
Author: Leo Liu <leo.liu@amd.com>
Date:   Mon Dec 5 11:18:09 2016 -0500

    tests/amdgpu: add uvd unit test support for vega10
    
    Signed-off-by: Leo Liu <leo.liu@amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/tests/amdgpu/cs_tests.c b/tests/amdgpu/cs_tests.c
index 82c55aa..0885d97 100644
--- a/tests/amdgpu/cs_tests.c
+++ b/tests/amdgpu/cs_tests.c
@@ -175,11 +175,11 @@ static int submit(unsigned ndw, unsigned ip)
 
 static void uvd_cmd(uint64_t addr, unsigned cmd, int *idx)
 {
-	ib_cpu[(*idx)++] = 0x3BC4;
+	ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC4 : 0x81C4;
 	ib_cpu[(*idx)++] = addr;
-	ib_cpu[(*idx)++] = 0x3BC5;
+	ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC5 : 0x81C5;
 	ib_cpu[(*idx)++] = addr >> 32;
-	ib_cpu[(*idx)++] = 0x3BC3;
+	ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC3 : 0x81C3;
 	ib_cpu[(*idx)++] = cmd << 1;
 }
 
@@ -211,10 +211,12 @@ static void amdgpu_cs_uvd_create(void)
 	CU_ASSERT_EQUAL(r, 0);
 
 	memcpy(msg, uvd_create_msg, sizeof(uvd_create_msg));
+
 	if (family_id >= AMDGPU_FAMILY_VI) {
 		((uint8_t*)msg)[0x10] = 7;
-		/* chip polaris 10/11 */
-		if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) {
+		/* chip beyond polaris 10/11 */
+		if ((family_id == AMDGPU_FAMILY_AI) ||
+		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
 			/* dpb size */
 			((uint8_t*)msg)[0x28] = 0x00;
 			((uint8_t*)msg)[0x29] = 0x94;
@@ -287,13 +289,15 @@ static void amdgpu_cs_uvd_decode(void)
 	CU_ASSERT_EQUAL(r, 0);
 
 	memcpy(ptr, uvd_decode_msg, sizeof(uvd_create_msg));
+
 	if (family_id >= AMDGPU_FAMILY_VI) {
 		ptr[0x10] = 7;
 		ptr[0x98] = 0x00;
 		ptr[0x99] = 0x02;
-		/* chip polaris10/11 */
-		if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) {
-			/*dpb size */
+		/* chip beyond polaris10/11 */
+		if ((family_id == AMDGPU_FAMILY_AI) ||
+		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+			/* dpb size */
 			ptr[0x24] = 0x00;
 			ptr[0x25] = 0x94;
 			ptr[0x26] = 0x6B;
@@ -335,9 +339,11 @@ static void amdgpu_cs_uvd_decode(void)
 		bs_addr = fb_addr + 4*1024;
 	dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024);
 
-	if ((family_id >= AMDGPU_FAMILY_VI) &&
-		(chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
-		ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
+	if (family_id >= AMDGPU_FAMILY_VI) {
+		if ((family_id == AMDGPU_FAMILY_AI) ||
+		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) {
+			ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
+		}
 	}
 
 	dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024);
@@ -348,12 +354,15 @@ static void amdgpu_cs_uvd_decode(void)
 	uvd_cmd(dt_addr, 0x2, &i);
 	uvd_cmd(fb_addr, 0x3, &i);
 	uvd_cmd(bs_addr, 0x100, &i);
+
 	if (family_id >= AMDGPU_FAMILY_VI) {
 		uvd_cmd(it_addr, 0x204, &i);
-		if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)
+		if ((family_id == AMDGPU_FAMILY_AI) ||
+		    (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A))
 			uvd_cmd(ctx_addr, 0x206, &i);
-}
-	ib_cpu[i++] = 0x3BC6;
+	}
+
+	ib_cpu[i++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC6 : 0x81C6;
 	ib_cpu[i++] = 0x1;
 	for (; i % 16; ++i)
 		ib_cpu[i] = 0x80000000;

commit fee173dc77295c8624291a4336075361d5dafd67
Author: Huang Rui <ray.huang@amd.com>
Date:   Wed Nov 9 11:28:45 2016 +0800

    tests/amdgpu: fix the count number for vega10
    
    Signed-off-by: Huang Rui <ray.huang@amd.com>
    Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
    Reviewed-by: Christian König <christian.koenig@amd.com>
    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index bfda21b..4dce67e 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -803,12 +803,16 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
 	uint32_t *pm4;
 	struct amdgpu_cs_ib_info *ib_info;
 	struct amdgpu_cs_request *ibs_request;
+	struct amdgpu_gpu_info gpu_info = {0};
 	uint64_t bo_mc;
 	volatile uint32_t *bo_cpu;
 	int i, j, r, loop;
 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
 	amdgpu_va_handle va_handle;
 
+	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
+	CU_ASSERT_EQUAL(r, 0);
+
 	pm4 = calloc(pm4_dw, sizeof(*pm4));
 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
 
@@ -848,7 +852,10 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
 					       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 			pm4[i++] = 0xffffffff & bo_mc;
 			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
-			pm4[i++] = sdma_write_length;
+			if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
+				pm4[i++] = sdma_write_length - 1;
+			else
+				pm4[i++] = sdma_write_length;
 			while(j++ < sdma_write_length)
 				pm4[i++] = 0xdeadbeaf;
 		} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
@@ -904,12 +911,16 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
 	uint32_t *pm4;
 	struct amdgpu_cs_ib_info *ib_info;
 	struct amdgpu_cs_request *ibs_request;
+	struct amdgpu_gpu_info gpu_info = {0};
 	uint64_t bo_mc;
 	volatile uint32_t *bo_cpu;
 	int i, j, r, loop;
 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
 	amdgpu_va_handle va_handle;
 
+	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
+	CU_ASSERT_EQUAL(r, 0);
+
 	pm4 = calloc(pm4_dw, sizeof(*pm4));
 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
 
@@ -949,7 +960,10 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
 			pm4[i++] = 0xffffffff & bo_mc;
 			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
 			pm4[i++] = 0xdeadbeaf;
-			pm4[i++] = sdma_write_length;
+			if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
+				pm4[i++] = sdma_write_length - 1;
+			else
+				pm4[i++] = sdma_write_length;
 		} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
 			   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
 			pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
@@ -1007,12 +1021,16 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
 	uint32_t *pm4;
 	struct amdgpu_cs_ib_info *ib_info;
 	struct amdgpu_cs_request *ibs_request;
+	struct amdgpu_gpu_info gpu_info = {0};
 	uint64_t bo1_mc, bo2_mc;
 	volatile unsigned char *bo1_cpu, *bo2_cpu;
 	int i, j, r, loop1, loop2;
 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
 
+	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
+	CU_ASSERT_EQUAL(r, 0);
+
 	pm4 = calloc(pm4_dw, sizeof(*pm4));
 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
 
@@ -1064,7 +1082,10 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
 			i = j = 0;
 			if (ip_type == AMDGPU_HW_IP_DMA) {
 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
-				pm4[i++] = sdma_write_length;
+				if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
+					pm4[i++] = sdma_write_length - 1;
+				else
+					pm4[i++] = sdma_write_length;
 				pm4[i++] = 0;
 				pm4[i++] = 0xffffffff & bo1_mc;
 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;

commit 99908bfd4ce3132e99aabc96c1ee4946b1246fa5
Author: Huang Rui <ray.huang@amd.com>
Date:   Tue Nov 8 14:00:45 2016 +0800

    amdgpu: don't read registers not present on Vega10
    
    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c
index 66c7e0e..cd31e1b 100644
--- a/amdgpu/amdgpu_gpu_info.c
+++ b/amdgpu/amdgpu_gpu_info.c
@@ -182,40 +182,44 @@ drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev)
 		dev->info.backend_disable[i] =
 			(dev->info.backend_disable[i] >> 16) & 0xff;
 
-		r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
-					     &dev->info.pa_sc_raster_cfg[i]);
-		if (r)
-			return r;
-
-		if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
-			r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0,
-					     &dev->info.pa_sc_raster_cfg1[i]);
+		if (dev->info.family_id < AMDGPU_FAMILY_AI) {
+			r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0,
+						     &dev->info.pa_sc_raster_cfg[i]);
 			if (r)
 				return r;
+
+			if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
+				r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0,
+						     &dev->info.pa_sc_raster_cfg1[i]);
+				if (r)
+					return r;
+			}
 		}
 	}
 
-	r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0,
-				     dev->info.gb_tile_mode);
+	r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0,
+					     &dev->info.gb_addr_cfg);
 	if (r)
 		return r;
 
-	if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
-		r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0,
-					     dev->info.gb_macro_tile_mode);
+	if (dev->info.family_id < AMDGPU_FAMILY_AI) {
+		r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0,
+					     dev->info.gb_tile_mode);
 		if (r)
 			return r;
-	}
 
-	r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0,
-				     &dev->info.gb_addr_cfg);
-	if (r)
-		return r;
+		if (dev->info.family_id >= AMDGPU_FAMILY_CI) {
+			r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0,
+						     dev->info.gb_macro_tile_mode);
+			if (r)
+				return r;
+		}
 
-	r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0,
-				     &dev->info.mc_arb_ramcfg);
-	if (r)
-		return r;
+		r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0,
+					     &dev->info.mc_arb_ramcfg);
+		if (r)
+			return r;
+	}
 
 	dev->info.cu_active_number = dev->dev_info.cu_active_number;
 	dev->info.cu_ao_mask = dev->dev_info.cu_ao_mask;

commit c34b28ae9bac7a20e60482a2bf72f16ad5e28c67
Author: Marek Olšák <marek.olsak@amd.com>
Date:   Tue Mar 21 20:14:45 2017 +0100

    amdgpu: update amdgpu_drm.h for Vega10
    
    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 5797283..1e25a87 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -209,6 +209,7 @@ struct drm_amdgpu_gem_userptr {
 	__u32		handle;
 };
 
+/* SI-CI-VI: */
 /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
 #define AMDGPU_TILING_ARRAY_MODE_SHIFT			0
 #define AMDGPU_TILING_ARRAY_MODE_MASK			0xf
@@ -227,10 +228,14 @@ struct drm_amdgpu_gem_userptr {
 #define AMDGPU_TILING_NUM_BANKS_SHIFT			21
 #define AMDGPU_TILING_NUM_BANKS_MASK			0x3
 
+/* GFX9 and later: */
+#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT		0
+#define AMDGPU_TILING_SWIZZLE_MODE_MASK			0x1f
+
 #define AMDGPU_TILING_SET(field, value) \
-	(((value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
+	(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
 #define AMDGPU_TILING_GET(value, field) \
-	(((value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
+	(((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
 
 #define AMDGPU_GEM_METADATA_OP_SET_METADATA                  1
 #define AMDGPU_GEM_METADATA_OP_GET_METADATA                  2
@@ -755,6 +760,7 @@ struct drm_amdgpu_info_vce_clock_table {
 #define AMDGPU_FAMILY_KV			125 /* Kaveri, Kabini, Mullins */
 #define AMDGPU_FAMILY_VI			130 /* Iceland, Tonga */
 #define AMDGPU_FAMILY_CZ			135 /* Carrizo, Stoney */
+#define AMDGPU_FAMILY_AI			141 /* Vega10 */
 
 #if defined(__cplusplus)
 }

commit 3dc002df3e5607a3ae0a194b35e1f2fb2cd36697
Author: Marek Olšák <marek.olsak@amd.com>
Date:   Tue Mar 21 20:31:53 2017 +0100

    amdgpu: sync amdgpu_drm.h with kernel 4.11-rc2
    
    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index d8f2497..5797283 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -50,6 +50,7 @@ extern "C" {
 #define DRM_AMDGPU_WAIT_CS		0x09
 #define DRM_AMDGPU_GEM_OP		0x10
 #define DRM_AMDGPU_GEM_USERPTR		0x11
+#define DRM_AMDGPU_WAIT_FENCES		0x12
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -63,6 +64,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_WAIT_CS	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs)
 #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
+#define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
 
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
@@ -79,22 +81,26 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_CPU_GTT_USWC		(1 << 2)
 /* Flag that the memory should be in VRAM and cleared */
 #define AMDGPU_GEM_CREATE_VRAM_CLEARED		(1 << 3)
+/* Flag that create shadow bo(GTT) while allocating vram bo */
+#define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
+/* Flag that allocating the BO should use linear VRAM */
+#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
-	uint64_t bo_size;
+	__u64 bo_size;
 	/** physical start_addr alignment in bytes for some HW requirements */
-	uint64_t alignment;
+	__u64 alignment;
 	/** the requested memory domains */
-	uint64_t domains;
+	__u64 domains;
 	/** allocation flags */
-	uint64_t domain_flags;
+	__u64 domain_flags;
 };
 
 struct drm_amdgpu_gem_create_out  {
 	/** returned GEM object handle */
-	uint32_t handle;
-	uint32_t _pad;
+	__u32 handle;
+	__u32 _pad;
 };
 
 union drm_amdgpu_gem_create {
@@ -111,28 +117,28 @@ union drm_amdgpu_gem_create {
 
 struct drm_amdgpu_bo_list_in {
 	/** Type of operation */
-	uint32_t operation;
+	__u32 operation;
 	/** Handle of list or 0 if we want to create one */
-	uint32_t list_handle;
+	__u32 list_handle;
 	/** Number of BOs in list  */
-	uint32_t bo_number;
+	__u32 bo_number;
 	/** Size of each element describing BO */
-	uint32_t bo_info_size;
+	__u32 bo_info_size;
 	/** Pointer to array describing BOs */
-	uint64_t bo_info_ptr;
+	__u64 bo_info_ptr;
 };
 
 struct drm_amdgpu_bo_list_entry {
 	/** Handle of BO */
-	uint32_t bo_handle;
+	__u32 bo_handle;
 	/** New (if specified) BO priority to be used during migration */
-	uint32_t bo_priority;
+	__u32 bo_priority;
 };
 
 struct drm_amdgpu_bo_list_out {
 	/** Handle of resource list  */
-	uint32_t list_handle;
-	uint32_t _pad;
+	__u32 list_handle;
+	__u32 _pad;
 };
 
 union drm_amdgpu_bo_list {
@@ -156,26 +162,26 @@ union drm_amdgpu_bo_list {
 
 struct drm_amdgpu_ctx_in {
 	/** AMDGPU_CTX_OP_* */
-	uint32_t	op;
+	__u32	op;
 	/** For future use, no flags defined so far */
-	uint32_t	flags;
-	uint32_t	ctx_id;
-	uint32_t	_pad;
+	__u32	flags;
+	__u32	ctx_id;
+	__u32	_pad;
 };
 
 union drm_amdgpu_ctx_out {
 		struct {
-			uint32_t	ctx_id;
-			uint32_t	_pad;
+			__u32	ctx_id;
+			__u32	_pad;
 		} alloc;
 
 		struct {
 			/** For future use, no flags defined so far */
-			uint64_t	flags;
+			__u64	flags;
 			/** Number of resets caused by this context so far. */
-			uint32_t	hangs;
+			__u32	hangs;
 			/** Reset status since the last call of the ioctl. */
-			uint32_t	reset_status;
+			__u32	reset_status;
 		} state;
 };
 
@@ -195,12 +201,12 @@ union drm_amdgpu_ctx {
 #define AMDGPU_GEM_USERPTR_REGISTER	(1 << 3)
 
 struct drm_amdgpu_gem_userptr {
-	uint64_t		addr;
-	uint64_t		size;
+	__u64		addr;
+	__u64		size;
 	/* AMDGPU_GEM_USERPTR_* */
-	uint32_t		flags;
+	__u32		flags;
 	/* Resulting GEM handle */
-	uint32_t		handle;
+	__u32		handle;
 };
 
 /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
@@ -232,28 +238,28 @@ struct drm_amdgpu_gem_userptr {
 /** The same structure is shared for input/output */
 struct drm_amdgpu_gem_metadata {
 	/** GEM Object handle */
-	uint32_t	handle;
+	__u32	handle;
 	/** Do we want get or set metadata */
-	uint32_t	op;
+	__u32	op;
 	struct {
 		/** For future use, no flags defined so far */
-		uint64_t	flags;
+		__u64	flags;
 		/** family specific tiling info */
-		uint64_t	tiling_info;
-		uint32_t	data_size_bytes;
-		uint32_t	data[64];
+		__u64	tiling_info;
+		__u32	data_size_bytes;
+		__u32	data[64];
 	} data;
 };
 
 struct drm_amdgpu_gem_mmap_in {
 	/** the GEM object handle */
-	uint32_t handle;
-	uint32_t _pad;
+	__u32 handle;
+	__u32 _pad;
 };
 
 struct drm_amdgpu_gem_mmap_out {
 	/** mmap offset from the vma offset manager */
-	uint64_t addr_ptr;
+	__u64 addr_ptr;
 };
 
 union drm_amdgpu_gem_mmap {
@@ -263,18 +269,18 @@ union drm_amdgpu_gem_mmap {
 
 struct drm_amdgpu_gem_wait_idle_in {
 	/** GEM object handle */
-	uint32_t handle;
+	__u32 handle;
 	/** For future use, no flags defined so far */
-	uint32_t flags;
+	__u32 flags;
 	/** Absolute timeout to wait */
-	uint64_t timeout;
+	__u64 timeout;
 };
 
 struct drm_amdgpu_gem_wait_idle_out {
 	/** BO status:  0 - BO is idle, 1 - BO is busy */
-	uint32_t status;
+	__u32 status;
 	/** Returned current memory domain */
-	uint32_t domain;
+	__u32 domain;
 };
 
 union drm_amdgpu_gem_wait_idle {
@@ -284,18 +290,18 @@ union drm_amdgpu_gem_wait_idle {
 
 struct drm_amdgpu_wait_cs_in {
 	/** Command submission handle */
-	uint64_t handle;
+	__u64 handle;
 	/** Absolute timeout to wait */
-	uint64_t timeout;
-	uint32_t ip_type;
-	uint32_t ip_instance;
-	uint32_t ring;
-	uint32_t ctx_id;
+	__u64 timeout;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u32 ctx_id;
 };
 
 struct drm_amdgpu_wait_cs_out {
 	/** CS status:  0 - CS completed, 1 - CS still busy */
-	uint64_t status;
+	__u64 status;
 };
 
 union drm_amdgpu_wait_cs {
@@ -303,17 +309,43 @@ union drm_amdgpu_wait_cs {
 	struct drm_amdgpu_wait_cs_out out;
 };
 
+struct drm_amdgpu_fence {
+	__u32 ctx_id;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u64 seq_no;
+};
+
+struct drm_amdgpu_wait_fences_in {
+	/** This points to uint64_t * which points to fences */
+	__u64 fences;
+	__u32 fence_count;
+	__u32 wait_all;
+	__u64 timeout_ns;
+};
+
+struct drm_amdgpu_wait_fences_out {
+	__u32 status;
+	__u32 first_signaled;
+};
+
+union drm_amdgpu_wait_fences {
+	struct drm_amdgpu_wait_fences_in in;
+	struct drm_amdgpu_wait_fences_out out;
+};
+
 #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
 #define AMDGPU_GEM_OP_SET_PLACEMENT		1
 
 /* Sets or returns a value associated with a buffer. */
 struct drm_amdgpu_gem_op {
 	/** GEM object handle */
-	uint32_t	handle;
+	__u32	handle;
 	/** AMDGPU_GEM_OP_* */
-	uint32_t	op;
+	__u32	op;
 	/** Input or return value */
-	uint64_t	value;
+	__u64	value;
 };
 
 #define AMDGPU_VA_OP_MAP			1
@@ -332,18 +364,18 @@ struct drm_amdgpu_gem_op {
 
 struct drm_amdgpu_gem_va {
 	/** GEM object handle */
-	uint32_t handle;
-	uint32_t _pad;
+	__u32 handle;
+	__u32 _pad;
 	/** AMDGPU_VA_OP_* */
-	uint32_t operation;
+	__u32 operation;
 	/** AMDGPU_VM_PAGE_* */
-	uint32_t flags;
+	__u32 flags;
 	/** va address to assign . Must be correctly aligned.*/
-	uint64_t va_address;
+	__u64 va_address;
 	/** Specify offset inside of BO to assign. Must be correctly aligned.*/
-	uint64_t offset_in_bo;
+	__u64 offset_in_bo;
 	/** Specify mapping size. Must be correctly aligned. */
-	uint64_t map_size;
+	__u64 map_size;
 };
 
 #define AMDGPU_HW_IP_GFX          0
@@ -360,24 +392,24 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
 
 struct drm_amdgpu_cs_chunk {
-	uint32_t		chunk_id;
-	uint32_t		length_dw;
-	uint64_t		chunk_data;
+	__u32		chunk_id;
+	__u32		length_dw;
+	__u64		chunk_data;
 };
 
 struct drm_amdgpu_cs_in {
 	/** Rendering context id */
-	uint32_t		ctx_id;
+	__u32		ctx_id;
 	/**  Handle of resource list associated with CS */
-	uint32_t		bo_list_handle;
-	uint32_t		num_chunks;
-	uint32_t		_pad;
-	/** this points to uint64_t * which point to cs chunks */
-	uint64_t		chunks;
+	__u32		bo_list_handle;
+	__u32		num_chunks;
+	__u32		_pad;
+	/** this points to __u64 * which point to cs chunks */
+	__u64		chunks;
 };
 
 struct drm_amdgpu_cs_out {
-	uint64_t handle;


Reply to: