[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'upstream-experimental'



 VERSION                                          |    2 
 src/amd/vulkan/radv_device.c                     |  135 ++++++++++++--------
 src/amd/vulkan/radv_pipeline_cache.c             |  154 ++++++++++++-----------
 src/amd/vulkan/radv_private.h                    |    3 
 src/amd/vulkan/radv_wsi.c                        |   16 ++
 src/compiler/nir/nir_opt_intrinsics.c            |   24 +++
 src/gallium/drivers/etnaviv/etnaviv_clear_blit.c |    4 
 src/gallium/drivers/etnaviv/etnaviv_emit.c       |    4 
 src/gallium/drivers/etnaviv/etnaviv_rs.c         |    1 
 src/gallium/drivers/etnaviv/etnaviv_rs.h         |    2 
 src/gallium/drivers/i915/i915_state_derived.c    |   17 ++
 src/gallium/drivers/i915/i915_state_dynamic.c    |    3 
 src/gallium/drivers/i915/i915_state_immediate.c  |    6 
 src/gallium/drivers/i915/i915_state_static.c     |    2 
 src/gallium/drivers/r600/sb/sb_sched.cpp         |   43 ++++--
 src/gallium/drivers/r600/sb/sb_sched.h           |    8 -
 src/gallium/drivers/radeonsi/si_shader_nir.c     |    3 
 src/gallium/targets/dri/Android.mk               |    5 
 src/intel/compiler/brw_fs.cpp                    |   33 ++++
 src/mesa/drivers/dri/i965/brw_draw.c             |   47 ++-----
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c    |   29 ++++
 src/util/disk_cache.c                            |    2 
 22 files changed, 353 insertions(+), 190 deletions(-)

New commits:
commit 19b62847e0c3465c81efa949fea41b32a0c8c0dc
Author: Emil Velikov <emil.velikov@collabora.com>
Date:   Tue Nov 7 11:51:45 2017 +0000

    Update version to 17.3.0-rc3
    
    Signed-off-by: Emil Velikov <emil.velikov@collabora.com>

diff --git a/VERSION b/VERSION
index 00ea172..0c573cb 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-17.3.0-rc2
+17.3.0-rc3

commit d5cc7e47a8ad6c3ebaeecb733190d19966d49109
Author: Plamena Manolova <plamena.manolova@intel.com>
Date:   Mon Oct 30 21:14:24 2017 +0000

    i965: Fix ARB_indirect_parameters logic.
    
    This patch modifies the ARB_indirect_parameters logic in
    brw_draw_prims, so that our implementation isn't affected if
    another application attempts to use predicates. Previously we
    were using a predicate with a DELTAS_EQUAL comparison operation
    and relying on the MI_PREDICATE_DATA register being 0. Our code
    to initialize MI_PREDICATE_DATA to 0 was incorrect, so we were
    accidentally using whatever value was written there. Because the
    kernel does not initialize the MI_PREDICATE_DATA register on
    hardware context creation, we might inherit the value from whatever
    context was last running on the GPU (likely another process).
    The Haswell command parser also does not currently allow us to write
    the MI_PREDICATE_DATA register. Rather than fixing this and requiring
    an updated kernel, we switch to a different approach which uses a
    SRCS_EQUAL predicate that makes no assumptions about the states of any
    of the predicate registers.
    
    Fixes Piglit's spec/arb_indirect_parameters/tf-count-arrays test.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103085
    Signed-off-by: Plamena Manolova <plamena.manolova@intel.com>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    (cherry picked from commit 048d4c45c94eb8d99f2a53f3bf200b2c6a9f9629)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 1e5c499..ecd1d67 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -875,7 +875,6 @@ brw_draw_prims(struct gl_context *ctx,
    struct brw_context *brw = brw_context(ctx);
    const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
    int predicate_state = brw->predicate.state;
-   int combine_op = MI_PREDICATE_COMBINEOP_SET;
    struct brw_transform_feedback_object *xfb_obj =
       (struct brw_transform_feedback_object *) gl_xfb_obj;
 
@@ -919,49 +918,35 @@ brw_draw_prims(struct gl_context *ctx,
     * to it.
     */
 
-    if (brw->draw.draw_params_count_bo &&
-        predicate_state == BRW_PREDICATE_STATE_USE_BIT) {
-      /* We need to empty the MI_PREDICATE_DATA register since it might
-       * already be set.
-       */
-
-      BEGIN_BATCH(4);
-      OUT_BATCH(MI_PREDICATE_DATA);
-      OUT_BATCH(0u);
-      OUT_BATCH(MI_PREDICATE_DATA + 4);
-      OUT_BATCH(0u);
-      ADVANCE_BATCH();
-
-      /* We need to combine the results of both predicates.*/
-      combine_op = MI_PREDICATE_COMBINEOP_AND;
-   }
-
    for (i = 0; i < nr_prims; i++) {
       /* Implementation of ARB_indirect_parameters via predicates */
       if (brw->draw.draw_params_count_bo) {
-         struct brw_bo *draw_id_bo = NULL;
-         uint32_t draw_id_offset;
-
-         intel_upload_data(brw, &prims[i].draw_id, 4, 4, &draw_id_bo,
-                           &draw_id_offset);
-
          brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
 
+         /* Upload the current draw count from the draw parameters buffer to
+          * MI_PREDICATE_SRC0.
+          */
          brw_load_register_mem(brw, MI_PREDICATE_SRC0,
                                brw->draw.draw_params_count_bo,
                                brw->draw.draw_params_count_offset);
-         brw_load_register_mem(brw, MI_PREDICATE_SRC1, draw_id_bo,
-                               draw_id_offset);
+         /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
+         brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0);
+         /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
+         brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id);
 
          BEGIN_BATCH(1);
-         OUT_BATCH(GEN7_MI_PREDICATE |
-                   MI_PREDICATE_LOADOP_LOADINV | combine_op |
-                   MI_PREDICATE_COMPAREOP_DELTAS_EQUAL);
+         if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) {
+            OUT_BATCH(GEN7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
+                      MI_PREDICATE_COMBINEOP_SET |
+                      MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+         } else {
+            OUT_BATCH(GEN7_MI_PREDICATE |
+                      MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR |
+                      MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+         }
          ADVANCE_BATCH();
 
          brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
-
-         brw_bo_unreference(draw_id_bo);
       }
 
       brw_draw_single_prim(ctx, arrays, &prims[i], i, xfb_obj, stream,

commit 9b44ef94b4ed3960d9220a156b21b261e4cc320c
Author: Dave Airlie <airlied@redhat.com>
Date:   Fri May 26 11:24:59 2017 +1000

    i915g: make gears run again.
    
    We need to validate some structs exist before we dirty the states, and
    avoid the problem in some other places.
    
    Fixes: e027935a7 ("st/mesa: don't update unrelated states in non-draw calls such as Clear")
    (cherry picked from commit cc69f2385ee5405cd1bef746d3e9006fc5430545)

diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index dbfbc84..7809010 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -216,6 +216,23 @@ void i915_update_derived(struct i915_context *i915)
    if (I915_DBG_ON(DBG_ATOMS))
       i915_dump_dirty(i915, __FUNCTION__);
 
+   if (!i915->fs) {
+      i915->dirty &= ~(I915_NEW_FS_CONSTANTS | I915_NEW_FS);
+      i915->hardware_dirty &= ~(I915_HW_PROGRAM | I915_HW_CONSTANTS);
+   }
+
+   if (!i915->vs)
+      i915->dirty &= ~I915_NEW_VS;
+
+   if (!i915->blend)
+      i915->dirty &= ~I915_NEW_BLEND;
+
+   if (!i915->rasterizer)
+      i915->dirty &= ~I915_NEW_RASTERIZER;
+
+   if (!i915->depth_stencil)
+      i915->dirty &= ~I915_NEW_DEPTH_STENCIL;
+   
    for (i = 0; atoms[i]; i++)
       if (atoms[i]->dirty & i915->dirty)
          atoms[i]->update(i915);
diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c
index 85b2721..434b09d 100644
--- a/src/gallium/drivers/i915/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915/i915_state_dynamic.c
@@ -213,7 +213,8 @@ static void upload_STIPPLE(struct i915_context *i915)
 
    /* I915_NEW_RASTERIZER
     */
-   st[1] |= i915->rasterizer->st;
+   if (i915->rasterizer)
+      st[1] |= i915->rasterizer->st;
 
    /* I915_NEW_STIPPLE
     */
diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c
index b6007ac..14566a4 100644
--- a/src/gallium/drivers/i915/i915_state_immediate.c
+++ b/src/gallium/drivers/i915/i915_state_immediate.c
@@ -168,11 +168,13 @@ static void upload_S6(struct i915_context *i915)
 
    /* I915_NEW_BLEND
     */
-   LIS6 |= i915->blend->LIS6;
+   if (i915->blend)
+      LIS6 |= i915->blend->LIS6;
 
    /* I915_NEW_DEPTH
     */
-   LIS6 |= i915->depth_stencil->depth_LIS6;
+   if (i915->depth_stencil)
+      LIS6 |= i915->depth_stencil->depth_LIS6;
 
    set_immediate(i915, I915_IMMEDIATE_S6, LIS6);
 }
diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c
index 9a7ea22..88b418b 100644
--- a/src/gallium/drivers/i915/i915_state_static.c
+++ b/src/gallium/drivers/i915/i915_state_static.c
@@ -216,7 +216,7 @@ static void update_dst_buf_vars(struct i915_context *i915)
       zformat = translate_depth_format(depth_surface->format);
 
       if (is->is_i945 && tex->tiling != I915_TILE_NONE
-            && !i915->fs->info.writes_z)
+          && (i915->fs && !i915->fs->info.writes_z))
          early_z = CLASSIC_EARLY_DEPTH;
    } else
       zformat = 0;

commit a12ca3b231a6454d8adf5da916af363c321b5f1a
Author: Jordan Justen <jordan.l.justen@intel.com>
Date:   Fri Oct 13 22:04:52 2017 -0700

    disk_cache: Fix issue reading GLSL metadata
    
    This would cause the read of the metadata content to fail, which would
    prevent the linking from being skipped.
    
    Seen on Rocket League with i965 shader cache.
    
    Fixes: b86ecea3446e "util/disk_cache: write cache item metadata to disk"
    Cc: Timothy Arceri <tarceri@itsqueeze.com>
    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
    Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
    (cherry picked from commit e5b141634cff3aa1f68699f39a2c3794261a32b1)

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index e38cacb..fde6e2e 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -1110,7 +1110,7 @@ disk_cache_get(struct disk_cache *cache, const cache_key key, size_t *size)
        * TODO: pass the metadata back to the caller and do some basic
        * validation.
        */
-      cache_item_md_size += sizeof(cache_key);
+      cache_item_md_size += num_keys * sizeof(cache_key);
       ret = lseek(fd, num_keys * sizeof(cache_key), SEEK_CUR);
       if (ret == -1)
          goto fail;

commit 9710fbbcdfbc0d99424a333d692cae14a88a7863
Author: Timothy Arceri <tarceri@itsqueeze.com>
Date:   Tue Oct 31 14:19:18 2017 +1100

    radeonsi: fix culldist_writemask in nir path
    
    The shared si_create_shader_selector() code already offsets the mask.
    
    Fixes the following piglit tests:
    
    arb_cull_distance/clip-cull-3.shader_test
    arb_cull_distance/clip-cull-4.shader_test
    
    Fixes: 29d7bdd179bb (radeonsi: scan NIR shaders to obtain required info)
    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
    (cherry picked from commit e80bbd6f52341cbf9363f3c0c8b7ad3be851b1e6)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index e186661..7a88227 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -302,8 +302,7 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 	info->num_written_clipdistance = nir->info.clip_distance_array_size;
 	info->num_written_culldistance = nir->info.cull_distance_array_size;
 	info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
-	info->culldist_writemask = u_bit_consecutive(info->num_written_clipdistance,
-						     info->num_written_culldistance);
+	info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);
 
 	if (info->processor == PIPE_SHADER_FRAGMENT)
 		info->uses_kill = nir->info.fs.uses_discard;

commit b4bf9f6a41881b3a8cb63bd23d15ac0c08df4982
Author: Timothy Arceri <tarceri@itsqueeze.com>
Date:   Thu Oct 26 09:35:48 2017 +1100

    radv: add cache items to in memory cache when reading from disk
    
    Otherwise we will leak them, load duplicates from disk rather
    than memory and never write items loaded from disk to the apps
    pipeline cache.
    
    Fixes: fd24be134ffd 'radv: make use of on-disk cache'
    Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
    (cherry picked from commit 1e84e53712aed4892fbaf98e6f26ffdf76f06165)
    
    Squashed with commit:
    
    radv: use correct alloc function when loading from disk
    
    Fixes regression in:
    
    dEQP-VK.api.object_management.alloc_callback_fail.graphics_pipeline
    
    Fixes: 1e84e53712ae "radv: add cache items to in memory cache when reading from disk"
    Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
    (cherry picked from commit e92405c55aa885bee5dfb05fac032cab5e419290)

diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index 9ba9a3b..2bf6379 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -170,6 +170,75 @@ radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
 	return entry;
 }
 
+static void
+radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
+			      struct cache_entry *entry)
+{
+	const uint32_t mask = cache->table_size - 1;
+	const uint32_t start = entry->sha1_dw[0];
+
+	/* We'll always be able to insert when we get here. */
+	assert(cache->kernel_count < cache->table_size / 2);
+
+	for (uint32_t i = 0; i < cache->table_size; i++) {
+		const uint32_t index = (start + i) & mask;
+		if (!cache->hash_table[index]) {
+			cache->hash_table[index] = entry;
+			break;
+		}
+	}
+
+	cache->total_size += entry_size(entry);
+	cache->kernel_count++;
+}
+
+
+static VkResult
+radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
+{
+	const uint32_t table_size = cache->table_size * 2;
+	const uint32_t old_table_size = cache->table_size;
+	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
+	struct cache_entry **table;
+	struct cache_entry **old_table = cache->hash_table;
+
+	table = malloc(byte_size);
+	if (table == NULL)
+		return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+	cache->hash_table = table;
+	cache->table_size = table_size;
+	cache->kernel_count = 0;
+	cache->total_size = 0;
+
+	memset(cache->hash_table, 0, byte_size);
+	for (uint32_t i = 0; i < old_table_size; i++) {
+		struct cache_entry *entry = old_table[i];
+		if (!entry)
+			continue;
+
+		radv_pipeline_cache_set_entry(cache, entry);
+	}
+
+	free(old_table);
+
+	return VK_SUCCESS;
+}
+
+static void
+radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
+			      struct cache_entry *entry)
+{
+	if (cache->kernel_count == cache->table_size / 2)
+		radv_pipeline_cache_grow(cache);
+
+	/* Failing to grow that hash table isn't fatal, but may mean we don't
+	 * have enough space to add this new kernel. Only add it if there's room.
+	 */
+	if (cache->kernel_count < cache->table_size / 2)
+		radv_pipeline_cache_set_entry(cache, entry);
+}
+
 bool
 radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
 					        struct radv_pipeline_cache *cache,
@@ -201,6 +270,21 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
 		if (!entry) {
 			pthread_mutex_unlock(&cache->mutex);
 			return false;
+		} else {
+			size_t size = entry_size(entry);
+			struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
+								 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+			if (!new_entry) {
+				free(entry);
+				pthread_mutex_unlock(&cache->mutex);
+				return false;
+			}
+
+			memcpy(new_entry, entry, entry_size(entry));
+			free(entry);
+			entry = new_entry;
+
+			radv_pipeline_cache_add_entry(cache, new_entry);
 		}
 	}
 
@@ -246,76 +330,6 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
 	return true;
 }
 
-
-static void
-radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
-			      struct cache_entry *entry)
-{
-	const uint32_t mask = cache->table_size - 1;
-	const uint32_t start = entry->sha1_dw[0];
-
-	/* We'll always be able to insert when we get here. */
-	assert(cache->kernel_count < cache->table_size / 2);
-
-	for (uint32_t i = 0; i < cache->table_size; i++) {
-		const uint32_t index = (start + i) & mask;
-		if (!cache->hash_table[index]) {
-			cache->hash_table[index] = entry;
-			break;
-		}
-	}
-
-	cache->total_size += entry_size(entry);
-	cache->kernel_count++;
-}
-
-
-static VkResult
-radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
-{
-	const uint32_t table_size = cache->table_size * 2;
-	const uint32_t old_table_size = cache->table_size;
-	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
-	struct cache_entry **table;
-	struct cache_entry **old_table = cache->hash_table;
-
-	table = malloc(byte_size);
-	if (table == NULL)
-		return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-	cache->hash_table = table;
-	cache->table_size = table_size;
-	cache->kernel_count = 0;
-	cache->total_size = 0;
-
-	memset(cache->hash_table, 0, byte_size);
-	for (uint32_t i = 0; i < old_table_size; i++) {
-		struct cache_entry *entry = old_table[i];
-		if (!entry)
-			continue;
-
-		radv_pipeline_cache_set_entry(cache, entry);
-	}
-
-	free(old_table);
-
-	return VK_SUCCESS;
-}
-
-static void
-radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
-			      struct cache_entry *entry)
-{
-	if (cache->kernel_count == cache->table_size / 2)
-		radv_pipeline_cache_grow(cache);
-
-	/* Failing to grow that hash table isn't fatal, but may mean we don't
-	 * have enough space to add this new kernel. Only add it if there's room.
-	 */
-	if (cache->kernel_count < cache->table_size / 2)
-		radv_pipeline_cache_set_entry(cache, entry);
-}
-
 void
 radv_pipeline_cache_insert_shaders(struct radv_device *device,
 				   struct radv_pipeline_cache *cache,

commit 2516c3217ded38a9d025d90502cf5b029593c66c
Author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Date:   Wed Nov 1 09:26:48 2017 +0100

    radv: Don't expose heaps with 0 memory.
    
    It confuses CTS. This pregenerates the heap info into the
    physical device, so we can use it for translating contiguous
    indices into our "standard" ones.
    
    This also makes the WSI a bit smarter in case the first preferred
    heap does not exist.
    
    Reviewed-by: Dave Airlie <airlied@redhat.com>
    CC: <mesa-stable@lists.freedesktop.org>
    (cherry picked from commit 806721429afa090380bf39a4958fe4e21c63816c)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 19ff8fe..abdbdeb 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -104,6 +104,75 @@ get_chip_name(enum radeon_family family)
 	}
 }
 
+static void
+radv_physical_device_init_mem_types(struct radv_physical_device *device)
+{
+	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
+	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
+	                                  device->rad_info.vram_vis_size);
+
+	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
+	device->memory_properties.memoryHeapCount = 0;
+	if (device->rad_info.vram_size - visible_vram_size > 0) {
+		vram_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
+			.size = device->rad_info.vram_size - visible_vram_size,
+			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+		};
+	}
+	if (visible_vram_size) {
+		visible_vram_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
+			.size = visible_vram_size,
+			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+		};
+	}
+	if (device->rad_info.gart_size > 0) {
+		gart_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
+			.size = device->rad_info.gart_size,
+			.flags = 0,
+		};
+	}
+
+	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
+	unsigned type_count = 0;
+	if (vram_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+			.heapIndex = vram_index,
+		};
+	}
+	if (gart_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+			.heapIndex = gart_index,
+		};
+	}
+	if (visible_vram_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+			.heapIndex = visible_vram_index,
+		};
+	}
+	if (gart_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+			.heapIndex = gart_index,
+		};
+	}
+	device->memory_properties.memoryTypeCount = type_count;
+}
+
 static VkResult
 radv_physical_device_init(struct radv_physical_device *device,
 			  struct radv_instance *instance,
@@ -190,6 +259,7 @@ radv_physical_device_init(struct radv_physical_device *device,
 	 */
 	device->has_clear_state = device->rad_info.chip_class >= CIK;
 
+	radv_physical_device_init_mem_types(device);
 	return VK_SUCCESS;
 
 fail:
@@ -780,49 +850,7 @@ void radv_GetPhysicalDeviceMemoryProperties(
 {
 	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
 
-	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
-
-	pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-		.heapIndex = RADV_MEM_HEAP_VRAM,
-	};
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-		.heapIndex = RADV_MEM_HEAP_GTT,
-	};
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-		VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-		.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
-	};
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-		VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-		.heapIndex = RADV_MEM_HEAP_GTT,
-	};
-
-	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
-	uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size,
-	                                  physical_device->rad_info.vram_vis_size);
-
-	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
-	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
-		.size = physical_device->rad_info.vram_size -
-				visible_vram_size,
-		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-	};
-	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
-		.size = visible_vram_size,
-		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-	};
-	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
-		.size = physical_device->rad_info.gart_size,
-		.flags = 0,
-	};
+	*pMemoryProperties = physical_device->memory_properties;
 }
 
 void radv_GetPhysicalDeviceMemoryProperties2KHR(
@@ -2060,6 +2088,7 @@ VkResult radv_alloc_memory(VkDevice                        _device,
 	VkResult result;
 	enum radeon_bo_domain domain;
 	uint32_t flags = 0;
+	enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
 
 	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
 
@@ -2102,18 +2131,18 @@ VkResult radv_alloc_memory(VkDevice                        _device,
 	}
 
 	uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
-	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
-	    pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
+	if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
+	    mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
 		domain = RADEON_DOMAIN_GTT;
 	else
 		domain = RADEON_DOMAIN_VRAM;
 
-	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
+	if (mem_type_index == RADV_MEM_TYPE_VRAM)
 		flags |= RADEON_FLAG_NO_CPU_ACCESS;
 	else
 		flags |= RADEON_FLAG_CPU_ACCESS;
 
-	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
+	if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
 		flags |= RADEON_FLAG_GTT_WC;
 
 	if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
@@ -2126,7 +2155,7 @@ VkResult radv_alloc_memory(VkDevice                        _device,
 		result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
 		goto fail;
 	}
-	mem->type_index = pAllocateInfo->memoryTypeIndex;
+	mem->type_index = mem_type_index;
 out_success:
 	*pMem = radv_device_memory_to_handle(mem);
 
@@ -2219,13 +2248,14 @@ VkResult radv_InvalidateMappedMemoryRanges(
 }
 
 void radv_GetBufferMemoryRequirements(
-	VkDevice                                    device,
+	VkDevice                                    _device,
 	VkBuffer                                    _buffer,
 	VkMemoryRequirements*                       pMemoryRequirements)
 {
+	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
 
-	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
 
 	if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
 		pMemoryRequirements->alignment = 4096;
@@ -2259,13 +2289,14 @@ void radv_GetBufferMemoryRequirements2KHR(
 }
 
 void radv_GetImageMemoryRequirements(
-	VkDevice                                    device,
+	VkDevice                                    _device,
 	VkImage                                     _image,
 	VkMemoryRequirements*                       pMemoryRequirements)
 {
+	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_image, image, _image);
 
-	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
 
 	pMemoryRequirements->size = image->size;
 	pMemoryRequirements->alignment = image->alignment;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a4e52b2..ae254ed 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -282,6 +282,9 @@ struct radv_physical_device {
 	 * the pipeline cache defined by apps.
 	 */
 	struct disk_cache *                          disk_cache;
+
+	VkPhysicalDeviceMemoryProperties memory_properties;
+	enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];
 };
 
 struct radv_instance {
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index b65ef27..e07c502 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -194,12 +194,26 @@ radv_wsi_image_create(VkDevice device_h,
 		.image = image_h
 	};
 
+	/* Find the first VRAM memory type, or GART for PRIME images. */
+	int memory_type_index = -1;
+	for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
+		bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+		if ((linear && !is_local) || (!linear && is_local)) {
+			memory_type_index = i;
+			break;
+		}
+	}
+
+	/* fallback */
+	if (memory_type_index == -1)
+		memory_type_index = 0;
+
 	result = radv_alloc_memory(device_h,
 				     &(VkMemoryAllocateInfo) {
 					     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 					     .pNext = &ded_alloc,
 					     .allocationSize = image->size,
-					     .memoryTypeIndex = linear ? 1 : 0,
+					     .memoryTypeIndex = memory_type_index,
 				     },
 				     NULL /* XXX: pAllocator */,
 				     RADV_MEM_IMPLICIT_SYNC,

commit 383b3603482616765e0716fd42a0698772b36d0f
Author: Jason Ekstrand <jason.ekstrand@intel.com>
Date:   Wed Nov 1 07:57:21 2017 -0700

    intel/fs: Alloc pull constants off mem_ctx
    
    It doesn't actually matter since the only user of push constants, i965,
    ralloc_steals it back to NULL but it's more consistent and probably
    fixes memory leaks in some error cases.
    
    Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
    Cc: mesa-stable@lists.freedesktop.org
    (cherry picked from commit 7b4387519c382cffef9c62bbbbefcfe71cfde905)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index e546792..21ff030 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2095,7 +2095,7 @@ fs_visitor::assign_constant_locations()
    stage_prog_data->param = ralloc_array(mem_ctx, uint32_t, num_push_constants);
    if (num_pull_constants > 0) {
       stage_prog_data->nr_pull_params = num_pull_constants;
-      stage_prog_data->pull_param = ralloc_array(NULL, uint32_t,
+      stage_prog_data->pull_param = ralloc_array(mem_ctx, uint32_t,
                                                  num_pull_constants);
    }
 

commit 71571aab1432faedf2cd01163fab1aad22d2931c
Author: Wladimir J. van der Laan <laanwj@gmail.com>
Date:   Wed Nov 1 11:17:53 2017 +0100

    etnaviv: don't do resolve-in-place without valid TS
    
    GC3000 resolve-in-place assumes that the TS state is configured.
    If it is not, this will result in MMU errors. This is especially
    apparent when using glGenMipmaps().
    
    Fixes: 78ade659569e ("etnaviv: Do GC3000 resolve-in-place when possible")
    Cc: mesa-stable@lists.freedesktop.org
    Signed-off-by: Wladimir J. van der Laan <laanwj@gmail.com>
    Tested-by: Chris Healy <cphealy@gmail.com>
    Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
    (cherry picked from commit 8fbd82f464f26a56167f7962174b2b69756a105a)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c b/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c
index 7b3fc18..21f50b7 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c
@@ -555,6 +555,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
    }
 
    /* Set up color TS to source surface before blit, if needed */
+   bool source_ts_valid = false;
    if (src->levels[blit_info->src.level].ts_size &&
        src->levels[blit_info->src.level].ts_valid) {
       struct etna_reloc reloc;
@@ -579,6 +580,8 @@ etna_try_rs_blit(struct pipe_context *pctx,
 
       etna_set_state(ctx->stream, VIVS_TS_COLOR_CLEAR_VALUE,
                      src->levels[blit_info->src.level].clear_value);
+
+      source_ts_valid = true;
    } else {
       etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG, ts_mem_config);
    }
@@ -593,6 +596,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
       .source_stride = src_lev->stride,
       .source_padded_width = src_lev->padded_width,
       .source_padded_height = src_lev->padded_height,
+      .source_ts_valid = source_ts_valid,
       .dest_format = translate_rs_format(dst_format),
       .dest_tiling = dst->layout,
       .dest = dst->bo,
diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c
index 707b1e7..5397aa3 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_emit.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c
@@ -171,6 +171,10 @@ etna_submit_rs_state(struct etna_context *ctx,
    struct etna_cmd_stream *stream = ctx->stream;
    struct etna_coalesce coalesce;
 
+   if (cs->RS_KICKER_INPLACE && !cs->source_ts_valid)
+      /* Inplace resolve is no-op if TS is not configured */
+      return;
+
    ctx->stats.rs_operations++;
 
    if (cs->RS_KICKER_INPLACE) {
diff --git a/src/gallium/drivers/etnaviv/etnaviv_rs.c b/src/gallium/drivers/etnaviv/etnaviv_rs.c
index c9072c2..60c2c39 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_rs.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_rs.c
@@ -133,6 +133,7 @@ etna_compile_rs_state(struct etna_context *ctx, struct compiled_rs_state *cs,
       /* Total number of tiles (same as for autodisable) */
       cs->RS_KICKER_INPLACE = rs->source_padded_width * rs->source_padded_height / 16;
    }
+   cs->source_ts_valid = rs->source_ts_valid;
 }
 
 void
diff --git a/src/gallium/drivers/etnaviv/etnaviv_rs.h b/src/gallium/drivers/etnaviv/etnaviv_rs.h
index 171d3fa..41a5960 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_rs.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_rs.h
@@ -33,6 +33,7 @@
 struct rs_state {
    uint8_t downsample_x : 1; /* Downsample in x direction */
    uint8_t downsample_y : 1; /* Downsample in y direction */
+   uint8_t source_ts_valid : 1;
 
    uint8_t source_format; /* RS_FORMAT_XXX */
    uint8_t source_tiling; /* ETNA_LAYOUT_XXX */
@@ -61,6 +62,7 @@ struct rs_state {
 
 /* treat this as opaque structure */
 struct compiled_rs_state {
+   uint8_t source_ts_valid : 1;
    uint32_t RS_CONFIG;
    uint32_t RS_SOURCE_STRIDE;
    uint32_t RS_DEST_STRIDE;

commit 13bfb83b31adcd457ddd9ee8a198fa99eb7ba1cf
Author: Gert Wollny <gw.fossdev@gmail.com>
Date:   Mon Oct 16 21:06:26 2017 +0200

    r600/sb: bail out if prepare_alu_group() doesn't find a proper scheduling
    
    It is possible that the optimizer ends up in an infinite loop in
    post_scheduler::schedule_alu(), because post_scheduler::prepare_alu_group()
    does not find a proper scheduling. This can be deduced from
    pending.count() being larger than zero and not getting smaller.
    
    This patch works around this problem by signalling this failure so that the
    optimizer bails out and the un-optimized shader is used.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103142
    Cc: <mesa-stable@lists.freedesktop.org>
    Signed-off-by: Gert Wollny <gw.fossdev@gmail.com>
    Signed-off-by: Dave Airlie <airlied@redhat.com>
    (cherry picked from commit 69eee511c631a8372803f175bd6f5a9551230424)

diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
index 5113b75..2fbec2f 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -711,22 +711,24 @@ void alu_group_tracker::update_flags(alu_node* n) {
 }
 
 int post_scheduler::run() {
-	run_on(sh.root);
-	return 0;
+	return run_on(sh.root) ? 0 : 1;
 }
 
-void post_scheduler::run_on(container_node* n) {
-
+bool post_scheduler::run_on(container_node* n) {
+	int r = true;
 	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
 		if (I->is_container()) {
 			if (I->subtype == NST_BB) {
 				bb_node* bb = static_cast<bb_node*>(*I);
-				schedule_bb(bb);
+				r = schedule_bb(bb);
 			} else {
-				run_on(static_cast<container_node*>(*I));
+				r = run_on(static_cast<container_node*>(*I));
 			}
+			if (!r)
+				break;
 		}
 	}
+	return r;
 }
 
 void post_scheduler::init_uc_val(container_node *c, value *v) {
@@ -758,7 +760,7 @@ unsigned post_scheduler::init_ucm(container_node *c, node *n) {
 	return F == ucm.end() ? 0 : F->second;
 }
 
-void post_scheduler::schedule_bb(bb_node* bb) {
+bool post_scheduler::schedule_bb(bb_node* bb) {
 	PSC_DUMP(
 		sblog << "scheduling BB " << bb->id << "\n";
 		if (!pending.empty())
@@ -791,8 +793,10 @@ void post_scheduler::schedule_bb(bb_node* bb) {
 
 		if (n->is_alu_clause()) {
 			n->remove();
-			process_alu(static_cast<container_node*>(n));
-			continue;
+			bool r = process_alu(static_cast<container_node*>(n));
+			if (r)
+				continue;
+			return false;
 		}
 
 		n->remove();
@@ -800,6 +804,7 @@ void post_scheduler::schedule_bb(bb_node* bb) {
 	}
 
 	this->cur_bb = NULL;
+	return true;
 }
 
 void post_scheduler::init_regmap() {
@@ -933,10 +938,10 @@ void post_scheduler::process_fetch(container_node *c) {
 	cur_bb->push_front(c);
 }
 
-void post_scheduler::process_alu(container_node *c) {
+bool post_scheduler::process_alu(container_node *c) {
 


Reply to: