mesa: Changes to 'ubuntu+1'
debian/changelog | 2
debian/patches/revert-a64c1eb9b110.diff | 392 ++++++++++++++++++++++++++++++++
debian/patches/series | 37 +--
3 files changed, 412 insertions(+), 19 deletions(-)
New commits:
commit acaaa5da70d6ee44e781149165fd32c36b5d800c
Author: Timo Aaltonen <tjaalton@ubuntu.com>
Date: Wed Apr 10 00:55:49 2013 +0300
revert a64c1eb9b110 instead of using a ton of patches that still don't fix blur on ILK
diff --git a/debian/changelog b/debian/changelog
index 7c8652a..ff6a1d0 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,7 +3,7 @@ mesa (9.1.1-0ubuntu1) UNRELEASED; urgency=low
[ Timo Aaltonen ]
* Merge from unreleased debian git
- new upstream release (LP: #1112147)
- * Added a bunch of patches to fix slow blur on intel.
+ * Revert a commit to fix slow blur on intel.
* vbo-fix-crash.diff: Patch from the stable tree that fixes a crasher
with shared display lists.
diff --git a/debian/patches/revert-a64c1eb9b110.diff b/debian/patches/revert-a64c1eb9b110.diff
new file mode 100644
index 0000000..950157f
--- /dev/null
+++ b/debian/patches/revert-a64c1eb9b110.diff
@@ -0,0 +1,392 @@
+--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
+@@ -219,45 +219,6 @@ fs_visitor::CMP(fs_reg dst, fs_reg src0,
+ return inst;
+ }
+
+-exec_list
+-fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
+- fs_reg offset)
+-{
+- exec_list instructions;
+- fs_inst *inst;
+-
+- if (intel->gen >= 7) {
+- inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+- dst, surf_index, offset);
+- instructions.push_tail(inst);
+- } else {
+- int base_mrf = 13;
+- bool header_present = true;
+-
+- fs_reg mrf = fs_reg(MRF, base_mrf + header_present);
+- mrf.type = BRW_REGISTER_TYPE_D;
+-
+- /* On gen6+ we want the dword offset passed in, but on gen4/5 we need a
+- * dword-aligned byte offset.
+- */
+- if (intel->gen == 6) {
+- instructions.push_tail(MOV(mrf, offset));
+- } else {
+- instructions.push_tail(MUL(mrf, offset, fs_reg(4)));
+- }
+- inst = MOV(mrf, offset);
+- inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
+- dst, surf_index);
+- inst->header_present = header_present;
+- inst->base_mrf = base_mrf;
+- inst->mlen = header_present + dispatch_width / 8;
+-
+- instructions.push_tail(inst);
+- }
+-
+- return instructions;
+-}
+-
+ /**
+ * A helper for MOV generation for fixing up broken hardware SEND dependency
+ * handling.
+@@ -443,7 +404,6 @@ fs_reg::equals(const fs_reg &r) const
+ type == r.type &&
+ negate == r.negate &&
+ abs == r.abs &&
+- !reladdr && !r.reladdr &&
+ memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
+ sizeof(fixed_hw_reg)) == 0 &&
+ smear == r.smear &&
+@@ -1561,81 +1521,6 @@ fs_visitor::remove_dead_constants()
+ return true;
+ }
+
+-/*
+- * Implements array access of uniforms by inserting a
+- * PULL_CONSTANT_LOAD instruction.
+- *
+- * Unlike temporary GRF array access (where we don't support it due to
+- * the difficulty of doing relative addressing on instruction
+- * destinations), we could potentially do array access of uniforms
+- * that were loaded in GRF space as push constants. In real-world
+- * usage we've seen, though, the arrays being used are always larger
+- * than we could load as push constants, so just always move all
+- * uniform array access out to a pull constant buffer.
+- */
+-void
+-fs_visitor::move_uniform_array_access_to_pull_constants()
+-{
+- int pull_constant_loc[c->prog_data.nr_params];
+-
+- for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+- pull_constant_loc[i] = -1;
+- }
+-
+- /* Walk through and find array access of uniforms. Put a copy of that
+- * uniform in the pull constant buffer.
+- *
+- * Note that we don't move constant-indexed accesses to arrays. No
+- * testing has been done of the performance impact of this choice.
+- */
+- foreach_list_safe(node, &this->instructions) {
+- fs_inst *inst = (fs_inst *)node;
+-
+- for (int i = 0 ; i < 3; i++) {
+- if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
+- continue;
+-
+- int uniform = inst->src[i].reg;
+-
+- /* If this array isn't already present in the pull constant buffer,
+- * add it.
+- */
+- if (pull_constant_loc[uniform] == -1) {
+- const float **values = &c->prog_data.param[uniform];
+-
+- pull_constant_loc[uniform] = c->prog_data.nr_pull_params;
+-
+- assert(param_size[uniform]);
+-
+- for (int j = 0; j < param_size[uniform]; j++) {
+- c->prog_data.pull_param[c->prog_data.nr_pull_params++] =
+- values[j];
+- }
+- }
+-
+- /* Set up the annotation tracking for new generated instructions. */
+- base_ir = inst->ir;
+- current_annotation = inst->annotation;
+-
+- fs_reg offset = fs_reg(this, glsl_type::int_type);
+- inst->insert_before(ADD(offset, *inst->src[i].reladdr,
+- fs_reg(pull_constant_loc[uniform] +
+- inst->src[i].reg_offset)));
+-
+- fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_FRAG_CONST_BUFFER);
+- fs_reg temp = fs_reg(this, glsl_type::float_type);
+- exec_list list = VARYING_PULL_CONSTANT_LOAD(temp,
+- surf_index, offset);
+- inst->insert_before(&list);
+-
+- inst->src[i].file = temp.file;
+- inst->src[i].reg = temp.reg;
+- inst->src[i].reg_offset = temp.reg_offset;
+- inst->src[i].reladdr = NULL;
+- }
+- }
+-}
+-
+ /**
+ * Choose accesses from the UNIFORM file to demote to using the pull
+ * constant buffer.
+@@ -1662,31 +1547,8 @@ fs_visitor::setup_pull_constants()
+ /* Just demote the end of the list. We could probably do better
+ * here, demoting things that are rarely used in the program first.
+ */
+- unsigned int pull_uniform_base = max_uniform_components;
+-
+- int pull_constant_loc[c->prog_data.nr_params];
+- for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+- if (i < pull_uniform_base) {
+- pull_constant_loc[i] = -1;
+- } else {
+- pull_constant_loc[i] = -1;
+- /* If our constant is already being uploaded for reladdr purposes,
+- * reuse it.
+- */
+- for (unsigned int j = 0; j < c->prog_data.nr_pull_params; j++) {
+- if (c->prog_data.pull_param[j] == c->prog_data.param[i]) {
+- pull_constant_loc[i] = j;
+- break;
+- }
+- }
+- if (pull_constant_loc[i] == -1) {
+- int pull_index = c->prog_data.nr_pull_params++;
+- c->prog_data.pull_param[pull_index] = c->prog_data.param[i];
+- pull_constant_loc[i] = pull_index;;
+- }
+- }
+- }
+- c->prog_data.nr_params = pull_uniform_base;
++ int pull_uniform_base = max_uniform_components;
++ int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
+
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+@@ -1695,16 +1557,14 @@ fs_visitor::setup_pull_constants()
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+- int pull_index = pull_constant_loc[inst->src[i].reg +
+- inst->src[i].reg_offset];
+- if (pull_index == -1)
++ int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
++ if (uniform_nr < pull_uniform_base)
+ continue;
+
+- assert(!inst->src[i].reladdr);
+-
+ fs_reg dst = fs_reg(this, glsl_type::float_type);
+ fs_reg index = fs_reg((unsigned)SURF_INDEX_FRAG_CONST_BUFFER);
+- fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
++ fs_reg offset = fs_reg((unsigned)(((uniform_nr -
++ pull_uniform_base) * 4) & ~15));
+ fs_inst *pull =
+ new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ dst, index, offset);
+@@ -1716,9 +1576,15 @@ fs_visitor::setup_pull_constants()
+ inst->src[i].file = GRF;
+ inst->src[i].reg = dst.reg;
+ inst->src[i].reg_offset = 0;
+- inst->src[i].smear = pull_index & 3;
++ inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3;
+ }
+ }
++
++ for (int i = 0; i < pull_uniform_count; i++) {
++ c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i];
++ }
++ c->prog_data.nr_params -= pull_uniform_count;
++ c->prog_data.nr_pull_params = pull_uniform_count;
+ }
+
+ bool
+@@ -2633,7 +2499,6 @@ fs_visitor::get_instruction_generating_r
+ end->predicate ||
+ end->force_uncompressed ||
+ end->force_sechalf ||
+- reg.reladdr ||
+ !reg.equals(end->dst)) {
+ return NULL;
+ } else {
+@@ -2754,7 +2619,6 @@ fs_visitor::run()
+
+ split_virtual_grfs();
+
+- move_uniform_array_access_to_pull_constants();
+ setup_pull_constants();
+
+ bool progress;
+--- a/src/mesa/drivers/dri/i965/brw_fs.h
++++ b/src/mesa/drivers/dri/i965/brw_fs.h
+@@ -121,8 +121,6 @@ public:
+ uint32_t u;
+ float f;
+ } imm;
+-
+- fs_reg *reladdr;
+ };
+
+ static const fs_reg reg_undef;
+@@ -256,7 +254,6 @@ public:
+
+ fs_inst *emit(fs_inst inst);
+ fs_inst *emit(fs_inst *inst);
+- void emit(exec_list list);
+
+ fs_inst *emit(enum opcode opcode);
+ fs_inst *emit(enum opcode opcode, fs_reg dst);
+@@ -292,8 +289,6 @@ public:
+ fs_inst *end,
+ fs_reg reg);
+
+- exec_list VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
+- fs_reg offset);
+
+ bool run();
+ void setup_payload_gen4();
+@@ -311,7 +306,6 @@ public:
+ void spill_reg(int spill_reg);
+ void split_virtual_grfs();
+ void compact_virtual_grfs();
+- void move_uniform_array_access_to_pull_constants();
+ void setup_pull_constants();
+ void calculate_live_intervals();
+ bool opt_algebraic();
+@@ -424,8 +418,6 @@ public:
+ struct brw_wm_compile *c;
+ unsigned int sanity_param_count;
+
+- int param_size[MAX_UNIFORMS * 4];
+-
+ int *virtual_grf_sizes;
+ int virtual_grf_count;
+ int virtual_grf_array_size;
+--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+@@ -117,7 +117,6 @@ fs_visitor::visit(ir_variable *ir)
+ return;
+ }
+
+- param_size[param_index] = type_size(ir->type);
+ if (!strncmp(ir->name, "gl_", 3)) {
+ setup_builtin_uniform_values(ir);
+ } else {
+@@ -161,41 +160,21 @@ fs_visitor::visit(ir_dereference_record
+ void
+ fs_visitor::visit(ir_dereference_array *ir)
+ {
+- ir_constant *constant_index;
+- fs_reg src;
+- int element_size = type_size(ir->type);
+-
+- constant_index = ir->array_index->as_constant();
++ ir_constant *index;
++ int element_size;
+
+ ir->array->accept(this);
+- src = this->result;
+- src.type = brw_type_for_base_type(ir->type);
+-
+- if (constant_index) {
+- assert(src.file == UNIFORM || src.file == GRF);
+- src.reg_offset += constant_index->value.i[0] * element_size;
+- } else {
+- /* Variable index array dereference. We attach the variable index
+- * component to the reg as a pointer to a register containing the
+- * offset. Currently only uniform arrays are supported in this patch,
+- * and that reladdr pointer is resolved by
+- * move_uniform_array_access_to_pull_constants(). All other array types
+- * are lowered by lower_variable_index_to_cond_assign().
+- */
+- ir->array_index->accept(this);
+-
+- fs_reg index_reg;
+- index_reg = fs_reg(this, glsl_type::int_type);
+- emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size));
++ index = ir->array_index->as_constant();
+
+- if (src.reladdr) {
+- emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg);
+- }
++ element_size = type_size(ir->type);
++ this->result.type = brw_type_for_base_type(ir->type);
+
+- src.reladdr = ralloc(mem_ctx, fs_reg);
+- memcpy(src.reladdr, &index_reg, sizeof(index_reg));
++ if (index) {
++ assert(this->result.file == UNIFORM || this->result.file == GRF);
++ this->result.reg_offset += index->value.i[0] * element_size;
++ } else {
++ assert(!"FINISHME: non-constant array element");
+ }
+- this->result = src;
+ }
+
+ void
+@@ -620,21 +599,6 @@ fs_visitor::visit(ir_expression *ir)
+ */
+ assert(packed_consts.smear < 8);
+ }
+- } else {
+- /* Turn the byte offset into a dword offset. */
+- fs_reg base_offset = fs_reg(this, glsl_type::int_type);
+- emit(SHR(base_offset, op[1], fs_reg(2)));
+-
+- for (int i = 0; i < ir->type->vector_elements; i++) {
+- fs_reg offset = fs_reg(this, glsl_type::int_type);
+- emit(ADD(offset, base_offset, fs_reg(i)));
+- emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index, offset));
+-
+- if (ir->type->base_type == GLSL_TYPE_BOOL)
+- emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));
+-
+- result.reg_offset++;
+- }
+ }
+
+ result.reg_offset = 0;
+@@ -1884,16 +1848,6 @@ fs_visitor::emit(fs_inst *inst)
+ return inst;
+ }
+
+-void
+-fs_visitor::emit(exec_list list)
+-{
+- foreach_list_safe(node, &list) {
+- fs_inst *inst = (fs_inst *)node;
+- inst->remove();
+- emit(inst);
+- }
+-}
+-
+ /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
+ void
+ fs_visitor::emit_dummy_fs()
+@@ -2322,8 +2276,6 @@ fs_visitor::fs_visitor(struct brw_contex
+
+ this->force_uncompressed_stack = 0;
+ this->force_sechalf_stack = 0;
+-
+- memset(&this->param_size, 0, sizeof(this->param_size));
+ }
+
+ fs_visitor::~fs_visitor()
+--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
++++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
+@@ -174,7 +174,7 @@ brw_link_shader(struct gl_context *ctx,
+ bool input = true;
+ bool output = stage == MESA_SHADER_FRAGMENT;
+ bool temp = stage == MESA_SHADER_FRAGMENT;
+- bool uniform = false;
++ bool uniform = stage == MESA_SHADER_FRAGMENT;
+
+ bool lowered_variable_indexing =
+ lower_variable_index_to_cond_assign(shader->ir,
diff --git a/debian/patches/series b/debian/patches/series
index bd5bfc0..a4906c6 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -17,24 +17,24 @@
119-libllvmradeon-link.patch
# fix slow blur
-i965-enable-cse-on-uniform-pull-constant-loads.diff
-i965-add-a-bit-more-instruction-dumping.diff
-i965-fix-broken-rendering-in-large-shaders.diff
-i965-switch-to-using-sampler-ld-messages.diff
-i965-also-do-the-gen4-send-dependency.diff
-i965-specialize-surface_state-creation.diff
-0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch
-0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch
-0003-i965-Make-the-constant-surface-interface-take-a-norm.patch
-0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch
-0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch
-0006-i965-fs-Improve-performance-of-varying-index-uniform.patch
-0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch
-0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch
-0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch
-0010-i965-fs-Don-t-double-emit-SEND-dependency-workaround.patch
-0011-i965-fs-Use-LD-messages-for-pre-gen7-varying-index-u.patch
-0012-i965-fs-Allow-CSE-on-pre-gen7-varying-index-uniform-.patch
+#i965-enable-cse-on-uniform-pull-constant-loads.diff
+#i965-add-a-bit-more-instruction-dumping.diff
+#i965-fix-broken-rendering-in-large-shaders.diff
+#i965-switch-to-using-sampler-ld-messages.diff
+#i965-also-do-the-gen4-send-dependency.diff
+#i965-specialize-surface_state-creation.diff
+#0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch
+#0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch
+#0003-i965-Make-the-constant-surface-interface-take-a-norm.patch
+#0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch
+#0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch
+#0006-i965-fs-Improve-performance-of-varying-index-uniform.patch
+#0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch
+#0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch
+#0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch
+#0010-i965-fs-Don-t-double-emit-SEND-dependency-workaround.patch
+#0011-i965-fs-Use-LD-messages-for-pre-gen7-varying-index-u.patch
+#0012-i965-fs-Allow-CSE-on-pre-gen7-varying-index-uniform-.patch
0001-nv50-fix-3D-render-target-setup.patch
0002-nv50-nvc0-disable-DEPTH_RANGE_NEAR-FAR-clipping-duri.patch
@@ -42,3 +42,4 @@ i965-specialize-surface_state-creation.diff
0004-nvc0-fix-for-2d-engine-R-source-formats-writing-RRR1.patch
vbo-fix-crash.diff
+revert-a64c1eb9b110.diff
Reply to: