mesa: Changes to 'ubuntu+1'
debian/changelog | 1
debian/patches/0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch | 29
debian/patches/0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch | 90 +++
debian/patches/0003-i965-Make-the-constant-surface-interface-take-a-norm.patch | 141 ++++
debian/patches/0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch | 162 +++++
debian/patches/0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch | 34 +
debian/patches/0006-i965-fs-Improve-performance-of-varying-index-uniform.patch | 127 ++++
debian/patches/0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch | 84 ++
debian/patches/0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch | 36 +
debian/patches/0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch | 293 ++++++++++
debian/patches/0010-i965-fs-Don-t-double-emit-SEND-dependency-workaround.patch | 37 +
debian/patches/0011-i965-fs-Use-LD-messages-for-pre-gen7-varying-index-u.patch | 258 ++++++++
debian/patches/0012-i965-fs-Allow-CSE-on-pre-gen7-varying-index-uniform-.patch | 37 +
debian/patches/i965-add-a-bit-more-instruction-dumping.diff | 57 +
debian/patches/i965-also-do-the-gen4-send-dependency.diff | 58 +
debian/patches/i965-enable-cse-on-uniform-pull-constant-loads.diff | 35 +
debian/patches/i965-fix-broken-rendering-in-large-shaders.diff | 27
debian/patches/i965-specialize-surface_state-creation.diff | 127 ++++
debian/patches/i965-switch-to-using-sampler-ld-messages.diff | 209 +++++++
debian/patches/series | 20
20 files changed, 1862 insertions(+)
New commits:
commit a6f3122dcf667b178a73c0db6eabc14089f4e334
Author: Timo Aaltonen <tjaalton@ubuntu.com>
Date: Mon Apr 1 09:06:24 2013 +0300
Added a bunch of patches to fix slow blur on intel.
diff --git a/debian/changelog b/debian/changelog
index 6e7686f..26a916b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,7 @@ mesa (9.1.1-0ubuntu1) UNRELEASED; urgency=low
[ Timo Aaltonen ]
* Merge from unreleased debian git
- new upstream release (LP: #1112147)
+ * Added a bunch of patches to fix slow blur on intel.
-- Timo Aaltonen <tjaalton@ubuntu.com> Wed, 27 Feb 2013 11:12:43 +0200
diff --git a/debian/patches/0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch b/debian/patches/0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch
new file mode 100644
index 0000000..3368dff
--- /dev/null
+++ b/debian/patches/0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch
@@ -0,0 +1,29 @@
+From e3bb00afd8528eab84e57ce3004aa65358d8c34f Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Wed, 13 Mar 2013 12:17:25 -0700
+Subject: [PATCH 01/12] i965/fs: Remove creation of a MOV instruction that's
+ never used.
+
+We weren't inserting it into the list, so it did nothing. This line was
+replaced by the MOV/MUL block above.
+
+NOTE: This is a candidate for the 9.1 branch.
+---
+ src/mesa/drivers/dri/i965/brw_fs.cpp | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
+index 5a5bfeb..2fb8989 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
+@@ -253,7 +253,6 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
+ } else {
+ instructions.push_tail(MUL(mrf, offset, fs_reg(4)));
+ }
+- inst = MOV(mrf, offset);
+ inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
+ dst, surf_index);
+ inst->header_present = header_present;
+--
+1.8.1.2
+
diff --git a/debian/patches/0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch b/debian/patches/0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch
new file mode 100644
index 0000000..e706bb1
--- /dev/null
+++ b/debian/patches/0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch
@@ -0,0 +1,90 @@
+From 88a5caea52f56aab5641fddfd23732cb3ecfaf13 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Wed, 13 Mar 2013 12:27:17 -0700
+Subject: [PATCH 02/12] i965/fs: Move varying uniform offset computation into
+ the helper func.
+
+I'm going to want to change the math for gen7 using sampler LD
+instructions in a way that gets CSE to occur like we'd hope.
+
+NOTE: This is a candidate for the 9.1 branch.
+---
+ src/mesa/drivers/dri/i965/brw_fs.cpp | 16 +++++++++-------
+ src/mesa/drivers/dri/i965/brw_fs.h | 3 ++-
+ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 ++---
+ 3 files changed, 13 insertions(+), 11 deletions(-)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
+index 2fb8989..89b08e8 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
+@@ -229,11 +229,15 @@ fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition)
+
+ exec_list
+ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
+- fs_reg offset)
++ fs_reg varying_offset,
++ uint32_t const_offset)
+ {
+ exec_list instructions;
+ fs_inst *inst;
+
++ fs_reg offset = fs_reg(this, glsl_type::uint_type);
++ instructions.push_tail(ADD(offset, varying_offset, fs_reg(const_offset)));
++
+ if (intel->gen >= 7) {
+ inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+ dst, surf_index, offset);
+@@ -1625,15 +1629,13 @@ fs_visitor::move_uniform_array_access_to_pull_constants()
+ base_ir = inst->ir;
+ current_annotation = inst->annotation;
+
+- fs_reg offset = fs_reg(this, glsl_type::int_type);
+- inst->insert_before(ADD(offset, *inst->src[i].reladdr,
+- fs_reg(pull_constant_loc[uniform] +
+- inst->src[i].reg_offset)));
+-
+ fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_FRAG_CONST_BUFFER);
+ fs_reg temp = fs_reg(this, glsl_type::float_type);
+ exec_list list = VARYING_PULL_CONSTANT_LOAD(temp,
+- surf_index, offset);
++ surf_index,
++ *inst->src[i].reladdr,
++ pull_constant_loc[uniform] +
++ inst->src[i].reg_offset);
+ inst->insert_before(&list);
+
+ inst->src[i].file = temp.file;
+diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
+index 254a534..76130b1 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs.h
++++ b/src/mesa/drivers/dri/i965/brw_fs.h
+@@ -294,7 +294,8 @@ public:
+ fs_reg reg);
+
+ exec_list VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
+- fs_reg offset);
++ fs_reg varying_offset,
++ uint32_t const_offset);
+
+ bool run();
+ void setup_payload_gen4();
+diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+index 735a33d..6b6af8d 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+@@ -650,9 +650,8 @@ fs_visitor::visit(ir_expression *ir)
+ emit(SHR(base_offset, op[1], fs_reg(2)));
+
+ for (int i = 0; i < ir->type->vector_elements; i++) {
+- fs_reg offset = fs_reg(this, glsl_type::int_type);
+- emit(ADD(offset, base_offset, fs_reg(i)));
+- emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index, offset));
++ emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
++ base_offset, i));
+
+ if (ir->type->base_type == GLSL_TYPE_BOOL)
+ emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));
+--
+1.8.1.2
+
diff --git a/debian/patches/0003-i965-Make-the-constant-surface-interface-take-a-norm.patch b/debian/patches/0003-i965-Make-the-constant-surface-interface-take-a-norm.patch
new file mode 100644
index 0000000..17d834e
--- /dev/null
+++ b/debian/patches/0003-i965-Make-the-constant-surface-interface-take-a-norm.patch
@@ -0,0 +1,141 @@
+From 406b0516036273010399ac7a520a765de66df610 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Wed, 20 Mar 2013 10:46:20 -0700
+Subject: [PATCH 03/12] i965: Make the constant surface interface take a normal
+ byte size.
+
+This puts the rounding-up logic into the function itself instead of all
+the callers having to manage it. Also drop an "unused" comment in gen4,
+as the stride *is* used for texbos (and will be for uniforms soon).
+
+NOTE: This is a candidate for the 9.1 branch.
+---
+ src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 7 +++----
+ src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 16 +++++++---------
+ src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 8 +++++---
+ src/mesa/drivers/dri/intel/intel_context.h | 2 +-
+ 4 files changed, 16 insertions(+), 17 deletions(-)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+index 2aefc0c..6c0b690 100644
+--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
++++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+@@ -68,9 +68,9 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ drm_intel_bo_unreference(brw->vs.const_bo);
++ uint32_t size = brw->vs.prog_data->nr_pull_params * 4;
+ brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+- brw->vs.prog_data->nr_pull_params * 4,
+- 64);
++ size, 64);
+
+ drm_intel_gem_bo_map_gtt(brw->vs.const_bo);
+ for (i = 0; i < brw->vs.prog_data->nr_pull_params; i++) {
+@@ -90,8 +90,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
+ drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);
+
+ const int surf = SURF_INDEX_VERT_CONST_BUFFER;
+- intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0,
+- ALIGN(brw->vs.prog_data->nr_pull_params, 4) / 4,
++ intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0, size,
+ &brw->vs.surf_offset[surf]);
+
+ brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
+diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+index 932e472..98eed15 100644
+--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
++++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+@@ -916,11 +916,13 @@ void
+ brw_create_constant_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+- int width,
++ uint32_t size,
+ uint32_t *out_offset)
+ {
+ struct intel_context *intel = &brw->intel;
+- const GLint w = width - 1;
++ uint32_t stride = 16;
++ uint32_t elements = ALIGN(size, stride) / stride;
++ const GLint w = elements - 1;
+ uint32_t *surf;
+
+ surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+@@ -939,7 +941,7 @@ brw_create_constant_surface(struct brw_context *brw,
+ ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
+
+ surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
+- (16 - 1) << BRW_SURFACE_PITCH_SHIFT); /* ignored */
++ (stride - 1) << BRW_SURFACE_PITCH_SHIFT);
+
+ surf[4] = 0;
+ surf[5] = 0;
+@@ -1086,8 +1088,7 @@ brw_upload_wm_pull_constants(struct brw_context *brw)
+ }
+ drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
+
+- intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0,
+- ALIGN(brw->wm.prog_data->nr_pull_params, 4) / 4,
++ intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
+ &brw->wm.surf_offset[surf_index]);
+
+ brw->state.dirty.brw |= BRW_NEW_SURFACES;
+@@ -1439,11 +1440,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
+ * glBindBufferRange case is undefined, we can just bind the whole buffer
+ * glBindBufferBase wants and be a correct implementation.
+ */
+- int size = bo->size - binding->Offset;
+- size = ALIGN(size, 16) / 16; /* The interface takes a number of vec4s */
+-
+ intel->vtbl.create_constant_surface(brw, bo, binding->Offset,
+- size,
++ bo->size - binding->Offset,
+ &surf_offsets[i]);
+ }
+
+diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+index db04253..484afcd 100644
+--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
++++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+@@ -383,11 +383,13 @@ static void
+ gen7_create_constant_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+- int width,
++ uint32_t size,
+ uint32_t *out_offset)
+ {
+ struct intel_context *intel = &brw->intel;
+- const GLint w = width - 1;
++ uint32_t stride = 16;
++ uint32_t elements = ALIGN(size, stride) / stride;
++ const GLint w = elements - 1;
+
+ uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+ 8 * 4, 32, out_offset);
+@@ -403,7 +405,7 @@ gen7_create_constant_surface(struct brw_context *brw,
+ surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) |
+ SET_FIELD((w >> 7) & 0x1fff, GEN7_SURFACE_HEIGHT);
+ surf[3] = SET_FIELD((w >> 20) & 0x7f, BRW_SURFACE_DEPTH) |
+- (16 - 1); /* stride between samples */
++ (stride - 1);
+
+ if (intel->is_haswell) {
+ surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
+diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
+index 2df15d4..bb21f55 100644
+--- a/src/mesa/drivers/dri/intel/intel_context.h
++++ b/src/mesa/drivers/dri/intel/intel_context.h
+@@ -202,7 +202,7 @@ struct intel_context
+ void (*create_constant_surface)(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+- int width,
++ uint32_t size,
+ uint32_t *out_offset);
+ /** \} */
+ } vtbl;
+--
+1.8.1.2
+
diff --git a/debian/patches/0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch b/debian/patches/0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch
new file mode 100644
index 0000000..9bbcd0c
--- /dev/null
+++ b/debian/patches/0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch
@@ -0,0 +1,162 @@
+From 4921232ba622d327f238731874c36a288e605515 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Thu, 14 Mar 2013 14:41:37 -0700
+Subject: [PATCH 04/12] i965: Make the fragment shader pull constants index by
+ dwords, not vec4s.
+
+We want to load vec4s, since loading a vec4 instead of a dword is
+basically no increased latency. But for variable indexed access, the
+previous requirement of aligned vec4s for a sampler LD was hard to
+implement.
+
+Note that this change only affects those messages that use the surface
+format, like sampler LDs, but not the untyped data cache loads we've
+used in other cases.
+
+No significant performance difference on my GLSL demo with uniforms forced
+to take the varying pull constants path (n=4).
+
+NOTE: This is a candidate for the 9.1 branch.
+---
+ src/mesa/drivers/dri/i965/brw_fs.cpp | 5 ++++-
+ src/mesa/drivers/dri/i965/brw_state.h | 5 -----
+ src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 2 +-
+ src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 ++++++++-----
+ src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 5 +++--
+ src/mesa/drivers/dri/intel/intel_context.h | 5 +++--
+ 6 files changed, 19 insertions(+), 16 deletions(-)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
+index 89b08e8..fbe9e3a 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
+@@ -2483,10 +2483,13 @@ fs_visitor::lower_uniform_pull_constant_loads()
+ continue;
+
+ if (intel->gen >= 7) {
++ /* The offset arg before was a vec4-aligned byte offset. We need to
++ * turn it into a dword offset.
++ */
+ fs_reg const_offset_reg = inst->src[1];
+ assert(const_offset_reg.file == IMM &&
+ const_offset_reg.type == BRW_REGISTER_TYPE_UD);
+- const_offset_reg.imm.u /= 16;
++ const_offset_reg.imm.u /= 4;
+ fs_reg payload = fs_reg(this, glsl_type::uint_type);
+
+ /* This is actually going to be a MOV, but since only the first dword
+diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
+index 02ce57b..29ec276 100644
+--- a/src/mesa/drivers/dri/i965/brw_state.h
++++ b/src/mesa/drivers/dri/i965/brw_state.h
+@@ -187,11 +187,6 @@ void *brw_state_batch(struct brw_context *brw,
+ void gen4_init_vtable_surface_functions(struct brw_context *brw);
+ uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
+ uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
+-void brw_create_constant_surface(struct brw_context *brw,
+- drm_intel_bo *bo,
+- uint32_t offset,
+- int width,
+- uint32_t *out_offset);
+
+ uint32_t brw_format_for_mesa_format(gl_format mesa_format);
+
+diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+index 6c0b690..675a84c 100644
+--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
++++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+@@ -91,7 +91,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
+
+ const int surf = SURF_INDEX_VERT_CONST_BUFFER;
+ intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0, size,
+- &brw->vs.surf_offset[surf]);
++ &brw->vs.surf_offset[surf], false);
+
+ brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
+ }
+diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+index 98eed15..506ddf0 100644
+--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
++++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+@@ -912,15 +912,16 @@ brw_update_texture_surface(struct gl_context *ctx,
+ * Create the constant buffer surface. Vertex/fragment shader constants will be
+ * read from this buffer with Data Port Read instructions/messages.
+ */
+-void
++static void
+ brw_create_constant_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+- uint32_t *out_offset)
++ uint32_t *out_offset,
++ bool dword_pitch)
+ {
+ struct intel_context *intel = &brw->intel;
+- uint32_t stride = 16;
++ uint32_t stride = dword_pitch ? 4 : 16;
+ uint32_t elements = ALIGN(size, stride) / stride;
+ const GLint w = elements - 1;
+ uint32_t *surf;
+@@ -1089,7 +1090,8 @@ brw_upload_wm_pull_constants(struct brw_context *brw)
+ drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
+
+ intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
+- &brw->wm.surf_offset[surf_index]);
++ &brw->wm.surf_offset[surf_index],
++ true);
+
+ brw->state.dirty.brw |= BRW_NEW_SURFACES;
+ }
+@@ -1442,7 +1444,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
+ */
+ intel->vtbl.create_constant_surface(brw, bo, binding->Offset,
+ bo->size - binding->Offset,
+- &surf_offsets[i]);
++ &surf_offsets[i],
++ shader->Type == GL_FRAGMENT_SHADER);
+ }
+
+ if (shader->NumUniformBlocks)
+diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+index 484afcd..2c12be3 100644
+--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
++++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+@@ -384,10 +384,11 @@ gen7_create_constant_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+- uint32_t *out_offset)
++ uint32_t *out_offset,
++ bool dword_pitch)
+ {
+ struct intel_context *intel = &brw->intel;
+- uint32_t stride = 16;
++ uint32_t stride = dword_pitch ? 4 : 16;
+ uint32_t elements = ALIGN(size, stride) / stride;
+ const GLint w = elements - 1;
+
+diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
+index bb21f55..7bec10f 100644
+--- a/src/mesa/drivers/dri/intel/intel_context.h
++++ b/src/mesa/drivers/dri/intel/intel_context.h
+@@ -203,13 +203,14 @@ struct intel_context
+ drm_intel_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+- uint32_t *out_offset);
++ uint32_t *out_offset,
++ bool dword_pitch);
+ /** \} */
+ } vtbl;
+
+ GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */
+ GLuint NewGLState;
+-
++
+ dri_bufmgr *bufmgr;
+ unsigned int maxBatchSize;
+
+--
+1.8.1.2
+
diff --git a/debian/patches/0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch b/debian/patches/0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch
new file mode 100644
index 0000000..5e1a22d
--- /dev/null
+++ b/debian/patches/0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch
@@ -0,0 +1,34 @@
+From 0eb070fa7d4a7d4494c70407fc953adc9429edeb Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Fri, 15 Mar 2013 14:31:46 -0700
+Subject: [PATCH 05/12] i965/fs: Avoid inappropriate optimization with
+ regs_written > 1.
+
+Right now we don't have anything with regs_written() > 1 and !inst->mlen,
+but that's about to change.
+
+NOTE: This is a candidate for the 9.1 branch.
+---
+ src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
+index fbe9e3a..f1b0789 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
+@@ -2087,6 +2087,12 @@ fs_visitor::compute_to_mrf()
+ break;
+ }
+
++ /* Things returning more than one register would need us to
++ * understand coalescing out more than one MOV at a time.
++ */
++ if (scan_inst->regs_written() > 1)
++ break;
++
+ /* SEND instructions can't have MRF as a destination. */
+ if (scan_inst->mlen)
+ break;
+--
+1.8.1.2
+
diff --git a/debian/patches/0006-i965-fs-Improve-performance-of-varying-index-uniform.patch b/debian/patches/0006-i965-fs-Improve-performance-of-varying-index-uniform.patch
new file mode 100644
index 0000000..131c379
--- /dev/null
+++ b/debian/patches/0006-i965-fs-Improve-performance-of-varying-index-uniform.patch
@@ -0,0 +1,127 @@
+From 2f4d09235849e206e2807146bb8c8e724ab6ff26 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Wed, 13 Mar 2013 14:48:55 -0700
+Subject: [PATCH 06/12] i965/fs: Improve performance of varying-index uniform
+ loads on IVB.
+
+Like we have done for the VS and for constant-index uniform loads, we use
+the sampler engine to get caching in front of the L3 to avoid tickling the
+IVB L3 bug. This is also a bit of a functional change, as we're now
+loading a vec4 instead of a single dword, though we're not taking
+advantage of the other 3 components of the vec4 (yet).
+
+With the driver hacked to always take the varying-index path for all
+uniforms, improves performance of my old GLSL demo by 315% +/- 2% (n=4).
+This is a major fix for some blur shaders in compositors from the
+varying-index uniforms support I introduced in 9.1.
+
+v2: Move old offset computation into the pre-gen7 path.
+
+Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61554
+NOTE: This is a candidate for the 9.1 branch.
+---
+ src/mesa/drivers/dri/i965/brw_fs.cpp | 29 ++++++++++++++++++++++++-----
+ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 27 ++++++++++++++-------------
+ 2 files changed, 38 insertions(+), 18 deletions(-)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
+index f1b0789..f4aa9f7 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
+@@ -235,14 +235,33 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
+ exec_list instructions;
+ fs_inst *inst;
+
+- fs_reg offset = fs_reg(this, glsl_type::uint_type);
+- instructions.push_tail(ADD(offset, varying_offset, fs_reg(const_offset)));
+-
+ if (intel->gen >= 7) {
++ /* We have our constant surface use a pitch of 4 bytes, so our index can
++ * be any component of a vector, and then we load 4 contiguous
++ * components starting from that.
++ *
++ * We break down the const_offset to a portion added to the variable
++ * offset and a portion done using reg_offset, which means that if you
++ * have GLSL using something like "uniform vec4 a[20]; gl_FragColor =
++ * a[i]", we'll temporarily generate 4 vec4 loads from offset i * 4, and
++ * CSE can later notice that those loads are all the same and eliminate
++ * the redundant ones.
++ */
++ fs_reg vec4_offset = fs_reg(this, glsl_type::int_type);
++ instructions.push_tail(ADD(vec4_offset,
++ varying_offset, const_offset & ~3));
++
++ fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4), dst.type);
+ inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+- dst, surf_index, offset);
++ vec4_result, surf_index, vec4_offset);
+ instructions.push_tail(inst);
++
++ vec4_result.reg_offset += const_offset & 3;
++ instructions.push_tail(MOV(dst, vec4_result));
+ } else {
++ fs_reg offset = fs_reg(this, glsl_type::uint_type);
++ instructions.push_tail(ADD(offset, varying_offset, fs_reg(const_offset)));
++
+ int base_mrf = 13;
+ bool header_present = true;
+
+@@ -313,7 +332,7 @@ fs_inst::equals(fs_inst *inst)
+ int
+ fs_inst::regs_written()
+ {
+- if (is_tex())
++ if (is_tex() || opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7)
+ return 4;
+
+ /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
+diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+index 712fef6..4b3c43f 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+@@ -737,28 +737,29 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
+ index.type == BRW_REGISTER_TYPE_UD);
+ uint32_t surf_index = index.dw1.ud;
+
+- uint32_t msg_control, rlen, mlen;
++ uint32_t simd_mode, rlen, mlen;
+ if (dispatch_width == 16) {
+- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS;
+- mlen = rlen = 2;
++ mlen = 2;
++ rlen = 8;
++ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ } else {
+- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS;
+- mlen = rlen = 1;
++ mlen = 1;
++ rlen = 4;
++ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+ }
+
+ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, offset);
+- if (intel->gen < 6)
+- send->header.destreg__conditionalmod = inst->base_mrf;
+- brw_set_dp_read_message(p, send,
++ brw_set_sampler_message(p, send,
+ surf_index,
+- msg_control,
+- GEN7_DATAPORT_DC_DWORD_SCATTERED_READ,
+- BRW_DATAPORT_READ_TARGET_DATA_CACHE,
++ 0, /* LD message ignores sampler unit */
++ GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
++ rlen,
+ mlen,
+- inst->header_present,
+- rlen);
++ false, /* no header */
++ simd_mode,
++ 0);
+ }
+
+ /**
+--
+1.8.1.2
+
diff --git a/debian/patches/0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch b/debian/patches/0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch
new file mode 100644
index 0000000..b5cfe0e
--- /dev/null
+++ b/debian/patches/0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch
@@ -0,0 +1,84 @@
+From b5f8ad54c7bfd624209e4ae7d36abac0093ddb9a Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Fri, 15 Mar 2013 14:43:28 -0700
+Subject: [PATCH 07/12] i965/fs: Do CSE on gen7's varying-index pull constant
+ loads.
+
+This is our first CSE on a regs_written() > 1 instruction, so it takes a
+bit of extra fixup. Reduces the number of loads on kwin's Lanczos shader
+from 12 to 2.
+
+Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61554
+NOTE: This is a candidate for the 9.1 branch.
+---
+ src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 43 ++++++++++++++++++++++++--------
+ 1 file changed, 32 insertions(+), 11 deletions(-)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+index 02642c9..c89da36 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+@@ -68,6 +68,7 @@ is_expression(const fs_inst *const inst)
+ case BRW_OPCODE_MAD:
+ case BRW_OPCODE_LRP:
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
++ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
+ case FS_OPCODE_CINTERP:
+ case FS_OPCODE_LINTERP:
+ return true;
+@@ -129,21 +130,41 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
+ */
+ bool no_existing_temp = entry->tmp.file == BAD_FILE;
+ if (no_existing_temp) {
+- entry->tmp = fs_reg(this, glsl_type::float_type);
+- entry->tmp.type = inst->dst.type;
+-
+- fs_inst *copy = new(ralloc_parent(inst))
+- fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp);
+- entry->generator->insert_after(copy);
+- entry->generator->dst = entry->tmp;
++ int written = entry->generator->regs_written();
++
++ fs_reg orig_dst = entry->generator->dst;
++ fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
++ orig_dst.type);
++ entry->tmp = tmp;
++ entry->generator->dst = tmp;
++
++ for (int i = 0; i < written; i++) {
++ fs_inst *copy = MOV(orig_dst, tmp);
++ copy->force_writemask_all =
++ entry->generator->force_writemask_all;
++ entry->generator->insert_after(copy);
++
++ orig_dst.reg_offset++;
++ tmp.reg_offset++;
++ }
+ }
+
+ /* dest <- temp */
++ int written = inst->regs_written();
++ assert(written == entry->generator->regs_written());
+ assert(inst->dst.type == entry->tmp.type);
+- fs_inst *copy = new(ralloc_parent(inst))
+- fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
+- copy->force_writemask_all = inst->force_writemask_all;
+- inst->replace_with(copy);
++ fs_reg dst = inst->dst;
++ fs_reg tmp = entry->tmp;
++ fs_inst *copy;
++ for (int i = 0; i < written; i++) {
++ copy = MOV(dst, tmp);
++ copy->force_writemask_all = inst->force_writemask_all;
++ inst->insert_before(copy);
++
++ dst.reg_offset++;
++ tmp.reg_offset++;
++ }
++ inst->remove();
+
+ /* Appending an instruction may have changed our bblock end. */
+ if (inst == block->end) {
+--
+1.8.1.2
+
diff --git a/debian/patches/0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch b/debian/patches/0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch
new file mode 100644
index 0000000..4827a9c
--- /dev/null
+++ b/debian/patches/0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch
@@ -0,0 +1,36 @@
+From 2a0a69e2169eee805b6068f930c3b3049b362a91 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Mon, 18 Mar 2013 11:26:17 -0700
+Subject: [PATCH 08/12] i965/fs: Clean up the setup of gen4 simd16 message
+ destinations.
+
+I think this makes it much more obvious what's going on here.
+
+NOTE: This is a candidate for the 9.1 branch.
+---
+ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+index 6b6af8d..48c6df3 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+@@ -916,11 +916,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ * this weirdness around to the expected layout.
+ */
+ orig_dst = dst;
+- const glsl_type *vec_type =
+- glsl_type::get_instance(ir->type->base_type, 4, 1);
+- dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
+- dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
+- : BRW_REGISTER_TYPE_F;
++ dst = fs_reg(GRF, virtual_grf_alloc(8),
++ (intel->is_g4x ?
++ brw_type_for_base_type(ir->type) :
++ BRW_REGISTER_TYPE_F));
+ }
+
+ fs_inst *inst = NULL;
+--
+1.8.1.2
+
diff --git a/debian/patches/0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch b/debian/patches/0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch
new file mode 100644
index 0000000..3a430d1
--- /dev/null
+++ b/debian/patches/0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch
@@ -0,0 +1,293 @@
+From bb1d21826152370209fd64b9abffd8a59d3ec5f4 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Mon, 18 Mar 2013 11:30:57 -0700
+Subject: [PATCH 09/12] i965/fs: Bake regs_written into the IR instead of
+ recomputing it later.
+
+For sampler messages, it depends on the target gen, and on gen4
+SIMD16-sampler-on-SIMD8-execution we were returning 4 instead of 8 like we
+should.
+
+NOTE: This is a candidate for the 9.1 branch.
+---
+ src/mesa/drivers/dri/i965/brw_fs.cpp | 29 ++++++++--------------
+ src/mesa/drivers/dri/i965/brw_fs.h | 2 +-
+ src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 6 ++---
+ .../drivers/dri/i965/brw_fs_live_variables.cpp | 2 +-
+ src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 8 +++---
+ .../dri/i965/brw_fs_schedule_instructions.cpp | 6 ++---
+ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ++++--
+ 7 files changed, 27 insertions(+), 33 deletions(-)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
+index f4aa9f7..c128175 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
+@@ -60,6 +60,9 @@ fs_inst::init()
+ this->src[0] = reg_undef;
+ this->src[1] = reg_undef;
+ this->src[2] = reg_undef;
++
++ /* This will be the case for almost all instructions. */
++ this->regs_written = 1;
+ }
+
+ fs_inst::fs_inst()
+@@ -254,6 +257,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
+ fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4), dst.type);
+ inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+ vec4_result, surf_index, vec4_offset);
++ inst->regs_written = 4;
+ instructions.push_tail(inst);
+
+ vec4_result.reg_offset += const_offset & 3;
+@@ -329,26 +333,13 @@ fs_inst::equals(fs_inst *inst)
+ offset == inst->offset);
+ }
+
+-int
+-fs_inst::regs_written()
+-{
+- if (is_tex() || opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7)
+- return 4;
+-
+- /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
+- * but we don't currently use them...nor do we have an opcode for them.
+- */
+-
+- return 1;
+-}
+-
+ bool
+ fs_inst::overwrites_reg(const fs_reg &reg)
+ {
+ return (reg.file == dst.file &&
+ reg.reg == dst.reg &&
+ reg.reg_offset >= dst.reg_offset &&
+- reg.reg_offset < dst.reg_offset + regs_written());
++ reg.reg_offset < dst.reg_offset + regs_written);
+ }
+
+ bool
+@@ -1388,7 +1379,7 @@ fs_visitor::split_virtual_grfs()
+ /* If there's a SEND message that requires contiguous destination
+ * registers, no splitting is allowed.
+ */
+- if (inst->regs_written() > 1) {
++ if (inst->regs_written > 1) {
+ split_grf[inst->dst.reg] = false;
+ }
+ }
+@@ -2109,7 +2100,7 @@ fs_visitor::compute_to_mrf()
+ /* Things returning more than one register would need us to
+ * understand coalescing out more than one MOV at a time.
+ */
+- if (scan_inst->regs_written() > 1)
++ if (scan_inst->regs_written > 1)
+ break;
+
+ /* SEND instructions can't have MRF as a destination. */
+@@ -2326,7 +2317,7 @@ void
+ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
+ {
+ int reg_size = dispatch_width / 8;
+- int write_len = inst->regs_written() * reg_size;
++ int write_len = inst->regs_written * reg_size;
+ int first_write_grf = inst->dst.reg;
+ bool needs_dep[BRW_MAX_MRF];
+ assert(write_len < (int)sizeof(needs_dep) - 1);
+@@ -2366,7 +2357,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
+ * dependency has more latency than a MOV.
+ */
+ if (scan_inst->dst.file == GRF) {
+- for (int i = 0; i < scan_inst->regs_written(); i++) {
++ for (int i = 0; i < scan_inst->regs_written; i++) {
+ int reg = scan_inst->dst.reg + i * reg_size;
+
+ if (reg >= first_write_grf &&
+@@ -2405,7 +2396,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
+ void
+ fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
+ {
+- int write_len = inst->regs_written() * dispatch_width / 8;
++ int write_len = inst->regs_written * dispatch_width / 8;
+ int first_write_grf = inst->dst.reg;
+ bool needs_dep[BRW_MAX_MRF];
+ assert(write_len < (int)sizeof(needs_dep) - 1);
+diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
+index 76130b1..0c5aad1 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs.h
++++ b/src/mesa/drivers/dri/i965/brw_fs.h
+@@ -174,7 +174,6 @@ public:
+ fs_reg src0, fs_reg src1,fs_reg src2);
+
+ bool equals(fs_inst *inst);
+- int regs_written();
+ bool overwrites_reg(const fs_reg &reg);
+ bool is_tex();
+ bool is_math();
+@@ -192,6 +191,7 @@ public:
+ uint8_t flag_subreg;
+
+ int mlen; /**< SEND message length */
++ int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */
+ int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
+ uint32_t texture_offset; /**< Texture offset bitfield */
+ int sampler;
+diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+index c89da36..01a64d2 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+@@ -130,7 +130,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
+ */
+ bool no_existing_temp = entry->tmp.file == BAD_FILE;
+ if (no_existing_temp) {
+- int written = entry->generator->regs_written();
++ int written = entry->generator->regs_written;
+
+ fs_reg orig_dst = entry->generator->dst;
+ fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
+@@ -150,8 +150,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
+ }
+
+ /* dest <- temp */
+- int written = inst->regs_written();
+- assert(written == entry->generator->regs_written());
++ int written = inst->regs_written;
++ assert(written == entry->generator->regs_written);
+ assert(inst->dst.type == entry->tmp.type);
+ fs_reg dst = inst->dst;
+ fs_reg tmp = entry->tmp;
+diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+index 63af148..373aa2d 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+@@ -77,7 +77,7 @@ fs_live_variables::setup_def_use()
+ * variable, and thus qualify for being in def[].
+ */
+ if (inst->dst.file == GRF &&
+- inst->regs_written() == v->virtual_grf_sizes[inst->dst.reg] &&
++ inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
+ !inst->predicate &&
+ !inst->force_uncompressed &&
+ !inst->force_sechalf) {
+diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+index b8936dc..4ee7bbc 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+@@ -553,7 +553,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
+ }
+
+ if (inst->dst.file == GRF) {
+- spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale;
++ spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
+
+ if (inst->dst.smear >= 0) {
+ no_spill[inst->dst.reg] = true;
+@@ -622,7 +622,7 @@ fs_visitor::spill_reg(int spill_reg)
+ inst->dst.reg == spill_reg) {
+ int subset_spill_offset = (spill_offset +
+ REG_SIZE * inst->dst.reg_offset);
+- inst->dst.reg = virtual_grf_alloc(inst->regs_written());
++ inst->dst.reg = virtual_grf_alloc(inst->regs_written);
+ inst->dst.reg_offset = 0;
+
+ /* If our write is going to affect just part of the
+@@ -631,7 +631,7 @@ fs_visitor::spill_reg(int spill_reg)
+ */
+ if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
+ fs_reg unspill_reg = inst->dst;
+- for (int chan = 0; chan < inst->regs_written(); chan++) {
++ for (int chan = 0; chan < inst->regs_written; chan++) {
+ emit_unspill(inst, unspill_reg,
Reply to: