mesa: Changes to 'upstream-experimental'
Rebased ref, commits from common ancestor:
commit 3b48f6a4c06db57a7203d247994b05e55c9418c1
Author: Chris Forbes <chrisf@ijw.co.nz>
Date: Sun Aug 3 19:55:55 2014 +1200
mesa: Add a new function for getting the nonconst sampler array index
If the array index is not a constant expression, the existing support
will assume a zero offset (giving us the sampler index of the base of
the array).
For dynamically uniform indexing of sampler arrays, we need both that
and the indexing expression.
Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index e6532be..29a5408 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -134,3 +134,14 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
return shader_program->UniformStorage[location].sampler[shader].index +
getname.offset;
}
+
+
+extern "C" class ir_rvalue *
+_mesa_get_sampler_array_nonconst_index(class ir_dereference *sampler)
+{
+ ir_dereference_array *deref_arr = sampler->as_dereference_array();
+ if (!deref_arr || deref_arr->array_index->as_constant())
+ return NULL;
+
+ return deref_arr->array_index;
+}
diff --git a/src/mesa/program/sampler.h b/src/mesa/program/sampler.h
index 22467e9..8b7c3b6 100644
--- a/src/mesa/program/sampler.h
+++ b/src/mesa/program/sampler.h
@@ -27,3 +27,6 @@ int
_mesa_get_sampler_uniform_value(class ir_dereference *sampler,
struct gl_shader_program *shader_program,
const struct gl_program *prog);
+
+class ir_rvalue *
+_mesa_get_sampler_array_nonconst_index(class ir_dereference *sampler);
commit 1b4761bc27a50208dba2bc028c9835fed572e696
Author: Chris Forbes <chrisf@ijw.co.nz>
Date: Sun Aug 3 17:57:05 2014 +1200
glsl: Allow dynamically uniform sampler array indexing with 4.0/gs5
V2: Expand comment to explain what dynamically uniform expressions are
about.
Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index 50f9987..5ca85f6 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -213,6 +213,13 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
* as using a loop counter as the index to an array of samplers. If the
* loop in unrolled, the code should compile correctly. Instead, emit a
* warning.
+ *
+ * In GLSL 4.00 / ARB_gpu_shader5, this requirement is relaxed again to allow
+ * indexing with dynamically uniform expressions. Note that these are not
+ * required to be uniforms or expressions based on them, but merely that the
+ * values must not diverge between shader invocations run together. If the
+ * values *do* diverge, then the behavior of the operation requiring a
+ * dynamically uniform expression is undefined.
*/
if (array->type->element_type()->is_sampler()) {
if (!state->is_version(130, 100)) {
@@ -227,7 +234,7 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
"expressions will be forbidden in GLSL 1.30 "
"and later");
}
- } else {
+ } else if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
_mesa_glsl_error(&loc, state,
"sampler arrays indexed with non-constant "
"expressions is forbidden in GLSL 1.30 and "
commit f525bd01d1430a5e33f57805f50fe4e89aa86ae8
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed Aug 6 23:45:05 2014 -0400
nvc0/ir: describe the tex arguments for fermi/kepler
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index ade315d..7da9b0b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -567,6 +567,31 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
const int chipset = prog->getTarget()->getChipset();
+ // Arguments to the TEX instruction are a little insane. Even though the
+ // encoding is identical between SM20 and SM30, the arguments mean
+ // different things between Fermi and Kepler+. A lot of arguments are
+ // optional based on flags passed to the instruction. This summarizes the
+ // order of things.
+ //
+ // Fermi:
+ // array/indirect
+ // coords
+ // sample
+ // lod bias
+ // depth compare
+ // offsets:
+ // - tg4: 8 bits each, either 2 (1 offset reg) or 8 (2 offset reg)
+ // - other: 4 bits each, single reg
+ //
+ // Kepler+:
+ // indirect handle
+ // array (+ offsets for txd in upper 16 bits)
+ // coords
+ // sample
+ // lod bias
+ // depth compare
+ // offsets (same as fermi, except txd which takes it with array)
+
if (chipset >= NVISA_GK104_CHIPSET) {
if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
// XXX this ignores tsc, and assumes a 1:1 mapping
commit b3cbd862242e0ff75584fef706f2b2a3da8e49f2
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed Jul 9 00:41:11 2014 -0400
nvc0/ir: add kepler+ support for indirect texture references
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 4a9e48f..ade315d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -569,9 +569,17 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
if (chipset >= NVISA_GK104_CHIPSET) {
if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
- WARN("indirect TEX not implemented\n");
- }
- if (i->tex.r == i->tex.s) {
+ // XXX this ignores tsc, and assumes a 1:1 mapping
+ assert(i->tex.rIndirectSrc >= 0);
+ Value *hnd = loadTexHandle(
+ bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+ i->getIndirectR(), bld.mkImm(2)),
+ i->tex.r);
+ i->tex.r = 0xff;
+ i->tex.s = 0x1f;
+ i->setIndirectR(hnd);
+ i->setIndirectS(NULL);
+ } else if (i->tex.r == i->tex.s) {
i->tex.r += prog->driver->io.texBindBase / 4;
i->tex.s = 0; // only a single cX[] value possible here
} else {
@@ -595,6 +603,16 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
i->setSrc(s, i->getSrc(s - 1));
i->setSrc(0, layer);
}
+ // Move the indirect reference to the first place
+ if (i->tex.rIndirectSrc >= 0) {
+ Value *hnd = i->getIndirectR();
+
+ i->setIndirectR(NULL);
+ i->moveSources(0, 1);
+ i->setSrc(0, hnd);
+ i->tex.rIndirectSrc = 0;
+ i->tex.sIndirectSrc = -1;
+ }
} else
// (nvc0) generate and move the tsc/tic/array source to the front
if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
@@ -688,14 +706,14 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
// The offset goes into the upper 16 bits of the array index. So
// create it if it's not already there, and INSBF it if it already
// is.
+ s = (i->tex.rIndirectSrc >= 0) ? 1 : 0;
if (i->tex.target.isArray()) {
bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(0),
bld.loadImm(NULL, imm), bld.mkImm(0xc10),
- i->getSrc(0));
+ i->getSrc(s));
} else {
- for (int s = dim; s >= 1; --s)
- i->setSrc(s, i->getSrc(s - 1));
- i->setSrc(0, bld.loadImm(NULL, imm << 16));
+ i->moveSources(s, 1);
+ i->setSrc(s, bld.loadImm(NULL, imm << 16));
}
} else {
i->setSrc(s, bld.loadImm(NULL, imm));
@@ -792,6 +810,8 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
if (chipset >= NVISA_GK104_CHIPSET) {
if (!txd->tex.target.isArray() && txd->tex.useOffsets)
expected_args++;
+ if (txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0)
+ expected_args++;
} else {
if (txd->tex.useOffsets)
expected_args++;
commit af3619e88043ce85560b8220dc16244f8898a926
Author: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed Aug 6 01:22:49 2014 -0400
nvc0/ir: add base tex offset for fermi indirect tex case
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index f010767..4a9e48f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -603,10 +603,18 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
Value *ticRel = i->getIndirectR();
Value *tscRel = i->getIndirectS();
- if (ticRel)
+ if (ticRel) {
i->setSrc(i->tex.rIndirectSrc, NULL);
- if (tscRel)
+ if (i->tex.r)
+ ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
+ ticRel, bld.mkImm(i->tex.r));
+ }
+ if (tscRel) {
i->setSrc(i->tex.sIndirectSrc, NULL);
+ if (i->tex.s)
+ tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
+ tscRel, bld.mkImm(i->tex.s));
+ }
Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
for (int s = dim; s >= 1; --s)
commit f73594778b0ef9804fc7839d5e2051d051d4ef48
Author: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon Aug 11 15:05:54 2014 -0700
i965: Revert part of f5cc3fdcf1680b116612fac7c39f1bd79f5e555e.
Fixes non-termination in various Piglit tests.
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 1b8c987..29d2e02 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -82,7 +82,7 @@ is_expression(const vec4_instruction *const inst)
case SHADER_OPCODE_COS:
return inst->mlen == 0;
default:
- return !inst->has_side_effects();
+ return false;
}
}
commit 602a3f92d4d695e116794597db81623a8fd4c653
Author: Eric Anholt <eric@anholt.net>
Date: Sat Aug 9 11:01:53 2014 -0700
vc4: Flip which primitives are considered front-facing.
This mostly fixes glxgears rendering.
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 69fd218..d2c53a5 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -101,7 +101,7 @@ vc4_create_rasterizer_state(struct pipe_context *pctx,
/* XXX: per_vertex */
so->point_size = cso->point_size;
- if (!cso->front_ccw)
+ if (cso->front_ccw)
so->config_bits[0] |= VC4_CONFIG_BITS_CW_PRIMITIVES;
if (cso->offset_tri)
commit f097516505daaaf5c25c919d56cbce54eb441a48
Author: Eric Anholt <eric@anholt.net>
Date: Sat Aug 9 11:00:51 2014 -0700
vc4: Don't forget to set the depth clear value in the packet.
This gets glxgears partially rendering again.
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index ffcbbb2..8ca4031 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -105,7 +105,7 @@ vc4_setup_rcl(struct vc4_context *vc4)
cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
cl_u32(&vc4->rcl, vc4->clear_color[0]);
cl_u32(&vc4->rcl, vc4->clear_color[1]);
- cl_u32(&vc4->rcl, 0);
+ cl_u32(&vc4->rcl, vc4->clear_depth);
cl_u8(&vc4->rcl, 0);
cl_start_reloc(&vc4->rcl, 1);
commit e63598aecb5d1cc2a20b8db1ef85790e301f4241
Author: Eric Anholt <eric@anholt.net>
Date: Tue Aug 5 14:24:29 2014 -0700
vc4: Add support for gl_FragCoord.
This isn't passing all tests (glsl-fs-fragcoord-zw-ortho, for example),
but it does get a bunch more tests passing.
v2: Rebase on helpers change.
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 8109f63..d871dcd 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -570,6 +570,20 @@ emit_vertex_input(struct tgsi_to_qir *trans, int attr)
}
static void
+emit_fragcoord_input(struct tgsi_to_qir *trans, int attr)
+{
+ struct qcompile *c = trans->c;
+
+ trans->inputs[attr * 4 + 0] = qir_FRAG_X(c);
+ trans->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
+ trans->inputs[attr * 4 + 2] =
+ qir_FMUL(c,
+ qir_FRAG_Z(c),
+ qir_uniform_f(trans, 1.0 / 0xffffff));
+ trans->inputs[attr * 4 + 3] = qir_FRAG_RCP_W(c);
+}
+
+static void
emit_fragment_input(struct tgsi_to_qir *trans, int attr)
{
struct qcompile *c = trans->c;
@@ -599,7 +613,12 @@ emit_tgsi_declaration(struct tgsi_to_qir *trans,
i <= decl->Range.Last;
i++) {
if (c->stage == QSTAGE_FRAG) {
- emit_fragment_input(trans, i);
+ if (decl->Semantic.Name ==
+ TGSI_SEMANTIC_POSITION) {
+ emit_fragcoord_input(trans, i);
+ } else {
+ emit_fragment_input(trans, i);
+ }
} else {
emit_vertex_input(trans, i);
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 9462da5..6509a2b 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -65,6 +65,11 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
[QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
+ [QOP_FRAG_X] = { "frag_x", 1, 0 },
+ [QOP_FRAG_Y] = { "frag_y", 1, 0 },
+ [QOP_FRAG_Z] = { "frag_z", 1, 0 },
+ [QOP_FRAG_RCP_W] = { "frag_rcp_w", 1, 0 },
+
[QOP_TEX_S] = { "tex_s", 0, 2 },
[QOP_TEX_T] = { "tex_t", 0, 2 },
[QOP_TEX_R] = { "tex_r", 0, 2 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 5d1f088..7d98062 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -74,6 +74,11 @@ enum qop {
QOP_TLB_COLOR_WRITE,
QOP_VARY_ADD_C,
+ QOP_FRAG_X,
+ QOP_FRAG_Y,
+ QOP_FRAG_Z,
+ QOP_FRAG_RCP_W,
+
/** Texture x coordinate parameter write */
QOP_TEX_S,
/** Texture y coordinate parameter write */
@@ -204,6 +209,15 @@ bool qir_opt_algebraic(struct qcompile *c);
bool qir_opt_copy_propagation(struct qcompile *c);
bool qir_opt_dead_code(struct qcompile *c);
+#define QIR_ALU0(name) \
+static inline struct qreg \
+qir_##name(struct qcompile *c) \
+{ \
+ struct qreg t = qir_get_temp(c); \
+ qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef)); \
+ return t; \
+}
+
#define QIR_ALU1(name) \
static inline struct qreg \
qir_##name(struct qcompile *c, struct qreg a) \
@@ -257,6 +271,10 @@ QIR_NODST_2(TEX_S)
QIR_NODST_2(TEX_T)
QIR_NODST_2(TEX_R)
QIR_NODST_2(TEX_B)
+QIR_ALU0(FRAG_X)
+QIR_ALU0(FRAG_Y)
+QIR_ALU0(FRAG_Z)
+QIR_ALU0(FRAG_RCP_W)
static inline struct qreg
qir_CMP(struct qcompile *c, struct qreg cmp, struct qreg a, struct qreg b)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 33abf6d..63f37dd 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -213,7 +213,8 @@ vc4_generate_code(struct qcompile *c)
if (qinst->src[i].file == QFILE_TEMP)
reg_uses_remaining[qinst->src[i].index]++;
}
- if (qinst->op == QOP_TLB_PASSTHROUGH_Z_WRITE)
+ if (qinst->op == QOP_TLB_PASSTHROUGH_Z_WRITE ||
+ qinst->op == QOP_FRAG_Z)
reg_in_use[3 + 32 + QPU_R_FRAG_PAYLOAD_ZW] = true;
}
@@ -460,6 +461,33 @@ vc4_generate_code(struct qcompile *c)
break;
+ case QOP_FRAG_X:
+ queue(c, qpu_inst(qpu_a_ITOF(dst,
+ qpu_ra(QPU_R_XY_PIXEL_COORD)),
+ qpu_m_NOP()));
+ break;
+
+ case QOP_FRAG_Y:
+ queue(c, qpu_inst(qpu_a_ITOF(dst,
+ qpu_rb(QPU_R_XY_PIXEL_COORD)),
+ qpu_m_NOP()));
+ break;
+
+ case QOP_FRAG_Z:
+ queue(c, qpu_inst(qpu_a_ITOF(dst,
+ qpu_rb(QPU_R_FRAG_PAYLOAD_ZW)),
+ qpu_m_NOP()));
+ break;
+
+ case QOP_FRAG_RCP_W:
+ queue(c, qpu_inst(qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP),
+ qpu_ra(QPU_R_FRAG_PAYLOAD_ZW)),
+ qpu_m_NOP()));
+
+ queue(c, qpu_inst(qpu_a_MOV(dst, qpu_r4()),
+ qpu_m_NOP()));
+ break;
+
case QOP_TLB_PASSTHROUGH_Z_WRITE:
queue(c, qpu_inst(qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
qpu_rb(QPU_R_FRAG_PAYLOAD_ZW)),
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 940596e..4ddc9ac 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -102,6 +102,10 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_VIEWPORTS:
return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+
/* Unsupported features. */
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
case PIPE_CAP_ANISOTROPIC_FILTER:
@@ -111,8 +115,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_TGSI_INSTANCEID:
commit d34fbdda120b2b0e0a33a4a18fa112e58f8a2f4a
Author: Eric Anholt <eric@anholt.net>
Date: Tue Aug 5 14:23:40 2014 -0700
vc4: Refactor shader input setup again.
This makes some space for handling special inputs like fragcoords.
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 8252630..8109f63 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -570,6 +570,24 @@ emit_vertex_input(struct tgsi_to_qir *trans, int attr)
}
static void
+emit_fragment_input(struct tgsi_to_qir *trans, int attr)
+{
+ struct qcompile *c = trans->c;
+
+ for (int i = 0; i < 4; i++) {
+ struct qreg vary = {
+ QFILE_VARY,
+ attr * 4 + i
+ };
+
+ /* XXX: multiply by W */
+ trans->inputs[attr * 4 + i] =
+ qir_VARY_ADD_C(c, qir_MOV(c, vary));
+ c->num_inputs++;
+ }
+}
+
+static void
emit_tgsi_declaration(struct tgsi_to_qir *trans,
struct tgsi_full_declaration *decl)
{
@@ -577,23 +595,12 @@ emit_tgsi_declaration(struct tgsi_to_qir *trans,
switch (decl->Declaration.File) {
case TGSI_FILE_INPUT:
- if (c->stage == QSTAGE_FRAG) {
- for (int i = decl->Range.First * 4;
- i < (decl->Range.Last + 1) * 4;
- i++) {
- struct qreg vary = {
- QFILE_VARY,
- i
- };
- trans->inputs[i] =
- qir_VARY_ADD_C(c, qir_MOV(c, vary));
-
- c->num_inputs++;
- }
- } else {
- for (int i = decl->Range.First;
- i <= decl->Range.Last;
- i++) {
+ for (int i = decl->Range.First;
+ i <= decl->Range.Last;
+ i++) {
+ if (c->stage == QSTAGE_FRAG) {
+ emit_fragment_input(trans, i);
+ } else {
emit_vertex_input(trans, i);
}
}
commit a7faca5d2716c5f87f228c6f82eaf10373154852
Author: Eric Anholt <eric@anholt.net>
Date: Tue Aug 5 11:00:51 2014 -0700
vc4: Clean up the tile alloc buffer size.
This prevents some simulator assertion failures, but it does mean (since
I've dropped the "* 16" padding) that on real hardware you need a kernel
that does overflow memory management (currently, "drm/vc4: Add support for
binner overflow memory allocation." in my kernel tree).
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 2fb57aa..ec218d3 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -45,7 +45,15 @@ vc4_start_draw(struct vc4_context *vc4)
uint32_t tilew = align(width, 64) / 64;
uint32_t tileh = align(height, 64) / 64;
- uint32_t tile_alloc_size = 32 * tilew * tileh * 16;
+ /* Tile alloc memory setup: We use an initial alloc size of 32b. The
+ * hardware then aligns that to 256b (we use 4096, because all of our
+ * BO allocations align to that anyway), then for some reason the
+ * simulator wants an extra page available, even if you have overflow
+ * memory set up.
+ */
+ uint32_t tile_alloc_size = 32 * tilew * tileh;
+ tile_alloc_size = align(tile_alloc_size, 4096);
+ tile_alloc_size += 4096;
uint32_t tile_state_size = 48 * tilew * tileh;
if (!vc4->tile_alloc || vc4->tile_alloc->size < tile_alloc_size) {
vc4_bo_unreference(&vc4->tile_alloc);
commit 7050ab510decce2606ffcd0298b3c7fb13a1401e
Author: Eric Anholt <eric@anholt.net>
Date: Tue Aug 5 11:00:08 2014 -0700
vc4: Clarify some values implicitly chosen for binning config.
These #defines are all 0, but spelling them out should help the math above make more sense.
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index a76880c..2fb57aa 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -67,7 +67,10 @@ vc4_start_draw(struct vc4_context *vc4)
cl_reloc(vc4, &vc4->bcl, vc4->tile_state, 0);
cl_u8(&vc4->bcl, tilew);
cl_u8(&vc4->bcl, tileh);
- cl_u8(&vc4->bcl, VC4_BIN_CONFIG_AUTO_INIT_TSDA);
+ cl_u8(&vc4->bcl,
+ VC4_BIN_CONFIG_AUTO_INIT_TSDA |
+ VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 |
+ VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32);
cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
commit ed5cb5d7d5d3d3cfeafcf67b2020044da9825abe
Author: Eric Anholt <eric@anholt.net>
Date: Tue Aug 5 10:54:56 2014 -0700
vc4: Improve simulator memory allocation.
This should reduce a bunch of spurious failures in sim.
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index fc3d554..827d617 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -32,6 +32,8 @@
#include "vc4_simulator_validate.h"
#include "simpenrose/simpenrose.h"
+#define OVERFLOW_SIZE (32 * 1024 * 1024)
+
static struct drm_gem_cma_object *
vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
{
@@ -234,7 +236,7 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args,
struct exec_info exec;
struct drm_device local_dev = {
.vc4 = vc4,
- .simulator_mem_next = 0,
+ .simulator_mem_next = OVERFLOW_SIZE,
};
struct drm_device *dev = &local_dev;
int ret;
@@ -290,9 +292,23 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args,
void
vc4_simulator_init(struct vc4_screen *screen)
{
- simpenrose_init_hardware();
- screen->simulator_mem_base = simpenrose_get_mem_start();
- screen->simulator_mem_size = simpenrose_get_mem_size();
+ screen->simulator_mem_size = 256 * 1024 * 1024;
+ screen->simulator_mem_base = malloc(screen->simulator_mem_size);
+
+ /* We supply our own memory so that we can have more aperture
+ * available (256MB instead of simpenrose's default 64MB).
+ */
+ simpenrose_init_hardware_supply_mem(screen->simulator_mem_base,
+ screen->simulator_mem_size);
+
+ /* Carve out low memory for tile allocation overflow. The kernel
+ * should be automatically handling overflow memory setup on real
+ * hardware, but for simulation we just get one shot to set up enough
+ * overflow memory before execution. This overflow mem will be used
+ * up over the whole lifetime of simpenrose (not reused on each
+ * flush), so it had better be big.
+ */
+ simpenrose_supply_overflow_mem(0, OVERFLOW_SIZE);
}
#endif /* USE_VC4_SIMULATOR */
commit f5f8dd29c30e727c10ff087321d87c40c064de04
Author: Eric Anholt <eric@anholt.net>
Date: Mon Aug 4 18:30:33 2014 -0700
vc4: Handle stride==0 in VBO validation
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.c b/src/gallium/drivers/vc4/vc4_simulator_validate.c
index 421107a..038de0f 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.c
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.c
@@ -942,18 +942,20 @@ validate_shader_rec(struct drm_device *dev,
stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
if (vbo->base.size < offset ||
- vbo->base.size - offset < attr_size ||
- stride == 0) {
+ vbo->base.size - offset < attr_size) {
DRM_ERROR("BO offset overflow (%d + %d > %d)\n",
offset, attr_size, vbo->base.size);
return -EINVAL;
}
- max_index = (vbo->base.size - offset - attr_size) / stride;
- if (state->max_index > max_index) {
- DRM_ERROR("primitives use index %d out of supplied %d\n",
- state->max_index, max_index);
- return -EINVAL;
+ if (stride != 0) {
+ max_index = ((vbo->base.size - offset - attr_size) /
+ stride);
+ if (state->max_index > max_index) {
+ DRM_ERROR("primitives use index %d out of supplied %d\n",
+ state->max_index, max_index);
+ return -EINVAL;
+ }
}
*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
commit 0f034055f96b9dd7b1c54e8fa5422d22c26f2269
Author: Eric Anholt <eric@anholt.net>
Date: Mon Aug 4 16:38:07 2014 -0700
vc4: Stash some debug code for looking at what BOs are at what hindex.
When you're debugging validation, it's nice to know what the BOs are for.
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index faec853..a87cdfa 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -106,6 +106,7 @@ vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
bo->screen = screen;
bo->handle = o.handle;
bo->size = o.size;
+ bo->name = "winsys";
#ifdef USE_VC4_SIMULATOR
vc4_bo_map(bo);
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 8038fee5..fc3d554 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -75,6 +75,10 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec)
struct vc4_bo *bo = bos[i];
struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo);
+#if 0
+ fprintf(stderr, "bo hindex %d: %s\n", i, bo->name);
+#endif
+
vc4_bo_map(bo);
memcpy(obj->vaddr, bo->map, bo->size);
commit 8ebfa8fdb27bb5efaeda4fe567622d5de4779342
Author: Eric Anholt <eric@anholt.net>
Date: Mon Aug 4 13:01:29 2014 -0700
vc4: Use GEM under simulation even for non-winsys BOs.
In addition to reducing sim-specific code, it also avoids our local handle
allocation conflicting with the host GEM's handle numbering, which was
causing vc4_gem_hindex() to not distinguish between winsys BOs and the
same-numbered non-winsys bo.
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 581ba89..faec853 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -45,7 +45,6 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
bo->size = size;
bo->name = name;
-#ifndef USE_VC4_SIMULATOR
struct drm_mode_create_dumb create;
memset(&create, 0, sizeof(create));
@@ -59,12 +58,6 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
bo->handle = create.handle;
assert(create.size >= size);
-#else /* USE_VC4_SIMULATOR */
- static int next_handle = 0;
- bo->handle = next_handle++;
-
- bo->map = malloc(size);
-#endif /* USE_VC4_SIMULATOR */
return bo;
}
@@ -72,20 +65,23 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
void
vc4_bo_free(struct vc4_bo *bo)
{
-#ifndef USE_VC4_SIMULATOR
struct vc4_screen *screen = bo->screen;
- if (bo->map)
+ if (bo->map) {
+#ifdef USE_VC4_SIMULATOR
+ if (bo->simulator_winsys_map) {
+ free(bo->map);
+ bo->map = bo->simulator_winsys_map;
+ }
+#endif
munmap(bo->map, bo->size);
+ }
struct drm_gem_close c;
c.handle = bo->handle;
int ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c);
if (ret != 0)
fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
-#else
- free(bo->map);
-#endif
free(bo);
}
@@ -137,7 +133,6 @@ vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data, uint32_t size,
bool
vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
{
-#ifndef USE_VC4_SIMULATOR
struct drm_gem_flink flink = {
.handle = bo->handle,
};
@@ -150,7 +145,6 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
}
*name = flink.name;
-#endif /* USE_VC4_SIMULATOR */
return true;
}
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 88eda4f..8038fee5 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -75,6 +75,7 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec)
struct vc4_bo *bo = bos[i];
struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo);
+ vc4_bo_map(bo);
memcpy(obj->vaddr, bo->map, bo->size);
exec->bo[i].bo = obj;
commit cdc208bdaf90017c2e1aaa54d2318b956e801ca0
Author: Eric Anholt <eric@anholt.net>
Date: Mon Aug 4 13:00:56 2014 -0700
vc4: Don't forget to unmap the GEM BO when freeing.
Otherwise it'll stick around forever.
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 653787e..581ba89 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -75,6 +75,9 @@ vc4_bo_free(struct vc4_bo *bo)
#ifndef USE_VC4_SIMULATOR
struct vc4_screen *screen = bo->screen;
+ if (bo->map)
+ munmap(bo->map, bo->size);
+
struct drm_gem_close c;
c.handle = bo->handle;
int ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c);
commit d2cc7f97df655bbca6486fbe81e35139215d7b72
Author: Eric Anholt <eric@anholt.net>
Date: Sat Aug 2 21:28:34 2014 -0700
vc4: Add validation of raster-format textures.
... and reject everything else, for now.
v2: Rebase on v2 of the rendering config validation change.
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.c b/src/gallium/drivers/vc4/vc4_simulator_validate.c
index 241ca17..421107a 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.c
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.c
@@ -101,8 +101,9 @@ gl_shader_rec_size(uint32_t pointer_bits)
}
static bool
-check_fbo_size(struct exec_info *exec, struct drm_gem_cma_object *fbo,
- uint32_t offset, uint8_t tiling_format, uint8_t cpp)
+check_tex_size(struct exec_info *exec, struct drm_gem_cma_object *fbo,
+ uint32_t offset, uint8_t tiling_format,
+ uint32_t width, uint32_t height, uint8_t cpp)
{
uint32_t width_align, height_align;
uint32_t aligned_row_len, aligned_h, size;
@@ -125,14 +126,14 @@ check_fbo_size(struct exec_info *exec, struct drm_gem_cma_object *fbo,
return false;
}
- /* The values are limited by the packet bitfields, so we don't need to
- * worry as much about integer overflow.
+ /* The values are limited by the packet/texture parameter bitfields,
+ * so we don't need to worry as much about integer overflow.
*/
- BUG_ON(exec->fb_width > 65535);
- BUG_ON(exec->fb_height > 65535);
+ BUG_ON(width > 65535);
+ BUG_ON(height > 65535);
- aligned_row_len = roundup(exec->fb_width * cpp, width_align);
- aligned_h = roundup(exec->fb_height, height_align);
+ aligned_row_len = roundup(width * cpp, width_align);
+ aligned_h = roundup(height, height_align);
if (INT_MAX / aligned_row_len < aligned_h) {
DRM_ERROR("Overflow in fbo size (%d * %d)\n",
@@ -144,8 +145,7 @@ check_fbo_size(struct exec_info *exec, struct drm_gem_cma_object *fbo,
if (size + offset < size ||
size + offset > fbo->base.size) {
DRM_ERROR("Overflow in %dx%d fbo size (%d + %d > %d)\n",
- exec->fb_width, exec->fb_height, size, offset,
- fbo->base.size);
+ width, height, size, offset, fbo->base.size);
return false;
}
@@ -247,11 +247,11 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS)
offset = *(uint32_t *)(untrusted + 2);
- if (!check_fbo_size(exec, fbo, offset,
+ if (!check_tex_size(exec, fbo, offset,
((packet_b0 &
VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK) >>
VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT),
- cpp)) {
+ exec->fb_width, exec->fb_height, cpp)) {
return -EINVAL;
}
@@ -499,11 +499,11 @@ validate_tile_rendering_mode_config(VALIDATE_ARGS)
}
offset = *(uint32_t *)untrusted;
- if (!check_fbo_size(exec, fbo, offset,
+ if (!check_tex_size(exec, fbo, offset,
((flags &
VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK) >>
VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT),
- cpp)) {
+ exec->fb_width, exec->fb_height, cpp)) {
return -EINVAL;
}
@@ -699,14 +699,91 @@ reloc_tex(struct exec_info *exec,
{
struct drm_gem_cma_object *tex;
- uint32_t unvalidated_p0 = *(uint32_t *)(uniform_data_u +
- sample->p_offset[0]);
+ uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
+ uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
+ uint32_t offset = p0 & ~0xfff;
+ uint32_t miplevels = (p0 & 0x15);
+ uint32_t width = (p1 >> 8) & 2047;
+ uint32_t height = (p1 >> 20) & 2047;
+ uint32_t type, cpp, tiling_format;
+ int i;
+
+ if (width == 0)
+ width = 2048;
+ if (height == 0)
+ height = 2048;
+
+ if (p0 & (1 << 9)) {
+ DRM_ERROR("Cube maps unsupported\n");
+ return false;
+ }
+
+ type = ((p0 >> 4) & 15) | ((p1 >> 31) << 4);
+
+ switch (type) {
+ case 0: /* RGBA8888 */
+ case 1: /* RGBX8888 */
+ case 16: /* RGBA32R */
+ cpp = 4;
+ break;
+ case 2: /* RGBA4444 */
+ case 3: /* RGBA5551 */
+ case 4: /* RGB565 */
+ case 7: /* LUMALPHA */
+ case 9: /* S16F */
+ case 11: /* S16 */
+ cpp = 2;
Reply to: