mesa: Changes to 'upstream-experimental'
Makefile | 12
configs/autoconf.in | 8
configs/default | 8
configure.ac | 23
docs/relnotes-7.1.html | 7
progs/tools/trace/Makefile | 2
src/egl/drivers/demo/Makefile | 5
src/egl/drivers/dri/Makefile | 3
src/egl/main/Makefile | 3
src/glu/Makefile | 12
src/glu/glu.pc.in | 4
src/glu/mesa/Makefile | 2
src/glu/sgi/Makefile | 2
src/glut/beos/Makefile | 6
src/glut/directfb/Makefile | 2
src/glut/fbdev/Makefile | 10
src/glut/ggi/Makefile | 2
src/glut/glx/Makefile | 18
src/glut/glx/glut.pc.in | 4
src/glut/mini/Makefile | 19
src/glut/mini/glut.pc.in | 4
src/glw/Makefile | 19
src/glw/glw.pc.in | 4
src/glx/mini/Makefile | 2
src/glx/x11/.gitignore | 1
src/glx/x11/Makefile | 2
src/glx/x11/dri2_glx.c | 2
src/glx/x11/dri_common.c | 4
src/glx/x11/dri_common.h | 2
src/glx/x11/dri_glx.c | 2
src/glx/x11/drisw_glx.c | 2
src/glx/x11/glxext.c | 11
src/mesa/Makefile | 24
src/mesa/drivers/beos/Makefile | 5
src/mesa/drivers/directfb/Makefile | 2
src/mesa/drivers/dri/Makefile | 16
src/mesa/drivers/dri/Makefile.template | 2
src/mesa/drivers/dri/dri.pc.in | 4
src/mesa/drivers/dri/i915/i915_texstate.c | 11
src/mesa/drivers/dri/i965/brw_clip_line.c | 2
src/mesa/drivers/dri/i965/brw_clip_state.c | 2
src/mesa/drivers/dri/i965/brw_clip_tri.c | 2
src/mesa/drivers/dri/i965/brw_defines.h | 13
src/mesa/drivers/dri/i965/brw_eu_emit.c | 16
src/mesa/drivers/dri/i965/brw_misc_state.c | 8
src/mesa/drivers/dri/i965/brw_structs.h | 4
src/mesa/drivers/dri/i965/brw_vs_emit.c | 2
src/mesa/drivers/dri/intel/intel_batchbuffer.h | 4
src/mesa/drivers/dri/intel/intel_blit.c | 5
src/mesa/drivers/dri/intel/intel_chipset.h | 10
src/mesa/drivers/dri/intel/intel_context.c | 4
src/mesa/drivers/dri/r300/Makefile | 3
src/mesa/drivers/dri/r300/r300_cmdbuf.c | 11
src/mesa/drivers/dri/r300/r300_context.h | 51
src/mesa/drivers/dri/r300/r300_fragprog.c | 184 +
src/mesa/drivers/dri/r300/r300_fragprog.h | 30
src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 2092 +---------------------
src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c | 227 ++
src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h | 42
src/mesa/drivers/dri/r300/r300_ioctl.c | 9
src/mesa/drivers/dri/r300/r300_reg.h | 7
src/mesa/drivers/dri/r300/r300_state.c | 109 -
src/mesa/drivers/dri/r300/r500_fragprog.c | 207 +-
src/mesa/drivers/dri/r300/r500_fragprog.h | 32
src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 1596 ++--------------
src/mesa/drivers/dri/r300/radeon_nqssadce.c | 282 ++
src/mesa/drivers/dri/r300/radeon_nqssadce.h | 96 +
src/mesa/drivers/dri/r300/radeon_program.c | 275 --
src/mesa/drivers/dri/r300/radeon_program.h | 113 -
src/mesa/drivers/dri/r300/radeon_program_alu.c | 475 ++++
src/mesa/drivers/dri/r300/radeon_program_alu.h | 12
src/mesa/drivers/dri/r300/radeon_program_pair.c | 983 ++++++++++
src/mesa/drivers/dri/r300/radeon_program_pair.h | 126 +
src/mesa/drivers/fbdev/Makefile | 3
src/mesa/drivers/osmesa/Makefile | 12
src/mesa/drivers/x11/Makefile | 7
src/mesa/gl.pc.in | 4
src/mesa/glapi/glapi.c | 4
src/mesa/main/context.c | 1
src/mesa/main/dd.h | 2
src/mesa/main/execmem.c | 8
src/mesa/main/fbobject.c | 26
src/mesa/main/fbobject.h | 7
src/mesa/main/shaders.c | 14
src/mesa/main/texstate.c | 28
src/mesa/main/version.h | 2
src/mesa/shader/arbprogparse.c | 9
src/mesa/shader/program.c | 62
src/mesa/shader/program.h | 13
src/mesa/shader/shader_api.c | 291 ++-
src/mesa/shader/slang/library/slang_core.gc | 8
src/mesa/shader/slang/library/slang_core_gc.h | 1524 ++++++++--------
src/mesa/shader/slang/slang_codegen.c | 8
src/mesa/shader/slang/slang_compile.c | 18
src/mesa/shader/slang/slang_ir.c | 2
src/mesa/shader/slang/slang_print.c | 84
src/mesa/shader/slang/slang_vartable.c | 3
src/mesa/swrast/s_fragprog.c | 28
src/mesa/tnl_dd/t_dd_tritmp.h | 4
99 files changed, 4398 insertions(+), 5079 deletions(-)
New commits:
commit 4fab47b13c214dc79e0ae5d8001521029ce34231
Author: Dave Airlie <airlied@linux.ie>
Date: Sun Jul 13 19:00:46 2008 +1000
glx/dri: only report DRI2 extensions when DRI2 is enabled.
Fixes bug 15477
diff --git a/src/glx/x11/dri2_glx.c b/src/glx/x11/dri2_glx.c
index b679c72..0be65bc 100644
--- a/src/glx/x11/dri2_glx.c
+++ b/src/glx/x11/dri2_glx.c
@@ -308,7 +308,7 @@ static __GLXDRIscreen *dri2CreateScreen(__GLXscreenConfigs *psc, int screen,
return NULL;
}
- driBindExtensions(psc);
+ driBindExtensions(psc, 1);
psc->configs = driConvertConfigs(psc->core, psc->configs, driver_configs);
psc->visuals = driConvertConfigs(psc->core, psc->visuals, driver_configs);
diff --git a/src/glx/x11/dri_common.c b/src/glx/x11/dri_common.c
index b159d19..8175f46 100644
--- a/src/glx/x11/dri_common.c
+++ b/src/glx/x11/dri_common.c
@@ -329,7 +329,7 @@ driConvertConfigs(const __DRIcoreExtension *core,
}
_X_HIDDEN void
-driBindExtensions(__GLXscreenConfigs *psc)
+driBindExtensions(__GLXscreenConfigs *psc, int dri2)
{
const __DRIextension **extensions;
int i;
@@ -386,7 +386,7 @@ driBindExtensions(__GLXscreenConfigs *psc)
#endif
#ifdef __DRI_TEX_BUFFER
- if (strcmp(extensions[i]->name, __DRI_TEX_BUFFER) == 0) {
+ if ((strcmp(extensions[i]->name, __DRI_TEX_BUFFER) == 0) && dri2) {
psc->texBuffer = (__DRItexBufferExtension *) extensions[i];
__glXEnableDirectExtension(psc, "GLX_EXT_texture_from_pixmap");
}
diff --git a/src/glx/x11/dri_common.h b/src/glx/x11/dri_common.h
index 3556510..15f6cc8 100644
--- a/src/glx/x11/dri_common.h
+++ b/src/glx/x11/dri_common.h
@@ -55,6 +55,6 @@ extern void ErrorMessageF(const char *f, ...);
extern void *driOpenDriver(const char *driverName);
-extern void driBindExtensions(__GLXscreenConfigs *psc);
+extern void driBindExtensions(__GLXscreenConfigs *psc, int dri2);
#endif /* _DRI_COMMON_H */
diff --git a/src/glx/x11/dri_glx.c b/src/glx/x11/dri_glx.c
index d53f2d9..82653f1 100644
--- a/src/glx/x11/dri_glx.c
+++ b/src/glx/x11/dri_glx.c
@@ -635,7 +635,7 @@ static __GLXDRIscreen *driCreateScreen(__GLXscreenConfigs *psc, int screen,
return NULL;
}
- driBindExtensions(psc);
+ driBindExtensions(psc, 0);
psp->destroyScreen = driDestroyScreen;
psp->createContext = driCreateContext;
diff --git a/src/glx/x11/drisw_glx.c b/src/glx/x11/drisw_glx.c
index f7ff001..bcf7e14 100644
--- a/src/glx/x11/drisw_glx.c
+++ b/src/glx/x11/drisw_glx.c
@@ -390,7 +390,7 @@ static __GLXDRIscreen *driCreateScreen(__GLXscreenConfigs *psc, int screen,
goto handle_error;
}
- driBindExtensions(psc);
+ driBindExtensions(psc, 0);
psc->configs = driConvertConfigs(psc->core, psc->configs, driver_configs);
psc->visuals = driConvertConfigs(psc->core, psc->visuals, driver_configs);
commit e81ba58bf4c20229677cdf89b5970b55cefb2199
Author: Nicolai Haehnle <nhaehnle@gmail.com>
Date: Sat Jul 12 21:13:03 2008 +0200
r300_fragprog: Use nqssa+dce and program_pair for emit
Share almost all code with r500_fragprog now.
This also fixes Piglit's texrect-many test, which means that the compiz
bicubic plugin should work with hardware acceleration now.
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 9baa1e7..6ca9342 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -42,6 +42,7 @@ DRIVER_SOURCES = \
radeon_nqssadce.c \
r300_vertprog.c \
r300_fragprog.c \
+ r300_fragprog_swizzle.c \
r300_fragprog_emit.c \
r500_fragprog.c \
r500_fragprog_emit.c \
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 8e9c5ce..98af6d8 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -683,16 +683,25 @@ struct r300_fragment_program_external_state {
};
+struct r300_fragment_program_node {
+ int tex_offset; /**< first tex instruction */
+ int tex_end; /**< last tex instruction, relative to tex_offset */
+ int alu_offset; /**< first ALU instruction */
+ int alu_end; /**< last ALU instruction, relative to alu_offset */
+ int flags;
+};
+
/**
* Stores an R300 fragment program in its compiled-to-hardware form.
*/
struct r300_fragment_program_code {
struct {
- int length;
+ int length; /**< total # of texture instructions used */
GLuint inst[PFS_MAX_TEX_INST];
} tex;
struct {
+ int length; /**< total # of ALU instructions used */
struct {
GLuint inst0;
GLuint inst1;
@@ -701,21 +710,10 @@ struct r300_fragment_program_code {
} inst[PFS_MAX_ALU_INST];
} alu;
- struct {
- int tex_offset;
- int tex_end;
- int alu_offset;
- int alu_end;
- int flags;
- } node[4];
+ struct r300_fragment_program_node node[4];
int cur_node;
int first_node_has_tex;
- int alu_offset;
- int alu_end;
- int tex_offset;
- int tex_end;
-
/**
* Remember which program register a given hardware constant
* belongs to.
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index 8a1d690..d390de5 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -29,10 +29,8 @@
* \file
*
* Fragment program compiler. Perform transformations on the intermediate
- * \ref radeon_program representation (which is essentially the Mesa
- * program representation plus the notion of clauses) until the program
- * is in a form where we can translate it more or less directly into
- * machine-readable form.
+ * representation until the program is in a form where we can translate
+ * it more or less directly into machine-readable form.
*
* \author Ben Skeggs <darktama@iinet.net.au>
* \author Jerome Glisse <j.glisse@gmail.com>
@@ -47,8 +45,10 @@
#include "r300_context.h"
#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
#include "r300_state.h"
+#include "radeon_nqssadce.h"
#include "radeon_program_alu.h"
@@ -133,25 +133,6 @@ static GLboolean transform_TEX(
inst.SrcReg[0].Index = tempreg;
}
- /* Texture operations do not support swizzles etc. in hardware,
- * so emit an additional arithmetic operation if necessary.
- */
- if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP ||
- inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) {
- int tempreg = radeonFindFreeTemporary(t);
-
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tempreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tempreg;
- }
-
if (inst.Opcode != OPCODE_KIL) {
if (inst.DstReg.File != PROGRAM_TEMPORARY ||
inst.DstReg.WriteMask != WRITEMASK_XYZW) {
@@ -339,6 +320,13 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
}
+static void nqssadce_init(struct nqssadce_state* s)
+{
+ s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW;
+ s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W;
+}
+
+
static GLuint build_dtm(GLuint depthmode)
{
switch(depthmode) {
@@ -417,7 +405,20 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
3, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Fragment Program: After transformations:\n");
+ _mesa_printf("Fragment Program: After native rewrite:\n");
+ _mesa_print_program(compiler.program);
+ }
+
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r300FPIsNativeSwizzle,
+ .BuildSwizzle = &r300FPBuildSwizzle,
+ .RewriteDepthOut = GL_TRUE
+ };
+ radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
_mesa_print_program(compiler.program);
}
@@ -451,22 +452,18 @@ void r300FragmentProgramDump(
fprintf(stderr, "pc=%d*************************************\n", pc++);
- fprintf(stderr, "Mesa program:\n");
- fprintf(stderr, "-------------\n");
- _mesa_print_program(&fp->mesa_program.Base);
- fflush(stdout);
-
fprintf(stderr, "Hardware program\n");
fprintf(stderr, "----------------\n");
for (n = 0; n < (code->cur_node + 1); n++) {
fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
- "alu_end: %d, tex_end: %d\n", n,
+ "alu_end: %d, tex_end: %d, flags: %08x\n", n,
code->node[n].alu_offset,
code->node[n].tex_offset,
- code->node[n].alu_end, code->node[n].tex_end);
+ code->node[n].alu_end, code->node[n].tex_end,
+ code->node[n].flags);
- if (code->tex.length) {
+ if (n > 0 || code->first_node_has_tex) {
fprintf(stderr, " TEX:\n");
for (i = code->node[n].tex_offset;
i <= code->node[n].tex_offset + code->node[n].tex_end;
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h
index c76ae62..b3a3cd2 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.h
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.h
@@ -42,33 +42,6 @@
#include "r300_context.h"
#include "radeon_program.h"
-/* supported hw opcodes */
-#define PFS_OP_MAD 0
-#define PFS_OP_DP3 1
-#define PFS_OP_DP4 2
-#define PFS_OP_MIN 3
-#define PFS_OP_MAX 4
-#define PFS_OP_CMP 5
-#define PFS_OP_FRC 6
-#define PFS_OP_EX2 7
-#define PFS_OP_LG2 8
-#define PFS_OP_RCP 9
-#define PFS_OP_RSQ 10
-#define PFS_OP_REPL_ALPHA 11
-#define PFS_OP_CMPH 12
-#define MAX_PFS_OP 12
-
-#define PFS_FLAG_SAT (1 << 0)
-#define PFS_FLAG_ABS (1 << 1)
-
-#define ARG_NEG (1 << 5)
-#define ARG_ABS (1 << 6)
-#define ARG_MASK (127 << 0)
-#define ARG_STRIDE 7
-#define SRC_CONST (1 << 5)
-#define SRC_MASK (63 << 0)
-#define SRC_STRIDE 6
-
#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
index 4786b45..9f0b7e3 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
@@ -36,1674 +36,308 @@
* \author Jerome Glisse <j.glisse@gmail.com>
*
* \todo FogOption
- *
- * \todo Verify results of opcodes for accuracy, I've only checked them in
- * specific cases.
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "shader/prog_instruction.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-
-#include "r300_context.h"
#include "r300_fragprog.h"
-#include "r300_reg.h"
-#include "r300_state.h"
-
-/* Mapping Mesa registers to R300 temporaries */
-struct reg_acc {
- int reg; /* Assigned hw temp */
- unsigned int refcount; /* Number of uses by mesa program */
-};
-
-/**
- * Describe the current lifetime information for an R300 temporary
- */
-struct reg_lifetime {
- /* Index of the first slot where this register is free in the sense
- that it can be used as a new destination register.
- This is -1 if the register has been assigned to a Mesa register
- and the last access to the register has not yet been emitted */
- int free;
-
- /* Index of the first slot where this register is currently reserved.
- This is used to stop e.g. a scalar operation from being moved
- before the allocation time of a register that was first allocated
- for a vector operation. */
- int reserved;
-
- /* Index of the first slot in which the register can be used as a
- source without losing the value that is written by the last
- emitted instruction that writes to the register */
- int vector_valid;
- int scalar_valid;
-
- /* Index to the slot where the register was last read.
- This is also the first slot in which the register may be written again */
- int vector_lastread;
- int scalar_lastread;
-};
-
-/**
- * Store usage information about an ALU instruction slot during the
- * compilation of a fragment program.
- */
-#define SLOT_SRC_VECTOR (1<<0)
-#define SLOT_SRC_SCALAR (1<<3)
-#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR)
-#define SLOT_OP_VECTOR (1<<16)
-#define SLOT_OP_SCALAR (1<<17)
-#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR)
-
-struct r300_pfs_compile_slot {
- /* Bitmask indicating which parts of the slot are used, using SLOT_ constants
- defined above */
- unsigned int used;
-
- /* Selected sources */
- int vsrc[3];
- int ssrc[3];
-};
-
-/**
- * Store information during compilation of fragment programs.
- */
-struct r300_pfs_compile_state {
- struct r300_fragment_program_compiler *compiler;
- int nrslots; /* number of ALU slots used so far */
-
- /* Track which (parts of) slots are already filled with instructions */
- struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST];
-
- /* Track the validity of R300 temporaries */
- struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS];
-
- /* Used to map Mesa's inputs/temps onto hardware temps */
- int temp_in_use;
- struct reg_acc temps[PFS_NUM_TEMP_REGS];
- struct reg_acc inputs[32]; /* don't actually need 32... */
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
+#include "r300_reg.h"
- /* Track usage of hardware temps, for register allocation,
- * indirection detection, etc. */
- GLuint used_in_node;
- GLuint dest_in_node;
-};
+#define PROG_CODE \
+ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
+ struct r300_fragment_program_code *code = c->code
-/*
- * Usefull macros and values
- */
-#define ERROR(fmt, args...) do { \
+#define error(fmt, args...) do { \
fprintf(stderr, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
- fp->error = GL_TRUE; \
} while(0)
-#define PFS_INVAL 0xFFFFFFFF
-#define COMPILE_STATE \
- struct r300_fragment_program *fp = cs->compiler->fp; \
- struct r300_fragment_program_code *code = cs->compiler->code; \
- (void)code; (void)fp
-
-#define SWIZZLE_XYZ 0
-#define SWIZZLE_XXX 1
-#define SWIZZLE_YYY 2
-#define SWIZZLE_ZZZ 3
-#define SWIZZLE_WWW 4
-#define SWIZZLE_YZX 5
-#define SWIZZLE_ZXY 6
-#define SWIZZLE_WZY 7
-#define SWIZZLE_111 8
-#define SWIZZLE_000 9
-#define SWIZZLE_HHH 10
-
-#define swizzle(r, x, y, z, w) do_swizzle(cs, r, \
- ((SWIZZLE_##x<<0)| \
- (SWIZZLE_##y<<3)| \
- (SWIZZLE_##z<<6)| \
- (SWIZZLE_##w<<9)), \
- 0)
-
-#define REG_TYPE_INPUT 0
-#define REG_TYPE_OUTPUT 1
-#define REG_TYPE_TEMP 2
-#define REG_TYPE_CONST 3
-
-#define REG_TYPE_SHIFT 0
-#define REG_INDEX_SHIFT 2
-#define REG_VSWZ_SHIFT 8
-#define REG_SSWZ_SHIFT 13
-#define REG_NEGV_SHIFT 18
-#define REG_NEGS_SHIFT 19
-#define REG_ABS_SHIFT 20
-#define REG_NO_USE_SHIFT 21 // Hack for refcounting
-#define REG_VALID_SHIFT 22 // Does the register contain a defined value?
-#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)?
-
-#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT)
-#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT)
-#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT)
-#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT)
-#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT)
-#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT)
-#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT)
-#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT)
-#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT)
-#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT)
-
-#define REG(type, index, vswz, sswz, nouse, valid, builtin) \
- (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \
- ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \
- ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \
- ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \
- ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \
- ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \
- ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
-#define REG_GET_TYPE(reg) \
- ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT)
-#define REG_GET_INDEX(reg) \
- ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT)
-#define REG_GET_VSWZ(reg) \
- ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT)
-#define REG_GET_SSWZ(reg) \
- ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT)
-#define REG_GET_NO_USE(reg) \
- ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
-#define REG_GET_VALID(reg) \
- ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT)
-#define REG_GET_BUILTIN(reg) \
- ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT)
-#define REG_SET_TYPE(reg, type) \
- reg = ((reg & ~REG_TYPE_MASK) | \
- ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK))
-#define REG_SET_INDEX(reg, index) \
- reg = ((reg & ~REG_INDEX_MASK) | \
- ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK))
-#define REG_SET_VSWZ(reg, vswz) \
- reg = ((reg & ~REG_VSWZ_MASK) | \
- ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK))
-#define REG_SET_SSWZ(reg, sswz) \
- reg = ((reg & ~REG_SSWZ_MASK) | \
- ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
-#define REG_SET_NO_USE(reg, nouse) \
- reg = ((reg & ~REG_NO_USE_MASK) | \
- ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK))
-#define REG_SET_VALID(reg, valid) \
- reg = ((reg & ~REG_VALID_MASK) | \
- ((valid << REG_VALID_SHIFT) & REG_VALID_MASK))
-#define REG_SET_BUILTIN(reg, builtin) \
- reg = ((reg & ~REG_BUILTIN_MASK) | \
- ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK))
-#define REG_ABS(reg) \
- reg = (reg | REG_ABS_MASK)
-#define REG_NEGV(reg) \
- reg = (reg | REG_NEGV_MASK)
-#define REG_NEGS(reg) \
- reg = (reg | REG_NEGS_MASK)
-
-#define NOP_INST0 ( \
- (R300_ALU_OUTC_MAD) | \
- (R300_ALU_ARGC_ZERO << R300_ALU_ARG0C_SHIFT) | \
- (R300_ALU_ARGC_ZERO << R300_ALU_ARG1C_SHIFT) | \
- (R300_ALU_ARGC_ZERO << R300_ALU_ARG2C_SHIFT))
-#define NOP_INST1 ( \
- ((0 | SRC_CONST) << R300_ALU_SRC0C_SHIFT) | \
- ((0 | SRC_CONST) << R300_ALU_SRC1C_SHIFT) | \
- ((0 | SRC_CONST) << R300_ALU_SRC2C_SHIFT))
-#define NOP_INST2 ( \
- (R300_ALU_OUTA_MAD) | \
- (R300_ALU_ARGA_ZERO << R300_ALU_ARG0A_SHIFT) | \
- (R300_ALU_ARGA_ZERO << R300_ALU_ARG1A_SHIFT) | \
- (R300_ALU_ARGA_ZERO << R300_ALU_ARG2A_SHIFT))
-#define NOP_INST3 ( \
- ((0 | SRC_CONST) << R300_ALU_SRC0A_SHIFT) | \
- ((0 | SRC_CONST) << R300_ALU_SRC1A_SHIFT) | \
- ((0 | SRC_CONST) << R300_ALU_SRC2A_SHIFT))
-
-
-/*
- * Datas structures for fragment program generation
- */
-
-/* description of r300 native hw instructions */
-static const struct {
- const char *name;
- int argc;
- int v_op;
- int s_op;
-} r300_fpop[] = {
- /* *INDENT-OFF* */
- {"MAD", 3, R300_ALU_OUTC_MAD, R300_ALU_OUTA_MAD},
- {"DP3", 2, R300_ALU_OUTC_DP3, R300_ALU_OUTA_DP4},
- {"DP4", 2, R300_ALU_OUTC_DP4, R300_ALU_OUTA_DP4},
- {"MIN", 2, R300_ALU_OUTC_MIN, R300_ALU_OUTA_MIN},
- {"MAX", 2, R300_ALU_OUTC_MAX, R300_ALU_OUTA_MAX},
- {"CMP", 3, R300_ALU_OUTC_CMP, R300_ALU_OUTA_CMP},
- {"FRC", 1, R300_ALU_OUTC_FRC, R300_ALU_OUTA_FRC},
- {"EX2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_EX2},
- {"LG2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_LG2},
- {"RCP", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RCP},
- {"RSQ", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RSQ},
- {"REPL_ALPHA", 1, R300_ALU_OUTC_REPL_ALPHA, PFS_INVAL},
- {"CMPH", 3, R300_ALU_OUTC_CMPH, PFS_INVAL},
- /* *INDENT-ON* */
-};
-
-/* vector swizzles r300 can support natively, with a couple of
- * cases we handle specially
- *
- * REG_VSWZ/REG_SSWZ is an index into this table
- */
-
-/* mapping from SWIZZLE_* to r300 native values for scalar insns */
-#define SWIZZLE_HALF 6
-
-#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
- SWIZZLE_##y, \
- SWIZZLE_##z, \
- SWIZZLE_ZERO))
-/* native swizzles */
-static const struct r300_pfs_swizzle {
- GLuint hash; /* swizzle value this matches */
- GLuint base; /* base value for hw swizzle */
- GLuint stride; /* difference in base between arg0/1/2 */
- GLuint flags;
-} v_swiz[] = {
- /* *INDENT-OFF* */
- {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, SLOT_SRC_SCALAR},
- {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR},
- {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH},
- {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
- {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
- {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0},
- {PFS_INVAL, 0, 0, 0},
- /* *INDENT-ON* */
-};
-
-/* used during matching of non-native swizzles */
-#define SWZ_X_MASK (7 << 0)
-#define SWZ_Y_MASK (7 << 3)
-#define SWZ_Z_MASK (7 << 6)
-#define SWZ_W_MASK (7 << 9)
-static const struct {
- GLuint hash; /* used to mask matching swizzle components */
- int mask; /* actual outmask */
- int count; /* count of components matched */
-} s_mask[] = {
- /* *INDENT-OFF* */
- {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3},
- {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2},
- {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2},
- {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2},
- {SWZ_X_MASK, 1, 1},
- {SWZ_Y_MASK, 2, 1},
- {SWZ_Z_MASK, 4, 1},
- {PFS_INVAL, PFS_INVAL, PFS_INVAL}
- /* *INDENT-ON* */
-};
-
-static const struct {
- int base; /* hw value of swizzle */
- int stride; /* difference between SRC0/1/2 */
- GLuint flags;
-} s_swiz[] = {
- /* *INDENT-OFF* */
- {R300_ALU_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR},
- {R300_ALU_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR},
- {R300_ALU_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR},
- {R300_ALU_ARGA_SRC0A, 1, SLOT_SRC_SCALAR},
- {R300_ALU_ARGA_ZERO, 0, 0},
- {R300_ALU_ARGA_ONE, 0, 0},
- {R300_ALU_ARGA_HALF, 0, 0}
- /* *INDENT-ON* */
-};
-
-/* boiler-plate reg, for convenience */
-static const GLuint undef = REG(REG_TYPE_TEMP,
- 0,
- SWIZZLE_XYZ,
- SWIZZLE_W,
- GL_FALSE,
- GL_FALSE,
- GL_FALSE);
-
-/* constant one source */
-static const GLuint pfs_one = REG(REG_TYPE_CONST,
- 0,
- SWIZZLE_111,
- SWIZZLE_ONE,
- GL_FALSE,
- GL_TRUE,
- GL_TRUE);
-
-/* constant half source */
-static const GLuint pfs_half = REG(REG_TYPE_CONST,
- 0,
- SWIZZLE_HHH,
- SWIZZLE_HALF,
- GL_FALSE,
- GL_TRUE,
- GL_TRUE);
-
-/* constant zero source */
-static const GLuint pfs_zero = REG(REG_TYPE_CONST,
- 0,
- SWIZZLE_000,
- SWIZZLE_ZERO,
- GL_FALSE,
- GL_TRUE,
- GL_TRUE);
-
-/*
- * Common functions prototypes
- */
-static void emit_arith(struct r300_pfs_compile_state *cs, int op,
- GLuint dest, int mask,
- GLuint src0, GLuint src1, GLuint src2, int flags);
-
-/**
- * Get an R300 temporary that can be written to in the given slot.
- */
-static int get_hw_temp(struct r300_pfs_compile_state *cs, int slot)
-{
- COMPILE_STATE;
- int r;
-
- for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
- if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot)
- break;
- }
-
- if (r >= PFS_NUM_TEMP_REGS) {
- ERROR("Out of hardware temps\n");
- return 0;
- }
- // Reserved is used to avoid the following scenario:
- // R300 temporary X is first assigned to Mesa temporary Y during vector ops
- // R300 temporary X is then assigned to Mesa temporary Z for further vector ops
- // Then scalar ops on Mesa temporary Z are emitted and move back in time
- // to overwrite the value of temporary Y.
- // End scenario.
- cs->hwtemps[r].reserved = cs->hwtemps[r].free;
- cs->hwtemps[r].free = -1;
-
- // Reset to some value that won't mess things up when the user
- // tries to read from a temporary that hasn't been assigned a value yet.
- // In the normal case, vector_valid and scalar_valid should be set to
- // a sane value by the first emit that writes to this temporary.
- cs->hwtemps[r].vector_valid = 0;
- cs->hwtemps[r].scalar_valid = 0;
-
- if (r > code->max_temp_idx)
- code->max_temp_idx = r;
-
- return r;
-}
-
-/**
- * Get an R300 temporary that will act as a TEX destination register.
- */
-static int get_hw_temp_tex(struct r300_pfs_compile_state *cs)
-{
- COMPILE_STATE;
- int r;
-
- for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
- if (cs->used_in_node & (1 << r))
- continue;
-
- // Note: Be very careful here
- if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0)
- break;
- }
-
- if (r >= PFS_NUM_TEMP_REGS)
- return get_hw_temp(cs, 0); /* Will cause an indirection */
-
- cs->hwtemps[r].reserved = cs->hwtemps[r].free;
- cs->hwtemps[r].free = -1;
-
- // Reset to some value that won't mess things up when the user
- // tries to read from a temporary that hasn't been assigned a value yet.
- // In the normal case, vector_valid and scalar_valid should be set to
- // a sane value by the first emit that writes to this temporary.
- cs->hwtemps[r].vector_valid = cs->nrslots;
- cs->hwtemps[r].scalar_valid = cs->nrslots;
- if (r > code->max_temp_idx)
- code->max_temp_idx = r;
-
- return r;
-}
-
-/**
- * Mark the given hardware register as free.
- */
-static void free_hw_temp(struct r300_pfs_compile_state *cs, int idx)
+static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
{
- // Be very careful here. Consider sequences like
- // MAD r0, r1,r2,r3
- // TEX r4, ...
- // The TEX instruction may be moved in front of the MAD instruction
- // due to the way nodes work. We don't want to alias r1 and r4 in
- // this case.
- // I'm certain the register allocation could be further sanitized,
- // but it's tricky because of stuff that can happen inside emit_tex
- // and emit_arith.
- cs->hwtemps[idx].free = cs->nrslots + 1;
-}
+ PROG_CODE;
-/**
- * Create a new Mesa temporary register.
- */
-static GLuint get_temp_reg(struct r300_pfs_compile_state *cs)
-{
- COMPILE_STATE;
- GLuint r = undef;
- GLuint index;
-
- index = ffs(~cs->temp_in_use);
- if (!index) {
- ERROR("Out of program temps\n");
- return r;
- }
-
- cs->temp_in_use |= (1 << --index);
- cs->temps[index].refcount = 0xFFFFFFFF;
- cs->temps[index].reg = -1;
-
- REG_SET_TYPE(r, REG_TYPE_TEMP);
- REG_SET_INDEX(r, index);
- REG_SET_VALID(r, GL_TRUE);
- return r;
-}
-
-/**
- * Free a Mesa temporary and the associated R300 temporary.
- */
-static void free_temp(struct r300_pfs_compile_state *cs, GLuint r)
-{
- GLuint index = REG_GET_INDEX(r);
-
- if (!(cs->temp_in_use & (1 << index)))
- return;
-
- if (REG_GET_TYPE(r) == REG_TYPE_TEMP) {
- free_hw_temp(cs, cs->temps[index].reg);
- cs->temps[index].reg = -1;
- cs->temp_in_use &= ~(1 << index);
- } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) {
- free_hw_temp(cs, cs->inputs[index].reg);
- cs->inputs[index].reg = -1;
- }
-}
-
-/**
- * Emit a hardware constant/parameter.
- */
-static GLuint emit_const4fv(struct r300_pfs_compile_state *cs,
- struct prog_src_register srcreg)
-{
- COMPILE_STATE;
- GLuint reg = undef;
- int index;
-
- for (index = 0; index < code->const_nr; ++index) {
- if (code->constant[index].File == srcreg.File &&
- code->constant[index].Index == srcreg.Index)
+ for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
+ if (code->constant[*hwindex].File == file &&
+ code->constant[*hwindex].Index == index)
break;
}
- if (index >= code->const_nr) {
- if (index >= PFS_NUM_CONST_REGS) {
- ERROR("Out of hw constants!\n");
- return reg;
+ if (*hwindex >= code->const_nr) {
+ if (*hwindex >= PFS_NUM_CONST_REGS) {
+ error("Out of hw constants!\n");
+ return GL_FALSE;
}
code->const_nr++;
- code->constant[index] = srcreg;
+ code->constant[*hwindex].File = file;
+ code->constant[*hwindex].Index = index;
}
- REG_SET_TYPE(reg, REG_TYPE_CONST);
- REG_SET_INDEX(reg, index);
- REG_SET_VALID(reg, GL_TRUE);
- return reg;
+ return GL_TRUE;
}
-static INLINE GLuint negate(GLuint r)
-{
- REG_NEGS(r);
- REG_NEGV(r);
- return r;
-}
-/* Hack, to prevent clobbering sources used multiple times when
- * emulating non-native instructions
+/**
+ * Mark a temporary register as used.
*/
-static INLINE GLuint keep(GLuint r)
-{
Reply to: