[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'upstream-experimental'



 Makefile                                          |   12 
 configs/autoconf.in                               |    8 
 configs/default                                   |    8 
 configure.ac                                      |   23 
 docs/relnotes-7.1.html                            |    7 
 progs/tools/trace/Makefile                        |    2 
 src/egl/drivers/demo/Makefile                     |    5 
 src/egl/drivers/dri/Makefile                      |    3 
 src/egl/main/Makefile                             |    3 
 src/glu/Makefile                                  |   12 
 src/glu/glu.pc.in                                 |    4 
 src/glu/mesa/Makefile                             |    2 
 src/glu/sgi/Makefile                              |    2 
 src/glut/beos/Makefile                            |    6 
 src/glut/directfb/Makefile                        |    2 
 src/glut/fbdev/Makefile                           |   10 
 src/glut/ggi/Makefile                             |    2 
 src/glut/glx/Makefile                             |   18 
 src/glut/glx/glut.pc.in                           |    4 
 src/glut/mini/Makefile                            |   19 
 src/glut/mini/glut.pc.in                          |    4 
 src/glw/Makefile                                  |   19 
 src/glw/glw.pc.in                                 |    4 
 src/glx/mini/Makefile                             |    2 
 src/glx/x11/.gitignore                            |    1 
 src/glx/x11/Makefile                              |    2 
 src/glx/x11/dri2_glx.c                            |    2 
 src/glx/x11/dri_common.c                          |    4 
 src/glx/x11/dri_common.h                          |    2 
 src/glx/x11/dri_glx.c                             |    2 
 src/glx/x11/drisw_glx.c                           |    2 
 src/glx/x11/glxext.c                              |   11 
 src/mesa/Makefile                                 |   24 
 src/mesa/drivers/beos/Makefile                    |    5 
 src/mesa/drivers/directfb/Makefile                |    2 
 src/mesa/drivers/dri/Makefile                     |   16 
 src/mesa/drivers/dri/Makefile.template            |    2 
 src/mesa/drivers/dri/dri.pc.in                    |    4 
 src/mesa/drivers/dri/i915/i915_texstate.c         |   11 
 src/mesa/drivers/dri/i965/brw_clip_line.c         |    2 
 src/mesa/drivers/dri/i965/brw_clip_state.c        |    2 
 src/mesa/drivers/dri/i965/brw_clip_tri.c          |    2 
 src/mesa/drivers/dri/i965/brw_defines.h           |   13 
 src/mesa/drivers/dri/i965/brw_eu_emit.c           |   16 
 src/mesa/drivers/dri/i965/brw_misc_state.c        |    8 
 src/mesa/drivers/dri/i965/brw_structs.h           |    4 
 src/mesa/drivers/dri/i965/brw_vs_emit.c           |    2 
 src/mesa/drivers/dri/intel/intel_batchbuffer.h    |    4 
 src/mesa/drivers/dri/intel/intel_blit.c           |    5 
 src/mesa/drivers/dri/intel/intel_chipset.h        |   10 
 src/mesa/drivers/dri/intel/intel_context.c        |    4 
 src/mesa/drivers/dri/r300/Makefile                |    3 
 src/mesa/drivers/dri/r300/r300_cmdbuf.c           |   11 
 src/mesa/drivers/dri/r300/r300_context.h          |   51 
 src/mesa/drivers/dri/r300/r300_fragprog.c         |  184 +
 src/mesa/drivers/dri/r300/r300_fragprog.h         |   30 
 src/mesa/drivers/dri/r300/r300_fragprog_emit.c    | 2092 +---------------------
 src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c |  227 ++
 src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h |   42 
 src/mesa/drivers/dri/r300/r300_ioctl.c            |    9 
 src/mesa/drivers/dri/r300/r300_reg.h              |    7 
 src/mesa/drivers/dri/r300/r300_state.c            |  109 -
 src/mesa/drivers/dri/r300/r500_fragprog.c         |  207 +-
 src/mesa/drivers/dri/r300/r500_fragprog.h         |   32 
 src/mesa/drivers/dri/r300/r500_fragprog_emit.c    | 1596 ++--------------
 src/mesa/drivers/dri/r300/radeon_nqssadce.c       |  282 ++
 src/mesa/drivers/dri/r300/radeon_nqssadce.h       |   96 +
 src/mesa/drivers/dri/r300/radeon_program.c        |  275 --
 src/mesa/drivers/dri/r300/radeon_program.h        |  113 -
 src/mesa/drivers/dri/r300/radeon_program_alu.c    |  475 ++++
 src/mesa/drivers/dri/r300/radeon_program_alu.h    |   12 
 src/mesa/drivers/dri/r300/radeon_program_pair.c   |  983 ++++++++++
 src/mesa/drivers/dri/r300/radeon_program_pair.h   |  126 +
 src/mesa/drivers/fbdev/Makefile                   |    3 
 src/mesa/drivers/osmesa/Makefile                  |   12 
 src/mesa/drivers/x11/Makefile                     |    7 
 src/mesa/gl.pc.in                                 |    4 
 src/mesa/glapi/glapi.c                            |    4 
 src/mesa/main/context.c                           |    1 
 src/mesa/main/dd.h                                |    2 
 src/mesa/main/execmem.c                           |    8 
 src/mesa/main/fbobject.c                          |   26 
 src/mesa/main/fbobject.h                          |    7 
 src/mesa/main/shaders.c                           |   14 
 src/mesa/main/texstate.c                          |   28 
 src/mesa/main/version.h                           |    2 
 src/mesa/shader/arbprogparse.c                    |    9 
 src/mesa/shader/program.c                         |   62 
 src/mesa/shader/program.h                         |   13 
 src/mesa/shader/shader_api.c                      |  291 ++-
 src/mesa/shader/slang/library/slang_core.gc       |    8 
 src/mesa/shader/slang/library/slang_core_gc.h     | 1524 ++++++++--------
 src/mesa/shader/slang/slang_codegen.c             |    8 
 src/mesa/shader/slang/slang_compile.c             |   18 
 src/mesa/shader/slang/slang_ir.c                  |    2 
 src/mesa/shader/slang/slang_print.c               |   84 
 src/mesa/shader/slang/slang_vartable.c            |    3 
 src/mesa/swrast/s_fragprog.c                      |   28 
 src/mesa/tnl_dd/t_dd_tritmp.h                     |    4 
 99 files changed, 4398 insertions(+), 5079 deletions(-)

New commits:
commit 4fab47b13c214dc79e0ae5d8001521029ce34231
Author: Dave Airlie <airlied@linux.ie>
Date:   Sun Jul 13 19:00:46 2008 +1000

    glx/dri: only report DRI2 extensions when DRI2 is enabled.
    
    Fixes bug 15477

diff --git a/src/glx/x11/dri2_glx.c b/src/glx/x11/dri2_glx.c
index b679c72..0be65bc 100644
--- a/src/glx/x11/dri2_glx.c
+++ b/src/glx/x11/dri2_glx.c
@@ -308,7 +308,7 @@ static __GLXDRIscreen *dri2CreateScreen(__GLXscreenConfigs *psc, int screen,
 	return NULL;
     }
 
-    driBindExtensions(psc);
+    driBindExtensions(psc, 1);
 
     psc->configs = driConvertConfigs(psc->core, psc->configs, driver_configs);
     psc->visuals = driConvertConfigs(psc->core, psc->visuals, driver_configs);
diff --git a/src/glx/x11/dri_common.c b/src/glx/x11/dri_common.c
index b159d19..8175f46 100644
--- a/src/glx/x11/dri_common.c
+++ b/src/glx/x11/dri_common.c
@@ -329,7 +329,7 @@ driConvertConfigs(const __DRIcoreExtension *core,
 }
 
 _X_HIDDEN void
-driBindExtensions(__GLXscreenConfigs *psc)
+driBindExtensions(__GLXscreenConfigs *psc, int dri2)
 {
     const __DRIextension **extensions;
     int i;
@@ -386,7 +386,7 @@ driBindExtensions(__GLXscreenConfigs *psc)
 #endif
 
 #ifdef __DRI_TEX_BUFFER
-	if (strcmp(extensions[i]->name, __DRI_TEX_BUFFER) == 0) {
+	if ((strcmp(extensions[i]->name, __DRI_TEX_BUFFER) == 0) && dri2) {
 	    psc->texBuffer = (__DRItexBufferExtension *) extensions[i];
 	    __glXEnableDirectExtension(psc, "GLX_EXT_texture_from_pixmap");
 	}
diff --git a/src/glx/x11/dri_common.h b/src/glx/x11/dri_common.h
index 3556510..15f6cc8 100644
--- a/src/glx/x11/dri_common.h
+++ b/src/glx/x11/dri_common.h
@@ -55,6 +55,6 @@ extern void ErrorMessageF(const char *f, ...);
 
 extern void *driOpenDriver(const char *driverName);
 
-extern void driBindExtensions(__GLXscreenConfigs *psc);
+extern void driBindExtensions(__GLXscreenConfigs *psc, int dri2);
 
 #endif /* _DRI_COMMON_H */
diff --git a/src/glx/x11/dri_glx.c b/src/glx/x11/dri_glx.c
index d53f2d9..82653f1 100644
--- a/src/glx/x11/dri_glx.c
+++ b/src/glx/x11/dri_glx.c
@@ -635,7 +635,7 @@ static __GLXDRIscreen *driCreateScreen(__GLXscreenConfigs *psc, int screen,
  	return NULL;
     }
 
-    driBindExtensions(psc);
+    driBindExtensions(psc, 0);
 
     psp->destroyScreen = driDestroyScreen;
     psp->createContext = driCreateContext;
diff --git a/src/glx/x11/drisw_glx.c b/src/glx/x11/drisw_glx.c
index f7ff001..bcf7e14 100644
--- a/src/glx/x11/drisw_glx.c
+++ b/src/glx/x11/drisw_glx.c
@@ -390,7 +390,7 @@ static __GLXDRIscreen *driCreateScreen(__GLXscreenConfigs *psc, int screen,
 	goto handle_error;
     }
 
-    driBindExtensions(psc);
+    driBindExtensions(psc, 0);
 
     psc->configs = driConvertConfigs(psc->core, psc->configs, driver_configs);
     psc->visuals = driConvertConfigs(psc->core, psc->visuals, driver_configs);

commit e81ba58bf4c20229677cdf89b5970b55cefb2199
Author: Nicolai Haehnle <nhaehnle@gmail.com>
Date:   Sat Jul 12 21:13:03 2008 +0200

    r300_fragprog: Use nqssa+dce and program_pair for emit
    
    Share almost all code with r500_fragprog now.
    
    This also fixes Piglit's texrect-many test, which means that the compiz
    bicubic plugin should work with hardware acceleration now.

diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 9baa1e7..6ca9342 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -42,6 +42,7 @@ DRIVER_SOURCES = \
 		 radeon_nqssadce.c \
 		 r300_vertprog.c \
 		 r300_fragprog.c \
+		 r300_fragprog_swizzle.c \
 		 r300_fragprog_emit.c \
 		 r500_fragprog.c \
 		 r500_fragprog_emit.c \
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 8e9c5ce..98af6d8 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -683,16 +683,25 @@ struct r300_fragment_program_external_state {
 };
 
 
+struct r300_fragment_program_node {
+	int tex_offset; /**< first tex instruction */
+	int tex_end; /**< last tex instruction, relative to tex_offset */
+	int alu_offset; /**< first ALU instruction */
+	int alu_end; /**< last ALU instruction, relative to alu_offset */
+	int flags;
+};
+
 /**
  * Stores an R300 fragment program in its compiled-to-hardware form.
  */
 struct r300_fragment_program_code {
 	struct {
-		int length;
+		int length; /**< total # of texture instructions used */
 		GLuint inst[PFS_MAX_TEX_INST];
 	} tex;
 
 	struct {
+		int length; /**< total # of ALU instructions used */
 		struct {
 			GLuint inst0;
 			GLuint inst1;
@@ -701,21 +710,10 @@ struct r300_fragment_program_code {
 		} inst[PFS_MAX_ALU_INST];
 	} alu;
 
-	struct {
-		int tex_offset;
-		int tex_end;
-		int alu_offset;
-		int alu_end;
-		int flags;
-	} node[4];
+	struct r300_fragment_program_node node[4];
 	int cur_node;
 	int first_node_has_tex;
 
-	int alu_offset;
-	int alu_end;
-	int tex_offset;
-	int tex_end;
-
 	/**
 	 * Remember which program register a given hardware constant
 	 * belongs to.
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index 8a1d690..d390de5 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -29,10 +29,8 @@
  * \file
  *
  * Fragment program compiler. Perform transformations on the intermediate
- * \ref radeon_program representation (which is essentially the Mesa
- * program representation plus the notion of clauses) until the program
- * is in a form where we can translate it more or less directly into
- * machine-readable form.
+ * representation until the program is in a form where we can translate
+ * it more or less directly into machine-readable form.
  *
  * \author Ben Skeggs <darktama@iinet.net.au>
  * \author Jerome Glisse <j.glisse@gmail.com>
@@ -47,8 +45,10 @@
 
 #include "r300_context.h"
 #include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
 #include "r300_state.h"
 
+#include "radeon_nqssadce.h"
 #include "radeon_program_alu.h"
 
 
@@ -133,25 +133,6 @@ static GLboolean transform_TEX(
 		inst.SrcReg[0].Index = tempreg;
 	}
 
-	/* Texture operations do not support swizzles etc. in hardware,
-	 * so emit an additional arithmetic operation if necessary.
-	 */
-	if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP ||
-	    inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) {
-		int tempreg = radeonFindFreeTemporary(t);
-
-		tgt = radeonAppendInstructions(t->Program, 1);
-
-		tgt->Opcode = OPCODE_MOV;
-		tgt->DstReg.File = PROGRAM_TEMPORARY;
-		tgt->DstReg.Index = tempreg;
-		tgt->SrcReg[0] = inst.SrcReg[0];
-
-		reset_srcreg(&inst.SrcReg[0]);
-		inst.SrcReg[0].File = PROGRAM_TEMPORARY;
-		inst.SrcReg[0].Index = tempreg;
-	}
-
 	if (inst.Opcode != OPCODE_KIL) {
 		if (inst.DstReg.File != PROGRAM_TEMPORARY ||
 		    inst.DstReg.WriteMask != WRITEMASK_XYZW) {
@@ -339,6 +320,13 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
 }
 
 
+static void nqssadce_init(struct nqssadce_state* s)
+{
+	s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW;
+	s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W;
+}
+
+
 static GLuint build_dtm(GLuint depthmode)
 {
 	switch(depthmode) {
@@ -417,7 +405,20 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
 			3, transformations);
 
 		if (RADEON_DEBUG & DEBUG_PIXEL) {
-			_mesa_printf("Fragment Program: After transformations:\n");
+			_mesa_printf("Fragment Program: After native rewrite:\n");
+			_mesa_print_program(compiler.program);
+		}
+
+		struct radeon_nqssadce_descr nqssadce = {
+			.Init = &nqssadce_init,
+			.IsNativeSwizzle = &r300FPIsNativeSwizzle,
+			.BuildSwizzle = &r300FPBuildSwizzle,
+			.RewriteDepthOut = GL_TRUE
+		};
+		radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
+
+		if (RADEON_DEBUG & DEBUG_PIXEL) {
+			_mesa_printf("Compiler: after NqSSA-DCE:\n");
 			_mesa_print_program(compiler.program);
 		}
 
@@ -451,22 +452,18 @@ void r300FragmentProgramDump(
 
 	fprintf(stderr, "pc=%d*************************************\n", pc++);
 
-	fprintf(stderr, "Mesa program:\n");
-	fprintf(stderr, "-------------\n");
-	_mesa_print_program(&fp->mesa_program.Base);
-	fflush(stdout);
-
 	fprintf(stderr, "Hardware program\n");
 	fprintf(stderr, "----------------\n");
 
 	for (n = 0; n < (code->cur_node + 1); n++) {
 		fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
-			"alu_end: %d, tex_end: %d\n", n,
+			"alu_end: %d, tex_end: %d, flags: %08x\n", n,
 			code->node[n].alu_offset,
 			code->node[n].tex_offset,
-			code->node[n].alu_end, code->node[n].tex_end);
+			code->node[n].alu_end, code->node[n].tex_end,
+			code->node[n].flags);
 
-		if (code->tex.length) {
+		if (n > 0 || code->first_node_has_tex) {
 			fprintf(stderr, "  TEX:\n");
 			for (i = code->node[n].tex_offset;
 			     i <= code->node[n].tex_offset + code->node[n].tex_end;
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h
index c76ae62..b3a3cd2 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.h
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.h
@@ -42,33 +42,6 @@
 #include "r300_context.h"
 #include "radeon_program.h"
 
-/* supported hw opcodes */
-#define PFS_OP_MAD 0
-#define PFS_OP_DP3 1
-#define PFS_OP_DP4 2
-#define PFS_OP_MIN 3
-#define PFS_OP_MAX 4
-#define PFS_OP_CMP 5
-#define PFS_OP_FRC 6
-#define PFS_OP_EX2 7
-#define PFS_OP_LG2 8
-#define PFS_OP_RCP 9
-#define PFS_OP_RSQ 10
-#define PFS_OP_REPL_ALPHA 11
-#define PFS_OP_CMPH 12
-#define MAX_PFS_OP 12
-
-#define PFS_FLAG_SAT	(1 << 0)
-#define PFS_FLAG_ABS	(1 << 1)
-
-#define ARG_NEG			(1 << 5)
-#define ARG_ABS			(1 << 6)
-#define ARG_MASK		(127 << 0)
-#define ARG_STRIDE		7
-#define SRC_CONST		(1 << 5)
-#define SRC_MASK		(63 << 0)
-#define SRC_STRIDE		6
-
 #define DRI_CONF_FP_OPTIMIZATION_SPEED   0
 #define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
 
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
index 4786b45..9f0b7e3 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
@@ -36,1674 +36,308 @@
  * \author Jerome Glisse <j.glisse@gmail.com>
  *
  * \todo FogOption
- *
- * \todo Verify results of opcodes for accuracy, I've only checked them in
- * specific cases.
  */
 
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "shader/prog_instruction.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-
-#include "r300_context.h"
 #include "r300_fragprog.h"
-#include "r300_reg.h"
-#include "r300_state.h"
-
-/* Mapping Mesa registers to R300 temporaries */
-struct reg_acc {
-	int reg;		/* Assigned hw temp */
-	unsigned int refcount;	/* Number of uses by mesa program */
-};
-
-/**
- * Describe the current lifetime information for an R300 temporary
- */
-struct reg_lifetime {
-	/* Index of the first slot where this register is free in the sense
-	   that it can be used as a new destination register.
-	   This is -1 if the register has been assigned to a Mesa register
-	   and the last access to the register has not yet been emitted */
-	int free;
-
-	/* Index of the first slot where this register is currently reserved.
-	   This is used to stop e.g. a scalar operation from being moved
-	   before the allocation time of a register that was first allocated
-	   for a vector operation. */
-	int reserved;
-
-	/* Index of the first slot in which the register can be used as a
-	   source without losing the value that is written by the last
-	   emitted instruction that writes to the register */
-	int vector_valid;
-	int scalar_valid;
-
-	/* Index to the slot where the register was last read.
-	   This is also the first slot in which the register may be written again */
-	int vector_lastread;
-	int scalar_lastread;
-};
-
-/**
- * Store usage information about an ALU instruction slot during the
- * compilation of a fragment program.
- */
-#define SLOT_SRC_VECTOR  (1<<0)
-#define SLOT_SRC_SCALAR  (1<<3)
-#define SLOT_SRC_BOTH    (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR)
-#define SLOT_OP_VECTOR   (1<<16)
-#define SLOT_OP_SCALAR   (1<<17)
-#define SLOT_OP_BOTH     (SLOT_OP_VECTOR | SLOT_OP_SCALAR)
-
-struct r300_pfs_compile_slot {
-	/* Bitmask indicating which parts of the slot are used, using SLOT_ constants
-	   defined above */
-	unsigned int used;
-
-	/* Selected sources */
-	int vsrc[3];
-	int ssrc[3];
-};
-
-/**
- * Store information during compilation of fragment programs.
- */
-struct r300_pfs_compile_state {
-	struct r300_fragment_program_compiler *compiler;
 
-	int nrslots;		/* number of ALU slots used so far */
-
-	/* Track which (parts of) slots are already filled with instructions */
-	struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST];
-
-	/* Track the validity of R300 temporaries */
-	struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS];
-
-	/* Used to map Mesa's inputs/temps onto hardware temps */
-	int temp_in_use;
-	struct reg_acc temps[PFS_NUM_TEMP_REGS];
-	struct reg_acc inputs[32];	/* don't actually need 32... */
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
+#include "r300_reg.h"
 
-	/* Track usage of hardware temps, for register allocation,
-	 * indirection detection, etc. */
-	GLuint used_in_node;
-	GLuint dest_in_node;
-};
 
+#define PROG_CODE \
+	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
+	struct r300_fragment_program_code *code = c->code
 
-/*
- * Usefull macros and values
- */
-#define ERROR(fmt, args...) do {			\
+#define error(fmt, args...) do {			\
 		fprintf(stderr, "%s::%s(): " fmt "\n",	\
 			__FILE__, __FUNCTION__, ##args);	\
-		fp->error = GL_TRUE;			\
 	} while(0)
 
-#define PFS_INVAL 0xFFFFFFFF
-#define COMPILE_STATE \
-	struct r300_fragment_program *fp = cs->compiler->fp; \
-	struct r300_fragment_program_code *code = cs->compiler->code; \
-	(void)code; (void)fp
-
-#define SWIZZLE_XYZ		0
-#define SWIZZLE_XXX		1
-#define SWIZZLE_YYY		2
-#define SWIZZLE_ZZZ		3
-#define SWIZZLE_WWW		4
-#define SWIZZLE_YZX		5
-#define SWIZZLE_ZXY		6
-#define SWIZZLE_WZY		7
-#define SWIZZLE_111		8
-#define SWIZZLE_000		9
-#define SWIZZLE_HHH		10
-
-#define swizzle(r, x, y, z, w) do_swizzle(cs, r,		\
-					  ((SWIZZLE_##x<<0)|	\
-					   (SWIZZLE_##y<<3)|	\
-					   (SWIZZLE_##z<<6)|	\
-					   (SWIZZLE_##w<<9)),	\
-					  0)
-
-#define REG_TYPE_INPUT		0
-#define REG_TYPE_OUTPUT		1
-#define REG_TYPE_TEMP		2
-#define REG_TYPE_CONST		3
-
-#define REG_TYPE_SHIFT		0
-#define REG_INDEX_SHIFT		2
-#define REG_VSWZ_SHIFT		8
-#define REG_SSWZ_SHIFT		13
-#define REG_NEGV_SHIFT		18
-#define REG_NEGS_SHIFT		19
-#define REG_ABS_SHIFT		20
-#define REG_NO_USE_SHIFT	21	// Hack for refcounting
-#define REG_VALID_SHIFT		22	// Does the register contain a defined value?
-#define REG_BUILTIN_SHIFT   23	// Is it a builtin (like all zero/all one)?
-
-#define REG_TYPE_MASK		(0x03 << REG_TYPE_SHIFT)
-#define REG_INDEX_MASK		(0x3F << REG_INDEX_SHIFT)
-#define REG_VSWZ_MASK		(0x1F << REG_VSWZ_SHIFT)
-#define REG_SSWZ_MASK		(0x1F << REG_SSWZ_SHIFT)
-#define REG_NEGV_MASK		(0x01 << REG_NEGV_SHIFT)
-#define REG_NEGS_MASK		(0x01 << REG_NEGS_SHIFT)
-#define REG_ABS_MASK		(0x01 << REG_ABS_SHIFT)
-#define REG_NO_USE_MASK		(0x01 << REG_NO_USE_SHIFT)
-#define REG_VALID_MASK		(0x01 << REG_VALID_SHIFT)
-#define REG_BUILTIN_MASK	(0x01 << REG_BUILTIN_SHIFT)
-
-#define REG(type, index, vswz, sswz, nouse, valid, builtin)	\
-	(((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) |			\
-	 ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) |		\
-	 ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) |		\
-	 ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) |		\
-	 ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) |	\
-	 ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) |			\
-	 ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
-#define REG_GET_TYPE(reg)						\
-	((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT)
-#define REG_GET_INDEX(reg)						\
-	((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT)
-#define REG_GET_VSWZ(reg)						\
-	((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT)
-#define REG_GET_SSWZ(reg)						\
-	((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT)
-#define REG_GET_NO_USE(reg)						\
-	((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
-#define REG_GET_VALID(reg)						\
-	((reg & REG_VALID_MASK) >> REG_VALID_SHIFT)
-#define REG_GET_BUILTIN(reg)						\
-	((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT)
-#define REG_SET_TYPE(reg, type)						\
-	reg = ((reg & ~REG_TYPE_MASK) |					\
-	       ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK))
-#define REG_SET_INDEX(reg, index)					\
-	reg = ((reg & ~REG_INDEX_MASK) |				\
-	       ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK))
-#define REG_SET_VSWZ(reg, vswz)						\
-	reg = ((reg & ~REG_VSWZ_MASK) |					\
-	       ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK))
-#define REG_SET_SSWZ(reg, sswz)						\
-	reg = ((reg & ~REG_SSWZ_MASK) |					\
-	       ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
-#define REG_SET_NO_USE(reg, nouse)					\
-	reg = ((reg & ~REG_NO_USE_MASK) |				\
-	       ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK))
-#define REG_SET_VALID(reg, valid)					\
-	reg = ((reg & ~REG_VALID_MASK) |				\
-	       ((valid << REG_VALID_SHIFT) & REG_VALID_MASK))
-#define REG_SET_BUILTIN(reg, builtin)					\
-	reg = ((reg & ~REG_BUILTIN_MASK) |				\
-	       ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK))
-#define REG_ABS(reg)							\
-	reg = (reg | REG_ABS_MASK)
-#define REG_NEGV(reg)							\
-	reg = (reg | REG_NEGV_MASK)
-#define REG_NEGS(reg)							\
-	reg = (reg | REG_NEGS_MASK)
-
-#define NOP_INST0 (						 \
-		(R300_ALU_OUTC_MAD) |				 \
-		(R300_ALU_ARGC_ZERO << R300_ALU_ARG0C_SHIFT) | \
-		(R300_ALU_ARGC_ZERO << R300_ALU_ARG1C_SHIFT) | \
-		(R300_ALU_ARGC_ZERO << R300_ALU_ARG2C_SHIFT))
-#define NOP_INST1 (					     \
-		((0 | SRC_CONST) << R300_ALU_SRC0C_SHIFT) | \
-		((0 | SRC_CONST) << R300_ALU_SRC1C_SHIFT) | \
-		((0 | SRC_CONST) << R300_ALU_SRC2C_SHIFT))
-#define NOP_INST2 ( \
-		(R300_ALU_OUTA_MAD) |				 \
-		(R300_ALU_ARGA_ZERO << R300_ALU_ARG0A_SHIFT) | \
-		(R300_ALU_ARGA_ZERO << R300_ALU_ARG1A_SHIFT) | \
-		(R300_ALU_ARGA_ZERO << R300_ALU_ARG2A_SHIFT))
-#define NOP_INST3 (					     \
-		((0 | SRC_CONST) << R300_ALU_SRC0A_SHIFT) | \
-		((0 | SRC_CONST) << R300_ALU_SRC1A_SHIFT) | \
-		((0 | SRC_CONST) << R300_ALU_SRC2A_SHIFT))
-
-
-/*
- * Datas structures for fragment program generation
- */
-
-/* description of r300 native hw instructions */
-static const struct {
-	const char *name;
-	int argc;
-	int v_op;
-	int s_op;
-} r300_fpop[] = {
-	/* *INDENT-OFF* */
-	{"MAD", 3, R300_ALU_OUTC_MAD, R300_ALU_OUTA_MAD},
-	{"DP3", 2, R300_ALU_OUTC_DP3, R300_ALU_OUTA_DP4},
-	{"DP4", 2, R300_ALU_OUTC_DP4, R300_ALU_OUTA_DP4},
-	{"MIN", 2, R300_ALU_OUTC_MIN, R300_ALU_OUTA_MIN},
-	{"MAX", 2, R300_ALU_OUTC_MAX, R300_ALU_OUTA_MAX},
-	{"CMP", 3, R300_ALU_OUTC_CMP, R300_ALU_OUTA_CMP},
-	{"FRC", 1, R300_ALU_OUTC_FRC, R300_ALU_OUTA_FRC},
-	{"EX2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_EX2},
-	{"LG2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_LG2},
-	{"RCP", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RCP},
-	{"RSQ", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RSQ},
-	{"REPL_ALPHA", 1, R300_ALU_OUTC_REPL_ALPHA, PFS_INVAL},
-	{"CMPH", 3, R300_ALU_OUTC_CMPH, PFS_INVAL},
-	/* *INDENT-ON* */
-};
-
-/* vector swizzles r300 can support natively, with a couple of
- * cases we handle specially
- *
- * REG_VSWZ/REG_SSWZ is an index into this table
- */
-
-/* mapping from SWIZZLE_* to r300 native values for scalar insns */
-#define SWIZZLE_HALF 6
-
-#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
-					  SWIZZLE_##y, \
-					  SWIZZLE_##z, \
-					  SWIZZLE_ZERO))
-/* native swizzles */
-static const struct r300_pfs_swizzle {
-	GLuint hash;		/* swizzle value this matches */
-	GLuint base;		/* base value for hw swizzle */
-	GLuint stride;		/* difference in base between arg0/1/2 */
-	GLuint flags;
-} v_swiz[] = {
-	/* *INDENT-OFF* */
-	{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR},
-	{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR},
-	{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR},
-	{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR},
-	{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, SLOT_SRC_SCALAR},
-	{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR},
-	{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR},
-	{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH},
-	{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
-	{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
-	{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0},
-	{PFS_INVAL, 0, 0, 0},
-	/* *INDENT-ON* */
-};
-
-/* used during matching of non-native swizzles */
-#define SWZ_X_MASK (7 << 0)
-#define SWZ_Y_MASK (7 << 3)
-#define SWZ_Z_MASK (7 << 6)
-#define SWZ_W_MASK (7 << 9)
-static const struct {
-	GLuint hash;		/* used to mask matching swizzle components */
-	int mask;		/* actual outmask */
-	int count;		/* count of components matched */
-} s_mask[] = {
-	/* *INDENT-OFF* */
-	{SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3},
-	{SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2},
-	{SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2},
-	{SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2},
-	{SWZ_X_MASK, 1, 1},
-	{SWZ_Y_MASK, 2, 1},
-	{SWZ_Z_MASK, 4, 1},
-	{PFS_INVAL, PFS_INVAL, PFS_INVAL}
-	/* *INDENT-ON* */
-};
-
-static const struct {
-	int base;		/* hw value of swizzle */
-	int stride;		/* difference between SRC0/1/2 */
-	GLuint flags;
-} s_swiz[] = {
-	/* *INDENT-OFF* */
-	{R300_ALU_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR},
-	{R300_ALU_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR},
-	{R300_ALU_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR},
-	{R300_ALU_ARGA_SRC0A, 1, SLOT_SRC_SCALAR},
-	{R300_ALU_ARGA_ZERO, 0, 0},
-	{R300_ALU_ARGA_ONE, 0, 0},
-	{R300_ALU_ARGA_HALF, 0, 0}
-	/* *INDENT-ON* */
-};
-
-/* boiler-plate reg, for convenience */
-static const GLuint undef = REG(REG_TYPE_TEMP,
-				0,
-				SWIZZLE_XYZ,
-				SWIZZLE_W,
-				GL_FALSE,
-				GL_FALSE,
-				GL_FALSE);
-
-/* constant one source */
-static const GLuint pfs_one = REG(REG_TYPE_CONST,
-				  0,
-				  SWIZZLE_111,
-				  SWIZZLE_ONE,
-				  GL_FALSE,
-				  GL_TRUE,
-				  GL_TRUE);
-
-/* constant half source */
-static const GLuint pfs_half = REG(REG_TYPE_CONST,
-				   0,
-				   SWIZZLE_HHH,
-				   SWIZZLE_HALF,
-				   GL_FALSE,
-				   GL_TRUE,
-				   GL_TRUE);
-
-/* constant zero source */
-static const GLuint pfs_zero = REG(REG_TYPE_CONST,
-				   0,
-				   SWIZZLE_000,
-				   SWIZZLE_ZERO,
-				   GL_FALSE,
-				   GL_TRUE,
-				   GL_TRUE);
-
-/*
- * Common functions prototypes
- */
-static void emit_arith(struct r300_pfs_compile_state *cs, int op,
-		       GLuint dest, int mask,
-		       GLuint src0, GLuint src1, GLuint src2, int flags);
-
-/**
- * Get an R300 temporary that can be written to in the given slot.
- */
-static int get_hw_temp(struct r300_pfs_compile_state *cs, int slot)
-{
-	COMPILE_STATE;
-	int r;
-
-	for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
-		if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot)
-			break;
-	}
-
-	if (r >= PFS_NUM_TEMP_REGS) {
-		ERROR("Out of hardware temps\n");
-		return 0;
-	}
-	// Reserved is used to avoid the following scenario:
-	//  R300 temporary X is first assigned to Mesa temporary Y during vector ops
-	//  R300 temporary X is then assigned to Mesa temporary Z for further vector ops
-	//  Then scalar ops on Mesa temporary Z are emitted and move back in time
-	//  to overwrite the value of temporary Y.
-	// End scenario.
-	cs->hwtemps[r].reserved = cs->hwtemps[r].free;
-	cs->hwtemps[r].free = -1;
-
-	// Reset to some value that won't mess things up when the user
-	// tries to read from a temporary that hasn't been assigned a value yet.
-	// In the normal case, vector_valid and scalar_valid should be set to
-	// a sane value by the first emit that writes to this temporary.
-	cs->hwtemps[r].vector_valid = 0;
-	cs->hwtemps[r].scalar_valid = 0;
-
-	if (r > code->max_temp_idx)
-		code->max_temp_idx = r;
-
-	return r;
-}
-
-/**
- * Get an R300 temporary that will act as a TEX destination register.
- */
-static int get_hw_temp_tex(struct r300_pfs_compile_state *cs)
-{
-	COMPILE_STATE;
-	int r;
-
-	for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
-		if (cs->used_in_node & (1 << r))
-			continue;
-
-		// Note: Be very careful here
-		if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0)
-			break;
-	}
-
-	if (r >= PFS_NUM_TEMP_REGS)
-		return get_hw_temp(cs, 0);	/* Will cause an indirection */
-
-	cs->hwtemps[r].reserved = cs->hwtemps[r].free;
-	cs->hwtemps[r].free = -1;
-
-	// Reset to some value that won't mess things up when the user
-	// tries to read from a temporary that hasn't been assigned a value yet.
-	// In the normal case, vector_valid and scalar_valid should be set to
-	// a sane value by the first emit that writes to this temporary.
-	cs->hwtemps[r].vector_valid = cs->nrslots;
-	cs->hwtemps[r].scalar_valid = cs->nrslots;
 
-	if (r > code->max_temp_idx)
-		code->max_temp_idx = r;
-
-	return r;
-}
-
-/**
- * Mark the given hardware register as free.
- */
-static void free_hw_temp(struct r300_pfs_compile_state *cs, int idx)
+static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
 {
-	// Be very careful here. Consider sequences like
-	//  MAD r0, r1,r2,r3
-	//  TEX r4, ...
-	// The TEX instruction may be moved in front of the MAD instruction
-	// due to the way nodes work. We don't want to alias r1 and r4 in
-	// this case.
-	// I'm certain the register allocation could be further sanitized,
-	// but it's tricky because of stuff that can happen inside emit_tex
-	// and emit_arith.
-	cs->hwtemps[idx].free = cs->nrslots + 1;
-}
+	PROG_CODE;
 
-/**
- * Create a new Mesa temporary register.
- */
-static GLuint get_temp_reg(struct r300_pfs_compile_state *cs)
-{
-	COMPILE_STATE;
-	GLuint r = undef;
-	GLuint index;
-
-	index = ffs(~cs->temp_in_use);
-	if (!index) {
-		ERROR("Out of program temps\n");
-		return r;
-	}
-
-	cs->temp_in_use |= (1 << --index);
-	cs->temps[index].refcount = 0xFFFFFFFF;
-	cs->temps[index].reg = -1;
-
-	REG_SET_TYPE(r, REG_TYPE_TEMP);
-	REG_SET_INDEX(r, index);
-	REG_SET_VALID(r, GL_TRUE);
-	return r;
-}
-
-/**
- * Free a Mesa temporary and the associated R300 temporary.
- */
-static void free_temp(struct r300_pfs_compile_state *cs, GLuint r)
-{
-	GLuint index = REG_GET_INDEX(r);
-
-	if (!(cs->temp_in_use & (1 << index)))
-		return;
-
-	if (REG_GET_TYPE(r) == REG_TYPE_TEMP) {
-		free_hw_temp(cs, cs->temps[index].reg);
-		cs->temps[index].reg = -1;
-		cs->temp_in_use &= ~(1 << index);
-	} else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) {
-		free_hw_temp(cs, cs->inputs[index].reg);
-		cs->inputs[index].reg = -1;
-	}
-}
-
-/**
- * Emit a hardware constant/parameter.
- */
-static GLuint emit_const4fv(struct r300_pfs_compile_state *cs,
-			    struct prog_src_register srcreg)
-{
-	COMPILE_STATE;
-	GLuint reg = undef;
-	int index;
-
-	for (index = 0; index < code->const_nr; ++index) {
-		if (code->constant[index].File == srcreg.File &&
-		    code->constant[index].Index == srcreg.Index)
+	for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
+		if (code->constant[*hwindex].File == file &&
+		    code->constant[*hwindex].Index == index)
 			break;
 	}
 
-	if (index >= code->const_nr) {
-		if (index >= PFS_NUM_CONST_REGS) {
-			ERROR("Out of hw constants!\n");
-			return reg;
+	if (*hwindex >= code->const_nr) {
+		if (*hwindex >= PFS_NUM_CONST_REGS) {
+			error("Out of hw constants!\n");
+			return GL_FALSE;
 		}
 
 		code->const_nr++;
-		code->constant[index] = srcreg;
+		code->constant[*hwindex].File = file;
+		code->constant[*hwindex].Index = index;
 	}
 
-	REG_SET_TYPE(reg, REG_TYPE_CONST);
-	REG_SET_INDEX(reg, index);
-	REG_SET_VALID(reg, GL_TRUE);
-	return reg;
+	return GL_TRUE;
 }
 
-static INLINE GLuint negate(GLuint r)
-{
-	REG_NEGS(r);
-	REG_NEGV(r);
-	return r;
-}
 
-/* Hack, to prevent clobbering sources used multiple times when
- * emulating non-native instructions
+/**
+ * Mark a temporary register as used.
  */
-static INLINE GLuint keep(GLuint r)
-{


Reply to: