[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

mesa: Changes to 'upstream-unstable'



 Makefile                                         |    5 
 bin/mklib                                        |    9 
 configs/dragonfly                                |   38 
 configs/dragonfly-dri                            |   56 
 configs/dragonfly-dri-amd64                      |   10 
 configs/dragonfly-dri-x86                        |   13 
 configs/freebsd-static                           |   27 
 docs/relnotes-7.0.3.html                         |    9 
 docs/relnotes-7.0.4.html                         |   65 +
 docs/relnotes.html                               |    1 
 src/glu/sgi/libutil/mipmap.c                     |    4 
 src/glut/glx/Makefile                            |    2 
 src/glw/Makefile                                 |    2 
 src/mesa/drivers/dri/common/vblank.c             |    4 
 src/mesa/drivers/dri/common/xmlconfig.c          |    3 
 src/mesa/drivers/dri/i915/i915_fragprog.c        |  257 ++--
 src/mesa/drivers/dri/i915/i915_texstate.c        |   16 
 src/mesa/drivers/dri/i915/intel_context.c        |    2 
 src/mesa/drivers/dri/i915/intel_context.h        |    3 
 src/mesa/drivers/dri/i915/intel_screen.c         |    3 
 src/mesa/drivers/dri/i915/intel_state.c          |    4 
 src/mesa/drivers/dri/i915/intel_tris.c           |   30 
 src/mesa/drivers/dri/i915tex/i915_fragprog.c     |    2 
 src/mesa/drivers/dri/i915tex/intel_mipmap_tree.c |    9 
 src/mesa/drivers/dri/i915tex/intel_screen.c      |    2 
 src/mesa/drivers/dri/i965/Makefile               |    1 
 src/mesa/drivers/dri/i965/brw_cc.c               |    7 
 src/mesa/drivers/dri/i965/brw_clip.h             |    4 
 src/mesa/drivers/dri/i965/brw_clip_state.c       |    3 
 src/mesa/drivers/dri/i965/brw_clip_unfilled.c    |    4 
 src/mesa/drivers/dri/i965/brw_context.c          |   11 
 src/mesa/drivers/dri/i965/brw_curbe.c            |    2 
 src/mesa/drivers/dri/i965/brw_defines.h          |    2 
 src/mesa/drivers/dri/i965/brw_draw_upload.c      |   27 
 src/mesa/drivers/dri/i965/brw_eu.h               |   26 
 src/mesa/drivers/dri/i965/brw_eu_emit.c          |   46 
 src/mesa/drivers/dri/i965/brw_gs.h               |    4 
 src/mesa/drivers/dri/i965/brw_gs_state.c         |    3 
 src/mesa/drivers/dri/i965/brw_metaops.c          |    2 
 src/mesa/drivers/dri/i965/brw_program.c          |    3 
 src/mesa/drivers/dri/i965/brw_sf.c               |   15 
 src/mesa/drivers/dri/i965/brw_sf.h               |   11 
 src/mesa/drivers/dri/i965/brw_sf_emit.c          |   98 +
 src/mesa/drivers/dri/i965/brw_sf_state.c         |   11 
 src/mesa/drivers/dri/i965/brw_state_cache.c      |    2 
 src/mesa/drivers/dri/i965/brw_state_pool.c       |    5 
 src/mesa/drivers/dri/i965/brw_tex.c              |   31 
 src/mesa/drivers/dri/i965/brw_tex_layout.c       |   46 
 src/mesa/drivers/dri/i965/brw_urb.c              |    2 
 src/mesa/drivers/dri/i965/brw_vs.h               |    6 
 src/mesa/drivers/dri/i965/brw_vs_emit.c          |  252 +++-
 src/mesa/drivers/dri/i965/brw_vs_state.c         |    2 
 src/mesa/drivers/dri/i965/brw_vs_tnl.c           |   19 
 src/mesa/drivers/dri/i965/brw_wm.c               |  119 +
 src/mesa/drivers/dri/i965/brw_wm.h               |   17 
 src/mesa/drivers/dri/i965/brw_wm_emit.c          |  160 +-
 src/mesa/drivers/dri/i965/brw_wm_fp.c            |  123 +-
 src/mesa/drivers/dri/i965/brw_wm_glsl.c          | 1370 +++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_wm_pass0.c         |    4 
 src/mesa/drivers/dri/i965/brw_wm_pass1.c         |    7 
 src/mesa/drivers/dri/i965/brw_wm_pass2.c         |    2 
 src/mesa/drivers/dri/i965/brw_wm_sampler_state.c |    2 
 src/mesa/drivers/dri/i965/brw_wm_state.c         |   11 
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |   31 
 src/mesa/drivers/dri/i965/bufmgr_fake.c          |    2 
 src/mesa/drivers/dri/i965/intel_batchbuffer.c    |    4 
 src/mesa/drivers/dri/i965/intel_blit.c           |    5 
 src/mesa/drivers/dri/i965/intel_buffers.c        |    5 
 src/mesa/drivers/dri/i965/intel_context.c        |   35 
 src/mesa/drivers/dri/i965/intel_context.h        |    6 
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c    |   21 
 src/mesa/drivers/dri/i965/intel_pixel_bitmap.c   |   38 
 src/mesa/drivers/dri/i965/intel_screen.c         |    2 
 src/mesa/drivers/dri/i965/intel_tex_validate.c   |   36 
 src/mesa/drivers/dri/intel/intel_tex_layout.c    |   42 
 src/mesa/drivers/dri/intel/intel_tex_layout.h    |    1 
 src/mesa/drivers/dri/r200/r200_context.c         |    2 
 src/mesa/drivers/dri/r200/r200_texstate.c        |    6 
 src/mesa/drivers/dri/r200/r200_vertprog.c        |   16 
 src/mesa/drivers/dri/r300/r300_context.c         |    2 
 src/mesa/drivers/dri/radeon/radeon_screen.c      |    4 
 src/mesa/drivers/x11/fakeglx.c                   |    9 
 src/mesa/main/context.c                          |   16 
 src/mesa/main/depthstencil.c                     |    2 
 src/mesa/main/dlist.c                            |   33 
 src/mesa/main/drawpix.c                          |    5 
 src/mesa/main/imports.c                          |   21 
 src/mesa/main/texcompress_s3tc.c                 |   26 
 src/mesa/main/texenvprogram.c                    |    5 
 src/mesa/main/texformat.h                        |    2 
 src/mesa/main/texstate.c                         |    7 
 src/mesa/shader/prog_execute.c                   |    3 
 src/mesa/shader/prog_parameter.c                 |    3 
 src/mesa/shader/program.c                        |    3 
 src/mesa/shader/shader_api.c                     |   23 
 src/mesa/shader/slang/slang_codegen.c            |    1 
 src/mesa/shader/slang/slang_link.c               |    3 
 src/mesa/shader/slang/slang_preprocess.c         |    4 
 src/mesa/swrast/s_aatriangle.c                   |   16 
 src/mesa/swrast/s_aatritemp.h                    |   31 
 src/mesa/vbo/vbo_exec_draw.c                     |    2 
 src/mesa/vbo/vbo_save_api.c                      |    3 
 src/mesa/vbo/vbo_save_draw.c                     |    4 
 src/mesa/x86/common_x86.c                        |    4 
 src/mesa/x86/read_rgba_span_x86.S                |    4 
 windows/VC8/mesa/mesa.sln                        |    3 
 106 files changed, 3033 insertions(+), 494 deletions(-)

New commits:
commit 6f4c8b5b5047c6ff6273e3acc98c7ec504bb0e21
Author: Xiang, Haihao <haihao.xiang@intel.com>
Date:   Tue Jun 10 16:31:36 2008 +0800

    i965: apply commit 6c1a98e97affb2163e776551eb3a9e669ff99bbf to glsl
    (cherry picked from commit a742bed99ae840d806198172005f6b25399ec573)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 5f1dbf2..5a1f80d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -975,7 +975,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
 	brw_ADD(p,
 		dst[0],
 		retype(src0[0], BRW_REGISTER_TYPE_W),
-		brw_imm_d(- c->key.origin_x));
+		brw_imm_d(0 - c->key.origin_x));
     }
 
     if (mask & WRITEMASK_Y) {

commit f8bd9cc30fed39bd6c935b410b745acb73b18bbc
Author: Michal Wajdeczko <Michal.Wajdeczko@intel.com>
Date:   Tue May 6 13:01:29 2008 -0700

    Add support for ATI_separate_stencil in display lists.
    (cherry picked from commit 7f747204ea3b61e507b8bd48f33e8dd83f34705b)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 844db6b..1b2f90e 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -3246,6 +3246,36 @@ save_StencilFuncSeparate(GLenum face, GLenum func, GLint ref, GLuint mask)
 
 
 static void GLAPIENTRY
+save_StencilFuncSeparateATI(GLenum frontfunc, GLenum backfunc, GLint ref,
+                            GLuint mask)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   Node *n;
+   ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
+   /* GL_FRONT */
+   n = ALLOC_INSTRUCTION(ctx, OPCODE_STENCIL_FUNC_SEPARATE, 4);
+   if (n) {
+      n[1].e = GL_FRONT;
+      n[2].e = frontfunc;
+      n[3].i = ref;
+      n[4].ui = mask;
+   }
+   /* GL_BACK */
+   n = ALLOC_INSTRUCTION(ctx, OPCODE_STENCIL_FUNC_SEPARATE, 4);
+   if (n) {
+      n[1].e = GL_BACK;
+      n[2].e = backfunc;
+      n[3].i = ref;
+      n[4].ui = mask;
+   }
+   if (ctx->ExecuteFlag) {
+      CALL_StencilFuncSeparate(ctx->Exec, (GL_FRONT, frontfunc, ref, mask));
+      CALL_StencilFuncSeparate(ctx->Exec, (GL_BACK, backfunc, ref, mask));
+   }
+}
+
+
+static void GLAPIENTRY
 save_StencilMaskSeparate(GLenum face, GLuint mask)
 {
    GET_CURRENT_CONTEXT(ctx);
@@ -7870,6 +7900,9 @@ _mesa_init_dlist_table(struct _glapi_table *table)
    SET_StencilMaskSeparate(table, save_StencilMaskSeparate);
    SET_StencilOpSeparate(table, save_StencilOpSeparate);
 
+   /* ATI_separate_stencil */ 
+   SET_StencilFuncSeparateATI(table, save_StencilFuncSeparateATI);
+
    /* GL_ARB_imaging */
    /* Not all are supported */
    SET_BlendColor(table, save_BlendColor);

commit d9f9b1cd0b4bb88b62f68fd67775a1c558d7d25d
Author: Michal Wajdeczko <Michal.Wajdeczko@intel.com>
Date:   Wed Mar 26 12:51:20 2008 -0700

    [965] Correctly set read mask for OPCODE_SWZ in pass1.
    
    While OPCODE_SWZ has usually been optimized away in pass0, it may still
    exist if a SWZ with dst saturate was emitted in pass_fp.  Fixes an error
    in oglconform fpalu.c.
    (cherry picked from commit 13a6f73a64e23bad71d5e94d446e133b3cf634f7)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index 26c044d..f6f3a38 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -150,6 +150,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
       case OPCODE_FLR:
       case OPCODE_FRC:
       case OPCODE_MOV:
+      case OPCODE_SWZ:
 	 read0 = writemask;
 	 break;
 
@@ -257,7 +258,6 @@ void brw_wm_pass1( struct brw_wm_compile *c )
 	 read0 = WRITEMASK_XYW;
 	 break;
 
-      case OPCODE_SWZ:
       case OPCODE_DST:
       case OPCODE_TXP:
       default:

commit eca283976b1271c6b7e270f968e820e9cc5a54b5
Author: Michal Wajdeczko <Michal.Wajdeczko@intel.com>
Date:   Fri Mar 21 14:18:26 2008 -0700

    [965] Avoid emitting dead code for DPx/math instructions.
    
    The pass1 optimization stage clears out writemasks and registers, but the
    instructions themselves are still being processed at this stage, and could
    have resulted in them still being emitted.
    (cherry picked from commit c60b5dfde869c208a479ac273f4538d4d07574cf)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 3bda2c7..fd66631 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -504,6 +504,9 @@ static void emit_dp3( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
@@ -521,6 +524,9 @@ static void emit_dp4( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
@@ -539,6 +545,9 @@ static void emit_dph( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
@@ -582,6 +591,9 @@ static void emit_math1( struct brw_compile *p,
 			GLuint mask,
 			const struct brw_reg *arg0 )
 {
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
    //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
    //	  function == BRW_MATH_FUNCTION_SINCOS);
    
@@ -606,6 +618,9 @@ static void emit_math2( struct brw_compile *p,
 			const struct brw_reg *arg0,
 			const struct brw_reg *arg1)
 {
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 
    brw_push_insn_state(p);

commit 2176259ca6c1c5060f5dc9729ca60eb796d5d777
Author: Michal Wajdeczko <Michal.Wajdeczko@intel.com>
Date:   Fri Mar 21 13:48:12 2008 -0700

    [965] Improve pinterp performance by delaying reads of just-written regs.
    (cherry picked from commit bb419970ef465804c0e5369264314d9d92726c18)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index fa46151..3bda2c7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -223,6 +223,10 @@ static void emit_pinterp( struct brw_compile *p,
       if (mask & (1<<i)) {
 	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+      }
+   }
+   for(i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
 	 brw_MUL(p, dst[i], dst[i], w[3]);
       }
    }

commit 8fe6fcb900913770ab47e502ee2525554358806f
Author: Michal Wajdeczko <Michal.Wajdeczko@intel.com>
Date:   Fri Mar 21 13:43:44 2008 -0700

    [965] Fix negating of unsigned value in emit_wpos_xy.
    (cherry picked from commit 6c1a98e97affb2163e776551eb3a9e669ff99bbf)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 2df0e5e..fa46151 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -137,7 +137,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
       brw_ADD(p,
 	      dst[0],
 	      retype(arg0[0], BRW_REGISTER_TYPE_W),
-	      brw_imm_d(- c->key.origin_x));
+	      brw_imm_d(0 - c->key.origin_x));
    }
 
    if (mask & WRITEMASK_Y) {

commit 76d6edcc385d7ae188255c95251f3ecd1e29150c
Author: Michal Wajdeczko <Michal.Wajdeczko@intel.com>
Date:   Fri Mar 21 13:41:12 2008 -0700

    [965] Add MVP code for position invariant vertex programs.
    
    This fixes the arbvptorus demo.
    (cherry picked from commit 5f10438f2d9c739964cf53f04fee3190991325a1)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 752fe49..389fd89 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -125,6 +125,9 @@ static void brwProgramStringNotify( GLcontext *ctx,
       struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
       if (p == vp)
 	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      if (p->program.IsPositionInvariant) {
+	 _mesa_insert_mvp_code(ctx, &p->program);
+      }
       p->id = brw->program_id++;      
       p->param_state = p->program.Base.Parameters->StateFlags;
 

commit 98d6c671f597256645697cb8806a057c3038819b
Author: Michal Wajdeczko <Michal.Wajdeczko@intel.com>
Date:   Fri Mar 21 13:11:07 2008 -0700

    [win32] Use native aligned memory allocation functions.
    (cherry picked from commit 31fe7cf5e3ca38441acb25215420afa6944226f3)

diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 8ac16b9..e324dea 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -104,6 +104,8 @@ _mesa_align_malloc(size_t bytes, unsigned long alignment)
 
    (void) posix_memalign(& mem, alignment, bytes);
    return mem;
+#elif defined(_WIN32) && defined(_MSC_VER)
+   return _aligned_malloc(bytes, alignment);
 #else
    uintptr_t ptr, buf;
 
@@ -144,6 +146,15 @@ _mesa_align_calloc(size_t bytes, unsigned long alignment)
    }
 
    return mem;
+#elif defined(_WIN32) && defined(_MSC_VER)
+   void *mem;
+
+   mem = _aligned_malloc(bytes, alignment);
+   if (mem != NULL) {
+      (void) memset(mem, 0, bytes);
+   }
+
+   return mem;
 #else
    uintptr_t ptr, buf;
 
@@ -180,6 +191,8 @@ _mesa_align_free(void *ptr)
 {
 #if defined(HAVE_POSIX_MEMALIGN)
    free(ptr);
+#elif defined(_WIN32) && defined(_MSC_VER)
+   _aligned_free(ptr);
 #else
    void **cubbyHole = (void **) ((char *) ptr - sizeof(void *));
    void *realAddr = *cubbyHole;
@@ -194,6 +207,10 @@ void *
 _mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
                     unsigned long alignment)
 {
+#if defined(_WIN32) && defined(_MSC_VER)
+   (void) oldSize;
+   return _aligned_realloc(oldBuffer, newSize, alignment);
+#else
    const size_t copySize = (oldSize < newSize) ? oldSize : newSize;
    void *newBuf = _mesa_align_malloc(newSize, alignment);
    if (newBuf && oldBuffer && copySize > 0) {
@@ -202,6 +219,7 @@ _mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
    if (oldBuffer)
       _mesa_align_free(oldBuffer);
    return newBuf;
+#endif
 }
 
 

commit f652811df4424d29427d5cbaf41432843238ec4d
Author: Andrzej Trznadel <Andrzej.Trznadel@intel.com>
Date:   Mon Mar 17 15:54:24 2008 -0700

    [965] Fix fp temp reg release code to not usually release all temps.
    
    Also, use wrapped ffs() instead of native.
    (cherry picked from commit 3105bc1d885ea8ce083d2be85cbeac46d4d873a1)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 301bcba..f895f96 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -144,7 +144,7 @@ static struct prog_dst_register dst_undef( void )
 
 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
 {
-   int bit = ffs( ~c->fp_temp );
+   int bit = _mesa_ffs( ~c->fp_temp );
 
    if (!bit) {
       _mesa_printf("%s: out of temporaries\n", __FILE__);
@@ -158,7 +158,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c )
 
 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
 {
-   c->fp_temp &= ~1<<(temp.Index + 1 - FIRST_INTERNAL_TEMP);
+   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
 }
 
 

commit e279f4601d2ad1b15f2b6b71e9c6804a7a7f0b23
Author: Andrzej Trznadel <Andrzej.Trznadel@intel.com>
Date:   Mon Mar 17 15:52:08 2008 -0700

    Fix compat implementation of ffs() to return 1-based bit numbers.
    (cherry picked from commit e9809a36aaea3480cba5bd62360bf9d481ff9011)

diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 54fdcb7..8ac16b9 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -560,6 +560,7 @@ _mesa_ffs(int i)
          bit++;
          i >>= 1;
       }
+      bit++;
    }
    return bit;
 #else

commit 87a30337a1a0c3f409163cb0f249d3f684de0ecf
Author: Keith Packard <keithp@keithp.com>
Date:   Fri Apr 25 16:07:12 2008 -0700

    [i965] short immediate values must be replicated to both halves of the dword
    
    The 32-bit immediate value in the i965 instruction word must contain two
    copies of any 16-bit constants. brw_imm_uw and brw_imm_w just needed to
    copy the value into both halves of the immediate value instruction field.
    (cherry picked from commit ca73488f48e3ee278f0185bb7dcc03d7bdedb62d)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 25f1f89..c138d15 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -335,14 +335,14 @@ static __inline struct brw_reg brw_imm_ud( GLuint ud )
 static __inline struct brw_reg brw_imm_uw( GLushort uw )
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
-   imm.dw1.ud = uw;
+   imm.dw1.ud = uw | (uw << 16);
    return imm;
 }
 
 static __inline struct brw_reg brw_imm_w( GLshort w )
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
-   imm.dw1.d = w;
+   imm.dw1.d = w | (w << 16);
    return imm;
 }
 

commit 9c2047b2759c7fa0373d976b4e9916738fc26692
Author: Eric Anholt <eric@anholt.net>
Date:   Wed Mar 26 13:23:43 2008 -0700

    [965] Don't let the negate flags of src0 affect 1 constants in precalc_dst/lit
    
    This patch is a variant of a submission by Michal Wajdeczko to fix
    oglconform fpalu failures.
    (cherry picked from commit b4cbf6983e0e6d6502c1260f60c463841ab74590)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 0bd0448..301bcba 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -494,17 +494,20 @@ static void precalc_dst( struct brw_wm_compile *c,
 
 
    if (dst.WriteMask & WRITEMASK_XZ) {
+      struct prog_instruction *swz;
       GLuint z = GET_SWZ(src0.Swizzle, Z);
 
       /* dst.xz = swz src0.1zzz
        */
-      emit_op(c,
-	      OPCODE_SWZ,
-	      dst_mask(dst, WRITEMASK_XZ),
-	      inst->SaturateMode, 0, 0,
-	      src_swizzle(src0, SWIZZLE_ONE, z, z, z),
-	      src_undef(),
-	      src_undef());
+      swz = emit_op(c,
+		    OPCODE_SWZ,
+		    dst_mask(dst, WRITEMASK_XZ),
+		    inst->SaturateMode, 0, 0,
+		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
+		    src_undef(),
+		    src_undef());
+      /* Avoid letting negation flag of src0 affect our 1 constant. */
+      swz->SrcReg[0].NegateBase &= ~NEGATE_X;
    }
    if (dst.WriteMask & WRITEMASK_W) {
       /* dst.w = mov src1.w
@@ -527,15 +530,19 @@ static void precalc_lit( struct brw_wm_compile *c,
    struct prog_dst_register dst = inst->DstReg;
    
    if (dst.WriteMask & WRITEMASK_XW) {
+      struct prog_instruction *swz;
+
       /* dst.xw = swz src0.1111
        */
-      emit_op(c,
-	      OPCODE_SWZ,
-	      dst_mask(dst, WRITEMASK_XW),
-	      0, 0, 0,
-	      src_swizzle1(src0, SWIZZLE_ONE),
-	      src_undef(),
-	      src_undef());
+      swz = emit_op(c,
+		    OPCODE_SWZ,
+		    dst_mask(dst, WRITEMASK_XW),
+		    0, 0, 0,
+		    src_swizzle1(src0, SWIZZLE_ONE),
+		    src_undef(),
+		    src_undef());
+      /* Avoid letting the negation flag of src0 affect our 1 constant. */
+      swz->SrcReg[0].NegateBase = 0;
    }
 
 

commit 1dcb0433a309c934f5d132a322993b5d861b1980
Author: Zou Nan hai <nanhai.zou@intel.com>
Date:   Wed Mar 19 16:29:47 2008 +0800

    [i915] fix fragment.position

diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
index b2787ee..3c5ed47 100644
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -202,12 +202,19 @@ static void intel_wpos_triangle( intelContextPtr intel,
 {
    GLuint offset = intel->wpos_offset;
    GLuint size = intel->wpos_size;
-   
-   __memcpy( ((char *)v0) + offset, v0, size );
-   __memcpy( ((char *)v1) + offset, v1, size );
-   __memcpy( ((char *)v2) + offset, v2, size );
+   GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset);
+   GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset);
+   GLfloat *v2_wpos = (GLfloat *)((char *)v2 + offset);
+
+   __memcpy(v0_wpos, v0, size);
+   __memcpy(v1_wpos, v1, size);
+   __memcpy(v2_wpos, v2, size);
 
-   intel_draw_triangle( intel, v0, v1, v2 );
+   v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h;
+   v1_wpos[1] = -v1_wpos[1] + intel->driDrawable->h;
+   v2_wpos[1] = -v2_wpos[1] + intel->driDrawable->h;
+
+   intel_draw_triangle(intel, v0, v1, v2);
 }
 
 
@@ -217,9 +224,14 @@ static void intel_wpos_line( intelContextPtr intel,
 {
    GLuint offset = intel->wpos_offset;
    GLuint size = intel->wpos_size;
+   GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset);
+   GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset);
+
+   __memcpy(v0_wpos, v0, size);
+   __memcpy(v1_wpos, v1, size);
 
-   __memcpy( ((char *)v0) + offset, v0, size );
-   __memcpy( ((char *)v1) + offset, v1, size );
+   v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h;
+   v1_wpos[1] = -v1_wpos[1] + intel->driDrawable->h;
 
    intel_draw_line( intel, v0, v1 );
 }
@@ -230,8 +242,10 @@ static void intel_wpos_point( intelContextPtr intel,
 {
    GLuint offset = intel->wpos_offset;
    GLuint size = intel->wpos_size;
+   GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset);
 
-   __memcpy( ((char *)v0) + offset, v0, size );
+   __memcpy(v0_wpos, v0, size);
+   v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h;
 
    intel_draw_point( intel, v0 );
 }

commit 5ff27e02b35a8a699f1b4fb805a04a3d765c6f59
Author: Zou Nan hai <nanhai.zou@intel.com>
Date:   Mon Mar 17 16:39:10 2008 +0800

     [i965] fix wpos height 1 pixel higher
    (cherry picked from commit b0f681b458ebebab370bbfd2a17699cd851aae8b)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 4897e0b..2df0e5e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -145,7 +145,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
       brw_ADD(p,
 	      dst[1],
 	      negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
-	      brw_imm_d(c->key.origin_y + c->key.drawable_height));
+	      brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index fd237ee..5f1dbf2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -983,7 +983,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
 	brw_ADD(p,
 		dst[1],
 		negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
-		brw_imm_d(c->key.origin_y + c->key.drawable_height));
+		brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
     }
 }
 

commit 4beee58e57eaa09a17a7b74e268b2818d37644b2
Author: Eric Anholt <eric@anholt.net>
Date:   Thu Feb 28 13:18:12 2008 -0800

    [965] Bug #9151: make fragment.position return window coords not screen coords.
    (cherry picked from commit 9c8f27ba1366da07e20e86a0d48341ea97f5cda4)

diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 3af3377..b2ad0f7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -318,7 +318,30 @@ static void brw_wm_populate_key( struct brw_context *brw,
 	 }
       }
    }
-	  
+
+   /* _NEW_BUFFERS */
+   /*
+    * Include the draw buffer origin and height so that we can calculate
+    * fragment position values relative to the bottom left of the drawable,
+    * from the incoming screen origin relative position we get as part of our
+    * payload.
+    *
+    * We could avoid recompiling by including this as a constant referenced by
+    * our program, but if we were to do that it would also be nice to handle
+    * getting that constant updated at batchbuffer submit time (when we
+    * hold the lock and know where the buffer really is) rather than at emit
+    * time when we don't hold the lock and are just guessing.  We could also
+    * just avoid using this as key data if the program doesn't use
+    * fragment.position.
+    *
+    * This pretty much becomes moot with DRI2 and redirected buffers anyway,
+    * as our origins will always be zero then.
+    */
+   if (brw->intel.driDrawable != NULL) {
+      key->origin_x = brw->intel.driDrawable->x;
+      key->origin_y = brw->intel.driDrawable->y;
+      key->drawable_height = brw->intel.driDrawable->h;
+   }
 
    /* Extra info:
     */
@@ -357,6 +380,7 @@ const struct brw_tracked_state brw_wm_prog = {
 		_NEW_POLYGON |
 		_NEW_LINE |
 		_NEW_LIGHT |
+		_NEW_BUFFERS |
 		_NEW_TEXTURE),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
 		BRW_NEW_WM_INPUT_DIMENSIONS |
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 2f9c222..9fb231d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -73,6 +73,8 @@ struct brw_wm_prog_key {
    GLuint pad1:16;
 
    GLuint program_string_id:32;
+   GLuint origin_x, origin_y;
+   GLuint drawable_height;
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 7e7cff3..4897e0b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -122,26 +122,30 @@ static void emit_delta_xy(struct brw_compile *p,
    }
 }
 
-static void emit_wpos_xy(struct brw_compile *p,
-			   const struct brw_reg *dst,
-			   GLuint mask,
-			   const struct brw_reg *arg0)
+static void emit_wpos_xy(struct brw_wm_compile *c,
+			 const struct brw_reg *dst,
+			 GLuint mask,
+			 const struct brw_reg *arg0)
 {
-   /* Calc delta X,Y by subtracting origin in r1 from the pixel
-    * centers.
+   struct brw_compile *p = &c->func;
+
+   /* Calculate the pixel offset from window bottom left into destination
+    * X and Y channels.
     */
    if (mask & WRITEMASK_X) {
-      brw_MOV(p,
+      /* X' = X - origin */
+      brw_ADD(p,
 	      dst[0],
-	      retype(arg0[0], BRW_REGISTER_TYPE_UW));
+	      retype(arg0[0], BRW_REGISTER_TYPE_W),
+	      brw_imm_d(- c->key.origin_x));
    }
 
    if (mask & WRITEMASK_Y) {
-      /* TODO -- window_height - Y */
-      brw_MOV(p,
+      /* Y' = height - (Y - origin_y) = height + origin_y - Y */
+      brw_ADD(p,
 	      dst[1],
-	      negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
-
+	      negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+	      brw_imm_d(c->key.origin_y + c->key.drawable_height));
    }
 }
 
@@ -1111,7 +1115,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 break;
 
       case WM_WPOSXY:
-	 emit_wpos_xy(p, dst, dst_flags, args[0]);
+	 emit_wpos_xy(c, dst, dst_flags, args[0]);
 	 break;
 
       case WM_PIXELW:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 0a93d06..fd237ee 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -967,21 +967,23 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
     src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1);
     src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1);
 
-    /* Calc delta X,Y by subtracting origin in r1 from the pixel
-     * centers.
+    /* Calculate the pixel offset from window bottom left into destination
+     * X and Y channels.
      */
     if (mask & WRITEMASK_X) {
-	brw_MOV(p,
+	/* X' = X - origin_x */
+	brw_ADD(p,
 		dst[0],
-		retype(src0[0], BRW_REGISTER_TYPE_UW));
+		retype(src0[0], BRW_REGISTER_TYPE_W),
+		brw_imm_d(- c->key.origin_x));
     }
 
     if (mask & WRITEMASK_Y) {
-	/* TODO -- window_height - Y */
-	brw_MOV(p,
+	/* Y' = height - (Y - origin_y) = height + origin_y - Y */
+	brw_ADD(p,
 		dst[1],
-		retype(src0[1], BRW_REGISTER_TYPE_UW));
-
+		negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
+		brw_imm_d(c->key.origin_y + c->key.drawable_height));
     }
 }
 

commit 1f9de207195e8f7f65c07763e4693d9e06481892
Author: Eric Anholt <eric@anholt.net>
Date:   Wed Feb 6 15:41:04 2008 -0800

    [915] Fix COS function using same plan as SIN.
    
    The previous COS function failed badly outside of [-pi/2, pi/2].

diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 75436f6..ffb6b31 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -47,7 +47,7 @@ static const GLfloat sin_quad_constants[2][4] = {
       2.0,
       -1.0,
       .5,
-      0.0
+      .75
    },
    {
       4.0,
@@ -310,67 +310,87 @@ static void upload_program( struct i915_fragment_program *p )
 	 break;
 
       case OPCODE_COS:
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 tmp = i915_get_utemp( p );
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
+	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 src0, 
-			 i915_emit_const1f(p, 1.0/(M_PI * 2)),
-			 0);
+	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
+         i915_emit_arith(p,
+                         A0_MAD,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+                         src0,
+			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
+			 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */
 
-	 i915_emit_arith( p, 
-			 A0_MOD,
+         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+	 i915_emit_arith(p,
+			 A0_MAD,
 			 tmp, A0_DEST_CHANNEL_X, 0,
-			 tmp, 
-			 0, 0 );
+			 tmp,
+			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
+			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
 
-	 /* By choosing different taylor constants, could get rid of this mul:
+	 /* Compute COS with the same calculation used for SIN, but a
+	  * different source range has been mapped to [-1,1] this time.
 	  */
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 tmp, 
-			 i915_emit_const1f(p, (M_PI * 2)),
+
+	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
 			 0);
 
-	 /* 
-	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
-	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
-	  * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
-	  * result = DP4 t0, cos_constants
-	  */
-	 i915_emit_arith( p, 
+	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
+	 i915_emit_arith(p,
 			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_XY, 0,
-			 swizzle(tmp, X,X,ONE,ONE), 
-			 swizzle(tmp, X,ONE,ONE,ONE), 0);
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 tmp,
+			 0);
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_XYZ, 0,
-			 swizzle(tmp, X,Y,X,ONE), 
-			 swizzle(tmp, X,X,ONE,ONE), 0);
+	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
+         i915_emit_arith(p,
+                         A0_DP3,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp,
+                         swizzle(consts1, X, Y, ZERO, ZERO),
+			 0);
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_XYZ, 0,
-			 swizzle(tmp, X,X,Z,ONE), 
-			 swizzle(tmp, Z,ONE,ONE,ONE), 0);
-	    
-	 i915_emit_arith( p, 
-			 A0_DP4,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(tmp, ONE,Z,Y,X),
-			 i915_emit_const4fv( p, cos_constants ), 0);
+	 /* tmp.x now contains a first approximation (y).  Now, weight it
+	  * against tmp.y**2 to get closer.
+	  */
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
+			 0);
 
-	 break;
+	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
+	 i915_emit_arith(p,
+			 A0_MAD,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
 
-      case OPCODE_DP3: 
-	 EMIT_2ARG_ARITH( A0_DP3 );
-	 break;
+	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
+	 i915_emit_arith(p,
+			 A0_MAD,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+			 swizzle(consts1, W, W, W, W),
+			 swizzle(tmp, Y, Y, Y, Y),
+			 swizzle(tmp, X, X, X, X));
+         break;
+
+      case OPCODE_DP3:
+         EMIT_2ARG_ARITH(A0_DP3);
+         break;
 
       case OPCODE_DP4: 
 	 EMIT_2ARG_ARITH( A0_DP4 );

commit d05a8d97507367bcc0a0b1b76a629685257e5c1e
Author: Eric Anholt <eric@anholt.net>
Date:   Wed Feb 6 15:38:16 2008 -0800

    [915] Use a quartic term to improve the accuracy of SIN results.
    
    This is described in the link in the comment, and is the same technique that
    r300 uses.

diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c


Reply to: