[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-ati: Changes to 'upstream-experimental'



 src/AtomBios/includes/CD_Common_Types.h |    4 
 src/atombios_crtc.c                     |   15 
 src/legacy_crtc.c                       |   88 ++-
 src/legacy_output.c                     |   92 +++
 src/radeon.h                            |   30 -
 src/radeon_accel.c                      |   84 ++-
 src/radeon_accelfuncs.c                 |    7 
 src/radeon_atombios.c                   |   78 ++
 src/radeon_atombios.h                   |    6 
 src/radeon_bios.c                       |   73 +-
 src/radeon_commonfuncs.c                |  445 +++++++++++++++-
 src/radeon_dri.h                        |    2 
 src/radeon_driver.c                     |  172 +++---
 src/radeon_exa_funcs.c                  |    6 
 src/radeon_exa_render.c                 |  857 ++++++++++++++++----------------
 src/radeon_output.c                     |   12 
 src/radeon_probe.h                      |   14 
 src/radeon_reg.h                        |   84 ++-
 src/radeon_textured_videofuncs.c        |  319 +++--------
 src/radeon_video.c                      |   13 
 20 files changed, 1535 insertions(+), 866 deletions(-)

New commits:
commit c5d62fa0e8f52c3264ff9db3ff10cdf5a806bfc0
Author: Owen Taylor <otaylor@huygens.home.fishsoup.net>
Date:   Thu Apr 17 13:14:53 2008 +0200

    Emulate repeats by drawing in tiles
    
    When we can't turn on hardware repeats, because the texture
    is non-power-of-two, or has padding at the ends of lines,
    try to draw the image in multiple tiles rather than falling
    back to software. (We can only do this when there is no
    transform.)

diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index e35cc17..138216f 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -59,6 +59,12 @@
 static Bool is_transform[2];
 static PictTransform *transform[2];
 static Bool has_mask;
+/* Whether we are tiling horizontally and vertically */
+static Bool need_src_tile_x;
+static Bool need_src_tile_y;
+/* Size of tiles ... set to 65536x65536 if not tiling in that direction */
+static Bool src_tile_width;
+static Bool src_tile_height;
 
 struct blendinfo {
     Bool dst_alpha;
@@ -222,6 +228,95 @@ union intfloat {
     CARD32 i;
 };
 
+/* Check if we need a software-fallback because of a repeating
+ *   non-power-of-two texture.
+ *
+ * canTile: whether we can emulate a repeat by drawing in tiles:
+ *   possible for the source, but not for the mask. (Actually
+ *   we could do tiling for the mask too, but dealing with the
+ *   combination of a tiled mask and a tiled source would be
+ *   a lot of complexity, so we handle only the most common
+ *   case of a repeating source.)
+ */
+static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
+{
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+
+    if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
+	!(!pPict->transform && canTile))
+	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
+			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
+
+    return TRUE;
+}
+
+/* Determine if the pitch of the pixmap meets the criteria for being
+ * used as a repeating texture: no padding or only a single line texture.
+ */
+static Bool RADEONPitchMatches(PixmapPtr pPix)
+{
+    int w = pPix->drawable.width;
+    int h = pPix->drawable.height;
+    CARD32 txpitch = exaGetPixmapPitch(pPix);
+
+    if (h > 1 && ((w * pPix->drawable.bitsPerPixel / 8 + 31) & ~31) != txpitch)
+	return FALSE;
+
+    return TRUE;
+}
+
+/* We can't turn on repeats normally for a non-power-of-two dimension,
+ * but if the source isn't transformed, we can get the same effect
+ * by drawing the image in multiple tiles. (A common case that it's
+ * important to get right is drawing a strip of a NPOTxPOT texture
+ * repeating in the POT direction. With tiling, this ends up as
+ * a single tile on R300 and newer, which is perfect.)
+ *
+ * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
+ *   one direction and POT in the other in the POT direction; on
+ *   older chips we can only repeat at all if the texture is POT in
+ *   both directions.
+ *
+ * needMatchingPitch: On R100/R200, we can only repeat horizontally if
+ *   there is no padding in the texture. Textures with small POT widths
+ *   (1,2,4,8) thus can't be tiled.
+ */
+static Bool RADEONSetupSourceTile(PicturePtr pPict,
+				  PixmapPtr pPix,
+				  Bool canTile1d,
+				  Bool needMatchingPitch)
+{
+    need_src_tile_x = need_src_tile_y = FALSE;
+    src_tile_width = src_tile_height = 65536; /* "infinite" */
+	    
+    if (pPict->repeat) {
+	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
+	
+	int w = pPict->pDrawable->width;
+	int h = pPict->pDrawable->height;
+	
+	if (pPict->transform) {
+	    if (badPitch)
+		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
+				 w, (unsigned)exaGetPixmapPitch(pPix)));
+	} else {
+	    need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
+	    need_src_tile_y = (h & (h - 1)) != 0;
+	    
+	    if (!canTile1d)
+		need_src_tile_x = need_src_tile_y = need_src_tile_x || need_src_tile_y;
+	}
+
+	if (need_src_tile_x)
+	  src_tile_width = w;
+	if (need_src_tile_y)
+	  src_tile_height = h;
+    }
+
+    return TRUE;
+}
+
 /* R100-specific code */
 
 static Bool R100CheckCompositeTexture(PicturePtr pPict, int unit)
@@ -241,8 +336,8 @@ static Bool R100CheckCompositeTexture(PicturePtr pPict, int unit)
 	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
 			(int)pPict->format));
 
-    if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0))
-	RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h));
+    if (!RADEONCheckTexturePOT(pPict, unit == 0))
+	return FALSE;
 
     if (pPict->filter != PictFilterNearest &&
 	pPict->filter != PictFilterBilinear)
@@ -262,6 +357,7 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
     CARD32 txfilter, txformat, txoffset, txpitch;
     int w = pPict->pDrawable->width;
     int h = pPict->pDrawable->height;
+    Bool repeat = pPict->repeat && !(unit == 0 && (need_src_tile_x || need_src_tile_y));
     int i;
     ACCEL_PREAMBLE();
 
@@ -282,9 +378,8 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
     if (RADEONPixmapIsColortiled(pPix))
 	txoffset |= RADEON_TXO_MACRO_TILE;
 
-    if (pPict->repeat) {
-	if ((h != 1) &&
-	    (((w * pPix->drawable.bitsPerPixel / 8 + 31) & ~31) != txpitch))
+    if (repeat) {
+	if (!RADEONPitchMatches(pPix))
 	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
 			     w, (unsigned)txpitch));
 
@@ -308,7 +403,7 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
     }
 
-    if (pPict->repeat)
+    if (repeat)
       txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
 
     BEGIN_ACCEL(5);
@@ -462,6 +557,9 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op,
     if (((dst_pitch >> pixel_shift) & 0x7) != 0)
 	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
 
+    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
+	return FALSE;
+
     if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
 	return FALSE;
     pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
@@ -553,8 +651,8 @@ static Bool R200CheckCompositeTexture(PicturePtr pPict, int unit)
 	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
 			 (int)pPict->format));
 
-    if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0))
-	RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h));
+    if (!RADEONCheckTexturePOT(pPict, unit == 0))
+	return FALSE;
 
     if (pPict->filter != PictFilterNearest &&
 	pPict->filter != PictFilterBilinear)
@@ -572,6 +670,7 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
     CARD32 txfilter, txformat, txoffset, txpitch;
     int w = pPict->pDrawable->width;
     int h = pPict->pDrawable->height;
+    Bool repeat = pPict->repeat && !(unit == 0 && (need_src_tile_x || need_src_tile_y));
     int i;
     ACCEL_PREAMBLE();
 
@@ -592,9 +691,8 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
     if (RADEONPixmapIsColortiled(pPix))
 	txoffset |= R200_TXO_MACRO_TILE;
 
-    if (pPict->repeat) {
-	if ((h != 1) &&
-	    (((w * pPix->drawable.bitsPerPixel / 8 + 31) & ~31) != txpitch))
+    if (repeat) {
+	if (!RADEONPitchMatches(pPix))
 	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
 			     w, (unsigned)txpitch));
 
@@ -620,7 +718,7 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
     }
 
-    if (pPict->repeat)
+    if (repeat)
       txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
 
     BEGIN_ACCEL(6);
@@ -756,6 +854,9 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
     if (((dst_pitch >> pixel_shift) & 0x7) != 0)
 	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
 
+    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
+	return FALSE;
+
     if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
 	return FALSE;
     pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
@@ -864,8 +965,8 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict, int unit, Bool is_r500)
 	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
 			 (int)pPict->format));
 
-    if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0))
-	RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h));
+    if (!RADEONCheckTexturePOT(pPict, unit == 0))
+	return FALSE;
 
     if (pPict->filter != PictFilterNearest &&
 	pPict->filter != PictFilterBilinear)
@@ -941,13 +1042,16 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
     info->texW[unit] = w;
     info->texH[unit] = h;
 
-    if (pPict->repeat)
-      txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
-		  R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP));
+    if (pPict->repeat && !(unit == 0 && need_src_tile_x))
+      txfilter = R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
     else
-      txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
-		  R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL));
+      txfilter = R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
 
+    if (pPict->repeat && !(unit == 0 && need_src_tile_y))
+      txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
+    else
+      txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
+		   
     txfilter |= (unit << R300_TX_ID_SHIFT);
 
     switch (pPict->filter) {
@@ -1108,6 +1212,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     if (((dst_pitch >> pixel_shift) & 0x7) != 0)
 	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
 
+    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
+	return FALSE;
+
     if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0))
 	return FALSE;
     txenable = R300_TEX_0_ENABLE;
@@ -1788,11 +1895,11 @@ static inline void transformPoint(PictTransform *transform, xPointFixed *point)
 }
 #endif
 
-static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
-				     int srcX, int srcY,
-				     int maskX, int maskY,
-				     int dstX, int dstY,
-				     int w, int h)
+static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst,
+					   int srcX, int srcY,
+					   int maskX, int maskY,
+					   int dstX, int dstY,
+					   int w, int h)
 {
     RINFO_FROM_SCREEN(pDst->drawable.pScreen);
     int vtx_count;
@@ -1934,6 +2041,66 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 #undef VTX_OUT
 #undef VTX_OUT_MASK
 
+static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
+				       int srcX, int srcY,
+				       int maskX, int maskY,
+				       int dstX, int dstY,
+				       int width, int height)
+{
+    int tileSrcY, tileMaskY, tileDstY;
+    int remainingHeight;
+    
+    if (!need_src_tile_x && !need_src_tile_y) {
+	FUNC_NAME(RadeonCompositeTile)(pDst,
+				       srcX, srcY,
+				       maskX, maskY,
+				       dstX, dstY,
+				       width, height);
+	return;
+    }
+
+    /* Tiling logic borrowed from exaFillRegionTiled */
+
+    modulus(srcY, src_tile_height, tileSrcY);
+    tileMaskY = maskY;
+    tileDstY = dstY;
+
+    remainingHeight = height;
+    while (remainingHeight > 0) {
+	int remainingWidth = width;
+	int tileSrcX, tileMaskX, tileDstX;
+	int h = src_tile_height - tileSrcY;
+	
+	if (h > remainingHeight)
+	    h = remainingHeight;
+	remainingHeight -= h;
+
+	modulus(srcX, src_tile_width, tileSrcX);
+	tileMaskX = maskX;
+	tileDstX = dstX;
+	
+	while (remainingWidth > 0) {
+	    int w = src_tile_width - tileSrcX;
+	    if (w > remainingWidth)
+		w = remainingWidth;
+	    remainingWidth -= w;
+	    
+	    FUNC_NAME(RadeonCompositeTile)(pDst,
+					   tileSrcX, tileSrcY,
+					   tileMaskX, tileMaskY,
+					   tileDstX, tileDstY,
+					   w, h);
+	    
+	    tileSrcX = 0;
+	    tileMaskX += w;
+	    tileDstX += w;
+	}
+	tileSrcY = 0;
+	tileMaskY += h;
+	tileDstY += h;
+    }
+}
+
 static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
 {
     RINFO_FROM_SCREEN(pDst->drawable.pScreen);

commit eeb7b74bb6c813b0e3afa4b704f6ffb0d0aab92b
Author: Owen Taylor <otaylor@huygens.home.fishsoup.net>
Date:   Thu Apr 17 13:14:25 2008 +0200

    Turn on wrapping when repeating on R100 + R200
    
    Actually enable repeats for R100 and R200. This corresponds
    to a R300 change made in the patch in:
    http://bugs.freedesktop.org/show_bug.cgi?id=15333

diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 654ef19..e35cc17 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -308,6 +308,9 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
     }
 
+    if (pPict->repeat)
+      txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
+
     BEGIN_ACCEL(5);
     if (unit == 0) {
 	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
@@ -617,6 +620,9 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
     }
 
+    if (pPict->repeat)
+      txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
+
     BEGIN_ACCEL(6);
     if (unit == 0) {
 	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);

commit e511f39dfef503006cf249b9f6934091eaade9b5
Author: Alex Deucher <alex@t41p.hsd1.va.comcast.net>
Date:   Thu Apr 17 05:04:34 2008 -0400

    R300+: move more common code into init3d()
    
    - pre-load r3xx tex instructions
    - setup RS instructions in init3d()

diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index 0b99a03..c249c43 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -428,8 +428,90 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	    FINISH_ACCEL();
 	}
 
+	/* pre-load the RS instructions */
+	BEGIN_ACCEL(4);
 	if (IS_R300_3D) {
-	    BEGIN_ACCEL(7);
+	    /* rasterizer source table
+	     * R300_RS_TEX_PTR is the offset into the input RS stream
+	     * 0,1 are tex0
+	     * 2,3 are tex1
+	     */
+	    OUT_ACCEL_REG(R300_RS_IP_0,
+			  (R300_RS_TEX_PTR(0) |
+			   R300_RS_SEL_S(R300_RS_SEL_C0) |
+			   R300_RS_SEL_T(R300_RS_SEL_C1) |
+			   R300_RS_SEL_R(R300_RS_SEL_K0) |
+			   R300_RS_SEL_Q(R300_RS_SEL_K1)));
+	    OUT_ACCEL_REG(R300_RS_IP_1,
+			  (R300_RS_TEX_PTR(2) |
+			   R300_RS_SEL_S(R300_RS_SEL_C0) |
+			   R300_RS_SEL_T(R300_RS_SEL_C1) |
+			   R300_RS_SEL_R(R300_RS_SEL_K0) |
+			   R300_RS_SEL_Q(R300_RS_SEL_K1)));
+	    /* src tex */
+	    /* R300_INST_TEX_ID - select the RS source table entry
+	     * R300_INST_TEX_ADDR - the FS temp register for the texture data
+	     */
+	    OUT_ACCEL_REG(R300_RS_INST_0, (R300_INST_TEX_ID(0) |
+					   R300_RS_INST_TEX_CN_WRITE |
+					   R300_INST_TEX_ADDR(0)));
+	    /* mask tex */
+	    OUT_ACCEL_REG(R300_RS_INST_1, (R300_INST_TEX_ID(1) |
+					   R300_RS_INST_TEX_CN_WRITE |
+					   R300_INST_TEX_ADDR(1)));
+
+	} else {
+	    /* rasterizer source table
+	     * R300_RS_TEX_PTR is the offset into the input RS stream
+	     * 0,1 are tex0
+	     * 2,3 are tex1
+	     */
+	    OUT_ACCEL_REG(R500_RS_IP_0, ((0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+					 (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+					 (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+					 (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)));
+
+	    OUT_ACCEL_REG(R500_RS_IP_1, ((2 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+					 (3 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+					 (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+					 (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)));
+	    /* src tex */
+	    /* R500_RS_INST_TEX_ID_SHIFT - select the RS source table entry
+	     * R500_RS_INST_TEX_ADDR_SHIFT - the FS temp register for the texture data
+	     */
+	    OUT_ACCEL_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) |
+					   R500_RS_INST_TEX_CN_WRITE |
+					   (0 << R500_RS_INST_TEX_ADDR_SHIFT)));
+	    /* mask tex */
+	    OUT_ACCEL_REG(R500_RS_INST_1, ((1 << R500_RS_INST_TEX_ID_SHIFT) |
+					   R500_RS_INST_TEX_CN_WRITE |
+					   (1 << R500_RS_INST_TEX_ADDR_SHIFT)));
+	}
+	FINISH_ACCEL();
+
+	/* pre-load FS tex instructions */
+	if (IS_R300_3D) {
+	    BEGIN_ACCEL(2);
+	    /* tex inst for src texture */
+	    OUT_ACCEL_REG(R300_US_TEX_INST_0,
+			  (R300_TEX_SRC_ADDR(0) |
+			   R300_TEX_DST_ADDR(0) |
+			   R300_TEX_ID(0) |
+			   R300_TEX_INST(R300_TEX_INST_LD)));
+
+	    /* tex inst for mask texture */
+	    OUT_ACCEL_REG(R300_US_TEX_INST_1,
+			  (R300_TEX_SRC_ADDR(1) |
+			   R300_TEX_DST_ADDR(1) |
+			   R300_TEX_ID(1) |
+			   R300_TEX_INST(R300_TEX_INST_LD)));
+	    FINISH_ACCEL();
+	}
+
+	if (IS_R300_3D) {
+	    BEGIN_ACCEL(9);
+	    OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
+	    OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
 	    OUT_ACCEL_REG(R300_US_CODE_ADDR_0,
 			  (R300_ALU_START(0) |
 			   R300_ALU_SIZE(0) |
@@ -445,8 +527,12 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 			   R300_ALU_SIZE(0) |
 			   R300_TEX_START(0) |
 			   R300_TEX_SIZE(0)));
-	} else
-	    BEGIN_ACCEL(4);
+	} else {
+	    BEGIN_ACCEL(7);
+	    OUT_ACCEL_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+	    OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
+	    OUT_ACCEL_REG(R500_US_FC_CTRL, 0);
+	}
 	OUT_ACCEL_REG(R300_US_W_FMT, 0);
 	OUT_ACCEL_REG(R300_US_OUT_FMT_1, (R300_OUT_FMT_UNUSED |
 					  R300_OUT_FMT_C0_SEL_BLUE |
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index f68f34b..654ef19 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1195,7 +1195,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	}
     }
 
-    /* Position and two sets of 2 texture coordinates */
+    /* Position and one or two sets of 2 texture coordinates */
     OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
     if (pMask)
 	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
@@ -1299,87 +1299,41 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	}
 
 
-	/* setup the rasterizer */
+	/* setup the rasterizer, load FS */
+	BEGIN_ACCEL(9);
 	if (pMask) {
-	    BEGIN_ACCEL(17);
 	    /* 4 components: 2 for tex0, 2 for tex1 */
 	    OUT_ACCEL_REG(R300_RS_COUNT,
 			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
 			   R300_RS_COUNT_HIRES_EN));
-	    /* rasterizer source table
-	     * R300_RS_TEX_PTR is the offset into the input RS stream
-	     * 0,1 are tex0
-	     * 2,3 are tex1
-	     */
-	    OUT_ACCEL_REG(R300_RS_IP_0,
-			  (R300_RS_TEX_PTR(0) |
-			   R300_RS_SEL_S(R300_RS_SEL_C0) |
-			   R300_RS_SEL_T(R300_RS_SEL_C1) |
-			   R300_RS_SEL_R(R300_RS_SEL_K0) |
-			   R300_RS_SEL_Q(R300_RS_SEL_K1)));
-	    OUT_ACCEL_REG(R300_RS_IP_1,
-			  (R300_RS_TEX_PTR(2) |
-			   R300_RS_SEL_S(R300_RS_SEL_C0) |
-			   R300_RS_SEL_T(R300_RS_SEL_C1) |
-			   R300_RS_SEL_R(R300_RS_SEL_K0) |
-			   R300_RS_SEL_Q(R300_RS_SEL_K1)));
 
 	    /* R300_INST_COUNT_RS - highest RS instruction used */
 	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
-	    /* src tex */
-	    /* R300_INST_TEX_ID - select the RS source table entry
-	     * R300_INST_TEX_ADDR - the FS temp register for the texture data
-	     */
-	    OUT_ACCEL_REG(R300_RS_INST_0, (R300_INST_TEX_ID(0) |
-					   R300_RS_INST_TEX_CN_WRITE |
-					   R300_INST_TEX_ADDR(0)));
-	    /* mask tex */
-	    OUT_ACCEL_REG(R300_RS_INST_1, (R300_INST_TEX_ID(1) |
-					   R300_RS_INST_TEX_CN_WRITE |
-					   R300_INST_TEX_ADDR(1)));
-
-	    OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
-	    OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
+
 	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
 						R300_ALU_CODE_SIZE(0) |
 						R300_TEX_CODE_OFFSET(0) |
 						R300_TEX_CODE_SIZE(1)));
 
+	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
+			  (R300_ALU_START(0) |
+			   R300_ALU_SIZE(0) |
+			   R300_TEX_START(0) |
+			   R300_TEX_SIZE(1) |
+			   R300_RGBA_OUT));
 	} else {
-	    BEGIN_ACCEL(14);
 	    /* 2 components: 2 for tex0 */
 	    OUT_ACCEL_REG(R300_RS_COUNT,
 			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
 			   R300_RS_COUNT_HIRES_EN));
-	    OUT_ACCEL_REG(R300_RS_IP_0,
-			  (R300_RS_TEX_PTR(0) |
-			   R300_RS_SEL_S(R300_RS_SEL_C0) |
-			   R300_RS_SEL_T(R300_RS_SEL_C1) |
-			   R300_RS_SEL_R(R300_RS_SEL_K0) |
-			   R300_RS_SEL_Q(R300_RS_SEL_K1)));
+
 	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-	    /* src tex */
-	    OUT_ACCEL_REG(R300_RS_INST_0, (R300_INST_TEX_ID(0) |
-					   R300_RS_INST_TEX_CN_WRITE |
-					   R300_INST_TEX_ADDR(0)));
 
-	    OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
-	    OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
 	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
 						R300_ALU_CODE_SIZE(0) |
 						R300_TEX_CODE_OFFSET(0) |
 						R300_TEX_CODE_SIZE(0)));
 
-	}
-
-	if (pMask) {
-	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
-			  (R300_ALU_START(0) |
-			   R300_ALU_SIZE(0) |
-			   R300_TEX_START(0) |
-			   R300_TEX_SIZE(1) |
-			   R300_RGBA_OUT));
-	} else {
 	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
 			  (R300_ALU_START(0) |
 			   R300_ALU_SIZE(0) |
@@ -1391,21 +1345,8 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	/* shader output swizzling */
 	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
 
-	/* tex inst for src texture */
-	OUT_ACCEL_REG(R300_US_TEX_INST_0,
-		      (R300_TEX_SRC_ADDR(0) |
-		       R300_TEX_DST_ADDR(0) |
-		       R300_TEX_ID(0) |
-		       R300_TEX_INST(R300_TEX_INST_LD)));
-
-	if (pMask) {
-	    /* tex inst for mask texture */
-	    OUT_ACCEL_REG(R300_US_TEX_INST_1,
-			  (R300_TEX_SRC_ADDR(1) |
-			   R300_TEX_DST_ADDR(1) |
-			   R300_TEX_ID(1) |
-			   R300_TEX_INST(R300_TEX_INST_LD)));
-	}
+	/* tex inst for src texture is pre-loaded in RADEONInit3DEngine() */
+	/* tex inst for mask texture is pre-loaded in RADEONInit3DEngine() */
 
 	/* RGB inst
 	 * temp addresses for texture inputs
@@ -1573,70 +1514,28 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	    break;
 	}
 
+	BEGIN_ACCEL(6);
 	if (pMask) {
-	    BEGIN_ACCEL(13);
 	    /* 4 components: 2 for tex0, 2 for tex1 */
 	    OUT_ACCEL_REG(R300_RS_COUNT,
 			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
 			   R300_RS_COUNT_HIRES_EN));
-	    /* rasterizer source table
-	     * R300_RS_TEX_PTR is the offset into the input RS stream
-	     * 0,1 are tex0
-	     * 2,3 are tex1
-	     */
-	    OUT_ACCEL_REG(R500_RS_IP_0, ((0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-					 (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-					 (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-					 (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)));
-
-	    OUT_ACCEL_REG(R500_RS_IP_1, ((2 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-					 (3 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-					 (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-					 (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)));
+
 	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
 	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
 
-	    /* src tex */
-	    /* R500_RS_INST_TEX_ID_SHIFT - select the RS source table entry
-	     * R500_RS_INST_TEX_ADDR_SHIFT - the FS temp register for the texture data
-	     */
-	    OUT_ACCEL_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) |
-					   R500_RS_INST_TEX_CN_WRITE |
-					   (0 << R500_RS_INST_TEX_ADDR_SHIFT)));
-	    /* mask tex */
-	    OUT_ACCEL_REG(R500_RS_INST_1, ((1 << R500_RS_INST_TEX_ID_SHIFT) |
-					   R500_RS_INST_TEX_CN_WRITE |
-					   (1 << R500_RS_INST_TEX_ADDR_SHIFT)));
-
-	    OUT_ACCEL_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-	    OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
-	    OUT_ACCEL_REG(R500_US_FC_CTRL, 0);
 	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
 					      R500_US_CODE_END_ADDR(2)));
 	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
 					       R500_US_CODE_RANGE_SIZE(2)));
 	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
 	} else {
-	    BEGIN_ACCEL(11);
 	    OUT_ACCEL_REG(R300_RS_COUNT,
 			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
 			   R300_RS_COUNT_HIRES_EN));
 
-	    OUT_ACCEL_REG(R500_RS_IP_0, ((0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-					 (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-					 (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-					 (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)));
-
 	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
 
-	    /* src tex */
-	    OUT_ACCEL_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) |
-					   R500_RS_INST_TEX_CN_WRITE |
-					   (0 << R500_RS_INST_TEX_ADDR_SHIFT)));
-
-	    OUT_ACCEL_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-	    OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
-	    OUT_ACCEL_REG(R500_US_FC_CTRL, 0);
 	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
 					      R500_US_CODE_END_ADDR(1)));
 	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 4f0f567..f7069f0 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -193,9 +193,9 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
 	/* setup the VAP */
 	if (info->has_tcl)
-	    BEGIN_VIDEO(5);
+	    BEGIN_VIDEO(6);
 	else
-	    BEGIN_VIDEO(3);
+	    BEGIN_VIDEO(4);
 
 	/* These registers define the number, type, and location of data submitted
 	 * to the PVS unit of GA input (when PVS is disabled)
@@ -221,7 +221,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		       R300_LAST_VEC_1 |
 		       R300_SIGNED_1));
 
-	/* load the vertex shader 
+	/* load the vertex shader
 	 * We pre-load vertex programs in RADEONInit3DEngine():
 	 * - exa no mask
 	 * - exa mask
@@ -245,33 +245,14 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
 	/* setup pixel shader */
 	if (IS_R300_3D) {
-	    BEGIN_VIDEO(13);
+	    BEGIN_VIDEO(8);
 	    /* 2 components: 2 for tex0 */
 	    OUT_VIDEO_REG(R300_RS_COUNT,
 			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
 			   R300_RS_COUNT_HIRES_EN));
-	    /* rasterizer source table
-	     * R300_RS_TEX_PTR is the offset into the input RS stream
-	     * 0,1 are tex0
-	     */
-	    OUT_VIDEO_REG(R300_RS_IP_0,
-			  (R300_RS_TEX_PTR(0) |
-			   R300_RS_COL_PTR(0) |
-			   R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA) |
-			   R300_RS_SEL_S(R300_RS_SEL_C0) |
-			   R300_RS_SEL_T(R300_RS_SEL_C1) |
-			   R300_RS_SEL_R(R300_RS_SEL_K0) |
-			   R300_RS_SEL_Q(R300_RS_SEL_K1)));
 	    /* R300_INST_COUNT_RS - highest RS instruction used */
 	    OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-	    /* R300_INST_TEX_ID - select the RS source table entry
-	     * R300_INST_TEX_ADDR - the FS temp register for the texture data
-	     */
-	    OUT_VIDEO_REG(R300_RS_INST_0, (R300_INST_TEX_ID(0) |
-					   R300_RS_INST_TEX_CN_WRITE |
-					   R300_INST_TEX_ADDR(0)));
-	    OUT_VIDEO_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
-	    OUT_VIDEO_REG(R300_US_PIXSIZE, 0); /* we only use temp 0 in this program */
+
 	    OUT_VIDEO_REG(R300_US_CODE_OFFSET,
 			  (R300_ALU_CODE_OFFSET(0) |
 			   R300_ALU_CODE_SIZE(1) |
@@ -284,12 +265,9 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 			   R300_TEX_START(0) |
 			   R300_TEX_SIZE(0) |
 			   R300_RGBA_OUT));
-	    /* tex inst */
-	    OUT_VIDEO_REG(R300_US_TEX_INST_0,
-			  (R300_TEX_SRC_ADDR(0) |
-			   R300_TEX_DST_ADDR(0) |
-			   R300_TEX_ID(0) |
-			   R300_TEX_INST(R300_TEX_INST_LD)));
+
+	    /* tex inst is preloaded in RADEONInit3DEngine() */
+
 	    /* ALU inst */
 	    /* RGB */
 	    OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR_0,
@@ -332,31 +310,15 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 			   R300_ALU_ALPHA_CLAMP));
 	    FINISH_VIDEO();
 	} else {
-	    BEGIN_VIDEO(23);
+	    BEGIN_VIDEO(18);
 	    /* 2 components: 2 for tex0 */
 	    OUT_VIDEO_REG(R300_RS_COUNT,
 			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
 			   R300_RS_COUNT_HIRES_EN));
-	    /* rasterizer source table
-	     * R300_RS_TEX_PTR is the offset into the input RS stream
-	     * 0,1 are tex0
-	     */
-	    OUT_VIDEO_REG(R500_RS_IP_0, ((0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-					 (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-					 (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-					 (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)));
 
 	    /* R300_INST_COUNT_RS - highest RS instruction used */
 	    OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-	    /* R500_RS_INST_TEX_ID - select the RS source table entry
-	     * R500_RS_INST_TEX_ADDR - the FS temp register for the texture data
-	     */
-	    OUT_VIDEO_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) |
-					   R500_RS_INST_TEX_CN_WRITE |
-					   (0 << R500_RS_INST_TEX_ADDR_SHIFT)));
-	    OUT_VIDEO_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-	    OUT_VIDEO_REG(R300_US_PIXSIZE, 0); /* highest temp used */
-	    OUT_VIDEO_REG(R500_US_FC_CTRL, 0);
+
 	    OUT_VIDEO_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
 					      R500_US_CODE_END_ADDR(1)));
 	    OUT_VIDEO_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |

commit 99435b7c18d931ea620044d0fdb4cc93dfcc6331
Author: Owen Taylor <otaylor@redhat.com>
Date:   Thu Apr 17 02:46:11 2008 -0400

    Radeon: Omit mask coordinates
    
    Adapted from Owen's patch on bug 15546
    This fixes the slowness with aatext on r300
    and may speed up other chips marginally.

diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 83366e8..f68f34b 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -58,6 +58,7 @@
 #ifdef ONLY_ONCE
 static Bool is_transform[2];
 static PictTransform *transform[2];
+static Bool has_mask;
 
 struct blendinfo {
     Bool dst_alpha;
@@ -438,6 +439,11 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op,
     if (!RADEONGetDestFormat(pDstPicture, &dst_format))
 	return FALSE;
 
+    if (pMask)
+	has_mask = TRUE;
+    else
+	has_mask = FALSE;
+
     pixel_shift = pDst->drawable.bitsPerPixel >> 4;
 
     dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation;
@@ -508,9 +514,13 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op,
 
     OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend);
     OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend);
-    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
-				     RADEON_SE_VTX_FMT_ST0 |
-				     RADEON_SE_VTX_FMT_ST1);
+    if (pMask)
+	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
+					  RADEON_SE_VTX_FMT_ST0 |
+					  RADEON_SE_VTX_FMT_ST1));
+    else
+	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
+					  RADEON_SE_VTX_FMT_ST0));
     /* Op operator. */
     blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
 
@@ -722,6 +732,11 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
     if (!RADEONGetDestFormat(pDstPicture, &dst_format))
 	return FALSE;
 
+    if (pMask)
+	has_mask = TRUE;
+    else
+	has_mask = FALSE;
+
     pixel_shift = pDst->drawable.bitsPerPixel >> 4;
 
     dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation;
@@ -756,9 +771,13 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
     OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
 
     OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
-    OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
-		 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
-		 (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
+    if (pMask)
+	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
+		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
+    else
+	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
 
     OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
 
@@ -1062,6 +1081,11 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     if (!R300GetDestFormat(pDstPicture, &dst_format))
 	return FALSE;
 
+    if (pMask)
+	has_mask = TRUE;
+    else
+	has_mask = FALSE;
+
     pixel_shift = pDst->drawable.bitsPerPixel >> 4;
 
     dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation;
@@ -1093,10 +1117,17 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     RADEON_SWITCH_TO_3D();
 
     /* setup the VAP */
-    if (info->has_tcl)
-	BEGIN_ACCEL(8);
-    else
-	BEGIN_ACCEL(6);
+    if (info->has_tcl) {
+	if (pMask)
+	    BEGIN_ACCEL(8);
+	else
+	    BEGIN_ACCEL(7);
+    } else {
+	if (pMask)
+	    BEGIN_ACCEL(6);
+	else
+	    BEGIN_ACCEL(5);
+    }
 
     /* These registers define the number, type, and location of data submitted
      * to the PVS unit of GA input (when PVS is disabled)
@@ -1111,23 +1142,35 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
      * Textures 0-7
      * Fog
      */
-    OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
-		  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
-		   (0 << R300_SKIP_DWORDS_0_SHIFT) |
-		   (0 << R300_DST_VEC_LOC_0_SHIFT) |
-		   R300_SIGNED_0 |
-		   (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
-		   (0 << R300_SKIP_DWORDS_1_SHIFT) |
-		   (6 << R300_DST_VEC_LOC_1_SHIFT) |
-		   R300_SIGNED_1));
-    OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
-		  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
-		   (0 << R300_SKIP_DWORDS_2_SHIFT) |
-		   (7 << R300_DST_VEC_LOC_2_SHIFT) |
-		   R300_LAST_VEC_2 |
-		   R300_SIGNED_2));
-
-    /* load the vertex shader 
+    if (pMask) {
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
+		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
+		       R300_SIGNED_0 |
+		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
+		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
+		       R300_SIGNED_1));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
+		       (7 << R300_DST_VEC_LOC_2_SHIFT) |


Reply to: