[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-ati: Changes to 'upstream-experimental'



 autogen.sh                        |    4 
 configure.ac                      |    4 
 man/radeon.man                    |   25 +
 src/Makefile.am                   |    1 
 src/ati_pciids_gen.h              |   59 +++
 src/cayman_shader.c               |  590 ++++++++++++++++++++++++-------------
 src/drmmode_display.c             |  280 +++++++++++++++--
 src/drmmode_display.h             |   11 
 src/evergreen_accel.c             |   12 
 src/evergreen_exa.c               |  287 +++++++++++++-----
 src/evergreen_shader.c            |  596 ++++++++++++++++++++++++--------------
 src/evergreen_state.h             |    2 
 src/pcidb/ati_pciids.csv          |  107 +++++-
 src/r600_exa.c                    |   14 
 src/radeon.h                      |   32 +-
 src/radeon_bo_helper.c            |   70 ++++
 src/radeon_bo_helper.h            |    7 
 src/radeon_chipinfo_gen.h         |  107 +++++-
 src/radeon_chipset_gen.h          |   59 +++
 src/radeon_dri2.c                 |  286 +++++++++++++++---
 src/radeon_driver.c               |  100 ------
 src/radeon_exa.c                  |   65 ----
 src/radeon_exa_funcs.c            |    3 
 src/radeon_exa_render.c           |   12 
 src/radeon_glamor.c               |   81 ++++-
 src/radeon_kms.c                  |   85 ++++-
 src/radeon_pci_chipset_gen.h      |   59 +++
 src/radeon_pci_device_match_gen.h |   59 +++
 src/radeon_probe.h                |    7 
 src/radeon_video.c                |    6 
 src/radeon_video.h                |    1 
 31 files changed, 2223 insertions(+), 808 deletions(-)

New commits:
commit 9c97cca5c24409ca8447c99f051a12fd2d494e79
Author: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date:   Wed Aug 7 10:48:17 2013 +0200

    radeon: bump version for release
    
    Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>

diff --git a/configure.ac b/configure.ac
index ed04028..ac202e1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-ati],
-        [7.1.99],
+        [7.2.0],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-ati])
 

commit 16270cfb202ab67dd152644ef019b2f1ee4d0341
Author: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date:   Wed Aug 7 10:29:33 2013 +0200

    add bicubic_table.py to EXTRA_DIST
    
    Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>

diff --git a/src/Makefile.am b/src/Makefile.am
index 6b7171e..e23dc1d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -86,6 +86,7 @@ EXTRA_DIST = \
 	ati.h \
 	ativersion.h \
 	bicubic_table.h \
+	bicubic_table.py \
 	radeon_bo_helper.h \
 	radeon_drm.h \
 	radeon_exa_render.c \

commit 2cb9197ca7a337c911f38b5de562a2364b922b86
Author: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date:   Wed Aug 7 10:28:52 2013 +0200

    kill unused radeon_driver.c
    
    Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>

diff --git a/src/radeon.h b/src/radeon.h
index 912e24d..4660893 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -497,9 +497,6 @@ extern void RADEONWaitForVLine(ScrnInfoPtr pScrn, PixmapPtr pPix,
 			       xf86CrtcPtr crtc, int start, int stop);
 
 
-/* radeon_driver.c */
-extern RADEONEntPtr RADEONEntPriv(ScrnInfoPtr pScrn);
-
 /* radeon_exa.c */
 extern unsigned eg_tile_split(unsigned tile_split);
 extern Bool radeon_transform_is_affine_or_scaled(PictTransformPtr t);
@@ -528,6 +525,7 @@ extern void radeon_ddx_cs_start(ScrnInfoPtr pScrn,
 				int num, const char *file,
 				const char *func, int line);
 void radeon_kms_update_vram_limit(ScrnInfoPtr pScrn, int new_fb_size);
+extern RADEONEntPtr RADEONEntPriv(ScrnInfoPtr pScrn);
 
 drmVBlankSeqType radeon_populate_vbl_request_type(xf86CrtcPtr crtc);
 
diff --git a/src/radeon_driver.c b/src/radeon_driver.c
deleted file mode 100644
index 2f085a8..0000000
--- a/src/radeon_driver.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
- *                VA Linux Systems Inc., Fremont, California.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation on the rights to use, copy, modify, merge,
- * publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so,
- * subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
- * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-/*
- * Authors:
- *   Kevin E. Martin <martin@xfree86.org>
- *   Rickard E. Faith <faith@valinux.com>
- *   Alan Hourihane <alanh@fairlite.demon.co.uk>
- *
- * Credits:
- *
- *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
- *   code to his Radeon driver.  Portions of this file are based on the
- *   initialization code for that driver.
- *
- * References:
- *
- * !!!! FIXME !!!!
- *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
- *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
- *   1999.
- *
- *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
- *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
- *
- * This server does not yet support these XFree86 4.0 features:
- * !!!! FIXME !!!!
- *   DDC1 & DDC2
- *   shadowfb
- *   overlay planes
- *
- * Modified by Marc Aurele La France (tsi@xfree86.org) for ATI driver merge.
- *
- * Mergedfb and pseudo xinerama support added by Alex Deucher (agd5f@yahoo.com)
- * based on the sis driver by Thomas Winischhofer.
- *
- */
-
-#include <string.h>
-#include <stdio.h>
-
-				/* Driver data structures */
-#include "radeon.h"
-#include "radeon_reg.h"
-#include "radeon_probe.h"
-#include "radeon_version.h"
-
-#include "fb.h"
-
-				/* colormap initialization */
-#include "micmap.h"
-#include "dixstruct.h"
-
-				/* X and server generic header files */
-#include "xf86.h"
-#include "xf86_OSproc.h"
-#include "xf86RandR12.h"
-#include "xf86cmap.h"
-
-#include "shadow.h"
-				/* vgaHW definitions */
-#ifdef HAVE_XEXTPROTO_71
-#include <X11/extensions/dpmsconst.h>
-#else
-#define DPMS_SERVER
-#include <X11/extensions/dpms.h>
-#endif
-
-
-#include "atipciids.h"
-

commit c5cbfcf575b0b4aea6f797558ae974c1453c8e07
Author: Alex Deucher <alexander.deucher@amd.com>
Date:   Tue Jul 30 10:08:25 2013 -0400

    drmmode: add support for multi-screen reverse optimus
    
    Initial reverse optimus didn't consider multiple screens, so
    this overhauls the code to use the new X server interface,
    and allows for multiple outputs on the dGPU to be used with
    the iGPU doing the rendering.  Ported from Dave's nouveau
    patch.
    
    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 244a98f..1df104d 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -521,14 +521,15 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
 		}
 		crtc->funcs->gamma_set(crtc, crtc->gamma_red, crtc->gamma_green,
 				       crtc->gamma_blue, crtc->gamma_size);
-		
+
 		drmmode_ConvertToKMode(crtc->scrn, &kmode, mode);
 
 		fb_id = drmmode->fb_id;
 #ifdef RADEON_PIXMAP_SHARING
-		if (crtc->randr_crtc && crtc->randr_crtc->scanout_pixmap)
-			x = y = 0;
-		else
+		if (crtc->randr_crtc && crtc->randr_crtc->scanout_pixmap) {
+			x = drmmode_crtc->scanout_pixmap_x;
+			y = 0;
+		} else
 #endif
 		if (drmmode_crtc->rotate_fb_id) {
 			fb_id = drmmode_crtc->rotate_fb_id;
@@ -741,25 +742,58 @@ drmmode_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix)
 {
 	ScreenPtr screen = xf86ScrnToScreen(crtc->scrn);
 	PixmapPtr screenpix = screen->GetScreenPixmap(screen);
+	xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(crtc->scrn);
+	drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
+	int c, total_width = 0, max_height = 0, this_x = 0;
 
 	if (!ppix) {
 		if (crtc->randr_crtc->scanout_pixmap)
 			PixmapStopDirtyTracking(crtc->randr_crtc->scanout_pixmap, screenpix);
+		drmmode_crtc->scanout_pixmap_x = 0;
 		return TRUE;
 	}
 
-	if (ppix->drawable.width > screenpix->drawable.width ||
-	    ppix->drawable.height > screenpix->drawable.height) {
+	/* iterate over all the attached crtcs -
+	   work out bounding box */
+	for (c = 0; c < xf86_config->num_crtc; c++) {
+		xf86CrtcPtr iter = xf86_config->crtc[c];
+		if (!iter->enabled && iter != crtc)
+			continue;
+		if (iter == crtc) {
+			this_x = total_width;
+			total_width += ppix->drawable.width;
+			if (max_height < ppix->drawable.height)
+				max_height = ppix->drawable.height;
+		} else {
+			total_width += iter->mode.HDisplay;
+			if (max_height < iter->mode.VDisplay)
+				max_height = iter->mode.VDisplay;
+		}
+#ifndef HAS_DIRTYTRACKING2
+		if (iter != crtc) {
+			ErrorF("Cannot do multiple crtcs without X server dirty tracking 2 interface\n");
+			return FALSE;
+		}
+#endif
+	}
+
+	if (total_width != screenpix->drawable.width ||
+	    max_height != screenpix->drawable.height) {
 		Bool ret;
-		ret = drmmode_xf86crtc_resize(crtc->scrn, ppix->drawable.width, ppix->drawable.height);
+		ret = drmmode_xf86crtc_resize(crtc->scrn, total_width, max_height);
 		if (ret == FALSE)
 			return FALSE;
 
 		screenpix = screen->GetScreenPixmap(screen);
-		screen->width = screenpix->drawable.width = ppix->drawable.width;
-		screen->height = screenpix->drawable.height = ppix->drawable.height;
+		screen->width = screenpix->drawable.width = total_width;
+		screen->height = screenpix->drawable.height = max_height;
 	}
+	drmmode_crtc->scanout_pixmap_x = this_x;
+#ifdef HAS_DIRTYTRACKING2
+	PixmapStartDirtyTracking2(ppix, screenpix, 0, 0, this_x, 0);
+#else
 	PixmapStartDirtyTracking(ppix, screenpix, 0, 0);
+#endif
 	return TRUE;
 }
 #endif
diff --git a/src/drmmode_display.h b/src/drmmode_display.h
index 2fccfda..41e29f6 100644
--- a/src/drmmode_display.h
+++ b/src/drmmode_display.h
@@ -81,6 +81,7 @@ typedef struct {
     int dpms_last_fps;
     uint32_t interpolated_vblanks;
     uint16_t lut_r[256], lut_g[256], lut_b[256];
+    int scanout_pixmap_x;
 } drmmode_crtc_private_rec, *drmmode_crtc_private_ptr;
 
 typedef struct {

commit 429d5b797769895eb4f5fef816ce4e2f3a342031
Author: Dave Airlie <airlied@redhat.com>
Date:   Tue Jan 8 15:56:37 2013 +1000

    radeon: add support for reverse prime (v2)
    
    This adds support for reverse prime configurations
    
    v2: fix compilation with older xservers
    
    Signed-off-by: Alex Deucher <alexdeucher@gmail.com>

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index a614216..244a98f 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -52,6 +52,9 @@
 #define DEFAULT_NOMINAL_FRAME_RATE 60
 
 static Bool
+drmmode_xf86crtc_resize (ScrnInfoPtr scrn, int width, int height);
+
+static Bool
 RADEONZaphodStringMatches(ScrnInfoPtr pScrn, const char *s, char *output_name)
 {
     int i = 0;
@@ -522,6 +525,11 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
 		drmmode_ConvertToKMode(crtc->scrn, &kmode, mode);
 
 		fb_id = drmmode->fb_id;
+#ifdef RADEON_PIXMAP_SHARING
+		if (crtc->randr_crtc && crtc->randr_crtc->scanout_pixmap)
+			x = y = 0;
+		else
+#endif
 		if (drmmode_crtc->rotate_fb_id) {
 			fb_id = drmmode_crtc->rotate_fb_id;
 			x = y = 0;
@@ -727,6 +735,35 @@ drmmode_crtc_gamma_set(xf86CrtcPtr crtc, uint16_t *red, uint16_t *green,
 			    size, red, green, blue);
 }
 
+#ifdef RADEON_PIXMAP_SHARING
+static Bool
+drmmode_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix)
+{
+	ScreenPtr screen = xf86ScrnToScreen(crtc->scrn);
+	PixmapPtr screenpix = screen->GetScreenPixmap(screen);
+
+	if (!ppix) {
+		if (crtc->randr_crtc->scanout_pixmap)
+			PixmapStopDirtyTracking(crtc->randr_crtc->scanout_pixmap, screenpix);
+		return TRUE;
+	}
+
+	if (ppix->drawable.width > screenpix->drawable.width ||
+	    ppix->drawable.height > screenpix->drawable.height) {
+		Bool ret;
+		ret = drmmode_xf86crtc_resize(crtc->scrn, ppix->drawable.width, ppix->drawable.height);
+		if (ret == FALSE)
+			return FALSE;
+
+		screenpix = screen->GetScreenPixmap(screen);
+		screen->width = screenpix->drawable.width = ppix->drawable.width;
+		screen->height = screenpix->drawable.height = ppix->drawable.height;
+	}
+	PixmapStartDirtyTracking(ppix, screenpix, 0, 0);
+	return TRUE;
+}
+#endif
+
 static const xf86CrtcFuncsRec drmmode_crtc_funcs = {
     .dpms = drmmode_crtc_dpms,
     .set_mode_major = drmmode_set_mode_major,
@@ -741,6 +778,9 @@ static const xf86CrtcFuncsRec drmmode_crtc_funcs = {
     .shadow_allocate = drmmode_crtc_shadow_allocate,
     .shadow_destroy = drmmode_crtc_shadow_destroy,
     .destroy = NULL, /* XXX */
+#ifdef RADEON_PIXMAP_SHARING
+    .set_scanout_pixmap = drmmode_set_scanout_pixmap,
+#endif
 };
 
 int drmmode_get_crtc_id(xf86CrtcPtr crtc)
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index c3f50d5..edc3b04 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -257,7 +257,7 @@ redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty)
 	ScrnInfoPtr pScrn = xf86ScreenToScrn(screen);
 	RegionRec pixregion;
 
-	PixmapRegionInit(&pixregion, dirty->slave_dst->master_pixmap);
+	PixmapRegionInit(&pixregion, dirty->slave_dst);
 	DamageRegionAppend(&dirty->slave_dst->drawable, &pixregion);
 	PixmapSyncDirtyHelper(dirty, &pixregion);
 
@@ -766,7 +766,7 @@ static void RADEONSetupCapabilities(ScrnInfoPtr pScrn)
 	if (value & DRM_PRIME_CAP_EXPORT)
 	    pScrn->capabilities |= RR_Capability_SourceOutput | RR_Capability_SinkOffload;
 	if (value & DRM_PRIME_CAP_IMPORT)
-	    pScrn->capabilities |= RR_Capability_SourceOffload;
+	    pScrn->capabilities |= RR_Capability_SourceOffload | RR_Capability_SinkOutput;
     }
 #endif
 }

commit 4de9356a2900ae0fb380a2350791ef045629cd05
Author: Alex Deucher <alexander.deucher@amd.com>
Date:   Mon Aug 5 17:57:16 2013 -0400

    radeon: fix naming clashes with multiple GPUs (v3)
    
    The compat naming code for UMS causes problems
    with multiple GPUs, as you may end up with the same
    output name on multiple GPUs.  Adjust the naming on
    secondary GPUs to avoid conflicts.
    
    v2: integrate Dave's fixes for nouveau
    v3: keep compat with existing naming on primary GPU
    
    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 3a0187e..a614216 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -1108,6 +1108,8 @@ const char *output_names[] = { "None",
 			       "eDP"
 };
 
+#define NUM_OUTPUT_NAMES (sizeof(output_names) / sizeof(output_names[0]))
+
 static void
 drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num, int *num_dvi, int *num_hdmi)
 {
@@ -1137,30 +1139,43 @@ drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num, int *num_dv
 		}
 	}
 
-	/* need to do smart conversion here for compat with non-kms ATI driver */
-	if (koutput->connector_type_id == 1) {
-	    switch(koutput->connector_type) {
-	    case DRM_MODE_CONNECTOR_DVII:
-	    case DRM_MODE_CONNECTOR_DVID:
-	    case DRM_MODE_CONNECTOR_DVIA:
-		snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], *num_dvi);
-		(*num_dvi)++;
-		break;
-	    case DRM_MODE_CONNECTOR_HDMIA:
-	    case DRM_MODE_CONNECTOR_HDMIB:
-		snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], *num_hdmi);
-		(*num_hdmi)++;
-		break;
-	    case DRM_MODE_CONNECTOR_VGA:
-	    case DRM_MODE_CONNECTOR_DisplayPort:
-		snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], koutput->connector_type_id - 1);
-		break;
-	    default:
-		snprintf(name, 32, "%s", output_names[koutput->connector_type]);
-		break;
-	    }
-	} else {
-	    snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], koutput->connector_type_id - 1);
+	if (koutput->connector_type >= NUM_OUTPUT_NAMES)
+		snprintf(name, 32, "Unknown%d-%d", koutput->connector_type,
+			 koutput->connector_type_id - 1);
+#ifdef RADEON_PIXMAP_SHARING
+	else if (pScrn->is_gpu)
+		snprintf(name, 32, "%s-%d-%d",
+			 output_names[koutput->connector_type], pScrn->scrnIndex - GPU_SCREEN_OFFSET + 1,
+			 koutput->connector_type_id - 1);
+#endif
+	else {
+		/* need to do smart conversion here for compat with non-kms ATI driver */
+		if (koutput->connector_type_id == 1) {
+			switch(koutput->connector_type) {
+			case DRM_MODE_CONNECTOR_DVII:
+			case DRM_MODE_CONNECTOR_DVID:
+			case DRM_MODE_CONNECTOR_DVIA:
+				snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], *num_dvi);
+				(*num_dvi)++;
+				break;
+			case DRM_MODE_CONNECTOR_HDMIA:
+			case DRM_MODE_CONNECTOR_HDMIB:
+				snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], *num_hdmi);
+				(*num_hdmi)++;
+				break;
+			case DRM_MODE_CONNECTOR_VGA:
+			case DRM_MODE_CONNECTOR_DisplayPort:
+				snprintf(name, 32, "%s-%d", output_names[koutput->connector_type],
+					 koutput->connector_type_id - 1);
+				break;
+			default:
+				snprintf(name, 32, "%s", output_names[koutput->connector_type]);
+				break;
+			}
+		} else {
+			snprintf(name, 32, "%s-%d", output_names[koutput->connector_type],
+				 koutput->connector_type_id - 1);
+		}
 	}
 
 	if (xf86IsEntityShared(pScrn->entityList[0])) {

commit 2ae6bb18fefddb309920fa69c9b56c3a7f3db7b4
Author: Grigori Goronzy <greg@chown.ath.cx>
Date:   Wed Jul 31 12:01:20 2013 +0200

    EXA/evergreen/ni: replace magic number
    
    Signed-off-by: Alex Deucher <alexdeucher@gmail.com>

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index ee5b06b..ccd102d 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -1111,7 +1111,7 @@ static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture,
 		if (EVERGREENBlendOp[op].src_alpha &&
 		    (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
 		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
-		    if (pSrcPicture->pDrawable || op != 3)
+		    if (pSrcPicture->pDrawable || op != PictOpOver)
 			RADEON_FALLBACK(("Component alpha not supported with source "
 					 "alpha and source value blending.\n"));
 		}

commit 6a278369c05a298a4367306d986467a9ceacae8c
Author: Raul Fernandes <rgfernandes@gmail.com>
Date:   Tue Jul 30 09:26:05 2013 -0400

    EXA/6xx/7xx: optimize non-overlapping Copy
    
    In case dst and src rectangles of a Copy operation in the same surface
    don't overlap, it is safe to skip the scratch surface. This is a
    common case.
    
    Based on evergreen/ni patch from Grigori Goronzy.
    
    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/src/r600_exa.c b/src/r600_exa.c
index b243234..a354ccd 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -643,7 +643,12 @@ R600Copy(PixmapPtr pDst,
     if (accel_state->vsync)
 	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
 
-    if (accel_state->same_surface && accel_state->copy_area) {
+    if (accel_state->same_surface &&
+	    (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) {
+	R600DoPrepareCopy(pScrn);
+	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
+	R600DoCopyVline(pDst);
+    } else if (accel_state->same_surface && accel_state->copy_area) {
 	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
 	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
 	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;

commit 4375a6e75e5d41139be7031a0dee58c057ecbd07
Author: Grigori Goronzy <greg@chown.ath.cx>
Date:   Mon Jul 22 02:30:28 2013 +0200

    EXA/evergreen/ni: accelerate PictOpOver with component alpha
    
    Subpixel text rendering is typically done with a solid src and a
    pixmap mask. Traditionally, this cannot be accelerated in a single
    pass and requires two passes [1]. However, we can cheat a little
    with a constant blend color.
    
    We can use:
    
    const.A = src.A / src.A
    const.R = src.R / src.A
    const.G = src.G / src.A
    const.B = src.B / src.A
    
    dst.A = const.A * (src.A * mask.A) + (1 - (src.A * mask.A)) * dst.A
    dst.R = const.R * (src.A * mask.R) + (1 - (src.A * mask.R)) * dst.R
    dst.G = const.G * (src.A * mask.G) + (1 - (src.A * mask.G)) * dst.G
    dst.B = const.B * (src.A * mask.B) + (1 - (src.A * mask.B)) * dst.B
    
    This only needs a single source value. src.A is cancelled down in
    the right places.
    
    [1] http://anholt.livejournal.com/32058.html

diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
index 10f2e51..e25010b 100644
--- a/src/evergreen_accel.c
+++ b/src/evergreen_accel.c
@@ -335,7 +335,19 @@ evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t do
 					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
     EREG(CB_BLEND0_CONTROL,                   cb_conf->blendcntl);
     END_BATCH();
+}
 
+void evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    BEGIN_BATCH(2 + 4);
+    PACK0(CB_BLEND_RED, 4);
+    EFLOAT(color[0]); /* R */
+    EFLOAT(color[1]); /* G */
+    EFLOAT(color[2]); /* B */
+    EFLOAT(color[3]); /* A */
+    END_BATCH();
 }
 
 static void
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 5b8a631..ee5b06b 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -704,6 +704,14 @@ static uint32_t EVERGREENGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_for
 	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
 	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
 	}
+
+	/* With some tricks, we can still accelerate PictOpOver with solid src.
+	 * This is commonly used for text rendering, so it's worth the extra
+	 * effort.
+	 */
+	if (sblend == (BLEND_ONE << COLOR_SRCBLEND_shift)) {
+	    sblend = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift);
+	}
     }
 
     return sblend | dblend;
@@ -1095,12 +1103,17 @@ static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture,
 		/* Check if it's component alpha that relies on a source alpha and
 		 * on the source value.  We can only get one of those into the
 		 * single source value that we get to blend with.
+		 *
+		 * We can cheat a bit if the src is solid, though. PictOpOver
+		 * can use the constant blend color to sneak a second blend
+		 * source in.
 		 */
 		if (EVERGREENBlendOp[op].src_alpha &&
 		    (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
 		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
-		    RADEON_FALLBACK(("Component alpha not supported with source "
-				     "alpha and source value blending.\n"));
+		    if (pSrcPicture->pDrawable || op != 3)
+			RADEON_FALLBACK(("Component alpha not supported with source "
+					 "alpha and source value blending.\n"));
 		}
 	    }
 
@@ -1196,6 +1209,11 @@ static void EVERGREENSetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, u
 	} else {
 	    if (accel_state->component_alpha) {
 		if (accel_state->src_alpha) {
+		    /* required for PictOpOver */
+		    float cblend[4] = { pix_r / pix_a, pix_g / pix_a,
+					pix_b / pix_a, pix_a / pix_a };
+		    evergreen_set_blend_color(pScrn, cblend);
+
 		    if (PICT_FORMAT_A(format) == 0) {
 			pix_r = 1.0;
 			pix_g = 1.0;
diff --git a/src/evergreen_state.h b/src/evergreen_state.h
index 3ce2bf2..795d447 100644
--- a/src/evergreen_state.h
+++ b/src/evergreen_state.h
@@ -297,6 +297,8 @@ evergreen_start_3d(ScrnInfoPtr pScrn);
 void
 evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain);
 void
+evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color);
+void
 evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop);
 void
 evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp);

commit 94d0d14914a025525a0766669b556eaa6681def7
Author: Grigori Goronzy <greg@chown.ath.cx>
Date:   Thu Jul 18 16:06:23 2013 +0200

    EXA/evergreen/ni: fast solid pixmap support
    
    Solid pixmaps are currently implemented with scratch pixmaps, which
    is slow. This replaces the hack with a proper implementation. The
    Composite shader can now either sample a src/mask or use a constant
    value.

diff --git a/src/cayman_shader.c b/src/cayman_shader.c
index 2a6d6b1..59f4177 100644
--- a/src/cayman_shader.c
+++ b/src/cayman_shader.c
@@ -2495,17 +2495,44 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     int i = 0;
 
     /* 0 */
-    shader[i++] = CF_DWORD0(ADDR(3),
+    /* call interp-fetch-mask if boolean1 == true */
+    shader[i++] = CF_DWORD0(ADDR(12),
 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
-                            CF_CONST(0),
+                            CF_CONST(1),
                             COND(SQ_CF_COND_BOOL),
                             I_COUNT(0),
                             VALID_PIXEL_MODE(0),
                             CF_INST(SQ_CF_INST_CALL),
                             BARRIER(0));
+
     /* 1 */
-    shader[i++] = CF_DWORD0(ADDR(8),
+    /* call read-constant-mask if boolean1 == false */
+    shader[i++] = CF_DWORD0(ADDR(15),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(1),
+                            COND(SQ_CF_COND_NOT_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            BARRIER(0));
+
+    /* 2 */
+    /* call interp-fetch-src if boolean0 == true */
+    shader[i++] = CF_DWORD0(ADDR(7),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            BARRIER(0));
+
+    /* 3 */
+    /* call read-constant-src if boolean0 == false */
+    shader[i++] = CF_DWORD0(ADDR(10),
 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
@@ -2514,7 +2541,41 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                             VALID_PIXEL_MODE(0),
                             CF_INST(SQ_CF_INST_CALL),
                             BARRIER(0));
-    /* 2 - end */
+    /* 4 */
+    /* src IN mask (GPR2 := GPR1 .* GPR0) */
+    shader[i++] = CF_ALU_DWORD0(ADDR(17),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 5 */
+    /* export pixel data */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+
+    /* 6 */
+    /* end of program */
     shader[i++] = CF_DWORD0(ADDR(0),
 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2524,33 +2585,53 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 			    VALID_PIXEL_MODE(0),
 			    CF_INST(SQ_CF_INST_END),
 			    BARRIER(1));
-    /* 3 - mask sub */
-    shader[i++] = CF_ALU_DWORD0(ADDR(12),
+
+    /* subroutine interp-fetch-src */
+
+    /* 7 */
+    /* interpolate src */
+    shader[i++] = CF_ALU_DWORD0(ADDR(21),
 				KCACHE_BANK0(0),
 				KCACHE_BANK1(0),
 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
     shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
 				KCACHE_ADDR0(0),
 				KCACHE_ADDR1(0),
-				I_COUNT(8),
+				I_COUNT(4),
 				ALT_CONST(0),
 				CF_INST(SQ_CF_INST_ALU),
 				WHOLE_QUAD_MODE(0),
 				BARRIER(1));
 
-    /* 4 */
-    shader[i++] = CF_DWORD0(ADDR(28),
+    /* 8 */
+    /* texture fetch src into GPR0 */
+    shader[i++] = CF_DWORD0(ADDR(26),
 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
 			    COND(SQ_CF_COND_ACTIVE),
-			    I_COUNT(2),
+			    I_COUNT(1),
 			    VALID_PIXEL_MODE(0),
 			    CF_INST(SQ_CF_INST_TC),
 			    BARRIER(1));
 
-    /* 5 */
-    shader[i++] = CF_ALU_DWORD0(ADDR(20),
+    /* 9 */
+    /* return */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    BARRIER(0));
+
+    /* subroutine read-constant-src */
+
+    /* 10 */
+    /* read constants into GPR0 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(28),
 				KCACHE_BANK0(0),
 				KCACHE_BANK1(0),
 				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
@@ -2558,29 +2639,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 				KCACHE_ADDR0(0),
 				KCACHE_ADDR1(0),
 				I_COUNT(4),
-				ALT_CONST(0),
+				ALT_CONST(1),
 				CF_INST(SQ_CF_INST_ALU),
 				WHOLE_QUAD_MODE(0),
 				BARRIER(1));
 
-    /* 6 */
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-					  TYPE(SQ_EXPORT_PIXEL),
-					  RW_GPR(2),
-					  RW_REL(ABSOLUTE),
-					  INDEX_GPR(0),
-					  ELEM_SIZE(1));
-
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					       SRC_SEL_Y(SQ_SEL_Y),
-					       SRC_SEL_Z(SQ_SEL_Z),
-					       SRC_SEL_W(SQ_SEL_W),
-					       BURST_COUNT(1),
-					       VALID_PIXEL_MODE(0),
-					       CF_INST(SQ_CF_INST_EXPORT_DONE),
-					       MARK(0),
-					       BARRIER(1));
-    /* 7 */
+    /* 11 */
+    /* return */
     shader[i++] = CF_DWORD0(ADDR(0),
 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2589,10 +2654,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 			    I_COUNT(0),
 			    VALID_PIXEL_MODE(0),
 			    CF_INST(SQ_CF_INST_RETURN),
-			    BARRIER(1));
+			    BARRIER(0));
 
-    /* 8 - non-mask sub */
-    shader[i++] = CF_ALU_DWORD0(ADDR(24),
+    /* subroutine interp-fetch-mask */
+
+    /* 12 */
+    /* interpolate mask */
+    shader[i++] = CF_ALU_DWORD0(ADDR(32),
 				KCACHE_BANK0(0),
 				KCACHE_BANK1(0),
 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -2604,8 +2672,10 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 				CF_INST(SQ_CF_INST_ALU),
 				WHOLE_QUAD_MODE(0),
 				BARRIER(1));
-    /* 9 */
-    shader[i++] = CF_DWORD0(ADDR(32),
+
+    /* 13 */
+    /* texture fetch mask into GPR1 */
+    shader[i++] = CF_DWORD0(ADDR(36),
 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
@@ -2615,24 +2685,37 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 			    CF_INST(SQ_CF_INST_TC),
 			    BARRIER(1));
 
-    /* 10 */
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-					  TYPE(SQ_EXPORT_PIXEL),
-					  RW_GPR(0),
-					  RW_REL(ABSOLUTE),
-					  INDEX_GPR(0),
-					  ELEM_SIZE(1));
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					       SRC_SEL_Y(SQ_SEL_Y),
-					       SRC_SEL_Z(SQ_SEL_Z),
-					       SRC_SEL_W(SQ_SEL_W),
-					       BURST_COUNT(1),
-					       VALID_PIXEL_MODE(0),
-					       CF_INST(SQ_CF_INST_EXPORT_DONE),
-					       MARK(0),
-					       BARRIER(1));
+    /* 14 */
+    /* return */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    BARRIER(0));
 
-    /* 11 */
+    /* subroutine read-constant-mask */
+
+    /* 15 */
+    /* read constants into GPR1 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(38),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(1),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 16 */
+    /* return */
     shader[i++] = CF_DWORD0(ADDR(0),
 			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2641,18 +2724,21 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 			    I_COUNT(0),
 			    VALID_PIXEL_MODE(0),
 			    CF_INST(SQ_CF_INST_RETURN),
-			    BARRIER(1));
+			    BARRIER(0));
+
+    /* ALU clauses */


Reply to: