[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

xserver-xorg-video-ati: Changes to 'upstream-experimental'



 COPYING                           |  133 +++
 configure.ac                      |  111 --
 man/Makefile.am                   |   22 
 man/radeon.man                    |    2 
 src/AtomBios/CD_Operations.c      |    9 
 src/AtomBios/includes/atombios.h  |   92 ++
 src/Makefile.am                   |   32 
 src/ati.c                         |    2 
 src/ati_pciids_gen.h              |   10 
 src/atombios_crtc.c               |  410 +++++-----
 src/atombios_output.c             |  361 ++++-----
 src/drmmode_display.c             |  164 +++-
 src/drmmode_display.h             |   16 
 src/legacy_crtc.c                 |   28 
 src/legacy_output.c               |   11 
 src/local_xf86Rename.h            |   23 
 src/pcidb/ati_pciids.csv          |   14 
 src/r600_exa.c                    | 1435 +++++++++++++++-----------------------
 src/r600_shader.c                 |  862 +++++++++++-----------
 src/r600_shader.h                 |    1 
 src/r600_state.h                  |   30 
 src/r600_textured_videofuncs.c    |  231 ++----
 src/r6xx_accel.c                  |  556 +++++---------
 src/radeon.h                      |   52 -
 src/radeon_accel.c                |   22 
 src/radeon_accelfuncs.c           |    2 
 src/radeon_atombios.c             |   89 +-
 src/radeon_atombios.h             |    2 
 src/radeon_bios.c                 |   19 
 src/radeon_chipinfo_gen.h         |   10 
 src/radeon_chipset_gen.h          |   14 
 src/radeon_commonfuncs.c          |   15 
 src/radeon_crtc.c                 |   67 -
 src/radeon_cursor.c               |   13 
 src/radeon_dri.c                  |  135 +--
 src/radeon_dri.h                  |    4 
 src/radeon_dri2.c                 |  594 ++++++++++++++-
 src/radeon_dri2.h                 |    6 
 src/radeon_driver.c               |  104 +-
 src/radeon_exa.c                  |  147 ++-
 src/radeon_exa_funcs.c            |  120 +--
 src/radeon_exa_render.c           |   54 +
 src/radeon_exa_shared.c           |  237 ++++++
 src/radeon_exa_shared.h           |   80 ++
 src/radeon_kms.c                  |  187 ++++
 src/radeon_output.c               |  101 ++
 src/radeon_pci_chipset_gen.h      |   10 
 src/radeon_pci_device_match_gen.h |   10 
 src/radeon_probe.c                |    8 
 src/radeon_probe.h                |    9 
 src/radeon_textured_video.c       |    6 
 src/radeon_textured_videofuncs.c  |  504 +++++++++----
 src/radeon_vbo.c                  |   12 
 src/radeon_vbo.h                  |   13 
 src/radeon_video.c                |   54 -
 src/radeon_video.h                |    2 
 src/radeon_vip.c                  |    2 
 src/theatre200.c                  |   16 
 src/theatre_detect.c              |    4 
 59 files changed, 4271 insertions(+), 3008 deletions(-)

New commits:
commit cc5005af61f45a3552f7358dc5aa711e42f5af54
Author: Alex Deucher <alexdeucher@gmail.com>
Date:   Mon Sep 27 18:20:53 2010 -0400

    bump version for release

diff --git a/configure.ac b/configure.ac
index decc46f..8625709 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-ati],
-        [6.13.99],
+        [6.13.2],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-ati])
 

commit 7f8820fcec8c90bf2f823170bd08a23e8b4ff7af
Author: Michael Cree <mcree@orcon.net.nz>
Date:   Mon Sep 27 13:39:12 2010 -0400

    Fix some unaligned 32bit accesses in the AtomBios code.
    
    On the Alpha architecture, unaligned 32-bit accesses incur a software
    trap to the kernel and pollute the kernel logs.  Fixed by using the
    ldl_u() interface.
    
    Signed-off-by: Michael Cree <mcree@orcon.net.nz>

diff --git a/src/AtomBios/CD_Operations.c b/src/AtomBios/CD_Operations.c
index 9d399d5..f8b47e3 100644
--- a/src/AtomBios/CD_Operations.c
+++ b/src/AtomBios/CD_Operations.c
@@ -367,7 +367,7 @@ UINT32 GetParametersPS(PARSER_TEMP_DATA STACK_BASED *	pParserTempData)
     UINT32 data;
     pParserTempData->Index=*pParserTempData->pWorkingTableData->IP;
     pParserTempData->pWorkingTableData->IP+=sizeof(UINT8);
-    data = UINT32LE_TO_CPU(*(pParserTempData->pDeviceData->pParameterSpace+pParserTempData->Index));
+    data = UINT32LE_TO_CPU(ldl_u(pParserTempData->pDeviceData->pParameterSpace+pParserTempData->Index));
     return data;
 }
 
@@ -430,7 +430,7 @@ UINT32 GetParametersIndirect(PARSER_TEMP_DATA STACK_BASED *	pParserTempData)
 
     pParserTempData->Index=UINT16LE_TO_CPU(ldw_u((uint16_t *)pParserTempData->pWorkingTableData->IP));
     pParserTempData->pWorkingTableData->IP+=sizeof(UINT16);
-    ret = UINT32LE_TO_CPU(*(UINT32*)(RELATIVE_TO_BIOS_IMAGE(pParserTempData->Index)+pParserTempData->CurrentDataBlock));
+    ret = UINT32LE_TO_CPU(ldl_u((UINT32*)(RELATIVE_TO_BIOS_IMAGE(pParserTempData->Index)+pParserTempData->CurrentDataBlock)));
     return ret;
 }
 
@@ -453,7 +453,7 @@ UINT32 GetParametersDirect16(PARSER_TEMP_DATA STACK_BASED *	pParserTempData)
 UINT32 GetParametersDirect32(PARSER_TEMP_DATA STACK_BASED *	pParserTempData)
 {
     pParserTempData->CD_Mask.SrcAlignment=alignmentDword;
-    pParserTempData->Index=UINT32LE_TO_CPU(*(UINT32*)pParserTempData->pWorkingTableData->IP);
+    pParserTempData->Index=UINT32LE_TO_CPU(ldl_u((UINT32*)pParserTempData->pWorkingTableData->IP));
     pParserTempData->pWorkingTableData->IP+=sizeof(UINT32);
     return pParserTempData->Index;
 }

commit c4f834cdfbe96aa47ac5fb039f9dd7aa9730c8a3
Author: Nicolas Reinecke <nr@das-labor.org>
Date:   Mon Sep 27 13:33:55 2010 -0400

    radeon: Convert remaining x(c)alloc/xfree to m/calloc/free.
    
    Fixes deprecation warnings missed out by
    f7a91ece264af9f3fd2fc18e99aefcda93ce9f5c

diff --git a/src/ati.c b/src/ati.c
index 387aaca..ea71e88 100644
--- a/src/ati.c
+++ b/src/ati.c
@@ -227,7 +227,7 @@ ati_gdev_subdriver(pointer options)
         }
     }
 
-    xfree(ATIGDevs);
+    free(ATIGDevs);
 
     /* load subdrivers as primary modules and only if they do not get loaded
      * from other device sections
diff --git a/src/theatre200.c b/src/theatre200.c
index 16b1840..1b2a46c 100644
--- a/src/theatre200.c
+++ b/src/theatre200.c
@@ -162,7 +162,7 @@ static int microc_load (char* micro_path, char* micro_type, struct rt200_microc_
 		{
 			int ret;
 			
-			curr_seg = (struct rt200_microc_seg*)Xalloc(sizeof(struct rt200_microc_seg));
+			curr_seg = (struct rt200_microc_seg*)malloc(sizeof(struct rt200_microc_seg));
 			if (curr_seg == NULL)
 			{
 				ERROR_0("Cannot allocate memory\n");
@@ -178,7 +178,7 @@ static int microc_load (char* micro_path, char* micro_type, struct rt200_microc_
 				goto fail_exit;
 			}
 
-			curr_seg->data = (unsigned char*)Xalloc(curr_seg->num_bytes);
+			curr_seg->data = (unsigned char*)malloc(curr_seg->num_bytes);
 			if (curr_seg->data == NULL)
 			{
 				ERROR_0("cannot allocate memory\n");
@@ -241,7 +241,7 @@ static int microc_load (char* micro_path, char* micro_type, struct rt200_microc_
 
 		for (i = 0; i < microc_headp->num_seg; i++)
 		{
-			curr_seg = (struct rt200_microc_seg*)Xalloc(sizeof(struct rt200_microc_seg));
+			curr_seg = (struct rt200_microc_seg*)malloc(sizeof(struct rt200_microc_seg));
 			if (curr_seg == NULL)
 			{
 				ERROR_0("Cannot allocate memory\n");
@@ -262,7 +262,7 @@ static int microc_load (char* micro_path, char* micro_type, struct rt200_microc_
 				goto fail_exit;
 			}
 								
-			curr_seg->data = (unsigned char*)Xalloc(curr_seg->num_bytes);
+			curr_seg->data = (unsigned char*)malloc(curr_seg->num_bytes);
 			if (curr_seg->data == NULL)
 			{
 				ERROR_0("cannot allocate memory\n");
diff --git a/src/theatre_detect.c b/src/theatre_detect.c
index 7e7f813..44cd9fa 100644
--- a/src/theatre_detect.c
+++ b/src/theatre_detect.c
@@ -79,7 +79,7 @@ _X_EXPORT TheatrePtr DetectTheatre(GENERIC_BUS_Ptr b)
    return NULL;
    }
    
-   t = xcalloc(1,sizeof(TheatreRec));
+   t = calloc(1,sizeof(TheatreRec));
    t->VIP = b;
    t->theatre_num = -1;
    t->mode=MODE_UNINITIALIZED;
@@ -113,7 +113,7 @@ _X_EXPORT TheatrePtr DetectTheatre(GENERIC_BUS_Ptr b)
 
    if(t->theatre_num < 0)
    {
-   xfree(t);
+   free(t);
    return NULL;
    }
 

commit f8fb9312d791af1f77020e8c2d35bb30841ed9aa
Author: Karl Tomlinson <karlt+@karlt.net>
Date:   Sun Aug 22 22:46:33 2010 +1200

    RADEONPrepareAccess_CS: fall back to DFS when pixmap is in VRAM
    
    This avoids costly CPU VRAM reads and lets EXA manage a system memory cache
    of the portions of pixmaps needed for unaccelerated operations.
    
    https://bugs.freedesktop.org/show_bug.cgi?id=27139

diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index bf7cb88..c0f9dc9 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -284,12 +284,21 @@ Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index)
     ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_exa_pixmap_priv *driver_priv;
+    uint32_t possible_domains = ~0U;
+    uint32_t current_domain = 0;
+#ifdef EXA_MIXED_PIXMAPS
+    Bool can_fail = !(pPix->drawable.bitsPerPixel < 8) &&
+	pPix != pScreen->GetScreenPixmap(pScreen) &&
+        (info->accel_state->exa->flags & EXA_MIXED_PIXMAPS);
+#else
+    Bool can_fail = FALSE;
+#endif
+    Bool flush = FALSE;
     int ret;
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN
     /* May need to handle byte swapping in DownloadFrom/UploadToScreen */
-    if (pPix->drawable.bitsPerPixel > 8 &&
-	pPix != pScreen->GetScreenPixmap(pScreen))
+    if (can_fail && pPix->drawable.bitsPerPixel > 8)
 	return FALSE;
 #endif
 
@@ -298,7 +307,28 @@ Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index)
       return FALSE;
 
     /* if we have more refs than just the BO then flush */
-    if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs))
+    if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
+	flush = TRUE;
+
+	if (can_fail) {
+	    possible_domains = radeon_bo_get_src_domain(driver_priv->bo);
+	    if (possible_domains == RADEON_GEM_DOMAIN_VRAM)
+		return FALSE; /* use DownloadFromScreen */
+	}
+    }
+
+    /* if the BO might end up in VRAM, prefer DownloadFromScreen */
+    if (can_fail && (possible_domains & RADEON_GEM_DOMAIN_VRAM)) {
+	radeon_bo_is_busy(driver_priv->bo, &current_domain);
+
+	if (current_domain & possible_domains) {
+	    if (current_domain == RADEON_GEM_DOMAIN_VRAM)
+		return FALSE;
+	} else if (possible_domains & RADEON_GEM_DOMAIN_VRAM)
+	    return FALSE;
+    }
+
+    if (flush)
         radeon_cs_flush_indirect(pScrn);
     
     /* flush IB */

commit 35c4ff936601ee083f51510a5192fb97d622a483
Author: Karl Tomlinson <karlt+@karlt.net>
Date:   Sun Aug 22 22:28:06 2010 +1200

    radeon: complete UTS and DFS even when a scratch BO is not necessary
    
    Turns on the big-endian paths even for little-endian systems, and adds
    similar paths to the r6xx/r7xx functions.
    
    This makes UTS and DFS reliable, which will let PrepareAccess (with
    mixed pixmaps) choose to fail based on whether the pixmap is in VRAM
    (to avoid CPU reads).

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 9b7a0c9..8544034 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1772,13 +1772,18 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
     struct radeon_exa_pixmap_priv *driver_priv;
-    struct radeon_bo *scratch;
+    struct radeon_bo *scratch = NULL;
+    struct radeon_bo *copy_dst;
+    unsigned char *dst;
     unsigned size;
     uint32_t dst_domain;
     int bpp = pDst->drawable.bitsPerPixel;
     uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    uint32_t copy_pitch;
     uint32_t src_pitch_hw = scratch_pitch / (bpp / 8);
     uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
+    int ret;
+    Bool flush = TRUE;
     Bool r;
     int i;
     struct r600_accel_object src_obj, dst_obj;
@@ -1788,15 +1793,19 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
 
     driver_priv = exaGetPixmapDriverPrivate(pDst);
 
-    /* If we know the BO won't be busy, don't bother */
-    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) &&
-	!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
-	return FALSE;
+    /* If we know the BO won't be busy, don't bother with a scratch */
+    copy_dst = driver_priv->bo;
+    copy_pitch = pDst->devKind;
+    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
+	flush = FALSE;
+	if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+	    goto copy;
+    }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-	return FALSE;
+	goto copy;
     }
 
     src_obj.pitch = src_pitch_hw;
@@ -1821,33 +1830,45 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
 			   &dst_obj,
 			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
 			   3, 0xffffffff)) {
-        r = FALSE;
-        goto out;
+        goto copy;
     }
+    copy_dst = scratch;
+    copy_pitch = scratch_pitch;
+    flush = FALSE;
+
+copy:
+    if (flush)
+	radeon_cs_flush_indirect(pScrn);
 
-    r = radeon_bo_map(scratch, 0);
-    if (r) {
+    ret = radeon_bo_map(copy_dst, 0);
+    if (ret) {
         r = FALSE;
         goto out;
     }
     r = TRUE;
     size = w * bpp / 8;
+    dst = copy_dst->ptr;
+    if (copy_dst == driver_priv->bo)
+	dst += y * copy_pitch + x * bpp / 8;
     for (i = 0; i < h; i++) {
-        memcpy(scratch->ptr + i * scratch_pitch, src, size);
+        memcpy(dst + i * copy_pitch, src, size);
         src += src_pitch;
     }
-    radeon_bo_unmap(scratch);
+    radeon_bo_unmap(copy_dst);
 
-    if (info->accel_state->vsync)
-	RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
+    if (copy_dst == scratch) {
+	if (info->accel_state->vsync)
+	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
 
-    /* blit from gart to vram */
-    R600DoPrepareCopy(pScrn);
-    R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
-    R600DoCopyVline(pDst);
+	/* blit from gart to vram */
+	R600DoPrepareCopy(pScrn);
+	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
+	R600DoCopyVline(pDst);
+    }
 
 out:
-    radeon_bo_unref(scratch);
+    if (scratch)
+	radeon_bo_unref(scratch);
     return r;
 }
 
@@ -1859,13 +1880,17 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
     struct radeon_exa_pixmap_priv *driver_priv;
-    struct radeon_bo *scratch;
+    struct radeon_bo *scratch = NULL;
+    struct radeon_bo *copy_src;
     unsigned size;
     uint32_t src_domain = 0;
     int bpp = pSrc->drawable.bitsPerPixel;
     uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    uint32_t copy_pitch;
     uint32_t dst_pitch_hw = scratch_pitch / (bpp / 8);
     uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
+    int ret;
+    Bool flush = FALSE;
     Bool r;
     struct r600_accel_object src_obj, dst_obj;
 
@@ -1874,24 +1899,28 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
 
     driver_priv = exaGetPixmapDriverPrivate(pSrc);
 
-    /* If we know the BO won't end up in VRAM anyway, don't bother */
+    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
+    copy_src = driver_priv->bo;
+    copy_pitch = pSrc->devKind;
     if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	src_domain = radeon_bo_get_src_domain(driver_priv->bo);
 	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
 	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
 	    src_domain = 0;
+	else /* A write may be scheduled */
+	    flush = TRUE;
     }
 
     if (!src_domain)
 	radeon_bo_is_busy(driver_priv->bo, &src_domain);
 
     if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
-	return FALSE;
+	goto copy;
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-	return FALSE;
+	goto copy;
     }
     radeon_cs_space_reset_bos(info->cs);
     radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
@@ -1900,10 +1929,9 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
     accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
     radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
-    r = radeon_cs_space_check(info->cs);
-    if (r) {
-        r = FALSE;
-        goto out;
+    ret = radeon_cs_space_check(info->cs);
+    if (ret) {
+        goto copy;
     }
 
     src_obj.pitch = src_pitch_hw;
@@ -1928,34 +1956,42 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
 			   &dst_obj,
 			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
 			   3, 0xffffffff)) {
-        r = FALSE;
-        goto out;
+        goto copy;
     }
 
     /* blit from vram to gart */
     R600DoPrepareCopy(pScrn);
     R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
     R600DoCopy(pScrn);
+    copy_src = scratch;
+    copy_pitch = scratch_pitch;
+    flush = TRUE;
 
-    if (info->cs)
+copy:
+    if (flush && info->cs)
 	radeon_cs_flush_indirect(pScrn);
 
-    r = radeon_bo_map(scratch, 0);
-    if (r) {
+    ret = radeon_bo_map(copy_src, 0);
+    if (ret) {
+	ErrorF("failed to map pixmap: %d\n", ret);
         r = FALSE;
         goto out;
     }
     r = TRUE;
     w *= bpp / 8;
-    size = 0;
+    if (copy_src == driver_priv->bo)
+	size = y * copy_pitch + x * bpp / 8;
+    else
+	size = 0;
     while (h--) {
-        memcpy(dst, scratch->ptr + size, w);
-        size += scratch_pitch;
+        memcpy(dst, copy_src->ptr + size, w);
+        size += copy_pitch;
         dst += dst_pitch;
     }
-    radeon_bo_unmap(scratch);
+    radeon_bo_unmap(copy_src);
 out:
-    radeon_bo_unref(scratch);
+    if (scratch)
+	radeon_bo_unref(scratch);
     return r;
 }
 #endif
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index f629c8e..e80a996 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -471,9 +471,7 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     uint32_t copy_pitch;
     uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
     int ret;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
     Bool flush = TRUE;
-#endif
     Bool r;
     int i;
 
@@ -495,61 +493,34 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     }
 #endif
 
-    /* If we know the BO won't be busy, don't bother */
-#if X_BYTE_ORDER == X_BIG_ENDIAN
+    /* If we know the BO won't be busy, don't bother with a scratch */
     copy_dst = driver_priv->bo;
     copy_pitch = pDst->devKind;
-#endif
     if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
 	flush = FALSE;
-#endif
-	if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain)) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	    /* Can't return FALSE here if we need to swap bytes */
-	    if (swap != RADEON_HOST_DATA_SWAP_NONE &&
-		driver_priv->bo != info->front_bo) {
-		goto copy;
-	    }
-#endif
-	    return FALSE;
-	}
+	if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+	    goto copy;
     }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	if (swap != RADEON_HOST_DATA_SWAP_NONE &&
-	    driver_priv->bo != info->front_bo) {
-	    goto copy;
-	}
-#endif
-	return FALSE;
+	goto copy;
     }
     radeon_cs_space_reset_bos(info->cs);
     radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
     radeon_cs_space_add_persistent_bo(info->cs, scratch, RADEON_GEM_DOMAIN_GTT, 0);
     ret = radeon_cs_space_check(info->cs);
     if (ret) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	if (swap != RADEON_HOST_DATA_SWAP_NONE &&
-	    driver_priv->bo != info->front_bo) {
-	    goto copy;
-	}
-#endif
-        r = FALSE;
-        goto out;
+	goto copy;
     }
     copy_dst = scratch;
     copy_pitch = scratch_pitch;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
     flush = FALSE;
 
 copy:
     if (flush)
 	radeon_cs_flush_indirect(pScrn);
-#endif
 
     ret = radeon_bo_map(copy_dst, 0);
     if (ret) {
@@ -600,9 +571,7 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     uint32_t copy_pitch;
     uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
     int ret;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
     Bool flush = FALSE;
-#endif
     Bool r;
 
     if (bpp < 8)
@@ -623,57 +592,36 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     }
 #endif
 
-    /* If we know the BO won't end up in VRAM anyway, don't bother */
-#if X_BYTE_ORDER == X_BIG_ENDIAN
+    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
     copy_src = driver_priv->bo;
     copy_pitch = pSrc->devKind;
-#endif
     if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	src_domain = radeon_bo_get_src_domain(driver_priv->bo);
 	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
 	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
 	    src_domain = 0;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
 	else /* A write may be scheduled */
 	    flush = TRUE;
-#endif
     }
 
     if (!src_domain)
 	radeon_bo_is_busy(driver_priv->bo, &src_domain);
 
     if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	/* Can't return FALSE here if we need to swap bytes */
-	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
-	    goto copy;
-	}
-#endif
-	return FALSE;
+	goto copy;
     }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
-	    goto copy;
-	}
-#endif
-	return FALSE;
+	goto copy;
     }
     radeon_cs_space_reset_bos(info->cs);
     radeon_add_pixmap(info->cs, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
     radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, RADEON_GEM_DOMAIN_GTT);
     ret = radeon_cs_space_check(info->cs);
     if (ret) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
-	    goto copy;
-	}
-#endif
-        r = FALSE;
-        goto out;
+	goto copy;
     }
     RADEONGetDatatypeBpp(pSrc->drawable.bitsPerPixel, &datatype);
     RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset);
@@ -685,12 +633,10 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
                     RADEON_GEM_DOMAIN_GTT);
     copy_src = scratch;
     copy_pitch = scratch_pitch;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
     flush = TRUE;
 
 copy:
     if (flush)
-#endif
 	FLUSH_RING();
 
     ret = radeon_bo_map(copy_src, 0);

commit d46381a3a6bf10903803f5acaa7aa0ce06373b96
Author: Karl Tomlinson <karlt+@karlt.net>
Date:   Sun Aug 22 21:02:45 2010 +1200

    radeon: complete big endian UTS and DFS even when scratch allocation fails.
    
    On big endian systems, PrepareAccess will fail when byte-swapping is
    required so UploadToScreen and DownloadFromScreen cannot rely on
    fallback to PrepareAccess.
    
    When scratch BO space allocation fails, this patch merely adds a simple
    fallback to direct CPU access without any GPU blit.  This sometimes
    requires a CS flush even in UploadToScreen.
    (No allocation retry after a flush is added here.)

diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index d02c787..f629c8e 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -459,7 +459,8 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     ScreenPtr pScreen = pDst->drawable.pScreen;
     RINFO_FROM_SCREEN(pScreen);
     struct radeon_exa_pixmap_priv *driver_priv;
-    struct radeon_bo *scratch;
+    struct radeon_bo *scratch = NULL;
+    struct radeon_bo *copy_dst;
     unsigned char *dst;
     unsigned size;
     uint32_t datatype = 0;
@@ -467,7 +468,12 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     uint32_t dst_pitch_offset;
     unsigned bpp = pDst->drawable.bitsPerPixel;
     uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
+    uint32_t copy_pitch;
     uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
+    int ret;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    Bool flush = TRUE;
+#endif
     Bool r;
     int i;
 
@@ -490,54 +496,78 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
 #endif
 
     /* If we know the BO won't be busy, don't bother */
-    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) &&
-	!radeon_bo_is_busy(driver_priv->bo, &dst_domain)) {
 #if X_BYTE_ORDER == X_BIG_ENDIAN
-	/* Can't return FALSE here if we need to swap bytes */
-	if (swap != RADEON_HOST_DATA_SWAP_NONE &&
-	    driver_priv->bo != info->front_bo) {
-	    scratch = driver_priv->bo;
-	    scratch_pitch = pDst->devKind;
-	    goto copy;
-	}
+    copy_dst = driver_priv->bo;
+    copy_pitch = pDst->devKind;
 #endif
-	return FALSE;
+    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	flush = FALSE;
+#endif
+	if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain)) {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	    /* Can't return FALSE here if we need to swap bytes */
+	    if (swap != RADEON_HOST_DATA_SWAP_NONE &&
+		driver_priv->bo != info->front_bo) {
+		goto copy;
+	    }
+#endif
+	    return FALSE;
+	}
     }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	if (swap != RADEON_HOST_DATA_SWAP_NONE &&
+	    driver_priv->bo != info->front_bo) {
+	    goto copy;
+	}
+#endif
 	return FALSE;
     }
     radeon_cs_space_reset_bos(info->cs);
     radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
     radeon_cs_space_add_persistent_bo(info->cs, scratch, RADEON_GEM_DOMAIN_GTT, 0);
-    r = radeon_cs_space_check(info->cs);
-    if (r) {
+    ret = radeon_cs_space_check(info->cs);
+    if (ret) {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	if (swap != RADEON_HOST_DATA_SWAP_NONE &&
+	    driver_priv->bo != info->front_bo) {
+	    goto copy;
+	}
+#endif
         r = FALSE;
         goto out;
     }
-
+    copy_dst = scratch;
+    copy_pitch = scratch_pitch;
 #if X_BYTE_ORDER == X_BIG_ENDIAN
+    flush = FALSE;
+
 copy:
+    if (flush)
+	radeon_cs_flush_indirect(pScrn);
 #endif
-    r = radeon_bo_map(scratch, 0);
-    if (r) {
+
+    ret = radeon_bo_map(copy_dst, 0);
+    if (ret) {
         r = FALSE;
         goto out;
     }
     r = TRUE;
     size = w * bpp / 8;
-    dst = scratch->ptr;
-    if (scratch == driver_priv->bo)
-	dst += y * scratch_pitch + x * bpp / 8;
+    dst = copy_dst->ptr;
+    if (copy_dst == driver_priv->bo)
+	dst += y * copy_pitch + x * bpp / 8;
     for (i = 0; i < h; i++) {
-        RADEONCopySwap(dst + i * scratch_pitch, (uint8_t*)src, size, swap);
+        RADEONCopySwap(dst + i * copy_pitch, (uint8_t*)src, size, swap);
         src += src_pitch;
     }
-    radeon_bo_unmap(scratch);
+    radeon_bo_unmap(copy_dst);
 
-    if (scratch != driver_priv->bo) {
+    if (copy_dst == scratch) {
 	RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype);
 	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset);
 	ACCEL_PREAMBLE();
@@ -548,7 +578,7 @@ copy:
     }
 
 out:
-    if (scratch != driver_priv->bo)
+    if (scratch)
 	radeon_bo_unref(scratch);
     return r;
 }
@@ -559,14 +589,17 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
 {
     RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
     struct radeon_exa_pixmap_priv *driver_priv;
-    struct radeon_bo *scratch;
+    struct radeon_bo *scratch = NULL;
+    struct radeon_bo *copy_src;
     unsigned size;
     uint32_t datatype = 0;
     uint32_t src_domain = 0;
     uint32_t src_pitch_offset;
     unsigned bpp = pSrc->drawable.bitsPerPixel;
     uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
+    uint32_t copy_pitch;
     uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
+    int ret;
 #if X_BYTE_ORDER == X_BIG_ENDIAN
     Bool flush = FALSE;
 #endif
@@ -591,6 +624,10 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
 #endif
 
     /* If we know the BO won't end up in VRAM anyway, don't bother */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    copy_src = driver_priv->bo;
+    copy_pitch = pSrc->devKind;
+#endif
     if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	src_domain = radeon_bo_get_src_domain(driver_priv->bo);
 	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
@@ -609,8 +646,6 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
 #if X_BYTE_ORDER == X_BIG_ENDIAN
 	/* Can't return FALSE here if we need to swap bytes */
 	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
-	    scratch = driver_priv->bo;
-	    scratch_pitch = pSrc->devKind;
 	    goto copy;
 	}
 #endif
@@ -620,13 +655,23 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
+	    goto copy;
+	}
+#endif
 	return FALSE;
     }
     radeon_cs_space_reset_bos(info->cs);
     radeon_add_pixmap(info->cs, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
     radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, RADEON_GEM_DOMAIN_GTT);
-    r = radeon_cs_space_check(info->cs);
-    if (r) {
+    ret = radeon_cs_space_check(info->cs);
+    if (ret) {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
+	    goto copy;
+	}
+#endif
         r = FALSE;
         goto out;
     }
@@ -638,6 +683,8 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
                     scratch_pitch << 16, x, y, 0, 0, w, h,
                     RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT,
                     RADEON_GEM_DOMAIN_GTT);
+    copy_src = scratch;
+    copy_pitch = scratch_pitch;
 #if X_BYTE_ORDER == X_BIG_ENDIAN
     flush = TRUE;
 
@@ -646,25 +693,26 @@ copy:
 #endif
 	FLUSH_RING();
 
-    r = radeon_bo_map(scratch, 0);
-    if (r) {
+    ret = radeon_bo_map(copy_src, 0);
+    if (ret) {
+	ErrorF("failed to map pixmap: %d\n", ret);
         r = FALSE;
         goto out;
     }
     r = TRUE;
     w *= bpp / 8;
-    if (scratch == driver_priv->bo)
-	size = y * scratch_pitch + x * bpp / 8;
+    if (copy_src == driver_priv->bo)
+	size = y * copy_pitch + x * bpp / 8;
     else
 	size = 0;
     while (h--) {
-        RADEONCopySwap((uint8_t*)dst, scratch->ptr + size, w, swap);
-        size += scratch_pitch;
+        RADEONCopySwap((uint8_t*)dst, copy_src->ptr + size, w, swap);
+        size += copy_pitch;
         dst += dst_pitch;
     }
-    radeon_bo_unmap(scratch);
+    radeon_bo_unmap(copy_src);
 out:
-    if (scratch != driver_priv->bo)
+    if (scratch)
 	radeon_bo_unref(scratch);
     return r;
 }

commit 4ced4e1eff67946e306c0c67c9ed59dd5f3c4ba9
Author: Karl Tomlinson <karlt+@karlt.net>
Date:   Sun Aug 22 20:04:42 2010 +1200

    RADEONDownloadFromScreenCS: flush CS writes before mapping BO for read
    
    If unflushed CS operations write to the pixmap BO, then these need to be
    flushed before mapping the BO for read.  This currently only affects big
    endian systems and only when the operation writes to the GTT domain.

diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index a82e416..d02c787 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -567,6 +567,9 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     unsigned bpp = pSrc->drawable.bitsPerPixel;
     uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
     uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    Bool flush = FALSE;
+#endif
     Bool r;
 
     if (bpp < 8)
@@ -593,6 +596,10 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
 	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
 	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
 	    src_domain = 0;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	else /* A write may be scheduled */
+	    flush = TRUE;
+#endif
     }
 
     if (!src_domain)
@@ -631,11 +638,14 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
                     scratch_pitch << 16, x, y, 0, 0, w, h,
                     RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT,
                     RADEON_GEM_DOMAIN_GTT);
-    FLUSH_RING();
-
 #if X_BYTE_ORDER == X_BIG_ENDIAN
+    flush = TRUE;
+
 copy:
+    if (flush)
 #endif
+	FLUSH_RING();
+
     r = radeon_bo_map(scratch, 0);
     if (r) {
         r = FALSE;


Reply to: