[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Reinstating GPU fixes to Xen flavour



Bastian (and debian-kernel@),

We are seeing a slow but steady trickle of reports of bug reports (both
to Debian and xen-devel) against the Xen featureset kernel from Squeeze
users due to failures starting X.

These issues are resolved by changes which you excluded from the last
pvops merge (e.g. those merged by bcf16b6b4f34), for example #601341,
#602418 and #604096.

I've been providing testing kernels with those changes reinstated and
users have been reporting success with it.

I think it would be a mistake to release Squeeze with such a user
visible issue which is so easily resolved.

I've attached the diff which I have been using for those kernels, it
precisely corresponds to the omitted changesets. FWIW the diffstat is:
 arch/x86/include/asm/pgtable.h       |    5 +++
 arch/x86/include/asm/pgtable_types.h |    5 ---
 drivers/char/agp/amd64-agp.c         |   31 ++++++++++++++++++
 drivers/char/agp/backend.c           |    9 ++++-
 drivers/char/agp/generic.c           |   58 +++++++++++++++++++++++++++++++----
 drivers/char/agp/intel-agp.c         |   42 +++++++++++++++++++++++--
 drivers/gpu/drm/ttm/ttm_bo_vm.c      |   29 ++++++++++++++++-
 drivers/gpu/drm/ttm/ttm_tt.c         |   25 +++++++++++++--
 drivers/video/fbmem.c                |    1 
 9 files changed, 186 insertions(+), 19 deletions(-)

As you can see from the patch all the functional changes are guarded by
"xen_pv_domain()" tests and in any case the patch would only be applied
to the Xen featureset. These fixes have been in the upstream xen.git
without issue for quite some time.

Although work is under way to solve this issue in a different way which
is suitable for upstreaming to LKML but there's no way that is going to
ready/stable/suitable for backporting Squeeze.

Please would you reconsider your stance on this. I can do the work to
reinstate the fixes but only if you are happy with me doing so.

Ian.
-- 
Ian Campbell
Current Noise: Rwake - The Lure Of Light

Anyway, my money is still on use strict vars . . .
		-- Larry Wall in <199710011704.KAA21395@wall.org>

To regenerate:

Generate branch debian-pvops as per pvops.patch

Generate branch debian-pvops as per debian-pvops but omit revert.

$ git diff debian-pvops..debian-pvops-gpu

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 863e1c2..088f079 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -76,6 +76,11 @@ extern struct list_head pgd_list;
 
 #endif	/* CONFIG_PARAVIRT */
 
+static inline pteval_t pte_flags(pte_t pte)
+{
+	return pte_val(pte) & PTE_FLAGS_MASK;
+}
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d1f4a76..a81b0ed 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -265,11 +265,6 @@ static inline pteval_t native_pte_val(pte_t pte)
 	return pte.pte;
 }
 
-static inline pteval_t pte_flags(pte_t pte)
-{
-	return native_pte_val(pte) & PTE_FLAGS_MASK;
-}
-
 #define pgprot_val(x)	((x).pgprot)
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index c496c8a..4064d95 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -18,6 +18,8 @@
 #include <asm/k8.h>
 #include <asm/gart.h>
 #include "agp.h"
+#include <xen/page.h>
+#include <asm/xen/page.h>
 
 /* NVIDIA K8 registers */
 #define NVIDIA_X86_64_0_APBASE		0x10
@@ -78,8 +80,21 @@ static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
+		phys_addr_t phys = page_to_phys(mem->pages[i]);
+		if (xen_pv_domain()) {
+			phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
+					page_to_pfn(mem->pages[i])));
+			if (phys != xen_phys) {
+				printk(KERN_ERR "Fixing up GART: (0x%lx->0x%lx)." \
+					" CODE UNTESTED!\n",
+					(unsigned long)phys,
+					(unsigned long)xen_phys);
+				WARN_ON_ONCE(phys != xen_phys);
+				phys = xen_phys;
+			}
+		}
 		tmp = agp_bridge->driver->mask_memory(agp_bridge,
-						      page_to_phys(mem->pages[i]),
+						      phys,
 						      mask_type);
 
 		BUG_ON(tmp & 0xffffff0000000ffcULL);
@@ -181,6 +196,20 @@ static int amd_8151_configure(void)
 	unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
 	int i;
 
+	if (xen_pv_domain()) {
+		phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
+				virt_to_pfn(agp_bridge->gatt_table_real)));
+		/* Future thoughts: Perhaps use the gatt_table_bus that
+		 * agp_generic_create_gatt_table has setup instead of
+		 * doing the virt_to_phys once more? */
+		if (gatt_bus != xen_phys) {
+			printk(KERN_ERR "Fixing up GATT: (0x%lx->0x%lx)." \
+					" CODE UNTESTED!\n", gatt_bus,
+					(unsigned long)xen_phys);
+			WARN_ON_ONCE(gatt_bus != xen_phys);
+			gatt_bus = xen_phys;
+		}
+	}
 	/* Configure AGP regs in each x86-64 host bridge. */
         for (i = 0; i < num_k8_northbridges; i++) {
 		agp_bridge->gart_bus_addr =
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index a56ca08..30fc4b6 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -38,6 +38,8 @@
 #include <linux/vmalloc.h>
 #include <asm/io.h>
 #include "agp.h"
+#include <xen/page.h>
+#include <asm/xen/page.h>
 
 /* Due to XFree86 brain-damage, we can't go to 1.0 until they
  * fix some real stupidity. It's only by chance we can bump
@@ -160,8 +162,13 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge)
 			}
 		} else {
 			bridge->scratch_page_dma = page_to_phys(page);
+			if (xen_pv_domain()) {
+				phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
+							page_to_pfn(page)));
+				if (bridge->scratch_page_dma != xen_phys)
+					bridge->scratch_page_dma = xen_phys;
+			}
 		}
-
 		bridge->scratch_page = bridge->driver->mask_memory(bridge,
 						   bridge->scratch_page_dma, 0);
 	}
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index c505439..2434c91 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -42,6 +42,8 @@
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include "agp.h"
+#include <xen/page.h>
+#include <asm/xen/page.h>
 
 __u32 *agp_gatt_table;
 int agp_memory_reserved;
@@ -1002,6 +1004,14 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
 		return -ENOMEM;
 	}
 	bridge->gatt_bus_addr = virt_to_phys(bridge->gatt_table_real);
+	/* KRW: virt_to_phys under Xen is not safe. */
+	if (xen_pv_domain()) {
+		/* Use back-door to get the "real" PFN. */
+		phys_addr_t pfn = virt_to_pfn(bridge->gatt_table_real);
+		phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(pfn));
+		if (bridge->gatt_bus_addr != xen_phys)
+			bridge->gatt_bus_addr = xen_phys;
+	}
 
 	/* AK: bogus, should encode addresses > 4GB */
 	for (i = 0; i < num_entries; i++) {
@@ -1141,8 +1151,17 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type)
 	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
+		phys_addr_t phys = page_to_phys(mem->pages[i]);
+
+		/* HACK: Via a back-door we get the bus address. */
+		if (xen_pv_domain()) {
+			phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
+					page_to_pfn(mem->pages[i])));
+			if (phys != xen_phys)
+				phys = xen_phys;
+		}
 		writel(bridge->driver->mask_memory(bridge,
-						   page_to_phys(mem->pages[i]),
+						   phys,
 						   mask_type),
 		       bridge->gatt_table+j);
 	}
@@ -1235,7 +1254,16 @@ int agp_generic_alloc_pages(struct agp_bridge_data *bridge, struct agp_memory *m
 	int i, ret = -ENOMEM;
 
 	for (i = 0; i < num_pages; i++) {
-		page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
+		if (xen_pv_domain()) {
+			void *addr;
+			dma_addr_t _d;
+
+			addr = dma_alloc_coherent(NULL, PAGE_SIZE, &_d, GFP_KERNEL);
+			if (!addr)
+				goto out;
+			page = virt_to_page(addr);
+		} else
+			page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
 		/* agp_free_memory() needs gart address */
 		if (page == NULL)
 			goto out;
@@ -1263,7 +1291,17 @@ struct page *agp_generic_alloc_page(struct agp_bridge_data *bridge)
 {
 	struct page * page;
 
-	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
+	if (xen_pv_domain()) {
+		void *addr;
+		dma_addr_t _d;
+
+		addr = dma_alloc_coherent(NULL, PAGE_SIZE, &_d, GFP_KERNEL);
+		if (!addr)
+			return NULL;
+		page = virt_to_page(addr);
+	} else
+		page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
+
 	if (page == NULL)
 		return NULL;
 
@@ -1294,7 +1332,12 @@ void agp_generic_destroy_pages(struct agp_memory *mem)
 		unmap_page_from_agp(page);
 #endif
 		put_page(page);
-		__free_page(page);
+		if (xen_pv_domain()) {
+			void *addr = page_address(page);
+			dma_free_coherent(NULL, PAGE_SIZE, addr,
+					  virt_to_bus(addr));
+		} else 
+			__free_page(page);
 		atomic_dec(&agp_bridge->current_memory_agp);
 		mem->pages[i] = NULL;
 	}
@@ -1311,7 +1354,12 @@ void agp_generic_destroy_page(struct page *page, int flags)
 
 	if (flags & AGP_PAGE_DESTROY_FREE) {
 		put_page(page);
-		__free_page(page);
+		if (xen_pv_domain()) {
+			void *addr = page_address(page);
+			dma_free_coherent(NULL, PAGE_SIZE, addr,
+					  virt_to_bus(addr));
+		} else
+			__free_page(page);
 		atomic_dec(&agp_bridge->current_memory_agp);
 	}
 }
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 4d01d0e..7a62c3c 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -10,6 +10,8 @@
 #include <linux/agp_backend.h>
 #include <asm/smp.h>
 #include "agp.h"
+#include <xen/page.h>
+#include <asm/xen/page.h>
 
 /*
  * If we have Intel graphics, we're not going to have anything other than
@@ -300,8 +302,20 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem,
 	int i, j;
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
+		phys_addr_t phys = page_to_phys(mem->pages[i]);
+		if (xen_pv_domain()) {
+			phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
+					page_to_pfn(mem->pages[i])));
+			if (xen_phys != phys) {
+				printk(KERN_ERR "Compile kernel with " \
+					"CONFIG_DMAR to get rid of this " \
+					"warning!\n");
+				WARN_ON_ONCE(xen_phys != phys);
+				/* Fixup: */
+				phys = xen_phys;
+			}
 		writel(agp_bridge->driver->mask_memory(agp_bridge,
-				page_to_phys(mem->pages[i]), mask_type),
+				phys, mask_type),
 		       intel_private.gtt+j);
 	}
 
@@ -491,8 +505,16 @@ static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start,
 		if (!mem->is_flushed)
 			global_cache_flush();
 		for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
+			phys_addr_t phys = page_to_phys(mem->pages[i]);
+			if (xen_pv_domain()) {
+				phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
+						page_to_pfn(mem->pages[i])));
+				/* Fixup: */
+				if (xen_phys != phys)
+					phys = xen_phys;
+			}
 			writel(agp_bridge->driver->mask_memory(agp_bridge,
-					page_to_phys(mem->pages[i]), mask_type),
+					phys, mask_type),
 			       intel_private.registers+I810_PTE_BASE+(j*4));
 		}
 		readl(intel_private.registers+I810_PTE_BASE+((j-1)*4));
@@ -565,6 +587,12 @@ static struct agp_memory *alloc_agpphysmem_i8xx(size_t pg_count, int type)
 	new->num_scratch_pages = pg_count;
 	new->type = AGP_PHYS_MEMORY;
 	new->physical = page_to_phys(new->pages[0]);
+	if (xen_pv_domain()) {
+		phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
+					page_to_pfn(new->pages[0])));
+		if (xen_phys != new->physical)
+			new->physical = xen_phys;
+	}
 	return new;
 }
 
@@ -1005,8 +1033,16 @@ static int intel_i830_insert_entries(struct agp_memory *mem, off_t pg_start,
 		global_cache_flush();
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
+		phys_addr_t phys = page_to_phys(mem->pages[i]);
+		if (xen_pv_domain()) {
+			phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
+					page_to_pfn(mem->pages[i])));
+			/* Fixup: */
+			if (xen_phys != phys)
+				phys = xen_phys;
+		}
 		writel(agp_bridge->driver->mask_memory(agp_bridge,
-				page_to_phys(mem->pages[i]), mask_type),
+				phys, mask_type),
 		       intel_private.registers+I810_PTE_BASE+(j*4));
 	}
 	readl(intel_private.registers+I810_PTE_BASE+((j-1)*4));
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 3dc8d6b..e3555bf 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -87,6 +87,9 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	bool is_iomem;
 	unsigned long address = (unsigned long)vmf->virtual_address;
 	int retval = VM_FAULT_NOPAGE;
+	bool vm_io = (vma->vm_flags & VM_IO) && VM_IO;
+	bool pte_iomap = (pgprot_val(vma->vm_page_prot) & _PAGE_IOMAP)
+			&& _PAGE_IOMAP;
 
 	/*
 	 * Work around locking order reversal in fault / nopfn
@@ -158,11 +161,30 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (is_iomem) {
 		vma->vm_page_prot = ttm_io_prot(bo->mem.placement,
 						vma->vm_page_prot);
+		if (!vm_io || !pte_iomap) {
+			vma->vm_flags |= VM_IO;
+			pgprot_val(vma->vm_page_prot) |= _PAGE_IOMAP;
+		}
 	} else {
 		ttm = bo->ttm;
 		vma->vm_page_prot = (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
 		    vm_get_page_prot(vma->vm_flags) :
 		    ttm_io_prot(bo->mem.placement, vma->vm_page_prot);
+		/*
+		 * During PCI suspend the graphic cards purge their VRAM and
+		 * move their graphic objects to the TT. They also unmap all
+		 * of the objects, meaning that when an user application is
+		 * unfrozen it will re-fault  and call here.
+		 *
+		 * What this means is that the VMA for the graphic object might
+		 * have been set for VRAM TTM but now it is with the TT
+		 * (normal RAM) meaning that the vma->vm_flags could be
+		 * inappropiate (say, VM_IO on TT - no good).
+		 */
+		if (vm_io || pte_iomap) {
+			vma->vm_flags &= ~VM_IO;
+			pgprot_val(vma->vm_page_prot) &= ~_PAGE_IOMAP;
+		}
 	}
 
 	/*
@@ -239,6 +261,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 {
 	struct ttm_bo_driver *driver;
 	struct ttm_buffer_object *bo;
+	struct ttm_mem_type_manager *man;
 	int ret;
 
 	read_lock(&bdev->vm_lock);
@@ -271,7 +294,10 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 	 */
 
 	vma->vm_private_data = bo;
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_RESERVED | VM_MIXEDMAP | VM_DONTEXPAND;
+	man = &bdev->man[bo->mem.mem_type];
+	if (man->flags & TTM_MEMTYPE_FLAG_NEEDS_IOREMAP)
+		vma->vm_flags |= VM_IO;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	return 0;
 out_unref:
@@ -288,7 +314,6 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
 	vma->vm_ops = &ttm_bo_vm_ops;
 	vma->vm_private_data = ttm_bo_reference(bo);
 	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
-	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_fbdev_mmap);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 3d5b8b0..8b05e38 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -38,7 +38,8 @@
 #include "ttm/ttm_module.h"
 #include "ttm/ttm_bo_driver.h"
 #include "ttm/ttm_placement.h"
-
+#include <linux/dma-mapping.h>
+#include <xen/xen.h>
 static int ttm_tt_swapin(struct ttm_tt *ttm);
 
 /**
@@ -84,6 +85,16 @@ static struct page *ttm_tt_alloc_page(unsigned page_flags)
 	else
 		gfp_flags |= __GFP_HIGHMEM;
 
+	if ((page_flags & TTM_PAGE_FLAG_DMA32) && xen_pv_domain())
+	{
+		void *addr;
+		dma_addr_t _d;
+
+		addr = dma_alloc_coherent(NULL, PAGE_SIZE, &_d, GFP_KERNEL);
+		if (addr == NULL)
+			return NULL;
+		return virt_to_page(addr);
+	}
 	return alloc_page(gfp_flags);
 }
 
@@ -286,6 +297,7 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
 	int i;
 	struct page *cur_page;
 	struct ttm_backend *be = ttm->be;
+	void *addr;
 
 	if (be)
 		be->func->clear(be);
@@ -300,7 +312,16 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
 				       "Leaking pages.\n");
 			ttm_mem_global_free_page(ttm->glob->mem_glob,
 						 cur_page);
-			__free_page(cur_page);
+
+			if ((ttm->page_flags & TTM_PAGE_FLAG_DMA32) &&
+				xen_pv_domain()) {
+				addr = page_address(cur_page);
+				WARN_ON(!addr);
+				if (addr)
+					dma_free_coherent(NULL, PAGE_SIZE, addr,
+						  virt_to_bus(addr));
+			} else
+				__free_page(cur_page);
 		}
 	}
 	ttm->state = tt_unpopulated;
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 99bbd28..057433a 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1362,6 +1362,7 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
 	vma->vm_pgoff = off >> PAGE_SHIFT;
 	/* This is an IO map - tell maydump to skip this VMA */
 	vma->vm_flags |= VM_IO | VM_RESERVED;
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	fb_pgprotect(file, vma, off);
 	if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
 			     vma->vm_end - vma->vm_start, vma->vm_page_prot))

Reply to: