[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#503821: Purpose of features/all/xen/workaround-pte-file.patch?



On Wed, 2008-11-05 at 10:05 +0000, Ian Campbell wrote:
> 
> > What happens if you use "nopat" to disable the usage of PAT?
> 
> CONFIG_X86_PAT is disabled anyway so it makes no difference.

Although it does turn out that PAT is implicated...

The key turned out to be the difference in _PAGE_CACHE_MASK between the
asm and mach-xen/asm versions of pgtable.h.
        -#define _PAGE_CACHE_MASK       (_PAGE_PCD | _PAGE_PWT)
        [...]
        +#define _PAGE_CACHE_MASK       (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)

Native sets up PAT such that only the PCD and PWD bits are used for the
cache modes the kernel is interested in (WC, WC, UC, UC-) and the PAT
bit is unused. On Xen PAT is controlled by the hypervisor and has a
static setting which uses the PAT bit for WC.

        PAT PCD PWT	NATIVE	XEN	BIOS	INIT
        0   0   0	WB	WB	WB	WB
        0   0   1	WC	WT	WT	WT
        0   1   0	UC-	UC-	UC-	UC-
        0   1   1	UC	UC	-	UC
        1   0   0	-	WC	WB	WB
        1   0   1	-	WP	WT	WT
        1   1   0	-	-	UC-	UC-
        1   1   1	-	-	-	UC-
(INIT is the processors initial state and BIOS is apparently commonly
set by the BIOS)

The kernel uses WB and UC/UC- and conditionally uses WC IFF PAT is
compiled in (CONFIG_X86_PAT) and enabled (pat_wc_enabled).

Now it seems that on native the masking and checking etc of PROTNONE
pages works fine because _PAGE_CACHE_MASK does not include _PAGE_PAT
(which == _PAGE_PSE used by PROTNONE). Xen broke this by adding that bit
to the mask which presumably leads to confusion on some paths and then
to the observed mprotect crash...

Since we disable CONFIG_X86_PAT it is much simpler to revert the change
to _PAGE_CACHE_MASK than to figure out where the confusion occurs. The
attached patch is bigger than this trivial change since I #undefined
_PAGE_CACHE_{WT,WC,WP} to be sure they were unused and had to #ifdef
their use. WT and WP are completely unused anyway and WC is always gated
on pat_wc_enabled and hence would have been compiled out.

With this both the mprotect and the swap death test cases work (or
correctly OOM in the swap case).

Obviously workaround-pte-file.patch should be dropped too. I never got
to the bottom of why the workaround caused the swap issue. I'll hand
wave it as "incorrect masking" somewhere ;-)

Shall I commit?

Ian.

Index: sid-xen/include/asm-x86/mach-xen/asm/pgtable.h
===================================================================
--- sid-xen.orig/include/asm-x86/mach-xen/asm/pgtable.h	2008-11-05 09:47:42.000000000 +0000
+++ sid-xen/include/asm-x86/mach-xen/asm/pgtable.h	2008-11-05 12:32:00.000000000 +0000
@@ -67,18 +67,20 @@
 			 _PAGE_DIRTY | __kernel_page_user)
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_CACHE_MASK | _PAGE_IO |	\
+#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_PCD | _PAGE_PWT | _PAGE_IO |	\
 			 _PAGE_ACCESSED | _PAGE_DIRTY)
 
 /*
  * PAT settings are part of the hypervisor interface, which sets the
  * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
  */
-#define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
+#define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT)
 #define _PAGE_CACHE_WB		(0)
+#ifdef CONFIG_X86_PAT
 #define _PAGE_CACHE_WT		(_PAGE_PWT)
 #define _PAGE_CACHE_WC		(_PAGE_PAT)
 #define _PAGE_CACHE_WP		(_PAGE_PAT | _PAGE_PWT)
+#endif
 #define _PAGE_CACHE_UC_MINUS	(_PAGE_PCD)
 #define _PAGE_CACHE_UC		(_PAGE_PCD | _PAGE_PWT)
 
Index: sid-xen/arch/x86/Kconfig
===================================================================
--- sid-xen.orig/arch/x86/Kconfig	2008-11-05 12:26:55.000000000 +0000
+++ sid-xen/arch/x86/Kconfig	2008-11-05 12:27:02.000000000 +0000
@@ -1141,6 +1141,7 @@
 	bool
 	prompt "x86 PAT support"
 	depends on MTRR
+	depends on !XEN
 	help
 	  Use PAT attributes to setup page level cache control.
 
Index: sid-xen/arch/x86/mm/ioremap-xen.c
===================================================================
--- sid-xen.orig/arch/x86/mm/ioremap-xen.c	2008-11-05 12:37:53.000000000 +0000
+++ sid-xen/arch/x86/mm/ioremap-xen.c	2008-11-05 12:43:06.000000000 +0000
@@ -255,9 +255,11 @@
 	default:
 		err = _set_memory_uc(vaddr, nrpages);
 		break;
+#ifdef CONFIG_X86_PAT
 	case _PAGE_CACHE_WC:
 		err = _set_memory_wc(vaddr, nrpages);
 		break;
+#endif
 	case _PAGE_CACHE_WB:
 		err = _set_memory_wb(vaddr, nrpages);
 		break;
@@ -340,11 +342,17 @@
 		 * - request is uc-, return cannot be write-combine
 		 * - request is write-combine, return cannot be write-back
 		 */
-		if ((prot_val == _PAGE_CACHE_UC_MINUS &&
+		if (
+#ifdef CONFIG_X86_PAT
+		    (prot_val == _PAGE_CACHE_UC_MINUS &&
 		     (new_prot_val == _PAGE_CACHE_WB ||
 		      new_prot_val == _PAGE_CACHE_WC)) ||
 		    (prot_val == _PAGE_CACHE_WC &&
-		     new_prot_val == _PAGE_CACHE_WB)) {
+		     new_prot_val == _PAGE_CACHE_WB)
+#else
+		    (prot_val == _PAGE_CACHE_UC_MINUS && new_prot_val == _PAGE_CACHE_WB)
+#endif
+		   ) {
 			pr_debug(
 		"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
 				(unsigned long long)phys_addr,
@@ -364,9 +372,11 @@
 	case _PAGE_CACHE_UC_MINUS:
 		prot = PAGE_KERNEL_UC_MINUS;
 		break;
+#ifdef CONFIG_X86_PAT
 	case _PAGE_CACHE_WC:
 		prot = PAGE_KERNEL_WC;
 		break;
+#endif
 	case _PAGE_CACHE_WB:
 		prot = PAGE_KERNEL;
 		break;
@@ -445,10 +455,12 @@
  */
 void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
 {
+#ifdef CONFIG_X86_PAT
 	if (pat_wc_enabled)
 		return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
 					__builtin_return_address(0));
 	else
+#endif
 		return ioremap_nocache(phys_addr, size);
 }
 EXPORT_SYMBOL(ioremap_wc);
Index: sid-xen/arch/x86/mm/pageattr-xen.c
===================================================================
--- sid-xen.orig/arch/x86/mm/pageattr-xen.c	2008-11-05 12:30:56.000000000 +0000
+++ sid-xen/arch/x86/mm/pageattr-xen.c	2008-11-05 12:34:37.000000000 +0000
@@ -817,14 +817,17 @@
 }
 EXPORT_SYMBOL(set_memory_uc);
 
+#ifdef CONFIG_X86_PAT
 int _set_memory_wc(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(addr, numpages,
 				    __pgprot(_PAGE_CACHE_WC));
 }
+#endif
 
 int set_memory_wc(unsigned long addr, int numpages)
 {
+#ifdef CONFIG_X86_PAT
 	if (!pat_wc_enabled)
 		return set_memory_uc(addr, numpages);
 
@@ -833,6 +836,9 @@
 		return -EINVAL;
 
 	return _set_memory_wc(addr, numpages);
+#else
+	return set_memory_uc(addr, numpages);
+#endif
 }
 EXPORT_SYMBOL(set_memory_wc);
 
Index: sid-xen/arch/x86/mm/pat-xen.c
===================================================================
--- sid-xen.orig/arch/x86/mm/pat-xen.c	2008-11-05 12:35:28.000000000 +0000
+++ sid-xen/arch/x86/mm/pat-xen.c	2008-11-05 12:37:44.000000000 +0000
@@ -116,9 +116,11 @@
 		case _PAGE_CACHE_UC:		return "uncached";
 		case _PAGE_CACHE_UC_MINUS:	return "uncached-minus";
 		case _PAGE_CACHE_WB:		return "write-back";
+#ifdef CONFIG_X86_PAT
 		case _PAGE_CACHE_WC:		return "write-combining";
 		case _PAGE_CACHE_WP:		return "write-protected";
 		case _PAGE_CACHE_WT:		return "write-through";
+#endif
 		default:			return "broken";
 	}
 }
@@ -157,6 +159,7 @@
  * The intersection is based on "Effective Memory Type" tables in IA-32
  * SDM vol 3a
  */
+#ifdef CONFIG_X86_PAT
 static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
 				unsigned long *ret_prot)
 {
@@ -199,6 +202,7 @@
 
 	return 0;
 }
+#endif
 
 /*
  * req_type typically has one of the:
@@ -218,6 +222,7 @@
 int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 			unsigned long *ret_type)
 {
+#ifdef CONFIG_X86_PAT
 	struct memtype *new_entry = NULL;
 	struct memtype *parse;
 	unsigned long actual_type;
@@ -431,6 +436,17 @@
 
 	spin_unlock(&memtype_lock);
 	return err;
+#else
+	/* This is identical to page table setting without PAT */
+	if (ret_type) {
+		if (req_type == -1) {
+			*ret_type = _PAGE_CACHE_WB;
+		} else {
+			*ret_type = req_type;
+		}
+	}
+	return 0;
+#endif
 }
 
 int free_memtype(u64 start, u64 end)


-- 
Ian Campbell
Current Noise: Kreator - Dying Race Apocalypse

According to Kentucky state law, every person must take a bath at least
once a year.




Reply to: