[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#503821: Purpose of features/all/xen/workaround-pte-file.patch?



On Tue, 2008-11-04 at 19:12 +0000, Ian Campbell wrote:
> On Tue, 2008-11-04 at 17:43 +0100, Bastian Blank wrote:
> > On Tue, Nov 04, 2008 at 02:26:33PM +0000, Ian Campbell wrote:
> > > On Tue, 2008-11-04 at 14:02 +0100, Bastian Blank wrote:
> > > > Maybe its the best to remove the workaround and instead cripple mprotect
> > > > to not allow PROT_NONE for now. And then hope that this can't be
> > > > triggered by mmap with PROT_NONE.
> > > I was thinking of going down the path of removing the workaround then
> > > fixing mprotect, so your suggestion would be a consistant first step I
> > > think.

This patch makes mprotect work by (very skankily) hacking out large page
support, which is unsupported on top of Xen anyway (I think so, currently
anyway). I think I took out PAT as collateral damage too. A cleaned-up
version without the PAT damage might be an acceptable fix for the
mprotect issue.

My suspicion is that one of the -xen.c or mach-xen/asm/ files has gotten
out of sync with a fix to its native partner, since _PAGE_PSE is used for
_PAGE_PROTNONE on native too, so they must get round it somehow. I'll have
a scrobble through and see if I can see it.

Ian.
-- 
Ian Campbell

Once I finally figured out all of life's answers, they changed the
questions.
Index: sid-xen/mm/mprotect.c
===================================================================
--- sid-xen.orig/mm/mprotect.c	2008-11-05 06:41:55.000000000 +0000
+++ sid-xen/mm/mprotect.c	2008-11-05 06:51:56.000000000 +0000
@@ -39,6 +39,7 @@
 {
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
+	int debug = !strcmp(current->comm, "mprot");
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -60,6 +61,9 @@
 			if (dirty_accountable && pte_dirty(ptent))
 				ptent = pte_mkwrite(ptent);
 			set_pte_at(mm, addr, pte, ptent);
+			if (debug)
+				printk(KERN_CRIT "change present pte @ %p %#lx -> %#lx\n",
+				       pte, oldpte.pte, ptent.pte);
 #ifdef CONFIG_MIGRATION
 		} else if (!pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -227,6 +231,7 @@
 {
 	unsigned long vm_flags, nstart, end, tmp, reqprot;
 	struct vm_area_struct *vma, *prev;
+	int debug = !strcmp(current->comm, "mprot");
 	int error = -EINVAL;
 	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
 	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
@@ -280,6 +285,8 @@
 	if (start > vma->vm_start)
 		prev = vma;
 
+
+
 	for (nstart = start ; ; ) {
 		unsigned long newflags;
 
@@ -287,6 +294,10 @@
 
 		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
 
+		if (debug)
+			printk(KERN_CRIT "mprotect(%s) vma:%p %#lx-%#lx flags:%#lx->%#lx new prot %#lx\n", current->comm,
+			       vma, vma->vm_start, vma->vm_end, vma->vm_flags, newflags, prot);
+
 		/* newflags >> 4 shift VM_MAY% in place of VM_% */
 		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
 			error = -EACCES;
Index: sid-xen/arch/x86/mm/dump_pagetables.c
===================================================================
--- sid-xen.orig/arch/x86/mm/dump_pagetables.c	2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/mm/dump_pagetables.c	2008-11-05 06:44:14.000000000 +0000
@@ -98,15 +98,15 @@
 
 		/* Bit 9 has a different meaning on level 3 vs 4 */
 		if (level <= 3) {
-			if (pr & _PAGE_PSE)
-				seq_printf(m, "PSE ");
-			else
-				seq_printf(m, "    ");
+//			if (pr & _PAGE_PSE)
+//				seq_printf(m, "PSE ");
+//			else
+//				seq_printf(m, "    ");
 		} else {
-			if (pr & _PAGE_PAT)
-				seq_printf(m, "pat ");
-			else
-				seq_printf(m, "    ");
+//			if (pr & _PAGE_PAT)
+//				seq_printf(m, "pat ");
+//			else
+//				seq_printf(m, "    ");
 		}
 		if (pr & _PAGE_GLOBAL)
 			seq_printf(m, "GLB ");
Index: sid-xen/arch/x86/mm/ioremap-xen.c
===================================================================
--- sid-xen.orig/arch/x86/mm/ioremap-xen.c	2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/mm/ioremap-xen.c	2008-11-05 06:44:14.000000000 +0000
@@ -255,12 +255,12 @@
 	default:
 		err = _set_memory_uc(vaddr, nrpages);
 		break;
-	case _PAGE_CACHE_WC:
-		err = _set_memory_wc(vaddr, nrpages);
-		break;
-	case _PAGE_CACHE_WB:
-		err = _set_memory_wb(vaddr, nrpages);
-		break;
+		//case _PAGE_CACHE_WC:
+		//err = _set_memory_wc(vaddr, nrpages);
+		//break;
+		//case _PAGE_CACHE_WB:
+		//err = _set_memory_wb(vaddr, nrpages);
+		//break;
 	}
 
 	return err;
@@ -340,7 +340,7 @@
 		 * - request is uc-, return cannot be write-combine
 		 * - request is write-combine, return cannot be write-back
 		 */
-		if ((prot_val == _PAGE_CACHE_UC_MINUS &&
+/*		if ((prot_val == _PAGE_CACHE_UC_MINUS &&
 		     (new_prot_val == _PAGE_CACHE_WB ||
 		      new_prot_val == _PAGE_CACHE_WC)) ||
 		    (prot_val == _PAGE_CACHE_WC &&
@@ -353,6 +353,7 @@
 			free_memtype(phys_addr, phys_addr + size);
 			return NULL;
 		}
+*/
 		prot_val = new_prot_val;
 	}
 
@@ -364,12 +365,12 @@
 	case _PAGE_CACHE_UC_MINUS:
 		prot = PAGE_KERNEL_UC_MINUS;
 		break;
-	case _PAGE_CACHE_WC:
-		prot = PAGE_KERNEL_WC;
-		break;
-	case _PAGE_CACHE_WB:
-		prot = PAGE_KERNEL;
-		break;
+//	case _PAGE_CACHE_WC:
+//		prot = PAGE_KERNEL_WC;
+//		break;
+//	case _PAGE_CACHE_WB:
+//		prot = PAGE_KERNEL;
+//		break;
 	}
 
 	/*
@@ -446,8 +447,9 @@
 void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
 {
 	if (pat_wc_enabled)
-		return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
-					__builtin_return_address(0));
+		BUG();
+	//return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
+	//				__builtin_return_address(0));
 	else
 		return ioremap_nocache(phys_addr, size);
 }
Index: sid-xen/arch/x86/mm/pageattr-xen.c
===================================================================
--- sid-xen.orig/arch/x86/mm/pageattr-xen.c	2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/mm/pageattr-xen.c	2008-11-05 06:44:14.000000000 +0000
@@ -504,8 +504,9 @@
 
 #ifdef CONFIG_X86_64
 	if (level == PG_LEVEL_1G) {
+		BUG();
 		mfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
-		pgprot_val(ref_prot) |= _PAGE_PSE;
+//		pgprot_val(ref_prot) |= _PAGE_PSE;
 	}
 #endif
 
@@ -714,7 +715,7 @@
 static inline int cache_attr(pgprot_t attr)
 {
 	return pgprot_val(attr) &
-		(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
+		(/*_PAGE_PAT | _PAGE_PAT_LARGE |*/ _PAGE_PWT | _PAGE_PCD);
 }
 
 static int change_page_attr_set_clr(unsigned long addr, int numpages,
@@ -819,18 +820,21 @@
 
 int _set_memory_wc(unsigned long addr, int numpages)
 {
-	return change_page_attr_set(addr, numpages,
-				    __pgprot(_PAGE_CACHE_WC));
+	BUG();
+	return 0;
+	//return change_page_attr_set(addr, numpages,
+	//			    __pgprot(_PAGE_CACHE_WC));
 }
 
 int set_memory_wc(unsigned long addr, int numpages)
 {
-	if (!pat_wc_enabled)
-		return set_memory_uc(addr, numpages);
-
-	if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
-		_PAGE_CACHE_WC, NULL))
-		return -EINVAL;
+	BUG();
+//	if (!pat_wc_enabled)
+//		return set_memory_uc(addr, numpages);
+//
+//	aif (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+//		_PAGE_CACHE_WC, NULL))
+//		return -EINVAL;
 
 	return _set_memory_wc(addr, numpages);
 }
Index: sid-xen/arch/x86/mm/pat-xen.c
===================================================================
--- sid-xen.orig/arch/x86/mm/pat-xen.c	2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/mm/pat-xen.c	2008-11-05 06:44:14.000000000 +0000
@@ -116,9 +116,9 @@
 		case _PAGE_CACHE_UC:		return "uncached";
 		case _PAGE_CACHE_UC_MINUS:	return "uncached-minus";
 		case _PAGE_CACHE_WB:		return "write-back";
-		case _PAGE_CACHE_WC:		return "write-combining";
-		case _PAGE_CACHE_WP:		return "write-protected";
-		case _PAGE_CACHE_WT:		return "write-through";
+			//	case _PAGE_CACHE_WC:		return "write-combining";
+			//case _PAGE_CACHE_WP:		return "write-protected";
+			//case _PAGE_CACHE_WT:		return "write-through";
 		default:			return "broken";
 	}
 }
@@ -172,16 +172,16 @@
 	 * Consistency checks with other PAT requests is done later
 	 * while going through memtype list.
 	 */
-	if (pat_type == _PAGE_CACHE_WC) {
-		*ret_prot = prot | _PAGE_CACHE_WC;
-		return 0;
-	} else if (pat_type == _PAGE_CACHE_UC_MINUS) {
-		*ret_prot = prot | _PAGE_CACHE_UC_MINUS;
-		return 0;
-	} else if (pat_type == _PAGE_CACHE_UC) {
-		*ret_prot = prot | _PAGE_CACHE_UC;
-		return 0;
-	}
+//	if (pat_type == _PAGE_CACHE_WC) {
+//		*ret_prot = prot | _PAGE_CACHE_WC;
+//		return 0;
+//	} else if (pat_type == _PAGE_CACHE_UC_MINUS) {
+//		*ret_prot = prot | _PAGE_CACHE_UC_MINUS;
+//		return 0;
+//	} else if (pat_type == _PAGE_CACHE_UC) {
+//		*ret_prot = prot | _PAGE_CACHE_UC;
+//		return 0;
+//	}
 
 	/*
 	 * Look for MTRR hint to get the effective type in case where PAT
@@ -192,7 +192,8 @@
 	if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
 		*ret_prot = prot | _PAGE_CACHE_UC;
 	} else if (mtrr_type == MTRR_TYPE_WRCOMB) {
-		*ret_prot = prot | _PAGE_CACHE_WC;
+		//*ret_prot = prot | _PAGE_CACHE_WC;
+		BUG();
 	} else {
 		*ret_prot = prot | _PAGE_CACHE_WB;
 	}
Index: sid-xen/arch/x86/xen/mmu.c
===================================================================
--- sid-xen.orig/arch/x86/xen/mmu.c	2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/xen/mmu.c	2008-11-05 06:44:14.000000000 +0000
@@ -156,6 +156,7 @@
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
+	BUG();
 	/* updates to init_mm may be done without lock */
 	if (mm == &init_mm)
 		preempt_disable();
Index: sid-xen/mm/memory.c
===================================================================
--- sid-xen.orig/mm/memory.c	2008-11-05 06:41:41.000000000 +0000
+++ sid-xen/mm/memory.c	2008-11-05 06:44:14.000000000 +0000
@@ -2327,6 +2327,7 @@
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t entry;
+	int debug = !strcmp(current->comm, "mprot");
 
 	/* Allocate our own private page. */
 	pte_unmap(page_table);
@@ -2342,7 +2343,9 @@
 		goto oom_free_page;
 
 	entry = mk_pte(page, vma->vm_page_prot);
+	if (debug) printk("entry %#lx\n", entry.pte);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	if (debug) printk("maybe_mkwrite %#lx\n", entry.pte);
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (!pte_none(*page_table))
@@ -2350,8 +2353,9 @@
 	inc_mm_counter(mm, anon_rss);
 	lru_cache_add_active(page);
 	page_add_new_anon_rmap(page, vma, address);
+	if (debug) printk("set pte at %#lx %p %#lx\n", address, page_table, entry.pte);
 	set_pte_at(mm, address, page_table, entry);
-
+	if (debug) printk("set pte ok\n");
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, entry);
 unlock:
@@ -2645,18 +2649,33 @@
 {
 	pte_t entry;
 	spinlock_t *ptl;
+	int debug = !strcmp(current->comm, "mprot");
 
 	entry = *pte;
+	if (debug) {
+		printk(KERN_CRIT "pte fault on %#lx @ %p\n", entry.pte, pte);
+		printk(KERN_CRIT "vma %p: %#lx-%#lx %#lx\n", vma, vma->vm_start, vma->vm_end, vma->vm_flags);
+	}
 	if (!pte_present(entry)) {
+		if (debug) printk(KERN_CRIT "pte not present\n");
 		if (pte_none(entry)) {
+			if (debug) printk(KERN_CRIT "pte is none\n");
 			if (vma->vm_ops) {
-				if (likely(vma->vm_ops->fault))
+				if (likely(vma->vm_ops->fault)) {
+					if (debug) {
+						printk(KERN_CRIT "handle via vm_ops->fault %pF\n", vma->vm_ops->fault);
+					}
 					return do_linear_fault(mm, vma, address,
 						pte, pmd, write_access, entry);
+				}
 				if (unlikely(vma->vm_ops->nopfn))
+					if (debug) {
+						printk(KERN_CRIT "handle via vm_ops->nopfn %pF\n\n", vma->vm_ops->nopfn);
+					}
 					return do_no_pfn(mm, vma, address, pte,
 							 pmd, write_access);
 			}
+			if (debug) printk(KERN_CRIT "handle as anonymous page\n");
 			return do_anonymous_page(mm, vma, address,
 						 pte, pmd, write_access);
 		}
@@ -2665,6 +2684,8 @@
 					pte, pmd, write_access, entry);
 		return do_swap_page(mm, vma, address,
 					pte, pmd, write_access, entry);
+	} else {
+			if (debug) printk(KERN_CRIT "pte is present\n");
 	}
 
 	ptl = pte_lockptr(mm, pmd);
Index: sid-xen/include/asm-x86/mach-xen/asm/pgtable.h
===================================================================
--- sid-xen.orig/include/asm-x86/mach-xen/asm/pgtable.h	2008-11-05 06:54:23.000000000 +0000
+++ sid-xen/include/asm-x86/mach-xen/asm/pgtable.h	2008-11-05 06:55:53.000000000 +0000
@@ -11,14 +11,14 @@
 #define _PAGE_BIT_ACCESSED	5	/* was accessed (raised by CPU) */
 #define _PAGE_BIT_DIRTY		6	/* was written to (raised by CPU) */
 #define _PAGE_BIT_FILE		6
-#define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
-#define _PAGE_BIT_PAT		7	/* on 4KB pages */
+#define _PAGE_BIT_PSE_		7	/* 4 MB (or 2MB) page */
+#define _PAGE_BIT_PAT_		7	/* on 4KB pages */
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_IO		9	/* Mapped page is I/O or foreign and
 					 * has no associated page struct. */
 #define _PAGE_BIT_UNUSED2	10	/* available for programmer */
 #define _PAGE_BIT_UNUSED3	11
-#define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
+#define _PAGE_BIT_PAT_LARGE_	12	/* On 2MB or 1GB pages */
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
 /*
@@ -33,13 +33,13 @@
 #define _PAGE_PCD	(_AC(1, L)<<_PAGE_BIT_PCD)
 #define _PAGE_ACCESSED	(_AC(1, L)<<_PAGE_BIT_ACCESSED)
 #define _PAGE_DIRTY	(_AC(1, L)<<_PAGE_BIT_DIRTY)
-#define _PAGE_PSE	(_AC(1, L)<<_PAGE_BIT_PSE)	/* 2MB page */
+#define _PAGE_PSE_	(_AC(1, L)<<_PAGE_BIT_PSE_)	/* 2MB page */
 #define _PAGE_GLOBAL	(_AC(1, L)<<_PAGE_BIT_GLOBAL)	/* Global TLB entry */
 #define _PAGE_IO	(_AC(1, L)<<_PAGE_BIT_IO)
 #define _PAGE_UNUSED2	(_AC(1, L)<<_PAGE_BIT_UNUSED2)
 #define _PAGE_UNUSED3	(_AC(1, L)<<_PAGE_BIT_UNUSED3)
-#define _PAGE_PAT	(_AC(1, L)<<_PAGE_BIT_PAT)
-#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE)
+#define _PAGE_PAT_	(_AC(1, L)<<_PAGE_BIT_PAT_)
+#define _PAGE_PAT_LARGE_ (_AC(1, L)<<_PAGE_BIT_PAT_LARGE_)
 
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AC(1, ULL) << _PAGE_BIT_NX)
@@ -50,7 +50,7 @@
 /* If _PAGE_PRESENT is clear, we use these: */
 #define _PAGE_FILE	_PAGE_DIRTY	/* nonlinear file mapping,
 					 * saved PTE; unset:swap */
-#define _PAGE_PROTNONE	_PAGE_PSE	/* if the user mapped it with PROT_NONE;
+#define _PAGE_PROTNONE	_PAGE_PSE_	/* if the user mapped it with PROT_NONE;
 					   pte_present gives true */
 
 #ifndef __ASSEMBLY__
@@ -74,11 +74,11 @@
  * PAT settings are part of the hypervisor interface, which sets the
  * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
  */
-#define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
+#define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT /*| _PAGE_PAT*/)
 #define _PAGE_CACHE_WB		(0)
 #define _PAGE_CACHE_WT		(_PAGE_PWT)
-#define _PAGE_CACHE_WC		(_PAGE_PAT)
-#define _PAGE_CACHE_WP		(_PAGE_PAT | _PAGE_PWT)
+//#define _PAGE_CACHE_WC		(_PAGE_PAT)
+//#define _PAGE_CACHE_WP		(_PAGE_PAT | _PAGE_PWT)
 #define _PAGE_CACHE_UC_MINUS	(_PAGE_PCD)
 #define _PAGE_CACHE_UC		(_PAGE_PCD | _PAGE_PWT)
 
@@ -120,8 +120,8 @@
 #define __PAGE_KERNEL_UC_MINUS		(__PAGE_KERNEL | _PAGE_PCD)
 #define __PAGE_KERNEL_VSYSCALL		(__PAGE_KERNEL_RX | _PAGE_USER)
 #define __PAGE_KERNEL_VSYSCALL_NOCACHE	(__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
-#define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL /*| _PAGE_PSE*/)
+#define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC /*| _PAGE_PSE*/)
 
 /*
  * We don't support GLOBAL page in xenolinux64
@@ -198,7 +198,8 @@
 
 static inline int pte_huge(pte_t pte)
 {
-	return __pte_val(pte) & _PAGE_PSE;
+	return 0;
+	//return __pte_val(pte) & _PAGE_PSE;
 }
 
 static inline int pte_global(pte_t pte)
@@ -218,8 +219,9 @@
 
 static inline int pmd_large(pmd_t pte)
 {
-	return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
-		(_PAGE_PSE | _PAGE_PRESENT);
+	return 0;
+	//return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
+	//x	(_PAGE_PSE | _PAGE_PRESENT);
 }
 
 static inline pte_t pte_mkclean(pte_t pte)
@@ -259,12 +261,16 @@
 
 static inline pte_t pte_mkhuge(pte_t pte)
 {
-	return __pte_ma(__pte_val(pte) | _PAGE_PSE);
+	BUG();
+	return pte;
+	//return __pte_ma(__pte_val(pte) | _PAGE_PSE);
 }
 
 static inline pte_t pte_clrhuge(pte_t pte)
 {
-	return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
+	BUG();
+	return pte;
+	//return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
 }
 
 static inline pte_t pte_mkglobal(pte_t pte)

Attachment: signature.asc
Description: This is a digitally signed message part


Reply to: