On Tue, 2008-11-04 at 19:12 +0000, Ian Campbell wrote: > On Tue, 2008-11-04 at 17:43 +0100, Bastian Blank wrote: > > On Tue, Nov 04, 2008 at 02:26:33PM +0000, Ian Campbell wrote: > > > On Tue, 2008-11-04 at 14:02 +0100, Bastian Blank wrote: > > > > Maybe its the best to remove the workaround and instead cripple mprotect > > > > to not allow PROT_NONE for now. And then hope that this can't be > > > > triggered by mmap with PROT_NONE. > > > I was thinking of going down the path of removing the workaround then > > > fixing mprotect, so your suggestion would be a consistent first step I > > > think. This patch makes mprotect work by (very skankily) hacking out large page support which is unsupported on top of Xen anyway (I think so, currently anyway). I think I took out PAT as collateral damage too. A cleaned up version without the pat damage might be an acceptable fix for the mprotect issue. My suspicion is that one of the -xen.c or mach-xen/asm/ files has gotten out of sync with a fix to its native partner since _PAGE_PSE is used for PROTNONE on native too so they must get round it somehow. I'll have a scrobble through and see if I can see it. Ian. -- Ian Campbell Once I finally figured out all of life's answers, they changed the questions.
Index: sid-xen/mm/mprotect.c
===================================================================
--- sid-xen.orig/mm/mprotect.c 2008-11-05 06:41:55.000000000 +0000
+++ sid-xen/mm/mprotect.c 2008-11-05 06:51:56.000000000 +0000
@@ -39,6 +39,7 @@
{
pte_t *pte, oldpte;
spinlock_t *ptl;
+ int debug = !strcmp(current->comm, "mprot");
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
arch_enter_lazy_mmu_mode();
@@ -60,6 +61,9 @@
if (dirty_accountable && pte_dirty(ptent))
ptent = pte_mkwrite(ptent);
set_pte_at(mm, addr, pte, ptent);
+ if (debug)
+ printk(KERN_CRIT "change present pte @ %p %#lx -> %#lx\n",
+ pte, oldpte.pte, ptent.pte);
#ifdef CONFIG_MIGRATION
} else if (!pte_file(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -227,6 +231,7 @@
{
unsigned long vm_flags, nstart, end, tmp, reqprot;
struct vm_area_struct *vma, *prev;
+ int debug = !strcmp(current->comm, "mprot");
int error = -EINVAL;
const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
@@ -280,6 +285,8 @@
if (start > vma->vm_start)
prev = vma;
+
+
for (nstart = start ; ; ) {
unsigned long newflags;
@@ -287,6 +294,10 @@
newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
+ if (debug)
+ printk(KERN_CRIT "mprotect(%s) vma:%p %#lx-%#lx flags:%#lx->%#lx new prot %#lx\n", current->comm,
+ vma, vma->vm_start, vma->vm_end, vma->vm_flags, newflags, prot);
+
/* newflags >> 4 shift VM_MAY% in place of VM_% */
if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
error = -EACCES;
Index: sid-xen/arch/x86/mm/dump_pagetables.c
===================================================================
--- sid-xen.orig/arch/x86/mm/dump_pagetables.c 2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/mm/dump_pagetables.c 2008-11-05 06:44:14.000000000 +0000
@@ -98,15 +98,15 @@
/* Bit 9 has a different meaning on level 3 vs 4 */
if (level <= 3) {
- if (pr & _PAGE_PSE)
- seq_printf(m, "PSE ");
- else
- seq_printf(m, " ");
+// if (pr & _PAGE_PSE)
+// seq_printf(m, "PSE ");
+// else
+// seq_printf(m, " ");
} else {
- if (pr & _PAGE_PAT)
- seq_printf(m, "pat ");
- else
- seq_printf(m, " ");
+// if (pr & _PAGE_PAT)
+// seq_printf(m, "pat ");
+// else
+// seq_printf(m, " ");
}
if (pr & _PAGE_GLOBAL)
seq_printf(m, "GLB ");
Index: sid-xen/arch/x86/mm/ioremap-xen.c
===================================================================
--- sid-xen.orig/arch/x86/mm/ioremap-xen.c 2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/mm/ioremap-xen.c 2008-11-05 06:44:14.000000000 +0000
@@ -255,12 +255,12 @@
default:
err = _set_memory_uc(vaddr, nrpages);
break;
- case _PAGE_CACHE_WC:
- err = _set_memory_wc(vaddr, nrpages);
- break;
- case _PAGE_CACHE_WB:
- err = _set_memory_wb(vaddr, nrpages);
- break;
+ //case _PAGE_CACHE_WC:
+ //err = _set_memory_wc(vaddr, nrpages);
+ //break;
+ //case _PAGE_CACHE_WB:
+ //err = _set_memory_wb(vaddr, nrpages);
+ //break;
}
return err;
@@ -340,7 +340,7 @@
* - request is uc-, return cannot be write-combine
* - request is write-combine, return cannot be write-back
*/
- if ((prot_val == _PAGE_CACHE_UC_MINUS &&
+/* if ((prot_val == _PAGE_CACHE_UC_MINUS &&
(new_prot_val == _PAGE_CACHE_WB ||
new_prot_val == _PAGE_CACHE_WC)) ||
(prot_val == _PAGE_CACHE_WC &&
@@ -353,6 +353,7 @@
free_memtype(phys_addr, phys_addr + size);
return NULL;
}
+*/
prot_val = new_prot_val;
}
@@ -364,12 +365,12 @@
case _PAGE_CACHE_UC_MINUS:
prot = PAGE_KERNEL_UC_MINUS;
break;
- case _PAGE_CACHE_WC:
- prot = PAGE_KERNEL_WC;
- break;
- case _PAGE_CACHE_WB:
- prot = PAGE_KERNEL;
- break;
+// case _PAGE_CACHE_WC:
+// prot = PAGE_KERNEL_WC;
+// break;
+// case _PAGE_CACHE_WB:
+// prot = PAGE_KERNEL;
+// break;
}
/*
@@ -446,8 +447,9 @@
void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
{
if (pat_wc_enabled)
- return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
- __builtin_return_address(0));
+ BUG();
+ //return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
+ // __builtin_return_address(0));
else
return ioremap_nocache(phys_addr, size);
}
Index: sid-xen/arch/x86/mm/pageattr-xen.c
===================================================================
--- sid-xen.orig/arch/x86/mm/pageattr-xen.c 2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/mm/pageattr-xen.c 2008-11-05 06:44:14.000000000 +0000
@@ -504,8 +504,9 @@
#ifdef CONFIG_X86_64
if (level == PG_LEVEL_1G) {
+ BUG();
mfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
- pgprot_val(ref_prot) |= _PAGE_PSE;
+// pgprot_val(ref_prot) |= _PAGE_PSE;
}
#endif
@@ -714,7 +715,7 @@
static inline int cache_attr(pgprot_t attr)
{
return pgprot_val(attr) &
- (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
+ (/*_PAGE_PAT | _PAGE_PAT_LARGE |*/ _PAGE_PWT | _PAGE_PCD);
}
static int change_page_attr_set_clr(unsigned long addr, int numpages,
@@ -819,18 +820,21 @@
int _set_memory_wc(unsigned long addr, int numpages)
{
- return change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_CACHE_WC));
+ BUG();
+ return 0;
+ //return change_page_attr_set(addr, numpages,
+ // __pgprot(_PAGE_CACHE_WC));
}
int set_memory_wc(unsigned long addr, int numpages)
{
- if (!pat_wc_enabled)
- return set_memory_uc(addr, numpages);
-
- if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
- _PAGE_CACHE_WC, NULL))
- return -EINVAL;
+ BUG();
+// if (!pat_wc_enabled)
+// return set_memory_uc(addr, numpages);
+//
+// if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+// _PAGE_CACHE_WC, NULL))
+// return -EINVAL;
return _set_memory_wc(addr, numpages);
}
Index: sid-xen/arch/x86/mm/pat-xen.c
===================================================================
--- sid-xen.orig/arch/x86/mm/pat-xen.c 2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/mm/pat-xen.c 2008-11-05 06:44:14.000000000 +0000
@@ -116,9 +116,9 @@
case _PAGE_CACHE_UC: return "uncached";
case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
case _PAGE_CACHE_WB: return "write-back";
- case _PAGE_CACHE_WC: return "write-combining";
- case _PAGE_CACHE_WP: return "write-protected";
- case _PAGE_CACHE_WT: return "write-through";
+ // case _PAGE_CACHE_WC: return "write-combining";
+ //case _PAGE_CACHE_WP: return "write-protected";
+ //case _PAGE_CACHE_WT: return "write-through";
default: return "broken";
}
}
@@ -172,16 +172,16 @@
* Consistency checks with other PAT requests is done later
* while going through memtype list.
*/
- if (pat_type == _PAGE_CACHE_WC) {
- *ret_prot = prot | _PAGE_CACHE_WC;
- return 0;
- } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
- *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
- return 0;
- } else if (pat_type == _PAGE_CACHE_UC) {
- *ret_prot = prot | _PAGE_CACHE_UC;
- return 0;
- }
+// if (pat_type == _PAGE_CACHE_WC) {
+// *ret_prot = prot | _PAGE_CACHE_WC;
+// return 0;
+// } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
+// *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
+// return 0;
+// } else if (pat_type == _PAGE_CACHE_UC) {
+// *ret_prot = prot | _PAGE_CACHE_UC;
+// return 0;
+// }
/*
* Look for MTRR hint to get the effective type in case where PAT
@@ -192,7 +192,8 @@
if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
*ret_prot = prot | _PAGE_CACHE_UC;
} else if (mtrr_type == MTRR_TYPE_WRCOMB) {
- *ret_prot = prot | _PAGE_CACHE_WC;
+ //*ret_prot = prot | _PAGE_CACHE_WC;
+ BUG();
} else {
*ret_prot = prot | _PAGE_CACHE_WB;
}
Index: sid-xen/arch/x86/xen/mmu.c
===================================================================
--- sid-xen.orig/arch/x86/xen/mmu.c 2008-11-05 06:41:22.000000000 +0000
+++ sid-xen/arch/x86/xen/mmu.c 2008-11-05 06:44:14.000000000 +0000
@@ -156,6 +156,7 @@
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
+ BUG();
/* updates to init_mm may be done without lock */
if (mm == &init_mm)
preempt_disable();
Index: sid-xen/mm/memory.c
===================================================================
--- sid-xen.orig/mm/memory.c 2008-11-05 06:41:41.000000000 +0000
+++ sid-xen/mm/memory.c 2008-11-05 06:44:14.000000000 +0000
@@ -2327,6 +2327,7 @@
struct page *page;
spinlock_t *ptl;
pte_t entry;
+ int debug = !strcmp(current->comm, "mprot");
/* Allocate our own private page. */
pte_unmap(page_table);
@@ -2342,7 +2343,9 @@
goto oom_free_page;
entry = mk_pte(page, vma->vm_page_prot);
+ if (debug) printk("entry %#lx\n", entry.pte);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ if (debug) printk("maybe_mkwrite %#lx\n", entry.pte);
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!pte_none(*page_table))
@@ -2350,8 +2353,9 @@
inc_mm_counter(mm, anon_rss);
lru_cache_add_active(page);
page_add_new_anon_rmap(page, vma, address);
+ if (debug) printk("set pte at %#lx %p %#lx\n", address, page_table, entry.pte);
set_pte_at(mm, address, page_table, entry);
-
+ if (debug) printk("set pte ok\n");
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, entry);
unlock:
@@ -2645,18 +2649,34 @@
{
pte_t entry;
spinlock_t *ptl;
+ int debug = !strcmp(current->comm, "mprot");
entry = *pte;
+ if (debug) {
+ printk(KERN_CRIT "pte fault on %#lx @ %p\n", entry.pte, pte);
+ printk(KERN_CRIT "vma %p: %#lx-%#lx %#lx\n", vma, vma->vm_start, vma->vm_end, vma->vm_flags);
+ }
if (!pte_present(entry)) {
+ if (debug) printk(KERN_CRIT "pte not present\n");
if (pte_none(entry)) {
+ if (debug) printk(KERN_CRIT "pte is none\n");
if (vma->vm_ops) {
- if (likely(vma->vm_ops->fault))
+ if (likely(vma->vm_ops->fault)) {
+ if (debug) {
+ printk(KERN_CRIT "handle via vm_ops->fault %pF\n", vma->vm_ops->fault);
+ }
return do_linear_fault(mm, vma, address,
pte, pmd, write_access, entry);
+ }
- if (unlikely(vma->vm_ops->nopfn))
+ if (unlikely(vma->vm_ops->nopfn)) {
+ if (debug) {
+ printk(KERN_CRIT "handle via vm_ops->nopfn %pF\n", vma->vm_ops->nopfn);
+ }
return do_no_pfn(mm, vma, address, pte,
pmd, write_access);
+ }
}
+ if (debug) printk(KERN_CRIT "handle as anonymous page\n");
return do_anonymous_page(mm, vma, address,
pte, pmd, write_access);
}
@@ -2665,6 +2684,8 @@
pte, pmd, write_access, entry);
return do_swap_page(mm, vma, address,
pte, pmd, write_access, entry);
+ } else {
+ if (debug) printk(KERN_CRIT "pte is present\n");
}
ptl = pte_lockptr(mm, pmd);
Index: sid-xen/include/asm-x86/mach-xen/asm/pgtable.h
===================================================================
--- sid-xen.orig/include/asm-x86/mach-xen/asm/pgtable.h 2008-11-05 06:54:23.000000000 +0000
+++ sid-xen/include/asm-x86/mach-xen/asm/pgtable.h 2008-11-05 06:55:53.000000000 +0000
@@ -11,14 +11,14 @@
#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
#define _PAGE_BIT_FILE 6
-#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
-#define _PAGE_BIT_PAT 7 /* on 4KB pages */
+#define _PAGE_BIT_PSE_ 7 /* 4 MB (or 2MB) page */
+#define _PAGE_BIT_PAT_ 7 /* on 4KB pages */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
#define _PAGE_BIT_IO 9 /* Mapped page is I/O or foreign and
* has no associated page struct. */
#define _PAGE_BIT_UNUSED2 10 /* available for programmer */
#define _PAGE_BIT_UNUSED3 11
-#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
+#define _PAGE_BIT_PAT_LARGE_ 12 /* On 2MB or 1GB pages */
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
/*
@@ -33,13 +33,13 @@
#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD)
#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED)
#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY)
-#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */
+#define _PAGE_PSE_ (_AC(1, L)<<_PAGE_BIT_PSE_) /* 2MB page */
#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */
#define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO)
#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2)
#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3)
-#define _PAGE_PAT (_AC(1, L)<<_PAGE_BIT_PAT)
-#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE)
+#define _PAGE_PAT_ (_AC(1, L)<<_PAGE_BIT_PAT_)
+#define _PAGE_PAT_LARGE_ (_AC(1, L)<<_PAGE_BIT_PAT_LARGE_)
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX)
@@ -50,7 +50,7 @@
/* If _PAGE_PRESENT is clear, we use these: */
#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping,
* saved PTE; unset:swap */
-#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE;
+#define _PAGE_PROTNONE _PAGE_PSE_ /* if the user mapped it with PROT_NONE;
pte_present gives true */
#ifndef __ASSEMBLY__
@@ -74,11 +74,11 @@
* PAT settings are part of the hypervisor interface, which sets the
* MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
*/
-#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
+#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT /*| _PAGE_PAT*/)
#define _PAGE_CACHE_WB (0)
#define _PAGE_CACHE_WT (_PAGE_PWT)
-#define _PAGE_CACHE_WC (_PAGE_PAT)
-#define _PAGE_CACHE_WP (_PAGE_PAT | _PAGE_PWT)
+//#define _PAGE_CACHE_WC (_PAGE_PAT)
+//#define _PAGE_CACHE_WP (_PAGE_PAT | _PAGE_PWT)
#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
@@ -120,8 +120,8 @@
#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL /*| _PAGE_PSE*/)
+#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC /*| _PAGE_PSE*/)
/*
* We don't support GLOBAL page in xenolinux64
@@ -198,7 +198,8 @@
static inline int pte_huge(pte_t pte)
{
- return __pte_val(pte) & _PAGE_PSE;
+ return 0;
+ //return __pte_val(pte) & _PAGE_PSE;
}
static inline int pte_global(pte_t pte)
@@ -218,8 +219,9 @@
static inline int pmd_large(pmd_t pte)
{
- return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
- (_PAGE_PSE | _PAGE_PRESENT);
+ return 0;
+ //return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
+ // (_PAGE_PSE | _PAGE_PRESENT);
}
static inline pte_t pte_mkclean(pte_t pte)
@@ -259,12 +261,16 @@
static inline pte_t pte_mkhuge(pte_t pte)
{
- return __pte_ma(__pte_val(pte) | _PAGE_PSE);
+ BUG();
+ return pte;
+ //return __pte_ma(__pte_val(pte) | _PAGE_PSE);
}
static inline pte_t pte_clrhuge(pte_t pte)
{
- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
+ BUG();
+ return pte;
+ //return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
}
static inline pte_t pte_mkglobal(pte_t pte)
Attachment:
signature.asc
Description: This is a digitally signed message part