Re: Stable update for xen



On Thu, Jun 14, 2012 at 07:55:21PM +0100, Adam D. Barratt wrote:
> On Thu, 2012-06-14 at 20:51 +0200, Bastian Blank wrote:
> > I'd like to fix a boot error of Xen on several newer machines in
> > stable.
> I'm assuming this is fixed in at least unstable already, given the dates
> of the commits referenced in the bug report?

The fix is included upstream in 4.1.0.

> Please could we have a full source debdiff for the proposed package,
> against the package currently in stable?

Sure.

Bastian

-- 
The idea of male and female are universal constants.
		-- Kirk, "Metamorphosis", stardate 3219.8
diff -Nru xen-4.0.1/debian/changelog xen-4.0.1/debian/changelog
--- xen-4.0.1/debian/changelog	2011-06-09 20:35:07.000000000 +0200
+++ xen-4.0.1/debian/changelog	2012-06-14 20:27:57.000000000 +0200
@@ -1,3 +1,23 @@
+xen (4.0.1-6) UNRELEASED; urgency=low
+
+  [ Ian Campbell ]
+  * Backport fix to remove lowmem 1:1 mapping that fixes boot on some
+    classes of machines. (Closes: #649923)
+
+ -- Bastian Blank <waldi@debian.org>  Thu, 14 Jun 2012 20:27:03 +0200
+
+xen (4.0.1-5) stable-security; urgency=low
+
+  * Fix privilege escalation and syscall/sysenter DoS while using
+    non-canonical addresses by untrusted PV guests.
+    CVE-2012-0217
+    CVE-2012-0218
+  * Disable Xen on CPUs affected by AMD Erratum #121. PV guests can
+    cause a DoS of the host.
+    CVE-2012-2934
+
+ -- Bastian Blank <waldi@debian.org>  Mon, 11 Jun 2012 18:12:37 +0000
+
 xen (4.0.1-4) stable-security; urgency=low
 
   * Fix overflows and missing error checks in PV kernel loader.
diff -Nru xen-4.0.1/debian/control.md5sum xen-4.0.1/debian/control.md5sum
--- xen-4.0.1/debian/control.md5sum	2011-06-09 20:36:05.000000000 +0200
+++ xen-4.0.1/debian/control.md5sum	2012-06-14 20:31:20.000000000 +0200
@@ -1,4 +1,4 @@
-3207088ea024aa07513e3c44b7d3e1af  debian/changelog
+6a070480a54a79a74d6623a07ff8beb7  debian/changelog
 24f2598a23e30264aea4a983d5d19eec  debian/bin/gencontrol.py
 ee1ccd7bf0932a81ca221cab08347614  debian/templates/control.hypervisor.in
 e4335ab10e217a12328cdf123473ed37  debian/templates/control.main.in
diff -Nru xen-4.0.1/debian/patches/CVE-2012-0217+2012-0218 xen-4.0.1/debian/patches/CVE-2012-0217+2012-0218
--- xen-4.0.1/debian/patches/CVE-2012-0217+2012-0218	1970-01-01 01:00:00.000000000 +0100
+++ xen-4.0.1/debian/patches/CVE-2012-0217+2012-0218	2012-06-14 20:24:30.000000000 +0200
@@ -0,0 +1,96 @@
+diff -r d8fd425b60d3 xen/arch/x86/x86_64/asm-offsets.c
+--- a/xen/arch/x86/x86_64/asm-offsets.c	Tue May 01 14:18:46 2012 +0100
++++ b/xen/arch/x86/x86_64/asm-offsets.c	Thu May 24 11:18:47 2012 +0100
+@@ -89,6 +89,8 @@ void __dummy__(void)
+            arch.guest_context.trap_ctxt[TRAP_gp_fault].address);
+     OFFSET(VCPU_gp_fault_sel, struct vcpu,
+            arch.guest_context.trap_ctxt[TRAP_gp_fault].cs);
++    OFFSET(VCPU_gp_fault_flags, struct vcpu,
++           arch.guest_context.trap_ctxt[TRAP_gp_fault].flags);
+     OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
+     OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
+     OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
+diff -r d8fd425b60d3 xen/arch/x86/x86_64/compat/entry.S
+--- a/xen/arch/x86/x86_64/compat/entry.S	Tue May 01 14:18:46 2012 +0100
++++ b/xen/arch/x86/x86_64/compat/entry.S	Thu May 24 11:18:47 2012 +0100
+@@ -227,6 +227,7 @@ 1:      call  compat_create_bounce_frame
+ ENTRY(compat_post_handle_exception)
+         testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
+         jz    compat_test_all_events
++.Lcompat_bounce_exception:
+         call  compat_create_bounce_frame
+         movb  $0,TRAPBOUNCE_flags(%rdx)
+         jmp   compat_test_all_events
+@@ -243,14 +244,15 @@ ENTRY(compat_syscall)
+ 1:      movq  %rax,TRAPBOUNCE_eip(%rdx)
+         movw  %si,TRAPBOUNCE_cs(%rdx)
+         movb  %cl,TRAPBOUNCE_flags(%rdx)
+-        call  compat_create_bounce_frame
+-        jmp   compat_test_all_events
++        jmp   .Lcompat_bounce_exception
+ 2:      movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+         subl  $2,UREGS_rip(%rsp)
+         movq  VCPU_gp_fault_addr(%rbx),%rax
+         movzwl VCPU_gp_fault_sel(%rbx),%esi
+-        movb  $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+         movl  $0,TRAPBOUNCE_error_code(%rdx)
++        testb $4,VCPU_gp_fault_flags(%rbx)
++        setnz %cl
++        leal  TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE(,%rcx,TBF_INTERRUPT),%ecx
+         jmp   1b
+ 
+ ENTRY(compat_sysenter)
+diff -r d8fd425b60d3 xen/arch/x86/x86_64/entry.S
+--- a/xen/arch/x86/x86_64/entry.S	Tue May 01 14:18:46 2012 +0100
++++ b/xen/arch/x86/x86_64/entry.S	Thu May 24 11:18:47 2012 +0100
+@@ -51,6 +51,13 @@ restore_all_guest:
+         testw $TRAP_syscall,4(%rsp)
+         jz    iret_exit_to_guest
+ 
++        /* Don't use SYSRET path if the return address is not canonical. */
++        movq  8(%rsp),%rcx
++        sarq  $47,%rcx
++        incl  %ecx
++        cmpl  $1,%ecx
++        ja    .Lforce_iret
++
+         addq  $8,%rsp
+         popq  %rcx                    # RIP
+         popq  %r11                    # CS
+@@ -61,6 +68,10 @@ restore_all_guest:
+         sysretq
+ 1:      sysretl
+ 
++.Lforce_iret:
++        /* Mimic SYSRET behavior. */
++        movq  8(%rsp),%rcx            # RIP
++        movq  24(%rsp),%r11           # RFLAGS
+         ALIGN
+ /* No special register assumptions. */
+ iret_exit_to_guest:
+@@ -298,12 +309,14 @@ 1:      movq  VCPU_domain(%rbx),%rdi
+         movb  %cl,TRAPBOUNCE_flags(%rdx)
+         testb $1,DOMAIN_is_32bit_pv(%rdi)
+         jnz   compat_sysenter
+-        call  create_bounce_frame
+-        jmp   test_all_events
++        jmp   .Lbounce_exception
+ 2:      movl  %eax,TRAPBOUNCE_error_code(%rdx)
+         movq  VCPU_gp_fault_addr(%rbx),%rax
+-        movb  $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+         movl  $TRAP_gp_fault,UREGS_entry_vector(%rsp)
++        subq  $2,UREGS_rip(%rsp)
++        testb $4,VCPU_gp_fault_flags(%rbx)
++        setnz %cl
++        leal  TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE(,%rcx,TBF_INTERRUPT),%ecx
+         jmp   1b
+ 
+ ENTRY(int80_direct_trap)
+@@ -490,6 +503,7 @@ 1:      movq  %rsp,%rdi
+         jnz   compat_post_handle_exception
+         testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
+         jz    test_all_events
++.Lbounce_exception:
+         call  create_bounce_frame
+         movb  $0,TRAPBOUNCE_flags(%rdx)
+         jmp   test_all_events
diff -Nru xen-4.0.1/debian/patches/CVE-2012-2934 xen-4.0.1/debian/patches/CVE-2012-2934
--- xen-4.0.1/debian/patches/CVE-2012-2934	1970-01-01 01:00:00.000000000 +0100
+++ xen-4.0.1/debian/patches/CVE-2012-2934	2012-06-14 20:24:30.000000000 +0200
@@ -0,0 +1,46 @@
+x86-64: detect processors subject to AMD erratum #121 and refuse to boot
+
+Processors with this erratum are subject to a DoS attack by unprivileged
+guest users.
+
+This is XSA-9 / CVE-2006-0744.
+
+Signed-off-by: Jan Beulich <JBeulich@suse.com>
+Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
+
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -41,6 +41,9 @@ void start_svm(struct cpuinfo_x86 *c);
+ integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
+ integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);
+ 
++static int opt_allow_unsafe;
++boolean_param("allow_unsafe", opt_allow_unsafe);
++
+ static inline void wrmsr_amd(unsigned int index, unsigned int lo, 
+ 		unsigned int hi)
+ {
+@@ -640,6 +643,11 @@ static void __devinit init_amd(struct cp
+ 		clear_bit(X86_FEATURE_MCE, c->x86_capability);
+ 
+ #ifdef __x86_64__
++	if (cpu_has_amd_erratum(c, AMD_ERRATUM_121) && !opt_allow_unsafe)
++		panic("Xen will not boot on this CPU for security reasons.\n"
++		      "Pass \"allow_unsafe\" if you're trusting all your"
++		      " (PV) guest kernels.\n");
++
+ 	/* AMD CPUs do not support SYSENTER outside of legacy mode. */
+ 	clear_bit(X86_FEATURE_SEP, c->x86_capability);
+ #endif
+--- a/xen/include/asm-x86/amd.h
++++ b/xen/include/asm-x86/amd.h
+@@ -127,6 +127,9 @@
+ #define AMD_MODEL_RANGE_START(range)    (((range) >> 12) & 0xfff)
+ #define AMD_MODEL_RANGE_END(range)      ((range) & 0xfff)
+ 
++#define AMD_ERRATUM_121                                                 \
++    AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x3f, 0xf))
++
+ #define AMD_ERRATUM_170                                                 \
+     AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x67, 0xf))
+ 
diff -Nru xen-4.0.1/debian/patches/series xen-4.0.1/debian/patches/series
--- xen-4.0.1/debian/patches/series	2011-06-09 20:35:07.000000000 +0200
+++ xen-4.0.1/debian/patches/series	2012-06-14 20:26:44.000000000 +0200
@@ -71,3 +71,6 @@
 upstream-21461:ee088a0b5cb8-CVE-2011-1166
 upstream-21482:c2adc059e931-CVE-2011-1583
 upstream-21485:b85a9e58ec3a-CVE-2011-1898
+upstream-22375:426f3a265784
+CVE-2012-0217+2012-0218
+CVE-2012-2934
diff -Nru xen-4.0.1/debian/patches/upstream-22375:426f3a265784 xen-4.0.1/debian/patches/upstream-22375:426f3a265784
--- xen-4.0.1/debian/patches/upstream-22375:426f3a265784	1970-01-01 01:00:00.000000000 +0100
+++ xen-4.0.1/debian/patches/upstream-22375:426f3a265784	2012-06-14 20:26:16.000000000 +0200
@@ -0,0 +1,1094 @@
+# HG changeset patch
+# User Keir Fraser <keir@xen.org>
+# Date 1289303389 0
+# Node ID 426f3a2657844cec77ce0043b0408b0887fafa41
+# Parent  9997a1418633c92286189b33f701ecbac2a98ccd
+x86: do away with the boot time low-memory 1:1 mapping
+
+By doing so, we're no longer restricted to be able to place all boot
+loader modules into the low 1Gb/4Gb (32-/64-bit) of memory, nor is
+there a dependency anymore on where the boot loader places the
+modules.
+
+We're also no longer restricted to copy the modules into a place below
+4Gb, nor to put them all together into a single piece of memory.
+
+Further it allows even the 32-bit Dom0 kernel to be loaded anywhere in
+physical memory (except if it doesn't support PAE-above-4G).
+
+Signed-off-by: Jan Beulich <jbeulich@novell.com>
+
+Index: xen-4.0.1/xen/arch/x86/boot/Makefile
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/boot/Makefile	2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/boot/Makefile	2011-11-25 16:24:33.000000000 +0100
+@@ -4,6 +4,6 @@
+ 
+ BOOT_TRAMPOLINE := $(shell sed -n 's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' $(BASEDIR)/include/asm-x86/config.h)
+ %.S: %.c
+-	RELOC=$(BOOT_TRAMPOLINE) XEN_BITSPERLONG=$(patsubst x86_%,%,$(TARGET_SUBARCH)) $(MAKE) -f build32.mk $@
++	RELOC=$(BOOT_TRAMPOLINE) $(MAKE) -f build32.mk $@
+ 
+ reloc.S: $(BASEDIR)/include/asm-x86/config.h
+Index: xen-4.0.1/xen/arch/x86/boot/build32.mk
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/boot/build32.mk	2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/boot/build32.mk	2011-11-25 16:24:33.000000000 +0100
+@@ -19,6 +19,6 @@
+ 	$(LD) $(LDFLAGS_DIRECT) -N -Ttext $(RELOC) -o $@ $<
+ 
+ %.o: %.c
+-	$(CC) $(CFLAGS) -DXEN_BITSPERLONG=$(XEN_BITSPERLONG) -c $< -o $@
++	$(CC) $(CFLAGS) -c $< -o $@
+ 
+ reloc.o: $(BASEDIR)/include/asm-x86/config.h
+Index: xen-4.0.1/xen/arch/x86/boot/head.S
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/boot/head.S	2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/boot/head.S	2011-11-25 16:24:33.000000000 +0100
+@@ -110,12 +110,15 @@
+         /* Initialise L2 identity-map and xen page table entries (16MB). */
+         mov     $sym_phys(l2_identmap),%edi
+         mov     $sym_phys(l2_xenmap),%esi
++        mov     $sym_phys(l2_bootmap),%edx
+         mov     $0x1e3,%eax                  /* PRESENT+RW+A+D+2MB+GLOBAL */
+         mov     $8,%ecx
+ 1:      mov     %eax,(%edi)
+         add     $8,%edi
+         mov     %eax,(%esi)
+         add     $8,%esi
++        mov     %eax,(%edx)
++        add     $8,%edx
+         add     $(1<<L2_PAGETABLE_SHIFT),%eax
+         loop    1b
+         /* Initialise L3 identity-map page directory entries. */
+@@ -129,9 +132,13 @@
+         /* Initialise L3 xen-map page directory entry. */
+         mov     $(sym_phys(l2_xenmap)+7),%eax
+         mov     %eax,sym_phys(l3_xenmap) + l3_table_offset(XEN_VIRT_START)*8
+-        /* Hook identity-map and xen-map L3 tables into PML4. */
++        /* Initialise L3 boot-map page directory entry. */
++        mov     $(sym_phys(l2_bootmap)+7),%eax
++        mov     %eax,sym_phys(l3_bootmap) + 0*8
++        /* Hook identity-map, xen-map, and boot-map L3 tables into PML4. */
++        mov     $(sym_phys(l3_bootmap)+7),%eax
++        mov     %eax,sym_phys(idle_pg_table) + 0*8
+         mov     $(sym_phys(l3_identmap)+7),%eax
+-        mov     %eax,sym_phys(idle_pg_table) + (  0*8) /* PML4[  0]: 1:1 map */
+         mov     %eax,sym_phys(idle_pg_table) + l4_table_offset(DIRECTMAP_VIRT_START)*8
+         mov     $(sym_phys(l3_xenmap)+7),%eax
+         mov     %eax,sym_phys(idle_pg_table) + l4_table_offset(XEN_VIRT_START)*8
+@@ -176,6 +183,7 @@
+ #if defined(__x86_64__)
+         mov     %edi,sym_phys(l2_identmap)
+         mov     %edi,sym_phys(l2_xenmap)
++        mov     %edi,sym_phys(l2_bootmap)
+ #else
+         mov     %edi,sym_phys(idle_pg_table_l2)
+         mov     %edi,sym_phys(idle_pg_table_l2) + (__PAGE_OFFSET>>18)
+Index: xen-4.0.1/xen/arch/x86/boot/reloc.c
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/boot/reloc.c	2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/boot/reloc.c	2011-11-25 16:24:33.000000000 +0100
+@@ -68,7 +68,6 @@
+     {
+         module_t *mods = reloc_mbi_struct(
+             (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t));
+-        u32 max_addr = 0;
+ 
+         mbi->mods_addr = (u32)mods;
+ 
+@@ -76,29 +75,6 @@
+         {
+             if ( mods[i].string )
+                 mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string);
+-            if ( mods[i].mod_end > max_addr )
+-                max_addr = mods[i].mod_end;
+-        }
+-
+-        /*
+-         * 32-bit Xen only maps bottom 1GB of memory at boot time. Relocate 
+-         * modules which extend beyond this (GRUB2 in particular likes to 
+-         * place modules as high as possible below 4GB).
+-         */
+-#define BOOTMAP_END (1ul<<30) /* 1GB */
+-        if ( (XEN_BITSPERLONG == 32) && (max_addr > BOOTMAP_END) )
+-        {
+-            char *mod_alloc = (char *)BOOTMAP_END;
+-            for ( i = 0; i < mbi->mods_count; i++ )
+-                mod_alloc -= mods[i].mod_end - mods[i].mod_start;
+-            for ( i = 0; i < mbi->mods_count; i++ )
+-            {
+-                u32 mod_len = mods[i].mod_end - mods[i].mod_start;
+-                mods[i].mod_start = (u32)memcpy(
+-                    mod_alloc, (char *)mods[i].mod_start, mod_len);
+-                mods[i].mod_end = mods[i].mod_start + mod_len;
+-                mod_alloc += mod_len;
+-            }
+         }
+     }
+ 
+Index: xen-4.0.1/xen/arch/x86/domain_build.c
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/domain_build.c	2011-11-25 15:50:01.000000000 +0100
++++ xen-4.0.1/xen/arch/x86/domain_build.c	2011-11-25 16:24:33.000000000 +0100
+@@ -30,6 +30,7 @@
+ #include <asm/p2m.h>
+ #include <asm/e820.h>
+ #include <asm/acpi.h>
++#include <asm/setup.h>
+ #include <asm/bzimage.h> /* for bzimage_parse */
+ 
+ #include <public/version.h>
+@@ -291,9 +292,9 @@
+ 
+ int __init construct_dom0(
+     struct domain *d,
+-    unsigned long _image_base,
+-    unsigned long _image_start, unsigned long image_len,
+-    unsigned long _initrd_start, unsigned long initrd_len,
++    const module_t *image, unsigned long image_headroom,
++    const module_t *initrd,
++    void *(*bootstrap_map)(const module_t *),
+     char *cmdline)
+ {
+     int i, rc, compatible, compat32, order, machine;
+@@ -308,16 +309,14 @@
+     start_info_t *si;
+     struct vcpu *v = d->vcpu[0];
+     unsigned long long value;
+-#if defined(__i386__)
+-    char *image_base   = (char *)_image_base;   /* use lowmem mappings */
+-    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
+-    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
+-#elif defined(__x86_64__)
+-    char *image_base   = __va(_image_base);
+-    char *image_start  = __va(_image_start);
+-    char *initrd_start = __va(_initrd_start);
+-#endif
+-#if CONFIG_PAGING_LEVELS >= 4
++    char *image_base = bootstrap_map(image);
++    unsigned long image_len = image->mod_end;
++    char *image_start = image_base + image_headroom;
++    unsigned long initrd_len = initrd ? initrd->mod_end : 0;
++#if CONFIG_PAGING_LEVELS < 4
++    module_t mpt;
++    void *mpt_ptr;
++#else
+     l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+ #endif
+     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
+@@ -347,7 +346,7 @@
+     unsigned long v_end;
+ 
+     /* Machine address of next candidate page-table page. */
+-    unsigned long mpt_alloc;
++    paddr_t mpt_alloc;
+ 
+     /* Sanity! */
+     BUG_ON(d->domain_id != 0);
+@@ -502,17 +501,17 @@
+     if ( (1UL << order) > nr_pages )
+         panic("Domain 0 allocation is too small for kernel image.\n");
+ 
+-#ifdef __i386__
+-    /* Ensure that our low-memory 1:1 mapping covers the allocation. */
+-    page = alloc_domheap_pages(d, order, MEMF_bits(30));
+-#else
+     if ( parms.p2m_base != UNSET_ADDR )
+     {
+         vphysmap_start = parms.p2m_base;
+         vphysmap_end   = vphysmap_start + nr_pages * sizeof(unsigned long);
+     }
+-    page = alloc_domheap_pages(d, order, 0);
++#ifdef __i386__
++    if ( !test_bit(XENFEAT_pae_pgdir_above_4gb, parms.f_supported) )
++        page = alloc_domheap_pages(d, order, MEMF_bits(32));
++    else
+ #endif
++        page = alloc_domheap_pages(d, order, 0);
+     if ( page == NULL )
+         panic("Not enough RAM for domain 0 allocation.\n");
+     alloc_spfn = page_to_mfn(page);
+@@ -541,8 +540,7 @@
+            _p(v_start), _p(v_end));
+     printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry));
+ 
+-    mpt_alloc = (vpt_start - v_start) +
+-        (unsigned long)pfn_to_paddr(alloc_spfn);
++    mpt_alloc = (vpt_start - v_start) + pfn_to_paddr(alloc_spfn);
+ 
+ #if defined(__i386__)
+     /*
+@@ -555,17 +553,25 @@
+         return -EINVAL;
+     }
+ 
++    mpt.mod_start = mpt_alloc >> PAGE_SHIFT;
++    mpt.mod_end   = vpt_end - vpt_start;
++    mpt_ptr = bootstrap_map(&mpt);
++#define MPT_ALLOC(n) (mpt_ptr += (n)*PAGE_SIZE, mpt_alloc += (n)*PAGE_SIZE)
++
+     /* WARNING: The new domain must have its 'processor' field filled in! */
+-    l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+-    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
++    l3start = l3tab = mpt_ptr; MPT_ALLOC(1);
++    l2start = l2tab = mpt_ptr; MPT_ALLOC(4);
+     for (i = 0; i < L3_PAGETABLE_ENTRIES; i++) {
+-        copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
+-                  idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
+-        l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT);
++        if ( i < 3 )
++            clear_page(l2tab + i * L2_PAGETABLE_ENTRIES);
++        else
++            copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
++                      idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
++        l3tab[i] = l3e_from_pfn(mpt.mod_start + 1 + i, L3_PROT);
+         l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
+-            l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
++            l2e_from_pfn(mpt.mod_start + 1 + i, __PAGE_HYPERVISOR);
+     }
+-    v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
++    v->arch.guest_table = pagetable_from_pfn(mpt.mod_start);
+ 
+     for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+         l2tab[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
+@@ -577,9 +583,9 @@
+     {
+         if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+         {
+-            l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
+-            mpt_alloc += PAGE_SIZE;
+-            *l2tab = l2e_from_paddr((unsigned long)l1start, L2_PROT);
++            l1tab = mpt_ptr;
++            *l2tab = l2e_from_paddr(mpt_alloc, L2_PROT);
++            MPT_ALLOC(1);
+             l2tab++;
+             clear_page(l1tab);
+             if ( count == 0 )
+@@ -594,11 +600,14 @@
+ 
+         mfn++;
+     }
++#undef MPT_ALLOC
+ 
+     /* Pages that are part of page tables must be read only. */
++    mpt_alloc = (paddr_t)mpt.mod_start << PAGE_SHIFT;
++    mpt_ptr = l3start;
+     l2tab = l2start + l2_linear_offset(vpt_start);
+-    l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*l2tab);
+-    l1tab += l1_table_offset(vpt_start);
++    l1start = mpt_ptr + (l2e_get_paddr(*l2tab) - mpt_alloc);
++    l1tab = l1start + l1_table_offset(vpt_start);
+     for ( count = 0; count < nr_pt_pages; count++ ) 
+     {
+         page = mfn_to_page(l1e_get_pfn(*l1tab));
+@@ -634,9 +643,15 @@
+             break;
+         }
+         if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
+-            l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*++l2tab);
++            l1tab = mpt_ptr + (l2e_get_paddr(*++l2tab) - mpt_alloc);
+     }
+ 
++    /*
++     * Put Xen's first L3 entry into Dom0's page tables so that updates
++     * through bootstrap_map() will affect the page tables we will run on.
++     */
++    l3start[0] = l3e_from_paddr(__pa(idle_pg_table_l2), L3_PROT);
++
+ #elif defined(__x86_64__)
+ 
+     /* Overlap with Xen protected area? */
+@@ -810,6 +825,7 @@
+     /* Copy the OS image and free temporary buffer. */
+     elf.dest = (void*)vkern_start;
+     elf_load_binary(&elf);
++    bootstrap_map(NULL);
+ 
+     if ( UNSET_ADDR != parms.virt_hypercall )
+     {
+@@ -826,7 +842,12 @@
+ 
+     /* Copy the initial ramdisk. */
+     if ( initrd_len != 0 )
++    {
++        char *initrd_start = bootstrap_map(initrd);
++
+         memcpy((void *)vinitrd_start, initrd_start, initrd_len);
++        bootstrap_map(NULL);
++    }
+ 
+     /* Free temporary buffers. */
+     discard_initial_images();
+@@ -1034,7 +1055,22 @@
+     write_ptbase(current);
+ 
+ #if defined(__i386__)
+-    /* Destroy low mappings - they were only for our convenience. */
++    /* Restore Dom0's first L3 entry. */
++    mpt.mod_end = 5 * PAGE_SIZE;
++    l3start = mpt_ptr = bootstrap_map(&mpt);
++    l2start = mpt_ptr + PAGE_SIZE;
++    l3start[0] = l3e_from_pfn(mpt.mod_start + 1, L3_PROT);
++
++    /* Re-setup CR3  */
++    if ( paging_mode_enabled(d) )
++        paging_update_paging_modes(v);
++    else
++        update_cr3(v);
++
++    /*
++     * Destroy low mappings - they were only for our convenience. Note
++     * that zap_low_mappings() exceeds what bootstrap_map(NULL) would do.
++     */
+     zap_low_mappings(l2start);
+ #endif
+ 
+Index: xen-4.0.1/xen/arch/x86/setup.c
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/setup.c	2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/setup.c	2011-11-25 16:24:33.000000000 +0100
+@@ -43,14 +43,6 @@
+ #include <asm/mach-generic/mach_apic.h> /* for generic_apic_probe */
+ #include <asm/setup.h>
+ 
+-#if defined(CONFIG_X86_64)
+-#define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
+-#define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
+-#else
+-#define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */
+-#define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
+-#endif
+-
+ extern u16 boot_edid_caps;
+ extern u8 boot_edid_info[128];
+ extern struct boot_video_info boot_vid_info;
+@@ -167,21 +159,34 @@
+     for ( ; ; ) halt();                         \
+ } while (0)
+ 
+-static unsigned long __initdata initial_images_base;
+-static unsigned long __initdata initial_images_start;
+-static unsigned long __initdata initial_images_end;
++static const module_t *__initdata initial_images;
++static unsigned int __initdata nr_initial_images;
+ 
+ unsigned long __init initial_images_nrpages(void)
+ {
+-    ASSERT(!(initial_images_base & ~PAGE_MASK));
+-    ASSERT(!(initial_images_end   & ~PAGE_MASK));
+-    return ((initial_images_end >> PAGE_SHIFT) -
+-            (initial_images_base >> PAGE_SHIFT));
++    unsigned long nr;
++    unsigned int i;
++
++    for ( nr = i = 0; i < nr_initial_images; ++i )
++        nr += PFN_UP(initial_images[i].mod_end);
++
++    return nr;
+ }
+ 
+ void __init discard_initial_images(void)
+ {
+-    init_domheap_pages(initial_images_base, initial_images_end);
++    unsigned int i;
++
++    for ( i = 0; i < nr_initial_images; ++i )
++    {
++        uint64_t start = (uint64_t)initial_images[i].mod_start << PAGE_SHIFT;
++
++        init_domheap_pages(start,
++                           start + PAGE_ALIGN(initial_images[i].mod_end));
++    }
++
++    nr_initial_images = 0;
++    initial_images = NULL;
+ }
+ 
+ static void free_xen_data(char *s, char *e)
+@@ -273,33 +278,128 @@
+         printk("CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
+ }
+ 
++#define BOOTSTRAP_MAP_BASE  (16UL << 20)
++#define BOOTSTRAP_MAP_LIMIT (1UL << L3_PAGETABLE_SHIFT)
++
+ /*
+  * Ensure a given physical memory range is present in the bootstrap mappings.
+  * Use superpage mappings to ensure that pagetable memory needn't be allocated.
+  */
+-static void __init bootstrap_map(unsigned long start, unsigned long end)
++static void *__init bootstrap_map(const module_t *mod)
+ {
+-    unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+-    start = max_t(unsigned long, start & ~mask, 16UL << 20);
+-    end   = (end + mask) & ~mask;
++    static unsigned long __initdata map_cur = BOOTSTRAP_MAP_BASE;
++    uint64_t start, end, mask = (1L << L2_PAGETABLE_SHIFT) - 1;
++    void *ret;
++
++#ifdef __x86_64__
++    if ( !early_boot )
++        return mod ? mfn_to_virt(mod->mod_start) : NULL;
++#endif
++
++    if ( !mod )
++    {
++        destroy_xen_mappings(BOOTSTRAP_MAP_BASE, BOOTSTRAP_MAP_LIMIT);
++        map_cur = BOOTSTRAP_MAP_BASE;
++        return NULL;
++    }
++
++    start = (uint64_t)mod->mod_start << PAGE_SHIFT;
++    end = start + mod->mod_end;
+     if ( start >= end )
+-        return;
+-    if ( end > BOOTSTRAP_DIRECTMAP_END )
+-        panic("Cannot access memory beyond end of "
+-              "bootstrap direct-map area\n");
+-    map_pages_to_xen(
+-        (unsigned long)maddr_to_bootstrap_virt(start),
+-        start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++        return NULL;
++
++    if ( end <= BOOTSTRAP_MAP_BASE )
++        return (void *)(unsigned long)start;
++
++    ret = (void *)(map_cur + (unsigned long)(start & mask));
++    start &= ~mask;
++    end = (end + mask) & ~mask;
++    if ( end - start > BOOTSTRAP_MAP_LIMIT - map_cur )
++        return NULL;
++
++    map_pages_to_xen(map_cur, start >> PAGE_SHIFT,
++                     (end - start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++    map_cur += end - start;
++    return ret;
+ }
+ 
+-static void __init move_memory(
+-    unsigned long dst, unsigned long src_start, unsigned long src_end)
++static void *__init move_memory(
++    uint64_t dst, uint64_t src, unsigned int size, bool_t keep)
+ {
+-    bootstrap_map(src_start, src_end);
+-    bootstrap_map(dst, dst + src_end - src_start);
+-    memmove(maddr_to_bootstrap_virt(dst),
+-            maddr_to_bootstrap_virt(src_start),
+-            src_end - src_start);
++    unsigned int blksz = BOOTSTRAP_MAP_LIMIT - BOOTSTRAP_MAP_BASE;
++    unsigned int mask = (1L << L2_PAGETABLE_SHIFT) - 1;
++
++    if ( src + size > BOOTSTRAP_MAP_BASE )
++        blksz >>= 1;
++
++    while ( size )
++    {
++        module_t mod;
++        unsigned int soffs = src & mask;
++        unsigned int doffs = dst & mask;
++        unsigned int sz;
++        void *d, *s;
++
++        mod.mod_start = (src - soffs) >> PAGE_SHIFT;
++        mod.mod_end = soffs + size;
++        if ( mod.mod_end > blksz )
++            mod.mod_end = blksz;
++        sz = mod.mod_end - soffs;
++        s = bootstrap_map(&mod);
++
++        mod.mod_start = (dst - doffs) >> PAGE_SHIFT;
++        mod.mod_end = doffs + size;
++        if ( mod.mod_end > blksz )
++            mod.mod_end = blksz;
++        if ( sz > mod.mod_end - doffs )
++            sz = mod.mod_end - doffs;
++        d = bootstrap_map(&mod);
++
++        memmove(d + doffs, s + soffs, sz);
++
++        dst += sz;
++        src += sz;
++        size -= sz;
++
++        if ( keep )
++            return size ? NULL : d + doffs;
++
++        bootstrap_map(NULL);
++    }
++
++    return NULL;
++}
++
++static uint64_t __init consider_modules(
++    uint64_t s, uint64_t e, uint32_t size, const module_t *mod,
++    unsigned int nr_mods, unsigned int this_mod)
++{
++    unsigned int i;
++
++    if ( s > e || e - s < size )
++        return 0;
++
++    for ( i = 0; i < nr_mods ; ++i )
++    {
++        uint64_t start = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
++        uint64_t end = start + PAGE_ALIGN(mod[i].mod_end);
++
++        if ( i == this_mod )
++            continue;
++
++        if ( s < end && start < e )
++        {
++            end = consider_modules(end, e, size, mod + i + 1,
++                                   nr_mods - i - 1, this_mod - i - 1);
++            if ( end )
++                return end;
++
++            return consider_modules(s, start, size, mod + i + 1,
++                                    nr_mods - i - 1, this_mod - i - 1);
++        }
++    }
++
++    return e;
+ }
+ 
+ static void __init setup_max_pdx(void)
+@@ -463,11 +563,10 @@
+ {
+     char *memmap_type = NULL;
+     char *cmdline, *kextra, *loader;
+-    unsigned long _initrd_start = 0, _initrd_len = 0;
+     unsigned int initrdidx = 1;
+     multiboot_info_t *mbi = __va(mbi_p);
+     module_t *mod = (module_t *)__va(mbi->mods_addr);
+-    unsigned long nr_pages, modules_length, modules_headroom;
++    unsigned long nr_pages, modules_headroom;
+     int i, j, e820_warn = 0, bytes = 0;
+     bool_t acpi_boot_table_init_done = 0;
+     struct ns16550_defaults ns16550 = {
+@@ -666,6 +765,9 @@
+     /* Early kexec reservation (explicit static start address). */
+     kexec_reserve_area(&boot_e820);
+ 
++    initial_images = mod;
++    nr_initial_images = mbi->mods_count;
++
+     /*
+      * Iterate backwards over all superpage-aligned RAM regions.
+      * 
+@@ -679,48 +781,64 @@
+      * we can relocate the dom0 kernel and other multiboot modules. Also, on
+      * x86/64, we relocate Xen to higher memory.
+      */
+-    modules_length = 0;
+     for ( i = 0; i < mbi->mods_count; i++ )
+-        modules_length += mod[i].mod_end - mod[i].mod_start;
++    {
++        if ( mod[i].mod_start & (PAGE_SIZE - 1) )
++            EARLY_FAIL("Bootloader didn't honor module alignment request.\n");
++        mod[i].mod_end -= mod[i].mod_start;
++        mod[i].mod_start >>= PAGE_SHIFT;
++        mod[i].reserved = 0;
++    }
+ 
+-    /* ensure mod[0] is mapped before parsing */
+-    bootstrap_map(mod[0].mod_start, mod[0].mod_end);
+-    modules_headroom = bzimage_headroom(
+-                      (char *)(unsigned long)mod[0].mod_start,
+-                      (unsigned long)(mod[0].mod_end - mod[0].mod_start));
++    modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end);
++    bootstrap_map(NULL);
+ 
+     for ( i = boot_e820.nr_map-1; i >= 0; i-- )
+     {
+         uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
++        uint64_t end, limit = ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT;
+ 
+-        /* Superpage-aligned chunks from 16MB to BOOTSTRAP_DIRECTMAP_END. */
++        /* Superpage-aligned chunks from BOOTSTRAP_MAP_BASE. */
+         s = (boot_e820.map[i].addr + mask) & ~mask;
+         e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
+-        s = max_t(uint64_t, s, 16 << 20);
+-        e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
++        s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
+         if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
+             continue;
+ 
+-        set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+-
+-        /* Map the chunk. No memory will need to be allocated to do this. */
+-        map_pages_to_xen(
+-            (unsigned long)maddr_to_bootstrap_virt(s),
+-            s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++        if ( s < limit )
++        {
++            end = min(e, limit);
++            set_pdx_range(s >> PAGE_SHIFT, end >> PAGE_SHIFT);
++#ifdef CONFIG_X86_64
++            map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
++                             (end - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++#endif
++        }
+ 
+ #if defined(CONFIG_X86_64)
++        e = min_t(uint64_t, e, 1ULL << (PAGE_SHIFT + 32));
+ #define reloc_size ((__pa(&_end) + mask) & ~mask)
+         /* Is the region suitable for relocating Xen? */
+-        if ( !xen_phys_start && ((e-s) >= reloc_size) )
++        if ( !xen_phys_start && e <= limit )
++        {
++            /* Don't overlap with modules. */
++            end = consider_modules(s, e, reloc_size + mask,
++                                   mod, mbi->mods_count, -1);
++            end &= ~mask;
++        }
++        else
++            end = 0;
++        if ( end > s )
+         {
+             extern l2_pgentry_t l2_xenmap[];
+             l4_pgentry_t *pl4e;
+             l3_pgentry_t *pl3e;
+             l2_pgentry_t *pl2e;
+             int i, j, k;
++            void *dst;
+ 
+             /* Select relocation address. */
+-            e -= reloc_size;
++            e = end - reloc_size;
+             xen_phys_start = e;
+             bootsym(trampoline_xen_phys_start) = e;
+ 
+@@ -731,10 +849,10 @@
+              * data until after we have switched to the relocated pagetables!
+              */
+             barrier();
+-            move_memory(e, 0, __pa(&_end) - xen_phys_start);
++            dst = move_memory(e, 0, (unsigned long)&_end - XEN_VIRT_START, 1);
+ 
+             /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
+-            memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
++            memset(dst, 0x55, 1U << 20);
+ 
+             /* Walk initial pagetables, relocating page directory entries. */
+             pl4e = __va(__pa(idle_pg_table));
+@@ -791,38 +909,58 @@
+                 "movq %%rsi,%%cr4   " /* CR4.PGE == 1 */
+                 : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
+                 "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
++
++            bootstrap_map(NULL);
+         }
+ #endif
+ 
+         /* Is the region suitable for relocating the multiboot modules? */
+-        if ( !initial_images_start && (s < e) &&
+-             ((e-s) >= (modules_length+modules_headroom)) )
++        for ( j = mbi->mods_count - 1; j >= 0; j-- )
+         {
+-            initial_images_end = e;
+-            initial_images_start = initial_images_end - modules_length;
+-            initial_images_base = initial_images_start - modules_headroom;
+-            initial_images_base &= PAGE_MASK;
+-            for ( j = mbi->mods_count-1; j >= 0; j-- )
++            unsigned long headroom = j ? 0 : modules_headroom;
++            unsigned long size = PAGE_ALIGN(headroom + mod[j].mod_end);
++
++            if ( mod[j].reserved )
++                continue;
++
++            /* Don't overlap with other modules. */
++            end = consider_modules(s, e, size, mod, mbi->mods_count, j);
++
++            if ( s < end &&
++                 (headroom ||
++                  ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) )
+             {
+-                e -= mod[j].mod_end - mod[j].mod_start;
+-                move_memory(e, mod[j].mod_start, mod[j].mod_end);
+-                mod[j].mod_end += e - mod[j].mod_start;
+-                mod[j].mod_start = e;
++                move_memory(end - size + headroom,
++                            (uint64_t)mod[j].mod_start << PAGE_SHIFT,
++                            mod[j].mod_end, 0);
++                mod[j].mod_start = (end - size) >> PAGE_SHIFT;
++                mod[j].mod_end += headroom;
++                mod[j].reserved = 1;
+             }
+-            e = initial_images_base;
+         }
+ 
+-        if ( !kexec_crash_area.start && (s < e) &&
+-             ((e-s) >= kexec_crash_area.size) )
++#ifdef CONFIG_X86_32
++        /* Confine the kexec area to below 4Gb. */
++        e = min_t(uint64_t, e, 1ULL << 32);
++#endif
++        /* Don't overlap with modules. */
++        e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size),
++                             mod, mbi->mods_count, -1);
++        if ( !kexec_crash_area.start && (s < e) )
+         {
+             e = (e - kexec_crash_area.size) & PAGE_MASK;
+             kexec_crash_area.start = e;
+         }
+     }
+ 
+-    if ( !initial_images_start )
++    if ( modules_headroom && !mod->reserved )
+         EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
+-    reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);
++    for ( i = 0; i < mbi->mods_count; ++i )
++    {
++        uint64_t s = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
++
++        reserve_e820_ram(&boot_e820, s, s + PAGE_ALIGN(mod[i].mod_end));
++    }
+ 
+ #if defined(CONFIG_X86_32)
+     xenheap_initial_phys_start = (PFN_UP(__pa(&_end)) + 1) << PAGE_SHIFT;
+@@ -846,7 +984,10 @@
+      */
+     for ( i = 0; i < boot_e820.nr_map; i++ )
+     {
+-        uint64_t s, e, map_s, map_e, mask = PAGE_SIZE - 1;
++        uint64_t s, e, mask = PAGE_SIZE - 1;
++#ifdef CONFIG_X86_64
++        uint64_t map_s, map_e;
++#endif
+ 
+         /* Only page alignment required now. */
+         s = (boot_e820.map[i].addr + mask) & ~mask;
+@@ -861,7 +1002,7 @@
+ 
+ #ifdef __x86_64__
+         if ( !acpi_boot_table_init_done &&
+-             s >= BOOTSTRAP_DIRECTMAP_END &&
++             s >= (1ULL << 32) &&
+              !acpi_boot_table_init() )
+         {
+             acpi_boot_table_init_done = 1;
+@@ -900,26 +1041,60 @@
+ 
+         set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+ 
+-        /* Need to create mappings above 16MB. */
+-        map_s = max_t(uint64_t, s, 16<<20);
+-        map_e = e;
+-#if defined(CONFIG_X86_32) /* mappings are truncated on x86_32 */
+-        map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
+-#endif
++#ifdef CONFIG_X86_64
++        /* Need to create mappings above BOOTSTRAP_MAP_BASE. */
++        map_s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
++        map_e = min_t(uint64_t, e,
++                      ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT);
+ 
+         /* Pass mapped memory to allocator /before/ creating new mappings. */
+-        init_boot_pages(s, min_t(uint64_t, map_s, e));
++        init_boot_pages(s, min(map_s, e));
++        s = map_s;
++        if ( s < map_e )
++        {
++            uint64_t mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
++
++            map_s = (s + mask) & ~mask;
++            map_e &= ~mask;
++            init_boot_pages(map_s, map_e);
++        }
++
++        if ( map_s > map_e )
++            map_s = map_e = s;
+ 
+         /* Create new mappings /before/ passing memory to the allocator. */
+-        if ( map_s < map_e )
+-            map_pages_to_xen(
+-                (unsigned long)maddr_to_bootstrap_virt(map_s),
+-                map_s >> PAGE_SHIFT, (map_e-map_s) >> PAGE_SHIFT,
+-                PAGE_HYPERVISOR);
++        if ( map_e < e )
++        {
++            map_pages_to_xen((unsigned long)__va(map_e), map_e >> PAGE_SHIFT,
++                             (e - map_e) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++            init_boot_pages(map_e, e);
++        }
++        if ( s < map_s )
++        {
++            map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
++                             (map_s - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++            init_boot_pages(s, map_s);
++        }
++#else
++        init_boot_pages(s, e);
++#endif
++    }
+ 
+-        /* Pass remainder of this memory chunk to the allocator. */
+-        init_boot_pages(map_s, e);
++    for ( i = 0; i < mbi->mods_count; ++i )
++    {
++        set_pdx_range(mod[i].mod_start,
++                      mod[i].mod_start + PFN_UP(mod[i].mod_end));
++#ifdef CONFIG_X86_64
++        map_pages_to_xen((unsigned long)mfn_to_virt(mod[i].mod_start),
++                         mod[i].mod_start,
++                         PFN_UP(mod[i].mod_end), PAGE_HYPERVISOR);
++#endif
+     }
++#ifdef CONFIG_X86_64
++    map_pages_to_xen((unsigned long)__va(kexec_crash_area.start),
++                     kexec_crash_area.start >> PAGE_SHIFT,
++                     PFN_UP(kexec_crash_area.size), PAGE_HYPERVISOR);
++#endif
+ 
+     memguard_init();
+ 
+@@ -1041,7 +1216,7 @@
+ 
+     init_IRQ();
+ 
+-    xsm_init(&initrdidx, mbi, initial_images_start);
++    xsm_init(&initrdidx, mbi, bootstrap_map);
+ 
+     init_idle_domain();
+ 
+@@ -1158,12 +1333,6 @@
+         cmdline = dom0_cmdline;
+     }
+ 
+-    if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
+-    {
+-        _initrd_start = mod[initrdidx].mod_start;
+-        _initrd_len   = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
+-    }
+-
+     if ( xen_cpuidle )
+         xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
+ 
+@@ -1171,13 +1340,10 @@
+      * We're going to setup domain0 using the module(s) that we stashed safely
+      * above our heap. The second module, if present, is an initrd ramdisk.
+      */
+-    if ( construct_dom0(dom0,
+-                        initial_images_base,
+-                        initial_images_start,
+-                        mod[0].mod_end-mod[0].mod_start,
+-                        _initrd_start,
+-                        _initrd_len,
+-                        cmdline) != 0)
++    if ( construct_dom0(dom0, mod, modules_headroom,
++                        (initrdidx > 0) && (initrdidx < mbi->mods_count)
++                        ? mod + initrdidx : NULL,
++                        bootstrap_map, cmdline) != 0)
+         panic("Could not set up DOM0 guest OS\n");
+ 
+     /* Scrub RAM that is still free and so may go to an unprivileged domain. */
+Index: xen-4.0.1/xen/arch/x86/x86_64/mm.c
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/x86_64/mm.c	2011-11-25 15:50:01.000000000 +0100
++++ xen-4.0.1/xen/arch/x86/x86_64/mm.c	2011-11-25 16:24:33.000000000 +0100
+@@ -65,6 +65,12 @@
+ l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+     l2_xenmap[L2_PAGETABLE_ENTRIES];
+ 
++/* Enough page directories to map into the bottom 1GB. */
++l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
++    l3_bootmap[L3_PAGETABLE_ENTRIES];
++l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
++    l2_bootmap[L2_PAGETABLE_ENTRIES];
++
+ int __mfn_valid(unsigned long mfn)
+ {
+     return likely(mfn < max_page) &&
+Index: xen-4.0.1/xen/include/asm-x86/domain.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/asm-x86/domain.h	2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/asm-x86/domain.h	2011-11-25 16:24:33.000000000 +0100
+@@ -483,16 +483,6 @@
+                   unsigned int  *ecx,
+                   unsigned int  *edx);
+ 
+-int construct_dom0(
+-    struct domain *d,
+-    unsigned long image_base,
+-    unsigned long image_start, unsigned long image_len,
+-    unsigned long initrd_start, unsigned long initrd_len,
+-    char *cmdline);
+-
+-extern unsigned long initial_images_nrpages(void);
+-extern void discard_initial_images(void);
+-
+ #endif /* __ASM_DOMAIN_H__ */
+ 
+ /*
+Index: xen-4.0.1/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	2011-11-25 16:24:33.000000000 +0100
+@@ -30,7 +30,6 @@
+         &amd_iommu_head, list)
+ 
+ #define DMA_32BIT_MASK  0x00000000ffffffffULL
+-#define PAGE_ALIGN(addr)    (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
+ 
+ extern int amd_iommu_debug;
+ extern int amd_iommu_perdev_intremap;
+Index: xen-4.0.1/xen/include/asm-x86/page.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/asm-x86/page.h	2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/asm-x86/page.h	2011-11-25 16:24:33.000000000 +0100
+@@ -292,6 +292,7 @@
+ extern l2_pgentry_t  *compat_idle_pg_table_l2;
+ extern unsigned int   m2p_compat_vstart;
+ #endif
++extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES];
+ void paging_init(void);
+ void setup_idle_pagetable(void);
+ #endif /* !defined(__ASSEMBLY__) */
+@@ -387,6 +388,7 @@
+ 
+ #define PFN_DOWN(x)   ((x) >> PAGE_SHIFT)
+ #define PFN_UP(x)     (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
++#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)
+ 
+ #endif /* __X86_PAGE_H__ */
+ 
+Index: xen-4.0.1/xen/include/asm-x86/setup.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/asm-x86/setup.h	2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/asm-x86/setup.h	2011-11-25 16:24:33.000000000 +0100
+@@ -1,6 +1,8 @@
+ #ifndef __X86_SETUP_H_
+ #define __X86_SETUP_H_
+ 
++#include <xen/multiboot.h>
++
+ extern int early_boot;
+ extern unsigned long xenheap_initial_phys_start;
+ 
+@@ -26,4 +28,14 @@
+ void vesa_init(void);
+ void vesa_mtrr_init(void);
+ 
++int construct_dom0(
++    struct domain *d,
++    const module_t *kernel, unsigned long kernel_headroom,
++    const module_t *initrd,
++    void *(*bootstrap_map)(const module_t *),
++    char *cmdline);
++
++unsigned long initial_images_nrpages(void);
++void discard_initial_images(void);
++
+ #endif
+Index: xen-4.0.1/xen/include/xsm/xsm.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/xsm/xsm.h	2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/xsm/xsm.h	2011-11-25 16:24:33.000000000 +0100
+@@ -431,14 +431,15 @@
+ 
+ #ifdef XSM_ENABLE
+ extern int xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
+-                                          unsigned long initial_images_start);
++                    void *(*bootstrap_map)(const module_t *));
+ extern int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
+-                                           unsigned long initial_images_start);
++                           void *(*bootstrap_map)(const module_t *));
+ extern int register_xsm(struct xsm_operations *ops);
+ extern int unregister_xsm(struct xsm_operations *ops);
+ #else
+ static inline int xsm_init (unsigned int *initrdidx,
+-                const multiboot_info_t *mbi, unsigned long initial_images_start)
++                            const multiboot_info_t *mbi,
++                            void *(*bootstrap_map)(const module_t *))
+ {
+     return 0;
+ }
+Index: xen-4.0.1/xen/xsm/xsm_core.c
+===================================================================
+--- xen-4.0.1.orig/xen/xsm/xsm_core.c	2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/xsm/xsm_core.c	2011-11-25 16:24:33.000000000 +0100
+@@ -47,7 +47,7 @@
+ }
+ 
+ int __init xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
+-                    unsigned long initial_images_start)
++                    void *(*bootstrap_map)(const module_t *))
+ {
+     int ret = 0;
+ 
+@@ -55,9 +55,10 @@
+ 
+     if ( XSM_MAGIC )
+     {
+-        ret = xsm_policy_init(initrdidx, mbi, initial_images_start);
++        ret = xsm_policy_init(initrdidx, mbi, bootstrap_map);
+         if ( ret )
+         {
++            bootstrap_map(NULL);
+             printk("%s: Error initializing policy.\n", __FUNCTION__);
+             return -EINVAL;
+         }
+@@ -65,6 +66,7 @@
+ 
+     if ( verify(&dummy_xsm_ops) )
+     {
++        bootstrap_map(NULL);
+         printk("%s could not verify "
+                "dummy_xsm_ops structure.\n", __FUNCTION__);
+         return -EIO;
+@@ -72,6 +74,7 @@
+ 
+     xsm_ops = &dummy_xsm_ops;
+     do_xsm_initcalls();
++    bootstrap_map(NULL);
+ 
+     return 0;
+ }
+Index: xen-4.0.1/xen/xsm/xsm_policy.c
+===================================================================
+--- xen-4.0.1.orig/xen/xsm/xsm_policy.c	2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/xsm/xsm_policy.c	2011-11-25 16:24:33.000000000 +0100
+@@ -22,11 +22,11 @@
+ #include <xsm/xsm.h>
+ #include <xen/multiboot.h>
+ 
+-char *policy_buffer = NULL;
+-u32 policy_size = 0;
++char *__initdata policy_buffer = NULL;
++u32 __initdata policy_size = 0;
+ 
+ int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
+-                           unsigned long initial_images_start)
++                    void *(*bootstrap_map)(const module_t *))
+ {
+     int i;
+     module_t *mod = (module_t *)__va(mbi->mods_addr);
+@@ -40,15 +40,8 @@
+      */
+     for ( i = mbi->mods_count-1; i >= 1; i-- )
+     {
+-        start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
+-#if defined(__i386__)
+-        _policy_start = (u32 *)start;
+-#elif defined(__x86_64__)
+-        _policy_start = maddr_to_virt(start);
+-#else
+-        _policy_start = NULL;
+-#endif
+-        _policy_len   = mod[i].mod_end - mod[i].mod_start;
++        _policy_start = bootstrap_map(mod + i);
++        _policy_len   = mod[i].mod_end;
+ 
+         if ( (xsm_magic_t)(*_policy_start) == XSM_MAGIC )
+         {
+@@ -63,6 +56,8 @@
+             break;
+ 
+         }
++
++        bootstrap_map(NULL);
+     }
+ 
+     return rc;
diff -Nru xen-4.0.1/debian/rules.real xen-4.0.1/debian/rules.real
--- xen-4.0.1/debian/rules.real	2010-08-02 15:10:13.000000000 +0200
+++ xen-4.0.1/debian/rules.real	2012-06-14 20:24:30.000000000 +0200
@@ -112,6 +112,7 @@
 
 install-hypervisor_$(ARCH)_$(FLAVOUR): DIR=$(BUILD_DIR)/build-hypervisor_$(ARCH)_$(FLAVOUR)
 install-hypervisor_$(ARCH)_$(FLAVOUR): PACKAGE_NAME = xen-hypervisor-$(VERSION)-$(FLAVOUR)
+install-hypervisor_$(ARCH)_$(FLAVOUR): PACKAGE_DIR = debian/$(PACKAGE_NAME)
 install-hypervisor_$(ARCH)_$(FLAVOUR): DH_OPTIONS = -p$(PACKAGE_NAME)
 install-hypervisor_$(ARCH)_$(FLAVOUR): $(STAMPS_DIR)/build-hypervisor_$(ARCH)_$(FLAVOUR)
 	dh_testdir
@@ -119,6 +120,7 @@
 	dh_prep
 	dh_installdirs boot
 	cp $(DIR)/xen/xen.gz debian/$(PACKAGE_NAME)/boot/xen-$(VERSION)-$(FLAVOUR).gz
+	install -D -m644 debian/xen-hypervisor.NEWS $(PACKAGE_DIR)/usr/share/doc/$(PACKAGE_NAME)/NEWS
 	+$(MAKE_SELF) install-base
 
 install-lib-dev_$(ARCH): DIR = $(BUILD_DIR)/install-utils_$(ARCH)
diff -Nru xen-4.0.1/debian/xen-hypervisor.NEWS xen-4.0.1/debian/xen-hypervisor.NEWS
--- xen-4.0.1/debian/xen-hypervisor.NEWS	1970-01-01 01:00:00.000000000 +0100
+++ xen-4.0.1/debian/xen-hypervisor.NEWS	2012-06-14 20:24:30.000000000 +0200
@@ -0,0 +1,15 @@
+xen-3.0 (4.0.1-5) stable-security; urgency=low
+
+  A security issue has been discovered that affects some older AMD
+  processors. Untrusted 64-bit Xen guests can cause a processor hang.
+  Affected processors all predate the AMD SVM extensions for hardware
+  virtualization.
+
+  After this update has been applied, Xen will refuse to boot by default
+  if it determines it is running on a vulnerable system. You may override
+  this default by adding the "allow_unsafe" keyword to your hypervisor
+  command line. On systems using the GRUB bootloader, you can do this by
+  editing the /etc/default/grub file and adding the keyword to the
+  "export GRUB_CMDLINE_XEN=" line.
+
+ -- Bastian Blank <waldi@debian.org>  Mon, 11 Jun 2012 18:10:55 +0000
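
As a usage sketch for the NEWS entry above (commentary only, not part of
the debdiff, and assuming a stock Debian GRUB 2 setup): overriding the new
erratum #121 boot check is only sensible if all PV guest kernels are
trusted, and amounts to

    # /etc/default/grub
    # Options on this line are passed to the Xen hypervisor itself,
    # not to the dom0 kernel.
    export GRUB_CMDLINE_XEN="allow_unsafe"

followed by running update-grub and rebooting into the updated hypervisor.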
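Similarly, the "return address is not canonical" test that the
CVE-2012-0217 patch adds to restore_all_guest is equivalent to this C
sketch (the function name is illustrative; an x86-64 address is canonical
iff bits 63:47 all carry the same value):

    #include <stdint.h>

    /* Illustration only; the hypervisor performs this check in assembly. */
    static inline int is_canonical_address(uint64_t addr)
    {
        /* An arithmetic shift fills with copies of the sign bit, so the
         * result keeps bits 63:47 of addr: 0 or -1 means all-equal. */
        int64_t sext = (int64_t)addr >> 47;
        return sext == 0 || sext == -1;
    }

The assembly version computes sext + 1 in %ecx and forces the IRET path
when the unsigned result is above 1, i.e. exactly when this function
returns 0.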
