Re: Stable update for xen
On Thu, Jun 14, 2012 at 07:55:21PM +0100, Adam D. Barratt wrote:
> On Thu, 2012-06-14 at 20:51 +0200, Bastian Blank wrote:
> > I'd like to fix a boot error of Xen on several newer machines in
> > stable.
> I'm assuming this is fixed in at least unstable already, given the dates
> of the commits referenced in the bug report?
It is included in 4.1.0.
> Please could we have a full source debdiff for the proposed package,
> against the package currently in stable?
Sure.
Bastian
--
The idea of male and female are universal constants.
-- Kirk, "Metamorphosis", stardate 3219.8
diff -Nru xen-4.0.1/debian/changelog xen-4.0.1/debian/changelog
--- xen-4.0.1/debian/changelog 2011-06-09 20:35:07.000000000 +0200
+++ xen-4.0.1/debian/changelog 2012-06-14 20:27:57.000000000 +0200
@@ -1,3 +1,23 @@
+xen (4.0.1-6) UNRELEASED; urgency=low
+
+ [ Ian Campbell ]
+ * Backport fix to remove lowmem 1:1 mapping that fixes boot on some
+ classes of machines. (Closes: #649923)
+
+ -- Bastian Blank <waldi@debian.org> Thu, 14 Jun 2012 20:27:03 +0200
+
+xen (4.0.1-5) stable-security; urgency=low
+
+ * Fix privilege escalation and syscall/sysenter DoS while using
+ non-canonical addresses by untrusted PV guests.
+ CVE-2012-0217
+ CVE-2012-0218
+ * Disable Xen on CPUs affected by AMD Erratum #121. PV guests can
+ cause a DoS of the host.
+ CVE-2012-2934
+
+ -- Bastian Blank <waldi@debian.org> Mon, 11 Jun 2012 18:12:37 +0000
+
xen (4.0.1-4) stable-security; urgency=low
* Fix overflows and missing error checks in PV kernel loader.
diff -Nru xen-4.0.1/debian/control.md5sum xen-4.0.1/debian/control.md5sum
--- xen-4.0.1/debian/control.md5sum 2011-06-09 20:36:05.000000000 +0200
+++ xen-4.0.1/debian/control.md5sum 2012-06-14 20:31:20.000000000 +0200
@@ -1,4 +1,4 @@
-3207088ea024aa07513e3c44b7d3e1af debian/changelog
+6a070480a54a79a74d6623a07ff8beb7 debian/changelog
24f2598a23e30264aea4a983d5d19eec debian/bin/gencontrol.py
ee1ccd7bf0932a81ca221cab08347614 debian/templates/control.hypervisor.in
e4335ab10e217a12328cdf123473ed37 debian/templates/control.main.in
diff -Nru xen-4.0.1/debian/patches/CVE-2012-0217+2012-0218 xen-4.0.1/debian/patches/CVE-2012-0217+2012-0218
--- xen-4.0.1/debian/patches/CVE-2012-0217+2012-0218 1970-01-01 01:00:00.000000000 +0100
+++ xen-4.0.1/debian/patches/CVE-2012-0217+2012-0218 2012-06-14 20:24:30.000000000 +0200
@@ -0,0 +1,96 @@
+diff -r d8fd425b60d3 xen/arch/x86/x86_64/asm-offsets.c
+--- a/xen/arch/x86/x86_64/asm-offsets.c Tue May 01 14:18:46 2012 +0100
++++ b/xen/arch/x86/x86_64/asm-offsets.c Thu May 24 11:18:47 2012 +0100
+@@ -89,6 +89,8 @@ void __dummy__(void)
+ arch.guest_context.trap_ctxt[TRAP_gp_fault].address);
+ OFFSET(VCPU_gp_fault_sel, struct vcpu,
+ arch.guest_context.trap_ctxt[TRAP_gp_fault].cs);
++ OFFSET(VCPU_gp_fault_flags, struct vcpu,
++ arch.guest_context.trap_ctxt[TRAP_gp_fault].flags);
+ OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
+ OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
+ OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
+diff -r d8fd425b60d3 xen/arch/x86/x86_64/compat/entry.S
+--- a/xen/arch/x86/x86_64/compat/entry.S Tue May 01 14:18:46 2012 +0100
++++ b/xen/arch/x86/x86_64/compat/entry.S Thu May 24 11:18:47 2012 +0100
+@@ -227,6 +227,7 @@ 1: call compat_create_bounce_frame
+ ENTRY(compat_post_handle_exception)
+ testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
+ jz compat_test_all_events
++.Lcompat_bounce_exception:
+ call compat_create_bounce_frame
+ movb $0,TRAPBOUNCE_flags(%rdx)
+ jmp compat_test_all_events
+@@ -243,14 +244,15 @@ ENTRY(compat_syscall)
+ 1: movq %rax,TRAPBOUNCE_eip(%rdx)
+ movw %si,TRAPBOUNCE_cs(%rdx)
+ movb %cl,TRAPBOUNCE_flags(%rdx)
+- call compat_create_bounce_frame
+- jmp compat_test_all_events
++ jmp .Lcompat_bounce_exception
+ 2: movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ subl $2,UREGS_rip(%rsp)
+ movq VCPU_gp_fault_addr(%rbx),%rax
+ movzwl VCPU_gp_fault_sel(%rbx),%esi
+- movb $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+ movl $0,TRAPBOUNCE_error_code(%rdx)
++ testb $4,VCPU_gp_fault_flags(%rbx)
++ setnz %cl
++ leal TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE(,%rcx,TBF_INTERRUPT),%ecx
+ jmp 1b
+
+ ENTRY(compat_sysenter)
+diff -r d8fd425b60d3 xen/arch/x86/x86_64/entry.S
+--- a/xen/arch/x86/x86_64/entry.S Tue May 01 14:18:46 2012 +0100
++++ b/xen/arch/x86/x86_64/entry.S Thu May 24 11:18:47 2012 +0100
+@@ -51,6 +51,13 @@ restore_all_guest:
+ testw $TRAP_syscall,4(%rsp)
+ jz iret_exit_to_guest
+
++ /* Don't use SYSRET path if the return address is not canonical. */
++ movq 8(%rsp),%rcx
++ sarq $47,%rcx
++ incl %ecx
++ cmpl $1,%ecx
++ ja .Lforce_iret
++
+ addq $8,%rsp
+ popq %rcx # RIP
+ popq %r11 # CS
+@@ -61,6 +68,10 @@ restore_all_guest:
+ sysretq
+ 1: sysretl
+
++.Lforce_iret:
++ /* Mimic SYSRET behavior. */
++ movq 8(%rsp),%rcx # RIP
++ movq 24(%rsp),%r11 # RFLAGS
+ ALIGN
+ /* No special register assumptions. */
+ iret_exit_to_guest:
+@@ -298,12 +309,14 @@ 1: movq VCPU_domain(%rbx),%rdi
+ movb %cl,TRAPBOUNCE_flags(%rdx)
+ testb $1,DOMAIN_is_32bit_pv(%rdi)
+ jnz compat_sysenter
+- call create_bounce_frame
+- jmp test_all_events
++ jmp .Lbounce_exception
+ 2: movl %eax,TRAPBOUNCE_error_code(%rdx)
+ movq VCPU_gp_fault_addr(%rbx),%rax
+- movb $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+ movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
++ subq $2,UREGS_rip(%rsp)
++ testb $4,VCPU_gp_fault_flags(%rbx)
++ setnz %cl
++ leal TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE(,%rcx,TBF_INTERRUPT),%ecx
+ jmp 1b
+
+ ENTRY(int80_direct_trap)
+@@ -490,6 +503,7 @@ 1: movq %rsp,%rdi
+ jnz compat_post_handle_exception
+ testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
+ jz test_all_events
++.Lbounce_exception:
+ call create_bounce_frame
+ movb $0,TRAPBOUNCE_flags(%rdx)
+ jmp test_all_events
diff -Nru xen-4.0.1/debian/patches/CVE-2012-2934 xen-4.0.1/debian/patches/CVE-2012-2934
--- xen-4.0.1/debian/patches/CVE-2012-2934 1970-01-01 01:00:00.000000000 +0100
+++ xen-4.0.1/debian/patches/CVE-2012-2934 2012-06-14 20:24:30.000000000 +0200
@@ -0,0 +1,46 @@
+x86-64: detect processors subject to AMD erratum #121 and refuse to boot
+
+Processors with this erratum are subject to a DoS attack by unprivileged
+guest users.
+
+This is XSA-9 / CVE-2006-0744.
+
+Signed-off-by: Jan Beulich <JBeulich@suse.com>
+Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
+
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -41,6 +41,9 @@ void start_svm(struct cpuinfo_x86 *c);
+ integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
+ integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);
+
++static int opt_allow_unsafe;
++boolean_param("allow_unsafe", opt_allow_unsafe);
++
+ static inline void wrmsr_amd(unsigned int index, unsigned int lo,
+ unsigned int hi)
+ {
+@@ -640,6 +643,11 @@ static void __devinit init_amd(struct cp
+ clear_bit(X86_FEATURE_MCE, c->x86_capability);
+
+ #ifdef __x86_64__
++ if (cpu_has_amd_erratum(c, AMD_ERRATUM_121) && !opt_allow_unsafe)
++ panic("Xen will not boot on this CPU for security reasons.\n"
++ "Pass \"allow_unsafe\" if you're trusting all your"
++ " (PV) guest kernels.\n");
++
+ /* AMD CPUs do not support SYSENTER outside of legacy mode. */
+ clear_bit(X86_FEATURE_SEP, c->x86_capability);
+ #endif
+--- a/xen/include/asm-x86/amd.h
++++ b/xen/include/asm-x86/amd.h
+@@ -127,6 +127,9 @@
+ #define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
+ #define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
+
++#define AMD_ERRATUM_121 \
++ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x3f, 0xf))
++
+ #define AMD_ERRATUM_170 \
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x67, 0xf))
+
diff -Nru xen-4.0.1/debian/patches/series xen-4.0.1/debian/patches/series
--- xen-4.0.1/debian/patches/series 2011-06-09 20:35:07.000000000 +0200
+++ xen-4.0.1/debian/patches/series 2012-06-14 20:26:44.000000000 +0200
@@ -71,3 +71,6 @@
upstream-21461:ee088a0b5cb8-CVE-2011-1166
upstream-21482:c2adc059e931-CVE-2011-1583
upstream-21485:b85a9e58ec3a-CVE-2011-1898
+upstream-22375:426f3a265784
+CVE-2012-0217+2012-0218
+CVE-2012-2934
diff -Nru xen-4.0.1/debian/patches/upstream-22375:426f3a265784 xen-4.0.1/debian/patches/upstream-22375:426f3a265784
--- xen-4.0.1/debian/patches/upstream-22375:426f3a265784 1970-01-01 01:00:00.000000000 +0100
+++ xen-4.0.1/debian/patches/upstream-22375:426f3a265784 2012-06-14 20:26:16.000000000 +0200
@@ -0,0 +1,1094 @@
+# HG changeset patch
+# User Keir Fraser <keir@xen.org>
+# Date 1289303389 0
+# Node ID 426f3a2657844cec77ce0043b0408b0887fafa41
+# Parent 9997a1418633c92286189b33f701ecbac2a98ccd
+x86: do away with the boot time low-memory 1:1 mapping
+
+By doing so, we're no longer restricted to be able to place all boot
+loader modules into the low 1Gb/4Gb (32-/64-bit) of memory, nor is
+there a dependency anymore on where the boot loader places the
+modules.
+
+We're also no longer restricted to copy the modules into a place below
+4Gb, nor to put them all together into a single piece of memory.
+
+Further it allows even the 32-bit Dom0 kernel to be loaded anywhere in
+physical memory (except if it doesn't support PAE-above-4G).
+
+Signed-off-by: Jan Beulich <jbeulich@novell.com>
+
+Index: xen-4.0.1/xen/arch/x86/boot/Makefile
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/boot/Makefile 2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/boot/Makefile 2011-11-25 16:24:33.000000000 +0100
+@@ -4,6 +4,6 @@
+
+ BOOT_TRAMPOLINE := $(shell sed -n 's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' $(BASEDIR)/include/asm-x86/config.h)
+ %.S: %.c
+- RELOC=$(BOOT_TRAMPOLINE) XEN_BITSPERLONG=$(patsubst x86_%,%,$(TARGET_SUBARCH)) $(MAKE) -f build32.mk $@
++ RELOC=$(BOOT_TRAMPOLINE) $(MAKE) -f build32.mk $@
+
+ reloc.S: $(BASEDIR)/include/asm-x86/config.h
+Index: xen-4.0.1/xen/arch/x86/boot/build32.mk
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/boot/build32.mk 2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/boot/build32.mk 2011-11-25 16:24:33.000000000 +0100
+@@ -19,6 +19,6 @@
+ $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(RELOC) -o $@ $<
+
+ %.o: %.c
+- $(CC) $(CFLAGS) -DXEN_BITSPERLONG=$(XEN_BITSPERLONG) -c $< -o $@
++ $(CC) $(CFLAGS) -c $< -o $@
+
+ reloc.o: $(BASEDIR)/include/asm-x86/config.h
+Index: xen-4.0.1/xen/arch/x86/boot/head.S
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/boot/head.S 2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/boot/head.S 2011-11-25 16:24:33.000000000 +0100
+@@ -110,12 +110,15 @@
+ /* Initialise L2 identity-map and xen page table entries (16MB). */
+ mov $sym_phys(l2_identmap),%edi
+ mov $sym_phys(l2_xenmap),%esi
++ mov $sym_phys(l2_bootmap),%edx
+ mov $0x1e3,%eax /* PRESENT+RW+A+D+2MB+GLOBAL */
+ mov $8,%ecx
+ 1: mov %eax,(%edi)
+ add $8,%edi
+ mov %eax,(%esi)
+ add $8,%esi
++ mov %eax,(%edx)
++ add $8,%edx
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ loop 1b
+ /* Initialise L3 identity-map page directory entries. */
+@@ -129,9 +132,13 @@
+ /* Initialise L3 xen-map page directory entry. */
+ mov $(sym_phys(l2_xenmap)+7),%eax
+ mov %eax,sym_phys(l3_xenmap) + l3_table_offset(XEN_VIRT_START)*8
+- /* Hook identity-map and xen-map L3 tables into PML4. */
++ /* Initialise L3 boot-map page directory entry. */
++ mov $(sym_phys(l2_bootmap)+7),%eax
++ mov %eax,sym_phys(l3_bootmap) + 0*8
++ /* Hook identity-map, xen-map, and boot-map L3 tables into PML4. */
++ mov $(sym_phys(l3_bootmap)+7),%eax
++ mov %eax,sym_phys(idle_pg_table) + 0*8
+ mov $(sym_phys(l3_identmap)+7),%eax
+- mov %eax,sym_phys(idle_pg_table) + ( 0*8) /* PML4[ 0]: 1:1 map */
+ mov %eax,sym_phys(idle_pg_table) + l4_table_offset(DIRECTMAP_VIRT_START)*8
+ mov $(sym_phys(l3_xenmap)+7),%eax
+ mov %eax,sym_phys(idle_pg_table) + l4_table_offset(XEN_VIRT_START)*8
+@@ -176,6 +183,7 @@
+ #if defined(__x86_64__)
+ mov %edi,sym_phys(l2_identmap)
+ mov %edi,sym_phys(l2_xenmap)
++ mov %edi,sym_phys(l2_bootmap)
+ #else
+ mov %edi,sym_phys(idle_pg_table_l2)
+ mov %edi,sym_phys(idle_pg_table_l2) + (__PAGE_OFFSET>>18)
+Index: xen-4.0.1/xen/arch/x86/boot/reloc.c
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/boot/reloc.c 2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/boot/reloc.c 2011-11-25 16:24:33.000000000 +0100
+@@ -68,7 +68,6 @@
+ {
+ module_t *mods = reloc_mbi_struct(
+ (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t));
+- u32 max_addr = 0;
+
+ mbi->mods_addr = (u32)mods;
+
+@@ -76,29 +75,6 @@
+ {
+ if ( mods[i].string )
+ mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string);
+- if ( mods[i].mod_end > max_addr )
+- max_addr = mods[i].mod_end;
+- }
+-
+- /*
+- * 32-bit Xen only maps bottom 1GB of memory at boot time. Relocate
+- * modules which extend beyond this (GRUB2 in particular likes to
+- * place modules as high as possible below 4GB).
+- */
+-#define BOOTMAP_END (1ul<<30) /* 1GB */
+- if ( (XEN_BITSPERLONG == 32) && (max_addr > BOOTMAP_END) )
+- {
+- char *mod_alloc = (char *)BOOTMAP_END;
+- for ( i = 0; i < mbi->mods_count; i++ )
+- mod_alloc -= mods[i].mod_end - mods[i].mod_start;
+- for ( i = 0; i < mbi->mods_count; i++ )
+- {
+- u32 mod_len = mods[i].mod_end - mods[i].mod_start;
+- mods[i].mod_start = (u32)memcpy(
+- mod_alloc, (char *)mods[i].mod_start, mod_len);
+- mods[i].mod_end = mods[i].mod_start + mod_len;
+- mod_alloc += mod_len;
+- }
+ }
+ }
+
+Index: xen-4.0.1/xen/arch/x86/domain_build.c
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/domain_build.c 2011-11-25 15:50:01.000000000 +0100
++++ xen-4.0.1/xen/arch/x86/domain_build.c 2011-11-25 16:24:33.000000000 +0100
+@@ -30,6 +30,7 @@
+ #include <asm/p2m.h>
+ #include <asm/e820.h>
+ #include <asm/acpi.h>
++#include <asm/setup.h>
+ #include <asm/bzimage.h> /* for bzimage_parse */
+
+ #include <public/version.h>
+@@ -291,9 +292,9 @@
+
+ int __init construct_dom0(
+ struct domain *d,
+- unsigned long _image_base,
+- unsigned long _image_start, unsigned long image_len,
+- unsigned long _initrd_start, unsigned long initrd_len,
++ const module_t *image, unsigned long image_headroom,
++ const module_t *initrd,
++ void *(*bootstrap_map)(const module_t *),
+ char *cmdline)
+ {
+ int i, rc, compatible, compat32, order, machine;
+@@ -308,16 +309,14 @@
+ start_info_t *si;
+ struct vcpu *v = d->vcpu[0];
+ unsigned long long value;
+-#if defined(__i386__)
+- char *image_base = (char *)_image_base; /* use lowmem mappings */
+- char *image_start = (char *)_image_start; /* use lowmem mappings */
+- char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
+-#elif defined(__x86_64__)
+- char *image_base = __va(_image_base);
+- char *image_start = __va(_image_start);
+- char *initrd_start = __va(_initrd_start);
+-#endif
+-#if CONFIG_PAGING_LEVELS >= 4
++ char *image_base = bootstrap_map(image);
++ unsigned long image_len = image->mod_end;
++ char *image_start = image_base + image_headroom;
++ unsigned long initrd_len = initrd ? initrd->mod_end : 0;
++#if CONFIG_PAGING_LEVELS < 4
++ module_t mpt;
++ void *mpt_ptr;
++#else
+ l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+ #endif
+ l3_pgentry_t *l3tab = NULL, *l3start = NULL;
+@@ -347,7 +346,7 @@
+ unsigned long v_end;
+
+ /* Machine address of next candidate page-table page. */
+- unsigned long mpt_alloc;
++ paddr_t mpt_alloc;
+
+ /* Sanity! */
+ BUG_ON(d->domain_id != 0);
+@@ -502,17 +501,17 @@
+ if ( (1UL << order) > nr_pages )
+ panic("Domain 0 allocation is too small for kernel image.\n");
+
+-#ifdef __i386__
+- /* Ensure that our low-memory 1:1 mapping covers the allocation. */
+- page = alloc_domheap_pages(d, order, MEMF_bits(30));
+-#else
+ if ( parms.p2m_base != UNSET_ADDR )
+ {
+ vphysmap_start = parms.p2m_base;
+ vphysmap_end = vphysmap_start + nr_pages * sizeof(unsigned long);
+ }
+- page = alloc_domheap_pages(d, order, 0);
++#ifdef __i386__
++ if ( !test_bit(XENFEAT_pae_pgdir_above_4gb, parms.f_supported) )
++ page = alloc_domheap_pages(d, order, MEMF_bits(32));
++ else
+ #endif
++ page = alloc_domheap_pages(d, order, 0);
+ if ( page == NULL )
+ panic("Not enough RAM for domain 0 allocation.\n");
+ alloc_spfn = page_to_mfn(page);
+@@ -541,8 +540,7 @@
+ _p(v_start), _p(v_end));
+ printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry));
+
+- mpt_alloc = (vpt_start - v_start) +
+- (unsigned long)pfn_to_paddr(alloc_spfn);
++ mpt_alloc = (vpt_start - v_start) + pfn_to_paddr(alloc_spfn);
+
+ #if defined(__i386__)
+ /*
+@@ -555,17 +553,25 @@
+ return -EINVAL;
+ }
+
++ mpt.mod_start = mpt_alloc >> PAGE_SHIFT;
++ mpt.mod_end = vpt_end - vpt_start;
++ mpt_ptr = bootstrap_map(&mpt);
++#define MPT_ALLOC(n) (mpt_ptr += (n)*PAGE_SIZE, mpt_alloc += (n)*PAGE_SIZE)
++
+ /* WARNING: The new domain must have its 'processor' field filled in! */
+- l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+- l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
++ l3start = l3tab = mpt_ptr; MPT_ALLOC(1);
++ l2start = l2tab = mpt_ptr; MPT_ALLOC(4);
+ for (i = 0; i < L3_PAGETABLE_ENTRIES; i++) {
+- copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
+- idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
+- l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT);
++ if ( i < 3 )
++ clear_page(l2tab + i * L2_PAGETABLE_ENTRIES);
++ else
++ copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
++ idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
++ l3tab[i] = l3e_from_pfn(mpt.mod_start + 1 + i, L3_PROT);
+ l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
+- l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
++ l2e_from_pfn(mpt.mod_start + 1 + i, __PAGE_HYPERVISOR);
+ }
+- v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
++ v->arch.guest_table = pagetable_from_pfn(mpt.mod_start);
+
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ l2tab[l2_linear_offset(PERDOMAIN_VIRT_START) + i] =
+@@ -577,9 +583,9 @@
+ {
+ if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+ {
+- l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
+- mpt_alloc += PAGE_SIZE;
+- *l2tab = l2e_from_paddr((unsigned long)l1start, L2_PROT);
++ l1tab = mpt_ptr;
++ *l2tab = l2e_from_paddr(mpt_alloc, L2_PROT);
++ MPT_ALLOC(1);
+ l2tab++;
+ clear_page(l1tab);
+ if ( count == 0 )
+@@ -594,11 +600,14 @@
+
+ mfn++;
+ }
++#undef MPT_ALLOC
+
+ /* Pages that are part of page tables must be read only. */
++ mpt_alloc = (paddr_t)mpt.mod_start << PAGE_SHIFT;
++ mpt_ptr = l3start;
+ l2tab = l2start + l2_linear_offset(vpt_start);
+- l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*l2tab);
+- l1tab += l1_table_offset(vpt_start);
++ l1start = mpt_ptr + (l2e_get_paddr(*l2tab) - mpt_alloc);
++ l1tab = l1start + l1_table_offset(vpt_start);
+ for ( count = 0; count < nr_pt_pages; count++ )
+ {
+ page = mfn_to_page(l1e_get_pfn(*l1tab));
+@@ -634,9 +643,15 @@
+ break;
+ }
+ if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
+- l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*++l2tab);
++ l1tab = mpt_ptr + (l2e_get_paddr(*++l2tab) - mpt_alloc);
+ }
+
++ /*
++ * Put Xen's first L3 entry into Dom0's page tables so that updates
++ * through bootstrap_map() will affect the page tables we will run on.
++ */
++ l3start[0] = l3e_from_paddr(__pa(idle_pg_table_l2), L3_PROT);
++
+ #elif defined(__x86_64__)
+
+ /* Overlap with Xen protected area? */
+@@ -810,6 +825,7 @@
+ /* Copy the OS image and free temporary buffer. */
+ elf.dest = (void*)vkern_start;
+ elf_load_binary(&elf);
++ bootstrap_map(NULL);
+
+ if ( UNSET_ADDR != parms.virt_hypercall )
+ {
+@@ -826,7 +842,12 @@
+
+ /* Copy the initial ramdisk. */
+ if ( initrd_len != 0 )
++ {
++ char *initrd_start = bootstrap_map(initrd);
++
+ memcpy((void *)vinitrd_start, initrd_start, initrd_len);
++ bootstrap_map(NULL);
++ }
+
+ /* Free temporary buffers. */
+ discard_initial_images();
+@@ -1034,7 +1055,22 @@
+ write_ptbase(current);
+
+ #if defined(__i386__)
+- /* Destroy low mappings - they were only for our convenience. */
++ /* Restore Dom0's first L3 entry. */
++ mpt.mod_end = 5 * PAGE_SIZE;
++ l3start = mpt_ptr = bootstrap_map(&mpt);
++ l2start = mpt_ptr + PAGE_SIZE;
++ l3start[0] = l3e_from_pfn(mpt.mod_start + 1, L3_PROT);
++
++ /* Re-setup CR3 */
++ if ( paging_mode_enabled(d) )
++ paging_update_paging_modes(v);
++ else
++ update_cr3(v);
++
++ /*
++ * Destroy low mappings - they were only for our convenience. Note
++ * that zap_low_mappings() exceeds what bootstrap_map(NULL) would do.
++ */
+ zap_low_mappings(l2start);
+ #endif
+
+Index: xen-4.0.1/xen/arch/x86/setup.c
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/setup.c 2010-08-29 17:13:22.000000000 +0200
++++ xen-4.0.1/xen/arch/x86/setup.c 2011-11-25 16:24:33.000000000 +0100
+@@ -43,14 +43,6 @@
+ #include <asm/mach-generic/mach_apic.h> /* for generic_apic_probe */
+ #include <asm/setup.h>
+
+-#if defined(CONFIG_X86_64)
+-#define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
+-#define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
+-#else
+-#define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */
+-#define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
+-#endif
+-
+ extern u16 boot_edid_caps;
+ extern u8 boot_edid_info[128];
+ extern struct boot_video_info boot_vid_info;
+@@ -167,21 +159,34 @@
+ for ( ; ; ) halt(); \
+ } while (0)
+
+-static unsigned long __initdata initial_images_base;
+-static unsigned long __initdata initial_images_start;
+-static unsigned long __initdata initial_images_end;
++static const module_t *__initdata initial_images;
++static unsigned int __initdata nr_initial_images;
+
+ unsigned long __init initial_images_nrpages(void)
+ {
+- ASSERT(!(initial_images_base & ~PAGE_MASK));
+- ASSERT(!(initial_images_end & ~PAGE_MASK));
+- return ((initial_images_end >> PAGE_SHIFT) -
+- (initial_images_base >> PAGE_SHIFT));
++ unsigned long nr;
++ unsigned int i;
++
++ for ( nr = i = 0; i < nr_initial_images; ++i )
++ nr += PFN_UP(initial_images[i].mod_end);
++
++ return nr;
+ }
+
+ void __init discard_initial_images(void)
+ {
+- init_domheap_pages(initial_images_base, initial_images_end);
++ unsigned int i;
++
++ for ( i = 0; i < nr_initial_images; ++i )
++ {
++ uint64_t start = (uint64_t)initial_images[i].mod_start << PAGE_SHIFT;
++
++ init_domheap_pages(start,
++ start + PAGE_ALIGN(initial_images[i].mod_end));
++ }
++
++ nr_initial_images = 0;
++ initial_images = NULL;
+ }
+
+ static void free_xen_data(char *s, char *e)
+@@ -273,33 +278,128 @@
+ printk("CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
+ }
+
++#define BOOTSTRAP_MAP_BASE (16UL << 20)
++#define BOOTSTRAP_MAP_LIMIT (1UL << L3_PAGETABLE_SHIFT)
++
+ /*
+ * Ensure a given physical memory range is present in the bootstrap mappings.
+ * Use superpage mappings to ensure that pagetable memory needn't be allocated.
+ */
+-static void __init bootstrap_map(unsigned long start, unsigned long end)
++static void *__init bootstrap_map(const module_t *mod)
+ {
+- unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+- start = max_t(unsigned long, start & ~mask, 16UL << 20);
+- end = (end + mask) & ~mask;
++ static unsigned long __initdata map_cur = BOOTSTRAP_MAP_BASE;
++ uint64_t start, end, mask = (1L << L2_PAGETABLE_SHIFT) - 1;
++ void *ret;
++
++#ifdef __x86_64__
++ if ( !early_boot )
++ return mod ? mfn_to_virt(mod->mod_start) : NULL;
++#endif
++
++ if ( !mod )
++ {
++ destroy_xen_mappings(BOOTSTRAP_MAP_BASE, BOOTSTRAP_MAP_LIMIT);
++ map_cur = BOOTSTRAP_MAP_BASE;
++ return NULL;
++ }
++
++ start = (uint64_t)mod->mod_start << PAGE_SHIFT;
++ end = start + mod->mod_end;
+ if ( start >= end )
+- return;
+- if ( end > BOOTSTRAP_DIRECTMAP_END )
+- panic("Cannot access memory beyond end of "
+- "bootstrap direct-map area\n");
+- map_pages_to_xen(
+- (unsigned long)maddr_to_bootstrap_virt(start),
+- start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++ return NULL;
++
++ if ( end <= BOOTSTRAP_MAP_BASE )
++ return (void *)(unsigned long)start;
++
++ ret = (void *)(map_cur + (unsigned long)(start & mask));
++ start &= ~mask;
++ end = (end + mask) & ~mask;
++ if ( end - start > BOOTSTRAP_MAP_LIMIT - map_cur )
++ return NULL;
++
++ map_pages_to_xen(map_cur, start >> PAGE_SHIFT,
++ (end - start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++ map_cur += end - start;
++ return ret;
+ }
+
+-static void __init move_memory(
+- unsigned long dst, unsigned long src_start, unsigned long src_end)
++static void *__init move_memory(
++ uint64_t dst, uint64_t src, unsigned int size, bool_t keep)
+ {
+- bootstrap_map(src_start, src_end);
+- bootstrap_map(dst, dst + src_end - src_start);
+- memmove(maddr_to_bootstrap_virt(dst),
+- maddr_to_bootstrap_virt(src_start),
+- src_end - src_start);
++ unsigned int blksz = BOOTSTRAP_MAP_LIMIT - BOOTSTRAP_MAP_BASE;
++ unsigned int mask = (1L << L2_PAGETABLE_SHIFT) - 1;
++
++ if ( src + size > BOOTSTRAP_MAP_BASE )
++ blksz >>= 1;
++
++ while ( size )
++ {
++ module_t mod;
++ unsigned int soffs = src & mask;
++ unsigned int doffs = dst & mask;
++ unsigned int sz;
++ void *d, *s;
++
++ mod.mod_start = (src - soffs) >> PAGE_SHIFT;
++ mod.mod_end = soffs + size;
++ if ( mod.mod_end > blksz )
++ mod.mod_end = blksz;
++ sz = mod.mod_end - soffs;
++ s = bootstrap_map(&mod);
++
++ mod.mod_start = (dst - doffs) >> PAGE_SHIFT;
++ mod.mod_end = doffs + size;
++ if ( mod.mod_end > blksz )
++ mod.mod_end = blksz;
++ if ( sz > mod.mod_end - doffs )
++ sz = mod.mod_end - doffs;
++ d = bootstrap_map(&mod);
++
++ memmove(d + doffs, s + soffs, sz);
++
++ dst += sz;
++ src += sz;
++ size -= sz;
++
++ if ( keep )
++ return size ? NULL : d + doffs;
++
++ bootstrap_map(NULL);
++ }
++
++ return NULL;
++}
++
++static uint64_t __init consider_modules(
++ uint64_t s, uint64_t e, uint32_t size, const module_t *mod,
++ unsigned int nr_mods, unsigned int this_mod)
++{
++ unsigned int i;
++
++ if ( s > e || e - s < size )
++ return 0;
++
++ for ( i = 0; i < nr_mods ; ++i )
++ {
++ uint64_t start = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
++ uint64_t end = start + PAGE_ALIGN(mod[i].mod_end);
++
++ if ( i == this_mod )
++ continue;
++
++ if ( s < end && start < e )
++ {
++ end = consider_modules(end, e, size, mod + i + 1,
++ nr_mods - i - 1, this_mod - i - 1);
++ if ( end )
++ return end;
++
++ return consider_modules(s, start, size, mod + i + 1,
++ nr_mods - i - 1, this_mod - i - 1);
++ }
++ }
++
++ return e;
+ }
+
+ static void __init setup_max_pdx(void)
+@@ -463,11 +563,10 @@
+ {
+ char *memmap_type = NULL;
+ char *cmdline, *kextra, *loader;
+- unsigned long _initrd_start = 0, _initrd_len = 0;
+ unsigned int initrdidx = 1;
+ multiboot_info_t *mbi = __va(mbi_p);
+ module_t *mod = (module_t *)__va(mbi->mods_addr);
+- unsigned long nr_pages, modules_length, modules_headroom;
++ unsigned long nr_pages, modules_headroom;
+ int i, j, e820_warn = 0, bytes = 0;
+ bool_t acpi_boot_table_init_done = 0;
+ struct ns16550_defaults ns16550 = {
+@@ -666,6 +765,9 @@
+ /* Early kexec reservation (explicit static start address). */
+ kexec_reserve_area(&boot_e820);
+
++ initial_images = mod;
++ nr_initial_images = mbi->mods_count;
++
+ /*
+ * Iterate backwards over all superpage-aligned RAM regions.
+ *
+@@ -679,48 +781,64 @@
+ * we can relocate the dom0 kernel and other multiboot modules. Also, on
+ * x86/64, we relocate Xen to higher memory.
+ */
+- modules_length = 0;
+ for ( i = 0; i < mbi->mods_count; i++ )
+- modules_length += mod[i].mod_end - mod[i].mod_start;
++ {
++ if ( mod[i].mod_start & (PAGE_SIZE - 1) )
++ EARLY_FAIL("Bootloader didn't honor module alignment request.\n");
++ mod[i].mod_end -= mod[i].mod_start;
++ mod[i].mod_start >>= PAGE_SHIFT;
++ mod[i].reserved = 0;
++ }
+
+- /* ensure mod[0] is mapped before parsing */
+- bootstrap_map(mod[0].mod_start, mod[0].mod_end);
+- modules_headroom = bzimage_headroom(
+- (char *)(unsigned long)mod[0].mod_start,
+- (unsigned long)(mod[0].mod_end - mod[0].mod_start));
++ modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end);
++ bootstrap_map(NULL);
+
+ for ( i = boot_e820.nr_map-1; i >= 0; i-- )
+ {
+ uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
++ uint64_t end, limit = ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT;
+
+- /* Superpage-aligned chunks from 16MB to BOOTSTRAP_DIRECTMAP_END. */
++ /* Superpage-aligned chunks from BOOTSTRAP_MAP_BASE. */
+ s = (boot_e820.map[i].addr + mask) & ~mask;
+ e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
+- s = max_t(uint64_t, s, 16 << 20);
+- e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
++ s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
+ if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
+ continue;
+
+- set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+-
+- /* Map the chunk. No memory will need to be allocated to do this. */
+- map_pages_to_xen(
+- (unsigned long)maddr_to_bootstrap_virt(s),
+- s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++ if ( s < limit )
++ {
++ end = min(e, limit);
++ set_pdx_range(s >> PAGE_SHIFT, end >> PAGE_SHIFT);
++#ifdef CONFIG_X86_64
++ map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
++ (end - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++#endif
++ }
+
+ #if defined(CONFIG_X86_64)
++ e = min_t(uint64_t, e, 1ULL << (PAGE_SHIFT + 32));
+ #define reloc_size ((__pa(&_end) + mask) & ~mask)
+ /* Is the region suitable for relocating Xen? */
+- if ( !xen_phys_start && ((e-s) >= reloc_size) )
++ if ( !xen_phys_start && e <= limit )
++ {
++ /* Don't overlap with modules. */
++ end = consider_modules(s, e, reloc_size + mask,
++ mod, mbi->mods_count, -1);
++ end &= ~mask;
++ }
++ else
++ end = 0;
++ if ( end > s )
+ {
+ extern l2_pgentry_t l2_xenmap[];
+ l4_pgentry_t *pl4e;
+ l3_pgentry_t *pl3e;
+ l2_pgentry_t *pl2e;
+ int i, j, k;
++ void *dst;
+
+ /* Select relocation address. */
+- e -= reloc_size;
++ e = end - reloc_size;
+ xen_phys_start = e;
+ bootsym(trampoline_xen_phys_start) = e;
+
+@@ -731,10 +849,10 @@
+ * data until after we have switched to the relocated pagetables!
+ */
+ barrier();
+- move_memory(e, 0, __pa(&_end) - xen_phys_start);
++ dst = move_memory(e, 0, (unsigned long)&_end - XEN_VIRT_START, 1);
+
+ /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
+- memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
++ memset(dst, 0x55, 1U << 20);
+
+ /* Walk initial pagetables, relocating page directory entries. */
+ pl4e = __va(__pa(idle_pg_table));
+@@ -791,38 +909,58 @@
+ "movq %%rsi,%%cr4 " /* CR4.PGE == 1 */
+ : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
+ "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
++
++ bootstrap_map(NULL);
+ }
+ #endif
+
+ /* Is the region suitable for relocating the multiboot modules? */
+- if ( !initial_images_start && (s < e) &&
+- ((e-s) >= (modules_length+modules_headroom)) )
++ for ( j = mbi->mods_count - 1; j >= 0; j-- )
+ {
+- initial_images_end = e;
+- initial_images_start = initial_images_end - modules_length;
+- initial_images_base = initial_images_start - modules_headroom;
+- initial_images_base &= PAGE_MASK;
+- for ( j = mbi->mods_count-1; j >= 0; j-- )
++ unsigned long headroom = j ? 0 : modules_headroom;
++ unsigned long size = PAGE_ALIGN(headroom + mod[j].mod_end);
++
++ if ( mod[j].reserved )
++ continue;
++
++ /* Don't overlap with other modules. */
++ end = consider_modules(s, e, size, mod, mbi->mods_count, j);
++
++ if ( s < end &&
++ (headroom ||
++ ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) )
+ {
+- e -= mod[j].mod_end - mod[j].mod_start;
+- move_memory(e, mod[j].mod_start, mod[j].mod_end);
+- mod[j].mod_end += e - mod[j].mod_start;
+- mod[j].mod_start = e;
++ move_memory(end - size + headroom,
++ (uint64_t)mod[j].mod_start << PAGE_SHIFT,
++ mod[j].mod_end, 0);
++ mod[j].mod_start = (end - size) >> PAGE_SHIFT;
++ mod[j].mod_end += headroom;
++ mod[j].reserved = 1;
+ }
+- e = initial_images_base;
+ }
+
+- if ( !kexec_crash_area.start && (s < e) &&
+- ((e-s) >= kexec_crash_area.size) )
++#ifdef CONFIG_X86_32
++ /* Confine the kexec area to below 4GB. */
++ e = min_t(uint64_t, e, 1ULL << 32);
++#endif
++ /* Don't overlap with modules. */
++ e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size),
++ mod, mbi->mods_count, -1);
++ if ( !kexec_crash_area.start && (s < e) )
+ {
+ e = (e - kexec_crash_area.size) & PAGE_MASK;
+ kexec_crash_area.start = e;
+ }
+ }
+
+- if ( !initial_images_start )
++ if ( modules_headroom && !mod->reserved )
+ EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
+- reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);
++ for ( i = 0; i < mbi->mods_count; ++i )
++ {
++ uint64_t s = (uint64_t)mod[i].mod_start << PAGE_SHIFT;
++
++ reserve_e820_ram(&boot_e820, s, s + PAGE_ALIGN(mod[i].mod_end));
++ }
+
+ #if defined(CONFIG_X86_32)
+ xenheap_initial_phys_start = (PFN_UP(__pa(&_end)) + 1) << PAGE_SHIFT;
+@@ -846,7 +984,10 @@
+ */
+ for ( i = 0; i < boot_e820.nr_map; i++ )
+ {
+- uint64_t s, e, map_s, map_e, mask = PAGE_SIZE - 1;
++ uint64_t s, e, mask = PAGE_SIZE - 1;
++#ifdef CONFIG_X86_64
++ uint64_t map_s, map_e;
++#endif
+
+ /* Only page alignment required now. */
+ s = (boot_e820.map[i].addr + mask) & ~mask;
+@@ -861,7 +1002,7 @@
+
+ #ifdef __x86_64__
+ if ( !acpi_boot_table_init_done &&
+- s >= BOOTSTRAP_DIRECTMAP_END &&
++ s >= (1ULL << 32) &&
+ !acpi_boot_table_init() )
+ {
+ acpi_boot_table_init_done = 1;
+@@ -900,26 +1041,60 @@
+
+ set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+
+- /* Need to create mappings above 16MB. */
+- map_s = max_t(uint64_t, s, 16<<20);
+- map_e = e;
+-#if defined(CONFIG_X86_32) /* mappings are truncated on x86_32 */
+- map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
+-#endif
++#ifdef CONFIG_X86_64
++ /* Need to create mappings above BOOTSTRAP_MAP_BASE. */
++ map_s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE);
++ map_e = min_t(uint64_t, e,
++ ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT);
+
+ /* Pass mapped memory to allocator /before/ creating new mappings. */
+- init_boot_pages(s, min_t(uint64_t, map_s, e));
++ init_boot_pages(s, min(map_s, e));
++ s = map_s;
++ if ( s < map_e )
++ {
++ uint64_t mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
++
++ map_s = (s + mask) & ~mask;
++ map_e &= ~mask;
++ init_boot_pages(map_s, map_e);
++ }
++
++ if ( map_s > map_e )
++ map_s = map_e = s;
+
+ /* Create new mappings /before/ passing memory to the allocator. */
+- if ( map_s < map_e )
+- map_pages_to_xen(
+- (unsigned long)maddr_to_bootstrap_virt(map_s),
+- map_s >> PAGE_SHIFT, (map_e-map_s) >> PAGE_SHIFT,
+- PAGE_HYPERVISOR);
++ if ( map_e < e )
++ {
++ map_pages_to_xen((unsigned long)__va(map_e), map_e >> PAGE_SHIFT,
++ (e - map_e) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++ init_boot_pages(map_e, e);
++ }
++ if ( s < map_s )
++ {
++ map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT,
++ (map_s - s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
++ init_boot_pages(s, map_s);
++ }
++#else
++ init_boot_pages(s, e);
++#endif
++ }
+
+- /* Pass remainder of this memory chunk to the allocator. */
+- init_boot_pages(map_s, e);
++ for ( i = 0; i < mbi->mods_count; ++i )
++ {
++ set_pdx_range(mod[i].mod_start,
++ mod[i].mod_start + PFN_UP(mod[i].mod_end));
++#ifdef CONFIG_X86_64
++ map_pages_to_xen((unsigned long)mfn_to_virt(mod[i].mod_start),
++ mod[i].mod_start,
++ PFN_UP(mod[i].mod_end), PAGE_HYPERVISOR);
++#endif
+ }
++#ifdef CONFIG_X86_64
++ map_pages_to_xen((unsigned long)__va(kexec_crash_area.start),
++ kexec_crash_area.start >> PAGE_SHIFT,
++ PFN_UP(kexec_crash_area.size), PAGE_HYPERVISOR);
++#endif
+
+ memguard_init();
+
+@@ -1041,7 +1216,7 @@
+
+ init_IRQ();
+
+- xsm_init(&initrdidx, mbi, initial_images_start);
++ xsm_init(&initrdidx, mbi, bootstrap_map);
+
+ init_idle_domain();
+
+@@ -1158,12 +1333,6 @@
+ cmdline = dom0_cmdline;
+ }
+
+- if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
+- {
+- _initrd_start = mod[initrdidx].mod_start;
+- _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
+- }
+-
+ if ( xen_cpuidle )
+ xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
+
+@@ -1171,13 +1340,10 @@
+ * We're going to setup domain0 using the module(s) that we stashed safely
+ * above our heap. The second module, if present, is an initrd ramdisk.
+ */
+- if ( construct_dom0(dom0,
+- initial_images_base,
+- initial_images_start,
+- mod[0].mod_end-mod[0].mod_start,
+- _initrd_start,
+- _initrd_len,
+- cmdline) != 0)
++ if ( construct_dom0(dom0, mod, modules_headroom,
++ (initrdidx > 0) && (initrdidx < mbi->mods_count)
++ ? mod + initrdidx : NULL,
++ bootstrap_map, cmdline) != 0)
+ panic("Could not set up DOM0 guest OS\n");
+
+ /* Scrub RAM that is still free and so may go to an unprivileged domain. */
+Index: xen-4.0.1/xen/arch/x86/x86_64/mm.c
+===================================================================
+--- xen-4.0.1.orig/xen/arch/x86/x86_64/mm.c 2011-11-25 15:50:01.000000000 +0100
++++ xen-4.0.1/xen/arch/x86/x86_64/mm.c 2011-11-25 16:24:33.000000000 +0100
+@@ -65,6 +65,12 @@
+ l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+ l2_xenmap[L2_PAGETABLE_ENTRIES];
+
++/* Enough page directories to map into the bottom 1GB. */
++l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
++ l3_bootmap[L3_PAGETABLE_ENTRIES];
++l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
++ l2_bootmap[L2_PAGETABLE_ENTRIES];
++
+ int __mfn_valid(unsigned long mfn)
+ {
+ return likely(mfn < max_page) &&
+Index: xen-4.0.1/xen/include/asm-x86/domain.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/asm-x86/domain.h 2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/asm-x86/domain.h 2011-11-25 16:24:33.000000000 +0100
+@@ -483,16 +483,6 @@
+ unsigned int *ecx,
+ unsigned int *edx);
+
+-int construct_dom0(
+- struct domain *d,
+- unsigned long image_base,
+- unsigned long image_start, unsigned long image_len,
+- unsigned long initrd_start, unsigned long initrd_len,
+- char *cmdline);
+-
+-extern unsigned long initial_images_nrpages(void);
+-extern void discard_initial_images(void);
+-
+ #endif /* __ASM_DOMAIN_H__ */
+
+ /*
+Index: xen-4.0.1/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h 2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h 2011-11-25 16:24:33.000000000 +0100
+@@ -30,7 +30,6 @@
+ &amd_iommu_head, list)
+
+ #define DMA_32BIT_MASK 0x00000000ffffffffULL
+-#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
+
+ extern int amd_iommu_debug;
+ extern int amd_iommu_perdev_intremap;
+Index: xen-4.0.1/xen/include/asm-x86/page.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/asm-x86/page.h 2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/asm-x86/page.h 2011-11-25 16:24:33.000000000 +0100
+@@ -292,6 +292,7 @@
+ extern l2_pgentry_t *compat_idle_pg_table_l2;
+ extern unsigned int m2p_compat_vstart;
+ #endif
++extern l2_pgentry_t l2_identmap[4*L2_PAGETABLE_ENTRIES];
+ void paging_init(void);
+ void setup_idle_pagetable(void);
+ #endif /* !defined(__ASSEMBLY__) */
+@@ -387,6 +388,7 @@
+
+ #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+ #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
++#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)
+
+ #endif /* __X86_PAGE_H__ */
+
+Index: xen-4.0.1/xen/include/asm-x86/setup.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/asm-x86/setup.h 2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/asm-x86/setup.h 2011-11-25 16:24:33.000000000 +0100
+@@ -1,6 +1,8 @@
+ #ifndef __X86_SETUP_H_
+ #define __X86_SETUP_H_
+
++#include <xen/multiboot.h>
++
+ extern int early_boot;
+ extern unsigned long xenheap_initial_phys_start;
+
+@@ -26,4 +28,14 @@
+ void vesa_init(void);
+ void vesa_mtrr_init(void);
+
++int construct_dom0(
++ struct domain *d,
++ const module_t *kernel, unsigned long kernel_headroom,
++ const module_t *initrd,
++ void *(*bootstrap_map)(const module_t *),
++ char *cmdline);
++
++unsigned long initial_images_nrpages(void);
++void discard_initial_images(void);
++
+ #endif
+Index: xen-4.0.1/xen/include/xsm/xsm.h
+===================================================================
+--- xen-4.0.1.orig/xen/include/xsm/xsm.h 2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/include/xsm/xsm.h 2011-11-25 16:24:33.000000000 +0100
+@@ -431,14 +431,15 @@
+
+ #ifdef XSM_ENABLE
+ extern int xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
+- unsigned long initial_images_start);
++ void *(*bootstrap_map)(const module_t *));
+ extern int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
+- unsigned long initial_images_start);
++ void *(*bootstrap_map)(const module_t *));
+ extern int register_xsm(struct xsm_operations *ops);
+ extern int unregister_xsm(struct xsm_operations *ops);
+ #else
+ static inline int xsm_init (unsigned int *initrdidx,
+- const multiboot_info_t *mbi, unsigned long initial_images_start)
++ const multiboot_info_t *mbi,
++ void *(*bootstrap_map)(const module_t *))
+ {
+ return 0;
+ }
+Index: xen-4.0.1/xen/xsm/xsm_core.c
+===================================================================
+--- xen-4.0.1.orig/xen/xsm/xsm_core.c 2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/xsm/xsm_core.c 2011-11-25 16:24:33.000000000 +0100
+@@ -47,7 +47,7 @@
+ }
+
+ int __init xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
+- unsigned long initial_images_start)
++ void *(*bootstrap_map)(const module_t *))
+ {
+ int ret = 0;
+
+@@ -55,9 +55,10 @@
+
+ if ( XSM_MAGIC )
+ {
+- ret = xsm_policy_init(initrdidx, mbi, initial_images_start);
++ ret = xsm_policy_init(initrdidx, mbi, bootstrap_map);
+ if ( ret )
+ {
++ bootstrap_map(NULL);
+ printk("%s: Error initializing policy.\n", __FUNCTION__);
+ return -EINVAL;
+ }
+@@ -65,6 +66,7 @@
+
+ if ( verify(&dummy_xsm_ops) )
+ {
++ bootstrap_map(NULL);
+ printk("%s could not verify "
+ "dummy_xsm_ops structure.\n", __FUNCTION__);
+ return -EIO;
+@@ -72,6 +74,7 @@
+
+ xsm_ops = &dummy_xsm_ops;
+ do_xsm_initcalls();
++ bootstrap_map(NULL);
+
+ return 0;
+ }
+Index: xen-4.0.1/xen/xsm/xsm_policy.c
+===================================================================
+--- xen-4.0.1.orig/xen/xsm/xsm_policy.c 2010-08-29 17:13:24.000000000 +0200
++++ xen-4.0.1/xen/xsm/xsm_policy.c 2011-11-25 16:24:33.000000000 +0100
+@@ -22,11 +22,11 @@
+ #include <xsm/xsm.h>
+ #include <xen/multiboot.h>
+
+-char *policy_buffer = NULL;
+-u32 policy_size = 0;
++char *__initdata policy_buffer = NULL;
++u32 __initdata policy_size = 0;
+
+ int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t *mbi,
+- unsigned long initial_images_start)
++ void *(*bootstrap_map)(const module_t *))
+ {
+ int i;
+ module_t *mod = (module_t *)__va(mbi->mods_addr);
+@@ -40,15 +40,8 @@
+ */
+ for ( i = mbi->mods_count-1; i >= 1; i-- )
+ {
+- start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
+-#if defined(__i386__)
+- _policy_start = (u32 *)start;
+-#elif defined(__x86_64__)
+- _policy_start = maddr_to_virt(start);
+-#else
+- _policy_start = NULL;
+-#endif
+- _policy_len = mod[i].mod_end - mod[i].mod_start;
++ _policy_start = bootstrap_map(mod + i);
++ _policy_len = mod[i].mod_end;
+
+ if ( (xsm_magic_t)(*_policy_start) == XSM_MAGIC )
+ {
+@@ -63,6 +56,8 @@
+ break;
+
+ }
++
++ bootstrap_map(NULL);
+ }
+
+ return rc;
diff -Nru xen-4.0.1/debian/rules.real xen-4.0.1/debian/rules.real
--- xen-4.0.1/debian/rules.real 2010-08-02 15:10:13.000000000 +0200
+++ xen-4.0.1/debian/rules.real 2012-06-14 20:24:30.000000000 +0200
@@ -112,6 +112,7 @@
install-hypervisor_$(ARCH)_$(FLAVOUR): DIR=$(BUILD_DIR)/build-hypervisor_$(ARCH)_$(FLAVOUR)
install-hypervisor_$(ARCH)_$(FLAVOUR): PACKAGE_NAME = xen-hypervisor-$(VERSION)-$(FLAVOUR)
+install-hypervisor_$(ARCH)_$(FLAVOUR): PACKAGE_DIR = debian/$(PACKAGE_NAME)
install-hypervisor_$(ARCH)_$(FLAVOUR): DH_OPTIONS = -p$(PACKAGE_NAME)
install-hypervisor_$(ARCH)_$(FLAVOUR): $(STAMPS_DIR)/build-hypervisor_$(ARCH)_$(FLAVOUR)
dh_testdir
@@ -119,6 +120,7 @@
dh_prep
dh_installdirs boot
cp $(DIR)/xen/xen.gz debian/$(PACKAGE_NAME)/boot/xen-$(VERSION)-$(FLAVOUR).gz
+ install -D -m644 debian/xen-hypervisor.NEWS $(PACKAGE_DIR)/usr/share/doc/$(PACKAGE_NAME)/NEWS
+$(MAKE_SELF) install-base
install-lib-dev_$(ARCH): DIR = $(BUILD_DIR)/install-utils_$(ARCH)
diff -Nru xen-4.0.1/debian/xen-hypervisor.NEWS xen-4.0.1/debian/xen-hypervisor.NEWS
--- xen-4.0.1/debian/xen-hypervisor.NEWS 1970-01-01 01:00:00.000000000 +0100
+++ xen-4.0.1/debian/xen-hypervisor.NEWS 2012-06-14 20:24:30.000000000 +0200
@@ -0,0 +1,15 @@
+xen-3.0 (4.0.1-5) stable-security; urgency=low
+
+ A security issue has been discovered that affects some older AMD
+ processors. Untrusted 64-bit Xen guests can cause a processor hang.
+ Affected processors all predate the AMD SVM extensions for hardware
+ virtualization.
+
+ After this update has been applied, Xen will refuse to boot by default
+ if it determines it is running on a vulnerable system. You may override
+ this default by adding the "allow_unsafe" keyword to your hypervisor
+ command line. On systems using the GRUB bootloader, you can do this by
+ editing the /etc/default/grub file and adding the keyword to the
+ "export GRUB_CMDLINE_XEN=" line.
+
+ -- Bastian Blank <waldi@debian.org> Mon, 11 Jun 2012 18:10:55 +0000
Reply to: