[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Applying Xen updates patch to main i386 trees



Hi,

I've just committed a change to enable a patch (previously added by
maks) which adds suspend/resume and ballooning to the -xen flavour of
the i386 kernel.

Looking through it I notice that the changesets which touch non-Xen code
are completely optional. Therefore I propose to strip them out and move
the patch to the main series file, applying it for all flavours. This
will have the benefit of enabling these features in the -bigmem-686
kernel which is(/will be) used by the Xen flavour of the installer. The
stripped down patch is attached (still needs testing). diffstat is at
the end of this mail.

The only changes to generic code which I propose leaving in are the
changes to kernel/printk.c and include/linux/console.h from

http://git.kernel.org/?p=linux/kernel/git/x86/linux-2.6-tip.git;a=commit;h=9e124fe16ff24746d6de5a2ad685266d7bce0e08
        commit 9e124fe16ff24746d6de5a2ad685266d7bce0e08
        
            xen: Enable console tty by default in domU if it's not a
        dummy

which are completely trivial.

Any objections?

The changesets which I will remove from the patch are below. Note that
they aren't actually especially scary and don't make a functional
difference to generic code (since they just add hooks where the default
is the current behaviour). If there is consensus I'd be reasonably happy
to leave them in...
        
        http://git.kernel.org/?p=linux/kernel/git/x86/linux-2.6-tip.git;a=commit;h=a15af1c9ea2750a9ff01e51615c45950bad8221b
        commit a15af1c9ea2750a9ff01e51615c45950bad8221b
        
            x86/paravirt: add pte_flags to just get pte flags

        http://git.kernel.org/?p=linux/kernel/git/x86/linux-2.6-tip.git;a=commit;h=1ea0704e0da65b2b46f9142ff1391163aac24060
        commit 1ea0704e0da65b2b46f9142ff1391163aac24060
        
            mm: add a ptep_modify_prot transaction abstraction    
        
        http://git.kernel.org/?p=linux/kernel/git/x86/linux-2.6-tip.git;a=commit;h=08b882c627aeeeb3cfd3c4354f0d360d7949549d
        commit 08b882c627aeeeb3cfd3c4354f0d360d7949549d
        
            paravirt: add hooks for ptep_modify_prot_start/commit
        
        http://git.kernel.org/?p=linux/kernel/git/x86/linux-2.6-tip.git;a=commit;h=e57778a1e30470c9f5b79e370511b9af29b59c48
        commit e57778a1e30470c9f5b79e370511b9af29b59c48
        
            xen: implement ptep_modify_prot_start/commit
            
diffstat of the remaining:
 arch/x86/xen/Kconfig              |   10 +
 arch/x86/xen/Makefile             |    2 
 arch/x86/xen/enlighten.c          |  123 ++++++++++++++----
 arch/x86/xen/manage.c             |  143 ---------------------
 arch/x86/xen/mmu.c                |  250 +++++++++++++++++++++++++++++++++++--
 arch/x86/xen/mmu.h                |    8 -
 arch/x86/xen/multicalls.c         |   40 +++++-
 arch/x86/xen/multicalls.h         |   12 +
 arch/x86/xen/setup.c              |    5 
 arch/x86/xen/smp.c                |    8 +
 arch/x86/xen/suspend.c            |   45 ++++++
 arch/x86/xen/time.c               |   13 +
 arch/x86/xen/xen-head.S           |    5 
 arch/x86/xen/xen-ops.h            |   11 +
 drivers/char/hvc_xen.c            |   61 ++++++++-
 drivers/input/xen-kbdfront.c      |   20 ++-
 drivers/video/xen-fbfront.c       |  211 +++++++++++++++++++++++++++----
 drivers/xen/Makefile              |    2 
 drivers/xen/balloon.c             |   10 -
 drivers/xen/events.c              |  114 ++++++++++++++++-
 drivers/xen/grant-table.c         |    4 
 drivers/xen/manage.c              |  252 ++++++++++++++++++++++++++++++++++++++
 drivers/xen/xenbus/xenbus_comms.c |   23 +--
 include/asm-x86/xen/hypercall.h   |   11 +
 include/asm-x86/xen/page.h        |   25 +--
 include/linux/console.h           |    2 
 include/linux/page-flags.h        |    1 
 include/xen/events.h              |    4 
 include/xen/grant_table.h         |    3 
 include/xen/hvc-console.h         |    9 +
 include/xen/interface/elfnote.h   |   20 +++
 include/xen/interface/features.h  |    3 
 include/xen/interface/io/fbif.h   |   29 +++-
 include/xen/interface/io/kbdif.h  |    2 
 include/xen/interface/memory.h    |   12 +
 include/xen/interface/xen.h       |    9 +
 include/xen/xen-ops.h             |    6 
 kernel/printk.c                   |    3 
 38 files changed, 1229 insertions(+), 282 deletions(-)

-- 
Ian Campbell

I know what "custody" [of the children] means.  "Get even."  That's all
custody means.  Get even with your old lady.
		-- Lenny Bruce
commit 400d34944c4ad82a817c06e570bc93b1114aa596
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon Jun 16 04:30:03 2008 -0700

    xen: add mechanism to extend existing multicalls
    
    Some Xen hypercalls accept an array of operations to work on.  In
    general this is because it's more efficient for the hypercall to do the
    work all at once rather than as separate hypercalls (even batched as a
    multicall).
    
    This patch adds a mechanism (xen_mc_extend_args()) to allocate more
    argument space to the last-issued multicall, in order to extend its
    argument list.
    
    The user of this mechanism is xen/mmu.c, which uses it to extend the
    args array of mmu_update.  This is particularly valuable when doing
    the update for a large mprotect, which goes via
    ptep_modify_prot_commit(), but it also manages to batch updates to
    pgd/pmds as well.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
    Acked-by: Hugh Dickins <hugh@veritas.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit d02859ecb321c8c0f74cb9bbe3f51a59e58822b0
Merge: a987b16... 543cf4c...
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jun 25 12:16:51 2008 +0200

    Merge commit 'v2.6.26-rc8' into x86/xen
    
    Conflicts:
    
    	arch/x86/xen/enlighten.c
    	arch/x86/xen/mmu.c
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit a987b16cc6123af2c9414032701bab5f73c54c89
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon Jun 16 15:01:56 2008 -0700

    xen: don't drop NX bit
    
    Because NX is now enforced properly, we must put the hypercall page
    into the .text segment so that it is executable.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Cc: Stable Kernel <stable@kernel.org>
    Cc: the arch/x86 maintainers <x86@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit eb179e443deb0a5c81a62b4c157124a4b7ff1813
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon Jun 16 15:01:53 2008 -0700

    xen: mask unwanted pte bits in __supported_pte_mask
    
    [ Stable: this isn't a bugfix in itself, but it's a prerequisite
      for "xen: don't drop NX bit" ]
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Cc: Stable Kernel <stable@kernel.org>
    Cc: the arch/x86 maintainers <x86@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit 6673cf63e5d973db5145d1f48b354efcb9fe2a13
Author: Isaku Yamahata <yamahata@valinux.co.jp>
Date:   Mon Jun 16 14:58:13 2008 -0700

    xen: Use wmb instead of rmb in xen_evtchn_do_upcall().
    
    This patch is ported from 534:77db69c38249 of linux-2.6.18-xen.hg.
    Use wmb instead of rmb to enforce ordering between
    evtchn_upcall_pending and evtchn_pending_sel stores
    in xen_evtchn_do_upcall().
    
    Cc: Samuel Thibault <samuel.thibault@eu.citrix.com>
    Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
    Cc: Nick Piggin <nickpiggin@yahoo.com.au>
    Cc: the arch/x86 maintainers <x86@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit 688d22e23ab1caacb2c36c615854294b58f2ea47
Merge: 7e0edc1... 0665190...
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jun 16 11:21:27 2008 +0200

    Merge branch 'linus' into x86/xen

commit 7e0edc1bc343231029084761ebf59e522902eb49
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Sat May 31 01:33:04 2008 +0100

    xen: add new Xen elfnote types and use them appropriately
    
    Define recently added XEN_ELFNOTEs, and use them appropriately.
    Most significantly, this enables domain checkpointing (xm save -c).
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit d07af1f0e3a3e378074fc36322dd7b0e72d9a3e2
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Sat May 31 01:33:03 2008 +0100

    xen: resume timers on all vcpus
    
    On resume, the vcpu timer modes will not be restored.  The timer
    infrastructure doesn't do this for us, since it assumes the cpus
    are offline.  We can just poke the other vcpus into the right mode
    directly though.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit 9c7a794209f8a91f47697c3be20597eb60531e6d
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Sat May 31 01:33:02 2008 +0100

    xen: restore vcpu_info mapping
    
    If we're using vcpu_info mapping, then make sure it's restored on all
    processors before releasing them from stop_machine.
    
    The only complication is that if this fails, we can't continue because
    we've already made assumptions that the mapping is available (baked in
    calls to the _direct versions of the functions, for example).
    
    Fortunately this can only happen with a 32-bit hypervisor, which may
    possibly run out of mapping space.  On a 64-bit hypervisor, this is a
    non-issue.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit e2426cf85f8db5891fb5831323d2d0c176c4dadc
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Sat May 31 01:24:27 2008 +0100

    xen: avoid hypercalls when updating unpinned pud/pmd
    
    When operating on an unpinned pagetable (ie, one under construction or
    destruction), it isn't necessary to use a hypercall to update a
    pud/pmd entry.  Jan Beulich observed that a similar optimisation
    avoided many thousands of hypercalls while doing a kernel build.
    
    One tricky part is that early in the kernel boot there's no page
    structure, so we can't check to see if the page is pinned.  In that
    case, we just always use the hypercall.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Cc: Jan Beulich <jbeulich@novell.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit 15ce60056b24a65b65e28de973a9fd8ac0750a2f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jun 2 13:20:11 2008 +0200

    xen: export get_phys_to_machine
    
    -tip testing found the following xen-console symbols trouble:
    
      ERROR: "get_phys_to_machine" [drivers/video/xen-fbfront.ko] undefined!
      ERROR: "get_phys_to_machine" [drivers/net/xen-netfront.ko] undefined!
      ERROR: "get_phys_to_machine" [drivers/input/xen-kbdfront.ko] undefined!
    
    with:
    
      http://redhat.com/~mingo/misc/config-Mon_Jun__2_12_25_13_CEST_2008.bad

commit c78277288e3d561d55fb48bc0fe8d6e2cf4d0880
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Thu May 29 09:02:19 2008 +0100

    CONFIG_PM_SLEEP fix: xen: fix compilation when CONFIG_PM_SLEEP is disabled
    
    Xen save/restore depends on CONFIG_PM_SLEEP being set for device_power_up/down.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit 0261ac5f2f43a1906cfacfb19d62ed643d162cbe
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu May 29 09:31:50 2008 +0200

    xen: fix "xen: implement save/restore"
    
    -tip testing found the following build breakage:
    
      drivers/built-in.o: In function `xen_suspend':
      manage.c:(.text+0x4390f): undefined reference to `xen_console_resume'
    
    with this config:
    
      http://redhat.com/~mingo/misc/config-Thu_May_29_09_23_16_CEST_2008.bad
    
    i have bisected it down to:
    
    |  commit 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58
    |  Author: Jeremy Fitzhardinge <jeremy@goop.org>
    |  Date:   Mon May 26 23:31:27 2008 +0100
    |
    |      xen: implement save/restore
    
    the problem is that drivers/xen/manage.c is built unconditionally if
    CONFIG_XEN is enabled and makes use of xen_suspend(), but
    drivers/char/hvc_xen.c, where the xen_suspend() method is implemented,
    is only built if CONFIG_HVC_XEN=y as well.
    
    i have solved this by providing a NOP implementation for xen_suspend()
    in the !CONFIG_HVC_XEN case.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit b20aeccd6ad42ccb6be1b3d1d32618ddd2b31bf0
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed May 28 14:24:38 2008 +0200

    xen: fix early bootup crash on native hardware
    
    -tip tree auto-testing found the following early bootup hang:
    
    -------------->
    get_memcfg_from_srat: assigning address to rsdp
    RSD PTR  v0 [Nvidia]
    BUG: Int 14: CR2 ffd00040
         EDI 8092fbfe  ESI ffd00040  EBP 80b0aee8  ESP 80b0aed0
         EBX 000f76f0  EDX 0000000e  ECX 00000003  EAX ffd00040
         err 00000000  EIP 802c055a   CS 00000060  flg 00010006
    Stack: ffd00040 80bc78d0 80b0af6c 80b1dbfe 8093d8ba 00000008 80b42810 80b4ddb4
           80b42842 00000000 80b0af1c 801079c8 808e724e 00000000 80b42871 802c0531
           00000100 00000000 0003fff0 80b0af40 80129999 00040100 00040100 00000000
    Pid: 0, comm: swapper Not tainted 2.6.26-rc4-sched-devel.git #570
     [<802c055a>] ? strncmp+0x11/0x25
     [<80b1dbfe>] ? get_memcfg_from_srat+0xb4/0x568
     [<801079c8>] ? mcount_call+0x5/0x9
     [<802c0531>] ? strcmp+0xa/0x22
     [<80129999>] ? printk+0x38/0x3a
     [<80129999>] ? printk+0x38/0x3a
     [<8011b122>] ? memory_present+0x66/0x6f
     [<80b216b4>] ? setup_memory+0x13/0x40c
     [<80b16b47>] ? propagate_e820_map+0x80/0x97
     [<80b1622a>] ? setup_arch+0x248/0x477
     [<80129999>] ? printk+0x38/0x3a
     [<80b11759>] ? start_kernel+0x6e/0x2eb
     [<80b110fc>] ? i386_start_kernel+0xeb/0xf2
     =======================
    <------
    
    with this config:
    
       http://redhat.com/~mingo/misc/config-Wed_May_28_01_33_33_CEST_2008.bad
    
    The thing is, the crash makes little sense at first sight. We crash on a
    benign-looking printk. The code around it got changed in -tip but
    checking those topic branches individually did not reproduce the bug.
    
    Bisection led to this commit:
    
    |   d5edbc1f75420935b1ec7e65df10c8f81cea82de is first bad commit
    |   commit d5edbc1f75420935b1ec7e65df10c8f81cea82de
    |   Author: Jeremy Fitzhardinge <jeremy@goop.org>
    |   Date:   Mon May 26 23:31:22 2008 +0100
    |
    |   xen: add p2m mfn_list_list
    
    Which is somewhat surprising, as on native hardware Xen client side
    should have little to no side-effects.
    
    After some head scratching, it turns out the following happened:
    randconfig enabled the following Xen options:
    
      CONFIG_XEN=y
      CONFIG_XEN_MAX_DOMAIN_MEMORY=8
      # CONFIG_XEN_BLKDEV_FRONTEND is not set
      # CONFIG_XEN_NETDEV_FRONTEND is not set
      CONFIG_HVC_XEN=y
      # CONFIG_XEN_BALLOON is not set
    
    which activated this piece of code in arch/x86/xen/mmu.c:
    
    > @@ -69,6 +69,13 @@
    >  	__attribute__((section(".data.page_aligned"))) =
    >  		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
    >
    > +/* Arrays of p2m arrays expressed in mfns used for save/restore */
    > +static unsigned long p2m_top_mfn[TOP_ENTRIES]
    > +	__attribute__((section(".bss.page_aligned")));
    > +
    > +static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
    > +	__attribute__((section(".bss.page_aligned")));
    
    The problem is, you must only put variables into .bss.page_aligned that
    have a _size_ that is _exactly_ page aligned. In this case the size of
    p2m_top_mfn_list is not page aligned:
    
     80b8d000 b p2m_top_mfn
     80b8f000 b p2m_top_mfn_list
     80b8f008 b softirq_stack
     80b97008 b hardirq_stack
     80b9f008 b bm_pte
    
    So all subsequent variables get unaligned which, depending on luck,
    breaks the kernel in various funny ways. In this case what killed the
    kernel first was the misaligned bootmap pte page, resulting in that
    creative crash above.
    
    Anyway, this was a fun bug to track down :-)
    
    I think the moral is that .bss.page_aligned is a dangerous construct in
    its current form, and the symptoms of breakage are very non-trivial, so
    i think we need build-time checks to make sure all symbols in
    .bss.page_aligned are truly page aligned.
    
    The Xen fix below gets the kernel booting again.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit 359cdd3f866b6219a6729e313faf2221397f3278
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:28 2008 +0100

    xen: maintain clock offset over save/restore
    
    Hook into the device model to make sure that timekeeping's resume handler
    is called.  This deals with our clocksource's non-monotonicity over the
    save/restore.  Explicitly call clock_has_changed() to make sure that
    all the timers get retriggered properly.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:27 2008 +0100

    xen: implement save/restore
    
    This patch implements Xen save/restore and migration.
    
    Saving is triggered via xenbus, which is polled in
    drivers/xen/manage.c.  When a suspend request comes in, the kernel
    prepares itself for saving by:
    
    1 - Freeze all processes.  This is primarily to prevent any
        partially-completed pagetable updates from confusing the suspend
        process.  If CONFIG_PREEMPT isn't defined, then this isn't necessary.
    
    2 - Suspend xenbus and other devices
    
    3 - Stop_machine, to make sure all the other vcpus are quiescent.  The
        Xen tools require the domain to run its save off vcpu0.
    
    4 - Within the stop_machine state, it pins any unpinned pgds (under
        construction or destruction), canonicalizes various other
        pieces of state (mostly converting mfns to pfns), and finally
    
    5 - Suspend the domain
    
    Restore reverses the steps used to save the domain, ending when all
    the frozen processes are thawed.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 7d88d32a4670af583c896e5ecd3929b78538ca62
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:26 2008 +0100

    xenbus: rebind irq on restore
    
    When restoring, rebind the existing xenbus irq to the new xenbus event
    channel.  (It turns out in practice that this is always the same, and
    is never updated on restore.  That's a bug, but Xeno-linux has been
    like this for a long time, so it can't really be fixed.)
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 6b9b732d0e396a3f1a95977162a8624aafce38a1
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:25 2008 +0100

    xen-console: add save/restore
    
    Add code to:
    
     1. Deal with the console page being canonicalized.  During save, the
        console's mfn in the start_info structure is canonicalized to a pfn.
        In order to deal with that, we always use a copy of the pfn and
        indirect off that all the time.  However, we fall back to using the
        mfn if the pfn hasn't been initialized yet.
    
     2. Restore the console event channel, and rebind it to the existing irq.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 0f2287ad7c61f10b2a22a06e2a66cdbbbfc44ad0
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:24 2008 +0100

    xen: fix unbind_from_irq()
    
    Rearrange the tests in unbind_from_irq() so that we can still unbind
    an irq even if the underlying event channel is bad.  This allows a
    device driver to shuffle its irqs on save/restore before the
    underlying event channels have been fixed up.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit eb1e305f4ef201e549ffd475b7dcbcd4ec36d7dc
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:23 2008 +0100

    xen: add rebind_evtchn_irq
    
    Add rebind_evtchn_irq(), which will rebind a device driver's existing
    irq to a new event channel on restore.  Since the new event channel
    will be masked and bound to vcpu0, we update the state accordingly and
    unmask the irq once everything is set up.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit d5edbc1f75420935b1ec7e65df10c8f81cea82de
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:22 2008 +0100

    xen: add p2m mfn_list_list
    
    When saving a domain, the Xen tools need to remap all our mfns to
    portable pfns.  In order to remap our p2m table, it needs to know
    where all its pages are, so maintain the references to the p2m table
    for it to use.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit a0d695c821544947342a2d372ec4108bc813b979
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:21 2008 +0100

    xen: make dummy_shared_info non-static
    
    Rename dummy_shared_info to xen_dummy_shared_info and make it
    non-static, in anticipation of users outside of enlighten.c
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit cf0923ea295ba08ae656ef04164a43cb6553ba99
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:20 2008 +0100

    xen: efficiently support a holey p2m table
    
    When using sparsemem and memory hotplug, the kernel's pseudo-physical
    address space can be discontiguous.  Previously this was dealt with by
    having the upper parts of the radix tree stubbed off.  Unfortunately,
    this is incompatible with save/restore, which requires a complete p2m
    table.
    
    The solution is to have a special distinguished all-invalid p2m leaf
    page, which we can point all the hole areas at.  This allows the tools
    to see a complete p2m table, but it only costs a page for all memory
    holes.
    
    It also simplifies the code since it removes a few special cases.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 8006ec3e911f93d702e1d4a4e387e244ab434924
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:19 2008 +0100

    xen: add configurable max domain size
    
    Add a config option to set the max size of a Xen domain.  This is used
    to scale the size of the physical-to-machine array; it ends up using
    around 1 page/GByte, so there's no reason to be very restrictive.
    
    For a 32-bit guest, the default value of 8GB is probably sufficient;
    there's not much point in giving a 32-bit machine much more memory
    than that.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit d451bb7aa852627bdf7be7937dc3d9d9f261b235
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:18 2008 +0100

    xen: make phys_to_machine structure dynamic
    
    We now support the use of memory hotplug, so the physical to machine
    page mapping structure must be dynamic.  This is implemented as a
    two-level radix tree structure, which allows us to efficiently
    incrementally allocate memory for the p2m table as new pages are
    added.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 955d6f1778da5a9795f2dfb07f760006f194609a
Author: Adrian Bunk <bunk@kernel.org>
Date:   Mon May 26 23:31:17 2008 +0100

    xen: drivers/xen/balloon.c: make a function static
    
    Make the needlessly global balloon_set_new_target() static.
    
    Signed-off-by: Adrian Bunk <bunk@kernel.org>
    Acked-by: Chris Wright <chrisw@sous-sol.org>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 38bb5ab4179572f4d24d3ca7188172a31ca51a69
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:16 2008 +0100

    xen: count resched interrupts properly
    
    Make sure resched interrupts appear in /proc/interrupts in the proper
    place.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit bfdab126cfa6fe3c2ddb8b6007a38202b510b6c1
Author: Isaku Yamahata <yamahata@valinux.co.jp>
Date:   Mon May 26 23:31:15 2008 +0100

    xen: add missing definitions in include/xen/interface/memory.h which ia64/xen needs
    
    Add xen handle related definitions for xen memory which ia64/xen needs.
    Pointer arguments for ia64/xen hypercalls are passed as pseudo physical
    addresses (guest physical addresses), so it is required to convert
    guest kernel virtual addresses into pseudo physical addresses.
    The xen guest handle represents such arguments.
    
    Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit a90971ebddc81330f59203dee9803512aa4e2ef6
Author: Isaku Yamahata <yamahata@valinux.co.jp>
Date:   Mon May 26 23:31:14 2008 +0100

    xen: compilation fix to balloon driver for ia64 support
    
    Fix compilation error of the balloon driver on ia64.
    The extent_start member is a pointer argument. On x86, a pointer argument
    for a xen hypercall is passed as a virtual address.
    On the other hand, on ia64 and ppc, a pointer argument is passed as a pseudo
    physical address (guest physical address).
    So they must be passed as handles and converted right before issuing the hypercall.
    
      CC      drivers/xen/balloon.o
    linux-2.6-x86/drivers/xen/balloon.c: In function 'increase_reservation':
    linux-2.6-x86/drivers/xen/balloon.c:228: error: incompatible types in assignment
    linux-2.6-x86/drivers/xen/balloon.c: In function 'decrease_reservation':
    linux-2.6-x86/drivers/xen/balloon.c:324: error: incompatible types in assignment
    linux-2.6-x86/drivers/xen/balloon.c: In function 'dealloc_pte_fn':
    linux-2.6-x86/drivers/xen/balloon.c:486: error: incompatible types in assignment
    linux-2.6-x86/drivers/xen/balloon.c: In function 'alloc_empty_pages_and_pagevec':
    linux-2.6-x86/drivers/xen/balloon.c:522: error: incompatible types in assignment
    make[2]: *** [drivers/xen/balloon.o] Error 1
    
    Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit ec9b2065d4d3b797604c09a569083dd9ff951b1b
Author: Isaku Yamahata <yamahata@valinux.co.jp>
Date:   Mon May 26 23:31:13 2008 +0100

    xen: Move manage.c to drivers/xen for ia64/xen support
    
    Move arch/x86/xen/manage.c under drivers/xen/ to share code
    between x86 and ia64.
    ia64/xen also uses manage.c.
    
    Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 83abc70a4c6e306f4c1672e25884322f797e4fcb
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:12 2008 +0100

    xen: make earlyprintk=xen work again
    
    For some perverse reason, if you call add_preferred_console() it prevents
    setup_early_printk() from successfully enabling the boot console -
    unless you make it a preferred console too...
    
    Also, make xenboot console output distinct from normal console output,
    since it gets repeated when the console handover happens, and the
    duplicated output is confusing without disambiguation.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
    Cc: Markus Armbruster <armbru@redhat.com>
    Cc: Gerd Hoffmann <kraxel@redhat.com>

commit e4dcff1f6e7582f76c2c9990b1d9111bbc8e26ef
Author: Markus Armbruster <armbru@redhat.com>
Date:   Mon May 26 23:31:11 2008 +0100

    xen pvfb: Dynamic mode support (screen resizing)
    
    The pvfb backend indicates dynamic mode support by creating node
    feature_resize with a non-zero value in its xenstore directory.
    xen-fbfront sends a resize notification event on mode change.  Fully
    backwards compatible both ways.
    
    Framebuffer size and initial resolution can be controlled through
    kernel parameter xen_fbfront.video.  The backend enforces a separate
    size limit, which it advertises in node videoram in its xenstore
    directory.
    
    xen-kbdfront gets the maximum screen resolution from nodes width and
    height in the backend's xenstore directory instead of hardcoding it.
    
    Additional goodie: support for larger framebuffers (512M on a 64-bit
    system with 4K pages).
    
    Changing the number of bits per pixels dynamically is not supported,
    yet.
    
    Ported from
    http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/92f7b3144f41
    http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/bfc040135633
    
    Signed-off-by: Pat Campbell <plc@novell.com>
    Signed-off-by: Markus Armbruster <armbru@redhat.com>
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit f4ad1ebd7a0fae2782ef9f76c0b94b536742c3e8
Author: Markus Armbruster <armbru@redhat.com>
Date:   Mon May 26 23:31:10 2008 +0100

    xen pvfb: Zero unused bytes in events sent to backend
    
    This isn't a security flaw (the backend can see all our memory
    anyway).  But it's the right thing to do all the same.
    
    Signed-off-by: Markus Armbruster <armbru@redhat.com>
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 1e892c959da42278e60b21f5ecfd6fba0efff313
Author: Markus Armbruster <armbru@redhat.com>
Date:   Mon May 26 23:31:09 2008 +0100

    xen pvfb: Module aliases to support module autoloading
    
    These are mostly for completeness and consistency with the other
    frontends, as PVFB is typically compiled in rather than a module.
    
    Derived from
    http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/5e294e29a43e
    
    While there, add module descriptions.
    
    Signed-off-by: Markus Armbruster <armbru@redhat.com>
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 6ba0e7b36c7cc1745b3cbeda244d14edae3ad058
Author: Markus Armbruster <armbru@redhat.com>
Date:   Mon May 26 23:31:08 2008 +0100

    xen pvfb: Pointer z-axis (mouse wheel) support
    
    Add z-axis motion to pointer events.  Backward compatible, because
    there's space for the z-axis in union xenkbd_in_event, and old
    backends zero it.
    
    Derived from
    http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/57dfe0098000
    http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/1edfea26a2a9
    http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/c3ff0b26f664
    
    Signed-off-by: Pat Campbell <plc@novell.com>
    Signed-off-by: Markus Armbruster <armbru@redhat.com>
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 9e124fe16ff24746d6de5a2ad685266d7bce0e08
Author: Markus Armbruster <armbru@redhat.com>
Date:   Mon May 26 23:31:07 2008 +0100

    xen: Enable console tty by default in domU if it's not a dummy
    
    Without console= arguments on the kernel command line, the first
    console to register becomes enabled and the preferred console (the one
    behind /dev/console).  This is normally tty (assuming
    CONFIG_VT_CONSOLE is enabled, which it commonly is).
    
    This is okay as long as tty is a useful console.  But unless we have the
    PV framebuffer, and it is enabled for this domain, tty0 in domU is
    merely a dummy.  In that case, we want the preferred console to be the
    Xen console hvc0, and we want it without having to fiddle with the
    kernel command line.  Commit b8c2d3dfbc117dff26058fbac316b8acfc2cb5f7
    did that for us.
    
    Since we now have the PV framebuffer, we want to enable and prefer tty
    again, but only when PVFB is enabled.  But even then we still want to
    enable the Xen console as well.
    
    Problem: when tty registers, we can't yet know whether the PVFB is
    enabled.  By the time we can know (xenstore is up), the console setup
    game is over.
    
    Solution: enable console tty by default, but keep hvc as the preferred
    console.  Change the preferred console to tty when PVFB probes
    successfully, unless we've been given console kernel parameters.
    
    Signed-off-by: Markus Armbruster <armbru@redhat.com>
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 239d1fc04ed0b58d638096b12a7f6d50269d30c9
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:05 2008 +0100

    xen: don't worry about preempt during xen_irq_enable()
    
    When enabling interrupts, we don't need to worry about preemption,
    because we either enter with interrupts disabled - so no preemption -
    or the caller is confused and is re-enabling interrupts on some
    indeterminate processor.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 2956a3511c8c5dccb1d4739ead17c7c3c23a24b7
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:04 2008 +0100

    xen: allow some cr4 updates
    
    The guest can legitimately change things like cr4.OSFXSR and
    OSXMMEXCPT, so let it.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 349c709f42453707f74bece0d9d35ee5b3842893
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:02 2008 +0100

    xen: use new sched_op
    
    Use the new sched_op hypercall, mainly because xenner doesn't support
    the old one.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 7b1333aa4cb546ddeb9c05098a53d9a777623a05
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:01 2008 +0100

    xen: use hypercall rather than clts
    
    Xen will trap and emulate clts, but it's better to use a hypercall.
    Also, xenner doesn't handle clts.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 0922abdc3982ae54cbe1b24ac5aa91a260eca1bb
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:31:00 2008 +0100

    xen: make early console also write to debug console
    
    When using "earlyprintk=xen", also write the console output to the raw
    debug console.  This will appear on dom0's console if the hypervisor
    has been compiled to allow it.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 0acf10d8fbd52926217d3933d196b33fe2468f18
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Mon May 26 23:30:59 2008 +0100

    xen: add raw console write functions for debug
    
    Add a couple of functions which can write directly to the Xen console
    for debugging.  This output ends up on the host's dom0 console
    (assuming it allows the domain to write there).
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

commit 3843fc2575e3389f4f0ad0420a720240a5746a5d
Author: Jeremy Fitzhardinge <jeremy@goop.org>
Date:   Fri May 9 12:05:57 2008 +0100

    xen: remove support for non-PAE 32-bit
    
    Non-PAE operation has been deprecated in Xen for a while, and is
    rarely tested or used.  xen-unstable has now officially dropped
    non-PAE support.  Since Xen/pvops' non-PAE support has also been
    broken for a while, we may as well completely drop it altogether.
    
    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 6c388e5..c2cc995 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -20,3 +20,13 @@ config XEN
 	select SYS_HYPERVISOR
 	help
 	  This is the /proc/xen interface used by Xen's libxc.
+
+config XEN_MAX_DOMAIN_MEMORY
+       int "Maximum allowed size of a domain in gigabytes"
+       default 8
+       depends on XEN
+       help
+         The pseudo-physical to machine address array is sized
+         according to the maximum possible memory size of a Xen
+         domain.  This array uses 1 page per gigabyte, so there's no
+         need to be too stingy here.
\ No newline at end of file
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3d8df98..2ba2d16 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
-			time.o manage.o xen-asm.o grant-table.o
+			time.o xen-asm.o grant-table.o suspend.o
 
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09c1c6..bd74229 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,13 +75,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
 
-static /* __initdata */ struct shared_info dummy_shared_info;
+struct shared_info xen_dummy_shared_info;
 
 /*
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
  */
-struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
+struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
 
 /*
  * Flag to determine whether vcpu info placement is available on all
@@ -98,13 +98,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
  */
 static int have_vcpu_info_placement = 1;
 
-static void __init xen_vcpu_setup(int cpu)
+static void xen_vcpu_setup(int cpu)
 {
 	struct vcpu_register_vcpu_info info;
 	int err;
 	struct vcpu_info *vcpup;
 
-	BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info);
+	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 	per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 
 	if (!have_vcpu_info_placement)
@@ -136,11 +136,41 @@ static void __init xen_vcpu_setup(int cpu)
 	}
 }
 
+/*
+ * On restore, set the vcpu placement up again.
+ * If it fails, then we're in a bad state, since
+ * we can't back out from using it...
+ */
+void xen_vcpu_restore(void)
+{
+	if (have_vcpu_info_placement) {
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			bool other_cpu = (cpu != smp_processor_id());
+
+			if (other_cpu &&
+			    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+				BUG();
+
+			xen_vcpu_setup(cpu);
+
+			if (other_cpu &&
+			    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+				BUG();
+		}
+
+		BUG_ON(!have_vcpu_info_placement);
+	}
+}
+
 static void __init xen_banner(void)
 {
 	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
 	       pv_info.name);
-	printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
+	printk(KERN_INFO "Hypervisor signature: %s%s\n",
+	       xen_start_info->magic,
+	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
@@ -235,13 +265,13 @@ static void xen_irq_enable(void)
 {
 	struct vcpu_info *vcpu;
 
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
-	preempt_disable();
+	/* We don't need to worry about being preempted here, since
+	   either a) interrupts are disabled, so no preemption, or b)
+	   the caller is confused and is trying to re-enable interrupts
+	   on an indeterminate processor. */
+
 	vcpu = x86_read_percpu(xen_vcpu);
 	vcpu->evtchn_upcall_mask = 0;
-	preempt_enable_no_resched();
 
 	/* Doesn't matter if we get preempted here, because any
 	   pending event will get dealt with anyway. */
@@ -254,7 +284,7 @@ static void xen_irq_enable(void)
 static void xen_safe_halt(void)
 {
 	/* Blocking includes an implicit local_irq_enable(). */
-	if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
+	if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
 		BUG();
 }
 
@@ -607,6 +637,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
+static void xen_clts(void)
+{
+	struct multicall_space mcs;
+
+	mcs = xen_mc_entry(0);
+
+	MULTI_fpu_taskswitch(mcs.mc, 0);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+static void xen_write_cr0(unsigned long cr0)
+{
+	struct multicall_space mcs;
+
+	/* Only pay attention to cr0.TS; everything else is
+	   ignored. */
+	mcs = xen_mc_entry(0);
+
+	MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
 static void xen_write_cr2(unsigned long cr2)
 {
 	x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@ -624,8 +678,10 @@ static unsigned long xen_read_cr2_direct(void)
 
 static void xen_write_cr4(unsigned long cr4)
 {
-	/* Just ignore cr4 changes; Xen doesn't allow us to do
-	   anything anyway. */
+	cr4 &= ~X86_CR4_PGE;
+	cr4 &= ~X86_CR4_PSE;
+
+	native_write_cr4(cr4);
 }
 
 static unsigned long xen_read_cr3(void)
@@ -831,7 +887,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
 			  PFN_DOWN(__pa(xen_start_info->pt_base)));
 }
 
-static __init void setup_shared_info(void)
+void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 		unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
@@ -854,6 +910,8 @@ static __init void setup_shared_info(void)
 	/* In UP this is as good a place as any to set up shared info */
 	xen_setup_vcpu_info_placement();
 #endif
+
+	xen_setup_mfn_list_list();
 }
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -866,15 +924,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 	pv_mmu_ops.release_pmd = xen_release_pmd;
 	pv_mmu_ops.set_pte = xen_set_pte;
 
-	setup_shared_info();
+	xen_setup_shared_info();
 
 	/* Actually pin the pagetable down, but we can't set PG_pinned
 	   yet because the page structures don't exist yet. */
 	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
 }
 
+static __init void xen_post_allocator_init(void)
+{
+	pv_mmu_ops.set_pmd = xen_set_pmd;
+	pv_mmu_ops.set_pud = xen_set_pud;
+
+	xen_mark_init_mm_pinned();
+}
+
 /* This is called once we have the cpu_possible_map */
-void __init xen_setup_vcpu_info_placement(void)
+void xen_setup_vcpu_info_placement(void)
 {
 	int cpu;
 
@@ -960,7 +1026,7 @@ static const struct pv_init_ops xen_init_ops __initdata = {
 	.banner = xen_banner,
 	.memory_setup = xen_memory_setup,
 	.arch_setup = xen_arch_setup,
-	.post_allocator_init = xen_mark_init_mm_pinned,
+	.post_allocator_init = xen_post_allocator_init,
 };
 
 static const struct pv_time_ops xen_time_ops __initdata = {
@@ -978,10 +1044,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.set_debugreg = xen_set_debugreg,
 	.get_debugreg = xen_get_debugreg,
 
-	.clts = native_clts,
+	.clts = xen_clts,
 
 	.read_cr0 = native_read_cr0,
-	.write_cr0 = native_write_cr0,
+	.write_cr0 = xen_write_cr0,
 
 	.read_cr4 = native_read_cr4,
 	.read_cr4_safe = native_read_cr4_safe,
@@ -1072,4 +1138,4 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.set_pte = NULL,	/* see xen_pagetable_setup_* */
 	.set_pte_at = xen_set_pte_at,
-	.set_pmd = xen_set_pmd,
+	.set_pmd = xen_set_pmd_hyper,
@@ -1082,7 +1152,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.set_pte_atomic = xen_set_pte_atomic,
 	.set_pte_present = xen_set_pte_at,
-	.set_pud = xen_set_pud,
+	.set_pud = xen_set_pud_hyper,
 	.pte_clear = xen_pte_clear,
 	.pmd_clear = xen_pmd_clear,
 
@@ -1114,11 +1184,13 @@ static const struct smp_ops xen_smp_ops __initdata = {
 
 static void xen_reboot(int reason)
 {
+	struct sched_shutdown r = { .reason = reason };
+
 #ifdef CONFIG_SMP
 	smp_send_stop();
 #endif
 
-	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason))
+	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
 		BUG();
 }
 
@@ -1173,6 +1245,8 @@ asmlinkage void __init xen_start_kernel(void)
 
 	BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
 
+	xen_setup_features();
+
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
@@ -1186,13 +1265,11 @@
 
 #ifdef CONFIG_SMP
 	smp_ops = xen_smp_ops;
 #endif
 
-	xen_setup_features();
-
 	/* Get mfn list */
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
+		xen_build_dynamic_phys_to_machine();
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
@@ -1232,8 +1309,11 @@ asmlinkage void __init xen_start_kernel(void)
 		? __pa(xen_start_info->mod_start) : 0;
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 
-	if (!is_initial_xendomain())
+	if (!is_initial_xendomain()) {
+		add_preferred_console("xenboot", 0, NULL);
+		add_preferred_console("tty", 0, NULL);
 		add_preferred_console("hvc", 0, NULL);
+	}
 
 	/* Start the world */
 	start_kernel();
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c
deleted file mode 100644
index aa7af9e..0000000
--- a/arch/x86/xen/manage.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Handle extern requests for shutdown, reboot and sysrq
- */
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/reboot.h>
-#include <linux/sysrq.h>
-
-#include <xen/xenbus.h>
-
-#define SHUTDOWN_INVALID  -1
-#define SHUTDOWN_POWEROFF  0
-#define SHUTDOWN_SUSPEND   2
-/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
- * report a crash, not be instructed to crash!
- * HALT is the same as POWEROFF, as far as we're concerned.  The tools use
- * the distinction when we return the reason code to them.
- */
-#define SHUTDOWN_HALT      4
-
-/* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
-
-static void shutdown_handler(struct xenbus_watch *watch,
-			     const char **vec, unsigned int len)
-{
-	char *str;
-	struct xenbus_transaction xbt;
-	int err;
-
-	if (shutting_down != SHUTDOWN_INVALID)
-		return;
-
- again:
-	err = xenbus_transaction_start(&xbt);
-	if (err)
-		return;
-
-	str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
-	/* Ignore read errors and empty reads. */
-	if (XENBUS_IS_ERR_READ(str)) {
-		xenbus_transaction_end(xbt, 1);
-		return;
-	}
-
-	xenbus_write(xbt, "control", "shutdown", "");
-
-	err = xenbus_transaction_end(xbt, 0);
-	if (err == -EAGAIN) {
-		kfree(str);
-		goto again;
-	}
-
-	if (strcmp(str, "poweroff") == 0 ||
-	    strcmp(str, "halt") == 0)
-		orderly_poweroff(false);
-	else if (strcmp(str, "reboot") == 0)
-		ctrl_alt_del();
-	else {
-		printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
-		shutting_down = SHUTDOWN_INVALID;
-	}
-
-	kfree(str);
-}
-
-static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
-			  unsigned int len)
-{
-	char sysrq_key = '\0';
-	struct xenbus_transaction xbt;
-	int err;
-
- again:
-	err = xenbus_transaction_start(&xbt);
-	if (err)
-		return;
-	if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
-		printk(KERN_ERR "Unable to read sysrq code in "
-		       "control/sysrq\n");
-		xenbus_transaction_end(xbt, 1);
-		return;
-	}
-
-	if (sysrq_key != '\0')
-		xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
-
-	err = xenbus_transaction_end(xbt, 0);
-	if (err == -EAGAIN)
-		goto again;
-
-	if (sysrq_key != '\0')
-		handle_sysrq(sysrq_key, NULL);
-}
-
-static struct xenbus_watch shutdown_watch = {
-	.node = "control/shutdown",
-	.callback = shutdown_handler
-};
-
-static struct xenbus_watch sysrq_watch = {
-	.node = "control/sysrq",
-	.callback = sysrq_handler
-};
-
-static int setup_shutdown_watcher(void)
-{
-	int err;
-
-	err = register_xenbus_watch(&shutdown_watch);
-	if (err) {
-		printk(KERN_ERR "Failed to set shutdown watcher\n");
-		return err;
-	}
-
-	err = register_xenbus_watch(&sysrq_watch);
-	if (err) {
-		printk(KERN_ERR "Failed to set sysrq watcher\n");
-		return err;
-	}
-
-	return 0;
-}
-
-static int shutdown_event(struct notifier_block *notifier,
-			  unsigned long event,
-			  void *data)
-{
-	setup_shutdown_watcher();
-	return NOTIFY_DONE;
-}
-
-static int __init setup_shutdown_event(void)
-{
-	static struct notifier_block xenstore_notifier = {
-		.notifier_call = shutdown_event
-	};
-	register_xenstore_notifier(&xenstore_notifier);
-
-	return 0;
-}
-
-subsys_initcall(setup_shutdown_event);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index df40bf7..f6b8225 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,6 +56,131 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+#define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
+#define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
+
+/* Placeholder for holes in the address space */
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
+	__attribute__((section(".data.page_aligned"))) =
+		{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+
+ /* Array of pointers to pages containing p2m entries */
+static unsigned long *p2m_top[TOP_ENTRIES]
+	__attribute__((section(".data.page_aligned"))) =
+		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+
+/* Arrays of p2m arrays expressed in mfns used for save/restore */
+static unsigned long p2m_top_mfn[TOP_ENTRIES]
+	__attribute__((section(".bss.page_aligned")));
+
+static unsigned long p2m_top_mfn_list[
+			PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
+	__attribute__((section(".bss.page_aligned")));
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+	BUG_ON(pfn >= MAX_DOMAIN_PAGES);
+	return pfn / P2M_ENTRIES_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+	return pfn % P2M_ENTRIES_PER_PAGE;
+}
+
+/* Build the parallel p2m_top_mfn structures */
+void xen_setup_mfn_list_list(void)
+{
+	unsigned pfn, idx;
+
+	for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+
+		p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+	}
+
+	for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+		unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
+		p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+	}
+
+	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+		virt_to_mfn(p2m_top_mfn_list);
+	HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages */
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+	unsigned pfn;
+
+	for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+
+		p2m_top[topidx] = &mfn_list[pfn];
+	}
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+	unsigned topidx, idx;
+
+	if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+		return INVALID_P2M_ENTRY;
+
+	topidx = p2m_top_index(pfn);
+	idx = p2m_index(pfn);
+	return p2m_top[topidx][idx];
+}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
+
+static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+{
+	unsigned long *p;
+	unsigned i;
+
+	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+	BUG_ON(p == NULL);
+
+	for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+		p[i] = INVALID_P2M_ENTRY;
+
+	if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
+		free_page((unsigned long)p);
+	else
+		*mfnp = virt_to_mfn(p);
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	unsigned topidx, idx;
+
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+		BUG_ON(mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	topidx = p2m_top_index(pfn);
+	if (p2m_top[topidx] == p2m_missing) {
+		/* no need to allocate a page to store an invalid entry */
+		if (mfn == INVALID_P2M_ENTRY)
+			return;
+		alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+	}
+
+	idx = p2m_index(pfn);
+	p2m_top[topidx][idx] = mfn;
+}
+
 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 {
 	unsigned int level;
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static bool page_pinned(void *ptr)
+{
+	struct page *page = virt_to_page(ptr);
+
+	return PagePinned(page);
+}
+
+static void extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
 	struct mmu_update *u;
 
-	preempt_disable();
+	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
+
+	if (mcs.mc != NULL)
+		mcs.mc->args[1]++;
+	else {
+		mcs = __xen_mc_entry(sizeof(*u));
+		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+	}
 
-	mcs = xen_mc_entry(sizeof(*u));
 	u = mcs.args;
-	u->ptr = virt_to_machine(ptr).maddr;
-	u->val = pmd_val_ma(val);
-	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+	*u = *update;
+}
+
+void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+{
+	struct mmu_update u;
+
+	preempt_disable();
+
+	xen_mc_batch();
+
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pmd_val_ma(val);
+	extend_mmu_update(&u);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
 }
 
+void xen_set_pmd(pmd_t *ptr, pmd_t val)
+{
+	/* If page is not pinned, we can just update the entry
+	   directly */
+	if (!page_pinned(ptr)) {
+		*ptr = val;
+		return;
+	}
+
+	xen_set_pmd_hyper(ptr, val);
+}
+
 /*
  * Associate a virtual page frame with a given physical page frame
  * and protection flags for that frame.
@@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd)
 	return pte_mfn_to_pfn(pmd.pmd);
 }
 
-void xen_set_pud(pud_t *ptr, pud_t val)
+void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 {
-	struct multicall_space mcs;
-	struct mmu_update *u;
+	struct mmu_update u;
 
 	preempt_disable();
 
-	mcs = xen_mc_entry(sizeof(*u));
-	u = mcs.args;
-	u->ptr = virt_to_machine(ptr).maddr;
-	u->val = pud_val_ma(val);
-	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+	xen_mc_batch();
+
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pud_val_ma(val);
+	extend_mmu_update(&u);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
 }
 
+void xen_set_pud(pud_t *ptr, pud_t val)
+{
+	/* If page is not pinned, we can just update the entry
+	   directly */
+	if (!page_pinned(ptr)) {
+		*ptr = val;
+		return;
+	}
+
+	xen_set_pud_hyper(ptr, val);
+}
+
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
 	ptep->pte_high = pte.pte_high;
@@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 void xen_pmd_clear(pmd_t *pmdp)
 {
-	xen_set_pmd(pmdp, __pmd(0));
+	set_pmd(pmdp, __pmd(0));
 }
 
 pmd_t xen_make_pmd(pmdval_t pmd)
@@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd)
 	xen_mc_issue(0);
 }
 
+/*
+ * On save, we need to pin all pagetables to make sure they get their
+ * mfns turned into pfns.  Search the list for any unpinned pgds and pin
+ * them (unpinned pgds are not currently in use, probably because the
+ * process is under construction or destruction).
+ */
+void xen_mm_pin_all(void)
+{
+	unsigned long flags;
+	struct page *page;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	list_for_each_entry(page, &pgd_list, lru) {
+		if (!PagePinned(page)) {
+			xen_pgd_pin((pgd_t *)page_address(page));
+			SetPageSavePinned(page);
+		}
+	}
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 /* The init_mm pagetable is really pinned as soon as its created, but
    that's before we have page structures to store the bits.  So do all
    the book-keeping now. */
@@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
 	xen_mc_issue(0);
 }
 
+/*
+ * On resume, undo any pinning done at save, so that the rest of the
+ * kernel doesn't see any unexpected pinned pagetables.
+ */
+void xen_mm_unpin_all(void)
+{
+	unsigned long flags;
+	struct page *page;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	list_for_each_entry(page, &pgd_list, lru) {
+		if (PageSavePinned(page)) {
+			BUG_ON(!PagePinned(page));
+			printk("unpinning pinned %p\n", page_address(page));
+			xen_pgd_unpin((pgd_t *)page_address(page));
+			ClearPageSavePinned(page);
+		}
+	}
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
 	spin_lock(&next->page_table_lock);
@@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm)
 	spin_lock(&mm->page_table_lock);
 
 	/* pgd may not be pinned in the error exit path of execve */
-	if (PagePinned(virt_to_page(mm->pgd)))
+	if (page_pinned(mm->pgd))
 		xen_pgd_unpin(mm->pgd);
 
 	spin_unlock(&mm->page_table_lock);
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 5fe961c..297bf9f 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -25,10 +25,6 @@ enum pt_level {
 
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-void xen_set_pte(pte_t *ptep, pte_t pteval);
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
-		    pte_t *ptep, pte_t pteval);
-void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
@@ -45,10 +41,14 @@ pte_t xen_make_pte(pteval_t);
 pmd_t xen_make_pmd(pmdval_t);
 pgd_t xen_make_pgd(pgdval_t);
 
+void xen_set_pte(pte_t *ptep, pte_t pteval);
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval);
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
+void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 void xen_set_pud(pud_t *ptr, pud_t val);
+void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
+void xen_set_pud_hyper(pud_t *ptr, pud_t val);
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void xen_pmd_clear(pmd_t *pmdp);
 
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 5791eb2..3c63c4d 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -29,14 +29,14 @@
 #define MC_DEBUG	1
 
 #define MC_BATCH	32
-#define MC_ARGS		(MC_BATCH * 16 / sizeof(u64))
+#define MC_ARGS		(MC_BATCH * 16)
 
 struct mc_buffer {
 	struct multicall_entry entries[MC_BATCH];
 #if MC_DEBUG
 	struct multicall_entry debug[MC_BATCH];
 #endif
-	u64 args[MC_ARGS];
+	unsigned char args[MC_ARGS];
 	struct callback {
 		void (*fn)(void *);
 		void *data;
@@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args)
 {
 	struct mc_buffer *b = &__get_cpu_var(mc_buffer);
 	struct multicall_space ret;
-	unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
+	unsigned argidx = roundup(b->argidx, sizeof(u64));
 
 	BUG_ON(preemptible());
-	BUG_ON(argspace > MC_ARGS);
+	BUG_ON(b->argidx > MC_ARGS);
 
 	if (b->mcidx == MC_BATCH ||
-	    (b->argidx + argspace) > MC_ARGS)
+	    (argidx + args) > MC_ARGS) {
 		xen_mc_flush();
+		argidx = roundup(b->argidx, sizeof(u64));
+	}
 
 	ret.mc = &b->entries[b->mcidx];
 	b->mcidx++;
+	ret.args = &b->args[argidx];
+	b->argidx = argidx + args;
+
+	BUG_ON(b->argidx > MC_ARGS);
+	return ret;
+}
+
+struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
+{
+	struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+	struct multicall_space ret = { NULL, NULL };
+
+	BUG_ON(preemptible());
+	BUG_ON(b->argidx > MC_ARGS);
+
+	if (b->mcidx == 0)
+		return ret;
+
+	if (b->entries[b->mcidx - 1].op != op)
+		return ret;
+
+	if ((b->argidx + size) > MC_ARGS)
+		return ret;
+
+	ret.mc = &b->entries[b->mcidx - 1];
 	ret.args = &b->args[b->argidx];
-	b->argidx += argspace;
+	b->argidx += size;
 
+	BUG_ON(b->argidx > MC_ARGS);
 	return ret;
 }
 
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 8bae996..8589382 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode)
 /* Set up a callback to be called when the current batch is flushed */
 void xen_mc_callback(void (*fn)(void *), void *data);
 
+/*
+ * Try to extend the arguments of the previous multicall command.  The
+ * previous command's op must match.  If it does, then it attempts to
+ * extend the argument space allocated to the multicall entry by
+ * arg_size bytes.
+ *
+ * The returned multicall_space will return with mc pointing to the
+ * command on success, or NULL on failure, and args pointing to the
+ * newly allocated space.
+ */
+struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size);
+
 #endif /* _XEN_MULTICALLS_H */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 82517e4..4884478 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/page.h>
 #include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
@@ -27,8 +28,6 @@
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
 
-unsigned long *phys_to_machine_mapping;
-EXPORT_SYMBOL(phys_to_machine_mapping);
 
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
@@ -38,6 +37,8 @@ char * __init xen_memory_setup(void)
 {
 	unsigned long max_pfn = xen_start_info->nr_pages;
 
+	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
+
 	e820.nr_map = 0;
 	add_memory_region(0, LOWMEMSIZE(), E820_RAM);
 	add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 94e6900..d2e3c20 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -35,7 +35,7 @@
 #include "xen-ops.h"
 #include "mmu.h"
 
-static cpumask_t xen_cpu_initialized_map;
+cpumask_t xen_cpu_initialized_map;
 static DEFINE_PER_CPU(int, resched_irq) = -1;
 static DEFINE_PER_CPU(int, callfunc_irq) = -1;
 static DEFINE_PER_CPU(int, debug_irq) = -1;
@@ -65,6 +65,12 @@ static struct call_data_struct *call_data;
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
+#ifdef CONFIG_X86_32
+	__get_cpu_var(irq_stat).irq_resched_count++;
+#else
+	add_pda(irq_resched_count, 1);
+#endif
+
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
new file mode 100644
index 0000000..251669a
--- /dev/null
+++ b/arch/x86/xen/suspend.c
@@ -0,0 +1,58 @@
+#include <linux/types.h>
+
+#include <xen/interface/xen.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+#include "xen-ops.h"
+#include "mmu.h"
+
+/*
+ * Prepare the guest-visible Xen state for suspend.
+ *
+ * Rewrites the xenstore and console frame numbers in xen_start_info with
+ * mfn_to_pfn(), points HYPERVISOR_shared_info at xen_dummy_shared_info and
+ * installs an empty PTE for the FIX_PARAVIRT_BOOTMAP fixmap slot.  Must be
+ * called with interrupts disabled.
+ */
+void xen_pre_suspend(void)
+{
+	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+	xen_start_info->console.domU.mfn =
+		mfn_to_pfn(xen_start_info->console.domU.mfn);
+
+	BUG_ON(!irqs_disabled());
+
+	HYPERVISOR_shared_info = &xen_dummy_shared_info;
+	if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
+					 __pte_ma(0), 0))
+		BUG();
+}
+
+/*
+ * Re-establish Xen state after the suspend hypercall has returned.
+ *
+ * @suspend_cancelled: non-zero if the suspend was cancelled, in which case
+ * the frame numbers in xen_start_info are converted back with pfn_to_mfn();
+ * otherwise xen_vcpu_restore() and xen_timer_resume() are called instead.
+ */
+void xen_post_suspend(int suspend_cancelled)
+{
+	xen_setup_shared_info();
+
+	if (suspend_cancelled) {
+		xen_start_info->store_mfn =
+			pfn_to_mfn(xen_start_info->store_mfn);
+		xen_start_info->console.domU.mfn =
+			pfn_to_mfn(xen_start_info->console.domU.mfn);
+	} else {
+#ifdef CONFIG_SMP
+		xen_cpu_initialized_map = cpu_online_map;
+#endif
+		xen_vcpu_restore();
+		xen_timer_resume();
+	}
+}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 41e2175..64f0038 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void)
 	clockevents_register_device(&__get_cpu_var(xen_clock_events));
 }
 
+void xen_timer_resume(void)
+{
+	int cpu;
+
+	if (xen_clockevent != &xen_vcpuop_clockevent)
+		return;
+
+	for_each_online_cpu(cpu) {
+		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
+			BUG();
+	}
+}
+
 __init void xen_time_init(void)
 {
 	int cpu = smp_processor_id();
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 6ec3b4f..7c0cf63 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <asm/boot.h>
 #include <xen/interface/elfnote.h>
+#include <asm/xen/interface.h>
 
 	__INIT
 ENTRY(startup_xen)
@@ -32,5 +33,9 @@ ENTRY(hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
+	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
+		.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
+	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
+	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long __HYPERVISOR_VIRT_START)
 
 #endif /*CONFIG_XEN */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f1063ae..9a05559 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -9,18 +9,26 @@
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
 
+struct trap_info;
 void xen_copy_trap_info(struct trap_info *traps);
 
 DECLARE_PER_CPU(unsigned long, xen_cr3);
 DECLARE_PER_CPU(unsigned long, xen_current_cr3);
 
 extern struct start_info *xen_start_info;
+extern struct shared_info xen_dummy_shared_info;
 extern struct shared_info *HYPERVISOR_shared_info;
 
+void xen_setup_mfn_list_list(void);
+void xen_setup_shared_info(void);
+
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
 void xen_enable_sysenter(void);
+void xen_vcpu_restore(void);
+
+void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
@@ -29,6 +37,7 @@ void __init xen_time_init(void);
 unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
 unsigned long long xen_sched_clock(void);
+void xen_timer_resume(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
@@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
 			       void *info, int wait);
 
+extern cpumask_t xen_cpu_initialized_map;
+
 
 /* Declare an asm function, along with symbols needed to make it
    inlineable */
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
index dd68f85..db2ae42 100644
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -39,9 +39,14 @@ static int xencons_irq;
 
 /* ------------------------------------------------------------------ */
 
+static unsigned long console_pfn = ~0ul;
+
 static inline struct xencons_interface *xencons_interface(void)
 {
-	return mfn_to_virt(xen_start_info->console.domU.mfn);
+	if (console_pfn == ~0ul)
+		return mfn_to_virt(xen_start_info->console.domU.mfn);
+	else
+		return __va(console_pfn << PAGE_SHIFT);
 }
 
 static inline void notify_daemon(void)
@@ -101,20 +106,32 @@ static int __init xen_init(void)
 {
 	struct hvc_struct *hp;
 
-	if (!is_running_on_xen())
-		return 0;
+	if (!is_running_on_xen() ||
+	    is_initial_xendomain() ||
+	    !xen_start_info->console.domU.evtchn)
+		return -ENODEV;
 
 	xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
 	if (xencons_irq < 0)
-		xencons_irq = 0 /* NO_IRQ */;
+		xencons_irq = 0; /* NO_IRQ */
+
 	hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
 	if (IS_ERR(hp))
 		return PTR_ERR(hp);
 
 	hvc = hp;
+
+	console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn);
+
 	return 0;
 }
 
+void xen_console_resume(void)
+{
+	if (xencons_irq)
+		rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq);
+}
+
 static void __exit xen_fini(void)
 {
 	if (hvc)
@@ -134,12 +151,28 @@ module_init(xen_init);
 module_exit(xen_fini);
 console_initcall(xen_cons_init);
 
+static void raw_console_write(const char *str, int len)
+{
+	while(len > 0) {
+		int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str);
+		if (rc <= 0)
+			break;
+
+		str += rc;
+		len -= rc;
+	}
+}
+
+#ifdef CONFIG_EARLY_PRINTK
 static void xenboot_write_console(struct console *console, const char *string,
 				  unsigned len)
 {
 	unsigned int linelen, off = 0;
 	const char *pos;
 
+	raw_console_write(string, len);
+
+	write_console(0, "(early) ", 8);
 	while (off < len && NULL != (pos = strchr(string+off, '\n'))) {
 		linelen = pos-string+off;
 		if (off + linelen > len)
@@ -155,5 +188,23 @@ static void xenboot_write_console(struct console *console, const char *string,
 struct console xenboot_console = {
 	.name		= "xenboot",
 	.write		= xenboot_write_console,
-	.flags		= CON_PRINTBUFFER | CON_BOOT,
+	.flags		= CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME,
 };
+#endif	/* CONFIG_EARLY_PRINTK */
+
+void xen_raw_console_write(const char *str)
+{
+	raw_console_write(str, strlen(str));
+}
+
+void xen_raw_printk(const char *fmt, ...)
+{
+	static char buf[512];
+	va_list ap;
+
+	va_start(ap, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+
+	xen_raw_console_write(buf);
+}
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
index 0f47f46..9ce3b3b 100644
--- a/drivers/input/xen-kbdfront.c
+++ b/drivers/input/xen-kbdfront.c
@@ -66,6 +66,9 @@ static irqreturn_t input_handler(int rq, void *dev_id)
 		case XENKBD_TYPE_MOTION:
 			input_report_rel(dev, REL_X, event->motion.rel_x);
 			input_report_rel(dev, REL_Y, event->motion.rel_y);
+			if (event->motion.rel_z)
+				input_report_rel(dev, REL_WHEEL,
+						 -event->motion.rel_z);
 			break;
 		case XENKBD_TYPE_KEY:
 			dev = NULL;
@@ -84,6 +87,9 @@ static irqreturn_t input_handler(int rq, void *dev_id)
 		case XENKBD_TYPE_POS:
 			input_report_abs(dev, ABS_X, event->pos.abs_x);
 			input_report_abs(dev, ABS_Y, event->pos.abs_y);
+			if (event->pos.rel_z)
+				input_report_rel(dev, REL_WHEEL,
+						 -event->pos.rel_z);
 			break;
 		}
 		if (dev)
@@ -152,7 +158,7 @@ static int __devinit xenkbd_probe(struct xenbus_device *dev,
 	ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
 	for (i = BTN_LEFT; i <= BTN_TASK; i++)
 		set_bit(i, ptr->keybit);
-	ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL);
 	input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
 	input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
 
@@ -294,6 +300,16 @@ InitWait:
 		 */
 		if (dev->state != XenbusStateConnected)
 			goto InitWait; /* no InitWait seen yet, fudge it */
+
+		/* Set input abs params to match backend screen res */
+		if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+				 "width", "%d", &val) > 0)
+			input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0);
+
+		if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+				 "height", "%d", &val) > 0)
+			input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0);
+
 		break;
 
 	case XenbusStateClosing:
@@ -337,4 +353,6 @@ static void __exit xenkbd_cleanup(void)
 module_init(xenkbd_init);
 module_exit(xenkbd_cleanup);
 
+MODULE_DESCRIPTION("Xen virtual keyboard/pointer device frontend");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:vkbd");
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index 619a6f8..47ed39b 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -18,6 +18,7 @@
  * frame buffer.
  */
 
+#include <linux/console.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/fb.h>
@@ -42,37 +43,68 @@ struct xenfb_info {
 	struct xenfb_page	*page;
 	unsigned long 		*mfns;
 	int			update_wanted; /* XENFB_TYPE_UPDATE wanted */
+	int			feature_resize; /* XENFB_TYPE_RESIZE ok */
+	struct xenfb_resize	resize;		/* protected by resize_lock */
+	int			resize_dpy;	/* ditto */
+	spinlock_t		resize_lock;
 
 	struct xenbus_device	*xbdev;
 };
 
-static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8;
+#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8)
 
+enum { KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT };
+static int video[KPARAM_CNT] = { 2, XENFB_WIDTH, XENFB_HEIGHT };
+module_param_array(video, int, NULL, 0);
+MODULE_PARM_DESC(video,
+	"Video memory size in MB, width, height in pixels (default 2,800,600)");
+
+static void xenfb_make_preferred_console(void);
 static int xenfb_remove(struct xenbus_device *);
-static void xenfb_init_shared_page(struct xenfb_info *);
+static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *);
 static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
 static void xenfb_disconnect_backend(struct xenfb_info *);
 
+static void xenfb_send_event(struct xenfb_info *info,
+			     union xenfb_out_event *event)
+{
+	u32 prod;
+
+	prod = info->page->out_prod;
+	/* caller ensures !xenfb_queue_full() */
+	mb();			/* ensure ring space available */
+	XENFB_OUT_RING_REF(info->page, prod) = *event;
+	wmb();			/* ensure ring contents visible */
+	info->page->out_prod = prod + 1;
+
+	notify_remote_via_irq(info->irq);
+}
+
 static void xenfb_do_update(struct xenfb_info *info,
 			    int x, int y, int w, int h)
 {
 	union xenfb_out_event event;
-	u32 prod;
 
+	memset(&event, 0, sizeof(event));
 	event.type = XENFB_TYPE_UPDATE;
 	event.update.x = x;
 	event.update.y = y;
 	event.update.width = w;
 	event.update.height = h;
 
-	prod = info->page->out_prod;
 	/* caller ensures !xenfb_queue_full() */
-	mb();			/* ensure ring space available */
-	XENFB_OUT_RING_REF(info->page, prod) = event;
-	wmb();			/* ensure ring contents visible */
-	info->page->out_prod = prod + 1;
+	xenfb_send_event(info, &event);
+}
 
-	notify_remote_via_irq(info->irq);
+static void xenfb_do_resize(struct xenfb_info *info)
+{
+	union xenfb_out_event event;
+
+	memset(&event, 0, sizeof(event));
+	event.resize = info->resize;
+
+	/* caller ensures !xenfb_queue_full() */
+	xenfb_send_event(info, &event);
 }
 
 static int xenfb_queue_full(struct xenfb_info *info)
@@ -84,12 +116,28 @@ static int xenfb_queue_full(struct xenfb_info *info)
 	return prod - cons == XENFB_OUT_RING_LEN;
 }
 
+static void xenfb_handle_resize_dpy(struct xenfb_info *info)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->resize_lock, flags);
+	if (info->resize_dpy) {
+		if (!xenfb_queue_full(info)) {
+			info->resize_dpy = 0;
+			xenfb_do_resize(info);
+		}
+	}
+	spin_unlock_irqrestore(&info->resize_lock, flags);
+}
+
 static void xenfb_refresh(struct xenfb_info *info,
 			  int x1, int y1, int w, int h)
 {
 	unsigned long flags;
-	int y2 = y1 + h - 1;
 	int x2 = x1 + w - 1;
+	int y2 = y1 + h - 1;
+
+	xenfb_handle_resize_dpy(info);
 
 	if (!info->update_wanted)
 		return;
@@ -222,6 +270,57 @@ static ssize_t xenfb_write(struct fb_info *p, const char __user *buf,
 	return res;
 }
 
+static int
+xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+	struct xenfb_info *xenfb_info;
+	int required_mem_len;
+
+	xenfb_info = info->par;
+
+	if (!xenfb_info->feature_resize) {
+		if (var->xres == video[KPARAM_WIDTH] &&
+		    var->yres == video[KPARAM_HEIGHT] &&
+		    var->bits_per_pixel == xenfb_info->page->depth) {
+			return 0;
+		}
+		return -EINVAL;
+	}
+
+	/* Can't resize past initial width and height */
+	if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT])
+		return -EINVAL;
+
+	required_mem_len = var->xres * var->yres * xenfb_info->page->depth / 8;
+	if (var->bits_per_pixel == xenfb_info->page->depth &&
+	    var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) &&
+	    required_mem_len <= info->fix.smem_len) {
+		var->xres_virtual = var->xres;
+		var->yres_virtual = var->yres;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int xenfb_set_par(struct fb_info *info)
+{
+	struct xenfb_info *xenfb_info;
+	unsigned long flags;
+
+	xenfb_info = info->par;
+
+	spin_lock_irqsave(&xenfb_info->resize_lock, flags);
+	xenfb_info->resize.type = XENFB_TYPE_RESIZE;
+	xenfb_info->resize.width = info->var.xres;
+	xenfb_info->resize.height = info->var.yres;
+	xenfb_info->resize.stride = info->fix.line_length;
+	xenfb_info->resize.depth = info->var.bits_per_pixel;
+	xenfb_info->resize.offset = 0;
+	xenfb_info->resize_dpy = 1;
+	spin_unlock_irqrestore(&xenfb_info->resize_lock, flags);
+	return 0;
+}
+
 static struct fb_ops xenfb_fb_ops = {
 	.owner		= THIS_MODULE,
 	.fb_read	= fb_sys_read,
@@ -230,6 +329,8 @@ static struct fb_ops xenfb_fb_ops = {
 	.fb_fillrect	= xenfb_fillrect,
 	.fb_copyarea	= xenfb_copyarea,
 	.fb_imageblit	= xenfb_imageblit,
+	.fb_check_var	= xenfb_check_var,
+	.fb_set_par     = xenfb_set_par,
 };
 
 static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
@@ -258,6 +359,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
 {
 	struct xenfb_info *info;
 	struct fb_info *fb_info;
+	int fb_size;
+	int val;
 	int ret;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -265,18 +368,35 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
 		return -ENOMEM;
 	}
+
+	/* Limit kernel param videoram amount to what is in xenstore */
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) {
+		if (val < video[KPARAM_MEM])
+			video[KPARAM_MEM] = val;
+	}
+
+	/* If requested res does not fit in available memory, use default */
+	fb_size = video[KPARAM_MEM] * 1024 * 1024;
+	if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8
+	    > fb_size) {
+		video[KPARAM_WIDTH] = XENFB_WIDTH;
+		video[KPARAM_HEIGHT] = XENFB_HEIGHT;
+		fb_size = XENFB_DEFAULT_FB_LEN;
+	}
+
 	dev->dev.driver_data = info;
 	info->xbdev = dev;
 	info->irq = -1;
 	info->x1 = info->y1 = INT_MAX;
 	spin_lock_init(&info->dirty_lock);
+	spin_lock_init(&info->resize_lock);
 
-	info->fb = vmalloc(xenfb_mem_len);
+	info->fb = vmalloc(fb_size);
 	if (info->fb == NULL)
 		goto error_nomem;
-	memset(info->fb, 0, xenfb_mem_len);
+	memset(info->fb, 0, fb_size);
 
-	info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
 	info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
 	if (!info->mfns)
@@ -287,8 +407,6 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
 	if (!info->page)
 		goto error_nomem;
 
-	xenfb_init_shared_page(info);
-
 	/* abusing framebuffer_alloc() to allocate pseudo_palette */
 	fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL);
 	if (fb_info == NULL)
@@ -301,9 +419,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
 	fb_info->screen_base = info->fb;
 
 	fb_info->fbops = &xenfb_fb_ops;
-	fb_info->var.xres_virtual = fb_info->var.xres = info->page->width;
-	fb_info->var.yres_virtual = fb_info->var.yres = info->page->height;
-	fb_info->var.bits_per_pixel = info->page->depth;
+	fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH];
+	fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT];
+	fb_info->var.bits_per_pixel = XENFB_DEPTH;
 
 	fb_info->var.red = (struct fb_bitfield){16, 8, 0};
 	fb_info->var.green = (struct fb_bitfield){8, 8, 0};
@@ -315,9 +433,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
 	fb_info->var.vmode = FB_VMODE_NONINTERLACED;
 
 	fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
-	fb_info->fix.line_length = info->page->line_length;
+	fb_info->fix.line_length = fb_info->var.xres * XENFB_DEPTH / 8;
 	fb_info->fix.smem_start = 0;
-	fb_info->fix.smem_len = xenfb_mem_len;
+	fb_info->fix.smem_len = fb_size;
 	strcpy(fb_info->fix.id, "xen");
 	fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
 	fb_info->fix.accel = FB_ACCEL_NONE;
@@ -334,6 +452,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
 	fb_info->fbdefio = &xenfb_defio;
 	fb_deferred_io_init(fb_info);
 
+	xenfb_init_shared_page(info, fb_info);
+
 	ret = register_framebuffer(fb_info);
 	if (ret) {
 		fb_deferred_io_cleanup(fb_info);
@@ -348,6 +468,7 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
 	if (ret < 0)
 		goto error;
 
+	xenfb_make_preferred_console();
 	return 0;
 
  error_nomem:
@@ -358,12 +479,34 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
 	return ret;
 }
 
+static __devinit void
+xenfb_make_preferred_console(void)
+{
+	struct console *c;
+
+	if (console_set_on_cmdline)
+		return;
+
+	acquire_console_sem();
+	for (c = console_drivers; c; c = c->next) {
+		if (!strcmp(c->name, "tty") && c->index == 0)
+			break;
+	}
+	release_console_sem();
+	if (c) {
+		unregister_console(c);
+		c->flags |= CON_CONSDEV;
+		c->flags &= ~CON_PRINTBUFFER; /* don't print again */
+		register_console(c);
+	}
+}
+
 static int xenfb_resume(struct xenbus_device *dev)
 {
 	struct xenfb_info *info = dev->dev.driver_data;
 
 	xenfb_disconnect_backend(info);
-	xenfb_init_shared_page(info);
+	xenfb_init_shared_page(info, info->fb_info);
 	return xenfb_connect_backend(dev, info);
 }
 
@@ -391,20 +534,23 @@ static unsigned long vmalloc_to_mfn(void *address)
 	return pfn_to_mfn(vmalloc_to_pfn(address));
 }
 
-static void xenfb_init_shared_page(struct xenfb_info *info)
+static void xenfb_init_shared_page(struct xenfb_info *info,
+				   struct fb_info *fb_info)
 {
 	int i;
+	int epd = PAGE_SIZE / sizeof(info->mfns[0]);
 
 	for (i = 0; i < info->nr_pages; i++)
 		info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE);
 
-	info->page->pd[0] = vmalloc_to_mfn(info->mfns);
-	info->page->pd[1] = 0;
-	info->page->width = XENFB_WIDTH;
-	info->page->height = XENFB_HEIGHT;
-	info->page->depth = XENFB_DEPTH;
-	info->page->line_length = (info->page->depth / 8) * info->page->width;
-	info->page->mem_length = xenfb_mem_len;
+	for (i = 0; i * epd < info->nr_pages; i++)
+		info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]);
+
+	info->page->width = fb_info->var.xres;
+	info->page->height = fb_info->var.yres;
+	info->page->depth = fb_info->var.bits_per_pixel;
+	info->page->line_length = fb_info->fix.line_length;
+	info->page->mem_length = fb_info->fix.smem_len;
 	info->page->in_cons = info->page->in_prod = 0;
 	info->page->out_cons = info->page->out_prod = 0;
 }
@@ -504,6 +650,11 @@ InitWait:
 			val = 0;
 		if (val)
 			info->update_wanted = 1;
+
+		if (xenbus_scanf(XBT_NIL, dev->otherend,
+				 "feature-resize", "%d", &val) < 0)
+			val = 0;
+		info->feature_resize = val;
 		break;
 
 	case XenbusStateClosing:
@@ -547,4 +698,6 @@ static void __exit xenfb_cleanup(void)
 module_init(xenfb_init);
 module_exit(xenfb_cleanup);
 
+MODULE_DESCRIPTION("Xen virtual framebuffer device frontend");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:vfb");
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 37af04f..363286c 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,4 @@
-obj-y	+= grant-table.o features.o events.o
+obj-y	+= grant-table.o features.o events.o manage.o
 obj-y	+= xenbus/
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index ab25ba6..591bc29 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -225,7 +225,7 @@ static int increase_reservation(unsigned long nr_pages)
 		page = balloon_next_page(page);
 	}
 
-	reservation.extent_start = (unsigned long)frame_list;
+	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents   = nr_pages;
 	rc = HYPERVISOR_memory_op(
 		XENMEM_populate_physmap, &reservation);
@@ -321,7 +321,7 @@ static int decrease_reservation(unsigned long nr_pages)
 		balloon_append(pfn_to_page(pfn));
 	}
 
-	reservation.extent_start = (unsigned long)frame_list;
+	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents   = nr_pages;
 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
 	BUG_ON(ret != nr_pages);
@@ -368,7 +368,7 @@ static void balloon_process(struct work_struct *work)
 }
 
 /* Resets the Xen limit, sets new target, and kicks off processing. */
-void balloon_set_new_target(unsigned long target)
+static void balloon_set_new_target(unsigned long target)
 {
 	/* No need for lock. Not read-modify-write updates. */
 	balloon_stats.hard_limit   = ~0UL;
@@ -483,7 +483,7 @@ static int dealloc_pte_fn(
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
-	reservation.extent_start = (unsigned long)&mfn;
+	set_xen_guest_handle(reservation.extent_start, &mfn);
 	set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
 	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
@@ -519,7 +519,7 @@ static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
 				.extent_order = 0,
 				.domid        = DOMID_SELF
 			};
-			reservation.extent_start = (unsigned long)&gmfn;
+			set_xen_guest_handle(reservation.extent_start, &gmfn);
 			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
 						   &reservation);
 			if (ret == 1)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 76e5b73..332dd63 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -355,7 +355,7 @@ static void unbind_from_irq(unsigned int irq)
 
 	spin_lock(&irq_mapping_update_lock);
 
-	if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
+	if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
 		close.port = evtchn;
 		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
 			BUG();
@@ -375,7 +375,7 @@ static void unbind_from_irq(unsigned int irq)
 		evtchn_to_irq[evtchn] = -1;
 		irq_info[irq] = IRQ_UNBOUND;
 
-		dynamic_irq_init(irq);
+		dynamic_irq_cleanup(irq);
 	}
 
 	spin_unlock(&irq_mapping_update_lock);
@@ -557,6 +557,33 @@ out:
 	put_cpu();
 }
 
+/* Rebind a new event channel to an existing irq. */
+void rebind_evtchn_irq(int evtchn, int irq)
+{
+	/* Make sure the irq is masked, since the new event channel
+	   will also be masked. */
+	disable_irq(irq);
+
+	spin_lock(&irq_mapping_update_lock);
+
+	/* After resume the irq<->evtchn mappings are all cleared out */
+	BUG_ON(evtchn_to_irq[evtchn] != -1);
+	/* Expect irq to have been bound before,
+	   so the bindcount should be non-0 */
+	BUG_ON(irq_bindcount[irq] == 0);
+
+	evtchn_to_irq[evtchn] = irq;
+	irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
+
+	spin_unlock(&irq_mapping_update_lock);
+
+	/* new event channels are always bound to cpu 0 */
+	irq_set_affinity(irq, cpumask_of_cpu(0));
+
+	/* Unmask the event channel. */
+	enable_irq(irq);
+}
+
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
 static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 {
@@ -647,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq)
 	return ret;
 }
 
+static void restore_cpu_virqs(unsigned int cpu)
+{
+	struct evtchn_bind_virq bind_virq;
+	int virq, irq, evtchn;
+
+	for (virq = 0; virq < NR_VIRQS; virq++) {
+		if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
+			continue;
+
+		BUG_ON(irq_info[irq].type != IRQT_VIRQ);
+		BUG_ON(irq_info[irq].index != virq);
+
+		/* Get a new binding from Xen. */
+		bind_virq.virq = virq;
+		bind_virq.vcpu = cpu;
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						&bind_virq) != 0)
+			BUG();
+		evtchn = bind_virq.port;
+
+		/* Record the new mapping. */
+		evtchn_to_irq[evtchn] = irq;
+		irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+		bind_evtchn_to_cpu(evtchn, cpu);
+
+		/* Ready for use. */
+		unmask_evtchn(evtchn);
+	}
+}
+
+static void restore_cpu_ipis(unsigned int cpu)
+{
+	struct evtchn_bind_ipi bind_ipi;
+	int ipi, irq, evtchn;
+
+	for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
+		if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
+			continue;
+
+		BUG_ON(irq_info[irq].type != IRQT_IPI);
+		BUG_ON(irq_info[irq].index != ipi);
+
+		/* Get a new binding from Xen. */
+		bind_ipi.vcpu = cpu;
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+						&bind_ipi) != 0)
+			BUG();
+		evtchn = bind_ipi.port;
+
+		/* Record the new mapping. */
+		evtchn_to_irq[evtchn] = irq;
+		irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+		bind_evtchn_to_cpu(evtchn, cpu);
+
+		/* Ready for use. */
+		unmask_evtchn(evtchn);
+
+	}
+}
+
+void xen_irq_resume(void)
+{
+	unsigned int cpu, irq, evtchn;
+
+	init_evtchn_cpu_bindings();
+
+	/* New event-channel space is not 'live' yet. */
+	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+		mask_evtchn(evtchn);
+
+	/* No IRQ <-> event-channel mappings. */
+	for (irq = 0; irq < NR_IRQS; irq++)
+		irq_info[irq].evtchn = 0; /* zap event-channel binding */
+
+	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+		evtchn_to_irq[evtchn] = -1;
+
+	for_each_possible_cpu(cpu) {
+		restore_cpu_virqs(cpu);
+		restore_cpu_ipis(cpu);
+	}
+}
+
 static struct irq_chip xen_dynamic_chip __read_mostly = {
 	.name		= "xen-dyn",
 	.mask		= disable_dynirq,
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 52b6b41..e9e1116 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 	return 0;
 }
 
-static int gnttab_resume(void)
+int gnttab_resume(void)
 {
 	if (max_nr_grant_frames() < nr_grant_frames)
 		return -ENOSYS;
 	return gnttab_map(0, nr_grant_frames - 1);
 }
 
-static int gnttab_suspend(void)
+int gnttab_suspend(void)
 {
 	arch_gnttab_unmap_shared(shared, nr_grant_frames);
 	return 0;
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
new file mode 100644
index 0000000..5b546e3
--- /dev/null
+++ b/drivers/xen/manage.c
@@ -0,0 +1,252 @@
+/*
+ * Handle external requests for shutdown, reboot, suspend and sysrq
+ */
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/reboot.h>
+#include <linux/sysrq.h>
+#include <linux/stop_machine.h>
+#include <linux/freezer.h>
+
+#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <xen/hvc-console.h>
+#include <xen/xen-ops.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+enum shutdown_state {
+	SHUTDOWN_INVALID = -1,
+	SHUTDOWN_POWEROFF = 0,
+	SHUTDOWN_SUSPEND = 2,
+	/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
+	   report a crash, not be instructed to crash!
+	   HALT is the same as POWEROFF, as far as we're concerned.  The tools use
+	   the distinction when we return the reason code to them.  */
+	 SHUTDOWN_HALT = 4,
+};
+
+/* Ignore multiple shutdown requests. */
+static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
+
+#ifdef CONFIG_PM_SLEEP
+static int xen_suspend(void *data)
+{
+	int *cancelled = data;
+	int err;
+
+	BUG_ON(!irqs_disabled());
+
+	load_cr3(swapper_pg_dir);
+
+	err = device_power_down(PMSG_SUSPEND);
+	if (err) {
+		printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
+		       err);
+		return err;
+	}
+
+	xen_mm_pin_all();
+	gnttab_suspend();
+	xen_pre_suspend();
+
+	/*
+	 * This hypercall returns 1 if suspend was cancelled
+	 * or the domain was merely checkpointed, and 0 if it
+	 * is resuming in a new domain.
+	 */
+	*cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+	xen_post_suspend(*cancelled);
+	gnttab_resume();
+	xen_mm_unpin_all();
+
+	device_power_up();
+
+	if (!*cancelled) {
+		xen_irq_resume();
+		xen_console_resume();
+	}
+
+	return 0;
+}
+
+/* Suspend the domain: freeze processes (if preemptible), suspend devices
+   and xenbus, run xen_suspend() via stop_machine_run(), then resume. */
+static void do_suspend(void)
+{
+	int err, cancelled = 1;
+
+	shutting_down = SHUTDOWN_SUSPEND;
+#ifdef CONFIG_PREEMPT
+	/* If the kernel is preemptible, we need to freeze all the processes
+	   to prevent them from being in the middle of a pagetable update
+	   during suspend. */
+	err = freeze_processes();
+	if (err) {
+		printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
+		goto out;	/* thaw and clear shutting_down */
+	}
+#endif
+
+	err = device_suspend(PMSG_SUSPEND);
+	if (err) {
+		printk(KERN_ERR "xen suspend: device_suspend %d\n", err);
+		goto out;
+	}
+
+	printk(KERN_DEBUG "suspending xenbus...\n");
+	/* XXX use normal device tree? */
+	xenbus_suspend();
+
+	err = stop_machine_run(xen_suspend, &cancelled, 0);
+	if (err) {
+		printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
+		goto out;
+	}
+
+	if (!cancelled)
+		xenbus_resume();
+	else
+		xenbus_suspend_cancel();
+
+	device_resume();
+
+	/* Make sure timer events get retriggered on all CPUs */
+	clock_was_set();
+out:
+#ifdef CONFIG_PREEMPT
+	thaw_processes();
+#endif
+	shutting_down = SHUTDOWN_INVALID;
+}
+#endif	/* CONFIG_PM_SLEEP */
+
+static void shutdown_handler(struct xenbus_watch *watch,
+			     const char **vec, unsigned int len)
+{
+	char *str;
+	struct xenbus_transaction xbt;
+	int err;
+
+	if (shutting_down != SHUTDOWN_INVALID)
+		return;
+
+ again:
+	err = xenbus_transaction_start(&xbt);
+	if (err)
+		return;
+
+	str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
+	/* Ignore read errors and empty reads. */
+	if (XENBUS_IS_ERR_READ(str)) {
+		xenbus_transaction_end(xbt, 1);
+		return;
+	}
+
+	xenbus_write(xbt, "control", "shutdown", "");
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN) {
+		kfree(str);
+		goto again;
+	}
+
+	if (strcmp(str, "poweroff") == 0 ||
+	    strcmp(str, "halt") == 0) {
+		shutting_down = SHUTDOWN_POWEROFF;
+		orderly_poweroff(false);
+	} else if (strcmp(str, "reboot") == 0) {
+		shutting_down = SHUTDOWN_POWEROFF; /* ? */
+		ctrl_alt_del();
+#ifdef CONFIG_PM_SLEEP
+	} else if (strcmp(str, "suspend") == 0) {
+		do_suspend();
+#endif
+	} else {
+		printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
+		shutting_down = SHUTDOWN_INVALID;
+	}
+
+	kfree(str);
+}
+
+static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
+			  unsigned int len)
+{
+	char sysrq_key = '\0';
+	struct xenbus_transaction xbt;
+	int err;
+
+ again:
+	err = xenbus_transaction_start(&xbt);
+	if (err)
+		return;
+	if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
+		printk(KERN_ERR "Unable to read sysrq code in "
+		       "control/sysrq\n");
+		xenbus_transaction_end(xbt, 1);
+		return;
+	}
+
+	if (sysrq_key != '\0')
+		xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN)
+		goto again;
+
+	if (sysrq_key != '\0')
+		handle_sysrq(sysrq_key, NULL);
+}
+
+static struct xenbus_watch shutdown_watch = {
+	.node = "control/shutdown",
+	.callback = shutdown_handler
+};
+
+static struct xenbus_watch sysrq_watch = {
+	.node = "control/sysrq",
+	.callback = sysrq_handler
+};
+
+static int setup_shutdown_watcher(void)
+{
+	int err;
+
+	err = register_xenbus_watch(&shutdown_watch);
+	if (err) {
+		printk(KERN_ERR "Failed to set shutdown watcher\n");
+		return err;
+	}
+
+	err = register_xenbus_watch(&sysrq_watch);
+	if (err) {
+		printk(KERN_ERR "Failed to set sysrq watcher\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int shutdown_event(struct notifier_block *notifier,
+			  unsigned long event,
+			  void *data)
+{
+	setup_shutdown_watcher();
+	return NOTIFY_DONE;
+}
+
+static int __init setup_shutdown_event(void)
+{
+	static struct notifier_block xenstore_notifier = {
+		.notifier_call = shutdown_event
+	};
+	register_xenstore_notifier(&xenstore_notifier);
+
+	return 0;
+}
+
+subsys_initcall(setup_shutdown_event);
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 6efbe3f..090c61e 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -203,7 +203,6 @@ int xb_read(void *data, unsigned len)
 int xb_init_comms(void)
 {
 	struct xenstore_domain_interface *intf = xen_store_interface;
-	int err;
 
 	if (intf->req_prod != intf->req_cons)
 		printk(KERN_ERR "XENBUS request ring is not quiescent "
@@ -216,18 +215,20 @@ int xb_init_comms(void)
 		intf->rsp_cons = intf->rsp_prod;
 	}
 
-	if (xenbus_irq)
-		unbind_from_irqhandler(xenbus_irq, &xb_waitq);
+	if (xenbus_irq) {
+		/* Already have an irq; assume we're resuming */
+		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
+	} else {
+		int err;
+		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
+						0, "xenbus", &xb_waitq);
+		if (err <= 0) {
+			printk(KERN_ERR "XENBUS request irq failed %i\n", err);
+			return err;
+		}
 
-	err = bind_evtchn_to_irqhandler(
-		xen_store_evtchn, wake_waiting,
-		0, "xenbus", &xb_waitq);
-	if (err <= 0) {
-		printk(KERN_ERR "XENBUS request irq failed %i\n", err);
-		return err;
+		xenbus_irq = err;
 	}
 
-	xenbus_irq = err;
-
 	return 0;
 }
diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index c2ccd99..2a4f9b4 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -176,9 +176,9 @@ HYPERVISOR_fpu_taskswitch(int set)
 }
 
 static inline int
-HYPERVISOR_sched_op(int cmd, unsigned long arg)
+HYPERVISOR_sched_op(int cmd, void *arg)
 {
-	return _hypercall2(int, sched_op, cmd, arg);
+	return _hypercall2(int, sched_op_new, cmd, arg);
 }
 
 static inline long
@@ -315,6 +315,13 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
 }
 
 static inline void
+MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
+{
+	mcl->op = __HYPERVISOR_fpu_taskswitch;
+	mcl->args[0] = set;
+}
+
+static inline void
 MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
 			pte_t new_val, unsigned long flags)
 {
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index e11f240..377c045 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -26,15 +26,20 @@ typedef struct xpaddr {
 #define FOREIGN_FRAME_BIT	(1UL<<31)
 #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
 
-extern unsigned long *phys_to_machine_mapping;
+/* Maximum amount of memory we can handle in a domain in pages */
+#define MAX_DOMAIN_PAGES						\
+    ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
+
+
+extern unsigned long get_phys_to_machine(unsigned long pfn);
+extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return pfn;
 
-	return phys_to_machine_mapping[(unsigned int)(pfn)] &
-		~FOREIGN_FRAME_BIT;
+	return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
 }
 
 static inline int phys_to_machine_mapping_valid(unsigned long pfn)
@@ -42,7 +47,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 1;
 
-	return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
+	return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
 }
 
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
@@ -106,20 +111,12 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 	unsigned long pfn = mfn_to_pfn(mfn);
 	if ((pfn < max_mapnr)
 	    && !xen_feature(XENFEAT_auto_translated_physmap)
-	    && (phys_to_machine_mapping[pfn] != mfn))
+	    && (get_phys_to_machine(pfn) != mfn))
 		return max_mapnr; /* force !pfn_valid() */
+	/* XXX fixme; not true with sparsemem */
 	return pfn;
 }
 
-static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-		return;
-	}
-	phys_to_machine_mapping[pfn] = mfn;
-}
-
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
 #define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
diff --git a/include/linux/console.h b/include/linux/console.h
index a4f27fb..248e6e3 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -108,6 +108,8 @@ struct console {
 	struct	 console *next;
 };
 
+extern int console_set_on_cmdline;
+
 extern int add_preferred_console(char *name, int idx, char *options);
 extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options);
 extern void register_console(struct console *);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f31debf..0d2a4e7 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
 __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, owner_priv_1)		/* Used by some filesystems */
 PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */
+PAGEFLAG(SavePinned, dirty);					/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 	__SETPAGEFLAG(Private, private)
diff --git a/include/xen/events.h b/include/xen/events.h
index acd8e06..67c4436 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -32,6 +32,7 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
 int resend_irq_on_evtchn(unsigned int irq);
+void rebind_evtchn_irq(int evtchn, int irq);
 
 static inline void notify_remote_via_evtchn(int port)
 {
@@ -40,4 +41,7 @@ static inline void notify_remote_via_evtchn(int port)
 }
 
 extern void notify_remote_via_irq(int irq);
+
+extern void xen_irq_resume(void);
+
 #endif	/* _XEN_EVENTS_H */
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 4662048..a40f1cd 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -51,6 +51,9 @@ struct gnttab_free_callback {
 	u16 count;
 };
 
+int gnttab_suspend(void);
+int gnttab_resume(void);
+
 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
 				int readonly);
 
diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h
index 21c0ecf..98b79bc 100644
--- a/include/xen/hvc-console.h
+++ b/include/xen/hvc-console.h
@@ -3,4 +3,13 @@
 
 extern struct console xenboot_console;
 
+#ifdef CONFIG_HVC_XEN
+void xen_console_resume(void);
+#else
+static inline void xen_console_resume(void) { }
+#endif
+
+void xen_raw_console_write(const char *str);
+void xen_raw_printk(const char *fmt, ...);
+
 #endif	/* XEN_HVC_CONSOLE_H */
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index a64d3df..7a8262c 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -120,6 +120,26 @@
  */
 #define XEN_ELFNOTE_BSD_SYMTAB    11
 
+/*
+ * The lowest address the hypervisor hole can begin at (numeric).
+ *
+ * This must not be set higher than HYPERVISOR_VIRT_START. Its presence
+ * also indicates to the hypervisor that the kernel can deal with the
+ * hole starting at a higher address.
+ */
+#define XEN_ELFNOTE_HV_START_LOW  12
+
+/*
+ * List of maddr_t-sized mask/value pairs describing how to recognize
+ * (non-present) L1 page table entries carrying valid MFNs (numeric).
+ */
+#define XEN_ELFNOTE_L1_MFN_VALID  13
+
+/*
+ * Whether or not the guest supports cooperative suspend cancellation.
+ */
+#define XEN_ELFNOTE_SUSPEND_CANCEL 14
+
 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */
 
 /*
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
index d73228d..f51b641 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
@@ -38,6 +38,9 @@
  */
 #define XENFEAT_pae_pgdir_above_4gb        4
 
+/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
+#define XENFEAT_mmu_pt_update_preserve_ad  5
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h
index 5a934dd..974a51e 100644
--- a/include/xen/interface/io/fbif.h
+++ b/include/xen/interface/io/fbif.h
@@ -49,11 +49,27 @@ struct xenfb_update {
 	int32_t height;		/* rect height */
 };
 
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize {
+	uint8_t type;		/* XENFB_TYPE_RESIZE */
+	int32_t width;		/* width in pixels */
+	int32_t height;		/* height in pixels */
+	int32_t stride;		/* stride in bytes */
+	int32_t depth;		/* depth in bits */
+	int32_t offset;		/* start offset within framebuffer */
+};
+
 #define XENFB_OUT_EVENT_SIZE 40
 
 union xenfb_out_event {
 	uint8_t type;
 	struct xenfb_update update;
+	struct xenfb_resize resize;
 	char pad[XENFB_OUT_EVENT_SIZE];
 };
 
@@ -105,15 +121,18 @@ struct xenfb_page {
 	 * Each directory page holds PAGE_SIZE / sizeof(*pd)
 	 * framebuffer pages, and can thus map up to PAGE_SIZE *
 	 * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
-	 * sizeof(unsigned long) == 4, that's 4 Megs.  Two directory
-	 * pages should be enough for a while.
+	 * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2
+	 * Megs 64 bit.  256 directories give enough room for a 512
+	 * Meg framebuffer with a max resolution of 12,800x10,240.
+	 * Should be enough for a while with room leftover for
+	 * expansion.
 	 */
-	unsigned long pd[2];
+	unsigned long pd[256];
 };
 
 /*
- * Wart: xenkbd needs to know resolution.  Put it here until a better
- * solution is found, but don't leak it to the backend.
+ * Wart: xenkbd needs to know default resolution.  Put it here until a
+ * better solution is found, but don't leak it to the backend.
  */
 #ifdef __KERNEL__
 #define XENFB_WIDTH 800
diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h
index fb97f42..8066c78 100644
--- a/include/xen/interface/io/kbdif.h
+++ b/include/xen/interface/io/kbdif.h
@@ -49,6 +49,7 @@ struct xenkbd_motion {
 	uint8_t type;		/* XENKBD_TYPE_MOTION */
 	int32_t rel_x;		/* relative X motion */
 	int32_t rel_y;		/* relative Y motion */
+	int32_t rel_z;		/* relative Z motion (wheel) */
 };
 
 struct xenkbd_key {
@@ -61,6 +62,7 @@ struct xenkbd_position {
 	uint8_t type;		/* XENKBD_TYPE_POS */
 	int32_t abs_x;		/* absolute X position (in FB pixels) */
 	int32_t abs_y;		/* absolute Y position (in FB pixels) */
+	int32_t rel_z;		/* relative Z motion (wheel) */
 };
 
 #define XENKBD_IN_EVENT_SIZE 40
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index da76846..af36ead 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -29,7 +29,7 @@ struct xen_memory_reservation {
      *   OUT: GMFN bases of extents that were allocated
      *   (NB. This command also updates the mach_to_phys translation table)
      */
-    ulong extent_start;
+    GUEST_HANDLE(ulong) extent_start;
 
     /* Number of extents, and size/alignment of each (2^extent_order pages). */
     unsigned long  nr_extents;
@@ -50,6 +50,7 @@ struct xen_memory_reservation {
     domid_t        domid;
 
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
 
 /*
  * Returns the maximum machine frame number of mapped RAM in this system.
@@ -85,7 +86,7 @@ struct xen_machphys_mfn_list {
      * any large discontiguities in the machine address space, 2MB gaps in
      * the machphys table will be represented by an MFN base of zero.
      */
-    ulong extent_start;
+    GUEST_HANDLE(ulong) extent_start;
 
     /*
      * Number of extents written to the above array. This will be smaller
@@ -93,6 +94,7 @@ struct xen_machphys_mfn_list {
      */
     unsigned int nr_extents;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
 
 /*
  * Sets the GPFN at which a particular page appears in the specified guest's
@@ -115,6 +117,7 @@ struct xen_add_to_physmap {
     /* GPFN where the source mapping page should appear. */
     unsigned long gpfn;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
 
 /*
  * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
@@ -129,13 +132,14 @@ struct xen_translate_gpfn_list {
     unsigned long nr_gpfns;
 
     /* List of GPFNs to translate. */
-    ulong gpfn_list;
+    GUEST_HANDLE(ulong) gpfn_list;
 
     /*
      * Output list to contain MFN translations. May be the same as the input
      * list (in which case each input GPFN is overwritten with the output MFN).
      */
-    ulong mfn_list;
+    GUEST_HANDLE(ulong) mfn_list;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 819a033..2befa3e 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -114,9 +114,14 @@
  * ptr[:2]  -- Machine address within the frame whose mapping to modify.
  *             The frame must belong to the FD, if one is specified.
  * val      -- Value to write into the mapping entry.
+ *
+ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
+ * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
+ * with those in @val.
  */
-#define MMU_NORMAL_PT_UPDATE     0 /* checked '*ptr = val'. ptr is MA.       */
-#define MMU_MACHPHYS_UPDATE      1 /* ptr = MA of frame to modify entry for  */
+#define MMU_NORMAL_PT_UPDATE      0 /* checked '*ptr = val'. ptr is MA.       */
+#define MMU_MACHPHYS_UPDATE       1 /* ptr = MA of frame to modify entry for  */
+#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */
 
 /*
  * MMU EXTENDED OPERATIONS
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 10ddfe0..a706d6a 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,4 +5,10 @@
 
 DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
+void xen_pre_suspend(void);
+void xen_post_suspend(int suspend_cancelled);
+
+void xen_mm_pin_all(void);
+void xen_mm_unpin_all(void);
+
 #endif /* INCLUDE_XEN_OPS_H */
diff --git a/kernel/printk.c b/kernel/printk.c
index 8fb01c3..028ed75 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -121,6 +121,8 @@ struct console_cmdline
 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
 static int selected_console = -1;
 static int preferred_console = -1;
+int console_set_on_cmdline;
+EXPORT_SYMBOL(console_set_on_cmdline);
 
 /* Flag: console code may call schedule() */
 static int console_may_schedule;
@@ -890,6 +892,7 @@ static int __init console_setup(char *str)
 	*s = 0;
 
 	__add_preferred_console(buf, idx, options, brl_options);
+	console_set_on_cmdline = 1;
 	return 1;
 }
 __setup("console=", console_setup);

Reply to: