Diffstat (limited to 'trunk/2.6.22/20047_xen3-patch-2.6.22.patch1')
-rw-r--r--	trunk/2.6.22/20047_xen3-patch-2.6.22.patch1	7866
1 files changed, 7866 insertions, 0 deletions
diff --git a/trunk/2.6.22/20047_xen3-patch-2.6.22.patch1 b/trunk/2.6.22/20047_xen3-patch-2.6.22.patch1
new file mode 100644
index 0000000..df38df6
--- /dev/null
+++ b/trunk/2.6.22/20047_xen3-patch-2.6.22.patch1
@@ -0,0 +1,7866 @@
+From: www.kernel.org
+Subject: Update to 2.6.22
+Patch-mainline: 2.6.22
+
+Automatically created from "patches.kernel.org/patch-2.6.22" by xen-port-patches.py
+
+Acked-by: jbeulich@novell.com
+
+Index: 10.3-2007-11-26/arch/i386/Kconfig
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/Kconfig 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/Kconfig 2007-10-22 13:58:56.000000000 +0200
+@@ -922,7 +922,6 @@ config HOTPLUG_CPU
+
+ config COMPAT_VDSO
+ bool "Compat VDSO support"
+- depends on !X86_XEN
+ default y
+ help
+ Map the VDSO to the predictable old-style address too.
+@@ -1086,7 +1085,7 @@ config PCI
+ bool "PCI support" if !X86_VISWS
+ depends on !X86_VOYAGER
+ default y if X86_VISWS
+- select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
++ select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC && !X86_XEN)
+ help
+ Find out whether you have a PCI motherboard. PCI is the name of a
+ bus system, i.e. the way the CPU talks to the other stuff inside
+Index: 10.3-2007-11-26/arch/i386/Kconfig.cpu
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/Kconfig.cpu 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/Kconfig.cpu 2007-10-22 13:58:56.000000000 +0200
+@@ -299,7 +299,7 @@ config X86_POPAD_OK
+
+ config X86_CMPXCHG64
+ bool
+- depends on X86_PAE
++ depends on X86_PAE || X86_XEN
+ default y
+
+ config X86_ALIGNMENT_16
+Index: 10.3-2007-11-26/arch/i386/kernel/Makefile
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/Makefile 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/Makefile 2007-10-22 13:58:56.000000000 +0200
+@@ -103,5 +103,4 @@ n-obj-xen := i8253.o i8259.o reboot.o sm
+ obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+ obj-y := $(call cherrypickxen, $(obj-y))
+ extra-y := $(call cherrypickxen, $(extra-y))
+-%/head-xen.o %/head-xen.s: EXTRA_AFLAGS :=
+ endif
+Index: 10.3-2007-11-26/arch/i386/kernel/acpi/boot-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/acpi/boot-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/acpi/boot-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -624,8 +624,6 @@ static int __init acpi_parse_sbf(struct
+ static int __init acpi_parse_hpet(struct acpi_table_header *table)
+ {
+ struct acpi_table_hpet *hpet_tbl;
+- struct resource *hpet_res;
+- resource_size_t res_start;
+
+ hpet_tbl = (struct acpi_table_hpet *)table;
+ if (!hpet_tbl) {
+@@ -639,29 +637,10 @@ static int __init acpi_parse_hpet(struct
+ return -1;
+ }
+
+-#define HPET_RESOURCE_NAME_SIZE 9
+- hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
+- if (hpet_res) {
+- memset(hpet_res, 0, sizeof(*hpet_res));
+- hpet_res->name = (void *)&hpet_res[1];
+- hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+- snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE,
+- "HPET %u", hpet_tbl->sequence);
+- hpet_res->end = (1 * 1024) - 1;
+- }
+-
+ hpet_address = hpet_tbl->address.address;
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, hpet_address);
+
+- res_start = hpet_address;
+-
+- if (hpet_res) {
+- hpet_res->start = res_start;
+- hpet_res->end += res_start;
+- insert_resource(&iomem_resource, hpet_res);
+- }
+-
+ return 0;
+ }
+ #else
+@@ -877,7 +856,7 @@ static void __init acpi_process_madt(voi
+ acpi_ioapic = 1;
+
+ smp_found_config = 1;
+- clustered_apic_check();
++ setup_apic_routing();
+ }
+ }
+ if (error == -EINVAL) {
+Index: 10.3-2007-11-26/arch/i386/kernel/apic-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/apic-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/apic-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -19,7 +19,6 @@
+ #include <linux/mm.h>
+ #include <linux/delay.h>
+ #include <linux/bootmem.h>
+-#include <linux/smp_lock.h>
+ #include <linux/interrupt.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/kernel_stat.h>
+Index: 10.3-2007-11-26/arch/i386/kernel/cpu/common-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/cpu/common-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/cpu/common-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -22,16 +22,40 @@
+ #define phys_pkg_id(a,b) a
+ #endif
+ #endif
+-#include <asm/pda.h>
+ #include <asm/hypervisor.h>
+
+ #include "cpu.h"
+
+-DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+-EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
++DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
++ [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
++ [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
++ [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
++ [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
++#ifndef CONFIG_XEN
++ /*
++ * Segments used for calling PnP BIOS have byte granularity.
++ * They code segments and data segments have fixed 64k limits,
++ * the transfer segment sizes are set at run time.
++ */
++ [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
++ [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
++ [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
++ [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
++ [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
++ /*
++ * The APM segments have byte granularity and their bases
++ * are set at run time. All have 64k limits.
++ */
++ [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
++ /* 16-bit code */
++ [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
++ [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
+
+-struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
+-EXPORT_SYMBOL(_cpu_pda);
++ [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
++#endif
++ [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
++} };
++EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
+
+ static int cachesize_override __cpuinitdata = -1;
+ static int disable_x86_fxsr __cpuinitdata;
+@@ -373,7 +397,7 @@ __setup("serialnumber", x86_serial_nr_se
+ /*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
++static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+ {
+ int i;
+
+@@ -484,15 +508,22 @@ void __cpuinit identify_cpu(struct cpuin
+
+ /* Init Machine Check Exception if available. */
+ mcheck_init(c);
++}
+
+- if (c == &boot_cpu_data)
+- sysenter_setup();
++void __init identify_boot_cpu(void)
++{
++ identify_cpu(&boot_cpu_data);
++ sysenter_setup();
+ enable_sep_cpu();
++ mtrr_bp_init();
++}
+
+- if (c == &boot_cpu_data)
+- mtrr_bp_init();
+- else
+- mtrr_ap_init();
++void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
++{
++ BUG_ON(c == &boot_cpu_data);
++ identify_cpu(c);
++ enable_sep_cpu();
++ mtrr_ap_init();
+ }
+
+ #ifdef CONFIG_X86_HT
+@@ -606,136 +637,47 @@ void __init early_cpu_init(void)
+ #endif
+ }
+
+-/* Make sure %gs is initialized properly in idle threads */
++/* Make sure %fs is initialized properly in idle threads */
+ struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
+ {
+ memset(regs, 0, sizeof(struct pt_regs));
+- regs->xfs = __KERNEL_PDA;
++ regs->xfs = __KERNEL_PERCPU;
+ return regs;
+ }
+
+-static __cpuinit int alloc_gdt(int cpu)
++/* Current gdt points %fs at the "master" per-cpu area: after this,
++ * it's on the real one. */
++void switch_to_new_gdt(void)
+ {
+- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+- struct desc_struct *gdt;
+- struct i386_pda *pda;
+-
+- gdt = (struct desc_struct *)cpu_gdt_descr->address;
+- pda = cpu_pda(cpu);
+-
+- /*
+- * This is a horrible hack to allocate the GDT. The problem
+- * is that cpu_init() is called really early for the boot CPU
+- * (and hence needs bootmem) but much later for the secondary
+- * CPUs, when bootmem will have gone away
+- */
+- if (NODE_DATA(0)->bdata->node_bootmem_map) {
+- BUG_ON(gdt != NULL || pda != NULL);
+-
+- gdt = alloc_bootmem_pages(PAGE_SIZE);
+- pda = alloc_bootmem(sizeof(*pda));
+- /* alloc_bootmem(_pages) panics on failure, so no check */
+-
+- memset(gdt, 0, PAGE_SIZE);
+- memset(pda, 0, sizeof(*pda));
+- } else {
+- /* GDT and PDA might already have been allocated if
+- this is a CPU hotplug re-insertion. */
+- if (gdt == NULL)
+- gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+-
+- if (pda == NULL)
+- pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
+-
+- if (unlikely(!gdt || !pda)) {
+- free_pages((unsigned long)gdt, 0);
+- kfree(pda);
+- return 0;
+- }
+- }
+-
+- cpu_gdt_descr->address = (unsigned long)gdt;
+- cpu_pda(cpu) = pda;
+-
+- return 1;
+-}
+-
+-/* Initial PDA used by boot CPU */
+-struct i386_pda boot_pda = {
+- ._pda = &boot_pda,
+- .cpu_number = 0,
+- .pcurrent = &init_task,
+-};
+-
+-static inline void set_kernel_fs(void)
+-{
+- /* Set %fs for this CPU's PDA. Memory clobber is to create a
+- barrier with respect to any PDA operations, so the compiler
+- doesn't move any before here. */
+- asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
+-}
+-
+-/* Initialize the CPU's GDT and PDA. The boot CPU does this for
+- itself, but secondaries find this done for them. */
+-__cpuinit int init_gdt(int cpu, struct task_struct *idle)
+-{
+- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+- struct desc_struct *gdt;
+- struct i386_pda *pda;
+-
+- /* For non-boot CPUs, the GDT and PDA should already have been
+- allocated. */
+- if (!alloc_gdt(cpu)) {
+- printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
+- return 0;
+- }
+-
+- gdt = (struct desc_struct *)cpu_gdt_descr->address;
+- pda = cpu_pda(cpu);
+-
+- BUG_ON(gdt == NULL || pda == NULL);
+-
+- /*
+- * Initialize the per-CPU GDT with the boot GDT,
+- * and set up the GDT descriptor:
+- */
+- memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+- cpu_gdt_descr->size = GDT_SIZE - 1;
+-
+- pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
+- (u32 *)&gdt[GDT_ENTRY_PDA].b,
+- (unsigned long)pda, sizeof(*pda) - 1,
+- 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
+-
+- memset(pda, 0, sizeof(*pda));
+- pda->_pda = pda;
+- pda->cpu_number = cpu;
+- pda->pcurrent = idle;
+-
+- return 1;
+-}
+-
+-void __cpuinit cpu_set_gdt(int cpu)
+-{
+- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
++ struct Xgt_desc_struct gdt_descr;
+ unsigned long va, frames[16];
+ int f;
+
+- for (va = cpu_gdt_descr->address, f = 0;
+- va < cpu_gdt_descr->address + cpu_gdt_descr->size;
++ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
++ gdt_descr.size = GDT_SIZE - 1;
++
++ for (va = gdt_descr.address, f = 0;
++ va < gdt_descr.address + gdt_descr.size;
+ va += PAGE_SIZE, f++) {
+ frames[f] = virt_to_mfn(va);
+ make_lowmem_page_readonly(
+ (void *)va, XENFEAT_writable_descriptor_tables);
+ }
+- BUG_ON(HYPERVISOR_set_gdt(frames, cpu_gdt_descr->size / 8));
+-
+- set_kernel_fs();
++ if (HYPERVISOR_set_gdt(frames, gdt_descr.size / 8))
++ BUG();
++ asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
+ }
+
+-/* Common CPU init for both boot and secondary CPUs */
+-static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
++/*
++ * cpu_init() initializes state that is per-CPU. Some data is already
++ * initialized (naturally) in the bootstrap process, such as the GDT
++ * and IDT. We reload them nevertheless, this function acts as a
++ * 'CPU state barrier', nothing should get across.
++ */
++void __cpuinit cpu_init(void)
+ {
++ int cpu = smp_processor_id();
++ struct task_struct *curr = current;
+ #ifndef CONFIG_X86_NO_TSS
+ struct tss_struct * t = &per_cpu(init_tss, cpu);
+ #endif
+@@ -757,6 +699,8 @@ static void __cpuinit _cpu_init(int cpu,
+ set_in_cr4(X86_CR4_TSD);
+ }
+
++ switch_to_new_gdt();
++
+ /*
+ * Set up and load the per-CPU TSS and LDT
+ */
+@@ -794,38 +738,6 @@ static void __cpuinit _cpu_init(int cpu,
+ mxcsr_feature_mask_init();
+ }
+
+-/* Entrypoint to initialize secondary CPU */
+-void __cpuinit secondary_cpu_init(void)
+-{
+- int cpu = smp_processor_id();
+- struct task_struct *curr = current;
+-
+- _cpu_init(cpu, curr);
+-}
+-
+-/*
+- * cpu_init() initializes state that is per-CPU. Some data is already
+- * initialized (naturally) in the bootstrap process, such as the GDT
+- * and IDT. We reload them nevertheless, this function acts as a
+- * 'CPU state barrier', nothing should get across.
+- */
+-void __cpuinit cpu_init(void)
+-{
+- int cpu = smp_processor_id();
+- struct task_struct *curr = current;
+-
+- /* Set up the real GDT and PDA, so we can transition from the
+- boot versions. */
+- if (!init_gdt(cpu, curr)) {
+- /* failed to allocate something; not much we can do... */
+- for (;;)
+- local_irq_enable();
+- }
+-
+- cpu_set_gdt(cpu);
+- _cpu_init(cpu, curr);
+-}
+-
+ #ifdef CONFIG_HOTPLUG_CPU
+ void __cpuinit cpu_uninit(void)
+ {
+Index: 10.3-2007-11-26/arch/i386/kernel/cpu/mtrr/main-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/cpu/mtrr/main-xen.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/cpu/mtrr/main-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -166,7 +166,7 @@ mtrr_del(int reg, unsigned long base, un
+ EXPORT_SYMBOL(mtrr_add);
+ EXPORT_SYMBOL(mtrr_del);
+
+-void __init mtrr_bp_init(void)
++__init void mtrr_bp_init(void)
+ {
+ }
+
+Index: 10.3-2007-11-26/arch/i386/kernel/e820-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/e820-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/e820-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -162,26 +162,27 @@ static struct resource standard_io_resou
+
+ static int __init romsignature(const unsigned char *rom)
+ {
++ const unsigned short * const ptr = (const unsigned short *)rom;
+ unsigned short sig;
+
+- return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
+- sig == ROMSIGNATURE;
++ return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
+ }
+
+-static int __init romchecksum(unsigned char *rom, unsigned long length)
++static int __init romchecksum(const unsigned char *rom, unsigned long length)
+ {
+- unsigned char sum;
++ unsigned char sum, c;
+
+- for (sum = 0; length; length--)
+- sum += *rom++;
+- return sum == 0;
++ for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
++ sum += c;
++ return !length && !sum;
+ }
+
+ static void __init probe_roms(void)
+ {
++ const unsigned char *rom;
+ unsigned long start, length, upper;
+- unsigned char *rom;
+- int i;
++ unsigned char c;
++ int i;
+
+ #ifdef CONFIG_XEN
+ /* Nothing to do if not running in dom0. */
+@@ -198,8 +199,11 @@ static void __init probe_roms(void)
+
+ video_rom_resource.start = start;
+
++ if (probe_kernel_address(rom + 2, c) != 0)
++ continue;
++
+ /* 0 < length <= 0x7f * 512, historically */
+- length = rom[2] * 512;
++ length = c * 512;
+
+ /* if checksum okay, trust length byte */
+ if (length && romchecksum(rom, length))
+@@ -233,8 +237,11 @@ static void __init probe_roms(void)
+ if (!romsignature(rom))
+ continue;
+
++ if (probe_kernel_address(rom + 2, c) != 0)
++ continue;
++
+ /* 0 < length <= 0x7f * 512, historically */
+- length = rom[2] * 512;
++ length = c * 512;
+
+ /* but accept any length that fits if checksum okay */
+ if (!length || start + length > upper || !romchecksum(rom, length))
+@@ -249,7 +256,7 @@ static void __init probe_roms(void)
+ }
+
+ #ifdef CONFIG_XEN
+-static struct e820map machine_e820 __initdata;
++static struct e820map machine_e820;
+ #define e820 machine_e820
+ #endif
+
+@@ -409,10 +416,8 @@ int __init sanitize_e820_map(struct e820
+ ____________________33__
+ ______________________4_
+ */
+- printk("sanitize start\n");
+ /* if there's only one memory region, don't bother */
+ if (*pnr_map < 2) {
+- printk("sanitize bail 0\n");
+ return -1;
+ }
+
+@@ -421,7 +426,6 @@ int __init sanitize_e820_map(struct e820
+ /* bail out if we find any unreasonable addresses in bios map */
+ for (i=0; i<old_nr; i++)
+ if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
+- printk("sanitize bail 1\n");
+ return -1;
+ }
+
+@@ -517,7 +521,6 @@ int __init sanitize_e820_map(struct e820
+ memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
+ *pnr_map = new_nr;
+
+- printk("sanitize end\n");
+ return 0;
+ }
+
+@@ -552,7 +555,6 @@ int __init copy_e820_map(struct e820entr
+ unsigned long long size = biosmap->size;
+ unsigned long long end = start + size;
+ unsigned long type = biosmap->type;
+- printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
+
+ /* Overflow in 64 bits? Ignore the memory map. */
+ if (start > end)
+@@ -564,17 +566,11 @@ int __init copy_e820_map(struct e820entr
+ * Not right. Fix it up.
+ */
+ if (type == E820_RAM) {
+- printk("copy_e820_map() type is E820_RAM\n");
+ if (start < 0x100000ULL && end > 0xA0000ULL) {
+- printk("copy_e820_map() lies in range...\n");
+- if (start < 0xA0000ULL) {
+- printk("copy_e820_map() start < 0xA0000ULL\n");
++ if (start < 0xA0000ULL)
+ add_memory_region(start, 0xA0000ULL-start, type);
+- }
+- if (end <= 0x100000ULL) {
+- printk("copy_e820_map() end <= 0x100000ULL\n");
++ if (end <= 0x100000ULL)
+ continue;
+- }
+ start = 0x100000ULL;
+ size = end - start;
+ }
+@@ -887,6 +883,33 @@ void __init limit_regions(unsigned long
+ print_memory_map("limit_regions endfunc");
+ }
+
++/*
++ * This function checks if any part of the range <start,end> is mapped
++ * with type.
++ */
++int
++e820_any_mapped(u64 start, u64 end, unsigned type)
++{
++ int i;
++#ifndef CONFIG_XEN
++ for (i = 0; i < e820.nr_map; i++) {
++ const struct e820entry *ei = &e820.map[i];
++#else
++ if (!is_initial_xendomain())
++ return 0;
++ for (i = 0; i < machine_e820.nr_map; ++i) {
++ const struct e820entry *ei = &machine_e820.map[i];
++#endif
++ if (type && ei->type != type)
++ continue;
++ if (ei->addr >= end || ei->addr + ei->size <= start)
++ continue;
++ return 1;
++ }
++ return 0;
++}
++EXPORT_SYMBOL_GPL(e820_any_mapped);
++
+ /*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+Index: 10.3-2007-11-26/arch/i386/kernel/entry-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/entry-xen.S 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/entry-xen.S 2007-10-22 13:58:57.000000000 +0200
+@@ -15,7 +15,7 @@
+ * I changed all the .align's to 4 (16 byte alignment), as that's faster
+ * on a 486.
+ *
+- * Stack layout in 'ret_from_system_call':
++ * Stack layout in 'syscall_exit':
+ * ptrace needs to have all regs on the stack.
+ * if the order here is changed, it needs to be
+ * updated in fork.c:copy_process, signal.c:do_signal,
+@@ -135,7 +135,7 @@ NMI_MASK = 0x80000000
+ movl $(__USER_DS), %edx; \
+ movl %edx, %ds; \
+ movl %edx, %es; \
+- movl $(__KERNEL_PDA), %edx; \
++ movl $(__KERNEL_PERCPU), %edx; \
+ movl %edx, %fs
+
+ #define RESTORE_INT_REGS \
+@@ -308,16 +308,12 @@ sysenter_past_esp:
+ pushl $(__USER_CS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET cs, 0*/
+-#ifndef CONFIG_COMPAT_VDSO
+ /*
+ * Push current_thread_info()->sysenter_return to the stack.
+ * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
+ * pushed above; +8 corresponds to copy_thread's esp0 setting.
+ */
+ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
+-#else
+- pushl $SYSENTER_RETURN
+-#endif
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eip, 0
+
+@@ -345,7 +341,7 @@ sysenter_past_esp:
+ jae syscall_badsys
+ call *sys_call_table(,%eax,4)
+ movl %eax,PT_EAX(%esp)
+- DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
++ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ testw $_TIF_ALLWORK_MASK, %cx
+@@ -374,10 +370,6 @@ ENTRY(system_call)
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+- testl $TF_MASK,PT_EFLAGS(%esp)
+- jz no_singlestep
+- orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+-no_singlestep:
+ # system call tracing in operation / emulation
+ /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
+ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+@@ -392,6 +384,10 @@ syscall_exit:
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ TRACE_IRQS_OFF
++ testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
++ jz no_singlestep
++ orl $_TIF_SINGLESTEP,TI_flags(%ebp)
++no_singlestep:
+ movl TI_flags(%ebp), %ecx
+ testw $_TIF_ALLWORK_MASK, %cx # current->work
+ jne syscall_exit_work
+@@ -609,9 +605,7 @@ END(syscall_badsys)
+ #ifndef CONFIG_XEN
+ #define FIXUP_ESPFIX_STACK \
+ /* since we are on a wrong stack, we cant make it a C code :( */ \
+- movl %fs:PDA_cpu, %ebx; \
+- PER_CPU(cpu_gdt_descr, %ebx); \
+- movl GDS_address(%ebx), %ebx; \
++ PER_CPU(gdt_page, %ebx); \
+ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
+ addl %esp, %eax; \
+ pushl $__KERNEL_DS; \
+@@ -684,7 +678,7 @@ ENTRY(name) \
+ SAVE_ALL; \
+ TRACE_IRQS_OFF \
+ movl %esp,%eax; \
+- call smp_/**/name; \
++ call smp_##name; \
+ jmp ret_from_intr; \
+ CFI_ENDPROC; \
+ ENDPROC(name)
+@@ -692,10 +686,6 @@ ENDPROC(name)
+ /* The include is where all of the SMP etc. interrupts come from */
+ #include "entry_arch.h"
+
+-/* This alternate entry is needed because we hijack the apic LVTT */
+-#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
+-BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
+-#endif
+ #else
+ #define UNWIND_ESPFIX_STACK
+ #endif
+@@ -738,7 +728,7 @@ error_code:
+ pushl %fs
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET fs, 0*/
+- movl $(__KERNEL_PDA), %ecx
++ movl $(__KERNEL_PERCPU), %ecx
+ movl %ecx, %fs
+ UNWIND_ESPFIX_STACK
+ popl %ecx
+Index: 10.3-2007-11-26/arch/i386/kernel/head-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/head-xen.S 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/head-xen.S 2007-10-22 13:58:57.000000000 +0200
+@@ -37,7 +37,8 @@ ENTRY(startup_32)
+ /* Set up the stack pointer */
+ movl $(init_thread_union+THREAD_SIZE),%esp
+
+- call setup_pda
++ movl %ss,%eax
++ movl %eax,%fs # gets reset once there's real percpu
+
+ /* get vendor info */
+ xorl %eax,%eax # call CPUID with 0 -> return vendor ID
+@@ -64,55 +65,11 @@ ENTRY(startup_32)
+ xorl %eax,%eax # Clear GS
+ movl %eax,%gs
+
+- movl $(__KERNEL_PDA),%eax
+- mov %eax,%fs
+-
+ cld # gcc2 wants the direction flag cleared at all times
+
+ pushl $0 # fake return address for unwinder
+ jmp start_kernel
+
+-/*
+- * Point the GDT at this CPU's PDA. This will be
+- * cpu_gdt_table and boot_pda.
+- */
+-ENTRY(setup_pda)
+- /* get the PDA pointer */
+- movl $boot_pda, %eax
+-
+- /* slot the PDA address into the GDT */
+- mov $cpu_gdt_table, %ecx
+- mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
+- shr $16, %eax
+- mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
+- mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
+-
+- # %esi still points to start_info, and no registers
+- # need to be preserved.
+-
+- movl XEN_START_mfn_list(%esi), %ebx
+- movl $(cpu_gdt_table - __PAGE_OFFSET), %eax
+- shrl $PAGE_SHIFT, %eax
+- movl (%ebx,%eax,4), %ecx
+- pushl %ecx # frame number for set_gdt below
+-
+- xorl %esi, %esi
+- xorl %edx, %edx
+- shldl $PAGE_SHIFT, %ecx, %edx
+- shll $PAGE_SHIFT, %ecx
+- orl $0x61, %ecx
+- movl $cpu_gdt_table, %ebx
+- movl $__HYPERVISOR_update_va_mapping, %eax
+- int $0x82
+-
+- movl $(PAGE_SIZE_asm / 8), %ecx
+- movl %esp, %ebx
+- movl $__HYPERVISOR_set_gdt, %eax
+- int $0x82
+-
+- popl %ecx
+- ret
+-
+ #define HYPERCALL_PAGE_OFFSET 0x1000
+ .org HYPERCALL_PAGE_OFFSET
+ ENTRY(hypercall_page)
+@@ -138,60 +95,6 @@ ENTRY(empty_zero_page)
+ */
+ .data
+
+-/*
+- * The Global Descriptor Table contains 28 quadwords, per-CPU.
+- */
+- .section .data.page_aligned, "aw"
+- .align PAGE_SIZE_asm
+-ENTRY(cpu_gdt_table)
+- .quad 0x0000000000000000 /* NULL descriptor */
+- .quad 0x0000000000000000 /* 0x0b reserved */
+- .quad 0x0000000000000000 /* 0x13 reserved */
+- .quad 0x0000000000000000 /* 0x1b reserved */
+- .quad 0x0000000000000000 /* 0x20 unused */
+- .quad 0x0000000000000000 /* 0x28 unused */
+- .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
+- .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
+- .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
+- .quad 0x0000000000000000 /* 0x4b reserved */
+- .quad 0x0000000000000000 /* 0x53 reserved */
+- .quad 0x0000000000000000 /* 0x5b reserved */
+-
+- .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+- .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+- .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
+- .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
+-
+- .quad 0x0000000000000000 /* 0x80 TSS descriptor */
+- .quad 0x0000000000000000 /* 0x88 LDT descriptor */
+-
+- /*
+- * Segments used for calling PnP BIOS have byte granularity.
+- * They code segments and data segments have fixed 64k limits,
+- * the transfer segment sizes are set at run time.
+- */
+- .quad 0x0000000000000000 /* 0x90 32-bit code */
+- .quad 0x0000000000000000 /* 0x98 16-bit code */
+- .quad 0x0000000000000000 /* 0xa0 16-bit data */
+- .quad 0x0000000000000000 /* 0xa8 16-bit data */
+- .quad 0x0000000000000000 /* 0xb0 16-bit data */
+-
+- /*
+- * The APM segments have byte granularity and their bases
+- * are set at run time. All have 64k limits.
+- */
+- .quad 0x0000000000000000 /* 0xb8 APM CS code */
+- .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */
+- .quad 0x0000000000000000 /* 0xc8 APM DS data */
+-
+- .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */
+- .quad 0x00cf92000000ffff /* 0xd8 - PDA */
+- .quad 0x0000000000000000 /* 0xe0 - unused */
+- .quad 0x0000000000000000 /* 0xe8 - unused */
+- .quad 0x0000000000000000 /* 0xf0 - unused */
+- .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
+- .align PAGE_SIZE_asm
+-
+ #if CONFIG_XEN_COMPAT <= 0x030002
+ /*
+ * __xen_guest information
+Index: 10.3-2007-11-26/arch/i386/kernel/io_apic-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/io_apic-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/io_apic-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -25,7 +25,6 @@
+ #include <linux/init.h>
+ #include <linux/delay.h>
+ #include <linux/sched.h>
+-#include <linux/smp_lock.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/compiler.h>
+ #include <linux/acpi.h>
+@@ -35,6 +34,7 @@
+ #include <linux/msi.h>
+ #include <linux/htirq.h>
+ #include <linux/freezer.h>
++#include <linux/kthread.h>
+
+ #include <asm/io.h>
+ #include <asm/smp.h>
+@@ -705,8 +705,6 @@ static int balanced_irq(void *unused)
+ unsigned long prev_balance_time = jiffies;
+ long time_remaining = balanced_irq_interval;
+
+- daemonize("kirqd");
+-
+ /* push everything to CPU 0 to give us a starting point. */
+ for (i = 0 ; i < NR_IRQS ; i++) {
+ irq_desc[i].pending_mask = cpumask_of_cpu(0);
+@@ -766,10 +764,9 @@ static int __init balanced_irq_init(void
+ }
+
+ printk(KERN_INFO "Starting balanced_irq\n");
+- if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
++ if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
+ return 0;
+- else
+- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
++ printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
+ failed:
+ for_each_possible_cpu(i) {
+ kfree(irq_cpu_data[i].irq_delta);
+@@ -1445,10 +1442,6 @@ static void __init setup_ExtINT_IRQ0_pin
+ enable_8259A_irq(0);
+ }
+
+-static inline void UNEXPECTED_IO_APIC(void)
+-{
+-}
+-
+ void __init print_IO_APIC(void)
+ {
+ int apic, i;
+@@ -1488,34 +1481,12 @@ void __init print_IO_APIC(void)
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
+ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
+ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
+- if (reg_00.bits.ID >= get_physical_broadcast())
+- UNEXPECTED_IO_APIC();
+- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
+- UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
+- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
+- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
+- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
+- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
+- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
+- (reg_01.bits.entries != 0x2E) &&
+- (reg_01.bits.entries != 0x3F)
+- )
+- UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
+- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
+- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
+- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
+- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
+- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
+- )
+- UNEXPECTED_IO_APIC();
+- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
+- UNEXPECTED_IO_APIC();
+
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+@@ -1525,8 +1496,6 @@ void __init print_IO_APIC(void)
+ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
+- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
+- UNEXPECTED_IO_APIC();
+ }
+
+ /*
+@@ -1538,8 +1507,6 @@ void __init print_IO_APIC(void)
+ reg_03.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
+- if (reg_03.bits.__reserved_1)
+- UNEXPECTED_IO_APIC();
+ }
+
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
+@@ -2679,19 +2646,19 @@ int arch_setup_msi_irq(struct pci_dev *d
+ if (irq < 0)
+ return irq;
+
+- set_irq_msi(irq, desc);
+ ret = msi_compose_msg(dev, irq, &msg);
+ if (ret < 0) {
+ destroy_irq(irq);
+ return ret;
+ }
+
++ set_irq_msi(irq, desc);
+ write_msi_msg(irq, &msg);
+
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
+ "edge");
+
+- return irq;
++ return 0;
+ }
+
+ void arch_teardown_msi_irq(unsigned int irq)
+Index: 10.3-2007-11-26/arch/i386/kernel/ioport-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/ioport-xen.c 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/ioport-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -12,10 +12,10 @@
+ #include <linux/types.h>
+ #include <linux/ioport.h>
+ #include <linux/smp.h>
+-#include <linux/smp_lock.h>
+ #include <linux/stddef.h>
+ #include <linux/slab.h>
+ #include <linux/thread_info.h>
++#include <linux/syscalls.h>
+ #include <xen/interface/physdev.h>
+
+ /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+Index: 10.3-2007-11-26/arch/i386/kernel/irq-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/irq-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/irq-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -24,6 +24,9 @@
+ DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
+ EXPORT_PER_CPU_SYMBOL(irq_stat);
+
++DEFINE_PER_CPU(struct pt_regs *, irq_regs);
++EXPORT_PER_CPU_SYMBOL(irq_regs);
++
+ /*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+Index: 10.3-2007-11-26/arch/i386/kernel/ldt-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/ldt-xen.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/ldt-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -10,7 +10,6 @@
+ #include <linux/string.h>
+ #include <linux/mm.h>
+ #include <linux/smp.h>
+-#include <linux/smp_lock.h>
+ #include <linux/vmalloc.h>
+ #include <linux/slab.h>
+
+Index: 10.3-2007-11-26/arch/i386/kernel/microcode-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/microcode-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/microcode-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -135,7 +135,7 @@ static int __init microcode_dev_init (vo
+ return 0;
+ }
+
+-static void __exit microcode_dev_exit (void)
++static void microcode_dev_exit (void)
+ {
+ misc_deregister(&microcode_dev);
+ }
+Index: 10.3-2007-11-26/arch/i386/kernel/mpparse-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/mpparse-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/mpparse-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -18,7 +18,6 @@
+ #include <linux/acpi.h>
+ #include <linux/delay.h>
+ #include <linux/bootmem.h>
+-#include <linux/smp_lock.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/bitops.h>
+@@ -484,7 +483,7 @@ static int __init smp_read_mpc(struct mp
+ }
+ ++mpc_record;
+ }
+- clustered_apic_check();
++ setup_apic_routing();
+ if (!num_processors)
+ printk(KERN_ERR "SMP mptable: no processors registered!\n");
+ return num_processors;
+Index: 10.3-2007-11-26/arch/i386/kernel/pci-dma-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/pci-dma-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/pci-dma-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -13,6 +13,7 @@
+ #include <linux/pci.h>
+ #include <linux/module.h>
+ #include <linux/version.h>
++#include <linux/pci.h>
+ #include <asm/io.h>
+ #include <xen/balloon.h>
+ #include <xen/gnttab.h>
+@@ -251,7 +252,7 @@ int dma_declare_coherent_memory(struct d
+ {
+ void __iomem *mem_base = NULL;
+ int pages = size >> PAGE_SHIFT;
+- int bitmap_size = (pages + 31)/32;
++ int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
+
+ if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
+ goto out;
+@@ -324,6 +325,32 @@ void *dma_mark_declared_memory_occupied(
+ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
+ #endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
+
++#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
++/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
++
++int forbid_dac;
++EXPORT_SYMBOL(forbid_dac);
++
++static __devinit void via_no_dac(struct pci_dev *dev)
++{
++ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
++ printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
++ forbid_dac = 1;
++ }
++}
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
++
++static int check_iommu(char *s)
++{
++ if (!strcmp(s, "usedac")) {
++ forbid_dac = -1;
++ return 1;
++ }
++ return 0;
++}
++__setup("iommu=", check_iommu);
++#endif
++
+ dma_addr_t
+ dma_map_single(struct device *dev, void *ptr, size_t size,
+ enum dma_data_direction direction)
+Index: 10.3-2007-11-26/arch/i386/kernel/process-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/process-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/process-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -21,7 +21,6 @@
+ #include <linux/mm.h>
+ #include <linux/elfcore.h>
+ #include <linux/smp.h>
+-#include <linux/smp_lock.h>
+ #include <linux/stddef.h>
+ #include <linux/slab.h>
+ #include <linux/vmalloc.h>
+@@ -39,6 +38,7 @@
+ #include <linux/random.h>
+ #include <linux/personality.h>
+ #include <linux/tick.h>
++#include <linux/percpu.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -61,7 +61,6 @@
+
+ #include <asm/tlbflush.h>
+ #include <asm/cpu.h>
+-#include <asm/pda.h>
+
+ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+@@ -70,6 +69,12 @@ static int hlt_counter;
+ unsigned long boot_option_idle_override = 0;
+ EXPORT_SYMBOL(boot_option_idle_override);
+
++DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
++EXPORT_PER_CPU_SYMBOL(current_task);
++
++DEFINE_PER_CPU(int, cpu_number);
++EXPORT_PER_CPU_SYMBOL(cpu_number);
++
+ /*
+ * Return saved PC of a blocked thread.
+ */
+@@ -168,6 +173,7 @@ void cpu_idle(void)
+ if (__get_cpu_var(cpu_idle_state))
+ __get_cpu_var(cpu_idle_state) = 0;
+
++ check_pgt_cache();
+ rmb();
+ idle = xen_idle; /* no alternatives */
+
+@@ -218,18 +224,19 @@ void __devinit select_idle_routine(const
+ {
+ }
+
+-static int __init idle_setup (char *str)
++static int __init idle_setup(char *str)
+ {
+- if (!strncmp(str, "poll", 4)) {
++ if (!strcmp(str, "poll")) {
+ printk("using polling idle threads.\n");
+ pm_idle = poll_idle;
+ }
++ else
++ return -1;
+
+ boot_option_idle_override = 1;
+- return 1;
++ return 0;
+ }
+-
+-__setup("idle=", idle_setup);
++early_param("idle", idle_setup);
+
+ void show_regs(struct pt_regs * regs)
+ {
+@@ -282,7 +289,7 @@ int kernel_thread(int (*fn)(void *), voi
+
+ regs.xds = __USER_DS;
+ regs.xes = __USER_DS;
+- regs.xfs = __KERNEL_PDA;
++ regs.xfs = __KERNEL_PERCPU;
+ regs.orig_eax = -1;
+ regs.eip = (unsigned long) kernel_thread_helper;
+ regs.xcs = __KERNEL_CS | get_kernel_rpl();
+@@ -555,7 +562,7 @@ struct task_struct fastcall * __switch_t
+ * multicall to indicate FPU task switch, rather than
+ * synchronously trapping to Xen.
+ */
+- if (prev_p->thread_info->status & TS_USEDFPU) {
++ if (task_thread_info(prev_p)->status & TS_USEDFPU) {
+ __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 1;
+@@ -645,7 +652,7 @@ struct task_struct fastcall * __switch_t
+ if (prev->gs | next->gs)
+ loadsegment(gs, next->gs);
+
+- write_pda(pcurrent, next_p);
++ x86_write_percpu(current_task, next_p);
+
+ return prev_p;
+ }
+Index: 10.3-2007-11-26/arch/i386/kernel/quirks-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/quirks-xen.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/quirks-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -3,12 +3,10 @@
+ */
+ #include <linux/pci.h>
+ #include <linux/irq.h>
+-#include <asm/pci-direct.h>
+-#include <asm/genapic.h>
+-#include <asm/cpu.h>
+
+ #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI)
+-static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
++
++static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
+ {
+ u8 config, rev;
+ u32 word;
+@@ -16,7 +14,7 @@ static void __devinit verify_quirk_intel
+ /* BIOS may enable hardware IRQ balancing for
+ * E7520/E7320/E7525(revision ID 0x9 and below)
+ * based platforms.
+- * For those platforms, make sure that the genapic is set to 'flat'
++ * Disable SW irqbalance/affinity on those platforms.
+ */
+ pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
+ if (rev > 0x9)
+@@ -30,59 +28,19 @@ static void __devinit verify_quirk_intel
+ raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
+
+ if (!(word & (1 << 13))) {
+-#ifndef CONFIG_XEN
+-#ifdef CONFIG_X86_64
+- if (genapic != &apic_flat)
+- panic("APIC mode must be flat on this system\n");
+-#elif defined(CONFIG_X86_GENERICARCH)
+- if (genapic != &apic_default)
+- panic("APIC mode must be default(flat) on this system. Use apic=default\n");
+-#endif
+-#endif
+- }
+-
+- /* put back the original value for config space*/
+- if (!(config & 0x2))
+- pci_write_config_byte(dev, 0xf4, config);
+-}
+-
+-void __init quirk_intel_irqbalance(void)
+-{
+- u8 config, rev;
+- u32 word;
+-
+- /* BIOS may enable hardware IRQ balancing for
+- * E7520/E7320/E7525(revision ID 0x9 and below)
+- * based platforms.
+- * Disable SW irqbalance/affinity on those platforms.
+- */
+- rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
+- if (rev > 0x9)
+- return;
+-
+- printk(KERN_INFO "Intel E7520/7320/7525 detected.");
+-
+- /* enable access to config space */
+- config = read_pci_config_byte(0, 0, 0, 0xf4);
+- write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
+-
+- /* read xTPR register */
+- word = read_pci_config_16(0, 0, 0x40, 0x4c);
+-
+- if (!(word & (1 << 13))) {
+ struct xen_platform_op op;
+- printk(KERN_INFO "Disabling irq balancing and affinity\n");
++ printk(KERN_INFO "Intel E7520/7320/7525 detected. "
++ "Disabling irq balancing and affinity\n");
+ op.cmd = XENPF_platform_quirk;
+ op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING;
+ (void)HYPERVISOR_platform_op(&op);
+ }
+
+- /* put back the original value for config space */
++ /* put back the original value for config space*/
+ if (!(config & 0x2))
+- write_pci_config_byte(0, 0, 0, 0xf4, config);
++ pci_write_config_byte(dev, 0xf4, config);
+ }
+-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance);
+-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance);
+-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance);
+-
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
+ #endif
+Index: 10.3-2007-11-26/arch/i386/kernel/smp-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/smp-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/smp-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -13,7 +13,6 @@
+ #include <linux/mm.h>
+ #include <linux/delay.h>
+ #include <linux/spinlock.h>
+-#include <linux/smp_lock.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/cache.h>
+@@ -216,7 +215,6 @@ static cpumask_t flush_cpumask;
+ static struct mm_struct * flush_mm;
+ static unsigned long flush_va;
+ static DEFINE_SPINLOCK(tlbstate_lock);
+-#define FLUSH_ALL 0xffffffff
+
+ /*
+ * We cannot call mmdrop() because we are in interrupt context,
+@@ -298,7 +296,7 @@ irqreturn_t smp_invalidate_interrupt(int
+
+ if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+- if (flush_va == FLUSH_ALL)
++ if (flush_va == TLB_FLUSH_ALL)
+ local_flush_tlb();
+ else
+ __flush_tlb_one(flush_va);
+@@ -314,9 +312,11 @@ out:
+ return IRQ_HANDLED;
+ }
+
+-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+- unsigned long va)
++void xen_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
++ unsigned long va)
+ {
++ cpumask_t cpumask = *cpumaskp;
++
+ /*
+ * A couple of (to be removed) sanity checks:
+ *
+@@ -327,10 +327,12 @@ static void flush_tlb_others(cpumask_t c
+ BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+ BUG_ON(!mm);
+
++#ifdef CONFIG_HOTPLUG_CPU
+ /* If a CPU which we ran on has gone down, OK. */
+ cpus_and(cpumask, cpumask, cpu_online_map);
+- if (cpus_empty(cpumask))
++ if (unlikely(cpus_empty(cpumask)))
+ return;
++#endif
+
+ /*
+ * i'm not happy about this global shared spinlock in the
+@@ -341,17 +343,7 @@ static void flush_tlb_others(cpumask_t c
+
+ flush_mm = mm;
+ flush_va = va;
+-#if NR_CPUS <= BITS_PER_LONG
+- atomic_set_mask(cpumask, &flush_cpumask);
+-#else
+- {
+- int k;
+- unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
+- unsigned long *cpu_mask = (unsigned long *)&cpumask;
+- for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
+- atomic_set_mask(cpu_mask[k], &flush_mask[k]);
+- }
+-#endif
++ cpus_or(flush_cpumask, cpumask, flush_cpumask);
+ /*
+ * We have to send the IPI only to
+ * CPUs affected.
+@@ -378,7 +370,7 @@ void flush_tlb_current_task(void)
+
+ local_flush_tlb();
+ if (!cpus_empty(cpu_mask))
+- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
++ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+ preempt_enable();
+ }
+
+@@ -397,7 +389,7 @@ void flush_tlb_mm (struct mm_struct * mm
+ leave_mm(smp_processor_id());
+ }
+ if (!cpus_empty(cpu_mask))
+- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
++ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+
+ preempt_enable();
+ }
+@@ -460,7 +452,7 @@ void flush_tlb_all(void)
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+-void smp_send_reschedule(int cpu)
++void xen_smp_send_reschedule(int cpu)
+ {
+ WARN_ON(cpu_is_offline(cpu));
+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+@@ -492,36 +484,79 @@ void unlock_ipi_call_lock(void)
+
+ static struct call_data_struct *call_data;
+
++static void __smp_call_function(void (*func) (void *info), void *info,
++ int nonatomic, int wait)
++{
++ struct call_data_struct data;
++ int cpus = num_online_cpus() - 1;
++
++ if (!cpus)
++ return;
++
++ data.func = func;
++ data.info = info;
++ atomic_set(&data.started, 0);
++ data.wait = wait;
++ if (wait)
++ atomic_set(&data.finished, 0);
++
++ call_data = &data;
++ mb();
++
++ /* Send a message to all other CPUs and wait for them to respond */
++ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
++
++ /* Wait for response */
++ while (atomic_read(&data.started) != cpus)
++ cpu_relax();
++
++ if (wait)
++ while (atomic_read(&data.finished) != cpus)
++ cpu_relax();
++}
++
++
+ /**
+- * smp_call_function(): Run a function on all other CPUs.
++ * smp_call_function_mask(): Run a function on a set of other CPUs.
++ * @mask: The set of cpus to run on. Must not include the current cpu.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+- * @nonatomic: currently unused.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+- * Returns 0 on success, else a negative status code. Does not return until
+- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
++ * Returns 0 on success, else a negative status code.
++ *
++ * If @wait is true, then returns once @func has returned; otherwise
++ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+- int wait)
++int
++xen_smp_call_function_mask(cpumask_t mask,
++ void (*func)(void *), void *info,
++ int wait)
+ {
+ struct call_data_struct data;
++ cpumask_t allbutself;
+ int cpus;
+
++ /* Can deadlock when called with interrupts disabled */
++ WARN_ON(irqs_disabled());
++
+ /* Holding any lock stops cpus from going down. */
+ spin_lock(&call_lock);
+- cpus = num_online_cpus() - 1;
++
++ allbutself = cpu_online_map;
++ cpu_clear(smp_processor_id(), allbutself);
++
++ cpus_and(mask, mask, allbutself);
++ cpus = cpus_weight(mask);
++
+ if (!cpus) {
+ spin_unlock(&call_lock);
+ return 0;
+ }
+
+- /* Can deadlock when called with interrupts disabled */
+- WARN_ON(irqs_disabled());
+-
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+@@ -531,9 +566,12 @@ int smp_call_function (void (*func) (voi
+
+ call_data = &data;
+ mb();
+-
+- /* Send a message to all other CPUs and wait for them to respond */
+- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
++
++ /* Send a message to other CPUs */
++ if (cpus_equal(mask, allbutself))
++ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
++ else
++ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+@@ -546,15 +584,14 @@ int smp_call_function (void (*func) (voi
+
+ return 0;
+ }
+-EXPORT_SYMBOL(smp_call_function);
+
+ static void stop_this_cpu (void * dummy)
+ {
++ local_irq_disable();
+ /*
+ * Remove this CPU:
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+- local_irq_disable();
+ #if 0
+ disable_local_APIC();
+ #endif
+@@ -567,15 +604,20 @@ static void stop_this_cpu (void * dummy)
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+-void smp_send_stop(void)
++void xen_smp_send_stop(void)
+ {
+- smp_call_function(stop_this_cpu, NULL, 1, 0);
++ /* Don't deadlock on the call lock in panic */
++ int nolock = !spin_trylock(&call_lock);
++ unsigned long flags;
+
+- local_irq_disable();
++ local_irq_save(flags);
++ __smp_call_function(stop_this_cpu, NULL, 0, 0);
++ if (!nolock)
++ spin_unlock(&call_lock);
+ #if 0
+ disable_local_APIC();
+ #endif
+- local_irq_enable();
++ local_irq_restore(flags);
+ }
+
+ /*
+@@ -616,74 +658,3 @@ irqreturn_t smp_call_function_interrupt(
+
+ return IRQ_HANDLED;
+ }
+-
+-/*
+- * this function sends a 'generic call function' IPI to one other CPU
+- * in the system.
+- *
+- * cpu is a standard Linux logical CPU number.
+- */
+-static void
+-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+- int nonatomic, int wait)
+-{
+- struct call_data_struct data;
+- int cpus = 1;
+-
+- data.func = func;
+- data.info = info;
+- atomic_set(&data.started, 0);
+- data.wait = wait;
+- if (wait)
+- atomic_set(&data.finished, 0);
+-
+- call_data = &data;
+- wmb();
+- /* Send a message to all other CPUs and wait for them to respond */
+- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
+-
+- /* Wait for response */
+- while (atomic_read(&data.started) != cpus)
+- cpu_relax();
+-
+- if (!wait)
+- return;
+-
+- while (atomic_read(&data.finished) != cpus)
+- cpu_relax();
+-}
+-
+-/*
+- * smp_call_function_single - Run a function on another CPU
+- * @func: The function to run. This must be fast and non-blocking.
+- * @info: An arbitrary pointer to pass to the function.
+- * @nonatomic: Currently unused.
+- * @wait: If true, wait until function has completed on other CPUs.
+- *
+- * Retrurns 0 on success, else a negative status code.
+- *
+- * Does not return until the remote CPU is nearly ready to execute <func>
+- * or is or has executed.
+- */
+-
+-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+- int nonatomic, int wait)
+-{
+- /* prevent preemption and reschedule on another processor */
+- int me = get_cpu();
+- if (cpu == me) {
+- WARN_ON(1);
+- put_cpu();
+- return -EBUSY;
+- }
+-
+- /* Can deadlock when called with interrupts disabled */
+- WARN_ON(irqs_disabled());
+-
+- spin_lock_bh(&call_lock);
+- __smp_call_function_single(cpu, func, info, nonatomic, wait);
+- spin_unlock_bh(&call_lock);
+- put_cpu();
+- return 0;
+-}
+-EXPORT_SYMBOL(smp_call_function_single);
+Index: 10.3-2007-11-26/arch/i386/kernel/swiotlb.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/swiotlb.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/swiotlb.c 2007-10-22 13:58:57.000000000 +0200
+@@ -729,7 +729,6 @@ swiotlb_dma_supported (struct device *hw
+ return (mask >= ((1UL << dma_bits) - 1));
+ }
+
+-EXPORT_SYMBOL(swiotlb_init);
+ EXPORT_SYMBOL(swiotlb_map_single);
+ EXPORT_SYMBOL(swiotlb_unmap_single);
+ EXPORT_SYMBOL(swiotlb_map_sg);
+Index: 10.3-2007-11-26/arch/i386/kernel/time-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/time-xen.c 2007-12-06 17:32:21.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/time-xen.c 2007-12-06 17:32:30.000000000 +0100
+@@ -79,7 +79,6 @@
+ #include <asm/i8253.h>
+ DEFINE_SPINLOCK(i8253_lock);
+ EXPORT_SYMBOL(i8253_lock);
+-int pit_latch_buggy; /* extern */
+ #else
+ volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+ #endif
+@@ -593,7 +592,7 @@ irqreturn_t timer_interrupt(int irq, voi
+ return IRQ_HANDLED;
+ }
+
+-void mark_tsc_unstable(void)
++void mark_tsc_unstable(char *reason)
+ {
+ #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
+ tsc_unstable = 1;
+@@ -812,7 +811,7 @@ static void setup_cpu0_timer_irq(void)
+ VIRQ_TIMER,
+ 0,
+ timer_interrupt,
+- SA_INTERRUPT,
++ IRQF_DISABLED,
+ "timer0",
+ NULL);
+ BUG_ON(per_cpu(timer_irq, 0) < 0);
+@@ -922,21 +921,21 @@ static void start_hz_timer(void)
+ cpu_clear(smp_processor_id(), nohz_cpu_mask);
+ }
+
+-void raw_safe_halt(void)
++void xen_safe_halt(void)
+ {
+ stop_hz_timer();
+ /* Blocking includes an implicit local_irq_enable(). */
+ HYPERVISOR_block();
+ start_hz_timer();
+ }
+-EXPORT_SYMBOL(raw_safe_halt);
++EXPORT_SYMBOL(xen_safe_halt);
+
+-void halt(void)
++void xen_halt(void)
+ {
+ if (irqs_disabled())
+ HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+ }
+-EXPORT_SYMBOL(halt);
++EXPORT_SYMBOL(xen_halt);
+
+ /* No locking required. Interrupts are disabled on all CPUs. */
+ void time_resume(void)
+@@ -983,7 +982,7 @@ int local_setup_timer(unsigned int cpu)
+ irq = bind_virq_to_irqhandler(VIRQ_TIMER,
+ cpu,
+ timer_interrupt,
+- SA_INTERRUPT,
++ IRQF_DISABLED,
+ timer_name[cpu],
+ NULL);
+ if (irq < 0)
+Index: 10.3-2007-11-26/arch/i386/kernel/traps-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/traps-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/kernel/traps-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -52,7 +52,7 @@
+ #include <asm/unwind.h>
+ #include <asm/smp.h>
+ #include <asm/arch_hooks.h>
+-#include <asm/kdebug.h>
++#include <linux/kdebug.h>
+ #include <asm/stacktrace.h>
+
+ #include <linux/module.h>
+@@ -101,20 +101,6 @@ asmlinkage void machine_check(void);
+
+ int kstack_depth_to_print = 24;
+ static unsigned int code_bytes = 64;
+-ATOMIC_NOTIFIER_HEAD(i386die_chain);
+-
+-int register_die_notifier(struct notifier_block *nb)
+-{
+- vmalloc_sync_all();
+- return atomic_notifier_chain_register(&i386die_chain, nb);
+-}
+-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */
+-
+-int unregister_die_notifier(struct notifier_block *nb)
+-{
+- return atomic_notifier_chain_unregister(&i386die_chain, nb);
+-}
+-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */
+
+ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+ {
+@@ -325,7 +311,7 @@ void show_registers(struct pt_regs *regs
+ regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
+ printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
+ TASK_COMM_LEN, current->comm, current->pid,
+- current_thread_info(), current, current->thread_info);
++ current_thread_info(), current, task_thread_info(current));
+ /*
+ * When in-kernel, we also print out the stack and code at the
+ * time of the fault..
+@@ -482,8 +468,6 @@ static void __kprobes do_trap(int trapnr
+ siginfo_t *info)
+ {
+ struct task_struct *tsk = current;
+- tsk->thread.error_code = error_code;
+- tsk->thread.trap_no = trapnr;
+
+ if (regs->eflags & VM_MASK) {
+ if (vm86)
+@@ -495,6 +479,18 @@ static void __kprobes do_trap(int trapnr
+ goto kernel_trap;
+
+ trap_signal: {
++ /*
++ * We want error_code and trap_no set for userspace faults and
++ * kernelspace faults which result in die(), but not
++ * kernelspace faults which are fixed up. die() gives the
++ * process no chance to handle the signal and notice the
++ * kernel fault information, so that won't result in polluting
++ * the information about previously queued, but not yet
++ * delivered, faults. See also do_general_protection below.
++ */
++ tsk->thread.error_code = error_code;
++ tsk->thread.trap_no = trapnr;
++
+ if (info)
+ force_sig_info(signr, info, tsk);
+ else
+@@ -503,8 +499,11 @@ static void __kprobes do_trap(int trapnr
+ }
+
+ kernel_trap: {
+- if (!fixup_exception(regs))
++ if (!fixup_exception(regs)) {
++ tsk->thread.error_code = error_code;
++ tsk->thread.trap_no = trapnr;
+ die(str, regs, error_code);
++ }
+ return;
+ }
+
+@@ -578,9 +577,6 @@ DO_ERROR_INFO(32, SIGSEGV, "iret excepti
+ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
+ long error_code)
+ {
+- current->thread.error_code = error_code;
+- current->thread.trap_no = 13;
+-
+ if (regs->eflags & VM_MASK)
+ goto gp_in_vm86;
+
+@@ -599,6 +595,8 @@ gp_in_vm86:
+
+ gp_in_kernel:
+ if (!fixup_exception(regs)) {
++ current->thread.error_code = error_code;
++ current->thread.trap_no = 13;
+ if (notify_die(DIE_GPF, "general protection fault", regs,
+ error_code, 13, SIGSEGV) == NOTIFY_STOP)
+ return;
+@@ -987,9 +985,7 @@ fastcall void do_spurious_interrupt_bug(
+ fastcall unsigned long patch_espfix_desc(unsigned long uesp,
+ unsigned long kesp)
+ {
+- int cpu = smp_processor_id();
+- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+- struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
++ struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
+ unsigned long base = (kesp - uesp) & -THREAD_SIZE;
+ unsigned long new_kesp = kesp - base;
+ unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
+Index: 10.3-2007-11-26/arch/i386/mm/fault-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/fault-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/mm/fault-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -14,19 +14,20 @@
+ #include <linux/mman.h>
+ #include <linux/mm.h>
+ #include <linux/smp.h>
+-#include <linux/smp_lock.h>
+ #include <linux/interrupt.h>
+ #include <linux/init.h>
+ #include <linux/tty.h>
+ #include <linux/vt_kern.h> /* For unblank_screen() */
+ #include <linux/highmem.h>
++#include <linux/bootmem.h> /* for max_low_pfn */
++#include <linux/vmalloc.h>
+ #include <linux/module.h>
+ #include <linux/kprobes.h>
+ #include <linux/uaccess.h>
++#include <linux/kdebug.h>
+
+ #include <asm/system.h>
+ #include <asm/desc.h>
+-#include <asm/kdebug.h>
+ #include <asm/segment.h>
+
+ extern void die(const char *,struct pt_regs *,long);
+@@ -259,25 +260,20 @@ static void dump_fault_path(unsigned lon
+ unsigned long page;
+
+ page = read_cr3();
+- page = ((unsigned long *) __va(page))[address >> 22];
+- if (oops_may_print())
+- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
+- machine_to_phys(page));
++ page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT];
++ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
++ machine_to_phys(page));
+ /*
+ * We must not directly access the pte in the highpte
+ * case if the page table is located in highmem.
+ * And lets rather not kmap-atomic the pte, just in case
+ * it's allocated already.
+ */
+-#ifdef CONFIG_HIGHPTE
+- if ((page >> PAGE_SHIFT) >= highstart_pfn)
+- return;
+-#endif
+- if ((page & 1) && oops_may_print()) {
+- page &= PAGE_MASK;
+- address &= 0x003ff000;
+- page = machine_to_phys(page);
+- page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
++ if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn
++ && (page & _PAGE_PRESENT)) {
++ page = machine_to_phys(page & PAGE_MASK);
++ page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT)
++ & (PTRS_PER_PTE - 1)];
+ printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page,
+ machine_to_phys(page));
+ }
+@@ -581,6 +577,11 @@ bad_area:
+ bad_area_nosemaphore:
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & 4) {
++ /*
++ * It's possible to have interrupts off here.
++ */
++ local_irq_enable();
++
+ /*
+ * Valid to do another page fault here because this one came
+ * from user space.
+@@ -633,7 +634,7 @@ no_context:
+ bust_spinlocks(1);
+
+ if (oops_may_print()) {
+- #ifdef CONFIG_X86_PAE
++#ifdef CONFIG_X86_PAE
+ if (error_code & 16) {
+ pte_t *pte = lookup_address(address);
+
+@@ -642,7 +643,7 @@ no_context:
+ "NX-protected page - exploit attempt? "
+ "(uid: %d)\n", current->uid);
+ }
+- #endif
++#endif
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT "BUG: unable to handle kernel NULL "
+ "pointer dereference");
+@@ -652,8 +653,8 @@ no_context:
+ printk(" at virtual address %08lx\n",address);
+ printk(KERN_ALERT " printing eip:\n");
+ printk("%08lx\n", regs->eip);
++ dump_fault_path(address);
+ }
+- dump_fault_path(address);
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+@@ -694,7 +695,6 @@ do_sigbus:
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+ }
+
+-#if !HAVE_SHARED_KERNEL_PMD
+ void vmalloc_sync_all(void)
+ {
+ /*
+@@ -710,6 +710,9 @@ void vmalloc_sync_all(void)
+ static unsigned long start = TASK_SIZE;
+ unsigned long address;
+
++ if (SHARED_KERNEL_PMD)
++ return;
++
+ BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
+ for (address = start;
+ address >= TASK_SIZE && address < hypervisor_virt_start;
+@@ -742,4 +745,3 @@ void vmalloc_sync_all(void)
+ start = address + (1UL << PMD_SHIFT);
+ }
+ }
+-#endif
+Index: 10.3-2007-11-26/arch/i386/mm/highmem-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/highmem-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/mm/highmem-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -26,7 +26,7 @@ void kunmap(struct page *page)
+ * However when holding an atomic kmap is is not legal to sleep, so atomic
+ * kmaps are appropriate for short, tight code paths only.
+ */
+-static void *__kmap_atomic(struct page *page, enum km_type type, pgprot_t prot)
++void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+ {
+ enum fixed_addresses idx;
+ unsigned long vaddr;
+@@ -49,15 +49,7 @@ static void *__kmap_atomic(struct page *
+
+ void *kmap_atomic(struct page *page, enum km_type type)
+ {
+- return __kmap_atomic(page, type, kmap_prot);
+-}
+-
+-/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */
+-void *kmap_atomic_pte(struct page *page, enum km_type type)
+-{
+- return __kmap_atomic(page, type,
+- test_bit(PG_pinned, &page->flags)
+- ? PAGE_KERNEL_RO : kmap_prot);
++ return kmap_atomic_prot(page, type, kmap_prot);
+ }
+
+ void kunmap_atomic(void *kvaddr, enum km_type type)
+@@ -80,6 +72,7 @@ void kunmap_atomic(void *kvaddr, enum km
+ #endif
+ }
+
++ arch_flush_lazy_mmu_mode();
+ pagefault_enable();
+ }
+
+@@ -117,6 +110,5 @@ struct page *kmap_atomic_to_page(void *p
+ EXPORT_SYMBOL(kmap);
+ EXPORT_SYMBOL(kunmap);
+ EXPORT_SYMBOL(kmap_atomic);
+-EXPORT_SYMBOL(kmap_atomic_pte);
+ EXPORT_SYMBOL(kunmap_atomic);
+ EXPORT_SYMBOL(kmap_atomic_to_page);
+Index: 10.3-2007-11-26/arch/i386/mm/init-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/init-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/mm/init-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -22,6 +22,7 @@
+ #include <linux/init.h>
+ #include <linux/highmem.h>
+ #include <linux/pagemap.h>
++#include <linux/pfn.h>
+ #include <linux/poison.h>
+ #include <linux/bootmem.h>
+ #include <linux/slab.h>
+@@ -67,17 +68,19 @@ static pmd_t * __init one_md_table_init(
+ pmd_t *pmd_table;
+
+ #ifdef CONFIG_X86_PAE
+- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+- paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
+- make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
+- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+- pud = pud_offset(pgd, 0);
+- if (pmd_table != pmd_offset(pud, 0))
+- BUG();
+-#else
++ if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
++ pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
++
++ paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
++ make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
++ set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
++ pud = pud_offset(pgd, 0);
++ if (pmd_table != pmd_offset(pud, 0))
++ BUG();
++ }
++#endif
+ pud = pud_offset(pgd, 0);
+ pmd_table = pmd_offset(pud, 0);
+-#endif
+
+ return pmd_table;
+ }
+@@ -88,16 +91,18 @@ static pmd_t * __init one_md_table_init(
+ */
+ static pte_t * __init one_page_table_init(pmd_t *pmd)
+ {
++#if CONFIG_XEN_COMPAT <= 0x030002
+ if (pmd_none(*pmd)) {
++#else
++ if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
++#endif
+ pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
++
+ paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
+ make_lowmem_page_readonly(page_table,
+ XENFEAT_writable_page_tables);
+ set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
+- if (page_table != pte_offset_kernel(pmd, 0))
+- BUG();
+-
+- return page_table;
++ BUG_ON(page_table != pte_offset_kernel(pmd, 0));
+ }
+
+ return pte_offset_kernel(pmd, 0);
+@@ -117,7 +122,6 @@ static pte_t * __init one_page_table_ini
+ static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
+ {
+ pgd_t *pgd;
+- pud_t *pud;
+ pmd_t *pmd;
+ int pgd_idx, pmd_idx;
+ unsigned long vaddr;
+@@ -128,12 +132,10 @@ static void __init page_table_range_init
+ pgd = pgd_base + pgd_idx;
+
+ for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
+- if (pgd_none(*pgd))
+- one_md_table_init(pgd);
+- pud = pud_offset(pgd, vaddr);
+- pmd = pmd_offset(pud, vaddr);
++ pmd = one_md_table_init(pgd);
++ pmd = pmd + pmd_index(vaddr);
+ for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
+- if (vaddr < hypervisor_virt_start && pmd_none(*pmd))
++ if (vaddr < hypervisor_virt_start)
+ one_page_table_init(pmd);
+
+ vaddr += PMD_SIZE;
+@@ -196,24 +198,25 @@ static void __init kernel_physical_mappi
+ /* Map with big pages if possible, otherwise create normal page tables. */
+ if (cpu_has_pse) {
+ unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
+-
+ if (is_kernel_text(address) || is_kernel_text(address2))
+ set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
+ else
+ set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
++
+ pfn += PTRS_PER_PTE;
+ } else {
+ pte = one_page_table_init(pmd);
+
+- pte += pte_ofs;
+- for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
+- /* XEN: Only map initial RAM allocation. */
+- if ((pfn >= max_ram_pfn) || pte_present(*pte))
+- continue;
+- if (is_kernel_text(address))
+- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+- else
+- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
++ for (pte += pte_ofs;
++ pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
++ pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
++ /* XEN: Only map initial RAM allocation. */
++ if ((pfn >= max_ram_pfn) || pte_present(*pte))
++ continue;
++ if (is_kernel_text(address))
++ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
++ else
++ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+ }
+ pte_ofs = 0;
+ }
+@@ -383,18 +386,46 @@ extern void __init remap_numa_kva(void);
+
+ pgd_t *swapper_pg_dir;
+
++static void __init xen_pagetable_setup_start(pgd_t *base)
++{
++ swapper_pg_dir = base;
++ init_mm.pgd = base;
++}
++
++static void __init xen_pagetable_setup_done(pgd_t *base)
++{
++}
++
++/*
++ * Build a proper pagetable for the kernel mappings. Up until this
++ * point, we've been running on some set of pagetables constructed by
++ * the boot process.
++ *
++ * If we're booting on native hardware, this will be a pagetable
++ * constructed in arch/i386/kernel/head.S, and not running in PAE mode
++ * (even if we'll end up running in PAE). The root of the pagetable
++ * will be swapper_pg_dir.
++ *
++ * If we're booting paravirtualized under a hypervisor, then there are
++ * more options: we may already be running PAE, and the pagetable may
++ * or may not be based in swapper_pg_dir. In any case,
++ * paravirt_pagetable_setup_start() will set up swapper_pg_dir
++ * appropriately for the rest of the initialization to work.
++ *
++ * In general, pagetable_init() assumes that the pagetable may already
++ * be partially populated, and so it avoids stomping on any existing
++ * mappings.
++ */
+ static void __init pagetable_init (void)
+ {
+- unsigned long vaddr;
++ unsigned long vaddr, end;
+ pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
+
+- swapper_pg_dir = pgd_base;
+- init_mm.pgd = pgd_base;
++ xen_pagetable_setup_start(pgd_base);
+
+ /* Enable PSE if available */
+- if (cpu_has_pse) {
++ if (cpu_has_pse)
+ set_in_cr4(X86_CR4_PSE);
+- }
+
+ /* Enable PGE if available */
+ if (cpu_has_pge) {
+@@ -411,9 +442,12 @@ static void __init pagetable_init (void)
+ * created - mappings will be set by set_fixmap():
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+- page_table_range_init(vaddr, hypervisor_virt_start, pgd_base);
++ end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
++ page_table_range_init(vaddr, end, pgd_base);
+
+ permanent_kmaps_init(pgd_base);
++
++ xen_pagetable_setup_done(pgd_base);
+ }
+
+ #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
+@@ -764,34 +798,29 @@ int remove_memory(u64 start, u64 size)
+ EXPORT_SYMBOL_GPL(remove_memory);
+ #endif
+
+-struct kmem_cache *pgd_cache;
+ struct kmem_cache *pmd_cache;
+
+ void __init pgtable_cache_init(void)
+ {
++ size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
++
+ if (PTRS_PER_PMD > 1) {
+ pmd_cache = kmem_cache_create("pmd",
+ PTRS_PER_PMD*sizeof(pmd_t),
+ PTRS_PER_PMD*sizeof(pmd_t),
+- 0,
++ SLAB_PANIC,
+ pmd_ctor,
+ NULL);
+- if (!pmd_cache)
+- panic("pgtable_cache_init(): cannot create pmd cache");
++ if (!SHARED_KERNEL_PMD) {
++ /* If we're in PAE mode and have a non-shared
++ kernel pmd, then the pgd size must be a
++ page size. This is because the pgd_list
++ links through the page structure, so there
++ can only be one pgd per page for this to
++ work. */
++ pgd_size = PAGE_SIZE;
++ }
+ }
+- pgd_cache = kmem_cache_create("pgd",
+-#ifndef CONFIG_XEN
+- PTRS_PER_PGD*sizeof(pgd_t),
+- PTRS_PER_PGD*sizeof(pgd_t),
+-#else
+- PAGE_SIZE,
+- PAGE_SIZE,
+-#endif
+- 0,
+- pgd_ctor,
+- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+- if (!pgd_cache)
+- panic("pgtable_cache_init(): Cannot create pgd cache");
+ }
+
+ /*
+@@ -825,13 +854,26 @@ static int noinline do_test_wp_bit(void)
+
+ void mark_rodata_ro(void)
+ {
+- unsigned long addr = (unsigned long)__start_rodata;
+-
+- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
+- change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
++ unsigned long start = PFN_ALIGN(_text);
++ unsigned long size = PFN_ALIGN(_etext) - start;
+
+- printk("Write protecting the kernel read-only data: %uk\n",
+- (__end_rodata - __start_rodata) >> 10);
++#ifndef CONFIG_KPROBES
++#ifdef CONFIG_HOTPLUG_CPU
++ /* It must still be possible to apply SMP alternatives. */
++ if (num_possible_cpus() <= 1)
++#endif
++ {
++ change_page_attr(virt_to_page(start),
++ size >> PAGE_SHIFT, PAGE_KERNEL_RX);
++ printk("Write protecting the kernel text: %luk\n", size >> 10);
++ }
++#endif
++ start += size;
++ size = (unsigned long)__end_rodata - start;
++ change_page_attr(virt_to_page(start),
++ size >> PAGE_SHIFT, PAGE_KERNEL_RO);
++ printk("Write protecting the kernel read-only data: %luk\n",
++ size >> 10);
+
+ /*
+ * change_page_attr() requires a global_flush_tlb() call after it.
+@@ -854,7 +896,7 @@ void free_init_pages(char *what, unsigne
+ free_page(addr);
+ totalram_pages++;
+ }
+- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
++ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+ }
+
+ void free_initmem(void)
+Index: 10.3-2007-11-26/arch/i386/mm/ioremap-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/ioremap-xen.c 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/mm/ioremap-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -13,6 +13,7 @@
+ #include <linux/slab.h>
+ #include <linux/module.h>
+ #include <linux/io.h>
++#include <linux/sched.h>
+ #include <asm/fixmap.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+Index: 10.3-2007-11-26/arch/i386/mm/pgtable-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/pgtable-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/mm/pgtable-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -13,6 +13,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/spinlock.h>
+ #include <linux/module.h>
++#include <linux/quicklist.h>
+
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -292,8 +293,6 @@ void pmd_ctor(void *pmd, struct kmem_cac
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+- * The locking scheme was chosen on the basis of manfred's
+- * recommendations and having no core impact whatsoever.
+ * -- wli
+ */
+ DEFINE_SPINLOCK(pgd_lock);
+@@ -319,37 +318,60 @@ static inline void pgd_list_del(pgd_t *p
+ set_page_private(next, (unsigned long)pprev);
+ }
+
+-void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
++
++
++#if (PTRS_PER_PMD == 1)
++/* Non-PAE pgd constructor */
++void pgd_ctor(void *pgd)
+ {
+ unsigned long flags;
+
+- if (PTRS_PER_PMD > 1) {
+- if (HAVE_SHARED_KERNEL_PMD)
+- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+- swapper_pg_dir + USER_PTRS_PER_PGD,
+- KERNEL_PGD_PTRS);
+- } else {
+- spin_lock_irqsave(&pgd_lock, flags);
++ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
++
++ spin_lock_irqsave(&pgd_lock, flags);
++
++ /* must happen under lock */
++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
++ swapper_pg_dir + USER_PTRS_PER_PGD,
++ KERNEL_PGD_PTRS);
++
++ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
++ __pa(swapper_pg_dir) >> PAGE_SHIFT,
++ USER_PTRS_PER_PGD,
++ KERNEL_PGD_PTRS);
++ pgd_list_add(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++}
++#else /* PTRS_PER_PMD > 1 */
++/* PAE pgd constructor */
++void pgd_ctor(void *pgd)
++{
++ /* PAE, kernel PMD may be shared */
++
++ if (SHARED_KERNEL_PMD) {
+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir + USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
+- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+-
+- /* must happen under lock */
+- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+- __pa(swapper_pg_dir) >> PAGE_SHIFT,
+- USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
++#ifndef CONFIG_XEN
++ } else {
++ unsigned long flags;
+
++ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
++ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_add(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
++#endif
+ }
+ }
++#endif /* PTRS_PER_PMD */
+
+-/* never called when PTRS_PER_PMD > 1 */
+-void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
++void pgd_dtor(void *pgd)
+ {
+ unsigned long flags; /* can be called from interrupt context */
+
++ if (SHARED_KERNEL_PMD)
++ return;
++
+ paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+@@ -358,11 +380,46 @@ void pgd_dtor(void *pgd, struct kmem_cac
+ pgd_test_and_unpin(pgd);
+ }
+
++#define UNSHARED_PTRS_PER_PGD \
++ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
++
++/* If we allocate a pmd for part of the kernel address space, then
++ make sure its initialized with the appropriate kernel mappings.
++ Otherwise use a cached zeroed pmd. */
++static pmd_t *pmd_cache_alloc(int idx)
++{
++ pmd_t *pmd;
++
++ if (idx >= USER_PTRS_PER_PGD) {
++ pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
++
++#ifndef CONFIG_XEN
++ if (pmd)
++ memcpy(pmd,
++ (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
++ sizeof(pmd_t) * PTRS_PER_PMD);
++#endif
++ } else
++ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
++
++ return pmd;
++}
++
++static void pmd_cache_free(pmd_t *pmd, int idx)
++{
++ if (idx >= USER_PTRS_PER_PGD) {
++ make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables);
++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
++ free_page((unsigned long)pmd);
++ } else
++ kmem_cache_free(pmd_cache, pmd);
++}
++
+ pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+ int i;
+- pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+- pmd_t **pmd;
++ pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
++ pmd_t **pmds = NULL;
+ unsigned long flags;
+
+ pgd_test_and_unpin(pgd);
+@@ -370,37 +427,40 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ if (PTRS_PER_PMD == 1 || !pgd)
+ return pgd;
+
+- if (HAVE_SHARED_KERNEL_PMD) {
+- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+- if (!pmd)
+- goto out_oom;
+- paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
+- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
++#ifdef CONFIG_XEN
++ if (!SHARED_KERNEL_PMD) {
++ /*
++ * We can race save/restore (if we sleep during a GFP_KERNEL memory
++ * allocation). We therefore store virtual addresses of pmds as they
++ * do not change across save/restore, and poke the machine addresses
++ * into the pgdir under the pgd_lock.
++ */
++ pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
++ if (!pmds) {
++ quicklist_free(0, pgd_dtor, pgd);
++ return NULL;
+ }
+- return pgd;
+- }
+-
+- /*
+- * We can race save/restore (if we sleep during a GFP_KERNEL memory
+- * allocation). We therefore store virtual addresses of pmds as they
+- * do not change across save/restore, and poke the machine addresses
+- * into the pgdir under the pgd_lock.
+- */
+- pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
+- if (!pmd) {
+- kmem_cache_free(pgd_cache, pgd);
+- return NULL;
+ }
++#endif
+
+ /* Allocate pmds, remember virtual addresses. */
+- for (i = 0; i < PTRS_PER_PGD; ++i) {
+- pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+- if (!pmd[i])
++ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
++ pmd_t *pmd = pmd_cache_alloc(i);
++
++ if (!pmd)
+ goto out_oom;
++
+ paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
++ if (pmds)
++ pmds[i] = pmd;
++ else
++ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+ }
+
++#ifdef CONFIG_XEN
++ if (SHARED_KERNEL_PMD)
++ return pgd;
++
+ spin_lock_irqsave(&pgd_lock, flags);
+
+ /* Protect against save/restore: move below 4GB under pgd_lock. */
+@@ -419,40 +479,41 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ pgd_t *kpgd = pgd_offset_k(v);
+ pud_t *kpud = pud_offset(kpgd, v);
+ pmd_t *kpmd = pmd_offset(kpud, v);
+- memcpy(pmd[i], kpmd, PAGE_SIZE);
++ memcpy(pmds[i], kpmd, PAGE_SIZE);
+ make_lowmem_page_readonly(
+- pmd[i], XENFEAT_writable_page_tables);
++ pmds[i], XENFEAT_writable_page_tables);
+ }
+
+ /* It is safe to poke machine addresses of pmds under the pmd_lock. */
+ for (i = 0; i < PTRS_PER_PGD; i++)
+- set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));
++ set_pgd(&pgd[i], __pgd(1 + __pa(pmds[i])));
+
+ /* Ensure this pgd gets picked up and pinned on save/restore. */
+ pgd_list_add(pgd);
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+- kfree(pmd);
++ kfree(pmds);
++#endif
+
+ return pgd;
+
+ out_oom:
+- if (HAVE_SHARED_KERNEL_PMD) {
++ if (!pmds) {
+ for (i--; i >= 0; i--) {
+ pgd_t pgdent = pgd[i];
+ void* pmd = (void *)__va(pgd_val(pgdent)-1);
+ paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+- kmem_cache_free(pmd_cache, pmd);
++ pmd_cache_free(pmd, i);
+ }
+ } else {
+ for (i--; i >= 0; i--) {
+- paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT);
+- kmem_cache_free(pmd_cache, pmd[i]);
++ paravirt_release_pd(__pa(pmds[i]) >> PAGE_SHIFT);
++ pmd_cache_free(pmds[i], i);
+ }
+- kfree(pmd);
++ kfree(pmds);
+ }
+- kmem_cache_free(pgd_cache, pgd);
++ quicklist_free(0, pgd_dtor, pgd);
+ return NULL;
+ }
+
+@@ -472,35 +533,31 @@ void pgd_free(pgd_t *pgd)
+
+ /* in the PAE case user pgd entries are overwritten before usage */
+ if (PTRS_PER_PMD > 1) {
+- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+- pgd_t pgdent = pgd[i];
+- void* pmd = (void *)__va(pgd_val(pgdent)-1);
+- paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+- kmem_cache_free(pmd_cache, pmd);
+- }
+-
+- if (!HAVE_SHARED_KERNEL_PMD) {
++ if (!SHARED_KERNEL_PMD) {
+ unsigned long flags;
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
++ }
+
+- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
+- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
+- make_lowmem_page_writable(
+- pmd, XENFEAT_writable_page_tables);
+- memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
+- kmem_cache_free(pmd_cache, pmd);
+- }
+-
+- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
+- xen_destroy_contiguous_region(
+- (unsigned long)pgd, 0);
++ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
++ pgd_t pgdent = pgd[i];
++ void* pmd = (void *)__va(pgd_val(pgdent)-1);
++ paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
++ pmd_cache_free(pmd, i);
+ }
++
++ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
++ xen_destroy_contiguous_region((unsigned long)pgd, 0);
+ }
+
+ /* in the non-PAE case, free_pgtables() clears user pgd entries */
+- kmem_cache_free(pgd_cache, pgd);
++ quicklist_free(0, pgd_dtor, pgd);
++}
++
++void check_pgt_cache(void)
++{
++ quicklist_trim(0, pgd_dtor, 25, 16);
+ }
+
+ void make_lowmem_page_readonly(void *va, unsigned int feature)
+@@ -719,13 +776,13 @@ void mm_pin_all(void)
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+
+-void _arch_dup_mmap(struct mm_struct *mm)
++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+ if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
+ mm_pin(mm);
+ }
+
+-void _arch_exit_mmap(struct mm_struct *mm)
++void arch_exit_mmap(struct mm_struct *mm)
+ {
+ struct task_struct *tsk = current;
+
+Index: 10.3-2007-11-26/arch/x86_64/Kconfig
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/Kconfig 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/Kconfig 2007-10-22 13:58:57.000000000 +0200
+@@ -599,7 +599,7 @@ config CRASH_DUMP
+
+ config RELOCATABLE
+ bool "Build a relocatable kernel(EXPERIMENTAL)"
+- depends on EXPERIMENTAL
++ depends on EXPERIMENTAL && !X86_64_XEN
+ help
+ Builds a relocatable kernel. This enables loading and running
+ a kernel binary from a different physical address than it has
+@@ -736,7 +736,7 @@ menu "Bus options (PCI etc.)"
+
+ config PCI
+ bool "PCI support"
+- select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
++ select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC && !X86_64_XEN)
+
+ # x86-64 doesn't support PCI BIOS access from long mode so always go direct.
+ config PCI_DIRECT
+Index: 10.3-2007-11-26/arch/x86_64/ia32/ia32entry-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/ia32/ia32entry-xen.S 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/ia32/ia32entry-xen.S 2007-10-22 13:58:57.000000000 +0200
+@@ -508,11 +508,7 @@ ia32_sys_call_table:
+ .quad sys_symlink
+ .quad sys_lstat
+ .quad sys_readlink /* 85 */
+-#ifdef CONFIG_IA32_AOUT
+ .quad sys_uselib
+-#else
+- .quad quiet_ni_syscall
+-#endif
+ .quad sys_swapon
+ .quad sys_reboot
+ .quad compat_sys_old_readdir
+@@ -651,7 +647,7 @@ ia32_sys_call_table:
+ .quad quiet_ni_syscall /* tux */
+ .quad quiet_ni_syscall /* security */
+ .quad sys_gettid
+- .quad sys_readahead /* 225 */
++ .quad sys32_readahead /* 225 */
+ .quad sys_setxattr
+ .quad sys_lsetxattr
+ .quad sys_fsetxattr
+@@ -676,7 +672,7 @@ ia32_sys_call_table:
+ .quad compat_sys_io_getevents
+ .quad compat_sys_io_submit
+ .quad sys_io_cancel
+- .quad sys_fadvise64 /* 250 */
++ .quad sys32_fadvise64 /* 250 */
+ .quad quiet_ni_syscall /* free_huge_pages */
+ .quad sys_exit_group
+ .quad sys32_lookup_dcookie
+@@ -740,10 +736,14 @@ ia32_sys_call_table:
+ .quad compat_sys_set_robust_list
+ .quad compat_sys_get_robust_list
+ .quad sys_splice
+- .quad sys_sync_file_range
+- .quad sys_tee
++ .quad sys32_sync_file_range
++ .quad sys_tee /* 315 */
+ .quad compat_sys_vmsplice
+ .quad compat_sys_move_pages
+ .quad sys_getcpu
+ .quad sys_epoll_pwait
+-ia32_syscall_end:
++ .quad compat_sys_utimensat /* 320 */
++ .quad compat_sys_signalfd
++ .quad compat_sys_timerfd
++ .quad sys_eventfd
++ia32_syscall_end:
+Index: 10.3-2007-11-26/arch/x86_64/ia32/syscall32-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/ia32/syscall32-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/ia32/syscall32-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -13,6 +13,7 @@
+ #include <asm/proto.h>
+ #include <asm/tlbflush.h>
+ #include <asm/ia32_unistd.h>
++#include <asm/vsyscall32.h>
+
+ #ifdef USE_INT80
+ extern unsigned char syscall32_int80[], syscall32_int80_end[];
+Index: 10.3-2007-11-26/arch/x86_64/kernel/apic-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/apic-xen.c 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/apic-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -19,7 +19,6 @@
+ #include <linux/mm.h>
+ #include <linux/delay.h>
+ #include <linux/bootmem.h>
+-#include <linux/smp_lock.h>
+ #include <linux/interrupt.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/kernel_stat.h>
+Index: 10.3-2007-11-26/arch/x86_64/kernel/e820-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/e820-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/e820-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -17,6 +17,8 @@
+ #include <linux/kexec.h>
+ #include <linux/module.h>
+ #include <linux/mm.h>
++#include <linux/suspend.h>
++#include <linux/pfn.h>
+
+ #include <asm/pgtable.h>
+ #include <asm/page.h>
+@@ -28,7 +30,7 @@
+
+ struct e820map e820 __initdata;
+ #ifdef CONFIG_XEN
+-struct e820map machine_e820 __initdata;
++struct e820map machine_e820;
+ #endif
+
+ /*
+@@ -105,17 +107,23 @@ static inline int bad_addr(unsigned long
+ return 0;
+ }
+
+-#ifndef CONFIG_XEN
+ /*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+ */
+-int __meminit
++int
+ e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
+ {
+ int i;
++#ifndef CONFIG_XEN
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
++#else
++ if (!is_initial_xendomain())
++ return 0;
++ for (i = 0; i < machine_e820.nr_map; i++) {
++ const struct e820entry *ei = &machine_e820.map[i];
++#endif
+ if (type && ei->type != type)
+ continue;
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+@@ -124,7 +132,7 @@ e820_any_mapped(unsigned long start, uns
+ }
+ return 0;
+ }
+-#endif
++EXPORT_SYMBOL_GPL(e820_any_mapped);
+
+ /*
+ * This function checks if the entire range <start,end> is mapped with type.
+@@ -284,22 +292,6 @@ void __init e820_reserve_resources(struc
+ }
+
+ #ifndef CONFIG_XEN
+-/* Mark pages corresponding to given address range as nosave */
+-static void __init
+-e820_mark_nosave_range(unsigned long start, unsigned long end)
+-{
+- unsigned long pfn, max_pfn;
+-
+- if (start >= end)
+- return;
+-
+- printk("Nosave address range: %016lx - %016lx\n", start, end);
+- max_pfn = end >> PAGE_SHIFT;
+- for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
+- if (pfn_valid(pfn))
+- SetPageNosave(pfn_to_page(pfn));
+-}
+-
+ /*
+ * Find the ranges of physical addresses that do not correspond to
+ * e820 RAM areas and mark the corresponding pages as nosave for software
+@@ -318,13 +310,13 @@ void __init e820_mark_nosave_regions(voi
+ struct e820entry *ei = &e820.map[i];
+
+ if (paddr < ei->addr)
+- e820_mark_nosave_range(paddr,
+- round_up(ei->addr, PAGE_SIZE));
++ register_nosave_region(PFN_DOWN(paddr),
++ PFN_UP(ei->addr));
+
+ paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
+ if (ei->type != E820_RAM)
+- e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
+- paddr);
++ register_nosave_region(PFN_UP(ei->addr),
++ PFN_DOWN(paddr));
+
+ if (paddr >= (end_pfn << PAGE_SHIFT))
+ break;
+Index: 10.3-2007-11-26/arch/x86_64/kernel/early_printk-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/early_printk-xen.c 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/early_printk-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -11,11 +11,10 @@
+
+ #ifdef __i386__
+ #include <asm/setup.h>
+-#define VGABASE (__ISA_IO_base + 0xb8000)
+ #else
+ #include <asm/bootsetup.h>
+-#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
+ #endif
++#define VGABASE (__ISA_IO_base + 0xb8000)
+
+ #ifndef CONFIG_XEN
+ static int max_ypos = 25, max_xpos = 80;
+@@ -93,9 +92,9 @@ static int early_serial_putc(unsigned ch
+ static void early_serial_write(struct console *con, const char *s, unsigned n)
+ {
+ while (*s && n-- > 0) {
+- early_serial_putc(*s);
+ if (*s == '\n')
+ early_serial_putc('\r');
++ early_serial_putc(*s);
+ s++;
+ }
+ }
+@@ -205,7 +204,7 @@ static noinline long simnow(long cmd, lo
+ return ret;
+ }
+
+-void __init simnow_init(char *str)
++static void __init simnow_init(char *str)
+ {
+ char *fn = "klog";
+ if (*str == '=')
+@@ -277,22 +276,12 @@ static int __init setup_early_printk(cha
+ early_console = &simnow_console;
+ keep_early = 1;
+ }
++
++ if (keep_early)
++ early_console->flags &= ~CON_BOOT;
++ else
++ early_console->flags |= CON_BOOT;
+ register_console(early_console);
+ return 0;
+ }
+-
+ early_param("earlyprintk", setup_early_printk);
+-
+-void __init disable_early_printk(void)
+-{
+- if (!early_console_initialized || !early_console)
+- return;
+- if (!keep_early) {
+- printk("disabling early console\n");
+- unregister_console(early_console);
+- early_console_initialized = 0;
+- } else {
+- printk("keeping early console\n");
+- }
+-}
+-
+Index: 10.3-2007-11-26/arch/x86_64/kernel/entry-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/entry-xen.S 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/entry-xen.S 2007-10-22 13:58:57.000000000 +0200
+@@ -1261,3 +1261,10 @@ ENTRY(call_softirq)
+ ret
+ CFI_ENDPROC
+ ENDPROC(call_softirq)
++
++KPROBE_ENTRY(ignore_sysret)
++ CFI_STARTPROC
++ mov $-ENOSYS,%eax
++ sysret
++ CFI_ENDPROC
++ENDPROC(ignore_sysret)
+Index: 10.3-2007-11-26/arch/x86_64/kernel/genapic-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/genapic-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/genapic-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -11,123 +11,57 @@
+ #include <linux/threads.h>
+ #include <linux/cpumask.h>
+ #include <linux/string.h>
++#include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/ctype.h>
+ #include <linux/init.h>
+-#include <linux/module.h>
+
+ #include <asm/smp.h>
+ #include <asm/ipi.h>
++#include <asm/genapic.h>
+
+-#if defined(CONFIG_ACPI)
++#ifdef CONFIG_ACPI
+ #include <acpi/acpi_bus.h>
+ #endif
+
+ /* which logical CPU number maps to which CPU (physical APIC ID) */
+-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
++u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
++ = { [0 ... NR_CPUS-1] = BAD_APICID };
+ EXPORT_SYMBOL(x86_cpu_to_apicid);
+-u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+-extern struct genapic apic_cluster;
+-extern struct genapic apic_flat;
+-extern struct genapic apic_physflat;
++u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+ #ifndef CONFIG_XEN
+-struct genapic *genapic = &apic_flat;
+-struct genapic *genapic_force;
++struct genapic __read_mostly *genapic = &apic_flat;
+ #else
+ extern struct genapic apic_xen;
+-struct genapic *genapic = &apic_xen;
++struct genapic __read_mostly *genapic = &apic_xen;
+ #endif
+
+
+ /*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+-void __init clustered_apic_check(void)
++void __init setup_apic_routing(void)
+ {
+ #ifndef CONFIG_XEN
+- long i;
+- u8 clusters, max_cluster;
+- u8 id;
+- u8 cluster_cnt[NUM_APIC_CLUSTERS];
+- int max_apic = 0;
+-
+- /* genapic selection can be forced because of certain quirks.
+- */
+- if (genapic_force) {
+- genapic = genapic_force;
+- goto print;
+- }
+-
+-#if defined(CONFIG_ACPI)
++#ifdef CONFIG_ACPI
+ /*
+- * Some x86_64 machines use physical APIC mode regardless of how many
+- * procs/clusters are present (x86_64 ES7000 is an example).
++ * Quirk: some x86_64 machines can only use physical APIC mode
++ * regardless of how many processors are present (x86_64 ES7000
++ * is an example).
+ */
+- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID)
+- if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) {
+- genapic = &apic_cluster;
+- goto print;
+- }
+-#endif
+-
+- memset(cluster_cnt, 0, sizeof(cluster_cnt));
+- for (i = 0; i < NR_CPUS; i++) {
+- id = bios_cpu_apicid[i];
+- if (id == BAD_APICID)
+- continue;
+- if (id > max_apic)
+- max_apic = id;
+- cluster_cnt[APIC_CLUSTERID(id)]++;
+- }
+-
+- /* Don't use clustered mode on AMD platforms. */
+- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
++ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
++ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+ genapic = &apic_physflat;
+-#ifndef CONFIG_HOTPLUG_CPU
+- /* In the CPU hotplug case we cannot use broadcast mode
+- because that opens a race when a CPU is removed.
+- Stay at physflat mode in this case.
+- It is bad to do this unconditionally though. Once
+- we have ACPI platform support for CPU hotplug
+- we should detect hotplug capablity from ACPI tables and
+- only do this when really needed. -AK */
+- if (max_apic <= 8)
+- genapic = &apic_flat;
+-#endif
+- goto print;
+- }
+-
+- clusters = 0;
+- max_cluster = 0;
+-
+- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+- if (cluster_cnt[i] > 0) {
+- ++clusters;
+- if (cluster_cnt[i] > max_cluster)
+- max_cluster = cluster_cnt[i];
+- }
+- }
++ else
++#endif
+
+- /*
+- * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
+- * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
+- * else physical mode.
+- * (We don't use lowest priority delivery + HW APIC IRQ steering, so
+- * can ignore the clustered logical case and go straight to physical.)
+- */
+- if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
+-#ifdef CONFIG_HOTPLUG_CPU
+- /* Don't use APIC shortcuts in CPU hotplug to avoid races */
+- genapic = &apic_physflat;
+-#else
++ if (cpus_weight(cpu_possible_map) <= 8)
+ genapic = &apic_flat;
+-#endif
+- } else
+- genapic = &apic_cluster;
++ else
++ genapic = &apic_physflat;
+
+-print:
+ #else
+ /* hardcode to xen apic functions */
+ genapic = &apic_xen;
+@@ -135,7 +69,7 @@ print:
+ printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+ }
+
+-/* Same for both flat and clustered. */
++/* Same for both flat and physical. */
+
+ #ifdef CONFIG_XEN
+ extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
+Index: 10.3-2007-11-26/arch/x86_64/kernel/genapic_xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/genapic_xen.c 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/genapic_xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -21,9 +21,8 @@
+ #include <asm/ipi.h>
+ #else
+ #include <asm/apic.h>
+-#include <asm/apicdef.h>
+-#include <asm/genapic.h>
+ #endif
++#include <asm/genapic.h>
+ #include <xen/evtchn.h>
+
+ DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+Index: 10.3-2007-11-26/arch/x86_64/kernel/head-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/head-xen.S 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/head-xen.S 2007-10-22 13:58:57.000000000 +0200
+@@ -5,6 +5,7 @@
+ * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
+ * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
++ * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Modified for Xen
+ */
+@@ -37,18 +38,14 @@ ENTRY(_start)
+ pushq $0 # fake return address
+ jmp x86_64_start_kernel
+
+-ENTRY(stext)
+-ENTRY(_stext)
++.balign PAGE_SIZE
+
+- $page = 0
+ #define NEXT_PAGE(name) \
+- $page = $page + 1; \
+- .org $page * 0x1000; \
+- phys_##name = $page * 0x1000 + __PHYSICAL_START; \
++ .balign PAGE_SIZE; \
++ phys_##name = . - .bootstrap.text; \
+ ENTRY(name)
+
+ NEXT_PAGE(init_level4_pgt)
+- /* This gets initialized in x86_64_start_kernel */
+ .fill 512,8,0
+
+ /*
+@@ -125,13 +122,13 @@ gdt:
+
+ ENTRY(cpu_gdt_table)
+ .quad 0x0000000000000000 /* NULL descriptor */
++ .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
++ .quad 0x00af9b000000ffff /* __KERNEL_CS */
++ .quad 0x00cf93000000ffff /* __KERNEL_DS */
++ .quad 0x00cffb000000ffff /* __USER32_CS */
++ .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
++ .quad 0x00affb000000ffff /* __USER_CS */
+ .quad 0x0 /* unused */
+- .quad 0x00af9a000000ffff /* __KERNEL_CS */
+- .quad 0x00cf92000000ffff /* __KERNEL_DS */
+- .quad 0x00cffa000000ffff /* __USER32_CS */
+- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
+- .quad 0x00affa000000ffff /* __USER_CS */
+- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
+ .quad 0,0 /* TSS */
+ .quad 0,0 /* LDT */
+ .quad 0,0,0 /* three TLS descriptors */
+@@ -154,14 +151,11 @@ ENTRY(empty_zero_page)
+ * __xen_guest information
+ */
+ .macro utoh value
+- .if (\value) < 0 || (\value) >= 0x10
+- utoh (((\value)>>4)&0x0fffffffffffffff)
+- .endif
+- .if ((\value) & 0xf) < 10
+- .byte '0' + ((\value) & 0xf)
+- .else
+- .byte 'A' + ((\value) & 0xf) - 10
+- .endif
++ i = 64
++ .rept 16
++ i = i - 4
++ .byte '0' + ((((\value) >> i) & 0xf) > 9) * ('0' - 'A' + 10) + (((\value) >> i) & 0xf)
++ .endr
+ .endm
+
+ .section __xen_guest
+Index: 10.3-2007-11-26/arch/x86_64/kernel/head64-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/head64-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/head64-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -22,13 +22,21 @@
+ #include <asm/setup.h>
+ #include <asm/desc.h>
+ #include <asm/pgtable.h>
++#include <asm/tlbflush.h>
+ #include <asm/sections.h>
+
+ unsigned long start_pfn;
+
++#ifndef CONFIG_XEN
++static void __init zap_identity_mappings(void)
++{
++ pgd_t *pgd = pgd_offset_k(0UL);
++ pgd_clear(pgd);
++ __flush_tlb();
++}
++
+ /* Don't add a printk in there. printk relies on the PDA which is not initialized
+ yet. */
+-#if 0
+ static void __init clear_bss(void)
+ {
+ memset(__bss_start, 0,
+@@ -37,7 +45,7 @@ static void __init clear_bss(void)
+ #endif
+
+ #define NEW_CL_POINTER 0x228 /* Relative to real mode data */
+-#define OLD_CL_MAGIC_ADDR 0x90020
++#define OLD_CL_MAGIC_ADDR 0x20
+ #define OLD_CL_MAGIC 0xA33F
+ #define OLD_CL_BASE_ADDR 0x90000
+ #define OLD_CL_OFFSET 0x90022
+@@ -45,18 +53,18 @@ static void __init clear_bss(void)
+ static void __init copy_bootdata(char *real_mode_data)
+ {
+ #ifndef CONFIG_XEN
+- int new_data;
++ unsigned long new_data;
+ char * command_line;
+
+ memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
+- new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
++ new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
+ if (!new_data) {
+- if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
++ if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
+ return;
+ }
+- new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
++ new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
+ }
+- command_line = (char *) ((u64)(new_data));
++ command_line = __va(new_data);
+ memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
+ #else
+ int max_cmdline;
+@@ -98,10 +106,13 @@ void __init x86_64_start_kernel(char * r
+ while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
+ machine_to_phys_order++;
+
+-#if 0
++#ifndef CONFIG_XEN
+ /* clear bss before set_intr_gate with early_idt_handler */
+ clear_bss();
+
++ /* Make NULL pointers segfault */
++ zap_identity_mappings();
++
+ for (i = 0; i < IDT_ENTRIES; i++)
+ set_intr_gate(i, early_idt_handler);
+ asm volatile("lidt %0" :: "m" (idt_descr));
+@@ -113,7 +124,7 @@ void __init x86_64_start_kernel(char * r
+ cpu_pda(i) = &boot_cpu_pda[i];
+
+ pda_init(0);
+- copy_bootdata(real_mode_data);
++ copy_bootdata(__va(real_mode_data));
+ #ifdef CONFIG_SMP
+ cpu_set(0, cpu_online_map);
+ #endif
+Index: 10.3-2007-11-26/arch/x86_64/kernel/io_apic-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/io_apic-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/io_apic-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -25,7 +25,6 @@
+ #include <linux/init.h>
+ #include <linux/delay.h>
+ #include <linux/sched.h>
+-#include <linux/smp_lock.h>
+ #include <linux/pci.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/acpi.h>
+@@ -897,10 +896,6 @@ static void __init setup_ExtINT_IRQ0_pin
+ enable_8259A_irq(0);
+ }
+
+-void __init UNEXPECTED_IO_APIC(void)
+-{
+-}
+-
+ void __apicdebuginit print_IO_APIC(void)
+ {
+ int apic, i;
+@@ -936,40 +931,16 @@ void __apicdebuginit print_IO_APIC(void)
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
+- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
+- UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
+- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
+- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
+- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
+- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
+- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
+- (reg_01.bits.entries != 0x2E) &&
+- (reg_01.bits.entries != 0x3F) &&
+- (reg_01.bits.entries != 0x03)
+- )
+- UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
+- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
+- (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
+- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
+- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
+- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
+- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
+- )
+- UNEXPECTED_IO_APIC();
+- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
+- UNEXPECTED_IO_APIC();
+
+ if (reg_01.bits.version >= 0x10) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
+- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
+- UNEXPECTED_IO_APIC();
+ }
+
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
+@@ -1408,8 +1379,7 @@ static void irq_complete_move(unsigned i
+
+ vector = ~get_irq_regs()->orig_rax;
+ me = smp_processor_id();
+- if ((vector == cfg->vector) &&
+- cpu_isset(smp_processor_id(), cfg->domain)) {
++ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
+ cpumask_t cleanup_mask;
+
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+@@ -1444,7 +1414,7 @@ static void ack_apic_level(unsigned int
+
+ /*
+ * We must acknowledge the irq before we move it or the acknowledge will
+- * not propogate properly.
++ * not propagate properly.
+ */
+ ack_APIC_irq();
+
+@@ -1527,6 +1497,7 @@ static void ack_lapic_irq (unsigned int
+ static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+ static struct hw_interrupt_type lapic_irq_type __read_mostly = {
++ .name = "local-APIC",
+ .typename = "local-APIC-edge",
+ .startup = NULL, /* startup_irq() not used for IRQ0 */
+ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
+@@ -1996,18 +1967,18 @@ int arch_setup_msi_irq(struct pci_dev *d
+ if (irq < 0)
+ return irq;
+
+- set_irq_msi(irq, desc);
+ ret = msi_compose_msg(dev, irq, &msg);
+ if (ret < 0) {
+ destroy_irq(irq);
+ return ret;
+ }
+
++ set_irq_msi(irq, desc);
+ write_msi_msg(irq, &msg);
+
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+- return irq;
++ return 0;
+ }
+
+ void arch_teardown_msi_irq(unsigned int irq)
+Index: 10.3-2007-11-26/arch/x86_64/kernel/ioport-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/ioport-xen.c 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/ioport-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -13,10 +13,10 @@
+ #include <linux/ioport.h>
+ #include <linux/mm.h>
+ #include <linux/smp.h>
+-#include <linux/smp_lock.h>
+ #include <linux/stddef.h>
+ #include <linux/slab.h>
+ #include <linux/thread_info.h>
++#include <linux/syscalls.h>
+ #include <xen/interface/physdev.h>
+
+ /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+Index: 10.3-2007-11-26/arch/x86_64/kernel/irq-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/irq-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/irq-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -32,7 +32,7 @@ atomic_t irq_err_count;
+ */
+ static inline void stack_overflow_check(struct pt_regs *regs)
+ {
+- u64 curbase = (u64) current->thread_info;
++ u64 curbase = (u64)task_stack_page(current);
+ static unsigned long warned = -60*HZ;
+
+ if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
+@@ -145,17 +145,43 @@ void fixup_irqs(cpumask_t map)
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ cpumask_t mask;
++ int break_affinity = 0;
++ int set_affinity = 1;
++
+ if (irq == 2)
+ continue;
+
++ /* interrupt's are disabled at this point */
++ spin_lock(&irq_desc[irq].lock);
++
++ if (!irq_has_action(irq) ||
++ cpus_equal(irq_desc[irq].affinity, map)) {
++ spin_unlock(&irq_desc[irq].lock);
++ continue;
++ }
++
+ cpus_and(mask, irq_desc[irq].affinity, map);
+- if (any_online_cpu(mask) == NR_CPUS) {
+- printk("Breaking affinity for irq %i\n", irq);
++ if (cpus_empty(mask)) {
++ break_affinity = 1;
+ mask = map;
+ }
++
++ if (irq_desc[irq].chip->mask)
++ irq_desc[irq].chip->mask(irq);
++
+ if (irq_desc[irq].chip->set_affinity)
+ irq_desc[irq].chip->set_affinity(irq, mask);
+- else if (irq_desc[irq].action && !(warned++))
++ else if (!(warned++))
++ set_affinity = 0;
++
++ if (irq_desc[irq].chip->unmask)
++ irq_desc[irq].chip->unmask(irq);
++
++ spin_unlock(&irq_desc[irq].lock);
++
++ if (break_affinity && set_affinity)
++ printk("Broke affinity for irq %i\n", irq);
++ else if (!set_affinity)
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+Index: 10.3-2007-11-26/arch/x86_64/kernel/ldt-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/ldt-xen.c 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/ldt-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -13,7 +13,6 @@
+ #include <linux/string.h>
+ #include <linux/mm.h>
+ #include <linux/smp.h>
+-#include <linux/smp_lock.h>
+ #include <linux/vmalloc.h>
+ #include <linux/slab.h>
+
+Index: 10.3-2007-11-26/arch/x86_64/kernel/mpparse-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/mpparse-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/mpparse-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -17,7 +17,6 @@
+ #include <linux/init.h>
+ #include <linux/delay.h>
+ #include <linux/bootmem.h>
+-#include <linux/smp_lock.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/acpi.h>
+@@ -307,7 +306,7 @@ static int __init smp_read_mpc(struct mp
+ }
+ }
+ }
+- clustered_apic_check();
++ setup_apic_routing();
+ if (!num_processors)
+ printk(KERN_ERR "MPTABLE: no processors registered!\n");
+ return num_processors;
+Index: 10.3-2007-11-26/arch/x86_64/kernel/pci-swiotlb-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/pci-swiotlb-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/pci-swiotlb-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -16,7 +16,7 @@ EXPORT_SYMBOL(swiotlb);
+
+ void swiotlb_init(void);
+
+-struct dma_mapping_ops swiotlb_dma_ops = {
++const struct dma_mapping_ops swiotlb_dma_ops = {
+ #if 0
+ .mapping_error = swiotlb_dma_mapping_error,
+ .alloc_coherent = swiotlb_alloc_coherent,
+Index: 10.3-2007-11-26/arch/x86_64/kernel/process-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/process-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/process-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -39,6 +39,7 @@
+ #include <linux/random.h>
+ #include <linux/notifier.h>
+ #include <linux/kprobes.h>
++#include <linux/kdebug.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -49,7 +50,6 @@
+ #include <asm/mmu_context.h>
+ #include <asm/pda.h>
+ #include <asm/prctl.h>
+-#include <asm/kdebug.h>
+ #include <xen/interface/platform.h>
+ #include <xen/interface/physdev.h>
+ #include <xen/interface/vcpu.h>
+@@ -232,16 +232,18 @@ void __cpuinit select_idle_routine(const
+
+ static int __init idle_setup (char *str)
+ {
+- if (!strncmp(str, "poll", 4)) {
++ if (!strcmp(str, "poll")) {
+ printk("using polling idle threads.\n");
+ pm_idle = poll_idle;
+- }
++ } else if (!strcmp(str, "mwait"))
++ force_mwait = 1;
++ else
++ return -1;
+
+ boot_option_idle_override = 1;
+- return 1;
++ return 0;
+ }
+-
+-__setup("idle=", idle_setup);
++early_param("idle", idle_setup);
+
+ /* Prints also some state that isn't saved in the pt_regs */
+ void __show_regs(struct pt_regs * regs)
+@@ -532,7 +534,7 @@ __switch_to(struct task_struct *prev_p,
+ * The AMD workaround requires it to be after DS reload, or
+ * after DS has been cleared, which we do in __prepare_arch_switch.
+ */
+- if (prev_p->thread_info->status & TS_USEDFPU) {
++ if (task_thread_info(prev_p)->status & TS_USEDFPU) {
+ __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 1;
+Index: 10.3-2007-11-26/arch/x86_64/kernel/setup-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/setup-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/setup-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -123,6 +123,8 @@ int bootloader_type;
+
+ unsigned long saved_video_mode;
+
++int force_mwait __cpuinitdata;
++
+ /*
+ * Early DMI memory
+ */
+@@ -256,10 +258,10 @@ static void discover_ebda(void)
+ * there is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E
+ */
+- ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
++ ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
+ ebda_addr <<= 4;
+
+- ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
++ ebda_size = *(unsigned short *)__va(ebda_addr);
+
+ /* Round EBDA up to pages */
+ if (ebda_size == 0)
+@@ -434,15 +436,8 @@ void __init setup_arch(char **cmdline_p)
+ #endif
+
+ #ifdef CONFIG_SMP
+- /*
+- * But first pinch a few for the stack/trampoline stuff
+- * FIXME: Don't need the extra page at 4K, but need to fix
+- * trampoline before removing it. (see the GDT stuff)
+- */
+- reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
+-
+ /* Reserve SMP trampoline */
+- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
++ reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
+ #endif
+ #endif
+
+@@ -563,8 +558,6 @@ void __init setup_arch(char **cmdline_p)
+ early_quirks();
+ #endif
+
+- zap_low_mappings(0);
+-
+ /*
+ * set this early, so we dont allocate cpu0
+ * if MADT list doesnt list BSP first
+@@ -868,6 +861,10 @@ static void __cpuinit init_amd(struct cp
+
+ /* RDTSC can be speculated around */
+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
++
++ /* Family 10 doesn't support C states in MWAIT so don't use it */
++ if (c->x86 == 0x10 && !force_mwait)
++ clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
+ }
+
+ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+@@ -1150,9 +1147,7 @@ void __cpuinit identify_cpu(struct cpuin
+ #ifdef CONFIG_X86_MCE
+ mcheck_init(c);
+ #endif
+- if (c == &boot_cpu_data)
+- mtrr_bp_init();
+- else
++ if (c != &boot_cpu_data)
+ mtrr_ap_init();
+ #ifdef CONFIG_NUMA
+ numa_add_cpu(smp_processor_id());
+@@ -1243,9 +1238,8 @@ static int show_cpuinfo(struct seq_file
+ "stc",
+ "100mhzsteps",
+ "hwpstate",
+- NULL, /* tsc invariant mapped to constant_tsc */
+- NULL,
+- /* nothing */ /* constant_tsc - moved to flags */
++ "", /* tsc invariant mapped to constant_tsc */
++ /* nothing */
+ };
+
+
+Index: 10.3-2007-11-26/arch/x86_64/kernel/setup64-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/setup64-xen.c 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/setup64-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -113,9 +113,9 @@ void __init setup_per_cpu_areas(void)
+ if (!NODE_DATA(cpu_to_node(i))) {
+ printk("cpu with no node %d, num_online_nodes %d\n",
+ i, num_online_nodes());
+- ptr = alloc_bootmem(size);
++ ptr = alloc_bootmem_pages(size);
+ } else {
+- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
++ ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
+ }
+ if (!ptr)
+ panic("Cannot allocate cpu data for CPU %d\n", i);
+@@ -206,6 +206,8 @@ char boot_exception_stacks[(N_EXCEPTION_
+ __attribute__((section(".bss.page_aligned")));
+ #endif
+
++extern asmlinkage void ignore_sysret(void);
++
+ /* May not be marked __init: used by software suspend */
+ void syscall_init(void)
+ {
+@@ -217,6 +219,7 @@ void syscall_init(void)
+ */
+ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
+ wrmsrl(MSR_LSTAR, system_call);
++ wrmsrl(MSR_CSTAR, ignore_sysret);
+
+ /* Flags to clear on syscall */
+ wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
+@@ -260,7 +263,6 @@ void __cpuinit cpu_init (void)
+ /* CPU 0 is initialised in head64.c */
+ if (cpu != 0) {
+ pda_init(cpu);
+- zap_low_mappings(cpu);
+ }
+ #ifndef CONFIG_X86_NO_TSS
+ else
+Index: 10.3-2007-11-26/arch/x86_64/kernel/smp-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/smp-xen.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/smp-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -14,7 +14,6 @@
+ #include <linux/mm.h>
+ #include <linux/delay.h>
+ #include <linux/spinlock.h>
+-#include <linux/smp_lock.h>
+ #include <linux/smp.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/mc146818rtc.h>
+@@ -478,48 +477,40 @@ int smp_call_function (void (*func) (voi
+ }
+ EXPORT_SYMBOL(smp_call_function);
+
+-void smp_stop_cpu(void)
++static void stop_this_cpu(void *dummy)
+ {
+- unsigned long flags;
++ local_irq_disable();
+ /*
+ * Remove this CPU:
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+- local_irq_save(flags);
+ #ifndef CONFIG_XEN
+ disable_local_APIC();
+ #endif
+- local_irq_restore(flags);
+-}
+-
+-static void smp_really_stop_cpu(void *dummy)
+-{
+- smp_stop_cpu();
+ for (;;)
+ halt();
+ }
+
+ void smp_send_stop(void)
+ {
+- int nolock = 0;
++ int nolock;
++ unsigned long flags;
++
+ #ifndef CONFIG_XEN
+ if (reboot_force)
+ return;
+ #endif
++
+ /* Don't deadlock on the call lock in panic */
+- if (!spin_trylock(&call_lock)) {
+- /* ignore locking because we have panicked anyways */
+- nolock = 1;
+- }
+- __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
++ nolock = !spin_trylock(&call_lock);
++ local_irq_save(flags);
++ __smp_call_function(stop_this_cpu, NULL, 0, 0);
+ if (!nolock)
+ spin_unlock(&call_lock);
+-
+- local_irq_disable();
+ #ifndef CONFIG_XEN
+ disable_local_APIC();
+ #endif
+- local_irq_enable();
++ local_irq_restore(flags);
+ }
+
+ /*
+Index: 10.3-2007-11-26/arch/x86_64/kernel/traps-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/traps-xen.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/traps-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -32,6 +32,7 @@
+ #include <linux/unwind.h>
+ #include <linux/uaccess.h>
+ #include <linux/bug.h>
++#include <linux/kdebug.h>
+
+ #include <asm/system.h>
+ #include <asm/io.h>
+@@ -39,7 +40,6 @@
+ #include <asm/debugreg.h>
+ #include <asm/desc.h>
+ #include <asm/i387.h>
+-#include <asm/kdebug.h>
+ #include <asm/processor.h>
+ #include <asm/unwind.h>
+ #include <asm/smp.h>
+@@ -71,22 +71,6 @@ asmlinkage void alignment_check(void);
+ asmlinkage void machine_check(void);
+ asmlinkage void spurious_interrupt_bug(void);
+
+-ATOMIC_NOTIFIER_HEAD(die_chain);
+-EXPORT_SYMBOL(die_chain);
+-
+-int register_die_notifier(struct notifier_block *nb)
+-{
+- vmalloc_sync_all();
+- return atomic_notifier_chain_register(&die_chain, nb);
+-}
+-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */
+-
+-int unregister_die_notifier(struct notifier_block *nb)
+-{
+- return atomic_notifier_chain_unregister(&die_chain, nb);
+-}
+-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */
+-
+ static inline void conditional_sti(struct pt_regs *regs)
+ {
+ if (regs->eflags & X86_EFLAGS_IF)
+@@ -428,8 +412,7 @@ void show_registers(struct pt_regs *regs
+ const int cpu = smp_processor_id();
+ struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+
+- rsp = regs->rsp;
+-
++ rsp = regs->rsp;
+ printk("CPU %d ", cpu);
+ __show_regs(regs);
+ printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
+@@ -440,7 +423,6 @@ void show_registers(struct pt_regs *regs
+ * time of the fault..
+ */
+ if (in_kernel) {
+-
+ printk("Stack: ");
+ _show_stack(NULL, regs, (unsigned long*)rsp);
+
+@@ -485,13 +467,14 @@ static unsigned int die_nest_count;
+
+ unsigned __kprobes long oops_begin(void)
+ {
+- int cpu = smp_processor_id();
++ int cpu;
+ unsigned long flags;
+
+ oops_enter();
+
+ /* racy, but better than risking deadlock. */
+ local_irq_save(flags);
++ cpu = smp_processor_id();
+ if (!spin_trylock(&die_lock)) {
+ if (cpu == die_owner)
+ /* nested oops. should stop eventually */;
+@@ -585,10 +568,20 @@ static void __kprobes do_trap(int trapnr
+ {
+ struct task_struct *tsk = current;
+
+- tsk->thread.error_code = error_code;
+- tsk->thread.trap_no = trapnr;
+-
+ if (user_mode(regs)) {
++ /*
++ * We want error_code and trap_no set for userspace
++ * faults and kernelspace faults which result in
++ * die(), but not kernelspace faults which are fixed
++ * up. die() gives the process no chance to handle
++ * the signal and notice the kernel fault information,
++ * so that won't result in polluting the information
++ * about previously queued, but not yet delivered,
++ * faults. See also do_general_protection below.
++ */
++ tsk->thread.error_code = error_code;
++ tsk->thread.trap_no = trapnr;
++
+ if (exception_trace && unhandled_signal(tsk, signr))
+ printk(KERN_INFO
+ "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
+@@ -609,8 +602,11 @@ static void __kprobes do_trap(int trapnr
+ fixup = search_exception_tables(regs->rip);
+ if (fixup)
+ regs->rip = fixup->fixup;
+- else
++ else {
++ tsk->thread.error_code = error_code;
++ tsk->thread.trap_no = trapnr;
+ die(str, regs, error_code);
++ }
+ return;
+ }
+ }
+@@ -686,10 +682,10 @@ asmlinkage void __kprobes do_general_pro
+
+ conditional_sti(regs);
+
+- tsk->thread.error_code = error_code;
+- tsk->thread.trap_no = 13;
+-
+ if (user_mode(regs)) {
++ tsk->thread.error_code = error_code;
++ tsk->thread.trap_no = 13;
++
+ if (exception_trace && unhandled_signal(tsk, SIGSEGV))
+ printk(KERN_INFO
+ "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
+@@ -708,6 +704,9 @@ asmlinkage void __kprobes do_general_pro
+ regs->rip = fixup->fixup;
+ return;
+ }
++
++ tsk->thread.error_code = error_code;
++ tsk->thread.trap_no = 13;
+ if (notify_die(DIE_GPF, "general protection fault", regs,
+ error_code, 13, SIGSEGV) == NOTIFY_STOP)
+ return;
+Index: 10.3-2007-11-26/arch/x86_64/kernel/vsyscall-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/vsyscall-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/vsyscall-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -45,14 +45,34 @@
+
+ #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+ #define __syscall_clobber "r11","rcx","memory"
++#define __pa_vsymbol(x) \
++ ({unsigned long v; \
++ extern char __vsyscall_0; \
++ asm("" : "=r" (v) : "0" (x)); \
++ ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
+
++/*
++ * vsyscall_gtod_data contains data that is :
++ * - readonly from vsyscalls
++ * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
++ * Try to keep this structure as small as possible to avoid cache line ping pongs
++ */
+ struct vsyscall_gtod_data_t {
+- seqlock_t lock;
+- int sysctl_enabled;
+- struct timeval wall_time_tv;
++ seqlock_t lock;
++
++ /* open coded 'struct timespec' */
++ time_t wall_time_sec;
++ u32 wall_time_nsec;
++
++ int sysctl_enabled;
+ struct timezone sys_tz;
+- cycle_t offset_base;
+- struct clocksource clock;
++ struct { /* extract of a clocksource struct */
++ cycle_t (*vread)(void);
++ cycle_t cycle_last;
++ cycle_t mask;
++ u32 mult;
++ u32 shift;
++ } clock;
+ };
+ int __vgetcpu_mode __section_vgetcpu_mode;
+
+@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wa
+
+ write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+ /* copy vsyscall data */
+- vsyscall_gtod_data.clock = *clock;
+- vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
+- vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
++ vsyscall_gtod_data.clock.vread = clock->vread;
++ vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
++ vsyscall_gtod_data.clock.mask = clock->mask;
++ vsyscall_gtod_data.clock.mult = clock->mult;
++ vsyscall_gtod_data.clock.shift = clock->shift;
++ vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
++ vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
+ vsyscall_gtod_data.sys_tz = sys_tz;
+ write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+ }
+@@ -105,7 +129,8 @@ static __always_inline long time_syscall
+ static __always_inline void do_vgettimeofday(struct timeval * tv)
+ {
+ cycle_t now, base, mask, cycle_delta;
+- unsigned long seq, mult, shift, nsec_delta;
++ unsigned seq;
++ unsigned long mult, shift, nsec;
+ cycle_t (*vread)(void);
+ do {
+ seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeo
+ mult = __vsyscall_gtod_data.clock.mult;
+ shift = __vsyscall_gtod_data.clock.shift;
+
+- *tv = __vsyscall_gtod_data.wall_time_tv;
+-
++ tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
++ nsec = __vsyscall_gtod_data.wall_time_nsec;
+ } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+
+ /* calculate interval: */
+ cycle_delta = (now - base) & mask;
+ /* convert to nsecs: */
+- nsec_delta = (cycle_delta * mult) >> shift;
++ nsec += (cycle_delta * mult) >> shift;
+
+- /* convert to usecs and add to timespec: */
+- tv->tv_usec += nsec_delta / NSEC_PER_USEC;
+- while (tv->tv_usec > USEC_PER_SEC) {
++ while (nsec >= NSEC_PER_SEC) {
+ tv->tv_sec += 1;
+- tv->tv_usec -= USEC_PER_SEC;
++ nsec -= NSEC_PER_SEC;
+ }
++ tv->tv_usec = nsec / NSEC_PER_USEC;
+ }
+
+ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
+@@ -151,11 +175,16 @@ int __vsyscall(0) vgettimeofday(struct t
+ * unlikely */
+ time_t __vsyscall(1) vtime(time_t *t)
+ {
++ struct timeval tv;
++ time_t result;
+ if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
+ return time_syscall(t);
+- else if (t)
+- *t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
+- return __vsyscall_gtod_data.wall_time_tv.tv_sec;
++
++ vgettimeofday(&tv, 0);
++ result = tv.tv_sec;
++ if (t)
++ *t = result;
++ return result;
+ }
+
+ /* Fast way to get current CPU and node.
+@@ -224,10 +253,10 @@ static int vsyscall_sysctl_change(ctl_ta
+ return ret;
+ /* gcc has some trouble with __va(__pa()), so just do it this
+ way. */
+- map1 = ioremap(__pa_symbol(&vsysc1), 2);
++ map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
+ if (!map1)
+ return -ENOMEM;
+- map2 = ioremap(__pa_symbol(&vsysc2), 2);
++ map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
+ if (!map2) {
+ ret = -ENOMEM;
+ goto out;
+@@ -301,7 +330,7 @@ static int __cpuinit
+ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+ {
+ long cpu = (long)arg;
+- if (action == CPU_ONLINE)
++ if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+ return NOTIFY_DONE;
+ }
+Index: 10.3-2007-11-26/arch/x86_64/mm/fault-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/mm/fault-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/mm/fault-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -15,22 +15,22 @@
+ #include <linux/mman.h>
+ #include <linux/mm.h>
+ #include <linux/smp.h>
+-#include <linux/smp_lock.h>
+ #include <linux/interrupt.h>
+ #include <linux/init.h>
+ #include <linux/tty.h>
+ #include <linux/vt_kern.h> /* For unblank_screen() */
+ #include <linux/compiler.h>
++#include <linux/vmalloc.h>
+ #include <linux/module.h>
+ #include <linux/kprobes.h>
+ #include <linux/uaccess.h>
++#include <linux/kdebug.h>
+
+ #include <asm/system.h>
+ #include <asm/pgalloc.h>
+ #include <asm/smp.h>
+ #include <asm/tlbflush.h>
+ #include <asm/proto.h>
+-#include <asm/kdebug.h>
+ #include <asm-generic/sections.h>
+
+ /* Page fault error code bits */
+@@ -537,6 +537,12 @@ bad_area:
+ bad_area_nosemaphore:
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & PF_USER) {
++
++ /*
++ * It's possible to have interrupts off here.
++ */
++ local_irq_enable();
++
+ if (is_prefetch(regs, address, error_code))
+ return;
+
+@@ -646,7 +652,7 @@ do_sigbus:
+ }
+
+ DEFINE_SPINLOCK(pgd_lock);
+-struct page *pgd_list;
++LIST_HEAD(pgd_list);
+
+ void vmalloc_sync_all(void)
+ {
+@@ -666,8 +672,7 @@ void vmalloc_sync_all(void)
+ if (pgd_none(*pgd_ref))
+ continue;
+ spin_lock(&pgd_lock);
+- for (page = pgd_list; page;
+- page = (struct page *)page->index) {
++ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ if (pgd_none(*pgd))
+Index: 10.3-2007-11-26/arch/x86_64/mm/init-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/mm/init-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/mm/init-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -25,10 +25,12 @@
+ #include <linux/bootmem.h>
+ #include <linux/proc_fs.h>
+ #include <linux/pci.h>
++#include <linux/pfn.h>
+ #include <linux/poison.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/module.h>
+ #include <linux/memory_hotplug.h>
++#include <linux/nmi.h>
+
+ #include <asm/processor.h>
+ #include <asm/system.h>
+@@ -51,7 +53,7 @@
+ #define Dprintk(x...)
+ #endif
+
+-struct dma_mapping_ops* dma_ops;
++const struct dma_mapping_ops* dma_ops;
+ EXPORT_SYMBOL(dma_ops);
+
+ #if CONFIG_XEN_COMPAT <= 0x030002
+@@ -189,6 +191,13 @@ void show_mem(void)
+
+ for_each_online_pgdat(pgdat) {
+ for (i = 0; i < pgdat->node_spanned_pages; ++i) {
++ /* this loop can take a while with 256 GB and 4k pages
++ so update the NMI watchdog */
++ if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
++ touch_nmi_watchdog();
++ }
++ if (!pfn_valid(pgdat->node_start_pfn + i))
++ continue;
+ page = pfn_to_page(pgdat->node_start_pfn + i);
+ total++;
+ if (PageReserved(page))
+@@ -374,7 +383,7 @@ __set_fixmap_user (enum fixed_addresses
+ set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
+ }
+
+-unsigned long __initdata table_start, table_end;
++unsigned long __meminitdata table_start, table_end;
+
+ static __meminit void *alloc_static_page(unsigned long *phys)
+ {
+@@ -391,7 +400,7 @@ static __meminit void *alloc_static_page
+ start_pfn++;
+ memset((void *)va, 0, PAGE_SIZE);
+ return (void *)va;
+-}
++}
+
+ #define PTE_SIZE PAGE_SIZE
+
+@@ -432,28 +441,46 @@ static inline int make_readonly(unsigned
+
+ #ifndef CONFIG_XEN
+ /* Must run before zap_low_mappings */
+-__init void *early_ioremap(unsigned long addr, unsigned long size)
++__meminit void *early_ioremap(unsigned long addr, unsigned long size)
+ {
+- unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
+-
+- /* actually usually some more */
+- if (size >= LARGE_PAGE_SIZE) {
+- return NULL;
++ unsigned long vaddr;
++ pmd_t *pmd, *last_pmd;
++ int i, pmds;
++
++ pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
++ vaddr = __START_KERNEL_map;
++ pmd = level2_kernel_pgt;
++ last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
++ for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
++ for (i = 0; i < pmds; i++) {
++ if (pmd_present(pmd[i]))
++ goto next;
++ }
++ vaddr += addr & ~PMD_MASK;
++ addr &= PMD_MASK;
++ for (i = 0; i < pmds; i++, addr += PMD_SIZE)
++ set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
++ __flush_tlb();
++ return (void *)vaddr;
++ next:
++ ;
+ }
+- set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+- map += LARGE_PAGE_SIZE;
+- set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+- __flush_tlb();
+- return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
++ printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
++ return NULL;
+ }
+
+ /* To avoid virtual aliases later */
+-__init void early_iounmap(void *addr, unsigned long size)
++__meminit void early_iounmap(void *addr, unsigned long size)
+ {
+- if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
+- printk("early_iounmap: bad address %p\n", addr);
+- set_pmd(temp_mappings[0].pmd, __pmd(0));
+- set_pmd(temp_mappings[1].pmd, __pmd(0));
++ unsigned long vaddr;
++ pmd_t *pmd;
++ int i, pmds;
++
++ vaddr = (unsigned long)addr;
++ pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
++ pmd = level2_kernel_pgt + pmd_index(vaddr);
++ for (i = 0; i < pmds; i++)
++ pmd_clear(pmd + i);
+ __flush_tlb();
+ }
+ #endif
+@@ -787,14 +814,6 @@ void __meminit init_memory_mapping(unsig
+ __flush_tlb_all();
+ }
+
+-void __cpuinit zap_low_mappings(int cpu)
+-{
+- /* this is not required for Xen */
+-#if 0
+- swap_low_mappings();
+-#endif
+-}
+-
+ #ifndef CONFIG_NUMA
+ void __init paging_init(void)
+ {
+@@ -986,17 +1005,6 @@ void __init mem_init(void)
+ reservedpages << (PAGE_SHIFT-10),
+ datasize >> 10,
+ initsize >> 10);
+-
+-#ifndef CONFIG_XEN
+-#ifdef CONFIG_SMP
+- /*
+- * Sync boot_level4_pgt mappings with the init_level4_pgt
+- * except for the low identity mappings which are already zapped
+- * in init_level4_pgt. This sync-up is essential for AP's bringup
+- */
+- memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
+-#endif
+-#endif
+ }
+
+ void free_init_pages(char *what, unsigned long begin, unsigned long end)
+@@ -1006,7 +1014,7 @@ void free_init_pages(char *what, unsigne
+ if (begin >= end)
+ return;
+
+- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
++ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(addr));
+ init_page_count(virt_to_page(addr));
+@@ -1015,24 +1023,17 @@ void free_init_pages(char *what, unsigne
+ if (addr >= __START_KERNEL_map) {
+ /* make_readonly() reports all kernel addresses. */
+ __make_page_writable(__va(__pa(addr)));
+- if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
+- pgd_t *pgd = pgd_offset_k(addr);
+- pud_t *pud = pud_offset(pgd, addr);
+- pmd_t *pmd = pmd_offset(pud, addr);
+- pte_t *pte = pte_offset_kernel(pmd, addr);
+-
+- xen_l1_entry_update(pte, __pte(0)); /* fallback */
+- }
++ change_page_attr_addr(addr, 1, __pgprot(0));
+ }
+ free_page(addr);
+ totalram_pages++;
+ }
++ if (addr > __START_KERNEL_map)
++ global_flush_tlb();
+ }
+
+ void free_initmem(void)
+ {
+- memset(__initdata_begin, POISON_FREE_INITDATA,
+- __initdata_end - __initdata_begin);
+ free_init_pages("unused kernel memory",
+ (unsigned long)(&__init_begin),
+ (unsigned long)(&__init_end));
+@@ -1042,13 +1043,28 @@ void free_initmem(void)
+
+ void mark_rodata_ro(void)
+ {
+- unsigned long addr = (unsigned long)__start_rodata;
++ unsigned long start = (unsigned long)_stext, end;
++
++#ifdef CONFIG_HOTPLUG_CPU
++ /* It must still be possible to apply SMP alternatives. */
++ if (num_possible_cpus() > 1)
++ start = (unsigned long)_etext;
++#endif
++
++#ifdef CONFIG_KPROBES
++ start = (unsigned long)__start_rodata;
++#endif
++
++ end = (unsigned long)__end_rodata;
++ start = (start + PAGE_SIZE - 1) & PAGE_MASK;
++ end &= PAGE_MASK;
++ if (end <= start)
++ return;
+
+- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
+- change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
++ change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
+
+- printk ("Write protecting the kernel read-only data: %luk\n",
+- (__end_rodata - __start_rodata) >> 10);
++ printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
++ (end - start) >> 10);
+
+ /*
+ * change_page_attr_addr() requires a global_flush_tlb() call after it.
+@@ -1201,3 +1217,11 @@ int in_gate_area_no_task(unsigned long a
+ {
+ return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
+ }
++
++#ifndef CONFIG_XEN
++void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
++{
++ return __alloc_bootmem_core(pgdat->bdata, size,
++ SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
++}
++#endif
+Index: 10.3-2007-11-26/arch/x86_64/mm/pageattr-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/mm/pageattr-xen.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/mm/pageattr-xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -146,13 +146,13 @@ void mm_pin_all(void)
+ preempt_enable();
+ }
+
+-void _arch_dup_mmap(struct mm_struct *mm)
++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+ {
+ if (!mm->context.pinned)
+ mm_pin(mm);
+ }
+
+-void _arch_exit_mmap(struct mm_struct *mm)
++void arch_exit_mmap(struct mm_struct *mm)
+ {
+ struct task_struct *tsk = current;
+
+@@ -268,10 +268,11 @@ static void flush_kernel_map(void *arg)
+ struct page *pg;
+
+ /* When clflush is available always use it because it is
+- much cheaper than WBINVD */
+- if (!cpu_has_clflush)
++ much cheaper than WBINVD. Disable clflush for now because
++ the high level code is not ready yet */
++ if (1 || !cpu_has_clflush)
+ asm volatile("wbinvd" ::: "memory");
+- list_for_each_entry(pg, l, lru) {
++ else list_for_each_entry(pg, l, lru) {
+ void *adr = page_address(pg);
+ if (cpu_has_clflush)
+ cache_flush_page(adr);
+@@ -385,16 +386,24 @@ __change_page_attr(unsigned long address
+ */
+ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
+ {
+- int err = 0;
++ int err = 0, kernel_map = 0;
+ int i;
+
++ if (address >= __START_KERNEL_map
++ && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
++ address = (unsigned long)__va(__pa(address));
++ kernel_map = 1;
++ }
++
+ down_write(&init_mm.mmap_sem);
+ for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
+ unsigned long pfn = __pa(address) >> PAGE_SHIFT;
+
+- err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
+- if (err)
+- break;
++ if (!kernel_map || pte_present(pfn_pte(0, prot))) {
++ err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
++ if (err)
++ break;
++ }
+ /* Handle kernel mapping too which aliases part of the
+ * lowmem */
+ if (__pa(address) < KERNEL_TEXT_SIZE) {
+Index: 10.3-2007-11-26/drivers/char/tpm/tpm_xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/char/tpm/tpm_xen.c 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/drivers/char/tpm/tpm_xen.c 2007-10-22 13:58:57.000000000 +0200
+@@ -462,7 +462,7 @@ static int tpmif_connect(struct xenbus_d
+ tp->backend_id = domid;
+
+ err = bind_listening_port_to_irqhandler(
+- domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
++ domid, tpmif_int, IRQF_SAMPLE_RANDOM, "tpmif", tp);
+ if (err <= 0) {
+ WPRINTK("bind_listening_port_to_irqhandler failed "
+ "(err=%d)\n", err);
+Index: 10.3-2007-11-26/drivers/xen/blkfront/blkfront.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/blkfront/blkfront.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/blkfront/blkfront.c 2007-10-22 13:58:57.000000000 +0200
+@@ -236,7 +236,7 @@ static int setup_blkring(struct xenbus_d
+ info->ring_ref = err;
+
+ err = bind_listening_port_to_irqhandler(
+- dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
++ dev->otherend_id, blkif_int, IRQF_SAMPLE_RANDOM, "blkif", info);
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err,
+ "bind_listening_port_to_irqhandler");
+Index: 10.3-2007-11-26/drivers/xen/char/mem.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/char/mem.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/char/mem.c 2007-10-22 13:58:57.000000000 +0200
+@@ -18,7 +18,6 @@
+ #include <linux/raw.h>
+ #include <linux/tty.h>
+ #include <linux/capability.h>
+-#include <linux/smp_lock.h>
+ #include <linux/ptrace.h>
+ #include <linux/device.h>
+ #include <asm/pgalloc.h>
+Index: 10.3-2007-11-26/drivers/xen/core/hypervisor_sysfs.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/core/hypervisor_sysfs.c 2007-09-03 09:52:56.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/core/hypervisor_sysfs.c 2007-10-22 13:58:57.000000000 +0200
+@@ -49,7 +49,7 @@ static int __init hypervisor_subsys_init
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+- hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
++ hypervisor_subsys.kobj.ktype = &hyp_sysfs_kobj_type;
+ return 0;
+ }
+
+Index: 10.3-2007-11-26/drivers/xen/core/smpboot.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/core/smpboot.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/core/smpboot.c 2007-10-22 13:58:57.000000000 +0200
+@@ -127,7 +127,7 @@ static int xen_smp_intr_init(unsigned in
+ rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
+ cpu,
+ smp_reschedule_interrupt,
+- SA_INTERRUPT,
++ IRQF_DISABLED,
+ resched_name[cpu],
+ NULL);
+ if (rc < 0)
+@@ -138,7 +138,7 @@ static int xen_smp_intr_init(unsigned in
+ rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
+ cpu,
+ smp_call_function_interrupt,
+- SA_INTERRUPT,
++ IRQF_DISABLED,
+ callfunc_name[cpu],
+ NULL);
+ if (rc < 0)
+@@ -171,12 +171,7 @@ static void xen_smp_intr_exit(unsigned i
+
+ void cpu_bringup(void)
+ {
+-#ifdef __i386__
+- cpu_set_gdt(current_thread_info()->cpu);
+- secondary_cpu_init();
+-#else
+ cpu_init();
+-#endif
+ touch_softlockup_watchdog();
+ preempt_disable();
+ local_irq_enable();
+@@ -194,8 +189,6 @@ static void cpu_initialize_context(unsig
+ struct task_struct *idle = idle_task(cpu);
+ #ifdef __x86_64__
+ struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
+-#else
+- struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+ #endif
+
+ if (cpu_test_and_set(cpu, cpu_initialized_map))
+@@ -218,16 +211,18 @@ static void cpu_initialize_context(unsig
+
+ ctxt.ldt_ents = 0;
+
+- ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
+- ctxt.gdt_ents = gdt_descr->size / 8;
+-
+ #ifdef __i386__
++ ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu));
++ ctxt.gdt_ents = GDT_SIZE / 8;
++
+ ctxt.user_regs.cs = __KERNEL_CS;
+ ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
+
+ ctxt.kernel_ss = __KERNEL_DS;
+ ctxt.kernel_sp = idle->thread.esp0;
+
++ ctxt.user_regs.fs = __KERNEL_PERCPU;
++
+ ctxt.event_callback_cs = __KERNEL_CS;
+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+ ctxt.failsafe_callback_cs = __KERNEL_CS;
+@@ -235,6 +230,9 @@ static void cpu_initialize_context(unsig
+
+ ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
+ #else /* __x86_64__ */
++ ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
++ ctxt.gdt_ents = gdt_descr->size / 8;
++
+ ctxt.user_regs.cs = __KERNEL_CS;
+ ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
+
+@@ -259,9 +257,8 @@ void __init smp_prepare_cpus(unsigned in
+ struct task_struct *idle;
+ #ifdef __x86_64__
+ struct desc_ptr *gdt_descr;
+-#else
+- struct Xgt_desc_struct *gdt_descr;
+ #endif
++ void *gdt_addr;
+
+ boot_cpu_data.apicid = 0;
+ cpu_data[0] = boot_cpu_data;
+@@ -308,14 +305,13 @@ void __init smp_prepare_cpus(unsigned in
+ }
+ gdt_descr->size = GDT_SIZE;
+ memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
++ gdt_addr = (void *)gdt_descr->address;
+ #else
+- if (unlikely(!init_gdt(cpu, idle)))
+- continue;
+- gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
++ init_gdt(cpu);
++ gdt_addr = get_cpu_gdt_table(cpu);
+ #endif
+- make_page_readonly(
+- (void *)gdt_descr->address,
+- XENFEAT_writable_descriptor_tables);
++ make_page_readonly(gdt_addr,
++ XENFEAT_writable_descriptor_tables);
+
+ cpu_data[cpu] = boot_cpu_data;
+ cpu_data[cpu].apicid = cpu;
+@@ -326,7 +322,9 @@ void __init smp_prepare_cpus(unsigned in
+ #ifdef __x86_64__
+ cpu_pda(cpu)->pcurrent = idle;
+ cpu_pda(cpu)->cpunumber = cpu;
+- clear_ti_thread_flag(idle->thread_info, TIF_FORK);
++ clear_ti_thread_flag(task_thread_info(idle), TIF_FORK);
++#else
++ per_cpu(current_task, cpu) = idle;
+ #endif
+
+ irq_ctx_init(cpu);
+@@ -351,8 +349,12 @@ void __init smp_prepare_cpus(unsigned in
+ #endif
+ }
+
+-void __devinit smp_prepare_boot_cpu(void)
++void __init smp_prepare_boot_cpu(void)
+ {
++#ifdef __i386__
++ init_gdt(smp_processor_id());
++ switch_to_new_gdt();
++#endif
+ prefill_possible_map();
+ }
+
+Index: 10.3-2007-11-26/drivers/xen/core/xen_sysfs.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/core/xen_sysfs.c 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/core/xen_sysfs.c 2007-10-22 13:58:57.000000000 +0200
+@@ -28,12 +28,12 @@ HYPERVISOR_ATTR_RO(type);
+
+ static int __init xen_sysfs_type_init(void)
+ {
+- return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
++ return sysfs_create_file(&hypervisor_subsys.kobj, &type_attr.attr);
+ }
+
+ static void xen_sysfs_type_destroy(void)
+ {
+- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
++ sysfs_remove_file(&hypervisor_subsys.kobj, &type_attr.attr);
+ }
+
+ /* xen version attributes */
+@@ -89,13 +89,13 @@ static struct attribute_group version_gr
+
+ static int __init xen_sysfs_version_init(void)
+ {
+- return sysfs_create_group(&hypervisor_subsys.kset.kobj,
++ return sysfs_create_group(&hypervisor_subsys.kobj,
+ &version_group);
+ }
+
+ static void xen_sysfs_version_destroy(void)
+ {
+- sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group);
++ sysfs_remove_group(&hypervisor_subsys.kobj, &version_group);
+ }
+
+ /* UUID */
+@@ -121,12 +121,12 @@ HYPERVISOR_ATTR_RO(uuid);
+
+ static int __init xen_sysfs_uuid_init(void)
+ {
+- return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
++ return sysfs_create_file(&hypervisor_subsys.kobj, &uuid_attr.attr);
+ }
+
+ static void xen_sysfs_uuid_destroy(void)
+ {
+- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
++ sysfs_remove_file(&hypervisor_subsys.kobj, &uuid_attr.attr);
+ }
+
+ /* xen compilation attributes */
+@@ -199,13 +199,13 @@ static struct attribute_group xen_compil
+
+ int __init static xen_compilation_init(void)
+ {
+- return sysfs_create_group(&hypervisor_subsys.kset.kobj,
++ return sysfs_create_group(&hypervisor_subsys.kobj,
+ &xen_compilation_group);
+ }
+
+ static void xen_compilation_destroy(void)
+ {
+- sysfs_remove_group(&hypervisor_subsys.kset.kobj,
++ sysfs_remove_group(&hypervisor_subsys.kobj,
+ &xen_compilation_group);
+ }
+
+@@ -320,13 +320,13 @@ static struct attribute_group xen_proper
+
+ static int __init xen_properties_init(void)
+ {
+- return sysfs_create_group(&hypervisor_subsys.kset.kobj,
++ return sysfs_create_group(&hypervisor_subsys.kobj,
+ &xen_properties_group);
+ }
+
+ static void xen_properties_destroy(void)
+ {
+- sysfs_remove_group(&hypervisor_subsys.kset.kobj,
++ sysfs_remove_group(&hypervisor_subsys.kobj,
+ &xen_properties_group);
+ }
+
+Index: 10.3-2007-11-26/drivers/xen/netback/netback.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/netback/netback.c 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/netback/netback.c 2007-10-22 13:58:57.000000000 +0200
+@@ -156,7 +156,7 @@ static struct sk_buff *netbk_copy_skb(st
+ goto err;
+
+ skb_reserve(nskb, 16 + NET_IP_ALIGN);
+- headlen = nskb->end - nskb->data;
++ headlen = skb_end_pointer(nskb) - nskb->data;
+ if (headlen > skb_headlen(skb))
+ headlen = skb_headlen(skb);
+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+@@ -202,11 +202,15 @@ static struct sk_buff *netbk_copy_skb(st
+ len -= copy;
+ }
+
++#ifdef NET_SKBUFF_DATA_USES_OFFSET
++ offset = 0;
++#else
+ offset = nskb->data - skb->data;
++#endif
+
+- nskb->h.raw = skb->h.raw + offset;
+- nskb->nh.raw = skb->nh.raw + offset;
+- nskb->mac.raw = skb->mac.raw + offset;
++ nskb->transport_header = skb->transport_header + offset;
++ nskb->network_header = skb->network_header + offset;
++ nskb->mac_header = skb->mac_header + offset;
+
+ return nskb;
+
+@@ -1483,7 +1487,7 @@ static int __init netback_init(void)
+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+ 0,
+ netif_be_dbg,
+- SA_SHIRQ,
++ IRQF_SHARED,
+ "net-be-dbg",
+ &netif_be_dbg);
+ #endif
+Index: 10.3-2007-11-26/drivers/xen/netfront/netfront.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/netfront/netfront.c 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/netfront/netfront.c 2007-10-22 13:58:57.000000000 +0200
+@@ -533,7 +533,7 @@ static int setup_device(struct xenbus_de
+ memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
+
+ err = bind_listening_port_to_irqhandler(
+- dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
++ dev->otherend_id, netif_int, IRQF_SAMPLE_RANDOM, netdev->name,
+ netdev);
+ if (err < 0)
+ goto fail;
+Index: 10.3-2007-11-26/drivers/xen/pciback/xenbus.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/pciback/xenbus.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/pciback/xenbus.c 2007-10-22 13:58:57.000000000 +0200
+@@ -86,7 +86,7 @@ static int pciback_do_attach(struct pcib
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
+- SA_SAMPLE_RANDOM, "pciback", pdev);
++ IRQF_SAMPLE_RANDOM, "pciback", pdev);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error binding event channel to IRQ");
+Index: 10.3-2007-11-26/drivers/xen/xenoprof/xenoprofile.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/xenoprof/xenoprofile.c 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/xenoprof/xenoprofile.c 2007-10-22 13:58:57.000000000 +0200
+@@ -219,7 +219,7 @@ static int bind_virq(void)
+ result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
+ i,
+ xenoprof_ovf_interrupt,
+- SA_INTERRUPT,
++ IRQF_DISABLED,
+ "xenoprof",
+ NULL);
+
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/agp.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/agp.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/agp.h 2007-10-22 13:58:57.000000000 +0200
+@@ -13,8 +13,15 @@
+ * data corruption on some CPUs.
+ */
+
+-int map_page_into_agp(struct page *page);
+-int unmap_page_from_agp(struct page *page);
++/* Caller's responsibility to call global_flush_tlb() for
++ * performance reasons */
++#define map_page_into_agp(page) ( \
++ xen_create_contiguous_region((unsigned long)page_address(page), 0, 32) \
++ ?: change_page_attr(page, 1, PAGE_KERNEL_NOCACHE))
++#define unmap_page_from_agp(page) ( \
++ xen_destroy_contiguous_region((unsigned long)page_address(page), 0), \
++ /* only a fallback: xen_destroy_contiguous_region uses PAGE_KERNEL */ \
++ change_page_attr(page, 1, PAGE_KERNEL))
+ #define flush_agp_mappings() global_flush_tlb()
+
+ /* Could use CLFLUSH here if the cpu supports it. But then it would
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/desc.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/desc.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/desc.h 2007-10-22 13:58:57.000000000 +0200
+@@ -11,23 +11,24 @@
+
+ #include <asm/mmu.h>
+
+-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
+-
+ struct Xgt_desc_struct {
+ unsigned short size;
+ unsigned long address __attribute__((packed));
+ unsigned short pad;
+ } __attribute__ ((packed));
+
+-extern struct Xgt_desc_struct idt_descr;
+-DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+-extern struct Xgt_desc_struct early_gdt_descr;
++struct gdt_page
++{
++ struct desc_struct gdt[GDT_ENTRIES];
++} __attribute__((aligned(PAGE_SIZE)));
++DECLARE_PER_CPU(struct gdt_page, gdt_page);
+
+ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+ {
+- return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
++ return per_cpu(gdt_page, cpu).gdt;
+ }
+
++extern struct Xgt_desc_struct idt_descr;
+ extern struct desc_struct idt_table[];
+ extern void set_intr_gate(unsigned int irq, void * addr);
+
+@@ -55,51 +56,32 @@ static inline void pack_gate(__u32 *a, _
+ #define DESCTYPE_S 0x10 /* !system */
+
+ #ifndef CONFIG_XEN
+-#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
+-
+-#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+-#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
++#define load_TR_desc() native_load_tr_desc()
++#define load_gdt(dtr) native_load_gdt(dtr)
++#define load_idt(dtr) native_load_idt(dtr)
+ #define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
+ #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
+
+-#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
+-#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
+-#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
++#define store_gdt(dtr) native_store_gdt(dtr)
++#define store_idt(dtr) native_store_idt(dtr)
++#define store_tr(tr) (tr = native_store_tr())
+ #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
+-#endif
+
+-#if TLS_SIZE != 24
+-# error update this code.
+-#endif
+-
+-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
+-{
+-#define C(i) HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), *(u64 *)&t->tls_array[i])
+- C(0); C(1); C(2);
+-#undef C
+-}
++#define load_TLS(t, cpu) native_load_tls(t, cpu)
++#define set_ldt native_set_ldt
+
+-#ifndef CONFIG_XEN
+ #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+ #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+ #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+
+-static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
++static inline void write_dt_entry(struct desc_struct *dt,
++ int entry, u32 entry_low, u32 entry_high)
+ {
+- __u32 *lp = (__u32 *)((char *)dt + entry*8);
+- *lp = entry_a;
+- *(lp+1) = entry_b;
++ dt[entry].a = entry_low;
++ dt[entry].b = entry_high;
+ }
+-#define set_ldt native_set_ldt
+-#else
+-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
+-extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
+-#define set_ldt(addr, entries) xen_set_ldt((unsigned long)(addr), entries)
+-#endif
+
+-#ifndef CONFIG_XEN
+-static inline fastcall void native_set_ldt(const void *addr,
+- unsigned int entries)
++static inline void native_set_ldt(const void *addr, unsigned int entries)
+ {
+ if (likely(entries == 0))
+ __asm__ __volatile__("lldt %w0"::"q" (0));
+@@ -114,6 +96,64 @@ static inline fastcall void native_set_l
+ __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
+ }
+ }
++
++
++static inline void native_load_tr_desc(void)
++{
++ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
++}
++
++static inline void native_load_gdt(const struct Xgt_desc_struct *dtr)
++{
++ asm volatile("lgdt %0"::"m" (*dtr));
++}
++
++static inline void native_load_idt(const struct Xgt_desc_struct *dtr)
++{
++ asm volatile("lidt %0"::"m" (*dtr));
++}
++
++static inline void native_store_gdt(struct Xgt_desc_struct *dtr)
++{
++ asm ("sgdt %0":"=m" (*dtr));
++}
++
++static inline void native_store_idt(struct Xgt_desc_struct *dtr)
++{
++ asm ("sidt %0":"=m" (*dtr));
++}
++
++static inline unsigned long native_store_tr(void)
++{
++ unsigned long tr;
++ asm ("str %0":"=r" (tr));
++ return tr;
++}
++
++static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
++{
++ unsigned int i;
++ struct desc_struct *gdt = get_cpu_gdt_table(cpu);
++
++ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
++ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
++}
++#else
++#define load_TLS(t, cpu) xen_load_tls(t, cpu)
++#define set_ldt(addr, entries) xen_set_ldt((unsigned long)(addr), entries)
++
++extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
++extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
++
++static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu)
++{
++ unsigned int i;
++ struct desc_struct *gdt = get_cpu_gdt_table(cpu);
++
++ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
++ HYPERVISOR_update_descriptor(virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN + i]),
++ *(u64 *)&t->tls_array[i]);
++}
+ #endif
+
+ #ifndef CONFIG_X86_NO_IDT
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/fixmap.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/fixmap.h 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/fixmap.h 2007-10-22 13:58:57.000000000 +0200
+@@ -19,10 +19,8 @@
+ * the start of the fixmap.
+ */
+ extern unsigned long __FIXADDR_TOP;
+-#ifdef CONFIG_COMPAT_VDSO
+-#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
+-#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
+-#endif
++#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
++#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
+
+ #ifndef __ASSEMBLY__
+ #include <linux/kernel.h>
+@@ -85,6 +83,9 @@ enum fixed_addresses {
+ #ifdef CONFIG_PCI_MMCONFIG
+ FIX_PCIE_MCFG,
+ #endif
++#ifdef CONFIG_PARAVIRT
++ FIX_PARAVIRT_BOOTMAP,
++#endif
+ FIX_SHARED_INFO,
+ #define NR_FIX_ISAMAPS 256
+ FIX_ISAMAP_END,
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/highmem.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/highmem.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/highmem.h 2007-10-22 13:58:57.000000000 +0200
+@@ -67,12 +67,18 @@ extern void FASTCALL(kunmap_high(struct
+
+ void *kmap(struct page *page);
+ void kunmap(struct page *page);
++void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
+ void *kmap_atomic(struct page *page, enum km_type type);
+ void *kmap_atomic_pte(struct page *page, enum km_type type);
+ void kunmap_atomic(void *kvaddr, enum km_type type);
+ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
+ struct page *kmap_atomic_to_page(void *ptr);
+
++#define kmap_atomic_pte(page, type) \
++ kmap_atomic_prot(page, type, \
++ test_bit(PG_pinned, &(page)->flags) \
++ ? PAGE_KERNEL_RO : kmap_prot)
++
+ #define flush_cache_kmaps() do { } while (0)
+
+ #endif /* __KERNEL__ */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/io.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/io.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/io.h 2007-10-22 13:58:57.000000000 +0200
+@@ -264,15 +264,18 @@ static inline void flush_write_buffers(v
+
+ #endif /* __KERNEL__ */
+
+-#define __SLOW_DOWN_IO "outb %%al,$0x80;"
++static inline void xen_io_delay(void)
++{
++ asm volatile("outb %%al,$0x80" : : : "memory");
++}
+
+ static inline void slow_down_io(void) {
+- __asm__ __volatile__(
+- __SLOW_DOWN_IO
++ xen_io_delay();
+ #ifdef REALLY_SLOW_IO
+- __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
++ xen_io_delay();
++ xen_io_delay();
++ xen_io_delay();
+ #endif
+- : : );
+ }
+
+ #ifdef CONFIG_X86_NUMAQ
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/irqflags.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/irqflags.h 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/irqflags.h 2007-10-22 13:58:57.000000000 +0200
+@@ -11,6 +11,43 @@
+ #define _ASM_IRQFLAGS_H
+
+ #ifndef __ASSEMBLY__
++#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask)
++
++#define xen_restore_fl(f) \
++do { \
++ vcpu_info_t *_vcpu; \
++ barrier(); \
++ _vcpu = current_vcpu_info(); \
++ if ((_vcpu->evtchn_upcall_mask = (f)) == 0) { \
++ barrier(); /* unmask then check (avoid races) */\
++ if (unlikely(_vcpu->evtchn_upcall_pending)) \
++ force_evtchn_callback(); \
++ } \
++} while (0)
++
++#define xen_irq_disable() \
++do { \
++ current_vcpu_info()->evtchn_upcall_mask = 1; \
++ barrier(); \
++} while (0)
++
++#define xen_irq_enable() \
++do { \
++ vcpu_info_t *_vcpu; \
++ barrier(); \
++ _vcpu = current_vcpu_info(); \
++ _vcpu->evtchn_upcall_mask = 0; \
++ barrier(); /* unmask then check (avoid races) */ \
++ if (unlikely(_vcpu->evtchn_upcall_pending)) \
++ force_evtchn_callback(); \
++} while (0)
++
++void xen_safe_halt(void);
++
++void xen_halt(void);
++#endif /* __ASSEMBLY__ */
++
++#ifndef __ASSEMBLY__
+
+ /*
+ * The use of 'barrier' in the following reflects their use as local-lock
+@@ -20,48 +57,31 @@
+ * includes these barriers, for example.
+ */
+
+-#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask)
++#define __raw_local_save_flags(void) xen_save_fl()
+
+-#define raw_local_irq_restore(x) \
+-do { \
+- vcpu_info_t *_vcpu; \
+- barrier(); \
+- _vcpu = current_vcpu_info(); \
+- if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
+- barrier(); /* unmask then check (avoid races) */ \
+- if (unlikely(_vcpu->evtchn_upcall_pending)) \
+- force_evtchn_callback(); \
+- } \
+-} while (0)
++#define raw_local_irq_restore(flags) xen_restore_fl(flags)
+
+-#define raw_local_irq_disable() \
+-do { \
+- current_vcpu_info()->evtchn_upcall_mask = 1; \
+- barrier(); \
+-} while (0)
++#define raw_local_irq_disable() xen_irq_disable()
+
+-#define raw_local_irq_enable() \
+-do { \
+- vcpu_info_t *_vcpu; \
+- barrier(); \
+- _vcpu = current_vcpu_info(); \
+- _vcpu->evtchn_upcall_mask = 0; \
+- barrier(); /* unmask then check (avoid races) */ \
+- if (unlikely(_vcpu->evtchn_upcall_pending)) \
+- force_evtchn_callback(); \
+-} while (0)
++#define raw_local_irq_enable() xen_irq_enable()
+
+ /*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+-void raw_safe_halt(void);
++static inline void raw_safe_halt(void)
++{
++ xen_safe_halt();
++}
+
+ /*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+-void halt(void);
++static inline void halt(void)
++{
++ xen_halt();
++}
+
+ /*
+ * For spinlocks, etc:
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/mmu.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/mmu.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/mmu.h 2007-10-22 13:58:57.000000000 +0200
+@@ -18,12 +18,4 @@ typedef struct {
+ #endif
+ } mm_context_t;
+
+-/* mm/memory.c:exit_mmap hook */
+-extern void _arch_exit_mmap(struct mm_struct *mm);
+-#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+-
+-/* kernel/fork.c:dup_mmap hook */
+-extern void _arch_dup_mmap(struct mm_struct *mm);
+-#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
+-
+ #endif
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/mmu_context.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/mmu_context.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/mmu_context.h 2007-10-22 13:58:57.000000000 +0200
+@@ -6,6 +6,20 @@
+ #include <asm/pgalloc.h>
+ #include <asm/tlbflush.h>
+
++void arch_exit_mmap(struct mm_struct *mm);
++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
++
++void mm_pin(struct mm_struct *mm);
++void mm_unpin(struct mm_struct *mm);
++void mm_pin_all(void);
++
++static inline void xen_activate_mm(struct mm_struct *prev,
++ struct mm_struct *next)
++{
++ if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
++ mm_pin(next);
++}
++
+ /*
+ * Used for LDT copy/destruction.
+ */
+@@ -37,10 +51,6 @@ static inline void __prepare_arch_switch
+ : : "r" (0) );
+ }
+
+-extern void mm_pin(struct mm_struct *mm);
+-extern void mm_unpin(struct mm_struct *mm);
+-void mm_pin_all(void);
+-
+ static inline void switch_mm(struct mm_struct *prev,
+ struct mm_struct *next,
+ struct task_struct *tsk)
+@@ -97,11 +107,10 @@ static inline void switch_mm(struct mm_s
+ #define deactivate_mm(tsk, mm) \
+ asm("movl %0,%%gs": :"r" (0));
+
+-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+-{
+- if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
+- mm_pin(next);
+- switch_mm(prev, next, NULL);
+-}
++#define activate_mm(prev, next) \
++ do { \
++ xen_activate_mm(prev, next); \
++ switch_mm((prev),(next),NULL); \
++ } while(0)
+
+ #endif
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/page.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/page.h 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/page.h 2007-10-22 13:58:57.000000000 +0200
+@@ -66,6 +66,7 @@
+ * These are used to make use of C type-checking..
+ */
+ extern int nx_enabled;
++
+ #ifdef CONFIG_X86_PAE
+ extern unsigned long long __supported_pte_mask;
+ typedef struct { unsigned long pte_low, pte_high; } pte_t;
+@@ -74,69 +75,117 @@ typedef struct { unsigned long long pgd;
+ typedef struct { unsigned long long pgprot; } pgprot_t;
+ #define pgprot_val(x) ((x).pgprot)
+ #include <asm/maddr.h>
+-#define __pte(x) ({ unsigned long long _x = (x); \
+- if (_x & _PAGE_PRESENT) _x = pte_phys_to_machine(_x); \
+- ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
+-#define __pgd(x) ({ unsigned long long _x = (x); \
+- (pgd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; })
+-#define __pmd(x) ({ unsigned long long _x = (x); \
+- (pmd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; })
+-static inline unsigned long long pte_val_ma(pte_t x)
+-{
+- return ((unsigned long long)x.pte_high << 32) | x.pte_low;
+-}
+-static inline unsigned long long pte_val(pte_t x)
++
++static inline unsigned long long xen_pgd_val(pgd_t pgd)
+ {
+- unsigned long long ret = pte_val_ma(x);
+- if (x.pte_low & _PAGE_PRESENT) ret = pte_machine_to_phys(ret);
++ unsigned long long ret = pgd.pgd;
++ if (ret & _PAGE_PRESENT)
++ ret = pte_machine_to_phys(ret);
+ return ret;
+ }
+-static inline unsigned long long pmd_val(pmd_t x)
++
++static inline unsigned long long xen_pmd_val(pmd_t pmd)
+ {
+- unsigned long long ret = x.pmd;
++ unsigned long long ret = pmd.pmd;
+ #if CONFIG_XEN_COMPAT <= 0x030002
+- if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT;
++ if (ret)
++ ret = pte_machine_to_phys(ret) | _PAGE_PRESENT;
+ #else
+- if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret);
++ if (ret & _PAGE_PRESENT)
++ ret = pte_machine_to_phys(ret);
+ #endif
+ return ret;
+ }
+-static inline unsigned long long pgd_val(pgd_t x)
++
++static inline unsigned long long pte_val_ma(pte_t pte)
+ {
+- unsigned long long ret = x.pgd;
+- if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret);
++ return ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
++}
++static inline unsigned long long xen_pte_val(pte_t pte)
++{
++ unsigned long long ret = pte_val_ma(pte);
++ if (pte.pte_low & _PAGE_PRESENT)
++ ret = pte_machine_to_phys(ret);
+ return ret;
+ }
++
++static inline pgd_t xen_make_pgd(unsigned long long val)
++{
++ if (val & _PAGE_PRESENT)
++ val = pte_phys_to_machine(val);
++ return (pgd_t) { val };
++}
++
++static inline pmd_t xen_make_pmd(unsigned long long val)
++{
++ if (val & _PAGE_PRESENT)
++ val = pte_phys_to_machine(val);
++ return (pmd_t) { val };
++}
++
++static inline pte_t xen_make_pte(unsigned long long val)
++{
++ if (val & _PAGE_PRESENT)
++ val = pte_phys_to_machine(val);
++ return (pte_t) { .pte_low = val, .pte_high = (val >> 32) } ;
++}
++
++#define pmd_val(x) xen_pmd_val(x)
++#define __pmd(x) xen_make_pmd(x)
++
+ #define HPAGE_SHIFT 21
+ #include <asm-generic/pgtable-nopud.h>
+-#else
++#else /* !CONFIG_X86_PAE */
+ typedef struct { unsigned long pte_low; } pte_t;
+ typedef struct { unsigned long pgd; } pgd_t;
+ typedef struct { unsigned long pgprot; } pgprot_t;
+ #define pgprot_val(x) ((x).pgprot)
+-#include <asm/maddr.h>
+ #define boot_pte_t pte_t /* or would you rather have a typedef */
+-#define pte_val(x) (((x).pte_low & _PAGE_PRESENT) ? \
+- machine_to_phys((x).pte_low) : \
+- (x).pte_low)
+-#define pte_val_ma(x) ((x).pte_low)
+-#define __pte(x) ({ unsigned long _x = (x); \
+- (pte_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; })
+-#define __pgd(x) ({ unsigned long _x = (x); \
+- (pgd_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; })
+-static inline unsigned long pgd_val(pgd_t x)
++#include <asm/maddr.h>
++
++static inline unsigned long xen_pgd_val(pgd_t pgd)
+ {
+- unsigned long ret = x.pgd;
++ unsigned long ret = pgd.pgd;
+ #if CONFIG_XEN_COMPAT <= 0x030002
+- if (ret) ret = machine_to_phys(ret) | _PAGE_PRESENT;
++ if (ret)
++ ret = machine_to_phys(ret) | _PAGE_PRESENT;
+ #else
+- if (ret & _PAGE_PRESENT) ret = machine_to_phys(ret);
++ if (ret & _PAGE_PRESENT)
++ ret = machine_to_phys(ret);
+ #endif
+ return ret;
+ }
++
++static inline unsigned long pte_val_ma(pte_t pte)
++{
++ return pte.pte_low;
++}
++static inline unsigned long xen_pte_val(pte_t pte)
++{
++ unsigned long ret = pte_val_ma(pte);
++ if (ret & _PAGE_PRESENT)
++ ret = machine_to_phys(ret);
++ return ret;
++}
++
++static inline pgd_t xen_make_pgd(unsigned long val)
++{
++ if (val & _PAGE_PRESENT)
++ val = phys_to_machine(val);
++ return (pgd_t) { val };
++}
++
++static inline pte_t xen_make_pte(unsigned long val)
++{
++ if (val & _PAGE_PRESENT)
++ val = phys_to_machine(val);
++ return (pte_t) { .pte_low = val };
++}
++
+ #define HPAGE_SHIFT 22
+ #include <asm-generic/pgtable-nopmd.h>
+-#endif
++#endif /* CONFIG_X86_PAE */
++
+ #define PTE_MASK PHYSICAL_PAGE_MASK
+
+ #ifdef CONFIG_HUGETLB_PAGE
+@@ -148,6 +197,11 @@ static inline unsigned long pgd_val(pgd_
+
+ #define __pgprot(x) ((pgprot_t) { (x) } )
+
++#define pgd_val(x) xen_pgd_val(x)
++#define __pgd(x) xen_make_pgd(x)
++#define pte_val(x) xen_pte_val(x)
++#define __pte(x) xen_make_pte(x)
++
+ #endif /* !__ASSEMBLY__ */
+
+ /* to align the pointer to the (next) page boundary */
+@@ -188,6 +242,7 @@ extern int page_is_ram(unsigned long pag
+ #define __PAGE_OFFSET ((unsigned long)CONFIG_PAGE_OFFSET)
+ #endif
+
++
+ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+ #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
+ #define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
+@@ -212,9 +267,7 @@ extern int page_is_ram(unsigned long pag
+ #include <asm-generic/memory_model.h>
+ #include <asm-generic/page.h>
+
+-#ifndef CONFIG_COMPAT_VDSO
+ #define __HAVE_ARCH_GATE_AREA 1
+-#endif
+ #endif /* __KERNEL__ */
+
+ #endif /* _I386_PAGE_H */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgalloc.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgalloc.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgalloc.h 2007-10-22 13:58:57.000000000 +0200
+@@ -1,7 +1,6 @@
+ #ifndef _I386_PGALLOC_H
+ #define _I386_PGALLOC_H
+
+-#include <asm/fixmap.h>
+ #include <linux/threads.h>
+ #include <linux/mm.h> /* for struct page */
+ #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
+@@ -69,6 +68,4 @@ do { \
+ #define pud_populate(mm, pmd, pte) BUG()
+ #endif
+
+-#define check_pgt_cache() do { } while (0)
+-
+ #endif /* _I386_PGALLOC_H */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h 2007-12-06 17:27:30.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,20 +0,0 @@
+-#ifndef _I386_PGTABLE_2LEVEL_DEFS_H
+-#define _I386_PGTABLE_2LEVEL_DEFS_H
+-
+-#define HAVE_SHARED_KERNEL_PMD 0
+-
+-/*
+- * traditional i386 two-level paging structure:
+- */
+-
+-#define PGDIR_SHIFT 22
+-#define PTRS_PER_PGD 1024
+-
+-/*
+- * the i386 is two-level, so we don't really have any
+- * PMD directory physically.
+- */
+-
+-#define PTRS_PER_PTE 1024
+-
+-#endif /* _I386_PGTABLE_2LEVEL_DEFS_H */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-2level.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-10-22 13:54:57.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-10-22 13:58:57.000000000 +0200
+@@ -11,22 +11,43 @@
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+-#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+-
+-#define set_pte_at(_mm,addr,ptep,pteval) do { \
+- if (((_mm) != current->mm && (_mm) != &init_mm) || \
+- HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \
+- set_pte((ptep), (pteval)); \
+-} while (0)
+-
+-#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
++static inline void xen_set_pte(pte_t *ptep, pte_t pte)
++{
++ *ptep = pte;
++}
++static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
++ pte_t *ptep , pte_t pte)
++{
++ if ((mm != current->mm && mm != &init_mm) ||
++ HYPERVISOR_update_va_mapping(addr, pte, 0))
++ xen_set_pte(ptep, pte);
++}
++static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
++{
++ xen_l2_entry_update(pmdp, pmd);
++}
++#define set_pte(pteptr, pteval) xen_set_pte(pteptr, pteval)
++#define set_pte_at(mm,addr,ptep,pteval) xen_set_pte_at(mm, addr, ptep, pteval)
++#define set_pmd(pmdptr, pmdval) xen_set_pmd(pmdptr, pmdval)
+
+ #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
+
+ #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+ #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+
+-#define raw_ptep_get_and_clear(xp, pte) __pte_ma(xchg(&(xp)->pte_low, 0))
++static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
++{
++ xen_set_pte_at(mm, addr, xp, __pte(0));
++}
++
++#ifdef CONFIG_SMP
++static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t res)
++{
++ return __pte_ma(xchg(&xp->pte_low, 0));
++}
++#else
++#define xen_ptep_get_and_clear(xp, res) xen_local_ptep_get_and_clear(xp, res)
++#endif
+
+ #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+ #define ptep_clear_flush(vma, addr, ptep) \
+@@ -91,6 +112,4 @@ static inline int pte_exec_kernel(pte_t
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
+ #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+-void vmalloc_sync_all(void);
+-
+ #endif /* _I386_PGTABLE_2LEVEL_H */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h 2007-10-22 13:58:57.000000000 +0200
+@@ -1,7 +1,7 @@
+ #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
+ #define _I386_PGTABLE_3LEVEL_DEFS_H
+
+-#define HAVE_SHARED_KERNEL_PMD 0
++#define SHARED_KERNEL_PMD 0
+
+ /*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-3level.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-10-22 13:58:00.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-10-22 13:58:57.000000000 +0200
+@@ -49,32 +49,40 @@ static inline int pte_exec_kernel(pte_t
+ * value and then use set_pte to update it. -ben
+ */
+
+-static inline void set_pte(pte_t *ptep, pte_t pte)
++static inline void xen_set_pte(pte_t *ptep, pte_t pte)
+ {
+ ptep->pte_high = pte.pte_high;
+ smp_wmb();
+ ptep->pte_low = pte.pte_low;
+ }
+-#define set_pte_atomic(pteptr,pteval) \
+- set_64bit((unsigned long long *)(pteptr),pte_val_ma(pteval))
+
+-#define set_pte_at(_mm,addr,ptep,pteval) do { \
+- if (((_mm) != current->mm && (_mm) != &init_mm) || \
+- HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \
+- set_pte((ptep), (pteval)); \
+-} while (0)
+-
+-#define set_pmd(pmdptr,pmdval) \
+- xen_l2_entry_update((pmdptr), (pmdval))
+-#define set_pud(pudptr,pudval) \
+- xen_l3_entry_update((pudptr), (pudval))
++static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
++ pte_t *ptep , pte_t pte)
++{
++ if ((mm != current->mm && mm != &init_mm) ||
++ HYPERVISOR_update_va_mapping(addr, pte, 0))
++ xen_set_pte(ptep, pte);
++}
++
++static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
++{
++ set_64bit((unsigned long long *)(ptep),pte_val_ma(pte));
++}
++static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
++{
++ xen_l2_entry_update(pmdp, pmd);
++}
++static inline void xen_set_pud(pud_t *pudp, pud_t pud)
++{
++ xen_l3_entry_update(pudp, pud);
++}
+
+ /*
+ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
+ * entry, so clear the bottom half first and enforce ordering with a compiler
+ * barrier.
+ */
+-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
++static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+ if ((mm != current->mm && mm != &init_mm)
+ || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
+@@ -84,7 +92,18 @@ static inline void pte_clear(struct mm_s
+ }
+ }
+
+-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
++static inline void xen_pmd_clear(pmd_t *pmd)
++{
++ xen_l2_entry_update(pmd, __pmd(0));
++}
++
++#define set_pte(ptep, pte) xen_set_pte(ptep, pte)
++#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
++#define set_pte_atomic(ptep, pte) xen_set_pte_atomic(ptep, pte)
++#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd)
++#define set_pud(pudp, pud) xen_set_pud(pudp, pud)
++#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep)
++#define pmd_clear(pmd) xen_pmd_clear(pmd)
+
+ /*
+ * Pentium-II erratum A13: in PAE mode we explicitly have to flush
+@@ -105,7 +124,8 @@ static inline void pud_clear (pud_t * pu
+ #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
+ pmd_index(address))
+
+-static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res)
++#ifdef CONFIG_SMP
++static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
+ {
+ uint64_t val = pte_val_ma(res);
+ if (__cmpxchg64(ptep, val, 0) != val) {
+@@ -116,6 +136,9 @@ static inline pte_t raw_ptep_get_and_cle
+ }
+ return res;
+ }
++#else
++#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte)
++#endif
+
+ #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+ #define ptep_clear_flush(vma, addr, ptep) \
+@@ -160,13 +183,13 @@ extern unsigned long long __supported_pt
+ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+ {
+ return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
+- pgprot_val(pgprot)) & __supported_pte_mask);
++ pgprot_val(pgprot)) & __supported_pte_mask);
+ }
+
+ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+ {
+ return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
+- pgprot_val(pgprot)) & __supported_pte_mask);
++ pgprot_val(pgprot)) & __supported_pte_mask);
+ }
+
+ /*
+@@ -186,6 +209,4 @@ static inline pmd_t pfn_pmd(unsigned lon
+
+ #define __pmd_free_tlb(tlb, x) do { } while (0)
+
+-void vmalloc_sync_all(void);
+-
+ #endif /* _I386_PGTABLE_3LEVEL_H */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable.h 2007-10-22 14:08:56.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable.h 2007-10-22 14:09:14.000000000 +0200
+@@ -24,11 +24,11 @@
+ #include <linux/slab.h>
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
++#include <linux/sched.h>
+
+ /* Is this pagetable pinned? */
+ #define PG_pinned PG_arch_1
+
+-struct mm_struct;
+ struct vm_area_struct;
+
+ /*
+@@ -38,17 +38,16 @@ struct vm_area_struct;
+ #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+ extern unsigned long empty_zero_page[1024];
+ extern pgd_t *swapper_pg_dir;
+-extern struct kmem_cache *pgd_cache;
+ extern struct kmem_cache *pmd_cache;
+ extern spinlock_t pgd_lock;
+ extern struct page *pgd_list;
++void check_pgt_cache(void);
+
+ void pmd_ctor(void *, struct kmem_cache *, unsigned long);
+-void pgd_ctor(void *, struct kmem_cache *, unsigned long);
+-void pgd_dtor(void *, struct kmem_cache *, unsigned long);
+ void pgtable_cache_init(void);
+ void paging_init(void);
+
++
+ /*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+@@ -162,6 +161,7 @@ void paging_init(void);
+
+ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
+ #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
++#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
+ #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD)
+ #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
+ #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+@@ -169,6 +169,7 @@ extern unsigned long long __PAGE_KERNEL,
+ #define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
+ #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
+ #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
++#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
+ #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
+ #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
+ #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
+@@ -271,7 +272,13 @@ static inline pte_t pte_mkhuge(pte_t pte
+ */
+ #define pte_update(mm, addr, ptep) do { } while (0)
+ #define pte_update_defer(mm, addr, ptep) do { } while (0)
+-#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0)
++
++/* local pte updates need not use xchg for locking */
++static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res)
++{
++ xen_set_pte(ptep, __pte(0));
++ return res;
++}
+
+ /*
+ * We only update the dirty/accessed state if we set
+@@ -282,17 +289,34 @@ static inline pte_t pte_mkhuge(pte_t pte
+ */
+ #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+ #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
+-do { \
+- if (dirty) \
++({ \
++ int __changed = !pte_same(*(ptep), entry); \
++ if (__changed && (dirty)) \
+ ptep_establish(vma, address, ptep, entry); \
+-} while (0)
++ __changed; \
++})
+
+-/*
+- * We don't actually have these, but we want to advertise them so that
+- * we can encompass the flush here.
+- */
+ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
++#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \
++ int __ret = 0; \
++ if (pte_dirty(*(ptep))) \
++ __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \
++ &(ptep)->pte_low); \
++ if (__ret) \
++ pte_update((vma)->vm_mm, addr, ptep); \
++ __ret; \
++})
++
+ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
++#define ptep_test_and_clear_young(vma, addr, ptep) ({ \
++ int __ret = 0; \
++ if (pte_young(*(ptep))) \
++ __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
++ &(ptep)->pte_low); \
++ if (__ret) \
++ pte_update((vma)->vm_mm, addr, ptep); \
++ __ret; \
++})
+
+ /*
+ * Rules for using ptep_establish: the pte MUST be a user pte, and
+@@ -319,7 +343,7 @@ do { \
+ int __dirty = pte_dirty(__pte); \
+ __pte = pte_mkclean(__pte); \
+ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
+- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
++ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
+ else if (__dirty) \
+ (ptep)->pte_low = __pte.pte_low; \
+ __dirty; \
+@@ -332,7 +356,7 @@ do { \
+ int __young = pte_young(__pte); \
+ __pte = pte_mkold(__pte); \
+ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
+- ptep_set_access_flags(vma, address, ptep, __pte, __young); \
++ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
+ else if (__young) \
+ (ptep)->pte_low = __pte.pte_low; \
+ __young; \
+@@ -345,7 +369,7 @@ static inline pte_t ptep_get_and_clear(s
+ if (!pte_none(pte)
+ && (mm != &init_mm
+ || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) {
+- pte = raw_ptep_get_and_clear(ptep, pte);
++ pte = xen_ptep_get_and_clear(ptep, pte);
+ pte_update(mm, addr, ptep);
+ }
+ return pte;
+@@ -487,24 +511,10 @@ extern pte_t *lookup_address(unsigned lo
+ #endif
+
+ #if defined(CONFIG_HIGHPTE)
+-#define pte_offset_map(dir, address) \
+-({ \
+- pte_t *__ptep; \
+- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
+- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \
+- paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \
+- __ptep = __ptep + pte_index(address); \
+- __ptep; \
+-})
+-#define pte_offset_map_nested(dir, address) \
+-({ \
+- pte_t *__ptep; \
+- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
+- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \
+- paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \
+- __ptep = __ptep + pte_index(address); \
+- __ptep; \
+-})
++#define pte_offset_map(dir, address) \
++ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
++#define pte_offset_map_nested(dir, address) \
++ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
+ #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+ #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
+ #else
+@@ -574,10 +584,6 @@ int touch_pte_range(struct mm_struct *mm
+ #define io_remap_pfn_range(vma,from,pfn,size,prot) \
+ direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
+
+-#define MK_IOSPACE_PFN(space, pfn) (pfn)
+-#define GET_IOSPACE(pfn) 0
+-#define GET_PFN(pfn) (pfn)
+-
+ #include <asm-generic/pgtable.h>
+
+ #endif /* _I386_PGTABLE_H */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/processor.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/processor.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/processor.h 2007-10-22 13:58:57.000000000 +0200
+@@ -21,6 +21,7 @@
+ #include <asm/percpu.h>
+ #include <linux/cpumask.h>
+ #include <linux/init.h>
++#include <asm/processor-flags.h>
+ #include <xen/interface/physdev.h>
+
+ /* flag for disabling the tsc */
+@@ -118,7 +119,8 @@ extern char ignore_fpu_irq;
+
+ void __init cpu_detect(struct cpuinfo_x86 *c);
+
+-extern void identify_cpu(struct cpuinfo_x86 *);
++extern void identify_boot_cpu(void);
++extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+ extern void print_cpu_info(struct cpuinfo_x86 *);
+ extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+ extern unsigned short num_cache_leaves;
+@@ -129,29 +131,8 @@ extern void detect_ht(struct cpuinfo_x86
+ static inline void detect_ht(struct cpuinfo_x86 *c) {}
+ #endif
+
+-/*
+- * EFLAGS bits
+- */
+-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+-#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
+-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+-
+-static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx,
+- unsigned int *ecx, unsigned int *edx)
++static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx,
++ unsigned int *ecx, unsigned int *edx)
+ {
+ /* ecx is often an input as well as an output. */
+ __asm__(XEN_CPUID
+@@ -165,21 +146,6 @@ static inline fastcall void xen_cpuid(un
+ #define load_cr3(pgdir) write_cr3(__pa(pgdir))
+
+ /*
+- * Intel CPU features in CR4
+- */
+-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+-#define X86_CR4_MCE 0x0040 /* Machine check enable */
+-#define X86_CR4_PGE 0x0080 /* enable global pages */
+-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
+-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
+-
+-/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+@@ -206,26 +172,6 @@ static inline void clear_in_cr4 (unsigne
+ }
+
+ /*
+- * NSC/Cyrix CPU configuration register indexes
+- */
+-
+-#define CX86_PCR0 0x20
+-#define CX86_GCR 0xb8
+-#define CX86_CCR0 0xc0
+-#define CX86_CCR1 0xc1
+-#define CX86_CCR2 0xc2
+-#define CX86_CCR3 0xc3
+-#define CX86_CCR4 0xe8
+-#define CX86_CCR5 0xe9
+-#define CX86_CCR6 0xea
+-#define CX86_CCR7 0xeb
+-#define CX86_PCR1 0xf0
+-#define CX86_DIR0 0xfe
+-#define CX86_DIR1 0xff
+-#define CX86_ARR_BASE 0xc4
+-#define CX86_RCR_BASE 0xdc
+-
+-/*
+ * NSC/Cyrix CPU indexed register access macros
+ */
+
+@@ -351,7 +297,8 @@ typedef struct {
+ struct thread_struct;
+
+ #ifndef CONFIG_X86_NO_TSS
+-struct tss_struct {
++/* This is the TSS defined by the hardware. */
++struct i386_hw_tss {
+ unsigned short back_link,__blh;
+ unsigned long esp0;
+ unsigned short ss0,__ss0h;
+@@ -375,6 +322,11 @@ struct tss_struct {
+ unsigned short gs, __gsh;
+ unsigned short ldt, __ldth;
+ unsigned short trace, io_bitmap_base;
++} __attribute__((packed));
++
++struct tss_struct {
++ struct i386_hw_tss x86_tss;
++
+ /*
+ * The extra 1 is there because the CPU will access an
+ * additional byte beyond the end of the IO permission
+@@ -428,10 +380,11 @@ struct thread_struct {
+ };
+
+ #define INIT_THREAD { \
++ .esp0 = sizeof(init_stack) + (long)&init_stack, \
+ .vm86_info = NULL, \
+ .sysenter_cs = __KERNEL_CS, \
+ .io_bitmap_ptr = NULL, \
+- .fs = __KERNEL_PDA, \
++ .fs = __KERNEL_PERCPU, \
+ }
+
+ /*
+@@ -441,10 +394,12 @@ struct thread_struct {
+ * be within the limit.
+ */
+ #define INIT_TSS { \
+- .esp0 = sizeof(init_stack) + (long)&init_stack, \
+- .ss0 = __KERNEL_DS, \
+- .ss1 = __KERNEL_CS, \
+- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
++ .x86_tss = { \
++ .esp0 = sizeof(init_stack) + (long)&init_stack, \
++ .ss0 = __KERNEL_DS, \
++ .ss1 = __KERNEL_CS, \
++ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
++ }, \
+ .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
+ }
+
+@@ -551,36 +506,31 @@ static inline void rep_nop(void)
+
+ #define cpu_relax() rep_nop()
+
+-#define paravirt_enabled() 0
+-#define __cpuid xen_cpuid
+-
+ #ifndef CONFIG_X86_NO_TSS
+-static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
++static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+ {
+- tss->esp0 = thread->esp0;
++ tss->x86_tss.esp0 = thread->esp0;
+ /* This can only happen when SEP is enabled, no need to test "SEP"arately */
+- if (unlikely(tss->ss1 != thread->sysenter_cs)) {
+- tss->ss1 = thread->sysenter_cs;
++ if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
++ tss->x86_tss.ss1 = thread->sysenter_cs;
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+ }
+ }
+-#define load_esp0(tss, thread) \
+- __load_esp0(tss, thread)
+ #else
+-#define load_esp0(tss, thread) \
++#define xen_load_esp0(tss, thread) \
+ HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)
+ #endif
+
+
+-/*
+- * These special macros can be used to get or set a debugging register
+- */
+-#define get_debugreg(var, register) \
+- (var) = HYPERVISOR_get_debugreg((register))
+-#define set_debugreg(value, register) \
+- HYPERVISOR_set_debugreg((register), (value))
++static inline unsigned long xen_get_debugreg(int regno)
++{
++ return HYPERVISOR_get_debugreg(regno);
++}
+
+-#define set_iopl_mask xen_set_iopl_mask
++static inline void xen_set_debugreg(int regno, unsigned long value)
++{
++ HYPERVISOR_set_debugreg(regno, value);
++}
+
+ /*
+ * Set IOPL bits in EFLAGS from given mask
+@@ -595,6 +545,21 @@ static inline void xen_set_iopl_mask(uns
+ }
+
+
++#define paravirt_enabled() 0
++#define __cpuid xen_cpuid
++
++#define load_esp0 xen_load_esp0
++
++/*
++ * These special macros can be used to get or set a debugging register
++ */
++#define get_debugreg(var, register) \
++ (var) = xen_get_debugreg(register)
++#define set_debugreg(value, register) \
++ xen_set_debugreg(register, value)
++
++#define set_iopl_mask xen_set_iopl_mask
++
+ /*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+@@ -747,8 +712,14 @@ extern unsigned long boot_option_idle_ov
+ extern void enable_sep_cpu(void);
+ extern int sysenter_setup(void);
+
+-extern int init_gdt(int cpu, struct task_struct *idle);
++/* Defined in head.S */
++extern struct Xgt_desc_struct early_gdt_descr;
++
+ extern void cpu_set_gdt(int);
+-extern void secondary_cpu_init(void);
++extern void switch_to_new_gdt(void);
++extern void cpu_init(void);
++extern void init_gdt(int cpu);
++
++extern int force_mwait;
+
+ #endif /* __ASM_I386_PROCESSOR_H */
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/scatterlist.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/scatterlist.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/scatterlist.h 2007-10-22 13:58:57.000000000 +0200
+@@ -1,6 +1,8 @@
+ #ifndef _I386_SCATTERLIST_H
+ #define _I386_SCATTERLIST_H
+
++#include <asm/types.h>
++
+ struct scatterlist {
+ struct page *page;
+ unsigned int offset;
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/segment.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/segment.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/segment.h 2007-10-22 13:58:57.000000000 +0200
+@@ -39,7 +39,7 @@
+ * 25 - APM BIOS support
+ *
+ * 26 - ESPFIX small SS
+- * 27 - PDA [ per-cpu private data area ]
++ * 27 - per-cpu [ offset to per-cpu data area ]
+ * 28 - unused
+ * 29 - unused
+ * 30 - unused
+@@ -74,8 +74,12 @@
+ #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
+ #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
+-#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15)
+-#define __KERNEL_PDA (GDT_ENTRY_PDA * 8)
++#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15)
++#ifdef CONFIG_SMP
++#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
++#else
++#define __KERNEL_PERCPU 0
++#endif
+
+ #define GDT_ENTRY_DOUBLEFAULT_TSS 31
+
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/smp.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/smp.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/smp.h 2007-10-22 13:58:57.000000000 +0200
+@@ -8,19 +8,15 @@
+ #include <linux/kernel.h>
+ #include <linux/threads.h>
+ #include <linux/cpumask.h>
+-#include <asm/pda.h>
+ #endif
+
+-#ifdef CONFIG_X86_LOCAL_APIC
+-#ifndef __ASSEMBLY__
+-#include <asm/fixmap.h>
++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
+ #include <asm/bitops.h>
+ #include <asm/mpspec.h>
++#include <asm/apic.h>
+ #ifdef CONFIG_X86_IO_APIC
+ #include <asm/io_apic.h>
+ #endif
+-#include <asm/apic.h>
+-#endif
+ #endif
+
+ #define BAD_APICID 0xFFu
+@@ -52,9 +48,76 @@ extern void cpu_exit_clear(void);
+ extern void cpu_uninit(void);
+ #endif
+
+-#ifndef CONFIG_PARAVIRT
++#ifndef CONFIG_XEN
++struct smp_ops
++{
++ void (*smp_prepare_boot_cpu)(void);
++ void (*smp_prepare_cpus)(unsigned max_cpus);
++ int (*cpu_up)(unsigned cpu);
++ void (*smp_cpus_done)(unsigned max_cpus);
++
++ void (*smp_send_stop)(void);
++ void (*smp_send_reschedule)(int cpu);
++ int (*smp_call_function_mask)(cpumask_t mask,
++ void (*func)(void *info), void *info,
++ int wait);
++};
++
++extern struct smp_ops smp_ops;
++
++static inline void smp_prepare_boot_cpu(void)
++{
++ smp_ops.smp_prepare_boot_cpu();
++}
++static inline void smp_prepare_cpus(unsigned int max_cpus)
++{
++ smp_ops.smp_prepare_cpus(max_cpus);
++}
++static inline int __cpu_up(unsigned int cpu)
++{
++ return smp_ops.cpu_up(cpu);
++}
++static inline void smp_cpus_done(unsigned int max_cpus)
++{
++ smp_ops.smp_cpus_done(max_cpus);
++}
++
++static inline void smp_send_stop(void)
++{
++ smp_ops.smp_send_stop();
++}
++static inline void smp_send_reschedule(int cpu)
++{
++ smp_ops.smp_send_reschedule(cpu);
++}
++static inline int smp_call_function_mask(cpumask_t mask,
++ void (*func) (void *info), void *info,
++ int wait)
++{
++ return smp_ops.smp_call_function_mask(mask, func, info, wait);
++}
++
++void native_smp_prepare_boot_cpu(void);
++void native_smp_prepare_cpus(unsigned int max_cpus);
++int native_cpu_up(unsigned int cpunum);
++void native_smp_cpus_done(unsigned int max_cpus);
++
+ #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
+ do { } while (0)
++
++#else
++
++
++void xen_smp_send_stop(void);
++void xen_smp_send_reschedule(int cpu);
++int xen_smp_call_function_mask(cpumask_t mask,
++ void (*func) (void *info), void *info,
++ int wait);
++
++#define smp_send_stop xen_smp_send_stop
++#define smp_send_reschedule xen_smp_send_reschedule
++#define smp_call_function_mask xen_smp_call_function_mask
++
+ #endif
+
+ /*
+@@ -62,7 +125,8 @@ do { } while (0)
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+-#define raw_smp_processor_id() (read_pda(cpu_number))
++DECLARE_PER_CPU(int, cpu_number);
++#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+
+ extern cpumask_t cpu_possible_map;
+ #define cpu_callin_map cpu_possible_map
+@@ -73,20 +137,6 @@ static inline int num_booting_cpus(void)
+ return cpus_weight(cpu_possible_map);
+ }
+
+-#ifdef CONFIG_X86_LOCAL_APIC
+-
+-#ifdef APIC_DEFINITION
+-extern int hard_smp_processor_id(void);
+-#else
+-#include <mach_apicdef.h>
+-static inline int hard_smp_processor_id(void)
+-{
+- /* we don't want to mark this access volatile - bad code generation */
+- return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+-}
+-#endif
+-#endif
+-
+ extern int safe_smp_processor_id(void);
+ extern int __cpu_disable(void);
+ extern void __cpu_die(unsigned int cpu);
+@@ -102,10 +152,31 @@ extern unsigned int num_processors;
+
+ #define NO_PROC_ID 0xFF /* No processor magic marker */
+
+-#endif
++#endif /* CONFIG_SMP */
+
+ #ifndef __ASSEMBLY__
+
++#ifdef CONFIG_X86_LOCAL_APIC
++
++#ifdef APIC_DEFINITION
++extern int hard_smp_processor_id(void);
++#else
++#include <mach_apicdef.h>
++static inline int hard_smp_processor_id(void)
++{
++ /* we don't want to mark this access volatile - bad code generation */
++ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
++}
++#endif /* APIC_DEFINITION */
++
++#else /* CONFIG_X86_LOCAL_APIC */
++
++#ifndef CONFIG_SMP
++#define hard_smp_processor_id() 0
++#endif
++
++#endif /* CONFIG_X86_LOCAL_APIC */
++
+ extern u8 apicid_2_node[];
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/system.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/system.h 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/system.h 2007-10-22 13:58:57.000000000 +0200
+@@ -4,7 +4,7 @@
+ #include <linux/kernel.h>
+ #include <asm/segment.h>
+ #include <asm/cpufeature.h>
+-#include <linux/bitops.h> /* for LOCK_PREFIX */
++#include <asm/cmpxchg.h>
+ #include <asm/synch_bitops.h>
+ #include <asm/hypervisor.h>
+
+@@ -90,308 +90,102 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
+ #define savesegment(seg, value) \
+ asm volatile("mov %%" #seg ",%0":"=rm" (value))
+
+-#define read_cr0() ({ \
+- unsigned int __dummy; \
+- __asm__ __volatile__( \
+- "movl %%cr0,%0\n\t" \
+- :"=r" (__dummy)); \
+- __dummy; \
+-})
+-#define write_cr0(x) \
+- __asm__ __volatile__("movl %0,%%cr0": :"r" (x))
+-
+-#define read_cr2() (current_vcpu_info()->arch.cr2)
+-#define write_cr2(x) \
+- __asm__ __volatile__("movl %0,%%cr2": :"r" (x))
+-
+-#define read_cr3() ({ \
+- unsigned int __dummy; \
+- __asm__ ( \
+- "movl %%cr3,%0\n\t" \
+- :"=r" (__dummy)); \
+- __dummy = xen_cr3_to_pfn(__dummy); \
+- mfn_to_pfn(__dummy) << PAGE_SHIFT; \
+-})
+-#define write_cr3(x) ({ \
+- unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT); \
+- __dummy = xen_pfn_to_cr3(__dummy); \
+- __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \
+-})
+-#define read_cr4() ({ \
+- unsigned int __dummy; \
+- __asm__( \
+- "movl %%cr4,%0\n\t" \
+- :"=r" (__dummy)); \
+- __dummy; \
+-})
+-#define read_cr4_safe() ({ \
+- unsigned int __dummy; \
+- /* This could fault if %cr4 does not exist */ \
+- __asm__("1: movl %%cr4, %0 \n" \
+- "2: \n" \
+- ".section __ex_table,\"a\" \n" \
+- ".long 1b,2b \n" \
+- ".previous \n" \
+- : "=r" (__dummy): "0" (0)); \
+- __dummy; \
+-})
+-
+-#define write_cr4(x) \
+- __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
+-
+-#define wbinvd() \
+- __asm__ __volatile__ ("wbinvd": : :"memory")
+-
+-/* Clear the 'TS' bit */
+-#define clts() (HYPERVISOR_fpu_taskswitch(0))
+-
+-/* Set the 'TS' bit */
+-#define stts() (HYPERVISOR_fpu_taskswitch(1))
+-
+-#endif /* __KERNEL__ */
+-
+-static inline unsigned long get_limit(unsigned long segment)
++static inline void xen_clts(void)
+ {
+- unsigned long __limit;
+- __asm__("lsll %1,%0"
+- :"=r" (__limit):"r" (segment));
+- return __limit+1;
++ HYPERVISOR_fpu_taskswitch(0);
+ }
+
+-#define nop() __asm__ __volatile__ ("nop")
+-
+-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+-
+-#define tas(ptr) (xchg((ptr),1))
+-
+-struct __xchg_dummy { unsigned long a[100]; };
+-#define __xg(x) ((struct __xchg_dummy *)(x))
++static inline unsigned long xen_read_cr0(void)
++{
++ unsigned long val;
++ asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
++ return val;
++}
+
++static inline void xen_write_cr0(unsigned long val)
++{
++ asm volatile("movl %0,%%cr0": :"r" (val));
++}
+
+-#ifdef CONFIG_X86_CMPXCHG64
++#define xen_read_cr2() (current_vcpu_info()->arch.cr2)
+
+-/*
+- * The semantics of XCHGCMP8B are a bit strange, this is why
+- * there is a loop and the loading of %%eax and %%edx has to
+- * be inside. This inlines well in most cases, the cached
+- * cost is around ~38 cycles. (in the future we might want
+- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
+- * might have an implicit FPU-save as a cost, so it's not
+- * clear which path to go.)
+- *
+- * cmpxchg8b must be used with the lock prefix here to allow
+- * the instruction to be executed atomically, see page 3-102
+- * of the instruction set reference 24319102.pdf. We need
+- * the reader side to see the coherent 64bit value.
+- */
+-static inline void __set_64bit (unsigned long long * ptr,
+- unsigned int low, unsigned int high)
++static inline void xen_write_cr2(unsigned long val)
+ {
+- __asm__ __volatile__ (
+- "\n1:\t"
+- "movl (%0), %%eax\n\t"
+- "movl 4(%0), %%edx\n\t"
+- "lock cmpxchg8b (%0)\n\t"
+- "jnz 1b"
+- : /* no outputs */
+- : "D"(ptr),
+- "b"(low),
+- "c"(high)
+- : "ax","dx","memory");
++ asm volatile("movl %0,%%cr2": :"r" (val));
+ }
+
+-static inline void __set_64bit_constant (unsigned long long *ptr,
+- unsigned long long value)
++static inline unsigned long xen_read_cr3(void)
+ {
+- __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
++ unsigned long val;
++ asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
++ return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT;
+ }
+-#define ll_low(x) *(((unsigned int*)&(x))+0)
+-#define ll_high(x) *(((unsigned int*)&(x))+1)
+
+-static inline void __set_64bit_var (unsigned long long *ptr,
+- unsigned long long value)
++static inline void xen_write_cr3(unsigned long val)
+ {
+- __set_64bit(ptr,ll_low(value), ll_high(value));
++ val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT));
++ asm volatile("movl %0,%%cr3": :"r" (val));
+ }
+
+-#define set_64bit(ptr,value) \
+-(__builtin_constant_p(value) ? \
+- __set_64bit_constant(ptr, value) : \
+- __set_64bit_var(ptr, value) )
+-
+-#define _set_64bit(ptr,value) \
+-(__builtin_constant_p(value) ? \
+- __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
+- __set_64bit(ptr, ll_low(value), ll_high(value)) )
+-
+-#endif
+-
+-/*
+- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+- * Note 2: xchg has side effect, so that attribute volatile is necessary,
+- * but generally the primitive is invalid, *ptr is output argument. --ANK
+- */
+-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
++static inline unsigned long xen_read_cr4(void)
+ {
+- switch (size) {
+- case 1:
+- __asm__ __volatile__("xchgb %b0,%1"
+- :"=q" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- case 2:
+- __asm__ __volatile__("xchgw %w0,%1"
+- :"=r" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- case 4:
+- __asm__ __volatile__("xchgl %0,%1"
+- :"=r" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- }
+- return x;
++ unsigned long val;
++ asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
++ return val;
+ }
+
+-/*
+- * Atomic compare and exchange. Compare OLD with MEM, if identical,
+- * store NEW in MEM. Return the initial value in MEM. Success is
+- * indicated by comparing RETURN with OLD.
+- */
+-
+-#ifdef CONFIG_X86_CMPXCHG
+-#define __HAVE_ARCH_CMPXCHG 1
+-#define cmpxchg(ptr,o,n)\
+- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+- (unsigned long)(n),sizeof(*(ptr))))
+-#define sync_cmpxchg(ptr,o,n)\
+- ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
+- (unsigned long)(n),sizeof(*(ptr))))
+-#endif
++static inline unsigned long xen_read_cr4_safe(void)
++{
++ unsigned long val;
++ /* This could fault if %cr4 does not exist */
++ asm("1: movl %%cr4, %0 \n"
++ "2: \n"
++ ".section __ex_table,\"a\" \n"
++ ".long 1b,2b \n"
++ ".previous \n"
++ : "=r" (val): "0" (0));
++ return val;
++}
+
+-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+- unsigned long new, int size)
++static inline void xen_write_cr4(unsigned long val)
+ {
+- unsigned long prev;
+- switch (size) {
+- case 1:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 2:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 4:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- }
+- return old;
++ asm volatile("movl %0,%%cr4": :"r" (val));
+ }
+
+-/*
+- * Always use locked operations when touching memory shared with a
+- * hypervisor, since the system may be SMP even if the guest kernel
+- * isn't.
+- */
+-static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+- unsigned long old,
+- unsigned long new, int size)
+-{
+- unsigned long prev;
+- switch (size) {
+- case 1:
+- __asm__ __volatile__("lock; cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 2:
+- __asm__ __volatile__("lock; cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 4:
+- __asm__ __volatile__("lock; cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- }
+- return old;
++static inline void xen_wbinvd(void)
++{
++ asm volatile("wbinvd": : :"memory");
+ }
+
+-#ifndef CONFIG_X86_CMPXCHG
+-/*
+- * Building a kernel capable running on 80386. It may be necessary to
+- * simulate the cmpxchg on the 80386 CPU. For that purpose we define
+- * a function for each of the sizes we support.
+- */
++#define read_cr0() (xen_read_cr0())
++#define write_cr0(x) (xen_write_cr0(x))
++#define read_cr2() (xen_read_cr2())
++#define write_cr2(x) (xen_write_cr2(x))
++#define read_cr3() (xen_read_cr3())
++#define write_cr3(x) (xen_write_cr3(x))
++#define read_cr4() (xen_read_cr4())
++#define read_cr4_safe() (xen_read_cr4_safe())
++#define write_cr4(x) (xen_write_cr4(x))
++#define wbinvd() (xen_wbinvd())
+
+-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+-
+-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+- unsigned long new, int size)
+-{
+- switch (size) {
+- case 1:
+- return cmpxchg_386_u8(ptr, old, new);
+- case 2:
+- return cmpxchg_386_u16(ptr, old, new);
+- case 4:
+- return cmpxchg_386_u32(ptr, old, new);
+- }
+- return old;
+-}
+-
+-#define cmpxchg(ptr,o,n) \
+-({ \
+- __typeof__(*(ptr)) __ret; \
+- if (likely(boot_cpu_data.x86 > 3)) \
+- __ret = __cmpxchg((ptr), (unsigned long)(o), \
+- (unsigned long)(n), sizeof(*(ptr))); \
+- else \
+- __ret = cmpxchg_386((ptr), (unsigned long)(o), \
+- (unsigned long)(n), sizeof(*(ptr))); \
+- __ret; \
+-})
+-#endif
++/* Clear the 'TS' bit */
++#define clts() (xen_clts())
+
+-#ifdef CONFIG_X86_CMPXCHG64
++/* Set the 'TS' bit */
++#define stts() (HYPERVISOR_fpu_taskswitch(1))
+
+-static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
+- unsigned long long new)
++#endif /* __KERNEL__ */
++
++static inline unsigned long get_limit(unsigned long segment)
+ {
+- unsigned long long prev;
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
+- : "=A"(prev)
+- : "b"((unsigned long)new),
+- "c"((unsigned long)(new >> 32)),
+- "m"(*__xg(ptr)),
+- "0"(old)
+- : "memory");
+- return prev;
+-}
+-
+-#define cmpxchg64(ptr,o,n)\
+- ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
+- (unsigned long long)(n)))
++ unsigned long __limit;
++ __asm__("lsll %1,%0"
++ :"=r" (__limit):"r" (segment));
++ return __limit+1;
++}
++
++#define nop() __asm__ __volatile__ ("nop")
+
+-#endif
+-
+ /*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/tlbflush.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/tlbflush.h 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/tlbflush.h 2007-10-22 13:58:57.000000000 +0200
+@@ -24,13 +24,19 @@
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
++ * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus
+ *
+ * ..but the i386 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ */
+
++#define TLB_FLUSH_ALL 0xffffffff
++
++
+ #ifndef CONFIG_SMP
+
++#include <linux/sched.h>
++
+ #define flush_tlb() __flush_tlb()
+ #define flush_tlb_all() __flush_tlb_all()
+ #define local_flush_tlb() __flush_tlb()
+@@ -55,7 +61,12 @@ static inline void flush_tlb_range(struc
+ __flush_tlb();
+ }
+
+-#else
++static inline void xen_flush_tlb_others(const cpumask_t *cpumask,
++ struct mm_struct *mm, unsigned long va)
++{
++}
++
++#else /* SMP */
+
+ #include <asm/smp.h>
+
+@@ -74,6 +85,9 @@ static inline void flush_tlb_range(struc
+ flush_tlb_mm(vma->vm_mm);
+ }
+
++void xen_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
++ unsigned long va);
++
+ #define TLBSTATE_OK 1
+ #define TLBSTATE_LAZY 2
+
+@@ -84,9 +98,10 @@ struct tlb_state
+ char __cacheline_padding[L1_CACHE_BYTES-8];
+ };
+ DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
++#endif /* SMP */
+
+-
+-#endif
++#define flush_tlb_others(mask, mm, va) \
++ xen_flush_tlb_others(&mask, mm, va)
+
+ #define flush_tlb_kernel_range(start, end) flush_tlb_all()
+
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/agp.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/agp.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/agp.h 2007-10-22 13:58:57.000000000 +0200
+@@ -11,8 +11,15 @@
+ * with different cachability attributes for the same page.
+ */
+
+-int map_page_into_agp(struct page *page);
+-int unmap_page_from_agp(struct page *page);
++/* Caller's responsibility to call global_flush_tlb() for
++ * performance reasons */
++#define map_page_into_agp(page) ( \
++ xen_create_contiguous_region((unsigned long)page_address(page), 0, 32) \
++ ?: change_page_attr(page, 1, PAGE_KERNEL_NOCACHE))
++#define unmap_page_from_agp(page) ( \
++ xen_destroy_contiguous_region((unsigned long)page_address(page), 0), \
++ /* only a fallback: xen_destroy_contiguous_region uses PAGE_KERNEL */ \
++ change_page_attr(page, 1, PAGE_KERNEL))
+ #define flush_agp_mappings() global_flush_tlb()
+
+ /* Could use CLFLUSH here if the cpu supports it. But then it would
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/desc.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/desc.h 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/desc.h 2007-10-22 13:58:57.000000000 +0200
+@@ -127,16 +127,6 @@ static inline void set_ldt_desc(unsigned
+ DESC_LDT, size * 8 - 1);
+ }
+
+-static inline void set_seg_base(unsigned cpu, int entry, void *base)
+-{
+- struct desc_struct *d = &cpu_gdt(cpu)[entry];
+- u32 addr = (u32)(u64)base;
+- BUG_ON((u64)base >> 32);
+- d->base0 = addr & 0xffff;
+- d->base1 = (addr >> 16) & 0xff;
+- d->base2 = (addr >> 24) & 0xff;
+-}
+-
+ #define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
+ /* Don't allow setting of the lm bit. It is useless anyways because
+@@ -165,23 +155,13 @@ static inline void set_seg_base(unsigned
+ (info)->useable == 0 && \
+ (info)->lm == 0)
+
+-#if TLS_SIZE != 24
+-# error update this code.
+-#endif
+-
+ static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
+ {
+-#if 0
++ unsigned int i;
+ u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN);
+- gdt[0] = t->tls_array[0];
+- gdt[1] = t->tls_array[1];
+- gdt[2] = t->tls_array[2];
+-#endif
+-#define C(i) \
+- HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]), t->tls_array[i])
+
+- C(0); C(1); C(2);
+-#undef C
++ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
++ HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), t->tls_array[i]);
+ }
+
+ /*
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/dma-mapping.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/dma-mapping.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/dma-mapping.h 2007-10-22 13:58:57.000000000 +0200
+@@ -51,7 +51,7 @@ struct dma_mapping_ops {
+ };
+
+ extern dma_addr_t bad_dma_address;
+-extern struct dma_mapping_ops* dma_ops;
++extern const struct dma_mapping_ops* dma_ops;
+ extern int iommu_merge;
+
+ #if 0
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/fixmap.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/fixmap.h 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/fixmap.h 2007-10-22 13:58:57.000000000 +0200
+@@ -15,7 +15,6 @@
+ #include <asm/apicdef.h>
+ #include <asm/page.h>
+ #include <asm/vsyscall.h>
+-#include <asm/vsyscall32.h>
+ #include <asm/acpi.h>
+
+ /*
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/irqflags.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/irqflags.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/irqflags.h 2007-10-22 13:58:57.000000000 +0200
+@@ -9,6 +9,7 @@
+ */
+ #ifndef _ASM_IRQFLAGS_H
+ #define _ASM_IRQFLAGS_H
++#include <asm/processor-flags.h>
+
+ #ifndef __ASSEMBLY__
+ /*
+@@ -50,19 +51,19 @@ static inline void raw_local_irq_disable
+ {
+ unsigned long flags = __raw_local_save_flags();
+
+- raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18));
++ raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
+ }
+
+ static inline void raw_local_irq_enable(void)
+ {
+ unsigned long flags = __raw_local_save_flags();
+
+- raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18));
++ raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
+ }
+
+ static inline int raw_irqs_disabled_flags(unsigned long flags)
+ {
+- return !(flags & (1<<9)) || (flags & (1 << 18));
++ return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC);
+ }
+
+ #else /* CONFIG_X86_VSMP */
+@@ -118,13 +119,21 @@ static inline int raw_irqs_disabled_flag
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+-void raw_safe_halt(void);
++void xen_safe_halt(void);
++static inline void raw_safe_halt(void)
++{
++ xen_safe_halt();
++}
+
+ /*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+-void halt(void);
++void xen_halt(void);
++static inline void halt(void)
++{
++ xen_halt();
++}
+
+ #else /* __ASSEMBLY__: */
+ # ifdef CONFIG_TRACE_IRQFLAGS
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/mmu.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/mmu.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/mmu.h 2007-10-22 13:58:57.000000000 +0200
+@@ -25,14 +25,6 @@ typedef struct {
+ #ifdef CONFIG_XEN
+ extern struct list_head mm_unpinned;
+ extern spinlock_t mm_unpinned_lock;
+-
+-/* mm/memory.c:exit_mmap hook */
+-extern void _arch_exit_mmap(struct mm_struct *mm);
+-#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+-
+-/* kernel/fork.c:dup_mmap hook */
+-extern void _arch_dup_mmap(struct mm_struct *mm);
+-#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
+ #endif
+
+ #endif
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/mmu_context.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/mmu_context.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/mmu_context.h 2007-10-22 13:58:57.000000000 +0200
+@@ -9,6 +9,9 @@
+ #include <asm/pgtable.h>
+ #include <asm/tlbflush.h>
+
++void arch_exit_mmap(struct mm_struct *mm);
++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
++
+ /*
+ * possibly do the LDT unload here?
+ */
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/msr.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/msr.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/msr.h 2007-10-22 13:58:57.000000000 +0200
+@@ -1,7 +1,10 @@
+ #ifndef X86_64_MSR_H
+ #define X86_64_MSR_H 1
+
++#include <asm/msr-index.h>
++
+ #ifndef __ASSEMBLY__
++#include <linux/errno.h>
+ /*
+ * Access to machine-specific registers (available on 586 and better only)
+ * Note: the rd* operations modify the parameters directly (without using
+@@ -157,12 +160,11 @@ static inline unsigned int cpuid_edx(uns
+ return edx;
+ }
+
+-#define MSR_IA32_UCODE_WRITE 0x79
+-#define MSR_IA32_UCODE_REV 0x8b
+-
+ #ifdef CONFIG_SMP
+ void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
+ void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
++int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
++int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
+ #else /* CONFIG_SMP */
+ static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+ {
+@@ -172,269 +174,14 @@ static inline void wrmsr_on_cpu(unsigned
+ {
+ wrmsr(msr_no, l, h);
+ }
+-#endif /* CONFIG_SMP */
+-
+-#endif
+-
+-/* AMD/K8 specific MSRs */
+-#define MSR_EFER 0xc0000080 /* extended feature register */
+-#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
+-#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
+-#define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */
+-#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
+-#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
+-#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
+-#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */
+-/* EFER bits: */
+-#define _EFER_SCE 0 /* SYSCALL/SYSRET */
+-#define _EFER_LME 8 /* Long mode enable */
+-#define _EFER_LMA 10 /* Long mode active (read-only) */
+-#define _EFER_NX 11 /* No execute enable */
+-
+-#define EFER_SCE (1<<_EFER_SCE)
+-#define EFER_LME (1<<_EFER_LME)
+-#define EFER_LMA (1<<_EFER_LMA)
+-#define EFER_NX (1<<_EFER_NX)
+-
+-/* Intel MSRs. Some also available on other CPUs */
+-#define MSR_IA32_TSC 0x10
+-#define MSR_IA32_PLATFORM_ID 0x17
+-
+-#define MSR_IA32_PERFCTR0 0xc1
+-#define MSR_IA32_PERFCTR1 0xc2
+-#define MSR_FSB_FREQ 0xcd
+-
+-#define MSR_MTRRcap 0x0fe
+-#define MSR_IA32_BBL_CR_CTL 0x119
+-
+-#define MSR_IA32_SYSENTER_CS 0x174
+-#define MSR_IA32_SYSENTER_ESP 0x175
+-#define MSR_IA32_SYSENTER_EIP 0x176
+-
+-#define MSR_IA32_MCG_CAP 0x179
+-#define MSR_IA32_MCG_STATUS 0x17a
+-#define MSR_IA32_MCG_CTL 0x17b
+-
+-#define MSR_IA32_EVNTSEL0 0x186
+-#define MSR_IA32_EVNTSEL1 0x187
+-
+-#define MSR_IA32_DEBUGCTLMSR 0x1d9
+-#define MSR_IA32_LASTBRANCHFROMIP 0x1db
+-#define MSR_IA32_LASTBRANCHTOIP 0x1dc
+-#define MSR_IA32_LASTINTFROMIP 0x1dd
+-#define MSR_IA32_LASTINTTOIP 0x1de
+-
+-#define MSR_IA32_PEBS_ENABLE 0x3f1
+-#define MSR_IA32_DS_AREA 0x600
+-#define MSR_IA32_PERF_CAPABILITIES 0x345
+-
+-#define MSR_MTRRfix64K_00000 0x250
+-#define MSR_MTRRfix16K_80000 0x258
+-#define MSR_MTRRfix16K_A0000 0x259
+-#define MSR_MTRRfix4K_C0000 0x268
+-#define MSR_MTRRfix4K_C8000 0x269
+-#define MSR_MTRRfix4K_D0000 0x26a
+-#define MSR_MTRRfix4K_D8000 0x26b
+-#define MSR_MTRRfix4K_E0000 0x26c
+-#define MSR_MTRRfix4K_E8000 0x26d
+-#define MSR_MTRRfix4K_F0000 0x26e
+-#define MSR_MTRRfix4K_F8000 0x26f
+-#define MSR_MTRRdefType 0x2ff
+-
+-#define MSR_IA32_MC0_CTL 0x400
+-#define MSR_IA32_MC0_STATUS 0x401
+-#define MSR_IA32_MC0_ADDR 0x402
+-#define MSR_IA32_MC0_MISC 0x403
+-
+-#define MSR_P6_PERFCTR0 0xc1
+-#define MSR_P6_PERFCTR1 0xc2
+-#define MSR_P6_EVNTSEL0 0x186
+-#define MSR_P6_EVNTSEL1 0x187
+-
+-/* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */
+-#define MSR_K7_EVNTSEL0 0xC0010000
+-#define MSR_K7_PERFCTR0 0xC0010004
+-#define MSR_K7_EVNTSEL1 0xC0010001
+-#define MSR_K7_PERFCTR1 0xC0010005
+-#define MSR_K7_EVNTSEL2 0xC0010002
+-#define MSR_K7_PERFCTR2 0xC0010006
+-#define MSR_K7_EVNTSEL3 0xC0010003
+-#define MSR_K7_PERFCTR3 0xC0010007
+-#define MSR_K8_TOP_MEM1 0xC001001A
+-#define MSR_K8_TOP_MEM2 0xC001001D
+-#define MSR_K8_SYSCFG 0xC0010010
+-#define MSR_K8_HWCR 0xC0010015
+-
+-/* K6 MSRs */
+-#define MSR_K6_EFER 0xC0000080
+-#define MSR_K6_STAR 0xC0000081
+-#define MSR_K6_WHCR 0xC0000082
+-#define MSR_K6_UWCCR 0xC0000085
+-#define MSR_K6_PSOR 0xC0000087
+-#define MSR_K6_PFIR 0xC0000088
+-
+-/* Centaur-Hauls/IDT defined MSRs. */
+-#define MSR_IDT_FCR1 0x107
+-#define MSR_IDT_FCR2 0x108
+-#define MSR_IDT_FCR3 0x109
+-#define MSR_IDT_FCR4 0x10a
+-
+-#define MSR_IDT_MCR0 0x110
+-#define MSR_IDT_MCR1 0x111
+-#define MSR_IDT_MCR2 0x112
+-#define MSR_IDT_MCR3 0x113
+-#define MSR_IDT_MCR4 0x114
+-#define MSR_IDT_MCR5 0x115
+-#define MSR_IDT_MCR6 0x116
+-#define MSR_IDT_MCR7 0x117
+-#define MSR_IDT_MCR_CTRL 0x120
+-
+-/* VIA Cyrix defined MSRs*/
+-#define MSR_VIA_FCR 0x1107
+-#define MSR_VIA_LONGHAUL 0x110a
+-#define MSR_VIA_RNG 0x110b
+-#define MSR_VIA_BCR2 0x1147
+-
+-/* Intel defined MSRs. */
+-#define MSR_IA32_P5_MC_ADDR 0
+-#define MSR_IA32_P5_MC_TYPE 1
+-#define MSR_IA32_PLATFORM_ID 0x17
+-#define MSR_IA32_EBL_CR_POWERON 0x2a
+-
+-#define MSR_IA32_APICBASE 0x1b
+-#define MSR_IA32_APICBASE_BSP (1<<8)
+-#define MSR_IA32_APICBASE_ENABLE (1<<11)
+-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+-
+-/* P4/Xeon+ specific */
+-#define MSR_IA32_MCG_EAX 0x180
+-#define MSR_IA32_MCG_EBX 0x181
+-#define MSR_IA32_MCG_ECX 0x182
+-#define MSR_IA32_MCG_EDX 0x183
+-#define MSR_IA32_MCG_ESI 0x184
+-#define MSR_IA32_MCG_EDI 0x185
+-#define MSR_IA32_MCG_EBP 0x186
+-#define MSR_IA32_MCG_ESP 0x187
+-#define MSR_IA32_MCG_EFLAGS 0x188
+-#define MSR_IA32_MCG_EIP 0x189
+-#define MSR_IA32_MCG_RESERVED 0x18A
+-
+-#define MSR_P6_EVNTSEL0 0x186
+-#define MSR_P6_EVNTSEL1 0x187
+-
+-#define MSR_IA32_PERF_STATUS 0x198
+-#define MSR_IA32_PERF_CTL 0x199
+-
+-#define MSR_IA32_MPERF 0xE7
+-#define MSR_IA32_APERF 0xE8
+-
+-#define MSR_IA32_THERM_CONTROL 0x19a
+-#define MSR_IA32_THERM_INTERRUPT 0x19b
+-#define MSR_IA32_THERM_STATUS 0x19c
+-#define MSR_IA32_MISC_ENABLE 0x1a0
+-
+-#define MSR_IA32_DEBUGCTLMSR 0x1d9
+-#define MSR_IA32_LASTBRANCHFROMIP 0x1db
+-#define MSR_IA32_LASTBRANCHTOIP 0x1dc
+-#define MSR_IA32_LASTINTFROMIP 0x1dd
+-#define MSR_IA32_LASTINTTOIP 0x1de
+-
+-#define MSR_IA32_MC0_CTL 0x400
+-#define MSR_IA32_MC0_STATUS 0x401
+-#define MSR_IA32_MC0_ADDR 0x402
+-#define MSR_IA32_MC0_MISC 0x403
+-
+-/* Pentium IV performance counter MSRs */
+-#define MSR_P4_BPU_PERFCTR0 0x300
+-#define MSR_P4_BPU_PERFCTR1 0x301
+-#define MSR_P4_BPU_PERFCTR2 0x302
+-#define MSR_P4_BPU_PERFCTR3 0x303
+-#define MSR_P4_MS_PERFCTR0 0x304
+-#define MSR_P4_MS_PERFCTR1 0x305
+-#define MSR_P4_MS_PERFCTR2 0x306
+-#define MSR_P4_MS_PERFCTR3 0x307
+-#define MSR_P4_FLAME_PERFCTR0 0x308
+-#define MSR_P4_FLAME_PERFCTR1 0x309
+-#define MSR_P4_FLAME_PERFCTR2 0x30a
+-#define MSR_P4_FLAME_PERFCTR3 0x30b
+-#define MSR_P4_IQ_PERFCTR0 0x30c
+-#define MSR_P4_IQ_PERFCTR1 0x30d
+-#define MSR_P4_IQ_PERFCTR2 0x30e
+-#define MSR_P4_IQ_PERFCTR3 0x30f
+-#define MSR_P4_IQ_PERFCTR4 0x310
+-#define MSR_P4_IQ_PERFCTR5 0x311
+-#define MSR_P4_BPU_CCCR0 0x360
+-#define MSR_P4_BPU_CCCR1 0x361
+-#define MSR_P4_BPU_CCCR2 0x362
+-#define MSR_P4_BPU_CCCR3 0x363
+-#define MSR_P4_MS_CCCR0 0x364
+-#define MSR_P4_MS_CCCR1 0x365
+-#define MSR_P4_MS_CCCR2 0x366
+-#define MSR_P4_MS_CCCR3 0x367
+-#define MSR_P4_FLAME_CCCR0 0x368
+-#define MSR_P4_FLAME_CCCR1 0x369
+-#define MSR_P4_FLAME_CCCR2 0x36a
+-#define MSR_P4_FLAME_CCCR3 0x36b
+-#define MSR_P4_IQ_CCCR0 0x36c
+-#define MSR_P4_IQ_CCCR1 0x36d
+-#define MSR_P4_IQ_CCCR2 0x36e
+-#define MSR_P4_IQ_CCCR3 0x36f
+-#define MSR_P4_IQ_CCCR4 0x370
+-#define MSR_P4_IQ_CCCR5 0x371
+-#define MSR_P4_ALF_ESCR0 0x3ca
+-#define MSR_P4_ALF_ESCR1 0x3cb
+-#define MSR_P4_BPU_ESCR0 0x3b2
+-#define MSR_P4_BPU_ESCR1 0x3b3
+-#define MSR_P4_BSU_ESCR0 0x3a0
+-#define MSR_P4_BSU_ESCR1 0x3a1
+-#define MSR_P4_CRU_ESCR0 0x3b8
+-#define MSR_P4_CRU_ESCR1 0x3b9
+-#define MSR_P4_CRU_ESCR2 0x3cc
+-#define MSR_P4_CRU_ESCR3 0x3cd
+-#define MSR_P4_CRU_ESCR4 0x3e0
+-#define MSR_P4_CRU_ESCR5 0x3e1
+-#define MSR_P4_DAC_ESCR0 0x3a8
+-#define MSR_P4_DAC_ESCR1 0x3a9
+-#define MSR_P4_FIRM_ESCR0 0x3a4
+-#define MSR_P4_FIRM_ESCR1 0x3a5
+-#define MSR_P4_FLAME_ESCR0 0x3a6
+-#define MSR_P4_FLAME_ESCR1 0x3a7
+-#define MSR_P4_FSB_ESCR0 0x3a2
+-#define MSR_P4_FSB_ESCR1 0x3a3
+-#define MSR_P4_IQ_ESCR0 0x3ba
+-#define MSR_P4_IQ_ESCR1 0x3bb
+-#define MSR_P4_IS_ESCR0 0x3b4
+-#define MSR_P4_IS_ESCR1 0x3b5
+-#define MSR_P4_ITLB_ESCR0 0x3b6
+-#define MSR_P4_ITLB_ESCR1 0x3b7
+-#define MSR_P4_IX_ESCR0 0x3c8
+-#define MSR_P4_IX_ESCR1 0x3c9
+-#define MSR_P4_MOB_ESCR0 0x3aa
+-#define MSR_P4_MOB_ESCR1 0x3ab
+-#define MSR_P4_MS_ESCR0 0x3c0
+-#define MSR_P4_MS_ESCR1 0x3c1
+-#define MSR_P4_PMH_ESCR0 0x3ac
+-#define MSR_P4_PMH_ESCR1 0x3ad
+-#define MSR_P4_RAT_ESCR0 0x3bc
+-#define MSR_P4_RAT_ESCR1 0x3bd
+-#define MSR_P4_SAAT_ESCR0 0x3ae
+-#define MSR_P4_SAAT_ESCR1 0x3af
+-#define MSR_P4_SSU_ESCR0 0x3be
+-#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */
+-#define MSR_P4_TBPU_ESCR0 0x3c2
+-#define MSR_P4_TBPU_ESCR1 0x3c3
+-#define MSR_P4_TC_ESCR0 0x3c4
+-#define MSR_P4_TC_ESCR1 0x3c5
+-#define MSR_P4_U2L_ESCR0 0x3b0
+-#define MSR_P4_U2L_ESCR1 0x3b1
+-
+-/* Intel Core-based CPU performance counters */
+-#define MSR_CORE_PERF_FIXED_CTR0 0x309
+-#define MSR_CORE_PERF_FIXED_CTR1 0x30a
+-#define MSR_CORE_PERF_FIXED_CTR2 0x30b
+-#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d
+-#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e
+-#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f
+-#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390
+-
+-#endif
++static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
++{
++ return rdmsr_safe(msr_no, l, h);
++}
++static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
++{
++ return wrmsr_safe(msr_no, l, h);
++}
++#endif /* CONFIG_SMP */
++#endif /* __ASSEMBLY__ */
++#endif /* X86_64_MSR_H */
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/nmi.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/nmi.h 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/nmi.h 2007-10-22 13:58:57.000000000 +0200
+@@ -96,4 +96,13 @@ extern int unknown_nmi_panic;
+ void __trigger_all_cpu_backtrace(void);
+ #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
+
++
++void lapic_watchdog_stop(void);
++int lapic_watchdog_init(unsigned nmi_hz);
++int lapic_wd_event(unsigned nmi_hz);
++unsigned lapic_adjust_nmi_hz(unsigned hz);
++int lapic_watchdog_ok(void);
++void disable_lapic_nmi_watchdog(void);
++void enable_lapic_nmi_watchdog(void);
++
+ #endif /* ASM_NMI_H */
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/page.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/page.h 2007-05-31 14:39:08.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/page.h 2007-10-22 13:58:57.000000000 +0200
+@@ -7,6 +7,7 @@
+ #include <linux/types.h>
+ #include <asm/bug.h>
+ #endif
++#include <linux/const.h>
+ #include <xen/interface/xen.h>
+
+ /*
+@@ -18,18 +19,14 @@
+
+ /* PAGE_SHIFT determines the page size */
+ #define PAGE_SHIFT 12
+-#ifdef __ASSEMBLY__
+-#define PAGE_SIZE (0x1 << PAGE_SHIFT)
+-#else
+-#define PAGE_SIZE (1UL << PAGE_SHIFT)
+-#endif
++#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
+ #define PAGE_MASK (~(PAGE_SIZE-1))
+
+ /* See Documentation/x86_64/mm.txt for a description of the memory map. */
+ #define __PHYSICAL_MASK_SHIFT 46
+-#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1)
++#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
+ #define __VIRTUAL_MASK_SHIFT 48
+-#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
++#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
+
+ #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
+
+@@ -54,10 +51,10 @@
+ #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
+
+ #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
+-#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
++#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
+
+ #define HPAGE_SHIFT PMD_SHIFT
+-#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
++#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
+ #define HPAGE_MASK (~(HPAGE_SIZE - 1))
+ #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+
+@@ -146,17 +143,23 @@ static inline pgd_t __pgd(unsigned long
+
+ #define __pgprot(x) ((pgprot_t) { (x) } )
+
+-#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START)
+-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
+-#define __START_KERNEL_map 0xffffffff80000000UL
+-#define __PAGE_OFFSET 0xffff880000000000UL
++#endif /* !__ASSEMBLY__ */
+
+-#else
+ #define __PHYSICAL_START CONFIG_PHYSICAL_START
++#define __KERNEL_ALIGN 0x200000
++
++/*
++ * Make sure kernel is aligned to 2MB address. Catching it at compile
++ * time is better. Change your config file and compile the kernel
++ * for a 2MB aligned address (CONFIG_PHYSICAL_START)
++ */
++#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
++#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
++#endif
++
+ #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
+-#define __START_KERNEL_map 0xffffffff80000000
+-#define __PAGE_OFFSET 0xffff880000000000
+-#endif /* !__ASSEMBLY__ */
++#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
++#define __PAGE_OFFSET _AC(0xffff880000000000, UL)
+
+ #if CONFIG_XEN_COMPAT <= 0x030002
+ #undef LOAD_OFFSET
+@@ -166,20 +169,20 @@ static inline pgd_t __pgd(unsigned long
+ /* to align the pointer to the (next) page boundary */
+ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+-#define KERNEL_TEXT_SIZE (40UL*1024*1024)
+-#define KERNEL_TEXT_START 0xffffffff80000000UL
++#define KERNEL_TEXT_SIZE (40*1024*1024)
++#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
++
++#define PAGE_OFFSET __PAGE_OFFSET
+
+-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
++#ifndef __ASSEMBLY__
++static inline unsigned long __phys_addr(unsigned long x)
++{
++ return x - (x >= __START_KERNEL_map ? __START_KERNEL_map : PAGE_OFFSET);
++}
++#endif
+
+-/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol.
+- Otherwise you risk miscompilation. */
+-#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET)
+-/* __pa_symbol should be used for C visible symbols.
+- This seems to be the official gcc blessed way to do such arithmetic. */
+-#define __pa_symbol(x) \
+- ({unsigned long v; \
+- asm("" : "=r" (v) : "0" (x)); \
+- __pa(v); })
++#define __pa(x) __phys_addr((unsigned long)(x))
++#define __pa_symbol(x) __phys_addr((unsigned long)(x))
+
+ #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+ #define __boot_va(x) __va(x)
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgalloc.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/pgalloc.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgalloc.h 2007-10-22 13:58:57.000000000 +0200
+@@ -1,7 +1,6 @@
+ #ifndef _X86_64_PGALLOC_H
+ #define _X86_64_PGALLOC_H
+
+-#include <asm/fixmap.h>
+ #include <asm/pda.h>
+ #include <linux/threads.h>
+ #include <linux/mm.h>
+@@ -100,24 +99,16 @@ static inline void pgd_list_add(pgd_t *p
+ struct page *page = virt_to_page(pgd);
+
+ spin_lock(&pgd_lock);
+- page->index = (pgoff_t)pgd_list;
+- if (pgd_list)
+- pgd_list->private = (unsigned long)&page->index;
+- pgd_list = page;
+- page->private = (unsigned long)&pgd_list;
++ list_add(&page->lru, &pgd_list);
+ spin_unlock(&pgd_lock);
+ }
+
+ static inline void pgd_list_del(pgd_t *pgd)
+ {
+- struct page *next, **pprev, *page = virt_to_page(pgd);
++ struct page *page = virt_to_page(pgd);
+
+ spin_lock(&pgd_lock);
+- next = (struct page *)page->index;
+- pprev = (struct page **)page->private;
+- *pprev = next;
+- if (next)
+- next->private = (unsigned long)pprev;
++ list_del(&page->lru);
+ spin_unlock(&pgd_lock);
+ }
+
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgtable.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-10-22 13:58:57.000000000 +0200
+@@ -1,12 +1,14 @@
+ #ifndef _X86_64_PGTABLE_H
+ #define _X86_64_PGTABLE_H
+
++#include <linux/const.h>
++#ifndef __ASSEMBLY__
++
+ /*
+ * This file contains the functions and defines necessary to modify and use
+ * the x86-64 page table tree.
+ */
+ #include <asm/processor.h>
+-#include <asm/fixmap.h>
+ #include <asm/bitops.h>
+ #include <linux/threads.h>
+ #include <linux/sched.h>
+@@ -35,11 +37,9 @@ extern void xen_init_pt(void);
+ #endif
+
+ extern pud_t level3_kernel_pgt[512];
+-extern pud_t level3_physmem_pgt[512];
+ extern pud_t level3_ident_pgt[512];
+ extern pmd_t level2_kernel_pgt[512];
+ extern pgd_t init_level4_pgt[];
+-extern pgd_t boot_level4_pgt[];
+ extern unsigned long __supported_pte_mask;
+
+ #define swapper_pg_dir init_level4_pgt
+@@ -54,6 +54,8 @@ extern void clear_kernel_mapping(unsigne
+ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+ #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
++#endif /* !__ASSEMBLY__ */
++
+ /*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
+@@ -78,6 +80,8 @@ extern unsigned long empty_zero_page[PAG
+ */
+ #define PTRS_PER_PTE 512
+
++#ifndef __ASSEMBLY__
++
+ #define pte_ERROR(e) \
+ printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e))
+ #define pmd_ERROR(e) \
+@@ -116,22 +120,23 @@ static inline void pgd_clear (pgd_t * pg
+
+ #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
+
+-#define PMD_SIZE (1UL << PMD_SHIFT)
++#endif /* !__ASSEMBLY__ */
++
++#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
+ #define PMD_MASK (~(PMD_SIZE-1))
+-#define PUD_SIZE (1UL << PUD_SHIFT)
++#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
+ #define PUD_MASK (~(PUD_SIZE-1))
+-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
++#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
+ #define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+ #define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
+ #define FIRST_USER_ADDRESS 0
+
+-#ifndef __ASSEMBLY__
+-#define MAXMEM 0x3fffffffffffUL
+-#define VMALLOC_START 0xffffc20000000000UL
+-#define VMALLOC_END 0xffffe1ffffffffffUL
+-#define MODULES_VADDR 0xffffffff88000000UL
+-#define MODULES_END 0xfffffffffff00000UL
++#define MAXMEM _AC(0x3fffffffffff, UL)
++#define VMALLOC_START _AC(0xffffc20000000000, UL)
++#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
++#define MODULES_VADDR _AC(0xffffffff88000000, UL)
++#define MODULES_END _AC(0xfffffffffff00000, UL)
+ #define MODULES_LEN (MODULES_END - MODULES_VADDR)
+
+ #define _PAGE_BIT_PRESENT 0
+@@ -157,7 +162,7 @@ static inline void pgd_clear (pgd_t * pg
+ #define _PAGE_GLOBAL 0x100 /* Global TLB entry */
+
+ #define _PAGE_PROTNONE 0x080 /* If not present */
+-#define _PAGE_NX (1UL<<_PAGE_BIT_NX)
++#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX)
+
+ #if CONFIG_XEN_COMPAT <= 0x030002
+ extern unsigned int __kernel_page_user;
+@@ -228,6 +233,8 @@ extern unsigned int __kernel_page_user;
+ #define __S110 PAGE_SHARED_EXEC
+ #define __S111 PAGE_SHARED_EXEC
+
++#ifndef __ASSEMBLY__
++
+ static inline unsigned long pgd_bad(pgd_t pgd)
+ {
+ return pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
+@@ -339,6 +346,20 @@ static inline pte_t pte_mkwrite(pte_t pt
+ static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
+ static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
+
++static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
++{
++ if (!pte_dirty(*ptep))
++ return 0;
++ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte);
++}
++
++static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
++{
++ if (!pte_young(*ptep))
++ return 0;
++ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte);
++}
++
+ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte = *ptep;
+@@ -464,18 +485,12 @@ static inline pte_t pte_modify(pte_t pte
+ * bit at the same time. */
+ #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+ #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
+- do { \
+- if (dirty) \
+- ptep_establish(vma, address, ptep, entry); \
+- } while (0)
+-
+-
+-/*
+- * i386 says: We don't actually have these, but we want to advertise
+- * them so that we can encompass the flush here.
+- */
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
++({ \
++ int __changed = !pte_same(*(ptep), entry); \
++ if (__changed && (dirty)) \
++ ptep_establish(vma, address, ptep, entry); \
++ __changed; \
++})
+
+ #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
+ #define ptep_clear_flush_dirty(vma, address, ptep) \
+@@ -484,7 +499,7 @@ static inline pte_t pte_modify(pte_t pte
+ int __dirty = pte_dirty(__pte); \
+ __pte = pte_mkclean(__pte); \
+ if ((vma)->vm_mm->context.pinned) \
+- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
++ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
+ else if (__dirty) \
+ set_pte(ptep, __pte); \
+ __dirty; \
+@@ -497,7 +512,7 @@ static inline pte_t pte_modify(pte_t pte
+ int __young = pte_young(__pte); \
+ __pte = pte_mkold(__pte); \
+ if ((vma)->vm_mm->context.pinned) \
+- ptep_set_access_flags(vma, address, ptep, __pte, __young); \
++ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
+ else if (__young) \
+ set_pte(ptep, __pte); \
+ __young; \
+@@ -511,10 +526,7 @@ static inline pte_t pte_modify(pte_t pte
+ #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+ extern spinlock_t pgd_lock;
+-extern struct page *pgd_list;
+-void vmalloc_sync_all(void);
+-
+-#endif /* !__ASSEMBLY__ */
++extern struct list_head pgd_list;
+
+ extern int kern_addr_valid(unsigned long addr);
+
+@@ -546,10 +558,6 @@ int touch_pte_range(struct mm_struct *mm
+ #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
+
+-#define MK_IOSPACE_PFN(space, pfn) (pfn)
+-#define GET_IOSPACE(pfn) 0
+-#define GET_PFN(pfn) (pfn)
+-
+ #define HAVE_ARCH_UNMAPPED_AREA
+
+ #define pgtable_cache_init() do { } while (0)
+@@ -563,11 +571,14 @@ int touch_pte_range(struct mm_struct *mm
+ #define kc_offset_to_vaddr(o) \
+ (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
+
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+ #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+ #define __HAVE_ARCH_PTEP_SET_WRPROTECT
+ #define __HAVE_ARCH_PTE_SAME
+ #include <asm-generic/pgtable.h>
++#endif /* !__ASSEMBLY__ */
+
+ #endif /* _X86_64_PGTABLE_H */
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/processor.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/processor.h 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/processor.h 2007-10-22 13:58:57.000000000 +0200
+@@ -20,6 +20,7 @@
+ #include <asm/percpu.h>
+ #include <linux/personality.h>
+ #include <linux/cpumask.h>
++#include <asm/processor-flags.h>
+
+ #define TF_MASK 0x00000100
+ #define IF_MASK 0x00000200
+@@ -103,42 +104,6 @@ extern unsigned int init_intel_cacheinfo
+ extern unsigned short num_cache_leaves;
+
+ /*
+- * EFLAGS bits
+- */
+-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+-#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
+-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+-
+-/*
+- * Intel CPU features in CR4
+- */
+-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+-#define X86_CR4_MCE 0x0040 /* Machine check enable */
+-#define X86_CR4_PGE 0x0080 /* enable global pages */
+-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
+-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
+-
+-/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+@@ -209,7 +174,7 @@ struct i387_fxsave_struct {
+ u32 mxcsr;
+ u32 mxcsr_mask;
+ u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */
++ u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
+ u32 padding[24];
+ } __attribute__ ((aligned (16)));
+
+@@ -440,22 +405,6 @@ static inline void prefetchw(void *x)
+ #define cpu_relax() rep_nop()
+
+ /*
+- * NSC/Cyrix CPU configuration register indexes
+- */
+-#define CX86_CCR0 0xc0
+-#define CX86_CCR1 0xc1
+-#define CX86_CCR2 0xc2
+-#define CX86_CCR3 0xc3
+-#define CX86_CCR4 0xe8
+-#define CX86_CCR5 0xe9
+-#define CX86_CCR6 0xea
+-#define CX86_CCR7 0xeb
+-#define CX86_DIR0 0xfe
+-#define CX86_DIR1 0xff
+-#define CX86_ARR_BASE 0xc4
+-#define CX86_RCR_BASE 0xdc
+-
+-/*
+ * NSC/Cyrix CPU indexed register access macros
+ */
+
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/smp.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/smp.h 2007-10-22 13:58:46.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/smp.h 2007-10-22 13:58:57.000000000 +0200
+@@ -11,12 +11,11 @@
+ extern int disable_apic;
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+-#include <asm/fixmap.h>
+ #include <asm/mpspec.h>
++#include <asm/apic.h>
+ #ifdef CONFIG_X86_IO_APIC
+ #include <asm/io_apic.h>
+ #endif
+-#include <asm/apic.h>
+ #include <asm/thread_info.h>
+ #endif
+
+@@ -41,7 +40,6 @@ extern void lock_ipi_call_lock(void);
+ extern void unlock_ipi_call_lock(void);
+ extern int smp_num_siblings;
+ extern void smp_send_reschedule(int cpu);
+-void smp_stop_cpu(void);
+
+ extern cpumask_t cpu_sibling_map[NR_CPUS];
+ extern cpumask_t cpu_core_map[NR_CPUS];
+@@ -62,14 +60,6 @@ static inline int num_booting_cpus(void)
+
+ #define raw_smp_processor_id() read_pda(cpunumber)
+
+-#ifdef CONFIG_X86_LOCAL_APIC
+-static inline int hard_smp_processor_id(void)
+-{
+- /* we don't want to mark this access volatile - bad code generation */
+- return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
+-}
+-#endif
+-
+ extern int __cpu_disable(void);
+ extern void __cpu_die(unsigned int cpu);
+ extern void prefill_possible_map(void);
+@@ -78,6 +68,14 @@ extern unsigned __cpuinitdata disabled_c
+
+ #define NO_PROC_ID 0xFF /* No processor magic marker */
+
++#endif /* CONFIG_SMP */
++
++#ifdef CONFIG_X86_LOCAL_APIC
++static inline int hard_smp_processor_id(void)
++{
++ /* we don't want to mark this access volatile - bad code generation */
++ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
++}
+ #endif
+
+ /*
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/system.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/system.h 2007-10-22 13:53:08.000000000 +0200
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/system.h 2007-10-22 13:58:57.000000000 +0200
+@@ -3,7 +3,7 @@
+
+ #include <linux/kernel.h>
+ #include <asm/segment.h>
+-#include <asm/alternative.h>
++#include <asm/cmpxchg.h>
+
+ #include <asm/synch_bitops.h>
+ #include <asm/hypervisor.h>
+@@ -49,7 +49,7 @@
+ [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
+ [ti_flags] "i" (offsetof(struct thread_info, flags)),\
+ [tif_fork] "i" (TIF_FORK), \
+- [thread_info] "i" (offsetof(struct task_struct, thread_info)), \
++ [thread_info] "i" (offsetof(struct task_struct, stack)), \
+ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
+ : "memory", "cc" __EXTRA_CLOBBER)
+
+@@ -98,6 +98,12 @@ static inline void write_cr0(unsigned lo
+ machine_to_phys(__dummy); \
+ })
+
++static inline void write_cr3(unsigned long val)
++{
++ val = phys_to_machine(val);
++ asm volatile("movq %0,%%cr3" :: "r" (val) : "memory");
++}
++
+ static inline unsigned long read_cr4(void)
+ {
+ unsigned long cr4;
+@@ -107,7 +113,7 @@ static inline unsigned long read_cr4(voi
+
+ static inline void write_cr4(unsigned long val)
+ {
+- asm volatile("movq %0,%%cr4" :: "r" (val));
++ asm volatile("movq %0,%%cr4" :: "r" (val) : "memory");
+ }
+
+ #define stts() (HYPERVISOR_fpu_taskswitch(1))
+@@ -128,100 +134,6 @@ static inline void sched_cacheflush(void
+
+ #define nop() __asm__ __volatile__ ("nop")
+
+-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+-
+-#define tas(ptr) (xchg((ptr),1))
+-
+-#define __xg(x) ((volatile long *)(x))
+-
+-static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
+-{
+- *ptr = val;
+-}
+-
+-#define _set_64bit set_64bit
+-
+-/*
+- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+- * Note 2: xchg has side effect, so that attribute volatile is necessary,
+- * but generally the primitive is invalid, *ptr is output argument. --ANK
+- */
+-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+-{
+- switch (size) {
+- case 1:
+- __asm__ __volatile__("xchgb %b0,%1"
+- :"=q" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- case 2:
+- __asm__ __volatile__("xchgw %w0,%1"
+- :"=r" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- case 4:
+- __asm__ __volatile__("xchgl %k0,%1"
+- :"=r" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- case 8:
+- __asm__ __volatile__("xchgq %0,%1"
+- :"=r" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- }
+- return x;
+-}
+-
+-/*
+- * Atomic compare and exchange. Compare OLD with MEM, if identical,
+- * store NEW in MEM. Return the initial value in MEM. Success is
+- * indicated by comparing RETURN with OLD.
+- */
+-
+-#define __HAVE_ARCH_CMPXCHG 1
+-
+-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+- unsigned long new, int size)
+-{
+- unsigned long prev;
+- switch (size) {
+- case 1:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 2:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 4:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 8:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- }
+- return old;
+-}
+-
+-#define cmpxchg(ptr,o,n)\
+- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+- (unsigned long)(n),sizeof(*(ptr))))
+-
+ #ifdef CONFIG_SMP
+ #define smp_mb() mb()
+ #define smp_rmb() rmb()
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/tlbflush.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/tlbflush.h 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/tlbflush.h 2007-10-22 13:58:57.000000000 +0200
+@@ -2,7 +2,9 @@
+ #define _X8664_TLBFLUSH_H
+
+ #include <linux/mm.h>
++#include <linux/sched.h>
+ #include <asm/processor.h>
++#include <asm/system.h>
+
+ #define __flush_tlb() xen_tlb_flush()
+
+Index: 10.3-2007-11-26/mm/highmem.c
+===================================================================
+--- 10.3-2007-11-26.orig/mm/highmem.c 2007-12-06 17:27:30.000000000 +0100
++++ 10.3-2007-11-26/mm/highmem.c 2007-10-22 13:58:57.000000000 +0200
+@@ -158,17 +158,6 @@ start:
+ return vaddr;
+ }
+
+-#ifdef CONFIG_XEN
+-void kmap_flush_unused(void)
+-{
+- spin_lock(&kmap_lock);
+- flush_all_zero_pkmaps();
+- spin_unlock(&kmap_lock);
+-}
+-
+-EXPORT_SYMBOL(kmap_flush_unused);
+-#endif
+-
+ void fastcall *kmap_high(struct page *page)
+ {
+ unsigned long vaddr;
+Index: 10.3-2007-11-26/net/core/dev.c
+===================================================================
+--- 10.3-2007-11-26.orig/net/core/dev.c 2007-10-22 13:53:25.000000000 +0200
++++ 10.3-2007-11-26/net/core/dev.c 2007-10-22 13:58:57.000000000 +0200
+@@ -1466,12 +1466,16 @@ out_kfree_skb:
+ inline int skb_checksum_setup(struct sk_buff *skb)
+ {
+ if (skb->proto_csum_blank) {
++ struct iphdr *iph;
++
+ if (skb->protocol != htons(ETH_P_IP))
+ goto out;
+- skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
+- if (skb->h.raw >= skb->tail)
++ iph = ip_hdr(skb);
++ skb->transport_header = skb->network_header + 4 * iph->ihl;
++ if (skb->transport_header >= skb->tail)
+ goto out;
+- switch (skb->nh.iph->protocol) {
++ skb->csum_start = skb_transport_header(skb) - skb->head;
++ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ skb->csum_offset = offsetof(struct tcphdr, check);
+ break;
+@@ -1482,10 +1486,10 @@ inline int skb_checksum_setup(struct sk_
+ if (net_ratelimit())
+ printk(KERN_ERR "Attempting to checksum a non-"
+ "TCP/UDP packet, dropping a protocol"
+- " %d packet", skb->nh.iph->protocol);
++ " %d packet", iph->protocol);
+ goto out;
+ }
+- if ((skb->h.raw + skb->csum_offset + 2) > skb->tail)
++ if ((skb->transport_header + skb->csum_offset + 2) > skb->tail)
+ goto out;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->proto_csum_blank = 0;