1 files changed, 28404 insertions, 0 deletions
diff --git a/trunk/2.6.22/20012_xen3-auto-xen-drivers.patch1 b/trunk/2.6.22/20012_xen3-auto-xen-drivers.patch1
new file mode 100644
index 0000000..5b134b5
--- /dev/null
+++ b/trunk/2.6.22/20012_xen3-auto-xen-drivers.patch1
@@ -0,0 +1,28404 @@
+Subject: xen3 xen-drivers
+From: http://xenbits.xensource.com/xen-3.1-testing.hg (tip 15042)
+Patch-mainline: obsolete
+Acked-by: jbeulich@novell.com
+
+---
+ drivers/Makefile                                |    1 
+ drivers/xen/Makefile                            |   20 
+ drivers/xen/balloon/Makefile                    |    2 
+ drivers/xen/balloon/balloon.c                   |  663 +++++++
+ drivers/xen/balloon/common.h                    |   58 
+ drivers/xen/balloon/sysfs.c                     |  170 +
+ drivers/xen/blkback/Makefile                    |    3 
+ drivers/xen/blkback/blkback.c                   |  614 ++++++
+ drivers/xen/blkback/common.h                    |  139 +
+ drivers/xen/blkback/interface.c                 |  181 ++
+ drivers/xen/blkback/vbd.c                       |  118 +
+ drivers/xen/blkback/xenbus.c                    |  533 +++++
+ drivers/xen/blkfront/Makefile                   |    5 
+ drivers/xen/blkfront/blkfront.c                 |  902 ++++++++++
+ drivers/xen/blkfront/block.h                    |  142 +
+ drivers/xen/blkfront/vbd.c                      |  372 ++++
+ drivers/xen/blktap/Makefile                     |    5 
+ drivers/xen/blktap/blktap.c                     | 1528 +++++++++++++++++
+ drivers/xen/blktap/common.h                     |  121 +
+ drivers/xen/blktap/interface.c                  |  174 +
+ drivers/xen/blktap/xenbus.c                     |  473 +++++
+ drivers/xen/char/Makefile                       |    2 
+ drivers/xen/char/mem.c                          |  203 ++
+ drivers/xen/console/Makefile                    |    2 
+ drivers/xen/console/console.c                   |  721 ++++++++
+ drivers/xen/console/xencons_ring.c              |  143 +
+ drivers/xen/core/Makefile                       |   12 
+ drivers/xen/core/cpu_hotplug.c                  |  172 +
+ drivers/xen/core/evtchn.c                       | 1015 +++++++++++
+ drivers/xen/core/features.c                     |   34 
+ drivers/xen/core/gnttab.c                       |  631 +++++++
+ drivers/xen/core/hypervisor_sysfs.c             |   59 
+ drivers/xen/core/machine_kexec.c                |  189 ++
+ drivers/xen/core/machine_reboot.c               |  241 ++
+ drivers/xen/core/reboot.c                       |  249 ++
+ drivers/xen/core/smpboot.c                      |  452 +++++
+ drivers/xen/core/xen_proc.c                     |   23 
+ drivers/xen/core/xen_sysfs.c                    |  378 ++++
+ drivers/xen/evtchn/Makefile                     |    2 
+ drivers/xen/evtchn/evtchn.c                     |  469 +++++
+ drivers/xen/fbfront/Makefile                    |    2 
+ drivers/xen/fbfront/xenfb.c                     |  752 ++++++++
+ drivers/xen/fbfront/xenkbd.c                    |  333 +++
+ drivers/xen/gntdev/Makefile                     |    1 
+ drivers/xen/gntdev/gntdev.c                     |  973 ++++++++++
+ drivers/xen/netback/Makefile                    |    5 
+ drivers/xen/netback/common.h                    |  157 +
+ drivers/xen/netback/interface.c                 |  336 +++
+ drivers/xen/netback/loopback.c                  |  320 +++
+ drivers/xen/netback/netback.c                   | 1496 ++++++++++++++++
+ drivers/xen/netback/xenbus.c                    |  448 +++++
+ drivers/xen/netfront/Makefile                   |    4 
+ drivers/xen/netfront/netfront.c                 | 2133 ++++++++++++++++++++++++
+ drivers/xen/pciback/Makefile                    |   15 
+ drivers/xen/pciback/conf_space.c                |  426 ++++
+ drivers/xen/pciback/conf_space.h                |  126 +
+ drivers/xen/pciback/conf_space_capability.c     |   71 
+ drivers/xen/pciback/conf_space_capability.h     |   23 
+ drivers/xen/pciback/conf_space_capability_pm.c  |  128 +
+ drivers/xen/pciback/conf_space_capability_vpd.c |   42 
+ drivers/xen/pciback/conf_space_header.c         |  309 +++
+ drivers/xen/pciback/conf_space_quirks.c         |  126 +
+ drivers/xen/pciback/conf_space_quirks.h         |   35 
+ drivers/xen/pciback/passthrough.c               |  157 +
+ drivers/xen/pciback/pci_stub.c                  |  929 ++++++++++
+ drivers/xen/pciback/pciback.h                   |   93 +
+ drivers/xen/pciback/pciback_ops.c               |   95 +
+ drivers/xen/pciback/slot.c                      |  151 +
+ drivers/xen/pciback/vpci.c                      |  204 ++
+ drivers/xen/pciback/xenbus.c                    |  454 +++++
+ drivers/xen/pcifront/Makefile                   |    7 
+ drivers/xen/pcifront/pci.c                      |   46 
+ drivers/xen/pcifront/pci_op.c                   |  268 +++
+ drivers/xen/pcifront/pcifront.h                 |   40 
+ drivers/xen/pcifront/xenbus.c                   |  295 +++
+ drivers/xen/privcmd/Makefile                    |    2 
+ drivers/xen/privcmd/privcmd.c                   |  284 +++
+ drivers/xen/tpmback/Makefile                    |    4 
+ drivers/xen/tpmback/common.h                    |   85 
+ drivers/xen/tpmback/interface.c                 |  167 +
+ drivers/xen/tpmback/tpmback.c                   |  944 ++++++++++
+ drivers/xen/tpmback/xenbus.c                    |  289 +++
+ drivers/xen/util.c                              |   70 
+ drivers/xen/xenbus/Makefile                     |    9 
+ drivers/xen/xenbus/xenbus_backend_client.c      |  147 +
+ drivers/xen/xenbus/xenbus_client.c              |  283 +++
+ drivers/xen/xenbus/xenbus_comms.c               |  232 ++
+ drivers/xen/xenbus/xenbus_comms.h               |   46 
+ drivers/xen/xenbus/xenbus_dev.c                 |  404 ++++
+ drivers/xen/xenbus/xenbus_probe.c               | 1086 ++++++++++++
+ drivers/xen/xenbus/xenbus_probe.h               |   75 
+ drivers/xen/xenbus/xenbus_probe_backend.c       |  286 +++
+ drivers/xen/xenbus/xenbus_xs.c                  |  880 +++++++++
+ drivers/xen/xenoprof/xenoprofile.c              |  500 +++++
+ 94 files changed, 28014 insertions(+)
+
+--- a/drivers/Makefile	2007-08-27 14:01:24.000000000 -0400
++++ b/drivers/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -31,6 +31,7 @@ obj-y				+= base/ block/ misc/ mfd/ net/
+ obj-$(CONFIG_NUBUS)		+= nubus/
+ obj-$(CONFIG_ATM)		+= atm/
+ obj-y				+= macintosh/
++obj-$(CONFIG_XEN)		+= xen/
+ obj-$(CONFIG_IDE)		+= ide/
+ obj-$(CONFIG_FC4)		+= fc4/
+ obj-$(CONFIG_SCSI)		+= scsi/
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,20 @@
++obj-y	+= core/
++obj-y	+= console/
++obj-y	+= evtchn/
++obj-y	+= privcmd/
++obj-y	+= xenbus/
++obj-y	+= gntdev/
++obj-y	+= balloon/
++obj-y	+= char/
++
++obj-y	+= util.o
++obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
++obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
++obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
++obj-$(CONFIG_XEN_TPMDEV_BACKEND)	+= tpmback/
++obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= blkfront/
++obj-$(CONFIG_XEN_NETDEV_FRONTEND)	+= netfront/
++obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
++obj-$(CONFIG_XEN_PCIDEV_FRONTEND)	+= pcifront/
++obj-$(CONFIG_XEN_FRAMEBUFFER)		+= fbfront/
++obj-$(CONFIG_XEN_KEYBOARD)		+= fbfront/
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/balloon/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,2 @@
++
++obj-y := balloon.o sysfs.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/balloon/balloon.c	2007-08-27 14:02:09.000000000 -0400
+@@ -0,0 +1,663 @@
++/******************************************************************************
++ * balloon.c
++ *
++ * Xen balloon driver - enables returning/claiming memory to/from Xen.
++ *
++ * Copyright (c) 2003, B Dragovic
++ * Copyright (c) 2003-2004, M Williamson, K Fraser
++ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/sched.h>
++#include <linux/errno.h>
++#include <linux/mm.h>
++#include <linux/mman.h>
++#include <linux/smp_lock.h>
++#include <linux/pagemap.h>
++#include <linux/bootmem.h>
++#include <linux/highmem.h>
++#include <linux/vmalloc.h>
++#include <linux/mutex.h>
++#include <xen/xen_proc.h>
++#include <asm/hypervisor.h>
++#include <xen/balloon.h>
++#include <xen/interface/memory.h>
++#include <asm/maddr.h>
++#include <asm/page.h>
++#include <asm/pgalloc.h>
++#include <asm/pgtable.h>
++#include <asm/uaccess.h>
++#include <asm/tlb.h>
++#include <linux/highmem.h>
++#include <linux/list.h>
++#include <xen/xenbus.h>
++#include "common.h"
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++#ifdef CONFIG_PROC_FS
++static struct proc_dir_entry *balloon_pde;
++#endif
++
++static DEFINE_MUTEX(balloon_mutex);
++
++/*
++ * Protects atomic reservation decrease/increase against concurrent increases.
++ * Also protects non-atomic updates of current_pages and driver_pages, and
++ * balloon lists.
++ */
++DEFINE_SPINLOCK(balloon_lock);
++
++struct balloon_stats balloon_stats;
++
++/* We increase/decrease in batches which fit in a page */
++static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
++
++/* VM /proc information for memory */
++extern unsigned long totalram_pages;
++
++/* List of ballooned pages, threaded through the mem_map array. */
++static LIST_HEAD(ballooned_pages);
++
++/* Main work function, always executed in process context. */
++static void balloon_process(void *unused);
++static DECLARE_WORK(balloon_worker, balloon_process, NULL);
++static struct timer_list balloon_timer;
++
++/* When ballooning out (allocating memory to return to Xen) we don't really 
++   want the kernel to try too hard since that can trigger the oom killer. */
++#define GFP_BALLOON \
++	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
++
++#define PAGE_TO_LIST(p) (&(p)->lru)
++#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
++#define UNLIST_PAGE(p)				\
++	do {					\
++		list_del(PAGE_TO_LIST(p));	\
++		PAGE_TO_LIST(p)->next = NULL;	\
++		PAGE_TO_LIST(p)->prev = NULL;	\
++	} while(0)
++
++#define IPRINTK(fmt, args...) \
++	printk(KERN_INFO "xen_mem: " fmt, ##args)
++#define WPRINTK(fmt, args...) \
++	printk(KERN_WARNING "xen_mem: " fmt, ##args)
++
++/* balloon_append: link @page into ballooned_pages and bump its counter. */
++static void balloon_append(struct page *page)
++{
++	/* Lowmem is re-populated first, so highmem pages go at list tail. */
++	if (PageHighMem(page)) {
++		list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
++		bs.balloon_high++;
++	} else {
++		list_add(PAGE_TO_LIST(page), &ballooned_pages);
++		bs.balloon_low++;
++	}
++}
++
++/* balloon_retrieve: unlink and return the list head, or NULL if empty. */
++static struct page *balloon_retrieve(void)
++{
++	struct page *page;
++
++	if (list_empty(&ballooned_pages))
++		return NULL;
++
++	page = LIST_TO_PAGE(ballooned_pages.next);
++	UNLIST_PAGE(page);
++
++	if (PageHighMem(page))
++		bs.balloon_high--;
++	else
++		bs.balloon_low--;
++
++	return page;
++}
++
++static struct page *balloon_first_page(void)
++{
++	if (list_empty(&ballooned_pages))
++		return NULL;
++	return LIST_TO_PAGE(ballooned_pages.next);
++}
++
++static struct page *balloon_next_page(struct page *page)
++{
++	struct list_head *next = PAGE_TO_LIST(page)->next;
++	if (next == &ballooned_pages)
++		return NULL;
++	return LIST_TO_PAGE(next);
++}
++
++static void balloon_alarm(unsigned long unused)
++{
++	schedule_work(&balloon_worker);
++}
++
++static unsigned long current_target(void)
++{
++	/* Cap the request by the Xen hard limit and by the pages we own. */
++	unsigned long cap = bs.current_pages + bs.balloon_low + bs.balloon_high;
++	unsigned long want = min(bs.target_pages, bs.hard_limit);
++	return min(want, cap);
++}
++
++static int increase_reservation(unsigned long nr_pages)
++{
++	unsigned long  pfn, i, flags;
++	struct page   *page;
++	long           rc;
++	struct xen_memory_reservation reservation = {
++		.address_bits = 0,
++		.extent_order = 0,
++		.domid        = DOMID_SELF
++	};
++	/* Repopulate up to nr_pages ballooned pages with memory from Xen. */
++	if (nr_pages > ARRAY_SIZE(frame_list))
++		nr_pages = ARRAY_SIZE(frame_list);
++
++	balloon_lock(flags);
++
++	page = balloon_first_page();
++	for (i = 0; i < nr_pages; i++) {
++		BUG_ON(page == NULL);
++		frame_list[i] = page_to_pfn(page);
++		page = balloon_next_page(page);
++	}
++
++	set_xen_guest_handle(reservation.extent_start, frame_list);
++	reservation.nr_extents   = nr_pages;
++	rc = HYPERVISOR_memory_op(
++		XENMEM_populate_physmap, &reservation);
++	if (rc < 0 || (unsigned long)rc < nr_pages) { /* rc is signed */
++		if (rc > 0) {
++			int ret;
++
++			/* We hit the Xen hard limit: reprobe. */
++			reservation.nr_extents = rc;
++			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
++					&reservation);
++			BUG_ON(ret != rc);
++		}
++		if (rc >= 0)
++			bs.hard_limit = (bs.current_pages + rc -
++					 bs.driver_pages);
++		goto out;
++	}
++
++	for (i = 0; i < nr_pages; i++) {
++		page = balloon_retrieve();
++		BUG_ON(page == NULL);
++
++		pfn = page_to_pfn(page);
++		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
++		       phys_to_machine_mapping_valid(pfn));
++
++		set_phys_to_machine(pfn, frame_list[i]);
++
++#ifdef CONFIG_XEN
++		/* Link back into the page tables if not highmem. */
++		if (pfn < max_low_pfn) {
++			int ret;
++			ret = HYPERVISOR_update_va_mapping(
++				(unsigned long)__va(pfn << PAGE_SHIFT),
++				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
++				0);
++			BUG_ON(ret);
++		}
++#endif
++
++		/* Relinquish the page back to the allocator. */
++		ClearPageReserved(page);
++		init_page_count(page);
++		__free_page(page);
++	}
++
++	bs.current_pages += nr_pages;
++	totalram_pages = bs.current_pages;
++
++ out:
++	balloon_unlock(flags);
++	/* A hypercall error is reported so the caller backs off and retries. */
++	return (rc < 0) ? (int)rc : 0;
++}
++
++static int decrease_reservation(unsigned long nr_pages)
++{
++	unsigned long  pfn, i, flags;
++	struct page   *page;
++	void          *v;
++	int            need_sleep = 0;
++	int ret;
++	struct xen_memory_reservation reservation = {
++		.address_bits = 0,
++		.extent_order = 0,
++		.domid        = DOMID_SELF
++	};
++
++	if (nr_pages > ARRAY_SIZE(frame_list))
++		nr_pages = ARRAY_SIZE(frame_list);
++
++	for (i = 0; i < nr_pages; i++) {
++		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
++			nr_pages = i;
++			need_sleep = 1;
++			break;
++		}
++
++		pfn = page_to_pfn(page);
++		frame_list[i] = pfn_to_mfn(pfn);
++
++		if (!PageHighMem(page)) {
++			v = phys_to_virt(pfn << PAGE_SHIFT);
++			scrub_pages(v, 1);
++#ifdef CONFIG_XEN
++			ret = HYPERVISOR_update_va_mapping(
++				(unsigned long)v, __pte_ma(0), 0);
++			BUG_ON(ret);
++#endif
++		}
++#ifdef CONFIG_XEN_SCRUB_PAGES
++		else {
++			v = kmap(page);
++			scrub_pages(v, 1);
++			kunmap(page);
++		}
++#endif
++	}
++
++#ifdef CONFIG_XEN
++	/* Ensure that ballooned highmem pages don't have kmaps. */
++	kmap_flush_unused();
++	flush_tlb_all();
++#endif
++
++	balloon_lock(flags);
++
++	/* No more mappings: invalidate P2M and add to balloon. */
++	for (i = 0; i < nr_pages; i++) {
++		pfn = mfn_to_pfn(frame_list[i]);
++		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
++		balloon_append(pfn_to_page(pfn));
++	}
++
++	set_xen_guest_handle(reservation.extent_start, frame_list);
++	reservation.nr_extents   = nr_pages;
++	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
++	BUG_ON(ret != nr_pages);
++
++	bs.current_pages -= nr_pages;
++	totalram_pages = bs.current_pages;
++
++	balloon_unlock(flags);
++
++	return need_sleep;
++}
++
++/*
++ * We avoid multiple worker processes conflicting via the balloon mutex.
++ * We may of course race updates of the target counts (which are protected
++ * by the balloon lock), or with changes to the Xen hard limit, but we will
++ * recover from these in time.
++ */
++static void balloon_process(void *unused)
++{
++	int need_sleep = 0;
++	long credit;
++
++	mutex_lock(&balloon_mutex);
++
++	do {
++		credit = current_target() - bs.current_pages;
++		if (credit > 0)
++			need_sleep = (increase_reservation(credit) != 0);
++		if (credit < 0)
++			need_sleep = (decrease_reservation(-credit) != 0);
++
++#ifndef CONFIG_PREEMPT
++		if (need_resched())
++			schedule();
++#endif
++	} while ((credit != 0) && !need_sleep);
++
++	/* Schedule more work if there is some still to be done. */
++	if (current_target() != bs.current_pages)
++		mod_timer(&balloon_timer, jiffies + HZ);
++
++	mutex_unlock(&balloon_mutex);
++}
++
++/* Resets the Xen limit, sets new target, and kicks off processing. */
++void balloon_set_new_target(unsigned long target)
++{
++	/* No need for lock. Not read-modify-write updates. */
++	bs.hard_limit   = ~0UL;
++	bs.target_pages = target;
++	schedule_work(&balloon_worker);
++}
++
++static struct xenbus_watch target_watch =
++{
++	.node = "memory/target"
++};
++
++/* React to a change in the target key */
++static void watch_target(struct xenbus_watch *watch,
++			 const char **vec, unsigned int len)
++{
++	unsigned long long new_target;
++	int err;
++
++	err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
++	if (err != 1) {
++		/* This is ok (for domain0 at least) - so just return */
++		return;
++	}
++
++	/* The given memory/target value is in KiB, so it needs converting to
++	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
++	 */
++	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
++}
++
++static int balloon_init_watcher(struct notifier_block *notifier,
++				unsigned long event,
++				void *data)
++{
++	int err;
++
++	err = register_xenbus_watch(&target_watch);
++	if (err)
++		printk(KERN_ERR "Failed to set balloon watcher\n");
++
++	return NOTIFY_DONE;
++}
++
++#ifdef CONFIG_PROC_FS
++static int balloon_write(struct file *file, const char __user *buffer,
++			 unsigned long count, void *data)
++{
++	char memstring[64], *endchar;
++	unsigned long long target_bytes;
++	/* /proc/xen/balloon write: parse a byte count and set the new target. */
++	if (!capable(CAP_SYS_ADMIN))
++		return -EPERM;
++
++	if (count <= 1)
++		return -EBADMSG; /* runt */
++	if (count >= sizeof(memstring))
++		return -EFBIG;   /* too long */
++
++	if (copy_from_user(memstring, buffer, count))
++		return -EFAULT;
++	memstring[count] = '\0'; /* terminate right after the user data */
++
++	target_bytes = memparse(memstring, &endchar);
++	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
++
++	return count;
++}
++
++static int balloon_read(char *page, char **start, off_t off,
++			int count, int *eof, void *data)
++{
++	int len;
++
++	len = sprintf(
++		page,
++		"Current allocation: %8lu kB\n"
++		"Requested target:   %8lu kB\n"
++		"Low-mem balloon:    %8lu kB\n"
++		"High-mem balloon:   %8lu kB\n"
++		"Driver pages:       %8lu kB\n"
++		"Xen hard limit:     ",
++		PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages), 
++		PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
++		PAGES2KB(bs.driver_pages));
++
++	if (bs.hard_limit != ~0UL)
++		len += sprintf(page + len, "%8lu kB\n",
++			       PAGES2KB(bs.hard_limit));
++	else
++		len += sprintf(page + len, "     ??? kB\n");
++
++	*eof = 1;
++	return len;
++}
++#endif
++
++static struct notifier_block xenstore_notifier;
++
++static int __init balloon_init(void)
++{
++#if defined(CONFIG_X86) && defined(CONFIG_XEN)
++	unsigned long pfn;
++	struct page *page;
++#endif
++	/* Initialise accounting, proc/sysfs interfaces and the xenstore hook. */
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	IPRINTK("Initialising balloon driver.\n");
++
++#ifdef CONFIG_XEN
++	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
++	totalram_pages   = bs.current_pages;
++#else
++	bs.current_pages = totalram_pages;
++#endif
++	bs.target_pages  = bs.current_pages;
++	bs.balloon_low   = 0;
++	bs.balloon_high  = 0;
++	bs.driver_pages  = 0UL;
++	bs.hard_limit    = ~0UL;
++
++	init_timer(&balloon_timer);
++	balloon_timer.data = 0;
++	balloon_timer.function = balloon_alarm;
++
++#ifdef CONFIG_PROC_FS
++	if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
++		WPRINTK("Unable to create /proc/xen/balloon.\n");
++		return -ENOMEM; /* a real errno rather than a bare -1 */
++	}
++
++	balloon_pde->read_proc  = balloon_read;
++	balloon_pde->write_proc = balloon_write;
++#endif
++	balloon_sysfs_init();
++
++#if defined(CONFIG_X86) && defined(CONFIG_XEN)
++	/* Initialise the balloon with excess memory space. */
++	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
++		page = pfn_to_page(pfn);
++		if (!PageReserved(page))
++			balloon_append(page);
++	}
++#endif
++
++	target_watch.callback = watch_target;
++	xenstore_notifier.notifier_call = balloon_init_watcher;
++
++	register_xenstore_notifier(&xenstore_notifier);
++
++	return 0;
++}
++
++subsys_initcall(balloon_init);
++
++static void balloon_exit(void) 
++{
++    /* XXX - release balloon here */
++    return; 
++}
++
++module_exit(balloon_exit); 
++
++void balloon_update_driver_allowance(long delta)
++{
++	unsigned long flags;
++
++	balloon_lock(flags);
++	bs.driver_pages += delta;
++	balloon_unlock(flags);
++}
++
++#ifdef CONFIG_XEN
++static int dealloc_pte_fn(
++	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
++{
++	unsigned long mfn = pte_mfn(*pte);
++	int ret;
++	struct xen_memory_reservation reservation = {
++		.nr_extents   = 1,
++		.extent_order = 0,
++		.domid        = DOMID_SELF
++	};
++	set_xen_guest_handle(reservation.extent_start, &mfn);
++	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
++	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
++	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
++	BUG_ON(ret != 1);
++	return 0;
++}
++#endif
++
++struct page **alloc_empty_pages_and_pagevec(int nr_pages)
++{
++	unsigned long vaddr, flags;
++	struct page *page, **pagevec;
++	int i, ret;
++	/* Allocate nr_pages pages and hand their backing frames back to Xen. */
++	pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
++	if (pagevec == NULL)
++		return NULL;
++
++	for (i = 0; i < nr_pages; i++) {
++		page = pagevec[i] = alloc_page(GFP_KERNEL);
++		if (page == NULL)
++			goto err;
++
++		vaddr = (unsigned long)page_address(page);
++
++		scrub_pages(vaddr, 1);
++
++		balloon_lock(flags);
++
++		if (xen_feature(XENFEAT_auto_translated_physmap)) {
++			unsigned long gmfn = page_to_pfn(page);
++			struct xen_memory_reservation reservation = {
++				.nr_extents   = 1,
++				.extent_order = 0,
++				.domid        = DOMID_SELF
++			};
++			set_xen_guest_handle(reservation.extent_start, &gmfn);
++			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
++						   &reservation);
++			/* Only exactly one released extent is success; 0 is not. */
++			ret = (ret == 1) ? 0 : -ENOMEM;
++		} else {
++#ifdef CONFIG_XEN
++			ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
++						  dealloc_pte_fn, NULL);
++#else
++			/* Cannot handle non-auto translate mode. */
++			ret = 1;
++#endif
++		}
++
++		if (ret != 0) {
++			balloon_unlock(flags);
++			__free_page(page);
++			goto err;
++		}
++
++		totalram_pages = --bs.current_pages;
++
++		balloon_unlock(flags);
++	}
++
++ out:
++	schedule_work(&balloon_worker);
++#ifdef CONFIG_XEN
++	flush_tlb_all();
++#endif
++	return pagevec;
++
++ err:
++	balloon_lock(flags);
++	while (--i >= 0)
++		balloon_append(pagevec[i]);
++	balloon_unlock(flags);
++	kfree(pagevec);
++	pagevec = NULL;
++	goto out;
++}
++
++void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
++{
++	unsigned long flags;
++	int i;
++
++	if (pagevec == NULL)
++		return;
++
++	balloon_lock(flags);
++	for (i = 0; i < nr_pages; i++) {
++		BUG_ON(page_count(pagevec[i]) != 1);
++		balloon_append(pagevec[i]);
++	}
++	balloon_unlock(flags);
++
++	kfree(pagevec);
++
++	schedule_work(&balloon_worker);
++}
++
++void balloon_release_driver_page(struct page *page)
++{
++	unsigned long flags;
++
++	balloon_lock(flags);
++	balloon_append(page);
++	bs.driver_pages--;
++	balloon_unlock(flags);
++
++	schedule_work(&balloon_worker);
++}
++
++EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
++EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
++EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
++EXPORT_SYMBOL_GPL(balloon_release_driver_page);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/balloon/common.h	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,58 @@
++/******************************************************************************
++ * balloon/common.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __XEN_BALLOON_COMMON_H__
++#define __XEN_BALLOON_COMMON_H__
++
++#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
++
++struct balloon_stats {
++	/* We aim for 'current allocation' == 'target allocation'. */
++	unsigned long current_pages;
++	unsigned long target_pages;
++	/* We may hit the hard limit in Xen. If we do then we remember it. */
++	unsigned long hard_limit;
++	/*
++	 * Drivers may alter the memory reservation independently, but they
++	 * must inform the balloon driver so we avoid hitting the hard limit.
++	 */
++	unsigned long driver_pages;
++	/* Number of pages in high- and low-memory balloons. */
++	unsigned long balloon_low;
++	unsigned long balloon_high;
++};
++
++extern struct balloon_stats balloon_stats;
++#define bs balloon_stats
++
++int balloon_sysfs_init(void);
++void balloon_sysfs_exit(void);
++
++void balloon_set_new_target(unsigned long target);
++
++#endif /* __XEN_BALLOON_COMMON_H__ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/balloon/sysfs.c	2007-08-27 14:01:59.000000000 -0400
+@@ -0,0 +1,170 @@
++/******************************************************************************
++ * balloon/sysfs.c
++ *
++ * Xen balloon driver - sysfs interfaces.
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/capability.h>
++#include <linux/errno.h>
++#include <linux/stat.h>
++#include <linux/string.h>
++#include <linux/sysdev.h>
++#include "common.h"
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++#define BALLOON_CLASS_NAME "memory"
++
++#define BALLOON_SHOW(name, format, args...)			\
++	static ssize_t show_##name(struct sys_device *dev,	\
++				   char *buf)			\
++	{							\
++		return sprintf(buf, format, ##args);		\
++	}							\
++	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
++
++BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
++BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
++BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
++BALLOON_SHOW(hard_limit_kb,
++	     (bs.hard_limit!=~0UL) ? "%lu\n" : "???\n",
++	     (bs.hard_limit!=~0UL) ? PAGES2KB(bs.hard_limit) : 0);
++BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
++
++static ssize_t show_target_kb(struct sys_device *dev, char *buf)
++{
++	return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
++}
++
++static ssize_t store_target_kb(struct sys_device *dev,
++			       const char *buf,
++			       size_t count)
++{
++	char memstring[64], *endchar;
++	unsigned long long target_bytes;
++	/* NOTE: despite the attribute name, the value is parsed as bytes. */
++	if (!capable(CAP_SYS_ADMIN))
++		return -EPERM;
++
++	if (count <= 1)
++		return -EBADMSG; /* runt */
++	if (count >= sizeof(memstring))
++		return -EFBIG;   /* too long */
++	memcpy(memstring, buf, count);
++	memstring[count] = '\0'; /* bound the copy by count, not by buf's NUL */
++	target_bytes = memparse(memstring, &endchar);
++	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
++
++	return count;
++}
++
++static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
++		   show_target_kb, store_target_kb);
++
++static struct sysdev_attribute *balloon_attrs[] = {
++	&attr_target_kb,
++};
++
++static struct attribute *balloon_info_attrs[] = {
++	&attr_current_kb.attr,
++	&attr_low_kb.attr,
++	&attr_high_kb.attr,
++	&attr_hard_limit_kb.attr,
++	&attr_driver_kb.attr,
++	NULL
++};
++
++static struct attribute_group balloon_info_group = {
++	.name = "info",
++	.attrs = balloon_info_attrs,
++};
++
++static struct sysdev_class balloon_sysdev_class = {
++	set_kset_name(BALLOON_CLASS_NAME),
++};
++
++static struct sys_device balloon_sysdev;
++
++static int register_balloon(struct sys_device *sysdev)
++{
++	int i, error;
++
++	error = sysdev_class_register(&balloon_sysdev_class);
++	if (error)
++		return error;
++	/* Single device instance (id 0) of the balloon class. */
++	sysdev->id = 0;
++	sysdev->cls = &balloon_sysdev_class;
++
++	error = sysdev_register(sysdev);
++	if (error) {
++		sysdev_class_unregister(&balloon_sysdev_class);
++		return error;
++	}
++	/* Attribute files from balloon_attrs[], created directly on the device. */
++	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
++		error = sysdev_create_file(sysdev, balloon_attrs[i]);
++		if (error)
++			goto fail;
++	}
++	/* The read-only counters are grouped in the "info" attribute group. */
++	error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
++	if (error)
++		goto fail;
++	
++	return 0;
++	/* Unwind: i is the number of attribute files created so far. */
++ fail:
++	while (--i >= 0)
++		sysdev_remove_file(sysdev, balloon_attrs[i]);
++	sysdev_unregister(sysdev);
++	sysdev_class_unregister(&balloon_sysdev_class);
++	return error;
++}
++
++static void unregister_balloon(struct sys_device *sysdev)
++{
++	int i;
++
++	sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
++	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
++		sysdev_remove_file(sysdev, balloon_attrs[i]);
++	sysdev_unregister(sysdev);
++	sysdev_class_unregister(&balloon_sysdev_class);
++}
++
++int balloon_sysfs_init(void)
++{
++	return register_balloon(&balloon_sysdev);
++}
++
++void balloon_sysfs_exit(void)
++{
++	unregister_balloon(&balloon_sysdev);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkback/Makefile	2007-08-27 14:01:47.000000000 -0400
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o
++
++blkbk-y	:= blkback.o xenbus.o interface.o vbd.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkback/blkback.c	2007-08-27 14:02:10.000000000 -0400
+@@ -0,0 +1,614 @@
++/******************************************************************************
++ * drivers/xen/blkback/blkback.c
++ * 
++ * Back-end of the driver for virtual block devices. This portion of the
++ * driver exports a 'unified' block-device interface that can be accessed
++ * by any operating system that implements a compatible front end. A 
++ * reference front-end implementation can be found in:
++ *  drivers/xen/blkfront
++ * 
++ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
++ * Copyright (c) 2005, Christopher Clark
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/spinlock.h>
++#include <linux/kthread.h>
++#include <linux/list.h>
++#include <xen/balloon.h>
++#include <asm/hypervisor.h>
++#include "common.h"
++
++/*
++ * These are rather arbitrary. They are fairly large because adjacent requests
++ * pulled from a communication ring are quite likely to end up being part of
++ * the same scatter/gather request at the disc.
++ * 
++ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
++ * 
++ * This will increase the chances of being able to write whole tracks.
++ * 64 should be enough to keep us competitive with Linux.
++ */
++static int blkif_reqs = 64;
++module_param_named(reqs, blkif_reqs, int, 0);
++MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
++
++/* Run-time switchable: /sys/module/blkbk/parameters/ */
++static unsigned int log_stats = 0;
++static unsigned int debug_lvl = 0;
++module_param(log_stats, int, 0644);
++module_param(debug_lvl, int, 0644);
++
++/*
++ * Each outstanding request that we've passed to the lower device layers has a
++ * 'pending_req' allocated to it. Each bio that completes decrements the
++ * pendcnt towards zero. When it hits zero, the specified domain has a
++ * response queued for it, with the saved 'id' passed back.
++ */
++typedef struct {
++	blkif_t       *blkif;
++	u64            id;
++	int            nr_pages;
++	atomic_t       pendcnt;
++	unsigned short operation;
++	int            status;
++	struct list_head free_list;
++} pending_req_t;
++
++static pending_req_t *pending_reqs;
++static struct list_head pending_free;
++static DEFINE_SPINLOCK(pending_free_lock);
++static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
++
++#define BLKBACK_INVALID_HANDLE (~0)
++
++static struct page **pending_pages;
++static grant_handle_t *pending_grant_handles;
++
++static inline int vaddr_pagenr(pending_req_t *req, int seg)
++{
++	return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
++}
++
++static inline unsigned long vaddr(pending_req_t *req, int seg)
++{
++	unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
++	return (unsigned long)pfn_to_kaddr(pfn);
++}
++
++#define pending_handle(_req, _seg) \
++	(pending_grant_handles[vaddr_pagenr(_req, _seg)])
++
++
++static int do_block_io_op(blkif_t *blkif);
++static void dispatch_rw_block_io(blkif_t *blkif,
++				 blkif_request_t *req,
++				 pending_req_t *pending_req);
++static void make_response(blkif_t *blkif, u64 id,
++			  unsigned short op, int st);
++
++/******************************************************************
++ * misc small helpers
++ */
++static pending_req_t* alloc_req(void)
++{
++	pending_req_t *req = NULL;	/* returned as-is when the free list is empty */
++	unsigned long flags;
++	/* Pop the first entry of pending_free, if any, under pending_free_lock. */
++	spin_lock_irqsave(&pending_free_lock, flags);
++	if (!list_empty(&pending_free)) {
++		req = list_entry(pending_free.next, pending_req_t, free_list);
++		list_del(&req->free_list);
++	}
++	spin_unlock_irqrestore(&pending_free_lock, flags);
++	return req;
++}
++
++static void free_req(pending_req_t *req)
++{
++	unsigned long flags;
++	int was_empty;
++	/* Put req back on pending_free; wake waiters only on empty -> non-empty. */
++	spin_lock_irqsave(&pending_free_lock, flags);
++	was_empty = list_empty(&pending_free);
++	list_add(&req->free_list, &pending_free);
++	spin_unlock_irqrestore(&pending_free_lock, flags);
++	if (was_empty)
++		wake_up(&pending_free_wq);
++}
++
++static void unplug_queue(blkif_t *blkif)
++{
++	if (blkif->plug == NULL)
++		return;	/* no queue is currently remembered */
++	if (blkif->plug->unplug_fn)
++		blkif->plug->unplug_fn(blkif->plug);
++	blk_put_queue(blkif->plug);	/* pairs with blk_get_queue() in plug_queue() */
++	blkif->plug = NULL;
++}
++
++static void plug_queue(blkif_t *blkif, struct bio *bio)
++{
++	request_queue_t *q = bdev_get_queue(bio->bi_bdev);
++	/* Remember the queue this bio targets; unplug any different one first. */
++	if (q == blkif->plug)
++		return;
++	unplug_queue(blkif);
++	blk_get_queue(q);
++	blkif->plug = q;
++}
++
++static void fast_flush_area(pending_req_t *req)
++{
++	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
++	unsigned int i, invcount = 0;
++	grant_handle_t handle;
++	int ret;
++	/* Collect an unmap op for every segment of req that still has a handle. */
++	for (i = 0; i < req->nr_pages; i++) {
++		handle = pending_handle(req, i);
++		if (handle == BLKBACK_INVALID_HANDLE)
++			continue;	/* nothing mapped for this segment */
++		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
++				    GNTMAP_host_map, handle);
++		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
++		invcount++;
++	}
++	/* unmap[] is packed densely, so its first invcount entries are valid. */
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_unmap_grant_ref, unmap, invcount);
++	BUG_ON(ret);
++}
++
++/******************************************************************
++ * SCHEDULER FUNCTIONS
++ */
++
++static void print_stats(blkif_t *blkif)
++{
++	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d  |  br %4d\n",
++	       current->comm, blkif->st_oo_req,
++	       blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
++	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
++	blkif->st_rd_req = 0;
++	blkif->st_wr_req = 0;
++	blkif->st_oo_req = 0;
++}
++
++int blkif_schedule(void *arg)
++{
++	blkif_t *blkif = arg;
++	/* Hold a reference on blkif until this thread function returns. */
++	blkif_get(blkif);
++
++	if (debug_lvl)
++		printk(KERN_DEBUG "%s: started\n", current->comm);
++	/* Each pass: wait for flagged work and a free pending_req, then process. */
++	while (!kthread_should_stop()) {
++		wait_event_interruptible(
++			blkif->wq,
++			blkif->waiting_reqs || kthread_should_stop());
++		wait_event_interruptible(
++			pending_free_wq,
++			!list_empty(&pending_free) || kthread_should_stop());
++
++		blkif->waiting_reqs = 0;
++		smp_mb(); /* clear flag *before* checking for work */
++		/* Non-zero means requests remain; re-flag so the first wait falls through. */
++		if (do_block_io_op(blkif))
++			blkif->waiting_reqs = 1;
++		unplug_queue(blkif);
++
++		if (log_stats && time_after(jiffies, blkif->st_print))
++			print_stats(blkif);
++	}
++
++	if (log_stats)
++		print_stats(blkif);
++	if (debug_lvl)
++		printk(KERN_DEBUG "%s: exiting\n", current->comm);
++
++	blkif->xenblkd = NULL;
++	blkif_put(blkif);
++
++	return 0;
++}
++
++/******************************************************************
++ * COMPLETION CALLBACK -- Called as bio->bi_end_io()
++ */
++
++static void __end_block_io_op(pending_req_t *pending_req, int error)
++{
++	/* An error fails the entire request. */
++	if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
++	    (error == -EOPNOTSUPP)) {
++		DPRINTK("blkback: write barrier op failed, not supported\n");
++		blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
++		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
++	} else if (error) {
++		DPRINTK("Buffer not up-to-date at end of operation, "
++			"error=%d\n", error);
++		pending_req->status = BLKIF_RSP_ERROR;
++	}
++	/* Last outstanding bio: unmap the grants, respond, release the request. */
++	if (atomic_dec_and_test(&pending_req->pendcnt)) {
++		fast_flush_area(pending_req);
++		make_response(pending_req->blkif, pending_req->id,
++			      pending_req->operation, pending_req->status);
++		blkif_put(pending_req->blkif);
++		free_req(pending_req);
++	}
++}
++
++static int end_block_io_op(struct bio *bio, unsigned int done, int error)
++{
++	if (bio->bi_size != 0)
++		return 1;
++	__end_block_io_op(bio->bi_private, error);
++	bio_put(bio);
++	return error;
++}
++
++
++/******************************************************************************
++ * NOTIFICATION FROM GUEST OS.
++ */
++
++static void blkif_notify_work(blkif_t *blkif)
++{
++	blkif->waiting_reqs = 1;
++	wake_up(&blkif->wq);
++}
++
++irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++{
++	blkif_notify_work(dev_id);
++	return IRQ_HANDLED;
++}
++
++
++
++/******************************************************************
++ * DOWNWARD CALLS -- These interface with the block-device layer proper.
++ */
++
++static int do_block_io_op(blkif_t *blkif)
++{
++	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
++	blkif_request_t req;
++	pending_req_t *pending_req;
++	RING_IDX rc, rp;
++	int more_to_do = 0;	/* set when we stop for lack of a pending_req */
++
++	rc = blk_rings->common.req_cons;
++	rp = blk_rings->common.sring->req_prod;
++	rmb(); /* Ensure we see queued requests up to 'rp'. */
++
++	while ((rc != rp)) {
++
++		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
++			break;
++		/* No free pending_req: count it and report that work remains. */
++		pending_req = alloc_req();
++		if (NULL == pending_req) {
++			blkif->st_oo_req++;
++			more_to_do = 1;
++			break;
++		}
++		/* Copy the request into a local, converting from the ring's layout. */
++		switch (blkif->blk_protocol) {
++		case BLKIF_PROTOCOL_NATIVE:
++			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
++			break;
++		case BLKIF_PROTOCOL_X86_32:
++			blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
++			break;
++		case BLKIF_PROTOCOL_X86_64:
++			blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
++			break;
++		default:
++			BUG();
++		}
++		blk_rings->common.req_cons = ++rc; /* before make_response() */
++		/* Reads/writes hand pending_req to the dispatcher; unknown ops free it here. */
++		switch (req.operation) {
++		case BLKIF_OP_READ:
++			blkif->st_rd_req++;
++			dispatch_rw_block_io(blkif, &req, pending_req);
++			break;
++		case BLKIF_OP_WRITE_BARRIER:
++			blkif->st_br_req++;
++			/* fall through */
++		case BLKIF_OP_WRITE:
++			blkif->st_wr_req++;
++			dispatch_rw_block_io(blkif, &req, pending_req);
++			break;
++		default:
++			DPRINTK("error: unknown block io operation [%d]\n",
++				req.operation);
++			make_response(blkif, req.id, req.operation,
++				      BLKIF_RSP_ERROR);
++			free_req(pending_req);
++			break;
++		}
++	}
++	return more_to_do;
++}
++
++static void dispatch_rw_block_io(blkif_t *blkif,
++				 blkif_request_t *req,
++				 pending_req_t *pending_req)
++{
++	/* Validate the request, map its grants, then build and submit the bios. */
++	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
++	struct phys_req preq;
++	struct {
++		unsigned long buf; unsigned int nsec;
++	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
++	unsigned int nseg;
++	struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
++	int ret, i, nbio = 0;
++	int operation;
++
++	switch (req->operation) {
++	case BLKIF_OP_READ:
++		operation = READ;
++		break;
++	case BLKIF_OP_WRITE:
++		operation = WRITE;
++		break;
++	case BLKIF_OP_WRITE_BARRIER:
++		operation = WRITE_BARRIER;
++		break;
++	default:
++		operation = 0; /* make gcc happy */
++		BUG();
++	}
++
++	/* Check that number of segments is sane. */
++	nseg = req->nr_segments;
++	if (unlikely(nseg == 0) ||
++	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
++		DPRINTK("Bad number of segments in request (%d)\n", nseg);
++		goto fail_response;
++	}
++
++	preq.dev           = req->handle;
++	preq.sector_number = req->sector_number;
++	preq.nr_sects      = 0;
++
++	pending_req->blkif     = blkif;
++	pending_req->id        = req->id;
++	pending_req->operation = req->operation;
++	pending_req->status    = BLKIF_RSP_OKAY;
++	pending_req->nr_pages  = nseg;
++
++	for (i = 0; i < nseg; i++) {
++		uint32_t flags;
++
++		seg[i].nsec = req->seg[i].last_sect -
++			req->seg[i].first_sect + 1;
++
++		if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
++		    (req->seg[i].last_sect < req->seg[i].first_sect))
++			goto fail_response;
++		preq.nr_sects += seg[i].nsec;
++
++		flags = GNTMAP_host_map;
++		if (operation != READ)
++			flags |= GNTMAP_readonly;
++		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
++				  req->seg[i].gref, blkif->domid);
++	}
++	/* Map the grants of all segments with a single batched hypercall. */
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
++	BUG_ON(ret);
++
++	for (i = 0; i < nseg; i++) {
++		if (unlikely(map[i].status != 0)) {
++			DPRINTK("invalid buffer -- could not remap it\n");
++			map[i].handle = BLKBACK_INVALID_HANDLE;
++			ret |= 1;
++		}
++
++		pending_handle(pending_req, i) = map[i].handle;
++
++		if (ret)
++			continue;
++
++		set_phys_to_machine(__pa(vaddr(
++			pending_req, i)) >> PAGE_SHIFT,
++			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
++		seg[i].buf  = map[i].dev_bus_addr |
++			(req->seg[i].first_sect << 9);
++	}
++
++	if (ret)
++		goto fail_flush;
++
++	if (vbd_translate(&preq, blkif, operation) != 0) {
++		DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
++			operation == READ ? "read" : "write",
++			preq.sector_number,
++			preq.sector_number + preq.nr_sects, preq.dev);
++		goto fail_flush;
++	}
++	/* Add each segment to the current bio; start a new bio when it is full. */
++	for (i = 0; i < nseg; i++) {
++		if (((int)preq.sector_number|(int)seg[i].nsec) &
++		    ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
++			DPRINTK("Misaligned I/O request from domain %d",
++				blkif->domid);
++			goto fail_put_bio;
++		}
++
++		while ((bio == NULL) ||
++		       (bio_add_page(bio,
++				     virt_to_page(vaddr(pending_req, i)),
++				     seg[i].nsec << 9,
++				     seg[i].buf & ~PAGE_MASK) == 0)) {
++			bio = bio_alloc(GFP_KERNEL, nseg-i);
++			if (unlikely(bio == NULL))
++				goto fail_put_bio;
++			biolist[nbio++] = bio;	/* record only allocated bios */
++			bio->bi_bdev    = preq.bdev;
++			bio->bi_private = pending_req;
++			bio->bi_end_io  = end_block_io_op;
++			bio->bi_sector  = preq.sector_number;
++		}
++
++		preq.sector_number += seg[i].nsec;
++	}
++
++	plug_queue(blkif, bio);
++	atomic_set(&pending_req->pendcnt, nbio);
++	blkif_get(blkif);
++
++	for (i = 0; i < nbio; i++)
++		submit_bio(operation, biolist[i]);
++
++	if (operation == READ)
++		blkif->st_rd_sect += preq.nr_sects;
++	else if (operation == WRITE)
++		blkif->st_wr_sect += preq.nr_sects;
++
++	return;
++
++ fail_put_bio:
++	for (i = 0; i < nbio; i++)	/* every recorded bio, including the last */
++		bio_put(biolist[i]);
++ fail_flush:
++	fast_flush_area(pending_req);
++ fail_response:
++	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
++	free_req(pending_req);
++}
++
++
++
++/******************************************************************
++ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
++ */
++
++
++static void make_response(blkif_t *blkif, u64 id,
++			  unsigned short op, int st)
++{
++	blkif_response_t  resp;
++	unsigned long     flags;
++	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
++	int more_to_do = 0;
++	int notify;
++
++	resp.id        = id;
++	resp.operation = op;
++	resp.status    = st;
++	/* The response ring is updated under blk_ring_lock with irqs disabled. */
++	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
++	/* Place on the response ring for the relevant domain. */
++	switch (blkif->blk_protocol) {
++	case BLKIF_PROTOCOL_NATIVE:
++		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
++		       &resp, sizeof(resp));
++		break;
++	case BLKIF_PROTOCOL_X86_32:
++		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
++		       &resp, sizeof(resp));
++		break;
++	case BLKIF_PROTOCOL_X86_64:
++		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
++		       &resp, sizeof(resp));
++		break;
++	default:
++		BUG();
++	}
++	blk_rings->common.rsp_prod_pvt++;
++	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
++	if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
++		/*
++		 * Tail check for pending requests. Allows frontend to avoid
++		 * notifications if requests are already in flight (lower
++		 * overheads and promotes batching).
++		 */
++		RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
++
++	} else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
++		more_to_do = 1;
++	}
++
++	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
++	/* After unlocking: wake our own worker and/or signal the frontend. */
++	if (more_to_do)
++		blkif_notify_work(blkif);
++	if (notify)
++		notify_remote_via_irq(blkif->irq);
++}
++
++static int __init blkif_init(void)
++{
++	int i, mmap_pages;
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
++	/* One page and one grant handle per segment of every pending request. */
++	pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
++					blkif_reqs, GFP_KERNEL);
++	pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
++					mmap_pages, GFP_KERNEL);
++	pending_pages         = alloc_empty_pages_and_pagevec(mmap_pages);
++
++	if (!pending_reqs || !pending_grant_handles || !pending_pages)
++		goto out_of_memory;
++
++	for (i = 0; i < mmap_pages; i++)
++		pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
++
++	blkif_interface_init();
++	/* Clear the whole array; sizeof(pending_reqs) is only a pointer's size. */
++	memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
++	INIT_LIST_HEAD(&pending_free);
++	/* Every request starts out on the free list. */
++	for (i = 0; i < blkif_reqs; i++)
++		list_add_tail(&pending_reqs[i].free_list, &pending_free);
++
++	blkif_xenbus_init();
++
++	return 0;
++
++ out_of_memory:
++	kfree(pending_reqs);
++	kfree(pending_grant_handles);
++	free_empty_pages_and_pagevec(pending_pages, mmap_pages);
++	printk("%s: out of memory\n", __FUNCTION__);
++	return -ENOMEM;
++}
++
++module_init(blkif_init);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkback/common.h	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,139 @@
++/* 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __BLKIF__BACKEND__COMMON_H__
++#define __BLKIF__BACKEND__COMMON_H__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <linux/blkdev.h>
++#include <linux/vmalloc.h>
++#include <linux/wait.h>
++#include <asm/io.h>
++#include <asm/setup.h>
++#include <asm/pgalloc.h>
++#include <xen/evtchn.h>
++#include <asm/hypervisor.h>
++#include <xen/blkif.h>
++#include <xen/gnttab.h>
++#include <xen/driver_util.h>
++#include <xen/xenbus.h>
++
++#define DPRINTK(_f, _a...)			\
++	pr_debug("(file=%s, line=%d) " _f,	\
++		 __FILE__ , __LINE__ , ## _a )
++
++struct vbd {
++	blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
++	unsigned char  readonly;    /* Non-zero -> read-only */
++	unsigned char  type;        /* VDISK_xxx */
++	u32            pdevice;     /* phys device that this vbd maps to */
++	struct block_device *bdev;
++};
++
++struct backend_info;
++
++typedef struct blkif_st {
++	/* Unique identifier for this interface. */
++	domid_t           domid;
++	unsigned int      handle;
++	/* Physical parameters of the comms window. */
++	unsigned int      irq;
++	/* Comms information. */
++	enum blkif_protocol blk_protocol;
++	blkif_back_rings_t blk_rings;
++	struct vm_struct *blk_ring_area;
++	/* The VBD attached to this interface. */
++	struct vbd        vbd;
++	/* Back pointer to the backend_info. */
++	struct backend_info *be;
++	/* Private fields. */
++	spinlock_t       blk_ring_lock;
++	atomic_t         refcnt;
++
++	wait_queue_head_t   wq;
++	struct task_struct  *xenblkd;
++	unsigned int        waiting_reqs;
++	request_queue_t     *plug;
++
++	/* statistics */
++	unsigned long       st_print;
++	int                 st_rd_req;
++	int                 st_wr_req;
++	int                 st_oo_req;
++	int                 st_br_req;
++	int                 st_rd_sect;
++	int                 st_wr_sect;
++
++	wait_queue_head_t waiting_to_free;
++
++	grant_handle_t shmem_handle;
++	grant_ref_t    shmem_ref;
++} blkif_t;
++
++blkif_t *blkif_alloc(domid_t domid);
++void blkif_disconnect(blkif_t *blkif);
++void blkif_free(blkif_t *blkif);
++int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
++
++#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
++#define blkif_put(_b)					\
++	do {						\
++		if (atomic_dec_and_test(&(_b)->refcnt))	\
++			wake_up(&(_b)->waiting_to_free);\
++	} while (0)
++
++/* Create a vbd. */
++int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
++	       unsigned minor, int readonly);
++void vbd_free(struct vbd *vbd);
++
++unsigned long long vbd_size(struct vbd *vbd);
++unsigned int vbd_info(struct vbd *vbd);
++unsigned long vbd_secsize(struct vbd *vbd);
++
++struct phys_req {
++	unsigned short       dev;
++	unsigned short       nr_sects;
++	struct block_device *bdev;
++	blkif_sector_t       sector_number;
++};
++
++int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
++
++void blkif_interface_init(void);
++
++void blkif_xenbus_init(void);
++
++irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++int blkif_schedule(void *arg);
++
++int blkback_barrier(struct xenbus_transaction xbt,
++		    struct backend_info *be, int state);
++
++#endif /* __BLKIF__BACKEND__COMMON_H__ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkback/interface.c	2007-08-27 14:02:01.000000000 -0400
+@@ -0,0 +1,181 @@
++/******************************************************************************
++ * drivers/xen/blkback/interface.c
++ * 
++ * Block-device interface management.
++ * 
++ * Copyright (c) 2004, Keir Fraser
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++#include <xen/evtchn.h>
++#include <linux/kthread.h>
++
++static kmem_cache_t *blkif_cachep;
++
++blkif_t *blkif_alloc(domid_t domid)
++{
++	blkif_t *blkif;
++
++	blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
++	if (!blkif)
++		return ERR_PTR(-ENOMEM);
++
++	memset(blkif, 0, sizeof(*blkif));
++	blkif->domid = domid;
++	spin_lock_init(&blkif->blk_ring_lock);
++	atomic_set(&blkif->refcnt, 1);
++	init_waitqueue_head(&blkif->wq);
++	blkif->st_print = jiffies;
++	init_waitqueue_head(&blkif->waiting_to_free);
++
++	return blkif;
++}
++
++static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
++{
++	struct gnttab_map_grant_ref op;
++
++	gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
++			  GNTMAP_host_map, shared_page, blkif->domid);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
++
++	if (op.status) {
++		DPRINTK(" Grant table operation failure !\n");
++		return op.status;
++	}
++
++	blkif->shmem_ref = shared_page;
++	blkif->shmem_handle = op.handle;
++
++	return 0;
++}
++
++static void unmap_frontend_page(blkif_t *blkif)
++{
++	struct gnttab_unmap_grant_ref op;
++
++	gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
++			    GNTMAP_host_map, blkif->shmem_handle);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++		BUG();
++}
++
++int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
++{
++	int err;
++
++	/* Already connected through? */
++	if (blkif->irq)
++		return 0;
++	/* Reserve one page of kernel address space to map the shared ring into. */
++	if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
++		return -ENOMEM;
++
++	err = map_frontend_page(blkif, shared_page);
++	if (err) {
++		free_vm_area(blkif->blk_ring_area);
++		return err;
++	}
++	/* Initialise the back ring using the layout selected by blk_protocol. */
++	switch (blkif->blk_protocol) {
++	case BLKIF_PROTOCOL_NATIVE:
++	{
++		blkif_sring_t *sring;
++		sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
++		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
++		break;
++	}
++	case BLKIF_PROTOCOL_X86_32:
++	{
++		blkif_x86_32_sring_t *sring_x86_32;
++		sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr;
++		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
++		break;
++	}
++	case BLKIF_PROTOCOL_X86_64:
++	{
++		blkif_x86_64_sring_t *sring_x86_64;
++		sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr;
++		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
++		break;
++	}
++	default:
++		BUG();
++	}
++	/* Bind the event channel to blkif_be_int(); on failure undo the mapping. */
++	err = bind_interdomain_evtchn_to_irqhandler(
++		blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif);
++	if (err < 0)
++	{
++		unmap_frontend_page(blkif);
++		free_vm_area(blkif->blk_ring_area);
++		blkif->blk_rings.common.sring = NULL;
++		return err;
++	}
++	blkif->irq = err;	/* a non-zero irq is what marks the interface connected */
++
++	return 0;
++}
++
++void blkif_disconnect(blkif_t *blkif)
++{
++	if (blkif->xenblkd) {
++		kthread_stop(blkif->xenblkd);
++		blkif->xenblkd = NULL;
++	}
++	/* Drop our reference, wait until no other holder remains, then retake it. */
++	atomic_dec(&blkif->refcnt);
++	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
++	atomic_inc(&blkif->refcnt);
++	/* With no other references left, release the irq and the ring mapping. */
++	if (blkif->irq) {
++		unbind_from_irqhandler(blkif->irq, blkif);
++		blkif->irq = 0;
++	}
++
++	if (blkif->blk_rings.common.sring) {
++		unmap_frontend_page(blkif);
++		free_vm_area(blkif->blk_ring_area);
++		blkif->blk_rings.common.sring = NULL;
++	}
++}
++
++void blkif_free(blkif_t *blkif)
++{
++	if (!atomic_dec_and_test(&blkif->refcnt))
++		BUG();
++	kmem_cache_free(blkif_cachep, blkif);
++}
++
++void __init blkif_interface_init(void)
++{
++	blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
++					 0, 0, NULL, NULL);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkback/vbd.c	2007-08-27 14:01:47.000000000 -0400
+@@ -0,0 +1,118 @@
++/******************************************************************************
++ * blkback/vbd.c
++ * 
++ * Routines for managing virtual block devices (VBDs).
++ * 
++ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++
++#define vbd_sz(_v)   ((_v)->bdev->bd_part ?				\
++	(_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
++
++unsigned long long vbd_size(struct vbd *vbd)
++{
++	return vbd_sz(vbd);
++}
++
++unsigned int vbd_info(struct vbd *vbd)
++{
++	return vbd->type | (vbd->readonly?VDISK_READONLY:0);
++}
++
++unsigned long vbd_secsize(struct vbd *vbd)
++{
++	return bdev_hardsect_size(vbd->bdev);
++}
++
++int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
++	       unsigned minor, int readonly)
++{
++	struct vbd *vbd;
++	struct block_device *bdev;
++
++	vbd = &blkif->vbd;
++	vbd->handle   = handle; 
++	vbd->readonly = readonly;
++	vbd->type     = 0;
++
++	vbd->pdevice  = MKDEV(major, minor);
++
++	bdev = open_by_devnum(vbd->pdevice,
++			      vbd->readonly ? FMODE_READ : FMODE_WRITE);
++
++	if (IS_ERR(bdev)) {
++		DPRINTK("vbd_creat: device %08x could not be opened.\n",
++			vbd->pdevice);
++		return -ENOENT;
++	}
++
++	vbd->bdev = bdev;
++
++	if (vbd->bdev->bd_disk == NULL) {
++		DPRINTK("vbd_creat: device %08x doesn't exist.\n",
++			vbd->pdevice);
++		vbd_free(vbd);
++		return -ENOENT;
++	}
++
++	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD)
++		vbd->type |= VDISK_CDROM;
++	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
++		vbd->type |= VDISK_REMOVABLE;
++
++	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
++		handle, blkif->domid);
++	return 0;
++}
++
++void vbd_free(struct vbd *vbd)
++{
++	if (vbd->bdev)
++		blkdev_put(vbd->bdev);
++	vbd->bdev = NULL;
++}
++
++/* Validate a request against the VBD and bind it to the backing device. */
++int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
++{
++	struct vbd *vbd = &blkif->vbd;
++
++	if ((operation != READ) && vbd->readonly)
++		return -EACCES;
++
++	/* A huge sector_number could wrap the sum past the bounds check. */
++	if (unlikely(req->sector_number + req->nr_sects < req->sector_number))
++		return -EACCES;
++	if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
++		return -EACCES;
++
++	req->dev  = vbd->pdevice;
++	req->bdev = vbd->bdev;
++	return 0;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkback/xenbus.c	2007-08-27 14:02:09.000000000 -0400
+@@ -0,0 +1,533 @@
++/*  Xenbus code for blkif backend
++    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
++    Copyright (C) 2005 XenSource Ltd
++
++    This program is free software; you can redistribute it and/or modify
++    it under the terms of the GNU General Public License as published by
++    the Free Software Foundation; either version 2 of the License, or
++    (at your option) any later version.
++
++    This program is distributed in the hope that it will be useful,
++    but WITHOUT ANY WARRANTY; without even the implied warranty of
++    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++    GNU General Public License for more details.
++
++    You should have received a copy of the GNU General Public License
++    along with this program; if not, write to the Free Software
++    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++*/
++
++#include <stdarg.h>
++#include <linux/module.h>
++#include <linux/kthread.h>
++#include "common.h"
++
++#undef DPRINTK
++#define DPRINTK(fmt, args...)				\
++	pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",	\
++		 __FUNCTION__, __LINE__, ##args)
++
++struct backend_info
++{
++	struct xenbus_device *dev;
++	blkif_t *blkif;
++	struct xenbus_watch backend_watch;
++	unsigned major;
++	unsigned minor;
++	char *mode;
++};
++
++static void connect(struct backend_info *);
++static int connect_ring(struct backend_info *);
++static void backend_changed(struct xenbus_watch *, const char **,
++			    unsigned int);
++
++static int blkback_name(blkif_t *blkif, char *buf)
++{
++	char *devpath, *devname;
++	struct xenbus_device *dev = blkif->be->dev;
++
++	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
++	if (IS_ERR(devpath)) 
++		return PTR_ERR(devpath);
++	
++	if ((devname = strstr(devpath, "/dev/")) != NULL)
++		devname += strlen("/dev/");
++	else
++		devname  = devpath;
++
++	snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
++	kfree(devpath);
++	
++	return 0;
++}
++
++static void update_blkif_status(blkif_t *blkif)
++{ 
++	int err;
++	char name[TASK_COMM_LEN];
++
++	/* Not ready to connect? */
++	if (!blkif->irq || !blkif->vbd.bdev)
++		return;
++
++	/* Already connected? */
++	if (blkif->be->dev->state == XenbusStateConnected)
++		return;
++
++	/* Attempt to connect: exit if we fail to. */
++	connect(blkif->be);
++	if (blkif->be->dev->state != XenbusStateConnected)
++		return;
++
++	err = blkback_name(blkif, name);
++	if (err) {
++		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
++		return;
++	}
++
++	blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
++	if (IS_ERR(blkif->xenblkd)) {
++		err = PTR_ERR(blkif->xenblkd);
++		blkif->xenblkd = NULL;
++		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
++	}
++}
++
++
++/****************************************************************
++ *  sysfs interface for VBD I/O requests
++ */
++
++#define VBD_SHOW(name, format, args...)					\
++	static ssize_t show_##name(struct device *_dev,			\
++				   struct device_attribute *attr,	\
++				   char *buf)				\
++	{								\
++		struct xenbus_device *dev = to_xenbus_device(_dev);	\
++		struct backend_info *be = dev->dev.driver_data;		\
++									\
++		return sprintf(buf, format, ##args);			\
++	}								\
++	DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
++
++VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
++VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
++VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
++VBD_SHOW(br_req,  "%d\n", be->blkif->st_br_req);
++VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
++VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
++
++static struct attribute *vbdstat_attrs[] = {
++	&dev_attr_oo_req.attr,
++	&dev_attr_rd_req.attr,
++	&dev_attr_wr_req.attr,
++	&dev_attr_br_req.attr,
++	&dev_attr_rd_sect.attr,
++	&dev_attr_wr_sect.attr,
++	NULL
++};
++
++static struct attribute_group vbdstat_group = {
++	.name = "statistics",
++	.attrs = vbdstat_attrs,
++};
++
++VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
++VBD_SHOW(mode, "%s\n", be->mode);
++
++/* Create the VBD sysfs entries; on failure undo only what was created. */
++int xenvbd_sysfs_addif(struct xenbus_device *dev)
++{
++	int error;
++
++	error = device_create_file(&dev->dev, &dev_attr_physical_device);
++	if (error)
++		return error;
++
++	error = device_create_file(&dev->dev, &dev_attr_mode);
++	if (error)
++		goto undo_physdev;
++
++	error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
++	if (error)
++		goto undo_mode;
++
++	return 0;
++
++undo_mode:	device_remove_file(&dev->dev, &dev_attr_mode);
++undo_physdev:	device_remove_file(&dev->dev, &dev_attr_physical_device);
++	return error;
++}
++
++void xenvbd_sysfs_delif(struct xenbus_device *dev)
++{
++	sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
++	device_remove_file(&dev->dev, &dev_attr_mode);
++	device_remove_file(&dev->dev, &dev_attr_physical_device);
++}
++
++/* Tear down the backend: watch, blkif, sysfs entries, then be itself. */
++static int blkback_remove(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev->dev.driver_data;
++
++	DPRINTK("");
++
++	if (be->backend_watch.node) {
++		unregister_xenbus_watch(&be->backend_watch);
++		kfree(be->backend_watch.node);
++		be->backend_watch.node = NULL;
++	}
++	if (be->blkif) {
++		blkif_disconnect(be->blkif);
++		vbd_free(&be->blkif->vbd);
++		blkif_free(be->blkif);
++		be->blkif = NULL;
++	}
++	if (be->major || be->minor)
++		xenvbd_sysfs_delif(dev);
++
++	kfree(be->mode);	/* string from xenbus_read() in backend_changed */
++	kfree(be);
++	dev->dev.driver_data = NULL;
++	return 0;
++}
++
++int blkback_barrier(struct xenbus_transaction xbt,
++		    struct backend_info *be, int state)
++{
++	struct xenbus_device *dev = be->dev;
++	int err;
++
++	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
++			    "%d", state);
++	if (err)
++		xenbus_dev_fatal(dev, err, "writing feature-barrier");
++
++	return err;
++}
++
++/**
++ * Entry point to this code when a new device is created.  Allocate the basic
++ * structures, and watch the store waiting for the hotplug scripts to tell us
++ * the device's physical major and minor numbers.  Switch to InitWait.
++ */
++static int blkback_probe(struct xenbus_device *dev,
++			 const struct xenbus_device_id *id)
++{
++	int err;
++	struct backend_info *be = kzalloc(sizeof(struct backend_info),
++					  GFP_KERNEL);
++	if (!be) {
++		xenbus_dev_fatal(dev, -ENOMEM,
++				 "allocating backend structure");
++		return -ENOMEM;
++	}
++	be->dev = dev;
++	dev->dev.driver_data = be;
++
++	be->blkif = blkif_alloc(dev->otherend_id);
++	if (IS_ERR(be->blkif)) {
++		err = PTR_ERR(be->blkif);
++		be->blkif = NULL;
++		xenbus_dev_fatal(dev, err, "creating block interface");
++		goto fail;
++	}
++
++	/* setup back pointer */
++	be->blkif->be = be;
++
++	err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
++				 &be->backend_watch, backend_changed);
++	if (err)
++		goto fail;
++
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
++		goto fail;
++
++	return 0;
++
++fail:
++	DPRINTK("failed");
++	blkback_remove(dev);
++	return err;
++}
++
++
++/**
++ * Callback received when the hotplug scripts have placed the physical-device
++ * node.  Read it and the mode node, and create a vbd.  If the frontend is
++ * ready, connect.
++ */
++static void backend_changed(struct xenbus_watch *watch,
++			    const char **vec, unsigned int len)
++{
++	int err;
++	unsigned major, minor;
++	long handle;
++	struct backend_info *be
++		= container_of(watch, struct backend_info, backend_watch);
++	struct xenbus_device *dev = be->dev;
++
++	DPRINTK("");
++
++	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
++			   &major, &minor);
++	if (XENBUS_EXIST_ERR(err)) {
++		/* Since this watch will fire once immediately after it is
++		   registered, we expect this.  Ignore it, and wait for the
++		   hotplug scripts. */
++		return;
++	}
++	if (err != 2) {
++		xenbus_dev_fatal(dev, err, "reading physical-device");
++		return;
++	}
++
++	if (be->major || be->minor) {
++		/* Already set up.  Do not read "mode" again: overwriting
++		   be->mode would leak the string from the first firing. */
++		if ((be->major != major) || (be->minor != minor))
++			printk(KERN_WARNING
++			       "blkback: changing physical device (from "
++			       "%x:%x to %x:%x) not supported.\n",
++			       be->major, be->minor, major, minor);
++		return;
++	}
++
++	/* A failed earlier attempt may have left a mode string behind. */
++	kfree(be->mode);
++	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
++	if (IS_ERR(be->mode)) {
++		err = PTR_ERR(be->mode);
++		be->mode = NULL;
++		xenbus_dev_fatal(dev, err, "reading mode");
++		return;
++	}
++
++	/* Front end dir is a number, which is used as the handle. */
++	handle = simple_strtoul(strrchr(dev->otherend, '/') + 1, NULL, 0);
++
++	be->major = major;
++	be->minor = minor;
++
++	err = vbd_create(be->blkif, handle, major, minor,
++			 (NULL == strchr(be->mode, 'w')));
++	if (err) {
++		be->major = be->minor = 0;
++		xenbus_dev_fatal(dev, err, "creating vbd structure");
++		return;
++	}
++
++	err = xenvbd_sysfs_addif(dev);
++	if (err) {
++		vbd_free(&be->blkif->vbd);
++		be->major = be->minor = 0;
++		xenbus_dev_fatal(dev, err, "creating sysfs entries");
++		return;
++	}
++
++	/* We're potentially connected now */
++	update_blkif_status(be->blkif);
++}
++
++
++/**
++ * Callback received when the frontend's state changes.
++ */
++static void frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state)
++{
++	struct backend_info *be = dev->dev.driver_data;
++	int err;
++
++	DPRINTK("%s", xenbus_strstate(frontend_state));
++
++	switch (frontend_state) {
++	case XenbusStateInitialising:
++		if (dev->state == XenbusStateClosed) {
++			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
++			       __FUNCTION__, dev->nodename);
++			xenbus_switch_state(dev, XenbusStateInitWait);
++		}
++		break;
++
++	case XenbusStateInitialised:
++	case XenbusStateConnected:
++		/* Ensure we connect even when two watches fire in 
++		   close succession and we miss the intermediate value
++		   of frontend_state. */
++		if (dev->state == XenbusStateConnected)
++			break;
++
++		err = connect_ring(be);
++		if (err)
++			break;
++		update_blkif_status(be->blkif);
++		break;
++
++	case XenbusStateClosing:
++		blkif_disconnect(be->blkif);
++		xenbus_switch_state(dev, XenbusStateClosing);
++		break;
++
++	case XenbusStateClosed:
++		xenbus_switch_state(dev, XenbusStateClosed);
++		if (xenbus_dev_is_online(dev))
++			break;
++		/* fall through if not online */
++	case XenbusStateUnknown:
++		device_unregister(&dev->dev);
++		break;
++
++	default:
++		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
++				 frontend_state);
++		break;
++	}
++}
++
++
++/* ** Connection ** */
++
++
++/**
++ * Write the physical details regarding the block device to the store, and
++ * switch to Connected state.
++ */
++static void connect(struct backend_info *be)
++{
++	struct xenbus_transaction xbt;
++	int err;
++	struct xenbus_device *dev = be->dev;
++
++	DPRINTK("%s", dev->otherend);
++
++	/* Supply the information about the device the frontend needs */
++again:
++	err = xenbus_transaction_start(&xbt);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "starting transaction");
++		return;
++	}
++
++	err = blkback_barrier(xbt, be, 1);
++	if (err)
++		goto abort;
++
++	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
++			    vbd_size(&be->blkif->vbd));
++	if (err) {
++		xenbus_dev_fatal(dev, err, "writing %s/sectors",
++				 dev->nodename);
++		goto abort;
++	}
++
++	/* FIXME: use a typename instead */
++	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
++			    vbd_info(&be->blkif->vbd));
++	if (err) {
++		xenbus_dev_fatal(dev, err, "writing %s/info",
++				 dev->nodename);
++		goto abort;
++	}
++	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
++			    vbd_secsize(&be->blkif->vbd));
++	if (err) {
++		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
++				 dev->nodename);
++		goto abort;
++	}
++
++	/* Commit; on -EAGAIN start over with a fresh transaction. */
++	err = xenbus_transaction_end(xbt, 0);
++	if (err == -EAGAIN)
++		goto again;
++	if (err)
++		xenbus_dev_fatal(dev, err, "ending transaction");
++
++	err = xenbus_switch_state(dev, XenbusStateConnected);
++	if (err)
++		xenbus_dev_fatal(dev, err, "switching to Connected state");
++
++	return;
++ abort:
++	xenbus_transaction_end(xbt, 1);
++}
++
++
++/* Read the frontend's ring details and protocol, then map the ring. */
++static int connect_ring(struct backend_info *be)
++{
++	struct xenbus_device *dev = be->dev;
++	unsigned long ring_ref;
++	unsigned int evtchn;
++	char protocol[64] = "";
++	int err;
++
++	DPRINTK("%s", dev->otherend);
++	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref,
++			    "event-channel", "%u", &evtchn, NULL);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "reading %s/ring-ref and event-channel",
++				 dev->otherend);
++		return err;
++	}
++
++	be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
++	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
++			    "%63s", protocol, NULL);
++	if (err)
++		strcpy(protocol, "unspecified, assuming native");
++	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
++		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
++	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
++		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
++	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
++		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
++	else {
++		/* xenbus_gather() succeeded, so err is 0 here. */
++		xenbus_dev_fatal(dev, -EINVAL, "unknown fe protocol %s",
++				 protocol);
++		return -EINVAL;
++	}
++	printk(KERN_INFO
++	       "blkback: ring-ref %lu, event-channel %u, protocol %d (%s)\n",
++	       ring_ref, evtchn, be->blkif->blk_protocol, protocol);
++
++	/* Map the shared frame, irq etc. */
++	err = blkif_map(be->blkif, ring_ref, evtchn);
++	if (err)
++		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
++				 ring_ref, evtchn);
++
++	return err;
++}
++
++
++/* ** Driver Registration ** */
++
++
++static struct xenbus_device_id blkback_ids[] = {
++	{ "vbd" },
++	{ "" }
++};
++
++
++static struct xenbus_driver blkback = {
++	.name = "vbd",
++	.owner = THIS_MODULE,
++	.ids = blkback_ids,
++	.probe = blkback_probe,
++	.remove = blkback_remove,
++	.otherend_changed = frontend_changed
++};
++
++
++void blkif_xenbus_init(void)
++{
++	xenbus_register_backend(&blkback);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkfront/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,5 @@
++
++obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	:= xenblk.o
++
++xenblk-objs := blkfront.o vbd.o
++
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkfront/blkfront.c	2007-08-27 14:02:08.000000000 -0400
+@@ -0,0 +1,902 @@
++/******************************************************************************
++ * blkfront.c
++ * 
++ * XenLinux virtual block-device driver.
++ * 
++ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
++ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
++ * Copyright (c) 2004, Christian Limpach
++ * Copyright (c) 2004, Andrew Warfield
++ * Copyright (c) 2005, Christopher Clark
++ * Copyright (c) 2005, XenSource Ltd
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/version.h>
++#include "block.h"
++#include <linux/cdrom.h>
++#include <linux/sched.h>
++#include <linux/interrupt.h>
++#include <scsi/scsi.h>
++#include <xen/evtchn.h>
++#include <xen/xenbus.h>
++#include <xen/interface/grant_table.h>
++#include <xen/interface/io/protocols.h>
++#include <xen/gnttab.h>
++#include <asm/hypervisor.h>
++#include <asm/maddr.h>
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++#define BLKIF_STATE_DISCONNECTED 0
++#define BLKIF_STATE_CONNECTED    1
++#define BLKIF_STATE_SUSPENDED    2
++
++#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
++    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
++#define GRANT_INVALID_REF	0
++
++static void connect(struct blkfront_info *);
++static void blkfront_closing(struct xenbus_device *);
++static int blkfront_remove(struct xenbus_device *);
++static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
++static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
++
++static void kick_pending_request_queues(struct blkfront_info *);
++
++static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
++static void blkif_restart_queue(void *arg);
++static void blkif_recover(struct blkfront_info *);
++static void blkif_completion(struct blk_shadow *);
++static void blkif_free(struct blkfront_info *, int);
++
++
++/**
++ * Entry point to this code when a new device is created.  Allocate the basic
++ * structures and the ring buffer for communication with the backend, and
++ * inform the backend of the appropriate details for those.  Switch to
++ * Initialised state.
++ */
++static int blkfront_probe(struct xenbus_device *dev,
++			  const struct xenbus_device_id *id)
++{
++	int err, vdevice, i;
++	struct blkfront_info *info;
++
++	/* FIXME: Use dynamic device id if this is not set. */
++	err = xenbus_scanf(XBT_NIL, dev->nodename,
++			   "virtual-device", "%i", &vdevice);
++	if (err != 1) {
++		xenbus_dev_fatal(dev, err, "reading virtual-device");
++		return err;
++	}
++
++	info = kzalloc(sizeof(*info), GFP_KERNEL);
++	if (!info) {
++		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
++		return -ENOMEM;
++	}
++
++	info->xbdev = dev;
++	info->vdevice = vdevice;
++	info->connected = BLKIF_STATE_DISCONNECTED;
++	INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
++
++	for (i = 0; i < BLK_RING_SIZE; i++)
++		info->shadow[i].req.id = i+1;
++	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
++
++	/* Front end dir is a number, which is used as the id. */
++	info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
++	dev->dev.driver_data = info;
++
++	err = talk_to_backend(dev, info);
++	if (err) {
++		kfree(info);
++		dev->dev.driver_data = NULL;
++		return err;
++	}
++
++	return 0;
++}
++
++
++/**
++ * We are reconnecting to the backend, due to a suspend/resume, or a backend
++ * driver restart.  We tear down our blkif structure and recreate it, but
++ * leave the device-layer structures intact so that this is transparent to the
++ * rest of the kernel.
++ */
++static int blkfront_resume(struct xenbus_device *dev)
++{
++	struct blkfront_info *info = dev->dev.driver_data;
++	int err;
++
++	DPRINTK("blkfront_resume: %s\n", dev->nodename);
++
++	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
++
++	err = talk_to_backend(dev, info);
++	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
++		blkif_recover(info);
++
++	return err;
++}
++
++
++/* Common code used when first setting up, and when resuming. */
++static int talk_to_backend(struct xenbus_device *dev,
++			   struct blkfront_info *info)
++{
++	const char *message = NULL;
++	struct xenbus_transaction xbt;
++	int err;
++
++	/* Create shared ring, alloc event channel. */
++	err = setup_blkring(dev, info);
++	if (err)
++		goto out;
++
++again:
++	err = xenbus_transaction_start(&xbt);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "starting transaction");
++		goto destroy_blkring;
++	}
++
++	err = xenbus_printf(xbt, dev->nodename,
++			    "ring-ref","%u", info->ring_ref);
++	if (err) {
++		message = "writing ring-ref";
++		goto abort_transaction;
++	}
++	err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
++			    irq_to_evtchn_port(info->irq));
++	if (err) {
++		message = "writing event-channel";
++		goto abort_transaction;
++	}
++	err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
++			    XEN_IO_PROTO_ABI_NATIVE);
++	if (err) {
++		message = "writing protocol";
++		goto abort_transaction;
++	}
++
++	err = xenbus_transaction_end(xbt, 0);
++	if (err) {
++		if (err == -EAGAIN)
++			goto again;
++		xenbus_dev_fatal(dev, err, "completing transaction");
++		goto destroy_blkring;
++	}
++
++	xenbus_switch_state(dev, XenbusStateInitialised);
++
++	return 0;
++
++ abort_transaction:
++	xenbus_transaction_end(xbt, 1);
++	if (message)
++		xenbus_dev_fatal(dev, err, "%s", message);
++ destroy_blkring:
++	blkif_free(info, 0);
++ out:
++	return err;
++}
++
++
++static int setup_blkring(struct xenbus_device *dev,
++			 struct blkfront_info *info)
++{
++	blkif_sring_t *sring;
++	int err;
++
++	info->ring_ref = GRANT_INVALID_REF;
++
++	sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
++	if (!sring) {
++		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
++		return -ENOMEM;
++	}
++	SHARED_RING_INIT(sring);
++	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
++
++	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
++	if (err < 0) {
++		free_page((unsigned long)sring);
++		info->ring.sring = NULL;
++		goto fail;
++	}
++	info->ring_ref = err;
++
++	err = bind_listening_port_to_irqhandler(
++		dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
++	if (err <= 0) {
++		xenbus_dev_fatal(dev, err,
++				 "bind_listening_port_to_irqhandler");
++		goto fail;
++	}
++	info->irq = err;
++
++	return 0;
++fail:
++	blkif_free(info, 0);
++	return err;
++}
++
++
++/**
++ * Callback received when the backend's state changes.
++ */
++static void backend_changed(struct xenbus_device *dev,
++			    enum xenbus_state backend_state)
++{
++	struct blkfront_info *info = dev->dev.driver_data;
++	struct block_device *bd;
++
++	DPRINTK("blkfront:backend_changed.\n");
++	switch (backend_state) {
++	case XenbusStateInitialising:
++	case XenbusStateInitWait:
++	case XenbusStateInitialised:
++	case XenbusStateUnknown:
++	case XenbusStateClosed:
++		break;
++
++	case XenbusStateConnected:
++		connect(info);
++		break;
++
++	case XenbusStateClosing:
++		bd = bdget(info->dev);
++		if (bd == NULL) {
++			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
++			break;	/* no block_device to lock or close */
++		}
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
++		down(&bd->bd_sem);
++#else
++		mutex_lock(&bd->bd_mutex);
++#endif
++		if (info->users > 0)
++			xenbus_dev_error(dev, -EBUSY,
++					 "Device in use; refusing to close");
++		else
++			blkfront_closing(dev);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
++		up(&bd->bd_sem);
++#else
++		mutex_unlock(&bd->bd_mutex);
++#endif
++		bdput(bd);
++		break;
++	}
++}
++
++
++/* ** Connection ** */
++
++
++/*
++ * Invoked when the backend is finally 'ready' (and has produced
++ * the details about the physical device - #sectors, size, etc).
++ */
++static void connect(struct blkfront_info *info)
++{
++	unsigned long long sectors;
++	unsigned long sector_size;
++	unsigned int binfo;
++	int err;
++
++	if ((info->connected == BLKIF_STATE_CONNECTED) ||
++	    (info->connected == BLKIF_STATE_SUSPENDED) )
++		return;
++
++	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
++
++	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
++			    "sectors", "%Lu", &sectors,
++			    "info", "%u", &binfo,
++			    "sector-size", "%lu", &sector_size,
++			    NULL);
++	if (err) {
++		xenbus_dev_fatal(info->xbdev, err,
++				 "reading backend fields at %s",
++				 info->xbdev->otherend);
++		return;
++	}
++
++	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
++			    "feature-barrier", "%lu", &info->feature_barrier,
++			    NULL);
++	if (err)
++		info->feature_barrier = 0;
++
++	err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
++	if (err) {
++		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
++				 info->xbdev->otherend);
++		return;
++	}
++
++	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
++
++	/* Kick pending requests. */
++	spin_lock_irq(&blkif_io_lock);
++	info->connected = BLKIF_STATE_CONNECTED;
++	kick_pending_request_queues(info);
++	spin_unlock_irq(&blkif_io_lock);
++
++	add_disk(info->gd);
++}
++
++/**
++ * Handle the change of state of the backend to Closing.  We must delete our
++ * device-layer structures now, to ensure that writes are flushed through to
++ * the backend.  Once this is done, we can switch to Closed in
++ * acknowledgement.
++ */
++static void blkfront_closing(struct xenbus_device *dev)
++{
++	struct blkfront_info *info = dev->dev.driver_data;
++	unsigned long flags;
++
++	DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
++
++	if (info->rq == NULL)
++		goto out;
++
++	spin_lock_irqsave(&blkif_io_lock, flags);
++	/* No more blkif_request(). */
++	blk_stop_queue(info->rq);
++	/* No more gnttab callback work. */
++	gnttab_cancel_free_callback(&info->callback);
++	spin_unlock_irqrestore(&blkif_io_lock, flags);
++
++	/* Flush gnttab callback work. Must be done with no locks held. */
++	flush_scheduled_work();
++
++	xlvbd_del(info);
++
++ out:
++	xenbus_frontend_closed(dev);
++}
++
++
++static int blkfront_remove(struct xenbus_device *dev)
++{
++	struct blkfront_info *info = dev->dev.driver_data;
++
++	DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
++
++	blkif_free(info, 0);
++
++	kfree(info);
++
++	return 0;
++}
++
++
++/* Pop the next free shadow slot; valid ids are 0..BLK_RING_SIZE-1. */
++static inline int GET_ID_FROM_FREELIST(struct blkfront_info *info)
++{
++	unsigned long free = info->shadow_free;
++	BUG_ON(free >= BLK_RING_SIZE);
++	info->shadow_free = info->shadow[free].req.id;
++	info->shadow[free].req.id = 0x0fffffee; /* debug */
++	return free;
++}
++
++static inline void ADD_ID_TO_FREELIST(
++	struct blkfront_info *info, unsigned long id)
++{
++	info->shadow[id].req.id  = info->shadow_free;
++	info->shadow[id].request = 0;
++	info->shadow_free = id;
++}
++
++static inline void flush_requests(struct blkfront_info *info)
++{
++	int notify;
++
++	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
++
++	if (notify)
++		notify_remote_via_irq(info->irq);
++}
++
++static void kick_pending_request_queues(struct blkfront_info *info)
++{
++	if (!RING_FULL(&info->ring)) {
++		/* Re-enable calldowns. */
++		blk_start_queue(info->rq);
++		/* Kick things off immediately. */
++		do_blkif_request(info->rq);
++	}
++}
++
++static void blkif_restart_queue(void *arg)
++{
++	struct blkfront_info *info = (struct blkfront_info *)arg;
++	spin_lock_irq(&blkif_io_lock);
++	if (info->connected == BLKIF_STATE_CONNECTED)
++		kick_pending_request_queues(info);
++	spin_unlock_irq(&blkif_io_lock);
++}
++
++static void blkif_restart_queue_callback(void *arg)
++{
++	struct blkfront_info *info = (struct blkfront_info *)arg;
++	schedule_work(&info->work);
++}
++
++int blkif_open(struct inode *inode, struct file *filep)
++{
++	struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
++	info->users++;
++	return 0;
++}
++
++
++int blkif_release(struct inode *inode, struct file *filep)
++{
++	struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
++	info->users--;
++	if (info->users == 0) {
++		/* Check whether we have been instructed to close.  We will
++		   have ignored this request initially, as the device was
++		   still mounted. */
++		struct xenbus_device * dev = info->xbdev;
++		enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
++
++		if (state == XenbusStateClosing)
++			blkfront_closing(dev);
++	}
++	return 0;
++}
++
++
++int blkif_ioctl(struct inode *inode, struct file *filep,
++		unsigned command, unsigned long argument)
++{
++	int i;
++
++	DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
++		      command, (long)argument, inode->i_rdev);
++
++	switch (command) {
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
++	case HDIO_GETGEO: {
++		struct block_device *bd = inode->i_bdev;
++		struct hd_geometry geo;
++		int ret;
++
++                if (!argument)
++                        return -EINVAL;
++
++		geo.start = get_start_sect(bd);
++		ret = blkif_getgeo(bd, &geo);
++		if (ret)
++			return ret;
++
++		if (copy_to_user((struct hd_geometry __user *)argument, &geo,
++				 sizeof(geo)))
++                        return -EFAULT;
++
++                return 0;
++	}
++#endif
++	case CDROMMULTISESSION:
++		DPRINTK("FIXME: support multisession CDs later\n");
++		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
++			if (put_user(0, (char __user *)(argument + i)))
++				return -EFAULT;
++		return 0;
++
++	default:
++		/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
++		  command);*/
++		return -EINVAL; /* same return as native Linux */
++	}
++
++	return 0;
++}
++
++
++int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
++{
++	/* We don't have real geometry info, but let's at least return
++	   values consistent with the size of the device */
++	sector_t nsect = get_capacity(bd->bd_disk);
++	sector_t cylinders = nsect;
++
++	hg->heads = 0xff;
++	hg->sectors = 0x3f;
++	sector_div(cylinders, hg->heads * hg->sectors);
++	hg->cylinders = cylinders;
++	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
++		hg->cylinders = 0xffff;
++	return 0;
++}
++
++
++/*
++ * blkif_queue_request
++ *
++ * Translate one block-layer request into a blkif ring request.
++ *
++ * req: the request to place on the shared ring.
++ * Returns 0 once the request occupies a ring slot (not yet pushed), or 1
++ * if the device is not connected or no grant references are available;
++ * in the latter case a callback is registered to restart the queue.
++ */
++static int blkif_queue_request(struct request *req)
++{
++	struct blkfront_info *info = req->rq_disk->private_data;
++	unsigned long buffer_mfn;
++	blkif_request_t *ring_req;
++	struct bio *bio;
++	struct bio_vec *bvec;
++	int idx;
++	unsigned long id;
++	unsigned int fsect, lsect;
++	int ref;
++	grant_ref_t gref_head;
++
++	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
++		return 1;
++
++	if (gnttab_alloc_grant_references(
++		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
++		gnttab_request_free_callback(
++			&info->callback,
++			blkif_restart_queue_callback,
++			info,
++			BLKIF_MAX_SEGMENTS_PER_REQUEST);
++		return 1;
++	}
++
++	/* Fill out a communications ring structure. */
++	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
++	id = GET_ID_FROM_FREELIST(info);
++	info->shadow[id].request = (unsigned long)req;
++
++	ring_req->id = id;
++	ring_req->sector_number = (blkif_sector_t)req->sector;
++	ring_req->handle = info->handle;
++
++	ring_req->operation = rq_data_dir(req) ?
++		BLKIF_OP_WRITE : BLKIF_OP_READ;
++	if (blk_barrier_rq(req))
++		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
++
++	ring_req->nr_segments = 0;
++	rq_for_each_bio (bio, req) {
++		bio_for_each_segment (bvec, bio, idx) {
++			BUG_ON(ring_req->nr_segments
++			       == BLKIF_MAX_SEGMENTS_PER_REQUEST);
++			buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
++			fsect = bvec->bv_offset >> 9;
++			lsect = fsect + (bvec->bv_len >> 9) - 1;
++			/* install a grant reference. */
++			ref = gnttab_claim_grant_reference(&gref_head);
++			BUG_ON(ref == -ENOSPC);
++
++			gnttab_grant_foreign_access_ref(
++				ref,
++				info->xbdev->otherend_id,
++				buffer_mfn,
++				rq_data_dir(req) );
++
++			info->shadow[id].frame[ring_req->nr_segments] =
++				mfn_to_pfn(buffer_mfn);
++
++			ring_req->seg[ring_req->nr_segments] =
++				(struct blkif_request_segment) {
++					.gref       = ref,
++					.first_sect = fsect,
++					.last_sect  = lsect };
++
++			ring_req->nr_segments++;
++		}
++	}
++
++	info->ring.req_prod_pvt++;
++
++	/* Keep a private copy so we can reissue requests when recovering. */
++	info->shadow[id].req = *ring_req;
++
++	gnttab_free_grant_references(gref_head);
++
++	return 0;
++}
++
++/*
++ * do_blkif_request
++ *  Request function: move requests from rq to the ring; stop rq if stuck.
++ */
++void do_blkif_request(request_queue_t *rq)
++{
++	struct blkfront_info *info = NULL;
++	struct request *req;
++	int queued;
++
++	DPRINTK("Entered do_blkif_request\n");
++
++	queued = 0;
++
++	while ((req = elv_next_request(rq)) != NULL) {
++		info = req->rq_disk->private_data;
++		if (!blk_fs_request(req)) {
++			end_request(req, 0);	/* fail non-fs requests */
++			continue;
++		}
++
++		if (RING_FULL(&info->ring))
++			goto wait;
++
++		DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
++			"(%u/%li) buffer:%p [%s]\n",
++			req, req->cmd, (long long)req->sector,
++			req->current_nr_sectors,
++			req->nr_sectors, req->buffer,
++			rq_data_dir(req) ? "write" : "read");
++
++
++		blkdev_dequeue_request(req);
++		if (blkif_queue_request(req)) {
++			blk_requeue_request(rq, req);
++		wait:
++			/* Avoid pointless unplugs. */
++			blk_stop_queue(rq);
++			break;
++		}
++
++		queued++;
++	}
++
++	if (queued != 0)	/* info is non-NULL once anything was queued */
++		flush_requests(info);
++}
++
++/* Interrupt handler: complete each response the backend placed on the ring. */
++static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
++{
++	struct request *req;
++	blkif_response_t *bret;
++	RING_IDX i, rp;
++	unsigned long flags;
++	struct blkfront_info *info = (struct blkfront_info *)dev_id;
++	int uptodate;
++
++	spin_lock_irqsave(&blkif_io_lock, flags);
++
++	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
++		spin_unlock_irqrestore(&blkif_io_lock, flags);
++		return IRQ_HANDLED;
++	}
++
++ again:
++	rp = info->ring.sring->rsp_prod;
++	rmb(); /* Ensure we see queued responses up to 'rp'. */
++
++	for (i = info->ring.rsp_cons; i != rp; i++) {
++		unsigned long id;
++		int ret;
++
++		bret = RING_GET_RESPONSE(&info->ring, i);
++		id   = bret->id;
++		req  = (struct request *)info->shadow[id].request;
++
++		blkif_completion(&info->shadow[id]); /* end segment grants */
++
++		ADD_ID_TO_FREELIST(info, id);
++
++		uptodate = (bret->status == BLKIF_RSP_OKAY);
++		switch (bret->operation) {
++		case BLKIF_OP_WRITE_BARRIER:
++			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
++				printk("blkfront: %s: write barrier op failed\n",
++				       info->gd->disk_name);
++				uptodate = -EOPNOTSUPP;
++				info->feature_barrier = 0; /* barriers off */
++			        xlvbd_barrier(info);
++			}
++			/* fall through */
++		case BLKIF_OP_READ:
++		case BLKIF_OP_WRITE:
++			if (unlikely(bret->status != BLKIF_RSP_OKAY))
++				DPRINTK("Bad return from blkdev data "
++					"request: %x\n", bret->status);
++
++			ret = end_that_request_first(req, uptodate,
++				req->hard_nr_sectors);
++			BUG_ON(ret);
++			end_that_request_last(req, uptodate);
++			break;
++		default:
++			BUG();
++		}
++	}
++
++	info->ring.rsp_cons = i;
++
++	if (i != info->ring.req_prod_pvt) {
++		int more_to_do;
++		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
++		if (more_to_do)
++			goto again;
++	} else
++		info->ring.sring->rsp_event = i + 1;
++
++	kick_pending_request_queues(info);
++
++	spin_unlock_irqrestore(&blkif_io_lock, flags);
++
++	return IRQ_HANDLED;
++}
++
++static void blkif_free(struct blkfront_info *info, int suspend)
++{
++	/* Prevent new requests being issued until we fix things up. */
++	spin_lock_irq(&blkif_io_lock);
++	info->connected = suspend ?
++		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
++	/* No more do_blkif_request(): the queue stays stopped from here. */
++	if (info->rq)
++		blk_stop_queue(info->rq);
++	/* No more gnttab callback work. */
++	gnttab_cancel_free_callback(&info->callback);
++	spin_unlock_irq(&blkif_io_lock);
++
++	/* Flush gnttab callback work. Must be done with no locks held. */
++	flush_scheduled_work();
++
++	/* Free resources associated with old device channel. */
++	if (info->ring_ref != GRANT_INVALID_REF) {
++		gnttab_end_foreign_access(info->ring_ref, 0,
++					  (unsigned long)info->ring.sring);
++		info->ring_ref = GRANT_INVALID_REF;
++		info->ring.sring = NULL;
++	}
++	if (info->irq)
++		unbind_from_irqhandler(info->irq, info);
++	info->irq = 0;	/* 0 means "no irq bound" (see the test above) */
++}
++
++static void blkif_completion(struct blk_shadow *s)
++{
++	int seg = 0;	/* end foreign access on every segment's grant ref */
++	while (seg < s->req.nr_segments)
++		gnttab_end_foreign_access(s->req.seg[seg++].gref, 0, 0UL);
++}
++
++static void blkif_recover(struct blkfront_info *info)
++{
++	int i;
++	blkif_request_t *req;
++	struct blk_shadow *copy;
++	int j;
++
++	/* Stage 1: Make a safe copy of the shadow state (__GFP_NOFAIL alloc). */
++	copy = kmalloc(sizeof(info->shadow), GFP_KERNEL | __GFP_NOFAIL);
++	memcpy(copy, info->shadow, sizeof(info->shadow));
++
++	/* Stage 2: Set up free list. */
++	memset(&info->shadow, 0, sizeof(info->shadow));
++	for (i = 0; i < BLK_RING_SIZE; i++)
++		info->shadow[i].req.id = i+1;	/* each entry links to the next */
++	info->shadow_free = info->ring.req_prod_pvt;
++	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; /* end of list */
++
++	/* Stage 3: Find pending requests and requeue them. */
++	for (i = 0; i < BLK_RING_SIZE; i++) {
++		/* Not in use? */
++		if (copy[i].request == 0)
++			continue;
++
++		/* Grab a request slot and copy shadow state into it. */
++		req = RING_GET_REQUEST(
++			&info->ring, info->ring.req_prod_pvt);
++		*req = copy[i].req;
++
++		/* We get a new request id, and must reset the shadow state. */
++		req->id = GET_ID_FROM_FREELIST(info);
++		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
++
++		/* Rewrite any grant references invalidated by susp/resume. */
++		for (j = 0; j < req->nr_segments; j++)
++			gnttab_grant_foreign_access_ref(
++				req->seg[j].gref,
++				info->xbdev->otherend_id,
++				pfn_to_mfn(info->shadow[req->id].frame[j]),
++				rq_data_dir(
++					(struct request *)
++					info->shadow[req->id].request));
++		info->shadow[req->id].req = *req;
++
++		info->ring.req_prod_pvt++;
++	}
++
++	kfree(copy);
++
++	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
++
++	spin_lock_irq(&blkif_io_lock);
++
++	/* Now safe for us to use the shared ring */
++	info->connected = BLKIF_STATE_CONNECTED;
++
++	/* Send off requeued requests */
++	flush_requests(info);
++
++	/* Kick any other new requests queued since we resumed */
++	kick_pending_request_queues(info);
++
++	spin_unlock_irq(&blkif_io_lock);
++}
++
++
++/* ** Driver Registration ** */
++
++
++static struct xenbus_device_id blkfront_ids[] = {
++	{ "vbd" },	/* device type this frontend handles */
++	{ "" }		/* presumably the table terminator - confirm in xenbus */
++};
++
++
++static struct xenbus_driver blkfront = {
++	.name = "vbd",
++	.owner = THIS_MODULE,
++	.ids = blkfront_ids,	/* device types matched: "vbd" */
++	.probe = blkfront_probe,
++	.remove = blkfront_remove,
++	.resume = blkfront_resume,
++	.otherend_changed = backend_changed, /* NOTE(review): backend state hook? */
++};
++
++
++/* Module init: register the vbd frontend, but only when running on Xen. */
++static int __init xlblk_init(void)
++{
++	if (is_running_on_xen())
++		return xenbus_register_frontend(&blkfront);
++	return -ENODEV;
++}
++module_init(xlblk_init);
++
++
++static void __exit xlblk_exit(void)
++{
++	xenbus_unregister_driver(&blkfront);	/* module exit: drop the driver */
++}
++module_exit(xlblk_exit);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkfront/block.h	2007-08-27 14:02:08.000000000 -0400
+@@ -0,0 +1,142 @@
++/******************************************************************************
++ * block.h
++ * 
++ * Shared definitions between all levels of XenLinux Virtual block devices.
++ * 
++ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
++ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
++ * Copyright (c) 2004-2005, Christian Limpach
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __XEN_DRIVERS_BLOCK_H__
++#define __XEN_DRIVERS_BLOCK_H__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <linux/string.h>
++#include <linux/errno.h>
++#include <linux/fs.h>
++#include <linux/hdreg.h>
++#include <linux/blkdev.h>
++#include <linux/major.h>
++#include <asm/hypervisor.h>
++#include <xen/xenbus.h>
++#include <xen/gnttab.h>
++#include <xen/interface/xen.h>
++#include <xen/interface/io/blkif.h>
++#include <xen/interface/io/ring.h>
++#include <asm/io.h>
++#include <asm/atomic.h>
++#include <asm/uaccess.h>
++
++#define DPRINTK(_f, _a...) pr_debug(_f, ## _a)
++
++#if 0
++#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
++#else
++#define DPRINTK_IOCTL(_f, _a...) ((void)0)
++#endif
++
++struct xlbd_type_info
++{
++	int partn_shift;	/* log2 of the minors reserved per disk */
++	int disks_per_major;	/* multiplier turning a major index into a disk offset */
++	char *devname;		/* name handed to register_blkdev() */
++	char *diskname;		/* prefix used when building gendisk disk_name */
++};
++
++struct xlbd_major_info
++{
++	int major;	/* block major registered for this entry */
++	int index;	/* position within its type's range of majors */
++	int usage;	/* bumped by xlbd_get_major_info(), dropped by put */
++	struct xlbd_type_info *type;	/* ide, scsi or xvd naming scheme */
++};
++
++struct blk_shadow {
++	blkif_request_t req;	/* private copy of the ring request */
++	unsigned long request;	/* struct request pointer; 0 when slot unused */
++	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; /* pfn per segment */
++};
++
++#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
++
++/*
++ * We have one of these per vbd, whether ide, scsi or 'other'.  They
++ * hang in private_data off the gendisk structure. We may end up
++ * putting all kinds of interesting stuff here :-)
++ */
++struct blkfront_info
++{
++	struct xenbus_device *xbdev;	/* xenbus device; gives otherend_id */
++	dev_t dev;			/* MKDEV() of vdevice, set in xlvbd_add() */
++ 	struct gendisk *gd;
++	int vdevice;
++	blkif_vdev_t handle;		/* copied into every ring request */
++	int connected;			/* one of the BLKIF_STATE_* values */
++	int ring_ref;			/* ring grant ref, or GRANT_INVALID_REF */
++	blkif_front_ring_t ring;
++	unsigned int irq;		/* 0 when no handler is bound */
++	struct xlbd_major_info *mi;
++	request_queue_t *rq;		/* same queue as gd->queue */
++	struct work_struct work;
++	struct gnttab_free_callback callback; /* retry when grant refs free up */
++	struct blk_shadow shadow[BLK_RING_SIZE]; /* one entry per ring slot */
++	unsigned long shadow_free;	/* head of the shadow[] free list */
++	int feature_barrier;		/* non-zero: barriers enabled on rq */
++
++	/**
++	 * The number of people holding this device open.  We won't allow a
++	 * hot-unplug unless this is 0.
++	 */
++	int users;
++};
++
++extern spinlock_t blkif_io_lock;
++
++extern int blkif_open(struct inode *inode, struct file *filep);
++extern int blkif_release(struct inode *inode, struct file *filep);
++extern int blkif_ioctl(struct inode *inode, struct file *filep,
++		       unsigned command, unsigned long argument);
++extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
++extern int blkif_check(dev_t dev);
++extern int blkif_revalidate(dev_t dev);
++extern void do_blkif_request (request_queue_t *rq);
++
++/* Virtual block-device subsystem. */
++/* Note that xlvbd_add doesn't call add_disk for you: you're expected
++   to call add_disk on info->gd once the disk is properly connected
++   up. */
++int xlvbd_add(blkif_sector_t capacity, int device,
++	      u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
++void xlvbd_del(struct blkfront_info *info);
++int xlvbd_barrier(struct blkfront_info *info);
++
++#endif /* __XEN_DRIVERS_BLOCK_H__ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blkfront/vbd.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,372 @@
++/******************************************************************************
++ * vbd.c
++ * 
++ * XenLinux virtual block-device driver (xvd).
++ * 
++ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
++ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
++ * Copyright (c) 2004-2005, Christian Limpach
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "block.h"
++#include <linux/blkdev.h>
++#include <linux/list.h>
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++#define BLKIF_MAJOR(dev) ((dev)>>8)
++#define BLKIF_MINOR(dev) ((dev) & 0xff)
++
++/*
++ * For convenience we distinguish between ide, scsi and 'other' (i.e.,
++ * potentially combinations of the two) in the naming scheme and in a few other
++ * places.
++ */
++
++#define NUM_IDE_MAJORS 10
++#define NUM_SCSI_MAJORS 17
++#define NUM_VBD_MAJORS 1
++
++static struct xlbd_type_info xlbd_ide_type = {
++	.partn_shift = 6,
++	.disks_per_major = 2,
++	.devname = "ide",
++	.diskname = "hd",
++};
++
++static struct xlbd_type_info xlbd_scsi_type = {
++	.partn_shift = 4,
++	.disks_per_major = 16,
++	.devname = "sd",
++	.diskname = "sd",
++};
++
++static struct xlbd_type_info xlbd_vbd_type = {
++	.partn_shift = 4,
++	.disks_per_major = 16,
++	.devname = "xvd",
++	.diskname = "xvd",
++};
++
++static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
++					 NUM_VBD_MAJORS];
++
++#define XLBD_MAJOR_IDE_START	0
++#define XLBD_MAJOR_SCSI_START	(NUM_IDE_MAJORS)
++#define XLBD_MAJOR_VBD_START	(NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
++
++#define XLBD_MAJOR_IDE_RANGE	XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
++#define XLBD_MAJOR_SCSI_RANGE	XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
++#define XLBD_MAJOR_VBD_RANGE	XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
++
++/* Information about our VBDs. */
++#define MAX_VBDS 64
++static LIST_HEAD(vbds_list);
++
++static struct block_device_operations xlvbd_block_fops =
++{
++	.owner = THIS_MODULE,
++	.open = blkif_open,
++	.release = blkif_release,
++	.ioctl  = blkif_ioctl,
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
++	.getgeo = blkif_getgeo	/* older kernels: HDIO_GETGEO in blkif_ioctl */
++#endif
++};
++
++DEFINE_SPINLOCK(blkif_io_lock);
++
++/* Create, register and cache the major_info[] entry for slot 'index'. */
++static struct xlbd_major_info *
++xlbd_alloc_major_info(int major, int minor, int index)
++{
++	struct xlbd_major_info *mi = kzalloc(sizeof(*mi), GFP_KERNEL);
++
++	if (!mi)
++		return NULL;
++	mi->major = major;
++
++	/* The slot's range selects the naming scheme and rebases the index. */
++	switch (index) {
++	case XLBD_MAJOR_IDE_RANGE:
++		mi->index = index - XLBD_MAJOR_IDE_START;
++		mi->type = &xlbd_ide_type;
++		break;
++	case XLBD_MAJOR_SCSI_RANGE:
++		mi->index = index - XLBD_MAJOR_SCSI_START;
++		mi->type = &xlbd_scsi_type;
++		break;
++	case XLBD_MAJOR_VBD_RANGE:
++		mi->index = index - XLBD_MAJOR_VBD_START;
++		mi->type = &xlbd_vbd_type;
++		break;
++	}
++
++	if (register_blkdev(mi->major, mi->type->devname) != 0) {
++		kfree(mi);
++		return NULL;
++	}
++
++	printk("xen-vbd: registered block device major %i\n", mi->major);
++	major_info[index] = mi;
++	return mi;
++}
++
++static struct xlbd_major_info *
++xlbd_get_major_info(int vdevice)
++{
++	struct xlbd_major_info *mi;
++	int major, minor, index;
++
++	major = BLKIF_MAJOR(vdevice);
++	minor = BLKIF_MINOR(vdevice);
++
++	switch (major) {	/* map the major onto a major_info[] slot */
++	case IDE0_MAJOR: index = 0; break;
++	case IDE1_MAJOR: index = 1; break;
++	case IDE2_MAJOR: index = 2; break;
++	case IDE3_MAJOR: index = 3; break;
++	case IDE4_MAJOR: index = 4; break;
++	case IDE5_MAJOR: index = 5; break;
++	case IDE6_MAJOR: index = 6; break;
++	case IDE7_MAJOR: index = 7; break;
++	case IDE8_MAJOR: index = 8; break;
++	case IDE9_MAJOR: index = 9; break;
++	case SCSI_DISK0_MAJOR: index = 10; break;
++	case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
++		index = 11 + major - SCSI_DISK1_MAJOR;
++		break;
++        case SCSI_DISK8_MAJOR ... SCSI_DISK15_MAJOR:
++                index = 18 + major - SCSI_DISK8_MAJOR;
++                break;
++        case SCSI_CDROM_MAJOR: index = 26; break;
++        default: index = 27; break;	/* any other major: the xvd slot */
++	}
++
++	mi = ((major_info[index] != NULL) ? major_info[index] :
++	      xlbd_alloc_major_info(major, minor, index));
++	if (mi)
++		mi->usage++;	/* undone by xlbd_put_major_info() */
++	return mi;
++}
++
++static void
++xlbd_put_major_info(struct xlbd_major_info *mi)
++{
++	mi->usage--;
++	/* XXX: the major is never released here, even when usage reaches 0 */
++}
++
++static int
++xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
++{
++	request_queue_t *rq;
++
++	rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
++	if (rq == NULL)
++		return -1;	/* the caller only tests for non-zero */
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
++	elevator_init(rq, "noop");
++#else
++	elevator_init(rq, &elevator_noop);
++#endif
++
++	/* Hard sector size and max sectors impersonate the equiv. hardware. */
++	blk_queue_hardsect_size(rq, sector_size);
++	blk_queue_max_sectors(rq, 512);
++
++	/* Each segment in a request is up to an aligned page in size. */
++	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
++	blk_queue_max_segment_size(rq, PAGE_SIZE);
++
++	/* Ensure a merged request will fit in a single I/O ring slot. */
++	blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
++	blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
++
++	/* Make sure buffer addresses are sector-aligned. */
++	blk_queue_dma_alignment(rq, 511);
++
++	gd->queue = rq;	/* gd->queue is only assigned on success */
++
++	return 0;
++}
++
++/*
++ * xlvbd_alloc_gendisk - set up (but do not add) the gendisk and request
++ * queue for one virtual block device.
++ *
++ * minor is the low byte of vdevice, capacity the size in sectors, and
++ * vdisk_info a mask of VDISK_* flags.  info->gd, ->mi and ->rq must be
++ * NULL on entry.  Returns 0 on success or -ENODEV when the major, the
++ * gendisk or the queue cannot be set up.
++ */
++static int
++xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
++		    u16 vdisk_info, u16 sector_size,
++		    struct blkfront_info *info)
++{
++	struct gendisk *gd;
++	struct xlbd_major_info *mi;
++	int nr_minors = 1;
++	int err = -ENODEV;
++	unsigned int offset;
++	int partn, len;
++
++	BUG_ON(info->gd != NULL);
++	BUG_ON(info->mi != NULL);
++	BUG_ON(info->rq != NULL);
++
++	mi = xlbd_get_major_info(vdevice);
++	if (mi == NULL)
++		goto out;
++	info->mi = mi;
++
++	/* A minor with no partition bits set gets the full minor range. */
++	partn = minor & ((1 << mi->type->partn_shift) - 1);
++	if (partn == 0)
++		nr_minors = 1 << mi->type->partn_shift;
++
++	gd = alloc_disk(nr_minors);
++	if (gd == NULL)
++		goto out;
++
++	/* Disk letters: a..z for the first 26 disks, then aa, ab, ... */
++	offset =  mi->index * mi->type->disks_per_major +
++			(minor >> mi->type->partn_shift);
++	if (offset < 26)
++		len = scnprintf(gd->disk_name, sizeof(gd->disk_name), "%s%c",
++				mi->type->diskname, 'a' + offset);
++	else
++		len = scnprintf(gd->disk_name, sizeof(gd->disk_name), "%s%c%c",
++				mi->type->diskname,
++				'a' + ((offset/26)-1), 'a' + (offset%26));
++	/* A single-minor device also carries its partition number. */
++	if (nr_minors == 1)
++		scnprintf(gd->disk_name + len, sizeof(gd->disk_name) - len,
++			  "%d", partn);
++
++	gd->major = mi->major;
++	gd->first_minor = minor;
++	gd->fops = &xlvbd_block_fops;
++	gd->private_data = info;
++	gd->driverfs_dev = &(info->xbdev->dev);
++	set_capacity(gd, capacity);
++
++	if (xlvbd_init_blk_queue(gd, sector_size)) {
++		/* Never passed to add_disk(): just drop our reference. */
++		put_disk(gd);
++		goto out;
++	}
++
++	info->rq = gd->queue;
++	info->gd = gd;
++
++	if (info->feature_barrier)
++		xlvbd_barrier(info);
++
++	if (vdisk_info & VDISK_READONLY)
++		set_disk_ro(gd, 1);
++
++	if (vdisk_info & VDISK_REMOVABLE)
++		gd->flags |= GENHD_FL_REMOVABLE;
++
++	if (vdisk_info & VDISK_CDROM)
++		gd->flags |= GENHD_FL_CD;
++
++	return 0;
++
++ out:
++	if (mi)
++		xlbd_put_major_info(mi);
++	info->mi = NULL;
++	return err;
++}
++
++/* Build the gendisk for vdevice; the caller runs add_disk() later. */
++int
++xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
++	  u16 sector_size, struct blkfront_info *info)
++{
++	const int minor = BLKIF_MINOR(vdevice);
++	struct block_device *bdev;
++	int rc;
++
++	info->dev = MKDEV(BLKIF_MAJOR(vdevice), minor);
++	bdev = bdget(info->dev);
++	if (!bdev)
++		return -ENODEV;
++
++	rc = xlvbd_alloc_gendisk(minor, capacity, vdevice,
++				 vdisk_info, sector_size, info);
++	bdput(bdev);
++	return rc;
++}
++
++void
++xlvbd_del(struct blkfront_info *info)
++{
++	if (info->mi == NULL)
++		return;	/* xlvbd_alloc_gendisk() leaves mi NULL on failure */
++
++	BUG_ON(info->gd == NULL);
++	del_gendisk(info->gd);
++	put_disk(info->gd);
++	info->gd = NULL;
++
++	xlbd_put_major_info(info->mi);	/* pairs with xlbd_get_major_info() */
++	info->mi = NULL;
++
++	BUG_ON(info->rq == NULL);
++	blk_cleanup_queue(info->rq);
++	info->rq = NULL;
++}
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
++int
++xlvbd_barrier(struct blkfront_info *info)
++{
++	int err;
++
++	err = blk_queue_ordered(info->rq,
++		info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL);
++	if (err)
++		return err;	/* nothing is logged when the mode change fails */
++	printk("blkfront: %s: barriers %s\n",
++	       info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled");
++	return 0;
++}
++#else /* kernels before 2.6.16: always reports barriers as disabled */
++int
++xlvbd_barrier(struct blkfront_info *info)
++{
++	printk("blkfront: %s: barriers disabled\n", info->gd->disk_name);
++	return -ENOSYS;
++}
++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blktap/Makefile	2007-08-27 14:01:54.000000000 -0400
+@@ -0,0 +1,5 @@
++LINUXINCLUDE += -I../xen/include/public/io
++
++obj-$(CONFIG_XEN_BLKDEV_TAP) := xenblktap.o
++
++xenblktap-y := xenbus.o interface.o blktap.o 
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blktap/blktap.c	2007-08-27 14:02:10.000000000 -0400
+@@ -0,0 +1,1528 @@
++/******************************************************************************
++ * drivers/xen/blktap/blktap.c
++ * 
++ * Back-end driver for user level virtual block devices. This portion of the
++ * driver exports a 'unified' block-device interface that can be accessed
++ * by any operating system that implements a compatible front end. Requests
++ * are remapped to a user-space memory region.
++ *
++ * Based on the blkback driver code.
++ * 
++ * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
++ *
++ * Clean ups and fix ups:
++ *    Copyright (c) 2006, Steven Rostedt - Red Hat, Inc.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/spinlock.h>
++#include <linux/kthread.h>
++#include <linux/list.h>
++#include <asm/hypervisor.h>
++#include "common.h"
++#include <xen/balloon.h>
++#include <xen/driver_util.h>
++#include <linux/kernel.h>
++#include <linux/fs.h>
++#include <linux/mm.h>
++#include <linux/errno.h>
++#include <linux/major.h>
++#include <linux/gfp.h>
++#include <linux/poll.h>
++#include <asm/tlbflush.h>
++
++#define MAX_TAP_DEV 256     /* maximum number of tapdisk ring devices       */
++#define MAX_DEV_NAME 100    /* presumably max name length, e.g. "blktap0"   */
++
++/*
++ * The maximum number of requests that can be outstanding at any time
++ * is determined by 
++ *
++ *   [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] 
++ *
++ * where mmap_alloc < MAX_DYNAMIC_MEM.
++ *
++ * TODO:
++ * mmap_alloc is initialised to 2 and should be adjustable on the fly via
++ * sysfs.
++ */
++#define BLK_RING_SIZE		__RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
++#define MAX_DYNAMIC_MEM		BLK_RING_SIZE
++#define MAX_PENDING_REQS	BLK_RING_SIZE
++#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
++#define MMAP_VADDR(_start, _req,_seg)                                   \
++        (_start +                                                       \
++         ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +        \
++         ((_seg) * PAGE_SIZE))
++static int blkif_reqs = MAX_PENDING_REQS;
++static int mmap_pages = MMAP_PAGES;
++
++#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we
++		      * have a bunch of pages reserved for shared
++		      * memory rings.
++		      */
++
++/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
++typedef struct domid_translate {
++	unsigned short domid;
++	unsigned short busid;
++} domid_translate_t ;
++
++/*Data struct associated with each of the tapdisk devices*/
++typedef struct tap_blkif {
++	struct vm_area_struct *vma;   /*Shared memory area                   */
++	unsigned long rings_vstart;   /*Kernel memory mapping                */
++	unsigned long user_vstart;    /*User memory mapping                  */
++	unsigned long dev_inuse;      /*One process opens device at a time.  */
++	unsigned long dev_pending;    /*In process of being opened           */
++	unsigned long ring_ok;        /*make this ring->state                */
++	blkif_front_ring_t ufe_ring;  /*Rings up to user space.              */
++	wait_queue_head_t wait;       /*for poll                             */
++	unsigned long mode;           /*current switching mode               */
++	int minor;                    /*Minor number for tapdisk device      */
++	pid_t pid;                    /*tapdisk process id                   */
++	enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
++						  shutdown                   */
++	unsigned long *idx_map;       /*Record the user ring id to kern 
++					[req id, idx] tuple                  */
++	blkif_t *blkif;               /*Associate blkif with tapdev          */
++	struct domid_translate trans; /*Translation from domid to bus.       */
++} tap_blkif_t;
++
++static struct tap_blkif *tapfds[MAX_TAP_DEV];
++static int blktap_next_minor;
++
++static int __init set_blkif_reqs(char *str)
++{
++	get_option(&str, &blkif_reqs);	/* integer following "blkif_reqs=" */
++	return 1;
++}
++__setup("blkif_reqs=", set_blkif_reqs);
++
++/* Run-time switchable: /sys/module/xenblktap/parameters/ */
++static unsigned int log_stats = 0;
++static unsigned int debug_lvl = 0;
++module_param(log_stats, uint, 0644);	/* uint: matches the variable type */
++module_param(debug_lvl, uint, 0644);
++
++/*
++ * Each outstanding request that we've passed to the lower device layers has a 
++ * 'pending_req' allocated to it. Each buffer_head that completes decrements 
++ * the pendcnt towards zero. When it hits zero, the specified domain has a 
++ * response queued for it, with the saved 'id' passed back.
++ */
++typedef struct {
++	blkif_t       *blkif;
++	u64            id;
++	unsigned short mem_idx;
++	int            nr_pages;
++	atomic_t       pendcnt;
++	unsigned short operation;
++	int            status;
++	struct list_head free_list;
++	int            inuse;
++} pending_req_t;
++
++static pending_req_t *pending_reqs[MAX_PENDING_REQS];
++static struct list_head pending_free;
++static DEFINE_SPINLOCK(pending_free_lock);
++static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
++static int alloc_pending_reqs;
++
++typedef unsigned int PEND_RING_IDX;
++
++static inline int MASK_PEND_IDX(int i) { 
++	return (i & (MAX_PENDING_REQS-1)); /* assumes power-of-two size - confirm */
++}
++
++static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
++	return (req - pending_reqs[idx]); /* element offset within pool idx */
++}
++
++#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
++
++#define BLKBACK_INVALID_HANDLE (~0)
++
++static struct page **foreign_pages[MAX_DYNAMIC_MEM];
++static inline unsigned long idx_to_kaddr(
++	unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx)
++{
++	unsigned int slot = sg_idx + req_idx * BLKIF_MAX_SEGMENTS_PER_REQUEST;
++	unsigned long frame = page_to_pfn(foreign_pages[mmap_idx][slot]);
++	return (unsigned long)pfn_to_kaddr(frame);	/* kernel virtual address */
++}
++
++static unsigned short mmap_alloc = 0; /* number of request batches currently allocated (see req_increase()/mmap_req_del()) */
++static unsigned short mmap_lock = 0; /* non-zero blocks req_increase() and makes free_req() retire requests of the last batch */
++static unsigned short mmap_inuse = 0; /* counted down by free_req() while mmap_lock is set; the last batch is deleted at zero */
++
++/******************************************************************
++ * GRANT HANDLES
++ */
++
++/* When using grant tables to map a frame for device access then the
++ * handle returned must be used to unmap the frame. This is needed to
++ * drop the ref count on the frame.
++ *
++ * Every page of a request may be mapped twice (once for the kernel, once
++ * for the user-space tap process), hence a pair of handles per page.
++ */
++struct grant_handle_pair
++{
++        grant_handle_t kernel; /* handle of the kernel mapping */
++        grant_handle_t user; /* handle of the user-space mapping */
++};
++#define INVALID_GRANT_HANDLE	0xFFFF /* marks a handle slot that holds no mapping */
++
++static struct grant_handle_pair 
++    pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES]; /* [batch][page within batch] */
++#define pending_handle(_id, _idx, _i) \
++    (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
++    + (_i)]) /* handle pair of segment _i of request _idx in batch _id */
++
++
++static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/
++
++#define BLKTAP_MINOR 0  /* minor of the control node; /dev/xen/blktap has a dynamic major */
++#define BLKTAP_DEV_DIR  "/dev/xen"
++
++static int blktap_major;
++
++/* blktap IOCTLs (handled in blktap_ioctl()): */
++#define BLKTAP_IOCTL_KICK_FE         1 /* process the responses queued on the user ring */
++#define BLKTAP_IOCTL_KICK_BE         2 /* currently unused */
++#define BLKTAP_IOCTL_SETMODE         3 /* arg: one of the BLKTAP_MODE_* values below */
++#define BLKTAP_IOCTL_SENDPID	     4 /* arg: pid to record for the device */
++#define BLKTAP_IOCTL_NEWINTF	     5 /* arg: packed domid/busid; returns the minor of the device picked */
++#define BLKTAP_IOCTL_MINOR	     6 /* arg: device index; returns that device's minor */
++#define BLKTAP_IOCTL_MAJOR	     7 /* returns blktap_major */
++#define BLKTAP_QUERY_ALLOC_REQS      8 /* reports usage of the pending-request pool */
++#define BLKTAP_IOCTL_FREEINTF        9 /* arg: device index; drops its pending reservation */
++#define BLKTAP_IOCTL_PRINT_IDXS      100  /* printk the user ring indices */
++
++/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
++#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
++#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
++#define BLKTAP_MODE_INTERCEPT_BE     0x00000002  /* unimp.             */
++
++#define BLKTAP_MODE_INTERPOSE \
++           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
++
++
++/*
++ * Tell whether @arg is a mode BLKTAP_IOCTL_SETMODE accepts: pass-through,
++ * front-end interception or full interposition.  Returns 1 if so, else 0.
++ */
++static inline int BLKTAP_MODE_VALID(unsigned long arg)
++{
++	switch (arg) {
++	case BLKTAP_MODE_PASSTHROUGH:
++	case BLKTAP_MODE_INTERCEPT_FE:
++	case BLKTAP_MODE_INTERPOSE:
++		return 1;
++	default:
++		return 0;
++	}
++}
++
++/* Requests passing through the tap to userspace are re-assigned an ID.
++ * We must record a mapping between the BE [IDX,ID] tuple and the userspace
++ * ring ID. 
++ */
++
++/*
++ * Pack a [batch, slot] tuple into one id: @fe_dom goes into the bits from
++ * 16 upwards, the masked @idx into the low 16 bits.  ID_TO_MIDX() and
++ * ID_TO_IDX() undo the packing.
++ *
++ * The upper part is widened to unsigned long before shifting so that the
++ * shift is never performed on a (promoted) signed int.
++ */
++static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
++{
++	return (((unsigned long)fe_dom << 16) | MASK_PEND_IDX(idx));
++}
++
++/*
++ * Extract the slot index (low 16 bits) from an id built by MAKE_ID().
++ * File-local helper, hence static inline rather than extern inline.
++ */
++static inline PEND_RING_IDX ID_TO_IDX(unsigned long id)
++{
++	return (PEND_RING_IDX)(id & 0x0000ffff);
++}
++
++/*
++ * Extract the batch index (bits 16 and up) from an id built by MAKE_ID().
++ * File-local helper, hence static inline rather than extern inline.
++ */
++static inline int ID_TO_MIDX(unsigned long id)
++{
++	return (int)(id >> 16);
++}
++
++/* Value stored in an idx_map slot that carries no request. */
++#define INVALID_REQ 0xdead0000
++
++/*TODO: Convert to a free list*/
++/*
++ * Linear search for the first unused slot of @idx_map.  Returns its index,
++ * or INVALID_REQ when all MAX_PENDING_REQS slots are taken.
++ */
++static inline int GET_NEXT_REQ(unsigned long *idx_map)
++{
++	int slot = 0;
++
++	while (slot < MAX_PENDING_REQS) {
++		if (idx_map[slot] == INVALID_REQ)
++			return slot;
++		slot++;
++	}
++
++	return INVALID_REQ;
++}
++
++
++/* True when neither the kernel nor the user handle of pair _g is valid. */
++#define BLKTAP_INVALID_HANDLE(_g) \
++    ((((_g)->kernel) == INVALID_GRANT_HANDLE) &&  \
++     (((_g)->user) == INVALID_GRANT_HANDLE))
++
++/* Mark both handles of pair _g as unused. */
++#define BLKTAP_INVALIDATE_HANDLE(_g) do {       \
++    (_g)->kernel = INVALID_GRANT_HANDLE; (_g)->user = INVALID_GRANT_HANDLE; \
++    } while(0)
++
++
++/******************************************************************
++ * BLKTAP VM OPS
++ */
++
++static struct page *blktap_nopage(struct vm_area_struct *vma,
++				  unsigned long address,
++				  int *type)
++{
++	/*
++	 * nopage handler installed through blktap_vm_ops.  It never
++	 * supplies a page and ignores all of its arguments:
++	 * if the page has not been mapped in by the driver then return
++	 * NOPAGE_SIGBUS to the domain.
++	 */
++
++	return NOPAGE_SIGBUS;
++}
++
++/*
++ * VM operations installed on every blktap mapping by blktap_mmap().
++ * Uses the standard designated-initializer syntax, like blktap_fops.
++ */
++struct vm_operations_struct blktap_vm_ops = {
++	.nopage = blktap_nopage,
++};
++
++/******************************************************************
++ * BLKTAP FILE OPS
++ */
++ 
++/* Function declarations: device lookup and the file operations defined below */
++static tap_blkif_t *get_next_free_dev(void);
++static int blktap_open(struct inode *inode, struct file *filp);
++static int blktap_release(struct inode *inode, struct file *filp);
++static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
++static int blktap_ioctl(struct inode *inode, struct file *filp,
++                        unsigned int cmd, unsigned long arg);
++static unsigned int blktap_poll(struct file *file, poll_table *wait);
++
++static const struct file_operations blktap_fops = { /* blktap_open() handles both the control minor and the per-device minors */
++	.owner   = THIS_MODULE,
++	.poll    = blktap_poll,
++	.ioctl   = blktap_ioctl,
++	.open    = blktap_open,
++	.release = blktap_release,
++	.mmap    = blktap_mmap,
++};
++
++
++/*
++ * Pick a tap device for a new interface.
++ *
++ * An existing device that is neither open (dev_inuse) nor already reserved
++ * (dev_pending) is preferred and gets marked pending.  If there is none,
++ * the next unused minor is taken, a zeroed tap_blkif_t is allocated for
++ * it, published in tapfds[] and a "blktap%d" class device is created.
++ *
++ * Returns the device, or NULL when no minor is left or the allocation
++ * failed.  The allocation uses GFP_KERNEL, so this may sleep.
++ */
++static tap_blkif_t *get_next_free_dev(void)
++{
++	struct class *class;
++	tap_blkif_t *info;
++	int minor;
++
++	/*
++	 * This is called only from the ioctl, which
++	 * means we should always have interrupts enabled.
++	 */
++	BUG_ON(irqs_disabled());
++
++	spin_lock_irq(&pending_free_lock);
++
++	/* tapfds[0] is always NULL */
++
++	for (minor = 1; minor < blktap_next_minor; minor++) {
++		info = tapfds[minor];
++		/*
++		 * A slot may be NULL: its allocation failed earlier or is
++		 * still in progress in another caller (the lock is dropped
++		 * before kzalloc()).  Such a slot has nothing to reserve and
++		 * must not be dereferenced, so it is skipped.
++		 */
++		if (info &&
++		    (info->dev_inuse == 0) &&
++		    (info->dev_pending == 0)) {
++			info->dev_pending = 1;
++			goto found;
++		}
++	}
++	info = NULL;
++	minor = -1;
++
++	/*
++	 * We didn't find free device. If we can still allocate
++	 * more, then we grab the next device minor that is
++	 * available.  This is done while we are still under
++	 * the protection of the pending_free_lock.
++	 */
++	if (blktap_next_minor < MAX_TAP_DEV)
++		minor = blktap_next_minor++;
++found:
++	spin_unlock_irq(&pending_free_lock);
++
++	if (!info && minor > 0) {
++		info = kzalloc(sizeof(*info), GFP_KERNEL);
++		if (unlikely(!info)) {
++			/*
++			 * If we failed here, try to put back
++			 * the next minor number. But if one
++			 * was just taken, then we just lose this
++			 * minor.
++			 */
++			spin_lock_irq(&pending_free_lock);
++			if (blktap_next_minor == minor+1)
++				blktap_next_minor--;
++			spin_unlock_irq(&pending_free_lock);
++			goto out;
++		}
++
++		info->minor = minor;
++		/*
++		 * Make sure that we have a minor before others can
++		 * see us.
++		 */
++		wmb();
++		tapfds[minor] = info;
++
++		if ((class = get_xen_class()) != NULL)
++			class_device_create(class, NULL,
++					    MKDEV(blktap_major, minor), NULL,
++					    "blktap%d", minor);
++	}
++
++out:
++	return info;
++}
++
++/*
++ * Look up the tap device registered for (@domid, @xenbus_id).  On a match
++ * the device is bound to @blkif, its status becomes RUNNING and its index
++ * in tapfds[] is returned; -1 is returned when no device matches.
++ */
++int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) 
++{
++	int minor;
++
++	for (minor = 1; minor < blktap_next_minor; minor++) {
++		tap_blkif_t *dev = tapfds[minor];
++
++		if (!dev)
++			continue;
++		if (dev->trans.domid != domid)
++			continue;
++		if (dev->trans.busid != xenbus_id)
++			continue;
++
++		dev->blkif = blkif;
++		dev->status = RUNNING;
++		return minor;
++	}
++
++	return -1;
++}
++
++/*
++ * Detach the backend interface from tap device @idx.
++ *
++ * If a pid was recorded for the device and a task with that pid still
++ * exists, the device status is set to CLEANSHUTDOWN.  In every case the
++ * device's blkif pointer is cleared.  Out-of-range indices and empty
++ * slots are ignored; the index is validated before tapfds[] is read.
++ */
++void signal_tapdisk(int idx) 
++{
++	tap_blkif_t *info;
++	struct task_struct *ptask;
++
++	if ((idx < 0) || (idx >= MAX_TAP_DEV))
++		return;
++
++	info = tapfds[idx];
++	if (!info)
++		return;
++
++	if (info->pid > 0) {
++		ptask = find_task_by_pid(info->pid);
++		if (ptask)
++			info->status = CLEANSHUTDOWN;
++	}
++	info->blkif = NULL;
++
++	return;
++}
++
++/*
++ * open() handler.
++ *
++ * The control minor (BLKTAP_MINOR) needs no setup.  For a per-interface
++ * device the function claims exclusive use (dev_inuse), allocates and
++ * initialises the user ring page and the idx_map table, and stores the
++ * device in filp->private_data.
++ *
++ * Returns 0 on success, -ENODEV for an unknown minor, -EBUSY if the device
++ * is already open and -ENOMEM if an allocation fails.  On -ENOMEM the
++ * exclusive-use bit is released again and the device's pending reservation
++ * is left untouched.
++ */
++static int blktap_open(struct inode *inode, struct file *filp)
++{
++	blkif_sring_t *sring;
++	int idx = iminor(inode) - BLKTAP_MINOR;
++	tap_blkif_t *info;
++	int i;
++	
++	/* ctrl device, treat differently */
++	if (!idx)
++		return 0;
++
++	/* Validate the index before it is used on tapfds[]. */
++	info = ((idx > 0) && (idx < MAX_TAP_DEV)) ? tapfds[idx] : NULL;
++	if (!info) {
++		WPRINTK("Unable to open device /dev/xen/blktap%d\n",
++			idx);
++		return -ENODEV;
++	}
++
++	DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
++	
++	/*Only one process can access device at a time*/
++	if (test_and_set_bit(0, &info->dev_inuse))
++		return -EBUSY;
++
++	/* Allocate the fe ring. */
++	sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
++	if (sring == NULL)
++		goto fail_nomem;
++
++	info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, 
++				GFP_KERNEL);
++	if (info->idx_map == NULL)
++		goto fail_free_ring;
++
++	/* No request is outstanding on a freshly opened device. */
++	for (i = 0; i < MAX_PENDING_REQS; i++) 
++		info->idx_map[i] = INVALID_REQ;
++
++	SetPageReserved(virt_to_page(sring));
++    
++	SHARED_RING_INIT(sring);
++	FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
++	
++	filp->private_data = info;
++	info->vma = NULL;
++	init_waitqueue_head(&info->wait);
++
++	/* The reservation made by BLKTAP_IOCTL_NEWINTF is now consumed. */
++	info->dev_pending = 0;
++
++	DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
++	return 0;
++
++ fail_free_ring:
++	free_page((unsigned long)sring);
++ fail_nomem:
++	/* Do not leave the device marked busy after a failed open. */
++	clear_bit(0, &info->dev_inuse);
++	return -ENOMEM;
++}
++
++/*
++ * release() handler: undo blktap_open() for a per-interface device.
++ *
++ * Marks the device unused and not ready for requests, frees the user ring
++ * page, zaps any mapping still recorded in info->vma, stops the backend
++ * thread unless the device was already shut down cleanly, and finally
++ * frees the idx_map table allocated by blktap_open().
++ * Nothing is done for the control device.  Always returns 0.
++ */
++static int blktap_release(struct inode *inode, struct file *filp)
++{
++	tap_blkif_t *info = filp->private_data;
++	
++	/* check for control device */
++	if (!info)
++		return 0;
++
++	/*
++	 * ring_ok is only ever set by blktap_mmap(); clear it so that a
++	 * later open without mmap is not mistaken for a ready device.
++	 */
++	info->ring_ok = 0;
++	info->dev_inuse = 0;
++	DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
++
++	/* Free the ring page. */
++	ClearPageReserved(virt_to_page(info->ufe_ring.sring));
++	free_page((unsigned long) info->ufe_ring.sring);
++
++	/* Clear any active mappings and free foreign map table */
++	if (info->vma) {
++		zap_page_range(
++			info->vma, info->vma->vm_start, 
++			info->vma->vm_end - info->vma->vm_start, NULL);
++		info->vma = NULL;
++	}
++	
++	if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
++		if (info->blkif->xenblkd != NULL) {
++			kthread_stop(info->blkif->xenblkd);
++			info->blkif->xenblkd = NULL;
++		}
++		info->status = CLEANSHUTDOWN;
++	}	
++
++	/*
++	 * blktap_open() allocates a new idx_map on every open, so the table
++	 * has to be released here or it leaks on every open/close cycle.
++	 */
++	kfree(info->idx_map);
++	info->idx_map = NULL;
++
++	return 0;
++}
++
++
++/* Note on mmap:
++ * We need to map pages to user space in a way that will allow the block
++ * subsystem to set up direct IO to them.  This couldn't be done before,
++ * because there isn't really a sane way to translate a user virtual address
++ * down to a physical address when the page belongs to another domain.
++ *
++ * My first approach was to map the page in to kernel memory, add an entry
++ * for it in the physical frame list (using alloc_lomem_region as in blkback)
++ * and then attempt to map that page up to user space.  This is disallowed
++ * by xen though, which realizes that we don't really own the machine frame
++ * underlying the physical page.
++ *
++ * The new approach is to provide explicit support for this in xen linux.
++ * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
++ * mapped from other vms.  vma->vm_private_data is set up as a mapping 
++ * from pages to actual page structs.  There is a new clause in get_user_pages
++ * that does the right thing for this sort of mapping.
++ */
++/*
++ * mmap() handler for a per-interface device.
++ *
++ * The mapping must cover exactly mmap_pages + RING_PAGES pages.  The user
++ * ring page is mapped at the start of the area; the remainder is left for
++ * foreign pages, which are tracked through a page-pointer table hung off
++ * vma->vm_private_data (one entry per page of the area, all initially
++ * NULL).  On success the vma is recorded in the device and ring_ok is set.
++ *
++ * Returns 0 on success, -EAGAIN for a wrongly sized mapping and -ENOMEM
++ * when called on the control device or when setting up the mapping fails.
++ */
++static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
++{
++	int size;
++	struct page **map;
++	tap_blkif_t *info = filp->private_data;
++
++	if (info == NULL) {
++		WPRINTK("blktap: mmap, retrieving idx failed\n");
++		return -ENOMEM;
++	}
++	
++	vma->vm_flags |= VM_RESERVED;
++	vma->vm_ops = &blktap_vm_ops;
++
++	size = vma->vm_end - vma->vm_start;
++	if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
++		WPRINTK("you _must_ map exactly %d pages!\n",
++		       mmap_pages + RING_PAGES);
++		return -EAGAIN;
++	}
++
++	size >>= PAGE_SHIFT;
++	info->rings_vstart = vma->vm_start;
++	info->user_vstart  = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
++    
++	/* Map the ring pages to the start of the region and reserve it. */
++	if (remap_pfn_range(vma, vma->vm_start, 
++			    __pa(info->ufe_ring.sring) >> PAGE_SHIFT, 
++			    PAGE_SIZE, vma->vm_page_prot)) {
++		WPRINTK("Mapping user ring failed!\n");
++		goto fail;
++	}
++
++	/*
++	 * Mark this VM as containing foreign pages, and set up mappings.
++	 * kzalloc() returns zeroed memory, so every entry starts out NULL.
++	 */
++	map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
++		      * sizeof(*map),
++		      GFP_KERNEL);
++	if (map == NULL) {
++		WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
++		goto fail;
++	}
++
++	vma->vm_private_data = map;
++	vma->vm_flags |= VM_FOREIGN;
++
++	info->vma = vma;
++	info->ring_ok = 1;
++	return 0;
++ fail:
++	/* Clear any active mappings. */
++	zap_page_range(vma, vma->vm_start, 
++		       vma->vm_end - vma->vm_start, NULL);
++
++	return -ENOMEM;
++}
++
++
++/*
++ * ioctl() handler for both the control device (private_data == NULL) and
++ * the per-interface devices.
++ *
++ *  BLKTAP_IOCTL_KICK_FE     process responses queued on the user ring
++ *  BLKTAP_IOCTL_SETMODE     store @arg as device mode if it is valid
++ *  BLKTAP_IOCTL_PRINT_IDXS  printk the user ring indices
++ *  BLKTAP_IOCTL_SENDPID     record @arg as the pid of the device
++ *  BLKTAP_IOCTL_NEWINTF     pick a device for the domid/busid packed in
++ *                           @arg; returns its minor, -1 if none is left
++ *  BLKTAP_IOCTL_FREEINTF    drop the pending reservation of device @arg
++ *  BLKTAP_IOCTL_MINOR       return the minor of device @arg or -EINVAL
++ *  BLKTAP_IOCTL_MAJOR       return the driver's major number
++ *  BLKTAP_QUERY_ALLOC_REQS  return the percentage of blkif_reqs that is
++ *                           currently allocated
++ *
++ * Unknown commands yield -ENOIOCTLCMD.  @arg comes from user space, so a
++ * device index is range-checked before it is used on tapfds[].
++ */
++static int blktap_ioctl(struct inode *inode, struct file *filp,
++                        unsigned int cmd, unsigned long arg)
++{
++	tap_blkif_t *info = filp->private_data;
++
++	switch(cmd) {
++	case BLKTAP_IOCTL_KICK_FE: 
++	{
++		/* There are fe messages to process. */
++		return blktap_read_ufe_ring(info);
++	}
++	case BLKTAP_IOCTL_SETMODE:
++	{
++		if (info) {
++			if (BLKTAP_MODE_VALID(arg)) {
++				info->mode = arg;
++				/* XXX: may need to flush rings here. */
++				DPRINTK("blktap: set mode to %lx\n", 
++				       arg);
++				return 0;
++			}
++		}
++		return 0;
++	}
++	case BLKTAP_IOCTL_PRINT_IDXS:
++        {
++		if (info) {
++			printk("User Rings: \n-----------\n");
++			printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
++				"| req_prod: %2d, rsp_prod: %2d\n",
++				info->ufe_ring.rsp_cons,
++				info->ufe_ring.req_prod_pvt,
++				info->ufe_ring.sring->req_prod,
++				info->ufe_ring.sring->rsp_prod);
++		}
++            	return 0;
++        }
++	case BLKTAP_IOCTL_SENDPID:
++	{
++		if (info) {
++			info->pid = (pid_t)arg;
++			DPRINTK("blktap: pid received %d\n", 
++			       info->pid);
++		}
++		return 0;
++	}
++	case BLKTAP_IOCTL_NEWINTF:
++	{		
++		uint64_t val = (uint64_t)arg;
++		domid_translate_t *tr = (domid_translate_t *)&val;
++
++		DPRINTK("NEWINTF Req for domid %d and bus id %d\n", 
++		       tr->domid, tr->busid);
++		info = get_next_free_dev();
++		if (!info) {
++			WPRINTK("Error initialising /dev/xen/blktap - "
++				"No more devices\n");
++			return -1;
++		}
++		info->trans.domid = tr->domid;
++		info->trans.busid = tr->busid;
++		return info->minor;
++	}
++	case BLKTAP_IOCTL_FREEINTF:
++	{
++		unsigned long dev = arg;
++		unsigned long flags;
++
++		if (dev >= MAX_TAP_DEV)
++			return 0; /* should this be an error? */
++
++		info = tapfds[dev];
++		if (!info)
++			return 0; /* should this be an error? */
++
++		spin_lock_irqsave(&pending_free_lock, flags);
++		if (info->dev_pending)
++			info->dev_pending = 0;
++		spin_unlock_irqrestore(&pending_free_lock, flags);
++
++		return 0;
++	}
++	case BLKTAP_IOCTL_MINOR:
++	{
++		unsigned long dev = arg;
++
++		if (dev >= MAX_TAP_DEV)
++			return -EINVAL;
++
++		info = tapfds[dev];
++		if (!info)
++			return -EINVAL;
++
++		return info->minor;
++	}
++	case BLKTAP_IOCTL_MAJOR:
++		return blktap_major;
++
++	case BLKTAP_QUERY_ALLOC_REQS:
++	{
++		WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
++		       alloc_pending_reqs, blkif_reqs);
++		/* blkif_reqs is a boot parameter: never divide by zero. */
++		if (blkif_reqs <= 0)
++			return 0;
++		/* Multiply first, or integer division truncates to 0. */
++		return (alloc_pending_reqs * 100) / blkif_reqs;
++	}
++	}
++	return -ENOIOCTLCMD;
++}
++
++/*
++ * poll() handler.  A per-interface device is readable when requests have
++ * been queued privately on the user ring but not yet published; they are
++ * pushed to the shared ring before readability is reported.  The control
++ * device never reports any event.
++ */
++static unsigned int blktap_poll(struct file *filp, poll_table *wait)
++{
++	tap_blkif_t *info = filp->private_data;
++	unsigned int events = 0;
++
++	/* do not work on the control device */
++	if (info) {
++		poll_wait(filp, &info->wait, wait);
++		if (info->ufe_ring.req_prod_pvt !=
++		    info->ufe_ring.sring->req_prod) {
++			RING_PUSH_REQUESTS(&info->ufe_ring);
++			events = POLLIN | POLLRDNORM;
++		}
++	}
++
++	return events;
++}
++
++/*
++ * Wake up whoever sleeps on the wait queue of tap device @idx (the queue
++ * blktap_poll() waits on).  Out-of-range indices and empty slots are
++ * ignored; the index is validated before tapfds[] is read.
++ */
++void blktap_kick_user(int idx)
++{
++	tap_blkif_t *info;
++
++	if ((idx < 0) || (idx >= MAX_TAP_DEV))
++		return;
++
++	info = tapfds[idx];
++	if (!info)
++		return;
++
++	wake_up_interruptible(&info->wait);
++
++	return;
++}
++
++static int do_block_io_op(blkif_t *blkif); /* forward declarations; the definitions follow further down */
++static void dispatch_rw_block_io(blkif_t *blkif,
++				 blkif_request_t *req,
++				 pending_req_t *pending_req);
++static void make_response(blkif_t *blkif, u64 id,
++                          unsigned short op, int st);
++
++/******************************************************************
++ * misc small helpers
++ */
++/*
++ * Allocate one more batch of pending requests together with its page
++ * vector, put all requests of the batch on the free list and invalidate
++ * their grant handles.
++ *
++ * Returns 0 on success, -EINVAL when no further batch may be added (all
++ * table slots used, or a batch removal is in progress) and -ENOMEM when an
++ * allocation fails.
++ *
++ * NOTE(review): the batch is sized by blkif_reqs but initialised for
++ * MAX_PENDING_REQS entries -- confirm blkif_reqs can never be smaller.
++ */
++static int req_increase(void)
++{
++	int i, j;
++
++	/*
++	 * mmap_alloc indexes pending_reqs[] as well as foreign_pages[] and
++	 * pending_grant_handles[], so it must fit both array bounds.
++	 */
++	if (mmap_alloc >= MAX_PENDING_REQS || mmap_alloc >= MAX_DYNAMIC_MEM ||
++	    mmap_lock) 
++		return -EINVAL;
++
++	pending_reqs[mmap_alloc]  = kzalloc(sizeof(pending_req_t)
++					    * blkif_reqs, GFP_KERNEL);
++	foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages);
++
++	if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc])
++		goto out_of_memory;
++
++	DPRINTK("%s: reqs=%d, pages=%d\n",
++		__FUNCTION__, blkif_reqs, mmap_pages);
++
++	for (i = 0; i < MAX_PENDING_REQS; i++) {
++		list_add_tail(&pending_reqs[mmap_alloc][i].free_list, 
++			      &pending_free);
++		pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
++		for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
++			BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc, 
++								 i, j));
++	}
++
++	mmap_alloc++;
++	DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
++	return 0;
++
++ out_of_memory:
++	free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages);
++	kfree(pending_reqs[mmap_alloc]);
++	/* Do not leave pointers to freed memory in the tables. */
++	foreign_pages[mmap_alloc] = NULL;
++	pending_reqs[mmap_alloc] = NULL;
++	WPRINTK("%s: out of memory\n", __FUNCTION__);
++	return -ENOMEM;
++}
++
++/*
++ * Free request batch @mmap: its pending_req array and its page vector.
++ * Also clears mmap_lock and decrements the batch count.
++ * Must be called with pending_free_lock held.
++ */
++static void mmap_req_del(int mmap)
++{
++	BUG_ON(!spin_is_locked(&pending_free_lock));
++
++	kfree(pending_reqs[mmap]);
++	pending_reqs[mmap] = NULL;
++
++	/* Release the pages of the batch being deleted, i.e. index @mmap. */
++	free_empty_pages_and_pagevec(foreign_pages[mmap], mmap_pages);
++	foreign_pages[mmap] = NULL;
++
++	mmap_lock = 0;
++	mmap_alloc--;
++	DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
++}
++
++/*
++ * Take the first request off the free list, mark it in use and count it
++ * in alloc_pending_reqs.  Returns NULL when the free list is empty.
++ */
++static pending_req_t* alloc_req(void)
++{
++	pending_req_t *req;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pending_free_lock, flags);
++
++	if (list_empty(&pending_free)) {
++		req = NULL;
++	} else {
++		req = list_entry(pending_free.next, pending_req_t, free_list);
++		list_del(&req->free_list);
++		req->inuse = 1;
++		alloc_pending_reqs++;
++	}
++
++	spin_unlock_irqrestore(&pending_free_lock, flags);
++
++	return req;
++}
++
++/*
++ * Give @req back.
++ *
++ * Normally the request is put at the head of the free list and, if that
++ * list was empty before, waiters on pending_free_wq are woken.  While
++ * mmap_lock is set, a request belonging to the last batch is not recycled:
++ * mmap_inuse is counted down instead and the batch is deleted once it
++ * reaches zero.
++ */
++static void free_req(pending_req_t *req)
++{
++	unsigned long flags;
++	int wake = 0;
++
++	spin_lock_irqsave(&pending_free_lock, flags);
++
++	alloc_pending_reqs--;
++	req->inuse = 0;
++
++	if (!mmap_lock || (req->mem_idx != mmap_alloc-1)) {
++		/* ordinary case: recycle the request */
++		wake = list_empty(&pending_free);
++		list_add(&req->free_list, &pending_free);
++	} else {
++		/* the last batch is being retired */
++		mmap_inuse--;
++		if (mmap_inuse == 0)
++			mmap_req_del(mmap_alloc-1);
++	}
++
++	spin_unlock_irqrestore(&pending_free_lock, flags);
++
++	if (wake)
++		wake_up(&pending_free_wq);
++}
++
++/*
++ * Tear down the grant mappings of a request.
++ *
++ * @req:    the request; supplies the batch index and the page count
++ * @k_idx:  slot of the request within its batch (kernel side)
++ * @u_idx:  slot of the request in the user mapping
++ * @tapidx: index of the tap device in tapfds[]
++ *
++ * For every page the kernel and/or user grant handle that is still valid
++ * is queued for unmapping and the handle pair is invalidated; the queue
++ * is then submitted in one GNTTABOP_unmap_grant_ref call.  The user range
++ * is zapped before the unmap with auto-translated physmap and after it
++ * otherwise.  @tapidx is validated before tapfds[] is read.
++ *
++ * NOTE(review): when looking up a user pte fails, the function returns
++ * without submitting the unmap operations queued so far.
++ */
++static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
++			    int tapidx)
++{
++	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
++	unsigned int i, invcount = 0;
++	struct grant_handle_pair *khandle;
++	uint64_t ptep;
++	int ret, mmap_idx;
++	unsigned long kvaddr, uvaddr;
++	tap_blkif_t *info;
++
++	info = ((tapidx >= 0) && (tapidx < MAX_TAP_DEV)) ?
++		tapfds[tapidx] : NULL;
++	if (!info) {
++		WPRINTK("fast_flush: Couldn't get info!\n");
++		return;
++	}
++
++	if (info->vma != NULL &&
++	    xen_feature(XENFEAT_auto_translated_physmap)) {
++		down_write(&info->vma->vm_mm->mmap_sem);
++		zap_page_range(info->vma, 
++			       MMAP_VADDR(info->user_vstart, u_idx, 0), 
++			       req->nr_pages << PAGE_SHIFT, NULL);
++		up_write(&info->vma->vm_mm->mmap_sem);
++	}
++
++	mmap_idx = req->mem_idx;
++
++	for (i = 0; i < req->nr_pages; i++) {
++		kvaddr = idx_to_kaddr(mmap_idx, k_idx, i);
++		uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
++
++		khandle = &pending_handle(mmap_idx, k_idx, i);
++
++		if (khandle->kernel != INVALID_GRANT_HANDLE) {
++			gnttab_set_unmap_op(&unmap[invcount],
++					    kvaddr,
++					    GNTMAP_host_map, khandle->kernel);
++			invcount++;
++		}
++
++		if (khandle->user != INVALID_GRANT_HANDLE) {
++			BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
++			if (create_lookup_pte_addr(
++				info->vma->vm_mm,
++				uvaddr,
++				&ptep) !=0) {
++				WPRINTK("Couldn't get a pte addr!\n");
++				return;
++			}
++
++			gnttab_set_unmap_op(&unmap[invcount], ptep,
++					    GNTMAP_host_map
++					    | GNTMAP_application_map
++					    | GNTMAP_contains_pte,
++					    khandle->user);
++			invcount++;
++		}
++
++		BLKTAP_INVALIDATE_HANDLE(khandle);
++	}
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_unmap_grant_ref, unmap, invcount);
++	BUG_ON(ret);
++	
++	if (info->vma != NULL && !xen_feature(XENFEAT_auto_translated_physmap))
++		zap_page_range(info->vma, 
++			       MMAP_VADDR(info->user_vstart, u_idx, 0), 
++			       req->nr_pages << PAGE_SHIFT, NULL);
++}
++
++/******************************************************************
++ * SCHEDULER FUNCTIONS
++ */
++
++/*
++ * Log the out-of-requests, read and write counters of @blkif, then reset
++ * them and schedule the next report ten seconds from now.
++ */
++static void print_stats(blkif_t *blkif)
++{
++	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
++	       current->comm, blkif->st_oo_req,
++	       blkif->st_rd_req, blkif->st_wr_req);
++
++	/* start a fresh accounting period */
++	blkif->st_oo_req = 0;
++	blkif->st_wr_req = 0;
++	blkif->st_rd_req = 0;
++	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
++}
++
++/*
++ * Main loop of the per-interface kernel thread; @arg is the blkif_t.
++ *
++ * Until asked to stop, the thread sleeps until the interface has work and
++ * a free pending request exists, then lets do_block_io_op() drain the
++ * ring.  A reference on the interface is held for the thread's lifetime.
++ * Always returns 0.
++ */
++int tap_blkif_schedule(void *arg)
++{
++	blkif_t *blkif = arg;
++	int more;
++
++	blkif_get(blkif);
++
++	if (debug_lvl)
++		printk(KERN_DEBUG "%s: started\n", current->comm);
++
++	for (;;) {
++		if (kthread_should_stop())
++			break;
++
++		wait_event_interruptible(
++			blkif->wq,
++			blkif->waiting_reqs || kthread_should_stop());
++		wait_event_interruptible(
++			pending_free_wq,
++			!list_empty(&pending_free) || kthread_should_stop());
++
++		blkif->waiting_reqs = 0;
++		smp_mb(); /* clear flag *before* checking for work */
++
++		/* re-arm ourselves if the ring could not be drained */
++		more = do_block_io_op(blkif);
++		if (more)
++			blkif->waiting_reqs = 1;
++
++		if (log_stats && time_after(jiffies, blkif->st_print))
++			print_stats(blkif);
++	}
++
++	if (log_stats)
++		print_stats(blkif);
++	if (debug_lvl)
++		printk(KERN_DEBUG "%s: exiting\n", current->comm);
++
++	blkif->xenblkd = NULL;
++	blkif_put(blkif);
++
++	return 0;
++}
++
++/******************************************************************
++ * COMPLETION CALLBACK -- Called by user level ioctl()
++ */
++
++/*
++ * Consume the responses user space has placed on the user ring of @info.
++ *
++ * For each response the [batch, slot] of the originating request is looked
++ * up through idx_map, the request's pages are removed from the foreign
++ * page table of the mapping, its grants are unmapped, the response is
++ * forwarded to the frontend and the request is freed.
++ *
++ * The response id is written by user space, so it is range-checked before
++ * it indexes idx_map, and a response whose idx_map entry does not name a
++ * valid request is dropped instead of being processed.
++ *
++ * Always returns 0.
++ */
++static int blktap_read_ufe_ring(tap_blkif_t *info)
++{
++	/* This is called to read responses from the UFE ring. */
++	RING_IDX i, j, rp;
++	blkif_response_t *resp;
++	blkif_t *blkif=NULL;
++	int pending_idx, usr_idx, mmap_idx;
++	pending_req_t *pending_req;
++	
++	if (!info)
++		return 0;
++
++	/* We currently only forward packets in INTERCEPT_FE mode. */
++	if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
++		return 0;
++
++	/* for each outstanding message on the UFEring  */
++	rp = info->ufe_ring.sring->rsp_prod;
++	rmb();
++        
++	for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
++		blkif_response_t res;
++		resp = RING_GET_RESPONSE(&info->ufe_ring, i);
++		memcpy(&res, resp, sizeof(res));
++		mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
++		++info->ufe_ring.rsp_cons;
++
++		/* The id is user-controlled: validate it before indexing. */
++		if (res.id >= MAX_PENDING_REQS) {
++			WPRINTK("Response id [%llu] out of range\n",
++				(long long unsigned) res.id);
++			continue;
++		}
++
++		/*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
++		usr_idx = (int)res.id;
++		pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
++		mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
++
++		if ( (mmap_idx >= mmap_alloc) || 
++		   (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) ) {
++			WPRINTK("Incorrect req map"
++			       "[%d], internal map [%d,%d (%d)]\n", 
++			       usr_idx, mmap_idx, 
++			       ID_TO_IDX(info->idx_map[usr_idx]),
++			       MASK_PEND_IDX(
++				       ID_TO_IDX(info->idx_map[usr_idx])));
++			/* No valid request behind this id: drop it. */
++			continue;
++		}
++
++		pending_req = &pending_reqs[mmap_idx][pending_idx];
++		blkif = pending_req->blkif;
++
++		for (j = 0; j < pending_req->nr_pages; j++) {
++
++			unsigned long kvaddr, uvaddr;
++			struct page **map = info->vma->vm_private_data;
++			struct page *pg;
++			int offset;
++
++			uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
++			kvaddr = idx_to_kaddr(mmap_idx, pending_idx, j);
++
++			pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
++			ClearPageReserved(pg);
++			offset = (uvaddr - info->vma->vm_start) 
++				>> PAGE_SHIFT;
++			map[offset] = NULL;
++		}
++		fast_flush_area(pending_req, pending_idx, usr_idx, info->minor);
++		info->idx_map[usr_idx] = INVALID_REQ;
++		make_response(blkif, pending_req->id, res.operation,
++			      res.status);
++		blkif_put(pending_req->blkif);
++		free_req(pending_req);
++	}
++		
++	return 0;
++}
++
++
++/******************************************************************************
++ * NOTIFICATION FROM GUEST OS.
++ */
++
++static void blkif_notify_work(blkif_t *blkif) /* flag pending work on @blkif and wake its wait queue */
++{
++	blkif->waiting_reqs = 1; /* the condition tap_blkif_schedule() sleeps on */
++	wake_up(&blkif->wq);
++}
++
++/*
++ * Interrupt handler of the backend interface: @dev_id is the blkif_t that
++ * has new work.  The work itself is left to the interface's thread.
++ */
++irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++{
++	blkif_t *blkif = dev_id;
++
++	blkif_notify_work(blkif);
++
++	return IRQ_HANDLED;
++}
++
++
++
++/******************************************************************
++ * DOWNWARD CALLS -- These interface with the block-device layer proper.
++ */
++/* Set while the "no user ring" warnings below have not been printed yet. */
++static int print_dbug = 1;
++
++/*
++ * Move requests from the frontend ring of @blkif towards user space.
++ *
++ * Each request is copied off the ring (according to the ring protocol in
++ * use), gets a pending_req and is handed to dispatch_rw_block_io();
++ * unknown operations are answered with BLKIF_RSP_ERROR right away.
++ * Afterwards the user-space side is woken.
++ *
++ * Returns non-zero when work remains (user ring full, consumer overflow
++ * or no free pending_req) and 0 otherwise, including when the interface
++ * has no usable tap device.  dev_num is validated before tapfds[] is
++ * read.
++ */
++static int do_block_io_op(blkif_t *blkif)
++{
++	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
++	blkif_request_t req;
++	pending_req_t *pending_req;
++	RING_IDX rc, rp;
++	int more_to_do = 0;
++	tap_blkif_t *info;
++
++	rc = blk_rings->common.req_cons;
++	rp = blk_rings->common.sring->req_prod;
++	rmb(); /* Ensure we see queued requests up to 'rp'. */
++
++	/*Check blkif has corresponding UE ring*/
++	if (blkif->dev_num < 0) {
++		/*oops*/
++		if (print_dbug) {
++			WPRINTK("Corresponding UE " 
++			       "ring does not exist!\n");
++			print_dbug = 0; /*We only print this message once*/
++		}
++		return 0;
++	}
++
++	info = (blkif->dev_num < MAX_TAP_DEV) ? tapfds[blkif->dev_num] : NULL;
++
++	if (!info || !info->dev_inuse) {
++		if (print_dbug) {
++			WPRINTK("Can't get UE info!\n");
++			print_dbug = 0;
++		}
++		return 0;
++	}
++
++	while (rc != rp) {
++		
++		if (RING_FULL(&info->ufe_ring)) {
++			WPRINTK("RING_FULL! More to do\n");
++			more_to_do = 1;
++			break;
++		}
++
++		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) {
++			WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
++			       " More to do\n");
++			more_to_do = 1;
++			break;		
++		}
++
++		pending_req = alloc_req();
++		if (NULL == pending_req) {
++			blkif->st_oo_req++;
++			more_to_do = 1;
++			break;
++		}
++
++		switch (blkif->blk_protocol) {
++		case BLKIF_PROTOCOL_NATIVE:
++			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc),
++			       sizeof(req));
++			break;
++		case BLKIF_PROTOCOL_X86_32:
++			blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
++			break;
++		case BLKIF_PROTOCOL_X86_64:
++			blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
++			break;
++		default:
++			BUG();
++		}
++		blk_rings->common.req_cons = ++rc; /* before make_response() */
++
++		switch (req.operation) {
++		case BLKIF_OP_READ:
++			blkif->st_rd_req++;
++			dispatch_rw_block_io(blkif, &req, pending_req);
++			break;
++
++		case BLKIF_OP_WRITE:
++			blkif->st_wr_req++;
++			dispatch_rw_block_io(blkif, &req, pending_req);
++			break;
++
++		default:
++			WPRINTK("unknown operation [%d]\n",
++				req.operation);
++			make_response(blkif, req.id, req.operation,
++				      BLKIF_RSP_ERROR);
++			free_req(pending_req);
++			break;
++		}
++	}
++		
++	blktap_kick_user(blkif->dev_num);
++
++	return more_to_do;
++}
++
++/*
++ * Forward one read/write request to the user-space tap process.
++ *
++ * @blkif:       interface the request came from
++ * @req:         private copy of the frontend's request
++ * @pending_req: bookkeeping slot obtained from alloc_req()
++ *
++ * Every segment of the request is grant-mapped into the kernel page
++ * reserved for it and, unless the physmap is auto-translated, also into
++ * the user mapping (through the pte of the user address).  The pages are
++ * recorded in the foreign page table of the mapping, a free idx_map slot
++ * ties the user-ring id to [batch, slot], and the request is written to
++ * the user ring with that id.
++ *
++ * On any failure the frontend receives BLKIF_RSP_ERROR and @pending_req
++ * is freed; mappings already established are torn down first.
++ */
++static void dispatch_rw_block_io(blkif_t *blkif,
++				 blkif_request_t *req,
++				 pending_req_t *pending_req)
++{
++	int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
++	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
++	unsigned int nseg;
++	int ret, i, nr_sects = 0;
++	tap_blkif_t *info;
++	uint64_t sector;
++	blkif_request_t *target;
++	int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
++	int usr_idx;
++	uint16_t mmap_idx = pending_req->mem_idx;
++
++	/* tapfds[] has MAX_TAP_DEV entries: valid indices end below it. */
++	if (blkif->dev_num < 0 || blkif->dev_num >= MAX_TAP_DEV)
++		goto fail_response;
++
++	info = tapfds[blkif->dev_num];
++	if (info == NULL)
++		goto fail_response;
++
++	/* Check we have space on user ring - should never fail. */
++	usr_idx = GET_NEXT_REQ(info->idx_map);
++	if (usr_idx == INVALID_REQ) {
++		BUG();
++		goto fail_response;
++	}
++
++	/* Check that number of segments is sane. */
++	nseg = req->nr_segments;
++	if ( unlikely(nseg == 0) || 
++	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
++		WPRINTK("Bad number of segments in request (%d)\n", nseg);
++		goto fail_response;
++	}
++	
++	/* Make sure userspace is ready. */
++	if (!info->ring_ok) {
++		WPRINTK("blktap: ring not ready for requests!\n");
++		goto fail_response;
++	}
++
++	if (RING_FULL(&info->ufe_ring)) {
++		WPRINTK("blktap: fe_ring is full, can't add "
++			"IO Request will be dropped. %d %d\n",
++			RING_SIZE(&info->ufe_ring),
++			RING_SIZE(&blkif->blk_rings.common));
++		goto fail_response;
++	}
++
++	pending_req->blkif     = blkif;
++	pending_req->id        = req->id;
++	pending_req->operation = operation;
++	pending_req->status    = BLKIF_RSP_OKAY;
++	pending_req->nr_pages  = nseg;
++	op = 0;
++	/* Build the grant map operations: one or two per segment. */
++	for (i = 0; i < nseg; i++) {
++		unsigned long uvaddr;
++		unsigned long kvaddr;
++		uint64_t ptep;
++		uint32_t flags;
++
++		uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
++		kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
++
++		sector = req->sector_number + ((PAGE_SIZE / 512) * i);
++		if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) {
++			WPRINTK("BLKTAP: Sector request greater " 
++			       "than size\n");
++			WPRINTK("BLKTAP: %s request sector" 
++			       "[%llu,%llu], Total [%llu]\n",
++			       (req->operation == 
++				BLKIF_OP_WRITE ? "WRITE" : "READ"),
++				(long long unsigned) sector,
++				(long long unsigned) sector>>9,
++				(long long unsigned) blkif->sectors);
++		}
++
++		flags = GNTMAP_host_map;
++		if (operation == WRITE)
++			flags |= GNTMAP_readonly;
++		gnttab_set_map_op(&map[op], kvaddr, flags,
++				  req->seg[i].gref, blkif->domid);
++		op++;
++
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			/* Now map it to user. */
++			ret = create_lookup_pte_addr(info->vma->vm_mm, 
++						     uvaddr, &ptep);
++			if (ret) {
++				WPRINTK("Couldn't get a pte addr!\n");
++				goto fail_flush;
++			}
++
++			flags = GNTMAP_host_map | GNTMAP_application_map
++				| GNTMAP_contains_pte;
++			if (operation == WRITE)
++				flags |= GNTMAP_readonly;
++			gnttab_set_map_op(&map[op], ptep, flags,
++					  req->seg[i].gref, blkif->domid);
++			op++;
++		}
++
++		nr_sects += (req->seg[i].last_sect - 
++			     req->seg[i].first_sect + 1);
++	}
++
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
++	BUG_ON(ret);
++
++	/*
++	 * Record the handles and the pages.  Without auto-translation the
++	 * map[] entries come in kernel/user pairs per segment.
++	 */
++	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++		for (i = 0; i < (nseg*2); i+=2) {
++			unsigned long uvaddr;
++			unsigned long kvaddr;
++			unsigned long offset;
++			struct page *pg;
++
++			uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
++			kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i/2);
++
++			if (unlikely(map[i].status != 0)) {
++				WPRINTK("invalid kernel buffer -- "
++					"could not remap it\n");
++				ret |= 1;
++				map[i].handle = INVALID_GRANT_HANDLE;
++			}
++
++			if (unlikely(map[i+1].status != 0)) {
++				WPRINTK("invalid user buffer -- "
++					"could not remap it\n");
++				ret |= 1;
++				map[i+1].handle = INVALID_GRANT_HANDLE;
++			}
++
++			pending_handle(mmap_idx, pending_idx, i/2).kernel 
++				= map[i].handle;
++			pending_handle(mmap_idx, pending_idx, i/2).user   
++				= map[i+1].handle;
++
++			if (ret)
++				continue;
++
++			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
++					    FOREIGN_FRAME(map[i].dev_bus_addr
++							  >> PAGE_SHIFT));
++			offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
++			pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
++			((struct page **)info->vma->vm_private_data)[offset] =
++				pg;
++		}
++	} else {
++		for (i = 0; i < nseg; i++) {
++			unsigned long uvaddr;
++			unsigned long kvaddr;
++			unsigned long offset;
++			struct page *pg;
++
++			uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
++			kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
++
++			if (unlikely(map[i].status != 0)) {
++				WPRINTK("invalid kernel buffer -- "
++					"could not remap it\n");
++				ret |= 1;
++				map[i].handle = INVALID_GRANT_HANDLE;
++			}
++
++			pending_handle(mmap_idx, pending_idx, i).kernel 
++				= map[i].handle;
++
++			if (ret)
++				continue;
++
++			offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
++			pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
++			((struct page **)info->vma->vm_private_data)[offset] =
++				pg;
++		}
++	}
++
++	if (ret)
++		goto fail_flush;
++
++	if (xen_feature(XENFEAT_auto_translated_physmap))
++		down_write(&info->vma->vm_mm->mmap_sem);
++	/* Mark mapped pages as reserved: */
++	for (i = 0; i < req->nr_segments; i++) {
++		unsigned long kvaddr;
++		struct page *pg;
++
++		kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
++		pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
++		SetPageReserved(pg);
++		if (xen_feature(XENFEAT_auto_translated_physmap)) {
++			ret = vm_insert_page(info->vma,
++					     MMAP_VADDR(info->user_vstart,
++							usr_idx, i), pg);
++			if (ret) {
++				up_write(&info->vma->vm_mm->mmap_sem);
++				goto fail_flush;
++			}
++		}
++	}
++	if (xen_feature(XENFEAT_auto_translated_physmap))
++		up_write(&info->vma->vm_mm->mmap_sem);
++	
++	/*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
++	info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
++
++	blkif_get(blkif);
++	/* Finally, write the request message to the user ring. */
++	target = RING_GET_REQUEST(&info->ufe_ring,
++				  info->ufe_ring.req_prod_pvt);
++	memcpy(target, req, sizeof(*req));
++	target->id = usr_idx;
++	wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
++	info->ufe_ring.req_prod_pvt++;
++
++	if (operation == READ)
++		blkif->st_rd_sect += nr_sects;
++	else if (operation == WRITE)
++		blkif->st_wr_sect += nr_sects;
++
++	return;
++
++ fail_flush:
++	WPRINTK("Reached Fail_flush\n");
++	fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
++ fail_response:
++	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
++	free_req(pending_req);
++} 
++
++
++
++/******************************************************************
++ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
++ */
++
++/* Post one response on blkif's ring, then notify frontend/worker as needed. */
++static void make_response(blkif_t *blkif, u64 id,
++                          unsigned short op, int st)
++{
++	blkif_response_t  resp;
++	unsigned long     flags;
++	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
++	int more_to_do = 0, notify;
++
++	memset(&resp, 0, sizeof(resp)); /* whole struct is copied to the ring */
++	resp.id        = id;
++	resp.operation = op;
++	resp.status    = st;
++
++	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
++	/* Place on the response ring for the relevant domain. */
++	switch (blkif->blk_protocol) {
++	case BLKIF_PROTOCOL_NATIVE:
++		memcpy(RING_GET_RESPONSE(&blk_rings->native,
++					 blk_rings->native.rsp_prod_pvt),
++		       &resp, sizeof(resp));
++		break;
++	case BLKIF_PROTOCOL_X86_32:
++		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32,
++					 blk_rings->x86_32.rsp_prod_pvt),
++		       &resp, sizeof(resp));
++		break;
++	case BLKIF_PROTOCOL_X86_64:
++		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64,
++					 blk_rings->x86_64.rsp_prod_pvt),
++		       &resp, sizeof(resp));
++		break;
++	default:
++		BUG();
++	}
++	blk_rings->common.rsp_prod_pvt++;
++	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
++
++	if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
++		/*
++		 * Tail check for pending requests. Allows frontend to avoid
++		 * notifications if requests are already in flight (lower
++		 * overheads and promotes batching).
++		 */
++		RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
++	} else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
++		more_to_do = 1;
++	}
++
++	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
++	if (more_to_do)
++		blkif_notify_work(blkif);
++	if (notify)
++		notify_remote_via_irq(blkif->irq);
++}
++
++/* Module init: set up the tap backend and its blktap control chrdev. */
++static int __init blkif_init(void)
++{
++	int i, ret;
++	struct class *class;
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++	/* Two req_increase() rounds; only a failure of the first is fatal. */
++	INIT_LIST_HEAD(&pending_free);
++        for(i = 0; i < 2; i++) {
++		ret = req_increase();
++		if (ret)
++			break;
++	}
++	if (i == 0)
++		return ret;
++
++	tap_blkif_interface_init();
++
++	alloc_pending_reqs = 0;
++
++	tap_blkif_xenbus_init();
++
++	/* Dynamically allocate a major for this device */
++	ret = register_chrdev(0, "blktap", &blktap_fops);
++	if (ret < 0) {
++		WPRINTK("Couldn't register /dev/xen/blktap\n");
++		return ret;	/* pass on the actual error code */
++	}	
++	
++	blktap_major = ret;
++
++	/* tapfds[0] is always NULL */
++	blktap_next_minor++;
++
++	DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
++
++	/* Make sure the xen class exists */
++	if ((class = get_xen_class()) != NULL) {
++		/*
++		 * This will allow udev to create the blktap ctrl device.
++		 * We only want to create blktap0 first.  We don't want
++		 * to flood the sysfs system with needless blktap devices.
++		 * We only create the device when a request of a new device is
++		 * made.
++		 */
++		class_device_create(class, NULL,
++				    MKDEV(blktap_major, 0), NULL,
++				    "blktap0");
++	} else {
++		/* this is bad, but not fatal */
++		WPRINTK("blktap: sysfs xen_class not created\n");
++	}
++
++	DPRINTK("Blktap device successfully created\n");
++
++	return 0;
++}
++
++module_init(blkif_init);	/* blkif_init() is this module's init entry point */
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blktap/common.h	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,121 @@
++/* 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __BLKIF__BACKEND__COMMON_H__
++#define __BLKIF__BACKEND__COMMON_H__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <linux/blkdev.h>
++#include <linux/vmalloc.h>
++#include <asm/io.h>
++#include <asm/setup.h>
++#include <asm/pgalloc.h>
++#include <xen/evtchn.h>
++#include <asm/hypervisor.h>
++#include <xen/blkif.h>
++#include <xen/gnttab.h>
++#include <xen/driver_util.h>
++/* Debug trace prefixed with file and line; goes through pr_debug(). */
++#define DPRINTK(_f, _a...) pr_debug("(file=%s, line=%d) " _f, \
++                                    __FILE__ , __LINE__ , ## _a )
++/* Warning-level printk tagged "blk_tap: ". */
++#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
++
++struct backend_info;
++/* State of one blktap backend interface: ring, irq, worker and counters. */
++typedef struct blkif_st {
++	/* Unique identifier for this interface. */
++	domid_t           domid;
++	unsigned int      handle;
++	/* Physical parameters of the comms window. */
++	unsigned int      irq;		/* 0 until tap_blkif_map() binds it */
++	/* Comms information. */
++	enum blkif_protocol blk_protocol;
++	blkif_back_rings_t blk_rings;
++	struct vm_struct *blk_ring_area;
++	/* Back pointer to the backend_info. */
++	struct backend_info *be;
++	/* Private fields. */
++	spinlock_t       blk_ring_lock;
++	atomic_t         refcnt;	/* see blkif_get()/blkif_put() */
++
++	wait_queue_head_t   wq;
++	struct task_struct  *xenblkd;	/* thread running tap_blkif_schedule() */
++	unsigned int        waiting_reqs;
++	request_queue_t     *plug;
++
++	/* statistics (shown through the sysfs "statistics" group) */
++	unsigned long       st_print;	/* set to jiffies at allocation */
++	int                 st_rd_req;
++	int                 st_wr_req;
++	int                 st_oo_req;
++	int                 st_rd_sect;
++	int                 st_wr_sect;
++
++	wait_queue_head_t waiting_to_free;	/* woken when refcnt hits 0 */
++
++	grant_handle_t shmem_handle;	/* handle of the mapped ring page */
++	grant_ref_t    shmem_ref;	/* grant reference of the ring page */
++	
++	int		dev_num;	/* value returned by dom_to_devid() */
++	uint64_t        sectors;	/* read from the xenstore key "sectors" */
++} blkif_t;
++/* Interface lifecycle, implemented in interface.c. */
++blkif_t *tap_alloc_blkif(domid_t domid);	/* ERR_PTR(-ENOMEM) on failure */
++void tap_blkif_free(blkif_t *blkif);
++int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, 
++		  unsigned int evtchn);
++void tap_blkif_unmap(blkif_t *blkif);
++/* Take/drop a reference; the final put wakes sleepers on waiting_to_free. */
++#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
++#define blkif_put(_b)					\
++	do {						\
++		if (atomic_dec_and_test(&(_b)->refcnt))	\
++			wake_up(&(_b)->waiting_to_free);\
++	} while (0)
++
++/* NOTE(review): no use visible in this part of blktap -- confirm it is needed. */
++struct phys_req {
++	unsigned short       dev;
++	unsigned short       nr_sects;
++	struct block_device *bdev;
++	blkif_sector_t       sector_number;
++};
++
++void tap_blkif_interface_init(void);	/* creates the blkif_t slab cache */
++/* Registers the "tap" xenbus backend driver. */
++void tap_blkif_xenbus_init(void);
++/* Event-channel irq handler and the per-interface worker thread function. */
++irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++int tap_blkif_schedule(void *arg);
++/* dom_to_devid() yields blkif->dev_num, the index signal_tapdisk() is given. */
++int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
++void signal_tapdisk(int idx);
++
++#endif /* __BLKIF__BACKEND__COMMON_H__ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blktap/interface.c	2007-08-27 14:02:01.000000000 -0400
+@@ -0,0 +1,174 @@
++/******************************************************************************
++ * drivers/xen/blktap/interface.c
++ * 
++ * Block-device interface management.
++ * 
++ * Copyright (c) 2004, Keir Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++
++ */
++
++#include "common.h"
++#include <xen/evtchn.h>
++
++static kmem_cache_t *blkif_cachep;	/* slab cache for blkif_t objects */
++
++/* Allocate a zeroed blkif_t for domid, holding one initial reference. */
++blkif_t *tap_alloc_blkif(domid_t domid)
++{
++	blkif_t *new_if = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
++
++	if (new_if == NULL)
++		return ERR_PTR(-ENOMEM);
++
++	memset(new_if, 0, sizeof(*new_if));
++	/* Every field not assigned below stays zero. */
++	new_if->domid    = domid;
++	new_if->st_print = jiffies;
++	atomic_set(&new_if->refcnt, 1);
++	spin_lock_init(&new_if->blk_ring_lock);
++	init_waitqueue_head(&new_if->wq);
++	init_waitqueue_head(&new_if->waiting_to_free);
++	return new_if;
++}
++
++/* Grant-map the frontend's ring page (shared_page is its grant reference). */
++static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
++{
++	struct gnttab_map_grant_ref map_op;
++	unsigned long ring_va = (unsigned long)blkif->blk_ring_area->addr;
++
++	gnttab_set_map_op(&map_op, ring_va, GNTMAP_host_map, shared_page,
++			  blkif->domid);
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map_op, 1))
++		BUG();
++	if (map_op.status != 0) {
++		DPRINTK(" Grant table operation failure !\n");
++		return map_op.status;
++	}
++
++	/* Remember what was mapped so that it can be unmapped later. */
++	blkif->shmem_ref = shared_page;
++	blkif->shmem_handle = map_op.handle;
++	return 0;
++}
++
++/* Unmap the ring page through the grant handle saved when it was mapped. */
++static void unmap_frontend_page(blkif_t *blkif)
++{
++	struct gnttab_unmap_grant_ref unmap;
++
++	gnttab_set_unmap_op(&unmap, (unsigned long)blkif->blk_ring_area->addr,
++			    GNTMAP_host_map, blkif->shmem_handle);
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1))
++		BUG();
++}
++/* Connect blkif to its frontend: map the ring page, then bind the evtchn. */
++int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, 
++		  unsigned int evtchn)
++{
++	int err;
++
++	/* Already connected through? */
++	if (blkif->irq)
++		return 0;
++	/* Get a page-sized VM area to map the ring into. */
++	if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
++		return -ENOMEM;
++
++	err = map_frontend_page(blkif, shared_page);
++	if (err) {
++		free_vm_area(blkif->blk_ring_area);
++		return err;
++	}
++	/* Initialise the back ring that matches the selected protocol. */
++	switch (blkif->blk_protocol) {
++	case BLKIF_PROTOCOL_NATIVE:
++	{
++		blkif_sring_t *sring;
++		sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
++		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
++		break;
++	}
++	case BLKIF_PROTOCOL_X86_32:
++	{
++		blkif_x86_32_sring_t *sring_x86_32;
++		sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr;
++		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
++		break;
++	}
++	case BLKIF_PROTOCOL_X86_64:
++	{
++		blkif_x86_64_sring_t *sring_x86_64;
++		sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr;
++		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
++		break;
++	}
++	default:
++		BUG();
++	}
++	/* Bind the event channel; a non-negative result is stored as the irq. */
++	err = bind_interdomain_evtchn_to_irqhandler(
++		blkif->domid, evtchn, tap_blkif_be_int,
++		0, "blkif-backend", blkif);
++	if (err < 0) {
++		unmap_frontend_page(blkif);
++		free_vm_area(blkif->blk_ring_area);
++		blkif->blk_rings.common.sring = NULL;
++		return err;
++	}
++	blkif->irq = err;
++
++	return 0;
++}
++/* Undo tap_blkif_map(): release the irq first, then unmap the ring page. */
++void tap_blkif_unmap(blkif_t *blkif)
++{
++	if (blkif->irq != 0) {
++		unbind_from_irqhandler(blkif->irq, blkif);
++		blkif->irq = 0;
++	}
++	if (blkif->blk_rings.common.sring == NULL)
++		return;
++	unmap_frontend_page(blkif);
++	free_vm_area(blkif->blk_ring_area);
++	blkif->blk_rings.common.sring = NULL;
++}
++/* Drop the initial reference, wait for all others to go, then destroy. */
++void tap_blkif_free(blkif_t *blkif)
++{
++	atomic_dec(&blkif->refcnt);
++	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
++	/* No holders remain: disconnect and return the object to the cache. */
++	tap_blkif_unmap(blkif);
++	kmem_cache_free(blkif_cachep, blkif);
++}
++/* Create the blkif_t slab cache. NOTE(review): failure is not checked. */
++void __init tap_blkif_interface_init(void)
++{
++	blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t), 
++					 0, 0, NULL, NULL);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/blktap/xenbus.c	2007-08-27 14:02:09.000000000 -0400
+@@ -0,0 +1,473 @@
++/* drivers/xen/blktap/xenbus.c
++ *
++ * Xenbus code for blktap
++ *
++ * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
++ *
++ * Based on the blkback xenbus code:
++ *
++ * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
++ * Copyright (C) 2005 XenSource Ltd
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <stdarg.h>
++#include <linux/module.h>
++#include <linux/kthread.h>
++#include <xen/xenbus.h>
++#include "common.h"
++
++/* Per-device backend state; reachable through dev->dev.driver_data. */
++struct backend_info
++{
++	struct xenbus_device *dev;
++	blkif_t *blkif;
++	struct xenbus_watch backend_watch;	/* watch on nodename/info */
++	int xenbus_id;		/* get_id(dev->nodename) */
++	int group_added;	/* set once the sysfs statistics group exists */
++};
++
++/* Forward declarations for functions used before their definition. */
++static void connect(struct backend_info *);
++static int connect_ring(struct backend_info *);
++static int blktap_remove(struct xenbus_device *dev);
++static int blktap_probe(struct xenbus_device *dev,
++			 const struct xenbus_device_id *id);
++static void tap_backend_changed(struct xenbus_watch *, const char **,
++			    unsigned int);
++static void tap_frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state);
++/* Offset of the (len+1)-th c in str, strlen(str) if only len occur, else -ERANGE */
++static int strsep_len(const char *str, char c, unsigned int len)
++{
++        const char *cur;
++
++        for (cur = str; *cur != '\0'; cur++) {
++                if (*cur != c)
++                        continue;
++                if (len-- == 0)
++                        return cur - str;
++        }
++        return len ? -ERANGE : (int)(cur - str);
++}
++/* Parse the decimal id after the third '/' of a path; -1 if there is none. */
++static long get_id(const char *str)
++{
++        int len,end;
++        const char *ptr;
++	
++        len = strsep_len(str, '/', 2);
++        end = strlen(str);
++        /* len == end: only two '/' exist and strsep_len() returned the
++         * offset of the terminator, so nothing follows that could be
++         * parsed; stepping past it would leave the string. */
++        if ( (len < 0) || (len >= end) ) return -1;
++	
++        /* Parse in place; no bounded scratch buffer is needed. */
++        ptr = str + len + 1;
++	DPRINTK("Get_id called for %s (%s)\n",str,ptr);
++	
++        return simple_strtol(ptr, NULL, 10);
++}				
++
++/* Write the worker thread name "blktap.DOMID.DEV" into buf (TASK_COMM_LEN). */
++static int blktap_name(blkif_t *blkif, char *buf)
++{
++	struct xenbus_device *dev = blkif->be->dev;
++	char *path, *leaf;
++
++	path = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
++	if (IS_ERR(path))
++		return PTR_ERR(path);
++
++	/* Skip everything up to and including "/dev/", when present. */
++	leaf = strstr(path, "/dev/");
++	leaf = (leaf != NULL) ? leaf + strlen("/dev/") : path;
++
++	snprintf(buf, TASK_COMM_LEN, "blktap.%d.%s", blkif->domid, leaf);
++	kfree(path);
++
++	return 0;
++}
++
++/****************************************************************
++ *  sysfs interface for VBD I/O requests
++ */
++/* Defines show_NAME() plus dev_attr_NAME; "args" may refer to the local be. */
++#define VBD_SHOW(name, format, args...)					\
++	static ssize_t show_##name(struct device *_dev,			\
++				   struct device_attribute *attr,	\
++				   char *buf)				\
++	{								\
++		struct xenbus_device *dev = to_xenbus_device(_dev);	\
++		struct backend_info *be = dev->dev.driver_data;		\
++									\
++		return sprintf(buf, format, ##args);			\
++	}								\
++	DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
++/* One read-only attribute per blkif statistics counter. */
++VBD_SHOW(tap_oo_req,  "%d\n", be->blkif->st_oo_req);
++VBD_SHOW(tap_rd_req,  "%d\n", be->blkif->st_rd_req);
++VBD_SHOW(tap_wr_req,  "%d\n", be->blkif->st_wr_req);
++VBD_SHOW(tap_rd_sect, "%d\n", be->blkif->st_rd_sect);
++VBD_SHOW(tap_wr_sect, "%d\n", be->blkif->st_wr_sect);
++/* The statistics attributes, grouped under the name "statistics". */
++static struct attribute *tapstat_attrs[] = {
++	&dev_attr_tap_oo_req.attr,
++	&dev_attr_tap_rd_req.attr,
++	&dev_attr_tap_wr_req.attr,
++	&dev_attr_tap_rd_sect.attr,
++	&dev_attr_tap_wr_sect.attr,
++	NULL
++};
++
++static struct attribute_group tapstat_group = {
++	.name = "statistics",
++	.attrs = tapstat_attrs,
++};
++/* Create the statistics group for dev; success is noted in group_added. */
++int xentap_sysfs_addif(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev->dev.driver_data;
++	int err = sysfs_create_group(&dev->dev.kobj, &tapstat_group);
++
++	if (err == 0)
++		be->group_added = 1;
++	return err;
++}
++/* Remove the statistics group again (see xentap_sysfs_addif()). */
++void xentap_sysfs_delif(struct xenbus_device *dev)
++{
++	sysfs_remove_group(&dev->dev.kobj, &tapstat_group);
++}
++/* Tear down a backend; also the failure path of blktap_probe(). */
++static int blktap_remove(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev->dev.driver_data;
++
++	if (be->backend_watch.node) {
++		unregister_xenbus_watch(&be->backend_watch);
++		kfree(be->backend_watch.node);
++		be->backend_watch.node = NULL;
++	}
++	if (be->blkif) {
++		if (be->blkif->xenblkd)
++			kthread_stop(be->blkif->xenblkd); /* worker goes first */
++		signal_tapdisk(be->blkif->dev_num);
++		tap_blkif_free(be->blkif);	/* waits for refcnt to reach 0 */
++		be->blkif = NULL;
++	}
++	if (be->group_added)
++		xentap_sysfs_delif(be->dev);
++	kfree(be);
++	dev->dev.driver_data = NULL;
++	return 0;
++}
++/* Go Connected and start the worker once both irq and sector count are set. */
++static void tap_update_blkif_status(blkif_t *blkif)
++{ 
++	int err;
++	char name[TASK_COMM_LEN];
++
++	/* Not ready to connect? */
++	if(!blkif->irq || !blkif->sectors) {
++		return;
++	} 
++
++	/* Already connected? */
++	if (blkif->be->dev->state == XenbusStateConnected)
++		return;
++
++	/* Attempt to connect: exit if we fail to. */
++	connect(blkif->be);
++	if (blkif->be->dev->state != XenbusStateConnected)
++		return;
++
++	err = blktap_name(blkif, name);
++	if (err) {
++		xenbus_dev_error(blkif->be->dev, err, "get blktap dev name");
++		return;
++	}
++
++	err = xentap_sysfs_addif(blkif->be->dev);
++	if (err) {
++		xenbus_dev_fatal(blkif->be->dev, err, 
++				 "creating sysfs entries");
++		return;
++	}
++	/* Start the per-interface worker under the name built above. */
++	blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, name);
++	if (IS_ERR(blkif->xenblkd)) {
++		err = PTR_ERR(blkif->xenblkd);
++		blkif->xenblkd = NULL;
++		xenbus_dev_fatal(blkif->be->dev, err, "start xenblkd");
++		WPRINTK("Error starting thread\n");
++	}
++}
++
++/**
++ * Entry point to this code when a new device is created.  Allocate
++ * the basic structures, and watch the store waiting for the
++ * user-space program to tell us the physical device info.  Switch to
++ * InitWait.
++ */
++static int blktap_probe(struct xenbus_device *dev,
++			 const struct xenbus_device_id *id)
++{
++	int err;
++	struct backend_info *be = kzalloc(sizeof(struct backend_info),
++					  GFP_KERNEL);
++	if (!be) {
++		xenbus_dev_fatal(dev, -ENOMEM,
++				 "allocating backend structure");
++		return -ENOMEM;
++	}
++
++	be->dev = dev;
++	dev->dev.driver_data = be;
++	be->xenbus_id = get_id(dev->nodename);
++
++	be->blkif = tap_alloc_blkif(dev->otherend_id);
++	if (IS_ERR(be->blkif)) {
++		err = PTR_ERR(be->blkif);
++		be->blkif = NULL;
++		xenbus_dev_fatal(dev, err, "creating block interface");
++		goto fail;
++	}
++	/* sectors == 0 keeps tap_update_blkif_status() from connecting yet. */
++	/* setup back pointer */
++	be->blkif->be = be;
++	be->blkif->sectors = 0;
++
++	/* set a watch on disk info, waiting for userspace to update details*/
++	err = xenbus_watch_path2(dev, dev->nodename, "info",
++				 &be->backend_watch, tap_backend_changed);
++	if (err)
++		goto fail;
++	
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
++		goto fail;
++	return 0;
++	/* blktap_remove() undoes whatever was set up so far and frees be. */
++fail:
++	DPRINTK("blktap probe failed\n");
++	blktap_remove(dev);
++	return err;
++}
++
++
++/**
++ * Callback received when the user space code has placed the device
++ * information in xenstore. 
++ */
++static void tap_backend_changed(struct xenbus_watch *watch,
++			    const char **vec, unsigned int len)
++{
++	int err;
++	unsigned long info;
++	struct backend_info *be
++		= container_of(watch, struct backend_info, backend_watch);
++	struct xenbus_device *dev = be->dev;
++	
++	/** 
++	 * Check to see whether userspace code has opened the image 
++	 * and written sector
++	 * and disk info to xenstore
++	 */
++	err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info, 
++			    NULL);
++	if (XENBUS_EXIST_ERR(err))
++		return;
++	if (err) {
++		xenbus_dev_error(dev, err, "getting info");
++		return;
++	}
++
++	DPRINTK("Userspace update on disk info, %lu\n",info);
++	/* NOTE(review): the result of the "sectors" gather is never checked. */
++	err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu", 
++			    &be->blkif->sectors, NULL);
++
++	/* Associate tap dev with domid*/
++	be->blkif->dev_num = dom_to_devid(be->blkif->domid, be->xenbus_id, 
++					  be->blkif);
++	DPRINTK("Thread started for domid [%d], connecting disk\n", 
++		be->blkif->dev_num);
++
++	tap_update_blkif_status(be->blkif);
++}
++
++/**
++ * Callback received when the frontend's state changes.
++ */
++static void tap_frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state)
++{
++	struct backend_info *be = dev->dev.driver_data;
++	int err;
++
++	DPRINTK("\n");
++	/* frontend_state is the other end's state; dev->state is this end's. */
++	switch (frontend_state) {
++	case XenbusStateInitialising:
++		if (dev->state == XenbusStateClosed) {
++			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
++			       __FUNCTION__, dev->nodename);
++			xenbus_switch_state(dev, XenbusStateInitWait);
++		}
++		break;
++
++	case XenbusStateInitialised:
++	case XenbusStateConnected:
++		/* Ensure we connect even when two watches fire in 
++		   close succession and we miss the intermediate value 
++		   of frontend_state. */
++		if (dev->state == XenbusStateConnected)
++			break;
++
++		err = connect_ring(be);
++		if (err)
++			break;
++		tap_update_blkif_status(be->blkif);
++		break;
++
++	case XenbusStateClosing:
++		if (be->blkif->xenblkd) {
++			kthread_stop(be->blkif->xenblkd);
++			be->blkif->xenblkd = NULL;
++		}
++		xenbus_switch_state(dev, XenbusStateClosing);
++		break;
++
++	case XenbusStateClosed:
++		xenbus_switch_state(dev, XenbusStateClosed);
++		if (xenbus_dev_is_online(dev))
++			break;
++		/* fall through if not online */
++	case XenbusStateUnknown:
++		device_unregister(&dev->dev);
++		break;
++
++	default:
++		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
++				 frontend_state);
++		break;
++	}
++}
++
++
++/**
++ * Switch to Connected state.
++ *
++ * Nothing is returned: a failure is reported through xenbus_dev_fatal()
++ * and callers check dev->state afterwards to see whether it took effect.
++ */
++static void connect(struct backend_info *be)
++{
++	struct xenbus_device *dev = be->dev;
++	int err;
++
++	err = xenbus_switch_state(dev, XenbusStateConnected);
++	if (err == 0)
++		return;
++	xenbus_dev_fatal(dev, err, "switching to Connected state");
++}
++
++/* Read ring-ref, event-channel and protocol from the frontend; map the ring. */
++static int connect_ring(struct backend_info *be)
++{
++	struct xenbus_device *dev = be->dev;
++	unsigned long ring_ref;
++	unsigned int evtchn;
++	char protocol[64];
++	int err;
++
++	DPRINTK("%s\n", dev->otherend);
++
++	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", 
++			    &ring_ref, "event-channel", "%u", &evtchn, NULL);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "reading %s/ring-ref and event-channel",
++				 dev->otherend);
++		return err;
++	}
++
++	be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
++	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
++			    "%63s", protocol, NULL);
++	if (err)
++		strcpy(protocol, "unspecified, assuming native");
++	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
++		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
++	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
++		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
++	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
++		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
++	else {
++		xenbus_dev_fatal(dev, -EINVAL, "unknown fe protocol %s",
++				 protocol);
++		return -EINVAL;	/* err is 0 on this path, so it cannot be used */
++	}
++	printk(KERN_INFO
++	       "blktap: ring-ref %lu, event-channel %u, protocol %d (%s)\n",
++	       ring_ref, evtchn, be->blkif->blk_protocol, protocol);
++
++	/* Map the shared frame, irq etc. */
++	err = tap_blkif_map(be->blkif, ring_ref, evtchn);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
++				 ring_ref, evtchn);
++		return err;
++	}
++	return 0;
++}
++
++
++/* ** Driver Registration ** */
++
++/* Device type ids handled by this driver ("tap"). */
++static struct xenbus_device_id blktap_ids[] = {
++	{ "tap" },
++	{ "" }
++};
++
++/* The xenbus driver: probe/remove plus the frontend state callback. */
++static struct xenbus_driver blktap = {
++	.name = "tap",
++	.owner = THIS_MODULE,
++	.ids = blktap_ids,
++	.probe = blktap_probe,
++	.remove = blktap_remove,
++	.otherend_changed = tap_frontend_changed
++};
++
++/* Register the tap backend with xenbus (the call's result is discarded). */
++void tap_blkif_xenbus_init(void)
++{
++	xenbus_register_backend(&blktap);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/char/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,2 @@
++
++obj-y	:= mem.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/char/mem.c	2007-08-27 14:02:01.000000000 -0400
+@@ -0,0 +1,203 @@
++/*
++ *  Originally from linux/drivers/char/mem.c
++ *
++ *  Copyright (C) 1991, 1992  Linus Torvalds
++ *
++ *  Added devfs support. 
++ *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
++ *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
++ */
++
++#include <linux/mm.h>
++#include <linux/miscdevice.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++#include <linux/mman.h>
++#include <linux/random.h>
++#include <linux/init.h>
++#include <linux/raw.h>
++#include <linux/tty.h>
++#include <linux/capability.h>
++#include <linux/smp_lock.h>
++#include <linux/ptrace.h>
++#include <linux/device.h>
++#include <asm/pgalloc.h>
++#include <asm/uaccess.h>
++#include <asm/io.h>
++#include <asm/hypervisor.h>
++/* Fallback when the architecture supplies no range check: accept everything. */
++#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
++static inline int valid_phys_addr_range(unsigned long addr, size_t count)
++{
++	return 1;
++}
++#endif
++
++/*
++ * This function reads the *physical* memory. The f_pos points directly to the
++ * memory location. Returns the byte count delivered, or -EFAULT.
++ */
++static ssize_t read_mem(struct file * file, char __user * buf,
++			size_t count, loff_t *ppos)
++{
++	unsigned long p = *ppos, ignored;
++	ssize_t read = 0, sz;
++	void __iomem *v;
++
++	if (!valid_phys_addr_range(p, count))
++		return -EFAULT;
++
++	while (count > 0) {
++		/*
++		 * Handle first page in case it's not aligned
++		 */
++		if (-p & (PAGE_SIZE - 1))
++			sz = -p & (PAGE_SIZE - 1);
++		else
++			sz = PAGE_SIZE;
++		/* Clamp the chunk to what is left of the request. */
++		sz = min_t(unsigned long, sz, count);
++
++		v = xlate_dev_mem_ptr(p, sz);
++		if (IS_ERR(v) || v == NULL) {
++			/*
++			 * Some programs (e.g., dmidecode) groove off into
++			 * weird RAM areas where no tables can possibly exist
++			 * (because Xen will have stomped on them!). These
++			 * programs get rather upset if we let them know that
++			 * Xen failed their access, so we fake out a read of
++			 * all zeroes.
++			 */
++			if (clear_user(buf, count))
++				return -EFAULT;
++			read += count;
++			break;
++		}
++		/* A nonzero "ignored" means the copy to user space failed. */
++		ignored = copy_to_user(buf, v, sz);
++		xlate_dev_mem_ptr_unmap(v);
++		if (ignored)
++			return -EFAULT;
++		buf += sz;
++		p += sz;
++		count -= sz;
++		read += sz;
++	}
++
++	*ppos += read;
++	return read;
++}
++/* Write user data to physical memory at *ppos; may return a short count. */
++static ssize_t write_mem(struct file * file, const char __user * buf, 
++			 size_t count, loff_t *ppos)
++{
++	unsigned long p = *ppos, ignored;
++	ssize_t written = 0, sz;
++	void __iomem *v;
++
++	if (!valid_phys_addr_range(p, count))
++		return -EFAULT;
++
++	while (count > 0) {
++		/*
++		 * Handle first page in case it's not aligned
++		 */
++		if (-p & (PAGE_SIZE - 1))
++			sz = -p & (PAGE_SIZE - 1);
++		else
++			sz = PAGE_SIZE;
++		/* Clamp the chunk to what is left of the request. */
++		sz = min_t(unsigned long, sz, count);
++		/* Unmappable chunk: stop; an error only if nothing was written. */
++		v = xlate_dev_mem_ptr(p, sz);
++		if (v == NULL)
++			break;
++		if (IS_ERR(v)) {
++			if (written == 0)
++				return PTR_ERR(v);
++			break;
++		}
++		/* "ignored" is the part of this chunk that was not copied. */
++		ignored = copy_from_user(v, buf, sz);
++		xlate_dev_mem_ptr_unmap(v);
++		if (ignored) {
++			written += sz - ignored;
++			if (written)
++				break;
++			return -EFAULT;
++		}
++		buf += sz;
++		p += sz;
++		count -= sz;
++		written += sz;
++	}
++
++	*ppos += written;
++	return written;
++}
++
++#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
++/* Only files opened with O_SYNC are mapped uncached. */
++static inline int uncached_access(struct file *file)
++{
++	/* Xen sets correct MTRR type on non-RAM for us. */
++	return (file->f_flags & O_SYNC) ? 1 : 0;
++}
++
++/* mmap handler: remap the requested frame range directly, as DOMID_IO. */
++static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
++{
++	size_t span = vma->vm_end - vma->vm_start;
++
++	if (uncached_access(file))
++		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
++
++	/* We want to return the real error code, not EAGAIN. */
++	return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
++				      span, vma->vm_page_prot, DOMID_IO);
++}
++#endif
++
++/*
++ * The memory devices use the full 32/64 bits of the offset, and so we cannot
++ * check against negative addresses: they are ok. The return value is weird,
++ * though, in that case (0).
++ *
++ * also note that seeking relative to the "end of file" isn't supported:
++ * it has no meaning, so it returns -EINVAL.
++ */
++static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
++{
++	struct mutex *lock = &file->f_dentry->d_inode->i_mutex;
++	loff_t ret = -EINVAL;
++
++	/* f_pos is updated while holding the inode mutex. */
++	mutex_lock(lock);
++	switch (orig) {
++	case 1:
++		offset += file->f_pos;	/* relative seek: rebase, then share */
++		/* fall through */
++	case 0:
++		file->f_pos = offset;
++		ret = file->f_pos;
++		force_successful_syscall_return();
++		break;
++	default:
++		break;	/* anything else keeps ret == -EINVAL */
++	}
++	mutex_unlock(lock);
++	return ret;
++}
++
++static int open_mem(struct inode * inode, struct file * filp)
++{	/* open handler: succeeds only for callers holding CAP_SYS_RAWIO, otherwise -EPERM. */
++	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
++}
++
++const struct file_operations mem_fops = { /* file operations table wiring up the handlers defined in this file */
++	.llseek		= memory_lseek,
++	.read		= read_mem,
++	.write		= write_mem,
++	.mmap		= xen_mmap_mem,
++	.open		= open_mem,
++};
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/console/Makefile	2007-08-27 14:01:53.000000000 -0400
+@@ -0,0 +1,2 @@
++
++obj-y	:= console.o xencons_ring.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/console/console.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,721 @@
++/******************************************************************************
++ * console.c
++ * 
++ * Virtual console driver.
++ * 
++ * Copyright (c) 2002-2004, K A Fraser.
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/errno.h>
++#include <linux/signal.h>
++#include <linux/sched.h>
++#include <linux/interrupt.h>
++#include <linux/tty.h>
++#include <linux/tty_flip.h>
++#include <linux/serial.h>
++#include <linux/major.h>
++#include <linux/ptrace.h>
++#include <linux/ioport.h>
++#include <linux/mm.h>
++#include <linux/slab.h>
++#include <linux/init.h>
++#include <linux/console.h>
++#include <linux/bootmem.h>
++#include <linux/sysrq.h>
++#include <linux/screen_info.h>
++#include <linux/vt.h>
++#include <asm/io.h>
++#include <asm/irq.h>
++#include <asm/uaccess.h>
++#include <xen/interface/xen.h>
++#include <xen/interface/event_channel.h>
++#include <asm/hypervisor.h>
++#include <xen/evtchn.h>
++#include <xen/xenbus.h>
++#include <xen/xencons.h>
++
++/*
++ * Modes:
++ *  'xencons=off'  [XC_OFF]:     Console is disabled.
++ *  'xencons=tty'  [XC_TTY]:     Console attached to '/dev/tty[0-9]+'.
++ *  'xencons=ttyS' [XC_SERIAL]:  Console attached to '/dev/ttyS[0-9]+'.
++ *  'xencons=xvc'  [XC_XVC]:     Console attached to '/dev/xvc0'.
++ *  default:                     DOM0 -> XC_SERIAL ; all others -> XC_TTY.
++ * 
++ * NB. In mode XC_TTY, we create dummy consoles for tty2-63. This suppresses
++ * warnings from standard distro startup scripts.
++ */
++static enum { /* console mode; set by xencons_early_setup() and the "xencons=" option */
++	XC_OFF, XC_TTY, XC_SERIAL, XC_XVC
++} xc_mode;
++static int xc_num = -1; /* console number; -1 = not given, xen_console_init() then picks the mode's default */
++
++/* /dev/xvc0 device number allocated by lanana.org. */
++#define XEN_XVC_MAJOR 204
++#define XEN_XVC_MINOR 191
++
++#ifdef CONFIG_MAGIC_SYSRQ
++static unsigned long sysrq_requested;
++extern int sysrq_enabled;
++#endif
++
++void xencons_early_setup(void)
++{	/* Pick the default console mode: XC_SERIAL in the initial domain, XC_TTY elsewhere. */
++	extern int console_use_vt;
++
++	if (is_initial_xendomain()) {
++		xc_mode = XC_SERIAL;
++	} else {
++		xc_mode = XC_TTY;
++		console_use_vt = 0; /* cleared whenever XC_TTY is selected (xencons_setup() does the same) */
++	}
++}
++
++static int __init xencons_setup(char *str)
++{	/* Parse "xencons=<mode>[<n>]": sets xc_mode from the prefix and xc_num from any digits that follow. */
++	char *q;
++	int n;
++	extern int console_use_vt;
++
++	console_use_vt = 1;
++	if (!strncmp(str, "ttyS", 4)) { /* must be tested before the shorter "tty" prefix */
++		xc_mode = XC_SERIAL;
++		str += 4;
++	} else if (!strncmp(str, "tty", 3)) {
++		xc_mode = XC_TTY;
++		str += 3;
++		console_use_vt = 0;
++	} else if (!strncmp(str, "xvc", 3)) {
++		xc_mode = XC_XVC;
++		str += 3;
++	} else if (!strncmp(str, "off", 3)) {
++		xc_mode = XC_OFF;
++		str += 3;
++	} /* an unrecognised prefix leaves xc_mode unchanged */
++
++	n = simple_strtol(str, &q, 10);
++	if (q != str) /* at least one digit was consumed */
++		xc_num = n;
++
++	return 1;
++}
++__setup("xencons=", xencons_setup);
++
++/* The kernel and user-land drivers share a common transmit buffer (a ring indexed through WBUF_MASK). */
++static unsigned int wbuf_size = 4096; /* must stay a power of two: WBUF_MASK uses wbuf_size-1 as a bit mask */
++#define WBUF_MASK(_i) ((_i)&(wbuf_size-1))
++static char *wbuf;
++static unsigned int wc, wp; /* write_cons, write_prod: incremented unmasked, reduced with WBUF_MASK on use */
++
++static int __init xencons_bufsz_setup(char *str)
++{	/* Parse "xencons_bufsz=<n>": can only enlarge the transmit buffer, never shrink it. */
++	unsigned int goal;
++	goal = simple_strtoul(str, NULL, 0); /* base 0: decimal, octal or hex accepted */
++	if (goal) {
++		goal = roundup_pow_of_two(goal); /* WBUF_MASK needs a power-of-two size */
++		if (wbuf_size < goal)
++			wbuf_size = goal;
++	}
++	return 1;
++}
++__setup("xencons_bufsz=", xencons_bufsz_setup);
++
++/* This lock protects accesses to the common transmit buffer. */
++static DEFINE_SPINLOCK(xencons_lock);
++
++/* Common transmit-kick routine. */
++static void __xencons_tx_flush(void);
++
++static struct tty_driver *xencons_driver;
++
++/******************** Kernel console driver ********************************/
++
++static void kcons_write(struct console *c, const char *s, unsigned int count)
++{	/* Console write hook used outside the initial domain: queue s[0..count) in wbuf, flushing until everything is queued. */
++	int           i = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&xencons_lock, flags);
++
++	while (i < count) { /* loops (with the lock held) until every byte has been queued */
++		for (; i < count; i++) {
++			if ((wp - wc) >= (wbuf_size - 1)) /* need room for two bytes: '\n' is followed by '\r' */
++				break;
++			if ((wbuf[WBUF_MASK(wp++)] = s[i]) == '\n')
++				wbuf[WBUF_MASK(wp++)] = '\r';
++		}
++
++		__xencons_tx_flush(); /* drain the buffer so the next pass can make progress */
++	}
++
++	spin_unlock_irqrestore(&xencons_lock, flags);
++}
++
++static void kcons_write_dom0(struct console *c, const char *s, unsigned int count)
++{	/* Write count bytes of s through the CONSOLEIO_write hypercall; c is unused. */
++
++	while (count > 0) {
++		int rc;
++		rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
++		if (rc <= 0)
++			break; /* error or no progress: the remaining bytes are dropped */
++		count -= rc; /* rc is treated as the number of bytes accepted */
++		s += rc;
++	}
++}
++
++static struct tty_driver *kcons_device(struct console *c, int *index)
++{	/* console .device hook: always reports index 0 of xencons_driver (NULL until xencons_init() has registered it). */
++	*index = 0;
++	return xencons_driver;
++}
++
++static struct console kcons_info = { /* .write and .name are filled in by xen_console_init() before registration */
++	.device	= kcons_device,
++	.flags	= CON_PRINTBUFFER | CON_ENABLED,
++	.index	= -1,
++};
++
++static int __init xen_console_init(void)
++{	/* console_initcall: choose write hook and console name, allocate wbuf, register kcons_info. Always returns 0. */
++	if (!is_running_on_xen())
++		goto out;
++
++	if (is_initial_xendomain()) {
++		kcons_info.write = kcons_write_dom0;
++	} else {
++		if (!xen_start_info->console.domU.evtchn) /* no console event channel: register nothing */
++			goto out;
++		kcons_info.write = kcons_write;
++	}
++
++	switch (xc_mode) { /* also supplies the default xc_num when none was given on the command line */
++	case XC_XVC:
++		strcpy(kcons_info.name, "xvc");
++		if (xc_num == -1)
++			xc_num = 0;
++		break;
++
++	case XC_SERIAL:
++		strcpy(kcons_info.name, "ttyS");
++		if (xc_num == -1)
++			xc_num = 0;
++		break;
++
++	case XC_TTY:
++		strcpy(kcons_info.name, "tty");
++		if (xc_num == -1)
++			xc_num = 1;
++		break;
++
++	default: /* XC_OFF: no kernel console is registered */
++		goto out;
++	}
++
++	wbuf = alloc_bootmem(wbuf_size); /* result is used unchecked */
++
++	register_console(&kcons_info);
++
++ out:
++	return 0;
++}
++console_initcall(xen_console_init);
++
++/*** Console debugging aid: printf straight to Xen. Output is cut at 1023 bytes; always returns 0. ***/
++asmlinkage int xprintk(const char *fmt, ...)
++{
++	va_list args;
++	int printk_len;
++	static char printk_buf[1024]; /* static and unlocked: concurrent callers can overwrite each other */
++
++	/* Format into the buffer; vscnprintf returns the bytes actually stored. */
++	va_start(args, fmt);
++	printk_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
++	va_end(args);
++
++	/* Send only the stored bytes to Xen (vsnprintf's result could exceed the buffer). */
++	kcons_write_dom0(NULL, printk_buf, printk_len);
++
++	return 0;
++}
++
++/*** Forcibly flush console data before dying: busy-waits until wbuf is empty, without taking xencons_lock. ***/
++void xencons_force_flush(void)
++{
++	int sz;
++
++	/* Emergency console is synchronous, so there's nothing to flush. */
++	if (!is_running_on_xen() ||
++	    is_initial_xendomain() ||
++	    !xen_start_info->console.domU.evtchn)
++		return;
++
++	/* Spin until console data is flushed through to the daemon. */
++	while (wc != wp) {
++		int sent;
++		/* Send at most the contiguous run up to the end of wbuf; the wrapped part goes next pass. */
++		sz = min_t(unsigned int, wp - wc, wbuf_size - WBUF_MASK(wc));
++		sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
++		if (sent > 0)
++			wc += sent;
++	}
++}
++
++
++void dom0_init_screen_info(const struct dom0_vga_console_info *info)
++{
++	switch (info->video_type) {
++	case XEN_VGATYPE_TEXT_MODE_3:
++		screen_info.orig_video_mode = 3;
++		screen_info.orig_video_ega_bx = 3;
++		screen_info.orig_video_isVGA = 1;
++		screen_info.orig_video_lines = info->u.text_mode_3.rows;
++		screen_info.orig_video_cols = info->u.text_mode_3.columns;
++		screen_info.orig_x = info->u.text_mode_3.cursor_x;
++		screen_info.orig_y = info->u.text_mode_3.cursor_y;
++		screen_info.orig_video_points =
++			info->u.text_mode_3.font_height;
++		break;
++	case XEN_VGATYPE_VESA_LFB:
++		screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
++		screen_info.lfb_width = info->u.vesa_lfb.width;
++		screen_info.lfb_height = info->u.vesa_lfb.height;
++		screen_info.lfb_depth = info->u.vesa_lfb.bits_per_pixel;
++		screen_info.lfb_base = info->u.vesa_lfb.lfb_base;
++		screen_info.lfb_size = info->u.vesa_lfb.lfb_size;
++		screen_info.lfb_linelength = info->u.vesa_lfb.bytes_per_line;
++		screen_info.red_size = info->u.vesa_lfb.red_size;
++		screen_info.red_pos = info->u.vesa_lfb.red_pos;
++		screen_info.green_size = info->u.vesa_lfb.green_size;
++		screen_info.green_pos = info->u.vesa_lfb.green_pos;
++		screen_info.blue_size = info->u.vesa_lfb.blue_size;
++		screen_info.blue_pos = info->u.vesa_lfb.blue_pos;
++		screen_info.rsvd_size = info->u.vesa_lfb.rsvd_size;
++		screen_info.rsvd_pos = info->u.vesa_lfb.rsvd_pos;
++		break;
++	}
++}
++
++
++/******************** User-space console driver (/dev/console) ************/
++
++#define DRV(_d)         (_d)
++#define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) &&		\
++			 ((_tty)->index != (xc_num - 1))) /* in XC_TTY mode every tty except index xc_num-1 is a dummy */
++
++static struct termios *xencons_termios[MAX_NR_CONSOLES];
++static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
++static struct tty_struct *xencons_tty;
++static int xencons_priv_irq;
++static char x_char;
++
++void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
++{	/* Feed len received bytes to the open console tty, intercepting ^O-prefixed SysRq keys; input is dropped if no tty is open. */
++	int           i;
++	unsigned long flags;
++
++	spin_lock_irqsave(&xencons_lock, flags);
++	if (xencons_tty == NULL)
++		goto out;
++
++	for (i = 0; i < len; i++) {
++#ifdef CONFIG_MAGIC_SYSRQ
++		if (sysrq_enabled) {
++			if (buf[i] == '\x0f') { /* ^O */
++				if (!sysrq_requested) {
++					sysrq_requested = jiffies;
++					continue; /* don't print sysrq key */
++				}
++				sysrq_requested = 0; /* second ^O: pass one ^O through */
++			} else if (sysrq_requested) {
++				unsigned long sysrq_timeout =
++					sysrq_requested + HZ*2;
++				sysrq_requested = 0;
++				if (time_before(jiffies, sysrq_timeout)) {
++					spin_unlock_irqrestore(&xencons_lock, flags);
++					handle_sysrq(buf[i], regs, xencons_tty);
++					spin_lock_irqsave(&xencons_lock, flags);
++					/* xencons_close() may have cleared the tty while unlocked. */
++					if (xencons_tty == NULL)
++						goto out;
++					continue;
++				}
++			}
++		}
++#endif
++		tty_insert_flip_char(xencons_tty, buf[i], 0);
++	}
++	tty_flip_buffer_push(xencons_tty);
++
++ out:
++	spin_unlock_irqrestore(&xencons_lock, flags);
++}
++
++static void __xencons_tx_flush(void)
++{	/* Send any pending x_char, then drain wbuf; every caller visible in this file holds xencons_lock. */
++	int sent, sz, work_done = 0;
++
++	if (x_char) {
++		if (is_initial_xendomain())
++			kcons_write_dom0(NULL, &x_char, 1);
++		else
++			while (x_char) /* retry until the ring accepts the byte */
++				if (xencons_ring_send(&x_char, 1) == 1)
++					break;
++		x_char = 0;
++		work_done = 1;
++	}
++
++	while (wc != wp) {
++		sz = wp - wc;
++		if (sz > (wbuf_size - WBUF_MASK(wc))) /* stop at the end of wbuf; the wrapped part goes next pass */
++			sz = wbuf_size - WBUF_MASK(wc);
++		if (is_initial_xendomain()) {
++			kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
++			wc += sz; /* counted as consumed whether or not the hypercall took it all */
++		} else {
++			sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
++			if (sent == 0) /* ring full: leave the rest queued */
++				break;
++			wc += sent;
++		}
++		work_done = 1;
++	}
++
++	if (work_done && (xencons_tty != NULL)) { /* space was freed: wake blocked writers */
++		wake_up_interruptible(&xencons_tty->write_wait);
++		if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
++		    (xencons_tty->ldisc.write_wakeup != NULL))
++			(xencons_tty->ldisc.write_wakeup)(xencons_tty);
++	}
++}
++
++void xencons_tx(void)
++{	/* Locked wrapper: run __xencons_tx_flush() under xencons_lock with interrupts disabled. */
++	unsigned long flags;
++
++	spin_lock_irqsave(&xencons_lock, flags);
++	__xencons_tx_flush();
++	spin_unlock_irqrestore(&xencons_lock, flags);
++}
++
++/* Privileged receive callback and transmit kicker (bound to VIRQ_CONSOLE by xencons_init() in the initial domain). */
++static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
++					  struct pt_regs *regs)
++{
++	static char rbuf[16];
++	int         l;
++
++	while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0) /* 16 == sizeof(rbuf); loop until nothing more is returned */
++		xencons_rx(rbuf, l, regs);
++
++	xencons_tx(); /* also use the interrupt to push pending output */
++
++	return IRQ_HANDLED;
++}
++
++static int xencons_write_room(struct tty_struct *tty)
++{	/* Free bytes in the shared transmit buffer; read without taking xencons_lock. */
++	return wbuf_size - (wp - wc);
++}
++
++static int xencons_chars_in_buffer(struct tty_struct *tty)
++{	/* Bytes queued but not yet sent; read without taking xencons_lock. */
++	return wp - wc;
++}
++
++static void xencons_send_xchar(struct tty_struct *tty, char ch)
++{	/* Queue ch as the priority character and flush at once; __xencons_tx_flush() sends x_char before wbuf. */
++	unsigned long flags;
++
++	if (DUMMY_TTY(tty))
++		return;
++
++	spin_lock_irqsave(&xencons_lock, flags);
++	x_char = ch;
++	__xencons_tx_flush();
++	spin_unlock_irqrestore(&xencons_lock, flags);
++}
++
++static void xencons_throttle(struct tty_struct *tty)
++{	/* throttle hook: send the tty's STOP character when IXOFF is set. */
++	if (DUMMY_TTY(tty))
++		return;
++
++	if (I_IXOFF(tty))
++		xencons_send_xchar(tty, STOP_CHAR(tty));
++}
++
++static void xencons_unthrottle(struct tty_struct *tty)
++{	/* unthrottle hook: with IXOFF set, cancel an unsent x_char or else send the START character. */
++	if (DUMMY_TTY(tty))
++		return;
++
++	if (I_IXOFF(tty)) {
++		if (x_char != 0) /* NOTE(review): x_char is read and cleared here without xencons_lock */
++			x_char = 0;
++		else
++			xencons_send_xchar(tty, START_CHAR(tty));
++	}
++}
++
++static void xencons_flush_buffer(struct tty_struct *tty)
++{	/* flush_buffer hook: discard everything still queued in the shared transmit buffer. */
++	unsigned long flags;
++
++	if (DUMMY_TTY(tty))
++		return;
++
++	spin_lock_irqsave(&xencons_lock, flags);
++	wc = wp = 0; /* the buffer is shared with kcons_write(), so queued kernel output is dropped too */
++	spin_unlock_irqrestore(&xencons_lock, flags);
++}
++
++static inline int __xencons_put_char(int ch)
++{	/* Append one byte to wbuf; returns 1 on success, 0 if the buffer is full. Callers here hold xencons_lock. */
++	char _ch = (char)ch;
++	if ((wp - wc) == wbuf_size)
++		return 0;
++	wbuf[WBUF_MASK(wp++)] = _ch;
++	return 1;
++}
++
++static int xencons_write( /* tty write hook: queue up to count bytes and flush; returns the number accepted */
++	struct tty_struct *tty,
++	const unsigned char *buf,
++	int count)
++{
++	int i;
++	unsigned long flags;
++
++	if (DUMMY_TTY(tty))
++		return count; /* dummy ttys claim to accept and then discard everything */
++
++	spin_lock_irqsave(&xencons_lock, flags);
++
++	for (i = 0; i < count; i++)
++		if (!__xencons_put_char(buf[i]))
++			break; /* buffer full: report a short write */
++
++	if (i != 0)
++		__xencons_tx_flush();
++
++	spin_unlock_irqrestore(&xencons_lock, flags);
++
++	return i;
++}
++
++static void xencons_put_char(struct tty_struct *tty, u_char ch)
++{	/* put_char hook: queue one byte without flushing. */
++	unsigned long flags;
++
++	if (DUMMY_TTY(tty))
++		return;
++
++	spin_lock_irqsave(&xencons_lock, flags);
++	(void)__xencons_put_char(ch); /* the byte is silently dropped when the buffer is full */
++	spin_unlock_irqrestore(&xencons_lock, flags);
++}
++
++static void xencons_flush_chars(struct tty_struct *tty)
++{	/* flush_chars hook: push out whatever is queued in wbuf. */
++	unsigned long flags;
++
++	if (DUMMY_TTY(tty))
++		return;
++
++	spin_lock_irqsave(&xencons_lock, flags);
++	__xencons_tx_flush();
++	spin_unlock_irqrestore(&xencons_lock, flags);
++}
++
++static void xencons_wait_until_sent(struct tty_struct *tty, int timeout)
++{	/* Sleep one tick at a time until the transmit buffer is empty, a signal arrives, or timeout jiffies pass (0 = no limit). */
++	unsigned long orig_jiffies = jiffies;
++
++	if (DUMMY_TTY(tty))
++		return;
++
++	while (DRV(tty->driver)->chars_in_buffer(tty)) {
++		set_current_state(TASK_INTERRUPTIBLE);
++		schedule_timeout(1);
++		if (signal_pending(current))
++			break;
++		if (timeout && time_after(jiffies, orig_jiffies + timeout))
++			break;
++	}
++
++	set_current_state(TASK_RUNNING);
++}
++
++static int xencons_open(struct tty_struct *tty, struct file *filp)
++{	/* open hook: the first real tty opened becomes xencons_tty, the target for received input. Always returns 0. */
++	unsigned long flags;
++
++	if (DUMMY_TTY(tty))
++		return 0;
++
++	spin_lock_irqsave(&xencons_lock, flags);
++	tty->driver_data = NULL;
++	if (xencons_tty == NULL)
++		xencons_tty = tty;
++	__xencons_tx_flush(); /* push out anything queued before a tty was attached */
++	spin_unlock_irqrestore(&xencons_lock, flags);
++
++	return 0;
++}
++
++static void xencons_close(struct tty_struct *tty, struct file *filp)
++{	/* close hook: on the last close, drain and flush the tty and detach it from the driver. */
++	unsigned long flags;
++
++	if (DUMMY_TTY(tty))
++		return;
++
++	mutex_lock(&tty_mutex);
++
++	if (tty->count != 1) { /* other opens remain: nothing to tear down yet */
++		mutex_unlock(&tty_mutex);
++		return;
++	}
++
++	/* Prevent other threads from re-opening this tty. */
++	set_bit(TTY_CLOSING, &tty->flags);
++	mutex_unlock(&tty_mutex);
++
++	tty->closing = 1;
++	tty_wait_until_sent(tty, 0);
++	if (DRV(tty->driver)->flush_buffer != NULL)
++		DRV(tty->driver)->flush_buffer(tty);
++	if (tty->ldisc.flush_buffer != NULL)
++		tty->ldisc.flush_buffer(tty);
++	tty->closing = 0;
++	spin_lock_irqsave(&xencons_lock, flags);
++	xencons_tty = NULL; /* xencons_rx() drops input from now on */
++	spin_unlock_irqrestore(&xencons_lock, flags);
++}
++
++static struct tty_operations xencons_ops = { /* tty hooks installed on xencons_driver by xencons_init() */
++	.open = xencons_open,
++	.close = xencons_close,
++	.write = xencons_write,
++	.write_room = xencons_write_room,
++	.put_char = xencons_put_char,
++	.flush_chars = xencons_flush_chars,
++	.chars_in_buffer = xencons_chars_in_buffer,
++	.send_xchar = xencons_send_xchar,
++	.flush_buffer = xencons_flush_buffer,
++	.throttle = xencons_throttle,
++	.unthrottle = xencons_unthrottle,
++	.wait_until_sent = xencons_wait_until_sent,
++};
++
++static int __init xencons_init(void)
++{	/* module_init: allocate and register the console tty driver for the selected mode; returns 0 or a negative errno. */
++	int rc;
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	if (xc_mode == XC_OFF)
++		return 0;
++
++	if (!is_initial_xendomain()) {
++		rc = xencons_ring_init();
++		if (rc)
++			return rc;
++	}
++
++	xencons_driver = alloc_tty_driver((xc_mode == XC_TTY) ?
++					  MAX_NR_CONSOLES : 1);
++	if (xencons_driver == NULL)
++		return -ENOMEM; /* NOTE(review): the irq bound by xencons_ring_init() above is not released on this path */
++
++	DRV(xencons_driver)->name            = "xencons";
++	DRV(xencons_driver)->major           = TTY_MAJOR;
++	DRV(xencons_driver)->type            = TTY_DRIVER_TYPE_SERIAL;
++	DRV(xencons_driver)->subtype         = SERIAL_TYPE_NORMAL;
++	DRV(xencons_driver)->init_termios    = tty_std_termios;
++	DRV(xencons_driver)->flags           =
++		TTY_DRIVER_REAL_RAW |
++		TTY_DRIVER_RESET_TERMIOS;
++	DRV(xencons_driver)->termios         = xencons_termios;
++	DRV(xencons_driver)->termios_locked  = xencons_termios_locked;
++
++	switch (xc_mode) { /* overrides the generic name/major set above */
++	case XC_XVC:
++		DRV(xencons_driver)->name        = "xvc";
++		DRV(xencons_driver)->major       = XEN_XVC_MAJOR;
++		DRV(xencons_driver)->minor_start = XEN_XVC_MINOR;
++		DRV(xencons_driver)->name_base   = xc_num;
++		break;
++	case XC_SERIAL:
++		DRV(xencons_driver)->name        = "ttyS";
++		DRV(xencons_driver)->minor_start = 64 + xc_num;
++		DRV(xencons_driver)->name_base   = xc_num;
++		break;
++	default: /* XC_TTY: the only mode left once XC_OFF has returned above */
++		DRV(xencons_driver)->name        = "tty";
++		DRV(xencons_driver)->minor_start = 1;
++		DRV(xencons_driver)->name_base   = 1;
++		break;
++	}
++
++	tty_set_operations(xencons_driver, &xencons_ops);
++
++	if ((rc = tty_register_driver(DRV(xencons_driver))) != 0) {
++		printk("WARNING: Failed to register Xen virtual "
++		       "console driver as '%s%d'\n",
++		       DRV(xencons_driver)->name,
++		       DRV(xencons_driver)->name_base);
++		put_tty_driver(xencons_driver);
++		xencons_driver = NULL;
++		return rc;
++	}
++
++	if (is_initial_xendomain()) { /* the initial domain receives console input through VIRQ_CONSOLE */
++		xencons_priv_irq = bind_virq_to_irqhandler(
++			VIRQ_CONSOLE,
++			0,
++			xencons_priv_interrupt,
++			0,
++			"console",
++			NULL);
++		BUG_ON(xencons_priv_irq < 0);
++	}
++
++	printk("Xen virtual console successfully installed as %s%d\n",
++	       DRV(xencons_driver)->name, xc_num);
++
++	return 0;
++}
++
++module_init(xencons_init);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/console/xencons_ring.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,143 @@
++/* 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/errno.h>
++#include <linux/signal.h>
++#include <linux/sched.h>
++#include <linux/interrupt.h>
++#include <linux/tty.h>
++#include <linux/tty_flip.h>
++#include <linux/serial.h>
++#include <linux/major.h>
++#include <linux/ptrace.h>
++#include <linux/ioport.h>
++#include <linux/mm.h>
++#include <linux/slab.h>
++
++#include <asm/hypervisor.h>
++#include <xen/evtchn.h>
++#include <xen/xencons.h>
++#include <linux/wait.h>
++#include <linux/interrupt.h>
++#include <linux/sched.h>
++#include <linux/err.h>
++#include <xen/interface/io/console.h>
++
++static int xencons_irq;
++
++static inline struct xencons_interface *xencons_interface(void)
++{	/* Virtual address of the console ring page named by xen_start_info->console.domU.mfn. */
++	return mfn_to_virt(xen_start_info->console.domU.mfn);
++}
++
++static inline void notify_daemon(void)
++{	/* Signal the other end of the console event channel. */
++	/* Use evtchn: this is called early, before irq is set up. */
++	notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
++}
++
++int xencons_ring_send(const char *data, unsigned len)
++{	/* Copy up to len bytes into the output ring; returns how many fitted (0 when the ring is full). */
++	int sent = 0;
++	struct xencons_interface *intf = xencons_interface();
++	XENCONS_RING_IDX cons, prod;
++
++	cons = intf->out_cons;
++	prod = intf->out_prod;
++	mb();
++	BUG_ON((prod - cons) > sizeof(intf->out)); /* indices read from the shared page must describe at most one ring's worth */
++
++	while ((sent < len) && ((prod - cons) < sizeof(intf->out)))
++		intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
++
++	wmb(); /* data must be written before the new producer index is published */
++	intf->out_prod = prod;
++
++	notify_daemon(); /* sent even when nothing was queued */
++
++	return sent;
++}
++
++static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
++{	/* Event-channel handler: drain the input ring into xencons_rx(), then kick transmission. */
++	struct xencons_interface *intf = xencons_interface();
++	XENCONS_RING_IDX cons, prod;
++
++	cons = intf->in_cons;
++	prod = intf->in_prod;
++	mb();
++	BUG_ON((prod - cons) > sizeof(intf->in));
++
++	while (cons != prod) { /* bytes are handed over one at a time */
++		xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
++		cons++;
++	}
++
++	mb(); /* finish reading the ring before publishing the new consumer index */
++	intf->in_cons = cons;
++
++	notify_daemon();
++
++	xencons_tx();
++
++	return IRQ_HANDLED;
++}
++
++int xencons_ring_init(void)
++{	/* (Re)bind handle_input() to the console event channel; returns 0, -ENODEV, or the bind error. */
++	int irq;
++
++	if (xencons_irq) /* drop a binding left over from an earlier call (see xencons_resume()) */
++		unbind_from_irqhandler(xencons_irq, NULL);
++	xencons_irq = 0;
++
++	if (!is_running_on_xen() ||
++	    is_initial_xendomain() ||
++	    !xen_start_info->console.domU.evtchn)
++		return -ENODEV;
++
++	irq = bind_caller_port_to_irqhandler(
++		xen_start_info->console.domU.evtchn,
++		handle_input, 0, "xencons", NULL);
++	if (irq < 0) {
++		printk(KERN_ERR "XEN console request irq failed %i\n", irq);
++		return irq;
++	}
++
++	xencons_irq = irq;
++
++	/* In case we have in-flight data after save/restore... */
++	notify_daemon();
++
++	return 0;
++}
++
++void xencons_resume(void)
++{	/* Rebind the console ring interrupt; the result of xencons_ring_init() is deliberately ignored. */
++	(void)xencons_ring_init();
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/Makefile	2007-08-27 14:02:09.000000000 -0400
+@@ -0,0 +1,12 @@
++#
++# Makefile for the linux kernel.
++#
++
++obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o
++
++obj-$(CONFIG_PROC_FS)		+= xen_proc.o
++obj-$(CONFIG_SYSFS)		+= hypervisor_sysfs.o
++obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
++obj-$(CONFIG_XEN_SYSFS)		+= xen_sysfs.o
++obj-$(CONFIG_XEN_SMPBOOT)	+= smpboot.o
++obj-$(CONFIG_KEXEC)		+= machine_kexec.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/cpu_hotplug.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,172 @@
++#include <linux/init.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/notifier.h>
++#include <linux/cpu.h>
++#include <xen/cpu_hotplug.h>
++#include <xen/xenbus.h>
++
++/*
++ * Set of CPUs that remote admin software will allow us to bring online.
++ * Notified to us via xenbus.
++ */
++static cpumask_t xenbus_allowed_cpumask;
++
++/* Set of CPUs that local admin will allow us to bring online. */
++static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
++
++static int local_cpu_hotplug_request(void)
++{	/* Returns nonzero when the current task has an mm_struct. */
++	/*
++	 * We assume a CPU hotplug request comes from local admin if it is made
++	 * via a userspace process (i.e., one with a real mm_struct).
++	 */
++	return (current->mm != NULL);
++}
++
++static void vcpu_hotplug(unsigned int cpu)
++{	/* Read cpu/<cpu>/availability from xenstore and bring the vcpu up or down to match. */
++	int err;
++	char dir[32], state[32];
++
++	if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
++		return;
++
++	sprintf(dir, "cpu/%d", cpu);
++	err = xenbus_scanf(XBT_NIL, dir, "availability", "%31s", state); /* width keeps the write inside state[32] */
++	if (err != 1) {
++		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
++		return;
++	}
++
++	if (strcmp(state, "online") == 0) {
++		cpu_set(cpu, xenbus_allowed_cpumask); /* record permission first; cpu_up_check() consults this mask */
++		(void)cpu_up(cpu);
++	} else if (strcmp(state, "offline") == 0) {
++		cpu_clear(cpu, xenbus_allowed_cpumask);
++		(void)cpu_down(cpu);
++	} else {
++		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
++		       state, cpu);
++	}
++}
++
++static void handle_vcpu_hotplug_event(
++	struct xenbus_watch *watch, const char **vec, unsigned int len)
++{	/* xenbus watch callback: re-evaluate the vcpu named by a "cpu/<n>" component of the changed path. */
++	int cpu;
++	char *cpustr;
++	const char *node = vec[XS_WATCH_PATH];
++
++	/* Act only when a number was really parsed; otherwise cpu would be used uninitialised. */
++	if ((cpustr = strstr(node, "cpu/")) != NULL &&
++	    sscanf(cpustr, "cpu/%d", &cpu) == 1)
++		vcpu_hotplug(cpu);
++}
++
++static int smpboot_cpu_notify(struct notifier_block *notifier,
++			      unsigned long action, void *hcpu)
++{	/* CPU notifier: a locally requested CPU_DOWN_PREPARE removes the cpu from local_allowed_cpumask. */
++	int cpu = (long)hcpu;
++
++	/*
++	 * We do this in a callback notifier rather than __cpu_disable()
++	 * because local_cpu_hotplug_request() does not work in the latter
++	 * as it's always executed from within a stopmachine kthread.
++	 */
++	if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
++		cpu_clear(cpu, local_allowed_cpumask);
++
++	return NOTIFY_OK;
++}
++
++static int setup_cpu_watcher(struct notifier_block *notifier,
++			      unsigned long event, void *data)
++{	/* xenstore notifier: watch the "cpu" node and, outside the initial domain, apply the state of every possible cpu. */
++	int i;
++
++	static struct xenbus_watch cpu_watch = {
++		.node = "cpu",
++		.callback = handle_vcpu_hotplug_event,
++		.flags = XBWF_new_thread };
++	(void)register_xenbus_watch(&cpu_watch); /* registration failure is ignored */
++
++	if (!is_initial_xendomain()) {
++		for_each_possible_cpu(i)
++			vcpu_hotplug(i);
++		printk(KERN_INFO "Brought up %ld CPUs\n",
++		       (long)num_online_cpus());
++	}
++
++	return NOTIFY_DONE;
++}
++
++static int __init setup_vcpu_hotplug_event(void)
++{	/* arch_initcall: register the CPU and xenstore notifiers; -ENODEV when not running on Xen. */
++	static struct notifier_block hotplug_cpu = {
++		.notifier_call = smpboot_cpu_notify };
++	static struct notifier_block xsn_cpu = {
++		.notifier_call = setup_cpu_watcher };
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	register_cpu_notifier(&hotplug_cpu);
++	register_xenstore_notifier(&xsn_cpu);
++
++	return 0;
++}
++
++arch_initcall(setup_vcpu_hotplug_event);
++
++int smp_suspend(void)
++{	/* Take every online cpu except cpu 0 down; returns 0 or the first cpu_down() error. */
++	int cpu, err;
++
++	for_each_online_cpu(cpu) {
++		if (cpu == 0)
++			continue;
++		err = cpu_down(cpu);
++		if (err) {
++			printk(KERN_CRIT "Failed to take all CPUs "
++			       "down: %d.\n", err);
++			for_each_possible_cpu(cpu) /* reuses cpu; safe only because we return right after */
++				vcpu_hotplug(cpu);
++			return err;
++		}
++	}
++
++	return 0;
++}
++
++void smp_resume(void)
++{	/* Re-apply the xenstore availability state to every possible cpu. */
++	int cpu;
++
++	for_each_possible_cpu(cpu)
++		vcpu_hotplug(cpu);
++}
++
++int cpu_up_check(unsigned int cpu)
++{	/* Decide whether cpu may be brought up: returns 0 if allowed, -EBUSY otherwise. */
++	int rc = 0;
++
++	if (local_cpu_hotplug_request()) {
++		cpu_set(cpu, local_allowed_cpumask); /* a local request grants local permission, even if refused below */
++		if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
++			printk("%s: attempt to bring up CPU %u disallowed by "
++			       "remote admin.\n", __FUNCTION__, cpu);
++			rc = -EBUSY;
++		}
++	} else if (!cpu_isset(cpu, local_allowed_cpumask) ||
++		   !cpu_isset(cpu, xenbus_allowed_cpumask)) { /* non-local request: both masks must allow it */
++		rc = -EBUSY;
++	}
++
++	return rc;
++}
++
++void init_xenbus_allowed_cpumask(void)
++{	/* Seed xenbus_allowed_cpumask with a copy of cpu_present_map. */
++	xenbus_allowed_cpumask = cpu_present_map;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/evtchn.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,1015 @@
++/******************************************************************************
++ * evtchn.c
++ * 
++ * Communication via Xen event channels.
++ * 
++ * Copyright (c) 2002-2005, K A Fraser
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/module.h>
++#include <linux/irq.h>
++#include <linux/interrupt.h>
++#include <linux/sched.h>
++#include <linux/kernel_stat.h>
++#include <linux/version.h>
++#include <asm/atomic.h>
++#include <asm/system.h>
++#include <asm/ptrace.h>
++#include <asm/synch_bitops.h>
++#include <xen/evtchn.h>
++#include <xen/interface/event_channel.h>
++#include <xen/interface/physdev.h>
++#include <asm/hypervisor.h>
++#include <linux/mc146818rtc.h> /* RTC_IRQ */
++
++/*
++ * This lock protects updates to the following mapping and reference-count
++ * arrays. The lock does not need to be acquired to read the mapping tables.
++ */
++static DEFINE_SPINLOCK(irq_mapping_update_lock);
++
++/* IRQ <-> event-channel mappings. */
++static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
++	[0 ...  NR_EVENT_CHANNELS-1] = -1 };
++
++/* Packed IRQ information: binding type, sub-type index, and event channel. */
++static u32 irq_info[NR_IRQS];
++
++/* Binding types. */
++enum {
++	IRQT_UNBOUND,
++	IRQT_PIRQ,
++	IRQT_VIRQ,
++	IRQT_IPI,
++	IRQT_LOCAL_PORT,
++	IRQT_CALLER_PORT
++};
++
++/* Constructor for packed IRQ information: type from bit 24 up, index from bit 16 up, evtchn in the low bits. */
++static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn)
++{
++	return ((type << 24) | (index << 16) | evtchn); /* arguments are neither masked nor range-checked */
++}
++
++/* Convenient shorthand for packed representation of an unbound IRQ. */
++#define IRQ_UNBOUND	mk_irq_info(IRQT_UNBOUND, 0, 0)
++
++/*
++ * Accessors for packed IRQ information.
++ */
++
++static inline unsigned int evtchn_from_irq(int irq)
++{
++	return irq_info[irq] & 0xFFFFu;	/* low 16 bits: event channel */
++}
++
++static inline unsigned int index_from_irq(int irq)
++{
++	return (irq_info[irq] >> 16) & 0xFFu;	/* bits 16-23: sub-type index */
++}
++
++static inline unsigned int type_from_irq(int irq)
++{
++	return (irq_info[irq] >> 24) & 0xFFu;	/* bits 24-31: binding type */
++}
++
++/* IRQ <-> VIRQ mapping. */
++DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
++
++/* IRQ <-> IPI mapping. */
++#ifndef NR_IPIS
++#define NR_IPIS 1
++#endif
++DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
++
++/* Reference counts for bindings to IRQs. */
++static int irq_bindcount[NR_IRQS];
++
++/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
++static DECLARE_BITMAP(pirq_needs_eoi, NR_PIRQS);
++
++#ifdef CONFIG_SMP
++
++static u8 cpu_evtchn[NR_EVENT_CHANNELS];
++static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
++
++static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
++					   unsigned int idx)
++{
++	return (sh->evtchn_pending[idx] &	/* pending in word idx ... */
++		cpu_evtchn_mask[cpu][idx] &	/* ... routed to this cpu ... */
++		~sh->evtchn_mask[idx]);		/* ... and not masked */
++}
++
++static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
++{
++	int irq = evtchn_to_irq[chn];
++
++	BUG_ON(irq == -1);	/* the channel must already map to an IRQ */
++	set_native_irq_info(irq, cpumask_of_cpu(cpu));
++
++	clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); /* old cpu */
++	set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);	/* new cpu */
++	cpu_evtchn[chn] = cpu;	/* record the new owner */
++}
++
++static void init_evtchn_cpu_bindings(void)
++{
++	int i;
++
++	/* By default all event channels notify CPU#0. */
++	for (i = 0; i < NR_IRQS; i++)
++		set_native_irq_info(i, cpumask_of_cpu(0));
++
++	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));	/* every port -> cpu 0 */
++	memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); /* cpu 0: all bits set */
++}
++
++static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
++{
++	return cpu_evtchn[evtchn];	/* cpu last recorded by bind_evtchn_to_cpu() */
++}
++
++#else
++
++static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
++					   unsigned int idx)
++{
++	return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]); /* !SMP: cpu unused */
++}
++
++static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
++{	/* !CONFIG_SMP: no per-cpu routing tables exist, nothing to update */
++}
++
++static void init_evtchn_cpu_bindings(void)
++{	/* !CONFIG_SMP: no per-cpu routing tables exist, nothing to initialise */
++}
++
++static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
++{
++	return 0;	/* !CONFIG_SMP: every channel is reported as cpu 0 */
++}
++
++#endif
++
++/* Upcall to generic IRQ layer: do_IRQ(irq, regs) stores ~irq in regs, then calls do_IRQ(regs). */
++#ifdef CONFIG_X86
++extern fastcall unsigned int do_IRQ(struct pt_regs *regs);
++void __init xen_init_IRQ(void);
++void __init init_IRQ(void)
++{
++	irq_ctx_init(0);
++	xen_init_IRQ();	/* set up the event-channel <-> IRQ tables below */
++}
++#if defined (__i386__)
++static inline void exit_idle(void) {}	/* empty stub on i386 */
++#define IRQ_REG orig_eax
++#elif defined (__x86_64__)
++#include <asm/idle.h>
++#define IRQ_REG orig_rax
++#endif
++#define do_IRQ(irq, regs) do {		\
++	(regs)->IRQ_REG = ~(irq);	\
++	do_IRQ((regs));			\
++} while (0)
++#endif
++
++/* Xen will never allocate port zero for any purpose. */
++#define VALID_EVTCHN(chn)	((chn) != 0)
++
++/*
++ * Force a proper event-channel callback from Xen after clearing the
++ * callback mask. We do this in a very simple manner, by making a call
++ * down into Xen. The pending flag will be checked by Xen on return.
++ */
++void force_evtchn_callback(void)
++{
++	(void)HYPERVISOR_xen_version(0, NULL);	/* result deliberately ignored */
++}
++/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
++EXPORT_SYMBOL(force_evtchn_callback);
++
++static DEFINE_PER_CPU(unsigned int, upcall_count) = { 0 };
++
++/* Dispatch all pending, unmasked event channels. NB. Interrupts are disabled on entry. */
++asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
++{
++	unsigned long  l1, l2;
++	unsigned int   l1i, l2i, port, count;
++	int            irq, cpu = smp_processor_id();
++	shared_info_t *s = HYPERVISOR_shared_info;
++	vcpu_info_t   *vcpu_info = &s->vcpu_info[cpu];
++
++	do {
++		/* Avoid a callback storm when we reenable delivery. */
++		vcpu_info->evtchn_upcall_pending = 0;
++
++		/* Nested invocations bail immediately. */
++		if (unlikely(per_cpu(upcall_count, cpu)++))
++			return;
++
++#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
++		/* Order the master-flag store /before/ the selector clear (store-store). */
++		wmb();
++#endif
++		l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
++		while (l1 != 0) {
++			l1i = __ffs(l1);	/* lowest selector bit still set */
++			l1 &= ~(1UL << l1i);
++
++			while ((l2 = active_evtchns(cpu, s, l1i)) != 0) {
++				l2i = __ffs(l2);
++
++				port = (l1i * BITS_PER_LONG) + l2i;
++				if ((irq = evtchn_to_irq[port]) != -1)
++					do_IRQ(irq, regs);
++				else {	/* no IRQ bound: hand the port to the evtchn device */
++					exit_idle();
++					evtchn_device_upcall(port);
++				}
++			}
++		}
++
++		/* If there were nested callbacks then we have more to do. */
++		count = per_cpu(upcall_count, cpu);
++		per_cpu(upcall_count, cpu) = 0;
++	} while (unlikely(count != 1));
++}
++
++static int find_unbound_irq(void)
++{
++	static int warned;	/* the warning below is printed at most once */
++	int dynirq, irq;
++
++	for (dynirq = 0; dynirq < NR_DYNIRQS; dynirq++) {
++		irq = dynirq_to_irq(dynirq);
++		if (irq_bindcount[irq] == 0)	/* no bindings: free to use */
++			return irq;
++	}
++
++	if (!warned) {
++		warned = 1;
++		printk(KERN_WARNING "No available IRQ to bind to: "
++		       "increase NR_DYNIRQS.\n");
++	}
++
++	return -ENOSPC;	/* every dynamic IRQ has a non-zero bind count */
++}
++
++static int bind_caller_port_to_irq(unsigned int caller_port)
++{
++	int irq;
++
++	spin_lock(&irq_mapping_update_lock);
++
++	if ((irq = evtchn_to_irq[caller_port]) == -1) {	/* port not mapped yet */
++		if ((irq = find_unbound_irq()) < 0)
++			goto out;	/* irq holds the negative error */
++
++		evtchn_to_irq[caller_port] = irq;
++		irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
++	}
++
++	irq_bindcount[irq]++;	/* an existing mapping just gains a reference */
++
++ out:
++	spin_unlock(&irq_mapping_update_lock);
++	return irq;
++}
++
++static int bind_local_port_to_irq(unsigned int local_port)
++{
++	int irq;
++
++	spin_lock(&irq_mapping_update_lock);
++
++	BUG_ON(evtchn_to_irq[local_port] != -1);	/* port must not be mapped yet */
++
++	if ((irq = find_unbound_irq()) < 0) {	/* no free IRQ: close the port, fail */
++		struct evtchn_close close = { .port = local_port };
++		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
++			BUG();
++		goto out;
++	}
++
++	evtchn_to_irq[local_port] = irq;
++	irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
++	irq_bindcount[irq]++;
++
++ out:
++	spin_unlock(&irq_mapping_update_lock);
++	return irq;	/* IRQ number, or the negative error from find_unbound_irq() */
++}
++
++static int bind_listening_port_to_irq(unsigned int remote_domain)
++{
++	struct evtchn_alloc_unbound alloc_unbound;
++	int err;
++
++	alloc_unbound.dom        = DOMID_SELF;
++	alloc_unbound.remote_dom = remote_domain;
++
++	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
++					  &alloc_unbound);
++
++	return err ? : bind_local_port_to_irq(alloc_unbound.port); /* err if non-zero */
++}
++
++static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
++					  unsigned int remote_port)
++{
++	struct evtchn_bind_interdomain bind_interdomain;
++	int err;
++
++	bind_interdomain.remote_dom  = remote_domain;
++	bind_interdomain.remote_port = remote_port;
++
++	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
++					  &bind_interdomain);
++
++	return err ? : bind_local_port_to_irq(bind_interdomain.local_port); /* err if non-zero */
++}
++
++static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
++{
++	struct evtchn_bind_virq bind_virq;
++	int evtchn, irq;
++
++	spin_lock(&irq_mapping_update_lock);
++
++	if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {	/* first binding */
++		if ((irq = find_unbound_irq()) < 0)
++			goto out;	/* irq holds the negative error */
++
++		bind_virq.virq = virq;
++		bind_virq.vcpu = cpu;
++		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
++						&bind_virq) != 0)
++			BUG();
++		evtchn = bind_virq.port;	/* port chosen by the hypercall */
++
++		evtchn_to_irq[evtchn] = irq;
++		irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
++
++		per_cpu(virq_to_irq, cpu)[virq] = irq;
++
++		bind_evtchn_to_cpu(evtchn, cpu);
++	}
++
++	irq_bindcount[irq]++;	/* repeat binders share the existing IRQ */
++
++ out:
++	spin_unlock(&irq_mapping_update_lock);
++	return irq;
++}
++
++static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
++{
++	struct evtchn_bind_ipi bind_ipi;
++	int evtchn, irq;
++
++	spin_lock(&irq_mapping_update_lock);
++
++	if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {	/* first binding */
++		if ((irq = find_unbound_irq()) < 0)
++			goto out;	/* irq holds the negative error */
++
++		bind_ipi.vcpu = cpu;
++		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
++						&bind_ipi) != 0)
++			BUG();
++		evtchn = bind_ipi.port;	/* port chosen by the hypercall */
++
++		evtchn_to_irq[evtchn] = irq;
++		irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
++
++		per_cpu(ipi_to_irq, cpu)[ipi] = irq;
++
++		bind_evtchn_to_cpu(evtchn, cpu);
++	}
++
++	irq_bindcount[irq]++;	/* repeat binders share the existing IRQ */
++
++ out:
++	spin_unlock(&irq_mapping_update_lock);
++	return irq;
++}
++
++static void unbind_from_irq(unsigned int irq)
++{
++	struct evtchn_close close;
++	int cpu, evtchn = evtchn_from_irq(irq);
++
++	spin_lock(&irq_mapping_update_lock);
++
++	if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {	/* last user */
++		close.port = evtchn;
++		if ((type_from_irq(irq) != IRQT_CALLER_PORT) &&	/* caller ports are not closed here */
++		    HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
++			BUG();
++
++		switch (type_from_irq(irq)) {	/* drop the per-cpu reverse mapping */
++		case IRQT_VIRQ:
++			per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
++				[index_from_irq(irq)] = -1;
++			break;
++		case IRQT_IPI:
++			per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
++				[index_from_irq(irq)] = -1;
++			break;
++		default:
++			break;
++		}
++
++		/* Closed ports are implicitly re-bound to VCPU0. */
++		bind_evtchn_to_cpu(evtchn, 0);
++
++		evtchn_to_irq[evtchn] = -1;
++		irq_info[irq] = IRQ_UNBOUND;
++
++		/* Zap stats across IRQ changes of use. */
++		for_each_possible_cpu(cpu)
++			kstat_cpu(cpu).irqs[irq] = 0;
++	}
++
++	spin_unlock(&irq_mapping_update_lock);
++}
++
++int bind_caller_port_to_irqhandler(
++	unsigned int caller_port,
++	irqreturn_t (*handler)(int, void *, struct pt_regs *),
++	unsigned long irqflags,
++	const char *devname,
++	void *dev_id)
++{
++	int irq, retval;
++
++	irq = bind_caller_port_to_irq(caller_port);
++	if (irq < 0)
++		return irq;	/* binding failed: pass the error through */
++
++	retval = request_irq(irq, handler, irqflags, devname, dev_id);
++	if (retval != 0) {
++		unbind_from_irq(irq);	/* undo the binding taken above */
++		return retval;
++	}
++
++	return irq;	/* success: the bound IRQ number */
++}
++EXPORT_SYMBOL_GPL(bind_caller_port_to_irqhandler);
++
++int bind_listening_port_to_irqhandler(
++	unsigned int remote_domain,
++	irqreturn_t (*handler)(int, void *, struct pt_regs *),
++	unsigned long irqflags,
++	const char *devname,
++	void *dev_id)
++{
++	int irq, retval;
++
++	irq = bind_listening_port_to_irq(remote_domain);
++	if (irq < 0)
++		return irq;	/* binding failed: pass the error through */
++
++	retval = request_irq(irq, handler, irqflags, devname, dev_id);
++	if (retval != 0) {
++		unbind_from_irq(irq);	/* undo the binding taken above */
++		return retval;
++	}
++
++	return irq;	/* success: the bound IRQ number */
++}
++EXPORT_SYMBOL_GPL(bind_listening_port_to_irqhandler);
++
++int bind_interdomain_evtchn_to_irqhandler(
++	unsigned int remote_domain,
++	unsigned int remote_port,
++	irqreturn_t (*handler)(int, void *, struct pt_regs *),
++	unsigned long irqflags,
++	const char *devname,
++	void *dev_id)
++{
++	int irq, retval;
++
++	irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
++	if (irq < 0)
++		return irq;	/* binding failed: pass the error through */
++
++	retval = request_irq(irq, handler, irqflags, devname, dev_id);
++	if (retval != 0) {
++		unbind_from_irq(irq);	/* undo the binding taken above */
++		return retval;
++	}
++
++	return irq;	/* success: the bound IRQ number */
++}
++EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
++
++int bind_virq_to_irqhandler(
++	unsigned int virq,
++	unsigned int cpu,
++	irqreturn_t (*handler)(int, void *, struct pt_regs *),
++	unsigned long irqflags,
++	const char *devname,
++	void *dev_id)
++{
++	int irq, retval;
++
++	irq = bind_virq_to_irq(virq, cpu);
++	if (irq < 0)
++		return irq;	/* binding failed: pass the error through */
++
++	retval = request_irq(irq, handler, irqflags, devname, dev_id);
++	if (retval != 0) {
++		unbind_from_irq(irq);	/* undo the binding taken above */
++		return retval;
++	}
++
++	return irq;	/* success: the bound IRQ number */
++}
++EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
++
++int bind_ipi_to_irqhandler(
++	unsigned int ipi,
++	unsigned int cpu,
++	irqreturn_t (*handler)(int, void *, struct pt_regs *),
++	unsigned long irqflags,
++	const char *devname,
++	void *dev_id)
++{
++	int irq, retval;
++
++	irq = bind_ipi_to_irq(ipi, cpu);
++	if (irq < 0)
++		return irq;	/* binding failed: pass the error through */
++
++	retval = request_irq(irq, handler, irqflags, devname, dev_id);
++	if (retval != 0) {
++		unbind_from_irq(irq);	/* undo the binding taken above */
++		return retval;
++	}
++
++	return irq;	/* success: the bound IRQ number */
++}
++EXPORT_SYMBOL_GPL(bind_ipi_to_irqhandler);
++
++void unbind_from_irqhandler(unsigned int irq, void *dev_id)
++{
++	free_irq(irq, dev_id);	/* remove the handler first ... */
++	unbind_from_irq(irq);	/* ... then drop the event-channel binding */
++}
++EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
++
++#ifdef CONFIG_SMP
++/* Rebind an evtchn so that it gets delivered to a specific cpu */
++static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
++{
++	struct evtchn_bind_vcpu bind_vcpu;
++	int evtchn = evtchn_from_irq(irq);
++
++	if (!VALID_EVTCHN(evtchn))
++		return;	/* IRQ has no event channel: nothing to rebind */
++
++	/* Send future instances of this interrupt to other vcpu. */
++	bind_vcpu.port = evtchn;
++	bind_vcpu.vcpu = tcpu;
++
++	/*
++	 * If this fails, it usually just indicates that we're dealing with a
++	 * virq or IPI channel, which don't actually need to be rebound. Ignore
++	 * it, but don't do the xenlinux-level rebind in that case.
++	 */
++	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
++		bind_evtchn_to_cpu(evtchn, tcpu);
++}
++
++static void set_affinity_irq(unsigned irq, cpumask_t dest)
++{
++	unsigned tcpu = first_cpu(dest);	/* only the first cpu in dest is used */
++	rebind_irq_to_cpu(irq, tcpu);
++}
++#endif
++
++int resend_irq_on_evtchn(unsigned int irq)
++{
++	int masked, evtchn = evtchn_from_irq(irq);
++	shared_info_t *s = HYPERVISOR_shared_info;
++
++	if (!VALID_EVTCHN(evtchn))
++		return 1;	/* nothing bound; 1 is still returned */
++
++	masked = synch_test_and_set_bit(evtchn, s->evtchn_mask); /* mask, remember old state */
++	synch_set_bit(evtchn, s->evtchn_pending);	/* mark the event pending */
++	if (!masked)
++		unmask_evtchn(evtchn);	/* it was unmasked on entry: unmask again */
++
++	return 1;
++}
++
++/*
++ * Interface to generic handling in irq.c
++ */
++
++static unsigned int startup_dynirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn))	/* skip IRQs with no event channel */
++		unmask_evtchn(evtchn);
++	return 0;	/* always 0 */
++}
++
++static void shutdown_dynirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn))	/* skip IRQs with no event channel */
++		mask_evtchn(evtchn);	/* the port is masked, not closed */
++}
++
++static void enable_dynirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn))	/* skip IRQs with no event channel */
++		unmask_evtchn(evtchn);
++}
++
++static void disable_dynirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn))	/* skip IRQs with no event channel */
++		mask_evtchn(evtchn);
++}
++
++static void ack_dynirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	move_native_irq(irq);
++
++	if (VALID_EVTCHN(evtchn)) {
++		mask_evtchn(evtchn);	/* mask first ... */
++		clear_evtchn(evtchn);	/* ... then clear the pending state */
++	}
++}
++
++static void end_dynirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn) && !(irq_desc[irq].status & IRQ_DISABLED))
++		unmask_evtchn(evtchn);	/* a disabled IRQ is left masked */
++}
++
++static struct hw_interrupt_type dynirq_type = {	/* installed on dynamic IRQs by xen_init_IRQ() */
++	.typename = "Dynamic-irq",
++	.startup  = startup_dynirq,
++	.shutdown = shutdown_dynirq,
++	.enable   = enable_dynirq,
++	.disable  = disable_dynirq,
++	.ack      = ack_dynirq,
++	.end      = end_dynirq,
++#ifdef CONFIG_SMP
++	.set_affinity = set_affinity_irq,
++#endif
++	.retrigger = resend_irq_on_evtchn,
++};
++
++static inline void pirq_unmask_notify(int pirq)
++{
++	struct physdev_eoi eoi = { .irq = pirq };
++	if (unlikely(test_bit(pirq, pirq_needs_eoi)))	/* only flagged PIRQs */
++		(void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); /* result ignored */
++}
++
++static inline void pirq_query_unmask(int pirq)
++{
++	/* Zero-init: .flags stays defined (0) even if the query below fails. */
++	struct physdev_irq_status_query irq_status = { .irq = pirq };
++	(void)HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
++	clear_bit(pirq, pirq_needs_eoi);	/* recompute the flag from scratch */
++	if (irq_status.flags & XENIRQSTAT_needs_eoi)
++		set_bit(pirq, pirq_needs_eoi);
++}
++
++/*
++ * On startup, if there is no action associated with the IRQ then we are
++ * probing. In this case we should not share with others as it will confuse us.
++ */
++#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
++
++static unsigned int startup_pirq(unsigned int irq)
++{
++	struct evtchn_bind_pirq bind_pirq;
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn))
++		goto out;	/* already bound: just unmask */
++
++	bind_pirq.pirq  = irq;	/* NOTE(review): raw irq here, irq_to_pirq(irq) below -- confirm they agree */
++	/* NB. We are happy to share unless we are probing. */
++	bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
++	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) {
++		if (!probing_irq(irq))	/* stay quiet while probing */
++			printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
++			       irq);
++		return 0;	/* the failure is not reported to the caller */
++	}
++	evtchn = bind_pirq.port;
++
++	pirq_query_unmask(irq_to_pirq(irq));	/* refresh the needs-EOI flag */
++
++	evtchn_to_irq[evtchn] = irq;
++	bind_evtchn_to_cpu(evtchn, 0);
++	irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, evtchn);
++
++ out:
++	unmask_evtchn(evtchn);
++	pirq_unmask_notify(irq_to_pirq(irq));
++
++	return 0;
++}
++
++static void shutdown_pirq(unsigned int irq)
++{
++	struct evtchn_close close;
++	int evtchn = evtchn_from_irq(irq);
++
++	if (!VALID_EVTCHN(evtchn))
++		return;	/* never bound: nothing to tear down */
++
++	mask_evtchn(evtchn);
++
++	close.port = evtchn;
++	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
++		BUG();
++
++	bind_evtchn_to_cpu(evtchn, 0);	/* reset routing while the mapping still exists */
++	evtchn_to_irq[evtchn] = -1;
++	irq_info[irq] = IRQ_UNBOUND;
++}
++
++static void enable_pirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn)) {	/* skip IRQs with no event channel */
++		unmask_evtchn(evtchn);
++		pirq_unmask_notify(irq_to_pirq(irq));	/* EOI to Xen if flagged */
++	}
++}
++
++static void disable_pirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn))	/* skip IRQs with no event channel */
++		mask_evtchn(evtchn);
++}
++
++static void ack_pirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	move_native_irq(irq);
++
++	if (VALID_EVTCHN(evtchn)) {
++		mask_evtchn(evtchn);	/* mask first ... */
++		clear_evtchn(evtchn);	/* ... then clear the pending state */
++	}
++}
++
++static void end_pirq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn) && !(irq_desc[irq].status & IRQ_DISABLED)) {
++		unmask_evtchn(evtchn);	/* a disabled IRQ is left masked */
++		pirq_unmask_notify(irq_to_pirq(irq));	/* EOI to Xen if flagged */
++	}
++}
++
++static struct hw_interrupt_type pirq_type = {	/* installed on physical IRQs by xen_init_IRQ() */
++	.typename = "Phys-irq",
++	.startup  = startup_pirq,
++	.shutdown = shutdown_pirq,
++	.enable   = enable_pirq,
++	.disable  = disable_pirq,
++	.ack      = ack_pirq,
++	.end      = end_pirq,
++#ifdef CONFIG_SMP
++	.set_affinity = set_affinity_irq,
++#endif
++	.retrigger = resend_irq_on_evtchn,
++};
++
++int irq_ignore_unhandled(unsigned int irq)
++{
++	struct physdev_irq_status_query irq_status = { .irq = irq }; /* .flags starts at 0 */
++
++	if (!is_running_on_xen())
++		return 0;
++
++	(void)HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
++	return !!(irq_status.flags & XENIRQSTAT_shared);	/* 1 iff the shared flag is set */
++}
++
++void notify_remote_via_irq(int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn))	/* silently does nothing for an unbound IRQ */
++		notify_remote_via_evtchn(evtchn);
++}
++EXPORT_SYMBOL_GPL(notify_remote_via_irq);
++
++int irq_to_evtchn_port(int irq)
++{
++	return evtchn_from_irq(irq);	/* 0 for an IRQ whose info is IRQ_UNBOUND */
++}
++EXPORT_SYMBOL_GPL(irq_to_evtchn_port);
++
++void mask_evtchn(int port)
++{
++	shared_info_t *s = HYPERVISOR_shared_info;
++	synch_set_bit(port, s->evtchn_mask);	/* set the port's bit in the shared mask */
++}
++EXPORT_SYMBOL_GPL(mask_evtchn);
++
++void unmask_evtchn(int port)
++{
++	shared_info_t *s = HYPERVISOR_shared_info;
++	unsigned int cpu = smp_processor_id();
++	vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
++
++	BUG_ON(!irqs_disabled());	/* callers must have interrupts off */
++
++	/* Slow path (hypercall) if this is a non-local port. */
++	if (unlikely(cpu != cpu_from_evtchn(port))) {
++		struct evtchn_unmask unmask = { .port = port };
++		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
++		return;
++	}
++
++	synch_clear_bit(port, s->evtchn_mask);	/* local port: clear the mask bit directly */
++
++	/* Did we miss an interrupt 'edge'? Re-fire if so. */
++	if (synch_test_bit(port, s->evtchn_pending) &&
++	    !synch_test_and_set_bit(port / BITS_PER_LONG,
++				    &vcpu_info->evtchn_pending_sel))
++		vcpu_info->evtchn_upcall_pending = 1;	/* selector bit was newly set */
++}
++EXPORT_SYMBOL_GPL(unmask_evtchn);
++
++static void restore_cpu_virqs(int cpu)
++{
++	struct evtchn_bind_virq bind_virq;
++	int virq, irq, evtchn;
++
++	for (virq = 0; virq < NR_VIRQS; virq++) {
++		if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
++			continue;	/* this VIRQ was never bound on this cpu */
++
++		BUG_ON(irq_info[irq] != mk_irq_info(IRQT_VIRQ, virq, 0)); /* port must be zapped */
++
++		/* Get a new binding from Xen. */
++		bind_virq.virq = virq;
++		bind_virq.vcpu = cpu;
++		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
++						&bind_virq) != 0)
++			BUG();
++		evtchn = bind_virq.port;
++
++		/* Record the new mapping. */
++		evtchn_to_irq[evtchn] = irq;
++		irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
++		bind_evtchn_to_cpu(evtchn, cpu);
++
++		/* Ready for use. */
++		unmask_evtchn(evtchn);
++	}
++}
++
++static void restore_cpu_ipis(int cpu)
++{
++	struct evtchn_bind_ipi bind_ipi;
++	int ipi, irq, evtchn;
++
++	for (ipi = 0; ipi < NR_IPIS; ipi++) {
++		if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
++			continue;	/* this IPI was never bound on this cpu */
++
++		BUG_ON(irq_info[irq] != mk_irq_info(IRQT_IPI, ipi, 0)); /* port must be zapped */
++
++		/* Get a new binding from Xen. */
++		bind_ipi.vcpu = cpu;
++		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
++						&bind_ipi) != 0)
++			BUG();
++		evtchn = bind_ipi.port;
++
++		/* Record the new mapping. */
++		evtchn_to_irq[evtchn] = irq;
++		irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
++		bind_evtchn_to_cpu(evtchn, cpu);
++
++		/* Ready for use. */
++		unmask_evtchn(evtchn);
++
++	}
++}
++
++void irq_resume(void)
++{
++	int cpu, pirq, irq, evtchn;
++
++	init_evtchn_cpu_bindings();	/* route everything back to cpu 0 */
++
++	/* New event-channel space is not 'live' yet. */
++	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
++		mask_evtchn(evtchn);
++
++	/* Check that no PIRQs are still bound. */
++	for (pirq = 0; pirq < NR_PIRQS; pirq++)
++		BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
++
++	/* No IRQ <-> event-channel mappings. */
++	for (irq = 0; irq < NR_IRQS; irq++)
++		irq_info[irq] &= ~0xFFFF; /* zap event-channel binding */
++	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
++		evtchn_to_irq[evtchn] = -1;
++
++	for_each_possible_cpu(cpu) {	/* re-create the VIRQ and IPI bindings */
++		restore_cpu_virqs(cpu);
++		restore_cpu_ipis(cpu);
++	}
++
++}
++
++void __init xen_init_IRQ(void)
++{
++	int i;
++
++	init_evtchn_cpu_bindings();	/* route everything to cpu 0 */
++
++	/* No event channels are 'live' right now. */
++	for (i = 0; i < NR_EVENT_CHANNELS; i++)
++		mask_evtchn(i);
++
++	/* No IRQ -> event-channel mappings. */
++	for (i = 0; i < NR_IRQS; i++)
++		irq_info[i] = IRQ_UNBOUND;
++
++	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
++	for (i = 0; i < NR_DYNIRQS; i++) {
++		irq_bindcount[dynirq_to_irq(i)] = 0;
++
++		irq_desc[dynirq_to_irq(i)].status = IRQ_DISABLED;
++		irq_desc[dynirq_to_irq(i)].action = NULL;
++		irq_desc[dynirq_to_irq(i)].depth = 1;
++		irq_desc[dynirq_to_irq(i)].chip = &dynirq_type;
++	}
++
++	/* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
++	for (i = 0; i < NR_PIRQS; i++) {
++		irq_bindcount[pirq_to_irq(i)] = 1;	/* never 0, so find_unbound_irq() skips it */
++
++#ifdef RTC_IRQ
++		/* If not domain 0, force our RTC driver to fail its probe. */
++		if ((i == RTC_IRQ) && !is_initial_xendomain())
++			continue;	/* this irq_desc is left without pirq_type */
++#endif
++
++		irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED;
++		irq_desc[pirq_to_irq(i)].action = NULL;
++		irq_desc[pirq_to_irq(i)].depth = 1;
++		irq_desc[pirq_to_irq(i)].chip = &pirq_type;
++	}
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/features.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,34 @@
++/******************************************************************************
++ * features.c
++ *
++ * Xen feature flags.
++ *
++ * Copyright (c) 2006, Ian Campbell, XenSource Inc.
++ */
++#include <linux/types.h>
++#include <linux/cache.h>
++#include <linux/module.h>
++#include <asm/hypervisor.h>
++#include <xen/features.h>
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
++/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
++EXPORT_SYMBOL(xen_features);
++
++void setup_xen_features(void)
++{
++	xen_feature_info_t fi;
++	int i, j;
++
++	for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
++		fi.submap_idx = i;
++		if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
++			break;	/* stop at the first submap the query rejects */
++		for (j = 0; j < 32; j++)	/* one u8 flag per submap bit */
++			xen_features[i*32+j] = !!(fi.submap & (1U << j)); /* 1U: j == 31 is defined */
++	}
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/gnttab.c	2007-08-27 14:02:10.000000000 -0400
+@@ -0,0 +1,631 @@
++/******************************************************************************
++ * gnttab.c
++ *
++ * Granting foreign access to our memory reservation.
++ *
++ * Copyright (c) 2005-2006, Christopher Clark
++ * Copyright (c) 2004-2005, K A Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/module.h>
++#include <linux/sched.h>
++#include <linux/mm.h>
++#include <xen/interface/xen.h>
++#include <xen/gnttab.h>
++#include <asm/pgtable.h>
++#include <asm/uaccess.h>
++#include <asm/synch_bitops.h>
++#include <asm/io.h>
++#include <xen/interface/memory.h>
++#include <xen/driver_util.h>
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++/* External tools reserve first few grant table entries. */
++#define NR_RESERVED_ENTRIES 8
++#define GNTTAB_LIST_END 0xffffffff
++#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
++
++static grant_ref_t **gnttab_list;
++static unsigned int nr_grant_frames;
++static unsigned int boot_max_nr_grant_frames;
++static int gnttab_free_count;
++static grant_ref_t gnttab_free_head;
++static DEFINE_SPINLOCK(gnttab_list_lock);
++
++static struct grant_entry *shared;
++
++static struct gnttab_free_callback *gnttab_free_callback_list;
++
++static int gnttab_expand(unsigned int req_entries);
++
++#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
++#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
++
++static int get_free_entries(int count)
++{
++	unsigned long flags;
++	int ref, rc;
++	grant_ref_t head;
++
++	spin_lock_irqsave(&gnttab_list_lock, flags);
++
++	if ((gnttab_free_count < count) &&	/* short of entries: try to grow */
++	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
++		spin_unlock_irqrestore(&gnttab_list_lock, flags);
++		return rc;	/* negative error from gnttab_expand() */
++	}
++
++	ref = head = gnttab_free_head;	/* the returned chain starts at the free head */
++	gnttab_free_count -= count;
++	while (count-- > 1)	/* walk to the count-th entry of the chain */
++		head = gnttab_entry(head);
++ 	gnttab_free_head = gnttab_entry(head);
++	gnttab_entry(head) = GNTTAB_LIST_END;	/* terminate the detached chain */
++
++	spin_unlock_irqrestore(&gnttab_list_lock, flags);
++
++	return ref;
++}
++
++#define get_free_entry() get_free_entries(1)
++
++static void do_free_callbacks(void)
++{
++	struct gnttab_free_callback *callback, *next;
++
++	callback = gnttab_free_callback_list;
++	gnttab_free_callback_list = NULL;	/* detach the list; unsatisfied entries are re-queued */
++
++	while (callback != NULL) {
++		next = callback->next;
++		if (gnttab_free_count >= callback->count) {	/* enough free entries now */
++			callback->next = NULL;
++			callback->fn(callback->arg);
++		} else {	/* still short: push back onto the list */
++			callback->next = gnttab_free_callback_list;
++			gnttab_free_callback_list = callback;
++		}
++		callback = next;
++	}
++}
++
++static inline void check_free_callbacks(void)
++{
++	if (unlikely(gnttab_free_callback_list))	/* usually nothing is queued */
++		do_free_callbacks();
++}
++
++static void put_free_entry(grant_ref_t ref)
++{
++	unsigned long flags;
++	spin_lock_irqsave(&gnttab_list_lock, flags);
++	gnttab_entry(ref) = gnttab_free_head;	/* push ref onto the free list */
++	gnttab_free_head = ref;
++	gnttab_free_count++;
++	check_free_callbacks();	/* runs with gnttab_list_lock held */
++	spin_unlock_irqrestore(&gnttab_list_lock, flags);
++}
++
++/*
++ * Public grant-issuing interface functions
++ */
++
++int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
++				int readonly)
++{
++	int ref;
++
++	if (unlikely((ref = get_free_entry()) < 0))
++		return -ENOSPC;	/* any allocation error is reported as -ENOSPC */
++
++	shared[ref].frame = frame;
++	shared[ref].domid = domid;
++	wmb();	/* frame and domid are written before flags */
++	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
++
++	return ref;
++}
++EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
++
++void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
++				     unsigned long frame, int readonly)
++{
++	shared[ref].frame = frame;
++	shared[ref].domid = domid;
++	wmb();	/* frame and domid are written before flags */
++	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
++}
++EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
++
++
++int gnttab_query_foreign_access(grant_ref_t ref)
++{
++	u16 nflags;
++
++	nflags = shared[ref].flags;	/* single snapshot of the entry's flags */
++
++	return (nflags & (GTF_reading|GTF_writing));	/* non-zero if either bit is set */
++}
++EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
++
++int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
++{
++	u16 flags, nflags;
++
++	nflags = shared[ref].flags;
++	do {	/* retry until flags are swapped to 0 without a concurrent change */
++		if ((flags = nflags) & (GTF_reading|GTF_writing)) {
++			printk(KERN_ALERT "WARNING: g.e. still in use!\n");
++			return 0;	/* entry left untouched */
++		}
++	} while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) !=
++		 flags);
++
++	return 1;	/* flags cleared; the readonly argument is not consulted */
++}
++EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
++
++void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
++			       unsigned long page)
++{
++	if (gnttab_end_foreign_access_ref(ref, readonly)) {
++		put_free_entry(ref);	/* return the reference to the free list */
++		if (page != 0)	/* page == 0 means there is no page to free */
++			free_page(page);
++	} else {
++		/* XXX This needs to be fixed so that the ref and page are
++		   placed on a list to be freed up later. */
++		printk(KERN_WARNING
++		       "WARNING: leaking g.e. and page still in use!\n");
++	}
++}
++EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
++
++int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
++{
++	int ref;
++
++	if (unlikely((ref = get_free_entry()) < 0))
++		return -ENOSPC;	/* any allocation error is reported as -ENOSPC */
++	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
++
++	return ref;
++}
++EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
++
++void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
++				       unsigned long pfn)
++{
++	shared[ref].frame = pfn;
++	shared[ref].domid = domid;
++	wmb();	/* frame and domid are written before flags */
++	shared[ref].flags = GTF_accept_transfer;
++}
++EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
++
++unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
++{
++	unsigned long frame;
++	u16           flags;
++
++	/*
++	 * If a transfer is not even yet started, try to reclaim the grant
++	 * reference and return failure (== 0).
++	 */
++	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
++		if (synch_cmpxchg_subword(&shared[ref].flags, flags, 0) == flags)
++			return 0;	/* reclaimed before any transfer was committed */
++		cpu_relax();
++	}
++
++	/* If a transfer is in progress then wait until it is completed. */
++	while (!(flags & GTF_transfer_completed)) {	/* busy-wait, no timeout */
++		flags = shared[ref].flags;
++		cpu_relax();
++	}
++
++	/* Read the frame number /after/ reading completion status. */
++	rmb();
++	frame = shared[ref].frame;
++	BUG_ON(frame == 0);	/* a completed transfer must carry a non-zero frame */
++
++	return frame;
++}
++EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
++
++unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
++{
++	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
++	put_free_entry(ref);	/* the reference is freed whether or not frame is 0 */
++	return frame;
++}
++EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
++
++void gnttab_free_grant_reference(grant_ref_t ref)
++{
++	put_free_entry(ref); /* exported thin wrapper around put_free_entry() */
++}
++EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
++
++void gnttab_free_grant_references(grant_ref_t head)
++{
++	grant_ref_t tail;
++	unsigned long irqflags;
++	int nr_refs;
++	if (head == GNTTAB_LIST_END)
++		return;
++	spin_lock_irqsave(&gnttab_list_lock, irqflags);
++	/* Walk to the last entry of the caller's chain, counting entries. */
++	for (tail = head, nr_refs = 1; gnttab_entry(tail) != GNTTAB_LIST_END;
++	     tail = gnttab_entry(tail))
++		nr_refs++;
++	/* Splice the whole chain in front of the list at gnttab_free_head. */
++	gnttab_entry(tail) = gnttab_free_head;
++	gnttab_free_head = head;
++	gnttab_free_count += nr_refs;
++	check_free_callbacks();
++	spin_unlock_irqrestore(&gnttab_list_lock, irqflags);
++}
++EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
++
++int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
++{
++	int first = get_free_entries(count);
++
++	/* A negative value means get_free_entries() could not supply them. */
++	if (first < 0)
++		return -ENOSPC;
++
++	*head = first;
++	return 0;
++}
++EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
++
++int gnttab_empty_grant_references(const grant_ref_t *private_head)
++{
++	return (*private_head == GNTTAB_LIST_END); /* non-zero when the private list is empty */
++}
++EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
++
++int gnttab_claim_grant_reference(grant_ref_t *private_head)
++{
++	grant_ref_t claimed = *private_head;
++	if (unlikely(claimed == GNTTAB_LIST_END)) /* private list is empty */
++		return -ENOSPC;
++	*private_head = gnttab_entry(claimed); /* pop: head moves to successor */
++	return claimed;
++}
++EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
++
++void gnttab_release_grant_reference(grant_ref_t *private_head,
++				    grant_ref_t release)
++{
++	gnttab_entry(release) = *private_head; /* released ref links to the old head */
++	*private_head = release; /* and becomes the new head of the private list */
++}
++EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
++
++void gnttab_request_free_callback(struct gnttab_free_callback *callback,
++				  void (*fn)(void *), void *arg, u16 count)
++{
++	unsigned long irqflags;
++	spin_lock_irqsave(&gnttab_list_lock, irqflags);
++	/* A non-NULL next pointer is taken to mean "already queued". */
++	if (!callback->next) {
++		callback->fn = fn;
++		callback->arg = arg;
++		callback->count = count;
++		callback->next = gnttab_free_callback_list;
++		gnttab_free_callback_list = callback;
++		check_free_callbacks();
++	}
++	spin_unlock_irqrestore(&gnttab_list_lock, irqflags);
++}
++EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
++
++void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
++{
++	struct gnttab_free_callback **link = &gnttab_free_callback_list;
++	unsigned long irqflags;
++
++	spin_lock_irqsave(&gnttab_list_lock, irqflags);
++	/* Find the link that points at callback; stop at the end of the
++	 * list if it is not queued at all. */
++	while (*link && *link != callback)
++		link = &(*link)->next;
++	if (*link)
++		*link = callback->next; /* unlink; callback->next is left as is */
++	spin_unlock_irqrestore(&gnttab_list_lock, irqflags);
++}
++EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
++
++/* Extend the free list by more_frames frames of entries; 0 or -ENOMEM. */
++static int grow_gnttab_list(unsigned int more_frames)
++{
++	unsigned int new_nr_grant_frames, extra_entries, i;
++
++	new_nr_grant_frames = nr_grant_frames + more_frames;
++	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
++
++	for (i = nr_grant_frames; i < new_nr_grant_frames; i++) {
++		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
++		if (!gnttab_list[i])
++			goto grow_nomem;
++	}
++
++	/* Link each new entry to its successor ... */
++	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
++	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
++		gnttab_entry(i) = i + 1;
++	/* ... and the last new entry to the previous head of the free list. */
++	gnttab_entry(i) = gnttab_free_head;
++	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
++	gnttab_free_count += extra_entries;
++
++	nr_grant_frames = new_nr_grant_frames;
++
++	check_free_callbacks();
++	return 0;
++
++grow_nomem:
++	/* i indexes the slot that failed: free only the pages before it.
++	 * Pre-decrement test cannot wrap, even if nr_grant_frames is 0. */
++	while (i-- > nr_grant_frames)
++		free_page((unsigned long) gnttab_list[i]);
++	return -ENOMEM;
++}
++
++static unsigned int __max_nr_grant_frames(void)
++{
++	struct gnttab_query_size query;
++	int rc;
++
++	query.dom = DOMID_SELF;
++	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
++
++	/* Trust the reply only if both the hypercall and the op succeeded. */
++	if ((rc >= 0) && (query.status == GNTST_okay))
++		return query.max_nr_frames;
++	return 4; /* Legacy max supported number of frames */
++}
++
++static inline unsigned int max_nr_grant_frames(void)
++{
++	unsigned int hv_limit = __max_nr_grant_frames();
++
++	/* Never report more than the limit gnttab_init() recorded. */
++	return (hv_limit > boot_max_nr_grant_frames) ?
++		boot_max_nr_grant_frames : hv_limit;
++}
++
++#ifdef CONFIG_XEN
++
++#ifndef __ia64__
++static int map_pte_fn(pte_t *pte, struct page *pmd_page,
++		      unsigned long addr, void *data)
++{
++	unsigned long **cursor = (unsigned long **)data;
++	/* Consume one frame number and advance the caller's array pointer. */
++	unsigned long frame = *(*cursor)++;
++	set_pte_at(&init_mm, addr, pte, pfn_pte_ma(frame, PAGE_KERNEL));
++	return 0;
++}
++
++static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
++			unsigned long addr, void *data)
++{
++	/* apply_to_page_range() callback: write an all-zero PTE at addr. */
++	set_pte_at(&init_mm, addr, pte, __pte(0));
++	return 0;
++}
++#endif
++
++static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
++{
++	struct gnttab_setup_table setup;
++	unsigned long *frames;
++	unsigned int nr_gframes = end_idx + 1; /* start_idx is not used here */
++	int rc;
++
++	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
++	if (!frames)
++		return -ENOMEM;
++
++	setup.dom        = DOMID_SELF;
++	setup.nr_frames  = nr_gframes;
++	set_xen_guest_handle(setup.frame_list, frames);
++	/* frames[] is handed to the hypercall and read back further down. */
++	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
++	if (rc == -ENOSYS) {
++		kfree(frames);
++		return -ENOSYS;
++	}
++	/* Any failure other than -ENOSYS is treated as fatal. */
++	BUG_ON(rc || setup.status);
++
++#ifndef __ia64__
++	if (shared == NULL) {
++		struct vm_struct *area;
++		area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
++		BUG_ON(area == NULL);
++		shared = area->addr;
++	}
++	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
++				 PAGE_SIZE * nr_gframes,
++				 map_pte_fn, &frames);
++	BUG_ON(rc);
++        frames -= nr_gframes; /* map_pte_fn() advanced it once per page: rewind for kfree() */
++#else
++	shared = __va(frames[0] << PAGE_SHIFT);
++#endif
++
++	kfree(frames);
++
++	return 0;
++}
++
++int gnttab_resume(void)
++{
++	if (nr_grant_frames > max_nr_grant_frames()) /* more frames than allowed now */
++		return -ENOSYS;
++	return gnttab_map(0, nr_grant_frames - 1); /* map frames 0 .. nr_grant_frames-1 */
++}
++
++int gnttab_suspend(void)
++{
++#ifndef __ia64__
++	apply_to_page_range(&init_mm, (unsigned long)shared,
++			    PAGE_SIZE * nr_grant_frames,
++			    unmap_pte_fn, NULL); /* zero every PTE covering the in-use frames */
++#endif
++	return 0; /* always succeeds; the apply_to_page_range() result is not checked */
++}
++
++#else /* !CONFIG_XEN */
++
++#include <platform-pci.h>
++
++static unsigned long resume_frames;
++
++static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
++{
++	struct xen_add_to_physmap xatp;
++	unsigned int i = end_idx;
++
++	/* Loop backwards, so that the first hypercall has the largest index,
++	 * ensuring that the table will grow only once.
++	 */
++	do {
++		xatp.domid = DOMID_SELF;
++		xatp.idx = i;
++		xatp.space = XENMAPSPACE_grant_table;
++		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
++		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
++			BUG();
++	} while (i-- > start_idx); /* compare first, then decrement: last pass is i == start_idx */
++
++	return 0; /* failures BUG() above, so this is the only return value */
++}
++
++int gnttab_resume(void)
++{
++	unsigned int max_nr_gframes, nr_gframes;
++
++	nr_gframes = nr_grant_frames;
++	max_nr_gframes = max_nr_grant_frames();
++	if (max_nr_gframes < nr_gframes)
++		return -ENOSYS;
++	/* First call only: reserve MMIO space for the maximum table size. */
++	if (!resume_frames) {
++		resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
++		shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
++		if (shared == NULL) {
++			/* NOTE(review): resume_frames stays set here, so a later
++			 * call would skip this setup with shared still NULL. */
++			printk("error to ioremap gnttab share frames\n");
++			return -1;
++		}
++	}
++	/* Place the in-use frames at resume_frames; gnttab_map() BUG()s on error. */
++	gnttab_map(0, nr_gframes - 1);
++
++	return 0;
++}
++
++#endif /* !CONFIG_XEN */
++
++static int gnttab_expand(unsigned int req_entries)
++{
++	unsigned int have = nr_grant_frames;
++	/* Round the request up to whole grant frames. */
++	unsigned int want = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
++			     GREFS_PER_GRANT_FRAME);
++	int err;
++
++	if (have + want > max_nr_grant_frames())
++		return -ENOSPC;
++
++	err = gnttab_map(have, have + want - 1);
++	if (err)
++		return err;
++	return grow_gnttab_list(want);
++}
++
++/* Set up the grant table and its free list; returns 0 or a negative errno. */
++int __devinit gnttab_init(void)
++{
++	int i, ret = -ENOMEM;	/* ret: what the cleanup path returns */
++	unsigned int max_nr_glist_frames, nr_init_grefs;
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	nr_grant_frames = 1;
++	boot_max_nr_grant_frames = __max_nr_grant_frames();
++
++	/* Determine the maximum number of frames required for the grant
++	 * reference free list on the current hypervisor. */
++	max_nr_glist_frames = (boot_max_nr_grant_frames *
++			       GREFS_PER_GRANT_FRAME /
++			       (PAGE_SIZE / sizeof(grant_ref_t)));
++
++	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
++			      GFP_KERNEL);
++	if (gnttab_list == NULL)
++		return -ENOMEM;
++
++	for (i = 0; i < nr_grant_frames; i++) {
++		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
++		if (gnttab_list[i] == NULL)
++			goto ini_nomem;
++	}
++
++	/* Release the list and its pages if the table cannot be mapped. */
++	if (gnttab_resume() < 0) {
++		ret = -ENODEV;
++		goto ini_nomem;
++	}
++
++	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
++	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
++		gnttab_entry(i) = i + 1;
++
++	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
++	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
++	gnttab_free_head  = NR_RESERVED_ENTRIES;
++	return 0;
++
++ ini_nomem:
++	for (i--; i >= 0; i--)
++		free_page((unsigned long)gnttab_list[i]);
++	kfree(gnttab_list);
++	return ret;
++}
++
++#ifdef CONFIG_XEN
++core_initcall(gnttab_init);
++#endif
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/hypervisor_sysfs.c	2007-08-27 14:02:04.000000000 -0400
+@@ -0,0 +1,59 @@
++/*
++ *  copyright (c) 2006 IBM Corporation
++ *  Authored by: Mike D. Day <ncmike@us.ibm.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2 as
++ *  published by the Free Software Foundation.
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/kobject.h>
++#include <xen/hypervisor_sysfs.h>
++
++decl_subsys(hypervisor, NULL, NULL);
++
++static ssize_t hyp_sysfs_show(struct kobject *kobj,
++			      struct attribute *attr,
++			      char *buffer)
++{
++	struct hyp_sysfs_attr *ha =
++		container_of(attr, struct hyp_sysfs_attr, attr);
++
++	/* An attribute without a show method yields 0 bytes. */
++	return ha->show ? ha->show(ha, buffer) : 0;
++}
++
++static ssize_t hyp_sysfs_store(struct kobject *kobj,
++			       struct attribute *attr,
++			       const char *buffer,
++			       size_t len)
++{
++	struct hyp_sysfs_attr *ha =
++		container_of(attr, struct hyp_sysfs_attr, attr);
++
++	/* An attribute without a store method reports 0 bytes stored. */
++	return ha->store ? ha->store(ha, buffer, len) : 0;
++}
++
++struct sysfs_ops hyp_sysfs_ops = { /* forwards to the per-attribute show/store hooks */
++	.show = hyp_sysfs_show,
++	.store = hyp_sysfs_store,
++};
++
++static struct kobj_type hyp_sysfs_kobj_type = { /* installed on the subsystem kobject at init */
++	.sysfs_ops = &hyp_sysfs_ops,
++};
++
++static int __init hypervisor_subsys_init(void)
++{
++	if (!is_running_on_xen())
++		return -ENODEV;
++	/* Route sysfs reads/writes under this subsystem through hyp_sysfs_ops. */
++	hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
++	return subsystem_register(&hypervisor_subsys);
++}
++
++device_initcall(hypervisor_subsys_init);
++EXPORT_SYMBOL_GPL(hypervisor_subsys);
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/machine_kexec.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,189 @@
++/*
++ * drivers/xen/core/machine_kexec.c 
++ * handle transition of Linux booting another kernel
++ */
++
++#include <linux/kexec.h>
++#include <xen/interface/kexec.h>
++#include <linux/mm.h>
++#include <linux/bootmem.h>
++
++extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, 
++					 struct kimage *image);
++
++int xen_max_nr_phys_cpus;
++struct resource xen_hypervisor_res;
++struct resource *xen_phys_cpus;
++
++void xen_machine_kexec_setup_resources(void)
++{
++	xen_kexec_range_t range;
++	struct resource *res;
++	int k = 0;
++
++	if (!is_initial_xendomain())
++		return;
++
++	/* determine maximum number of physical cpus: probe successive
++	 * indices until the hypervisor rejects one */
++	while (1) {
++		memset(&range, 0, sizeof(range));
++		range.range = KEXEC_RANGE_MA_CPU;
++		range.nr = k;
++
++		if(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
++			break;
++
++		k++;
++	}
++
++	if (k == 0)
++		return;
++
++	xen_max_nr_phys_cpus = k;
++
++	/* allocate xen_phys_cpus: one struct resource per cpu counted above */
++
++	xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource));
++	BUG_ON(xen_phys_cpus == NULL);
++
++	/* fill in xen_phys_cpus with per-cpu crash note information */
++
++	for (k = 0; k < xen_max_nr_phys_cpus; k++) {
++		memset(&range, 0, sizeof(range));
++		range.range = KEXEC_RANGE_MA_CPU;
++		range.nr = k;
++
++		if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
++			goto err;
++
++		res = xen_phys_cpus + k;
++
++		memset(res, 0, sizeof(*res));
++		res->name = "Crash note";
++		res->start = range.start;
++		res->end = range.start + range.size - 1;
++		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
++	}
++
++	/* fill in xen_hypervisor_res with hypervisor machine address range */
++
++	memset(&range, 0, sizeof(range));
++	range.range = KEXEC_RANGE_MA_XEN;
++
++	if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
++		goto err;
++
++	xen_hypervisor_res.name = "Hypervisor code and data";
++	xen_hypervisor_res.start = range.start;
++	xen_hypervisor_res.end = range.start + range.size - 1;
++	xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
++
++	/* fill in crashk_res if range is reserved by hypervisor */
++
++	memset(&range, 0, sizeof(range));
++	range.range = KEXEC_RANGE_MA_CRASH;
++
++	if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
++		return; /* not an error: crashk_res is simply left untouched */
++
++	if (range.size) {
++		crashk_res.start = range.start;
++		crashk_res.end = range.start + range.size - 1;
++	}
++
++	return;
++
++ err:
++	/*
++	 * It isn't possible to free xen_phys_cpus this early in the
++	 * boot. Failure at this stage is unexpected and the amount of
++	 * memory is small, therefore we tolerate the potential leak.
++	 */
++	xen_max_nr_phys_cpus = 0; /* makes the registration loop a no-op */
++	return;
++}
++
++void xen_machine_kexec_register_resources(struct resource *res)
++{
++	int k;
++	/* Hypervisor range goes under res; request_resource() results are ignored. */
++	request_resource(res, &xen_hypervisor_res);
++
++	for (k = 0; k < xen_max_nr_phys_cpus; k++)
++		request_resource(&xen_hypervisor_res, xen_phys_cpus + k);
++	/* Each per-cpu crash note was requested as a child of the hypervisor range. */
++}
++
++static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
++{
++	machine_kexec_setup_load_arg(xki, image);
++	/* After the hook declared extern above, copy the generic fields. */
++	xki->indirection_page = image->head;
++	xki->start_address = image->start;
++}
++
++/*
++ * Load the image into xen so xen can kdump itself
++ * This might have been done in prepare, but prepare
++ * is currently called too early. It might make sense
++ * to move prepare, but for now, just add an extra hook.
++ */
++int xen_machine_kexec_load(struct kimage *image)
++{
++	xen_kexec_load_t xkl;
++	/* Zero the request, then fill in the image type and load arguments. */
++	memset(&xkl, 0, sizeof(xkl));
++	xkl.type = image->type;
++	setup_load_arg(&xkl.image, image);
++	return HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &xkl); /* hypercall result goes to the caller */
++}
++
++/*
++ * Unload the image that was stored by machine_kexec_load()
++ * This might have been done in machine_kexec_cleanup() but it
++ * is called too late, and it's possible xen could try and kdump
++ * using resources that have been freed.
++ */
++void xen_machine_kexec_unload(struct kimage *image)
++{
++	xen_kexec_load_t xkl;
++	/* Only the image type is filled in for an unload request. */
++	memset(&xkl, 0, sizeof(xkl));
++	xkl.type = image->type;
++	HYPERVISOR_kexec_op(KEXEC_CMD_kexec_unload, &xkl); /* result is ignored */
++}
++
++/*
++ * Do not allocate memory (or fail in any way) in machine_kexec().
++ * We are past the point of no return, committed to rebooting now.
++ *
++ * This has the hypervisor move to the preferred reboot CPU,
++ * stop all CPUs and kexec. That is, it combines machine_shutdown()
++ * and machine_kexec() in Linux kexec terms.
++ */
++NORET_TYPE void machine_kexec(struct kimage *image)
++{
++	xen_kexec_exec_t xke;
++	/* The request carries nothing but the image type. */
++	memset(&xke, 0, sizeof(xke));
++	xke.type = image->type;
++	HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &xke);
++	panic("KEXEC_CMD_kexec hypercall should not return\n");
++}
++
++void machine_shutdown(void)
++{
++	/* do nothing: machine_kexec() has the hypervisor stop all CPUs */
++}
++
++
++/*
++ * Local variables:
++ *  c-file-style: "linux"
++ *  indent-tabs-mode: t
++ *  c-indent-level: 8
++ *  c-basic-offset: 8
++ *  tab-width: 8
++ * End:
++ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/machine_reboot.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,241 @@
++#include <linux/version.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/unistd.h>
++#include <linux/module.h>
++#include <linux/reboot.h>
++#include <linux/sysrq.h>
++#include <linux/stringify.h>
++#include <linux/stop_machine.h>
++#include <asm/irq.h>
++#include <asm/mmu_context.h>
++#include <xen/evtchn.h>
++#include <asm/hypervisor.h>
++#include <xen/xenbus.h>
++#include <linux/cpu.h>
++#include <linux/kthread.h>
++#include <xen/gnttab.h>
++#include <xen/xencons.h>
++#include <xen/cpu_hotplug.h>
++#include <xen/interface/vcpu.h>
++
++#if defined(__i386__) || defined(__x86_64__)
++
++/*
++ * Power off function, if any
++ */
++void (*pm_power_off)(void);
++EXPORT_SYMBOL(pm_power_off);
++
++void machine_emergency_restart(void)
++{
++	/* We really want to get pending console data out before we die. */
++	xencons_force_flush();
++	HYPERVISOR_shutdown(SHUTDOWN_reboot); /* request a reboot from the hypervisor */
++}
++
++void machine_restart(char * __unused)
++{
++	machine_emergency_restart(); /* the command string is ignored */
++}
++
++void machine_halt(void)
++{
++	machine_power_off(); /* halt is handled exactly like power-off here */
++}
++
++void machine_power_off(void)
++{
++	/* We really want to get pending console data out before we die. */
++	xencons_force_flush();
++	if (pm_power_off) /* optional hook, run before the hypercall */
++		pm_power_off();
++	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
++}
++
++int reboot_thru_bios = 0;	/* for dmi_scan.c */
++EXPORT_SYMBOL(machine_restart);
++EXPORT_SYMBOL(machine_halt);
++EXPORT_SYMBOL(machine_power_off);
++
++static void pre_suspend(void)
++{
++	HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; /* point at a dummy page meanwhile */
++	HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
++				     __pte_ma(0), 0); /* clear the shared-info fixmap PTE */
++	/* Store the xenstore and console frames as pfns across the suspend. */
++	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
++	xen_start_info->console.domU.mfn =
++		mfn_to_pfn(xen_start_info->console.domU.mfn);
++}
++
++static void post_suspend(int suspend_cancelled)
++{
++	int i, j, k, fpp;
++	unsigned long shinfo_mfn;
++	extern unsigned long max_pfn;
++	extern unsigned long *pfn_to_mfn_frame_list_list;
++	extern unsigned long *pfn_to_mfn_frame_list[];
++	/* Cancelled: undo the mfn->pfn conversion done by pre_suspend(). */
++	if (suspend_cancelled) {
++		xen_start_info->store_mfn =
++			pfn_to_mfn(xen_start_info->store_mfn);
++		xen_start_info->console.domU.mfn =
++			pfn_to_mfn(xen_start_info->console.domU.mfn);
++	} else {
++#ifdef CONFIG_SMP
++		cpu_initialized_map = cpu_online_map;
++#endif
++	}
++	/* Map the shared-info frame back at its fixmap slot. */
++	shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
++	HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
++				     pfn_pte_ma(shinfo_mfn, PAGE_KERNEL), 0);
++	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
++	/* The zero page stood in for shared info during suspend: wipe it. */
++	memset(empty_zero_page, 0, PAGE_SIZE);
++	/* fpp: unsigned longs per page; i strides pfns, j/k index the lists. */
++	fpp = PAGE_SIZE/sizeof(unsigned long);
++	for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
++		if ((j % fpp) == 0) { /* current frame-list page is full: start the next */
++			k++;
++			pfn_to_mfn_frame_list_list[k] =
++				virt_to_mfn(pfn_to_mfn_frame_list[k]);
++			j = 0;
++		}
++		pfn_to_mfn_frame_list[k][j] =
++			virt_to_mfn(&phys_to_machine_mapping[i]);
++	}
++	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
++	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
++		virt_to_mfn(pfn_to_mfn_frame_list_list);
++}
++
++#else /* !(defined(__i386__) || defined(__x86_64__)) */
++
++#ifndef HAVE_XEN_PRE_SUSPEND
++#define xen_pre_suspend()	((void)0)
++#endif
++
++#ifndef HAVE_XEN_POST_SUSPEND
++#define xen_post_suspend(x)	((void)0)
++#endif
++
++#define switch_idle_mm()	((void)0)
++#define mm_pin_all()		((void)0)
++#define pre_suspend()		xen_pre_suspend()
++#define post_suspend(x)		xen_post_suspend(x)
++
++#endif
++
++static int take_machine_down(void *p_fast_suspend)
++{
++	int fast_suspend = *(int *)p_fast_suspend;
++	int suspend_cancelled, err;
++	extern void time_resume(void);
++	/* Fast mode must be entered with interrupts off, slow mode with them on. */
++	if (fast_suspend) {
++		BUG_ON(!irqs_disabled());
++	} else {
++		BUG_ON(irqs_disabled());
++		/* Retry until this is the only online CPU with preemption off. */
++		for (;;) {
++			err = smp_suspend();
++			if (err)
++				return err;
++
++			xenbus_suspend();
++			preempt_disable();
++
++			if (num_online_cpus() == 1)
++				break;
++			/* Another CPU is online again: undo and go round once more. */
++			preempt_enable();
++			xenbus_suspend_cancel();
++		}
++
++		local_irq_disable();
++	}
++
++	mm_pin_all();
++	gnttab_suspend();
++	pre_suspend();
++
++	/*
++	 * This hypercall returns 1 if suspend was cancelled or the domain was
++	 * merely checkpointed, and 0 if it is resuming in a new domain.
++	 */
++	suspend_cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
++	/* Execution continues here after the resume or the cancellation. */
++	post_suspend(suspend_cancelled);
++	gnttab_resume(); /* return value is not checked */
++	if (!suspend_cancelled) {
++		irq_resume();
++#ifdef __x86_64__
++		/*
++		 * Older versions of Xen do not save/restore the user %cr3.
++		 * We do it here just in case, but there's no need if we are
++		 * in fast-suspend mode as that implies a new enough Xen.
++		 */
++		if (!fast_suspend) {
++			struct mmuext_op op;
++			op.cmd = MMUEXT_NEW_USER_BASEPTR;
++			op.arg1.mfn = pfn_to_mfn(__pa(__user_pgd(
++				current->active_mm->pgd)) >> PAGE_SHIFT);
++			if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
++				BUG();
++		}
++#endif
++	}
++	time_resume();
++
++	if (!fast_suspend)
++		local_irq_enable(); /* pairs with local_irq_disable() in the slow path */
++
++	return suspend_cancelled; /* >= 0 here; errors are returned from the loop above */
++}
++
++int __xen_suspend(int fast_suspend)
++{
++	int err, suspend_cancelled;
++	/* Must be called on CPU 0 and outside interrupt context. */
++	BUG_ON(smp_processor_id() != 0);
++	BUG_ON(in_interrupt());
++
++#if defined(__i386__) || defined(__x86_64__)
++	if (xen_feature(XENFEAT_auto_translated_physmap)) {
++		printk(KERN_WARNING "Cannot suspend in "
++		       "auto_translated_physmap mode.\n");
++		return -EOPNOTSUPP;
++	}
++#endif
++
++	/* If we are definitely UP then 'slow mode' is actually faster. */
++	if (num_possible_cpus() == 1)
++		fast_suspend = 0;
++	/* Fast: run under stop_machine_run(). Slow: call directly. */
++	if (fast_suspend) {
++		xenbus_suspend();
++		err = stop_machine_run(take_machine_down, &fast_suspend, 0);
++		if (err < 0)
++			xenbus_suspend_cancel();
++	} else {
++		err = take_machine_down(&fast_suspend);
++	}
++
++	if (err < 0)
++		return err;
++	/* A non-negative result is take_machine_down()'s "cancelled" flag. */
++	suspend_cancelled = err;
++	if (!suspend_cancelled) {
++		xencons_resume();
++		xenbus_resume();
++	} else {
++		xenbus_suspend_cancel();
++	}
++
++	if (!fast_suspend)
++		smp_resume(); /* slow mode only, where smp_suspend() was called */
++
++	return 0;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/reboot.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,249 @@
++#define __KERNEL_SYSCALLS__
++#include <linux/version.h>
++#include <linux/kernel.h>
++#include <linux/unistd.h>
++#include <linux/module.h>
++#include <linux/reboot.h>
++#include <linux/sysrq.h>
++#include <asm/hypervisor.h>
++#include <xen/xenbus.h>
++#include <linux/kthread.h>
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++MODULE_LICENSE("Dual BSD/GPL");
++
++#define SHUTDOWN_INVALID  -1
++#define SHUTDOWN_POWEROFF  0
++#define SHUTDOWN_SUSPEND   2
++/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
++ * report a crash, not be instructed to crash!
++ * HALT is the same as POWEROFF, as far as we're concerned.  The tools use
++ * the distinction when we return the reason code to them.
++ */
++#define SHUTDOWN_HALT      4
++
++/* Ignore multiple shutdown requests. */
++static int shutting_down = SHUTDOWN_INVALID;
++
++/* Can we leave APs online when we suspend? */
++static int fast_suspend;
++
++static void __shutdown_handler(void *unused);
++static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
++
++int __xen_suspend(int fast_suspend);
++
++static int shutdown_process(void *__unused)
++{
++	static char *envp[] = { "HOME=/", "TERM=linux",
++				"PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
++	static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
++
++	extern asmlinkage long sys_reboot(int magic1, int magic2,
++					  unsigned int cmd, void *arg);
++	/* Try userspace /sbin/poweroff first; if it cannot be started, power off directly. */
++	if ((shutting_down == SHUTDOWN_POWEROFF) ||
++	    (shutting_down == SHUTDOWN_HALT)) {
++		if (call_usermodehelper("/sbin/poweroff", poweroff_argv,
++					envp, 0) < 0) {
++#ifdef CONFIG_XEN
++			sys_reboot(LINUX_REBOOT_MAGIC1,
++				   LINUX_REBOOT_MAGIC2,
++				   LINUX_REBOOT_CMD_POWER_OFF,
++				   NULL);
++#endif /* CONFIG_XEN */
++		}
++	}
++
++	shutting_down = SHUTDOWN_INVALID; /* could try again */
++
++	return 0;
++}
++
++static int xen_suspend(void *__unused)
++{
++	int err = __xen_suspend(fast_suspend);
++	if (err)
++		printk(KERN_ERR "Xen suspend failed (%d)\n", err);
++	shutting_down = SHUTDOWN_INVALID; /* accept new shutdown requests again */
++	return 0; /* thread exit status; a suspend failure is only logged */
++}
++
++static int kthread_create_on_cpu(int (*f)(void *arg),
++				 void *arg,
++				 const char *name,
++				 int cpu)
++{
++	struct task_struct *task = kthread_create(f, arg, name);
++
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	kthread_bind(task, cpu); /* bind to the CPU before waking the thread */
++	wake_up_process(task);
++	return 0;
++}
++
++static void __shutdown_handler(void *unused)
++{
++	int err;
++	/* Suspend gets a kthread bound to CPU 0; the rest a kernel thread. */
++	if (shutting_down == SHUTDOWN_SUSPEND)
++		err = kthread_create_on_cpu(xen_suspend, NULL, "suspend", 0);
++	else
++		err = kernel_thread(shutdown_process, NULL,
++				    CLONE_FS | CLONE_FILES);
++
++	if (err < 0) {
++		printk(KERN_WARNING "Error creating shutdown process (%d): "
++		       "retrying...\n", -err);
++		schedule_delayed_work(&shutdown_work, HZ/2);
++	}
++}
++
++static void shutdown_handler(struct xenbus_watch *watch,
++			     const char **vec, unsigned int len)
++{
++	extern void ctrl_alt_del(void);
++	char *str;
++	struct xenbus_transaction xbt;
++	int err;
++	/* A request is already being handled: ignore this one. */
++	if (shutting_down != SHUTDOWN_INVALID)
++		return;
++
++ again:
++	err = xenbus_transaction_start(&xbt);
++	if (err)
++		return;
++
++	str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
++	/* Ignore read errors and empty reads. */
++	if (XENBUS_IS_ERR_READ(str)) {
++		xenbus_transaction_end(xbt, 1);
++		return;
++	}
++	/* Write an empty value back to control/shutdown (result not checked). */
++	xenbus_write(xbt, "control", "shutdown", "");
++
++	err = xenbus_transaction_end(xbt, 0);
++	if (err == -EAGAIN) {
++		kfree(str); /* free this attempt's string before retrying */
++		goto again;
++	}
++	/* "reboot" goes via ctrl_alt_del(); the others set shutting_down. */
++	if (strcmp(str, "poweroff") == 0)
++		shutting_down = SHUTDOWN_POWEROFF;
++	else if (strcmp(str, "reboot") == 0)
++		ctrl_alt_del();
++	else if (strcmp(str, "suspend") == 0)
++		shutting_down = SHUTDOWN_SUSPEND;
++	else if (strcmp(str, "halt") == 0)
++		shutting_down = SHUTDOWN_HALT;
++	else {
++		printk("Ignoring shutdown request: %s\n", str);
++		shutting_down = SHUTDOWN_INVALID;
++	}
++
++	if (shutting_down != SHUTDOWN_INVALID)
++		schedule_work(&shutdown_work);
++
++	kfree(str);
++}
++
++static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
++			  unsigned int len)
++{
++	char sysrq_key = '\0';
++	struct xenbus_transaction xbt;
++	int err;
++
++ again:
++	err = xenbus_transaction_start(&xbt);
++	if (err)
++		return;
++	/* NOTE(review): this only catches a 0 return; if xenbus_scanf()
++	 * reports failure as a negative errno it is missed - confirm. */
++	if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
++		printk(KERN_ERR "Unable to read sysrq code in "
++		       "control/sysrq\n");
++		xenbus_transaction_end(xbt, 1);
++		return;
++	}
++	/* Overwrite a key that was read with '\0' in the same transaction. */
++	if (sysrq_key != '\0')
++		xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
++
++	err = xenbus_transaction_end(xbt, 0);
++	if (err == -EAGAIN)
++		goto again;
++
++#ifdef CONFIG_MAGIC_SYSRQ
++	if (sysrq_key != '\0')
++		handle_sysrq(sysrq_key, NULL, NULL);
++#endif
++}
++
++static struct xenbus_watch shutdown_watch = { /* registered in setup_shutdown_watcher() */
++	.node = "control/shutdown",
++	.callback = shutdown_handler
++};
++
++static struct xenbus_watch sysrq_watch = { /* registered in setup_shutdown_watcher() */
++	.node = "control/sysrq",
++	.callback = sysrq_handler
++};
++
++static int setup_shutdown_watcher(void)
++{
++	int err;
++	/* Result is ignored: on failure fast_suspend keeps its current value. */
++	xenbus_scanf(XBT_NIL, "control",
++		     "platform-feature-multiprocessor-suspend",
++		     "%d", &fast_suspend);
++
++	err = register_xenbus_watch(&shutdown_watch);
++	if (err) {
++		printk(KERN_ERR "Failed to set shutdown watcher\n");
++		return err;
++	}
++
++	err = register_xenbus_watch(&sysrq_watch);
++	if (err) {
++		printk(KERN_ERR "Failed to set sysrq watcher\n");
++		return err; /* the shutdown watch registered above stays in place */
++	}
++
++	return 0;
++}
++
++#ifdef CONFIG_XEN
++
++static int shutdown_event(struct notifier_block *notifier,
++			  unsigned long event,
++			  void *data)
++{
++	setup_shutdown_watcher(); /* its return value is dropped */
++	return NOTIFY_DONE;
++}
++
++static int __init setup_shutdown_event(void)
++{
++	static struct notifier_block xenstore_notifier = {
++		.notifier_call = shutdown_event
++	};
++	register_xenstore_notifier(&xenstore_notifier); /* shutdown_event() becomes the callback */
++
++	return 0;
++}
++
++subsys_initcall(setup_shutdown_event);
++
++#else /* !defined(CONFIG_XEN) */
++
++int xen_reboot_init(void)
++{
++	return setup_shutdown_watcher(); /* !CONFIG_XEN build: watches are set up by direct call */
++}
++
++#endif /* !defined(CONFIG_XEN) */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/smpboot.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,452 @@
++/*
++ *	Xen SMP booting functions
++ *
++ *	See arch/i386/kernel/smpboot.c for copyright and credits for derived
++ *	portions of this file.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/sched.h>
++#include <linux/kernel_stat.h>
++#include <linux/smp_lock.h>
++#include <linux/irq.h>
++#include <linux/bootmem.h>
++#include <linux/notifier.h>
++#include <linux/cpu.h>
++#include <linux/percpu.h>
++#include <asm/desc.h>
++#include <asm/arch_hooks.h>
++#include <asm/pgalloc.h>
++#include <xen/evtchn.h>
++#include <xen/interface/vcpu.h>
++#include <xen/cpu_hotplug.h>
++#include <xen/xenbus.h>
++
++extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
++extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
++
++extern int local_setup_timer(unsigned int cpu);
++extern void local_teardown_timer(unsigned int cpu);
++
++extern void hypervisor_callback(void);
++extern void failsafe_callback(void);
++extern void system_call(void);
++extern void smp_trap_init(trap_info_t *);
++
++/* Number of siblings per CPU package */
++int smp_num_siblings = 1;
++int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
++EXPORT_SYMBOL(phys_proc_id);
++int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
++EXPORT_SYMBOL(cpu_core_id);
++
++cpumask_t cpu_online_map;
++EXPORT_SYMBOL(cpu_online_map);
++cpumask_t cpu_possible_map;
++EXPORT_SYMBOL(cpu_possible_map);
++cpumask_t cpu_initialized_map;
++
++struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
++EXPORT_SYMBOL(cpu_data);
++
++#ifdef CONFIG_HOTPLUG_CPU
++DEFINE_PER_CPU(int, cpu_state) = { 0 };
++#endif
++
++static DEFINE_PER_CPU(int, resched_irq);
++static DEFINE_PER_CPU(int, callfunc_irq);
++static char resched_name[NR_CPUS][15];
++static char callfunc_name[NR_CPUS][15];
++
++u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
++
++void *xquad_portio;
++
++cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
++cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
++EXPORT_SYMBOL(cpu_core_map);
++
++#if defined(__i386__)
++u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
++EXPORT_SYMBOL(x86_cpu_to_apicid);
++#elif !defined(CONFIG_X86_IO_APIC)
++unsigned int maxcpus = NR_CPUS;
++#endif
++
++void __init prefill_possible_map(void)
++{
++	int i, rc;
++	/* Leave the map alone if it already names a CPU other than this one. */
++	for_each_possible_cpu(i)
++	    if (i != smp_processor_id())
++		return;
++	/* Mark every index for which VCPUOP_is_up does not return an error. */
++	for (i = 0; i < NR_CPUS; i++) {
++		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
++		if (rc >= 0)
++			cpu_set(i, cpu_possible_map);
++	}
++}
++
++void __init smp_alloc_memory(void)
++{ /* empty body: nothing is allocated here */
++}
++
++static inline void
++set_cpu_sibling_map(int cpu)
++{
++	phys_proc_id[cpu] = cpu; /* each CPU is given its own package id */
++	cpu_core_id[cpu]  = 0;
++	/* Sibling and core masks contain only the CPU itself. */
++	cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
++	cpu_core_map[cpu]    = cpumask_of_cpu(cpu);
++
++	cpu_data[cpu].booted_cores = 1;
++}
++
++static void
++remove_siblinginfo(int cpu)
++{
++	phys_proc_id[cpu] = BAD_APICID;
++	cpu_core_id[cpu]  = BAD_APICID;
++	/* Reverse of set_cpu_sibling_map(): empty masks, zero booted cores. */
++	cpus_clear(cpu_sibling_map[cpu]);
++	cpus_clear(cpu_core_map[cpu]);
++
++	cpu_data[cpu].booted_cores = 0;
++}
++
++static int xen_smp_intr_init(unsigned int cpu)
++{
++	int rc;
++	/* -1 marks "not bound", so the fail path unbinds only what exists. */
++	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
++
++	sprintf(resched_name[cpu], "resched%d", cpu);
++	rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
++				    cpu,
++				    smp_reschedule_interrupt,
++				    SA_INTERRUPT,
++				    resched_name[cpu],
++				    NULL);
++	if (rc < 0)
++		goto fail;
++	per_cpu(resched_irq, cpu) = rc;
++
++	sprintf(callfunc_name[cpu], "callfunc%d", cpu);
++	rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
++				    cpu,
++				    smp_call_function_interrupt,
++				    SA_INTERRUPT,
++				    callfunc_name[cpu],
++				    NULL);
++	if (rc < 0)
++		goto fail;
++	per_cpu(callfunc_irq, cpu) = rc;
++	/* The per-cpu timer is set up here for every CPU except CPU 0. */
++	if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0))
++		goto fail;
++
++	return 0;
++
++ fail:
++	if (per_cpu(resched_irq, cpu) >= 0)
++		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
++	if (per_cpu(callfunc_irq, cpu) >= 0)
++		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
++	return rc; /* the error from the step that failed */
++}
++
++#ifdef CONFIG_HOTPLUG_CPU
++static void xen_smp_intr_exit(unsigned int cpu)
++{
++	if (cpu != 0)
++		local_teardown_timer(cpu);
++
++	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
++	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
++}
++#endif
++
++void cpu_bringup(void)
++{
++	cpu_init();
++	touch_softlockup_watchdog();
++	preempt_disable();
++	local_irq_enable();
++}
++
++static void cpu_bringup_and_idle(void)
++{
++	cpu_bringup();
++	cpu_idle();
++}
++
++static void cpu_initialize_context(unsigned int cpu)
++{
++	vcpu_guest_context_t ctxt;
++	struct task_struct *idle = idle_task(cpu);
++#ifdef __x86_64__
++	struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
++#else
++	struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
++#endif
++
++	if (cpu_test_and_set(cpu, cpu_initialized_map))
++		return;
++
++	memset(&ctxt, 0, sizeof(ctxt));
++
++	ctxt.flags = VGCF_IN_KERNEL;
++	ctxt.user_regs.ds = __USER_DS;
++	ctxt.user_regs.es = __USER_DS;
++	ctxt.user_regs.fs = 0;
++	ctxt.user_regs.gs = 0;
++	ctxt.user_regs.ss = __KERNEL_DS;
++	ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
++	ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
++
++	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
++
++	smp_trap_init(ctxt.trap_ctxt);
++
++	ctxt.ldt_ents = 0;
++
++	ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
++	ctxt.gdt_ents      = gdt_descr->size / 8;
++
++#ifdef __i386__
++	ctxt.user_regs.cs = __KERNEL_CS;
++	ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
++
++	ctxt.kernel_ss = __KERNEL_DS;
++	ctxt.kernel_sp = idle->thread.esp0;
++
++	ctxt.event_callback_cs     = __KERNEL_CS;
++	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
++	ctxt.failsafe_callback_cs  = __KERNEL_CS;
++	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
++
++	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
++#else /* __x86_64__ */
++	ctxt.user_regs.cs = __KERNEL_CS;
++	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
++
++	ctxt.kernel_ss = __KERNEL_DS;
++	ctxt.kernel_sp = idle->thread.rsp0;
++
++	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
++	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
++	ctxt.syscall_callback_eip  = (unsigned long)system_call;
++
++	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
++
++	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
++#endif
++
++	BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
++}
++
++void __init smp_prepare_cpus(unsigned int max_cpus)
++{
++	int cpu;
++	struct task_struct *idle;
++#ifdef __x86_64__
++	struct desc_ptr *gdt_descr;
++#else
++	struct Xgt_desc_struct *gdt_descr;
++#endif
++
++	boot_cpu_data.apicid = 0;
++	cpu_data[0] = boot_cpu_data;
++
++	cpu_2_logical_apicid[0] = 0;
++	x86_cpu_to_apicid[0] = 0;
++
++	current_thread_info()->cpu = 0;
++
++	for (cpu = 0; cpu < NR_CPUS; cpu++) {
++		cpus_clear(cpu_sibling_map[cpu]);
++		cpus_clear(cpu_core_map[cpu]);
++	}
++
++	set_cpu_sibling_map(0);
++
++	if (xen_smp_intr_init(0))
++		BUG();
++
++	cpu_initialized_map = cpumask_of_cpu(0);
++
++	/* Restrict the possible_map according to max_cpus. */
++	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
++		for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
++			continue;
++		cpu_clear(cpu, cpu_possible_map);
++	}
++
++	for_each_possible_cpu (cpu) {
++		if (cpu == 0)
++			continue;
++
++#ifdef __x86_64__
++		gdt_descr = &cpu_gdt_descr[cpu];
++#else
++		gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
++#endif
++		gdt_descr->address = get_zeroed_page(GFP_KERNEL);
++		if (unlikely(!gdt_descr->address)) {
++			printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
++			       cpu);
++			continue;
++		}
++		gdt_descr->size = GDT_SIZE;
++		memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
++		make_page_readonly(
++			(void *)gdt_descr->address,
++			XENFEAT_writable_descriptor_tables);
++
++		cpu_data[cpu] = boot_cpu_data;
++		cpu_data[cpu].apicid = cpu;
++
++		cpu_2_logical_apicid[cpu] = cpu;
++		x86_cpu_to_apicid[cpu] = cpu;
++
++		idle = fork_idle(cpu);
++		if (IS_ERR(idle))
++			panic("failed fork for CPU %d", cpu);
++
++#ifdef __x86_64__
++		cpu_pda(cpu)->pcurrent = idle;
++		cpu_pda(cpu)->cpunumber = cpu;
++		clear_ti_thread_flag(idle->thread_info, TIF_FORK);
++#endif
++
++		irq_ctx_init(cpu);
++
++#ifdef CONFIG_HOTPLUG_CPU
++		if (is_initial_xendomain())
++			cpu_set(cpu, cpu_present_map);
++#else
++		cpu_set(cpu, cpu_present_map);
++#endif
++	}
++
++	init_xenbus_allowed_cpumask();
++
++#ifdef CONFIG_X86_IO_APIC
++	/*
++	 * Here we can be sure that there is an IO-APIC in the system. Let's
++	 * go and set it up:
++	 */
++	if (!skip_ioapic_setup && nr_ioapics)
++		setup_IO_APIC();
++#endif
++}
++
++void __devinit smp_prepare_boot_cpu(void)
++{
++	prefill_possible_map();
++}
++
++#ifdef CONFIG_HOTPLUG_CPU
++
++/*
++ * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
++ * But do it early enough to catch critical for_each_present_cpu() loops
++ * in i386-specific code.
++ */
++static int __init initialize_cpu_present_map(void)
++{
++	cpu_present_map = cpu_possible_map;
++	return 0;
++}
++core_initcall(initialize_cpu_present_map);
++
++int __cpu_disable(void)
++{
++	cpumask_t map = cpu_online_map;
++	int cpu = smp_processor_id();
++
++	if (cpu == 0)
++		return -EBUSY;
++
++	remove_siblinginfo(cpu);
++
++	cpu_clear(cpu, map);
++	fixup_irqs(map);
++	cpu_clear(cpu, cpu_online_map);
++
++	return 0;
++}
++
++void __cpu_die(unsigned int cpu)
++{
++	while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
++		current->state = TASK_UNINTERRUPTIBLE;
++		schedule_timeout(HZ/10);
++	}
++
++	xen_smp_intr_exit(cpu);
++
++	if (num_online_cpus() == 1)
++		alternatives_smp_switch(0);
++}
++
++#else /* !CONFIG_HOTPLUG_CPU */
++
++int __cpu_disable(void)
++{
++	return -ENOSYS;
++}
++
++void __cpu_die(unsigned int cpu)
++{
++	BUG();
++}
++
++#endif /* CONFIG_HOTPLUG_CPU */
++
++int __devinit __cpu_up(unsigned int cpu)
++{
++	int rc;
++
++	rc = cpu_up_check(cpu);
++	if (rc)
++		return rc;
++
++	cpu_initialize_context(cpu);
++
++	if (num_online_cpus() == 1)
++		alternatives_smp_switch(1);
++
++	/* This must be done before setting cpu_online_map */
++	set_cpu_sibling_map(cpu);
++	wmb();
++
++	rc = xen_smp_intr_init(cpu);
++	if (rc) {
++		remove_siblinginfo(cpu);
++		return rc;
++	}
++
++	cpu_set(cpu, cpu_online_map);
++
++	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
++	BUG_ON(rc);
++
++	return 0;
++}
++
++void __init smp_cpus_done(unsigned int max_cpus)
++{
++}
++
++#ifndef CONFIG_X86_LOCAL_APIC
++int setup_profiling_timer(unsigned int multiplier)
++{
++	return -EINVAL;
++}
++#endif
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/xen_proc.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,23 @@
++
++#include <linux/module.h>
++#include <linux/proc_fs.h>
++#include <xen/xen_proc.h>
++
++static struct proc_dir_entry *xen_base;
++
++struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
++{
++	if ( xen_base == NULL )
++		if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL )
++			panic("Couldn't create /proc/xen");
++	return create_proc_entry(name, mode, xen_base);
++}
++
++EXPORT_SYMBOL_GPL(create_xen_proc_entry); 
++
++void remove_xen_proc_entry(const char *name)
++{
++	remove_proc_entry(name, xen_base);
++}
++
++EXPORT_SYMBOL_GPL(remove_xen_proc_entry); 
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/core/xen_sysfs.c	2007-08-27 14:01:58.000000000 -0400
+@@ -0,0 +1,378 @@
++/*
++ *  copyright (c) 2006 IBM Corporation
++ *  Authored by: Mike D. Day <ncmike@us.ibm.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2 as
++ *  published by the Free Software Foundation.
++ */
++
++#include <linux/err.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <asm/hypervisor.h>
++#include <xen/features.h>
++#include <xen/hypervisor_sysfs.h>
++#include <xen/xenbus.h>
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Mike D. Day <ncmike@us.ibm.com>");
++
++static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	return sprintf(buffer, "xen\n");
++}
++
++HYPERVISOR_ATTR_RO(type);
++
++static int __init xen_sysfs_type_init(void)
++{
++	return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
++}
++
++static void xen_sysfs_type_destroy(void)
++{
++	sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
++}
++
++/* xen version attributes */
++static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
++	if (version)
++		return sprintf(buffer, "%d\n", version >> 16);
++	return -ENODEV;
++}
++
++HYPERVISOR_ATTR_RO(major);
++
++static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
++
++	/* major is version >> 16, so minor is the whole low 16 bits, not 8. */
++	return version ? sprintf(buffer, "%d\n", version & 0xffff) : -ENODEV;
++}
++
++HYPERVISOR_ATTR_RO(minor);
++
++static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int ret = -ENOMEM;
++	char *extra;
++
++	extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
++	if (extra) {
++		ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
++		if (!ret)
++			ret = sprintf(buffer, "%s\n", extra);
++		kfree(extra);
++	}
++
++	return ret;
++}
++
++HYPERVISOR_ATTR_RO(extra);
++
++static struct attribute *version_attrs[] = {
++	&major_attr.attr,
++	&minor_attr.attr,
++	&extra_attr.attr,
++	NULL
++};
++
++static struct attribute_group version_group = {
++	.name = "version",
++	.attrs = version_attrs,
++};
++
++static int __init xen_sysfs_version_init(void)
++{
++	return sysfs_create_group(&hypervisor_subsys.kset.kobj,
++				  &version_group);
++}
++
++static void xen_sysfs_version_destroy(void)
++{
++	sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group);
++}
++
++/* UUID */
++
++static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	char *vm, *val;
++	int ret;
++
++	vm = xenbus_read(XBT_NIL, "vm", "", NULL);
++	if (IS_ERR(vm))
++		return PTR_ERR(vm);
++	val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
++	kfree(vm);
++	if (IS_ERR(val))
++		return PTR_ERR(val);
++	ret = sprintf(buffer, "%s\n", val);
++	kfree(val);
++	return ret;
++}
++
++HYPERVISOR_ATTR_RO(uuid);
++
++static int __init xen_sysfs_uuid_init(void)
++{
++	return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
++}
++
++static void xen_sysfs_uuid_destroy(void)
++{
++	sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
++}
++
++/* xen compilation attributes */
++
++static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int ret = -ENOMEM;
++	struct xen_compile_info *info;
++
++	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
++	if (info) {
++		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
++		if (!ret)
++			ret = sprintf(buffer, "%s\n", info->compiler);
++		kfree(info);
++	}
++
++	return ret;
++}
++
++HYPERVISOR_ATTR_RO(compiler);
++
++static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int ret = -ENOMEM;
++	struct xen_compile_info *info;
++
++	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
++	if (info) {
++		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
++		if (!ret)
++			ret = sprintf(buffer, "%s\n", info->compile_by);
++		kfree(info);
++	}
++
++	return ret;
++}
++
++HYPERVISOR_ATTR_RO(compiled_by);
++
++static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int ret = -ENOMEM;
++	struct xen_compile_info *info;
++
++	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
++	if (info) {
++		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
++		if (!ret)
++			ret = sprintf(buffer, "%s\n", info->compile_date);
++		kfree(info);
++	}
++
++	return ret;
++}
++
++HYPERVISOR_ATTR_RO(compile_date);
++
++static struct attribute *xen_compile_attrs[] = {
++	&compiler_attr.attr,
++	&compiled_by_attr.attr,
++	&compile_date_attr.attr,
++	NULL
++};
++
++static struct attribute_group xen_compilation_group = {
++	.name = "compilation",
++	.attrs = xen_compile_attrs,
++};
++
++static int __init xen_compilation_init(void)
++{
++	return sysfs_create_group(&hypervisor_subsys.kset.kobj,
++				  &xen_compilation_group);
++}
++
++static void xen_compilation_destroy(void)
++{
++	sysfs_remove_group(&hypervisor_subsys.kset.kobj,
++			   &xen_compilation_group);
++}
++
++/* xen properties info */
++
++static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int ret = -ENOMEM;
++	char *caps;
++
++	caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
++	if (caps) {
++		ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
++		if (!ret)
++			ret = sprintf(buffer, "%s\n", caps);
++		kfree(caps);
++	}
++
++	return ret;
++}
++
++HYPERVISOR_ATTR_RO(capabilities);
++
++static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int ret = -ENOMEM;
++	char *cset;
++
++	cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
++	if (cset) {
++		ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
++		if (!ret)
++			ret = sprintf(buffer, "%s\n", cset);
++		kfree(cset);
++	}
++
++	return ret;
++}
++
++HYPERVISOR_ATTR_RO(changeset);
++
++static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int ret = -ENOMEM;
++	struct xen_platform_parameters *parms;
++
++	parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
++	if (parms) {
++		ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
++					     parms);
++		if (!ret)
++			ret = sprintf(buffer, "%lx\n", parms->virt_start);
++		kfree(parms);
++	}
++
++	return ret;
++}
++
++HYPERVISOR_ATTR_RO(virtual_start);
++
++static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	int ret;
++
++	ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
++	if (ret > 0)
++		ret = sprintf(buffer, "%x\n", ret);
++
++	return ret;
++}
++
++HYPERVISOR_ATTR_RO(pagesize);
++
++/* eventually there will be several more features to export */
++static ssize_t xen_feature_show(int index, char *buffer)
++{
++	int ret = -ENOMEM;
++	struct xen_feature_info *info;
++
++	info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL);
++	if (info) {
++		info->submap_idx = index;
++		ret = HYPERVISOR_xen_version(XENVER_get_features, info);
++		if (!ret)
++			ret = sprintf(buffer, "%d\n", info->submap);
++		kfree(info);
++	}
++
++	return ret;
++}
++
++static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
++{
++	return xen_feature_show(XENFEAT_writable_page_tables, buffer);
++}
++
++HYPERVISOR_ATTR_RO(writable_pt);
++
++static struct attribute *xen_properties_attrs[] = {
++	&capabilities_attr.attr,
++	&changeset_attr.attr,
++	&virtual_start_attr.attr,
++	&pagesize_attr.attr,
++	&writable_pt_attr.attr,
++	NULL
++};
++
++static struct attribute_group xen_properties_group = {
++	.name = "properties",
++	.attrs = xen_properties_attrs,
++};
++
++static int __init xen_properties_init(void)
++{
++	return sysfs_create_group(&hypervisor_subsys.kset.kobj,
++				  &xen_properties_group);
++}
++
++static void xen_properties_destroy(void)
++{
++	sysfs_remove_group(&hypervisor_subsys.kset.kobj,
++			   &xen_properties_group);
++}
++
++static int __init hyper_sysfs_init(void)
++{
++	int ret;
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	ret = xen_sysfs_type_init();
++	if (ret)
++		goto out;
++	ret = xen_sysfs_version_init();
++	if (ret)
++		goto version_out;
++	ret = xen_compilation_init();
++	if (ret)
++		goto comp_out;
++	ret = xen_sysfs_uuid_init();
++	if (ret)
++		goto uuid_out;
++	ret = xen_properties_init();
++	if (!ret)
++		goto out;	/* success: skip the reverse-order unwind below */
++
++	xen_sysfs_uuid_destroy();
++uuid_out:
++	xen_compilation_destroy();
++comp_out:
++	xen_sysfs_version_destroy();
++version_out:
++	xen_sysfs_type_destroy();
++out:
++	return ret;
++}
++
++static void hyper_sysfs_exit(void)
++{
++	xen_properties_destroy();
++	xen_compilation_destroy();
++	xen_sysfs_uuid_destroy();
++	xen_sysfs_version_destroy();
++	xen_sysfs_type_destroy();
++
++}
++
++module_init(hyper_sysfs_init);
++module_exit(hyper_sysfs_exit);
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/evtchn/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,2 @@
++
++obj-y	:= evtchn.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/evtchn/evtchn.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,469 @@
++/******************************************************************************
++ * evtchn.c
++ * 
++ * Driver for receiving and demuxing event-channel signals.
++ * 
++ * Copyright (c) 2004-2005, K A Fraser
++ * Multi-process extensions Copyright (c) 2004, Steven Smith
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <linux/string.h>
++#include <linux/errno.h>
++#include <linux/fs.h>
++#include <linux/errno.h>
++#include <linux/miscdevice.h>
++#include <linux/major.h>
++#include <linux/proc_fs.h>
++#include <linux/stat.h>
++#include <linux/poll.h>
++#include <linux/irq.h>
++#include <linux/init.h>
++#include <linux/gfp.h>
++#include <linux/mutex.h>
++#include <xen/evtchn.h>
++#include <xen/public/evtchn.h>
++
++struct per_user_data {
++	/* Notification ring, accessed via /dev/xen/evtchn. */
++#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
++#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
++	evtchn_port_t *ring;
++	unsigned int ring_cons, ring_prod, ring_overflow;
++	struct mutex ring_cons_mutex; /* protect against concurrent readers */
++
++	/* Processes wait on this queue when ring is empty. */
++	wait_queue_head_t evtchn_wait;
++	struct fasync_struct *evtchn_async_queue;
++};
++
++/* Who's bound to each port? */
++static struct per_user_data *port_user[NR_EVENT_CHANNELS];
++static spinlock_t port_user_lock;
++
++/* Queue @port for the user bound to it; wake readers if the ring was empty. */
++void evtchn_device_upcall(int port)
++{
++	struct per_user_data *u;
++
++	spin_lock(&port_user_lock);
++	mask_evtchn(port);
++	clear_evtchn(port);
++
++	if ((u = port_user[port]) != NULL) {
++		if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
++			u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
++			wmb(); /* Ensure ring contents visible */
++			if (u->ring_cons == u->ring_prod++) {
++				wake_up_interruptible(&u->evtchn_wait);
++				kill_fasync(&u->evtchn_async_queue,
++					    SIGIO, POLL_IN);
++			}
++		} else {
++			u->ring_overflow = 1;
++		}
++	}
++	spin_unlock(&port_user_lock);
++}
++
++/* Read pending ports from the ring; sleeps when empty unless O_NONBLOCK. */
++static ssize_t evtchn_read(struct file *file, char __user *buf,
++			   size_t count, loff_t *ppos)
++{
++	int rc;
++	unsigned int c, p, bytes1 = 0, bytes2 = 0;
++	struct per_user_data *u = file->private_data;
++
++	/* Whole number of ports. */
++	count &= ~(sizeof(evtchn_port_t)-1);
++
++	if (count == 0)
++		return 0;
++	if (count > PAGE_SIZE)
++		count = PAGE_SIZE;
++
++	for (;;) {
++		mutex_lock(&u->ring_cons_mutex);
++
++		rc = -EFBIG;
++		if (u->ring_overflow)
++			goto unlock_out;
++
++		if ((c = u->ring_cons) != (p = u->ring_prod))
++			break;
++
++		mutex_unlock(&u->ring_cons_mutex);
++
++		if (file->f_flags & O_NONBLOCK)
++			return -EAGAIN;
++
++		rc = wait_event_interruptible(
++			u->evtchn_wait, u->ring_cons != u->ring_prod);
++		if (rc)
++			return rc;
++	}
++
++	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
++	if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
++		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
++			sizeof(evtchn_port_t);
++		bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
++	} else {
++		bytes1 = (p - c) * sizeof(evtchn_port_t);
++	}
++
++	/* Truncate chunks according to caller's maximum byte count. */
++	if (bytes1 > count) {
++		bytes1 = count;
++		bytes2 = 0;
++	} else if ((bytes1 + bytes2) > count) {
++		bytes2 = count - bytes1;
++	}
++
++	rc = -EFAULT;
++	rmb(); /* Ensure that we see the port before we copy it. */
++	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
++	    ((bytes2 != 0) &&
++	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
++		goto unlock_out;
++
++	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
++	rc = bytes1 + bytes2;
++
++ unlock_out:
++	mutex_unlock(&u->ring_cons_mutex);
++	return rc;
++}
++
++static ssize_t evtchn_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *ppos)
++{
++	int rc, i;
++	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
++	struct per_user_data *u = file->private_data;
++
++	if (kbuf == NULL)
++		return -ENOMEM;
++
++	/* Whole number of ports. */
++	count &= ~(sizeof(evtchn_port_t)-1);
++
++	rc = 0;
++	if (count == 0)
++		goto out;
++
++	if (count > PAGE_SIZE)
++		count = PAGE_SIZE;
++
++	rc = -EFAULT;
++	if (copy_from_user(kbuf, buf, count) != 0)
++		goto out;
++
++	spin_lock_irq(&port_user_lock);
++	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
++		if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
++			unmask_evtchn(kbuf[i]);
++	spin_unlock_irq(&port_user_lock);
++
++	rc = count;
++
++ out:
++	free_page((unsigned long)kbuf);
++	return rc;
++}
++
++static void evtchn_bind_to_user(struct per_user_data *u, int port)
++{
++	spin_lock_irq(&port_user_lock);
++	BUG_ON(port_user[port] != NULL);
++	port_user[port] = u;
++	unmask_evtchn(port);
++	spin_unlock_irq(&port_user_lock);
++}
++
++static int evtchn_ioctl(struct inode *inode, struct file *file,
++			unsigned int cmd, unsigned long arg)
++{
++	int rc;
++	struct per_user_data *u = file->private_data;
++	void __user *uarg = (void __user *) arg;
++
++	switch (cmd) {
++	case IOCTL_EVTCHN_BIND_VIRQ: {
++		struct ioctl_evtchn_bind_virq bind;
++		struct evtchn_bind_virq bind_virq;
++
++		rc = -EFAULT;
++		if (copy_from_user(&bind, uarg, sizeof(bind)))
++			break;
++
++		bind_virq.virq = bind.virq;
++		bind_virq.vcpu = 0;
++		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
++						 &bind_virq);
++		if (rc != 0)
++			break;
++
++		rc = bind_virq.port;
++		evtchn_bind_to_user(u, rc);
++		break;
++	}
++
++	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
++		struct ioctl_evtchn_bind_interdomain bind;
++		struct evtchn_bind_interdomain bind_interdomain;
++
++		rc = -EFAULT;
++		if (copy_from_user(&bind, uarg, sizeof(bind)))
++			break;
++
++		bind_interdomain.remote_dom  = bind.remote_domain;
++		bind_interdomain.remote_port = bind.remote_port;
++		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
++						 &bind_interdomain);
++		if (rc != 0)
++			break;
++
++		rc = bind_interdomain.local_port;
++		evtchn_bind_to_user(u, rc);
++		break;
++	}
++
++	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
++		struct ioctl_evtchn_bind_unbound_port bind;
++		struct evtchn_alloc_unbound alloc_unbound;
++
++		rc = -EFAULT;
++		if (copy_from_user(&bind, uarg, sizeof(bind)))
++			break;
++
++		alloc_unbound.dom        = DOMID_SELF;
++		alloc_unbound.remote_dom = bind.remote_domain;
++		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
++						 &alloc_unbound);
++		if (rc != 0)
++			break;
++
++		rc = alloc_unbound.port;
++		evtchn_bind_to_user(u, rc);
++		break;
++	}
++
++	case IOCTL_EVTCHN_UNBIND: {
++		struct ioctl_evtchn_unbind unbind;
++		struct evtchn_close close;
++		int ret;
++
++		rc = -EFAULT;
++		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
++			break;
++
++		rc = -EINVAL;
++		if (unbind.port >= NR_EVENT_CHANNELS)
++			break;
++
++		spin_lock_irq(&port_user_lock);
++    
++		rc = -ENOTCONN;
++		if (port_user[unbind.port] != u) {
++			spin_unlock_irq(&port_user_lock);
++			break;
++		}
++
++		port_user[unbind.port] = NULL;
++		mask_evtchn(unbind.port);
++
++		spin_unlock_irq(&port_user_lock);
++
++		close.port = unbind.port;
++		ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
++		BUG_ON(ret);
++
++		rc = 0;
++		break;
++	}
++
++	case IOCTL_EVTCHN_NOTIFY: {
++		struct ioctl_evtchn_notify notify;
++
++		rc = -EFAULT;
++		if (copy_from_user(&notify, uarg, sizeof(notify)))
++			break;
++
++		if (notify.port >= NR_EVENT_CHANNELS) {
++			rc = -EINVAL;
++		} else if (port_user[notify.port] != u) {
++			rc = -ENOTCONN;
++		} else {
++			notify_remote_via_evtchn(notify.port);
++			rc = 0;
++		}
++		break;
++	}
++
++	case IOCTL_EVTCHN_RESET: {
++		/* Initialise the ring to empty. Clear errors. */
++		mutex_lock(&u->ring_cons_mutex);
++		spin_lock_irq(&port_user_lock);
++		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
++		spin_unlock_irq(&port_user_lock);
++		mutex_unlock(&u->ring_cons_mutex);
++		rc = 0;
++		break;
++	}
++
++	default:
++		rc = -ENOSYS;
++		break;
++	}
++
++	return rc;
++}
++
++static unsigned int evtchn_poll(struct file *file, poll_table *wait)
++{
++	unsigned int mask = POLLOUT | POLLWRNORM;
++	struct per_user_data *u = file->private_data;
++
++	poll_wait(file, &u->evtchn_wait, wait);
++	if (u->ring_cons != u->ring_prod)
++		mask |= POLLIN | POLLRDNORM;
++	if (u->ring_overflow)
++		mask = POLLERR;
++	return mask;
++}
++
++static int evtchn_fasync(int fd, struct file *filp, int on)
++{
++	struct per_user_data *u = filp->private_data;
++	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
++}
++
++/* Allocate the per-open state and its one-page notification ring. */
++static int evtchn_open(struct inode *inode, struct file *filp)
++{
++	struct per_user_data *u;
++
++	/* Zeroed allocation: ring indices and the overflow flag start at 0. */
++	u = kzalloc(sizeof(*u), GFP_KERNEL);
++	if (u == NULL)
++		return -ENOMEM;
++
++	init_waitqueue_head(&u->evtchn_wait);
++	mutex_init(&u->ring_cons_mutex);
++
++	u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
++	if (u->ring == NULL) {
++		kfree(u);
++		return -ENOMEM;
++	}
++
++	filp->private_data = u;
++	return 0;
++}
++
++static int evtchn_release(struct inode *inode, struct file *filp)
++{
++	int i;
++	struct per_user_data *u = filp->private_data;
++	struct evtchn_close close;
++
++	spin_lock_irq(&port_user_lock);
++
++	free_page((unsigned long)u->ring);
++
++	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
++		int ret;
++		if (port_user[i] != u)
++			continue;
++
++		port_user[i] = NULL;
++		mask_evtchn(i);
++
++		close.port = i;
++		ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
++		BUG_ON(ret);
++	}
++
++	spin_unlock_irq(&port_user_lock);
++
++	kfree(u);
++
++	return 0;
++}
++
++static const struct file_operations evtchn_fops = {
++	.owner   = THIS_MODULE,
++	.read    = evtchn_read,
++	.write   = evtchn_write,
++	.ioctl   = evtchn_ioctl,
++	.poll    = evtchn_poll,
++	.fasync  = evtchn_fasync,
++	.open    = evtchn_open,
++	.release = evtchn_release,
++};
++
++static struct miscdevice evtchn_miscdev = {
++	.minor        = MISC_DYNAMIC_MINOR,
++	.name         = "evtchn",
++	.fops         = &evtchn_fops,
++};
++
++/* Module init: register the evtchn misc device; -ENODEV when not on Xen. */
++static int __init evtchn_init(void)
++{
++	int err;
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	spin_lock_init(&port_user_lock);
++	memset(port_user, 0, sizeof(port_user));
++
++	/* Create '/dev/misc/evtchn'. */
++	err = misc_register(&evtchn_miscdev);
++	if (err != 0) {
++		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
++		return err;
++	}
++
++	printk(KERN_INFO "Event-channel device installed.\n");
++	return 0;
++}
++
++static void evtchn_cleanup(void)
++{
++	misc_deregister(&evtchn_miscdev);
++}
++
++module_init(evtchn_init);
++module_exit(evtchn_cleanup);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/fbfront/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,2 @@
++obj-$(CONFIG_XEN_FRAMEBUFFER)	:= xenfb.o
++obj-$(CONFIG_XEN_KEYBOARD)	+= xenkbd.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/fbfront/xenfb.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,752 @@
++/*
++ * linux/drivers/video/xenfb.c -- Xen para-virtual frame buffer device
++ *
++ * Copyright (C) 2005-2006 Anthony Liguori <aliguori@us.ibm.com>
++ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
++ *
++ *  Based on linux/drivers/video/q40fb.c
++ *
++ *  This file is subject to the terms and conditions of the GNU General Public
++ *  License. See the file COPYING in the main directory of this archive for
++ *  more details.
++ */
++
++/*
++ * TODO:
++ *
++ * Switch to grant tables when they become capable of dealing with the
++ * frame buffer.
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/fb.h>
++#include <linux/module.h>
++#include <linux/vmalloc.h>
++#include <linux/mm.h>
++#include <linux/mutex.h>
++#include <asm/hypervisor.h>
++#include <xen/evtchn.h>
++#include <xen/interface/io/fbif.h>
++#include <xen/interface/io/protocols.h>
++#include <xen/xenbus.h>
++#include <linux/kthread.h>
++
++struct xenfb_mapping
++{
++	struct list_head	link;
++	struct vm_area_struct	*vma;
++	atomic_t		map_refs;
++	int			faults;
++	struct xenfb_info	*info;
++};
++
++struct xenfb_info
++{
++	struct task_struct	*kthread;
++	wait_queue_head_t	wq;
++
++	unsigned char		*fb;
++	struct fb_info		*fb_info;
++	struct timer_list	refresh;
++	int			dirty;
++	int			x1, y1, x2, y2;	/* dirty rectangle,
++						   protected by dirty_lock */
++	spinlock_t		dirty_lock;
++	struct mutex		mm_lock;
++	int			nr_pages;
++	struct page		**pages;
++	struct list_head	mappings; /* protected by mm_lock */
++
++	int			irq;
++	struct xenfb_page	*page;
++	unsigned long 		*mfns;
++	int			update_wanted; /* XENFB_TYPE_UPDATE wanted */
++
++	struct xenbus_device	*xbdev;
++};
++
++/*
++ * How the locks work together
++ *
++ * There are two locks: spinlock dirty_lock protecting the dirty
++ * rectangle, and mutex mm_lock protecting mappings.
++ *
++ * The problem is that dirty rectangle and mappings aren't
++ * independent: the dirty rectangle must cover all faulted pages in
++ * mappings.  We need to prove that our locking maintains this
++ * invariant.
++ *
++ * There are several kinds of critical regions:
++ *
++ * 1. Holding only dirty_lock: xenfb_refresh().  May run in
++ *    interrupts.  Extends the dirty rectangle.  Trivially preserves
++ *    invariant.
++ *
++ * 2. Holding only mm_lock: xenfb_mmap() and xenfb_vm_close().  Touch
++ *    only mappings.  The former creates unfaulted pages.  Preserves
++ *    invariant.  The latter removes pages.  Preserves invariant.
++ *
++ * 3. Holding both locks: xenfb_vm_nopage().  Extends the dirty
++ *    rectangle and updates mappings consistently.  Preserves
++ *    invariant.
++ *
++ * 4. The ugliest one: xenfb_update_screen().  Clear the dirty
++ *    rectangle and update mappings consistently.
++ *
++ *    We can't simply hold both locks, because zap_page_range() cannot
++ *    be called with a spinlock held.
++ *
++ *    Therefore, we first clear the dirty rectangle with both locks
++ *    held.  Then we unlock dirty_lock and update the mappings.
++ *    Critical regions that hold only dirty_lock may interfere with
++ *    that.  This can only be region 1: xenfb_refresh().  But that
++ *    just extends the dirty rectangle, which can't harm the
++ *    invariant.
++ *
++ * But FIXME: the invariant is too weak.  It misses that the fault
++ * record in mappings must be consistent with the mapping of pages in
++ * the associated address space!  do_no_page() updates the PTE after
++ * xenfb_vm_nopage() returns, i.e. outside the critical region.  This
++ * allows the following race:
++ *
++ * X writes to some address in the Xen frame buffer
++ * Fault - call do_no_page()
++ *     call xenfb_vm_nopage()
++ *         grab mm_lock
++ *         map->faults++;
++ *         release mm_lock
++ *     return back to do_no_page()
++ * (preempted, or SMP)
++ * Xen worker thread runs.
++ *      grab mm_lock
++ *      look at mappings
++ *          find this mapping, zaps its pages (but page not in pte yet)
++ *          clear map->faults
++ *      releases mm_lock
++ * (back to X process)
++ *     put page in X's pte
++ *
++ * Oh well, we won't be updating the writes to this page anytime soon.
++ */
++
++static int xenfb_fps = 20;	/* refresh timer is armed HZ/xenfb_fps jiffies ahead */
++static unsigned long xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8;
++
++static int xenfb_remove(struct xenbus_device *);
++static void xenfb_init_shared_page(struct xenfb_info *);
++static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
++static void xenfb_disconnect_backend(struct xenfb_info *);
++
++static void xenfb_do_update(struct xenfb_info *info,
++			    int x, int y, int w, int h)
++{
++	union xenfb_out_event event;	/* built locally, then copied into the out ring */
++	__u32 prod;
++
++	event.type = XENFB_TYPE_UPDATE;
++	event.update.x = x;
++	event.update.y = y;
++	event.update.width = w;
++	event.update.height = h;
++
++	prod = info->page->out_prod;
++	/* caller ensures !xenfb_queue_full() */
++	mb();			/* ensure ring space available */
++	XENFB_OUT_RING_REF(info->page, prod) = event;
++	wmb();			/* ensure ring contents visible */
++	info->page->out_prod = prod + 1;	/* publish the event after its contents */
++
++	notify_remote_via_irq(info->irq);	/* tell the other end there is work */
++}
++
++static int xenfb_queue_full(struct xenfb_info *info)
++{
++	/* Sample the producer index first, then the consumer index. */
++	__u32 produced = info->page->out_prod;
++	__u32 consumed = info->page->out_cons;
++
++	return produced - consumed == XENFB_OUT_RING_LEN; /* every slot in use */
++}
++
++static void xenfb_update_screen(struct xenfb_info *info)
++{
++	unsigned long flags;
++	int y1, y2, x1, x2;
++	struct xenfb_mapping *map;
++
++	if (!info->update_wanted)	/* set from the backend's "request-update" key */
++		return;
++	if (xenfb_queue_full(info))	/* no ring slot; the dirty rectangle is left as is */
++		return;
++
++	mutex_lock(&info->mm_lock);
++
++	spin_lock_irqsave(&info->dirty_lock, flags);
++	y1 = info->y1;			/* snapshot the dirty rectangle ... */
++	y2 = info->y2;
++	x1 = info->x1;
++	x2 = info->x2;
++	info->x1 = info->y1 = INT_MAX;	/* ... and reset it to empty */
++	info->x2 = info->y2 = 0;
++	spin_unlock_irqrestore(&info->dirty_lock, flags);
++
++	list_for_each_entry(map, &info->mappings, link) {
++		if (!map->faults)	/* nothing faulted in since the last zap */
++			continue;
++		zap_page_range(map->vma, map->vma->vm_start,
++			       map->vma->vm_end - map->vma->vm_start, NULL);
++		map->faults = 0;	/* counted up again by xenfb_vm_nopage() */
++	}
++
++	mutex_unlock(&info->mm_lock);
++
++	xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1);	/* x, y, width, height */
++}
++
++static int xenfb_thread(void *data)
++{
++	struct xenfb_info *info = data;	/* passed by kthread_run() in xenfb_probe() */
++
++	while (!kthread_should_stop()) {
++		if (info->dirty) {	/* set by xenfb_timer() */
++			info->dirty = 0;	/* cleared before the update is sent */
++			xenfb_update_screen(info);
++		}
++		wait_event_interruptible(info->wq,
++			kthread_should_stop() || info->dirty);
++		try_to_freeze();
++	}
++	return 0;
++}
++
++static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green,
++			   unsigned blue, unsigned transp,
++			   struct fb_info *info)
++{
++	u32 v;	/* packed pixel value stored for palette entry regno */
++
++	if (regno >= info->cmap.len)	/* valid entries are 0 .. len - 1 */
++		return 1;
++
++	red   >>= (16 - info->var.red.length);	/* scale down to the field width */
++	green >>= (16 - info->var.green.length);
++	blue  >>= (16 - info->var.blue.length);
++
++	v = (red << info->var.red.offset) |
++	    (green << info->var.green.offset) |
++	    (blue << info->var.blue.offset);
++
++	/* FIXME is this sane?  check against xxxfb_setcolreg()!  */
++	switch (info->var.bits_per_pixel) {
++	case 16:
++	case 24:
++	case 32:
++		((u32 *)info->pseudo_palette)[regno] = v;
++		break;
++	}
++
++	return 0;	/* transp is ignored; other depths store nothing */
++}
++
++static void xenfb_timer(unsigned long data)
++{
++	struct xenfb_info *fb = (void *)data;	/* refresh.data, set in xenfb_probe() */
++	fb->dirty = 1;				/* picked up by xenfb_thread() */
++	wake_up(&fb->wq);
++}
++
++static void __xenfb_refresh(struct xenfb_info *info,
++			    int x1, int y1, int w, int h)
++{
++	/*
++	 * Grow the pending dirty rectangle so that it also covers
++	 * [x1, x1 + w) x [y1, y1 + h).  Callers hold info->dirty_lock.
++	 */
++	const int right = x1 + w;
++	const int bottom = y1 + h;
++
++	info->x1 = min(info->x1, x1);
++	info->y1 = min(info->y1, y1);
++	info->x2 = max(info->x2, right);
++	info->y2 = max(info->y2, bottom);
++
++	/*
++	 * A timer that is already pending keeps its expiry; otherwise
++	 * arm it HZ/xenfb_fps jiffies from now.
++	 */
++	if (!timer_pending(&info->refresh))
++		mod_timer(&info->refresh, jiffies + HZ/xenfb_fps);
++}
++
++static void xenfb_refresh(struct xenfb_info *info,
++			  int x1, int y1, int w, int h)
++{
++	unsigned long irq_state;	/* saved by spin_lock_irqsave() */
++
++	spin_lock_irqsave(&info->dirty_lock, irq_state);
++	__xenfb_refresh(info, x1, y1, w, h);	/* merge the rectangle, arm the timer */
++	spin_unlock_irqrestore(&info->dirty_lock, irq_state);
++}
++
++static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
++{
++	/* Paint with the generic cfb helper, then record the touched area. */
++	cfb_fillrect(p, rect);
++	xenfb_refresh(p->par,
++		      rect->dx, rect->dy, rect->width, rect->height);
++}
++
++static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image)
++{
++	/* Blit with the generic cfb helper, then record the touched area. */
++	cfb_imageblit(p, image);
++	xenfb_refresh(p->par,
++		      image->dx, image->dy, image->width, image->height);
++}
++
++static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
++{
++	/* Copy with the generic cfb helper, then record the destination area. */
++	cfb_copyarea(p, area);
++	xenfb_refresh(p->par,
++		      area->dx, area->dy, area->width, area->height);
++}
++
++static void xenfb_vm_open(struct vm_area_struct *vma)
++{
++	struct xenfb_mapping *map = vma->vm_private_data; /* stored by xenfb_mmap() */
++	atomic_inc(&map->map_refs);	/* dropped again in xenfb_vm_close() */
++}
++
++static void xenfb_vm_close(struct vm_area_struct *vma)
++{
++	struct xenfb_mapping *map = vma->vm_private_data;
++	struct xenfb_info *info = map->info;
++
++	mutex_lock(&info->mm_lock);	/* info->mappings is protected by mm_lock */
++	if (atomic_dec_and_test(&map->map_refs)) {	/* last reference dropped */
++		list_del(&map->link);
++		kfree(map);
++	}
++	mutex_unlock(&info->mm_lock);
++}
++
++static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
++				    unsigned long vaddr, int *type)
++{
++	struct xenfb_mapping *map = vma->vm_private_data;
++	struct xenfb_info *info = map->info;
++	int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;	/* page index within the mapping */
++	unsigned long flags;
++	struct page *page;
++	int y1, y2;
++
++	if (pgnr >= info->nr_pages)
++		return NOPAGE_SIGBUS;
++
++	mutex_lock(&info->mm_lock);
++	spin_lock_irqsave(&info->dirty_lock, flags);
++	page = info->pages[pgnr];
++	get_page(page);		/* take a reference on the page we hand back */
++	map->faults++;		/* makes xenfb_update_screen() zap this mapping */
++
++	y1 = pgnr * PAGE_SIZE / info->fb_info->fix.line_length;	/* line of the page's first byte */
++	y2 = (pgnr * PAGE_SIZE + PAGE_SIZE - 1) / info->fb_info->fix.line_length;
++	if (y2 > info->fb_info->var.yres)
++		y2 = info->fb_info->var.yres;	/* NOTE(review): y2 is the line of the page's last byte, so height y2 - y1 may omit it - confirm */
++	__xenfb_refresh(info, 0, y1, info->fb_info->var.xres, y2 - y1);
++	spin_unlock_irqrestore(&info->dirty_lock, flags);
++	mutex_unlock(&info->mm_lock);
++
++	if (type)
++		*type = VM_FAULT_MINOR;
++
++	return page;
++}
++
++static struct vm_operations_struct xenfb_vm_ops = {
++	.open	= xenfb_vm_open,	/* takes a map_refs reference */
++	.close	= xenfb_vm_close,	/* drops it; frees the mapping record at zero */
++	.nopage	= xenfb_vm_nopage,	/* hands out fb pages and marks their lines dirty */
++};
++
++static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
++{
++	struct xenfb_info *info = fb_info->par;
++	struct xenfb_mapping *mapping;
++	int wanted_pages;
++
++	/* Accept only shared, writable mappings that start at offset 0. */
++	if (!(vma->vm_flags & VM_WRITE) || !(vma->vm_flags & VM_SHARED) ||
++	    vma->vm_pgoff != 0)
++		return -EINVAL;
++
++	/* Round the length up to whole pages; it must fit the buffer. */
++	wanted_pages = (vma->vm_end - vma->vm_start + PAGE_SIZE-1) >> PAGE_SHIFT;
++	if (wanted_pages > info->nr_pages)
++		return -EINVAL;
++
++	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
++	if (mapping == NULL)
++		return -ENOMEM;
++
++	mapping->vma = vma;
++	mapping->faults = 0;	/* nothing faulted in yet */
++	mapping->info = info;
++	atomic_set(&mapping->map_refs, 1);
++
++	mutex_lock(&info->mm_lock);	/* the mappings list is protected by mm_lock */
++	list_add(&mapping->link, &info->mappings);
++	mutex_unlock(&info->mm_lock);
++
++	/* Install the open/close/nopage handlers for this mapping. */
++	vma->vm_ops = &xenfb_vm_ops;
++	vma->vm_flags |= (VM_DONTEXPAND | VM_RESERVED);
++	vma->vm_private_data = mapping;
++
++	return 0;
++}
++
++static struct fb_ops xenfb_fb_ops = {
++	.owner		= THIS_MODULE,
++	.fb_setcolreg	= xenfb_setcolreg,
++	.fb_fillrect	= xenfb_fillrect,	/* cfb_fillrect() plus dirty tracking */
++	.fb_copyarea	= xenfb_copyarea,	/* cfb_copyarea() plus dirty tracking */
++	.fb_imageblit	= xenfb_imageblit,	/* cfb_imageblit() plus dirty tracking */
++	.fb_mmap	= xenfb_mmap,		/* pages are supplied by xenfb_vm_nopage() */
++};
++
++static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
++				       struct pt_regs *regs)
++{
++	/*
++	 * The frame buffer recognizes no in events, so the in ring is
++	 * simply drained.  To actually decode events, see xenkbd's
++	 * input_handler() for how to do that.
++	 */
++	struct xenfb_info *info = dev_id;
++	struct xenfb_page *shared = info->page;
++
++	if (shared->in_cons != shared->in_prod) {
++		shared->in_cons = shared->in_prod;	/* discard everything queued */
++		notify_remote_via_irq(info->irq);
++	}
++	return IRQ_HANDLED;
++}
++
++static unsigned long vmalloc_to_mfn(void *address)
++{
++	return pfn_to_mfn(vmalloc_to_pfn(address));	/* vmalloc address -> pfn -> mfn */
++}
++
++static int __devinit xenfb_probe(struct xenbus_device *dev,
++				 const struct xenbus_device_id *id)
++{
++	struct xenfb_info *info;
++	struct fb_info *fb_info;
++	int ret;
++
++	info = kzalloc(sizeof(*info), GFP_KERNEL);
++	if (info == NULL) {
++		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
++		return -ENOMEM;
++	}
++	dev->dev.driver_data = info;	/* xenfb_remove() finds info through this */
++	info->xbdev = dev;
++	info->irq = -1;			/* no event channel bound yet */
++	info->x1 = info->y1 = INT_MAX;	/* empty dirty rectangle (x2 = y2 = 0) */
++	spin_lock_init(&info->dirty_lock);
++	mutex_init(&info->mm_lock);
++	init_waitqueue_head(&info->wq);
++	init_timer(&info->refresh);
++	info->refresh.function = xenfb_timer;
++	info->refresh.data = (unsigned long)info;
++	INIT_LIST_HEAD(&info->mappings);
++
++	info->fb = vmalloc(xenfb_mem_len);
++	if (info->fb == NULL)
++		goto error_nomem;
++	memset(info->fb, 0, xenfb_mem_len);
++
++	info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
++
++	info->pages = kmalloc(sizeof(struct page *) * info->nr_pages,
++			      GFP_KERNEL);
++	if (info->pages == NULL)
++		goto error_nomem;
++
++	info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
++	if (!info->mfns)
++		goto error_nomem;
++
++	/* set up the page shared with the backend; it starts out zeroed */
++	info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
++	if (!info->page)
++		goto error_nomem;
++
++	xenfb_init_shared_page(info);
++
++	fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL);
++				/* the par area becomes the palette, see hackery below */
++	if (fb_info == NULL)
++		goto error_nomem;
++
++	/* FIXME fishy hackery */
++	fb_info->pseudo_palette = fb_info->par;
++	fb_info->par = info;
++	/* /FIXME */
++	fb_info->screen_base = info->fb;
++
++	fb_info->fbops = &xenfb_fb_ops;
++	fb_info->var.xres_virtual = fb_info->var.xres = info->page->width;
++	fb_info->var.yres_virtual = fb_info->var.yres = info->page->height;
++	fb_info->var.bits_per_pixel = info->page->depth;
++
++	fb_info->var.red = (struct fb_bitfield){16, 8, 0};
++	fb_info->var.green = (struct fb_bitfield){8, 8, 0};
++	fb_info->var.blue = (struct fb_bitfield){0, 8, 0};
++
++	fb_info->var.activate = FB_ACTIVATE_NOW;
++	fb_info->var.height = -1;
++	fb_info->var.width = -1;
++	fb_info->var.vmode = FB_VMODE_NONINTERLACED;
++
++	fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
++	fb_info->fix.line_length = info->page->line_length;
++	fb_info->fix.smem_start = 0;
++	fb_info->fix.smem_len = xenfb_mem_len;
++	strcpy(fb_info->fix.id, "xen");
++	fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
++	fb_info->fix.accel = FB_ACCEL_NONE;
++
++	fb_info->flags = FBINFO_FLAG_DEFAULT;
++
++	ret = fb_alloc_cmap(&fb_info->cmap, 256, 0);
++	if (ret < 0) {
++		framebuffer_release(fb_info);
++		xenbus_dev_fatal(dev, ret, "fb_alloc_cmap");
++		goto error;
++	}
++
++	ret = register_framebuffer(fb_info);
++	if (ret) {
++		fb_dealloc_cmap(&fb_info->cmap);	/* info->fb_info is still NULL here */
++		framebuffer_release(fb_info);
++		xenbus_dev_fatal(dev, ret, "register_framebuffer");
++		goto error;
++	}
++	info->fb_info = fb_info;	/* from here on xenfb_remove() unregisters it */
++
++	/* FIXME should this be delayed until backend XenbusStateConnected? */
++	info->kthread = kthread_run(xenfb_thread, info, "xenfb thread");
++	if (IS_ERR(info->kthread)) {
++		ret = PTR_ERR(info->kthread);
++		info->kthread = NULL;	/* keeps xenfb_remove() from stopping it */
++		xenbus_dev_fatal(dev, ret, "xenfb_thread");
++		goto error;
++	}
++
++	ret = xenfb_connect_backend(dev, info);
++	if (ret < 0)
++		goto error;
++
++	return 0;
++
++ error_nomem:
++	ret = -ENOMEM;
++	xenbus_dev_fatal(dev, ret, "allocating device memory");
++ error:
++	xenfb_remove(dev);	/* common teardown; also frees info */
++	return ret;
++}
++
++static int xenfb_resume(struct xenbus_device *dev)
++{
++	struct xenfb_info *info = dev->dev.driver_data;
++
++	xenfb_disconnect_backend(info);	/* drop the old irq binding, if any */
++	xenfb_init_shared_page(info);	/* recompute mfns, reset ring indices */
++	return xenfb_connect_backend(dev, info);
++}
++
++static int xenfb_remove(struct xenbus_device *dev)
++{
++	struct xenfb_info *info = dev->dev.driver_data;
++
++	del_timer(&info->refresh);	/* NOTE(review): not del_timer_sync(), and __xenfb_refresh() could re-arm it - confirm */
++	if (info->kthread)		/* NULL if probe failed before starting it */
++		kthread_stop(info->kthread);
++	xenfb_disconnect_backend(info);
++	if (info->fb_info) {		/* only set once register_framebuffer() succeeded */
++		unregister_framebuffer(info->fb_info);
++		fb_dealloc_cmap(&info->fb_info->cmap);
++		framebuffer_release(info->fb_info);
++	}
++	free_page((unsigned long)info->page);
++	vfree(info->mfns);
++	kfree(info->pages);
++	vfree(info->fb);
++	kfree(info);
++
++	return 0;
++}
++
++static void xenfb_init_shared_page(struct xenfb_info *info)
++{
++	int pg;		/* index of a page within the frame buffer */
++
++	for (pg = 0; pg < info->nr_pages; pg++) {
++		void *vaddr = info->fb + pg * PAGE_SIZE;
++		info->pages[pg] = vmalloc_to_page(vaddr);
++		info->mfns[pg] = vmalloc_to_mfn(vaddr);
++	}
++
++	info->page->pd[0] = vmalloc_to_mfn(info->mfns);	/* mfn of the mfn list itself */
++	info->page->pd[1] = 0;
++	info->page->width = XENFB_WIDTH;
++	info->page->height = XENFB_HEIGHT;
++	info->page->depth = XENFB_DEPTH;
++	info->page->line_length = (info->page->depth / 8) * info->page->width;
++	info->page->mem_length = xenfb_mem_len;
++	info->page->in_cons = info->page->in_prod = 0;	/* both rings start empty */
++	info->page->out_cons = info->page->out_prod = 0;
++}
++
++static int xenfb_connect_backend(struct xenbus_device *dev,
++				 struct xenfb_info *info)
++{
++	int ret;
++	struct xenbus_transaction xbt;
++
++	ret = bind_listening_port_to_irqhandler(
++		dev->otherend_id, xenfb_event_handler, 0, "xenfb", info);
++	if (ret < 0) {
++		xenbus_dev_fatal(dev, ret,
++				 "bind_listening_port_to_irqhandler");
++		return ret;
++	}
++	info->irq = ret;	/* released by xenfb_disconnect_backend() */
++
++ again:
++	ret = xenbus_transaction_start(&xbt);
++	if (ret) {
++		xenbus_dev_fatal(dev, ret, "starting transaction");
++		return ret;
++	}
++	ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
++			    virt_to_mfn(info->page));	/* where the shared page lives */
++	if (ret)
++		goto error_xenbus;
++	ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
++			    irq_to_evtchn_port(info->irq));
++	if (ret)
++		goto error_xenbus;
++	ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
++			    XEN_IO_PROTO_ABI_NATIVE);
++	if (ret)
++		goto error_xenbus;
++	ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1");
++	if (ret)
++		goto error_xenbus;
++	ret = xenbus_transaction_end(xbt, 0);
++	if (ret) {
++		if (ret == -EAGAIN)	/* redo the whole transaction */
++			goto again;
++		xenbus_dev_fatal(dev, ret, "completing transaction");
++		return ret;
++	}
++
++	xenbus_switch_state(dev, XenbusStateInitialised);
++	return 0;
++
++ error_xenbus:
++	xenbus_transaction_end(xbt, 1);	/* second argument 1: abort */
++	xenbus_dev_fatal(dev, ret, "writing xenstore");
++	return ret;	/* info->irq stays bound; callers disconnect later */
++}
++
++static void xenfb_disconnect_backend(struct xenfb_info *info)
++{
++	if (info->irq >= 0)	/* -1 means nothing is bound */
++		unbind_from_irqhandler(info->irq, info);
++	info->irq = -1;		/* makes a repeated call a no-op */
++}
++
++static void xenfb_backend_changed(struct xenbus_device *dev,
++				  enum xenbus_state backend_state)
++{
++	struct xenfb_info *info = dev->dev.driver_data;
++	int val;
++
++	switch (backend_state) {
++	case XenbusStateInitialising:
++	case XenbusStateInitialised:
++	case XenbusStateUnknown:
++	case XenbusStateClosed:
++		break;		/* nothing to do for these backend states */
++
++	case XenbusStateInitWait:
++	InitWait:		/* also entered by goto from the Connected case */
++		xenbus_switch_state(dev, XenbusStateConnected);
++		break;
++
++	case XenbusStateConnected:
++		/*
++		 * Work around xenbus race condition: If backend goes
++		 * through InitWait to Connected fast enough, we can
++		 * get Connected twice here.
++		 */
++		if (dev->state != XenbusStateConnected)
++			goto InitWait; /* no InitWait seen yet, fudge it */
++
++		if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
++				 "request-update", "%d", &val) < 0)
++			val = 0;	/* unreadable key counts as "not requested" */
++		if (val)
++			info->update_wanted = 1;	/* enables xenfb_update_screen() */
++		break;
++
++	case XenbusStateClosing:
++		/* FIXME is this safe in any dev->state? */
++		xenbus_frontend_closed(dev);
++		break;
++	}
++}
++
++static struct xenbus_device_id xenfb_ids[] = {
++	{ "vfb" },	/* device type this driver handles */
++	{ "" }		/* empty string ends the table */
++};
++
++static struct xenbus_driver xenfb = {
++	.name = "vfb",
++	.owner = THIS_MODULE,
++	.ids = xenfb_ids,
++	.probe = xenfb_probe,
++	.remove = xenfb_remove,
++	.resume = xenfb_resume,		/* rebinds the irq and rewrites the xenstore keys */
++	.otherend_changed = xenfb_backend_changed,	/* reacts to backend state changes */
++};
++
++static int __init xenfb_init(void)
++{
++	/*
++	 * The frontend is only registered when running on Xen, and
++	 * there is nothing to do if running in dom0.
++	 */
++	if (!is_running_on_xen() || is_initial_xendomain())
++		return -ENODEV;
++
++	return xenbus_register_frontend(&xenfb);
++}
++
++static void __exit xenfb_cleanup(void)
++{
++	xenbus_unregister_driver(&xenfb);	/* undoes the registration done in xenfb_init() */
++}
++
++module_init(xenfb_init);
++module_exit(xenfb_cleanup);
++
++MODULE_LICENSE("GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/fbfront/xenkbd.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,333 @@
++/*
++ * linux/drivers/input/keyboard/xenkbd.c -- Xen para-virtual input device
++ *
++ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
++ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
++ *
++ *  Based on linux/drivers/input/mouse/sermouse.c
++ *
++ *  This file is subject to the terms and conditions of the GNU General Public
++ *  License. See the file COPYING in the main directory of this archive for
++ *  more details.
++ */
++
++/*
++ * TODO:
++ *
++ * Switch to grant tables together with xenfb.c.
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/module.h>
++#include <linux/input.h>
++#include <asm/hypervisor.h>
++#include <xen/evtchn.h>
++#include <xen/interface/io/fbif.h>
++#include <xen/interface/io/kbdif.h>
++#include <xen/xenbus.h>
++
++struct xenkbd_info
++{
++	struct input_dev *kbd;		/* "Xen Virtual Keyboard"; set once registered */
++	struct input_dev *ptr;		/* "Xen Virtual Pointer"; set once registered */
++	struct xenkbd_page *page;	/* page shared with the backend */
++	int irq;			/* irq bound in xenkbd_connect_backend() */
++	struct xenbus_device *xbdev;
++	char phys[32];			/* "xenbus/<nodename>", used as the devices' phys */
++};
++
++static int xenkbd_remove(struct xenbus_device *);
++static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *);
++static void xenkbd_disconnect_backend(struct xenkbd_info *);
++
++/*
++ * Note: if you need to send out events, see xenfb_do_update() for how
++ * to do that.
++ */
++
++static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
++{
++	struct xenkbd_info *info = dev_id;
++	struct xenkbd_page *page = info->page;
++	__u32 cons, prod;
++
++	prod = page->in_prod;
++	if (prod == page->in_cons)	/* in ring is empty: nothing to report */
++		return IRQ_HANDLED;
++	rmb();			/* ensure we see ring contents up to prod */
++	for (cons = page->in_cons; cons != prod; cons++) {
++		union xenkbd_in_event *event;
++		struct input_dev *dev;
++		event = &XENKBD_IN_RING_REF(page, cons);
++
++		dev = info->ptr;	/* motion and position go to the pointer */
++		switch (event->type) {
++		case XENKBD_TYPE_MOTION:
++			input_report_rel(dev, REL_X, event->motion.rel_x);
++			input_report_rel(dev, REL_Y, event->motion.rel_y);
++			break;
++		case XENKBD_TYPE_KEY:
++			dev = NULL;	/* route by keybit; the pointer wins if both match */
++			if (test_bit(event->key.keycode, info->kbd->keybit))
++				dev = info->kbd;
++			if (test_bit(event->key.keycode, info->ptr->keybit))
++				dev = info->ptr;
++			if (dev)
++				input_report_key(dev, event->key.keycode,
++						 event->key.pressed);
++			else
++				printk("xenkbd: unhandled keycode 0x%x\n",
++				       event->key.keycode);
++			break;
++		case XENKBD_TYPE_POS:
++			input_report_abs(dev, ABS_X, event->pos.abs_x);
++			input_report_abs(dev, ABS_Y, event->pos.abs_y);
++			break;
++		}
++		if (dev)	/* NULL only for a key neither device advertises */
++			input_sync(dev);
++	}
++	mb();			/* ensure we got ring contents */
++	page->in_cons = cons;	/* hand the consumed slots back to the backend */
++	notify_remote_via_irq(info->irq);
++
++	return IRQ_HANDLED;
++}
++
++int __devinit xenkbd_probe(struct xenbus_device *dev,
++			   const struct xenbus_device_id *id)
++{
++	int ret, i;
++	struct xenkbd_info *info;
++	struct input_dev *kbd, *ptr;
++
++	info = kzalloc(sizeof(*info), GFP_KERNEL);
++	if (!info) {
++		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
++		return -ENOMEM;
++	}
++	dev->dev.driver_data = info;	/* xenkbd_remove() finds info through this */
++	info->xbdev = dev;
++	info->irq = -1;		/* nothing bound yet; 0 would be unbound on the error path */
++	snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
++
++	info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
++	if (!info->page)
++		goto error_nomem;
++	/* zeroed page: ring indices start at 0, no stale data reaches the backend */
++
++	/* keyboard */
++	kbd = input_allocate_device();
++	if (!kbd)
++		goto error_nomem;
++	kbd->name = "Xen Virtual Keyboard";
++	kbd->phys = info->phys;
++	kbd->id.bustype = BUS_PCI;
++	kbd->id.vendor = 0x5853;
++	kbd->id.product = 0xffff;
++	kbd->evbit[0] = BIT(EV_KEY);
++	for (i = KEY_ESC; i < KEY_UNKNOWN; i++)
++		set_bit(i, kbd->keybit);
++	for (i = KEY_OK; i < KEY_MAX; i++)
++		set_bit(i, kbd->keybit);
++
++	ret = input_register_device(kbd);
++	if (ret) {
++		input_free_device(kbd);	/* never registered, so free it directly */
++		xenbus_dev_fatal(dev, ret, "input_register_device(kbd)");
++		goto error;
++	}
++	info->kbd = kbd;	/* only recorded once registered */
++
++	/* pointing device */
++	ptr = input_allocate_device();
++	if (!ptr)
++		goto error_nomem;
++	ptr->name = "Xen Virtual Pointer";
++	ptr->phys = info->phys;
++	ptr->id.bustype = BUS_PCI;
++	ptr->id.vendor = 0x5853;
++	ptr->id.product = 0xfffe;
++	ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
++	for (i = BTN_LEFT; i <= BTN_TASK; i++)
++		set_bit(i, ptr->keybit);
++	ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y);
++	input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
++	input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
++
++	ret = input_register_device(ptr);
++	if (ret) {
++		input_free_device(ptr);	/* never registered, so free it directly */
++		xenbus_dev_fatal(dev, ret, "input_register_device(ptr)");
++		goto error;
++	}
++	info->ptr = ptr;	/* only recorded once registered */
++
++	ret = xenkbd_connect_backend(dev, info);
++	if (ret < 0)
++		goto error;
++
++	return 0;
++
++ error_nomem:
++	ret = -ENOMEM;
++	xenbus_dev_fatal(dev, ret, "allocating device memory");
++ error:
++	xenkbd_remove(dev);	/* common teardown; also frees info */
++	return ret;
++}
++
++static int xenkbd_resume(struct xenbus_device *dev)
++{
++	struct xenkbd_info *info = dev->dev.driver_data;
++
++	xenkbd_disconnect_backend(info);	/* drop the old irq binding, if any */
++	return xenkbd_connect_backend(dev, info);	/* bind again, rewrite xenstore keys */
++}
++
++static int xenkbd_remove(struct xenbus_device *dev)
++{
++	struct xenkbd_info *info = dev->dev.driver_data;
++	xenkbd_disconnect_backend(info);
++	if (info->kbd)		/* NULL when probe failed before registering it */
++		input_unregister_device(info->kbd);
++	if (info->ptr)		/* NULL when probe failed before registering it */
++		input_unregister_device(info->ptr);
++	free_page((unsigned long)info->page);
++	kfree(info);
++	return 0;
++}
++static int xenkbd_connect_backend(struct xenbus_device *dev,
++				  struct xenkbd_info *info)
++{
++	int ret;
++	struct xenbus_transaction xbt;
++
++	ret = bind_listening_port_to_irqhandler(
++		dev->otherend_id, input_handler, 0, "xenkbd", info);
++	if (ret < 0) {
++		xenbus_dev_fatal(dev, ret,
++				 "bind_listening_port_to_irqhandler");
++		return ret;
++	}
++	info->irq = ret;	/* released by xenkbd_disconnect_backend() */
++
++ again:
++	ret = xenbus_transaction_start(&xbt);
++	if (ret) {
++		xenbus_dev_fatal(dev, ret, "starting transaction");
++		return ret;
++	}
++	ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
++			    virt_to_mfn(info->page));	/* where the shared page lives */
++	if (ret)
++		goto error_xenbus;
++	ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
++			    irq_to_evtchn_port(info->irq));
++	if (ret)
++		goto error_xenbus;
++	ret = xenbus_transaction_end(xbt, 0);
++	if (ret) {
++		if (ret == -EAGAIN)	/* redo the whole transaction */
++			goto again;
++		xenbus_dev_fatal(dev, ret, "completing transaction");
++		return ret;
++	}
++
++	xenbus_switch_state(dev, XenbusStateInitialised);
++	return 0;
++
++ error_xenbus:
++	xenbus_transaction_end(xbt, 1);	/* second argument 1: abort */
++	xenbus_dev_fatal(dev, ret, "writing xenstore");
++	return ret;	/* info->irq stays bound; callers disconnect later */
++}
++
++static void xenkbd_disconnect_backend(struct xenkbd_info *info)
++{
++	if (info->irq >= 0)	/* a negative irq means nothing is bound */
++		unbind_from_irqhandler(info->irq, info);
++	info->irq = -1;		/* makes a repeated call a no-op */
++}
++
++static void xenkbd_backend_changed(struct xenbus_device *dev,
++				   enum xenbus_state backend_state)
++{
++	struct xenkbd_info *info = dev->dev.driver_data;
++	int ret, val;
++
++	switch (backend_state) {
++	case XenbusStateInitialising:
++	case XenbusStateInitialised:
++	case XenbusStateUnknown:
++	case XenbusStateClosed:
++		break;		/* nothing to do for these backend states */
++
++	case XenbusStateInitWait:
++	InitWait:		/* also entered by goto from the Connected case */
++		if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
++				 "feature-abs-pointer", "%d", &val) < 0)
++			val = 0;	/* unreadable key counts as "not offered" */
++		if (val) {
++			ret = xenbus_printf(XBT_NIL, info->xbdev->nodename,
++					    "request-abs-pointer", "1");
++			if (ret)	/* report it, but still go to Connected */
++				printk(KERN_WARNING
++				       "xenkbd: can't request abs-pointer\n");
++		}
++		xenbus_switch_state(dev, XenbusStateConnected);
++		break;
++
++	case XenbusStateConnected:
++		/*
++		 * Work around xenbus race condition: If backend goes
++		 * through InitWait to Connected fast enough, we can
++		 * get Connected twice here.
++		 */
++		if (dev->state != XenbusStateConnected)
++			goto InitWait; /* no InitWait seen yet, fudge it */
++		break;
++
++	case XenbusStateClosing:
++		xenbus_frontend_closed(dev);
++		break;
++	}
++}
++
++static struct xenbus_device_id xenkbd_ids[] = {
++	{ "vkbd" },	/* device type this driver handles */
++	{ "" }		/* empty string ends the table */
++};
++
++static struct xenbus_driver xenkbd = {
++	.name = "vkbd",
++	.owner = THIS_MODULE,
++	.ids = xenkbd_ids,
++	.probe = xenkbd_probe,
++	.remove = xenkbd_remove,
++	.resume = xenkbd_resume,	/* rebinds the irq and rewrites the xenstore keys */
++	.otherend_changed = xenkbd_backend_changed,	/* reacts to backend state changes */
++};
++
++static int __init xenkbd_init(void)
++{
++	/*
++	 * The frontend is only registered when running on Xen, and
++	 * there is nothing to do if running in dom0.
++	 */
++	if (!is_running_on_xen() || is_initial_xendomain())
++		return -ENODEV;
++
++	return xenbus_register_frontend(&xenkbd);
++}
++
++static void __exit xenkbd_cleanup(void)
++{
++	xenbus_unregister_driver(&xenkbd);	/* undoes the registration done in xenkbd_init() */
++}
++
++module_init(xenkbd_init);
++module_exit(xenkbd_cleanup);
++
++MODULE_LICENSE("GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/gntdev/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1 @@
++obj-y	:= gntdev.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/gntdev/gntdev.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,973 @@
++/******************************************************************************
++ * gntdev.c
++ * 
++ * Device for accessing (in user-space) pages that have been granted by other
++ * domains.
++ *
++ * Copyright (c) 2006-2007, D G Murray.
++ * 
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ * 
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ */
++
++#include <asm/atomic.h>
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/fs.h>
++#include <linux/device.h>
++#include <linux/mm.h>
++#include <linux/mman.h>
++#include <asm/uaccess.h>
++#include <asm/io.h>
++#include <xen/gnttab.h>
++#include <asm/hypervisor.h>
++#include <xen/balloon.h>
++#include <xen/evtchn.h>
++#include <xen/driver_util.h>
++
++#include <linux/types.h>
++#include <xen/public/gntdev.h>
++
++
++#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>"
++#define DRIVER_DESC   "User-space granted page access driver"
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR(DRIVER_AUTHOR);
++MODULE_DESCRIPTION(DRIVER_DESC);
++
++#define MAX_GRANTS 128
++
++/* A slot can be in one of three states:
++ *
++ * 0. GNTDEV_SLOT_INVALID:
++ *    This slot is not associated with a grant reference, and is therefore free
++ *    to be overwritten by a new grant reference.
++ *
++ * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
++ *    This slot is associated with a grant reference (via the 
++ *    IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
++ *
++ * 2. GNTDEV_SLOT_MAPPED:
++ *    This slot is associated with a grant reference, and has been mmap()-ed.
++ */
++typedef enum gntdev_slot_state {
++	GNTDEV_SLOT_INVALID = 0,	/* free: no grant reference attached */
++	GNTDEV_SLOT_NOT_YET_MAPPED,	/* grant reference recorded, not mmap()-ed yet */
++	GNTDEV_SLOT_MAPPED		/* grant reference recorded and mmap()-ed */
++} gntdev_slot_state_t;
++
++#define GNTDEV_INVALID_HANDLE    (-1)	/* parenthesised so it stays one operand in any expression */
++#define GNTDEV_FREE_LIST_INVALID (-1)	/* placeholder value for a free_list[] entry */
++/* Each opened instance of gntdev is associated with a list of grants,
++ * represented by an array of elements of the following type,
++ * gntdev_grant_info_t.
++ */
++typedef struct gntdev_grant_info {
++	gntdev_slot_state_t state;	/* INVALID, NOT_YET_MAPPED or MAPPED */
++	union {
++		uint32_t free_list_index;	/* NOTE(review): presumably the slot's position in free_list[] while free - confirm */
++		struct {
++			domid_t domid;
++			grant_ref_t ref;
++			grant_handle_t kernel_handle;
++			grant_handle_t user_handle;
++			uint64_t dev_bus_addr;
++		} valid;	/* NOTE(review): presumably meaningful only when state is not INVALID - confirm */
++	} u;
++} gntdev_grant_info_t;
++
++/* Private data structure, which is stored in the file pointer for files
++ * associated with this device.
++ */
++typedef struct gntdev_file_private_data {
++  
++	/* Per-slot grant state; one entry for each of the MAX_GRANTS slots. */
++	gntdev_grant_info_t grants[MAX_GRANTS];
++
++	/* Read/write semaphore used to protect the grants array. */
++	struct rw_semaphore grants_sem;
++
++	/* An array of indices of free slots in the grants array.
++	 * N.B. An entry in this list may temporarily have the value
++	 * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
++	 * from the list by the contiguous allocator, but the list has not yet
++	 * been compressed. However, this is not visible across invocations of
++	 * the device.
++	 */
++	int32_t free_list[MAX_GRANTS];
++	
++	/* The number of free slots in the grants array. */
++	uint32_t free_list_size;
++
++	/* Read/write semaphore used to protect the free list. */
++	struct rw_semaphore free_list_sem;
++	
++	/* Index of the next slot after the most recent contiguous allocation,
++	 * for use in a next-fit allocator.
++	 */
++	uint32_t next_fit_index;
++
++	/* Pages used to map grants into the kernel, before mapping them into
++	 * user space; indexed by slot (see get_kernel_vaddr()).
++	 */
++	struct page **foreign_pages;
++
++} gntdev_file_private_data_t;
++
++/* Module lifecycle operations. */
++static int __init gntdev_init(void);
++static void __exit gntdev_exit(void);
++
++module_init(gntdev_init);
++module_exit(gntdev_exit);
++
++/* File operations. */
++static int gntdev_open(struct inode *inode, struct file *flip);
++static int gntdev_release(struct inode *inode, struct file *flip);
++static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
++static int gntdev_ioctl (struct inode *inode, struct file *flip,
++			 unsigned int cmd, unsigned long arg);
++
++static struct file_operations gntdev_fops = {
++	.owner = THIS_MODULE,
++	.open = gntdev_open,
++	.release = gntdev_release,
++	.mmap = gntdev_mmap,	/* moves a slot from NOT_YET_MAPPED to MAPPED (see slot states) */
++	.ioctl = gntdev_ioctl	/* IOCTL_GNTDEV_MAP_GRANT_REF attaches a grant to a slot */
++};
++
++/* VM operations, installed by gntdev_mmap() on the VM areas it sets up. */
++static void gntdev_vma_close(struct vm_area_struct *vma);
++static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
++			      pte_t *ptep, int is_fullmm);
++
++static struct vm_operations_struct gntdev_vmops = {
++	.close = gntdev_vma_close,	/* frees the VM area's private data */
++	.zap_pte = gntdev_clear_pte	/* unmaps the grant behind a PTE */
++};
++
++/* Global variables. */
++
++/* The major number assigned by register_chrdev(); kept for gntdev_exit(). */
++static int gntdev_major;
++
++#define GNTDEV_NAME "gntdev"	/* name of the chrdev and its class device */
++
++/* Memory mapping functions
++ * ------------------------
++ *
++ * Every granted page is mapped into both kernel and user space, and the two
++ * following functions return the respective virtual addresses of these pages.
++ *
++ * When shadow paging is disabled, the granted page is mapped directly into
++ * user space; when it is enabled, it is mapped into the kernel and remapped
++ * into user space using vm_insert_page() (see gntdev_mmap(), below).
++ */
++
++/* Maps a page index within @vma to the user-space virtual address of that
++ * page: the area's start address plus @page_index whole pages. */
++static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
++					    int page_index)
++{
++	int byte_offset = page_index << PAGE_SHIFT;
++	return (unsigned long) vma->vm_start + byte_offset;
++}
++
++/* Looks up the page backing slot @slot_index in the foreign_pages vector of
++ * @priv and returns that page's kernel-space virtual address.
++ */
++static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
++					      int slot_index)
++{
++	struct page *slot_page = priv->foreign_pages[slot_index];
++	unsigned long frame = page_to_pfn(slot_page);
++
++	/* Translate the frame number into a kernel virtual address. */
++	return (unsigned long) pfn_to_kaddr(frame);
++}
++
++/* Helper functions. */
++
++/* Claims a single free slot for the grant reference described by @op: the
++ * slot is popped off the end of the free list of the gntdev instance behind
++ * @flip and left in the GNTDEV_SLOT_NOT_YET_MAPPED state, holding the domain
++ * ID and grant reference copied from @op.
++ *
++ * Returns 0 on success, with *offset set to the offset that should be
++ * mmap()-ed to map the grant, or -ENOMEM if no free slot is left.
++ */
++static int add_grant_reference(struct file *flip,
++			       struct ioctl_gntdev_grant_ref *op,
++			       uint64_t *offset)
++{
++	gntdev_file_private_data_t *priv = flip->private_data;
++	gntdev_grant_info_t *slot;
++	uint32_t index;
++
++	if (unlikely(!priv->free_list_size))
++		return -ENOMEM;
++
++	/* Pop a slot index off the end of the free list. */
++	priv->free_list_size -= 1;
++	index = priv->free_list[priv->free_list_size];
++	slot = &priv->grants[index];
++
++	slot->state = GNTDEV_SLOT_NOT_YET_MAPPED;
++	slot->u.valid.domid = op->domid;
++	slot->u.valid.ref = op->ref;
++
++	/* The file offset of a slot is its index, scaled to whole pages. */
++	*offset = index << PAGE_SHIFT;
++
++	return 0;
++}
++
++/* Fills the @count consecutive slots starting at @first_slot with the grant
++ * references in @ops, leaving each slot in the GNTDEV_SLOT_NOT_YET_MAPPED
++ * state. The free-list entry of every slot used is overwritten with
++ * GNTDEV_FREE_LIST_INVALID rather than removed, so the free list holds gaps
++ * until it is next compressed.
++ *
++ * It is assumed that @first_slot was returned by find_contiguous_free_range()
++ * during the same invocation of the driver. Always returns 0.
++ */
++static int add_grant_references(struct file *flip,
++				int count,
++				struct ioctl_gntdev_grant_ref *ops,
++				uint32_t first_slot)
++{
++	gntdev_file_private_data_t *priv = flip->private_data;
++	gntdev_grant_info_t *slot = &priv->grants[first_slot];
++	struct ioctl_gntdev_grant_ref *ref = ops;
++	int remaining;
++
++	for (remaining = count; remaining > 0; --remaining, ++slot, ++ref) {
++		/* Find this slot's free-list entry and invalidate it first. */
++		int list_pos = slot->u.free_list_index;
++
++		priv->free_list[list_pos] = GNTDEV_FREE_LIST_INVALID;
++
++		slot->state = GNTDEV_SLOT_NOT_YET_MAPPED;
++		slot->u.valid.domid = ref->domid;
++		slot->u.valid.ref = ref->ref;
++	}
++
++	return 0;
++}
++
++/* Compacts the free list for @flip: entries marked GNTDEV_FREE_LIST_INVALID
++ * are dropped, the surviving slot indices are moved down to close the gaps
++ * (keeping their order), and free_list_size is reduced to the number of
++ * valid entries. Each surviving slot's u.free_list_index is updated to its
++ * new position, since add_grant_references() uses it to find the entry.
++ */
++static void compress_free_list(struct file *flip)
++{
++	gntdev_file_private_data_t *priv = flip->private_data;
++	uint32_t src, dst = 0;
++
++	for (src = 0; src < priv->free_list_size; ++src) {
++		int32_t slot = priv->free_list[src];
++
++		if (slot == GNTDEV_FREE_LIST_INVALID)
++			continue;
++		priv->free_list[dst] = slot;
++		priv->grants[slot].u.free_list_index = dst++;
++	}
++	priv->free_list_size = dst;
++}
++
++/* Scans slots [@from, @to) of @priv's grant array for the first run of
++ * @num_slots adjacent slots in the GNTDEV_SLOT_INVALID state. Returns the
++ * index of the first slot of the run, or -ENOMEM if there is no such run.
++ */
++static int scan_for_free_run(gntdev_file_private_data_t *priv,
++			     int from, int to, uint32_t num_slots)
++{
++	uint32_t run_length = 0;
++	int i;
++
++	for (i = from; i < to; ++i) {
++		if (priv->grants[i].state != GNTDEV_SLOT_INVALID) {
++			/* An in-use slot breaks the run: start again. */
++			run_length = 0;
++			continue;
++		}
++		if (++run_length == num_slots)
++			return i - (int) num_slots + 1;
++	}
++	return -ENOMEM;
++}
++
++/* Searches the grant array in the private data of @flip for a range of
++ * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state. The search
++ * starts at next_fit_index and, if that fails, is repeated over the slots
++ * before it; a range never straddles that boundary.
++ *
++ * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
++ */
++static int find_contiguous_free_range(struct file *flip,
++				      uint32_t num_slots)
++{
++	gntdev_file_private_data_t *priv = flip->private_data;
++	int start_index = priv->next_fit_index;
++	int range_start;
++
++	/* Not enough free slots in total, contiguous or otherwise. */
++	if (priv->free_list_size < num_slots)
++		return -ENOMEM;
++
++	/* First search from the start_index to the end of the array. */
++	range_start = scan_for_free_run(priv, start_index, MAX_GRANTS,
++					num_slots);
++	if (range_start >= 0)
++		return range_start;
++
++	/* Now search from the start of the array to the start_index. */
++	return scan_for_free_run(priv, 0, start_index, num_slots);
++}
++
++/* Interface functions. */
++
++/* Initialises the driver. Called when the module is loaded.
++ *
++ * Returns 0 on success (even if the sysfs set-up fails, see below), -ENODEV
++ * when not running on Xen, or the error reported by register_chrdev().
++ */
++static int __init gntdev_init(void)
++{
++	struct class *class;
++	struct class_device *device;
++
++	if (!is_running_on_xen()) {
++		printk(KERN_ERR "You must be running Xen to use gntdev\n");
++		return -ENODEV;
++	}
++
++	gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
++	if (gntdev_major < 0) {
++		printk(KERN_ERR "Could not register gntdev device\n");
++		/* Propagate the real cause instead of a blanket -ENOMEM. */
++		return gntdev_major;
++	}
++
++	/* Note that if the sysfs code fails, we will still initialise the
++	 * device, and output the major number so that the device can be
++	 * created manually using mknod.
++	 */
++	if ((class = get_xen_class()) == NULL) {
++		printk(KERN_ERR "Error setting up xen_class\n");
++		goto sysfs_failed;
++	}
++	device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
++				     NULL, GNTDEV_NAME);
++	if (!IS_ERR(device))
++		return 0;
++	printk(KERN_ERR "Error creating gntdev device in xen_class\n");
++sysfs_failed:
++	printk(KERN_ERR "gntdev created with major number = %d\n",
++	       gntdev_major);
++	return 0;
++}
++
++/* Module unload hook: removes the class device (when the Xen class is
++ * available) and then unregisters the character device. */
++static void __exit gntdev_exit(void)
++{
++	struct class *xen_class = get_xen_class();
++	if (xen_class)
++		class_device_destroy(xen_class, MKDEV(gntdev_major, 0));
++	unregister_chrdev(gntdev_major, GNTDEV_NAME);
++}
++
++/* Called when the device is opened: allocates the per-file private data.
++ * Returns 0, -ENODEV if no module reference can be taken, or -ENOMEM; the
++ * error paths drop the module reference, as release is not called for them.
++ */
++static int gntdev_open(struct inode *inode, struct file *flip)
++{
++	gntdev_file_private_data_t *private_data;
++	int i;
++
++	if (!try_module_get(THIS_MODULE))
++		return -ENODEV;
++
++	/* Allocate space for the per-instance private data. */
++	private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
++	if (!private_data)
++		goto nomem_out;
++
++	/* Allocate space for the kernel-mapping of granted pages. */
++	private_data->foreign_pages = alloc_empty_pages_and_pagevec(MAX_GRANTS);
++	if (!private_data->foreign_pages)
++		goto nomem_out2;
++
++	/* All slots start out free; the list holds them in reverse order. */
++	for (i = 0; i < MAX_GRANTS; ++i) {
++		private_data->free_list[MAX_GRANTS - i - 1] = i;
++		private_data->grants[i].state = GNTDEV_SLOT_INVALID;
++		private_data->grants[i].u.free_list_index = MAX_GRANTS - i - 1;
++	}
++	private_data->free_list_size = MAX_GRANTS;
++	private_data->next_fit_index = 0;
++	init_rwsem(&private_data->grants_sem);
++	init_rwsem(&private_data->free_list_sem);
++	flip->private_data = private_data;
++	return 0;
++
++nomem_out2:
++	kfree(private_data);
++nomem_out:
++	module_put(THIS_MODULE);
++	return -ENOMEM;
++}
++
++/* Called when the device is closed: frees what gntdev_open() set up and
++ * drops the module reference taken there. Always returns 0. */
++static int gntdev_release(struct inode *inode, struct file *flip)
++{
++	gntdev_file_private_data_t *priv = flip->private_data;
++
++	if (priv != NULL) {
++		struct page **pagevec = priv->foreign_pages;
++
++		if (pagevec != NULL)
++			free_empty_pages_and_pagevec(pagevec, MAX_GRANTS);
++		kfree(priv);
++	}
++	module_put(THIS_MODULE);
++	return 0;
++}
++
++/* Called when an attempt is made to mmap() the device. The private data from
++ * @flip contains the list of grant references that can be mapped. The vm_pgoff
++ * field of @vma contains the index into that list that refers to the grant
++ * reference that will be mapped. Only mappings that are a multiple of
++ * PAGE_SIZE are handled.
++ *
++ * Every slot in the range must be in the NOT_YET_MAPPED state. Each grant is
++ * mapped into the kernel first and then into user space, either directly
++ * through the user PTE or, with shadow page tables, by vm_insert_page().
++ *
++ * Returns 0 on success, -EINVAL or -ENXIO for an invalid request, and -ENOMEM
++ * if the allocation or any of the mapping steps fails.
++ */
++static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma) 
++{
++	struct gnttab_map_grant_ref op;
++	unsigned long slot_index = vma->vm_pgoff;
++	unsigned long kernel_vaddr, user_vaddr;
++	uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
++	uint64_t ptep;
++	int ret;
++	int flags;
++	int i;
++	struct page *page;
++	gntdev_grant_info_t *slot;
++	gntdev_file_private_data_t *private_data = flip->private_data;
++
++	if (unlikely(!private_data)) {
++		printk(KERN_ERR "File's private data is NULL.\n");
++		return -EINVAL;
++	}
++
++	/* The range check is written without adding size to slot_index, so
++	 * that a huge vm_pgoff cannot wrap the sum around and slip through.
++	 */
++	if (unlikely(size == 0 || slot_index >= MAX_GRANTS ||
++		     size > MAX_GRANTS - slot_index)) {
++		printk(KERN_ERR "Invalid number of pages or offset"
++		       "(num_pages = %d, first_slot = %ld).\n",
++		       size, slot_index);
++		return -ENXIO;
++	}
++
++	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
++		printk(KERN_ERR "Writable mappings must be shared.\n");
++		return -EINVAL;
++	}
++
++	/* Slots must be in the NOT_YET_MAPPED state. */
++	down_write(&private_data->grants_sem);
++	for (i = 0; i < size; ++i) {
++		slot = &private_data->grants[slot_index + i];
++		if (slot->state != GNTDEV_SLOT_NOT_YET_MAPPED) {
++			printk(KERN_ERR "Slot (index = %ld) is in the wrong "
++			       "state (%d).\n", slot_index + i, slot->state);
++			up_write(&private_data->grants_sem);
++			return -EINVAL;
++		}
++	}
++
++	/* Install the hook for unmapping. */
++	vma->vm_ops = &gntdev_vmops;
++
++	/* The VM area contains pages from another VM. */
++	vma->vm_flags |= VM_FOREIGN;
++	vma->vm_private_data = kzalloc(size * sizeof(struct page *),
++				       GFP_KERNEL);
++	if (vma->vm_private_data == NULL) {
++		printk(KERN_ERR "Couldn't allocate mapping structure for VM "
++		       "area.\n");
++		/* Do not leave grants_sem held on this early exit. */
++		up_write(&private_data->grants_sem);
++		return -ENOMEM;
++	}
++
++	/* This flag prevents Bad PTE errors when the memory is unmapped. */
++	vma->vm_flags |= VM_RESERVED;
++
++	/* This flag prevents this VM area being copied on a fork(). A better
++	 * behaviour might be to explicitly carry out the appropriate mappings
++	 * on fork(), but I don't know if there's a hook for this.
++	 */
++	vma->vm_flags |= VM_DONTCOPY;
++
++#ifdef CONFIG_X86
++	/* This flag ensures that the page tables are not unpinned before the
++	 * VM area is unmapped. Therefore Xen still recognises the PTE as
++	 * belonging to an L1 pagetable, and the grant unmap operation will
++	 * succeed, even if the process does not exit cleanly.
++	 */
++	vma->vm_mm->context.has_foreign_mappings = 1;
++#endif
++
++	for (i = 0; i < size; ++i) {
++		slot = &private_data->grants[slot_index + i];
++
++		flags = GNTMAP_host_map;
++		if (!(vma->vm_flags & VM_WRITE))
++			flags |= GNTMAP_readonly;
++
++		kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
++		user_vaddr = get_user_vaddr(vma, i);
++		page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
++
++		gnttab_set_map_op(&op, kernel_vaddr, flags,
++				  slot->u.valid.ref, slot->u.valid.domid);
++
++		/* Carry out the mapping of the grant reference. */
++		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
++						&op, 1);
++		BUG_ON(ret);
++		if (op.status) {
++			printk(KERN_ERR "Error mapping the grant reference "
++			       "into the kernel (%d). domid = %d; ref = %d\n",
++			       op.status, slot->u.valid.domid,
++			       slot->u.valid.ref);
++			goto undo_map_out;
++		}
++
++		/* Remember the page that will be mapped into user space. */
++		((struct page **) vma->vm_private_data)[i] = page;
++
++		/* Mark mapped page as reserved. */
++		SetPageReserved(page);
++
++		/* Record the grant handle, for use in the unmap operation. */
++		slot->u.valid.kernel_handle = op.handle;
++		slot->u.valid.dev_bus_addr = op.dev_bus_addr;
++
++		/* gntdev_clear_pte() now treats the slot as mapped; the invalid
++		 * user handle tells it no user-space grant mapping exists. */
++		slot->state = GNTDEV_SLOT_MAPPED;
++		slot->u.valid.user_handle = GNTDEV_INVALID_HANDLE;
++
++		/* Now perform the mapping to user space. */
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			/* NOT USING SHADOW PAGE TABLES. */
++			/* In this case, we map the grant(s) straight into user
++			 * space.
++			 */
++
++			/* Get the machine address of the PTE for the user
++			 * page. */
++			ret = create_lookup_pte_addr(vma->vm_mm, user_vaddr,
++						     &ptep);
++			if (ret) {
++				printk(KERN_ERR "Error obtaining PTE pointer "
++				       "(%d).\n", ret);
++				goto undo_map_out;
++			}
++
++			/* Configure the map operation. */
++
++			/* The reference is to be used by host CPUs. */
++			flags = GNTMAP_host_map;
++
++			/* Specifies a user space mapping. */
++			flags |= GNTMAP_application_map;
++
++			/* The map request contains the machine address of the
++			 * PTE to update.
++			 */
++			flags |= GNTMAP_contains_pte;
++
++			if (!(vma->vm_flags & VM_WRITE))
++				flags |= GNTMAP_readonly;
++
++			gnttab_set_map_op(&op, ptep, flags,
++					  slot->u.valid.ref,
++					  slot->u.valid.domid);
++
++			/* Carry out the mapping of the grant reference. */
++			ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
++							&op, 1);
++			BUG_ON(ret);
++			if (op.status) {
++				printk(KERN_ERR "Error mapping the grant "
++				       "reference into user space (%d). domid "
++				       "= %d; ref = %d\n", op.status,
++				       slot->u.valid.domid,
++				       slot->u.valid.ref);
++				goto undo_map_out;
++			}
++
++			/* Record the grant handle, for use in the unmap
++			 * operation. */
++			slot->u.valid.user_handle = op.handle;
++
++			/* Update p2m structure with the new mapping. */
++			set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
++					    FOREIGN_FRAME(slot->u.valid
++							  .dev_bus_addr
++							  >> PAGE_SHIFT));
++		} else {
++			/* USING SHADOW PAGE TABLES. */
++			/* In this case, we simply insert the page into the VM
++			 * area. A failure must not be ignored, or mmap() would
++			 * report success for an area with a hole in it.
++			 */
++			ret = vm_insert_page(vma, user_vaddr, page);
++			if (ret) {
++				printk(KERN_ERR "Error inserting the page into "
++				       "the VM area (%d).\n", ret);
++				goto undo_map_out;
++			}
++		}
++	}
++
++	up_write(&private_data->grants_sem);
++	return 0;
++
++undo_map_out:
++	/* If we have a mapping failure, the unmapping will be taken care of
++	 * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
++	 * All we need to do here is free the vma_private_data.
++	 */
++	kfree(vma->vm_private_data);
++
++	/* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
++	 * to NULL on failure. However, we need this in gntdev_clear_pte() to
++	 * unmap the grants. Therefore, we smuggle a reference to the file's
++	 * private data in the VM area's private data pointer.
++	 */
++	vma->vm_private_data = private_data;
++
++	up_write(&private_data->grants_sem);
++
++	return -ENOMEM;
++}
++
++static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
++			      pte_t *ptep, int is_fullmm)
++{
++	int slot_index, ret;
++	pte_t copy;
++	struct gnttab_unmap_grant_ref op;
++	gntdev_file_private_data_t *private_data;
++
++	/* zap_pte hook: undoes the grant mapping behind the PTE at @addr and
++	 * returns the PTE's old value. The private data normally comes from
++	 * vma->vm_file; after a failed gntdev_mmap() that is NULL, and the
++	 * data is smuggled in vma->vm_private_data instead.
++	 */
++	if (vma->vm_file) {
++		private_data = (gntdev_file_private_data_t *)
++			vma->vm_file->private_data;
++	} else if (vma->vm_private_data) {
++		private_data = (gntdev_file_private_data_t *)
++			vma->vm_private_data;
++	} else {
++		private_data = NULL; /* silences a gcc warning; BUG() follows */
++		BUG();
++	}
++
++	/* Copy the existing value of the PTE for returning. */
++	copy = *ptep;
++
++	/* Slot index: the area's first slot plus the page offset of @addr. */
++	slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
++
++	/* Only unmap grants if the slot has been mapped. This could be being
++	 * called from a failing mmap().
++	 */
++	if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) {
++
++		/* First, we clear the user space mapping, if it has been made.
++		 */
++		if (private_data->grants[slot_index].u.valid.user_handle !=
++		    GNTDEV_INVALID_HANDLE && 
++		    !xen_feature(XENFEAT_auto_translated_physmap)) {
++			/* NOT USING SHADOW PAGE TABLES. */
++			gnttab_set_unmap_op(&op, virt_to_machine(ptep), 
++					    GNTMAP_contains_pte,
++					    private_data->grants[slot_index]
++					    .u.valid.user_handle);
++			ret = HYPERVISOR_grant_table_op(
++				GNTTABOP_unmap_grant_ref, &op, 1);
++			BUG_ON(ret);
++			if (op.status)
++				printk("User unmap grant status = %d\n", 
++				       op.status);
++		} else {
++			/* SHADOW PAGE TABLES, or no user-space grant mapping. */
++			pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
++		}
++
++		/* Finally, we unmap the grant from kernel space. */
++		gnttab_set_unmap_op(&op, 
++				    get_kernel_vaddr(private_data, slot_index),
++				    GNTMAP_host_map, 
++				    private_data->grants[slot_index].u.valid
++				    .kernel_handle);
++		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 
++						&op, 1);
++		BUG_ON(ret);
++		if (op.status)
++			printk("Kernel unmap grant status = %d\n", op.status);
++
++
++		/* Return slot to the not-yet-mapped state, so that it may be
++		 * mapped again, or removed by a subsequent ioctl.
++		 */
++		private_data->grants[slot_index].state = 
++			GNTDEV_SLOT_NOT_YET_MAPPED;
++
++		/* Invalidate the physical to machine mapping for this page. */
++		set_phys_to_machine(__pa(get_kernel_vaddr(private_data, 
++							  slot_index)) 
++				    >> PAGE_SHIFT, INVALID_P2M_ENTRY);
++
++	} else {
++		/* Slot not mapped (e.g. failing mmap()): just clear the PTE. */
++		pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
++	}
++
++	return copy;
++}
++
++/* Close hook of a gntdev VM area: releases whatever the area's private data
++ * pointer refers to, if anything. */
++static void gntdev_vma_close(struct vm_area_struct *vma) {
++	void *area_data = vma->vm_private_data;
++	if (area_data != NULL)
++		kfree(area_data);
++}
++
++/* Called when an ioctl is made on the device.
++ *
++ * IOCTL_GNTDEV_MAP_GRANT_REF reserves slots for the supplied grant references
++ * and reports the offset to mmap(); IOCTL_GNTDEV_UNMAP_GRANT_REF frees slots
++ * that are not mmap()-ed; IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR reports the offset
++ * and page count of the gntdev VM area starting at a given address. Returns 0
++ * on success or a negative error code (-ENOIOCTLCMD for an unknown command).
++ */
++static int gntdev_ioctl(struct inode *inode, struct file *flip,
++			unsigned int cmd, unsigned long arg)
++{
++	int rc = 0;
++	gntdev_file_private_data_t *private_data = flip->private_data;
++
++	switch (cmd) {
++	case IOCTL_GNTDEV_MAP_GRANT_REF:
++	{
++		struct ioctl_gntdev_map_grant_ref op;
++		struct ioctl_gntdev_grant_ref *refs = NULL, *u;
++
++		down_write(&private_data->grants_sem);
++		down_write(&private_data->free_list_sem);
++
++		if (copy_from_user(&op, (void __user *) arg, sizeof(op))) {
++			rc = -EFAULT;
++			goto map_out;
++		}
++		if (unlikely(op.count <= 0)) {
++			rc = -EINVAL;
++			goto map_out;
++		}
++		/* More slots than exist can never be found; refusing early
++		 * also keeps the allocation size below from overflowing. */
++		if (unlikely(op.count > MAX_GRANTS)) {
++			rc = -ENOMEM;
++			goto map_out;
++		}
++
++		if (op.count == 1) {
++			if ((rc = add_grant_reference(flip, &op.refs[0],
++						      &op.index)) < 0) {
++				printk(KERN_ERR "Adding grant reference "
++				       "failed (%d).\n", rc);
++				goto map_out;
++			}
++		} else {
++			refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
++			if (!refs) {
++				rc = -ENOMEM;
++				goto map_out;
++			}
++			u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
++			rc = copy_from_user(refs, (void __user *)u,
++					    sizeof(*refs) * op.count);
++			if (rc) {
++				printk(KERN_ERR "Copying refs from user failed"
++				       " (%d).\n", rc);
++				rc = -EINVAL;
++				goto map_out;
++			}
++			rc = find_contiguous_free_range(flip, op.count);
++			if (rc < 0) {
++				printk(KERN_ERR "Finding contiguous range "
++				       "failed (%d).\n", rc);
++				goto map_out;
++			}
++			op.index = rc << PAGE_SHIFT;
++			if ((rc = add_grant_references(flip, op.count,
++						       refs, rc))) {
++				printk(KERN_ERR "Adding grant references "
++				       "failed (%d).\n", rc);
++				goto map_out;
++			}
++			compress_free_list(flip);
++		}
++		rc = copy_to_user((void __user *) arg, &op, sizeof(op));
++		if (rc) {
++			printk(KERN_ERR "Copying result back to user failed "
++			       "(%d)\n", rc);
++			rc = -EFAULT;
++		}
++	map_out:
++		/* refs is NULL on the single-ref path; kfree(NULL) is a no-op. */
++		kfree(refs);
++		up_write(&private_data->grants_sem);
++		up_write(&private_data->free_list_sem);
++		return rc;
++	}
++	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
++	{
++		struct ioctl_gntdev_unmap_grant_ref op;
++		int i, start_index;
++
++		down_write(&private_data->grants_sem);
++		down_write(&private_data->free_list_sem);
++
++		if (copy_from_user(&op, (void __user *) arg, sizeof(op))) {
++			rc = -EFAULT;
++			goto unmap_out;
++		}
++
++		/* The range comes straight from user space: make sure it lies
++		 * inside the grants array before indexing with it.
++		 */
++		if ((op.index >> PAGE_SHIFT) >= MAX_GRANTS ||
++		    op.count > MAX_GRANTS - (op.index >> PAGE_SHIFT)) {
++			rc = -EINVAL;
++			goto unmap_out;
++		}
++		start_index = op.index >> PAGE_SHIFT;
++
++		/* First, check that all pages are in the NOT_YET_MAPPED
++		 * state. */
++		for (i = 0; i < op.count; ++i) {
++			int state = private_data->grants[start_index + i].state;
++			if (likely(state == GNTDEV_SLOT_NOT_YET_MAPPED))
++				continue;
++			if (state == GNTDEV_SLOT_INVALID) {
++				printk(KERN_ERR "Tried to remove an invalid "
++				       "grant at offset 0x%x.\n",
++				       (start_index + i) << PAGE_SHIFT);
++				rc = -EINVAL;
++			} else {
++				printk(KERN_ERR "Tried to remove a grant which "
++				       "is currently mmap()-ed at "
++				       "offset 0x%x.\n",
++				       (start_index + i) << PAGE_SHIFT);
++				rc = -EBUSY;
++			}
++			goto unmap_out;
++		}
++
++		/* Unmap pages and add them to the free list. */
++		for (i = 0; i < op.count; ++i) {
++			private_data->grants[start_index+i].state =
++				GNTDEV_SLOT_INVALID;
++			private_data->grants[start_index+i].u.free_list_index =
++				private_data->free_list_size;
++			private_data->free_list[private_data->free_list_size] =
++				start_index + i;
++			++private_data->free_list_size;
++		}
++		compress_free_list(flip);
++
++	unmap_out:
++		up_write(&private_data->grants_sem);
++		up_write(&private_data->free_list_sem);
++		return rc;
++	}
++	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
++	{
++		struct ioctl_gntdev_get_offset_for_vaddr op;
++		struct vm_area_struct *vma;
++		unsigned long vaddr;
++
++		if (copy_from_user(&op, (void __user *) arg, sizeof(op))) {
++			rc = -EFAULT;
++			goto get_offset_out;
++		}
++		vaddr = (unsigned long)op.vaddr;
++
++		down_read(&current->mm->mmap_sem);
++		vma = find_vma(current->mm, vaddr);
++		if (vma == NULL) {
++			rc = -EFAULT;
++			goto get_offset_unlock_out;
++		}
++		if (vma->vm_ops != &gntdev_vmops) {
++			printk(KERN_ERR "The vaddr specified does not belong "
++			       "to a gntdev instance: %#lx\n", vaddr);
++			rc = -EFAULT;
++			goto get_offset_unlock_out;
++		}
++		if (vma->vm_start != vaddr) {
++			printk(KERN_ERR "The vaddr specified in an "
++			       "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
++			       "the start of the VM area. vma->vm_start = "
++			       "%#lx; vaddr = %#lx\n",
++			       vma->vm_start, vaddr);
++			rc = -EFAULT;
++			goto get_offset_unlock_out;
++		}
++		op.offset = vma->vm_pgoff << PAGE_SHIFT;
++		op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
++		up_read(&current->mm->mmap_sem);
++		if (copy_to_user((void __user *) arg, &op, sizeof(op)))
++			rc = -EFAULT;
++		goto get_offset_out;
++	get_offset_unlock_out:
++		up_read(&current->mm->mmap_sem);
++	get_offset_out:
++		return rc;
++	}
++	default:
++		return -ENOIOCTLCMD;
++	}
++
++	return 0;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/netback/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,5 @@
++obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
++obj-$(CONFIG_XEN_NETDEV_LOOPBACK) += netloop.o
++
++netbk-y   := netback.o xenbus.o interface.o
++netloop-y := loopback.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/netback/common.h	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,157 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/common.h
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __NETIF__BACKEND__COMMON_H__
++#define __NETIF__BACKEND__COMMON_H__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <linux/ip.h>
++#include <linux/in.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/wait.h>
++#include <xen/evtchn.h>
++#include <xen/interface/io/netif.h>
++#include <asm/io.h>
++#include <asm/pgalloc.h>
++#include <xen/interface/grant_table.h>
++#include <xen/gnttab.h>
++#include <xen/driver_util.h>
++
++#define DPRINTK(_f, _a...)			\
++	pr_debug("(file=%s, line=%d) " _f,	\
++		 __FILE__ , __LINE__ , ## _a )
++#define IPRINTK(fmt, args...)				\
++	printk(KERN_INFO "xen_net: " fmt, ##args)
++#define WPRINTK(fmt, args...)				\
++	printk(KERN_WARNING "xen_net: " fmt, ##args)
++
++typedef struct netif_st {	/* state of one backend network interface */
++	/* Unique identifier for this interface. */
++	domid_t          domid;
++	unsigned int     handle;
++
++	u8               fe_dev_addr[6];
++
++	/* Physical parameters of the comms window. */
++	grant_handle_t   tx_shmem_handle;
++	grant_ref_t      tx_shmem_ref;
++	grant_handle_t   rx_shmem_handle;
++	grant_ref_t      rx_shmem_ref;
++	unsigned int     irq;
++
++	/* The shared rings and indexes. */
++	netif_tx_back_ring_t tx;
++	netif_rx_back_ring_t rx;
++	struct vm_struct *tx_comms_area;
++	struct vm_struct *rx_comms_area;
++
++	/* Set of features that can be turned on in dev->features. */
++	int features;
++
++	/* Internal feature flags; unsigned so that a set flag reads as 1. */
++	unsigned int can_queue:1;	/* can queue packets for receiver? */
++	unsigned int copying_receiver:1;/* copy packets to receiver?       */
++
++	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
++	RING_IDX rx_req_cons_peek;
++
++	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
++	unsigned long   credit_bytes;
++	unsigned long   credit_usec;
++	unsigned long   remaining_credit;
++	struct timer_list credit_timeout;
++
++	/* Enforce draining of the transmit queue. */
++	struct timer_list tx_queue_timeout;
++
++	/* Miscellaneous private stuff. */
++	struct list_head list;  /* scheduling list */
++	atomic_t         refcnt;	/* see netif_get() / netif_put() */
++	struct net_device *dev;
++	struct net_device_stats stats;
++
++	unsigned int carrier;	/* private carrier flag, netback_carrier_*() */
++
++	wait_queue_head_t waiting_to_free; /* woken when refcnt hits zero */
++} netif_t;
++
++/*
++ * Implement our own carrier flag: the network stack's version causes delays
++ * when the carrier is re-enabled (in particular, dev_activate() may not
++ * immediately be called, which can cause packet loss; also the etherbridge
++ * can be rather lazy in activating its port).
++ */
++#define netback_carrier_on(netif)	((netif)->carrier = 1)
++#define netback_carrier_off(netif)	((netif)->carrier = 0)
++#define netback_carrier_ok(netif)	((netif)->carrier)
++
++#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
++
++void netif_disconnect(netif_t *netif);
++
++netif_t *netif_alloc(domid_t domid, unsigned int handle);
++int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++	      unsigned long rx_ring_ref, unsigned int evtchn);
++
++#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
++#define netif_put(_b)						\
++	do {							\
++		if ( atomic_dec_and_test(&(_b)->refcnt) )	\
++			wake_up(&(_b)->waiting_to_free);	\
++	} while (0)
++
++void netif_xenbus_init(void);
++
++#define netif_schedulable(netif)				\
++	(netif_running((netif)->dev) && netback_carrier_ok(netif))
++
++void netif_schedule_work(netif_t *netif);
++void netif_deschedule_work(netif_t *netif);
++
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
++struct net_device_stats *netif_be_get_stats(struct net_device *dev);
++irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++
++static inline int netbk_can_queue(struct net_device *dev)
++{
++	/* The device's private area holds the interface's netif_t. */
++	return ((netif_t *) netdev_priv(dev))->can_queue;
++}
++
++static inline int netbk_can_sg(struct net_device *dev)
++{
++	/* Non-zero when NETIF_F_SG is among the interface's features. */
++	return ((netif_t *) netdev_priv(dev))->features & NETIF_F_SG;
++}
++
++#endif /* __NETIF__BACKEND__COMMON_H__ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/netback/interface.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,336 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/interface.c
++ * 
++ * Network-device interface management.
++ * 
++ * Copyright (c) 2004-2005, Keir Fraser
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++#include <linux/ethtool.h>
++#include <linux/rtnetlink.h>
++
++/*
++ * Module parameter 'queue_length':
++ * 
++ * Enables queuing in the network stack when a client has run out of receive
++ * descriptors. Although this feature can improve receive bandwidth by avoiding
++ * packet loss, it can also result in packets sitting in the 'tx_queue' for
++ * unbounded time. This is bad if those packets hold onto foreign resources.
++ * For example, consider a packet that holds onto resources belonging to the
++ * guest for which it is queued (e.g., packet received on vif1.0, destined for
++ * vif1.1 which is not activated in the guest): in this situation the guest
++ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
++ * run a timer (tx_queue_timeout) to drain the queue when the interface is
++ * blocked.
++ */
++static unsigned long netbk_queue_length = 32;	/* copied into tx_queue_len */
++module_param_named(queue_length, netbk_queue_length, ulong, 0);
++
++static void __netif_up(netif_t *netif)
++{
++	enable_irq(netif->irq);		/* balances an earlier disable_irq() */
++	netif_schedule_work(netif);
++}
++
++static void __netif_down(netif_t *netif)
++{
++	disable_irq(netif->irq);	/* re-enabled by __netif_up() */
++	netif_deschedule_work(netif);
++}
++
++static int net_open(struct net_device *dev)
++{
++	netif_t *vif = netdev_priv(dev);
++	if (!netback_carrier_ok(vif))
++		return 0;	/* carrier off: nothing to start yet */
++	__netif_up(vif);
++	netif_start_queue(dev);
++	return 0;
++}
++
++static int net_close(struct net_device *dev)
++{
++	netif_t *netif = netdev_priv(dev);
++	if (netback_carrier_ok(netif))	/* mirrors the check in net_open() */
++		__netif_down(netif);
++	netif_stop_queue(dev);		/* done regardless of carrier state */
++	return 0;
++}
++
++static int netbk_change_mtu(struct net_device *dev, int mtu)
++{
++	/* SG-capable interfaces get the large limit, others ETH_DATA_LEN. */
++	if (mtu > (netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN))
++		return -EINVAL;
++
++	dev->mtu = mtu;
++	return 0;
++}
++
++static int netbk_set_sg(struct net_device *dev, u32 data)
++{
++	netif_t *vif = netdev_priv(dev);
++
++	/* Enabling is refused unless the netif has NETIF_F_SG set. */
++	if (data && !(vif->features & NETIF_F_SG))
++		return -ENOSYS;
++
++	/* Disabling, or enabling a supported feature, uses the stock op. */
++	return ethtool_op_set_sg(dev, data);
++}
++
++static int netbk_set_tso(struct net_device *dev, u32 data)
++{
++	netif_t *vif = netdev_priv(dev);
++
++	/* Enabling is refused unless the netif has NETIF_F_TSO set. */
++	if (data && !(vif->features & NETIF_F_TSO))
++		return -ENOSYS;
++
++	/* Disabling, or enabling a supported feature, uses the stock op. */
++	return ethtool_op_set_tso(dev, data);
++}
++
++static struct ethtool_ops network_ethtool_ops =
++{
++	.get_tx_csum = ethtool_op_get_tx_csum,
++	.set_tx_csum = ethtool_op_set_tx_csum,
++	.get_sg = ethtool_op_get_sg,
++	.set_sg = netbk_set_sg,		/* refuses SG the netif lacks */
++	.get_tso = ethtool_op_get_tso,
++	.set_tso = netbk_set_tso,	/* refuses TSO the netif lacks */
++	.get_link = ethtool_op_get_link,
++};
++
++netif_t *netif_alloc(domid_t domid, unsigned int handle)
++{
++	int err = 0;
++	struct net_device *dev;
++	netif_t *netif;
++	char name[IFNAMSIZ] = {};
++	/* Create and register "vif<domid>.<handle>"; ERR_PTR() on failure. */
++	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
++	dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
++	if (dev == NULL) {
++		DPRINTK("Could not create netif: out of memory\n");
++		return ERR_PTR(-ENOMEM);
++	}
++	/* The netif_t is the net_device's private area. */
++	netif = netdev_priv(dev);
++	memset(netif, 0, sizeof(*netif));
++	netif->domid  = domid;
++	netif->handle = handle;
++	atomic_set(&netif->refcnt, 1);	/* dropped in netif_disconnect() */
++	init_waitqueue_head(&netif->waiting_to_free);
++	netif->dev = dev;
++	/* Backend carrier stays off until netif_map() connects the rings. */
++	netback_carrier_off(netif);
++
++	netif->credit_bytes = netif->remaining_credit = ~0UL;
++	netif->credit_usec  = 0UL;
++	init_timer(&netif->credit_timeout);
++	/* Initialize 'expires' now: it's used to track the credit window. */
++	netif->credit_timeout.expires = jiffies;
++
++	init_timer(&netif->tx_queue_timeout);
++
++	dev->hard_start_xmit = netif_be_start_xmit;
++	dev->get_stats       = netif_be_get_stats;
++	dev->open            = net_open;
++	dev->stop            = net_close;
++	dev->change_mtu	     = netbk_change_mtu;
++	dev->features        = NETIF_F_IP_CSUM;
++
++	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
++
++	dev->tx_queue_len = netbk_queue_length;	/* 'queue_length' parameter */
++
++	/*
++	 * Initialise a dummy MAC address. We choose the numerically
++	 * largest non-broadcast address to prevent the address getting
++	 * stolen by an Ethernet bridge for STP purposes.
++	 * (FE:FF:FF:FF:FF:FF)
++	 */ 
++	memset(dev->dev_addr, 0xFF, ETH_ALEN);
++	dev->dev_addr[0] &= ~0x01;
++
++	rtnl_lock();
++	err = register_netdevice(dev);
++	rtnl_unlock();
++	if (err) {
++		DPRINTK("Could not register new net device %s: err=%d\n",
++			dev->name, err);
++		free_netdev(dev);	/* netif is part of dev, so it goes too */
++		return ERR_PTR(err);
++	}
++
++	DPRINTK("Successfully created netif\n");
++	return netif;
++}
++
++static int map_frontend_pages(
++	netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
++{
++	struct gnttab_map_grant_ref op;
++	struct gnttab_unmap_grant_ref unop;
++
++	gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
++			  GNTMAP_host_map, tx_ring_ref, netif->domid);
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
++	if (op.status) {
++		DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
++		return op.status;	/* grant status code, not an errno */
++	}
++	netif->tx_shmem_ref    = tx_ring_ref;
++	netif->tx_shmem_handle = op.handle;
++	gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
++			  GNTMAP_host_map, rx_ring_ref, netif->domid);
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
++	if (op.status) {
++		/* Undo the tx mapping: netif_map() frees both vm areas next. */
++		gnttab_set_unmap_op(&unop,
++				    (unsigned long)netif->tx_comms_area->addr,
++				    GNTMAP_host_map, netif->tx_shmem_handle);
++		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
++					      &unop, 1))
++			BUG();
++		DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
++		return op.status;
++	}
++	netif->rx_shmem_ref    = rx_ring_ref;
++	netif->rx_shmem_handle = op.handle;
++	return 0;
++}
++
++static void unmap_frontend_pages(netif_t *netif)
++{
++	struct gnttab_unmap_grant_ref op;
++
++	gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
++			    GNTMAP_host_map, netif->tx_shmem_handle);
++	/* Only the hypercall return is checked; op.status is not inspected. */
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++		BUG();
++	/* Same again for the rx ring, reusing 'op'. */
++	gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
++			    GNTMAP_host_map, netif->rx_shmem_handle);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++		BUG();
++}
++
++int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++	      unsigned long rx_ring_ref, unsigned int evtchn)
++{
++	int err = -ENOMEM;
++	netif_tx_sring_t *txs;
++	netif_rx_sring_t *rxs;
++
++	/* Already connected through? */
++	if (netif->irq)
++		return 0;
++	/* One PAGE_SIZE vm area per shared ring. */
++	netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
++	if (netif->tx_comms_area == NULL)
++		return -ENOMEM;
++	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
++	if (netif->rx_comms_area == NULL)
++		goto err_rx;
++
++	err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
++	if (err)
++		goto err_map;
++	/* A non-negative return value is the irq bound to the event channel. */
++	err = bind_interdomain_evtchn_to_irqhandler(
++		netif->domid, evtchn, netif_be_int, 0,
++		netif->dev->name, netif);
++	if (err < 0)
++		goto err_hypervisor;
++	netif->irq = err;
++	disable_irq(netif->irq);	/* enabled again by __netif_up() */
++
++	txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
++	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
++
++	rxs = (netif_rx_sring_t *)
++		((char *)netif->rx_comms_area->addr);
++	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
++
++	netif->rx_req_cons_peek = 0;
++	/* Reference dropped by netif_disconnect() when it turns the carrier off. */
++	netif_get(netif);
++
++	rtnl_lock();
++	netback_carrier_on(netif);
++	if (netif_running(netif->dev))
++		__netif_up(netif);
++	rtnl_unlock();
++
++	return 0;
++err_hypervisor:
++	unmap_frontend_pages(netif);
++err_map:
++	free_vm_area(netif->rx_comms_area);
++err_rx:
++	free_vm_area(netif->tx_comms_area);
++	return err;
++}
++
++void netif_disconnect(netif_t *netif)
++{
++	if (netback_carrier_ok(netif)) {
++		rtnl_lock();
++		netback_carrier_off(netif);
++		netif_carrier_off(netif->dev); /* discard queued packets */
++		if (netif_running(netif->dev))
++			__netif_down(netif);
++		rtnl_unlock();
++		netif_put(netif);	/* pairs with netif_get() in netif_map() */
++	}
++	/* Drop the initial reference, then wait for every other holder. */
++	atomic_dec(&netif->refcnt);
++	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
++
++	del_timer_sync(&netif->credit_timeout);
++	del_timer_sync(&netif->tx_queue_timeout);
++
++	if (netif->irq)
++		unbind_from_irqhandler(netif->irq, netif);
++	
++	unregister_netdev(netif->dev);
++	/* NOTE(review): tx.sring presumably set by BACK_RING_INIT() - confirm. */
++	if (netif->tx.sring) {
++		unmap_frontend_pages(netif);
++		free_vm_area(netif->tx_comms_area);
++		free_vm_area(netif->rx_comms_area);
++	}
++
++	free_netdev(netif->dev);	/* also releases netif itself */
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/netback/loopback.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,320 @@
++/******************************************************************************
++ * netback/loopback.c
++ * 
++ * A two-interface loopback device to emulate a local netfront-netback
++ * connection. This ensures that local packet delivery looks identical
++ * to inter-domain delivery. Most importantly, packets delivered locally
++ * originating from other domains will get *copied* when they traverse this
++ * driver. This prevents unbounded delays in socket-buffer queues from
++ * causing the netback driver to "seize up".
++ * 
++ * This driver creates a symmetric pair of loopback interfaces with names
++ * vif0.0 and veth0. The intention is that 'vif0.0' is bound to an Ethernet
++ * bridge, just like a proper netback interface, while a local IP interface
++ * is configured on 'veth0'.
++ * 
++ * As with a real netback interface, vif0.0 is configured with a suitable
++ * dummy MAC address. No default is provided for veth0: a reasonable strategy
++ * is to transfer eth0's MAC address to veth0, and give eth0 a dummy address
++ * (to avoid confusing the Etherbridge).
++ * 
++ * Copyright (c) 2005 K A Fraser
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/module.h>
++#include <linux/netdevice.h>
++#include <linux/inetdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/skbuff.h>
++#include <linux/ethtool.h>
++#include <net/dst.h>
++#include <net/xfrm.h>		/* secpath_reset() */
++#include <asm/hypervisor.h>	/* is_initial_xendomain() */
++
++static int nloopbacks = -1;
++module_param(nloopbacks, int, 0);
++MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
++
++struct net_private {
++	struct net_device *loopback_dev;	/* other device of the pair */
++	struct net_device_stats stats;		/* see loopback_get_stats() */
++};
++
++static int loopback_open(struct net_device *dev)
++{
++	struct net_private *np = netdev_priv(dev);
++	memset(&np->stats, 0, sizeof(np->stats));	/* counters restart */
++	netif_start_queue(dev);
++	return 0;
++}
++
++static int loopback_close(struct net_device *dev)
++{
++	netif_stop_queue(dev);	/* nothing else to undo; always returns 0 */
++	return 0;
++}
++
++#ifdef CONFIG_X86
++static int is_foreign(unsigned long pfn)
++{
++	if (xen_feature(XENFEAT_auto_translated_physmap))
++		return 1; /* NB. Play it safe for auto-translation mode. */
++	return (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT) != 0;
++}
++#else
++/* How to detect a foreign mapping? Play it safe. */
++#define is_foreign(pfn)	(1)
++#endif
++
++static int skb_remove_foreign_references(struct sk_buff *skb)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	struct page *copy;
++	char *src;
++	int n;
++
++	BUG_ON(shinfo->frag_list);
++
++	for (n = 0; n < shinfo->nr_frags; n++) {
++		skb_frag_t *frag = &shinfo->frags[n];
++
++		if (!is_foreign(page_to_pfn(frag->page)))
++			continue;
++
++		/* Swap the foreign page for a freshly allocated local copy. */
++		copy = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
++		if (unlikely(!copy))
++			return 0;
++
++		src = kmap_skb_frag(frag);
++		memcpy(page_address(copy) + frag->page_offset,
++		       src + frag->page_offset, frag->size);
++		kunmap_skb_frag(src);
++
++		put_page(frag->page);
++		frag->page = copy;
++	}
++
++	return 1;
++}
++
++static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++	struct net_private *np = netdev_priv(dev);
++	/* A failed page copy drops the packet; 0 is returned either way. */
++	if (!skb_remove_foreign_references(skb)) {
++		np->stats.tx_dropped++;
++		dev_kfree_skb(skb);
++		return 0;
++	}
++
++	dst_release(skb->dst);
++	skb->dst = NULL;
++
++	skb_orphan(skb);
++
++	np->stats.tx_bytes += skb->len;
++	np->stats.tx_packets++;
++
++	/* Switch to the peer device: from here on the skb is received there. */
++	dev = np->loopback_dev;
++	np  = netdev_priv(dev);
++
++	np->stats.rx_bytes += skb->len;
++	np->stats.rx_packets++;
++
++	if (skb->ip_summed == CHECKSUM_HW) {
++		/* Defer checksum calculation. */
++		skb->proto_csum_blank = 1;
++		/* Must be a local packet: assert its integrity. */
++		skb->proto_data_valid = 1;
++	}
++
++	skb->ip_summed = skb->proto_data_valid ?
++		CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
++
++	skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
++	skb->protocol = eth_type_trans(skb, dev);
++	skb->dev      = dev;
++	dev->last_rx  = jiffies;
++
++	/* Flush netfilter context: rx'ed skbuffs not expected to have any. */
++	nf_reset(skb);
++	secpath_reset(skb);
++	/* Hand the skb to the stack as a packet received on the peer. */
++	netif_rx(skb);
++
++	return 0;
++}
++
++static struct net_device_stats *loopback_get_stats(struct net_device *dev)
++{
++	/* The counters live in the device's own net_private area. */
++	return &((struct net_private *)netdev_priv(dev))->stats;
++}
++
++static struct ethtool_ops network_ethtool_ops = /* stock helpers only */
++{
++	.get_tx_csum = ethtool_op_get_tx_csum,
++	.set_tx_csum = ethtool_op_set_tx_csum,
++	.get_sg = ethtool_op_get_sg,
++	.set_sg = ethtool_op_set_sg,
++	.get_tso = ethtool_op_get_tso,
++	.set_tso = ethtool_op_set_tso,
++	.get_link = ethtool_op_get_link,
++};
++
++/*
++ * Deliberately empty: the virtual interface is point-to-point and the
++ * physical interface is probably promiscuous anyway.
++ */
++static void loopback_set_multicast_list(struct net_device *dev)
++{
++}
++
++static void loopback_construct(struct net_device *dev, struct net_device *lo)
++{
++	struct net_private *np = netdev_priv(dev);
++	/* 'lo' is the other end: loopback_start_xmit() delivers to it. */
++	np->loopback_dev     = lo;
++
++	dev->open            = loopback_open;
++	dev->stop            = loopback_close;
++	dev->hard_start_xmit = loopback_start_xmit;
++	dev->get_stats       = loopback_get_stats;
++	dev->set_multicast_list = loopback_set_multicast_list;
++	dev->change_mtu	     = NULL; /* allow arbitrary mtu */
++
++	dev->tx_queue_len    = 0;
++
++	dev->features        = (NETIF_F_HIGHDMA |
++				NETIF_F_LLTX |
++				NETIF_F_TSO |
++				NETIF_F_SG |
++				NETIF_F_IP_CSUM);
++
++	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
++
++	/*
++	 * We do not set a jumbo MTU on the interface. Otherwise the network
++	 * stack will try to send large packets that will get dropped by the
++	 * Ethernet bridge (unless the physical Ethernet interface is
++	 * configured to transfer jumbo packets). If a larger MTU is desired
++	 * then the system administrator can specify it using the 'ifconfig'
++	 * command.
++	 */
++	/*dev->mtu             = 16*1024;*/
++}
++
++static int __init make_loopback(int i)
++{
++	struct net_device *dev1, *dev2;
++	char dev_name[IFNAMSIZ];
++	int err = -ENOMEM;
++	/* Create the pair vif0.<i> / veth<i>; returns 0 or a negative error. */
++	sprintf(dev_name, "vif0.%d", i);
++	dev1 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
++	if (!dev1)
++		return err;
++
++	sprintf(dev_name, "veth%d", i);
++	dev2 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
++	if (!dev2)
++		goto fail_netdev2;
++	/* Point each device at the other one. */
++	loopback_construct(dev1, dev2);
++	loopback_construct(dev2, dev1);
++
++	/*
++	 * Initialise a dummy MAC address for the 'dummy backend' interface. We
++	 * choose the numerically largest non-broadcast address to prevent the
++	 * address getting stolen by an Ethernet bridge for STP purposes.
++	 */
++	memset(dev1->dev_addr, 0xFF, ETH_ALEN);
++	dev1->dev_addr[0] &= ~0x01;
++
++	if ((err = register_netdev(dev1)) != 0)
++		goto fail;
++
++	if ((err = register_netdev(dev2)) != 0) {
++		unregister_netdev(dev1);	/* undo the first registration */
++		goto fail;
++	}
++
++	return 0;
++
++ fail:
++	free_netdev(dev2);
++ fail_netdev2:
++	free_netdev(dev1);
++	return err;
++}
++
++static void __exit clean_loopback(int i)
++{
++	struct net_device *dev1, *dev2;
++	char dev_name[IFNAMSIZ];
++	sprintf(dev_name, "vif0.%d", i);
++	dev1 = dev_get_by_name(dev_name);
++	sprintf(dev_name, "veth%d", i);
++	dev2 = dev_get_by_name(dev_name);
++	if (dev1 && dev2) {
++		dev_put(dev2); dev_put(dev1); /* lookup refs block unregister */
++		unregister_netdev(dev2);
++		unregister_netdev(dev1);
++		free_netdev(dev2);
++		free_netdev(dev1);
++	}
++}
++
++static int __init loopback_init(void)
++{
++	int idx, rc;
++
++	if (nloopbacks == -1)	/* unset: 4 in the initial domain, else 0 */
++		nloopbacks = is_initial_xendomain() ? 4 : 0;
++
++	for (idx = 0; idx < nloopbacks; idx++)
++		if ((rc = make_loopback(idx)) != 0)
++			return rc;	/* earlier pairs stay registered */
++
++	return 0;
++}
++
++module_init(loopback_init);
++
++static void __exit loopback_exit(void)
++{
++	int idx = nloopbacks;
++
++	while (idx-- > 0)	/* highest-numbered pair first */
++		clean_loopback(idx);
++}
++
++module_exit(loopback_exit);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/netback/netback.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,1496 @@
++/******************************************************************************
++ * drivers/xen/netback/netback.c
++ * 
++ * Back-end of the driver for virtual network devices. This portion of the
++ * driver exports a 'unified' network-device interface that can be accessed
++ * by any operating system that implements a compatible front end. A 
++ * reference front-end implementation can be found in:
++ *  drivers/xen/netfront/netfront.c
++ * 
++ * Copyright (c) 2002-2005, K A Fraser
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++#include <xen/balloon.h>
++#include <xen/interface/memory.h>
++
++/*define NETBE_DEBUG_INTERRUPT*/
++
++/* Reuses page->mapping, read as a long, to hold a per-page index. */
++#define netif_page_index(pg) (*(long *)&(pg)->mapping)
++
++struct netbk_rx_meta {
++	skb_frag_t frag;
++	int id;
++	unsigned int copy:1;	/* 1: grant copy, 0: page transfer (flip) */
++};
++
++static void netif_idx_release(u16 pending_idx);
++static void netif_page_release(struct page *page);
++static void make_tx_response(netif_t *netif, 
++			     netif_tx_request_t *txp,
++			     s8       st);
++static netif_rx_response_t *make_rx_response(netif_t *netif, 
++					     u16      id, 
++					     s8       st,
++					     u16      offset,
++					     u16      size,
++					     u16      flags);
++
++static void net_tx_action(unsigned long unused);
++static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
++
++static void net_rx_action(unsigned long unused);
++static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
++
++static struct timer_list net_timer;
++
++#define MAX_PENDING_REQS 256
++
++static struct sk_buff_head rx_queue;
++
++static struct page **mmap_pages;
++static inline unsigned long idx_to_kaddr(unsigned int idx)
++{	/* kernel address of the page in slot 'idx' of mmap_pages[] */
++	return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
++}
++
++#define PKT_PROT_LEN 64
++
++static struct pending_tx_info {
++	netif_tx_request_t req;
++	netif_t *netif;		/* interface the request came from */
++} pending_tx_info[MAX_PENDING_REQS];
++static u16 pending_ring[MAX_PENDING_REQS];
++typedef unsigned int PEND_RING_IDX;
++#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) /* needs 2^n size */
++static PEND_RING_IDX pending_prod, pending_cons;
++#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
++
++/* Freed TX SKBs get batched on this ring before return to pending_ring. */
++static u16 dealloc_ring[MAX_PENDING_REQS];
++static PEND_RING_IDX dealloc_prod, dealloc_cons;
++
++static struct sk_buff_head tx_queue;
++
++static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
++static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
++
++static struct list_head net_schedule_list;
++static spinlock_t net_schedule_list_lock;
++
++#define MAX_MFN_ALLOC 64
++static unsigned long mfn_list[MAX_MFN_ALLOC];
++static unsigned int alloc_index = 0;
++
++static inline unsigned long alloc_mfn(void)
++{
++	BUG_ON(alloc_index == 0);	/* pool empty: see check_mfn() */
++	return mfn_list[--alloc_index];	/* pop the most recently added frame */
++}
++
++static int check_mfn(int nr)
++{
++	struct xen_memory_reservation reservation = {
++		.extent_order = 0,
++		.domid        = DOMID_SELF
++	};
++	int rc;	/* extents obtained, or a negative error code */
++
++	if (likely(alloc_index >= nr))
++		return 0;	/* pool already holds enough frames */
++	set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
++	reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
++	rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
++	if (likely(rc > 0))	/* never add an error code to the unsigned count */
++		alloc_index += rc;
++	return alloc_index >= nr ? 0 : -ENOMEM;
++}
++
++static inline void maybe_schedule_tx_action(void)
++{
++	smp_mb();	/* full barrier before sampling the state below */
++	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
++	    !list_empty(&net_schedule_list))
++		tasklet_schedule(&net_tx_tasklet);	/* runs net_tx_action() */
++}
++
++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
++{
++	struct skb_shared_info *ninfo;
++	struct sk_buff *nskb;
++	unsigned long offset;
++	int ret;
++	int len;
++	int headlen;
++	/* Returns a copy in newly allocated memory, or NULL on failure. */
++	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
++
++	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
++	if (unlikely(!nskb))
++		goto err;
++
++	skb_reserve(nskb, 16 + NET_IP_ALIGN);
++	headlen = nskb->end - nskb->data;
++	if (headlen > skb_headlen(skb))
++		headlen = skb_headlen(skb);
++	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
++	BUG_ON(ret);
++	/* Carry the GSO parameters over to the copy. */
++	ninfo = skb_shinfo(nskb);
++	ninfo->gso_size = skb_shinfo(skb)->gso_size;
++	ninfo->gso_type = skb_shinfo(skb)->gso_type;
++	/* Everything past the copied head goes into page fragments. */
++	offset = headlen;
++	len = skb->len - headlen;
++
++	nskb->len = skb->len;
++	nskb->data_len = len;
++	nskb->truesize += len;
++
++	while (len) {
++		struct page *page;
++		int copy;
++		int zero;
++
++		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
++			dump_stack();
++			goto err_free;
++		}
++		/* Only a partially filled (final) page is requested zeroed. */
++		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
++		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
++
++		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
++		if (unlikely(!page))
++			goto err_free;
++
++		ret = skb_copy_bits(skb, offset, page_address(page), copy);
++		BUG_ON(ret);
++
++		ninfo->frags[ninfo->nr_frags].page = page;
++		ninfo->frags[ninfo->nr_frags].page_offset = 0;
++		ninfo->frags[ninfo->nr_frags].size = copy;
++		ninfo->nr_frags++;
++
++		offset += copy;
++		len -= copy;
++	}
++	/* Rebase the header pointers by the distance between the data areas. */
++	offset = nskb->data - skb->data;
++
++	nskb->h.raw = skb->h.raw + offset;
++	nskb->nh.raw = skb->nh.raw + offset;
++	nskb->mac.raw = skb->mac.raw + offset;
++
++	return nskb;
++
++ err_free:
++	kfree_skb(nskb);
++ err:
++	return NULL;
++}
++
++static inline int netbk_max_required_rx_slots(netif_t *netif)
++{
++	/* header + extra_info + frags when SG or TSO is on, else all in one */
++	return (netif->features & (NETIF_F_SG|NETIF_F_TSO)) ?
++		MAX_SKB_FRAGS + 2 : 1;
++}
++
++static inline int netbk_queue_full(netif_t *netif)
++{
++	RING_IDX peek   = netif->rx_req_cons_peek;
++	RING_IDX needed = netbk_max_required_rx_slots(netif);
++	/* Full when fewer than 'needed' slots remain before either limit. */
++	return ((netif->rx.sring->req_prod - peek) < needed) ||
++	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
++}
++
++static void tx_queue_callback(unsigned long data)
++{
++	netif_t *netif = (netif_t *)data; /* set in netif_be_start_xmit() */
++	if (netif_schedulable(netif))
++		netif_wake_queue(netif->dev);
++}
++
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++	netif_t *netif = netdev_priv(dev);
++	/* Always returns 0: undeliverable packets are freed and counted. */
++	BUG_ON(skb->dev != dev);
++
++	/* Drop the packet if the target domain has no receive buffers. */
++	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
++		goto drop;
++
++	/*
++	 * Copy the packet here if it's destined for a flipping interface
++	 * but isn't flippable (e.g. extra references to data).
++	 * XXX For now we also copy skbuffs whose head crosses a page
++	 * boundary, because netbk_gop_skb can't handle them.
++	 */
++	if (!netif->copying_receiver ||
++	    ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
++		struct sk_buff *nskb = netbk_copy_skb(skb);
++		if ( unlikely(nskb == NULL) )
++			goto drop;
++		/* Copy only the header fields we use in this driver. */
++		nskb->dev = skb->dev;
++		nskb->ip_summed = skb->ip_summed;
++		nskb->proto_data_valid = skb->proto_data_valid;
++		dev_kfree_skb(skb);
++		skb = nskb;
++	}
++	/* Reserve ring slots: one per frag, one for the head, one for GSO. */
++	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
++				   !!skb_shinfo(skb)->gso_size;
++	netif_get(netif);
++
++	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
++		netif->rx.sring->req_event = netif->rx_req_cons_peek +
++			netbk_max_required_rx_slots(netif);
++		mb(); /* request notification /then/ check & stop the queue */
++		if (netbk_queue_full(netif)) {
++			netif_stop_queue(dev);
++			/*
++			 * Schedule 500ms timeout to restart the queue, thus
++			 * ensuring that an inactive queue will be drained.
++			 * Packets will immediately be dropped until more
++			 * receive buffers become available (see
++			 * netbk_queue_full() check above).
++			 */
++			netif->tx_queue_timeout.data = (unsigned long)netif;
++			netif->tx_queue_timeout.function = tx_queue_callback;
++			__mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
++		}
++	}
++	/* Queue for net_rx_action(), which the rx tasklet runs. */
++	skb_queue_tail(&rx_queue, skb);
++	tasklet_schedule(&net_rx_tasklet);
++
++	return 0;
++
++ drop:
++	netif->stats.tx_dropped++;
++	dev_kfree_skb(skb);
++	return 0;
++}
++
++#if 0 /* disabled: NAPI completion hook, excluded from the build */
++static void xen_network_done_notify(void)
++{
++	static struct net_device *eth0_dev = NULL;
++	if (unlikely(eth0_dev == NULL))
++		eth0_dev = __dev_get_by_name("eth0");
++	netif_rx_schedule(eth0_dev);
++}
++/* 
++ * Add following to poll() function in NAPI driver (Tigon3 is example):
++ *  if ( xen_network_done() )
++ *      tg3_enable_ints(tp);
++ */
++int xen_network_done(void)
++{
++	return skb_queue_empty(&rx_queue);
++}
++#endif
++
++struct netrx_pending_operations {
++	unsigned trans_prod, trans_cons;	/* cursors into trans[] */
++	unsigned mmu_prod, mmu_cons;		/* cursors into mmu[] */
++	unsigned mcl_prod, mcl_cons;		/* cursors into mcl[] */
++	unsigned copy_prod, copy_cons;		/* cursors into copy[] */
++	unsigned meta_prod, meta_cons;		/* cursors into meta[] */
++	mmu_update_t *mmu;
++	gnttab_transfer_t *trans;
++	gnttab_copy_t *copy;
++	multicall_entry_t *mcl;
++	struct netbk_rx_meta *meta;
++};
++
++/* Set up the grant operations for this fragment (rx slot req_cons + i) and
++   return that request's id.  Flipping also queues the page remapping. */
++static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
++			  int i, struct netrx_pending_operations *npo,
++			  struct page *page, unsigned long size,
++			  unsigned long offset)
++{
++	mmu_update_t *mmu;
++	gnttab_transfer_t *gop;
++	gnttab_copy_t *copy_gop;
++	multicall_entry_t *mcl;
++	netif_rx_request_t *req;
++	unsigned long old_mfn, new_mfn;
++
++	old_mfn = virt_to_mfn(page_address(page));
++	/* Slot i is counted from the current request consumer index. */
++	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
++	if (netif->copying_receiver) {
++		/* The fragment needs to be copied rather than
++		   flipped. */
++		meta->copy = 1;
++		copy_gop = npo->copy + npo->copy_prod++;
++		copy_gop->flags = GNTCOPY_dest_gref;
++		if (PageForeign(page)) {
++			struct pending_tx_info *src_pend =
++				&pending_tx_info[netif_page_index(page)];
++			copy_gop->source.domid = src_pend->netif->domid;
++			copy_gop->source.u.ref = src_pend->req.gref;
++			copy_gop->flags |= GNTCOPY_source_gref;
++		} else {
++			copy_gop->source.domid = DOMID_SELF;
++			copy_gop->source.u.gmfn = old_mfn;
++		}
++		copy_gop->source.offset = offset;
++		copy_gop->dest.domid = netif->domid;
++		copy_gop->dest.offset = 0;
++		copy_gop->dest.u.ref = req->gref;
++		copy_gop->len = size;
++	} else {
++		meta->copy = 0;
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			new_mfn = alloc_mfn();
++
++			/*
++			 * Set the new P2M table entry before
++			 * reassigning the old data page. Heed the
++			 * comment in pgtable-2level.h:pte_page(). :-)
++			 */
++			set_phys_to_machine(page_to_pfn(page), new_mfn);
++
++			mcl = npo->mcl + npo->mcl_prod++;
++			MULTI_update_va_mapping(mcl,
++					     (unsigned long)page_address(page),
++					     pfn_pte_ma(new_mfn, PAGE_KERNEL),
++					     0);
++
++			mmu = npo->mmu + npo->mmu_prod++;
++			mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
++				MMU_MACHPHYS_UPDATE;
++			mmu->val = page_to_pfn(page);
++		}
++		/* Queue the transfer of the old frame to the frontend. */
++		gop = npo->trans + npo->trans_prod++;
++		gop->mfn = old_mfn;
++		gop->domid = netif->domid;
++		gop->ref = req->gref;
++	}
++	return req->id;
++}
++
++static void netbk_gop_skb(struct sk_buff *skb,
++			  struct netrx_pending_operations *npo)
++{
++	netif_t *netif = netdev_priv(skb->dev);
++	int nr_frags = skb_shinfo(skb)->nr_frags;
++	int i;
++	int extra;
++	struct netbk_rx_meta *head_meta, *meta;
++	/* The head's meta entry carries the GSO type/size in its frag fields. */
++	head_meta = npo->meta + npo->meta_prod++;
++	head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
++	head_meta->frag.size = skb_shinfo(skb)->gso_size;
++	extra = !!head_meta->frag.size + 1;
++	/* Frags use the slots after the head (and after the GSO slot, if any). */
++	for (i = 0; i < nr_frags; i++) {
++		meta = npo->meta + npo->meta_prod++;
++		meta->frag = skb_shinfo(skb)->frags[i];
++		meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
++					  meta->frag.page,
++					  meta->frag.size,
++					  meta->frag.page_offset);
++	}
++
++	/*
++	 * This must occur at the end to ensure that we don't trash skb_shinfo
++	 * until we're done. We know that the head doesn't cross a page
++	 * boundary because such packets get copied in netif_be_start_xmit.
++	 */
++	head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
++				       virt_to_page(skb->data),
++				       skb_headlen(skb),
++				       offset_in_page(skb->data));
++
++	netif->rx.req_cons += nr_frags + extra;	/* consume all slots used */
++}
++
++static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
++{
++	struct netbk_rx_meta *end = meta + nr_frags;
++	/* Drop the page reference of each of the nr_frags entries, in order. */
++	for (; meta < end; meta++)
++		put_page(meta->frag.page);
++}
++
++/* Twin of netbk_gop_skb.  The operations that netbk_gop_skb queued on
++   netrx_pending_operations have since been issued; walk the head plus
++   nr_frags fragment slots, consume their results and return NETIF_RSP_OKAY,
++   or NETIF_RSP_ERROR if any copy or transfer reported a failure. */
++static int netbk_check_gop(int nr_frags, domid_t domid,
++			   struct netrx_pending_operations *npo)
++{
++	multicall_entry_t *mcl;
++	gnttab_transfer_t *gop;
++	gnttab_copy_t     *copy_op;
++	int status = NETIF_RSP_OKAY;
++	int i;
++
++	for (i = 0; i <= nr_frags; i++) {	/* head + each fragment */
++		if (npo->meta[npo->meta_cons + i].copy) {
++			copy_op = npo->copy + npo->copy_cons++;
++			if (copy_op->status != GNTST_okay) {
++				DPRINTK("Bad status %d from copy to DOM%d.\n",
++					copy_op->status, domid);
++				status = NETIF_RSP_ERROR;
++			}
++		} else {
++			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++				mcl = npo->mcl + npo->mcl_cons++;
++				/* The update_va_mapping() must not fail. */
++				BUG_ON(mcl->result != 0);
++			}
++
++			gop = npo->trans + npo->trans_cons++;
++			/* Check the reassignment error code. */
++			if (gop->status != 0) {
++				DPRINTK("Bad status %d from grant transfer to DOM%u\n",
++					gop->status, domid);
++				/*
++				 * Page no longer belongs to us unless
++				 * GNTST_bad_page, but that should be
++				 * a fatal error anyway.
++				 */
++				BUG_ON(gop->status == GNTST_bad_page);
++				status = NETIF_RSP_ERROR;
++			}
++		}
++	}
++
++	return status;
++}
++
++/* Queue one rx response per fragment; all but the last one carry
++   NETRXF_more_data.  Copied fragments are reported at offset 0. */
++static void netbk_add_frag_responses(netif_t *netif, int status,
++				     struct netbk_rx_meta *meta, int nr_frags)
++{
++	int i;
++
++	for (i = 0; i < nr_frags; i++) {
++		struct netbk_rx_meta *m = &meta[i];
++		unsigned long offset = m->copy ? 0 : m->frag.page_offset;
++		int flags = NETRXF_more_data;
++
++		if (i == nr_frags - 1)
++			flags = 0;
++		make_rx_response(netif, m->id, status, offset,
++				 m->frag.size, flags);
++	}
++}
++
++static void net_rx_action(unsigned long unused)
++{
++	netif_t *netif = NULL;
++	s8 status;
++	u16 id, irq, flags;
++	netif_rx_response_t *resp;
++	multicall_entry_t *mcl;
++	struct sk_buff_head rxq;
++	struct sk_buff *skb;
++	int notify_nr = 0;
++	int ret;
++	int nr_frags;
++	int count;
++	unsigned long offset;
++	/* Tasklet body: post the skbs queued on rx_queue to their rx rings. */
++	/*
++	 * Putting hundreds of bytes on the stack is considered rude.
++	 * Static works because a tasklet can only be on one CPU at any time.
++	 */
++	static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
++	static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
++	static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
++	static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
++	static unsigned char rx_notify[NR_IRQS];
++	static u16 notify_list[NET_RX_RING_SIZE];
++	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++
++	struct netrx_pending_operations npo = {
++		mmu: rx_mmu,
++		trans: grant_trans_op,
++		copy: grant_copy_op,
++		mcl: rx_mcl,
++		meta: meta};
++
++	skb_queue_head_init(&rxq);
++
++	count = 0;
++
++	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
++		nr_frags = skb_shinfo(skb)->nr_frags;
++		*(int *)skb->cb = nr_frags;	/* saved for the response loop */
++
++		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
++		    !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
++		    check_mfn(nr_frags + 1)) {
++			/* Memory squeeze? Back off for an arbitrary while. */
++			if ( net_ratelimit() )
++				WPRINTK("Memory squeeze in netback "
++					"driver.\n");
++			mod_timer(&net_timer, jiffies + HZ);
++			skb_queue_head(&rx_queue, skb);
++			break;
++		}
++
++		netbk_gop_skb(skb, &npo);
++
++		count += nr_frags + 1;
++
++		__skb_queue_tail(&rxq, skb);
++
++		/* Stop once another maximally fragmented skb might not fit. */
++		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
++			break;
++	}
++
++	if (npo.mcl_prod &&
++	    !xen_feature(XENFEAT_auto_translated_physmap)) {
++		mcl = npo.mcl + npo.mcl_prod++;
++		/* Flush TLBs on the last va-mapping op, then add the mmu_update. */
++		BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
++		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
++
++		mcl->op = __HYPERVISOR_mmu_update;
++		mcl->args[0] = (unsigned long)rx_mmu;
++		mcl->args[1] = npo.mmu_prod;
++		mcl->args[2] = 0;
++		mcl->args[3] = DOMID_SELF;
++	}
++
++	if (npo.trans_prod) {
++		mcl = npo.mcl + npo.mcl_prod++;
++		mcl->op = __HYPERVISOR_grant_table_op;
++		mcl->args[0] = GNTTABOP_transfer;
++		mcl->args[1] = (unsigned long)grant_trans_op;
++		mcl->args[2] = npo.trans_prod;
++	}
++
++	if (npo.copy_prod) {
++		mcl = npo.mcl + npo.mcl_prod++;
++		mcl->op = __HYPERVISOR_grant_table_op;
++		mcl->args[0] = GNTTABOP_copy;
++		mcl->args[1] = (unsigned long)grant_copy_op;
++		mcl->args[2] = npo.copy_prod;
++	}
++
++	/* Nothing to do? */
++	if (!npo.mcl_prod)
++		return;
++
++	BUG_ON(npo.copy_prod > NET_RX_RING_SIZE);
++	BUG_ON(npo.mmu_prod > NET_RX_RING_SIZE);
++	BUG_ON(npo.trans_prod > NET_RX_RING_SIZE);
++	BUG_ON(npo.mcl_prod > NET_RX_RING_SIZE+3);
++	BUG_ON(npo.meta_prod > NET_RX_RING_SIZE);
++
++	ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
++	BUG_ON(ret != 0);
++
++	while ((skb = __skb_dequeue(&rxq)) != NULL) {
++		nr_frags = *(int *)skb->cb;
++
++		netif = netdev_priv(skb->dev);
++		/* We can't rely on skb_release_data to release the
++		   pages used by fragments for us, since it tries to
++		   touch the pages in the fraglist.  If we're in
++		   flipping mode, that doesn't work.  In copying mode,
++		   we still have access to all of the pages, and so
++		   it's safe to let release_data deal with it. */
++		/* (Freeing the fragments is safe since we copy
++		   non-linear skbs destined for flipping interfaces) */
++		if (!netif->copying_receiver) {
++			atomic_set(&(skb_shinfo(skb)->dataref), 1);
++			skb_shinfo(skb)->frag_list = NULL;
++			skb_shinfo(skb)->nr_frags = 0;
++			netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
++		}
++
++		netif->stats.tx_bytes += skb->len;
++		netif->stats.tx_packets++;
++
++		status = netbk_check_gop(nr_frags, netif->domid, &npo);
++
++		id = meta[npo.meta_cons].id;
++		flags = nr_frags ? NETRXF_more_data : 0;
++
++		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++			flags |= NETRXF_csum_blank | NETRXF_data_validated;
++		else if (skb->proto_data_valid) /* remote but checksummed? */
++			flags |= NETRXF_data_validated;
++
++		if (meta[npo.meta_cons].copy)
++			offset = 0;
++		else
++			offset = offset_in_page(skb->data);
++		resp = make_rx_response(netif, id, status, offset,
++					skb_headlen(skb), flags);
++		/* Non-zero head frag.size is the GSO size: add an extra slot. */
++		if (meta[npo.meta_cons].frag.size) {
++			struct netif_extra_info *gso =
++				(struct netif_extra_info *)
++				RING_GET_RESPONSE(&netif->rx,
++						  netif->rx.rsp_prod_pvt++);
++
++			resp->flags |= NETRXF_extra_info;
++
++			gso->u.gso.size = meta[npo.meta_cons].frag.size;
++			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
++			gso->u.gso.pad = 0;
++			gso->u.gso.features = 0;
++
++			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
++			gso->flags = 0;
++		}
++
++		netbk_add_frag_responses(netif, status,
++					 meta + npo.meta_cons + 1,
++					 nr_frags);
++
++		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
++		irq = netif->irq;
++		if (ret && !rx_notify[irq]) {
++			rx_notify[irq] = 1;
++			notify_list[notify_nr++] = irq;
++		}
++
++		if (netif_queue_stopped(netif->dev) &&
++		    netif_schedulable(netif) &&
++		    !netbk_queue_full(netif))
++			netif_wake_queue(netif->dev);
++
++		netif_put(netif);
++		dev_kfree_skb(skb);
++		npo.meta_cons += nr_frags + 1;
++	}
++	/* Send each irq collected in notify_list exactly once. */
++	while (notify_nr != 0) {
++		irq = notify_list[--notify_nr];
++		rx_notify[irq] = 0;
++		notify_remote_via_irq(irq);
++	}
++
++	/* More work to do? */
++	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
++		tasklet_schedule(&net_rx_tasklet);
++#if 0
++	else
++		xen_network_done_notify();
++#endif
++}
++
++static void net_alarm(unsigned long unused)
++{
++	tasklet_schedule(&net_rx_tasklet);	/* net_timer fired: retry rx */
++}
++
++struct net_device_stats *netif_be_get_stats(struct net_device *dev)
++{
++	/* The statistics block lives in the device's private netif_t. */
++	return &((netif_t *)netdev_priv(dev))->stats;
++}
++
++static int __on_net_schedule_list(netif_t *netif)
++{
++	return !!netif->list.next;	/* next == NULL means "not queued" */
++}
++
++static void remove_from_net_schedule_list(netif_t *netif)
++{
++	spin_lock_irq(&net_schedule_list_lock);
++	if (likely(__on_net_schedule_list(netif))) {
++		list_del(&netif->list);
++		netif->list.next = NULL;	/* marks netif as off-list */
++		netif_put(netif);	/* ref taken when it was added */
++	}
++	spin_unlock_irq(&net_schedule_list_lock);
++}
++
++static void add_to_net_schedule_list_tail(netif_t *netif)
++{
++	if (__on_net_schedule_list(netif))	/* unlocked early-out */
++		return;
++
++	spin_lock_irq(&net_schedule_list_lock);
++	if (!__on_net_schedule_list(netif) &&	/* re-check under lock */
++	    likely(netif_schedulable(netif))) {
++		list_add_tail(&netif->list, &net_schedule_list);
++		netif_get(netif);	/* the list holds a reference */
++	}
++	spin_unlock_irq(&net_schedule_list_lock);
++}
++
++/*
++ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
++ * If this driver is pipelining transmit requests then we can be very
++ * aggressive in avoiding new-packet notifications -- frontend only needs to
++ * send a notification if there are no outstanding unreceived responses.
++	 * If we may be buffering transmit requests for any reason then we must be rather
++ * more conservative and treat this as the final check for pending work.
++ */
++void netif_schedule_work(netif_t *netif)
++{
++	int more_to_do;
++
++#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
++	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
++#else
++	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
++#endif
++	if (!more_to_do)
++		return;
++	/* Unconsumed tx requests: put netif on the schedule list for tx. */
++	add_to_net_schedule_list_tail(netif);
++	maybe_schedule_tx_action();
++}
++
++void netif_deschedule_work(netif_t *netif)
++{
++	remove_from_net_schedule_list(netif);	/* no-op if not queued */
++}
++
++
++static void tx_add_credit(netif_t *netif)
++{
++	unsigned long burst, credit;
++
++	/* Burst limit: next request's size capped at 128kB (jumbo packet),
++	   but never below credit_bytes, or the interface could seize up. */
++	burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
++	if (burst > 131072UL)
++		burst = 131072UL;
++	if (burst < netif->credit_bytes)
++		burst = netif->credit_bytes;
++
++	/* Add a chunk of credit, saturating rather than wrapping to zero. */
++	credit = netif->remaining_credit + netif->credit_bytes;
++	if (credit < netif->remaining_credit)
++		credit = ULONG_MAX;
++
++	netif->remaining_credit = (credit < burst) ? credit : burst;
++}
++
++static void tx_credit_callback(unsigned long data)
++{
++	netif_t *netif = (netif_t *)data;	/* credit_timeout.data */
++	tx_add_credit(netif);
++	netif_schedule_work(netif);
++}
++
++inline static void net_tx_action_dealloc(void)
++{
++	gnttab_unmap_grant_ref_t *gop;
++	u16 pending_idx;
++	PEND_RING_IDX dc, dp;
++	netif_t *netif;
++	int ret;
++	/* Unmap and complete every tx slot queued on the dealloc ring. */
++	dc = dealloc_cons;
++	dp = dealloc_prod;
++
++	/* Ensure we see all indexes enqueued by netif_idx_release(). */
++	smp_rmb();
++
++	/*
++	 * Build one unmap op for each released index in [dealloc_cons, dp).
++	 */
++	gop = tx_unmap_ops;
++	while (dc != dp) {
++		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
++		gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
++				    GNTMAP_host_map,
++				    grant_tx_handle[pending_idx]);
++		gop++;
++	}
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
++	BUG_ON(ret);
++	/* Respond OKAY for each slot and return it to the pending ring. */
++	while (dealloc_cons != dp) {
++		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
++
++		netif = pending_tx_info[pending_idx].netif;
++
++		make_tx_response(netif, &pending_tx_info[pending_idx].req, 
++				 NETIF_RSP_OKAY);
++
++		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++
++		netif_put(netif);
++	}
++}
++
++static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
++{
++	RING_IDX cons = netif->tx.req_cons;
++
++	/* Fail txp, then every ring request from req_cons up to end. */
++	make_tx_response(netif, txp, NETIF_RSP_ERROR);
++	while (cons < end) {
++		txp = RING_GET_REQUEST(&netif->tx, cons++);
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++	}
++	netif->tx.req_cons = cons;
++	netif_schedule_work(netif);
++	netif_put(netif);
++}
++
++static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
++				netif_tx_request_t *txp, int work_to_do)
++{
++	RING_IDX cons = netif->tx.req_cons;
++	int frags = 0;
++	/* Copy and validate the frags after first; on error return -frags. */
++	if (!(first->flags & NETTXF_more_data))
++		return 0;
++	/* NOTE(review): -frags is 0 when the very first frag is rejected. */
++	do {
++		if (frags >= work_to_do) {
++			DPRINTK("Need more frags\n");
++			return -frags;
++		}
++
++		if (unlikely(frags >= MAX_SKB_FRAGS)) {
++			DPRINTK("Too many frags\n");
++			return -frags;
++		}
++
++		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
++		       sizeof(*txp));
++		if (txp->size > first->size) {
++			DPRINTK("Frags galore\n");
++			return -frags;
++		}
++
++		first->size -= txp->size;	/* bytes not yet accounted for */
++		frags++;
++
++		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
++			DPRINTK("txp->offset: %x, size: %u\n",
++				txp->offset, txp->size);
++			return -frags;
++		}
++	} while ((txp++)->flags & NETTXF_more_data);
++
++	return frags;
++}
++
++static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
++						  struct sk_buff *skb,
++						  netif_tx_request_t *txp,
++						  gnttab_map_grant_ref_t *mop)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	skb_frag_t *frags = shinfo->frags;
++	unsigned long pending_idx = *((u16 *)skb->data);
++	int i, start;
++	/* Queue a read-only grant map for each fragment request in txp[]. */
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++	for (i = start; i < shinfo->nr_frags; i++, txp++) {
++		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
++
++		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
++				  GNTMAP_host_map | GNTMAP_readonly,
++				  txp->gref, netif->domid);
++
++		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
++		netif_get(netif);
++		pending_tx_info[pending_idx].netif = netif;
++		frags[i].page = (void *)pending_idx;	/* idx for now */
++	}
++
++	return mop;	/* first unused map op */
++}
++
++/* Check the grant-map results for skb's header and fragments.  On failure
++   send error responses, release every slot that did map, and return the
++   first bad status (0 if all is well).  *mopp is advanced past this skb. */
++static int netbk_tx_check_mop(struct sk_buff *skb,
++			       gnttab_map_grant_ref_t **mopp)
++{
++	gnttab_map_grant_ref_t *mop = *mopp;
++	int pending_idx = *((u16 *)skb->data);
++	netif_t *netif = pending_tx_info[pending_idx].netif;
++	netif_tx_request_t *txp;
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	int i, err, start;
++
++	/* Check status of header. */
++	err = mop->status;
++	if (unlikely(err)) {
++		txp = &pending_tx_info[pending_idx].req;
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++		netif_put(netif);
++	} else {
++		set_phys_to_machine(
++			__pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
++			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
++		grant_tx_handle[pending_idx] = mop->handle;
++	}
++
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++	for (i = start; i < nr_frags; i++) {
++		int j, newerr;
++
++		pending_idx = (unsigned long)shinfo->frags[i].page;
++		/* Check error status: if okay then remember grant handle. */
++		newerr = (++mop)->status;
++		if (likely(!newerr)) {
++			set_phys_to_machine(
++				__pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
++				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
++			grant_tx_handle[pending_idx] = mop->handle;
++			/* Had a previous error? Invalidate this fragment. */
++			if (unlikely(err))
++				netif_idx_release(pending_idx);
++			continue;
++		}
++
++		/* Error on this fragment: respond to client with an error. */
++		txp = &pending_tx_info[pending_idx].req;
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++		netif_put(netif);
++
++		/* Not the first error? Preceding frags already invalidated. */
++		if (err)
++			continue;
++		/* First error: invalidate header and fragments [start, i). */
++		pending_idx = *((u16 *)skb->data);
++		netif_idx_release(pending_idx);
++		for (j = start; j < i; j++) {
++			pending_idx = (unsigned long)shinfo->frags[j].page;
++			netif_idx_release(pending_idx);
++		}
++		/* Remember the error: invalidate all subsequent fragments. */
++		err = newerr;
++	}
++
++	*mopp = mop + 1;
++	return err;
++}
++
++/* Replace the pending indexes stashed in frags[].page by the pages that
++   back those slots, take size/offset from the saved tx requests and
++   add every fragment's size to the skb's len, data_len and truesize. */
++static void netbk_fill_frags(struct sk_buff *skb)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	skb_frag_t *frag = shinfo->frags;
++	skb_frag_t *end = frag + shinfo->nr_frags;
++
++	for (; frag < end; frag++) {
++		unsigned long idx = (unsigned long)frag->page;
++		netif_tx_request_t *req = &pending_tx_info[idx].req;
++
++		frag->page = virt_to_page(idx_to_kaddr(idx));
++		frag->size = req->size;
++		frag->page_offset = req->offset;
++
++		skb->len += req->size;
++		skb->data_len += req->size;
++		skb->truesize += req->size;
++	}
++}
++
++int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
++		     int work_to_do)
++{
++	struct netif_extra_info extra;
++	RING_IDX cons = netif->tx.req_cons;
++	/* Copy the chained extra-info slots at req_cons into extras[type-1]. */
++	do {
++		if (unlikely(work_to_do-- <= 0)) {
++			DPRINTK("Missing extra info\n");
++			return -EBADR;
++		}
++
++		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
++		       sizeof(extra));
++		if (unlikely(!extra.type ||
++			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
++			netif->tx.req_cons = ++cons;	/* bad slot is consumed */
++			DPRINTK("Invalid extra type: %d\n", extra.type);
++			return -EINVAL;
++		}
++
++		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
++		netif->tx.req_cons = ++cons;
++	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
++
++	return work_to_do;	/* slots still available */
++}
++
++static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++
++	if (gso->u.gso.size == 0) {
++		DPRINTK("GSO size must not be zero.\n");
++		return -EINVAL;
++	}
++
++	/* TCPv4 is the only segmentation offload type accepted here. */
++	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++		DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
++		return -EINVAL;
++	}
++
++	/* SKB_GSO_DODGY: header must be checked, and gso_segs computed. */
++	shinfo->gso_size = gso->u.gso.size;
++	shinfo->gso_type = SKB_GSO_TCPV4 | SKB_GSO_DODGY;
++	shinfo->gso_segs = 0;
++
++	return 0;
++}
++
++/* Called after netfront has transmitted: map tx requests, netif_rx() skbs. */
++static void net_tx_action(unsigned long unused)
++{
++	struct list_head *ent;
++	struct sk_buff *skb;
++	netif_t *netif;
++	netif_tx_request_t txreq;
++	netif_tx_request_t txfrags[MAX_SKB_FRAGS];
++	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++	u16 pending_idx;
++	RING_IDX i;
++	gnttab_map_grant_ref_t *mop;
++	unsigned int data_len;
++	int ret, work_to_do;
++
++	if (dealloc_cons != dealloc_prod)
++		net_tx_action_dealloc();
++
++	mop = tx_map_ops;
++	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++		!list_empty(&net_schedule_list)) {
++		/* Get a netif from the list with work to do. */
++		ent = net_schedule_list.next;
++		netif = list_entry(ent, netif_t, list);
++		netif_get(netif);
++		remove_from_net_schedule_list(netif);
++
++		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
++		if (!work_to_do) {
++			netif_put(netif);
++			continue;
++		}
++
++		i = netif->tx.req_cons;
++		rmb(); /* Ensure that we see the request before we copy it. */
++		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
++
++		/* Credit-based scheduling. */
++		if (txreq.size > netif->remaining_credit) {
++			unsigned long now = jiffies;
++			unsigned long next_credit = 
++				netif->credit_timeout.expires +
++				msecs_to_jiffies(netif->credit_usec / 1000);
++
++			/* Timer could already be pending in rare cases. */
++			if (timer_pending(&netif->credit_timeout)) {
++				netif_put(netif);
++				continue;
++			}
++
++			/* Passed the point where we can replenish credit? */
++			if (time_after_eq(now, next_credit)) {
++				netif->credit_timeout.expires = now;
++				tx_add_credit(netif);
++			}
++
++			/* Still too big to send right now? Set a callback. */
++			if (txreq.size > netif->remaining_credit) {
++				netif->credit_timeout.data     =
++					(unsigned long)netif;
++				netif->credit_timeout.function =
++					tx_credit_callback;
++				__mod_timer(&netif->credit_timeout,
++					    next_credit);
++				netif_put(netif);
++				continue;
++			}
++		}
++		netif->remaining_credit -= txreq.size;
++
++		work_to_do--;
++		netif->tx.req_cons = ++i;
++
++		memset(extras, 0, sizeof(extras));
++		if (txreq.flags & NETTXF_extra_info) {
++			work_to_do = netbk_get_extras(netif, extras,
++						      work_to_do);
++			i = netif->tx.req_cons;
++			if (unlikely(work_to_do < 0)) {
++				netbk_tx_err(netif, &txreq, i);
++				continue;
++			}
++		}
++
++		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
++		if (unlikely(ret < 0)) {
++			netbk_tx_err(netif, &txreq, i - ret);
++			continue;
++		}
++		i += ret;
++
++		if (unlikely(txreq.size < ETH_HLEN)) {
++			DPRINTK("Bad packet size: %d\n", txreq.size);
++			netbk_tx_err(netif, &txreq, i);
++			continue;
++		}
++
++		/* No crossing a page as the payload mustn't fragment. */
++		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
++			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", 
++				txreq.offset, txreq.size, 
++				(txreq.offset &~PAGE_MASK) + txreq.size);
++			netbk_tx_err(netif, &txreq, i);
++			continue;
++		}
++
++		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
++
++		data_len = (txreq.size > PKT_PROT_LEN &&
++			    ret < MAX_SKB_FRAGS) ?
++			PKT_PROT_LEN : txreq.size;
++
++		skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
++				GFP_ATOMIC | __GFP_NOWARN);
++		if (unlikely(skb == NULL)) {
++			DPRINTK("Can't allocate a skb in start_xmit.\n");
++			netbk_tx_err(netif, &txreq, i);
++			break;
++		}
++
++		/* Packets passed to netif_rx() must have some headroom. */
++		skb_reserve(skb, 16 + NET_IP_ALIGN);
++
++		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++			struct netif_extra_info *gso;
++			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
++
++			if (netbk_set_skb_gso(skb, gso)) {
++				kfree_skb(skb);
++				netbk_tx_err(netif, &txreq, i);
++				continue;
++			}
++		}
++
++		gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
++				  GNTMAP_host_map | GNTMAP_readonly,
++				  txreq.gref, netif->domid);
++		mop++;
++
++		memcpy(&pending_tx_info[pending_idx].req,
++		       &txreq, sizeof(txreq));
++		pending_tx_info[pending_idx].netif = netif;
++		*((u16 *)skb->data) = pending_idx;
++
++		__skb_put(skb, data_len);
++
++		skb_shinfo(skb)->nr_frags = ret;
++		if (data_len < txreq.size) {
++			skb_shinfo(skb)->nr_frags++;
++			skb_shinfo(skb)->frags[0].page =
++				(void *)(unsigned long)pending_idx;
++		} else {
++			/* Discriminate from any valid pending_idx value. */
++			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
++		}
++
++		__skb_queue_tail(&tx_queue, skb);
++
++		pending_cons++;	/* consume the header's pending slot */
++
++		mop = netbk_get_requests(netif, skb, txfrags, mop);
++
++		netif->tx.req_cons = i;
++		netif_schedule_work(netif);
++
++		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
++			break;
++	}
++
++	if (mop == tx_map_ops)
++		return;
++
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
++	BUG_ON(ret);
++	/* Second pass: check each map result and pass good skbs up. */
++	mop = tx_map_ops;
++	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
++		netif_tx_request_t *txp;
++
++		pending_idx = *((u16 *)skb->data);
++		netif       = pending_tx_info[pending_idx].netif;
++		txp         = &pending_tx_info[pending_idx].req;
++
++		/* Check the remap error code. */
++		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
++			DPRINTK("netback grant failed.\n");
++			skb_shinfo(skb)->nr_frags = 0;
++			kfree_skb(skb);
++			continue;
++		}
++		/* Copy the linear part out of the mapped header page. */
++		data_len = skb->len;
++		memcpy(skb->data,
++		       (void *)(idx_to_kaddr(pending_idx)|txp->offset),
++		       data_len);
++		if (data_len < txp->size) {
++			/* Append the packet payload as a fragment. */
++			txp->offset += data_len;
++			txp->size -= data_len;
++		} else {
++			/* Schedule a response immediately. */
++			netif_idx_release(pending_idx);
++		}
++
++		/*
++		 * Old frontends do not assert data_validated but we
++		 * can infer it from csum_blank so test both flags.
++		 */
++		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
++			skb->ip_summed = CHECKSUM_UNNECESSARY;
++			skb->proto_data_valid = 1;
++		} else {
++			skb->ip_summed = CHECKSUM_NONE;
++			skb->proto_data_valid = 0;
++		}
++		skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
++
++		netbk_fill_frags(skb);
++
++		skb->dev      = netif->dev;
++		skb->protocol = eth_type_trans(skb, skb->dev);
++
++		netif->stats.rx_bytes += skb->len;
++		netif->stats.rx_packets++;
++
++		netif_rx(skb);
++		netif->dev->last_rx = jiffies;
++	}
++}
++
++static void netif_idx_release(u16 pending_idx)
++{
++	static DEFINE_SPINLOCK(_lock);
++	unsigned long flags;
++	/* Queue pending_idx on the dealloc ring and kick the tx tasklet. */
++	spin_lock_irqsave(&_lock, flags);
++	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
++	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
++	smp_wmb();
++	dealloc_prod++;
++	spin_unlock_irqrestore(&_lock, flags);
++
++	tasklet_schedule(&net_tx_tasklet);
++}
++
++static void netif_page_release(struct page *page)
++{
++	/* Ready for next use: reset the page count, then release its slot. */
++	init_page_count(page);
++
++	netif_idx_release(netif_page_index(page));
++}
++
++irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++{
++	netif_t *netif = dev_id;
++	/* Interrupt handler: schedule tx work and maybe restart the queue. */
++	add_to_net_schedule_list_tail(netif);
++	maybe_schedule_tx_action();
++
++	if (netif_schedulable(netif) && !netbk_queue_full(netif))
++		netif_wake_queue(netif->dev);
++
++	return IRQ_HANDLED;
++}
++
++static void make_tx_response(netif_t *netif, 
++			     netif_tx_request_t *txp,
++			     s8       st)
++{
++	RING_IDX i = netif->tx.rsp_prod_pvt;
++	netif_tx_response_t *resp;
++	int notify;
++	/* Post a tx response for txp with status st; notify if required. */
++	resp = RING_GET_RESPONSE(&netif->tx, i);
++	resp->id     = txp->id;
++	resp->status = st;
++	/* An extra-info request also gets a NETIF_RSP_NULL in the next slot. */
++	if (txp->flags & NETTXF_extra_info)
++		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
++
++	netif->tx.rsp_prod_pvt = ++i;
++	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
++	if (notify)
++		notify_remote_via_irq(netif->irq);
++
++#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
++	if (i == netif->tx.req_cons) {
++		int more_to_do;
++		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
++		if (more_to_do)
++			add_to_net_schedule_list_tail(netif);
++	}
++#endif
++}
++
++/* Fill the next private rx response slot and advance rsp_prod_pvt.  The
++   status field carries size, or the error code st when st is negative.
++   The response is not pushed here; the filled slot is returned. */
++static netif_rx_response_t *make_rx_response(netif_t *netif,
++					     u16      id,
++					     s8       st,
++					     u16      offset,
++					     u16      size,
++					     u16      flags)
++{
++	netif_rx_response_t *resp;
++
++	resp = RING_GET_RESPONSE(&netif->rx, netif->rx.rsp_prod_pvt);
++	netif->rx.rsp_prod_pvt++;
++
++	resp->id     = id;
++	resp->offset = offset;
++	resp->flags  = flags;
++	resp->status = (st < 0) ? (s16)st : (s16)size;
++
++	return resp;
++}
++
++#ifdef NETBE_DEBUG_INTERRUPT
++static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
++{
++	struct list_head *ent;
++	netif_t *netif;
++	int i = 0;	/* position in the schedule list */
++	/* Dump the ring indexes of every netif on the schedule list. */
++	printk(KERN_ALERT "netif_schedule_list:\n");
++	spin_lock_irq(&net_schedule_list_lock);
++
++	list_for_each (ent, &net_schedule_list) {
++		netif = list_entry(ent, netif_t, list);
++		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
++		       "rx_resp_prod=%08x\n",
++		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
++		printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
++		       netif->tx.req_cons, netif->tx.rsp_prod_pvt);
++		printk(KERN_ALERT "   shared(rx_req_prod=%08x "
++		       "rx_resp_prod=%08x\n",
++		       netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
++		printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
++		       netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
++		printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
++		       netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
++		i++;
++	}
++
++	spin_unlock_irq(&net_schedule_list_lock);
++	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
++
++	return IRQ_HANDLED;
++}
++#endif
++
++static int __init netback_init(void)
++{
++	int i;
++	struct page *page;
++	/* Module init: set up queues, timer, pending slots and xenbus. */
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	/* We can increase reservation by this much in net_rx_action(). */
++	balloon_update_driver_allowance(NET_RX_RING_SIZE);
++
++	skb_queue_head_init(&rx_queue);
++	skb_queue_head_init(&tx_queue);
++
++	init_timer(&net_timer);
++	net_timer.data = 0;
++	net_timer.function = net_alarm;
++
++	mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++	if (mmap_pages == NULL) {
++		printk("%s: out of memory\n", __FUNCTION__);
++		return -ENOMEM;	/* NOTE(review): allowance above is not undone */
++	}
++
++	for (i = 0; i < MAX_PENDING_REQS; i++) {
++		page = mmap_pages[i];
++		SetPageForeign(page, netif_page_release);
++		netif_page_index(page) = i;
++	}
++	/* All pending slots start out free: the ring holds indexes 0..N-1. */
++	pending_cons = 0;
++	pending_prod = MAX_PENDING_REQS;
++	for (i = 0; i < MAX_PENDING_REQS; i++)
++		pending_ring[i] = i;
++
++	spin_lock_init(&net_schedule_list_lock);
++	INIT_LIST_HEAD(&net_schedule_list);
++
++	netif_xenbus_init();
++
++#ifdef NETBE_DEBUG_INTERRUPT
++	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
++				      0,
++				      netif_be_dbg,
++				      SA_SHIRQ, 
++				      "net-be-dbg",
++				      &netif_be_dbg);
++#endif
++
++	return 0;
++}
++
++module_init(netback_init);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/netback/xenbus.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,448 @@
++/*  Xenbus code for netif backend
++    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
++    Copyright (C) 2005 XenSource Ltd
++
++    This program is free software; you can redistribute it and/or modify
++    it under the terms of the GNU General Public License as published by
++    the Free Software Foundation; either version 2 of the License, or
++    (at your option) any later version.
++
++    This program is distributed in the hope that it will be useful,
++    but WITHOUT ANY WARRANTY; without even the implied warranty of
++    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++    GNU General Public License for more details.
++
++    You should have received a copy of the GNU General Public License
++    along with this program; if not, write to the Free Software
++    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++*/
++
++#include <stdarg.h>
++#include <linux/module.h>
++#include <xen/xenbus.h>
++#include "common.h"
++
++#if 0
++#undef DPRINTK
++#define DPRINTK(fmt, args...) \
++    printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
++#endif
++
++struct backend_info {
++	struct xenbus_device *dev;	/* set in netback_probe() */
++	netif_t *netif;		/* NULL while no interface exists */
++	enum xenbus_state frontend_state;
++};
++
++static int connect_rings(struct backend_info *);
++static void connect(struct backend_info *);
++static void backend_create_netif(struct backend_info *be);
++
++static int netback_remove(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev->dev.driver_data;
++	/* Disconnect the interface, if one exists, then free the state. */
++	if (be->netif) {
++		netif_disconnect(be->netif);
++		be->netif = NULL;
++	}
++	kfree(be);
++	dev->dev.driver_data = NULL;
++	return 0;
++}
++
++
++/**
++ * Entry point to this code when a new device is created.  Allocate the basic
++ * structures and switch to InitWait.
++ */
++static int netback_probe(struct xenbus_device *dev,
++			 const struct xenbus_device_id *id)
++{
++	const char *message;
++	struct xenbus_transaction xbt;
++	int err;
++	struct backend_info *be = kzalloc(sizeof(struct backend_info),
++					  GFP_KERNEL);
++	if (!be) {
++		xenbus_dev_fatal(dev, -ENOMEM,
++				 "allocating backend structure");
++		return -ENOMEM;
++	}
++
++	be->dev = dev;
++	dev->dev.driver_data = be;
++
++	do {
++		err = xenbus_transaction_start(&xbt);
++		if (err) {
++			xenbus_dev_fatal(dev, err, "starting transaction");
++			goto fail;
++		}
++
++		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
++		if (err) {
++			message = "writing feature-sg";
++			goto abort_transaction;
++		}
++
++		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
++				    "%d", 1);
++		if (err) {
++			message = "writing feature-gso-tcpv4";
++			goto abort_transaction;
++		}
++
++		/* We support rx-copy path. */
++		err = xenbus_printf(xbt, dev->nodename,
++				    "feature-rx-copy", "%d", 1);
++		if (err) {
++			message = "writing feature-rx-copy";
++			goto abort_transaction;
++		}
++
++		/*
++		 * We don't support rx-flip path (except old guests who don't
++		 * grok this feature flag).
++		 */
++		err = xenbus_printf(xbt, dev->nodename,
++				    "feature-rx-flip", "%d", 0);
++		if (err) {
++			message = "writing feature-rx-flip";
++			goto abort_transaction;
++		}
++
++		err = xenbus_transaction_end(xbt, 0);
++	} while (err == -EAGAIN);
++
++	if (err) {
++		xenbus_dev_fatal(dev, err, "completing transaction");
++		goto fail;
++	}
++
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
++		goto fail;
++
++	/* This kicks hotplug scripts, so do it immediately. */
++	backend_create_netif(be);
++
++	return 0;
++
++abort_transaction:
++	xenbus_transaction_end(xbt, 1);
++	xenbus_dev_fatal(dev, err, "%s", message);
++fail:
++	DPRINTK("failed");
++	netback_remove(dev);
++	return err;
++}
++
++
++/**
++ * Build the hotplug environment for the vif-* scripts: always "script", plus
++ * "vif" once the interface exists (it may not yet when an event is raised).
++ * Returns 0, or the PTR_ERR() value of a failed read of the "script" node.
++ */
++static int netback_uevent(struct xenbus_device *xdev, char **envp,
++			  int num_envp, char *buffer, int buffer_size)
++{
++	struct backend_info *be = xdev->dev.driver_data;
++	netif_t *netif = be ? be->netif : NULL;
++	int i = 0, length = 0;
++	char *val;
++
++	DPRINTK("netback_uevent");
++
++	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
++	if (IS_ERR(val)) {
++		int err = PTR_ERR(val);
++		xenbus_dev_fatal(xdev, err, "reading script");
++		return err;
++	}
++	add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
++		       &length, "script=%s", val);
++	kfree(val);
++
++	/* be->netif is NULL until netif_alloc() succeeds; don't oops on it. */
++	if (netif)
++		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
++			       &length, "vif=%s", netif->dev->name);
++
++	envp[i] = NULL;
++
++	return 0;
++}
++
++
++static void backend_create_netif(struct backend_info *be)
++{
++	int err;
++	long handle;
++	struct xenbus_device *dev = be->dev;
++
++	if (be->netif != NULL)
++		return;
++
++	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
++	if (err != 1) {
++		xenbus_dev_fatal(dev, err, "reading handle");
++		return;
++	}
++
++	be->netif = netif_alloc(dev->otherend_id, handle);
++	if (IS_ERR(be->netif)) {
++		err = PTR_ERR(be->netif);
++		be->netif = NULL;
++		xenbus_dev_fatal(dev, err, "creating interface");
++		return;
++	}
++
++	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
++}
++
++
++/**
++ * Callback received when the frontend's state changes.
++ */
++static void frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state)
++{
++	struct backend_info *be = dev->dev.driver_data;
++
++	DPRINTK("%s", xenbus_strstate(frontend_state));
++
++	be->frontend_state = frontend_state;
++
++	switch (frontend_state) {
++	case XenbusStateInitialising:
++		if (dev->state == XenbusStateClosed) {
++			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
++			       __FUNCTION__, dev->nodename);
++			if (be->netif) {
++				netif_disconnect(be->netif);
++				be->netif = NULL;
++			}
++			xenbus_switch_state(dev, XenbusStateInitWait);
++		}
++		break;
++
++	case XenbusStateInitialised:
++		break;
++
++	case XenbusStateConnected:
++		backend_create_netif(be);
++		if (be->netif)
++			connect(be);
++		break;
++
++	case XenbusStateClosing:
++		xenbus_switch_state(dev, XenbusStateClosing);
++		break;
++
++	case XenbusStateClosed:
++		xenbus_switch_state(dev, XenbusStateClosed);
++		if (xenbus_dev_is_online(dev))
++			break;
++		/* fall through if not online */
++	case XenbusStateUnknown:
++		if (be->netif != NULL)
++			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++		device_unregister(&dev->dev);
++		break;
++
++	default:
++		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
++				 frontend_state);
++		break;
++	}
++}
++
++
++static void xen_net_read_rate(struct xenbus_device *dev,
++			      unsigned long *bytes, unsigned long *usec)
++{
++	char *s, *e;
++	unsigned long b, u;
++	char *ratestr;
++
++	/* Default to unlimited bandwidth. */
++	*bytes = ~0UL;
++	*usec = 0;
++
++	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
++	if (IS_ERR(ratestr))
++		return;
++
++	s = ratestr;
++	b = simple_strtoul(s, &e, 10);
++	if ((s == e) || (*e != ','))
++		goto fail;
++
++	s = e + 1;
++	u = simple_strtoul(s, &e, 10);
++	if ((s == e) || (*e != '\0'))
++		goto fail;
++
++	*bytes = b;
++	*usec = u;
++
++	kfree(ratestr);
++	return;
++
++ fail:
++	WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
++	kfree(ratestr);
++}
++
++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
++{
++	char *s, *e, *macstr;
++	int i;
++
++	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
++	if (IS_ERR(macstr))
++		return PTR_ERR(macstr);
++
++	for (i = 0; i < ETH_ALEN; i++) {
++		mac[i] = simple_strtoul(s, &e, 16);
++		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
++			kfree(macstr);
++			return -ENOENT;
++		}
++		s = e+1;
++	}
++
++	kfree(macstr);
++	return 0;
++}
++
++static void connect(struct backend_info *be)
++{
++	int err;
++	struct xenbus_device *dev = be->dev;
++
++	err = connect_rings(be);
++	if (err)
++		return;
++
++	err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
++		return;
++	}
++
++	xen_net_read_rate(dev, &be->netif->credit_bytes,
++			  &be->netif->credit_usec);
++	be->netif->remaining_credit = be->netif->credit_bytes;
++
++	xenbus_switch_state(dev, XenbusStateConnected);
++
++	netif_wake_queue(be->netif->dev);
++}
++
++
++static int connect_rings(struct backend_info *be)
++{
++	struct xenbus_device *dev = be->dev;
++	unsigned long tx_ring_ref, rx_ring_ref;
++	unsigned int evtchn, rx_copy;
++	int err;
++	int val;
++
++	DPRINTK("");
++
++	err = xenbus_gather(XBT_NIL, dev->otherend,
++			    "tx-ring-ref", "%lu", &tx_ring_ref,
++			    "rx-ring-ref", "%lu", &rx_ring_ref,
++			    "event-channel", "%u", &evtchn, NULL);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "reading %s/ring-ref and event-channel",
++				 dev->otherend);
++		return err;
++	}
++
++	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
++			   &rx_copy);
++	if (err == -ENOENT) {
++		err = 0;
++		rx_copy = 0;
++	}
++	if (err < 0) {
++		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
++				 dev->otherend);
++		return err;
++	}
++	be->netif->copying_receiver = !!rx_copy;
++
++	if (be->netif->dev->tx_queue_len != 0) {
++		if (xenbus_scanf(XBT_NIL, dev->otherend,
++				 "feature-rx-notify", "%d", &val) < 0)
++			val = 0;
++		if (val)
++			be->netif->can_queue = 1;
++		else
++			/* Must be non-zero for pfifo_fast to work. */
++			be->netif->dev->tx_queue_len = 1;
++	}
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
++		val = 0;
++	if (val) {
++		be->netif->features |= NETIF_F_SG;
++		be->netif->dev->features |= NETIF_F_SG;
++	}
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
++			 &val) < 0)
++		val = 0;
++	if (val) {
++		be->netif->features |= NETIF_F_TSO;
++		be->netif->dev->features |= NETIF_F_TSO;
++	}
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
++			 "%d", &val) < 0)
++		val = 0;
++	if (val) {
++		be->netif->features &= ~NETIF_F_IP_CSUM;
++		be->netif->dev->features &= ~NETIF_F_IP_CSUM;
++	}
++
++	/* Map the shared frame, irq etc. */
++	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "mapping shared-frames %lu/%lu port %u",
++				 tx_ring_ref, rx_ring_ref, evtchn);
++		return err;
++	}
++	return 0;
++}
++
++
++/* ** Driver Registration ** */
++
++
++static struct xenbus_device_id netback_ids[] = {
++	{ "vif" },
++	{ "" }
++};
++
++
++static struct xenbus_driver netback = {
++	.name = "vif",
++	.owner = THIS_MODULE,
++	.ids = netback_ids,
++	.probe = netback_probe,
++	.remove = netback_remove,
++	.uevent = netback_uevent,
++	.otherend_changed = frontend_changed,
++};
++
++
++void netif_xenbus_init(void)
++{
++	xenbus_register_backend(&netback);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/netfront/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,4 @@
++
++obj-$(CONFIG_XEN_NETDEV_FRONTEND)	:= xennet.o
++
++xennet-objs := netfront.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/netfront/netfront.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,2133 @@
++/******************************************************************************
++ * Virtual network driver for conversing with remote driver backends.
++ *
++ * Copyright (c) 2002-2005, K A Fraser
++ * Copyright (c) 2005, XenSource Ltd
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/module.h>
++#include <linux/version.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <linux/string.h>
++#include <linux/errno.h>
++#include <linux/netdevice.h>
++#include <linux/inetdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/skbuff.h>
++#include <linux/init.h>
++#include <linux/bitops.h>
++#include <linux/ethtool.h>
++#include <linux/in.h>
++#include <linux/if_ether.h>
++#include <linux/io.h>
++#include <linux/moduleparam.h>
++#include <net/sock.h>
++#include <net/pkt_sched.h>
++#include <net/arp.h>
++#include <net/route.h>
++#include <asm/uaccess.h>
++#include <xen/evtchn.h>
++#include <xen/xenbus.h>
++#include <xen/interface/io/netif.h>
++#include <xen/interface/memory.h>
++#include <xen/balloon.h>
++#include <asm/page.h>
++#include <asm/maddr.h>
++#include <asm/uaccess.h>
++#include <xen/interface/grant_table.h>
++#include <xen/gnttab.h>
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++struct netfront_cb {
++	struct page *page;
++	unsigned offset;
++};
++
++#define NETFRONT_SKB_CB(skb)	((struct netfront_cb *)((skb)->cb))
++
++/*
++ * Mutually-exclusive module options to select receive data path:
++ *  rx_copy : Packets are copied by network backend into local memory
++ *  rx_flip : Page containing packet data is transferred to our ownership
++ * For fully-virtualised guests there is no option - copying must be used.
++ * For paravirtualised guests, flipping is the default.
++ */
++#ifdef CONFIG_XEN
++static int MODPARM_rx_copy = 0;
++module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
++MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
++static int MODPARM_rx_flip = 0;
++module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
++MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
++#else
++static const int MODPARM_rx_copy = 1;
++static const int MODPARM_rx_flip = 0;
++#endif
++
++#define RX_COPY_THRESHOLD 256
++
++/* If we don't have GSO, fake things up so that we never try to use it. */
++#if defined(NETIF_F_GSO)
++#define HAVE_GSO			1
++#define HAVE_TSO			1 /* TSO is a subset of GSO */
++static inline void dev_disable_gso_features(struct net_device *dev)
++{
++	/* Turn off all GSO bits except ROBUST. */
++	dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
++	dev->features |= NETIF_F_GSO_ROBUST;
++}
++#elif defined(NETIF_F_TSO)
++#define HAVE_TSO                       1
++
++/* Some older kernels cannot cope with incorrect checksums,
++ * particularly in netfilter. I'm not sure there is 100% correlation
++ * with the presence of NETIF_F_TSO but it appears to be a good first
++ * approximation.
++ */
++#define HAVE_NO_CSUM_OFFLOAD           1
++
++#define gso_size tso_size
++#define gso_segs tso_segs
++static inline void dev_disable_gso_features(struct net_device *dev)
++{
++       /* Turn off all TSO bits. */
++       dev->features &= ~NETIF_F_TSO;
++}
++static inline int skb_is_gso(const struct sk_buff *skb)
++{
++        return skb_shinfo(skb)->tso_size;
++}
++static inline int skb_gso_ok(struct sk_buff *skb, int features)
++{
++        return (features & NETIF_F_TSO);
++}
++
++static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
++{
++        return skb_is_gso(skb) &&
++               (!skb_gso_ok(skb, dev->features) ||
++                unlikely(skb->ip_summed != CHECKSUM_HW));
++}
++#else
++#define netif_needs_gso(dev, skb)	0
++#define dev_disable_gso_features(dev)	((void)0)
++#endif
++
++#define GRANT_INVALID_REF	0
++
++#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
++
++struct netfront_info {
++	struct list_head list;
++	struct net_device *netdev;
++
++	struct net_device_stats stats;
++
++	struct netif_tx_front_ring tx;
++	struct netif_rx_front_ring rx;
++
++	spinlock_t   tx_lock;
++	spinlock_t   rx_lock;
++
++	unsigned int irq;		/* bound event-channel irq; 0 = none */
++	unsigned int copying_receiver;	/* written as "request-rx-copy" */
++	unsigned int carrier;		/* see netfront_carrier_*() below */
++
++	/* Receive-ring batched refills. */
++#define RX_MIN_TARGET 8
++#define RX_DFL_MIN_TARGET 64
++#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
++	unsigned rx_min_target, rx_max_target, rx_target;
++	struct sk_buff_head rx_batch;
++
++	struct timer_list rx_refill_timer;
++
++	/*
++	 * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
++	 * is an index into a chain of free entries.
++	 */
++	struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
++	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
++	/* Cap on in-flight tx requests: bounded by the *tx* ring size. */
++#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
++	grant_ref_t gref_tx_head;
++	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
++	grant_ref_t gref_rx_head;
++	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
++
++	struct xenbus_device *xbdev;
++	int tx_ring_ref;		/* grant ref of the shared tx ring */
++	int rx_ring_ref;		/* grant ref of the shared rx ring */
++	u8 mac[ETH_ALEN];		/* parsed from the xenstore "mac" node */
++
++	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
++	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
++	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++};
++
++struct netfront_rx_info {
++	struct netif_rx_response rx;
++	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++};
++
++/*
++ * Implement our own carrier flag: the network stack's version causes delays
++ * when the carrier is re-enabled (in particular, dev_activate() may not
++ * immediately be called, which can cause packet loss).
++ */
++#define netfront_carrier_on(netif)	((netif)->carrier = 1)
++#define netfront_carrier_off(netif)	((netif)->carrier = 0)
++#define netfront_carrier_ok(netif)	((netif)->carrier)
++
++/*
++ * Helpers for acquiring and releasing free slots in tx_skbs[].
++ */
++
++static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
++{
++	list[id] = list[0];
++	list[0]  = (void *)(unsigned long)id;
++}
++
++static inline unsigned short get_id_from_freelist(struct sk_buff **list)
++{
++	unsigned int id = (unsigned int)(unsigned long)list[0];
++	list[0] = list[id];
++	return id;
++}
++
++static inline int xennet_rxidx(RING_IDX idx)
++{
++	return idx & (NET_RX_RING_SIZE - 1);
++}
++
++static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
++						RING_IDX ri)
++{
++	struct sk_buff **slot = &np->rx_skbs[xennet_rxidx(ri)];
++	struct sk_buff *taken = *slot;	/* skb parked for ring index 'ri' */
++	*slot = NULL;			/* slot no longer owns an skb */
++	return taken;
++}
++
++static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
++					    RING_IDX ri)
++{
++	grant_ref_t *slot = &np->grant_rx_ref[xennet_rxidx(ri)];
++	grant_ref_t claimed = *slot;	/* ref recorded for ring index 'ri' */
++	*slot = GRANT_INVALID_REF;	/* slot no longer holds a grant */
++	return claimed;
++}
++
++#define DPRINTK(fmt, args...)				\
++	pr_debug("netfront (%s:%d) " fmt,		\
++		 __FUNCTION__, __LINE__, ##args)
++#define IPRINTK(fmt, args...)				\
++	printk(KERN_INFO "netfront: " fmt, ##args)
++#define WPRINTK(fmt, args...)				\
++	printk(KERN_WARNING "netfront: " fmt, ##args)
++
++static int setup_device(struct xenbus_device *, struct netfront_info *);
++static struct net_device *create_netdev(struct xenbus_device *);
++
++static void end_access(int, void *);
++static void netif_disconnect_backend(struct netfront_info *);
++
++static int network_connect(struct net_device *);
++static void network_tx_buf_gc(struct net_device *);
++static void network_alloc_rx_buffers(struct net_device *);
++static int send_fake_arp(struct net_device *);
++
++static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
++
++#ifdef CONFIG_SYSFS
++static int xennet_sysfs_addif(struct net_device *netdev);
++static void xennet_sysfs_delif(struct net_device *netdev);
++#else /* !CONFIG_SYSFS */
++#define xennet_sysfs_addif(dev) (0)
++#define xennet_sysfs_delif(dev) do { } while(0)
++#endif
++
++static inline int xennet_can_sg(struct net_device *dev)
++{
++	return dev->features & NETIF_F_SG;
++}
++
++/**
++ * Entry point to this code when a new device is created.  Allocate the basic
++ * structures and the ring buffers for communication with the backend, and
++ * inform the backend of the appropriate details for those.
++ */
++static int __devinit netfront_probe(struct xenbus_device *dev,
++				    const struct xenbus_device_id *id)
++{
++	int err;
++	struct net_device *netdev;
++	struct netfront_info *info;
++
++	netdev = create_netdev(dev);
++	if (IS_ERR(netdev)) {
++		err = PTR_ERR(netdev);
++		xenbus_dev_fatal(dev, err, "creating netdev");
++		return err;
++	}
++
++	info = netdev_priv(netdev);
++	dev->dev.driver_data = info;
++
++	err = register_netdev(info->netdev);
++	if (err) {
++		printk(KERN_WARNING "%s: register_netdev err=%d\n",
++		       __FUNCTION__, err);
++		goto fail;
++	}
++
++	err = xennet_sysfs_addif(info->netdev);
++	if (err) {
++		unregister_netdev(info->netdev);
++		printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
++		       __FUNCTION__, err);
++		goto fail;
++	}
++
++	return 0;
++
++ fail:
++	free_netdev(netdev);
++	dev->dev.driver_data = NULL;
++	return err;
++}
++
++static int __devexit netfront_remove(struct xenbus_device *dev)
++{
++	struct netfront_info *info = dev->dev.driver_data;
++
++	DPRINTK("%s\n", dev->nodename);
++
++	netif_disconnect_backend(info);
++
++	del_timer_sync(&info->rx_refill_timer);
++
++	xennet_sysfs_delif(info->netdev);
++
++	unregister_netdev(info->netdev);
++
++	free_netdev(info->netdev);
++
++	return 0;
++}
++
++/**
++ * We are reconnecting to the backend, due to a suspend/resume, or a backend
++ * driver restart.  We tear down our netif structure and recreate it, but
++ * leave the device-layer structures intact so that this is transparent to the
++ * rest of the kernel.
++ */
++static int netfront_resume(struct xenbus_device *dev)
++{
++	struct netfront_info *info = dev->dev.driver_data;
++
++	DPRINTK("%s\n", dev->nodename);
++
++	netif_disconnect_backend(info);
++	return 0;
++}
++
++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
++{
++	char *s, *e, *macstr;
++	int i;
++
++	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
++	if (IS_ERR(macstr))
++		return PTR_ERR(macstr);
++
++	for (i = 0; i < ETH_ALEN; i++) {
++		mac[i] = simple_strtoul(s, &e, 16);
++		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
++			kfree(macstr);
++			return -ENOENT;
++		}
++		s = e+1;
++	}
++
++	kfree(macstr);
++	return 0;
++}
++
++/* Common code used when first setting up, and when resuming. */
++static int talk_to_backend(struct xenbus_device *dev,
++			   struct netfront_info *info)
++{
++	const char *message;
++	struct xenbus_transaction xbt;
++	int err;
++
++	err = xen_net_read_mac(dev, info->mac);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
++		goto out;
++	}
++
++	/* Create shared ring, alloc event channel. */
++	err = setup_device(dev, info);
++	if (err)
++		goto out;
++
++again:
++	err = xenbus_transaction_start(&xbt);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "starting transaction");
++		goto destroy_ring;
++	}
++
++	err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
++			    info->tx_ring_ref);
++	if (err) {
++		message = "writing tx ring-ref";
++		goto abort_transaction;
++	}
++	err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
++			    info->rx_ring_ref);
++	if (err) {
++		message = "writing rx ring-ref";
++		goto abort_transaction;
++	}
++	err = xenbus_printf(xbt, dev->nodename,
++			    "event-channel", "%u",
++			    irq_to_evtchn_port(info->irq));
++	if (err) {
++		message = "writing event-channel";
++		goto abort_transaction;
++	}
++
++	err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
++			    info->copying_receiver);
++	if (err) {
++		message = "writing request-rx-copy";
++		goto abort_transaction;
++	}
++
++	err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
++	if (err) {
++		message = "writing feature-rx-notify";
++		goto abort_transaction;
++	}
++
++#ifdef HAVE_NO_CSUM_OFFLOAD
++	err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload", "%d", 1);
++	if (err) {
++		message = "writing feature-no-csum-offload";
++		goto abort_transaction;
++	}
++#endif
++
++	err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
++	if (err) {
++		message = "writing feature-sg";
++		goto abort_transaction;
++	}
++
++#ifdef HAVE_TSO
++	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
++	if (err) {
++		message = "writing feature-gso-tcpv4";
++		goto abort_transaction;
++	}
++#endif
++
++	err = xenbus_transaction_end(xbt, 0);
++	if (err) {
++		if (err == -EAGAIN)
++			goto again;
++		xenbus_dev_fatal(dev, err, "completing transaction");
++		goto destroy_ring;
++	}
++
++	return 0;
++
++ abort_transaction:
++	xenbus_transaction_end(xbt, 1);
++	xenbus_dev_fatal(dev, err, "%s", message);
++ destroy_ring:
++	netif_disconnect_backend(info);
++ out:
++	return err;
++}
++
++static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
++{
++	struct netif_tx_sring *txs;
++	struct netif_rx_sring *rxs;
++	int err;
++	struct net_device *netdev = info->netdev;
++
++	info->tx_ring_ref = GRANT_INVALID_REF;
++	info->rx_ring_ref = GRANT_INVALID_REF;
++	info->rx.sring = NULL;
++	info->tx.sring = NULL;
++	info->irq = 0;
++
++	txs = (struct netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
++	if (!txs) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(dev, err, "allocating tx ring page");
++		goto fail;
++	}
++	SHARED_RING_INIT(txs);
++	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
++
++	err = xenbus_grant_ring(dev, virt_to_mfn(txs));
++	if (err < 0) {
++		free_page((unsigned long)txs);
++		goto fail;
++	}
++	info->tx_ring_ref = err;
++
++	rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
++	if (!rxs) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(dev, err, "allocating rx ring page");
++		goto fail;
++	}
++	SHARED_RING_INIT(rxs);
++	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
++
++	err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
++	if (err < 0) {
++		free_page((unsigned long)rxs);
++		goto fail;
++	}
++	info->rx_ring_ref = err;
++
++	memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
++
++	err = bind_listening_port_to_irqhandler(
++		dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
++		netdev);
++	if (err < 0)
++		goto fail;
++	info->irq = err;
++
++	return 0;
++
++ fail:
++	return err;
++}
++
++/**
++ * Callback received when the backend's state changes.
++ */
++static void backend_changed(struct xenbus_device *dev,
++			    enum xenbus_state backend_state)
++{
++	struct netfront_info *np = dev->dev.driver_data;
++	struct net_device *netdev = np->netdev;
++
++	DPRINTK("%s\n", xenbus_strstate(backend_state));
++
++	switch (backend_state) {
++	case XenbusStateInitialising:
++	case XenbusStateInitialised:
++	case XenbusStateConnected:
++	case XenbusStateUnknown:
++	case XenbusStateClosed:
++		break;
++
++	case XenbusStateInitWait:
++		if (dev->state != XenbusStateInitialising)
++			break;
++		if (network_connect(netdev) != 0)
++			break;
++		xenbus_switch_state(dev, XenbusStateConnected);
++		(void)send_fake_arp(netdev);
++		break;
++
++	case XenbusStateClosing:
++		xenbus_frontend_closed(dev);
++		break;
++	}
++}
++
++/** Send a packet on a net device to encourage switches to learn the
++ * MAC. We send a fake (unsolicited) ARP reply.
++ *
++ * @param dev device
++ * @return 0 on success, error code otherwise
++ */
++static int send_fake_arp(struct net_device *dev)
++{
++	struct sk_buff *skb;
++	u32             src_ip, dst_ip;
++
++	dst_ip = INADDR_BROADCAST;
++	src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
++
++	/* No IP? Then nothing to do. */
++	if (src_ip == 0)
++		return 0;
++
++	skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
++			 dst_ip, dev, src_ip,
++			 /*dst_hw*/ NULL, /*src_hw*/ NULL,
++			 /*target_hw*/ dev->dev_addr);
++	if (skb == NULL)
++		return -ENOMEM;
++
++	return dev_queue_xmit(skb);
++}
++
++static inline int netfront_tx_slot_available(struct netfront_info *np)
++{
++	return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
++		(TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
++}
++
++static inline void network_maybe_wake_tx(struct net_device *dev)
++{
++	struct netfront_info *np = netdev_priv(dev);
++
++	if (unlikely(netif_queue_stopped(dev)) &&
++	    netfront_tx_slot_available(np) &&
++	    likely(netif_running(dev)))
++		netif_wake_queue(dev);
++}
++
++static int network_open(struct net_device *dev)
++{
++	struct netfront_info *np = netdev_priv(dev);
++
++	memset(&np->stats, 0, sizeof(np->stats));
++
++	spin_lock_bh(&np->rx_lock);
++	if (netfront_carrier_ok(np)) {
++		network_alloc_rx_buffers(dev);
++		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
++		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
++			netif_rx_schedule(dev);
++	}
++	spin_unlock_bh(&np->rx_lock);
++
++	network_maybe_wake_tx(dev);
++
++	return 0;
++}
++
++static void network_tx_buf_gc(struct net_device *dev)
++{
++	RING_IDX cons, prod;
++	unsigned short id;
++	struct netfront_info *np = netdev_priv(dev);
++	struct sk_buff *skb;
++
++	BUG_ON(!netfront_carrier_ok(np));
++
++	do {
++		prod = np->tx.sring->rsp_prod;
++		rmb(); /* Ensure we see responses up to 'prod'. */
++
++		for (cons = np->tx.rsp_cons; cons != prod; cons++) {
++			struct netif_tx_response *txrsp;
++
++			txrsp = RING_GET_RESPONSE(&np->tx, cons);
++			if (txrsp->status == NETIF_RSP_NULL)
++				continue;
++
++			id  = txrsp->id;
++			skb = np->tx_skbs[id];
++			if (unlikely(gnttab_query_foreign_access(
++				np->grant_tx_ref[id]) != 0)) {
++				printk(KERN_ALERT "network_tx_buf_gc: warning "
++				       "-- grant still in use by backend "
++				       "domain.\n");
++				BUG();
++			}
++			gnttab_end_foreign_access_ref(
++				np->grant_tx_ref[id], GNTMAP_readonly);
++			gnttab_release_grant_reference(
++				&np->gref_tx_head, np->grant_tx_ref[id]);
++			np->grant_tx_ref[id] = GRANT_INVALID_REF;
++			add_id_to_freelist(np->tx_skbs, id);
++			dev_kfree_skb_irq(skb);
++		}
++
++		np->tx.rsp_cons = prod;
++
++		/*
++		 * Set a new event, then check for race with update of rsp_prod.
++		 * Note that it is essential to schedule a callback, no matter
++		 * how few buffers are pending. Even if there is space in the
++		 * transmit ring, higher layers may be blocked because too much
++		 * data is outstanding: in such cases notification from Xen is
++		 * likely to be the only kick that we'll get.
++		 */
++		np->tx.sring->rsp_event =
++			prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
++		mb();
++	} while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
++
++	network_maybe_wake_tx(dev);
++}
++
++static void rx_refill_timeout(unsigned long data)
++{
++	/* 'data' is the net_device to hand to netif_rx_schedule(). */
++	netif_rx_schedule((struct net_device *)data);
++}
++
++static void network_alloc_rx_buffers(struct net_device *dev)
++{
++	unsigned short id;
++	struct netfront_info *np = netdev_priv(dev);
++	struct sk_buff *skb;
++	struct page *page;
++	int i, batch_target, notify;
++	RING_IDX req_prod = np->rx.req_prod_pvt;
++	struct xen_memory_reservation reservation;
++	grant_ref_t ref;
++ 	unsigned long pfn;
++ 	void *vaddr;
++	int nr_flips;
++	netif_rx_request_t *req;
++
++	if (unlikely(!netfront_carrier_ok(np)))
++		return;
++
++	/*
++	 * Allocate skbuffs greedily, even though we batch updates to the
++	 * receive ring. This creates a less bursty demand on the memory
++	 * allocator, so should reduce the chance of failed allocation requests
++	 * both for ourself and for other kernel subsystems.
++	 */
++	batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
++	for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
++		/*
++		 * Allocate an skb and a page. Do not use __dev_alloc_skb as
++		 * that will allocate page-sized buffers which is not
++		 * necessary here.
++		 * 16 bytes added as necessary headroom for netif_receive_skb.
++		 */
++		skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN,
++				GFP_ATOMIC | __GFP_NOWARN);
++		if (unlikely(!skb))
++			goto no_skb;
++
++		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
++		if (!page) {
++			kfree_skb(skb);
++no_skb:
++			/* Any skbuffs queued for refill? Force them out. */
++			if (i != 0)
++				goto refill;
++			/* Could not allocate any skbuffs. Try again later. */
++			mod_timer(&np->rx_refill_timer,
++				  jiffies + (HZ/10));
++			break;
++		}
++
++		skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */
++		skb_shinfo(skb)->frags[0].page = page;
++		skb_shinfo(skb)->nr_frags = 1;
++		__skb_queue_tail(&np->rx_batch, skb);
++	}
++
++	/* Is the batch large enough to be worthwhile? */
++	if (i < (np->rx_target/2)) {
++		if (req_prod > np->rx.sring->req_prod)
++			goto push;
++		return;
++	}
++
++	/* Adjust our fill target if we risked running out of buffers. */
++	if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
++	    ((np->rx_target *= 2) > np->rx_max_target))
++		np->rx_target = np->rx_max_target;
++
++ refill:
++	for (nr_flips = i = 0; ; i++) {
++		if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
++			break;
++
++		skb->dev = dev;
++
++		id = xennet_rxidx(req_prod + i);
++
++		BUG_ON(np->rx_skbs[id]);
++		np->rx_skbs[id] = skb;
++
++		ref = gnttab_claim_grant_reference(&np->gref_rx_head);
++		BUG_ON((signed short)ref < 0);
++		np->grant_rx_ref[id] = ref;
++
++		pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
++		vaddr = page_address(skb_shinfo(skb)->frags[0].page);
++
++		req = RING_GET_REQUEST(&np->rx, req_prod + i);
++		if (!np->copying_receiver) {
++			gnttab_grant_foreign_transfer_ref(ref,
++							  np->xbdev->otherend_id,
++							  pfn);
++			np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
++			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++				/* Remove this page before passing
++				 * back to Xen. */
++				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
++				MULTI_update_va_mapping(np->rx_mcl+i,
++							(unsigned long)vaddr,
++							__pte(0), 0);
++			}
++			nr_flips++;
++		} else {
++			gnttab_grant_foreign_access_ref(ref,
++							np->xbdev->otherend_id,
++							pfn_to_mfn(pfn),
++							0);
++		}
++
++		req->id = id;
++		req->gref = ref;
++	}
++
++	if ( nr_flips != 0 ) {
++		/* Tell the ballon driver what is going on. */
++		balloon_update_driver_allowance(i);
++
++		set_xen_guest_handle(reservation.extent_start,
++				     np->rx_pfn_array);
++		reservation.nr_extents   = nr_flips;
++		reservation.extent_order = 0;
++		reservation.address_bits = 0;
++		reservation.domid        = DOMID_SELF;
++
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			/* After all PTEs have been zapped, flush the TLB. */
++			np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
++				UVMF_TLB_FLUSH|UVMF_ALL;
++
++			/* Give away a batch of pages. */
++			np->rx_mcl[i].op = __HYPERVISOR_memory_op;
++			np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
++			np->rx_mcl[i].args[1] = (unsigned long)&reservation;
++
++			/* Zap PTEs and give away pages in one big
++			 * multicall. */
++			(void)HYPERVISOR_multicall(np->rx_mcl, i+1);
++
++			/* Check return status of HYPERVISOR_memory_op(). */
++			if (unlikely(np->rx_mcl[i].result != i))
++				panic("Unable to reduce memory reservation\n");
++		} else {
++			if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
++						 &reservation) != i)
++				panic("Unable to reduce memory reservation\n");
++		}
++	} else {
++		wmb();
++	}
++
++	/* Above is a suitable barrier to ensure backend will see requests. */
++	np->rx.req_prod_pvt = req_prod + i;
++ push:
++	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
++	if (notify)
++		notify_remote_via_irq(np->irq);
++}
++
++static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
++			      struct netif_tx_request *tx)
++{	/* Add tx requests for the rest of skb; 'tx' is the caller's request. */
++	struct netfront_info *np = netdev_priv(dev);
++	char *data = skb->data;
++	unsigned long mfn;
++	RING_IDX prod = np->tx.req_prod_pvt;
++	int frags = skb_shinfo(skb)->nr_frags;
++	unsigned int offset = offset_in_page(data);
++	unsigned int len = skb_headlen(skb);
++	unsigned int id;
++	grant_ref_t ref;
++	int i;
++
++	while (len > PAGE_SIZE - offset) { /* linear data crosses a page */
++		tx->size = PAGE_SIZE - offset; /* trim previous request */
++		tx->flags |= NETTXF_more_data;
++		len -= tx->size;
++		data += tx->size;
++		offset = 0;
++
++		id = get_id_from_freelist(np->tx_skbs);
++		np->tx_skbs[id] = skb_get(skb); /* extra skb ref per slot */
++		tx = RING_GET_REQUEST(&np->tx, prod++);
++		tx->id = id;
++		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++		BUG_ON((signed short)ref < 0);
++
++		mfn = virt_to_mfn(data);
++		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
++						mfn, GNTMAP_readonly);
++
++		tx->gref = np->grant_tx_ref[id] = ref;
++		tx->offset = offset;
++		tx->size = len; /* remainder; trimmed if the loop repeats */
++		tx->flags = 0;
++	}
++
++	for (i = 0; i < frags; i++) { /* one request per page fragment */
++		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
++
++		tx->flags |= NETTXF_more_data; /* chain the previous request */
++
++		id = get_id_from_freelist(np->tx_skbs);
++		np->tx_skbs[id] = skb_get(skb); /* extra skb ref per slot */
++		tx = RING_GET_REQUEST(&np->tx, prod++);
++		tx->id = id;
++		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++		BUG_ON((signed short)ref < 0);
++
++		mfn = pfn_to_mfn(page_to_pfn(frag->page));
++		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
++						mfn, GNTMAP_readonly);
++
++		tx->gref = np->grant_tx_ref[id] = ref;
++		tx->offset = frag->page_offset;
++		tx->size = frag->size;
++		tx->flags = 0;
++	}
++
++	np->tx.req_prod_pvt = prod; /* private index only; no push here */
++}
++
++static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{	/* hard_start_xmit hook: queue skb on the tx ring; always returns 0. */
++	unsigned short id;
++	struct netfront_info *np = netdev_priv(dev);
++	struct netif_tx_request *tx;
++	struct netif_extra_info *extra;
++	char *data = skb->data;
++	RING_IDX i;
++	grant_ref_t ref;
++	unsigned long mfn;
++	int notify;
++	int frags = skb_shinfo(skb)->nr_frags;
++	unsigned int offset = offset_in_page(data);
++	unsigned int len = skb_headlen(skb);
++
++	frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; /* + head pages */
++	if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
++		printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
++		       frags);
++		dump_stack();
++		goto drop;
++	}
++
++	spin_lock_irq(&np->tx_lock);
++
++	if (unlikely(!netfront_carrier_ok(np) ||
++		     (frags > 1 && !xennet_can_sg(dev)) ||
++		     netif_needs_gso(dev, skb))) {
++		spin_unlock_irq(&np->tx_lock);
++		goto drop;
++	}
++
++	i = np->tx.req_prod_pvt;
++
++	id = get_id_from_freelist(np->tx_skbs);
++	np->tx_skbs[id] = skb; /* first slot owns the caller's reference */
++
++	tx = RING_GET_REQUEST(&np->tx, i);
++
++	tx->id   = id;
++	ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++	BUG_ON((signed short)ref < 0);
++	mfn = virt_to_mfn(data);
++	gnttab_grant_foreign_access_ref(
++		ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
++	tx->gref = np->grant_tx_ref[id] = ref;
++	tx->offset = offset;
++	tx->size = len;
++
++	tx->flags = 0;
++	extra = NULL;
++
++	if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++		tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
++#ifdef CONFIG_XEN
++	if (skb->proto_data_valid) /* remote but checksummed? */
++		tx->flags |= NETTXF_data_validated;
++#endif
++
++#ifdef HAVE_TSO
++	if (skb_shinfo(skb)->gso_size) { /* GSO extra takes the next slot */
++		struct netif_extra_info *gso = (struct netif_extra_info *)
++			RING_GET_REQUEST(&np->tx, ++i);
++
++		if (extra)
++			extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
++		else
++			tx->flags |= NETTXF_extra_info;
++
++		gso->u.gso.size = skb_shinfo(skb)->gso_size;
++		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
++		gso->u.gso.pad = 0;
++		gso->u.gso.features = 0;
++
++		gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
++		gso->flags = 0;
++		extra = gso;
++	}
++#endif
++
++	np->tx.req_prod_pvt = i + 1;
++
++	xennet_make_frags(skb, dev, tx);
++	tx->size = skb->len; /* first request carries the total length */
++
++	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
++	if (notify)
++		notify_remote_via_irq(np->irq);
++
++	/* Account now: the gc below, or netif_int() once tx_lock is dropped,
++	 * may release skb, so it must not be touched after this point. */
++	np->stats.tx_bytes += skb->len;
++	np->stats.tx_packets++;
++
++	network_tx_buf_gc(dev);
++	if (!netfront_tx_slot_available(np))
++		netif_stop_queue(dev);
++
++	spin_unlock_irq(&np->tx_lock);
++	return 0;
++
++ drop:
++	np->stats.tx_dropped++;
++	dev_kfree_skb(skb); /* dropped packets are consumed, not requeued */
++	return 0;
++}
++
++static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
++{	/* Interrupt handler: gc tx buffers and schedule rx polling. */
++	struct net_device *netdev = dev_id;
++	struct netfront_info *info = netdev_priv(netdev);
++	unsigned long irqflags;
++
++	/* tx_lock also protects access to the rx shared-ring indexes. */
++	spin_lock_irqsave(&info->tx_lock, irqflags);
++	if (unlikely(!netfront_carrier_ok(info)))
++		goto unlock;
++
++	network_tx_buf_gc(netdev);
++	if (RING_HAS_UNCONSUMED_RESPONSES(&info->rx))
++		netif_rx_schedule(netdev);
++
++ unlock:
++	spin_unlock_irqrestore(&info->tx_lock, irqflags);
++	return IRQ_HANDLED;
++}
++
++static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
++				grant_ref_t ref)
++{	/* Re-post skb/ref in the next private rx request slot. */
++	const RING_IDX prod = np->rx.req_prod_pvt++;
++	netif_rx_request_t *req = RING_GET_REQUEST(&np->rx, prod);
++	int slot = xennet_rxidx(prod);
++
++	BUG_ON(np->rx_skbs[slot]);
++	np->rx_skbs[slot] = skb;
++	np->grant_rx_ref[slot] = req->gref = ref;
++	req->id = slot;
++}
++
++int xennet_get_extras(struct netfront_info *np,
++		      struct netif_extra_info *extras, RING_IDX rp)
++
++{	/* Copy the extra-info slots after rsp_cons into extras[type - 1]. */
++	struct netif_extra_info *extra;
++	RING_IDX cons = np->rx.rsp_cons;
++	int err = 0;
++
++	do {
++		struct sk_buff *skb;
++		grant_ref_t ref;
++
++		if (unlikely(cons + 1 == rp)) { /* next slot not produced */
++			if (net_ratelimit())
++				WPRINTK("Missing extra info\n");
++			err = -EBADR;
++			break;
++		}
++
++		extra = (struct netif_extra_info *)
++			RING_GET_RESPONSE(&np->rx, ++cons);
++
++		if (unlikely(!extra->type ||
++			     extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
++			if (net_ratelimit())
++				WPRINTK("Invalid extra type: %d\n",
++					extra->type);
++			err = -EINVAL; /* keep walking the chain regardless */
++		} else {
++			memcpy(&extras[extra->type - 1], extra,
++			       sizeof(*extra));
++		}
++
++		skb = xennet_get_rx_skb(np, cons);
++		ref = xennet_get_rx_ref(np, cons);
++		xennet_move_rx_slot(np, skb, ref); /* re-post this buffer */
++	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
++
++	np->rx.rsp_cons = cons; /* index of the last slot examined */
++	return err; /* 0, -EBADR or -EINVAL */
++}
++
++static int xennet_get_responses(struct netfront_info *np,
++				struct netfront_rx_info *rinfo, RING_IDX rp,
++				struct sk_buff_head *list,
++				int *pages_flipped_p)
++{	/* Gather one packet's rx buffers onto 'list'; returns 0 or -errno. */
++	int pages_flipped = *pages_flipped_p;
++	struct mmu_update *mmu;
++	struct multicall_entry *mcl;
++	struct netif_rx_response *rx = &rinfo->rx;
++	struct netif_extra_info *extras = rinfo->extras;
++	RING_IDX cons = np->rx.rsp_cons;
++	struct sk_buff *skb = xennet_get_rx_skb(np, cons);
++	grant_ref_t ref = xennet_get_rx_ref(np, cons);
++	int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
++	int frags = 1; /* responses examined so far for this packet */
++	int err = 0;
++	unsigned long ret;
++
++	if (rx->flags & NETRXF_extra_info) {
++		err = xennet_get_extras(np, extras, rp);
++		cons = np->rx.rsp_cons; /* extras advanced the consumer */
++	}
++
++	for (;;) {
++		unsigned long mfn;
++
++		if (unlikely(rx->status < 0 ||
++			     rx->offset + rx->status > PAGE_SIZE)) {
++			if (net_ratelimit())
++				WPRINTK("rx->offset: %x, size: %u\n",
++					rx->offset, rx->status);
++			xennet_move_rx_slot(np, skb, ref); /* re-post buffer */
++			err = -EINVAL;
++			goto next;
++		}
++
++		/*
++		 * This definitely indicates a bug, either in this driver or in
++		 * the backend driver. In future this should flag the bad
++		 * situation to the system controller to reboot the backend.
++		 */
++		if (ref == GRANT_INVALID_REF) {
++			if (net_ratelimit())
++				WPRINTK("Bad rx response id %d.\n", rx->id);
++			err = -EINVAL;
++			goto next;
++		}
++
++		if (!np->copying_receiver) {
++			/* Memory pressure, insufficient buffer
++			 * headroom, ... */
++			if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
++				if (net_ratelimit())
++					WPRINTK("Unfulfilled rx req "
++						"(id=%d, st=%d).\n",
++						rx->id, rx->status);
++				xennet_move_rx_slot(np, skb, ref);
++				err = -ENOMEM;
++				goto next;
++			}
++
++			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++				/* Remap the page. */
++				struct page *page =
++					skb_shinfo(skb)->frags[0].page;
++				unsigned long pfn = page_to_pfn(page);
++				void *vaddr = page_address(page);
++
++				mcl = np->rx_mcl + pages_flipped;
++				mmu = np->rx_mmu + pages_flipped;
++
++				MULTI_update_va_mapping(mcl,
++							(unsigned long)vaddr,
++							pfn_pte_ma(mfn,
++								   PAGE_KERNEL),
++							0);
++				mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
++					| MMU_MACHPHYS_UPDATE;
++				mmu->val = pfn;
++
++				set_phys_to_machine(pfn, mfn);
++			}
++			pages_flipped++; /* queued only; not issued here */
++		} else {
++			ret = gnttab_end_foreign_access_ref(ref, 0);
++			BUG_ON(!ret);
++		}
++
++		gnttab_release_grant_reference(&np->gref_rx_head, ref);
++
++		__skb_queue_tail(list, skb);
++
++next:
++		if (!(rx->flags & NETRXF_more_data))
++			break;
++
++		if (cons + frags == rp) { /* next response not produced */
++			if (net_ratelimit())
++				WPRINTK("Need more frags\n");
++			err = -ENOENT;
++			break;
++		}
++
++		rx = RING_GET_RESPONSE(&np->rx, cons + frags);
++		skb = xennet_get_rx_skb(np, cons + frags);
++		ref = xennet_get_rx_ref(np, cons + frags);
++		frags++;
++	}
++
++	if (unlikely(frags > max)) {
++		if (net_ratelimit())
++			WPRINTK("Too many frags\n");
++		err = -E2BIG;
++	}
++
++	if (unlikely(err))
++		np->rx.rsp_cons = cons + frags; /* step past the bad packet */
++
++	*pages_flipped_p = pages_flipped;
++
++	return err;
++}
++
++static RING_IDX xennet_fill_frags(struct netfront_info *np,
++				  struct sk_buff *skb,
++				  struct sk_buff_head *list)
++{	/* Attach each queued skb's page to 'skb' as a further fragment. */
++	struct skb_shared_info *si = skb_shinfo(skb);
++	int n = si->nr_frags;
++	RING_IDX idx = np->rx.rsp_cons;
++	struct sk_buff *extra;
++
++	for (; (extra = __skb_dequeue(list)) != NULL; n++) {
++		struct netif_rx_response *rsp;
++		skb_frag_t *dst = &si->frags[n];
++
++		/* The response for this buffer sits in the next ring slot. */
++		rsp = RING_GET_RESPONSE(&np->rx, ++idx);
++
++		dst->page = skb_shinfo(extra)->frags[0].page;
++		dst->page_offset = rsp->offset;
++		dst->size = rsp->status;
++
++		skb->data_len += rsp->status;
++
++		/* The page is referenced by 'skb' now; free only the shell. */
++		skb_shinfo(extra)->nr_frags = 0;
++		kfree_skb(extra);
++	}
++
++	si->nr_frags = n;
++	return idx;
++}
++
++static int xennet_set_skb_gso(struct sk_buff *skb,
++			      struct netif_extra_info *gso)
++{	/* Apply a received GSO extra to skb; returns 0 or -EINVAL. */
++	if (!gso->u.gso.size) {
++		if (net_ratelimit())
++			WPRINTK("GSO size must not be zero.\n");
++		return -EINVAL;
++	}
++
++	/* Currently only TCPv4 segmentation offload is supported. */
++	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++		if (net_ratelimit())
++			WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
++		return -EINVAL;
++	}
++
++#ifdef HAVE_TSO
++	skb_shinfo(skb)->gso_size = gso->u.gso.size;
++#ifdef HAVE_GSO
++	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
++
++	/* Header must be checked, and gso_segs computed. */
++	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
++#endif
++	skb_shinfo(skb)->gso_segs = 0; /* left at 0 here (see above) */
++
++	return 0;
++#else
++	if (net_ratelimit())
++		WPRINTK("GSO unsupported by this kernel.\n");
++	return -EINVAL; /* without HAVE_TSO every GSO packet is rejected */
++#endif
++}
++
++static int netif_poll(struct net_device *dev, int *pbudget)
++{	/* ->poll hook: receive at most min(*pbudget, dev->quota) packets. */
++	struct netfront_info *np = netdev_priv(dev);
++	struct sk_buff *skb;
++	struct netfront_rx_info rinfo;
++	struct netif_rx_response *rx = &rinfo.rx;
++	struct netif_extra_info *extras = rinfo.extras;
++	RING_IDX i, rp;
++	struct multicall_entry *mcl;
++	int work_done, budget, more_to_do = 1;
++	struct sk_buff_head rxq; /* good packets, delivered at the end */
++	struct sk_buff_head errq; /* buffers of bad packets, freed later */
++	struct sk_buff_head tmpq; /* buffers of the packet being parsed */
++	unsigned long flags;
++	unsigned int len;
++	int pages_flipped = 0;
++	int err;
++
++	spin_lock(&np->rx_lock); /* no need for spin_lock_bh() in ->poll() */
++
++	if (unlikely(!netfront_carrier_ok(np))) {
++		spin_unlock(&np->rx_lock);
++		return 0;
++	}
++
++	skb_queue_head_init(&rxq);
++	skb_queue_head_init(&errq);
++	skb_queue_head_init(&tmpq);
++
++	if ((budget = *pbudget) > dev->quota)
++		budget = dev->quota;
++	rp = np->rx.sring->rsp_prod;
++	rmb(); /* Ensure we see queued responses up to 'rp'. */
++
++	i = np->rx.rsp_cons;
++	work_done = 0;
++	while ((i != rp) && (work_done < budget)) {
++		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
++		memset(extras, 0, sizeof(rinfo.extras));
++
++		err = xennet_get_responses(np, &rinfo, rp, &tmpq,
++					   &pages_flipped);
++
++		if (unlikely(err)) {
++err:	/* also entered by goto when the GSO extra is rejected below */
++			while ((skb = __skb_dequeue(&tmpq)))
++				__skb_queue_tail(&errq, skb);
++			np->stats.rx_errors++;
++			i = np->rx.rsp_cons; /* resume after the bad packet */
++			continue;
++		}
++
++		skb = __skb_dequeue(&tmpq); /* head buffer of the packet */
++
++		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++			struct netif_extra_info *gso;
++			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
++
++			if (unlikely(xennet_set_skb_gso(skb, gso))) {
++				__skb_queue_head(&tmpq, skb);
++				np->rx.rsp_cons += skb_queue_len(&tmpq);
++				goto err;
++			}
++		}
++
++		NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
++		NETFRONT_SKB_CB(skb)->offset = rx->offset;
++
++		len = rx->status;
++		if (len > RX_COPY_THRESHOLD)
++			len = RX_COPY_THRESHOLD;
++		skb_put(skb, len); /* bytes are copied in after the loop */
++
++		if (rx->status > len) {
++			skb_shinfo(skb)->frags[0].page_offset =
++				rx->offset + len;
++			skb_shinfo(skb)->frags[0].size = rx->status - len;
++			skb->data_len = rx->status - len;
++		} else {
++			skb_shinfo(skb)->frags[0].page = NULL;
++			skb_shinfo(skb)->nr_frags = 0;
++		}
++
++		i = xennet_fill_frags(np, skb, &tmpq);
++
++		/*
++		 * Truesize must approximate the size of true data plus
++		 * any supervisor overheads. Adding hypervisor overheads
++		 * has been shown to significantly reduce achievable
++		 * bandwidth with the default receive buffer size. It is
++		 * therefore not wise to account for it here.
++		 *
++		 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
++		 * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
++		 * add the size of the data pulled in xennet_fill_frags().
++		 *
++		 * We also adjust for any unused space in the main data
++		 * area by subtracting (RX_COPY_THRESHOLD - len). This is
++		 * especially important with drivers which split incoming
++		 * packets into header and data, using only 66 bytes of
++		 * the main data area (see the e1000 driver for example.)
++		 * On such systems, without this last adjustment, our
++		 * achievable receive throughput using the standard receive
++		 * buffer size was cut by 25%(!!!).
++		 */
++		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
++		skb->len += skb->data_len;
++
++		/*
++		 * Old backends do not assert data_validated but we
++		 * can infer it from csum_blank so test both flags.
++		 */
++		if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank))
++			skb->ip_summed = CHECKSUM_UNNECESSARY;
++		else
++			skb->ip_summed = CHECKSUM_NONE;
++#ifdef CONFIG_XEN
++		skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE);
++		skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
++#endif
++		np->stats.rx_packets++;
++		np->stats.rx_bytes += skb->len;
++
++		__skb_queue_tail(&rxq, skb);
++
++		np->rx.rsp_cons = ++i;
++		work_done++;
++	}
++
++	if (pages_flipped) {
++		/* Some pages are no longer absent... */
++		balloon_update_driver_allowance(-pages_flipped);
++
++		/* Do all the remapping work and M2P updates. */
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			mcl = np->rx_mcl + pages_flipped;
++			mcl->op = __HYPERVISOR_mmu_update;
++			mcl->args[0] = (unsigned long)np->rx_mmu;
++			mcl->args[1] = pages_flipped;
++			mcl->args[2] = 0;
++			mcl->args[3] = DOMID_SELF;
++			(void)HYPERVISOR_multicall(np->rx_mcl,
++						   pages_flipped + 1);
++		}
++	}
++
++	while ((skb = __skb_dequeue(&errq)))
++		kfree_skb(skb);
++
++	while ((skb = __skb_dequeue(&rxq)) != NULL) {
++		struct page *page = NETFRONT_SKB_CB(skb)->page;
++		void *vaddr = page_address(page);
++		unsigned offset = NETFRONT_SKB_CB(skb)->offset;
++
++		memcpy(skb->data, vaddr + offset, skb_headlen(skb));
++
++		if (page != skb_shinfo(skb)->frags[0].page)
++			__free_page(page); /* fully copied: page not kept */
++
++		/* Ethernet work: Delayed to here as it peeks the header. */
++		skb->protocol = eth_type_trans(skb, dev);
++
++		/* Pass it up. */
++		netif_receive_skb(skb);
++		dev->last_rx = jiffies;
++	}
++
++	/* If we get a callback with very few responses, reduce fill target. */
++	/* NB. Note exponential increase, linear decrease. */
++	if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
++	     ((3*np->rx_target) / 4)) &&
++	    (--np->rx_target < np->rx_min_target))
++		np->rx_target = np->rx_min_target;
++
++	network_alloc_rx_buffers(dev);
++
++	*pbudget   -= work_done;
++	dev->quota -= work_done;
++
++	if (work_done < budget) {
++		local_irq_save(flags);
++
++		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
++		if (!more_to_do)
++			__netif_rx_complete(dev);
++
++		local_irq_restore(flags);
++	}
++
++	spin_unlock(&np->rx_lock);
++
++	return more_to_do; /* stays 1 unless the final check found no work */
++}
++
++static void netif_release_tx_bufs(struct netfront_info *np)
++{	/* Drop every in-flight tx skb and give back its grant ref and id. */
++	int slot;
++
++	for (slot = 1; slot <= NET_TX_RING_SIZE; slot++) {
++		struct sk_buff *pending = np->tx_skbs[slot];
++		grant_ref_t gref = np->grant_tx_ref[slot];
++
++		/* Values below PAGE_OFFSET are freelist links, not skbs. */
++		if ((unsigned long)pending < PAGE_OFFSET)
++			continue;
++
++		gnttab_end_foreign_access_ref(gref, GNTMAP_readonly);
++		gnttab_release_grant_reference(&np->gref_tx_head, gref);
++		np->grant_tx_ref[slot] = GRANT_INVALID_REF;
++		add_id_to_freelist(np->tx_skbs, slot);
++		dev_kfree_skb_irq(pending);
++	}
++}
++
++static void netif_release_rx_bufs(struct netfront_info *np)
++{	/* Reclaim every posted rx buffer; flipping receive path only. */
++	struct mmu_update      *mmu = np->rx_mmu;
++	struct multicall_entry *mcl = np->rx_mcl;
++	struct sk_buff_head free_list;
++	struct sk_buff *skb;
++	unsigned long mfn;
++	int xfer = 0, noxfer = 0, unused = 0; /* counters for the log line */
++	int id, ref, rc;
++
++	if (np->copying_receiver) { /* not implemented for this mode */
++		WPRINTK("%s: fix me for copying receiver.\n", __FUNCTION__);
++		return;
++	}
++
++	skb_queue_head_init(&free_list);
++
++	spin_lock_bh(&np->rx_lock);
++
++	for (id = 0; id < NET_RX_RING_SIZE; id++) {
++		if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
++			unused++;
++			continue;
++		}
++
++		skb = np->rx_skbs[id];
++		mfn = gnttab_end_foreign_transfer_ref(ref);
++		gnttab_release_grant_reference(&np->gref_rx_head, ref);
++		np->grant_rx_ref[id] = GRANT_INVALID_REF;
++		add_id_to_freelist(np->rx_skbs, id);
++
++		if (0 == mfn) { /* no frame came back for this buffer */
++			struct page *page = skb_shinfo(skb)->frags[0].page;
++			balloon_release_driver_page(page);
++			skb_shinfo(skb)->nr_frags = 0;
++			dev_kfree_skb(skb);
++			noxfer++;
++			continue;
++		}
++
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			/* Remap the page. */
++			struct page *page = skb_shinfo(skb)->frags[0].page;
++			unsigned long pfn = page_to_pfn(page);
++			void *vaddr = page_address(page);
++
++			MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
++						pfn_pte_ma(mfn, PAGE_KERNEL),
++						0);
++			mcl++;
++			mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
++				| MMU_MACHPHYS_UPDATE;
++			mmu->val = pfn;
++			mmu++;
++
++			set_phys_to_machine(pfn, mfn);
++		}
++		__skb_queue_tail(&free_list, skb); /* freed after multicall */
++		xfer++;
++	}
++
++	IPRINTK("%s: %d xfer, %d noxfer, %d unused\n",
++		__FUNCTION__, xfer, noxfer, unused);
++
++	if (xfer) {
++		/* Some pages are no longer absent... */
++		balloon_update_driver_allowance(-xfer);
++
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			/* Do all the remapping work and M2P updates. */
++			mcl->op = __HYPERVISOR_mmu_update;
++			mcl->args[0] = (unsigned long)np->rx_mmu;
++			mcl->args[1] = mmu - np->rx_mmu;
++			mcl->args[2] = 0;
++			mcl->args[3] = DOMID_SELF;
++			mcl++;
++			rc = HYPERVISOR_multicall_check(
++				np->rx_mcl, mcl - np->rx_mcl, NULL);
++			BUG_ON(rc); /* a failed remap is treated as fatal */
++		}
++	}
++
++	while ((skb = __skb_dequeue(&free_list)) != NULL)
++		dev_kfree_skb(skb);
++
++	spin_unlock_bh(&np->rx_lock);
++}
++
++static int network_close(struct net_device *dev)
++{	/* ->stop handler: halts the tx queue and always returns 0. */
++	struct netfront_info *priv = netdev_priv(dev);
++	netif_stop_queue(priv->netdev);
++	return 0;
++}
++
++
++static struct net_device_stats *network_get_stats(struct net_device *dev)
++{
++	/* The counters live in the driver-private area of the netdev. */
++	return &((struct netfront_info *)netdev_priv(dev))->stats;
++}
++
++static int xennet_change_mtu(struct net_device *dev, int mtu)
++{
++	/* Sizes beyond ETH_DATA_LEN are only allowed with scatter/gather. */
++	if (mtu > (xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN))
++		return -EINVAL;
++
++	dev->mtu = mtu;
++	return 0;
++}
++
++static int xennet_set_sg(struct net_device *dev, u32 data)
++{
++	struct netfront_info *info = netdev_priv(dev);
++	int backend_sg = 0;
++
++	if (!data) {
++		/* SG going off: pull an oversized MTU back to ETH_DATA_LEN. */
++		if (dev->mtu > ETH_DATA_LEN)
++			dev->mtu = ETH_DATA_LEN;
++	} else if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, "feature-sg",
++				"%d", &backend_sg) < 0 || !backend_sg) {
++		return -ENOSYS;	/* backend does not advertise feature-sg */
++	}
++
++	return ethtool_op_set_sg(dev, data);
++}
++
++static int xennet_set_tso(struct net_device *dev, u32 data)
++{
++#ifndef HAVE_TSO
++	return -ENOSYS;		/* built without HAVE_TSO */
++#else
++	struct netfront_info *info = netdev_priv(dev);
++	int backend_tso = 0;
++
++	/* Enabling TSO needs a non-zero backend "feature-gso-tcpv4";
++	 * disabling it needs no negotiation. */
++	if (data &&
++	    (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
++			  "feature-gso-tcpv4", "%d", &backend_tso) < 0 ||
++	     !backend_tso))
++		return -ENOSYS;
++
++	return ethtool_op_set_tso(dev, data);
++#endif
++}
++
++static void xennet_set_features(struct net_device *dev)
++{
++	/* Start from a clean slate: no GSO features, no scatter/gather. */
++	dev_disable_gso_features(dev);
++	xennet_set_sg(dev, 0);
++
++	/*
++	 * Scatter/gather and TSO both need checksum offload, and TSO is
++	 * only attempted once scatter/gather has been switched on.
++	 */
++	if (!(dev->features & NETIF_F_IP_CSUM))
++		return;
++	if (xennet_set_sg(dev, 1) != 0)
++		return;
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
++	/* TSO seems unreliable before 2.6.9, so older kernels skip it. */
++	xennet_set_tso(dev, 1);
++#endif
++}
++
++static int network_connect(struct net_device *dev)
++{	/* (Re)connect to the backend and rebuild ring state; 0 or error. */
++	struct netfront_info *np = netdev_priv(dev);
++	int i, requeue_idx, err;
++	struct sk_buff *skb;
++	grant_ref_t ref;
++	netif_rx_request_t *req;
++	unsigned int feature_rx_copy, feature_rx_flip;
++
++	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++			   "feature-rx-copy", "%u", &feature_rx_copy);
++	if (err != 1)
++		feature_rx_copy = 0; /* key unreadable: assume no copy mode */
++	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++			   "feature-rx-flip", "%u", &feature_rx_flip);
++	if (err != 1)
++		feature_rx_flip = 1; /* key unreadable: assume flipping works */
++
++	/*
++	 * Copy packets on receive path if:
++	 *  (a) This was requested by user, and the backend supports it; or
++	 *  (b) Flipping was requested, but this is unsupported by the backend.
++	 */
++	np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
++				(MODPARM_rx_flip && !feature_rx_flip));
++
++	err = talk_to_backend(np->xbdev, np);
++	if (err)
++		return err;
++
++	xennet_set_features(dev);
++
++	IPRINTK("device %s has %sing receive path.\n",
++		dev->name, np->copying_receiver ? "copy" : "flipp");
++
++	spin_lock_bh(&np->rx_lock);
++	spin_lock_irq(&np->tx_lock);
++
++	/*
++	 * Recovery procedure:
++	 *  NB. Freelist index entries are always going to be less than
++	 *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
++	 *  greater than PAGE_OFFSET: we use this property to distinguish
++	 *  them.
++	 */
++
++	/* Step 1: Discard all pending TX packet fragments. */
++	netif_release_tx_bufs(np);
++
++	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
++	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
++		if (!np->rx_skbs[i])
++			continue;
++
++		skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
++		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
++		req = RING_GET_REQUEST(&np->rx, requeue_idx);
++
++		if (!np->copying_receiver) {
++			gnttab_grant_foreign_transfer_ref(
++				ref, np->xbdev->otherend_id,
++				page_to_pfn(skb_shinfo(skb)->frags->page));
++		} else {
++			gnttab_grant_foreign_access_ref(
++				ref, np->xbdev->otherend_id,
++				pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
++						       frags->page)),
++				0);
++		}
++		req->gref = ref;
++		req->id   = requeue_idx;
++
++		requeue_idx++; /* buffers are compacted to the ring's start */
++	}
++
++	np->rx.req_prod_pvt = requeue_idx;
++
++	/*
++	 * Step 3: All public and private state should now be sane.  Get
++	 * ready to start sending and receiving packets and give the driver
++	 * domain a kick because we've probably just requeued some
++	 * packets.
++	 */
++	netfront_carrier_on(np);
++	notify_remote_via_irq(np->irq);
++	network_tx_buf_gc(dev);
++	network_alloc_rx_buffers(dev);
++
++	spin_unlock_irq(&np->tx_lock);
++	spin_unlock_bh(&np->rx_lock);
++
++	return 0;
++}
++
++static void netif_uninit(struct net_device *dev)
++{	/* ->uninit: release outstanding buffers, then both grant pools. */
++	struct netfront_info *info = netdev_priv(dev);
++	netif_release_tx_bufs(info);
++	netif_release_rx_bufs(info);
++	gnttab_free_grant_references(info->gref_tx_head);
++	gnttab_free_grant_references(info->gref_rx_head);
++}
++
++static struct ethtool_ops network_ethtool_ops =
++{
++	.get_link = ethtool_op_get_link,
++	.get_tx_csum = ethtool_op_get_tx_csum,
++	.set_tx_csum = ethtool_op_set_tx_csum,
++	.get_sg = ethtool_op_get_sg,
++	.get_tso = ethtool_op_get_tso,
++	.set_sg = xennet_set_sg,	/* checks backend "feature-sg" */
++	.set_tso = xennet_set_tso,	/* checks "feature-gso-tcpv4" */
++};
++
++#ifdef CONFIG_SYSFS
++static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
++{
++	/* sysfs read of rxbuf_min: prints rx_min_target in decimal. */
++	struct netfront_info *np;
++
++	np = netdev_priv(container_of(cd, struct net_device, class_dev));
++	return sprintf(buf, "%u\n", np->rx_min_target);
++}
++
++static ssize_t store_rxbuf_min(struct class_device *cd,
++			       const char *buf, size_t len)
++{	/* sysfs write of rxbuf_min; requires CAP_NET_ADMIN. */
++	struct net_device *ndev;
++	struct netfront_info *info;
++	unsigned long val;
++	char *end;
++
++	if (!capable(CAP_NET_ADMIN))
++		return -EPERM;
++
++	val = simple_strtoul(buf, &end, 0);
++	if (end == buf)		/* nothing was parsed */
++		return -EBADMSG;
++
++	if (val < RX_MIN_TARGET)
++		val = RX_MIN_TARGET;
++	if (val > RX_MAX_TARGET)
++		val = RX_MAX_TARGET;
++
++	ndev = container_of(cd, struct net_device, class_dev);
++	info = netdev_priv(ndev);
++	/* A higher minimum drags the maximum and current target up. */
++	spin_lock_bh(&info->rx_lock);
++	info->rx_min_target = val;
++	if (info->rx_max_target < val)
++		info->rx_max_target = val;
++	if (info->rx_target < val)
++		info->rx_target = val;
++	network_alloc_rx_buffers(ndev);
++	spin_unlock_bh(&info->rx_lock);
++	return len;
++}
++
++static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
++{
++	/* sysfs read of rxbuf_max: prints rx_max_target in decimal. */
++	struct netfront_info *np;
++
++	np = netdev_priv(container_of(cd, struct net_device, class_dev));
++	return sprintf(buf, "%u\n", np->rx_max_target);
++}
++
++static ssize_t store_rxbuf_max(struct class_device *cd,
++			       const char *buf, size_t len)
++{	/* sysfs write of rxbuf_max; requires CAP_NET_ADMIN. */
++	struct net_device *ndev;
++	struct netfront_info *info;
++	unsigned long val;
++	char *end;
++
++	if (!capable(CAP_NET_ADMIN))
++		return -EPERM;
++
++	val = simple_strtoul(buf, &end, 0);
++	if (end == buf)		/* nothing was parsed */
++		return -EBADMSG;
++
++	if (val < RX_MIN_TARGET)
++		val = RX_MIN_TARGET;
++	if (val > RX_MAX_TARGET)
++		val = RX_MAX_TARGET;
++
++	ndev = container_of(cd, struct net_device, class_dev);
++	info = netdev_priv(ndev);
++	/* A lower maximum drags the minimum and current target down. */
++	spin_lock_bh(&info->rx_lock);
++	info->rx_max_target = val;
++	if (info->rx_min_target > val)
++		info->rx_min_target = val;
++	if (info->rx_target > val)
++		info->rx_target = val;
++	network_alloc_rx_buffers(ndev);
++	spin_unlock_bh(&info->rx_lock);
++	return len;
++}
++
++static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
++{
++	/* sysfs read of rxbuf_cur: prints the current rx_target. */
++	struct netfront_info *np;
++
++	np = netdev_priv(container_of(cd, struct net_device, class_dev));
++	return sprintf(buf, "%u\n", np->rx_target);
++}
++
++static const struct class_device_attribute xennet_attrs[] = {
++	__ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
++	__ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
++	__ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), /* read-only */
++}; /* sysfs files created/removed by xennet_sysfs_{addif,delif}() */
++
++static int xennet_sysfs_addif(struct net_device *netdev)
++{	/* Create all rxbuf_* files; on failure remove those already made. */
++	struct class_device *cdev = &netdev->class_dev;
++	int made, rc = 0;
++
++	for (made = 0; made < ARRAY_SIZE(xennet_attrs); made++) {
++		rc = class_device_create_file(cdev, &xennet_attrs[made]);
++		if (rc != 0)
++			break;
++	}
++	if (made == ARRAY_SIZE(xennet_attrs))
++		return 0;
++
++	/* Roll back in reverse order of creation. */
++	while (made-- > 0)
++		class_device_remove_file(cdev, &xennet_attrs[made]);
++
++	return rc;
++}
++
++static void xennet_sysfs_delif(struct net_device *netdev)
++{	/* Remove every rxbuf_* attribute file of this device. */
++	const struct class_device_attribute *attr = xennet_attrs;
++	const struct class_device_attribute *end;
++
++	end = xennet_attrs + ARRAY_SIZE(xennet_attrs);
++	for (; attr != end; attr++)
++		class_device_remove_file(&netdev->class_dev, attr);
++}
++
++#endif /* CONFIG_SYSFS */
++
++
++/*
++ * Multicast-list hook: intentionally a no-op. The virtual interface is
++ * point-to-point and the physical interface is probably promiscuous anyway.
++ */
++static void network_set_multicast_list(struct net_device *dev)
++{
++}
++
++static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
++{	/* Allocate and initialise the net_device; ERR_PTR() on failure. */
++	int i, err = 0;
++	struct net_device *netdev = NULL;
++	struct netfront_info *np = NULL;
++
++	netdev = alloc_etherdev(sizeof(struct netfront_info));
++	if (!netdev) {
++		printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
++		       __FUNCTION__);
++		return ERR_PTR(-ENOMEM);
++	}
++
++	np                   = netdev_priv(netdev);
++	np->xbdev            = dev;
++
++	spin_lock_init(&np->tx_lock);
++	spin_lock_init(&np->rx_lock);
++
++	skb_queue_head_init(&np->rx_batch);
++	np->rx_target     = RX_DFL_MIN_TARGET;
++	np->rx_min_target = RX_DFL_MIN_TARGET;
++	np->rx_max_target = RX_MAX_TARGET;
++
++	init_timer(&np->rx_refill_timer);
++	np->rx_refill_timer.data = (unsigned long)netdev;
++	np->rx_refill_timer.function = rx_refill_timeout;
++
++	/* Initialise tx_skbs as a free chain; rx_skbs starts out all NULL. */
++	for (i = 0; i <= NET_TX_RING_SIZE; i++) {
++		np->tx_skbs[i] = (void *)((unsigned long) i+1);
++		np->grant_tx_ref[i] = GRANT_INVALID_REF;
++	}
++
++	for (i = 0; i < NET_RX_RING_SIZE; i++) {
++		np->rx_skbs[i] = NULL;
++		np->grant_rx_ref[i] = GRANT_INVALID_REF;
++	}
++
++	/* A grant for every tx ring slot */
++	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
++					  &np->gref_tx_head) < 0) {
++		printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
++		err = -ENOMEM;
++		goto exit;
++	}
++	/* A grant for every rx ring slot */
++	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
++					  &np->gref_rx_head) < 0) {
++		printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
++		err = -ENOMEM;
++		goto exit_free_tx;
++	}
++
++	netdev->open            = network_open;
++	netdev->hard_start_xmit = network_start_xmit;
++	netdev->stop            = network_close;
++	netdev->get_stats       = network_get_stats;
++	netdev->poll            = netif_poll;
++	netdev->set_multicast_list = network_set_multicast_list;
++	netdev->uninit          = netif_uninit;
++	netdev->change_mtu	= xennet_change_mtu;
++	netdev->weight          = 64;
++	netdev->features        = NETIF_F_IP_CSUM;
++
++	SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
++	SET_MODULE_OWNER(netdev);
++	SET_NETDEV_DEV(netdev, &dev->dev);
++
++	np->netdev = netdev;
++
++	netfront_carrier_off(np); /* stays off until network_connect() */
++
++	return netdev;
++
++ exit_free_tx:
++	gnttab_free_grant_references(np->gref_tx_head);
++ exit:
++	free_netdev(netdev); /* np is part of netdev and goes with it */
++	return ERR_PTR(err);
++}
++
++/*
++ * inetaddr notifier: when an address comes up on one of our VIFs, send a fake
++ * ARP reply to reset switches and router ARP caches. Returns NOTIFY_DONE.
++ */
++static int
++inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
++{
++	struct in_ifaddr *addr = ptr;
++	struct net_device *netdev = addr->ifa_dev->dev;
++
++	/* Ours iff the device's open hook is network_open. */
++	if (event != NETDEV_UP || netdev->open != network_open)
++		return NOTIFY_DONE;
++	(void)send_fake_arp(netdev);
++	return NOTIFY_DONE;
++}
++
++
++/* Quiesce the interface and detach it from the old backend: carrier off
++ * under both ring locks, irq unbound, ring grants (if any) ended and reset. */
++static void netif_disconnect_backend(struct netfront_info *info)
++{
++	spin_lock_bh(&info->rx_lock);
++	spin_lock_irq(&info->tx_lock);
++	netfront_carrier_off(info);
++	spin_unlock_irq(&info->tx_lock);
++	spin_unlock_bh(&info->rx_lock);
++
++	if (info->irq)
++		unbind_from_irqhandler(info->irq, info->netdev);
++	info->irq = 0;
++	end_access(info->tx_ring_ref, info->tx.sring);
++	info->tx_ring_ref = GRANT_INVALID_REF;
++	info->tx.sring = NULL;
++	end_access(info->rx_ring_ref, info->rx.sring);
++	info->rx_ring_ref = GRANT_INVALID_REF;
++	info->rx.sring = NULL;
++}
++
++
++static void end_access(int ref, void *page)
++{
++	if (ref != GRANT_INVALID_REF)
++		gnttab_end_foreign_access(ref, 0, (unsigned long)page);
++}
++
++
++/* ** Driver registration ** */
++
++
++static struct xenbus_device_id netfront_ids[] = {
++	{ "vif" },
++	{ "" }
++};
++
++
++static struct xenbus_driver netfront = {
++	.name = "vif",
++	.owner = THIS_MODULE,
++	.ids = netfront_ids,
++	.probe = netfront_probe,
++	.remove = __devexit_p(netfront_remove),
++	.resume = netfront_resume,
++	.otherend_changed = backend_changed,
++};
++
++
++static struct notifier_block notifier_inetdev = {
++	.notifier_call  = inetdev_notify,
++	.next           = NULL,
++	.priority       = 0
++};
++
++/* Module init (a no-op in dom0); undoes the notifier if registration fails. */
++static int __init netif_init(void)
++{
++	int err;
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++#ifdef CONFIG_XEN
++	if (MODPARM_rx_flip && MODPARM_rx_copy) {
++		WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
++		return -EINVAL;
++	}
++	if (!MODPARM_rx_flip && !MODPARM_rx_copy)
++		MODPARM_rx_flip = 1; /* Default is to flip. */
++#endif
++	if (is_initial_xendomain())
++		return 0;
++	IPRINTK("Initialising virtual ethernet driver.\n");
++	(void)register_inetaddr_notifier(&notifier_inetdev);
++	err = xenbus_register_frontend(&netfront);
++	if (err) /* don't leave the notifier behind when init fails */
++		unregister_inetaddr_notifier(&notifier_inetdev);
++	return err;
++}
++module_init(netif_init);
++
++
++/* Module exit: mirrors netif_init(), which registers nothing in dom0. */
++static void __exit netif_exit(void)
++{
++	if (is_initial_xendomain())
++		return;
++
++	unregister_inetaddr_notifier(&notifier_inetdev);
++	xenbus_unregister_driver(&netfront);
++}
++module_exit(netif_exit);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,15 @@
++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
++
++pciback-y := pci_stub.o pciback_ops.o xenbus.o
++pciback-y += conf_space.o conf_space_header.o \
++	     conf_space_capability.o \
++	     conf_space_capability_vpd.o \
++	     conf_space_capability_pm.o \
++             conf_space_quirks.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
++
++ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
++EXTRA_CFLAGS += -DDEBUG
++endif
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/conf_space.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,426 @@
++/*
++ * PCI Backend - Functions for creating a virtual configuration space for
++ *               exported PCI Devices.
++ *               It's dangerous to allow PCI Driver Domains to change their
++ *               device's resources (memory, i/o ports, interrupts). We need to
++ *               restrict changes to certain PCI Configuration registers:
++ *               BARs, INTERRUPT_PIN, most registers in the header...
++ *
++ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++#define DEFINE_PCI_CONFIG(op,size,type) 			\
++int pciback_##op##_config_##size 				\
++(struct pci_dev *dev, int offset, type value, void *data)	\
++{								\
++	return pci_##op##_config_##size (dev, offset, value);	\
++}
++
++DEFINE_PCI_CONFIG(read, byte, u8 *)
++DEFINE_PCI_CONFIG(read, word, u16 *)
++DEFINE_PCI_CONFIG(read, dword, u32 *)
++
++DEFINE_PCI_CONFIG(write, byte, u8)
++DEFINE_PCI_CONFIG(write, word, u16)
++DEFINE_PCI_CONFIG(write, dword, u32)
++
++/* Invoke the field's size-specific read handler, if it has one.
++ * *value is zeroed before any handler is called.
++ * Returns the handler's result, or 0 (with *value == 0) when none ran.
++ * NOTE(review): the u8/u16 handlers store through a cast of 'value', i.e.
++ * into its lowest-addressed bytes -- presumably little-endian only; confirm. */
++static int conf_space_read(struct pci_dev *dev,
++			   struct config_field_entry *entry, int offset,
++			   u32 * value)
++{
++	struct config_field *fld = entry->field;
++	void *priv = entry->data;
++
++	/* Default result when no handler runs. */
++	*value = 0;
++
++	/* field->size selects which member of the handler union is used. */
++	if (fld->size == 1 && fld->u.b.read)
++		return fld->u.b.read(dev, offset, (u8 *) value, priv);
++
++	if (fld->size == 2 && fld->u.w.read)
++		return fld->u.w.read(dev, offset, (u16 *) value, priv);
++
++	if (fld->size == 4 && fld->u.dw.read)
++		return fld->u.dw.read(dev, offset, value, priv);
++
++	return 0;
++}
++
++/* Invoke the field's size-specific write handler, if it has one, passing
++ * 'value' truncated to the field size.
++ * Returns the handler's result, or 0 when the field has no write handler
++ * (nothing is written in that case). */
++static int conf_space_write(struct pci_dev *dev,
++			    struct config_field_entry *entry, int offset,
++			    u32 value)
++{
++	struct config_field *fld = entry->field;
++	void *priv = entry->data;
++	int rc = 0;
++
++	/* field->size selects which member of the handler union is used. */
++	if (fld->size == 1) {
++		if (fld->u.b.write)
++			rc = fld->u.b.write(dev, offset, (u8) value, priv);
++	} else if (fld->size == 2) {
++		if (fld->u.w.write)
++			rc = fld->u.w.write(dev, offset, (u16) value, priv);
++	} else if (fld->size == 4) {
++		if (fld->u.dw.write)
++			rc = fld->u.dw.write(dev, offset, value, priv);
++	}
++
++	return rc;
++}
++
++/* Bit mask covering 'size' bytes; any size other than 1 or 2 gets 32 bits. */
++static inline u32 get_mask(int size)
++{
++	switch (size) {
++	case 1:  return 0xff;
++	case 2:  return 0xffff;
++	default: return 0xffffffff;
++	}
++}
++
++/* 1 if size is 1, 2 or 4 and offset is a multiple of size; otherwise 0. */
++static inline int valid_request(int offset, int size)
++{
++	if (size != 1 && size != 2 && size != 4)
++		return 0;
++	return (offset % size) == 0;
++}
++
++/* Overlay new_val onto val. new_val and its mask are first moved by 'offset'
++ * bytes: towards the high end if offset >= 0, towards the low end otherwise.
++ * Bits outside the shifted mask keep their value from val. */
++static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
++			      int offset)
++{
++	int bits = offset * 8;
++	u32 mask, bits_in;
++
++	mask = (bits >= 0) ? new_val_mask << bits : new_val_mask >> -bits;
++	bits_in = (bits >= 0) ? new_val << bits : new_val >> -bits;
++
++	return (val & ~mask) | (bits_in & mask);
++}
++
++/* Map a PCIBIOS_* status onto the matching XEN_PCI_ERR_* code; any other
++ * value (e.g. one that is already a XEN_PCI_ERR_*) is returned unchanged. */
++static int pcibios_err_to_errno(int err)
++{
++	if (err == PCIBIOS_SUCCESSFUL)
++		return XEN_PCI_ERR_success;
++	if (err == PCIBIOS_DEVICE_NOT_FOUND)
++		return XEN_PCI_ERR_dev_not_found;
++	if (err == PCIBIOS_BAD_REGISTER_NUMBER)
++		return XEN_PCI_ERR_invalid_offset;
++	if (err == PCIBIOS_FUNC_NOT_SUPPORTED)
++		return XEN_PCI_ERR_not_implemented;
++	if (err == PCIBIOS_SET_FAILED)
++		return XEN_PCI_ERR_access_denied;
++	return err;
++}
++
++/* Read 'size' bytes at 'offset' of the virtual configuration space: the real
++ * register value overlaid with every config_field the request overlaps.
++ * *ret_val is always written (0 for an invalid request). */
++int pciback_config_read(struct pci_dev *dev, int offset, int size,
++			u32 * ret_val)
++{
++	int err = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry;
++	struct config_field *field;
++	int req_start = offset, req_end = offset + size;
++	int field_start, field_end;
++	/* if read fails for any reason, return 0 (as if device didn't respond) */
++	u32 value = 0, tmp_val;
++
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
++		       pci_name(dev), size, offset);
++
++	if (!valid_request(offset, size)) {
++		err = XEN_PCI_ERR_invalid_offset;
++		goto out;
++	}
++
++	/* Get the real value first, then modify as appropriate */
++	switch (size) {
++	case 1:
++		err = pci_read_config_byte(dev, offset, (u8 *) & value);
++		break;
++	case 2:
++		err = pci_read_config_word(dev, offset, (u16 *) & value);
++		break;
++	case 4:
++		err = pci_read_config_dword(dev, offset, &value);
++		break;
++	}
++
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
++		field_start = OFFSET(cfg_entry);
++		field_end = OFFSET(cfg_entry) + field->size;
++		/* Half-open interval overlap; also catches a field that lies
++		 * strictly inside the request (e.g. byte field, dword read). */
++		if (req_end > field_start && field_end > req_start) {
++			err = conf_space_read(dev, cfg_entry, field_start,
++					      &tmp_val);
++			if (err)
++				goto out;
++			value = merge_value(value, tmp_val,
++					    get_mask(field->size),
++					    field_start - req_start);
++		}
++	}
++
++      out:
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
++		       pci_name(dev), size, offset, value);
++
++	*ret_val = value;
++	return pcibios_err_to_errno(err);
++}
++
++/* Write 'size' bytes of 'value' at 'offset' of the virtual config space.
++ * Every config_field the request overlaps gets a read-merge-write via its
++ * handler; an unclaimed request reaches the device only if permissive. */
++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
++{
++	int err = 0, handled = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry;
++	struct config_field *field;
++	u32 tmp_val;
++	int req_start = offset, req_end = offset + size;
++	int field_start, field_end;
++
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG
++		       "pciback: %s: write request %d bytes at 0x%x = %x\n",
++		       pci_name(dev), size, offset, value);
++
++	if (!valid_request(offset, size))
++		return XEN_PCI_ERR_invalid_offset;
++
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
++		field_start = OFFSET(cfg_entry);
++		field_end = OFFSET(cfg_entry) + field->size;
++
++		/* Half-open interval overlap; also catches a field that lies
++		 * strictly inside the request (e.g. byte field, dword write). */
++		if (req_end > field_start && field_end > req_start) {
++			tmp_val = 0;
++			err = pciback_config_read(dev, field_start,
++						  field->size, &tmp_val);
++			if (err)
++				break;
++
++			tmp_val = merge_value(tmp_val, value, get_mask(size),
++					      req_start - field_start);
++			err = conf_space_write(dev, cfg_entry, field_start,
++					       tmp_val);
++
++			/* handled is set true here, but not every byte
++			 * may have been written! Properly detecting if
++			 * every byte is handled is unnecessary as the
++			 * flag is used to detect devices that need
++			 * special helpers to work correctly.
++			 */
++			handled = 1;
++		}
++	}
++
++	if (!handled && !err) {
++		/* By default, anything not specifically handled above is
++		 * read-only. The permissive flag changes this behavior so
++		 * that anything not specifically handled above is writable.
++		 * This means that some fields may still be read-only because
++		 * they have entries in the config_field list that intercept
++		 * the write and do nothing. */
++		if (dev_data->permissive) {
++			switch (size) {
++			case 1:
++				err = pci_write_config_byte(dev, offset,
++							    (u8) value);
++				break;
++			case 2:
++				err = pci_write_config_word(dev, offset,
++							    (u16) value);
++				break;
++			case 4:
++				err = pci_write_config_dword(dev, offset,
++							     (u32) value);
++				break;
++			}
++		} else if (!dev_data->warned_on_write) {
++			dev_data->warned_on_write = 1;
++			dev_warn(&dev->dev, "Driver tried to write to a "
++				 "read-only configuration space field at offset "
++				 "0x%x, size %d. This may be harmless, but if "
++				 "you have problems with your device:\n"
++				 "1) see permissive attribute in sysfs\n"
++				 "2) report problems to the xen-devel "
++				 "mailing list along with details of your "
++				 "device obtained from lspci.\n", offset, size);
++		}
++	}
++
++	return pcibios_err_to_errno(err);
++}
++
++/* Drop the dynamically allocated fields, i.e. every entry whose field has a
++ * 'clean' hook: the hook is run, then the entry's data and the entry itself
++ * are freed. Entries without the hook are left on the list. */
++void pciback_config_free_dyn_fields(struct pci_dev *dev)
++{
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *entry, *next;
++
++	dev_dbg(&dev->dev,
++		"free-ing dynamically allocated virtual configuration space fields\n");
++
++	list_for_each_entry_safe(entry, next, &dev_data->config_fields, list) {
++		struct config_field *fld = entry->field;
++
++		if (!fld->clean)
++			continue;
++
++		fld->clean(fld);
++
++		kfree(entry->data);	/* kfree(NULL) is a no-op */
++		list_del(&entry->list);
++		kfree(entry);
++	}
++}
++
++/* Run the optional 'reset' hook of every field attached to the device. */
++void pciback_config_reset_dev(struct pci_dev *dev)
++{
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *entry;
++
++	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
++
++	list_for_each_entry(entry, &dev_data->config_fields, list) {
++		conf_field_reset reset = entry->field->reset;
++
++		if (reset)
++			reset(dev, OFFSET(entry), entry->data);
++	}
++}
++
++/* Tear down the whole field list: unlink each entry, call the field's
++ * optional 'release' hook with the entry's data, then free the entry. */
++void pciback_config_free_dev(struct pci_dev *dev)
++{
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *entry, *next;
++
++	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
++
++	list_for_each_entry_safe(entry, next, &dev_data->config_fields, list) {
++		conf_field_free release = entry->field->release;
++
++		list_del(&entry->list);
++		if (release)
++			release(dev, OFFSET(entry), entry->data);
++
++		kfree(entry);
++	}
++}
++
++/* Attach 'field' to the device at base_offset + field->offset. Runs the
++ * field's optional 'init' hook and keeps its result as the entry's data.
++ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, the
++ * non-zero result of pciback_field_is_dup() if that check trips, or the
++ * error encoded in the pointer returned by 'init'. */
++int pciback_config_add_field_offset(struct pci_dev *dev,
++				    struct config_field *field,
++				    unsigned int base_offset)
++{
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *entry;
++	void *priv;
++	int rc;
++
++	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
++	if (!entry)
++		return -ENOMEM;
++
++	entry->data = NULL;
++	entry->field = field;
++	entry->base_offset = base_offset;
++
++	/* silently ignore duplicate fields */
++	rc = pciback_field_is_dup(dev, OFFSET(entry));
++	if (rc)
++		goto free_entry;
++
++	if (field->init) {
++		priv = field->init(dev, OFFSET(entry));
++		if (IS_ERR(priv)) {
++			rc = PTR_ERR(priv);
++			goto free_entry;
++		}
++		entry->data = priv;
++	}
++
++	dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
++		OFFSET(entry));
++	list_add_tail(&entry->list, &dev_data->config_fields);
++	return 0;
++
++      free_entry:
++	kfree(entry);
++	return rc;
++}
++
++/* Set up the device's virtual configuration space, which keeps track of
++ * certain registers (such as the base address registers, BARs) so that the
++ * client cannot manipulate them directly. The field list is initialised,
++ * then header fields, capability fields and quirks are added in that order.
++ * Returns 0, or the error of the first step that failed.
++ */
++int pciback_config_init_dev(struct pci_dev *dev)
++{
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	int rc;
++
++	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
++
++	INIT_LIST_HEAD(&dev_data->config_fields);
++
++	/* Each step returns non-zero on failure; stop at the first one. */
++	rc = pciback_config_header_add_fields(dev);
++	if (rc)
++		return rc;
++
++	rc = pciback_config_capability_add_fields(dev);
++	if (rc)
++		return rc;
++
++	return pciback_config_quirks_init(dev);
++}
++
++int pciback_config_init(void)
++{
++	return pciback_config_capability_init();
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/conf_space.h	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,126 @@
++/*
++ * PCI Backend - Common data structures for overriding the configuration space
++ *
++ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++
++#ifndef __XEN_PCIBACK_CONF_SPACE_H__
++#define __XEN_PCIBACK_CONF_SPACE_H__
++
++#include <linux/list.h>
++#include <linux/err.h>
++
++/* conf_field_init can return an errno in a ptr with ERR_PTR() */
++typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
++typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
++typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
++
++typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
++				 void *data);
++typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
++				void *data);
++typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
++				void *data);
++typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
++				void *data);
++typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
++			       void *data);
++typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
++			       void *data);
++
++/* These are the fields within the configuration space which we
++ * are interested in intercepting reads/writes to and changing their
++ * values.
++ */
++struct config_field {
++	unsigned int offset;
++	unsigned int size;
++	unsigned int mask;
++	conf_field_init init;
++	conf_field_reset reset;
++	conf_field_free release;
++	void (*clean) (struct config_field * field);
++	union {
++		struct {
++			conf_dword_write write;
++			conf_dword_read read;
++		} dw;
++		struct {
++			conf_word_write write;
++			conf_word_read read;
++		} w;
++		struct {
++			conf_byte_write write;
++			conf_byte_read read;
++		} b;
++	} u;
++	struct list_head list;
++};
++
++struct config_field_entry {
++	struct list_head list;
++	struct config_field *field;
++	unsigned int base_offset;
++	void *data;
++};
++
++#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
++
++/* Add fields to a device - the add_fields macro expects to get a pointer to
++ * the first entry in an array (of which the ending is marked by size==0)
++ */
++int pciback_config_add_field_offset(struct pci_dev *dev,
++				    struct config_field *field,
++				    unsigned int offset);
++
++static inline int pciback_config_add_field(struct pci_dev *dev,
++					   struct config_field *field)
++{
++	return pciback_config_add_field_offset(dev, field, 0);
++}
++
++/* Add each entry of a size==0 terminated field array at base offset 0;
++ * stops at, and returns, the first error (0 if every field was added). */
++static inline int pciback_config_add_fields(struct pci_dev *dev,
++					    struct config_field *field)
++{
++	int rc = 0;
++
++	for (; !rc && field->size != 0; field++)
++		rc = pciback_config_add_field(dev, field);
++	return rc;
++}
++
++/* Add each entry of a size==0 terminated field array relative to the base
++ * 'offset'; stops at, and returns, the first error (0 if all were added). */
++static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
++						   struct config_field *field,
++						   unsigned int offset)
++{
++	int rc = 0;
++
++	for (; !rc && field->size != 0; field++)
++		rc = pciback_config_add_field_offset(dev, field, offset);
++	return rc;
++}
++
++/* Read/Write the real configuration space */
++int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
++			     void *data);
++int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
++			     void *data);
++int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
++			      void *data);
++int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
++			      void *data);
++int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
++			      void *data);
++int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
++			       void *data);
++
++int pciback_config_capability_init(void);
++
++int pciback_config_header_add_fields(struct pci_dev *dev);
++int pciback_config_capability_add_fields(struct pci_dev *dev);
++
++#endif				/* __XEN_PCIBACK_CONF_SPACE_H__ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/conf_space_capability.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,71 @@
++/*
++ * PCI Backend - Handles the virtual fields found on the capability lists
++ *               in the configuration space.
++ *
++ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++static LIST_HEAD(capabilities);
++
++static struct config_field caplist_header[] = {
++	{
++	 .offset    = PCI_CAP_LIST_ID,
++	 .size      = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
++	 .u.w.read  = pciback_read_config_word,
++	 .u.w.write = NULL,
++	},
++	{
++	 .size = 0,
++	},
++};
++
++static inline void register_capability(struct pciback_config_capability *cap)
++{
++	list_add_tail(&cap->cap_list, &capabilities);
++}
++
++/* For every registered capability that the device actually has, overlay the
++ * generic capability-list header fields and then the capability's own fields
++ * at the capability's offset. Returns 0, or the first error encountered. */
++int pciback_config_capability_add_fields(struct pci_dev *dev)
++{
++	struct pciback_config_capability *cap;
++
++	list_for_each_entry(cap, &capabilities, cap_list) {
++		int pos = pci_find_capability(dev, cap->capability);
++		int rc;
++
++		if (!pos)
++			continue;
++
++		dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
++			cap->capability, pos);
++
++		rc = pciback_config_add_fields_offset(dev, caplist_header, pos);
++		if (rc)
++			return rc;
++
++		rc = pciback_config_add_fields_offset(dev, cap->fields, pos);
++		if (rc)
++			return rc;
++	}
++
++	return 0;
++}
++
++extern struct pciback_config_capability pciback_config_capability_vpd;
++extern struct pciback_config_capability pciback_config_capability_pm;
++
++int pciback_config_capability_init(void)
++{
++	register_capability(&pciback_config_capability_vpd);
++	register_capability(&pciback_config_capability_pm);
++
++	return 0;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/conf_space_capability.h	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,23 @@
++/*
++ * PCI Backend - Data structures for special overlays for structures on
++ *               the capability list.
++ *
++ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++
++#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
++#define __PCIBACK_CONFIG_CAPABILITY_H__
++
++#include <linux/pci.h>
++#include <linux/list.h>
++
++struct pciback_config_capability {
++	struct list_head cap_list;
++
++	int capability;
++
++	/* If the device has the capability found above, add these fields */
++	struct config_field *fields;
++};
++
++#endif
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/conf_space_capability_pm.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,128 @@
++/*
++ * PCI Backend - Configuration space overlay for power management
++ *
++ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++
++#include <linux/pci.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++/* Read the PM capabilities word with the PCI_PM_CAP_PME_MASK bits cleared.
++ * On a failed read *value is left untouched and the error is returned. */
++static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
++			void *data)
++{
++	u16 raw;
++	int rc = pci_read_config_word(dev, offset, &raw);
++
++	if (rc)
++		return rc;
++
++	*value = raw & ~PCI_PM_CAP_PME_MASK;
++
++	return 0;
++}
++
++/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
++ * Can't allow driver domain to enable PMEs - they're shared */
++#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
++
++static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
++			 void *data)
++{
++	int err;
++	u16 old_value;
++	pci_power_t new_state, old_state;
++	/* Write handler for the PCI_PM_CTRL field; 'data' is unused. */
++	err = pci_read_config_word(dev, offset, &old_value);
++	if (err)
++		goto out;
++
++	old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
++	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
++	/* Only PM_OK_BITS come from the request; other bits keep old_value. */
++	new_value &= PM_OK_BITS;
++	if ((old_value & PM_OK_BITS) != new_value) {
++		new_value = (old_value & ~PM_OK_BITS) | new_value;
++		err = pci_write_config_word(dev, offset, new_value);
++		if (err)
++			goto out;
++	}
++
++	/* Let pci core handle the power management change */
++	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
++	err = pci_set_power_state(dev, new_state);
++	if (err) {
++		err = PCIBIOS_SET_FAILED;
++		goto out;
++	}
++
++	/*
++	 * Device may lose PCI config info on D3->D0 transition. This
++	 * is a problem for some guests which will not reset BARs. Even
++	 * those that have a go will be foiled by our BAR-write handler
++	 * which will discard the write! Since Linux won't re-init
++	 * the config space automatically in all cases, we do it here.
++	 * Future: Should we re-initialise all first 64 bytes of config space?
++	 */
++	if (new_state == PCI_D0 &&
++	    (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
++	    !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
++		pci_restore_bars(dev);
++
++ out:
++	return err;
++}
++
++/* Ensure PMEs are disabled: clear PCI_PM_CTRL_PME_ENABLE if it is set. */
++static void *pm_ctrl_init(struct pci_dev *dev, int offset)
++{
++	int err;
++	u16 value;
++
++	err = pci_read_config_word(dev, offset, &value);
++	if (err)
++		goto out;
++
++	if (value & PCI_PM_CTRL_PME_ENABLE) {
++		value &= ~PCI_PM_CTRL_PME_ENABLE;
++		err = pci_write_config_word(dev, offset, value);
++	}
++	/* NOTE(review): confirm err is a -errno here, else IS_ERR() misses it. */
++      out:
++	return ERR_PTR(err);
++}
++
++static struct config_field caplist_pm[] = {
++	{
++		.offset     = PCI_PM_PMC,
++		.size       = 2,
++		.u.w.read   = pm_caps_read,
++	},
++	{
++		.offset     = PCI_PM_CTRL,
++		.size       = 2,
++		.init       = pm_ctrl_init,
++		.u.w.read   = pciback_read_config_word,
++		.u.w.write  = pm_ctrl_write,
++	},
++	{
++		.offset     = PCI_PM_PPB_EXTENSIONS,
++		.size       = 1,
++		.u.b.read   = pciback_read_config_byte,
++	},
++	{
++		.offset     = PCI_PM_DATA_REGISTER,
++		.size       = 1,
++		.u.b.read   = pciback_read_config_byte,
++	},
++	{
++		.size = 0,
++	},
++};
++
++struct pciback_config_capability pciback_config_capability_pm = {
++	.capability = PCI_CAP_ID_PM,
++	.fields = caplist_pm,
++};
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/conf_space_capability_vpd.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,42 @@
++/*
++ * PCI Backend - Configuration space overlay for Vital Product Data
++ *
++ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++
++#include <linux/pci.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++/* VPD address write: refused with PCIBIOS_SET_FAILED when PCI_VPD_ADDR_F is
++ * set (writes to the vital product data are disallowed), else passed on. */
++static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
++			     void *data)
++{
++	if (value & PCI_VPD_ADDR_F)
++		return PCIBIOS_SET_FAILED;
++	return pci_write_config_word(dev, offset, value);
++}
++
++static struct config_field caplist_vpd[] = {
++	{
++	 .offset    = PCI_VPD_ADDR,
++	 .size      = 2,
++	 .u.w.read  = pciback_read_config_word,
++	 .u.w.write = vpd_address_write,
++	 },
++	{
++	 .offset     = PCI_VPD_DATA,
++	 .size       = 4,
++	 .u.dw.read  = pciback_read_config_dword,
++	 .u.dw.write = NULL,
++	 },
++	{
++	 .size = 0,
++	 },
++};
++ 
++struct pciback_config_capability pciback_config_capability_vpd = {
++	.capability = PCI_CAP_ID_VPD,
++	.fields = caplist_vpd,
++};
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/conf_space_header.c	2007-08-27 14:02:01.000000000 -0400
+@@ -0,0 +1,309 @@
++/*
++ * PCI Backend - Handles the virtual fields in the configuration space headers.
++ *
++ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++
++struct pci_bar_info {
++	u32 val;
++	u32 len_val;
++	int which;
++};
++
++#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
++#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
++
++static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
++{
++	int err;
++	/* Write handler for the PCI_COMMAND field; 'data' is unused. */
++	if (!dev->is_enabled && is_enable_cmd(value)) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG "pciback: %s: enable\n",
++			       pci_name(dev));
++		err = pci_enable_device(dev);
++		if (err)
++			return err;
++	} else if (dev->is_enabled && !is_enable_cmd(value)) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG "pciback: %s: disable\n",
++			       pci_name(dev));
++		pci_disable_device(dev);
++	}
++
++	if (!dev->is_busmaster && is_master_cmd(value)) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG "pciback: %s: set bus master\n",
++			       pci_name(dev));
++		pci_set_master(dev);
++	}
++
++	if (value & PCI_COMMAND_INVALIDATE) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG
++			       "pciback: %s: enable memory-write-invalidate\n",
++			       pci_name(dev));
++		err = pci_set_mwi(dev);
++		if (err) {
++			printk(KERN_WARNING
++			       "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
++			       pci_name(dev), err);
++			value &= ~PCI_COMMAND_INVALIDATE;
++		}
++	}
++	/* 'value' reaches the device, minus INVALIDATE if pci_set_mwi() failed. */
++	return pci_write_config_word(dev, offset, value);
++}
++
++/* Virtual ROM address write: nothing is written to the device here. The
++ * handler only records, in ->which, whether the value written was the
++ * sizing pattern ~PCI_ROM_ADDRESS_ENABLE. */
++static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
++{
++	struct pci_bar_info *info = data;
++
++	if (unlikely(!info)) {
++		printk(KERN_WARNING "pciback: driver data not found for %s\n",
++		       pci_name(dev));
++		return XEN_PCI_ERR_op_failed;
++	}
++
++	/* A write to obtain the length must happen as a 32-bit write.
++	 * This does not (yet) support writing individual bytes
++	 */
++	info->which = (value == ~PCI_ROM_ADDRESS_ENABLE);
++
++	/* Do we need to support enabling/disabling the rom address here? */
++
++	return 0;
++}
++
++/* For the BARs, only allow writes which write ~0 or
++ * the correct resource information
++ * (Needed for when the driver probes the resource usage)
++ * Nothing is written to the device here; ->which just records whether the
++ * last write was the all-ones sizing pattern. Returns 0, or
++ * XEN_PCI_ERR_op_failed if the field has no data attached.
++ */
++static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
++{
++	struct pci_bar_info *info = data;
++
++	if (unlikely(!info)) {
++		printk(KERN_WARNING "pciback: driver data not found for %s\n",
++		       pci_name(dev));
++		return XEN_PCI_ERR_op_failed;
++	}
++
++	/* A write to obtain the length must happen as a 32-bit write.
++	 * This does not (yet) support writing individual bytes
++	 */
++	info->which = (value == ~0);
++
++	return 0;
++}
++
++/* Virtual BAR read: yields the saved sizing response (len_val) if ->which is
++ * set, else the saved value (val). The device itself is not accessed. */
++static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
++{
++	const struct pci_bar_info *info = data;
++
++	if (unlikely(!info)) {
++		printk(KERN_WARNING "pciback: driver data not found for %s\n",
++		       pci_name(dev));
++		return XEN_PCI_ERR_op_failed;
++	}
++	*value = info->which ? info->len_val : info->val;
++	return 0;
++}
++
++static inline void read_dev_bar(struct pci_dev *dev,
++				struct pci_bar_info *bar_info, int offset,
++				u32 len_mask)	/* fills ->val and ->len_val */
++{
++	pci_read_config_dword(dev, offset, &bar_info->val);	/* save current */
++	pci_write_config_dword(dev, offset, len_mask);	/* write probe mask */
++	pci_read_config_dword(dev, offset, &bar_info->len_val);	/* read back */
++	pci_write_config_dword(dev, offset, bar_info->val);	/* restore saved */
++}
++
++/* .init hook for BAR fields: cache value and length mask, or ERR_PTR. */
++static void *bar_init(struct pci_dev *dev, int offset)
++{
++	struct pci_bar_info *info;
++
++	info = kmalloc(sizeof(*info), GFP_KERNEL);
++	if (!info)
++		return ERR_PTR(-ENOMEM);
++	read_dev_bar(dev, info, offset, ~0);	/* probe with all ones */
++	info->which = 0;
++	return info;
++}
++
++/* .init hook for the ROM field: cache value and length mask, or ERR_PTR. */
++static void *rom_init(struct pci_dev *dev, int offset)
++{
++	struct pci_bar_info *info;
++
++	info = kmalloc(sizeof(*info), GFP_KERNEL);
++	if (!info)
++		return ERR_PTR(-ENOMEM);
++	read_dev_bar(dev, info, offset, ~PCI_ROM_ADDRESS_ENABLE);
++	info->which = 0;
++	return info;
++}
++
++/* .reset hook for BAR/ROM fields: clear the 'which' flag so that later
++ * reads through bar_read() return the saved register value again. */
++static void bar_reset(struct pci_dev *dev, int offset, void *data)
++{
++	((struct pci_bar_info *)data)->which = 0;
++}
++
++static void bar_release(struct pci_dev *dev, int offset, void *data)
++{
++	kfree(data);	/* the pci_bar_info allocated by bar_init()/rom_init() */
++}
++
++static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
++			  void *data)
++{
++	*value = (u8) dev->irq;	/* from struct pci_dev, not from config space */
++
++	return 0;
++}
++
++/* Write filter for PCI_BIST: the byte is forwarded to the device only when
++ * it leaves every bit other than PCI_BIST_START unchanged, or when it is
++ * exactly PCI_BIST_START.  Any other write is dropped and 0 is returned.
++ */
++static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
++{
++	u8 current_bits;
++	int rc = pci_read_config_byte(dev, offset, &current_bits);
++
++	if (rc)
++		return rc;
++	if (value != PCI_BIST_START
++	    && ((current_bits ^ value) & ~PCI_BIST_START) != 0)
++		return 0;
++	return pci_write_config_byte(dev, offset, value);
++}
++
++static struct config_field header_common[] = {	/* added for every header type */
++	{	/* command register: writes filtered by command_write() */
++	 .offset    = PCI_COMMAND,
++	 .size      = 2,
++	 .u.w.read  = pciback_read_config_word,
++	 .u.w.write = command_write,
++	},
++	{	/* read handler only; value comes from dev->irq (interrupt_read) */
++	 .offset    = PCI_INTERRUPT_LINE,
++	 .size      = 1,
++	 .u.b.read  = interrupt_read,
++	},
++	{	/* read handler only */
++	 .offset    = PCI_INTERRUPT_PIN,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	},
++	{
++	 /* Any side effects of letting driver domain control cache line? */
++	 .offset    = PCI_CACHE_LINE_SIZE,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	 .u.b.write = pciback_write_config_byte,
++	},
++	{	/* read handler only */
++	 .offset    = PCI_LATENCY_TIMER,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	},
++	{	/* writes filtered by bist_write() */
++	 .offset    = PCI_BIST,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	 .u.b.write = bist_write,
++	},
++	{	/* presumably the end-of-table marker -- confirm in conf_space.c */
++	 .size = 0,
++	},
++};
++
++#define CFG_FIELD_BAR(reg_offset) /* 4-byte BAR field, bar_* handlers */ \
++	{ 						\
++	 .offset     = reg_offset, 			\
++	 .size       = 4, 				\
++	 .init       = bar_init, 			\
++	 .reset      = bar_reset, 			\
++	 .release    = bar_release, 			\
++	 .u.dw.read  = bar_read, 			\
++	 .u.dw.write = bar_write, 			\
++	 }
++
++#define CFG_FIELD_ROM(reg_offset) /* as BAR, but rom_init/rom_write */ \
++	{ 						\
++	 .offset     = reg_offset, 			\
++	 .size       = 4, 				\
++	 .init       = rom_init, 			\
++	 .reset      = bar_reset, 			\
++	 .release    = bar_release, 			\
++	 .u.dw.read  = bar_read, 			\
++	 .u.dw.write = rom_write, 			\
++	 }
++
++static struct config_field header_0[] = {	/* PCI_HEADER_TYPE_NORMAL devices */
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
++	CFG_FIELD_ROM(PCI_ROM_ADDRESS),
++	{	/* presumably the end-of-table marker -- confirm in conf_space.c */
++	 .size = 0,
++	},
++};
++
++static struct config_field header_1[] = {	/* PCI_HEADER_TYPE_BRIDGE devices */
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
++	CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
++	{	/* presumably the end-of-table marker -- confirm in conf_space.c */
++	 .size = 0,
++	},
++};
++
++/* Install the emulated header fields for @dev: header_common always,
++ * then header_0 or header_1 depending on dev->hdr_type.  Returns the
++ * result of pciback_config_add_fields(), or -EINVAL (after logging)
++ * when the header type is neither normal nor bridge.
++ */
++int pciback_config_header_add_fields(struct pci_dev *dev)
++{
++	struct config_field *type_fields;
++	int rc;
++
++	rc = pciback_config_add_fields(dev, header_common);
++	if (rc)
++		return rc;
++
++	/* pick the BAR/ROM table that goes with this header layout */
++	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
++		type_fields = header_0;
++	} else if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
++		type_fields = header_1;
++	} else {
++		printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
++		       pci_name(dev), dev->hdr_type);
++		return -EINVAL;
++	}
++
++	return pciback_config_add_fields(dev, type_fields);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/conf_space_quirks.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,126 @@
++/*
++ * PCI Backend - Handle special overlays for broken devices.
++ *
++ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++LIST_HEAD(pciback_quirks);
++
++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
++{	/* Exact-ID lookup: pci_match_id() would walk past a single devid. */
++	struct pciback_config_quirk *tmp_quirk;
++
++	list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list)
++		if (tmp_quirk->devid.vendor == dev->vendor
++		    && tmp_quirk->devid.device == dev->device
++		    && tmp_quirk->devid.subvendor == dev->subsystem_vendor
++		    && tmp_quirk->devid.subdevice == dev->subsystem_device)
++			return tmp_quirk;
++	printk(KERN_DEBUG "quirk didn't match any device pciback knows about\n");
++	return NULL;
++}
++
++static inline void register_quirk(struct pciback_config_quirk *quirk)
++{
++	list_add_tail(&quirk->quirks_list, &pciback_quirks);	/* no locking here */
++}
++
++/* Return 1 if a config field of @dev already sits at offset @reg, else 0.
++ * Walks dev_data->config_fields comparing OFFSET() of each entry.
++ */
++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
++{
++	struct pciback_dev_data *priv = pci_get_drvdata(dev);
++	struct config_field_entry *entry;
++
++	list_for_each_entry(entry, &priv->config_fields, list)
++		if (OFFSET(entry) == reg)
++			return 1;
++
++	return 0;
++}
++
++/* Add quirk @field to @dev with the pass-through accessors for its size
++ * (1, 2 or 4 bytes).  Returns -EINVAL for any other size and any negative
++ * error from pciback_config_add_field(), which used to be discarded.
++ */
++int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
++				    *field)
++{
++	int err;
++
++	switch (field->size) {
++	case 1:
++		field->u.b.read = pciback_read_config_byte;
++		field->u.b.write = pciback_write_config_byte;
++		break;
++	case 2:
++		field->u.w.read = pciback_read_config_word;
++		field->u.w.write = pciback_write_config_word;
++		break;
++	case 4:
++		field->u.dw.read = pciback_read_config_dword;
++		field->u.dw.write = pciback_write_config_dword;
++		break;
++	default:
++		return -EINVAL;
++	}
++	err = pciback_config_add_field(dev, field);
++	return err < 0 ? err : 0;	/* NOTE(review): >0 kept as success */
++}
++
++/* Register a quirk entry for @dev on pciback_quirks.  The entry's devid
++ * copies the device's vendor/device/subsystem IDs with class, class_mask and
++ * driver_data zeroed.  Returns 0, or -ENOMEM when the allocation fails.
++ */
++int pciback_config_quirks_init(struct pci_dev *dev)
++{
++	struct pciback_config_quirk *entry;
++
++	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
++	if (!entry)
++		return -ENOMEM;
++
++	entry->pdev = dev;
++	entry->devid.vendor = dev->vendor;
++	entry->devid.device = dev->device;
++	entry->devid.subvendor = dev->subsystem_vendor;
++	entry->devid.subdevice = dev->subsystem_device;
++	entry->devid.class = 0;
++	entry->devid.class_mask = 0;
++	entry->devid.driver_data = 0UL;
++
++	register_quirk(entry);
++
++	return 0;
++}
++
++void pciback_config_field_free(struct config_field *field)
++{
++	kfree(field);	/* installed as ->clean by pcistub_reg_add() */
++}
++
++/* Drop the quirk entry registered for @dev: the entry returned by
++ * pciback_find_quirk() is unlinked from pciback_quirks and freed.
++ * Returns 0 on success, or -ENXIO when no entry matches @dev.
++ * No locking is done here; that is left to the caller.
++ */
++int pciback_config_quirk_release(struct pci_dev *dev)
++{
++	struct pciback_config_quirk *entry = pciback_find_quirk(dev);
++
++	if (!entry)
++		return -ENXIO;
++
++	list_del(&entry->quirks_list);
++	kfree(entry);
++
++	return 0;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/conf_space_quirks.h	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,35 @@
++/*
++ * PCI Backend - Data structures for special overlays for broken devices.
++ *
++ * Ryan Wilson <hap9@epoch.ncsc.mil>
++ * Chris Bookholt <hap10@epoch.ncsc.mil>
++ */
++
++#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
++#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
++
++#include <linux/pci.h>
++#include <linux/list.h>
++
++struct pciback_config_quirk {
++	struct list_head quirks_list;	/* link on the global pciback_quirks list */
++	struct pci_device_id devid;	/* a single ID, not a terminated table */
++	struct pci_dev *pdev;	/* device the quirk was registered for */
++};
++
++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);	/* or NULL */
++
++int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
++				    *field);	/* field->size must be 1, 2 or 4 */
++
++int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);	/* NOTE(review): no definition in conf_space_quirks.c */
++
++int pciback_config_quirks_init(struct pci_dev *dev);	/* 0 or -ENOMEM */
++
++void pciback_config_field_free(struct config_field *field);	/* kfree()s field */
++
++int pciback_config_quirk_release(struct pci_dev *dev);	/* 0 or -ENXIO */
++
++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);	/* 1 if in use */
++
++#endif
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/passthrough.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,157 @@
++/*
++ * PCI Backend - Provides restricted access to the real PCI bus topology
++ *               to the frontend
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++
++#include <linux/list.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++struct passthrough_dev_data {
++	/* Access to dev_list must be protected by lock */
++	struct list_head dev_list;	/* pci_dev_entry items, one per added device */
++	spinlock_t lock;
++};
++
++/* Look up a device on this backend's dev_list by (domain, bus, devfn);
++ * the list is walked under the lock.  Returns the pci_dev or NULL. */
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
++{
++	struct passthrough_dev_data *priv = pdev->pci_dev_data;
++	struct pci_dev_entry *entry;
++	struct pci_dev *match = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&priv->lock, flags);
++	list_for_each_entry(entry, &priv->dev_list, list) {
++		if (domain != (unsigned int)pci_domain_nr(entry->dev->bus)
++		    || bus != (unsigned int)entry->dev->bus->number
++		    || devfn != entry->dev->devfn)
++			continue;
++		match = entry->dev;
++		break;
++	}
++	spin_unlock_irqrestore(&priv->lock, flags);
++
++	return match;
++}
++
++/* Append @dev to this backend's device list (under the list lock).
++ * Returns 0, or -ENOMEM if the list entry cannot be allocated. */
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	struct passthrough_dev_data *priv = pdev->pci_dev_data;
++	struct pci_dev_entry *entry = kmalloc(sizeof(*entry), GFP_KERNEL);
++	unsigned long flags;
++
++	if (!entry)
++		return -ENOMEM;
++	entry->dev = dev;
++
++	spin_lock_irqsave(&priv->lock, flags);
++	list_add_tail(&entry->list, &priv->dev_list);
++	spin_unlock_irqrestore(&priv->lock, flags);
++	return 0;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{	/* Unlink @dev from this backend's list and hand it back to pcistub. */
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry, *t;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&dev_data->lock, flags);
++
++	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
++		if (dev_entry->dev == dev) {	/* no break: drops every match */
++			list_del(&dev_entry->list);
++			found_dev = dev_entry->dev;
++			kfree(dev_entry);
++		}
++	}
++
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++
++	if (found_dev)	/* done once, after the lock has been released */
++		pcistub_put_pci_dev(found_dev);
++}
++
++/* Allocate the passthrough bookkeeping (lock + empty device list) and attach
++ * it to @pdev->pci_dev_data.  Returns 0 or -ENOMEM.
++ */
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	struct passthrough_dev_data *priv = kmalloc(sizeof(*priv), GFP_KERNEL);
++
++	if (!priv)
++		return -ENOMEM;
++
++	INIT_LIST_HEAD(&priv->dev_list);
++	spin_lock_init(&priv->lock);
++	pdev->pci_dev_data = priv;
++
++	return 0;
++}
++
++/* Call @publish_root_cb(pdev, domain, bus) for each exported device that has
++ * no exported parent bridge; stop at and return the first callback error.
++ * NOTE(review): the callback is invoked with dev_data->lock (spinlock) held.
++ */
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_root_cb)
++{
++	int err = 0;
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry, *e;
++	struct pci_dev *dev;
++	int found;
++	unsigned int domain, bus;
++
++	spin_lock(&dev_data->lock);
++	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
++		/* Walk up the bridge chain looking for an exported parent */
++		found = 0;
++		dev = dev_entry->dev->bus->self;
++		for (; !found && dev != NULL; dev = dev->bus->self) {
++			list_for_each_entry(e, &dev_data->dev_list, list) {
++				if (dev == e->dev) {
++					found = 1;
++					break;
++				}
++			}
++			/* Sometimes topmost bridge links to itself. */
++			if (dev == dev->bus->self)
++				break;
++		}
++		if (found)
++			continue;
++		domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
++		bus = (unsigned int)dev_entry->dev->bus->number;
++		err = publish_root_cb(pdev, domain, bus);
++		if (err)
++			break;
++	}
++	spin_unlock(&dev_data->lock);
++	return err;
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{	/* Teardown; NOTE(review): dev_list is walked without taking ->lock */
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry, *t;
++
++	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
++		list_del(&dev_entry->list);
++		pcistub_put_pci_dev(dev_entry->dev);	/* give back to pcistub */
++		kfree(dev_entry);
++	}
++
++	kfree(dev_data);
++	pdev->pci_dev_data = NULL;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/pci_stub.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,929 @@
++/*
++ * PCI Stub Driver - Grabs devices in backend to be exported later
++ *
++ * Ryan Wilson <hap9@epoch.ncsc.mil>
++ * Chris Bookholt <hap10@epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/kref.h>
++#include <asm/atomic.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++static char *pci_devs_to_hide = NULL;
++module_param_named(hide, pci_devs_to_hide, charp, 0444);
++
++struct pcistub_device_id {	/* one domain:bus:devfn the driver should seize */
++	struct list_head slot_list;	/* link on pcistub_device_ids */
++	int domain;
++	unsigned char bus;
++	unsigned int devfn;	/* stored as PCI_DEVFN(slot, func) */
++};
++static LIST_HEAD(pcistub_device_ids);
++static DEFINE_SPINLOCK(device_ids_lock);	/* guards pcistub_device_ids */
++
++struct pcistub_device {	/* wrapper around one seized pci_dev */
++	struct kref kref;	/* pcistub_device_release() runs at zero */
++	struct list_head dev_list;	/* on pcistub_devices or seized_devices */
++	spinlock_t lock;	/* taken around updates of pdev */
++
++	struct pci_dev *dev;	/* reference obtained with pci_dev_get() */
++	struct pciback_device *pdev;	/* non-NULL if struct pci_dev is in use */
++};
++
++/* Access to pcistub_devices & seized_devices lists and the initialize_devices
++ * flag must be locked with pcistub_devices_lock
++ */
++static DEFINE_SPINLOCK(pcistub_devices_lock);
++static LIST_HEAD(pcistub_devices);
++
++/* wait for device_initcall before initializing our devices
++ * (see pcistub_init_devices_late)
++ */
++static int initialize_devices = 0;
++static LIST_HEAD(seized_devices);
++
++/* Allocate a stub wrapper for @dev that holds a reference on the pci_dev and
++ * has its kref and lock initialised.  Returns NULL if the allocation fails
++ * or pci_dev_get() returns NULL. */
++static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
++{
++	struct pcistub_device *stub;
++
++	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
++
++	stub = kzalloc(sizeof(*stub), GFP_ATOMIC);
++	if (stub)
++		stub->dev = pci_dev_get(dev);
++	if (!stub || !stub->dev) {
++		kfree(stub);
++		return NULL;
++	}
++
++	spin_lock_init(&stub->lock);
++	kref_init(&stub->kref);
++	return stub;
++}
++
++/* kref release callback: don't call directly, use pcistub_device_put() */
++static void pcistub_device_release(struct kref *kref)
++{
++	struct pcistub_device *psdev;
++
++	psdev = container_of(kref, struct pcistub_device, kref);
++
++	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
++
++	/* Clean-up the device, then free its drvdata (set in init_device) */
++	pciback_reset_device(psdev->dev);
++	pciback_config_free_dyn_fields(psdev->dev);
++	pciback_config_free_dev(psdev->dev);
++	kfree(pci_get_drvdata(psdev->dev));
++	pci_set_drvdata(psdev->dev, NULL);
++
++	pci_dev_put(psdev->dev);	/* pairs with pci_dev_get() in alloc */
++
++	kfree(psdev);
++}
++
++static inline void pcistub_device_get(struct pcistub_device *psdev)
++{
++	kref_get(&psdev->kref);	/* balance with pcistub_device_put() */
++}
++
++static inline void pcistub_device_put(struct pcistub_device *psdev)
++{
++	kref_put(&psdev->kref, pcistub_device_release);	/* may free psdev */
++}
++
++/* Find the device at domain:bus:slot.func on pcistub_devices.  A reference
++ * is taken on the entry that is returned; the caller has to drop it with
++ * pcistub_device_put().  Returns NULL when nothing matches.
++ */
++static struct pcistub_device *pcistub_device_find(int domain, int bus,
++						  int slot, int func)
++{
++	struct pcistub_device *iter, *match = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	list_for_each_entry(iter, &pcistub_devices, dev_list) {
++		if (iter->dev == NULL
++		    || domain != pci_domain_nr(iter->dev->bus)
++		    || bus != iter->dev->bus->number
++		    || PCI_DEVFN(slot, func) != iter->dev->devfn)
++			continue;
++		pcistub_device_get(iter);
++		match = iter;
++		break;
++	}
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	return match;
++}
++
++static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
++						  struct pcistub_device *psdev)
++{	/* Claim @psdev for @pdev; NULL if another backend already owns it. */
++	struct pci_dev *pci_dev = NULL;
++	unsigned long flags;
++
++	pcistub_device_get(psdev);	/* reference kept while @pdev owns it */
++
++	spin_lock_irqsave(&psdev->lock, flags);
++	if (!psdev->pdev) {	/* unowned: record the new owner */
++		psdev->pdev = pdev;
++		pci_dev = psdev->dev;
++	}
++	spin_unlock_irqrestore(&psdev->lock, flags);
++
++	if (!pci_dev)	/* claim failed: undo the get above */
++		pcistub_device_put(psdev);
++
++	return pci_dev;
++}
++
++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
++					    int domain, int bus,
++					    int slot, int func)
++{	/* Claim the device at domain:bus:slot.func for @pdev, or NULL. */
++	struct pcistub_device *psdev;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev != NULL
++		    && domain == pci_domain_nr(psdev->dev->bus)
++		    && bus == psdev->dev->bus->number
++		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
++			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
++			break;	/* first match decides, even if the claim failed */
++		}
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return found_dev;
++}
++
++/* Claim the stub entry wrapping @dev for backend @pdev.  Returns the pci_dev
++ * on success, NULL if @dev is not listed or the claim is refused. */
++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
++				    struct pci_dev *dev)
++{
++	struct pcistub_device *stub;
++	struct pci_dev *claimed = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	list_for_each_entry(stub, &pcistub_devices, dev_list) {
++		if (stub->dev != dev)
++			continue;
++		claimed = pcistub_device_get_pci_dev(pdev, stub);
++		break;
++	}
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return claimed;
++}
++
++/* Give @dev back to the stub: reset it, clear its owner and drop the owner's
++ * reference.  Does nothing when @dev is not listed (was a NULL deref). */
++void pcistub_put_pci_dev(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev, *found_psdev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev == dev) {
++			found_psdev = psdev;
++			break;
++		}
++	}
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	if (!found_psdev)
++		return;
++
++	/* Cleanup our device (so it's ready for the next domain) */
++	pciback_reset_device(found_psdev->dev);
++	pciback_config_free_dyn_fields(found_psdev->dev);
++	pciback_config_reset_dev(found_psdev->dev);
++
++	spin_lock_irqsave(&found_psdev->lock, flags);
++	found_psdev->pdev = NULL;
++	spin_unlock_irqrestore(&found_psdev->lock, flags);
++	pcistub_device_put(found_psdev);
++}
++
++/* Return 1 if @dev, or any bridge above it, sits at the domain/bus/devfn
++ * recorded in @pdev_id; return 0 otherwise. */
++static int __devinit pcistub_match_one(struct pci_dev *dev,
++				       struct pcistub_device_id *pdev_id)
++{
++	while (dev != NULL) {
++		struct pci_dev *parent = dev->bus->self;
++
++		if (pci_domain_nr(dev->bus) == pdev_id->domain
++		    && dev->bus->number == pdev_id->bus
++		    && dev->devfn == pdev_id->devfn)
++			return 1;
++		/* Sometimes topmost bridge links to itself. */
++		if (parent == dev)
++			return 0;
++		dev = parent;
++	}
++	return 0;
++}
++
++/* Return 1 if @dev (or one of its parent bridges) matches any entry on
++ * pcistub_device_ids, scanning the list under device_ids_lock; else 0. */
++static int __devinit pcistub_match(struct pci_dev *dev)
++{
++	struct pcistub_device_id *id;
++	unsigned long flags;
++	int matched = 0;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry(id, &pcistub_device_ids, slot_list) {
++		matched = pcistub_match_one(dev, id);
++		if (matched)
++			break;
++	}
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++	return matched;
++}
++
++static int __devinit pcistub_init_device(struct pci_dev *dev)
++{	/* Attach drvdata + config-space state to @dev; 0 or negative errno. */
++	struct pciback_dev_data *dev_data;
++	int err = 0;
++
++	dev_dbg(&dev->dev, "initializing...\n");
++
++	/* The PCI backend is not intended to be a module (or to work with
++	 * removable PCI devices) yet. If it were, pciback_config_free()
++	 * would need to be called somewhere to free the memory allocated
++	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
++	 */
++	dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
++	if (!dev_data) {
++		err = -ENOMEM;
++		goto out;
++	}
++	pci_set_drvdata(dev, dev_data);
++
++	dev_dbg(&dev->dev, "initializing config\n");
++	err = pciback_config_init_dev(dev);
++	if (err)
++		goto out;
++
++	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
++	 * must do this here because pcibios_enable_device may specify
++	 * the pci device's true irq (and possibly its other resources)
++	 * if they differ from what's in the configuration space.
++	 * This makes the assumption that the device's resources won't
++	 * change after this point (otherwise this code may break!)
++	 */
++	dev_dbg(&dev->dev, "enabling device\n");
++	err = pci_enable_device(dev);
++	if (err)
++		goto config_release;
++
++	/* Now disable the device (this also ensures some private device
++	 * data is setup before we export)
++	 */
++	dev_dbg(&dev->dev, "reset device\n");
++	pciback_reset_device(dev);
++
++	return 0;
++
++      config_release:	/* undo pciback_config_init_dev() */
++	pciback_config_free_dev(dev);
++
++      out:	/* also reached with dev_data == NULL; kfree(NULL) is a no-op */
++	pci_set_drvdata(dev, NULL);
++	kfree(dev_data);
++	return err;
++}
++
++/*
++ * Because some initialization still happens on devices during fs_initcall,
++ * full initialization of the seized devices is deferred until
++ * device_initcall.  A device that fails to initialize is dropped.
++ */
++static int __init pcistub_init_devices_late(void)
++{
++	struct pcistub_device *psdev;
++	unsigned long flags;
++	int err = 0;
++
++	pr_debug("pciback: pcistub_init_devices_late\n");
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	while (!list_empty(&seized_devices)) {
++		psdev = container_of(seized_devices.next,
++				     struct pcistub_device, dev_list);
++		list_del(&psdev->dev_list);
++
++		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++		err = pcistub_init_device(psdev->dev);
++		if (err) {
++			dev_err(&psdev->dev->dev,
++				"error %d initializing device\n", err);
++			pci_dev_put(psdev->dev);	/* ref taken in alloc */
++			kfree(psdev);
++			psdev = NULL;
++		}
++
++		spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++		if (psdev)
++			list_add_tail(&psdev->dev_list, &pcistub_devices);
++	}
++
++	initialize_devices = 1;
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	return 0;
++}
++
++static int __devinit pcistub_seize(struct pci_dev *dev)
++{	/* Wrap @dev in a pcistub_device and initialize it now or defer it. */
++	struct pcistub_device *psdev;
++	unsigned long flags;
++	int err = 0;
++
++	psdev = pcistub_device_alloc(dev);
++	if (!psdev)
++		return -ENOMEM;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	if (initialize_devices) {	/* set by pcistub_init_devices_late() */
++		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++		/* don't want irqs disabled when calling pcistub_init_device */
++		err = pcistub_init_device(psdev->dev);
++
++		spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++		if (!err)
++			list_add(&psdev->dev_list, &pcistub_devices);
++	} else {
++		dev_dbg(&dev->dev, "deferring initialization\n");
++		list_add(&psdev->dev_list, &seized_devices);
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	if (err)	/* NOTE(review): release then runs with drvdata == NULL */
++		pcistub_device_put(psdev);
++
++	return err;
++}
++
++/* Probe callback: seize @dev when it matches an entry on
++ * pcistub_device_ids and has a normal (0) or bridge (1) header type.
++ * Every other device is refused with -ENODEV.
++ */
++static int __devinit pcistub_probe(struct pci_dev *dev,
++				   const struct pci_device_id *id)
++{
++	dev_dbg(&dev->dev, "probing...\n");
++
++	/* Didn't find the device */
++	if (!pcistub_match(dev))
++		return -ENODEV;
++
++	switch (dev->hdr_type) {
++	case PCI_HEADER_TYPE_NORMAL:
++	case PCI_HEADER_TYPE_BRIDGE:
++		break;
++	default:
++		dev_err(&dev->dev, "can't export pci devices that "
++			"don't have a normal (0) or bridge (1) "
++			"header type!\n");
++		return -ENODEV;
++	}
++
++	dev_info(&dev->dev, "seizing device\n");
++	return pcistub_seize(dev);
++}
++
++static void pcistub_remove(struct pci_dev *dev)
++{	/* Driver remove callback: drop @dev's quirk entry and stub wrapper. */
++	struct pcistub_device *psdev, *found_psdev = NULL;
++	unsigned long flags;
++
++	dev_dbg(&dev->dev, "removing\n");
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	pciback_config_quirk_release(dev);	/* return value is ignored */
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev == dev) {
++			found_psdev = psdev;
++			break;
++		}
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	if (found_psdev) {
++		dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
++			found_psdev->pdev);
++
++		if (found_psdev->pdev) {	/* read without psdev->lock */
++			printk(KERN_WARNING "pciback: ****** removing device "
++			       "%s while still in-use! ******\n",
++			       pci_name(found_psdev->dev));
++			printk(KERN_WARNING "pciback: ****** driver domain may "
++			       "still access this device's i/o resources!\n");
++			printk(KERN_WARNING "pciback: ****** shutdown driver "
++			       "domain before binding device\n");
++			printk(KERN_WARNING "pciback: ****** to other drivers "
++			       "or domains\n");
++
++			pciback_release_pci_dev(found_psdev->pdev,
++						found_psdev->dev);
++		}
++
++		spin_lock_irqsave(&pcistub_devices_lock, flags);
++		list_del(&found_psdev->dev_list);
++		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++		/* the final put for releasing from the list */
++		pcistub_device_put(found_psdev);
++	}
++}
++
++static struct pci_device_id pcistub_ids[] = {	/* wildcard: any device */
++	{
++	 .vendor = PCI_ANY_ID,
++	 .device = PCI_ANY_ID,
++	 .subvendor = PCI_ANY_ID,
++	 .subdevice = PCI_ANY_ID,
++	 },
++	{0,},	/* all-zero terminating entry */
++};
++
++/*
++ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
++ * for a normal device. I don't want it to be loaded automatically.
++ */
++
++static struct pci_driver pciback_pci_driver = {
++	.name = "pciback",
++	.id_table = pcistub_ids,	/* wildcard; pcistub_probe() filters */
++	.probe = pcistub_probe,
++	.remove = pcistub_remove,
++};
++
++/* Parse "DDDD:BB:SS.F" or, failing that, "BB:SS.F" (domain then set to 0);
++ * all numbers are hex.  Returns 0 on success and -EINVAL otherwise.
++ */
++static inline int str_to_slot(const char *buf, int *domain, int *bus,
++			      int *slot, int *func)
++{
++	int fields = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
++
++	if (fields == 4)
++		return 0;
++	if (fields < 0)
++		return -EINVAL;
++
++	/* try again without domain */
++	*domain = 0;
++	fields = sscanf(buf, " %x:%x.%x", bus, slot, func);
++
++	return fields == 3 ? 0 : -EINVAL;
++}
++
++/* Parse a quirk spec "DDDD:BB:SS.F-REG:SIZE:MASK" (all hex).  Returns 0 only
++ * when all seven fields were converted, -EINVAL otherwise. */
++static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
++			       *slot, int *func, int *reg, int *size, int *mask)
++{
++	int fields = sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x",
++			    domain, bus, slot, func, reg, size, mask);
++
++	if (fields != 7)
++		return -EINVAL;
++	return 0;
++}
++
++/* Append domain:bus:slot.func to pcistub_device_ids (the list consulted by
++ * pcistub_match()).  Returns 0, or -ENOMEM if the entry can't be allocated. */
++static int pcistub_device_id_add(int domain, int bus, int slot, int func)
++{
++	struct pcistub_device_id *id = kmalloc(sizeof(*id), GFP_KERNEL);
++	unsigned long flags;
++
++	if (!id)
++		return -ENOMEM;
++
++	id->domain = domain;
++	id->bus = bus;
++	id->devfn = PCI_DEVFN(slot, func);
++
++	pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
++		 domain, bus, slot, func);
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_add_tail(&id->slot_list, &pcistub_device_ids);
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++	return 0;
++}
++
++static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
++{	/* Remove every matching entry; 0 if any was removed, else -ENOENT. */
++	struct pcistub_device_id *pci_dev_id, *t;
++	int devfn = PCI_DEVFN(slot, func);
++	int err = -ENOENT;
++	unsigned long flags;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
++
++		if (pci_dev_id->domain == domain
++		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
++			/* Don't break; here because it's possible the same
++			 * slot could be in the list more than once
++			 */
++			list_del(&pci_dev_id->slot_list);
++			kfree(pci_dev_id);
++
++			err = 0;
++
++			pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
++				 "seize list\n", domain, bus, slot, func);
++		}
++	}
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return err;
++}
++
++/* Add a quirk field (@reg, @size bytes, @mask) to the seized device at
++ * domain:bus:slot.func.  Returns 0, -ENODEV, -ENOMEM or the add error.  The
++ * reference from pcistub_device_find() is now dropped on every path. */
++static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
++			   int size, int mask)
++{
++	int err = 0;
++	struct pcistub_device *psdev;
++	struct config_field *field;
++
++	psdev = pcistub_device_find(domain, bus, slot, func);
++	if (!psdev)
++		return -ENODEV;
++	if (!psdev->dev) {
++		err = -ENODEV;
++		goto release;
++	}
++
++	field = kzalloc(sizeof(*field), GFP_ATOMIC);
++	if (!field) {
++		err = -ENOMEM;
++		goto release;
++	}
++
++	field->offset = reg;
++	field->size = size;
++	field->mask = mask;
++	field->clean = pciback_config_field_free;	/* init/reset/release: NULL */
++	err = pciback_config_quirks_add_field(psdev->dev, field);
++	if (err)
++		kfree(field);
++      release:
++	pcistub_device_put(psdev);
++	return err;
++}
++
++/* Store handler behind the "new_slot" driver attribute: parse a slot
++ * spec from @buf with str_to_slot() and record it through
++ * pcistub_device_id_add().  Returns @count on success, else the error.
++ */
++static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
++				size_t count)
++{
++	int domain, bus, slot, func;
++	int rc = str_to_slot(buf, &domain, &bus, &slot, &func);
++
++	if (!rc)
++		rc = pcistub_device_id_add(domain, bus, slot, func);
++	if (!rc)
++		rc = count;
++
++	return rc;
++}
++
++DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
++
++/* Store handler behind the "remove_slot" driver attribute: parse a slot
++ * spec from @buf with str_to_slot() and drop it through
++ * pcistub_device_id_remove().  Returns @count on success, else the error.
++ */
++static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
++				   size_t count)
++{
++	int domain, bus, slot, func;
++	int rc = str_to_slot(buf, &domain, &bus, &slot, &func);
++
++	if (!rc)
++		rc = pcistub_device_id_remove(domain, bus, slot, func);
++	if (!rc)
++		rc = count;
++
++	return rc;
++}
++
++DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
++
++/* Show handler behind the "slots" driver attribute: prints one
++ * "DDDD:BB:SS.F" line per entry on pcistub_device_ids, stopping once
++ * PAGE_SIZE bytes are used.  Returns the number of bytes written. */
++static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
++{
++	struct pcistub_device_id *id;
++	size_t used = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry(id, &pcistub_device_ids, slot_list) {
++		if (used >= PAGE_SIZE)
++			break;
++		used += scnprintf(buf + used, PAGE_SIZE - used,
++				  "%04x:%02x:%02x.%01x\n", id->domain, id->bus,
++				  PCI_SLOT(id->devfn), PCI_FUNC(id->devfn));
++	}
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return used;
++}
++
++DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
++
++/* Store handler behind the "quirks" driver attribute: parse a quirk spec
++ * with str_to_quirk() and install it through pcistub_reg_add().
++ * Returns @count on success, else the error. */
++static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
++				 size_t count)
++{
++	int domain, bus, slot, func, reg, size, mask;
++	int rc;
++
++	rc = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
++			  &mask);
++	if (!rc)
++		rc = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
++	if (!rc)
++		rc = count;
++
++	return rc;
++}
++
++static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
++{	/* Show handler for "quirks": each quirk device, then its fields. */
++	int count = 0;
++	unsigned long flags;
++	extern struct list_head pciback_quirks;
++	struct pciback_config_quirk *quirk;
++	struct pciback_dev_data *dev_data;
++	struct config_field *field;
++	struct config_field_entry *cfg_entry;
++
++	spin_lock_irqsave(&device_ids_lock, flags);	/* NOTE(review): ids lock */
++	list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
++		if (count >= PAGE_SIZE)
++			goto out;
++
++		count += scnprintf(buf + count, PAGE_SIZE - count,
++				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
++				   quirk->pdev->bus->number,
++				   PCI_SLOT(quirk->pdev->devfn),
++				   PCI_FUNC(quirk->pdev->devfn),
++				   quirk->devid.vendor, quirk->devid.device,
++				   quirk->devid.subvendor,
++				   quirk->devid.subdevice);
++
++		dev_data = pci_get_drvdata(quirk->pdev);	/* not NULL-checked */
++
++		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++			field = cfg_entry->field;
++			if (count >= PAGE_SIZE)
++				goto out;
++
++			count += scnprintf(buf + count, PAGE_SIZE - count,
++					   "\t\t%08x:%01x:%08x\n",
++					   cfg_entry->base_offset + field->offset, 
++					   field->size, field->mask);
++		}
++	}
++
++      out:
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return count;
++}
++
++DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
++
++static ssize_t permissive_add(struct device_driver *drv, const char *buf,
++			      size_t count)
++{	/* Store handler for "permissive": set the flag on the named device. */
++	int domain, bus, slot, func;
++	int err;
++	struct pcistub_device *psdev;
++	struct pciback_dev_data *dev_data;
++	err = str_to_slot(buf, &domain, &bus, &slot, &func);
++	if (err)
++		goto out;
++	psdev = pcistub_device_find(domain, bus, slot, func);	/* takes a ref */
++	if (!psdev) {
++		err = -ENODEV;
++		goto out;
++	}
++	if (!psdev->dev) {
++		err = -ENODEV;
++		goto release;
++	}
++	dev_data = pci_get_drvdata(psdev->dev);
++	/* the driver data for a device should never be null at this point */
++	if (!dev_data) {
++		err = -ENXIO;
++		goto release;
++	}
++	if (!dev_data->permissive) {
++		dev_data->permissive = 1;
++		/* Let user know that what they're doing could be unsafe */
++		dev_warn(&psdev->dev->dev,
++			 "enabling permissive mode configuration space accesses!\n");
++		dev_warn(&psdev->dev->dev,
++			 "permissive mode is potentially unsafe!\n");
++	}
++      release:	/* drop the reference taken by pcistub_device_find() */
++	pcistub_device_put(psdev);
++      out:
++	if (!err)
++		err = count;
++	return err;
++}
++
++/*
++ * "permissive" show handler: one pci_name() line per permissive device.
++ */
++static ssize_t permissive_show(struct device_driver *drv, char *buf)
++{
++	struct pciback_dev_data *data;
++	struct pcistub_device *stub;
++	unsigned long irq_flags;
++	size_t len = 0;
++
++	spin_lock_irqsave(&pcistub_devices_lock, irq_flags);
++	list_for_each_entry(stub, &pcistub_devices, dev_list) {
++		if (len >= PAGE_SIZE)
++			break;
++		data = stub->dev ? pci_get_drvdata(stub->dev) : NULL;
++		if (data && data->permissive)
++			len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
++					 pci_name(stub->dev));
++	}
++	spin_unlock_irqrestore(&pcistub_devices_lock, irq_flags);
++	return len;
++}
++/* "permissive" driver attribute: owner read/write via the two handlers here */
++DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
++
++/* Remove every driver attribute file, then unregister the PCI driver. */
++static void pcistub_exit(void)
++{
++	struct device_driver *drv = &pciback_pci_driver.driver;
++	driver_remove_file(drv, &driver_attr_new_slot);
++	driver_remove_file(drv, &driver_attr_remove_slot);
++	driver_remove_file(drv, &driver_attr_slots);
++	driver_remove_file(drv, &driver_attr_quirks);
++	driver_remove_file(drv, &driver_attr_permissive);
++	pci_unregister_driver(&pciback_pci_driver);
++}
++
++/*
++ * Parse the pci_devs_to_hide string, a sequence of "(domain:bus:slot.func)"
++ * or "(bus:slot.func)" groups in hex, add each as a device ID, then register
++ * the PCI driver and create its attribute files.  Returns 0 on success,
++ * -EINVAL on a parse error, or the first error from the steps above.
++ */
++static int __init pcistub_init(void)
++{
++	struct device_driver *drv = &pciback_pci_driver.driver;
++	int domain, bus, slot, func;
++	int consumed, pos = 0, err = 0;
++
++	if (pci_devs_to_hide && *pci_devs_to_hide) {
++		do {
++			consumed = 0;
++			err = sscanf(pci_devs_to_hide + pos,
++				     " (%x:%x:%x.%x) %n",
++				     &domain, &bus, &slot, &func, &consumed);
++			if (err != 4) {
++				/* no domain given: retry as bus:slot.func */
++				domain = 0;
++				err = sscanf(pci_devs_to_hide + pos,
++					     " (%x:%x.%x) %n",
++					     &bus, &slot, &func, &consumed);
++				if (err != 3)
++					goto parse_error;
++			}
++
++			err = pcistub_device_id_add(domain, bus, slot, func);
++			if (err)
++				return err;
++
++			/* consumed <= 0 means the end of the string */
++			pos += consumed;
++		} while (consumed > 0 && pci_devs_to_hide[pos]);
++	}
++
++	/* If we're the first PCI Device Driver to register, we're the
++	 * first one to get offered PCI devices as they become
++	 * available (and thus we can be the first to grab them)
++	 */
++	err = pci_register_driver(&pciback_pci_driver);
++	if (err < 0)
++		return err;
++
++	err = driver_create_file(drv, &driver_attr_new_slot);
++	if (!err)
++		err = driver_create_file(drv, &driver_attr_remove_slot);
++	if (!err)
++		err = driver_create_file(drv, &driver_attr_slots);
++	if (!err)
++		err = driver_create_file(drv, &driver_attr_quirks);
++	if (!err)
++		err = driver_create_file(drv, &driver_attr_permissive);
++
++	/* undo the registration if any attribute file failed */
++	if (err)
++		pcistub_exit();
++
++	return err;
++
++parse_error:
++	printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
++	       pci_devs_to_hide + pos);
++	return -EINVAL;
++}
++
++#ifndef MODULE
++/*
++ * fs_initcall happens before device_initcall
++ * so pciback *should* get called first (b/c we 
++ * want to suck up any device before other drivers
++ * get a chance by being the first pci device
++ * driver to register)
++ */
++fs_initcall(pcistub_init);
++#endif
++
++/*
++ * Module init: config-space setup, pcistub_init() when built as a module,
++ * late stub-device init, then xenbus registration; pcistub_exit() if it fails.
++ */
++static int __init pciback_init(void)
++{
++	int rc = pciback_config_init();
++
++	if (rc)
++		return rc;
++#ifdef MODULE
++	rc = pcistub_init();
++	if (rc < 0)
++		return rc;
++#endif
++	pcistub_init_devices_late();
++	rc = pciback_xenbus_register();
++	if (rc)
++		pcistub_exit();
++	return rc;
++}
++
++static void __exit pciback_cleanup(void)
++{
++	pciback_xenbus_unregister();	/* unregister the xenbus driver first */
++	pcistub_exit();			/* then attribute files and PCI driver */
++}
++
++module_init(pciback_init);
++module_exit(pciback_cleanup);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/pciback.h	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,93 @@
++/*
++ * PCI Backend Common Data Structures & Function Declarations
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++#ifndef __XEN_PCIBACK_H__
++#define __XEN_PCIBACK_H__
++
++#include <linux/pci.h>
++#include <linux/interrupt.h>
++#include <xen/xenbus.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/workqueue.h>
++#include <asm/atomic.h>
++#include <xen/interface/io/pciif.h>
++
++struct pci_dev_entry {
++	struct list_head list;	/* link in a per-slot device list (see vpci.c) */
++	struct pci_dev *dev;	/* the exported PCI device */
++};
++
++#define _PDEVF_op_active 	(0)
++#define PDEVF_op_active 	(1<<(_PDEVF_op_active))
++
++struct pciback_device {
++	void *pci_dev_data;	/* set by pciback_init_devices() (slot.c/vpci.c) */
++	spinlock_t dev_lock;	/* taken around backend setup and attach */
++	/* xenbus device this backend instance belongs to */
++	struct xenbus_device *xdev;
++	/* watch on the backend node; be_watching is set once it is registered */
++	struct xenbus_watch be_watch;
++	u8 be_watching;
++	/* IRQ bound to the frontend's event channel (see xenbus.c) */
++	int evtchn_irq;
++	/* area mapped from the frontend's grant, and its address as shared info */
++	struct vm_struct *sh_area;
++	struct xen_pci_sharedinfo *sh_info;
++	/* PDEVF_* bits */
++	unsigned long flags;
++	/* work item that runs pciback_do_op() */
++	struct work_struct op_work;
++};
++
++struct pciback_dev_data {
++	struct list_head config_fields;	/* list of struct config_field_entry */
++	int permissive;		/* set to 1 through the "permissive" attribute */
++	int warned_on_write;	/* NOTE(review): presumably a warn-once latch; confirm */
++};
++
++/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
++					    int domain, int bus,
++					    int slot, int func);
++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
++				    struct pci_dev *dev);
++void pcistub_put_pci_dev(struct pci_dev *dev);
++
++/* Ensure a device is turned off or reset */
++void pciback_reset_device(struct pci_dev *pdev);
++
++/* Access a virtual configuration space for a PCI device */
++int pciback_config_init(void);
++int pciback_config_init_dev(struct pci_dev *dev);
++void pciback_config_free_dyn_fields(struct pci_dev *dev);
++void pciback_config_reset_dev(struct pci_dev *dev);
++void pciback_config_free_dev(struct pci_dev *dev);
++int pciback_config_read(struct pci_dev *dev, int offset, int size,
++			u32 * ret_val);
++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
++
++/* Handle requests for specific devices from the frontend */
++typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
++				    unsigned int domain, unsigned int bus);
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn);
++int pciback_init_devices(struct pciback_device *pdev);
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb cb);
++void pciback_release_devices(struct pciback_device *pdev);
++
++/* Handles events from front-end */
++irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
++void pciback_do_op(void *data);
++
++int pciback_xenbus_register(void);
++void pciback_xenbus_unregister(void);
++
++extern int verbose_request;
++#endif
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/pciback_ops.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,95 @@
++/*
++ * PCI Backend Operations - respond to PCI requests from Frontend
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <asm/bitops.h>
++#include <xen/evtchn.h>
++#include "pciback.h"
++
++int verbose_request = 0;
++module_param(verbose_request, int, 0644);
++
++/*
++ * Quiesce a device before it is exported.  A PCI_HEADER_TYPE_NORMAL device
++ * is disabled and its command register zeroed; any other header type only
++ * has PCI_COMMAND_INVALIDATE cleared if set.  See pciback_config_reset_dev()
++ * for the reset of the virtual configuration space.
++ */
++void pciback_reset_device(struct pci_dev *dev)
++{
++	u16 command;
++
++	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
++		/* Bridges and the like are not disabled outright */
++		pci_read_config_word(dev, PCI_COMMAND, &command);
++		if (!(command & PCI_COMMAND_INVALIDATE))
++			return;
++		command &= ~PCI_COMMAND_INVALIDATE;
++		pci_write_config_word(dev, PCI_COMMAND, command);
++		dev->is_busmaster = 0;
++		return;
++	}
++
++	pci_disable_device(dev);
++	pci_write_config_word(dev, PCI_COMMAND, 0);
++	dev->is_enabled = 0;
++	dev->is_busmaster = 0;
++}
++
++/* Queue op_work if the frontend flagged a request and none is in progress. */
++static inline void test_and_schedule_op(struct pciback_device *pdev)
++{
++	unsigned long *fe_flags = (unsigned long *)&pdev->sh_info->flags;
++	if (test_bit(_XEN_PCIF_active, fe_flags) &&
++	    !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
++		schedule_work(&pdev->op_work);
++}
++
++/* Work-queue handler that executes one frontend request.  It must run in
++ * process context because some of the pci_* functions can sleep (mostly due
++ * to ACPI use of semaphores).  data is the struct pciback_device; the result
++ * is stored in the shared op's err field (and value, for reads). */
++void pciback_do_op(void *data)
++{
++	struct pciback_device *pdev = data;
++	struct pci_dev *dev;
++	struct xen_pci_op *op = &pdev->sh_info->op;
++
++	dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
++
++	if (dev == NULL)
++		op->err = XEN_PCI_ERR_dev_not_found;
++	else if (op->cmd == XEN_PCI_OP_conf_read)
++		op->err = pciback_config_read(dev, op->offset, op->size,
++					      &op->value);
++	else if (op->cmd == XEN_PCI_OP_conf_write)
++		op->err = pciback_config_write(dev, op->offset, op->size,
++					       op->value);
++	else
++		op->err = XEN_PCI_ERR_not_implemented;
++
++	/* Publish the result, then tell the driver domain that we're done. */
++	wmb();
++	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
++	notify_remote_via_irq(pdev->evtchn_irq);
++
++	/* Mark that we're done so a new request can be scheduled. */
++	smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
++	clear_bit(_PDEVF_op_active, &pdev->flags);
++	smp_mb__after_clear_bit(); /* /before/ final check for work */
++
++	/* Check to see if the driver domain tried to start another request in
++	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. */
++	test_and_schedule_op(pdev);
++}
++
++/* Event-channel interrupt handler: dev_id is the struct pciback_device. */
++irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
++{
++	struct pciback_device *backend = dev_id;
++	/* the request itself is carried out later by the op_work item */
++	test_and_schedule_op(backend);
++	return IRQ_HANDLED;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/slot.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,151 @@
++/*
++ * PCI Backend - Provides a Virtual PCI bus (with real devices)
++ *               to the frontend
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil> (vpci.c)
++ *   Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c
++ */
++
++#include <linux/list.h>
++#include <linux/slab.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++/* There are at most 32 slots in a pci bus.  */
++#define PCI_SLOT_MAX 32
++
++#define PCI_BUS_NBR 2
++
++struct slot_dev_data {
++	/* One device per (bus, slot); access to slots must be protected by lock */
++	struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
++	spinlock_t lock;
++};
++
++/*
++ * Look up the device at virtual (domain, bus, devfn).  Only domain 0 and
++ * function 0 within PCI_BUS_NBR/PCI_SLOT_MAX can match; else NULL.
++ */
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
++{
++	struct slot_dev_data *sd = pdev->pci_dev_data;
++	struct pci_dev *found;
++	unsigned long irq_flags;
++
++	if (domain != 0 || PCI_FUNC(devfn) != 0 ||
++	    PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
++		return NULL;
++	spin_lock_irqsave(&sd->lock, irq_flags);
++	found = sd->slots[bus][PCI_SLOT(devfn)];
++	spin_unlock_irqrestore(&sd->lock, irq_flags);
++	return found;
++}
++
++/*
++ * Assign dev to the first free virtual slot.  Returns 0, -EFAULT for a
++ * bridge, or -ENOMEM when full; that failure is reported through
++ * xenbus_dev_fatal() only after the irq-disabling lock is dropped.
++ */
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	int err = 0, slot, bus;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
++		err = -EFAULT;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Can't export bridges on the virtual PCI bus");
++		return err;
++	}
++	spin_lock_irqsave(&slot_dev->lock, flags);
++	for (bus = 0; bus < PCI_BUS_NBR; bus++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			if (slot_dev->slots[bus][slot] == NULL) {
++				printk(KERN_INFO
++				       "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
++				       pci_name(dev), slot, bus);
++				slot_dev->slots[bus][slot] = dev;
++				goto unlock;
++			}
++		}
++	err = -ENOMEM;
++unlock:
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++	if (err)	/* may sleep, so must not run under the spinlock */
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "No more space on root virtual PCI bus");
++	return err;
++}
++
++/*
++ * Clear the virtual slot holding dev, then return dev to pcistub unlocked.
++ */
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	struct slot_dev_data *sd = pdev->pci_dev_data;
++	struct pci_dev *released = NULL;
++	unsigned long irq_flags;
++	int b, s;
++
++	spin_lock_irqsave(&sd->lock, irq_flags);
++	for (b = 0; b < PCI_BUS_NBR; b++)
++		for (s = 0; s < PCI_SLOT_MAX; s++) {
++			if (sd->slots[b][s] != dev)
++				continue;
++			sd->slots[b][s] = NULL;
++			released = dev;
++			goto unlock;
++		}
++unlock:
++	spin_unlock_irqrestore(&sd->lock, irq_flags);
++	if (released)
++		pcistub_put_pci_dev(released);
++}
++
++/*
++ * Allocate the per-backend slot table with every slot empty and attach it
++ * to pdev->pci_dev_data.  Returns 0, or -ENOMEM if allocation fails.
++ */
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	struct slot_dev_data *sd = kmalloc(sizeof(*sd), GFP_KERNEL);
++	int b, s;
++
++	if (!sd)
++		return -ENOMEM;
++
++	spin_lock_init(&sd->lock);
++	for (b = 0; b < PCI_BUS_NBR; b++)
++		for (s = 0; s < PCI_SLOT_MAX; s++)
++			sd->slots[b][s] = NULL;
++	pdev->pci_dev_data = sd;
++	return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_cb)
++{
++	/* Only one root is published: domain 0, bus 0; returns publish_cb's result */
++	return publish_cb(pdev, 0, 0);
++}
++
++/* Hand every exported device back to pcistub and free the slot table. */
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	struct slot_dev_data *sd = pdev->pci_dev_data;
++	int b, s;
++
++	for (b = 0; b < PCI_BUS_NBR; b++)
++		for (s = 0; s < PCI_SLOT_MAX; s++) {
++			if (sd->slots[b][s] == NULL)
++				continue;
++			pcistub_put_pci_dev(sd->slots[b][s]);
++		}
++
++	pdev->pci_dev_data = NULL;
++	kfree(sd);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/vpci.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,204 @@
++/*
++ * PCI Backend - Provides a Virtual PCI bus (with real devices)
++ *               to the frontend
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++
++#include <linux/list.h>
++#include <linux/slab.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++#define PCI_SLOT_MAX 32
++
++struct vpci_dev_data {
++	/* One pci_dev_entry list per virtual slot; access must hold lock */
++	struct list_head dev_list[PCI_SLOT_MAX];
++	spinlock_t lock;
++};
++
++static inline struct list_head *list_first(struct list_head *head)
++{
++	return head->next;	/* caller checks list_empty() before using it */
++}
++
++/*
++ * Find the device exported at virtual (domain, bus, devfn).  Only domain 0
++ * and bus 0 exist; within the slot's list the entry whose real function
++ * number equals PCI_FUNC(devfn) is returned, or NULL if there is none.
++ */
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
++{
++	struct vpci_dev_data *vpci = pdev->pci_dev_data;
++	struct pci_dev *match = NULL;
++	struct pci_dev_entry *e;
++	unsigned long irq_flags;
++
++	if (domain != 0 || bus != 0 || PCI_SLOT(devfn) >= PCI_SLOT_MAX)
++		return NULL;
++
++	spin_lock_irqsave(&vpci->lock, irq_flags);
++	list_for_each_entry(e, &vpci->dev_list[PCI_SLOT(devfn)], list) {
++		if (PCI_FUNC(e->dev->devfn) != PCI_FUNC(devfn))
++			continue;
++		match = e->dev;
++		break;
++	}
++	spin_unlock_irqrestore(&vpci->lock, irq_flags);
++
++	return match;
++}
++
++/* 1 when l and r share PCI domain, bus and slot (function may differ). */
++static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
++{
++	if (pci_domain_nr(l->bus) != pci_domain_nr(r->bus))
++		return 0;
++
++	return l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn);
++}
++
++/*
++ * Place dev on the virtual PCI bus, beside functions of the same physical
++ * slot if present, else in the first empty slot.  Returns 0, -EFAULT for a
++ * bridge, or -ENOMEM.  If the bus is full the unused entry is freed, and
++ * xenbus_dev_fatal() runs only after the lock is dropped as it may sleep.
++ */
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	int err = 0, slot;
++	struct pci_dev_entry *t, *dev_entry;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
++		err = -EFAULT;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Can't export bridges on the virtual PCI bus");
++		return err;
++	}
++
++	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
++	if (!dev_entry) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error adding entry to virtual PCI bus");
++		return err;
++	}
++	dev_entry->dev = dev;
++
++	spin_lock_irqsave(&vpci_dev->lock, flags);
++	/* Keep multi-function devices together on the virtual PCI bus */
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		if (list_empty(&vpci_dev->dev_list[slot]))
++			continue;
++		t = list_entry(list_first(&vpci_dev->dev_list[slot]),
++			       struct pci_dev_entry, list);
++		if (!match_slot(dev, t->dev))
++			continue;
++		pr_info("pciback: vpci: %s: "
++			"assign to virtual slot %d func %d\n",
++			pci_name(dev), slot, PCI_FUNC(dev->devfn));
++		list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]);
++		goto unlock;
++	}
++
++	/* Assign to a new slot on the virtual PCI bus */
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		if (!list_empty(&vpci_dev->dev_list[slot]))
++			continue;
++		printk(KERN_INFO
++		       "pciback: vpci: %s: assign to virtual slot %d\n",
++		       pci_name(dev), slot);
++		list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]);
++		goto unlock;
++	}
++	err = -ENOMEM;
++unlock:
++	spin_unlock_irqrestore(&vpci_dev->lock, flags);
++	if (err) {
++		kfree(dev_entry);
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "No more space on root virtual PCI bus");
++	}
++	return err;
++}
++
++/*
++ * Unlink and free the list entry that refers to dev, then give dev back
++ * to pcistub once the lock is released.  No-op if dev is not on the bus.
++ */
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	struct vpci_dev_data *vpci = pdev->pci_dev_data;
++	struct pci_dev *released = NULL;
++	struct pci_dev_entry *e, *n;
++	unsigned long irq_flags;
++	int slot;
++
++	spin_lock_irqsave(&vpci->lock, irq_flags);
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		list_for_each_entry_safe(e, n, &vpci->dev_list[slot], list) {
++			if (e->dev != dev)
++				continue;
++			released = e->dev;
++			list_del(&e->list);
++			kfree(e);
++			goto unlock;
++		}
++	}
++unlock:
++	spin_unlock_irqrestore(&vpci->lock, irq_flags);
++	if (released)
++		pcistub_put_pci_dev(released);
++}
++
++/*
++ * Allocate the per-backend virtual bus state with an empty device list for
++ * each slot and store it in pdev->pci_dev_data.  Returns 0 or -ENOMEM.
++ */
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	struct vpci_dev_data *vpci = kmalloc(sizeof(*vpci), GFP_KERNEL);
++	int i;
++
++	if (!vpci)
++		return -ENOMEM;
++
++	spin_lock_init(&vpci->lock);
++	for (i = 0; i < PCI_SLOT_MAX; i++)
++		INIT_LIST_HEAD(&vpci->dev_list[i]);
++
++	pdev->pci_dev_data = vpci;
++	return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_cb)
++{
++	/* Only one root is published: domain 0, bus 0; returns publish_cb's result */
++	return publish_cb(pdev, 0, 0);
++}
++
++/* Return every exported device to pcistub and free the virtual bus state. */
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	struct vpci_dev_data *vpci = pdev->pci_dev_data;
++	struct pci_dev_entry *e, *n;
++	int i;
++
++	for (i = 0; i < PCI_SLOT_MAX; i++) {
++		list_for_each_entry_safe(e, n, &vpci->dev_list[i], list) {
++			list_del(&e->list);
++			pcistub_put_pci_dev(e->dev);
++			kfree(e);
++		}
++	}
++
++	pdev->pci_dev_data = NULL;
++	kfree(vpci);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pciback/xenbus.c	2007-08-27 14:02:01.000000000 -0400
+@@ -0,0 +1,454 @@
++/*
++ * PCI Backend Xenbus Setup - handles setup with frontend and xend
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/vmalloc.h>
++#include <xen/xenbus.h>
++#include <xen/evtchn.h>
++#include "pciback.h"
++
++#define INVALID_EVTCHN_IRQ  (-1)
++
++/*
++ * Allocate and initialise the backend state for xdev and publish it in
++ * xdev->dev.driver_data.  Returns the new pciback_device, or NULL if the
++ * allocation or pciback_init_devices() fails; in that case driver_data
++ * is reset so it never points at the freed structure.
++ */
++static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
++{
++	struct pciback_device *pdev;
++
++	pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
++	if (pdev == NULL)
++		return NULL;
++	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
++
++	pdev->xdev = xdev;
++	xdev->dev.driver_data = pdev;
++	spin_lock_init(&pdev->dev_lock);
++	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
++	/* kzalloc() already zeroed sh_area, sh_info and be_watching */
++	INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
++	if (pciback_init_devices(pdev)) {
++		xdev->dev.driver_data = NULL;
++		kfree(pdev);
++		return NULL;
++	}
++	return pdev;
++}
++/* Undo alloc_pdev() and any attach: stop event sources, then free it all. */
++static void free_pdev(struct pciback_device *pdev)
++{
++	if (pdev->be_watching)
++		unregister_xenbus_watch(&pdev->be_watch);
++
++	/* Ensure the guest can't trigger our handler before removing devices */
++	if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ)
++		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
++
++	/* If the driver domain started an op, make sure we complete it or
++	 * delete it before releasing the shared memory */
++	cancel_delayed_work(&pdev->op_work);
++	flush_scheduled_work();
++	/* sh_info is only set once the shared area has been mapped */
++	if (pdev->sh_info)
++		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
++	/* Release every exported device (this also frees pci_dev_data) */
++	pciback_release_devices(pdev);
++	/* Break the xenbus device <-> pdev link before freeing pdev */
++	pdev->xdev->dev.driver_data = NULL;
++	pdev->xdev = NULL;
++
++	kfree(pdev);
++}
++
++/*
++ * Map the area granted by gnt_ref and bind remote_evtchn to
++ * pciback_handle_event(), recording both in pdev.  Returns 0 or a negative
++ * error; a mapping made before a failed bind stays recorded in pdev.
++ */
++static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
++			     int remote_evtchn)
++{
++	struct xenbus_device *xdev = pdev->xdev;
++	struct vm_struct *ring;
++	int irq;
++
++	dev_dbg(&xdev->dev,
++		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
++		gnt_ref, remote_evtchn);
++	ring = xenbus_map_ring_valloc(xdev, gnt_ref);
++	if (IS_ERR(ring))
++		return PTR_ERR(ring);
++	pdev->sh_area = ring;
++	pdev->sh_info = ring->addr;
++
++	irq = bind_interdomain_evtchn_to_irqhandler(
++		xdev->otherend_id, remote_evtchn, pciback_handle_event,
++		SA_SAMPLE_RANDOM, "pciback", pdev);
++	if (irq < 0) {
++		xenbus_dev_fatal(xdev, irq,
++				 "Error binding event channel to IRQ");
++		return irq;
++	}
++	pdev->evtchn_irq = irq;
++	dev_dbg(&xdev->dev, "Attached!\n");
++	return 0;
++}
++
++/*
++ * Once both ends are in XenbusStateInitialised, read the frontend's
++ * pci-op-ref, event-channel and magic, verify the magic, attach and switch
++ * to XenbusStateConnected.  Returns 0 if done or not yet due, else an
++ * error; a magic mismatch returns -EFAULT rather than reporting success.
++ */
++static int pciback_attach(struct pciback_device *pdev)
++{
++	int err = 0;
++	int gnt_ref, remote_evtchn;
++	char *magic = NULL;
++
++	spin_lock(&pdev->dev_lock);
++	/* Make sure we only do this setup once */
++	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++	    XenbusStateInitialised)
++		goto out;
++
++	/* Wait for frontend to state that it has published the configuration */
++	if (xenbus_read_driver_state(pdev->xdev->otherend) !=
++	    XenbusStateInitialised)
++		goto out;
++
++	dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
++	err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
++			    "pci-op-ref", "%u", &gnt_ref,
++			    "event-channel", "%u", &remote_evtchn,
++			    "magic", NULL, &magic, NULL);
++	if (err) {
++		/* The frontend's configuration could not be read */
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error reading configuration from frontend");
++		goto out;
++	}
++
++	if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
++		err = -EFAULT;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "version mismatch (%s/%s) with pcifront - "
++				 "halting pciback",
++				 magic, XEN_PCI_MAGIC);
++		goto out;
++	}
++
++	err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
++	if (err)
++		goto out;
++
++	dev_dbg(&pdev->xdev->dev, "Connecting...\n");
++	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
++	if (err)
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error switching to connected state!");
++	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
++      out:
++	spin_unlock(&pdev->dev_lock);
++	kfree(magic);	/* kfree(NULL) is a no-op */
++	return err;
++}
++
++/*
++ * xenbus otherend_changed callback: react to a frontend state change.
++ * Initialised triggers an attach attempt, Closing is mirrored, and
++ * Closed/Unknown unregisters the device.  States other than these are
++ * ignored.
++ */
++static void pciback_frontend_changed(struct xenbus_device *xdev,
++				     enum xenbus_state fe_state)
++{
++	struct pciback_device *pdev = xdev->dev.driver_data;
++
++	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
++
++	if (fe_state == XenbusStateInitialised) {
++		/* frontend has published its side: try to connect */
++		pciback_attach(pdev);
++	} else if (fe_state == XenbusStateClosing) {
++		/* follow the frontend into Closing */
++		xenbus_switch_state(xdev, XenbusStateClosing);
++	} else if (fe_state == XenbusStateUnknown ||
++		   fe_state == XenbusStateClosed) {
++		/* nothing left to serve: drop the backend device */
++		dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
++		device_unregister(&xdev->dev);
++	}
++}
++/* Publish domain:bus as a "root-N" entry unless already listed; 0 or -errno. */
++static int pciback_publish_pci_root(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus)
++{
++	unsigned int d, b;
++	int i, root_num, len, err;
++	char str[64];
++
++	dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
++	/* Zero matches or a missing node both mean no roots are published yet */
++	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++			   "root_num", "%d", &root_num);
++	if (err == 0 || err == -ENOENT)
++		root_num = 0;
++	else if (err < 0)
++		goto out;
++
++	/* Verify that we haven't already published this pci root */
++	for (i = 0; i < root_num; i++) {
++		len = snprintf(str, sizeof(str), "root-%d", i);
++		if (unlikely(len >= (sizeof(str) - 1))) {
++			err = -ENOMEM;
++			goto out;
++		}
++
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++				   str, "%x:%x", &d, &b);
++		if (err < 0)
++			goto out;
++		if (err != 2) {
++			err = -EINVAL;
++			goto out;
++		}
++		/* Already published: nothing to do */
++		if (d == domain && b == bus) {
++			err = 0;
++			goto out;
++		}
++	}
++	/* Not found: write it as the next root-N entry */
++	len = snprintf(str, sizeof(str), "root-%d", root_num);
++	if (unlikely(len >= (sizeof(str) - 1))) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
++		root_num, domain, bus);
++
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++			    "%04x:%02x", domain, bus);
++	if (err)
++		goto out;
++	/* Update the count only after the new entry has been written */
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
++			    "root_num", "%d", (root_num + 1));
++
++      out:
++	return err;
++}
++
++/*
++ * Claim domain:bus:slot.func from pcistub and add it to the virtual bus.
++ * Returns 0 or a negative error; a device that cannot be added is put back.
++ */
++static int pciback_export_device(struct pciback_device *pdev,
++				 int domain, int bus, int slot, int func)
++{
++	struct pci_dev *dev;
++	int err = 0;
++
++	dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
++		domain, bus, slot, func);
++	dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
++	if (!dev) {
++		err = -EINVAL;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Couldn't locate PCI device "
++				 "(%04x:%02x:%02x.%01x)! "
++				 "perhaps already in-use?",
++				 domain, bus, slot, func);
++		return err;
++	}
++	err = pciback_add_pci_dev(pdev, dev);
++	if (err)	/* don't leave the device claimed by this backend */
++		pcistub_put_pci_dev(dev);
++	/* TODO: It'd be nice to export a bridge and have all of its children
++	 * get exported with it. This may be best done in xend (which will
++	 * have to calculate resource usage anyway) but we probably want to
++	 * put something in here to ensure that if a bridge gets given to a
++	 * driver domain, that all devices under that bridge are not given
++	 * to other driver domains (as he who controls the bridge can disable
++	 * it and stop the other devices from working).
++	 */
++	return err;
++}
++/* Export every dev-N listed in the backend node, then switch to Initialised. */
++static int pciback_setup_backend(struct pciback_device *pdev)
++{
++	/* Get configuration from xend (if available now) */
++	int domain, bus, slot, func;
++	int err = 0;
++	int i, num_devs;
++	char dev_str[64];
++
++	spin_lock(&pdev->dev_lock);
++	/* NOTE(review): xenbus calls below run with this spinlock held - confirm */
++	/* It's possible we could get the call to setup twice, so make sure
++	 * we're not already connected.
++	 */
++	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++	    XenbusStateInitWait)
++		goto out;
++
++	dev_dbg(&pdev->xdev->dev, "getting be setup\n");
++
++	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
++			   &num_devs);
++	if (err != 1) {
++		if (err >= 0)
++			err = -EINVAL;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error reading number of devices");
++		goto out;
++	}
++	/* Each device is described by a "dev-<i>" key holding dom:bus:slot.func */
++	for (i = 0; i < num_devs; i++) {
++		int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
++		if (unlikely(l >= (sizeof(dev_str) - 1))) {
++			err = -ENOMEM;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "String overflow while reading "
++					 "configuration");
++			goto out;
++		}
++
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
++				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
++		if (err < 0) {
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error reading device configuration");
++			goto out;
++		}
++		if (err != 4) {
++			err = -EINVAL;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error parsing pci device "
++					 "configuration");
++			goto out;
++		}
++
++		err = pciback_export_device(pdev, domain, bus, slot, func);
++		if (err)
++			goto out;
++	}
++
++	err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
++	if (err) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error while publish PCI root buses "
++				 "for frontend");
++		goto out;
++	}
++
++	err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
++	if (err)
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error switching to initialised state!");
++
++      out:
++	spin_unlock(&pdev->dev_lock);
++	/* err is also 0 when setup was skipped above; attach is tried then too */
++	if (!err)
++		/* see if pcifront is already configured (if not, we'll wait) */
++		pciback_attach(pdev);
++
++	return err;
++}
++
++/*
++ * Watch callback for the backend's xenstore node: while the backend is
++ * still in XenbusStateInitWait, (re)try reading its configuration.
++ * vec and len are not used.
++ */
++static void pciback_be_watch(struct xenbus_watch *watch,
++			     const char **vec, unsigned int len)
++{
++	struct pciback_device *pdev;
++
++	pdev = container_of(watch, struct pciback_device, be_watch);
++	if (xenbus_read_driver_state(pdev->xdev->nodename) ==
++	    XenbusStateInitWait)
++		pciback_setup_backend(pdev);
++}
++
++/*
++ * xenbus probe: allocate backend state, enter XenbusStateInitWait and watch
++ * the backend node.  Returns 0 or an error; state is freed again on failure.
++ */
++static int pciback_xenbus_probe(struct xenbus_device *dev,
++				const struct xenbus_device_id *id)
++{
++	int err = 0;
++	struct pciback_device *pdev = alloc_pdev(dev);
++
++	if (pdev == NULL) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(dev, err,
++				 "Error allocating pciback_device struct");
++		return err;
++	}
++	/* wait for xend to configure us */
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
++		goto fail;
++	/* watch the backend node for backend configuration information */
++	err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
++				pciback_be_watch);
++	if (err)
++		goto fail;
++	pdev->be_watching = 1;
++	/* force a call to our callback in case xend already configured us */
++	pciback_be_watch(&pdev->be_watch, NULL, 0);
++	return 0;
++fail:
++	free_pdev(pdev);	/* a failed probe is not followed by remove */
++	return err;
++}
++
++/* xenbus remove: release the backend state if one is attached; returns 0. */
++static int pciback_xenbus_remove(struct xenbus_device *dev)
++{
++	struct pciback_device *backend = dev->dev.driver_data;
++
++	if (backend)
++		free_pdev(backend);
++	return 0;
++}
++
++static struct xenbus_device_id xenpci_ids[] = {
++	{"pci"},	/* the one ID string this backend declares */
++	{{0}},		/* zeroed terminating entry */
++};
++
++static struct xenbus_driver xenbus_pciback_driver = {
++	.name 			= "pciback",
++	.owner 			= THIS_MODULE,
++	.ids 			= xenpci_ids,
++	.probe 			= pciback_xenbus_probe,	/* allocates pdev */
++	.remove 		= pciback_xenbus_remove,	/* frees pdev */
++	.otherend_changed 	= pciback_frontend_changed, /* frontend state */
++};
++
++/* Register the backend driver with xenbus; -ENODEV when not running on Xen. */
++int __init pciback_xenbus_register(void)
++{
++	if (is_running_on_xen())
++		return xenbus_register_backend(&xenbus_pciback_driver);
++	return -ENODEV;
++}
++/* Unregister the xenbus driver registered by pciback_xenbus_register(). */
++void __exit pciback_xenbus_unregister(void)
++{
++	xenbus_unregister_driver(&xenbus_pciback_driver);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pcifront/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,7 @@
++obj-y += pcifront.o
++
++pcifront-y := pci_op.o xenbus.o pci.o
++
++ifeq ($(CONFIG_XEN_PCIDEV_FE_DEBUG),y)
++EXTRA_CFLAGS += -DDEBUG
++endif
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pcifront/pci.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,46 @@
++/*
++ * PCI Frontend Operations - ensure only one PCI frontend runs at a time
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pcifront.h"
++
++DEFINE_SPINLOCK(pcifront_dev_lock);	/* serialises access to pcifront_dev */
++static struct pcifront_device *pcifront_dev = NULL; /* installed frontend */
++
++int pcifront_connect(struct pcifront_device *pdev)
++{
++	int rc = -EEXIST;
++
++	/*
++	 * Only a single PCI frontend may be installed: claim the global
++	 * slot if it is free, otherwise report -EEXIST.
++	 */
++	spin_lock(&pcifront_dev_lock);
++	if (pcifront_dev) {
++		dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
++	} else {
++		dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
++		pcifront_dev = pdev;
++		rc = 0;
++	}
++	spin_unlock(&pcifront_dev_lock);
++	return rc;
++}
++
++void pcifront_disconnect(struct pcifront_device *pdev)
++{
++	/* Release the global slot, but only if pdev is the one holding it. */
++	spin_lock(&pcifront_dev_lock);
++	if (pcifront_dev != pdev)
++		goto unlock;
++
++	dev_info(&pdev->xdev->dev, "Disconnecting PCI Frontend Buses\n");
++	pcifront_dev = NULL;
++      unlock:
++	spin_unlock(&pcifront_dev_lock);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pcifront/pci_op.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,268 @@
++/*
++ * PCI Frontend Operations - Communicates with backend
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/version.h>
++#include <linux/init.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <linux/time.h>
++#include <xen/evtchn.h>
++#include "pcifront.h"
++
++static int verbose_request = 0;	/* non-zero: log each config space access */
++module_param(verbose_request, int, 0644);
++
++static int errno_to_pcibios_err(int errno)
++{
++	/*
++	 * Translate a XEN_PCI_ERR_* code into the corresponding PCIBIOS_*
++	 * value; codes without a mapping are handed back unchanged.
++	 */
++
++	if (errno == XEN_PCI_ERR_success)
++		return PCIBIOS_SUCCESSFUL;
++	if (errno == XEN_PCI_ERR_dev_not_found)
++		return PCIBIOS_DEVICE_NOT_FOUND;
++	if (errno == XEN_PCI_ERR_invalid_offset ||
++	    errno == XEN_PCI_ERR_op_failed)
++		return PCIBIOS_BAD_REGISTER_NUMBER;
++	if (errno == XEN_PCI_ERR_not_implemented)
++		return PCIBIOS_FUNC_NOT_SUPPORTED;
++	if (errno == XEN_PCI_ERR_access_denied)
++		return PCIBIOS_SET_FAILED;
++
++	return errno;
++}
++
++static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
++{
++	int err = 0;
++	struct xen_pci_op *active_op = &pdev->sh_info->op;
++	unsigned long irq_flags;
++	evtchn_port_t port = pdev->evtchn;
++	s64 ns, ns_timeout;
++	struct timeval tv;
++	/* Post op in the shared page, kick the backend, poll for completion. */
++	spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
++
++	memcpy(active_op, op, sizeof(struct xen_pci_op));
++
++	/* Go */
++	wmb();
++	set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
++	notify_remote_via_evtchn(port);
++
++	/*
++	 * We set a poll timeout of 3 seconds but give up on return after
++	 * 2 seconds. It is better to time out too late rather than too early
++	 * (in the latter case we end up continually re-executing poll() with a
++	 * timeout in the past). 1s difference gives plenty of slack for error.
++	 */
++	do_gettimeofday(&tv);
++	ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
++
++	clear_evtchn(port);
++
++	while (test_bit(_XEN_PCIF_active,
++			(unsigned long *)&pdev->sh_info->flags)) {
++		if (HYPERVISOR_poll(&port, 1, jiffies + 3*HZ))
++			BUG();
++		clear_evtchn(port);
++		do_gettimeofday(&tv);
++		ns = timeval_to_ns(&tv);
++		if (ns > ns_timeout) {
++			dev_err(&pdev->xdev->dev,
++				"pciback not responding!!!\n");
++			clear_bit(_XEN_PCIF_active,
++				  (unsigned long *)&pdev->sh_info->flags);
++			err = XEN_PCI_ERR_dev_not_found;
++			goto out;
++		}
++	}
++	/* Active flag observed clear: copy the result back to the caller. */
++	memcpy(op, active_op, sizeof(struct xen_pci_op));
++
++	err = op->err;
++      out:
++	spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
++	return err;
++}
++
++/* Access to this function is spinlocked in drivers/pci/access.c */
++static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
++			     int where, int size, u32 * val)
++{
++	int err = 0;
++	struct xen_pci_op op = {
++		.cmd    = XEN_PCI_OP_conf_read,
++		.domain = pci_domain_nr(bus),
++		.bus    = bus->number,
++		.devfn  = devfn,
++		.offset = where,
++		.size   = size,
++	};
++	struct pcifront_sd *sd = bus->sysdata;
++	struct pcifront_device *pdev = pcifront_get_pdev(sd);
++
++	if (verbose_request)
++		dev_info(&pdev->xdev->dev,
++			 "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
++			 pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
++			 PCI_FUNC(devfn), where, size);
++
++	err = do_pci_op(pdev, &op);
++	/* NOTE(review): err holds a XEN_PCI_ERR_* code; is -ENODEV reachable? */
++	if (likely(!err)) {
++		if (verbose_request)
++			dev_info(&pdev->xdev->dev, "read got back value %x\n",
++				 op.value);
++
++		*val = op.value;
++	} else if (err == -ENODEV) {
++		/* No device here, pretend that it just returned 0 */
++		err = 0;
++		*val = 0;
++	}
++
++	return errno_to_pcibios_err(err);
++}
++
++/* Access to this function is spinlocked in drivers/pci/access.c */
++static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
++			      int where, int size, u32 val)
++{
++	struct xen_pci_op op = {
++		.cmd    = XEN_PCI_OP_conf_write,
++		.domain = pci_domain_nr(bus),
++		.bus    = bus->number,
++		.devfn  = devfn,
++		.offset = where,
++		.size   = size,
++		.value  = val,
++	};
++	struct pcifront_sd *sd = bus->sysdata;
++	struct pcifront_device *pdev = pcifront_get_pdev(sd);
++
++	if (verbose_request)
++		dev_info(&pdev->xdev->dev,
++			 "write dev=%04x:%02x:%02x.%01x - "
++			 "offset %x size %d val %x\n",
++			 pci_domain_nr(bus), bus->number,
++			 PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
++	/* Issue the write via do_pci_op() and translate its status code. */
++	return errno_to_pcibios_err(do_pci_op(pdev, &op));
++}
++
++struct pci_ops pcifront_bus_ops = {	/* config-space accessors */
++	.read = pcifront_bus_read,
++	.write = pcifront_bus_write,
++};
++
++/* Claim resources for the PCI frontend as-is, backend won't allow changes */
++static void pcifront_claim_resource(struct pci_dev *dev, void *data)
++{
++	struct pcifront_device *pdev = data;
++	int i;
++	struct resource *r;
++
++	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
++		r = &dev->resource[i];
++		/* claim only unparented resources with a start and flags */
++		if (!r->parent && r->start && r->flags) {
++			dev_dbg(&pdev->xdev->dev, "claiming resource %s/%d\n",
++				pci_name(dev), i);
++			pci_claim_resource(dev, i);
++		}
++	}
++}
++
++int pcifront_scan_root(struct pcifront_device *pdev,
++		       unsigned int domain, unsigned int bus)
++{
++	struct pci_bus *b;
++	struct pcifront_sd *sd = NULL;
++	struct pci_bus_entry *bus_entry = NULL;
++	int err = 0;
++	/* Scan root bus domain:bus and record it on pdev->root_buses. */
++#ifndef CONFIG_PCI_DOMAINS
++	if (domain != 0) {
++		dev_err(&pdev->xdev->dev,
++			"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
++		dev_err(&pdev->xdev->dev,
++			"Please compile with CONFIG_PCI_DOMAINS\n");
++		err = -EINVAL;
++		goto err_out;
++	}
++#endif
++
++	dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
++		 domain, bus);
++
++	bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
++	sd = kmalloc(sizeof(*sd), GFP_KERNEL);
++	if (!bus_entry || !sd) {
++		err = -ENOMEM;
++		goto err_out;
++	}
++	pcifront_init_sd(sd, domain, pdev);
++
++	b = pci_scan_bus_parented(&pdev->xdev->dev, bus,
++				  &pcifront_bus_ops, sd);
++	if (!b) {
++		dev_err(&pdev->xdev->dev,
++			"Error creating PCI Frontend Bus!\n");
++		err = -ENOMEM;
++		goto err_out;
++	}
++	bus_entry->bus = b;
++
++	list_add(&bus_entry->list, &pdev->root_buses);
++
++	/* Claim resources before going "live" with our devices */
++	pci_walk_bus(b, pcifront_claim_resource, pdev);
++
++	pci_bus_add_devices(b);
++
++	return 0;
++	/* Both pointers start out NULL, so a partial allocation is fine. */
++      err_out:
++	kfree(bus_entry);
++	kfree(sd);
++
++	return err;
++}
++
++static void free_root_bus_devs(struct pci_bus *bus)
++{
++	struct pci_dev *dev;
++	/* Remove the first device repeatedly until the bus list is empty. */
++	while (!list_empty(&bus->devices)) {
++		dev = container_of(bus->devices.next, struct pci_dev,
++				   bus_list);
++		dev_dbg(&dev->dev, "removing device\n");
++		pci_remove_bus_device(dev);
++	}
++}
++
++void pcifront_free_roots(struct pcifront_device *pdev)
++{
++	struct pci_bus_entry *bus_entry, *t;
++
++	dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
++
++	list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
++		list_del(&bus_entry->list);
++		/* tear down devices, sysdata, bridge and bus, in that order */
++		free_root_bus_devs(bus_entry->bus);
++
++		kfree(bus_entry->bus->sysdata);
++
++		device_unregister(bus_entry->bus->bridge);
++		pci_remove_bus(bus_entry->bus);
++
++		kfree(bus_entry);
++	}
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pcifront/pcifront.h	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,40 @@
++/*
++ * PCI Frontend - Common data structures & function declarations
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++#ifndef __XEN_PCIFRONT_H__
++#define __XEN_PCIFRONT_H__
++
++#include <linux/spinlock.h>
++#include <linux/pci.h>
++#include <xen/xenbus.h>
++#include <xen/interface/io/pciif.h>
++#include <xen/pcifront.h>
++
++struct pci_bus_entry {
++	struct list_head list;	/* link in pcifront_device.root_buses */
++	struct pci_bus *bus;
++};
++
++struct pcifront_device {
++	struct xenbus_device *xdev;
++	struct list_head root_buses;	/* list of struct pci_bus_entry */
++	spinlock_t dev_lock;		/* taken by try_connect/try_disconnect */
++
++	int evtchn;
++	int gnt_ref;			/* grant reference for sh_info */
++
++	/* Lock this when doing any operations in sh_info */
++	spinlock_t sh_info_lock;
++	struct xen_pci_sharedinfo *sh_info;
++};
++
++int pcifront_connect(struct pcifront_device *pdev);
++void pcifront_disconnect(struct pcifront_device *pdev);
++
++int pcifront_scan_root(struct pcifront_device *pdev,
++		       unsigned int domain, unsigned int bus);
++void pcifront_free_roots(struct pcifront_device *pdev);
++
++#endif	/* __XEN_PCIFRONT_H__ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/pcifront/xenbus.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,295 @@
++/*
++ * PCI Frontend Xenbus Setup - handles setup with backend (imports page/evtchn)
++ *
++ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/mm.h>
++#include <xen/xenbus.h>
++#include <xen/gnttab.h>
++#include "pcifront.h"
++
++#define INVALID_GRANT_REF (0)
++#define INVALID_EVTCHN    (-1)
++
++static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
++{
++	struct pcifront_device *pdev;
++
++	pdev = kmalloc(sizeof(struct pcifront_device), GFP_KERNEL);
++	if (pdev == NULL)
++		goto out;
++
++	pdev->sh_info =
++	    (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
++	if (pdev->sh_info == NULL) {
++		kfree(pdev);
++		pdev = NULL;
++		goto out;
++	}
++	pdev->sh_info->flags = 0;
++
++	xdev->dev.driver_data = pdev;
++	pdev->xdev = xdev;
++
++	INIT_LIST_HEAD(&pdev->root_buses);
++
++	spin_lock_init(&pdev->dev_lock);
++	spin_lock_init(&pdev->sh_info_lock);
++	/* Nothing is granted or bound until pcifront_publish_info(). */
++	pdev->evtchn = INVALID_EVTCHN;
++	pdev->gnt_ref = INVALID_GRANT_REF;
++
++	dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
++		pdev, pdev->sh_info);
++      out:
++	return pdev;
++}
++
++static void free_pdev(struct pcifront_device *pdev)
++{
++	/* Undo alloc_pdev() and whatever pcifront_publish_info() set up. */
++	dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
++	pcifront_free_roots(pdev);
++
++	if (pdev->evtchn != INVALID_EVTCHN)
++		xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
++	/* Granted page: handed to the grant code. Never granted: free here. */
++	if (pdev->gnt_ref != INVALID_GRANT_REF)
++		gnttab_end_foreign_access(pdev->gnt_ref, 0,
++					  (unsigned long)pdev->sh_info);
++	else
++		free_page((unsigned long)pdev->sh_info);
++
++	pdev->xdev->dev.driver_data = NULL;
++	kfree(pdev);
++}
++
++static int pcifront_publish_info(struct pcifront_device *pdev)
++{
++	int err = 0;
++	struct xenbus_transaction trans;
++	/* Grant the shared page, allocate an event channel, publish both. */
++	err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
++	if (err < 0)
++		goto out;
++
++	pdev->gnt_ref = err;
++
++	err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
++	if (err)
++		goto out;
++
++      do_publish:
++	err = xenbus_transaction_start(&trans);
++	if (err) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error writing configuration for backend "
++				 "(start transaction)");
++		goto out;
++	}
++
++	err = xenbus_printf(trans, pdev->xdev->nodename,
++			    "pci-op-ref", "%u", pdev->gnt_ref);
++	if (!err)
++		err = xenbus_printf(trans, pdev->xdev->nodename,
++				    "event-channel", "%u", pdev->evtchn);
++	if (!err)
++		err = xenbus_printf(trans, pdev->xdev->nodename,
++				    "magic", XEN_PCI_MAGIC);
++
++	if (err) {
++		xenbus_transaction_end(trans, 1);
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error writing configuration for backend");
++		goto out;
++	} else {
++		err = xenbus_transaction_end(trans, 0);
++		if (err == -EAGAIN)
++			goto do_publish;
++		else if (err) {
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error completing transaction "
++					 "for backend");
++			goto out;
++		}
++	}
++	/* NOTE(review): the result of this state switch is not checked. */
++	xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
++
++	dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
++
++      out:
++	return err;
++}
++
++static int pcifront_try_connect(struct pcifront_device *pdev)
++{
++	int err = -EFAULT;
++	int i, num_roots, len;
++	char str[64];
++	unsigned int domain, bus;
++	/* Scan every root the backend advertises, then switch to Connected. */
++	spin_lock(&pdev->dev_lock);
++	/* NOTE(review): pcifront_scan_root() uses GFP_KERNEL under dev_lock. */
++	/* Only connect once */
++	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++	    XenbusStateInitialised)
++		goto out;
++	err = pcifront_connect(pdev);
++	if (err) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error connecting PCI Frontend");
++		goto out;
++	}
++
++	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
++			   "root_num", "%d", &num_roots);
++	if (err == -ENOENT) {
++		xenbus_dev_error(pdev->xdev, err,
++				 "No PCI Roots found, trying 0000:00");
++		err = pcifront_scan_root(pdev, 0, 0);
++		/* Do not report Connected if the fallback root failed too. */
++		if (err) {
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error scanning PCI root 0000:00");
++			goto out;
++		}
++		num_roots = 0;
++	} else if (err != 1) {
++		if (err == 0)
++			err = -EINVAL;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error reading number of PCI roots");
++		goto out;
++	}
++
++	for (i = 0; i < num_roots; i++) {
++		len = snprintf(str, sizeof(str), "root-%d", i);
++		if (unlikely(len >= (sizeof(str) - 1))) {
++			err = -ENOMEM;
++			goto out;
++		}
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
++				   "%x:%x", &domain, &bus);
++		if (err != 2) {
++			if (err >= 0)
++				err = -EINVAL;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error reading PCI root %d", i);
++			goto out;
++		}
++		err = pcifront_scan_root(pdev, domain, bus);
++		if (err) {
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error scanning PCI root %04x:%02x",
++					 domain, bus);
++			goto out;
++		}
++	}
++
++	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
++      out:
++	spin_unlock(&pdev->dev_lock);
++	return err;
++}
++
++static int pcifront_try_disconnect(struct pcifront_device *pdev)
++{
++	int err = 0;
++	enum xenbus_state prev_state;
++
++	spin_lock(&pdev->dev_lock);
++
++	prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
++	/* Switch to Closing only from a state that precedes Closing. */
++	if (prev_state < XenbusStateClosing)
++		err = xenbus_switch_state(pdev->xdev, XenbusStateClosing);
++
++	if (!err && prev_state == XenbusStateConnected)
++		pcifront_disconnect(pdev);
++
++	spin_unlock(&pdev->dev_lock);
++
++	return err;
++}
++
++static void pcifront_backend_changed(struct xenbus_device *xdev,
++				     enum xenbus_state be_state)
++{
++	struct pcifront_device *pdev = xdev->dev.driver_data;
++	/* otherend_changed hook: react to the new backend state be_state. */
++	switch (be_state) {
++	case XenbusStateClosing:
++		dev_warn(&xdev->dev, "backend going away!\n");
++		pcifront_try_disconnect(pdev);
++		break;
++
++	case XenbusStateUnknown:
++	case XenbusStateClosed:
++		dev_warn(&xdev->dev, "backend went away!\n");
++		pcifront_try_disconnect(pdev);
++		/* in these states our own device is unregistered as well */
++		device_unregister(&pdev->xdev->dev);
++		break;
++
++	case XenbusStateConnected:
++		pcifront_try_connect(pdev);
++		break;
++
++	default:
++		break;
++	}
++}
++
++static int pcifront_xenbus_probe(struct xenbus_device *xdev,
++				 const struct xenbus_device_id *id)
++{
++	int err = -ENOMEM;
++	struct pcifront_device *pdev = alloc_pdev(xdev);
++
++	if (pdev == NULL) {
++		xenbus_dev_fatal(xdev, err,
++				 "Error allocating pcifront_device struct");
++		return err;
++	}
++
++	/* remove() is not expected after a failed probe: free pdev here. */
++	err = pcifront_publish_info(pdev);
++	if (err)
++		free_pdev(pdev);
++	return err;
++}
++
++static int pcifront_xenbus_remove(struct xenbus_device *xdev)
++{	/* free_pdev() also resets driver_data to NULL */
++	if (xdev->dev.driver_data)
++		free_pdev(xdev->dev.driver_data);
++
++	return 0;
++}
++
++static struct xenbus_device_id xenpci_ids[] = {
++	{"pci"},	/* device type this driver binds to */
++	{{0}},		/* empty entry, presumably the list terminator */
++};
++
++static struct xenbus_driver xenbus_pcifront_driver = {	/* xenbus hooks */
++	.name 			= "pcifront",
++	.owner 			= THIS_MODULE,
++	.ids 			= xenpci_ids,
++	.probe 			= pcifront_xenbus_probe,
++	.remove 		= pcifront_xenbus_remove,
++	.otherend_changed 	= pcifront_backend_changed,
++};
++
++static int __init pcifront_init(void)
++{
++	if (!is_running_on_xen())
++		return -ENODEV;
++	/* Register pcifront with xenbus as a frontend driver. */
++	return xenbus_register_frontend(&xenbus_pcifront_driver);
++}
++
++/* Initialize after the Xen PCI Frontend Stub is initialized */
++subsys_initcall(pcifront_init);
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/privcmd/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,2 @@
++
++obj-$(CONFIG_XEN_PRIVCMD)	:= privcmd.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/privcmd/privcmd.c	2007-08-27 14:02:05.000000000 -0400
+@@ -0,0 +1,284 @@
++/******************************************************************************
++ * privcmd.c
++ * 
++ * Interface to privileged domain-0 commands.
++ * 
++ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
++ */
++
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <linux/string.h>
++#include <linux/errno.h>
++#include <linux/mm.h>
++#include <linux/mman.h>
++#include <linux/swap.h>
++#include <linux/smp_lock.h>
++#include <linux/highmem.h>
++#include <linux/pagemap.h>
++#include <linux/seq_file.h>
++#include <linux/kthread.h>
++#include <asm/hypervisor.h>
++
++#include <asm/pgalloc.h>
++#include <asm/pgtable.h>
++#include <asm/uaccess.h>
++#include <asm/tlb.h>
++#include <asm/hypervisor.h>
++#include <xen/public/privcmd.h>
++#include <xen/interface/xen.h>
++#include <xen/xen_proc.h>
++
++static struct proc_dir_entry *privcmd_intf;
++static struct proc_dir_entry *capabilities_intf;
++
++#ifndef HAVE_ARCH_PRIVCMD_MMAP
++static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
++#endif
++
++static int privcmd_ioctl(struct inode *inode, struct file *file,
++			 unsigned int cmd, unsigned long data)
++{
++	int ret = -ENOSYS;
++	void __user *udata = (void __user *) data;
++	/* Dispatch on the privcmd request code; udata is the user argument. */
++	switch (cmd) {
++	case IOCTL_PRIVCMD_HYPERCALL: {
++		privcmd_hypercall_t hypercall;
++		/* Fetch the request, then call through hypercall_page. */
++		if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
++			return -EFAULT;
++
++#if defined(__i386__)
++		if (hypercall.op >= (PAGE_SIZE >> 5))
++			break;
++		__asm__ __volatile__ (
++			"pushl %%ebx; pushl %%ecx; pushl %%edx; "
++			"pushl %%esi; pushl %%edi; "
++			"movl  8(%%eax),%%ebx ;"
++			"movl 16(%%eax),%%ecx ;"
++			"movl 24(%%eax),%%edx ;"
++			"movl 32(%%eax),%%esi ;"
++			"movl 40(%%eax),%%edi ;"
++			"movl   (%%eax),%%eax ;"
++			"shll $5,%%eax ;"
++			"addl $hypercall_page,%%eax ;"
++			"call *%%eax ;"
++			"popl %%edi; popl %%esi; popl %%edx; "
++			"popl %%ecx; popl %%ebx"
++			: "=a" (ret) : "0" (&hypercall) : "memory" );
++#elif defined (__x86_64__)
++		if (hypercall.op < (PAGE_SIZE >> 5)) {
++			long ign1, ign2, ign3;
++			__asm__ __volatile__ (
++				"movq %8,%%r10; movq %9,%%r8;"
++				"shll $5,%%eax ;"
++				"addq $hypercall_page,%%rax ;"
++				"call *%%rax"
++				: "=a" (ret), "=D" (ign1),
++				  "=S" (ign2), "=d" (ign3)
++				: "0" ((unsigned int)hypercall.op),
++				"1" (hypercall.arg[0]),
++				"2" (hypercall.arg[1]),
++				"3" (hypercall.arg[2]),
++				"g" (hypercall.arg[3]),
++				"g" (hypercall.arg[4])
++				: "r8", "r10", "memory" );
++		}
++#elif defined (__ia64__)
++		ret = privcmd_hypercall(&hypercall);
++#endif
++	}
++	break;
++
++	case IOCTL_PRIVCMD_MMAP: {
++		privcmd_mmap_t mmapcmd;
++		privcmd_mmap_entry_t msg;
++		privcmd_mmap_entry_t __user *p;
++		struct mm_struct *mm = current->mm;
++		struct vm_area_struct *vma;
++		unsigned long va;
++		int i, rc;
++		/* This request is refused outside the initial domain. */
++		if (!is_initial_xendomain())
++			return -EPERM;
++
++		if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
++			return -EFAULT;
++
++		p = mmapcmd.entry;
++		if (copy_from_user(&msg, p, sizeof(msg)))
++			return -EFAULT;
++
++		down_read(&mm->mmap_sem);
++
++		vma = find_vma(mm, msg.va);
++		rc = -EINVAL;
++		if (!vma || (msg.va != vma->vm_start) ||
++		    !privcmd_enforce_singleshot_mapping(vma))
++			goto mmap_out;
++
++		va = vma->vm_start;
++
++		for (i = 0; i < mmapcmd.num; i++) {
++			rc = -EFAULT;
++			if (copy_from_user(&msg, p, sizeof(msg)))
++				goto mmap_out;
++
++			/* Do not allow range to wrap the address space. */
++			rc = -EINVAL;
++			if ((msg.npages > (LONG_MAX >> PAGE_SHIFT)) ||
++			    ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
++				goto mmap_out;
++
++			/* Range chunks must be contiguous in va space. */
++			if ((msg.va != va) ||
++			    ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
++				goto mmap_out;
++
++			if ((rc = direct_remap_pfn_range(
++				vma,
++				msg.va & PAGE_MASK, 
++				msg.mfn, 
++				msg.npages << PAGE_SHIFT, 
++				vma->vm_page_prot,
++				mmapcmd.dom)) < 0)
++				goto mmap_out;
++
++			p++;
++			va += msg.npages << PAGE_SHIFT;
++		}
++
++		rc = 0;
++
++	mmap_out:
++		up_read(&mm->mmap_sem);
++		ret = rc;
++	}
++	break;
++
++	case IOCTL_PRIVCMD_MMAPBATCH: {
++		privcmd_mmapbatch_t m;
++		struct mm_struct *mm = current->mm;
++		struct vm_area_struct *vma;
++		xen_pfn_t __user *p;
++		unsigned long addr, mfn, nr_pages;
++		int i;
++		/* This request is refused outside the initial domain. */
++		if (!is_initial_xendomain())
++			return -EPERM;
++
++		if (copy_from_user(&m, udata, sizeof(m)))
++			return -EFAULT;
++
++		nr_pages = m.num;
++		if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
++			return -EINVAL;
++
++		down_read(&mm->mmap_sem);
++
++		vma = find_vma(mm, m.addr);
++		if (!vma ||
++		    (m.addr != vma->vm_start) ||
++		    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
++		    !privcmd_enforce_singleshot_mapping(vma)) {
++			up_read(&mm->mmap_sem);
++			return -EINVAL;
++		}
++
++		p = m.arr;
++		addr = m.addr;
++		for (i = 0; i < nr_pages; i++, addr += PAGE_SIZE, p++) {
++			if (get_user(mfn, p)) {
++				up_read(&mm->mmap_sem);
++				return -EFAULT;
++			}
++			/* a failed page is flagged in m.arr; the loop goes on */
++			ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
++						     mfn, PAGE_SIZE,
++						     vma->vm_page_prot, m.dom);
++			if (ret < 0)
++				put_user(0xF0000000 | mfn, p);
++		}
++
++		up_read(&mm->mmap_sem);
++		ret = 0;
++	}
++	break;
++
++	default:
++		ret = -EINVAL;
++		break;
++	}
++
++	return ret;
++}
++
++#ifndef HAVE_ARCH_PRIVCMD_MMAP
++static struct page *privcmd_nopage(struct vm_area_struct *vma,
++				   unsigned long address,
++				   int *type)
++{	/* every fault on a privcmd mapping is answered with SIGBUS */
++	return NOPAGE_SIGBUS;
++}
++
++static struct vm_operations_struct privcmd_vm_ops = {
++	.nopage = privcmd_nopage	/* the only hook installed */
++};
++
++static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
++{
++	/* Unsupported for auto-translate guests. */
++	if (xen_feature(XENFEAT_auto_translated_physmap))
++		return -ENOSYS;
++
++	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
++	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
++	vma->vm_ops = &privcmd_vm_ops;
++	vma->vm_private_data = NULL;
++	/* NULL private data marks the vma as not yet populated by an ioctl. */
++	return 0;
++}
++
++static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
++{	/* non-zero only for the first caller on this vma */
++	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
++}
++#endif
++
++static const struct file_operations privcmd_file_ops = {	/* privcmd node */
++	.ioctl = privcmd_ioctl,
++	.mmap  = privcmd_mmap,
++};
++
++static int capabilities_read(char *page, char **start, off_t off,
++			     int count, int *eof, void *data)
++{
++	/* "control_d" for the initial domain, an empty string otherwise. */
++	*eof = 1;
++	if (!is_initial_xendomain()) {
++		*page = 0;
++		return 0;
++	}
++
++	return sprintf(page, "control_d\n");
++}
++
++static int __init privcmd_init(void)
++{
++	if (!is_running_on_xen())
++		return -ENODEV;
++	/* A failed entry creation is skipped; init still returns 0. */
++	privcmd_intf = create_xen_proc_entry("privcmd", 0400);
++	if (privcmd_intf != NULL)
++		privcmd_intf->proc_fops = &privcmd_file_ops;
++
++	capabilities_intf = create_xen_proc_entry("capabilities", 0400 );
++	if (capabilities_intf != NULL)
++		capabilities_intf->read_proc = capabilities_read;
++
++	return 0;
++}
++
++__initcall(privcmd_init);
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/tpmback/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,4 @@
++
++obj-$(CONFIG_XEN_TPMDEV_BACKEND)	+= tpmbk.o
++
++tpmbk-y += tpmback.o interface.o xenbus.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/tpmback/common.h	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,85 @@
++/******************************************************************************
++ * drivers/xen/tpmback/common.h
++ */
++
++#ifndef __TPM__BACKEND__COMMON_H__
++#define __TPM__BACKEND__COMMON_H__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <xen/evtchn.h>
++#include <xen/driver_util.h>
++#include <xen/interface/grant_table.h>
++#include <xen/interface/io/tpmif.h>
++#include <asm/io.h>
++#include <asm/pgalloc.h>
++
++#define DPRINTK(_f, _a...)			\
++	pr_debug("(file=%s, line=%d) " _f,	\
++		 __FILE__ , __LINE__ , ## _a )
++
++struct backend_info;
++
++typedef struct tpmif_st {
++	struct list_head tpmif_list;	/* link in the global tpmif_list */
++	/* Unique identifier for this interface. */
++	domid_t domid;
++	unsigned int handle;
++
++	/* Physical parameters of the comms window. */
++	unsigned int irq;
++
++	/* The shared rings and indexes. */
++	tpmif_tx_interface_t *tx;
++	struct vm_struct *tx_area;
++
++	/* Miscellaneous private stuff. */
++	enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
++	int active;
++
++	struct tpmif_st *hash_next;
++	struct list_head list;	/* scheduling list */
++	atomic_t refcnt;	/* managed through tpmif_get()/tpmif_put() */
++
++	struct backend_info *bi;
++
++	grant_handle_t shmem_handle;
++	grant_ref_t shmem_ref;
++	struct page **mmap_pages;
++
++	char devname[20];	/* "tpmif<domid>" */
++} tpmif_t;
++
++void tpmif_disconnect_complete(tpmif_t * tpmif);
++tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi);
++void tpmif_interface_init(void);
++void tpmif_interface_exit(void);
++void tpmif_schedule_work(tpmif_t * tpmif);
++void tpmif_deschedule_work(tpmif_t * tpmif);
++void tpmif_xenbus_init(void);
++void tpmif_xenbus_exit(void);
++int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
++irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++
++long int tpmback_get_instance(struct backend_info *bi);
++
++int vtpm_release_packets(tpmif_t * tpmif, int send_msgs);
++
++/* Reference counting: dropping the last reference completes the disconnect. */
++#define tpmif_get(_b) (atomic_inc(&(_b)->refcnt))
++#define tpmif_put(_b)					\
++	do {						\
++		if (atomic_dec_and_test(&(_b)->refcnt))	\
++			tpmif_disconnect_complete(_b);	\
++	} while (0)
++
++extern int num_frontends;
++
++static inline unsigned long idx_to_kaddr(tpmif_t *t, unsigned int idx)
++{	/* kernel virtual address of page mmap_pages[idx] */
++	return (unsigned long)pfn_to_kaddr(page_to_pfn(t->mmap_pages[idx]));
++}
++
++#endif /* __TPM__BACKEND__COMMON_H__ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/tpmback/interface.c	2007-08-27 14:02:01.000000000 -0400
+@@ -0,0 +1,167 @@
++ /*****************************************************************************
++ * drivers/xen/tpmback/interface.c
++ *
++ * Virtual TPM interface management.
++ *
++ * Copyright (c) 2005, IBM Corporation
++ *
++ * Author: Stefan Berger, stefanb@us.ibm.com
++ *
++ * This code has been derived from drivers/xen/netback/interface.c
++ * Copyright (c) 2004, Keir Fraser
++ */
++
++#include "common.h"
++#include <xen/balloon.h>
++#include <xen/gnttab.h>
++
++static kmem_cache_t *tpmif_cachep;	/* slab cache for tpmif_t objects */
++int num_frontends = 0;			/* number of live tpmif_t objects */
++
++LIST_HEAD(tpmif_list);			/* every allocated tpmif_t */
++
++static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi)
++{
++	tpmif_t *tpmif;
++	/* Returns a new interface with refcnt 1, or ERR_PTR(-ENOMEM). */
++	tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
++	if (tpmif == NULL)
++		goto out_of_memory;
++
++	memset(tpmif, 0, sizeof (*tpmif));
++	tpmif->domid = domid;
++	tpmif->status = DISCONNECTED;
++	tpmif->bi = bi;
++	snprintf(tpmif->devname, sizeof(tpmif->devname), "tpmif%d", domid);
++	atomic_set(&tpmif->refcnt, 1);
++
++	tpmif->mmap_pages = alloc_empty_pages_and_pagevec(TPMIF_TX_RING_SIZE);
++	if (tpmif->mmap_pages == NULL)
++		goto out_of_memory;
++	/* Put it on the global list only after every allocation succeeded. */
++	list_add(&tpmif->tpmif_list, &tpmif_list);
++	num_frontends++;
++
++	return tpmif;
++
++ out_of_memory:
++	if (tpmif != NULL)
++		kmem_cache_free(tpmif_cachep, tpmif);
++	printk("%s: out of memory\n", __FUNCTION__);
++	return ERR_PTR(-ENOMEM);
++}
++
++static void free_tpmif(tpmif_t * tpmif)
++{	/* undoes alloc_tpmif(): counter, list link, page vector, object */
++	num_frontends--;
++	list_del(&tpmif->tpmif_list);
++	free_empty_pages_and_pagevec(tpmif->mmap_pages, TPMIF_TX_RING_SIZE);
++	kmem_cache_free(tpmif_cachep, tpmif);
++}
++
++tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi)
++{
++	tpmif_t *cur;
++
++	/* One interface per backend: share it for the same domain only. */
++	list_for_each_entry(cur, &tpmif_list, tpmif_list) {
++		if (cur->bi != bi)
++			continue;
++		if (cur->domid != domid)
++			return ERR_PTR(-EEXIST);
++		tpmif_get(cur);
++		return cur;
++	}
++
++	/* No interface for this backend yet: create one. */
++	return alloc_tpmif(domid, bi);
++}
++
++static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page)
++{
++	struct gnttab_map_grant_ref op;
++	/* Map grant shared_page of tpmif->domid at tx_area->addr. */
++	gnttab_set_map_op(&op, (unsigned long)tpmif->tx_area->addr,
++			  GNTMAP_host_map, shared_page, tpmif->domid);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
++
++	if (op.status) {
++		DPRINTK(" Grant table operation failure !\n");
++		return op.status;
++	}
++
++	tpmif->shmem_ref = shared_page;
++	tpmif->shmem_handle = op.handle;
++
++	return 0;
++}
++
++static void unmap_frontend_page(tpmif_t *tpmif)
++{
++	struct gnttab_unmap_grant_ref op;
++	/* Undo map_frontend_page() using the handle it stored. */
++	gnttab_set_unmap_op(&op, (unsigned long)tpmif->tx_area->addr,
++			    GNTMAP_host_map, tpmif->shmem_handle);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++		BUG();
++}
++
++int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn)
++{
++	int err;
++	/* Map the ring page and bind the event channel (once per tpmif). */
++	if (tpmif->irq)
++		return 0;
++
++	if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL)
++		return -ENOMEM;
++
++	err = map_frontend_page(tpmif, shared_page);
++	if (err) {
++		free_vm_area(tpmif->tx_area);
++		return err;
++	}
++
++	tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr;
++
++	err = bind_interdomain_evtchn_to_irqhandler(
++		tpmif->domid, evtchn, tpmif_be_int, 0, tpmif->devname, tpmif);
++	if (err < 0) {
++		unmap_frontend_page(tpmif);
++		free_vm_area(tpmif->tx_area);
++		/* else tpmif_disconnect_complete() unmaps and frees again */
++		tpmif->tx = NULL;
++		return err;
++	}
++	tpmif->irq = err;
++	tpmif->active = 1;
++
++	return 0;
++}
++
++void tpmif_disconnect_complete(tpmif_t *tpmif)
++{
++	if (tpmif->irq)
++		unbind_from_irqhandler(tpmif->irq, tpmif);
++	/* tx is assigned in tpmif_map() once the frontend page is mapped */
++	if (tpmif->tx) {
++		unmap_frontend_page(tpmif);
++		free_vm_area(tpmif->tx_area);
++	}
++
++	free_tpmif(tpmif);
++}
++
++void __init tpmif_interface_init(void)
++{	/* NOTE(review): kmem_cache_create() result is not checked */
++	tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
++					 0, 0, NULL, NULL);
++}
++
++void __exit tpmif_interface_exit(void)
++{	/* destroys the cache created by tpmif_interface_init() */
++	kmem_cache_destroy(tpmif_cachep);
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/tpmback/tpmback.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,944 @@
++/******************************************************************************
++ * drivers/xen/tpmback/tpmback.c
++ *
++ * Copyright (c) 2005, IBM Corporation
++ *
++ * Author: Stefan Berger, stefanb@us.ibm.com
++ * Grant table support: Mahadevan Gomathisankaran
++ *
++ * This code has been derived from drivers/xen/netback/netback.c
++ * Copyright (c) 2002-2004, K A Fraser
++ *
++ */
++
++#include "common.h"
++#include <xen/evtchn.h>
++
++#include <linux/types.h>
++#include <linux/list.h>
++#include <linux/miscdevice.h>
++#include <linux/poll.h>
++#include <asm/uaccess.h>
++#include <xen/xenbus.h>
++#include <xen/interface/grant_table.h>
++#include <xen/gnttab.h>
++
++/* local data structures */
++struct data_exchange {
++	struct list_head pending_pak;	/* requests queued by vtpm_queue_packet() for vtpm_op_read() */
++	struct list_head current_pak;	/* fully read requests waiting for vtpm_op_write() */
++	unsigned int copied_so_far;	/* bytes of the head pending packet already returned by read */
++	u8 has_opener:1;		/* set in vtpm_op_open(), cleared in vtpm_op_release() */
++	u8 aborted:1;			/* result of the last vtpm_release_packets(); read returns -EIO if set */
++	rwlock_t pak_lock;	// protects all of the previous fields
++	wait_queue_head_t wait_queue;	/* readers and pollers wait here; woken by vtpm_queue_packet() */
++};
++
++struct vtpm_resp_hdr {
++	uint32_t instance_no;	/* network byte order; vtpm_op_write() uses it to find the request */
++	uint16_t tag_no;
++	uint32_t len_no;	/* network byte order; must equal the write size minus 4 */
++	uint32_t ordinal_no;
++} __attribute__ ((packed));
++
++struct packet {
++	struct list_head next;		/* linkage on dataex.pending_pak or dataex.current_pak */
++	unsigned int data_len;		/* 'size' given to packet_alloc() */
++	u8 *data_buffer;		/* if set, packet_read() copies from here; freed in packet_free() */
++	tpmif_t *tpmif;			/* owning interface; reference taken in packet_alloc() */
++	u32 tpm_instance;		/* from tpmback_get_instance() */
++	u8 req_tag;			/* request tag used to build failure responses */
++	u32 last_read;			/* read position, counting the 4-byte instance prefix */
++	u8 flags;			/* PACKET_FLAG_* */
++	struct timer_list processing_timer;	/* runs processing_timeout() on expiry */
++};
++
++enum {
++	PACKET_FLAG_DISCARD_RESPONSE = 1,	/* packet_write() acknowledges the data but sends nothing */
++};
++
++/* local variables */
++static struct data_exchange dataex;
++
++/* local function prototypes */
++static int _packet_write(struct packet *pak,
++			 const char *data, size_t size, int userbuffer);
++static void processing_timeout(unsigned long ptr);
++static int packet_read_shmem(struct packet *pak,
++			     tpmif_t * tpmif,
++			     u32 offset,
++			     char *buffer, int isuserbuffer, u32 left);
++static int vtpm_queue_packet(struct packet *pak);
++
++/***************************************************************
++ Buffer copying for user and kernel space buffers.
++***************************************************************/
++static inline int copy_from_buffer(void *to,
++				   const void *from, unsigned long size,
++				   int isuserbuffer)
++{
++	/* Kernel-space source: a plain memory copy is enough. */
++	if (!isuserbuffer) {
++		memcpy(to, from, size);
++		return 0;
++	}
++	/* User-space source: any byte left uncopied is reported as -EFAULT. */
++	return copy_from_user(to, (void __user *)from, size) ? -EFAULT : 0;
++}
++
++static inline int copy_to_buffer(void *to,
++				 const void *from, unsigned long size,
++				 int isuserbuffer)
++{
++	/* Kernel-space destination: a plain memory copy is enough. */
++	if (!isuserbuffer) {
++		memcpy(to, from, size);
++		return 0;
++	}
++	/* User-space destination: any byte left uncopied is reported as -EFAULT. */
++	return copy_to_user((void __user *)to, from, size) ? -EFAULT : 0;
++}
++
++
++static void dataex_init(struct data_exchange *dataex)
++{
++	rwlock_init(&dataex->pak_lock);
++	init_waitqueue_head(&dataex->wait_queue);
++	INIT_LIST_HEAD(&dataex->pending_pak);	/* both packet queues start out empty */
++	INIT_LIST_HEAD(&dataex->current_pak);
++	dataex->has_opener = 0;		/* nobody has the device open yet */
++}
++
++/***************************************************************
++ Packet-related functions
++***************************************************************/
++
++/*
++ * Return the first packet on 'head' that belongs to the TPM instance
++ * 'tpm_instance', or NULL if the list holds none.
++ */
++static struct packet *packet_find_instance(struct list_head *head,
++					   u32 tpm_instance)
++{
++	struct packet *cur;
++
++	/*
++	 * Walk from the list head; the matching entry closest to it wins.
++	 */
++	list_for_each_entry(cur, head, next) {
++		if (cur->tpm_instance == tpm_instance)
++			return cur;
++	}
++
++	return NULL;
++}
++
++static struct packet *packet_find_packet(struct list_head *head, void *packet)
++{
++	struct packet *pak;
++	struct list_head *p;
++
++	/*
++	 * traverse the list of packets and return the entry whose
++	 * address equals 'packet'; 'packet' itself is never dereferenced
++	 */
++	list_for_each(p, head) {
++		pak = list_entry(p, struct packet, next);
++
++		if (pak == packet) {
++			return pak;
++		}
++	}
++	return NULL;
++}
++
++static struct packet *packet_alloc(tpmif_t * tpmif,
++				   u32 size, u8 req_tag, u8 flags)
++{
++	struct packet *pak = NULL;
++	pak = kzalloc(sizeof (struct packet), GFP_ATOMIC);	/* zeroed, so data_buffer starts out NULL */
++	if (NULL != pak) {
++		if (tpmif) {
++			pak->tpmif = tpmif;
++			pak->tpm_instance = tpmback_get_instance(tpmif->bi);	/* -1 if no instance is set */
++			tpmif_get(tpmif);	/* dropped again in packet_free() */
++		}
++		pak->data_len = size;
++		pak->req_tag = req_tag;
++		pak->last_read = 0;
++		pak->flags = flags;
++
++		/*
++		 * Prepare (but do not arm) the timeout timer; it is
++		 * started with mod_timer() once the packet gets queued.
++		 */
++		init_timer(&pak->processing_timer);
++		pak->processing_timer.function = processing_timeout;
++		pak->processing_timer.data = (unsigned long)pak;
++	}
++	return pak;
++}
++
++static inline void packet_reset(struct packet *pak)
++{
++	pak->last_read = 0;	/* the next packet_read() starts at the instance prefix again */
++}
++
++static void packet_free(struct packet *pak)
++{
++	if (timer_pending(&pak->processing_timer)) {
++		BUG();	/* callers must have stopped the timer before freeing */
++	}
++
++	if (pak->tpmif)
++		tpmif_put(pak->tpmif);	/* pairs with tpmif_get() in packet_alloc() */
++	kfree(pak->data_buffer);
++	/*
++	 * data_buffer may well be NULL here; the interface reference
++	 * held by this packet has already been dropped above
++	 */
++	kfree(pak);
++}
++
++
++/*
++ * Write data to the shared memory and send it to the FE.
++ */
++static int packet_write(struct packet *pak,
++			const char *data, size_t size, int isuserbuffer)
++{
++	/*
++	 * A packet flagged with PACKET_FLAG_DISCARD_RESPONSE gets no
++	 * response; the data is merely acknowledged as fully written.
++	 * (processing_timeout() sets the flag once a request timed out.)
++	 */
++	if (pak->flags & PACKET_FLAG_DISCARD_RESPONSE)
++		return size;
++
++	/* Everybody else gets the data copied into shared memory. */
++	return _packet_write(pak, data, size, isuserbuffer);
++}
++
++/*
++ * Copy 'size' bytes of response data into the pages granted by the
++ * frontend, one ring slot per page, and notify the frontend.  Returns the
++ * number of bytes copied, 'size' if the interface is disconnected, 0 if a
++ * slot is unusable, or -EFAULT.
++ */
++static int _packet_write(struct packet *pak,
++			 const char *data, size_t size, int isuserbuffer)
++{
++	tpmif_t *tpmif = pak->tpmif;
++	unsigned int i = 0;
++	unsigned int offset = 0;
++
++	if (tpmif == NULL)
++		return -EFAULT;
++	if (tpmif->status == DISCONNECTED)
++		return size;
++
++	while (offset < size && i < TPMIF_TX_RING_SIZE) {
++		struct gnttab_map_grant_ref map_op;
++		struct gnttab_unmap_grant_ref unmap_op;
++		tpmif_tx_request_t *tx = &tpmif->tx->ring[i].req;
++		unsigned long pg_off;
++		unsigned int tocopy;
++		int failed;
++
++		if (0 == tx->addr) {
++			DPRINTK("ERROR: Buffer for outgoing packet NULL?! i=%d\n", i);
++			return 0;
++		}
++		/* Sample the in-page offset once so size and destination agree. */
++		pg_off = tx->addr & ~PAGE_MASK;
++
++		gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
++				  GNTMAP_host_map, tx->ref, tpmif->domid);
++		if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
++						       &map_op, 1))) {
++			BUG();
++		}
++		if (map_op.status) {
++			DPRINTK(" Grant table operation failure !\n");
++			return 0;
++		}
++
++		/* Stay inside the one page that is mapped for this slot. */
++		tocopy = min_t(size_t, size - offset, PAGE_SIZE - pg_off);
++
++		failed = copy_from_buffer((void *)(idx_to_kaddr(tpmif, i) |
++						   pg_off),
++					  &data[offset], tocopy, isuserbuffer);
++		if (!failed)
++			tx->size = tocopy;
++
++		/* Unmap on every path so a failed copy leaks no mapping. */
++		gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
++				    GNTMAP_host_map, map_op.handle);
++		if (unlikely
++		    (HYPERVISOR_grant_table_op
++		     (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) {
++			BUG();
++		}
++
++		/*
++		 * No tpmif_put() on failure: this function took no reference;
++		 * the packet's own one is dropped by packet_free().
++		 */
++		if (failed)
++			return -EFAULT;
++
++		offset += tocopy;
++		i++;
++	}
++
++	DPRINTK("Notifying frontend via irq %d\n", tpmif->irq);
++	notify_remote_via_irq(tpmif->irq);
++
++	return offset;
++}
++
++/*
++ * Read data from the shared memory and copy it directly into the
++ * provided buffer. Advance the read_last indicator which tells
++ * how many bytes have already been read.
++ */
++static int packet_read(struct packet *pak, size_t numbytes,
++		       char *buffer, size_t buffersize, int isuserbuffer)
++{
++	tpmif_t *tpmif = pak->tpmif;
++
++	/*
++	 * The first 4 bytes handed out are the instance number in network
++	 * byte order, after that come the data from the shared memory buffer.
++	 * NOTE(review): 'numbytes' only caps the prefix; payload uses 'buffersize'.
++	 */
++	u32 to_copy;
++	u32 offset = 0;
++	u32 room_left = buffersize;
++
++	if (pak->last_read < 4) {
++		/*
++		 * copy the (remaining part of the) instance number into the buffer
++		 */
++		u32 instance_no = htonl(pak->tpm_instance);
++		u32 last_read = pak->last_read;
++
++		to_copy = min_t(size_t, 4 - last_read, numbytes);
++
++		if (copy_to_buffer(&buffer[0],
++				   &(((u8 *) & instance_no)[last_read]),
++				   to_copy, isuserbuffer)) {
++			return -EFAULT;
++		}
++
++		pak->last_read += to_copy;
++		offset += to_copy;
++		room_left -= to_copy;
++	}
++
++	/*
++	 * If the packet has a data buffer appended, read from it,
++	 * otherwise fetch the payload from the frontend's shared pages.
++	 */
++	if (room_left > 0) {
++		if (pak->data_buffer) {
++			u32 to_copy = min_t(u32, pak->data_len - offset, room_left);	/* NOTE(review): bounded by this call's 'offset', not by last_read - confirm */
++			u32 last_read = pak->last_read - 4;
++
++			if (copy_to_buffer(&buffer[offset],
++					   &pak->data_buffer[last_read],
++					   to_copy, isuserbuffer)) {
++				return -EFAULT;
++			}
++			pak->last_read += to_copy;
++			offset += to_copy;
++		} else {
++			offset = packet_read_shmem(pak,
++						   tpmif,
++						   offset,
++						   buffer,
++						   isuserbuffer, room_left);
++		}
++	}
++	return offset;	/* bytes copied by this call, or the negative value from packet_read_shmem() */
++}
++
++/*
++ * Copy request payload from the frontend's granted pages into 'buffer',
++ * resuming at the position recorded in pak->last_read and copying at
++ * most 'room_left' bytes.  pak->last_read is advanced accordingly.
++ * Returns the advanced 'offset' into 'buffer', or -EFAULT.
++ */
++static int packet_read_shmem(struct packet *pak,
++			     tpmif_t * tpmif,
++			     u32 offset, char *buffer, int isuserbuffer,
++			     u32 room_left)
++{
++	u32 last_read = pak->last_read - 4;
++	u32 i = (last_read / PAGE_SIZE);
++	u32 pg_offset = last_read & (PAGE_SIZE - 1);
++	u32 to_copy;
++
++	/*
++	 * Start copying data at the page with index 'i'
++	 * and within that page at offset 'pg_offset'.
++	 * Copy a maximum of 'room_left' bytes, but never index past
++	 * the end of the ring, however large the caller's buffer is.
++	 */
++	to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left);
++	while (to_copy > 0 && i < TPMIF_TX_RING_SIZE) {
++		void *src;
++		int failed;
++		struct gnttab_map_grant_ref map_op;
++		struct gnttab_unmap_grant_ref unmap_op;
++		tpmif_tx_request_t *tx = &tpmif->tx->ring[i].req;
++
++		gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
++				  GNTMAP_host_map, tx->ref, tpmif->domid);
++
++		if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
++						       &map_op, 1))) {
++			BUG();
++		}
++
++		if (map_op.status) {
++			DPRINTK(" Grant table operation failure !\n");
++			return -EFAULT;
++		}
++
++		/* User may request more than what's available in this slot */
++		to_copy = min_t(u32, tx->size, to_copy);
++
++		DPRINTK("Copying from mapped memory at %08lx\n",
++			(unsigned long)(idx_to_kaddr(tpmif, i) |
++					(tx->addr & ~PAGE_MASK)));
++
++		src = (void *)(idx_to_kaddr(tpmif, i) |
++			       ((tx->addr & ~PAGE_MASK) + pg_offset));
++		failed = copy_to_buffer(&buffer[offset],
++					src, to_copy, isuserbuffer);
++
++		/* Unmap on every path so a failed copy leaks no mapping. */
++		gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
++				    GNTMAP_host_map, map_op.handle);
++
++		if (unlikely
++		    (HYPERVISOR_grant_table_op
++		     (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) {
++			BUG();
++		}
++
++		if (failed)
++			return -EFAULT;
++
++		DPRINTK("Data from TPM-FE of domain %d are %d %d %d %d\n",
++			tpmif->domid, buffer[offset], buffer[offset + 1],
++			buffer[offset + 2], buffer[offset + 3]);
++
++		offset += to_copy;
++		pg_offset = 0;
++		last_read += to_copy;
++		room_left -= to_copy;
++
++		to_copy = min_t(u32, PAGE_SIZE, room_left);
++		i++;
++	}			/* while data is wanted and slots remain */
++
++	/*
++	 * Adjust the last_read pointer
++	 */
++	pak->last_read = last_read + 4;
++	return offset;
++}
++
++/* ============================================================
++ * The file layer for reading data from this device
++ * ============================================================
++ */
++static int vtpm_op_open(struct inode *inode, struct file *f)
++{
++	unsigned long flags;
++	int rc = -EPERM;	/* result when the device is already open */
++
++	/* Only a single opener is admitted at any time. */
++	write_lock_irqsave(&dataex.pak_lock, flags);
++	if (!dataex.has_opener) {
++		dataex.has_opener = 1;
++		rc = 0;
++	}
++	write_unlock_irqrestore(&dataex.pak_lock, flags);
++	return rc;
++}
++
++/*
++ * Hand the next queued request to the reader: a 4-byte instance number
++ * followed by the payload.  Blocks while no request is pending.
++ */
++static ssize_t vtpm_op_read(struct file *file,
++			    char __user * data, size_t size, loff_t * offset)
++{
++	int ret_size = -ENODATA;
++	struct packet *pak;
++	unsigned long flags;
++
++	write_lock_irqsave(&dataex.pak_lock, flags);
++	if (dataex.aborted) {
++		dataex.aborted = 0;
++		dataex.copied_so_far = 0;
++		write_unlock_irqrestore(&dataex.pak_lock, flags);
++		return -EIO;
++	}
++
++	if (list_empty(&dataex.pending_pak)) {
++		write_unlock_irqrestore(&dataex.pak_lock, flags);
++		/* Sleep until a request is queued; a signal ends the wait. */
++		if (wait_event_interruptible(dataex.wait_queue,
++					     !list_empty(&dataex.pending_pak)))
++			return -ERESTARTSYS;
++		write_lock_irqsave(&dataex.pak_lock, flags);
++		dataex.copied_so_far = 0;
++	}
++
++	if (!list_empty(&dataex.pending_pak)) {
++		unsigned int left;
++
++		pak = list_entry(dataex.pending_pak.next, struct packet, next);
++		left = pak->data_len - dataex.copied_so_far;
++		list_del(&pak->next);
++		write_unlock_irqrestore(&dataex.pak_lock, flags);
++		DPRINTK("size given by app: %zu, available: %u\n", size, left);
++
++		/* The packet is off the list while it is copied unlocked. */
++		ret_size = packet_read(pak, min_t(size_t, size, left),
++				       data, size, 1);
++		write_lock_irqsave(&dataex.pak_lock, flags);
++
++		if (ret_size < 0) {
++			del_singleshot_timer_sync(&pak->processing_timer);
++			packet_free(pak);
++			dataex.copied_so_far = 0;
++		} else {
++			DPRINTK("Copied %d bytes to user buffer\n", ret_size);
++
++			dataex.copied_so_far += ret_size;
++			if (dataex.copied_so_far >= pak->data_len + 4) {
++				DPRINTK("All data from this packet given to app.\n");
++				del_singleshot_timer_sync(&pak->
++							  processing_timer);
++				list_add_tail(&pak->next, &dataex.current_pak);
++				/*
++				 * The more frontends that are handled at the same time,
++				 * the more time we give the TPM to process the request.
++				 */
++				mod_timer(&pak->processing_timer,
++					  jiffies + (num_frontends * 60 * HZ));
++				dataex.copied_so_far = 0;
++			} else {
++				list_add(&pak->next, &dataex.pending_pak);
++			}
++		}
++	}
++	write_unlock_irqrestore(&dataex.pak_lock, flags);
++	DPRINTK("Returning result from read to app: %d\n", ret_size);
++	return ret_size;
++}
++
++/*
++ * Write operation - only works after a previous read operation!
++ */
++static ssize_t vtpm_op_write(struct file *file,
++			     const char __user * data, size_t size,
++			     loff_t * offset)
++{
++	struct packet *pak;
++	int rc = 0;
++	unsigned int off = 4;
++	unsigned long flags;
++	struct vtpm_resp_hdr vrh;
++
++	/*
++	 * Minimum required packet size is:
++	 * 4 bytes for instance number
++	 * 2 bytes for tag
++	 * 4 bytes for paramSize
++	 * 4 bytes for the ordinal
++	 * sum: 14 bytes
++	 */
++	if (size < sizeof (vrh))
++		return -EFAULT;
++
++	if (copy_from_user(&vrh, data, sizeof (vrh)))
++		return -EFAULT;
++
++	/* malformed packet? paramSize plus the 4-byte prefix must equal the write size */
++	if ((off + ntohl(vrh.len_no)) != size)
++		return -EFAULT;
++
++	write_lock_irqsave(&dataex.pak_lock, flags);
++	pak = packet_find_instance(&dataex.current_pak,
++				   ntohl(vrh.instance_no));
++
++	if (pak == NULL) {
++		write_unlock_irqrestore(&dataex.pak_lock, flags);
++		DPRINTK(KERN_ALERT "No associated packet! (inst=%d)\n",
++		        ntohl(vrh.instance_no));
++		return -EFAULT;
++	}
++
++	del_singleshot_timer_sync(&pak->processing_timer);	/* NOTE(review): processing_timeout() takes pak_lock itself - can this deadlock on SMP? */
++	list_del(&pak->next);
++
++	write_unlock_irqrestore(&dataex.pak_lock, flags);
++
++	/*
++	 * The first 'off' bytes must be the instance number - skip them.
++	 */
++	size -= off;
++
++	rc = packet_write(pak, &data[off], size, 1);
++
++	if (rc > 0) {
++		/* report the skipped 4-byte prefix as written, too */
++		rc += off;
++	}
++	packet_free(pak);
++	return rc;
++}
++
++static int vtpm_op_release(struct inode *inode, struct file *file)
++{
++	unsigned long flags;
++
++	vtpm_release_packets(NULL, 1);	/* drop all queued packets; connected frontends get a failure response */
++	write_lock_irqsave(&dataex.pak_lock, flags);
++	dataex.has_opener = 0;	/* allow the next open() */
++	write_unlock_irqrestore(&dataex.pak_lock, flags);
++	return 0;
++}
++
++static unsigned int vtpm_op_poll(struct file *file,
++				 struct poll_table_struct *pts)
++{
++	/* Writing is always reported as possible. */
++	unsigned int mask = POLLOUT | POLLWRNORM;
++
++	poll_wait(file, &dataex.wait_queue, pts);
++	if (!list_empty(&dataex.pending_pak))
++		mask |= POLLIN | POLLRDNORM;	/* a request is waiting */
++	return mask;
++}
++
++static const struct file_operations vtpm_ops = {
++	.owner = THIS_MODULE,
++	.llseek = no_llseek,
++	.open = vtpm_op_open,		/* single opener only */
++	.read = vtpm_op_read,		/* delivers queued requests */
++	.write = vtpm_op_write,		/* accepts the response to a fully read request */
++	.release = vtpm_op_release,
++	.poll = vtpm_op_poll,
++};
++
++static struct miscdevice vtpms_miscdevice = {
++	.minor = 225,	/* fixed minor; NOTE(review): confirm 225 is reserved for this device */
++	.name = "vtpm",
++	.fops = &vtpm_ops,
++};
++
++/***************************************************************
++ Utility functions
++***************************************************************/
++
++/*
++ * Answer the request behind 'pak' with a canned TPM_FAIL response.
++ * Returns whatever packet_write() returns.
++ */
++static int tpm_send_fail_message(struct packet *pak, u8 req_tag)
++{
++	/* Response template: tag, paramSize (10 bytes in total), result. */
++	static const unsigned char tpm_error_message_fail[] = {
++		0x00, 0x00,
++		0x00, 0x00, 0x00, 0x0a,
++		0x00, 0x00, 0x00, 0x09	/* TPM_FAIL */
++	};
++	unsigned char reply[sizeof (tpm_error_message_fail)];
++
++	memcpy(reply, tpm_error_message_fail, sizeof (reply));
++
++	/*
++	 * Patch in the response tag: every response tag is the
++	 * request tag plus three.
++	 */
++	reply[1] = req_tag + 3;
++
++	/* Kernel-space source buffer; this also notifies the front-end. */
++	return packet_write(pak, reply, sizeof (reply), 0);
++}
++
++/*
++ * Drop the packets on 'head' that belong to 'tpmif' (all of them if
++ * 'tpmif' is NULL).  With 'send_msgs' set, connected front-ends get a
++ * failure response first.  Returns 1 if the entry at the head of the
++ * list was among the dropped ones, 0 otherwise.
++ */
++static int _vtpm_release_packets(struct list_head *head,
++				 tpmif_t * tpmif, int send_msgs)
++{
++	struct packet *pak, *following;
++	int position = 0;
++	int head_dropped = 0;
++
++	list_for_each_entry_safe(pak, following, head, next) {
++		position++;
++
++		/* Leave packets of other interfaces alone. */
++		if (tpmif != NULL && pak->tpmif != tpmif)
++			continue;
++
++		del_singleshot_timer_sync(&pak->processing_timer);
++		list_del(&pak->next);
++
++		if (send_msgs && pak->tpmif && pak->tpmif->status == CONNECTED)
++			tpm_send_fail_message(pak, pak->req_tag);
++
++		packet_free(pak);
++		if (position == 1)
++			head_dropped = 1;
++	}
++	return head_dropped;
++}
++
++int vtpm_release_packets(tpmif_t * tpmif, int send_msgs)
++{
++	unsigned long flags;
++	int head_dropped;
++
++	write_lock_irqsave(&dataex.pak_lock, flags);
++	/* vtpm_op_read() reports a dropped head of the pending queue as -EIO. */
++	head_dropped = _vtpm_release_packets(&dataex.pending_pak,
++					     tpmif, send_msgs);
++	dataex.aborted = head_dropped;
++	_vtpm_release_packets(&dataex.current_pak, tpmif, send_msgs);
++	write_unlock_irqrestore(&dataex.pak_lock, flags);
++	return 0;
++}
++
++static int vtpm_queue_packet(struct packet *pak)
++{
++	unsigned long flags;
++
++	/* Without an opener of the device nobody would pick the packet up. */
++	if (!dataex.has_opener)
++		return -EFAULT;
++
++	write_lock_irqsave(&dataex.pak_lock, flags);
++	list_add_tail(&pak->next, &dataex.pending_pak);
++	/* give the TPM some time to pick up the request */
++	mod_timer(&pak->processing_timer, jiffies + (30 * HZ));
++	write_unlock_irqrestore(&dataex.pak_lock, flags);
++
++	/* Let a sleeping reader or poller know about the new packet. */
++	wake_up_interruptible(&dataex.wait_queue);
++
++	return 0;
++}
++
++static int vtpm_receive(tpmif_t * tpmif, u32 size)
++{
++	int rc = 0;
++	/* Zeroed: the fail path reads buffer[] even if nothing was copied. */
++	unsigned char buffer[10] = { 0 };
++	__be32 *native_size;
++	struct packet *pak = packet_alloc(tpmif, size, 0, 0);
++
++	if (!pak)
++		return -ENOMEM;
++	/*
++	 * Read 10 bytes from the received buffer to test its
++	 * content for validity.
++	 */
++	if (sizeof (buffer) != packet_read(pak,
++					   sizeof (buffer), buffer,
++					   sizeof (buffer), 0)) {
++		rc = -EIO;
++		goto failexit;
++	}
++	/*
++	 * Reset the read pointer so all contents can be read again, and keep
++	 * the request tag so later failure responses carry the matching tag.
++	 */
++	packet_reset(pak);
++	pak->req_tag = buffer[4 + 1];
++
++	native_size = (__force __be32 *) (&buffer[4 + 2]);
++	/*
++	 * Verify that the size of the packet is correct
++	 * as indicated and that there's actually someone reading packets.
++	 * The minimum size of the packet is '10' for tag, size indicator
++	 * and ordinal.
++	 */
++	if (size < 10 ||
++	    be32_to_cpu(*native_size) != size ||
++	    0 == dataex.has_opener || tpmif->status != CONNECTED) {
++		rc = -EINVAL;
++		goto failexit;
++	}
++	rc = vtpm_queue_packet(pak);
++	if (rc < 0)
++		goto failexit;
++	return 0;
++
++      failexit:
++	tpm_send_fail_message(pak, buffer[4 + 1]);
++	packet_free(pak);
++	return rc;
++}
++
++/*
++ * Timeout function that gets invoked when a packet has not been processed
++ * during the timeout period.
++ * The packet must be on a list when this function is invoked. This
++ * also means that once its taken off a list, the timer must be
++ * destroyed as well.
++ */
++static void processing_timeout(unsigned long ptr)
++{
++	struct packet *pak = (struct packet *)ptr;
++	unsigned long flags;
++
++	write_lock_irqsave(&dataex.pak_lock, flags);
++	/*
++	 * Only touch the packet if it is still on one of the two lists;
++	 * the lookup compares pointers and never dereferences 'pak'.
++	 */
++	if (pak == packet_find_packet(&dataex.pending_pak, pak) ||
++	    pak == packet_find_packet(&dataex.current_pak, pak)) {
++		if ((pak->flags & PACKET_FLAG_DISCARD_RESPONSE) == 0) {
++			tpm_send_fail_message(pak, pak->req_tag);
++		}
++		/* discard future responses; the packet itself stays queued */
++		pak->flags |= PACKET_FLAG_DISCARD_RESPONSE;
++	}
++
++	write_unlock_irqrestore(&dataex.pak_lock, flags);
++}
++
++static void tpm_tx_action(unsigned long unused);
++static DECLARE_TASKLET(tpm_tx_tasklet, tpm_tx_action, 0);
++
++static struct list_head tpm_schedule_list;	/* interfaces with work for tpm_tx_action() */
++static spinlock_t tpm_schedule_list_lock;	/* taken around additions to and removals from the list */
++
++static inline void maybe_schedule_tx_action(void)
++{
++	smp_mb();
++	tasklet_schedule(&tpm_tx_tasklet);	/* runs tpm_tx_action() */
++}
++
++static inline int __on_tpm_schedule_list(tpmif_t * tpmif)
++{
++	return tpmif->list.next != NULL;	/* reset to NULL on removal; presumably NULL initially - confirm */
++}
++
++static void remove_from_tpm_schedule_list(tpmif_t * tpmif)
++{
++	spin_lock_irq(&tpm_schedule_list_lock);
++	if (likely(__on_tpm_schedule_list(tpmif))) {
++		list_del(&tpmif->list);
++		tpmif->list.next = NULL;	/* marks "not on the list" for __on_tpm_schedule_list() */
++		tpmif_put(tpmif);		/* reference taken when the interface was added */
++	}
++	spin_unlock_irq(&tpm_schedule_list_lock);
++}
++
++static void add_to_tpm_schedule_list_tail(tpmif_t * tpmif)
++{
++	if (__on_tpm_schedule_list(tpmif))	/* unlocked quick check; repeated under the lock below */
++		return;
++
++	spin_lock_irq(&tpm_schedule_list_lock);
++	if (!__on_tpm_schedule_list(tpmif) && tpmif->active) {
++		list_add_tail(&tpmif->list, &tpm_schedule_list);
++		tpmif_get(tpmif);	/* the list holds a reference while the interface is queued */
++	}
++	spin_unlock_irq(&tpm_schedule_list_lock);
++}
++
++void tpmif_schedule_work(tpmif_t * tpmif)
++{
++	add_to_tpm_schedule_list_tail(tpmif);	/* no-op if already queued or not active */
++	maybe_schedule_tx_action();
++}
++
++void tpmif_deschedule_work(tpmif_t * tpmif)
++{
++	remove_from_tpm_schedule_list(tpmif);	/* also drops the list's reference */
++}
++
++static void tpm_tx_action(unsigned long unused)
++{
++	struct list_head *ent;
++	tpmif_t *tpmif;
++	tpmif_tx_request_t *tx;
++
++	DPRINTK("%s: Getting data from front-end(s)!\n", __FUNCTION__);
++
++	while (!list_empty(&tpm_schedule_list)) {
++		/* Get a tpmif from the list with work to do. */
++		ent = tpm_schedule_list.next;
++		tpmif = list_entry(ent, tpmif_t, list);
++		tpmif_get(tpmif);	/* keep it alive: the removal below drops the list's reference */
++		remove_from_tpm_schedule_list(tpmif);
++
++		tx = &tpmif->tx->ring[0].req;	/* slot 0's size field is used as the request length */
++
++		/* pass it up; the result of vtpm_receive() is not evaluated */
++		vtpm_receive(tpmif, tx->size);
++
++		tpmif_put(tpmif);
++	}
++}
++
++irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++{
++	tpmif_t *source = (tpmif_t *) dev_id;
++
++	/* Queue the interface and kick the tasklet that fetches its request. */
++	tpmif_schedule_work(source);
++	return IRQ_HANDLED;
++}
++
++static int __init tpmback_init(void)
++{
++	int rc;
++
++	/*
++	 * Set up all state the file operations touch before the misc
++	 * device becomes visible; open() takes dataex.pak_lock right away.
++	 */
++	dataex_init(&dataex);
++	spin_lock_init(&tpm_schedule_list_lock);
++	INIT_LIST_HEAD(&tpm_schedule_list);
++
++	if ((rc = misc_register(&vtpms_miscdevice)) != 0) {
++		printk(KERN_ALERT
++		       "Could not register misc device for TPM BE.\n");
++		return rc;
++	}
++	tpmif_interface_init();
++	tpmif_xenbus_init();
++	printk(KERN_ALERT "Successfully initialized TPM backend driver.\n");
++	return 0;
++}
++
++module_init(tpmback_init);
++
++void __exit tpmback_exit(void)
++{
++	vtpm_release_packets(NULL, 0);	/* drop every queued packet without sending failure responses */
++	tpmif_xenbus_exit();
++	tpmif_interface_exit();
++	misc_deregister(&vtpms_miscdevice);	/* NOTE(review): no module_exit() for this function is visible here - confirm */
++}
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/tpmback/xenbus.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,289 @@
++/*  Xenbus code for tpmif backend
++    Copyright (C) 2005 IBM Corporation
++    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
++
++    This program is free software; you can redistribute it and/or modify
++    it under the terms of the GNU General Public License as published by
++    the Free Software Foundation; either version 2 of the License, or
++    (at your option) any later version.
++
++    This program is distributed in the hope that it will be useful,
++    but WITHOUT ANY WARRANTY; without even the implied warranty of
++    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++    GNU General Public License for more details.
++
++    You should have received a copy of the GNU General Public License
++    along with this program; if not, write to the Free Software
++    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++*/
++#include <stdarg.h>
++#include <linux/module.h>
++#include <xen/xenbus.h>
++#include "common.h"
++
++struct backend_info
++{
++	struct xenbus_device *dev;
++
++	/* our communications channel; created in connect_ring() */
++	tpmif_t *tpmif;
++
++	long int frontend_id;
++	long int instance; // instance of TPM
++	u8 is_instance_set;// whether instance number has been set
++
++	/* watch on this backend's own "instance" node, see tpmback_probe() */
++	struct xenbus_watch backend_watch;
++};
++
++static void maybe_connect(struct backend_info *be);
++static void connect(struct backend_info *be);
++static int connect_ring(struct backend_info *be);
++static void backend_changed(struct xenbus_watch *watch,
++			    const char **vec, unsigned int len);
++static void frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state);
++
++long int tpmback_get_instance(struct backend_info *bi)
++{
++	/* -1 tells the caller that no instance number is known. */
++	if (!bi || !bi->is_instance_set)
++		return -1;
++
++	return bi->instance;
++}
++
++static int tpmback_remove(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev->dev.driver_data;
++
++	if (!be) return 0;	/* nothing attached, or already removed */
++
++	if (be->backend_watch.node) {
++		unregister_xenbus_watch(&be->backend_watch);
++		kfree(be->backend_watch.node);
++		be->backend_watch.node = NULL;
++	}
++	if (be->tpmif) {
++		be->tpmif->bi = NULL;	/* 'be' is freed below; the interface must not point at it */
++		vtpm_release_packets(be->tpmif, 0);	/* drop its packets without failure responses */
++		tpmif_put(be->tpmif);
++		be->tpmif = NULL;
++	}
++	kfree(be);
++	dev->dev.driver_data = NULL;	/* makes a repeated call return early */
++	return 0;
++}
++
++static int tpmback_probe(struct xenbus_device *dev,
++			 const struct xenbus_device_id *id)
++{
++	int err;
++	struct backend_info *be = kzalloc(sizeof(struct backend_info),
++					  GFP_KERNEL);
++
++	if (!be) {
++		xenbus_dev_fatal(dev, -ENOMEM,
++				 "allocating backend structure");
++		return -ENOMEM;
++	}
++
++	be->is_instance_set = 0;	/* already zero after kzalloc() */
++	be->dev = dev;
++	dev->dev.driver_data = be;
++
++	err = xenbus_watch_path2(dev, dev->nodename,
++				 "instance", &be->backend_watch,
++				 backend_changed);	/* watch this device's "instance" node */
++	if (err) {
++		goto fail;
++	}
++
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err) {
++		goto fail;
++	}
++	return 0;
++fail:
++	tpmback_remove(dev);	/* undoes the watch (if registered) and frees 'be' */
++	return err;
++}
++
++
++static void backend_changed(struct xenbus_watch *watch,
++			    const char **vec, unsigned int len)
++{
++	int err;
++	long instance;
++	struct backend_info *be
++		= container_of(watch, struct backend_info, backend_watch);
++	struct xenbus_device *dev = be->dev;
++
++	err = xenbus_scanf(XBT_NIL, dev->nodename,
++			   "instance","%li", &instance);
++	if (XENBUS_EXIST_ERR(err)) {
++		return;	/* presumably: the node does not exist (yet) - confirm */
++	}
++
++	if (err != 1) {
++		xenbus_dev_fatal(dev, err, "reading instance");
++		return;
++	}
++
++	if (be->is_instance_set == 0) {	/* only the first value read is recorded */
++		be->instance = instance;
++		be->is_instance_set = 1;
++	}
++}
++
++
++static void frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state)
++{
++	struct backend_info *be = dev->dev.driver_data;
++	int err;
++
++	switch (frontend_state) {
++	case XenbusStateInitialising:
++	case XenbusStateInitialised:
++		break;	/* nothing to do until the frontend is connected */
++
++	case XenbusStateConnected:
++		err = connect_ring(be);	/* reports its own errors via xenbus */
++		if (err) {
++			return;
++		}
++		maybe_connect(be);
++		break;
++
++	case XenbusStateClosing:
++		be->instance = -1;
++		xenbus_switch_state(dev, XenbusStateClosing);
++		break;
++
++	case XenbusStateUnknown: /* keep it here */
++	case XenbusStateClosed:
++		xenbus_switch_state(dev, XenbusStateClosed);
++		device_unregister(&be->dev->dev);
++		tpmback_remove(dev);	/* NOTE(review): is 'dev' still valid after device_unregister()? confirm */
++		break;
++
++	default:
++		xenbus_dev_fatal(dev, -EINVAL,
++				 "saw state %d at frontend",
++				 frontend_state);
++		break;
++	}
++}
++
++
++
++static void maybe_connect(struct backend_info *be)
++{
++	/* Connect only when an interface exists
++	 * and it is not connected already. */
++	if (be->tpmif != NULL && be->tpmif->status != CONNECTED)
++		connect(be);
++}
++
++
++/* Publish "ready" in the backend's xenstore directory, then go Connected. */
++static void connect(struct backend_info *be)
++{
++	struct xenbus_transaction xbt;
++	int err;
++	struct xenbus_device *dev = be->dev;
++	unsigned long ready = 1;
++
++again:
++	err = xenbus_transaction_start(&xbt);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "starting transaction");
++		return;
++	}
++	err = xenbus_printf(xbt, dev->nodename, "ready", "%lu", ready);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "writing 'ready'");
++		goto abort;
++	}
++
++	err = xenbus_transaction_end(xbt, 0);
++	if (err == -EAGAIN)
++		goto again;
++	if (err) {
++		/* 'ready' was not committed: do not announce Connected. */
++		xenbus_dev_fatal(dev, err, "end of transaction");
++		return;
++	}
++	if (!xenbus_switch_state(dev, XenbusStateConnected))
++		be->tpmif->status = CONNECTED;
++	return;
++abort:
++	xenbus_transaction_end(xbt, 1);
++}
++
++
++static int connect_ring(struct backend_info *be)
++{
++	struct xenbus_device *dev = be->dev;
++	unsigned long ring_ref;
++	unsigned int evtchn;
++	int err;
++
++	err = xenbus_gather(XBT_NIL, dev->otherend,
++			    "ring-ref", "%lu", &ring_ref,
++			    "event-channel", "%u", &evtchn, NULL);	/* both values come from the frontend's directory */
++	if (err) {
++		xenbus_dev_error(dev, err,
++				 "reading %s/ring-ref and event-channel",
++				 dev->otherend);
++		return err;
++	}
++
++	if (!be->tpmif) {
++		be->tpmif = tpmif_find(dev->otherend_id, be);
++		if (IS_ERR(be->tpmif)) {
++			err = PTR_ERR(be->tpmif);
++			be->tpmif = NULL;	/* never keep an ERR_PTR value around */
++			xenbus_dev_fatal(dev,err,"creating vtpm interface");
++			return err;
++		}
++	}
++
++	if (be->tpmif != NULL) {
++		err = tpmif_map(be->tpmif, ring_ref, evtchn);	/* returns 0 right away if an irq is already bound */
++		if (err) {
++			xenbus_dev_error(dev, err,
++					 "mapping shared-frame %lu port %u",
++					 ring_ref, evtchn);
++			return err;
++		}
++	}
++	return 0;
++}
++
++
++static struct xenbus_device_id tpmback_ids[] = {
++	{ "vtpm" },
++	{ "" }	/* empty entry ends the table */
++};
++
++
++static struct xenbus_driver tpmback = {
++	.name = "vtpm",
++	.owner = THIS_MODULE,
++	.ids = tpmback_ids,
++	.probe = tpmback_probe,
++	.remove = tpmback_remove,
++	.otherend_changed = frontend_changed,	/* called with the frontend's new xenbus state */
++};
++
++
++void tpmif_xenbus_init(void)
++{
++	xenbus_register_backend(&tpmback);	/* NOTE(review): the result is not checked or propagated */
++}
++
++void tpmif_xenbus_exit(void)
++{
++	xenbus_unregister_driver(&tpmback);	/* counterpart of tpmif_xenbus_init() */
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/util.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,70 @@
++#include <linux/mm.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++#include <asm/uaccess.h>
++#include <xen/driver_util.h>
++
++struct class *get_xen_class(void)
++{
++	static struct class *xen_class;
++
++	/* Create the class on first use; later calls reuse the cached one. */
++	if (!xen_class) {
++		xen_class = class_create(THIS_MODULE, "xen");
++		if (IS_ERR(xen_class)) {
++			printk("Failed to create xen sysfs class.\n");
++			xen_class = NULL;
++		}
++	}
++	return xen_class;
++}
++}
++EXPORT_SYMBOL_GPL(get_xen_class);
++
++/* Todo: merge ia64 ('auto-translate physmap') versions of these functions. */
++#ifndef __ia64__
++
++static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
++{
++	/* Intentionally empty: apply_to_page_range() does all the hard work. */
++	return 0;
++}
++
++struct vm_struct *alloc_vm_area(unsigned long size)
++{
++	struct vm_struct *area;
++
++	area = get_vm_area(size, VM_IOREMAP);
++	if (area == NULL)
++		return NULL;
++
++	/*
++	 * This ensures that page tables are constructed for this region
++	 * of kernel virtual address space and mapped into init_mm.
++	 */
++	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
++				area->size, f, NULL)) {
++		free_vm_area(area);	/* give the address range back on failure */
++		return NULL;
++	}
++
++	/* Map page directories into every address space. */
++#ifdef CONFIG_X86
++	vmalloc_sync_all();
++#endif
++
++	return area;	/* to be released with free_vm_area() */
++}
++EXPORT_SYMBOL_GPL(alloc_vm_area);
++
++void free_vm_area(struct vm_struct *area)
++{
++	/* The lookup by address must hand back this very area. */
++	struct vm_struct *removed = remove_vm_area(area->addr);
++
++	BUG_ON(removed != area);
++	kfree(area);
++}
++EXPORT_SYMBOL_GPL(free_vm_area);
++
++#endif /* !__ia64__ */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/Makefile	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,9 @@
++obj-y += xenbus_client.o xenbus_comms.o xenbus_xs.o xenbus_probe.o
++obj-$(CONFIG_XEN_BACKEND) += xenbus_be.o
++
++xenbus_be-objs =
++xenbus_be-objs += xenbus_backend_client.o
++
++xenbus-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
++obj-y += $(xenbus-y) $(xenbus-m)
++obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/xenbus_backend_client.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,147 @@
++/******************************************************************************
++ * Backend-client-facing interface for the Xenbus driver.  In other words, the
++ * interface between the Xenbus and the device-specific code in the backend
++ * driver.
++ *
++ * Copyright (C) 2005-2006 XenSource Ltd
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/err.h>
++#include <xen/gnttab.h>
++#include <xen/xenbus.h>
++#include <xen/driver_util.h>
++
++/*
++ * Map the peer's grant @gnt_ref into a newly allocated one-page vm area.
++ * Based on Rusty Russell's skeleton driver's map_page.
++ */
++struct vm_struct *xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref)
++{
++	struct gnttab_map_grant_ref map;
++	struct vm_struct *vma = alloc_vm_area(PAGE_SIZE);
++
++	if (vma == NULL)
++		return ERR_PTR(-ENOMEM);
++
++	gnttab_set_map_op(&map, (unsigned long)vma->addr, GNTMAP_host_map,
++			  gnt_ref, dev->otherend_id);
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map, 1))
++		BUG();
++
++	if (map.status == GNTST_okay) {
++		/* Stuff the handle in an unused field */
++		vma->phys_addr = (unsigned long)map.handle;
++		return vma;
++	}
++
++	free_vm_area(vma);
++	xenbus_dev_fatal(dev, map.status,
++			 "mapping in shared page %d from domain %d",
++			 gnt_ref, dev->otherend_id);
++	BUG_ON(!IS_ERR(ERR_PTR(map.status)));
++	return ERR_PTR(map.status);
++}
++EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
++
++
++/* Map grant @gnt_ref at @vaddr; *handle is written only on GNTST_okay. */
++int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
++		   grant_handle_t *handle, void *vaddr)
++{
++	struct gnttab_map_grant_ref map;
++
++	gnttab_set_map_op(&map, (unsigned long)vaddr, GNTMAP_host_map,
++			  gnt_ref, dev->otherend_id);
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map, 1))
++		BUG();
++
++	if (map.status == GNTST_okay)
++		*handle = map.handle;
++	else
++		xenbus_dev_fatal(dev, map.status,
++				 "mapping in shared page %d from domain %d",
++				 gnt_ref, dev->otherend_id);
++	return map.status;
++}
++EXPORT_SYMBOL_GPL(xenbus_map_ring);
++
++
++/* Based on Rusty Russell's skeleton driver's unmap_page. */
++/* The vm area is freed only when the grant unmap succeeds. */
++int xenbus_unmap_ring_vfree(struct xenbus_device *dev, struct vm_struct *area)
++{
++	struct gnttab_unmap_grant_ref unmap;
++
++	gnttab_set_unmap_op(&unmap, (unsigned long)area->addr, GNTMAP_host_map,
++			    (grant_handle_t)area->phys_addr);
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1))
++		BUG();
++
++	if (unmap.status != GNTST_okay)
++		xenbus_dev_error(dev, unmap.status,
++				 "unmapping page at handle %d error %d",
++				 (int16_t)area->phys_addr, unmap.status);
++	else
++		free_vm_area(area);
++
++	return unmap.status;
++}
++EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
++
++
++/* Unmap grant @handle from @vaddr; a failure is reported, not fatal. */
++int xenbus_unmap_ring(struct xenbus_device *dev,
++		     grant_handle_t handle, void *vaddr)
++{
++	struct gnttab_unmap_grant_ref unmap;
++
++	gnttab_set_unmap_op(&unmap, (unsigned long)vaddr, GNTMAP_host_map,
++			    handle);
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1))
++		BUG();
++
++	if (unmap.status != GNTST_okay)
++		xenbus_dev_error(dev, unmap.status,
++				 "unmapping page at handle %d error %d",
++				 handle, unmap.status);
++	return unmap.status;
++}
++EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
++
++/* Value of the device's "online" node; 0 when it cannot be read. */
++int xenbus_dev_is_online(struct xenbus_device *dev)
++{
++	int val;
++
++	if (xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val) != 1)
++		return 0; /* no online node present */
++
++	return val;
++}
++EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
++
++MODULE_LICENSE("Dual BSD/GPL");
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/xenbus_client.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,283 @@
++/******************************************************************************
++ * Client-facing interface for the Xenbus driver.  In other words, the
++ * interface between the Xenbus and the device-specific code, be it the
++ * frontend or the backend of that driver.
++ *
++ * Copyright (C) 2005 XenSource Ltd
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <xen/evtchn.h>
++#include <xen/gnttab.h>
++#include <xen/xenbus.h>
++#include <xen/driver_util.h>
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++#define DPRINTK(fmt, args...) \
++    pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
++
++const char *xenbus_strstate(enum xenbus_state state)
++{
++	static const char *const name[] = {	/* indexed by enum xenbus_state */
++		[ XenbusStateUnknown      ] = "Unknown",
++		[ XenbusStateInitialising ] = "Initialising",
++		[ XenbusStateInitWait     ] = "InitWait",
++		[ XenbusStateInitialised  ] = "Initialised",
++		[ XenbusStateConnected    ] = "Connected",
++		[ XenbusStateClosing      ] = "Closing",
++		[ XenbusStateClosed	  ] = "Closed",
++	};
++	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; /* past the table */
++}
++EXPORT_SYMBOL_GPL(xenbus_strstate);
++
++/* Register @watch on @path with @callback; the watch is reset on failure. */
++int xenbus_watch_path(struct xenbus_device *dev, const char *path,
++		      struct xenbus_watch *watch,
++		      void (*callback)(struct xenbus_watch *,
++				       const char **, unsigned int))
++{
++	int err;
++
++	watch->node = path;
++	watch->callback = callback;
++
++	err = register_xenbus_watch(watch);
++	if (!err)
++		return 0;
++
++	watch->node = NULL;
++	watch->callback = NULL;
++	xenbus_dev_fatal(dev, err, "adding watch on %s", path);
++	return err;
++}
++EXPORT_SYMBOL_GPL(xenbus_watch_path);
++
++
++/* Like xenbus_watch_path(), but watches the node "<path>/<path2>". */
++int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
++		       const char *path2, struct xenbus_watch *watch,
++		       void (*callback)(struct xenbus_watch *,
++					const char **, unsigned int))
++{
++	char *node = kasprintf(GFP_KERNEL, "%s/%s", path, path2);
++	int err;
++	if (node == NULL) {
++		xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
++		return -ENOMEM;
++	}
++	err = xenbus_watch_path(dev, node, watch, callback);
++	if (err != 0)
++		kfree(node);	/* otherwise the watch keeps the string */
++	return err;
++}
++EXPORT_SYMBOL_GPL(xenbus_watch_path2);
++
++
++int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
++{
++	/* We check whether the state is currently set to the given value, and
++	   if not, then the state is set.  We don't want to unconditionally
++	   write the given state, because we don't want to fire watches
++	   unnecessarily.  Furthermore, if the node has gone, we don't write
++	   to it, as the device will be tearing down, and we don't want to
++	   resurrect that directory.
++
++	   Note that, because of this cached value of our state, this function
++	   will not work inside a Xenstore transaction (something it was
++	   trying to do in the past) because dev->state would not get reset if
++	   the transaction was aborted.
++	   Returns 0, also when nothing was written, or the write error.
++	 */
++
++	int current_state;	/* read only to see whether the node exists */
++	int err;
++
++	if (state == dev->state)
++		return 0;
++
++	err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
++			   &current_state);
++	if (err != 1)	/* node could not be read: do not recreate it */
++		return 0;
++
++	err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
++	if (err) {
++		if (state != XenbusStateClosing) /* Avoid looping */
++			xenbus_dev_fatal(dev, err, "writing new state");
++		return err;
++	}
++
++	dev->state = state;
++
++	return 0;
++}
++EXPORT_SYMBOL_GPL(xenbus_switch_state);
++
++int xenbus_frontend_closed(struct xenbus_device *dev)
++{
++	xenbus_switch_state(dev, XenbusStateClosed);	/* result is ignored */
++	complete(&dev->down);	/* signal the dev->down completion */
++	return 0;
++}
++EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
++
++/**
++ * Return the path "error/<nodename>" for the given device, or NULL if the
++ * allocation fails.  A non-NULL result must be kfree()d by the caller.
++ */
++static char *error_path(struct xenbus_device *dev)
++{
++	return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
++}
++
++
++/*
++ * Report an error against @dev: format "<-err> <message>" from @fmt/@ap,
++ * log it with dev_err() and try to store it in xenstore under
++ * "error/<nodename>/error".  Best effort: a failure to build the path or
++ * to write the node is only logged.  The text is limited to
++ * PRINTF_BUFFER_SIZE bytes; longer messages are truncated.
++ */
++void _dev_error(struct xenbus_device *dev, int err, const char *fmt,
++		va_list ap)
++{
++	unsigned int len;
++	char *printf_buffer, *path_buffer = NULL;
++
++#define PRINTF_BUFFER_SIZE 4096
++	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
++	if (printf_buffer == NULL)
++		goto fail;
++
++	len = sprintf(printf_buffer, "%i ", -err);
++	/*
++	 * vsnprintf() never writes more than the size it is given and always
++	 * NUL-terminates, so an over-long message is truncated here instead
++	 * of bringing the kernel down.
++	 */
++	vsnprintf(printf_buffer + len, PRINTF_BUFFER_SIZE - len, fmt, ap);
++
++	dev_err(&dev->dev, "%s\n", printf_buffer);
++
++	path_buffer = error_path(dev);
++	if (path_buffer == NULL ||
++	    xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0)
++		printk("xenbus: failed to write error node for %s (%s)\n",
++		       dev->nodename, printf_buffer);
++
++fail:
++	/* kfree(NULL) is a no-op, so no NULL guards are needed. */
++	kfree(printf_buffer);
++	kfree(path_buffer);
++}
++
++
++void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
++		      ...)
++{
++	va_list ap;
++
++	va_start(ap, fmt);
++	_dev_error(dev, err, fmt, ap);	/* report only; no state switch here */
++	va_end(ap);
++}
++EXPORT_SYMBOL_GPL(xenbus_dev_error);
++
++
++void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
++		      ...)
++{
++	va_list ap;
++
++	va_start(ap, fmt);
++	_dev_error(dev, err, fmt, ap);
++	va_end(ap);
++	/* Unlike xenbus_dev_error(), also switch the device to Closing. */
++	xenbus_switch_state(dev, XenbusStateClosing);
++}
++EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
++
++
++int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
++{
++	int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
++	if (err < 0)	/* negative results are reported as fatal */
++		xenbus_dev_fatal(dev, err, "granting access to ring page");
++	return err;	/* the grant call's result, passed through unchanged */
++}
++EXPORT_SYMBOL_GPL(xenbus_grant_ring);
++
++
++/* Allocate an unbound event channel (remote = other end); sets *port. */
++int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
++{
++	struct evtchn_alloc_unbound req;
++	int rc;
++
++	req.dom = DOMID_SELF;
++	req.remote_dom = dev->otherend_id;
++	rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &req);
++	if (rc != 0) {
++		xenbus_dev_fatal(dev, rc, "allocating event channel");
++		return rc;
++	}
++
++	*port = req.port;
++	return 0;
++}
++EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
++
++
++/* Close event channel @port; a failure is reported but not fatal. */
++int xenbus_free_evtchn(struct xenbus_device *dev, int port)
++{
++	struct evtchn_close req;
++	int rc;
++
++	req.port = port;
++	rc = HYPERVISOR_event_channel_op(EVTCHNOP_close, &req);
++	if (rc != 0)
++		xenbus_dev_error(dev, rc, "freeing event channel %d", port);
++
++	return rc;
++}
++EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
++
++
++/* Read "state" under @path; XenbusStateUnknown if it cannot be read. */
++enum xenbus_state xenbus_read_driver_state(const char *path)
++{
++	enum xenbus_state state;
++
++	if (xenbus_gather(XBT_NIL, path, "state", "%d", &state, NULL) != 0)
++		return XenbusStateUnknown;
++	return state;
++}
++EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/xenbus_comms.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,232 @@
++/******************************************************************************
++ * xenbus_comms.c
++ *
++ * Low level code to talk to Xen Store: ringbuffer and event channel.
++ *
++ * Copyright (C) 2005 Rusty Russell, IBM Corporation
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/wait.h>
++#include <linux/interrupt.h>
++#include <linux/sched.h>
++#include <linux/err.h>
++#include <linux/ptrace.h>
++#include <xen/evtchn.h>
++#include <xen/xenbus.h>
++
++#include <asm/hypervisor.h>
++
++#include "xenbus_comms.h"
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++static int xenbus_irq;
++
++extern void xenbus_probe(void *);
++extern int xenstored_ready;
++static DECLARE_WORK(probe_work, xenbus_probe, NULL);
++
++static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
++
++static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
++{
++	if (unlikely(xenstored_ready == 0)) {	/* first event seen */
++		xenstored_ready = 1;
++		schedule_work(&probe_work);	/* runs xenbus_probe() */
++	}
++
++	wake_up(&xb_waitq);	/* sleepers in xb_read()/xb_write() re-check */
++	return IRQ_HANDLED;
++}
++
++static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
++{
++	return ((prod - cons) <= XENSTORE_RING_SIZE); /* at most one ring apart */
++}
++
++/* Write position for @prod; *len = min(bytes to ring end, free bytes). */
++static void *get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
++			      char *buf, uint32_t *len)
++{
++	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
++	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
++		*len = XENSTORE_RING_SIZE - (prod - cons);
++	return buf + MASK_XENSTORE_IDX(prod);
++}
++
++/* Read position for @cons; *len = min(bytes to ring end, unread bytes). */
++static const void *get_input_chunk(XENSTORE_RING_IDX cons,
++		XENSTORE_RING_IDX prod, const char *buf, uint32_t *len)
++{
++	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
++	if ((prod - cons) < *len)
++		*len = prod - cons;
++	return buf + MASK_XENSTORE_IDX(cons);
++}
++
++/* Write @len bytes to the request ring, sleeping while the ring is full. */
++/* Returns 0, the wait's error if interrupted, or -EIO on bad indexes. */
++int xb_write(const void *data, unsigned len)
++{
++	struct xenstore_domain_interface *intf = xen_store_interface;
++	XENSTORE_RING_IDX cons, prod;
++	int rc;
++
++	while (len != 0) {
++		void *dst;
++		unsigned int avail;
++
++		rc = wait_event_interruptible(xb_waitq,
++		     (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE);
++		if (rc < 0)
++			return rc;
++
++		/* Read indexes, then verify. */
++		cons = intf->req_cons;
++		prod = intf->req_prod;
++		if (!check_indexes(cons, prod)) {
++			intf->req_cons = intf->req_prod = 0;
++			return -EIO;
++		}
++
++		dst = get_output_chunk(cons, prod, intf->req, &avail);
++		if (avail == 0)
++			continue;
++		if (avail > len)
++			avail = len;
++
++		/* Must write data /after/ reading the consumer index. */
++		mb();
++
++		memcpy(dst, data, avail);
++		data += avail;
++		len -= avail;
++
++		/* Other side must not see new producer until data is there. */
++		wmb();
++		intf->req_prod += avail;
++
++		/* Implies mb(): other side will see the updated producer. */
++		notify_remote_via_evtchn(xen_store_evtchn);
++	}
++
++	return 0;
++}
++
++int xb_data_to_read(void)
++{
++	struct xenstore_domain_interface *intf = xen_store_interface;
++	return (intf->rsp_cons != intf->rsp_prod); /* unread response bytes? */
++}
++
++int xb_wait_for_data_to_read(void)
++{
++	return wait_event_interruptible(xb_waitq, xb_data_to_read()); /* sleeps */
++}
++
++/* Read exactly @len bytes from the response ring into @data; may sleep. */
++int xb_read(void *data, unsigned len)
++{
++	struct xenstore_domain_interface *intf = xen_store_interface;
++	XENSTORE_RING_IDX cons, prod;
++	int rc;
++
++	while (len != 0) {
++		unsigned int avail;
++		const char *src;
++
++		rc = xb_wait_for_data_to_read();
++		if (rc < 0)
++			return rc;
++
++		/* Read indexes, then verify. */
++		cons = intf->rsp_cons;
++		prod = intf->rsp_prod;
++		if (!check_indexes(cons, prod)) {
++			intf->rsp_cons = intf->rsp_prod = 0;
++			return -EIO;
++		}
++
++		src = get_input_chunk(cons, prod, intf->rsp, &avail);
++		if (avail == 0)
++			continue;
++		if (avail > len)
++			avail = len;
++
++		/* Must read data /after/ reading the producer index. */
++		rmb();
++
++		memcpy(data, src, avail);
++		data += avail;
++		len -= avail;
++
++		/* Other side must not see free space until we've copied out */
++		mb();
++		intf->rsp_cons += avail;
++		pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
++
++		/* Implies mb(): other side will see the updated consumer. */
++		notify_remote_via_evtchn(xen_store_evtchn);
++	}
++
++	return 0;
++}
++
++/* Set up interrupt handler off store event channel (may be called again). */
++int xb_init_comms(void)
++{
++	struct xenstore_domain_interface *intf = xen_store_interface;
++	int err;
++
++	if (intf->req_prod != intf->req_cons)
++		printk(KERN_ERR "XENBUS request ring is not quiescent "
++		       "(%08x:%08x)!\n", intf->req_cons, intf->req_prod);
++
++	if (intf->rsp_prod != intf->rsp_cons) {
++		printk(KERN_WARNING "XENBUS response ring is not quiescent "
++		       "(%08x:%08x): fixing up\n",
++		       intf->rsp_cons, intf->rsp_prod);
++		intf->rsp_cons = intf->rsp_prod;
++	}
++
++	if (xenbus_irq) {
++		unbind_from_irqhandler(xenbus_irq, &xb_waitq);
++		xenbus_irq = 0;	/* never unbind it twice if the rebind fails */
++	}
++
++	err = bind_caller_port_to_irqhandler(xen_store_evtchn, wake_waiting,
++					     0, "xenbus", &xb_waitq);
++	if (err <= 0) {
++		printk(KERN_ERR "XENBUS request irq failed %i\n", err);
++		return err;
++	}
++
++	xenbus_irq = err;
++	return 0;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/xenbus_comms.h	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,46 @@
++/*
++ * Private include for xenbus communications.
++ * 
++ * Copyright (C) 2005 Rusty Russell, IBM Corporation
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef _XENBUS_COMMS_H
++#define _XENBUS_COMMS_H
++
++int xs_init(void);
++int xb_init_comms(void);
++
++/* Low level routines. */
++int xb_write(const void *data, unsigned len);
++int xb_read(void *data, unsigned len);
++int xb_data_to_read(void);
++int xb_wait_for_data_to_read(void);
++int xs_input_avail(void);
++extern struct xenstore_domain_interface *xen_store_interface;
++extern int xen_store_evtchn;
++
++#endif /* _XENBUS_COMMS_H */
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/xenbus_dev.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,404 @@
++/*
++ * xenbus_dev.c
++ * 
++ * Driver giving user-space access to the kernel's xenbus connection
++ * to xenstore.
++ * 
++ * Copyright (c) 2005, Christian Limpach
++ * Copyright (c) 2005, Rusty Russell, IBM Corporation
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/uio.h>
++#include <linux/notifier.h>
++#include <linux/wait.h>
++#include <linux/fs.h>
++#include <linux/poll.h>
++#include <linux/mutex.h>
++
++#include "xenbus_comms.h"
++
++#include <asm/uaccess.h>
++#include <asm/hypervisor.h>
++#include <xen/xenbus.h>
++#include <xen/xen_proc.h>
++#include <asm/hypervisor.h>
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++struct xenbus_dev_transaction {
++	struct list_head list;	/* entry in xenbus_dev_data.transactions */
++	struct xenbus_transaction handle;	/* id parsed from the store's reply */
++};
++
++struct read_buffer {
++	struct list_head list;	/* entry in xenbus_dev_data.read_buffers */
++	unsigned int cons;	/* index of the next byte to hand to read() */
++	unsigned int len;	/* number of bytes stored in msg[] */
++	char msg[];
++};
++
++struct xenbus_dev_data {
++	/* In-progress transactions (struct xenbus_dev_transaction). */
++	struct list_head transactions;
++
++	/* Active watches (struct watch_adapter). */
++	struct list_head watches;
++
++	/* Partial request: len bytes buffered so far; msg overlays buffer. */
++	unsigned int len;
++	union {
++		struct xsd_sockmsg msg;
++		char buffer[PAGE_SIZE];
++	} u;
++
++	/* Response queue. */
++	struct list_head read_buffers;
++	wait_queue_head_t read_waitq;
++
++	struct mutex reply_mutex;	/* held while queueing or reading replies */
++};
++
++static struct proc_dir_entry *xenbus_dev_intf;
++
++/* Copy queued replies to @ubuf; -EFAULT if not even one byte was copied. */
++static ssize_t xenbus_dev_read(struct file *filp, char __user *ubuf,
++			       size_t len, loff_t *ppos)
++{
++	struct xenbus_dev_data *u = filp->private_data;
++	struct read_buffer *rb;
++	ssize_t i = 0;
++	int ret;
++
++	mutex_lock(&u->reply_mutex);
++	while (list_empty(&u->read_buffers)) {
++		mutex_unlock(&u->reply_mutex);
++		ret = wait_event_interruptible(u->read_waitq,
++					       !list_empty(&u->read_buffers));
++		if (ret)
++			return ret;
++		mutex_lock(&u->reply_mutex);
++	}
++	rb = list_entry(u->read_buffers.next, struct read_buffer, list);
++	while (i < len) {
++		if (put_user(rb->msg[rb->cons], ubuf + i)) {
++			i = i ? i : -EFAULT;	/* the byte stays queued */
++			break;
++		}
++		i++;
++		if (++rb->cons < rb->len)
++			continue;
++		list_del(&rb->list);
++		kfree(rb);
++		if (list_empty(&u->read_buffers))
++			break;
++		rb = list_entry(u->read_buffers.next, struct read_buffer, list);
++	}
++	mutex_unlock(&u->reply_mutex);
++	return i;
++}
++
++/* Queue a copy of @data for readers; visible callers hold u->reply_mutex. */
++static void queue_reply(struct xenbus_dev_data *u, char *data, unsigned int len)
++{
++	struct read_buffer *rb;
++
++	if (len == 0)
++		return;
++
++	rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
++	BUG_ON(rb == NULL);	/* NOTE(review): allocation failure is fatal here */
++
++	rb->cons = 0;
++	rb->len = len;
++
++	memcpy(rb->msg, data, len);
++
++	list_add_tail(&rb->list, &u->read_buffers);
++
++	wake_up(&u->read_waitq);
++}
++
++/* Ties a watch registered through this device to the open file's state. */
++struct watch_adapter {
++	struct list_head list;		/* entry in xenbus_dev_data.watches */
++	struct xenbus_watch watch;
++	struct xenbus_dev_data *dev_data;	/* state of the owning open file */
++	char *token;			/* token copy, freed with the adapter */
++};
++
++static void free_watch_adapter (struct watch_adapter *watch)
++{
++	kfree(watch->watch.node);	/* path copy made for XS_WATCH */
++	kfree(watch->token);		/* token copy made for XS_WATCH */
++	kfree(watch);
++}
++
++/* Watch callback: queue an XS_WATCH_EVENT message for this file's reader. */
++static void watch_fired(struct xenbus_watch *watch, const char **vec,
++			unsigned int len)
++{
++	struct watch_adapter *adap =
++            container_of(watch, struct watch_adapter, watch);
++	struct xsd_sockmsg hdr;
++	const char *path, *token;
++	int path_len, tok_len, body_len;
++
++	path = vec[XS_WATCH_PATH];
++	token = adap->token;
++
++	path_len = strlen(path) + 1;
++	tok_len = strlen(token) + 1;
++	body_len = path_len + tok_len;
++
++	hdr.type = XS_WATCH_EVENT;
++	hdr.len = body_len;
++
++	mutex_lock(&adap->dev_data->reply_mutex);
++	queue_reply(adap->dev_data, (char *)&hdr, sizeof(hdr));
++	queue_reply(adap->dev_data, (char *)path, path_len);
++	queue_reply(adap->dev_data, (char *)token, tok_len);
++	mutex_unlock(&adap->dev_data->reply_mutex);
++}
++
++static LIST_HEAD(watch_list);
++
++/* Buffer a user-space request; once complete, run it and queue the reply. */
++static ssize_t xenbus_dev_write(struct file *filp, const char __user *ubuf,
++				size_t len, loff_t *ppos)
++{
++	struct xenbus_dev_data *u = filp->private_data;
++	struct xenbus_dev_transaction *trans = NULL;
++	uint32_t msg_type;
++	void *reply;
++	char *path, *token;
++	struct watch_adapter *watch, *tmp_watch;
++	int err, rc = len;
++
++	if ((len + u->len) > sizeof(u->u.buffer)) {
++		rc = -EINVAL;
++		goto out;
++	}
++
++	if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) {
++		rc = -EFAULT;
++		goto out;
++	}
++
++	u->len += len;
++	if ((u->len < sizeof(u->u.msg)) ||
++	    (u->len < (sizeof(u->u.msg) + u->u.msg.len)))
++		return rc;
++
++	msg_type = u->u.msg.type;
++	switch (msg_type) {
++	case XS_TRANSACTION_START:
++	case XS_TRANSACTION_END:
++	case XS_DIRECTORY:
++	case XS_READ:
++	case XS_GET_PERMS:
++	case XS_RELEASE:
++	case XS_GET_DOMAIN_PATH:
++	case XS_WRITE:
++	case XS_MKDIR:
++	case XS_RM:
++	case XS_SET_PERMS:
++		if (msg_type == XS_TRANSACTION_START) {
++			trans = kmalloc(sizeof(*trans), GFP_KERNEL);
++			if (!trans) {
++				rc = -ENOMEM;
++				goto out;
++			}
++		}
++
++		reply = xenbus_dev_request_and_reply(&u->u.msg);
++		if (IS_ERR(reply)) {
++			kfree(trans);
++			rc = PTR_ERR(reply);
++			goto out;
++		}
++
++		if (msg_type == XS_TRANSACTION_START) {
++			trans->handle.id = simple_strtoul(reply, NULL, 0);
++			list_add(&trans->list, &u->transactions);
++		} else if (msg_type == XS_TRANSACTION_END) {
++			list_for_each_entry(trans, &u->transactions, list)
++				if (trans->handle.id == u->u.msg.tx_id)
++					break;
++			BUG_ON(&trans->list == &u->transactions);
++			list_del(&trans->list);
++			kfree(trans);
++		}
++		mutex_lock(&u->reply_mutex);
++		queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
++		queue_reply(u, (char *)reply, u->u.msg.len);
++		mutex_unlock(&u->reply_mutex);
++		kfree(reply);
++		break;
++
++	case XS_WATCH:
++	case XS_UNWATCH: {
++		static const char *XS_RESP = "OK";
++		struct xsd_sockmsg hdr;
++
++		path = u->u.buffer + sizeof(u->u.msg);
++		token = memchr(path, 0, u->u.msg.len);
++		if (token != NULL)
++			token++;	/* the token follows the path's NUL */
++		if (token == NULL ||	/* both must end inside the body */
++		    memchr(token, 0, path + u->u.msg.len - token) == NULL) {
++			rc = -EILSEQ;
++			goto out;
++		}
++		if (msg_type == XS_WATCH) {
++			err = -ENOMEM;	/* until every allocation succeeded */
++			watch = kmalloc(sizeof(*watch), GFP_KERNEL);
++			if (watch == NULL) {
++				rc = err;
++				goto out;
++			}
++			watch->watch.node = kstrdup(path, GFP_KERNEL);
++			watch->token = kstrdup(token, GFP_KERNEL);
++			watch->watch.callback = watch_fired;
++			watch->dev_data = u;
++			if (watch->watch.node && watch->token)
++				err = register_xenbus_watch(&watch->watch);
++			if (err) {
++				free_watch_adapter(watch);
++				rc = err;
++				goto out;
++			}
++			list_add(&watch->list, &u->watches);
++		} else {
++			list_for_each_entry_safe(watch, tmp_watch,
++						 &u->watches, list) {
++				if (!strcmp(watch->token, token) &&
++				    !strcmp(watch->watch.node, path)) {
++					unregister_xenbus_watch(&watch->watch);
++					list_del(&watch->list);
++					free_watch_adapter(watch);
++					break;
++				}
++			}
++		}
++
++		hdr.type = msg_type;
++		hdr.len = strlen(XS_RESP) + 1;
++		mutex_lock(&u->reply_mutex);
++		queue_reply(u, (char *)&hdr, sizeof(hdr));
++		queue_reply(u, (char *)XS_RESP, hdr.len);
++		mutex_unlock(&u->reply_mutex);
++		break;
++	}
++
++	default:
++		rc = -EINVAL;
++		break;
++	}
++
++ out:
++	u->len = 0;
++	return rc;
++}
++
++/* Allocate per-open state; -ENOENT while xen_store_evtchn is still 0. */
++static int xenbus_dev_open(struct inode *inode, struct file *filp)
++{
++	struct xenbus_dev_data *u;
++
++	if (xen_store_evtchn == 0)
++		return -ENOENT;
++
++	nonseekable_open(inode, filp);
++
++	u = kzalloc(sizeof(*u), GFP_KERNEL);
++	if (u == NULL)
++		return -ENOMEM;
++
++	INIT_LIST_HEAD(&u->transactions);
++	INIT_LIST_HEAD(&u->watches);
++	INIT_LIST_HEAD(&u->read_buffers);
++	init_waitqueue_head(&u->read_waitq);
++	mutex_init(&u->reply_mutex);
++
++	filp->private_data = u;
++
++	return 0;
++}
++
++static int xenbus_dev_release(struct inode *inode, struct file *filp)
++{
++	struct xenbus_dev_data *u = filp->private_data;
++	struct xenbus_dev_transaction *trans, *tmp;
++	struct watch_adapter *watch, *tmp_watch;
++
++	list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
++		xenbus_transaction_end(trans->handle, 1);
++		list_del(&trans->list);
++		kfree(trans);
++	}
++
++	list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
++		unregister_xenbus_watch(&watch->watch);
++		list_del(&watch->list);
++		free_watch_adapter(watch);
++	}
++
++	kfree(u);
++
++	return 0;
++}
++
++static unsigned int xenbus_dev_poll(struct file *file, poll_table *wait)
++{
++	struct xenbus_dev_data *u = file->private_data;
++
++	poll_wait(file, &u->read_waitq, wait);
++	if (!list_empty(&u->read_buffers))
++		return POLLIN | POLLRDNORM;
++	return 0;
++}
++
++static const struct file_operations xenbus_dev_file_ops = {
++	.read = xenbus_dev_read,
++	.write = xenbus_dev_write,
++	.open = xenbus_dev_open,
++	.release = xenbus_dev_release,
++	.poll = xenbus_dev_poll,
++};
++
++int xenbus_dev_init(void)
++{
++	xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
++	if (xenbus_dev_intf)
++		xenbus_dev_intf->proc_fops = &xenbus_dev_file_ops;
++
++	return 0;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/xenbus_probe.c	2007-08-27 14:02:08.000000000 -0400
+@@ -0,0 +1,1086 @@
++/******************************************************************************
++ * Talks to Xen Store to figure out what devices we have.
++ *
++ * Copyright (C) 2005 Rusty Russell, IBM Corporation
++ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
++ * Copyright (C) 2005, 2006 XenSource Ltd
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#define DPRINTK(fmt, args...)				\
++	pr_debug("xenbus_probe (%s:%d) " fmt ".\n",	\
++		 __FUNCTION__, __LINE__, ##args)
++
++#include <linux/kernel.h>
++#include <linux/err.h>
++#include <linux/string.h>
++#include <linux/ctype.h>
++#include <linux/fcntl.h>
++#include <linux/mm.h>
++#include <linux/notifier.h>
++#include <linux/kthread.h>
++#include <linux/mutex.h>
++
++#include <asm/io.h>
++#include <asm/page.h>
++#include <asm/maddr.h>
++#include <asm/pgtable.h>
++#include <asm/hypervisor.h>
++#include <xen/xenbus.h>
++#include <xen/xen_proc.h>
++#include <xen/evtchn.h>
++#include <xen/features.h>
++#include <xen/hvm.h>
++
++#include "xenbus_comms.h"
++#include "xenbus_probe.h"
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++int xen_store_evtchn;
++struct xenstore_domain_interface *xen_store_interface;
++static unsigned long xen_store_mfn;
++
++extern struct mutex xenwatch_mutex;
++
++static ATOMIC_NOTIFIER_HEAD(xenstore_chain);
++
++static void wait_for_devices(struct xenbus_driver *xendrv);
++
++static int xenbus_probe_frontend(const char *type, const char *name);
++
++static void xenbus_dev_shutdown(struct device *_dev);
++
++/* If something in array of ids matches this device, return it. */
++static const struct xenbus_device_id *
++match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
++{
++	for (; *arr->devicetype != '\0'; arr++) {
++		if (!strcmp(arr->devicetype, dev->devicetype))
++			return arr;
++	}
++	return NULL;
++}
++
++int xenbus_match(struct device *_dev, struct device_driver *_drv)
++{
++	struct xenbus_driver *drv = to_xenbus_driver(_drv);
++
++	if (!drv->ids)
++		return 0;
++
++	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
++}
++
++/* device/<type>/<id> => <type>-<id>; returns 0, or -EINVAL if malformed. */
++static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
++{
++	const char *rest = strchr(nodename, '/'); /* nodename kept for logging */
++
++	if (!rest || strlen(rest + 1) >= BUS_ID_SIZE) {
++		printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
++		return -EINVAL;
++	}
++	strlcpy(bus_id, rest + 1, BUS_ID_SIZE);
++	if (!strchr(bus_id, '/')) {
++		printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
++		return -EINVAL;
++	}
++	*strchr(bus_id, '/') = '-';	/* only the first '/' is replaced */
++	return 0;
++}
++
++
++static void free_otherend_details(struct xenbus_device *dev)
++{
++	kfree(dev->otherend);
++	dev->otherend = NULL;
++}
++
++
++static void free_otherend_watch(struct xenbus_device *dev)
++{
++	if (dev->otherend_watch.node) {
++		unregister_xenbus_watch(&dev->otherend_watch);
++		kfree(dev->otherend_watch.node);
++		dev->otherend_watch.node = NULL;
++	}
++}
++
++
++int read_otherend_details(struct xenbus_device *xendev,
++				 char *id_node, char *path_node)
++{
++	int err = xenbus_gather(XBT_NIL, xendev->nodename,
++				id_node, "%i", &xendev->otherend_id,
++				path_node, NULL, &xendev->otherend,
++				NULL);
++	if (err) {
++		xenbus_dev_fatal(xendev, err,
++				 "reading other end details from %s",
++				 xendev->nodename);
++		return err;
++	}
++	if (strlen(xendev->otherend) == 0 ||
++	    !xenbus_exists(XBT_NIL, xendev->otherend, "")) {
++		xenbus_dev_fatal(xendev, -ENOENT,
++				 "unable to read other end from %s.  "
++				 "missing or inaccessible.",
++				 xendev->nodename);
++		free_otherend_details(xendev);
++		return -ENOENT;
++	}
++
++	return 0;
++}
++
++
++static int read_backend_details(struct xenbus_device *xendev)
++{
++	return read_otherend_details(xendev, "backend-id", "backend");
++}
++
++
++/* Bus type for frontend drivers. */
++static struct xen_bus_type xenbus_frontend = {
++	.root = "device",
++	.levels = 2, 		/* device/type/<id> */
++	.get_bus_id = frontend_bus_id,
++	.probe = xenbus_probe_frontend,
++	.bus = {
++		.name     = "xen",
++		.match    = xenbus_match,
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
++		.probe    = xenbus_dev_probe,
++		.remove   = xenbus_dev_remove,
++		.shutdown = xenbus_dev_shutdown,
++#endif
++	},
++	.dev = {
++		.bus_id = "xen",
++	},
++};
++
++static void otherend_changed(struct xenbus_watch *watch,
++			     const char **vec, unsigned int len)
++{
++	struct xenbus_device *dev =
++		container_of(watch, struct xenbus_device, otherend_watch);
++	struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
++	enum xenbus_state state;
++
++	/* Watch callback.  Protect us against watches firing on old details
++	   when the otherend details change, say immediately after a resume. */
++	if (!dev->otherend ||
++	    strncmp(dev->otherend, vec[XS_WATCH_PATH],
++		    strlen(dev->otherend))) {
++		DPRINTK("Ignoring watch at %s", vec[XS_WATCH_PATH]);
++		return;
++	}
++
++	state = xenbus_read_driver_state(dev->otherend);
++
++	DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
++		dev->otherend_watch.node, vec[XS_WATCH_PATH]);
++
++	/*
++	 * Ignore xenbus transitions during shutdown. This prevents us doing
++	 * work that can fail e.g., when the rootfs is gone.
++	 */
++	if (system_state > SYSTEM_RUNNING) {
++		struct xen_bus_type *bus =
++			container_of(dev->dev.bus, struct xen_bus_type, bus);
++		/* If we're frontend, drive the state machine to Closed. */
++		/* This should cause the backend to release our resources. */
++		if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
++			xenbus_frontend_closed(dev);
++		return;
++	}
++
++	if (drv->otherend_changed)
++		drv->otherend_changed(dev, state);
++}
++
++
++static int talk_to_otherend(struct xenbus_device *dev)
++{
++	struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
++
++	free_otherend_watch(dev);
++	free_otherend_details(dev);
++
++	return drv->read_otherend_details(dev);
++}
++
++
++static int watch_otherend(struct xenbus_device *dev)
++{
++	return xenbus_watch_path2(dev, dev->otherend, "state",
++				  &dev->otherend_watch, otherend_changed);
++}
++
++
++int xenbus_dev_probe(struct device *_dev)
++{
++	struct xenbus_device *dev = to_xenbus_device(_dev);
++	struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
++	const struct xenbus_device_id *id;
++	int err;
++
++	DPRINTK("%s", dev->nodename);
++
++	if (!drv->probe) {
++		err = -ENODEV;
++		goto fail;
++	}
++
++	id = match_device(drv->ids, dev);
++	if (!id) {
++		err = -ENODEV;
++		goto fail;
++	}
++
++	err = talk_to_otherend(dev);
++	if (err) {
++		printk(KERN_WARNING
++		       "xenbus_probe: talk_to_otherend on %s failed.\n",
++		       dev->nodename);
++		return err;
++	}
++
++	err = drv->probe(dev, id);
++	if (err)
++		goto fail;
++
++	err = watch_otherend(dev);
++	if (err) {
++		printk(KERN_WARNING
++		       "xenbus_probe: watch_otherend on %s failed.\n",
++		       dev->nodename);
++		return err;
++	}
++
++	return 0;
++fail:
++	xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
++	xenbus_switch_state(dev, XenbusStateClosed);
++	return -ENODEV;
++}
++
++int xenbus_dev_remove(struct device *_dev)
++{
++	struct xenbus_device *dev = to_xenbus_device(_dev);
++	struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
++
++	DPRINTK("%s", dev->nodename);
++
++	free_otherend_watch(dev);
++	free_otherend_details(dev);
++
++	if (drv->remove)
++		drv->remove(dev);
++
++	xenbus_switch_state(dev, XenbusStateClosed);
++	return 0;
++}
++
++static void xenbus_dev_shutdown(struct device *_dev)
++{
++	struct xenbus_device *dev = to_xenbus_device(_dev);
++	unsigned long timeout = 5*HZ;
++
++	DPRINTK("%s", dev->nodename);
++
++	get_device(&dev->dev);
++	if (dev->state != XenbusStateConnected) {
++		printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
++		       dev->nodename, xenbus_strstate(dev->state));
++		goto out;
++	}
++	xenbus_switch_state(dev, XenbusStateClosing);
++	timeout = wait_for_completion_timeout(&dev->down, timeout);
++	if (!timeout)
++		printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
++ out:
++	put_device(&dev->dev);
++}
++
++int xenbus_register_driver_common(struct xenbus_driver *drv,
++				  struct xen_bus_type *bus)
++{
++	int ret;
++
++	if (bus->error)
++		return bus->error;
++
++	drv->driver.name = drv->name;
++	drv->driver.bus = &bus->bus;
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
++	drv->driver.owner = drv->owner;
++#endif
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
++	drv->driver.probe = xenbus_dev_probe;
++	drv->driver.remove = xenbus_dev_remove;
++	drv->driver.shutdown = xenbus_dev_shutdown;
++#endif
++
++	mutex_lock(&xenwatch_mutex);
++	ret = driver_register(&drv->driver);
++	mutex_unlock(&xenwatch_mutex);
++	return ret;
++}
++
++int xenbus_register_frontend(struct xenbus_driver *drv)
++{
++	int ret;
++
++	drv->read_otherend_details = read_backend_details;
++
++	ret = xenbus_register_driver_common(drv, &xenbus_frontend);
++	if (ret)
++		return ret;
++
++	/* If this driver is loaded as a module wait for devices to attach. */
++	wait_for_devices(drv);
++
++	return 0;
++}
++EXPORT_SYMBOL_GPL(xenbus_register_frontend);
++
++void xenbus_unregister_driver(struct xenbus_driver *drv)
++{
++	driver_unregister(&drv->driver);
++}
++EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
++
++struct xb_find_info
++{
++	struct xenbus_device *dev;
++	const char *nodename;
++};
++
++static int cmp_dev(struct device *dev, void *data)
++{
++	struct xenbus_device *xendev = to_xenbus_device(dev);
++	struct xb_find_info *info = data;
++
++	if (!strcmp(xendev->nodename, info->nodename)) {
++		info->dev = xendev;
++		get_device(dev);
++		return 1;
++	}
++	return 0;
++}
++
++struct xenbus_device *xenbus_device_find(const char *nodename,
++					 struct bus_type *bus)
++{
++	struct xb_find_info info = { .dev = NULL, .nodename = nodename };
++
++	bus_for_each_dev(bus, NULL, &info, cmp_dev);
++	return info.dev;
++}
++
++static int cleanup_dev(struct device *dev, void *data)
++{
++	struct xb_find_info *wanted = data;
++	struct xenbus_device *cand = to_xenbus_device(dev);
++	size_t plen = strlen(wanted->nodename);
++
++	DPRINTK("%s", wanted->nodename);
++
++	/*
++	 * Select the device whose node is wanted->nodename or lies below it:
++	 * the prefix must match and end at a path boundary (NUL or '/').
++	 */
++	if (strncmp(cand->nodename, wanted->nodename, plen) != 0 ||
++	    (cand->nodename[plen] != '\0' && cand->nodename[plen] != '/'))
++		return 0;
++
++	wanted->dev = cand;	/* reported back through the find-info */
++	get_device(dev);	/* reference dropped by xenbus_cleanup_devices() */
++	return 1;
++}
++
++static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
++{
++	struct xb_find_info info = { .nodename = path };
++
++	do {
++		info.dev = NULL;
++		bus_for_each_dev(bus, NULL, &info, cleanup_dev);
++		if (info.dev) {
++			device_unregister(&info.dev->dev);
++			put_device(&info.dev->dev);
++		}
++	} while (info.dev);
++}
++
++static void xenbus_dev_release(struct device *dev)
++{
++	if (dev)
++		kfree(to_xenbus_device(dev));
++}
++
++static ssize_t xendev_show_nodename(struct device *dev,
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
++				    struct device_attribute *attr,
++#endif
++				    char *buf)
++{
++	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
++}
++DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
++
++static ssize_t xendev_show_devtype(struct device *dev,
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
++				   struct device_attribute *attr,
++#endif
++				   char *buf)
++{
++	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
++}
++DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
++
++
++int xenbus_probe_node(struct xen_bus_type *bus,	/* bus the device joins */
++		      const char *type,		/* device type string */
++		      const char *nodename)	/* xenstore node of the device */
++{
++	int err;
++	struct xenbus_device *xendev;
++	size_t stringlen;
++	char *tmpstring;
++	/* Returns 0 (also for nodes that are not new) or an error code. */
++	enum xenbus_state state = xenbus_read_driver_state(nodename);
++
++	if (bus->error)
++		return bus->error;
++
++	if (state != XenbusStateInitialising) {
++		/* Device is not new, so ignore it.  This can happen if a
++		   device is going away after switching to Closed.  */
++		return 0;
++	}
++
++	stringlen = strlen(nodename) + 1 + strlen(type) + 1;
++	xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
++	if (!xendev)
++		return -ENOMEM;
++
++	xendev->state = XenbusStateInitialising;
++
++	/* Copy both strings into the extra space allocated after *xendev. */
++	tmpstring = (char *)(xendev + 1);
++	strcpy(tmpstring, nodename);
++	xendev->nodename = tmpstring;
++
++	tmpstring += strlen(tmpstring) + 1;
++	strcpy(tmpstring, type);
++	xendev->devicetype = tmpstring;
++	init_completion(&xendev->down);
++
++	xendev->dev.parent = &bus->dev;
++	xendev->dev.bus = &bus->bus;
++	xendev->dev.release = xenbus_dev_release;
++
++	err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
++	if (err)
++		goto fail;
++
++	/* Register with generic device framework. */
++	err = device_register(&xendev->dev);
++	if (err)
++		goto fail;
++
++	err = device_create_file(&xendev->dev, &dev_attr_nodename);
++	if (err)
++		goto unregister;
++	err = device_create_file(&xendev->dev, &dev_attr_devtype);
++	if (err)
++		goto unregister;
++
++	return 0;
++unregister:
++	device_remove_file(&xendev->dev, &dev_attr_nodename);
++	device_remove_file(&xendev->dev, &dev_attr_devtype);
++	device_unregister(&xendev->dev); /* dev.release may kfree() xendev */
++	return err;		/* must not fall through to a second kfree() */
++fail:
++	kfree(xendev);	/* NOTE(review): also reached if device_register() fails */
++	return err;
++}
++
++/* device/<typename>/<name> */
++static int xenbus_probe_frontend(const char *type, const char *name)
++{
++	char *nodename;
++	int err;
++
++	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_frontend.root, type, name);
++	if (!nodename)
++		return -ENOMEM;
++
++	DPRINTK("%s", nodename);
++
++	err = xenbus_probe_node(&xenbus_frontend, type, nodename);
++	kfree(nodename);
++	return err;
++}
++
++static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
++{
++	int err = 0;
++	char **dir;
++	unsigned int dir_n = 0;
++	unsigned int i;		/* same type as dir_n, which bounds it */
++	/* Probe each entry xenbus_directory() returns for bus->root/type. */
++	dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n);
++	if (IS_ERR(dir))
++		return PTR_ERR(dir);
++
++	for (i = 0; i < dir_n; i++) {
++		err = bus->probe(type, dir[i]);
++		if (err)
++			break;	/* first failure ends the scan; err is returned */
++	}
++	kfree(dir);
++	return err;
++}
++
++int xenbus_probe_devices(struct xen_bus_type *bus)
++{
++	int err = 0;
++	char **dir;
++	unsigned int i, dir_n;
++
++	if (bus->error)
++		return bus->error;
++
++	dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
++	if (IS_ERR(dir))
++		return PTR_ERR(dir);
++
++	for (i = 0; i < dir_n; i++) {
++		err = xenbus_probe_device_type(bus, dir[i]);
++		if (err)
++			break;
++	}
++	kfree(dir);
++	return err;
++}
++
++static unsigned int char_count(const char *str, char c)
++{
++	unsigned int hits = 0;
++
++	/* Count the bytes equal to c before the terminating NUL. */
++	for (; *str; str++)
++		hits += (*str == c);
++	return hits;
++}
++
++static int strsep_len(const char *str, char c, unsigned int len)
++{
++	const char *p;
++
++	/* Return at the (len+1)-th c; len counts separators still to skip. */
++	for (p = str; *p; p++) {
++		if (*p == c && len-- == 0)
++			return p - str;
++	}
++	/* Ran out of string: fine only if no separator was left to skip. */
++	return len ? -ERANGE : p - str;
++}
++
++void dev_changed(const char *node, struct xen_bus_type *bus)
++{
++	int exists, rootlen;
++	struct xenbus_device *dev;
++	char type[BUS_ID_SIZE];
++	const char *p, *root;
++
++	if (bus->error || char_count(node, '/') < 2)
++ 		return;
++
++	exists = xenbus_exists(XBT_NIL, node, "");
++	if (!exists) {
++		xenbus_cleanup_devices(node, &bus->bus);
++		return;
++	}
++
++	/* backend/<type>/... or device/<type>/... */
++	p = strchr(node, '/') + 1;
++	snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
++	type[BUS_ID_SIZE-1] = '\0';
++
++	rootlen = strsep_len(node, '/', bus->levels);
++	if (rootlen < 0)
++		return;
++	root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
++	if (!root)
++		return;
++
++	dev = xenbus_device_find(root, &bus->bus);
++	if (!dev)
++		xenbus_probe_node(bus, type, root);
++	else
++		put_device(&dev->dev);
++
++	kfree(root);
++}
++
++static void frontend_changed(struct xenbus_watch *watch,
++			     const char **vec, unsigned int len)
++{
++	DPRINTK("");
++
++	dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
++}
++
++/* We watch for devices appearing and vanishing. */
++static struct xenbus_watch fe_watch = {
++	.node = "device",
++	.callback = frontend_changed,
++};
++
++static int suspend_dev(struct device *dev, void *data)
++{
++	int err = 0;
++	struct xenbus_driver *drv;
++	struct xenbus_device *xdev;
++
++	DPRINTK("");
++
++	if (dev->driver == NULL)
++		return 0;
++	drv = to_xenbus_driver(dev->driver);
++	xdev = container_of(dev, struct xenbus_device, dev);
++	if (drv->suspend)
++		err = drv->suspend(xdev);
++	if (err)
++		printk(KERN_WARNING
++		       "xenbus: suspend %s failed: %i\n", dev->bus_id, err);
++	return 0;
++}
++
++static int suspend_cancel_dev(struct device *dev, void *data)
++{
++	int err = 0;
++	struct xenbus_driver *drv;
++	struct xenbus_device *xdev;
++
++	DPRINTK("");
++
++	if (dev->driver == NULL)
++		return 0;
++	drv = to_xenbus_driver(dev->driver);
++	xdev = container_of(dev, struct xenbus_device, dev);
++	if (drv->suspend_cancel)
++		err = drv->suspend_cancel(xdev);
++	if (err)
++		printk(KERN_WARNING
++		       "xenbus: suspend_cancel %s failed: %i\n",
++		       dev->bus_id, err);
++	return 0;
++}
++
++static int resume_dev(struct device *dev, void *data)
++{
++	int err;
++	struct xenbus_driver *drv;
++	struct xenbus_device *xdev;
++
++	DPRINTK("");
++
++	if (dev->driver == NULL)
++		return 0;
++
++	drv = to_xenbus_driver(dev->driver);
++	xdev = container_of(dev, struct xenbus_device, dev);
++
++	err = talk_to_otherend(xdev);
++	if (err) {
++		printk(KERN_WARNING
++		       "xenbus: resume (talk_to_otherend) %s failed: %i\n",
++		       dev->bus_id, err);
++		return err;
++	}
++
++	xdev->state = XenbusStateInitialising;
++
++	if (drv->resume) {
++		err = drv->resume(xdev);
++		if (err) { 
++			printk(KERN_WARNING
++			       "xenbus: resume %s failed: %i\n", 
++			       dev->bus_id, err);
++			return err;
++		}
++	}
++
++	err = watch_otherend(xdev);
++	if (err) {
++		printk(KERN_WARNING
++		       "xenbus_probe: resume (watch_otherend) %s failed: "
++		       "%d.\n", dev->bus_id, err);
++		return err;
++	}
++
++	return 0;
++}
++
++void xenbus_suspend(void)
++{
++	DPRINTK("");
++
++	if (!xenbus_frontend.error)
++		bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
++	xenbus_backend_suspend(suspend_dev);
++	xs_suspend();
++}
++EXPORT_SYMBOL_GPL(xenbus_suspend);
++
++void xenbus_resume(void)
++{
++	xb_init_comms();
++	xs_resume();
++	if (!xenbus_frontend.error)
++		bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
++	xenbus_backend_resume(resume_dev);
++}
++EXPORT_SYMBOL_GPL(xenbus_resume);
++
++void xenbus_suspend_cancel(void)
++{
++	xs_suspend_cancel();
++	if (!xenbus_frontend.error)
++		bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
++	xenbus_backend_resume(suspend_cancel_dev);
++}
++EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
++
++/* A flag to determine if xenstored is 'ready' (i.e. has started) */
++int xenstored_ready = 0;
++
++
++int register_xenstore_notifier(struct notifier_block *nb)
++{
++	int ret = 0;
++
++	if (xenstored_ready > 0)
++		ret = nb->notifier_call(nb, 0, NULL);
++	else
++		atomic_notifier_chain_register(&xenstore_chain, nb);
++
++	return ret;
++}
++EXPORT_SYMBOL_GPL(register_xenstore_notifier);
++
++void unregister_xenstore_notifier(struct notifier_block *nb)
++{
++	atomic_notifier_chain_unregister(&xenstore_chain, nb);
++}
++EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
++
++
++void xenbus_probe(void *unused)
++{
++	BUG_ON((xenstored_ready <= 0));
++
++	/* Enumerate devices in xenstore and watch for changes. */
++	xenbus_probe_devices(&xenbus_frontend);
++	register_xenbus_watch(&fe_watch);
++	xenbus_backend_probe_and_watch();
++
++	/* Notify others that xenstore is up */
++	atomic_notifier_call_chain(&xenstore_chain, 0, NULL);
++}
++
++
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
++static struct file_operations xsd_kva_fops;
++static struct proc_dir_entry *xsd_kva_intf;
++static struct proc_dir_entry *xsd_port_intf;
++
++static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma)
++{
++	size_t size = vma->vm_end - vma->vm_start;
++
++	if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
++		return -EINVAL;
++
++	if (remap_pfn_range(vma, vma->vm_start, mfn_to_pfn(xen_store_mfn),
++			    size, vma->vm_page_prot))
++		return -EAGAIN;
++
++	return 0;
++}
++
++static int xsd_kva_read(char *page, char **start, off_t off,
++			int count, int *eof, void *data)
++{
++	int len;
++
++	len  = sprintf(page, "0x%p", xen_store_interface);
++	*eof = 1;
++	return len;
++}
++
++static int xsd_port_read(char *page, char **start, off_t off,
++			 int count, int *eof, void *data)
++{
++	int len;
++
++	len  = sprintf(page, "%d", xen_store_evtchn);
++	*eof = 1;
++	return len;
++}
++#endif
++
++static int xenbus_probe_init(void)
++{
++	int err = 0;
++	unsigned long page = 0;	/* dom0 only: page that becomes xen_store_mfn */
++	/* Set up the buses, the xenstore page/event channel and xs_init(). */
++	DPRINTK("");
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	/* Register ourselves with the kernel bus subsystem */
++	xenbus_frontend.error = bus_register(&xenbus_frontend.bus);
++	if (xenbus_frontend.error)
++		printk(KERN_WARNING
++		       "XENBUS: Error registering frontend bus: %i\n",
++		       xenbus_frontend.error);
++	xenbus_backend_bus_register();
++
++	/*
++	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
++	 */
++	if (is_initial_xendomain()) {
++		struct evtchn_alloc_unbound alloc_unbound;
++
++		/* Allocate page. */
++		page = get_zeroed_page(GFP_KERNEL);
++		if (!page)
++			return -ENOMEM;
++
++		xen_store_mfn = xen_start_info->store_mfn =
++			pfn_to_mfn(virt_to_phys((void *)page) >>
++				   PAGE_SHIFT);
++
++		/* Next allocate a local port which xenstored can bind to */
++		alloc_unbound.dom        = DOMID_SELF;
++		alloc_unbound.remote_dom = 0;
++
++		err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
++						  &alloc_unbound);
++		if (err == -ENOSYS)
++			goto err;
++		BUG_ON(err);	/* any failure other than -ENOSYS is fatal */
++		xen_store_evtchn = xen_start_info->store_evtchn =
++			alloc_unbound.port;
++
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
++		/* And finally publish the above info in /proc/xen */
++		xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
++		if (xsd_kva_intf) {
++			memcpy(&xsd_kva_fops, xsd_kva_intf->proc_fops,
++			       sizeof(xsd_kva_fops));
++			xsd_kva_fops.mmap = xsd_kva_mmap;
++			xsd_kva_intf->proc_fops = &xsd_kva_fops;
++			xsd_kva_intf->read_proc = xsd_kva_read;
++		}
++		xsd_port_intf = create_xen_proc_entry("xsd_port", 0400);
++		if (xsd_port_intf)
++			xsd_port_intf->read_proc = xsd_port_read;
++#endif
++		xen_store_interface = mfn_to_virt(xen_store_mfn);
++	} else {
++		xenstored_ready = 1;
++#ifdef CONFIG_XEN
++		xen_store_evtchn = xen_start_info->store_evtchn;
++		xen_store_mfn = xen_start_info->store_mfn;
++		xen_store_interface = mfn_to_virt(xen_store_mfn);
++#else
++		xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
++		xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
++		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
++					      PAGE_SIZE);
++#endif
++	}
++
++
++	xenbus_dev_init();
++
++	/* Initialize the interface to xenstore. */
++	err = xs_init();
++	if (err) {
++		printk(KERN_WARNING
++		       "XENBUS: Error initializing xenstore comms: %i\n", err);
++		goto err;
++	}
++
++	/* Register ourselves with the kernel device subsystem */
++	if (!xenbus_frontend.error) {
++		xenbus_frontend.error = device_register(&xenbus_frontend.dev);
++		if (xenbus_frontend.error) {
++			bus_unregister(&xenbus_frontend.bus);
++			printk(KERN_WARNING
++			       "XENBUS: Error registering frontend device: %i\n",
++			       xenbus_frontend.error);
++		}
++	}
++	xenbus_backend_device_register();
++
++	if (!is_initial_xendomain())
++		xenbus_probe(NULL);
++
++	return 0;
++
++ err:
++	if (page)
++		free_page(page);
++
++	/*
++	 * Keep the front/backend buses registered: drivers registering later
++	 * still use them. NOTE(review): xen_store_mfn (and xen_store_interface
++	 * after an xs_init() failure) still refer to the page freed above.
++	 */
++
++	return err;
++}
++
++#ifdef CONFIG_XEN
++postcore_initcall(xenbus_probe_init);
++MODULE_LICENSE("Dual BSD/GPL");
++#else
++int xenbus_init(void)
++{
++	return xenbus_probe_init();
++}
++#endif
++
++static int is_disconnected_device(struct device *dev, void *data)
++{
++	struct xenbus_device *xendev = to_xenbus_device(dev);
++	struct device_driver *drv = data;
++
++	/*
++	 * A device with no driver will never connect. We care only about
++	 * devices which should currently be in the process of connecting.
++	 */
++	if (!dev->driver)
++		return 0;
++
++	/* Is this search limited to a particular driver? */
++	if (drv && (dev->driver != drv))
++		return 0;
++
++	return (xendev->state != XenbusStateConnected);
++}
++
++static int exists_disconnected_device(struct device_driver *drv)
++{
++	if (xenbus_frontend.error)
++		return 0;	/* frontend bus unusable: nothing to wait for */
++	return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
++				is_disconnected_device); /* nonzero: one found */
++}
++
++static int print_device_status(struct device *dev, void *data)
++{
++	struct xenbus_device *xendev = to_xenbus_device(dev);
++	struct device_driver *drv = data;
++
++	/* Is this operation limited to a particular driver? */
++	if (drv && (dev->driver != drv))
++		return 0;
++
++	if (!dev->driver) {
++		/* Information only: is this too noisy? */
++		printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
++		       xendev->nodename);
++	} else if (xendev->state != XenbusStateConnected) {
++		printk(KERN_WARNING "XENBUS: Timeout connecting "
++		       "to device: %s (state %d)\n",
++		       xendev->nodename, xendev->state);
++	}
++
++	return 0;
++}
++
++/* We only wait for device setup after most initcalls have run. */
++static int ready_to_wait_for_devices;
++
++/*
++ * On a 10 second timeout, wait for all devices currently configured.  We need
++ * to do this to guarantee that the filesystems and / or network devices
++ * needed for boot are available, before we can allow the boot to proceed.
++ *
++ * This needs to be on a late_initcall, to happen after the frontend device
++ * drivers have been initialised, but before the root fs is mounted.
++ *
++ * A possible improvement here would be to have the tools add a per-device
++ * flag to the store entry, indicating whether it is needed at boot time.
++ * This would allow people who knew what they were doing to accelerate their
++ * boot slightly, but of course needs tools or manual intervention to set up
++ * those flags correctly.
++ */
++static void wait_for_devices(struct xenbus_driver *xendrv)
++{
++	unsigned long timeout = jiffies + 10*HZ;
++	struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
++
++	if (!ready_to_wait_for_devices || !is_running_on_xen())
++		return;
++
++	while (exists_disconnected_device(drv)) {
++		if (time_after(jiffies, timeout))
++			break;
++		schedule_timeout_interruptible(HZ/10);
++	}
++
++	bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
++			 print_device_status);
++}
++
++#ifndef MODULE
++static int __init boot_wait_for_devices(void)
++{
++	if (!xenbus_frontend.error) {
++		ready_to_wait_for_devices = 1;
++		wait_for_devices(NULL);
++	}
++	return 0;
++}
++
++late_initcall(boot_wait_for_devices);
++#endif
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/xenbus_probe.h	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,75 @@
++/******************************************************************************
++ * xenbus_probe.h
++ *
++ * Talks to Xen Store to figure out what devices we have.
++ *
++ * Copyright (C) 2005 Rusty Russell, IBM Corporation
++ * Copyright (C) 2005 XenSource Ltd.
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef _XENBUS_PROBE_H
++#define _XENBUS_PROBE_H
++
++#if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE)
++extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
++extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
++extern void xenbus_backend_probe_and_watch(void);
++extern void xenbus_backend_bus_register(void);
++extern void xenbus_backend_device_register(void);
++#else
++static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
++static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
++static inline void xenbus_backend_probe_and_watch(void) {}
++static inline void xenbus_backend_bus_register(void) {}
++static inline void xenbus_backend_device_register(void) {}
++#endif
++
++struct xen_bus_type
++{
++	char *root;
++	int error;
++	unsigned int levels;
++	int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
++	int (*probe)(const char *type, const char *dir);
++	struct bus_type bus;
++	struct device dev;
++};
++
++extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
++extern int xenbus_dev_probe(struct device *_dev);
++extern int xenbus_dev_remove(struct device *_dev);
++extern int xenbus_register_driver_common(struct xenbus_driver *drv,
++					 struct xen_bus_type *bus);
++extern int xenbus_probe_node(struct xen_bus_type *bus,
++			     const char *type,
++			     const char *nodename);
++extern int xenbus_probe_devices(struct xen_bus_type *bus);
++
++extern void dev_changed(const char *node, struct xen_bus_type *bus);
++
++#endif
++
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/xenbus_probe_backend.c	2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,286 @@
++/******************************************************************************
++ * Talks to Xen Store to figure out what devices we have (backend half).
++ *
++ * Copyright (C) 2005 Rusty Russell, IBM Corporation
++ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
++ * Copyright (C) 2005, 2006 XenSource Ltd
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#define DPRINTK(fmt, args...)				\
++	pr_debug("xenbus_probe (%s:%d) " fmt ".\n",	\
++		 __FUNCTION__, __LINE__, ##args)
++
++#include <linux/kernel.h>
++#include <linux/err.h>
++#include <linux/string.h>
++#include <linux/ctype.h>
++#include <linux/fcntl.h>
++#include <linux/mm.h>
++#include <linux/notifier.h>
++#include <linux/kthread.h>
++
++#include <asm/io.h>
++#include <asm/page.h>
++#include <asm/maddr.h>
++#include <asm/pgtable.h>
++#include <asm/hypervisor.h>
++#include <xen/xenbus.h>
++#include <xen/xen_proc.h>
++#include <xen/evtchn.h>
++#include <xen/features.h>
++#include <xen/hvm.h>
++
++#include "xenbus_comms.h"
++#include "xenbus_probe.h"
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++static int xenbus_uevent_backend(struct device *dev, char **envp,
++				 int num_envp, char *buffer, int buffer_size);
++static int xenbus_probe_backend(const char *type, const char *domid);
++
++extern int read_otherend_details(struct xenbus_device *xendev,
++				 char *id_node, char *path_node);
++
++static int read_frontend_details(struct xenbus_device *xendev)
++{
++	return read_otherend_details(xendev, "frontend-id", "frontend");
++}
++
++/* Derive bus_id "<type>-<fe-domid>-<id>" from backend/<type>/<fe-uuid>/<id>; 0 or -errno. */
++static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
++{
++	int domid, err;
++	const char *devid, *type, *frontend;
++	unsigned int typelen;
++
++	type = strchr(nodename, '/');	/* step past the first path component */
++	if (!type)
++		return -EINVAL;
++	type++;
++	typelen = strcspn(type, "/");
++	if (!typelen || type[typelen] != '/')	/* need a non-empty type followed by '/' */
++		return -EINVAL;
++
++	devid = strrchr(nodename, '/') + 1;	/* last path component */
++
++	err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
++			    "frontend", NULL, &frontend,	/* NULL fmt: raw kmalloc'ed string */
++			    NULL);
++	if (err)
++		return err;
++	if (strlen(frontend) == 0)	/* empty frontend path */
++		err = -ERANGE;
++	if (!err && !xenbus_exists(XBT_NIL, frontend, ""))	/* frontend node is gone */
++		err = -ENOENT;
++	kfree(frontend);	/* only needed for the checks above */
++
++	if (err)
++		return err;
++
++	if (snprintf(bus_id, BUS_ID_SIZE,
++		     "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
++		return -ENOSPC;	/* name would have been truncated */
++	return 0;
++}
++
++static struct xen_bus_type xenbus_backend = {
++	.root = "backend",
++	.levels = 3, 		/* backend/type/<frontend>/<id> */
++	.get_bus_id = backend_bus_id,
++	.probe = xenbus_probe_backend,
++	.bus = {
++		.name     = "xen-backend",
++		.match    = xenbus_match,
++		.probe    = xenbus_dev_probe,
++		.remove   = xenbus_dev_remove,
++//		.shutdown = xenbus_dev_shutdown,
++		.uevent   = xenbus_uevent_backend,
++	},
++	.dev = {
++		.bus_id = "xen-backend",
++	},
++};
++
++static int xenbus_uevent_backend(struct device *dev, char **envp,
++				 int num_envp, char *buffer, int buffer_size)
++{
++	struct xenbus_device *xdev;
++	struct xenbus_driver *drv;
++	int i = 0;
++	int length = 0;
++
++	DPRINTK("");
++
++	if (dev == NULL)
++		return -ENODEV;
++
++	xdev = to_xenbus_device(dev);
++	if (xdev == NULL)
++		return -ENODEV;
++
++	/* stuff we want to pass to /sbin/hotplug */
++	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
++		       "XENBUS_TYPE=%s", xdev->devicetype);
++
++	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
++		       "XENBUS_PATH=%s", xdev->nodename);
++
++	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
++		       "XENBUS_BASE_PATH=%s", xenbus_backend.root);
++
++	/* terminate, set to next free slot, shrink available space */
++	envp[i] = NULL;
++	envp = &envp[i];
++	num_envp -= i;
++	buffer = &buffer[length];
++	buffer_size -= length;
++
++	if (dev->driver) {
++		drv = to_xenbus_driver(dev->driver);
++		if (drv && drv->uevent)
++			return drv->uevent(xdev, envp, num_envp, buffer,
++					   buffer_size);
++	}
++
++	return 0;
++}
++
++int xenbus_register_backend(struct xenbus_driver *drv)
++{
++	drv->read_otherend_details = read_frontend_details;
++
++	return xenbus_register_driver_common(drv, &xenbus_backend);
++}
++EXPORT_SYMBOL_GPL(xenbus_register_backend);
++
++/* Probe one device node: backend/<typename>/<frontend-uuid>/<name>; 0 or -errno. */
++static int xenbus_probe_backend_unit(const char *dir,
++				     const char *type,
++				     const char *name)
++{
++	char *nodename;
++	int err;
++
++	nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
++	if (!nodename)
++		return -ENOMEM;
++
++	DPRINTK("%s", nodename);	/* DPRINTK appends ".\n" itself */
++
++	err = xenbus_probe_node(&xenbus_backend, type, nodename);
++	kfree(nodename);	/* the path was only needed for the probe call */
++	return err;
++}
++
++/* backend/<typename>/<frontend-domid> */
++static int xenbus_probe_backend(const char *type, const char *domid)
++{
++	char *nodename;
++	int err = 0;
++	char **dir;
++	unsigned int i, dir_n = 0;
++
++	DPRINTK("");
++
++	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid);
++	if (!nodename)
++		return -ENOMEM;
++
++	dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
++	if (IS_ERR(dir)) {
++		kfree(nodename);
++		return PTR_ERR(dir);
++	}
++
++	for (i = 0; i < dir_n; i++) {
++		err = xenbus_probe_backend_unit(nodename, type, dir[i]);
++		if (err)
++			break;
++	}
++	kfree(dir);
++	kfree(nodename);
++	return err;
++}
++
++static void backend_changed(struct xenbus_watch *watch,
++			    const char **vec, unsigned int len)
++{
++	DPRINTK("");
++
++	dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
++}
++
++static struct xenbus_watch be_watch = {
++	.node = "backend",
++	.callback = backend_changed,
++};
++
++void xenbus_backend_suspend(int (*fn)(struct device *, void *))
++{
++	DPRINTK("");
++	if (!xenbus_backend.error)
++		bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
++}
++
++void xenbus_backend_resume(int (*fn)(struct device *, void *))
++{
++	DPRINTK("");
++	if (!xenbus_backend.error)
++		bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
++}
++
++void xenbus_backend_probe_and_watch(void)
++{
++	xenbus_probe_devices(&xenbus_backend);
++	register_xenbus_watch(&be_watch);
++}
++
++void xenbus_backend_bus_register(void)
++{
++	xenbus_backend.error = bus_register(&xenbus_backend.bus);
++	if (xenbus_backend.error)
++		printk(KERN_WARNING
++		       "XENBUS: Error registering backend bus: %i\n",
++		       xenbus_backend.error);
++}
++
++void xenbus_backend_device_register(void)
++{
++	if (xenbus_backend.error)
++		return;
++
++	xenbus_backend.error = device_register(&xenbus_backend.dev);
++	if (xenbus_backend.error) {
++		bus_unregister(&xenbus_backend.bus);
++		printk(KERN_WARNING
++		       "XENBUS: Error registering backend device: %i\n",
++		       xenbus_backend.error);
++	}
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenbus/xenbus_xs.c	2007-08-27 14:02:10.000000000 -0400
+@@ -0,0 +1,880 @@
++/******************************************************************************
++ * xenbus_xs.c
++ *
++ * This is the kernel equivalent of the "xs" library.  We don't need everything
++ * and we use xenbus_comms for communication.
++ *
++ * Copyright (C) 2005 Rusty Russell, IBM Corporation
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/unistd.h>
++#include <linux/errno.h>
++#include <linux/types.h>
++#include <linux/uio.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/err.h>
++#include <linux/slab.h>
++#include <linux/fcntl.h>
++#include <linux/kthread.h>
++#include <linux/rwsem.h>
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <xen/xenbus.h>
++#include "xenbus_comms.h"
++
++#ifdef HAVE_XEN_PLATFORM_COMPAT_H
++#include <xen/platform-compat.h>
++#endif
++
++struct xs_stored_msg {
++	struct list_head list;
++
++	struct xsd_sockmsg hdr;
++
++	union {
++		/* Queued replies. */
++		struct {
++			char *body;
++		} reply;
++
++		/* Queued watch events. */
++		struct {
++			struct xenbus_watch *handle;
++			char **vec;
++			unsigned int vec_size;
++		} watch;
++	} u;
++};
++
++struct xs_handle {
++	/* A list of replies. Currently only one will ever be outstanding. */
++	struct list_head reply_list;
++	spinlock_t reply_lock;
++	wait_queue_head_t reply_waitq;
++
++	/*
++	 * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
++	 * response_mutex is never taken simultaneously with the other three.
++	 */
++
++	/* One request at a time. */
++	struct mutex request_mutex;
++
++	/* Protect xenbus reader thread against save/restore. */
++	struct mutex response_mutex;
++
++	/* Protect transactions against save/restore. */
++	struct rw_semaphore transaction_mutex;
++
++	/* Protect watch (de)register against save/restore. */
++	struct rw_semaphore watch_mutex;
++};
++
++static struct xs_handle xs_state;
++
++/* List of registered watches, and a lock to protect it. */
++static LIST_HEAD(watches);
++static DEFINE_SPINLOCK(watches_lock);
++
++/* List of pending watch callback events, and a lock to protect it. */
++static LIST_HEAD(watch_events);
++static DEFINE_SPINLOCK(watch_events_lock);
++
++/*
++ * Details of the xenwatch callback kernel thread. The thread waits on the
++ * watch_events_waitq for work to do (queued on watch_events list). When it
++ * wakes up it acquires the xenwatch_mutex before reading the list and
++ * carrying out work.
++ */
++static pid_t xenwatch_pid;
++/* static */ DEFINE_MUTEX(xenwatch_mutex);
++static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
++
++static int get_error(const char *errorstring)	/* error name -> positive errno */
++{
++	unsigned int i;
++
++	for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
++		if (i == ARRAY_SIZE(xsd_errors) - 1) {	/* last entry did not match either */
++			printk(KERN_WARNING
++			       "XENBUS xen store gave: unknown error %s\n",
++			       errorstring);
++			return EINVAL;	/* positive, like the table values */
++		}
++	}
++	return xsd_errors[i].errnum;
++}
++
++static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
++{
++	struct xs_stored_msg *msg;
++	char *body;
++
++	spin_lock(&xs_state.reply_lock);
++
++	while (list_empty(&xs_state.reply_list)) {
++		spin_unlock(&xs_state.reply_lock);
++		/* XXX FIXME: Avoid synchronous wait for response here. */
++		wait_event(xs_state.reply_waitq,
++			   !list_empty(&xs_state.reply_list));
++		spin_lock(&xs_state.reply_lock);
++	}
++
++	msg = list_entry(xs_state.reply_list.next,
++			 struct xs_stored_msg, list);
++	list_del(&msg->list);
++
++	spin_unlock(&xs_state.reply_lock);
++
++	*type = msg->hdr.type;
++	if (len)
++		*len = msg->hdr.len;
++	body = msg->u.reply.body;
++
++	kfree(msg);
++
++	return body;
++}
++
++void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
++{
++	void *ret;
++	struct xsd_sockmsg req_msg = *msg;
++	int err;
++
++	if (req_msg.type == XS_TRANSACTION_START)
++		down_read(&xs_state.transaction_mutex);
++
++	mutex_lock(&xs_state.request_mutex);
++
++	err = xb_write(msg, sizeof(*msg) + msg->len);
++	if (err) {
++		msg->type = XS_ERROR;
++		ret = ERR_PTR(err);
++	} else
++		ret = read_reply(&msg->type, &msg->len);
++
++	mutex_unlock(&xs_state.request_mutex);
++
++	if ((req_msg.type == XS_TRANSACTION_END) ||
++	    ((req_msg.type == XS_TRANSACTION_START) &&
++	     (msg->type == XS_ERROR)))
++		up_read(&xs_state.transaction_mutex);
++
++	return ret;
++}
++
++/* Send header + iovec payload to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
++static void *xs_talkv(struct xenbus_transaction t,
++		      enum xsd_sockmsg_type type,
++		      const struct kvec *iovec,
++		      unsigned int num_vecs,
++		      unsigned int *len)
++{
++	struct xsd_sockmsg msg;
++	void *ret = NULL;
++	unsigned int i;
++	int err;
++
++	msg.tx_id = t.id;
++	msg.req_id = 0;
++	msg.type = type;
++	msg.len = 0;	/* payload length = sum of all iovec segments */
++	for (i = 0; i < num_vecs; i++)
++		msg.len += iovec[i].iov_len;
++
++	mutex_lock(&xs_state.request_mutex);	/* one request at a time */
++
++	err = xb_write(&msg, sizeof(msg));
++	if (err) {
++		mutex_unlock(&xs_state.request_mutex);
++		return ERR_PTR(err);
++	}
++
++	for (i = 0; i < num_vecs; i++) {
++		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
++		if (err) {
++			mutex_unlock(&xs_state.request_mutex);
++			return ERR_PTR(err);
++		}
++	}
++
++	ret = read_reply(&msg.type, len);	/* msg.type now holds the reply type */
++
++	mutex_unlock(&xs_state.request_mutex);
++
++	if (IS_ERR(ret))
++		return ret;
++
++	if (msg.type == XS_ERROR) {	/* reply body is an error name string */
++		err = get_error(ret);
++		kfree(ret);
++		return ERR_PTR(-err);	/* get_error() returns a positive errno */
++	}
++
++	if (msg.type != type) {	/* reply does not match the request type */
++		if (printk_ratelimit())
++			printk(KERN_WARNING
++			       "XENBUS unexpected type [%d], expected [%d]\n",
++			       msg.type, type);
++		kfree(ret);
++		return ERR_PTR(-EINVAL);
++	}
++	return ret;
++}
++
++/* Simplified version of xs_talkv: single message. */
++static void *xs_single(struct xenbus_transaction t,
++		       enum xsd_sockmsg_type type,
++		       const char *string,
++		       unsigned int *len)
++{
++	struct kvec iovec;
++
++	iovec.iov_base = (void *)string;
++	iovec.iov_len = strlen(string) + 1;
++	return xs_talkv(t, type, &iovec, 1, len);
++}
++
++/* Many commands only need an ack, don't care what it says. */
++static int xs_error(char *reply)
++{
++	if (IS_ERR(reply))
++		return PTR_ERR(reply);
++	kfree(reply);
++	return 0;
++}
++
++static unsigned int count_strings(const char *strings, unsigned int len)
++{
++	unsigned int num;
++	const char *p;
++
++	for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
++		num++;
++
++	return num;
++}
++
++/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
++static char *join(const char *dir, const char *name)
++{
++	char *buffer;
++
++	if (strlen(name) == 0)
++		buffer = kasprintf(GFP_KERNEL, "%s", dir);
++	else
++		buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
++	return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
++}
++
++static char **split(char *strings, unsigned int len, unsigned int *num)
++{
++	char *p, **ret;
++
++	/* Count the strings. */
++	*num = count_strings(strings, len);
++
++	/* Transfer to one big alloc for easy freeing. */
++	ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL);
++	if (!ret) {
++		kfree(strings);
++		return ERR_PTR(-ENOMEM);
++	}
++	memcpy(&ret[*num], strings, len);
++	kfree(strings);
++
++	strings = (char *)&ret[*num];
++	for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
++		ret[(*num)++] = p;
++
++	return ret;
++}
++
++char **xenbus_directory(struct xenbus_transaction t,
++			const char *dir, const char *node, unsigned int *num)
++{
++	char *strings, *path;
++	unsigned int len;
++
++	path = join(dir, node);
++	if (IS_ERR(path))
++		return (char **)path;
++
++	strings = xs_single(t, XS_DIRECTORY, path, &len);
++	kfree(path);
++	if (IS_ERR(strings))
++		return (char **)strings;
++
++	return split(strings, len, num);
++}
++EXPORT_SYMBOL_GPL(xenbus_directory);
++
++/* Check if a path exists. Return 1 if it does, 0 if the directory read fails. */
++int xenbus_exists(struct xenbus_transaction t,
++		  const char *dir, const char *node)
++{
++	char **d;
++	unsigned int dir_n;	/* entry count; required by xenbus_directory(), unused here */
++
++	d = xenbus_directory(t, dir, node, &dir_n);
++	if (IS_ERR(d))
++		return 0;
++	kfree(d);	/* only success/failure of the listing matters */
++	return 1;
++}
++EXPORT_SYMBOL_GPL(xenbus_exists);
++
++/* Get the value of a single file.
++ * Returns a kmalloced value: call kfree() on it after use.
++ * len indicates length in bytes.
++ */
++void *xenbus_read(struct xenbus_transaction t,
++		  const char *dir, const char *node, unsigned int *len)
++{
++	char *path;
++	void *ret;
++
++	path = join(dir, node);
++	if (IS_ERR(path))
++		return (void *)path;
++
++	ret = xs_single(t, XS_READ, path, len);
++	kfree(path);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(xenbus_read);
++
++/* Write the value of a single file.
++ * Returns -err on failure.
++ */
++int xenbus_write(struct xenbus_transaction t,
++		 const char *dir, const char *node, const char *string)
++{
++	const char *path;
++	struct kvec iovec[2];
++	int ret;
++
++	path = join(dir, node);
++	if (IS_ERR(path))
++		return PTR_ERR(path);
++
++	iovec[0].iov_base = (void *)path;
++	iovec[0].iov_len = strlen(path) + 1;
++	iovec[1].iov_base = (void *)string;
++	iovec[1].iov_len = strlen(string);
++
++	ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
++	kfree(path);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(xenbus_write);
++
++/* Create a new directory. */
++int xenbus_mkdir(struct xenbus_transaction t,
++		 const char *dir, const char *node)
++{
++	char *path;
++	int ret;
++
++	path = join(dir, node);
++	if (IS_ERR(path))
++		return PTR_ERR(path);
++
++	ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
++	kfree(path);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(xenbus_mkdir);
++
++/* Destroy a file or directory (directories must be empty). */
++int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
++{
++	char *path;
++	int ret;
++
++	path = join(dir, node);
++	if (IS_ERR(path))
++		return PTR_ERR(path);
++
++	ret = xs_error(xs_single(t, XS_RM, path, NULL));
++	kfree(path);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(xenbus_rm);
++
++/* Start a transaction: changes by others will not be seen during this
++ * transaction, and changes will not be visible to others until end.
++ */
++int xenbus_transaction_start(struct xenbus_transaction *t)
++{
++	char *id_str;
++
++	down_read(&xs_state.transaction_mutex);
++
++	id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
++	if (IS_ERR(id_str)) {
++		up_read(&xs_state.transaction_mutex);
++		return PTR_ERR(id_str);
++	}
++
++	t->id = simple_strtoul(id_str, NULL, 0);
++	kfree(id_str);
++	return 0;
++}
++EXPORT_SYMBOL_GPL(xenbus_transaction_start);
++
++/* End a transaction.
++ * If abort is true, transaction is discarded instead of committed.
++ */
++int xenbus_transaction_end(struct xenbus_transaction t, int abort)
++{
++	char abortstr[2];
++	int err;
++
++	if (abort)
++		strcpy(abortstr, "F");
++	else
++		strcpy(abortstr, "T");
++
++	err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
++
++	up_read(&xs_state.transaction_mutex);
++
++	return err;
++}
++EXPORT_SYMBOL_GPL(xenbus_transaction_end);
++
++/* Single read and scanf: returns -errno or num scanned. */
++int xenbus_scanf(struct xenbus_transaction t,
++		 const char *dir, const char *node, const char *fmt, ...)
++{
++	va_list ap;
++	int ret;
++	char *val;
++
++	val = xenbus_read(t, dir, node, NULL);
++	if (IS_ERR(val))
++		return PTR_ERR(val);
++
++	va_start(ap, fmt);
++	ret = vsscanf(val, fmt, ap);
++	va_end(ap);
++	kfree(val);
++	/* Distinctive errno. */
++	if (ret == 0)
++		return -ERANGE;
++	return ret;
++}
++EXPORT_SYMBOL_GPL(xenbus_scanf);
++
++/* Single printf and write: returns -errno or 0. */
++int xenbus_printf(struct xenbus_transaction t,
++		  const char *dir, const char *node, const char *fmt, ...)
++{
++	va_list ap;
++	int ret;
++#define PRINTF_BUFFER_SIZE 4096
++	char *printf_buffer;
++
++	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
++	if (printf_buffer == NULL)
++		return -ENOMEM;
++
++	va_start(ap, fmt);
++	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
++	va_end(ap);
++
++	BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
++	ret = xenbus_write(t, dir, node, printf_buffer);
++
++	kfree(printf_buffer);
++
++	return ret;
++}
++EXPORT_SYMBOL_GPL(xenbus_printf);
++
++/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
++int xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
++{
++	va_list ap;
++	const char *name;
++	int ret = 0;
++
++	va_start(ap, dir);
++	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
++		const char *fmt = va_arg(ap, char *);
++		void *result = va_arg(ap, void *);
++		char *p;
++
++		p = xenbus_read(t, dir, name, NULL);
++		if (IS_ERR(p)) {
++			ret = PTR_ERR(p);
++			break;
++		}
++		if (fmt) {
++			if (sscanf(p, fmt, result) == 0)
++				ret = -EINVAL;
++			kfree(p);
++		} else
++			*(char **)result = p;
++	}
++	va_end(ap);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(xenbus_gather);
++
++static int xs_watch(const char *path, const char *token)
++{
++	struct kvec iov[2];
++
++	iov[0].iov_base = (void *)path;
++	iov[0].iov_len = strlen(path) + 1;
++	iov[1].iov_base = (void *)token;
++	iov[1].iov_len = strlen(token) + 1;
++
++	return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov,
++				 ARRAY_SIZE(iov), NULL));
++}
++
++static int xs_unwatch(const char *path, const char *token)
++{
++	struct kvec iov[2];
++
++	iov[0].iov_base = (char *)path;
++	iov[0].iov_len = strlen(path) + 1;
++	iov[1].iov_base = (char *)token;
++	iov[1].iov_len = strlen(token) + 1;
++
++	return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov,
++				 ARRAY_SIZE(iov), NULL));
++}
++
++static struct xenbus_watch *find_watch(const char *token)
++{
++	struct xenbus_watch *i, *cmp;
++
++	cmp = (void *)simple_strtoul(token, NULL, 16);
++
++	list_for_each_entry(i, &watches, list)
++		if (i == cmp)
++			return i;
++
++	return NULL;
++}
++
++/* Register callback to watch this node. */
++int register_xenbus_watch(struct xenbus_watch *watch)
++{
++	/* Pointer in ascii is the token. */
++	char token[sizeof(watch) * 2 + 1];
++	int err;
++
++	sprintf(token, "%lX", (long)watch);
++
++	down_read(&xs_state.watch_mutex);
++
++	spin_lock(&watches_lock);
++	BUG_ON(find_watch(token));
++	list_add(&watch->list, &watches);
++	spin_unlock(&watches_lock);
++
++	err = xs_watch(watch->node, token);
++
++	/* Ignore errors due to multiple registration. */
++	if ((err != 0) && (err != -EEXIST)) {
++		spin_lock(&watches_lock);
++		list_del(&watch->list);
++		spin_unlock(&watches_lock);
++	}
++
++	up_read(&xs_state.watch_mutex);
++
++	return err;
++}
++EXPORT_SYMBOL_GPL(register_xenbus_watch);
++
++void unregister_xenbus_watch(struct xenbus_watch *watch)
++{
++	struct xs_stored_msg *msg, *tmp;
++	char token[sizeof(watch) * 2 + 1];
++	int err;
++
++	sprintf(token, "%lX", (long)watch);
++
++	down_read(&xs_state.watch_mutex);
++
++	spin_lock(&watches_lock);
++	BUG_ON(!find_watch(token));
++	list_del(&watch->list);
++	spin_unlock(&watches_lock);
++
++	err = xs_unwatch(watch->node, token);
++	if (err)
++		printk(KERN_WARNING
++		       "XENBUS Failed to release watch %s: %i\n",
++		       watch->node, err);
++
++	up_read(&xs_state.watch_mutex);
++
++	/* Cancel pending watch events. */
++	spin_lock(&watch_events_lock);
++	list_for_each_entry_safe(msg, tmp, &watch_events, list) {
++		if (msg->u.watch.handle != watch)
++			continue;
++		list_del(&msg->list);
++		kfree(msg->u.watch.vec);
++		kfree(msg);
++	}
++	spin_unlock(&watch_events_lock);
++
++	/* Flush any currently-executing callback, unless we are it. :-) */
++	if (current->pid != xenwatch_pid) {
++		mutex_lock(&xenwatch_mutex);
++		mutex_unlock(&xenwatch_mutex);
++	}
++}
++EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
++
++void xs_suspend(void)	/* take every xs_state lock; undone by xs_resume()/xs_suspend_cancel() */
++{
++	down_write(&xs_state.transaction_mutex);	/* waits for read-holders: open transactions */
++	down_write(&xs_state.watch_mutex);	/* waits for watch (un)registration in progress */
++	mutex_lock(&xs_state.request_mutex);	/* order: transaction -> watch -> request */
++	mutex_lock(&xs_state.response_mutex);	/* holds off the reader in process_msg() */
++}
++
++void xs_resume(void)
++{
++	struct xenbus_watch *watch;
++	char token[sizeof(watch) * 2 + 1];
++
++	mutex_unlock(&xs_state.response_mutex);
++	mutex_unlock(&xs_state.request_mutex);
++	up_write(&xs_state.transaction_mutex);
++
++	/* No need for watches_lock: the watch_mutex is sufficient. */
++	list_for_each_entry(watch, &watches, list) {
++		sprintf(token, "%lX", (long)watch);
++		xs_watch(watch->node, token);
++	}
++
++	up_write(&xs_state.watch_mutex);
++}
++
++void xs_suspend_cancel(void)
++{
++	mutex_unlock(&xs_state.response_mutex);
++	mutex_unlock(&xs_state.request_mutex);
++	up_write(&xs_state.watch_mutex);
++	up_write(&xs_state.transaction_mutex);
++}
++
++static int xenwatch_handle_callback(void *data)
++{
++	struct xs_stored_msg *msg = data;
++
++	msg->u.watch.handle->callback(msg->u.watch.handle,
++				      (const char **)msg->u.watch.vec,
++				      msg->u.watch.vec_size);
++
++	kfree(msg->u.watch.vec);
++	kfree(msg);
++
++	/* Kill this kthread if we were spawned just for this callback. */
++	if (current->pid != xenwatch_pid)
++		do_exit(0);
++
++	return 0;
++}
++
++static int xenwatch_thread(void *unused)
++{
++	struct list_head *ent;
++	struct xs_stored_msg *msg;
++
++	for (;;) {
++		wait_event_interruptible(watch_events_waitq,
++					 !list_empty(&watch_events));
++
++		if (kthread_should_stop())
++			break;
++
++		mutex_lock(&xenwatch_mutex);
++
++		spin_lock(&watch_events_lock);
++		ent = watch_events.next;
++		if (ent != &watch_events)
++			list_del(ent);
++		spin_unlock(&watch_events_lock);
++
++		if (ent != &watch_events) {
++			msg = list_entry(ent, struct xs_stored_msg, list);
++			if (msg->u.watch.handle->flags & XBWF_new_thread)
++				kthread_run(xenwatch_handle_callback,
++					    msg, "xenwatch_cb");
++			else
++				xenwatch_handle_callback(msg);
++		}
++
++		mutex_unlock(&xenwatch_mutex);
++	}
++
++	return 0;
++}
++
++static int process_msg(void)	/* read one xenstore message and queue it; 0 or -errno */
++{
++	struct xs_stored_msg *msg;
++	char *body;
++	int err;
++
++	/*
++	 * We must disallow save/restore while reading a xenstore message.
++	 * A partial read across s/r leaves us out of sync with xenstored.
++	 */
++	for (;;) {
++		err = xb_wait_for_data_to_read();
++		if (err)
++			return err;
++		mutex_lock(&xs_state.response_mutex);
++		if (xb_data_to_read())
++			break;
++		/* We raced with save/restore: pending data 'disappeared'. */
++		mutex_unlock(&xs_state.response_mutex);
++	}
++
++
++	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
++	if (msg == NULL) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	err = xb_read(&msg->hdr, sizeof(msg->hdr));
++	if (err) {
++		kfree(msg);
++		goto out;
++	}
++
++	body = kmalloc(msg->hdr.len + 1, GFP_KERNEL);	/* +1 for the terminator added below */
++	if (body == NULL) {
++		kfree(msg);
++		err = -ENOMEM;
++		goto out;
++	}
++
++	err = xb_read(body, msg->hdr.len);
++	if (err) {
++		kfree(body);
++		kfree(msg);
++		goto out;
++	}
++	body[msg->hdr.len] = '\0';
++
++	if (msg->hdr.type == XS_WATCH_EVENT) {
++		msg->u.watch.vec = split(body, msg->hdr.len,
++					 &msg->u.watch.vec_size);	/* split() frees body */
++		if (IS_ERR(msg->u.watch.vec)) {
++			err = PTR_ERR(msg->u.watch.vec);	/* read the error before freeing msg */
++			kfree(msg);
++			goto out;
++		}
++
++		spin_lock(&watches_lock);
++		msg->u.watch.handle = find_watch(
++			msg->u.watch.vec[XS_WATCH_TOKEN]);
++		if (msg->u.watch.handle != NULL) {
++			spin_lock(&watch_events_lock);
++			list_add_tail(&msg->list, &watch_events);
++			wake_up(&watch_events_waitq);
++			spin_unlock(&watch_events_lock);
++		} else {	/* token matches no registered watch: drop the event */
++			kfree(msg->u.watch.vec);
++			kfree(msg);
++		}
++		spin_unlock(&watches_lock);
++	} else {	/* anything else is a reply for read_reply() */
++		msg->u.reply.body = body;
++		spin_lock(&xs_state.reply_lock);
++		list_add_tail(&msg->list, &xs_state.reply_list);
++		spin_unlock(&xs_state.reply_lock);
++		wake_up(&xs_state.reply_waitq);
++	}
++
++ out:
++	mutex_unlock(&xs_state.response_mutex);
++	return err;
++}
++
++static int xenbus_thread(void *unused)
++{
++	int err;
++
++	for (;;) {
++		err = process_msg();
++		if (err)
++			printk(KERN_WARNING "XENBUS error %d while reading "
++			       "message\n", err);
++		if (kthread_should_stop())
++			break;
++	}
++
++	return 0;
++}
++/* Set up xs_state and the comms rings, then start the xenwatch/xenbus kthreads. */
++int xs_init(void)
++{
++	int err;
++	struct task_struct *task;
++
++	INIT_LIST_HEAD(&xs_state.reply_list);
++	spin_lock_init(&xs_state.reply_lock);
++	init_waitqueue_head(&xs_state.reply_waitq);
++
++	mutex_init(&xs_state.request_mutex);
++	mutex_init(&xs_state.response_mutex);
++	init_rwsem(&xs_state.transaction_mutex);
++	init_rwsem(&xs_state.watch_mutex);
++
++	/* Initialize the shared memory rings to talk to xenstored */
++	err = xb_init_comms();
++	if (err)
++		return err;
++
++	task = kthread_run(xenwatch_thread, NULL, "xenwatch");
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	xenwatch_pid = task->pid;	/* remember which task is the watcher */
++
++	task = kthread_run(xenbus_thread, NULL, "xenbus");
++	if (IS_ERR(task))
++		return PTR_ERR(task);	/* NOTE(review): "xenwatch" is not stopped here */
++
++	return 0;
++}
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ b/drivers/xen/xenoprof/xenoprofile.c	2007-08-27 14:02:03.000000000 -0400
+@@ -0,0 +1,500 @@
++/**
++ * @file xenoprofile.c
++ *
++ * @remark Copyright 2002 OProfile authors
++ * @remark Read the file COPYING
++ *
++ * @author John Levon <levon@movementarian.org>
++ *
++ * Modified by Aravind Menon and Jose Renato Santos for Xen
++ * These modifications are:
++ * Copyright (C) 2005 Hewlett-Packard Co.
++ *
++ * Separated out arch-generic part
++ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
++ *                    VA Linux Systems Japan K.K.
++ */
++
++#include <linux/init.h>
++#include <linux/notifier.h>
++#include <linux/smp.h>
++#include <linux/oprofile.h>
++#include <linux/sysdev.h>
++#include <linux/slab.h>
++#include <linux/interrupt.h>
++#include <linux/vmalloc.h>
++#include <asm/pgtable.h>
++#include <xen/evtchn.h>
++#include <xen/xenoprof.h>
++#include <xen/driver_util.h>
++#include <xen/interface/xen.h>
++#include <xen/interface/xenoprof.h>
++#include "../../../drivers/oprofile/cpu_buffer.h"
++#include "../../../drivers/oprofile/event_buffer.h"
++
++#define MAX_XENOPROF_SAMPLES 16	/* max_samples passed to map_xenoprof_buffer() */
++
++/* Sample buffers shared with Xen, indexed by the vcpu_id found in each buffer */
++xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
++/* Shared buffer area holding this domain's sample buffers */
++struct xenoprof_shared_buffer shared_buffer;
++
++/* Passive sample buffers shared with Xen, per passive-domain slot and vcpu_id */
++xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS];
++/* Passive shared buffer areas, one per passive-domain slot */
++struct xenoprof_shared_buffer p_shared_buffer[MAX_OPROF_DOMAINS];
++
++static int xenoprof_start(void);
++static void xenoprof_stop(void);
++
++static int xenoprof_enabled = 0;	/* set to 1 by xenoprof_setup(), 0 by xenoprof_shutdown() */
++static int xenoprof_is_primary = 0;	/* is_primary value returned by XENOPROF_init */
++static int active_defined;	/* non-zero once the active-domain list has been set up */
++
++/* Number of buffers in shared area (one per VCPU) */
++int nbuf;
++/* Mappings of VIRQ_XENOPROF to irq number (per cpu); -1 when not bound */
++int ovf_irq[NR_CPUS];
++/* CPU model type string, copied from the XENOPROF_init reply */
++char cpu_type[XENOPROF_CPU_TYPE_SIZE];
++
++#ifdef CONFIG_PM
++/* sysdev suspend hook: stop profiling if it is enabled; always returns 0. */
++static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
++{
++	if (xenoprof_enabled == 1)
++		xenoprof_stop();
++	return 0;
++}
++
++/* sysdev resume hook: restart profiling if it is enabled; always returns 0. */
++static int xenoprof_resume(struct sys_device * dev)
++{
++	if (xenoprof_enabled == 1)
++		xenoprof_start();	/* NOTE(review): return value is dropped */
++	return 0;
++}
++
++/* sysdev class named "oprofile" that routes suspend/resume to the hooks above. */
++static struct sysdev_class oprofile_sysclass = {
++	set_kset_name("oprofile"),
++	.resume		= xenoprof_resume,
++	.suspend	= xenoprof_suspend
++};
++
++/* The one sysdev instance (id 0) registered under oprofile_sysclass. */
++static struct sys_device device_oprofile = {
++	.id	= 0,
++	.cls	= &oprofile_sysclass,
++};
++
++/* Register the oprofile sysdev class, then the device; first error wins. */
++static int __init init_driverfs(void)
++{
++	int rc = sysdev_class_register(&oprofile_sysclass);
++	if (rc)
++		return rc;
++	return sysdev_register(&device_oprofile);
++}
++
++/* Reverse of init_driverfs(): unregister the device first, then its class. */
++static void exit_driverfs(void)
++{
++	sysdev_unregister(&device_oprofile);
++	sysdev_class_unregister(&oprofile_sysclass);
++}
++
++#else
++#define init_driverfs() do { } while (0)
++#define exit_driverfs() do { } while (0)
++#endif /* CONFIG_PM */
++
++unsigned long long oprofile_samples = 0;	/* samples added with is_passive == 0 */
++unsigned long long p_oprofile_samples = 0;	/* samples added with is_passive != 0 */
++
++unsigned int pdomains;	/* number of passive_domains[] slots in use */
++struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS];	/* one entry per passive domain */
++
++/* Hand every pending sample of one Xen buffer to oprofile_add_pc(). */
++static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive)
++{
++	unsigned long long *counter;
++	int head = buf->event_head;
++	int pos = buf->event_tail;
++	int limit = buf->event_size;
++
++	/* Passive samples are tallied separately from our own. */
++	if (is_passive)
++		counter = &p_oprofile_samples;
++	else
++		counter = &oprofile_samples;
++
++	/* Tail is past head: run to the end of the log, restart at 0. */
++	if (pos > head) {
++		for (; pos < limit; pos++) {
++			oprofile_add_pc(buf->event_log[pos].eip,
++					buf->event_log[pos].mode,
++					buf->event_log[pos].event);
++			++*counter;
++		}
++		pos = 0;
++	}
++
++	/* Entries from the current position up to, not including, head. */
++	for (; pos < head; pos++) {
++		oprofile_add_pc(buf->event_log[pos].eip,
++				buf->event_log[pos].mode,
++				buf->event_log[pos].event);
++		++*counter;
++	}
++	buf->event_tail = pos;
++}
++/* Drain every non-empty passive-domain buffer, emitting a domain switch per domain. */
++static void xenoprof_handle_passive(void)
++{
++	int i, j;
++	int flag_domain, flag_switch = 0;
++	/* flag_domain: switch emitted for domain i; flag_switch: some samples were added */
++	for (i = 0; i < pdomains; i++) {
++		flag_domain = 0;
++		for (j = 0; j < passive_domains[i].nbuf; j++) {
++			xenoprof_buf_t *buf = p_xenoprof_buf[i][j];
++			if (buf->event_head == buf->event_tail)
++				continue;	/* nothing pending in this buffer */
++			if (!flag_domain) {
++				if (!oprofile_add_domain_switch(passive_domains[i].
++								domain_id))
++					goto done;	/* presumably the switch record could not be added */
++				flag_domain = 1;
++			}
++			xenoprof_add_pc(buf, 1);
++			flag_switch = 1;
++		}
++	}
++done:
++	if (flag_switch)
++		oprofile_add_domain_switch(COORDINATOR_DOMAIN);	/* switch back once at the end */
++}
++/* VIRQ_XENOPROF handler: drain this CPU's buffer, then (primary only) the passive ones. */
++static irqreturn_t 
++xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
++{
++	struct xenoprof_buf * buf;
++	int cpu;
++	static unsigned long flag;	/* bit 0 is held while passive buffers are drained */
++
++	cpu = smp_processor_id();
++	buf = xenoprof_buf[cpu];
++
++	xenoprof_add_pc(buf, 0);
++
++	if (xenoprof_is_primary && !test_and_set_bit(0, &flag)) {
++		xenoprof_handle_passive();
++		smp_mb__before_clear_bit();
++		clear_bit(0, &flag);
++	}
++
++	return IRQ_HANDLED;
++}
++
++/* Release every bound VIRQ_XENOPROF handler on the online CPUs. */
++static void unbind_virq(void)
++{
++	int cpu;
++
++	for_each_online_cpu(cpu) {
++		if (ovf_irq[cpu] < 0)
++			continue;	/* nothing bound on this CPU */
++		unbind_from_irqhandler(ovf_irq[cpu], NULL);
++		ovf_irq[cpu] = -1;
++	}
++}
++
++/* Bind VIRQ_XENOPROF to xenoprof_ovf_interrupt() on each online CPU. */
++static int bind_virq(void)
++{
++	int i, result;
++
++	for_each_online_cpu(i) {
++		result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
++						 i,
++						 xenoprof_ovf_interrupt,
++						 SA_INTERRUPT,
++						 "xenoprof",
++						 NULL);
++
++		if (result < 0) {
++			unbind_virq();	/* release whatever is recorded in ovf_irq[] */
++			return result;
++		}
++
++		ovf_irq[i] = result;	/* irq number, needed later for unbinding */
++	}
++	/* All online CPUs are bound. */
++	return 0;
++}
++
++/* Unmap the shared buffer of every current passive domain and reset the count. */
++static void unmap_passive_list(void)
++{
++	int i;
++	for (i = 0; i < pdomains; i++)
++		xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]);
++	pdomains = 0;
++}
++
++/* Map this domain's shared sample area and index its buffers by vcpu_id. */
++static int map_xenoprof_buffer(int max_samples)
++{
++	struct xenoprof_get_buffer get_buffer;
++	struct xenoprof_buf *buf;
++	int ret, i;
++
++	if ( shared_buffer.buffer )
++		return 0;	/* presumably already mapped; nothing to do */
++
++	get_buffer.max_samples = max_samples;
++	ret = xenoprof_arch_map_shared_buffer(&get_buffer, &shared_buffer);
++	if (ret)
++		return ret;
++	nbuf = get_buffer.nbuf;
++
++	for (i=0; i< nbuf; i++) {
++		buf = (struct xenoprof_buf*) 
++			&shared_buffer.buffer[i * get_buffer.bufsize];
++		BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
++		xenoprof_buf[buf->vcpu_id] = buf;
++	}
++
++	return 0;
++}
++
++/* oprofile .setup hook: map buffers, bind the VIRQ, program Xen, enable the VIRQ. */
++static int xenoprof_setup(void)
++{
++	int ret;
++
++	if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) )
++		return ret;
++
++	if ( (ret = bind_virq()) )
++		return ret;	/* bind_virq() has already unbound on failure */
++
++	if (xenoprof_is_primary) {
++		/* Define dom0 as an active domain if not done yet */
++		if (!active_defined) {
++			domid_t domid;
++			ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
++			if (ret)
++				goto err;
++			domid = 0;
++			ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
++			if (ret)
++				goto err;
++			active_defined = 1;
++		}
++
++		ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
++		if (ret)
++			goto err;
++		xenoprof_arch_counter();
++		ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);
++
++		if (ret)
++			goto err;
++	}
++
++	ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);
++	if (ret)
++		goto err;
++
++	xenoprof_enabled = 1;
++	return 0;
++ err:
++	unbind_virq();	/* NOTE(review): reserved counters are not released on this path */
++	return ret;
++}
++
++/* oprofile .shutdown hook: disable the VIRQ, release counters, unbind and unmap. */
++static void xenoprof_shutdown(void)
++{
++	xenoprof_enabled = 0;
++
++	HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL);
++
++	if (xenoprof_is_primary) {
++		HYPERVISOR_xenoprof_op(XENOPROF_release_counters, NULL);
++		active_defined = 0;	/* xenoprof_setup() will define dom0 as active again */
++	}
++
++	unbind_virq();
++
++	xenoprof_arch_unmap_shared_buffer(&shared_buffer);
++	if (xenoprof_is_primary)
++		unmap_passive_list();
++}
++
++/* Start sampling: the primary asks Xen first, then the arch hook runs. */
++static int xenoprof_start(void)
++{
++	int rc;
++
++	rc = xenoprof_is_primary ?
++		HYPERVISOR_xenoprof_op(XENOPROF_start, NULL) : 0;
++	if (rc == 0)
++		xenoprof_arch_start();
++	return rc;
++}
++
++/* Stop sampling: the primary issues XENOPROF_stop, then the arch hook runs. */
++static void xenoprof_stop(void)
++{
++	if (xenoprof_is_primary)
++		HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL);
++	xenoprof_arch_stop();
++}
++
++/* oprofile .set_active hook: rebuild Xen's active-domain list (dom0 always added). */
++static int xenoprof_set_active(int * active_domains,
++			       unsigned int adomains)
++{
++	int ret = 0;
++	int i;
++	int set_dom0 = 0;
++	domid_t domid;
++
++	if (!xenoprof_is_primary)
++		return 0;
++
++	if (adomains > MAX_OPROF_DOMAINS)
++		return -E2BIG;
++
++	ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
++	if (ret)
++		return ret;
++
++	for (i=0; i<adomains; i++) {
++		domid = active_domains[i];
++		if (domid != active_domains[i]) {	/* value does not fit in domid_t */
++			ret = -EINVAL;
++			goto out;
++		}
++		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
++		if (ret)
++			goto out;
++		if (active_domains[i] == 0)
++			set_dom0 = 1;
++	}
++	/* dom0 must always be active but may not be in the list */
++	if (!set_dom0) {
++		domid = 0;
++		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
++	}
++
++out:
++	if (ret)
++		HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);	/* discard the partial list */
++	active_defined = !ret;
++	return ret;
++}
++
++/* oprofile .set_passive hook: replace the passive list and map each domain's buffers. */
++static int xenoprof_set_passive(int * p_domains,
++                                unsigned int pdoms)
++{
++	int ret;
++	int i, j;
++	struct xenoprof_buf *buf;
++
++	if (!xenoprof_is_primary)
++		return 0;	/* only the primary profiler manages passive domains */
++
++	if (pdoms > MAX_OPROF_DOMAINS)
++		return -E2BIG;
++
++	ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL);
++	if (ret)
++		return ret;
++	unmap_passive_list();	/* drops the old mappings and zeroes pdomains */
++
++	for (i = 0; i < pdoms; i++) {
++		passive_domains[i].domain_id = p_domains[i];
++		passive_domains[i].max_samples = 2048;
++		ret = xenoprof_arch_set_passive(&passive_domains[i],
++						&p_shared_buffer[i]);
++		if (ret)
++			goto out;
++		for (j = 0; j < passive_domains[i].nbuf; j++) {
++			buf = (struct xenoprof_buf *)
++				&p_shared_buffer[i].buffer[j * passive_domains[i].bufsize];
++			BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
++			p_xenoprof_buf[i][buf->vcpu_id] = buf;
++		}
++	}
++
++	pdomains = pdoms;
++	return 0;
++out:
++	/* Slot i failed to map; undo only the slots mapped before it. */
++	for (j = 0; j < i; j++)
++		xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[j]);
++	return ret;
++}
++/* oprofile operations table; xenoprofile_init() fills in cpu_type and copies it to *ops. */
++struct oprofile_operations xenoprof_ops = {
++#ifdef HAVE_XENOPROF_CREATE_FILES
++	.create_files 	= xenoprof_create_files,
++#endif
++	.set_active	= xenoprof_set_active,
++	.set_passive    = xenoprof_set_passive,
++	.setup 		= xenoprof_setup,
++	.shutdown	= xenoprof_shutdown,
++	.start		= xenoprof_start,
++	.stop		= xenoprof_stop
++};
++
++
++/* in order to get driverfs right */
++static int using_xenoprof;
++
++/* Probe Xen's xenoprof support; on success publish xenoprof_ops through *ops. */
++int __init xenoprofile_init(struct oprofile_operations * ops)
++{
++	struct xenoprof_init init = { .num_events = 0 };	/* zeroed: printed even on failure */
++	int ret, i;
++
++	ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
++	if (!ret) {
++		xenoprof_arch_init_counter(&init);
++		xenoprof_is_primary = init.is_primary;
++
++		/* cpu_type is detected by Xen; terminate first, copy at most SIZE-1 bytes */
++		cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
++		strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
++		xenoprof_ops.cpu_type = cpu_type;
++
++		init_driverfs();
++		using_xenoprof = 1;
++		*ops = xenoprof_ops;
++
++		for (i=0; i<NR_CPUS; i++)
++			ovf_irq[i] = -1;	/* no VIRQ bound yet */
++		active_defined = 0;
++	}
++	printk(KERN_INFO "%s: ret %d, events %d, xenoprof_is_primary %d\n",
++	       __func__, ret, init.num_events, xenoprof_is_primary);
++	return ret;
++}
++
++/* Module teardown: drop the sysdev entries, unmap buffers, shut xenoprof down in Xen. */
++void xenoprofile_exit(void)
++{
++	if (using_xenoprof)
++		exit_driverfs();
++
++	xenoprof_arch_unmap_shared_buffer(&shared_buffer);
++	if (xenoprof_is_primary) {
++		unmap_passive_list();
++		HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL);
++        }
++}