From 78c28a729a85ef2518a0611f0da4c0a755a69dfb Mon Sep 17 00:00:00 2001 From: Alasdair Kergon Date: Wed, 26 Mar 2003 16:05:38 +0000 Subject: [PATCH] Patches for 2.4.20, first cut. --- patches/common/linux-2.4.20-arch64.patch | 191 + patches/common/linux-2.4.20-b_private.patch | 185 + patches/common/linux-2.4.20-config.patch | 53 + patches/common/linux-2.4.20-devmapper.patch | 6683 ++++++++++++++++ patches/common/linux-2.4.20-makefile.patch | 30 + patches/common/linux-2.4.20-mempool.patch | 356 + patches/common/linux-2.4.20-vcalloc.patch | 49 + patches/linux-2.4.20-VFS-lock.patch | 243 + patches/linux-2.4.20-devmapper-ioctl.patch | 7546 +++++++++++++++++++ 9 files changed, 15336 insertions(+) create mode 100644 patches/common/linux-2.4.20-arch64.patch create mode 100644 patches/common/linux-2.4.20-b_private.patch create mode 100644 patches/common/linux-2.4.20-config.patch create mode 100644 patches/common/linux-2.4.20-devmapper.patch create mode 100644 patches/common/linux-2.4.20-makefile.patch create mode 100644 patches/common/linux-2.4.20-mempool.patch create mode 100644 patches/common/linux-2.4.20-vcalloc.patch create mode 100644 patches/linux-2.4.20-VFS-lock.patch create mode 100644 patches/linux-2.4.20-devmapper-ioctl.patch diff --git a/patches/common/linux-2.4.20-arch64.patch b/patches/common/linux-2.4.20-arch64.patch new file mode 100644 index 0000000..5fe751a --- /dev/null +++ b/patches/common/linux-2.4.20-arch64.patch @@ -0,0 +1,191 @@ +diff -ruN linux-2.4.20/arch/mips64/kernel/ioctl32.c linux/arch/mips64/kernel/ioctl32.c +--- linux-2.4.20/arch/mips64/kernel/ioctl32.c Fri Jan 10 16:34:18 2003 ++++ linux/arch/mips64/kernel/ioctl32.c Wed Mar 26 13:37:24 2003 +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + + #include + #undef __KERNEL__ /* This file was born to be ugly ... */ +@@ -914,6 +915,20 @@ + IOCTL32_DEFAULT(STOP_ARRAY_RO), + IOCTL32_DEFAULT(RESTART_ARRAY_RW), + #endif /* CONFIG_MD */ ++ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++ IOCTL32_DEFAULT(DM_VERSION), ++ IOCTL32_DEFAULT(DM_REMOVE_ALL), ++ IOCTL32_DEFAULT(DM_DEV_CREATE), ++ IOCTL32_DEFAULT(DM_DEV_REMOVE), ++ IOCTL32_DEFAULT(DM_DEV_RELOAD), ++ IOCTL32_DEFAULT(DM_DEV_SUSPEND), ++ IOCTL32_DEFAULT(DM_DEV_RENAME), ++ IOCTL32_DEFAULT(DM_DEV_DEPS), ++ IOCTL32_DEFAULT(DM_DEV_STATUS), ++ IOCTL32_DEFAULT(DM_TARGET_STATUS), ++ IOCTL32_DEFAULT(DM_TARGET_WAIT), ++#endif /* CONFIG_BLK_DEV_DM */ + + IOCTL32_DEFAULT(MTIOCTOP), /* mtio.h ioctls */ + IOCTL32_HANDLER(MTIOCGET32, mt_ioctl_trans), +diff -ruN linux-2.4.20/arch/parisc/kernel/ioctl32.c linux/arch/parisc/kernel/ioctl32.c +--- linux-2.4.20/arch/parisc/kernel/ioctl32.c Fri Jan 10 16:34:19 2003 ++++ linux/arch/parisc/kernel/ioctl32.c Wed Mar 26 14:28:37 2003 +@@ -55,6 +55,7 @@ + #define max max */ + #include + #endif /* LVM */ ++#include + + #include + /* Ugly hack. 
*/ +@@ -3415,6 +3416,20 @@ + COMPATIBLE_IOCTL(LV_BMAP) + COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE) + #endif /* LVM */ ++/* Device-Mapper */ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++COMPATIBLE_IOCTL(DM_VERSION) ++COMPATIBLE_IOCTL(DM_REMOVE_ALL) ++COMPATIBLE_IOCTL(DM_DEV_CREATE) ++COMPATIBLE_IOCTL(DM_DEV_REMOVE) ++COMPATIBLE_IOCTL(DM_DEV_RELOAD) ++COMPATIBLE_IOCTL(DM_DEV_SUSPEND) ++COMPATIBLE_IOCTL(DM_DEV_RENAME) ++COMPATIBLE_IOCTL(DM_DEV_DEPS) ++COMPATIBLE_IOCTL(DM_DEV_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_WAIT) ++#endif /* CONFIG_BLK_DEV_DM */ + #if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE) + COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC) + COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID) +diff -ruN linux-2.4.20/arch/ppc64/kernel/ioctl32.c linux/arch/ppc64/kernel/ioctl32.c +--- linux-2.4.20/arch/ppc64/kernel/ioctl32.c Fri Jan 10 16:34:24 2003 ++++ linux/arch/ppc64/kernel/ioctl32.c Wed Mar 26 14:31:43 2003 +@@ -66,6 +66,7 @@ + #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) + #include + #endif /* LVM */ ++#include + + #include + /* Ugly hack. */ +@@ -4362,6 +4363,20 @@ + COMPATIBLE_IOCTL(NBD_PRINT_DEBUG), + COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS), + COMPATIBLE_IOCTL(NBD_DISCONNECT), ++/* device-mapper */ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++COMPATIBLE_IOCTL(DM_VERSION), ++COMPATIBLE_IOCTL(DM_REMOVE_ALL), ++COMPATIBLE_IOCTL(DM_DEV_CREATE), ++COMPATIBLE_IOCTL(DM_DEV_REMOVE), ++COMPATIBLE_IOCTL(DM_DEV_RELOAD), ++COMPATIBLE_IOCTL(DM_DEV_SUSPEND), ++COMPATIBLE_IOCTL(DM_DEV_RENAME), ++COMPATIBLE_IOCTL(DM_DEV_DEPS), ++COMPATIBLE_IOCTL(DM_DEV_STATUS), ++COMPATIBLE_IOCTL(DM_TARGET_STATUS), ++COMPATIBLE_IOCTL(DM_TARGET_WAIT), ++#endif /* CONFIG_BLK_DEV_DM */ + /* Remove *PRIVATE in 2.5 */ + COMPATIBLE_IOCTL(SIOCDEVPRIVATE), + COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1), +diff -ruN linux-2.4.20/arch/s390x/kernel/ioctl32.c linux/arch/s390x/kernel/ioctl32.c +--- linux-2.4.20/arch/s390x/kernel/ioctl32.c Fri Jan 10 16:34:26 2003 ++++ linux/arch/s390x/kernel/ioctl32.c Wed Mar 26 13:36:43 2003 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -507,6 +508,18 @@ + IOCTL32_DEFAULT(VT_UNLOCKSWITCH), + + IOCTL32_DEFAULT(SIOCGSTAMP), ++ ++ IOCTL32_DEFAULT(DM_VERSION), ++ IOCTL32_DEFAULT(DM_REMOVE_ALL), ++ IOCTL32_DEFAULT(DM_DEV_CREATE), ++ IOCTL32_DEFAULT(DM_DEV_REMOVE), ++ IOCTL32_DEFAULT(DM_DEV_RELOAD), ++ IOCTL32_DEFAULT(DM_DEV_SUSPEND), ++ IOCTL32_DEFAULT(DM_DEV_RENAME), ++ IOCTL32_DEFAULT(DM_DEV_DEPS), ++ IOCTL32_DEFAULT(DM_DEV_STATUS), ++ IOCTL32_DEFAULT(DM_TARGET_STATUS), ++ IOCTL32_DEFAULT(DM_TARGET_WAIT), + + IOCTL32_HANDLER(SIOCGIFNAME, dev_ifname32), + IOCTL32_HANDLER(SIOCGIFCONF, dev_ifconf), +diff -ruN linux-2.4.20/arch/sparc64/kernel/ioctl32.c linux/arch/sparc64/kernel/ioctl32.c +--- linux-2.4.20/arch/sparc64/kernel/ioctl32.c Fri Jan 10 16:34:30 2003 ++++ linux/arch/sparc64/kernel/ioctl32.c Wed Mar 26 14:32:03 2003 +@@ -55,6 +55,7 @@ + #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) + #include + #endif /* LVM */ ++#include + + #include + /* Ugly hack. 
*/ +@@ -5023,6 +5024,21 @@ + COMPATIBLE_IOCTL(NBD_PRINT_DEBUG) + COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS) + COMPATIBLE_IOCTL(NBD_DISCONNECT) ++/* device-mapper */ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++COMPATIBLE_IOCTL(DM_VERSION) ++COMPATIBLE_IOCTL(DM_REMOVE_ALL) ++COMPATIBLE_IOCTL(DM_DEV_CREATE) ++COMPATIBLE_IOCTL(DM_DEV_REMOVE) ++COMPATIBLE_IOCTL(DM_DEV_RELOAD) ++COMPATIBLE_IOCTL(DM_DEV_SUSPEND) ++COMPATIBLE_IOCTL(DM_DEV_RENAME) ++COMPATIBLE_IOCTL(DM_DEV_DEPS) ++COMPATIBLE_IOCTL(DM_DEV_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_WAIT) ++#endif /* CONFIG_BLK_DEV_DM */ ++ + /* And these ioctls need translation */ + HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) + HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) +diff -ruN linux-2.4.20/arch/x86_64/ia32/ia32_ioctl.c linux/arch/x86_64/ia32/ia32_ioctl.c +--- linux-2.4.20/arch/x86_64/ia32/ia32_ioctl.c Fri Jan 10 16:34:32 2003 ++++ linux/arch/x86_64/ia32/ia32_ioctl.c Wed Mar 26 14:29:31 2003 +@@ -62,6 +62,7 @@ + #define max max + #include + #endif /* LVM */ ++#include + + #include + /* Ugly hack. */ +@@ -3776,6 +3777,20 @@ + COMPATIBLE_IOCTL(LV_BMAP) + COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE) + #endif /* LVM */ ++/* Device-Mapper */ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++COMPATIBLE_IOCTL(DM_VERSION) ++COMPATIBLE_IOCTL(DM_REMOVE_ALL) ++COMPATIBLE_IOCTL(DM_DEV_CREATE) ++COMPATIBLE_IOCTL(DM_DEV_REMOVE) ++COMPATIBLE_IOCTL(DM_DEV_RELOAD) ++COMPATIBLE_IOCTL(DM_DEV_SUSPEND) ++COMPATIBLE_IOCTL(DM_DEV_RENAME) ++COMPATIBLE_IOCTL(DM_DEV_DEPS) ++COMPATIBLE_IOCTL(DM_DEV_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_WAIT) ++#endif /* CONFIG_BLK_DEV_DM */ + #if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE) + COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC) + COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID) diff --git a/patches/common/linux-2.4.20-b_private.patch b/patches/common/linux-2.4.20-b_private.patch new file mode 100644 index 0000000..aad57ce --- /dev/null +++ b/patches/common/linux-2.4.20-b_private.patch @@ -0,0 +1,185 @@ +diff -ruN linux-2.4.20/fs/buffer.c linux/fs/buffer.c +--- linux-2.4.20/fs/buffer.c Fri Jan 10 16:35:24 2003 ++++ linux/fs/buffer.c Wed Mar 26 12:53:19 2003 +@@ -586,9 +586,10 @@ + void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode) + { + spin_lock(&lru_list_lock); +- if (bh->b_inode) ++ if (buffer_inode(bh)) + list_del(&bh->b_inode_buffers); +- bh->b_inode = inode; ++ else ++ set_buffer_inode(bh); + list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers); + spin_unlock(&lru_list_lock); + } +@@ -596,9 +597,10 @@ + void buffer_insert_inode_data_queue(struct buffer_head *bh, struct inode *inode) + { + spin_lock(&lru_list_lock); +- if (bh->b_inode) ++ if (buffer_inode(bh)) + list_del(&bh->b_inode_buffers); +- bh->b_inode = inode; ++ else ++ set_buffer_inode(bh); + list_add(&bh->b_inode_buffers, &inode->i_dirty_data_buffers); + spin_unlock(&lru_list_lock); + } +@@ -607,13 +609,13 @@ + remove_inode_queue functions. 
*/ + static void __remove_inode_queue(struct buffer_head *bh) + { +- bh->b_inode = NULL; ++ clear_buffer_inode(bh); + list_del(&bh->b_inode_buffers); + } + + static inline void remove_inode_queue(struct buffer_head *bh) + { +- if (bh->b_inode) ++ if (buffer_inode(bh)) + __remove_inode_queue(bh); + } + +@@ -741,6 +743,7 @@ + bh->b_list = BUF_CLEAN; + bh->b_end_io = handler; + bh->b_private = private; ++ bh->b_journal_head = NULL; + } + + static void end_buffer_io_async(struct buffer_head * bh, int uptodate) +@@ -842,9 +845,9 @@ + bh = BH_ENTRY(list->next); + list_del(&bh->b_inode_buffers); + if (!buffer_dirty(bh) && !buffer_locked(bh)) +- bh->b_inode = NULL; ++ clear_buffer_inode(bh); + else { +- bh->b_inode = &tmp; ++ set_buffer_inode(bh); + list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers); + if (buffer_dirty(bh)) { + get_bh(bh); +@@ -1138,7 +1141,7 @@ + */ + static void __put_unused_buffer_head(struct buffer_head * bh) + { +- if (bh->b_inode) ++ if (buffer_inode(bh)) + BUG(); + if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) { + kmem_cache_free(bh_cachep, bh); +diff -ruN linux-2.4.20/fs/jbd/journal.c linux/fs/jbd/journal.c +--- linux-2.4.20/fs/jbd/journal.c Fri Jan 10 16:35:27 2003 ++++ linux/fs/jbd/journal.c Wed Mar 26 12:53:19 2003 +@@ -1664,8 +1664,8 @@ + * + * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit + * is set. This bit is tested in core kernel code where we need to take +- * JBD-specific actions. Testing the zeroness of ->b_private is not reliable +- * there. ++ * JBD-specific actions. Testing the zeroness of ->b_journal_head is not ++ * reliable there. + * + * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. + * +@@ -1720,9 +1720,9 @@ + + if (buffer_jbd(bh)) { + /* Someone did it for us! 
*/ +- J_ASSERT_BH(bh, bh->b_private != NULL); ++ J_ASSERT_BH(bh, bh->b_journal_head != NULL); + journal_free_journal_head(jh); +- jh = bh->b_private; ++ jh = bh->b_journal_head; + } else { + /* + * We actually don't need jh_splice_lock when +@@ -1730,7 +1730,7 @@ + */ + spin_lock(&jh_splice_lock); + set_bit(BH_JBD, &bh->b_state); +- bh->b_private = jh; ++ bh->b_journal_head = jh; + jh->b_bh = bh; + atomic_inc(&bh->b_count); + spin_unlock(&jh_splice_lock); +@@ -1739,7 +1739,7 @@ + } + jh->b_jcount++; + spin_unlock(&journal_datalist_lock); +- return bh->b_private; ++ return bh->b_journal_head; + } + + /* +@@ -1772,7 +1772,7 @@ + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + spin_lock(&jh_splice_lock); +- bh->b_private = NULL; ++ bh->b_journal_head = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_bit(BH_JBD, &bh->b_state); + __brelse(bh); +diff -ruN linux-2.4.20/include/linux/fs.h linux/include/linux/fs.h +--- linux-2.4.20/include/linux/fs.h Fri Jan 10 16:35:55 2003 ++++ linux/include/linux/fs.h Wed Mar 26 12:53:19 2003 +@@ -220,6 +220,7 @@ + BH_Wait_IO, /* 1 if we should write out this buffer */ + BH_Launder, /* 1 if we can throttle on this buffer */ + BH_JBD, /* 1 if it has an attached journal_head */ ++ BH_Inode, /* 1 if it is attached to i_dirty[_data]_buffers */ + + BH_PrivateStart,/* not a state bit, but the first bit available + * for private allocation by other entities +@@ -262,11 +263,10 @@ + struct page *b_page; /* the page this bh is mapped to */ + void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ + void *b_private; /* reserved for b_end_io */ +- ++ void *b_journal_head; /* ext3 journal_heads */ + unsigned long b_rsector; /* Real buffer location on disk */ + wait_queue_head_t b_wait; + +- struct inode * b_inode; + struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ + }; + +@@ -1184,6 +1184,21 @@ + set_bit(BH_Async, &bh->b_state); + else + clear_bit(BH_Async, &bh->b_state); ++} ++ ++static inline void set_buffer_inode(struct buffer_head *bh) ++{ ++ set_bit(BH_Inode, &bh->b_state); ++} ++ ++static inline void clear_buffer_inode(struct buffer_head *bh) ++{ ++ clear_bit(BH_Inode, &bh->b_state); ++} ++ ++static inline int buffer_inode(struct buffer_head *bh) ++{ ++ return test_bit(BH_Inode, &bh->b_state); + } + + /* +diff -ruN linux-2.4.20/include/linux/jbd.h linux/include/linux/jbd.h +--- linux-2.4.20/include/linux/jbd.h Fri Jan 10 16:35:55 2003 ++++ linux/include/linux/jbd.h Wed Mar 26 12:53:19 2003 +@@ -254,7 +254,7 @@ + + static inline struct journal_head *bh2jh(struct buffer_head *bh) + { +- return bh->b_private; ++ return bh->b_journal_head; + } + + #define HAVE_JOURNAL_CALLBACK_STATUS diff --git a/patches/common/linux-2.4.20-config.patch b/patches/common/linux-2.4.20-config.patch new file mode 100644 index 0000000..c9a8dda --- /dev/null +++ b/patches/common/linux-2.4.20-config.patch @@ -0,0 +1,53 @@ +diff -ruN linux-2.4.20/MAINTAINERS linux/MAINTAINERS +--- linux-2.4.20/MAINTAINERS Fri Jan 10 16:33:49 2003 ++++ linux/MAINTAINERS Wed Mar 26 12:53:19 2003 +@@ -439,6 +439,13 @@ + W: http://www.debian.org/~dz/i8k/ + S: Maintained + ++DEVICE MAPPER ++P: Joe Thornber ++M: dm@uk.sistina.com ++L: linux-LVM@sistina.com ++W: http://www.sistina.com/lvm ++S: Maintained ++ + DEVICE NUMBER REGISTRY + P: H. 
Peter Anvin + M: hpa@zytor.com +diff -ruN linux-2.4.20/drivers/md/Config.in linux/drivers/md/Config.in +--- linux-2.4.20/drivers/md/Config.in Fri Jan 10 16:34:50 2003 ++++ linux/drivers/md/Config.in Wed Mar 26 12:53:19 2003 +@@ -14,5 +14,8 @@ + dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD + + dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD ++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then ++ dep_tristate ' Device-mapper support (EXPERIMENTAL)' CONFIG_BLK_DEV_DM $CONFIG_MD ++fi + + endmenu +diff -ruN linux-2.4.20/Documentation/Configure.help linux/Documentation/Configure.help +--- linux-2.4.20/Documentation/Configure.help Fri Jan 10 16:33:55 2003 ++++ linux/Documentation/Configure.help Wed Mar 26 12:53:19 2003 +@@ -1822,6 +1822,20 @@ + want), say M here and read . The + module will be called lvm-mod.o. + ++Device-mapper support ++CONFIG_BLK_DEV_DM ++ Device-mapper is a low level volume manager. It works by allowing ++ people to specify mappings for ranges of logical sectors. Various ++ mapping types are available, in addition people may write their own ++ modules containing custom mappings if they wish. ++ ++ Higher level volume managers such as LVM2 use this driver. ++ ++ If you want to compile this as a module, say M here and read ++ . The module will be called dm-mod.o. ++ ++ If unsure, say N. ++ + Multiple devices driver support (RAID and LVM) + CONFIG_MD + Support multiple physical spindles through a single logical device. diff --git a/patches/common/linux-2.4.20-devmapper.patch b/patches/common/linux-2.4.20-devmapper.patch new file mode 100644 index 0000000..18651ac --- /dev/null +++ b/patches/common/linux-2.4.20-devmapper.patch @@ -0,0 +1,6683 @@ +diff -ruN linux-2.4.20/drivers/md/dm-exception-store.c linux/drivers/md/dm-exception-store.c +--- linux-2.4.20/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-exception-store.c Wed Mar 26 14:21:16 2003 +@@ -0,0 +1,704 @@ ++/* ++ * dm-snapshot.c ++ * ++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm-snapshot.h" ++#include "kcopyd.h" ++ ++#include ++#include ++#include ++#include ++ ++/*----------------------------------------------------------------- ++ * Persistent snapshots, by persistent we mean that the snapshot ++ * will survive a reboot. ++ *---------------------------------------------------------------*/ ++ ++/* ++ * We need to store a record of which parts of the origin have ++ * been copied to the snapshot device. The snapshot code ++ * requires that we copy exception chunks to chunk aligned areas ++ * of the COW store. It makes sense therefore, to store the ++ * metadata in chunk size blocks. ++ * ++ * There is no backward or forward compatibility implemented, ++ * snapshots with different disk versions than the kernel will ++ * not be usable. It is expected that "lvcreate" will blank out ++ * the start of a fresh COW device before calling the snapshot ++ * constructor. ++ * ++ * The first chunk of the COW device just contains the header. ++ * After this there is a chunk filled with exception metadata, ++ * followed by as many exception chunks as can fit in the ++ * metadata areas. ++ * ++ * All on disk structures are in little-endian format. The end ++ * of the exceptions info is indicated by an exception with a ++ * new_chunk of 0, which is invalid since it would point to the ++ * header chunk. 
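++ *
++ * For example (illustrative numbers only, not mandated by the
++ * format): with 16-sector (8KiB) chunks, each metadata area
++ * holds 512 of these 16-byte disk_exception entries, so chunk 0
++ * is the header, chunk 1 the first metadata area, chunks 2-513
++ * its exception data, chunk 514 the next metadata area, and so
++ * on.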
++ */ ++ ++/* ++ * Magic for persistent snapshots: "SnAp" - Feeble isn't it. ++ */ ++#define SNAP_MAGIC 0x70416e53 ++ ++/* ++ * The on-disk version of the metadata. ++ */ ++#define SNAPSHOT_DISK_VERSION 1 ++ ++struct disk_header { ++ uint32_t magic; ++ ++ /* ++ * Is this snapshot valid. There is no way of recovering ++ * an invalid snapshot. ++ */ ++ int valid; ++ ++ /* ++ * Simple, incrementing version. no backward ++ * compatibility. ++ */ ++ uint32_t version; ++ ++ /* In sectors */ ++ uint32_t chunk_size; ++}; ++ ++struct disk_exception { ++ uint64_t old_chunk; ++ uint64_t new_chunk; ++}; ++ ++struct commit_callback { ++ void (*callback) (void *, int success); ++ void *context; ++}; ++ ++/* ++ * The top level structure for a persistent exception store. ++ */ ++struct pstore { ++ struct dm_snapshot *snap; /* up pointer to my snapshot */ ++ int version; ++ int valid; ++ uint32_t chunk_size; ++ uint32_t exceptions_per_area; ++ ++ /* ++ * Now that we have an asynchronous kcopyd there is no ++ * need for large chunk sizes, so it wont hurt to have a ++ * whole chunks worth of metadata in memory at once. ++ */ ++ void *area; ++ struct kiobuf *iobuf; ++ ++ /* ++ * Used to keep track of which metadata area the data in ++ * 'chunk' refers to. ++ */ ++ uint32_t current_area; ++ ++ /* ++ * The next free chunk for an exception. ++ */ ++ uint32_t next_free; ++ ++ /* ++ * The index of next free exception in the current ++ * metadata area. ++ */ ++ uint32_t current_committed; ++ ++ atomic_t pending_count; ++ uint32_t callback_count; ++ struct commit_callback *callbacks; ++}; ++ ++/* ++ * For performance reasons we want to defer writing a committed ++ * exceptions metadata to disk so that we can amortise away this ++ * exensive operation. ++ * ++ * For the initial version of this code we will remain with ++ * synchronous io. There are some deadlock issues with async ++ * that I haven't yet worked out. ++ */ ++static int do_io(int rw, struct kcopyd_region *where, struct kiobuf *iobuf) ++{ ++ int i, sectors_per_block, nr_blocks, start; ++ int blocksize = get_hardsect_size(where->dev); ++ int status; ++ ++ sectors_per_block = blocksize / SECTOR_SIZE; ++ ++ nr_blocks = where->count / sectors_per_block; ++ start = where->sector / sectors_per_block; ++ ++ for (i = 0; i < nr_blocks; i++) ++ iobuf->blocks[i] = start++; ++ ++ iobuf->length = where->count << 9; ++ iobuf->locked = 1; ++ ++ status = brw_kiovec(rw, 1, &iobuf, where->dev, iobuf->blocks, ++ blocksize); ++ if (status != (where->count << 9)) ++ return -EIO; ++ ++ return 0; ++} ++ ++static int allocate_iobuf(struct pstore *ps) ++{ ++ size_t i, r = -ENOMEM, len, nr_pages; ++ struct page *page; ++ ++ len = ps->chunk_size << SECTOR_SHIFT; ++ ++ /* ++ * Allocate the chunk_size block of memory that will hold ++ * a single metadata area. ++ */ ++ ps->area = vmalloc(len); ++ if (!ps->area) ++ return r; ++ ++ if (alloc_kiovec(1, &ps->iobuf)) ++ goto bad; ++ ++ nr_pages = ps->chunk_size / (PAGE_SIZE / SECTOR_SIZE); ++ r = expand_kiobuf(ps->iobuf, nr_pages); ++ if (r) ++ goto bad; ++ ++ /* ++ * We lock the pages for ps->area into memory since they'll be ++ * doing a lot of io. 
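++ * (do_io() above marks the iobuf locked before handing it to
++ * brw_kiovec().)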
++ */ ++ for (i = 0; i < nr_pages; i++) { ++ page = vmalloc_to_page(ps->area + (i * PAGE_SIZE)); ++ LockPage(page); ++ ps->iobuf->maplist[i] = page; ++ ps->iobuf->nr_pages++; ++ } ++ ++ ps->iobuf->nr_pages = nr_pages; ++ ps->iobuf->offset = 0; ++ ++ return 0; ++ ++ bad: ++ if (ps->iobuf) ++ free_kiovec(1, &ps->iobuf); ++ ++ if (ps->area) ++ vfree(ps->area); ++ ps->iobuf = NULL; ++ return r; ++} ++ ++static void free_iobuf(struct pstore *ps) ++{ ++ int i; ++ ++ for (i = 0; i < ps->iobuf->nr_pages; i++) ++ UnlockPage(ps->iobuf->maplist[i]); ++ ps->iobuf->locked = 0; ++ ++ free_kiovec(1, &ps->iobuf); ++ vfree(ps->area); ++} ++ ++/* ++ * Read or write a chunk aligned and sized block of data from a device. ++ */ ++static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) ++{ ++ int r; ++ struct kcopyd_region where; ++ ++ where.dev = ps->snap->cow->dev; ++ where.sector = ps->chunk_size * chunk; ++ where.count = ps->chunk_size; ++ ++ r = do_io(rw, &where, ps->iobuf); ++ if (r) ++ return r; ++ ++ return 0; ++} ++ ++/* ++ * Read or write a metadata area. Remembering to skip the first ++ * chunk which holds the header. ++ */ ++static int area_io(struct pstore *ps, uint32_t area, int rw) ++{ ++ int r; ++ uint32_t chunk; ++ ++ /* convert a metadata area index to a chunk index */ ++ chunk = 1 + ((ps->exceptions_per_area + 1) * area); ++ ++ r = chunk_io(ps, chunk, rw); ++ if (r) ++ return r; ++ ++ ps->current_area = area; ++ return 0; ++} ++ ++static int zero_area(struct pstore *ps, uint32_t area) ++{ ++ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); ++ return area_io(ps, area, WRITE); ++} ++ ++static int read_header(struct pstore *ps, int *new_snapshot) ++{ ++ int r; ++ struct disk_header *dh; ++ ++ r = chunk_io(ps, 0, READ); ++ if (r) ++ return r; ++ ++ dh = (struct disk_header *) ps->area; ++ ++ if (dh->magic == 0) { ++ *new_snapshot = 1; ++ ++ } else if (dh->magic == SNAP_MAGIC) { ++ *new_snapshot = 0; ++ ps->valid = dh->valid; ++ ps->version = dh->version; ++ ps->chunk_size = dh->chunk_size; ++ ++ } else { ++ DMWARN("Invalid/corrupt snapshot"); ++ r = -ENXIO; ++ } ++ ++ return r; ++} ++ ++static int write_header(struct pstore *ps) ++{ ++ struct disk_header *dh; ++ ++ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); ++ ++ dh = (struct disk_header *) ps->area; ++ dh->magic = SNAP_MAGIC; ++ dh->valid = ps->valid; ++ dh->version = ps->version; ++ dh->chunk_size = ps->chunk_size; ++ ++ return chunk_io(ps, 0, WRITE); ++} ++ ++/* ++ * Access functions for the disk exceptions, these do the endian conversions. ++ */ ++static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) ++{ ++ if (index >= ps->exceptions_per_area) ++ return NULL; ++ ++ return ((struct disk_exception *) ps->area) + index; ++} ++ ++static int read_exception(struct pstore *ps, ++ uint32_t index, struct disk_exception *result) ++{ ++ struct disk_exception *e; ++ ++ e = get_exception(ps, index); ++ if (!e) ++ return -EINVAL; ++ ++ /* copy it */ ++ result->old_chunk = le64_to_cpu(e->old_chunk); ++ result->new_chunk = le64_to_cpu(e->new_chunk); ++ ++ return 0; ++} ++ ++static int write_exception(struct pstore *ps, ++ uint32_t index, struct disk_exception *de) ++{ ++ struct disk_exception *e; ++ ++ e = get_exception(ps, index); ++ if (!e) ++ return -EINVAL; ++ ++ /* copy it */ ++ e->old_chunk = cpu_to_le64(de->old_chunk); ++ e->new_chunk = cpu_to_le64(de->new_chunk); ++ ++ return 0; ++} ++ ++/* ++ * Registers the exceptions that are present in the current area. 
++ * 'full' is filled in to indicate if the area has been ++ * filled. ++ */ ++static int insert_exceptions(struct pstore *ps, int *full) ++{ ++ int i, r; ++ struct disk_exception de; ++ ++ /* presume the area is full */ ++ *full = 1; ++ ++ for (i = 0; i < ps->exceptions_per_area; i++) { ++ r = read_exception(ps, i, &de); ++ ++ if (r) ++ return r; ++ ++ /* ++ * If the new_chunk is pointing at the start of ++ * the COW device, where the first metadata area ++ * is we know that we've hit the end of the ++ * exceptions. Therefore the area is not full. ++ */ ++ if (de.new_chunk == 0LL) { ++ ps->current_committed = i; ++ *full = 0; ++ break; ++ } ++ ++ /* ++ * Keep track of the start of the free chunks. ++ */ ++ if (ps->next_free <= de.new_chunk) ++ ps->next_free = de.new_chunk + 1; ++ ++ /* ++ * Otherwise we add the exception to the snapshot. ++ */ ++ r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); ++ if (r) ++ return r; ++ } ++ ++ return 0; ++} ++ ++static int read_exceptions(struct pstore *ps) ++{ ++ uint32_t area; ++ int r, full = 1; ++ ++ /* ++ * Keeping reading chunks and inserting exceptions until ++ * we find a partially full area. ++ */ ++ for (area = 0; full; area++) { ++ r = area_io(ps, area, READ); ++ if (r) ++ return r; ++ ++ r = insert_exceptions(ps, &full); ++ if (r) ++ return r; ++ ++ area++; ++ } ++ ++ return 0; ++} ++ ++static inline struct pstore *get_info(struct exception_store *store) ++{ ++ return (struct pstore *) store->context; ++} ++ ++static int persistent_percentfull(struct exception_store *store) ++{ ++ struct pstore *ps = get_info(store); ++ return (ps->next_free * store->snap->chunk_size * 100) / ++ get_dev_size(store->snap->cow->dev); ++} ++ ++static void persistent_destroy(struct exception_store *store) ++{ ++ struct pstore *ps = get_info(store); ++ ++ vfree(ps->callbacks); ++ free_iobuf(ps); ++ kfree(ps); ++} ++ ++static int persistent_prepare(struct exception_store *store, ++ struct exception *e) ++{ ++ struct pstore *ps = get_info(store); ++ uint32_t stride; ++ sector_t size = get_dev_size(store->snap->cow->dev); ++ ++ /* Is there enough room ? */ ++ if (size < ((ps->next_free + 1) * store->snap->chunk_size)) ++ return -ENOSPC; ++ ++ e->new_chunk = ps->next_free; ++ ++ /* ++ * Move onto the next free pending, making sure to take ++ * into account the location of the metadata chunks. ++ */ ++ stride = (ps->exceptions_per_area + 1); ++ if ((++ps->next_free % stride) == 1) ++ ps->next_free++; ++ ++ atomic_inc(&ps->pending_count); ++ return 0; ++} ++ ++static void persistent_commit(struct exception_store *store, ++ struct exception *e, ++ void (*callback) (void *, int success), ++ void *callback_context) ++{ ++ int r, i; ++ struct pstore *ps = get_info(store); ++ struct disk_exception de; ++ struct commit_callback *cb; ++ ++ de.old_chunk = e->old_chunk; ++ de.new_chunk = e->new_chunk; ++ write_exception(ps, ps->current_committed++, &de); ++ ++ /* ++ * Add the callback to the back of the array. This code ++ * is the only place where the callback array is ++ * manipulated, and we know that it will never be called ++ * multiple times concurrently. ++ */ ++ cb = ps->callbacks + ps->callback_count++; ++ cb->callback = callback; ++ cb->context = callback_context; ++ ++ /* ++ * If there are no more exceptions in flight, or we have ++ * filled this metadata area we commit the exceptions to ++ * disk. 
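++ * Batching the area write like this amortises the cost of the
++ * synchronous io across many exceptions; every queued callback
++ * is then run with the result of the write.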
++ */ ++ if (atomic_dec_and_test(&ps->pending_count) || ++ (ps->current_committed == ps->exceptions_per_area)) { ++ r = area_io(ps, ps->current_area, WRITE); ++ if (r) ++ ps->valid = 0; ++ ++ for (i = 0; i < ps->callback_count; i++) { ++ cb = ps->callbacks + i; ++ cb->callback(cb->context, r == 0 ? 1 : 0); ++ } ++ ++ ps->callback_count = 0; ++ } ++ ++ /* ++ * Have we completely filled the current area ? ++ */ ++ if (ps->current_committed == ps->exceptions_per_area) { ++ ps->current_committed = 0; ++ r = zero_area(ps, ps->current_area + 1); ++ if (r) ++ ps->valid = 0; ++ } ++} ++ ++static void persistent_drop(struct exception_store *store) ++{ ++ struct pstore *ps = get_info(store); ++ ++ ps->valid = 0; ++ if (write_header(ps)) ++ DMWARN("write header failed"); ++} ++ ++int dm_create_persistent(struct exception_store *store, uint32_t chunk_size) ++{ ++ int r, new_snapshot; ++ struct pstore *ps; ++ ++ /* allocate the pstore */ ++ ps = kmalloc(sizeof(*ps), GFP_KERNEL); ++ if (!ps) ++ return -ENOMEM; ++ ++ ps->snap = store->snap; ++ ps->valid = 1; ++ ps->version = SNAPSHOT_DISK_VERSION; ++ ps->chunk_size = chunk_size; ++ ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) / ++ sizeof(struct disk_exception); ++ ps->next_free = 2; /* skipping the header and first area */ ++ ps->current_committed = 0; ++ ++ r = allocate_iobuf(ps); ++ if (r) ++ goto bad; ++ ++ /* ++ * Allocate space for all the callbacks. ++ */ ++ ps->callback_count = 0; ++ atomic_set(&ps->pending_count, 0); ++ ps->callbacks = vcalloc(ps->exceptions_per_area, ++ sizeof(*ps->callbacks)); ++ ++ if (!ps->callbacks) ++ goto bad; ++ ++ /* ++ * Read the snapshot header. ++ */ ++ r = read_header(ps, &new_snapshot); ++ if (r) ++ goto bad; ++ ++ /* ++ * Do we need to setup a new snapshot ? ++ */ ++ if (new_snapshot) { ++ r = write_header(ps); ++ if (r) { ++ DMWARN("write_header failed"); ++ goto bad; ++ } ++ ++ r = zero_area(ps, 0); ++ if (r) { ++ DMWARN("zero_area(0) failed"); ++ goto bad; ++ } ++ ++ } else { ++ /* ++ * Sanity checks. ++ */ ++ if (!ps->valid) { ++ DMWARN("snapshot is marked invalid"); ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ if (ps->chunk_size != chunk_size) { ++ DMWARN("chunk size for existing snapshot different " ++ "from that requested"); ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ if (ps->version != SNAPSHOT_DISK_VERSION) { ++ DMWARN("unable to handle snapshot disk version %d", ++ ps->version); ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ /* ++ * Read the metadata. ++ */ ++ r = read_exceptions(ps); ++ if (r) ++ goto bad; ++ } ++ ++ store->destroy = persistent_destroy; ++ store->prepare_exception = persistent_prepare; ++ store->commit_exception = persistent_commit; ++ store->drop_snapshot = persistent_drop; ++ store->percent_full = persistent_percentfull; ++ store->context = ps; ++ ++ return r; ++ ++ bad: ++ if (ps) { ++ if (ps->callbacks) ++ vfree(ps->callbacks); ++ ++ if (ps->iobuf) ++ free_iobuf(ps); ++ ++ kfree(ps); ++ } ++ return r; ++} ++ ++/*----------------------------------------------------------------- ++ * Implementation of the store for non-persistent snapshots. 
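++ * These keep their exceptions in memory only, so the mappings
++ * do not survive deactivation or reboot.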
++ *---------------------------------------------------------------*/ ++struct transient_c { ++ sector_t next_free; ++}; ++ ++void transient_destroy(struct exception_store *store) ++{ ++ kfree(store->context); ++} ++ ++int transient_prepare(struct exception_store *store, struct exception *e) ++{ ++ struct transient_c *tc = (struct transient_c *) store->context; ++ sector_t size = get_dev_size(store->snap->cow->dev); ++ ++ if (size < (tc->next_free + store->snap->chunk_size)) ++ return -1; ++ ++ e->new_chunk = sector_to_chunk(store->snap, tc->next_free); ++ tc->next_free += store->snap->chunk_size; ++ ++ return 0; ++} ++ ++void transient_commit(struct exception_store *store, ++ struct exception *e, ++ void (*callback) (void *, int success), ++ void *callback_context) ++{ ++ /* Just succeed */ ++ callback(callback_context, 1); ++} ++ ++static int transient_percentfull(struct exception_store *store) ++{ ++ struct transient_c *tc = (struct transient_c *) store->context; ++ return (tc->next_free * 100) / get_dev_size(store->snap->cow->dev); ++} ++ ++int dm_create_transient(struct exception_store *store, ++ struct dm_snapshot *s, int blocksize) ++{ ++ struct transient_c *tc; ++ ++ memset(store, 0, sizeof(*store)); ++ store->destroy = transient_destroy; ++ store->prepare_exception = transient_prepare; ++ store->commit_exception = transient_commit; ++ store->percent_full = transient_percentfull; ++ store->snap = s; ++ ++ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); ++ if (!tc) ++ return -ENOMEM; ++ ++ tc->next_free = 0; ++ store->context = tc; ++ ++ return 0; ++} +diff -ruN linux-2.4.20/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c +--- linux-2.4.20/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-ioctl.c Wed Mar 26 14:34:50 2003 +@@ -0,0 +1,1160 @@ ++/* ++ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define DM_DRIVER_EMAIL "dm@uk.sistina.com" ++ ++/*----------------------------------------------------------------- ++ * The ioctl interface needs to be able to look up devices by ++ * name or uuid. ++ *---------------------------------------------------------------*/ ++struct hash_cell { ++ struct list_head name_list; ++ struct list_head uuid_list; ++ ++ char *name; ++ char *uuid; ++ struct mapped_device *md; ++ ++ /* I hate devfs */ ++ devfs_handle_t devfs_entry; ++}; ++ ++#define NUM_BUCKETS 64 ++#define MASK_BUCKETS (NUM_BUCKETS - 1) ++static struct list_head _name_buckets[NUM_BUCKETS]; ++static struct list_head _uuid_buckets[NUM_BUCKETS]; ++ ++static devfs_handle_t _dev_dir; ++void dm_hash_remove_all(void); ++ ++/* ++ * Guards access to all three tables. ++ */ ++static DECLARE_RWSEM(_hash_lock); ++ ++static void init_buckets(struct list_head *buckets) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < NUM_BUCKETS; i++) ++ INIT_LIST_HEAD(buckets + i); ++} ++ ++int dm_hash_init(void) ++{ ++ init_buckets(_name_buckets); ++ init_buckets(_uuid_buckets); ++ _dev_dir = devfs_mk_dir(0, DM_DIR, NULL); ++ return 0; ++} ++ ++void dm_hash_exit(void) ++{ ++ dm_hash_remove_all(); ++ devfs_unregister(_dev_dir); ++} ++ ++/*----------------------------------------------------------------- ++ * Hash function: ++ * We're not really concerned with the str hash function being ++ * fast since it's only used by the ioctl interface. 
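++ * It is a simple multiplicative string hash, masked down to
++ * one of the 64 buckets.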
++ *---------------------------------------------------------------*/ ++static unsigned int hash_str(const char *str) ++{ ++ const unsigned int hash_mult = 2654435387U; ++ unsigned int h = 0; ++ ++ while (*str) ++ h = (h + (unsigned int) *str++) * hash_mult; ++ ++ return h & MASK_BUCKETS; ++} ++ ++/*----------------------------------------------------------------- ++ * Code for looking up a device by name ++ *---------------------------------------------------------------*/ ++static struct hash_cell *__get_name_cell(const char *str) ++{ ++ struct list_head *tmp; ++ struct hash_cell *hc; ++ unsigned int h = hash_str(str); ++ ++ list_for_each(tmp, _name_buckets + h) { ++ hc = list_entry(tmp, struct hash_cell, name_list); ++ if (!strcmp(hc->name, str)) ++ return hc; ++ } ++ ++ return NULL; ++} ++ ++static struct hash_cell *__get_uuid_cell(const char *str) ++{ ++ struct list_head *tmp; ++ struct hash_cell *hc; ++ unsigned int h = hash_str(str); ++ ++ list_for_each(tmp, _uuid_buckets + h) { ++ hc = list_entry(tmp, struct hash_cell, uuid_list); ++ if (!strcmp(hc->uuid, str)) ++ return hc; ++ } ++ ++ return NULL; ++} ++ ++/*----------------------------------------------------------------- ++ * Inserting, removing and renaming a device. ++ *---------------------------------------------------------------*/ ++static inline char *kstrdup(const char *str) ++{ ++ char *r = kmalloc(strlen(str) + 1, GFP_KERNEL); ++ if (r) ++ strcpy(r, str); ++ return r; ++} ++ ++static struct hash_cell *alloc_cell(const char *name, const char *uuid, ++ struct mapped_device *md) ++{ ++ struct hash_cell *hc; ++ ++ hc = kmalloc(sizeof(*hc), GFP_KERNEL); ++ if (!hc) ++ return NULL; ++ ++ hc->name = kstrdup(name); ++ if (!hc->name) { ++ kfree(hc); ++ return NULL; ++ } ++ ++ if (!uuid) ++ hc->uuid = NULL; ++ ++ else { ++ hc->uuid = kstrdup(uuid); ++ if (!hc->uuid) { ++ kfree(hc->name); ++ kfree(hc); ++ return NULL; ++ } ++ } ++ ++ INIT_LIST_HEAD(&hc->name_list); ++ INIT_LIST_HEAD(&hc->uuid_list); ++ hc->md = md; ++ return hc; ++} ++ ++static void free_cell(struct hash_cell *hc) ++{ ++ if (hc) { ++ kfree(hc->name); ++ kfree(hc->uuid); ++ kfree(hc); ++ } ++} ++ ++/* ++ * devfs stuff. ++ */ ++static int register_with_devfs(struct hash_cell *hc) ++{ ++ kdev_t dev = dm_kdev(hc->md); ++ ++ hc->devfs_entry = ++ devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER, ++ major(dev), minor(dev), ++ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, ++ &dm_blk_dops, NULL); ++ ++ return 0; ++} ++ ++static int unregister_with_devfs(struct hash_cell *hc) ++{ ++ devfs_unregister(hc->devfs_entry); ++ return 0; ++} ++ ++/* ++ * The kdev_t and uuid of a device can never change once it is ++ * initially inserted. ++ */ ++int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md) ++{ ++ struct hash_cell *cell; ++ ++ /* ++ * Allocate the new cells. ++ */ ++ cell = alloc_cell(name, uuid, md); ++ if (!cell) ++ return -ENOMEM; ++ ++ /* ++ * Insert the cell into all three hash tables. 
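++ * (Only the name and uuid tables actually exist in this
++ * version.)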
++ */ ++ down_write(&_hash_lock); ++ if (__get_name_cell(name)) ++ goto bad; ++ ++ list_add(&cell->name_list, _name_buckets + hash_str(name)); ++ ++ if (uuid) { ++ if (__get_uuid_cell(uuid)) { ++ list_del(&cell->name_list); ++ goto bad; ++ } ++ list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); ++ } ++ register_with_devfs(cell); ++ dm_get(md); ++ up_write(&_hash_lock); ++ ++ return 0; ++ ++ bad: ++ up_write(&_hash_lock); ++ free_cell(cell); ++ return -EBUSY; ++} ++ ++void __hash_remove(struct hash_cell *hc) ++{ ++ /* remove from the dev hash */ ++ list_del(&hc->uuid_list); ++ list_del(&hc->name_list); ++ unregister_with_devfs(hc); ++ dm_put(hc->md); ++} ++ ++void dm_hash_remove_all(void) ++{ ++ int i; ++ struct hash_cell *hc; ++ struct list_head *tmp, *n; ++ ++ down_write(&_hash_lock); ++ for (i = 0; i < NUM_BUCKETS; i++) { ++ list_for_each_safe(tmp, n, _name_buckets + i) { ++ hc = list_entry(tmp, struct hash_cell, name_list); ++ __hash_remove(hc); ++ } ++ } ++ up_write(&_hash_lock); ++} ++ ++int dm_hash_rename(const char *old, const char *new) ++{ ++ char *new_name, *old_name; ++ struct hash_cell *hc; ++ ++ /* ++ * duplicate new. ++ */ ++ new_name = kstrdup(new); ++ if (!new_name) ++ return -ENOMEM; ++ ++ down_write(&_hash_lock); ++ ++ /* ++ * Is new free ? ++ */ ++ hc = __get_name_cell(new); ++ if (hc) { ++ DMWARN("asked to rename to an already existing name %s -> %s", ++ old, new); ++ up_write(&_hash_lock); ++ return -EBUSY; ++ } ++ ++ /* ++ * Is there such a device as 'old' ? ++ */ ++ hc = __get_name_cell(old); ++ if (!hc) { ++ DMWARN("asked to rename a non existent device %s -> %s", ++ old, new); ++ up_write(&_hash_lock); ++ return -ENXIO; ++ } ++ ++ /* ++ * rename and move the name cell. ++ */ ++ list_del(&hc->name_list); ++ old_name = hc->name; ++ hc->name = new_name; ++ list_add(&hc->name_list, _name_buckets + hash_str(new_name)); ++ ++ /* rename the device node in devfs */ ++ unregister_with_devfs(hc); ++ register_with_devfs(hc); ++ ++ up_write(&_hash_lock); ++ kfree(old_name); ++ return 0; ++} ++ ++ ++/*----------------------------------------------------------------- ++ * Implementation of the ioctl commands ++ *---------------------------------------------------------------*/ ++ ++/* ++ * All the ioctl commands get dispatched to functions with this ++ * prototype. ++ */ ++typedef int (*ioctl_fn)(struct dm_ioctl *param, struct dm_ioctl *user); ++ ++/* ++ * Check a string doesn't overrun the chunk of ++ * memory we copied from userland. 
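++ * Returns 0 if a terminating NUL is found before 'end',
++ * -EINVAL otherwise.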
++ */ ++static int valid_str(char *str, void *begin, void *end) ++{ ++ while (((void *) str >= begin) && ((void *) str < end)) ++ if (!*str++) ++ return 0; ++ ++ return -EINVAL; ++} ++ ++static int next_target(struct dm_target_spec *last, uint32_t next, ++ void *begin, void *end, ++ struct dm_target_spec **spec, char **params) ++{ ++ *spec = (struct dm_target_spec *) ++ ((unsigned char *) last + next); ++ *params = (char *) (*spec + 1); ++ ++ if (*spec < (last + 1) || ((void *) *spec > end)) ++ return -EINVAL; ++ ++ return valid_str(*params, begin, end); ++} ++ ++static int populate_table(struct dm_table *table, struct dm_ioctl *args) ++{ ++ int i = 0, r, first = 1; ++ struct dm_target_spec *spec; ++ char *params; ++ void *begin, *end; ++ ++ if (!args->target_count) { ++ DMWARN("populate_table: no targets specified"); ++ return -EINVAL; ++ } ++ ++ begin = (void *) args; ++ end = begin + args->data_size; ++ ++ for (i = 0; i < args->target_count; i++) { ++ ++ if (first) ++ r = next_target((struct dm_target_spec *) args, ++ args->data_start, ++ begin, end, &spec, ¶ms); ++ else ++ r = next_target(spec, spec->next, begin, end, ++ &spec, ¶ms); ++ ++ if (r) { ++ DMWARN("unable to find target"); ++ return -EINVAL; ++ } ++ ++ r = dm_table_add_target(table, spec->target_type, ++ spec->sector_start, spec->length, ++ params); ++ if (r) { ++ DMWARN("error adding target to table"); ++ return -EINVAL; ++ } ++ ++ first = 0; ++ } ++ ++ return dm_table_complete(table); ++} ++ ++/* ++ * Round up the ptr to the next 'align' boundary. Obviously ++ * 'align' must be a power of 2. ++ */ ++static inline void *align_ptr(void *ptr, unsigned int align) ++{ ++ align--; ++ return (void *) (((unsigned long) (ptr + align)) & ~align); ++} ++ ++/* ++ * Copies a dm_ioctl and an optional additional payload to ++ * userland. ++ */ ++static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param, ++ void *data, uint32_t len) ++{ ++ int r; ++ void *ptr = NULL; ++ ++ if (data) { ++ ptr = align_ptr(user + 1, sizeof(unsigned long)); ++ param->data_start = ptr - (void *) user; ++ } ++ ++ /* ++ * The version number has already been filled in, so we ++ * just copy later fields. ++ */ ++ r = copy_to_user(&user->data_size, ¶m->data_size, ++ sizeof(*param) - sizeof(param->version)); ++ if (r) ++ return -EFAULT; ++ ++ if (data) { ++ if (param->data_start + len > param->data_size) ++ return -ENOSPC; ++ ++ if (copy_to_user(ptr, data, len)) ++ r = -EFAULT; ++ } ++ ++ return r; ++} ++ ++/* ++ * Fills in a dm_ioctl structure, ready for sending back to ++ * userland. ++ */ ++static int __info(struct mapped_device *md, struct dm_ioctl *param) ++{ ++ kdev_t dev = dm_kdev(md); ++ struct dm_table *table; ++ struct block_device *bdev; ++ ++ param->flags = DM_EXISTS_FLAG; ++ if (dm_suspended(md)) ++ param->flags |= DM_SUSPEND_FLAG; ++ ++ param->dev = kdev_t_to_nr(dev); ++ bdev = bdget(param->dev); ++ if (!bdev) ++ return -ENXIO; ++ ++ param->open_count = bdev->bd_openers; ++ bdput(bdev); ++ ++ if (is_read_only(dev)) ++ param->flags |= DM_READONLY_FLAG; ++ ++ table = dm_get_table(md); ++ param->target_count = dm_table_get_num_targets(table); ++ dm_table_put(table); ++ ++ return 0; ++} ++ ++/* ++ * Always use UUID for lookups if it's present, otherwise use name. ++ */ ++static inline struct mapped_device *find_device(struct dm_ioctl *param) ++{ ++ struct hash_cell *hc; ++ struct mapped_device *md = NULL; ++ ++ down_read(&_hash_lock); ++ hc = *param->uuid ? 
__get_uuid_cell(param->uuid) : ++ __get_name_cell(param->name); ++ if (hc) { ++ md = hc->md; ++ ++ /* ++ * Sneakily write in both the name and the uuid ++ * while we have the cell. ++ */ ++ strncpy(param->name, hc->name, sizeof(param->name)); ++ if (hc->uuid) ++ strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1); ++ else ++ param->uuid[0] = '\0'; ++ ++ dm_get(md); ++ } ++ up_read(&_hash_lock); ++ ++ return md; ++} ++ ++#define ALIGNMENT sizeof(int) ++static void *_align(void *ptr, unsigned int a) ++{ ++ register unsigned long align = --a; ++ ++ return (void *) (((unsigned long) ptr + align) & ~align); ++} ++ ++/* ++ * Copies device info back to user space, used by ++ * the create and info ioctls. ++ */ ++static int info(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ ++ param->flags = 0; ++ ++ md = find_device(param); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ ++ __info(md, param); ++ dm_put(md); ++ ++ out: ++ return results_to_user(user, param, NULL, 0); ++} ++ ++static inline int get_mode(struct dm_ioctl *param) ++{ ++ int mode = FMODE_READ | FMODE_WRITE; ++ ++ if (param->flags & DM_READONLY_FLAG) ++ mode = FMODE_READ; ++ ++ return mode; ++} ++ ++static int check_name(const char *name) ++{ ++ if (strchr(name, '/')) { ++ DMWARN("invalid device name"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int create(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ kdev_t dev; ++ struct dm_table *t; ++ struct mapped_device *md; ++ int minor; ++ ++ r = check_name(param->name); ++ if (r) ++ return r; ++ ++ r = dm_table_create(&t, get_mode(param)); ++ if (r) ++ return r; ++ ++ r = populate_table(t, param); ++ if (r) { ++ dm_table_put(t); ++ return r; ++ } ++ ++ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ? ++ minor(to_kdev_t(param->dev)) : -1; ++ ++ r = dm_create(minor, t, &md); ++ if (r) { ++ dm_table_put(t); ++ return r; ++ } ++ dm_table_put(t); /* md will have grabbed its own reference */ ++ ++ dev = dm_kdev(md); ++ set_device_ro(dev, (param->flags & DM_READONLY_FLAG)); ++ r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md); ++ dm_put(md); ++ ++ return r ? 
r : info(param, user); ++} ++ ++/* ++ * Build up the status struct for each target ++ */ ++static int __status(struct mapped_device *md, struct dm_ioctl *param, ++ char *outbuf, int *len) ++{ ++ int i, num_targets; ++ struct dm_target_spec *spec; ++ char *outptr; ++ status_type_t type; ++ struct dm_table *table = dm_get_table(md); ++ ++ if (param->flags & DM_STATUS_TABLE_FLAG) ++ type = STATUSTYPE_TABLE; ++ else ++ type = STATUSTYPE_INFO; ++ ++ outptr = outbuf; ++ ++ /* Get all the target info */ ++ num_targets = dm_table_get_num_targets(table); ++ for (i = 0; i < num_targets; i++) { ++ struct dm_target *ti = dm_table_get_target(table, i); ++ ++ if (outptr - outbuf + ++ sizeof(struct dm_target_spec) > param->data_size) { ++ dm_table_put(table); ++ return -ENOMEM; ++ } ++ ++ spec = (struct dm_target_spec *) outptr; ++ ++ spec->status = 0; ++ spec->sector_start = ti->begin; ++ spec->length = ti->len; ++ strncpy(spec->target_type, ti->type->name, ++ sizeof(spec->target_type)); ++ ++ outptr += sizeof(struct dm_target_spec); ++ ++ /* Get the status/table string from the target driver */ ++ if (ti->type->status) ++ ti->type->status(ti, type, outptr, ++ outbuf + param->data_size - outptr); ++ else ++ outptr[0] = '\0'; ++ ++ outptr += strlen(outptr) + 1; ++ _align(outptr, ALIGNMENT); ++ spec->next = outptr - outbuf; ++ } ++ ++ param->target_count = num_targets; ++ *len = outptr - outbuf; ++ dm_table_put(table); ++ ++ return 0; ++} ++ ++/* ++ * Return the status of a device as a text string for each ++ * target. ++ */ ++static int get_status(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ int len = 0; ++ int ret; ++ char *outbuf = NULL; ++ ++ md = find_device(param); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ ++ /* We haven't a clue how long the resultant data will be so ++ just allocate as much as userland has allowed us and make sure ++ we don't overun it */ ++ outbuf = kmalloc(param->data_size, GFP_KERNEL); ++ if (!outbuf) ++ goto out; ++ /* ++ * Get the status of all targets ++ */ ++ __status(md, param, outbuf, &len); ++ ++ /* ++ * Setup the basic dm_ioctl structure. ++ */ ++ __info(md, param); ++ ++ out: ++ if (md) ++ dm_put(md); ++ ++ ret = results_to_user(user, param, outbuf, len); ++ ++ if (outbuf) ++ kfree(outbuf); ++ ++ return ret; ++} ++ ++/* ++ * Wait for a device to report an event ++ */ ++static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ struct dm_table *table; ++ DECLARE_WAITQUEUE(wq, current); ++ ++ md = find_device(param); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ ++ /* ++ * Setup the basic dm_ioctl structure. ++ */ ++ __info(md, param); ++ ++ /* ++ * Wait for a notification event ++ */ ++ set_current_state(TASK_INTERRUPTIBLE); ++ table = dm_get_table(md); ++ dm_table_add_wait_queue(table, &wq); ++ dm_table_put(table); ++ dm_put(md); ++ ++ yield(); ++ set_current_state(TASK_RUNNING); ++ ++ out: ++ return results_to_user(user, param, NULL, 0); ++} ++ ++/* ++ * Retrieves a list of devices used by a particular dm device. ++ */ ++static int dep(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int count, r; ++ struct mapped_device *md; ++ struct list_head *tmp; ++ size_t len = 0; ++ struct dm_target_deps *deps = NULL; ++ struct dm_table *table; ++ ++ md = find_device(param); ++ if (!md) ++ goto out; ++ table = dm_get_table(md); ++ ++ /* ++ * Setup the basic dm_ioctl structure. 
++ */ ++ __info(md, param); ++ ++ /* ++ * Count the devices. ++ */ ++ count = 0; ++ list_for_each(tmp, dm_table_get_devices(table)) ++ count++; ++ ++ /* ++ * Allocate a kernel space version of the dm_target_status ++ * struct. ++ */ ++ if (array_too_big(sizeof(*deps), sizeof(*deps->dev), count)) { ++ dm_table_put(table); ++ dm_put(md); ++ return -ENOMEM; ++ } ++ ++ len = sizeof(*deps) + (sizeof(*deps->dev) * count); ++ deps = kmalloc(len, GFP_KERNEL); ++ if (!deps) { ++ dm_table_put(table); ++ dm_put(md); ++ return -ENOMEM; ++ } ++ ++ /* ++ * Fill in the devices. ++ */ ++ deps->count = count; ++ count = 0; ++ list_for_each(tmp, dm_table_get_devices(table)) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ deps->dev[count++] = dd->bdev->bd_dev; ++ } ++ dm_table_put(table); ++ dm_put(md); ++ ++ out: ++ r = results_to_user(user, param, deps, len); ++ ++ kfree(deps); ++ return r; ++} ++ ++static int remove(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct hash_cell *hc; ++ ++ down_write(&_hash_lock); ++ hc = *param->uuid ? __get_uuid_cell(param->uuid) : ++ __get_name_cell(param->name); ++ if (!hc) { ++ DMWARN("device doesn't appear to be in the dev hash table."); ++ up_write(&_hash_lock); ++ return -EINVAL; ++ } ++ ++ /* ++ * You may ask the interface to drop its reference to an ++ * in use device. This is no different to unlinking a ++ * file that someone still has open. The device will not ++ * actually be destroyed until the last opener closes it. ++ * The name and uuid of the device (both are interface ++ * properties) will be available for reuse immediately. ++ * ++ * You don't want to drop a _suspended_ device from the ++ * interface, since that will leave you with no way of ++ * resuming it. ++ */ ++ if (dm_suspended(hc->md)) { ++ DMWARN("refusing to remove a suspended device."); ++ up_write(&_hash_lock); ++ return -EPERM; ++ } ++ ++ __hash_remove(hc); ++ up_write(&_hash_lock); ++ return 0; ++} ++ ++static int remove_all(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ dm_hash_remove_all(); ++ return 0; ++} ++ ++static int suspend(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ struct mapped_device *md; ++ ++ md = find_device(param); ++ if (!md) ++ return -ENXIO; ++ ++ if (param->flags & DM_SUSPEND_FLAG) ++ r = dm_suspend(md); ++ else ++ r = dm_resume(md); ++ ++ dm_put(md); ++ return r; ++} ++ ++static int reload(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ kdev_t dev; ++ struct mapped_device *md; ++ struct dm_table *t; ++ ++ r = dm_table_create(&t, get_mode(param)); ++ if (r) ++ return r; ++ ++ r = populate_table(t, param); ++ if (r) { ++ dm_table_put(t); ++ return r; ++ } ++ ++ md = find_device(param); ++ if (!md) { ++ dm_table_put(t); ++ return -ENXIO; ++ } ++ ++ r = dm_swap_table(md, t); ++ if (r) { ++ dm_put(md); ++ dm_table_put(t); ++ return r; ++ } ++ dm_table_put(t); /* md will have taken its own reference */ ++ ++ dev = dm_kdev(md); ++ set_device_ro(dev, (param->flags & DM_READONLY_FLAG)); ++ dm_put(md); ++ ++ r = info(param, user); ++ return r; ++} ++ ++static int rename(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ char *new_name = (char *) param + param->data_start; ++ ++ if (valid_str(new_name, (void *) param, ++ (void *) param + param->data_size)) { ++ DMWARN("Invalid new logical volume name supplied."); ++ return -EINVAL; ++ } ++ ++ r = check_name(new_name); ++ if (r) ++ return r; ++ ++ return dm_hash_rename(param->name, new_name); ++} ++ ++ 
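++/*
++ * A summary of the dispatch path implemented below: ctl_ioctl()
++ * checks CAP_SYS_ADMIN, verifies the interface version with
++ * check_version(), copies the parameters into kernel space with
++ * copy_params(), validates them, and then calls the per-command
++ * ioctl_fn returned by lookup_ioctl().
++ */
++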
++/*----------------------------------------------------------------- ++ * Implementation of open/close/ioctl on the special char ++ * device. ++ *---------------------------------------------------------------*/ ++static ioctl_fn lookup_ioctl(unsigned int cmd) ++{ ++ static struct { ++ int cmd; ++ ioctl_fn fn; ++ } _ioctls[] = { ++ {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ ++ {DM_REMOVE_ALL_CMD, remove_all}, ++ {DM_DEV_CREATE_CMD, create}, ++ {DM_DEV_REMOVE_CMD, remove}, ++ {DM_DEV_RELOAD_CMD, reload}, ++ {DM_DEV_RENAME_CMD, rename}, ++ {DM_DEV_SUSPEND_CMD, suspend}, ++ {DM_DEV_DEPS_CMD, dep}, ++ {DM_DEV_STATUS_CMD, info}, ++ {DM_TARGET_STATUS_CMD, get_status}, ++ {DM_TARGET_WAIT_CMD, wait_device_event}, ++ }; ++ ++ return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; ++} ++ ++/* ++ * As well as checking the version compatibility this always ++ * copies the kernel interface version out. ++ */ ++static int check_version(int cmd, struct dm_ioctl *user) ++{ ++ uint32_t version[3]; ++ int r = 0; ++ ++ if (copy_from_user(version, user->version, sizeof(version))) ++ return -EFAULT; ++ ++ if ((DM_VERSION_MAJOR != version[0]) || ++ (DM_VERSION_MINOR < version[1])) { ++ DMWARN("ioctl interface mismatch: " ++ "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", ++ DM_VERSION_MAJOR, DM_VERSION_MINOR, ++ DM_VERSION_PATCHLEVEL, ++ version[0], version[1], version[2], cmd); ++ r = -EINVAL; ++ } ++ ++ /* ++ * Fill in the kernel version. ++ */ ++ version[0] = DM_VERSION_MAJOR; ++ version[1] = DM_VERSION_MINOR; ++ version[2] = DM_VERSION_PATCHLEVEL; ++ if (copy_to_user(user->version, version, sizeof(version))) ++ return -EFAULT; ++ ++ return r; ++} ++ ++static void free_params(struct dm_ioctl *param) ++{ ++ vfree(param); ++} ++ ++static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param) ++{ ++ struct dm_ioctl tmp, *dmi; ++ ++ if (copy_from_user(&tmp, user, sizeof(tmp))) ++ return -EFAULT; ++ ++ if (tmp.data_size < sizeof(tmp)) ++ return -EINVAL; ++ ++ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); ++ if (!dmi) ++ return -ENOMEM; ++ ++ if (copy_from_user(dmi, user, tmp.data_size)) { ++ vfree(dmi); ++ return -EFAULT; ++ } ++ ++ *param = dmi; ++ return 0; ++} ++ ++static int validate_params(uint cmd, struct dm_ioctl *param) ++{ ++ /* Ignores parameters */ ++ if (cmd == DM_REMOVE_ALL_CMD) ++ return 0; ++ ++ /* Unless creating, either name of uuid but not both */ ++ if (cmd != DM_DEV_CREATE_CMD) { ++ if ((!*param->uuid && !*param->name) || ++ (*param->uuid && *param->name)) { ++ DMWARN("one of name or uuid must be supplied"); ++ return -EINVAL; ++ } ++ } ++ ++ /* Ensure strings are terminated */ ++ param->name[DM_NAME_LEN - 1] = '\0'; ++ param->uuid[DM_UUID_LEN - 1] = '\0'; ++ ++ return 0; ++} ++ ++static int ctl_ioctl(struct inode *inode, struct file *file, ++ uint command, ulong u) ++{ ++ int r = 0, cmd; ++ struct dm_ioctl *param; ++ struct dm_ioctl *user = (struct dm_ioctl *) u; ++ ioctl_fn fn = NULL; ++ ++ /* only root can play with this */ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ if (_IOC_TYPE(command) != DM_IOCTL) ++ return -ENOTTY; ++ ++ cmd = _IOC_NR(command); ++ ++ /* ++ * Check the interface version passed in. This also ++ * writes out the kernel's interface version. ++ */ ++ r = check_version(cmd, user); ++ if (r) ++ return r; ++ ++ /* ++ * Nothing more to do for the version command. 
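++ * (check_version() has already copied the kernel's interface
++ * version back to userland.)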
++ */ ++ if (cmd == DM_VERSION_CMD) ++ return 0; ++ ++ fn = lookup_ioctl(cmd); ++ if (!fn) { ++ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); ++ return -ENOTTY; ++ } ++ ++ /* ++ * Copy the parameters into kernel space. ++ */ ++ r = copy_params(user, ¶m); ++ if (r) ++ return r; ++ ++ r = validate_params(cmd, param); ++ if (r) { ++ free_params(param); ++ return r; ++ } ++ ++ r = fn(param, user); ++ free_params(param); ++ return r; ++} ++ ++static struct file_operations _ctl_fops = { ++ .ioctl = ctl_ioctl, ++ .owner = THIS_MODULE, ++}; ++ ++static devfs_handle_t _ctl_handle; ++ ++static struct miscdevice _dm_misc = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = DM_NAME, ++ .fops = &_ctl_fops ++}; ++ ++/* ++ * Create misc character device and link to DM_DIR/control. ++ */ ++int __init dm_interface_init(void) ++{ ++ int r; ++ char rname[64]; ++ ++ r = dm_hash_init(); ++ if (r) ++ return r; ++ ++ r = misc_register(&_dm_misc); ++ if (r) { ++ DMERR("misc_register failed for control device"); ++ dm_hash_exit(); ++ return r; ++ } ++ ++ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3, ++ sizeof rname - 3); ++ if (r == -ENOSYS) ++ goto done; /* devfs not present */ ++ ++ if (r < 0) { ++ DMERR("devfs_generate_path failed for control device"); ++ goto failed; ++ } ++ ++ strncpy(rname + r, "../", 3); ++ r = devfs_mk_symlink(NULL, DM_DIR "/control", ++ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL); ++ if (r) { ++ DMERR("devfs_mk_symlink failed for control device"); ++ goto failed; ++ } ++ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle); ++ ++ done: ++ DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR, ++ DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA, ++ DM_DRIVER_EMAIL); ++ return 0; ++ ++ failed: ++ misc_deregister(&_dm_misc); ++ dm_hash_exit(); ++ return r; ++} ++ ++void dm_interface_exit(void) ++{ ++ if (misc_deregister(&_dm_misc) < 0) ++ DMERR("misc_deregister failed for control device"); ++ ++ dm_hash_exit(); ++} +diff -ruN linux-2.4.20/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c +--- linux-2.4.20/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-linear.c Wed Mar 26 13:27:22 2003 +@@ -0,0 +1,121 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * Linear: maps a linear range of a device. 
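++ *
++ * An illustrative table line (assuming the usual
++ * "start length target args" table syntax):
++ *
++ * 0 1024 linear /dev/sda1 100
++ *
++ * would map sectors 0-1023 of the new device onto /dev/sda1,
++ * starting at sector 100.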
++ */
++struct linear_c {
++ struct dm_dev *dev;
++ sector_t start;
++};
++
++/*
++ * Construct a linear mapping: <dev_path> <offset>
++ */
++static int linear_ctr(struct dm_target *ti, int argc, char **argv)
++{
++ struct linear_c *lc;
++
++ if (argc != 2) {
++ ti->error = "dm-linear: Invalid argument count";
++ return -EINVAL;
++ }
++
++ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
++ if (lc == NULL) {
++ ti->error = "dm-linear: Cannot allocate linear context";
++ return -ENOMEM;
++ }
++
++ if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) {
++ ti->error = "dm-linear: Invalid device sector";
++ goto bad;
++ }
++
++ if (dm_get_device(ti, argv[0], lc->start, ti->len,
++ dm_table_get_mode(ti->table), &lc->dev)) {
++ ti->error = "dm-linear: Device lookup failed";
++ goto bad;
++ }
++
++ ti->private = lc;
++ return 0;
++
++ bad:
++ kfree(lc);
++ return -EINVAL;
++}
++
++static void linear_dtr(struct dm_target *ti)
++{
++ struct linear_c *lc = (struct linear_c *) ti->private;
++
++ dm_put_device(ti, lc->dev);
++ kfree(lc);
++}
++
++static int linear_map(struct dm_target *ti, struct buffer_head *bh, int rw,
++ void **map_context)
++{
++ struct linear_c *lc = (struct linear_c *) ti->private;
++
++ bh->b_rdev = lc->dev->dev;
++ bh->b_rsector = lc->start + (bh->b_rsector - ti->begin);
++
++ return 1;
++}
++
++static int linear_status(struct dm_target *ti, status_type_t type,
++ char *result, int maxlen)
++{
++ struct linear_c *lc = (struct linear_c *) ti->private;
++
++ switch (type) {
++ case STATUSTYPE_INFO:
++ result[0] = '\0';
++ break;
++
++ case STATUSTYPE_TABLE:
++ snprintf(result, maxlen, "%s " SECTOR_FORMAT,
++ kdevname(to_kdev_t(lc->dev->bdev->bd_dev)), lc->start);
++ break;
++ }
++ return 0;
++}
++
++static struct target_type linear_target = {
++ .name = "linear",
++ .module = THIS_MODULE,
++ .ctr = linear_ctr,
++ .dtr = linear_dtr,
++ .map = linear_map,
++ .status = linear_status,
++};
++
++int __init dm_linear_init(void)
++{
++ int r = dm_register_target(&linear_target);
++
++ if (r < 0)
++ DMERR("linear: register failed %d", r);
++
++ return r;
++}
++
++void dm_linear_exit(void)
++{
++ int r = dm_unregister_target(&linear_target);
++
++ if (r < 0)
++ DMERR("linear: unregister failed %d", r);
++}
+diff -ruN linux-2.4.20/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapshot.c
+--- linux-2.4.20/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-snapshot.c Wed Mar 26 14:12:59 2003
+@@ -0,0 +1,1170 @@
++/*
++ * dm-snapshot.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include <linux/config.h>
++#include <linux/ctype.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/blkdev.h>
++#include <linux/mempool.h>
++#include <linux/device-mapper.h>
++#include <linux/vmalloc.h>
++
++#include "dm-snapshot.h"
++#include "kcopyd.h"
++
++/*
++ * FIXME: Remove this before release.
++ */
++#if 0
++#define DMDEBUG(x...) DMWARN(x)
++#else
++#define DMDEBUG(x...)
++#endif
++
++/*
++ * The percentage increment we will wake up users at
++ */
++#define WAKE_UP_PERCENT 5
++
++/*
++ * kcopyd priority of snapshot operations
++ */
++#define SNAPSHOT_COPY_PRIORITY 2
++
++struct pending_exception {
++ struct exception e;
++
++ /*
++ * Origin buffers waiting for this to complete are held
++ * in a list (using b_reqnext).
++ */
++ struct buffer_head *origin_bhs;
++ struct buffer_head *snapshot_bhs;
++
++ /*
++ * Other pending_exceptions that are processing this
++ * chunk. When this list is empty, we know we can
++ * complete the origins.
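++ *
++ * (One write to a shared origin chunk creates a
++ * pending_exception in every snapshot of that origin;
++ * __origin_write() below chains them through 'siblings' so
++ * the origin buffers are only released once every copy has
++ * completed.)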
++ */ ++ struct list_head siblings; ++ ++ /* Pointer back to snapshot context */ ++ struct dm_snapshot *snap; ++ ++ /* ++ * 1 indicates the exception has already been sent to ++ * kcopyd. ++ */ ++ int started; ++}; ++ ++/* ++ * Hash table mapping origin volumes to lists of snapshots and ++ * a lock to protect it ++ */ ++static kmem_cache_t *exception_cache; ++static kmem_cache_t *pending_cache; ++static mempool_t *pending_pool; ++ ++/* ++ * One of these per registered origin, held in the snapshot_origins hash ++ */ ++struct origin { ++ /* The origin device */ ++ kdev_t dev; ++ ++ struct list_head hash_list; ++ ++ /* List of snapshots for this origin */ ++ struct list_head snapshots; ++}; ++ ++/* ++ * Size of the hash table for origin volumes. If we make this ++ * the size of the minors list then it should be nearly perfect ++ */ ++#define ORIGIN_HASH_SIZE 256 ++#define ORIGIN_MASK 0xFF ++static struct list_head *_origins; ++static struct rw_semaphore _origins_lock; ++ ++static int init_origin_hash(void) ++{ ++ int i; ++ ++ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!_origins) { ++ DMERR("Device mapper: Snapshot: unable to allocate memory"); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < ORIGIN_HASH_SIZE; i++) ++ INIT_LIST_HEAD(_origins + i); ++ init_rwsem(&_origins_lock); ++ ++ return 0; ++} ++ ++static void exit_origin_hash(void) ++{ ++ kfree(_origins); ++} ++ ++static inline unsigned int origin_hash(kdev_t dev) ++{ ++ return MINOR(dev) & ORIGIN_MASK; ++} ++ ++static struct origin *__lookup_origin(kdev_t origin) ++{ ++ struct list_head *slist; ++ struct list_head *ol; ++ struct origin *o; ++ ++ ol = &_origins[origin_hash(origin)]; ++ list_for_each(slist, ol) { ++ o = list_entry(slist, struct origin, hash_list); ++ ++ if (o->dev == origin) ++ return o; ++ } ++ ++ return NULL; ++} ++ ++static void __insert_origin(struct origin *o) ++{ ++ struct list_head *sl = &_origins[origin_hash(o->dev)]; ++ list_add_tail(&o->hash_list, sl); ++} ++ ++/* ++ * Make a note of the snapshot and its origin so we can look it ++ * up when the origin has a write on it. ++ */ ++static int register_snapshot(struct dm_snapshot *snap) ++{ ++ struct origin *o; ++ kdev_t dev = snap->origin->dev; ++ ++ down_write(&_origins_lock); ++ o = __lookup_origin(dev); ++ ++ if (!o) { ++ /* New origin */ ++ o = kmalloc(sizeof(*o), GFP_KERNEL); ++ if (!o) { ++ up_write(&_origins_lock); ++ return -ENOMEM; ++ } ++ ++ /* Initialise the struct */ ++ INIT_LIST_HEAD(&o->snapshots); ++ o->dev = dev; ++ ++ __insert_origin(o); ++ } ++ ++ list_add_tail(&snap->list, &o->snapshots); ++ ++ up_write(&_origins_lock); ++ return 0; ++} ++ ++static void unregister_snapshot(struct dm_snapshot *s) ++{ ++ struct origin *o; ++ ++ down_write(&_origins_lock); ++ o = __lookup_origin(s->origin->dev); ++ ++ list_del(&s->list); ++ if (list_empty(&o->snapshots)) { ++ list_del(&o->hash_list); ++ kfree(o); ++ } ++ ++ up_write(&_origins_lock); ++} ++ ++/* ++ * Implementation of the exception hash tables. 
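++ *
++ * Buckets are selected by masking the chunk number: with a
++ * 4096-entry table hash_mask is 0xfff, so chunk 0x12345
++ * falls in bucket 0x345 (see exception_hash() below).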
++ */ ++static int init_exception_table(struct exception_table *et, uint32_t size) ++{ ++ int i; ++ ++ et->hash_mask = size - 1; ++ et->table = vcalloc(size, sizeof(struct list_head)); ++ if (!et->table) ++ return -ENOMEM; ++ ++ for (i = 0; i < size; i++) ++ INIT_LIST_HEAD(et->table + i); ++ ++ return 0; ++} ++ ++static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) ++{ ++ struct list_head *slot, *entry, *temp; ++ struct exception *ex; ++ int i, size; ++ ++ size = et->hash_mask + 1; ++ for (i = 0; i < size; i++) { ++ slot = et->table + i; ++ ++ list_for_each_safe(entry, temp, slot) { ++ ex = list_entry(entry, struct exception, hash_list); ++ kmem_cache_free(mem, ex); ++ } ++ } ++ ++ vfree(et->table); ++} ++ ++/* ++ * FIXME: check how this hash fn is performing. ++ */ ++static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) ++{ ++ return chunk & et->hash_mask; ++} ++ ++static void insert_exception(struct exception_table *eh, struct exception *e) ++{ ++ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; ++ list_add(&e->hash_list, l); ++} ++ ++static inline void remove_exception(struct exception *e) ++{ ++ list_del(&e->hash_list); ++} ++ ++/* ++ * Return the exception data for a sector, or NULL if not ++ * remapped. ++ */ ++static struct exception *lookup_exception(struct exception_table *et, ++ chunk_t chunk) ++{ ++ struct list_head *slot, *el; ++ struct exception *e; ++ ++ slot = &et->table[exception_hash(et, chunk)]; ++ list_for_each(el, slot) { ++ e = list_entry(el, struct exception, hash_list); ++ if (e->old_chunk == chunk) ++ return e; ++ } ++ ++ return NULL; ++} ++ ++static inline struct exception *alloc_exception(void) ++{ ++ struct exception *e; ++ ++ e = kmem_cache_alloc(exception_cache, GFP_NOIO); ++ if (!e) ++ e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); ++ ++ return e; ++} ++ ++static inline void free_exception(struct exception *e) ++{ ++ kmem_cache_free(exception_cache, e); ++} ++ ++static inline struct pending_exception *alloc_pending_exception(void) ++{ ++ return mempool_alloc(pending_pool, GFP_NOIO); ++} ++ ++static inline void free_pending_exception(struct pending_exception *pe) ++{ ++ mempool_free(pe, pending_pool); ++} ++ ++int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) ++{ ++ struct exception *e; ++ ++ e = alloc_exception(); ++ if (!e) ++ return -ENOMEM; ++ ++ e->old_chunk = old; ++ e->new_chunk = new; ++ insert_exception(&s->complete, e); ++ return 0; ++} ++ ++/* ++ * Hard coded magic. ++ */ ++static int calc_max_buckets(void) ++{ ++ unsigned long mem; ++ ++ mem = num_physpages << PAGE_SHIFT; ++ mem /= 50; ++ mem /= sizeof(struct list_head); ++ ++ return mem; ++} ++ ++/* ++ * Rounds a number down to a power of 2. ++ */ ++static inline uint32_t round_down(uint32_t n) ++{ ++ while (n & (n - 1)) ++ n &= (n - 1); ++ return n; ++} ++ ++/* ++ * Allocate room for a suitable hash table. ++ */ ++static int init_hash_tables(struct dm_snapshot *s) ++{ ++ sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; ++ ++ /* ++ * Calculate based on the size of the original volume or ++ * the COW volume... 
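++ *
++ * e.g. a 1GiB origin is 2097152 sectors; with 32-sector
++ * (16KiB) chunks that suggests 65536 buckets for the
++ * complete table, subject to the calc_max_buckets() memory
++ * cap above.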
++ */ ++ cow_dev_size = get_dev_size(s->cow->dev); ++ origin_dev_size = get_dev_size(s->origin->dev); ++ max_buckets = calc_max_buckets(); ++ ++ hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size; ++ hash_size = min(hash_size, max_buckets); ++ ++ /* Round it down to a power of 2 */ ++ hash_size = round_down(hash_size); ++ if (init_exception_table(&s->complete, hash_size)) ++ return -ENOMEM; ++ ++ /* ++ * Allocate hash table for in-flight exceptions ++ * Make this smaller than the real hash table ++ */ ++ hash_size >>= 3; ++ if (!hash_size) ++ hash_size = 64; ++ ++ if (init_exception_table(&s->pending, hash_size)) { ++ exit_exception_table(&s->complete, exception_cache); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Round a number up to the nearest 'size' boundary. size must ++ * be a power of 2. ++ */ ++static inline ulong round_up(ulong n, ulong size) ++{ ++ size--; ++ return (n + size) & ~size; ++} ++ ++/* ++ * Construct a snapshot mapping:
++ * <origin_dev> <COW-dev> <p|n> <chunk-size>
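++ *
++ * (A hypothetical example: "0 409600 snapshot /dev/hda1
++ * /dev/hda2 P 16" snapshots /dev/hda1 onto /dev/hda2,
++ * persistently, in 16-sector (8KiB) chunks.)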
++ */ ++static int snapshot_ctr(struct dm_target *ti, int argc, char **argv) ++{ ++ struct dm_snapshot *s; ++ unsigned long chunk_size; ++ int r = -EINVAL; ++ char persistent; ++ char *origin_path; ++ char *cow_path; ++ char *value; ++ int blocksize; ++ ++ if (argc < 4) { ++ ti->error = "dm-snapshot: requires exactly 4 arguments"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ origin_path = argv[0]; ++ cow_path = argv[1]; ++ persistent = toupper(*argv[2]); ++ ++ if (persistent != 'P' && persistent != 'N') { ++ ti->error = "Persistent flag is not P or N"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ chunk_size = simple_strtoul(argv[3], &value, 10); ++ if (chunk_size == 0 || value == NULL) { ++ ti->error = "Invalid chunk size"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (s == NULL) { ++ ti->error = "Cannot allocate snapshot context private " ++ "structure"; ++ r = -ENOMEM; ++ goto bad; ++ } ++ ++ r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); ++ if (r) { ++ ti->error = "Cannot get origin device"; ++ goto bad_free; ++ } ++ ++ /* FIXME: get cow length */ ++ r = dm_get_device(ti, cow_path, 0, 0, ++ FMODE_READ | FMODE_WRITE, &s->cow); ++ if (r) { ++ dm_put_device(ti, s->origin); ++ ti->error = "Cannot get COW device"; ++ goto bad_free; ++ } ++ ++ /* ++ * Chunk size must be multiple of page size. Silently ++ * round up if it's not. ++ */ ++ chunk_size = round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE); ++ ++ /* Validate the chunk size against the device block size */ ++ blocksize = get_hardsect_size(s->cow->dev); ++ if (chunk_size % (blocksize / SECTOR_SIZE)) { ++ ti->error = "Chunk size is not a multiple of device blocksize"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ /* Check the sizes are small enough to fit in one kiovec */ ++ if (chunk_size > KIO_MAX_SECTORS) { ++ ti->error = "Chunk size is too big"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ /* Check chunk_size is a power of 2 */ ++ if (chunk_size & (chunk_size - 1)) { ++ ti->error = "Chunk size is not a power of 2"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ s->chunk_size = chunk_size; ++ s->chunk_mask = chunk_size - 1; ++ s->type = persistent; ++ for (s->chunk_shift = 0; chunk_size; ++ s->chunk_shift++, chunk_size >>= 1) ++ ; ++ s->chunk_shift--; ++ ++ s->valid = 1; ++ s->last_percent = 0; ++ init_rwsem(&s->lock); ++ s->table = ti->table; ++ ++ /* Allocate hash table for COW data */ ++ if (init_hash_tables(s)) { ++ ti->error = "Unable to allocate hash table space"; ++ r = -ENOMEM; ++ goto bad_putdev; ++ } ++ ++ /* ++ * Check the persistent flag - done here because we need the iobuf ++ * to check the LV header ++ */ ++ s->store.snap = s; ++ ++ if (persistent == 'P') ++ r = dm_create_persistent(&s->store, s->chunk_size); ++ else ++ r = dm_create_transient(&s->store, s, blocksize); ++ ++ if (r) { ++ ti->error = "Couldn't create exception store"; ++ r = -EINVAL; ++ goto bad_free1; ++ } ++ ++ /* Flush IO to the origin device */ ++#if LVM_VFS_ENHANCEMENT ++ fsync_dev_lockfs(s->origin->dev); ++#else ++ fsync_dev(s->origin->dev); ++#endif ++ ++ /* Add snapshot to the list of snapshots for this origin */ ++ if (register_snapshot(s)) { ++ r = -EINVAL; ++ ti->error = "Cannot register snapshot origin"; ++ goto bad_free2; ++ } ++#if LVM_VFS_ENHANCEMENT ++ unlockfs(s->origin->dev); ++#endif ++ kcopyd_inc_client_count(); ++ ++ ti->private = s; ++ return 0; ++ ++ bad_free2: ++#if LVM_VFS_ENHANCEMENT ++ unlockfs(s->origin->dev); ++#endif ++ s->store.destroy(&s->store); ++ ++ bad_free1: ++ 
exit_exception_table(&s->pending, pending_cache); ++ exit_exception_table(&s->complete, exception_cache); ++ ++ bad_putdev: ++ dm_put_device(ti, s->cow); ++ dm_put_device(ti, s->origin); ++ ++ bad_free: ++ kfree(s); ++ ++ bad: ++ return r; ++} ++ ++static void snapshot_dtr(struct dm_target *ti) ++{ ++ struct dm_snapshot *s = (struct dm_snapshot *) ti->private; ++ ++ dm_table_event(ti->table); ++ ++ unregister_snapshot(s); ++ ++ exit_exception_table(&s->pending, pending_cache); ++ exit_exception_table(&s->complete, exception_cache); ++ ++ /* Deallocate memory used */ ++ s->store.destroy(&s->store); ++ ++ dm_put_device(ti, s->origin); ++ dm_put_device(ti, s->cow); ++ kfree(s); ++ ++ kcopyd_dec_client_count(); ++} ++ ++/* ++ * We hold lists of buffer_heads, using the b_reqnext field. ++ */ ++static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh) ++{ ++ bh->b_reqnext = *queue; ++ *queue = bh; ++} ++ ++/* ++ * Flush a list of buffers. ++ */ ++static void flush_buffers(struct buffer_head *bh) ++{ ++ struct buffer_head *n; ++ ++ DMDEBUG("begin flush"); ++ while (bh) { ++ n = bh->b_reqnext; ++ bh->b_reqnext = NULL; ++ DMDEBUG("flushing %p", bh); ++ generic_make_request(WRITE, bh); ++ bh = n; ++ } ++ ++ run_task_queue(&tq_disk); ++} ++ ++/* ++ * Error a list of buffers. ++ */ ++static void error_buffers(struct buffer_head *bh) ++{ ++ struct buffer_head *n; ++ ++ while (bh) { ++ n = bh->b_reqnext; ++ bh->b_reqnext = NULL; ++ buffer_IO_error(bh); ++ bh = n; ++ } ++} ++ ++static void pending_complete(struct pending_exception *pe, int success) ++{ ++ struct exception *e; ++ struct dm_snapshot *s = pe->snap; ++ ++ if (success) { ++ e = alloc_exception(); ++ if (!e) { ++ printk("Unable to allocate exception."); ++ down_write(&s->lock); ++ s->store.drop_snapshot(&s->store); ++ s->valid = 0; ++ up_write(&s->lock); ++ return; ++ } ++ ++ /* ++ * Add a proper exception, and remove the ++ * inflight exception from the list. ++ */ ++ down_write(&s->lock); ++ ++ memcpy(e, &pe->e, sizeof(*e)); ++ insert_exception(&s->complete, e); ++ remove_exception(&pe->e); ++ ++ /* Submit any pending write BHs */ ++ up_write(&s->lock); ++ ++ flush_buffers(pe->snapshot_bhs); ++ DMDEBUG("Exception completed successfully."); ++ ++ /* Notify any interested parties */ ++ if (s->store.percent_full) { ++ int pc = s->store.percent_full(&s->store); ++ ++ if (pc >= s->last_percent + WAKE_UP_PERCENT) { ++ dm_table_event(s->table); ++ s->last_percent = pc - pc % WAKE_UP_PERCENT; ++ } ++ } ++ ++ } else { ++ /* Read/write error - snapshot is unusable */ ++ DMERR("Error reading/writing snapshot"); ++ ++ down_write(&s->lock); ++ s->store.drop_snapshot(&s->store); ++ s->valid = 0; ++ remove_exception(&pe->e); ++ up_write(&s->lock); ++ ++ error_buffers(pe->snapshot_bhs); ++ ++ dm_table_event(s->table); ++ DMDEBUG("Exception failed."); ++ } ++ ++ if (list_empty(&pe->siblings)) ++ flush_buffers(pe->origin_bhs); ++ else ++ list_del(&pe->siblings); ++ ++ free_pending_exception(pe); ++} ++ ++static void commit_callback(void *context, int success) ++{ ++ struct pending_exception *pe = (struct pending_exception *) context; ++ pending_complete(pe, success); ++} ++ ++/* ++ * Called when the copy I/O has finished. kcopyd actually runs ++ * this code so don't block. 
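++ *
++ * (The completion chain: kcopyd calls copy_callback(), which
++ * asks the exception store to commit the metadata; the store
++ * then invokes commit_callback(), which hands the result to
++ * pending_complete() above.)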
++ */ ++static void copy_callback(int err, void *context) ++{ ++ struct pending_exception *pe = (struct pending_exception *) context; ++ struct dm_snapshot *s = pe->snap; ++ ++ if (err) ++ pending_complete(pe, 0); ++ ++ else ++ /* Update the metadata if we are persistent */ ++ s->store.commit_exception(&s->store, &pe->e, commit_callback, ++ pe); ++} ++ ++/* ++ * Dispatches the copy operation to kcopyd. ++ */ ++static inline void start_copy(struct pending_exception *pe) ++{ ++ struct dm_snapshot *s = pe->snap; ++ struct kcopyd_region src, dest; ++ ++ src.dev = s->origin->dev; ++ src.sector = chunk_to_sector(s, pe->e.old_chunk); ++ src.count = s->chunk_size; ++ ++ dest.dev = s->cow->dev; ++ dest.sector = chunk_to_sector(s, pe->e.new_chunk); ++ dest.count = s->chunk_size; ++ ++ if (!pe->started) { ++ /* Hand over to kcopyd */ ++ kcopyd_copy(&src, &dest, copy_callback, pe); ++ pe->started = 1; ++ } ++} ++ ++/* ++ * Looks to see if this snapshot already has a pending exception ++ * for this chunk, otherwise it allocates a new one and inserts ++ * it into the pending table. ++ */ ++static struct pending_exception *find_pending_exception(struct dm_snapshot *s, ++ struct buffer_head *bh) ++{ ++ struct exception *e; ++ struct pending_exception *pe; ++ chunk_t chunk = sector_to_chunk(s, bh->b_rsector); ++ ++ /* ++ * Is there a pending exception for this already ? ++ */ ++ e = lookup_exception(&s->pending, chunk); ++ if (e) { ++ /* cast the exception to a pending exception */ ++ pe = list_entry(e, struct pending_exception, e); ++ ++ } else { ++ /* Create a new pending exception */ ++ pe = alloc_pending_exception(); ++ if (!pe) { ++ DMWARN("Couldn't allocate pending exception."); ++ return NULL; ++ } ++ ++ pe->e.old_chunk = chunk; ++ pe->origin_bhs = pe->snapshot_bhs = NULL; ++ INIT_LIST_HEAD(&pe->siblings); ++ pe->snap = s; ++ pe->started = 0; ++ ++ if (s->store.prepare_exception(&s->store, &pe->e)) { ++ free_pending_exception(pe); ++ s->valid = 0; ++ return NULL; ++ } ++ ++ insert_exception(&s->pending, &pe->e); ++ } ++ ++ return pe; ++} ++ ++static inline void remap_exception(struct dm_snapshot *s, struct exception *e, ++ struct buffer_head *bh) ++{ ++ bh->b_rdev = s->cow->dev; ++ bh->b_rsector = chunk_to_sector(s, e->new_chunk) + ++ (bh->b_rsector & s->chunk_mask); ++} ++ ++static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, int rw, ++ void **map_context) ++{ ++ struct exception *e; ++ struct dm_snapshot *s = (struct dm_snapshot *) ti->private; ++ int r = 1; ++ chunk_t chunk; ++ struct pending_exception *pe; ++ ++ chunk = sector_to_chunk(s, bh->b_rsector); ++ ++ /* Full snapshots are not usable */ ++ if (!s->valid) ++ return -1; ++ ++ /* ++ * Write to snapshot - higher level takes care of RW/RO ++ * flags so we should only get this if we are ++ * writeable. ++ */ ++ if (rw == WRITE) { ++ ++ down_write(&s->lock); ++ ++ /* If the block is already remapped - use that, else remap it */ ++ e = lookup_exception(&s->complete, chunk); ++ if (e) ++ remap_exception(s, e, bh); ++ ++ else { ++ pe = find_pending_exception(s, bh); ++ ++ if (!pe) { ++ s->store.drop_snapshot(&s->store); ++ s->valid = 0; ++ } ++ ++ queue_buffer(&pe->snapshot_bhs, bh); ++ start_copy(pe); ++ r = 0; ++ } ++ ++ up_write(&s->lock); ++ ++ } else { ++ /* ++ * FIXME: this read path scares me because we ++ * always use the origin when we have a pending ++ * exception. However I can't think of a ++ * situation where this is wrong - ejt. 
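++ *
++ * (Rationale: a still-pending exception has not been
++ * committed yet, so the origin still holds the
++ * authoritative data for that chunk.)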
++ */ ++ ++ /* Do reads */ ++ down_read(&s->lock); ++ ++ /* See if it it has been remapped */ ++ e = lookup_exception(&s->complete, chunk); ++ if (e) ++ remap_exception(s, e, bh); ++ else ++ bh->b_rdev = s->origin->dev; ++ ++ up_read(&s->lock); ++ } ++ ++ return r; ++} ++ ++static void list_merge(struct list_head *l1, struct list_head *l2) ++{ ++ struct list_head *l1_n, *l2_p; ++ ++ l1_n = l1->next; ++ l2_p = l2->prev; ++ ++ l1->next = l2; ++ l2->prev = l1; ++ ++ l2_p->next = l1_n; ++ l1_n->prev = l2_p; ++} ++ ++static int __origin_write(struct list_head *snapshots, struct buffer_head *bh) ++{ ++ int r = 1; ++ struct list_head *sl; ++ struct dm_snapshot *snap; ++ struct exception *e; ++ struct pending_exception *pe, *last = NULL; ++ chunk_t chunk; ++ ++ /* Do all the snapshots on this origin */ ++ list_for_each(sl, snapshots) { ++ snap = list_entry(sl, struct dm_snapshot, list); ++ ++ /* Only deal with valid snapshots */ ++ if (!snap->valid) ++ continue; ++ ++ down_write(&snap->lock); ++ ++ /* ++ * Remember, different snapshots can have ++ * different chunk sizes. ++ */ ++ chunk = sector_to_chunk(snap, bh->b_rsector); ++ ++ /* ++ * Check exception table to see if block ++ * is already remapped in this snapshot ++ * and trigger an exception if not. ++ */ ++ e = lookup_exception(&snap->complete, chunk); ++ if (!e) { ++ pe = find_pending_exception(snap, bh); ++ if (!pe) { ++ snap->store.drop_snapshot(&snap->store); ++ snap->valid = 0; ++ ++ } else { ++ if (last) ++ list_merge(&pe->siblings, ++ &last->siblings); ++ ++ last = pe; ++ r = 0; ++ } ++ } ++ ++ up_write(&snap->lock); ++ } ++ ++ /* ++ * Now that we have a complete pe list we can start the copying. ++ */ ++ if (last) { ++ pe = last; ++ do { ++ down_write(&pe->snap->lock); ++ queue_buffer(&pe->origin_bhs, bh); ++ start_copy(pe); ++ up_write(&pe->snap->lock); ++ pe = list_entry(pe->siblings.next, ++ struct pending_exception, siblings); ++ ++ } while (pe != last); ++ } ++ ++ return r; ++} ++ ++static int snapshot_status(struct dm_target *ti, status_type_t type, ++ char *result, int maxlen) ++{ ++ struct dm_snapshot *snap = (struct dm_snapshot *) ti->private; ++ char cow[16]; ++ char org[16]; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ if (!snap->valid) ++ snprintf(result, maxlen, "Invalid"); ++ else { ++ if (snap->store.percent_full) ++ snprintf(result, maxlen, "%d%%", ++ snap->store.percent_full(&snap-> ++ store)); ++ else ++ snprintf(result, maxlen, "Unknown"); ++ } ++ break; ++ ++ case STATUSTYPE_TABLE: ++ /* ++ * kdevname returns a static pointer so we need ++ * to make private copies if the output is to ++ * make sense. ++ */ ++ strncpy(cow, kdevname(snap->cow->dev), sizeof(cow)); ++ strncpy(org, kdevname(snap->origin->dev), sizeof(org)); ++ snprintf(result, maxlen, "%s %s %c %ld", org, cow, ++ snap->type, snap->chunk_size); ++ break; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Called on a write from the origin driver. ++ */ ++int do_origin(struct dm_dev *origin, struct buffer_head *bh) ++{ ++ struct origin *o; ++ int r; ++ ++ down_read(&_origins_lock); ++ o = __lookup_origin(origin->dev); ++ if (!o) ++ BUG(); ++ ++ r = __origin_write(&o->snapshots, bh); ++ up_read(&_origins_lock); ++ ++ return r; ++} ++ ++/* ++ * Origin: maps a linear range of a device, with hooks for snapshotting. ++ */ ++ ++/* ++ * Construct an origin mapping: ++ * The context for an origin is merely a 'struct dm_dev *' ++ * pointing to the real device. 
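++ *
++ * A hypothetical table line: "0 2097152 snapshot-origin
++ * /dev/hda1" -- reads pass straight through to /dev/hda1,
++ * while writes first trigger copy-outs to every snapshot
++ * registered against it.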
++ */ ++static int origin_ctr(struct dm_target *ti, int argc, char **argv) ++{ ++ int r; ++ struct dm_dev *dev; ++ ++ if (argc != 1) { ++ ti->error = "dm-origin: incorrect number of arguments"; ++ return -EINVAL; ++ } ++ ++ r = dm_get_device(ti, argv[0], 0, ti->len, ++ dm_table_get_mode(ti->table), &dev); ++ if (r) { ++ ti->error = "Cannot get target device"; ++ return r; ++ } ++ ++ ti->private = dev; ++ ++ return 0; ++} ++ ++static void origin_dtr(struct dm_target *ti) ++{ ++ struct dm_dev *dev = (struct dm_dev *) ti->private; ++ dm_put_device(ti, dev); ++} ++ ++static int origin_map(struct dm_target *ti, struct buffer_head *bh, int rw, ++ void **map_context) ++{ ++ struct dm_dev *dev = (struct dm_dev *) ti->private; ++ bh->b_rdev = dev->dev; ++ ++ /* Only tell snapshots if this is a write */ ++ return (rw == WRITE) ? do_origin(dev, bh) : 1; ++} ++ ++static int origin_status(struct dm_target *ti, status_type_t type, char *result, ++ int maxlen) ++{ ++ struct dm_dev *dev = (struct dm_dev *) ti->private; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ result[0] = '\0'; ++ break; ++ ++ case STATUSTYPE_TABLE: ++ snprintf(result, maxlen, "%s", kdevname(dev->dev)); ++ break; ++ } ++ ++ return 0; ++} ++ ++static struct target_type origin_target = { ++ name: "snapshot-origin", ++ module: THIS_MODULE, ++ ctr: origin_ctr, ++ dtr: origin_dtr, ++ map: origin_map, ++ status: origin_status, ++}; ++ ++static struct target_type snapshot_target = { ++ name: "snapshot", ++ module: THIS_MODULE, ++ ctr: snapshot_ctr, ++ dtr: snapshot_dtr, ++ map: snapshot_map, ++ status: snapshot_status, ++}; ++ ++int __init dm_snapshot_init(void) ++{ ++ int r; ++ ++ r = dm_register_target(&snapshot_target); ++ if (r) { ++ DMERR("snapshot target register failed %d", r); ++ return r; ++ } ++ ++ r = dm_register_target(&origin_target); ++ if (r < 0) { ++ DMERR("Device mapper: Origin: register failed %d\n", r); ++ goto bad1; ++ } ++ ++ r = init_origin_hash(); ++ if (r) { ++ DMERR("init_origin_hash failed."); ++ goto bad2; ++ } ++ ++ exception_cache = kmem_cache_create("dm-snapshot-ex", ++ sizeof(struct exception), ++ __alignof__(struct exception), ++ 0, NULL, NULL); ++ if (!exception_cache) { ++ DMERR("Couldn't create exception cache."); ++ r = -ENOMEM; ++ goto bad3; ++ } ++ ++ pending_cache = ++ kmem_cache_create("dm-snapshot-in", ++ sizeof(struct pending_exception), ++ __alignof__(struct pending_exception), ++ 0, NULL, NULL); ++ if (!pending_cache) { ++ DMERR("Couldn't create pending cache."); ++ r = -ENOMEM; ++ goto bad4; ++ } ++ ++ pending_pool = mempool_create(128, mempool_alloc_slab, ++ mempool_free_slab, pending_cache); ++ if (!pending_pool) { ++ DMERR("Couldn't create pending pool."); ++ r = -ENOMEM; ++ goto bad5; ++ } ++ ++ return 0; ++ ++ bad5: ++ kmem_cache_destroy(pending_cache); ++ bad4: ++ kmem_cache_destroy(exception_cache); ++ bad3: ++ exit_origin_hash(); ++ bad2: ++ dm_unregister_target(&origin_target); ++ bad1: ++ dm_unregister_target(&snapshot_target); ++ return r; ++} ++ ++void dm_snapshot_exit(void) ++{ ++ int r; ++ ++ r = dm_unregister_target(&snapshot_target); ++ if (r) ++ DMERR("snapshot unregister failed %d", r); ++ ++ r = dm_unregister_target(&origin_target); ++ if (r) ++ DMERR("origin unregister failed %d", r); ++ ++ exit_origin_hash(); ++ mempool_destroy(pending_pool); ++ kmem_cache_destroy(pending_cache); ++ kmem_cache_destroy(exception_cache); ++} ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. 
++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -ruN linux-2.4.20/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapshot.h +--- linux-2.4.20/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-snapshot.h Wed Mar 26 12:53:19 2003 +@@ -0,0 +1,147 @@ ++/* ++ * dm-snapshot.c ++ * ++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#ifndef DM_SNAPSHOT_H ++#define DM_SNAPSHOT_H ++ ++#include "dm.h" ++#include ++ ++struct exception_table { ++ uint32_t hash_mask; ++ struct list_head *table; ++}; ++ ++/* ++ * The snapshot code deals with largish chunks of the disk at a ++ * time. Typically 64k - 256k. ++ */ ++/* FIXME: can we get away with limiting these to a uint32_t ? */ ++typedef sector_t chunk_t; ++ ++/* ++ * An exception is used where an old chunk of data has been ++ * replaced by a new one. ++ */ ++struct exception { ++ struct list_head hash_list; ++ ++ chunk_t old_chunk; ++ chunk_t new_chunk; ++}; ++ ++/* ++ * Abstraction to handle the meta/layout of exception stores (the ++ * COW device). ++ */ ++struct exception_store { ++ ++ /* ++ * Destroys this object when you've finished with it. ++ */ ++ void (*destroy) (struct exception_store *store); ++ ++ /* ++ * Find somewhere to store the next exception. ++ */ ++ int (*prepare_exception) (struct exception_store *store, ++ struct exception *e); ++ ++ /* ++ * Update the metadata with this exception. ++ */ ++ void (*commit_exception) (struct exception_store *store, ++ struct exception *e, ++ void (*callback) (void *, int success), ++ void *callback_context); ++ ++ /* ++ * The snapshot is invalid, note this in the metadata. ++ */ ++ void (*drop_snapshot) (struct exception_store *store); ++ ++ /* ++ * Return the %age full of the snapshot ++ */ ++ int (*percent_full) (struct exception_store *store); ++ ++ struct dm_snapshot *snap; ++ void *context; ++}; ++ ++struct dm_snapshot { ++ struct rw_semaphore lock; ++ struct dm_table *table; ++ ++ struct dm_dev *origin; ++ struct dm_dev *cow; ++ ++ /* List of snapshots per Origin */ ++ struct list_head list; ++ ++ /* Size of data blocks saved - must be a power of 2 */ ++ chunk_t chunk_size; ++ chunk_t chunk_mask; ++ chunk_t chunk_shift; ++ ++ /* You can't use a snapshot if this is 0 (e.g. if full) */ ++ int valid; ++ ++ /* Used for display of table */ ++ char type; ++ ++ /* The last percentage we notified */ ++ int last_percent; ++ ++ struct exception_table pending; ++ struct exception_table complete; ++ ++ /* The on disk metadata handler */ ++ struct exception_store store; ++}; ++ ++/* ++ * Used by the exception stores to load exceptions hen ++ * initialising. ++ */ ++int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); ++ ++/* ++ * Constructor and destructor for the default persistent ++ * store. ++ */ ++int dm_create_persistent(struct exception_store *store, uint32_t chunk_size); ++ ++int dm_create_transient(struct exception_store *store, ++ struct dm_snapshot *s, int blocksize); ++ ++/* ++ * Return the number of sectors in the device. 
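++ *
++ * (In 2.4 blk_size[] is kept in 1KiB blocks, hence the
++ * "<< 1" below to convert to 512-byte sectors.)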
++ */
++static inline sector_t get_dev_size(kdev_t dev)
++{
++ int *sizes;
++
++ sizes = blk_size[MAJOR(dev)];
++ if (sizes)
++ return sizes[MINOR(dev)] << 1;
++
++ return 0;
++}
++
++static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
++{
++ return (sector & ~s->chunk_mask) >> s->chunk_shift;
++}
++
++static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
++{
++ return chunk << s->chunk_shift;
++}
++
++#endif
+diff -ruN linux-2.4.20/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c
+--- linux-2.4.20/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-stripe.c Wed Mar 26 14:07:57 2003
+@@ -0,0 +1,257 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/slab.h>
++
++struct stripe {
++ struct dm_dev *dev;
++ sector_t physical_start;
++};
++
++struct stripe_c {
++ uint32_t stripes;
++
++ /* The size of this target / num. stripes */
++ uint32_t stripe_width;
++
++ /* stripe chunk size */
++ uint32_t chunk_shift;
++ sector_t chunk_mask;
++
++ struct stripe stripe[0];
++};
++
++static inline struct stripe_c *alloc_context(int stripes)
++{
++ size_t len;
++
++ if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
++ stripes))
++ return NULL;
++
++ len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
++
++ return kmalloc(len, GFP_KERNEL);
++}
++
++/*
++ * Parse a single <dev_path> <offset> pair
++ */
++static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
++ int stripe, char **argv)
++{
++ sector_t start;
++
++ if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1)
++ return -EINVAL;
++
++ if (dm_get_device(ti, argv[0], start, sc->stripe_width,
++ dm_table_get_mode(ti->table),
++ &sc->stripe[stripe].dev))
++ return -ENXIO;
++
++ sc->stripe[stripe].physical_start = start;
++ return 0;
++}
++
++/*
++ * FIXME: Nasty function, only present because we can't link
++ * against __moddi3 and __divdi3.
++ *
++ * returns a == b * n
++ */
++static int multiple(sector_t a, sector_t b, sector_t *n)
++{
++ sector_t acc, prev, i;
++
++ *n = 0;
++ while (a >= b) {
++ for (acc = b, prev = 0, i = 1;
++ acc <= a;
++ prev = acc, acc <<= 1, i <<= 1)
++ ;
++
++ a -= prev;
++ *n += i >> 1;
++ }
++
++ return a == 0;
++}
++
++/*
++ * Construct a striped mapping.
++ * <number of stripes> <chunk size> [<dev_path> <offset>]+
++ */
++static int stripe_ctr(struct dm_target *ti, int argc, char **argv)
++{
++ struct stripe_c *sc;
++ sector_t width;
++ uint32_t stripes;
++ uint32_t chunk_size;
++ char *end;
++ int r, i;
++
++ if (argc < 2) {
++ ti->error = "dm-stripe: Not enough arguments";
++ return -EINVAL;
++ }
++
++ stripes = simple_strtoul(argv[0], &end, 10);
++ if (*end) {
++ ti->error = "dm-stripe: Invalid stripe count";
++ return -EINVAL;
++ }
++
++ chunk_size = simple_strtoul(argv[1], &end, 10);
++ if (*end) {
++ ti->error = "dm-stripe: Invalid chunk_size";
++ return -EINVAL;
++ }
++
++ /*
++ * chunk_size is a power of two
++ */
++ if (!chunk_size || (chunk_size & (chunk_size - 1))) {
++ ti->error = "dm-stripe: Invalid chunk size";
++ return -EINVAL;
++ }
++
++ if (!multiple(ti->len, stripes, &width)) {
++ ti->error = "dm-stripe: Target length not divisible by "
++ "number of stripes";
++ return -EINVAL;
++ }
++
++ /*
++ * Do we have enough arguments for that many stripes ?
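++ *
++ * e.g. 2 stripes need 2 + 2*2 = 6 arguments, as in the
++ * hypothetical table line
++ * "0 409600 striped 2 64 /dev/hda1 0 /dev/hdb1 0".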
++ */ ++ if (argc != (2 + 2 * stripes)) { ++ ti->error = "dm-stripe: Not enough destinations specified"; ++ return -EINVAL; ++ } ++ ++ sc = alloc_context(stripes); ++ if (!sc) { ++ ti->error = "dm-stripe: Memory allocation for striped context " ++ "failed"; ++ return -ENOMEM; ++ } ++ ++ sc->stripes = stripes; ++ sc->stripe_width = width; ++ ++ sc->chunk_mask = ((sector_t) chunk_size) - 1; ++ for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++) ++ chunk_size >>= 1; ++ sc->chunk_shift--; ++ ++ /* ++ * Get the stripe destinations. ++ */ ++ for (i = 0; i < stripes; i++) { ++ argv += 2; ++ ++ r = get_stripe(ti, sc, i, argv); ++ if (r < 0) { ++ ti->error = "dm-stripe: Couldn't parse stripe " ++ "destination"; ++ while (i--) ++ dm_put_device(ti, sc->stripe[i].dev); ++ kfree(sc); ++ return r; ++ } ++ } ++ ++ ti->private = sc; ++ return 0; ++} ++ ++static void stripe_dtr(struct dm_target *ti) ++{ ++ unsigned int i; ++ struct stripe_c *sc = (struct stripe_c *) ti->private; ++ ++ for (i = 0; i < sc->stripes; i++) ++ dm_put_device(ti, sc->stripe[i].dev); ++ ++ kfree(sc); ++} ++ ++static int stripe_map(struct dm_target *ti, struct buffer_head *bh, int rw, ++ void **context) ++{ ++ struct stripe_c *sc = (struct stripe_c *) ti->private; ++ ++ sector_t offset = bh->b_rsector - ti->begin; ++ uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift); ++ uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */ ++ chunk = chunk / sc->stripes; ++ ++ bh->b_rdev = sc->stripe[stripe].dev->dev; ++ bh->b_rsector = sc->stripe[stripe].physical_start + ++ (chunk << sc->chunk_shift) + (offset & sc->chunk_mask); ++ return 1; ++} ++ ++static int stripe_status(struct dm_target *ti, ++ status_type_t type, char *result, int maxlen) ++{ ++ struct stripe_c *sc = (struct stripe_c *) ti->private; ++ int offset; ++ int i; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ result[0] = '\0'; ++ break; ++ ++ case STATUSTYPE_TABLE: ++ offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT, ++ sc->stripes, sc->chunk_mask + 1); ++ for (i = 0; i < sc->stripes; i++) { ++ offset += ++ snprintf(result + offset, maxlen - offset, ++ " %s " SECTOR_FORMAT, ++ kdevname(to_kdev_t(sc->stripe[i].dev->bdev->bd_dev)), ++ sc->stripe[i].physical_start); ++ } ++ break; ++ } ++ return 0; ++} ++ ++static struct target_type stripe_target = { ++ .name = "striped", ++ .module = THIS_MODULE, ++ .ctr = stripe_ctr, ++ .dtr = stripe_dtr, ++ .map = stripe_map, ++ .status = stripe_status, ++}; ++ ++int __init dm_stripe_init(void) ++{ ++ int r; ++ ++ r = dm_register_target(&stripe_target); ++ if (r < 0) ++ DMWARN("striped target registration failed"); ++ ++ return r; ++} ++ ++void dm_stripe_exit(void) ++{ ++ if (dm_unregister_target(&stripe_target)) ++ DMWARN("striped target unregistration failed"); ++ ++ return; ++} +diff -ruN linux-2.4.20/drivers/md/dm-table.c linux/drivers/md/dm-table.c +--- linux-2.4.20/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-table.c Wed Mar 26 14:09:13 2003 +@@ -0,0 +1,666 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. 
++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define MAX_DEPTH 16 ++#define NODE_SIZE L1_CACHE_BYTES ++#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) ++#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) ++ ++struct dm_table { ++ atomic_t holders; ++ ++ /* btree table */ ++ int depth; ++ int counts[MAX_DEPTH]; /* in nodes */ ++ sector_t *index[MAX_DEPTH]; ++ ++ int num_targets; ++ int num_allocated; ++ sector_t *highs; ++ struct dm_target *targets; ++ ++ /* ++ * Indicates the rw permissions for the new logical ++ * device. This should be a combination of FMODE_READ ++ * and FMODE_WRITE. ++ */ ++ int mode; ++ ++ /* a list of devices used by this table */ ++ struct list_head devices; ++ ++ /* ++ * A waitqueue for processes waiting for something ++ * interesting to happen to this table. ++ */ ++ wait_queue_head_t eventq; ++}; ++ ++/* ++ * Ceiling(n / size) ++ */ ++static inline unsigned long div_up(unsigned long n, unsigned long size) ++{ ++ return dm_round_up(n, size) / size; ++} ++ ++/* ++ * Similar to ceiling(log_size(n)) ++ */ ++static unsigned int int_log(unsigned long n, unsigned long base) ++{ ++ int result = 0; ++ ++ while (n > 1) { ++ n = div_up(n, base); ++ result++; ++ } ++ ++ return result; ++} ++ ++/* ++ * Calculate the index of the child node of the n'th node k'th key. ++ */ ++static inline int get_child(int n, int k) ++{ ++ return (n * CHILDREN_PER_NODE) + k; ++} ++ ++/* ++ * Return the n'th node of level l from table t. ++ */ ++static inline sector_t *get_node(struct dm_table *t, int l, int n) ++{ ++ return t->index[l] + (n * KEYS_PER_NODE); ++} ++ ++/* ++ * Return the highest key that you could lookup from the n'th ++ * node on level l of the btree. ++ */ ++static sector_t high(struct dm_table *t, int l, int n) ++{ ++ for (; l < t->depth - 1; l++) ++ n = get_child(n, CHILDREN_PER_NODE - 1); ++ ++ if (n >= t->counts[l]) ++ return (sector_t) - 1; ++ ++ return get_node(t, l, n)[KEYS_PER_NODE - 1]; ++} ++ ++/* ++ * Fills in a level of the btree based on the highs of the level ++ * below it. ++ */ ++static int setup_btree_index(int l, struct dm_table *t) ++{ ++ int n, k; ++ sector_t *node; ++ ++ for (n = 0; n < t->counts[l]; n++) { ++ node = get_node(t, l, n); ++ ++ for (k = 0; k < KEYS_PER_NODE; k++) ++ node[k] = high(t, l + 1, get_child(n, k)); ++ } ++ ++ return 0; ++} ++ ++/* ++ * highs, and targets are managed as dynamic arrays during a ++ * table load. ++ */ ++static int alloc_targets(struct dm_table *t, int num) ++{ ++ sector_t *n_highs; ++ struct dm_target *n_targets; ++ int n = t->num_targets; ++ ++ /* ++ * Allocate both the target array and offset array at once. 
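++ *
++ * (Layout of the single allocation: num sector_t highs,
++ * immediately followed by num struct dm_targets; n_targets
++ * below simply points at the tail of the same block.)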
++ */ ++ n_highs = (sector_t *) vcalloc(sizeof(struct dm_target) + ++ sizeof(sector_t), num); ++ if (!n_highs) ++ return -ENOMEM; ++ ++ n_targets = (struct dm_target *) (n_highs + num); ++ ++ if (n) { ++ memcpy(n_highs, t->highs, sizeof(*n_highs) * n); ++ memcpy(n_targets, t->targets, sizeof(*n_targets) * n); ++ } ++ ++ memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); ++ vfree(t->highs); ++ ++ t->num_allocated = num; ++ t->highs = n_highs; ++ t->targets = n_targets; ++ ++ return 0; ++} ++ ++int dm_table_create(struct dm_table **result, int mode) ++{ ++ struct dm_table *t = kmalloc(sizeof(*t), GFP_NOIO); ++ ++ if (!t) ++ return -ENOMEM; ++ ++ memset(t, 0, sizeof(*t)); ++ INIT_LIST_HEAD(&t->devices); ++ atomic_set(&t->holders, 1); ++ ++ /* allocate a single nodes worth of targets to begin with */ ++ if (alloc_targets(t, KEYS_PER_NODE)) { ++ kfree(t); ++ t = NULL; ++ return -ENOMEM; ++ } ++ ++ init_waitqueue_head(&t->eventq); ++ t->mode = mode; ++ *result = t; ++ return 0; ++} ++ ++static void free_devices(struct list_head *devices) ++{ ++ struct list_head *tmp, *next; ++ ++ for (tmp = devices->next; tmp != devices; tmp = next) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ next = tmp->next; ++ kfree(dd); ++ } ++} ++ ++void table_destroy(struct dm_table *t) ++{ ++ int i; ++ ++ /* destroying the table counts as an event */ ++ dm_table_event(t); ++ ++ /* free the indexes (see dm_table_complete) */ ++ if (t->depth >= 2) ++ vfree(t->index[t->depth - 2]); ++ ++ /* free the targets */ ++ for (i = 0; i < t->num_targets; i++) { ++ struct dm_target *tgt = t->targets + i; ++ ++ if (tgt->type->dtr) ++ tgt->type->dtr(tgt); ++ ++ dm_put_target_type(tgt->type); ++ } ++ ++ vfree(t->highs); ++ ++ /* free the device list */ ++ if (t->devices.next != &t->devices) { ++ DMWARN("devices still present during destroy: " ++ "dm_table_remove_device calls missing"); ++ ++ free_devices(&t->devices); ++ } ++ ++ kfree(t); ++} ++ ++void dm_table_get(struct dm_table *t) ++{ ++ atomic_inc(&t->holders); ++} ++ ++void dm_table_put(struct dm_table *t) ++{ ++ if (atomic_dec_and_test(&t->holders)) ++ table_destroy(t); ++} ++ ++/* ++ * Checks to see if we need to extend highs or targets. ++ */ ++static inline int check_space(struct dm_table *t) ++{ ++ if (t->num_targets >= t->num_allocated) ++ return alloc_targets(t, t->num_allocated * 2); ++ ++ return 0; ++} ++ ++/* ++ * Convert a device path to a dev_t. ++ */ ++static int lookup_device(const char *path, kdev_t *dev) ++{ ++ int r; ++ struct nameidata nd; ++ struct inode *inode; ++ ++ if (!path_init(path, LOOKUP_FOLLOW, &nd)) ++ return 0; ++ ++ if ((r = path_walk(path, &nd))) ++ goto out; ++ ++ inode = nd.dentry->d_inode; ++ if (!inode) { ++ r = -ENOENT; ++ goto out; ++ } ++ ++ if (!S_ISBLK(inode->i_mode)) { ++ r = -ENOTBLK; ++ goto out; ++ } ++ ++ *dev = inode->i_rdev; ++ ++ out: ++ path_release(&nd); ++ return r; ++} ++ ++/* ++ * See if we've already got a device in the list. ++ */ ++static struct dm_dev *find_device(struct list_head *l, kdev_t dev) ++{ ++ struct list_head *tmp; ++ ++ list_for_each(tmp, l) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ if (kdev_same(dd->dev, dev)) ++ return dd; ++ } ++ ++ return NULL; ++} ++ ++/* ++ * Open a device so we can use it as a map destination. 
++ */
++static int open_dev(struct dm_dev *dd)
++{
++ if (dd->bdev)
++ BUG();
++
++ dd->bdev = bdget(kdev_t_to_nr(dd->dev));
++ if (!dd->bdev)
++ return -ENOMEM;
++
++ return blkdev_get(dd->bdev, dd->mode, 0, BDEV_RAW);
++}
++
++/*
++ * Close a device that we've been using.
++ */
++static void close_dev(struct dm_dev *dd)
++{
++ if (!dd->bdev)
++ return;
++
++ blkdev_put(dd->bdev, BDEV_RAW);
++ dd->bdev = NULL;
++}
++
++/*
++ * If possible (i.e. blk_size[major] is set), this checks that an
++ * area of a destination device is valid.
++ */
++static int check_device_area(kdev_t dev, sector_t start, sector_t len)
++{
++ int *sizes;
++ sector_t dev_size;
++
++ if (!(sizes = blk_size[major(dev)]) || !(dev_size = sizes[minor(dev)]))
++ /* we don't know the device details,
++ * so give the benefit of the doubt */
++ return 1;
++
++ /* convert to 512-byte sectors */
++ dev_size <<= 1;
++
++ return ((start < dev_size) && (len <= (dev_size - start)));
++}
++
++/*
++ * This upgrades the mode on an already open dm_dev. Being
++ * careful to leave things as they were if we fail to reopen the
++ * device.
++ */
++static int upgrade_mode(struct dm_dev *dd, int new_mode)
++{
++ int r;
++ struct dm_dev dd_copy;
++
++ memcpy(&dd_copy, dd, sizeof(dd_copy));
++
++ dd->mode |= new_mode;
++ dd->bdev = NULL;
++ r = open_dev(dd);
++ if (!r)
++ close_dev(&dd_copy);
++ else
++ memcpy(dd, &dd_copy, sizeof(dd_copy));
++
++ return r;
++}
++
++/*
++ * Add a device to the list, or just increment the usage count if
++ * it's already present.
++ */
++int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
++ sector_t len, int mode, struct dm_dev **result)
++{
++ int r;
++ kdev_t dev;
++ struct dm_dev *dd;
++ int major, minor;
++ struct dm_table *t = ti->table;
++
++ if (!t)
++ BUG();
++
++ if (sscanf(path, "%x:%x", &major, &minor) == 2) {
++ /* Extract the major/minor numbers */
++ dev = mk_kdev(major, minor);
++ } else {
++ /* convert the path to a device */
++ if ((r = lookup_device(path, &dev)))
++ return r;
++ }
++
++ dd = find_device(&t->devices, dev);
++ if (!dd) {
++ dd = kmalloc(sizeof(*dd), GFP_KERNEL);
++ if (!dd)
++ return -ENOMEM;
++
++ dd->dev = dev;
++ dd->mode = mode;
++ dd->bdev = NULL;
++
++ if ((r = open_dev(dd))) {
++ kfree(dd);
++ return r;
++ }
++
++ atomic_set(&dd->count, 0);
++ list_add(&dd->list, &t->devices);
++
++ } else if (dd->mode != (mode | dd->mode)) {
++ r = upgrade_mode(dd, mode);
++ if (r)
++ return r;
++ }
++ atomic_inc(&dd->count);
++
++ if (!check_device_area(dd->dev, start, len)) {
++ DMWARN("device %s too small for target", path);
++ dm_put_device(ti, dd);
++ return -EINVAL;
++ }
++
++ *result = dd;
++
++ return 0;
++}
++
++/*
++ * Decrement a device's use count and remove it if necessary.
++ */
++void dm_put_device(struct dm_target *ti, struct dm_dev *dd)
++{
++ if (atomic_dec_and_test(&dd->count)) {
++ close_dev(dd);
++ list_del(&dd->list);
++ kfree(dd);
++ }
++}
++
++/*
++ * Checks to see if the target joins onto the end of the table.
++ */
++static int adjoin(struct dm_table *table, struct dm_target *ti)
++{
++ struct dm_target *prev;
++
++ if (!table->num_targets)
++ return !ti->begin;
++
++ prev = &table->targets[table->num_targets - 1];
++ return (ti->begin == (prev->begin + prev->len));
++}
++
++/*
++ * Destructively splits up the argument list to pass to ctr.
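++ *
++ * ("Destructively" because '\0' terminators are written into
++ * the input buffer. A backslash quotes the next character,
++ * so the input  a\ b c  yields argv = { "a b", "c" }.)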
++ */ ++static int split_args(int max, int *argc, char **argv, char *input) ++{ ++ char *start, *end = input, *out; ++ *argc = 0; ++ ++ while (1) { ++ start = end; ++ ++ /* Skip whitespace */ ++ while (*start && isspace(*start)) ++ start++; ++ ++ if (!*start) ++ break; /* success, we hit the end */ ++ ++ /* 'out' is used to remove any back-quotes */ ++ end = out = start; ++ while (*end) { ++ /* Everything apart from '\0' can be quoted */ ++ if (*end == '\\' && *(end + 1)) { ++ *out++ = *(end + 1); ++ end += 2; ++ continue; ++ } ++ ++ if (isspace(*end)) ++ break; /* end of token */ ++ ++ *out++ = *end++; ++ } ++ ++ /* have we already filled the array ? */ ++ if ((*argc + 1) > max) ++ return -EINVAL; ++ ++ /* we know this is whitespace */ ++ if (*end) ++ end++; ++ ++ /* terminate the string and put it in the array */ ++ *out = '\0'; ++ argv[*argc] = start; ++ (*argc)++; ++ } ++ ++ return 0; ++} ++ ++int dm_table_add_target(struct dm_table *t, const char *type, ++ sector_t start, sector_t len, char *params) ++{ ++ int r = -EINVAL, argc; ++ char *argv[32]; ++ struct dm_target *tgt; ++ ++ if ((r = check_space(t))) ++ return r; ++ ++ tgt = t->targets + t->num_targets; ++ memset(tgt, 0, sizeof(*tgt)); ++ ++ tgt->type = dm_get_target_type(type); ++ if (!tgt->type) { ++ tgt->error = "unknown target type"; ++ return -EINVAL; ++ } ++ ++ tgt->table = t; ++ tgt->begin = start; ++ tgt->len = len; ++ tgt->error = "Unknown error"; ++ ++ /* ++ * Does this target adjoin the previous one ? ++ */ ++ if (!adjoin(t, tgt)) { ++ tgt->error = "Gap in table"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ r = split_args(ARRAY_SIZE(argv), &argc, argv, params); ++ if (r) { ++ tgt->error = "couldn't split parameters"; ++ goto bad; ++ } ++ ++ r = tgt->type->ctr(tgt, argc, argv); ++ if (r) ++ goto bad; ++ ++ t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; ++ return 0; ++ ++ bad: ++ printk(KERN_ERR DM_NAME ": %s\n", tgt->error); ++ dm_put_target_type(tgt->type); ++ return r; ++} ++ ++static int setup_indexes(struct dm_table *t) ++{ ++ int i, total = 0; ++ sector_t *indexes; ++ ++ /* allocate the space for *all* the indexes */ ++ for (i = t->depth - 2; i >= 0; i--) { ++ t->counts[i] = div_up(t->counts[i + 1], CHILDREN_PER_NODE); ++ total += t->counts[i]; ++ } ++ ++ indexes = (sector_t *) vcalloc(total, (unsigned long) NODE_SIZE); ++ if (!indexes) ++ return -ENOMEM; ++ ++ /* set up internal nodes, bottom-up */ ++ for (i = t->depth - 2, total = 0; i >= 0; i--) { ++ t->index[i] = indexes; ++ indexes += (KEYS_PER_NODE * t->counts[i]); ++ setup_btree_index(i, t); ++ } ++ ++ return 0; ++} ++ ++/* ++ * Builds the btree to index the map. ++ */ ++int dm_table_complete(struct dm_table *t) ++{ ++ int leaf_nodes, r = 0; ++ ++ /* how many indexes will the btree have ? */ ++ leaf_nodes = div_up(t->num_targets, KEYS_PER_NODE); ++ t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); ++ ++ /* leaf layer has already been set up */ ++ t->counts[t->depth - 1] = leaf_nodes; ++ t->index[t->depth - 1] = t->highs; ++ ++ if (t->depth >= 2) ++ r = setup_indexes(t); ++ ++ return r; ++} ++ ++void dm_table_event(struct dm_table *t) ++{ ++ wake_up_interruptible(&t->eventq); ++} ++ ++sector_t dm_table_get_size(struct dm_table *t) ++{ ++ return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; ++} ++ ++struct dm_target *dm_table_get_target(struct dm_table *t, int index) ++{ ++ if (index > t->num_targets) ++ return NULL; ++ ++ return t->targets + index; ++} ++ ++/* ++ * Search the btree for the correct target. 
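++ *
++ * (Each node holds KEYS_PER_NODE ascending "high" keys; at
++ * every level we descend at the first key >= sector, so a
++ * lookup costs depth * KEYS_PER_NODE comparisons instead of
++ * a linear scan of all targets.)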
++ */ ++struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) ++{ ++ int l, n = 0, k = 0; ++ sector_t *node; ++ ++ for (l = 0; l < t->depth; l++) { ++ n = get_child(n, k); ++ node = get_node(t, l, n); ++ ++ for (k = 0; k < KEYS_PER_NODE; k++) ++ if (node[k] >= sector) ++ break; ++ } ++ ++ return &t->targets[(KEYS_PER_NODE * n) + k]; ++} ++ ++unsigned int dm_table_get_num_targets(struct dm_table *t) ++{ ++ return t->num_targets; ++} ++ ++struct list_head *dm_table_get_devices(struct dm_table *t) ++{ ++ return &t->devices; ++} ++ ++int dm_table_get_mode(struct dm_table *t) ++{ ++ return t->mode; ++} ++ ++void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq) ++{ ++ add_wait_queue(&t->eventq, wq); ++} ++ ++EXPORT_SYMBOL(dm_get_device); ++EXPORT_SYMBOL(dm_put_device); ++EXPORT_SYMBOL(dm_table_event); ++EXPORT_SYMBOL(dm_table_get_mode); +diff -ruN linux-2.4.20/drivers/md/dm-target.c linux/drivers/md/dm-target.c +--- linux-2.4.20/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-target.c Wed Mar 26 12:54:14 2003 +@@ -0,0 +1,187 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++ ++struct tt_internal { ++ struct target_type tt; ++ ++ struct list_head list; ++ long use; ++}; ++ ++static LIST_HEAD(_targets); ++static DECLARE_RWSEM(_lock); ++ ++#define DM_MOD_NAME_SIZE 32 ++ ++static inline struct tt_internal *__find_target_type(const char *name) ++{ ++ struct list_head *tih; ++ struct tt_internal *ti; ++ ++ list_for_each(tih, &_targets) { ++ ti = list_entry(tih, struct tt_internal, list); ++ ++ if (!strcmp(name, ti->tt.name)) ++ return ti; ++ } ++ ++ return NULL; ++} ++ ++static struct tt_internal *get_target_type(const char *name) ++{ ++ struct tt_internal *ti; ++ ++ down_read(&_lock); ++ ti = __find_target_type(name); ++ ++ if (ti) { ++ if (ti->use == 0 && ti->tt.module) ++ __MOD_INC_USE_COUNT(ti->tt.module); ++ ti->use++; ++ } ++ up_read(&_lock); ++ ++ return ti; ++} ++ ++static void load_module(const char *name) ++{ ++ char module_name[DM_MOD_NAME_SIZE] = "dm-"; ++ ++ /* Length check for strcat() below */ ++ if (strlen(name) > (DM_MOD_NAME_SIZE - 4)) ++ return; ++ ++ strcat(module_name, name); ++ request_module(module_name); ++} ++ ++struct target_type *dm_get_target_type(const char *name) ++{ ++ struct tt_internal *ti = get_target_type(name); ++ ++ if (!ti) { ++ load_module(name); ++ ti = get_target_type(name); ++ } ++ ++ return ti ? 
&ti->tt : NULL; ++} ++ ++void dm_put_target_type(struct target_type *t) ++{ ++ struct tt_internal *ti = (struct tt_internal *) t; ++ ++ down_read(&_lock); ++ if (--ti->use == 0 && ti->tt.module) ++ __MOD_DEC_USE_COUNT(ti->tt.module); ++ ++ if (ti->use < 0) ++ BUG(); ++ up_read(&_lock); ++ ++ return; ++} ++ ++static struct tt_internal *alloc_target(struct target_type *t) ++{ ++ struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL); ++ ++ if (ti) { ++ memset(ti, 0, sizeof(*ti)); ++ ti->tt = *t; ++ } ++ ++ return ti; ++} ++ ++int dm_register_target(struct target_type *t) ++{ ++ int rv = 0; ++ struct tt_internal *ti = alloc_target(t); ++ ++ if (!ti) ++ return -ENOMEM; ++ ++ down_write(&_lock); ++ if (__find_target_type(t->name)) ++ rv = -EEXIST; ++ else ++ list_add(&ti->list, &_targets); ++ ++ up_write(&_lock); ++ return rv; ++} ++ ++int dm_unregister_target(struct target_type *t) ++{ ++ struct tt_internal *ti; ++ ++ down_write(&_lock); ++ if (!(ti = __find_target_type(t->name))) { ++ up_write(&_lock); ++ return -EINVAL; ++ } ++ ++ if (ti->use) { ++ up_write(&_lock); ++ return -ETXTBSY; ++ } ++ ++ list_del(&ti->list); ++ kfree(ti); ++ ++ up_write(&_lock); ++ return 0; ++} ++ ++/* ++ * io-err: always fails an io, useful for bringing ++ * up LVs that have holes in them. ++ */ ++static int io_err_ctr(struct dm_target *ti, int argc, char **args) ++{ ++ return 0; ++} ++ ++static void io_err_dtr(struct dm_target *ti) ++{ ++ /* empty */ ++} ++ ++static int io_err_map(struct dm_target *ti, struct buffer_head *bh, int rw, ++ void **map_context) ++{ ++ return -EIO; ++} ++ ++static struct target_type error_target = { ++ .name = "error", ++ .ctr = io_err_ctr, ++ .dtr = io_err_dtr, ++ .map = io_err_map, ++}; ++ ++int dm_target_init(void) ++{ ++ return dm_register_target(&error_target); ++} ++ ++void dm_target_exit(void) ++{ ++ if (dm_unregister_target(&error_target)) ++ DMWARN("error target unregistration failed"); ++} ++ ++EXPORT_SYMBOL(dm_register_target); ++EXPORT_SYMBOL(dm_unregister_target); +diff -ruN linux-2.4.20/drivers/md/dm.c linux/drivers/md/dm.c +--- linux-2.4.20/drivers/md/dm.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm.c Wed Mar 26 14:23:27 2003 +@@ -0,0 +1,878 @@ ++/* ++ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++static const char *_name = DM_NAME; ++#define MAX_DEVICES (1 << MINORBITS) ++#define DEFAULT_READ_AHEAD 64 ++ ++static int major = 0; ++static int _major = 0; ++ ++struct dm_io { ++ struct mapped_device *md; ++ ++ struct dm_target *ti; ++ int rw; ++ void *map_context; ++ void (*end_io) (struct buffer_head * bh, int uptodate); ++ void *context; ++}; ++ ++struct deferred_io { ++ int rw; ++ struct buffer_head *bh; ++ struct deferred_io *next; ++}; ++ ++/* ++ * Bits for the md->flags field. ++ */ ++#define DMF_BLOCK_IO 0 ++#define DMF_SUSPENDED 1 ++ ++struct mapped_device { ++ struct rw_semaphore lock; ++ atomic_t holders; ++ ++ kdev_t dev; ++ unsigned long flags; ++ ++ /* ++ * A list of ios that arrived while we were suspended. ++ */ ++ atomic_t pending; ++ wait_queue_head_t wait; ++ struct deferred_io *deferred; ++ ++ /* ++ * The current mapping. ++ */ ++ struct dm_table *map; ++ ++ /* ++ * io objects are allocated from here. 
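++ *
++ * (The mempool keeps MIN_IOS dm_io structs in reserve so
++ * that bh mapping can still make progress when the
++ * allocator is under writeout pressure.)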
++ */ ++ mempool_t *io_pool; ++}; ++ ++#define MIN_IOS 256 ++static kmem_cache_t *_io_cache; ++ ++/* block device arrays */ ++static int _block_size[MAX_DEVICES]; ++static int _blksize_size[MAX_DEVICES]; ++static int _hardsect_size[MAX_DEVICES]; ++ ++static struct mapped_device *get_kdev(kdev_t dev); ++static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh); ++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb); ++ ++static __init int local_init(void) ++{ ++ int r; ++ ++ /* allocate a slab for the dm_ios */ ++ _io_cache = kmem_cache_create("dm io", ++ sizeof(struct dm_io), 0, 0, NULL, NULL); ++ ++ if (!_io_cache) ++ return -ENOMEM; ++ ++ _major = major; ++ r = register_blkdev(_major, _name, &dm_blk_dops); ++ if (r < 0) { ++ DMERR("register_blkdev failed"); ++ kmem_cache_destroy(_io_cache); ++ return r; ++ } ++ ++ if (!_major) ++ _major = r; ++ ++ /* set up the arrays */ ++ read_ahead[_major] = DEFAULT_READ_AHEAD; ++ blk_size[_major] = _block_size; ++ blksize_size[_major] = _blksize_size; ++ hardsect_size[_major] = _hardsect_size; ++ ++ blk_queue_make_request(BLK_DEFAULT_QUEUE(_major), dm_request); ++ ++ return 0; ++} ++ ++static void local_exit(void) ++{ ++ kmem_cache_destroy(_io_cache); ++ ++ if (unregister_blkdev(_major, _name) < 0) ++ DMERR("devfs_unregister_blkdev failed"); ++ ++ read_ahead[_major] = 0; ++ blk_size[_major] = NULL; ++ blksize_size[_major] = NULL; ++ hardsect_size[_major] = NULL; ++ _major = 0; ++ ++ DMINFO("cleaned up"); ++} ++ ++/* ++ * We have a lot of init/exit functions, so it seems easier to ++ * store them in an array. The disposable macro 'xx' ++ * expands a prefix into a pair of function names. ++ */ ++static struct { ++ int (*init) (void); ++ void (*exit) (void); ++ ++} _inits[] = { ++#define xx(n) {n ## _init, n ## _exit}, ++ xx(local) ++ xx(dm_target) ++ xx(dm_linear) ++ xx(dm_stripe) ++ xx(dm_snapshot) ++ xx(dm_interface) ++#undef xx ++}; ++ ++static int __init dm_init(void) ++{ ++ const int count = ARRAY_SIZE(_inits); ++ ++ int r, i; ++ ++ for (i = 0; i < count; i++) { ++ r = _inits[i].init(); ++ if (r) ++ goto bad; ++ } ++ ++ return 0; ++ ++ bad: ++ while (i--) ++ _inits[i].exit(); ++ ++ return r; ++} ++ ++static void __exit dm_exit(void) ++{ ++ int i = ARRAY_SIZE(_inits); ++ ++ while (i--) ++ _inits[i].exit(); ++} ++ ++/* ++ * Block device functions ++ */ ++static int dm_blk_open(struct inode *inode, struct file *file) ++{ ++ struct mapped_device *md; ++ ++ md = get_kdev(inode->i_rdev); ++ if (!md) ++ return -ENXIO; ++ ++ return 0; ++} ++ ++static int dm_blk_close(struct inode *inode, struct file *file) ++{ ++ struct mapped_device *md; ++ ++ md = get_kdev(inode->i_rdev); ++ dm_put(md); /* put the reference gained by dm_blk_open */ ++ dm_put(md); ++ return 0; ++} ++ ++static inline struct dm_io *alloc_io(struct mapped_device *md) ++{ ++ return mempool_alloc(md->io_pool, GFP_NOIO); ++} ++ ++static inline void free_io(struct mapped_device *md, struct dm_io *io) ++{ ++ mempool_free(io, md->io_pool); ++} ++ ++static inline struct deferred_io *alloc_deferred(void) ++{ ++ return kmalloc(sizeof(struct deferred_io), GFP_NOIO); ++} ++ ++static inline void free_deferred(struct deferred_io *di) ++{ ++ kfree(di); ++} ++ ++/* In 512-byte units */ ++#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1) ++ ++/* FIXME: check this */ ++static int dm_blk_ioctl(struct inode *inode, struct file *file, ++ uint command, unsigned long a) ++{ ++ int minor = MINOR(inode->i_rdev); ++ long size; ++ ++ if (minor >= MAX_DEVICES) ++ return -ENXIO; ++ ++ 
switch (command) { ++ case BLKROSET: ++ case BLKROGET: ++ case BLKRASET: ++ case BLKRAGET: ++ case BLKFLSBUF: ++ case BLKSSZGET: ++ //case BLKRRPART: /* Re-read partition tables */ ++ //case BLKPG: ++ case BLKELVGET: ++ case BLKELVSET: ++ case BLKBSZGET: ++ case BLKBSZSET: ++ return blk_ioctl(inode->i_rdev, command, a); ++ break; ++ ++ case BLKGETSIZE: ++ size = VOLUME_SIZE(minor); ++ if (copy_to_user((void *) a, &size, sizeof(long))) ++ return -EFAULT; ++ break; ++ ++ case BLKGETSIZE64: ++ size = VOLUME_SIZE(minor); ++ if (put_user((u64) ((u64) size) << 9, (u64 *) a)) ++ return -EFAULT; ++ break; ++ ++ case BLKRRPART: ++ return -ENOTTY; ++ ++ case LV_BMAP: ++ return dm_user_bmap(inode, (struct lv_bmap *) a); ++ ++ default: ++ DMWARN("unknown block ioctl 0x%x", command); ++ return -ENOTTY; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Add the buffer to the list of deferred io. ++ */ ++static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw) ++{ ++ struct deferred_io *di; ++ ++ di = alloc_deferred(); ++ if (!di) ++ return -ENOMEM; ++ ++ down_write(&md->lock); ++ ++ if (!test_bit(DMF_BLOCK_IO, &md->flags)) { ++ up_write(&md->lock); ++ free_deferred(di); ++ return 1; ++ } ++ ++ di->bh = bh; ++ di->rw = rw; ++ di->next = md->deferred; ++ md->deferred = di; ++ ++ up_write(&md->lock); ++ return 0; /* deferred successfully */ ++} ++ ++/* ++ * bh->b_end_io routine that decrements the pending count ++ * and then calls the original bh->b_end_io fn. ++ */ ++static void dec_pending(struct buffer_head *bh, int uptodate) ++{ ++ int r; ++ struct dm_io *io = bh->b_private; ++ dm_endio_fn endio = io->ti->type->end_io; ++ ++ if (endio) { ++ r = endio(io->ti, bh, io->rw, uptodate ? 0 : -EIO, ++ io->map_context); ++ if (r < 0) ++ uptodate = 0; ++ ++ else if (r > 0) ++ /* the target wants another shot at the io */ ++ return; ++ } ++ ++ if (atomic_dec_and_test(&io->md->pending)) ++ /* nudge anyone waiting on suspend queue */ ++ wake_up(&io->md->wait); ++ ++ bh->b_end_io = io->end_io; ++ bh->b_private = io->context; ++ free_io(io->md, io); ++ ++ bh->b_end_io(bh, uptodate); ++} ++ ++/* ++ * Do the bh mapping for a given leaf ++ */ ++static inline int __map_buffer(struct mapped_device *md, int rw, ++ struct buffer_head *bh, struct dm_io *io) ++{ ++ struct dm_target *ti; ++ ++ ti = dm_table_find_target(md->map, bh->b_rsector); ++ if (!ti || !ti->type) ++ return -EINVAL; ++ ++ /* hook the end io request fn */ ++ atomic_inc(&md->pending); ++ io->md = md; ++ io->ti = ti; ++ io->rw = rw; ++ io->end_io = bh->b_end_io; ++ io->context = bh->b_private; ++ bh->b_end_io = dec_pending; ++ bh->b_private = io; ++ ++ return ti->type->map(ti, bh, rw, &io->map_context); ++} ++ ++/* ++ * Checks to see if we should be deferring io, if so it queues it ++ * and returns 1. ++ */ ++static inline int __deferring(struct mapped_device *md, int rw, ++ struct buffer_head *bh) ++{ ++ int r; ++ ++ /* ++ * If we're suspended we have to queue this io for later. ++ */ ++ while (test_bit(DMF_BLOCK_IO, &md->flags)) { ++ up_read(&md->lock); ++ ++ /* ++ * There's no point deferring a read ahead ++ * request, just drop it. 
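++			 * Readahead is purely an optimisation, so
++			 * the cheapest thing to do is fail it here;
++			 * the caller simply carries on without the
++			 * read-ahead data.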
++ */ ++ if (rw == READA) { ++ down_read(&md->lock); ++ return -EIO; ++ } ++ ++ r = queue_io(md, bh, rw); ++ down_read(&md->lock); ++ ++ if (r < 0) ++ return r; ++ ++ if (r == 0) ++ return 1; /* deferred successfully */ ++ ++ } ++ ++ return 0; ++} ++ ++static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh) ++{ ++ int r; ++ struct dm_io *io; ++ struct mapped_device *md; ++ ++ md = get_kdev(bh->b_rdev); ++ if (!md) { ++ buffer_IO_error(bh); ++ return 0; ++ } ++ ++ io = alloc_io(md); ++ down_read(&md->lock); ++ ++ r = __deferring(md, rw, bh); ++ if (r < 0) ++ goto bad; ++ ++ else if (!r) { ++ /* not deferring */ ++ r = __map_buffer(md, rw, bh, io); ++ if (r < 0) ++ goto bad; ++ } else ++ r = 0; ++ ++ up_read(&md->lock); ++ dm_put(md); ++ return r; ++ ++ bad: ++ buffer_IO_error(bh); ++ up_read(&md->lock); ++ dm_put(md); ++ return 0; ++} ++ ++static int check_dev_size(kdev_t dev, unsigned long block) ++{ ++ /* FIXME: check this */ ++ int minor = MINOR(dev); ++ unsigned long max_sector = (_block_size[minor] << 1) + 1; ++ unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9); ++ ++ return (sector > max_sector) ? 0 : 1; ++} ++ ++/* ++ * Creates a dummy buffer head and maps it (for lilo). ++ */ ++static int __bmap(struct mapped_device *md, kdev_t dev, unsigned long block, ++ kdev_t *r_dev, unsigned long *r_block) ++{ ++ struct buffer_head bh; ++ struct dm_target *ti; ++ void *map_context; ++ int r; ++ ++ if (test_bit(DMF_BLOCK_IO, &md->flags)) { ++ return -EPERM; ++ } ++ ++ if (!check_dev_size(dev, block)) { ++ return -EINVAL; ++ } ++ ++ /* setup dummy bh */ ++ memset(&bh, 0, sizeof(bh)); ++ bh.b_blocknr = block; ++ bh.b_dev = bh.b_rdev = dev; ++ bh.b_size = _blksize_size[MINOR(dev)]; ++ bh.b_rsector = block * (bh.b_size >> 9); ++ ++ /* find target */ ++ ti = dm_table_find_target(md->map, bh.b_rsector); ++ ++ /* do the mapping */ ++ r = ti->type->map(ti, &bh, READ, &map_context); ++ ti->type->end_io(ti, &bh, READ, 0, map_context); ++ ++ if (!r) { ++ *r_dev = bh.b_rdev; ++ *r_block = bh.b_rsector / (bh.b_size >> 9); ++ } ++ ++ return r; ++} ++ ++/* ++ * Marshals arguments and results between user and kernel space. ++ */ ++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb) ++{ ++ struct mapped_device *md; ++ unsigned long block, r_block; ++ kdev_t r_dev; ++ int r; ++ ++ if (get_user(block, &lvb->lv_block)) ++ return -EFAULT; ++ ++ md = get_kdev(inode->i_rdev); ++ if (!md) ++ return -ENXIO; ++ ++ down_read(&md->lock); ++ r = __bmap(md, inode->i_rdev, block, &r_dev, &r_block); ++ up_read(&md->lock); ++ dm_put(md); ++ ++ if (!r && (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) || ++ put_user(r_block, &lvb->lv_block))) ++ r = -EFAULT; ++ ++ return r; ++} ++ ++/*----------------------------------------------------------------- ++ * A bitset is used to keep track of allocated minor numbers. ++ *---------------------------------------------------------------*/ ++static spinlock_t _minor_lock = SPIN_LOCK_UNLOCKED; ++static struct mapped_device *_mds[MAX_DEVICES]; ++ ++static void free_minor(int minor) ++{ ++ spin_lock(&_minor_lock); ++ _mds[minor] = NULL; ++ spin_unlock(&_minor_lock); ++} ++ ++/* ++ * See if the device with a specific minor # is free. 
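++ * Returns the minor number on success, -EBUSY if it is already
++ * in use, or -EINVAL if it is out of range.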
++ */ ++static int specific_minor(int minor, struct mapped_device *md) ++{ ++ int r = -EBUSY; ++ ++ if (minor >= MAX_DEVICES) { ++ DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)", ++ MAX_DEVICES); ++ return -EINVAL; ++ } ++ ++ spin_lock(&_minor_lock); ++ if (!_mds[minor]) { ++ _mds[minor] = md; ++ r = minor; ++ } ++ spin_unlock(&_minor_lock); ++ ++ return r; ++} ++ ++static int next_free_minor(struct mapped_device *md) ++{ ++ int i; ++ ++ spin_lock(&_minor_lock); ++ for (i = 0; i < MAX_DEVICES; i++) { ++ if (!_mds[i]) { ++ _mds[i] = md; ++ break; ++ } ++ } ++ spin_unlock(&_minor_lock); ++ ++ return (i < MAX_DEVICES) ? i : -EBUSY; ++} ++ ++static struct mapped_device *get_kdev(kdev_t dev) ++{ ++ struct mapped_device *md; ++ ++ if (major(dev) != _major) ++ return NULL; ++ ++ spin_lock(&_minor_lock); ++ md = _mds[minor(dev)]; ++ if (md) ++ dm_get(md); ++ spin_unlock(&_minor_lock); ++ ++ return md; ++} ++ ++/* ++ * Allocate and initialise a blank device with a given minor. ++ */ ++static struct mapped_device *alloc_dev(int minor) ++{ ++ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); ++ ++ if (!md) { ++ DMWARN("unable to allocate device, out of memory."); ++ return NULL; ++ } ++ ++ /* get a minor number for the dev */ ++ minor = (minor < 0) ? next_free_minor(md) : specific_minor(minor, md); ++ if (minor < 0) { ++ kfree(md); ++ return NULL; ++ } ++ ++ memset(md, 0, sizeof(*md)); ++ ++ md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, ++ mempool_free_slab, _io_cache); ++ if (!md->io_pool) { ++ free_minor(minor); ++ kfree(md); ++ return NULL; ++ } ++ ++ md->dev = mk_kdev(_major, minor); ++ init_rwsem(&md->lock); ++ atomic_set(&md->holders, 1); ++ atomic_set(&md->pending, 0); ++ init_waitqueue_head(&md->wait); ++ ++ return md; ++} ++ ++static void free_dev(struct mapped_device *md) ++{ ++ free_minor(minor(md->dev)); ++ mempool_destroy(md->io_pool); ++ kfree(md); ++} ++ ++/* ++ * The hardsect size for a mapped device is the largest hardsect size ++ * from the devices it maps onto. ++ */ ++static int __find_hardsect_size(struct list_head *devices) ++{ ++ int result = 512, size; ++ struct list_head *tmp; ++ ++ list_for_each(tmp, devices) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ size = get_hardsect_size(dd->dev); ++ if (size > result) ++ result = size; ++ } ++ ++ return result; ++} ++ ++/* ++ * Bind a table to the device. ++ */ ++static int __bind(struct mapped_device *md, struct dm_table *t) ++{ ++ int minor = minor(md->dev); ++ md->map = t; ++ ++ /* in k */ ++ _block_size[minor] = dm_table_get_size(t) >> 1; ++ _blksize_size[minor] = BLOCK_SIZE; ++ _hardsect_size[minor] = __find_hardsect_size(dm_table_get_devices(t)); ++ register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]); ++ ++ dm_table_get(t); ++ return 0; ++} ++ ++static void __unbind(struct mapped_device *md) ++{ ++ int minor = minor(md->dev); ++ ++ dm_table_put(md->map); ++ md->map = NULL; ++ ++ _block_size[minor] = 0; ++ _blksize_size[minor] = 0; ++ _hardsect_size[minor] = 0; ++} ++ ++/* ++ * Constructor for a new device. 
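++ * The caller receives a reference to the new device (holders is
++ * set to one) and should drop it with dm_put() when finished
++ * with it.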
++ */ ++int dm_create(int minor, struct dm_table *table, struct mapped_device **result) ++{ ++ int r; ++ struct mapped_device *md; ++ ++ md = alloc_dev(minor); ++ if (!md) ++ return -ENXIO; ++ ++ r = __bind(md, table); ++ if (r) { ++ free_dev(md); ++ return r; ++ } ++ ++ *result = md; ++ return 0; ++} ++ ++void dm_get(struct mapped_device *md) ++{ ++ atomic_inc(&md->holders); ++} ++ ++void dm_put(struct mapped_device *md) ++{ ++ if (atomic_dec_and_test(&md->holders)) { ++ __unbind(md); ++ free_dev(md); ++ } ++} ++ ++/* ++ * Requeue the deferred io by calling generic_make_request. ++ */ ++static void flush_deferred_io(struct deferred_io *c) ++{ ++ struct deferred_io *n; ++ ++ while (c) { ++ n = c->next; ++ generic_make_request(c->rw, c->bh); ++ free_deferred(c); ++ c = n; ++ } ++} ++ ++/* ++ * Swap in a new table (destroying old one). ++ */ ++int dm_swap_table(struct mapped_device *md, struct dm_table *table) ++{ ++ int r; ++ ++ down_write(&md->lock); ++ ++ /* device must be suspended */ ++ if (!test_bit(DMF_SUSPENDED, &md->flags)) { ++ up_write(&md->lock); ++ return -EPERM; ++ } ++ ++ __unbind(md); ++ r = __bind(md, table); ++ if (r) ++ return r; ++ ++ up_write(&md->lock); ++ return 0; ++} ++ ++/* ++ * We need to be able to change a mapping table under a mounted ++ * filesystem. For example we might want to move some data in ++ * the background. Before the table can be swapped with ++ * dm_bind_table, dm_suspend must be called to flush any in ++ * flight io and ensure that any further io gets deferred. ++ */ ++int dm_suspend(struct mapped_device *md) ++{ ++ DECLARE_WAITQUEUE(wait, current); ++ ++ down_write(&md->lock); ++ ++ /* ++ * First we set the BLOCK_IO flag so no more ios will be ++ * mapped. ++ */ ++ if (test_bit(DMF_BLOCK_IO, &md->flags)) { ++ up_write(&md->lock); ++ return -EINVAL; ++ } ++ ++ set_bit(DMF_BLOCK_IO, &md->flags); ++ add_wait_queue(&md->wait, &wait); ++ up_write(&md->lock); ++ ++ /* ++ * Then we wait for the already mapped ios to ++ * complete. 
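++	 * dec_pending() wakes md->wait each time md->pending
++	 * reaches zero, so all we do here is re-test the counter
++	 * whenever we are woken.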
++ */ ++ run_task_queue(&tq_disk); ++ while (1) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ if (!atomic_read(&md->pending)) ++ break; ++ ++ schedule(); ++ } ++ ++ current->state = TASK_RUNNING; ++ ++ down_write(&md->lock); ++ remove_wait_queue(&md->wait, &wait); ++ set_bit(DMF_SUSPENDED, &md->flags); ++ up_write(&md->lock); ++ ++ return 0; ++} ++ ++int dm_resume(struct mapped_device *md) ++{ ++ struct deferred_io *def; ++ ++ down_write(&md->lock); ++ if (!test_bit(DMF_SUSPENDED, &md->flags) || ++ !dm_table_get_size(md->map)) { ++ up_write(&md->lock); ++ return -EINVAL; ++ } ++ ++ clear_bit(DMF_SUSPENDED, &md->flags); ++ clear_bit(DMF_BLOCK_IO, &md->flags); ++ def = md->deferred; ++ md->deferred = NULL; ++ up_write(&md->lock); ++ ++ flush_deferred_io(def); ++ run_task_queue(&tq_disk); ++ ++ return 0; ++} ++ ++struct dm_table *dm_get_table(struct mapped_device *md) ++{ ++ struct dm_table *t; ++ ++ down_read(&md->lock); ++ t = md->map; ++ dm_table_get(t); ++ up_read(&md->lock); ++ ++ return t; ++} ++ ++kdev_t dm_kdev(struct mapped_device *md) ++{ ++ kdev_t dev; ++ ++ down_read(&md->lock); ++ dev = md->dev; ++ up_read(&md->lock); ++ ++ return dev; ++} ++ ++int dm_suspended(struct mapped_device *md) ++{ ++ return test_bit(DMF_SUSPENDED, &md->flags); ++} ++ ++struct block_device_operations dm_blk_dops = { ++ .open = dm_blk_open, ++ .release = dm_blk_close, ++ .ioctl = dm_blk_ioctl, ++ .owner = THIS_MODULE ++}; ++ ++/* ++ * module hooks ++ */ ++module_init(dm_init); ++module_exit(dm_exit); ++ ++MODULE_PARM(major, "i"); ++MODULE_PARM_DESC(major, "The major number of the device mapper"); ++MODULE_DESCRIPTION(DM_NAME " driver"); ++MODULE_AUTHOR("Joe Thornber "); ++MODULE_LICENSE("GPL"); +diff -ruN linux-2.4.20/drivers/md/dm.h linux/drivers/md/dm.h +--- linux-2.4.20/drivers/md/dm.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm.h Wed Mar 26 14:12:00 2003 +@@ -0,0 +1,154 @@ ++/* ++ * Internal header file for device mapper ++ * ++ * Copyright (C) 2001, 2002 Sistina Software ++ * ++ * This file is released under the LGPL. ++ */ ++ ++#ifndef DM_INTERNAL_H ++#define DM_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++ ++#define DM_NAME "device-mapper" ++#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) ++#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) ++#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) ++ ++/* ++ * FIXME: I think this should be with the definition of sector_t ++ * in types.h. ++ */ ++#ifdef CONFIG_LBD ++#define SECTOR_FORMAT "%Lu" ++#else ++#define SECTOR_FORMAT "%lu" ++#endif ++ ++#define SECTOR_SHIFT 9 ++#define SECTOR_SIZE (1 << SECTOR_SHIFT) ++ ++extern struct block_device_operations dm_blk_dops; ++ ++/* ++ * List of devices that a metadevice uses and should open/close. ++ */ ++struct dm_dev { ++ struct list_head list; ++ ++ atomic_t count; ++ int mode; ++ kdev_t dev; ++ struct block_device *bdev; ++}; ++ ++struct dm_table; ++struct mapped_device; ++ ++/*----------------------------------------------------------------- ++ * Functions for manipulating a struct mapped_device. ++ * Drop the reference with dm_put when you finish with the object. ++ *---------------------------------------------------------------*/ ++int dm_create(int minor, struct dm_table *table, struct mapped_device **md); ++ ++/* ++ * Reference counting for md. ++ */ ++void dm_get(struct mapped_device *md); ++void dm_put(struct mapped_device *md); ++ ++/* ++ * A device can still be used while suspended, but I/O is deferred. 
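++ *
++ * A table swap therefore looks roughly like this (error
++ * handling omitted):
++ *
++ *	dm_suspend(md);
++ *	dm_swap_table(md, new_table);
++ *	dm_resume(md);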
++ */ ++int dm_suspend(struct mapped_device *md); ++int dm_resume(struct mapped_device *md); ++ ++/* ++ * The device must be suspended before calling this method. ++ */ ++int dm_swap_table(struct mapped_device *md, struct dm_table *t); ++ ++/* ++ * Drop a reference on the table when you've finished with the ++ * result. ++ */ ++struct dm_table *dm_get_table(struct mapped_device *md); ++ ++/* ++ * Info functions. ++ */ ++kdev_t dm_kdev(struct mapped_device *md); ++int dm_suspended(struct mapped_device *md); ++ ++/*----------------------------------------------------------------- ++ * Functions for manipulating a table. Tables are also reference ++ * counted. ++ *---------------------------------------------------------------*/ ++int dm_table_create(struct dm_table **result, int mode); ++ ++void dm_table_get(struct dm_table *t); ++void dm_table_put(struct dm_table *t); ++ ++int dm_table_add_target(struct dm_table *t, const char *type, ++ sector_t start, sector_t len, char *params); ++int dm_table_complete(struct dm_table *t); ++void dm_table_event(struct dm_table *t); ++sector_t dm_table_get_size(struct dm_table *t); ++struct dm_target *dm_table_get_target(struct dm_table *t, int index); ++struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); ++unsigned int dm_table_get_num_targets(struct dm_table *t); ++struct list_head *dm_table_get_devices(struct dm_table *t); ++int dm_table_get_mode(struct dm_table *t); ++void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq); ++ ++/*----------------------------------------------------------------- ++ * A registry of target types. ++ *---------------------------------------------------------------*/ ++int dm_target_init(void); ++void dm_target_exit(void); ++struct target_type *dm_get_target_type(const char *name); ++void dm_put_target_type(struct target_type *t); ++ ++ ++/*----------------------------------------------------------------- ++ * Useful inlines. ++ *---------------------------------------------------------------*/ ++static inline int array_too_big(unsigned long fixed, unsigned long obj, ++ unsigned long num) ++{ ++ return (num > (ULONG_MAX - fixed) / obj); ++} ++ ++/* ++ * ceiling(n / size) * size ++ */ ++static inline unsigned long dm_round_up(unsigned long n, unsigned long size) ++{ ++ unsigned long r = n % size; ++ return n + (r ? (size - r) : 0); ++} ++ ++/* ++ * The device-mapper can be driven through one of two interfaces; ++ * ioctl or filesystem, depending which patch you have applied. ++ */ ++int dm_interface_init(void); ++void dm_interface_exit(void); ++ ++/* ++ * Targets for linear and striped mappings ++ */ ++int dm_linear_init(void); ++void dm_linear_exit(void); ++ ++int dm_stripe_init(void); ++void dm_stripe_exit(void); ++ ++int dm_snapshot_init(void); ++void dm_snapshot_exit(void); ++ ++#endif +diff -ruN linux-2.4.20/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c +--- linux-2.4.20/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/kcopyd.c Wed Mar 26 12:54:17 2003 +@@ -0,0 +1,839 @@ ++/* ++ * Copyright (C) 2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. 
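++ *
++ * kcopyd asynchronously copies regions of one block device to
++ * another, on behalf of clients such as the snapshot target.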
++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "kcopyd.h" ++ ++/* FIXME: this is only needed for the DMERR macros */ ++#include "dm.h" ++ ++static void wake_kcopyd(void); ++ ++/*----------------------------------------------------------------- ++ * We reserve our own pool of preallocated pages that are ++ * only used for kcopyd io. ++ *---------------------------------------------------------------*/ ++ ++/* ++ * FIXME: This should be configurable. ++ */ ++#define NUM_PAGES 512 ++ ++static DECLARE_MUTEX(_pages_lock); ++static int _num_free_pages; ++static struct page *_pages_array[NUM_PAGES]; ++static DECLARE_MUTEX(start_lock); ++ ++static int init_pages(void) ++{ ++ int i; ++ struct page *p; ++ ++ for (i = 0; i < NUM_PAGES; i++) { ++ p = alloc_page(GFP_KERNEL); ++ if (!p) ++ goto bad; ++ ++ LockPage(p); ++ _pages_array[i] = p; ++ } ++ ++ _num_free_pages = NUM_PAGES; ++ return 0; ++ ++ bad: ++ while (i--) { ++ UnlockPage(_pages_array[i]); ++ __free_page(_pages_array[i]); ++ } ++ return -ENOMEM; ++} ++ ++static void exit_pages(void) ++{ ++ int i; ++ struct page *p; ++ ++ for (i = 0; i < NUM_PAGES; i++) { ++ p = _pages_array[i]; ++ UnlockPage(p); ++ __free_page(p); ++ } ++ ++ _num_free_pages = 0; ++} ++ ++static int kcopyd_get_pages(int num, struct page **result) ++{ ++ int i; ++ ++ down(&_pages_lock); ++ if (_num_free_pages < num) { ++ up(&_pages_lock); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < num; i++) { ++ _num_free_pages--; ++ result[i] = _pages_array[_num_free_pages]; ++ } ++ up(&_pages_lock); ++ ++ return 0; ++} ++ ++static void kcopyd_free_pages(int num, struct page **result) ++{ ++ int i; ++ ++ down(&_pages_lock); ++ for (i = 0; i < num; i++) ++ _pages_array[_num_free_pages++] = result[i]; ++ up(&_pages_lock); ++} ++ ++/*----------------------------------------------------------------- ++ * We keep our own private pool of buffer_heads. These are just ++ * held in a list on the b_reqnext field. ++ *---------------------------------------------------------------*/ ++ ++/* ++ * Make sure we have enough buffers to always keep the pages ++ * occupied. So we assume the worst case scenario where blocks ++ * are the size of a single sector. ++ */ ++#define NUM_BUFFERS NUM_PAGES * (PAGE_SIZE / SECTOR_SIZE) ++ ++static spinlock_t _buffer_lock = SPIN_LOCK_UNLOCKED; ++static struct buffer_head *_all_buffers; ++static struct buffer_head *_free_buffers; ++ ++static int init_buffers(void) ++{ ++ int i; ++ struct buffer_head *buffers; ++ ++ buffers = vcalloc(NUM_BUFFERS, sizeof(struct buffer_head)); ++ if (!buffers) { ++ DMWARN("Couldn't allocate buffer heads."); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < NUM_BUFFERS; i++) { ++ if (i < NUM_BUFFERS - 1) ++ buffers[i].b_reqnext = &buffers[i + 1]; ++ init_waitqueue_head(&buffers[i].b_wait); ++ INIT_LIST_HEAD(&buffers[i].b_inode_buffers); ++ } ++ ++ _all_buffers = _free_buffers = buffers; ++ return 0; ++} ++ ++static void exit_buffers(void) ++{ ++ vfree(_all_buffers); ++} ++ ++static struct buffer_head *alloc_buffer(void) ++{ ++ struct buffer_head *r; ++ int flags; ++ ++ spin_lock_irqsave(&_buffer_lock, flags); ++ ++ if (!_free_buffers) ++ r = NULL; ++ else { ++ r = _free_buffers; ++ _free_buffers = _free_buffers->b_reqnext; ++ r->b_reqnext = NULL; ++ } ++ ++ spin_unlock_irqrestore(&_buffer_lock, flags); ++ ++ return r; ++} ++ ++/* ++ * Only called from interrupt context. 
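++ * (from end_bh), so it must never sleep; the free list is
++ * protected by the irq-safe _buffer_lock instead.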
++ */ ++static void free_buffer(struct buffer_head *bh) ++{ ++ int flags, was_empty; ++ ++ spin_lock_irqsave(&_buffer_lock, flags); ++ was_empty = (_free_buffers == NULL) ? 1 : 0; ++ bh->b_reqnext = _free_buffers; ++ _free_buffers = bh; ++ spin_unlock_irqrestore(&_buffer_lock, flags); ++ ++ /* ++ * If the buffer list was empty then kcopyd probably went ++ * to sleep because it ran out of buffer heads, so let's ++ * wake it up. ++ */ ++ if (was_empty) ++ wake_kcopyd(); ++} ++ ++/*----------------------------------------------------------------- ++ * kcopyd_jobs need to be allocated by the *clients* of kcopyd, ++ * for this reason we use a mempool to prevent the client from ++ * ever having to do io (which could cause a ++ * deadlock). ++ *---------------------------------------------------------------*/ ++#define MIN_JOBS NUM_PAGES ++ ++static kmem_cache_t *_job_cache = NULL; ++static mempool_t *_job_pool = NULL; ++ ++/* ++ * We maintain three lists of jobs: ++ * ++ * i) jobs waiting for pages ++ * ii) jobs that have pages, and are waiting for the io to be issued. ++ * iii) jobs that have completed. ++ * ++ * All three of these are protected by job_lock. ++ */ ++ ++static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED; ++ ++static LIST_HEAD(_complete_jobs); ++static LIST_HEAD(_io_jobs); ++static LIST_HEAD(_pages_jobs); ++ ++static int init_jobs(void) ++{ ++ INIT_LIST_HEAD(&_complete_jobs); ++ INIT_LIST_HEAD(&_io_jobs); ++ INIT_LIST_HEAD(&_pages_jobs); ++ ++ _job_cache = kmem_cache_create("kcopyd-jobs", sizeof(struct kcopyd_job), ++ __alignof__(struct kcopyd_job), ++ 0, NULL, NULL); ++ if (!_job_cache) ++ return -ENOMEM; ++ ++ _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, ++ mempool_free_slab, _job_cache); ++ if (!_job_pool) { ++ kmem_cache_destroy(_job_cache); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static void exit_jobs(void) ++{ ++ mempool_destroy(_job_pool); ++ kmem_cache_destroy(_job_cache); ++} ++ ++struct kcopyd_job *kcopyd_alloc_job(void) ++{ ++ struct kcopyd_job *job; ++ ++ job = mempool_alloc(_job_pool, GFP_NOIO); ++ if (!job) ++ return NULL; ++ ++ memset(job, 0, sizeof(*job)); ++ return job; ++} ++ ++void kcopyd_free_job(struct kcopyd_job *job) ++{ ++ mempool_free(job, _job_pool); ++} ++ ++/* ++ * Functions to push and pop a job onto the head of a given job ++ * list. ++ */ ++static inline struct kcopyd_job *pop(struct list_head *jobs) ++{ ++ struct kcopyd_job *job = NULL; ++ int flags; ++ ++ spin_lock_irqsave(&_job_lock, flags); ++ ++ if (!list_empty(jobs)) { ++ job = list_entry(jobs->next, struct kcopyd_job, list); ++ list_del(&job->list); ++ } ++ spin_unlock_irqrestore(&_job_lock, flags); ++ ++ return job; ++} ++ ++static inline void push(struct list_head *jobs, struct kcopyd_job *job) ++{ ++ int flags; ++ ++ spin_lock_irqsave(&_job_lock, flags); ++ list_add(&job->list, jobs); ++ spin_unlock_irqrestore(&_job_lock, flags); ++} ++ ++/* ++ * Completion function for one of our buffers. ++ */ ++static void end_bh(struct buffer_head *bh, int uptodate) ++{ ++ struct kcopyd_job *job = bh->b_private; ++ ++ mark_buffer_uptodate(bh, uptodate); ++ unlock_buffer(bh); ++ ++ if (!uptodate) ++ job->err = -EIO; ++ ++ /* are we the last ? 
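++	 * If so, the whole job has finished and kcopyd must be
++	 * woken to run the completion callback.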
*/ ++ if (atomic_dec_and_test(&job->nr_incomplete)) { ++ push(&_complete_jobs, job); ++ wake_kcopyd(); ++ } ++ ++ free_buffer(bh); ++} ++ ++static void dispatch_bh(struct kcopyd_job *job, ++ struct buffer_head *bh, int block) ++{ ++ int p; ++ ++ /* ++ * Add in the job offset ++ */ ++ bh->b_blocknr = (job->disk.sector >> job->block_shift) + block; ++ ++ p = block >> job->bpp_shift; ++ block &= job->bpp_mask; ++ ++ bh->b_size = job->block_size; ++ set_bh_page(bh, job->pages[p], ((block << job->block_shift) + ++ job->offset) << SECTOR_SHIFT); ++ bh->b_this_page = bh; ++ ++ init_buffer(bh, end_bh, job); ++ ++ bh->b_dev = job->disk.dev; ++ atomic_set(&bh->b_count, 1); ++ ++ bh->b_state = ((1 << BH_Uptodate) | (1 << BH_Mapped) | ++ (1 << BH_Lock) | (1 << BH_Req)); ++ ++ if (job->rw == WRITE) ++ clear_bit(BH_Dirty, &bh->b_state); ++ ++ submit_bh(job->rw, bh); ++} ++ ++/* ++ * These three functions process 1 item from the corresponding ++ * job list. ++ * ++ * They return: ++ * < 0: error ++ * 0: success ++ * > 0: can't process yet. ++ */ ++static int run_complete_job(struct kcopyd_job *job) ++{ ++ job->callback(job); ++ return 0; ++} ++ ++/* ++ * Request io on as many buffer heads as we can currently get for ++ * a particular job. ++ */ ++static int run_io_job(struct kcopyd_job *job) ++{ ++ unsigned int block; ++ struct buffer_head *bh; ++ ++ for (block = atomic_read(&job->nr_requested); ++ block < job->nr_blocks; block++) { ++ bh = alloc_buffer(); ++ if (!bh) ++ break; ++ ++ atomic_inc(&job->nr_requested); ++ dispatch_bh(job, bh, block); ++ } ++ ++ return (block == job->nr_blocks) ? 0 : 1; ++} ++ ++static int run_pages_job(struct kcopyd_job *job) ++{ ++ int r; ++ ++ job->nr_pages = (job->disk.count + job->offset) / ++ (PAGE_SIZE / SECTOR_SIZE); ++ r = kcopyd_get_pages(job->nr_pages, job->pages); ++ ++ if (!r) { ++ /* this job is ready for io */ ++ push(&_io_jobs, job); ++ return 0; ++ } ++ ++ if (r == -ENOMEM) ++ /* can't complete now */ ++ return 1; ++ ++ return r; ++} ++ ++/* ++ * Run through a list for as long as possible. Returns the count ++ * of successful jobs. ++ */ ++static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) ++{ ++ struct kcopyd_job *job; ++ int r, count = 0; ++ ++ while ((job = pop(jobs))) { ++ ++ r = fn(job); ++ ++ if (r < 0) { ++ /* error this rogue job */ ++ job->err = r; ++ push(&_complete_jobs, job); ++ break; ++ } ++ ++ if (r > 0) { ++ /* ++ * We couldn't service this job ATM, so ++ * push this job back onto the list. ++ */ ++ push(jobs, job); ++ break; ++ } ++ ++ count++; ++ } ++ ++ return count; ++} ++ ++/* ++ * kcopyd does this every time it's woken up. ++ */ ++static void do_work(void) ++{ ++ int count; ++ ++ /* ++ * We loop round until there is no more work to do. 
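++ * The order matters: completed jobs free their pages, which may
++ * let a queued pages job proceed, and a pages job that gets its
++ * pages pushes itself onto the io list.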
++ */ ++ do { ++ count = process_jobs(&_complete_jobs, run_complete_job); ++ count += process_jobs(&_io_jobs, run_io_job); ++ count += process_jobs(&_pages_jobs, run_pages_job); ++ ++ } while (count); ++ ++ run_task_queue(&tq_disk); ++} ++ ++/*----------------------------------------------------------------- ++ * The daemon ++ *---------------------------------------------------------------*/ ++static atomic_t _kcopyd_must_die; ++static DECLARE_MUTEX(_run_lock); ++static DECLARE_WAIT_QUEUE_HEAD(_job_queue); ++ ++static int kcopyd(void *arg) ++{ ++ DECLARE_WAITQUEUE(wq, current); ++ ++ daemonize(); ++ strcpy(current->comm, "kcopyd"); ++ atomic_set(&_kcopyd_must_die, 0); ++ ++ add_wait_queue(&_job_queue, &wq); ++ ++ down(&_run_lock); ++ up(&start_lock); ++ ++ while (1) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ if (atomic_read(&_kcopyd_must_die)) ++ break; ++ ++ do_work(); ++ schedule(); ++ } ++ ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&_job_queue, &wq); ++ ++ up(&_run_lock); ++ ++ return 0; ++} ++ ++static int start_daemon(void) ++{ ++ static pid_t pid = 0; ++ ++ down(&start_lock); ++ ++ pid = kernel_thread(kcopyd, NULL, 0); ++ if (pid <= 0) { ++ DMERR("Failed to start kcopyd thread"); ++ return -EAGAIN; ++ } ++ ++ /* ++ * wait for the daemon to up this mutex. ++ */ ++ down(&start_lock); ++ up(&start_lock); ++ ++ return 0; ++} ++ ++static int stop_daemon(void) ++{ ++ atomic_set(&_kcopyd_must_die, 1); ++ wake_kcopyd(); ++ down(&_run_lock); ++ up(&_run_lock); ++ ++ return 0; ++} ++ ++static void wake_kcopyd(void) ++{ ++ wake_up_interruptible(&_job_queue); ++} ++ ++static int calc_shift(unsigned int n) ++{ ++ int s; ++ ++ for (s = 0; n; s++, n >>= 1) ++ ; ++ ++ return --s; ++} ++ ++static void calc_block_sizes(struct kcopyd_job *job) ++{ ++ job->block_size = get_hardsect_size(job->disk.dev); ++ job->block_shift = calc_shift(job->block_size / SECTOR_SIZE); ++ job->bpp_shift = PAGE_SHIFT - job->block_shift - SECTOR_SHIFT; ++ job->bpp_mask = (1 << job->bpp_shift) - 1; ++ job->nr_blocks = job->disk.count >> job->block_shift; ++ atomic_set(&job->nr_requested, 0); ++ atomic_set(&job->nr_incomplete, job->nr_blocks); ++} ++ ++int kcopyd_io(struct kcopyd_job *job) ++{ ++ calc_block_sizes(job); ++ push(job->pages[0] ? &_io_jobs : &_pages_jobs, job); ++ wake_kcopyd(); ++ return 0; ++} ++ ++/*----------------------------------------------------------------- ++ * The copier is implemented on top of the simpler async io ++ * daemon above. 
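++ * A copy starts life as a READ job; its callback, copy_write(),
++ * resubmits the job as a WRITE to the destination, whose
++ * callback, copy_complete(), finally notifies the client.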
++ *---------------------------------------------------------------*/ ++struct copy_info { ++ kcopyd_notify_fn notify; ++ void *notify_context; ++ ++ struct kcopyd_region to; ++}; ++ ++#define MIN_INFOS 128 ++static kmem_cache_t *_copy_cache = NULL; ++static mempool_t *_copy_pool = NULL; ++ ++static int init_copier(void) ++{ ++ _copy_cache = kmem_cache_create("kcopyd-info", ++ sizeof(struct copy_info), ++ __alignof__(struct copy_info), ++ 0, NULL, NULL); ++ if (!_copy_cache) ++ return -ENOMEM; ++ ++ _copy_pool = mempool_create(MIN_INFOS, mempool_alloc_slab, ++ mempool_free_slab, _copy_cache); ++ if (!_copy_pool) { ++ kmem_cache_destroy(_copy_cache); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static void exit_copier(void) ++{ ++ if (_copy_pool) ++ mempool_destroy(_copy_pool); ++ ++ if (_copy_cache) ++ kmem_cache_destroy(_copy_cache); ++} ++ ++static inline struct copy_info *alloc_copy_info(void) ++{ ++ return mempool_alloc(_copy_pool, GFP_NOIO); ++} ++ ++static inline void free_copy_info(struct copy_info *info) ++{ ++ mempool_free(info, _copy_pool); ++} ++ ++void copy_complete(struct kcopyd_job *job) ++{ ++ struct copy_info *info = (struct copy_info *) job->context; ++ ++ if (info->notify) ++ info->notify(job->err, info->notify_context); ++ ++ free_copy_info(info); ++ ++ kcopyd_free_pages(job->nr_pages, job->pages); ++ ++ kcopyd_free_job(job); ++} ++ ++static void page_write_complete(struct kcopyd_job *job) ++{ ++ struct copy_info *info = (struct copy_info *) job->context; ++ int i; ++ ++ if (info->notify) ++ info->notify(job->err, info->notify_context); ++ ++ free_copy_info(info); ++ for (i = 0; i < job->nr_pages; i++) ++ put_page(job->pages[i]); ++ ++ kcopyd_free_job(job); ++} ++ ++/* ++ * These callback functions implement the state machine that copies regions. ++ */ ++void copy_write(struct kcopyd_job *job) ++{ ++ struct copy_info *info = (struct copy_info *) job->context; ++ ++ if (job->err) { ++ if (info->notify) ++ info->notify(job->err, job->context); ++ ++ kcopyd_free_job(job); ++ free_copy_info(info); ++ return; ++ } ++ ++ job->rw = WRITE; ++ memcpy(&job->disk, &info->to, sizeof(job->disk)); ++ job->callback = copy_complete; ++ ++ /* ++ * Queue the write. ++ */ ++ kcopyd_io(job); ++} ++ ++int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, ++ struct page **pages, int offset, kcopyd_notify_fn fn, ++ void *context) ++{ ++ struct copy_info *info; ++ struct kcopyd_job *job; ++ int i; ++ ++ /* ++ * Allocate a new copy_info. ++ */ ++ info = alloc_copy_info(); ++ if (!info) ++ return -ENOMEM; ++ ++ job = kcopyd_alloc_job(); ++ if (!job) { ++ free_copy_info(info); ++ return -ENOMEM; ++ } ++ ++ /* ++ * set up for the write. ++ */ ++ info->notify = fn; ++ info->notify_context = context; ++ memcpy(&info->to, to, sizeof(*to)); ++ ++ /* Get the pages */ ++ job->nr_pages = nr_pages; ++ for (i = 0; i < nr_pages; i++) { ++ get_page(pages[i]); ++ job->pages[i] = pages[i]; ++ } ++ ++ job->rw = WRITE; ++ ++ memcpy(&job->disk, &info->to, sizeof(job->disk)); ++ job->offset = offset; ++ job->callback = page_write_complete; ++ job->context = info; ++ ++ /* ++ * Trigger job. ++ */ ++ kcopyd_io(job); ++ return 0; ++} ++ ++int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, ++ kcopyd_notify_fn fn, void *context) ++{ ++ struct copy_info *info; ++ struct kcopyd_job *job; ++ ++ /* ++ * Allocate a new copy_info. 
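++	 * It comes from a mempool, so with GFP_NOIO it may sleep
++	 * but should never fail in process context.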
++ */ ++ info = alloc_copy_info(); ++ if (!info) ++ return -ENOMEM; ++ ++ job = kcopyd_alloc_job(); ++ if (!job) { ++ free_copy_info(info); ++ return -ENOMEM; ++ } ++ ++ /* ++ * set up for the read. ++ */ ++ info->notify = fn; ++ info->notify_context = context; ++ memcpy(&info->to, to, sizeof(*to)); ++ ++ job->rw = READ; ++ memcpy(&job->disk, from, sizeof(*from)); ++ ++ job->offset = 0; ++ job->callback = copy_write; ++ job->context = info; ++ ++ /* ++ * Trigger job. ++ */ ++ kcopyd_io(job); ++ return 0; ++} ++ ++/*----------------------------------------------------------------- ++ * Unit setup ++ *---------------------------------------------------------------*/ ++static struct { ++ int (*init) (void); ++ void (*exit) (void); ++ ++} _inits[] = { ++#define xx(n) { init_ ## n, exit_ ## n} ++ xx(pages), ++ xx(buffers), ++ xx(jobs), ++ xx(copier) ++#undef xx ++}; ++ ++static int _client_count = 0; ++static DECLARE_MUTEX(_client_count_sem); ++ ++static int kcopyd_init(void) ++{ ++ const int count = sizeof(_inits) / sizeof(*_inits); ++ ++ int r, i; ++ ++ for (i = 0; i < count; i++) { ++ r = _inits[i].init(); ++ if (r) ++ goto bad; ++ } ++ ++ start_daemon(); ++ return 0; ++ ++ bad: ++ while (i--) ++ _inits[i].exit(); ++ ++ return r; ++} ++ ++static void kcopyd_exit(void) ++{ ++ int i = sizeof(_inits) / sizeof(*_inits); ++ ++ if (stop_daemon()) ++ DMWARN("Couldn't stop kcopyd."); ++ ++ while (i--) ++ _inits[i].exit(); ++} ++ ++void kcopyd_inc_client_count(void) ++{ ++ /* ++ * What I need here is an atomic_test_and_inc that returns ++ * the previous value of the atomic... In its absence I lock ++ * an int with a semaphore. :-( ++ */ ++ down(&_client_count_sem); ++ if (_client_count == 0) ++ kcopyd_init(); ++ _client_count++; ++ ++ up(&_client_count_sem); ++} ++ ++void kcopyd_dec_client_count(void) ++{ ++ down(&_client_count_sem); ++ if (--_client_count == 0) ++ kcopyd_exit(); ++ ++ up(&_client_count_sem); ++} +diff -ruN linux-2.4.20/drivers/md/kcopyd.h linux/drivers/md/kcopyd.h +--- linux-2.4.20/drivers/md/kcopyd.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/kcopyd.h Wed Mar 26 14:14:34 2003 +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (C) 2001 Sistina Software ++ * ++ * This file is released under the GPL. ++ */ ++ ++#ifndef DM_KCOPYD_H ++#define DM_KCOPYD_H ++ ++/* ++ * Needed for the definition of offset_t. ++ */ ++#include ++#include ++ ++struct kcopyd_region { ++ kdev_t dev; ++ sector_t sector; ++ sector_t count; ++}; ++ ++#define MAX_KCOPYD_PAGES 128 ++ ++struct kcopyd_job { ++ struct list_head list; ++ ++ /* ++ * Error state of the job. ++ */ ++ int err; ++ ++ /* ++ * Either READ or WRITE ++ */ ++ int rw; ++ ++ /* ++ * The source or destination for the transfer. ++ */ ++ struct kcopyd_region disk; ++ ++ int nr_pages; ++ struct page *pages[MAX_KCOPYD_PAGES]; ++ ++ /* ++ * Shifts and masks that will be useful when dispatching ++ * each buffer_head. ++ */ ++ sector_t offset; ++ sector_t block_size; ++ sector_t block_shift; ++ sector_t bpp_shift; /* blocks per page */ ++ sector_t bpp_mask; ++ ++ /* ++ * nr_blocks is how many buffer heads will have to be ++ * displatched to service this job, nr_requested is how ++ * many have been dispatched and nr_complete is how many ++ * have come back. ++ */ ++ unsigned int nr_blocks; ++ atomic_t nr_requested; ++ atomic_t nr_incomplete; ++ ++ /* ++ * Set this to ensure you are notified when the job has ++ * completed. 'context' is for callback to use. 
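++ * The callback runs from the kcopyd daemon, not from interrupt
++ * context, so it is allowed to sleep.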
++ */ ++ void (*callback) (struct kcopyd_job * job); ++ void *context; ++}; ++ ++/* ++ * Low level async io routines. ++ */ ++struct kcopyd_job *kcopyd_alloc_job(void); ++void kcopyd_free_job(struct kcopyd_job *job); ++ ++int kcopyd_queue_job(struct kcopyd_job *job); ++ ++/* ++ * Submit a copy job to kcopyd. This is built on top of the ++ * previous three fns. ++ */ ++typedef void (*kcopyd_notify_fn) (int err, void *context); ++ ++int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, ++ kcopyd_notify_fn fn, void *context); ++ ++int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, ++ struct page **pages, int offset, kcopyd_notify_fn fn, ++ void *context); ++ ++/* ++ * We only want kcopyd to reserve resources if someone is ++ * actually using it. ++ */ ++void kcopyd_inc_client_count(void); ++void kcopyd_dec_client_count(void); ++ ++#endif +diff -ruN linux-2.4.20/include/linux/device-mapper.h linux/include/linux/device-mapper.h +--- linux-2.4.20/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/device-mapper.h Wed Mar 26 14:14:55 2003 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the LGPL. ++ */ ++ ++#ifndef _LINUX_DEVICE_MAPPER_H ++#define _LINUX_DEVICE_MAPPER_H ++ ++typedef unsigned long sector_t; ++ ++struct dm_target; ++struct dm_table; ++struct dm_dev; ++ ++typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; ++ ++/* ++ * In the constructor the target parameter will already have the ++ * table, type, begin and len fields filled in. ++ */ ++typedef int (*dm_ctr_fn) (struct dm_target * target, int argc, char **argv); ++ ++/* ++ * The destructor doesn't need to free the dm_target, just ++ * anything hidden ti->private. ++ */ ++typedef void (*dm_dtr_fn) (struct dm_target * ti); ++ ++/* ++ * The map function must return: ++ * < 0: error ++ * = 0: The target will handle the io by resubmitting it later ++ * > 0: simple remap complete ++ */ ++typedef int (*dm_map_fn) (struct dm_target * ti, struct buffer_head * bh, ++ int rw, void **map_context); ++ ++/* ++ * Returns: ++ * < 0 : error (currently ignored) ++ * 0 : ended successfully ++ * 1 : for some reason the io has still not completed (eg, ++ * multipath target might want to requeue a failed io). ++ */ ++typedef int (*dm_endio_fn) (struct dm_target * ti, ++ struct buffer_head * bh, int rw, int error, ++ void *map_context); ++typedef int (*dm_status_fn) (struct dm_target * ti, status_type_t status_type, ++ char *result, int maxlen); ++ ++void dm_error(const char *message); ++ ++/* ++ * Constructors should call these functions to ensure destination devices ++ * are opened/closed correctly. ++ * FIXME: too many arguments. 
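++ *
++ * A constructor typically does something like this, 'lc' being
++ * the target's private structure:
++ *
++ *	if (dm_get_device(ti, argv[0], ti->begin, ti->len,
++ *			  dm_table_get_mode(ti->table), &lc->dev)) {
++ *		ti->error = "device lookup failed";
++ *		return -ENXIO;
++ *	}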
++ */ ++int dm_get_device(struct dm_target *ti, const char *path, sector_t start, ++ sector_t len, int mode, struct dm_dev **result); ++void dm_put_device(struct dm_target *ti, struct dm_dev *d); ++ ++/* ++ * Information about a target type ++ */ ++struct target_type { ++ const char *name; ++ struct module *module; ++ dm_ctr_fn ctr; ++ dm_dtr_fn dtr; ++ dm_map_fn map; ++ dm_endio_fn end_io; ++ dm_status_fn status; ++}; ++ ++struct dm_target { ++ struct dm_table *table; ++ struct target_type *type; ++ ++ /* target limits */ ++ sector_t begin; ++ sector_t len; ++ ++ /* target specific data */ ++ void *private; ++ ++ /* Used to provide an error string from the ctr */ ++ char *error; ++}; ++ ++int dm_register_target(struct target_type *t); ++int dm_unregister_target(struct target_type *t); ++ ++#endif /* _LINUX_DEVICE_MAPPER_H */ +diff -ruN linux-2.4.20/include/linux/dm-ioctl.h linux/include/linux/dm-ioctl.h +--- linux-2.4.20/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/dm-ioctl.h Wed Mar 26 13:30:30 2003 +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the LGPL. ++ */ ++ ++#ifndef _LINUX_DM_IOCTL_H ++#define _LINUX_DM_IOCTL_H ++ ++#include ++ ++#define DM_DIR "mapper" /* Slashes not supported */ ++#define DM_MAX_TYPE_NAME 16 ++#define DM_NAME_LEN 128 ++#define DM_UUID_LEN 129 ++ ++/* ++ * Implements a traditional ioctl interface to the device mapper. ++ */ ++ ++/* ++ * All ioctl arguments consist of a single chunk of memory, with ++ * this structure at the start. If a uuid is specified any ++ * lookup (eg. for a DM_INFO) will be done on that, *not* the ++ * name. ++ */ ++struct dm_ioctl { ++ /* ++ * The version number is made up of three parts: ++ * major - no backward or forward compatibility, ++ * minor - only backwards compatible, ++ * patch - both backwards and forwards compatible. ++ * ++ * All clients of the ioctl interface should fill in the ++ * version number of the interface that they were ++ * compiled with. ++ * ++ * All recognised ioctl commands (ie. those that don't ++ * return -ENOTTY) fill out this field, even if the ++ * command failed. ++ */ ++ uint32_t version[3]; /* in/out */ ++ uint32_t data_size; /* total size of data passed in ++ * including this struct */ ++ ++ uint32_t data_start; /* offset to start of data ++ * relative to start of this struct */ ++ ++ uint32_t target_count; /* in/out */ ++ uint32_t open_count; /* out */ ++ uint32_t flags; /* in/out */ ++ ++ __kernel_dev_t dev; /* in/out */ ++ ++ char name[DM_NAME_LEN]; /* device name */ ++ char uuid[DM_UUID_LEN]; /* unique identifier for ++ * the block device */ ++}; ++ ++/* ++ * Used to specify tables. These structures appear after the ++ * dm_ioctl. ++ */ ++struct dm_target_spec { ++ int32_t status; /* used when reading from kernel only */ ++ uint64_t sector_start; ++ uint32_t length; ++ ++ /* ++ * Offset in bytes (from the start of this struct) to ++ * next target_spec. ++ */ ++ uint32_t next; ++ ++ char target_type[DM_MAX_TYPE_NAME]; ++ ++ /* ++ * Parameter string starts immediately after this object. ++ * Be careful to add padding after string to ensure correct ++ * alignment of subsequent dm_target_spec. ++ */ ++}; ++ ++/* ++ * Used to retrieve the target dependencies. 
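++ * 'count' returns the number of entries in 'dev', which is a
++ * variable-length array tacked onto the end of the ioctl's
++ * data area.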
++ */ ++struct dm_target_deps { ++ uint32_t count; ++ ++ __kernel_dev_t dev[0]; /* out */ ++}; ++ ++/* ++ * If you change this make sure you make the corresponding change ++ * to dm-ioctl.c:lookup_ioctl() ++ */ ++enum { ++ /* Top level cmds */ ++ DM_VERSION_CMD = 0, ++ DM_REMOVE_ALL_CMD, ++ ++ /* device level cmds */ ++ DM_DEV_CREATE_CMD, ++ DM_DEV_REMOVE_CMD, ++ DM_DEV_RELOAD_CMD, ++ DM_DEV_RENAME_CMD, ++ DM_DEV_SUSPEND_CMD, ++ DM_DEV_DEPS_CMD, ++ DM_DEV_STATUS_CMD, ++ ++ /* target level cmds */ ++ DM_TARGET_STATUS_CMD, ++ DM_TARGET_WAIT_CMD ++}; ++ ++#define DM_IOCTL 0xfd ++ ++#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) ++#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) ++ ++#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) ++#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) ++#define DM_DEV_RELOAD _IOWR(DM_IOCTL, DM_DEV_RELOAD_CMD, struct dm_ioctl) ++#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) ++#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) ++#define DM_DEV_DEPS _IOWR(DM_IOCTL, DM_DEV_DEPS_CMD, struct dm_ioctl) ++#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) ++ ++#define DM_TARGET_STATUS _IOWR(DM_IOCTL, DM_TARGET_STATUS_CMD, struct dm_ioctl) ++#define DM_TARGET_WAIT _IOWR(DM_IOCTL, DM_TARGET_WAIT_CMD, struct dm_ioctl) ++ ++#define DM_VERSION_MAJOR 1 ++#define DM_VERSION_MINOR 0 ++#define DM_VERSION_PATCHLEVEL 10 ++#define DM_VERSION_EXTRA "-ioctl (2003-03-26)" ++ ++/* Status bits */ ++#define DM_READONLY_FLAG 0x00000001 ++#define DM_SUSPEND_FLAG 0x00000002 ++#define DM_EXISTS_FLAG 0x00000004 ++#define DM_PERSISTENT_DEV_FLAG 0x00000008 ++ ++/* ++ * Flag passed into ioctl STATUS command to get table information ++ * rather than current status. ++ */ ++#define DM_STATUS_TABLE_FLAG 0x00000010 ++ ++#endif /* _LINUX_DM_IOCTL_H */ diff --git a/patches/common/linux-2.4.20-makefile.patch b/patches/common/linux-2.4.20-makefile.patch new file mode 100644 index 0000000..8ce64a1 --- /dev/null +++ b/patches/common/linux-2.4.20-makefile.patch @@ -0,0 +1,30 @@ +diff -ruN linux-2.4.20/drivers/md/Makefile linux/drivers/md/Makefile +--- linux-2.4.20/drivers/md/Makefile Fri Jan 10 16:34:50 2003 ++++ linux/drivers/md/Makefile Wed Mar 26 12:53:19 2003 +@@ -4,9 +4,12 @@ + + O_TARGET := mddev.o + +-export-objs := md.o xor.o ++export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o + list-multi := lvm-mod.o + lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o ++dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \ ++ dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \ ++ kcopyd.o + + # Note: link order is important. 
All raid personalities + # and xor.o must come before md.o, as they each initialise +@@ -20,8 +23,12 @@ + obj-$(CONFIG_MD_MULTIPATH) += multipath.o + obj-$(CONFIG_BLK_DEV_MD) += md.o + obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o ++obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o + + include $(TOPDIR)/Rules.make + + lvm-mod.o: $(lvm-mod-objs) + $(LD) -r -o $@ $(lvm-mod-objs) ++ ++dm-mod.o: $(dm-mod-objs) ++ $(LD) -r -o $@ $(dm-mod-objs) diff --git a/patches/common/linux-2.4.20-mempool.patch b/patches/common/linux-2.4.20-mempool.patch new file mode 100644 index 0000000..7bbd29d --- /dev/null +++ b/patches/common/linux-2.4.20-mempool.patch @@ -0,0 +1,356 @@ +diff -ruN linux-2.4.20/include/linux/mempool.h linux/include/linux/mempool.h +--- linux-2.4.20/include/linux/mempool.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/mempool.h Wed Mar 26 12:53:48 2003 +@@ -0,0 +1,31 @@ ++/* ++ * memory buffer pool support ++ */ ++#ifndef _LINUX_MEMPOOL_H ++#define _LINUX_MEMPOOL_H ++ ++#include ++#include ++ ++struct mempool_s; ++typedef struct mempool_s mempool_t; ++ ++typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data); ++typedef void (mempool_free_t)(void *element, void *pool_data); ++ ++extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, ++ mempool_free_t *free_fn, void *pool_data); ++extern int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask); ++extern void mempool_destroy(mempool_t *pool); ++extern void * mempool_alloc(mempool_t *pool, int gfp_mask); ++extern void mempool_free(void *element, mempool_t *pool); ++ ++/* ++ * A mempool_alloc_t and mempool_free_t that get the memory from ++ * a slab that is passed in through pool_data. ++ */ ++void *mempool_alloc_slab(int gfp_mask, void *pool_data); ++void mempool_free_slab(void *element, void *pool_data); ++ ++ ++#endif /* _LINUX_MEMPOOL_H */ +diff -ruN linux-2.4.20/mm/Makefile linux/mm/Makefile +--- linux-2.4.20/mm/Makefile Fri Jan 10 16:36:02 2003 ++++ linux/mm/Makefile Wed Mar 26 12:53:19 2003 +@@ -9,12 +9,12 @@ + + O_TARGET := mm.o + +-export-objs := shmem.o filemap.o memory.o page_alloc.o ++export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o + + obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ + vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ + page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ +- shmem.o ++ shmem.o mempool.o + + obj-$(CONFIG_HIGHMEM) += highmem.o + +diff -ruN linux-2.4.20/mm/mempool.c linux/mm/mempool.c +--- linux-2.4.20/mm/mempool.c Thu Jan 1 01:00:00 1970 ++++ linux/mm/mempool.c Wed Mar 26 12:53:48 2003 +@@ -0,0 +1,299 @@ ++/* ++ * linux/mm/mempool.c ++ * ++ * memory buffer pool support. Such pools are mostly used ++ * for guaranteed, deadlock-free memory allocations during ++ * extreme VM load. 
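++ *
++ * Typical use, sketched from the device-mapper client:
++ *
++ *	pool = mempool_create(MIN_IOS, mempool_alloc_slab,
++ *			      mempool_free_slab, io_cache);
++ *	io = mempool_alloc(pool, GFP_NOIO);
++ *	...
++ *	mempool_free(io, pool);
++ *	mempool_destroy(pool);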
++ * ++ * started by Ingo Molnar, Copyright (C) 2001 ++ */ ++ ++#include ++#include ++#include ++#include ++ ++struct mempool_s { ++ spinlock_t lock; ++ int min_nr; /* nr of elements at *elements */ ++ int curr_nr; /* Current nr of elements at *elements */ ++ void **elements; ++ ++ void *pool_data; ++ mempool_alloc_t *alloc; ++ mempool_free_t *free; ++ wait_queue_head_t wait; ++}; ++ ++static void add_element(mempool_t *pool, void *element) ++{ ++ BUG_ON(pool->curr_nr >= pool->min_nr); ++ pool->elements[pool->curr_nr++] = element; ++} ++ ++static void *remove_element(mempool_t *pool) ++{ ++ BUG_ON(pool->curr_nr <= 0); ++ return pool->elements[--pool->curr_nr]; ++} ++ ++static void free_pool(mempool_t *pool) ++{ ++ while (pool->curr_nr) { ++ void *element = remove_element(pool); ++ pool->free(element, pool->pool_data); ++ } ++ kfree(pool->elements); ++ kfree(pool); ++} ++ ++/** ++ * mempool_create - create a memory pool ++ * @min_nr: the minimum number of elements guaranteed to be ++ * allocated for this pool. ++ * @alloc_fn: user-defined element-allocation function. ++ * @free_fn: user-defined element-freeing function. ++ * @pool_data: optional private data available to the user-defined functions. ++ * ++ * this function creates and allocates a guaranteed size, preallocated ++ * memory pool. The pool can be used from the mempool_alloc and mempool_free ++ * functions. This function might sleep. Both the alloc_fn() and the free_fn() ++ * functions might sleep - as long as the mempool_alloc function is not called ++ * from IRQ contexts. ++ */ ++mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, ++ mempool_free_t *free_fn, void *pool_data) ++{ ++ mempool_t *pool; ++ ++ pool = kmalloc(sizeof(*pool), GFP_KERNEL); ++ if (!pool) ++ return NULL; ++ memset(pool, 0, sizeof(*pool)); ++ pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL); ++ if (!pool->elements) { ++ kfree(pool); ++ return NULL; ++ } ++ spin_lock_init(&pool->lock); ++ pool->min_nr = min_nr; ++ pool->pool_data = pool_data; ++ init_waitqueue_head(&pool->wait); ++ pool->alloc = alloc_fn; ++ pool->free = free_fn; ++ ++ /* ++ * First pre-allocate the guaranteed number of buffers. ++ */ ++ while (pool->curr_nr < pool->min_nr) { ++ void *element; ++ ++ element = pool->alloc(GFP_KERNEL, pool->pool_data); ++ if (unlikely(!element)) { ++ free_pool(pool); ++ return NULL; ++ } ++ add_element(pool, element); ++ } ++ return pool; ++} ++ ++/** ++ * mempool_resize - resize an existing memory pool ++ * @pool: pointer to the memory pool which was allocated via ++ * mempool_create(). ++ * @new_min_nr: the new minimum number of elements guaranteed to be ++ * allocated for this pool. ++ * @gfp_mask: the usual allocation bitmask. ++ * ++ * This function shrinks/grows the pool. In the case of growing, ++ * it cannot be guaranteed that the pool will be grown to the new ++ * size immediately, but new mempool_free() calls will refill it. ++ * ++ * Note, the caller must guarantee that no mempool_destroy is called ++ * while this function is running. mempool_alloc() & mempool_free() ++ * might be called (eg. from IRQ contexts) while this function executes. 
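++ * For this reason the pool lock is dropped around each call to
++ * the alloc and free callbacks below, since those may sleep.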
++ */ ++int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask) ++{ ++ void *element; ++ void **new_elements; ++ unsigned long flags; ++ ++ BUG_ON(new_min_nr <= 0); ++ ++ spin_lock_irqsave(&pool->lock, flags); ++ if (new_min_nr < pool->min_nr) { ++ while (pool->curr_nr > new_min_nr) { ++ element = remove_element(pool); ++ spin_unlock_irqrestore(&pool->lock, flags); ++ pool->free(element, pool->pool_data); ++ spin_lock_irqsave(&pool->lock, flags); ++ } ++ pool->min_nr = new_min_nr; ++ goto out_unlock; ++ } ++ spin_unlock_irqrestore(&pool->lock, flags); ++ ++ /* Grow the pool */ ++ new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask); ++ if (!new_elements) ++ return -ENOMEM; ++ ++ spin_lock_irqsave(&pool->lock, flags); ++ memcpy(new_elements, pool->elements, ++ pool->curr_nr * sizeof(*new_elements)); ++ kfree(pool->elements); ++ pool->elements = new_elements; ++ pool->min_nr = new_min_nr; ++ ++ while (pool->curr_nr < pool->min_nr) { ++ spin_unlock_irqrestore(&pool->lock, flags); ++ element = pool->alloc(gfp_mask, pool->pool_data); ++ if (!element) ++ goto out; ++ spin_lock_irqsave(&pool->lock, flags); ++ if (pool->curr_nr < pool->min_nr) ++ add_element(pool, element); ++ else ++ kfree(element); /* Raced */ ++ } ++out_unlock: ++ spin_unlock_irqrestore(&pool->lock, flags); ++out: ++ return 0; ++} ++ ++/** ++ * mempool_destroy - deallocate a memory pool ++ * @pool: pointer to the memory pool which was allocated via ++ * mempool_create(). ++ * ++ * this function only sleeps if the free_fn() function sleeps. The caller ++ * has to guarantee that all elements have been returned to the pool (ie: ++ * freed) prior to calling mempool_destroy(). ++ */ ++void mempool_destroy(mempool_t *pool) ++{ ++ if (pool->curr_nr != pool->min_nr) ++ BUG(); /* There were outstanding elements */ ++ free_pool(pool); ++} ++ ++/** ++ * mempool_alloc - allocate an element from a specific memory pool ++ * @pool: pointer to the memory pool which was allocated via ++ * mempool_create(). ++ * @gfp_mask: the usual allocation bitmask. ++ * ++ * this function only sleeps if the alloc_fn function sleeps or ++ * returns NULL. Note that due to preallocation, this function ++ * *never* fails when called from process contexts. (it might ++ * fail if called from an IRQ context.) ++ */ ++void * mempool_alloc(mempool_t *pool, int gfp_mask) ++{ ++ void *element; ++ unsigned long flags; ++ int curr_nr; ++ DECLARE_WAITQUEUE(wait, current); ++ int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); ++ ++repeat_alloc: ++ element = pool->alloc(gfp_nowait, pool->pool_data); ++ if (likely(element != NULL)) ++ return element; ++ ++ /* ++ * If the pool is less than 50% full then try harder ++ * to allocate an element: ++ */ ++ if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) { ++ element = pool->alloc(gfp_mask, pool->pool_data); ++ if (likely(element != NULL)) ++ return element; ++ } ++ ++ /* ++ * Kick the VM at this point. 
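++	 * wakeup_bdflush() starts writeback of dirty buffers, the
++	 * idea being that completing io will free elements back
++	 * into the pool.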
++ */ ++ wakeup_bdflush(); ++ ++ spin_lock_irqsave(&pool->lock, flags); ++ if (likely(pool->curr_nr)) { ++ element = remove_element(pool); ++ spin_unlock_irqrestore(&pool->lock, flags); ++ return element; ++ } ++ spin_unlock_irqrestore(&pool->lock, flags); ++ ++ /* We must not sleep in the GFP_ATOMIC case */ ++ if (gfp_mask == gfp_nowait) ++ return NULL; ++ ++ run_task_queue(&tq_disk); ++ ++ add_wait_queue_exclusive(&pool->wait, &wait); ++ set_task_state(current, TASK_UNINTERRUPTIBLE); ++ ++ spin_lock_irqsave(&pool->lock, flags); ++ curr_nr = pool->curr_nr; ++ spin_unlock_irqrestore(&pool->lock, flags); ++ ++ if (!curr_nr) ++ schedule(); ++ ++ current->state = TASK_RUNNING; ++ remove_wait_queue(&pool->wait, &wait); ++ ++ goto repeat_alloc; ++} ++ ++/** ++ * mempool_free - return an element to the pool. ++ * @element: pool element pointer. ++ * @pool: pointer to the memory pool which was allocated via ++ * mempool_create(). ++ * ++ * this function only sleeps if the free_fn() function sleeps. ++ */ ++void mempool_free(void *element, mempool_t *pool) ++{ ++ unsigned long flags; ++ ++ if (pool->curr_nr < pool->min_nr) { ++ spin_lock_irqsave(&pool->lock, flags); ++ if (pool->curr_nr < pool->min_nr) { ++ add_element(pool, element); ++ spin_unlock_irqrestore(&pool->lock, flags); ++ wake_up(&pool->wait); ++ return; ++ } ++ spin_unlock_irqrestore(&pool->lock, flags); ++ } ++ pool->free(element, pool->pool_data); ++} ++ ++/* ++ * A commonly used alloc and free fn. ++ */ ++void *mempool_alloc_slab(int gfp_mask, void *pool_data) ++{ ++ kmem_cache_t *mem = (kmem_cache_t *) pool_data; ++ return kmem_cache_alloc(mem, gfp_mask); ++} ++ ++void mempool_free_slab(void *element, void *pool_data) ++{ ++ kmem_cache_t *mem = (kmem_cache_t *) pool_data; ++ kmem_cache_free(mem, element); ++} ++ ++ ++EXPORT_SYMBOL(mempool_create); ++EXPORT_SYMBOL(mempool_resize); ++EXPORT_SYMBOL(mempool_destroy); ++EXPORT_SYMBOL(mempool_alloc); ++EXPORT_SYMBOL(mempool_free); ++EXPORT_SYMBOL(mempool_alloc_slab); ++EXPORT_SYMBOL(mempool_free_slab); diff --git a/patches/common/linux-2.4.20-vcalloc.patch b/patches/common/linux-2.4.20-vcalloc.patch new file mode 100644 index 0000000..f088450 --- /dev/null +++ b/patches/common/linux-2.4.20-vcalloc.patch @@ -0,0 +1,49 @@ +diff -ruN linux-2.4.20/mm/vmalloc.c linux/mm/vmalloc.c +--- linux-2.4.20/mm/vmalloc.c Fri Jan 10 16:36:03 2003 ++++ linux/mm/vmalloc.c Wed Mar 26 12:53:19 2003 +@@ -327,3 +327,22 @@ + read_unlock(&vmlist_lock); + return buf - buf_start; + } ++ ++void *vcalloc(unsigned long nmemb, unsigned long elem_size) ++{ ++ unsigned long size; ++ void *addr; ++ ++ /* ++ * Check that we're not going to overflow. 
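++	 * nmemb * elem_size must not wrap past ULONG_MAX; if it
++	 * would, fail the allocation rather than silently
++	 * allocating too little.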
++ */ ++ if (nmemb > (ULONG_MAX / elem_size)) ++ return NULL; ++ ++ size = nmemb * elem_size; ++ addr = vmalloc(size); ++ if (addr) ++ memset(addr, 0, size); ++ ++ return addr; ++} +diff -ruN linux-2.4.20/include/linux/vmalloc.h linux/include/linux/vmalloc.h +--- linux-2.4.20/include/linux/vmalloc.h Fri Jan 10 16:35:58 2003 ++++ linux/include/linux/vmalloc.h Wed Mar 26 12:53:19 2003 +@@ -26,6 +26,7 @@ + extern void vmfree_area_pages(unsigned long address, unsigned long size); + extern int vmalloc_area_pages(unsigned long address, unsigned long size, + int gfp_mask, pgprot_t prot); ++extern void *vcalloc(unsigned long nmemb, unsigned long elem_size); + + /* + * Allocate any pages +diff -ruN linux-2.4.20/kernel/ksyms.c linux/kernel/ksyms.c +--- linux-2.4.20/kernel/ksyms.c Fri Jan 10 16:36:02 2003 ++++ linux/kernel/ksyms.c Wed Mar 26 12:53:19 2003 +@@ -111,6 +111,7 @@ + EXPORT_SYMBOL(vfree); + EXPORT_SYMBOL(__vmalloc); + EXPORT_SYMBOL(vmalloc_to_page); ++EXPORT_SYMBOL(vcalloc); + EXPORT_SYMBOL(mem_map); + EXPORT_SYMBOL(remap_page_range); + EXPORT_SYMBOL(max_mapnr); + diff --git a/patches/linux-2.4.20-VFS-lock.patch b/patches/linux-2.4.20-VFS-lock.patch new file mode 100644 index 0000000..33cffae --- /dev/null +++ b/patches/linux-2.4.20-VFS-lock.patch @@ -0,0 +1,243 @@ +diff -ruN linux-2.4.20/drivers/md/lvm.c linux/drivers/md/lvm.c +--- linux-2.4.20/drivers/md/lvm.c Fri Jan 10 16:34:50 2003 ++++ linux/drivers/md/lvm.c Wed Mar 26 15:36:34 2003 +@@ -229,9 +229,6 @@ + #define DEVICE_OFF(device) + #define LOCAL_END_REQUEST + +-/* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */ +-/* #define LVM_VFS_ENHANCEMENT */ +- + #include + #include + #include +@@ -2171,12 +2168,8 @@ + if (lv_ptr->lv_access & LV_SNAPSHOT) { + lv_t *org = lv_ptr->lv_snapshot_org, *last; + +- /* sync the original logical volume */ +- fsync_dev(org->lv_dev); +-#ifdef LVM_VFS_ENHANCEMENT + /* VFS function call to sync and lock the filesystem */ + fsync_dev_lockfs(org->lv_dev); +-#endif + + down_write(&org->lv_lock); + org->lv_access |= LV_SNAPSHOT_ORG; +@@ -2201,11 +2194,9 @@ + else + set_device_ro(lv_ptr->lv_dev, 1); + +-#ifdef LVM_VFS_ENHANCEMENT + /* VFS function call to unlock the filesystem */ + if (lv_ptr->lv_access & LV_SNAPSHOT) + unlockfs(lv_ptr->lv_snapshot_org->lv_dev); +-#endif + + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = + lvm_fs_create_lv(vg_ptr, lv_ptr); +diff -ruN linux-2.4.20/fs/buffer.c linux/fs/buffer.c +--- linux-2.4.20/fs/buffer.c Fri Jan 10 16:35:24 2003 ++++ linux/fs/buffer.c Wed Mar 26 15:36:34 2003 +@@ -361,6 +361,38 @@ + fsync_dev(dev); + } + ++int fsync_dev_lockfs(kdev_t dev) ++{ ++ /* you are not allowed to try locking all the filesystems ++ ** on the system, your chances of getting through without ++ ** total deadlock are slim to none. 
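/*
 * Sketch of the caller protocol this function exists for (it mirrors
 * the lvm.c snapshot hunk earlier in this patch; error handling is
 * omitted and "origin_dev" is an illustrative name):
 *
 *	fsync_dev_lockfs(origin_dev);	// flush, then freeze the fs
 *	// ... set up the copy-on-write snapshot ...
 *	unlockfs(origin_dev);		// thaw
 *
 * Freezing a single device's filesystem is fine; the warning above is
 * why the dev == 0 case below falls back to a plain fsync_dev()
 * rather than trying to lock every mounted filesystem at once.
 */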
++ */ ++ if (!dev) ++ return fsync_dev(dev) ; ++ ++ sync_buffers(dev, 0); ++ ++ lock_kernel(); ++ /* note, the FS might need to start transactions to ++ ** sync the inodes, or the quota, no locking until ++ ** after these are done ++ */ ++ sync_inodes(dev); ++#ifdef DQUOT_SYNC_DEV ++ DQUOT_SYNC_DEV(dev); ++#else ++ DQUOT_SYNC(dev); ++#endif ++ /* if inodes or quotas could be dirtied during the ++ ** sync_supers_lockfs call, the FS is responsible for getting ++ ** them on disk, without deadlocking against the lock ++ */ ++ sync_supers_lockfs(dev) ; ++ unlock_kernel(); ++ ++ return sync_buffers(dev, 1) ; ++} ++ + asmlinkage long sys_sync(void) + { + fsync_dev(0); +diff -ruN linux-2.4.20/fs/reiserfs/super.c linux/fs/reiserfs/super.c +--- linux-2.4.20/fs/reiserfs/super.c Fri Jan 10 16:35:32 2003 ++++ linux/fs/reiserfs/super.c Wed Mar 26 15:36:34 2003 +@@ -44,7 +44,7 @@ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + reiserfs_block_writes(&th) ; +- journal_end(&th, s, 1) ; ++ journal_end_sync(&th, s, 1) ; + } + s->s_dirt = dirty; + unlock_kernel() ; +diff -ruN linux-2.4.20/fs/super.c linux/fs/super.c +--- linux-2.4.20/fs/super.c Fri Jan 10 16:35:25 2003 ++++ linux/fs/super.c Wed Mar 26 15:36:34 2003 +@@ -37,6 +37,13 @@ + LIST_HEAD(super_blocks); + spinlock_t sb_lock = SPIN_LOCK_UNLOCKED; + ++/* ++ * lock/unlockfs grab a read lock on s_umount, but you need this lock to ++ * make sure no lockfs runs are in progress before inserting/removing ++ * supers from the list. ++ */ ++static DECLARE_MUTEX(lockfs_sem); ++ + /* + * Handling of filesystem drivers list. + * Rules: +@@ -431,6 +438,19 @@ + put_super(sb); + } + ++static void write_super_lockfs(struct super_block *sb) ++{ ++ lock_super(sb); ++ if (sb->s_root && sb->s_op) { ++ if (sb->s_dirt && sb->s_op->write_super) ++ sb->s_op->write_super(sb); ++ if (sb->s_op->write_super_lockfs) { ++ sb->s_op->write_super_lockfs(sb); ++ } ++ } ++ unlock_super(sb); ++} ++ + static inline void write_super(struct super_block *sb) + { + lock_super(sb); +@@ -474,6 +494,39 @@ + spin_unlock(&sb_lock); + } + ++/* ++ * Note: don't check the dirty flag before waiting, we want the lock ++ * to happen every time this is called. dev must be non-zero ++ */ ++void sync_supers_lockfs(kdev_t dev) ++{ ++ struct super_block * sb; ++ ++ down(&lockfs_sem) ; ++ if (dev) { ++ sb = get_super(dev); ++ if (sb) { ++ write_super_lockfs(sb); ++ drop_super(sb); ++ } ++ } ++} ++ ++void unlockfs(kdev_t dev) ++{ ++ struct super_block * sb; ++ ++ if (dev) { ++ sb = get_super(dev); ++ if (sb) { ++ if (sb->s_op && sb->s_op->unlockfs) ++ sb->s_op->unlockfs(sb) ; ++ drop_super(sb); ++ } ++ } ++ up(&lockfs_sem) ; ++} ++ + /** + * get_super - get the superblock of a device + * @dev: device to get the superblock for +@@ -693,6 +746,7 @@ + goto out1; + + error = -EBUSY; ++ down(&lockfs_sem); + restart: + spin_lock(&sb_lock); + +@@ -704,6 +758,7 @@ + ((flags ^ old->s_flags) & MS_RDONLY)) { + spin_unlock(&sb_lock); + destroy_super(s); ++ up(&lockfs_sem); + goto out1; + } + if (!grab_super(old)) +@@ -711,12 +766,14 @@ + destroy_super(s); + blkdev_put(bdev, BDEV_FS); + path_release(&nd); ++ up(&lockfs_sem); + return old; + } + s->s_dev = dev; + s->s_bdev = bdev; + s->s_flags = flags; + insert_super(s, fs_type); ++ up(&lockfs_sem); + if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 
1 : 0)) + goto Einval; + s->s_flags |= MS_ACTIVE; +@@ -824,7 +881,10 @@ + if (!deactivate_super(sb)) + return; + ++ down(&lockfs_sem); + down_write(&sb->s_umount); ++ up(&lockfs_sem); ++ + sb->s_root = NULL; + /* Need to clean after the sucker */ + if (fs->fs_flags & FS_LITTER) +diff -ruN linux-2.4.20/include/linux/fs.h linux/include/linux/fs.h +--- linux-2.4.20/include/linux/fs.h Fri Jan 10 16:35:55 2003 ++++ linux/include/linux/fs.h Wed Mar 26 15:36:34 2003 +@@ -1225,6 +1225,7 @@ + extern int sync_buffers(kdev_t, int); + extern void sync_dev(kdev_t); + extern int fsync_dev(kdev_t); ++extern int fsync_dev_lockfs(kdev_t); + extern int fsync_super(struct super_block *); + extern int fsync_no_super(kdev_t); + extern void sync_inodes_sb(struct super_block *); +@@ -1241,6 +1242,8 @@ + extern int filemap_fdatasync(struct address_space *); + extern int filemap_fdatawait(struct address_space *); + extern void sync_supers(kdev_t); ++extern void sync_supers_lockfs(kdev_t); ++extern void unlockfs(kdev_t); + extern int bmap(struct inode *, int); + extern int notify_change(struct dentry *, struct iattr *); + extern int permission(struct inode *, int); +diff -ruN linux-2.4.20/kernel/ksyms.c linux/kernel/ksyms.c +--- linux-2.4.20/kernel/ksyms.c Fri Jan 10 16:36:02 2003 ++++ linux/kernel/ksyms.c Wed Mar 26 15:36:34 2003 +@@ -184,6 +184,8 @@ + EXPORT_SYMBOL(invalidate_inode_pages); + EXPORT_SYMBOL(truncate_inode_pages); + EXPORT_SYMBOL(fsync_dev); ++EXPORT_SYMBOL(fsync_dev_lockfs); ++EXPORT_SYMBOL(unlockfs); + EXPORT_SYMBOL(fsync_no_super); + EXPORT_SYMBOL(permission); + EXPORT_SYMBOL(vfs_permission); diff --git a/patches/linux-2.4.20-devmapper-ioctl.patch b/patches/linux-2.4.20-devmapper-ioctl.patch new file mode 100644 index 0000000..523b8ba --- /dev/null +++ b/patches/linux-2.4.20-devmapper-ioctl.patch @@ -0,0 +1,7546 @@ +diff -ruN linux-2.4.20/Documentation/Configure.help linux/Documentation/Configure.help +--- linux-2.4.20/Documentation/Configure.help Fri Jan 10 16:33:55 2003 ++++ linux/Documentation/Configure.help Wed Mar 26 12:53:19 2003 +@@ -1822,6 +1822,20 @@ + want), say M here and read . The + module will be called lvm-mod.o. + ++Device-mapper support ++CONFIG_BLK_DEV_DM ++ Device-mapper is a low level volume manager. It works by allowing ++ people to specify mappings for ranges of logical sectors. Various ++ mapping types are available, in addition people may write their own ++ modules containing custom mappings if they wish. ++ ++ Higher level volume managers such as LVM2 use this driver. ++ ++ If you want to compile this as a module, say M here and read ++ . The module will be called dm-mod.o. ++ ++ If unsure, say N. ++ + Multiple devices driver support (RAID and LVM) + CONFIG_MD + Support multiple physical spindles through a single logical device. +diff -ruN linux-2.4.20/MAINTAINERS linux/MAINTAINERS +--- linux-2.4.20/MAINTAINERS Fri Jan 10 16:33:49 2003 ++++ linux/MAINTAINERS Wed Mar 26 12:53:19 2003 +@@ -439,6 +439,13 @@ + W: http://www.debian.org/~dz/i8k/ + S: Maintained + ++DEVICE MAPPER ++P: Joe Thornber ++M: dm@uk.sistina.com ++L: linux-LVM@sistina.com ++W: http://www.sistina.com/lvm ++S: Maintained ++ + DEVICE NUMBER REGISTRY + P: H. 
Peter Anvin + M: hpa@zytor.com +diff -ruN linux-2.4.20/arch/mips64/kernel/ioctl32.c linux/arch/mips64/kernel/ioctl32.c +--- linux-2.4.20/arch/mips64/kernel/ioctl32.c Fri Jan 10 16:34:18 2003 ++++ linux/arch/mips64/kernel/ioctl32.c Wed Mar 26 13:37:24 2003 +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + + #include + #undef __KERNEL__ /* This file was born to be ugly ... */ +@@ -914,6 +915,20 @@ + IOCTL32_DEFAULT(STOP_ARRAY_RO), + IOCTL32_DEFAULT(RESTART_ARRAY_RW), + #endif /* CONFIG_MD */ ++ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++ IOCTL32_DEFAULT(DM_VERSION), ++ IOCTL32_DEFAULT(DM_REMOVE_ALL), ++ IOCTL32_DEFAULT(DM_DEV_CREATE), ++ IOCTL32_DEFAULT(DM_DEV_REMOVE), ++ IOCTL32_DEFAULT(DM_DEV_RELOAD), ++ IOCTL32_DEFAULT(DM_DEV_SUSPEND), ++ IOCTL32_DEFAULT(DM_DEV_RENAME), ++ IOCTL32_DEFAULT(DM_DEV_DEPS), ++ IOCTL32_DEFAULT(DM_DEV_STATUS), ++ IOCTL32_DEFAULT(DM_TARGET_STATUS), ++ IOCTL32_DEFAULT(DM_TARGET_WAIT), ++#endif /* CONFIG_BLK_DEV_DM */ + + IOCTL32_DEFAULT(MTIOCTOP), /* mtio.h ioctls */ + IOCTL32_HANDLER(MTIOCGET32, mt_ioctl_trans), +diff -ruN linux-2.4.20/arch/parisc/kernel/ioctl32.c linux/arch/parisc/kernel/ioctl32.c +--- linux-2.4.20/arch/parisc/kernel/ioctl32.c Fri Jan 10 16:34:19 2003 ++++ linux/arch/parisc/kernel/ioctl32.c Wed Mar 26 14:28:37 2003 +@@ -55,6 +55,7 @@ + #define max max */ + #include + #endif /* LVM */ ++#include + + #include + /* Ugly hack. */ +@@ -3415,6 +3416,20 @@ + COMPATIBLE_IOCTL(LV_BMAP) + COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE) + #endif /* LVM */ ++/* Device-Mapper */ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++COMPATIBLE_IOCTL(DM_VERSION) ++COMPATIBLE_IOCTL(DM_REMOVE_ALL) ++COMPATIBLE_IOCTL(DM_DEV_CREATE) ++COMPATIBLE_IOCTL(DM_DEV_REMOVE) ++COMPATIBLE_IOCTL(DM_DEV_RELOAD) ++COMPATIBLE_IOCTL(DM_DEV_SUSPEND) ++COMPATIBLE_IOCTL(DM_DEV_RENAME) ++COMPATIBLE_IOCTL(DM_DEV_DEPS) ++COMPATIBLE_IOCTL(DM_DEV_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_WAIT) ++#endif /* CONFIG_BLK_DEV_DM */ + #if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE) + COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC) + COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID) +diff -ruN linux-2.4.20/arch/ppc64/kernel/ioctl32.c linux/arch/ppc64/kernel/ioctl32.c +--- linux-2.4.20/arch/ppc64/kernel/ioctl32.c Fri Jan 10 16:34:24 2003 ++++ linux/arch/ppc64/kernel/ioctl32.c Wed Mar 26 14:31:43 2003 +@@ -66,6 +66,7 @@ + #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) + #include + #endif /* LVM */ ++#include + + #include + /* Ugly hack. 
*/ +@@ -4362,6 +4363,20 @@ + COMPATIBLE_IOCTL(NBD_PRINT_DEBUG), + COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS), + COMPATIBLE_IOCTL(NBD_DISCONNECT), ++/* device-mapper */ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++COMPATIBLE_IOCTL(DM_VERSION), ++COMPATIBLE_IOCTL(DM_REMOVE_ALL), ++COMPATIBLE_IOCTL(DM_DEV_CREATE), ++COMPATIBLE_IOCTL(DM_DEV_REMOVE), ++COMPATIBLE_IOCTL(DM_DEV_RELOAD), ++COMPATIBLE_IOCTL(DM_DEV_SUSPEND), ++COMPATIBLE_IOCTL(DM_DEV_RENAME), ++COMPATIBLE_IOCTL(DM_DEV_DEPS), ++COMPATIBLE_IOCTL(DM_DEV_STATUS), ++COMPATIBLE_IOCTL(DM_TARGET_STATUS), ++COMPATIBLE_IOCTL(DM_TARGET_WAIT), ++#endif /* CONFIG_BLK_DEV_DM */ + /* Remove *PRIVATE in 2.5 */ + COMPATIBLE_IOCTL(SIOCDEVPRIVATE), + COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1), +diff -ruN linux-2.4.20/arch/s390x/kernel/ioctl32.c linux/arch/s390x/kernel/ioctl32.c +--- linux-2.4.20/arch/s390x/kernel/ioctl32.c Fri Jan 10 16:34:26 2003 ++++ linux/arch/s390x/kernel/ioctl32.c Wed Mar 26 13:36:43 2003 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -507,6 +508,18 @@ + IOCTL32_DEFAULT(VT_UNLOCKSWITCH), + + IOCTL32_DEFAULT(SIOCGSTAMP), ++ ++ IOCTL32_DEFAULT(DM_VERSION), ++ IOCTL32_DEFAULT(DM_REMOVE_ALL), ++ IOCTL32_DEFAULT(DM_DEV_CREATE), ++ IOCTL32_DEFAULT(DM_DEV_REMOVE), ++ IOCTL32_DEFAULT(DM_DEV_RELOAD), ++ IOCTL32_DEFAULT(DM_DEV_SUSPEND), ++ IOCTL32_DEFAULT(DM_DEV_RENAME), ++ IOCTL32_DEFAULT(DM_DEV_DEPS), ++ IOCTL32_DEFAULT(DM_DEV_STATUS), ++ IOCTL32_DEFAULT(DM_TARGET_STATUS), ++ IOCTL32_DEFAULT(DM_TARGET_WAIT), + + IOCTL32_HANDLER(SIOCGIFNAME, dev_ifname32), + IOCTL32_HANDLER(SIOCGIFCONF, dev_ifconf), +diff -ruN linux-2.4.20/arch/sparc64/kernel/ioctl32.c linux/arch/sparc64/kernel/ioctl32.c +--- linux-2.4.20/arch/sparc64/kernel/ioctl32.c Fri Jan 10 16:34:30 2003 ++++ linux/arch/sparc64/kernel/ioctl32.c Wed Mar 26 14:32:03 2003 +@@ -55,6 +55,7 @@ + #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) + #include + #endif /* LVM */ ++#include + + #include + /* Ugly hack. */ +@@ -5023,6 +5024,21 @@ + COMPATIBLE_IOCTL(NBD_PRINT_DEBUG) + COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS) + COMPATIBLE_IOCTL(NBD_DISCONNECT) ++/* device-mapper */ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++COMPATIBLE_IOCTL(DM_VERSION) ++COMPATIBLE_IOCTL(DM_REMOVE_ALL) ++COMPATIBLE_IOCTL(DM_DEV_CREATE) ++COMPATIBLE_IOCTL(DM_DEV_REMOVE) ++COMPATIBLE_IOCTL(DM_DEV_RELOAD) ++COMPATIBLE_IOCTL(DM_DEV_SUSPEND) ++COMPATIBLE_IOCTL(DM_DEV_RENAME) ++COMPATIBLE_IOCTL(DM_DEV_DEPS) ++COMPATIBLE_IOCTL(DM_DEV_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_WAIT) ++#endif /* CONFIG_BLK_DEV_DM */ ++ + /* And these ioctls need translation */ + HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) + HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) +diff -ruN linux-2.4.20/arch/x86_64/ia32/ia32_ioctl.c linux/arch/x86_64/ia32/ia32_ioctl.c +--- linux-2.4.20/arch/x86_64/ia32/ia32_ioctl.c Fri Jan 10 16:34:32 2003 ++++ linux/arch/x86_64/ia32/ia32_ioctl.c Wed Mar 26 14:29:31 2003 +@@ -62,6 +62,7 @@ + #define max max + #include + #endif /* LVM */ ++#include + + #include + /* Ugly hack. 
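/*
 * Why the device-mapper ioctls can be registered as compatible in all
 * of these per-architecture tables instead of needing translation
 * thunks: the dm-ioctl interface uses fixed-width fields and locates
 * its variable-sized payload by byte offset rather than by embedded
 * pointer, so the same bytes parse identically for 32- and 64-bit
 * callers.  Roughly (sketch; see the dm-ioctl.h definitions added by
 * this series for the authoritative layout):
 *
 *	spec = (struct dm_target_spec *)
 *		((char *) dmi + dmi->data_start);   // offset, not pointer
 */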
*/ +@@ -3776,6 +3777,20 @@ + COMPATIBLE_IOCTL(LV_BMAP) + COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE) + #endif /* LVM */ ++/* Device-Mapper */ ++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) ++COMPATIBLE_IOCTL(DM_VERSION) ++COMPATIBLE_IOCTL(DM_REMOVE_ALL) ++COMPATIBLE_IOCTL(DM_DEV_CREATE) ++COMPATIBLE_IOCTL(DM_DEV_REMOVE) ++COMPATIBLE_IOCTL(DM_DEV_RELOAD) ++COMPATIBLE_IOCTL(DM_DEV_SUSPEND) ++COMPATIBLE_IOCTL(DM_DEV_RENAME) ++COMPATIBLE_IOCTL(DM_DEV_DEPS) ++COMPATIBLE_IOCTL(DM_DEV_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_STATUS) ++COMPATIBLE_IOCTL(DM_TARGET_WAIT) ++#endif /* CONFIG_BLK_DEV_DM */ + #if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE) + COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC) + COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID) +diff -ruN linux-2.4.20/drivers/md/Config.in linux/drivers/md/Config.in +--- linux-2.4.20/drivers/md/Config.in Fri Jan 10 16:34:50 2003 ++++ linux/drivers/md/Config.in Wed Mar 26 12:53:19 2003 +@@ -14,5 +14,8 @@ + dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD + + dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD ++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then ++ dep_tristate ' Device-mapper support (EXPERIMENTAL)' CONFIG_BLK_DEV_DM $CONFIG_MD ++fi + + endmenu +diff -ruN linux-2.4.20/drivers/md/Makefile linux/drivers/md/Makefile +--- linux-2.4.20/drivers/md/Makefile Fri Jan 10 16:34:50 2003 ++++ linux/drivers/md/Makefile Wed Mar 26 12:53:19 2003 +@@ -4,9 +4,12 @@ + + O_TARGET := mddev.o + +-export-objs := md.o xor.o ++export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o + list-multi := lvm-mod.o + lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o ++dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \ ++ dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \ ++ kcopyd.o + + # Note: link order is important. All raid personalities + # and xor.o must come before md.o, as they each initialise +@@ -20,8 +23,12 @@ + obj-$(CONFIG_MD_MULTIPATH) += multipath.o + obj-$(CONFIG_BLK_DEV_MD) += md.o + obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o ++obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o + + include $(TOPDIR)/Rules.make + + lvm-mod.o: $(lvm-mod-objs) + $(LD) -r -o $@ $(lvm-mod-objs) ++ ++dm-mod.o: $(dm-mod-objs) ++ $(LD) -r -o $@ $(dm-mod-objs) +diff -ruN linux-2.4.20/drivers/md/dm-exception-store.c linux/drivers/md/dm-exception-store.c +--- linux-2.4.20/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-exception-store.c Wed Mar 26 14:21:16 2003 +@@ -0,0 +1,704 @@ ++/* ++ * dm-snapshot.c ++ * ++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm-snapshot.h" ++#include "kcopyd.h" ++ ++#include ++#include ++#include ++#include ++ ++/*----------------------------------------------------------------- ++ * Persistent snapshots, by persistent we mean that the snapshot ++ * will survive a reboot. ++ *---------------------------------------------------------------*/ ++ ++/* ++ * We need to store a record of which parts of the origin have ++ * been copied to the snapshot device. The snapshot code ++ * requires that we copy exception chunks to chunk aligned areas ++ * of the COW store. It makes sense therefore, to store the ++ * metadata in chunk size blocks. ++ * ++ * There is no backward or forward compatibility implemented, ++ * snapshots with different disk versions than the kernel will ++ * not be usable. 
It is expected that "lvcreate" will blank out ++ * the start of a fresh COW device before calling the snapshot ++ * constructor. ++ * ++ * The first chunk of the COW device just contains the header. ++ * After this there is a chunk filled with exception metadata, ++ * followed by as many exception chunks as can fit in the ++ * metadata areas. ++ * ++ * All on disk structures are in little-endian format. The end ++ * of the exceptions info is indicated by an exception with a ++ * new_chunk of 0, which is invalid since it would point to the ++ * header chunk. ++ */ ++ ++/* ++ * Magic for persistent snapshots: "SnAp" - Feeble isn't it. ++ */ ++#define SNAP_MAGIC 0x70416e53 ++ ++/* ++ * The on-disk version of the metadata. ++ */ ++#define SNAPSHOT_DISK_VERSION 1 ++ ++struct disk_header { ++ uint32_t magic; ++ ++ /* ++ * Is this snapshot valid. There is no way of recovering ++ * an invalid snapshot. ++ */ ++ int valid; ++ ++ /* ++ * Simple, incrementing version. no backward ++ * compatibility. ++ */ ++ uint32_t version; ++ ++ /* In sectors */ ++ uint32_t chunk_size; ++}; ++ ++struct disk_exception { ++ uint64_t old_chunk; ++ uint64_t new_chunk; ++}; ++ ++struct commit_callback { ++ void (*callback) (void *, int success); ++ void *context; ++}; ++ ++/* ++ * The top level structure for a persistent exception store. ++ */ ++struct pstore { ++ struct dm_snapshot *snap; /* up pointer to my snapshot */ ++ int version; ++ int valid; ++ uint32_t chunk_size; ++ uint32_t exceptions_per_area; ++ ++ /* ++ * Now that we have an asynchronous kcopyd there is no ++ * need for large chunk sizes, so it wont hurt to have a ++ * whole chunks worth of metadata in memory at once. ++ */ ++ void *area; ++ struct kiobuf *iobuf; ++ ++ /* ++ * Used to keep track of which metadata area the data in ++ * 'chunk' refers to. ++ */ ++ uint32_t current_area; ++ ++ /* ++ * The next free chunk for an exception. ++ */ ++ uint32_t next_free; ++ ++ /* ++ * The index of next free exception in the current ++ * metadata area. ++ */ ++ uint32_t current_committed; ++ ++ atomic_t pending_count; ++ uint32_t callback_count; ++ struct commit_callback *callbacks; ++}; ++ ++/* ++ * For performance reasons we want to defer writing a committed ++ * exceptions metadata to disk so that we can amortise away this ++ * exensive operation. ++ * ++ * For the initial version of this code we will remain with ++ * synchronous io. There are some deadlock issues with async ++ * that I haven't yet worked out. ++ */ ++static int do_io(int rw, struct kcopyd_region *where, struct kiobuf *iobuf) ++{ ++ int i, sectors_per_block, nr_blocks, start; ++ int blocksize = get_hardsect_size(where->dev); ++ int status; ++ ++ sectors_per_block = blocksize / SECTOR_SIZE; ++ ++ nr_blocks = where->count / sectors_per_block; ++ start = where->sector / sectors_per_block; ++ ++ for (i = 0; i < nr_blocks; i++) ++ iobuf->blocks[i] = start++; ++ ++ iobuf->length = where->count << 9; ++ iobuf->locked = 1; ++ ++ status = brw_kiovec(rw, 1, &iobuf, where->dev, iobuf->blocks, ++ blocksize); ++ if (status != (where->count << 9)) ++ return -EIO; ++ ++ return 0; ++} ++ ++static int allocate_iobuf(struct pstore *ps) ++{ ++ size_t i, r = -ENOMEM, len, nr_pages; ++ struct page *page; ++ ++ len = ps->chunk_size << SECTOR_SHIFT; ++ ++ /* ++ * Allocate the chunk_size block of memory that will hold ++ * a single metadata area. 
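/*
 * Worked example of the layout described in the header comment above,
 * with illustrative numbers: chunk_size = 8 sectors (4KiB) and the
 * 16-byte struct disk_exception give
 *
 *	exceptions_per_area = 4096 / 16 = 256
 *
 *	chunk 0		header
 *	chunk 1		metadata area 0
 *	chunks 2..257	data chunks for area 0's exceptions
 *	chunk 258	metadata area 1
 *	...
 *
 * which is why area_io() further down computes
 * chunk = 1 + (exceptions_per_area + 1) * area.
 */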
++ */ ++ ps->area = vmalloc(len); ++ if (!ps->area) ++ return r; ++ ++ if (alloc_kiovec(1, &ps->iobuf)) ++ goto bad; ++ ++ nr_pages = ps->chunk_size / (PAGE_SIZE / SECTOR_SIZE); ++ r = expand_kiobuf(ps->iobuf, nr_pages); ++ if (r) ++ goto bad; ++ ++ /* ++ * We lock the pages for ps->area into memory since they'll be ++ * doing a lot of io. ++ */ ++ for (i = 0; i < nr_pages; i++) { ++ page = vmalloc_to_page(ps->area + (i * PAGE_SIZE)); ++ LockPage(page); ++ ps->iobuf->maplist[i] = page; ++ ps->iobuf->nr_pages++; ++ } ++ ++ ps->iobuf->nr_pages = nr_pages; ++ ps->iobuf->offset = 0; ++ ++ return 0; ++ ++ bad: ++ if (ps->iobuf) ++ free_kiovec(1, &ps->iobuf); ++ ++ if (ps->area) ++ vfree(ps->area); ++ ps->iobuf = NULL; ++ return r; ++} ++ ++static void free_iobuf(struct pstore *ps) ++{ ++ int i; ++ ++ for (i = 0; i < ps->iobuf->nr_pages; i++) ++ UnlockPage(ps->iobuf->maplist[i]); ++ ps->iobuf->locked = 0; ++ ++ free_kiovec(1, &ps->iobuf); ++ vfree(ps->area); ++} ++ ++/* ++ * Read or write a chunk aligned and sized block of data from a device. ++ */ ++static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) ++{ ++ int r; ++ struct kcopyd_region where; ++ ++ where.dev = ps->snap->cow->dev; ++ where.sector = ps->chunk_size * chunk; ++ where.count = ps->chunk_size; ++ ++ r = do_io(rw, &where, ps->iobuf); ++ if (r) ++ return r; ++ ++ return 0; ++} ++ ++/* ++ * Read or write a metadata area. Remembering to skip the first ++ * chunk which holds the header. ++ */ ++static int area_io(struct pstore *ps, uint32_t area, int rw) ++{ ++ int r; ++ uint32_t chunk; ++ ++ /* convert a metadata area index to a chunk index */ ++ chunk = 1 + ((ps->exceptions_per_area + 1) * area); ++ ++ r = chunk_io(ps, chunk, rw); ++ if (r) ++ return r; ++ ++ ps->current_area = area; ++ return 0; ++} ++ ++static int zero_area(struct pstore *ps, uint32_t area) ++{ ++ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); ++ return area_io(ps, area, WRITE); ++} ++ ++static int read_header(struct pstore *ps, int *new_snapshot) ++{ ++ int r; ++ struct disk_header *dh; ++ ++ r = chunk_io(ps, 0, READ); ++ if (r) ++ return r; ++ ++ dh = (struct disk_header *) ps->area; ++ ++ if (dh->magic == 0) { ++ *new_snapshot = 1; ++ ++ } else if (dh->magic == SNAP_MAGIC) { ++ *new_snapshot = 0; ++ ps->valid = dh->valid; ++ ps->version = dh->version; ++ ps->chunk_size = dh->chunk_size; ++ ++ } else { ++ DMWARN("Invalid/corrupt snapshot"); ++ r = -ENXIO; ++ } ++ ++ return r; ++} ++ ++static int write_header(struct pstore *ps) ++{ ++ struct disk_header *dh; ++ ++ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); ++ ++ dh = (struct disk_header *) ps->area; ++ dh->magic = SNAP_MAGIC; ++ dh->valid = ps->valid; ++ dh->version = ps->version; ++ dh->chunk_size = ps->chunk_size; ++ ++ return chunk_io(ps, 0, WRITE); ++} ++ ++/* ++ * Access functions for the disk exceptions, these do the endian conversions. 
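/*
 * The conversions matter on the big-endian architectures patched
 * above (ppc64, s390x, sparc64): committing old_chunk = 0x0102
 * stores bytes 02 01 00 00 00 00 00 00 on disk, and le64_to_cpu()
 * recovers 0x0102 on read, so a COW device written on one
 * architecture stays readable on another.
 */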
++ */ ++static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) ++{ ++ if (index >= ps->exceptions_per_area) ++ return NULL; ++ ++ return ((struct disk_exception *) ps->area) + index; ++} ++ ++static int read_exception(struct pstore *ps, ++ uint32_t index, struct disk_exception *result) ++{ ++ struct disk_exception *e; ++ ++ e = get_exception(ps, index); ++ if (!e) ++ return -EINVAL; ++ ++ /* copy it */ ++ result->old_chunk = le64_to_cpu(e->old_chunk); ++ result->new_chunk = le64_to_cpu(e->new_chunk); ++ ++ return 0; ++} ++ ++static int write_exception(struct pstore *ps, ++ uint32_t index, struct disk_exception *de) ++{ ++ struct disk_exception *e; ++ ++ e = get_exception(ps, index); ++ if (!e) ++ return -EINVAL; ++ ++ /* copy it */ ++ e->old_chunk = cpu_to_le64(de->old_chunk); ++ e->new_chunk = cpu_to_le64(de->new_chunk); ++ ++ return 0; ++} ++ ++/* ++ * Registers the exceptions that are present in the current area. ++ * 'full' is filled in to indicate if the area has been ++ * filled. ++ */ ++static int insert_exceptions(struct pstore *ps, int *full) ++{ ++ int i, r; ++ struct disk_exception de; ++ ++ /* presume the area is full */ ++ *full = 1; ++ ++ for (i = 0; i < ps->exceptions_per_area; i++) { ++ r = read_exception(ps, i, &de); ++ ++ if (r) ++ return r; ++ ++ /* ++ * If the new_chunk is pointing at the start of ++ * the COW device, where the first metadata area ++ * is we know that we've hit the end of the ++ * exceptions. Therefore the area is not full. ++ */ ++ if (de.new_chunk == 0LL) { ++ ps->current_committed = i; ++ *full = 0; ++ break; ++ } ++ ++ /* ++ * Keep track of the start of the free chunks. ++ */ ++ if (ps->next_free <= de.new_chunk) ++ ps->next_free = de.new_chunk + 1; ++ ++ /* ++ * Otherwise we add the exception to the snapshot. ++ */ ++ r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); ++ if (r) ++ return r; ++ } ++ ++ return 0; ++} ++ ++static int read_exceptions(struct pstore *ps) ++{ ++ uint32_t area; ++ int r, full = 1; ++ ++ /* ++ * Keeping reading chunks and inserting exceptions until ++ * we find a partially full area. ++ */ ++ for (area = 0; full; area++) { ++ r = area_io(ps, area, READ); ++ if (r) ++ return r; ++ ++ r = insert_exceptions(ps, &full); ++ if (r) ++ return r; ++ ++ area++; ++ } ++ ++ return 0; ++} ++ ++static inline struct pstore *get_info(struct exception_store *store) ++{ ++ return (struct pstore *) store->context; ++} ++ ++static int persistent_percentfull(struct exception_store *store) ++{ ++ struct pstore *ps = get_info(store); ++ return (ps->next_free * store->snap->chunk_size * 100) / ++ get_dev_size(store->snap->cow->dev); ++} ++ ++static void persistent_destroy(struct exception_store *store) ++{ ++ struct pstore *ps = get_info(store); ++ ++ vfree(ps->callbacks); ++ free_iobuf(ps); ++ kfree(ps); ++} ++ ++static int persistent_prepare(struct exception_store *store, ++ struct exception *e) ++{ ++ struct pstore *ps = get_info(store); ++ uint32_t stride; ++ sector_t size = get_dev_size(store->snap->cow->dev); ++ ++ /* Is there enough room ? */ ++ if (size < ((ps->next_free + 1) * store->snap->chunk_size)) ++ return -ENOSPC; ++ ++ e->new_chunk = ps->next_free; ++ ++ /* ++ * Move onto the next free pending, making sure to take ++ * into account the location of the metadata chunks. 
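/*
 * Worked example of the skip below, again assuming
 * exceptions_per_area = 256, i.e. stride = 257: metadata areas occupy
 * the chunks where chunk % 257 == 1 (chunks 1, 258, 515, ...).  If
 * next_free is 257, the increment makes it 258, which lands on the
 * metadata area at chunk 258 (258 % 257 == 1), so it is bumped once
 * more to 259, the next usable data chunk.
 */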
++ */ ++ stride = (ps->exceptions_per_area + 1); ++ if ((++ps->next_free % stride) == 1) ++ ps->next_free++; ++ ++ atomic_inc(&ps->pending_count); ++ return 0; ++} ++ ++static void persistent_commit(struct exception_store *store, ++ struct exception *e, ++ void (*callback) (void *, int success), ++ void *callback_context) ++{ ++ int r, i; ++ struct pstore *ps = get_info(store); ++ struct disk_exception de; ++ struct commit_callback *cb; ++ ++ de.old_chunk = e->old_chunk; ++ de.new_chunk = e->new_chunk; ++ write_exception(ps, ps->current_committed++, &de); ++ ++ /* ++ * Add the callback to the back of the array. This code ++ * is the only place where the callback array is ++ * manipulated, and we know that it will never be called ++ * multiple times concurrently. ++ */ ++ cb = ps->callbacks + ps->callback_count++; ++ cb->callback = callback; ++ cb->context = callback_context; ++ ++ /* ++ * If there are no more exceptions in flight, or we have ++ * filled this metadata area we commit the exceptions to ++ * disk. ++ */ ++ if (atomic_dec_and_test(&ps->pending_count) || ++ (ps->current_committed == ps->exceptions_per_area)) { ++ r = area_io(ps, ps->current_area, WRITE); ++ if (r) ++ ps->valid = 0; ++ ++ for (i = 0; i < ps->callback_count; i++) { ++ cb = ps->callbacks + i; ++ cb->callback(cb->context, r == 0 ? 1 : 0); ++ } ++ ++ ps->callback_count = 0; ++ } ++ ++ /* ++ * Have we completely filled the current area ? ++ */ ++ if (ps->current_committed == ps->exceptions_per_area) { ++ ps->current_committed = 0; ++ r = zero_area(ps, ps->current_area + 1); ++ if (r) ++ ps->valid = 0; ++ } ++} ++ ++static void persistent_drop(struct exception_store *store) ++{ ++ struct pstore *ps = get_info(store); ++ ++ ps->valid = 0; ++ if (write_header(ps)) ++ DMWARN("write header failed"); ++} ++ ++int dm_create_persistent(struct exception_store *store, uint32_t chunk_size) ++{ ++ int r, new_snapshot; ++ struct pstore *ps; ++ ++ /* allocate the pstore */ ++ ps = kmalloc(sizeof(*ps), GFP_KERNEL); ++ if (!ps) ++ return -ENOMEM; ++ ++ ps->snap = store->snap; ++ ps->valid = 1; ++ ps->version = SNAPSHOT_DISK_VERSION; ++ ps->chunk_size = chunk_size; ++ ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) / ++ sizeof(struct disk_exception); ++ ps->next_free = 2; /* skipping the header and first area */ ++ ps->current_committed = 0; ++ ++ r = allocate_iobuf(ps); ++ if (r) ++ goto bad; ++ ++ /* ++ * Allocate space for all the callbacks. ++ */ ++ ps->callback_count = 0; ++ atomic_set(&ps->pending_count, 0); ++ ps->callbacks = vcalloc(ps->exceptions_per_area, ++ sizeof(*ps->callbacks)); ++ ++ if (!ps->callbacks) ++ goto bad; ++ ++ /* ++ * Read the snapshot header. ++ */ ++ r = read_header(ps, &new_snapshot); ++ if (r) ++ goto bad; ++ ++ /* ++ * Do we need to setup a new snapshot ? ++ */ ++ if (new_snapshot) { ++ r = write_header(ps); ++ if (r) { ++ DMWARN("write_header failed"); ++ goto bad; ++ } ++ ++ r = zero_area(ps, 0); ++ if (r) { ++ DMWARN("zero_area(0) failed"); ++ goto bad; ++ } ++ ++ } else { ++ /* ++ * Sanity checks. ++ */ ++ if (!ps->valid) { ++ DMWARN("snapshot is marked invalid"); ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ if (ps->chunk_size != chunk_size) { ++ DMWARN("chunk size for existing snapshot different " ++ "from that requested"); ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ if (ps->version != SNAPSHOT_DISK_VERSION) { ++ DMWARN("unable to handle snapshot disk version %d", ++ ps->version); ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ /* ++ * Read the metadata. 
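/*
 * read_exceptions() walks the metadata areas in order, re-registering
 * every on-disk exception with dm_add_exception(), and stops at the
 * first partially filled area: an entry whose new_chunk is 0 marks
 * the end, since chunk 0 holds the header and can never be a valid
 * copy destination.
 */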
++ */ ++ r = read_exceptions(ps); ++ if (r) ++ goto bad; ++ } ++ ++ store->destroy = persistent_destroy; ++ store->prepare_exception = persistent_prepare; ++ store->commit_exception = persistent_commit; ++ store->drop_snapshot = persistent_drop; ++ store->percent_full = persistent_percentfull; ++ store->context = ps; ++ ++ return r; ++ ++ bad: ++ if (ps) { ++ if (ps->callbacks) ++ vfree(ps->callbacks); ++ ++ if (ps->iobuf) ++ free_iobuf(ps); ++ ++ kfree(ps); ++ } ++ return r; ++} ++ ++/*----------------------------------------------------------------- ++ * Implementation of the store for non-persistent snapshots. ++ *---------------------------------------------------------------*/ ++struct transient_c { ++ sector_t next_free; ++}; ++ ++void transient_destroy(struct exception_store *store) ++{ ++ kfree(store->context); ++} ++ ++int transient_prepare(struct exception_store *store, struct exception *e) ++{ ++ struct transient_c *tc = (struct transient_c *) store->context; ++ sector_t size = get_dev_size(store->snap->cow->dev); ++ ++ if (size < (tc->next_free + store->snap->chunk_size)) ++ return -1; ++ ++ e->new_chunk = sector_to_chunk(store->snap, tc->next_free); ++ tc->next_free += store->snap->chunk_size; ++ ++ return 0; ++} ++ ++void transient_commit(struct exception_store *store, ++ struct exception *e, ++ void (*callback) (void *, int success), ++ void *callback_context) ++{ ++ /* Just succeed */ ++ callback(callback_context, 1); ++} ++ ++static int transient_percentfull(struct exception_store *store) ++{ ++ struct transient_c *tc = (struct transient_c *) store->context; ++ return (tc->next_free * 100) / get_dev_size(store->snap->cow->dev); ++} ++ ++int dm_create_transient(struct exception_store *store, ++ struct dm_snapshot *s, int blocksize) ++{ ++ struct transient_c *tc; ++ ++ memset(store, 0, sizeof(*store)); ++ store->destroy = transient_destroy; ++ store->prepare_exception = transient_prepare; ++ store->commit_exception = transient_commit; ++ store->percent_full = transient_percentfull; ++ store->snap = s; ++ ++ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); ++ if (!tc) ++ return -ENOMEM; ++ ++ tc->next_free = 0; ++ store->context = tc; ++ ++ return 0; ++} +diff -ruN linux-2.4.20/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c +--- linux-2.4.20/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-ioctl.c Wed Mar 26 14:34:50 2003 +@@ -0,0 +1,1160 @@ ++/* ++ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define DM_DRIVER_EMAIL "dm@uk.sistina.com" ++ ++/*----------------------------------------------------------------- ++ * The ioctl interface needs to be able to look up devices by ++ * name or uuid. ++ *---------------------------------------------------------------*/ ++struct hash_cell { ++ struct list_head name_list; ++ struct list_head uuid_list; ++ ++ char *name; ++ char *uuid; ++ struct mapped_device *md; ++ ++ /* I hate devfs */ ++ devfs_handle_t devfs_entry; ++}; ++ ++#define NUM_BUCKETS 64 ++#define MASK_BUCKETS (NUM_BUCKETS - 1) ++static struct list_head _name_buckets[NUM_BUCKETS]; ++static struct list_head _uuid_buckets[NUM_BUCKETS]; ++ ++static devfs_handle_t _dev_dir; ++void dm_hash_remove_all(void); ++ ++/* ++ * Guards access to all three tables. 
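/*
 * Locking discipline for the tables below: lookups (find_device() and
 * friends) take this semaphore with down_read(); anything that
 * inserts, renames or removes a cell takes it with down_write(), so
 * concurrent readers stay cheap while mutation is exclusive.
 */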
++ */ ++static DECLARE_RWSEM(_hash_lock); ++ ++static void init_buckets(struct list_head *buckets) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < NUM_BUCKETS; i++) ++ INIT_LIST_HEAD(buckets + i); ++} ++ ++int dm_hash_init(void) ++{ ++ init_buckets(_name_buckets); ++ init_buckets(_uuid_buckets); ++ _dev_dir = devfs_mk_dir(0, DM_DIR, NULL); ++ return 0; ++} ++ ++void dm_hash_exit(void) ++{ ++ dm_hash_remove_all(); ++ devfs_unregister(_dev_dir); ++} ++ ++/*----------------------------------------------------------------- ++ * Hash function: ++ * We're not really concerned with the str hash function being ++ * fast since it's only used by the ioctl interface. ++ *---------------------------------------------------------------*/ ++static unsigned int hash_str(const char *str) ++{ ++ const unsigned int hash_mult = 2654435387U; ++ unsigned int h = 0; ++ ++ while (*str) ++ h = (h + (unsigned int) *str++) * hash_mult; ++ ++ return h & MASK_BUCKETS; ++} ++ ++/*----------------------------------------------------------------- ++ * Code for looking up a device by name ++ *---------------------------------------------------------------*/ ++static struct hash_cell *__get_name_cell(const char *str) ++{ ++ struct list_head *tmp; ++ struct hash_cell *hc; ++ unsigned int h = hash_str(str); ++ ++ list_for_each(tmp, _name_buckets + h) { ++ hc = list_entry(tmp, struct hash_cell, name_list); ++ if (!strcmp(hc->name, str)) ++ return hc; ++ } ++ ++ return NULL; ++} ++ ++static struct hash_cell *__get_uuid_cell(const char *str) ++{ ++ struct list_head *tmp; ++ struct hash_cell *hc; ++ unsigned int h = hash_str(str); ++ ++ list_for_each(tmp, _uuid_buckets + h) { ++ hc = list_entry(tmp, struct hash_cell, uuid_list); ++ if (!strcmp(hc->uuid, str)) ++ return hc; ++ } ++ ++ return NULL; ++} ++ ++/*----------------------------------------------------------------- ++ * Inserting, removing and renaming a device. ++ *---------------------------------------------------------------*/ ++static inline char *kstrdup(const char *str) ++{ ++ char *r = kmalloc(strlen(str) + 1, GFP_KERNEL); ++ if (r) ++ strcpy(r, str); ++ return r; ++} ++ ++static struct hash_cell *alloc_cell(const char *name, const char *uuid, ++ struct mapped_device *md) ++{ ++ struct hash_cell *hc; ++ ++ hc = kmalloc(sizeof(*hc), GFP_KERNEL); ++ if (!hc) ++ return NULL; ++ ++ hc->name = kstrdup(name); ++ if (!hc->name) { ++ kfree(hc); ++ return NULL; ++ } ++ ++ if (!uuid) ++ hc->uuid = NULL; ++ ++ else { ++ hc->uuid = kstrdup(uuid); ++ if (!hc->uuid) { ++ kfree(hc->name); ++ kfree(hc); ++ return NULL; ++ } ++ } ++ ++ INIT_LIST_HEAD(&hc->name_list); ++ INIT_LIST_HEAD(&hc->uuid_list); ++ hc->md = md; ++ return hc; ++} ++ ++static void free_cell(struct hash_cell *hc) ++{ ++ if (hc) { ++ kfree(hc->name); ++ kfree(hc->uuid); ++ kfree(hc); ++ } ++} ++ ++/* ++ * devfs stuff. ++ */ ++static int register_with_devfs(struct hash_cell *hc) ++{ ++ kdev_t dev = dm_kdev(hc->md); ++ ++ hc->devfs_entry = ++ devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER, ++ major(dev), minor(dev), ++ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, ++ &dm_blk_dops, NULL); ++ ++ return 0; ++} ++ ++static int unregister_with_devfs(struct hash_cell *hc) ++{ ++ devfs_unregister(hc->devfs_entry); ++ return 0; ++} ++ ++/* ++ * The kdev_t and uuid of a device can never change once it is ++ * initially inserted. ++ */ ++int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md) ++{ ++ struct hash_cell *cell; ++ ++ /* ++ * Allocate the new cells. 
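/*
 * Note the ordering below: the cell is allocated before _hash_lock is
 * taken (kmalloc may sleep), uniqueness is checked under the write
 * lock, and the loser of a concurrent create frees its cell and
 * returns -EBUSY.
 */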
++ */ ++ cell = alloc_cell(name, uuid, md); ++ if (!cell) ++ return -ENOMEM; ++ ++ /* ++ * Insert the cell into all three hash tables. ++ */ ++ down_write(&_hash_lock); ++ if (__get_name_cell(name)) ++ goto bad; ++ ++ list_add(&cell->name_list, _name_buckets + hash_str(name)); ++ ++ if (uuid) { ++ if (__get_uuid_cell(uuid)) { ++ list_del(&cell->name_list); ++ goto bad; ++ } ++ list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); ++ } ++ register_with_devfs(cell); ++ dm_get(md); ++ up_write(&_hash_lock); ++ ++ return 0; ++ ++ bad: ++ up_write(&_hash_lock); ++ free_cell(cell); ++ return -EBUSY; ++} ++ ++void __hash_remove(struct hash_cell *hc) ++{ ++ /* remove from the dev hash */ ++ list_del(&hc->uuid_list); ++ list_del(&hc->name_list); ++ unregister_with_devfs(hc); ++ dm_put(hc->md); ++} ++ ++void dm_hash_remove_all(void) ++{ ++ int i; ++ struct hash_cell *hc; ++ struct list_head *tmp, *n; ++ ++ down_write(&_hash_lock); ++ for (i = 0; i < NUM_BUCKETS; i++) { ++ list_for_each_safe(tmp, n, _name_buckets + i) { ++ hc = list_entry(tmp, struct hash_cell, name_list); ++ __hash_remove(hc); ++ } ++ } ++ up_write(&_hash_lock); ++} ++ ++int dm_hash_rename(const char *old, const char *new) ++{ ++ char *new_name, *old_name; ++ struct hash_cell *hc; ++ ++ /* ++ * duplicate new. ++ */ ++ new_name = kstrdup(new); ++ if (!new_name) ++ return -ENOMEM; ++ ++ down_write(&_hash_lock); ++ ++ /* ++ * Is new free ? ++ */ ++ hc = __get_name_cell(new); ++ if (hc) { ++ DMWARN("asked to rename to an already existing name %s -> %s", ++ old, new); ++ up_write(&_hash_lock); ++ return -EBUSY; ++ } ++ ++ /* ++ * Is there such a device as 'old' ? ++ */ ++ hc = __get_name_cell(old); ++ if (!hc) { ++ DMWARN("asked to rename a non existent device %s -> %s", ++ old, new); ++ up_write(&_hash_lock); ++ return -ENXIO; ++ } ++ ++ /* ++ * rename and move the name cell. ++ */ ++ list_del(&hc->name_list); ++ old_name = hc->name; ++ hc->name = new_name; ++ list_add(&hc->name_list, _name_buckets + hash_str(new_name)); ++ ++ /* rename the device node in devfs */ ++ unregister_with_devfs(hc); ++ register_with_devfs(hc); ++ ++ up_write(&_hash_lock); ++ kfree(old_name); ++ return 0; ++} ++ ++ ++/*----------------------------------------------------------------- ++ * Implementation of the ioctl commands ++ *---------------------------------------------------------------*/ ++ ++/* ++ * All the ioctl commands get dispatched to functions with this ++ * prototype. ++ */ ++typedef int (*ioctl_fn)(struct dm_ioctl *param, struct dm_ioctl *user); ++ ++/* ++ * Check a string doesn't overrun the chunk of ++ * memory we copied from userland. 
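/*
 * The payload being validated here is laid out by userland as a
 * struct dm_ioctl header followed, data_start bytes into the buffer,
 * by one dm_target_spec per target, each trailed by its
 * NUL-terminated parameter string and linked to the next spec by a
 * byte offset in 'next'.  A single-target buffer might be built like
 * this (illustrative sketch with hypothetical names; see the
 * dm-ioctl.h definitions added by this patch for the authoritative
 * fields):
 */
static void build_single_target(char *buf, size_t len)
{
	struct dm_ioctl *dmi = (struct dm_ioctl *) buf;
	struct dm_target_spec *spec;

	memset(buf, 0, len);
	dmi->data_size = len;
	dmi->data_start = sizeof(*dmi);
	dmi->target_count = 1;
	strncpy(dmi->name, "test-dev", sizeof(dmi->name) - 1);

	spec = (struct dm_target_spec *) (buf + dmi->data_start);
	spec->sector_start = 0;
	spec->length = 204800;
	spec->next = 0;			/* no further targets */
	strncpy(spec->target_type, "linear",
		sizeof(spec->target_type) - 1);
	strcpy((char *) (spec + 1), "/dev/hda1 0");	/* params */
}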
++ */ ++static int valid_str(char *str, void *begin, void *end) ++{ ++ while (((void *) str >= begin) && ((void *) str < end)) ++ if (!*str++) ++ return 0; ++ ++ return -EINVAL; ++} ++ ++static int next_target(struct dm_target_spec *last, uint32_t next, ++ void *begin, void *end, ++ struct dm_target_spec **spec, char **params) ++{ ++ *spec = (struct dm_target_spec *) ++ ((unsigned char *) last + next); ++ *params = (char *) (*spec + 1); ++ ++ if (*spec < (last + 1) || ((void *) *spec > end)) ++ return -EINVAL; ++ ++ return valid_str(*params, begin, end); ++} ++ ++static int populate_table(struct dm_table *table, struct dm_ioctl *args) ++{ ++ int i = 0, r, first = 1; ++ struct dm_target_spec *spec; ++ char *params; ++ void *begin, *end; ++ ++ if (!args->target_count) { ++ DMWARN("populate_table: no targets specified"); ++ return -EINVAL; ++ } ++ ++ begin = (void *) args; ++ end = begin + args->data_size; ++ ++ for (i = 0; i < args->target_count; i++) { ++ ++ if (first) ++ r = next_target((struct dm_target_spec *) args, ++ args->data_start, ++ begin, end, &spec, ¶ms); ++ else ++ r = next_target(spec, spec->next, begin, end, ++ &spec, ¶ms); ++ ++ if (r) { ++ DMWARN("unable to find target"); ++ return -EINVAL; ++ } ++ ++ r = dm_table_add_target(table, spec->target_type, ++ spec->sector_start, spec->length, ++ params); ++ if (r) { ++ DMWARN("error adding target to table"); ++ return -EINVAL; ++ } ++ ++ first = 0; ++ } ++ ++ return dm_table_complete(table); ++} ++ ++/* ++ * Round up the ptr to the next 'align' boundary. Obviously ++ * 'align' must be a power of 2. ++ */ ++static inline void *align_ptr(void *ptr, unsigned int align) ++{ ++ align--; ++ return (void *) (((unsigned long) (ptr + align)) & ~align); ++} ++ ++/* ++ * Copies a dm_ioctl and an optional additional payload to ++ * userland. ++ */ ++static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param, ++ void *data, uint32_t len) ++{ ++ int r; ++ void *ptr = NULL; ++ ++ if (data) { ++ ptr = align_ptr(user + 1, sizeof(unsigned long)); ++ param->data_start = ptr - (void *) user; ++ } ++ ++ /* ++ * The version number has already been filled in, so we ++ * just copy later fields. ++ */ ++ r = copy_to_user(&user->data_size, ¶m->data_size, ++ sizeof(*param) - sizeof(param->version)); ++ if (r) ++ return -EFAULT; ++ ++ if (data) { ++ if (param->data_start + len > param->data_size) ++ return -ENOSPC; ++ ++ if (copy_to_user(ptr, data, len)) ++ r = -EFAULT; ++ } ++ ++ return r; ++} ++ ++/* ++ * Fills in a dm_ioctl structure, ready for sending back to ++ * userland. ++ */ ++static int __info(struct mapped_device *md, struct dm_ioctl *param) ++{ ++ kdev_t dev = dm_kdev(md); ++ struct dm_table *table; ++ struct block_device *bdev; ++ ++ param->flags = DM_EXISTS_FLAG; ++ if (dm_suspended(md)) ++ param->flags |= DM_SUSPEND_FLAG; ++ ++ param->dev = kdev_t_to_nr(dev); ++ bdev = bdget(param->dev); ++ if (!bdev) ++ return -ENXIO; ++ ++ param->open_count = bdev->bd_openers; ++ bdput(bdev); ++ ++ if (is_read_only(dev)) ++ param->flags |= DM_READONLY_FLAG; ++ ++ table = dm_get_table(md); ++ param->target_count = dm_table_get_num_targets(table); ++ dm_table_put(table); ++ ++ return 0; ++} ++ ++/* ++ * Always use UUID for lookups if it's present, otherwise use name. ++ */ ++static inline struct mapped_device *find_device(struct dm_ioctl *param) ++{ ++ struct hash_cell *hc; ++ struct mapped_device *md = NULL; ++ ++ down_read(&_hash_lock); ++ hc = *param->uuid ? 
__get_uuid_cell(param->uuid) : ++ __get_name_cell(param->name); ++ if (hc) { ++ md = hc->md; ++ ++ /* ++ * Sneakily write in both the name and the uuid ++ * while we have the cell. ++ */ ++ strncpy(param->name, hc->name, sizeof(param->name)); ++ if (hc->uuid) ++ strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1); ++ else ++ param->uuid[0] = '\0'; ++ ++ dm_get(md); ++ } ++ up_read(&_hash_lock); ++ ++ return md; ++} ++ ++#define ALIGNMENT sizeof(int) ++static void *_align(void *ptr, unsigned int a) ++{ ++ register unsigned long align = --a; ++ ++ return (void *) (((unsigned long) ptr + align) & ~align); ++} ++ ++/* ++ * Copies device info back to user space, used by ++ * the create and info ioctls. ++ */ ++static int info(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ ++ param->flags = 0; ++ ++ md = find_device(param); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ ++ __info(md, param); ++ dm_put(md); ++ ++ out: ++ return results_to_user(user, param, NULL, 0); ++} ++ ++static inline int get_mode(struct dm_ioctl *param) ++{ ++ int mode = FMODE_READ | FMODE_WRITE; ++ ++ if (param->flags & DM_READONLY_FLAG) ++ mode = FMODE_READ; ++ ++ return mode; ++} ++ ++static int check_name(const char *name) ++{ ++ if (strchr(name, '/')) { ++ DMWARN("invalid device name"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int create(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ kdev_t dev; ++ struct dm_table *t; ++ struct mapped_device *md; ++ int minor; ++ ++ r = check_name(param->name); ++ if (r) ++ return r; ++ ++ r = dm_table_create(&t, get_mode(param)); ++ if (r) ++ return r; ++ ++ r = populate_table(t, param); ++ if (r) { ++ dm_table_put(t); ++ return r; ++ } ++ ++ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ? ++ minor(to_kdev_t(param->dev)) : -1; ++ ++ r = dm_create(minor, t, &md); ++ if (r) { ++ dm_table_put(t); ++ return r; ++ } ++ dm_table_put(t); /* md will have grabbed its own reference */ ++ ++ dev = dm_kdev(md); ++ set_device_ro(dev, (param->flags & DM_READONLY_FLAG)); ++ r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md); ++ dm_put(md); ++ ++ return r ? 
r : info(param, user); ++} ++ ++/* ++ * Build up the status struct for each target ++ */ ++static int __status(struct mapped_device *md, struct dm_ioctl *param, ++ char *outbuf, int *len) ++{ ++ int i, num_targets; ++ struct dm_target_spec *spec; ++ char *outptr; ++ status_type_t type; ++ struct dm_table *table = dm_get_table(md); ++ ++ if (param->flags & DM_STATUS_TABLE_FLAG) ++ type = STATUSTYPE_TABLE; ++ else ++ type = STATUSTYPE_INFO; ++ ++ outptr = outbuf; ++ ++ /* Get all the target info */ ++ num_targets = dm_table_get_num_targets(table); ++ for (i = 0; i < num_targets; i++) { ++ struct dm_target *ti = dm_table_get_target(table, i); ++ ++ if (outptr - outbuf + ++ sizeof(struct dm_target_spec) > param->data_size) { ++ dm_table_put(table); ++ return -ENOMEM; ++ } ++ ++ spec = (struct dm_target_spec *) outptr; ++ ++ spec->status = 0; ++ spec->sector_start = ti->begin; ++ spec->length = ti->len; ++ strncpy(spec->target_type, ti->type->name, ++ sizeof(spec->target_type)); ++ ++ outptr += sizeof(struct dm_target_spec); ++ ++ /* Get the status/table string from the target driver */ ++ if (ti->type->status) ++ ti->type->status(ti, type, outptr, ++ outbuf + param->data_size - outptr); ++ else ++ outptr[0] = '\0'; ++ ++ outptr += strlen(outptr) + 1; ++ _align(outptr, ALIGNMENT); ++ spec->next = outptr - outbuf; ++ } ++ ++ param->target_count = num_targets; ++ *len = outptr - outbuf; ++ dm_table_put(table); ++ ++ return 0; ++} ++ ++/* ++ * Return the status of a device as a text string for each ++ * target. ++ */ ++static int get_status(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ int len = 0; ++ int ret; ++ char *outbuf = NULL; ++ ++ md = find_device(param); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ ++ /* We haven't a clue how long the resultant data will be so ++ just allocate as much as userland has allowed us and make sure ++ we don't overun it */ ++ outbuf = kmalloc(param->data_size, GFP_KERNEL); ++ if (!outbuf) ++ goto out; ++ /* ++ * Get the status of all targets ++ */ ++ __status(md, param, outbuf, &len); ++ ++ /* ++ * Setup the basic dm_ioctl structure. ++ */ ++ __info(md, param); ++ ++ out: ++ if (md) ++ dm_put(md); ++ ++ ret = results_to_user(user, param, outbuf, len); ++ ++ if (outbuf) ++ kfree(outbuf); ++ ++ return ret; ++} ++ ++/* ++ * Wait for a device to report an event ++ */ ++static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ struct dm_table *table; ++ DECLARE_WAITQUEUE(wq, current); ++ ++ md = find_device(param); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ ++ /* ++ * Setup the basic dm_ioctl structure. ++ */ ++ __info(md, param); ++ ++ /* ++ * Wait for a notification event ++ */ ++ set_current_state(TASK_INTERRUPTIBLE); ++ table = dm_get_table(md); ++ dm_table_add_wait_queue(table, &wq); ++ dm_table_put(table); ++ dm_put(md); ++ ++ yield(); ++ set_current_state(TASK_RUNNING); ++ ++ out: ++ return results_to_user(user, param, NULL, 0); ++} ++ ++/* ++ * Retrieves a list of devices used by a particular dm device. ++ */ ++static int dep(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int count, r; ++ struct mapped_device *md; ++ struct list_head *tmp; ++ size_t len = 0; ++ struct dm_target_deps *deps = NULL; ++ struct dm_table *table; ++ ++ md = find_device(param); ++ if (!md) ++ goto out; ++ table = dm_get_table(md); ++ ++ /* ++ * Setup the basic dm_ioctl structure. 
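/*
 * results_to_user() hands the deps array built below back to the
 * caller at param->data_start, so userland walks the result roughly
 * as (sketch):
 *
 *	deps = (struct dm_target_deps *)
 *		((char *) dmi + dmi->data_start);
 *	for (i = 0; i < deps->count; i++)
 *		use(deps->dev[i]);
 */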
++ */ ++ __info(md, param); ++ ++ /* ++ * Count the devices. ++ */ ++ count = 0; ++ list_for_each(tmp, dm_table_get_devices(table)) ++ count++; ++ ++ /* ++ * Allocate a kernel space version of the dm_target_status ++ * struct. ++ */ ++ if (array_too_big(sizeof(*deps), sizeof(*deps->dev), count)) { ++ dm_table_put(table); ++ dm_put(md); ++ return -ENOMEM; ++ } ++ ++ len = sizeof(*deps) + (sizeof(*deps->dev) * count); ++ deps = kmalloc(len, GFP_KERNEL); ++ if (!deps) { ++ dm_table_put(table); ++ dm_put(md); ++ return -ENOMEM; ++ } ++ ++ /* ++ * Fill in the devices. ++ */ ++ deps->count = count; ++ count = 0; ++ list_for_each(tmp, dm_table_get_devices(table)) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ deps->dev[count++] = dd->bdev->bd_dev; ++ } ++ dm_table_put(table); ++ dm_put(md); ++ ++ out: ++ r = results_to_user(user, param, deps, len); ++ ++ kfree(deps); ++ return r; ++} ++ ++static int remove(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct hash_cell *hc; ++ ++ down_write(&_hash_lock); ++ hc = *param->uuid ? __get_uuid_cell(param->uuid) : ++ __get_name_cell(param->name); ++ if (!hc) { ++ DMWARN("device doesn't appear to be in the dev hash table."); ++ up_write(&_hash_lock); ++ return -EINVAL; ++ } ++ ++ /* ++ * You may ask the interface to drop its reference to an ++ * in use device. This is no different to unlinking a ++ * file that someone still has open. The device will not ++ * actually be destroyed until the last opener closes it. ++ * The name and uuid of the device (both are interface ++ * properties) will be available for reuse immediately. ++ * ++ * You don't want to drop a _suspended_ device from the ++ * interface, since that will leave you with no way of ++ * resuming it. ++ */ ++ if (dm_suspended(hc->md)) { ++ DMWARN("refusing to remove a suspended device."); ++ up_write(&_hash_lock); ++ return -EPERM; ++ } ++ ++ __hash_remove(hc); ++ up_write(&_hash_lock); ++ return 0; ++} ++ ++static int remove_all(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ dm_hash_remove_all(); ++ return 0; ++} ++ ++static int suspend(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ struct mapped_device *md; ++ ++ md = find_device(param); ++ if (!md) ++ return -ENXIO; ++ ++ if (param->flags & DM_SUSPEND_FLAG) ++ r = dm_suspend(md); ++ else ++ r = dm_resume(md); ++ ++ dm_put(md); ++ return r; ++} ++ ++static int reload(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ kdev_t dev; ++ struct mapped_device *md; ++ struct dm_table *t; ++ ++ r = dm_table_create(&t, get_mode(param)); ++ if (r) ++ return r; ++ ++ r = populate_table(t, param); ++ if (r) { ++ dm_table_put(t); ++ return r; ++ } ++ ++ md = find_device(param); ++ if (!md) { ++ dm_table_put(t); ++ return -ENXIO; ++ } ++ ++ r = dm_swap_table(md, t); ++ if (r) { ++ dm_put(md); ++ dm_table_put(t); ++ return r; ++ } ++ dm_table_put(t); /* md will have taken its own reference */ ++ ++ dev = dm_kdev(md); ++ set_device_ro(dev, (param->flags & DM_READONLY_FLAG)); ++ dm_put(md); ++ ++ r = info(param, user); ++ return r; ++} ++ ++static int rename(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ char *new_name = (char *) param + param->data_start; ++ ++ if (valid_str(new_name, (void *) param, ++ (void *) param + param->data_size)) { ++ DMWARN("Invalid new logical volume name supplied."); ++ return -EINVAL; ++ } ++ ++ r = check_name(new_name); ++ if (r) ++ return r; ++ ++ return dm_hash_rename(param->name, new_name); ++} ++ ++ 
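/*
 * Userland drives the commands above through the control node; note
 * that DM_DEV_SUSPEND covers both directions, with DM_SUSPEND_FLAG
 * selecting suspend versus resume (see suspend() above).  Illustrative
 * fragment (hypothetical helper; "ctl_fd" is the opened control
 * device):
 */
static int set_suspended(int ctl_fd, const char *name, int on)
{
	struct dm_ioctl io;

	memset(&io, 0, sizeof(io));
	io.version[0] = DM_VERSION_MAJOR;
	io.version[1] = DM_VERSION_MINOR;
	io.version[2] = DM_VERSION_PATCHLEVEL;
	io.data_size = sizeof(io);
	strncpy(io.name, name, sizeof(io.name) - 1);
	if (on)
		io.flags = DM_SUSPEND_FLAG;

	return ioctl(ctl_fd, DM_DEV_SUSPEND, &io);
}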
++/*----------------------------------------------------------------- ++ * Implementation of open/close/ioctl on the special char ++ * device. ++ *---------------------------------------------------------------*/ ++static ioctl_fn lookup_ioctl(unsigned int cmd) ++{ ++ static struct { ++ int cmd; ++ ioctl_fn fn; ++ } _ioctls[] = { ++ {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ ++ {DM_REMOVE_ALL_CMD, remove_all}, ++ {DM_DEV_CREATE_CMD, create}, ++ {DM_DEV_REMOVE_CMD, remove}, ++ {DM_DEV_RELOAD_CMD, reload}, ++ {DM_DEV_RENAME_CMD, rename}, ++ {DM_DEV_SUSPEND_CMD, suspend}, ++ {DM_DEV_DEPS_CMD, dep}, ++ {DM_DEV_STATUS_CMD, info}, ++ {DM_TARGET_STATUS_CMD, get_status}, ++ {DM_TARGET_WAIT_CMD, wait_device_event}, ++ }; ++ ++ return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; ++} ++ ++/* ++ * As well as checking the version compatibility this always ++ * copies the kernel interface version out. ++ */ ++static int check_version(int cmd, struct dm_ioctl *user) ++{ ++ uint32_t version[3]; ++ int r = 0; ++ ++ if (copy_from_user(version, user->version, sizeof(version))) ++ return -EFAULT; ++ ++ if ((DM_VERSION_MAJOR != version[0]) || ++ (DM_VERSION_MINOR < version[1])) { ++ DMWARN("ioctl interface mismatch: " ++ "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", ++ DM_VERSION_MAJOR, DM_VERSION_MINOR, ++ DM_VERSION_PATCHLEVEL, ++ version[0], version[1], version[2], cmd); ++ r = -EINVAL; ++ } ++ ++ /* ++ * Fill in the kernel version. ++ */ ++ version[0] = DM_VERSION_MAJOR; ++ version[1] = DM_VERSION_MINOR; ++ version[2] = DM_VERSION_PATCHLEVEL; ++ if (copy_to_user(user->version, version, sizeof(version))) ++ return -EFAULT; ++ ++ return r; ++} ++ ++static void free_params(struct dm_ioctl *param) ++{ ++ vfree(param); ++} ++ ++static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param) ++{ ++ struct dm_ioctl tmp, *dmi; ++ ++ if (copy_from_user(&tmp, user, sizeof(tmp))) ++ return -EFAULT; ++ ++ if (tmp.data_size < sizeof(tmp)) ++ return -EINVAL; ++ ++ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); ++ if (!dmi) ++ return -ENOMEM; ++ ++ if (copy_from_user(dmi, user, tmp.data_size)) { ++ vfree(dmi); ++ return -EFAULT; ++ } ++ ++ *param = dmi; ++ return 0; ++} ++ ++static int validate_params(uint cmd, struct dm_ioctl *param) ++{ ++ /* Ignores parameters */ ++ if (cmd == DM_REMOVE_ALL_CMD) ++ return 0; ++ ++ /* Unless creating, either name of uuid but not both */ ++ if (cmd != DM_DEV_CREATE_CMD) { ++ if ((!*param->uuid && !*param->name) || ++ (*param->uuid && *param->name)) { ++ DMWARN("one of name or uuid must be supplied"); ++ return -EINVAL; ++ } ++ } ++ ++ /* Ensure strings are terminated */ ++ param->name[DM_NAME_LEN - 1] = '\0'; ++ param->uuid[DM_UUID_LEN - 1] = '\0'; ++ ++ return 0; ++} ++ ++static int ctl_ioctl(struct inode *inode, struct file *file, ++ uint command, ulong u) ++{ ++ int r = 0, cmd; ++ struct dm_ioctl *param; ++ struct dm_ioctl *user = (struct dm_ioctl *) u; ++ ioctl_fn fn = NULL; ++ ++ /* only root can play with this */ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ if (_IOC_TYPE(command) != DM_IOCTL) ++ return -ENOTTY; ++ ++ cmd = _IOC_NR(command); ++ ++ /* ++ * Check the interface version passed in. This also ++ * writes out the kernel's interface version. ++ */ ++ r = check_version(cmd, user); ++ if (r) ++ return r; ++ ++ /* ++ * Nothing more to do for the version command. 
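/*
 * check_version() copies the kernel's version triple back to userland
 * even when it rejects the caller's, so a tool can always discover
 * what it is talking to, roughly (userland sketch):
 *
 *	struct dm_ioctl io;
 *	memset(&io, 0, sizeof(io));
 *	io.version[0] = DM_VERSION_MAJOR;   // the tool's idea of major
 *	ioctl(fd, DM_VERSION, &io);
 *	// io.version[] now holds the kernel's interface version
 */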
++ */ ++ if (cmd == DM_VERSION_CMD) ++ return 0; ++ ++ fn = lookup_ioctl(cmd); ++ if (!fn) { ++ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); ++ return -ENOTTY; ++ } ++ ++ /* ++ * Copy the parameters into kernel space. ++ */ ++ r = copy_params(user, ¶m); ++ if (r) ++ return r; ++ ++ r = validate_params(cmd, param); ++ if (r) { ++ free_params(param); ++ return r; ++ } ++ ++ r = fn(param, user); ++ free_params(param); ++ return r; ++} ++ ++static struct file_operations _ctl_fops = { ++ .ioctl = ctl_ioctl, ++ .owner = THIS_MODULE, ++}; ++ ++static devfs_handle_t _ctl_handle; ++ ++static struct miscdevice _dm_misc = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = DM_NAME, ++ .fops = &_ctl_fops ++}; ++ ++/* ++ * Create misc character device and link to DM_DIR/control. ++ */ ++int __init dm_interface_init(void) ++{ ++ int r; ++ char rname[64]; ++ ++ r = dm_hash_init(); ++ if (r) ++ return r; ++ ++ r = misc_register(&_dm_misc); ++ if (r) { ++ DMERR("misc_register failed for control device"); ++ dm_hash_exit(); ++ return r; ++ } ++ ++ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3, ++ sizeof rname - 3); ++ if (r == -ENOSYS) ++ goto done; /* devfs not present */ ++ ++ if (r < 0) { ++ DMERR("devfs_generate_path failed for control device"); ++ goto failed; ++ } ++ ++ strncpy(rname + r, "../", 3); ++ r = devfs_mk_symlink(NULL, DM_DIR "/control", ++ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL); ++ if (r) { ++ DMERR("devfs_mk_symlink failed for control device"); ++ goto failed; ++ } ++ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle); ++ ++ done: ++ DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR, ++ DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA, ++ DM_DRIVER_EMAIL); ++ return 0; ++ ++ failed: ++ misc_deregister(&_dm_misc); ++ dm_hash_exit(); ++ return r; ++} ++ ++void dm_interface_exit(void) ++{ ++ if (misc_deregister(&_dm_misc) < 0) ++ DMERR("misc_deregister failed for control device"); ++ ++ dm_hash_exit(); ++} +diff -ruN linux-2.4.20/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c +--- linux-2.4.20/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-linear.c Wed Mar 26 13:27:22 2003 +@@ -0,0 +1,121 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * Linear: maps a linear range of a device. 
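A linear target takes two arguments, a destination device and a starting offset on it, and the map function below is a constant shift: a request at sector s of a target beginning at ti->begin lands at lc->start + (s - ti->begin) on the backing device. A standalone sketch of that rule with hypothetical numbers:

#include <stdio.h>

typedef unsigned long long sector_t;    /* stand-in for the kernel type */

/* The remap rule from linear_map() below. */
static sector_t linear_remap(sector_t sector, sector_t begin,
                             sector_t dev_start)
{
        return dev_start + (sector - begin);
}

int main(void)
{
        /* hypothetical: target begins at sector 1024, maps to offset 8192 */
        printf("%llu\n", linear_remap(1500, 1024, 8192));   /* prints 8668 */
        return 0;
}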
++ */ ++struct linear_c { ++ struct dm_dev *dev; ++ sector_t start; ++}; ++ ++/* ++ * Construct a linear mapping: ++ */ ++static int linear_ctr(struct dm_target *ti, int argc, char **argv) ++{ ++ struct linear_c *lc; ++ ++ if (argc != 2) { ++ ti->error = "dm-linear: Not enough arguments"; ++ return -EINVAL; ++ } ++ ++ lc = kmalloc(sizeof(*lc), GFP_KERNEL); ++ if (lc == NULL) { ++ ti->error = "dm-linear: Cannot allocate linear context"; ++ return -ENOMEM; ++ } ++ ++ if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) { ++ ti->error = "dm-linear: Invalid device sector"; ++ goto bad; ++ } ++ ++ if (dm_get_device(ti, argv[0], lc->start, ti->len, ++ dm_table_get_mode(ti->table), &lc->dev)) { ++ ti->error = "dm-linear: Device lookup failed"; ++ goto bad; ++ } ++ ++ ti->private = lc; ++ return 0; ++ ++ bad: ++ kfree(lc); ++ return -EINVAL; ++} ++ ++static void linear_dtr(struct dm_target *ti) ++{ ++ struct linear_c *lc = (struct linear_c *) ti->private; ++ ++ dm_put_device(ti, lc->dev); ++ kfree(lc); ++} ++ ++static int linear_map(struct dm_target *ti, struct buffer_head *bh, int rw, ++ void **map_context) ++{ ++ struct linear_c *lc = (struct linear_c *) ti->private; ++ ++ bh->b_rdev = lc->dev->dev; ++ bh->b_rsector = lc->start + (bh->b_rsector - ti->begin); ++ ++ return 1; ++} ++ ++static int linear_status(struct dm_target *ti, status_type_t type, ++ char *result, int maxlen) ++{ ++ struct linear_c *lc = (struct linear_c *) ti->private; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ result[0] = '\0'; ++ break; ++ ++ case STATUSTYPE_TABLE: ++ snprintf(result, maxlen, "%s " SECTOR_FORMAT, ++ kdevname(to_kdev_t(lc->dev->bdev->bd_dev)), lc->start); ++ break; ++ } ++ return 0; ++} ++ ++static struct target_type linear_target = { ++ .name = "linear", ++ .module = THIS_MODULE, ++ .ctr = linear_ctr, ++ .dtr = linear_dtr, ++ .map = linear_map, ++ .status = linear_status, ++}; ++ ++int __init dm_linear_init(void) ++{ ++ int r = dm_register_target(&linear_target); ++ ++ if (r < 0) ++ DMERR("linear: register failed %d", r); ++ ++ return r; ++} ++ ++void dm_linear_exit(void) ++{ ++ int r = dm_unregister_target(&linear_target); ++ ++ if (r < 0) ++ DMERR("linear: unregister failed %d", r); ++} +diff -ruN linux-2.4.20/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapshot.c +--- linux-2.4.20/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-snapshot.c Wed Mar 26 14:12:59 2003 +@@ -0,0 +1,1170 @@ ++/* ++ * dm-snapshot.c ++ * ++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "dm-snapshot.h" ++#include "kcopyd.h" ++ ++/* ++ * FIXME: Remove this before release. ++ */ ++#if 0 ++#define DMDEBUG(x...) DMWARN( ## x) ++#else ++#define DMDEBUG(x...) ++#endif ++ ++/* ++ * The percentage increment we will wake up users at ++ */ ++#define WAKE_UP_PERCENT 5 ++ ++/* ++ * kcopyd priority of snapshot operations ++ */ ++#define SNAPSHOT_COPY_PRIORITY 2 ++ ++struct pending_exception { ++ struct exception e; ++ ++ /* ++ * Origin buffers waiting for this to complete are held ++ * in a list (using b_reqnext). ++ */ ++ struct buffer_head *origin_bhs; ++ struct buffer_head *snapshot_bhs; ++ ++ /* ++ * Other pending_exceptions that are processing this ++ * chunk. When this list is empty, we know we can ++ * complete the origins. 
++ */ ++ struct list_head siblings; ++ ++ /* Pointer back to snapshot context */ ++ struct dm_snapshot *snap; ++ ++ /* ++ * 1 indicates the exception has already been sent to ++ * kcopyd. ++ */ ++ int started; ++}; ++ ++/* ++ * Hash table mapping origin volumes to lists of snapshots and ++ * a lock to protect it ++ */ ++static kmem_cache_t *exception_cache; ++static kmem_cache_t *pending_cache; ++static mempool_t *pending_pool; ++ ++/* ++ * One of these per registered origin, held in the snapshot_origins hash ++ */ ++struct origin { ++ /* The origin device */ ++ kdev_t dev; ++ ++ struct list_head hash_list; ++ ++ /* List of snapshots for this origin */ ++ struct list_head snapshots; ++}; ++ ++/* ++ * Size of the hash table for origin volumes. If we make this ++ * the size of the minors list then it should be nearly perfect ++ */ ++#define ORIGIN_HASH_SIZE 256 ++#define ORIGIN_MASK 0xFF ++static struct list_head *_origins; ++static struct rw_semaphore _origins_lock; ++ ++static int init_origin_hash(void) ++{ ++ int i; ++ ++ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!_origins) { ++ DMERR("Device mapper: Snapshot: unable to allocate memory"); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < ORIGIN_HASH_SIZE; i++) ++ INIT_LIST_HEAD(_origins + i); ++ init_rwsem(&_origins_lock); ++ ++ return 0; ++} ++ ++static void exit_origin_hash(void) ++{ ++ kfree(_origins); ++} ++ ++static inline unsigned int origin_hash(kdev_t dev) ++{ ++ return MINOR(dev) & ORIGIN_MASK; ++} ++ ++static struct origin *__lookup_origin(kdev_t origin) ++{ ++ struct list_head *slist; ++ struct list_head *ol; ++ struct origin *o; ++ ++ ol = &_origins[origin_hash(origin)]; ++ list_for_each(slist, ol) { ++ o = list_entry(slist, struct origin, hash_list); ++ ++ if (o->dev == origin) ++ return o; ++ } ++ ++ return NULL; ++} ++ ++static void __insert_origin(struct origin *o) ++{ ++ struct list_head *sl = &_origins[origin_hash(o->dev)]; ++ list_add_tail(&o->hash_list, sl); ++} ++ ++/* ++ * Make a note of the snapshot and its origin so we can look it ++ * up when the origin has a write on it. ++ */ ++static int register_snapshot(struct dm_snapshot *snap) ++{ ++ struct origin *o; ++ kdev_t dev = snap->origin->dev; ++ ++ down_write(&_origins_lock); ++ o = __lookup_origin(dev); ++ ++ if (!o) { ++ /* New origin */ ++ o = kmalloc(sizeof(*o), GFP_KERNEL); ++ if (!o) { ++ up_write(&_origins_lock); ++ return -ENOMEM; ++ } ++ ++ /* Initialise the struct */ ++ INIT_LIST_HEAD(&o->snapshots); ++ o->dev = dev; ++ ++ __insert_origin(o); ++ } ++ ++ list_add_tail(&snap->list, &o->snapshots); ++ ++ up_write(&_origins_lock); ++ return 0; ++} ++ ++static void unregister_snapshot(struct dm_snapshot *s) ++{ ++ struct origin *o; ++ ++ down_write(&_origins_lock); ++ o = __lookup_origin(s->origin->dev); ++ ++ list_del(&s->list); ++ if (list_empty(&o->snapshots)) { ++ list_del(&o->hash_list); ++ kfree(o); ++ } ++ ++ up_write(&_origins_lock); ++} ++ ++/* ++ * Implementation of the exception hash tables. 
++ */ ++static int init_exception_table(struct exception_table *et, uint32_t size) ++{ ++ int i; ++ ++ et->hash_mask = size - 1; ++ et->table = vcalloc(size, sizeof(struct list_head)); ++ if (!et->table) ++ return -ENOMEM; ++ ++ for (i = 0; i < size; i++) ++ INIT_LIST_HEAD(et->table + i); ++ ++ return 0; ++} ++ ++static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) ++{ ++ struct list_head *slot, *entry, *temp; ++ struct exception *ex; ++ int i, size; ++ ++ size = et->hash_mask + 1; ++ for (i = 0; i < size; i++) { ++ slot = et->table + i; ++ ++ list_for_each_safe(entry, temp, slot) { ++ ex = list_entry(entry, struct exception, hash_list); ++ kmem_cache_free(mem, ex); ++ } ++ } ++ ++ vfree(et->table); ++} ++ ++/* ++ * FIXME: check how this hash fn is performing. ++ */ ++static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) ++{ ++ return chunk & et->hash_mask; ++} ++ ++static void insert_exception(struct exception_table *eh, struct exception *e) ++{ ++ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; ++ list_add(&e->hash_list, l); ++} ++ ++static inline void remove_exception(struct exception *e) ++{ ++ list_del(&e->hash_list); ++} ++ ++/* ++ * Return the exception data for a sector, or NULL if not ++ * remapped. ++ */ ++static struct exception *lookup_exception(struct exception_table *et, ++ chunk_t chunk) ++{ ++ struct list_head *slot, *el; ++ struct exception *e; ++ ++ slot = &et->table[exception_hash(et, chunk)]; ++ list_for_each(el, slot) { ++ e = list_entry(el, struct exception, hash_list); ++ if (e->old_chunk == chunk) ++ return e; ++ } ++ ++ return NULL; ++} ++ ++static inline struct exception *alloc_exception(void) ++{ ++ struct exception *e; ++ ++ e = kmem_cache_alloc(exception_cache, GFP_NOIO); ++ if (!e) ++ e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); ++ ++ return e; ++} ++ ++static inline void free_exception(struct exception *e) ++{ ++ kmem_cache_free(exception_cache, e); ++} ++ ++static inline struct pending_exception *alloc_pending_exception(void) ++{ ++ return mempool_alloc(pending_pool, GFP_NOIO); ++} ++ ++static inline void free_pending_exception(struct pending_exception *pe) ++{ ++ mempool_free(pe, pending_pool); ++} ++ ++int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) ++{ ++ struct exception *e; ++ ++ e = alloc_exception(); ++ if (!e) ++ return -ENOMEM; ++ ++ e->old_chunk = old; ++ e->new_chunk = new; ++ insert_exception(&s->complete, e); ++ return 0; ++} ++ ++/* ++ * Hard coded magic. ++ */ ++static int calc_max_buckets(void) ++{ ++ unsigned long mem; ++ ++ mem = num_physpages << PAGE_SHIFT; ++ mem /= 50; ++ mem /= sizeof(struct list_head); ++ ++ return mem; ++} ++ ++/* ++ * Rounds a number down to a power of 2. ++ */ ++static inline uint32_t round_down(uint32_t n) ++{ ++ while (n & (n - 1)) ++ n &= (n - 1); ++ return n; ++} ++ ++/* ++ * Allocate room for a suitable hash table. ++ */ ++static int init_hash_tables(struct dm_snapshot *s) ++{ ++ sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; ++ ++ /* ++ * Calculate based on the size of the original volume or ++ * the COW volume... 
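round_down() above clears the lowest set bit until only one remains, rounding the bucket count down to a power of two so that hash_mask (size - 1) works as a cheap modulus in exception_hash(). A standalone check of the same trick:

#include <assert.h>
#include <stdint.h>

/* Mirrors round_down() above: n & (n - 1) clears the lowest set bit. */
static uint32_t round_down_pow2(uint32_t n)
{
        while (n & (n - 1))
                n &= (n - 1);
        return n;
}

int main(void)
{
        assert(round_down_pow2(1000) == 512);
        assert(round_down_pow2(64) == 64);
        /* with a power-of-two size, masking equals taking a modulus */
        assert((12345 & (round_down_pow2(1000) - 1)) == (12345 % 512));
        return 0;
}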
++ */ ++ cow_dev_size = get_dev_size(s->cow->dev); ++ origin_dev_size = get_dev_size(s->origin->dev); ++ max_buckets = calc_max_buckets(); ++ ++ hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size; ++ hash_size = min(hash_size, max_buckets); ++ ++ /* Round it down to a power of 2 */ ++ hash_size = round_down(hash_size); ++ if (init_exception_table(&s->complete, hash_size)) ++ return -ENOMEM; ++ ++ /* ++ * Allocate hash table for in-flight exceptions ++ * Make this smaller than the real hash table ++ */ ++ hash_size >>= 3; ++ if (!hash_size) ++ hash_size = 64; ++ ++ if (init_exception_table(&s->pending, hash_size)) { ++ exit_exception_table(&s->complete, exception_cache); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Round a number up to the nearest 'size' boundary. size must ++ * be a power of 2. ++ */ ++static inline ulong round_up(ulong n, ulong size) ++{ ++ size--; ++ return (n + size) & ~size; ++} ++ ++/* ++ * Construct a snapshot mapping:
++ * <origin_dev> <COW-dev> <p/n> <chunk-size>
++ */ ++static int snapshot_ctr(struct dm_target *ti, int argc, char **argv) ++{ ++ struct dm_snapshot *s; ++ unsigned long chunk_size; ++ int r = -EINVAL; ++ char persistent; ++ char *origin_path; ++ char *cow_path; ++ char *value; ++ int blocksize; ++ ++ if (argc < 4) { ++ ti->error = "dm-snapshot: requires exactly 4 arguments"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ origin_path = argv[0]; ++ cow_path = argv[1]; ++ persistent = toupper(*argv[2]); ++ ++ if (persistent != 'P' && persistent != 'N') { ++ ti->error = "Persistent flag is not P or N"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ chunk_size = simple_strtoul(argv[3], &value, 10); ++ if (chunk_size == 0 || value == NULL) { ++ ti->error = "Invalid chunk size"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (s == NULL) { ++ ti->error = "Cannot allocate snapshot context private " ++ "structure"; ++ r = -ENOMEM; ++ goto bad; ++ } ++ ++ r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); ++ if (r) { ++ ti->error = "Cannot get origin device"; ++ goto bad_free; ++ } ++ ++ /* FIXME: get cow length */ ++ r = dm_get_device(ti, cow_path, 0, 0, ++ FMODE_READ | FMODE_WRITE, &s->cow); ++ if (r) { ++ dm_put_device(ti, s->origin); ++ ti->error = "Cannot get COW device"; ++ goto bad_free; ++ } ++ ++ /* ++ * Chunk size must be multiple of page size. Silently ++ * round up if it's not. ++ */ ++ chunk_size = round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE); ++ ++ /* Validate the chunk size against the device block size */ ++ blocksize = get_hardsect_size(s->cow->dev); ++ if (chunk_size % (blocksize / SECTOR_SIZE)) { ++ ti->error = "Chunk size is not a multiple of device blocksize"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ /* Check the sizes are small enough to fit in one kiovec */ ++ if (chunk_size > KIO_MAX_SECTORS) { ++ ti->error = "Chunk size is too big"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ /* Check chunk_size is a power of 2 */ ++ if (chunk_size & (chunk_size - 1)) { ++ ti->error = "Chunk size is not a power of 2"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ s->chunk_size = chunk_size; ++ s->chunk_mask = chunk_size - 1; ++ s->type = persistent; ++ for (s->chunk_shift = 0; chunk_size; ++ s->chunk_shift++, chunk_size >>= 1) ++ ; ++ s->chunk_shift--; ++ ++ s->valid = 1; ++ s->last_percent = 0; ++ init_rwsem(&s->lock); ++ s->table = ti->table; ++ ++ /* Allocate hash table for COW data */ ++ if (init_hash_tables(s)) { ++ ti->error = "Unable to allocate hash table space"; ++ r = -ENOMEM; ++ goto bad_putdev; ++ } ++ ++ /* ++ * Check the persistent flag - done here because we need the iobuf ++ * to check the LV header ++ */ ++ s->store.snap = s; ++ ++ if (persistent == 'P') ++ r = dm_create_persistent(&s->store, s->chunk_size); ++ else ++ r = dm_create_transient(&s->store, s, blocksize); ++ ++ if (r) { ++ ti->error = "Couldn't create exception store"; ++ r = -EINVAL; ++ goto bad_free1; ++ } ++ ++ /* Flush IO to the origin device */ ++#if LVM_VFS_ENHANCEMENT ++ fsync_dev_lockfs(s->origin->dev); ++#else ++ fsync_dev(s->origin->dev); ++#endif ++ ++ /* Add snapshot to the list of snapshots for this origin */ ++ if (register_snapshot(s)) { ++ r = -EINVAL; ++ ti->error = "Cannot register snapshot origin"; ++ goto bad_free2; ++ } ++#if LVM_VFS_ENHANCEMENT ++ unlockfs(s->origin->dev); ++#endif ++ kcopyd_inc_client_count(); ++ ++ ti->private = s; ++ return 0; ++ ++ bad_free2: ++#if LVM_VFS_ENHANCEMENT ++ unlockfs(s->origin->dev); ++#endif ++ s->store.destroy(&s->store); ++ ++ bad_free1: ++ 
exit_exception_table(&s->pending, pending_cache); ++ exit_exception_table(&s->complete, exception_cache); ++ ++ bad_putdev: ++ dm_put_device(ti, s->cow); ++ dm_put_device(ti, s->origin); ++ ++ bad_free: ++ kfree(s); ++ ++ bad: ++ return r; ++} ++ ++static void snapshot_dtr(struct dm_target *ti) ++{ ++ struct dm_snapshot *s = (struct dm_snapshot *) ti->private; ++ ++ dm_table_event(ti->table); ++ ++ unregister_snapshot(s); ++ ++ exit_exception_table(&s->pending, pending_cache); ++ exit_exception_table(&s->complete, exception_cache); ++ ++ /* Deallocate memory used */ ++ s->store.destroy(&s->store); ++ ++ dm_put_device(ti, s->origin); ++ dm_put_device(ti, s->cow); ++ kfree(s); ++ ++ kcopyd_dec_client_count(); ++} ++ ++/* ++ * We hold lists of buffer_heads, using the b_reqnext field. ++ */ ++static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh) ++{ ++ bh->b_reqnext = *queue; ++ *queue = bh; ++} ++ ++/* ++ * Flush a list of buffers. ++ */ ++static void flush_buffers(struct buffer_head *bh) ++{ ++ struct buffer_head *n; ++ ++ DMDEBUG("begin flush"); ++ while (bh) { ++ n = bh->b_reqnext; ++ bh->b_reqnext = NULL; ++ DMDEBUG("flushing %p", bh); ++ generic_make_request(WRITE, bh); ++ bh = n; ++ } ++ ++ run_task_queue(&tq_disk); ++} ++ ++/* ++ * Error a list of buffers. ++ */ ++static void error_buffers(struct buffer_head *bh) ++{ ++ struct buffer_head *n; ++ ++ while (bh) { ++ n = bh->b_reqnext; ++ bh->b_reqnext = NULL; ++ buffer_IO_error(bh); ++ bh = n; ++ } ++} ++ ++static void pending_complete(struct pending_exception *pe, int success) ++{ ++ struct exception *e; ++ struct dm_snapshot *s = pe->snap; ++ ++ if (success) { ++ e = alloc_exception(); ++ if (!e) { ++ printk("Unable to allocate exception."); ++ down_write(&s->lock); ++ s->store.drop_snapshot(&s->store); ++ s->valid = 0; ++ up_write(&s->lock); ++ return; ++ } ++ ++ /* ++ * Add a proper exception, and remove the ++ * inflight exception from the list. ++ */ ++ down_write(&s->lock); ++ ++ memcpy(e, &pe->e, sizeof(*e)); ++ insert_exception(&s->complete, e); ++ remove_exception(&pe->e); ++ ++ /* Submit any pending write BHs */ ++ up_write(&s->lock); ++ ++ flush_buffers(pe->snapshot_bhs); ++ DMDEBUG("Exception completed successfully."); ++ ++ /* Notify any interested parties */ ++ if (s->store.percent_full) { ++ int pc = s->store.percent_full(&s->store); ++ ++ if (pc >= s->last_percent + WAKE_UP_PERCENT) { ++ dm_table_event(s->table); ++ s->last_percent = pc - pc % WAKE_UP_PERCENT; ++ } ++ } ++ ++ } else { ++ /* Read/write error - snapshot is unusable */ ++ DMERR("Error reading/writing snapshot"); ++ ++ down_write(&s->lock); ++ s->store.drop_snapshot(&s->store); ++ s->valid = 0; ++ remove_exception(&pe->e); ++ up_write(&s->lock); ++ ++ error_buffers(pe->snapshot_bhs); ++ ++ dm_table_event(s->table); ++ DMDEBUG("Exception failed."); ++ } ++ ++ if (list_empty(&pe->siblings)) ++ flush_buffers(pe->origin_bhs); ++ else ++ list_del(&pe->siblings); ++ ++ free_pending_exception(pe); ++} ++ ++static void commit_callback(void *context, int success) ++{ ++ struct pending_exception *pe = (struct pending_exception *) context; ++ pending_complete(pe, success); ++} ++ ++/* ++ * Called when the copy I/O has finished. kcopyd actually runs ++ * this code so don't block. 
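queue_buffer() and flush_buffers() above use b_reqnext as an intrusive, singly linked LIFO: deferred buffers are pushed on the front, then unlinked one at a time and submitted. The same pattern in miniature, with a stand-in node type instead of struct buffer_head:

#include <stdio.h>
#include <stddef.h>

struct node {
        int id;
        struct node *next;      /* plays the role of b_reqnext */
};

static void queue_node(struct node **queue, struct node *n)
{
        n->next = *queue;       /* push on the front, as queue_buffer() does */
        *queue = n;
}

static void flush_nodes(struct node *n)
{
        struct node *next;

        while (n) {
                next = n->next;
                n->next = NULL;             /* unlink before "submitting" */
                printf("submit %d\n", n->id);
                n = next;
        }
}

int main(void)
{
        struct node a = { 1 }, b = { 2 };
        struct node *q = NULL;

        queue_node(&q, &a);
        queue_node(&q, &b);
        flush_nodes(q);         /* submits 2 then 1: LIFO order */
        return 0;
}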
++ */ ++static void copy_callback(int err, void *context) ++{ ++ struct pending_exception *pe = (struct pending_exception *) context; ++ struct dm_snapshot *s = pe->snap; ++ ++ if (err) ++ pending_complete(pe, 0); ++ ++ else ++ /* Update the metadata if we are persistent */ ++ s->store.commit_exception(&s->store, &pe->e, commit_callback, ++ pe); ++} ++ ++/* ++ * Dispatches the copy operation to kcopyd. ++ */ ++static inline void start_copy(struct pending_exception *pe) ++{ ++ struct dm_snapshot *s = pe->snap; ++ struct kcopyd_region src, dest; ++ ++ src.dev = s->origin->dev; ++ src.sector = chunk_to_sector(s, pe->e.old_chunk); ++ src.count = s->chunk_size; ++ ++ dest.dev = s->cow->dev; ++ dest.sector = chunk_to_sector(s, pe->e.new_chunk); ++ dest.count = s->chunk_size; ++ ++ if (!pe->started) { ++ /* Hand over to kcopyd */ ++ kcopyd_copy(&src, &dest, copy_callback, pe); ++ pe->started = 1; ++ } ++} ++ ++/* ++ * Looks to see if this snapshot already has a pending exception ++ * for this chunk, otherwise it allocates a new one and inserts ++ * it into the pending table. ++ */ ++static struct pending_exception *find_pending_exception(struct dm_snapshot *s, ++ struct buffer_head *bh) ++{ ++ struct exception *e; ++ struct pending_exception *pe; ++ chunk_t chunk = sector_to_chunk(s, bh->b_rsector); ++ ++ /* ++ * Is there a pending exception for this already ? ++ */ ++ e = lookup_exception(&s->pending, chunk); ++ if (e) { ++ /* cast the exception to a pending exception */ ++ pe = list_entry(e, struct pending_exception, e); ++ ++ } else { ++ /* Create a new pending exception */ ++ pe = alloc_pending_exception(); ++ if (!pe) { ++ DMWARN("Couldn't allocate pending exception."); ++ return NULL; ++ } ++ ++ pe->e.old_chunk = chunk; ++ pe->origin_bhs = pe->snapshot_bhs = NULL; ++ INIT_LIST_HEAD(&pe->siblings); ++ pe->snap = s; ++ pe->started = 0; ++ ++ if (s->store.prepare_exception(&s->store, &pe->e)) { ++ free_pending_exception(pe); ++ s->valid = 0; ++ return NULL; ++ } ++ ++ insert_exception(&s->pending, &pe->e); ++ } ++ ++ return pe; ++} ++ ++static inline void remap_exception(struct dm_snapshot *s, struct exception *e, ++ struct buffer_head *bh) ++{ ++ bh->b_rdev = s->cow->dev; ++ bh->b_rsector = chunk_to_sector(s, e->new_chunk) + ++ (bh->b_rsector & s->chunk_mask); ++} ++ ++static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, int rw, ++ void **map_context) ++{ ++ struct exception *e; ++ struct dm_snapshot *s = (struct dm_snapshot *) ti->private; ++ int r = 1; ++ chunk_t chunk; ++ struct pending_exception *pe; ++ ++ chunk = sector_to_chunk(s, bh->b_rsector); ++ ++ /* Full snapshots are not usable */ ++ if (!s->valid) ++ return -1; ++ ++ /* ++ * Write to snapshot - higher level takes care of RW/RO ++ * flags so we should only get this if we are ++ * writeable. ++ */ ++ if (rw == WRITE) { ++ ++ down_write(&s->lock); ++ ++ /* If the block is already remapped - use that, else remap it */ ++ e = lookup_exception(&s->complete, chunk); ++ if (e) ++ remap_exception(s, e, bh); ++ ++ else { ++ pe = find_pending_exception(s, bh); ++ ++ if (!pe) { ++ s->store.drop_snapshot(&s->store); ++ s->valid = 0; ++ } ++ ++ queue_buffer(&pe->snapshot_bhs, bh); ++ start_copy(pe); ++ r = 0; ++ } ++ ++ up_write(&s->lock); ++ ++ } else { ++ /* ++ * FIXME: this read path scares me because we ++ * always use the origin when we have a pending ++ * exception. However I can't think of a ++ * situation where this is wrong - ejt. 
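remap_exception() above rebuilds the device sector from the exception's new chunk plus the offset within the chunk, where chunk_mask is chunk_size - 1 and chunk_shift is log2(chunk_size) as computed in snapshot_ctr(). A worked sketch with a hypothetical 16-sector chunk:

#include <assert.h>

typedef unsigned long long sector_t;
typedef sector_t chunk_t;

int main(void)
{
        /* hypothetical geometry: chunk_size 16 => shift 4, mask 15 */
        const int chunk_shift = 4;
        const sector_t chunk_mask = 15;

        sector_t sector = 1000;
        chunk_t old_chunk = sector >> chunk_shift;      /* 62 */
        chunk_t new_chunk = 5;                          /* allocated on COW */

        /* the remap_exception() arithmetic */
        sector_t remapped = (new_chunk << chunk_shift)
                          + (sector & chunk_mask);

        assert(old_chunk == 62);
        assert(remapped == 5 * 16 + 8);                 /* 88 */
        return 0;
}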
++ */ ++ ++ /* Do reads */ ++ down_read(&s->lock); ++ ++ /* See if it it has been remapped */ ++ e = lookup_exception(&s->complete, chunk); ++ if (e) ++ remap_exception(s, e, bh); ++ else ++ bh->b_rdev = s->origin->dev; ++ ++ up_read(&s->lock); ++ } ++ ++ return r; ++} ++ ++static void list_merge(struct list_head *l1, struct list_head *l2) ++{ ++ struct list_head *l1_n, *l2_p; ++ ++ l1_n = l1->next; ++ l2_p = l2->prev; ++ ++ l1->next = l2; ++ l2->prev = l1; ++ ++ l2_p->next = l1_n; ++ l1_n->prev = l2_p; ++} ++ ++static int __origin_write(struct list_head *snapshots, struct buffer_head *bh) ++{ ++ int r = 1; ++ struct list_head *sl; ++ struct dm_snapshot *snap; ++ struct exception *e; ++ struct pending_exception *pe, *last = NULL; ++ chunk_t chunk; ++ ++ /* Do all the snapshots on this origin */ ++ list_for_each(sl, snapshots) { ++ snap = list_entry(sl, struct dm_snapshot, list); ++ ++ /* Only deal with valid snapshots */ ++ if (!snap->valid) ++ continue; ++ ++ down_write(&snap->lock); ++ ++ /* ++ * Remember, different snapshots can have ++ * different chunk sizes. ++ */ ++ chunk = sector_to_chunk(snap, bh->b_rsector); ++ ++ /* ++ * Check exception table to see if block ++ * is already remapped in this snapshot ++ * and trigger an exception if not. ++ */ ++ e = lookup_exception(&snap->complete, chunk); ++ if (!e) { ++ pe = find_pending_exception(snap, bh); ++ if (!pe) { ++ snap->store.drop_snapshot(&snap->store); ++ snap->valid = 0; ++ ++ } else { ++ if (last) ++ list_merge(&pe->siblings, ++ &last->siblings); ++ ++ last = pe; ++ r = 0; ++ } ++ } ++ ++ up_write(&snap->lock); ++ } ++ ++ /* ++ * Now that we have a complete pe list we can start the copying. ++ */ ++ if (last) { ++ pe = last; ++ do { ++ down_write(&pe->snap->lock); ++ queue_buffer(&pe->origin_bhs, bh); ++ start_copy(pe); ++ up_write(&pe->snap->lock); ++ pe = list_entry(pe->siblings.next, ++ struct pending_exception, siblings); ++ ++ } while (pe != last); ++ } ++ ++ return r; ++} ++ ++static int snapshot_status(struct dm_target *ti, status_type_t type, ++ char *result, int maxlen) ++{ ++ struct dm_snapshot *snap = (struct dm_snapshot *) ti->private; ++ char cow[16]; ++ char org[16]; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ if (!snap->valid) ++ snprintf(result, maxlen, "Invalid"); ++ else { ++ if (snap->store.percent_full) ++ snprintf(result, maxlen, "%d%%", ++ snap->store.percent_full(&snap-> ++ store)); ++ else ++ snprintf(result, maxlen, "Unknown"); ++ } ++ break; ++ ++ case STATUSTYPE_TABLE: ++ /* ++ * kdevname returns a static pointer so we need ++ * to make private copies if the output is to ++ * make sense. ++ */ ++ strncpy(cow, kdevname(snap->cow->dev), sizeof(cow)); ++ strncpy(org, kdevname(snap->origin->dev), sizeof(org)); ++ snprintf(result, maxlen, "%s %s %c %ld", org, cow, ++ snap->type, snap->chunk_size); ++ break; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Called on a write from the origin driver. ++ */ ++int do_origin(struct dm_dev *origin, struct buffer_head *bh) ++{ ++ struct origin *o; ++ int r; ++ ++ down_read(&_origins_lock); ++ o = __lookup_origin(origin->dev); ++ if (!o) ++ BUG(); ++ ++ r = __origin_write(&o->snapshots, bh); ++ up_read(&_origins_lock); ++ ++ return r; ++} ++ ++/* ++ * Origin: maps a linear range of a device, with hooks for snapshotting. ++ */ ++ ++/* ++ * Construct an origin mapping: ++ * The context for an origin is merely a 'struct dm_dev *' ++ * pointing to the real device. 
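list_merge() above splices two circular doubly-linked rings into one, which is how pending exceptions for the same write across several snapshots become the "siblings" that __origin_write() walks with its do/while loop. A standalone illustration using the same four pointer assignments:

#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

static void init_list(struct list_head *l)
{
        l->next = l->prev = l;
}

/* The four assignments from list_merge() above. */
static void list_merge(struct list_head *l1, struct list_head *l2)
{
        struct list_head *l1_n = l1->next, *l2_p = l2->prev;

        l1->next = l2;
        l2->prev = l1;
        l2_p->next = l1_n;
        l1_n->prev = l2_p;
}

int main(void)
{
        struct list_head a, b, *p;
        int n = 0;

        init_list(&a);
        init_list(&b);
        list_merge(&a, &b);     /* a and b now share one two-node ring */

        p = &a;
        do {
                n++;
                p = p->next;
        } while (p != &a);

        printf("ring of %d nodes\n", n);    /* prints: ring of 2 nodes */
        return 0;
}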
++ */ ++static int origin_ctr(struct dm_target *ti, int argc, char **argv) ++{ ++ int r; ++ struct dm_dev *dev; ++ ++ if (argc != 1) { ++ ti->error = "dm-origin: incorrect number of arguments"; ++ return -EINVAL; ++ } ++ ++ r = dm_get_device(ti, argv[0], 0, ti->len, ++ dm_table_get_mode(ti->table), &dev); ++ if (r) { ++ ti->error = "Cannot get target device"; ++ return r; ++ } ++ ++ ti->private = dev; ++ ++ return 0; ++} ++ ++static void origin_dtr(struct dm_target *ti) ++{ ++ struct dm_dev *dev = (struct dm_dev *) ti->private; ++ dm_put_device(ti, dev); ++} ++ ++static int origin_map(struct dm_target *ti, struct buffer_head *bh, int rw, ++ void **map_context) ++{ ++ struct dm_dev *dev = (struct dm_dev *) ti->private; ++ bh->b_rdev = dev->dev; ++ ++ /* Only tell snapshots if this is a write */ ++ return (rw == WRITE) ? do_origin(dev, bh) : 1; ++} ++ ++static int origin_status(struct dm_target *ti, status_type_t type, char *result, ++ int maxlen) ++{ ++ struct dm_dev *dev = (struct dm_dev *) ti->private; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ result[0] = '\0'; ++ break; ++ ++ case STATUSTYPE_TABLE: ++ snprintf(result, maxlen, "%s", kdevname(dev->dev)); ++ break; ++ } ++ ++ return 0; ++} ++ ++static struct target_type origin_target = { ++ name: "snapshot-origin", ++ module: THIS_MODULE, ++ ctr: origin_ctr, ++ dtr: origin_dtr, ++ map: origin_map, ++ status: origin_status, ++}; ++ ++static struct target_type snapshot_target = { ++ name: "snapshot", ++ module: THIS_MODULE, ++ ctr: snapshot_ctr, ++ dtr: snapshot_dtr, ++ map: snapshot_map, ++ status: snapshot_status, ++}; ++ ++int __init dm_snapshot_init(void) ++{ ++ int r; ++ ++ r = dm_register_target(&snapshot_target); ++ if (r) { ++ DMERR("snapshot target register failed %d", r); ++ return r; ++ } ++ ++ r = dm_register_target(&origin_target); ++ if (r < 0) { ++ DMERR("Device mapper: Origin: register failed %d\n", r); ++ goto bad1; ++ } ++ ++ r = init_origin_hash(); ++ if (r) { ++ DMERR("init_origin_hash failed."); ++ goto bad2; ++ } ++ ++ exception_cache = kmem_cache_create("dm-snapshot-ex", ++ sizeof(struct exception), ++ __alignof__(struct exception), ++ 0, NULL, NULL); ++ if (!exception_cache) { ++ DMERR("Couldn't create exception cache."); ++ r = -ENOMEM; ++ goto bad3; ++ } ++ ++ pending_cache = ++ kmem_cache_create("dm-snapshot-in", ++ sizeof(struct pending_exception), ++ __alignof__(struct pending_exception), ++ 0, NULL, NULL); ++ if (!pending_cache) { ++ DMERR("Couldn't create pending cache."); ++ r = -ENOMEM; ++ goto bad4; ++ } ++ ++ pending_pool = mempool_create(128, mempool_alloc_slab, ++ mempool_free_slab, pending_cache); ++ if (!pending_pool) { ++ DMERR("Couldn't create pending pool."); ++ r = -ENOMEM; ++ goto bad5; ++ } ++ ++ return 0; ++ ++ bad5: ++ kmem_cache_destroy(pending_cache); ++ bad4: ++ kmem_cache_destroy(exception_cache); ++ bad3: ++ exit_origin_hash(); ++ bad2: ++ dm_unregister_target(&origin_target); ++ bad1: ++ dm_unregister_target(&snapshot_target); ++ return r; ++} ++ ++void dm_snapshot_exit(void) ++{ ++ int r; ++ ++ r = dm_unregister_target(&snapshot_target); ++ if (r) ++ DMERR("snapshot unregister failed %d", r); ++ ++ r = dm_unregister_target(&origin_target); ++ if (r) ++ DMERR("origin unregister failed %d", r); ++ ++ exit_origin_hash(); ++ mempool_destroy(pending_pool); ++ kmem_cache_destroy(pending_cache); ++ kmem_cache_destroy(exception_cache); ++} ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. 
++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -ruN linux-2.4.20/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapshot.h +--- linux-2.4.20/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-snapshot.h Wed Mar 26 12:53:19 2003 +@@ -0,0 +1,147 @@ ++/* ++ * dm-snapshot.c ++ * ++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#ifndef DM_SNAPSHOT_H ++#define DM_SNAPSHOT_H ++ ++#include "dm.h" ++#include ++ ++struct exception_table { ++ uint32_t hash_mask; ++ struct list_head *table; ++}; ++ ++/* ++ * The snapshot code deals with largish chunks of the disk at a ++ * time. Typically 64k - 256k. ++ */ ++/* FIXME: can we get away with limiting these to a uint32_t ? */ ++typedef sector_t chunk_t; ++ ++/* ++ * An exception is used where an old chunk of data has been ++ * replaced by a new one. ++ */ ++struct exception { ++ struct list_head hash_list; ++ ++ chunk_t old_chunk; ++ chunk_t new_chunk; ++}; ++ ++/* ++ * Abstraction to handle the meta/layout of exception stores (the ++ * COW device). ++ */ ++struct exception_store { ++ ++ /* ++ * Destroys this object when you've finished with it. ++ */ ++ void (*destroy) (struct exception_store *store); ++ ++ /* ++ * Find somewhere to store the next exception. ++ */ ++ int (*prepare_exception) (struct exception_store *store, ++ struct exception *e); ++ ++ /* ++ * Update the metadata with this exception. ++ */ ++ void (*commit_exception) (struct exception_store *store, ++ struct exception *e, ++ void (*callback) (void *, int success), ++ void *callback_context); ++ ++ /* ++ * The snapshot is invalid, note this in the metadata. ++ */ ++ void (*drop_snapshot) (struct exception_store *store); ++ ++ /* ++ * Return the %age full of the snapshot ++ */ ++ int (*percent_full) (struct exception_store *store); ++ ++ struct dm_snapshot *snap; ++ void *context; ++}; ++ ++struct dm_snapshot { ++ struct rw_semaphore lock; ++ struct dm_table *table; ++ ++ struct dm_dev *origin; ++ struct dm_dev *cow; ++ ++ /* List of snapshots per Origin */ ++ struct list_head list; ++ ++ /* Size of data blocks saved - must be a power of 2 */ ++ chunk_t chunk_size; ++ chunk_t chunk_mask; ++ chunk_t chunk_shift; ++ ++ /* You can't use a snapshot if this is 0 (e.g. if full) */ ++ int valid; ++ ++ /* Used for display of table */ ++ char type; ++ ++ /* The last percentage we notified */ ++ int last_percent; ++ ++ struct exception_table pending; ++ struct exception_table complete; ++ ++ /* The on disk metadata handler */ ++ struct exception_store store; ++}; ++ ++/* ++ * Used by the exception stores to load exceptions hen ++ * initialising. ++ */ ++int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); ++ ++/* ++ * Constructor and destructor for the default persistent ++ * store. ++ */ ++int dm_create_persistent(struct exception_store *store, uint32_t chunk_size); ++ ++int dm_create_transient(struct exception_store *store, ++ struct dm_snapshot *s, int blocksize); ++ ++/* ++ * Return the number of sectors in the device. 
++ */ ++static inline sector_t get_dev_size(kdev_t dev) ++{ ++ int *sizes; ++ ++ sizes = blk_size[MAJOR(dev)]; ++ if (sizes) ++ return sizes[MINOR(dev)] << 1; ++ ++ return 0; ++} ++ ++static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector) ++{ ++ return (sector & ~s->chunk_mask) >> s->chunk_shift; ++} ++ ++static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk) ++{ ++ return chunk << s->chunk_shift; ++} ++ ++#endif +diff -ruN linux-2.4.20/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c +--- linux-2.4.20/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-stripe.c Wed Mar 26 14:07:57 2003 +@@ -0,0 +1,257 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++ ++struct stripe { ++ struct dm_dev *dev; ++ sector_t physical_start; ++}; ++ ++struct stripe_c { ++ uint32_t stripes; ++ ++ /* The size of this target / num. stripes */ ++ uint32_t stripe_width; ++ ++ /* stripe chunk size */ ++ uint32_t chunk_shift; ++ sector_t chunk_mask; ++ ++ struct stripe stripe[0]; ++}; ++ ++static inline struct stripe_c *alloc_context(int stripes) ++{ ++ size_t len; ++ ++ if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), ++ stripes)) ++ return NULL; ++ ++ len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); ++ ++ return kmalloc(len, GFP_KERNEL); ++} ++ ++/* ++ * Parse a single pair ++ */ ++static int get_stripe(struct dm_target *ti, struct stripe_c *sc, ++ int stripe, char **argv) ++{ ++ sector_t start; ++ ++ if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1) ++ return -EINVAL; ++ ++ if (dm_get_device(ti, argv[0], start, sc->stripe_width, ++ dm_table_get_mode(ti->table), ++ &sc->stripe[stripe].dev)) ++ return -ENXIO; ++ ++ sc->stripe[stripe].physical_start = start; ++ return 0; ++} ++ ++/* ++ * FIXME: Nasty function, only present because we can't link ++ * against __moddi3 and __divdi3. ++ * ++ * returns a == b * n ++ */ ++static int multiple(sector_t a, sector_t b, sector_t *n) ++{ ++ sector_t acc, prev, i; ++ ++ *n = 0; ++ while (a >= b) { ++ for (acc = b, prev = 0, i = 1; ++ acc <= a; ++ prev = acc, acc <<= 1, i <<= 1) ++ ; ++ ++ a -= prev; ++ *n += i >> 1; ++ } ++ ++ return a == 0; ++} ++ ++/* ++ * Construct a striped mapping. ++ * [ ]+ ++ */ ++static int stripe_ctr(struct dm_target *ti, int argc, char **argv) ++{ ++ struct stripe_c *sc; ++ sector_t width; ++ uint32_t stripes; ++ uint32_t chunk_size; ++ char *end; ++ int r, i; ++ ++ if (argc < 2) { ++ ti->error = "dm-stripe: Not enough arguments"; ++ return -EINVAL; ++ } ++ ++ stripes = simple_strtoul(argv[0], &end, 10); ++ if (*end) { ++ ti->error = "dm-stripe: Invalid stripe count"; ++ return -EINVAL; ++ } ++ ++ chunk_size = simple_strtoul(argv[1], &end, 10); ++ if (*end) { ++ ti->error = "dm-stripe: Invalid chunk_size"; ++ return -EINVAL; ++ } ++ ++ /* ++ * chunk_size is a power of two ++ */ ++ if (!chunk_size || (chunk_size & (chunk_size - 1))) { ++ ti->error = "dm-stripe: Invalid chunk size"; ++ return -EINVAL; ++ } ++ ++ if (!multiple(ti->len, stripes, &width)) { ++ ti->error = "dm-stripe: Target length not divisable by " ++ "number of stripes"; ++ return -EINVAL; ++ } ++ ++ /* ++ * Do we have enough arguments for that many stripes ? 
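multiple() above divides by repeated doubling because 64-bit sector_t division would pull in __divdi3/__moddi3, which the kernel cannot link against. It returns whether b divides a exactly and accumulates the quotient in *n. The same algorithm, checked in user space:

#include <assert.h>

typedef unsigned long long sector_t;

/* Mirrors multiple() above: returns a == b * (*n). */
static int multiple(sector_t a, sector_t b, sector_t *n)
{
        sector_t acc, prev, i;

        *n = 0;
        while (a >= b) {
                for (acc = b, prev = 0, i = 1; acc <= a;
                     prev = acc, acc <<= 1, i <<= 1)
                        ;
                a -= prev;
                *n += i >> 1;
        }
        return a == 0;
}

int main(void)
{
        sector_t q;

        assert(multiple(1048576, 4, &q) && q == 262144);
        assert(!multiple(10, 3, &q) && q == 3);     /* 10 = 3 * 3 + 1 */
        return 0;
}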
++ */ ++ if (argc != (2 + 2 * stripes)) { ++ ti->error = "dm-stripe: Not enough destinations specified"; ++ return -EINVAL; ++ } ++ ++ sc = alloc_context(stripes); ++ if (!sc) { ++ ti->error = "dm-stripe: Memory allocation for striped context " ++ "failed"; ++ return -ENOMEM; ++ } ++ ++ sc->stripes = stripes; ++ sc->stripe_width = width; ++ ++ sc->chunk_mask = ((sector_t) chunk_size) - 1; ++ for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++) ++ chunk_size >>= 1; ++ sc->chunk_shift--; ++ ++ /* ++ * Get the stripe destinations. ++ */ ++ for (i = 0; i < stripes; i++) { ++ argv += 2; ++ ++ r = get_stripe(ti, sc, i, argv); ++ if (r < 0) { ++ ti->error = "dm-stripe: Couldn't parse stripe " ++ "destination"; ++ while (i--) ++ dm_put_device(ti, sc->stripe[i].dev); ++ kfree(sc); ++ return r; ++ } ++ } ++ ++ ti->private = sc; ++ return 0; ++} ++ ++static void stripe_dtr(struct dm_target *ti) ++{ ++ unsigned int i; ++ struct stripe_c *sc = (struct stripe_c *) ti->private; ++ ++ for (i = 0; i < sc->stripes; i++) ++ dm_put_device(ti, sc->stripe[i].dev); ++ ++ kfree(sc); ++} ++ ++static int stripe_map(struct dm_target *ti, struct buffer_head *bh, int rw, ++ void **context) ++{ ++ struct stripe_c *sc = (struct stripe_c *) ti->private; ++ ++ sector_t offset = bh->b_rsector - ti->begin; ++ uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift); ++ uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */ ++ chunk = chunk / sc->stripes; ++ ++ bh->b_rdev = sc->stripe[stripe].dev->dev; ++ bh->b_rsector = sc->stripe[stripe].physical_start + ++ (chunk << sc->chunk_shift) + (offset & sc->chunk_mask); ++ return 1; ++} ++ ++static int stripe_status(struct dm_target *ti, ++ status_type_t type, char *result, int maxlen) ++{ ++ struct stripe_c *sc = (struct stripe_c *) ti->private; ++ int offset; ++ int i; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ result[0] = '\0'; ++ break; ++ ++ case STATUSTYPE_TABLE: ++ offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT, ++ sc->stripes, sc->chunk_mask + 1); ++ for (i = 0; i < sc->stripes; i++) { ++ offset += ++ snprintf(result + offset, maxlen - offset, ++ " %s " SECTOR_FORMAT, ++ kdevname(to_kdev_t(sc->stripe[i].dev->bdev->bd_dev)), ++ sc->stripe[i].physical_start); ++ } ++ break; ++ } ++ return 0; ++} ++ ++static struct target_type stripe_target = { ++ .name = "striped", ++ .module = THIS_MODULE, ++ .ctr = stripe_ctr, ++ .dtr = stripe_dtr, ++ .map = stripe_map, ++ .status = stripe_status, ++}; ++ ++int __init dm_stripe_init(void) ++{ ++ int r; ++ ++ r = dm_register_target(&stripe_target); ++ if (r < 0) ++ DMWARN("striped target registration failed"); ++ ++ return r; ++} ++ ++void dm_stripe_exit(void) ++{ ++ if (dm_unregister_target(&stripe_target)) ++ DMWARN("striped target unregistration failed"); ++ ++ return; ++} +diff -ruN linux-2.4.20/drivers/md/dm-table.c linux/drivers/md/dm-table.c +--- linux-2.4.20/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-table.c Wed Mar 26 14:09:13 2003 +@@ -0,0 +1,666 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. 
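stripe_map() above decomposes a target-relative offset with one shift, one 32-bit modulus and one divide: the chunk index selects the stripe round-robin, the quotient is the chunk's position on that stripe, and the masked low bits are the offset inside the chunk. A worked sketch with hypothetical geometry (3 stripes, 8-sector chunks):

#include <assert.h>
#include <stdint.h>

typedef unsigned long long sector_t;

int main(void)
{
        const uint32_t stripes = 3;
        const int chunk_shift = 3;          /* chunk_size 8 => shift 3 */
        const sector_t chunk_mask = 7;

        sector_t offset = 100;              /* relative to ti->begin */
        uint32_t chunk = (uint32_t) (offset >> chunk_shift);    /* 12 */
        uint32_t stripe = chunk % stripes;  /* 12 mod 3 = stripe 0 */
        chunk = chunk / stripes;            /* 5th chunk on that stripe */

        sector_t dev_sector = ((sector_t) chunk << chunk_shift)
                            + (offset & chunk_mask);

        assert(stripe == 0);
        assert(dev_sector == (4 << 3) + 4); /* 36, plus physical_start */
        return 0;
}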
++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define MAX_DEPTH 16 ++#define NODE_SIZE L1_CACHE_BYTES ++#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) ++#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) ++ ++struct dm_table { ++ atomic_t holders; ++ ++ /* btree table */ ++ int depth; ++ int counts[MAX_DEPTH]; /* in nodes */ ++ sector_t *index[MAX_DEPTH]; ++ ++ int num_targets; ++ int num_allocated; ++ sector_t *highs; ++ struct dm_target *targets; ++ ++ /* ++ * Indicates the rw permissions for the new logical ++ * device. This should be a combination of FMODE_READ ++ * and FMODE_WRITE. ++ */ ++ int mode; ++ ++ /* a list of devices used by this table */ ++ struct list_head devices; ++ ++ /* ++ * A waitqueue for processes waiting for something ++ * interesting to happen to this table. ++ */ ++ wait_queue_head_t eventq; ++}; ++ ++/* ++ * Ceiling(n / size) ++ */ ++static inline unsigned long div_up(unsigned long n, unsigned long size) ++{ ++ return dm_round_up(n, size) / size; ++} ++ ++/* ++ * Similar to ceiling(log_size(n)) ++ */ ++static unsigned int int_log(unsigned long n, unsigned long base) ++{ ++ int result = 0; ++ ++ while (n > 1) { ++ n = div_up(n, base); ++ result++; ++ } ++ ++ return result; ++} ++ ++/* ++ * Calculate the index of the child node of the n'th node k'th key. ++ */ ++static inline int get_child(int n, int k) ++{ ++ return (n * CHILDREN_PER_NODE) + k; ++} ++ ++/* ++ * Return the n'th node of level l from table t. ++ */ ++static inline sector_t *get_node(struct dm_table *t, int l, int n) ++{ ++ return t->index[l] + (n * KEYS_PER_NODE); ++} ++ ++/* ++ * Return the highest key that you could lookup from the n'th ++ * node on level l of the btree. ++ */ ++static sector_t high(struct dm_table *t, int l, int n) ++{ ++ for (; l < t->depth - 1; l++) ++ n = get_child(n, CHILDREN_PER_NODE - 1); ++ ++ if (n >= t->counts[l]) ++ return (sector_t) - 1; ++ ++ return get_node(t, l, n)[KEYS_PER_NODE - 1]; ++} ++ ++/* ++ * Fills in a level of the btree based on the highs of the level ++ * below it. ++ */ ++static int setup_btree_index(int l, struct dm_table *t) ++{ ++ int n, k; ++ sector_t *node; ++ ++ for (n = 0; n < t->counts[l]; n++) { ++ node = get_node(t, l, n); ++ ++ for (k = 0; k < KEYS_PER_NODE; k++) ++ node[k] = high(t, l + 1, get_child(n, k)); ++ } ++ ++ return 0; ++} ++ ++/* ++ * highs, and targets are managed as dynamic arrays during a ++ * table load. ++ */ ++static int alloc_targets(struct dm_table *t, int num) ++{ ++ sector_t *n_highs; ++ struct dm_target *n_targets; ++ int n = t->num_targets; ++ ++ /* ++ * Allocate both the target array and offset array at once. 
++ */ ++ n_highs = (sector_t *) vcalloc(sizeof(struct dm_target) + ++ sizeof(sector_t), num); ++ if (!n_highs) ++ return -ENOMEM; ++ ++ n_targets = (struct dm_target *) (n_highs + num); ++ ++ if (n) { ++ memcpy(n_highs, t->highs, sizeof(*n_highs) * n); ++ memcpy(n_targets, t->targets, sizeof(*n_targets) * n); ++ } ++ ++ memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); ++ vfree(t->highs); ++ ++ t->num_allocated = num; ++ t->highs = n_highs; ++ t->targets = n_targets; ++ ++ return 0; ++} ++ ++int dm_table_create(struct dm_table **result, int mode) ++{ ++ struct dm_table *t = kmalloc(sizeof(*t), GFP_NOIO); ++ ++ if (!t) ++ return -ENOMEM; ++ ++ memset(t, 0, sizeof(*t)); ++ INIT_LIST_HEAD(&t->devices); ++ atomic_set(&t->holders, 1); ++ ++ /* allocate a single nodes worth of targets to begin with */ ++ if (alloc_targets(t, KEYS_PER_NODE)) { ++ kfree(t); ++ t = NULL; ++ return -ENOMEM; ++ } ++ ++ init_waitqueue_head(&t->eventq); ++ t->mode = mode; ++ *result = t; ++ return 0; ++} ++ ++static void free_devices(struct list_head *devices) ++{ ++ struct list_head *tmp, *next; ++ ++ for (tmp = devices->next; tmp != devices; tmp = next) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ next = tmp->next; ++ kfree(dd); ++ } ++} ++ ++void table_destroy(struct dm_table *t) ++{ ++ int i; ++ ++ /* destroying the table counts as an event */ ++ dm_table_event(t); ++ ++ /* free the indexes (see dm_table_complete) */ ++ if (t->depth >= 2) ++ vfree(t->index[t->depth - 2]); ++ ++ /* free the targets */ ++ for (i = 0; i < t->num_targets; i++) { ++ struct dm_target *tgt = t->targets + i; ++ ++ if (tgt->type->dtr) ++ tgt->type->dtr(tgt); ++ ++ dm_put_target_type(tgt->type); ++ } ++ ++ vfree(t->highs); ++ ++ /* free the device list */ ++ if (t->devices.next != &t->devices) { ++ DMWARN("devices still present during destroy: " ++ "dm_table_remove_device calls missing"); ++ ++ free_devices(&t->devices); ++ } ++ ++ kfree(t); ++} ++ ++void dm_table_get(struct dm_table *t) ++{ ++ atomic_inc(&t->holders); ++} ++ ++void dm_table_put(struct dm_table *t) ++{ ++ if (atomic_dec_and_test(&t->holders)) ++ table_destroy(t); ++} ++ ++/* ++ * Checks to see if we need to extend highs or targets. ++ */ ++static inline int check_space(struct dm_table *t) ++{ ++ if (t->num_targets >= t->num_allocated) ++ return alloc_targets(t, t->num_allocated * 2); ++ ++ return 0; ++} ++ ++/* ++ * Convert a device path to a dev_t. ++ */ ++static int lookup_device(const char *path, kdev_t *dev) ++{ ++ int r; ++ struct nameidata nd; ++ struct inode *inode; ++ ++ if (!path_init(path, LOOKUP_FOLLOW, &nd)) ++ return 0; ++ ++ if ((r = path_walk(path, &nd))) ++ goto out; ++ ++ inode = nd.dentry->d_inode; ++ if (!inode) { ++ r = -ENOENT; ++ goto out; ++ } ++ ++ if (!S_ISBLK(inode->i_mode)) { ++ r = -ENOTBLK; ++ goto out; ++ } ++ ++ *dev = inode->i_rdev; ++ ++ out: ++ path_release(&nd); ++ return r; ++} ++ ++/* ++ * See if we've already got a device in the list. ++ */ ++static struct dm_dev *find_device(struct list_head *l, kdev_t dev) ++{ ++ struct list_head *tmp; ++ ++ list_for_each(tmp, l) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ if (kdev_same(dd->dev, dev)) ++ return dd; ++ } ++ ++ return NULL; ++} ++ ++/* ++ * Open a device so we can use it as a map destination. 
++ */ ++static int open_dev(struct dm_dev *dd) ++{ ++ if (dd->bdev) ++ BUG(); ++ ++ dd->bdev = bdget(kdev_t_to_nr(dd->dev)); ++ if (!dd->bdev) ++ return -ENOMEM; ++ ++ return blkdev_get(dd->bdev, dd->mode, 0, BDEV_RAW); ++} ++ ++/* ++ * Close a device that we've been using. ++ */ ++static void close_dev(struct dm_dev *dd) ++{ ++ if (!dd->bdev) ++ return; ++ ++ blkdev_put(dd->bdev, BDEV_RAW); ++ dd->bdev = NULL; ++} ++ ++/* ++ * If possible (ie. blk_size[major] is set), this checks an area ++ * of a destination device is valid. ++ */ ++static int check_device_area(kdev_t dev, sector_t start, sector_t len) ++{ ++ int *sizes; ++ sector_t dev_size; ++ ++ if (!(sizes = blk_size[major(dev)]) || !(dev_size = sizes[minor(dev)])) ++ /* we don't know the device details, ++ * so give the benefit of the doubt */ ++ return 1; ++ ++ /* convert to 512-byte sectors */ ++ dev_size <<= 1; ++ ++ return ((start < dev_size) && (len <= (dev_size - start))); ++} ++ ++/* ++ * This upgrades the mode on an already open dm_dev. Being ++ * careful to leave things as they were if we fail to reopen the ++ * device. ++ */ ++static int upgrade_mode(struct dm_dev *dd, int new_mode) ++{ ++ int r; ++ struct dm_dev dd_copy; ++ ++ memcpy(&dd_copy, dd, sizeof(dd_copy)); ++ ++ dd->mode |= new_mode; ++ dd->bdev = NULL; ++ r = open_dev(dd); ++ if (!r) ++ close_dev(&dd_copy); ++ else ++ memcpy(dd, &dd_copy, sizeof(dd_copy)); ++ ++ return r; ++} ++ ++/* ++ * Add a device to the list, or just increment the usage count if ++ * it's already present. ++ */ ++int dm_get_device(struct dm_target *ti, const char *path, sector_t start, ++ sector_t len, int mode, struct dm_dev **result) ++{ ++ int r; ++ kdev_t dev; ++ struct dm_dev *dd; ++ int major, minor; ++ struct dm_table *t = ti->table; ++ ++ if (!t) ++ BUG(); ++ ++ if (sscanf(path, "%x:%x", &major, &minor) == 2) { ++ /* Extract the major/minor numbers */ ++ dev = mk_kdev(major, minor); ++ } else { ++ /* convert the path to a device */ ++ if ((r = lookup_device(path, &dev))) ++ return r; ++ } ++ ++ dd = find_device(&t->devices, dev); ++ if (!dd) { ++ dd = kmalloc(sizeof(*dd), GFP_KERNEL); ++ if (!dd) ++ return -ENOMEM; ++ ++ dd->dev = dev; ++ dd->mode = mode; ++ dd->bdev = NULL; ++ ++ if ((r = open_dev(dd))) { ++ kfree(dd); ++ return r; ++ } ++ ++ atomic_set(&dd->count, 0); ++ list_add(&dd->list, &t->devices); ++ ++ } else if (dd->mode != (mode | dd->mode)) { ++ r = upgrade_mode(dd, mode); ++ if (r) ++ return r; ++ } ++ atomic_inc(&dd->count); ++ ++ if (!check_device_area(dd->dev, start, len)) { ++ DMWARN("device %s too small for target", path); ++ dm_put_device(ti, dd); ++ return -EINVAL; ++ } ++ ++ *result = dd; ++ ++ return 0; ++} ++ ++/* ++ * Decrement a devices use count and remove it if neccessary. ++ */ ++void dm_put_device(struct dm_target *ti, struct dm_dev *dd) ++{ ++ if (atomic_dec_and_test(&dd->count)) { ++ close_dev(dd); ++ list_del(&dd->list); ++ kfree(dd); ++ } ++} ++ ++/* ++ * Checks to see if the target joins onto the end of the table. ++ */ ++static int adjoin(struct dm_table *table, struct dm_target *ti) ++{ ++ struct dm_target *prev; ++ ++ if (!table->num_targets) ++ return !ti->begin; ++ ++ prev = &table->targets[table->num_targets - 1]; ++ return (ti->begin == (prev->begin + prev->len)); ++} ++ ++/* ++ * Destructively splits up the argument list to pass to ctr. 
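check_device_area() above doubles the 1K-block count from blk_size[] to get 512-byte sectors, then validates the range as len <= dev_size - start rather than start + len <= dev_size, so the comparison cannot wrap. The same check in isolation:

#include <assert.h>

typedef unsigned long long sector_t;

/* Mirrors check_device_area(): all quantities in 512-byte sectors. */
static int area_ok(sector_t dev_size, sector_t start, sector_t len)
{
        return (start < dev_size) && (len <= (dev_size - start));
}

int main(void)
{
        sector_t dev_size = 1024ULL << 1;   /* 1024 1K blocks = 2048 sectors */

        assert(area_ok(dev_size, 0, 2048));
        assert(!area_ok(dev_size, 2047, 2));   /* runs off the end */
        assert(!area_ok(dev_size, ~0ULL, 1));  /* no overflow in the check */
        return 0;
}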
++ */ ++static int split_args(int max, int *argc, char **argv, char *input) ++{ ++ char *start, *end = input, *out; ++ *argc = 0; ++ ++ while (1) { ++ start = end; ++ ++ /* Skip whitespace */ ++ while (*start && isspace(*start)) ++ start++; ++ ++ if (!*start) ++ break; /* success, we hit the end */ ++ ++ /* 'out' is used to remove any back-quotes */ ++ end = out = start; ++ while (*end) { ++ /* Everything apart from '\0' can be quoted */ ++ if (*end == '\\' && *(end + 1)) { ++ *out++ = *(end + 1); ++ end += 2; ++ continue; ++ } ++ ++ if (isspace(*end)) ++ break; /* end of token */ ++ ++ *out++ = *end++; ++ } ++ ++ /* have we already filled the array ? */ ++ if ((*argc + 1) > max) ++ return -EINVAL; ++ ++ /* we know this is whitespace */ ++ if (*end) ++ end++; ++ ++ /* terminate the string and put it in the array */ ++ *out = '\0'; ++ argv[*argc] = start; ++ (*argc)++; ++ } ++ ++ return 0; ++} ++ ++int dm_table_add_target(struct dm_table *t, const char *type, ++ sector_t start, sector_t len, char *params) ++{ ++ int r = -EINVAL, argc; ++ char *argv[32]; ++ struct dm_target *tgt; ++ ++ if ((r = check_space(t))) ++ return r; ++ ++ tgt = t->targets + t->num_targets; ++ memset(tgt, 0, sizeof(*tgt)); ++ ++ tgt->type = dm_get_target_type(type); ++ if (!tgt->type) { ++ tgt->error = "unknown target type"; ++ return -EINVAL; ++ } ++ ++ tgt->table = t; ++ tgt->begin = start; ++ tgt->len = len; ++ tgt->error = "Unknown error"; ++ ++ /* ++ * Does this target adjoin the previous one ? ++ */ ++ if (!adjoin(t, tgt)) { ++ tgt->error = "Gap in table"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ r = split_args(ARRAY_SIZE(argv), &argc, argv, params); ++ if (r) { ++ tgt->error = "couldn't split parameters"; ++ goto bad; ++ } ++ ++ r = tgt->type->ctr(tgt, argc, argv); ++ if (r) ++ goto bad; ++ ++ t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; ++ return 0; ++ ++ bad: ++ printk(KERN_ERR DM_NAME ": %s\n", tgt->error); ++ dm_put_target_type(tgt->type); ++ return r; ++} ++ ++static int setup_indexes(struct dm_table *t) ++{ ++ int i, total = 0; ++ sector_t *indexes; ++ ++ /* allocate the space for *all* the indexes */ ++ for (i = t->depth - 2; i >= 0; i--) { ++ t->counts[i] = div_up(t->counts[i + 1], CHILDREN_PER_NODE); ++ total += t->counts[i]; ++ } ++ ++ indexes = (sector_t *) vcalloc(total, (unsigned long) NODE_SIZE); ++ if (!indexes) ++ return -ENOMEM; ++ ++ /* set up internal nodes, bottom-up */ ++ for (i = t->depth - 2, total = 0; i >= 0; i--) { ++ t->index[i] = indexes; ++ indexes += (KEYS_PER_NODE * t->counts[i]); ++ setup_btree_index(i, t); ++ } ++ ++ return 0; ++} ++ ++/* ++ * Builds the btree to index the map. ++ */ ++int dm_table_complete(struct dm_table *t) ++{ ++ int leaf_nodes, r = 0; ++ ++ /* how many indexes will the btree have ? */ ++ leaf_nodes = div_up(t->num_targets, KEYS_PER_NODE); ++ t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); ++ ++ /* leaf layer has already been set up */ ++ t->counts[t->depth - 1] = leaf_nodes; ++ t->index[t->depth - 1] = t->highs; ++ ++ if (t->depth >= 2) ++ r = setup_indexes(t); ++ ++ return r; ++} ++ ++void dm_table_event(struct dm_table *t) ++{ ++ wake_up_interruptible(&t->eventq); ++} ++ ++sector_t dm_table_get_size(struct dm_table *t) ++{ ++ return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; ++} ++ ++struct dm_target *dm_table_get_target(struct dm_table *t, int index) ++{ ++ if (index > t->num_targets) ++ return NULL; ++ ++ return t->targets + index; ++} ++ ++/* ++ * Search the btree for the correct target. 
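split_args() above tokenises the parameter string in place; backslash is the only quoting mechanism, taking the following character (a space included) literally. A user-space mirror of the routine (returning -1 where the kernel returns -EINVAL), with a small driver showing an escaped space surviving into a single argument:

#include <ctype.h>
#include <stdio.h>

/* Mirrors split_args() above: destructive, backslash-escaped tokens. */
static int split_args(int max, int *argc, char **argv, char *input)
{
        char *start, *end = input, *out;

        *argc = 0;
        while (1) {
                start = end;
                while (*start && isspace((unsigned char) *start))
                        start++;
                if (!*start)
                        break;

                end = out = start;
                while (*end) {
                        if (*end == '\\' && *(end + 1)) {
                                *out++ = *(end + 1);
                                end += 2;
                                continue;
                        }
                        if (isspace((unsigned char) *end))
                                break;
                        *out++ = *end++;
                }

                if ((*argc + 1) > max)
                        return -1;
                if (*end)
                        end++;
                *out = '\0';
                argv[*argc] = start;
                (*argc)++;
        }
        return 0;
}

int main(void)
{
        char line[] = "/dev/my\\ disk 0";   /* escaped space in the path */
        char *argv[8];
        int argc;

        split_args(8, &argc, argv, line);
        printf("%d: '%s' '%s'\n", argc, argv[0], argv[1]);
        /* prints: 2: '/dev/my disk' '0' */
        return 0;
}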
++ */ ++struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) ++{ ++ int l, n = 0, k = 0; ++ sector_t *node; ++ ++ for (l = 0; l < t->depth; l++) { ++ n = get_child(n, k); ++ node = get_node(t, l, n); ++ ++ for (k = 0; k < KEYS_PER_NODE; k++) ++ if (node[k] >= sector) ++ break; ++ } ++ ++ return &t->targets[(KEYS_PER_NODE * n) + k]; ++} ++ ++unsigned int dm_table_get_num_targets(struct dm_table *t) ++{ ++ return t->num_targets; ++} ++ ++struct list_head *dm_table_get_devices(struct dm_table *t) ++{ ++ return &t->devices; ++} ++ ++int dm_table_get_mode(struct dm_table *t) ++{ ++ return t->mode; ++} ++ ++void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq) ++{ ++ add_wait_queue(&t->eventq, wq); ++} ++ ++EXPORT_SYMBOL(dm_get_device); ++EXPORT_SYMBOL(dm_put_device); ++EXPORT_SYMBOL(dm_table_event); ++EXPORT_SYMBOL(dm_table_get_mode); +diff -ruN linux-2.4.20/drivers/md/dm-target.c linux/drivers/md/dm-target.c +--- linux-2.4.20/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-target.c Wed Mar 26 12:54:14 2003 +@@ -0,0 +1,187 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++ ++struct tt_internal { ++ struct target_type tt; ++ ++ struct list_head list; ++ long use; ++}; ++ ++static LIST_HEAD(_targets); ++static DECLARE_RWSEM(_lock); ++ ++#define DM_MOD_NAME_SIZE 32 ++ ++static inline struct tt_internal *__find_target_type(const char *name) ++{ ++ struct list_head *tih; ++ struct tt_internal *ti; ++ ++ list_for_each(tih, &_targets) { ++ ti = list_entry(tih, struct tt_internal, list); ++ ++ if (!strcmp(name, ti->tt.name)) ++ return ti; ++ } ++ ++ return NULL; ++} ++ ++static struct tt_internal *get_target_type(const char *name) ++{ ++ struct tt_internal *ti; ++ ++ down_read(&_lock); ++ ti = __find_target_type(name); ++ ++ if (ti) { ++ if (ti->use == 0 && ti->tt.module) ++ __MOD_INC_USE_COUNT(ti->tt.module); ++ ti->use++; ++ } ++ up_read(&_lock); ++ ++ return ti; ++} ++ ++static void load_module(const char *name) ++{ ++ char module_name[DM_MOD_NAME_SIZE] = "dm-"; ++ ++ /* Length check for strcat() below */ ++ if (strlen(name) > (DM_MOD_NAME_SIZE - 4)) ++ return; ++ ++ strcat(module_name, name); ++ request_module(module_name); ++} ++ ++struct target_type *dm_get_target_type(const char *name) ++{ ++ struct tt_internal *ti = get_target_type(name); ++ ++ if (!ti) { ++ load_module(name); ++ ti = get_target_type(name); ++ } ++ ++ return ti ? 
&ti->tt : NULL; ++} ++ ++void dm_put_target_type(struct target_type *t) ++{ ++ struct tt_internal *ti = (struct tt_internal *) t; ++ ++ down_read(&_lock); ++ if (--ti->use == 0 && ti->tt.module) ++ __MOD_DEC_USE_COUNT(ti->tt.module); ++ ++ if (ti->use < 0) ++ BUG(); ++ up_read(&_lock); ++ ++ return; ++} ++ ++static struct tt_internal *alloc_target(struct target_type *t) ++{ ++ struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL); ++ ++ if (ti) { ++ memset(ti, 0, sizeof(*ti)); ++ ti->tt = *t; ++ } ++ ++ return ti; ++} ++ ++int dm_register_target(struct target_type *t) ++{ ++ int rv = 0; ++ struct tt_internal *ti = alloc_target(t); ++ ++ if (!ti) ++ return -ENOMEM; ++ ++ down_write(&_lock); ++ if (__find_target_type(t->name)) ++ rv = -EEXIST; ++ else ++ list_add(&ti->list, &_targets); ++ ++ up_write(&_lock); ++ return rv; ++} ++ ++int dm_unregister_target(struct target_type *t) ++{ ++ struct tt_internal *ti; ++ ++ down_write(&_lock); ++ if (!(ti = __find_target_type(t->name))) { ++ up_write(&_lock); ++ return -EINVAL; ++ } ++ ++ if (ti->use) { ++ up_write(&_lock); ++ return -ETXTBSY; ++ } ++ ++ list_del(&ti->list); ++ kfree(ti); ++ ++ up_write(&_lock); ++ return 0; ++} ++ ++/* ++ * io-err: always fails an io, useful for bringing ++ * up LVs that have holes in them. ++ */ ++static int io_err_ctr(struct dm_target *ti, int argc, char **args) ++{ ++ return 0; ++} ++ ++static void io_err_dtr(struct dm_target *ti) ++{ ++ /* empty */ ++} ++ ++static int io_err_map(struct dm_target *ti, struct buffer_head *bh, int rw, ++ void **map_context) ++{ ++ return -EIO; ++} ++ ++static struct target_type error_target = { ++ .name = "error", ++ .ctr = io_err_ctr, ++ .dtr = io_err_dtr, ++ .map = io_err_map, ++}; ++ ++int dm_target_init(void) ++{ ++ return dm_register_target(&error_target); ++} ++ ++void dm_target_exit(void) ++{ ++ if (dm_unregister_target(&error_target)) ++ DMWARN("error target unregistration failed"); ++} ++ ++EXPORT_SYMBOL(dm_register_target); ++EXPORT_SYMBOL(dm_unregister_target); +diff -ruN linux-2.4.20/drivers/md/dm.c linux/drivers/md/dm.c +--- linux-2.4.20/drivers/md/dm.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm.c Wed Mar 26 14:23:27 2003 +@@ -0,0 +1,878 @@ ++/* ++ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++static const char *_name = DM_NAME; ++#define MAX_DEVICES (1 << MINORBITS) ++#define DEFAULT_READ_AHEAD 64 ++ ++static int major = 0; ++static int _major = 0; ++ ++struct dm_io { ++ struct mapped_device *md; ++ ++ struct dm_target *ti; ++ int rw; ++ void *map_context; ++ void (*end_io) (struct buffer_head * bh, int uptodate); ++ void *context; ++}; ++ ++struct deferred_io { ++ int rw; ++ struct buffer_head *bh; ++ struct deferred_io *next; ++}; ++ ++/* ++ * Bits for the md->flags field. ++ */ ++#define DMF_BLOCK_IO 0 ++#define DMF_SUSPENDED 1 ++ ++struct mapped_device { ++ struct rw_semaphore lock; ++ atomic_t holders; ++ ++ kdev_t dev; ++ unsigned long flags; ++ ++ /* ++ * A list of ios that arrived while we were suspended. ++ */ ++ atomic_t pending; ++ wait_queue_head_t wait; ++ struct deferred_io *deferred; ++ ++ /* ++ * The current mapping. ++ */ ++ struct dm_table *map; ++ ++ /* ++ * io objects are allocated from here. 
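The error target above is the smallest possible target_type: no constructor state and a map that fails every buffer. A sketch of an equally trivial out-of-tree target against the interfaces in this patch; the name "discard" and the dm_discard_* symbols are hypothetical, and the module would be named dm-discard so load_module() above can request it. It assumes, as snapshot_map() does, that returning 0 from map tells the core the target has taken ownership of the buffer:

/* Hypothetical example target; not part of this patch. */
#include "dm.h"

#include <linux/module.h>
#include <linux/fs.h>

static int discard_ctr(struct dm_target *ti, int argc, char **argv)
{
        return 0;               /* no arguments, no per-target state */
}

static void discard_dtr(struct dm_target *ti)
{
}

static int discard_map(struct dm_target *ti, struct buffer_head *bh, int rw,
                       void **map_context)
{
        bh->b_end_io(bh, 1);    /* complete immediately, pretend success */
        return 0;               /* 0: taken ownership, do not remap */
}

static struct target_type discard_target = {
        .name = "discard",
        .module = THIS_MODULE,
        .ctr = discard_ctr,
        .dtr = discard_dtr,
        .map = discard_map,
};

int __init dm_discard_init(void)
{
        return dm_register_target(&discard_target);
}

void dm_discard_exit(void)
{
        if (dm_unregister_target(&discard_target))
                DMWARN("discard target unregistration failed");
}

module_init(dm_discard_init);
module_exit(dm_discard_exit);
MODULE_LICENSE("GPL");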
++ */ ++ mempool_t *io_pool; ++}; ++ ++#define MIN_IOS 256 ++static kmem_cache_t *_io_cache; ++ ++/* block device arrays */ ++static int _block_size[MAX_DEVICES]; ++static int _blksize_size[MAX_DEVICES]; ++static int _hardsect_size[MAX_DEVICES]; ++ ++static struct mapped_device *get_kdev(kdev_t dev); ++static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh); ++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb); ++ ++static __init int local_init(void) ++{ ++ int r; ++ ++ /* allocate a slab for the dm_ios */ ++ _io_cache = kmem_cache_create("dm io", ++ sizeof(struct dm_io), 0, 0, NULL, NULL); ++ ++ if (!_io_cache) ++ return -ENOMEM; ++ ++ _major = major; ++ r = register_blkdev(_major, _name, &dm_blk_dops); ++ if (r < 0) { ++ DMERR("register_blkdev failed"); ++ kmem_cache_destroy(_io_cache); ++ return r; ++ } ++ ++ if (!_major) ++ _major = r; ++ ++ /* set up the arrays */ ++ read_ahead[_major] = DEFAULT_READ_AHEAD; ++ blk_size[_major] = _block_size; ++ blksize_size[_major] = _blksize_size; ++ hardsect_size[_major] = _hardsect_size; ++ ++ blk_queue_make_request(BLK_DEFAULT_QUEUE(_major), dm_request); ++ ++ return 0; ++} ++ ++static void local_exit(void) ++{ ++ kmem_cache_destroy(_io_cache); ++ ++ if (unregister_blkdev(_major, _name) < 0) ++ DMERR("devfs_unregister_blkdev failed"); ++ ++ read_ahead[_major] = 0; ++ blk_size[_major] = NULL; ++ blksize_size[_major] = NULL; ++ hardsect_size[_major] = NULL; ++ _major = 0; ++ ++ DMINFO("cleaned up"); ++} ++ ++/* ++ * We have a lot of init/exit functions, so it seems easier to ++ * store them in an array. The disposable macro 'xx' ++ * expands a prefix into a pair of function names. ++ */ ++static struct { ++ int (*init) (void); ++ void (*exit) (void); ++ ++} _inits[] = { ++#define xx(n) {n ## _init, n ## _exit}, ++ xx(local) ++ xx(dm_target) ++ xx(dm_linear) ++ xx(dm_stripe) ++ xx(dm_snapshot) ++ xx(dm_interface) ++#undef xx ++}; ++ ++static int __init dm_init(void) ++{ ++ const int count = ARRAY_SIZE(_inits); ++ ++ int r, i; ++ ++ for (i = 0; i < count; i++) { ++ r = _inits[i].init(); ++ if (r) ++ goto bad; ++ } ++ ++ return 0; ++ ++ bad: ++ while (i--) ++ _inits[i].exit(); ++ ++ return r; ++} ++ ++static void __exit dm_exit(void) ++{ ++ int i = ARRAY_SIZE(_inits); ++ ++ while (i--) ++ _inits[i].exit(); ++} ++ ++/* ++ * Block device functions ++ */ ++static int dm_blk_open(struct inode *inode, struct file *file) ++{ ++ struct mapped_device *md; ++ ++ md = get_kdev(inode->i_rdev); ++ if (!md) ++ return -ENXIO; ++ ++ return 0; ++} ++ ++static int dm_blk_close(struct inode *inode, struct file *file) ++{ ++ struct mapped_device *md; ++ ++ md = get_kdev(inode->i_rdev); ++ dm_put(md); /* put the reference gained by dm_blk_open */ ++ dm_put(md); ++ return 0; ++} ++ ++static inline struct dm_io *alloc_io(struct mapped_device *md) ++{ ++ return mempool_alloc(md->io_pool, GFP_NOIO); ++} ++ ++static inline void free_io(struct mapped_device *md, struct dm_io *io) ++{ ++ mempool_free(io, md->io_pool); ++} ++ ++static inline struct deferred_io *alloc_deferred(void) ++{ ++ return kmalloc(sizeof(struct deferred_io), GFP_NOIO); ++} ++ ++static inline void free_deferred(struct deferred_io *di) ++{ ++ kfree(di); ++} ++ ++/* In 512-byte units */ ++#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1) ++ ++/* FIXME: check this */ ++static int dm_blk_ioctl(struct inode *inode, struct file *file, ++ uint command, unsigned long a) ++{ ++ int minor = MINOR(inode->i_rdev); ++ long size; ++ ++ if (minor >= MAX_DEVICES) ++ return -ENXIO; ++ ++ 
switch (command) { ++ case BLKROSET: ++ case BLKROGET: ++ case BLKRASET: ++ case BLKRAGET: ++ case BLKFLSBUF: ++ case BLKSSZGET: ++ //case BLKRRPART: /* Re-read partition tables */ ++ //case BLKPG: ++ case BLKELVGET: ++ case BLKELVSET: ++ case BLKBSZGET: ++ case BLKBSZSET: ++ return blk_ioctl(inode->i_rdev, command, a); ++ break; ++ ++ case BLKGETSIZE: ++ size = VOLUME_SIZE(minor); ++ if (copy_to_user((void *) a, &size, sizeof(long))) ++ return -EFAULT; ++ break; ++ ++ case BLKGETSIZE64: ++ size = VOLUME_SIZE(minor); ++ if (put_user((u64) ((u64) size) << 9, (u64 *) a)) ++ return -EFAULT; ++ break; ++ ++ case BLKRRPART: ++ return -ENOTTY; ++ ++ case LV_BMAP: ++ return dm_user_bmap(inode, (struct lv_bmap *) a); ++ ++ default: ++ DMWARN("unknown block ioctl 0x%x", command); ++ return -ENOTTY; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Add the buffer to the list of deferred io. ++ */ ++static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw) ++{ ++ struct deferred_io *di; ++ ++ di = alloc_deferred(); ++ if (!di) ++ return -ENOMEM; ++ ++ down_write(&md->lock); ++ ++ if (!test_bit(DMF_BLOCK_IO, &md->flags)) { ++ up_write(&md->lock); ++ free_deferred(di); ++ return 1; ++ } ++ ++ di->bh = bh; ++ di->rw = rw; ++ di->next = md->deferred; ++ md->deferred = di; ++ ++ up_write(&md->lock); ++ return 0; /* deferred successfully */ ++} ++ ++/* ++ * bh->b_end_io routine that decrements the pending count ++ * and then calls the original bh->b_end_io fn. ++ */ ++static void dec_pending(struct buffer_head *bh, int uptodate) ++{ ++ int r; ++ struct dm_io *io = bh->b_private; ++ dm_endio_fn endio = io->ti->type->end_io; ++ ++ if (endio) { ++ r = endio(io->ti, bh, io->rw, uptodate ? 0 : -EIO, ++ io->map_context); ++ if (r < 0) ++ uptodate = 0; ++ ++ else if (r > 0) ++ /* the target wants another shot at the io */ ++ return; ++ } ++ ++ if (atomic_dec_and_test(&io->md->pending)) ++ /* nudge anyone waiting on suspend queue */ ++ wake_up(&io->md->wait); ++ ++ bh->b_end_io = io->end_io; ++ bh->b_private = io->context; ++ free_io(io->md, io); ++ ++ bh->b_end_io(bh, uptodate); ++} ++ ++/* ++ * Do the bh mapping for a given leaf ++ */ ++static inline int __map_buffer(struct mapped_device *md, int rw, ++ struct buffer_head *bh, struct dm_io *io) ++{ ++ struct dm_target *ti; ++ ++ ti = dm_table_find_target(md->map, bh->b_rsector); ++ if (!ti || !ti->type) ++ return -EINVAL; ++ ++ /* hook the end io request fn */ ++ atomic_inc(&md->pending); ++ io->md = md; ++ io->ti = ti; ++ io->rw = rw; ++ io->end_io = bh->b_end_io; ++ io->context = bh->b_private; ++ bh->b_end_io = dec_pending; ++ bh->b_private = io; ++ ++ return ti->type->map(ti, bh, rw, &io->map_context); ++} ++ ++/* ++ * Checks to see if we should be deferring io, if so it queues it ++ * and returns 1. ++ */ ++static inline int __deferring(struct mapped_device *md, int rw, ++ struct buffer_head *bh) ++{ ++ int r; ++ ++ /* ++ * If we're suspended we have to queue this io for later. ++ */ ++ while (test_bit(DMF_BLOCK_IO, &md->flags)) { ++ up_read(&md->lock); ++ ++ /* ++ * There's no point deferring a read ahead ++ * request, just drop it. 
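/*
 * User-space sketch, not part of the patch: exercising the
 * BLKGETSIZE case handled above, which returns the volume size in
 * 512-byte sectors as a long.  The device node path is an
 * assumption -- any node served by this driver will do.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* BLKGETSIZE */

int main(void)
{
	long sectors;
	int fd = open("/dev/mapper/vg0-lv0", O_RDONLY);	/* hypothetical */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, BLKGETSIZE, &sectors) < 0) {
		perror("BLKGETSIZE");
		return 1;
	}
	printf("%ld sectors, %ld KiB\n", sectors, sectors / 2);
	close(fd);
	return 0;
}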
++ */ ++ if (rw == READA) { ++ down_read(&md->lock); ++ return -EIO; ++ } ++ ++ r = queue_io(md, bh, rw); ++ down_read(&md->lock); ++ ++ if (r < 0) ++ return r; ++ ++ if (r == 0) ++ return 1; /* deferred successfully */ ++ ++ } ++ ++ return 0; ++} ++ ++static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh) ++{ ++ int r; ++ struct dm_io *io; ++ struct mapped_device *md; ++ ++ md = get_kdev(bh->b_rdev); ++ if (!md) { ++ buffer_IO_error(bh); ++ return 0; ++ } ++ ++ io = alloc_io(md); ++ down_read(&md->lock); ++ ++ r = __deferring(md, rw, bh); ++ if (r < 0) ++ goto bad; ++ ++ else if (!r) { ++ /* not deferring */ ++ r = __map_buffer(md, rw, bh, io); ++ if (r < 0) ++ goto bad; ++ } else ++ r = 0; ++ ++ up_read(&md->lock); ++ dm_put(md); ++ return r; ++ ++ bad: ++ buffer_IO_error(bh); ++ up_read(&md->lock); ++ dm_put(md); ++ return 0; ++} ++ ++static int check_dev_size(kdev_t dev, unsigned long block) ++{ ++ /* FIXME: check this */ ++ int minor = MINOR(dev); ++ unsigned long max_sector = (_block_size[minor] << 1) + 1; ++ unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9); ++ ++ return (sector > max_sector) ? 0 : 1; ++} ++ ++/* ++ * Creates a dummy buffer head and maps it (for lilo). ++ */ ++static int __bmap(struct mapped_device *md, kdev_t dev, unsigned long block, ++ kdev_t *r_dev, unsigned long *r_block) ++{ ++ struct buffer_head bh; ++ struct dm_target *ti; ++ void *map_context; ++ int r; ++ ++ if (test_bit(DMF_BLOCK_IO, &md->flags)) { ++ return -EPERM; ++ } ++ ++ if (!check_dev_size(dev, block)) { ++ return -EINVAL; ++ } ++ ++ /* setup dummy bh */ ++ memset(&bh, 0, sizeof(bh)); ++ bh.b_blocknr = block; ++ bh.b_dev = bh.b_rdev = dev; ++ bh.b_size = _blksize_size[MINOR(dev)]; ++ bh.b_rsector = block * (bh.b_size >> 9); ++ ++ /* find target */ ++ ti = dm_table_find_target(md->map, bh.b_rsector); ++ ++ /* do the mapping */ ++ r = ti->type->map(ti, &bh, READ, &map_context); ++ ti->type->end_io(ti, &bh, READ, 0, map_context); ++ ++ if (!r) { ++ *r_dev = bh.b_rdev; ++ *r_block = bh.b_rsector / (bh.b_size >> 9); ++ } ++ ++ return r; ++} ++ ++/* ++ * Marshals arguments and results between user and kernel space. ++ */ ++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb) ++{ ++ struct mapped_device *md; ++ unsigned long block, r_block; ++ kdev_t r_dev; ++ int r; ++ ++ if (get_user(block, &lvb->lv_block)) ++ return -EFAULT; ++ ++ md = get_kdev(inode->i_rdev); ++ if (!md) ++ return -ENXIO; ++ ++ down_read(&md->lock); ++ r = __bmap(md, inode->i_rdev, block, &r_dev, &r_block); ++ up_read(&md->lock); ++ dm_put(md); ++ ++ if (!r && (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) || ++ put_user(r_block, &lvb->lv_block))) ++ r = -EFAULT; ++ ++ return r; ++} ++ ++/*----------------------------------------------------------------- ++ * A bitset is used to keep track of allocated minor numbers. ++ *---------------------------------------------------------------*/ ++static spinlock_t _minor_lock = SPIN_LOCK_UNLOCKED; ++static struct mapped_device *_mds[MAX_DEVICES]; ++ ++static void free_minor(int minor) ++{ ++ spin_lock(&_minor_lock); ++ _mds[minor] = NULL; ++ spin_unlock(&_minor_lock); ++} ++ ++/* ++ * See if the device with a specific minor # is free. 
++ */ ++static int specific_minor(int minor, struct mapped_device *md) ++{ ++ int r = -EBUSY; ++ ++ if (minor >= MAX_DEVICES) { ++ DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)", ++ MAX_DEVICES); ++ return -EINVAL; ++ } ++ ++ spin_lock(&_minor_lock); ++ if (!_mds[minor]) { ++ _mds[minor] = md; ++ r = minor; ++ } ++ spin_unlock(&_minor_lock); ++ ++ return r; ++} ++ ++static int next_free_minor(struct mapped_device *md) ++{ ++ int i; ++ ++ spin_lock(&_minor_lock); ++ for (i = 0; i < MAX_DEVICES; i++) { ++ if (!_mds[i]) { ++ _mds[i] = md; ++ break; ++ } ++ } ++ spin_unlock(&_minor_lock); ++ ++ return (i < MAX_DEVICES) ? i : -EBUSY; ++} ++ ++static struct mapped_device *get_kdev(kdev_t dev) ++{ ++ struct mapped_device *md; ++ ++ if (major(dev) != _major) ++ return NULL; ++ ++ spin_lock(&_minor_lock); ++ md = _mds[minor(dev)]; ++ if (md) ++ dm_get(md); ++ spin_unlock(&_minor_lock); ++ ++ return md; ++} ++ ++/* ++ * Allocate and initialise a blank device with a given minor. ++ */ ++static struct mapped_device *alloc_dev(int minor) ++{ ++ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); ++ ++ if (!md) { ++ DMWARN("unable to allocate device, out of memory."); ++ return NULL; ++ } ++ ++ /* get a minor number for the dev */ ++ minor = (minor < 0) ? next_free_minor(md) : specific_minor(minor, md); ++ if (minor < 0) { ++ kfree(md); ++ return NULL; ++ } ++ ++ memset(md, 0, sizeof(*md)); ++ ++ md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, ++ mempool_free_slab, _io_cache); ++ if (!md->io_pool) { ++ free_minor(minor); ++ kfree(md); ++ return NULL; ++ } ++ ++ md->dev = mk_kdev(_major, minor); ++ init_rwsem(&md->lock); ++ atomic_set(&md->holders, 1); ++ atomic_set(&md->pending, 0); ++ init_waitqueue_head(&md->wait); ++ ++ return md; ++} ++ ++static void free_dev(struct mapped_device *md) ++{ ++ free_minor(minor(md->dev)); ++ mempool_destroy(md->io_pool); ++ kfree(md); ++} ++ ++/* ++ * The hardsect size for a mapped device is the largest hardsect size ++ * from the devices it maps onto. ++ */ ++static int __find_hardsect_size(struct list_head *devices) ++{ ++ int result = 512, size; ++ struct list_head *tmp; ++ ++ list_for_each(tmp, devices) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ size = get_hardsect_size(dd->dev); ++ if (size > result) ++ result = size; ++ } ++ ++ return result; ++} ++ ++/* ++ * Bind a table to the device. ++ */ ++static int __bind(struct mapped_device *md, struct dm_table *t) ++{ ++ int minor = minor(md->dev); ++ md->map = t; ++ ++ /* in k */ ++ _block_size[minor] = dm_table_get_size(t) >> 1; ++ _blksize_size[minor] = BLOCK_SIZE; ++ _hardsect_size[minor] = __find_hardsect_size(dm_table_get_devices(t)); ++ register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]); ++ ++ dm_table_get(t); ++ return 0; ++} ++ ++static void __unbind(struct mapped_device *md) ++{ ++ int minor = minor(md->dev); ++ ++ dm_table_put(md->map); ++ md->map = NULL; ++ ++ _block_size[minor] = 0; ++ _blksize_size[minor] = 0; ++ _hardsect_size[minor] = 0; ++} ++ ++/* ++ * Constructor for a new device. 
++ */ ++int dm_create(int minor, struct dm_table *table, struct mapped_device **result) ++{ ++ int r; ++ struct mapped_device *md; ++ ++ md = alloc_dev(minor); ++ if (!md) ++ return -ENXIO; ++ ++ r = __bind(md, table); ++ if (r) { ++ free_dev(md); ++ return r; ++ } ++ ++ *result = md; ++ return 0; ++} ++ ++void dm_get(struct mapped_device *md) ++{ ++ atomic_inc(&md->holders); ++} ++ ++void dm_put(struct mapped_device *md) ++{ ++ if (atomic_dec_and_test(&md->holders)) { ++ __unbind(md); ++ free_dev(md); ++ } ++} ++ ++/* ++ * Requeue the deferred io by calling generic_make_request. ++ */ ++static void flush_deferred_io(struct deferred_io *c) ++{ ++ struct deferred_io *n; ++ ++ while (c) { ++ n = c->next; ++ generic_make_request(c->rw, c->bh); ++ free_deferred(c); ++ c = n; ++ } ++} ++ ++/* ++ * Swap in a new table (destroying old one). ++ */ ++int dm_swap_table(struct mapped_device *md, struct dm_table *table) ++{ ++ int r; ++ ++ down_write(&md->lock); ++ ++ /* device must be suspended */ ++ if (!test_bit(DMF_SUSPENDED, &md->flags)) { ++ up_write(&md->lock); ++ return -EPERM; ++ } ++ ++ __unbind(md); ++ r = __bind(md, table); ++ if (r) ++ return r; ++ ++ up_write(&md->lock); ++ return 0; ++} ++ ++/* ++ * We need to be able to change a mapping table under a mounted ++ * filesystem. For example we might want to move some data in ++ * the background. Before the table can be swapped with ++ * dm_bind_table, dm_suspend must be called to flush any in ++ * flight io and ensure that any further io gets deferred. ++ */ ++int dm_suspend(struct mapped_device *md) ++{ ++ DECLARE_WAITQUEUE(wait, current); ++ ++ down_write(&md->lock); ++ ++ /* ++ * First we set the BLOCK_IO flag so no more ios will be ++ * mapped. ++ */ ++ if (test_bit(DMF_BLOCK_IO, &md->flags)) { ++ up_write(&md->lock); ++ return -EINVAL; ++ } ++ ++ set_bit(DMF_BLOCK_IO, &md->flags); ++ add_wait_queue(&md->wait, &wait); ++ up_write(&md->lock); ++ ++ /* ++ * Then we wait for the already mapped ios to ++ * complete. 
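/*
 * Annotation, not part of the patch: the sequence a caller (in
 * practice the ioctl layer) is expected to follow when replacing a
 * live mapping.  Note the comment above says dm_bind_table; the
 * exported operation is dm_swap_table.
 */
static int reload_sketch(struct mapped_device *md, struct dm_table *new_map)
{
	int r;

	r = dm_suspend(md);		/* block and drain in-flight io */
	if (r)
		return r;

	r = dm_swap_table(md, new_map);	/* old table is dropped here */
	if (r)
		return r;

	return dm_resume(md);		/* replay any deferred io */
}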
++ */ ++ run_task_queue(&tq_disk); ++ while (1) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ if (!atomic_read(&md->pending)) ++ break; ++ ++ schedule(); ++ } ++ ++ current->state = TASK_RUNNING; ++ ++ down_write(&md->lock); ++ remove_wait_queue(&md->wait, &wait); ++ set_bit(DMF_SUSPENDED, &md->flags); ++ up_write(&md->lock); ++ ++ return 0; ++} ++ ++int dm_resume(struct mapped_device *md) ++{ ++ struct deferred_io *def; ++ ++ down_write(&md->lock); ++ if (!test_bit(DMF_SUSPENDED, &md->flags) || ++ !dm_table_get_size(md->map)) { ++ up_write(&md->lock); ++ return -EINVAL; ++ } ++ ++ clear_bit(DMF_SUSPENDED, &md->flags); ++ clear_bit(DMF_BLOCK_IO, &md->flags); ++ def = md->deferred; ++ md->deferred = NULL; ++ up_write(&md->lock); ++ ++ flush_deferred_io(def); ++ run_task_queue(&tq_disk); ++ ++ return 0; ++} ++ ++struct dm_table *dm_get_table(struct mapped_device *md) ++{ ++ struct dm_table *t; ++ ++ down_read(&md->lock); ++ t = md->map; ++ dm_table_get(t); ++ up_read(&md->lock); ++ ++ return t; ++} ++ ++kdev_t dm_kdev(struct mapped_device *md) ++{ ++ kdev_t dev; ++ ++ down_read(&md->lock); ++ dev = md->dev; ++ up_read(&md->lock); ++ ++ return dev; ++} ++ ++int dm_suspended(struct mapped_device *md) ++{ ++ return test_bit(DMF_SUSPENDED, &md->flags); ++} ++ ++struct block_device_operations dm_blk_dops = { ++ .open = dm_blk_open, ++ .release = dm_blk_close, ++ .ioctl = dm_blk_ioctl, ++ .owner = THIS_MODULE ++}; ++ ++/* ++ * module hooks ++ */ ++module_init(dm_init); ++module_exit(dm_exit); ++ ++MODULE_PARM(major, "i"); ++MODULE_PARM_DESC(major, "The major number of the device mapper"); ++MODULE_DESCRIPTION(DM_NAME " driver"); ++MODULE_AUTHOR("Joe Thornber "); ++MODULE_LICENSE("GPL"); +diff -ruN linux-2.4.20/drivers/md/dm.h linux/drivers/md/dm.h +--- linux-2.4.20/drivers/md/dm.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm.h Wed Mar 26 14:12:00 2003 +@@ -0,0 +1,154 @@ ++/* ++ * Internal header file for device mapper ++ * ++ * Copyright (C) 2001, 2002 Sistina Software ++ * ++ * This file is released under the LGPL. ++ */ ++ ++#ifndef DM_INTERNAL_H ++#define DM_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++ ++#define DM_NAME "device-mapper" ++#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) ++#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) ++#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) ++ ++/* ++ * FIXME: I think this should be with the definition of sector_t ++ * in types.h. ++ */ ++#ifdef CONFIG_LBD ++#define SECTOR_FORMAT "%Lu" ++#else ++#define SECTOR_FORMAT "%lu" ++#endif ++ ++#define SECTOR_SHIFT 9 ++#define SECTOR_SIZE (1 << SECTOR_SHIFT) ++ ++extern struct block_device_operations dm_blk_dops; ++ ++/* ++ * List of devices that a metadevice uses and should open/close. ++ */ ++struct dm_dev { ++ struct list_head list; ++ ++ atomic_t count; ++ int mode; ++ kdev_t dev; ++ struct block_device *bdev; ++}; ++ ++struct dm_table; ++struct mapped_device; ++ ++/*----------------------------------------------------------------- ++ * Functions for manipulating a struct mapped_device. ++ * Drop the reference with dm_put when you finish with the object. ++ *---------------------------------------------------------------*/ ++int dm_create(int minor, struct dm_table *table, struct mapped_device **md); ++ ++/* ++ * Reference counting for md. ++ */ ++void dm_get(struct mapped_device *md); ++void dm_put(struct mapped_device *md); ++ ++/* ++ * A device can still be used while suspended, but I/O is deferred. 
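/*
 * Usage sketch, not part of the patch: dm_create() hands back a
 * device with one reference held; the last dm_put() unbinds the
 * table and frees the device.  Passing minor == -1 picks the next
 * free minor (see alloc_dev() in dm.c above).
 */
static int create_sketch(struct dm_table *table)
{
	struct mapped_device *md;
	int r;

	r = dm_create(-1, table, &md);
	if (r)
		return r;

	/* ... device is live and open for io ... */

	dm_put(md);			/* drop the creation reference */
	return 0;
}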
++ */ ++int dm_suspend(struct mapped_device *md); ++int dm_resume(struct mapped_device *md); ++ ++/* ++ * The device must be suspended before calling this method. ++ */ ++int dm_swap_table(struct mapped_device *md, struct dm_table *t); ++ ++/* ++ * Drop a reference on the table when you've finished with the ++ * result. ++ */ ++struct dm_table *dm_get_table(struct mapped_device *md); ++ ++/* ++ * Info functions. ++ */ ++kdev_t dm_kdev(struct mapped_device *md); ++int dm_suspended(struct mapped_device *md); ++ ++/*----------------------------------------------------------------- ++ * Functions for manipulating a table. Tables are also reference ++ * counted. ++ *---------------------------------------------------------------*/ ++int dm_table_create(struct dm_table **result, int mode); ++ ++void dm_table_get(struct dm_table *t); ++void dm_table_put(struct dm_table *t); ++ ++int dm_table_add_target(struct dm_table *t, const char *type, ++ sector_t start, sector_t len, char *params); ++int dm_table_complete(struct dm_table *t); ++void dm_table_event(struct dm_table *t); ++sector_t dm_table_get_size(struct dm_table *t); ++struct dm_target *dm_table_get_target(struct dm_table *t, int index); ++struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); ++unsigned int dm_table_get_num_targets(struct dm_table *t); ++struct list_head *dm_table_get_devices(struct dm_table *t); ++int dm_table_get_mode(struct dm_table *t); ++void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq); ++ ++/*----------------------------------------------------------------- ++ * A registry of target types. ++ *---------------------------------------------------------------*/ ++int dm_target_init(void); ++void dm_target_exit(void); ++struct target_type *dm_get_target_type(const char *name); ++void dm_put_target_type(struct target_type *t); ++ ++ ++/*----------------------------------------------------------------- ++ * Useful inlines. ++ *---------------------------------------------------------------*/ ++static inline int array_too_big(unsigned long fixed, unsigned long obj, ++ unsigned long num) ++{ ++ return (num > (ULONG_MAX - fixed) / obj); ++} ++ ++/* ++ * ceiling(n / size) * size ++ */ ++static inline unsigned long dm_round_up(unsigned long n, unsigned long size) ++{ ++ unsigned long r = n % size; ++ return n + (r ? (size - r) : 0); ++} ++ ++/* ++ * The device-mapper can be driven through one of two interfaces; ++ * ioctl or filesystem, depending which patch you have applied. ++ */ ++int dm_interface_init(void); ++void dm_interface_exit(void); ++ ++/* ++ * Targets for linear and striped mappings ++ */ ++int dm_linear_init(void); ++void dm_linear_exit(void); ++ ++int dm_stripe_init(void); ++void dm_stripe_exit(void); ++ ++int dm_snapshot_init(void); ++void dm_snapshot_exit(void); ++ ++#endif +diff -ruN linux-2.4.20/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c +--- linux-2.4.20/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/kcopyd.c Wed Mar 26 12:54:17 2003 +@@ -0,0 +1,839 @@ ++/* ++ * Copyright (C) 2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. 
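/*
 * Worked example, not part of the patch, for the dm.h helper just
 * above: dm_round_up(n, size) computes ceiling(n / size) * size.
 * A stand-alone user-space check of the arithmetic:
 */
#include <assert.h>

static unsigned long round_up_copy(unsigned long n, unsigned long size)
{
	unsigned long r = n % size;	/* same body as dm_round_up() */
	return n + (r ? (size - r) : 0);
}

int main(void)
{
	assert(round_up_copy(1000, 512) == 1024);	/* rounded up */
	assert(round_up_copy(1024, 512) == 1024);	/* already aligned */
	assert(round_up_copy(1, 4096) == 4096);
	return 0;
}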
++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "kcopyd.h" ++ ++/* FIXME: this is only needed for the DMERR macros */ ++#include "dm.h" ++ ++static void wake_kcopyd(void); ++ ++/*----------------------------------------------------------------- ++ * We reserve our own pool of preallocated pages that are ++ * only used for kcopyd io. ++ *---------------------------------------------------------------*/ ++ ++/* ++ * FIXME: This should be configurable. ++ */ ++#define NUM_PAGES 512 ++ ++static DECLARE_MUTEX(_pages_lock); ++static int _num_free_pages; ++static struct page *_pages_array[NUM_PAGES]; ++static DECLARE_MUTEX(start_lock); ++ ++static int init_pages(void) ++{ ++ int i; ++ struct page *p; ++ ++ for (i = 0; i < NUM_PAGES; i++) { ++ p = alloc_page(GFP_KERNEL); ++ if (!p) ++ goto bad; ++ ++ LockPage(p); ++ _pages_array[i] = p; ++ } ++ ++ _num_free_pages = NUM_PAGES; ++ return 0; ++ ++ bad: ++ while (i--) { ++ UnlockPage(_pages_array[i]); ++ __free_page(_pages_array[i]); ++ } ++ return -ENOMEM; ++} ++ ++static void exit_pages(void) ++{ ++ int i; ++ struct page *p; ++ ++ for (i = 0; i < NUM_PAGES; i++) { ++ p = _pages_array[i]; ++ UnlockPage(p); ++ __free_page(p); ++ } ++ ++ _num_free_pages = 0; ++} ++ ++static int kcopyd_get_pages(int num, struct page **result) ++{ ++ int i; ++ ++ down(&_pages_lock); ++ if (_num_free_pages < num) { ++ up(&_pages_lock); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < num; i++) { ++ _num_free_pages--; ++ result[i] = _pages_array[_num_free_pages]; ++ } ++ up(&_pages_lock); ++ ++ return 0; ++} ++ ++static void kcopyd_free_pages(int num, struct page **result) ++{ ++ int i; ++ ++ down(&_pages_lock); ++ for (i = 0; i < num; i++) ++ _pages_array[_num_free_pages++] = result[i]; ++ up(&_pages_lock); ++} ++ ++/*----------------------------------------------------------------- ++ * We keep our own private pool of buffer_heads. These are just ++ * held in a list on the b_reqnext field. ++ *---------------------------------------------------------------*/ ++ ++/* ++ * Make sure we have enough buffers to always keep the pages ++ * occupied. So we assume the worst case scenario where blocks ++ * are the size of a single sector. ++ */ ++#define NUM_BUFFERS NUM_PAGES * (PAGE_SIZE / SECTOR_SIZE) ++ ++static spinlock_t _buffer_lock = SPIN_LOCK_UNLOCKED; ++static struct buffer_head *_all_buffers; ++static struct buffer_head *_free_buffers; ++ ++static int init_buffers(void) ++{ ++ int i; ++ struct buffer_head *buffers; ++ ++ buffers = vcalloc(NUM_BUFFERS, sizeof(struct buffer_head)); ++ if (!buffers) { ++ DMWARN("Couldn't allocate buffer heads."); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < NUM_BUFFERS; i++) { ++ if (i < NUM_BUFFERS - 1) ++ buffers[i].b_reqnext = &buffers[i + 1]; ++ init_waitqueue_head(&buffers[i].b_wait); ++ INIT_LIST_HEAD(&buffers[i].b_inode_buffers); ++ } ++ ++ _all_buffers = _free_buffers = buffers; ++ return 0; ++} ++ ++static void exit_buffers(void) ++{ ++ vfree(_all_buffers); ++} ++ ++static struct buffer_head *alloc_buffer(void) ++{ ++ struct buffer_head *r; ++ int flags; ++ ++ spin_lock_irqsave(&_buffer_lock, flags); ++ ++ if (!_free_buffers) ++ r = NULL; ++ else { ++ r = _free_buffers; ++ _free_buffers = _free_buffers->b_reqnext; ++ r->b_reqnext = NULL; ++ } ++ ++ spin_unlock_irqrestore(&_buffer_lock, flags); ++ ++ return r; ++} ++ ++/* ++ * Only called from interrupt context. 
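/*
 * Annotation, not part of the patch: with 4 KiB pages the reserve
 * above comes to NUM_PAGES = 512 locked pages (2 MiB) and
 * NUM_BUFFERS = 512 * (4096 / 512) = 4096 buffer heads, i.e. enough
 * heads to cover every reserved page even in the worst case of
 * single-sector blocks.
 */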
++ */ ++static void free_buffer(struct buffer_head *bh) ++{ ++ int flags, was_empty; ++ ++ spin_lock_irqsave(&_buffer_lock, flags); ++ was_empty = (_free_buffers == NULL) ? 1 : 0; ++ bh->b_reqnext = _free_buffers; ++ _free_buffers = bh; ++ spin_unlock_irqrestore(&_buffer_lock, flags); ++ ++ /* ++ * If the buffer list was empty then kcopyd probably went ++ * to sleep because it ran out of buffer heads, so let's ++ * wake it up. ++ */ ++ if (was_empty) ++ wake_kcopyd(); ++} ++ ++/*----------------------------------------------------------------- ++ * kcopyd_jobs need to be allocated by the *clients* of kcopyd, ++ * for this reason we use a mempool to prevent the client from ++ * ever having to do io (which could cause a ++ * deadlock). ++ *---------------------------------------------------------------*/ ++#define MIN_JOBS NUM_PAGES ++ ++static kmem_cache_t *_job_cache = NULL; ++static mempool_t *_job_pool = NULL; ++ ++/* ++ * We maintain three lists of jobs: ++ * ++ * i) jobs waiting for pages ++ * ii) jobs that have pages, and are waiting for the io to be issued. ++ * iii) jobs that have completed. ++ * ++ * All three of these are protected by job_lock. ++ */ ++ ++static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED; ++ ++static LIST_HEAD(_complete_jobs); ++static LIST_HEAD(_io_jobs); ++static LIST_HEAD(_pages_jobs); ++ ++static int init_jobs(void) ++{ ++ INIT_LIST_HEAD(&_complete_jobs); ++ INIT_LIST_HEAD(&_io_jobs); ++ INIT_LIST_HEAD(&_pages_jobs); ++ ++ _job_cache = kmem_cache_create("kcopyd-jobs", sizeof(struct kcopyd_job), ++ __alignof__(struct kcopyd_job), ++ 0, NULL, NULL); ++ if (!_job_cache) ++ return -ENOMEM; ++ ++ _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, ++ mempool_free_slab, _job_cache); ++ if (!_job_pool) { ++ kmem_cache_destroy(_job_cache); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static void exit_jobs(void) ++{ ++ mempool_destroy(_job_pool); ++ kmem_cache_destroy(_job_cache); ++} ++ ++struct kcopyd_job *kcopyd_alloc_job(void) ++{ ++ struct kcopyd_job *job; ++ ++ job = mempool_alloc(_job_pool, GFP_NOIO); ++ if (!job) ++ return NULL; ++ ++ memset(job, 0, sizeof(*job)); ++ return job; ++} ++ ++void kcopyd_free_job(struct kcopyd_job *job) ++{ ++ mempool_free(job, _job_pool); ++} ++ ++/* ++ * Functions to push and pop a job onto the head of a given job ++ * list. ++ */ ++static inline struct kcopyd_job *pop(struct list_head *jobs) ++{ ++ struct kcopyd_job *job = NULL; ++ int flags; ++ ++ spin_lock_irqsave(&_job_lock, flags); ++ ++ if (!list_empty(jobs)) { ++ job = list_entry(jobs->next, struct kcopyd_job, list); ++ list_del(&job->list); ++ } ++ spin_unlock_irqrestore(&_job_lock, flags); ++ ++ return job; ++} ++ ++static inline void push(struct list_head *jobs, struct kcopyd_job *job) ++{ ++ int flags; ++ ++ spin_lock_irqsave(&_job_lock, flags); ++ list_add(&job->list, jobs); ++ spin_unlock_irqrestore(&_job_lock, flags); ++} ++ ++/* ++ * Completion function for one of our buffers. ++ */ ++static void end_bh(struct buffer_head *bh, int uptodate) ++{ ++ struct kcopyd_job *job = bh->b_private; ++ ++ mark_buffer_uptodate(bh, uptodate); ++ unlock_buffer(bh); ++ ++ if (!uptodate) ++ job->err = -EIO; ++ ++ /* are we the last ? 
*/ ++ if (atomic_dec_and_test(&job->nr_incomplete)) { ++ push(&_complete_jobs, job); ++ wake_kcopyd(); ++ } ++ ++ free_buffer(bh); ++} ++ ++static void dispatch_bh(struct kcopyd_job *job, ++ struct buffer_head *bh, int block) ++{ ++ int p; ++ ++ /* ++ * Add in the job offset ++ */ ++ bh->b_blocknr = (job->disk.sector >> job->block_shift) + block; ++ ++ p = block >> job->bpp_shift; ++ block &= job->bpp_mask; ++ ++ bh->b_size = job->block_size; ++ set_bh_page(bh, job->pages[p], ((block << job->block_shift) + ++ job->offset) << SECTOR_SHIFT); ++ bh->b_this_page = bh; ++ ++ init_buffer(bh, end_bh, job); ++ ++ bh->b_dev = job->disk.dev; ++ atomic_set(&bh->b_count, 1); ++ ++ bh->b_state = ((1 << BH_Uptodate) | (1 << BH_Mapped) | ++ (1 << BH_Lock) | (1 << BH_Req)); ++ ++ if (job->rw == WRITE) ++ clear_bit(BH_Dirty, &bh->b_state); ++ ++ submit_bh(job->rw, bh); ++} ++ ++/* ++ * These three functions process 1 item from the corresponding ++ * job list. ++ * ++ * They return: ++ * < 0: error ++ * 0: success ++ * > 0: can't process yet. ++ */ ++static int run_complete_job(struct kcopyd_job *job) ++{ ++ job->callback(job); ++ return 0; ++} ++ ++/* ++ * Request io on as many buffer heads as we can currently get for ++ * a particular job. ++ */ ++static int run_io_job(struct kcopyd_job *job) ++{ ++ unsigned int block; ++ struct buffer_head *bh; ++ ++ for (block = atomic_read(&job->nr_requested); ++ block < job->nr_blocks; block++) { ++ bh = alloc_buffer(); ++ if (!bh) ++ break; ++ ++ atomic_inc(&job->nr_requested); ++ dispatch_bh(job, bh, block); ++ } ++ ++ return (block == job->nr_blocks) ? 0 : 1; ++} ++ ++static int run_pages_job(struct kcopyd_job *job) ++{ ++ int r; ++ ++ job->nr_pages = (job->disk.count + job->offset) / ++ (PAGE_SIZE / SECTOR_SIZE); ++ r = kcopyd_get_pages(job->nr_pages, job->pages); ++ ++ if (!r) { ++ /* this job is ready for io */ ++ push(&_io_jobs, job); ++ return 0; ++ } ++ ++ if (r == -ENOMEM) ++ /* can't complete now */ ++ return 1; ++ ++ return r; ++} ++ ++/* ++ * Run through a list for as long as possible. Returns the count ++ * of successful jobs. ++ */ ++static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) ++{ ++ struct kcopyd_job *job; ++ int r, count = 0; ++ ++ while ((job = pop(jobs))) { ++ ++ r = fn(job); ++ ++ if (r < 0) { ++ /* error this rogue job */ ++ job->err = r; ++ push(&_complete_jobs, job); ++ break; ++ } ++ ++ if (r > 0) { ++ /* ++ * We couldn't service this job ATM, so ++ * push this job back onto the list. ++ */ ++ push(jobs, job); ++ break; ++ } ++ ++ count++; ++ } ++ ++ return count; ++} ++ ++/* ++ * kcopyd does this every time it's woken up. ++ */ ++static void do_work(void) ++{ ++ int count; ++ ++ /* ++ * We loop round until there is no more work to do. 
++ */ ++ do { ++ count = process_jobs(&_complete_jobs, run_complete_job); ++ count += process_jobs(&_io_jobs, run_io_job); ++ count += process_jobs(&_pages_jobs, run_pages_job); ++ ++ } while (count); ++ ++ run_task_queue(&tq_disk); ++} ++ ++/*----------------------------------------------------------------- ++ * The daemon ++ *---------------------------------------------------------------*/ ++static atomic_t _kcopyd_must_die; ++static DECLARE_MUTEX(_run_lock); ++static DECLARE_WAIT_QUEUE_HEAD(_job_queue); ++ ++static int kcopyd(void *arg) ++{ ++ DECLARE_WAITQUEUE(wq, current); ++ ++ daemonize(); ++ strcpy(current->comm, "kcopyd"); ++ atomic_set(&_kcopyd_must_die, 0); ++ ++ add_wait_queue(&_job_queue, &wq); ++ ++ down(&_run_lock); ++ up(&start_lock); ++ ++ while (1) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ if (atomic_read(&_kcopyd_must_die)) ++ break; ++ ++ do_work(); ++ schedule(); ++ } ++ ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&_job_queue, &wq); ++ ++ up(&_run_lock); ++ ++ return 0; ++} ++ ++static int start_daemon(void) ++{ ++ static pid_t pid = 0; ++ ++ down(&start_lock); ++ ++ pid = kernel_thread(kcopyd, NULL, 0); ++ if (pid <= 0) { ++ DMERR("Failed to start kcopyd thread"); ++ return -EAGAIN; ++ } ++ ++ /* ++ * wait for the daemon to up this mutex. ++ */ ++ down(&start_lock); ++ up(&start_lock); ++ ++ return 0; ++} ++ ++static int stop_daemon(void) ++{ ++ atomic_set(&_kcopyd_must_die, 1); ++ wake_kcopyd(); ++ down(&_run_lock); ++ up(&_run_lock); ++ ++ return 0; ++} ++ ++static void wake_kcopyd(void) ++{ ++ wake_up_interruptible(&_job_queue); ++} ++ ++static int calc_shift(unsigned int n) ++{ ++ int s; ++ ++ for (s = 0; n; s++, n >>= 1) ++ ; ++ ++ return --s; ++} ++ ++static void calc_block_sizes(struct kcopyd_job *job) ++{ ++ job->block_size = get_hardsect_size(job->disk.dev); ++ job->block_shift = calc_shift(job->block_size / SECTOR_SIZE); ++ job->bpp_shift = PAGE_SHIFT - job->block_shift - SECTOR_SHIFT; ++ job->bpp_mask = (1 << job->bpp_shift) - 1; ++ job->nr_blocks = job->disk.count >> job->block_shift; ++ atomic_set(&job->nr_requested, 0); ++ atomic_set(&job->nr_incomplete, job->nr_blocks); ++} ++ ++int kcopyd_io(struct kcopyd_job *job) ++{ ++ calc_block_sizes(job); ++ push(job->pages[0] ? &_io_jobs : &_pages_jobs, job); ++ wake_kcopyd(); ++ return 0; ++} ++ ++/*----------------------------------------------------------------- ++ * The copier is implemented on top of the simpler async io ++ * daemon above. 
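/*
 * Usage sketch, not part of the patch: copying nr_sectors from one
 * device to another and sleeping until the notify callback fires.
 * The completion-based waiter is illustrative; the client count
 * calls make kcopyd start its daemon and allocate its reserves
 * (see kcopyd.h below).
 */
#include <linux/completion.h>

static void copy_done(int err, void *context)
{
	if (err)
		DMWARN("copy failed: %d", err);
	complete((struct completion *) context);
}

static int copy_sketch(kdev_t from_dev, kdev_t to_dev, sector_t nr_sectors)
{
	struct kcopyd_region from = { from_dev, 0, nr_sectors };
	struct kcopyd_region to = { to_dev, 0, nr_sectors };
	struct completion done;
	int r;

	init_completion(&done);
	kcopyd_inc_client_count();	/* first client starts the daemon */

	r = kcopyd_copy(&from, &to, copy_done, &done);
	if (!r)
		wait_for_completion(&done);

	kcopyd_dec_client_count();
	return r;
}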
++ *---------------------------------------------------------------*/ ++struct copy_info { ++ kcopyd_notify_fn notify; ++ void *notify_context; ++ ++ struct kcopyd_region to; ++}; ++ ++#define MIN_INFOS 128 ++static kmem_cache_t *_copy_cache = NULL; ++static mempool_t *_copy_pool = NULL; ++ ++static int init_copier(void) ++{ ++ _copy_cache = kmem_cache_create("kcopyd-info", ++ sizeof(struct copy_info), ++ __alignof__(struct copy_info), ++ 0, NULL, NULL); ++ if (!_copy_cache) ++ return -ENOMEM; ++ ++ _copy_pool = mempool_create(MIN_INFOS, mempool_alloc_slab, ++ mempool_free_slab, _copy_cache); ++ if (!_copy_pool) { ++ kmem_cache_destroy(_copy_cache); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static void exit_copier(void) ++{ ++ if (_copy_pool) ++ mempool_destroy(_copy_pool); ++ ++ if (_copy_cache) ++ kmem_cache_destroy(_copy_cache); ++} ++ ++static inline struct copy_info *alloc_copy_info(void) ++{ ++ return mempool_alloc(_copy_pool, GFP_NOIO); ++} ++ ++static inline void free_copy_info(struct copy_info *info) ++{ ++ mempool_free(info, _copy_pool); ++} ++ ++void copy_complete(struct kcopyd_job *job) ++{ ++ struct copy_info *info = (struct copy_info *) job->context; ++ ++ if (info->notify) ++ info->notify(job->err, info->notify_context); ++ ++ free_copy_info(info); ++ ++ kcopyd_free_pages(job->nr_pages, job->pages); ++ ++ kcopyd_free_job(job); ++} ++ ++static void page_write_complete(struct kcopyd_job *job) ++{ ++ struct copy_info *info = (struct copy_info *) job->context; ++ int i; ++ ++ if (info->notify) ++ info->notify(job->err, info->notify_context); ++ ++ free_copy_info(info); ++ for (i = 0; i < job->nr_pages; i++) ++ put_page(job->pages[i]); ++ ++ kcopyd_free_job(job); ++} ++ ++/* ++ * These callback functions implement the state machine that copies regions. ++ */ ++void copy_write(struct kcopyd_job *job) ++{ ++ struct copy_info *info = (struct copy_info *) job->context; ++ ++ if (job->err) { ++ if (info->notify) ++ info->notify(job->err, job->context); ++ ++ kcopyd_free_job(job); ++ free_copy_info(info); ++ return; ++ } ++ ++ job->rw = WRITE; ++ memcpy(&job->disk, &info->to, sizeof(job->disk)); ++ job->callback = copy_complete; ++ ++ /* ++ * Queue the write. ++ */ ++ kcopyd_io(job); ++} ++ ++int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, ++ struct page **pages, int offset, kcopyd_notify_fn fn, ++ void *context) ++{ ++ struct copy_info *info; ++ struct kcopyd_job *job; ++ int i; ++ ++ /* ++ * Allocate a new copy_info. ++ */ ++ info = alloc_copy_info(); ++ if (!info) ++ return -ENOMEM; ++ ++ job = kcopyd_alloc_job(); ++ if (!job) { ++ free_copy_info(info); ++ return -ENOMEM; ++ } ++ ++ /* ++ * set up for the write. ++ */ ++ info->notify = fn; ++ info->notify_context = context; ++ memcpy(&info->to, to, sizeof(*to)); ++ ++ /* Get the pages */ ++ job->nr_pages = nr_pages; ++ for (i = 0; i < nr_pages; i++) { ++ get_page(pages[i]); ++ job->pages[i] = pages[i]; ++ } ++ ++ job->rw = WRITE; ++ ++ memcpy(&job->disk, &info->to, sizeof(job->disk)); ++ job->offset = offset; ++ job->callback = page_write_complete; ++ job->context = info; ++ ++ /* ++ * Trigger job. ++ */ ++ kcopyd_io(job); ++ return 0; ++} ++ ++int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, ++ kcopyd_notify_fn fn, void *context) ++{ ++ struct copy_info *info; ++ struct kcopyd_job *job; ++ ++ /* ++ * Allocate a new copy_info. 
++ */ ++ info = alloc_copy_info(); ++ if (!info) ++ return -ENOMEM; ++ ++ job = kcopyd_alloc_job(); ++ if (!job) { ++ free_copy_info(info); ++ return -ENOMEM; ++ } ++ ++ /* ++ * set up for the read. ++ */ ++ info->notify = fn; ++ info->notify_context = context; ++ memcpy(&info->to, to, sizeof(*to)); ++ ++ job->rw = READ; ++ memcpy(&job->disk, from, sizeof(*from)); ++ ++ job->offset = 0; ++ job->callback = copy_write; ++ job->context = info; ++ ++ /* ++ * Trigger job. ++ */ ++ kcopyd_io(job); ++ return 0; ++} ++ ++/*----------------------------------------------------------------- ++ * Unit setup ++ *---------------------------------------------------------------*/ ++static struct { ++ int (*init) (void); ++ void (*exit) (void); ++ ++} _inits[] = { ++#define xx(n) { init_ ## n, exit_ ## n} ++ xx(pages), ++ xx(buffers), ++ xx(jobs), ++ xx(copier) ++#undef xx ++}; ++ ++static int _client_count = 0; ++static DECLARE_MUTEX(_client_count_sem); ++ ++static int kcopyd_init(void) ++{ ++ const int count = sizeof(_inits) / sizeof(*_inits); ++ ++ int r, i; ++ ++ for (i = 0; i < count; i++) { ++ r = _inits[i].init(); ++ if (r) ++ goto bad; ++ } ++ ++ start_daemon(); ++ return 0; ++ ++ bad: ++ while (i--) ++ _inits[i].exit(); ++ ++ return r; ++} ++ ++static void kcopyd_exit(void) ++{ ++ int i = sizeof(_inits) / sizeof(*_inits); ++ ++ if (stop_daemon()) ++ DMWARN("Couldn't stop kcopyd."); ++ ++ while (i--) ++ _inits[i].exit(); ++} ++ ++void kcopyd_inc_client_count(void) ++{ ++ /* ++ * What I need here is an atomic_test_and_inc that returns ++ * the previous value of the atomic... In its absence I lock ++ * an int with a semaphore. :-( ++ */ ++ down(&_client_count_sem); ++ if (_client_count == 0) ++ kcopyd_init(); ++ _client_count++; ++ ++ up(&_client_count_sem); ++} ++ ++void kcopyd_dec_client_count(void) ++{ ++ down(&_client_count_sem); ++ if (--_client_count == 0) ++ kcopyd_exit(); ++ ++ up(&_client_count_sem); ++} +diff -ruN linux-2.4.20/drivers/md/kcopyd.h linux/drivers/md/kcopyd.h +--- linux-2.4.20/drivers/md/kcopyd.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/kcopyd.h Wed Mar 26 14:14:34 2003 +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (C) 2001 Sistina Software ++ * ++ * This file is released under the GPL. ++ */ ++ ++#ifndef DM_KCOPYD_H ++#define DM_KCOPYD_H ++ ++/* ++ * Needed for the definition of offset_t. ++ */ ++#include ++#include ++ ++struct kcopyd_region { ++ kdev_t dev; ++ sector_t sector; ++ sector_t count; ++}; ++ ++#define MAX_KCOPYD_PAGES 128 ++ ++struct kcopyd_job { ++ struct list_head list; ++ ++ /* ++ * Error state of the job. ++ */ ++ int err; ++ ++ /* ++ * Either READ or WRITE ++ */ ++ int rw; ++ ++ /* ++ * The source or destination for the transfer. ++ */ ++ struct kcopyd_region disk; ++ ++ int nr_pages; ++ struct page *pages[MAX_KCOPYD_PAGES]; ++ ++ /* ++ * Shifts and masks that will be useful when dispatching ++ * each buffer_head. ++ */ ++ sector_t offset; ++ sector_t block_size; ++ sector_t block_shift; ++ sector_t bpp_shift; /* blocks per page */ ++ sector_t bpp_mask; ++ ++ /* ++ * nr_blocks is how many buffer heads will have to be ++ * displatched to service this job, nr_requested is how ++ * many have been dispatched and nr_complete is how many ++ * have come back. ++ */ ++ unsigned int nr_blocks; ++ atomic_t nr_requested; ++ atomic_t nr_incomplete; ++ ++ /* ++ * Set this to ensure you are notified when the job has ++ * completed. 'context' is for callback to use. 
++ */ ++ void (*callback) (struct kcopyd_job * job); ++ void *context; ++}; ++ ++/* ++ * Low level async io routines. ++ */ ++struct kcopyd_job *kcopyd_alloc_job(void); ++void kcopyd_free_job(struct kcopyd_job *job); ++ ++int kcopyd_queue_job(struct kcopyd_job *job); ++ ++/* ++ * Submit a copy job to kcopyd. This is built on top of the ++ * previous three fns. ++ */ ++typedef void (*kcopyd_notify_fn) (int err, void *context); ++ ++int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, ++ kcopyd_notify_fn fn, void *context); ++ ++int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, ++ struct page **pages, int offset, kcopyd_notify_fn fn, ++ void *context); ++ ++/* ++ * We only want kcopyd to reserve resources if someone is ++ * actually using it. ++ */ ++void kcopyd_inc_client_count(void); ++void kcopyd_dec_client_count(void); ++ ++#endif +diff -ruN linux-2.4.20/fs/buffer.c linux/fs/buffer.c +--- linux-2.4.20/fs/buffer.c Fri Jan 10 16:35:24 2003 ++++ linux/fs/buffer.c Wed Mar 26 12:53:19 2003 +@@ -586,9 +586,10 @@ + void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode) + { + spin_lock(&lru_list_lock); +- if (bh->b_inode) ++ if (buffer_inode(bh)) + list_del(&bh->b_inode_buffers); +- bh->b_inode = inode; ++ else ++ set_buffer_inode(bh); + list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers); + spin_unlock(&lru_list_lock); + } +@@ -596,9 +597,10 @@ + void buffer_insert_inode_data_queue(struct buffer_head *bh, struct inode *inode) + { + spin_lock(&lru_list_lock); +- if (bh->b_inode) ++ if (buffer_inode(bh)) + list_del(&bh->b_inode_buffers); +- bh->b_inode = inode; ++ else ++ set_buffer_inode(bh); + list_add(&bh->b_inode_buffers, &inode->i_dirty_data_buffers); + spin_unlock(&lru_list_lock); + } +@@ -607,13 +609,13 @@ + remove_inode_queue functions. */ + static void __remove_inode_queue(struct buffer_head *bh) + { +- bh->b_inode = NULL; ++ clear_buffer_inode(bh); + list_del(&bh->b_inode_buffers); + } + + static inline void remove_inode_queue(struct buffer_head *bh) + { +- if (bh->b_inode) ++ if (buffer_inode(bh)) + __remove_inode_queue(bh); + } + +@@ -741,6 +743,7 @@ + bh->b_list = BUF_CLEAN; + bh->b_end_io = handler; + bh->b_private = private; ++ bh->b_journal_head = NULL; + } + + static void end_buffer_io_async(struct buffer_head * bh, int uptodate) +@@ -842,9 +845,9 @@ + bh = BH_ENTRY(list->next); + list_del(&bh->b_inode_buffers); + if (!buffer_dirty(bh) && !buffer_locked(bh)) +- bh->b_inode = NULL; ++ clear_buffer_inode(bh); + else { +- bh->b_inode = &tmp; ++ set_buffer_inode(bh); + list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers); + if (buffer_dirty(bh)) { + get_bh(bh); +@@ -1138,7 +1141,7 @@ + */ + static void __put_unused_buffer_head(struct buffer_head * bh) + { +- if (bh->b_inode) ++ if (buffer_inode(bh)) + BUG(); + if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) { + kmem_cache_free(bh_cachep, bh); +diff -ruN linux-2.4.20/fs/jbd/journal.c linux/fs/jbd/journal.c +--- linux-2.4.20/fs/jbd/journal.c Fri Jan 10 16:35:27 2003 ++++ linux/fs/jbd/journal.c Wed Mar 26 12:53:19 2003 +@@ -1664,8 +1664,8 @@ + * + * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit + * is set. This bit is tested in core kernel code where we need to take +- * JBD-specific actions. Testing the zeroness of ->b_private is not reliable +- * there. ++ * JBD-specific actions. Testing the zeroness of ->b_journal_head is not ++ * reliable there. + * + * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. 
+ * +@@ -1720,9 +1720,9 @@ + + if (buffer_jbd(bh)) { + /* Someone did it for us! */ +- J_ASSERT_BH(bh, bh->b_private != NULL); ++ J_ASSERT_BH(bh, bh->b_journal_head != NULL); + journal_free_journal_head(jh); +- jh = bh->b_private; ++ jh = bh->b_journal_head; + } else { + /* + * We actually don't need jh_splice_lock when +@@ -1730,7 +1730,7 @@ + */ + spin_lock(&jh_splice_lock); + set_bit(BH_JBD, &bh->b_state); +- bh->b_private = jh; ++ bh->b_journal_head = jh; + jh->b_bh = bh; + atomic_inc(&bh->b_count); + spin_unlock(&jh_splice_lock); +@@ -1739,7 +1739,7 @@ + } + jh->b_jcount++; + spin_unlock(&journal_datalist_lock); +- return bh->b_private; ++ return bh->b_journal_head; + } + + /* +@@ -1772,7 +1772,7 @@ + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + spin_lock(&jh_splice_lock); +- bh->b_private = NULL; ++ bh->b_journal_head = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_bit(BH_JBD, &bh->b_state); + __brelse(bh); +diff -ruN linux-2.4.20/include/linux/device-mapper.h linux/include/linux/device-mapper.h +--- linux-2.4.20/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/device-mapper.h Wed Mar 26 14:14:55 2003 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the LGPL. ++ */ ++ ++#ifndef _LINUX_DEVICE_MAPPER_H ++#define _LINUX_DEVICE_MAPPER_H ++ ++typedef unsigned long sector_t; ++ ++struct dm_target; ++struct dm_table; ++struct dm_dev; ++ ++typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; ++ ++/* ++ * In the constructor the target parameter will already have the ++ * table, type, begin and len fields filled in. ++ */ ++typedef int (*dm_ctr_fn) (struct dm_target * target, int argc, char **argv); ++ ++/* ++ * The destructor doesn't need to free the dm_target, just ++ * anything hidden ti->private. ++ */ ++typedef void (*dm_dtr_fn) (struct dm_target * ti); ++ ++/* ++ * The map function must return: ++ * < 0: error ++ * = 0: The target will handle the io by resubmitting it later ++ * > 0: simple remap complete ++ */ ++typedef int (*dm_map_fn) (struct dm_target * ti, struct buffer_head * bh, ++ int rw, void **map_context); ++ ++/* ++ * Returns: ++ * < 0 : error (currently ignored) ++ * 0 : ended successfully ++ * 1 : for some reason the io has still not completed (eg, ++ * multipath target might want to requeue a failed io). ++ */ ++typedef int (*dm_endio_fn) (struct dm_target * ti, ++ struct buffer_head * bh, int rw, int error, ++ void *map_context); ++typedef int (*dm_status_fn) (struct dm_target * ti, status_type_t status_type, ++ char *result, int maxlen); ++ ++void dm_error(const char *message); ++ ++/* ++ * Constructors should call these functions to ensure destination devices ++ * are opened/closed correctly. ++ * FIXME: too many arguments. 
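/*
 * Sketch, not part of the patch: a map function following the
 * return convention documented above -- redirect the buffer head
 * to the backing device and return > 0 so the core submits the
 * remapped io.  The sample_c private structure is hypothetical.
 */
struct sample_c {
	struct dm_dev *dev;		/* from dm_get_device() */
	sector_t start;			/* offset into the backing dev */
};

static int sample_map(struct dm_target *ti, struct buffer_head *bh,
		      int rw, void **map_context)
{
	struct sample_c *sc = (struct sample_c *) ti->private;

	bh->b_rdev = sc->dev->dev;
	bh->b_rsector = sc->start + (bh->b_rsector - ti->begin);

	return 1;			/* simple remap complete */
}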
++ */ ++int dm_get_device(struct dm_target *ti, const char *path, sector_t start, ++ sector_t len, int mode, struct dm_dev **result); ++void dm_put_device(struct dm_target *ti, struct dm_dev *d); ++ ++/* ++ * Information about a target type ++ */ ++struct target_type { ++ const char *name; ++ struct module *module; ++ dm_ctr_fn ctr; ++ dm_dtr_fn dtr; ++ dm_map_fn map; ++ dm_endio_fn end_io; ++ dm_status_fn status; ++}; ++ ++struct dm_target { ++ struct dm_table *table; ++ struct target_type *type; ++ ++ /* target limits */ ++ sector_t begin; ++ sector_t len; ++ ++ /* target specific data */ ++ void *private; ++ ++ /* Used to provide an error string from the ctr */ ++ char *error; ++}; ++ ++int dm_register_target(struct target_type *t); ++int dm_unregister_target(struct target_type *t); ++ ++#endif /* _LINUX_DEVICE_MAPPER_H */ +diff -ruN linux-2.4.20/include/linux/dm-ioctl.h linux/include/linux/dm-ioctl.h +--- linux-2.4.20/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/dm-ioctl.h Wed Mar 26 13:30:30 2003 +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the LGPL. ++ */ ++ ++#ifndef _LINUX_DM_IOCTL_H ++#define _LINUX_DM_IOCTL_H ++ ++#include ++ ++#define DM_DIR "mapper" /* Slashes not supported */ ++#define DM_MAX_TYPE_NAME 16 ++#define DM_NAME_LEN 128 ++#define DM_UUID_LEN 129 ++ ++/* ++ * Implements a traditional ioctl interface to the device mapper. ++ */ ++ ++/* ++ * All ioctl arguments consist of a single chunk of memory, with ++ * this structure at the start. If a uuid is specified any ++ * lookup (eg. for a DM_INFO) will be done on that, *not* the ++ * name. ++ */ ++struct dm_ioctl { ++ /* ++ * The version number is made up of three parts: ++ * major - no backward or forward compatibility, ++ * minor - only backwards compatible, ++ * patch - both backwards and forwards compatible. ++ * ++ * All clients of the ioctl interface should fill in the ++ * version number of the interface that they were ++ * compiled with. ++ * ++ * All recognised ioctl commands (ie. those that don't ++ * return -ENOTTY) fill out this field, even if the ++ * command failed. ++ */ ++ uint32_t version[3]; /* in/out */ ++ uint32_t data_size; /* total size of data passed in ++ * including this struct */ ++ ++ uint32_t data_start; /* offset to start of data ++ * relative to start of this struct */ ++ ++ uint32_t target_count; /* in/out */ ++ uint32_t open_count; /* out */ ++ uint32_t flags; /* in/out */ ++ ++ __kernel_dev_t dev; /* in/out */ ++ ++ char name[DM_NAME_LEN]; /* device name */ ++ char uuid[DM_UUID_LEN]; /* unique identifier for ++ * the block device */ ++}; ++ ++/* ++ * Used to specify tables. These structures appear after the ++ * dm_ioctl. ++ */ ++struct dm_target_spec { ++ int32_t status; /* used when reading from kernel only */ ++ uint64_t sector_start; ++ uint32_t length; ++ ++ /* ++ * Offset in bytes (from the start of this struct) to ++ * next target_spec. ++ */ ++ uint32_t next; ++ ++ char target_type[DM_MAX_TYPE_NAME]; ++ ++ /* ++ * Parameter string starts immediately after this object. ++ * Be careful to add padding after string to ensure correct ++ * alignment of subsequent dm_target_spec. ++ */ ++}; ++ ++/* ++ * Used to retrieve the target dependencies. 
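/*
 * Sketch, not part of the patch: a constructor/destructor pair
 * using dm_get_device()/dm_put_device() from device-mapper.h
 * above, pairing with the hypothetical sample_map() sketched
 * earlier.  It takes "<dev_path> <offset>" arguments; the parsing
 * and error strings are illustrative.
 */
static int sample_ctr(struct dm_target *ti, int argc, char **argv)
{
	struct sample_c *sc;

	if (argc != 2) {
		ti->error = "sample: <dev_path> <offset> required";
		return -EINVAL;
	}

	sc = kmalloc(sizeof(*sc), GFP_KERNEL);
	if (!sc) {
		ti->error = "sample: out of memory";
		return -ENOMEM;
	}

	sc->start = simple_strtoul(argv[1], NULL, 10);
	if (dm_get_device(ti, argv[0], sc->start, ti->len,
			  dm_table_get_mode(ti->table), &sc->dev)) {
		ti->error = "sample: device lookup failed";
		kfree(sc);
		return -ENXIO;
	}

	ti->private = sc;
	return 0;
}

static void sample_dtr(struct dm_target *ti)
{
	struct sample_c *sc = (struct sample_c *) ti->private;

	dm_put_device(ti, sc->dev);	/* close the backing device */
	kfree(sc);
}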
++ */ ++struct dm_target_deps { ++ uint32_t count; ++ ++ __kernel_dev_t dev[0]; /* out */ ++}; ++ ++/* ++ * If you change this make sure you make the corresponding change ++ * to dm-ioctl.c:lookup_ioctl() ++ */ ++enum { ++ /* Top level cmds */ ++ DM_VERSION_CMD = 0, ++ DM_REMOVE_ALL_CMD, ++ ++ /* device level cmds */ ++ DM_DEV_CREATE_CMD, ++ DM_DEV_REMOVE_CMD, ++ DM_DEV_RELOAD_CMD, ++ DM_DEV_RENAME_CMD, ++ DM_DEV_SUSPEND_CMD, ++ DM_DEV_DEPS_CMD, ++ DM_DEV_STATUS_CMD, ++ ++ /* target level cmds */ ++ DM_TARGET_STATUS_CMD, ++ DM_TARGET_WAIT_CMD ++}; ++ ++#define DM_IOCTL 0xfd ++ ++#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) ++#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) ++ ++#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) ++#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) ++#define DM_DEV_RELOAD _IOWR(DM_IOCTL, DM_DEV_RELOAD_CMD, struct dm_ioctl) ++#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) ++#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) ++#define DM_DEV_DEPS _IOWR(DM_IOCTL, DM_DEV_DEPS_CMD, struct dm_ioctl) ++#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) ++ ++#define DM_TARGET_STATUS _IOWR(DM_IOCTL, DM_TARGET_STATUS_CMD, struct dm_ioctl) ++#define DM_TARGET_WAIT _IOWR(DM_IOCTL, DM_TARGET_WAIT_CMD, struct dm_ioctl) ++ ++#define DM_VERSION_MAJOR 1 ++#define DM_VERSION_MINOR 0 ++#define DM_VERSION_PATCHLEVEL 10 ++#define DM_VERSION_EXTRA "-ioctl (2003-03-26)" ++ ++/* Status bits */ ++#define DM_READONLY_FLAG 0x00000001 ++#define DM_SUSPEND_FLAG 0x00000002 ++#define DM_EXISTS_FLAG 0x00000004 ++#define DM_PERSISTENT_DEV_FLAG 0x00000008 ++ ++/* ++ * Flag passed into ioctl STATUS command to get table information ++ * rather than current status. 
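/*
 * User-space sketch, not part of the patch: querying the interface
 * version via DM_VERSION.  The control node path is an assumption;
 * the ioctl interface conventionally creates /dev/mapper/control.
 * Per the comment on struct dm_ioctl above, the caller fills in the
 * version it was compiled against.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dm-ioctl.h>

int main(void)
{
	struct dm_ioctl dmi;
	int fd = open("/dev/mapper/control", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&dmi, 0, sizeof(dmi));
	dmi.version[0] = DM_VERSION_MAJOR;
	dmi.version[1] = DM_VERSION_MINOR;
	dmi.version[2] = DM_VERSION_PATCHLEVEL;
	dmi.data_size = sizeof(dmi);

	if (ioctl(fd, DM_VERSION, &dmi) < 0) {
		perror("DM_VERSION");
		return 1;
	}
	printf("dm interface %u.%u.%u\n",
	       dmi.version[0], dmi.version[1], dmi.version[2]);
	close(fd);
	return 0;
}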
++ */ ++#define DM_STATUS_TABLE_FLAG 0x00000010 ++ ++#endif /* _LINUX_DM_IOCTL_H */ +diff -ruN linux-2.4.20/include/linux/fs.h linux/include/linux/fs.h +--- linux-2.4.20/include/linux/fs.h Fri Jan 10 16:35:55 2003 ++++ linux/include/linux/fs.h Wed Mar 26 12:53:19 2003 +@@ -220,6 +220,7 @@ + BH_Wait_IO, /* 1 if we should write out this buffer */ + BH_Launder, /* 1 if we can throttle on this buffer */ + BH_JBD, /* 1 if it has an attached journal_head */ ++ BH_Inode, /* 1 if it is attached to i_dirty[_data]_buffers */ + + BH_PrivateStart,/* not a state bit, but the first bit available + * for private allocation by other entities +@@ -262,11 +263,10 @@ + struct page *b_page; /* the page this bh is mapped to */ + void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ + void *b_private; /* reserved for b_end_io */ +- ++ void *b_journal_head; /* ext3 journal_heads */ + unsigned long b_rsector; /* Real buffer location on disk */ + wait_queue_head_t b_wait; + +- struct inode * b_inode; + struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ + }; + +@@ -1184,6 +1184,21 @@ + set_bit(BH_Async, &bh->b_state); + else + clear_bit(BH_Async, &bh->b_state); ++} ++ ++static inline void set_buffer_inode(struct buffer_head *bh) ++{ ++ set_bit(BH_Inode, &bh->b_state); ++} ++ ++static inline void clear_buffer_inode(struct buffer_head *bh) ++{ ++ clear_bit(BH_Inode, &bh->b_state); ++} ++ ++static inline int buffer_inode(struct buffer_head *bh) ++{ ++ return test_bit(BH_Inode, &bh->b_state); + } + + /* +diff -ruN linux-2.4.20/include/linux/jbd.h linux/include/linux/jbd.h +--- linux-2.4.20/include/linux/jbd.h Fri Jan 10 16:35:55 2003 ++++ linux/include/linux/jbd.h Wed Mar 26 12:53:19 2003 +@@ -254,7 +254,7 @@ + + static inline struct journal_head *bh2jh(struct buffer_head *bh) + { +- return bh->b_private; ++ return bh->b_journal_head; + } + + #define HAVE_JOURNAL_CALLBACK_STATUS +diff -ruN linux-2.4.20/include/linux/mempool.h linux/include/linux/mempool.h +--- linux-2.4.20/include/linux/mempool.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/mempool.h Wed Mar 26 12:53:48 2003 +@@ -0,0 +1,31 @@ ++/* ++ * memory buffer pool support ++ */ ++#ifndef _LINUX_MEMPOOL_H ++#define _LINUX_MEMPOOL_H ++ ++#include ++#include ++ ++struct mempool_s; ++typedef struct mempool_s mempool_t; ++ ++typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data); ++typedef void (mempool_free_t)(void *element, void *pool_data); ++ ++extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, ++ mempool_free_t *free_fn, void *pool_data); ++extern int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask); ++extern void mempool_destroy(mempool_t *pool); ++extern void * mempool_alloc(mempool_t *pool, int gfp_mask); ++extern void mempool_free(void *element, mempool_t *pool); ++ ++/* ++ * A mempool_alloc_t and mempool_free_t that get the memory from ++ * a slab that is passed in through pool_data. 
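/*
 * Usage sketch, not part of the patch, mirroring what dm.c does
 * with its "dm io" slab: create a cache, then a pool that keeps a
 * minimum number of objects preallocated via the slab-backed
 * alloc/free functions declared below.  Names are illustrative.
 */
#include <linux/slab.h>
#include <linux/mempool.h>

struct my_io {
	int dummy;			/* stand-in for real per-io state */
};

static kmem_cache_t *my_cache;
static mempool_t *my_pool;

static int my_pool_init(void)
{
	my_cache = kmem_cache_create("my-io", sizeof(struct my_io),
				     0, 0, NULL, NULL);
	if (!my_cache)
		return -ENOMEM;

	my_pool = mempool_create(64, mempool_alloc_slab,
				 mempool_free_slab, my_cache);
	if (!my_pool) {
		kmem_cache_destroy(my_cache);
		return -ENOMEM;
	}
	return 0;
}

static void my_pool_exit(void)
{
	mempool_destroy(my_pool);	/* all elements must be freed first */
	kmem_cache_destroy(my_cache);
}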
++ */ ++void *mempool_alloc_slab(int gfp_mask, void *pool_data); ++void mempool_free_slab(void *element, void *pool_data); ++ ++ ++#endif /* _LINUX_MEMPOOL_H */ +diff -ruN linux-2.4.20/include/linux/vmalloc.h linux/include/linux/vmalloc.h +--- linux-2.4.20/include/linux/vmalloc.h Fri Jan 10 16:35:58 2003 ++++ linux/include/linux/vmalloc.h Wed Mar 26 12:53:19 2003 +@@ -26,6 +26,7 @@ + extern void vmfree_area_pages(unsigned long address, unsigned long size); + extern int vmalloc_area_pages(unsigned long address, unsigned long size, + int gfp_mask, pgprot_t prot); ++extern void *vcalloc(unsigned long nmemb, unsigned long elem_size); + + /* + * Allocate any pages +diff -ruN linux-2.4.20/kernel/ksyms.c linux/kernel/ksyms.c +--- linux-2.4.20/kernel/ksyms.c Fri Jan 10 16:36:02 2003 ++++ linux/kernel/ksyms.c Wed Mar 26 12:53:19 2003 +@@ -111,6 +111,7 @@ + EXPORT_SYMBOL(vfree); + EXPORT_SYMBOL(__vmalloc); + EXPORT_SYMBOL(vmalloc_to_page); ++EXPORT_SYMBOL(vcalloc); + EXPORT_SYMBOL(mem_map); + EXPORT_SYMBOL(remap_page_range); + EXPORT_SYMBOL(max_mapnr); +diff -ruN linux-2.4.20/mm/Makefile linux/mm/Makefile +--- linux-2.4.20/mm/Makefile Fri Jan 10 16:36:02 2003 ++++ linux/mm/Makefile Wed Mar 26 12:53:19 2003 +@@ -9,12 +9,12 @@ + + O_TARGET := mm.o + +-export-objs := shmem.o filemap.o memory.o page_alloc.o ++export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o + + obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ + vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ + page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ +- shmem.o ++ shmem.o mempool.o + + obj-$(CONFIG_HIGHMEM) += highmem.o + +diff -ruN linux-2.4.20/mm/mempool.c linux/mm/mempool.c +--- linux-2.4.20/mm/mempool.c Thu Jan 1 01:00:00 1970 ++++ linux/mm/mempool.c Wed Mar 26 12:53:48 2003 +@@ -0,0 +1,299 @@ ++/* ++ * linux/mm/mempool.c ++ * ++ * memory buffer pool support. Such pools are mostly used ++ * for guaranteed, deadlock-free memory allocations during ++ * extreme VM load. ++ * ++ * started by Ingo Molnar, Copyright (C) 2001 ++ */ ++ ++#include ++#include ++#include ++#include ++ ++struct mempool_s { ++ spinlock_t lock; ++ int min_nr; /* nr of elements at *elements */ ++ int curr_nr; /* Current nr of elements at *elements */ ++ void **elements; ++ ++ void *pool_data; ++ mempool_alloc_t *alloc; ++ mempool_free_t *free; ++ wait_queue_head_t wait; ++}; ++ ++static void add_element(mempool_t *pool, void *element) ++{ ++ BUG_ON(pool->curr_nr >= pool->min_nr); ++ pool->elements[pool->curr_nr++] = element; ++} ++ ++static void *remove_element(mempool_t *pool) ++{ ++ BUG_ON(pool->curr_nr <= 0); ++ return pool->elements[--pool->curr_nr]; ++} ++ ++static void free_pool(mempool_t *pool) ++{ ++ while (pool->curr_nr) { ++ void *element = remove_element(pool); ++ pool->free(element, pool->pool_data); ++ } ++ kfree(pool->elements); ++ kfree(pool); ++} ++ ++/** ++ * mempool_create - create a memory pool ++ * @min_nr: the minimum number of elements guaranteed to be ++ * allocated for this pool. ++ * @alloc_fn: user-defined element-allocation function. ++ * @free_fn: user-defined element-freeing function. ++ * @pool_data: optional private data available to the user-defined functions. ++ * ++ * this function creates and allocates a guaranteed size, preallocated ++ * memory pool. The pool can be used from the mempool_alloc and mempool_free ++ * functions. This function might sleep. 
+diff -ruN linux-2.4.20/include/linux/vmalloc.h linux/include/linux/vmalloc.h
+--- linux-2.4.20/include/linux/vmalloc.h	Fri Jan 10 16:35:58 2003
++++ linux/include/linux/vmalloc.h	Wed Mar 26 12:53:19 2003
+@@ -26,6 +26,7 @@
+ extern void vmfree_area_pages(unsigned long address, unsigned long size);
+ extern int vmalloc_area_pages(unsigned long address, unsigned long size,
+                               int gfp_mask, pgprot_t prot);
++extern void *vcalloc(unsigned long nmemb, unsigned long elem_size);
+ 
+ /*
+  * Allocate any pages
+diff -ruN linux-2.4.20/kernel/ksyms.c linux/kernel/ksyms.c
+--- linux-2.4.20/kernel/ksyms.c	Fri Jan 10 16:36:02 2003
++++ linux/kernel/ksyms.c	Wed Mar 26 12:53:19 2003
+@@ -111,6 +111,7 @@
+ EXPORT_SYMBOL(vfree);
+ EXPORT_SYMBOL(__vmalloc);
+ EXPORT_SYMBOL(vmalloc_to_page);
++EXPORT_SYMBOL(vcalloc);
+ EXPORT_SYMBOL(mem_map);
+ EXPORT_SYMBOL(remap_page_range);
+ EXPORT_SYMBOL(max_mapnr);
+diff -ruN linux-2.4.20/mm/Makefile linux/mm/Makefile
+--- linux-2.4.20/mm/Makefile	Fri Jan 10 16:36:02 2003
++++ linux/mm/Makefile	Wed Mar 26 12:53:19 2003
+@@ -9,12 +9,12 @@
+ 
+ O_TARGET := mm.o
+ 
+-export-objs := shmem.o filemap.o memory.o page_alloc.o
++export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o
+ 
+ obj-y	 := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
+ 	    vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
+ 	    page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \
+-	    shmem.o
++	    shmem.o mempool.o
+ 
+ obj-$(CONFIG_HIGHMEM) += highmem.o
+ 
+diff -ruN linux-2.4.20/mm/mempool.c linux/mm/mempool.c
+--- linux-2.4.20/mm/mempool.c	Thu Jan  1 01:00:00 1970
++++ linux/mm/mempool.c	Wed Mar 26 12:53:48 2003
+@@ -0,0 +1,299 @@
++/*
++ * linux/mm/mempool.c
++ *
++ * memory buffer pool support. Such pools are mostly used
++ * for guaranteed, deadlock-free memory allocations during
++ * extreme VM load.
++ *
++ * started by Ingo Molnar, Copyright (C) 2001
++ */
++
++#include <linux/mm.h>
++#include <linux/slab.h>
++#include <linux/module.h>
++#include <linux/mempool.h>
++
++struct mempool_s {
++	spinlock_t lock;
++	int min_nr;		/* nr of elements at *elements */
++	int curr_nr;		/* Current nr of elements at *elements */
++	void **elements;
++
++	void *pool_data;
++	mempool_alloc_t *alloc;
++	mempool_free_t *free;
++	wait_queue_head_t wait;
++};
++
++static void add_element(mempool_t *pool, void *element)
++{
++	BUG_ON(pool->curr_nr >= pool->min_nr);
++	pool->elements[pool->curr_nr++] = element;
++}
++
++static void *remove_element(mempool_t *pool)
++{
++	BUG_ON(pool->curr_nr <= 0);
++	return pool->elements[--pool->curr_nr];
++}
++
++static void free_pool(mempool_t *pool)
++{
++	while (pool->curr_nr) {
++		void *element = remove_element(pool);
++		pool->free(element, pool->pool_data);
++	}
++	kfree(pool->elements);
++	kfree(pool);
++}
++
++/**
++ * mempool_create - create a memory pool
++ * @min_nr:    the minimum number of elements guaranteed to be
++ *             allocated for this pool.
++ * @alloc_fn:  user-defined element-allocation function.
++ * @free_fn:   user-defined element-freeing function.
++ * @pool_data: optional private data available to the user-defined functions.
++ *
++ * this function creates and allocates a guaranteed size, preallocated
++ * memory pool. The pool can be used from the mempool_alloc and mempool_free
++ * functions. This function might sleep. Both the alloc_fn() and the free_fn()
++ * functions might sleep - as long as the mempool_alloc function is not called
++ * from IRQ contexts.
++ */
++mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
++				mempool_free_t *free_fn, void *pool_data)
++{
++	mempool_t *pool;
++
++	pool = kmalloc(sizeof(*pool), GFP_KERNEL);
++	if (!pool)
++		return NULL;
++	memset(pool, 0, sizeof(*pool));
++	pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL);
++	if (!pool->elements) {
++		kfree(pool);
++		return NULL;
++	}
++	spin_lock_init(&pool->lock);
++	pool->min_nr = min_nr;
++	pool->pool_data = pool_data;
++	init_waitqueue_head(&pool->wait);
++	pool->alloc = alloc_fn;
++	pool->free = free_fn;
++
++	/*
++	 * First pre-allocate the guaranteed number of buffers.
++	 */
++	while (pool->curr_nr < pool->min_nr) {
++		void *element;
++
++		element = pool->alloc(GFP_KERNEL, pool->pool_data);
++		if (unlikely(!element)) {
++			free_pool(pool);
++			return NULL;
++		}
++		add_element(pool, element);
++	}
++	return pool;
++}
++
++/**
++ * mempool_resize - resize an existing memory pool
++ * @pool:       pointer to the memory pool which was allocated via
++ *              mempool_create().
++ * @new_min_nr: the new minimum number of elements guaranteed to be
++ *              allocated for this pool.
++ * @gfp_mask:   the usual allocation bitmask.
++ *
++ * This function shrinks/grows the pool. In the case of growing,
++ * it cannot be guaranteed that the pool will be grown to the new
++ * size immediately, but new mempool_free() calls will refill it.
++ *
++ * Note, the caller must guarantee that no mempool_destroy is called
++ * while this function is running. mempool_alloc() & mempool_free()
++ * might be called (eg. from IRQ contexts) while this function executes.
++ */
++int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask)
++{
++	void *element;
++	void **new_elements;
++	unsigned long flags;
++
++	BUG_ON(new_min_nr <= 0);
++
++	spin_lock_irqsave(&pool->lock, flags);
++	if (new_min_nr < pool->min_nr) {
++		while (pool->curr_nr > new_min_nr) {
++			element = remove_element(pool);
++			spin_unlock_irqrestore(&pool->lock, flags);
++			pool->free(element, pool->pool_data);
++			spin_lock_irqsave(&pool->lock, flags);
++		}
++		pool->min_nr = new_min_nr;
++		goto out_unlock;
++	}
++	spin_unlock_irqrestore(&pool->lock, flags);
++
++	/* Grow the pool */
++	new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
++	if (!new_elements)
++		return -ENOMEM;
++
++	spin_lock_irqsave(&pool->lock, flags);
++	memcpy(new_elements, pool->elements,
++			pool->curr_nr * sizeof(*new_elements));
++	kfree(pool->elements);
++	pool->elements = new_elements;
++	pool->min_nr = new_min_nr;
++
++	while (pool->curr_nr < pool->min_nr) {
++		spin_unlock_irqrestore(&pool->lock, flags);
++		element = pool->alloc(gfp_mask, pool->pool_data);
++		if (!element)
++			goto out;
++		spin_lock_irqsave(&pool->lock, flags);
++		if (pool->curr_nr < pool->min_nr)
++			add_element(pool, element);
++		else
++			kfree(element);		/* Raced */
++	}
++out_unlock:
++	spin_unlock_irqrestore(&pool->lock, flags);
++out:
++	return 0;
++}
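The grow/shrink asymmetry in mempool_resize() is easy to misread, so a hedged sketch of the contract, reusing the hypothetical io_pool from the earlier example:

    #include <linux/kernel.h>
    #include <linux/mm.h>
    #include <linux/mempool.h>

    extern mempool_t *io_pool;      /* the hypothetical pool created above */

    static void tune_io_pool(int under_pressure)
    {
            /* Shrinking takes effect immediately: the excess reserve is
             * handed back to free_fn before the call returns. */
            if (under_pressure) {
                    mempool_resize(io_pool, 8, GFP_KERNEL);
                    return;
            }

            /* Growing can complete lazily: a 0 return only means min_nr
             * was raised.  If alloc_fn failed part-way, later
             * mempool_free() calls refill the reserve.  -ENOMEM is
             * returned only if the element array itself cannot be
             * reallocated. */
            if (mempool_resize(io_pool, 16, GFP_KERNEL) == -ENOMEM)
                    printk(KERN_WARNING "my_io: could not resize pool\n");
    }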
++
++/**
++ * mempool_destroy - deallocate a memory pool
++ * @pool:      pointer to the memory pool which was allocated via
++ *             mempool_create().
++ *
++ * this function only sleeps if the free_fn() function sleeps. The caller
++ * has to guarantee that all elements have been returned to the pool (ie:
++ * freed) prior to calling mempool_destroy().
++ */
++void mempool_destroy(mempool_t *pool)
++{
++	if (pool->curr_nr != pool->min_nr)
++		BUG();		/* There were outstanding elements */
++	free_pool(pool);
++}
++
++/**
++ * mempool_alloc - allocate an element from a specific memory pool
++ * @pool:      pointer to the memory pool which was allocated via
++ *             mempool_create().
++ * @gfp_mask:  the usual allocation bitmask.
++ *
++ * this function only sleeps if the alloc_fn function sleeps or
++ * returns NULL. Note that due to preallocation, this function
++ * *never* fails when called from process contexts. (it might
++ * fail if called from an IRQ context.)
++ */
++void * mempool_alloc(mempool_t *pool, int gfp_mask)
++{
++	void *element;
++	unsigned long flags;
++	int curr_nr;
++	DECLARE_WAITQUEUE(wait, current);
++	int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
++
++repeat_alloc:
++	element = pool->alloc(gfp_nowait, pool->pool_data);
++	if (likely(element != NULL))
++		return element;
++
++	/*
++	 * If the pool is less than 50% full then try harder
++	 * to allocate an element:
++	 */
++	if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) {
++		element = pool->alloc(gfp_mask, pool->pool_data);
++		if (likely(element != NULL))
++			return element;
++	}
++
++	/*
++	 * Kick the VM at this point.
++	 */
++	wakeup_bdflush();
++
++	spin_lock_irqsave(&pool->lock, flags);
++	if (likely(pool->curr_nr)) {
++		element = remove_element(pool);
++		spin_unlock_irqrestore(&pool->lock, flags);
++		return element;
++	}
++	spin_unlock_irqrestore(&pool->lock, flags);
++
++	/* We must not sleep in the GFP_ATOMIC case */
++	if (gfp_mask == gfp_nowait)
++		return NULL;
++
++	run_task_queue(&tq_disk);
++
++	add_wait_queue_exclusive(&pool->wait, &wait);
++	set_task_state(current, TASK_UNINTERRUPTIBLE);
++
++	spin_lock_irqsave(&pool->lock, flags);
++	curr_nr = pool->curr_nr;
++	spin_unlock_irqrestore(&pool->lock, flags);
++
++	if (!curr_nr)
++		schedule();
++
++	current->state = TASK_RUNNING;
++	remove_wait_queue(&pool->wait, &wait);
++
++	goto repeat_alloc;
++}
++
++/**
++ * mempool_free - return an element to the pool.
++ * @element:   pool element pointer.
++ * @pool:      pointer to the memory pool which was allocated via
++ *             mempool_create().
++ *
++ * this function only sleeps if the free_fn() function sleeps.
++ */
++void mempool_free(void *element, mempool_t *pool)
++{
++	unsigned long flags;
++
++	if (pool->curr_nr < pool->min_nr) {
++		spin_lock_irqsave(&pool->lock, flags);
++		if (pool->curr_nr < pool->min_nr) {
++			add_element(pool, element);
++			spin_unlock_irqrestore(&pool->lock, flags);
++			wake_up(&pool->wait);
++			return;
++		}
++		spin_unlock_irqrestore(&pool->lock, flags);
++	}
++	pool->free(element, pool->pool_data);
++}
++
++/*
++ * A commonly used alloc and free fn.
++ */
++void *mempool_alloc_slab(int gfp_mask, void *pool_data)
++{
++	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
++	return kmem_cache_alloc(mem, gfp_mask);
++}
++
++void mempool_free_slab(void *element, void *pool_data)
++{
++	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
++	kmem_cache_free(mem, element);
++}
++
++
++EXPORT_SYMBOL(mempool_create);
++EXPORT_SYMBOL(mempool_resize);
++EXPORT_SYMBOL(mempool_destroy);
++EXPORT_SYMBOL(mempool_alloc);
++EXPORT_SYMBOL(mempool_free);
++EXPORT_SYMBOL(mempool_alloc_slab);
++EXPORT_SYMBOL(mempool_free_slab);
+diff -ruN linux-2.4.20/mm/vmalloc.c linux/mm/vmalloc.c
+--- linux-2.4.20/mm/vmalloc.c	Fri Jan 10 16:36:03 2003
++++ linux/mm/vmalloc.c	Wed Mar 26 12:53:19 2003
+@@ -327,3 +327,22 @@
+ 	read_unlock(&vmlist_lock);
+ 	return buf - buf_start;
+ }
++
++void *vcalloc(unsigned long nmemb, unsigned long elem_size)
++{
++	unsigned long size;
++	void *addr;
++
++	/*
++	 * Check that we're not going to overflow.
++	 */
++	if (nmemb > (ULONG_MAX / elem_size))
++		return NULL;
++
++	size = nmemb * elem_size;
++	addr = vmalloc(size);
++	if (addr)
++		memset(addr, 0, size);
++
++	return addr;
++}
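The guard in vcalloc() above deserves a gloss: on a 32-bit machine, nmemb = 0x08000001 with elem_size = 32 multiplies out to 2^32 + 32, which wraps to a 32-byte allocation where roughly 4GB was requested; the ULONG_MAX / elem_size test rejects the call instead (note it assumes elem_size is non-zero, or the division itself traps). A hypothetical caller, with illustrative names not taken from the patch:

    #include <linux/vmalloc.h>

    struct my_status {                      /* hypothetical per-target record */
            unsigned long sector_count;
    };

    static struct my_status *alloc_status_table(unsigned long nr_targets)
    {
            /* Returns zeroed memory of nr_targets elements, or NULL if
             * the total size would overflow an unsigned long, rather
             * than wrapping to a too-small buffer.  Free the result
             * with vfree(). */
            return vcalloc(nr_targets, sizeof(struct my_status));
    }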
-- 
2.43.5