From: Alasdair Kergon Date: Mon, 1 Sep 2003 16:16:01 +0000 (+0000) Subject: Remove old patches. X-Git-Tag: v1_00_06~11 X-Git-Url: https://sourceware.org/git/?a=commitdiff_plain;h=ca5e9e3c19ec6ccdbfc7de67843dbb1c91ee5d97;p=dm.git Remove old patches. --- diff --git a/patches/common/linux-2.4.19-arch64.patch b/patches/common/linux-2.4.19-arch64.patch deleted file mode 100644 index 242a638..0000000 --- a/patches/common/linux-2.4.19-arch64.patch +++ /dev/null @@ -1,140 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# This patch format is intended for GNU patch command version 2.5 or higher. -# This patch includes the following deltas: -# ChangeSet 1.676 -> 1.677 -# arch/sparc64/kernel/ioctl32.c 1.26 -> 1.27 -# arch/s390x/kernel/ioctl32.c 1.5 -> 1.6 -# arch/mips64/kernel/ioctl32.c 1.4 -> 1.5 -# arch/ppc64/kernel/ioctl32.c 1.2 -> 1.3 -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 02/08/21 thornber@sistina.com 1.677 -# [device-mapper] Add dm ioctls to the ioctl32.c files in various 64bit -# architectures. -# -------------------------------------------- -# -diff -Nru a/arch/mips64/kernel/ioctl32.c b/arch/mips64/kernel/ioctl32.c ---- a/arch/mips64/kernel/ioctl32.c Tue Sep 24 14:26:44 2002 -+++ b/arch/mips64/kernel/ioctl32.c Tue Sep 24 14:26:44 2002 -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - #include - #undef __KERNEL__ /* This file was born to be ugly ... */ - #include -@@ -816,6 +817,20 @@ - IOCTL32_DEFAULT(STOP_ARRAY_RO), - IOCTL32_DEFAULT(RESTART_ARRAY_RW), - #endif /* CONFIG_MD */ -+ -+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) -+ IOCTL32_DEFAULT(DM_VERSION), -+ IOCTL32_DEFAULT(DM_REMOVE_ALL), -+ IOCTL32_DEFAULT(DM_DEV_CREATE), -+ IOCTL32_DEFAULT(DM_DEV_REMOVE), -+ IOCTL32_DEFAULT(DM_DEV_RELOAD), -+ IOCTL32_DEFAULT(DM_DEV_SUSPEND), -+ IOCTL32_DEFAULT(DM_DEV_RENAME), -+ IOCTL32_DEFAULT(DM_DEV_DEPS), -+ IOCTL32_DEFAULT(DM_DEV_STATUS), -+ IOCTL32_DEFAULT(DM_TARGET_STATUS), -+ IOCTL32_DEFAULT(DM_TARGET_WAIT), -+#endif /* CONFIG_BLK_DEV_DM */ - - IOCTL32_DEFAULT(MTIOCTOP), /* mtio.h ioctls */ - IOCTL32_HANDLER(MTIOCGET32, mt_ioctl_trans), -diff -Nru a/arch/ppc64/kernel/ioctl32.c b/arch/ppc64/kernel/ioctl32.c ---- a/arch/ppc64/kernel/ioctl32.c Tue Sep 24 14:26:44 2002 -+++ b/arch/ppc64/kernel/ioctl32.c Tue Sep 24 14:26:44 2002 -@@ -65,6 +65,7 @@ - #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) - #include - #endif /* LVM */ -+#include - - #include - /* Ugly hack. 
*/
-@@ -4187,6 +4188,18 @@
- COMPATIBLE_IOCTL(NBD_PRINT_DEBUG),
- COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS),
- COMPATIBLE_IOCTL(NBD_DISCONNECT),
-+/* device-mapper */
-+COMPATIBLE_IOCTL(DM_VERSION),
-+COMPATIBLE_IOCTL(DM_REMOVE_ALL),
-+COMPATIBLE_IOCTL(DM_DEV_CREATE),
-+COMPATIBLE_IOCTL(DM_DEV_REMOVE),
-+COMPATIBLE_IOCTL(DM_DEV_RELOAD),
-+COMPATIBLE_IOCTL(DM_DEV_SUSPEND),
-+COMPATIBLE_IOCTL(DM_DEV_RENAME),
-+COMPATIBLE_IOCTL(DM_DEV_DEPS),
-+COMPATIBLE_IOCTL(DM_DEV_STATUS),
-+COMPATIBLE_IOCTL(DM_TARGET_STATUS),
-+COMPATIBLE_IOCTL(DM_TARGET_WAIT),
- /* Remove *PRIVATE in 2.5 */
- COMPATIBLE_IOCTL(SIOCDEVPRIVATE),
- COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1),
-diff -Nru a/arch/s390x/kernel/ioctl32.c b/arch/s390x/kernel/ioctl32.c
---- a/arch/s390x/kernel/ioctl32.c Tue Sep 24 14:26:44 2002
-+++ b/arch/s390x/kernel/ioctl32.c Tue Sep 24 14:26:44 2002
-@@ -25,6 +25,7 @@
- #include
- #include
- #include
-+#include
- #include
- #include
- #include
-@@ -507,6 +508,18 @@
- IOCTL32_DEFAULT(VT_UNLOCKSWITCH),
-
- IOCTL32_DEFAULT(SIOCGSTAMP),
-+
-+ IOCTL32_DEFAULT(DM_VERSION),
-+ IOCTL32_DEFAULT(DM_REMOVE_ALL),
-+ IOCTL32_DEFAULT(DM_DEV_CREATE),
-+ IOCTL32_DEFAULT(DM_DEV_REMOVE),
-+ IOCTL32_DEFAULT(DM_DEV_RELOAD),
-+ IOCTL32_DEFAULT(DM_DEV_SUSPEND),
-+ IOCTL32_DEFAULT(DM_DEV_RENAME),
-+ IOCTL32_DEFAULT(DM_DEV_DEPS),
-+ IOCTL32_DEFAULT(DM_DEV_STATUS),
-+ IOCTL32_DEFAULT(DM_TARGET_STATUS),
-+ IOCTL32_DEFAULT(DM_TARGET_WAIT),
-
- IOCTL32_HANDLER(SIOCGIFNAME, dev_ifname32),
- IOCTL32_HANDLER(SIOCGIFCONF, dev_ifconf),
-diff -Nru a/arch/sparc64/kernel/ioctl32.c b/arch/sparc64/kernel/ioctl32.c
---- a/arch/sparc64/kernel/ioctl32.c Tue Sep 24 14:26:44 2002
-+++ b/arch/sparc64/kernel/ioctl32.c Tue Sep 24 14:26:44 2002
-@@ -54,6 +54,7 @@
- #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
- #include
- #endif /* LVM */
-+#include
-
- #include
- /* Ugly hack. */
-@@ -4608,6 +4609,19 @@
- COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
- COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS)
- COMPATIBLE_IOCTL(NBD_DISCONNECT)
-+/* device-mapper */
-+COMPATIBLE_IOCTL(DM_VERSION)
-+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
-+COMPATIBLE_IOCTL(DM_DEV_CREATE)
-+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
-+COMPATIBLE_IOCTL(DM_DEV_RELOAD)
-+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
-+COMPATIBLE_IOCTL(DM_DEV_RENAME)
-+COMPATIBLE_IOCTL(DM_DEV_DEPS)
-+COMPATIBLE_IOCTL(DM_DEV_STATUS)
-+COMPATIBLE_IOCTL(DM_TARGET_STATUS)
-+COMPATIBLE_IOCTL(DM_TARGET_WAIT)
-+
- /* And these ioctls need translation */
- HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob)
- HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob)
diff --git a/patches/common/linux-2.4.19-b_private.patch b/patches/common/linux-2.4.19-b_private.patch
deleted file mode 100644
index add8c40..0000000
--- a/patches/common/linux-2.4.19-b_private.patch
+++ /dev/null
@@ -1,205 +0,0 @@
-# This is a BitKeeper generated patch for the following project:
-# Project Name: Linux kernel tree
-# This patch format is intended for GNU patch command version 2.5 or higher.
-# This patch includes the following deltas:
-# ChangeSet 1.582.2.41 -> 1.582.2.42
-# fs/buffer.c 1.67 -> 1.68
-# fs/jbd/journal.c 1.4 -> 1.5
-# include/linux/fs.h 1.64 -> 1.65
-# include/linux/jbd.h 1.5 -> 1.6
-#
-# The following is the BitKeeper ChangeSet Log
-# --------------------------------------------
-# 02/07/17 thornber@sistina.com 1.582.2.42
-# Change bh->b_inode to a flag in bh->b_state.
-#
-# Use a separate bh->b_journal_head field instead of using bh->b_private.
-# -# [Andrew Morton] -# -------------------------------------------- -# -diff -Nru a/fs/buffer.c b/fs/buffer.c ---- a/fs/buffer.c Wed Aug 14 15:59:57 2002 -+++ b/fs/buffer.c Wed Aug 14 15:59:57 2002 -@@ -587,9 +587,10 @@ - void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode) - { - spin_lock(&lru_list_lock); -- if (bh->b_inode) -+ if (buffer_inode(bh)) - list_del(&bh->b_inode_buffers); -- bh->b_inode = inode; -+ else -+ set_buffer_inode(bh); - list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers); - spin_unlock(&lru_list_lock); - } -@@ -597,9 +598,10 @@ - void buffer_insert_inode_data_queue(struct buffer_head *bh, struct inode *inode) - { - spin_lock(&lru_list_lock); -- if (bh->b_inode) -+ if (buffer_inode(bh)) - list_del(&bh->b_inode_buffers); -- bh->b_inode = inode; -+ else -+ set_buffer_inode(bh); - list_add(&bh->b_inode_buffers, &inode->i_dirty_data_buffers); - spin_unlock(&lru_list_lock); - } -@@ -608,13 +610,13 @@ - remove_inode_queue functions. */ - static void __remove_inode_queue(struct buffer_head *bh) - { -- bh->b_inode = NULL; -+ clear_buffer_inode(bh); - list_del(&bh->b_inode_buffers); - } - - static inline void remove_inode_queue(struct buffer_head *bh) - { -- if (bh->b_inode) -+ if (buffer_inode(bh)) - __remove_inode_queue(bh); - } - -@@ -746,6 +748,7 @@ - bh->b_list = BUF_CLEAN; - bh->b_end_io = handler; - bh->b_private = private; -+ bh->b_journal_head = NULL; - } - - static void end_buffer_io_async(struct buffer_head * bh, int uptodate) -@@ -843,9 +846,9 @@ - bh = BH_ENTRY(list->next); - list_del(&bh->b_inode_buffers); - if (!buffer_dirty(bh) && !buffer_locked(bh)) -- bh->b_inode = NULL; -+ clear_buffer_inode(bh); - else { -- bh->b_inode = &tmp; -+ set_buffer_inode(bh); - list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers); - if (buffer_dirty(bh)) { - get_bh(bh); -@@ -1138,7 +1141,7 @@ - */ - static void __put_unused_buffer_head(struct buffer_head * bh) - { -- if (bh->b_inode) -+ if (buffer_inode(bh)) - BUG(); - if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) { - kmem_cache_free(bh_cachep, bh); -diff -Nru a/fs/jbd/journal.c b/fs/jbd/journal.c ---- a/fs/jbd/journal.c Wed Aug 14 15:59:57 2002 -+++ b/fs/jbd/journal.c Wed Aug 14 15:59:57 2002 -@@ -1625,8 +1625,8 @@ - * - * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit - * is set. This bit is tested in core kernel code where we need to take -- * JBD-specific actions. Testing the zeroness of ->b_private is not reliable -- * there. -+ * JBD-specific actions. Testing the zeroness of ->b_journal_head is not -+ * reliable there. - * - * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. - * -@@ -1681,9 +1681,9 @@ - - if (buffer_jbd(bh)) { - /* Someone did it for us! 
*/ -- J_ASSERT_BH(bh, bh->b_private != NULL); -+ J_ASSERT_BH(bh, bh->b_journal_head != NULL); - journal_free_journal_head(jh); -- jh = bh->b_private; -+ jh = bh->b_journal_head; - } else { - /* - * We actually don't need jh_splice_lock when -@@ -1691,7 +1691,7 @@ - */ - spin_lock(&jh_splice_lock); - set_bit(BH_JBD, &bh->b_state); -- bh->b_private = jh; -+ bh->b_journal_head = jh; - jh->b_bh = bh; - atomic_inc(&bh->b_count); - spin_unlock(&jh_splice_lock); -@@ -1700,7 +1700,7 @@ - } - jh->b_jcount++; - spin_unlock(&journal_datalist_lock); -- return bh->b_private; -+ return bh->b_journal_head; - } - - /* -@@ -1733,7 +1733,7 @@ - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - spin_lock(&jh_splice_lock); -- bh->b_private = NULL; -+ bh->b_journal_head = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_bit(BH_JBD, &bh->b_state); - __brelse(bh); -diff -Nru a/include/linux/fs.h b/include/linux/fs.h ---- a/include/linux/fs.h Wed Aug 14 15:59:57 2002 -+++ b/include/linux/fs.h Wed Aug 14 15:59:57 2002 -@@ -219,6 +219,7 @@ - BH_Wait_IO, /* 1 if we should write out this buffer */ - BH_Launder, /* 1 if we can throttle on this buffer */ - BH_JBD, /* 1 if it has an attached journal_head */ -+ BH_Inode, /* 1 if it is attached to i_dirty[_data]_buffers */ - - BH_PrivateStart,/* not a state bit, but the first bit available - * for private allocation by other entities -@@ -261,11 +262,10 @@ - struct page *b_page; /* the page this bh is mapped to */ - void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ - void *b_private; /* reserved for b_end_io */ -- -+ void *b_journal_head; /* ext3 journal_heads */ - unsigned long b_rsector; /* Real buffer location on disk */ - wait_queue_head_t b_wait; - -- struct inode * b_inode; - struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ - }; - -@@ -1179,6 +1179,21 @@ - set_bit(BH_Async, &bh->b_state); - else - clear_bit(BH_Async, &bh->b_state); -+} -+ -+static inline void set_buffer_inode(struct buffer_head *bh) -+{ -+ set_bit(BH_Inode, &bh->b_state); -+} -+ -+static inline void clear_buffer_inode(struct buffer_head *bh) -+{ -+ clear_bit(BH_Inode, &bh->b_state); -+} -+ -+static inline int buffer_inode(struct buffer_head *bh) -+{ -+ return test_bit(BH_Inode, &bh->b_state); - } - - /* -diff -Nru a/include/linux/jbd.h b/include/linux/jbd.h ---- a/include/linux/jbd.h Wed Aug 14 15:59:57 2002 -+++ b/include/linux/jbd.h Wed Aug 14 15:59:57 2002 -@@ -246,7 +246,7 @@ - - static inline struct journal_head *bh2jh(struct buffer_head *bh) - { -- return bh->b_private; -+ return bh->b_journal_head; - } - - struct jbd_revoke_table_s; diff --git a/patches/common/linux-2.4.19-config.patch b/patches/common/linux-2.4.19-config.patch deleted file mode 100644 index cf282cd..0000000 --- a/patches/common/linux-2.4.19-config.patch +++ /dev/null @@ -1,53 +0,0 @@ -diff -ruN linux-2.4.19/MAINTAINERS linux/MAINTAINERS ---- linux-2.4.19/MAINTAINERS Wed Aug 14 11:49:45 2002 -+++ linux/MAINTAINERS Tue Jul 23 16:55:55 2002 -@@ -426,6 +426,13 @@ - W: http://www.debian.org/~dz/i8k/ - S: Maintained - -+DEVICE MAPPER -+P: Joe Thornber -+M: dm@uk.sistina.com -+L: linux-LVM@sistina.com -+W: http://www.sistina.com/lvm -+S: Maintained -+ - DEVICE NUMBER REGISTRY - P: H. 
Peter Anvin - M: hpa@zytor.com -diff -ruN linux-2.4.19/drivers/md/Config.in linux/drivers/md/Config.in ---- linux-2.4.19/drivers/md/Config.in Wed Aug 14 11:51:06 2002 -+++ linux/drivers/md/Config.in Wed Jul 10 13:12:08 2002 -@@ -14,5 +14,8 @@ - dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD - - dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD -+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -+ dep_tristate ' Device-mapper support (EXPERIMENTAL)' CONFIG_BLK_DEV_DM $CONFIG_MD -+fi - - endmenu -diff -ruN a/Documentation/Configure.help b/Documentation/Configure.help ---- linux-2.4.19/Documentation/Configure.help Tue Jun 25 14:14:05 2002 -+++ linux/Documentation/Configure.help Tue Jun 25 19:18:26 2002 -@@ -1775,6 +1775,20 @@ - want), say M here and read . The - module will be called lvm-mod.o. - -+Device-mapper support -+CONFIG_BLK_DEV_DM -+ Device-mapper is a low level volume manager. It works by allowing -+ people to specify mappings for ranges of logical sectors. Various -+ mapping types are available, in addition people may write their own -+ modules containing custom mappings if they wish. -+ -+ Higher level volume managers such as LVM2 use this driver. -+ -+ If you want to compile this as a module, say M here and read -+ . The module will be called dm-mod.o. -+ -+ If unsure, say N. -+ - Multiple devices driver support (RAID and LVM) - CONFIG_MD - Support multiple physical spindles through a single logical device. diff --git a/patches/common/linux-2.4.19-devmapper_only.patch b/patches/common/linux-2.4.19-devmapper_only.patch deleted file mode 100644 index 95a26f0..0000000 --- a/patches/common/linux-2.4.19-devmapper_only.patch +++ /dev/null @@ -1,6669 +0,0 @@ -diff -ruN linux-2.4.19/drivers/md/Makefile linux-2.4.19-dm/drivers/md/Makefile ---- linux-2.4.19/drivers/md/Makefile Wed Aug 14 11:51:06 2002 -+++ linux-2.4.19-dm/drivers/md/Makefile Thu Nov 14 13:50:32 2002 -@@ -4,9 +4,12 @@ - - O_TARGET := mddev.o - --export-objs := md.o xor.o -+export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o - list-multi := lvm-mod.o - lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o -+dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \ -+ dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \ -+ kcopyd.o - - # Note: link order is important. All raid personalities - # and xor.o must come before md.o, as they each initialise -@@ -20,8 +23,12 @@ - obj-$(CONFIG_MD_MULTIPATH) += multipath.o - obj-$(CONFIG_BLK_DEV_MD) += md.o - obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o -+obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o - - include $(TOPDIR)/Rules.make - - lvm-mod.o: $(lvm-mod-objs) - $(LD) -r -o $@ $(lvm-mod-objs) -+ -+dm-mod.o: $(dm-mod-objs) -+ $(LD) -r -o $@ $(dm-mod-objs) -diff -ruN linux-2.4.19/drivers/md/dm-exception-store.c linux-2.4.19-dm/drivers/md/dm-exception-store.c ---- linux-2.4.19/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-exception-store.c Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,701 @@ -+/* -+ * dm-snapshot.c -+ * -+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm-snapshot.h" -+#include "kcopyd.h" -+ -+#include -+#include -+#include -+#include -+ -+#define SECTOR_SIZE 512 -+#define SECTOR_SHIFT 9 -+ -+/*----------------------------------------------------------------- -+ * Persistent snapshots, by persistent we mean that the snapshot -+ * will survive a reboot. 
-+ *---------------------------------------------------------------*/
-+
-+/*
-+ * We need to store a record of which parts of the origin have
-+ * been copied to the snapshot device. The snapshot code
-+ * requires that we copy exception chunks to chunk aligned areas
-+ * of the COW store. It makes sense, therefore, to store the
-+ * metadata in chunk size blocks.
-+ *
-+ * There is no backward or forward compatibility implemented,
-+ * snapshots with different disk versions than the kernel will
-+ * not be usable. It is expected that "lvcreate" will blank out
-+ * the start of a fresh COW device before calling the snapshot
-+ * constructor.
-+ *
-+ * The first chunk of the COW device just contains the header.
-+ * After this there is a chunk filled with exception metadata,
-+ * followed by as many exception chunks as can fit in the
-+ * metadata areas.
-+ *
-+ * All on disk structures are in little-endian format. The end
-+ * of the exceptions info is indicated by an exception with a
-+ * new_chunk of 0, which is invalid since it would point to the
-+ * header chunk.
-+ */
-+
-+/*
-+ * Magic for persistent snapshots: "SnAp" - Feeble, isn't it?
-+ */
-+#define SNAP_MAGIC 0x70416e53
-+
-+/*
-+ * The on-disk version of the metadata.
-+ */
-+#define SNAPSHOT_DISK_VERSION 1
-+
-+struct disk_header {
-+ uint32_t magic;
-+
-+ /*
-+ * Is this snapshot valid? There is no way of recovering
-+ * an invalid snapshot.
-+ */
-+ int valid;
-+
-+ /*
-+ * Simple, incrementing version. No backward
-+ * compatibility.
-+ */
-+ uint32_t version;
-+
-+ /* In sectors */
-+ uint32_t chunk_size;
-+};
-+
-+struct disk_exception {
-+ uint64_t old_chunk;
-+ uint64_t new_chunk;
-+};
-+
-+struct commit_callback {
-+ void (*callback) (void *, int success);
-+ void *context;
-+};
-+
-+/*
-+ * The top level structure for a persistent exception store.
-+ */
-+struct pstore {
-+ struct dm_snapshot *snap; /* up pointer to my snapshot */
-+ int version;
-+ int valid;
-+ uint32_t chunk_size;
-+ uint32_t exceptions_per_area;
-+
-+ /*
-+ * Now that we have an asynchronous kcopyd there is no
-+ * need for large chunk sizes, so it won't hurt to have a
-+ * whole chunk's worth of metadata in memory at once.
-+ */
-+ void *area;
-+ struct kiobuf *iobuf;
-+
-+ /*
-+ * Used to keep track of which metadata area the data in
-+ * 'chunk' refers to.
-+ */
-+ uint32_t current_area;
-+
-+ /*
-+ * The next free chunk for an exception.
-+ */
-+ uint32_t next_free;
-+
-+ /*
-+ * The index of next free exception in the current
-+ * metadata area.
-+ */
-+ uint32_t current_committed;
-+
-+ atomic_t pending_count;
-+ uint32_t callback_count;
-+ struct commit_callback *callbacks;
-+};
-+
-+/*
-+ * For performance reasons we want to defer writing a committed
-+ * exception's metadata to disk so that we can amortise away this
-+ * expensive operation.
-+ *
-+ * For the initial version of this code we will remain with
-+ * synchronous io. There are some deadlock issues with async
-+ * that I haven't yet worked out.
-+ */ -+static int do_io(int rw, struct kcopyd_region *where, struct kiobuf *iobuf) -+{ -+ int i, sectors_per_block, nr_blocks, start; -+ int blocksize = get_hardsect_size(where->dev); -+ int status; -+ -+ sectors_per_block = blocksize / SECTOR_SIZE; -+ -+ nr_blocks = where->count / sectors_per_block; -+ start = where->sector / sectors_per_block; -+ -+ for (i = 0; i < nr_blocks; i++) -+ iobuf->blocks[i] = start++; -+ -+ iobuf->length = where->count << 9; -+ iobuf->locked = 1; -+ -+ status = brw_kiovec(rw, 1, &iobuf, where->dev, iobuf->blocks, -+ blocksize); -+ if (status != (where->count << 9)) -+ return -EIO; -+ -+ return 0; -+} -+ -+static int allocate_iobuf(struct pstore *ps) -+{ -+ size_t i, r = -ENOMEM, len, nr_pages; -+ struct page *page; -+ -+ len = ps->chunk_size << SECTOR_SHIFT; -+ -+ /* -+ * Allocate the chunk_size block of memory that will hold -+ * a single metadata area. -+ */ -+ ps->area = vmalloc(len); -+ if (!ps->area) -+ return r; -+ -+ if (alloc_kiovec(1, &ps->iobuf)) -+ goto bad; -+ -+ nr_pages = ps->chunk_size / (PAGE_SIZE / SECTOR_SIZE); -+ r = expand_kiobuf(ps->iobuf, nr_pages); -+ if (r) -+ goto bad; -+ -+ /* -+ * We lock the pages for ps->area into memory since they'll be -+ * doing a lot of io. -+ */ -+ for (i = 0; i < nr_pages; i++) { -+ page = vmalloc_to_page(ps->area + (i * PAGE_SIZE)); -+ LockPage(page); -+ ps->iobuf->maplist[i] = page; -+ ps->iobuf->nr_pages++; -+ } -+ -+ ps->iobuf->nr_pages = nr_pages; -+ ps->iobuf->offset = 0; -+ -+ return 0; -+ -+ bad: -+ if (ps->iobuf) -+ free_kiovec(1, &ps->iobuf); -+ -+ if (ps->area) -+ vfree(ps->area); -+ ps->iobuf = NULL; -+ return r; -+} -+ -+static void free_iobuf(struct pstore *ps) -+{ -+ int i; -+ -+ for (i = 0; i < ps->iobuf->nr_pages; i++) -+ UnlockPage(ps->iobuf->maplist[i]); -+ ps->iobuf->locked = 0; -+ -+ free_kiovec(1, &ps->iobuf); -+ vfree(ps->area); -+} -+ -+/* -+ * Read or write a chunk aligned and sized block of data from a device. -+ */ -+static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) -+{ -+ int r; -+ struct kcopyd_region where; -+ -+ where.dev = ps->snap->cow->dev; -+ where.sector = ps->chunk_size * chunk; -+ where.count = ps->chunk_size; -+ -+ r = do_io(rw, &where, ps->iobuf); -+ if (r) -+ return r; -+ -+ return 0; -+} -+ -+/* -+ * Read or write a metadata area. Remembering to skip the first -+ * chunk which holds the header. 
-+ */ -+static int area_io(struct pstore *ps, uint32_t area, int rw) -+{ -+ int r; -+ uint32_t chunk; -+ -+ /* convert a metadata area index to a chunk index */ -+ chunk = 1 + ((ps->exceptions_per_area + 1) * area); -+ -+ r = chunk_io(ps, chunk, rw); -+ if (r) -+ return r; -+ -+ ps->current_area = area; -+ return 0; -+} -+ -+static int zero_area(struct pstore *ps, uint32_t area) -+{ -+ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); -+ return area_io(ps, area, WRITE); -+} -+ -+static int read_header(struct pstore *ps, int *new_snapshot) -+{ -+ int r; -+ struct disk_header *dh; -+ -+ r = chunk_io(ps, 0, READ); -+ if (r) -+ return r; -+ -+ dh = (struct disk_header *) ps->area; -+ -+ if (dh->magic == 0) { -+ *new_snapshot = 1; -+ -+ } else if (dh->magic == SNAP_MAGIC) { -+ *new_snapshot = 0; -+ ps->valid = dh->valid; -+ ps->version = dh->version; -+ ps->chunk_size = dh->chunk_size; -+ -+ } else { -+ DMWARN("Invalid/corrupt snapshot"); -+ r = -ENXIO; -+ } -+ -+ return r; -+} -+ -+static int write_header(struct pstore *ps) -+{ -+ struct disk_header *dh; -+ -+ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); -+ -+ dh = (struct disk_header *) ps->area; -+ dh->magic = SNAP_MAGIC; -+ dh->valid = ps->valid; -+ dh->version = ps->version; -+ dh->chunk_size = ps->chunk_size; -+ -+ return chunk_io(ps, 0, WRITE); -+} -+ -+/* -+ * Access functions for the disk exceptions, these do the endian conversions. -+ */ -+static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) -+{ -+ if (index >= ps->exceptions_per_area) -+ return NULL; -+ -+ return ((struct disk_exception *) ps->area) + index; -+} -+ -+static int read_exception(struct pstore *ps, -+ uint32_t index, struct disk_exception *result) -+{ -+ struct disk_exception *e; -+ -+ e = get_exception(ps, index); -+ if (!e) -+ return -EINVAL; -+ -+ /* copy it */ -+ result->old_chunk = le64_to_cpu(e->old_chunk); -+ result->new_chunk = le64_to_cpu(e->new_chunk); -+ -+ return 0; -+} -+ -+static int write_exception(struct pstore *ps, -+ uint32_t index, struct disk_exception *de) -+{ -+ struct disk_exception *e; -+ -+ e = get_exception(ps, index); -+ if (!e) -+ return -EINVAL; -+ -+ /* copy it */ -+ e->old_chunk = cpu_to_le64(de->old_chunk); -+ e->new_chunk = cpu_to_le64(de->new_chunk); -+ -+ return 0; -+} -+ -+/* -+ * Registers the exceptions that are present in the current area. -+ * 'full' is filled in to indicate if the area has been -+ * filled. -+ */ -+static int insert_exceptions(struct pstore *ps, int *full) -+{ -+ int i, r; -+ struct disk_exception de; -+ -+ /* presume the area is full */ -+ *full = 1; -+ -+ for (i = 0; i < ps->exceptions_per_area; i++) { -+ r = read_exception(ps, i, &de); -+ -+ if (r) -+ return r; -+ -+ /* -+ * If the new_chunk is pointing at the start of -+ * the COW device, where the first metadata area -+ * is we know that we've hit the end of the -+ * exceptions. Therefore the area is not full. -+ */ -+ if (de.new_chunk == 0LL) { -+ ps->current_committed = i; -+ *full = 0; -+ break; -+ } -+ -+ /* -+ * Keep track of the start of the free chunks. -+ */ -+ if (ps->next_free <= de.new_chunk) -+ ps->next_free = de.new_chunk + 1; -+ -+ /* -+ * Otherwise we add the exception to the snapshot. -+ */ -+ r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); -+ if (r) -+ return r; -+ } -+ -+ return 0; -+} -+ -+static int read_exceptions(struct pstore *ps) -+{ -+ uint32_t area; -+ int r, full = 1; -+ -+ /* -+ * Keeping reading chunks and inserting exceptions until -+ * we find a partially full area. 
-+ */
-+ for (area = 0; full; area++) {
-+ r = area_io(ps, area, READ);
-+ if (r)
-+ return r;
-+
-+ r = insert_exceptions(ps, &full);
-+ if (r)
-+ return r;
-+ }
-+
-+ return 0;
-+}
-+
-+static inline struct pstore *get_info(struct exception_store *store)
-+{
-+ return (struct pstore *) store->context;
-+}
-+
-+static int persistent_percentfull(struct exception_store *store)
-+{
-+ struct pstore *ps = get_info(store);
-+ return (ps->next_free * store->snap->chunk_size * 100) /
-+ get_dev_size(store->snap->cow->dev);
-+}
-+
-+static void persistent_destroy(struct exception_store *store)
-+{
-+ struct pstore *ps = get_info(store);
-+
-+ vfree(ps->callbacks);
-+ free_iobuf(ps);
-+ kfree(ps);
-+}
-+
-+static int persistent_prepare(struct exception_store *store,
-+ struct exception *e)
-+{
-+ struct pstore *ps = get_info(store);
-+ uint32_t stride;
-+ sector_t size = get_dev_size(store->snap->cow->dev);
-+
-+ /* Is there enough room ? */
-+ if (size <= (ps->next_free * store->snap->chunk_size))
-+ return -ENOSPC;
-+
-+ e->new_chunk = ps->next_free;
-+
-+ /*
-+ * Move onto the next free pending, making sure to take
-+ * into account the location of the metadata chunks.
-+ */
-+ stride = (ps->exceptions_per_area + 1);
-+ if (!(++ps->next_free % stride))
-+ ps->next_free++;
-+
-+ atomic_inc(&ps->pending_count);
-+ return 0;
-+}
-+
-+static void persistent_commit(struct exception_store *store,
-+ struct exception *e,
-+ void (*callback) (void *, int success),
-+ void *callback_context)
-+{
-+ int r, i;
-+ struct pstore *ps = get_info(store);
-+ struct disk_exception de;
-+ struct commit_callback *cb;
-+
-+ de.old_chunk = e->old_chunk;
-+ de.new_chunk = e->new_chunk;
-+ write_exception(ps, ps->current_committed++, &de);
-+
-+ /*
-+ * Add the callback to the back of the array. This code
-+ * is the only place where the callback array is
-+ * manipulated, and we know that it will never be called
-+ * multiple times concurrently.
-+ */
-+ cb = ps->callbacks + ps->callback_count++;
-+ cb->callback = callback;
-+ cb->context = callback_context;
-+
-+ /*
-+ * If there are no more exceptions in flight, or we have
-+ * filled this metadata area, we commit the exceptions to
-+ * disk.
-+ */
-+ if (atomic_dec_and_test(&ps->pending_count) ||
-+ (ps->current_committed == ps->exceptions_per_area)) {
-+ r = area_io(ps, ps->current_area, WRITE);
-+ if (r)
-+ ps->valid = 0;
-+
-+ for (i = 0; i < ps->callback_count; i++) {
-+ cb = ps->callbacks + i;
-+ cb->callback(cb->context, r == 0 ? 1 : 0);
-+ }
-+
-+ ps->callback_count = 0;
-+ }
-+
-+ /*
-+ * Have we completely filled the current area ?
-+ */ -+ if (ps->current_committed == ps->exceptions_per_area) { -+ ps->current_committed = 0; -+ r = zero_area(ps, ps->current_area + 1); -+ if (r) -+ ps->valid = 0; -+ } -+} -+ -+static void persistent_drop(struct exception_store *store) -+{ -+ struct pstore *ps = get_info(store); -+ -+ ps->valid = 0; -+ if (write_header(ps)) -+ DMWARN("write header failed"); -+} -+ -+int dm_create_persistent(struct exception_store *store, uint32_t chunk_size) -+{ -+ int r, new_snapshot; -+ struct pstore *ps; -+ -+ /* allocate the pstore */ -+ ps = kmalloc(sizeof(*ps), GFP_KERNEL); -+ if (!ps) -+ return -ENOMEM; -+ -+ ps->snap = store->snap; -+ ps->valid = 1; -+ ps->version = SNAPSHOT_DISK_VERSION; -+ ps->chunk_size = chunk_size; -+ ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) / -+ sizeof(struct disk_exception); -+ ps->next_free = 2; /* skipping the header and first area */ -+ ps->current_committed = 0; -+ -+ r = allocate_iobuf(ps); -+ if (r) -+ goto bad; -+ -+ /* -+ * Allocate space for all the callbacks. -+ */ -+ ps->callback_count = 0; -+ atomic_set(&ps->pending_count, 0); -+ ps->callbacks = vcalloc(ps->exceptions_per_area, -+ sizeof(*ps->callbacks)); -+ -+ if (!ps->callbacks) -+ goto bad; -+ -+ /* -+ * Read the snapshot header. -+ */ -+ r = read_header(ps, &new_snapshot); -+ if (r) -+ goto bad; -+ -+ /* -+ * Do we need to setup a new snapshot ? -+ */ -+ if (new_snapshot) { -+ r = write_header(ps); -+ if (r) { -+ DMWARN("write_header failed"); -+ goto bad; -+ } -+ -+ r = zero_area(ps, 0); -+ if (r) { -+ DMWARN("zero_area(0) failed"); -+ goto bad; -+ } -+ -+ } else { -+ /* -+ * Sanity checks. -+ */ -+ if (ps->chunk_size != chunk_size) { -+ DMWARN("chunk size for existing snapshot different " -+ "from that requested"); -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ if (ps->version != SNAPSHOT_DISK_VERSION) { -+ DMWARN("unable to handle snapshot disk version %d", -+ ps->version); -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ /* -+ * Read the metadata. -+ */ -+ r = read_exceptions(ps); -+ if (r) -+ goto bad; -+ } -+ -+ store->destroy = persistent_destroy; -+ store->prepare_exception = persistent_prepare; -+ store->commit_exception = persistent_commit; -+ store->drop_snapshot = persistent_drop; -+ store->percent_full = persistent_percentfull; -+ store->context = ps; -+ -+ return r; -+ -+ bad: -+ if (ps) { -+ if (ps->callbacks) -+ vfree(ps->callbacks); -+ -+ if (ps->iobuf) -+ free_iobuf(ps); -+ -+ kfree(ps); -+ } -+ return r; -+} -+ -+/*----------------------------------------------------------------- -+ * Implementation of the store for non-persistent snapshots. 
-+ *---------------------------------------------------------------*/ -+struct transient_c { -+ sector_t next_free; -+}; -+ -+void transient_destroy(struct exception_store *store) -+{ -+ kfree(store->context); -+} -+ -+int transient_prepare(struct exception_store *store, struct exception *e) -+{ -+ struct transient_c *tc = (struct transient_c *) store->context; -+ sector_t size = get_dev_size(store->snap->cow->dev); -+ -+ if (size < (tc->next_free + store->snap->chunk_size)) -+ return -1; -+ -+ e->new_chunk = sector_to_chunk(store->snap, tc->next_free); -+ tc->next_free += store->snap->chunk_size; -+ -+ return 0; -+} -+ -+void transient_commit(struct exception_store *store, -+ struct exception *e, -+ void (*callback) (void *, int success), -+ void *callback_context) -+{ -+ /* Just succeed */ -+ callback(callback_context, 1); -+} -+ -+static int transient_percentfull(struct exception_store *store) -+{ -+ struct transient_c *tc = (struct transient_c *) store->context; -+ return (tc->next_free * 100) / get_dev_size(store->snap->cow->dev); -+} -+ -+int dm_create_transient(struct exception_store *store, -+ struct dm_snapshot *s, int blocksize) -+{ -+ struct transient_c *tc; -+ -+ memset(store, 0, sizeof(*store)); -+ store->destroy = transient_destroy; -+ store->prepare_exception = transient_prepare; -+ store->commit_exception = transient_commit; -+ store->percent_full = transient_percentfull; -+ store->snap = s; -+ -+ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); -+ if (!tc) -+ return -ENOMEM; -+ -+ tc->next_free = 0; -+ store->context = tc; -+ -+ return 0; -+} -diff -ruN linux-2.4.19/drivers/md/dm-ioctl.c linux-2.4.19-dm/drivers/md/dm-ioctl.c ---- linux-2.4.19/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-ioctl.c Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,1139 @@ -+/* -+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define DM_DRIVER_EMAIL "dm@uk.sistina.com" -+ -+/*----------------------------------------------------------------- -+ * The ioctl interface needs to be able to look up devices by -+ * name or uuid. -+ *---------------------------------------------------------------*/ -+struct hash_cell { -+ struct list_head name_list; -+ struct list_head uuid_list; -+ -+ char *name; -+ char *uuid; -+ struct mapped_device *md; -+ -+ /* I hate devfs */ -+ devfs_handle_t devfs_entry; -+}; -+ -+#define NUM_BUCKETS 64 -+#define MASK_BUCKETS (NUM_BUCKETS - 1) -+static struct list_head _name_buckets[NUM_BUCKETS]; -+static struct list_head _uuid_buckets[NUM_BUCKETS]; -+ -+static devfs_handle_t _dev_dir; -+void dm_hash_remove_all(void); -+ -+/* -+ * Guards access to all three tables. -+ */ -+static DECLARE_RWSEM(_hash_lock); -+ -+static void init_buckets(struct list_head *buckets) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < NUM_BUCKETS; i++) -+ INIT_LIST_HEAD(buckets + i); -+} -+ -+int dm_hash_init(void) -+{ -+ init_buckets(_name_buckets); -+ init_buckets(_uuid_buckets); -+ _dev_dir = devfs_mk_dir(0, DM_DIR, NULL); -+ return 0; -+} -+ -+void dm_hash_exit(void) -+{ -+ dm_hash_remove_all(); -+ devfs_unregister(_dev_dir); -+} -+ -+/*----------------------------------------------------------------- -+ * Hash function: -+ * We're not really concerned with the str hash function being -+ * fast since it's only used by the ioctl interface. 
-+ *---------------------------------------------------------------*/ -+static unsigned int hash_str(const char *str) -+{ -+ const unsigned int hash_mult = 2654435387U; -+ unsigned int h = 0; -+ -+ while (*str) -+ h = (h + (unsigned int) *str++) * hash_mult; -+ -+ return h & MASK_BUCKETS; -+} -+ -+/*----------------------------------------------------------------- -+ * Code for looking up a device by name -+ *---------------------------------------------------------------*/ -+static struct hash_cell *__get_name_cell(const char *str) -+{ -+ struct list_head *tmp; -+ struct hash_cell *hc; -+ unsigned int h = hash_str(str); -+ -+ list_for_each(tmp, _name_buckets + h) { -+ hc = list_entry(tmp, struct hash_cell, name_list); -+ if (!strcmp(hc->name, str)) -+ return hc; -+ } -+ -+ return NULL; -+} -+ -+static struct hash_cell *__get_uuid_cell(const char *str) -+{ -+ struct list_head *tmp; -+ struct hash_cell *hc; -+ unsigned int h = hash_str(str); -+ -+ list_for_each(tmp, _uuid_buckets + h) { -+ hc = list_entry(tmp, struct hash_cell, uuid_list); -+ if (!strcmp(hc->uuid, str)) -+ return hc; -+ } -+ -+ return NULL; -+} -+ -+/*----------------------------------------------------------------- -+ * Inserting, removing and renaming a device. -+ *---------------------------------------------------------------*/ -+static inline char *kstrdup(const char *str) -+{ -+ char *r = kmalloc(strlen(str) + 1, GFP_KERNEL); -+ if (r) -+ strcpy(r, str); -+ return r; -+} -+ -+static struct hash_cell *alloc_cell(const char *name, const char *uuid, -+ struct mapped_device *md) -+{ -+ struct hash_cell *hc; -+ -+ hc = kmalloc(sizeof(*hc), GFP_KERNEL); -+ if (!hc) -+ return NULL; -+ -+ hc->name = kstrdup(name); -+ if (!hc->name) { -+ kfree(hc); -+ return NULL; -+ } -+ -+ if (!uuid) -+ hc->uuid = NULL; -+ -+ else { -+ hc->uuid = kstrdup(uuid); -+ if (!hc->uuid) { -+ kfree(hc->name); -+ kfree(hc); -+ return NULL; -+ } -+ } -+ -+ INIT_LIST_HEAD(&hc->name_list); -+ INIT_LIST_HEAD(&hc->uuid_list); -+ hc->md = md; -+ return hc; -+} -+ -+static void free_cell(struct hash_cell *hc) -+{ -+ if (hc) { -+ kfree(hc->name); -+ kfree(hc->uuid); -+ kfree(hc); -+ } -+} -+ -+/* -+ * devfs stuff. -+ */ -+static int register_with_devfs(struct hash_cell *hc) -+{ -+ kdev_t dev = dm_kdev(hc->md); -+ -+ hc->devfs_entry = -+ devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER, -+ major(dev), minor(dev), -+ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, -+ &dm_blk_dops, NULL); -+ -+ return 0; -+} -+ -+static int unregister_with_devfs(struct hash_cell *hc) -+{ -+ devfs_unregister(hc->devfs_entry); -+ return 0; -+} -+ -+/* -+ * The kdev_t and uuid of a device can never change once it is -+ * initially inserted. -+ */ -+int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md) -+{ -+ struct hash_cell *cell; -+ -+ /* -+ * Allocate the new cells. -+ */ -+ cell = alloc_cell(name, uuid, md); -+ if (!cell) -+ return -ENOMEM; -+ -+ /* -+ * Insert the cell into all three hash tables. 
-+ */ -+ down_write(&_hash_lock); -+ if (__get_name_cell(name)) -+ goto bad; -+ -+ list_add(&cell->name_list, _name_buckets + hash_str(name)); -+ -+ if (uuid) { -+ if (__get_uuid_cell(uuid)) { -+ list_del(&cell->name_list); -+ goto bad; -+ } -+ list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); -+ } -+ register_with_devfs(cell); -+ dm_get(md); -+ up_write(&_hash_lock); -+ -+ return 0; -+ -+ bad: -+ up_write(&_hash_lock); -+ free_cell(cell); -+ return -EBUSY; -+} -+ -+void __hash_remove(struct hash_cell *hc) -+{ -+ /* remove from the dev hash */ -+ list_del(&hc->uuid_list); -+ list_del(&hc->name_list); -+ unregister_with_devfs(hc); -+ dm_put(hc->md); -+} -+ -+void dm_hash_remove_all(void) -+{ -+ int i; -+ struct hash_cell *hc; -+ struct list_head *tmp, *n; -+ -+ down_write(&_hash_lock); -+ for (i = 0; i < NUM_BUCKETS; i++) { -+ list_for_each_safe(tmp, n, _name_buckets + i) { -+ hc = list_entry(tmp, struct hash_cell, name_list); -+ __hash_remove(hc); -+ } -+ } -+ up_write(&_hash_lock); -+} -+ -+int dm_hash_rename(const char *old, const char *new) -+{ -+ char *new_name, *old_name; -+ struct hash_cell *hc; -+ -+ /* -+ * duplicate new. -+ */ -+ new_name = kstrdup(new); -+ if (!new_name) -+ return -ENOMEM; -+ -+ down_write(&_hash_lock); -+ -+ /* -+ * Is new free ? -+ */ -+ hc = __get_name_cell(new); -+ if (hc) { -+ DMWARN("asked to rename to an already existing name %s -> %s", -+ old, new); -+ up_write(&_hash_lock); -+ return -EBUSY; -+ } -+ -+ /* -+ * Is there such a device as 'old' ? -+ */ -+ hc = __get_name_cell(old); -+ if (!hc) { -+ DMWARN("asked to rename a non existent device %s -> %s", -+ old, new); -+ up_write(&_hash_lock); -+ return -ENXIO; -+ } -+ -+ /* -+ * rename and move the name cell. -+ */ -+ list_del(&hc->name_list); -+ old_name = hc->name; -+ hc->name = new_name; -+ list_add(&hc->name_list, _name_buckets + hash_str(new_name)); -+ -+ /* rename the device node in devfs */ -+ unregister_with_devfs(hc); -+ register_with_devfs(hc); -+ -+ up_write(&_hash_lock); -+ kfree(old_name); -+ return 0; -+} -+ -+ -+/*----------------------------------------------------------------- -+ * Implementation of the ioctl commands -+ *---------------------------------------------------------------*/ -+ -+/* -+ * All the ioctl commands get dispatched to functions with this -+ * prototype. -+ */ -+typedef int (*ioctl_fn)(struct dm_ioctl *param, struct dm_ioctl *user); -+ -+/* -+ * Check a string doesn't overrun the chunk of -+ * memory we copied from userland. 
-+ */ -+static int valid_str(char *str, void *begin, void *end) -+{ -+ while (((void *) str >= begin) && ((void *) str < end)) -+ if (!*str++) -+ return 0; -+ -+ return -EINVAL; -+} -+ -+static int next_target(struct dm_target_spec *last, uint32_t next, -+ void *begin, void *end, -+ struct dm_target_spec **spec, char **params) -+{ -+ *spec = (struct dm_target_spec *) -+ ((unsigned char *) last + next); -+ *params = (char *) (*spec + 1); -+ -+ if (*spec < (last + 1) || ((void *) *spec > end)) -+ return -EINVAL; -+ -+ return valid_str(*params, begin, end); -+} -+ -+static int populate_table(struct dm_table *table, struct dm_ioctl *args) -+{ -+ int i = 0, r, first = 1; -+ struct dm_target_spec *spec; -+ char *params; -+ void *begin, *end; -+ -+ if (!args->target_count) { -+ DMWARN("populate_table: no targets specified"); -+ return -EINVAL; -+ } -+ -+ begin = (void *) args; -+ end = begin + args->data_size; -+ -+ for (i = 0; i < args->target_count; i++) { -+ -+ if (first) -+ r = next_target((struct dm_target_spec *) args, -+ args->data_start, -+ begin, end, &spec, ¶ms); -+ else -+ r = next_target(spec, spec->next, begin, end, -+ &spec, ¶ms); -+ -+ if (r) { -+ DMWARN("unable to find target"); -+ return -EINVAL; -+ } -+ -+ r = dm_table_add_target(table, spec->target_type, -+ spec->sector_start, spec->length, -+ params); -+ if (r) { -+ DMWARN("internal error adding target to table"); -+ return -EINVAL; -+ } -+ -+ first = 0; -+ } -+ -+ return dm_table_complete(table); -+} -+ -+/* -+ * Round up the ptr to the next 'align' boundary. Obviously -+ * 'align' must be a power of 2. -+ */ -+static inline void *align_ptr(void *ptr, unsigned int align) -+{ -+ align--; -+ return (void *) (((unsigned long) (ptr + align)) & ~align); -+} -+ -+/* -+ * Copies a dm_ioctl and an optional additional payload to -+ * userland. -+ */ -+static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param, -+ void *data, uint32_t len) -+{ -+ int r; -+ void *ptr = NULL; -+ -+ if (data) { -+ ptr = align_ptr(user + 1, sizeof(unsigned long)); -+ param->data_start = ptr - (void *) user; -+ } -+ -+ /* -+ * The version number has already been filled in, so we -+ * just copy later fields. -+ */ -+ r = copy_to_user(&user->data_size, ¶m->data_size, -+ sizeof(*param) - sizeof(param->version)); -+ if (r) -+ return -EFAULT; -+ -+ if (data) { -+ if (param->data_start + len > param->data_size) -+ return -ENOSPC; -+ -+ if (copy_to_user(ptr, data, len)) -+ r = -EFAULT; -+ } -+ -+ return r; -+} -+ -+/* -+ * Fills in a dm_ioctl structure, ready for sending back to -+ * userland. -+ */ -+static int __info(struct mapped_device *md, struct dm_ioctl *param) -+{ -+ kdev_t dev = dm_kdev(md); -+ struct dm_table *table; -+ struct block_device *bdev; -+ -+ param->flags = DM_EXISTS_FLAG; -+ if (dm_suspended(md)) -+ param->flags |= DM_SUSPEND_FLAG; -+ -+ param->dev = kdev_t_to_nr(dev); -+ bdev = bdget(param->dev); -+ if (!bdev) -+ return -ENXIO; -+ -+ param->open_count = bdev->bd_openers; -+ bdput(bdev); -+ -+ if (is_read_only(dev)) -+ param->flags |= DM_READONLY_FLAG; -+ -+ table = dm_get_table(md); -+ param->target_count = dm_table_get_num_targets(table); -+ dm_table_put(table); -+ -+ return 0; -+} -+ -+/* -+ * Always use UUID for lookups if it's present, otherwise use name. -+ */ -+static inline struct mapped_device *find_device(struct dm_ioctl *param) -+{ -+ struct hash_cell *hc; -+ struct mapped_device *md = NULL; -+ -+ down_read(&_hash_lock); -+ hc = *param->uuid ? 
__get_uuid_cell(param->uuid) : -+ __get_name_cell(param->name); -+ if (hc) { -+ md = hc->md; -+ -+ /* -+ * Sneakily write in both the name and the uuid -+ * while we have the cell. -+ */ -+ strncpy(param->name, hc->name, sizeof(param->name)); -+ if (hc->uuid) -+ strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1); -+ else -+ param->uuid[0] = '\0'; -+ -+ dm_get(md); -+ } -+ up_read(&_hash_lock); -+ -+ return md; -+} -+ -+#define ALIGNMENT sizeof(int) -+static void *_align(void *ptr, unsigned int a) -+{ -+ register unsigned long align = --a; -+ -+ return (void *) (((unsigned long) ptr + align) & ~align); -+} -+ -+/* -+ * Copies device info back to user space, used by -+ * the create and info ioctls. -+ */ -+static int info(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ -+ param->flags = 0; -+ -+ md = find_device(param); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ -+ __info(md, param); -+ dm_put(md); -+ -+ out: -+ return results_to_user(user, param, NULL, 0); -+} -+ -+static inline int get_mode(struct dm_ioctl *param) -+{ -+ int mode = FMODE_READ | FMODE_WRITE; -+ -+ if (param->flags & DM_READONLY_FLAG) -+ mode = FMODE_READ; -+ -+ return mode; -+} -+ -+static int check_name(const char *name) -+{ -+ if (strchr(name, '/')) { -+ DMWARN("invalid device name"); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int create(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r; -+ kdev_t dev; -+ struct dm_table *t; -+ struct mapped_device *md; -+ int minor; -+ -+ r = check_name(param->name); -+ if (r) -+ return r; -+ -+ r = dm_table_create(&t, get_mode(param)); -+ if (r) -+ return r; -+ -+ r = populate_table(t, param); -+ if (r) { -+ dm_table_put(t); -+ return r; -+ } -+ -+ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ? -+ minor(to_kdev_t(param->dev)) : -1; -+ -+ r = dm_create(minor, t, &md); -+ if (r) { -+ dm_table_put(t); -+ return r; -+ } -+ dm_table_put(t); /* md will have grabbed its own reference */ -+ -+ dev = dm_kdev(md); -+ set_device_ro(dev, (param->flags & DM_READONLY_FLAG)); -+ r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md); -+ dm_put(md); -+ -+ return r ? 
r : info(param, user); -+} -+ -+/* -+ * Build up the status struct for each target -+ */ -+static int __status(struct mapped_device *md, struct dm_ioctl *param, -+ char *outbuf, int *len) -+{ -+ int i, num_targets; -+ struct dm_target_spec *spec; -+ char *outptr; -+ status_type_t type; -+ struct dm_table *table = dm_get_table(md); -+ -+ if (param->flags & DM_STATUS_TABLE_FLAG) -+ type = STATUSTYPE_TABLE; -+ else -+ type = STATUSTYPE_INFO; -+ -+ outptr = outbuf; -+ -+ /* Get all the target info */ -+ num_targets = dm_table_get_num_targets(table); -+ for (i = 0; i < num_targets; i++) { -+ struct dm_target *ti = dm_table_get_target(table, i); -+ -+ if (outptr - outbuf + -+ sizeof(struct dm_target_spec) > param->data_size) { -+ dm_table_put(table); -+ return -ENOMEM; -+ } -+ -+ spec = (struct dm_target_spec *) outptr; -+ -+ spec->status = 0; -+ spec->sector_start = ti->begin; -+ spec->length = ti->len; -+ strncpy(spec->target_type, ti->type->name, -+ sizeof(spec->target_type)); -+ -+ outptr += sizeof(struct dm_target_spec); -+ -+ /* Get the status/table string from the target driver */ -+ if (ti->type->status) -+ ti->type->status(ti, type, outptr, -+ outbuf + param->data_size - outptr); -+ else -+ outptr[0] = '\0'; -+ -+ outptr += strlen(outptr) + 1; -+ _align(outptr, ALIGNMENT); -+ spec->next = outptr - outbuf; -+ } -+ -+ param->target_count = num_targets; -+ *len = outptr - outbuf; -+ dm_table_put(table); -+ -+ return 0; -+} -+ -+/* -+ * Return the status of a device as a text string for each -+ * target. -+ */ -+static int get_status(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ int len = 0; -+ int ret; -+ char *outbuf = NULL; -+ -+ md = find_device(param); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ -+ /* We haven't a clue how long the resultant data will be so -+ just allocate as much as userland has allowed us and make sure -+ we don't overun it */ -+ outbuf = kmalloc(param->data_size, GFP_KERNEL); -+ if (!outbuf) -+ goto out; -+ /* -+ * Get the status of all targets -+ */ -+ __status(md, param, outbuf, &len); -+ -+ /* -+ * Setup the basic dm_ioctl structure. -+ */ -+ __info(md, param); -+ -+ out: -+ if (md) -+ dm_put(md); -+ -+ ret = results_to_user(user, param, outbuf, len); -+ -+ if (outbuf) -+ kfree(outbuf); -+ -+ return ret; -+} -+ -+/* -+ * Wait for a device to report an event -+ */ -+static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ struct dm_table *table; -+ DECLARE_WAITQUEUE(wq, current); -+ -+ md = find_device(param); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ -+ /* -+ * Setup the basic dm_ioctl structure. -+ */ -+ __info(md, param); -+ -+ /* -+ * Wait for a notification event -+ */ -+ set_current_state(TASK_INTERRUPTIBLE); -+ table = dm_get_table(md); -+ dm_table_add_wait_queue(table, &wq); -+ dm_table_put(table); -+ dm_put(md); -+ -+ yield(); -+ set_current_state(TASK_RUNNING); -+ -+ out: -+ return results_to_user(user, param, NULL, 0); -+} -+ -+/* -+ * Retrieves a list of devices used by a particular dm device. -+ */ -+static int dep(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int count, r; -+ struct mapped_device *md; -+ struct list_head *tmp; -+ size_t len = 0; -+ struct dm_target_deps *deps = NULL; -+ struct dm_table *table; -+ -+ md = find_device(param); -+ if (!md) -+ goto out; -+ table = dm_get_table(md); -+ -+ /* -+ * Setup the basic dm_ioctl structure. 
-+ */ -+ __info(md, param); -+ -+ /* -+ * Count the devices. -+ */ -+ count = 0; -+ list_for_each(tmp, dm_table_get_devices(table)) -+ count++; -+ -+ /* -+ * Allocate a kernel space version of the dm_target_status -+ * struct. -+ */ -+ if (array_too_big(sizeof(*deps), sizeof(*deps->dev), count)) { -+ dm_table_put(table); -+ dm_put(md); -+ return -ENOMEM; -+ } -+ -+ len = sizeof(*deps) + (sizeof(*deps->dev) * count); -+ deps = kmalloc(len, GFP_KERNEL); -+ if (!deps) { -+ dm_table_put(table); -+ dm_put(md); -+ return -ENOMEM; -+ } -+ -+ /* -+ * Fill in the devices. -+ */ -+ deps->count = count; -+ count = 0; -+ list_for_each(tmp, dm_table_get_devices(table)) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ deps->dev[count++] = dd->bdev->bd_dev; -+ } -+ dm_table_put(table); -+ dm_put(md); -+ -+ out: -+ r = results_to_user(user, param, deps, len); -+ -+ kfree(deps); -+ return r; -+} -+ -+static int remove(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct hash_cell *hc; -+ -+ down_write(&_hash_lock); -+ hc = *param->uuid ? __get_uuid_cell(param->uuid) : -+ __get_name_cell(param->name); -+ if (!hc) { -+ DMWARN("device doesn't appear to be in the dev hash table."); -+ up_write(&_hash_lock); -+ return -EINVAL; -+ } -+ -+ __hash_remove(hc); -+ up_write(&_hash_lock); -+ return 0; -+} -+ -+static int remove_all(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ dm_hash_remove_all(); -+ return 0; -+} -+ -+static int suspend(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ md = find_device(param); -+ if (!md) -+ return -ENXIO; -+ -+ if (param->flags & DM_SUSPEND_FLAG) -+ r = dm_suspend(md); -+ else -+ r = dm_resume(md); -+ -+ dm_put(md); -+ return r; -+} -+ -+static int reload(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r; -+ kdev_t dev; -+ struct mapped_device *md; -+ struct dm_table *t; -+ -+ r = dm_table_create(&t, get_mode(param)); -+ if (r) -+ return r; -+ -+ r = populate_table(t, param); -+ if (r) { -+ dm_table_put(t); -+ return r; -+ } -+ -+ md = find_device(param); -+ if (!md) { -+ dm_table_put(t); -+ return -ENXIO; -+ } -+ -+ r = dm_swap_table(md, t); -+ if (r) { -+ dm_put(md); -+ dm_table_put(t); -+ return r; -+ } -+ -+ dev = dm_kdev(md); -+ set_device_ro(dev, (param->flags & DM_READONLY_FLAG)); -+ dm_put(md); -+ -+ r = info(param, user); -+ return r; -+} -+ -+static int rename(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r; -+ char *new_name = (char *) param + param->data_start; -+ -+ if (valid_str(new_name, (void *) param, -+ (void *) param + param->data_size)) { -+ DMWARN("Invalid new logical volume name supplied."); -+ return -EINVAL; -+ } -+ -+ r = check_name(new_name); -+ if (r) -+ return r; -+ -+ return dm_hash_rename(param->name, new_name); -+} -+ -+ -+/*----------------------------------------------------------------- -+ * Implementation of open/close/ioctl on the special char -+ * device. 
-+ *---------------------------------------------------------------*/ -+static ioctl_fn lookup_ioctl(unsigned int cmd) -+{ -+ static struct { -+ int cmd; -+ ioctl_fn fn; -+ } _ioctls[] = { -+ {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ -+ {DM_REMOVE_ALL_CMD, remove_all}, -+ {DM_DEV_CREATE_CMD, create}, -+ {DM_DEV_REMOVE_CMD, remove}, -+ {DM_DEV_RELOAD_CMD, reload}, -+ {DM_DEV_RENAME_CMD, rename}, -+ {DM_DEV_SUSPEND_CMD, suspend}, -+ {DM_DEV_DEPS_CMD, dep}, -+ {DM_DEV_STATUS_CMD, info}, -+ {DM_TARGET_STATUS_CMD, get_status}, -+ {DM_TARGET_WAIT_CMD, wait_device_event}, -+ }; -+ -+ return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; -+} -+ -+/* -+ * As well as checking the version compatibility this always -+ * copies the kernel interface version out. -+ */ -+static int check_version(int cmd, struct dm_ioctl *user) -+{ -+ uint32_t version[3]; -+ int r = 0; -+ -+ if (copy_from_user(version, user->version, sizeof(version))) -+ return -EFAULT; -+ -+ if ((DM_VERSION_MAJOR != version[0]) || -+ (DM_VERSION_MINOR < version[1])) { -+ DMWARN("ioctl interface mismatch: " -+ "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", -+ DM_VERSION_MAJOR, DM_VERSION_MINOR, -+ DM_VERSION_PATCHLEVEL, -+ version[0], version[1], version[2], cmd); -+ r = -EINVAL; -+ } -+ -+ /* -+ * Fill in the kernel version. -+ */ -+ version[0] = DM_VERSION_MAJOR; -+ version[1] = DM_VERSION_MINOR; -+ version[2] = DM_VERSION_PATCHLEVEL; -+ if (copy_to_user(user->version, version, sizeof(version))) -+ return -EFAULT; -+ -+ return r; -+} -+ -+static void free_params(struct dm_ioctl *param) -+{ -+ vfree(param); -+} -+ -+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param) -+{ -+ struct dm_ioctl tmp, *dmi; -+ -+ if (copy_from_user(&tmp, user, sizeof(tmp))) -+ return -EFAULT; -+ -+ if (tmp.data_size < sizeof(tmp)) -+ return -EINVAL; -+ -+ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); -+ if (!dmi) -+ return -ENOMEM; -+ -+ if (copy_from_user(dmi, user, tmp.data_size)) { -+ vfree(dmi); -+ return -EFAULT; -+ } -+ -+ *param = dmi; -+ return 0; -+} -+ -+static int validate_params(uint cmd, struct dm_ioctl *param) -+{ -+ /* Ignores parameters */ -+ if (cmd == DM_REMOVE_ALL_CMD) -+ return 0; -+ -+ /* Unless creating, either name of uuid but not both */ -+ if (cmd != DM_DEV_CREATE_CMD) { -+ if ((!*param->uuid && !*param->name) || -+ (*param->uuid && *param->name)) { -+ DMWARN("one of name or uuid must be supplied"); -+ return -EINVAL; -+ } -+ } -+ -+ /* Ensure strings are terminated */ -+ param->name[DM_NAME_LEN - 1] = '\0'; -+ param->uuid[DM_UUID_LEN - 1] = '\0'; -+ -+ return 0; -+} -+ -+static int ctl_ioctl(struct inode *inode, struct file *file, -+ uint command, ulong u) -+{ -+ int r = 0, cmd; -+ struct dm_ioctl *param; -+ struct dm_ioctl *user = (struct dm_ioctl *) u; -+ ioctl_fn fn = NULL; -+ -+ /* only root can play with this */ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EACCES; -+ -+ if (_IOC_TYPE(command) != DM_IOCTL) -+ return -ENOTTY; -+ -+ cmd = _IOC_NR(command); -+ -+ /* -+ * Check the interface version passed in. This also -+ * writes out the kernel's interface version. -+ */ -+ r = check_version(cmd, user); -+ if (r) -+ return r; -+ -+ /* -+ * Nothing more to do for the version command. -+ */ -+ if (cmd == DM_VERSION_CMD) -+ return 0; -+ -+ fn = lookup_ioctl(cmd); -+ if (!fn) { -+ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); -+ return -ENOTTY; -+ } -+ -+ /* -+ * Copy the parameters into kernel space. 
-+ */ -+ r = copy_params(user, ¶m); -+ if (r) -+ return r; -+ -+ r = validate_params(cmd, param); -+ if (r) { -+ free_params(param); -+ return r; -+ } -+ -+ r = fn(param, user); -+ free_params(param); -+ return r; -+} -+ -+static struct file_operations _ctl_fops = { -+ .ioctl = ctl_ioctl, -+ .owner = THIS_MODULE, -+}; -+ -+static devfs_handle_t _ctl_handle; -+ -+static struct miscdevice _dm_misc = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = DM_NAME, -+ .fops = &_ctl_fops -+}; -+ -+/* -+ * Create misc character device and link to DM_DIR/control. -+ */ -+int __init dm_interface_init(void) -+{ -+ int r; -+ char rname[64]; -+ -+ r = dm_hash_init(); -+ if (r) -+ return r; -+ -+ r = misc_register(&_dm_misc); -+ if (r) { -+ DMERR("misc_register failed for control device"); -+ dm_hash_exit(); -+ return r; -+ } -+ -+ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3, -+ sizeof rname - 3); -+ if (r == -ENOSYS) -+ return 0; /* devfs not present */ -+ -+ if (r < 0) { -+ DMERR("devfs_generate_path failed for control device"); -+ goto failed; -+ } -+ -+ strncpy(rname + r, "../", 3); -+ r = devfs_mk_symlink(NULL, DM_DIR "/control", -+ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL); -+ if (r) { -+ DMERR("devfs_mk_symlink failed for control device"); -+ goto failed; -+ } -+ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle); -+ -+ DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR, -+ DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA, -+ DM_DRIVER_EMAIL); -+ return 0; -+ -+ failed: -+ dm_hash_exit(); -+ misc_deregister(&_dm_misc); -+ return r; -+} -+ -+void dm_interface_exit(void) -+{ -+ dm_hash_exit(); -+ -+ if (misc_deregister(&_dm_misc) < 0) -+ DMERR("misc_deregister failed for control device"); -+} -diff -ruN linux-2.4.19/drivers/md/dm-linear.c linux-2.4.19-dm/drivers/md/dm-linear.c ---- linux-2.4.19/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-linear.c Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,120 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Linear: maps a linear range of a device. 
-+ */ -+struct linear_c { -+ struct dm_dev *dev; -+ sector_t start; -+}; -+ -+/* -+ * Construct a linear mapping: -+ */ -+static int linear_ctr(struct dm_target *ti, int argc, char **argv) -+{ -+ struct linear_c *lc; -+ -+ if (argc != 2) { -+ ti->error = "dm-linear: Not enough arguments"; -+ return -EINVAL; -+ } -+ -+ lc = kmalloc(sizeof(*lc), GFP_KERNEL); -+ if (lc == NULL) { -+ ti->error = "dm-linear: Cannot allocate linear context"; -+ return -ENOMEM; -+ } -+ -+ if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) { -+ ti->error = "dm-linear: Invalid device sector"; -+ goto bad; -+ } -+ -+ if (dm_get_device(ti, argv[0], lc->start, ti->len, -+ dm_table_get_mode(ti->table), &lc->dev)) { -+ ti->error = "dm-linear: Device lookup failed"; -+ goto bad; -+ } -+ -+ ti->private = lc; -+ return 0; -+ -+ bad: -+ kfree(lc); -+ return -EINVAL; -+} -+ -+static void linear_dtr(struct dm_target *ti) -+{ -+ struct linear_c *lc = (struct linear_c *) ti->private; -+ -+ dm_put_device(ti, lc->dev); -+ kfree(lc); -+} -+ -+static int linear_map(struct dm_target *ti, struct buffer_head *bh, int rw) -+{ -+ struct linear_c *lc = (struct linear_c *) ti->private; -+ -+ bh->b_rdev = lc->dev->dev; -+ bh->b_rsector = lc->start + (bh->b_rsector - ti->begin); -+ -+ return 1; -+} -+ -+static int linear_status(struct dm_target *ti, status_type_t type, -+ char *result, int maxlen) -+{ -+ struct linear_c *lc = (struct linear_c *) ti->private; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ result[0] = '\0'; -+ break; -+ -+ case STATUSTYPE_TABLE: -+ snprintf(result, maxlen, "%s " SECTOR_FORMAT, -+ kdevname(to_kdev_t(lc->dev->bdev->bd_dev)), lc->start); -+ break; -+ } -+ return 0; -+} -+ -+static struct target_type linear_target = { -+ .name = "linear", -+ .module = THIS_MODULE, -+ .ctr = linear_ctr, -+ .dtr = linear_dtr, -+ .map = linear_map, -+ .status = linear_status, -+}; -+ -+int __init dm_linear_init(void) -+{ -+ int r = dm_register_target(&linear_target); -+ -+ if (r < 0) -+ DMERR("linear: register failed %d", r); -+ -+ return r; -+} -+ -+void dm_linear_exit(void) -+{ -+ int r = dm_unregister_target(&linear_target); -+ -+ if (r < 0) -+ DMERR("linear: unregister failed %d", r); -+} -diff -ruN linux-2.4.19/drivers/md/dm-snapshot.c linux-2.4.19-dm/drivers/md/dm-snapshot.c ---- linux-2.4.19/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-snapshot.c Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,1169 @@ -+/* -+ * dm-snapshot.c -+ * -+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "dm-snapshot.h" -+#include "kcopyd.h" -+ -+/* -+ * FIXME: Remove this before release. -+ */ -+#if 0 -+#define DMDEBUG(x...) DMWARN( ## x) -+#else -+#define DMDEBUG(x...) -+#endif -+ -+/* -+ * The percentage increment we will wake up users at -+ */ -+#define WAKE_UP_PERCENT 5 -+ -+/* -+ * Hard sector size used all over the kernel -+ */ -+#define SECTOR_SIZE 512 -+ -+/* -+ * kcopyd priority of snapshot operations -+ */ -+#define SNAPSHOT_COPY_PRIORITY 2 -+ -+struct pending_exception { -+ struct exception e; -+ -+ /* -+ * Origin buffers waiting for this to complete are held -+ * in a list (using b_reqnext). -+ */ -+ struct buffer_head *origin_bhs; -+ struct buffer_head *snapshot_bhs; -+ -+ /* -+ * Other pending_exceptions that are processing this -+ * chunk. When this list is empty, we know we can -+ * complete the origins. 
-+ */ -+ struct list_head siblings; -+ -+ /* Pointer back to snapshot context */ -+ struct dm_snapshot *snap; -+ -+ /* -+ * 1 indicates the exception has already been sent to -+ * kcopyd. -+ */ -+ int started; -+}; -+ -+/* -+ * Hash table mapping origin volumes to lists of snapshots and -+ * a lock to protect it -+ */ -+static kmem_cache_t *exception_cache; -+static kmem_cache_t *pending_cache; -+static mempool_t *pending_pool; -+ -+/* -+ * One of these per registered origin, held in the snapshot_origins hash -+ */ -+struct origin { -+ /* The origin device */ -+ kdev_t dev; -+ -+ struct list_head hash_list; -+ -+ /* List of snapshots for this origin */ -+ struct list_head snapshots; -+}; -+ -+/* -+ * Size of the hash table for origin volumes. If we make this -+ * the size of the minors list then it should be nearly perfect -+ */ -+#define ORIGIN_HASH_SIZE 256 -+#define ORIGIN_MASK 0xFF -+static struct list_head *_origins; -+static struct rw_semaphore _origins_lock; -+ -+static int init_origin_hash(void) -+{ -+ int i; -+ -+ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!_origins) { -+ DMERR("Device mapper: Snapshot: unable to allocate memory"); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < ORIGIN_HASH_SIZE; i++) -+ INIT_LIST_HEAD(_origins + i); -+ init_rwsem(&_origins_lock); -+ -+ return 0; -+} -+ -+static void exit_origin_hash(void) -+{ -+ kfree(_origins); -+} -+ -+static inline unsigned int origin_hash(kdev_t dev) -+{ -+ return MINOR(dev) & ORIGIN_MASK; -+} -+ -+static struct origin *__lookup_origin(kdev_t origin) -+{ -+ struct list_head *slist; -+ struct list_head *ol; -+ struct origin *o; -+ -+ ol = &_origins[origin_hash(origin)]; -+ list_for_each(slist, ol) { -+ o = list_entry(slist, struct origin, hash_list); -+ -+ if (o->dev == origin) -+ return o; -+ } -+ -+ return NULL; -+} -+ -+static void __insert_origin(struct origin *o) -+{ -+ struct list_head *sl = &_origins[origin_hash(o->dev)]; -+ list_add_tail(&o->hash_list, sl); -+} -+ -+/* -+ * Make a note of the snapshot and its origin so we can look it -+ * up when the origin has a write on it. -+ */ -+static int register_snapshot(struct dm_snapshot *snap) -+{ -+ struct origin *o; -+ kdev_t dev = snap->origin->dev; -+ -+ down_write(&_origins_lock); -+ o = __lookup_origin(dev); -+ -+ if (!o) { -+ /* New origin */ -+ o = kmalloc(sizeof(*o), GFP_KERNEL); -+ if (!o) { -+ up_write(&_origins_lock); -+ return -ENOMEM; -+ } -+ -+ /* Initialise the struct */ -+ INIT_LIST_HEAD(&o->snapshots); -+ o->dev = dev; -+ -+ __insert_origin(o); -+ } -+ -+ list_add_tail(&snap->list, &o->snapshots); -+ -+ up_write(&_origins_lock); -+ return 0; -+} -+ -+static void unregister_snapshot(struct dm_snapshot *s) -+{ -+ struct origin *o; -+ -+ down_write(&_origins_lock); -+ o = __lookup_origin(s->origin->dev); -+ -+ list_del(&s->list); -+ if (list_empty(&o->snapshots)) { -+ list_del(&o->hash_list); -+ kfree(o); -+ } -+ -+ up_write(&_origins_lock); -+} -+ -+/* -+ * Implementation of the exception hash tables. 
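-+ * Each table is an array of list_heads sized to a power of
-+ * two, so a chunk hashes simply by masking with (size - 1);
-+ * see exception_hash() below.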
-+ */ -+static int init_exception_table(struct exception_table *et, uint32_t size) -+{ -+ int i; -+ -+ et->hash_mask = size - 1; -+ et->table = vcalloc(size, sizeof(struct list_head)); -+ if (!et->table) -+ return -ENOMEM; -+ -+ for (i = 0; i < size; i++) -+ INIT_LIST_HEAD(et->table + i); -+ -+ return 0; -+} -+ -+static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) -+{ -+ struct list_head *slot, *entry, *temp; -+ struct exception *ex; -+ int i, size; -+ -+ size = et->hash_mask + 1; -+ for (i = 0; i < size; i++) { -+ slot = et->table + i; -+ -+ list_for_each_safe(entry, temp, slot) { -+ ex = list_entry(entry, struct exception, hash_list); -+ kmem_cache_free(mem, ex); -+ } -+ } -+ -+ vfree(et->table); -+} -+ -+/* -+ * FIXME: check how this hash fn is performing. -+ */ -+static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) -+{ -+ return chunk & et->hash_mask; -+} -+ -+static void insert_exception(struct exception_table *eh, struct exception *e) -+{ -+ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; -+ list_add(&e->hash_list, l); -+} -+ -+static inline void remove_exception(struct exception *e) -+{ -+ list_del(&e->hash_list); -+} -+ -+/* -+ * Return the exception data for a sector, or NULL if not -+ * remapped. -+ */ -+static struct exception *lookup_exception(struct exception_table *et, -+ chunk_t chunk) -+{ -+ struct list_head *slot, *el; -+ struct exception *e; -+ -+ slot = &et->table[exception_hash(et, chunk)]; -+ list_for_each(el, slot) { -+ e = list_entry(el, struct exception, hash_list); -+ if (e->old_chunk == chunk) -+ return e; -+ } -+ -+ return NULL; -+} -+ -+static inline struct exception *alloc_exception(void) -+{ -+ struct exception *e; -+ -+ e = kmem_cache_alloc(exception_cache, GFP_NOIO); -+ if (!e) -+ e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); -+ -+ return e; -+} -+ -+static inline void free_exception(struct exception *e) -+{ -+ kmem_cache_free(exception_cache, e); -+} -+ -+static inline struct pending_exception *alloc_pending_exception(void) -+{ -+ return mempool_alloc(pending_pool, GFP_NOIO); -+} -+ -+static inline void free_pending_exception(struct pending_exception *pe) -+{ -+ mempool_free(pe, pending_pool); -+} -+ -+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) -+{ -+ struct exception *e; -+ -+ e = alloc_exception(); -+ if (!e) -+ return -ENOMEM; -+ -+ e->old_chunk = old; -+ e->new_chunk = new; -+ insert_exception(&s->complete, e); -+ return 0; -+} -+ -+/* -+ * Hard coded magic. -+ */ -+static int calc_max_buckets(void) -+{ -+ unsigned long mem; -+ -+ mem = num_physpages << PAGE_SHIFT; -+ mem /= 50; -+ mem /= sizeof(struct list_head); -+ -+ return mem; -+} -+ -+/* -+ * Rounds a number down to a power of 2. -+ */ -+static inline uint32_t round_down(uint32_t n) -+{ -+ while (n & (n - 1)) -+ n &= (n - 1); -+ return n; -+} -+ -+/* -+ * Allocate room for a suitable hash table. -+ */ -+static int init_hash_tables(struct dm_snapshot *s) -+{ -+ sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; -+ -+ /* -+ * Calculate based on the size of the original volume or -+ * the COW volume... 
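-+ * whichever is smaller.  For illustration: a 1GiB origin
-+ * (2097152 sectors) with 16-sector chunks yields 131072
-+ * candidate buckets, which calc_max_buckets() may then cap
-+ * before the result is rounded down to a power of two.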
-+ */ -+ cow_dev_size = get_dev_size(s->cow->dev); -+ origin_dev_size = get_dev_size(s->origin->dev); -+ max_buckets = calc_max_buckets(); -+ -+ hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size; -+ hash_size = min(hash_size, max_buckets); -+ -+ /* Round it down to a power of 2 */ -+ hash_size = round_down(hash_size); -+ if (init_exception_table(&s->complete, hash_size)) -+ return -ENOMEM; -+ -+ /* -+ * Allocate hash table for in-flight exceptions -+ * Make this smaller than the real hash table -+ */ -+ hash_size >>= 3; -+ if (!hash_size) -+ hash_size = 64; -+ -+ if (init_exception_table(&s->pending, hash_size)) { -+ exit_exception_table(&s->complete, exception_cache); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Round a number up to the nearest 'size' boundary. size must -+ * be a power of 2. -+ */ -+static inline ulong round_up(ulong n, ulong size) -+{ -+ size--; -+ return (n + size) & ~size; -+} -+ -+/* -+ * Construct a snapshot mapping:
-+ * <origin_dev> <COW-dev> <p/n> <chunk-size>
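-+ * e.g. the table line (device paths illustrative):
-+ *
-+ *   0 409600 snapshot /dev/vg0/lv0 /dev/vg0/cow P 16
-+ *
-+ * creates a persistent snapshot with 16-sector (8K) chunks.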
-+ */ -+static int snapshot_ctr(struct dm_target *ti, int argc, char **argv) -+{ -+ struct dm_snapshot *s; -+ unsigned long chunk_size; -+ int r = -EINVAL; -+ char *persistent; -+ char *origin_path; -+ char *cow_path; -+ char *value; -+ int blocksize; -+ -+ if (argc < 4) { -+ ti->error = "dm-snapshot: requires exactly 4 arguments"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ origin_path = argv[0]; -+ cow_path = argv[1]; -+ persistent = argv[2]; -+ -+ if ((*persistent & 0x5f) != 'P' && (*persistent & 0x5f) != 'N') { -+ ti->error = "Persistent flag is not P or N"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ chunk_size = simple_strtoul(argv[3], &value, 10); -+ if (chunk_size == 0 || value == NULL) { -+ ti->error = "Invalid chunk size"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) { -+ ti->error = "Cannot allocate snapshot context private " -+ "structure"; -+ r = -ENOMEM; -+ goto bad; -+ } -+ -+ r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); -+ if (r) { -+ ti->error = "Cannot get origin device"; -+ goto bad_free; -+ } -+ -+ /* FIXME: get cow length */ -+ r = dm_get_device(ti, cow_path, 0, 0, -+ FMODE_READ | FMODE_WRITE, &s->cow); -+ if (r) { -+ dm_put_device(ti, s->origin); -+ ti->error = "Cannot get COW device"; -+ goto bad_free; -+ } -+ -+ /* -+ * Chunk size must be multiple of page size. Silently -+ * round up if it's not. -+ */ -+ chunk_size = round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE); -+ -+ /* Validate the chunk size against the device block size */ -+ blocksize = get_hardsect_size(s->cow->dev); -+ if (chunk_size % (blocksize / SECTOR_SIZE)) { -+ ti->error = "Chunk size is not a multiple of device blocksize"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ /* Check the sizes are small enough to fit in one kiovec */ -+ if (chunk_size > KIO_MAX_SECTORS) { -+ ti->error = "Chunk size is too big"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ /* Check chunk_size is a power of 2 */ -+ if (chunk_size & (chunk_size - 1)) { -+ ti->error = "Chunk size is not a power of 2"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ s->chunk_size = chunk_size; -+ s->chunk_mask = chunk_size - 1; -+ s->type = *persistent; -+ for (s->chunk_shift = 0; chunk_size; -+ s->chunk_shift++, chunk_size >>= 1) -+ ; -+ s->chunk_shift--; -+ -+ s->valid = 1; -+ s->last_percent = 0; -+ init_rwsem(&s->lock); -+ s->table = ti->table; -+ -+ /* Allocate hash table for COW data */ -+ if (init_hash_tables(s)) { -+ ti->error = "Unable to allocate hash table space"; -+ r = -ENOMEM; -+ goto bad_putdev; -+ } -+ -+ /* -+ * Check the persistent flag - done here because we need the iobuf -+ * to check the LV header -+ */ -+ s->store.snap = s; -+ -+ if ((*persistent & 0x5f) == 'P') -+ r = dm_create_persistent(&s->store, s->chunk_size); -+ else -+ r = dm_create_transient(&s->store, s, blocksize); -+ -+ if (r) { -+ ti->error = "Couldn't create exception store"; -+ r = -EINVAL; -+ goto bad_free1; -+ } -+ -+ /* Flush IO to the origin device */ -+#if LVM_VFS_ENHANCEMENT -+ fsync_dev_lockfs(s->origin->dev); -+#else -+ fsync_dev(s->origin->dev); -+#endif -+ -+ /* Add snapshot to the list of snapshots for this origin */ -+ if (register_snapshot(s)) { -+ r = -EINVAL; -+ ti->error = "Cannot register snapshot origin"; -+ goto bad_free2; -+ } -+#if LVM_VFS_ENHANCEMENT -+ unlockfs(s->origin->dev); -+#endif -+ kcopyd_inc_client_count(); -+ -+ ti->private = s; -+ return 0; -+ -+ bad_free2: -+ s->store.destroy(&s->store); -+ -+ bad_free1: -+ exit_exception_table(&s->pending, 
pending_cache); -+ exit_exception_table(&s->complete, exception_cache); -+ -+ bad_putdev: -+ dm_put_device(ti, s->cow); -+ dm_put_device(ti, s->origin); -+ -+ bad_free: -+ kfree(s); -+ -+ bad: -+ return r; -+} -+ -+static void snapshot_dtr(struct dm_target *ti) -+{ -+ struct dm_snapshot *s = (struct dm_snapshot *) ti->private; -+ -+ dm_table_event(ti->table); -+ -+ unregister_snapshot(s); -+ -+ exit_exception_table(&s->pending, pending_cache); -+ exit_exception_table(&s->complete, exception_cache); -+ -+ /* Deallocate memory used */ -+ s->store.destroy(&s->store); -+ -+ dm_put_device(ti, s->origin); -+ dm_put_device(ti, s->cow); -+ kfree(s); -+ -+ kcopyd_dec_client_count(); -+} -+ -+/* -+ * We hold lists of buffer_heads, using the b_reqnext field. -+ */ -+static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh) -+{ -+ bh->b_reqnext = *queue; -+ *queue = bh; -+} -+ -+/* -+ * Flush a list of buffers. -+ */ -+static void flush_buffers(struct buffer_head *bh) -+{ -+ struct buffer_head *n; -+ -+ DMDEBUG("begin flush"); -+ while (bh) { -+ n = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ DMDEBUG("flushing %p", bh); -+ generic_make_request(WRITE, bh); -+ bh = n; -+ } -+ -+ run_task_queue(&tq_disk); -+} -+ -+/* -+ * Error a list of buffers. -+ */ -+static void error_buffers(struct buffer_head *bh) -+{ -+ struct buffer_head *n; -+ -+ while (bh) { -+ n = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ buffer_IO_error(bh); -+ bh = n; -+ } -+} -+ -+static void pending_complete(struct pending_exception *pe, int success) -+{ -+ struct exception *e; -+ struct dm_snapshot *s = pe->snap; -+ -+ if (success) { -+ e = alloc_exception(); -+ if (!e) { -+ printk("Unable to allocate exception."); -+ down_write(&s->lock); -+ s->store.drop_snapshot(&s->store); -+ s->valid = 0; -+ up_write(&s->lock); -+ return; -+ } -+ -+ /* -+ * Add a proper exception, and remove the -+ * inflight exception from the list. -+ */ -+ down_write(&s->lock); -+ -+ memcpy(e, &pe->e, sizeof(*e)); -+ insert_exception(&s->complete, e); -+ remove_exception(&pe->e); -+ -+ /* Submit any pending write BHs */ -+ up_write(&s->lock); -+ -+ flush_buffers(pe->snapshot_bhs); -+ DMDEBUG("Exception completed successfully."); -+ -+ /* Notify any interested parties */ -+ if (s->store.percent_full) { -+ int pc = s->store.percent_full(&s->store); -+ -+ if (pc >= s->last_percent + WAKE_UP_PERCENT) { -+ dm_table_event(s->table); -+ s->last_percent = pc - pc % WAKE_UP_PERCENT; -+ } -+ } -+ -+ } else { -+ /* Read/write error - snapshot is unusable */ -+ DMERR("Error reading/writing snapshot"); -+ -+ down_write(&s->lock); -+ s->store.drop_snapshot(&s->store); -+ s->valid = 0; -+ remove_exception(&pe->e); -+ up_write(&s->lock); -+ -+ error_buffers(pe->snapshot_bhs); -+ -+ dm_table_event(s->table); -+ DMDEBUG("Exception failed."); -+ } -+ -+ if (list_empty(&pe->siblings)) -+ flush_buffers(pe->origin_bhs); -+ else -+ list_del(&pe->siblings); -+ -+ free_pending_exception(pe); -+} -+ -+static void commit_callback(void *context, int success) -+{ -+ struct pending_exception *pe = (struct pending_exception *) context; -+ pending_complete(pe, success); -+} -+ -+/* -+ * Called when the copy I/O has finished. kcopyd actually runs -+ * this code so don't block. 
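-+ * On success the exception is committed to the store, whose
-+ * commit_callback() then calls pending_complete(); on error
-+ * the pending exception is failed immediately.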
-+ */ -+static void copy_callback(int err, void *context) -+{ -+ struct pending_exception *pe = (struct pending_exception *) context; -+ struct dm_snapshot *s = pe->snap; -+ -+ if (err) -+ pending_complete(pe, 0); -+ -+ else -+ /* Update the metadata if we are persistent */ -+ s->store.commit_exception(&s->store, &pe->e, commit_callback, -+ pe); -+} -+ -+/* -+ * Dispatches the copy operation to kcopyd. -+ */ -+static inline void start_copy(struct pending_exception *pe) -+{ -+ struct dm_snapshot *s = pe->snap; -+ struct kcopyd_region src, dest; -+ -+ src.dev = s->origin->dev; -+ src.sector = chunk_to_sector(s, pe->e.old_chunk); -+ src.count = s->chunk_size; -+ -+ dest.dev = s->cow->dev; -+ dest.sector = chunk_to_sector(s, pe->e.new_chunk); -+ dest.count = s->chunk_size; -+ -+ if (!pe->started) { -+ /* Hand over to kcopyd */ -+ kcopyd_copy(&src, &dest, copy_callback, pe); -+ pe->started = 1; -+ } -+} -+ -+/* -+ * Looks to see if this snapshot already has a pending exception -+ * for this chunk, otherwise it allocates a new one and inserts -+ * it into the pending table. -+ */ -+static struct pending_exception *find_pending_exception(struct dm_snapshot *s, -+ struct buffer_head *bh) -+{ -+ struct exception *e; -+ struct pending_exception *pe; -+ chunk_t chunk = sector_to_chunk(s, bh->b_rsector); -+ -+ /* -+ * Is there a pending exception for this already ? -+ */ -+ e = lookup_exception(&s->pending, chunk); -+ if (e) { -+ /* cast the exception to a pending exception */ -+ pe = list_entry(e, struct pending_exception, e); -+ -+ } else { -+ /* Create a new pending exception */ -+ pe = alloc_pending_exception(); -+ if (!pe) { -+ DMWARN("Couldn't allocate pending exception."); -+ return NULL; -+ } -+ -+ pe->e.old_chunk = chunk; -+ pe->origin_bhs = pe->snapshot_bhs = NULL; -+ INIT_LIST_HEAD(&pe->siblings); -+ pe->snap = s; -+ pe->started = 0; -+ -+ if (s->store.prepare_exception(&s->store, &pe->e)) { -+ free_pending_exception(pe); -+ s->valid = 0; -+ return NULL; -+ } -+ -+ insert_exception(&s->pending, &pe->e); -+ } -+ -+ return pe; -+} -+ -+static inline void remap_exception(struct dm_snapshot *s, struct exception *e, -+ struct buffer_head *bh) -+{ -+ bh->b_rdev = s->cow->dev; -+ bh->b_rsector = chunk_to_sector(s, e->new_chunk) + -+ (bh->b_rsector & s->chunk_mask); -+} -+ -+static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, int rw) -+{ -+ struct exception *e; -+ struct dm_snapshot *s = (struct dm_snapshot *) ti->private; -+ int r = 1; -+ chunk_t chunk; -+ struct pending_exception *pe; -+ -+ chunk = sector_to_chunk(s, bh->b_rsector); -+ -+ /* Full snapshots are not usable */ -+ if (!s->valid) -+ return -1; -+ -+ /* -+ * Write to snapshot - higher level takes care of RW/RO -+ * flags so we should only get this if we are -+ * writeable. -+ */ -+ if (rw == WRITE) { -+ -+ down_write(&s->lock); -+ -+ /* If the block is already remapped - use that, else remap it */ -+ e = lookup_exception(&s->complete, chunk); -+ if (e) -+ remap_exception(s, e, bh); -+ -+ else { -+ pe = find_pending_exception(s, bh); -+ -+ if (!pe) { -+ s->store.drop_snapshot(&s->store); -+ s->valid = 0; -+ } -+ -+ queue_buffer(&pe->snapshot_bhs, bh); -+ start_copy(pe); -+ r = 0; -+ } -+ -+ up_write(&s->lock); -+ -+ } else { -+ /* -+ * FIXME: this read path scares me because we -+ * always use the origin when we have a pending -+ * exception. However I can't think of a -+ * situation where this is wrong - ejt. 
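-+ * (While an exception is pending, writes to that chunk, on
-+ * both the origin and the snapshot, are held back until the
-+ * copy completes, so the origin still holds the data the
-+ * snapshot should see.)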
-+ */ -+ -+ /* Do reads */ -+ down_read(&s->lock); -+ -+ /* See if it it has been remapped */ -+ e = lookup_exception(&s->complete, chunk); -+ if (e) -+ remap_exception(s, e, bh); -+ else -+ bh->b_rdev = s->origin->dev; -+ -+ up_read(&s->lock); -+ } -+ -+ return r; -+} -+ -+static void list_merge(struct list_head *l1, struct list_head *l2) -+{ -+ struct list_head *l1_n, *l2_p; -+ -+ l1_n = l1->next; -+ l2_p = l2->prev; -+ -+ l1->next = l2; -+ l2->prev = l1; -+ -+ l2_p->next = l1_n; -+ l1_n->prev = l2_p; -+} -+ -+static int __origin_write(struct list_head *snapshots, struct buffer_head *bh) -+{ -+ int r = 1; -+ struct list_head *sl; -+ struct dm_snapshot *snap; -+ struct exception *e; -+ struct pending_exception *pe, *last = NULL; -+ chunk_t chunk; -+ -+ /* Do all the snapshots on this origin */ -+ list_for_each(sl, snapshots) { -+ snap = list_entry(sl, struct dm_snapshot, list); -+ -+ /* Only deal with valid snapshots */ -+ if (!snap->valid) -+ continue; -+ -+ down_write(&snap->lock); -+ -+ /* -+ * Remember, different snapshots can have -+ * different chunk sizes. -+ */ -+ chunk = sector_to_chunk(snap, bh->b_rsector); -+ -+ /* -+ * Check exception table to see if block -+ * is already remapped in this snapshot -+ * and trigger an exception if not. -+ */ -+ e = lookup_exception(&snap->complete, chunk); -+ if (!e) { -+ pe = find_pending_exception(snap, bh); -+ if (!pe) { -+ snap->store.drop_snapshot(&snap->store); -+ snap->valid = 0; -+ -+ } else { -+ if (last) -+ list_merge(&pe->siblings, -+ &last->siblings); -+ -+ last = pe; -+ r = 0; -+ } -+ } -+ -+ up_write(&snap->lock); -+ } -+ -+ /* -+ * Now that we have a complete pe list we can start the copying. -+ */ -+ if (last) { -+ pe = last; -+ do { -+ down_write(&pe->snap->lock); -+ queue_buffer(&pe->origin_bhs, bh); -+ start_copy(pe); -+ up_write(&pe->snap->lock); -+ pe = list_entry(pe->siblings.next, -+ struct pending_exception, siblings); -+ -+ } while (pe != last); -+ } -+ -+ return r; -+} -+ -+static int snapshot_status(struct dm_target *ti, status_type_t type, -+ char *result, int maxlen) -+{ -+ struct dm_snapshot *snap = (struct dm_snapshot *) ti->private; -+ char cow[16]; -+ char org[16]; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ if (!snap->valid) -+ snprintf(result, maxlen, "Invalid"); -+ else { -+ if (snap->store.percent_full) -+ snprintf(result, maxlen, "%d%%", -+ snap->store.percent_full(&snap-> -+ store)); -+ else -+ snprintf(result, maxlen, "Unknown"); -+ } -+ break; -+ -+ case STATUSTYPE_TABLE: -+ /* -+ * kdevname returns a static pointer so we need -+ * to make private copies if the output is to -+ * make sense. -+ */ -+ strncpy(cow, kdevname(snap->cow->dev), sizeof(cow)); -+ strncpy(org, kdevname(snap->origin->dev), sizeof(org)); -+ snprintf(result, maxlen, "%s %s %c %ld", org, cow, -+ snap->type, snap->chunk_size); -+ break; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Called on a write from the origin driver. -+ */ -+int do_origin(struct dm_dev *origin, struct buffer_head *bh) -+{ -+ struct origin *o; -+ int r; -+ -+ down_read(&_origins_lock); -+ o = __lookup_origin(origin->dev); -+ if (!o) -+ BUG(); -+ -+ r = __origin_write(&o->snapshots, bh); -+ up_read(&_origins_lock); -+ -+ return r; -+} -+ -+/* -+ * Origin: maps a linear range of a device, with hooks for snapshotting. -+ */ -+ -+/* -+ * Construct an origin mapping: -+ * The context for an origin is merely a 'struct dm_dev *' -+ * pointing to the real device. 
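-+ * It takes a single <dev_path> argument, e.g. the table line
-+ * (path illustrative):
-+ *
-+ *   0 1024000 snapshot-origin /dev/vg0/lv0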
-+ */ -+static int origin_ctr(struct dm_target *ti, int argc, char **argv) -+{ -+ int r; -+ struct dm_dev *dev; -+ -+ if (argc != 1) { -+ ti->error = "dm-origin: incorrect number of arguments"; -+ return -EINVAL; -+ } -+ -+ r = dm_get_device(ti, argv[0], 0, ti->len, -+ dm_table_get_mode(ti->table), &dev); -+ if (r) { -+ ti->error = "Cannot get target device"; -+ return r; -+ } -+ -+ ti->private = dev; -+ -+ return 0; -+} -+ -+static void origin_dtr(struct dm_target *ti) -+{ -+ struct dm_dev *dev = (struct dm_dev *) ti->private; -+ dm_put_device(ti, dev); -+} -+ -+static int origin_map(struct dm_target *ti, struct buffer_head *bh, int rw) -+{ -+ struct dm_dev *dev = (struct dm_dev *) ti->private; -+ bh->b_rdev = dev->dev; -+ -+ /* Only tell snapshots if this is a write */ -+ return (rw == WRITE) ? do_origin(dev, bh) : 1; -+} -+ -+static int origin_status(struct dm_target *ti, status_type_t type, char *result, -+ int maxlen) -+{ -+ struct dm_dev *dev = (struct dm_dev *) ti->private; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ result[0] = '\0'; -+ break; -+ -+ case STATUSTYPE_TABLE: -+ snprintf(result, maxlen, "%s", kdevname(dev->dev)); -+ break; -+ } -+ -+ return 0; -+} -+ -+static struct target_type origin_target = { -+ name: "snapshot-origin", -+ module: THIS_MODULE, -+ ctr: origin_ctr, -+ dtr: origin_dtr, -+ map: origin_map, -+ status: origin_status, -+}; -+ -+static struct target_type snapshot_target = { -+ name: "snapshot", -+ module: THIS_MODULE, -+ ctr: snapshot_ctr, -+ dtr: snapshot_dtr, -+ map: snapshot_map, -+ status: snapshot_status, -+}; -+ -+int __init dm_snapshot_init(void) -+{ -+ int r; -+ -+ r = dm_register_target(&snapshot_target); -+ if (r) { -+ DMERR("snapshot target register failed %d", r); -+ return r; -+ } -+ -+ r = dm_register_target(&origin_target); -+ if (r < 0) { -+ DMERR("Device mapper: Origin: register failed %d\n", r); -+ goto bad1; -+ } -+ -+ r = init_origin_hash(); -+ if (r) { -+ DMERR("init_origin_hash failed."); -+ goto bad2; -+ } -+ -+ exception_cache = kmem_cache_create("dm-snapshot-ex", -+ sizeof(struct exception), -+ __alignof__(struct exception), -+ 0, NULL, NULL); -+ if (!exception_cache) { -+ DMERR("Couldn't create exception cache."); -+ r = -ENOMEM; -+ goto bad3; -+ } -+ -+ pending_cache = -+ kmem_cache_create("dm-snapshot-in", -+ sizeof(struct pending_exception), -+ __alignof__(struct pending_exception), -+ 0, NULL, NULL); -+ if (!pending_cache) { -+ DMERR("Couldn't create pending cache."); -+ r = -ENOMEM; -+ goto bad4; -+ } -+ -+ pending_pool = mempool_create(128, mempool_alloc_slab, -+ mempool_free_slab, pending_cache); -+ if (!pending_pool) { -+ DMERR("Couldn't create pending pool."); -+ r = -ENOMEM; -+ goto bad5; -+ } -+ -+ return 0; -+ -+ bad5: -+ kmem_cache_destroy(pending_cache); -+ bad4: -+ kmem_cache_destroy(exception_cache); -+ bad3: -+ exit_origin_hash(); -+ bad2: -+ dm_unregister_target(&origin_target); -+ bad1: -+ dm_unregister_target(&snapshot_target); -+ return r; -+} -+ -+void dm_snapshot_exit(void) -+{ -+ int r; -+ -+ r = dm_unregister_target(&snapshot_target); -+ if (r) -+ DMERR("snapshot unregister failed %d", r); -+ -+ r = dm_unregister_target(&origin_target); -+ if (r) -+ DMERR("origin unregister failed %d", r); -+ -+ exit_origin_hash(); -+ mempool_destroy(pending_pool); -+ kmem_cache_destroy(pending_cache); -+ kmem_cache_destroy(exception_cache); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -ruN linux-2.4.19/drivers/md/dm-snapshot.h linux-2.4.19-dm/drivers/md/dm-snapshot.h ---- linux-2.4.19/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-snapshot.h Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,147 @@ -+/* -+ * dm-snapshot.c -+ * -+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#ifndef DM_SNAPSHOT_H -+#define DM_SNAPSHOT_H -+ -+#include "dm.h" -+#include -+ -+struct exception_table { -+ uint32_t hash_mask; -+ struct list_head *table; -+}; -+ -+/* -+ * The snapshot code deals with largish chunks of the disk at a -+ * time. Typically 64k - 256k. -+ */ -+/* FIXME: can we get away with limiting these to a uint32_t ? */ -+typedef sector_t chunk_t; -+ -+/* -+ * An exception is used where an old chunk of data has been -+ * replaced by a new one. -+ */ -+struct exception { -+ struct list_head hash_list; -+ -+ chunk_t old_chunk; -+ chunk_t new_chunk; -+}; -+ -+/* -+ * Abstraction to handle the meta/layout of exception stores (the -+ * COW device). -+ */ -+struct exception_store { -+ -+ /* -+ * Destroys this object when you've finished with it. -+ */ -+ void (*destroy) (struct exception_store * store); -+ -+ /* -+ * Find somewhere to store the next exception. -+ */ -+ int (*prepare_exception) (struct exception_store * store, -+ struct exception * e); -+ -+ /* -+ * Update the metadata with this exception. -+ */ -+ void (*commit_exception) (struct exception_store * store, -+ struct exception * e, -+ void (*callback) (void *, int success), -+ void *callback_context); -+ -+ /* -+ * The snapshot is invalid, note this in the metadata. -+ */ -+ void (*drop_snapshot) (struct exception_store * store); -+ -+ /* -+ * Return the %age full of the snapshot -+ */ -+ int (*percent_full) (struct exception_store * store); -+ -+ struct dm_snapshot *snap; -+ void *context; -+}; -+ -+struct dm_snapshot { -+ struct rw_semaphore lock; -+ struct dm_table *table; -+ -+ struct dm_dev *origin; -+ struct dm_dev *cow; -+ -+ /* List of snapshots per Origin */ -+ struct list_head list; -+ -+ /* Size of data blocks saved - must be a power of 2 */ -+ chunk_t chunk_size; -+ chunk_t chunk_mask; -+ chunk_t chunk_shift; -+ -+ /* You can't use a snapshot if this is 0 (e.g. if full) */ -+ int valid; -+ -+ /* Used for display of table */ -+ char type; -+ -+ /* The last percentage we notified */ -+ int last_percent; -+ -+ struct exception_table pending; -+ struct exception_table complete; -+ -+ /* The on disk metadata handler */ -+ struct exception_store store; -+}; -+ -+/* -+ * Used by the exception stores to load exceptions hen -+ * initialising. -+ */ -+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); -+ -+/* -+ * Constructor and destructor for the default persistent -+ * store. -+ */ -+int dm_create_persistent(struct exception_store *store, uint32_t chunk_size); -+ -+int dm_create_transient(struct exception_store *store, -+ struct dm_snapshot *s, int blocksize); -+ -+/* -+ * Return the number of sectors in the device. 
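-+ * (blk_size[] entries are in 1K blocks, hence the shift left
-+ * by one to convert to 512-byte sectors.)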
-+ */ -+static inline sector_t get_dev_size(kdev_t dev) -+{ -+ int *sizes; -+ -+ sizes = blk_size[MAJOR(dev)]; -+ if (sizes) -+ return sizes[MINOR(dev)] << 1; -+ -+ return 0; -+} -+ -+static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector) -+{ -+ return (sector & ~s->chunk_mask) >> s->chunk_shift; -+} -+ -+static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk) -+{ -+ return chunk << s->chunk_shift; -+} -+ -+#endif -diff -ruN linux-2.4.19/drivers/md/dm-stripe.c linux-2.4.19-dm/drivers/md/dm-stripe.c ---- linux-2.4.19/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-stripe.c Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,256 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+ -+struct stripe { -+ struct dm_dev *dev; -+ sector_t physical_start; -+}; -+ -+struct stripe_c { -+ uint32_t stripes; -+ -+ /* The size of this target / num. stripes */ -+ uint32_t stripe_width; -+ -+ /* stripe chunk size */ -+ uint32_t chunk_shift; -+ sector_t chunk_mask; -+ -+ struct stripe stripe[0]; -+}; -+ -+static inline struct stripe_c *alloc_context(int stripes) -+{ -+ size_t len; -+ -+ if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), -+ stripes)) -+ return NULL; -+ -+ len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); -+ -+ return kmalloc(len, GFP_KERNEL); -+} -+ -+/* -+ * Parse a single pair -+ */ -+static int get_stripe(struct dm_target *ti, struct stripe_c *sc, -+ int stripe, char **argv) -+{ -+ sector_t start; -+ -+ if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1) -+ return -EINVAL; -+ -+ if (dm_get_device(ti, argv[0], start, sc->stripe_width, -+ dm_table_get_mode(ti->table), -+ &sc->stripe[stripe].dev)) -+ return -ENXIO; -+ -+ sc->stripe[stripe].physical_start = start; -+ return 0; -+} -+ -+/* -+ * FIXME: Nasty function, only present because we can't link -+ * against __moddi3 and __divdi3. -+ * -+ * returns a == b * n -+ */ -+static int multiple(sector_t a, sector_t b, sector_t *n) -+{ -+ sector_t acc, prev, i; -+ -+ *n = 0; -+ while (a >= b) { -+ for (acc = b, prev = 0, i = 1; -+ acc <= a; -+ prev = acc, acc <<= 1, i <<= 1) -+ ; -+ -+ a -= prev; -+ *n += i >> 1; -+ } -+ -+ return a == 0; -+} -+ -+/* -+ * Construct a striped mapping. 
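-+ * Arguments: <number of stripes> <chunk size (2^^n)>, followed
-+ * by one <dev_path> <offset> pair per stripe: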
-+ * [ ]+ -+ */ -+static int stripe_ctr(struct dm_target *ti, int argc, char **argv) -+{ -+ struct stripe_c *sc; -+ sector_t width; -+ uint32_t stripes; -+ uint32_t chunk_size; -+ char *end; -+ int r, i; -+ -+ if (argc < 2) { -+ ti->error = "dm-stripe: Not enough arguments"; -+ return -EINVAL; -+ } -+ -+ stripes = simple_strtoul(argv[0], &end, 10); -+ if (*end) { -+ ti->error = "dm-stripe: Invalid stripe count"; -+ return -EINVAL; -+ } -+ -+ chunk_size = simple_strtoul(argv[1], &end, 10); -+ if (*end) { -+ ti->error = "dm-stripe: Invalid chunk_size"; -+ return -EINVAL; -+ } -+ -+ if (!multiple(ti->len, stripes, &width)) { -+ ti->error = "dm-stripe: Target length not divisable by " -+ "number of stripes"; -+ return -EINVAL; -+ } -+ -+ sc = alloc_context(stripes); -+ if (!sc) { -+ ti->error = "dm-stripe: Memory allocation for striped context " -+ "failed"; -+ return -ENOMEM; -+ } -+ -+ sc->stripes = stripes; -+ sc->stripe_width = width; -+ -+ /* -+ * chunk_size is a power of two -+ */ -+ if (!chunk_size || (chunk_size & (chunk_size - 1))) { -+ ti->error = "dm-stripe: Invalid chunk size"; -+ kfree(sc); -+ return -EINVAL; -+ } -+ -+ sc->chunk_mask = ((sector_t) chunk_size) - 1; -+ for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++) -+ chunk_size >>= 1; -+ sc->chunk_shift--; -+ -+ /* -+ * Get the stripe destinations. -+ */ -+ for (i = 0; i < stripes; i++) { -+ if (argc < 2) { -+ ti->error = "dm-stripe: Not enough destinations " -+ "specified"; -+ kfree(sc); -+ return -EINVAL; -+ } -+ -+ argv += 2; -+ -+ r = get_stripe(ti, sc, i, argv); -+ if (r < 0) { -+ ti->error = "dm-stripe: Couldn't parse stripe " -+ "destination"; -+ while (i--) -+ dm_put_device(ti, sc->stripe[i].dev); -+ kfree(sc); -+ return r; -+ } -+ } -+ -+ ti->private = sc; -+ return 0; -+} -+ -+static void stripe_dtr(struct dm_target *ti) -+{ -+ unsigned int i; -+ struct stripe_c *sc = (struct stripe_c *) ti->private; -+ -+ for (i = 0; i < sc->stripes; i++) -+ dm_put_device(ti, sc->stripe[i].dev); -+ -+ kfree(sc); -+} -+ -+static int stripe_map(struct dm_target *ti, struct buffer_head *bh, int rw) -+{ -+ struct stripe_c *sc = (struct stripe_c *) ti->private; -+ -+ sector_t offset = bh->b_rsector - ti->begin; -+ uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift); -+ uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */ -+ chunk = chunk / sc->stripes; -+ -+ bh->b_rdev = sc->stripe[stripe].dev->dev; -+ bh->b_rsector = sc->stripe[stripe].physical_start + -+ (chunk << sc->chunk_shift) + (offset & sc->chunk_mask); -+ return 1; -+} -+ -+static int stripe_status(struct dm_target *ti, -+ status_type_t type, char *result, int maxlen) -+{ -+ struct stripe_c *sc = (struct stripe_c *) ti->private; -+ int offset; -+ int i; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ result[0] = '\0'; -+ break; -+ -+ case STATUSTYPE_TABLE: -+ offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT, -+ sc->stripes, sc->chunk_mask + 1); -+ for (i = 0; i < sc->stripes; i++) { -+ offset += snprintf(result + offset, maxlen - offset, -+ " %s " SECTOR_FORMAT, -+ kdevname(to_kdev_t -+ (sc->stripe[i].dev->bdev->bd_dev)), -+ sc->stripe[i].physical_start); -+ } -+ break; -+ } -+ return 0; -+} -+ -+static struct target_type stripe_target = { -+ .name = "striped", -+ .module = THIS_MODULE, -+ .ctr = stripe_ctr, -+ .dtr = stripe_dtr, -+ .map = stripe_map, -+ .status = stripe_status, -+}; -+ -+int __init dm_stripe_init(void) -+{ -+ int r; -+ -+ r = dm_register_target(&stripe_target); -+ if (r < 0) -+ DMWARN("striped target registration failed"); -+ -+ return r; 
-+} -+ -+void dm_stripe_exit(void) -+{ -+ if (dm_unregister_target(&stripe_target)) -+ DMWARN("striped target unregistration failed"); -+ -+ return; -+} -diff -ruN linux-2.4.19/drivers/md/dm-table.c linux-2.4.19-dm/drivers/md/dm-table.c ---- linux-2.4.19/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-table.c Thu Nov 21 13:39:57 2002 -@@ -0,0 +1,665 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define MAX_DEPTH 16 -+#define NODE_SIZE L1_CACHE_BYTES -+#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) -+#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) -+ -+struct dm_table { -+ atomic_t holders; -+ -+ /* btree table */ -+ int depth; -+ int counts[MAX_DEPTH]; /* in nodes */ -+ sector_t *index[MAX_DEPTH]; -+ -+ int num_targets; -+ int num_allocated; -+ sector_t *highs; -+ struct dm_target *targets; -+ -+ /* -+ * Indicates the rw permissions for the new logical -+ * device. This should be a combination of FMODE_READ -+ * and FMODE_WRITE. -+ */ -+ int mode; -+ -+ /* a list of devices used by this table */ -+ struct list_head devices; -+ -+ /* -+ * A waitqueue for processes waiting for something -+ * interesting to happen to this table. -+ */ -+ wait_queue_head_t eventq; -+}; -+ -+/* -+ * Ceiling(n / size) -+ */ -+static inline unsigned long div_up(unsigned long n, unsigned long size) -+{ -+ return dm_round_up(n, size) / size; -+} -+ -+/* -+ * Similar to ceiling(log_size(n)) -+ */ -+static unsigned int int_log(unsigned long n, unsigned long base) -+{ -+ int result = 0; -+ -+ while (n > 1) { -+ n = div_up(n, base); -+ result++; -+ } -+ -+ return result; -+} -+ -+/* -+ * Calculate the index of the child node of the n'th node k'th key. -+ */ -+static inline int get_child(int n, int k) -+{ -+ return (n * CHILDREN_PER_NODE) + k; -+} -+ -+/* -+ * Return the n'th node of level l from table t. -+ */ -+static inline sector_t *get_node(struct dm_table *t, int l, int n) -+{ -+ return t->index[l] + (n * KEYS_PER_NODE); -+} -+ -+/* -+ * Return the highest key that you could lookup from the n'th -+ * node on level l of the btree. -+ */ -+static sector_t high(struct dm_table *t, int l, int n) -+{ -+ for (; l < t->depth - 1; l++) -+ n = get_child(n, CHILDREN_PER_NODE - 1); -+ -+ if (n >= t->counts[l]) -+ return (sector_t) - 1; -+ -+ return get_node(t, l, n)[KEYS_PER_NODE - 1]; -+} -+ -+/* -+ * Fills in a level of the btree based on the highs of the level -+ * below it. -+ */ -+static int setup_btree_index(int l, struct dm_table *t) -+{ -+ int n, k; -+ sector_t *node; -+ -+ for (n = 0; n < t->counts[l]; n++) { -+ node = get_node(t, l, n); -+ -+ for (k = 0; k < KEYS_PER_NODE; k++) -+ node[k] = high(t, l + 1, get_child(n, k)); -+ } -+ -+ return 0; -+} -+ -+/* -+ * highs, and targets are managed as dynamic arrays during a -+ * table load. -+ */ -+static int alloc_targets(struct dm_table *t, int num) -+{ -+ sector_t *n_highs; -+ struct dm_target *n_targets; -+ int n = t->num_targets; -+ -+ /* -+ * Allocate both the target array and offset array at once. 
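-+ * The highs array comes first, immediately followed by the
-+ * targets array, so a single vcalloc()/vfree() pair manages
-+ * both.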
-+ */ -+ n_highs = (sector_t *) vcalloc(sizeof(struct dm_target) + -+ sizeof(sector_t), num); -+ if (!n_highs) -+ return -ENOMEM; -+ -+ n_targets = (struct dm_target *) (n_highs + num); -+ -+ if (n) { -+ memcpy(n_highs, t->highs, sizeof(*n_highs) * n); -+ memcpy(n_targets, t->targets, sizeof(*n_targets) * n); -+ } -+ -+ memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); -+ vfree(t->highs); -+ -+ t->num_allocated = num; -+ t->highs = n_highs; -+ t->targets = n_targets; -+ -+ return 0; -+} -+ -+int dm_table_create(struct dm_table **result, int mode) -+{ -+ struct dm_table *t = kmalloc(sizeof(*t), GFP_NOIO); -+ -+ if (!t) -+ return -ENOMEM; -+ -+ memset(t, 0, sizeof(*t)); -+ INIT_LIST_HEAD(&t->devices); -+ atomic_set(&t->holders, 1); -+ -+ /* allocate a single nodes worth of targets to begin with */ -+ if (alloc_targets(t, KEYS_PER_NODE)) { -+ kfree(t); -+ t = NULL; -+ return -ENOMEM; -+ } -+ -+ init_waitqueue_head(&t->eventq); -+ t->mode = mode; -+ *result = t; -+ return 0; -+} -+ -+static void free_devices(struct list_head *devices) -+{ -+ struct list_head *tmp, *next; -+ -+ for (tmp = devices->next; tmp != devices; tmp = next) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ next = tmp->next; -+ kfree(dd); -+ } -+} -+ -+void table_destroy(struct dm_table *t) -+{ -+ int i; -+ -+ /* destroying the table counts as an event */ -+ dm_table_event(t); -+ -+ /* free the indexes (see dm_table_complete) */ -+ if (t->depth >= 2) -+ vfree(t->index[t->depth - 2]); -+ -+ /* free the targets */ -+ for (i = 0; i < t->num_targets; i++) { -+ struct dm_target *tgt = &t->targets[i]; -+ -+ dm_put_target_type(t->targets[i].type); -+ -+ if (tgt->type->dtr) -+ tgt->type->dtr(tgt); -+ } -+ -+ vfree(t->highs); -+ -+ /* free the device list */ -+ if (t->devices.next != &t->devices) { -+ DMWARN("devices still present during destroy: " -+ "dm_table_remove_device calls missing"); -+ -+ free_devices(&t->devices); -+ } -+ -+ kfree(t); -+} -+ -+void dm_table_get(struct dm_table *t) -+{ -+ atomic_inc(&t->holders); -+} -+ -+void dm_table_put(struct dm_table *t) -+{ -+ if (atomic_dec_and_test(&t->holders)) -+ table_destroy(t); -+} -+ -+/* -+ * Checks to see if we need to extend highs or targets. -+ */ -+static inline int check_space(struct dm_table *t) -+{ -+ if (t->num_targets >= t->num_allocated) -+ return alloc_targets(t, t->num_allocated * 2); -+ -+ return 0; -+} -+ -+/* -+ * Convert a device path to a dev_t. -+ */ -+static int lookup_device(const char *path, kdev_t *dev) -+{ -+ int r; -+ struct nameidata nd; -+ struct inode *inode; -+ -+ if (!path_init(path, LOOKUP_FOLLOW, &nd)) -+ return 0; -+ -+ if ((r = path_walk(path, &nd))) -+ goto out; -+ -+ inode = nd.dentry->d_inode; -+ if (!inode) { -+ r = -ENOENT; -+ goto out; -+ } -+ -+ if (!S_ISBLK(inode->i_mode)) { -+ r = -ENOTBLK; -+ goto out; -+ } -+ -+ *dev = inode->i_rdev; -+ -+ out: -+ path_release(&nd); -+ return r; -+} -+ -+/* -+ * See if we've already got a device in the list. -+ */ -+static struct dm_dev *find_device(struct list_head *l, kdev_t dev) -+{ -+ struct list_head *tmp; -+ -+ list_for_each(tmp, l) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ if (kdev_same(dd->dev, dev)) -+ return dd; -+ } -+ -+ return NULL; -+} -+ -+/* -+ * Open a device so we can use it as a map destination. 
-+ */ -+static int open_dev(struct dm_dev *dd) -+{ -+ if (dd->bdev) -+ BUG(); -+ -+ dd->bdev = bdget(kdev_t_to_nr(dd->dev)); -+ if (!dd->bdev) -+ return -ENOMEM; -+ -+ return blkdev_get(dd->bdev, dd->mode, 0, BDEV_RAW); -+} -+ -+/* -+ * Close a device that we've been using. -+ */ -+static void close_dev(struct dm_dev *dd) -+{ -+ if (!dd->bdev) -+ return; -+ -+ blkdev_put(dd->bdev, BDEV_RAW); -+ dd->bdev = NULL; -+} -+ -+/* -+ * If possible (ie. blk_size[major] is set), this checks an area -+ * of a destination device is valid. -+ */ -+static int check_device_area(kdev_t dev, sector_t start, sector_t len) -+{ -+ int *sizes; -+ sector_t dev_size; -+ -+ if (!(sizes = blk_size[major(dev)]) || !(dev_size = sizes[minor(dev)])) -+ /* we don't know the device details, -+ * so give the benefit of the doubt */ -+ return 1; -+ -+ /* convert to 512-byte sectors */ -+ dev_size <<= 1; -+ -+ return ((start < dev_size) && (len <= (dev_size - start))); -+} -+ -+/* -+ * This upgrades the mode on an already open dm_dev. Being -+ * careful to leave things as they were if we fail to reopen the -+ * device. -+ */ -+static int upgrade_mode(struct dm_dev *dd, int new_mode) -+{ -+ int r; -+ struct dm_dev dd_copy; -+ -+ memcpy(&dd_copy, dd, sizeof(dd_copy)); -+ -+ dd->mode |= new_mode; -+ dd->bdev = NULL; -+ r = open_dev(dd); -+ if (!r) -+ close_dev(&dd_copy); -+ else -+ memcpy(dd, &dd_copy, sizeof(dd_copy)); -+ -+ return r; -+} -+ -+/* -+ * Add a device to the list, or just increment the usage count if -+ * it's already present. -+ */ -+int dm_get_device(struct dm_target *ti, const char *path, sector_t start, -+ sector_t len, int mode, struct dm_dev **result) -+{ -+ int r; -+ kdev_t dev; -+ struct dm_dev *dd; -+ int major, minor; -+ struct dm_table *t = ti->table; -+ -+ if (!t) -+ BUG(); -+ -+ if (sscanf(path, "%x:%x", &major, &minor) == 2) { -+ /* Extract the major/minor numbers */ -+ dev = mk_kdev(major, minor); -+ } else { -+ /* convert the path to a device */ -+ if ((r = lookup_device(path, &dev))) -+ return r; -+ } -+ -+ dd = find_device(&t->devices, dev); -+ if (!dd) { -+ dd = kmalloc(sizeof(*dd), GFP_KERNEL); -+ if (!dd) -+ return -ENOMEM; -+ -+ dd->dev = dev; -+ dd->mode = mode; -+ dd->bdev = NULL; -+ -+ if ((r = open_dev(dd))) { -+ kfree(dd); -+ return r; -+ } -+ -+ atomic_set(&dd->count, 0); -+ list_add(&dd->list, &t->devices); -+ -+ } else if (dd->mode != (mode | dd->mode)) { -+ r = upgrade_mode(dd, mode); -+ if (r) -+ return r; -+ } -+ atomic_inc(&dd->count); -+ -+ if (!check_device_area(dd->dev, start, len)) { -+ DMWARN("device %s too small for target", path); -+ dm_put_device(ti, dd); -+ return -EINVAL; -+ } -+ -+ *result = dd; -+ -+ return 0; -+} -+ -+/* -+ * Decrement a devices use count and remove it if neccessary. -+ */ -+void dm_put_device(struct dm_target *ti, struct dm_dev *dd) -+{ -+ if (atomic_dec_and_test(&dd->count)) { -+ close_dev(dd); -+ list_del(&dd->list); -+ kfree(dd); -+ } -+} -+ -+/* -+ * Checks to see if the target joins onto the end of the table. -+ */ -+static int adjoin(struct dm_table *table, struct dm_target *ti) -+{ -+ struct dm_target *prev; -+ -+ if (!table->num_targets) -+ return !ti->begin; -+ -+ prev = &table->targets[table->num_targets - 1]; -+ return (ti->begin == (prev->begin + prev->len)); -+} -+ -+/* -+ * Destructively splits up the argument list to pass to ctr. 
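-+ * For example, the input "foo bar\ baz" yields argc = 2 with
-+ * argv[0] = "foo" and argv[1] = "bar baz" (backslash quoting
-+ * is stripped in place).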
-+ */ -+static int split_args(int max, int *argc, char **argv, char *input) -+{ -+ char *start, *end = input, *out; -+ *argc = 0; -+ -+ while (1) { -+ start = end; -+ -+ /* Skip whitespace */ -+ while (*start && isspace(*start)) -+ start++; -+ -+ if (!*start) -+ break; /* success, we hit the end */ -+ -+ /* 'out' is used to remove any back-quotes */ -+ end = out = start; -+ while (*end) { -+ /* Everything apart from '\0' can be quoted */ -+ if (*end == '\\' && *(end + 1)) { -+ *out++ = *(end + 1); -+ end += 2; -+ continue; -+ } -+ -+ if (isspace(*end)) -+ break; /* end of token */ -+ -+ *out++ = *end++; -+ } -+ -+ /* have we already filled the array ? */ -+ if ((*argc + 1) > max) -+ return -EINVAL; -+ -+ /* we know this is whitespace */ -+ if (*end) -+ end++; -+ -+ /* terminate the string and put it in the array */ -+ *out = '\0'; -+ argv[*argc] = start; -+ (*argc)++; -+ } -+ -+ return 0; -+} -+ -+int dm_table_add_target(struct dm_table *t, const char *type, -+ sector_t start, sector_t len, char *params) -+{ -+ int r, argc; -+ char *argv[32]; -+ struct target_type *tt; -+ struct dm_target *tgt; -+ -+ if ((r = check_space(t))) -+ return r; -+ -+ tgt = t->targets + t->num_targets; -+ memset(tgt, 0, sizeof(*tgt)); -+ -+ tt = dm_get_target_type(type); -+ if (!tt) { -+ tgt->error = "unknown target type"; -+ return -EINVAL; -+ } -+ -+ tgt->table = t; -+ tgt->type = tt; -+ tgt->begin = start; -+ tgt->len = len; -+ tgt->error = "Unknown error"; -+ -+ /* -+ * Does this target adjoin the previous one ? -+ */ -+ if (!adjoin(t, tgt)) { -+ DMERR("Gap in table"); -+ dm_put_target_type(tt); -+ return -EINVAL; -+ } -+ -+ r = split_args(ARRAY_SIZE(argv), &argc, argv, params); -+ if (r) { -+ tgt->error = "couldn't split parameters"; -+ dm_put_target_type(tt); -+ return r; -+ } -+ -+ r = tt->ctr(tgt, argc, argv); -+ if (r) { -+ dm_put_target_type(tt); -+ return r; -+ } -+ -+ t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; -+ return 0; -+} -+ -+static int setup_indexes(struct dm_table *t) -+{ -+ int i, total = 0; -+ sector_t *indexes; -+ -+ /* allocate the space for *all* the indexes */ -+ for (i = t->depth - 2; i >= 0; i--) { -+ t->counts[i] = div_up(t->counts[i + 1], CHILDREN_PER_NODE); -+ total += t->counts[i]; -+ } -+ -+ indexes = (sector_t *) vcalloc(total, (unsigned long) NODE_SIZE); -+ if (!indexes) -+ return -ENOMEM; -+ -+ /* set up internal nodes, bottom-up */ -+ for (i = t->depth - 2, total = 0; i >= 0; i--) { -+ t->index[i] = indexes; -+ indexes += (KEYS_PER_NODE * t->counts[i]); -+ setup_btree_index(i, t); -+ } -+ -+ return 0; -+} -+ -+/* -+ * Builds the btree to index the map. -+ */ -+int dm_table_complete(struct dm_table *t) -+{ -+ int leaf_nodes, r = 0; -+ -+ /* how many indexes will the btree have ? */ -+ leaf_nodes = div_up(t->num_targets, KEYS_PER_NODE); -+ t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); -+ -+ /* leaf layer has already been set up */ -+ t->counts[t->depth - 1] = leaf_nodes; -+ t->index[t->depth - 1] = t->highs; -+ -+ if (t->depth >= 2) -+ r = setup_indexes(t); -+ -+ return r; -+} -+ -+void dm_table_event(struct dm_table *t) -+{ -+ wake_up_interruptible(&t->eventq); -+} -+ -+sector_t dm_table_get_size(struct dm_table *t) -+{ -+ return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; -+} -+ -+struct dm_target *dm_table_get_target(struct dm_table *t, int index) -+{ -+ if (index > t->num_targets) -+ return NULL; -+ -+ return t->targets + index; -+} -+ -+/* -+ * Search the btree for the correct target. 
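-+ * Each node holds KEYS_PER_NODE sorted 'high' keys; at every
-+ * level we descend into the first child whose highest sector
-+ * is >= the sector being looked up.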
-+ */ -+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) -+{ -+ int l, n = 0, k = 0; -+ sector_t *node; -+ -+ for (l = 0; l < t->depth; l++) { -+ n = get_child(n, k); -+ node = get_node(t, l, n); -+ -+ for (k = 0; k < KEYS_PER_NODE; k++) -+ if (node[k] >= sector) -+ break; -+ } -+ -+ return &t->targets[(KEYS_PER_NODE * n) + k]; -+} -+ -+unsigned int dm_table_get_num_targets(struct dm_table *t) -+{ -+ return t->num_targets; -+} -+ -+struct list_head *dm_table_get_devices(struct dm_table *t) -+{ -+ return &t->devices; -+} -+ -+int dm_table_get_mode(struct dm_table *t) -+{ -+ return t->mode; -+} -+ -+void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq) -+{ -+ add_wait_queue(&t->eventq, wq); -+} -+ -+EXPORT_SYMBOL(dm_get_device); -+EXPORT_SYMBOL(dm_put_device); -+EXPORT_SYMBOL(dm_table_event); -diff -ruN linux-2.4.19/drivers/md/dm-target.c linux-2.4.19-dm/drivers/md/dm-target.c ---- linux-2.4.19/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-target.c Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,190 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+ -+struct tt_internal { -+ struct target_type tt; -+ -+ struct list_head list; -+ long use; -+}; -+ -+static LIST_HEAD(_targets); -+static rwlock_t _lock = RW_LOCK_UNLOCKED; -+ -+#define DM_MOD_NAME_SIZE 32 -+ -+static inline struct tt_internal *__find_target_type(const char *name) -+{ -+ struct list_head *tih; -+ struct tt_internal *ti; -+ -+ list_for_each(tih, &_targets) { -+ ti = list_entry(tih, struct tt_internal, list); -+ -+ if (!strcmp(name, ti->tt.name)) -+ return ti; -+ } -+ -+ return NULL; -+} -+ -+static struct tt_internal *get_target_type(const char *name) -+{ -+ struct tt_internal *ti; -+ -+ read_lock(&_lock); -+ ti = __find_target_type(name); -+ -+ if (ti) { -+ if (ti->use == 0 && ti->tt.module) -+ __MOD_INC_USE_COUNT(ti->tt.module); -+ ti->use++; -+ } -+ read_unlock(&_lock); -+ -+ return ti; -+} -+ -+static void load_module(const char *name) -+{ -+ char module_name[DM_MOD_NAME_SIZE] = "dm-"; -+ -+ /* Length check for strcat() below */ -+ if (strlen(name) > (DM_MOD_NAME_SIZE - 4)) -+ return; -+ -+ strcat(module_name, name); -+ request_module(module_name); -+ -+ return; -+} -+ -+struct target_type *dm_get_target_type(const char *name) -+{ -+ struct tt_internal *ti = get_target_type(name); -+ -+ if (!ti) { -+ load_module(name); -+ ti = get_target_type(name); -+ } -+ -+ return ti ? 
&ti->tt : NULL; -+} -+ -+void dm_put_target_type(struct target_type *t) -+{ -+ struct tt_internal *ti = (struct tt_internal *) t; -+ -+ read_lock(&_lock); -+ if (--ti->use == 0 && ti->tt.module) -+ __MOD_DEC_USE_COUNT(ti->tt.module); -+ -+ if (ti->use < 0) -+ BUG(); -+ read_unlock(&_lock); -+ -+ return; -+} -+ -+static struct tt_internal *alloc_target(struct target_type *t) -+{ -+ struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL); -+ -+ if (ti) { -+ memset(ti, 0, sizeof(*ti)); -+ ti->tt = *t; -+ } -+ -+ return ti; -+} -+ -+int dm_register_target(struct target_type *t) -+{ -+ int rv = 0; -+ struct tt_internal *ti = alloc_target(t); -+ -+ if (!ti) -+ return -ENOMEM; -+ -+ write_lock(&_lock); -+ if (__find_target_type(t->name)) -+ rv = -EEXIST; -+ else -+ list_add(&ti->list, &_targets); -+ -+ write_unlock(&_lock); -+ return rv; -+} -+ -+int dm_unregister_target(struct target_type *t) -+{ -+ struct tt_internal *ti; -+ -+ write_lock(&_lock); -+ if (!(ti = __find_target_type(t->name))) { -+ write_unlock(&_lock); -+ return -EINVAL; -+ } -+ -+ if (ti->use) { -+ write_unlock(&_lock); -+ return -ETXTBSY; -+ } -+ -+ list_del(&ti->list); -+ kfree(ti); -+ -+ write_unlock(&_lock); -+ return 0; -+} -+ -+/* -+ * io-err: always fails an io, useful for bringing -+ * up LVs that have holes in them. -+ */ -+static int io_err_ctr(struct dm_target *ti, int argc, char **args) -+{ -+ return 0; -+} -+ -+static void io_err_dtr(struct dm_target *ti) -+{ -+ /* empty */ -+ return; -+} -+ -+static int io_err_map(struct dm_target *ti, struct buffer_head *bh, int rw) -+{ -+ buffer_IO_error(bh); -+ return 0; -+} -+ -+static struct target_type error_target = { -+ .name = "error", -+ .ctr = io_err_ctr, -+ .dtr = io_err_dtr, -+ .map = io_err_map, -+}; -+ -+int dm_target_init(void) -+{ -+ return dm_register_target(&error_target); -+} -+ -+void dm_target_exit(void) -+{ -+ if (dm_unregister_target(&error_target)) -+ DMWARN("error target unregistration failed"); -+} -+ -+EXPORT_SYMBOL(dm_register_target); -+EXPORT_SYMBOL(dm_unregister_target); -diff -ruN linux-2.4.19/drivers/md/dm.c linux-2.4.19-dm/drivers/md/dm.c ---- linux-2.4.19/drivers/md/dm.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm.c Thu Nov 21 13:40:03 2002 -@@ -0,0 +1,868 @@ -+/* -+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+static const char *_name = DM_NAME; -+#define MAX_DEVICES (1 << MINORBITS) -+#define SECTOR_SHIFT 9 -+#define DEFAULT_READ_AHEAD 64 -+ -+static int major = 0; -+static int _major = 0; -+ -+struct dm_io { -+ struct mapped_device *md; -+ -+ void (*end_io) (struct buffer_head * bh, int uptodate); -+ void *context; -+}; -+ -+struct deferred_io { -+ int rw; -+ struct buffer_head *bh; -+ struct deferred_io *next; -+}; -+ -+/* -+ * Bits for the md->flags field. -+ */ -+#define DMF_BLOCK_IO 0 -+#define DMF_SUSPENDED 1 -+ -+struct mapped_device { -+ struct rw_semaphore lock; -+ atomic_t holders; -+ -+ kdev_t dev; -+ unsigned long flags; -+ -+ /* -+ * A list of ios that arrived while we were suspended. -+ */ -+ atomic_t pending; -+ wait_queue_head_t wait; -+ struct deferred_io *deferred; -+ -+ /* -+ * The current mapping. 
-+ */ -+ struct dm_table *map; -+}; -+ -+#define MIN_IOS 256 -+static kmem_cache_t *_io_cache; -+static mempool_t *_io_pool; -+ -+/* block device arrays */ -+static int _block_size[MAX_DEVICES]; -+static int _blksize_size[MAX_DEVICES]; -+static int _hardsect_size[MAX_DEVICES]; -+ -+static struct mapped_device *get_kdev(kdev_t dev); -+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh); -+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb); -+ -+ -+static __init int local_init(void) -+{ -+ int r; -+ -+ /* allocate a slab for the dm_ios */ -+ _io_cache = kmem_cache_create("dm io", -+ sizeof(struct dm_io), 0, 0, NULL, NULL); -+ -+ if (!_io_cache) -+ return -ENOMEM; -+ -+ _io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, -+ mempool_free_slab, _io_cache); -+ if (!_io_pool) { -+ kmem_cache_destroy(_io_cache); -+ return -ENOMEM; -+ } -+ -+ _major = major; -+ r = register_blkdev(_major, _name, &dm_blk_dops); -+ if (r < 0) { -+ DMERR("register_blkdev failed"); -+ mempool_destroy(_io_pool); -+ kmem_cache_destroy(_io_cache); -+ return r; -+ } -+ -+ if (!_major) -+ _major = r; -+ -+ /* set up the arrays */ -+ read_ahead[_major] = DEFAULT_READ_AHEAD; -+ blk_size[_major] = _block_size; -+ blksize_size[_major] = _blksize_size; -+ hardsect_size[_major] = _hardsect_size; -+ -+ blk_queue_make_request(BLK_DEFAULT_QUEUE(_major), dm_request); -+ -+ return 0; -+} -+ -+static void local_exit(void) -+{ -+ mempool_destroy(_io_pool); -+ kmem_cache_destroy(_io_cache); -+ -+ if (unregister_blkdev(_major, _name) < 0) -+ DMERR("devfs_unregister_blkdev failed"); -+ -+ read_ahead[_major] = 0; -+ blk_size[_major] = NULL; -+ blksize_size[_major] = NULL; -+ hardsect_size[_major] = NULL; -+ _major = 0; -+ -+ DMINFO("cleaned up"); -+} -+ -+/* -+ * We have a lot of init/exit functions, so it seems easier to -+ * store them in an array. The disposable macro 'xx' -+ * expands a prefix into a pair of function names. 
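-+ * e.g. xx(local) expands to { local_init, local_exit }; the
-+ * array is walked forwards on init and backwards on exit.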
-+ */ -+static struct { -+ int (*init) (void); -+ void (*exit) (void); -+ -+} _inits[] = { -+#define xx(n) {n ## _init, n ## _exit}, -+ xx(local) -+ xx(dm_target) -+ xx(dm_linear) -+ xx(dm_stripe) -+ xx(dm_snapshot) -+ xx(dm_interface) -+#undef xx -+}; -+ -+static int __init dm_init(void) -+{ -+ const int count = ARRAY_SIZE(_inits); -+ -+ int r, i; -+ -+ for (i = 0; i < count; i++) { -+ r = _inits[i].init(); -+ if (r) -+ goto bad; -+ } -+ -+ return 0; -+ -+ bad: -+ while (i--) -+ _inits[i].exit(); -+ -+ return r; -+} -+ -+static void __exit dm_exit(void) -+{ -+ int i = ARRAY_SIZE(_inits); -+ -+ while (i--) -+ _inits[i].exit(); -+} -+ -+/* -+ * Block device functions -+ */ -+static int dm_blk_open(struct inode *inode, struct file *file) -+{ -+ struct mapped_device *md; -+ -+ md = get_kdev(inode->i_rdev); -+ if (!md) -+ return -ENXIO; -+ -+ return 0; -+} -+ -+static int dm_blk_close(struct inode *inode, struct file *file) -+{ -+ struct mapped_device *md; -+ -+ md = get_kdev(inode->i_rdev); -+ dm_put(md); /* put the reference gained by dm_blk_open */ -+ dm_put(md); -+ return 0; -+} -+ -+static inline struct dm_io *alloc_io(void) -+{ -+ return mempool_alloc(_io_pool, GFP_NOIO); -+} -+ -+static inline void free_io(struct dm_io *io) -+{ -+ mempool_free(io, _io_pool); -+} -+ -+static inline struct deferred_io *alloc_deferred(void) -+{ -+ return kmalloc(sizeof(struct deferred_io), GFP_NOIO); -+} -+ -+static inline void free_deferred(struct deferred_io *di) -+{ -+ kfree(di); -+} -+ -+/* In 512-byte units */ -+#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1) -+ -+/* FIXME: check this */ -+static int dm_blk_ioctl(struct inode *inode, struct file *file, -+ uint command, unsigned long a) -+{ -+ int minor = MINOR(inode->i_rdev); -+ long size; -+ -+ if (minor >= MAX_DEVICES) -+ return -ENXIO; -+ -+ switch (command) { -+ case BLKROSET: -+ case BLKROGET: -+ case BLKRASET: -+ case BLKRAGET: -+ case BLKFLSBUF: -+ case BLKSSZGET: -+ //case BLKRRPART: /* Re-read partition tables */ -+ //case BLKPG: -+ case BLKELVGET: -+ case BLKELVSET: -+ case BLKBSZGET: -+ case BLKBSZSET: -+ return blk_ioctl(inode->i_rdev, command, a); -+ break; -+ -+ case BLKGETSIZE: -+ size = VOLUME_SIZE(minor); -+ if (copy_to_user((void *) a, &size, sizeof(long))) -+ return -EFAULT; -+ break; -+ -+ case BLKGETSIZE64: -+ size = VOLUME_SIZE(minor); -+ if (put_user((u64) ((u64) size) << 9, (u64 *) a)) -+ return -EFAULT; -+ break; -+ -+ case BLKRRPART: -+ return -ENOTTY; -+ -+ case LV_BMAP: -+ return dm_user_bmap(inode, (struct lv_bmap *) a); -+ -+ default: -+ DMWARN("unknown block ioctl 0x%x", command); -+ return -ENOTTY; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Add the buffer to the list of deferred io. -+ */ -+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw) -+{ -+ struct deferred_io *di; -+ -+ di = alloc_deferred(); -+ if (!di) -+ return -ENOMEM; -+ -+ down_write(&md->lock); -+ -+ if (!test_bit(DMF_SUSPENDED, &md->flags)) { -+ up_write(&md->lock); -+ free_deferred(di); -+ return 1; -+ } -+ -+ di->bh = bh; -+ di->rw = rw; -+ di->next = md->deferred; -+ md->deferred = di; -+ -+ up_write(&md->lock); -+ return 0; /* deferred successfully */ -+} -+ -+/* -+ * bh->b_end_io routine that decrements the pending count -+ * and then calls the original bh->b_end_io fn. 
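-+ * (__map_buffer() saved the original end_io and context in a
-+ * struct dm_io before hooking us in; we restore them here.)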
-+ */ -+static void dec_pending(struct buffer_head *bh, int uptodate) -+{ -+ struct dm_io *io = bh->b_private; -+ -+ if (atomic_dec_and_test(&io->md->pending)) -+ /* nudge anyone waiting on suspend queue */ -+ wake_up(&io->md->wait); -+ -+ bh->b_end_io = io->end_io; -+ bh->b_private = io->context; -+ free_io(io); -+ -+ bh->b_end_io(bh, uptodate); -+} -+ -+/* -+ * Do the bh mapping for a given leaf -+ */ -+static inline int __map_buffer(struct mapped_device *md, -+ int rw, struct buffer_head *bh) -+{ -+ int r; -+ struct dm_io *io; -+ struct dm_target *ti; -+ -+ ti = dm_table_find_target(md->map, bh->b_rsector); -+ if (!ti) -+ return -EINVAL; -+ -+ io = alloc_io(); -+ if (!io) -+ return -ENOMEM; -+ -+ io->md = md; -+ io->end_io = bh->b_end_io; -+ io->context = bh->b_private; -+ -+ r = ti->type->map(ti, bh, rw); -+ -+ if (r > 0) { -+ /* hook the end io request fn */ -+ atomic_inc(&md->pending); -+ bh->b_end_io = dec_pending; -+ bh->b_private = io; -+ -+ } else -+ /* we don't need to hook */ -+ free_io(io); -+ -+ return r; -+} -+ -+/* -+ * Checks to see if we should be deferring io, if so it queues it -+ * and returns 1. -+ */ -+static inline int __deferring(struct mapped_device *md, int rw, -+ struct buffer_head *bh) -+{ -+ int r; -+ -+ /* -+ * If we're suspended we have to queue this io for later. -+ */ -+ while (test_bit(DMF_BLOCK_IO, &md->flags)) { -+ up_read(&md->lock); -+ -+ /* -+ * There's no point deferring a read ahead -+ * request, just drop it. -+ */ -+ if (rw == READA) { -+ down_read(&md->lock); -+ return -EIO; -+ } -+ -+ r = queue_io(md, bh, rw); -+ down_read(&md->lock); -+ -+ if (r < 0) -+ return r; -+ -+ if (r == 0) -+ return 1; /* deferred successfully */ -+ -+ } -+ -+ return 0; -+} -+ -+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ md = get_kdev(bh->b_rdev); -+ if (!md) { -+ buffer_IO_error(bh); -+ return 0; -+ } -+ -+ down_read(&md->lock); -+ -+ r = __deferring(md, rw, bh); -+ if (r < 0) -+ goto bad; -+ -+ else if (!r) { -+ /* not deferring */ -+ r = __map_buffer(md, rw, bh); -+ if (r < 0) -+ goto bad; -+ } else -+ r = 0; -+ -+ up_read(&md->lock); -+ dm_put(md); -+ return r; -+ -+ bad: -+ buffer_IO_error(bh); -+ up_read(&md->lock); -+ dm_put(md); -+ return 0; -+} -+ -+static int check_dev_size(kdev_t dev, unsigned long block) -+{ -+ /* FIXME: check this */ -+ int minor = MINOR(dev); -+ unsigned long max_sector = (_block_size[minor] << 1) + 1; -+ unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9); -+ -+ return (sector > max_sector) ? 0 : 1; -+} -+ -+/* -+ * Creates a dummy buffer head and maps it (for lilo). -+ */ -+static int __bmap(struct mapped_device *md, kdev_t dev, unsigned long block, -+ kdev_t *r_dev, unsigned long *r_block) -+{ -+ struct buffer_head bh; -+ struct dm_target *ti; -+ int r; -+ -+ if (test_bit(DMF_BLOCK_IO, &md->flags)) { -+ return -EPERM; -+ } -+ -+ if (!check_dev_size(dev, block)) { -+ return -EINVAL; -+ } -+ -+ /* setup dummy bh */ -+ memset(&bh, 0, sizeof(bh)); -+ bh.b_blocknr = block; -+ bh.b_dev = bh.b_rdev = dev; -+ bh.b_size = _blksize_size[MINOR(dev)]; -+ bh.b_rsector = block * (bh.b_size >> 9); -+ -+ /* find target */ -+ ti = dm_table_find_target(md->map, bh.b_rsector); -+ -+ /* do the mapping */ -+ r = ti->type->map(ti, &bh, READ); -+ -+ if (!r) { -+ *r_dev = bh.b_rdev; -+ *r_block = bh.b_rsector / (bh.b_size >> 9); -+ } -+ -+ return r; -+} -+ -+/* -+ * Marshals arguments and results between user and kernel space. 
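[Aside: __map_buffer() and dec_pending() above use a classic completion-hook pattern — stash the caller's b_end_io/b_private, substitute our own, and restore both before chaining. A hedged userspace sketch of the same pattern; struct req and every name here are invented for illustration.]

	#include <stdio.h>
	#include <stdlib.h>

	struct req {
		void (*done) (struct req *r, int ok);
		void *ctx;
	};

	struct hook {
		void (*saved_done) (struct req *r, int ok);
		void *saved_ctx;
	};

	static void my_done(struct req *r, int ok)
	{
		struct hook *h = r->ctx;

		/* ...accounting would go here (cf. md->pending)... */
		r->done = h->saved_done;	/* restore the original */
		r->ctx = h->saved_ctx;
		free(h);
		r->done(r, ok);			/* chain to it */
	}

	static void orig_done(struct req *r, int ok)
	{
		printf("request finished, ok=%d\n", ok);
	}

	int main(void)
	{
		struct req r = { orig_done, NULL };
		struct hook *h = malloc(sizeof(*h));

		if (!h)
			return 1;

		h->saved_done = r.done;		/* cf. io->end_io = bh->b_end_io */
		h->saved_ctx = r.ctx;
		r.done = my_done;		/* hook installed */
		r.ctx = h;

		r.done(&r, 1);			/* simulate the io completing */
		return 0;
	}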
-+ */ -+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb) -+{ -+ struct mapped_device *md; -+ unsigned long block, r_block; -+ kdev_t r_dev; -+ int r; -+ -+ if (get_user(block, &lvb->lv_block)) -+ return -EFAULT; -+ -+ md = get_kdev(inode->i_rdev); -+ if (!md) -+ return -ENXIO; -+ -+ down_read(&md->lock); -+ r = __bmap(md, inode->i_rdev, block, &r_dev, &r_block); -+ up_read(&md->lock); -+ dm_put(md); -+ -+ if (!r && (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) || -+ put_user(r_block, &lvb->lv_block))) -+ r = -EFAULT; -+ -+ return r; -+} -+ -+/*----------------------------------------------------------------- -+ * A bitset is used to keep track of allocated minor numbers. -+ *---------------------------------------------------------------*/ -+static spinlock_t _minor_lock = SPIN_LOCK_UNLOCKED; -+static struct mapped_device *_mds[MAX_DEVICES]; -+ -+static void free_minor(int minor) -+{ -+ spin_lock(&_minor_lock); -+ _mds[minor] = NULL; -+ spin_unlock(&_minor_lock); -+} -+ -+/* -+ * See if the device with a specific minor # is free. -+ */ -+static int specific_minor(int minor, struct mapped_device *md) -+{ -+ int r = -EBUSY; -+ -+ if (minor >= MAX_DEVICES) { -+ DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)", -+ MAX_DEVICES); -+ return -EINVAL; -+ } -+ -+ spin_lock(&_minor_lock); -+ if (!_mds[minor]) { -+ _mds[minor] = md; -+ r = minor; -+ } -+ spin_unlock(&_minor_lock); -+ -+ return r; -+} -+ -+static int next_free_minor(struct mapped_device *md) -+{ -+ int i; -+ -+ spin_lock(&_minor_lock); -+ for (i = 0; i < MAX_DEVICES; i++) { -+ if (!_mds[i]) { -+ _mds[i] = md; -+ break; -+ } -+ } -+ spin_unlock(&_minor_lock); -+ -+ return (i < MAX_DEVICES) ? i : -EBUSY; -+} -+ -+static struct mapped_device *get_kdev(kdev_t dev) -+{ -+ struct mapped_device *md; -+ -+ if (major(dev) != _major) -+ return NULL; -+ -+ spin_lock(&_minor_lock); -+ md = _mds[minor(dev)]; -+ if (md) -+ dm_get(md); -+ spin_unlock(&_minor_lock); -+ -+ return md; -+} -+ -+/* -+ * Allocate and initialise a blank device with a given minor. -+ */ -+static struct mapped_device *alloc_dev(int minor) -+{ -+ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); -+ -+ if (!md) { -+ DMWARN("unable to allocate device, out of memory."); -+ return NULL; -+ } -+ -+ /* get a minor number for the dev */ -+ minor = (minor < 0) ? next_free_minor(md) : specific_minor(minor, md); -+ if (minor < 0) { -+ kfree(md); -+ return NULL; -+ } -+ -+ memset(md, 0, sizeof(*md)); -+ md->dev = mk_kdev(_major, minor); -+ init_rwsem(&md->lock); -+ atomic_set(&md->holders, 1); -+ atomic_set(&md->pending, 0); -+ init_waitqueue_head(&md->wait); -+ -+ return md; -+} -+ -+static void free_dev(struct mapped_device *md) -+{ -+ free_minor(minor(md->dev)); -+ kfree(md); -+} -+ -+/* -+ * The hardsect size for a mapped device is the largest hardsect size -+ * from the devices it maps onto. -+ */ -+static int __find_hardsect_size(struct list_head *devices) -+{ -+ int result = 512, size; -+ struct list_head *tmp; -+ -+ list_for_each(tmp, devices) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ size = get_hardsect_size(dd->dev); -+ if (size > result) -+ result = size; -+ } -+ -+ return result; -+} -+ -+/* -+ * Bind a table to the device. 
-+ */ -+static int __bind(struct mapped_device *md, struct dm_table *t) -+{ -+ int minor = minor(md->dev); -+ md->map = t; -+ -+ /* in k */ -+ _block_size[minor] = dm_table_get_size(t) >> 1; -+ _blksize_size[minor] = BLOCK_SIZE; -+ _hardsect_size[minor] = __find_hardsect_size(dm_table_get_devices(t)); -+ register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]); -+ -+ dm_table_get(t); -+ return 0; -+} -+ -+static void __unbind(struct mapped_device *md) -+{ -+ int minor = minor(md->dev); -+ -+ dm_table_put(md->map); -+ md->map = NULL; -+ -+ _block_size[minor] = 0; -+ _blksize_size[minor] = 0; -+ _hardsect_size[minor] = 0; -+} -+ -+/* -+ * Constructor for a new device. -+ */ -+int dm_create(int minor, struct dm_table *table, struct mapped_device **result) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ md = alloc_dev(minor); -+ if (!md) -+ return -ENXIO; -+ -+ r = __bind(md, table); -+ if (r) { -+ free_dev(md); -+ return r; -+ } -+ -+ *result = md; -+ return 0; -+} -+ -+void dm_get(struct mapped_device *md) -+{ -+ atomic_inc(&md->holders); -+} -+ -+void dm_put(struct mapped_device *md) -+{ -+ if (atomic_dec_and_test(&md->holders)) { -+ __unbind(md); -+ free_dev(md); -+ } -+} -+ -+/* -+ * Requeue the deferred io by calling generic_make_request. -+ */ -+static void flush_deferred_io(struct deferred_io *c) -+{ -+ struct deferred_io *n; -+ -+ while (c) { -+ n = c->next; -+ generic_make_request(c->rw, c->bh); -+ free_deferred(c); -+ c = n; -+ } -+} -+ -+/* -+ * Swap in a new table (destroying old one). -+ */ -+int dm_swap_table(struct mapped_device *md, struct dm_table *table) -+{ -+ int r; -+ -+ down_write(&md->lock); -+ -+ /* device must be suspended */ -+ if (!test_bit(DMF_SUSPENDED, &md->flags)) { -+ up_write(&md->lock); -+ return -EPERM; -+ } -+ -+ __unbind(md); -+ r = __bind(md, table); -+ if (r) -+ return r; -+ -+ up_write(&md->lock); -+ return 0; -+} -+ -+/* -+ * We need to be able to change a mapping table under a mounted -+ * filesystem. For example we might want to move some data in -+ * the background. Before the table can be swapped with -+ * dm_bind_table, dm_suspend must be called to flush any in -+ * flight io and ensure that any further io gets deferred. -+ */ -+int dm_suspend(struct mapped_device *md) -+{ -+ DECLARE_WAITQUEUE(wait, current); -+ -+ down_write(&md->lock); -+ -+ /* -+ * First we set the BLOCK_IO flag so no more ios will be -+ * mapped. -+ */ -+ if (test_bit(DMF_BLOCK_IO, &md->flags)) { -+ up_write(&md->lock); -+ return -EINVAL; -+ } -+ -+ set_bit(DMF_BLOCK_IO, &md->flags); -+ up_write(&md->lock); -+ -+ /* -+ * Then we wait for the already mapped ios to -+ * complete. 
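[Aside: the wait that follows is the heart of suspend. As an illustration, here is a userspace analogue, assuming a pthread condition variable stands in for the kernel wait queue; note that both must re-check the count in a loop before deciding to sleep.]

	#include <pthread.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t quiesced = PTHREAD_COND_INITIALIZER;
	static int pending;

	static void io_start(void)
	{
		pthread_mutex_lock(&lock);
		pending++;
		pthread_mutex_unlock(&lock);
	}

	static void io_done(void)		/* cf. dec_pending() above */
	{
		pthread_mutex_lock(&lock);
		if (--pending == 0)
			pthread_cond_broadcast(&quiesced);
		pthread_mutex_unlock(&lock);
	}

	static void suspend_wait(void)		/* cf. the loop below */
	{
		pthread_mutex_lock(&lock);
		while (pending)
			pthread_cond_wait(&quiesced, &lock);
		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		io_start();
		io_done();
		suspend_wait();		/* returns at once: pending == 0 */
		return 0;
	}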
-+ */ -+ down_read(&md->lock); -+ -+ add_wait_queue(&md->wait, &wait); -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ if (!atomic_read(&md->pending)) -+ break; -+ -+ schedule(); -+ } -+ -+ current->state = TASK_RUNNING; -+ remove_wait_queue(&md->wait, &wait); -+ up_read(&md->lock); -+ -+ /* set_bit is atomic */ -+ set_bit(DMF_SUSPENDED, &md->flags); -+ -+ return 0; -+} -+ -+int dm_resume(struct mapped_device *md) -+{ -+ struct deferred_io *def; -+ -+ down_write(&md->lock); -+ if (!test_bit(DMF_SUSPENDED, &md->flags) || -+ !dm_table_get_size(md->map)) { -+ up_write(&md->lock); -+ return -EINVAL; -+ } -+ -+ clear_bit(DMF_SUSPENDED, &md->flags); -+ clear_bit(DMF_BLOCK_IO, &md->flags); -+ def = md->deferred; -+ md->deferred = NULL; -+ up_write(&md->lock); -+ -+ flush_deferred_io(def); -+ run_task_queue(&tq_disk); -+ -+ return 0; -+} -+ -+struct dm_table *dm_get_table(struct mapped_device *md) -+{ -+ struct dm_table *t; -+ -+ down_read(&md->lock); -+ t = md->map; -+ dm_table_get(t); -+ up_read(&md->lock); -+ -+ return t; -+} -+ -+kdev_t dm_kdev(struct mapped_device *md) -+{ -+ kdev_t dev; -+ -+ down_read(&md->lock); -+ dev = md->dev; -+ up_read(&md->lock); -+ -+ return dev; -+} -+ -+int dm_suspended(struct mapped_device *md) -+{ -+ return test_bit(DMF_SUSPENDED, &md->flags); -+} -+ -+struct block_device_operations dm_blk_dops = { -+ .open = dm_blk_open, -+ .release = dm_blk_close, -+ .ioctl = dm_blk_ioctl, -+ .owner = THIS_MODULE -+}; -+ -+/* -+ * module hooks -+ */ -+module_init(dm_init); -+module_exit(dm_exit); -+ -+MODULE_PARM(major, "i"); -+MODULE_PARM_DESC(major, "The major number of the device mapper"); -+MODULE_DESCRIPTION(DM_NAME " driver"); -+MODULE_AUTHOR("Joe Thornber "); -+MODULE_LICENSE("GPL"); -diff -ruN linux-2.4.19/drivers/md/dm.h linux-2.4.19-dm/drivers/md/dm.h ---- linux-2.4.19/drivers/md/dm.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm.h Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,150 @@ -+/* -+ * Internal header file for device mapper -+ * -+ * Copyright (C) 2001, 2002 Sistina Software -+ * -+ * This file is released under the LGPL. -+ */ -+ -+#ifndef DM_INTERNAL_H -+#define DM_INTERNAL_H -+ -+#include -+#include -+#include -+#include -+ -+#define DM_NAME "device-mapper" -+#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) -+#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) -+#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) -+ -+/* -+ * FIXME: I think this should be with the definition of sector_t -+ * in types.h. -+ */ -+#ifdef CONFIG_LBD -+#define SECTOR_FORMAT "%Lu" -+#else -+#define SECTOR_FORMAT "%lu" -+#endif -+ -+extern struct block_device_operations dm_blk_dops; -+ -+/* -+ * List of devices that a metadevice uses and should open/close. -+ */ -+struct dm_dev { -+ struct list_head list; -+ -+ atomic_t count; -+ int mode; -+ kdev_t dev; -+ struct block_device *bdev; -+}; -+ -+struct dm_table; -+struct mapped_device; -+ -+/*----------------------------------------------------------------- -+ * Functions for manipulating a struct mapped_device. -+ * Drop the reference with dm_put when you finish with the object. -+ *---------------------------------------------------------------*/ -+int dm_create(int minor, struct dm_table *table, struct mapped_device **md); -+ -+/* -+ * Reference counting for md. -+ */ -+void dm_get(struct mapped_device *md); -+void dm_put(struct mapped_device *md); -+ -+/* -+ * A device can still be used while suspended, but I/O is deferred. 
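[Aside: to make the deferral mechanics above concrete — queue_io() pushing at the head while suspended, flush_deferred_io() replaying on resume — here is a self-contained sketch. Because the list is a stack, entries replay in reverse arrival order.]

	#include <stdio.h>
	#include <stdlib.h>

	struct deferred {
		int id;			/* stands in for (bh, rw) */
		struct deferred *next;
	};

	static struct deferred *head;

	static void defer(int id)
	{
		struct deferred *d = malloc(sizeof(*d));

		if (!d)
			return;
		d->id = id;
		d->next = head;		/* push at the head */
		head = d;
	}

	static void flush(void)
	{
		struct deferred *d = head, *n;

		head = NULL;
		while (d) {
			n = d->next;
			printf("replaying io %d\n", d->id); /* cf. generic_make_request */
			free(d);
			d = n;
		}
	}

	int main(void)
	{
		defer(1);
		defer(2);
		flush();	/* prints 2 then 1 */
		return 0;
	}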
-+ */ -+int dm_suspend(struct mapped_device *md); -+int dm_resume(struct mapped_device *md); -+ -+/* -+ * The device must be suspended before calling this method. -+ */ -+int dm_swap_table(struct mapped_device *md, struct dm_table *t); -+ -+/* -+ * Drop a reference on the table when you've finished with the -+ * result. -+ */ -+struct dm_table *dm_get_table(struct mapped_device *md); -+ -+/* -+ * Info functions. -+ */ -+kdev_t dm_kdev(struct mapped_device *md); -+int dm_suspended(struct mapped_device *md); -+ -+/*----------------------------------------------------------------- -+ * Functions for manipulating a table. Tables are also reference -+ * counted. -+ *---------------------------------------------------------------*/ -+int dm_table_create(struct dm_table **result, int mode); -+ -+void dm_table_get(struct dm_table *t); -+void dm_table_put(struct dm_table *t); -+ -+int dm_table_add_target(struct dm_table *t, const char *type, -+ sector_t start, sector_t len, char *params); -+int dm_table_complete(struct dm_table *t); -+void dm_table_event(struct dm_table *t); -+sector_t dm_table_get_size(struct dm_table *t); -+struct dm_target *dm_table_get_target(struct dm_table *t, int index); -+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); -+unsigned int dm_table_get_num_targets(struct dm_table *t); -+struct list_head *dm_table_get_devices(struct dm_table *t); -+int dm_table_get_mode(struct dm_table *t); -+void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq); -+ -+/*----------------------------------------------------------------- -+ * A registry of target types. -+ *---------------------------------------------------------------*/ -+int dm_target_init(void); -+void dm_target_exit(void); -+struct target_type *dm_get_target_type(const char *name); -+void dm_put_target_type(struct target_type *t); -+ -+/*----------------------------------------------------------------- -+ * Useful inlines. -+ *---------------------------------------------------------------*/ -+static inline int array_too_big(unsigned long fixed, unsigned long obj, -+ unsigned long num) -+{ -+ return (num > (ULONG_MAX - fixed) / obj); -+} -+ -+/* -+ * ceiling(n / size) * size -+ */ -+static inline unsigned long dm_round_up(unsigned long n, unsigned long size) -+{ -+ unsigned long r = n % size; -+ return n + (r ? (size - r) : 0); -+} -+ -+/* -+ * The device-mapper can be driven through one of two interfaces; -+ * ioctl or filesystem, depending which patch you have applied. -+ */ -+int dm_interface_init(void); -+void dm_interface_exit(void); -+ -+/* -+ * Targets for linear and striped mappings -+ */ -+int dm_linear_init(void); -+void dm_linear_exit(void); -+ -+int dm_stripe_init(void); -+void dm_stripe_exit(void); -+ -+int dm_snapshot_init(void); -+void dm_snapshot_exit(void); -+ -+#endif -diff -ruN linux-2.4.19/drivers/md/kcopyd.c linux-2.4.19-dm/drivers/md/kcopyd.c ---- linux-2.4.19/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/kcopyd.c Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,843 @@ -+/* -+ * Copyright (C) 2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "kcopyd.h" -+ -+/* FIXME: this is only needed for the DMERR macros */ -+#include "dm.h" -+ -+/* -+ * Hard sector size used all over the kernel. 
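[Aside: the two inline helpers above are easy to sanity-check in isolation; the values in main() are arbitrary examples.]

	#include <stdio.h>
	#include <limits.h>

	static unsigned long dm_round_up(unsigned long n, unsigned long size)
	{
		unsigned long r = n % size;
		return n + (r ? (size - r) : 0);
	}

	static int array_too_big(unsigned long fixed, unsigned long obj,
				 unsigned long num)
	{
		return (num > (ULONG_MAX - fixed) / obj);
	}

	int main(void)
	{
		printf("%lu\n", dm_round_up(1000, 512));	/* -> 1024 */
		printf("%d\n", array_too_big(64, 512, ULONG_MAX / 256)); /* -> 1 */
		return 0;
	}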
-+ */ -+#define SECTOR_SIZE 512 -+#define SECTOR_SHIFT 9 -+ -+static void wake_kcopyd(void); -+ -+/*----------------------------------------------------------------- -+ * We reserve our own pool of preallocated pages that are -+ * only used for kcopyd io. -+ *---------------------------------------------------------------*/ -+ -+/* -+ * FIXME: This should be configurable. -+ */ -+#define NUM_PAGES 512 -+ -+static DECLARE_MUTEX(_pages_lock); -+static int _num_free_pages; -+static struct page *_pages_array[NUM_PAGES]; -+static DECLARE_MUTEX(start_lock); -+ -+static int init_pages(void) -+{ -+ int i; -+ struct page *p; -+ -+ for (i = 0; i < NUM_PAGES; i++) { -+ p = alloc_page(GFP_KERNEL); -+ if (!p) -+ goto bad; -+ -+ LockPage(p); -+ _pages_array[i] = p; -+ } -+ -+ _num_free_pages = NUM_PAGES; -+ return 0; -+ -+ bad: -+ while (i--) -+ __free_page(_pages_array[i]); -+ return -ENOMEM; -+} -+ -+static void exit_pages(void) -+{ -+ int i; -+ struct page *p; -+ -+ for (i = 0; i < NUM_PAGES; i++) { -+ p = _pages_array[i]; -+ UnlockPage(p); -+ __free_page(p); -+ } -+ -+ _num_free_pages = 0; -+} -+ -+static int kcopyd_get_pages(int num, struct page **result) -+{ -+ int i; -+ -+ down(&_pages_lock); -+ if (_num_free_pages < num) { -+ up(&_pages_lock); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < num; i++) { -+ _num_free_pages--; -+ result[i] = _pages_array[_num_free_pages]; -+ } -+ up(&_pages_lock); -+ -+ return 0; -+} -+ -+static void kcopyd_free_pages(int num, struct page **result) -+{ -+ int i; -+ -+ down(&_pages_lock); -+ for (i = 0; i < num; i++) -+ _pages_array[_num_free_pages++] = result[i]; -+ up(&_pages_lock); -+} -+ -+/*----------------------------------------------------------------- -+ * We keep our own private pool of buffer_heads. These are just -+ * held in a list on the b_reqnext field. -+ *---------------------------------------------------------------*/ -+ -+/* -+ * Make sure we have enough buffers to always keep the pages -+ * occupied. So we assume the worst case scenario where blocks -+ * are the size of a single sector. -+ */ -+#define NUM_BUFFERS NUM_PAGES * (PAGE_SIZE / SECTOR_SIZE) -+ -+static spinlock_t _buffer_lock = SPIN_LOCK_UNLOCKED; -+static struct buffer_head *_all_buffers; -+static struct buffer_head *_free_buffers; -+ -+static int init_buffers(void) -+{ -+ int i; -+ struct buffer_head *buffers; -+ -+ buffers = vcalloc(NUM_BUFFERS, sizeof(struct buffer_head)); -+ if (!buffers) { -+ DMWARN("Couldn't allocate buffer heads."); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < NUM_BUFFERS; i++) { -+ if (i < NUM_BUFFERS - 1) -+ buffers[i].b_reqnext = &buffers[i + 1]; -+ init_waitqueue_head(&buffers[i].b_wait); -+ INIT_LIST_HEAD(&buffers[i].b_inode_buffers); -+ } -+ -+ _all_buffers = _free_buffers = buffers; -+ return 0; -+} -+ -+static void exit_buffers(void) -+{ -+ vfree(_all_buffers); -+} -+ -+static struct buffer_head *alloc_buffer(void) -+{ -+ struct buffer_head *r; -+ int flags; -+ -+ spin_lock_irqsave(&_buffer_lock, flags); -+ -+ if (!_free_buffers) -+ r = NULL; -+ else { -+ r = _free_buffers; -+ _free_buffers = _free_buffers->b_reqnext; -+ r->b_reqnext = NULL; -+ } -+ -+ spin_unlock_irqrestore(&_buffer_lock, flags); -+ -+ return r; -+} -+ -+/* -+ * Only called from interrupt context. -+ */ -+static void free_buffer(struct buffer_head *bh) -+{ -+ int flags, was_empty; -+ -+ spin_lock_irqsave(&_buffer_lock, flags); -+ was_empty = (_free_buffers == NULL) ? 
1 : 0; -+ bh->b_reqnext = _free_buffers; -+ _free_buffers = bh; -+ spin_unlock_irqrestore(&_buffer_lock, flags); -+ -+ /* -+ * If the buffer list was empty then kcopyd probably went -+ * to sleep because it ran out of buffer heads, so let's -+ * wake it up. -+ */ -+ if (was_empty) -+ wake_kcopyd(); -+} -+ -+/*----------------------------------------------------------------- -+ * kcopyd_jobs need to be allocated by the *clients* of kcopyd, -+ * for this reason we use a mempool to prevent the client from -+ * ever having to do io (which could cause a -+ * deadlock). -+ *---------------------------------------------------------------*/ -+#define MIN_JOBS NUM_PAGES -+ -+static kmem_cache_t *_job_cache = NULL; -+static mempool_t *_job_pool = NULL; -+ -+/* -+ * We maintain three lists of jobs: -+ * -+ * i) jobs waiting for pages -+ * ii) jobs that have pages, and are waiting for the io to be issued. -+ * iii) jobs that have completed. -+ * -+ * All three of these are protected by job_lock. -+ */ -+ -+static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED; -+ -+static LIST_HEAD(_complete_jobs); -+static LIST_HEAD(_io_jobs); -+static LIST_HEAD(_pages_jobs); -+ -+static int init_jobs(void) -+{ -+ INIT_LIST_HEAD(&_complete_jobs); -+ INIT_LIST_HEAD(&_io_jobs); -+ INIT_LIST_HEAD(&_pages_jobs); -+ -+ _job_cache = kmem_cache_create("kcopyd-jobs", sizeof(struct kcopyd_job), -+ __alignof__(struct kcopyd_job), -+ 0, NULL, NULL); -+ if (!_job_cache) -+ return -ENOMEM; -+ -+ _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, -+ mempool_free_slab, _job_cache); -+ if (!_job_pool) { -+ kmem_cache_destroy(_job_cache); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static void exit_jobs(void) -+{ -+ mempool_destroy(_job_pool); -+ kmem_cache_destroy(_job_cache); -+} -+ -+struct kcopyd_job *kcopyd_alloc_job(void) -+{ -+ struct kcopyd_job *job; -+ -+ job = mempool_alloc(_job_pool, GFP_NOIO); -+ if (!job) -+ return NULL; -+ -+ memset(job, 0, sizeof(*job)); -+ return job; -+} -+ -+void kcopyd_free_job(struct kcopyd_job *job) -+{ -+ mempool_free(job, _job_pool); -+} -+ -+/* -+ * Functions to push and pop a job onto the head of a given job -+ * list. -+ */ -+static inline struct kcopyd_job *pop(struct list_head *jobs) -+{ -+ struct kcopyd_job *job = NULL; -+ int flags; -+ -+ spin_lock_irqsave(&_job_lock, flags); -+ -+ if (!list_empty(jobs)) { -+ job = list_entry(jobs->next, struct kcopyd_job, list); -+ list_del(&job->list); -+ } -+ spin_unlock_irqrestore(&_job_lock, flags); -+ -+ return job; -+} -+ -+static inline void push(struct list_head *jobs, struct kcopyd_job *job) -+{ -+ int flags; -+ -+ spin_lock_irqsave(&_job_lock, flags); -+ list_add(&job->list, jobs); -+ spin_unlock_irqrestore(&_job_lock, flags); -+} -+ -+/* -+ * Completion function for one of our buffers. -+ */ -+static void end_bh(struct buffer_head *bh, int uptodate) -+{ -+ struct kcopyd_job *job = bh->b_private; -+ -+ mark_buffer_uptodate(bh, uptodate); -+ unlock_buffer(bh); -+ -+ if (!uptodate) -+ job->err = -EIO; -+ -+ /* are we the last ? 
*/ -+ if (atomic_dec_and_test(&job->nr_incomplete)) { -+ push(&_complete_jobs, job); -+ wake_kcopyd(); -+ } -+ -+ free_buffer(bh); -+} -+ -+static void dispatch_bh(struct kcopyd_job *job, -+ struct buffer_head *bh, int block) -+{ -+ int p; -+ -+ /* -+ * Add in the job offset -+ */ -+ bh->b_blocknr = (job->disk.sector >> job->block_shift) + block; -+ -+ p = block >> job->bpp_shift; -+ block &= job->bpp_mask; -+ -+ bh->b_dev = B_FREE; -+ bh->b_size = job->block_size; -+ set_bh_page(bh, job->pages[p], ((block << job->block_shift) + -+ job->offset) << SECTOR_SHIFT); -+ bh->b_this_page = bh; -+ -+ init_buffer(bh, end_bh, job); -+ -+ bh->b_dev = job->disk.dev; -+ bh->b_state = ((1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req)); -+ -+ set_bit(BH_Uptodate, &bh->b_state); -+ if (job->rw == WRITE) -+ clear_bit(BH_Dirty, &bh->b_state); -+ -+ submit_bh(job->rw, bh); -+} -+ -+/* -+ * These three functions process 1 item from the corresponding -+ * job list. -+ * -+ * They return: -+ * < 0: error -+ * 0: success -+ * > 0: can't process yet. -+ */ -+static int run_complete_job(struct kcopyd_job *job) -+{ -+ job->callback(job); -+ return 0; -+} -+ -+/* -+ * Request io on as many buffer heads as we can currently get for -+ * a particular job. -+ */ -+static int run_io_job(struct kcopyd_job *job) -+{ -+ unsigned int block; -+ struct buffer_head *bh; -+ -+ for (block = atomic_read(&job->nr_requested); -+ block < job->nr_blocks; block++) { -+ bh = alloc_buffer(); -+ if (!bh) -+ break; -+ -+ atomic_inc(&job->nr_requested); -+ dispatch_bh(job, bh, block); -+ } -+ -+ return (block == job->nr_blocks) ? 0 : 1; -+} -+ -+static int run_pages_job(struct kcopyd_job *job) -+{ -+ int r; -+ -+ job->nr_pages = (job->disk.count + job->offset) / -+ (PAGE_SIZE / SECTOR_SIZE); -+ r = kcopyd_get_pages(job->nr_pages, job->pages); -+ -+ if (!r) { -+ /* this job is ready for io */ -+ push(&_io_jobs, job); -+ return 0; -+ } -+ -+ if (r == -ENOMEM) -+ /* can complete now */ -+ return 1; -+ -+ return r; -+} -+ -+/* -+ * Run through a list for as long as possible. Returns the count -+ * of successful jobs. -+ */ -+static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) -+{ -+ struct kcopyd_job *job; -+ int r, count = 0; -+ -+ while ((job = pop(jobs))) { -+ -+ r = fn(job); -+ -+ if (r < 0) { -+ /* error this rogue job */ -+ job->err = r; -+ push(&_complete_jobs, job); -+ break; -+ } -+ -+ if (r > 0) { -+ /* -+ * We couldn't service this job ATM, so -+ * push this job back onto the list. -+ */ -+ push(jobs, job); -+ break; -+ } -+ -+ count++; -+ } -+ -+ return count; -+} -+ -+/* -+ * kcopyd does this every time it's woken up. -+ */ -+static void do_work(void) -+{ -+ int count; -+ -+ /* -+ * We loop round until there is no more work to do. 
-+ */ -+ do { -+ count = process_jobs(&_complete_jobs, run_complete_job); -+ count += process_jobs(&_io_jobs, run_io_job); -+ count += process_jobs(&_pages_jobs, run_pages_job); -+ -+ } while (count); -+ -+ run_task_queue(&tq_disk); -+} -+ -+/*----------------------------------------------------------------- -+ * The daemon -+ *---------------------------------------------------------------*/ -+static atomic_t _kcopyd_must_die; -+static DECLARE_MUTEX(_run_lock); -+static DECLARE_WAIT_QUEUE_HEAD(_job_queue); -+ -+static int kcopyd(void *arg) -+{ -+ DECLARE_WAITQUEUE(wq, current); -+ -+ daemonize(); -+ strcpy(current->comm, "kcopyd"); -+ atomic_set(&_kcopyd_must_die, 0); -+ -+ add_wait_queue(&_job_queue, &wq); -+ -+ down(&_run_lock); -+ up(&start_lock); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ if (atomic_read(&_kcopyd_must_die)) -+ break; -+ -+ do_work(); -+ schedule(); -+ } -+ -+ set_current_state(TASK_RUNNING); -+ remove_wait_queue(&_job_queue, &wq); -+ -+ up(&_run_lock); -+ -+ return 0; -+} -+ -+static int start_daemon(void) -+{ -+ static pid_t pid = 0; -+ -+ down(&start_lock); -+ -+ pid = kernel_thread(kcopyd, NULL, 0); -+ if (pid <= 0) { -+ DMERR("Failed to start kcopyd thread"); -+ return -EAGAIN; -+ } -+ -+ /* -+ * wait for the daemon to up this mutex. -+ */ -+ down(&start_lock); -+ up(&start_lock); -+ -+ return 0; -+} -+ -+static int stop_daemon(void) -+{ -+ atomic_set(&_kcopyd_must_die, 1); -+ wake_kcopyd(); -+ down(&_run_lock); -+ up(&_run_lock); -+ -+ return 0; -+} -+ -+static void wake_kcopyd(void) -+{ -+ wake_up_interruptible(&_job_queue); -+} -+ -+static int calc_shift(unsigned int n) -+{ -+ int s; -+ -+ for (s = 0; n; s++, n >>= 1) -+ ; -+ -+ return --s; -+} -+ -+static void calc_block_sizes(struct kcopyd_job *job) -+{ -+ job->block_size = get_hardsect_size(job->disk.dev); -+ job->block_shift = calc_shift(job->block_size / SECTOR_SIZE); -+ job->bpp_shift = PAGE_SHIFT - job->block_shift - SECTOR_SHIFT; -+ job->bpp_mask = (1 << job->bpp_shift) - 1; -+ job->nr_blocks = job->disk.count >> job->block_shift; -+ atomic_set(&job->nr_requested, 0); -+ atomic_set(&job->nr_incomplete, job->nr_blocks); -+} -+ -+int kcopyd_io(struct kcopyd_job *job) -+{ -+ calc_block_sizes(job); -+ push(job->pages[0] ? &_io_jobs : &_pages_jobs, job); -+ wake_kcopyd(); -+ return 0; -+} -+ -+/*----------------------------------------------------------------- -+ * The copier is implemented on top of the simpler async io -+ * daemon above. 
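[Aside: calc_shift() above computes floor(log2(n)), which for the power-of-two block sizes it is fed is the exact shift count. A standalone check:]

	#include <stdio.h>

	static int calc_shift(unsigned int n)
	{
		int s;

		for (s = 0; n; s++, n >>= 1)
			;

		return --s;
	}

	int main(void)
	{
		printf("%d %d %d\n", calc_shift(1), calc_shift(8),
		       calc_shift(4096 / 512));	/* 0 3 3 */
		return 0;
	}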
-+ *---------------------------------------------------------------*/ -+struct copy_info { -+ kcopyd_notify_fn notify; -+ void *notify_context; -+ -+ struct kcopyd_region to; -+}; -+ -+#define MIN_INFOS 128 -+static kmem_cache_t *_copy_cache = NULL; -+static mempool_t *_copy_pool = NULL; -+ -+static int init_copier(void) -+{ -+ _copy_cache = kmem_cache_create("kcopyd-info", -+ sizeof(struct copy_info), -+ __alignof__(struct copy_info), -+ 0, NULL, NULL); -+ if (!_copy_cache) -+ return -ENOMEM; -+ -+ _copy_pool = mempool_create(MIN_INFOS, mempool_alloc_slab, -+ mempool_free_slab, _copy_cache); -+ if (!_copy_pool) { -+ kmem_cache_destroy(_copy_cache); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static void exit_copier(void) -+{ -+ if (_copy_pool) -+ mempool_destroy(_copy_pool); -+ -+ if (_copy_cache) -+ kmem_cache_destroy(_copy_cache); -+} -+ -+static inline struct copy_info *alloc_copy_info(void) -+{ -+ return mempool_alloc(_copy_pool, GFP_NOIO); -+} -+ -+static inline void free_copy_info(struct copy_info *info) -+{ -+ mempool_free(info, _copy_pool); -+} -+ -+void copy_complete(struct kcopyd_job *job) -+{ -+ struct copy_info *info = (struct copy_info *) job->context; -+ -+ if (info->notify) -+ info->notify(job->err, info->notify_context); -+ -+ free_copy_info(info); -+ -+ kcopyd_free_pages(job->nr_pages, job->pages); -+ -+ kcopyd_free_job(job); -+} -+ -+static void page_write_complete(struct kcopyd_job *job) -+{ -+ struct copy_info *info = (struct copy_info *) job->context; -+ int i; -+ -+ if (info->notify) -+ info->notify(job->err, info->notify_context); -+ -+ free_copy_info(info); -+ for (i = 0; i < job->nr_pages; i++) -+ put_page(job->pages[i]); -+ -+ kcopyd_free_job(job); -+} -+ -+/* -+ * These callback functions implement the state machine that copies regions. -+ */ -+void copy_write(struct kcopyd_job *job) -+{ -+ struct copy_info *info = (struct copy_info *) job->context; -+ -+ if (job->err && info->notify) { -+ info->notify(job->err, job->context); -+ kcopyd_free_job(job); -+ free_copy_info(info); -+ return; -+ } -+ -+ job->rw = WRITE; -+ memcpy(&job->disk, &info->to, sizeof(job->disk)); -+ job->callback = copy_complete; -+ job->context = info; -+ -+ /* -+ * Queue the write. -+ */ -+ kcopyd_io(job); -+} -+ -+int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, -+ struct page **pages, int offset, kcopyd_notify_fn fn, -+ void *context) -+{ -+ struct copy_info *info; -+ struct kcopyd_job *job; -+ int i; -+ -+ /* -+ * Allocate a new copy_info. -+ */ -+ info = alloc_copy_info(); -+ if (!info) -+ return -ENOMEM; -+ -+ job = kcopyd_alloc_job(); -+ if (!job) { -+ free_copy_info(info); -+ return -ENOMEM; -+ } -+ -+ /* -+ * set up for the write. -+ */ -+ info->notify = fn; -+ info->notify_context = context; -+ memcpy(&info->to, to, sizeof(*to)); -+ -+ /* Get the pages */ -+ job->nr_pages = nr_pages; -+ for (i = 0; i < nr_pages; i++) { -+ get_page(pages[i]); -+ job->pages[i] = pages[i]; -+ } -+ -+ job->rw = WRITE; -+ -+ memcpy(&job->disk, &info->to, sizeof(job->disk)); -+ job->offset = offset; -+ calc_block_sizes(job); -+ job->callback = page_write_complete; -+ job->context = info; -+ -+ /* -+ * Trigger job. -+ */ -+ kcopyd_io(job); -+ return 0; -+} -+ -+int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, -+ kcopyd_notify_fn fn, void *context) -+{ -+ struct copy_info *info; -+ struct kcopyd_job *job; -+ -+ /* -+ * Allocate a new copy_info. 
-+ */ -+ info = alloc_copy_info(); -+ if (!info) -+ return -ENOMEM; -+ -+ job = kcopyd_alloc_job(); -+ if (!job) { -+ free_copy_info(info); -+ return -ENOMEM; -+ } -+ -+ /* -+ * set up for the read. -+ */ -+ info->notify = fn; -+ info->notify_context = context; -+ memcpy(&info->to, to, sizeof(*to)); -+ -+ job->rw = READ; -+ memcpy(&job->disk, from, sizeof(*from)); -+ -+ job->offset = 0; -+ calc_block_sizes(job); -+ job->callback = copy_write; -+ job->context = info; -+ -+ /* -+ * Trigger job. -+ */ -+ kcopyd_io(job); -+ return 0; -+} -+ -+/*----------------------------------------------------------------- -+ * Unit setup -+ *---------------------------------------------------------------*/ -+static struct { -+ int (*init) (void); -+ void (*exit) (void); -+ -+} _inits[] = { -+#define xx(n) { init_ ## n, exit_ ## n} -+ xx(pages), -+ xx(buffers), -+ xx(jobs), -+ xx(copier) -+#undef xx -+}; -+ -+static int _client_count = 0; -+static DECLARE_MUTEX(_client_count_sem); -+ -+static int kcopyd_init(void) -+{ -+ const int count = sizeof(_inits) / sizeof(*_inits); -+ -+ int r, i; -+ -+ for (i = 0; i < count; i++) { -+ r = _inits[i].init(); -+ if (r) -+ goto bad; -+ } -+ -+ start_daemon(); -+ return 0; -+ -+ bad: -+ while (i--) -+ _inits[i].exit(); -+ -+ return r; -+} -+ -+static void kcopyd_exit(void) -+{ -+ int i = sizeof(_inits) / sizeof(*_inits); -+ -+ if (stop_daemon()) -+ DMWARN("Couldn't stop kcopyd."); -+ -+ while (i--) -+ _inits[i].exit(); -+} -+ -+void kcopyd_inc_client_count(void) -+{ -+ /* -+ * What I need here is an atomic_test_and_inc that returns -+ * the previous value of the atomic... In its absence I lock -+ * an int with a semaphore. :-( -+ */ -+ down(&_client_count_sem); -+ if (_client_count == 0) -+ kcopyd_init(); -+ _client_count++; -+ -+ up(&_client_count_sem); -+} -+ -+void kcopyd_dec_client_count(void) -+{ -+ down(&_client_count_sem); -+ if (--_client_count == 0) -+ kcopyd_exit(); -+ -+ up(&_client_count_sem); -+} -diff -ruN linux-2.4.19/drivers/md/kcopyd.h linux-2.4.19-dm/drivers/md/kcopyd.h ---- linux-2.4.19/drivers/md/kcopyd.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/kcopyd.h Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,101 @@ -+/* -+ * Copyright (C) 2001 Sistina Software -+ * -+ * This file is released under the GPL. -+ */ -+ -+#ifndef DM_KCOPYD_H -+#define DM_KCOPYD_H -+ -+/* -+ * Needed for the definition of offset_t. -+ */ -+#include -+#include -+ -+struct kcopyd_region { -+ kdev_t dev; -+ sector_t sector; -+ sector_t count; -+}; -+ -+#define MAX_KCOPYD_PAGES 128 -+ -+struct kcopyd_job { -+ struct list_head list; -+ -+ /* -+ * Error state of the job. -+ */ -+ int err; -+ -+ /* -+ * Either READ or WRITE -+ */ -+ int rw; -+ -+ /* -+ * The source or destination for the transfer. -+ */ -+ struct kcopyd_region disk; -+ -+ int nr_pages; -+ struct page *pages[MAX_KCOPYD_PAGES]; -+ -+ /* -+ * Shifts and masks that will be useful when dispatching -+ * each buffer_head. -+ */ -+ sector_t offset; -+ sector_t block_size; -+ sector_t block_shift; -+ sector_t bpp_shift; /* blocks per page */ -+ sector_t bpp_mask; -+ -+ /* -+ * nr_blocks is how many buffer heads will have to be -+ * displatched to service this job, nr_requested is how -+ * many have been dispatched and nr_complete is how many -+ * have come back. -+ */ -+ unsigned int nr_blocks; -+ atomic_t nr_requested; -+ atomic_t nr_incomplete; -+ -+ /* -+ * Set this to ensure you are notified when the job has -+ * completed. 'context' is for callback to use. 
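[Aside: kcopyd_inc_client_count()/kcopyd_dec_client_count() above implement a first-client-initialises, last-client-tears-down scheme. A userspace sketch of the same idea, with a pthread mutex standing in for the semaphore; subsys_init/subsys_exit are placeholders.]

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
	static int client_count;

	static void subsys_init(void) { puts("resources reserved"); }
	static void subsys_exit(void) { puts("resources released"); }

	void inc_client_count(void)
	{
		pthread_mutex_lock(&count_lock);
		if (client_count == 0)
			subsys_init();	/* first client pays the setup cost */
		client_count++;
		pthread_mutex_unlock(&count_lock);
	}

	void dec_client_count(void)
	{
		pthread_mutex_lock(&count_lock);
		if (--client_count == 0)
			subsys_exit();	/* last one out turns off the lights */
		pthread_mutex_unlock(&count_lock);
	}

	int main(void)
	{
		inc_client_count();
		inc_client_count();
		dec_client_count();
		dec_client_count();
		return 0;
	}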
-+ */ -+ void (*callback) (struct kcopyd_job *job); -+ void *context; -+}; -+ -+/* -+ * Low level async io routines. -+ */ -+struct kcopyd_job *kcopyd_alloc_job(void); -+void kcopyd_free_job(struct kcopyd_job *job); -+ -+int kcopyd_queue_job(struct kcopyd_job *job); -+ -+/* -+ * Submit a copy job to kcopyd. This is built on top of the -+ * previous three fns. -+ */ -+typedef void (*kcopyd_notify_fn) (int err, void *context); -+ -+int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, -+ kcopyd_notify_fn fn, void *context); -+ -+int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, -+ struct page **pages, int offset, kcopyd_notify_fn fn, -+ void *context); -+ -+/* -+ * We only want kcopyd to reserve resources if someone is -+ * actually using it. -+ */ -+void kcopyd_inc_client_count(void); -+void kcopyd_dec_client_count(void); -+ -+#endif -diff -ruN linux-2.4.19/include/linux/device-mapper.h linux-2.4.19-dm/include/linux/device-mapper.h ---- linux-2.4.19/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/include/linux/device-mapper.h Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,85 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the LGPL. -+ */ -+ -+#ifndef _LINUX_DEVICE_MAPPER_H -+#define _LINUX_DEVICE_MAPPER_H -+ -+#ifdef __KERNEL__ -+ -+typedef unsigned long sector_t; -+ -+struct dm_target; -+struct dm_table; -+struct dm_dev; -+ -+typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; -+ -+/* -+ * In the constructor the target parameter will already have the -+ * table, type, begin and len fields filled in. -+ */ -+typedef int (*dm_ctr_fn) (struct dm_target *target, int argc, char **argv); -+ -+/* -+ * The destructor doesn't need to free the dm_target, just -+ * anything hidden ti->private. -+ */ -+typedef void (*dm_dtr_fn) (struct dm_target *ti); -+ -+/* -+ * The map function must return: -+ * < 0: error -+ * = 0: The target will handle the io by resubmitting it later -+ * > 0: simple remap complete -+ */ -+typedef int (*dm_map_fn) (struct dm_target *ti, struct buffer_head *bh, int rw); -+typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, -+ char *result, int maxlen); -+ -+void dm_error(const char *message); -+ -+/* -+ * Constructors should call these functions to ensure destination devices -+ * are opened/closed correctly. -+ * FIXME: too many arguments. 
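[Aside: to show how the constructor/map/destructor hooks in this header fit together, here is a hedged sketch of a minimal pass-through target modeled on the in-tree linear target. It is illustrative only: it builds only inside a 2.4 tree carrying this patch, and dm_table_get_mode() comes from the internal dm.h rather than this public header. dm_register_target(&passthru_target) and dm_unregister_target() would be called from the module's init and exit hooks.]

	#include <linux/module.h>
	#include <linux/kernel.h>
	#include <linux/errno.h>
	#include <linux/fs.h>
	#include <linux/slab.h>
	#include <linux/device-mapper.h>

	#include "dm.h"		/* internal header, for dm_table_get_mode() */

	struct passthru {
		struct dm_dev *dev;
		sector_t start;
	};

	static int passthru_ctr(struct dm_target *ti, int argc, char **argv)
	{
		struct passthru *p;

		if (argc != 2) {
			ti->error = "passthru: <dev_path> <offset> required";
			return -EINVAL;
		}

		p = kmalloc(sizeof(*p), GFP_KERNEL);
		if (!p) {
			ti->error = "passthru: out of memory";
			return -ENOMEM;
		}

		p->start = simple_strtoul(argv[1], NULL, 10);
		if (dm_get_device(ti, argv[0], p->start, ti->len,
				  dm_table_get_mode(ti->table), &p->dev)) {
			ti->error = "passthru: device lookup failed";
			kfree(p);
			return -EINVAL;
		}

		ti->private = p;
		return 0;
	}

	static void passthru_dtr(struct dm_target *ti)
	{
		struct passthru *p = (struct passthru *) ti->private;

		dm_put_device(ti, p->dev);
		kfree(p);
	}

	static int passthru_map(struct dm_target *ti, struct buffer_head *bh,
				int rw)
	{
		struct passthru *p = (struct passthru *) ti->private;

		bh->b_rdev = p->dev->dev;
		bh->b_rsector = p->start + (bh->b_rsector - ti->begin);
		return 1;	/* ">0: simple remap complete" */
	}

	static struct target_type passthru_target = {
		.name = "passthru",
		.module = THIS_MODULE,
		.ctr = passthru_ctr,
		.dtr = passthru_dtr,
		.map = passthru_map,
	};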
-+ */ -+int dm_get_device(struct dm_target *ti, const char *path, sector_t start, -+ sector_t len, int mode, struct dm_dev **result); -+void dm_put_device(struct dm_target *ti, struct dm_dev *d); -+ -+/* -+ * Information about a target type -+ */ -+struct target_type { -+ const char *name; -+ struct module *module; -+ dm_ctr_fn ctr; -+ dm_dtr_fn dtr; -+ dm_map_fn map; -+ dm_status_fn status; -+}; -+ -+struct dm_target { -+ struct dm_table *table; -+ struct target_type *type; -+ -+ /* target limits */ -+ sector_t begin; -+ sector_t len; -+ -+ /* target specific data */ -+ void *private; -+ -+ /* Used to provide an error string from the ctr */ -+ char *error; -+}; -+ -+int dm_register_target(struct target_type *t); -+int dm_unregister_target(struct target_type *t); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _LINUX_DEVICE_MAPPER_H */ -diff -ruN linux-2.4.19/include/linux/dm-ioctl.h linux-2.4.19-dm/include/linux/dm-ioctl.h ---- linux-2.4.19/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/include/linux/dm-ioctl.h Thu Nov 14 13:50:32 2002 -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the LGPL. -+ */ -+ -+#ifndef _LINUX_DM_IOCTL_H -+#define _LINUX_DM_IOCTL_H -+ -+#include -+ -+#define DM_DIR "mapper" /* Slashes not supported */ -+#define DM_MAX_TYPE_NAME 16 -+#define DM_NAME_LEN 128 -+#define DM_UUID_LEN 129 -+ -+/* -+ * Implements a traditional ioctl interface to the device mapper. -+ */ -+ -+/* -+ * All ioctl arguments consist of a single chunk of memory, with -+ * this structure at the start. If a uuid is specified any -+ * lookup (eg. for a DM_INFO) will be done on that, *not* the -+ * name. -+ */ -+struct dm_ioctl { -+ /* -+ * The version number is made up of three parts: -+ * major - no backward or forward compatibility, -+ * minor - only backwards compatible, -+ * patch - both backwards and forwards compatible. -+ * -+ * All clients of the ioctl interface should fill in the -+ * version number of the interface that they were -+ * compiled with. -+ * -+ * All recognised ioctl commands (ie. those that don't -+ * return -ENOTTY) fill out this field, even if the -+ * command failed. -+ */ -+ uint32_t version[3]; /* in/out */ -+ uint32_t data_size; /* total size of data passed in -+ * including this struct */ -+ -+ uint32_t data_start; /* offset to start of data -+ * relative to start of this struct */ -+ -+ uint32_t target_count; /* in/out */ -+ uint32_t open_count; /* out */ -+ uint32_t flags; /* in/out */ -+ -+ __kernel_dev_t dev; /* in/out */ -+ -+ char name[DM_NAME_LEN]; /* device name */ -+ char uuid[DM_UUID_LEN]; /* unique identifier for -+ * the block device */ -+}; -+ -+/* -+ * Used to specify tables. These structures appear after the -+ * dm_ioctl. -+ */ -+struct dm_target_spec { -+ int32_t status; /* used when reading from kernel only */ -+ uint64_t sector_start; -+ uint32_t length; -+ -+ /* -+ * Offset in bytes (from the start of this struct) to -+ * next target_spec. -+ */ -+ uint32_t next; -+ -+ char target_type[DM_MAX_TYPE_NAME]; -+ -+ /* -+ * Parameter string starts immediately after this object. -+ * Be careful to add padding after string to ensure correct -+ * alignment of subsequent dm_target_spec. -+ */ -+}; -+ -+/* -+ * Used to retrieve the target dependencies. 
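[Aside: the version triple documented above suggests a simple compatibility predicate. One plausible reading of the rule, written out — majors must match exactly, the kernel's minor must be at least the client's, and the patch level never affects compatibility. This helper is an illustration, not code from the driver.]

	#include <stdint.h>
	#include <stdio.h>

	static int dm_version_compatible(const uint32_t kernel[3],
					 const uint32_t client[3])
	{
		if (kernel[0] != client[0])	/* major must match */
			return 0;
		return kernel[1] >= client[1];	/* minor: backwards only */
	}

	int main(void)
	{
		uint32_t k[3] = { 1, 0, 8 };
		uint32_t old_client[3] = { 1, 0, 3 };
		uint32_t new_major[3] = { 2, 0, 0 };

		printf("%d %d\n", dm_version_compatible(k, old_client),
		       dm_version_compatible(k, new_major));	/* 1 0 */
		return 0;
	}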
-+ */ -+struct dm_target_deps { -+ uint32_t count; -+ -+ __kernel_dev_t dev[0]; /* out */ -+}; -+ -+/* -+ * If you change this make sure you make the corresponding change -+ * to dm-ioctl.c:lookup_ioctl() -+ */ -+enum { -+ /* Top level cmds */ -+ DM_VERSION_CMD = 0, -+ DM_REMOVE_ALL_CMD, -+ -+ /* device level cmds */ -+ DM_DEV_CREATE_CMD, -+ DM_DEV_REMOVE_CMD, -+ DM_DEV_RELOAD_CMD, -+ DM_DEV_RENAME_CMD, -+ DM_DEV_SUSPEND_CMD, -+ DM_DEV_DEPS_CMD, -+ DM_DEV_STATUS_CMD, -+ -+ /* target level cmds */ -+ DM_TARGET_STATUS_CMD, -+ DM_TARGET_WAIT_CMD -+}; -+ -+#define DM_IOCTL 0xfd -+ -+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) -+#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) -+ -+#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) -+#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) -+#define DM_DEV_RELOAD _IOWR(DM_IOCTL, DM_DEV_RELOAD_CMD, struct dm_ioctl) -+#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) -+#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) -+#define DM_DEV_DEPS _IOWR(DM_IOCTL, DM_DEV_DEPS_CMD, struct dm_ioctl) -+#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) -+ -+#define DM_TARGET_STATUS _IOWR(DM_IOCTL, DM_TARGET_STATUS_CMD, struct dm_ioctl) -+#define DM_TARGET_WAIT _IOWR(DM_IOCTL, DM_TARGET_WAIT_CMD, struct dm_ioctl) -+ -+#define DM_VERSION_MAJOR 1 -+#define DM_VERSION_MINOR 0 -+#define DM_VERSION_PATCHLEVEL 8 -+#define DM_VERSION_EXTRA "-ioctl (2002-11-21)" -+ -+/* Status bits */ -+#define DM_READONLY_FLAG 0x00000001 -+#define DM_SUSPEND_FLAG 0x00000002 -+#define DM_EXISTS_FLAG 0x00000004 -+#define DM_PERSISTENT_DEV_FLAG 0x00000008 -+ -+/* -+ * Flag passed into ioctl STATUS command to get table information -+ * rather than current status. 
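[Aside: for completeness, a minimal userspace probe of this interface. It assumes the patched headers are installed and that a control node such as /dev/mapper/control exists; as dm.h notes, the driver may be driven through either an ioctl or a filesystem interface depending on which patch is applied, so the node path is an assumption.]

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/dm-ioctl.h>

	int main(void)
	{
		struct dm_ioctl dmi;
		int fd = open("/dev/mapper/control", O_RDWR); /* assumed node */

		if (fd < 0) {
			perror("open");
			return 1;
		}

		memset(&dmi, 0, sizeof(dmi));
		dmi.version[0] = DM_VERSION_MAJOR;	/* what we were built against */
		dmi.version[1] = DM_VERSION_MINOR;
		dmi.version[2] = DM_VERSION_PATCHLEVEL;
		dmi.data_size = sizeof(dmi);

		if (ioctl(fd, DM_VERSION, &dmi) < 0)
			perror("DM_VERSION");
		else
			printf("kernel dm interface %u.%u.%u\n",
			       dmi.version[0], dmi.version[1], dmi.version[2]);

		close(fd);
		return 0;
	}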
-+ */ -+#define DM_STATUS_TABLE_FLAG 0x00000010 -+ -+#endif /* _LINUX_DM_IOCTL_H */ diff --git a/patches/common/linux-2.4.19-mempool.patch b/patches/common/linux-2.4.19-mempool.patch deleted file mode 100644 index ca288ac..0000000 --- a/patches/common/linux-2.4.19-mempool.patch +++ /dev/null @@ -1,330 +0,0 @@ -diff -ruN linux-2.4.19/include/linux/mempool.h linux-2.4.19-dm/include/linux/mempool.h ---- linux-2.4.19/include/linux/mempool.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/include/linux/mempool.h Thu Nov 21 14:21:19 2002 -@@ -0,0 +1,23 @@ -+/* -+ * memory buffer pool support -+ */ -+#ifndef _LINUX_MEMPOOL_H -+#define _LINUX_MEMPOOL_H -+ -+#include -+#include -+ -+struct mempool_s; -+typedef struct mempool_s mempool_t; -+ -+typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data); -+typedef void (mempool_free_t)(void *element, void *pool_data); -+ -+extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, -+ mempool_free_t *free_fn, void *pool_data); -+extern int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask); -+extern void mempool_destroy(mempool_t *pool); -+extern void * mempool_alloc(mempool_t *pool, int gfp_mask); -+extern void mempool_free(void *element, mempool_t *pool); -+ -+#endif /* _LINUX_MEMPOOL_H */ -diff -ruN linux-2.4.19/mm/Makefile linux-2.4.19-dm/mm/Makefile ---- linux-2.4.19/mm/Makefile Wed Aug 14 11:52:12 2002 -+++ linux-2.4.19-dm/mm/Makefile Thu Nov 14 13:50:32 2002 -@@ -9,12 +9,12 @@ - - O_TARGET := mm.o - --export-objs := shmem.o filemap.o memory.o page_alloc.o -+export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o - - obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ - vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ - page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ -- shmem.o -+ shmem.o mempool.o - - obj-$(CONFIG_HIGHMEM) += highmem.o - -diff -ruN linux-2.4.19/mm/mempool.c linux-2.4.19-dm/mm/mempool.c ---- linux-2.4.19/mm/mempool.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/mm/mempool.c Thu Nov 21 14:21:19 2002 -@@ -0,0 +1,281 @@ -+/* -+ * linux/mm/mempool.c -+ * -+ * memory buffer pool support. Such pools are mostly used -+ * for guaranteed, deadlock-free memory allocations during -+ * extreme VM load. -+ * -+ * started by Ingo Molnar, Copyright (C) 2001 -+ */ -+ -+#include -+#include -+#include -+#include -+ -+struct mempool_s { -+ spinlock_t lock; -+ int min_nr; /* nr of elements at *elements */ -+ int curr_nr; /* Current nr of elements at *elements */ -+ void **elements; -+ -+ void *pool_data; -+ mempool_alloc_t *alloc; -+ mempool_free_t *free; -+ wait_queue_head_t wait; -+}; -+ -+static void add_element(mempool_t *pool, void *element) -+{ -+ BUG_ON(pool->curr_nr >= pool->min_nr); -+ pool->elements[pool->curr_nr++] = element; -+} -+ -+static void *remove_element(mempool_t *pool) -+{ -+ BUG_ON(pool->curr_nr <= 0); -+ return pool->elements[--pool->curr_nr]; -+} -+ -+static void free_pool(mempool_t *pool) -+{ -+ while (pool->curr_nr) { -+ void *element = remove_element(pool); -+ pool->free(element, pool->pool_data); -+ } -+ kfree(pool->elements); -+ kfree(pool); -+} -+ -+/** -+ * mempool_create - create a memory pool -+ * @min_nr: the minimum number of elements guaranteed to be -+ * allocated for this pool. -+ * @alloc_fn: user-defined element-allocation function. -+ * @free_fn: user-defined element-freeing function. -+ * @pool_data: optional private data available to the user-defined functions. 
-+ * -+ * this function creates and allocates a guaranteed size, preallocated -+ * memory pool. The pool can be used from the mempool_alloc and mempool_free -+ * functions. This function might sleep. Both the alloc_fn() and the free_fn() -+ * functions might sleep - as long as the mempool_alloc function is not called -+ * from IRQ contexts. -+ */ -+mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, -+ mempool_free_t *free_fn, void *pool_data) -+{ -+ mempool_t *pool; -+ -+ pool = kmalloc(sizeof(*pool), GFP_KERNEL); -+ if (!pool) -+ return NULL; -+ memset(pool, 0, sizeof(*pool)); -+ pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL); -+ if (!pool->elements) { -+ kfree(pool); -+ return NULL; -+ } -+ spin_lock_init(&pool->lock); -+ pool->min_nr = min_nr; -+ pool->pool_data = pool_data; -+ init_waitqueue_head(&pool->wait); -+ pool->alloc = alloc_fn; -+ pool->free = free_fn; -+ -+ /* -+ * First pre-allocate the guaranteed number of buffers. -+ */ -+ while (pool->curr_nr < pool->min_nr) { -+ void *element; -+ -+ element = pool->alloc(GFP_KERNEL, pool->pool_data); -+ if (unlikely(!element)) { -+ free_pool(pool); -+ return NULL; -+ } -+ add_element(pool, element); -+ } -+ return pool; -+} -+ -+/** -+ * mempool_resize - resize an existing memory pool -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * @new_min_nr: the new minimum number of elements guaranteed to be -+ * allocated for this pool. -+ * @gfp_mask: the usual allocation bitmask. -+ * -+ * This function shrinks/grows the pool. In the case of growing, -+ * it cannot be guaranteed that the pool will be grown to the new -+ * size immediately, but new mempool_free() calls will refill it. -+ * -+ * Note, the caller must guarantee that no mempool_destroy is called -+ * while this function is running. mempool_alloc() & mempool_free() -+ * might be called (eg. from IRQ contexts) while this function executes. -+ */ -+int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask) -+{ -+ void *element; -+ void **new_elements; -+ unsigned long flags; -+ -+ BUG_ON(new_min_nr <= 0); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ if (new_min_nr < pool->min_nr) { -+ while (pool->curr_nr > new_min_nr) { -+ element = remove_element(pool); -+ spin_unlock_irqrestore(&pool->lock, flags); -+ pool->free(element, pool->pool_data); -+ spin_lock_irqsave(&pool->lock, flags); -+ } -+ pool->min_nr = new_min_nr; -+ goto out_unlock; -+ } -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ /* Grow the pool */ -+ new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask); -+ if (!new_elements) -+ return -ENOMEM; -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ memcpy(new_elements, pool->elements, -+ pool->curr_nr * sizeof(*new_elements)); -+ kfree(pool->elements); -+ pool->elements = new_elements; -+ pool->min_nr = new_min_nr; -+ -+ while (pool->curr_nr < pool->min_nr) { -+ spin_unlock_irqrestore(&pool->lock, flags); -+ element = pool->alloc(gfp_mask, pool->pool_data); -+ if (!element) -+ goto out; -+ spin_lock_irqsave(&pool->lock, flags); -+ if (pool->curr_nr < pool->min_nr) -+ add_element(pool, element); -+ else -+ kfree(element); /* Raced */ -+ } -+out_unlock: -+ spin_unlock_irqrestore(&pool->lock, flags); -+out: -+ return 0; -+} -+ -+/** -+ * mempool_destroy - deallocate a memory pool -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * -+ * this function only sleeps if the free_fn() function sleeps. 
The caller -+ * has to guarantee that all elements have been returned to the pool (ie: -+ * freed) prior to calling mempool_destroy(). -+ */ -+void mempool_destroy(mempool_t *pool) -+{ -+ if (pool->curr_nr != pool->min_nr) -+ BUG(); /* There were outstanding elements */ -+ free_pool(pool); -+} -+ -+/** -+ * mempool_alloc - allocate an element from a specific memory pool -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * @gfp_mask: the usual allocation bitmask. -+ * -+ * this function only sleeps if the alloc_fn function sleeps or -+ * returns NULL. Note that due to preallocation, this function -+ * *never* fails when called from process contexts. (it might -+ * fail if called from an IRQ context.) -+ */ -+void * mempool_alloc(mempool_t *pool, int gfp_mask) -+{ -+ void *element; -+ unsigned long flags; -+ int curr_nr; -+ DECLARE_WAITQUEUE(wait, current); -+ int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); -+ -+repeat_alloc: -+ element = pool->alloc(gfp_nowait, pool->pool_data); -+ if (likely(element != NULL)) -+ return element; -+ -+ /* -+ * If the pool is less than 50% full then try harder -+ * to allocate an element: -+ */ -+ if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) { -+ element = pool->alloc(gfp_mask, pool->pool_data); -+ if (likely(element != NULL)) -+ return element; -+ } -+ -+ /* -+ * Kick the VM at this point. -+ */ -+ wakeup_bdflush(); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ if (likely(pool->curr_nr)) { -+ element = remove_element(pool); -+ spin_unlock_irqrestore(&pool->lock, flags); -+ return element; -+ } -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ /* We must not sleep in the GFP_ATOMIC case */ -+ if (gfp_mask == gfp_nowait) -+ return NULL; -+ -+ run_task_queue(&tq_disk); -+ -+ add_wait_queue_exclusive(&pool->wait, &wait); -+ set_task_state(current, TASK_UNINTERRUPTIBLE); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ curr_nr = pool->curr_nr; -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ if (!curr_nr) -+ schedule(); -+ -+ current->state = TASK_RUNNING; -+ remove_wait_queue(&pool->wait, &wait); -+ -+ goto repeat_alloc; -+} -+ -+/** -+ * mempool_free - return an element to the pool. -+ * @element: pool element pointer. -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * -+ * this function only sleeps if the free_fn() function sleeps. 
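[Aside: the contract documented in these kernel-doc comments — a preallocated reserve of min_nr elements, alloc dipping into the reserve when the allocator fails, free topping the reserve back up — can be modelled in a few lines of userspace C. No locking or sleeping here; this is purely to show the shape of the reserve.]

	#include <stdlib.h>

	#define MIN_NR 4

	struct pool {
		int curr;
		void *elements[MIN_NR];
	};

	static struct pool *pool_create(size_t sz)
	{
		struct pool *p = malloc(sizeof(*p));

		if (!p)
			return NULL;

		for (p->curr = 0; p->curr < MIN_NR; p->curr++) {
			p->elements[p->curr] = malloc(sz);
			if (!p->elements[p->curr]) {
				while (p->curr--)	/* unwind */
					free(p->elements[p->curr]);
				free(p);
				return NULL;
			}
		}
		return p;
	}

	static void *pool_alloc(struct pool *p, size_t sz)
	{
		void *e = malloc(sz);	/* try the allocator first */

		if (e)
			return e;
		return p->curr ? p->elements[--p->curr] : NULL; /* reserve */
	}

	static void pool_free(struct pool *p, void *e)
	{
		if (p->curr < MIN_NR)	/* top the reserve back up first */
			p->elements[p->curr++] = e;
		else
			free(e);
	}

	int main(void)
	{
		struct pool *p = pool_create(64);
		void *e;

		if (!p)
			return 1;
		e = pool_alloc(p, 64);
		pool_free(p, e);
		return 0;
	}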
-+ */ -+void mempool_free(void *element, mempool_t *pool) -+{ -+ unsigned long flags; -+ -+ if (pool->curr_nr < pool->min_nr) { -+ spin_lock_irqsave(&pool->lock, flags); -+ if (pool->curr_nr < pool->min_nr) { -+ add_element(pool, element); -+ spin_unlock_irqrestore(&pool->lock, flags); -+ wake_up(&pool->wait); -+ return; -+ } -+ spin_unlock_irqrestore(&pool->lock, flags); -+ } -+ pool->free(element, pool->pool_data); -+} -+ -+EXPORT_SYMBOL(mempool_create); -+EXPORT_SYMBOL(mempool_resize); -+EXPORT_SYMBOL(mempool_destroy); -+EXPORT_SYMBOL(mempool_alloc); -+EXPORT_SYMBOL(mempool_free); diff --git a/patches/common/linux-2.4.19-mempool_slab.patch b/patches/common/linux-2.4.19-mempool_slab.patch deleted file mode 100644 index 0794692..0000000 --- a/patches/common/linux-2.4.19-mempool_slab.patch +++ /dev/null @@ -1,46 +0,0 @@ -diff -Nru linux-2.4.19/include/linux/mempool.h linux/include/linux/mempool.h ---- linux-2.4.19/include/linux/mempool.h Wed Dec 31 16:00:00 2001 -+++ linux/include/linux/mempool.h Tue Apr 23 20:55:52 2002 -@@ -19,5 +19,12 @@ - extern void mempool_destroy(mempool_t *pool); - extern void * mempool_alloc(mempool_t *pool, int gfp_mask); - extern void mempool_free(void *element, mempool_t *pool); - -+/* -+ * A mempool_alloc_t and mempool_free_t that get the memory from -+ * a slab that is passed in through pool_data. -+ */ -+void *mempool_alloc_slab(int gfp_mask, void *pool_data); -+void mempool_free_slab(void *element, void *pool_data); -+ - #endif /* _LINUX_MEMPOOL_H */ -diff -Nru linux-2.4.19/mm/mempool.c linux/mm/mempool.c ---- linux-2.4.19/mm/mempool.c Wed Dec 31 16:00:00 1969 -+++ linux/mm/mempool.c Tue Apr 23 20:55:52 2002 -@@ -273,9 +273,26 @@ - } - pool->free(element, pool->pool_data); - } - -+/* -+ * A commonly used alloc and free fn. -+ */ -+void *mempool_alloc_slab(int gfp_mask, void *pool_data) -+{ -+ kmem_cache_t *mem = (kmem_cache_t *) pool_data; -+ return kmem_cache_alloc(mem, gfp_mask); -+} -+ -+void mempool_free_slab(void *element, void *pool_data) -+{ -+ kmem_cache_t *mem = (kmem_cache_t *) pool_data; -+ kmem_cache_free(mem, element); -+} -+ - EXPORT_SYMBOL(mempool_create); - EXPORT_SYMBOL(mempool_resize); - EXPORT_SYMBOL(mempool_destroy); - EXPORT_SYMBOL(mempool_alloc); - EXPORT_SYMBOL(mempool_free); -+EXPORT_SYMBOL(mempool_alloc_slab); -+EXPORT_SYMBOL(mempool_free_slab); diff --git a/patches/common/linux-2.4.19-vcalloc.patch b/patches/common/linux-2.4.19-vcalloc.patch deleted file mode 100644 index e8fc41b..0000000 --- a/patches/common/linux-2.4.19-vcalloc.patch +++ /dev/null @@ -1,48 +0,0 @@ -diff -Nru linux-2.4.19/mm/vmalloc.c linux/mm/vmalloc.c ---- linux-2.4.19/mm/vmalloc.c Wed Jun 12 12:04:44 2002 -+++ linux/mm/vmalloc.c Thu Jun 13 13:13:44 2002 -@@ -321,3 +321,22 @@ - read_unlock(&vmlist_lock); - return buf - buf_start; - } -+ -+void *vcalloc(unsigned long nmemb, unsigned long elem_size) -+{ -+ unsigned long size; -+ void *addr; -+ -+ /* -+ * Check that we're not going to overflow. 
-+ */ -+ if (nmemb > (ULONG_MAX / elem_size)) -+ return NULL; -+ -+ size = nmemb * elem_size; -+ addr = vmalloc(size); -+ if (addr) -+ memset(addr, 0, size); -+ -+ return addr; -+} -diff -Nru linux-2.4.19/include/linux/vmalloc.h linux/include/linux/vmalloc.h ---- linux-2.4.19/include/linux/vmalloc.h Wed Jun 12 12:35:58 2002 -+++ linux/include/linux/vmalloc.h Thu Jun 13 13:13:39 2002 -@@ -25,6 +25,7 @@ - extern void vmfree_area_pages(unsigned long address, unsigned long size); - extern int vmalloc_area_pages(unsigned long address, unsigned long size, - int gfp_mask, pgprot_t prot); -+extern void *vcalloc(unsigned long nmemb, unsigned long elem_size); - - /* - * Allocate any pages -diff -Nru linux-2.4.19/kernel/ksyms.c linux/kernel/ksyms.c ---- linux-2.4.19/kernel/ksyms.c Wed Jun 26 18:42:28 2002 -+++ linux/kernel/ksyms.c Wed Jun 26 18:37:01 2002 -@@ -109,6 +109,7 @@ - EXPORT_SYMBOL(vfree); - EXPORT_SYMBOL(__vmalloc); - EXPORT_SYMBOL(vmalloc_to_page); -+EXPORT_SYMBOL(vcalloc); - EXPORT_SYMBOL(mem_map); - EXPORT_SYMBOL(remap_page_range); - EXPORT_SYMBOL(max_mapnr); diff --git a/patches/linux-2.4.19-VFS-lock.patch b/patches/linux-2.4.19-VFS-lock.patch deleted file mode 100644 index e7b80cd..0000000 --- a/patches/linux-2.4.19-VFS-lock.patch +++ /dev/null @@ -1,243 +0,0 @@ -diff -Nru a/drivers/md/lvm.c b/drivers/md/lvm.c ---- a/drivers/md/lvm.c Wed May 22 10:43:49 2002 -+++ b/drivers/md/lvm.c Wed May 22 10:43:49 2002 -@@ -223,9 +223,6 @@ - #define DEVICE_OFF(device) - #define LOCAL_END_REQUEST - --/* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */ --/* #define LVM_VFS_ENHANCEMENT */ -- - #include - #include - #include -@@ -2178,12 +2175,8 @@ - if (lv_ptr->lv_access & LV_SNAPSHOT) { - lv_t *org = lv_ptr->lv_snapshot_org, *last; - -- /* sync the original logical volume */ -- fsync_dev(org->lv_dev); --#ifdef LVM_VFS_ENHANCEMENT - /* VFS function call to sync and lock the filesystem */ - fsync_dev_lockfs(org->lv_dev); --#endif - - down_write(&org->lv_lock); - org->lv_access |= LV_SNAPSHOT_ORG; -@@ -2209,11 +2202,9 @@ - else - set_device_ro(lv_ptr->lv_dev, 1); - --#ifdef LVM_VFS_ENHANCEMENT - /* VFS function call to unlock the filesystem */ - if (lv_ptr->lv_access & LV_SNAPSHOT) - unlockfs(lv_ptr->lv_snapshot_org->lv_dev); --#endif - - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = - lvm_fs_create_lv(vg_ptr, lv_ptr); -diff -Nru a/fs/buffer.c b/fs/buffer.c ---- a/fs/buffer.c Wed May 22 10:43:49 2002 -+++ b/fs/buffer.c Wed May 22 10:43:49 2002 -@@ -362,6 +362,38 @@ - fsync_dev(dev); - } - -+int fsync_dev_lockfs(kdev_t dev) -+{ -+ /* you are not allowed to try locking all the filesystems -+ ** on the system, your chances of getting through without -+ ** total deadlock are slim to none. 
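[Aside: the overflow guard in vcalloc() above is the standard one for calloc-style interfaces — reject nmemb when nmemb * elem_size would wrap before allocating anything. Exercised standalone, with malloc in place of vmalloc:]

	#include <limits.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	static void *vcalloc(unsigned long nmemb, unsigned long elem_size)
	{
		void *addr;

		if (nmemb > (ULONG_MAX / elem_size))	/* would overflow */
			return NULL;

		addr = malloc(nmemb * elem_size);	/* vmalloc in the kernel */
		if (addr)
			memset(addr, 0, nmemb * elem_size);
		return addr;
	}

	int main(void)
	{
		printf("%p\n", vcalloc(16, 512));		/* succeeds */
		printf("%p\n", vcalloc(ULONG_MAX / 2, 512));	/* NULL */
		return 0;
	}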
-+ */ -+ if (!dev) -+ return fsync_dev(dev) ; -+ -+ sync_buffers(dev, 0); -+ -+ lock_kernel(); -+ /* note, the FS might need to start transactions to -+ ** sync the inodes, or the quota, no locking until -+ ** after these are done -+ */ -+ sync_inodes(dev); -+#ifdef DQUOT_SYNC_DEV -+ DQUOT_SYNC_DEV(dev); -+#else -+ DQUOT_SYNC(dev); -+#endif -+ /* if inodes or quotas could be dirtied during the -+ ** sync_supers_lockfs call, the FS is responsible for getting -+ ** them on disk, without deadlocking against the lock -+ */ -+ sync_supers_lockfs(dev) ; -+ unlock_kernel(); -+ -+ return sync_buffers(dev, 1) ; -+} -+ - asmlinkage long sys_sync(void) - { - fsync_dev(0); -diff -Nru a/fs/reiserfs/super.c b/fs/reiserfs/super.c ---- a/fs/reiserfs/super.c Wed May 22 10:43:49 2002 -+++ b/fs/reiserfs/super.c Wed May 22 10:43:49 2002 -@@ -66,7 +66,7 @@ - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); - journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); - reiserfs_block_writes(&th) ; -- journal_end(&th, s, 1) ; -+ journal_end_sync(&th, s, 1) ; - } - s->s_dirt = dirty; - unlock_kernel() ; -diff -Nru a/fs/super.c b/fs/super.c ---- a/fs/super.c Wed May 22 10:43:49 2002 -+++ b/fs/super.c Wed May 22 10:43:49 2002 -@@ -37,6 +37,13 @@ - LIST_HEAD(super_blocks); - spinlock_t sb_lock = SPIN_LOCK_UNLOCKED; - -+/* -+ * lock/unlockfs grab a read lock on s_umount, but you need this lock to -+ * make sure no lockfs runs are in progress before inserting/removing -+ * supers from the list. -+ */ -+static DECLARE_MUTEX(lockfs_sem); -+ - /* - * Handling of filesystem drivers list. - * Rules: -@@ -431,6 +438,19 @@ - put_super(sb); - } - -+static void write_super_lockfs(struct super_block *sb) -+{ -+ lock_super(sb); -+ if (sb->s_root && sb->s_op) { -+ if (sb->s_dirt && sb->s_op->write_super) -+ sb->s_op->write_super(sb); -+ if (sb->s_op->write_super_lockfs) { -+ sb->s_op->write_super_lockfs(sb); -+ } -+ } -+ unlock_super(sb); -+} -+ - static inline void write_super(struct super_block *sb) - { - lock_super(sb); -@@ -474,6 +494,39 @@ - spin_unlock(&sb_lock); - } - -+/* -+ * Note: don't check the dirty flag before waiting, we want the lock -+ * to happen every time this is called. dev must be non-zero -+ */ -+void sync_supers_lockfs(kdev_t dev) -+{ -+ struct super_block * sb; -+ -+ down(&lockfs_sem) ; -+ if (dev) { -+ sb = get_super(dev); -+ if (sb) { -+ write_super_lockfs(sb); -+ drop_super(sb); -+ } -+ } -+} -+ -+void unlockfs(kdev_t dev) -+{ -+ struct super_block * sb; -+ -+ if (dev) { -+ sb = get_super(dev); -+ if (sb) { -+ if (sb->s_op && sb->s_op->unlockfs) -+ sb->s_op->unlockfs(sb) ; -+ drop_super(sb); -+ } -+ } -+ up(&lockfs_sem) ; -+} -+ - /** - * get_super - get the superblock of a device - * @dev: device to get the superblock for -@@ -694,6 +747,7 @@ - goto out1; - - error = -EBUSY; -+ down(&lockfs_sem); - restart: - spin_lock(&sb_lock); - -@@ -705,6 +759,7 @@ - ((flags ^ old->s_flags) & MS_RDONLY)) { - spin_unlock(&sb_lock); - destroy_super(s); -+ up(&lockfs_sem); - goto out1; - } - if (!grab_super(old)) -@@ -712,12 +767,14 @@ - destroy_super(s); - blkdev_put(bdev, BDEV_FS); - path_release(&nd); -+ up(&lockfs_sem); - return old; - } - s->s_dev = dev; - s->s_bdev = bdev; - s->s_flags = flags; - insert_super(s, fs_type); -+ up(&lockfs_sem); - if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 
1 : 0)) - goto Einval; - s->s_flags |= MS_ACTIVE; -@@ -825,7 +882,10 @@ - if (!deactivate_super(sb)) - return; - -+ down(&lockfs_sem); - down_write(&sb->s_umount); -+ up(&lockfs_sem); -+ - sb->s_root = NULL; - /* Need to clean after the sucker */ - if (fs->fs_flags & FS_LITTER) -diff -Nru a/include/linux/fs.h b/include/linux/fs.h ---- a/include/linux/fs.h Wed May 22 10:43:49 2002 -+++ b/include/linux/fs.h Wed May 22 10:43:49 2002 -@@ -1235,6 +1235,7 @@ - extern int sync_buffers(kdev_t, int); - extern void sync_dev(kdev_t); - extern int fsync_dev(kdev_t); -+extern int fsync_dev_lockfs(kdev_t); - extern int fsync_super(struct super_block *); - extern int fsync_no_super(kdev_t); - extern void sync_inodes_sb(struct super_block *); -@@ -1251,6 +1251,8 @@ - extern int filemap_fdatasync(struct address_space *); - extern int filemap_fdatawait(struct address_space *); - extern void sync_supers(kdev_t); -+extern void sync_supers_lockfs(kdev_t); -+extern void unlockfs(kdev_t); - extern int bmap(struct inode *, int); - extern int notify_change(struct dentry *, struct iattr *); - extern int permission(struct inode *, int); -diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c ---- a/kernel/ksyms.c Wed May 22 10:43:49 2002 -+++ b/kernel/ksyms.c Wed May 22 10:43:49 2002 -@@ -181,6 +181,8 @@ - EXPORT_SYMBOL(invalidate_inode_pages); - EXPORT_SYMBOL(truncate_inode_pages); - EXPORT_SYMBOL(fsync_dev); -+EXPORT_SYMBOL(fsync_dev_lockfs); -+EXPORT_SYMBOL(unlockfs); - EXPORT_SYMBOL(fsync_no_super); - EXPORT_SYMBOL(permission); - EXPORT_SYMBOL(vfs_permission); diff --git a/patches/linux-2.4.19-devmapper-ioctl.patch b/patches/linux-2.4.19-devmapper-ioctl.patch deleted file mode 100644 index e806e1c..0000000 --- a/patches/linux-2.4.19-devmapper-ioctl.patch +++ /dev/null @@ -1,7432 +0,0 @@ -diff -ruN linux-2.4.19/Documentation/Configure.help linux-2.4.19-dm/Documentation/Configure.help ---- linux-2.4.19/Documentation/Configure.help Wed Aug 14 11:49:48 2002 -+++ linux-2.4.19-dm/Documentation/Configure.help Thu Nov 21 14:31:25 2002 -@@ -1775,6 +1775,20 @@ - want), say M here and read . The - module will be called lvm-mod.o. - -+Device-mapper support -+CONFIG_BLK_DEV_DM -+ Device-mapper is a low level volume manager. It works by allowing -+ people to specify mappings for ranges of logical sectors. Various -+ mapping types are available, in addition people may write their own -+ modules containing custom mappings if they wish. -+ -+ Higher level volume managers such as LVM2 use this driver. -+ -+ If you want to compile this as a module, say M here and read -+ . The module will be called dm-mod.o. -+ -+ If unsure, say N. -+ - Multiple devices driver support (RAID and LVM) - CONFIG_MD - Support multiple physical spindles through a single logical device. -diff -ruN linux-2.4.19/MAINTAINERS linux-2.4.19-dm/MAINTAINERS ---- linux-2.4.19/MAINTAINERS Wed Aug 14 11:49:45 2002 -+++ linux-2.4.19-dm/MAINTAINERS Thu Nov 21 14:31:25 2002 -@@ -426,6 +426,13 @@ - W: http://www.debian.org/~dz/i8k/ - S: Maintained - -+DEVICE MAPPER -+P: Joe Thornber -+M: dm@uk.sistina.com -+L: linux-LVM@sistina.com -+W: http://www.sistina.com/lvm -+S: Maintained -+ - DEVICE NUMBER REGISTRY - P: H. 
Peter Anvin - M: hpa@zytor.com -diff -ruN linux-2.4.19/arch/mips64/kernel/ioctl32.c linux-2.4.19-dm/arch/mips64/kernel/ioctl32.c ---- linux-2.4.19/arch/mips64/kernel/ioctl32.c Wed Aug 14 11:50:16 2002 -+++ linux-2.4.19-dm/arch/mips64/kernel/ioctl32.c Thu Nov 21 14:31:14 2002 -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - #include - #undef __KERNEL__ /* This file was born to be ugly ... */ - #include -@@ -816,6 +817,20 @@ - IOCTL32_DEFAULT(STOP_ARRAY_RO), - IOCTL32_DEFAULT(RESTART_ARRAY_RW), - #endif /* CONFIG_MD */ -+ -+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE) -+ IOCTL32_DEFAULT(DM_VERSION), -+ IOCTL32_DEFAULT(DM_REMOVE_ALL), -+ IOCTL32_DEFAULT(DM_DEV_CREATE), -+ IOCTL32_DEFAULT(DM_DEV_REMOVE), -+ IOCTL32_DEFAULT(DM_DEV_RELOAD), -+ IOCTL32_DEFAULT(DM_DEV_SUSPEND), -+ IOCTL32_DEFAULT(DM_DEV_RENAME), -+ IOCTL32_DEFAULT(DM_DEV_DEPS), -+ IOCTL32_DEFAULT(DM_DEV_STATUS), -+ IOCTL32_DEFAULT(DM_TARGET_STATUS), -+ IOCTL32_DEFAULT(DM_TARGET_WAIT), -+#endif /* CONFIG_BLK_DEV_DM */ - - IOCTL32_DEFAULT(MTIOCTOP), /* mtio.h ioctls */ - IOCTL32_HANDLER(MTIOCGET32, mt_ioctl_trans), -diff -ruN linux-2.4.19/arch/ppc64/kernel/ioctl32.c linux-2.4.19-dm/arch/ppc64/kernel/ioctl32.c ---- linux-2.4.19/arch/ppc64/kernel/ioctl32.c Wed Aug 14 11:50:22 2002 -+++ linux-2.4.19-dm/arch/ppc64/kernel/ioctl32.c Thu Nov 21 14:31:14 2002 -@@ -65,6 +65,7 @@ - #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) - #include - #endif /* LVM */ -+#include - - #include - /* Ugly hack. */ -@@ -4187,6 +4188,18 @@ - COMPATIBLE_IOCTL(NBD_PRINT_DEBUG), - COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS), - COMPATIBLE_IOCTL(NBD_DISCONNECT), -+/* device-mapper */ -+COMPATIBLE_IOCTL(DM_VERSION), -+COMPATIBLE_IOCTL(DM_REMOVE_ALL), -+COMPATIBLE_IOCTL(DM_DEV_CREATE), -+COMPATIBLE_IOCTL(DM_DEV_REMOVE), -+COMPATIBLE_IOCTL(DM_DEV_RELOAD), -+COMPATIBLE_IOCTL(DM_DEV_SUSPEND), -+COMPATIBLE_IOCTL(DM_DEV_RENAME), -+COMPATIBLE_IOCTL(DM_DEV_DEPS), -+COMPATIBLE_IOCTL(DM_DEV_STATUS), -+COMPATIBLE_IOCTL(DM_TARGET_STATUS), -+COMPATIBLE_IOCTL(DM_TARGET_WAIT), - /* Remove *PRIVATE in 2.5 */ - COMPATIBLE_IOCTL(SIOCDEVPRIVATE), - COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1), -diff -ruN linux-2.4.19/arch/s390x/kernel/ioctl32.c linux-2.4.19-dm/arch/s390x/kernel/ioctl32.c ---- linux-2.4.19/arch/s390x/kernel/ioctl32.c Wed Aug 14 11:50:27 2002 -+++ linux-2.4.19-dm/arch/s390x/kernel/ioctl32.c Thu Nov 21 14:31:14 2002 -@@ -25,6 +25,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -507,6 +508,18 @@ - IOCTL32_DEFAULT(VT_UNLOCKSWITCH), - - IOCTL32_DEFAULT(SIOCGSTAMP), -+ -+ IOCTL32_DEFAULT(DM_VERSION), -+ IOCTL32_DEFAULT(DM_REMOVE_ALL), -+ IOCTL32_DEFAULT(DM_DEV_CREATE), -+ IOCTL32_DEFAULT(DM_DEV_REMOVE), -+ IOCTL32_DEFAULT(DM_DEV_RELOAD), -+ IOCTL32_DEFAULT(DM_DEV_SUSPEND), -+ IOCTL32_DEFAULT(DM_DEV_RENAME), -+ IOCTL32_DEFAULT(DM_DEV_DEPS), -+ IOCTL32_DEFAULT(DM_DEV_STATUS), -+ IOCTL32_DEFAULT(DM_TARGET_STATUS), -+ IOCTL32_DEFAULT(DM_TARGET_WAIT), - - IOCTL32_HANDLER(SIOCGIFNAME, dev_ifname32), - IOCTL32_HANDLER(SIOCGIFCONF, dev_ifconf), -diff -ruN linux-2.4.19/arch/sparc64/kernel/ioctl32.c linux-2.4.19-dm/arch/sparc64/kernel/ioctl32.c ---- linux-2.4.19/arch/sparc64/kernel/ioctl32.c Wed Aug 14 11:50:32 2002 -+++ linux-2.4.19-dm/arch/sparc64/kernel/ioctl32.c Thu Nov 21 14:31:14 2002 -@@ -54,6 +54,7 @@ - #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) - #include - #endif /* LVM */ -+#include - - #include - /* Ugly hack. 
*/ -@@ -4608,6 +4609,19 @@ - COMPATIBLE_IOCTL(NBD_PRINT_DEBUG) - COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS) - COMPATIBLE_IOCTL(NBD_DISCONNECT) -+/* device-mapper */ -+COMPATIBLE_IOCTL(DM_VERSION) -+COMPATIBLE_IOCTL(DM_REMOVE_ALL) -+COMPATIBLE_IOCTL(DM_DEV_CREATE) -+COMPATIBLE_IOCTL(DM_DEV_REMOVE) -+COMPATIBLE_IOCTL(DM_DEV_RELOAD) -+COMPATIBLE_IOCTL(DM_DEV_SUSPEND) -+COMPATIBLE_IOCTL(DM_DEV_RENAME) -+COMPATIBLE_IOCTL(DM_DEV_DEPS) -+COMPATIBLE_IOCTL(DM_DEV_STATUS) -+COMPATIBLE_IOCTL(DM_TARGET_STATUS) -+COMPATIBLE_IOCTL(DM_TARGET_WAIT) -+ - /* And these ioctls need translation */ - HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) - HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) -diff -ruN linux-2.4.19/drivers/md/Config.in linux-2.4.19-dm/drivers/md/Config.in ---- linux-2.4.19/drivers/md/Config.in Wed Aug 14 11:51:06 2002 -+++ linux-2.4.19-dm/drivers/md/Config.in Thu Nov 21 14:31:25 2002 -@@ -14,5 +14,8 @@ - dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD - - dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD -+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -+ dep_tristate ' Device-mapper support (EXPERIMENTAL)' CONFIG_BLK_DEV_DM $CONFIG_MD -+fi - - endmenu -diff -ruN linux-2.4.19/drivers/md/Makefile linux-2.4.19-dm/drivers/md/Makefile ---- linux-2.4.19/drivers/md/Makefile Wed Aug 14 11:51:06 2002 -+++ linux-2.4.19-dm/drivers/md/Makefile Thu Nov 21 14:31:09 2002 -@@ -4,9 +4,12 @@ - - O_TARGET := mddev.o - --export-objs := md.o xor.o -+export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o - list-multi := lvm-mod.o - lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o -+dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \ -+ dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \ -+ kcopyd.o - - # Note: link order is important. All raid personalities - # and xor.o must come before md.o, as they each initialise -@@ -20,8 +23,12 @@ - obj-$(CONFIG_MD_MULTIPATH) += multipath.o - obj-$(CONFIG_BLK_DEV_MD) += md.o - obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o -+obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o - - include $(TOPDIR)/Rules.make - - lvm-mod.o: $(lvm-mod-objs) - $(LD) -r -o $@ $(lvm-mod-objs) -+ -+dm-mod.o: $(dm-mod-objs) -+ $(LD) -r -o $@ $(dm-mod-objs) -diff -ruN linux-2.4.19/drivers/md/dm-exception-store.c linux-2.4.19-dm/drivers/md/dm-exception-store.c ---- linux-2.4.19/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-exception-store.c Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,701 @@ -+/* -+ * dm-snapshot.c -+ * -+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm-snapshot.h" -+#include "kcopyd.h" -+ -+#include -+#include -+#include -+#include -+ -+#define SECTOR_SIZE 512 -+#define SECTOR_SHIFT 9 -+ -+/*----------------------------------------------------------------- -+ * Persistent snapshots, by persistent we mean that the snapshot -+ * will survive a reboot. -+ *---------------------------------------------------------------*/ -+ -+/* -+ * We need to store a record of which parts of the origin have -+ * been copied to the snapshot device. The snapshot code -+ * requires that we copy exception chunks to chunk aligned areas -+ * of the COW store. It makes sense therefore, to store the -+ * metadata in chunk size blocks. -+ * -+ * There is no backward or forward compatibility implemented, -+ * snapshots with different disk versions than the kernel will -+ * not be usable. 
It is expected that "lvcreate" will blank out -+ * the start of a fresh COW device before calling the snapshot -+ * constructor. -+ * -+ * The first chunk of the COW device just contains the header. -+ * After this there is a chunk filled with exception metadata, -+ * followed by as many exception chunks as can fit in the -+ * metadata areas. -+ * -+ * All on disk structures are in little-endian format. The end -+ * of the exceptions info is indicated by an exception with a -+ * new_chunk of 0, which is invalid since it would point to the -+ * header chunk. -+ */ -+ -+/* -+ * Magic for persistent snapshots: "SnAp" - Feeble isn't it. -+ */ -+#define SNAP_MAGIC 0x70416e53 -+ -+/* -+ * The on-disk version of the metadata. -+ */ -+#define SNAPSHOT_DISK_VERSION 1 -+ -+struct disk_header { -+ uint32_t magic; -+ -+ /* -+ * Is this snapshot valid. There is no way of recovering -+ * an invalid snapshot. -+ */ -+ int valid; -+ -+ /* -+ * Simple, incrementing version. no backward -+ * compatibility. -+ */ -+ uint32_t version; -+ -+ /* In sectors */ -+ uint32_t chunk_size; -+}; -+ -+struct disk_exception { -+ uint64_t old_chunk; -+ uint64_t new_chunk; -+}; -+ -+struct commit_callback { -+ void (*callback) (void *, int success); -+ void *context; -+}; -+ -+/* -+ * The top level structure for a persistent exception store. -+ */ -+struct pstore { -+ struct dm_snapshot *snap; /* up pointer to my snapshot */ -+ int version; -+ int valid; -+ uint32_t chunk_size; -+ uint32_t exceptions_per_area; -+ -+ /* -+ * Now that we have an asynchronous kcopyd there is no -+ * need for large chunk sizes, so it wont hurt to have a -+ * whole chunks worth of metadata in memory at once. -+ */ -+ void *area; -+ struct kiobuf *iobuf; -+ -+ /* -+ * Used to keep track of which metadata area the data in -+ * 'chunk' refers to. -+ */ -+ uint32_t current_area; -+ -+ /* -+ * The next free chunk for an exception. -+ */ -+ uint32_t next_free; -+ -+ /* -+ * The index of next free exception in the current -+ * metadata area. -+ */ -+ uint32_t current_committed; -+ -+ atomic_t pending_count; -+ uint32_t callback_count; -+ struct commit_callback *callbacks; -+}; -+ -+/* -+ * For performance reasons we want to defer writing a committed -+ * exceptions metadata to disk so that we can amortise away this -+ * exensive operation. -+ * -+ * For the initial version of this code we will remain with -+ * synchronous io. There are some deadlock issues with async -+ * that I haven't yet worked out. -+ */ -+static int do_io(int rw, struct kcopyd_region *where, struct kiobuf *iobuf) -+{ -+ int i, sectors_per_block, nr_blocks, start; -+ int blocksize = get_hardsect_size(where->dev); -+ int status; -+ -+ sectors_per_block = blocksize / SECTOR_SIZE; -+ -+ nr_blocks = where->count / sectors_per_block; -+ start = where->sector / sectors_per_block; -+ -+ for (i = 0; i < nr_blocks; i++) -+ iobuf->blocks[i] = start++; -+ -+ iobuf->length = where->count << 9; -+ iobuf->locked = 1; -+ -+ status = brw_kiovec(rw, 1, &iobuf, where->dev, iobuf->blocks, -+ blocksize); -+ if (status != (where->count << 9)) -+ return -EIO; -+ -+ return 0; -+} -+ -+static int allocate_iobuf(struct pstore *ps) -+{ -+ size_t i, r = -ENOMEM, len, nr_pages; -+ struct page *page; -+ -+ len = ps->chunk_size << SECTOR_SHIFT; -+ -+ /* -+ * Allocate the chunk_size block of memory that will hold -+ * a single metadata area. 
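To make the layout described above concrete, a worked example with invented numbers: an 8 KiB chunk (chunk_size = 16 sectors) and the 16-byte disk_exception, using the exceptions_per_area formula from dm_create_persistent() further down:

/*
 * exceptions_per_area = (16 << SECTOR_SHIFT) / 16 = 512, giving:
 *
 *      chunk 0         header (struct disk_header)
 *      chunk 1         metadata area 0 (512 disk_exceptions)
 *      chunks 2..513   exception data for area 0
 *      chunk 514       metadata area 1, i.e. 1 + (512 + 1) * 1
 *      ...
 */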
-+ */ -+ ps->area = vmalloc(len); -+ if (!ps->area) -+ return r; -+ -+ if (alloc_kiovec(1, &ps->iobuf)) -+ goto bad; -+ -+ nr_pages = ps->chunk_size / (PAGE_SIZE / SECTOR_SIZE); -+ r = expand_kiobuf(ps->iobuf, nr_pages); -+ if (r) -+ goto bad; -+ -+ /* -+ * We lock the pages for ps->area into memory since they'll be -+ * doing a lot of io. -+ */ -+ for (i = 0; i < nr_pages; i++) { -+ page = vmalloc_to_page(ps->area + (i * PAGE_SIZE)); -+ LockPage(page); -+ ps->iobuf->maplist[i] = page; -+ ps->iobuf->nr_pages++; -+ } -+ -+ ps->iobuf->nr_pages = nr_pages; -+ ps->iobuf->offset = 0; -+ -+ return 0; -+ -+ bad: -+ if (ps->iobuf) -+ free_kiovec(1, &ps->iobuf); -+ -+ if (ps->area) -+ vfree(ps->area); -+ ps->iobuf = NULL; -+ return r; -+} -+ -+static void free_iobuf(struct pstore *ps) -+{ -+ int i; -+ -+ for (i = 0; i < ps->iobuf->nr_pages; i++) -+ UnlockPage(ps->iobuf->maplist[i]); -+ ps->iobuf->locked = 0; -+ -+ free_kiovec(1, &ps->iobuf); -+ vfree(ps->area); -+} -+ -+/* -+ * Read or write a chunk aligned and sized block of data from a device. -+ */ -+static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) -+{ -+ int r; -+ struct kcopyd_region where; -+ -+ where.dev = ps->snap->cow->dev; -+ where.sector = ps->chunk_size * chunk; -+ where.count = ps->chunk_size; -+ -+ r = do_io(rw, &where, ps->iobuf); -+ if (r) -+ return r; -+ -+ return 0; -+} -+ -+/* -+ * Read or write a metadata area. Remembering to skip the first -+ * chunk which holds the header. -+ */ -+static int area_io(struct pstore *ps, uint32_t area, int rw) -+{ -+ int r; -+ uint32_t chunk; -+ -+ /* convert a metadata area index to a chunk index */ -+ chunk = 1 + ((ps->exceptions_per_area + 1) * area); -+ -+ r = chunk_io(ps, chunk, rw); -+ if (r) -+ return r; -+ -+ ps->current_area = area; -+ return 0; -+} -+ -+static int zero_area(struct pstore *ps, uint32_t area) -+{ -+ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); -+ return area_io(ps, area, WRITE); -+} -+ -+static int read_header(struct pstore *ps, int *new_snapshot) -+{ -+ int r; -+ struct disk_header *dh; -+ -+ r = chunk_io(ps, 0, READ); -+ if (r) -+ return r; -+ -+ dh = (struct disk_header *) ps->area; -+ -+ if (dh->magic == 0) { -+ *new_snapshot = 1; -+ -+ } else if (dh->magic == SNAP_MAGIC) { -+ *new_snapshot = 0; -+ ps->valid = dh->valid; -+ ps->version = dh->version; -+ ps->chunk_size = dh->chunk_size; -+ -+ } else { -+ DMWARN("Invalid/corrupt snapshot"); -+ r = -ENXIO; -+ } -+ -+ return r; -+} -+ -+static int write_header(struct pstore *ps) -+{ -+ struct disk_header *dh; -+ -+ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); -+ -+ dh = (struct disk_header *) ps->area; -+ dh->magic = SNAP_MAGIC; -+ dh->valid = ps->valid; -+ dh->version = ps->version; -+ dh->chunk_size = ps->chunk_size; -+ -+ return chunk_io(ps, 0, WRITE); -+} -+ -+/* -+ * Access functions for the disk exceptions, these do the endian conversions. 
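A hedged userland sketch of the "blank out the start of a fresh COW device" step that the comment above expects from lvcreate: zeroing chunk 0 makes read_header() see magic == 0 and take the new-snapshot path. The function name is invented and path handling and error reporting are stripped to the bone:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int blank_cow_header(const char *path, size_t chunk_bytes)
{
        char buf[512];
        size_t done;
        int fd = open(path, O_WRONLY);

        if (fd < 0)
                return -1;

        memset(buf, 0, sizeof(buf));
        for (done = 0; done < chunk_bytes; done += sizeof(buf)) {
                if (write(fd, buf, sizeof(buf)) != sizeof(buf)) {
                        close(fd);
                        return -1;
                }
        }
        return close(fd);
}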
-+ */ -+static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) -+{ -+ if (index >= ps->exceptions_per_area) -+ return NULL; -+ -+ return ((struct disk_exception *) ps->area) + index; -+} -+ -+static int read_exception(struct pstore *ps, -+ uint32_t index, struct disk_exception *result) -+{ -+ struct disk_exception *e; -+ -+ e = get_exception(ps, index); -+ if (!e) -+ return -EINVAL; -+ -+ /* copy it */ -+ result->old_chunk = le64_to_cpu(e->old_chunk); -+ result->new_chunk = le64_to_cpu(e->new_chunk); -+ -+ return 0; -+} -+ -+static int write_exception(struct pstore *ps, -+ uint32_t index, struct disk_exception *de) -+{ -+ struct disk_exception *e; -+ -+ e = get_exception(ps, index); -+ if (!e) -+ return -EINVAL; -+ -+ /* copy it */ -+ e->old_chunk = cpu_to_le64(de->old_chunk); -+ e->new_chunk = cpu_to_le64(de->new_chunk); -+ -+ return 0; -+} -+ -+/* -+ * Registers the exceptions that are present in the current area. -+ * 'full' is filled in to indicate if the area has been -+ * filled. -+ */ -+static int insert_exceptions(struct pstore *ps, int *full) -+{ -+ int i, r; -+ struct disk_exception de; -+ -+ /* presume the area is full */ -+ *full = 1; -+ -+ for (i = 0; i < ps->exceptions_per_area; i++) { -+ r = read_exception(ps, i, &de); -+ -+ if (r) -+ return r; -+ -+ /* -+ * If the new_chunk is pointing at the start of -+ * the COW device, where the first metadata area -+ * is we know that we've hit the end of the -+ * exceptions. Therefore the area is not full. -+ */ -+ if (de.new_chunk == 0LL) { -+ ps->current_committed = i; -+ *full = 0; -+ break; -+ } -+ -+ /* -+ * Keep track of the start of the free chunks. -+ */ -+ if (ps->next_free <= de.new_chunk) -+ ps->next_free = de.new_chunk + 1; -+ -+ /* -+ * Otherwise we add the exception to the snapshot. -+ */ -+ r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); -+ if (r) -+ return r; -+ } -+ -+ return 0; -+} -+ -+static int read_exceptions(struct pstore *ps) -+{ -+ uint32_t area; -+ int r, full = 1; -+ -+ /* -+ * Keeping reading chunks and inserting exceptions until -+ * we find a partially full area. -+ */ -+ for (area = 0; full; area++) { -+ r = area_io(ps, area, READ); -+ if (r) -+ return r; -+ -+ r = insert_exceptions(ps, &full); -+ if (r) -+ return r; -+ -+ area++; -+ } -+ -+ return 0; -+} -+ -+static inline struct pstore *get_info(struct exception_store *store) -+{ -+ return (struct pstore *) store->context; -+} -+ -+static int persistent_percentfull(struct exception_store *store) -+{ -+ struct pstore *ps = get_info(store); -+ return (ps->next_free * store->snap->chunk_size * 100) / -+ get_dev_size(store->snap->cow->dev); -+} -+ -+static void persistent_destroy(struct exception_store *store) -+{ -+ struct pstore *ps = get_info(store); -+ -+ vfree(ps->callbacks); -+ free_iobuf(ps); -+ kfree(ps); -+} -+ -+static int persistent_prepare(struct exception_store *store, -+ struct exception *e) -+{ -+ struct pstore *ps = get_info(store); -+ uint32_t stride; -+ sector_t size = get_dev_size(store->snap->cow->dev); -+ -+ /* Is there enough room ? */ -+ if (size <= (ps->next_free * store->snap->chunk_size)) -+ return -ENOSPC; -+ -+ e->new_chunk = ps->next_free; -+ -+ /* -+ * Move onto the next free pending, making sure to take -+ * into account the location of the metadata chunks. 
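The next_free bookkeeping in insert_exceptions() above rebuilds the allocator's state from the committed exceptions in whatever order they were written. A worked demonstration with invented chunk numbers:

uint64_t new_chunks[] = { 2, 5, 3 };    /* committed in this order */
uint32_t next_free = 2;                 /* skip header + first area */
int i;

for (i = 0; i < 3; i++)
        if (next_free <= new_chunks[i])
                next_free = new_chunks[i] + 1;

/* next_free == 6: one past every chunk in use, despite the
 * unsorted on-disk order */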
-+ */ -+ stride = (ps->exceptions_per_area + 1); -+ if (!(++ps->next_free % stride)) -+ ps->next_free++; -+ -+ atomic_inc(&ps->pending_count); -+ return 0; -+} -+ -+static void persistent_commit(struct exception_store *store, -+ struct exception *e, -+ void (*callback) (void *, int success), -+ void *callback_context) -+{ -+ int r, i; -+ struct pstore *ps = get_info(store); -+ struct disk_exception de; -+ struct commit_callback *cb; -+ -+ de.old_chunk = e->old_chunk; -+ de.new_chunk = e->new_chunk; -+ write_exception(ps, ps->current_committed++, &de); -+ -+ /* -+ * Add the callback to the back of the array. This code -+ * is the only place where the callback array is -+ * manipulated, and we know that it will never be called -+ * multiple times concurrently. -+ */ -+ cb = ps->callbacks + ps->callback_count++; -+ cb->callback = callback; -+ cb->context = callback_context; -+ -+ /* -+ * If there are no more exceptions in flight, or we have -+ * filled this metadata area we commit the exceptions to -+ * disk. -+ */ -+ if (atomic_dec_and_test(&ps->pending_count) || -+ (ps->current_committed == ps->exceptions_per_area)) { -+ r = area_io(ps, ps->current_area, WRITE); -+ if (r) -+ ps->valid = 0; -+ -+ for (i = 0; i < ps->callback_count; i++) { -+ cb = ps->callbacks + i; -+ cb->callback(cb->context, r == 0 ? 1 : 0); -+ } -+ -+ ps->callback_count = 0; -+ } -+ -+ /* -+ * Have we completely filled the current area ? -+ */ -+ if (ps->current_committed == ps->exceptions_per_area) { -+ ps->current_committed = 0; -+ r = zero_area(ps, ps->current_area + 1); -+ if (r) -+ ps->valid = 0; -+ } -+} -+ -+static void persistent_drop(struct exception_store *store) -+{ -+ struct pstore *ps = get_info(store); -+ -+ ps->valid = 0; -+ if (write_header(ps)) -+ DMWARN("write header failed"); -+} -+ -+int dm_create_persistent(struct exception_store *store, uint32_t chunk_size) -+{ -+ int r, new_snapshot; -+ struct pstore *ps; -+ -+ /* allocate the pstore */ -+ ps = kmalloc(sizeof(*ps), GFP_KERNEL); -+ if (!ps) -+ return -ENOMEM; -+ -+ ps->snap = store->snap; -+ ps->valid = 1; -+ ps->version = SNAPSHOT_DISK_VERSION; -+ ps->chunk_size = chunk_size; -+ ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) / -+ sizeof(struct disk_exception); -+ ps->next_free = 2; /* skipping the header and first area */ -+ ps->current_committed = 0; -+ -+ r = allocate_iobuf(ps); -+ if (r) -+ goto bad; -+ -+ /* -+ * Allocate space for all the callbacks. -+ */ -+ ps->callback_count = 0; -+ atomic_set(&ps->pending_count, 0); -+ ps->callbacks = vcalloc(ps->exceptions_per_area, -+ sizeof(*ps->callbacks)); -+ -+ if (!ps->callbacks) -+ goto bad; -+ -+ /* -+ * Read the snapshot header. -+ */ -+ r = read_header(ps, &new_snapshot); -+ if (r) -+ goto bad; -+ -+ /* -+ * Do we need to setup a new snapshot ? -+ */ -+ if (new_snapshot) { -+ r = write_header(ps); -+ if (r) { -+ DMWARN("write_header failed"); -+ goto bad; -+ } -+ -+ r = zero_area(ps, 0); -+ if (r) { -+ DMWARN("zero_area(0) failed"); -+ goto bad; -+ } -+ -+ } else { -+ /* -+ * Sanity checks. -+ */ -+ if (ps->chunk_size != chunk_size) { -+ DMWARN("chunk size for existing snapshot different " -+ "from that requested"); -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ if (ps->version != SNAPSHOT_DISK_VERSION) { -+ DMWARN("unable to handle snapshot disk version %d", -+ ps->version); -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ /* -+ * Read the metadata. 
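The effect of the pending_count batching in persistent_commit() above is easiest to see as a timeline. Suppose three exceptions were prepared together:

/*
 *      commit(e1): callback queued, pending 3 -> 2, no disk write
 *      commit(e2): callback queued, pending 2 -> 1, no disk write
 *      commit(e3): callback queued, pending 1 -> 0, one area_io(WRITE),
 *                  then all three callbacks fire with the same result
 *
 * Filling the metadata area forces the same flush early, so no
 * callback ever waits for more than one area's worth of exceptions.
 */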
-+ */ -+ r = read_exceptions(ps); -+ if (r) -+ goto bad; -+ } -+ -+ store->destroy = persistent_destroy; -+ store->prepare_exception = persistent_prepare; -+ store->commit_exception = persistent_commit; -+ store->drop_snapshot = persistent_drop; -+ store->percent_full = persistent_percentfull; -+ store->context = ps; -+ -+ return r; -+ -+ bad: -+ if (ps) { -+ if (ps->callbacks) -+ vfree(ps->callbacks); -+ -+ if (ps->iobuf) -+ free_iobuf(ps); -+ -+ kfree(ps); -+ } -+ return r; -+} -+ -+/*----------------------------------------------------------------- -+ * Implementation of the store for non-persistent snapshots. -+ *---------------------------------------------------------------*/ -+struct transient_c { -+ sector_t next_free; -+}; -+ -+void transient_destroy(struct exception_store *store) -+{ -+ kfree(store->context); -+} -+ -+int transient_prepare(struct exception_store *store, struct exception *e) -+{ -+ struct transient_c *tc = (struct transient_c *) store->context; -+ sector_t size = get_dev_size(store->snap->cow->dev); -+ -+ if (size < (tc->next_free + store->snap->chunk_size)) -+ return -1; -+ -+ e->new_chunk = sector_to_chunk(store->snap, tc->next_free); -+ tc->next_free += store->snap->chunk_size; -+ -+ return 0; -+} -+ -+void transient_commit(struct exception_store *store, -+ struct exception *e, -+ void (*callback) (void *, int success), -+ void *callback_context) -+{ -+ /* Just succeed */ -+ callback(callback_context, 1); -+} -+ -+static int transient_percentfull(struct exception_store *store) -+{ -+ struct transient_c *tc = (struct transient_c *) store->context; -+ return (tc->next_free * 100) / get_dev_size(store->snap->cow->dev); -+} -+ -+int dm_create_transient(struct exception_store *store, -+ struct dm_snapshot *s, int blocksize) -+{ -+ struct transient_c *tc; -+ -+ memset(store, 0, sizeof(*store)); -+ store->destroy = transient_destroy; -+ store->prepare_exception = transient_prepare; -+ store->commit_exception = transient_commit; -+ store->percent_full = transient_percentfull; -+ store->snap = s; -+ -+ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); -+ if (!tc) -+ return -ENOMEM; -+ -+ tc->next_free = 0; -+ store->context = tc; -+ -+ return 0; -+} -diff -ruN linux-2.4.19/drivers/md/dm-ioctl.c linux-2.4.19-dm/drivers/md/dm-ioctl.c ---- linux-2.4.19/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-ioctl.c Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,1139 @@ -+/* -+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define DM_DRIVER_EMAIL "dm@uk.sistina.com" -+ -+/*----------------------------------------------------------------- -+ * The ioctl interface needs to be able to look up devices by -+ * name or uuid. -+ *---------------------------------------------------------------*/ -+struct hash_cell { -+ struct list_head name_list; -+ struct list_head uuid_list; -+ -+ char *name; -+ char *uuid; -+ struct mapped_device *md; -+ -+ /* I hate devfs */ -+ devfs_handle_t devfs_entry; -+}; -+ -+#define NUM_BUCKETS 64 -+#define MASK_BUCKETS (NUM_BUCKETS - 1) -+static struct list_head _name_buckets[NUM_BUCKETS]; -+static struct list_head _uuid_buckets[NUM_BUCKETS]; -+ -+static devfs_handle_t _dev_dir; -+void dm_hash_remove_all(void); -+ -+/* -+ * Guards access to all three tables. 
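Both constructors fill in the same small vtable, so callers are indifferent to the store type afterwards. A hedged sketch of the wiring; persistent, blocksize, s and ex are placeholders, and the real argument parsing lives in the snapshot target's constructor in dm-snapshot.c:

struct exception_store store;
int r;

memset(&store, 0, sizeof(store));
store.snap = s;                 /* dm_create_persistent() reads this */

r = persistent ? dm_create_persistent(&store, s->chunk_size)
               : dm_create_transient(&store, s, blocksize);
if (r)
        return r;

/* from here on everything goes through the function pointers */
r = store.prepare_exception(&store, ex);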
-+ */ -+static DECLARE_RWSEM(_hash_lock); -+ -+static void init_buckets(struct list_head *buckets) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < NUM_BUCKETS; i++) -+ INIT_LIST_HEAD(buckets + i); -+} -+ -+int dm_hash_init(void) -+{ -+ init_buckets(_name_buckets); -+ init_buckets(_uuid_buckets); -+ _dev_dir = devfs_mk_dir(0, DM_DIR, NULL); -+ return 0; -+} -+ -+void dm_hash_exit(void) -+{ -+ dm_hash_remove_all(); -+ devfs_unregister(_dev_dir); -+} -+ -+/*----------------------------------------------------------------- -+ * Hash function: -+ * We're not really concerned with the str hash function being -+ * fast since it's only used by the ioctl interface. -+ *---------------------------------------------------------------*/ -+static unsigned int hash_str(const char *str) -+{ -+ const unsigned int hash_mult = 2654435387U; -+ unsigned int h = 0; -+ -+ while (*str) -+ h = (h + (unsigned int) *str++) * hash_mult; -+ -+ return h & MASK_BUCKETS; -+} -+ -+/*----------------------------------------------------------------- -+ * Code for looking up a device by name -+ *---------------------------------------------------------------*/ -+static struct hash_cell *__get_name_cell(const char *str) -+{ -+ struct list_head *tmp; -+ struct hash_cell *hc; -+ unsigned int h = hash_str(str); -+ -+ list_for_each(tmp, _name_buckets + h) { -+ hc = list_entry(tmp, struct hash_cell, name_list); -+ if (!strcmp(hc->name, str)) -+ return hc; -+ } -+ -+ return NULL; -+} -+ -+static struct hash_cell *__get_uuid_cell(const char *str) -+{ -+ struct list_head *tmp; -+ struct hash_cell *hc; -+ unsigned int h = hash_str(str); -+ -+ list_for_each(tmp, _uuid_buckets + h) { -+ hc = list_entry(tmp, struct hash_cell, uuid_list); -+ if (!strcmp(hc->uuid, str)) -+ return hc; -+ } -+ -+ return NULL; -+} -+ -+/*----------------------------------------------------------------- -+ * Inserting, removing and renaming a device. -+ *---------------------------------------------------------------*/ -+static inline char *kstrdup(const char *str) -+{ -+ char *r = kmalloc(strlen(str) + 1, GFP_KERNEL); -+ if (r) -+ strcpy(r, str); -+ return r; -+} -+ -+static struct hash_cell *alloc_cell(const char *name, const char *uuid, -+ struct mapped_device *md) -+{ -+ struct hash_cell *hc; -+ -+ hc = kmalloc(sizeof(*hc), GFP_KERNEL); -+ if (!hc) -+ return NULL; -+ -+ hc->name = kstrdup(name); -+ if (!hc->name) { -+ kfree(hc); -+ return NULL; -+ } -+ -+ if (!uuid) -+ hc->uuid = NULL; -+ -+ else { -+ hc->uuid = kstrdup(uuid); -+ if (!hc->uuid) { -+ kfree(hc->name); -+ kfree(hc); -+ return NULL; -+ } -+ } -+ -+ INIT_LIST_HEAD(&hc->name_list); -+ INIT_LIST_HEAD(&hc->uuid_list); -+ hc->md = md; -+ return hc; -+} -+ -+static void free_cell(struct hash_cell *hc) -+{ -+ if (hc) { -+ kfree(hc->name); -+ kfree(hc->uuid); -+ kfree(hc); -+ } -+} -+ -+/* -+ * devfs stuff. -+ */ -+static int register_with_devfs(struct hash_cell *hc) -+{ -+ kdev_t dev = dm_kdev(hc->md); -+ -+ hc->devfs_entry = -+ devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER, -+ major(dev), minor(dev), -+ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, -+ &dm_blk_dops, NULL); -+ -+ return 0; -+} -+ -+static int unregister_with_devfs(struct hash_cell *hc) -+{ -+ devfs_unregister(hc->devfs_entry); -+ return 0; -+} -+ -+/* -+ * The kdev_t and uuid of a device can never change once it is -+ * initially inserted. -+ */ -+int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md) -+{ -+ struct hash_cell *cell; -+ -+ /* -+ * Allocate the new cells. 
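Lookups take the read side of _hash_lock and must grab a reference before dropping it, exactly as find_device() further down does. A sketch (the device name is illustrative):

struct hash_cell *hc;
struct mapped_device *md = NULL;

down_read(&_hash_lock);
hc = __get_name_cell("vg0-home");       /* walks bucket hash_str(name) */
if (hc) {
        md = hc->md;
        dm_get(md);     /* take a reference while still under the lock */
}
up_read(&_hash_lock);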
-+ */ -+ cell = alloc_cell(name, uuid, md); -+ if (!cell) -+ return -ENOMEM; -+ -+ /* -+ * Insert the cell into all three hash tables. -+ */ -+ down_write(&_hash_lock); -+ if (__get_name_cell(name)) -+ goto bad; -+ -+ list_add(&cell->name_list, _name_buckets + hash_str(name)); -+ -+ if (uuid) { -+ if (__get_uuid_cell(uuid)) { -+ list_del(&cell->name_list); -+ goto bad; -+ } -+ list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); -+ } -+ register_with_devfs(cell); -+ dm_get(md); -+ up_write(&_hash_lock); -+ -+ return 0; -+ -+ bad: -+ up_write(&_hash_lock); -+ free_cell(cell); -+ return -EBUSY; -+} -+ -+void __hash_remove(struct hash_cell *hc) -+{ -+ /* remove from the dev hash */ -+ list_del(&hc->uuid_list); -+ list_del(&hc->name_list); -+ unregister_with_devfs(hc); -+ dm_put(hc->md); -+} -+ -+void dm_hash_remove_all(void) -+{ -+ int i; -+ struct hash_cell *hc; -+ struct list_head *tmp, *n; -+ -+ down_write(&_hash_lock); -+ for (i = 0; i < NUM_BUCKETS; i++) { -+ list_for_each_safe(tmp, n, _name_buckets + i) { -+ hc = list_entry(tmp, struct hash_cell, name_list); -+ __hash_remove(hc); -+ } -+ } -+ up_write(&_hash_lock); -+} -+ -+int dm_hash_rename(const char *old, const char *new) -+{ -+ char *new_name, *old_name; -+ struct hash_cell *hc; -+ -+ /* -+ * duplicate new. -+ */ -+ new_name = kstrdup(new); -+ if (!new_name) -+ return -ENOMEM; -+ -+ down_write(&_hash_lock); -+ -+ /* -+ * Is new free ? -+ */ -+ hc = __get_name_cell(new); -+ if (hc) { -+ DMWARN("asked to rename to an already existing name %s -> %s", -+ old, new); -+ up_write(&_hash_lock); -+ return -EBUSY; -+ } -+ -+ /* -+ * Is there such a device as 'old' ? -+ */ -+ hc = __get_name_cell(old); -+ if (!hc) { -+ DMWARN("asked to rename a non existent device %s -> %s", -+ old, new); -+ up_write(&_hash_lock); -+ return -ENXIO; -+ } -+ -+ /* -+ * rename and move the name cell. -+ */ -+ list_del(&hc->name_list); -+ old_name = hc->name; -+ hc->name = new_name; -+ list_add(&hc->name_list, _name_buckets + hash_str(new_name)); -+ -+ /* rename the device node in devfs */ -+ unregister_with_devfs(hc); -+ register_with_devfs(hc); -+ -+ up_write(&_hash_lock); -+ kfree(old_name); -+ return 0; -+} -+ -+ -+/*----------------------------------------------------------------- -+ * Implementation of the ioctl commands -+ *---------------------------------------------------------------*/ -+ -+/* -+ * All the ioctl commands get dispatched to functions with this -+ * prototype. -+ */ -+typedef int (*ioctl_fn)(struct dm_ioctl *param, struct dm_ioctl *user); -+ -+/* -+ * Check a string doesn't overrun the chunk of -+ * memory we copied from userland. 
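What valid_str() just below guards against: the ioctl payload is copied verbatim from userland, so a string field may simply never be NUL-terminated inside the copied buffer. For example:

char payload[8] = { 'e', 'v', 'i', 'l', '!', '!', '!', '!' };

/* no NUL between begin and end, so this returns -EINVAL instead of
 * letting later code run off the end of the buffer */
valid_str(payload, payload, payload + sizeof(payload));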
-+ */ -+static int valid_str(char *str, void *begin, void *end) -+{ -+ while (((void *) str >= begin) && ((void *) str < end)) -+ if (!*str++) -+ return 0; -+ -+ return -EINVAL; -+} -+ -+static int next_target(struct dm_target_spec *last, uint32_t next, -+ void *begin, void *end, -+ struct dm_target_spec **spec, char **params) -+{ -+ *spec = (struct dm_target_spec *) -+ ((unsigned char *) last + next); -+ *params = (char *) (*spec + 1); -+ -+ if (*spec < (last + 1) || ((void *) *spec > end)) -+ return -EINVAL; -+ -+ return valid_str(*params, begin, end); -+} -+ -+static int populate_table(struct dm_table *table, struct dm_ioctl *args) -+{ -+ int i = 0, r, first = 1; -+ struct dm_target_spec *spec; -+ char *params; -+ void *begin, *end; -+ -+ if (!args->target_count) { -+ DMWARN("populate_table: no targets specified"); -+ return -EINVAL; -+ } -+ -+ begin = (void *) args; -+ end = begin + args->data_size; -+ -+ for (i = 0; i < args->target_count; i++) { -+ -+ if (first) -+ r = next_target((struct dm_target_spec *) args, -+ args->data_start, -+ begin, end, &spec, ¶ms); -+ else -+ r = next_target(spec, spec->next, begin, end, -+ &spec, ¶ms); -+ -+ if (r) { -+ DMWARN("unable to find target"); -+ return -EINVAL; -+ } -+ -+ r = dm_table_add_target(table, spec->target_type, -+ spec->sector_start, spec->length, -+ params); -+ if (r) { -+ DMWARN("internal error adding target to table"); -+ return -EINVAL; -+ } -+ -+ first = 0; -+ } -+ -+ return dm_table_complete(table); -+} -+ -+/* -+ * Round up the ptr to the next 'align' boundary. Obviously -+ * 'align' must be a power of 2. -+ */ -+static inline void *align_ptr(void *ptr, unsigned int align) -+{ -+ align--; -+ return (void *) (((unsigned long) (ptr + align)) & ~align); -+} -+ -+/* -+ * Copies a dm_ioctl and an optional additional payload to -+ * userland. -+ */ -+static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param, -+ void *data, uint32_t len) -+{ -+ int r; -+ void *ptr = NULL; -+ -+ if (data) { -+ ptr = align_ptr(user + 1, sizeof(unsigned long)); -+ param->data_start = ptr - (void *) user; -+ } -+ -+ /* -+ * The version number has already been filled in, so we -+ * just copy later fields. -+ */ -+ r = copy_to_user(&user->data_size, ¶m->data_size, -+ sizeof(*param) - sizeof(param->version)); -+ if (r) -+ return -EFAULT; -+ -+ if (data) { -+ if (param->data_start + len > param->data_size) -+ return -ENOSPC; -+ -+ if (copy_to_user(ptr, data, len)) -+ r = -EFAULT; -+ } -+ -+ return r; -+} -+ -+/* -+ * Fills in a dm_ioctl structure, ready for sending back to -+ * userland. -+ */ -+static int __info(struct mapped_device *md, struct dm_ioctl *param) -+{ -+ kdev_t dev = dm_kdev(md); -+ struct dm_table *table; -+ struct block_device *bdev; -+ -+ param->flags = DM_EXISTS_FLAG; -+ if (dm_suspended(md)) -+ param->flags |= DM_SUSPEND_FLAG; -+ -+ param->dev = kdev_t_to_nr(dev); -+ bdev = bdget(param->dev); -+ if (!bdev) -+ return -ENXIO; -+ -+ param->open_count = bdev->bd_openers; -+ bdput(bdev); -+ -+ if (is_read_only(dev)) -+ param->flags |= DM_READONLY_FLAG; -+ -+ table = dm_get_table(md); -+ param->target_count = dm_table_get_num_targets(table); -+ dm_table_put(table); -+ -+ return 0; -+} -+ -+/* -+ * Always use UUID for lookups if it's present, otherwise use name. -+ */ -+static inline struct mapped_device *find_device(struct dm_ioctl *param) -+{ -+ struct hash_cell *hc; -+ struct mapped_device *md = NULL; -+ -+ down_read(&_hash_lock); -+ hc = *param->uuid ? 
__get_uuid_cell(param->uuid) : -+ __get_name_cell(param->name); -+ if (hc) { -+ md = hc->md; -+ -+ /* -+ * Sneakily write in both the name and the uuid -+ * while we have the cell. -+ */ -+ strncpy(param->name, hc->name, sizeof(param->name)); -+ if (hc->uuid) -+ strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1); -+ else -+ param->uuid[0] = '\0'; -+ -+ dm_get(md); -+ } -+ up_read(&_hash_lock); -+ -+ return md; -+} -+ -+#define ALIGNMENT sizeof(int) -+static void *_align(void *ptr, unsigned int a) -+{ -+ register unsigned long align = --a; -+ -+ return (void *) (((unsigned long) ptr + align) & ~align); -+} -+ -+/* -+ * Copies device info back to user space, used by -+ * the create and info ioctls. -+ */ -+static int info(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ -+ param->flags = 0; -+ -+ md = find_device(param); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ -+ __info(md, param); -+ dm_put(md); -+ -+ out: -+ return results_to_user(user, param, NULL, 0); -+} -+ -+static inline int get_mode(struct dm_ioctl *param) -+{ -+ int mode = FMODE_READ | FMODE_WRITE; -+ -+ if (param->flags & DM_READONLY_FLAG) -+ mode = FMODE_READ; -+ -+ return mode; -+} -+ -+static int check_name(const char *name) -+{ -+ if (strchr(name, '/')) { -+ DMWARN("invalid device name"); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int create(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r; -+ kdev_t dev; -+ struct dm_table *t; -+ struct mapped_device *md; -+ int minor; -+ -+ r = check_name(param->name); -+ if (r) -+ return r; -+ -+ r = dm_table_create(&t, get_mode(param)); -+ if (r) -+ return r; -+ -+ r = populate_table(t, param); -+ if (r) { -+ dm_table_put(t); -+ return r; -+ } -+ -+ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ? -+ minor(to_kdev_t(param->dev)) : -1; -+ -+ r = dm_create(minor, t, &md); -+ if (r) { -+ dm_table_put(t); -+ return r; -+ } -+ dm_table_put(t); /* md will have grabbed its own reference */ -+ -+ dev = dm_kdev(md); -+ set_device_ro(dev, (param->flags & DM_READONLY_FLAG)); -+ r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md); -+ dm_put(md); -+ -+ return r ? 
r : info(param, user); -+} -+ -+/* -+ * Build up the status struct for each target -+ */ -+static int __status(struct mapped_device *md, struct dm_ioctl *param, -+ char *outbuf, int *len) -+{ -+ int i, num_targets; -+ struct dm_target_spec *spec; -+ char *outptr; -+ status_type_t type; -+ struct dm_table *table = dm_get_table(md); -+ -+ if (param->flags & DM_STATUS_TABLE_FLAG) -+ type = STATUSTYPE_TABLE; -+ else -+ type = STATUSTYPE_INFO; -+ -+ outptr = outbuf; -+ -+ /* Get all the target info */ -+ num_targets = dm_table_get_num_targets(table); -+ for (i = 0; i < num_targets; i++) { -+ struct dm_target *ti = dm_table_get_target(table, i); -+ -+ if (outptr - outbuf + -+ sizeof(struct dm_target_spec) > param->data_size) { -+ dm_table_put(table); -+ return -ENOMEM; -+ } -+ -+ spec = (struct dm_target_spec *) outptr; -+ -+ spec->status = 0; -+ spec->sector_start = ti->begin; -+ spec->length = ti->len; -+ strncpy(spec->target_type, ti->type->name, -+ sizeof(spec->target_type)); -+ -+ outptr += sizeof(struct dm_target_spec); -+ -+ /* Get the status/table string from the target driver */ -+ if (ti->type->status) -+ ti->type->status(ti, type, outptr, -+ outbuf + param->data_size - outptr); -+ else -+ outptr[0] = '\0'; -+ -+ outptr += strlen(outptr) + 1; -+ _align(outptr, ALIGNMENT); -+ spec->next = outptr - outbuf; -+ } -+ -+ param->target_count = num_targets; -+ *len = outptr - outbuf; -+ dm_table_put(table); -+ -+ return 0; -+} -+ -+/* -+ * Return the status of a device as a text string for each -+ * target. -+ */ -+static int get_status(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ int len = 0; -+ int ret; -+ char *outbuf = NULL; -+ -+ md = find_device(param); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ -+ /* We haven't a clue how long the resultant data will be so -+ just allocate as much as userland has allowed us and make sure -+ we don't overun it */ -+ outbuf = kmalloc(param->data_size, GFP_KERNEL); -+ if (!outbuf) -+ goto out; -+ /* -+ * Get the status of all targets -+ */ -+ __status(md, param, outbuf, &len); -+ -+ /* -+ * Setup the basic dm_ioctl structure. -+ */ -+ __info(md, param); -+ -+ out: -+ if (md) -+ dm_put(md); -+ -+ ret = results_to_user(user, param, outbuf, len); -+ -+ if (outbuf) -+ kfree(outbuf); -+ -+ return ret; -+} -+ -+/* -+ * Wait for a device to report an event -+ */ -+static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ struct dm_table *table; -+ DECLARE_WAITQUEUE(wq, current); -+ -+ md = find_device(param); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ -+ /* -+ * Setup the basic dm_ioctl structure. -+ */ -+ __info(md, param); -+ -+ /* -+ * Wait for a notification event -+ */ -+ set_current_state(TASK_INTERRUPTIBLE); -+ table = dm_get_table(md); -+ dm_table_add_wait_queue(table, &wq); -+ dm_table_put(table); -+ dm_put(md); -+ -+ yield(); -+ set_current_state(TASK_RUNNING); -+ -+ out: -+ return results_to_user(user, param, NULL, 0); -+} -+ -+/* -+ * Retrieves a list of devices used by a particular dm device. -+ */ -+static int dep(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int count, r; -+ struct mapped_device *md; -+ struct list_head *tmp; -+ size_t len = 0; -+ struct dm_target_deps *deps = NULL; -+ struct dm_table *table; -+ -+ md = find_device(param); -+ if (!md) -+ goto out; -+ table = dm_get_table(md); -+ -+ /* -+ * Setup the basic dm_ioctl structure. 
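A hedged userland sketch of decoding what __status() above packs: each dm_target_spec is followed by its status string, and spec->next holds the offset of the next record from the start of the data area. It assumes <linux/dm-ioctl.h> for the struct definitions; buf and the printf format are illustrative:

#include <stdio.h>
#include <stdint.h>

static void print_status(char *buf)
{
        struct dm_ioctl *dmi = (struct dm_ioctl *) buf;
        struct dm_target_spec *spec;
        uint32_t i;

        spec = (struct dm_target_spec *) (buf + dmi->data_start);
        for (i = 0; i < dmi->target_count; i++) {
                char *status = (char *) (spec + 1);

                printf("%llu %llu %s: %s\n",
                       (unsigned long long) spec->sector_start,
                       (unsigned long long) spec->length,
                       spec->target_type, status);
                spec = (struct dm_target_spec *)
                        (buf + dmi->data_start + spec->next);
        }
}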
-+ */ -+ __info(md, param); -+ -+ /* -+ * Count the devices. -+ */ -+ count = 0; -+ list_for_each(tmp, dm_table_get_devices(table)) -+ count++; -+ -+ /* -+ * Allocate a kernel space version of the dm_target_status -+ * struct. -+ */ -+ if (array_too_big(sizeof(*deps), sizeof(*deps->dev), count)) { -+ dm_table_put(table); -+ dm_put(md); -+ return -ENOMEM; -+ } -+ -+ len = sizeof(*deps) + (sizeof(*deps->dev) * count); -+ deps = kmalloc(len, GFP_KERNEL); -+ if (!deps) { -+ dm_table_put(table); -+ dm_put(md); -+ return -ENOMEM; -+ } -+ -+ /* -+ * Fill in the devices. -+ */ -+ deps->count = count; -+ count = 0; -+ list_for_each(tmp, dm_table_get_devices(table)) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ deps->dev[count++] = dd->bdev->bd_dev; -+ } -+ dm_table_put(table); -+ dm_put(md); -+ -+ out: -+ r = results_to_user(user, param, deps, len); -+ -+ kfree(deps); -+ return r; -+} -+ -+static int remove(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct hash_cell *hc; -+ -+ down_write(&_hash_lock); -+ hc = *param->uuid ? __get_uuid_cell(param->uuid) : -+ __get_name_cell(param->name); -+ if (!hc) { -+ DMWARN("device doesn't appear to be in the dev hash table."); -+ up_write(&_hash_lock); -+ return -EINVAL; -+ } -+ -+ __hash_remove(hc); -+ up_write(&_hash_lock); -+ return 0; -+} -+ -+static int remove_all(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ dm_hash_remove_all(); -+ return 0; -+} -+ -+static int suspend(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ md = find_device(param); -+ if (!md) -+ return -ENXIO; -+ -+ if (param->flags & DM_SUSPEND_FLAG) -+ r = dm_suspend(md); -+ else -+ r = dm_resume(md); -+ -+ dm_put(md); -+ return r; -+} -+ -+static int reload(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r; -+ kdev_t dev; -+ struct mapped_device *md; -+ struct dm_table *t; -+ -+ r = dm_table_create(&t, get_mode(param)); -+ if (r) -+ return r; -+ -+ r = populate_table(t, param); -+ if (r) { -+ dm_table_put(t); -+ return r; -+ } -+ -+ md = find_device(param); -+ if (!md) { -+ dm_table_put(t); -+ return -ENXIO; -+ } -+ -+ r = dm_swap_table(md, t); -+ if (r) { -+ dm_put(md); -+ dm_table_put(t); -+ return r; -+ } -+ -+ dev = dm_kdev(md); -+ set_device_ro(dev, (param->flags & DM_READONLY_FLAG)); -+ dm_put(md); -+ -+ r = info(param, user); -+ return r; -+} -+ -+static int rename(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r; -+ char *new_name = (char *) param + param->data_start; -+ -+ if (valid_str(new_name, (void *) param, -+ (void *) param + param->data_size)) { -+ DMWARN("Invalid new logical volume name supplied."); -+ return -EINVAL; -+ } -+ -+ r = check_name(new_name); -+ if (r) -+ return r; -+ -+ return dm_hash_rename(param->name, new_name); -+} -+ -+ -+/*----------------------------------------------------------------- -+ * Implementation of open/close/ioctl on the special char -+ * device. 
-+ *---------------------------------------------------------------*/ -+static ioctl_fn lookup_ioctl(unsigned int cmd) -+{ -+ static struct { -+ int cmd; -+ ioctl_fn fn; -+ } _ioctls[] = { -+ {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ -+ {DM_REMOVE_ALL_CMD, remove_all}, -+ {DM_DEV_CREATE_CMD, create}, -+ {DM_DEV_REMOVE_CMD, remove}, -+ {DM_DEV_RELOAD_CMD, reload}, -+ {DM_DEV_RENAME_CMD, rename}, -+ {DM_DEV_SUSPEND_CMD, suspend}, -+ {DM_DEV_DEPS_CMD, dep}, -+ {DM_DEV_STATUS_CMD, info}, -+ {DM_TARGET_STATUS_CMD, get_status}, -+ {DM_TARGET_WAIT_CMD, wait_device_event}, -+ }; -+ -+ return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; -+} -+ -+/* -+ * As well as checking the version compatibility this always -+ * copies the kernel interface version out. -+ */ -+static int check_version(int cmd, struct dm_ioctl *user) -+{ -+ uint32_t version[3]; -+ int r = 0; -+ -+ if (copy_from_user(version, user->version, sizeof(version))) -+ return -EFAULT; -+ -+ if ((DM_VERSION_MAJOR != version[0]) || -+ (DM_VERSION_MINOR < version[1])) { -+ DMWARN("ioctl interface mismatch: " -+ "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", -+ DM_VERSION_MAJOR, DM_VERSION_MINOR, -+ DM_VERSION_PATCHLEVEL, -+ version[0], version[1], version[2], cmd); -+ r = -EINVAL; -+ } -+ -+ /* -+ * Fill in the kernel version. -+ */ -+ version[0] = DM_VERSION_MAJOR; -+ version[1] = DM_VERSION_MINOR; -+ version[2] = DM_VERSION_PATCHLEVEL; -+ if (copy_to_user(user->version, version, sizeof(version))) -+ return -EFAULT; -+ -+ return r; -+} -+ -+static void free_params(struct dm_ioctl *param) -+{ -+ vfree(param); -+} -+ -+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param) -+{ -+ struct dm_ioctl tmp, *dmi; -+ -+ if (copy_from_user(&tmp, user, sizeof(tmp))) -+ return -EFAULT; -+ -+ if (tmp.data_size < sizeof(tmp)) -+ return -EINVAL; -+ -+ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); -+ if (!dmi) -+ return -ENOMEM; -+ -+ if (copy_from_user(dmi, user, tmp.data_size)) { -+ vfree(dmi); -+ return -EFAULT; -+ } -+ -+ *param = dmi; -+ return 0; -+} -+ -+static int validate_params(uint cmd, struct dm_ioctl *param) -+{ -+ /* Ignores parameters */ -+ if (cmd == DM_REMOVE_ALL_CMD) -+ return 0; -+ -+ /* Unless creating, either name of uuid but not both */ -+ if (cmd != DM_DEV_CREATE_CMD) { -+ if ((!*param->uuid && !*param->name) || -+ (*param->uuid && *param->name)) { -+ DMWARN("one of name or uuid must be supplied"); -+ return -EINVAL; -+ } -+ } -+ -+ /* Ensure strings are terminated */ -+ param->name[DM_NAME_LEN - 1] = '\0'; -+ param->uuid[DM_UUID_LEN - 1] = '\0'; -+ -+ return 0; -+} -+ -+static int ctl_ioctl(struct inode *inode, struct file *file, -+ uint command, ulong u) -+{ -+ int r = 0, cmd; -+ struct dm_ioctl *param; -+ struct dm_ioctl *user = (struct dm_ioctl *) u; -+ ioctl_fn fn = NULL; -+ -+ /* only root can play with this */ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EACCES; -+ -+ if (_IOC_TYPE(command) != DM_IOCTL) -+ return -ENOTTY; -+ -+ cmd = _IOC_NR(command); -+ -+ /* -+ * Check the interface version passed in. This also -+ * writes out the kernel's interface version. -+ */ -+ r = check_version(cmd, user); -+ if (r) -+ return r; -+ -+ /* -+ * Nothing more to do for the version command. -+ */ -+ if (cmd == DM_VERSION_CMD) -+ return 0; -+ -+ fn = lookup_ioctl(cmd); -+ if (!fn) { -+ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); -+ return -ENOTTY; -+ } -+ -+ /* -+ * Copy the parameters into kernel space. 
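A userland sketch of the handshake that check_version() above enforces: the major numbers must match exactly and the kernel's minor must be at least the caller's, and either way the kernel writes its own version back. The control-node path depends on the devfs setup, so treat it as illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dm-ioctl.h>

static int probe_dm_version(void)
{
        struct dm_ioctl dmi;
        int fd = open("/dev/mapper/control", O_RDWR);

        if (fd < 0)
                return -1;

        memset(&dmi, 0, sizeof(dmi));
        dmi.version[0] = DM_VERSION_MAJOR;      /* must match exactly */
        dmi.version[1] = DM_VERSION_MINOR;      /* kernel's must be >= */
        dmi.version[2] = DM_VERSION_PATCHLEVEL;
        dmi.data_size = sizeof(dmi);

        ioctl(fd, DM_VERSION, &dmi);    /* kernel fills in its version */
        printf("kernel dm interface %u.%u.%u\n",
               dmi.version[0], dmi.version[1], dmi.version[2]);
        close(fd);
        return 0;
}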
-+ */ -+ r = copy_params(user, ¶m); -+ if (r) -+ return r; -+ -+ r = validate_params(cmd, param); -+ if (r) { -+ free_params(param); -+ return r; -+ } -+ -+ r = fn(param, user); -+ free_params(param); -+ return r; -+} -+ -+static struct file_operations _ctl_fops = { -+ .ioctl = ctl_ioctl, -+ .owner = THIS_MODULE, -+}; -+ -+static devfs_handle_t _ctl_handle; -+ -+static struct miscdevice _dm_misc = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = DM_NAME, -+ .fops = &_ctl_fops -+}; -+ -+/* -+ * Create misc character device and link to DM_DIR/control. -+ */ -+int __init dm_interface_init(void) -+{ -+ int r; -+ char rname[64]; -+ -+ r = dm_hash_init(); -+ if (r) -+ return r; -+ -+ r = misc_register(&_dm_misc); -+ if (r) { -+ DMERR("misc_register failed for control device"); -+ dm_hash_exit(); -+ return r; -+ } -+ -+ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3, -+ sizeof rname - 3); -+ if (r == -ENOSYS) -+ return 0; /* devfs not present */ -+ -+ if (r < 0) { -+ DMERR("devfs_generate_path failed for control device"); -+ goto failed; -+ } -+ -+ strncpy(rname + r, "../", 3); -+ r = devfs_mk_symlink(NULL, DM_DIR "/control", -+ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL); -+ if (r) { -+ DMERR("devfs_mk_symlink failed for control device"); -+ goto failed; -+ } -+ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle); -+ -+ DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR, -+ DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA, -+ DM_DRIVER_EMAIL); -+ return 0; -+ -+ failed: -+ dm_hash_exit(); -+ misc_deregister(&_dm_misc); -+ return r; -+} -+ -+void dm_interface_exit(void) -+{ -+ dm_hash_exit(); -+ -+ if (misc_deregister(&_dm_misc) < 0) -+ DMERR("misc_deregister failed for control device"); -+} -diff -ruN linux-2.4.19/drivers/md/dm-linear.c linux-2.4.19-dm/drivers/md/dm-linear.c ---- linux-2.4.19/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-linear.c Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,120 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * Linear: maps a linear range of a device. 
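Before the constructor below, a concrete (invented) example of the target line the linear mapping consumes, in the usual <start> <length> <type> <args...> table form:

/*
 *      0 1024 linear /dev/hda5 2048
 *
 * maps sectors 0..1023 of the dm device onto sectors 2048..3071 of
 * /dev/hda5; linear_map() computes
 *
 *      bh->b_rsector = 2048 + (bh->b_rsector - 0)
 */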
-+ */ -+struct linear_c { -+ struct dm_dev *dev; -+ sector_t start; -+}; -+ -+/* -+ * Construct a linear mapping: -+ */ -+static int linear_ctr(struct dm_target *ti, int argc, char **argv) -+{ -+ struct linear_c *lc; -+ -+ if (argc != 2) { -+ ti->error = "dm-linear: Not enough arguments"; -+ return -EINVAL; -+ } -+ -+ lc = kmalloc(sizeof(*lc), GFP_KERNEL); -+ if (lc == NULL) { -+ ti->error = "dm-linear: Cannot allocate linear context"; -+ return -ENOMEM; -+ } -+ -+ if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) { -+ ti->error = "dm-linear: Invalid device sector"; -+ goto bad; -+ } -+ -+ if (dm_get_device(ti, argv[0], lc->start, ti->len, -+ dm_table_get_mode(ti->table), &lc->dev)) { -+ ti->error = "dm-linear: Device lookup failed"; -+ goto bad; -+ } -+ -+ ti->private = lc; -+ return 0; -+ -+ bad: -+ kfree(lc); -+ return -EINVAL; -+} -+ -+static void linear_dtr(struct dm_target *ti) -+{ -+ struct linear_c *lc = (struct linear_c *) ti->private; -+ -+ dm_put_device(ti, lc->dev); -+ kfree(lc); -+} -+ -+static int linear_map(struct dm_target *ti, struct buffer_head *bh, int rw) -+{ -+ struct linear_c *lc = (struct linear_c *) ti->private; -+ -+ bh->b_rdev = lc->dev->dev; -+ bh->b_rsector = lc->start + (bh->b_rsector - ti->begin); -+ -+ return 1; -+} -+ -+static int linear_status(struct dm_target *ti, status_type_t type, -+ char *result, int maxlen) -+{ -+ struct linear_c *lc = (struct linear_c *) ti->private; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ result[0] = '\0'; -+ break; -+ -+ case STATUSTYPE_TABLE: -+ snprintf(result, maxlen, "%s " SECTOR_FORMAT, -+ kdevname(to_kdev_t(lc->dev->bdev->bd_dev)), lc->start); -+ break; -+ } -+ return 0; -+} -+ -+static struct target_type linear_target = { -+ .name = "linear", -+ .module = THIS_MODULE, -+ .ctr = linear_ctr, -+ .dtr = linear_dtr, -+ .map = linear_map, -+ .status = linear_status, -+}; -+ -+int __init dm_linear_init(void) -+{ -+ int r = dm_register_target(&linear_target); -+ -+ if (r < 0) -+ DMERR("linear: register failed %d", r); -+ -+ return r; -+} -+ -+void dm_linear_exit(void) -+{ -+ int r = dm_unregister_target(&linear_target); -+ -+ if (r < 0) -+ DMERR("linear: unregister failed %d", r); -+} -diff -ruN linux-2.4.19/drivers/md/dm-snapshot.c linux-2.4.19-dm/drivers/md/dm-snapshot.c ---- linux-2.4.19/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-snapshot.c Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,1169 @@ -+/* -+ * dm-snapshot.c -+ * -+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "dm-snapshot.h" -+#include "kcopyd.h" -+ -+/* -+ * FIXME: Remove this before release. -+ */ -+#if 0 -+#define DMDEBUG(x...) DMWARN( ## x) -+#else -+#define DMDEBUG(x...) -+#endif -+ -+/* -+ * The percentage increment we will wake up users at -+ */ -+#define WAKE_UP_PERCENT 5 -+ -+/* -+ * Hard sector size used all over the kernel -+ */ -+#define SECTOR_SIZE 512 -+ -+/* -+ * kcopyd priority of snapshot operations -+ */ -+#define SNAPSHOT_COPY_PRIORITY 2 -+ -+struct pending_exception { -+ struct exception e; -+ -+ /* -+ * Origin buffers waiting for this to complete are held -+ * in a list (using b_reqnext). -+ */ -+ struct buffer_head *origin_bhs; -+ struct buffer_head *snapshot_bhs; -+ -+ /* -+ * Other pending_exceptions that are processing this -+ * chunk. When this list is empty, we know we can -+ * complete the origins. 
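A rough lifecycle for this structure, pieced together from the fields above and the code further down the file (a summary, not new mechanism):

/*
 *      1. a write hits an origin chunk that has no exception yet;
 *      2. a pending_exception is allocated and the buffer_head parked
 *         on origin_bhs (snapshot_bhs for writes to the snapshot);
 *      3. 'started' is set once the chunk copy is handed to kcopyd;
 *      4. when the copy completes, the exception is committed through
 *         the exception store and the parked buffer_heads released.
 */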
-+ */ -+ struct list_head siblings; -+ -+ /* Pointer back to snapshot context */ -+ struct dm_snapshot *snap; -+ -+ /* -+ * 1 indicates the exception has already been sent to -+ * kcopyd. -+ */ -+ int started; -+}; -+ -+/* -+ * Hash table mapping origin volumes to lists of snapshots and -+ * a lock to protect it -+ */ -+static kmem_cache_t *exception_cache; -+static kmem_cache_t *pending_cache; -+static mempool_t *pending_pool; -+ -+/* -+ * One of these per registered origin, held in the snapshot_origins hash -+ */ -+struct origin { -+ /* The origin device */ -+ kdev_t dev; -+ -+ struct list_head hash_list; -+ -+ /* List of snapshots for this origin */ -+ struct list_head snapshots; -+}; -+ -+/* -+ * Size of the hash table for origin volumes. If we make this -+ * the size of the minors list then it should be nearly perfect -+ */ -+#define ORIGIN_HASH_SIZE 256 -+#define ORIGIN_MASK 0xFF -+static struct list_head *_origins; -+static struct rw_semaphore _origins_lock; -+ -+static int init_origin_hash(void) -+{ -+ int i; -+ -+ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!_origins) { -+ DMERR("Device mapper: Snapshot: unable to allocate memory"); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < ORIGIN_HASH_SIZE; i++) -+ INIT_LIST_HEAD(_origins + i); -+ init_rwsem(&_origins_lock); -+ -+ return 0; -+} -+ -+static void exit_origin_hash(void) -+{ -+ kfree(_origins); -+} -+ -+static inline unsigned int origin_hash(kdev_t dev) -+{ -+ return MINOR(dev) & ORIGIN_MASK; -+} -+ -+static struct origin *__lookup_origin(kdev_t origin) -+{ -+ struct list_head *slist; -+ struct list_head *ol; -+ struct origin *o; -+ -+ ol = &_origins[origin_hash(origin)]; -+ list_for_each(slist, ol) { -+ o = list_entry(slist, struct origin, hash_list); -+ -+ if (o->dev == origin) -+ return o; -+ } -+ -+ return NULL; -+} -+ -+static void __insert_origin(struct origin *o) -+{ -+ struct list_head *sl = &_origins[origin_hash(o->dev)]; -+ list_add_tail(&o->hash_list, sl); -+} -+ -+/* -+ * Make a note of the snapshot and its origin so we can look it -+ * up when the origin has a write on it. -+ */ -+static int register_snapshot(struct dm_snapshot *snap) -+{ -+ struct origin *o; -+ kdev_t dev = snap->origin->dev; -+ -+ down_write(&_origins_lock); -+ o = __lookup_origin(dev); -+ -+ if (!o) { -+ /* New origin */ -+ o = kmalloc(sizeof(*o), GFP_KERNEL); -+ if (!o) { -+ up_write(&_origins_lock); -+ return -ENOMEM; -+ } -+ -+ /* Initialise the struct */ -+ INIT_LIST_HEAD(&o->snapshots); -+ o->dev = dev; -+ -+ __insert_origin(o); -+ } -+ -+ list_add_tail(&snap->list, &o->snapshots); -+ -+ up_write(&_origins_lock); -+ return 0; -+} -+ -+static void unregister_snapshot(struct dm_snapshot *s) -+{ -+ struct origin *o; -+ -+ down_write(&_origins_lock); -+ o = __lookup_origin(s->origin->dev); -+ -+ list_del(&s->list); -+ if (list_empty(&o->snapshots)) { -+ list_del(&o->hash_list); -+ kfree(o); -+ } -+ -+ up_write(&_origins_lock); -+} -+ -+/* -+ * Implementation of the exception hash tables. 
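-+ *
-+ * A chunk's bucket is found by masking, i.e. chunk & et->hash_mask;
-+ * e.g. with 256 buckets (hash_mask 0xff), chunk 0x1234 lands in
-+ * bucket 0x34.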
-+ */ -+static int init_exception_table(struct exception_table *et, uint32_t size) -+{ -+ int i; -+ -+ et->hash_mask = size - 1; -+ et->table = vcalloc(size, sizeof(struct list_head)); -+ if (!et->table) -+ return -ENOMEM; -+ -+ for (i = 0; i < size; i++) -+ INIT_LIST_HEAD(et->table + i); -+ -+ return 0; -+} -+ -+static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) -+{ -+ struct list_head *slot, *entry, *temp; -+ struct exception *ex; -+ int i, size; -+ -+ size = et->hash_mask + 1; -+ for (i = 0; i < size; i++) { -+ slot = et->table + i; -+ -+ list_for_each_safe(entry, temp, slot) { -+ ex = list_entry(entry, struct exception, hash_list); -+ kmem_cache_free(mem, ex); -+ } -+ } -+ -+ vfree(et->table); -+} -+ -+/* -+ * FIXME: check how this hash fn is performing. -+ */ -+static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) -+{ -+ return chunk & et->hash_mask; -+} -+ -+static void insert_exception(struct exception_table *eh, struct exception *e) -+{ -+ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; -+ list_add(&e->hash_list, l); -+} -+ -+static inline void remove_exception(struct exception *e) -+{ -+ list_del(&e->hash_list); -+} -+ -+/* -+ * Return the exception data for a sector, or NULL if not -+ * remapped. -+ */ -+static struct exception *lookup_exception(struct exception_table *et, -+ chunk_t chunk) -+{ -+ struct list_head *slot, *el; -+ struct exception *e; -+ -+ slot = &et->table[exception_hash(et, chunk)]; -+ list_for_each(el, slot) { -+ e = list_entry(el, struct exception, hash_list); -+ if (e->old_chunk == chunk) -+ return e; -+ } -+ -+ return NULL; -+} -+ -+static inline struct exception *alloc_exception(void) -+{ -+ struct exception *e; -+ -+ e = kmem_cache_alloc(exception_cache, GFP_NOIO); -+ if (!e) -+ e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); -+ -+ return e; -+} -+ -+static inline void free_exception(struct exception *e) -+{ -+ kmem_cache_free(exception_cache, e); -+} -+ -+static inline struct pending_exception *alloc_pending_exception(void) -+{ -+ return mempool_alloc(pending_pool, GFP_NOIO); -+} -+ -+static inline void free_pending_exception(struct pending_exception *pe) -+{ -+ mempool_free(pe, pending_pool); -+} -+ -+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) -+{ -+ struct exception *e; -+ -+ e = alloc_exception(); -+ if (!e) -+ return -ENOMEM; -+ -+ e->old_chunk = old; -+ e->new_chunk = new; -+ insert_exception(&s->complete, e); -+ return 0; -+} -+ -+/* -+ * Hard coded magic. -+ */ -+static int calc_max_buckets(void) -+{ -+ unsigned long mem; -+ -+ mem = num_physpages << PAGE_SHIFT; -+ mem /= 50; -+ mem /= sizeof(struct list_head); -+ -+ return mem; -+} -+ -+/* -+ * Rounds a number down to a power of 2. -+ */ -+static inline uint32_t round_down(uint32_t n) -+{ -+ while (n & (n - 1)) -+ n &= (n - 1); -+ return n; -+} -+ -+/* -+ * Allocate room for a suitable hash table. -+ */ -+static int init_hash_tables(struct dm_snapshot *s) -+{ -+ sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; -+ -+ /* -+ * Calculate based on the size of the original volume or -+ * the COW volume... 
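-+ *
-+ * A worked example with made-up numbers: a 1GiB volume is 2097152
-+ * sectors, so with a 16-sector chunk there are 131072 chunks; the
-+ * result is rounded down to a power of 2 and capped by
-+ * calc_max_buckets(), which allows roughly 2% of memory for the
-+ * bucket heads.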
-+ */ -+ cow_dev_size = get_dev_size(s->cow->dev); -+ origin_dev_size = get_dev_size(s->origin->dev); -+ max_buckets = calc_max_buckets(); -+ -+ hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size; -+ hash_size = min(hash_size, max_buckets); -+ -+ /* Round it down to a power of 2 */ -+ hash_size = round_down(hash_size); -+ if (init_exception_table(&s->complete, hash_size)) -+ return -ENOMEM; -+ -+ /* -+ * Allocate hash table for in-flight exceptions -+ * Make this smaller than the real hash table -+ */ -+ hash_size >>= 3; -+ if (!hash_size) -+ hash_size = 64; -+ -+ if (init_exception_table(&s->pending, hash_size)) { -+ exit_exception_table(&s->complete, exception_cache); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Round a number up to the nearest 'size' boundary. size must -+ * be a power of 2. -+ */ -+static inline ulong round_up(ulong n, ulong size) -+{ -+ size--; -+ return (n + size) & ~size; -+} -+ -+/* -+ * Construct a snapshot mapping:
-+ * <origin_dev> <COW-dev> <p/n> <chunk-size>
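-+ *
-+ * For illustration (hypothetical paths), the table line
-+ *
-+ *     0 2097152 snapshot /dev/vg0/lvol1 /dev/vg0/cow P 16
-+ *
-+ * snapshots the first 2097152 sectors of lvol1 into the COW device
-+ * using persistent 16-sector (8KiB) chunks.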
-+ */ -+static int snapshot_ctr(struct dm_target *ti, int argc, char **argv) -+{ -+ struct dm_snapshot *s; -+ unsigned long chunk_size; -+ int r = -EINVAL; -+ char *persistent; -+ char *origin_path; -+ char *cow_path; -+ char *value; -+ int blocksize; -+ -+ if (argc < 4) { -+ ti->error = "dm-snapshot: requires exactly 4 arguments"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ origin_path = argv[0]; -+ cow_path = argv[1]; -+ persistent = argv[2]; -+ -+ if ((*persistent & 0x5f) != 'P' && (*persistent & 0x5f) != 'N') { -+ ti->error = "Persistent flag is not P or N"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ chunk_size = simple_strtoul(argv[3], &value, 10); -+ if (chunk_size == 0 || value == NULL) { -+ ti->error = "Invalid chunk size"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) { -+ ti->error = "Cannot allocate snapshot context private " -+ "structure"; -+ r = -ENOMEM; -+ goto bad; -+ } -+ -+ r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); -+ if (r) { -+ ti->error = "Cannot get origin device"; -+ goto bad_free; -+ } -+ -+ /* FIXME: get cow length */ -+ r = dm_get_device(ti, cow_path, 0, 0, -+ FMODE_READ | FMODE_WRITE, &s->cow); -+ if (r) { -+ dm_put_device(ti, s->origin); -+ ti->error = "Cannot get COW device"; -+ goto bad_free; -+ } -+ -+ /* -+ * Chunk size must be multiple of page size. Silently -+ * round up if it's not. -+ */ -+ chunk_size = round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE); -+ -+ /* Validate the chunk size against the device block size */ -+ blocksize = get_hardsect_size(s->cow->dev); -+ if (chunk_size % (blocksize / SECTOR_SIZE)) { -+ ti->error = "Chunk size is not a multiple of device blocksize"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ /* Check the sizes are small enough to fit in one kiovec */ -+ if (chunk_size > KIO_MAX_SECTORS) { -+ ti->error = "Chunk size is too big"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ /* Check chunk_size is a power of 2 */ -+ if (chunk_size & (chunk_size - 1)) { -+ ti->error = "Chunk size is not a power of 2"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ s->chunk_size = chunk_size; -+ s->chunk_mask = chunk_size - 1; -+ s->type = *persistent; -+ for (s->chunk_shift = 0; chunk_size; -+ s->chunk_shift++, chunk_size >>= 1) -+ ; -+ s->chunk_shift--; -+ -+ s->valid = 1; -+ s->last_percent = 0; -+ init_rwsem(&s->lock); -+ s->table = ti->table; -+ -+ /* Allocate hash table for COW data */ -+ if (init_hash_tables(s)) { -+ ti->error = "Unable to allocate hash table space"; -+ r = -ENOMEM; -+ goto bad_putdev; -+ } -+ -+ /* -+ * Check the persistent flag - done here because we need the iobuf -+ * to check the LV header -+ */ -+ s->store.snap = s; -+ -+ if ((*persistent & 0x5f) == 'P') -+ r = dm_create_persistent(&s->store, s->chunk_size); -+ else -+ r = dm_create_transient(&s->store, s, blocksize); -+ -+ if (r) { -+ ti->error = "Couldn't create exception store"; -+ r = -EINVAL; -+ goto bad_free1; -+ } -+ -+ /* Flush IO to the origin device */ -+#if LVM_VFS_ENHANCEMENT -+ fsync_dev_lockfs(s->origin->dev); -+#else -+ fsync_dev(s->origin->dev); -+#endif -+ -+ /* Add snapshot to the list of snapshots for this origin */ -+ if (register_snapshot(s)) { -+ r = -EINVAL; -+ ti->error = "Cannot register snapshot origin"; -+ goto bad_free2; -+ } -+#if LVM_VFS_ENHANCEMENT -+ unlockfs(s->origin->dev); -+#endif -+ kcopyd_inc_client_count(); -+ -+ ti->private = s; -+ return 0; -+ -+ bad_free2: -+ s->store.destroy(&s->store); -+ -+ bad_free1: -+ exit_exception_table(&s->pending, 
pending_cache); -+ exit_exception_table(&s->complete, exception_cache); -+ -+ bad_putdev: -+ dm_put_device(ti, s->cow); -+ dm_put_device(ti, s->origin); -+ -+ bad_free: -+ kfree(s); -+ -+ bad: -+ return r; -+} -+ -+static void snapshot_dtr(struct dm_target *ti) -+{ -+ struct dm_snapshot *s = (struct dm_snapshot *) ti->private; -+ -+ dm_table_event(ti->table); -+ -+ unregister_snapshot(s); -+ -+ exit_exception_table(&s->pending, pending_cache); -+ exit_exception_table(&s->complete, exception_cache); -+ -+ /* Deallocate memory used */ -+ s->store.destroy(&s->store); -+ -+ dm_put_device(ti, s->origin); -+ dm_put_device(ti, s->cow); -+ kfree(s); -+ -+ kcopyd_dec_client_count(); -+} -+ -+/* -+ * We hold lists of buffer_heads, using the b_reqnext field. -+ */ -+static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh) -+{ -+ bh->b_reqnext = *queue; -+ *queue = bh; -+} -+ -+/* -+ * Flush a list of buffers. -+ */ -+static void flush_buffers(struct buffer_head *bh) -+{ -+ struct buffer_head *n; -+ -+ DMDEBUG("begin flush"); -+ while (bh) { -+ n = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ DMDEBUG("flushing %p", bh); -+ generic_make_request(WRITE, bh); -+ bh = n; -+ } -+ -+ run_task_queue(&tq_disk); -+} -+ -+/* -+ * Error a list of buffers. -+ */ -+static void error_buffers(struct buffer_head *bh) -+{ -+ struct buffer_head *n; -+ -+ while (bh) { -+ n = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ buffer_IO_error(bh); -+ bh = n; -+ } -+} -+ -+static void pending_complete(struct pending_exception *pe, int success) -+{ -+ struct exception *e; -+ struct dm_snapshot *s = pe->snap; -+ -+ if (success) { -+ e = alloc_exception(); -+ if (!e) { -+ printk("Unable to allocate exception."); -+ down_write(&s->lock); -+ s->store.drop_snapshot(&s->store); -+ s->valid = 0; -+ up_write(&s->lock); -+ return; -+ } -+ -+ /* -+ * Add a proper exception, and remove the -+ * inflight exception from the list. -+ */ -+ down_write(&s->lock); -+ -+ memcpy(e, &pe->e, sizeof(*e)); -+ insert_exception(&s->complete, e); -+ remove_exception(&pe->e); -+ -+ /* Submit any pending write BHs */ -+ up_write(&s->lock); -+ -+ flush_buffers(pe->snapshot_bhs); -+ DMDEBUG("Exception completed successfully."); -+ -+ /* Notify any interested parties */ -+ if (s->store.percent_full) { -+ int pc = s->store.percent_full(&s->store); -+ -+ if (pc >= s->last_percent + WAKE_UP_PERCENT) { -+ dm_table_event(s->table); -+ s->last_percent = pc - pc % WAKE_UP_PERCENT; -+ } -+ } -+ -+ } else { -+ /* Read/write error - snapshot is unusable */ -+ DMERR("Error reading/writing snapshot"); -+ -+ down_write(&s->lock); -+ s->store.drop_snapshot(&s->store); -+ s->valid = 0; -+ remove_exception(&pe->e); -+ up_write(&s->lock); -+ -+ error_buffers(pe->snapshot_bhs); -+ -+ dm_table_event(s->table); -+ DMDEBUG("Exception failed."); -+ } -+ -+ if (list_empty(&pe->siblings)) -+ flush_buffers(pe->origin_bhs); -+ else -+ list_del(&pe->siblings); -+ -+ free_pending_exception(pe); -+} -+ -+static void commit_callback(void *context, int success) -+{ -+ struct pending_exception *pe = (struct pending_exception *) context; -+ pending_complete(pe, success); -+} -+ -+/* -+ * Called when the copy I/O has finished. kcopyd actually runs -+ * this code so don't block. 
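-+ *
-+ * For reference, the completion chain: start_copy() hands the chunk
-+ * to kcopyd; copy_callback() either fails the exception or asks the
-+ * exception store to commit it; commit_callback() then calls
-+ * pending_complete(), which releases the queued buffers.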
-+ */ -+static void copy_callback(int err, void *context) -+{ -+ struct pending_exception *pe = (struct pending_exception *) context; -+ struct dm_snapshot *s = pe->snap; -+ -+ if (err) -+ pending_complete(pe, 0); -+ -+ else -+ /* Update the metadata if we are persistent */ -+ s->store.commit_exception(&s->store, &pe->e, commit_callback, -+ pe); -+} -+ -+/* -+ * Dispatches the copy operation to kcopyd. -+ */ -+static inline void start_copy(struct pending_exception *pe) -+{ -+ struct dm_snapshot *s = pe->snap; -+ struct kcopyd_region src, dest; -+ -+ src.dev = s->origin->dev; -+ src.sector = chunk_to_sector(s, pe->e.old_chunk); -+ src.count = s->chunk_size; -+ -+ dest.dev = s->cow->dev; -+ dest.sector = chunk_to_sector(s, pe->e.new_chunk); -+ dest.count = s->chunk_size; -+ -+ if (!pe->started) { -+ /* Hand over to kcopyd */ -+ kcopyd_copy(&src, &dest, copy_callback, pe); -+ pe->started = 1; -+ } -+} -+ -+/* -+ * Looks to see if this snapshot already has a pending exception -+ * for this chunk, otherwise it allocates a new one and inserts -+ * it into the pending table. -+ */ -+static struct pending_exception *find_pending_exception(struct dm_snapshot *s, -+ struct buffer_head *bh) -+{ -+ struct exception *e; -+ struct pending_exception *pe; -+ chunk_t chunk = sector_to_chunk(s, bh->b_rsector); -+ -+ /* -+ * Is there a pending exception for this already ? -+ */ -+ e = lookup_exception(&s->pending, chunk); -+ if (e) { -+ /* cast the exception to a pending exception */ -+ pe = list_entry(e, struct pending_exception, e); -+ -+ } else { -+ /* Create a new pending exception */ -+ pe = alloc_pending_exception(); -+ if (!pe) { -+ DMWARN("Couldn't allocate pending exception."); -+ return NULL; -+ } -+ -+ pe->e.old_chunk = chunk; -+ pe->origin_bhs = pe->snapshot_bhs = NULL; -+ INIT_LIST_HEAD(&pe->siblings); -+ pe->snap = s; -+ pe->started = 0; -+ -+ if (s->store.prepare_exception(&s->store, &pe->e)) { -+ free_pending_exception(pe); -+ s->valid = 0; -+ return NULL; -+ } -+ -+ insert_exception(&s->pending, &pe->e); -+ } -+ -+ return pe; -+} -+ -+static inline void remap_exception(struct dm_snapshot *s, struct exception *e, -+ struct buffer_head *bh) -+{ -+ bh->b_rdev = s->cow->dev; -+ bh->b_rsector = chunk_to_sector(s, e->new_chunk) + -+ (bh->b_rsector & s->chunk_mask); -+} -+ -+static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, int rw) -+{ -+ struct exception *e; -+ struct dm_snapshot *s = (struct dm_snapshot *) ti->private; -+ int r = 1; -+ chunk_t chunk; -+ struct pending_exception *pe; -+ -+ chunk = sector_to_chunk(s, bh->b_rsector); -+ -+ /* Full snapshots are not usable */ -+ if (!s->valid) -+ return -1; -+ -+ /* -+ * Write to snapshot - higher level takes care of RW/RO -+ * flags so we should only get this if we are -+ * writeable. -+ */ -+ if (rw == WRITE) { -+ -+ down_write(&s->lock); -+ -+ /* If the block is already remapped - use that, else remap it */ -+ e = lookup_exception(&s->complete, chunk); -+ if (e) -+ remap_exception(s, e, bh); -+ -+ else { -+ pe = find_pending_exception(s, bh); -+ -+ if (!pe) { -+ s->store.drop_snapshot(&s->store); -+ s->valid = 0; -+ } -+ -+ queue_buffer(&pe->snapshot_bhs, bh); -+ start_copy(pe); -+ r = 0; -+ } -+ -+ up_write(&s->lock); -+ -+ } else { -+ /* -+ * FIXME: this read path scares me because we -+ * always use the origin when we have a pending -+ * exception. However I can't think of a -+ * situation where this is wrong - ejt. 
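-+ *
-+ * (The intuition: a pending exception means the write that triggered
-+ * it is still deferred, so the origin chunk still holds the old data
-+ * a reader of the snapshot should see.)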
-+ */ -+ -+ /* Do reads */ -+ down_read(&s->lock); -+ -+ /* See if it it has been remapped */ -+ e = lookup_exception(&s->complete, chunk); -+ if (e) -+ remap_exception(s, e, bh); -+ else -+ bh->b_rdev = s->origin->dev; -+ -+ up_read(&s->lock); -+ } -+ -+ return r; -+} -+ -+static void list_merge(struct list_head *l1, struct list_head *l2) -+{ -+ struct list_head *l1_n, *l2_p; -+ -+ l1_n = l1->next; -+ l2_p = l2->prev; -+ -+ l1->next = l2; -+ l2->prev = l1; -+ -+ l2_p->next = l1_n; -+ l1_n->prev = l2_p; -+} -+ -+static int __origin_write(struct list_head *snapshots, struct buffer_head *bh) -+{ -+ int r = 1; -+ struct list_head *sl; -+ struct dm_snapshot *snap; -+ struct exception *e; -+ struct pending_exception *pe, *last = NULL; -+ chunk_t chunk; -+ -+ /* Do all the snapshots on this origin */ -+ list_for_each(sl, snapshots) { -+ snap = list_entry(sl, struct dm_snapshot, list); -+ -+ /* Only deal with valid snapshots */ -+ if (!snap->valid) -+ continue; -+ -+ down_write(&snap->lock); -+ -+ /* -+ * Remember, different snapshots can have -+ * different chunk sizes. -+ */ -+ chunk = sector_to_chunk(snap, bh->b_rsector); -+ -+ /* -+ * Check exception table to see if block -+ * is already remapped in this snapshot -+ * and trigger an exception if not. -+ */ -+ e = lookup_exception(&snap->complete, chunk); -+ if (!e) { -+ pe = find_pending_exception(snap, bh); -+ if (!pe) { -+ snap->store.drop_snapshot(&snap->store); -+ snap->valid = 0; -+ -+ } else { -+ if (last) -+ list_merge(&pe->siblings, -+ &last->siblings); -+ -+ last = pe; -+ r = 0; -+ } -+ } -+ -+ up_write(&snap->lock); -+ } -+ -+ /* -+ * Now that we have a complete pe list we can start the copying. -+ */ -+ if (last) { -+ pe = last; -+ do { -+ down_write(&pe->snap->lock); -+ queue_buffer(&pe->origin_bhs, bh); -+ start_copy(pe); -+ up_write(&pe->snap->lock); -+ pe = list_entry(pe->siblings.next, -+ struct pending_exception, siblings); -+ -+ } while (pe != last); -+ } -+ -+ return r; -+} -+ -+static int snapshot_status(struct dm_target *ti, status_type_t type, -+ char *result, int maxlen) -+{ -+ struct dm_snapshot *snap = (struct dm_snapshot *) ti->private; -+ char cow[16]; -+ char org[16]; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ if (!snap->valid) -+ snprintf(result, maxlen, "Invalid"); -+ else { -+ if (snap->store.percent_full) -+ snprintf(result, maxlen, "%d%%", -+ snap->store.percent_full(&snap-> -+ store)); -+ else -+ snprintf(result, maxlen, "Unknown"); -+ } -+ break; -+ -+ case STATUSTYPE_TABLE: -+ /* -+ * kdevname returns a static pointer so we need -+ * to make private copies if the output is to -+ * make sense. -+ */ -+ strncpy(cow, kdevname(snap->cow->dev), sizeof(cow)); -+ strncpy(org, kdevname(snap->origin->dev), sizeof(org)); -+ snprintf(result, maxlen, "%s %s %c %ld", org, cow, -+ snap->type, snap->chunk_size); -+ break; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Called on a write from the origin driver. -+ */ -+int do_origin(struct dm_dev *origin, struct buffer_head *bh) -+{ -+ struct origin *o; -+ int r; -+ -+ down_read(&_origins_lock); -+ o = __lookup_origin(origin->dev); -+ if (!o) -+ BUG(); -+ -+ r = __origin_write(&o->snapshots, bh); -+ up_read(&_origins_lock); -+ -+ return r; -+} -+ -+/* -+ * Origin: maps a linear range of a device, with hooks for snapshotting. -+ */ -+ -+/* -+ * Construct an origin mapping: -+ * The context for an origin is merely a 'struct dm_dev *' -+ * pointing to the real device. 
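-+ *
-+ * The single argument is the origin device path; e.g. (hypothetical
-+ * path)
-+ *
-+ *     0 2097152 snapshot-origin /dev/vg0/lvol1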
-+ */ -+static int origin_ctr(struct dm_target *ti, int argc, char **argv) -+{ -+ int r; -+ struct dm_dev *dev; -+ -+ if (argc != 1) { -+ ti->error = "dm-origin: incorrect number of arguments"; -+ return -EINVAL; -+ } -+ -+ r = dm_get_device(ti, argv[0], 0, ti->len, -+ dm_table_get_mode(ti->table), &dev); -+ if (r) { -+ ti->error = "Cannot get target device"; -+ return r; -+ } -+ -+ ti->private = dev; -+ -+ return 0; -+} -+ -+static void origin_dtr(struct dm_target *ti) -+{ -+ struct dm_dev *dev = (struct dm_dev *) ti->private; -+ dm_put_device(ti, dev); -+} -+ -+static int origin_map(struct dm_target *ti, struct buffer_head *bh, int rw) -+{ -+ struct dm_dev *dev = (struct dm_dev *) ti->private; -+ bh->b_rdev = dev->dev; -+ -+ /* Only tell snapshots if this is a write */ -+ return (rw == WRITE) ? do_origin(dev, bh) : 1; -+} -+ -+static int origin_status(struct dm_target *ti, status_type_t type, char *result, -+ int maxlen) -+{ -+ struct dm_dev *dev = (struct dm_dev *) ti->private; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ result[0] = '\0'; -+ break; -+ -+ case STATUSTYPE_TABLE: -+ snprintf(result, maxlen, "%s", kdevname(dev->dev)); -+ break; -+ } -+ -+ return 0; -+} -+ -+static struct target_type origin_target = { -+ name: "snapshot-origin", -+ module: THIS_MODULE, -+ ctr: origin_ctr, -+ dtr: origin_dtr, -+ map: origin_map, -+ status: origin_status, -+}; -+ -+static struct target_type snapshot_target = { -+ name: "snapshot", -+ module: THIS_MODULE, -+ ctr: snapshot_ctr, -+ dtr: snapshot_dtr, -+ map: snapshot_map, -+ status: snapshot_status, -+}; -+ -+int __init dm_snapshot_init(void) -+{ -+ int r; -+ -+ r = dm_register_target(&snapshot_target); -+ if (r) { -+ DMERR("snapshot target register failed %d", r); -+ return r; -+ } -+ -+ r = dm_register_target(&origin_target); -+ if (r < 0) { -+ DMERR("Device mapper: Origin: register failed %d\n", r); -+ goto bad1; -+ } -+ -+ r = init_origin_hash(); -+ if (r) { -+ DMERR("init_origin_hash failed."); -+ goto bad2; -+ } -+ -+ exception_cache = kmem_cache_create("dm-snapshot-ex", -+ sizeof(struct exception), -+ __alignof__(struct exception), -+ 0, NULL, NULL); -+ if (!exception_cache) { -+ DMERR("Couldn't create exception cache."); -+ r = -ENOMEM; -+ goto bad3; -+ } -+ -+ pending_cache = -+ kmem_cache_create("dm-snapshot-in", -+ sizeof(struct pending_exception), -+ __alignof__(struct pending_exception), -+ 0, NULL, NULL); -+ if (!pending_cache) { -+ DMERR("Couldn't create pending cache."); -+ r = -ENOMEM; -+ goto bad4; -+ } -+ -+ pending_pool = mempool_create(128, mempool_alloc_slab, -+ mempool_free_slab, pending_cache); -+ if (!pending_pool) { -+ DMERR("Couldn't create pending pool."); -+ r = -ENOMEM; -+ goto bad5; -+ } -+ -+ return 0; -+ -+ bad5: -+ kmem_cache_destroy(pending_cache); -+ bad4: -+ kmem_cache_destroy(exception_cache); -+ bad3: -+ exit_origin_hash(); -+ bad2: -+ dm_unregister_target(&origin_target); -+ bad1: -+ dm_unregister_target(&snapshot_target); -+ return r; -+} -+ -+void dm_snapshot_exit(void) -+{ -+ int r; -+ -+ r = dm_unregister_target(&snapshot_target); -+ if (r) -+ DMERR("snapshot unregister failed %d", r); -+ -+ r = dm_unregister_target(&origin_target); -+ if (r) -+ DMERR("origin unregister failed %d", r); -+ -+ exit_origin_hash(); -+ mempool_destroy(pending_pool); -+ kmem_cache_destroy(pending_cache); -+ kmem_cache_destroy(exception_cache); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -ruN linux-2.4.19/drivers/md/dm-snapshot.h linux-2.4.19-dm/drivers/md/dm-snapshot.h ---- linux-2.4.19/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-snapshot.h Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,147 @@ -+/* -+ * dm-snapshot.c -+ * -+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#ifndef DM_SNAPSHOT_H -+#define DM_SNAPSHOT_H -+ -+#include "dm.h" -+#include -+ -+struct exception_table { -+ uint32_t hash_mask; -+ struct list_head *table; -+}; -+ -+/* -+ * The snapshot code deals with largish chunks of the disk at a -+ * time. Typically 64k - 256k. -+ */ -+/* FIXME: can we get away with limiting these to a uint32_t ? */ -+typedef sector_t chunk_t; -+ -+/* -+ * An exception is used where an old chunk of data has been -+ * replaced by a new one. -+ */ -+struct exception { -+ struct list_head hash_list; -+ -+ chunk_t old_chunk; -+ chunk_t new_chunk; -+}; -+ -+/* -+ * Abstraction to handle the meta/layout of exception stores (the -+ * COW device). -+ */ -+struct exception_store { -+ -+ /* -+ * Destroys this object when you've finished with it. -+ */ -+ void (*destroy) (struct exception_store * store); -+ -+ /* -+ * Find somewhere to store the next exception. -+ */ -+ int (*prepare_exception) (struct exception_store * store, -+ struct exception * e); -+ -+ /* -+ * Update the metadata with this exception. -+ */ -+ void (*commit_exception) (struct exception_store * store, -+ struct exception * e, -+ void (*callback) (void *, int success), -+ void *callback_context); -+ -+ /* -+ * The snapshot is invalid, note this in the metadata. -+ */ -+ void (*drop_snapshot) (struct exception_store * store); -+ -+ /* -+ * Return the %age full of the snapshot -+ */ -+ int (*percent_full) (struct exception_store * store); -+ -+ struct dm_snapshot *snap; -+ void *context; -+}; -+ -+struct dm_snapshot { -+ struct rw_semaphore lock; -+ struct dm_table *table; -+ -+ struct dm_dev *origin; -+ struct dm_dev *cow; -+ -+ /* List of snapshots per Origin */ -+ struct list_head list; -+ -+ /* Size of data blocks saved - must be a power of 2 */ -+ chunk_t chunk_size; -+ chunk_t chunk_mask; -+ chunk_t chunk_shift; -+ -+ /* You can't use a snapshot if this is 0 (e.g. if full) */ -+ int valid; -+ -+ /* Used for display of table */ -+ char type; -+ -+ /* The last percentage we notified */ -+ int last_percent; -+ -+ struct exception_table pending; -+ struct exception_table complete; -+ -+ /* The on disk metadata handler */ -+ struct exception_store store; -+}; -+ -+/* -+ * Used by the exception stores to load exceptions hen -+ * initialising. -+ */ -+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); -+ -+/* -+ * Constructor and destructor for the default persistent -+ * store. -+ */ -+int dm_create_persistent(struct exception_store *store, uint32_t chunk_size); -+ -+int dm_create_transient(struct exception_store *store, -+ struct dm_snapshot *s, int blocksize); -+ -+/* -+ * Return the number of sectors in the device. 
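-+ *
-+ * blk_size[] records device sizes in 1KiB blocks, hence the shift
-+ * by 1 below; e.g. an entry of 1048576 (1GiB) yields 2097152
-+ * 512-byte sectors.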
-+ */ -+static inline sector_t get_dev_size(kdev_t dev) -+{ -+ int *sizes; -+ -+ sizes = blk_size[MAJOR(dev)]; -+ if (sizes) -+ return sizes[MINOR(dev)] << 1; -+ -+ return 0; -+} -+ -+static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector) -+{ -+ return (sector & ~s->chunk_mask) >> s->chunk_shift; -+} -+ -+static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk) -+{ -+ return chunk << s->chunk_shift; -+} -+ -+#endif -diff -ruN linux-2.4.19/drivers/md/dm-stripe.c linux-2.4.19-dm/drivers/md/dm-stripe.c ---- linux-2.4.19/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-stripe.c Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,256 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+ -+struct stripe { -+ struct dm_dev *dev; -+ sector_t physical_start; -+}; -+ -+struct stripe_c { -+ uint32_t stripes; -+ -+ /* The size of this target / num. stripes */ -+ uint32_t stripe_width; -+ -+ /* stripe chunk size */ -+ uint32_t chunk_shift; -+ sector_t chunk_mask; -+ -+ struct stripe stripe[0]; -+}; -+ -+static inline struct stripe_c *alloc_context(int stripes) -+{ -+ size_t len; -+ -+ if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), -+ stripes)) -+ return NULL; -+ -+ len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); -+ -+ return kmalloc(len, GFP_KERNEL); -+} -+ -+/* -+ * Parse a single pair -+ */ -+static int get_stripe(struct dm_target *ti, struct stripe_c *sc, -+ int stripe, char **argv) -+{ -+ sector_t start; -+ -+ if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1) -+ return -EINVAL; -+ -+ if (dm_get_device(ti, argv[0], start, sc->stripe_width, -+ dm_table_get_mode(ti->table), -+ &sc->stripe[stripe].dev)) -+ return -ENXIO; -+ -+ sc->stripe[stripe].physical_start = start; -+ return 0; -+} -+ -+/* -+ * FIXME: Nasty function, only present because we can't link -+ * against __moddi3 and __divdi3. -+ * -+ * returns a == b * n -+ */ -+static int multiple(sector_t a, sector_t b, sector_t *n) -+{ -+ sector_t acc, prev, i; -+ -+ *n = 0; -+ while (a >= b) { -+ for (acc = b, prev = 0, i = 1; -+ acc <= a; -+ prev = acc, acc <<= 1, i <<= 1) -+ ; -+ -+ a -= prev; -+ *n += i >> 1; -+ } -+ -+ return a == 0; -+} -+ -+/* -+ * Construct a striped mapping. 
-+ * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
-+ */
-+static int stripe_ctr(struct dm_target *ti, int argc, char **argv)
-+{
-+ struct stripe_c *sc;
-+ sector_t width;
-+ uint32_t stripes;
-+ uint32_t chunk_size;
-+ char *end;
-+ int r, i;
-+
-+ if (argc < 2) {
-+ ti->error = "dm-stripe: Not enough arguments";
-+ return -EINVAL;
-+ }
-+
-+ stripes = simple_strtoul(argv[0], &end, 10);
-+ if (*end) {
-+ ti->error = "dm-stripe: Invalid stripe count";
-+ return -EINVAL;
-+ }
-+
-+ chunk_size = simple_strtoul(argv[1], &end, 10);
-+ if (*end) {
-+ ti->error = "dm-stripe: Invalid chunk_size";
-+ return -EINVAL;
-+ }
-+
-+ if (!multiple(ti->len, stripes, &width)) {
-+ ti->error = "dm-stripe: Target length not divisible by "
-+ "number of stripes";
-+ return -EINVAL;
-+ }
-+
-+ sc = alloc_context(stripes);
-+ if (!sc) {
-+ ti->error = "dm-stripe: Memory allocation for striped context "
-+ "failed";
-+ return -ENOMEM;
-+ }
-+
-+ sc->stripes = stripes;
-+ sc->stripe_width = width;
-+
-+ /*
-+ * chunk_size is a power of two
-+ */
-+ if (!chunk_size || (chunk_size & (chunk_size - 1))) {
-+ ti->error = "dm-stripe: Invalid chunk size";
-+ kfree(sc);
-+ return -EINVAL;
-+ }
-+
-+ sc->chunk_mask = ((sector_t) chunk_size) - 1;
-+ for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
-+ chunk_size >>= 1;
-+ sc->chunk_shift--;
-+
-+ /*
-+ * Get the stripe destinations.
-+ */
-+ for (i = 0; i < stripes; i++) {
-+ if (argc < 2) {
-+ ti->error = "dm-stripe: Not enough destinations "
-+ "specified";
-+ kfree(sc);
-+ return -EINVAL;
-+ }
-+
-+ argv += 2;
-+
-+ r = get_stripe(ti, sc, i, argv);
-+ if (r < 0) {
-+ ti->error = "dm-stripe: Couldn't parse stripe "
-+ "destination";
-+ while (i--)
-+ dm_put_device(ti, sc->stripe[i].dev);
-+ kfree(sc);
-+ return r;
-+ }
-+ }
-+
-+ ti->private = sc;
-+ return 0;
-+}
-+
-+static void stripe_dtr(struct dm_target *ti)
-+{
-+ unsigned int i;
-+ struct stripe_c *sc = (struct stripe_c *) ti->private;
-+
-+ for (i = 0; i < sc->stripes; i++)
-+ dm_put_device(ti, sc->stripe[i].dev);
-+
-+ kfree(sc);
-+}
-+
-+static int stripe_map(struct dm_target *ti, struct buffer_head *bh, int rw)
-+{
-+ struct stripe_c *sc = (struct stripe_c *) ti->private;
-+
-+ sector_t offset = bh->b_rsector - ti->begin;
-+ uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift);
-+ uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */
-+ chunk = chunk / sc->stripes;
-+
-+ bh->b_rdev = sc->stripe[stripe].dev->dev;
-+ bh->b_rsector = sc->stripe[stripe].physical_start +
-+ (chunk << sc->chunk_shift) + (offset & sc->chunk_mask);
-+ return 1;
-+}
-+
-+static int stripe_status(struct dm_target *ti,
-+ status_type_t type, char *result, int maxlen)
-+{
-+ struct stripe_c *sc = (struct stripe_c *) ti->private;
-+ int offset;
-+ int i;
-+
-+ switch (type) {
-+ case STATUSTYPE_INFO:
-+ result[0] = '\0';
-+ break;
-+
-+ case STATUSTYPE_TABLE:
-+ offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT,
-+ sc->stripes, sc->chunk_mask + 1);
-+ for (i = 0; i < sc->stripes; i++) {
-+ offset += snprintf(result + offset, maxlen - offset,
-+ " %s " SECTOR_FORMAT,
-+ kdevname(to_kdev_t
-+ (sc->stripe[i].dev->bdev->bd_dev)),
-+ sc->stripe[i].physical_start);
-+ }
-+ break;
-+ }
-+ return 0;
-+}
-+
-+static struct target_type stripe_target = {
-+ .name = "striped",
-+ .module = THIS_MODULE,
-+ .ctr = stripe_ctr,
-+ .dtr = stripe_dtr,
-+ .map = stripe_map,
-+ .status = stripe_status,
-+};
-+
-+int __init dm_stripe_init(void)
-+{
-+ int r;
-+
-+ r = dm_register_target(&stripe_target);
-+ if (r < 0)
-+ DMWARN("striped target registration failed");
-+
-+ return r;
-+} -+ -+void dm_stripe_exit(void) -+{ -+ if (dm_unregister_target(&stripe_target)) -+ DMWARN("striped target unregistration failed"); -+ -+ return; -+} -diff -ruN linux-2.4.19/drivers/md/dm-table.c linux-2.4.19-dm/drivers/md/dm-table.c ---- linux-2.4.19/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-table.c Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,665 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define MAX_DEPTH 16 -+#define NODE_SIZE L1_CACHE_BYTES -+#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) -+#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) -+ -+struct dm_table { -+ atomic_t holders; -+ -+ /* btree table */ -+ int depth; -+ int counts[MAX_DEPTH]; /* in nodes */ -+ sector_t *index[MAX_DEPTH]; -+ -+ int num_targets; -+ int num_allocated; -+ sector_t *highs; -+ struct dm_target *targets; -+ -+ /* -+ * Indicates the rw permissions for the new logical -+ * device. This should be a combination of FMODE_READ -+ * and FMODE_WRITE. -+ */ -+ int mode; -+ -+ /* a list of devices used by this table */ -+ struct list_head devices; -+ -+ /* -+ * A waitqueue for processes waiting for something -+ * interesting to happen to this table. -+ */ -+ wait_queue_head_t eventq; -+}; -+ -+/* -+ * Ceiling(n / size) -+ */ -+static inline unsigned long div_up(unsigned long n, unsigned long size) -+{ -+ return dm_round_up(n, size) / size; -+} -+ -+/* -+ * Similar to ceiling(log_size(n)) -+ */ -+static unsigned int int_log(unsigned long n, unsigned long base) -+{ -+ int result = 0; -+ -+ while (n > 1) { -+ n = div_up(n, base); -+ result++; -+ } -+ -+ return result; -+} -+ -+/* -+ * Calculate the index of the child node of the n'th node k'th key. -+ */ -+static inline int get_child(int n, int k) -+{ -+ return (n * CHILDREN_PER_NODE) + k; -+} -+ -+/* -+ * Return the n'th node of level l from table t. -+ */ -+static inline sector_t *get_node(struct dm_table *t, int l, int n) -+{ -+ return t->index[l] + (n * KEYS_PER_NODE); -+} -+ -+/* -+ * Return the highest key that you could lookup from the n'th -+ * node on level l of the btree. -+ */ -+static sector_t high(struct dm_table *t, int l, int n) -+{ -+ for (; l < t->depth - 1; l++) -+ n = get_child(n, CHILDREN_PER_NODE - 1); -+ -+ if (n >= t->counts[l]) -+ return (sector_t) - 1; -+ -+ return get_node(t, l, n)[KEYS_PER_NODE - 1]; -+} -+ -+/* -+ * Fills in a level of the btree based on the highs of the level -+ * below it. -+ */ -+static int setup_btree_index(int l, struct dm_table *t) -+{ -+ int n, k; -+ sector_t *node; -+ -+ for (n = 0; n < t->counts[l]; n++) { -+ node = get_node(t, l, n); -+ -+ for (k = 0; k < KEYS_PER_NODE; k++) -+ node[k] = high(t, l + 1, get_child(n, k)); -+ } -+ -+ return 0; -+} -+ -+/* -+ * highs, and targets are managed as dynamic arrays during a -+ * table load. -+ */ -+static int alloc_targets(struct dm_table *t, int num) -+{ -+ sector_t *n_highs; -+ struct dm_target *n_targets; -+ int n = t->num_targets; -+ -+ /* -+ * Allocate both the target array and offset array at once. 
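-+ *
-+ * That is, one vcalloc() of num * (sizeof(struct dm_target) +
-+ * sizeof(sector_t)) bytes, carved up with highs[0..num-1] at the
-+ * base and the target array immediately after it
-+ * (n_targets = n_highs + num).
-+ */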
-+ */ -+ n_highs = (sector_t *) vcalloc(sizeof(struct dm_target) + -+ sizeof(sector_t), num); -+ if (!n_highs) -+ return -ENOMEM; -+ -+ n_targets = (struct dm_target *) (n_highs + num); -+ -+ if (n) { -+ memcpy(n_highs, t->highs, sizeof(*n_highs) * n); -+ memcpy(n_targets, t->targets, sizeof(*n_targets) * n); -+ } -+ -+ memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); -+ vfree(t->highs); -+ -+ t->num_allocated = num; -+ t->highs = n_highs; -+ t->targets = n_targets; -+ -+ return 0; -+} -+ -+int dm_table_create(struct dm_table **result, int mode) -+{ -+ struct dm_table *t = kmalloc(sizeof(*t), GFP_NOIO); -+ -+ if (!t) -+ return -ENOMEM; -+ -+ memset(t, 0, sizeof(*t)); -+ INIT_LIST_HEAD(&t->devices); -+ atomic_set(&t->holders, 1); -+ -+ /* allocate a single nodes worth of targets to begin with */ -+ if (alloc_targets(t, KEYS_PER_NODE)) { -+ kfree(t); -+ t = NULL; -+ return -ENOMEM; -+ } -+ -+ init_waitqueue_head(&t->eventq); -+ t->mode = mode; -+ *result = t; -+ return 0; -+} -+ -+static void free_devices(struct list_head *devices) -+{ -+ struct list_head *tmp, *next; -+ -+ for (tmp = devices->next; tmp != devices; tmp = next) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ next = tmp->next; -+ kfree(dd); -+ } -+} -+ -+void table_destroy(struct dm_table *t) -+{ -+ int i; -+ -+ /* destroying the table counts as an event */ -+ dm_table_event(t); -+ -+ /* free the indexes (see dm_table_complete) */ -+ if (t->depth >= 2) -+ vfree(t->index[t->depth - 2]); -+ -+ /* free the targets */ -+ for (i = 0; i < t->num_targets; i++) { -+ struct dm_target *tgt = &t->targets[i]; -+ -+ dm_put_target_type(t->targets[i].type); -+ -+ if (tgt->type->dtr) -+ tgt->type->dtr(tgt); -+ } -+ -+ vfree(t->highs); -+ -+ /* free the device list */ -+ if (t->devices.next != &t->devices) { -+ DMWARN("devices still present during destroy: " -+ "dm_table_remove_device calls missing"); -+ -+ free_devices(&t->devices); -+ } -+ -+ kfree(t); -+} -+ -+void dm_table_get(struct dm_table *t) -+{ -+ atomic_inc(&t->holders); -+} -+ -+void dm_table_put(struct dm_table *t) -+{ -+ if (atomic_dec_and_test(&t->holders)) -+ table_destroy(t); -+} -+ -+/* -+ * Checks to see if we need to extend highs or targets. -+ */ -+static inline int check_space(struct dm_table *t) -+{ -+ if (t->num_targets >= t->num_allocated) -+ return alloc_targets(t, t->num_allocated * 2); -+ -+ return 0; -+} -+ -+/* -+ * Convert a device path to a dev_t. -+ */ -+static int lookup_device(const char *path, kdev_t *dev) -+{ -+ int r; -+ struct nameidata nd; -+ struct inode *inode; -+ -+ if (!path_init(path, LOOKUP_FOLLOW, &nd)) -+ return 0; -+ -+ if ((r = path_walk(path, &nd))) -+ goto out; -+ -+ inode = nd.dentry->d_inode; -+ if (!inode) { -+ r = -ENOENT; -+ goto out; -+ } -+ -+ if (!S_ISBLK(inode->i_mode)) { -+ r = -ENOTBLK; -+ goto out; -+ } -+ -+ *dev = inode->i_rdev; -+ -+ out: -+ path_release(&nd); -+ return r; -+} -+ -+/* -+ * See if we've already got a device in the list. -+ */ -+static struct dm_dev *find_device(struct list_head *l, kdev_t dev) -+{ -+ struct list_head *tmp; -+ -+ list_for_each(tmp, l) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ if (kdev_same(dd->dev, dev)) -+ return dd; -+ } -+ -+ return NULL; -+} -+ -+/* -+ * Open a device so we can use it as a map destination. 
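-+ *
-+ * bdget() pins the block_device and blkdev_get() opens it with the
-+ * access mode the table asked for; close_dev() undoes this with
-+ * blkdev_put().
-+ */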
-+ */ -+static int open_dev(struct dm_dev *dd) -+{ -+ if (dd->bdev) -+ BUG(); -+ -+ dd->bdev = bdget(kdev_t_to_nr(dd->dev)); -+ if (!dd->bdev) -+ return -ENOMEM; -+ -+ return blkdev_get(dd->bdev, dd->mode, 0, BDEV_RAW); -+} -+ -+/* -+ * Close a device that we've been using. -+ */ -+static void close_dev(struct dm_dev *dd) -+{ -+ if (!dd->bdev) -+ return; -+ -+ blkdev_put(dd->bdev, BDEV_RAW); -+ dd->bdev = NULL; -+} -+ -+/* -+ * If possible (ie. blk_size[major] is set), this checks an area -+ * of a destination device is valid. -+ */ -+static int check_device_area(kdev_t dev, sector_t start, sector_t len) -+{ -+ int *sizes; -+ sector_t dev_size; -+ -+ if (!(sizes = blk_size[major(dev)]) || !(dev_size = sizes[minor(dev)])) -+ /* we don't know the device details, -+ * so give the benefit of the doubt */ -+ return 1; -+ -+ /* convert to 512-byte sectors */ -+ dev_size <<= 1; -+ -+ return ((start < dev_size) && (len <= (dev_size - start))); -+} -+ -+/* -+ * This upgrades the mode on an already open dm_dev. Being -+ * careful to leave things as they were if we fail to reopen the -+ * device. -+ */ -+static int upgrade_mode(struct dm_dev *dd, int new_mode) -+{ -+ int r; -+ struct dm_dev dd_copy; -+ -+ memcpy(&dd_copy, dd, sizeof(dd_copy)); -+ -+ dd->mode |= new_mode; -+ dd->bdev = NULL; -+ r = open_dev(dd); -+ if (!r) -+ close_dev(&dd_copy); -+ else -+ memcpy(dd, &dd_copy, sizeof(dd_copy)); -+ -+ return r; -+} -+ -+/* -+ * Add a device to the list, or just increment the usage count if -+ * it's already present. -+ */ -+int dm_get_device(struct dm_target *ti, const char *path, sector_t start, -+ sector_t len, int mode, struct dm_dev **result) -+{ -+ int r; -+ kdev_t dev; -+ struct dm_dev *dd; -+ int major, minor; -+ struct dm_table *t = ti->table; -+ -+ if (!t) -+ BUG(); -+ -+ if (sscanf(path, "%x:%x", &major, &minor) == 2) { -+ /* Extract the major/minor numbers */ -+ dev = mk_kdev(major, minor); -+ } else { -+ /* convert the path to a device */ -+ if ((r = lookup_device(path, &dev))) -+ return r; -+ } -+ -+ dd = find_device(&t->devices, dev); -+ if (!dd) { -+ dd = kmalloc(sizeof(*dd), GFP_KERNEL); -+ if (!dd) -+ return -ENOMEM; -+ -+ dd->dev = dev; -+ dd->mode = mode; -+ dd->bdev = NULL; -+ -+ if ((r = open_dev(dd))) { -+ kfree(dd); -+ return r; -+ } -+ -+ atomic_set(&dd->count, 0); -+ list_add(&dd->list, &t->devices); -+ -+ } else if (dd->mode != (mode | dd->mode)) { -+ r = upgrade_mode(dd, mode); -+ if (r) -+ return r; -+ } -+ atomic_inc(&dd->count); -+ -+ if (!check_device_area(dd->dev, start, len)) { -+ DMWARN("device %s too small for target", path); -+ dm_put_device(ti, dd); -+ return -EINVAL; -+ } -+ -+ *result = dd; -+ -+ return 0; -+} -+ -+/* -+ * Decrement a devices use count and remove it if neccessary. -+ */ -+void dm_put_device(struct dm_target *ti, struct dm_dev *dd) -+{ -+ if (atomic_dec_and_test(&dd->count)) { -+ close_dev(dd); -+ list_del(&dd->list); -+ kfree(dd); -+ } -+} -+ -+/* -+ * Checks to see if the target joins onto the end of the table. -+ */ -+static int adjoin(struct dm_table *table, struct dm_target *ti) -+{ -+ struct dm_target *prev; -+ -+ if (!table->num_targets) -+ return !ti->begin; -+ -+ prev = &table->targets[table->num_targets - 1]; -+ return (ti->begin == (prev->begin + prev->len)); -+} -+ -+/* -+ * Destructively splits up the argument list to pass to ctr. 
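-+ *
-+ * Tokens are separated by whitespace and a backslash quotes the
-+ * following character, so (illustrative input) "0 100 linear a\ b"
-+ * splits into "0", "100", "linear" and "a b".
-+ */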
-+ */ -+static int split_args(int max, int *argc, char **argv, char *input) -+{ -+ char *start, *end = input, *out; -+ *argc = 0; -+ -+ while (1) { -+ start = end; -+ -+ /* Skip whitespace */ -+ while (*start && isspace(*start)) -+ start++; -+ -+ if (!*start) -+ break; /* success, we hit the end */ -+ -+ /* 'out' is used to remove any back-quotes */ -+ end = out = start; -+ while (*end) { -+ /* Everything apart from '\0' can be quoted */ -+ if (*end == '\\' && *(end + 1)) { -+ *out++ = *(end + 1); -+ end += 2; -+ continue; -+ } -+ -+ if (isspace(*end)) -+ break; /* end of token */ -+ -+ *out++ = *end++; -+ } -+ -+ /* have we already filled the array ? */ -+ if ((*argc + 1) > max) -+ return -EINVAL; -+ -+ /* we know this is whitespace */ -+ if (*end) -+ end++; -+ -+ /* terminate the string and put it in the array */ -+ *out = '\0'; -+ argv[*argc] = start; -+ (*argc)++; -+ } -+ -+ return 0; -+} -+ -+int dm_table_add_target(struct dm_table *t, const char *type, -+ sector_t start, sector_t len, char *params) -+{ -+ int r, argc; -+ char *argv[32]; -+ struct target_type *tt; -+ struct dm_target *tgt; -+ -+ if ((r = check_space(t))) -+ return r; -+ -+ tgt = t->targets + t->num_targets; -+ memset(tgt, 0, sizeof(*tgt)); -+ -+ tt = dm_get_target_type(type); -+ if (!tt) { -+ tgt->error = "unknown target type"; -+ return -EINVAL; -+ } -+ -+ tgt->table = t; -+ tgt->type = tt; -+ tgt->begin = start; -+ tgt->len = len; -+ tgt->error = "Unknown error"; -+ -+ /* -+ * Does this target adjoin the previous one ? -+ */ -+ if (!adjoin(t, tgt)) { -+ DMERR("Gap in table"); -+ dm_put_target_type(tt); -+ return -EINVAL; -+ } -+ -+ r = split_args(ARRAY_SIZE(argv), &argc, argv, params); -+ if (r) { -+ tgt->error = "couldn't split parameters"; -+ dm_put_target_type(tt); -+ return r; -+ } -+ -+ r = tt->ctr(tgt, argc, argv); -+ if (r) { -+ dm_put_target_type(tt); -+ return r; -+ } -+ -+ t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; -+ return 0; -+} -+ -+static int setup_indexes(struct dm_table *t) -+{ -+ int i, total = 0; -+ sector_t *indexes; -+ -+ /* allocate the space for *all* the indexes */ -+ for (i = t->depth - 2; i >= 0; i--) { -+ t->counts[i] = div_up(t->counts[i + 1], CHILDREN_PER_NODE); -+ total += t->counts[i]; -+ } -+ -+ indexes = (sector_t *) vcalloc(total, (unsigned long) NODE_SIZE); -+ if (!indexes) -+ return -ENOMEM; -+ -+ /* set up internal nodes, bottom-up */ -+ for (i = t->depth - 2, total = 0; i >= 0; i--) { -+ t->index[i] = indexes; -+ indexes += (KEYS_PER_NODE * t->counts[i]); -+ setup_btree_index(i, t); -+ } -+ -+ return 0; -+} -+ -+/* -+ * Builds the btree to index the map. -+ */ -+int dm_table_complete(struct dm_table *t) -+{ -+ int leaf_nodes, r = 0; -+ -+ /* how many indexes will the btree have ? */ -+ leaf_nodes = div_up(t->num_targets, KEYS_PER_NODE); -+ t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); -+ -+ /* leaf layer has already been set up */ -+ t->counts[t->depth - 1] = leaf_nodes; -+ t->index[t->depth - 1] = t->highs; -+ -+ if (t->depth >= 2) -+ r = setup_indexes(t); -+ -+ return r; -+} -+ -+void dm_table_event(struct dm_table *t) -+{ -+ wake_up_interruptible(&t->eventq); -+} -+ -+sector_t dm_table_get_size(struct dm_table *t) -+{ -+ return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; -+} -+ -+struct dm_target *dm_table_get_target(struct dm_table *t, int index) -+{ -+ if (index > t->num_targets) -+ return NULL; -+ -+ return t->targets + index; -+} -+ -+/* -+ * Search the btree for the correct target. 
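-+ *
-+ * Each level picks the first key >= the wanted sector, so a lookup
-+ * costs at most depth * KEYS_PER_NODE comparisons; e.g. with 8 keys
-+ * per node (64-byte nodes, 8-byte sector_t) a depth-2 tree indexes
-+ * up to 9 * 8 = 72 targets.
-+ */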
-+ */ -+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) -+{ -+ int l, n = 0, k = 0; -+ sector_t *node; -+ -+ for (l = 0; l < t->depth; l++) { -+ n = get_child(n, k); -+ node = get_node(t, l, n); -+ -+ for (k = 0; k < KEYS_PER_NODE; k++) -+ if (node[k] >= sector) -+ break; -+ } -+ -+ return &t->targets[(KEYS_PER_NODE * n) + k]; -+} -+ -+unsigned int dm_table_get_num_targets(struct dm_table *t) -+{ -+ return t->num_targets; -+} -+ -+struct list_head *dm_table_get_devices(struct dm_table *t) -+{ -+ return &t->devices; -+} -+ -+int dm_table_get_mode(struct dm_table *t) -+{ -+ return t->mode; -+} -+ -+void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq) -+{ -+ add_wait_queue(&t->eventq, wq); -+} -+ -+EXPORT_SYMBOL(dm_get_device); -+EXPORT_SYMBOL(dm_put_device); -+EXPORT_SYMBOL(dm_table_event); -diff -ruN linux-2.4.19/drivers/md/dm-target.c linux-2.4.19-dm/drivers/md/dm-target.c ---- linux-2.4.19/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm-target.c Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,190 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+ -+struct tt_internal { -+ struct target_type tt; -+ -+ struct list_head list; -+ long use; -+}; -+ -+static LIST_HEAD(_targets); -+static rwlock_t _lock = RW_LOCK_UNLOCKED; -+ -+#define DM_MOD_NAME_SIZE 32 -+ -+static inline struct tt_internal *__find_target_type(const char *name) -+{ -+ struct list_head *tih; -+ struct tt_internal *ti; -+ -+ list_for_each(tih, &_targets) { -+ ti = list_entry(tih, struct tt_internal, list); -+ -+ if (!strcmp(name, ti->tt.name)) -+ return ti; -+ } -+ -+ return NULL; -+} -+ -+static struct tt_internal *get_target_type(const char *name) -+{ -+ struct tt_internal *ti; -+ -+ read_lock(&_lock); -+ ti = __find_target_type(name); -+ -+ if (ti) { -+ if (ti->use == 0 && ti->tt.module) -+ __MOD_INC_USE_COUNT(ti->tt.module); -+ ti->use++; -+ } -+ read_unlock(&_lock); -+ -+ return ti; -+} -+ -+static void load_module(const char *name) -+{ -+ char module_name[DM_MOD_NAME_SIZE] = "dm-"; -+ -+ /* Length check for strcat() below */ -+ if (strlen(name) > (DM_MOD_NAME_SIZE - 4)) -+ return; -+ -+ strcat(module_name, name); -+ request_module(module_name); -+ -+ return; -+} -+ -+struct target_type *dm_get_target_type(const char *name) -+{ -+ struct tt_internal *ti = get_target_type(name); -+ -+ if (!ti) { -+ load_module(name); -+ ti = get_target_type(name); -+ } -+ -+ return ti ? 
&ti->tt : NULL; -+} -+ -+void dm_put_target_type(struct target_type *t) -+{ -+ struct tt_internal *ti = (struct tt_internal *) t; -+ -+ read_lock(&_lock); -+ if (--ti->use == 0 && ti->tt.module) -+ __MOD_DEC_USE_COUNT(ti->tt.module); -+ -+ if (ti->use < 0) -+ BUG(); -+ read_unlock(&_lock); -+ -+ return; -+} -+ -+static struct tt_internal *alloc_target(struct target_type *t) -+{ -+ struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL); -+ -+ if (ti) { -+ memset(ti, 0, sizeof(*ti)); -+ ti->tt = *t; -+ } -+ -+ return ti; -+} -+ -+int dm_register_target(struct target_type *t) -+{ -+ int rv = 0; -+ struct tt_internal *ti = alloc_target(t); -+ -+ if (!ti) -+ return -ENOMEM; -+ -+ write_lock(&_lock); -+ if (__find_target_type(t->name)) -+ rv = -EEXIST; -+ else -+ list_add(&ti->list, &_targets); -+ -+ write_unlock(&_lock); -+ return rv; -+} -+ -+int dm_unregister_target(struct target_type *t) -+{ -+ struct tt_internal *ti; -+ -+ write_lock(&_lock); -+ if (!(ti = __find_target_type(t->name))) { -+ write_unlock(&_lock); -+ return -EINVAL; -+ } -+ -+ if (ti->use) { -+ write_unlock(&_lock); -+ return -ETXTBSY; -+ } -+ -+ list_del(&ti->list); -+ kfree(ti); -+ -+ write_unlock(&_lock); -+ return 0; -+} -+ -+/* -+ * io-err: always fails an io, useful for bringing -+ * up LVs that have holes in them. -+ */ -+static int io_err_ctr(struct dm_target *ti, int argc, char **args) -+{ -+ return 0; -+} -+ -+static void io_err_dtr(struct dm_target *ti) -+{ -+ /* empty */ -+ return; -+} -+ -+static int io_err_map(struct dm_target *ti, struct buffer_head *bh, int rw) -+{ -+ buffer_IO_error(bh); -+ return 0; -+} -+ -+static struct target_type error_target = { -+ .name = "error", -+ .ctr = io_err_ctr, -+ .dtr = io_err_dtr, -+ .map = io_err_map, -+}; -+ -+int dm_target_init(void) -+{ -+ return dm_register_target(&error_target); -+} -+ -+void dm_target_exit(void) -+{ -+ if (dm_unregister_target(&error_target)) -+ DMWARN("error target unregistration failed"); -+} -+ -+EXPORT_SYMBOL(dm_register_target); -+EXPORT_SYMBOL(dm_unregister_target); -diff -ruN linux-2.4.19/drivers/md/dm.c linux-2.4.19-dm/drivers/md/dm.c ---- linux-2.4.19/drivers/md/dm.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm.c Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,868 @@ -+/* -+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+static const char *_name = DM_NAME; -+#define MAX_DEVICES (1 << MINORBITS) -+#define SECTOR_SHIFT 9 -+#define DEFAULT_READ_AHEAD 64 -+ -+static int major = 0; -+static int _major = 0; -+ -+struct dm_io { -+ struct mapped_device *md; -+ -+ void (*end_io) (struct buffer_head * bh, int uptodate); -+ void *context; -+}; -+ -+struct deferred_io { -+ int rw; -+ struct buffer_head *bh; -+ struct deferred_io *next; -+}; -+ -+/* -+ * Bits for the md->flags field. -+ */ -+#define DMF_BLOCK_IO 0 -+#define DMF_SUSPENDED 1 -+ -+struct mapped_device { -+ struct rw_semaphore lock; -+ atomic_t holders; -+ -+ kdev_t dev; -+ unsigned long flags; -+ -+ /* -+ * A list of ios that arrived while we were suspended. -+ */ -+ atomic_t pending; -+ wait_queue_head_t wait; -+ struct deferred_io *deferred; -+ -+ /* -+ * The current mapping. 
-+ */ -+ struct dm_table *map; -+}; -+ -+#define MIN_IOS 256 -+static kmem_cache_t *_io_cache; -+static mempool_t *_io_pool; -+ -+/* block device arrays */ -+static int _block_size[MAX_DEVICES]; -+static int _blksize_size[MAX_DEVICES]; -+static int _hardsect_size[MAX_DEVICES]; -+ -+static struct mapped_device *get_kdev(kdev_t dev); -+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh); -+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb); -+ -+ -+static __init int local_init(void) -+{ -+ int r; -+ -+ /* allocate a slab for the dm_ios */ -+ _io_cache = kmem_cache_create("dm io", -+ sizeof(struct dm_io), 0, 0, NULL, NULL); -+ -+ if (!_io_cache) -+ return -ENOMEM; -+ -+ _io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, -+ mempool_free_slab, _io_cache); -+ if (!_io_pool) { -+ kmem_cache_destroy(_io_cache); -+ return -ENOMEM; -+ } -+ -+ _major = major; -+ r = register_blkdev(_major, _name, &dm_blk_dops); -+ if (r < 0) { -+ DMERR("register_blkdev failed"); -+ mempool_destroy(_io_pool); -+ kmem_cache_destroy(_io_cache); -+ return r; -+ } -+ -+ if (!_major) -+ _major = r; -+ -+ /* set up the arrays */ -+ read_ahead[_major] = DEFAULT_READ_AHEAD; -+ blk_size[_major] = _block_size; -+ blksize_size[_major] = _blksize_size; -+ hardsect_size[_major] = _hardsect_size; -+ -+ blk_queue_make_request(BLK_DEFAULT_QUEUE(_major), dm_request); -+ -+ return 0; -+} -+ -+static void local_exit(void) -+{ -+ mempool_destroy(_io_pool); -+ kmem_cache_destroy(_io_cache); -+ -+ if (unregister_blkdev(_major, _name) < 0) -+ DMERR("devfs_unregister_blkdev failed"); -+ -+ read_ahead[_major] = 0; -+ blk_size[_major] = NULL; -+ blksize_size[_major] = NULL; -+ hardsect_size[_major] = NULL; -+ _major = 0; -+ -+ DMINFO("cleaned up"); -+} -+ -+/* -+ * We have a lot of init/exit functions, so it seems easier to -+ * store them in an array. The disposable macro 'xx' -+ * expands a prefix into a pair of function names. 
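-+ *
-+ * e.g. xx(local) expands to { local_init, local_exit }, letting
-+ * dm_init() run the init functions in order and unwind with the
-+ * matching exit functions on failure.
-+ */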
-+ */ -+static struct { -+ int (*init) (void); -+ void (*exit) (void); -+ -+} _inits[] = { -+#define xx(n) {n ## _init, n ## _exit}, -+ xx(local) -+ xx(dm_target) -+ xx(dm_linear) -+ xx(dm_stripe) -+ xx(dm_snapshot) -+ xx(dm_interface) -+#undef xx -+}; -+ -+static int __init dm_init(void) -+{ -+ const int count = ARRAY_SIZE(_inits); -+ -+ int r, i; -+ -+ for (i = 0; i < count; i++) { -+ r = _inits[i].init(); -+ if (r) -+ goto bad; -+ } -+ -+ return 0; -+ -+ bad: -+ while (i--) -+ _inits[i].exit(); -+ -+ return r; -+} -+ -+static void __exit dm_exit(void) -+{ -+ int i = ARRAY_SIZE(_inits); -+ -+ while (i--) -+ _inits[i].exit(); -+} -+ -+/* -+ * Block device functions -+ */ -+static int dm_blk_open(struct inode *inode, struct file *file) -+{ -+ struct mapped_device *md; -+ -+ md = get_kdev(inode->i_rdev); -+ if (!md) -+ return -ENXIO; -+ -+ return 0; -+} -+ -+static int dm_blk_close(struct inode *inode, struct file *file) -+{ -+ struct mapped_device *md; -+ -+ md = get_kdev(inode->i_rdev); -+ dm_put(md); /* put the reference gained by dm_blk_open */ -+ dm_put(md); -+ return 0; -+} -+ -+static inline struct dm_io *alloc_io(void) -+{ -+ return mempool_alloc(_io_pool, GFP_NOIO); -+} -+ -+static inline void free_io(struct dm_io *io) -+{ -+ mempool_free(io, _io_pool); -+} -+ -+static inline struct deferred_io *alloc_deferred(void) -+{ -+ return kmalloc(sizeof(struct deferred_io), GFP_NOIO); -+} -+ -+static inline void free_deferred(struct deferred_io *di) -+{ -+ kfree(di); -+} -+ -+/* In 512-byte units */ -+#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1) -+ -+/* FIXME: check this */ -+static int dm_blk_ioctl(struct inode *inode, struct file *file, -+ uint command, unsigned long a) -+{ -+ int minor = MINOR(inode->i_rdev); -+ long size; -+ -+ if (minor >= MAX_DEVICES) -+ return -ENXIO; -+ -+ switch (command) { -+ case BLKROSET: -+ case BLKROGET: -+ case BLKRASET: -+ case BLKRAGET: -+ case BLKFLSBUF: -+ case BLKSSZGET: -+ //case BLKRRPART: /* Re-read partition tables */ -+ //case BLKPG: -+ case BLKELVGET: -+ case BLKELVSET: -+ case BLKBSZGET: -+ case BLKBSZSET: -+ return blk_ioctl(inode->i_rdev, command, a); -+ break; -+ -+ case BLKGETSIZE: -+ size = VOLUME_SIZE(minor); -+ if (copy_to_user((void *) a, &size, sizeof(long))) -+ return -EFAULT; -+ break; -+ -+ case BLKGETSIZE64: -+ size = VOLUME_SIZE(minor); -+ if (put_user((u64) ((u64) size) << 9, (u64 *) a)) -+ return -EFAULT; -+ break; -+ -+ case BLKRRPART: -+ return -ENOTTY; -+ -+ case LV_BMAP: -+ return dm_user_bmap(inode, (struct lv_bmap *) a); -+ -+ default: -+ DMWARN("unknown block ioctl 0x%x", command); -+ return -ENOTTY; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Add the buffer to the list of deferred io. -+ */ -+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw) -+{ -+ struct deferred_io *di; -+ -+ di = alloc_deferred(); -+ if (!di) -+ return -ENOMEM; -+ -+ down_write(&md->lock); -+ -+ if (!test_bit(DMF_SUSPENDED, &md->flags)) { -+ up_write(&md->lock); -+ free_deferred(di); -+ return 1; -+ } -+ -+ di->bh = bh; -+ di->rw = rw; -+ di->next = md->deferred; -+ md->deferred = di; -+ -+ up_write(&md->lock); -+ return 0; /* deferred successfully */ -+} -+ -+/* -+ * bh->b_end_io routine that decrements the pending count -+ * and then calls the original bh->b_end_io fn. 
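-+ *
-+ * __map_buffer() saves the original b_end_io/b_private pair in a
-+ * struct dm_io before hooking the buffer; this routine restores
-+ * them, frees the dm_io and only then completes the buffer.
-+ */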
-+ */ -+static void dec_pending(struct buffer_head *bh, int uptodate) -+{ -+ struct dm_io *io = bh->b_private; -+ -+ if (atomic_dec_and_test(&io->md->pending)) -+ /* nudge anyone waiting on suspend queue */ -+ wake_up(&io->md->wait); -+ -+ bh->b_end_io = io->end_io; -+ bh->b_private = io->context; -+ free_io(io); -+ -+ bh->b_end_io(bh, uptodate); -+} -+ -+/* -+ * Do the bh mapping for a given leaf -+ */ -+static inline int __map_buffer(struct mapped_device *md, -+ int rw, struct buffer_head *bh) -+{ -+ int r; -+ struct dm_io *io; -+ struct dm_target *ti; -+ -+ ti = dm_table_find_target(md->map, bh->b_rsector); -+ if (!ti) -+ return -EINVAL; -+ -+ io = alloc_io(); -+ if (!io) -+ return -ENOMEM; -+ -+ io->md = md; -+ io->end_io = bh->b_end_io; -+ io->context = bh->b_private; -+ -+ r = ti->type->map(ti, bh, rw); -+ -+ if (r > 0) { -+ /* hook the end io request fn */ -+ atomic_inc(&md->pending); -+ bh->b_end_io = dec_pending; -+ bh->b_private = io; -+ -+ } else -+ /* we don't need to hook */ -+ free_io(io); -+ -+ return r; -+} -+ -+/* -+ * Checks to see if we should be deferring io, if so it queues it -+ * and returns 1. -+ */ -+static inline int __deferring(struct mapped_device *md, int rw, -+ struct buffer_head *bh) -+{ -+ int r; -+ -+ /* -+ * If we're suspended we have to queue this io for later. -+ */ -+ while (test_bit(DMF_BLOCK_IO, &md->flags)) { -+ up_read(&md->lock); -+ -+ /* -+ * There's no point deferring a read ahead -+ * request, just drop it. -+ */ -+ if (rw == READA) { -+ down_read(&md->lock); -+ return -EIO; -+ } -+ -+ r = queue_io(md, bh, rw); -+ down_read(&md->lock); -+ -+ if (r < 0) -+ return r; -+ -+ if (r == 0) -+ return 1; /* deferred successfully */ -+ -+ } -+ -+ return 0; -+} -+ -+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ md = get_kdev(bh->b_rdev); -+ if (!md) { -+ buffer_IO_error(bh); -+ return 0; -+ } -+ -+ down_read(&md->lock); -+ -+ r = __deferring(md, rw, bh); -+ if (r < 0) -+ goto bad; -+ -+ else if (!r) { -+ /* not deferring */ -+ r = __map_buffer(md, rw, bh); -+ if (r < 0) -+ goto bad; -+ } else -+ r = 0; -+ -+ up_read(&md->lock); -+ dm_put(md); -+ return r; -+ -+ bad: -+ buffer_IO_error(bh); -+ up_read(&md->lock); -+ dm_put(md); -+ return 0; -+} -+ -+static int check_dev_size(kdev_t dev, unsigned long block) -+{ -+ /* FIXME: check this */ -+ int minor = MINOR(dev); -+ unsigned long max_sector = (_block_size[minor] << 1) + 1; -+ unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9); -+ -+ return (sector > max_sector) ? 0 : 1; -+} -+ -+/* -+ * Creates a dummy buffer head and maps it (for lilo). -+ */ -+static int __bmap(struct mapped_device *md, kdev_t dev, unsigned long block, -+ kdev_t *r_dev, unsigned long *r_block) -+{ -+ struct buffer_head bh; -+ struct dm_target *ti; -+ int r; -+ -+ if (test_bit(DMF_BLOCK_IO, &md->flags)) { -+ return -EPERM; -+ } -+ -+ if (!check_dev_size(dev, block)) { -+ return -EINVAL; -+ } -+ -+ /* setup dummy bh */ -+ memset(&bh, 0, sizeof(bh)); -+ bh.b_blocknr = block; -+ bh.b_dev = bh.b_rdev = dev; -+ bh.b_size = _blksize_size[MINOR(dev)]; -+ bh.b_rsector = block * (bh.b_size >> 9); -+ -+ /* find target */ -+ ti = dm_table_find_target(md->map, bh.b_rsector); -+ -+ /* do the mapping */ -+ r = ti->type->map(ti, &bh, READ); -+ -+ if (!r) { -+ *r_dev = bh.b_rdev; -+ *r_block = bh.b_rsector / (bh.b_size >> 9); -+ } -+ -+ return r; -+} -+ -+/* -+ * Marshals arguments and results between user and kernel space. 
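
The hook-and-restore dance in __map_buffer/dec_pending above is the classic
way to interpose on a completion callback: stash the caller's
b_end_io/b_private in the dm_io, substitute your own, and restore both
before chaining. The same shape in miniature, as self-contained C (the
struct names here are illustrative, not the kernel's):

	#include <stdio.h>

	struct bh { void (*end_io) (struct bh *, int); void *private; };
	struct io { void (*end_io) (struct bh *, int); void *context; };

	static void caller_end_io(struct bh *b, int uptodate)
	{
		printf("io complete, uptodate=%d\n", uptodate);
	}

	static void hooked_end_io(struct bh *b, int uptodate)
	{
		struct io *io = b->private;

		/* restore the saved callback/context, then chain to it */
		b->end_io = io->end_io;
		b->private = io->context;
		b->end_io(b, uptodate);
	}

	int main(void)
	{
		struct bh b = { caller_end_io, NULL };
		struct io io = { b.end_io, b.private };

		b.end_io = hooked_end_io;	/* hook */
		b.private = &io;
		b.end_io(&b, 1);	/* runs hooked_end_io, then caller_end_io */
		return 0;
	}
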
-+ */ -+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb) -+{ -+ struct mapped_device *md; -+ unsigned long block, r_block; -+ kdev_t r_dev; -+ int r; -+ -+ if (get_user(block, &lvb->lv_block)) -+ return -EFAULT; -+ -+ md = get_kdev(inode->i_rdev); -+ if (!md) -+ return -ENXIO; -+ -+ down_read(&md->lock); -+ r = __bmap(md, inode->i_rdev, block, &r_dev, &r_block); -+ up_read(&md->lock); -+ dm_put(md); -+ -+ if (!r && (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) || -+ put_user(r_block, &lvb->lv_block))) -+ r = -EFAULT; -+ -+ return r; -+} -+ -+/*----------------------------------------------------------------- -+ * A bitset is used to keep track of allocated minor numbers. -+ *---------------------------------------------------------------*/ -+static spinlock_t _minor_lock = SPIN_LOCK_UNLOCKED; -+static struct mapped_device *_mds[MAX_DEVICES]; -+ -+static void free_minor(int minor) -+{ -+ spin_lock(&_minor_lock); -+ _mds[minor] = NULL; -+ spin_unlock(&_minor_lock); -+} -+ -+/* -+ * See if the device with a specific minor # is free. -+ */ -+static int specific_minor(int minor, struct mapped_device *md) -+{ -+ int r = -EBUSY; -+ -+ if (minor >= MAX_DEVICES) { -+ DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)", -+ MAX_DEVICES); -+ return -EINVAL; -+ } -+ -+ spin_lock(&_minor_lock); -+ if (!_mds[minor]) { -+ _mds[minor] = md; -+ r = minor; -+ } -+ spin_unlock(&_minor_lock); -+ -+ return r; -+} -+ -+static int next_free_minor(struct mapped_device *md) -+{ -+ int i; -+ -+ spin_lock(&_minor_lock); -+ for (i = 0; i < MAX_DEVICES; i++) { -+ if (!_mds[i]) { -+ _mds[i] = md; -+ break; -+ } -+ } -+ spin_unlock(&_minor_lock); -+ -+ return (i < MAX_DEVICES) ? i : -EBUSY; -+} -+ -+static struct mapped_device *get_kdev(kdev_t dev) -+{ -+ struct mapped_device *md; -+ -+ if (major(dev) != _major) -+ return NULL; -+ -+ spin_lock(&_minor_lock); -+ md = _mds[minor(dev)]; -+ if (md) -+ dm_get(md); -+ spin_unlock(&_minor_lock); -+ -+ return md; -+} -+ -+/* -+ * Allocate and initialise a blank device with a given minor. -+ */ -+static struct mapped_device *alloc_dev(int minor) -+{ -+ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); -+ -+ if (!md) { -+ DMWARN("unable to allocate device, out of memory."); -+ return NULL; -+ } -+ -+ /* get a minor number for the dev */ -+ minor = (minor < 0) ? next_free_minor(md) : specific_minor(minor, md); -+ if (minor < 0) { -+ kfree(md); -+ return NULL; -+ } -+ -+ memset(md, 0, sizeof(*md)); -+ md->dev = mk_kdev(_major, minor); -+ init_rwsem(&md->lock); -+ atomic_set(&md->holders, 1); -+ atomic_set(&md->pending, 0); -+ init_waitqueue_head(&md->wait); -+ -+ return md; -+} -+ -+static void free_dev(struct mapped_device *md) -+{ -+ free_minor(minor(md->dev)); -+ kfree(md); -+} -+ -+/* -+ * The hardsect size for a mapped device is the largest hardsect size -+ * from the devices it maps onto. -+ */ -+static int __find_hardsect_size(struct list_head *devices) -+{ -+ int result = 512, size; -+ struct list_head *tmp; -+ -+ list_for_each(tmp, devices) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ size = get_hardsect_size(dd->dev); -+ if (size > result) -+ result = size; -+ } -+ -+ return result; -+} -+ -+/* -+ * Bind a table to the device. 
-+ */ -+static int __bind(struct mapped_device *md, struct dm_table *t) -+{ -+ int minor = minor(md->dev); -+ md->map = t; -+ -+ /* in k */ -+ _block_size[minor] = dm_table_get_size(t) >> 1; -+ _blksize_size[minor] = BLOCK_SIZE; -+ _hardsect_size[minor] = __find_hardsect_size(dm_table_get_devices(t)); -+ register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]); -+ -+ dm_table_get(t); -+ return 0; -+} -+ -+static void __unbind(struct mapped_device *md) -+{ -+ int minor = minor(md->dev); -+ -+ dm_table_put(md->map); -+ md->map = NULL; -+ -+ _block_size[minor] = 0; -+ _blksize_size[minor] = 0; -+ _hardsect_size[minor] = 0; -+} -+ -+/* -+ * Constructor for a new device. -+ */ -+int dm_create(int minor, struct dm_table *table, struct mapped_device **result) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ md = alloc_dev(minor); -+ if (!md) -+ return -ENXIO; -+ -+ r = __bind(md, table); -+ if (r) { -+ free_dev(md); -+ return r; -+ } -+ -+ *result = md; -+ return 0; -+} -+ -+void dm_get(struct mapped_device *md) -+{ -+ atomic_inc(&md->holders); -+} -+ -+void dm_put(struct mapped_device *md) -+{ -+ if (atomic_dec_and_test(&md->holders)) { -+ __unbind(md); -+ free_dev(md); -+ } -+} -+ -+/* -+ * Requeue the deferred io by calling generic_make_request. -+ */ -+static void flush_deferred_io(struct deferred_io *c) -+{ -+ struct deferred_io *n; -+ -+ while (c) { -+ n = c->next; -+ generic_make_request(c->rw, c->bh); -+ free_deferred(c); -+ c = n; -+ } -+} -+ -+/* -+ * Swap in a new table (destroying old one). -+ */ -+int dm_swap_table(struct mapped_device *md, struct dm_table *table) -+{ -+ int r; -+ -+ down_write(&md->lock); -+ -+ /* device must be suspended */ -+ if (!test_bit(DMF_SUSPENDED, &md->flags)) { -+ up_write(&md->lock); -+ return -EPERM; -+ } -+ -+ __unbind(md); -+ r = __bind(md, table); -+ if (r) -+ return r; -+ -+ up_write(&md->lock); -+ return 0; -+} -+ -+/* -+ * We need to be able to change a mapping table under a mounted -+ * filesystem. For example we might want to move some data in -+ * the background. Before the table can be swapped with -+ * dm_bind_table, dm_suspend must be called to flush any in -+ * flight io and ensure that any further io gets deferred. -+ */ -+int dm_suspend(struct mapped_device *md) -+{ -+ DECLARE_WAITQUEUE(wait, current); -+ -+ down_write(&md->lock); -+ -+ /* -+ * First we set the BLOCK_IO flag so no more ios will be -+ * mapped. -+ */ -+ if (test_bit(DMF_BLOCK_IO, &md->flags)) { -+ up_write(&md->lock); -+ return -EINVAL; -+ } -+ -+ set_bit(DMF_BLOCK_IO, &md->flags); -+ up_write(&md->lock); -+ -+ /* -+ * Then we wait for the already mapped ios to -+ * complete. 
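
One real bug is visible in dm_swap_table above: when __bind fails, the
function returns with md->lock still held for write, so the device can
never be locked again. A corrected sketch of the same function, assuming
the intent is simply to propagate __bind's error:

	int dm_swap_table(struct mapped_device *md, struct dm_table *table)
	{
		int r = -EPERM;

		down_write(&md->lock);

		/* device must be suspended */
		if (test_bit(DMF_SUSPENDED, &md->flags)) {
			__unbind(md);
			r = __bind(md, table);
		}

		up_write(&md->lock);	/* dropped on every path, errors included */
		return r;
	}
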
-+ */ -+ down_read(&md->lock); -+ -+ add_wait_queue(&md->wait, &wait); -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ if (!atomic_read(&md->pending)) -+ break; -+ -+ schedule(); -+ } -+ -+ current->state = TASK_RUNNING; -+ remove_wait_queue(&md->wait, &wait); -+ up_read(&md->lock); -+ -+ /* set_bit is atomic */ -+ set_bit(DMF_SUSPENDED, &md->flags); -+ -+ return 0; -+} -+ -+int dm_resume(struct mapped_device *md) -+{ -+ struct deferred_io *def; -+ -+ down_write(&md->lock); -+ if (!test_bit(DMF_SUSPENDED, &md->flags) || -+ !dm_table_get_size(md->map)) { -+ up_write(&md->lock); -+ return -EINVAL; -+ } -+ -+ clear_bit(DMF_SUSPENDED, &md->flags); -+ clear_bit(DMF_BLOCK_IO, &md->flags); -+ def = md->deferred; -+ md->deferred = NULL; -+ up_write(&md->lock); -+ -+ flush_deferred_io(def); -+ run_task_queue(&tq_disk); -+ -+ return 0; -+} -+ -+struct dm_table *dm_get_table(struct mapped_device *md) -+{ -+ struct dm_table *t; -+ -+ down_read(&md->lock); -+ t = md->map; -+ dm_table_get(t); -+ up_read(&md->lock); -+ -+ return t; -+} -+ -+kdev_t dm_kdev(struct mapped_device *md) -+{ -+ kdev_t dev; -+ -+ down_read(&md->lock); -+ dev = md->dev; -+ up_read(&md->lock); -+ -+ return dev; -+} -+ -+int dm_suspended(struct mapped_device *md) -+{ -+ return test_bit(DMF_SUSPENDED, &md->flags); -+} -+ -+struct block_device_operations dm_blk_dops = { -+ .open = dm_blk_open, -+ .release = dm_blk_close, -+ .ioctl = dm_blk_ioctl, -+ .owner = THIS_MODULE -+}; -+ -+/* -+ * module hooks -+ */ -+module_init(dm_init); -+module_exit(dm_exit); -+ -+MODULE_PARM(major, "i"); -+MODULE_PARM_DESC(major, "The major number of the device mapper"); -+MODULE_DESCRIPTION(DM_NAME " driver"); -+MODULE_AUTHOR("Joe Thornber "); -+MODULE_LICENSE("GPL"); -diff -ruN linux-2.4.19/drivers/md/dm.h linux-2.4.19-dm/drivers/md/dm.h ---- linux-2.4.19/drivers/md/dm.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/dm.h Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,150 @@ -+/* -+ * Internal header file for device mapper -+ * -+ * Copyright (C) 2001, 2002 Sistina Software -+ * -+ * This file is released under the LGPL. -+ */ -+ -+#ifndef DM_INTERNAL_H -+#define DM_INTERNAL_H -+ -+#include -+#include -+#include -+#include -+ -+#define DM_NAME "device-mapper" -+#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) -+#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) -+#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) -+ -+/* -+ * FIXME: I think this should be with the definition of sector_t -+ * in types.h. -+ */ -+#ifdef CONFIG_LBD -+#define SECTOR_FORMAT "%Lu" -+#else -+#define SECTOR_FORMAT "%lu" -+#endif -+ -+extern struct block_device_operations dm_blk_dops; -+ -+/* -+ * List of devices that a metadevice uses and should open/close. -+ */ -+struct dm_dev { -+ struct list_head list; -+ -+ atomic_t count; -+ int mode; -+ kdev_t dev; -+ struct block_device *bdev; -+}; -+ -+struct dm_table; -+struct mapped_device; -+ -+/*----------------------------------------------------------------- -+ * Functions for manipulating a struct mapped_device. -+ * Drop the reference with dm_put when you finish with the object. -+ *---------------------------------------------------------------*/ -+int dm_create(int minor, struct dm_table *table, struct mapped_device **md); -+ -+/* -+ * Reference counting for md. -+ */ -+void dm_get(struct mapped_device *md); -+void dm_put(struct mapped_device *md); -+ -+/* -+ * A device can still be used while suspended, but I/O is deferred. 
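
Putting dm_suspend, dm_swap_table and dm_resume together, the intended
calling sequence for replacing a live table looks like the following
hedged sketch of a hypothetical caller (in practice the ioctl layer does
this; error handling trimmed):

	static int replace_table(struct mapped_device *md, struct dm_table *t)
	{
		int r;

		r = dm_suspend(md);	/* flush in-flight io, defer new io */
		if (r)
			return r;

		r = dm_swap_table(md, t);	/* bind the new table */
		if (!r)
			r = dm_resume(md);	/* re-issue the deferred io */

		return r;
	}
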
-+ */ -+int dm_suspend(struct mapped_device *md); -+int dm_resume(struct mapped_device *md); -+ -+/* -+ * The device must be suspended before calling this method. -+ */ -+int dm_swap_table(struct mapped_device *md, struct dm_table *t); -+ -+/* -+ * Drop a reference on the table when you've finished with the -+ * result. -+ */ -+struct dm_table *dm_get_table(struct mapped_device *md); -+ -+/* -+ * Info functions. -+ */ -+kdev_t dm_kdev(struct mapped_device *md); -+int dm_suspended(struct mapped_device *md); -+ -+/*----------------------------------------------------------------- -+ * Functions for manipulating a table. Tables are also reference -+ * counted. -+ *---------------------------------------------------------------*/ -+int dm_table_create(struct dm_table **result, int mode); -+ -+void dm_table_get(struct dm_table *t); -+void dm_table_put(struct dm_table *t); -+ -+int dm_table_add_target(struct dm_table *t, const char *type, -+ sector_t start, sector_t len, char *params); -+int dm_table_complete(struct dm_table *t); -+void dm_table_event(struct dm_table *t); -+sector_t dm_table_get_size(struct dm_table *t); -+struct dm_target *dm_table_get_target(struct dm_table *t, int index); -+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); -+unsigned int dm_table_get_num_targets(struct dm_table *t); -+struct list_head *dm_table_get_devices(struct dm_table *t); -+int dm_table_get_mode(struct dm_table *t); -+void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq); -+ -+/*----------------------------------------------------------------- -+ * A registry of target types. -+ *---------------------------------------------------------------*/ -+int dm_target_init(void); -+void dm_target_exit(void); -+struct target_type *dm_get_target_type(const char *name); -+void dm_put_target_type(struct target_type *t); -+ -+/*----------------------------------------------------------------- -+ * Useful inlines. -+ *---------------------------------------------------------------*/ -+static inline int array_too_big(unsigned long fixed, unsigned long obj, -+ unsigned long num) -+{ -+ return (num > (ULONG_MAX - fixed) / obj); -+} -+ -+/* -+ * ceiling(n / size) * size -+ */ -+static inline unsigned long dm_round_up(unsigned long n, unsigned long size) -+{ -+ unsigned long r = n % size; -+ return n + (r ? (size - r) : 0); -+} -+ -+/* -+ * The device-mapper can be driven through one of two interfaces; -+ * ioctl or filesystem, depending which patch you have applied. -+ */ -+int dm_interface_init(void); -+void dm_interface_exit(void); -+ -+/* -+ * Targets for linear and striped mappings -+ */ -+int dm_linear_init(void); -+void dm_linear_exit(void); -+ -+int dm_stripe_init(void); -+void dm_stripe_exit(void); -+ -+int dm_snapshot_init(void); -+void dm_snapshot_exit(void); -+ -+#endif -diff -ruN linux-2.4.19/drivers/md/kcopyd.c linux-2.4.19-dm/drivers/md/kcopyd.c ---- linux-2.4.19/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/kcopyd.c Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,843 @@ -+/* -+ * Copyright (C) 2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "kcopyd.h" -+ -+/* FIXME: this is only needed for the DMERR macros */ -+#include "dm.h" -+ -+/* -+ * Hard sector size used all over the kernel. 
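
The dm_round_up and array_too_big inlines above are self-contained
arithmetic and easy to sanity-check outside the kernel: dm_round_up is
ceiling(n / size) * size, and array_too_big guards a "fixed + obj * num"
allocation size against unsigned overflow:

	#include <assert.h>
	#include <limits.h>

	static unsigned long dm_round_up(unsigned long n, unsigned long size)
	{
		unsigned long r = n % size;
		return n + (r ? (size - r) : 0);
	}

	static int array_too_big(unsigned long fixed, unsigned long obj,
				 unsigned long num)
	{
		return (num > (ULONG_MAX - fixed) / obj);
	}

	int main(void)
	{
		assert(dm_round_up(17, 8) == 24);	/* ceiling(17/8) * 8 */
		assert(dm_round_up(16, 8) == 16);	/* already a multiple */
		assert(!array_too_big(64, 16, 1000));
		assert(array_too_big(0, 2, ULONG_MAX));	/* would wrap */
		return 0;
	}
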
-+ */ -+#define SECTOR_SIZE 512 -+#define SECTOR_SHIFT 9 -+ -+static void wake_kcopyd(void); -+ -+/*----------------------------------------------------------------- -+ * We reserve our own pool of preallocated pages that are -+ * only used for kcopyd io. -+ *---------------------------------------------------------------*/ -+ -+/* -+ * FIXME: This should be configurable. -+ */ -+#define NUM_PAGES 512 -+ -+static DECLARE_MUTEX(_pages_lock); -+static int _num_free_pages; -+static struct page *_pages_array[NUM_PAGES]; -+static DECLARE_MUTEX(start_lock); -+ -+static int init_pages(void) -+{ -+ int i; -+ struct page *p; -+ -+ for (i = 0; i < NUM_PAGES; i++) { -+ p = alloc_page(GFP_KERNEL); -+ if (!p) -+ goto bad; -+ -+ LockPage(p); -+ _pages_array[i] = p; -+ } -+ -+ _num_free_pages = NUM_PAGES; -+ return 0; -+ -+ bad: -+ while (i--) -+ __free_page(_pages_array[i]); -+ return -ENOMEM; -+} -+ -+static void exit_pages(void) -+{ -+ int i; -+ struct page *p; -+ -+ for (i = 0; i < NUM_PAGES; i++) { -+ p = _pages_array[i]; -+ UnlockPage(p); -+ __free_page(p); -+ } -+ -+ _num_free_pages = 0; -+} -+ -+static int kcopyd_get_pages(int num, struct page **result) -+{ -+ int i; -+ -+ down(&_pages_lock); -+ if (_num_free_pages < num) { -+ up(&_pages_lock); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < num; i++) { -+ _num_free_pages--; -+ result[i] = _pages_array[_num_free_pages]; -+ } -+ up(&_pages_lock); -+ -+ return 0; -+} -+ -+static void kcopyd_free_pages(int num, struct page **result) -+{ -+ int i; -+ -+ down(&_pages_lock); -+ for (i = 0; i < num; i++) -+ _pages_array[_num_free_pages++] = result[i]; -+ up(&_pages_lock); -+} -+ -+/*----------------------------------------------------------------- -+ * We keep our own private pool of buffer_heads. These are just -+ * held in a list on the b_reqnext field. -+ *---------------------------------------------------------------*/ -+ -+/* -+ * Make sure we have enough buffers to always keep the pages -+ * occupied. So we assume the worst case scenario where blocks -+ * are the size of a single sector. -+ */ -+#define NUM_BUFFERS NUM_PAGES * (PAGE_SIZE / SECTOR_SIZE) -+ -+static spinlock_t _buffer_lock = SPIN_LOCK_UNLOCKED; -+static struct buffer_head *_all_buffers; -+static struct buffer_head *_free_buffers; -+ -+static int init_buffers(void) -+{ -+ int i; -+ struct buffer_head *buffers; -+ -+ buffers = vcalloc(NUM_BUFFERS, sizeof(struct buffer_head)); -+ if (!buffers) { -+ DMWARN("Couldn't allocate buffer heads."); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < NUM_BUFFERS; i++) { -+ if (i < NUM_BUFFERS - 1) -+ buffers[i].b_reqnext = &buffers[i + 1]; -+ init_waitqueue_head(&buffers[i].b_wait); -+ INIT_LIST_HEAD(&buffers[i].b_inode_buffers); -+ } -+ -+ _all_buffers = _free_buffers = buffers; -+ return 0; -+} -+ -+static void exit_buffers(void) -+{ -+ vfree(_all_buffers); -+} -+ -+static struct buffer_head *alloc_buffer(void) -+{ -+ struct buffer_head *r; -+ int flags; -+ -+ spin_lock_irqsave(&_buffer_lock, flags); -+ -+ if (!_free_buffers) -+ r = NULL; -+ else { -+ r = _free_buffers; -+ _free_buffers = _free_buffers->b_reqnext; -+ r->b_reqnext = NULL; -+ } -+ -+ spin_unlock_irqrestore(&_buffer_lock, flags); -+ -+ return r; -+} -+ -+/* -+ * Only called from interrupt context. -+ */ -+static void free_buffer(struct buffer_head *bh) -+{ -+ int flags, was_empty; -+ -+ spin_lock_irqsave(&_buffer_lock, flags); -+ was_empty = (_free_buffers == NULL) ? 
1 : 0; -+ bh->b_reqnext = _free_buffers; -+ _free_buffers = bh; -+ spin_unlock_irqrestore(&_buffer_lock, flags); -+ -+ /* -+ * If the buffer list was empty then kcopyd probably went -+ * to sleep because it ran out of buffer heads, so let's -+ * wake it up. -+ */ -+ if (was_empty) -+ wake_kcopyd(); -+} -+ -+/*----------------------------------------------------------------- -+ * kcopyd_jobs need to be allocated by the *clients* of kcopyd, -+ * for this reason we use a mempool to prevent the client from -+ * ever having to do io (which could cause a -+ * deadlock). -+ *---------------------------------------------------------------*/ -+#define MIN_JOBS NUM_PAGES -+ -+static kmem_cache_t *_job_cache = NULL; -+static mempool_t *_job_pool = NULL; -+ -+/* -+ * We maintain three lists of jobs: -+ * -+ * i) jobs waiting for pages -+ * ii) jobs that have pages, and are waiting for the io to be issued. -+ * iii) jobs that have completed. -+ * -+ * All three of these are protected by job_lock. -+ */ -+ -+static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED; -+ -+static LIST_HEAD(_complete_jobs); -+static LIST_HEAD(_io_jobs); -+static LIST_HEAD(_pages_jobs); -+ -+static int init_jobs(void) -+{ -+ INIT_LIST_HEAD(&_complete_jobs); -+ INIT_LIST_HEAD(&_io_jobs); -+ INIT_LIST_HEAD(&_pages_jobs); -+ -+ _job_cache = kmem_cache_create("kcopyd-jobs", sizeof(struct kcopyd_job), -+ __alignof__(struct kcopyd_job), -+ 0, NULL, NULL); -+ if (!_job_cache) -+ return -ENOMEM; -+ -+ _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, -+ mempool_free_slab, _job_cache); -+ if (!_job_pool) { -+ kmem_cache_destroy(_job_cache); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static void exit_jobs(void) -+{ -+ mempool_destroy(_job_pool); -+ kmem_cache_destroy(_job_cache); -+} -+ -+struct kcopyd_job *kcopyd_alloc_job(void) -+{ -+ struct kcopyd_job *job; -+ -+ job = mempool_alloc(_job_pool, GFP_NOIO); -+ if (!job) -+ return NULL; -+ -+ memset(job, 0, sizeof(*job)); -+ return job; -+} -+ -+void kcopyd_free_job(struct kcopyd_job *job) -+{ -+ mempool_free(job, _job_pool); -+} -+ -+/* -+ * Functions to push and pop a job onto the head of a given job -+ * list. -+ */ -+static inline struct kcopyd_job *pop(struct list_head *jobs) -+{ -+ struct kcopyd_job *job = NULL; -+ int flags; -+ -+ spin_lock_irqsave(&_job_lock, flags); -+ -+ if (!list_empty(jobs)) { -+ job = list_entry(jobs->next, struct kcopyd_job, list); -+ list_del(&job->list); -+ } -+ spin_unlock_irqrestore(&_job_lock, flags); -+ -+ return job; -+} -+ -+static inline void push(struct list_head *jobs, struct kcopyd_job *job) -+{ -+ int flags; -+ -+ spin_lock_irqsave(&_job_lock, flags); -+ list_add(&job->list, jobs); -+ spin_unlock_irqrestore(&_job_lock, flags); -+} -+ -+/* -+ * Completion function for one of our buffers. -+ */ -+static void end_bh(struct buffer_head *bh, int uptodate) -+{ -+ struct kcopyd_job *job = bh->b_private; -+ -+ mark_buffer_uptodate(bh, uptodate); -+ unlock_buffer(bh); -+ -+ if (!uptodate) -+ job->err = -EIO; -+ -+ /* are we the last ? 
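
A small portability nit in the helpers above: free_buffer, pop and push
declare the spin_lock_irqsave cookie as int, but the kernel convention is
unsigned long, since the macro saves the architecture's full flags word
there. pop with the type corrected, as a sketch:

	static inline struct kcopyd_job *pop(struct list_head *jobs)
	{
		struct kcopyd_job *job = NULL;
		unsigned long flags;	/* not int: holds the saved flags word */

		spin_lock_irqsave(&_job_lock, flags);
		if (!list_empty(jobs)) {
			job = list_entry(jobs->next, struct kcopyd_job, list);
			list_del(&job->list);
		}
		spin_unlock_irqrestore(&_job_lock, flags);

		return job;
	}
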
*/ -+ if (atomic_dec_and_test(&job->nr_incomplete)) { -+ push(&_complete_jobs, job); -+ wake_kcopyd(); -+ } -+ -+ free_buffer(bh); -+} -+ -+static void dispatch_bh(struct kcopyd_job *job, -+ struct buffer_head *bh, int block) -+{ -+ int p; -+ -+ /* -+ * Add in the job offset -+ */ -+ bh->b_blocknr = (job->disk.sector >> job->block_shift) + block; -+ -+ p = block >> job->bpp_shift; -+ block &= job->bpp_mask; -+ -+ bh->b_dev = B_FREE; -+ bh->b_size = job->block_size; -+ set_bh_page(bh, job->pages[p], ((block << job->block_shift) + -+ job->offset) << SECTOR_SHIFT); -+ bh->b_this_page = bh; -+ -+ init_buffer(bh, end_bh, job); -+ -+ bh->b_dev = job->disk.dev; -+ bh->b_state = ((1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req)); -+ -+ set_bit(BH_Uptodate, &bh->b_state); -+ if (job->rw == WRITE) -+ clear_bit(BH_Dirty, &bh->b_state); -+ -+ submit_bh(job->rw, bh); -+} -+ -+/* -+ * These three functions process 1 item from the corresponding -+ * job list. -+ * -+ * They return: -+ * < 0: error -+ * 0: success -+ * > 0: can't process yet. -+ */ -+static int run_complete_job(struct kcopyd_job *job) -+{ -+ job->callback(job); -+ return 0; -+} -+ -+/* -+ * Request io on as many buffer heads as we can currently get for -+ * a particular job. -+ */ -+static int run_io_job(struct kcopyd_job *job) -+{ -+ unsigned int block; -+ struct buffer_head *bh; -+ -+ for (block = atomic_read(&job->nr_requested); -+ block < job->nr_blocks; block++) { -+ bh = alloc_buffer(); -+ if (!bh) -+ break; -+ -+ atomic_inc(&job->nr_requested); -+ dispatch_bh(job, bh, block); -+ } -+ -+ return (block == job->nr_blocks) ? 0 : 1; -+} -+ -+static int run_pages_job(struct kcopyd_job *job) -+{ -+ int r; -+ -+ job->nr_pages = (job->disk.count + job->offset) / -+ (PAGE_SIZE / SECTOR_SIZE); -+ r = kcopyd_get_pages(job->nr_pages, job->pages); -+ -+ if (!r) { -+ /* this job is ready for io */ -+ push(&_io_jobs, job); -+ return 0; -+ } -+ -+ if (r == -ENOMEM) -+ /* can complete now */ -+ return 1; -+ -+ return r; -+} -+ -+/* -+ * Run through a list for as long as possible. Returns the count -+ * of successful jobs. -+ */ -+static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) -+{ -+ struct kcopyd_job *job; -+ int r, count = 0; -+ -+ while ((job = pop(jobs))) { -+ -+ r = fn(job); -+ -+ if (r < 0) { -+ /* error this rogue job */ -+ job->err = r; -+ push(&_complete_jobs, job); -+ break; -+ } -+ -+ if (r > 0) { -+ /* -+ * We couldn't service this job ATM, so -+ * push this job back onto the list. -+ */ -+ push(jobs, job); -+ break; -+ } -+ -+ count++; -+ } -+ -+ return count; -+} -+ -+/* -+ * kcopyd does this every time it's woken up. -+ */ -+static void do_work(void) -+{ -+ int count; -+ -+ /* -+ * We loop round until there is no more work to do. 
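
The three-way return convention above (negative: error the job; zero:
consumed; positive: push back and stop) is what lets process_jobs drain a
list without busy-looping on a stuck head. In miniature, as plain C:

	#include <stdio.h>

	/* <0 error, 0 success, >0 can't process yet */
	static int try_one(int item)
	{
		return (item > 100) ? 1 : 0;	/* big items need resources */
	}

	int main(void)
	{
		int items[] = { 1, 2, 200, 3 };
		int i, r = 0, count = 0;

		for (i = 0; i < 4; i++) {
			r = try_one(items[i]);
			if (r)
				break;	/* positive r means retry later */
			count++;
		}
		printf("processed %d, stopped with r=%d\n", count, r);
		return 0;
	}
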
-+ */ -+ do { -+ count = process_jobs(&_complete_jobs, run_complete_job); -+ count += process_jobs(&_io_jobs, run_io_job); -+ count += process_jobs(&_pages_jobs, run_pages_job); -+ -+ } while (count); -+ -+ run_task_queue(&tq_disk); -+} -+ -+/*----------------------------------------------------------------- -+ * The daemon -+ *---------------------------------------------------------------*/ -+static atomic_t _kcopyd_must_die; -+static DECLARE_MUTEX(_run_lock); -+static DECLARE_WAIT_QUEUE_HEAD(_job_queue); -+ -+static int kcopyd(void *arg) -+{ -+ DECLARE_WAITQUEUE(wq, current); -+ -+ daemonize(); -+ strcpy(current->comm, "kcopyd"); -+ atomic_set(&_kcopyd_must_die, 0); -+ -+ add_wait_queue(&_job_queue, &wq); -+ -+ down(&_run_lock); -+ up(&start_lock); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ if (atomic_read(&_kcopyd_must_die)) -+ break; -+ -+ do_work(); -+ schedule(); -+ } -+ -+ set_current_state(TASK_RUNNING); -+ remove_wait_queue(&_job_queue, &wq); -+ -+ up(&_run_lock); -+ -+ return 0; -+} -+ -+static int start_daemon(void) -+{ -+ static pid_t pid = 0; -+ -+ down(&start_lock); -+ -+ pid = kernel_thread(kcopyd, NULL, 0); -+ if (pid <= 0) { -+ DMERR("Failed to start kcopyd thread"); -+ return -EAGAIN; -+ } -+ -+ /* -+ * wait for the daemon to up this mutex. -+ */ -+ down(&start_lock); -+ up(&start_lock); -+ -+ return 0; -+} -+ -+static int stop_daemon(void) -+{ -+ atomic_set(&_kcopyd_must_die, 1); -+ wake_kcopyd(); -+ down(&_run_lock); -+ up(&_run_lock); -+ -+ return 0; -+} -+ -+static void wake_kcopyd(void) -+{ -+ wake_up_interruptible(&_job_queue); -+} -+ -+static int calc_shift(unsigned int n) -+{ -+ int s; -+ -+ for (s = 0; n; s++, n >>= 1) -+ ; -+ -+ return --s; -+} -+ -+static void calc_block_sizes(struct kcopyd_job *job) -+{ -+ job->block_size = get_hardsect_size(job->disk.dev); -+ job->block_shift = calc_shift(job->block_size / SECTOR_SIZE); -+ job->bpp_shift = PAGE_SHIFT - job->block_shift - SECTOR_SHIFT; -+ job->bpp_mask = (1 << job->bpp_shift) - 1; -+ job->nr_blocks = job->disk.count >> job->block_shift; -+ atomic_set(&job->nr_requested, 0); -+ atomic_set(&job->nr_incomplete, job->nr_blocks); -+} -+ -+int kcopyd_io(struct kcopyd_job *job) -+{ -+ calc_block_sizes(job); -+ push(job->pages[0] ? &_io_jobs : &_pages_jobs, job); -+ wake_kcopyd(); -+ return 0; -+} -+ -+/*----------------------------------------------------------------- -+ * The copier is implemented on top of the simpler async io -+ * daemon above. 
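
calc_shift above computes the position of the highest set bit, i.e.
floor(log2(n)); calc_block_sizes leans on block sizes being powers of two
so the later per-buffer arithmetic is shifts and masks rather than
divisions. Checked in isolation:

	#include <assert.h>

	static int calc_shift(unsigned int n)
	{
		int s;

		for (s = 0; n; s++, n >>= 1)
			;

		return --s;
	}

	int main(void)
	{
		assert(calc_shift(1) == 0);	/* 512-byte blocks: 1 sector */
		assert(calc_shift(2) == 1);
		assert(calc_shift(4096 / 512) == 3);	/* 4K blocks: 8 sectors */
		return 0;
	}
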
-+ *---------------------------------------------------------------*/ -+struct copy_info { -+ kcopyd_notify_fn notify; -+ void *notify_context; -+ -+ struct kcopyd_region to; -+}; -+ -+#define MIN_INFOS 128 -+static kmem_cache_t *_copy_cache = NULL; -+static mempool_t *_copy_pool = NULL; -+ -+static int init_copier(void) -+{ -+ _copy_cache = kmem_cache_create("kcopyd-info", -+ sizeof(struct copy_info), -+ __alignof__(struct copy_info), -+ 0, NULL, NULL); -+ if (!_copy_cache) -+ return -ENOMEM; -+ -+ _copy_pool = mempool_create(MIN_INFOS, mempool_alloc_slab, -+ mempool_free_slab, _copy_cache); -+ if (!_copy_pool) { -+ kmem_cache_destroy(_copy_cache); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static void exit_copier(void) -+{ -+ if (_copy_pool) -+ mempool_destroy(_copy_pool); -+ -+ if (_copy_cache) -+ kmem_cache_destroy(_copy_cache); -+} -+ -+static inline struct copy_info *alloc_copy_info(void) -+{ -+ return mempool_alloc(_copy_pool, GFP_NOIO); -+} -+ -+static inline void free_copy_info(struct copy_info *info) -+{ -+ mempool_free(info, _copy_pool); -+} -+ -+void copy_complete(struct kcopyd_job *job) -+{ -+ struct copy_info *info = (struct copy_info *) job->context; -+ -+ if (info->notify) -+ info->notify(job->err, info->notify_context); -+ -+ free_copy_info(info); -+ -+ kcopyd_free_pages(job->nr_pages, job->pages); -+ -+ kcopyd_free_job(job); -+} -+ -+static void page_write_complete(struct kcopyd_job *job) -+{ -+ struct copy_info *info = (struct copy_info *) job->context; -+ int i; -+ -+ if (info->notify) -+ info->notify(job->err, info->notify_context); -+ -+ free_copy_info(info); -+ for (i = 0; i < job->nr_pages; i++) -+ put_page(job->pages[i]); -+ -+ kcopyd_free_job(job); -+} -+ -+/* -+ * These callback functions implement the state machine that copies regions. -+ */ -+void copy_write(struct kcopyd_job *job) -+{ -+ struct copy_info *info = (struct copy_info *) job->context; -+ -+ if (job->err && info->notify) { -+ info->notify(job->err, job->context); -+ kcopyd_free_job(job); -+ free_copy_info(info); -+ return; -+ } -+ -+ job->rw = WRITE; -+ memcpy(&job->disk, &info->to, sizeof(job->disk)); -+ job->callback = copy_complete; -+ job->context = info; -+ -+ /* -+ * Queue the write. -+ */ -+ kcopyd_io(job); -+} -+ -+int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, -+ struct page **pages, int offset, kcopyd_notify_fn fn, -+ void *context) -+{ -+ struct copy_info *info; -+ struct kcopyd_job *job; -+ int i; -+ -+ /* -+ * Allocate a new copy_info. -+ */ -+ info = alloc_copy_info(); -+ if (!info) -+ return -ENOMEM; -+ -+ job = kcopyd_alloc_job(); -+ if (!job) { -+ free_copy_info(info); -+ return -ENOMEM; -+ } -+ -+ /* -+ * set up for the write. -+ */ -+ info->notify = fn; -+ info->notify_context = context; -+ memcpy(&info->to, to, sizeof(*to)); -+ -+ /* Get the pages */ -+ job->nr_pages = nr_pages; -+ for (i = 0; i < nr_pages; i++) { -+ get_page(pages[i]); -+ job->pages[i] = pages[i]; -+ } -+ -+ job->rw = WRITE; -+ -+ memcpy(&job->disk, &info->to, sizeof(job->disk)); -+ job->offset = offset; -+ calc_block_sizes(job); -+ job->callback = page_write_complete; -+ job->context = info; -+ -+ /* -+ * Trigger job. -+ */ -+ kcopyd_io(job); -+ return 0; -+} -+ -+int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, -+ kcopyd_notify_fn fn, void *context) -+{ -+ struct copy_info *info; -+ struct kcopyd_job *job; -+ -+ /* -+ * Allocate a new copy_info. 
-+ */ -+ info = alloc_copy_info(); -+ if (!info) -+ return -ENOMEM; -+ -+ job = kcopyd_alloc_job(); -+ if (!job) { -+ free_copy_info(info); -+ return -ENOMEM; -+ } -+ -+ /* -+ * set up for the read. -+ */ -+ info->notify = fn; -+ info->notify_context = context; -+ memcpy(&info->to, to, sizeof(*to)); -+ -+ job->rw = READ; -+ memcpy(&job->disk, from, sizeof(*from)); -+ -+ job->offset = 0; -+ calc_block_sizes(job); -+ job->callback = copy_write; -+ job->context = info; -+ -+ /* -+ * Trigger job. -+ */ -+ kcopyd_io(job); -+ return 0; -+} -+ -+/*----------------------------------------------------------------- -+ * Unit setup -+ *---------------------------------------------------------------*/ -+static struct { -+ int (*init) (void); -+ void (*exit) (void); -+ -+} _inits[] = { -+#define xx(n) { init_ ## n, exit_ ## n} -+ xx(pages), -+ xx(buffers), -+ xx(jobs), -+ xx(copier) -+#undef xx -+}; -+ -+static int _client_count = 0; -+static DECLARE_MUTEX(_client_count_sem); -+ -+static int kcopyd_init(void) -+{ -+ const int count = sizeof(_inits) / sizeof(*_inits); -+ -+ int r, i; -+ -+ for (i = 0; i < count; i++) { -+ r = _inits[i].init(); -+ if (r) -+ goto bad; -+ } -+ -+ start_daemon(); -+ return 0; -+ -+ bad: -+ while (i--) -+ _inits[i].exit(); -+ -+ return r; -+} -+ -+static void kcopyd_exit(void) -+{ -+ int i = sizeof(_inits) / sizeof(*_inits); -+ -+ if (stop_daemon()) -+ DMWARN("Couldn't stop kcopyd."); -+ -+ while (i--) -+ _inits[i].exit(); -+} -+ -+void kcopyd_inc_client_count(void) -+{ -+ /* -+ * What I need here is an atomic_test_and_inc that returns -+ * the previous value of the atomic... In its absence I lock -+ * an int with a semaphore. :-( -+ */ -+ down(&_client_count_sem); -+ if (_client_count == 0) -+ kcopyd_init(); -+ _client_count++; -+ -+ up(&_client_count_sem); -+} -+ -+void kcopyd_dec_client_count(void) -+{ -+ down(&_client_count_sem); -+ if (--_client_count == 0) -+ kcopyd_exit(); -+ -+ up(&_client_count_sem); -+} -diff -ruN linux-2.4.19/drivers/md/kcopyd.h linux-2.4.19-dm/drivers/md/kcopyd.h ---- linux-2.4.19/drivers/md/kcopyd.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/drivers/md/kcopyd.h Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,101 @@ -+/* -+ * Copyright (C) 2001 Sistina Software -+ * -+ * This file is released under the GPL. -+ */ -+ -+#ifndef DM_KCOPYD_H -+#define DM_KCOPYD_H -+ -+/* -+ * Needed for the definition of offset_t. -+ */ -+#include -+#include -+ -+struct kcopyd_region { -+ kdev_t dev; -+ sector_t sector; -+ sector_t count; -+}; -+ -+#define MAX_KCOPYD_PAGES 128 -+ -+struct kcopyd_job { -+ struct list_head list; -+ -+ /* -+ * Error state of the job. -+ */ -+ int err; -+ -+ /* -+ * Either READ or WRITE -+ */ -+ int rw; -+ -+ /* -+ * The source or destination for the transfer. -+ */ -+ struct kcopyd_region disk; -+ -+ int nr_pages; -+ struct page *pages[MAX_KCOPYD_PAGES]; -+ -+ /* -+ * Shifts and masks that will be useful when dispatching -+ * each buffer_head. -+ */ -+ sector_t offset; -+ sector_t block_size; -+ sector_t block_shift; -+ sector_t bpp_shift; /* blocks per page */ -+ sector_t bpp_mask; -+ -+ /* -+ * nr_blocks is how many buffer heads will have to be -+ * displatched to service this job, nr_requested is how -+ * many have been dispatched and nr_complete is how many -+ * have come back. -+ */ -+ unsigned int nr_blocks; -+ atomic_t nr_requested; -+ atomic_t nr_incomplete; -+ -+ /* -+ * Set this to ensure you are notified when the job has -+ * completed. 'context' is for callback to use. 
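
A hedged sketch of how a client, dm-snapshot say, would drive the
interface declared in this header: fire an asynchronous copy and learn the
outcome in the callback. The helper and its completion flag are
hypothetical, not part of the patch:

	static void copy_done(int err, void *context)
	{
		*(int *) context = err ? err : 1;	/* signal the waiter */
	}

	static int start_chunk_copy(kdev_t src, kdev_t dst, sector_t sector,
				    sector_t nr_sectors, int *done)
	{
		struct kcopyd_region from = { src, sector, nr_sectors };
		struct kcopyd_region to = { dst, sector, nr_sectors };

		*done = 0;
		return kcopyd_copy(&from, &to, copy_done, done);	/* async */
	}

Callers are expected to have called kcopyd_inc_client_count() beforehand,
so kcopyd reserves its pools, and to balance it with
kcopyd_dec_client_count() when finished.
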
-+ */ -+ void (*callback) (struct kcopyd_job *job); -+ void *context; -+}; -+ -+/* -+ * Low level async io routines. -+ */ -+struct kcopyd_job *kcopyd_alloc_job(void); -+void kcopyd_free_job(struct kcopyd_job *job); -+ -+int kcopyd_queue_job(struct kcopyd_job *job); -+ -+/* -+ * Submit a copy job to kcopyd. This is built on top of the -+ * previous three fns. -+ */ -+typedef void (*kcopyd_notify_fn) (int err, void *context); -+ -+int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, -+ kcopyd_notify_fn fn, void *context); -+ -+int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, -+ struct page **pages, int offset, kcopyd_notify_fn fn, -+ void *context); -+ -+/* -+ * We only want kcopyd to reserve resources if someone is -+ * actually using it. -+ */ -+void kcopyd_inc_client_count(void); -+void kcopyd_dec_client_count(void); -+ -+#endif -diff -ruN linux-2.4.19/fs/buffer.c linux-2.4.19-dm/fs/buffer.c ---- linux-2.4.19/fs/buffer.c Wed Aug 14 11:51:40 2002 -+++ linux-2.4.19-dm/fs/buffer.c Thu Nov 21 14:31:23 2002 -@@ -587,9 +587,10 @@ - void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode) - { - spin_lock(&lru_list_lock); -- if (bh->b_inode) -+ if (buffer_inode(bh)) - list_del(&bh->b_inode_buffers); -- bh->b_inode = inode; -+ else -+ set_buffer_inode(bh); - list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers); - spin_unlock(&lru_list_lock); - } -@@ -597,9 +598,10 @@ - void buffer_insert_inode_data_queue(struct buffer_head *bh, struct inode *inode) - { - spin_lock(&lru_list_lock); -- if (bh->b_inode) -+ if (buffer_inode(bh)) - list_del(&bh->b_inode_buffers); -- bh->b_inode = inode; -+ else -+ set_buffer_inode(bh); - list_add(&bh->b_inode_buffers, &inode->i_dirty_data_buffers); - spin_unlock(&lru_list_lock); - } -@@ -608,13 +610,13 @@ - remove_inode_queue functions. */ - static void __remove_inode_queue(struct buffer_head *bh) - { -- bh->b_inode = NULL; -+ clear_buffer_inode(bh); - list_del(&bh->b_inode_buffers); - } - - static inline void remove_inode_queue(struct buffer_head *bh) - { -- if (bh->b_inode) -+ if (buffer_inode(bh)) - __remove_inode_queue(bh); - } - -@@ -746,6 +748,7 @@ - bh->b_list = BUF_CLEAN; - bh->b_end_io = handler; - bh->b_private = private; -+ bh->b_journal_head = NULL; - } - - static void end_buffer_io_async(struct buffer_head * bh, int uptodate) -@@ -843,9 +846,9 @@ - bh = BH_ENTRY(list->next); - list_del(&bh->b_inode_buffers); - if (!buffer_dirty(bh) && !buffer_locked(bh)) -- bh->b_inode = NULL; -+ clear_buffer_inode(bh); - else { -- bh->b_inode = &tmp; -+ set_buffer_inode(bh); - list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers); - if (buffer_dirty(bh)) { - get_bh(bh); -@@ -1138,7 +1141,7 @@ - */ - static void __put_unused_buffer_head(struct buffer_head * bh) - { -- if (bh->b_inode) -+ if (buffer_inode(bh)) - BUG(); - if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) { - kmem_cache_free(bh_cachep, bh); -diff -ruN linux-2.4.19/fs/jbd/journal.c linux-2.4.19-dm/fs/jbd/journal.c ---- linux-2.4.19/fs/jbd/journal.c Wed Aug 14 11:51:43 2002 -+++ linux-2.4.19-dm/fs/jbd/journal.c Thu Nov 21 14:31:23 2002 -@@ -1625,8 +1625,8 @@ - * - * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit - * is set. This bit is tested in core kernel code where we need to take -- * JBD-specific actions. Testing the zeroness of ->b_private is not reliable -- * there. -+ * JBD-specific actions. Testing the zeroness of ->b_journal_head is not -+ * reliable there. 
- * - * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. - * -@@ -1681,9 +1681,9 @@ - - if (buffer_jbd(bh)) { - /* Someone did it for us! */ -- J_ASSERT_BH(bh, bh->b_private != NULL); -+ J_ASSERT_BH(bh, bh->b_journal_head != NULL); - journal_free_journal_head(jh); -- jh = bh->b_private; -+ jh = bh->b_journal_head; - } else { - /* - * We actually don't need jh_splice_lock when -@@ -1691,7 +1691,7 @@ - */ - spin_lock(&jh_splice_lock); - set_bit(BH_JBD, &bh->b_state); -- bh->b_private = jh; -+ bh->b_journal_head = jh; - jh->b_bh = bh; - atomic_inc(&bh->b_count); - spin_unlock(&jh_splice_lock); -@@ -1700,7 +1700,7 @@ - } - jh->b_jcount++; - spin_unlock(&journal_datalist_lock); -- return bh->b_private; -+ return bh->b_journal_head; - } - - /* -@@ -1733,7 +1733,7 @@ - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - spin_lock(&jh_splice_lock); -- bh->b_private = NULL; -+ bh->b_journal_head = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_bit(BH_JBD, &bh->b_state); - __brelse(bh); -diff -ruN linux-2.4.19/include/linux/device-mapper.h linux-2.4.19-dm/include/linux/device-mapper.h ---- linux-2.4.19/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/include/linux/device-mapper.h Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,85 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the LGPL. -+ */ -+ -+#ifndef _LINUX_DEVICE_MAPPER_H -+#define _LINUX_DEVICE_MAPPER_H -+ -+#ifdef __KERNEL__ -+ -+typedef unsigned long sector_t; -+ -+struct dm_target; -+struct dm_table; -+struct dm_dev; -+ -+typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; -+ -+/* -+ * In the constructor the target parameter will already have the -+ * table, type, begin and len fields filled in. -+ */ -+typedef int (*dm_ctr_fn) (struct dm_target *target, int argc, char **argv); -+ -+/* -+ * The destructor doesn't need to free the dm_target, just -+ * anything hidden ti->private. -+ */ -+typedef void (*dm_dtr_fn) (struct dm_target *ti); -+ -+/* -+ * The map function must return: -+ * < 0: error -+ * = 0: The target will handle the io by resubmitting it later -+ * > 0: simple remap complete -+ */ -+typedef int (*dm_map_fn) (struct dm_target *ti, struct buffer_head *bh, int rw); -+typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, -+ char *result, int maxlen); -+ -+void dm_error(const char *message); -+ -+/* -+ * Constructors should call these functions to ensure destination devices -+ * are opened/closed correctly. -+ * FIXME: too many arguments. 
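
To make the map-function contract above concrete, here is a minimal
linear-style map function in the shape this header expects. The context
struct is hypothetical, a sketch rather than the real dm-linear:

	struct linear_c {
		struct dm_dev *dev;	/* acquired with dm_get_device() in the ctr */
		sector_t start;		/* offset on the underlying device */
	};

	static int linear_map(struct dm_target *ti, struct buffer_head *bh,
			      int rw)
	{
		struct linear_c *lc = (struct linear_c *) ti->private;

		/* redirect the bh, then return >0: "simple remap complete" */
		bh->b_rdev = lc->dev->dev;
		bh->b_rsector = lc->start + (bh->b_rsector - ti->begin);
		return 1;
	}
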
-+ */ -+int dm_get_device(struct dm_target *ti, const char *path, sector_t start, -+ sector_t len, int mode, struct dm_dev **result); -+void dm_put_device(struct dm_target *ti, struct dm_dev *d); -+ -+/* -+ * Information about a target type -+ */ -+struct target_type { -+ const char *name; -+ struct module *module; -+ dm_ctr_fn ctr; -+ dm_dtr_fn dtr; -+ dm_map_fn map; -+ dm_status_fn status; -+}; -+ -+struct dm_target { -+ struct dm_table *table; -+ struct target_type *type; -+ -+ /* target limits */ -+ sector_t begin; -+ sector_t len; -+ -+ /* target specific data */ -+ void *private; -+ -+ /* Used to provide an error string from the ctr */ -+ char *error; -+}; -+ -+int dm_register_target(struct target_type *t); -+int dm_unregister_target(struct target_type *t); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _LINUX_DEVICE_MAPPER_H */ -diff -ruN linux-2.4.19/include/linux/dm-ioctl.h linux-2.4.19-dm/include/linux/dm-ioctl.h ---- linux-2.4.19/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/include/linux/dm-ioctl.h Thu Nov 21 14:31:09 2002 -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the LGPL. -+ */ -+ -+#ifndef _LINUX_DM_IOCTL_H -+#define _LINUX_DM_IOCTL_H -+ -+#include -+ -+#define DM_DIR "mapper" /* Slashes not supported */ -+#define DM_MAX_TYPE_NAME 16 -+#define DM_NAME_LEN 128 -+#define DM_UUID_LEN 129 -+ -+/* -+ * Implements a traditional ioctl interface to the device mapper. -+ */ -+ -+/* -+ * All ioctl arguments consist of a single chunk of memory, with -+ * this structure at the start. If a uuid is specified any -+ * lookup (eg. for a DM_INFO) will be done on that, *not* the -+ * name. -+ */ -+struct dm_ioctl { -+ /* -+ * The version number is made up of three parts: -+ * major - no backward or forward compatibility, -+ * minor - only backwards compatible, -+ * patch - both backwards and forwards compatible. -+ * -+ * All clients of the ioctl interface should fill in the -+ * version number of the interface that they were -+ * compiled with. -+ * -+ * All recognised ioctl commands (ie. those that don't -+ * return -ENOTTY) fill out this field, even if the -+ * command failed. -+ */ -+ uint32_t version[3]; /* in/out */ -+ uint32_t data_size; /* total size of data passed in -+ * including this struct */ -+ -+ uint32_t data_start; /* offset to start of data -+ * relative to start of this struct */ -+ -+ uint32_t target_count; /* in/out */ -+ uint32_t open_count; /* out */ -+ uint32_t flags; /* in/out */ -+ -+ __kernel_dev_t dev; /* in/out */ -+ -+ char name[DM_NAME_LEN]; /* device name */ -+ char uuid[DM_UUID_LEN]; /* unique identifier for -+ * the block device */ -+}; -+ -+/* -+ * Used to specify tables. These structures appear after the -+ * dm_ioctl. -+ */ -+struct dm_target_spec { -+ int32_t status; /* used when reading from kernel only */ -+ uint64_t sector_start; -+ uint32_t length; -+ -+ /* -+ * Offset in bytes (from the start of this struct) to -+ * next target_spec. -+ */ -+ uint32_t next; -+ -+ char target_type[DM_MAX_TYPE_NAME]; -+ -+ /* -+ * Parameter string starts immediately after this object. -+ * Be careful to add padding after string to ensure correct -+ * alignment of subsequent dm_target_spec. -+ */ -+}; -+ -+/* -+ * Used to retrieve the target dependencies. 
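
Since each dm_target_spec records the byte offset of its successor
relative to its own start, a reader walks the concatenated specs like this
userspace-flavoured sketch (bounds checking against data_size is elided):

	#include <stdint.h>

	static void for_each_spec(struct dm_target_spec *first, uint32_t count,
				  void (*fn) (struct dm_target_spec *, char *))
	{
		struct dm_target_spec *spec = first;
		uint32_t i;

		for (i = 0; i < count; i++) {
			char *params = (char *) (spec + 1);	/* string follows */

			fn(spec, params);
			spec = (struct dm_target_spec *)
			    ((char *) spec + spec->next);
		}
	}
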
-+ */ -+struct dm_target_deps { -+ uint32_t count; -+ -+ __kernel_dev_t dev[0]; /* out */ -+}; -+ -+/* -+ * If you change this make sure you make the corresponding change -+ * to dm-ioctl.c:lookup_ioctl() -+ */ -+enum { -+ /* Top level cmds */ -+ DM_VERSION_CMD = 0, -+ DM_REMOVE_ALL_CMD, -+ -+ /* device level cmds */ -+ DM_DEV_CREATE_CMD, -+ DM_DEV_REMOVE_CMD, -+ DM_DEV_RELOAD_CMD, -+ DM_DEV_RENAME_CMD, -+ DM_DEV_SUSPEND_CMD, -+ DM_DEV_DEPS_CMD, -+ DM_DEV_STATUS_CMD, -+ -+ /* target level cmds */ -+ DM_TARGET_STATUS_CMD, -+ DM_TARGET_WAIT_CMD -+}; -+ -+#define DM_IOCTL 0xfd -+ -+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) -+#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) -+ -+#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) -+#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) -+#define DM_DEV_RELOAD _IOWR(DM_IOCTL, DM_DEV_RELOAD_CMD, struct dm_ioctl) -+#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) -+#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) -+#define DM_DEV_DEPS _IOWR(DM_IOCTL, DM_DEV_DEPS_CMD, struct dm_ioctl) -+#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) -+ -+#define DM_TARGET_STATUS _IOWR(DM_IOCTL, DM_TARGET_STATUS_CMD, struct dm_ioctl) -+#define DM_TARGET_WAIT _IOWR(DM_IOCTL, DM_TARGET_WAIT_CMD, struct dm_ioctl) -+ -+#define DM_VERSION_MAJOR 1 -+#define DM_VERSION_MINOR 0 -+#define DM_VERSION_PATCHLEVEL 8 -+#define DM_VERSION_EXTRA "-ioctl (2002-11-21)" -+ -+/* Status bits */ -+#define DM_READONLY_FLAG 0x00000001 -+#define DM_SUSPEND_FLAG 0x00000002 -+#define DM_EXISTS_FLAG 0x00000004 -+#define DM_PERSISTENT_DEV_FLAG 0x00000008 -+ -+/* -+ * Flag passed into ioctl STATUS command to get table information -+ * rather than current status. 
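
The version triple above implies a simple client-side compatibility rule:
the major numbers must match exactly, and the kernel's minor must be at
least the one the client was compiled against. As a hypothetical helper:

	#include <stdint.h>

	static int dm_version_compatible(const uint32_t kernel[3],
					 const uint32_t compiled[3])
	{
		if (kernel[0] != compiled[0])
			return 0;	/* major: no compatibility either way */

		return kernel[1] >= compiled[1];	/* minor: backwards only */
	}
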
-+ */ -+#define DM_STATUS_TABLE_FLAG 0x00000010 -+ -+#endif /* _LINUX_DM_IOCTL_H */ -diff -ruN linux-2.4.19/include/linux/fs.h linux-2.4.19-dm/include/linux/fs.h ---- linux-2.4.19/include/linux/fs.h Wed Aug 14 11:52:06 2002 -+++ linux-2.4.19-dm/include/linux/fs.h Thu Nov 21 14:31:23 2002 -@@ -219,6 +219,7 @@ - BH_Wait_IO, /* 1 if we should write out this buffer */ - BH_Launder, /* 1 if we can throttle on this buffer */ - BH_JBD, /* 1 if it has an attached journal_head */ -+ BH_Inode, /* 1 if it is attached to i_dirty[_data]_buffers */ - - BH_PrivateStart,/* not a state bit, but the first bit available - * for private allocation by other entities -@@ -261,11 +262,10 @@ - struct page *b_page; /* the page this bh is mapped to */ - void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ - void *b_private; /* reserved for b_end_io */ -- -+ void *b_journal_head; /* ext3 journal_heads */ - unsigned long b_rsector; /* Real buffer location on disk */ - wait_queue_head_t b_wait; - -- struct inode * b_inode; - struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ - }; - -@@ -1179,6 +1179,21 @@ - set_bit(BH_Async, &bh->b_state); - else - clear_bit(BH_Async, &bh->b_state); -+} -+ -+static inline void set_buffer_inode(struct buffer_head *bh) -+{ -+ set_bit(BH_Inode, &bh->b_state); -+} -+ -+static inline void clear_buffer_inode(struct buffer_head *bh) -+{ -+ clear_bit(BH_Inode, &bh->b_state); -+} -+ -+static inline int buffer_inode(struct buffer_head *bh) -+{ -+ return test_bit(BH_Inode, &bh->b_state); - } - - /* -diff -ruN linux-2.4.19/include/linux/jbd.h linux-2.4.19-dm/include/linux/jbd.h ---- linux-2.4.19/include/linux/jbd.h Wed Aug 14 11:52:07 2002 -+++ linux-2.4.19-dm/include/linux/jbd.h Thu Nov 21 14:31:23 2002 -@@ -246,7 +246,7 @@ - - static inline struct journal_head *bh2jh(struct buffer_head *bh) - { -- return bh->b_private; -+ return bh->b_journal_head; - } - - struct jbd_revoke_table_s; -diff -ruN linux-2.4.19/include/linux/mempool.h linux-2.4.19-dm/include/linux/mempool.h ---- linux-2.4.19/include/linux/mempool.h Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/include/linux/mempool.h Thu Nov 21 14:31:05 2002 -@@ -0,0 +1,30 @@ -+/* -+ * memory buffer pool support -+ */ -+#ifndef _LINUX_MEMPOOL_H -+#define _LINUX_MEMPOOL_H -+ -+#include -+#include -+ -+struct mempool_s; -+typedef struct mempool_s mempool_t; -+ -+typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data); -+typedef void (mempool_free_t)(void *element, void *pool_data); -+ -+extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, -+ mempool_free_t *free_fn, void *pool_data); -+extern int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask); -+extern void mempool_destroy(mempool_t *pool); -+extern void * mempool_alloc(mempool_t *pool, int gfp_mask); -+extern void mempool_free(void *element, mempool_t *pool); -+ -+/* -+ * A mempool_alloc_t and mempool_free_t that get the memory from -+ * a slab that is passed in through pool_data. 
-+ */ -+void *mempool_alloc_slab(int gfp_mask, void *pool_data); -+void mempool_free_slab(void *element, void *pool_data); -+ -+#endif /* _LINUX_MEMPOOL_H */ -diff -ruN linux-2.4.19/include/linux/vmalloc.h linux-2.4.19-dm/include/linux/vmalloc.h ---- linux-2.4.19/include/linux/vmalloc.h Wed Aug 14 11:52:09 2002 -+++ linux-2.4.19-dm/include/linux/vmalloc.h Thu Nov 21 14:31:19 2002 -@@ -25,6 +25,7 @@ - extern void vmfree_area_pages(unsigned long address, unsigned long size); - extern int vmalloc_area_pages(unsigned long address, unsigned long size, - int gfp_mask, pgprot_t prot); -+extern void *vcalloc(unsigned long nmemb, unsigned long elem_size); - - /* - * Allocate any pages -diff -ruN linux-2.4.19/kernel/ksyms.c linux-2.4.19-dm/kernel/ksyms.c ---- linux-2.4.19/kernel/ksyms.c Wed Aug 14 11:52:12 2002 -+++ linux-2.4.19-dm/kernel/ksyms.c Thu Nov 21 14:31:19 2002 -@@ -109,6 +109,7 @@ - EXPORT_SYMBOL(vfree); - EXPORT_SYMBOL(__vmalloc); - EXPORT_SYMBOL(vmalloc_to_page); -+EXPORT_SYMBOL(vcalloc); - EXPORT_SYMBOL(mem_map); - EXPORT_SYMBOL(remap_page_range); - EXPORT_SYMBOL(max_mapnr); -diff -ruN linux-2.4.19/mm/Makefile linux-2.4.19-dm/mm/Makefile ---- linux-2.4.19/mm/Makefile Wed Aug 14 11:52:12 2002 -+++ linux-2.4.19-dm/mm/Makefile Thu Nov 21 14:31:01 2002 -@@ -9,12 +9,12 @@ - - O_TARGET := mm.o - --export-objs := shmem.o filemap.o memory.o page_alloc.o -+export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o - - obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ - vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ - page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ -- shmem.o -+ shmem.o mempool.o - - obj-$(CONFIG_HIGHMEM) += highmem.o - -diff -ruN linux-2.4.19/mm/mempool.c linux-2.4.19-dm/mm/mempool.c ---- linux-2.4.19/mm/mempool.c Thu Jan 1 01:00:00 1970 -+++ linux-2.4.19-dm/mm/mempool.c Thu Nov 21 14:31:05 2002 -@@ -0,0 +1,298 @@ -+/* -+ * linux/mm/mempool.c -+ * -+ * memory buffer pool support. Such pools are mostly used -+ * for guaranteed, deadlock-free memory allocations during -+ * extreme VM load. -+ * -+ * started by Ingo Molnar, Copyright (C) 2001 -+ */ -+ -+#include -+#include -+#include -+#include -+ -+struct mempool_s { -+ spinlock_t lock; -+ int min_nr; /* nr of elements at *elements */ -+ int curr_nr; /* Current nr of elements at *elements */ -+ void **elements; -+ -+ void *pool_data; -+ mempool_alloc_t *alloc; -+ mempool_free_t *free; -+ wait_queue_head_t wait; -+}; -+ -+static void add_element(mempool_t *pool, void *element) -+{ -+ BUG_ON(pool->curr_nr >= pool->min_nr); -+ pool->elements[pool->curr_nr++] = element; -+} -+ -+static void *remove_element(mempool_t *pool) -+{ -+ BUG_ON(pool->curr_nr <= 0); -+ return pool->elements[--pool->curr_nr]; -+} -+ -+static void free_pool(mempool_t *pool) -+{ -+ while (pool->curr_nr) { -+ void *element = remove_element(pool); -+ pool->free(element, pool->pool_data); -+ } -+ kfree(pool->elements); -+ kfree(pool); -+} -+ -+/** -+ * mempool_create - create a memory pool -+ * @min_nr: the minimum number of elements guaranteed to be -+ * allocated for this pool. -+ * @alloc_fn: user-defined element-allocation function. -+ * @free_fn: user-defined element-freeing function. -+ * @pool_data: optional private data available to the user-defined functions. -+ * -+ * this function creates and allocates a guaranteed size, preallocated -+ * memory pool. The pool can be used from the mempool_alloc and mempool_free -+ * functions. This function might sleep. 
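
Usage mirrors local_init in dm.c earlier in this patch: pair a slab cache
with mempool_create and the slab helpers declared in the header, then
allocate with GFP_NOIO on the io path. A condensed sketch with error
checking elided:

	static void mempool_example(void)
	{
		kmem_cache_t *cache;
		mempool_t *pool;
		struct dm_io *io;

		cache = kmem_cache_create("example", sizeof(struct dm_io),
					  0, 0, NULL, NULL);
		pool = mempool_create(16, mempool_alloc_slab,
				      mempool_free_slab, cache);

		io = mempool_alloc(pool, GFP_NOIO);	/* won't fail in
							   process context */
		mempool_free(io, pool);

		mempool_destroy(pool);	/* all elements must be back first */
		kmem_cache_destroy(cache);
	}
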
Both the alloc_fn() and the free_fn() -+ * functions might sleep - as long as the mempool_alloc function is not called -+ * from IRQ contexts. -+ */ -+mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, -+ mempool_free_t *free_fn, void *pool_data) -+{ -+ mempool_t *pool; -+ -+ pool = kmalloc(sizeof(*pool), GFP_KERNEL); -+ if (!pool) -+ return NULL; -+ memset(pool, 0, sizeof(*pool)); -+ pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL); -+ if (!pool->elements) { -+ kfree(pool); -+ return NULL; -+ } -+ spin_lock_init(&pool->lock); -+ pool->min_nr = min_nr; -+ pool->pool_data = pool_data; -+ init_waitqueue_head(&pool->wait); -+ pool->alloc = alloc_fn; -+ pool->free = free_fn; -+ -+ /* -+ * First pre-allocate the guaranteed number of buffers. -+ */ -+ while (pool->curr_nr < pool->min_nr) { -+ void *element; -+ -+ element = pool->alloc(GFP_KERNEL, pool->pool_data); -+ if (unlikely(!element)) { -+ free_pool(pool); -+ return NULL; -+ } -+ add_element(pool, element); -+ } -+ return pool; -+} -+ -+/** -+ * mempool_resize - resize an existing memory pool -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * @new_min_nr: the new minimum number of elements guaranteed to be -+ * allocated for this pool. -+ * @gfp_mask: the usual allocation bitmask. -+ * -+ * This function shrinks/grows the pool. In the case of growing, -+ * it cannot be guaranteed that the pool will be grown to the new -+ * size immediately, but new mempool_free() calls will refill it. -+ * -+ * Note, the caller must guarantee that no mempool_destroy is called -+ * while this function is running. mempool_alloc() & mempool_free() -+ * might be called (eg. from IRQ contexts) while this function executes. -+ */ -+int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask) -+{ -+ void *element; -+ void **new_elements; -+ unsigned long flags; -+ -+ BUG_ON(new_min_nr <= 0); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ if (new_min_nr < pool->min_nr) { -+ while (pool->curr_nr > new_min_nr) { -+ element = remove_element(pool); -+ spin_unlock_irqrestore(&pool->lock, flags); -+ pool->free(element, pool->pool_data); -+ spin_lock_irqsave(&pool->lock, flags); -+ } -+ pool->min_nr = new_min_nr; -+ goto out_unlock; -+ } -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ /* Grow the pool */ -+ new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask); -+ if (!new_elements) -+ return -ENOMEM; -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ memcpy(new_elements, pool->elements, -+ pool->curr_nr * sizeof(*new_elements)); -+ kfree(pool->elements); -+ pool->elements = new_elements; -+ pool->min_nr = new_min_nr; -+ -+ while (pool->curr_nr < pool->min_nr) { -+ spin_unlock_irqrestore(&pool->lock, flags); -+ element = pool->alloc(gfp_mask, pool->pool_data); -+ if (!element) -+ goto out; -+ spin_lock_irqsave(&pool->lock, flags); -+ if (pool->curr_nr < pool->min_nr) -+ add_element(pool, element); -+ else -+ kfree(element); /* Raced */ -+ } -+out_unlock: -+ spin_unlock_irqrestore(&pool->lock, flags); -+out: -+ return 0; -+} -+ -+/** -+ * mempool_destroy - deallocate a memory pool -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * -+ * this function only sleeps if the free_fn() function sleeps. The caller -+ * has to guarantee that all elements have been returned to the pool (ie: -+ * freed) prior to calling mempool_destroy(). 
-+ */ -+void mempool_destroy(mempool_t *pool) -+{ -+ if (pool->curr_nr != pool->min_nr) -+ BUG(); /* There were outstanding elements */ -+ free_pool(pool); -+} -+ -+/** -+ * mempool_alloc - allocate an element from a specific memory pool -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * @gfp_mask: the usual allocation bitmask. -+ * -+ * this function only sleeps if the alloc_fn function sleeps or -+ * returns NULL. Note that due to preallocation, this function -+ * *never* fails when called from process contexts. (it might -+ * fail if called from an IRQ context.) -+ */ -+void * mempool_alloc(mempool_t *pool, int gfp_mask) -+{ -+ void *element; -+ unsigned long flags; -+ int curr_nr; -+ DECLARE_WAITQUEUE(wait, current); -+ int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); -+ -+repeat_alloc: -+ element = pool->alloc(gfp_nowait, pool->pool_data); -+ if (likely(element != NULL)) -+ return element; -+ -+ /* -+ * If the pool is less than 50% full then try harder -+ * to allocate an element: -+ */ -+ if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) { -+ element = pool->alloc(gfp_mask, pool->pool_data); -+ if (likely(element != NULL)) -+ return element; -+ } -+ -+ /* -+ * Kick the VM at this point. -+ */ -+ wakeup_bdflush(); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ if (likely(pool->curr_nr)) { -+ element = remove_element(pool); -+ spin_unlock_irqrestore(&pool->lock, flags); -+ return element; -+ } -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ /* We must not sleep in the GFP_ATOMIC case */ -+ if (gfp_mask == gfp_nowait) -+ return NULL; -+ -+ run_task_queue(&tq_disk); -+ -+ add_wait_queue_exclusive(&pool->wait, &wait); -+ set_task_state(current, TASK_UNINTERRUPTIBLE); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ curr_nr = pool->curr_nr; -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ if (!curr_nr) -+ schedule(); -+ -+ current->state = TASK_RUNNING; -+ remove_wait_queue(&pool->wait, &wait); -+ -+ goto repeat_alloc; -+} -+ -+/** -+ * mempool_free - return an element to the pool. -+ * @element: pool element pointer. -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * -+ * this function only sleeps if the free_fn() function sleeps. -+ */ -+void mempool_free(void *element, mempool_t *pool) -+{ -+ unsigned long flags; -+ -+ if (pool->curr_nr < pool->min_nr) { -+ spin_lock_irqsave(&pool->lock, flags); -+ if (pool->curr_nr < pool->min_nr) { -+ add_element(pool, element); -+ spin_unlock_irqrestore(&pool->lock, flags); -+ wake_up(&pool->wait); -+ return; -+ } -+ spin_unlock_irqrestore(&pool->lock, flags); -+ } -+ pool->free(element, pool->pool_data); -+} -+ -+/* -+ * A commonly used alloc and free fn. 
-+ */
-+void *mempool_alloc_slab(int gfp_mask, void *pool_data)
-+{
-+	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
-+	return kmem_cache_alloc(mem, gfp_mask);
-+}
-+
-+void mempool_free_slab(void *element, void *pool_data)
-+{
-+	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
-+	kmem_cache_free(mem, element);
-+}
-+
-+EXPORT_SYMBOL(mempool_create);
-+EXPORT_SYMBOL(mempool_resize);
-+EXPORT_SYMBOL(mempool_destroy);
-+EXPORT_SYMBOL(mempool_alloc);
-+EXPORT_SYMBOL(mempool_free);
-+EXPORT_SYMBOL(mempool_alloc_slab);
-+EXPORT_SYMBOL(mempool_free_slab);
-diff -ruN linux-2.4.19/mm/vmalloc.c linux-2.4.19-dm/mm/vmalloc.c
---- linux-2.4.19/mm/vmalloc.c	Wed Aug 14 11:52:13 2002
-+++ linux-2.4.19-dm/mm/vmalloc.c	Thu Nov 21 14:31:19 2002
-@@ -321,3 +321,22 @@
- 	read_unlock(&vmlist_lock);
- 	return buf - buf_start;
- }
-+
-+void *vcalloc(unsigned long nmemb, unsigned long elem_size)
-+{
-+	unsigned long size;
-+	void *addr;
-+
-+	/*
-+	 * Check that we're not going to overflow.
-+	 */
-+	if (nmemb > (ULONG_MAX / elem_size))
-+		return NULL;
-+
-+	size = nmemb * elem_size;
-+	addr = vmalloc(size);
-+	if (addr)
-+		memset(addr, 0, size);
-+
-+	return addr;
-+}
diff --git a/patches/linux-2.4.20-interface-v3.patch b/patches/linux-2.4.20-interface-v3.patch
deleted file mode 100644
index 5efe311..0000000
--- a/patches/linux-2.4.20-interface-v3.patch
+++ /dev/null
@@ -1,76 +0,0 @@
---- linux-2.4.20-dm-10/drivers/md/dm-table.c	Wed Mar 26 16:50:29 2003
-+++ linux/drivers/md/dm-table.c	Fri Mar 28 16:50:12 2003
-@@ -640,7 +640,7 @@
- 	return &t->targets[(KEYS_PER_NODE * n) + k];
- }
- 
--unsigned int dm_table_get_num_targets(struct dm_table *t)
-+int dm_table_get_num_targets(struct dm_table *t)
- {
- 	return t->num_targets;
- }
---- linux-2.4.20-dm-10/drivers/md/dm.h	Wed Mar 26 16:12:53 2003
-+++ linux/drivers/md/dm.h	Fri Mar 28 16:42:31 2003
-@@ -100,7 +100,7 @@
- sector_t dm_table_get_size(struct dm_table *t);
- struct dm_target *dm_table_get_target(struct dm_table *t, int index);
- struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
--unsigned int dm_table_get_num_targets(struct dm_table *t);
-+int dm_table_get_num_targets(struct dm_table *t);
- struct list_head *dm_table_get_devices(struct dm_table *t);
- int dm_table_get_mode(struct dm_table *t);
- void dm_table_add_wait_queue(struct dm_table *t, wait_queue_t *wq);
---- linux-2.4.20-dm-10/include/linux/dm-ioctl.h	Fri Mar 28 16:01:18 2003
-+++ linux/include/linux/dm-ioctl.h	Fri Mar 28 16:54:02 2003
-@@ -47,10 +47,10 @@
- 	 * relative to start of this struct */
- 
- 	uint32_t target_count;	/* in/out */
--	uint32_t open_count;	/* out */
-+	int32_t open_count;	/* out */
- 	uint32_t flags;		/* in/out */
- 
--	__kernel_dev_t dev;	/* in/out */
-+	uint64_t dev;		/* in/out */
- 
- 	char name[DM_NAME_LEN];	/* device name */
- 	char uuid[DM_UUID_LEN];	/* unique identifier for
-@@ -62,9 +62,9 @@
-  * dm_ioctl.
-  */
- struct dm_target_spec {
--	int32_t status;		/* used when reading from kernel only */
- 	uint64_t sector_start;
--	uint32_t length;
-+	uint64_t length;
-+	int32_t status;		/* used when reading from kernel only */
- 
- 	/*
- 	 * Offset in bytes (from the start of this struct) to
-@@ -85,9 +85,9 @@
-  * Used to retrieve the target dependencies.
- */ - struct dm_target_deps { -- uint32_t count; -- -- __kernel_dev_t dev[0]; /* out */ -+ uint32_t count; /* Array size */ -+ uint32_t padding; /* unused */ -+ uint64_t dev[0]; /* out */ - }; - - /* -@@ -129,10 +129,10 @@ - #define DM_TARGET_STATUS _IOWR(DM_IOCTL, DM_TARGET_STATUS_CMD, struct dm_ioctl) - #define DM_TARGET_WAIT _IOWR(DM_IOCTL, DM_TARGET_WAIT_CMD, struct dm_ioctl) - --#define DM_VERSION_MAJOR 1 -+#define DM_VERSION_MAJOR 3 - #define DM_VERSION_MINOR 0 --#define DM_VERSION_PATCHLEVEL 10 --#define DM_VERSION_EXTRA "-ioctl (2003-03-27)" -+#define DM_VERSION_PATCHLEVEL 0 -+#define DM_VERSION_EXTRA "-ioctl (2003-03-28)" - - /* Status bits */ - #define DM_READONLY_FLAG 0x00000001