From: Alasdair Kergon Date: Tue, 25 Jun 2002 22:46:22 +0000 (+0000) Subject: Patches tidied and split and regenerated against 2.4.19-rc1. X-Git-Tag: beta4~3 X-Git-Url: https://sourceware.org/git/?a=commitdiff_plain;h=29e757191b6ef937089ed48effc9615c7e78f05d;p=dm.git Patches tidied and split and regenerated against 2.4.19-rc1. Apply either the combined patch: linux-2.4.19-rc1-devmapper-ioctl.patch Or apply separately: common/linux-2.4.19-rc1* *-config.patch - add device-mapper option (tagged experimental) *-mempool.patch - sct's backport *-mempool_slab.patch - a couple more functions *-vcalloc.patch - a calloc implementation (with overflow check) *-b_bdev_private.patch - add a private b_private (avoids ext3 conflict) *-devmapper_1_core.patch - the core driver *-devmapper_2_ioctl.patch - ioctl interface to driver *-devmapper_3_basic_mappings.patch - linear and striped mappings *-devmapper_4_snapshots.patch - snapshot implementation *-devmapper_5_mirror.patch - mirror implementation (for pvmove) --- diff --git a/patches/common/linux-2.4.19-rc1-b_bdev_private.patch b/patches/common/linux-2.4.19-rc1-b_bdev_private.patch new file mode 100644 index 0000000..f5f0cfd --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-b_bdev_private.patch @@ -0,0 +1,15 @@ +diff -ruN linux-2.4.19-rc1/include/linux/fs.h linux/include/linux/fs.h +--- linux-2.4.19-rc1/include/linux/fs.h Tue Feb 19 15:24:57 2002 ++++ linux/include/linux/fs.h Thu Feb 21 12:34:42 2002 +@@ -260,7 +260,10 @@ + char * b_data; /* pointer to data block */ + struct page *b_page; /* the page this bh is mapped to */ + void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ +- void *b_private; /* reserved for b_end_io */ ++ void *b_private; /* reserved for b_end_io, also used by ext3 */ ++ void *b_bdev_private; /* a hack to get around ext3 using b_private ++ * after handing the buffer_head to the ++ * block layer */ + + unsigned long b_rsector; /* Real buffer location on disk */ + wait_queue_head_t b_wait; diff --git a/patches/common/linux-2.4.19-rc1-config.patch b/patches/common/linux-2.4.19-rc1-config.patch new file mode 100644 index 0000000..bc909e1 --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-config.patch @@ -0,0 +1,34 @@ +diff -ruN linux-2.4.19-rc1/drivers/md/Config.in linux/drivers/md/Config.in +--- linux-2.4.19-rc1/drivers/md/Config.in Fri Sep 14 22:22:18 2001 ++++ linux/drivers/md/Config.in Wed Jan 2 19:23:58 2002 +@@ -14,5 +14,8 @@ + dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD + + dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD ++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then ++ dep_tristate ' Device-mapper support (EXPERIMENTAL)' CONFIG_BLK_DEV_DM $CONFIG_MD ++fi + + endmenu +diff -ruN a/Documentation/Configure.help b/Documentation/Configure.help +--- linux-2.4.19-rc1/Documentation/Configure.help Tue Jun 25 14:14:05 2002 ++++ linux/Documentation/Configure.help Tue Jun 25 19:18:26 2002 +@@ -1775,6 +1775,18 @@ + want), say M here and read . The + module will be called lvm-mod.o. + ++Device-mapper support ++CONFIG_BLK_DEV_DM ++ This option lets you create logical block devices dynamically by ++ joining together segments of existing block devices. This mechanism ++ is used by the new version of the logical volume manager under ++ development, LVM2. ++ ++ If you want to compile this as a module, say M here and read ++ . The module will be called dm-mod.o. ++ ++ If unsure, say N. 
++ + Multiple devices driver support (RAID and LVM) + CONFIG_MD + Support multiple physical spindles through a single logical device. diff --git a/patches/common/linux-2.4.19-rc1-devmapper_1_core.patch b/patches/common/linux-2.4.19-rc1-devmapper_1_core.patch new file mode 100644 index 0000000..86b4900 --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-devmapper_1_core.patch @@ -0,0 +1,2149 @@ +diff -ruN linux-2.4.19-rc1/drivers/md/Makefile linux/drivers/md/Makefile +--- linux-2.4.19-rc1/drivers/md/Makefile Tue Jun 25 22:14:21 2002 ++++ linux/drivers/md/Makefile Tue Jun 25 22:09:23 2002 +@@ -4,9 +4,10 @@ + + O_TARGET := mddev.o + +-export-objs := md.o xor.o ++export-objs := md.o xor.o dm-table.o dm-target.o + list-multi := lvm-mod.o + lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o ++dm-mod-objs := dm.o dm-table.o dm-target.o + + # Note: link order is important. All raid personalities + # and xor.o must come before md.o, as they each initialise +@@ -20,8 +21,12 @@ + obj-$(CONFIG_MD_MULTIPATH) += multipath.o + obj-$(CONFIG_BLK_DEV_MD) += md.o + obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o ++obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o + + include $(TOPDIR)/Rules.make + + lvm-mod.o: $(lvm-mod-objs) + $(LD) -r -o $@ $(lvm-mod-objs) ++ ++dm-mod.o: $(dm-mod-objs) ++ $(LD) -r -o $@ $(dm-mod-objs) +diff -ruN linux-2.4.19-rc1/drivers/md/dm-table.c linux/drivers/md/dm-table.c +--- linux-2.4.19-rc1/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-table.c Tue Jun 25 22:02:56 2002 +@@ -0,0 +1,421 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++ ++/* ceiling(n / size) * size */ ++static inline unsigned long round_up(unsigned long n, unsigned long size) ++{ ++ unsigned long r = n % size; ++ return n + (r ? (size - r) : 0); ++} ++ ++/* ceiling(n / size) */ ++static inline unsigned long div_up(unsigned long n, unsigned long size) ++{ ++ return round_up(n, size) / size; ++} ++ ++/* similar to ceiling(log_size(n)) */ ++static uint int_log(unsigned long n, unsigned long base) ++{ ++ int result = 0; ++ ++ while (n > 1) { ++ n = div_up(n, base); ++ result++; ++ } ++ ++ return result; ++} ++ ++/* ++ * return the highest key that you could lookup ++ * from the n'th node on level l of the btree. ++ */ ++static offset_t high(struct dm_table *t, int l, int n) ++{ ++ for (; l < t->depth - 1; l++) ++ n = get_child(n, CHILDREN_PER_NODE - 1); ++ ++ if (n >= t->counts[l]) ++ return (offset_t) - 1; ++ ++ return get_node(t, l, n)[KEYS_PER_NODE - 1]; ++} ++ ++/* ++ * fills in a level of the btree based on the ++ * highs of the level below it. ++ */ ++static int setup_btree_index(int l, struct dm_table *t) ++{ ++ int n, k; ++ offset_t *node; ++ ++ for (n = 0; n < t->counts[l]; n++) { ++ node = get_node(t, l, n); ++ ++ for (k = 0; k < KEYS_PER_NODE; k++) ++ node[k] = high(t, l + 1, get_child(n, k)); ++ } ++ ++ return 0; ++} ++ ++/* ++ * highs, and targets are managed as dynamic ++ * arrays during a table load. ++ */ ++static int alloc_targets(struct dm_table *t, int num) ++{ ++ offset_t *n_highs; ++ struct target *n_targets; ++ int n = t->num_targets; ++ ++ /* ++ * Allocate both the target array and offset array at once. 
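As an aside (not part of the patch): alloc_targets() above allocates the highs[] key array and the target array in one go, so a single free releases both. Below is a user-space sketch of that trick, with calloc standing in for the kernel's vcalloc (whose point, per the commit message, is the overflow check) and a cut-down struct target; offset_t is assumed to be unsigned long.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef unsigned long offset_t;

struct target {                         /* cut-down stand-in for the driver's struct target */
        void *type;
        void *private;
};

/* One allocation holds both arrays: num keys followed by num targets. */
static int alloc_targets(offset_t **highs, struct target **targets, size_t num)
{
        size_t per_entry = sizeof(struct target) + sizeof(offset_t);
        offset_t *n_highs;

        /* refuse sizes that would overflow, as the patch's vcalloc does */
        if (num > SIZE_MAX / per_entry)
                return -1;

        n_highs = calloc(num, per_entry);
        if (!n_highs)
                return -1;

        memset(n_highs, -1, sizeof(*n_highs) * num);    /* unused keys become (offset_t) -1 */

        *highs = n_highs;
        *targets = (struct target *) (n_highs + num);   /* targets sit right after the keys */
        return 0;
}

int main(void)
{
        offset_t *highs;
        struct target *targets;

        if (alloc_targets(&highs, &targets, 8) == 0) {
                printf("highs at %p, targets at %p\n", (void *) highs, (void *) targets);
                free(highs);                            /* one free releases both arrays */
        }
        return 0;
}
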
++ */ ++ n_highs = (offset_t *) vcalloc(sizeof(struct target) + sizeof(offset_t), ++ num); ++ if (!n_highs) ++ return -ENOMEM; ++ ++ n_targets = (struct target *) (n_highs + num); ++ ++ if (n) { ++ memcpy(n_highs, t->highs, sizeof(*n_highs) * n); ++ memcpy(n_targets, t->targets, sizeof(*n_targets) * n); ++ } ++ ++ memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); ++ if (t->highs) ++ vfree(t->highs); ++ ++ t->num_allocated = num; ++ t->highs = n_highs; ++ t->targets = n_targets; ++ ++ return 0; ++} ++ ++int dm_table_create(struct dm_table **result) ++{ ++ struct dm_table *t = kmalloc(sizeof(*t), GFP_NOIO); ++ ++ if (!t) ++ return -ENOMEM; ++ ++ memset(t, 0, sizeof(*t)); ++ INIT_LIST_HEAD(&t->devices); ++ ++ /* allocate a single node's worth of targets to begin with */ ++ if (alloc_targets(t, KEYS_PER_NODE)) { ++ kfree(t); ++ t = NULL; ++ return -ENOMEM; ++ } ++ ++ init_waitqueue_head(&t->eventq); ++ *result = t; ++ return 0; ++} ++ ++static void free_devices(struct list_head *devices) ++{ ++ struct list_head *tmp, *next; ++ ++ for (tmp = devices->next; tmp != devices; tmp = next) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ next = tmp->next; ++ kfree(dd); ++ } ++} ++ ++void dm_table_destroy(struct dm_table *t) ++{ ++ int i; ++ ++ /* destroying the table counts as an event */ ++ dm_table_event(t); ++ ++ /* free the indexes (see dm_table_complete) */ ++ if (t->depth >= 2) ++ vfree(t->index[t->depth - 2]); ++ ++ /* free the targets */ ++ for (i = 0; i < t->num_targets; i++) { ++ struct target *tgt = &t->targets[i]; ++ ++ dm_put_target_type(t->targets[i].type); ++ ++ if (tgt->type->dtr) ++ tgt->type->dtr(t, tgt->private); ++ } ++ ++ vfree(t->highs); ++ ++ /* free the device list */ ++ if (t->devices.next != &t->devices) { ++ DMWARN("devices still present during destroy: " ++ "dm_table_remove_device calls missing"); ++ ++ free_devices(&t->devices); ++ } ++ ++ kfree(t); ++} ++ ++/* ++ * Checks to see if we need to extend highs or targets. ++ */ ++static inline int check_space(struct dm_table *t) ++{ ++ if (t->num_targets >= t->num_allocated) ++ return alloc_targets(t, t->num_allocated * 2); ++ ++ return 0; ++} ++ ++/* ++ * Convert a device path to a kdev_t. ++ */ ++int lookup_device(const char *path, kdev_t *dev) ++{ ++ int r; ++ struct nameidata nd; ++ struct inode *inode; ++ ++ if (!path_init(path, LOOKUP_FOLLOW, &nd)) ++ return 0; ++ ++ if ((r = path_walk(path, &nd))) ++ goto bad; ++ ++ inode = nd.dentry->d_inode; ++ if (!inode) { ++ r = -ENOENT; ++ goto bad; ++ } ++ ++ if (!S_ISBLK(inode->i_mode)) { ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ *dev = inode->i_rdev; ++ ++ bad: ++ path_release(&nd); ++ return r; ++} ++ ++/* ++ * See if we've already got a device in the list. ++ */ ++static struct dm_dev *find_device(struct list_head *l, kdev_t dev) ++{ ++ struct list_head *tmp; ++ ++ list_for_each(tmp, l) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ if (dd->dev == dev) ++ return dd; ++ } ++ ++ return NULL; ++} ++ ++/* ++ * Open a device so we can use it as a map destination. ++ */ ++static int open_dev(struct dm_dev *d) ++{ ++ int err; ++ ++ if (d->bd) ++ BUG(); ++ ++ if (!(d->bd = bdget(kdev_t_to_nr(d->dev)))) ++ return -ENOMEM; ++ ++ if ((err = blkdev_get(d->bd, FMODE_READ | FMODE_WRITE, 0, BDEV_FILE))) ++ return err; ++ ++ return 0; ++} ++ ++/* ++ * Close a device that we've been using. ++ */ ++static void close_dev(struct dm_dev *d) ++{ ++ if (!d->bd) ++ return; ++ ++ blkdev_put(d->bd, BDEV_FILE); ++ d->bd = NULL; ++} ++ ++/* ++ * If possible (ie. 
blk_size[major] is set), this ++ * checks an area of a destination device is ++ * valid. ++ */ ++static int check_device_area(kdev_t dev, offset_t start, offset_t len) ++{ ++ int *sizes; ++ offset_t dev_size; ++ ++ if (!(sizes = blk_size[MAJOR(dev)]) || !(dev_size = sizes[MINOR(dev)])) ++ /* we don't know the device details, ++ * so give the benefit of the doubt */ ++ return 1; ++ ++ /* convert to 512-byte sectors */ ++ dev_size <<= 1; ++ ++ return ((start < dev_size) && (len <= (dev_size - start))); ++} ++ ++/* ++ * Add a device to the list, or just increment the usage count ++ * if it's already present. ++ */ ++int dm_table_get_device(struct dm_table *t, const char *path, ++ offset_t start, offset_t len, struct dm_dev **result) ++{ ++ int r; ++ kdev_t dev; ++ struct dm_dev *dd; ++ int major, minor; ++ ++ if (sscanf(path, "%x:%x", &major, &minor) == 2) { ++ /* Extract the major/minor numbers */ ++ dev = MKDEV(major, minor); ++ } else { ++ /* convert the path to a device */ ++ if ((r = lookup_device(path, &dev))) ++ return r; ++ } ++ ++ dd = find_device(&t->devices, dev); ++ if (!dd) { ++ dd = kmalloc(sizeof(*dd), GFP_KERNEL); ++ if (!dd) ++ return -ENOMEM; ++ ++ dd->dev = dev; ++ dd->bd = NULL; ++ ++ if ((r = open_dev(dd))) { ++ kfree(dd); ++ return r; ++ } ++ ++ atomic_set(&dd->count, 0); ++ list_add(&dd->list, &t->devices); ++ } ++ atomic_inc(&dd->count); ++ ++ if (!check_device_area(dd->dev, start, len)) { ++ DMWARN("device %s too small for target", path); ++ dm_table_put_device(t, dd); ++ return -EINVAL; ++ } ++ ++ *result = dd; ++ ++ return 0; ++} ++ ++/* ++ * Decrement a devices use count and remove it if neccessary. ++ */ ++void dm_table_put_device(struct dm_table *t, struct dm_dev *dd) ++{ ++ if (atomic_dec_and_test(&dd->count)) { ++ close_dev(dd); ++ list_del(&dd->list); ++ kfree(dd); ++ } ++} ++ ++/* ++ * Adds a target to the map ++ */ ++int dm_table_add_target(struct dm_table *t, offset_t highs, ++ struct target_type *type, void *private) ++{ ++ int r, n; ++ ++ if ((r = check_space(t))) ++ return r; ++ ++ n = t->num_targets++; ++ t->highs[n] = highs; ++ t->targets[n].type = type; ++ t->targets[n].private = private; ++ ++ return 0; ++} ++ ++static int setup_indexes(struct dm_table *t) ++{ ++ int i, total = 0; ++ offset_t *indexes; ++ ++ /* allocate the space for *all* the indexes */ ++ for (i = t->depth - 2; i >= 0; i--) { ++ t->counts[i] = div_up(t->counts[i + 1], CHILDREN_PER_NODE); ++ total += t->counts[i]; ++ } ++ ++ indexes = (offset_t *) vcalloc(total, (unsigned long) NODE_SIZE); ++ if (!indexes) ++ return -ENOMEM; ++ ++ /* set up internal nodes, bottom-up */ ++ for (i = t->depth - 2, total = 0; i >= 0; i--) { ++ t->index[i] = indexes; ++ indexes += (KEYS_PER_NODE * t->counts[i]); ++ setup_btree_index(i, t); ++ } ++ ++ return 0; ++} ++ ++/* ++ * Builds the btree to index the map ++ */ ++int dm_table_complete(struct dm_table *t) ++{ ++ int leaf_nodes, r = 0; ++ ++ /* how many indexes will the btree have ? 
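A worked example of the sizing sums used by setup_indexes() and dm_table_complete(): this standalone sketch assumes a 64-byte NODE_SIZE (the real value is L1_CACHE_BYTES) and a 4-byte key standing in for offset_t on a 32-bit build, so 1000 targets need 63 leaf nodes and a depth-3 tree.

#include <stdio.h>

/* Example numbers only: NODE_SIZE is L1_CACHE_BYTES in the patch. */
#define NODE_SIZE               64
#define KEYS_PER_NODE           (NODE_SIZE / sizeof(unsigned int))
#define CHILDREN_PER_NODE       (KEYS_PER_NODE + 1)

static unsigned long div_up(unsigned long n, unsigned long size)
{
        return (n + size - 1) / size;           /* ceiling(n / size) */
}

static unsigned int int_log(unsigned long n, unsigned long base)
{
        unsigned int result = 0;

        while (n > 1) {
                n = div_up(n, base);
                result++;
        }
        return result;
}

int main(void)
{
        unsigned long num_targets = 1000;
        unsigned long leaf_nodes = div_up(num_targets, KEYS_PER_NODE);
        unsigned int depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
        unsigned long counts[16];
        int l;

        counts[depth - 1] = leaf_nodes;
        for (l = (int) depth - 2; l >= 0; l--)
                counts[l] = div_up(counts[l + 1], CHILDREN_PER_NODE);

        printf("%lu targets -> depth %u\n", num_targets, depth);
        for (l = 0; l < (int) depth; l++)
                printf("level %d: %lu nodes\n", l, counts[l]);
        return 0;
}
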
*/ ++ leaf_nodes = div_up(t->num_targets, KEYS_PER_NODE); ++ t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); ++ ++ /* leaf layer has already been set up */ ++ t->counts[t->depth - 1] = leaf_nodes; ++ t->index[t->depth - 1] = t->highs; ++ ++ if (t->depth >= 2) ++ r = setup_indexes(t); ++ ++ return r; ++} ++ ++void dm_table_event(struct dm_table *t) ++{ ++ wake_up_interruptible(&t->eventq); ++} ++ ++EXPORT_SYMBOL(dm_table_get_device); ++EXPORT_SYMBOL(dm_table_put_device); ++EXPORT_SYMBOL(dm_table_event); +diff -ruN linux-2.4.19-rc1/drivers/md/dm-target.c linux/drivers/md/dm-target.c +--- linux-2.4.19-rc1/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-target.c Tue Jun 25 22:02:56 2002 +@@ -0,0 +1,242 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++ ++struct tt_internal { ++ struct target_type tt; ++ ++ struct list_head list; ++ long use; ++}; ++ ++static LIST_HEAD(_targets); ++static rwlock_t _lock = RW_LOCK_UNLOCKED; ++ ++#define DM_MOD_NAME_SIZE 32 ++ ++/* ++ * Destructively splits up the argument list to pass to ctr. ++ */ ++int split_args(int max, int *argc, char **argv, char *input) ++{ ++ char *start, *end = input, *out; ++ *argc = 0; ++ ++ while (1) { ++ start = end; ++ ++ /* Skip whitespace */ ++ while (*start && isspace(*start)) ++ start++; ++ ++ if (!*start) ++ break; /* success, we hit the end */ ++ ++ /* 'out' is used to remove any back-quotes */ ++ end = out = start; ++ while (*end) { ++ /* Everything apart from '\0' can be quoted */ ++ if (*end == '\\' && *(end + 1)) { ++ *out++ = *(end + 1); ++ end += 2; ++ continue; ++ } ++ ++ if (isspace(*end)) ++ break; /* end of token */ ++ ++ *out++ = *end++; ++ } ++ ++ /* have we already filled the array ? */ ++ if ((*argc + 1) > max) ++ return -EINVAL; ++ ++ /* we know this is whitespace */ ++ if (*end) ++ end++; ++ ++ /* terminate the string and put it in the array */ ++ *out = '\0'; ++ argv[*argc] = start; ++ (*argc)++; ++ } ++ ++ return 0; ++} ++ ++static inline struct tt_internal *__find_target_type(const char *name) ++{ ++ struct list_head *tih; ++ struct tt_internal *ti; ++ ++ list_for_each(tih, &_targets) { ++ ti = list_entry(tih, struct tt_internal, list); ++ ++ if (!strcmp(name, ti->tt.name)) ++ return ti; ++ } ++ ++ return NULL; ++} ++ ++static struct tt_internal *get_target_type(const char *name) ++{ ++ struct tt_internal *ti; ++ ++ read_lock(&_lock); ++ ti = __find_target_type(name); ++ ++ if (ti) { ++ if (ti->use == 0 && ti->tt.module) ++ __MOD_INC_USE_COUNT(ti->tt.module); ++ ti->use++; ++ } ++ read_unlock(&_lock); ++ ++ return ti; ++} ++ ++static void load_module(const char *name) ++{ ++ char module_name[DM_MOD_NAME_SIZE] = "dm-"; ++ ++ /* Length check for strcat() below */ ++ if (strlen(name) > (DM_MOD_NAME_SIZE - 4)) ++ return; ++ ++ strcat(module_name, name); ++ request_module(module_name); ++ ++ return; ++} ++ ++struct target_type *dm_get_target_type(const char *name) ++{ ++ struct tt_internal *ti = get_target_type(name); ++ ++ if (!ti) { ++ load_module(name); ++ ti = get_target_type(name); ++ } ++ ++ return ti ? 
&ti->tt : NULL; ++} ++ ++void dm_put_target_type(struct target_type *t) ++{ ++ struct tt_internal *ti = (struct tt_internal *) t; ++ ++ read_lock(&_lock); ++ if (--ti->use == 0 && ti->tt.module) ++ __MOD_DEC_USE_COUNT(ti->tt.module); ++ ++ if (ti->use < 0) ++ BUG(); ++ read_unlock(&_lock); ++ ++ return; ++} ++ ++static struct tt_internal *alloc_target(struct target_type *t) ++{ ++ struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL); ++ ++ if (ti) { ++ memset(ti, 0, sizeof(*ti)); ++ ti->tt = *t; ++ } ++ ++ return ti; ++} ++ ++int dm_register_target(struct target_type *t) ++{ ++ int rv = 0; ++ struct tt_internal *ti = alloc_target(t); ++ ++ if (!ti) ++ return -ENOMEM; ++ ++ write_lock(&_lock); ++ if (__find_target_type(t->name)) ++ rv = -EEXIST; ++ else ++ list_add(&ti->list, &_targets); ++ ++ write_unlock(&_lock); ++ return rv; ++} ++ ++int dm_unregister_target(struct target_type *t) ++{ ++ struct tt_internal *ti; ++ ++ write_lock(&_lock); ++ if (!(ti = __find_target_type(t->name))) { ++ write_unlock(&_lock); ++ return -EINVAL; ++ } ++ ++ if (ti->use) { ++ write_unlock(&_lock); ++ return -ETXTBSY; ++ } ++ ++ list_del(&ti->list); ++ kfree(ti); ++ ++ write_unlock(&_lock); ++ return 0; ++} ++ ++/* ++ * io-err: always fails an io, useful for bringing ++ * up LV's that have holes in them. ++ */ ++static int io_err_ctr(struct dm_table *t, offset_t b, offset_t l, ++ int argc, char **args, void **context) ++{ ++ *context = NULL; ++ return 0; ++} ++ ++static void io_err_dtr(struct dm_table *t, void *c) ++{ ++ /* empty */ ++ return; ++} ++ ++static int io_err_map(struct buffer_head *bh, int rw, void *context) ++{ ++ buffer_IO_error(bh); ++ return 0; ++} ++ ++static struct target_type error_target = { ++ name: "error", ++ ctr: io_err_ctr, ++ dtr: io_err_dtr, ++ map: io_err_map, ++ status: NULL, ++}; ++ ++int dm_target_init(void) ++{ ++ return dm_register_target(&error_target); ++} ++ ++void dm_target_exit(void) ++{ ++ if (dm_unregister_target(&error_target)) ++ DMWARN("error target unregistration failed"); ++} ++ ++EXPORT_SYMBOL(dm_register_target); ++EXPORT_SYMBOL(dm_unregister_target); +diff -ruN linux-2.4.19-rc1/drivers/md/dm.c linux/drivers/md/dm.c +--- linux-2.4.19-rc1/drivers/md/dm.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm.c Tue Jun 25 22:12:09 2002 +@@ -0,0 +1,1168 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++ ++/* we only need this for the lv_bmap struct definition, not happy */ ++#include ++ ++#define DEFAULT_READ_AHEAD 64 ++ ++static const char *_name = DM_NAME; ++ ++static int major = 0; ++static int _major = 0; ++ ++struct io_hook { ++ struct mapped_device *md; ++ struct target *target; ++ int rw; ++ ++ void (*end_io) (struct buffer_head * bh, int uptodate); ++ void *context; ++}; ++ ++static kmem_cache_t *_io_hook_cache; ++ ++static struct mapped_device *_devs[MAX_DEVICES]; ++static struct rw_semaphore _dev_locks[MAX_DEVICES]; ++ ++/* ++ * This lock is only held by dm_create and dm_set_name to avoid ++ * race conditions where someone else may create a device with ++ * the same name. 
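The _create_lock declared just below exists because the name/uuid check and the creation of the device must be one atomic step. Here is a toy user-space illustration of that check-then-claim rule (not part of the patch), with a pthread mutex standing in for the spinlock and a made-up four-slot name registry.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

/* The lock must cover both the name check and the insertion, otherwise two
 * concurrent creators could both see the name as free. */
static pthread_mutex_t create_lock = PTHREAD_MUTEX_INITIALIZER;
static char names[4][32];

static int create_dev(const char *name)
{
        int i, slot = -1;

        pthread_mutex_lock(&create_lock);

        for (i = 0; i < 4; i++) {
                if (!strcmp(names[i], name)) {          /* name already in use */
                        pthread_mutex_unlock(&create_lock);
                        return -1;
                }
                if (slot < 0 && !names[i][0])
                        slot = i;
        }

        if (slot >= 0)
                strcpy(names[slot], name);              /* claim the slot before unlocking */

        pthread_mutex_unlock(&create_lock);
        return slot;
}

int main(void)
{
        printf("first create: slot %d\n", create_dev("vg0-lv0"));
        printf("duplicate:    slot %d\n", create_dev("vg0-lv0"));
        return 0;
}
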
++ */ ++static spinlock_t _create_lock = SPIN_LOCK_UNLOCKED; ++ ++/* block device arrays */ ++static int _block_size[MAX_DEVICES]; ++static int _blksize_size[MAX_DEVICES]; ++static int _hardsect_size[MAX_DEVICES]; ++ ++static devfs_handle_t _dev_dir; ++ ++static int request(request_queue_t * q, int rw, struct buffer_head *bh); ++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb); ++ ++/* ++ * Protect the mapped_devices referenced from _dev[] ++ */ ++struct mapped_device *dm_get_r(int minor) ++{ ++ struct mapped_device *md; ++ ++ if (minor >= MAX_DEVICES) ++ return NULL; ++ ++ down_read(_dev_locks + minor); ++ md = _devs[minor]; ++ if (!md) ++ up_read(_dev_locks + minor); ++ return md; ++} ++ ++struct mapped_device *dm_get_w(int minor) ++{ ++ struct mapped_device *md; ++ ++ if (minor >= MAX_DEVICES) ++ return NULL; ++ ++ down_write(_dev_locks + minor); ++ md = _devs[minor]; ++ if (!md) ++ up_write(_dev_locks + minor); ++ return md; ++} ++ ++static int namecmp(struct mapped_device *md, const char *name, int nametype) ++{ ++ switch (nametype) { ++ case DM_LOOKUP_BY_NAME: ++ return strcmp(md->name, name); ++ break; ++ ++ case DM_LOOKUP_BY_UUID: ++ if (!md->uuid) ++ return -1; /* never equal */ ++ ++ return strcmp(md->uuid, name); ++ break; ++ ++ default: ++ DMWARN("Unknown comparison type in namecmp: %d", nametype); ++ BUG(); ++ } ++ ++ return -1; ++} ++ ++/* ++ * The interface (eg, ioctl) will probably access the devices ++ * through these slow 'by name' locks, this needs improving at ++ * some point if people start playing with *large* numbers of dm ++ * devices. ++ */ ++struct mapped_device *dm_get_name_r(const char *name, int nametype) ++{ ++ int i; ++ struct mapped_device *md; ++ ++ for (i = 0; i < MAX_DEVICES; i++) { ++ md = dm_get_r(i); ++ if (md) { ++ if (!namecmp(md, name, nametype)) ++ return md; ++ ++ dm_put_r(md); ++ } ++ } ++ ++ return NULL; ++} ++ ++struct mapped_device *dm_get_name_w(const char *name, int nametype) ++{ ++ int i; ++ struct mapped_device *md; ++ ++ /* ++ * To avoid getting write locks on all the devices we try ++ * and promote a read lock to a write lock, this can ++ * fail, in which case we just start again. 
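dm_get_name_w() here uses a drop-and-retake dance because a read lock cannot be upgraded atomically. A user-space sketch of the same retry pattern follows (not part of the patch), using a pthread rwlock and an invented one-entry name table.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

/* No atomic upgrade exists: drop the read lock, take the write lock,
 * re-check the condition, and start again if it no longer holds. */
static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static char current_name[32] = "vg0-lv0";

static int rename_if_named(const char *name, const char *newname)
{
restart:
        pthread_rwlock_rdlock(&lock);
        if (strcmp(current_name, name)) {
                pthread_rwlock_unlock(&lock);
                return -1;                      /* not found */
        }
        pthread_rwlock_unlock(&lock);           /* give up the read lock... */

        pthread_rwlock_wrlock(&lock);           /* ...and take the write lock */
        if (strcmp(current_name, name)) {
                /* someone changed it between the two locks: try again */
                pthread_rwlock_unlock(&lock);
                goto restart;
        }

        strcpy(current_name, newname);
        pthread_rwlock_unlock(&lock);
        return 0;
}

int main(void)
{
        printf("rename: %d, name is now %s\n",
               rename_if_named("vg0-lv0", "vg0-root"), current_name);
        return 0;
}
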
++ */ ++ ++ restart: ++ for (i = 0; i < MAX_DEVICES; i++) { ++ md = dm_get_r(i); ++ if (!md) ++ continue; ++ ++ if (namecmp(md, name, nametype)) { ++ dm_put_r(md); ++ continue; ++ } ++ ++ /* found it */ ++ dm_put_r(md); ++ ++ md = dm_get_w(i); ++ if (!md) ++ goto restart; ++ ++ if (namecmp(md, name, nametype)) { ++ dm_put_w(md); ++ goto restart; ++ } ++ ++ return md; ++ } ++ ++ return NULL; ++} ++ ++void dm_put_r(struct mapped_device *md) ++{ ++ int minor = MINOR(md->dev); ++ ++ if (minor >= MAX_DEVICES) ++ return; ++ ++ up_read(_dev_locks + minor); ++} ++ ++void dm_put_w(struct mapped_device *md) ++{ ++ int minor = MINOR(md->dev); ++ ++ if (minor >= MAX_DEVICES) ++ return; ++ ++ up_write(_dev_locks + minor); ++} ++ ++/* ++ * Setup and tear down the driver ++ */ ++static __init void init_locks(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_DEVICES; i++) ++ init_rwsem(_dev_locks + i); ++} ++ ++static __init int local_init(void) ++{ ++ int r; ++ ++ init_locks(); ++ ++ /* allocate a slab for the io-hooks */ ++ if (!_io_hook_cache && ++ !(_io_hook_cache = kmem_cache_create("dm io hooks", ++ sizeof(struct io_hook), ++ 0, 0, NULL, NULL))) ++ return -ENOMEM; ++ ++ _major = major; ++ r = devfs_register_blkdev(_major, _name, &dm_blk_dops); ++ if (r < 0) { ++ DMERR("register_blkdev failed"); ++ kmem_cache_destroy(_io_hook_cache); ++ return r; ++ } ++ ++ if (!_major) ++ _major = r; ++ ++ /* set up the arrays */ ++ read_ahead[_major] = DEFAULT_READ_AHEAD; ++ blk_size[_major] = _block_size; ++ blksize_size[_major] = _blksize_size; ++ hardsect_size[_major] = _hardsect_size; ++ ++ blk_queue_make_request(BLK_DEFAULT_QUEUE(_major), request); ++ ++ _dev_dir = devfs_mk_dir(0, DM_DIR, NULL); ++ ++ return 0; ++} ++ ++static void local_exit(void) ++{ ++ if (kmem_cache_destroy(_io_hook_cache)) ++ DMWARN("io_hooks still allocated during unregistration"); ++ _io_hook_cache = NULL; ++ ++ if (devfs_unregister_blkdev(_major, _name) < 0) ++ DMERR("devfs_unregister_blkdev failed"); ++ ++ read_ahead[_major] = 0; ++ blk_size[_major] = NULL; ++ blksize_size[_major] = NULL; ++ hardsect_size[_major] = NULL; ++ _major = 0; ++ ++ DMINFO("cleaned up"); ++} ++ ++/* ++ * We have a lot of init/exit functions, so it seems easier to ++ * store them in an array. The disposable macro 'xx' ++ * expands a prefix into a pair of function names. 
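The _inits[] table below pairs each subsystem's init and exit routines via the 'xx' macro so a failure can unwind the earlier stages in reverse order. A standalone sketch of that pattern (the local/target names echo the driver, the bodies are dummies):

#include <stdio.h>

static int local_init(void)   { printf("local init\n");   return 0; }
static void local_exit(void)  { printf("local exit\n"); }
static int target_init(void)  { printf("target init\n");  return 0; }
static void target_exit(void) { printf("target exit\n"); }

static struct {
        int (*init)(void);
        void (*exit)(void);
} inits[] = {
#define xx(n) { n ## _init, n ## _exit },
        xx(local)
        xx(target)
#undef xx
};

int main(void)
{
        const int count = sizeof(inits) / sizeof(*inits);
        int i, r = 0;

        for (i = 0; i < count; i++)
                if ((r = inits[i].init()))
                        break;

        if (r)                          /* a later stage failed: unwind the earlier ones */
                while (i--)
                        inits[i].exit();
        else                            /* normal teardown, also in reverse order */
                for (i = count; i--; )
                        inits[i].exit();

        return r;
}
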
++ */ ++static struct { ++ int (*init)(void); ++ void (*exit)(void); ++ ++} _inits[] = { ++#define xx(n) {n ## _init, n ## _exit}, ++ xx(local) ++ xx(dm_target) ++#undef xx ++}; ++ ++static int __init dm_init(void) ++{ ++ const int count = sizeof(_inits) / sizeof(*_inits); ++ ++ int r, i; ++ ++ for (i = 0; i < count; i++) { ++ r = _inits[i].init(); ++ if (r) ++ goto bad; ++ } ++ ++ return 0; ++ ++ bad: ++ while (i--) ++ _inits[i].exit(); ++ ++ return r; ++} ++ ++static void __exit dm_exit(void) ++{ ++ int i = sizeof(_inits) / sizeof(*_inits); ++ ++ dm_destroy_all(); ++ while (i--) ++ _inits[i].exit(); ++} ++ ++/* ++ * Block device functions ++ */ ++static int dm_blk_open(struct inode *inode, struct file *file) ++{ ++ struct mapped_device *md; ++ ++ md = dm_get_w(MINOR(inode->i_rdev)); ++ if (!md) ++ return -ENXIO; ++ ++ md->use_count++; ++ dm_put_w(md); ++ ++ return 0; ++} ++ ++static int dm_blk_close(struct inode *inode, struct file *file) ++{ ++ struct mapped_device *md; ++ ++ md = dm_get_w(MINOR(inode->i_rdev)); ++ if (!md) ++ return -ENXIO; ++ ++ if (md->use_count < 1) ++ DMWARN("incorrect reference count found in mapped_device"); ++ ++ md->use_count--; ++ dm_put_w(md); ++ ++ return 0; ++} ++ ++/* In 512-byte units */ ++#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1) ++ ++static int dm_blk_ioctl(struct inode *inode, struct file *file, ++ uint command, unsigned long a) ++{ ++ int minor = MINOR(inode->i_rdev); ++ long size; ++ ++ if (minor >= MAX_DEVICES) ++ return -ENXIO; ++ ++ switch (command) { ++ case BLKROSET: ++ case BLKROGET: ++ case BLKRASET: ++ case BLKRAGET: ++ case BLKFLSBUF: ++ case BLKSSZGET: ++ //case BLKRRPART: /* Re-read partition tables */ ++ //case BLKPG: ++ case BLKELVGET: ++ case BLKELVSET: ++ case BLKBSZGET: ++ case BLKBSZSET: ++ return blk_ioctl(inode->i_rdev, command, a); ++ break; ++ ++ case BLKGETSIZE: ++ size = VOLUME_SIZE(minor); ++ if (copy_to_user((void *) a, &size, sizeof(long))) ++ return -EFAULT; ++ break; ++ ++ case BLKGETSIZE64: ++ size = VOLUME_SIZE(minor); ++ if (put_user((u64) ((u64) size) << 9, (u64 *) a)) ++ return -EFAULT; ++ break; ++ ++ case BLKRRPART: ++ return -ENOTTY; ++ ++ case LV_BMAP: ++ return dm_user_bmap(inode, (struct lv_bmap *) a); ++ ++ default: ++ DMWARN("unknown block ioctl 0x%x", command); ++ return -ENOTTY; ++ } ++ ++ return 0; ++} ++ ++static inline struct io_hook *alloc_io_hook(void) ++{ ++ return kmem_cache_alloc(_io_hook_cache, GFP_NOIO); ++} ++ ++static inline void free_io_hook(struct io_hook *ih) ++{ ++ kmem_cache_free(_io_hook_cache, ih); ++} ++ ++/* ++ * FIXME: We need to decide if deferred_io's need ++ * their own slab, I say no for now since they are ++ * only used when the device is suspended. ++ */ ++static inline struct deferred_io *alloc_deferred(void) ++{ ++ return kmalloc(sizeof(struct deferred_io), GFP_NOIO); ++} ++ ++static inline void free_deferred(struct deferred_io *di) ++{ ++ kfree(di); ++} ++ ++/* ++ * Call a target's optional error function if an I/O failed. ++ */ ++static inline int call_err_fn(struct io_hook *ih, struct buffer_head *bh) ++{ ++ dm_err_fn err = ih->target->type->err; ++ ++ if (err) ++ return err(bh, ih->rw, ih->target->private); ++ ++ return 0; ++} ++ ++/* ++ * bh->b_end_io routine that decrements the pending count ++ * and then calls the original bh->b_end_io fn. 
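dec_pending() below relies on stashing the original completion callback and private pointer in an io_hook and chaining to them once its own bookkeeping is done. A user-space analogue (not part of the patch), where struct request and io_hook are simplified stand-ins for buffer_head and the driver's io_hook:

#include <stdio.h>
#include <stdlib.h>

struct request {
        void (*end_io)(struct request *req, int uptodate);
        void *private;
};

struct io_hook {
        void (*end_io)(struct request *req, int uptodate);
        void *private;
};

static void original_end_io(struct request *req, int uptodate)
{
        printf("original completion, uptodate=%d, private=%s\n",
               uptodate, (char *) req->private);
}

static void hooked_end_io(struct request *req, int uptodate)
{
        struct io_hook *ih = req->private;

        printf("hook ran first\n");             /* e.g. decrement a pending count */

        req->end_io = ih->end_io;               /* restore what we saved */
        req->private = ih->private;
        free(ih);

        req->end_io(req, uptodate);             /* chain to the original */
}

int main(void)
{
        struct request req = { original_end_io, "caller context" };
        struct io_hook *ih = malloc(sizeof(*ih));

        if (!ih)
                return 1;

        ih->end_io = req.end_io;                /* hook the request */
        ih->private = req.private;
        req.end_io = hooked_end_io;
        req.private = ih;

        req.end_io(&req, 1);                    /* simulate I/O completion */
        return 0;
}
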
++ */ ++static void dec_pending(struct buffer_head *bh, int uptodate) ++{ ++ struct io_hook *ih = bh->b_bdev_private; ++ ++ if (!uptodate && call_err_fn(ih, bh)) ++ return; ++ ++ if (atomic_dec_and_test(&ih->md->pending)) ++ /* nudge anyone waiting on suspend queue */ ++ wake_up(&ih->md->wait); ++ ++ bh->b_end_io = ih->end_io; ++ bh->b_bdev_private = ih->context; ++ free_io_hook(ih); ++ ++ bh->b_end_io(bh, uptodate); ++} ++ ++/* ++ * Add the bh to the list of deferred io. ++ */ ++static int queue_io(struct buffer_head *bh, int rw) ++{ ++ struct deferred_io *di = alloc_deferred(); ++ struct mapped_device *md; ++ ++ if (!di) ++ return -ENOMEM; ++ ++ md = dm_get_w(MINOR(bh->b_rdev)); ++ if (!md) { ++ free_deferred(di); ++ return -ENXIO; ++ } ++ ++ if (!md->suspended) { ++ dm_put_w(md); ++ free_deferred(di); ++ return 1; ++ } ++ ++ di->bh = bh; ++ di->rw = rw; ++ di->next = md->deferred; ++ md->deferred = di; ++ ++ dm_put_w(md); ++ ++ return 0; /* deferred successfully */ ++} ++ ++/* ++ * Do the bh mapping for a given leaf ++ */ ++static inline int __map_buffer(struct mapped_device *md, ++ struct buffer_head *bh, int rw, int leaf) ++{ ++ int r; ++ dm_map_fn fn; ++ void *context; ++ struct io_hook *ih = NULL; ++ struct target *ti = md->map->targets + leaf; ++ ++ fn = ti->type->map; ++ context = ti->private; ++ ++ ih = alloc_io_hook(); ++ ++ if (!ih) ++ return -1; ++ ++ ih->md = md; ++ ih->rw = rw; ++ ih->target = ti; ++ ih->end_io = bh->b_end_io; ++ ih->context = bh->b_bdev_private; ++ ++ r = fn(bh, rw, context); ++ ++ if (r > 0) { ++ /* hook the end io request fn */ ++ atomic_inc(&md->pending); ++ bh->b_end_io = dec_pending; ++ bh->b_bdev_private = ih; ++ ++ } else if (r == 0) ++ /* we don't need to hook */ ++ free_io_hook(ih); ++ ++ else if (r < 0) { ++ free_io_hook(ih); ++ return -1; ++ } ++ ++ return r; ++} ++ ++/* ++ * Search the btree for the correct target. ++ */ ++static inline int __find_node(struct dm_table *t, struct buffer_head *bh) ++{ ++ int l, n = 0, k = 0; ++ offset_t *node; ++ ++ for (l = 0; l < t->depth; l++) { ++ n = get_child(n, k); ++ node = get_node(t, l, n); ++ ++ for (k = 0; k < KEYS_PER_NODE; k++) ++ if (node[k] >= bh->b_rsector) ++ break; ++ } ++ ++ return (KEYS_PER_NODE * n) + k; ++} ++ ++static int request(request_queue_t * q, int rw, struct buffer_head *bh) ++{ ++ struct mapped_device *md; ++ int r, minor = MINOR(bh->b_rdev); ++ unsigned int block_size = _blksize_size[minor]; ++ ++ md = dm_get_r(minor); ++ if (!md) { ++ buffer_IO_error(bh); ++ return 0; ++ } ++ ++ /* ++ * Sanity checks. ++ */ ++ if (bh->b_size > block_size) ++ DMERR("request is larger than block size " ++ "b_size (%d), block size (%d)", ++ bh->b_size, block_size); ++ ++ if (bh->b_rsector & ((bh->b_size >> 9) - 1)) ++ DMERR("misaligned block requested logical " ++ "sector (%lu), b_size (%d)", ++ bh->b_rsector, bh->b_size); ++ ++ /* ++ * If we're suspended we have to queue ++ * this io for later. ++ */ ++ while (md->suspended) { ++ dm_put_r(md); ++ ++ if (rw == READA) ++ goto bad_no_lock; ++ ++ r = queue_io(bh, rw); ++ ++ if (r < 0) ++ goto bad_no_lock; ++ ++ else if (r == 0) ++ return 0; /* deferred successfully */ ++ ++ /* ++ * We're in a while loop, because someone could suspend ++ * before we get to the following read lock. 
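Stepping back from request() for a moment: the routing decision ultimately comes down to finding the first "high" key that is greater than or equal to the request's sector. A one-level sketch of the lookup __find_node() performs over the btree, with made-up sector numbers (the driver walks several levels of the same comparison):

#include <stdio.h>

static unsigned long highs[] = { 99, 199, 299 };        /* three targets, 100 sectors each */

static int find_target(unsigned long sector)
{
        int k;
        int n = sizeof(highs) / sizeof(*highs);

        for (k = 0; k < n; k++)
                if (highs[k] >= sector)
                        return k;

        return -1;                              /* beyond the end of the device */
}

int main(void)
{
        unsigned long sectors[] = { 0, 150, 299, 300 };
        int i;

        for (i = 0; i < 4; i++)
                printf("sector %lu -> target %d\n", sectors[i], find_target(sectors[i]));
        return 0;
}
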
++ */ ++ md = dm_get_r(minor); ++ if (!md) { ++ buffer_IO_error(bh); ++ return 0; ++ } ++ } ++ ++ if ((r = __map_buffer(md, bh, rw, __find_node(md->map, bh))) < 0) ++ goto bad; ++ ++ dm_put_r(md); ++ return r; ++ ++ bad: ++ dm_put_r(md); ++ ++ bad_no_lock: ++ buffer_IO_error(bh); ++ return 0; ++} ++ ++static int check_dev_size(int minor, unsigned long block) ++{ ++ /* FIXME: check this */ ++ unsigned long max_sector = (_block_size[minor] << 1) + 1; ++ unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9); ++ ++ return (sector > max_sector) ? 0 : 1; ++} ++ ++/* ++ * Creates a dummy buffer head and maps it (for lilo). ++ */ ++static int do_bmap(kdev_t dev, unsigned long block, ++ kdev_t * r_dev, unsigned long *r_block) ++{ ++ struct mapped_device *md; ++ struct buffer_head bh; ++ int minor = MINOR(dev), r; ++ struct target *t; ++ ++ md = dm_get_r(minor); ++ if (!md) ++ return -ENXIO; ++ ++ if (md->suspended) { ++ dm_put_r(md); ++ return -EPERM; ++ } ++ ++ if (!check_dev_size(minor, block)) { ++ dm_put_r(md); ++ return -EINVAL; ++ } ++ ++ /* setup dummy bh */ ++ memset(&bh, 0, sizeof(bh)); ++ bh.b_blocknr = block; ++ bh.b_dev = bh.b_rdev = dev; ++ bh.b_size = _blksize_size[minor]; ++ bh.b_rsector = block * (bh.b_size >> 9); ++ ++ /* find target */ ++ t = md->map->targets + __find_node(md->map, &bh); ++ ++ /* do the mapping */ ++ r = t->type->map(&bh, READ, t->private); ++ ++ *r_dev = bh.b_rdev; ++ *r_block = bh.b_rsector / (bh.b_size >> 9); ++ ++ dm_put_r(md); ++ return r; ++} ++ ++/* ++ * Marshals arguments and results between user and kernel space. ++ */ ++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb) ++{ ++ unsigned long block, r_block; ++ kdev_t r_dev; ++ int r; ++ ++ if (get_user(block, &lvb->lv_block)) ++ return -EFAULT; ++ ++ if ((r = do_bmap(inode->i_rdev, block, &r_dev, &r_block))) ++ return r; ++ ++ if (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) || ++ put_user(r_block, &lvb->lv_block)) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++/* ++ * See if the device with a specific minor # is free. The write ++ * lock is held when it returns successfully. ++ */ ++static inline int specific_dev(int minor, struct mapped_device *md) ++{ ++ if (minor >= MAX_DEVICES) { ++ DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)", ++ MAX_DEVICES); ++ return -1; ++ } ++ ++ down_write(_dev_locks + minor); ++ if (_devs[minor]) { ++ /* in use */ ++ up_write(_dev_locks + minor); ++ return -1; ++ } ++ ++ return minor; ++} ++ ++/* ++ * Find the first free device. Again the write lock is held on ++ * success. ++ */ ++static int any_old_dev(struct mapped_device *md) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_DEVICES; i++) ++ if (specific_dev(i, md) != -1) ++ return i; ++ ++ return -1; ++} ++ ++/* ++ * Allocate and initialise a blank device. ++ * Caller must ensure uuid is null-terminated. ++ * Device is returned with a write lock held. ++ */ ++static struct mapped_device *alloc_dev(const char *name, const char *uuid, ++ int minor) ++{ ++ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); ++ int len; ++ ++ if (!md) { ++ DMWARN("unable to allocate device, out of memory."); ++ return NULL; ++ } ++ ++ memset(md, 0, sizeof(*md)); ++ ++ /* ++ * This grabs the write lock if it succeeds. ++ */ ++ minor = (minor < 0) ? 
any_old_dev(md) : specific_dev(minor, md); ++ if (minor < 0) { ++ kfree(md); ++ return NULL; ++ } ++ ++ md->dev = MKDEV(_major, minor); ++ md->suspended = 0; ++ ++ strncpy(md->name, name, sizeof(md->name) - 1); ++ md->name[sizeof(md->name) - 1] = '\0'; ++ ++ /* ++ * Copy in the uuid. ++ */ ++ if (uuid && *uuid) { ++ len = strlen(uuid) + 1; ++ if (!(md->uuid = kmalloc(len, GFP_KERNEL))) { ++ DMWARN("unable to allocate uuid - out of memory."); ++ kfree(md); ++ return NULL; ++ } ++ strcpy(md->uuid, uuid); ++ } ++ ++ init_waitqueue_head(&md->wait); ++ return md; ++} ++ ++static int __register_device(struct mapped_device *md) ++{ ++ md->devfs_entry = ++ devfs_register(_dev_dir, md->name, DEVFS_FL_CURRENT_OWNER, ++ MAJOR(md->dev), MINOR(md->dev), ++ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, ++ &dm_blk_dops, NULL); ++ ++ return 0; ++} ++ ++static int __unregister_device(struct mapped_device *md) ++{ ++ devfs_unregister(md->devfs_entry); ++ return 0; ++} ++ ++/* ++ * The hardsect size for a mapped device is the smallest hardsect size ++ * from the devices it maps onto. ++ */ ++static int __find_hardsect_size(struct list_head *devices) ++{ ++ int result = INT_MAX, size; ++ struct list_head *tmp; ++ ++ list_for_each(tmp, devices) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ size = get_hardsect_size(dd->dev); ++ if (size < result) ++ result = size; ++ } ++ ++ /* ++ * I think it's safe to assume that no block devices have ++ * a hard sector size this large. ++ */ ++ if (result == INT_MAX) ++ result = 512; ++ ++ return result; ++} ++ ++/* ++ * Bind a table to the device. ++ */ ++static int __bind(struct mapped_device *md, struct dm_table *t) ++{ ++ int minor = MINOR(md->dev); ++ ++ md->map = t; ++ ++ if (!t->num_targets) { ++ _block_size[minor] = 0; ++ _blksize_size[minor] = BLOCK_SIZE; ++ _hardsect_size[minor] = 0; ++ return 0; ++ } ++ ++ /* in k */ ++ _block_size[minor] = (t->highs[t->num_targets - 1] + 1) >> 1; ++ ++ _blksize_size[minor] = BLOCK_SIZE; ++ _hardsect_size[minor] = __find_hardsect_size(&t->devices); ++ register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]); ++ ++ return 0; ++} ++ ++static void __unbind(struct mapped_device *md) ++{ ++ int minor = MINOR(md->dev); ++ ++ dm_table_destroy(md->map); ++ md->map = NULL; ++ ++ _block_size[minor] = 0; ++ _blksize_size[minor] = 0; ++ _hardsect_size[minor] = 0; ++} ++ ++static int check_name(const char *name) ++{ ++ struct mapped_device *md; ++ ++ if (strchr(name, '/') || strlen(name) > DM_NAME_LEN) { ++ DMWARN("invalid device name"); ++ return -1; ++ } ++ ++ md = dm_get_name_r(name, DM_LOOKUP_BY_NAME); ++ if (md) { ++ dm_put_r(md); ++ DMWARN("device name already in use"); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int check_uuid(const char *uuid) ++{ ++ struct mapped_device *md; ++ ++ if (uuid) { ++ md = dm_get_name_r(uuid, DM_LOOKUP_BY_UUID); ++ if (md) { ++ dm_put_r(md); ++ DMWARN("device uuid already in use"); ++ return -1; ++ } ++ } ++ ++ return 0; ++} ++ ++/* ++ * Constructor for a new device. 
++ */ ++int dm_create(const char *name, const char *uuid, int minor, int ro, ++ struct dm_table *table) ++{ ++ int r; ++ struct mapped_device *md; ++ ++ spin_lock(&_create_lock); ++ if (check_name(name) || check_uuid(uuid)) { ++ spin_unlock(&_create_lock); ++ return -EINVAL; ++ } ++ ++ md = alloc_dev(name, uuid, minor); ++ if (!md) { ++ spin_unlock(&_create_lock); ++ return -ENXIO; ++ } ++ minor = MINOR(md->dev); ++ _devs[minor] = md; ++ ++ r = __register_device(md); ++ if (r) ++ goto err; ++ ++ r = __bind(md, table); ++ if (r) ++ goto err; ++ ++ dm_set_ro(md, ro); ++ ++ spin_unlock(&_create_lock); ++ dm_put_w(md); ++ return 0; ++ ++ err: ++ _devs[minor] = NULL; ++ if (md->uuid) ++ kfree(md->uuid); ++ ++ dm_put_w(md); ++ kfree(md); ++ spin_unlock(&_create_lock); ++ return r; ++} ++ ++/* ++ * Renames the device. No lock held. ++ */ ++int dm_set_name(const char *name, int nametype, const char *newname) ++{ ++ int r; ++ struct mapped_device *md; ++ ++ spin_lock(&_create_lock); ++ if (check_name(newname) < 0) { ++ spin_unlock(&_create_lock); ++ return -EINVAL; ++ } ++ ++ md = dm_get_name_w(name, nametype); ++ if (!md) { ++ spin_unlock(&_create_lock); ++ return -ENXIO; ++ } ++ ++ r = __unregister_device(md); ++ if (r) ++ goto out; ++ ++ strcpy(md->name, newname); ++ r = __register_device(md); ++ ++ out: ++ dm_put_w(md); ++ spin_unlock(&_create_lock); ++ return r; ++} ++ ++/* ++ * Destructor for the device. You cannot destroy an open ++ * device. Write lock must be held before calling. ++ * Caller must dm_put_w(md) then kfree(md) if call was successful. ++ */ ++int dm_destroy(struct mapped_device *md) ++{ ++ int minor, r; ++ ++ if (md->use_count) ++ return -EPERM; ++ ++ r = __unregister_device(md); ++ if (r) ++ return r; ++ ++ minor = MINOR(md->dev); ++ _devs[minor] = NULL; ++ __unbind(md); ++ ++ if (md->uuid) ++ kfree(md->uuid); ++ ++ return 0; ++} ++ ++/* ++ * Destroy all devices - except open ones ++ */ ++void dm_destroy_all(void) ++{ ++ int i, some_destroyed, r; ++ struct mapped_device *md; ++ ++ do { ++ some_destroyed = 0; ++ for (i = 0; i < MAX_DEVICES; i++) { ++ md = dm_get_w(i); ++ if (!md) ++ continue; ++ ++ r = dm_destroy(md); ++ dm_put_w(md); ++ ++ if (!r) { ++ kfree(md); ++ some_destroyed = 1; ++ } ++ } ++ } while (some_destroyed); ++} ++ ++/* ++ * Sets or clears the read-only flag for the device. Write lock ++ * must be held. ++ */ ++void dm_set_ro(struct mapped_device *md, int ro) ++{ ++ md->read_only = ro; ++ set_device_ro(md->dev, ro); ++} ++ ++/* ++ * A target is notifying us of some event ++ */ ++void dm_notify(void *target) ++{ ++} ++ ++/* ++ * Requeue the deferred buffer_heads by calling generic_make_request. ++ */ ++static void flush_deferred_io(struct deferred_io *c) ++{ ++ struct deferred_io *n; ++ ++ while (c) { ++ n = c->next; ++ generic_make_request(c->rw, c->bh); ++ free_deferred(c); ++ c = n; ++ } ++} ++ ++/* ++ * Swap in a new table (destroying old one). Write lock must be ++ * held. ++ */ ++int dm_swap_table(struct mapped_device *md, struct dm_table *table) ++{ ++ int r; ++ ++ /* device must be suspended */ ++ if (!md->suspended) ++ return -EPERM; ++ ++ __unbind(md); ++ ++ r = __bind(md, table); ++ if (r) ++ return r; ++ ++ return 0; ++} ++ ++/* ++ * We need to be able to change a mapping table under a mounted ++ * filesystem. for example we might want to move some data in ++ * the background. Before the table can be swapped with ++ * dm_bind_table, dm_suspend must be called to flush any in ++ * flight buffer_heads and ensure that any further io gets ++ * deferred. 
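A sketch of the suspend/defer/resume cycle just described (not part of the patch): while suspended, requests are pushed onto a singly linked deferred list at the head, as queue_io() does, and resume replays the list as flush_deferred_io() does. The request fields are made up and locking is omitted; the driver serialises this with the per-device lock and the pending counter.

#include <stdio.h>
#include <stdlib.h>

struct deferred_io {
        int rw;
        int sector;                             /* stands in for the buffer_head */
        struct deferred_io *next;
};

static int suspended;
static struct deferred_io *deferred;

static void submit(int rw, int sector)
{
        if (suspended) {
                struct deferred_io *di = malloc(sizeof(*di));

                if (!di)
                        return;
                di->rw = rw;
                di->sector = sector;
                di->next = deferred;            /* push at head */
                deferred = di;
                printf("deferred %s of sector %d\n", rw ? "write" : "read", sector);
                return;
        }
        printf("mapped   %s of sector %d\n", rw ? "write" : "read", sector);
}

static void resume(void)
{
        struct deferred_io *c = deferred, *n;

        suspended = 0;
        deferred = NULL;

        while (c) {                             /* replay, like flush_deferred_io() */
                n = c->next;
                submit(c->rw, c->sector);
                free(c);
                c = n;
        }
}

int main(void)
{
        suspended = 1;
        submit(0, 8);
        submit(1, 16);
        resume();                               /* replays the deferred requests */
        return 0;
}
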
Write lock must be held. ++ */ ++int dm_suspend(struct mapped_device *md) ++{ ++ int minor = MINOR(md->dev); ++ DECLARE_WAITQUEUE(wait, current); ++ ++ if (md->suspended) ++ return -EINVAL; ++ ++ md->suspended = 1; ++ dm_put_w(md); ++ ++ /* wait for all the pending io to flush */ ++ add_wait_queue(&md->wait, &wait); ++ current->state = TASK_UNINTERRUPTIBLE; ++ do { ++ md = dm_get_w(minor); ++ if (!md) { ++ /* Caller expects to free this lock. Yuck. */ ++ down_write(_dev_locks + minor); ++ return -ENXIO; ++ } ++ ++ if (!atomic_read(&md->pending)) ++ break; ++ ++ dm_put_w(md); ++ schedule(); ++ ++ } while (1); ++ ++ current->state = TASK_RUNNING; ++ remove_wait_queue(&md->wait, &wait); ++ ++ return 0; ++} ++ ++int dm_resume(struct mapped_device *md) ++{ ++ int minor = MINOR(md->dev); ++ struct deferred_io *def; ++ ++ if (!md->suspended || !md->map->num_targets) ++ return -EINVAL; ++ ++ md->suspended = 0; ++ def = md->deferred; ++ md->deferred = NULL; ++ ++ dm_put_w(md); ++ flush_deferred_io(def); ++ run_task_queue(&tq_disk); ++ ++ if (!dm_get_w(minor)) { ++ /* FIXME: yuck */ ++ down_write(_dev_locks + minor); ++ return -ENXIO; ++ } ++ ++ return 0; ++} ++ ++struct block_device_operations dm_blk_dops = { ++ open: dm_blk_open, ++ release: dm_blk_close, ++ ioctl: dm_blk_ioctl, ++ owner: THIS_MODULE ++}; ++ ++/* ++ * module hooks ++ */ ++module_init(dm_init); ++module_exit(dm_exit); ++ ++MODULE_PARM(major, "i"); ++MODULE_PARM_DESC(major, "The major number of the device mapper"); ++MODULE_DESCRIPTION(DM_NAME " driver"); ++MODULE_AUTHOR("Joe Thornber "); ++MODULE_LICENSE("GPL"); +diff -ruN linux-2.4.19-rc1/drivers/md/dm.h linux/drivers/md/dm.h +--- linux-2.4.19-rc1/drivers/md/dm.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm.h Tue Jun 25 22:11:46 2002 +@@ -0,0 +1,208 @@ ++/* ++ * Internal header file for device mapper ++ * ++ * Copyright (C) 2001 Sistina Software ++ * ++ * This file is released under the LGPL. ++ */ ++ ++#ifndef DM_INTERNAL_H ++#define DM_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DM_NAME "device-mapper" /* Name for messaging */ ++#define DM_DRIVER_EMAIL "lvm-devel@lists.sistina.com" ++#define MAX_DEPTH 16 ++#define NODE_SIZE L1_CACHE_BYTES ++#define KEYS_PER_NODE (NODE_SIZE / sizeof(offset_t)) ++#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) ++#define MAX_ARGS 32 ++#define MAX_DEVICES 256 ++ ++/* ++ * List of devices that a metadevice uses and should open/close. ++ */ ++struct dm_dev { ++ atomic_t count; ++ struct list_head list; ++ ++ kdev_t dev; ++ struct block_device *bd; ++}; ++ ++/* ++ * I/O that had to be deferred while we were suspended ++ */ ++struct deferred_io { ++ int rw; ++ struct buffer_head *bh; ++ struct deferred_io *next; ++}; ++ ++/* ++ * Btree leaf - this does the actual mapping ++ */ ++struct target { ++ struct target_type *type; ++ void *private; ++}; ++ ++/* ++ * The btree ++ */ ++struct dm_table { ++ /* btree table */ ++ int depth; ++ int counts[MAX_DEPTH]; /* in nodes */ ++ offset_t *index[MAX_DEPTH]; ++ ++ int num_targets; ++ int num_allocated; ++ offset_t *highs; ++ struct target *targets; ++ ++ /* a list of devices used by this table */ ++ struct list_head devices; ++ ++ /* ++ * A waitqueue for processes waiting for something ++ * interesting to happen to this table. 
++ */ ++ wait_queue_head_t eventq; ++}; ++ ++/* ++ * The actual device struct ++ */ ++struct mapped_device { ++ kdev_t dev; ++ char name[DM_NAME_LEN]; ++ char *uuid; ++ ++ int use_count; ++ int suspended; ++ int read_only; ++ ++ /* a list of io's that arrived while we were suspended */ ++ atomic_t pending; ++ wait_queue_head_t wait; ++ struct deferred_io *deferred; ++ ++ struct dm_table *map; ++ ++ /* used by dm-fs.c */ ++ devfs_handle_t devfs_entry; ++}; ++ ++extern struct block_device_operations dm_blk_dops; ++ ++/* dm-target.c */ ++int dm_target_init(void); ++struct target_type *dm_get_target_type(const char *name); ++void dm_put_target_type(struct target_type *t); ++void dm_target_exit(void); ++ ++/* ++ * Destructively splits argument list to pass to ctr. ++ */ ++int split_args(int max, int *argc, char **argv, char *input); ++ ++/* dm.c */ ++struct mapped_device *dm_get_r(int minor); ++struct mapped_device *dm_get_w(int minor); ++ ++/* ++ * There are two ways to lookup a device. ++ */ ++enum { ++ DM_LOOKUP_BY_NAME, ++ DM_LOOKUP_BY_UUID ++}; ++ ++struct mapped_device *dm_get_name_r(const char *name, int nametype); ++struct mapped_device *dm_get_name_w(const char *name, int nametype); ++ ++void dm_put_r(struct mapped_device *md); ++void dm_put_w(struct mapped_device *md); ++ ++/* ++ * Call with no lock. ++ */ ++int dm_create(const char *name, const char *uuid, int minor, int ro, ++ struct dm_table *table); ++int dm_set_name(const char *name, int nametype, const char *newname); ++void dm_destroy_all(void); ++ ++/* ++ * You must have the write lock before calling the remaining md ++ * methods. ++ */ ++int dm_destroy(struct mapped_device *md); ++void dm_set_ro(struct mapped_device *md, int ro); ++ ++/* ++ * The device must be suspended before calling this method. ++ */ ++int dm_swap_table(struct mapped_device *md, struct dm_table *t); ++ ++/* ++ * A device can still be used while suspended, but I/O is deferred. ++ */ ++int dm_suspend(struct mapped_device *md); ++int dm_resume(struct mapped_device *md); ++ ++/* dm-table.c */ ++int dm_table_create(struct dm_table **result); ++void dm_table_destroy(struct dm_table *t); ++ ++int dm_table_add_target(struct dm_table *t, offset_t highs, ++ struct target_type *type, void *private); ++int dm_table_complete(struct dm_table *t); ++ ++/* ++ * Event handling ++ */ ++void dm_table_event(struct dm_table *t); ++ ++#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) ++#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) ++#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) ++ ++/* ++ * Calculate the index of the child node of the n'th node k'th key. ++ */ ++static inline int get_child(int n, int k) ++{ ++ return (n * CHILDREN_PER_NODE) + k; ++} ++ ++/* ++ * Return the n'th node of level l from table t. ++ */ ++static inline offset_t *get_node(struct dm_table *t, int l, int n) ++{ ++ return t->index[l] + (n * KEYS_PER_NODE); ++} ++ ++static inline int array_too_big(unsigned long fixed, unsigned long obj, ++ unsigned long num) ++{ ++ return (num > (ULONG_MAX - fixed) / obj); ++} ++ ++#endif +diff -ruN linux-2.4.19-rc1/include/linux/device-mapper.h linux/include/linux/device-mapper.h +--- linux-2.4.19-rc1/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/device-mapper.h Tue Jun 25 22:02:56 2002 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the LGPL. 
++ */ ++ ++#ifndef _LINUX_DEVICE_MAPPER_H ++#define _LINUX_DEVICE_MAPPER_H ++ ++#define DM_DIR "device-mapper" /* Slashes not supported */ ++#define DM_MAX_TYPE_NAME 16 ++#define DM_NAME_LEN 128 ++#define DM_UUID_LEN 129 ++ ++#ifdef __KERNEL__ ++ ++struct dm_table; ++struct dm_dev; ++typedef unsigned long offset_t; ++ ++typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; ++ ++/* ++ * Prototypes for functions for a target ++ */ ++typedef int (*dm_ctr_fn) (struct dm_table *t, offset_t b, offset_t l, ++ int argc, char **argv, void **context); ++typedef void (*dm_dtr_fn) (struct dm_table *t, void *c); ++typedef int (*dm_map_fn) (struct buffer_head *bh, int rw, void *context); ++typedef int (*dm_err_fn) (struct buffer_head *bh, int rw, void *context); ++typedef int (*dm_status_fn) (status_type_t status_type, char *result, ++ int maxlen, void *context); ++ ++void dm_error(const char *message); ++ ++/* ++ * Constructors should call these functions to ensure destination devices ++ * are opened/closed correctly ++ */ ++int dm_table_get_device(struct dm_table *t, const char *path, ++ offset_t start, offset_t len, struct dm_dev **result); ++void dm_table_put_device(struct dm_table *table, struct dm_dev *d); ++ ++/* ++ * Information about a target type ++ */ ++struct target_type { ++ const char *name; ++ struct module *module; ++ dm_ctr_fn ctr; ++ dm_dtr_fn dtr; ++ dm_map_fn map; ++ dm_err_fn err; ++ dm_status_fn status; ++}; ++ ++int dm_register_target(struct target_type *t); ++int dm_unregister_target(struct target_type *t); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* _LINUX_DEVICE_MAPPER_H */ diff --git a/patches/common/linux-2.4.19-rc1-devmapper_2_ioctl.patch b/patches/common/linux-2.4.19-rc1-devmapper_2_ioctl.patch new file mode 100644 index 0000000..3c7c3a9 --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-devmapper_2_ioctl.patch @@ -0,0 +1,998 @@ +diff -ruN linux-2.4.19-rc1/drivers/md/Makefile linux/drivers/md/Makefile +--- linux-2.4.19-rc1/drivers/md/Makefile Tue Jun 25 22:18:02 2002 ++++ linux/drivers/md/Makefile Tue Jun 25 22:18:35 2002 +@@ -7,7 +7,7 @@ + export-objs := md.o xor.o dm-table.o dm-target.o + list-multi := lvm-mod.o + lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o +-dm-mod-objs := dm.o dm-table.o dm-target.o ++dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o + + # Note: link order is important. All raid personalities + # and xor.o must come before md.o, as they each initialise +diff -ruN linux-2.4.19-rc1/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c +--- linux-2.4.19-rc1/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-ioctl.c Tue Jun 25 22:18:23 2002 +@@ -0,0 +1,807 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++#include ++ ++/*----------------------------------------------------------------- ++ * Implementation of the ioctl commands ++ *---------------------------------------------------------------*/ ++ ++/* ++ * All the ioctl commands get dispatched to functions with this ++ * prototype. ++ */ ++typedef int (*ioctl_fn)(struct dm_ioctl *param, struct dm_ioctl *user); ++ ++/* ++ * This is really a debug only call. ++ */ ++static int remove_all(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ dm_destroy_all(); ++ return 0; ++} ++ ++/* ++ * Check a string doesn't overrun the chunk of ++ * memory we copied from userland. 
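Jumping ahead slightly: when the ioctl code that follows builds a table from userland's target list, each target spec must start exactly one sector after the previous target ends (the gap() check), and its "high" key is start + length - 1. A small worked example with struct spec standing in for dm_target_spec and made-up sector ranges:

#include <stdio.h>

struct spec {
        unsigned long sector_start;
        unsigned long length;
};

int main(void)
{
        struct spec table[] = {
                { 0,   100 },
                { 100, 100 },
                { 250, 100 },                   /* leaves a gap: should be rejected */
        };
        unsigned long highs[3];
        int i, n = 0;

        for (i = 0; i < 3; i++) {
                unsigned long expected = n ? highs[n - 1] + 1 : 0;

                if (table[i].sector_start != expected) {
                        printf("target %d: gap in target ranges\n", i);
                        break;
                }
                highs[n++] = table[i].sector_start + table[i].length - 1;
                printf("target %d: sectors %lu-%lu\n", i,
                       table[i].sector_start, highs[n - 1]);
        }
        return 0;
}
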
++ */ ++static int valid_str(char *str, void *begin, void *end) ++{ ++ while (((void *) str >= begin) && ((void *) str < end)) ++ if (!*str++) ++ return 0; ++ ++ return -EINVAL; ++} ++ ++static int next_target(struct dm_target_spec *last, uint32_t next, ++ void *begin, void *end, ++ struct dm_target_spec **spec, char **params) ++{ ++ *spec = (struct dm_target_spec *) ++ ((unsigned char *) last + next); ++ *params = (char *) (*spec + 1); ++ ++ if (*spec < (last + 1) || ((void *) *spec > end)) ++ return -EINVAL; ++ ++ return valid_str(*params, begin, end); ++} ++ ++/* ++ * Checks to see if there's a gap in the table. ++ * Returns true iff there is a gap. ++ */ ++static int gap(struct dm_table *table, struct dm_target_spec *spec) ++{ ++ if (!table->num_targets) ++ return (spec->sector_start > 0) ? 1 : 0; ++ ++ if (spec->sector_start != table->highs[table->num_targets - 1] + 1) ++ return 1; ++ ++ return 0; ++} ++ ++static int populate_table(struct dm_table *table, struct dm_ioctl *args) ++{ ++ int i = 0, r, first = 1, argc; ++ struct dm_target_spec *spec; ++ char *params, *argv[MAX_ARGS]; ++ struct target_type *ttype; ++ void *context, *begin, *end; ++ offset_t highs = 0; ++ ++ if (!args->target_count) { ++ DMWARN("populate_table: no targets specified"); ++ return -EINVAL; ++ } ++ ++ begin = (void *) args; ++ end = begin + args->data_size; ++ ++#define PARSE_ERROR(msg) {DMWARN(msg); return -EINVAL;} ++ ++ for (i = 0; i < args->target_count; i++) { ++ ++ if (first) ++ r = next_target((struct dm_target_spec *) args, ++ args->data_start, ++ begin, end, &spec, ¶ms); ++ else ++ r = next_target(spec, spec->next, begin, end, ++ &spec, ¶ms); ++ ++ if (r) ++ PARSE_ERROR("unable to find target"); ++ ++ /* Look up the target type */ ++ ttype = dm_get_target_type(spec->target_type); ++ if (!ttype) ++ PARSE_ERROR("unable to find target type"); ++ ++ if (gap(table, spec)) ++ PARSE_ERROR("gap in target ranges"); ++ ++ /* Split up the parameter list */ ++ if (split_args(MAX_ARGS, &argc, argv, params) < 0) ++ PARSE_ERROR("Too many arguments"); ++ ++ /* Build the target */ ++ if (ttype->ctr(table, spec->sector_start, spec->length, ++ argc, argv, &context)) { ++ DMWARN("%s: target constructor failed", ++ (char *) context); ++ return -EINVAL; ++ } ++ ++ /* Add the target to the table */ ++ highs = spec->sector_start + (spec->length - 1); ++ if (dm_table_add_target(table, highs, ttype, context)) ++ PARSE_ERROR("internal error adding target to table"); ++ ++ first = 0; ++ } ++ ++#undef PARSE_ERROR ++ ++ r = dm_table_complete(table); ++ return r; ++} ++ ++/* ++ * Round up the ptr to the next 'align' boundary. Obviously ++ * 'align' must be a power of 2. ++ */ ++static inline void *align_ptr(void *ptr, unsigned int align) ++{ ++ align--; ++ return (void *) (((unsigned long) (ptr + align)) & ~align); ++} ++ ++/* ++ * Copies a dm_ioctl and an optional additional payload to ++ * userland. ++ */ ++static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param, ++ void *data, uint32_t len) ++{ ++ int r; ++ void *ptr = NULL; ++ ++ if (data) { ++ ptr = align_ptr(user + 1, sizeof(unsigned long)); ++ param->data_start = ptr - (void *) user; ++ } ++ ++ /* ++ * The version number has already been filled in, so we ++ * just copy later fields. 
++ */ ++ r = copy_to_user(&user->data_size, ¶m->data_size, ++ sizeof(*param) - sizeof(param->version)); ++ if (r) ++ return -EFAULT; ++ ++ if (data) { ++ if (param->data_start + len > param->data_size) ++ return -ENOSPC; ++ ++ if (copy_to_user(ptr, data, len)) ++ r = -EFAULT; ++ } ++ ++ return r; ++} ++ ++/* ++ * Fills in a dm_ioctl structure, ready for sending back to ++ * userland. ++ */ ++static void __info(struct mapped_device *md, struct dm_ioctl *param) ++{ ++ param->flags = DM_EXISTS_FLAG; ++ if (md->suspended) ++ param->flags |= DM_SUSPEND_FLAG; ++ if (md->read_only) ++ param->flags |= DM_READONLY_FLAG; ++ ++ strncpy(param->name, md->name, sizeof(param->name)); ++ ++ if (md->uuid) ++ strncpy(param->uuid, md->uuid, sizeof(param->uuid) - 1); ++ else ++ param->uuid[0] = '\0'; ++ ++ param->open_count = md->use_count; ++ param->dev = kdev_t_to_nr(md->dev); ++ param->target_count = md->map->num_targets; ++} ++ ++/* ++ * Always use UUID for lookups if it's present, otherwise use name. ++ */ ++static inline char *lookup_name(struct dm_ioctl *param) ++{ ++ return (*param->uuid) ? param->uuid : param->name; ++} ++ ++static inline int lookup_type(struct dm_ioctl *param) ++{ ++ return (*param->uuid) ? DM_LOOKUP_BY_UUID : DM_LOOKUP_BY_NAME; ++} ++ ++#define ALIGNMENT sizeof(int) ++static void *_align(void *ptr, unsigned int a) ++{ ++ register unsigned long align = --a; ++ ++ return (void *) (((unsigned long) ptr + align) & ~align); ++} ++ ++/* ++ * Copies device info back to user space, used by ++ * the create and info ioctls. ++ */ ++static int info(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ ++ param->flags = 0; ++ ++ md = dm_get_name_r(lookup_name(param), lookup_type(param)); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ ++ __info(md, param); ++ dm_put_r(md); ++ ++ out: ++ return results_to_user(user, param, NULL, 0); ++} ++ ++static int create(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r, ro; ++ struct dm_table *t; ++ int minor; ++ ++ r = dm_table_create(&t); ++ if (r) ++ return r; ++ ++ r = populate_table(t, param); ++ if (r) { ++ dm_table_destroy(t); ++ return r; ++ } ++ ++ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ? ++ MINOR(to_kdev_t(param->dev)) : -1; ++ ++ ro = (param->flags & DM_READONLY_FLAG) ? 
1 : 0; ++ ++ r = dm_create(param->name, param->uuid, minor, ro, t); ++ if (r) { ++ dm_table_destroy(t); ++ return r; ++ } ++ ++ r = info(param, user); ++ return r; ++} ++ ++ ++ ++/* ++ * Build up the status struct for each target ++ */ ++static int __status(struct mapped_device *md, struct dm_ioctl *param, ++ char *outbuf, int *len) ++{ ++ int i; ++ struct dm_target_spec *spec; ++ uint64_t sector = 0LL; ++ char *outptr; ++ status_type_t type; ++ ++ if (param->flags & DM_STATUS_TABLE_FLAG) ++ type = STATUSTYPE_TABLE; ++ else ++ type = STATUSTYPE_INFO; ++ ++ outptr = outbuf; ++ ++ /* Get all the target info */ ++ for (i = 0; i < md->map->num_targets; i++) { ++ struct target_type *tt = md->map->targets[i].type; ++ offset_t high = md->map->highs[i]; ++ ++ if (outptr - outbuf + ++ sizeof(struct dm_target_spec) > param->data_size) ++ return -ENOMEM; ++ ++ spec = (struct dm_target_spec *) outptr; ++ ++ spec->status = 0; ++ spec->sector_start = sector; ++ spec->length = high - sector + 1; ++ strncpy(spec->target_type, tt->name, sizeof(spec->target_type)); ++ ++ outptr += sizeof(struct dm_target_spec); ++ ++ /* Get the status/table string from the target driver */ ++ if (tt->status) ++ tt->status(type, outptr, ++ outbuf + param->data_size - outptr, ++ md->map->targets[i].private); ++ else ++ outptr[0] = '\0'; ++ ++ outptr += strlen(outptr) + 1; ++ _align(outptr, ALIGNMENT); ++ ++ sector = high + 1; ++ ++ spec->next = outptr - outbuf; ++ } ++ ++ param->target_count = md->map->num_targets; ++ *len = outptr - outbuf; ++ ++ return 0; ++} ++ ++/* ++ * Return the status of a device as a text string for each ++ * target. ++ */ ++static int get_status(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ int len = 0; ++ int ret; ++ char *outbuf = NULL; ++ ++ md = dm_get_name_r(lookup_name(param), lookup_type(param)); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ ++ /* We haven't a clue how long the resultant data will be so ++ just allocate as much as userland has allowed us and make sure ++ we don't overun it */ ++ outbuf = kmalloc(param->data_size, GFP_KERNEL); ++ if (!outbuf) ++ goto out; ++ /* ++ * Get the status of all targets ++ */ ++ __status(md, param, outbuf, &len); ++ ++ /* ++ * Setup the basic dm_ioctl structure. ++ */ ++ __info(md, param); ++ ++ out: ++ if (md) ++ dm_put_r(md); ++ ++ ret = results_to_user(user, param, outbuf, len); ++ ++ if (outbuf) ++ kfree(outbuf); ++ ++ return ret; ++} ++ ++/* ++ * Wait for a device to report an event ++ */ ++static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ DECLARE_WAITQUEUE(wq, current); ++ ++ md = dm_get_name_r(lookup_name(param), lookup_type(param)); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ /* ++ * Setup the basic dm_ioctl structure. ++ */ ++ __info(md, param); ++ ++ /* ++ * Wait for a notification event ++ */ ++ set_current_state(TASK_INTERRUPTIBLE); ++ add_wait_queue(&md->map->eventq, &wq); ++ ++ dm_put_r(md); ++ ++ schedule(); ++ set_current_state(TASK_RUNNING); ++ ++ out: ++ return results_to_user(user, param, NULL, 0); ++} ++ ++/* ++ * Retrieves a list of devices used by a particular dm device. 
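dep() below returns its result as a single allocation: a header followed by a variable-length array of device numbers, guarded by the array_too_big() overflow check. A user-space sketch of that layout, with struct deps standing in for dm_target_deps (whose real definition lives in dm-ioctl.h, not shown in this excerpt) and invented device numbers:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct deps {
        unsigned int count;
        unsigned int dev[];                     /* flexible array after the header */
};

int main(void)
{
        unsigned int devices[] = { 0x803, 0x811, 0x821 };       /* example dev numbers */
        unsigned int count = 3;
        struct deps *deps;
        unsigned int i;

        /* refuse counts whose total size would overflow, like array_too_big() */
        if (count > (SIZE_MAX - sizeof(*deps)) / sizeof(*deps->dev))
                return 1;

        deps = malloc(sizeof(*deps) + sizeof(*deps->dev) * count);
        if (!deps)
                return 1;

        deps->count = count;
        for (i = 0; i < count; i++)
                deps->dev[i] = devices[i];

        for (i = 0; i < deps->count; i++)
                printf("dep %u: %#x\n", i, deps->dev[i]);

        free(deps);
        return 0;
}
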
++ */ ++static int dep(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int count, r; ++ struct mapped_device *md; ++ struct list_head *tmp; ++ size_t len = 0; ++ struct dm_target_deps *deps = NULL; ++ ++ md = dm_get_name_r(lookup_name(param), lookup_type(param)); ++ if (!md) ++ goto out; ++ ++ /* ++ * Setup the basic dm_ioctl structure. ++ */ ++ __info(md, param); ++ ++ /* ++ * Count the devices. ++ */ ++ count = 0; ++ list_for_each(tmp, &md->map->devices) ++ count++; ++ ++ /* ++ * Allocate a kernel space version of the dm_target_status ++ * struct. ++ */ ++ if (array_too_big(sizeof(*deps), sizeof(*deps->dev), count)) { ++ dm_put_r(md); ++ return -ENOMEM; ++ } ++ ++ len = sizeof(*deps) + (sizeof(*deps->dev) * count); ++ deps = kmalloc(len, GFP_KERNEL); ++ if (!deps) { ++ dm_put_r(md); ++ return -ENOMEM; ++ } ++ ++ /* ++ * Fill in the devices. ++ */ ++ deps->count = count; ++ count = 0; ++ list_for_each(tmp, &md->map->devices) { ++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); ++ deps->dev[count++] = kdev_t_to_nr(dd->dev); ++ } ++ dm_put_r(md); ++ ++ out: ++ r = results_to_user(user, param, deps, len); ++ ++ kfree(deps); ++ return r; ++} ++ ++static int remove(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ struct mapped_device *md; ++ ++ md = dm_get_name_w(lookup_name(param), lookup_type(param)); ++ if (!md) ++ return -ENXIO; ++ ++ r = dm_destroy(md); ++ dm_put_w(md); ++ if (!r) ++ kfree(md); ++ ++ return r; ++} ++ ++static int suspend(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ struct mapped_device *md; ++ ++ md = dm_get_name_w(lookup_name(param), lookup_type(param)); ++ if (!md) ++ return -ENXIO; ++ ++ r = (param->flags & DM_SUSPEND_FLAG) ? dm_suspend(md) : dm_resume(md); ++ dm_put_w(md); ++ ++ return r; ++} ++ ++static int reload(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r; ++ struct mapped_device *md; ++ struct dm_table *t; ++ ++ r = dm_table_create(&t); ++ if (r) ++ return r; ++ ++ r = populate_table(t, param); ++ if (r) { ++ dm_table_destroy(t); ++ return r; ++ } ++ ++ md = dm_get_name_w(lookup_name(param), lookup_type(param)); ++ if (!md) { ++ dm_table_destroy(t); ++ return -ENXIO; ++ } ++ ++ r = dm_swap_table(md, t); ++ if (r) { ++ dm_put_w(md); ++ dm_table_destroy(t); ++ return r; ++ } ++ ++ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0); ++ dm_put_w(md); ++ ++ r = info(param, user); ++ return r; ++} ++ ++static int rename(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ char *newname = (char *) param + param->data_start; ++ ++ if (valid_str(newname, (void *) param, ++ (void *) param + param->data_size) || ++ dm_set_name(lookup_name(param), lookup_type(param), newname)) { ++ DMWARN("Invalid new logical volume name supplied."); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++ ++/*----------------------------------------------------------------- ++ * Implementation of open/close/ioctl on the special char ++ * device. 
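++ * Only a process with CAP_SYS_ADMIN may open the control node, and each
++ * open pins the module with MOD_INC_USE_COUNT until the matching release.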
++ *---------------------------------------------------------------*/ ++static int ctl_open(struct inode *inode, struct file *file) ++{ ++ /* only root can open this */ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ MOD_INC_USE_COUNT; ++ ++ return 0; ++} ++ ++static int ctl_close(struct inode *inode, struct file *file) ++{ ++ MOD_DEC_USE_COUNT; ++ return 0; ++} ++ ++static ioctl_fn lookup_ioctl(unsigned int cmd) ++{ ++ static struct { ++ int cmd; ++ ioctl_fn fn; ++ } _ioctls[] = { ++ {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ ++ {DM_REMOVE_ALL_CMD, remove_all}, ++ {DM_DEV_CREATE_CMD, create}, ++ {DM_DEV_REMOVE_CMD, remove}, ++ {DM_DEV_RELOAD_CMD, reload}, ++ {DM_DEV_RENAME_CMD, rename}, ++ {DM_DEV_SUSPEND_CMD, suspend}, ++ {DM_DEV_DEPS_CMD, dep}, ++ {DM_DEV_STATUS_CMD, info}, ++ {DM_TARGET_STATUS_CMD, get_status}, ++ {DM_TARGET_WAIT_CMD, wait_device_event}, ++ }; ++ static int nelts = sizeof(_ioctls) / sizeof(*_ioctls); ++ ++ return (cmd >= nelts) ? NULL : _ioctls[cmd].fn; ++} ++ ++/* ++ * As well as checking the version compatibility this always ++ * copies the kernel interface version out. ++ */ ++static int check_version(int cmd, struct dm_ioctl *user) ++{ ++ uint32_t version[3]; ++ int r = 0; ++ ++ if (copy_from_user(version, user->version, sizeof(version))) ++ return -EFAULT; ++ ++ if ((DM_VERSION_MAJOR != version[0]) || ++ (DM_VERSION_MINOR < version[1])) { ++ DMWARN("ioctl interface mismatch: " ++ "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", ++ DM_VERSION_MAJOR, DM_VERSION_MINOR, ++ DM_VERSION_PATCHLEVEL, ++ version[0], version[1], version[2], cmd); ++ r = -EINVAL; ++ } ++ ++ /* ++ * Fill in the kernel version. ++ */ ++ version[0] = DM_VERSION_MAJOR; ++ version[1] = DM_VERSION_MINOR; ++ version[2] = DM_VERSION_PATCHLEVEL; ++ if (copy_to_user(user->version, version, sizeof(version))) ++ return -EFAULT; ++ ++ return r; ++} ++ ++static void free_params(struct dm_ioctl *param) ++{ ++ vfree(param); ++} ++ ++static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param) ++{ ++ struct dm_ioctl tmp, *dmi; ++ ++ if (copy_from_user(&tmp, user, sizeof(tmp))) ++ return -EFAULT; ++ ++ if (tmp.data_size < sizeof(tmp)) ++ return -EINVAL; ++ ++ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); ++ if (!dmi) ++ return -ENOMEM; ++ ++ if (copy_from_user(dmi, user, tmp.data_size)) { ++ vfree(dmi); ++ return -EFAULT; ++ } ++ ++ *param = dmi; ++ return 0; ++} ++ ++static int validate_params(uint cmd, struct dm_ioctl *param) ++{ ++ /* Unless creating, either name of uuid but not both */ ++ if (cmd != DM_DEV_CREATE_CMD) { ++ if ((!*param->uuid && !*param->name) || ++ (*param->uuid && *param->name)) { ++ DMWARN("one of name or uuid must be supplied"); ++ return -EINVAL; ++ } ++ } ++ ++ /* Ensure strings are terminated */ ++ param->name[DM_NAME_LEN - 1] = '\0'; ++ param->uuid[DM_UUID_LEN - 1] = '\0'; ++ ++ return 0; ++} ++ ++static int ctl_ioctl(struct inode *inode, struct file *file, ++ uint command, ulong u) ++{ ++ ++ int r = 0, cmd; ++ struct dm_ioctl *param; ++ struct dm_ioctl *user = (struct dm_ioctl *) u; ++ ioctl_fn fn = NULL; ++ ++ if (_IOC_TYPE(command) != DM_IOCTL) ++ return -ENOTTY; ++ ++ cmd = _IOC_NR(command); ++ ++ /* ++ * Check the interface version passed in. This also ++ * writes out the kernel's interface version. ++ */ ++ r = check_version(cmd, user); ++ if (r) ++ return r; ++ ++ /* ++ * Nothing more to do for the version command. 
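++ * check_version() above has already copied the kernel's interface
++ * version out to userland, which is all DM_VERSION_CMD asks for.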
++ */ ++ if (cmd == DM_VERSION_CMD) ++ return 0; ++ ++ fn = lookup_ioctl(cmd); ++ if (!fn) { ++ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); ++ return -ENOTTY; ++ } ++ ++ /* ++ * Copy the parameters into kernel space. ++ */ ++ r = copy_params(user, ¶m); ++ if (r) ++ return r; ++ ++ r = validate_params(cmd, param); ++ if (r) { ++ free_params(param); ++ return r; ++ } ++ ++ r = fn(param, user); ++ free_params(param); ++ return r; ++} ++ ++static struct file_operations _ctl_fops = { ++ open: ctl_open, ++ release: ctl_close, ++ ioctl: ctl_ioctl, ++ owner: THIS_MODULE, ++}; ++ ++static devfs_handle_t _ctl_handle; ++ ++static struct miscdevice _dm_misc = { ++ minor: MISC_DYNAMIC_MINOR, ++ name: DM_NAME, ++ fops: &_ctl_fops ++}; ++ ++/* Create misc character device and link to DM_DIR/control */ ++int __init dm_interface_init(void) ++{ ++ int r; ++ char rname[64]; ++ ++ r = misc_register(&_dm_misc); ++ if (r) { ++ DMERR("misc_register failed for control device"); ++ return r; ++ } ++ ++ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3, ++ sizeof rname - 3); ++ if (r == -ENOSYS) ++ return 0; /* devfs not present */ ++ ++ if (r < 0) { ++ DMERR("devfs_generate_path failed for control device"); ++ goto failed; ++ } ++ ++ strncpy(rname + r, "../", 3); ++ r = devfs_mk_symlink(NULL, DM_DIR "/control", ++ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL); ++ if (r) { ++ DMERR("devfs_mk_symlink failed for control device"); ++ goto failed; ++ } ++ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle); ++ ++ DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR, ++ DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA, ++ DM_DRIVER_EMAIL); ++ return 0; ++ ++ failed: ++ misc_deregister(&_dm_misc); ++ return r; ++} ++ ++void dm_interface_exit(void) ++{ ++ if (misc_deregister(&_dm_misc) < 0) ++ DMERR("misc_deregister failed for control device"); ++} +diff -ruN linux-2.4.19-rc1/drivers/md/dm.c linux/drivers/md/dm.c +--- linux-2.4.19-rc1/drivers/md/dm.c Tue Jun 25 22:18:02 2002 ++++ linux/drivers/md/dm.c Tue Jun 25 22:22:17 2002 +@@ -266,6 +266,7 @@ + #define xx(n) {n ## _init, n ## _exit}, + xx(local) + xx(dm_target) ++ xx(dm_interface) + #undef xx + }; + +diff -ruN linux-2.4.19-rc1/drivers/md/dm.h linux/drivers/md/dm.h +--- linux-2.4.19-rc1/drivers/md/dm.h Tue Jun 25 22:18:02 2002 ++++ linux/drivers/md/dm.h Tue Jun 25 22:26:00 2002 +@@ -205,4 +205,11 @@ + return (num > (ULONG_MAX - fixed) / obj); + } + ++/* ++ * The device-mapper can be driven through one of two interfaces; ++ * ioctl or filesystem, depending which patch you have applied. ++ */ ++int __init dm_interface_init(void); ++void dm_interface_exit(void); ++ + #endif +diff -ruN linux-2.4.19-rc1/include/linux/dm-ioctl.h linux/include/linux/dm-ioctl.h +--- linux-2.4.19-rc1/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/dm-ioctl.h Tue Jun 25 22:18:23 2002 +@@ -0,0 +1,145 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the LGPL. ++ */ ++ ++#ifndef _LINUX_DM_IOCTL_H ++#define _LINUX_DM_IOCTL_H ++ ++#include "device-mapper.h" ++#include "types.h" ++ ++/* ++ * Implements a traditional ioctl interface to the device mapper. ++ */ ++ ++/* ++ * All ioctl arguments consist of a single chunk of memory, with ++ * this structure at the start. If a uuid is specified any ++ * lookup (eg. for a DM_INFO) will be done on that, *not* the ++ * name. 
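++ * An empty uuid[] makes lookups fall back to the name field.  data_size
++ * always covers the whole buffer including this header, and data_start
++ * locates any payload (target specs, a replacement name) within it.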
++ */ ++struct dm_ioctl { ++ /* ++ * The version number is made up of three parts: ++ * major - no backward or forward compatibility, ++ * minor - only backwards compatible, ++ * patch - both backwards and forwards compatible. ++ * ++ * All clients of the ioctl interface should fill in the ++ * version number of the interface that they were ++ * compiled with. ++ * ++ * All recognised ioctl commands (ie. those that don't ++ * return -ENOTTY) fill out this field, even if the ++ * command failed. ++ */ ++ uint32_t version[3]; /* in/out */ ++ uint32_t data_size; /* total size of data passed in ++ * including this struct */ ++ ++ uint32_t data_start; /* offset to start of data ++ * relative to start of this struct */ ++ ++ uint32_t target_count; /* in/out */ ++ uint32_t open_count; /* out */ ++ uint32_t flags; /* in/out */ ++ ++ __kernel_dev_t dev; /* in/out */ ++ ++ char name[DM_NAME_LEN]; /* device name */ ++ char uuid[DM_UUID_LEN]; /* unique identifier for ++ * the block device */ ++}; ++ ++/* ++ * Used to specify tables. These structures appear after the ++ * dm_ioctl. ++ */ ++struct dm_target_spec { ++ int32_t status; /* used when reading from kernel only */ ++ uint64_t sector_start; ++ uint32_t length; ++ ++ /* ++ * Offset in bytes (from the start of this struct) to ++ * next target_spec. ++ */ ++ uint32_t next; ++ ++ char target_type[DM_MAX_TYPE_NAME]; ++ ++ /* ++ * Parameter string starts immediately after this object. ++ * Be careful to add padding after string to ensure correct ++ * alignment of subsequent dm_target_spec. ++ */ ++}; ++ ++/* ++ * Used to retrieve the target dependencies. ++ */ ++struct dm_target_deps { ++ uint32_t count; ++ ++ __kernel_dev_t dev[0]; /* out */ ++}; ++ ++/* ++ * If you change this make sure you make the corresponding change ++ * to dm-ioctl.c:lookup_ioctl() ++ */ ++enum { ++ /* Top level cmds */ ++ DM_VERSION_CMD = 0, ++ DM_REMOVE_ALL_CMD, ++ ++ /* device level cmds */ ++ DM_DEV_CREATE_CMD, ++ DM_DEV_REMOVE_CMD, ++ DM_DEV_RELOAD_CMD, ++ DM_DEV_RENAME_CMD, ++ DM_DEV_SUSPEND_CMD, ++ DM_DEV_DEPS_CMD, ++ DM_DEV_STATUS_CMD, ++ ++ /* target level cmds */ ++ DM_TARGET_STATUS_CMD, ++ DM_TARGET_WAIT_CMD ++}; ++ ++#define DM_IOCTL 0xfd ++ ++#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) ++#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) ++ ++#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) ++#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) ++#define DM_DEV_RELOAD _IOWR(DM_IOCTL, DM_DEV_RELOAD_CMD, struct dm_ioctl) ++#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) ++#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) ++#define DM_DEV_DEPS _IOWR(DM_IOCTL, DM_DEV_DEPS_CMD, struct dm_ioctl) ++#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) ++ ++#define DM_TARGET_STATUS _IOWR(DM_IOCTL, DM_TARGET_STATUS_CMD, struct dm_ioctl) ++#define DM_TARGET_WAIT _IOWR(DM_IOCTL, DM_TARGET_WAIT_CMD, struct dm_ioctl) ++ ++#define DM_VERSION_MAJOR 1 ++#define DM_VERSION_MINOR 0 ++#define DM_VERSION_PATCHLEVEL 0 ++#define DM_VERSION_EXTRA "-ioctl (2002-06-25)" ++ ++/* Status bits */ ++#define DM_READONLY_FLAG 0x00000001 ++#define DM_SUSPEND_FLAG 0x00000002 ++#define DM_EXISTS_FLAG 0x00000004 ++#define DM_PERSISTENT_DEV_FLAG 0x00000008 ++ ++/* ++ * Flag passed into ioctl STATUS command to get table information ++ * rather than current status. 
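++ * With this flag set __status() emits STATUSTYPE_TABLE, reproducing each
++ * target's constructor parameters; without it STATUSTYPE_INFO reports
++ * runtime state such as how full a snapshot is.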
++ */ ++#define DM_STATUS_TABLE_FLAG 0x00000010 ++ ++#endif /* _LINUX_DM_IOCTL_H */ diff --git a/patches/common/linux-2.4.19-rc1-devmapper_3_basic_mappings.patch b/patches/common/linux-2.4.19-rc1-devmapper_3_basic_mappings.patch new file mode 100644 index 0000000..0b3699d --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-devmapper_3_basic_mappings.patch @@ -0,0 +1,410 @@ +diff -ruN linux-2.4.19-rc1/drivers/md/Makefile linux/drivers/md/Makefile +--- linux-2.4.19-rc1/drivers/md/Makefile Tue Jun 25 22:18:35 2002 ++++ linux/drivers/md/Makefile Tue Jun 25 22:28:32 2002 +@@ -7,7 +7,8 @@ + export-objs := md.o xor.o dm-table.o dm-target.o + list-multi := lvm-mod.o + lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o +-dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o ++dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \ ++ dm-linear.o dm-stripe.o + + # Note: link order is important. All raid personalities + # and xor.o must come before md.o, as they each initialise +diff -ruN linux-2.4.19-rc1/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c +--- linux-2.4.19-rc1/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-linear.c Tue Jun 25 22:28:43 2002 +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++ ++/* ++ * Linear: maps a linear range of a device. ++ */ ++struct linear_c { ++ long delta; /* FIXME: we need a signed offset type */ ++ long start; /* For display only */ ++ struct dm_dev *dev; ++}; ++ ++/* ++ * Construct a linear mapping: ++ */ ++static int linear_ctr(struct dm_table *t, offset_t b, offset_t l, ++ int argc, char **argv, void **context) ++{ ++ struct linear_c *lc; ++ unsigned long start; /* FIXME: unsigned long long */ ++ char *end; ++ ++ if (argc != 2) { ++ *context = "dm-linear: Not enough arguments"; ++ return -EINVAL; ++ } ++ ++ lc = kmalloc(sizeof(*lc), GFP_KERNEL); ++ if (lc == NULL) { ++ *context = "dm-linear: Cannot allocate linear context"; ++ return -ENOMEM; ++ } ++ ++ start = simple_strtoul(argv[1], &end, 10); ++ if (*end) { ++ *context = "dm-linear: Invalid device sector"; ++ goto bad; ++ } ++ ++ if (dm_table_get_device(t, argv[0], start, l, &lc->dev)) { ++ *context = "dm-linear: Device lookup failed"; ++ goto bad; ++ } ++ ++ lc->delta = (int) start - (int) b; ++ lc->start = start; ++ *context = lc; ++ return 0; ++ ++ bad: ++ kfree(lc); ++ return -EINVAL; ++} ++ ++static void linear_dtr(struct dm_table *t, void *c) ++{ ++ struct linear_c *lc = (struct linear_c *) c; ++ ++ dm_table_put_device(t, lc->dev); ++ kfree(c); ++} ++ ++static int linear_map(struct buffer_head *bh, int rw, void *context) ++{ ++ struct linear_c *lc = (struct linear_c *) context; ++ ++ bh->b_rdev = lc->dev->dev; ++ bh->b_rsector = bh->b_rsector + lc->delta; ++ ++ return 1; ++} ++ ++static int linear_status(status_type_t type, char *result, int maxlen, ++ void *context) ++{ ++ struct linear_c *lc = (struct linear_c *) context; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ result[0] = '\0'; ++ break; ++ ++ case STATUSTYPE_TABLE: ++ snprintf(result, maxlen, "%s %ld", kdevname(lc->dev->dev), ++ lc->start); ++ break; ++ } ++ return 0; ++} ++ ++static struct target_type linear_target = { ++ name: "linear", ++ module: THIS_MODULE, ++ ctr: linear_ctr, ++ dtr: linear_dtr, ++ map: linear_map, ++ status: linear_status, ++}; ++ ++int __init dm_linear_init(void) ++{ ++ int r = dm_register_target(&linear_target); ++ ++ if (r < 0) ++ DMERR("linear: 
register failed %d", r); ++ ++ return r; ++} ++ ++void dm_linear_exit(void) ++{ ++ int r = dm_unregister_target(&linear_target); ++ ++ if (r < 0) ++ DMERR("linear: unregister failed %d", r); ++} +diff -ruN linux-2.4.19-rc1/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c +--- linux-2.4.19-rc1/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-stripe.c Tue Jun 25 22:28:43 2002 +@@ -0,0 +1,234 @@ ++/* ++ * Copyright (C) 2001 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++ ++#include ++#include ++#include ++ ++struct stripe { ++ struct dm_dev *dev; ++ offset_t physical_start; ++}; ++ ++struct stripe_c { ++ offset_t logical_start; ++ uint32_t stripes; ++ ++ /* The size of this target / num. stripes */ ++ uint32_t stripe_width; ++ ++ /* stripe chunk size */ ++ uint32_t chunk_shift; ++ offset_t chunk_mask; ++ ++ struct stripe stripe[0]; ++}; ++ ++static inline struct stripe_c *alloc_context(int stripes) ++{ ++ size_t len; ++ ++ if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), ++ stripes)) ++ return NULL; ++ ++ len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); ++ ++ return kmalloc(len, GFP_KERNEL); ++} ++ ++/* ++ * Parse a single pair ++ */ ++static int get_stripe(struct dm_table *t, struct stripe_c *sc, ++ int stripe, char **argv) ++{ ++ char *end; ++ unsigned long start; ++ ++ start = simple_strtoul(argv[1], &end, 10); ++ if (*end) ++ return -EINVAL; ++ ++ if (dm_table_get_device(t, argv[0], start, sc->stripe_width, ++ &sc->stripe[stripe].dev)) ++ return -ENXIO; ++ ++ sc->stripe[stripe].physical_start = start; ++ return 0; ++} ++ ++/* ++ * Construct a striped mapping. ++ * [ ]+ ++ */ ++static int stripe_ctr(struct dm_table *t, offset_t b, offset_t l, ++ int argc, char **argv, void **context) ++{ ++ struct stripe_c *sc; ++ uint32_t stripes; ++ uint32_t chunk_size; ++ char *end; ++ int r, i; ++ ++ if (argc < 2) { ++ *context = "dm-stripe: Not enough arguments"; ++ return -EINVAL; ++ } ++ ++ stripes = simple_strtoul(argv[0], &end, 10); ++ if (*end) { ++ *context = "dm-stripe: Invalid stripe count"; ++ return -EINVAL; ++ } ++ ++ chunk_size = simple_strtoul(argv[1], &end, 10); ++ if (*end) { ++ *context = "dm-stripe: Invalid chunk_size"; ++ return -EINVAL; ++ } ++ ++ if (l % stripes) { ++ *context = "dm-stripe: Target length not divisable by " ++ "number of stripes"; ++ return -EINVAL; ++ } ++ ++ sc = alloc_context(stripes); ++ if (!sc) { ++ *context = "dm-stripe: Memory allocation for striped context " ++ "failed"; ++ return -ENOMEM; ++ } ++ ++ sc->logical_start = b; ++ sc->stripes = stripes; ++ sc->stripe_width = l / stripes; ++ ++ /* ++ * chunk_size is a power of two ++ */ ++ if (!chunk_size || (chunk_size & (chunk_size - 1))) { ++ *context = "dm-stripe: Invalid chunk size"; ++ kfree(sc); ++ return -EINVAL; ++ } ++ ++ sc->chunk_mask = chunk_size - 1; ++ for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++) ++ chunk_size >>= 1; ++ sc->chunk_shift--; ++ ++ /* ++ * Get the stripe destinations. 
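++ * Each destination is a device path and sector offset pair; if one
++ * fails to parse, every device acquired so far is released again.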
++ */ ++ for (i = 0; i < stripes; i++) { ++ if (argc < 2) { ++ *context = "dm-stripe: Not enough destinations " ++ "specified"; ++ kfree(sc); ++ return -EINVAL; ++ } ++ ++ argv += 2; ++ ++ r = get_stripe(t, sc, i, argv); ++ if (r < 0) { ++ *context = "dm-stripe: Couldn't parse stripe " ++ "destination"; ++ while (i--) ++ dm_table_put_device(t, sc->stripe[i].dev); ++ kfree(sc); ++ return r; ++ } ++ } ++ ++ *context = sc; ++ return 0; ++} ++ ++static void stripe_dtr(struct dm_table *t, void *c) ++{ ++ unsigned int i; ++ struct stripe_c *sc = (struct stripe_c *) c; ++ ++ for (i = 0; i < sc->stripes; i++) ++ dm_table_put_device(t, sc->stripe[i].dev); ++ ++ kfree(sc); ++} ++ ++static int stripe_map(struct buffer_head *bh, int rw, void *context) ++{ ++ struct stripe_c *sc = (struct stripe_c *) context; ++ ++ offset_t offset = bh->b_rsector - sc->logical_start; ++ uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift); ++ uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */ ++ chunk = chunk / sc->stripes; ++ ++ bh->b_rdev = sc->stripe[stripe].dev->dev; ++ bh->b_rsector = sc->stripe[stripe].physical_start + ++ (chunk << sc->chunk_shift) + (offset & sc->chunk_mask); ++ return 1; ++} ++ ++static int stripe_status(status_type_t type, char *result, int maxlen, ++ void *context) ++{ ++ struct stripe_c *sc = (struct stripe_c *) context; ++ int offset; ++ int i; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ result[0] = '\0'; ++ break; ++ ++ case STATUSTYPE_TABLE: ++ offset = snprintf(result, maxlen, "%d %ld", ++ sc->stripes, sc->chunk_mask + 1); ++ for (i = 0; i < sc->stripes; i++) { ++ offset += ++ snprintf(result + offset, maxlen - offset, ++ " %s %ld", ++ kdevname(sc->stripe[i].dev->dev), ++ sc->stripe[i].physical_start); ++ } ++ break; ++ } ++ return 0; ++} ++ ++static struct target_type stripe_target = { ++ name: "striped", ++ module: THIS_MODULE, ++ ctr: stripe_ctr, ++ dtr: stripe_dtr, ++ map: stripe_map, ++ status: stripe_status, ++}; ++ ++int __init dm_stripe_init(void) ++{ ++ int r; ++ ++ r = dm_register_target(&stripe_target); ++ if (r < 0) ++ DMWARN("striped target registration failed"); ++ ++ return r; ++} ++ ++void dm_stripe_exit(void) ++{ ++ if (dm_unregister_target(&stripe_target)) ++ DMWARN("striped target unregistration failed"); ++ ++ return; ++} +diff -ruN linux-2.4.19-rc1/drivers/md/dm.c linux/drivers/md/dm.c +--- linux-2.4.19-rc1/drivers/md/dm.c Tue Jun 25 22:22:17 2002 ++++ linux/drivers/md/dm.c Tue Jun 25 22:30:30 2002 +@@ -266,6 +266,8 @@ + #define xx(n) {n ## _init, n ## _exit}, + xx(local) + xx(dm_target) ++ xx(dm_linear) ++ xx(dm_stripe) + xx(dm_interface) + #undef xx + }; +diff -ruN linux-2.4.19-rc1/drivers/md/dm.h linux/drivers/md/dm.h +--- linux-2.4.19-rc1/drivers/md/dm.h Tue Jun 25 22:26:00 2002 ++++ linux/drivers/md/dm.h Tue Jun 25 22:30:03 2002 +@@ -212,4 +212,14 @@ + int __init dm_interface_init(void); + void dm_interface_exit(void); + ++/* ++ * Targets for linear and striped mappings ++ */ ++ ++int dm_linear_init(void); ++void dm_linear_exit(void); ++ ++int dm_stripe_init(void); ++void dm_stripe_exit(void); ++ + #endif diff --git a/patches/common/linux-2.4.19-rc1-devmapper_4_snapshots.patch b/patches/common/linux-2.4.19-rc1-devmapper_4_snapshots.patch new file mode 100644 index 0000000..b376dd6 --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-devmapper_4_snapshots.patch @@ -0,0 +1,3060 @@ +diff -ruN linux-2.4.19-rc1/drivers/md/Makefile linux/drivers/md/Makefile +--- linux-2.4.19-rc1/drivers/md/Makefile Tue Jun 25 22:28:32 2002 ++++ 
linux/drivers/md/Makefile Tue Jun 25 23:12:36 2002 +@@ -4,11 +4,12 @@ + + O_TARGET := mddev.o + +-export-objs := md.o xor.o dm-table.o dm-target.o ++export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o + list-multi := lvm-mod.o + lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o + dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \ +- dm-linear.o dm-stripe.o ++ dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \ ++ kcopyd.o + + # Note: link order is important. All raid personalities + # and xor.o must come before md.o, as they each initialise +diff -ruN linux-2.4.19-rc1/drivers/md/dm-exception-store.c linux/drivers/md/dm-exception-store.c +--- linux-2.4.19-rc1/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-exception-store.c Tue Jun 25 22:31:08 2002 +@@ -0,0 +1,727 @@ ++/* ++ * dm-snapshot.c ++ * ++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm-snapshot.h" ++#include "kcopyd.h" ++#include ++#include ++ ++#define SECTOR_SIZE 512 ++#define SECTOR_SHIFT 9 ++ ++/*----------------------------------------------------------------- ++ * Persistent snapshots, by persistent we mean that the snapshot ++ * will survive a reboot. ++ *---------------------------------------------------------------*/ ++ ++/* ++ * We need to store a record of which parts of the origin have ++ * been copied to the snapshot device. The snapshot code ++ * requires that we copy exception chunks to chunk aligned areas ++ * of the COW store. It makes sense therefore, to store the ++ * metadata in chunk size blocks. ++ * ++ * There is no backward or forward compatibility implemented, ++ * snapshots with different disk versions than the kernel will ++ * not be usable. It is expected that "lvcreate" will blank out ++ * the start of a fresh COW device before calling the snapshot ++ * constructor. ++ * ++ * The first chunk of the COW device just contains the header. ++ * After this there is a chunk filled with exception metadata, ++ * followed by as many exception chunks as can fit in the ++ * metadata areas. ++ * ++ * All on disk structures are in little-endian format. The end ++ * of the exceptions info is indicated by an exception with a ++ * new_chunk of 0, which is invalid since it would point to the ++ * header chunk. ++ */ ++ ++/* ++ * Magic for persistent snapshots: "SnAp" - Feeble isn't it. ++ */ ++#define SNAP_MAGIC 0x70416e53 ++ ++/* ++ * The on-disk version of the metadata. ++ */ ++#define SNAPSHOT_DISK_VERSION 1 ++ ++struct disk_header { ++ uint32_t magic; ++ ++ /* ++ * Is this snapshot valid. There is no way of recovering ++ * an invalid snapshot. ++ */ ++ int valid; ++ ++ /* ++ * Simple, incrementing version. no backward ++ * compatibility. ++ */ ++ uint32_t version; ++ ++ /* In sectors */ ++ uint32_t chunk_size; ++}; ++ ++struct disk_exception { ++ uint64_t old_chunk; ++ uint64_t new_chunk; ++}; ++ ++struct commit_callback { ++ void (*callback)(void *, int success); ++ void *context; ++}; ++ ++/* ++ * The top level structure for a persistent exception store. ++ */ ++struct pstore { ++ struct dm_snapshot *snap; /* up pointer to my snapshot */ ++ int version; ++ int valid; ++ uint32_t chunk_size; ++ uint32_t exceptions_per_area; ++ ++ /* ++ * Now that we have an asynchronous kcopyd there is no ++ * need for large chunk sizes, so it wont hurt to have a ++ * whole chunks worth of metadata in memory at once. 
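++ * 'area' holds exactly one chunk-sized metadata area, and all reads
++ * and writes of it go through the preallocated kiobuf below.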
++ */ ++ void *area; ++ struct kiobuf *iobuf; ++ ++ /* ++ * Used to keep track of which metadata area the data in ++ * 'chunk' refers to. ++ */ ++ uint32_t current_area; ++ ++ /* ++ * The next free chunk for an exception. ++ */ ++ uint32_t next_free; ++ ++ /* ++ * The index of next free exception in the current ++ * metadata area. ++ */ ++ uint32_t current_committed; ++ ++ atomic_t pending_count; ++ uint32_t callback_count; ++ struct commit_callback *callbacks; ++}; ++ ++/* ++ * For performance reasons we want to defer writing a committed ++ * exceptions metadata to disk so that we can amortise away this ++ * exensive operation. ++ * ++ * For the initial version of this code we will remain with ++ * synchronous io. There are some deadlock issues with async ++ * that I haven't yet worked out. ++ */ ++static int do_io(int rw, struct kcopyd_region *where, struct kiobuf *iobuf) ++{ ++ int i, sectors_per_block, nr_blocks, start; ++ int blocksize = get_hardsect_size(where->dev); ++ int status; ++ ++ sectors_per_block = blocksize / SECTOR_SIZE; ++ ++ nr_blocks = where->count / sectors_per_block; ++ start = where->sector / sectors_per_block; ++ ++ for (i = 0; i < nr_blocks; i++) ++ iobuf->blocks[i] = start++; ++ ++ iobuf->length = where->count << 9; ++ iobuf->locked = 1; ++ ++ status = brw_kiovec(rw, 1, &iobuf, where->dev, iobuf->blocks, ++ blocksize); ++ if (status != (where->count << 9)) ++ return -EIO; ++ ++ return 0; ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION ( 2, 4, 19) ++/* ++ * FIXME: Remove once 2.4.19 has been released. ++ */ ++struct page *vmalloc_to_page(void *vmalloc_addr) ++{ ++ unsigned long addr = (unsigned long) vmalloc_addr; ++ struct page *page = NULL; ++ pmd_t *pmd; ++ pte_t *pte; ++ pgd_t *pgd; ++ ++ pgd = pgd_offset_k(addr); ++ if (!pgd_none(*pgd)) { ++ pmd = pmd_offset(pgd, addr); ++ if (!pmd_none(*pmd)) { ++ pte = pte_offset(pmd, addr); ++ if (pte_present(*pte)) { ++ page = pte_page(*pte); ++ } ++ } ++ } ++ return page; ++} ++#endif ++ ++static int allocate_iobuf(struct pstore *ps) ++{ ++ size_t i, r = -ENOMEM, len, nr_pages; ++ struct page *page; ++ ++ len = ps->chunk_size << SECTOR_SHIFT; ++ ++ /* ++ * Allocate the chunk_size block of memory that will hold ++ * a single metadata area. ++ */ ++ ps->area = vmalloc(len); ++ if (!ps->area) ++ return r; ++ ++ if (alloc_kiovec(1, &ps->iobuf)) ++ goto bad; ++ ++ if (alloc_kiobuf_bhs(ps->iobuf)) ++ goto bad; ++ ++ nr_pages = ps->chunk_size / (PAGE_SIZE / SECTOR_SIZE); ++ r = expand_kiobuf(ps->iobuf, nr_pages); ++ if (r) ++ goto bad; ++ ++ /* ++ * We lock the pages for ps->area into memory since they'll be ++ * doing a lot of io. ++ */ ++ for (i = 0; i < nr_pages; i++) { ++ page = vmalloc_to_page(ps->area + (i * PAGE_SIZE)); ++ LockPage(page); ++ ps->iobuf->maplist[i] = page; ++ ps->iobuf->nr_pages++; ++ } ++ ++ ps->iobuf->nr_pages = nr_pages; ++ ps->iobuf->offset = 0; ++ ++ return 0; ++ ++ bad: ++ if (ps->iobuf) ++ free_kiovec(1, &ps->iobuf); ++ ++ if (ps->area) ++ vfree(ps->area); ++ ps->iobuf = NULL; ++ return r; ++} ++ ++static void free_iobuf(struct pstore *ps) ++{ ++ int i; ++ ++ for (i = 0; i < ps->iobuf->nr_pages; i++) ++ UnlockPage(ps->iobuf->maplist[i]); ++ ps->iobuf->locked = 0; ++ ++ free_kiovec(1, &ps->iobuf); ++ vfree(ps->area); ++} ++ ++/* ++ * Read or write a chunk aligned and sized block of data from a device. 
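++ * The chunk index is converted to a sector offset on the COW device and
++ * handed to do_io() together with the pstore's kiobuf.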
++ */ ++static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) ++{ ++ int r; ++ struct kcopyd_region where; ++ ++ where.dev = ps->snap->cow->dev; ++ where.sector = ps->chunk_size * chunk; ++ where.count = ps->chunk_size; ++ ++ r = do_io(rw, &where, ps->iobuf); ++ if (r) ++ return r; ++ ++ return 0; ++} ++ ++/* ++ * Read or write a metadata area. Remembering to skip the first ++ * chunk which holds the header. ++ */ ++static int area_io(struct pstore *ps, uint32_t area, int rw) ++{ ++ int r; ++ uint32_t chunk; ++ ++ /* convert a metadata area index to a chunk index */ ++ chunk = 1 + ((ps->exceptions_per_area + 1) * area); ++ ++ r = chunk_io(ps, chunk, rw); ++ if (r) ++ return r; ++ ++ ps->current_area = area; ++ return 0; ++} ++ ++static int zero_area(struct pstore *ps, uint32_t area) ++{ ++ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); ++ return area_io(ps, area, WRITE); ++} ++ ++static int read_header(struct pstore *ps, int *new_snapshot) ++{ ++ int r; ++ struct disk_header *dh; ++ ++ r = chunk_io(ps, 0, READ); ++ if (r) ++ return r; ++ ++ dh = (struct disk_header *) ps->area; ++ ++ if (dh->magic == 0) { ++ *new_snapshot = 1; ++ ++ } else if (dh->magic == SNAP_MAGIC) { ++ *new_snapshot = 0; ++ ps->valid = dh->valid; ++ ps->version = dh->version; ++ ps->chunk_size = dh->chunk_size; ++ ++ } else { ++ DMWARN("Invalid/corrupt snapshot"); ++ r = -ENXIO; ++ } ++ ++ return r; ++} ++ ++static int write_header(struct pstore *ps) ++{ ++ struct disk_header *dh; ++ ++ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); ++ ++ dh = (struct disk_header *) ps->area; ++ dh->magic = SNAP_MAGIC; ++ dh->valid = ps->valid; ++ dh->version = ps->version; ++ dh->chunk_size = ps->chunk_size; ++ ++ return chunk_io(ps, 0, WRITE); ++} ++ ++/* ++ * Access functions for the disk exceptions, these do the endian conversions. ++ */ ++static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) ++{ ++ if (index >= ps->exceptions_per_area) ++ return NULL; ++ ++ return ((struct disk_exception *) ps->area) + index; ++} ++ ++static int read_exception(struct pstore *ps, ++ uint32_t index, struct disk_exception *result) ++{ ++ struct disk_exception *e; ++ ++ e = get_exception(ps, index); ++ if (!e) ++ return -EINVAL; ++ ++ /* copy it */ ++ result->old_chunk = le64_to_cpu(e->old_chunk); ++ result->new_chunk = le64_to_cpu(e->new_chunk); ++ ++ return 0; ++} ++ ++static int write_exception(struct pstore *ps, ++ uint32_t index, struct disk_exception *de) ++{ ++ struct disk_exception *e; ++ ++ e = get_exception(ps, index); ++ if (!e) ++ return -EINVAL; ++ ++ /* copy it */ ++ e->old_chunk = cpu_to_le64(de->old_chunk); ++ e->new_chunk = cpu_to_le64(de->new_chunk); ++ ++ return 0; ++} ++ ++/* ++ * Registers the exceptions that are present in the current area. ++ * 'full' is filled in to indicate if the area has been ++ * filled. ++ */ ++static int insert_exceptions(struct pstore *ps, int *full) ++{ ++ int i, r; ++ struct disk_exception de; ++ ++ /* presume the area is full */ ++ *full = 1; ++ ++ for (i = 0; i < ps->exceptions_per_area; i++) { ++ r = read_exception(ps, i, &de); ++ ++ if (r) ++ return r; ++ ++ /* ++ * If the new_chunk is pointing at the start of ++ * the COW device, where the first metadata area ++ * is we know that we've hit the end of the ++ * exceptions. Therefore the area is not full. ++ */ ++ if (de.new_chunk == 0LL) { ++ ps->current_committed = i; ++ *full = 0; ++ break; ++ } ++ ++ /* ++ * Keep track of the start of the free chunks. 
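++ * next_free must end up beyond the highest new_chunk seen, so that
++ * persistent_prepare() never hands out a chunk that is already in use.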
++ */ ++ if (ps->next_free <= de.new_chunk) ++ ps->next_free = de.new_chunk + 1; ++ ++ /* ++ * Otherwise we add the exception to the snapshot. ++ */ ++ r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); ++ if (r) ++ return r; ++ } ++ ++ return 0; ++} ++ ++static int read_exceptions(struct pstore *ps) ++{ ++ uint32_t area; ++ int r, full = 1; ++ ++ /* ++ * Keeping reading chunks and inserting exceptions until ++ * we find a partially full area. ++ */ ++ for (area = 0; full; area++) { ++ r = area_io(ps, area, READ); ++ if (r) ++ return r; ++ ++ r = insert_exceptions(ps, &full); ++ if (r) ++ return r; ++ ++ area++; ++ } ++ ++ return 0; ++} ++ ++static inline struct pstore *get_info(struct exception_store *store) ++{ ++ return (struct pstore *) store->context; ++} ++ ++static int persistent_percentfull(struct exception_store *store) ++{ ++ struct pstore *ps = get_info(store); ++ return (ps->next_free * store->snap->chunk_size * 100) / ++ get_dev_size(store->snap->cow->dev); ++} ++ ++static void persistent_destroy(struct exception_store *store) ++{ ++ struct pstore *ps = get_info(store); ++ ++ vfree(ps->callbacks); ++ free_iobuf(ps); ++ kfree(ps); ++} ++ ++static int persistent_prepare(struct exception_store *store, ++ struct exception *e) ++{ ++ struct pstore *ps = get_info(store); ++ uint32_t stride; ++ offset_t size = get_dev_size(store->snap->cow->dev); ++ ++ /* Is there enough room ? */ ++ if (size <= (ps->next_free * store->snap->chunk_size)) ++ return -ENOSPC; ++ ++ e->new_chunk = ps->next_free; ++ ++ /* ++ * Move onto the next free pending, making sure to take ++ * into account the location of the metadata chunks. ++ */ ++ stride = (ps->exceptions_per_area + 1); ++ if (!(++ps->next_free % stride)) ++ ps->next_free++; ++ ++ atomic_inc(&ps->pending_count); ++ return 0; ++} ++ ++static void persistent_commit(struct exception_store *store, ++ struct exception *e, ++ void (*callback) (void *, int success), ++ void *callback_context) ++{ ++ int r, i; ++ struct pstore *ps = get_info(store); ++ struct disk_exception de; ++ struct commit_callback *cb; ++ ++ de.old_chunk = e->old_chunk; ++ de.new_chunk = e->new_chunk; ++ write_exception(ps, ps->current_committed++, &de); ++ ++ /* ++ * Add the callback to the back of the array. This code ++ * is the only place where the callback array is ++ * manipulated, and we know that it will never be called ++ * multiple times concurrently. ++ */ ++ cb = ps->callbacks + ps->callback_count++; ++ cb->callback = callback; ++ cb->context = callback_context; ++ ++ /* ++ * If there are no more exceptions in flight, or we have ++ * filled this metadata area we commit the exceptions to ++ * disk. ++ */ ++ if (atomic_dec_and_test(&ps->pending_count) || ++ (ps->current_committed == ps->exceptions_per_area)) { ++ r = area_io(ps, ps->current_area, WRITE); ++ if (r) ++ ps->valid = 0; ++ ++ for (i = 0; i < ps->callback_count; i++) { ++ cb = ps->callbacks + i; ++ cb->callback(cb->context, r == 0 ? 1 : 0); ++ } ++ ++ ps->callback_count = 0; ++ } ++ ++ /* ++ * Have we completely filled the current area ? 
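++ * If so, start a fresh one: reset current_committed and zero the next
++ * metadata area so a later read_exceptions() finds an end-of-list marker.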
++ */ ++ if (ps->current_committed == ps->exceptions_per_area) { ++ ps->current_committed = 0; ++ r = zero_area(ps, ps->current_area + 1); ++ if (r) ++ ps->valid = 0; ++ } ++} ++ ++static void persistent_drop(struct exception_store *store) ++{ ++ struct pstore *ps = get_info(store); ++ ++ ps->valid = 0; ++ if (write_header(ps)) ++ DMWARN("write header failed"); ++} ++ ++int dm_create_persistent(struct exception_store *store, uint32_t chunk_size) ++{ ++ int r, new_snapshot; ++ struct pstore *ps; ++ ++ /* allocate the pstore */ ++ ps = kmalloc(sizeof(*ps), GFP_KERNEL); ++ if (!ps) ++ return -ENOMEM; ++ ++ ps->snap = store->snap; ++ ps->valid = 1; ++ ps->version = SNAPSHOT_DISK_VERSION; ++ ps->chunk_size = chunk_size; ++ ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) / ++ sizeof(struct disk_exception); ++ ps->next_free = 2; /* skipping the header and first area */ ++ ps->current_committed = 0; ++ ++ r = allocate_iobuf(ps); ++ if (r) ++ goto bad; ++ ++ /* ++ * Allocate space for all the callbacks. ++ */ ++ ps->callback_count = 0; ++ atomic_set(&ps->pending_count, 0); ++ ps->callbacks = vcalloc(ps->exceptions_per_area, ++ sizeof(*ps->callbacks)); ++ ++ if (!ps->callbacks) ++ goto bad; ++ ++ /* ++ * Read the snapshot header. ++ */ ++ r = read_header(ps, &new_snapshot); ++ if (r) ++ goto bad; ++ ++ /* ++ * Do we need to setup a new snapshot ? ++ */ ++ if (new_snapshot) { ++ r = write_header(ps); ++ if (r) { ++ DMWARN("write_header failed"); ++ goto bad; ++ } ++ ++ r = zero_area(ps, 0); ++ if (r) { ++ DMWARN("zero_area(0) failed"); ++ goto bad; ++ } ++ ++ } else { ++ /* ++ * Sanity checks. ++ */ ++ if (ps->chunk_size != chunk_size) { ++ DMWARN("chunk size for existing snapshot different " ++ "from that requested"); ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ if (ps->version != SNAPSHOT_DISK_VERSION) { ++ DMWARN("unable to handle snapshot disk version %d", ++ ps->version); ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ /* ++ * Read the metadata. ++ */ ++ r = read_exceptions(ps); ++ if (r) ++ goto bad; ++ } ++ ++ store->destroy = persistent_destroy; ++ store->prepare_exception = persistent_prepare; ++ store->commit_exception = persistent_commit; ++ store->drop_snapshot = persistent_drop; ++ store->percent_full = persistent_percentfull; ++ store->context = ps; ++ ++ return r; ++ ++ bad: ++ if (ps) { ++ if (ps->callbacks) ++ vfree(ps->callbacks); ++ ++ if (ps->iobuf) ++ free_iobuf(ps); ++ ++ kfree(ps); ++ } ++ return r; ++} ++ ++/*----------------------------------------------------------------- ++ * Implementation of the store for non-persistent snapshots. 
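++ * A transient store only remembers the next free sector of the COW
++ * device in memory; nothing is written out, so its exceptions do not
++ * survive a reboot.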
++ *---------------------------------------------------------------*/ ++struct transient_c { ++ offset_t next_free; ++}; ++ ++void transient_destroy(struct exception_store *store) ++{ ++ kfree(store->context); ++} ++ ++int transient_prepare(struct exception_store *store, struct exception *e) ++{ ++ struct transient_c *tc = (struct transient_c *) store->context; ++ offset_t size = get_dev_size(store->snap->cow->dev); ++ ++ if (size < (tc->next_free + store->snap->chunk_size)) ++ return -1; ++ ++ e->new_chunk = sector_to_chunk(store->snap, tc->next_free); ++ tc->next_free += store->snap->chunk_size; ++ ++ return 0; ++} ++ ++void transient_commit(struct exception_store *store, ++ struct exception *e, ++ void (*callback) (void *, int success), ++ void *callback_context) ++{ ++ /* Just succeed */ ++ callback(callback_context, 1); ++} ++ ++static int transient_percentfull(struct exception_store *store) ++{ ++ struct transient_c *tc = (struct transient_c *) store->context; ++ return (tc->next_free * 100) / get_dev_size(store->snap->cow->dev); ++} ++ ++int dm_create_transient(struct exception_store *store, ++ struct dm_snapshot *s, int blocksize, void **error) ++{ ++ struct transient_c *tc; ++ ++ memset(store, 0, sizeof(*store)); ++ store->destroy = transient_destroy; ++ store->prepare_exception = transient_prepare; ++ store->commit_exception = transient_commit; ++ store->percent_full = transient_percentfull; ++ store->snap = s; ++ ++ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); ++ if (!tc) ++ return -ENOMEM; ++ ++ tc->next_free = 0; ++ store->context = tc; ++ ++ return 0; ++} +diff -ruN linux-2.4.19-rc1/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapshot.c +--- linux-2.4.19-rc1/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-snapshot.c Tue Jun 25 22:31:08 2002 +@@ -0,0 +1,1182 @@ ++/* ++ * dm-snapshot.c ++ * ++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "dm-snapshot.h" ++#include "kcopyd.h" ++ ++/* ++ * FIXME: Remove this before release. ++ */ ++#if 0 ++#define DMDEBUG(x...) DMWARN( ## x) ++#else ++#define DMDEBUG(x...) ++#endif ++ ++/* ++ * The percentage increment we will wake up users at ++ */ ++#define WAKE_UP_PERCENT 5 ++ ++/* ++ * Hard sector size used all over the kernel ++ */ ++#define SECTOR_SIZE 512 ++ ++/* ++ * kcopyd priority of snapshot operations ++ */ ++#define SNAPSHOT_COPY_PRIORITY 2 ++ ++struct pending_exception { ++ struct exception e; ++ ++ /* ++ * Origin buffers waiting for this to complete are held ++ * in a list (using b_reqnext). ++ */ ++ struct buffer_head *origin_bhs; ++ struct buffer_head *snapshot_bhs; ++ ++ /* ++ * Other pending_exceptions that are processing this ++ * chunk. When this list is empty, we know we can ++ * complete the origins. ++ */ ++ struct list_head siblings; ++ ++ /* Pointer back to snapshot context */ ++ struct dm_snapshot *snap; ++ ++ /* ++ * 1 indicates the exception has already been sent to ++ * kcopyd. 
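++ * start_copy() checks this flag, so only one copy is ever dispatched
++ * no matter how many writes hit the same chunk.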
++ */ ++ int started; ++}; ++ ++/* ++ * Hash table mapping origin volumes to lists of snapshots and ++ * a lock to protect it ++ */ ++static kmem_cache_t *exception_cache; ++static kmem_cache_t *pending_cache; ++static mempool_t *pending_pool; ++ ++/* ++ * One of these per registered origin, held in the snapshot_origins hash ++ */ ++struct origin { ++ /* The origin device */ ++ kdev_t dev; ++ ++ struct list_head hash_list; ++ ++ /* List of snapshots for this origin */ ++ struct list_head snapshots; ++}; ++ ++/* ++ * Size of the hash table for origin volumes. If we make this ++ * the size of the minors list then it should be nearly perfect ++ */ ++#define ORIGIN_HASH_SIZE 256 ++#define ORIGIN_MASK 0xFF ++static struct list_head *_origins; ++static struct rw_semaphore _origins_lock; ++ ++static int init_origin_hash(void) ++{ ++ int i; ++ ++ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!_origins) { ++ DMERR("Device mapper: Snapshot: unable to allocate memory"); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < ORIGIN_HASH_SIZE; i++) ++ INIT_LIST_HEAD(_origins + i); ++ init_rwsem(&_origins_lock); ++ ++ return 0; ++} ++ ++static void exit_origin_hash(void) ++{ ++ kfree(_origins); ++} ++ ++static inline unsigned int origin_hash(kdev_t dev) ++{ ++ return MINOR(dev) & ORIGIN_MASK; ++} ++ ++static struct origin *__lookup_origin(kdev_t origin) ++{ ++ struct list_head *slist; ++ struct list_head *ol; ++ struct origin *o; ++ ++ ol = &_origins[origin_hash(origin)]; ++ list_for_each(slist, ol) { ++ o = list_entry(slist, struct origin, hash_list); ++ ++ if (o->dev == origin) ++ return o; ++ } ++ ++ return NULL; ++} ++ ++static void __insert_origin(struct origin *o) ++{ ++ struct list_head *sl = &_origins[origin_hash(o->dev)]; ++ list_add_tail(&o->hash_list, sl); ++} ++ ++/* ++ * Make a note of the snapshot and its origin so we can look it ++ * up when the origin has a write on it. ++ */ ++static int register_snapshot(struct dm_snapshot *snap) ++{ ++ struct origin *o; ++ kdev_t dev = snap->origin->dev; ++ ++ down_write(&_origins_lock); ++ o = __lookup_origin(dev); ++ ++ if (!o) { ++ /* New origin */ ++ o = kmalloc(sizeof(*o), GFP_KERNEL); ++ if (!o) { ++ up_write(&_origins_lock); ++ return -ENOMEM; ++ } ++ ++ /* Initialise the struct */ ++ INIT_LIST_HEAD(&o->snapshots); ++ o->dev = dev; ++ ++ __insert_origin(o); ++ } ++ ++ list_add_tail(&snap->list, &o->snapshots); ++ ++ up_write(&_origins_lock); ++ return 0; ++} ++ ++static void unregister_snapshot(struct dm_snapshot *s) ++{ ++ struct origin *o; ++ ++ down_write(&_origins_lock); ++ o = __lookup_origin(s->origin->dev); ++ ++ list_del(&s->list); ++ if (list_empty(&o->snapshots)) { ++ list_del(&o->hash_list); ++ kfree(o); ++ } ++ ++ up_write(&_origins_lock); ++} ++ ++/* ++ * Implementation of the exception hash tables. 
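++ * Each table is a power-of-two array of list heads; a chunk hashes to a
++ * bucket simply by masking with hash_mask (size - 1).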
++ */ ++static int init_exception_table(struct exception_table *et, uint32_t size) ++{ ++ int i; ++ ++ et->hash_mask = size - 1; ++ et->table = vcalloc(size, sizeof(struct list_head)); ++ if (!et->table) ++ return -ENOMEM; ++ ++ for (i = 0; i < size; i++) ++ INIT_LIST_HEAD(et->table + i); ++ ++ return 0; ++} ++ ++static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) ++{ ++ struct list_head *slot, *entry, *temp; ++ struct exception *ex; ++ int i, size; ++ ++ size = et->hash_mask + 1; ++ for (i = 0; i < size; i++) { ++ slot = et->table + i; ++ ++ list_for_each_safe(entry, temp, slot) { ++ ex = list_entry(entry, struct exception, hash_list); ++ kmem_cache_free(mem, ex); ++ } ++ } ++ ++ vfree(et->table); ++} ++ ++/* ++ * FIXME: check how this hash fn is performing. ++ */ ++static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) ++{ ++ return chunk & et->hash_mask; ++} ++ ++static void insert_exception(struct exception_table *eh, struct exception *e) ++{ ++ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; ++ list_add(&e->hash_list, l); ++} ++ ++static inline void remove_exception(struct exception *e) ++{ ++ list_del(&e->hash_list); ++} ++ ++/* ++ * Return the exception data for a sector, or NULL if not ++ * remapped. ++ */ ++static struct exception *lookup_exception(struct exception_table *et, ++ chunk_t chunk) ++{ ++ struct list_head *slot, *el; ++ struct exception *e; ++ ++ slot = &et->table[exception_hash(et, chunk)]; ++ list_for_each(el, slot) { ++ e = list_entry(el, struct exception, hash_list); ++ if (e->old_chunk == chunk) ++ return e; ++ } ++ ++ return NULL; ++} ++ ++static inline struct exception *alloc_exception(void) ++{ ++ struct exception *e; ++ ++ e = kmem_cache_alloc(exception_cache, GFP_NOIO); ++ if (!e) ++ e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); ++ ++ return e; ++} ++ ++static inline void free_exception(struct exception *e) ++{ ++ kmem_cache_free(exception_cache, e); ++} ++ ++static inline struct pending_exception *alloc_pending_exception(void) ++{ ++ return mempool_alloc(pending_pool, GFP_NOIO); ++} ++ ++static inline void free_pending_exception(struct pending_exception *pe) ++{ ++ mempool_free(pe, pending_pool); ++} ++ ++int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) ++{ ++ struct exception *e; ++ ++ e = alloc_exception(); ++ if (!e) ++ return -ENOMEM; ++ ++ e->old_chunk = old; ++ e->new_chunk = new; ++ insert_exception(&s->complete, e); ++ return 0; ++} ++ ++/* ++ * Hard coded magic. ++ */ ++static int calc_max_buckets(void) ++{ ++ unsigned long mem; ++ ++ mem = num_physpages << PAGE_SHIFT; ++ mem /= 50; ++ mem /= sizeof(struct list_head); ++ ++ return mem; ++} ++ ++/* ++ * Rounds a number down to a power of 2. ++ */ ++static inline uint32_t round_down(uint32_t n) ++{ ++ while (n & (n - 1)) ++ n &= (n - 1); ++ return n; ++} ++ ++/* ++ * Allocate room for a suitable hash table. ++ */ ++static int init_hash_tables(struct dm_snapshot *s) ++{ ++ offset_t hash_size, cow_dev_size, origin_dev_size, max_buckets; ++ ++ /* ++ * Calculate based on the size of the original volume or ++ * the COW volume... 
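++ * whichever is smaller bounds the number of exceptions; the result is
++ * capped by calc_max_buckets() and rounded down to a power of two.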
++ */ ++ cow_dev_size = get_dev_size(s->cow->dev); ++ origin_dev_size = get_dev_size(s->origin->dev); ++ max_buckets = calc_max_buckets(); ++ ++ hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size; ++ hash_size = min(hash_size, max_buckets); ++ ++ /* Round it down to a power of 2 */ ++ hash_size = round_down(hash_size); ++ if (init_exception_table(&s->complete, hash_size)) ++ return -ENOMEM; ++ ++ /* ++ * Allocate hash table for in-flight exceptions ++ * Make this smaller than the real hash table ++ */ ++ hash_size >>= 3; ++ if (!hash_size) ++ hash_size = 64; ++ ++ if (init_exception_table(&s->pending, hash_size)) { ++ exit_exception_table(&s->complete, exception_cache); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Construct a snapshot mapping:
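++ * origin_path cow_path p|n chunk_size, plus an extent_size argument
++ * when the snapshot is persistent (see the argv parsing below).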

++ * ++ */ ++static int snapshot_ctr(struct dm_table *t, offset_t b, offset_t l, ++ int argc, char **argv, void **context) ++{ ++ struct dm_snapshot *s; ++ unsigned long chunk_size; ++ unsigned long extent_size = 0L; ++ int r = -EINVAL; ++ char *persistent; ++ char *origin_path; ++ char *cow_path; ++ char *value; ++ int blocksize; ++ ++ if (argc < 4) { ++ *context = "dm-snapshot: Not enough arguments"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ origin_path = argv[0]; ++ cow_path = argv[1]; ++ persistent = argv[2]; ++ ++ if ((*persistent & 0x5f) != 'P' && (*persistent & 0x5f) != 'N') { ++ *context = "Persistent flag is not P or N"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ chunk_size = simple_strtoul(argv[3], &value, 10); ++ if (chunk_size == 0 || value == NULL) { ++ *context = "Invalid chunk size"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ /* Get the extent size for persistent snapshots */ ++ if ((*persistent & 0x5f) == 'P') { ++ if (argc < 5) { ++ *context = "No extent size specified"; ++ r = -EINVAL; ++ goto bad; ++ } ++ ++ extent_size = simple_strtoul(argv[4], &value, 10); ++ if (extent_size == 0 || value == NULL) { ++ *context = "Invalid extent size"; ++ r = -EINVAL; ++ goto bad; ++ } ++ } ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (s == NULL) { ++ *context = "Cannot allocate snapshot context private structure"; ++ r = -ENOMEM; ++ goto bad; ++ } ++ ++ r = dm_table_get_device(t, origin_path, 0, 0, &s->origin); ++ if (r) { ++ *context = "Cannot get origin device"; ++ goto bad_free; ++ } ++ ++ r = dm_table_get_device(t, cow_path, 0, 0, &s->cow); ++ if (r) { ++ dm_table_put_device(t, s->origin); ++ *context = "Cannot get COW device"; ++ goto bad_free; ++ } ++ ++ /* Validate the extent and chunk sizes against the device block size */ ++ blocksize = get_hardsect_size(s->cow->dev); ++ if (chunk_size % (blocksize / SECTOR_SIZE)) { ++ *context = "Chunk size is not a multiple of device blocksize"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ if (extent_size % (blocksize / SECTOR_SIZE)) { ++ *context = "Extent size is not a multiple of device blocksize"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ /* Check the sizes are small enough to fit in one kiovec */ ++ if (chunk_size > KIO_MAX_SECTORS) { ++ *context = "Chunk size is too big"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ if (extent_size > KIO_MAX_SECTORS) { ++ *context = "Extent size is too big"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ /* Check chunk_size is a power of 2 */ ++ if (chunk_size & (chunk_size - 1)) { ++ *context = "Chunk size is not a power of 2"; ++ r = -EINVAL; ++ goto bad_putdev; ++ } ++ ++ s->chunk_size = chunk_size; ++ s->chunk_mask = chunk_size - 1; ++ s->type = *persistent; ++ for (s->chunk_shift = 0; chunk_size; ++ s->chunk_shift++, chunk_size >>= 1) ++ ; ++ s->chunk_shift--; ++ ++ s->valid = 1; ++ s->last_percent = 0; ++ s->table = t; ++ init_rwsem(&s->lock); ++ ++ /* Allocate hash table for COW data */ ++ if (init_hash_tables(s)) { ++ *context = "Unable to allocate hash table space"; ++ r = -ENOMEM; ++ goto bad_putdev; ++ } ++ ++ /* ++ * Check the persistent flag - done here because we need the iobuf ++ * to check the LV header ++ */ ++ s->store.snap = s; ++ ++ if ((*persistent & 0x5f) == 'P') ++ r = dm_create_persistent(&s->store, s->chunk_size); ++ else ++ r = dm_create_transient(&s->store, s, blocksize, context); ++ ++ if (r) { ++ *context = "Couldn't create exception store"; ++ r = -EINVAL; ++ goto bad_free1; ++ } ++ ++ /* Flush IO to the origin device */ ++#if LVM_VFS_ENHANCEMENT ++ 
fsync_dev_lockfs(s->origin->dev); ++#else ++ fsync_dev(s->origin->dev); ++#endif ++ ++ /* Add snapshot to the list of snapshots for this origin */ ++ if (register_snapshot(s)) { ++ r = -EINVAL; ++ *context = "Cannot register snapshot origin"; ++ goto bad_free2; ++ } ++#if LVM_VFS_ENHANCEMENT ++ unlockfs(s->origin->dev); ++#endif ++ kcopyd_inc_client_count(); ++ ++ *context = s; ++ return 0; ++ ++ bad_free2: ++ s->store.destroy(&s->store); ++ ++ bad_free1: ++ exit_exception_table(&s->pending, pending_cache); ++ exit_exception_table(&s->complete, exception_cache); ++ ++ bad_putdev: ++ dm_table_put_device(t, s->cow); ++ dm_table_put_device(t, s->origin); ++ ++ bad_free: ++ kfree(s); ++ ++ bad: ++ return r; ++} ++ ++static void snapshot_dtr(struct dm_table *t, void *context) ++{ ++ struct dm_snapshot *s = (struct dm_snapshot *) context; ++ ++ dm_table_event(s->table); ++ ++ unregister_snapshot(s); ++ ++ exit_exception_table(&s->pending, pending_cache); ++ exit_exception_table(&s->complete, exception_cache); ++ ++ /* Deallocate memory used */ ++ s->store.destroy(&s->store); ++ ++ dm_table_put_device(t, s->origin); ++ dm_table_put_device(t, s->cow); ++ kfree(s); ++ ++ kcopyd_dec_client_count(); ++} ++ ++/* ++ * We hold lists of buffer_heads, using the b_reqnext field. ++ */ ++static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh) ++{ ++ bh->b_reqnext = *queue; ++ *queue = bh; ++} ++ ++/* ++ * Flush a list of buffers. ++ */ ++static void flush_buffers(struct buffer_head *bh) ++{ ++ struct buffer_head *n; ++ ++ DMDEBUG("begin flush"); ++ while (bh) { ++ n = bh->b_reqnext; ++ bh->b_reqnext = NULL; ++ DMDEBUG("flushing %p", bh); ++ generic_make_request(WRITE, bh); ++ bh = n; ++ } ++ ++ run_task_queue(&tq_disk); ++} ++ ++/* ++ * Error a list of buffers. ++ */ ++static void error_buffers(struct buffer_head *bh) ++{ ++ struct buffer_head *n; ++ ++ while (bh) { ++ n = bh->b_reqnext; ++ bh->b_reqnext = NULL; ++ buffer_IO_error(bh); ++ bh = n; ++ } ++} ++ ++static void pending_complete(struct pending_exception *pe, int success) ++{ ++ struct exception *e; ++ struct dm_snapshot *s = pe->snap; ++ ++ if (success) { ++ e = alloc_exception(); ++ if (!e) { ++ printk("Unable to allocate exception."); ++ down_write(&s->lock); ++ s->store.drop_snapshot(&s->store); ++ s->valid = 0; ++ up_write(&s->lock); ++ return; ++ } ++ ++ /* ++ * Add a proper exception, and remove the ++ * inflight exception from the list. 
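++ * Both changes are made while holding the snapshot's write lock, so
++ * readers see the chunk move atomically from the pending table to the
++ * complete one.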
++ */ ++ down_write(&s->lock); ++ ++ memcpy(e, &pe->e, sizeof(*e)); ++ insert_exception(&s->complete, e); ++ remove_exception(&pe->e); ++ ++ /* Submit any pending write BHs */ ++ up_write(&s->lock); ++ ++ flush_buffers(pe->snapshot_bhs); ++ DMDEBUG("Exception completed successfully."); ++ ++ /* Notify any interested parties */ ++ if (s->store.percent_full) { ++ int pc = s->store.percent_full(&s->store); ++ ++ if (pc >= s->last_percent + WAKE_UP_PERCENT) { ++ dm_table_event(s->table); ++ s->last_percent = pc - pc % WAKE_UP_PERCENT; ++ } ++ } ++ ++ } else { ++ /* Read/write error - snapshot is unusable */ ++ DMERR("Error reading/writing snapshot"); ++ ++ down_write(&s->lock); ++ s->store.drop_snapshot(&s->store); ++ s->valid = 0; ++ remove_exception(&pe->e); ++ up_write(&s->lock); ++ ++ error_buffers(pe->snapshot_bhs); ++ ++ dm_table_event(s->table); ++ DMDEBUG("Exception failed."); ++ } ++ ++ if (list_empty(&pe->siblings)) ++ flush_buffers(pe->origin_bhs); ++ else ++ list_del(&pe->siblings); ++ ++ free_pending_exception(pe); ++} ++ ++static void commit_callback(void *context, int success) ++{ ++ struct pending_exception *pe = (struct pending_exception *) context; ++ pending_complete(pe, success); ++} ++ ++/* ++ * Called when the copy I/O has finished. kcopyd actually runs ++ * this code so don't block. ++ */ ++static void copy_callback(int err, void *context) ++{ ++ struct pending_exception *pe = (struct pending_exception *) context; ++ struct dm_snapshot *s = pe->snap; ++ ++ if (err) ++ pending_complete(pe, 0); ++ ++ else ++ /* Update the metadata if we are persistent */ ++ s->store.commit_exception(&s->store, &pe->e, commit_callback, ++ pe); ++} ++ ++/* ++ * Dispatches the copy operation to kcopyd. ++ */ ++static inline void start_copy(struct pending_exception *pe) ++{ ++ struct dm_snapshot *s = pe->snap; ++ struct kcopyd_region src, dest; ++ ++ src.dev = s->origin->dev; ++ src.sector = chunk_to_sector(s, pe->e.old_chunk); ++ src.count = s->chunk_size; ++ ++ dest.dev = s->cow->dev; ++ dest.sector = chunk_to_sector(s, pe->e.new_chunk); ++ dest.count = s->chunk_size; ++ ++ if (!pe->started) { ++ /* Hand over to kcopyd */ ++ kcopyd_copy(&src, &dest, copy_callback, pe); ++ pe->started = 1; ++ } ++} ++ ++/* ++ * Looks to see if this snapshot already has a pending exception ++ * for this chunk, otherwise it allocates a new one and inserts ++ * it into the pending table. ++ */ ++static struct pending_exception *find_pending_exception(struct dm_snapshot *s, ++ struct buffer_head *bh) ++{ ++ struct exception *e; ++ struct pending_exception *pe; ++ chunk_t chunk = sector_to_chunk(s, bh->b_rsector); ++ ++ /* ++ * Is there a pending exception for this already ? 
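++ * If so the caller just queues its buffer on it; otherwise a new
++ * pending_exception is allocated and a destination chunk reserved via
++ * prepare_exception().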
++ */ ++ e = lookup_exception(&s->pending, chunk); ++ if (e) { ++ /* cast the exception to a pending exception */ ++ pe = list_entry(e, struct pending_exception, e); ++ ++ } else { ++ /* Create a new pending exception */ ++ pe = alloc_pending_exception(); ++ if (!pe) { ++ DMWARN("Couldn't allocate pending exception."); ++ return NULL; ++ } ++ ++ pe->e.old_chunk = chunk; ++ pe->origin_bhs = pe->snapshot_bhs = NULL; ++ INIT_LIST_HEAD(&pe->siblings); ++ pe->snap = s; ++ pe->started = 0; ++ ++ if (s->store.prepare_exception(&s->store, &pe->e)) { ++ free_pending_exception(pe); ++ s->valid = 0; ++ return NULL; ++ } ++ ++ insert_exception(&s->pending, &pe->e); ++ } ++ ++ return pe; ++} ++ ++static inline void remap_exception(struct dm_snapshot *s, struct exception *e, ++ struct buffer_head *bh) ++{ ++ bh->b_rdev = s->cow->dev; ++ bh->b_rsector = chunk_to_sector(s, e->new_chunk) + ++ (bh->b_rsector & s->chunk_mask); ++} ++ ++static int snapshot_map(struct buffer_head *bh, int rw, void *context) ++{ ++ struct exception *e; ++ struct dm_snapshot *s = (struct dm_snapshot *) context; ++ int r = 1; ++ chunk_t chunk; ++ struct pending_exception *pe; ++ ++ chunk = sector_to_chunk(s, bh->b_rsector); ++ ++ /* Full snapshots are not usable */ ++ if (!s->valid) ++ return -1; ++ ++ /* ++ * Write to snapshot - higher level takes care of RW/RO ++ * flags so we should only get this if we are ++ * writeable. ++ */ ++ if (rw == WRITE) { ++ ++ down_write(&s->lock); ++ ++ /* If the block is already remapped - use that, else remap it */ ++ e = lookup_exception(&s->complete, chunk); ++ if (e) ++ remap_exception(s, e, bh); ++ ++ else { ++ pe = find_pending_exception(s, bh); ++ ++ if (!pe) { ++ s->store.drop_snapshot(&s->store); ++ s->valid = 0; ++ } ++ ++ queue_buffer(&pe->snapshot_bhs, bh); ++ start_copy(pe); ++ r = 0; ++ } ++ ++ up_write(&s->lock); ++ ++ } else { ++ /* ++ * FIXME: this read path scares me because we ++ * always use the origin when we have a pending ++ * exception. However I can't think of a ++ * situation where this is wrong - ejt. ++ */ ++ ++ /* Do reads */ ++ down_read(&s->lock); ++ ++ /* See if it it has been remapped */ ++ e = lookup_exception(&s->complete, chunk); ++ if (e) ++ remap_exception(s, e, bh); ++ else ++ bh->b_rdev = s->origin->dev; ++ ++ up_read(&s->lock); ++ } ++ ++ return r; ++} ++ ++static void list_merge(struct list_head *l1, struct list_head *l2) ++{ ++ struct list_head *l1_n, *l2_p; ++ ++ l1_n = l1->next; ++ l2_p = l2->prev; ++ ++ l1->next = l2; ++ l2->prev = l1; ++ ++ l2_p->next = l1_n; ++ l1_n->prev = l2_p; ++} ++ ++static int __origin_write(struct list_head *snapshots, struct buffer_head *bh) ++{ ++ int r = 1; ++ struct list_head *sl; ++ struct dm_snapshot *snap; ++ struct exception *e; ++ struct pending_exception *pe, *last = NULL; ++ chunk_t chunk; ++ ++ /* Do all the snapshots on this origin */ ++ list_for_each(sl, snapshots) { ++ snap = list_entry(sl, struct dm_snapshot, list); ++ ++ /* Only deal with valid snapshots */ ++ if (!snap->valid) ++ continue; ++ ++ down_write(&snap->lock); ++ ++ /* ++ * Remember, different snapshots can have ++ * different chunk sizes. ++ */ ++ chunk = sector_to_chunk(snap, bh->b_rsector); ++ ++ /* ++ * Check exception table to see if block ++ * is already remapped in this snapshot ++ * and trigger an exception if not. 
++ */ ++ e = lookup_exception(&snap->complete, chunk); ++ if (!e) { ++ pe = find_pending_exception(snap, bh); ++ if (!pe) { ++ snap->store.drop_snapshot(&snap->store); ++ snap->valid = 0; ++ ++ } else { ++ if (last) ++ list_merge(&pe->siblings, ++ &last->siblings); ++ ++ last = pe; ++ r = 0; ++ } ++ } ++ ++ up_write(&snap->lock); ++ } ++ ++ /* ++ * Now that we have a complete pe list we can start the copying. ++ */ ++ if (last) { ++ pe = last; ++ do { ++ down_write(&pe->snap->lock); ++ queue_buffer(&pe->origin_bhs, bh); ++ start_copy(pe); ++ up_write(&pe->snap->lock); ++ pe = list_entry(pe->siblings.next, ++ struct pending_exception, siblings); ++ ++ } while (pe != last); ++ } ++ ++ return r; ++} ++ ++static int snapshot_status(status_type_t type, char *result, ++ int maxlen, void *context) ++{ ++ struct dm_snapshot *snap = (struct dm_snapshot *) context; ++ char cow[16]; ++ char org[16]; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ if (!snap->valid) ++ snprintf(result, maxlen, "Invalid"); ++ else { ++ if (snap->store.percent_full) ++ snprintf(result, maxlen, "%d%%", ++ snap->store.percent_full(&snap-> ++ store)); ++ else ++ snprintf(result, maxlen, "Unknown"); ++ } ++ break; ++ ++ case STATUSTYPE_TABLE: ++ /* ++ * kdevname returns a static pointer so we need ++ * to make private copies if the output is to ++ * make sense. ++ */ ++ strncpy(cow, kdevname(snap->cow->dev), sizeof(cow)); ++ strncpy(org, kdevname(snap->origin->dev), sizeof(org)); ++ snprintf(result, maxlen, "%s %s %c %ld", org, cow, ++ snap->type, snap->chunk_size); ++ break; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Called on a write from the origin driver. ++ */ ++int do_origin(struct dm_dev *origin, struct buffer_head *bh) ++{ ++ struct origin *o; ++ int r; ++ ++ down_read(&_origins_lock); ++ o = __lookup_origin(origin->dev); ++ if (!o) ++ BUG(); ++ ++ r = __origin_write(&o->snapshots, bh); ++ up_read(&_origins_lock); ++ ++ return r; ++} ++ ++/* ++ * Origin: maps a linear range of a device, with hooks for snapshotting. ++ */ ++ ++/* ++ * Construct an origin mapping: ++ * The context for an origin is merely a 'struct dm_dev *' ++ * pointing to the real device. ++ */ ++static int origin_ctr(struct dm_table *t, offset_t b, offset_t l, ++ int argc, char **argv, void **context) ++{ ++ int r; ++ struct dm_dev *dev; ++ ++ if (argc != 1) { ++ *context = "dm-origin: incorrect number of arguments"; ++ return -EINVAL; ++ } ++ ++ r = dm_table_get_device(t, argv[0], 0, l, &dev); ++ if (r) { ++ *context = "Cannot get target device"; ++ return r; ++ } ++ ++ *context = dev; ++ ++ return 0; ++} ++ ++static void origin_dtr(struct dm_table *t, void *c) ++{ ++ struct dm_dev *dev = (struct dm_dev *) c; ++ dm_table_put_device(t, dev); ++} ++ ++static int origin_map(struct buffer_head *bh, int rw, void *context) ++{ ++ struct dm_dev *dev = (struct dm_dev *) context; ++ bh->b_rdev = dev->dev; ++ ++ /* Only tell snapshots if this is a write */ ++ return (rw == WRITE) ? 
do_origin(dev, bh) : 1; ++} ++ ++static int origin_status(status_type_t type, char *result, ++ int maxlen, void *context) ++{ ++ struct dm_dev *dev = (struct dm_dev *) context; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ result[0] = '\0'; ++ break; ++ ++ case STATUSTYPE_TABLE: ++ snprintf(result, maxlen, "%s", kdevname(dev->dev)); ++ break; ++ } ++ ++ return 0; ++} ++ ++static struct target_type origin_target = { ++ name: "snapshot-origin", ++ module: THIS_MODULE, ++ ctr: origin_ctr, ++ dtr: origin_dtr, ++ map: origin_map, ++ status: origin_status, ++ err: NULL ++}; ++ ++static struct target_type snapshot_target = { ++ name: "snapshot", ++ module: THIS_MODULE, ++ ctr: snapshot_ctr, ++ dtr: snapshot_dtr, ++ map: snapshot_map, ++ status: snapshot_status, ++ err: NULL ++}; ++ ++int __init dm_snapshot_init(void) ++{ ++ int r; ++ ++ r = dm_register_target(&snapshot_target); ++ if (r) { ++ DMERR("snapshot target register failed %d", r); ++ return r; ++ } ++ ++ r = dm_register_target(&origin_target); ++ if (r < 0) { ++ DMERR("Device mapper: Origin: register failed %d\n", r); ++ goto bad1; ++ } ++ ++ r = init_origin_hash(); ++ if (r) { ++ DMERR("init_origin_hash failed."); ++ goto bad2; ++ } ++ ++ exception_cache = kmem_cache_create("dm-snapshot-ex", ++ sizeof(struct exception), ++ __alignof__(struct exception), ++ 0, NULL, NULL); ++ if (!exception_cache) { ++ DMERR("Couldn't create exception cache."); ++ r = -ENOMEM; ++ goto bad3; ++ } ++ ++ pending_cache = ++ kmem_cache_create("dm-snapshot-in", ++ sizeof(struct pending_exception), ++ __alignof__(struct pending_exception), ++ 0, NULL, NULL); ++ if (!pending_cache) { ++ DMERR("Couldn't create pending cache."); ++ r = -ENOMEM; ++ goto bad4; ++ } ++ ++ pending_pool = mempool_create(128, mempool_alloc_slab, ++ mempool_free_slab, pending_cache); ++ if (!pending_pool) { ++ DMERR("Couldn't create pending pool."); ++ r = -ENOMEM; ++ goto bad5; ++ } ++ ++ return 0; ++ ++ bad5: ++ kmem_cache_destroy(pending_cache); ++ bad4: ++ kmem_cache_destroy(exception_cache); ++ bad3: ++ exit_origin_hash(); ++ bad2: ++ dm_unregister_target(&origin_target); ++ bad1: ++ dm_unregister_target(&snapshot_target); ++ return r; ++} ++ ++void dm_snapshot_exit(void) ++{ ++ int r; ++ ++ r = dm_unregister_target(&snapshot_target); ++ if (r) ++ DMERR("snapshot unregister failed %d", r); ++ ++ r = dm_unregister_target(&origin_target); ++ if (r) ++ DMERR("origin unregister failed %d", r); ++ ++ exit_origin_hash(); ++ mempool_destroy(pending_pool); ++ kmem_cache_destroy(pending_cache); ++ kmem_cache_destroy(exception_cache); ++} ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -ruN linux-2.4.19-rc1/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapshot.h +--- linux-2.4.19-rc1/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-snapshot.h Tue Jun 25 22:39:48 2002 +@@ -0,0 +1,147 @@ ++/* ++ * dm-snapshot.c ++ * ++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. 
++ */ ++ ++#ifndef DM_SNAPSHOT_H ++#define DM_SNAPSHOT_H ++ ++#include "dm.h" ++#include ++ ++struct exception_table { ++ uint32_t hash_mask; ++ struct list_head *table; ++}; ++ ++/* ++ * The snapshot code deals with largish chunks of the disk at a ++ * time. Typically 64k - 256k. ++ */ ++/* FIXME: can we get away with limiting these to a uint32_t ? */ ++typedef offset_t chunk_t; ++ ++/* ++ * An exception is used where an old chunk of data has been ++ * replaced by a new one. ++ */ ++struct exception { ++ struct list_head hash_list; ++ ++ chunk_t old_chunk; ++ chunk_t new_chunk; ++}; ++ ++/* ++ * Abstraction to handle the meta/layout of exception stores (the ++ * COW device). ++ */ ++struct exception_store { ++ ++ /* ++ * Destroys this object when you've finished with it. ++ */ ++ void (*destroy) (struct exception_store *store); ++ ++ /* ++ * Find somewhere to store the next exception. ++ */ ++ int (*prepare_exception) (struct exception_store *store, ++ struct exception *e); ++ ++ /* ++ * Update the metadata with this exception. ++ */ ++ void (*commit_exception) (struct exception_store *store, ++ struct exception *e, ++ void (*callback) (void *, int success), ++ void *callback_context); ++ ++ /* ++ * The snapshot is invalid, note this in the metadata. ++ */ ++ void (*drop_snapshot) (struct exception_store *store); ++ ++ /* ++ * Return the %age full of the snapshot ++ */ ++ int (*percent_full) (struct exception_store *store); ++ ++ struct dm_snapshot *snap; ++ void *context; ++}; ++ ++struct dm_snapshot { ++ struct rw_semaphore lock; ++ struct dm_table *table; ++ ++ struct dm_dev *origin; ++ struct dm_dev *cow; ++ ++ /* List of snapshots per Origin */ ++ struct list_head list; ++ ++ /* Size of data blocks saved - must be a power of 2 */ ++ chunk_t chunk_size; ++ chunk_t chunk_mask; ++ chunk_t chunk_shift; ++ ++ /* You can't use a snapshot if this is 0 (e.g. if full) */ ++ int valid; ++ ++ /* Used for display of table */ ++ char type; ++ ++ /* The last percentage we notified */ ++ int last_percent; ++ ++ struct exception_table pending; ++ struct exception_table complete; ++ ++ /* The on disk metadata handler */ ++ struct exception_store store; ++}; ++ ++/* ++ * Used by the exception stores to load exceptions hen ++ * initialising. ++ */ ++int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); ++ ++/* ++ * Constructor and destructor for the default persistent ++ * store. ++ */ ++int dm_create_persistent(struct exception_store *store, uint32_t chunk_size); ++ ++int dm_create_transient(struct exception_store *store, ++ struct dm_snapshot *s, int blocksize, void **error); ++ ++/* ++ * Return the number of sectors in the device. 
++ */ ++static inline offset_t get_dev_size(kdev_t dev) ++{ ++ int *sizes; ++ ++ sizes = blk_size[MAJOR(dev)]; ++ if (sizes) ++ return sizes[MINOR(dev)] << 1; ++ ++ return 0; ++} ++ ++static inline chunk_t sector_to_chunk(struct dm_snapshot *s, offset_t sector) ++{ ++ return (sector & ~s->chunk_mask) >> s->chunk_shift; ++} ++ ++static inline offset_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk) ++{ ++ return chunk << s->chunk_shift; ++} ++ ++#endif +diff -ruN linux-2.4.19-rc1/drivers/md/dm.c linux/drivers/md/dm.c +--- linux-2.4.19-rc1/drivers/md/dm.c Tue Jun 25 22:30:30 2002 ++++ linux/drivers/md/dm.c Tue Jun 25 22:31:54 2002 +@@ -5,6 +5,7 @@ + */ + + #include "dm.h" ++#include "kcopyd.h" + + #include + #include +@@ -268,6 +269,7 @@ + xx(dm_target) + xx(dm_linear) + xx(dm_stripe) ++ xx(dm_snapshot) + xx(dm_interface) + #undef xx + }; +diff -ruN linux-2.4.19-rc1/drivers/md/dm.h linux/drivers/md/dm.h +--- linux-2.4.19-rc1/drivers/md/dm.h Tue Jun 25 22:46:34 2002 ++++ linux/drivers/md/dm.h Tue Jun 25 22:39:45 2002 +@@ -179,6 +179,10 @@ + */ + void dm_table_event(struct dm_table *t); + ++/* Snapshots */ ++int dm_snapshot_init(void); ++void dm_snapshot_exit(void); ++ + #define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) + #define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) + #define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) +diff -ruN linux-2.4.19-rc1/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c +--- linux-2.4.19-rc1/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/kcopyd.c Tue Jun 25 22:31:08 2002 +@@ -0,0 +1,832 @@ ++/* ++ * Copyright (C) 2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "kcopyd.h" ++ ++/* FIXME: this is only needed for the DMERR macros */ ++#include "dm.h" ++ ++/* ++ * Hard sector size used all over the kernel. ++ */ ++#define SECTOR_SIZE 512 ++#define SECTOR_SHIFT 9 ++ ++static void wake_kcopyd(void); ++ ++/*----------------------------------------------------------------- ++ * We reserve our own pool of preallocated pages that are ++ * only used for kcopyd io. ++ *---------------------------------------------------------------*/ ++ ++/* ++ * FIXME: This should be configurable. 
++ */ ++#define NUM_PAGES 512 ++ ++static DECLARE_MUTEX(_pages_lock); ++static int _num_free_pages; ++static struct page *_pages_array[NUM_PAGES]; ++static DECLARE_MUTEX(start_lock); ++ ++static int init_pages(void) ++{ ++ int i; ++ struct page *p; ++ ++ for (i = 0; i < NUM_PAGES; i++) { ++ p = alloc_page(GFP_KERNEL); ++ if (!p) ++ goto bad; ++ ++ LockPage(p); ++ _pages_array[i] = p; ++ } ++ ++ _num_free_pages = NUM_PAGES; ++ return 0; ++ ++ bad: ++ while (i--) ++ __free_page(_pages_array[i]); ++ return -ENOMEM; ++} ++ ++static void exit_pages(void) ++{ ++ int i; ++ struct page *p; ++ ++ for (i = 0; i < NUM_PAGES; i++) { ++ p = _pages_array[i]; ++ UnlockPage(p); ++ __free_page(p); ++ } ++ ++ _num_free_pages = 0; ++} ++ ++static int kcopyd_get_pages(int num, struct page **result) ++{ ++ int i; ++ ++ down(&_pages_lock); ++ if (_num_free_pages < num) { ++ up(&_pages_lock); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < num; i++) { ++ _num_free_pages--; ++ result[i] = _pages_array[_num_free_pages]; ++ } ++ up(&_pages_lock); ++ ++ return 0; ++} ++ ++static void kcopyd_free_pages(int num, struct page **result) ++{ ++ int i; ++ ++ down(&_pages_lock); ++ for (i = 0; i < num; i++) ++ _pages_array[_num_free_pages++] = result[i]; ++ up(&_pages_lock); ++} ++ ++/*----------------------------------------------------------------- ++ * We keep our own private pool of buffer_heads. These are just ++ * held in a list on the b_reqnext field. ++ *---------------------------------------------------------------*/ ++ ++/* ++ * Make sure we have enough buffers to always keep the pages ++ * occupied. So we assume the worst case scenario where blocks ++ * are the size of a single sector. ++ */ ++#define NUM_BUFFERS NUM_PAGES * (PAGE_SIZE / SECTOR_SIZE) ++ ++static spinlock_t _buffer_lock = SPIN_LOCK_UNLOCKED; ++static struct buffer_head *_all_buffers; ++static struct buffer_head *_free_buffers; ++ ++static int init_buffers(void) ++{ ++ int i; ++ struct buffer_head *buffers; ++ ++ buffers = vcalloc(NUM_BUFFERS, sizeof(struct buffer_head)); ++ if (!buffers) { ++ DMWARN("Couldn't allocate buffer heads."); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < NUM_BUFFERS; i++) { ++ if (i < NUM_BUFFERS - 1) ++ buffers[i].b_reqnext = &buffers[i + 1]; ++ init_waitqueue_head(&buffers[i].b_wait); ++ INIT_LIST_HEAD(&buffers[i].b_inode_buffers); ++ } ++ ++ _all_buffers = _free_buffers = buffers; ++ return 0; ++} ++ ++static void exit_buffers(void) ++{ ++ vfree(_all_buffers); ++} ++ ++static struct buffer_head *alloc_buffer(void) ++{ ++ struct buffer_head *r; ++ int flags; ++ ++ spin_lock_irqsave(&_buffer_lock, flags); ++ ++ if (!_free_buffers) ++ r = NULL; ++ else { ++ r = _free_buffers; ++ _free_buffers = _free_buffers->b_reqnext; ++ r->b_reqnext = NULL; ++ } ++ ++ spin_unlock_irqrestore(&_buffer_lock, flags); ++ ++ return r; ++} ++ ++/* ++ * Only called from interrupt context. ++ */ ++static void free_buffer(struct buffer_head *bh) ++{ ++ int flags; ++ ++ spin_lock_irqsave(&_buffer_lock, flags); ++ bh->b_reqnext = _free_buffers; ++ _free_buffers = bh; ++ spin_unlock_irqrestore(&_buffer_lock, flags); ++} ++ ++/*----------------------------------------------------------------- ++ * kcopyd_jobs need to be allocated by the *clients* of kcopyd, ++ * for this reason we use a mempool to prevent the client from ++ * ever having to do io (which could cause a ++ * deadlock). 
++ *---------------------------------------------------------------*/ ++#define MIN_JOBS NUM_PAGES ++ ++static kmem_cache_t *_job_cache = NULL; ++static mempool_t *_job_pool = NULL; ++ ++/* ++ * We maintain three lists of jobs: ++ * ++ * i) jobs waiting for pages ++ * ii) jobs that have pages, and are waiting for the io to be issued. ++ * iii) jobs that have completed. ++ * ++ * All three of these are protected by job_lock. ++ */ ++ ++static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED; ++ ++static LIST_HEAD(_complete_jobs); ++static LIST_HEAD(_io_jobs); ++static LIST_HEAD(_pages_jobs); ++ ++static int init_jobs(void) ++{ ++ INIT_LIST_HEAD(&_complete_jobs); ++ INIT_LIST_HEAD(&_io_jobs); ++ INIT_LIST_HEAD(&_pages_jobs); ++ ++ _job_cache = kmem_cache_create("kcopyd-jobs", sizeof(struct kcopyd_job), ++ __alignof__(struct kcopyd_job), ++ 0, NULL, NULL); ++ if (!_job_cache) ++ return -ENOMEM; ++ ++ _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, ++ mempool_free_slab, _job_cache); ++ if (!_job_pool) { ++ kmem_cache_destroy(_job_cache); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static void exit_jobs(void) ++{ ++ mempool_destroy(_job_pool); ++ kmem_cache_destroy(_job_cache); ++} ++ ++struct kcopyd_job *kcopyd_alloc_job(void) ++{ ++ struct kcopyd_job *job; ++ ++ job = mempool_alloc(_job_pool, GFP_KERNEL); ++ if (!job) ++ return NULL; ++ ++ memset(job, 0, sizeof(*job)); ++ return job; ++} ++ ++void kcopyd_free_job(struct kcopyd_job *job) ++{ ++ mempool_free(job, _job_pool); ++} ++ ++/* ++ * Functions to push and pop a job onto the head of a given job ++ * list. ++ */ ++static inline struct kcopyd_job *pop(struct list_head *jobs) ++{ ++ struct kcopyd_job *job = NULL; ++ int flags; ++ ++ spin_lock_irqsave(&_job_lock, flags); ++ ++ if (!list_empty(jobs)) { ++ job = list_entry(jobs->next, struct kcopyd_job, list); ++ list_del(&job->list); ++ } ++ spin_unlock_irqrestore(&_job_lock, flags); ++ ++ return job; ++} ++ ++static inline void push(struct list_head *jobs, struct kcopyd_job *job) ++{ ++ int flags; ++ ++ spin_lock_irqsave(&_job_lock, flags); ++ list_add(&job->list, jobs); ++ spin_unlock_irqrestore(&_job_lock, flags); ++} ++ ++/* ++ * Completion function for one of our buffers. ++ */ ++static void end_bh(struct buffer_head *bh, int uptodate) ++{ ++ struct kcopyd_job *job = bh->b_private; ++ ++ mark_buffer_uptodate(bh, uptodate); ++ unlock_buffer(bh); ++ ++ if (!uptodate) ++ job->err = -EIO; ++ ++ /* are we the last ? */ ++ if (atomic_dec_and_test(&job->nr_incomplete)) { ++ push(&_complete_jobs, job); ++ wake_kcopyd(); ++ } ++ ++ free_buffer(bh); ++} ++ ++static void dispatch_bh(struct kcopyd_job *job, ++ struct buffer_head *bh, int block) ++{ ++ int p; ++ ++ /* ++ * Add in the job offset ++ */ ++ bh->b_blocknr = (job->disk.sector >> job->block_shift) + block; ++ ++ p = block >> job->bpp_shift; ++ block &= job->bpp_mask; ++ ++ bh->b_dev = B_FREE; ++ bh->b_size = job->block_size; ++ set_bh_page(bh, job->pages[p], ((block << job->block_shift) + ++ job->offset) << SECTOR_SHIFT); ++ bh->b_this_page = bh; ++ ++ init_buffer(bh, end_bh, job); ++ ++ bh->b_dev = job->disk.dev; ++ bh->b_state = ((1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req)); ++ ++ set_bit(BH_Uptodate, &bh->b_state); ++ if (job->rw == WRITE) ++ clear_bit(BH_Dirty, &bh->b_state); ++ ++ submit_bh(job->rw, bh); ++} ++ ++/* ++ * These three functions process 1 item from the corresponding ++ * job list. ++ * ++ * They return: ++ * < 0: error ++ * 0: success ++ * > 0: can't process yet. 
++ */ ++static int run_complete_job(struct kcopyd_job *job) ++{ ++ job->callback(job); ++ return 0; ++} ++ ++/* ++ * Request io on as many buffer heads as we can currently get for ++ * a particular job. ++ */ ++static int run_io_job(struct kcopyd_job *job) ++{ ++ unsigned int block; ++ struct buffer_head *bh; ++ ++ for (block = atomic_read(&job->nr_requested); ++ block < job->nr_blocks; block++) { ++ bh = alloc_buffer(); ++ if (!bh) ++ break; ++ ++ atomic_inc(&job->nr_requested); ++ dispatch_bh(job, bh, block); ++ } ++ ++ return (block == job->nr_blocks) ? 0 : 1; ++} ++ ++static int run_pages_job(struct kcopyd_job *job) ++{ ++ int r; ++ ++ job->nr_pages = (job->disk.count + job->offset) / ++ (PAGE_SIZE / SECTOR_SIZE); ++ r = kcopyd_get_pages(job->nr_pages, job->pages); ++ ++ if (!r) { ++ /* this job is ready for io */ ++ push(&_io_jobs, job); ++ return 0; ++ } ++ ++ if (r == -ENOMEM) ++ /* can complete now */ ++ return 1; ++ ++ return r; ++} ++ ++/* ++ * Run through a list for as long as possible. Returns the count ++ * of successful jobs. ++ */ ++static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) ++{ ++ struct kcopyd_job *job; ++ int r, count = 0; ++ ++ while ((job = pop(jobs))) { ++ ++ r = fn(job); ++ ++ if (r < 0) { ++ /* error this rogue job */ ++ job->err = r; ++ push(&_complete_jobs, job); ++ break; ++ } ++ ++ if (r > 0) { ++ /* ++ * We couldn't service this job ATM, so ++ * push this job back onto the list. ++ */ ++ push(jobs, job); ++ break; ++ } ++ ++ count++; ++ } ++ ++ return count; ++} ++ ++/* ++ * kcopyd does this every time it's woken up. ++ */ ++static void do_work(void) ++{ ++ int count; ++ ++ /* ++ * We loop round until there is no more work to do. ++ */ ++ do { ++ count = process_jobs(&_complete_jobs, run_complete_job); ++ count += process_jobs(&_io_jobs, run_io_job); ++ count += process_jobs(&_pages_jobs, run_pages_job); ++ ++ } while (count); ++ ++ run_task_queue(&tq_disk); ++} ++ ++/*----------------------------------------------------------------- ++ * The daemon ++ *---------------------------------------------------------------*/ ++static atomic_t _kcopyd_must_die; ++static DECLARE_MUTEX(_run_lock); ++static DECLARE_WAIT_QUEUE_HEAD(_job_queue); ++ ++static int kcopyd(void *arg) ++{ ++ DECLARE_WAITQUEUE(wq, current); ++ ++ daemonize(); ++ strcpy(current->comm, "kcopyd"); ++ atomic_set(&_kcopyd_must_die, 0); ++ ++ add_wait_queue(&_job_queue, &wq); ++ ++ down(&_run_lock); ++ up(&start_lock); ++ ++ while (1) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ if (atomic_read(&_kcopyd_must_die)) ++ break; ++ ++ do_work(); ++ schedule(); ++ } ++ ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&_job_queue, &wq); ++ ++ up(&_run_lock); ++ ++ return 0; ++} ++ ++static int start_daemon(void) ++{ ++ static pid_t pid = 0; ++ ++ down(&start_lock); ++ ++ pid = kernel_thread(kcopyd, NULL, 0); ++ if (pid <= 0) { ++ DMERR("Failed to start kcopyd thread"); ++ return -EAGAIN; ++ } ++ ++ /* ++ * wait for the daemon to up this mutex. 
++ */ ++ down(&start_lock); ++ up(&start_lock); ++ ++ return 0; ++} ++ ++static int stop_daemon(void) ++{ ++ atomic_set(&_kcopyd_must_die, 1); ++ wake_kcopyd(); ++ down(&_run_lock); ++ up(&_run_lock); ++ ++ return 0; ++} ++ ++static void wake_kcopyd(void) ++{ ++ wake_up_interruptible(&_job_queue); ++} ++ ++static int calc_shift(unsigned int n) ++{ ++ int s; ++ ++ for (s = 0; n; s++, n >>= 1) ++ ; ++ ++ return --s; ++} ++ ++static void calc_block_sizes(struct kcopyd_job *job) ++{ ++ job->block_size = get_hardsect_size(job->disk.dev); ++ job->block_shift = calc_shift(job->block_size / SECTOR_SIZE); ++ job->bpp_shift = PAGE_SHIFT - job->block_shift - SECTOR_SHIFT; ++ job->bpp_mask = (1 << job->bpp_shift) - 1; ++ job->nr_blocks = job->disk.count >> job->block_shift; ++ atomic_set(&job->nr_requested, 0); ++ atomic_set(&job->nr_incomplete, job->nr_blocks); ++} ++ ++int kcopyd_io(struct kcopyd_job *job) ++{ ++ calc_block_sizes(job); ++ push(job->pages[0] ? &_io_jobs : &_pages_jobs, job); ++ wake_kcopyd(); ++ return 0; ++} ++ ++/*----------------------------------------------------------------- ++ * The copier is implemented on top of the simpler async io ++ * daemon above. ++ *---------------------------------------------------------------*/ ++struct copy_info { ++ kcopyd_notify_fn notify; ++ void *notify_context; ++ ++ struct kcopyd_region to; ++}; ++ ++#define MIN_INFOS 128 ++static kmem_cache_t *_copy_cache = NULL; ++static mempool_t *_copy_pool = NULL; ++ ++static int init_copier(void) ++{ ++ _copy_cache = kmem_cache_create("kcopyd-info", ++ sizeof(struct copy_info), ++ __alignof__(struct copy_info), ++ 0, NULL, NULL); ++ if (!_copy_cache) ++ return -ENOMEM; ++ ++ _copy_pool = mempool_create(MIN_INFOS, mempool_alloc_slab, ++ mempool_free_slab, _copy_cache); ++ if (!_copy_pool) { ++ kmem_cache_destroy(_copy_cache); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static void exit_copier(void) ++{ ++ if (_copy_pool) ++ mempool_destroy(_copy_pool); ++ ++ if (_copy_cache) ++ kmem_cache_destroy(_copy_cache); ++} ++ ++static inline struct copy_info *alloc_copy_info(void) ++{ ++ return mempool_alloc(_copy_pool, GFP_KERNEL); ++} ++ ++static inline void free_copy_info(struct copy_info *info) ++{ ++ mempool_free(info, _copy_pool); ++} ++ ++void copy_complete(struct kcopyd_job *job) ++{ ++ struct copy_info *info = (struct copy_info *) job->context; ++ ++ if (info->notify) ++ info->notify(job->err, info->notify_context); ++ ++ free_copy_info(info); ++ ++ kcopyd_free_pages(job->nr_pages, job->pages); ++ ++ kcopyd_free_job(job); ++} ++ ++static void page_write_complete(struct kcopyd_job *job) ++{ ++ struct copy_info *info = (struct copy_info *) job->context; ++ int i; ++ ++ if (info->notify) ++ info->notify(job->err, info->notify_context); ++ ++ free_copy_info(info); ++ for (i = 0; i < job->nr_pages; i++) ++ put_page(job->pages[i]); ++ ++ kcopyd_free_job(job); ++} ++ ++/* ++ * These callback functions implement the state machine that copies regions. ++ */ ++void copy_write(struct kcopyd_job *job) ++{ ++ struct copy_info *info = (struct copy_info *) job->context; ++ ++ if (job->err && info->notify) { ++ info->notify(job->err, job->context); ++ kcopyd_free_job(job); ++ free_copy_info(info); ++ return; ++ } ++ ++ job->rw = WRITE; ++ memcpy(&job->disk, &info->to, sizeof(job->disk)); ++ job->callback = copy_complete; ++ job->context = info; ++ ++ /* ++ * Queue the write. 
++ */ ++ kcopyd_io(job); ++} ++ ++int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, ++ struct page **pages, int offset, kcopyd_notify_fn fn, ++ void *context) ++{ ++ struct copy_info *info; ++ struct kcopyd_job *job; ++ int i; ++ ++ /* ++ * Allocate a new copy_info. ++ */ ++ info = alloc_copy_info(); ++ if (!info) ++ return -ENOMEM; ++ ++ job = kcopyd_alloc_job(); ++ if (!job) { ++ free_copy_info(info); ++ return -ENOMEM; ++ } ++ ++ /* ++ * set up for the write. ++ */ ++ info->notify = fn; ++ info->notify_context = context; ++ memcpy(&info->to, to, sizeof(*to)); ++ ++ /* Get the pages */ ++ job->nr_pages = nr_pages; ++ for (i = 0; i < nr_pages; i++) { ++ get_page(pages[i]); ++ job->pages[i] = pages[i]; ++ } ++ ++ job->rw = WRITE; ++ ++ memcpy(&job->disk, &info->to, sizeof(job->disk)); ++ job->offset = offset; ++ calc_block_sizes(job); ++ job->callback = page_write_complete; ++ job->context = info; ++ ++ /* ++ * Trigger job. ++ */ ++ kcopyd_io(job); ++ return 0; ++} ++ ++int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, ++ kcopyd_notify_fn fn, void *context) ++{ ++ struct copy_info *info; ++ struct kcopyd_job *job; ++ ++ /* ++ * Allocate a new copy_info. ++ */ ++ info = alloc_copy_info(); ++ if (!info) ++ return -ENOMEM; ++ ++ job = kcopyd_alloc_job(); ++ if (!job) { ++ free_copy_info(info); ++ return -ENOMEM; ++ } ++ ++ /* ++ * set up for the read. ++ */ ++ info->notify = fn; ++ info->notify_context = context; ++ memcpy(&info->to, to, sizeof(*to)); ++ ++ job->rw = READ; ++ memcpy(&job->disk, from, sizeof(*from)); ++ ++ job->offset = 0; ++ calc_block_sizes(job); ++ job->callback = copy_write; ++ job->context = info; ++ ++ /* ++ * Trigger job. ++ */ ++ kcopyd_io(job); ++ return 0; ++} ++ ++/*----------------------------------------------------------------- ++ * Unit setup ++ *---------------------------------------------------------------*/ ++static struct { ++ int (*init) (void); ++ void (*exit) (void); ++ ++} _inits[] = { ++#define xx(n) { init_ ## n, exit_ ## n} ++ xx(pages), ++ xx(buffers), ++ xx(jobs), ++ xx(copier) ++#undef xx ++}; ++ ++static int _client_count = 0; ++static DECLARE_MUTEX(_client_count_sem); ++ ++static int kcopyd_init(void) ++{ ++ const int count = sizeof(_inits) / sizeof(*_inits); ++ ++ int r, i; ++ ++ for (i = 0; i < count; i++) { ++ r = _inits[i].init(); ++ if (r) ++ goto bad; ++ } ++ ++ start_daemon(); ++ return 0; ++ ++ bad: ++ while (i--) ++ _inits[i].exit(); ++ ++ return r; ++} ++ ++static void kcopyd_exit(void) ++{ ++ int i = sizeof(_inits) / sizeof(*_inits); ++ ++ if (stop_daemon()) ++ DMWARN("Couldn't stop kcopyd."); ++ ++ while (i--) ++ _inits[i].exit(); ++} ++ ++void kcopyd_inc_client_count(void) ++{ ++ /* ++ * What I need here is an atomic_test_and_inc that returns ++ * the previous value of the atomic... In its absence I lock ++ * an int with a semaphore. :-( ++ */ ++ down(&_client_count_sem); ++ if (_client_count == 0) ++ kcopyd_init(); ++ _client_count++; ++ ++ up(&_client_count_sem); ++} ++ ++void kcopyd_dec_client_count(void) ++{ ++ down(&_client_count_sem); ++ if (--_client_count == 0) ++ kcopyd_exit(); ++ ++ up(&_client_count_sem); ++} +diff -ruN linux-2.4.19-rc1/drivers/md/kcopyd.h linux/drivers/md/kcopyd.h +--- linux-2.4.19-rc1/drivers/md/kcopyd.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/kcopyd.h Tue Jun 25 22:39:45 2002 +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (C) 2001 Sistina Software ++ * ++ * This file is released under the GPL. 
++ */ ++ ++#ifndef DM_KCOPYD_H ++#define DM_KCOPYD_H ++ ++/* ++ * Needed for the definition of offset_t. ++ */ ++#include ++#include ++ ++struct kcopyd_region { ++ kdev_t dev; ++ offset_t sector; ++ offset_t count; ++}; ++ ++#define MAX_KCOPYD_PAGES 128 ++ ++struct kcopyd_job { ++ struct list_head list; ++ ++ /* ++ * Error state of the job. ++ */ ++ int err; ++ ++ /* ++ * Either READ or WRITE ++ */ ++ int rw; ++ ++ /* ++ * The source or destination for the transfer. ++ */ ++ struct kcopyd_region disk; ++ ++ int nr_pages; ++ struct page *pages[MAX_KCOPYD_PAGES]; ++ ++ /* ++ * Shifts and masks that will be useful when dispatching ++ * each buffer_head. ++ */ ++ offset_t offset; ++ offset_t block_size; ++ offset_t block_shift; ++ offset_t bpp_shift; /* blocks per page */ ++ offset_t bpp_mask; ++ ++ /* ++ * nr_blocks is how many buffer heads will have to be ++ * displatched to service this job, nr_requested is how ++ * many have been dispatched and nr_complete is how many ++ * have come back. ++ */ ++ unsigned int nr_blocks; ++ atomic_t nr_requested; ++ atomic_t nr_incomplete; ++ ++ /* ++ * Set this to ensure you are notified when the job has ++ * completed. 'context' is for callback to use. ++ */ ++ void (*callback)(struct kcopyd_job *job); ++ void *context; ++}; ++ ++/* ++ * Low level async io routines. ++ */ ++struct kcopyd_job *kcopyd_alloc_job(void); ++void kcopyd_free_job(struct kcopyd_job *job); ++ ++int kcopyd_queue_job(struct kcopyd_job *job); ++ ++/* ++ * Submit a copy job to kcopyd. This is built on top of the ++ * previous three fns. ++ */ ++typedef void (*kcopyd_notify_fn)(int err, void *context); ++ ++int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, ++ kcopyd_notify_fn fn, void *context); ++ ++int kcopyd_write_pages(struct kcopyd_region *to, int nr_pages, ++ struct page **pages, int offset, kcopyd_notify_fn fn, ++ void *context); ++ ++/* ++ * We only want kcopyd to reserve resources if someone is ++ * actually using it. ++ */ ++void kcopyd_inc_client_count(void); ++void kcopyd_dec_client_count(void); ++ ++#endif diff --git a/patches/common/linux-2.4.19-rc1-devmapper_5_mirror.patch b/patches/common/linux-2.4.19-rc1-devmapper_5_mirror.patch new file mode 100644 index 0000000..fd3c7c5 --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-devmapper_5_mirror.patch @@ -0,0 +1,384 @@ +diff -ruN linux-2.4.19-rc1/drivers/md/Makefile linux/drivers/md/Makefile +--- linux-2.4.19-rc1/drivers/md/Makefile Tue Jun 25 22:31:17 2002 ++++ linux/drivers/md/Makefile Tue Jun 25 22:33:49 2002 +@@ -9,7 +9,7 @@ + lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o + dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \ + dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \ +- kcopyd.o ++ kcopyd.o dm-mirror.o + + # Note: link order is important. All raid personalities + # and xor.o must come before md.o, as they each initialise +diff -ruN linux-2.4.19-rc1/drivers/md/dm-mirror.c linux/drivers/md/dm-mirror.c +--- linux-2.4.19-rc1/drivers/md/dm-mirror.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-mirror.c Tue Jun 25 22:33:59 2002 +@@ -0,0 +1,343 @@ ++/* ++ * Copyright (C) 2002 Sistina Software (UK) Limited. ++ * ++ * This file is released under the GPL. ++ */ ++ ++#include "dm.h" ++#include "kcopyd.h" ++ ++#include ++#include ++#include ++ ++/* kcopyd priority of mirror operations */ ++#define MIRROR_COPY_PRIORITY 5 ++ ++/* ++ * The percentage increment we will wake up users at ++ */ ++#define WAKE_UP_PERCENT 5 ++ ++/* ++ * Mirror: maps a mirror range of a device. 
++ */ ++struct mirror_c { ++ struct dm_dev *fromdev; ++ struct dm_dev *todev; ++ ++ unsigned long from_delta; ++ unsigned long to_delta; ++ ++ unsigned long frompos; ++ unsigned long topos; ++ ++ unsigned int chunksize; ++ unsigned long got_to; ++ unsigned long size; ++ struct rw_semaphore lock; ++ struct buffer_head *bhstring; ++ ++ struct dm_table *table; ++ ++ int last_percent; ++ ++ int error; ++}; ++ ++/* Called when a duplicating I/O has finished */ ++static void mirror_callback(int err, void *context) ++{ ++ struct mirror_c *lc = (struct mirror_c *) context; ++ ++ /* Flag error if it failed */ ++ if (err) { ++ DMERR("Mirror copy to %s failed", kdevname(lc->todev->dev)); ++ lc->error = 1; ++ dm_table_event(lc->table); ++ } ++} ++ ++static void mirror_bh(struct mirror_c *mc, struct buffer_head *bh) ++{ ++ struct kcopyd_region dest; ++ ++ dest.dev = mc->todev->dev; ++ dest.sector = bh->b_rsector - mc->from_delta + mc->to_delta; ++ dest.count = bh->b_size / 512; ++ kcopyd_write_pages(&dest, 1, &bh->b_page, ++ ((long) bh->b_data - ++ (long) page_address(bh->b_page)) / 512, ++ mirror_callback, mc); ++} ++ ++/* Called when the copy I/O has finished */ ++static void copy_callback(int err, void *context) ++{ ++ struct mirror_c *lc = (struct mirror_c *) context; ++ struct buffer_head *bh; ++ ++ /* Submit, and mirror any pending BHs */ ++ down_write(&lc->lock); ++ ++ bh = lc->bhstring; ++ lc->bhstring = NULL; ++ up_write(&lc->lock); ++ ++ while (bh) { ++ struct buffer_head *nextbh = bh->b_reqnext; ++ bh->b_reqnext = NULL; ++ mirror_bh(lc, bh); ++ generic_make_request(WRITE, bh); ++ bh = nextbh; ++ } ++ ++ if (err) { ++ DMERR("Mirror block IO failed"); /* More detail to follow... */ ++ lc->error = 1; ++ return; ++ } ++ if (lc->got_to + lc->chunksize < lc->size) { ++ int pc = (lc->got_to - lc->from_delta) * 100 / lc->size; ++ struct kcopyd_region src, dest; ++ ++ /* Wake up any listeners if we've reached a milestone percentage */ ++ if (pc >= lc->last_percent + WAKE_UP_PERCENT) { ++ dm_table_event(lc->table); ++ lc->last_percent = pc - pc % WAKE_UP_PERCENT; ++ } ++ ++ /* Do next chunk */ ++ lc->got_to += lc->chunksize; ++ ++ src.dev = lc->fromdev->dev; ++ src.sector = lc->frompos + lc->got_to; ++ src.count = min((unsigned long) lc->chunksize, ++ lc->size - lc->got_to); ++ ++ dest.dev = lc->todev->dev; ++ dest.sector = lc->topos + lc->got_to; ++ dest.count = src.count; ++ ++ if (kcopyd_copy(&src, &dest, copy_callback, lc)) { ++ lc->error = 1; ++ return; ++ } ++ } else { ++ /* Finished */ ++ dm_table_event(lc->table); ++ lc->got_to = lc->size; ++ } ++} ++ ++/* ++ * Construct a mirror mapping: [] ++ */ ++static int mirror_ctr(struct dm_table *t, offset_t b, offset_t l, ++ int argc, char **argv, void **context) ++{ ++ struct mirror_c *lc; ++ unsigned long offset1, offset2; ++ char *value; ++ int priority = MIRROR_COPY_PRIORITY; ++ int chunksize; ++ struct kcopyd_region src, dest; ++ ++ if (argc <= 4) { ++ *context = "dm-mirror: Not enough arguments"; ++ return -EINVAL; ++ } ++ ++ lc = kmalloc(sizeof(*lc), GFP_KERNEL); ++ if (lc == NULL) { ++ *context = "dm-mirror: Cannot allocate mirror context"; ++ return -ENOMEM; ++ } ++ ++ if (dm_table_get_device(t, argv[0], 0, l, &lc->fromdev)) { ++ *context = "dm-mirror: Device lookup failed"; ++ goto bad; ++ } ++ ++ offset1 = simple_strtoul(argv[1], &value, 10); ++ if (value == NULL) { ++ *context = "Invalid offset for dev1"; ++ dm_table_put_device(t, lc->fromdev); ++ goto bad; ++ } ++ ++ if (dm_table_get_device(t, argv[2], 0, l, &lc->todev)) { ++ *context = 
"dm-mirror: Device lookup failed"; ++ dm_table_put_device(t, lc->fromdev); ++ goto bad; ++ } ++ ++ offset2 = simple_strtoul(argv[3], &value, 10); ++ if (value == NULL) { ++ *context = "Invalid offset for dev2"; ++ goto bad_put; ++ } ++ ++ chunksize = simple_strtoul(argv[4], &value, 10); ++ if (value == NULL || chunksize == 16) { ++ *context = "Invalid chunk size value"; ++ goto bad_put; ++ } ++ ++ if (argc > 5) { ++ priority = simple_strtoul(argv[5], &value, 10); ++ if (value == NULL) { ++ *context = "Invalid priority value"; ++ goto bad_put; ++ } ++ } ++ ++ lc->from_delta = (int) offset1 - (int) b; ++ lc->to_delta = (int) offset2 - (int) b; ++ lc->frompos = offset1; ++ lc->topos = offset2; ++ lc->error = 0; ++ lc->bhstring = NULL; ++ lc->size = l - offset1; ++ lc->last_percent = 0; ++ lc->got_to = 0; ++ lc->chunksize = chunksize; ++ lc->table = t; ++ init_rwsem(&lc->lock); ++ *context = lc; ++ ++ /* Tell kcopyd to do the biz */ ++ src.dev = lc->fromdev->dev; ++ src.sector = offset1; ++ src.count = min((unsigned long) chunksize, lc->size); ++ ++ dest.dev = lc->todev->dev; ++ dest.sector = offset2; ++ dest.count = src.count; ++ ++ kcopyd_inc_client_count(); ++ ++ if (kcopyd_copy(&src, &dest, copy_callback, lc)) { ++ DMERR("block copy call failed"); ++ dm_table_put_device(t, lc->fromdev); ++ dm_table_put_device(t, lc->todev); ++ kcopyd_dec_client_count(); ++ goto bad; ++ } ++ return 0; ++ ++ bad_put: ++ dm_table_put_device(t, lc->fromdev); ++ dm_table_put_device(t, lc->todev); ++ bad: ++ kfree(lc); ++ return -EINVAL; ++} ++ ++static void mirror_dtr(struct dm_table *t, void *c) ++{ ++ struct mirror_c *lc = (struct mirror_c *) c; ++ ++ dm_table_put_device(t, lc->fromdev); ++ dm_table_put_device(t, lc->todev); ++ kfree(c); ++ kcopyd_dec_client_count(); ++} ++ ++static int mirror_map(struct buffer_head *bh, int rw, void *context) ++{ ++ struct mirror_c *lc = (struct mirror_c *) context; ++ ++ bh->b_rdev = lc->fromdev->dev; ++ bh->b_rsector = bh->b_rsector + lc->from_delta; ++ ++ if (rw == WRITE) { ++ down_write(&lc->lock); ++ ++ /* ++ * If this area is in flight then save it until it's ++ * committed to the mirror disk and then submit it and ++ * its mirror. 
++ */ ++ if (bh->b_rsector > lc->got_to && ++ bh->b_rsector <= lc->got_to + lc->chunksize) { ++ bh->b_reqnext = lc->bhstring; ++ lc->bhstring = bh; ++ up_write(&lc->lock); ++ return 0; ++ } ++ ++ /* ++ * If we've already copied this block then duplicate ++ * it to the mirror device ++ */ ++ if (bh->b_rsector < lc->got_to) { ++ mirror_bh(lc, bh); ++ } ++ up_write(&lc->lock); ++ } ++ return 1; ++} ++ ++static int mirror_status(status_type_t sts_type, char *result, int maxlen, ++ void *context) ++{ ++ struct mirror_c *mc = (struct mirror_c *) context; ++ ++ switch (sts_type) { ++ case STATUSTYPE_INFO: ++ if (mc->error) ++ snprintf(result, maxlen, "Error"); ++ else ++ snprintf(result, maxlen, "%ld%%", ++ (mc->got_to - ++ mc->from_delta) * 100 / mc->size); ++ break; ++ ++ case STATUSTYPE_TABLE: ++ snprintf(result, maxlen, "%s %ld %s %ld %d", ++ kdevname(mc->fromdev->dev), mc->frompos, ++ kdevname(mc->todev->dev), mc->topos, mc->chunksize); ++ break; ++ } ++ return 0; ++} ++ ++static struct target_type mirror_target = { ++ name: "mirror", ++ module: THIS_MODULE, ++ ctr: mirror_ctr, ++ dtr: mirror_dtr, ++ map: mirror_map, ++ status: mirror_status, ++}; ++ ++int __init dm_mirror_init(void) ++{ ++ int r; ++ ++ r = dm_register_target(&mirror_target); ++ if (r < 0) { ++ DMERR("mirror: register failed %d", r); ++ } ++ return r; ++} ++ ++void dm_mirror_exit(void) ++{ ++ int r = dm_unregister_target(&mirror_target); ++ ++ if (r < 0) ++ DMERR("mirror: unregister failed %d", r); ++} ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -ruN linux-2.4.19-rc1/drivers/md/dm.c linux/drivers/md/dm.c +--- linux-2.4.19-rc1/drivers/md/dm.c Tue Jun 25 22:31:54 2002 ++++ linux/drivers/md/dm.c Tue Jun 25 22:34:34 2002 +@@ -270,6 +270,7 @@ + xx(dm_linear) + xx(dm_stripe) + xx(dm_snapshot) ++ xx(dm_mirror) + xx(dm_interface) + #undef xx + }; +diff -ruN linux-2.4.19-rc1/drivers/md/dm.h linux/drivers/md/dm.h +--- linux-2.4.19-rc1/drivers/md/dm.h Tue Jun 25 22:33:11 2002 ++++ linux/drivers/md/dm.h Tue Jun 25 22:34:15 2002 +@@ -183,6 +183,10 @@ + int dm_snapshot_init(void); + void dm_snapshot_exit(void); + ++/* dm-mirror.c */ ++int dm_mirror_init(void); ++void dm_mirror_exit(void); ++ + #define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) + #define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) + #define DMINFO(f, x...) 
printk(KERN_INFO DM_NAME ": " f "\n" , ## x) diff --git a/patches/common/linux-2.4.19-rc1-mempool.patch b/patches/common/linux-2.4.19-rc1-mempool.patch new file mode 100644 index 0000000..f9374fe --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-mempool.patch @@ -0,0 +1,336 @@ +diff -Nru linux-2.4.19-rc1/include/linux/mempool.h linux/include/linux/mempool.h +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ linux/include/linux/mempool.h Tue Apr 23 20:55:52 2002 +@@ -0,0 +1,33 @@ ++/* ++ * memory buffer pool support ++ */ ++#ifndef _LINUX_MEMPOOL_H ++#define _LINUX_MEMPOOL_H ++ ++#include ++#include ++ ++struct mempool_s; ++typedef struct mempool_s mempool_t; ++ ++typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data); ++typedef void (mempool_free_t)(void *element, void *pool_data); ++ ++struct mempool_s { ++ spinlock_t lock; ++ int min_nr, curr_nr; ++ struct list_head elements; ++ ++ void *pool_data; ++ mempool_alloc_t *alloc; ++ mempool_free_t *free; ++ wait_queue_head_t wait; ++}; ++extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, ++ mempool_free_t *free_fn, void *pool_data); ++extern void mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask); ++extern void mempool_destroy(mempool_t *pool); ++extern void * mempool_alloc(mempool_t *pool, int gfp_mask); ++extern void mempool_free(void *element, mempool_t *pool); ++ ++#endif /* _LINUX_MEMPOOL_H */ +diff -Nru linux-2.4.19-rc1/mm/Makefile linux/mm/Makefile +--- linux-2.4.19-rc1/mm/Makefile Mon Mar 25 14:40:15 2002 ++++ linux/mm/Makefile Mon Mar 25 14:40:15 2002 +@@ -9,12 +9,12 @@ + + O_TARGET := mm.o + +-export-objs := shmem.o filemap.o memory.o page_alloc.o ++export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o + + obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ + vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ + page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ +- shmem.o ++ shmem.o mempool.o + + obj-$(CONFIG_HIGHMEM) += highmem.o + +diff -Nru linux-2.4.19-rc1/mm/mempool.c b/mm/mempool.c +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ linux/mm/mempool.c Tue Apr 23 20:55:52 2002 +@@ -0,0 +1,277 @@ ++/* ++ * linux/mm/mempool.c ++ * ++ * memory buffer pool support. Such pools are mostly used ++ * for guaranteed, deadlock-free memory allocations during ++ * extreme VM load. ++ * ++ * started by Ingo Molnar, Copyright (C) 2001 ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * mempool_create - create a memory pool ++ * @min_nr: the minimum number of elements guaranteed to be ++ * allocated for this pool. ++ * @alloc_fn: user-defined element-allocation function. ++ * @free_fn: user-defined element-freeing function. ++ * @pool_data: optional private data available to the user-defined functions. ++ * ++ * this function creates and allocates a guaranteed size, preallocated ++ * memory pool. The pool can be used from the mempool_alloc and mempool_free ++ * functions. This function might sleep. Both the alloc_fn() and the free_fn() ++ * functions might sleep - as long as the mempool_alloc function is not called ++ * from IRQ contexts. The element allocated by alloc_fn() must be able to ++ * hold a struct list_head. (8 bytes on x86.) 
++ */ ++mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, ++ mempool_free_t *free_fn, void *pool_data) ++{ ++ mempool_t *pool; ++ int i; ++ ++ pool = kmalloc(sizeof(*pool), GFP_KERNEL); ++ if (!pool) ++ return NULL; ++ memset(pool, 0, sizeof(*pool)); ++ ++ spin_lock_init(&pool->lock); ++ pool->min_nr = min_nr; ++ pool->pool_data = pool_data; ++ INIT_LIST_HEAD(&pool->elements); ++ init_waitqueue_head(&pool->wait); ++ pool->alloc = alloc_fn; ++ pool->free = free_fn; ++ ++ /* ++ * First pre-allocate the guaranteed number of buffers. ++ */ ++ for (i = 0; i < min_nr; i++) { ++ void *element; ++ struct list_head *tmp; ++ element = pool->alloc(GFP_KERNEL, pool->pool_data); ++ ++ if (unlikely(!element)) { ++ /* ++ * Not enough memory - free the allocated ones ++ * and return: ++ */ ++ list_for_each(tmp, &pool->elements) { ++ element = tmp; ++ pool->free(element, pool->pool_data); ++ } ++ kfree(pool); ++ ++ return NULL; ++ } ++ tmp = element; ++ list_add(tmp, &pool->elements); ++ pool->curr_nr++; ++ } ++ return pool; ++} ++ ++/** ++ * mempool_resize - resize an existing memory pool ++ * @pool: pointer to the memory pool which was allocated via ++ * mempool_create(). ++ * @new_min_nr: the new minimum number of elements guaranteed to be ++ * allocated for this pool. ++ * @gfp_mask: the usual allocation bitmask. ++ * ++ * This function shrinks/grows the pool. In the case of growing, ++ * it cannot be guaranteed that the pool will be grown to the new ++ * size immediately, but new mempool_free() calls will refill it. ++ * ++ * Note, the caller must guarantee that no mempool_destroy is called ++ * while this function is running. mempool_alloc() & mempool_free() ++ * might be called (eg. from IRQ contexts) while this function executes. ++ */ ++void mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask) ++{ ++ int delta; ++ void *element; ++ unsigned long flags; ++ struct list_head *tmp; ++ ++ if (new_min_nr <= 0) ++ BUG(); ++ ++ spin_lock_irqsave(&pool->lock, flags); ++ if (new_min_nr < pool->min_nr) { ++ pool->min_nr = new_min_nr; ++ /* ++ * Free possible excess elements. ++ */ ++ while (pool->curr_nr > pool->min_nr) { ++ tmp = pool->elements.next; ++ if (tmp == &pool->elements) ++ BUG(); ++ list_del(tmp); ++ element = tmp; ++ pool->curr_nr--; ++ spin_unlock_irqrestore(&pool->lock, flags); ++ ++ pool->free(element, pool->pool_data); ++ ++ spin_lock_irqsave(&pool->lock, flags); ++ } ++ spin_unlock_irqrestore(&pool->lock, flags); ++ return; ++ } ++ delta = new_min_nr - pool->min_nr; ++ pool->min_nr = new_min_nr; ++ spin_unlock_irqrestore(&pool->lock, flags); ++ ++ /* ++ * We refill the pool up to the new treshold - but we dont ++ * (cannot) guarantee that the refill succeeds. ++ */ ++ while (delta) { ++ element = pool->alloc(gfp_mask, pool->pool_data); ++ if (!element) ++ break; ++ mempool_free(element, pool); ++ delta--; ++ } ++} ++ ++/** ++ * mempool_destroy - deallocate a memory pool ++ * @pool: pointer to the memory pool which was allocated via ++ * mempool_create(). ++ * ++ * this function only sleeps if the free_fn() function sleeps. The caller ++ * has to guarantee that no mempool_alloc() nor mempool_free() happens in ++ * this pool when calling this function. 
++ */ ++void mempool_destroy(mempool_t *pool) ++{ ++ void *element; ++ struct list_head *head, *tmp; ++ ++ if (!pool) ++ return; ++ ++ head = &pool->elements; ++ for (tmp = head->next; tmp != head; ) { ++ element = tmp; ++ tmp = tmp->next; ++ pool->free(element, pool->pool_data); ++ pool->curr_nr--; ++ } ++ if (pool->curr_nr) ++ BUG(); ++ kfree(pool); ++} ++ ++/** ++ * mempool_alloc - allocate an element from a specific memory pool ++ * @pool: pointer to the memory pool which was allocated via ++ * mempool_create(). ++ * @gfp_mask: the usual allocation bitmask. ++ * ++ * this function only sleeps if the alloc_fn function sleeps or ++ * returns NULL. Note that due to preallocation, this function ++ * *never* fails when called from process contexts. (it might ++ * fail if called from an IRQ context.) ++ */ ++void * mempool_alloc(mempool_t *pool, int gfp_mask) ++{ ++ void *element; ++ unsigned long flags; ++ struct list_head *tmp; ++ int curr_nr; ++ DECLARE_WAITQUEUE(wait, current); ++ int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); ++ ++repeat_alloc: ++ element = pool->alloc(gfp_nowait, pool->pool_data); ++ if (likely(element != NULL)) ++ return element; ++ ++ /* ++ * If the pool is less than 50% full then try harder ++ * to allocate an element: ++ */ ++ if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) { ++ element = pool->alloc(gfp_mask, pool->pool_data); ++ if (likely(element != NULL)) ++ return element; ++ } ++ ++ /* ++ * Kick the VM at this point. ++ */ ++ wakeup_bdflush(); ++ ++ spin_lock_irqsave(&pool->lock, flags); ++ if (likely(pool->curr_nr)) { ++ tmp = pool->elements.next; ++ list_del(tmp); ++ element = tmp; ++ pool->curr_nr--; ++ spin_unlock_irqrestore(&pool->lock, flags); ++ return element; ++ } ++ spin_unlock_irqrestore(&pool->lock, flags); ++ ++ /* We must not sleep in the GFP_ATOMIC case */ ++ if (gfp_mask == gfp_nowait) ++ return NULL; ++ ++ run_task_queue(&tq_disk); ++ ++ add_wait_queue_exclusive(&pool->wait, &wait); ++ set_task_state(current, TASK_UNINTERRUPTIBLE); ++ ++ spin_lock_irqsave(&pool->lock, flags); ++ curr_nr = pool->curr_nr; ++ spin_unlock_irqrestore(&pool->lock, flags); ++ ++ if (!curr_nr) ++ schedule(); ++ ++ current->state = TASK_RUNNING; ++ remove_wait_queue(&pool->wait, &wait); ++ ++ goto repeat_alloc; ++} ++ ++/** ++ * mempool_free - return an element to the pool. ++ * @element: pool element pointer. ++ * @pool: pointer to the memory pool which was allocated via ++ * mempool_create(). ++ * ++ * this function only sleeps if the free_fn() function sleeps. 
++ */ ++void mempool_free(void *element, mempool_t *pool) ++{ ++ unsigned long flags; ++ ++ if (pool->curr_nr < pool->min_nr) { ++ spin_lock_irqsave(&pool->lock, flags); ++ if (pool->curr_nr < pool->min_nr) { ++ list_add(element, &pool->elements); ++ pool->curr_nr++; ++ spin_unlock_irqrestore(&pool->lock, flags); ++ wake_up(&pool->wait); ++ return; ++ } ++ spin_unlock_irqrestore(&pool->lock, flags); ++ } ++ pool->free(element, pool->pool_data); ++} ++ ++EXPORT_SYMBOL(mempool_create); ++EXPORT_SYMBOL(mempool_resize); ++EXPORT_SYMBOL(mempool_destroy); ++EXPORT_SYMBOL(mempool_alloc); ++EXPORT_SYMBOL(mempool_free); ++ diff --git a/patches/common/linux-2.4.19-rc1-mempool_slab.patch b/patches/common/linux-2.4.19-rc1-mempool_slab.patch new file mode 100644 index 0000000..b7dafe0 --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-mempool_slab.patch @@ -0,0 +1,49 @@ +diff -Nru linux-2.4.19-rc1/include/linux/mempool.h linux/include/linux/mempool.h +--- linux-2.4.19-rc1/include/linux/mempool.h Wed Dec 31 16:00:00 2001 ++++ linux/include/linux/mempool.h Tue Apr 23 20:55:52 2002 +@@ -29,5 +29,13 @@ + extern void mempool_destroy(mempool_t *pool); + extern void * mempool_alloc(mempool_t *pool, int gfp_mask); + extern void mempool_free(void *element, mempool_t *pool); + ++ ++/* ++ * A mempool_alloc_t and mempool_free_t that get the memory from ++ * a slab that is passed in through pool_data. ++ */ ++void *mempool_alloc_slab(int gfp_mask, void *pool_data); ++void mempool_free_slab(void *element, void *pool_data); ++ + #endif /* _LINUX_MEMPOOL_H */ +diff -Nru linux-2.4.19-rc1/mm/mempool.c linux/mm/mempool.c +--- linux-2.4.19-rc1/mm/mempool.c Wed Dec 31 16:00:00 1969 ++++ linux/mm/mempool.c Tue Apr 23 20:55:52 2002 +@@ -268,10 +268,28 @@ + } + pool->free(element, pool->pool_data); + } + ++/* ++ * A commonly used alloc and free fn. ++ */ ++void *mempool_alloc_slab(int gfp_mask, void *pool_data) ++{ ++ kmem_cache_t *mem = (kmem_cache_t *) pool_data; ++ return kmem_cache_alloc(mem, gfp_mask); ++} ++ ++void mempool_free_slab(void *element, void *pool_data) ++{ ++ kmem_cache_t *mem = (kmem_cache_t *) pool_data; ++ kmem_cache_free(mem, element); ++} ++ ++ + EXPORT_SYMBOL(mempool_create); + EXPORT_SYMBOL(mempool_resize); + EXPORT_SYMBOL(mempool_destroy); + EXPORT_SYMBOL(mempool_alloc); + EXPORT_SYMBOL(mempool_free); ++EXPORT_SYMBOL(mempool_alloc_slab); ++EXPORT_SYMBOL(mempool_free_slab); + diff --git a/patches/common/linux-2.4.19-rc1-vcalloc.patch b/patches/common/linux-2.4.19-rc1-vcalloc.patch new file mode 100644 index 0000000..d5daee6 --- /dev/null +++ b/patches/common/linux-2.4.19-rc1-vcalloc.patch @@ -0,0 +1,37 @@ +diff -Nru linux-2.4.19-rc1/mm/vmalloc.c linux/mm/vmalloc.c +--- linux-2.4.19-rc1/mm/vmalloc.c Wed Jun 12 12:04:44 2002 ++++ linux/mm/vmalloc.c Thu Jun 13 13:13:44 2002 +@@ -321,3 +321,22 @@ + read_unlock(&vmlist_lock); + return buf - buf_start; + } ++ ++void *vcalloc(unsigned long nmemb, unsigned long elem_size) ++{ ++ unsigned long size; ++ void *addr; ++ ++ /* ++ * Check that we're not going to overflow. 
++ */ ++ if (nmemb > (ULONG_MAX / elem_size)) ++ return NULL; ++ ++ size = nmemb * elem_size; ++ addr = vmalloc(size); ++ if (addr) ++ memset(addr, 0, size); ++ ++ return addr; ++} +diff -Nru linux-2.4.19-rc1/include/linux/vmalloc.h linux/include/linux/vmalloc.h +--- linux-2.4.19-rc1/include/linux/vmalloc.h Wed Jun 12 12:35:58 2002 ++++ linux/include/linux/vmalloc.h Thu Jun 13 13:13:39 2002 +@@ -25,6 +25,7 @@ + extern void vmfree_area_pages(unsigned long address, unsigned long size); + extern int vmalloc_area_pages(unsigned long address, unsigned long size, + int gfp_mask, pgprot_t prot); ++extern void *vcalloc(unsigned long nmemb, unsigned long elem_size); + + /* + * Allocate any pages diff --git a/patches/linux-2.4.19-pre8-devmapper-ioctl.patch b/patches/linux-2.4.19-pre8-devmapper-ioctl.patch deleted file mode 100644 index 9f8e643..0000000 --- a/patches/linux-2.4.19-pre8-devmapper-ioctl.patch +++ /dev/null @@ -1,7107 +0,0 @@ -diff -ruN linux-2.4.19-pre8/drivers/md/Config.in linux/drivers/md/Config.in ---- linux-2.4.19-pre8/drivers/md/Config.in Mon May 20 16:37:23 2002 -+++ linux/drivers/md/Config.in Mon May 20 16:58:11 2002 -@@ -14,5 +14,6 @@ - dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD - - dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD -+dep_tristate ' Device mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD - - endmenu -diff -ruN linux-2.4.19-pre8/drivers/md/Makefile linux/drivers/md/Makefile ---- linux-2.4.19-pre8/drivers/md/Makefile Mon May 20 16:37:23 2002 -+++ linux/drivers/md/Makefile Mon May 20 17:03:40 2002 -@@ -4,9 +4,12 @@ - - O_TARGET := mddev.o - --export-objs := md.o xor.o -+export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o - list-multi := lvm-mod.o - lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o -+dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \ -+ dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \ -+ kcopyd.o - - # Note: link order is important. All raid personalities - # and xor.o must come before md.o, as they each initialise -@@ -20,8 +23,12 @@ - obj-$(CONFIG_MD_MULTIPATH) += multipath.o - obj-$(CONFIG_BLK_DEV_MD) += md.o - obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o -+obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o - - include $(TOPDIR)/Rules.make - - lvm-mod.o: $(lvm-mod-objs) - $(LD) -r -o $@ $(lvm-mod-objs) -+ -+dm-mod.o: $(dm-mod-objs) -+ $(LD) -r -o $@ $(dm-mod-objs) -diff -ruN linux-2.4.19-pre8/drivers/md/dm-exception-store.c linux/drivers/md/dm-exception-store.c ---- linux-2.4.19-pre8/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-exception-store.c Mon May 20 14:29:15 2002 -@@ -0,0 +1,727 @@ -+/* -+ * dm-snapshot.c -+ * -+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm-snapshot.h" -+#include "kcopyd.h" -+#include -+#include -+ -+#define SECTOR_SIZE 512 -+#define SECTOR_SHIFT 9 -+ -+/*----------------------------------------------------------------- -+ * Persistent snapshots, by persistent we mean that the snapshot -+ * will survive a reboot. -+ *---------------------------------------------------------------*/ -+ -+/* -+ * We need to store a record of which parts of the origin have -+ * been copied to the snapshot device. The snapshot code -+ * requires that we copy exception chunks to chunk aligned areas -+ * of the COW store. It makes sense therefore, to store the -+ * metadata in chunk size blocks. 
-+ * -+ * There is no backward or forward compatibility implemented, -+ * snapshots with different disk versions than the kernel will -+ * not be usable. It is expected that "lvcreate" will blank out -+ * the start of a fresh COW device before calling the snapshot -+ * constructor. -+ * -+ * The first chunk of the COW device just contains the header. -+ * After this there is a chunk filled with exception metadata, -+ * followed by as many exception chunks as can fit in the -+ * metadata areas. -+ * -+ * All on disk structures are in little-endian format. The end -+ * of the exceptions info is indicated by an exception with a -+ * new_chunk of 0, which is invalid since it would point to the -+ * header chunk. -+ */ -+ -+/* -+ * Magic for persistent snapshots: "SnAp" - Feeble isn't it. -+ */ -+#define SNAP_MAGIC 0x70416e53 -+ -+/* -+ * The on-disk version of the metadata. -+ */ -+#define SNAPSHOT_DISK_VERSION 1 -+ -+struct disk_header { -+ uint32_t magic; -+ -+ /* -+ * Is this snapshot valid. There is no way of recovering -+ * an invalid snapshot. -+ */ -+ int valid; -+ -+ /* -+ * Simple, incrementing version. no backward -+ * compatibility. -+ */ -+ uint32_t version; -+ -+ /* In sectors */ -+ uint32_t chunk_size; -+}; -+ -+struct disk_exception { -+ uint64_t old_chunk; -+ uint64_t new_chunk; -+}; -+ -+struct commit_callback { -+ void (*callback)(void *, int success); -+ void *context; -+}; -+ -+/* -+ * The top level structure for a persistent exception store. -+ */ -+struct pstore { -+ struct dm_snapshot *snap; /* up pointer to my snapshot */ -+ int version; -+ int valid; -+ uint32_t chunk_size; -+ uint32_t exceptions_per_area; -+ -+ /* -+ * Now that we have an asynchronous kcopyd there is no -+ * need for large chunk sizes, so it wont hurt to have a -+ * whole chunks worth of metadata in memory at once. -+ */ -+ void *area; -+ struct kiobuf *iobuf; -+ -+ /* -+ * Used to keep track of which metadata area the data in -+ * 'chunk' refers to. -+ */ -+ uint32_t current_area; -+ -+ /* -+ * The next free chunk for an exception. -+ */ -+ uint32_t next_free; -+ -+ /* -+ * The index of next free exception in the current -+ * metadata area. -+ */ -+ uint32_t current_committed; -+ -+ atomic_t pending_count; -+ uint32_t callback_count; -+ struct commit_callback *callbacks; -+}; -+ -+/* -+ * For performance reasons we want to defer writing a committed -+ * exceptions metadata to disk so that we can amortise away this -+ * exensive operation. -+ * -+ * For the initial version of this code we will remain with -+ * synchronous io. There are some deadlock issues with async -+ * that I haven't yet worked out. -+ */ -+static int do_io(int rw, struct kcopyd_region *where, struct kiobuf *iobuf) -+{ -+ int i, sectors_per_block, nr_blocks, start; -+ int blocksize = get_hardsect_size(where->dev); -+ int status; -+ -+ sectors_per_block = blocksize / SECTOR_SIZE; -+ -+ nr_blocks = where->count / sectors_per_block; -+ start = where->sector / sectors_per_block; -+ -+ for (i = 0; i < nr_blocks; i++) -+ iobuf->blocks[i] = start++; -+ -+ iobuf->length = where->count << 9; -+ iobuf->locked = 1; -+ -+ status = brw_kiovec(rw, 1, &iobuf, where->dev, iobuf->blocks, -+ blocksize); -+ if (status != (where->count << 9)) -+ return -EIO; -+ -+ return 0; -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION ( 2, 4, 19) -+/* -+ * FIXME: Remove once 2.4.19 has been released. 
-+ */ -+struct page *vmalloc_to_page(void *vmalloc_addr) -+{ -+ unsigned long addr = (unsigned long) vmalloc_addr; -+ struct page *page = NULL; -+ pmd_t *pmd; -+ pte_t *pte; -+ pgd_t *pgd; -+ -+ pgd = pgd_offset_k(addr); -+ if (!pgd_none(*pgd)) { -+ pmd = pmd_offset(pgd, addr); -+ if (!pmd_none(*pmd)) { -+ pte = pte_offset(pmd, addr); -+ if (pte_present(*pte)) { -+ page = pte_page(*pte); -+ } -+ } -+ } -+ return page; -+} -+#endif -+ -+static int allocate_iobuf(struct pstore *ps) -+{ -+ size_t i, r = -ENOMEM, len, nr_pages; -+ struct page *page; -+ -+ len = ps->chunk_size << SECTOR_SHIFT; -+ -+ /* -+ * Allocate the chunk_size block of memory that will hold -+ * a single metadata area. -+ */ -+ ps->area = vmalloc(len); -+ if (!ps->area) -+ return r; -+ -+ if (alloc_kiovec(1, &ps->iobuf)) -+ goto bad; -+ -+ if (alloc_kiobuf_bhs(ps->iobuf)) -+ goto bad; -+ -+ nr_pages = ps->chunk_size / (PAGE_SIZE / SECTOR_SIZE); -+ r = expand_kiobuf(ps->iobuf, nr_pages); -+ if (r) -+ goto bad; -+ -+ /* -+ * We lock the pages for ps->area into memory since they'll be -+ * doing a lot of io. -+ */ -+ for (i = 0; i < nr_pages; i++) { -+ page = vmalloc_to_page(ps->area + (i * PAGE_SIZE)); -+ LockPage(page); -+ ps->iobuf->maplist[i] = page; -+ ps->iobuf->nr_pages++; -+ } -+ -+ ps->iobuf->nr_pages = nr_pages; -+ ps->iobuf->offset = 0; -+ -+ return 0; -+ -+ bad: -+ if (ps->iobuf) -+ free_kiovec(1, &ps->iobuf); -+ -+ if (ps->area) -+ vfree(ps->area); -+ ps->iobuf = NULL; -+ return r; -+} -+ -+static void free_iobuf(struct pstore *ps) -+{ -+ int i; -+ -+ for (i = 0; i < ps->iobuf->nr_pages; i++) -+ UnlockPage(ps->iobuf->maplist[i]); -+ ps->iobuf->locked = 0; -+ -+ free_kiovec(1, &ps->iobuf); -+ vfree(ps->area); -+} -+ -+/* -+ * Read or write a chunk aligned and sized block of data from a device. -+ */ -+static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) -+{ -+ int r; -+ struct kcopyd_region where; -+ -+ where.dev = ps->snap->cow->dev; -+ where.sector = ps->chunk_size * chunk; -+ where.count = ps->chunk_size; -+ -+ r = do_io(rw, &where, ps->iobuf); -+ if (r) -+ return r; -+ -+ return 0; -+} -+ -+/* -+ * Read or write a metadata area. Remembering to skip the first -+ * chunk which holds the header. 
-+ */ -+static int area_io(struct pstore *ps, uint32_t area, int rw) -+{ -+ int r; -+ uint32_t chunk; -+ -+ /* convert a metadata area index to a chunk index */ -+ chunk = 1 + ((ps->exceptions_per_area + 1) * area); -+ -+ r = chunk_io(ps, chunk, rw); -+ if (r) -+ return r; -+ -+ ps->current_area = area; -+ return 0; -+} -+ -+static int zero_area(struct pstore *ps, uint32_t area) -+{ -+ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); -+ return area_io(ps, area, WRITE); -+} -+ -+static int read_header(struct pstore *ps, int *new_snapshot) -+{ -+ int r; -+ struct disk_header *dh; -+ -+ r = chunk_io(ps, 0, READ); -+ if (r) -+ return r; -+ -+ dh = (struct disk_header *) ps->area; -+ -+ if (dh->magic == 0) { -+ *new_snapshot = 1; -+ -+ } else if (dh->magic == SNAP_MAGIC) { -+ *new_snapshot = 0; -+ ps->valid = dh->valid; -+ ps->version = dh->version; -+ ps->chunk_size = dh->chunk_size; -+ -+ } else { -+ DMWARN("Invalid/corrupt snapshot"); -+ r = -ENXIO; -+ } -+ -+ return r; -+} -+ -+static int write_header(struct pstore *ps) -+{ -+ struct disk_header *dh; -+ -+ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); -+ -+ dh = (struct disk_header *) ps->area; -+ dh->magic = SNAP_MAGIC; -+ dh->valid = ps->valid; -+ dh->version = ps->version; -+ dh->chunk_size = ps->chunk_size; -+ -+ return chunk_io(ps, 0, WRITE); -+} -+ -+/* -+ * Access functions for the disk exceptions, these do the endian conversions. -+ */ -+static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) -+{ -+ if (index >= ps->exceptions_per_area) -+ return NULL; -+ -+ return ((struct disk_exception *) ps->area) + index; -+} -+ -+static int read_exception(struct pstore *ps, -+ uint32_t index, struct disk_exception *result) -+{ -+ struct disk_exception *e; -+ -+ e = get_exception(ps, index); -+ if (!e) -+ return -EINVAL; -+ -+ /* copy it */ -+ result->old_chunk = le64_to_cpu(e->old_chunk); -+ result->new_chunk = le64_to_cpu(e->new_chunk); -+ -+ return 0; -+} -+ -+static int write_exception(struct pstore *ps, -+ uint32_t index, struct disk_exception *de) -+{ -+ struct disk_exception *e; -+ -+ e = get_exception(ps, index); -+ if (!e) -+ return -EINVAL; -+ -+ /* copy it */ -+ e->old_chunk = cpu_to_le64(de->old_chunk); -+ e->new_chunk = cpu_to_le64(de->new_chunk); -+ -+ return 0; -+} -+ -+/* -+ * Registers the exceptions that are present in the current area. -+ * 'full' is filled in to indicate if the area has been -+ * filled. -+ */ -+static int insert_exceptions(struct pstore *ps, int *full) -+{ -+ int i, r; -+ struct disk_exception de; -+ -+ /* presume the area is full */ -+ *full = 1; -+ -+ for (i = 0; i < ps->exceptions_per_area; i++) { -+ r = read_exception(ps, i, &de); -+ -+ if (r) -+ return r; -+ -+ /* -+ * If the new_chunk is pointing at the start of -+ * the COW device, where the first metadata area -+ * is we know that we've hit the end of the -+ * exceptions. Therefore the area is not full. -+ */ -+ if (de.new_chunk == 0LL) { -+ ps->current_committed = i; -+ *full = 0; -+ break; -+ } -+ -+ /* -+ * Keep track of the start of the free chunks. -+ */ -+ if (ps->next_free <= de.new_chunk) -+ ps->next_free = de.new_chunk + 1; -+ -+ /* -+ * Otherwise we add the exception to the snapshot. -+ */ -+ r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); -+ if (r) -+ return r; -+ } -+ -+ return 0; -+} -+ -+static int read_exceptions(struct pstore *ps) -+{ -+ uint32_t area; -+ int r, full = 1; -+ -+ /* -+ * Keeping reading chunks and inserting exceptions until -+ * we find a partially full area. 
-+ */ -+ for (area = 0; full; area++) { -+ r = area_io(ps, area, READ); -+ if (r) -+ return r; -+ -+ r = insert_exceptions(ps, &full); -+ if (r) -+ return r; -+ -+ area++; -+ } -+ -+ return 0; -+} -+ -+static inline struct pstore *get_info(struct exception_store *store) -+{ -+ return (struct pstore *) store->context; -+} -+ -+static int persistent_percentfull(struct exception_store *store) -+{ -+ struct pstore *ps = get_info(store); -+ return (ps->next_free * store->snap->chunk_size * 100) / -+ get_dev_size(store->snap->cow->dev); -+} -+ -+static void persistent_destroy(struct exception_store *store) -+{ -+ struct pstore *ps = get_info(store); -+ -+ vfree(ps->callbacks); -+ free_iobuf(ps); -+ kfree(ps); -+} -+ -+static int persistent_prepare(struct exception_store *store, -+ struct exception *e) -+{ -+ struct pstore *ps = get_info(store); -+ uint32_t stride; -+ offset_t size = get_dev_size(store->snap->cow->dev); -+ -+ /* Is there enough room ? */ -+ if (size <= (ps->next_free * store->snap->chunk_size)) -+ return -ENOSPC; -+ -+ e->new_chunk = ps->next_free; -+ -+ /* -+ * Move onto the next free pending, making sure to take -+ * into account the location of the metadata chunks. -+ */ -+ stride = (ps->exceptions_per_area + 1); -+ if (!(++ps->next_free % stride)) -+ ps->next_free++; -+ -+ atomic_inc(&ps->pending_count); -+ return 0; -+} -+ -+static void persistent_commit(struct exception_store *store, -+ struct exception *e, -+ void (*callback) (void *, int success), -+ void *callback_context) -+{ -+ int r, i; -+ struct pstore *ps = get_info(store); -+ struct disk_exception de; -+ struct commit_callback *cb; -+ -+ de.old_chunk = e->old_chunk; -+ de.new_chunk = e->new_chunk; -+ write_exception(ps, ps->current_committed++, &de); -+ -+ /* -+ * Add the callback to the back of the array. This code -+ * is the only place where the callback array is -+ * manipulated, and we know that it will never be called -+ * multiple times concurrently. -+ */ -+ cb = ps->callbacks + ps->callback_count++; -+ cb->callback = callback; -+ cb->context = callback_context; -+ -+ /* -+ * If there are no more exceptions in flight, or we have -+ * filled this metadata area we commit the exceptions to -+ * disk. -+ */ -+ if (atomic_dec_and_test(&ps->pending_count) || -+ (ps->current_committed == ps->exceptions_per_area)) { -+ r = area_io(ps, ps->current_area, WRITE); -+ if (r) -+ ps->valid = 0; -+ -+ for (i = 0; i < ps->callback_count; i++) { -+ cb = ps->callbacks + i; -+ cb->callback(cb->context, r == 0 ? 1 : 0); -+ } -+ -+ ps->callback_count = 0; -+ } -+ -+ /* -+ * Have we completely filled the current area ? 
-+ */ -+ if (ps->current_committed == ps->exceptions_per_area) { -+ ps->current_committed = 0; -+ r = zero_area(ps, ps->current_area + 1); -+ if (r) -+ ps->valid = 0; -+ } -+} -+ -+static void persistent_drop(struct exception_store *store) -+{ -+ struct pstore *ps = get_info(store); -+ -+ ps->valid = 0; -+ if (write_header(ps)) -+ DMWARN("write header failed"); -+} -+ -+int dm_create_persistent(struct exception_store *store, uint32_t chunk_size) -+{ -+ int r, new_snapshot; -+ struct pstore *ps; -+ -+ /* allocate the pstore */ -+ ps = kmalloc(sizeof(*ps), GFP_KERNEL); -+ if (!ps) -+ return -ENOMEM; -+ -+ ps->snap = store->snap; -+ ps->valid = 1; -+ ps->version = SNAPSHOT_DISK_VERSION; -+ ps->chunk_size = chunk_size; -+ ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) / -+ sizeof(struct disk_exception); -+ ps->next_free = 2; /* skipping the header and first area */ -+ ps->current_committed = 0; -+ -+ r = allocate_iobuf(ps); -+ if (r) -+ goto bad; -+ -+ /* -+ * Allocate space for all the callbacks. -+ */ -+ ps->callback_count = 0; -+ atomic_set(&ps->pending_count, 0); -+ ps->callbacks = vmalloc(sizeof(*ps->callbacks) * -+ ps->exceptions_per_area); -+ -+ if (!ps->callbacks) -+ goto bad; -+ -+ /* -+ * Read the snapshot header. -+ */ -+ r = read_header(ps, &new_snapshot); -+ if (r) -+ goto bad; -+ -+ /* -+ * Do we need to setup a new snapshot ? -+ */ -+ if (new_snapshot) { -+ r = write_header(ps); -+ if (r) { -+ DMWARN("write_header failed"); -+ goto bad; -+ } -+ -+ r = zero_area(ps, 0); -+ if (r) { -+ DMWARN("zero_area(0) failed"); -+ goto bad; -+ } -+ -+ } else { -+ /* -+ * Sanity checks. -+ */ -+ if (ps->chunk_size != chunk_size) { -+ DMWARN("chunk size for existing snapshot different " -+ "from that requested"); -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ if (ps->version != SNAPSHOT_DISK_VERSION) { -+ DMWARN("unable to handle snapshot disk version %d", -+ ps->version); -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ /* -+ * Read the metadata. -+ */ -+ r = read_exceptions(ps); -+ if (r) -+ goto bad; -+ } -+ -+ store->destroy = persistent_destroy; -+ store->prepare_exception = persistent_prepare; -+ store->commit_exception = persistent_commit; -+ store->drop_snapshot = persistent_drop; -+ store->percent_full = persistent_percentfull; -+ store->context = ps; -+ -+ return r; -+ -+ bad: -+ if (ps) { -+ if (ps->callbacks) -+ vfree(ps->callbacks); -+ -+ if (ps->iobuf) -+ free_iobuf(ps); -+ -+ kfree(ps); -+ } -+ return r; -+} -+ -+/*----------------------------------------------------------------- -+ * Implementation of the store for non-persistent snapshots. 
-+ *---------------------------------------------------------------*/ -+struct transient_c { -+ offset_t next_free; -+}; -+ -+void transient_destroy(struct exception_store *store) -+{ -+ kfree(store->context); -+} -+ -+int transient_prepare(struct exception_store *store, struct exception *e) -+{ -+ struct transient_c *tc = (struct transient_c *) store->context; -+ offset_t size = get_dev_size(store->snap->cow->dev); -+ -+ if (size < (tc->next_free + store->snap->chunk_size)) -+ return -1; -+ -+ e->new_chunk = sector_to_chunk(store->snap, tc->next_free); -+ tc->next_free += store->snap->chunk_size; -+ -+ return 0; -+} -+ -+void transient_commit(struct exception_store *store, -+ struct exception *e, -+ void (*callback) (void *, int success), -+ void *callback_context) -+{ -+ /* Just succeed */ -+ callback(callback_context, 1); -+} -+ -+static int transient_percentfull(struct exception_store *store) -+{ -+ struct transient_c *tc = (struct transient_c *) store->context; -+ return (tc->next_free * 100) / get_dev_size(store->snap->cow->dev); -+} -+ -+int dm_create_transient(struct exception_store *store, -+ struct dm_snapshot *s, int blocksize, void **error) -+{ -+ struct transient_c *tc; -+ -+ memset(store, 0, sizeof(*store)); -+ store->destroy = transient_destroy; -+ store->prepare_exception = transient_prepare; -+ store->commit_exception = transient_commit; -+ store->percent_full = transient_percentfull; -+ store->snap = s; -+ -+ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); -+ if (!tc) -+ return -ENOMEM; -+ -+ tc->next_free = 0; -+ store->context = tc; -+ -+ return 0; -+} -diff -ruN linux-2.4.19-pre8/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c ---- linux-2.4.19-pre8/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-ioctl.c Mon May 20 14:27:13 2002 -@@ -0,0 +1,776 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+#include -+ -+static void free_params(struct dm_ioctl *param) -+{ -+ vfree(param); -+} -+ -+static int version(struct dm_ioctl *user) -+{ -+ return copy_to_user(user, DM_DRIVER_VERSION, sizeof(DM_DRIVER_VERSION)); -+} -+ -+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param) -+{ -+ struct dm_ioctl tmp, *dmi; -+ -+ if (copy_from_user(&tmp, user, sizeof(tmp))) -+ return -EFAULT; -+ -+ if (strcmp(DM_IOCTL_VERSION, tmp.version)) { -+ DMWARN("struct dm_ioctl version incompatible"); -+ return -EINVAL; -+ } -+ -+ if (tmp.data_size < sizeof(tmp)) -+ return -EINVAL; -+ -+ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); -+ if (!dmi) -+ return -ENOMEM; -+ -+ if (copy_from_user(dmi, user, tmp.data_size)) { -+ vfree(dmi); -+ return -EFAULT; -+ } -+ -+ *param = dmi; -+ return 0; -+} -+ -+static int validate_params(uint cmd, struct dm_ioctl *param) -+{ -+ /* Unless creating, either name of uuid but not both */ -+ if (cmd != DM_CREATE_CMD) { -+ if ((!*param->uuid && !*param->name) || -+ (*param->uuid && *param->name)) { -+ DMWARN("one of name or uuid must be supplied"); -+ return -EINVAL; -+ } -+ } -+ -+ /* Ensure strings are terminated */ -+ param->name[DM_NAME_LEN - 1] = '\0'; -+ param->uuid[DM_UUID_LEN - 1] = '\0'; -+ -+ return 0; -+} -+ -+/* -+ * Check a string doesn't overrun the chunk of -+ * memory we copied from userland. 
-+ */ -+static int valid_str(char *str, void *begin, void *end) -+{ -+ while (((void *) str >= begin) && ((void *) str < end)) -+ if (!*str++) -+ return 0; -+ -+ return -EINVAL; -+} -+ -+static int next_target(struct dm_target_spec *last, unsigned long next, -+ void *begin, void *end, -+ struct dm_target_spec **spec, char **params) -+{ -+ *spec = (struct dm_target_spec *) -+ ((unsigned char *) last + next); -+ *params = (char *) (*spec + 1); -+ -+ if (*spec < (last + 1) || ((void *) *spec > end)) -+ return -EINVAL; -+ -+ return valid_str(*params, begin, end); -+} -+ -+/* -+ * Checks to see if there's a gap in the table. -+ * Returns true iff there is a gap. -+ */ -+static int gap(struct dm_table *table, struct dm_target_spec *spec) -+{ -+ if (!table->num_targets) -+ return (spec->sector_start > 0) ? 1 : 0; -+ -+ if (spec->sector_start != table->highs[table->num_targets - 1] + 1) -+ return 1; -+ -+ return 0; -+} -+ -+static int populate_table(struct dm_table *table, struct dm_ioctl *args) -+{ -+ int i = 0, r, first = 1, argc; -+ struct dm_target_spec *spec; -+ char *params, *argv[MAX_ARGS]; -+ struct target_type *ttype; -+ void *context, *begin, *end; -+ offset_t highs = 0; -+ -+ if (!args->target_count) { -+ DMWARN("populate_table: no targets specified"); -+ return -EINVAL; -+ } -+ -+ begin = (void *) args; -+ end = begin + args->data_size; -+ -+#define PARSE_ERROR(msg) {DMWARN(msg); return -EINVAL;} -+ -+ for (i = 0; i < args->target_count; i++) { -+ -+ r = first ? next_target((struct dm_target_spec *) args, -+ args->data_start, -+ begin, end, &spec, ¶ms) : -+ next_target(spec, spec->next, begin, end, &spec, ¶ms); -+ -+ if (r) -+ PARSE_ERROR("unable to find target"); -+ -+ /* Look up the target type */ -+ ttype = dm_get_target_type(spec->target_type); -+ if (!ttype) -+ PARSE_ERROR("unable to find target type"); -+ -+ if (gap(table, spec)) -+ PARSE_ERROR("gap in target ranges"); -+ -+ /* Split up the parameter list */ -+ if (split_args(MAX_ARGS, &argc, argv, params) < 0) -+ PARSE_ERROR("Too many arguments"); -+ -+ /* Build the target */ -+ if (ttype->ctr(table, spec->sector_start, spec->length, -+ argc, argv, &context)) { -+ DMWARN("%s: target constructor failed", -+ (char *) context); -+ return -EINVAL; -+ } -+ -+ /* Add the target to the table */ -+ highs = spec->sector_start + (spec->length - 1); -+ if (dm_table_add_target(table, highs, ttype, context)) -+ PARSE_ERROR("internal error adding target to table"); -+ -+ first = 0; -+ } -+ -+#undef PARSE_ERROR -+ -+ r = dm_table_complete(table); -+ return r; -+} -+ -+/* -+ * Round up the ptr to the next 'align' boundary. Obviously -+ * 'align' must be a power of 2. -+ */ -+static inline void *align_ptr(void *ptr, unsigned int align) -+{ -+ align--; -+ return (void *) (((unsigned long) (ptr + align)) & ~align); -+} -+ -+/* -+ * Copies a dm_ioctl and an optional additional payload to -+ * userland. -+ */ -+static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param, -+ void *data, unsigned long len) -+{ -+ int r; -+ void *ptr = NULL; -+ -+ strncpy(param->version, DM_IOCTL_VERSION, sizeof(param->version)); -+ -+ if (data) { -+ ptr = align_ptr(user + 1, sizeof(unsigned long)); -+ param->data_start = ptr - (void *) user; -+ } -+ -+ r = copy_to_user(user, param, sizeof(*param)); -+ if (r) -+ return r; -+ -+ if (data) { -+ if (param->data_start + len > param->data_size) -+ return -ENOSPC; -+ r = copy_to_user(ptr, data, len); -+ } -+ -+ return r; -+} -+ -+/* -+ * Fills in a dm_ioctl structure, ready for sending back to -+ * userland. 
-+ */ -+static void __info(struct mapped_device *md, struct dm_ioctl *param) -+{ -+ param->flags = DM_EXISTS_FLAG; -+ if (md->suspended) -+ param->flags |= DM_SUSPEND_FLAG; -+ if (md->read_only) -+ param->flags |= DM_READONLY_FLAG; -+ -+ strncpy(param->name, md->name, sizeof(param->name)); -+ -+ if (md->uuid) -+ strncpy(param->uuid, md->uuid, sizeof(param->uuid) - 1); -+ else -+ param->uuid[0] = '\0'; -+ -+ param->open_count = md->use_count; -+ param->dev = kdev_t_to_nr(md->dev); -+ param->target_count = md->map->num_targets; -+} -+ -+/* -+ * Always use UUID for lookups if it's present, otherwise use name. -+ */ -+static inline char *lookup_name(struct dm_ioctl *param) -+{ -+ return (*param->uuid) ? param->uuid : param->name; -+} -+ -+static inline int lookup_type(struct dm_ioctl *param) -+{ -+ return (*param->uuid) ? DM_LOOKUP_BY_UUID : DM_LOOKUP_BY_NAME; -+} -+ -+#define ALIGNMENT sizeof(int) -+static void *_align(void *ptr, unsigned int a) -+{ -+ register unsigned long align = --a; -+ -+ return (void *) (((unsigned long) ptr + align) & ~align); -+} -+ -+/* -+ * Build up the status struct for each target -+ */ -+static int __status(struct mapped_device *md, struct dm_ioctl *param, -+ char *outbuf, int *len) -+{ -+ int i; -+ struct dm_target_spec *spec; -+ unsigned long long sector = 0LL; -+ char *outptr; -+ status_type_t type; -+ -+ if (param->flags & DM_STATUS_TABLE_FLAG) -+ type = STATUSTYPE_TABLE; -+ else -+ type = STATUSTYPE_INFO; -+ -+ outptr = outbuf; -+ -+ /* Get all the target info */ -+ for (i = 0; i < md->map->num_targets; i++) { -+ struct target_type *tt = md->map->targets[i].type; -+ offset_t high = md->map->highs[i]; -+ -+ if (outptr - outbuf + -+ sizeof(struct dm_target_spec) > param->data_size) -+ return -ENOMEM; -+ -+ spec = (struct dm_target_spec *) outptr; -+ -+ spec->status = 0; -+ spec->sector_start = sector; -+ spec->length = high - sector + 1; -+ strncpy(spec->target_type, tt->name, sizeof(spec->target_type)); -+ -+ outptr += sizeof(struct dm_target_spec); -+ -+ /* Get the status/table string from the target driver */ -+ if (tt->status) -+ tt->status(type, outptr, -+ outbuf + param->data_size - outptr, -+ md->map->targets[i].private); -+ else -+ outptr[0] = '\0'; -+ -+ outptr += strlen(outptr) + 1; -+ _align(outptr, ALIGNMENT); -+ -+ sector = high + 1; -+ -+ spec->next = outptr - outbuf; -+ } -+ -+ param->target_count = md->map->num_targets; -+ *len = outptr - outbuf; -+ -+ return 0; -+} -+ -+static int __wait(struct mapped_device *md, struct dm_ioctl *param) -+{ -+ int waiting = 0; -+ int i; -+ DECLARE_WAITQUEUE(waitq, current); -+ -+ /* Get all the target info */ -+ for (i = 0; i < md->map->num_targets; i++) { -+ struct target_type *tt = md->map->targets[i].type; -+ -+ set_task_state(current, TASK_INTERRUPTIBLE); -+ -+ /* Add ourself to the target's wait queue */ -+ if (tt->wait && -+ (!tt->wait(md->map->targets[i].private, &waitq, 1))) -+ waiting = 1; -+ } -+ -+ /* If at least one call succeeded then sleep */ -+ if (waiting) { -+ schedule(); -+ -+ for (i = 0; i < md->map->num_targets; i++) { -+ struct target_type *tt = md->map->targets[i].type; -+ -+ /* And remove ourself */ -+ if (tt->wait) -+ tt->wait(md->map->targets[i].private, -+ &waitq, 0); -+ } -+ } -+ -+ set_task_state(current, TASK_RUNNING); -+ -+ return 0; -+} -+ -+/* -+ * Return the status of a device as a text string for each -+ * target. 
-+ */ -+static int get_status(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ int len = 0; -+ int ret; -+ char *outbuf = NULL; -+ -+ md = dm_get_name_r(lookup_name(param), lookup_type(param)); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ -+ /* We haven't a clue how long the resultant data will be so -+ just allocate as much as userland has allowed us and make sure -+ we don't overun it */ -+ outbuf = kmalloc(param->data_size, GFP_KERNEL); -+ if (!outbuf) -+ goto out; -+ /* -+ * Get the status of all targets -+ */ -+ __status(md, param, outbuf, &len); -+ -+ /* -+ * Setup the basic dm_ioctl structure. -+ */ -+ __info(md, param); -+ -+ out: -+ if (md) -+ dm_put_r(md); -+ -+ ret = results_to_user(user, param, outbuf, len); -+ -+ if (outbuf) -+ kfree(outbuf); -+ -+ return ret; -+} -+ -+/* -+ * Wait for a device to report an event -+ */ -+static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ -+ md = dm_get_name_r(lookup_name(param), lookup_type(param)); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ /* -+ * Setup the basic dm_ioctl structure. -+ */ -+ __info(md, param); -+ -+ /* -+ * Wait for anotification event -+ */ -+ __wait(md, param); -+ -+ dm_put_r(md); -+ -+ out: -+ return results_to_user(user, param, NULL, 0); -+} -+ -+/* -+ * Copies device info back to user space, used by -+ * the create and info ioctls. -+ */ -+static int info(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ -+ param->flags = 0; -+ -+ md = dm_get_name_r(lookup_name(param), lookup_type(param)); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ -+ __info(md, param); -+ dm_put_r(md); -+ -+ out: -+ return results_to_user(user, param, NULL, 0); -+} -+ -+/* -+ * Retrieves a list of devices used by a particular dm device. -+ */ -+static int dep(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int count, r; -+ struct mapped_device *md; -+ struct list_head *tmp; -+ size_t len = 0; -+ struct dm_target_deps *deps = NULL; -+ -+ md = dm_get_name_r(lookup_name(param), lookup_type(param)); -+ if (!md) -+ goto out; -+ -+ /* -+ * Setup the basic dm_ioctl structure. -+ */ -+ __info(md, param); -+ -+ /* -+ * Count the devices. -+ */ -+ count = 0; -+ list_for_each(tmp, &md->map->devices) -+ count++; -+ -+ /* -+ * Allocate a kernel space version of the dm_target_status -+ * struct. -+ */ -+ len = sizeof(*deps) + (sizeof(*deps->dev) * count); -+ deps = kmalloc(len, GFP_KERNEL); -+ if (!deps) { -+ dm_put_r(md); -+ return -ENOMEM; -+ } -+ -+ /* -+ * Fill in the devices. -+ */ -+ deps->count = count; -+ count = 0; -+ list_for_each(tmp, &md->map->devices) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ deps->dev[count++] = kdev_t_to_nr(dd->dev); -+ } -+ dm_put_r(md); -+ -+ out: -+ r = results_to_user(user, param, deps, len); -+ -+ kfree(deps); -+ return r; -+} -+ -+static int create(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r, ro; -+ struct dm_table *t; -+ int minor; -+ -+ r = dm_table_create(&t); -+ if (r) -+ return r; -+ -+ r = populate_table(t, param); -+ if (r) { -+ dm_table_destroy(t); -+ return r; -+ } -+ -+ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ? -+ MINOR(to_kdev_t(param->dev)) : -1; -+ -+ ro = (param->flags & DM_READONLY_FLAG) ? 
1 : 0; -+ -+ r = dm_create(param->name, param->uuid, minor, ro, t); -+ if (r) { -+ dm_table_destroy(t); -+ return r; -+ } -+ -+ r = info(param, user); -+ return r; -+} -+ -+static int remove(struct dm_ioctl *param) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ md = dm_get_name_w(lookup_name(param), lookup_type(param)); -+ if (!md) -+ return -ENXIO; -+ -+ r = dm_destroy(md); -+ dm_put_w(md); -+ if (!r) -+ kfree(md); -+ -+ return r; -+} -+ -+static int suspend(struct dm_ioctl *param) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ md = dm_get_name_w(lookup_name(param), lookup_type(param)); -+ if (!md) -+ return -ENXIO; -+ -+ r = (param->flags & DM_SUSPEND_FLAG) ? dm_suspend(md) : dm_resume(md); -+ dm_put_w(md); -+ -+ return r; -+} -+ -+static int reload(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r; -+ struct mapped_device *md; -+ struct dm_table *t; -+ -+ r = dm_table_create(&t); -+ if (r) -+ return r; -+ -+ r = populate_table(t, param); -+ if (r) { -+ dm_table_destroy(t); -+ return r; -+ } -+ -+ md = dm_get_name_w(lookup_name(param), lookup_type(param)); -+ if (!md) { -+ dm_table_destroy(t); -+ return -ENXIO; -+ } -+ -+ r = dm_swap_table(md, t); -+ if (r) { -+ dm_put_w(md); -+ dm_table_destroy(t); -+ return r; -+ } -+ -+ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0); -+ dm_put_w(md); -+ -+ r = info(param, user); -+ return r; -+} -+ -+static int rename(struct dm_ioctl *param) -+{ -+ char *newname = (char *) param + param->data_start; -+ -+ if (valid_str(newname, (void *) param, -+ (void *) param + param->data_size) || -+ dm_set_name(lookup_name(param), lookup_type(param), newname)) { -+ DMWARN("Invalid new logical volume name supplied."); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int ctl_open(struct inode *inode, struct file *file) -+{ -+ /* only root can open this */ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EACCES; -+ -+ MOD_INC_USE_COUNT; -+ -+ return 0; -+} -+ -+static int ctl_close(struct inode *inode, struct file *file) -+{ -+ MOD_DEC_USE_COUNT; -+ return 0; -+} -+ -+static int ctl_ioctl(struct inode *inode, struct file *file, -+ uint command, ulong u) -+{ -+ int r = 0; -+ struct dm_ioctl *param; -+ struct dm_ioctl *user = (struct dm_ioctl *) u; -+ uint cmd = _IOC_NR(command); -+ -+ /* Process commands without params first - always return version */ -+ switch (cmd) { -+ case DM_REMOVE_ALL_CMD: -+ dm_destroy_all(); -+ case DM_VERSION_CMD: -+ return version(user); -+ default: -+ break; -+ } -+ -+ r = copy_params(user, ¶m); -+ if (r) -+ goto err; -+ -+ r = validate_params(cmd, param); -+ if (r) { -+ free_params(param); -+ goto err; -+ } -+ -+ switch (cmd) { -+ case DM_INFO_CMD: -+ r = info(param, user); -+ break; -+ -+ case DM_SUSPEND_CMD: -+ r = suspend(param); -+ break; -+ -+ case DM_CREATE_CMD: -+ r = create(param, user); -+ break; -+ -+ case DM_RELOAD_CMD: -+ r = reload(param, user); -+ break; -+ -+ case DM_REMOVE_CMD: -+ r = remove(param); -+ break; -+ -+ case DM_RENAME_CMD: -+ r = rename(param); -+ break; -+ -+ case DM_DEPS_CMD: -+ r = dep(param, user); -+ break; -+ -+ case DM_GET_STATUS_CMD: -+ r = get_status(param, user); -+ break; -+ -+ case DM_WAIT_EVENT_CMD: -+ r = wait_device_event(param, user); -+ break; -+ -+ default: -+ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); -+ r = -EINVAL; -+ } -+ -+ free_params(param); -+ return r; -+ -+ err: -+ version(user); -+ return r; -+} -+ -+static struct file_operations _ctl_fops = { -+ open: ctl_open, -+ release:ctl_close, -+ ioctl: ctl_ioctl, -+ owner: THIS_MODULE, -+}; -+ -+static 
devfs_handle_t _ctl_handle; -+ -+static struct miscdevice _dm_misc = { -+ minor: MISC_DYNAMIC_MINOR, -+ name: DM_NAME, -+ fops: &_ctl_fops -+}; -+ -+/* Create misc character device and link to DM_DIR/control */ -+int __init dm_interface_init(void) -+{ -+ int r; -+ char rname[64]; -+ -+ r = misc_register(&_dm_misc); -+ if (r) { -+ DMERR("misc_register failed for control device"); -+ return r; -+ } -+ -+ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3, -+ sizeof rname - 3); -+ if (r == -ENOSYS) -+ return 0; /* devfs not present */ -+ -+ if (r < 0) { -+ DMERR("devfs_generate_path failed for control device"); -+ goto failed; -+ } -+ -+ strncpy(rname + r, "../", 3); -+ r = devfs_mk_symlink(NULL, DM_DIR "/control", -+ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL); -+ if (r) { -+ DMERR("devfs_mk_symlink failed for control device"); -+ goto failed; -+ } -+ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle); -+ -+ return 0; -+ -+ failed: -+ misc_deregister(&_dm_misc); -+ return r; -+} -+ -+void dm_interface_exit(void) -+{ -+ if (misc_deregister(&_dm_misc) < 0) -+ DMERR("misc_deregister failed for control device"); -+} -diff -ruN linux-2.4.19-pre8/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c ---- linux-2.4.19-pre8/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-linear.c Mon May 20 14:23:20 2002 -@@ -0,0 +1,126 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+ -+/* -+ * Linear: maps a linear range of a device. -+ */ -+struct linear_c { -+ long delta; /* FIXME: we need a signed offset type */ -+ long start; /* For display only */ -+ struct dm_dev *dev; -+}; -+ -+/* -+ * Construct a linear mapping: -+ */ -+static int linear_ctr(struct dm_table *t, offset_t b, offset_t l, -+ int argc, char **argv, void **context) -+{ -+ struct linear_c *lc; -+ unsigned long start; /* FIXME: unsigned long long */ -+ char *end; -+ -+ if (argc != 2) { -+ *context = "dm-linear: Not enough arguments"; -+ return -EINVAL; -+ } -+ -+ lc = kmalloc(sizeof(*lc), GFP_KERNEL); -+ if (lc == NULL) { -+ *context = "dm-linear: Cannot allocate linear context"; -+ return -ENOMEM; -+ } -+ -+ start = simple_strtoul(argv[1], &end, 10); -+ if (*end) { -+ *context = "dm-linear: Invalid device sector"; -+ goto bad; -+ } -+ -+ if (dm_table_get_device(t, argv[0], start, l, &lc->dev)) { -+ *context = "dm-linear: Device lookup failed"; -+ goto bad; -+ } -+ -+ lc->delta = (int) start - (int) b; -+ lc->start = start; -+ *context = lc; -+ return 0; -+ -+ bad: -+ kfree(lc); -+ return -EINVAL; -+} -+ -+static void linear_dtr(struct dm_table *t, void *c) -+{ -+ struct linear_c *lc = (struct linear_c *) c; -+ -+ dm_table_put_device(t, lc->dev); -+ kfree(c); -+} -+ -+static int linear_map(struct buffer_head *bh, int rw, void *context) -+{ -+ struct linear_c *lc = (struct linear_c *) context; -+ -+ bh->b_rdev = lc->dev->dev; -+ bh->b_rsector = bh->b_rsector + lc->delta; -+ -+ return 1; -+} -+ -+static int linear_status(status_type_t type, char *result, int maxlen, -+ void *context) -+{ -+ struct linear_c *lc = (struct linear_c *) context; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ result[0] = '\0'; -+ break; -+ -+ case STATUSTYPE_TABLE: -+ snprintf(result, maxlen, "%s %ld", kdevname(lc->dev->dev), -+ lc->start); -+ break; -+ } -+ return 0; -+} -+ -+static struct target_type linear_target = { -+ name: "linear", -+ module: THIS_MODULE, -+ ctr: linear_ctr, -+ dtr: linear_dtr, -+ 
map: linear_map, -+ status: linear_status, -+ wait: NULL, /* No wait function */ -+}; -+ -+int __init dm_linear_init(void) -+{ -+ int r = dm_register_target(&linear_target); -+ -+ if (r < 0) -+ DMERR("linear: register failed %d", r); -+ -+ return r; -+} -+ -+void dm_linear_exit(void) -+{ -+ int r = dm_unregister_target(&linear_target); -+ -+ if (r < 0) -+ DMERR("linear: unregister failed %d", r); -+} -diff -ruN linux-2.4.19-pre8/drivers/md/dm-mirror.c linux/drivers/md/dm-mirror.c ---- linux-2.4.19-pre8/drivers/md/dm-mirror.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-mirror.c Fri May 10 15:40:22 2002 -@@ -0,0 +1,344 @@ -+/* -+ * Copyright (C) 2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+#include "kcopyd.h" -+ -+#include -+#include -+#include -+ -+/* kcopyd priority of mirror operations */ -+#define MIRROR_COPY_PRIORITY 5 -+ -+static kmem_cache_t *bh_cachep; -+ -+/* -+ * Mirror: maps a mirror range of a device. -+ */ -+struct mirror_c { -+ struct dm_dev *fromdev; -+ struct dm_dev *todev; -+ -+ unsigned long from_delta; -+ unsigned long to_delta; -+ -+ unsigned long frompos; -+ unsigned long topos; -+ -+ unsigned long got_to; -+ unsigned long size; /* for %age calculation */ -+ struct rw_semaphore lock; -+ struct buffer_head *bhstring; -+ wait_queue_head_t waitq; -+ int error; -+}; -+ -+/* Called when a duplicating I/O has finished */ -+static void mirror_end_io(struct buffer_head *bh, int uptodate) -+{ -+ struct mirror_c *lc = (struct mirror_c *) bh->b_private; -+ -+ /* Flag error if it failed */ -+ if (!uptodate) { -+ DMERR("Mirror copy to %s failed", kdevname(lc->todev->dev)); -+ lc->error = 1; -+ wake_up_interruptible(&lc->waitq); -+ } -+ kmem_cache_free(bh_cachep, bh); -+ wake_up_interruptible(&lc->waitq); -+} -+ -+static void mirror_bh(struct mirror_c *mc, struct buffer_head *bh) -+{ -+ struct buffer_head *dbh = kmem_cache_alloc(bh_cachep, GFP_NOIO); -+ if (dbh) { -+ *dbh = *bh; -+ dbh->b_rdev = mc->todev->dev; -+ dbh->b_rsector = bh->b_rsector - mc->from_delta + mc->to_delta; -+ dbh->b_end_io = mirror_end_io; -+ dbh->b_private = mc; -+ -+ generic_make_request(WRITE, dbh); -+ } else { -+ DMERR("kmem_cache_alloc failed for mirror bh"); -+ mc->error = 1; -+ } -+} -+ -+/* Called when the copy I/O has finished */ -+static void copy_callback(copy_cb_reason_t reason, void *context, long arg) -+{ -+ struct mirror_c *lc = (struct mirror_c *) context; -+ struct buffer_head *bh; -+ -+ if (reason == COPY_CB_FAILED_READ || reason == COPY_CB_FAILED_WRITE) { -+ DMERR("Mirror block %s on %s failed, sector %ld", -+ reason == COPY_CB_FAILED_READ ? "read" : "write", -+ reason == COPY_CB_FAILED_READ ? -+ kdevname(lc->fromdev->dev) : -+ kdevname(lc->todev->dev), arg); -+ lc->error = 1; -+ return; -+ } -+ -+ if (reason == COPY_CB_COMPLETE) { -+ /* Say we've finished */ -+ dm_notify(lc); /* TODO: interface ?? */ -+ } -+ -+ if (reason == COPY_CB_PROGRESS) { -+ dm_notify(lc); /* TODO: interface ?? 
*/ -+ } -+ -+ /* Submit, and mirror any pending BHs */ -+ down_write(&lc->lock); -+ lc->got_to = arg; -+ -+ bh = lc->bhstring; -+ lc->bhstring = NULL; -+ up_write(&lc->lock); -+ -+ while (bh) { -+ struct buffer_head *nextbh = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ generic_make_request(WRITE, bh); -+ mirror_bh(lc, bh); -+ bh = nextbh; -+ } -+} -+ -+/* -+ * Construct a mirror mapping: [] -+ */ -+static int mirror_ctr(struct dm_table *t, offset_t b, offset_t l, -+ int argc, char **argv, void **context) -+{ -+ struct mirror_c *lc; -+ unsigned long offset1, offset2; -+ char *value; -+ int priority = MIRROR_COPY_PRIORITY; -+ int throttle; -+ struct kcopyd_region src, dest; -+ -+ if (argc <= 4) { -+ *context = "dm-mirror: Not enough arguments"; -+ return -EINVAL; -+ } -+ -+ lc = kmalloc(sizeof(*lc), GFP_KERNEL); -+ if (lc == NULL) { -+ *context = "dm-mirror: Cannot allocate mirror context"; -+ return -ENOMEM; -+ } -+ -+ if (dm_table_get_device(t, argv[0], 0, l, &lc->fromdev)) { -+ *context = "dm-mirror: Device lookup failed"; -+ goto bad; -+ } -+ -+ offset1 = simple_strtoul(argv[1], &value, 10); -+ if (value == NULL) { -+ *context = "Invalid offset for dev1"; -+ dm_table_put_device(t, lc->fromdev); -+ goto bad; -+ } -+ -+ if (dm_table_get_device(t, argv[2], 0, l, &lc->todev)) { -+ *context = "dm-mirror: Device lookup failed"; -+ dm_table_put_device(t, lc->fromdev); -+ goto bad; -+ } -+ -+ offset2 = simple_strtoul(argv[3], &value, 10); -+ if (value == NULL) { -+ *context = "Invalid offset for dev2"; -+ goto bad_put; -+ } -+ -+ throttle = simple_strtoul(argv[4], &value, 10); -+ if (value == NULL) { -+ *context = "Invalid throttle value"; -+ goto bad_put; -+ } -+ -+ if (argc > 5) { -+ priority = simple_strtoul(argv[5], &value, 10); -+ if (value == NULL) { -+ *context = "Invalid priority value"; -+ goto bad_put; -+ } -+ } -+ -+ lc->from_delta = (int) offset1 - (int) b; -+ lc->to_delta = (int) offset2 - (int) b; -+ lc->frompos = offset1; -+ lc->topos = offset2; -+ lc->error = 0; -+ lc->bhstring = NULL; -+ lc->size = l - offset1; -+ init_waitqueue_head(&lc->waitq); -+ init_rwsem(&lc->lock); -+ *context = lc; -+ -+ /* Tell kcopyd to do the biz */ -+ src.dev = lc->fromdev->dev; -+ src.sector = offset1; -+ src.count = l - offset1; -+ -+ dest.dev = lc->todev->dev; -+ dest.sector = offset2; -+ dest.count = l - offset1; -+ -+ if (kcopyd_copy(&src, &dest, priority, 0, copy_callback, lc)) { -+ DMERR("block copy call failed"); -+ dm_table_put_device(t, lc->fromdev); -+ dm_table_put_device(t, lc->todev); -+ goto bad; -+ } -+ return 0; -+ -+ bad_put: -+ dm_table_put_device(t, lc->fromdev); -+ dm_table_put_device(t, lc->todev); -+ bad: -+ kfree(lc); -+ return -EINVAL; -+} -+ -+static void mirror_dtr(struct dm_table *t, void *c) -+{ -+ struct mirror_c *lc = (struct mirror_c *) c; -+ -+ /* Just in case anyone is still waiting... */ -+ wake_up_interruptible(&lc->waitq); -+ -+ dm_table_put_device(t, lc->fromdev); -+ dm_table_put_device(t, lc->todev); -+ kfree(c); -+} -+ -+static int mirror_map(struct buffer_head *bh, int rw, void *context) -+{ -+ struct mirror_c *lc = (struct mirror_c *) context; -+ -+ bh->b_rdev = lc->fromdev->dev; -+ bh->b_rsector = bh->b_rsector + lc->from_delta; -+ -+ if (rw == WRITE) { -+ down_write(&lc->lock); -+ -+ /* -+ * If this area is in flight then save it until it's -+ * commited to the mirror disk and then submit it and -+ * its mirror. 
-+ */ -+ if (bh->b_rsector > lc->got_to && -+ bh->b_rsector <= lc->got_to + KIO_MAX_SECTORS) { -+ bh->b_reqnext = lc->bhstring; -+ lc->bhstring = bh; -+ up_write(&lc->lock); -+ return 0; -+ } -+ -+ /* -+ * If we've already copied this block then duplicate -+ * it to the mirror device -+ */ -+ if (bh->b_rsector < lc->got_to) { -+ /* Schedule copy of I/O to other target */ -+ mirror_bh(lc, bh); -+ } -+ up_write(&lc->lock); -+ } -+ return 1; -+} -+ -+static int mirror_sts(status_type_t sts_type, char *result, int maxlen, -+ void *context) -+{ -+ struct mirror_c *mc = (struct mirror_c *) context; -+ -+ switch (sts_type) { -+ case STATUSTYPE_INFO: -+ if (mc->error) -+ snprintf(result, maxlen, "Error"); -+ else -+ snprintf(result, maxlen, "%ld%%", -+ (mc->got_to - -+ mc->from_delta) * 100 / mc->size); -+ break; -+ -+ case STATUSTYPE_TABLE: -+ snprintf(result, maxlen, "%s %ld %s %ld %d", -+ kdevname(mc->fromdev->dev), mc->frompos, -+ kdevname(mc->todev->dev), mc->topos, 0); -+ break; -+ } -+ return 0; -+} -+ -+static int mirror_wait(wait_queue_t *wq, void *context) -+{ -+ struct mirror_c *mc = (struct mirror_c *) context; -+ -+ if (add) -+ add_wait_queue(&mc->waitq, wq); -+ else -+ remove_wait_queue(&mc->waitq, wq); -+ -+ return 0; -+} -+ -+static struct target_type mirror_target = { -+ name: "mirror", -+ module: THIS_MODULE, -+ ctr: mirror_ctr, -+ dtr: mirror_dtr, -+ map: mirror_map, -+ sts: mirror_sts, -+ wait: mirror_wait, -+}; -+ -+int __init dm_mirror_init(void) -+{ -+ int r; -+ -+ bh_cachep = kmem_cache_create("dm-mirror", -+ sizeof(struct buffer_head), -+ __alignof__(struct buffer_head), -+ 0, NULL, NULL); -+ if (!bh_cachep) -+ return -1; -+ -+ r = dm_register_target(&mirror_target); -+ if (r < 0) { -+ DMERR("mirror: register failed %d", r); -+ kmem_cache_destroy(bh_cachep); -+ } -+ return r; -+} -+ -+void dm_mirror_exit(void) -+{ -+ int r = dm_unregister_target(&mirror_target); -+ -+ if (r < 0) -+ DMERR("mirror: unregister failed %d", r); -+ -+ kmem_cache_destroy(bh_cachep); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -ruN linux-2.4.19-pre8/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapshot.c ---- linux-2.4.19-pre8/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-snapshot.c Mon May 20 14:25:58 2002 -@@ -0,0 +1,1208 @@ -+/* -+ * dm-snapshot.c -+ * -+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "dm-snapshot.h" -+#include "kcopyd.h" -+ -+/* -+ * FIXME: Remove this before release. -+ */ -+#if 0 -+#define DMDEBUG(x...) DMWARN( ## x) -+#else -+#define DMDEBUG(x...) -+#endif -+ -+/* -+ * The percentage increment we will wake up users at -+ */ -+#define WAKE_UP_PERCENT 5 -+ -+/* -+ * Hard sector size used all over the kernel -+ */ -+#define SECTOR_SIZE 512 -+ -+/* -+ * kcopyd priority of snapshot operations -+ */ -+#define SNAPSHOT_COPY_PRIORITY 2 -+ -+struct pending_exception { -+ struct exception e; -+ -+ /* -+ * Origin buffers waiting for this to complete are held -+ * in a list (using b_reqnext). 
-+ */ -+ struct buffer_head *origin_bhs; -+ struct buffer_head *snapshot_bhs; -+ -+ /* -+ * Other pending_exceptions that are processing this -+ * chunk. When this list is empty, we know we can -+ * complete the origins. -+ */ -+ struct list_head siblings; -+ -+ /* Pointer back to snapshot context */ -+ struct dm_snapshot *snap; -+ -+ /* -+ * 1 indicates the exception has already been sent to -+ * kcopyd. -+ */ -+ int started; -+}; -+ -+/* -+ * Hash table mapping origin volumes to lists of snapshots and -+ * a lock to protect it -+ */ -+static kmem_cache_t *exception_cache; -+static kmem_cache_t *pending_cache; -+static mempool_t *pending_pool; -+ -+/* -+ * One of these per registered origin, held in the snapshot_origins hash -+ */ -+struct origin { -+ /* The origin device */ -+ kdev_t dev; -+ -+ struct list_head hash_list; -+ -+ /* List of snapshots for this origin */ -+ struct list_head snapshots; -+}; -+ -+/* -+ * Useful macro for running the store functions. Use -+ * store_int_fn if you want the return value. -+ */ -+#define store_fn(snap, fn, args...) \ -+ if ((snap)->store. ## fn) \ -+ (snap)->store. ## fn ( &(snap)->store , ## args ) -+ -+#define store_int_fn(snap, fn, args...) \ -+ (((snap)->store. ## fn) ? \ -+ ((snap)->store. ## fn ( &(snap)->store , ## args )) : 0) -+ -+/* -+ * Size of the hash table for origin volumes. If we make this -+ * the size of the minors list then it should be nearly perfect -+ */ -+#define ORIGIN_HASH_SIZE 256 -+#define ORIGIN_MASK 0xFF -+static struct list_head *_origins; -+static struct rw_semaphore _origins_lock; -+ -+static int init_origin_hash(void) -+{ -+ int i; -+ -+ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!_origins) { -+ DMERR("Device mapper: Snapshot: unable to allocate memory"); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < ORIGIN_HASH_SIZE; i++) -+ INIT_LIST_HEAD(_origins + i); -+ init_rwsem(&_origins_lock); -+ -+ return 0; -+} -+ -+static void exit_origin_hash(void) -+{ -+ kfree(_origins); -+} -+ -+static inline unsigned int origin_hash(kdev_t dev) -+{ -+ return MINOR(dev) & ORIGIN_MASK; -+} -+ -+static struct origin *__lookup_origin(kdev_t origin) -+{ -+ struct list_head *slist; -+ struct list_head *ol; -+ struct origin *o; -+ -+ ol = &_origins[origin_hash(origin)]; -+ list_for_each(slist, ol) { -+ o = list_entry(slist, struct origin, hash_list); -+ -+ if (o->dev == origin) -+ return o; -+ } -+ -+ return NULL; -+} -+ -+static void __insert_origin(struct origin *o) -+{ -+ struct list_head *sl = &_origins[origin_hash(o->dev)]; -+ list_add_tail(&o->hash_list, sl); -+} -+ -+/* -+ * Make a note of the snapshot and its origin so we can look it -+ * up when the origin has a write on it. 
-+ */ -+static int register_snapshot(struct dm_snapshot *snap) -+{ -+ struct origin *o; -+ kdev_t dev = snap->origin->dev; -+ -+ down_write(&_origins_lock); -+ o = __lookup_origin(dev); -+ -+ if (!o) { -+ /* New origin */ -+ o = kmalloc(sizeof(*o), GFP_KERNEL); -+ if (!o) { -+ up_write(&_origins_lock); -+ return -ENOMEM; -+ } -+ -+ /* Initialise the struct */ -+ INIT_LIST_HEAD(&o->snapshots); -+ o->dev = dev; -+ -+ __insert_origin(o); -+ } -+ -+ list_add_tail(&snap->list, &o->snapshots); -+ -+ up_write(&_origins_lock); -+ return 0; -+} -+ -+static void unregister_snapshot(struct dm_snapshot *s) -+{ -+ struct origin *o; -+ -+ down_write(&_origins_lock); -+ o = __lookup_origin(s->origin->dev); -+ -+ list_del(&s->list); -+ if (list_empty(&o->snapshots)) { -+ list_del(&o->hash_list); -+ kfree(o); -+ } -+ -+ up_write(&_origins_lock); -+} -+ -+/* -+ * Implementation of the exception hash tables. -+ */ -+static int init_exception_table(struct exception_table *et, uint32_t size) -+{ -+ int i; -+ -+ et->hash_mask = size - 1; -+ et->table = vmalloc(sizeof(struct list_head) * (size)); -+ if (!et->table) -+ return -ENOMEM; -+ -+ for (i = 0; i < size; i++) -+ INIT_LIST_HEAD(et->table + i); -+ -+ return 0; -+} -+ -+static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) -+{ -+ struct list_head *slot, *entry, *temp; -+ struct exception *ex; -+ int i, size; -+ -+ size = et->hash_mask + 1; -+ for (i = 0; i < size; i++) { -+ slot = et->table + i; -+ -+ list_for_each_safe(entry, temp, slot) { -+ ex = list_entry(entry, struct exception, hash_list); -+ kmem_cache_free(mem, ex); -+ } -+ } -+ -+ vfree(et->table); -+} -+ -+/* -+ * FIXME: check how this hash fn is performing. -+ */ -+static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) -+{ -+ return chunk & et->hash_mask; -+} -+ -+static void insert_exception(struct exception_table *eh, struct exception *e) -+{ -+ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; -+ list_add(&e->hash_list, l); -+} -+ -+static inline void remove_exception(struct exception *e) -+{ -+ list_del(&e->hash_list); -+} -+ -+/* -+ * Return the exception data for a sector, or NULL if not -+ * remapped. -+ */ -+static struct exception *lookup_exception(struct exception_table *et, -+ chunk_t chunk) -+{ -+ struct list_head *slot, *el; -+ struct exception *e; -+ -+ slot = &et->table[exception_hash(et, chunk)]; -+ list_for_each(el, slot) { -+ e = list_entry(el, struct exception, hash_list); -+ if (e->old_chunk == chunk) -+ return e; -+ } -+ -+ return NULL; -+} -+ -+static inline struct exception *alloc_exception(void) -+{ -+ struct exception *e; -+ -+ e = kmem_cache_alloc(exception_cache, GFP_NOIO); -+ if (!e) -+ e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); -+ -+ return e; -+} -+ -+static inline void free_exception(struct exception *e) -+{ -+ kmem_cache_free(exception_cache, e); -+} -+ -+static inline struct pending_exception *alloc_pending_exception(void) -+{ -+ return mempool_alloc(pending_pool, GFP_NOIO); -+} -+ -+static inline void free_pending_exception(struct pending_exception *pe) -+{ -+ mempool_free(pe, pending_pool); -+} -+ -+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) -+{ -+ struct exception *e; -+ -+ e = alloc_exception(); -+ if (!e) -+ return -ENOMEM; -+ -+ e->old_chunk = old; -+ e->new_chunk = new; -+ insert_exception(&s->complete, e); -+ return 0; -+} -+ -+/* -+ * Hard coded magic. 
-+ */ -+static int calc_max_buckets(void) -+{ -+ unsigned long mem; -+ -+ mem = num_physpages << PAGE_SHIFT; -+ mem /= 50; -+ mem /= sizeof(struct list_head); -+ -+ return mem; -+} -+ -+/* -+ * Rounds a number down to a power of 2. -+ */ -+static inline uint32_t round_down(uint32_t n) -+{ -+ while (n & (n - 1)) -+ n &= (n - 1); -+ return n; -+} -+ -+/* -+ * Allocate room for a suitable hash table. -+ */ -+static int init_hash_tables(struct dm_snapshot *s) -+{ -+ offset_t hash_size, cow_dev_size, origin_dev_size, max_buckets; -+ -+ /* -+ * Calculate based on the size of the original volume or -+ * the COW volume... -+ */ -+ cow_dev_size = get_dev_size(s->cow->dev); -+ origin_dev_size = get_dev_size(s->origin->dev); -+ max_buckets = calc_max_buckets(); -+ -+ hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size; -+ hash_size = min(hash_size, max_buckets); -+ -+ /* Round it down to a power of 2 */ -+ hash_size = round_down(hash_size); -+ if (init_exception_table(&s->complete, hash_size)) -+ return -ENOMEM; -+ -+ /* -+ * Allocate hash table for in-flight exceptions -+ * Make this smaller than the real hash table -+ */ -+ hash_size >>= 3; -+ if (!hash_size) -+ hash_size = 64; -+ -+ if (init_exception_table(&s->pending, hash_size)) { -+ exit_exception_table(&s->complete, exception_cache); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Construct a snapshot mapping:

-+ * -+ */ -+static int snapshot_ctr(struct dm_table *t, offset_t b, offset_t l, -+ int argc, char **argv, void **context) -+{ -+ struct dm_snapshot *s; -+ unsigned long chunk_size; -+ unsigned long extent_size = 0L; -+ int r = -EINVAL; -+ char *persistent; -+ char *origin_path; -+ char *cow_path; -+ char *value; -+ int blocksize; -+ -+ if (argc < 4) { -+ *context = "dm-snapshot: Not enough arguments"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ origin_path = argv[0]; -+ cow_path = argv[1]; -+ persistent = argv[2]; -+ -+ if ((*persistent & 0x5f) != 'P' && (*persistent & 0x5f) != 'N') { -+ *context = "Persistent flag is not P or N"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ chunk_size = simple_strtoul(argv[3], &value, 10); -+ if (chunk_size == 0 || value == NULL) { -+ *context = "Invalid chunk size"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ /* Get the extent size for persistent snapshots */ -+ if ((*persistent & 0x5f) == 'P') { -+ if (argc < 5) { -+ *context = "No extent size specified"; -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ extent_size = simple_strtoul(argv[4], &value, 10); -+ if (extent_size == 0 || value == NULL) { -+ *context = "Invalid extent size"; -+ r = -EINVAL; -+ goto bad; -+ } -+ } -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) { -+ *context = "Cannot allocate snapshot context private structure"; -+ r = -ENOMEM; -+ goto bad; -+ } -+ -+ r = dm_table_get_device(t, origin_path, 0, 0, &s->origin); -+ if (r) { -+ *context = "Cannot get origin device"; -+ goto bad_free; -+ } -+ -+ r = dm_table_get_device(t, cow_path, 0, 0, &s->cow); -+ if (r) { -+ dm_table_put_device(t, s->origin); -+ *context = "Cannot get COW device"; -+ goto bad_free; -+ } -+ -+ /* Validate the extent and chunk sizes against the device block size */ -+ blocksize = get_hardsect_size(s->cow->dev); -+ if (chunk_size % (blocksize / SECTOR_SIZE)) { -+ *context = "Chunk size is not a multiple of device blocksize"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ if (extent_size % (blocksize / SECTOR_SIZE)) { -+ *context = "Extent size is not a multiple of device blocksize"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ /* Check the sizes are small enough to fit in one kiovec */ -+ if (chunk_size > KIO_MAX_SECTORS) { -+ *context = "Chunk size is too big"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ if (extent_size > KIO_MAX_SECTORS) { -+ *context = "Extent size is too big"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ /* Check chunk_size is a power of 2 */ -+ if (chunk_size & (chunk_size - 1)) { -+ *context = "Chunk size is not a power of 2"; -+ r = -EINVAL; -+ goto bad_putdev; -+ } -+ -+ init_waitqueue_head(&s->waitq); -+ s->chunk_size = chunk_size; -+ s->chunk_mask = chunk_size - 1; -+ s->type = *persistent; -+ for (s->chunk_shift = 0; chunk_size; -+ s->chunk_shift++, chunk_size >>= 1) -+ ; -+ s->chunk_shift--; -+ -+ s->valid = 1; -+ s->last_percent = 0; -+ init_rwsem(&s->lock); -+ -+ /* Allocate hash table for COW data */ -+ if (init_hash_tables(s)) { -+ *context = "Unable to allocate hash table space"; -+ r = -ENOMEM; -+ goto bad_putdev; -+ } -+ -+ /* -+ * Check the persistent flag - done here because we need the iobuf -+ * to check the LV header -+ */ -+ s->store.snap = s; -+ -+ if ((*persistent & 0x5f) == 'P') -+ r = dm_create_persistent(&s->store, s->chunk_size); -+ else -+ r = dm_create_transient(&s->store, s, blocksize, context); -+ -+ if (r) { -+ *context = "Couldn't create exception store"; -+ r = -EINVAL; -+ goto bad_free1; -+ } -+ -+ /* Flush IO to the origin device */ -+#if LVM_VFS_ENHANCEMENT -+ 
fsync_dev_lockfs(s->origin->dev); -+#else -+ fsync_dev(s->origin->dev); -+#endif -+ -+ /* Add snapshot to the list of snapshots for this origin */ -+ if (register_snapshot(s)) { -+ r = -EINVAL; -+ *context = "Cannot register snapshot origin"; -+ goto bad_free2; -+ } -+#if LVM_VFS_ENHANCEMENT -+ unlockfs(s->origin->dev); -+#endif -+ kcopyd_inc_client_count(); -+ -+ *context = s; -+ return 0; -+ -+ bad_free2: -+ store_fn(s, destroy); -+ -+ bad_free1: -+ exit_exception_table(&s->pending, pending_cache); -+ exit_exception_table(&s->complete, exception_cache); -+ -+ bad_putdev: -+ dm_table_put_device(t, s->cow); -+ dm_table_put_device(t, s->origin); -+ -+ bad_free: -+ kfree(s); -+ -+ bad: -+ return r; -+} -+ -+static void snapshot_dtr(struct dm_table *t, void *context) -+{ -+ struct dm_snapshot *s = (struct dm_snapshot *) context; -+ -+ wake_up_interruptible(&s->waitq); -+ -+ unregister_snapshot(s); -+ -+ exit_exception_table(&s->pending, pending_cache); -+ exit_exception_table(&s->complete, exception_cache); -+ -+ /* Deallocate memory used */ -+ store_fn(s, destroy); -+ -+ dm_table_put_device(t, s->origin); -+ dm_table_put_device(t, s->cow); -+ kfree(s); -+ -+ kcopyd_dec_client_count(); -+} -+ -+/* -+ * We hold lists of buffer_heads, using the b_reqnext field. -+ */ -+static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh) -+{ -+ bh->b_reqnext = *queue; -+ *queue = bh; -+} -+ -+/* -+ * Flush a list of buffers. -+ */ -+static void flush_buffers(struct buffer_head *bh) -+{ -+ struct buffer_head *n; -+ -+ DMDEBUG("begin flush"); -+ while (bh) { -+ n = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ DMDEBUG("flushing %p", bh); -+ generic_make_request(WRITE, bh); -+ bh = n; -+ } -+ -+ run_task_queue(&tq_disk); -+} -+ -+/* -+ * Error a list of buffers. -+ */ -+static void error_buffers(struct buffer_head *bh) -+{ -+ struct buffer_head *n; -+ -+ while (bh) { -+ n = bh->b_reqnext; -+ bh->b_reqnext = NULL; -+ buffer_IO_error(bh); -+ bh = n; -+ } -+} -+ -+static void pending_complete(struct pending_exception *pe, int success) -+{ -+ struct exception *e; -+ struct dm_snapshot *s = pe->snap; -+ -+ if (success) { -+ e = alloc_exception(); -+ if (!e) { -+ printk("Unable to allocate exception."); -+ down_write(&s->lock); -+ store_fn(s, drop_snapshot); -+ s->valid = 0; -+ up_write(&s->lock); -+ return; -+ } -+ -+ /* -+ * Add a proper exception, and remove the -+ * inflight exception from the list. 
-+ */ -+ down_write(&s->lock); -+ -+ memcpy(e, &pe->e, sizeof(*e)); -+ insert_exception(&s->complete, e); -+ remove_exception(&pe->e); -+ -+ /* Submit any pending write BHs */ -+ up_write(&s->lock); -+ -+ flush_buffers(pe->snapshot_bhs); -+ DMDEBUG("Exception completed successfully."); -+ -+ /* Notify any interested parties */ -+ if (s->store.percent_full) { -+ int pc = s->store.percent_full(&s->store); -+ -+ if (pc >= s->last_percent + WAKE_UP_PERCENT) { -+ wake_up_interruptible(&s->waitq); -+ s->last_percent = pc - pc % WAKE_UP_PERCENT; -+ } -+ } -+ -+ } else { -+ /* Read/write error - snapshot is unusable */ -+ DMERR("Error reading/writing snapshot"); -+ -+ down_write(&s->lock); -+ store_fn(s, drop_snapshot); -+ s->valid = 0; -+ remove_exception(&pe->e); -+ up_write(&s->lock); -+ -+ error_buffers(pe->snapshot_bhs); -+ -+ wake_up_interruptible(&s->waitq); -+ DMDEBUG("Exception failed."); -+ } -+ -+ if (list_empty(&pe->siblings)) -+ flush_buffers(pe->origin_bhs); -+ else -+ list_del(&pe->siblings); -+ -+ free_pending_exception(pe); -+} -+ -+static void commit_callback(void *context, int success) -+{ -+ struct pending_exception *pe = (struct pending_exception *) context; -+ pending_complete(pe, success); -+} -+ -+/* -+ * Called when the copy I/O has finished. kcopyd actually runs -+ * this code so don't block. -+ */ -+static void copy_callback(int err, void *context) -+{ -+ struct pending_exception *pe = (struct pending_exception *) context; -+ struct dm_snapshot *s = pe->snap; -+ -+ if (err) -+ pending_complete(pe, 0); -+ -+ else -+ /* Update the metadata if we are persistent */ -+ s->store.commit_exception(&s->store, &pe->e, commit_callback, -+ pe); -+} -+ -+/* -+ * Dispatches the copy operation to kcopyd. -+ */ -+static inline void start_copy(struct pending_exception *pe) -+{ -+ struct dm_snapshot *s = pe->snap; -+ struct kcopyd_region src, dest; -+ -+ src.dev = s->origin->dev; -+ src.sector = chunk_to_sector(s, pe->e.old_chunk); -+ src.count = s->chunk_size; -+ -+ dest.dev = s->cow->dev; -+ dest.sector = chunk_to_sector(s, pe->e.new_chunk); -+ dest.count = s->chunk_size; -+ -+ if (!pe->started) { -+ /* Hand over to kcopyd */ -+ kcopyd_copy(&src, &dest, copy_callback, pe); -+ pe->started = 1; -+ } -+} -+ -+/* -+ * Looks to see if this snapshot already has a pending exception -+ * for this chunk, otherwise it allocates a new one and inserts -+ * it into the pending table. -+ */ -+static struct pending_exception *find_pending_exception(struct dm_snapshot *s, -+ struct buffer_head *bh) -+{ -+ struct exception *e; -+ struct pending_exception *pe; -+ chunk_t chunk = sector_to_chunk(s, bh->b_rsector); -+ -+ /* -+ * Is there a pending exception for this already ? 
-+ */ -+ e = lookup_exception(&s->pending, chunk); -+ if (e) { -+ /* cast the exception to a pending exception */ -+ pe = list_entry(e, struct pending_exception, e); -+ -+ } else { -+ /* Create a new pending exception */ -+ pe = alloc_pending_exception(); -+ if (!pe) { -+ DMWARN("Couldn't allocate pending exception."); -+ return NULL; -+ } -+ -+ pe->e.old_chunk = chunk; -+ pe->origin_bhs = pe->snapshot_bhs = NULL; -+ INIT_LIST_HEAD(&pe->siblings); -+ pe->snap = s; -+ pe->started = 0; -+ -+ if (store_int_fn(s, prepare_exception, &pe->e)) { -+ free_pending_exception(pe); -+ s->valid = 0; -+ return NULL; -+ } -+ -+ insert_exception(&s->pending, &pe->e); -+ } -+ -+ return pe; -+} -+ -+static inline void remap_exception(struct dm_snapshot *s, struct exception *e, -+ struct buffer_head *bh) -+{ -+ bh->b_rdev = s->cow->dev; -+ bh->b_rsector = chunk_to_sector(s, e->new_chunk) + -+ (bh->b_rsector & s->chunk_mask); -+} -+ -+static int snapshot_map(struct buffer_head *bh, int rw, void *context) -+{ -+ struct exception *e; -+ struct dm_snapshot *s = (struct dm_snapshot *) context; -+ int r = 1; -+ chunk_t chunk; -+ struct pending_exception *pe; -+ -+ chunk = sector_to_chunk(s, bh->b_rsector); -+ -+ /* Full snapshots are not usable */ -+ if (!s->valid) -+ return -1; -+ -+ /* -+ * Write to snapshot - higher level takes care of RW/RO -+ * flags so we should only get this if we are -+ * writeable. -+ */ -+ if (rw == WRITE) { -+ -+ down_write(&s->lock); -+ -+ /* If the block is already remapped - use that, else remap it */ -+ e = lookup_exception(&s->complete, chunk); -+ if (e) -+ remap_exception(s, e, bh); -+ -+ else { -+ pe = find_pending_exception(s, bh); -+ -+ if (!pe) { -+ store_fn(s, drop_snapshot); -+ s->valid = 0; -+ } -+ -+ queue_buffer(&pe->snapshot_bhs, bh); -+ start_copy(pe); -+ r = 0; -+ } -+ -+ up_write(&s->lock); -+ -+ } else { -+ /* -+ * FIXME: this read path scares me because we -+ * always use the origin when we have a pending -+ * exception. However I can't think of a -+ * situation where this is wrong - ejt. -+ */ -+ -+ /* Do reads */ -+ down_read(&s->lock); -+ -+ /* See if it it has been remapped */ -+ e = lookup_exception(&s->complete, chunk); -+ if (e) -+ remap_exception(s, e, bh); -+ else -+ bh->b_rdev = s->origin->dev; -+ -+ up_read(&s->lock); -+ } -+ -+ return r; -+} -+ -+static void list_merge(struct list_head *l1, struct list_head *l2) -+{ -+ struct list_head *l1_n, *l2_p; -+ -+ l1_n = l1->next; -+ l2_p = l2->prev; -+ -+ l1->next = l2; -+ l2->prev = l1; -+ -+ l2_p->next = l1_n; -+ l1_n->prev = l2_p; -+} -+ -+static int __origin_write(struct list_head *snapshots, struct buffer_head *bh) -+{ -+ int r = 1; -+ struct list_head *sl; -+ struct dm_snapshot *snap; -+ struct exception *e; -+ struct pending_exception *pe, *last = NULL; -+ chunk_t chunk; -+ -+ /* Do all the snapshots on this origin */ -+ list_for_each(sl, snapshots) { -+ snap = list_entry(sl, struct dm_snapshot, list); -+ -+ /* Only deal with valid snapshots */ -+ if (!snap->valid) -+ continue; -+ -+ down_write(&snap->lock); -+ -+ /* -+ * Remember, different snapshots can have -+ * different chunk sizes. -+ */ -+ chunk = sector_to_chunk(snap, bh->b_rsector); -+ -+ /* -+ * Check exception table to see if block -+ * is already remapped in this snapshot -+ * and trigger an exception if not. 
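/*
 * Illustrative user-space sketch (not part of the patch): the arithmetic
 * behind remap_exception() above.  Because snapshot_ctr() insists the chunk
 * size is a power of two, the chunk number falls out of a shift and the
 * offset within the chunk out of a mask, so a remapped sector keeps its
 * in-chunk offset while taking the chunk that lives on the COW device.
 * The values below are examples only.
 */
#include <assert.h>

int main(void)
{
        unsigned long chunk_size = 16;                  /* sectors, power of two */
        unsigned long chunk_mask = chunk_size - 1;
        int chunk_shift = 4;                            /* log2(chunk_size) */

        unsigned long sector = 1234;
        unsigned long old_chunk = sector >> chunk_shift;        /* chunk 77 on the origin */
        unsigned long offset = sector & chunk_mask;             /* sector 2 within the chunk */
        unsigned long new_chunk = 5;                             /* COW chunk holding the copy */

        unsigned long remapped = (new_chunk << chunk_shift) + offset;

        assert(old_chunk == 77 && offset == 2);
        assert(remapped == 82);                 /* 5 * 16 + 2 */
        return 0;
}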
-+ */ -+ e = lookup_exception(&snap->complete, chunk); -+ if (!e) { -+ pe = find_pending_exception(snap, bh); -+ if (!pe) { -+ store_fn(snap, drop_snapshot); -+ snap->valid = 0; -+ -+ } else { -+ if (last) -+ list_merge(&pe->siblings, -+ &last->siblings); -+ -+ last = pe; -+ r = 0; -+ } -+ } -+ -+ up_write(&snap->lock); -+ } -+ -+ /* -+ * Now that we have a complete pe list we can start the copying. -+ */ -+ if (last) { -+ pe = last; -+ do { -+ down_write(&pe->snap->lock); -+ queue_buffer(&pe->origin_bhs, bh); -+ start_copy(pe); -+ up_write(&pe->snap->lock); -+ pe = list_entry(pe->siblings.next, -+ struct pending_exception, siblings); -+ -+ } while (pe != last); -+ } -+ -+ return r; -+} -+ -+static int snapshot_status(status_type_t type, char *result, -+ int maxlen, void *context) -+{ -+ struct dm_snapshot *snap = (struct dm_snapshot *) context; -+ char cow[16]; -+ char org[16]; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ if (!snap->valid) -+ snprintf(result, maxlen, "Invalid"); -+ else { -+ if (snap->store.percent_full) -+ snprintf(result, maxlen, "%d%%", -+ snap->store.percent_full(&snap-> -+ store)); -+ else -+ snprintf(result, maxlen, "Unknown"); -+ } -+ break; -+ -+ case STATUSTYPE_TABLE: -+ /* -+ * kdevname returns a static pointer so we need -+ * to make private copies if the output is to -+ * make sense. -+ */ -+ strncpy(cow, kdevname(snap->cow->dev), sizeof(cow)); -+ strncpy(org, kdevname(snap->origin->dev), sizeof(org)); -+ snprintf(result, maxlen, "%s %s %c %ld", org, cow, -+ snap->type, snap->chunk_size); -+ break; -+ } -+ -+ return 0; -+} -+ -+static int snapshot_wait(void *context, wait_queue_t *wq, int add) -+{ -+ struct dm_snapshot *snap = (struct dm_snapshot *) context; -+ -+ if (add) -+ add_wait_queue(&snap->waitq, wq); -+ else -+ remove_wait_queue(&snap->waitq, wq); -+ -+ return 0; -+} -+ -+/* -+ * Called on a write from the origin driver. -+ */ -+int do_origin(struct dm_dev *origin, struct buffer_head *bh) -+{ -+ struct origin *o; -+ int r; -+ -+ down_read(&_origins_lock); -+ o = __lookup_origin(origin->dev); -+ if (!o) -+ BUG(); -+ -+ r = __origin_write(&o->snapshots, bh); -+ up_read(&_origins_lock); -+ -+ return r; -+} -+ -+/* -+ * Origin: maps a linear range of a device, with hooks for snapshotting. -+ */ -+ -+/* -+ * Construct an origin mapping: -+ * The context for an origin is merely a 'struct dm_dev *' -+ * pointing to the real device. -+ */ -+static int origin_ctr(struct dm_table *t, offset_t b, offset_t l, -+ int argc, char **argv, void **context) -+{ -+ int r; -+ struct dm_dev *dev; -+ -+ if (argc != 1) { -+ *context = "dm-origin: incorrect number of arguments"; -+ return -EINVAL; -+ } -+ -+ r = dm_table_get_device(t, argv[0], 0, l, &dev); -+ if (r) { -+ *context = "Cannot get target device"; -+ return r; -+ } -+ -+ *context = dev; -+ -+ return 0; -+} -+ -+static void origin_dtr(struct dm_table *t, void *c) -+{ -+ struct dm_dev *dev = (struct dm_dev *) c; -+ dm_table_put_device(t, dev); -+} -+ -+static int origin_map(struct buffer_head *bh, int rw, void *context) -+{ -+ struct dm_dev *dev = (struct dm_dev *) context; -+ bh->b_rdev = dev->dev; -+ -+ /* Only tell snapshots if this is a write */ -+ return (rw == WRITE) ? 
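/*
 * Illustrative user-space sketch (not part of the patch): list_merge()
 * above splices two circular doubly-linked rings into one, which is how
 * pending exceptions from different snapshots of the same origin end up
 * chained together as "siblings" and can be walked with the do/while loop
 * in __origin_write().  The minimal ring type here is hypothetical.
 */
#include <stdio.h>

struct ring {
        struct ring *next, *prev;
        int id;
};

static void ring_init(struct ring *r, int id)
{
        r->next = r->prev = r;  /* a single-element ring points at itself */
        r->id = id;
}

static void ring_merge(struct ring *l1, struct ring *l2)
{
        struct ring *l1_n = l1->next, *l2_p = l2->prev;

        l1->next = l2;
        l2->prev = l1;
        l2_p->next = l1_n;
        l1_n->prev = l2_p;
}

int main(void)
{
        struct ring a, b, c, *p;

        ring_init(&a, 1);
        ring_init(&b, 2);
        ring_init(&c, 3);

        ring_merge(&a, &b);     /* ring is now a-b */
        ring_merge(&a, &c);     /* ring is now a-c-b */

        p = &a;
        do {
                printf("%d ", p->id);   /* visits every member exactly once */
                p = p->next;
        } while (p != &a);
        printf("\n");
        return 0;
}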
do_origin(dev, bh) : 1; -+} -+ -+static int origin_status(status_type_t type, char *result, -+ int maxlen, void *context) -+{ -+ struct dm_dev *dev = (struct dm_dev *) context; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ result[0] = '\0'; -+ break; -+ -+ case STATUSTYPE_TABLE: -+ snprintf(result, maxlen, "%s", kdevname(dev->dev)); -+ break; -+ } -+ -+ return 0; -+} -+ -+static struct target_type origin_target = { -+ name: "snapshot-origin", -+ module: THIS_MODULE, -+ ctr: origin_ctr, -+ dtr: origin_dtr, -+ map: origin_map, -+ status: origin_status, -+ wait: NULL, -+ err: NULL -+}; -+ -+static struct target_type snapshot_target = { -+ name: "snapshot", -+ module: THIS_MODULE, -+ ctr: snapshot_ctr, -+ dtr: snapshot_dtr, -+ map: snapshot_map, -+ status: snapshot_status, -+ wait: snapshot_wait, -+ err: NULL -+}; -+ -+int __init dm_snapshot_init(void) -+{ -+ int r; -+ -+ r = dm_register_target(&snapshot_target); -+ if (r) { -+ DMERR("snapshot target register failed %d", r); -+ return r; -+ } -+ -+ r = dm_register_target(&origin_target); -+ if (r < 0) { -+ DMERR("Device mapper: Origin: register failed %d\n", r); -+ goto bad1; -+ } -+ -+ r = init_origin_hash(); -+ if (r) { -+ DMERR("init_origin_hash failed."); -+ goto bad2; -+ } -+ -+ exception_cache = kmem_cache_create("dm-snapshot-ex", -+ sizeof(struct exception), -+ __alignof__(struct exception), -+ 0, NULL, NULL); -+ if (!exception_cache) { -+ DMERR("Couldn't create exception cache."); -+ r = -ENOMEM; -+ goto bad3; -+ } -+ -+ pending_cache = -+ kmem_cache_create("dm-snapshot-in", -+ sizeof(struct pending_exception), -+ __alignof__(struct pending_exception), -+ 0, NULL, NULL); -+ if (!pending_cache) { -+ DMERR("Couldn't create pending cache."); -+ r = -ENOMEM; -+ goto bad4; -+ } -+ -+ pending_pool = mempool_create(128, mempool_alloc_slab, -+ mempool_free_slab, pending_cache); -+ if (!pending_pool) { -+ DMERR("Couldn't create pending pool."); -+ r = -ENOMEM; -+ goto bad5; -+ } -+ -+ return 0; -+ -+ bad5: -+ kmem_cache_destroy(pending_cache); -+ bad4: -+ kmem_cache_destroy(exception_cache); -+ bad3: -+ exit_origin_hash(); -+ bad2: -+ dm_unregister_target(&origin_target); -+ bad1: -+ dm_unregister_target(&snapshot_target); -+ return r; -+} -+ -+void dm_snapshot_exit(void) -+{ -+ int r; -+ -+ r = dm_unregister_target(&snapshot_target); -+ if (r) -+ DMERR("snapshot unregister failed %d", r); -+ -+ r = dm_unregister_target(&origin_target); -+ if (r) -+ DMERR("origin unregister failed %d", r); -+ -+ exit_origin_hash(); -+ mempool_destroy(pending_pool); -+ kmem_cache_destroy(pending_cache); -+ kmem_cache_destroy(exception_cache); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -ruN linux-2.4.19-pre8/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapshot.h ---- linux-2.4.19-pre8/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-snapshot.h Mon May 20 15:02:16 2002 -@@ -0,0 +1,149 @@ -+/* -+ * dm-snapshot.c -+ * -+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. 
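/*
 * Illustrative user-space sketch (not part of the patch): the unwind
 * pattern used by dm_snapshot_init() above - each setup step that can fail
 * jumps to a label that undoes only the steps already completed, in
 * reverse order.  The step functions here are hypothetical stand-ins.
 */
#include <stdio.h>

static int step(const char *name, int fail)
{
        printf("%s %s\n", fail ? "failed" : "set up", name);
        return fail ? -1 : 0;
}

static void undo(const char *name)
{
        printf("undo %s\n", name);
}

static int init_everything(void)
{
        int r;

        if ((r = step("target", 0)))
                goto bad1;
        if ((r = step("cache", 0)))
                goto bad2;
        if ((r = step("pool", 1)))      /* simulate a failure here */
                goto bad3;
        return 0;

 bad3:
        undo("cache");
 bad2:
        undo("target");
 bad1:
        return r;
}

int main(void)
{
        return init_everything() ? 1 : 0;
}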
-+ */ -+ -+#ifndef DM_SNAPSHOT_H -+#define DM_SNAPSHOT_H -+ -+#include "dm.h" -+#include -+ -+struct exception_table { -+ uint32_t hash_mask; -+ struct list_head *table; -+}; -+ -+/* -+ * The snapshot code deals with largish chunks of the disk at a -+ * time. Typically 64k - 256k. -+ */ -+/* FIXME: can we get away with limiting these to a uint32_t ? */ -+typedef offset_t chunk_t; -+ -+/* -+ * An exception is used where an old chunk of data has been -+ * replaced by a new one. -+ */ -+struct exception { -+ struct list_head hash_list; -+ -+ chunk_t old_chunk; -+ chunk_t new_chunk; -+}; -+ -+/* -+ * Abstraction to handle the meta/layout of exception stores (the -+ * COW device). -+ */ -+struct exception_store { -+ -+ /* -+ * Destroys this object when you've finished with it. -+ */ -+ void (*destroy) (struct exception_store *store); -+ -+ /* -+ * Find somewhere to store the next exception. -+ */ -+ int (*prepare_exception) (struct exception_store *store, -+ struct exception *e); -+ -+ /* -+ * Update the metadata with this exception. -+ */ -+ void (*commit_exception) (struct exception_store *store, -+ struct exception *e, -+ void (*callback) (void *, int success), -+ void *callback_context); -+ -+ /* -+ * The snapshot is invalid, note this in the metadata. -+ */ -+ void (*drop_snapshot) (struct exception_store *store); -+ -+ /* -+ * Return the %age full of the snapshot -+ */ -+ int (*percent_full) (struct exception_store *store); -+ -+ struct dm_snapshot *snap; -+ void *context; -+}; -+ -+struct dm_snapshot { -+ struct rw_semaphore lock; -+ -+ struct dm_dev *origin; -+ struct dm_dev *cow; -+ -+ /* List of snapshots per Origin */ -+ struct list_head list; -+ -+ /* Processes wait on this when they want to block on status changes */ -+ wait_queue_head_t waitq; -+ -+ /* Size of data blocks saved - must be a power of 2 */ -+ chunk_t chunk_size; -+ chunk_t chunk_mask; -+ chunk_t chunk_shift; -+ -+ /* You can't use a snapshot if this is 0 (e.g. if full) */ -+ int valid; -+ -+ /* Used for display of table */ -+ char type; -+ -+ /* The last percentage we notified */ -+ int last_percent; -+ -+ struct exception_table pending; -+ struct exception_table complete; -+ -+ /* The on disk metadata handler */ -+ struct exception_store store; -+}; -+ -+/* -+ * Used by the exception stores to load exceptions hen -+ * initialising. -+ */ -+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); -+ -+/* -+ * Constructor and destructor for the default persistent -+ * store. -+ */ -+int dm_create_persistent(struct exception_store *store, uint32_t chunk_size); -+ -+int dm_create_transient(struct exception_store *store, -+ struct dm_snapshot *s, int blocksize, void **error); -+ -+/* -+ * Return the number of sectors in the device. -+ */ -+static inline offset_t get_dev_size(kdev_t dev) -+{ -+ int *sizes; -+ -+ sizes = blk_size[MAJOR(dev)]; -+ if (sizes) -+ return sizes[MINOR(dev)] << 1; -+ -+ return 0; -+} -+ -+static inline chunk_t sector_to_chunk(struct dm_snapshot *s, offset_t sector) -+{ -+ return (sector & ~s->chunk_mask) >> s->chunk_shift; -+} -+ -+static inline offset_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk) -+{ -+ return chunk << s->chunk_shift; -+} -+ -+#endif -diff -ruN linux-2.4.19-pre8/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c ---- linux-2.4.19-pre8/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-stripe.c Mon May 20 14:20:28 2002 -@@ -0,0 +1,230 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. 
-+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+#include -+#include -+ -+struct stripe { -+ struct dm_dev *dev; -+ offset_t physical_start; -+}; -+ -+struct stripe_c { -+ offset_t logical_start; -+ uint32_t stripes; -+ -+ /* The size of this target / num. stripes */ -+ uint32_t stripe_width; -+ -+ /* stripe chunk size */ -+ uint32_t chunk_shift; -+ offset_t chunk_mask; -+ -+ struct stripe stripe[0]; -+}; -+ -+static inline struct stripe_c *alloc_context(int stripes) -+{ -+ size_t len = sizeof(struct stripe_c) + -+ (sizeof(struct stripe) * stripes); -+ -+ return kmalloc(len, GFP_KERNEL); -+} -+ -+/* -+ * Parse a single pair -+ */ -+static int get_stripe(struct dm_table *t, struct stripe_c *sc, -+ int stripe, char **argv) -+{ -+ char *end; -+ unsigned long start; -+ -+ start = simple_strtoul(argv[1], &end, 10); -+ if (*end) -+ return -EINVAL; -+ -+ if (dm_table_get_device(t, argv[0], start, sc->stripe_width, -+ &sc->stripe[stripe].dev)) -+ return -ENXIO; -+ -+ sc->stripe[stripe].physical_start = start; -+ return 0; -+} -+ -+/* -+ * Construct a striped mapping. -+ * [ ]+ -+ */ -+static int stripe_ctr(struct dm_table *t, offset_t b, offset_t l, -+ int argc, char **argv, void **context) -+{ -+ struct stripe_c *sc; -+ uint32_t stripes; -+ uint32_t chunk_size; -+ char *end; -+ int r, i; -+ -+ if (argc < 2) { -+ *context = "dm-stripe: Not enough arguments"; -+ return -EINVAL; -+ } -+ -+ stripes = simple_strtoul(argv[0], &end, 10); -+ if (*end) { -+ *context = "dm-stripe: Invalid stripe count"; -+ return -EINVAL; -+ } -+ -+ chunk_size = simple_strtoul(argv[1], &end, 10); -+ if (*end) { -+ *context = "dm-stripe: Invalid chunk_size"; -+ return -EINVAL; -+ } -+ -+ if (l % stripes) { -+ *context = "dm-stripe: Target length not divisable by " -+ "number of stripes"; -+ return -EINVAL; -+ } -+ -+ sc = alloc_context(stripes); -+ if (!sc) { -+ *context = "dm-stripe: Memory allocation for striped context " -+ "failed"; -+ return -ENOMEM; -+ } -+ -+ sc->logical_start = b; -+ sc->stripes = stripes; -+ sc->stripe_width = l / stripes; -+ -+ /* -+ * chunk_size is a power of two -+ */ -+ if (!chunk_size || (chunk_size & (chunk_size - 1))) { -+ *context = "dm-stripe: Invalid chunk size"; -+ kfree(sc); -+ return -EINVAL; -+ } -+ -+ sc->chunk_mask = chunk_size - 1; -+ for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++) -+ chunk_size >>= 1; -+ sc->chunk_shift--; -+ -+ /* -+ * Get the stripe destinations. 
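/*
 * Illustrative user-space sketch (not part of the patch): the trailing
 * zero-length array used by struct stripe_c above - one allocation holds
 * the fixed header plus however many per-stripe entries are needed, so the
 * whole context frees in one call.  Modern C spells this as a flexible
 * array member; the struct names here are stand-ins.
 */
#include <stdlib.h>
#include <stdio.h>

struct mock_stripe { unsigned long physical_start; };

struct mock_stripe_c {
        unsigned int stripes;
        struct mock_stripe stripe[];    /* per-stripe storage follows the header */
};

int main(void)
{
        unsigned int n = 3, i;
        struct mock_stripe_c *sc =
                malloc(sizeof(*sc) + n * sizeof(struct mock_stripe));

        if (!sc)
                return 1;
        sc->stripes = n;
        for (i = 0; i < n; i++)
                sc->stripe[i].physical_start = i * 1024;

        printf("stripe 2 starts at %lu\n", sc->stripe[2].physical_start);
        free(sc);       /* header and array released together */
        return 0;
}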
-+ */ -+ for (i = 0; i < stripes; i++) { -+ if (argc < 2) { -+ *context = "dm-stripe: Not enough destinations " -+ "specified"; -+ kfree(sc); -+ return -EINVAL; -+ } -+ -+ argv += 2; -+ -+ r = get_stripe(t, sc, i, argv); -+ if (r < 0) { -+ *context = "dm-stripe: Couldn't parse stripe " -+ "destination"; -+ while (i--) -+ dm_table_put_device(t, sc->stripe[i].dev); -+ kfree(sc); -+ return r; -+ } -+ } -+ -+ *context = sc; -+ return 0; -+} -+ -+static void stripe_dtr(struct dm_table *t, void *c) -+{ -+ unsigned int i; -+ struct stripe_c *sc = (struct stripe_c *) c; -+ -+ for (i = 0; i < sc->stripes; i++) -+ dm_table_put_device(t, sc->stripe[i].dev); -+ -+ kfree(sc); -+} -+ -+static int stripe_map(struct buffer_head *bh, int rw, void *context) -+{ -+ struct stripe_c *sc = (struct stripe_c *) context; -+ -+ offset_t offset = bh->b_rsector - sc->logical_start; -+ uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift); -+ uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */ -+ chunk = chunk / sc->stripes; -+ -+ bh->b_rdev = sc->stripe[stripe].dev->dev; -+ bh->b_rsector = sc->stripe[stripe].physical_start + -+ (chunk << sc->chunk_shift) + (offset & sc->chunk_mask); -+ return 1; -+} -+ -+static int stripe_status(status_type_t type, char *result, int maxlen, -+ void *context) -+{ -+ struct stripe_c *sc = (struct stripe_c *) context; -+ int offset; -+ int i; -+ -+ switch (type) { -+ case STATUSTYPE_INFO: -+ result[0] = '\0'; -+ break; -+ -+ case STATUSTYPE_TABLE: -+ offset = snprintf(result, maxlen, "%d %ld", -+ sc->stripes, sc->chunk_mask + 1); -+ for (i = 0; i < sc->stripes; i++) { -+ offset += -+ snprintf(result + offset, maxlen - offset, -+ " %s %ld", -+ kdevname(sc->stripe[i].dev->dev), -+ sc->stripe[i].physical_start); -+ } -+ break; -+ } -+ return 0; -+} -+ -+static struct target_type stripe_target = { -+ name: "striped", -+ module: THIS_MODULE, -+ ctr: stripe_ctr, -+ dtr: stripe_dtr, -+ map: stripe_map, -+ status: stripe_status, -+ wait: NULL, -+}; -+ -+int __init dm_stripe_init(void) -+{ -+ int r; -+ -+ r = dm_register_target(&stripe_target); -+ if (r < 0) -+ DMWARN("striped target registration failed"); -+ -+ return r; -+} -+ -+void dm_stripe_exit(void) -+{ -+ if (dm_unregister_target(&stripe_target)) -+ DMWARN("striped target unregistration failed"); -+ -+ return; -+} -diff -ruN linux-2.4.19-pre8/drivers/md/dm-table.c linux/drivers/md/dm-table.c ---- linux-2.4.19-pre8/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-table.c Fri May 10 16:13:11 2002 -@@ -0,0 +1,410 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+ -+/* ceiling(n / size) * size */ -+static inline unsigned long round_up(unsigned long n, unsigned long size) -+{ -+ unsigned long r = n % size; -+ return n + (r ? (size - r) : 0); -+} -+ -+/* ceiling(n / size) */ -+static inline unsigned long div_up(unsigned long n, unsigned long size) -+{ -+ return round_up(n, size) / size; -+} -+ -+/* similar to ceiling(log_size(n)) */ -+static uint int_log(unsigned long n, unsigned long base) -+{ -+ int result = 0; -+ -+ while (n > 1) { -+ n = div_up(n, base); -+ result++; -+ } -+ -+ return result; -+} -+ -+/* -+ * return the highest key that you could lookup -+ * from the n'th node on level l of the btree. 
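/*
 * Illustrative user-space sketch (not part of the patch): the arithmetic
 * used by stripe_map() above.  The offset within the striped device splits
 * into a chunk number and an in-chunk offset; the chunk number is then
 * dealt round-robin across the stripes.  The values are examples only.
 */
#include <assert.h>

int main(void)
{
        unsigned long chunk_size = 8;           /* sectors per chunk, power of two */
        unsigned long chunk_mask = chunk_size - 1;
        int chunk_shift = 3;
        unsigned int stripes = 3;

        unsigned long offset = 100;                             /* sector within the target */
        unsigned long chunk = offset >> chunk_shift;            /* chunk 12 overall */
        unsigned int stripe = chunk % stripes;                  /* lands on stripe 0 */
        unsigned long chunk_on_stripe = chunk / stripes;        /* chunk 4 on that stripe */

        /* sector within the chosen stripe device (before adding its start) */
        unsigned long mapped = (chunk_on_stripe << chunk_shift) + (offset & chunk_mask);

        assert(chunk == 12 && stripe == 0 && chunk_on_stripe == 4);
        assert(mapped == 36);   /* 4 * 8 + (100 & 7) = 32 + 4 */
        return 0;
}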
-+ */ -+static offset_t high(struct dm_table *t, int l, int n) -+{ -+ for (; l < t->depth - 1; l++) -+ n = get_child(n, CHILDREN_PER_NODE - 1); -+ -+ if (n >= t->counts[l]) -+ return (offset_t) - 1; -+ -+ return get_node(t, l, n)[KEYS_PER_NODE - 1]; -+} -+ -+/* -+ * fills in a level of the btree based on the -+ * highs of the level below it. -+ */ -+static int setup_btree_index(int l, struct dm_table *t) -+{ -+ int n, k; -+ offset_t *node; -+ -+ for (n = 0; n < t->counts[l]; n++) { -+ node = get_node(t, l, n); -+ -+ for (k = 0; k < KEYS_PER_NODE; k++) -+ node[k] = high(t, l + 1, get_child(n, k)); -+ } -+ -+ return 0; -+} -+ -+/* -+ * highs, and targets are managed as dynamic -+ * arrays during a table load. -+ */ -+static int alloc_targets(struct dm_table *t, int num) -+{ -+ offset_t *n_highs; -+ struct target *n_targets; -+ int n = t->num_targets; -+ unsigned long size = (sizeof(struct target) + sizeof(offset_t)) * num; -+ -+ n_highs = (offset_t *) vmalloc(size); -+ if (!n_highs) -+ return -ENOMEM; -+ -+ memset(n_highs, 0, size); -+ -+ n_targets = (struct target *) (n_highs + num); -+ -+ if (n) { -+ memcpy(n_highs, t->highs, sizeof(*n_highs) * n); -+ memcpy(n_targets, t->targets, sizeof(*n_targets) * n); -+ } -+ -+ memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); -+ if (t->highs) -+ vfree(t->highs); -+ -+ t->num_allocated = num; -+ t->highs = n_highs; -+ t->targets = n_targets; -+ -+ return 0; -+} -+ -+int dm_table_create(struct dm_table **result) -+{ -+ struct dm_table *t = kmalloc(sizeof(struct dm_table), GFP_NOIO); -+ -+ if (!t) -+ return -ENOMEM; -+ -+ memset(t, 0, sizeof(*t)); -+ INIT_LIST_HEAD(&t->devices); -+ -+ /* allocate a single node's worth of targets to begin with */ -+ if (alloc_targets(t, KEYS_PER_NODE)) { -+ kfree(t); -+ t = NULL; -+ return -ENOMEM; -+ } -+ -+ *result = t; -+ return 0; -+} -+ -+static void free_devices(struct list_head *devices) -+{ -+ struct list_head *tmp, *next; -+ -+ for (tmp = devices->next; tmp != devices; tmp = next) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ next = tmp->next; -+ kfree(dd); -+ } -+} -+ -+void dm_table_destroy(struct dm_table *t) -+{ -+ int i; -+ -+ /* free the indexes (see dm_table_complete) */ -+ if (t->depth >= 2) -+ vfree(t->index[t->depth - 2]); -+ -+ /* free the targets */ -+ for (i = 0; i < t->num_targets; i++) { -+ struct target *tgt = &t->targets[i]; -+ -+ dm_put_target_type(t->targets[i].type); -+ -+ if (tgt->type->dtr) -+ tgt->type->dtr(t, tgt->private); -+ } -+ -+ vfree(t->highs); -+ -+ /* free the device list */ -+ if (t->devices.next != &t->devices) { -+ DMWARN("devices still present during destroy: " -+ "dm_table_remove_device calls missing"); -+ -+ free_devices(&t->devices); -+ } -+ -+ kfree(t); -+} -+ -+/* -+ * Checks to see if we need to extend highs or targets. -+ */ -+static inline int check_space(struct dm_table *t) -+{ -+ if (t->num_targets >= t->num_allocated) -+ return alloc_targets(t, t->num_allocated * 2); -+ -+ return 0; -+} -+ -+/* -+ * Convert a device path to a kdev_t. 
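/*
 * Illustrative user-space sketch (not part of the patch): alloc_targets()
 * above grows the 'highs' and 'targets' arrays with a single allocation -
 * the keys sit at the front of the block and the target structs directly
 * after them, so both arrays resize and free together.  malloc stands in
 * for vmalloc here and the struct is a hypothetical stand-in.
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct mock_target { void *type, *private_data; };

int main(void)
{
        int num = 4;
        size_t size = (sizeof(struct mock_target) + sizeof(unsigned long)) * num;

        unsigned long *highs = malloc(size);
        struct mock_target *targets;

        if (!highs)
                return 1;
        memset(highs, 0, size);

        /* the target array starts where the key array ends */
        targets = (struct mock_target *) (highs + num);

        highs[0] = 1023;                /* last sector of target 0 */
        targets[0].private_data = NULL;

        printf("one block, two arrays: %p / %p\n", (void *) highs, (void *) targets);
        free(highs);                    /* a single free releases both */
        return 0;
}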
-+ */ -+int lookup_device(const char *path, kdev_t *dev) -+{ -+ int r; -+ struct nameidata nd; -+ struct inode *inode; -+ -+ if (!path_init(path, LOOKUP_FOLLOW, &nd)) -+ return 0; -+ -+ if ((r = path_walk(path, &nd))) -+ goto bad; -+ -+ inode = nd.dentry->d_inode; -+ if (!inode) { -+ r = -ENOENT; -+ goto bad; -+ } -+ -+ if (!S_ISBLK(inode->i_mode)) { -+ r = -EINVAL; -+ goto bad; -+ } -+ -+ *dev = inode->i_rdev; -+ -+ bad: -+ path_release(&nd); -+ return r; -+} -+ -+/* -+ * See if we've already got a device in the list. -+ */ -+static struct dm_dev *find_device(struct list_head *l, kdev_t dev) -+{ -+ struct list_head *tmp; -+ -+ list_for_each(tmp, l) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ if (dd->dev == dev) -+ return dd; -+ } -+ -+ return NULL; -+} -+ -+/* -+ * Open a device so we can use it as a map destination. -+ */ -+static int open_dev(struct dm_dev *d) -+{ -+ int err; -+ -+ if (d->bd) -+ BUG(); -+ -+ if (!(d->bd = bdget(kdev_t_to_nr(d->dev)))) -+ return -ENOMEM; -+ -+ if ((err = blkdev_get(d->bd, FMODE_READ | FMODE_WRITE, 0, BDEV_FILE))) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * Close a device that we've been using. -+ */ -+static void close_dev(struct dm_dev *d) -+{ -+ if (!d->bd) -+ return; -+ -+ blkdev_put(d->bd, BDEV_FILE); -+ d->bd = NULL; -+} -+ -+/* -+ * If possible (ie. blk_size[major] is set), this -+ * checks an area of a destination device is -+ * valid. -+ */ -+static int check_device_area(kdev_t dev, offset_t start, offset_t len) -+{ -+ int *sizes; -+ offset_t dev_size; -+ -+ if (!(sizes = blk_size[MAJOR(dev)]) || !(dev_size = sizes[MINOR(dev)])) -+ /* we don't know the device details, -+ * so give the benefit of the doubt */ -+ return 1; -+ -+ /* convert to 512-byte sectors */ -+ dev_size <<= 1; -+ -+ return ((start < dev_size) && (len <= (dev_size - start))); -+} -+ -+/* -+ * Add a device to the list, or just increment the usage count -+ * if it's already present. -+ */ -+int dm_table_get_device(struct dm_table *t, const char *path, -+ offset_t start, offset_t len, struct dm_dev **result) -+{ -+ int r; -+ kdev_t dev; -+ struct dm_dev *dd; -+ int major, minor; -+ -+ if (sscanf(path, "%x:%x", &major, &minor) == 2) { -+ /* Extract the major/minor numbers */ -+ dev = MKDEV(major, minor); -+ } else { -+ /* convert the path to a device */ -+ if ((r = lookup_device(path, &dev))) -+ return r; -+ } -+ -+ dd = find_device(&t->devices, dev); -+ if (!dd) { -+ dd = kmalloc(sizeof(*dd), GFP_KERNEL); -+ if (!dd) -+ return -ENOMEM; -+ -+ dd->dev = dev; -+ dd->bd = NULL; -+ -+ if ((r = open_dev(dd))) { -+ kfree(dd); -+ return r; -+ } -+ -+ atomic_set(&dd->count, 0); -+ list_add(&dd->list, &t->devices); -+ } -+ atomic_inc(&dd->count); -+ -+ if (!check_device_area(dd->dev, start, len)) { -+ DMWARN("device %s too small for target", path); -+ dm_table_put_device(t, dd); -+ return -EINVAL; -+ } -+ -+ *result = dd; -+ -+ return 0; -+} -+ -+/* -+ * Decrement a devices use count and remove it if neccessary. 
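/*
 * Illustrative user-space sketch (not part of the patch): the two ways
 * dm_table_get_device() above accepts a device - either a "major:minor"
 * pair (parsed as hex, as in the code above) or a path that is looked up
 * in the filesystem.  Only the string-parsing half is shown; the real code
 * also refcounts the device so several targets share one open.
 */
#include <stdio.h>

int main(void)
{
        const char *arg = "3:1";
        unsigned int major, minor;

        if (sscanf(arg, "%x:%x", &major, &minor) == 2)
                printf("numeric reference: major %#x, minor %#x\n", major, minor);
        else
                printf("not numeric, would fall back to a path lookup: %s\n", arg);
        return 0;
}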
-+ */ -+void dm_table_put_device(struct dm_table *t, struct dm_dev *dd) -+{ -+ if (atomic_dec_and_test(&dd->count)) { -+ close_dev(dd); -+ list_del(&dd->list); -+ kfree(dd); -+ } -+} -+ -+/* -+ * Adds a target to the map -+ */ -+int dm_table_add_target(struct dm_table *t, offset_t highs, -+ struct target_type *type, void *private) -+{ -+ int r, n; -+ -+ if ((r = check_space(t))) -+ return r; -+ -+ n = t->num_targets++; -+ t->highs[n] = highs; -+ t->targets[n].type = type; -+ t->targets[n].private = private; -+ -+ return 0; -+} -+ -+static int setup_indexes(struct dm_table *t) -+{ -+ int i, total = 0; -+ offset_t *indexes; -+ -+ /* allocate the space for *all* the indexes */ -+ for (i = t->depth - 2; i >= 0; i--) { -+ t->counts[i] = div_up(t->counts[i + 1], CHILDREN_PER_NODE); -+ total += t->counts[i]; -+ } -+ -+ indexes = (offset_t *) vmalloc((unsigned long) NODE_SIZE * total); -+ if (!indexes) -+ return -ENOMEM; -+ -+ /* set up internal nodes, bottom-up */ -+ for (i = t->depth - 2, total = 0; i >= 0; i--) { -+ t->index[i] = indexes; -+ indexes += (KEYS_PER_NODE * t->counts[i]); -+ setup_btree_index(i, t); -+ } -+ -+ return 0; -+} -+ -+/* -+ * Builds the btree to index the map -+ */ -+int dm_table_complete(struct dm_table *t) -+{ -+ int leaf_nodes, r = 0; -+ -+ /* how many indexes will the btree have ? */ -+ leaf_nodes = div_up(t->num_targets, KEYS_PER_NODE); -+ t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); -+ -+ /* leaf layer has already been set up */ -+ t->counts[t->depth - 1] = leaf_nodes; -+ t->index[t->depth - 1] = t->highs; -+ -+ if (t->depth >= 2) -+ r = setup_indexes(t); -+ -+ return r; -+} -+ -+EXPORT_SYMBOL(dm_table_get_device); -+EXPORT_SYMBOL(dm_table_put_device); -diff -ruN linux-2.4.19-pre8/drivers/md/dm-target.c linux/drivers/md/dm-target.c ---- linux-2.4.19-pre8/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-target.c Mon May 20 14:13:44 2002 -@@ -0,0 +1,243 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+ -+#include -+ -+struct tt_internal { -+ struct target_type tt; -+ -+ struct list_head list; -+ long use; -+}; -+ -+static LIST_HEAD(_targets); -+static rwlock_t _lock = RW_LOCK_UNLOCKED; -+ -+#define DM_MOD_NAME_SIZE 32 -+ -+/* -+ * Destructively splits up the argument list to pass to ctr. -+ */ -+int split_args(int max, int *argc, char **argv, char *input) -+{ -+ char *start, *end = input, *out; -+ *argc = 0; -+ -+ while (1) { -+ start = end; -+ -+ /* Skip whitespace */ -+ while (*start && isspace(*start)) -+ start++; -+ -+ if (!*start) -+ break; /* success, we hit the end */ -+ -+ /* 'out' is used to remove any back-quotes */ -+ end = out = start; -+ while (*end) { -+ /* Everything apart from '\0' can be quoted */ -+ if (*end == '\\' && *(end + 1)) { -+ *out++ = *(end + 1); -+ end += 2; -+ continue; -+ } -+ -+ if (isspace(*end)) -+ break; /* end of token */ -+ -+ *out++ = *end++; -+ } -+ -+ /* have we already filled the array ? 
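/*
 * Illustrative user-space sketch (not part of the patch): the sizing sums
 * done by dm_table_complete()/setup_indexes() above.  With 64-byte nodes
 * and 4-byte keys, each node holds 16 keys and has 17 children, so the
 * depth is 1 plus roughly log_17 of the number of leaf nodes.  The
 * constants here are examples - the real ones derive from L1_CACHE_BYTES
 * and sizeof(offset_t).
 */
#include <stdio.h>

static unsigned long div_up(unsigned long n, unsigned long size)
{
        return (n + size - 1) / size;
}

static int int_log(unsigned long n, unsigned long base)
{
        int result = 0;

        while (n > 1) {
                n = div_up(n, base);
                result++;
        }
        return result;
}

int main(void)
{
        const int keys_per_node = 16, children_per_node = 17;
        unsigned long num_targets = 1000;

        unsigned long leaf_nodes = div_up(num_targets, keys_per_node);  /* 63 */
        int depth = 1 + int_log(leaf_nodes, children_per_node);         /* 1 + 2 = 3 */

        printf("%lu leaves, depth %d\n", leaf_nodes, depth);
        return 0;
}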
*/ -+ if ((*argc + 1) > max) -+ return -EINVAL; -+ -+ /* we know this is whitespace */ -+ if (*end) -+ end++; -+ -+ /* terminate the string and put it in the array */ -+ *out = '\0'; -+ argv[*argc] = start; -+ (*argc)++; -+ } -+ -+ return 0; -+} -+ -+static inline struct tt_internal *__find_target_type(const char *name) -+{ -+ struct list_head *tih; -+ struct tt_internal *ti; -+ -+ list_for_each(tih, &_targets) { -+ ti = list_entry(tih, struct tt_internal, list); -+ -+ if (!strcmp(name, ti->tt.name)) -+ return ti; -+ } -+ -+ return NULL; -+} -+ -+static struct tt_internal *get_target_type(const char *name) -+{ -+ struct tt_internal *ti; -+ -+ read_lock(&_lock); -+ ti = __find_target_type(name); -+ -+ if (ti) { -+ if (ti->use == 0 && ti->tt.module) -+ __MOD_INC_USE_COUNT(ti->tt.module); -+ ti->use++; -+ } -+ read_unlock(&_lock); -+ -+ return ti; -+} -+ -+static void load_module(const char *name) -+{ -+ char module_name[DM_MOD_NAME_SIZE] = "dm-"; -+ -+ /* Length check for strcat() below */ -+ if (strlen(name) > (DM_MOD_NAME_SIZE - 4)) -+ return; -+ -+ strcat(module_name, name); -+ request_module(module_name); -+ -+ return; -+} -+ -+struct target_type *dm_get_target_type(const char *name) -+{ -+ struct tt_internal *ti = get_target_type(name); -+ -+ if (!ti) { -+ load_module(name); -+ ti = get_target_type(name); -+ } -+ -+ return ti ? &ti->tt : NULL; -+} -+ -+void dm_put_target_type(struct target_type *t) -+{ -+ struct tt_internal *ti = (struct tt_internal *) t; -+ -+ read_lock(&_lock); -+ if (--ti->use == 0 && ti->tt.module) -+ __MOD_DEC_USE_COUNT(ti->tt.module); -+ -+ if (ti->use < 0) -+ BUG(); -+ read_unlock(&_lock); -+ -+ return; -+} -+ -+static struct tt_internal *alloc_target(struct target_type *t) -+{ -+ struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL); -+ -+ if (ti) { -+ memset(ti, 0, sizeof(*ti)); -+ ti->tt = *t; -+ } -+ -+ return ti; -+} -+ -+int dm_register_target(struct target_type *t) -+{ -+ int rv = 0; -+ struct tt_internal *ti = alloc_target(t); -+ -+ if (!ti) -+ return -ENOMEM; -+ -+ write_lock(&_lock); -+ if (__find_target_type(t->name)) -+ rv = -EEXIST; -+ else -+ list_add(&ti->list, &_targets); -+ -+ write_unlock(&_lock); -+ return rv; -+} -+ -+int dm_unregister_target(struct target_type *t) -+{ -+ struct tt_internal *ti; -+ -+ write_lock(&_lock); -+ if (!(ti = __find_target_type(t->name))) { -+ write_unlock(&_lock); -+ return -EINVAL; -+ } -+ -+ if (ti->use) { -+ write_unlock(&_lock); -+ return -ETXTBSY; -+ } -+ -+ list_del(&ti->list); -+ kfree(ti); -+ -+ write_unlock(&_lock); -+ return 0; -+} -+ -+/* -+ * io-err: always fails an io, useful for bringing -+ * up LV's that have holes in them. 
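/*
 * Illustrative user-space sketch (not part of the patch): the behaviour of
 * split_args() above - it tokenises the target line in place on whitespace
 * and lets a backslash escape the character after it.  This is a
 * simplified re-implementation for demonstration, not the kernel code.
 */
#include <ctype.h>
#include <stdio.h>

static int split_args_demo(int max, int *argc, char **argv, char *input)
{
        char *start, *end = input, *out;

        *argc = 0;
        while (1) {
                for (start = end; *start && isspace((unsigned char) *start); start++)
                        ;
                if (!*start)
                        return 0;               /* hit the end of the line */

                for (end = out = start; *end; ) {
                        if (*end == '\\' && end[1]) {   /* keep the escaped char */
                                *out++ = end[1];
                                end += 2;
                                continue;
                        }
                        if (isspace((unsigned char) *end))
                                break;
                        *out++ = *end++;
                }
                if (*argc + 1 > max)
                        return -1;              /* too many arguments */
                if (*end)
                        end++;
                *out = '\0';
                argv[(*argc)++] = start;
        }
}

int main(void)
{
        char line[] = "striped 2 64 /dev/hda\\ 1 0";
        char *argv[8];
        int argc, i;

        if (split_args_demo(8, &argc, argv, line))
                return 1;
        for (i = 0; i < argc; i++)
                printf("argv[%d] = '%s'\n", i, argv[i]);        /* argv[3] keeps its space */
        return 0;
}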
-+ */ -+static int io_err_ctr(struct dm_table *t, offset_t b, offset_t l, -+ int argc, char **args, void **context) -+{ -+ *context = NULL; -+ return 0; -+} -+ -+static void io_err_dtr(struct dm_table *t, void *c) -+{ -+ /* empty */ -+ return; -+} -+ -+static int io_err_map(struct buffer_head *bh, int rw, void *context) -+{ -+ buffer_IO_error(bh); -+ return 0; -+} -+ -+static struct target_type error_target = { -+ name: "error", -+ ctr: io_err_ctr, -+ dtr: io_err_dtr, -+ map: io_err_map, -+ status: NULL, -+ wait: NULL, -+}; -+ -+int dm_target_init(void) -+{ -+ return dm_register_target(&error_target); -+} -+ -+void dm_target_exit(void) -+{ -+ if (dm_unregister_target(&error_target)) -+ DMWARN("error target unregistration failed"); -+} -+ -+EXPORT_SYMBOL(dm_register_target); -+EXPORT_SYMBOL(dm_unregister_target); -diff -ruN linux-2.4.19-pre8/drivers/md/dm.c linux/drivers/md/dm.c ---- linux-2.4.19-pre8/drivers/md/dm.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm.c Mon May 20 16:47:30 2002 -@@ -0,0 +1,1153 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include "dm.h" -+#include "kcopyd.h" -+ -+#include -+#include -+ -+/* we only need this for the lv_bmap struct definition, not happy */ -+#include -+ -+#define DEFAULT_READ_AHEAD 64 -+ -+static const char *_name = DM_NAME; -+static const char *_version = "0.94.11-ioctl (2002-05-20)"; -+static const char *_email = "lvm-devel@lists.sistina.com"; -+ -+static int major = 0; -+static int _major = 0; -+ -+struct io_hook { -+ struct mapped_device *md; -+ struct target *target; -+ int rw; -+ -+ void (*end_io) (struct buffer_head * bh, int uptodate); -+ void *context; -+}; -+ -+static kmem_cache_t *_io_hook_cache; -+ -+static struct mapped_device *_devs[MAX_DEVICES]; -+static struct rw_semaphore _dev_locks[MAX_DEVICES]; -+ -+/* -+ * This lock is only held by dm_create and dm_set_name to avoid -+ * race conditions where someone else may create a device with -+ * the same name. 
-+ */ -+static spinlock_t _create_lock = SPIN_LOCK_UNLOCKED; -+ -+/* block device arrays */ -+static int _block_size[MAX_DEVICES]; -+static int _blksize_size[MAX_DEVICES]; -+static int _hardsect_size[MAX_DEVICES]; -+ -+static devfs_handle_t _dev_dir; -+ -+static int request(request_queue_t * q, int rw, struct buffer_head *bh); -+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb); -+ -+/* -+ * Protect the mapped_devices referenced from _dev[] -+ */ -+struct mapped_device *dm_get_r(int minor) -+{ -+ struct mapped_device *md; -+ -+ if (minor >= MAX_DEVICES) -+ return NULL; -+ -+ down_read(_dev_locks + minor); -+ md = _devs[minor]; -+ if (!md) -+ up_read(_dev_locks + minor); -+ return md; -+} -+ -+struct mapped_device *dm_get_w(int minor) -+{ -+ struct mapped_device *md; -+ -+ if (minor >= MAX_DEVICES) -+ return NULL; -+ -+ down_write(_dev_locks + minor); -+ md = _devs[minor]; -+ if (!md) -+ up_write(_dev_locks + minor); -+ return md; -+} -+ -+static int namecmp(struct mapped_device *md, const char *name, int nametype) -+{ -+ switch (nametype) { -+ case DM_LOOKUP_BY_NAME: -+ return strcmp(md->name, name); -+ break; -+ -+ case DM_LOOKUP_BY_UUID: -+ if (!md->uuid) -+ return -1; /* never equal */ -+ -+ return strcmp(md->uuid, name); -+ break; -+ -+ default: -+ DMWARN("Unknown comparison type in namecmp: %d", nametype); -+ BUG(); -+ } -+ -+ return -1; -+} -+ -+/* -+ * The interface (eg, ioctl) will probably access the devices -+ * through these slow 'by name' locks, this needs improving at -+ * some point if people start playing with *large* numbers of dm -+ * devices. -+ */ -+struct mapped_device *dm_get_name_r(const char *name, int nametype) -+{ -+ int i; -+ struct mapped_device *md; -+ -+ for (i = 0; i < MAX_DEVICES; i++) { -+ md = dm_get_r(i); -+ if (md) { -+ if (!namecmp(md, name, nametype)) -+ return md; -+ -+ dm_put_r(md); -+ } -+ } -+ -+ return NULL; -+} -+ -+struct mapped_device *dm_get_name_w(const char *name, int nametype) -+{ -+ int i; -+ struct mapped_device *md; -+ -+ /* -+ * To avoid getting write locks on all the devices we try -+ * and promote a read lock to a write lock, this can -+ * fail, in which case we just start again. 
-+ */ -+ -+ restart: -+ for (i = 0; i < MAX_DEVICES; i++) { -+ md = dm_get_r(i); -+ if (!md) -+ continue; -+ -+ if (namecmp(md, name, nametype)) { -+ dm_put_r(md); -+ continue; -+ } -+ -+ /* found it */ -+ dm_put_r(md); -+ -+ md = dm_get_w(i); -+ if (!md) -+ goto restart; -+ -+ if (namecmp(md, name, nametype)) { -+ dm_put_w(md); -+ goto restart; -+ } -+ -+ return md; -+ } -+ -+ return NULL; -+} -+ -+void dm_put_r(struct mapped_device *md) -+{ -+ int minor = MINOR(md->dev); -+ -+ if (minor >= MAX_DEVICES) -+ return; -+ -+ up_read(_dev_locks + minor); -+} -+ -+void dm_put_w(struct mapped_device *md) -+{ -+ int minor = MINOR(md->dev); -+ -+ if (minor >= MAX_DEVICES) -+ return; -+ -+ up_write(_dev_locks + minor); -+} -+ -+/* -+ * Setup and tear down the driver -+ */ -+static __init void init_locks(void) -+{ -+ int i; -+ -+ for (i = 0; i < MAX_DEVICES; i++) -+ init_rwsem(_dev_locks + i); -+} -+ -+static __init int local_init(void) -+{ -+ int r; -+ -+ init_locks(); -+ -+ /* allocate a slab for the io-hooks */ -+ if (!_io_hook_cache && -+ !(_io_hook_cache = kmem_cache_create("dm io hooks", -+ sizeof(struct io_hook), -+ 0, 0, NULL, NULL))) -+ return -ENOMEM; -+ -+ _major = major; -+ r = devfs_register_blkdev(_major, _name, &dm_blk_dops); -+ if (r < 0) { -+ DMERR("register_blkdev failed"); -+ kmem_cache_destroy(_io_hook_cache); -+ return r; -+ } -+ -+ if (!_major) -+ _major = r; -+ -+ /* set up the arrays */ -+ read_ahead[_major] = DEFAULT_READ_AHEAD; -+ blk_size[_major] = _block_size; -+ blksize_size[_major] = _blksize_size; -+ hardsect_size[_major] = _hardsect_size; -+ -+ blk_queue_make_request(BLK_DEFAULT_QUEUE(_major), request); -+ -+ _dev_dir = devfs_mk_dir(0, DM_DIR, NULL); -+ -+ DMINFO("%s initialised: %s", _version, _email); -+ return 0; -+} -+ -+static void local_exit(void) -+{ -+ if (kmem_cache_destroy(_io_hook_cache)) -+ DMWARN("io_hooks still allocated during unregistration"); -+ _io_hook_cache = NULL; -+ -+ if (devfs_unregister_blkdev(_major, _name) < 0) -+ DMERR("devfs_unregister_blkdev failed"); -+ -+ read_ahead[_major] = 0; -+ blk_size[_major] = NULL; -+ blksize_size[_major] = NULL; -+ hardsect_size[_major] = NULL; -+ _major = 0; -+ -+ DMINFO("%s cleaned up", _version); -+} -+ -+/* -+ * We have a lot of init/exit functions, so it seems easier to -+ * store them in an array. The disposable macro 'xx' -+ * expands a prefix into a pair of function names. 
-+ */ -+static struct { -+ int (*init)(void); -+ void (*exit)(void); -+ -+} _inits[] = { -+#define xx(n) {n ## _init, n ## _exit}, -+ xx(local) -+ xx(dm_target) -+ xx(dm_linear) -+ xx(dm_stripe) -+ xx(dm_snapshot) -+ xx(dm_interface) -+#undef xx -+}; -+ -+static int __init dm_init(void) -+{ -+ const int count = sizeof(_inits) / sizeof(*_inits); -+ -+ int r, i; -+ -+ for (i = 0; i < count; i++) { -+ r = _inits[i].init(); -+ if (r) -+ goto bad; -+ } -+ -+ return 0; -+ -+ bad: -+ while (i--) -+ _inits[i].exit(); -+ -+ return r; -+} -+ -+static void __exit dm_exit(void) -+{ -+ int i = sizeof(_inits) / sizeof(*_inits); -+ -+ dm_destroy_all(); -+ while (i--) -+ _inits[i].exit(); -+} -+ -+/* -+ * Block device functions -+ */ -+static int dm_blk_open(struct inode *inode, struct file *file) -+{ -+ struct mapped_device *md; -+ -+ md = dm_get_w(MINOR(inode->i_rdev)); -+ if (!md) -+ return -ENXIO; -+ -+ md->use_count++; -+ dm_put_w(md); -+ -+ return 0; -+} -+ -+static int dm_blk_close(struct inode *inode, struct file *file) -+{ -+ struct mapped_device *md; -+ -+ md = dm_get_w(MINOR(inode->i_rdev)); -+ if (!md) -+ return -ENXIO; -+ -+ if (md->use_count < 1) -+ DMWARN("incorrect reference count found in mapped_device"); -+ -+ md->use_count--; -+ dm_put_w(md); -+ -+ return 0; -+} -+ -+/* In 512-byte units */ -+#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1) -+ -+static int dm_blk_ioctl(struct inode *inode, struct file *file, -+ uint command, unsigned long a) -+{ -+ int minor = MINOR(inode->i_rdev); -+ long size; -+ -+ if (minor >= MAX_DEVICES) -+ return -ENXIO; -+ -+ switch (command) { -+ case BLKSSZGET: -+ case BLKBSZGET: -+ case BLKROGET: -+ case BLKROSET: -+ case BLKRASET: -+ case BLKRAGET: -+ case BLKFLSBUF: -+#if 0 /* Future stacking block device */ -+ case BLKELVSET: -+ case BLKELVGET: -+#endif -+ return blk_ioctl(inode->i_rdev, command, a); -+ break; -+ -+ case BLKGETSIZE: -+ size = VOLUME_SIZE(minor); -+ if (copy_to_user((void *) a, &size, sizeof(long))) -+ return -EFAULT; -+ break; -+ -+ case BLKGETSIZE64: -+ size = VOLUME_SIZE(minor); -+ if (put_user((u64) size, (u64 *) a)) -+ return -EFAULT; -+ break; -+ -+ case BLKRRPART: -+ return -EINVAL; -+ -+ case LV_BMAP: -+ return dm_user_bmap(inode, (struct lv_bmap *) a); -+ -+ default: -+ DMWARN("unknown block ioctl 0x%x", command); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static inline struct io_hook *alloc_io_hook(void) -+{ -+ return kmem_cache_alloc(_io_hook_cache, GFP_NOIO); -+} -+ -+static inline void free_io_hook(struct io_hook *ih) -+{ -+ kmem_cache_free(_io_hook_cache, ih); -+} -+ -+/* -+ * FIXME: We need to decide if deferred_io's need -+ * their own slab, I say no for now since they are -+ * only used when the device is suspended. -+ */ -+static inline struct deferred_io *alloc_deferred(void) -+{ -+ return kmalloc(sizeof(struct deferred_io), GFP_NOIO); -+} -+ -+static inline void free_deferred(struct deferred_io *di) -+{ -+ kfree(di); -+} -+ -+/* -+ * Call a target's optional error function if an I/O failed. -+ */ -+static inline int call_err_fn(struct io_hook *ih, struct buffer_head *bh) -+{ -+ dm_err_fn err = ih->target->type->err; -+ -+ if (err) -+ return err(bh, ih->rw, ih->target->private); -+ -+ return 0; -+} -+ -+/* -+ * bh->b_end_io routine that decrements the pending count -+ * and then calls the original bh->b_end_io fn. 
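/*
 * Illustrative user-space sketch (not part of the patch): the init/exit
 * table built by the xx() macro above.  Subsystems come up in array order
 * and, if one fails, only the ones already initialised are torn down
 * again, in reverse.  The subsystem functions here are hypothetical.
 */
#include <stdio.h>

static int a_init(void)  { printf("a up\n");     return 0; }
static void a_exit(void) { printf("a down\n"); }
static int b_init(void)  { printf("b failed\n"); return -1; }   /* simulate failure */
static void b_exit(void) { printf("b down\n"); }

static struct {
        int (*init)(void);
        void (*exit)(void);
} inits[] = {
#define xx(n) { n ## _init, n ## _exit },
        xx(a)
        xx(b)
#undef xx
};

int main(void)
{
        const int count = sizeof(inits) / sizeof(*inits);
        int r = 0, i;

        for (i = 0; i < count; i++) {
                r = inits[i].init();
                if (r)
                        break;
        }
        if (r)
                while (i--)             /* unwind only what came up */
                        inits[i].exit();
        return r ? 1 : 0;
}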
-+ */ -+static void dec_pending(struct buffer_head *bh, int uptodate) -+{ -+ struct io_hook *ih = bh->b_bdev_private; -+ -+ if (!uptodate && call_err_fn(ih, bh)) -+ return; -+ -+ if (atomic_dec_and_test(&ih->md->pending)) -+ /* nudge anyone waiting on suspend queue */ -+ wake_up(&ih->md->wait); -+ -+ bh->b_end_io = ih->end_io; -+ bh->b_bdev_private = ih->context; -+ free_io_hook(ih); -+ -+ bh->b_end_io(bh, uptodate); -+} -+ -+/* -+ * Add the bh to the list of deferred io. -+ */ -+static int queue_io(struct buffer_head *bh, int rw) -+{ -+ struct deferred_io *di = alloc_deferred(); -+ struct mapped_device *md; -+ -+ if (!di) -+ return -ENOMEM; -+ -+ md = dm_get_w(MINOR(bh->b_rdev)); -+ if (!md) { -+ free_deferred(di); -+ return -ENXIO; -+ } -+ -+ if (!md->suspended) { -+ dm_put_w(md); -+ free_deferred(di); -+ return 1; -+ } -+ -+ di->bh = bh; -+ di->rw = rw; -+ di->next = md->deferred; -+ md->deferred = di; -+ -+ dm_put_w(md); -+ -+ return 0; /* deferred successfully */ -+} -+ -+/* -+ * Do the bh mapping for a given leaf -+ */ -+static inline int __map_buffer(struct mapped_device *md, -+ struct buffer_head *bh, int rw, int leaf) -+{ -+ int r; -+ dm_map_fn fn; -+ void *context; -+ struct io_hook *ih = NULL; -+ struct target *ti = md->map->targets + leaf; -+ -+ fn = ti->type->map; -+ context = ti->private; -+ -+ ih = alloc_io_hook(); -+ -+ if (!ih) -+ return -1; -+ -+ ih->md = md; -+ ih->rw = rw; -+ ih->target = ti; -+ ih->end_io = bh->b_end_io; -+ ih->context = bh->b_bdev_private; -+ -+ r = fn(bh, rw, context); -+ -+ if (r > 0) { -+ /* hook the end io request fn */ -+ atomic_inc(&md->pending); -+ bh->b_end_io = dec_pending; -+ bh->b_bdev_private = ih; -+ -+ } else if (r == 0) -+ /* we don't need to hook */ -+ free_io_hook(ih); -+ -+ else if (r < 0) { -+ free_io_hook(ih); -+ return -1; -+ } -+ -+ return r; -+} -+ -+/* -+ * Search the btree for the correct target. -+ */ -+static inline int __find_node(struct dm_table *t, struct buffer_head *bh) -+{ -+ int l, n = 0, k = 0; -+ offset_t *node; -+ -+ for (l = 0; l < t->depth; l++) { -+ n = get_child(n, k); -+ node = get_node(t, l, n); -+ -+ for (k = 0; k < KEYS_PER_NODE; k++) -+ if (node[k] >= bh->b_rsector) -+ break; -+ } -+ -+ return (KEYS_PER_NODE * n) + k; -+} -+ -+static int request(request_queue_t * q, int rw, struct buffer_head *bh) -+{ -+ struct mapped_device *md; -+ int r, minor = MINOR(bh->b_rdev); -+ -+ md = dm_get_r(minor); -+ if (!md) { -+ buffer_IO_error(bh); -+ return 0; -+ } -+ -+ /* -+ * If we're suspended we have to queue -+ * this io for later. -+ */ -+ while (md->suspended) { -+ dm_put_r(md); -+ -+ if (rw == READA) -+ goto bad_no_lock; -+ -+ r = queue_io(bh, rw); -+ -+ if (r < 0) -+ goto bad_no_lock; -+ -+ else if (r == 0) -+ return 0; /* deferred successfully */ -+ -+ /* -+ * We're in a while loop, because someone could suspend -+ * before we get to the following read lock. -+ */ -+ md = dm_get_r(minor); -+ if (!md) { -+ buffer_IO_error(bh); -+ return 0; -+ } -+ } -+ -+ if ((r = __map_buffer(md, bh, rw, __find_node(md->map, bh))) < 0) -+ goto bad; -+ -+ dm_put_r(md); -+ return r; -+ -+ bad: -+ dm_put_r(md); -+ -+ bad_no_lock: -+ buffer_IO_error(bh); -+ return 0; -+} -+ -+static int check_dev_size(int minor, unsigned long block) -+{ -+ /* FIXME: check this */ -+ unsigned long max_sector = (_block_size[minor] << 1) + 1; -+ unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9); -+ -+ return (sector > max_sector) ? 0 : 1; -+} -+ -+/* -+ * Creates a dummy buffer head and maps it (for lilo). 
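/*
 * Illustrative user-space sketch (not part of the patch): the completion
 * hook used by __map_buffer()/dec_pending() above.  The original b_end_io
 * and private pointer are stashed in an io_hook, a wrapper is substituted
 * while the I/O is in flight, and the original is restored and called when
 * the I/O completes.  The mock types here are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

struct mock_bh;
typedef void (*end_io_fn)(struct mock_bh *, int uptodate);

struct mock_bh {
        end_io_fn b_end_io;
        void *b_private;
};

struct io_hook {
        end_io_fn end_io;       /* saved original completion routine */
        void *context;          /* saved original private pointer */
};

static void original_end_io(struct mock_bh *bh, int uptodate)
{
        printf("original completion, uptodate=%d\n", uptodate);
}

static void hooked_end_io(struct mock_bh *bh, int uptodate)
{
        struct io_hook *ih = bh->b_private;

        /* bookkeeping would go here (pending counts, error policy, ...) */
        bh->b_end_io = ih->end_io;      /* restore what we replaced */
        bh->b_private = ih->context;
        free(ih);
        bh->b_end_io(bh, uptodate);
}

int main(void)
{
        struct mock_bh bh = { original_end_io, NULL };
        struct io_hook *ih = malloc(sizeof(*ih));

        if (!ih)
                return 1;
        ih->end_io = bh.b_end_io;       /* hook: remember the old routine */
        ih->context = bh.b_private;
        bh.b_end_io = hooked_end_io;
        bh.b_private = ih;

        bh.b_end_io(&bh, 1);            /* simulate the I/O completing */
        return 0;
}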
-+ */ -+static int do_bmap(kdev_t dev, unsigned long block, -+ kdev_t * r_dev, unsigned long *r_block) -+{ -+ struct mapped_device *md; -+ struct buffer_head bh; -+ int minor = MINOR(dev), r; -+ struct target *t; -+ -+ md = dm_get_r(minor); -+ if (!md) -+ return -ENXIO; -+ -+ if (md->suspended) { -+ dm_put_r(md); -+ return -EPERM; -+ } -+ -+ if (!check_dev_size(minor, block)) { -+ dm_put_r(md); -+ return -EINVAL; -+ } -+ -+ /* setup dummy bh */ -+ memset(&bh, 0, sizeof(bh)); -+ bh.b_blocknr = block; -+ bh.b_dev = bh.b_rdev = dev; -+ bh.b_size = _blksize_size[minor]; -+ bh.b_rsector = block * (bh.b_size >> 9); -+ -+ /* find target */ -+ t = md->map->targets + __find_node(md->map, &bh); -+ -+ /* do the mapping */ -+ r = t->type->map(&bh, READ, t->private); -+ -+ *r_dev = bh.b_rdev; -+ *r_block = bh.b_rsector / (bh.b_size >> 9); -+ -+ dm_put_r(md); -+ return r; -+} -+ -+/* -+ * Marshals arguments and results between user and kernel space. -+ */ -+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb) -+{ -+ unsigned long block, r_block; -+ kdev_t r_dev; -+ int r; -+ -+ if (get_user(block, &lvb->lv_block)) -+ return -EFAULT; -+ -+ if ((r = do_bmap(inode->i_rdev, block, &r_dev, &r_block))) -+ return r; -+ -+ if (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) || -+ put_user(r_block, &lvb->lv_block)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/* -+ * See if the device with a specific minor # is free. The write -+ * lock is held when it returns successfully. -+ */ -+static inline int specific_dev(int minor, struct mapped_device *md) -+{ -+ if (minor >= MAX_DEVICES) { -+ DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)", -+ MAX_DEVICES); -+ return -1; -+ } -+ -+ down_write(_dev_locks + minor); -+ if (_devs[minor]) { -+ /* in use */ -+ up_write(_dev_locks + minor); -+ return -1; -+ } -+ -+ return minor; -+} -+ -+/* -+ * Find the first free device. Again the write lock is held on -+ * success. -+ */ -+static int any_old_dev(struct mapped_device *md) -+{ -+ int i; -+ -+ for (i = 0; i < MAX_DEVICES; i++) -+ if (specific_dev(i, md) != -1) -+ return i; -+ -+ return -1; -+} -+ -+/* -+ * Allocate and initialise a blank device. -+ * Caller must ensure uuid is null-terminated. -+ * Device is returned with a write lock held. -+ */ -+static struct mapped_device *alloc_dev(const char *name, const char *uuid, -+ int minor) -+{ -+ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); -+ int len; -+ -+ if (!md) { -+ DMWARN("unable to allocate device, out of memory."); -+ return NULL; -+ } -+ -+ memset(md, 0, sizeof(*md)); -+ -+ /* -+ * This grabs the write lock if it succeeds. -+ */ -+ minor = (minor < 0) ? any_old_dev(md) : specific_dev(minor, md); -+ if (minor < 0) { -+ kfree(md); -+ return NULL; -+ } -+ -+ md->dev = MKDEV(_major, minor); -+ md->suspended = 0; -+ -+ strncpy(md->name, name, sizeof(md->name) - 1); -+ md->name[sizeof(md->name) - 1] = '\0'; -+ -+ /* -+ * Copy in the uuid. 
-+ */ -+ if (uuid && *uuid) { -+ len = strlen(uuid) + 1; -+ if (!(md->uuid = kmalloc(len, GFP_KERNEL))) { -+ DMWARN("unable to allocate uuid - out of memory."); -+ kfree(md); -+ return NULL; -+ } -+ strcpy(md->uuid, uuid); -+ } -+ -+ init_waitqueue_head(&md->wait); -+ return md; -+} -+ -+static int __register_device(struct mapped_device *md) -+{ -+ md->devfs_entry = -+ devfs_register(_dev_dir, md->name, DEVFS_FL_CURRENT_OWNER, -+ MAJOR(md->dev), MINOR(md->dev), -+ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, -+ &dm_blk_dops, NULL); -+ -+ return 0; -+} -+ -+static int __unregister_device(struct mapped_device *md) -+{ -+ devfs_unregister(md->devfs_entry); -+ return 0; -+} -+ -+/* -+ * The hardsect size for a mapped device is the smallest hardsect size -+ * from the devices it maps onto. -+ */ -+static int __find_hardsect_size(struct list_head *devices) -+{ -+ int result = INT_MAX, size; -+ struct list_head *tmp; -+ -+ list_for_each(tmp, devices) { -+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); -+ size = get_hardsect_size(dd->dev); -+ if (size < result) -+ result = size; -+ } -+ return result; -+} -+ -+/* -+ * Bind a table to the device. -+ */ -+static int __bind(struct mapped_device *md, struct dm_table *t) -+{ -+ int minor = MINOR(md->dev); -+ -+ md->map = t; -+ -+ if (!t->num_targets) { -+ _block_size[minor] = 0; -+ _blksize_size[minor] = BLOCK_SIZE; -+ _hardsect_size[minor] = 0; -+ return 0; -+ } -+ -+ /* in k */ -+ _block_size[minor] = (t->highs[t->num_targets - 1] + 1) >> 1; -+ -+ _blksize_size[minor] = BLOCK_SIZE; -+ _hardsect_size[minor] = __find_hardsect_size(&t->devices); -+ register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]); -+ -+ return 0; -+} -+ -+static void __unbind(struct mapped_device *md) -+{ -+ int minor = MINOR(md->dev); -+ -+ dm_table_destroy(md->map); -+ md->map = NULL; -+ -+ _block_size[minor] = 0; -+ _blksize_size[minor] = 0; -+ _hardsect_size[minor] = 0; -+} -+ -+static int check_name(const char *name) -+{ -+ struct mapped_device *md; -+ -+ if (strchr(name, '/') || strlen(name) > DM_NAME_LEN) { -+ DMWARN("invalid device name"); -+ return -1; -+ } -+ -+ md = dm_get_name_r(name, DM_LOOKUP_BY_NAME); -+ if (md) { -+ dm_put_r(md); -+ DMWARN("device name already in use"); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int check_uuid(const char *uuid) -+{ -+ struct mapped_device *md; -+ -+ if (uuid) { -+ md = dm_get_name_r(uuid, DM_LOOKUP_BY_UUID); -+ if (md) { -+ dm_put_r(md); -+ DMWARN("device uuid already in use"); -+ return -1; -+ } -+ } -+ -+ return 0; -+} -+ -+/* -+ * Constructor for a new device. -+ */ -+int dm_create(const char *name, const char *uuid, int minor, int ro, -+ struct dm_table *table) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ spin_lock(&_create_lock); -+ if (check_name(name) || check_uuid(uuid)) { -+ spin_unlock(&_create_lock); -+ return -EINVAL; -+ } -+ -+ md = alloc_dev(name, uuid, minor); -+ if (!md) { -+ spin_unlock(&_create_lock); -+ return -ENXIO; -+ } -+ minor = MINOR(md->dev); -+ _devs[minor] = md; -+ -+ r = __register_device(md); -+ if (r) -+ goto err; -+ -+ r = __bind(md, table); -+ if (r) -+ goto err; -+ -+ dm_set_ro(md, ro); -+ -+ spin_unlock(&_create_lock); -+ dm_put_w(md); -+ return 0; -+ -+ err: -+ _devs[minor] = NULL; -+ if (md->uuid) -+ kfree(md->uuid); -+ -+ dm_put_w(md); -+ kfree(md); -+ spin_unlock(&_create_lock); -+ return r; -+} -+ -+/* -+ * Renames the device. No lock held. 
-+ */ -+int dm_set_name(const char *name, int nametype, const char *newname) -+{ -+ int r; -+ struct mapped_device *md; -+ -+ spin_lock(&_create_lock); -+ if (check_name(newname) < 0) { -+ spin_unlock(&_create_lock); -+ return -EINVAL; -+ } -+ -+ md = dm_get_name_w(name, nametype); -+ if (!md) { -+ spin_unlock(&_create_lock); -+ return -ENXIO; -+ } -+ -+ r = __unregister_device(md); -+ if (r) -+ goto out; -+ -+ strcpy(md->name, newname); -+ r = __register_device(md); -+ -+ out: -+ dm_put_w(md); -+ spin_unlock(&_create_lock); -+ return r; -+} -+ -+/* -+ * Destructor for the device. You cannot destroy an open -+ * device. Write lock must be held before calling. -+ * Caller must dm_put_w(md) then kfree(md) if call was successful. -+ */ -+int dm_destroy(struct mapped_device *md) -+{ -+ int minor, r; -+ -+ if (md->use_count) -+ return -EPERM; -+ -+ r = __unregister_device(md); -+ if (r) -+ return r; -+ -+ minor = MINOR(md->dev); -+ _devs[minor] = NULL; -+ __unbind(md); -+ -+ if (md->uuid) -+ kfree(md->uuid); -+ -+ return 0; -+} -+ -+/* -+ * Destroy all devices - except open ones -+ */ -+void dm_destroy_all(void) -+{ -+ int i, some_destroyed, r; -+ struct mapped_device *md; -+ -+ do { -+ some_destroyed = 0; -+ for (i = 0; i < MAX_DEVICES; i++) { -+ md = dm_get_w(i); -+ if (!md) -+ continue; -+ -+ r = dm_destroy(md); -+ dm_put_w(md); -+ -+ if (!r) { -+ kfree(md); -+ some_destroyed = 1; -+ } -+ } -+ } while (some_destroyed); -+} -+ -+/* -+ * Sets or clears the read-only flag for the device. Write lock -+ * must be held. -+ */ -+void dm_set_ro(struct mapped_device *md, int ro) -+{ -+ md->read_only = ro; -+ set_device_ro(md->dev, ro); -+} -+ -+/* -+ * A target is notifying us of some event -+ */ -+void dm_notify(void *target) -+{ -+} -+ -+/* -+ * Requeue the deferred buffer_heads by calling generic_make_request. -+ */ -+static void flush_deferred_io(struct deferred_io *c) -+{ -+ struct deferred_io *n; -+ -+ while (c) { -+ n = c->next; -+ generic_make_request(c->rw, c->bh); -+ free_deferred(c); -+ c = n; -+ } -+} -+ -+/* -+ * Swap in a new table (destroying old one). Write lock must be -+ * held. -+ */ -+int dm_swap_table(struct mapped_device *md, struct dm_table *table) -+{ -+ int r; -+ -+ /* device must be suspended */ -+ if (!md->suspended) -+ return -EPERM; -+ -+ __unbind(md); -+ -+ r = __bind(md, table); -+ if (r) -+ return r; -+ -+ return 0; -+} -+ -+/* -+ * We need to be able to change a mapping table under a mounted -+ * filesystem. for example we might want to move some data in -+ * the background. Before the table can be swapped with -+ * dm_bind_table, dm_suspend must be called to flush any in -+ * flight buffer_heads and ensure that any further io gets -+ * deferred. Write lock must be held. -+ */ -+int dm_suspend(struct mapped_device *md) -+{ -+ int minor = MINOR(md->dev); -+ DECLARE_WAITQUEUE(wait, current); -+ -+ if (md->suspended) -+ return -EINVAL; -+ -+ md->suspended = 1; -+ dm_put_w(md); -+ -+ /* wait for all the pending io to flush */ -+ add_wait_queue(&md->wait, &wait); -+ current->state = TASK_UNINTERRUPTIBLE; -+ do { -+ md = dm_get_w(minor); -+ if (!md) { -+ /* Caller expects to free this lock. Yuck. 
*/ -+ down_write(_dev_locks + minor); -+ return -ENXIO; -+ } -+ -+ if (!atomic_read(&md->pending)) -+ break; -+ -+ dm_put_w(md); -+ schedule(); -+ -+ } while (1); -+ -+ current->state = TASK_RUNNING; -+ remove_wait_queue(&md->wait, &wait); -+ -+ return 0; -+} -+ -+int dm_resume(struct mapped_device *md) -+{ -+ int minor = MINOR(md->dev); -+ struct deferred_io *def; -+ -+ if (!md->suspended || !md->map->num_targets) -+ return -EINVAL; -+ -+ md->suspended = 0; -+ def = md->deferred; -+ md->deferred = NULL; -+ -+ dm_put_w(md); -+ flush_deferred_io(def); -+ run_task_queue(&tq_disk); -+ -+ if (!dm_get_w(minor)) { -+ /* FIXME: yuck */ -+ down_write(_dev_locks + minor); -+ return -ENXIO; -+ } -+ -+ return 0; -+} -+ -+struct block_device_operations dm_blk_dops = { -+ open: dm_blk_open, -+ release: dm_blk_close, -+ ioctl: dm_blk_ioctl, -+ owner: THIS_MODULE -+}; -+ -+/* -+ * module hooks -+ */ -+module_init(dm_init); -+module_exit(dm_exit); -+ -+MODULE_PARM(major, "i"); -+MODULE_PARM_DESC(major, "The major number of the device mapper"); -+MODULE_DESCRIPTION(DM_NAME " driver"); -+MODULE_AUTHOR("Joe Thornber "); -+MODULE_LICENSE("GPL"); -diff -ruN linux-2.4.19-pre8/drivers/md/dm.h linux/drivers/md/dm.h ---- linux-2.4.19-pre8/drivers/md/dm.h Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm.h Mon May 20 15:02:13 2002 -@@ -0,0 +1,220 @@ -+/* -+ * Internal header file for device mapper -+ * -+ * Copyright (C) 2001 Sistina Software -+ * -+ * This file is released under the LGPL. -+ */ -+ -+#ifndef DM_INTERNAL_H -+#define DM_INTERNAL_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define DM_NAME "device-mapper" /* Name for messaging */ -+#define MAX_DEPTH 16 -+#define NODE_SIZE L1_CACHE_BYTES -+#define KEYS_PER_NODE (NODE_SIZE / sizeof(offset_t)) -+#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) -+#define MAX_ARGS 32 -+#define MAX_DEVICES 256 -+ -+/* -+ * List of devices that a metadevice uses and should open/close. -+ */ -+struct dm_dev { -+ atomic_t count; -+ struct list_head list; -+ -+ kdev_t dev; -+ struct block_device *bd; -+}; -+ -+/* -+ * I/O that had to be deferred while we were suspended -+ */ -+struct deferred_io { -+ int rw; -+ struct buffer_head *bh; -+ struct deferred_io *next; -+}; -+ -+/* -+ * Btree leaf - this does the actual mapping -+ */ -+struct target { -+ struct target_type *type; -+ void *private; -+}; -+ -+/* -+ * The btree -+ */ -+struct dm_table { -+ /* btree table */ -+ int depth; -+ int counts[MAX_DEPTH]; /* in nodes */ -+ offset_t *index[MAX_DEPTH]; -+ -+ int num_targets; -+ int num_allocated; -+ offset_t *highs; -+ struct target *targets; -+ -+ /* a list of devices used by this table */ -+ struct list_head devices; -+}; -+ -+/* -+ * The actual device struct -+ */ -+struct mapped_device { -+ kdev_t dev; -+ char name[DM_NAME_LEN]; -+ char *uuid; -+ -+ int use_count; -+ int suspended; -+ int read_only; -+ -+ /* a list of io's that arrived while we were suspended */ -+ atomic_t pending; -+ wait_queue_head_t wait; -+ struct deferred_io *deferred; -+ -+ struct dm_table *map; -+ -+ /* used by dm-fs.c */ -+ devfs_handle_t devfs_entry; -+}; -+ -+extern struct block_device_operations dm_blk_dops; -+ -+/* dm-target.c */ -+int dm_target_init(void); -+struct target_type *dm_get_target_type(const char *name); -+void dm_put_target_type(struct target_type *t); -+void dm_target_exit(void); -+ -+/* -+ * Destructively splits argument list to pass to ctr. 
-+ */ -+int split_args(int max, int *argc, char **argv, char *input); -+ -+/* dm.c */ -+struct mapped_device *dm_get_r(int minor); -+struct mapped_device *dm_get_w(int minor); -+ -+/* -+ * There are two ways to lookup a device. -+ */ -+enum { -+ DM_LOOKUP_BY_NAME, -+ DM_LOOKUP_BY_UUID -+}; -+ -+struct mapped_device *dm_get_name_r(const char *name, int nametype); -+struct mapped_device *dm_get_name_w(const char *name, int nametype); -+ -+void dm_put_r(struct mapped_device *md); -+void dm_put_w(struct mapped_device *md); -+ -+/* -+ * Call with no lock. -+ */ -+int dm_create(const char *name, const char *uuid, int minor, int ro, -+ struct dm_table *table); -+int dm_set_name(const char *name, int nametype, const char *newname); -+void dm_destroy_all(void); -+ -+/* -+ * You must have the write lock before calling the remaining md -+ * methods. -+ */ -+int dm_destroy(struct mapped_device *md); -+void dm_set_ro(struct mapped_device *md, int ro); -+ -+/* -+ * The device must be suspended before calling this method. -+ */ -+int dm_swap_table(struct mapped_device *md, struct dm_table *t); -+ -+/* -+ * A device can still be used while suspended, but I/O is deferred. -+ */ -+int dm_suspend(struct mapped_device *md); -+int dm_resume(struct mapped_device *md); -+ -+/* -+ * Event notification -+ */ -+void dm_notify(void *target); -+ -+/* dm-table.c */ -+int dm_table_create(struct dm_table **result); -+void dm_table_destroy(struct dm_table *t); -+ -+int dm_table_add_target(struct dm_table *t, offset_t highs, -+ struct target_type *type, void *private); -+int dm_table_complete(struct dm_table *t); -+ -+/* Snapshots */ -+int dm_snapshot_init(void); -+void dm_snapshot_exit(void); -+ -+/* dm-mirror.c */ -+int dm_mirror_init(void); -+void dm_mirror_exit(void); -+ -+#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) -+#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) -+#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) -+ -+/* -+ * Calculate the index of the child node of the n'th node k'th key. -+ */ -+static inline int get_child(int n, int k) -+{ -+ return (n * CHILDREN_PER_NODE) + k; -+} -+ -+/* -+ * Return the n'th node of level l from table t. -+ */ -+static inline offset_t *get_node(struct dm_table *t, int l, int n) -+{ -+ return t->index[l] + (n * KEYS_PER_NODE); -+} -+ -+/* -+ * The device-mapper can be driven through one of two interfaces; -+ * ioctl or filesystem, depending which patch you have applied. -+ */ -+int __init dm_interface_init(void); -+void dm_interface_exit(void); -+ -+/* -+ * Targets for linear and striped mappings -+ */ -+ -+int dm_linear_init(void); -+void dm_linear_exit(void); -+ -+int dm_stripe_init(void); -+void dm_stripe_exit(void); -+ -+#endif -diff -ruN linux-2.4.19-pre8/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c ---- linux-2.4.19-pre8/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/kcopyd.c Mon May 20 14:18:59 2002 -@@ -0,0 +1,770 @@ -+/* -+ * Copyright (C) 2002 Sistina Software (UK) Limited. -+ * -+ * This file is released under the GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "kcopyd.h" -+ -+/* FIXME: this is only needed for the DMERR macros */ -+#include "dm.h" -+ -+/* -+ * Hard sector size used all over the kernel. 
-+ */ -+#define SECTOR_SIZE 512 -+#define SECTOR_SHIFT 9 -+ -+static void wake_kcopyd(void); -+ -+/*----------------------------------------------------------------- -+ * We reserve our own pool of preallocated pages that are -+ * only used for kcopyd io. -+ *---------------------------------------------------------------*/ -+ -+/* -+ * FIXME: This should be configurable. -+ */ -+#define NUM_PAGES 512 -+ -+static DECLARE_MUTEX(_pages_lock); -+static int _num_free_pages; -+static struct page *_pages_array[NUM_PAGES]; -+static DECLARE_MUTEX(start_lock); -+ -+static int init_pages(void) -+{ -+ int i; -+ struct page *p; -+ -+ for (i = 0; i < NUM_PAGES; i++) { -+ p = alloc_page(GFP_KERNEL); -+ if (!p) -+ goto bad; -+ -+ LockPage(p); -+ _pages_array[i] = p; -+ } -+ -+ _num_free_pages = NUM_PAGES; -+ return 0; -+ -+ bad: -+ while (i--) -+ __free_page(_pages_array[i]); -+ return -ENOMEM; -+} -+ -+static void exit_pages(void) -+{ -+ int i; -+ struct page *p; -+ -+ for (i = 0; i < NUM_PAGES; i++) { -+ p = _pages_array[i]; -+ UnlockPage(p); -+ __free_page(p); -+ } -+ -+ _num_free_pages = 0; -+} -+ -+static int kcopyd_get_pages(int num, struct page **result) -+{ -+ int i; -+ -+ down(&_pages_lock); -+ if (_num_free_pages < num) { -+ up(&_pages_lock); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < num; i++) { -+ _num_free_pages--; -+ result[i] = _pages_array[_num_free_pages]; -+ } -+ up(&_pages_lock); -+ -+ return 0; -+} -+ -+static void kcopyd_free_pages(int num, struct page **result) -+{ -+ int i; -+ -+ down(&_pages_lock); -+ for (i = 0; i < num; i++) -+ _pages_array[_num_free_pages++] = result[i]; -+ up(&_pages_lock); -+} -+ -+/*----------------------------------------------------------------- -+ * We keep our own private pool of buffer_heads. These are just -+ * held in a list on the b_reqnext field. -+ *---------------------------------------------------------------*/ -+ -+/* -+ * Make sure we have enough buffers to always keep the pages -+ * occupied. So we assume the worst case scenario where blocks -+ * are the size of a single sector. -+ */ -+#define NUM_BUFFERS NUM_PAGES * (PAGE_SIZE / SECTOR_SIZE) -+ -+static spinlock_t _buffer_lock = SPIN_LOCK_UNLOCKED; -+static struct buffer_head *_all_buffers; -+static struct buffer_head *_free_buffers; -+ -+static int init_buffers(void) -+{ -+ int i; -+ struct buffer_head *buffers; -+ size_t s = sizeof(struct buffer_head) * NUM_BUFFERS; -+ -+ /* -+ * FIXME: this should be a vmalloc. -+ */ -+ buffers = vmalloc(s); -+ if (!buffers) { -+ DMWARN("Couldn't allocate buffer heads."); -+ return -ENOMEM; -+ } -+ -+ memset(buffers, 0, s); -+ for (i = 0; i < NUM_BUFFERS; i++) { -+ if (i < NUM_BUFFERS - 1) -+ buffers[i].b_reqnext = &buffers[i + 1]; -+ init_waitqueue_head(&buffers[i].b_wait); -+ INIT_LIST_HEAD(&buffers[i].b_inode_buffers); -+ } -+ -+ _all_buffers = _free_buffers = buffers; -+ return 0; -+} -+ -+static void exit_buffers(void) -+{ -+ vfree(_all_buffers); -+} -+ -+static struct buffer_head *alloc_buffer(void) -+{ -+ struct buffer_head *r; -+ int flags; -+ -+ spin_lock_irqsave(&_buffer_lock, flags); -+ -+ if (!_free_buffers) -+ r = NULL; -+ else { -+ r = _free_buffers; -+ _free_buffers = _free_buffers->b_reqnext; -+ r->b_reqnext = NULL; -+ } -+ -+ spin_unlock_irqrestore(&_buffer_lock, flags); -+ -+ return r; -+} -+ -+/* -+ * Only called from interrupt context. 
-+ */ -+static void free_buffer(struct buffer_head *bh) -+{ -+ int flags; -+ -+ spin_lock_irqsave(&_buffer_lock, flags); -+ bh->b_reqnext = _free_buffers; -+ _free_buffers = bh; -+ spin_unlock_irqrestore(&_buffer_lock, flags); -+} -+ -+/*----------------------------------------------------------------- -+ * kcopyd_jobs need to be allocated by the *clients* of kcopyd, -+ * for this reason we use a mempool to prevent the client from -+ * ever having to do io (which could cause a -+ * deadlock). -+ *---------------------------------------------------------------*/ -+#define MIN_JOBS NUM_PAGES -+ -+static kmem_cache_t *_job_cache = NULL; -+static mempool_t *_job_pool = NULL; -+ -+/* -+ * We maintain three lists of jobs: -+ * -+ * i) jobs waiting for pages -+ * ii) jobs that have pages, and are waiting for the io to be issued. -+ * iii) jobs that have completed. -+ * -+ * All three of these are protected by job_lock. -+ */ -+ -+static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED; -+ -+static LIST_HEAD(_complete_jobs); -+static LIST_HEAD(_io_jobs); -+static LIST_HEAD(_pages_jobs); -+ -+static int init_jobs(void) -+{ -+ INIT_LIST_HEAD(&_complete_jobs); -+ INIT_LIST_HEAD(&_io_jobs); -+ INIT_LIST_HEAD(&_pages_jobs); -+ -+ _job_cache = kmem_cache_create("kcopyd-jobs", sizeof(struct kcopyd_job), -+ __alignof__(struct kcopyd_job), -+ 0, NULL, NULL); -+ if (!_job_cache) -+ return -ENOMEM; -+ -+ _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, -+ mempool_free_slab, _job_cache); -+ if (!_job_pool) { -+ kmem_cache_destroy(_job_cache); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static void exit_jobs(void) -+{ -+ mempool_destroy(_job_pool); -+ kmem_cache_destroy(_job_cache); -+} -+ -+struct kcopyd_job *kcopyd_alloc_job(void) -+{ -+ struct kcopyd_job *job; -+ -+ job = mempool_alloc(_job_pool, GFP_KERNEL); -+ if (!job) -+ return NULL; -+ -+ memset(job, 0, sizeof(*job)); -+ return job; -+} -+ -+void kcopyd_free_job(struct kcopyd_job *job) -+{ -+ mempool_free(job, _job_pool); -+} -+ -+/* -+ * Functions to push and pop a job onto the head of a given job -+ * list. -+ */ -+static inline struct kcopyd_job *pop(struct list_head *jobs) -+{ -+ struct kcopyd_job *job = NULL; -+ int flags; -+ -+ spin_lock_irqsave(&_job_lock, flags); -+ -+ if (!list_empty(jobs)) { -+ job = list_entry(jobs->next, struct kcopyd_job, list); -+ list_del(&job->list); -+ } -+ spin_unlock_irqrestore(&_job_lock, flags); -+ -+ return job; -+} -+ -+static inline void push(struct list_head *jobs, struct kcopyd_job *job) -+{ -+ int flags; -+ -+ spin_lock_irqsave(&_job_lock, flags); -+ list_add(&job->list, jobs); -+ spin_unlock_irqrestore(&_job_lock, flags); -+} -+ -+/* -+ * Completion function for one of our buffers. -+ */ -+static void end_bh(struct buffer_head *bh, int uptodate) -+{ -+ struct kcopyd_job *job = bh->b_private; -+ -+ mark_buffer_uptodate(bh, uptodate); -+ unlock_buffer(bh); -+ -+ if (!uptodate) -+ job->err = -EIO; -+ -+ /* are we the last ? 
*/ -+ if (atomic_dec_and_test(&job->nr_incomplete)) { -+ push(&_complete_jobs, job); -+ wake_kcopyd(); -+ } -+ -+ free_buffer(bh); -+} -+ -+static void dispatch_bh(struct kcopyd_job *job, -+ struct buffer_head *bh, int block) -+{ -+ int p; -+ -+ /* -+ * Add in the job offset -+ */ -+ block += job->offset >> job->block_shift; -+ bh->b_blocknr = (job->disk.sector >> job->block_shift) + block; -+ -+ p = block >> job->bpp_shift; -+ block &= job->bpp_mask; -+ -+ bh->b_dev = B_FREE; -+ bh->b_size = job->block_size; -+ set_bh_page(bh, job->pages[p], -+ (block << job->block_shift) << SECTOR_SHIFT); -+ bh->b_this_page = bh; -+ -+ init_buffer(bh, end_bh, job); -+ -+ bh->b_dev = job->disk.dev; -+ bh->b_state = ((1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req)); -+ -+ set_bit(BH_Uptodate, &bh->b_state); -+ if (job->rw == WRITE) -+ clear_bit(BH_Dirty, &bh->b_state); -+ -+ submit_bh(job->rw, bh); -+} -+ -+/* -+ * These three functions process 1 item from the corresponding -+ * job list. -+ * -+ * They return: -+ * < 0: error -+ * 0: success -+ * > 0: can't process yet. -+ */ -+static int run_complete_job(struct kcopyd_job *job) -+{ -+ job->callback(job); -+ return 0; -+} -+ -+/* -+ * Request io on as many buffer heads as we can currently get for -+ * a particular job. -+ */ -+static int run_io_job(struct kcopyd_job *job) -+{ -+ unsigned int block; -+ struct buffer_head *bh; -+ -+ for (block = atomic_read(&job->nr_requested); -+ block < job->nr_blocks; block++) { -+ bh = alloc_buffer(); -+ if (!bh) -+ break; -+ -+ atomic_inc(&job->nr_requested); -+ dispatch_bh(job, bh, block); -+ } -+ -+ return (block == job->nr_blocks) ? 0 : 1; -+} -+ -+static int run_pages_job(struct kcopyd_job *job) -+{ -+ int r; -+ -+ job->nr_pages = (job->disk.count + job->offset) / -+ (PAGE_SIZE / SECTOR_SIZE); -+ r = kcopyd_get_pages(job->nr_pages, job->pages); -+ if (!r) { -+ /* this job is ready for io */ -+ push(&_io_jobs, job); -+ return 0; -+ } -+ -+ if (r == -ENOMEM) -+ /* can complete now */ -+ return 1; -+ -+ return r; -+} -+ -+/* -+ * Run through a list for as long as possible. Returns the count -+ * of successful jobs. -+ */ -+static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) -+{ -+ struct kcopyd_job *job; -+ int r, count = 0; -+ -+ while ((job = pop(jobs))) { -+ -+ r = fn(job); -+ -+ if (r < 0) { -+ /* error this rogue job */ -+ job->err = r; -+ push(&_complete_jobs, job); -+ break; -+ } -+ -+ if (r > 0) { -+ /* -+ * We couldn't service this job ATM, so -+ * push this job back onto the list. -+ */ -+ push(jobs, job); -+ break; -+ } -+ -+ count++; -+ } -+ -+ return count; -+} -+ -+/* -+ * kcopyd does this every time it's woken up. -+ */ -+static void do_work(void) -+{ -+ int count; -+ -+ /* -+ * We loop round until there is no more work to do. 
-+ */ -+ do { -+ count = process_jobs(&_complete_jobs, run_complete_job); -+ count += process_jobs(&_io_jobs, run_io_job); -+ count += process_jobs(&_pages_jobs, run_pages_job); -+ -+ } while (count); -+ -+ run_task_queue(&tq_disk); -+} -+ -+/*----------------------------------------------------------------- -+ * The daemon -+ *---------------------------------------------------------------*/ -+static atomic_t _kcopyd_must_die; -+static DECLARE_MUTEX(_run_lock); -+static DECLARE_WAIT_QUEUE_HEAD(_job_queue); -+ -+static int kcopyd(void *arg) -+{ -+ DECLARE_WAITQUEUE(wq, current); -+ -+ daemonize(); -+ strcpy(current->comm, "kcopyd"); -+ atomic_set(&_kcopyd_must_die, 0); -+ -+ add_wait_queue(&_job_queue, &wq); -+ -+ down(&_run_lock); -+ up(&start_lock); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ if (atomic_read(&_kcopyd_must_die)) -+ break; -+ -+ do_work(); -+ schedule(); -+ } -+ -+ set_current_state(TASK_RUNNING); -+ remove_wait_queue(&_job_queue, &wq); -+ -+ up(&_run_lock); -+ -+ return 0; -+} -+ -+static int start_daemon(void) -+{ -+ static pid_t pid = 0; -+ -+ down(&start_lock); -+ -+ pid = kernel_thread(kcopyd, NULL, 0); -+ if (pid <= 0) { -+ DMERR("Failed to start kcopyd thread"); -+ return -EAGAIN; -+ } -+ -+ /* -+ * wait for the daemon to up this mutex. -+ */ -+ down(&start_lock); -+ up(&start_lock); -+ -+ return 0; -+} -+ -+static int stop_daemon(void) -+{ -+ atomic_set(&_kcopyd_must_die, 1); -+ wake_kcopyd(); -+ down(&_run_lock); -+ up(&_run_lock); -+ -+ return 0; -+} -+ -+static void wake_kcopyd(void) -+{ -+ wake_up_interruptible(&_job_queue); -+} -+ -+static int calc_shift(unsigned int n) -+{ -+ int s; -+ -+ for (s = 0; n; s++, n >>= 1) -+ ; -+ -+ return --s; -+} -+ -+static void calc_block_sizes(struct kcopyd_job *job) -+{ -+ job->block_size = get_hardsect_size(job->disk.dev); -+ job->block_shift = calc_shift(job->block_size / SECTOR_SIZE); -+ job->bpp_shift = PAGE_SHIFT - job->block_shift - SECTOR_SHIFT; -+ job->bpp_mask = (1 << job->bpp_shift) - 1; -+ job->nr_blocks = job->disk.count >> job->block_shift; -+ atomic_set(&job->nr_requested, 0); -+ atomic_set(&job->nr_incomplete, job->nr_blocks); -+} -+ -+int kcopyd_io(struct kcopyd_job *job) -+{ -+ calc_block_sizes(job); -+ push(job->pages[0] ? &_io_jobs : &_pages_jobs, job); -+ wake_kcopyd(); -+ return 0; -+} -+ -+/*----------------------------------------------------------------- -+ * The copier is implemented on top of the simpler async io -+ * daemon above. 
-+ *---------------------------------------------------------------*/ -+struct copy_info { -+ kcopyd_notify_fn notify; -+ void *notify_context; -+ -+ struct kcopyd_region to; -+}; -+ -+#define MIN_INFOS 128 -+static kmem_cache_t *_copy_cache = NULL; -+static mempool_t *_copy_pool = NULL; -+ -+static int init_copier(void) -+{ -+ _copy_cache = kmem_cache_create("kcopyd-info", -+ sizeof(struct copy_info), -+ __alignof__(struct copy_info), -+ 0, NULL, NULL); -+ if (!_copy_cache) -+ return -ENOMEM; -+ -+ _copy_pool = mempool_create(MIN_INFOS, mempool_alloc_slab, -+ mempool_free_slab, _copy_cache); -+ if (!_copy_pool) { -+ kmem_cache_destroy(_copy_cache); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+static void exit_copier(void) -+{ -+ if (_copy_pool) -+ mempool_destroy(_copy_pool); -+ -+ if (_copy_cache) -+ kmem_cache_destroy(_copy_cache); -+} -+ -+static inline struct copy_info *alloc_copy_info(void) -+{ -+ return mempool_alloc(_copy_pool, GFP_KERNEL); -+} -+ -+static inline void free_copy_info(struct copy_info *info) -+{ -+ mempool_free(info, _copy_pool); -+} -+ -+void copy_complete(struct kcopyd_job *job) -+{ -+ struct copy_info *info = (struct copy_info *) job->context; -+ -+ if (info->notify) -+ info->notify(job->err, info->notify_context); -+ -+ free_copy_info(info); -+ kcopyd_free_pages(job->nr_pages, job->pages); -+ kcopyd_free_job(job); -+} -+ -+/* -+ * These callback functions implement the state machine that copies regions. -+ */ -+void copy_write(struct kcopyd_job *job) -+{ -+ struct copy_info *info = (struct copy_info *) job->context; -+ -+ if (job->err && info->notify) { -+ info->notify(job->err, job->context); -+ kcopyd_free_job(job); -+ free_copy_info(info); -+ return; -+ } -+ -+ job->rw = WRITE; -+ memcpy(&job->disk, &info->to, sizeof(job->disk)); -+ job->callback = copy_complete; -+ job->context = info; -+ -+ /* -+ * Queue the write. -+ */ -+ kcopyd_io(job); -+} -+ -+int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, -+ kcopyd_notify_fn fn, void *context) -+{ -+ struct copy_info *info; -+ struct kcopyd_job *job; -+ -+ /* -+ * Allocate a new copy_info. -+ */ -+ info = alloc_copy_info(); -+ if (!info) -+ return -ENOMEM; -+ -+ job = kcopyd_alloc_job(); -+ if (!job) { -+ free_copy_info(info); -+ return -ENOMEM; -+ } -+ -+ /* -+ * set up for the read. -+ */ -+ info->notify = fn; -+ info->notify_context = context; -+ memcpy(&info->to, to, sizeof(*to)); -+ -+ job->rw = READ; -+ memcpy(&job->disk, from, sizeof(*from)); -+ -+ job->offset = 0; -+ calc_block_sizes(job); -+ job->callback = copy_write; -+ job->context = info; -+ -+ /* -+ * Trigger job. 
-+ */ -+ kcopyd_io(job); -+ return 0; -+} -+ -+/*----------------------------------------------------------------- -+ * Unit setup -+ *---------------------------------------------------------------*/ -+static struct { -+ int (*init) (void); -+ void (*exit) (void); -+ -+} _inits[] = { -+#define xx(n) { init_ ## n, exit_ ## n} -+ xx(pages), -+ xx(buffers), -+ xx(jobs), -+ xx(copier) -+#undef xx -+}; -+ -+static int _client_count = 0; -+static DECLARE_MUTEX(_client_count_sem); -+ -+static int kcopyd_init(void) -+{ -+ const int count = sizeof(_inits) / sizeof(*_inits); -+ -+ int r, i; -+ -+ for (i = 0; i < count; i++) { -+ r = _inits[i].init(); -+ if (r) -+ goto bad; -+ } -+ -+ start_daemon(); -+ return 0; -+ -+ bad: -+ while (i--) -+ _inits[i].exit(); -+ -+ return r; -+} -+ -+static void kcopyd_exit(void) -+{ -+ int i = sizeof(_inits) / sizeof(*_inits); -+ -+ if (stop_daemon()) -+ DMWARN("Couldn't stop kcopyd."); -+ -+ while (i--) -+ _inits[i].exit(); -+} -+ -+void kcopyd_inc_client_count(void) -+{ -+ /* -+ * What I need here is an atomic_test_and_inc that returns -+ * the previous value of the atomic... In its absence I lock -+ * an int with a semaphore. :-( -+ */ -+ down(&_client_count_sem); -+ if (_client_count == 0) -+ kcopyd_init(); -+ _client_count++; -+ -+ up(&_client_count_sem); -+} -+ -+void kcopyd_dec_client_count(void) -+{ -+ down(&_client_count_sem); -+ if (--_client_count == 0) -+ kcopyd_exit(); -+ -+ up(&_client_count_sem); -+} -diff -ruN linux-2.4.19-pre8/drivers/md/kcopyd.h linux/drivers/md/kcopyd.h ---- linux-2.4.19-pre8/drivers/md/kcopyd.h Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/kcopyd.h Mon May 20 14:17:36 2002 -@@ -0,0 +1,97 @@ -+/* -+ * Copyright (C) 2001 Sistina Software -+ * -+ * This file is released under the GPL. -+ */ -+ -+#ifndef DM_KCOPYD_H -+#define DM_KCOPYD_H -+ -+/* -+ * Needed for the definition of offset_t. -+ */ -+#include -+#include -+ -+struct kcopyd_region { -+ kdev_t dev; -+ offset_t sector; -+ offset_t count; -+}; -+ -+#define MAX_KCOPYD_PAGES 128 -+ -+struct kcopyd_job { -+ struct list_head list; -+ -+ /* -+ * Error state of the job. -+ */ -+ int err; -+ -+ /* -+ * Either READ or WRITE -+ */ -+ int rw; -+ -+ /* -+ * The source or destination for the transfer. -+ */ -+ struct kcopyd_region disk; -+ -+ int nr_pages; -+ struct page *pages[MAX_KCOPYD_PAGES]; -+ -+ /* -+ * Shifts and masks that will be useful when dispatching -+ * each buffer_head. -+ */ -+ offset_t offset; -+ offset_t block_size; -+ offset_t block_shift; -+ offset_t bpp_shift; /* blocks per page */ -+ offset_t bpp_mask; -+ -+ /* -+ * nr_blocks is how many buffer heads will have to be -+ * displatched to service this job, nr_requested is how -+ * many have been dispatched and nr_complete is how many -+ * have come back. -+ */ -+ unsigned int nr_blocks; -+ atomic_t nr_requested; -+ atomic_t nr_incomplete; -+ -+ /* -+ * Set this to ensure you are notified when the job has -+ * completed. 'context' is for callback to use. -+ */ -+ void (*callback)(struct kcopyd_job *job); -+ void *context; -+}; -+ -+/* -+ * Low level async io routines. -+ */ -+struct kcopyd_job *kcopyd_alloc_job(void); -+void kcopyd_free_job(struct kcopyd_job *job); -+ -+int kcopyd_queue_job(struct kcopyd_job *job); -+ -+/* -+ * Submit a copy job to kcopyd. This is built on top of the -+ * previous three fns. 
-+ */ -+typedef void (*kcopyd_notify_fn)(int err, void *context); -+ -+int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to, -+ kcopyd_notify_fn fn, void *context); -+ -+/* -+ * We only want kcopyd to reserve resources if someone is -+ * actually using it. -+ */ -+void kcopyd_inc_client_count(void); -+void kcopyd_dec_client_count(void); -+ -+#endif -diff -ruN linux-2.4.19-pre8/include/linux/device-mapper.h linux/include/linux/device-mapper.h ---- linux-2.4.19-pre8/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970 -+++ linux/include/linux/device-mapper.h Mon May 20 15:00:56 2002 -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the LGPL. -+ */ -+ -+#ifndef _LINUX_DEVICE_MAPPER_H -+#define _LINUX_DEVICE_MAPPER_H -+ -+#define DM_DIR "device-mapper" /* Slashes not supported */ -+#define DM_MAX_TYPE_NAME 16 -+#define DM_NAME_LEN 128 -+#define DM_UUID_LEN 129 -+ -+#ifdef __KERNEL__ -+ -+struct dm_table; -+struct dm_dev; -+typedef unsigned long offset_t; -+ -+typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; -+ -+/* -+ * Prototypes for functions for a target -+ */ -+typedef int (*dm_ctr_fn) (struct dm_table *t, offset_t b, offset_t l, -+ int argc, char **argv, void **context); -+typedef void (*dm_dtr_fn) (struct dm_table *t, void *c); -+typedef int (*dm_map_fn) (struct buffer_head *bh, int rw, void *context); -+typedef int (*dm_err_fn) (struct buffer_head *bh, int rw, void *context); -+typedef int (*dm_status_fn) (status_type_t status_type, char *result, -+ int maxlen, void *context); -+typedef int (*dm_wait_fn) (void *context, wait_queue_t *wq, int add); -+ -+void dm_error(const char *message); -+ -+/* -+ * Constructors should call these functions to ensure destination devices -+ * are opened/closed correctly -+ */ -+int dm_table_get_device(struct dm_table *t, const char *path, -+ offset_t start, offset_t len, struct dm_dev **result); -+void dm_table_put_device(struct dm_table *table, struct dm_dev *d); -+ -+/* -+ * Information about a target type -+ */ -+struct target_type { -+ const char *name; -+ struct module *module; -+ dm_ctr_fn ctr; -+ dm_dtr_fn dtr; -+ dm_map_fn map; -+ dm_err_fn err; -+ dm_status_fn status; -+ dm_wait_fn wait; -+}; -+ -+int dm_register_target(struct target_type *t); -+int dm_unregister_target(struct target_type *t); -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _LINUX_DEVICE_MAPPER_H */ -diff -ruN linux-2.4.19-pre8/include/linux/dm-ioctl.h linux/include/linux/dm-ioctl.h ---- linux-2.4.19-pre8/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970 -+++ linux/include/linux/dm-ioctl.h Mon May 20 16:47:30 2002 -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (C) 2001 Sistina Software (UK) Limited. -+ * -+ * This file is released under the LGPL. -+ */ -+ -+#ifndef _LINUX_DM_IOCTL_H -+#define _LINUX_DM_IOCTL_H -+ -+#include "device-mapper.h" -+ -+/* -+ * Implements a traditional ioctl interface to the device mapper. -+ */ -+ -+/* -+ * All ioctl arguments consist of a single chunk of memory, with -+ * this structure at the start. If a uuid is specified any -+ * lookup (eg. for a DM_INFO) will be done on that, *not* the -+ * name. 
-+ */ -+struct dm_ioctl { -+ char version[16]; -+ -+ unsigned long data_size; /* total size of data passed in -+ * including this struct */ -+ -+ unsigned long data_start; /* offset to start of data -+ * relative to start of this struct */ -+ -+ char name[DM_NAME_LEN]; /* device name */ -+ -+ unsigned int target_count; /* in/out */ -+ unsigned int open_count; /* out */ -+ unsigned int flags; /* in/out */ -+ -+ __kernel_dev_t dev; /* in/out */ -+ -+ char uuid[DM_UUID_LEN]; /* unique identifier for -+ * the block device */ -+}; -+ -+/* -+ * Used to specify tables. These structures appear after the -+ * dm_ioctl. -+ */ -+struct dm_target_spec { -+ int32_t status; /* used when reading from kernel only */ -+ unsigned long long sector_start; -+ unsigned long long length; -+ -+ char target_type[DM_MAX_TYPE_NAME]; -+ -+ unsigned long next; /* offset in bytes to next target_spec */ -+ -+ /* -+ * Parameter string starts immediately after this object. -+ * Be careful to add padding after string to ensure correct -+ * alignment of subsequent dm_target_spec. -+ */ -+}; -+ -+/* -+ * Used to retrieve the target dependencies. -+ */ -+struct dm_target_deps { -+ unsigned int count; -+ -+ __kernel_dev_t dev[0]; /* out */ -+}; -+ -+#define DM_IOCTL 0xfd -+ -+enum { -+ DM_CREATE_CMD = 0, -+ DM_REMOVE_CMD, -+ DM_SUSPEND_CMD, -+ DM_RELOAD_CMD, -+ DM_INFO_CMD, -+ DM_RENAME_CMD, -+ DM_VERSION_CMD, -+ DM_DEPS_CMD, -+ DM_REMOVE_ALL_CMD, -+ DM_GET_STATUS_CMD, -+ DM_WAIT_EVENT_CMD -+}; -+ -+#define DM_CREATE _IOWR(DM_IOCTL, DM_CREATE_CMD, struct dm_ioctl) -+#define DM_REMOVE _IOW(DM_IOCTL, DM_REMOVE_CMD, struct dm_ioctl) -+#define DM_SUSPEND _IOW(DM_IOCTL, DM_SUSPEND_CMD, struct dm_ioctl) -+#define DM_RELOAD _IOW(DM_IOCTL, DM_RELOAD_CMD, struct dm_ioctl) -+#define DM_INFO _IOWR(DM_IOCTL, DM_INFO_CMD, struct dm_ioctl) -+#define DM_RENAME _IOW(DM_IOCTL, DM_RENAME_CMD, struct dm_ioctl) -+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) -+#define DM_DEPS _IOWR(DM_IOCTL, DM_DEPS_CMD, struct dm_ioctl) -+#define DM_REMOVE_ALL _IOR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) -+#define DM_GET_STATUS _IOWR(DM_IOCTL, DM_GET_STATUS_CMD, struct dm_ioctl) -+#define DM_WAIT_EVENT _IOR(DM_IOCTL, DM_WAIT_EVENT_CMD, struct dm_ioctl) -+ -+#define DM_IOCTL_VERSION "0.94" -+#define DM_DRIVER_VERSION "0.94.11-ioctl (2002-05-20)" -+ -+/* Status bits */ -+#define DM_READONLY_FLAG 0x00000001 -+#define DM_SUSPEND_FLAG 0x00000002 -+#define DM_EXISTS_FLAG 0x00000004 -+#define DM_PERSISTENT_DEV_FLAG 0x00000008 -+ -+/* Flag passed into ioctl STATUS command to get table information -+ rather than current status */ -+#define DM_STATUS_TABLE_FLAG 0x00000010 -+ -+#endif /* _LINUX_DM_IOCTL_H */ -diff -ruN linux-2.4.19-pre8/include/linux/fs.h linux/include/linux/fs.h ---- linux-2.4.19-pre8/include/linux/fs.h Mon May 20 16:37:59 2002 -+++ linux/include/linux/fs.h Mon May 20 17:03:43 2002 -@@ -258,7 +258,10 @@ - char * b_data; /* pointer to data block */ - struct page *b_page; /* the page this bh is mapped to */ - void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ -- void *b_private; /* reserved for b_end_io */ -+ void *b_private; /* reserved for b_end_io, also used by ext3 */ -+ void *b_bdev_private; /* a hack to get around ext3 using b_private -+ * after handing the buffer_head to the -+ * block layer */ - - unsigned long b_rsector; /* Real buffer location on disk */ - wait_queue_head_t b_wait; -diff -ruN linux-2.4.19-pre8/include/linux/mempool.h linux/include/linux/mempool.h ---- 
linux-2.4.19-pre8/include/linux/mempool.h Thu Jan 1 01:00:00 1970 -+++ linux/include/linux/mempool.h Mon May 20 16:58:40 2002 -@@ -0,0 +1,41 @@ -+/* -+ * memory buffer pool support -+ */ -+#ifndef _LINUX_MEMPOOL_H -+#define _LINUX_MEMPOOL_H -+ -+#include -+#include -+ -+struct mempool_s; -+typedef struct mempool_s mempool_t; -+ -+typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data); -+typedef void (mempool_free_t)(void *element, void *pool_data); -+ -+struct mempool_s { -+ spinlock_t lock; -+ int min_nr, curr_nr; -+ struct list_head elements; -+ -+ void *pool_data; -+ mempool_alloc_t *alloc; -+ mempool_free_t *free; -+ wait_queue_head_t wait; -+}; -+extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, -+ mempool_free_t *free_fn, void *pool_data); -+extern void mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask); -+extern void mempool_destroy(mempool_t *pool); -+extern void * mempool_alloc(mempool_t *pool, int gfp_mask); -+extern void mempool_free(void *element, mempool_t *pool); -+ -+ -+/* -+ * A mempool_alloc_t and mempool_free_t that get the memory from -+ * a slab that is passed in through pool_data. -+ */ -+void *mempool_alloc_slab(int gfp_mask, void *pool_data); -+void mempool_free_slab(void *element, void *pool_data); -+ -+#endif /* _LINUX_MEMPOOL_H */ -diff -ruN linux-2.4.19-pre8/mm/Makefile linux/mm/Makefile ---- linux-2.4.19-pre8/mm/Makefile Mon May 20 16:38:02 2002 -+++ linux/mm/Makefile Mon May 20 17:00:27 2002 -@@ -9,12 +9,12 @@ - - O_TARGET := mm.o - --export-objs := shmem.o filemap.o memory.o page_alloc.o -+export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o - - obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ - vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ - page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ -- shmem.o -+ shmem.o mempool.o - - obj-$(CONFIG_HIGHMEM) += highmem.o - -diff -ruN linux-2.4.19-pre8/mm/mempool.c linux/mm/mempool.c ---- linux-2.4.19-pre8/mm/mempool.c Thu Jan 1 01:00:00 1970 -+++ linux/mm/mempool.c Mon May 20 16:58:40 2002 -@@ -0,0 +1,295 @@ -+/* -+ * linux/mm/mempool.c -+ * -+ * memory buffer pool support. Such pools are mostly used -+ * for guaranteed, deadlock-free memory allocations during -+ * extreme VM load. -+ * -+ * started by Ingo Molnar, Copyright (C) 2001 -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+/** -+ * mempool_create - create a memory pool -+ * @min_nr: the minimum number of elements guaranteed to be -+ * allocated for this pool. -+ * @alloc_fn: user-defined element-allocation function. -+ * @free_fn: user-defined element-freeing function. -+ * @pool_data: optional private data available to the user-defined functions. -+ * -+ * this function creates and allocates a guaranteed size, preallocated -+ * memory pool. The pool can be used from the mempool_alloc and mempool_free -+ * functions. This function might sleep. Both the alloc_fn() and the free_fn() -+ * functions might sleep - as long as the mempool_alloc function is not called -+ * from IRQ contexts. The element allocated by alloc_fn() must be able to -+ * hold a struct list_head. (8 bytes on x86.) 
-+ */ -+mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, -+ mempool_free_t *free_fn, void *pool_data) -+{ -+ mempool_t *pool; -+ int i; -+ -+ pool = kmalloc(sizeof(*pool), GFP_KERNEL); -+ if (!pool) -+ return NULL; -+ memset(pool, 0, sizeof(*pool)); -+ -+ spin_lock_init(&pool->lock); -+ pool->min_nr = min_nr; -+ pool->pool_data = pool_data; -+ INIT_LIST_HEAD(&pool->elements); -+ init_waitqueue_head(&pool->wait); -+ pool->alloc = alloc_fn; -+ pool->free = free_fn; -+ -+ /* -+ * First pre-allocate the guaranteed number of buffers. -+ */ -+ for (i = 0; i < min_nr; i++) { -+ void *element; -+ struct list_head *tmp; -+ element = pool->alloc(GFP_KERNEL, pool->pool_data); -+ -+ if (unlikely(!element)) { -+ /* -+ * Not enough memory - free the allocated ones -+ * and return: -+ */ -+ list_for_each(tmp, &pool->elements) { -+ element = tmp; -+ pool->free(element, pool->pool_data); -+ } -+ kfree(pool); -+ -+ return NULL; -+ } -+ tmp = element; -+ list_add(tmp, &pool->elements); -+ pool->curr_nr++; -+ } -+ return pool; -+} -+ -+/** -+ * mempool_resize - resize an existing memory pool -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * @new_min_nr: the new minimum number of elements guaranteed to be -+ * allocated for this pool. -+ * @gfp_mask: the usual allocation bitmask. -+ * -+ * This function shrinks/grows the pool. In the case of growing, -+ * it cannot be guaranteed that the pool will be grown to the new -+ * size immediately, but new mempool_free() calls will refill it. -+ * -+ * Note, the caller must guarantee that no mempool_destroy is called -+ * while this function is running. mempool_alloc() & mempool_free() -+ * might be called (eg. from IRQ contexts) while this function executes. -+ */ -+void mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask) -+{ -+ int delta; -+ void *element; -+ unsigned long flags; -+ struct list_head *tmp; -+ -+ if (new_min_nr <= 0) -+ BUG(); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ if (new_min_nr < pool->min_nr) { -+ pool->min_nr = new_min_nr; -+ /* -+ * Free possible excess elements. -+ */ -+ while (pool->curr_nr > pool->min_nr) { -+ tmp = pool->elements.next; -+ if (tmp == &pool->elements) -+ BUG(); -+ list_del(tmp); -+ element = tmp; -+ pool->curr_nr--; -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ pool->free(element, pool->pool_data); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ } -+ spin_unlock_irqrestore(&pool->lock, flags); -+ return; -+ } -+ delta = new_min_nr - pool->min_nr; -+ pool->min_nr = new_min_nr; -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ /* -+ * We refill the pool up to the new treshold - but we dont -+ * (cannot) guarantee that the refill succeeds. -+ */ -+ while (delta) { -+ element = pool->alloc(gfp_mask, pool->pool_data); -+ if (!element) -+ break; -+ mempool_free(element, pool); -+ delta--; -+ } -+} -+ -+/** -+ * mempool_destroy - deallocate a memory pool -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * -+ * this function only sleeps if the free_fn() function sleeps. The caller -+ * has to guarantee that no mempool_alloc() nor mempool_free() happens in -+ * this pool when calling this function. 
-+ */ -+void mempool_destroy(mempool_t *pool) -+{ -+ void *element; -+ struct list_head *head, *tmp; -+ -+ if (!pool) -+ return; -+ -+ head = &pool->elements; -+ for (tmp = head->next; tmp != head; ) { -+ element = tmp; -+ tmp = tmp->next; -+ pool->free(element, pool->pool_data); -+ pool->curr_nr--; -+ } -+ if (pool->curr_nr) -+ BUG(); -+ kfree(pool); -+} -+ -+/** -+ * mempool_alloc - allocate an element from a specific memory pool -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * @gfp_mask: the usual allocation bitmask. -+ * -+ * this function only sleeps if the alloc_fn function sleeps or -+ * returns NULL. Note that due to preallocation, this function -+ * *never* fails when called from process contexts. (it might -+ * fail if called from an IRQ context.) -+ */ -+void * mempool_alloc(mempool_t *pool, int gfp_mask) -+{ -+ void *element; -+ unsigned long flags; -+ struct list_head *tmp; -+ int curr_nr; -+ DECLARE_WAITQUEUE(wait, current); -+ int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); -+ -+repeat_alloc: -+ element = pool->alloc(gfp_nowait, pool->pool_data); -+ if (likely(element != NULL)) -+ return element; -+ -+ /* -+ * If the pool is less than 50% full then try harder -+ * to allocate an element: -+ */ -+ if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) { -+ element = pool->alloc(gfp_mask, pool->pool_data); -+ if (likely(element != NULL)) -+ return element; -+ } -+ -+ /* -+ * Kick the VM at this point. -+ */ -+ wakeup_bdflush(); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ if (likely(pool->curr_nr)) { -+ tmp = pool->elements.next; -+ list_del(tmp); -+ element = tmp; -+ pool->curr_nr--; -+ spin_unlock_irqrestore(&pool->lock, flags); -+ return element; -+ } -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ /* We must not sleep in the GFP_ATOMIC case */ -+ if (gfp_mask == gfp_nowait) -+ return NULL; -+ -+ run_task_queue(&tq_disk); -+ -+ add_wait_queue_exclusive(&pool->wait, &wait); -+ set_task_state(current, TASK_UNINTERRUPTIBLE); -+ -+ spin_lock_irqsave(&pool->lock, flags); -+ curr_nr = pool->curr_nr; -+ spin_unlock_irqrestore(&pool->lock, flags); -+ -+ if (!curr_nr) -+ schedule(); -+ -+ current->state = TASK_RUNNING; -+ remove_wait_queue(&pool->wait, &wait); -+ -+ goto repeat_alloc; -+} -+ -+/** -+ * mempool_free - return an element to the pool. -+ * @element: pool element pointer. -+ * @pool: pointer to the memory pool which was allocated via -+ * mempool_create(). -+ * -+ * this function only sleeps if the free_fn() function sleeps. -+ */ -+void mempool_free(void *element, mempool_t *pool) -+{ -+ unsigned long flags; -+ -+ if (pool->curr_nr < pool->min_nr) { -+ spin_lock_irqsave(&pool->lock, flags); -+ if (pool->curr_nr < pool->min_nr) { -+ list_add(element, &pool->elements); -+ pool->curr_nr++; -+ spin_unlock_irqrestore(&pool->lock, flags); -+ wake_up(&pool->wait); -+ return; -+ } -+ spin_unlock_irqrestore(&pool->lock, flags); -+ } -+ pool->free(element, pool->pool_data); -+} -+ -+/* -+ * A commonly used alloc and free fn. 
-+ */ -+void *mempool_alloc_slab(int gfp_mask, void *pool_data) -+{ -+ kmem_cache_t *mem = (kmem_cache_t *) pool_data; -+ return kmem_cache_alloc(mem, gfp_mask); -+} -+ -+void mempool_free_slab(void *element, void *pool_data) -+{ -+ kmem_cache_t *mem = (kmem_cache_t *) pool_data; -+ kmem_cache_free(mem, element); -+} -+ -+ -+EXPORT_SYMBOL(mempool_create); -+EXPORT_SYMBOL(mempool_resize); -+EXPORT_SYMBOL(mempool_destroy); -+EXPORT_SYMBOL(mempool_alloc); -+EXPORT_SYMBOL(mempool_free); -+EXPORT_SYMBOL(mempool_alloc_slab); -+EXPORT_SYMBOL(mempool_free_slab); -+ diff --git a/patches/linux-2.4.19-pre10-devmapper-ioctl.patch b/patches/linux-2.4.19-rc1-devmapper-ioctl.patch similarity index 92% rename from patches/linux-2.4.19-pre10-devmapper-ioctl.patch rename to patches/linux-2.4.19-rc1-devmapper-ioctl.patch index 17dfc21..e4f0707 100644 --- a/patches/linux-2.4.19-pre10-devmapper-ioctl.patch +++ b/patches/linux-2.4.19-rc1-devmapper-ioctl.patch @@ -1,16 +1,40 @@ -diff -ruN linux-2.4.19-pre10/drivers/md/Config.in linux/drivers/md/Config.in ---- linux-2.4.19-pre10/drivers/md/Config.in Wed Jun 12 12:03:42 2002 -+++ linux/drivers/md/Config.in Thu Jun 13 17:12:16 2002 -@@ -14,5 +14,6 @@ +diff -ruN linux-2.4.19-rc1/Documentation/Configure.help linux/Documentation/Configure.help +--- linux-2.4.19-rc1/Documentation/Configure.help Tue Jun 25 14:14:05 2002 ++++ linux/Documentation/Configure.help Tue Jun 25 21:33:16 2002 +@@ -1775,6 +1775,18 @@ + want), say M here and read . The + module will be called lvm-mod.o. + ++Device-mapper support ++CONFIG_BLK_DEV_DM ++ This option lets you create logical block devices dynamically by ++ joining together segments of existing block devices. This mechanism ++ is used by the new version of the logical volume manager under ++ development, LVM2. ++ ++ If you want to compile this as a module, say M here and read ++ . The module will be called dm-mod.o. ++ ++ If unsure, say N. ++ + Multiple devices driver support (RAID and LVM) + CONFIG_MD + Support multiple physical spindles through a single logical device. 
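As a rough illustrative sketch (not part of the patch itself) of how a driver might use the mempool interface shown above — mempool_create() backed by the slab helpers mempool_alloc_slab()/mempool_free_slab() — here is the same pattern kcopyd's init_jobs() follows, written for a hypothetical per-request structure. The names struct my_io, _my_cache, _my_pool, my_pool_init()/my_pool_exit() and the reserve of 16 elements are all assumptions made up for the example.

#include <linux/slab.h>
#include <linux/mempool.h>

/*
 * Hypothetical per-request object.  A mempool element must be able to
 * hold a struct list_head (see the mempool_create() comment above).
 */
struct my_io {
	struct list_head list;
	int rw;
};

static kmem_cache_t *_my_cache;
static mempool_t *_my_pool;

static int my_pool_init(void)
{
	_my_cache = kmem_cache_create("my-io", sizeof(struct my_io),
				      __alignof__(struct my_io),
				      0, NULL, NULL);
	if (!_my_cache)
		return -ENOMEM;

	/* guarantee at least 16 preallocated elements, drawn from the slab */
	_my_pool = mempool_create(16, mempool_alloc_slab,
				  mempool_free_slab, _my_cache);
	if (!_my_pool) {
		kmem_cache_destroy(_my_cache);
		return -ENOMEM;
	}

	return 0;
}

static void my_pool_exit(void)
{
	mempool_destroy(_my_pool);
	kmem_cache_destroy(_my_cache);
}

Once the pool exists, mempool_alloc(_my_pool, GFP_KERNEL) from process context should not fail — per the mempool_alloc() comment above it falls back to the preallocated elements and sleeps until one is returned — and mempool_free() refills the pool whenever it is below its minimum size.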
+diff -ruN linux-2.4.19-rc1/drivers/md/Config.in linux/drivers/md/Config.in +--- linux-2.4.19-rc1/drivers/md/Config.in Tue Jun 25 14:14:54 2002 ++++ linux/drivers/md/Config.in Tue Jun 25 21:33:15 2002 +@@ -14,5 +14,8 @@ dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD -+dep_tristate ' Device mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD ++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then ++ dep_tristate ' Device-mapper support (EXPERIMENTAL)' CONFIG_BLK_DEV_DM $CONFIG_MD ++fi endmenu -diff -ruN linux-2.4.19-pre10/drivers/md/Makefile linux/drivers/md/Makefile ---- linux-2.4.19-pre10/drivers/md/Makefile Wed Jun 12 12:03:42 2002 -+++ linux/drivers/md/Makefile Thu Jun 13 17:12:16 2002 +diff -ruN linux-2.4.19-rc1/drivers/md/Makefile linux/drivers/md/Makefile +--- linux-2.4.19-rc1/drivers/md/Makefile Tue Jun 25 14:14:54 2002 ++++ linux/drivers/md/Makefile Tue Jun 25 21:33:16 2002 @@ -4,9 +4,12 @@ O_TARGET := mddev.o @@ -38,8 +62,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/Makefile linux/drivers/md/Makefile + +dm-mod.o: $(dm-mod-objs) + $(LD) -r -o $@ $(dm-mod-objs) -diff -ruN linux-2.4.19-pre10/drivers/md/dm-exception-store.c linux/drivers/md/dm-exception-store.c ---- linux-2.4.19-pre10/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/drivers/md/dm-exception-store.c linux/drivers/md/dm-exception-store.c +--- linux-2.4.19-rc1/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970 +++ linux/drivers/md/dm-exception-store.c Thu Jun 13 14:58:15 2002 @@ -0,0 +1,727 @@ +/* @@ -769,10 +793,10 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-exception-store.c linux/drivers/md/dm + + return 0; +} -diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c ---- linux-2.4.19-pre10/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-ioctl.c Thu Jun 13 15:05:00 2002 -@@ -0,0 +1,749 @@ +diff -ruN linux-2.4.19-rc1/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c +--- linux-2.4.19-rc1/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-ioctl.c Wed Jun 19 13:16:33 2002 +@@ -0,0 +1,807 @@ +/* + * Copyright (C) 2001 Sistina Software (UK) Limited. + * @@ -786,59 +810,22 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c +#include +#include + -+static void free_params(struct dm_ioctl *param) -+{ -+ vfree(param); -+} -+ -+static int version(struct dm_ioctl *user) -+{ -+ return copy_to_user(user, DM_DRIVER_VERSION, sizeof(DM_DRIVER_VERSION)); -+} -+ -+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param) -+{ -+ struct dm_ioctl tmp, *dmi; -+ -+ if (copy_from_user(&tmp, user, sizeof(tmp))) -+ return -EFAULT; -+ -+ if (strcmp(DM_IOCTL_VERSION, tmp.version)) { -+ DMWARN("struct dm_ioctl version incompatible"); -+ return -EINVAL; -+ } -+ -+ if (tmp.data_size < sizeof(tmp)) -+ return -EINVAL; -+ -+ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); -+ if (!dmi) -+ return -ENOMEM; -+ -+ if (copy_from_user(dmi, user, tmp.data_size)) { -+ vfree(dmi); -+ return -EFAULT; -+ } ++/*----------------------------------------------------------------- ++ * Implementation of the ioctl commands ++ *---------------------------------------------------------------*/ + -+ *param = dmi; -+ return 0; -+} ++/* ++ * All the ioctl commands get dispatched to functions with this ++ * prototype. 
++ */ ++typedef int (*ioctl_fn)(struct dm_ioctl *param, struct dm_ioctl *user); + -+static int validate_params(uint cmd, struct dm_ioctl *param) ++/* ++ * This is really a debug only call. ++ */ ++static int remove_all(struct dm_ioctl *param, struct dm_ioctl *user) +{ -+ /* Unless creating, either name of uuid but not both */ -+ if (cmd != DM_CREATE_CMD) { -+ if ((!*param->uuid && !*param->name) || -+ (*param->uuid && *param->name)) { -+ DMWARN("one of name or uuid must be supplied"); -+ return -EINVAL; -+ } -+ } -+ -+ /* Ensure strings are terminated */ -+ param->name[DM_NAME_LEN - 1] = '\0'; -+ param->uuid[DM_UUID_LEN - 1] = '\0'; -+ ++ dm_destroy_all(); + return 0; +} + @@ -855,7 +842,7 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + return -EINVAL; +} + -+static int next_target(struct dm_target_spec *last, unsigned long next, ++static int next_target(struct dm_target_spec *last, uint32_t next, + void *begin, void *end, + struct dm_target_spec **spec, char **params) +{ @@ -905,10 +892,13 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + + for (i = 0; i < args->target_count; i++) { + -+ r = first ? next_target((struct dm_target_spec *) args, ++ if (first) ++ r = next_target((struct dm_target_spec *) args, + args->data_start, -+ begin, end, &spec, ¶ms) : -+ next_target(spec, spec->next, begin, end, &spec, ¶ms); ++ begin, end, &spec, ¶ms); ++ else ++ r = next_target(spec, spec->next, begin, end, ++ &spec, ¶ms); + + if (r) + PARSE_ERROR("unable to find target"); @@ -962,26 +952,31 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + * userland. + */ +static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param, -+ void *data, unsigned long len) ++ void *data, uint32_t len) +{ + int r; + void *ptr = NULL; + -+ strncpy(param->version, DM_IOCTL_VERSION, sizeof(param->version)); -+ + if (data) { + ptr = align_ptr(user + 1, sizeof(unsigned long)); + param->data_start = ptr - (void *) user; + } + -+ r = copy_to_user(user, param, sizeof(*param)); ++ /* ++ * The version number has already been filled in, so we ++ * just copy later fields. ++ */ ++ r = copy_to_user(&user->data_size, ¶m->data_size, ++ sizeof(*param) - sizeof(param->version)); + if (r) -+ return r; ++ return -EFAULT; + + if (data) { + if (param->data_start + len > param->data_size) + return -ENOSPC; -+ r = copy_to_user(ptr, data, len); ++ ++ if (copy_to_user(ptr, data, len)) ++ r = -EFAULT; + } + + return r; @@ -1033,6 +1028,63 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c +} + +/* ++ * Copies device info back to user space, used by ++ * the create and info ioctls. ++ */ ++static int info(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ struct mapped_device *md; ++ ++ param->flags = 0; ++ ++ md = dm_get_name_r(lookup_name(param), lookup_type(param)); ++ if (!md) ++ /* ++ * Device not found - returns cleared exists flag. ++ */ ++ goto out; ++ ++ __info(md, param); ++ dm_put_r(md); ++ ++ out: ++ return results_to_user(user, param, NULL, 0); ++} ++ ++static int create(struct dm_ioctl *param, struct dm_ioctl *user) ++{ ++ int r, ro; ++ struct dm_table *t; ++ int minor; ++ ++ r = dm_table_create(&t); ++ if (r) ++ return r; ++ ++ r = populate_table(t, param); ++ if (r) { ++ dm_table_destroy(t); ++ return r; ++ } ++ ++ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ? ++ MINOR(to_kdev_t(param->dev)) : -1; ++ ++ ro = (param->flags & DM_READONLY_FLAG) ? 
1 : 0; ++ ++ r = dm_create(param->name, param->uuid, minor, ro, t); ++ if (r) { ++ dm_table_destroy(t); ++ return r; ++ } ++ ++ r = info(param, user); ++ return r; ++} ++ ++ ++ ++/* + * Build up the status struct for each target + */ +static int __status(struct mapped_device *md, struct dm_ioctl *param, @@ -1040,7 +1092,7 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c +{ + int i; + struct dm_target_spec *spec; -+ unsigned long long sector = 0LL; ++ uint64_t sector = 0LL; + char *outptr; + status_type_t type; + @@ -1172,30 +1224,6 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c +} + +/* -+ * Copies device info back to user space, used by -+ * the create and info ioctls. -+ */ -+static int info(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ struct mapped_device *md; -+ -+ param->flags = 0; -+ -+ md = dm_get_name_r(lookup_name(param), lookup_type(param)); -+ if (!md) -+ /* -+ * Device not found - returns cleared exists flag. -+ */ -+ goto out; -+ -+ __info(md, param); -+ dm_put_r(md); -+ -+ out: -+ return results_to_user(user, param, NULL, 0); -+} -+ -+/* + * Retrieves a list of devices used by a particular dm device. + */ +static int dep(struct dm_ioctl *param, struct dm_ioctl *user) @@ -1256,38 +1284,7 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + return r; +} + -+static int create(struct dm_ioctl *param, struct dm_ioctl *user) -+{ -+ int r, ro; -+ struct dm_table *t; -+ int minor; -+ -+ r = dm_table_create(&t); -+ if (r) -+ return r; -+ -+ r = populate_table(t, param); -+ if (r) { -+ dm_table_destroy(t); -+ return r; -+ } -+ -+ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ? -+ MINOR(to_kdev_t(param->dev)) : -1; -+ -+ ro = (param->flags & DM_READONLY_FLAG) ? 1 : 0; -+ -+ r = dm_create(param->name, param->uuid, minor, ro, t); -+ if (r) { -+ dm_table_destroy(t); -+ return r; -+ } -+ -+ r = info(param, user); -+ return r; -+} -+ -+static int remove(struct dm_ioctl *param) ++static int remove(struct dm_ioctl *param, struct dm_ioctl *user) +{ + int r; + struct mapped_device *md; @@ -1304,7 +1301,7 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + return r; +} + -+static int suspend(struct dm_ioctl *param) ++static int suspend(struct dm_ioctl *param, struct dm_ioctl *user) +{ + int r; + struct mapped_device *md; @@ -1355,7 +1352,7 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + return r; +} + -+static int rename(struct dm_ioctl *param) ++static int rename(struct dm_ioctl *param, struct dm_ioctl *user) +{ + char *newname = (char *) param + param->data_start; + @@ -1369,6 +1366,11 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + return 0; +} + ++ ++/*----------------------------------------------------------------- ++ * Implementation of open/close/ioctl on the special char ++ * device. 
++ *---------------------------------------------------------------*/ +static int ctl_open(struct inode *inode, struct file *file) +{ + /* only root can open this */ @@ -1386,97 +1388,174 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + return 0; +} + -+static int ctl_ioctl(struct inode *inode, struct file *file, -+ uint command, ulong u) ++static ioctl_fn lookup_ioctl(unsigned int cmd) ++{ ++ static struct { ++ int cmd; ++ ioctl_fn fn; ++ } _ioctls[] = { ++ {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ ++ {DM_REMOVE_ALL_CMD, remove_all}, ++ {DM_DEV_CREATE_CMD, create}, ++ {DM_DEV_REMOVE_CMD, remove}, ++ {DM_DEV_RELOAD_CMD, reload}, ++ {DM_DEV_RENAME_CMD, rename}, ++ {DM_DEV_SUSPEND_CMD, suspend}, ++ {DM_DEV_DEPS_CMD, dep}, ++ {DM_DEV_STATUS_CMD, info}, ++ {DM_TARGET_STATUS_CMD, get_status}, ++ {DM_TARGET_WAIT_CMD, wait_device_event}, ++ }; ++ static int nelts = sizeof(_ioctls) / sizeof(*_ioctls); ++ ++ return (cmd >= nelts) ? NULL : _ioctls[cmd].fn; ++} ++ ++/* ++ * As well as checking the version compatibility this always ++ * copies the kernel interface version out. ++ */ ++static int check_version(int cmd, struct dm_ioctl *user) +{ ++ uint32_t version[3]; + int r = 0; -+ struct dm_ioctl *param; -+ struct dm_ioctl *user = (struct dm_ioctl *) u; -+ uint cmd = _IOC_NR(command); -+ -+ /* Process commands without params first - always return version */ -+ switch (cmd) { -+ case DM_REMOVE_ALL_CMD: -+ dm_destroy_all(); -+ case DM_VERSION_CMD: -+ return version(user); -+ default: -+ break; ++ ++ if (copy_from_user(version, user->version, sizeof(version))) ++ return -EFAULT; ++ ++ if ((DM_VERSION_MAJOR != version[0]) || ++ (DM_VERSION_MINOR < version[1])) { ++ DMWARN("ioctl interface mismatch: " ++ "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", ++ DM_VERSION_MAJOR, DM_VERSION_MINOR, ++ DM_VERSION_PATCHLEVEL, ++ version[0], version[1], version[2], cmd); ++ r = -EINVAL; + } + -+ r = copy_params(user, ¶m); -+ if (r) -+ goto err; ++ /* ++ * Fill in the kernel version. 
++ */ ++ version[0] = DM_VERSION_MAJOR; ++ version[1] = DM_VERSION_MINOR; ++ version[2] = DM_VERSION_PATCHLEVEL; ++ if (copy_to_user(user->version, version, sizeof(version))) ++ return -EFAULT; + -+ r = validate_params(cmd, param); -+ if (r) { -+ free_params(param); -+ goto err; ++ return r; ++} ++ ++static void free_params(struct dm_ioctl *param) ++{ ++ vfree(param); ++} ++ ++static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param) ++{ ++ struct dm_ioctl tmp, *dmi; ++ ++ if (copy_from_user(&tmp, user, sizeof(tmp))) ++ return -EFAULT; ++ ++ if (tmp.data_size < sizeof(tmp)) ++ return -EINVAL; ++ ++ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); ++ if (!dmi) ++ return -ENOMEM; ++ ++ if (copy_from_user(dmi, user, tmp.data_size)) { ++ vfree(dmi); ++ return -EFAULT; + } + -+ switch (cmd) { -+ case DM_INFO_CMD: -+ r = info(param, user); -+ break; ++ *param = dmi; ++ return 0; ++} + -+ case DM_SUSPEND_CMD: -+ r = suspend(param); -+ break; ++static int validate_params(uint cmd, struct dm_ioctl *param) ++{ ++ /* Unless creating, either name of uuid but not both */ ++ if (cmd != DM_DEV_CREATE_CMD) { ++ if ((!*param->uuid && !*param->name) || ++ (*param->uuid && *param->name)) { ++ DMWARN("one of name or uuid must be supplied"); ++ return -EINVAL; ++ } ++ } + -+ case DM_CREATE_CMD: -+ r = create(param, user); -+ break; ++ /* Ensure strings are terminated */ ++ param->name[DM_NAME_LEN - 1] = '\0'; ++ param->uuid[DM_UUID_LEN - 1] = '\0'; + -+ case DM_RELOAD_CMD: -+ r = reload(param, user); -+ break; ++ return 0; ++} + -+ case DM_REMOVE_CMD: -+ r = remove(param); -+ break; ++static int ctl_ioctl(struct inode *inode, struct file *file, ++ uint command, ulong u) ++{ + -+ case DM_RENAME_CMD: -+ r = rename(param); -+ break; ++ int r = 0, cmd; ++ struct dm_ioctl *param; ++ struct dm_ioctl *user = (struct dm_ioctl *) u; ++ ioctl_fn fn = NULL; + -+ case DM_DEPS_CMD: -+ r = dep(param, user); -+ break; ++ if (_IOC_TYPE(command) != DM_IOCTL) ++ return -ENOTTY; + -+ case DM_GET_STATUS_CMD: -+ r = get_status(param, user); -+ break; ++ cmd = _IOC_NR(command); + -+ case DM_WAIT_EVENT_CMD: -+ r = wait_device_event(param, user); -+ break; ++ /* ++ * Check the interface version passed in. This also ++ * writes out the kernel's interface version. ++ */ ++ r = check_version(cmd, user); ++ if (r) ++ return r; + -+ default: ++ /* ++ * Nothing more to do for the version command. ++ */ ++ if (cmd == DM_VERSION_CMD) ++ return 0; ++ ++ fn = lookup_ioctl(cmd); ++ if (!fn) { + DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); -+ r = -ENOTTY; ++ return -ENOTTY; + } + -+ free_params(param); -+ return r; ++ /* ++ * Copy the parameters into kernel space. 
++ */ ++ r = copy_params(user, ¶m); ++ if (r) ++ return r; + -+ err: -+ version(user); ++ r = validate_params(cmd, param); ++ if (r) { ++ free_params(param); ++ return r; ++ } ++ ++ r = fn(param, user); ++ free_params(param); + return r; +} + +static struct file_operations _ctl_fops = { -+ open: ctl_open, -+ release:ctl_close, -+ ioctl: ctl_ioctl, -+ owner: THIS_MODULE, ++ open: ctl_open, ++ release: ctl_close, ++ ioctl: ctl_ioctl, ++ owner: THIS_MODULE, +}; + +static devfs_handle_t _ctl_handle; + +static struct miscdevice _dm_misc = { -+ minor: MISC_DYNAMIC_MINOR, -+ name: DM_NAME, -+ fops: &_ctl_fops ++ minor: MISC_DYNAMIC_MINOR, ++ name: DM_NAME, ++ fops: &_ctl_fops +}; + +/* Create misc character device and link to DM_DIR/control */ @@ -1510,6 +1589,9 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + } + devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle); + ++ DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR, ++ DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA, ++ DM_DRIVER_EMAIL); + return 0; + + failed: @@ -1522,8 +1604,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c + if (misc_deregister(&_dm_misc) < 0) + DMERR("misc_deregister failed for control device"); +} -diff -ruN linux-2.4.19-pre10/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c ---- linux-2.4.19-pre10/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c +--- linux-2.4.19-rc1/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970 +++ linux/drivers/md/dm-linear.c Thu Jun 13 14:59:09 2002 @@ -0,0 +1,125 @@ +/* @@ -1651,9 +1733,9 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c + if (r < 0) + DMERR("linear: unregister failed %d", r); +} -diff -ruN linux-2.4.19-pre10/drivers/md/dm-mirror.c linux/drivers/md/dm-mirror.c ---- linux-2.4.19-pre10/drivers/md/dm-mirror.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm-mirror.c Thu Jun 13 17:03:27 2002 +diff -ruN linux-2.4.19-rc1/drivers/md/dm-mirror.c linux/drivers/md/dm-mirror.c +--- linux-2.4.19-rc1/drivers/md/dm-mirror.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm-mirror.c Wed Jun 19 13:28:45 2002 @@ -0,0 +1,343 @@ +/* + * Copyright (C) 2002 Sistina Software (UK) Limited. @@ -1750,7 +1832,7 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-mirror.c linux/drivers/md/dm-mirror.c + } + + if (err) { -+ DMERR("Mirror block IO failed"); /* More detail to follow... */ ++ DMERR("Mirror block IO failed"); /* More detail to follow... 
*/ + lc->error = 1; + return; + } @@ -1998,8 +2080,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-mirror.c linux/drivers/md/dm-mirror.c + * c-file-style: "linux" + * End: + */ -diff -ruN linux-2.4.19-pre10/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapshot.c ---- linux-2.4.19-pre10/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapshot.c +--- linux-2.4.19-rc1/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970 +++ linux/drivers/md/dm-snapshot.c Thu Jun 13 15:12:59 2002 @@ -0,0 +1,1182 @@ +/* @@ -3184,8 +3266,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapsh + * c-file-style: "linux" + * End: + */ -diff -ruN linux-2.4.19-pre10/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapshot.h ---- linux-2.4.19-pre10/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapshot.h +--- linux-2.4.19-rc1/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970 +++ linux/drivers/md/dm-snapshot.h Thu Jun 13 16:52:31 2002 @@ -0,0 +1,147 @@ +/* @@ -3335,8 +3417,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapsh +} + +#endif -diff -ruN linux-2.4.19-pre10/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c ---- linux-2.4.19-pre10/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c +--- linux-2.4.19-rc1/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970 +++ linux/drivers/md/dm-stripe.c Thu Jun 13 15:07:43 2002 @@ -0,0 +1,234 @@ +/* @@ -3573,8 +3655,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c + + return; +} -diff -ruN linux-2.4.19-pre10/drivers/md/dm-table.c linux/drivers/md/dm-table.c ---- linux-2.4.19-pre10/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/drivers/md/dm-table.c linux/drivers/md/dm-table.c +--- linux-2.4.19-rc1/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970 +++ linux/drivers/md/dm-table.c Thu Jun 13 17:09:46 2002 @@ -0,0 +1,421 @@ +/* @@ -3998,8 +4080,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-table.c linux/drivers/md/dm-table.c +EXPORT_SYMBOL(dm_table_get_device); +EXPORT_SYMBOL(dm_table_put_device); +EXPORT_SYMBOL(dm_table_event); -diff -ruN linux-2.4.19-pre10/drivers/md/dm-target.c linux/drivers/md/dm-target.c ---- linux-2.4.19-pre10/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/drivers/md/dm-target.c linux/drivers/md/dm-target.c +--- linux-2.4.19-rc1/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970 +++ linux/drivers/md/dm-target.c Thu Jun 13 15:07:55 2002 @@ -0,0 +1,242 @@ +/* @@ -4244,10 +4326,10 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm-target.c linux/drivers/md/dm-target.c + +EXPORT_SYMBOL(dm_register_target); +EXPORT_SYMBOL(dm_unregister_target); -diff -ruN linux-2.4.19-pre10/drivers/md/dm.c linux/drivers/md/dm.c ---- linux-2.4.19-pre10/drivers/md/dm.c Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm.c Thu Jun 13 16:28:55 2002 -@@ -0,0 +1,1169 @@ +diff -ruN linux-2.4.19-rc1/drivers/md/dm.c linux/drivers/md/dm.c +--- linux-2.4.19-rc1/drivers/md/dm.c Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm.c Tue Jun 18 18:29:48 2002 +@@ -0,0 +1,1174 @@ +/* + * Copyright (C) 2001 Sistina Software (UK) Limited. 
+ * @@ -4266,8 +4348,6 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm.c linux/drivers/md/dm.c +#define DEFAULT_READ_AHEAD 64 + +static const char *_name = DM_NAME; -+static const char *_version = "0.94.12-ioctl (2002-06-13)"; -+static const char *_email = "lvm-devel@lists.sistina.com"; + +static int major = 0; +static int _major = 0; @@ -4485,7 +4565,6 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm.c linux/drivers/md/dm.c + + _dev_dir = devfs_mk_dir(0, DM_DIR, NULL); + -+ DMINFO("%s initialised: %s", _version, _email); + return 0; +} + @@ -4504,7 +4583,7 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm.c linux/drivers/md/dm.c + hardsect_size[_major] = NULL; + _major = 0; + -+ DMINFO("%s cleaned up", _version); ++ DMINFO("cleaned up"); +} + +/* @@ -5068,6 +5147,14 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm.c linux/drivers/md/dm.c + if (size < result) + result = size; + } ++ ++ /* ++ * I think it's safe to assume that no block devices have ++ * a hard sector size this large. ++ */ ++ if (result == INT_MAX) ++ result = 512; ++ + return result; +} + @@ -5417,10 +5504,10 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm.c linux/drivers/md/dm.c +MODULE_DESCRIPTION(DM_NAME " driver"); +MODULE_AUTHOR("Joe Thornber "); +MODULE_LICENSE("GPL"); -diff -ruN linux-2.4.19-pre10/drivers/md/dm.h linux/drivers/md/dm.h ---- linux-2.4.19-pre10/drivers/md/dm.h Thu Jan 1 01:00:00 1970 -+++ linux/drivers/md/dm.h Thu Jun 13 16:52:29 2002 -@@ -0,0 +1,232 @@ +diff -ruN linux-2.4.19-rc1/drivers/md/dm.h linux/drivers/md/dm.h +--- linux-2.4.19-rc1/drivers/md/dm.h Thu Jan 1 01:00:00 1970 ++++ linux/drivers/md/dm.h Tue Jun 18 20:05:34 2002 +@@ -0,0 +1,233 @@ +/* + * Internal header file for device mapper + * @@ -5449,6 +5536,7 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm.h linux/drivers/md/dm.h +#include + +#define DM_NAME "device-mapper" /* Name for messaging */ ++#define DM_DRIVER_EMAIL "lvm-devel@lists.sistina.com" +#define MAX_DEPTH 16 +#define NODE_SIZE L1_CACHE_BYTES +#define KEYS_PER_NODE (NODE_SIZE / sizeof(offset_t)) @@ -5653,8 +5741,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/dm.h linux/drivers/md/dm.h +void dm_stripe_exit(void); + +#endif -diff -ruN linux-2.4.19-pre10/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c ---- linux-2.4.19-pre10/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c +--- linux-2.4.19-rc1/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970 +++ linux/drivers/md/kcopyd.c Thu Jun 13 15:18:21 2002 @@ -0,0 +1,832 @@ +/* @@ -6489,8 +6577,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c + + up(&_client_count_sem); +} -diff -ruN linux-2.4.19-pre10/drivers/md/kcopyd.h linux/drivers/md/kcopyd.h ---- linux-2.4.19-pre10/drivers/md/kcopyd.h Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/drivers/md/kcopyd.h linux/drivers/md/kcopyd.h +--- linux-2.4.19-rc1/drivers/md/kcopyd.h Thu Jan 1 01:00:00 1970 +++ linux/drivers/md/kcopyd.h Thu Jun 13 16:52:29 2002 @@ -0,0 +1,101 @@ +/* @@ -6594,8 +6682,8 @@ diff -ruN linux-2.4.19-pre10/drivers/md/kcopyd.h linux/drivers/md/kcopyd.h +void kcopyd_dec_client_count(void); + +#endif -diff -ruN linux-2.4.19-pre10/include/linux/device-mapper.h linux/include/linux/device-mapper.h ---- linux-2.4.19-pre10/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970 +diff -ruN linux-2.4.19-rc1/include/linux/device-mapper.h linux/include/linux/device-mapper.h +--- linux-2.4.19-rc1/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970 +++ linux/include/linux/device-mapper.h Thu Jun 13 15:01:29 
2002 @@ -0,0 +1,62 @@ +/* @@ -6660,10 +6748,10 @@ diff -ruN linux-2.4.19-pre10/include/linux/device-mapper.h linux/include/linux/d +#endif /* __KERNEL__ */ + +#endif /* _LINUX_DEVICE_MAPPER_H */ -diff -ruN linux-2.4.19-pre10/include/linux/dm-ioctl.h linux/include/linux/dm-ioctl.h ---- linux-2.4.19-pre10/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970 -+++ linux/include/linux/dm-ioctl.h Thu Jun 13 16:28:56 2002 -@@ -0,0 +1,113 @@ +diff -ruN linux-2.4.19-rc1/include/linux/dm-ioctl.h linux/include/linux/dm-ioctl.h +--- linux-2.4.19-rc1/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/dm-ioctl.h Tue Jun 25 17:40:32 2002 +@@ -0,0 +1,145 @@ +/* + * Copyright (C) 2001 Sistina Software (UK) Limited. + * @@ -6674,6 +6762,7 @@ diff -ruN linux-2.4.19-pre10/include/linux/dm-ioctl.h linux/include/linux/dm-ioc +#define _LINUX_DM_IOCTL_H + +#include "device-mapper.h" ++#include "types.h" + +/* + * Implements a traditional ioctl interface to the device mapper. @@ -6686,22 +6775,34 @@ diff -ruN linux-2.4.19-pre10/include/linux/dm-ioctl.h linux/include/linux/dm-ioc + * name. + */ +struct dm_ioctl { -+ char version[16]; -+ -+ unsigned long data_size; /* total size of data passed in -+ * including this struct */ -+ -+ unsigned long data_start; /* offset to start of data -+ * relative to start of this struct */ ++ /* ++ * The version number is made up of three parts: ++ * major - no backward or forward compatibility, ++ * minor - only backwards compatible, ++ * patch - both backwards and forwards compatible. ++ * ++ * All clients of the ioctl interface should fill in the ++ * version number of the interface that they were ++ * compiled with. ++ * ++ * All recognised ioctl commands (ie. those that don't ++ * return -ENOTTY) fill out this field, even if the ++ * command failed. ++ */ ++ uint32_t version[3]; /* in/out */ ++ uint32_t data_size; /* total size of data passed in ++ * including this struct */ + -+ char name[DM_NAME_LEN]; /* device name */ ++ uint32_t data_start; /* offset to start of data ++ * relative to start of this struct */ + -+ unsigned int target_count; /* in/out */ -+ unsigned int open_count; /* out */ -+ unsigned int flags; /* in/out */ ++ uint32_t target_count; /* in/out */ ++ uint32_t open_count; /* out */ ++ uint32_t flags; /* in/out */ + + __kernel_dev_t dev; /* in/out */ + ++ char name[DM_NAME_LEN]; /* device name */ + char uuid[DM_UUID_LEN]; /* unique identifier for + * the block device */ +}; @@ -6712,12 +6813,16 @@ diff -ruN linux-2.4.19-pre10/include/linux/dm-ioctl.h linux/include/linux/dm-ioc + */ +struct dm_target_spec { + int32_t status; /* used when reading from kernel only */ -+ unsigned long long sector_start; -+ unsigned long long length; ++ uint64_t sector_start; ++ uint32_t length; + -+ char target_type[DM_MAX_TYPE_NAME]; ++ /* ++ * Offset in bytes (from the start of this struct) to ++ * next target_spec. ++ */ ++ uint32_t next; + -+ unsigned long next; /* offset in bytes to next target_spec */ ++ char target_type[DM_MAX_TYPE_NAME]; + + /* + * Parameter string starts immediately after this object. @@ -6730,41 +6835,54 @@ diff -ruN linux-2.4.19-pre10/include/linux/dm-ioctl.h linux/include/linux/dm-ioc + * Used to retrieve the target dependencies. 
+ */ +struct dm_target_deps { -+ unsigned int count; ++ uint32_t count; + + __kernel_dev_t dev[0]; /* out */ +}; + -+#define DM_IOCTL 0xfd -+ ++/* ++ * If you change this make sure you make the corresponding change ++ * to dm-ioctl.c:lookup_ioctl() ++ */ +enum { -+ DM_CREATE_CMD = 0, -+ DM_REMOVE_CMD, -+ DM_SUSPEND_CMD, -+ DM_RELOAD_CMD, -+ DM_INFO_CMD, -+ DM_RENAME_CMD, -+ DM_VERSION_CMD, -+ DM_DEPS_CMD, ++ /* Top level cmds */ ++ DM_VERSION_CMD = 0, + DM_REMOVE_ALL_CMD, -+ DM_GET_STATUS_CMD, -+ DM_WAIT_EVENT_CMD ++ ++ /* device level cmds */ ++ DM_DEV_CREATE_CMD, ++ DM_DEV_REMOVE_CMD, ++ DM_DEV_RELOAD_CMD, ++ DM_DEV_RENAME_CMD, ++ DM_DEV_SUSPEND_CMD, ++ DM_DEV_DEPS_CMD, ++ DM_DEV_STATUS_CMD, ++ ++ /* target level cmds */ ++ DM_TARGET_STATUS_CMD, ++ DM_TARGET_WAIT_CMD +}; + -+#define DM_CREATE _IOWR(DM_IOCTL, DM_CREATE_CMD, struct dm_ioctl) -+#define DM_REMOVE _IOW(DM_IOCTL, DM_REMOVE_CMD, struct dm_ioctl) -+#define DM_SUSPEND _IOW(DM_IOCTL, DM_SUSPEND_CMD, struct dm_ioctl) -+#define DM_RELOAD _IOW(DM_IOCTL, DM_RELOAD_CMD, struct dm_ioctl) -+#define DM_INFO _IOWR(DM_IOCTL, DM_INFO_CMD, struct dm_ioctl) -+#define DM_RENAME _IOW(DM_IOCTL, DM_RENAME_CMD, struct dm_ioctl) -+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) -+#define DM_DEPS _IOWR(DM_IOCTL, DM_DEPS_CMD, struct dm_ioctl) -+#define DM_REMOVE_ALL _IOR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) -+#define DM_GET_STATUS _IOWR(DM_IOCTL, DM_GET_STATUS_CMD, struct dm_ioctl) -+#define DM_WAIT_EVENT _IOR(DM_IOCTL, DM_WAIT_EVENT_CMD, struct dm_ioctl) -+ -+#define DM_IOCTL_VERSION "0.94" -+#define DM_DRIVER_VERSION "0.94.12-ioctl (2002-06-13)" ++#define DM_IOCTL 0xfd ++ ++#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) ++#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) ++ ++#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) ++#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) ++#define DM_DEV_RELOAD _IOWR(DM_IOCTL, DM_DEV_RELOAD_CMD, struct dm_ioctl) ++#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) ++#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) ++#define DM_DEV_DEPS _IOWR(DM_IOCTL, DM_DEV_DEPS_CMD, struct dm_ioctl) ++#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) ++ ++#define DM_TARGET_STATUS _IOWR(DM_IOCTL, DM_TARGET_STATUS_CMD, struct dm_ioctl) ++#define DM_TARGET_WAIT _IOWR(DM_IOCTL, DM_TARGET_WAIT_CMD, struct dm_ioctl) ++ ++#define DM_VERSION_MAJOR 1 ++#define DM_VERSION_MINOR 0 ++#define DM_VERSION_PATCHLEVEL 0 ++#define DM_VERSION_EXTRA "-ioctl (2002-06-25)" + +/* Status bits */ +#define DM_READONLY_FLAG 0x00000001 @@ -6772,14 +6890,16 @@ diff -ruN linux-2.4.19-pre10/include/linux/dm-ioctl.h linux/include/linux/dm-ioc +#define DM_EXISTS_FLAG 0x00000004 +#define DM_PERSISTENT_DEV_FLAG 0x00000008 + -+/* Flag passed into ioctl STATUS command to get table information -+ rather than current status */ ++/* ++ * Flag passed into ioctl STATUS command to get table information ++ * rather than current status. 
++ */ +#define DM_STATUS_TABLE_FLAG 0x00000010 + +#endif /* _LINUX_DM_IOCTL_H */ -diff -ruN linux-2.4.19-pre10/include/linux/fs.h linux/include/linux/fs.h ---- linux-2.4.19-pre10/include/linux/fs.h Wed Jun 12 12:35:57 2002 -+++ linux/include/linux/fs.h Thu Jun 13 17:12:16 2002 +diff -ruN linux-2.4.19-rc1/include/linux/fs.h linux/include/linux/fs.h +--- linux-2.4.19-rc1/include/linux/fs.h Tue Jun 25 14:15:42 2002 ++++ linux/include/linux/fs.h Tue Jun 25 21:33:16 2002 @@ -260,7 +260,10 @@ char * b_data; /* pointer to data block */ struct page *b_page; /* the page this bh is mapped to */ @@ -6792,9 +6912,9 @@ diff -ruN linux-2.4.19-pre10/include/linux/fs.h linux/include/linux/fs.h unsigned long b_rsector; /* Real buffer location on disk */ wait_queue_head_t b_wait; -diff -ruN linux-2.4.19-pre10/include/linux/mempool.h linux/include/linux/mempool.h ---- linux-2.4.19-pre10/include/linux/mempool.h Thu Jan 1 01:00:00 1970 -+++ linux/include/linux/mempool.h Thu Jun 13 17:12:16 2002 +diff -ruN linux-2.4.19-rc1/include/linux/mempool.h linux/include/linux/mempool.h +--- linux-2.4.19-rc1/include/linux/mempool.h Thu Jan 1 01:00:00 1970 ++++ linux/include/linux/mempool.h Tue Jun 25 21:33:16 2002 @@ -0,0 +1,41 @@ +/* + * memory buffer pool support @@ -6837,20 +6957,20 @@ diff -ruN linux-2.4.19-pre10/include/linux/mempool.h linux/include/linux/mempool +void mempool_free_slab(void *element, void *pool_data); + +#endif /* _LINUX_MEMPOOL_H */ -diff -ruN linux-2.4.19-pre10/include/linux/vmalloc.h linux/include/linux/vmalloc.h ---- linux-2.4.19-pre10/include/linux/vmalloc.h Wed Jun 12 12:35:58 2002 -+++ linux/include/linux/vmalloc.h Thu Jun 13 17:12:16 2002 +diff -ruN linux-2.4.19-rc1/include/linux/vmalloc.h linux/include/linux/vmalloc.h +--- linux-2.4.19-rc1/include/linux/vmalloc.h Tue Jun 25 14:15:45 2002 ++++ linux/include/linux/vmalloc.h Tue Jun 25 21:33:16 2002 @@ -25,6 +25,7 @@ extern void vmfree_area_pages(unsigned long address, unsigned long size); extern int vmalloc_area_pages(unsigned long address, unsigned long size, int gfp_mask, pgprot_t prot); -+extern void *vcalloc(unsigned long nmemb, unsigned long size); ++extern void *vcalloc(unsigned long nmemb, unsigned long elem_size); /* * Allocate any pages -diff -ruN linux-2.4.19-pre10/mm/Makefile linux/mm/Makefile ---- linux-2.4.19-pre10/mm/Makefile Wed Jun 12 12:04:44 2002 -+++ linux/mm/Makefile Thu Jun 13 17:12:16 2002 +diff -ruN linux-2.4.19-rc1/mm/Makefile linux/mm/Makefile +--- linux-2.4.19-rc1/mm/Makefile Tue Jun 25 14:15:47 2002 ++++ linux/mm/Makefile Tue Jun 25 21:33:16 2002 @@ -9,12 +9,12 @@ O_TARGET := mm.o @@ -6866,9 +6986,9 @@ diff -ruN linux-2.4.19-pre10/mm/Makefile linux/mm/Makefile obj-$(CONFIG_HIGHMEM) += highmem.o -diff -ruN linux-2.4.19-pre10/mm/mempool.c linux/mm/mempool.c ---- linux-2.4.19-pre10/mm/mempool.c Thu Jan 1 01:00:00 1970 -+++ linux/mm/mempool.c Thu Jun 13 17:12:16 2002 +diff -ruN linux-2.4.19-rc1/mm/mempool.c linux/mm/mempool.c +--- linux-2.4.19-rc1/mm/mempool.c Thu Jan 1 01:00:00 1970 ++++ linux/mm/mempool.c Tue Jun 25 21:33:16 2002 @@ -0,0 +1,295 @@ +/* + * linux/mm/mempool.c @@ -7165,29 +7285,29 @@ diff -ruN linux-2.4.19-pre10/mm/mempool.c linux/mm/mempool.c +EXPORT_SYMBOL(mempool_alloc_slab); +EXPORT_SYMBOL(mempool_free_slab); + -diff -ruN linux-2.4.19-pre10/mm/vmalloc.c linux/mm/vmalloc.c ---- linux-2.4.19-pre10/mm/vmalloc.c Wed Jun 12 12:04:44 2002 -+++ linux/mm/vmalloc.c Thu Jun 13 17:12:16 2002 +diff -ruN linux-2.4.19-rc1/mm/vmalloc.c linux/mm/vmalloc.c +--- linux-2.4.19-rc1/mm/vmalloc.c Tue Jun 25 14:15:47 2002 
++++ linux/mm/vmalloc.c Tue Jun 25 21:33:16 2002 @@ -321,3 +321,22 @@ read_unlock(&vmlist_lock); return buf - buf_start; } + -+void *vcalloc(unsigned long nmemb, unsigned long size) ++void *vcalloc(unsigned long nmemb, unsigned long elem_size) +{ -+ unsigned long len; -+ void *mem; ++ unsigned long size; ++ void *addr; + + /* + * Check that we're not going to overflow. + */ -+ if (nmemb > (ULONG_MAX / size)) ++ if (nmemb > (ULONG_MAX / elem_size)) + return NULL; + -+ len = nmemb * size; -+ mem = vmalloc(len); -+ if (mem) -+ memset(mem, 0, len); ++ size = nmemb * elem_size; ++ addr = vmalloc(size); ++ if (addr) ++ memset(addr, 0, size); + -+ return mem; ++ return addr; +}
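
The overflow check in vcalloc() above is what distinguishes it from a plain vmalloc() plus memset(): on a 32-bit machine a request of, say, nmemb = 0x200000 with elem_size = 0x1000 would otherwise wrap the nmemb * elem_size multiplication to zero and hand back a useless allocation. A minimal caller sketch follows; it is illustrative only and not part of the patch, and the helper name alloc_counts is hypothetical.

	#include <linux/vmalloc.h>	/* vcalloc() added above, vfree() */

	/* Allocate a zeroed array of nr_nodes counters for a table index. */
	static unsigned int *alloc_counts(unsigned long nr_nodes)
	{
		/*
		 * vcalloc() returns NULL instead of letting
		 * nr_nodes * sizeof(*counts) wrap around.
		 */
		unsigned int *counts = vcalloc(nr_nodes, sizeof(*counts));

		if (!counts)
			return NULL;	/* caller fails the operation cleanly */

		return counts;		/* already zero-filled; release with vfree() */
	}

Returning NULL on a would-be overflow, rather than letting the multiplication wrap silently, lets callers treat an absurd element count the same way as any other allocation failure.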