include/Makefile \
dmsetup/Makefile \
lib/Makefile \
+kernel/Makefile \
" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15
EOF
cat >> $CONFIG_STATUS <<EOF
include/Makefile \
dmsetup/Makefile \
lib/Makefile \
+kernel/Makefile \
"}
EOF
cat >> $CONFIG_STATUS <<\EOF
include/Makefile \
dmsetup/Makefile \
lib/Makefile \
+kernel/Makefile \
)
--- /dev/null
+#
+# Copyright (C) 2001 Sistina Software
+#
+# This LVM library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This LVM library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this LVM library; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA
+
+SHELL = /bin/sh
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+
+interface = @interface@
+kerneldir = @kerneldir@
+
+LN_S = @LN_S@
+
+all: symlinks
+
+symlinks:
+ for i in common/* fs/* ioctl/* ; do \
+ if [ -L $(kerneldir)/drivers/md/`basename $$i` ] ; \
+ then $(RM) $(kerneldir)/drivers/md/`basename $$i`; \
+ fi; \
+ done
+ for i in common/* $(interface)/* ; do \
+ $(LN_S) `pwd`/$$i $(kerneldir)/drivers/md ; \
+ done
+
+install:
+
+clean:
+
+distclean:
+ $(RM) Makefile
+
+.PHONY: install clean distclean all
+
--- /dev/null
+/*
+ * device-mapper.h
+ *
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef DEVICE_MAPPER_H
+#define DEVICE_MAPPER_H
+
+#define DM_DIR "device-mapper"
+#define DM_MAX_TYPE_NAME 16
+
+struct dm_table;
+struct dm_dev;
+typedef unsigned int offset_t;
+
+typedef void (*dm_error_fn)(const char *message, void *private);
+
+/*
+ * constructor, destructor and map fn types
+ */
+typedef int (*dm_ctr_fn)(struct dm_table *t, offset_t b, offset_t l,
+ char *args, void **context);
+
+typedef void (*dm_dtr_fn)(struct dm_table *t, void *c);
+typedef int (*dm_map_fn)(struct buffer_head *bh, int rw, void *context);
+typedef int (*dm_err_fn)(struct buffer_head *bh, int rw, void *context);
+typedef char *(*dm_print_fn)(void *context);
+
+/*
+ * Constructors should call this to make sure any
+ * destination devices are handled correctly
+ * (ie. opened/closed).
+ */
+int dm_table_get_device(struct dm_table *t, const char *path,
+ offset_t start, offset_t len,
+ struct dm_dev **result);
+void dm_table_put_device(struct dm_table *table, struct dm_dev *d);
+
+/*
+ * information about a target type
+ */
+struct target_type {
+ const char *name;
+ struct module *module;
+ dm_ctr_fn ctr;
+ dm_dtr_fn dtr;
+ dm_map_fn map;
+ dm_err_fn err;
+ dm_print_fn print;
+};
+
+int dm_register_target(struct target_type *t);
+int dm_unregister_target(struct target_type *t);
+
+#endif /* DEVICE_MAPPER_H */
+
--- /dev/null
+/*
+ * dm-linear.c
+ *
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/device-mapper.h>
+
+#include "dm.h"
+
+/*
+ * linear: maps a linear range of a device.
+ */
+struct linear_c {
+ long delta; /* FIXME: we need a signed offset type */
+ struct dm_dev *dev;
+};
+
+/*
+ * Return the next whitespace-separated token from *p, or NULL when the
+ * string is exhausted.  Runs of separators yield empty tokens from
+ * strsep(), so loop until a non-empty one is found; *p is advanced
+ * past the returned token.
+ */
+static inline char *next_token(char **p)
+{
+	static const char *delim = " \t";
+	char *r;
+
+	do {
+		r = strsep(p, delim);
+	} while(r && *r == 0);
+
+	return r;
+}
+
+/*
+ * Construct a linear mapping from an argument string of the form:
+ *   <dev_path> <offset>
+ * On success *context holds the private struct linear_c; on failure it
+ * holds a static error-message string for the caller to report.
+ */
+static int linear_ctr(struct dm_table *t, offset_t b, offset_t l,
+		      char *args, void **context)
+{
+	struct linear_c *lc;
+	unsigned int start;
+	int r = -EINVAL;
+	char *tok;
+	char *path;
+	char *p = args;
+
+	*context = "No device path given";
+	path = next_token(&p);
+	if (!path)
+		goto bad;
+
+	*context = "No initial offset given";
+	tok = next_token(&p);
+	if (!tok)
+		goto bad;
+	start = simple_strtoul(tok, NULL, 10);
+
+	*context = "Cannot allocate linear context private structure";
+	/* FIX: allocate the size of the structure, not of the pointer
+	 * (sizeof(lc) only allocated sizeof(struct linear_c *) bytes) */
+	lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+	if (lc == NULL) {
+		r = -ENOMEM;	/* FIX: report allocation failure as ENOMEM */
+		goto bad;
+	}
+
+	*context = "Cannot get target device";
+	r = dm_table_get_device(t, path, start, l, &lc->dev);
+	if (r)
+		goto bad_free;
+
+	/* offset applied in linear_map: physical start minus logical start */
+	lc->delta = (int) start - (int) b;
+	*context = lc;
+	return 0;
+
+bad_free:
+	kfree(lc);
+bad:
+	return r;
+}
+
+/*
+ * Destructor: drop the reference on the mapped-to device and free the
+ * private context allocated by linear_ctr.
+ */
+static void linear_dtr(struct dm_table *t, void *c)
+{
+	struct linear_c *lc = (struct linear_c *) c;
+	dm_table_put_device(t, lc->dev);
+	kfree(c);
+}
+
+/*
+ * Remap a buffer head onto the underlying device, shifting the sector
+ * by the delta computed in linear_ctr.  Returns 1 (remapped, caller
+ * should resubmit the bh).
+ */
+static int linear_map(struct buffer_head *bh, int rw, void *context)
+{
+	struct linear_c *lc = (struct linear_c *) context;
+
+	bh->b_rdev = lc->dev->dev;
+	bh->b_rsector = bh->b_rsector + lc->delta;
+	return 1;
+}
+
+/*
+ * Debugging use only.
+ *
+ * NOTE(review): returns a pointer to a static buffer, so concurrent
+ * callers would race -- acceptable only for debug output.
+ */
+static char *linear_print(void *context)
+{
+	struct linear_c *lc = (struct linear_c *) context;
+	static char buf[256];
+
+	/* FIX: delta is a signed long and may be negative, so use %ld
+	 * (the original %lu would print huge values for negative deltas) */
+	sprintf(buf, " %ld", lc->delta);
+	return buf;
+}
+
+static struct target_type linear_target = {
+ name: "linear",
+ module: THIS_MODULE,
+ ctr: linear_ctr,
+ dtr: linear_dtr,
+ map: linear_map,
+ print: linear_print,
+};
+
+static int __init linear_init(void)
+{
+ int r = dm_register_target(&linear_target);
+
+ if (r < 0)
+ printk(KERN_ERR
+ "Device mapper: Linear: register failed %d\n", r);
+
+ return r;
+}
+
+static void __exit linear_exit(void)
+{
+ int r = dm_unregister_target(&linear_target);
+
+ if (r < 0)
+ printk(KERN_ERR
+ "Device mapper: Linear: unregister failed %d\n", r);
+}
+
+module_init(linear_init);
+module_exit(linear_exit);
+
+MODULE_AUTHOR("Joe Thornber <thornber@uk.sistina.com>");
+MODULE_DESCRIPTION("Device Mapper: Linear mapping");
+MODULE_LICENSE("GPL");
+
--- /dev/null
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/device-mapper.h>
+
+#include "dm.h"
+
+struct stripe {
+ struct dm_dev *dev;
+ offset_t physical_start;
+};
+
+struct stripe_c {
+ offset_t logical_start;
+ uint32_t stripes;
+
+ /* The size of this target / num. stripes */
+ uint32_t stripe_width;
+
+ /* eg, we stripe in 64k chunks */
+ uint32_t chunk_shift;
+ offset_t chunk_mask;
+
+ struct stripe stripe[0];
+};
+
+
+/*
+ * Allocate a stripe context with room for 'stripes' trailing
+ * struct stripe entries (stripe_c ends in a zero-length array).
+ */
+static inline struct stripe_c *alloc_context(int stripes)
+{
+	size_t len = sizeof(struct stripe_c) +
+		(sizeof(struct stripe) * stripes);
+	return kmalloc(len, GFP_KERNEL);
+}
+
+/*
+ * Parse a single <dev> <sector> pair.  Returns the number of
+ * characters consumed from 'args' on success, or a negative errno.
+ */
+static int get_stripe(struct dm_table *t, struct stripe_c *sc,
+		      int stripe, char *args)
+{
+	int n, r;
+	char path[256];
+	unsigned long start;
+
+	/* FIX: bound the %s conversion so a long device path cannot
+	 * overrun 'path' (this was the FIXME'd buffer-overrun risk) */
+	if (sscanf(args, "%255s %lu %n", path, &start, &n) != 2)
+		return -EINVAL;
+
+	if ((r = dm_table_get_device(t, path, start, sc->stripe_width,
+				     &sc->stripe[stripe].dev)))
+		return -ENXIO;
+
+	sc->stripe[stripe].physical_start = start;
+	return n;
+}
+
+/*
+ * Construct a striped mapping.
+ * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
+ */
+static int stripe_ctr(struct dm_table *t, offset_t b, offset_t l,
+		      char *args, void **context)
+{
+	struct stripe_c *sc;
+	uint32_t stripes;
+	uint32_t chunk_size;
+	int n, i;
+
+	*context = "couldn't parse <stripes> <chunk size>";
+	if (sscanf(args, "%u %u %n", &stripes, &chunk_size, &n) != 2) {
+		return -EINVAL;
+	}
+
+	/* FIX: stripes == 0 would divide by zero below */
+	*context = "target length is not divisible by the number of stripes";
+	if (!stripes || l % stripes) {
+		return -EINVAL;
+	}
+
+	/*
+	 * FIX: validate chunk_size *before* allocating the context --
+	 * the original checked it after alloc_context() and returned
+	 * -EINVAL without freeing sc, leaking the allocation.
+	 */
+	*context = "invalid chunk size";
+	if (!chunk_size) {
+		return -EINVAL;
+	}
+
+	*context = "couldn't allocate memory for striped context";
+	if (!(sc = alloc_context(stripes))) {
+		return -ENOMEM;
+	}
+
+	sc->logical_start = b;
+	sc->stripes = stripes;
+	sc->stripe_width = l / stripes;
+
+	/*
+	 * chunk_size is a power of two.  We only need that power
+	 * and the mask.
+	 */
+	sc->chunk_mask = chunk_size - 1;
+	for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
+		chunk_size >>= 1;
+	sc->chunk_shift--;
+
+	/*
+	 * Get the stripe destinations.
+	 */
+	for (i = 0; i < stripes; i++) {
+		args += n;
+		n = get_stripe(t, sc, i, args);
+
+		*context = "couldn't parse stripe destination";
+		if (n < 0) {
+			/* FIX: release devices already acquired by earlier
+			 * iterations before freeing the context */
+			while (i--)
+				dm_table_put_device(t, sc->stripe[i].dev);
+			kfree(sc);
+			return n;
+		}
+	}
+
+	*context = sc;
+	return 0;
+}
+
+/*
+ * Destructor: drop the reference on every stripe device, then free
+ * the context (single allocation covering the trailing stripe array).
+ */
+static void stripe_dtr(struct dm_table *t, void *c)
+{
+	unsigned int i;
+	struct stripe_c *sc = (struct stripe_c *) c;
+
+	for (i = 0; i < sc->stripes; i++)
+		dm_table_put_device(t, sc->stripe[i].dev);
+
+	kfree(sc);
+}
+
+/*
+ * Remap a buffer head onto one of the stripe devices.  The offset
+ * within the target is split into a chunk number (chunk_shift) and an
+ * offset within that chunk (chunk_mask); chunks are distributed
+ * round-robin across the stripes.  Returns 1 (remapped).
+ */
+static int stripe_map(struct buffer_head *bh, int rw, void *context)
+{
+	struct stripe_c *sc = (struct stripe_c *) context;
+
+	offset_t offset = bh->b_rsector - sc->logical_start;
+	uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift);
+	uint32_t stripe = chunk % sc->stripes;	/* 32bit modulus */
+	chunk = chunk / sc->stripes;
+
+	bh->b_rdev = sc->stripe[stripe].dev->dev;
+	bh->b_rsector = sc->stripe[stripe].physical_start +
+	    (chunk << sc->chunk_shift) +
+	    (offset & sc->chunk_mask);
+	return 1;
+}
+
+static struct target_type stripe_target = {
+ name: "striped",
+ module: THIS_MODULE,
+ ctr: stripe_ctr,
+ dtr: stripe_dtr,
+ map: stripe_map,
+};
+
+/* Register the "striped" target type at module load. */
+static int __init stripe_init(void)
+{
+	int r;
+
+	/* FIX: the failure message said "linear" -- this is the
+	 * striped target (copy/paste from dm-linear.c) */
+	if ((r = dm_register_target(&stripe_target)) < 0)
+		WARN("striped target register failed");
+
+	return r;
+}
+
+/* Unregister the "striped" target type at module unload. */
+static void __exit stripe_exit(void)
+{
+	if (dm_unregister_target(&stripe_target))
+		WARN("striped target unregister failed");
+}
+
+module_init(stripe_init);
+module_exit(stripe_exit);
+
+MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
+MODULE_DESCRIPTION("Device Mapper: Striped mapping");
+MODULE_LICENSE("GPL");
--- /dev/null
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/blkdev.h>
+
+
+/* ceiling(n / size) * size */
+static inline ulong round_up(ulong n, ulong size)
+{
+ ulong r = n % size;
+ return n + (r ? (size - r) : 0);
+}
+
+/* ceiling(n / size) */
+static inline ulong div_up(ulong n, ulong size)
+{
+ return round_up(n, size) / size;
+}
+
+/* similar to ceiling(log_size(n)) */
+static uint int_log(ulong n, ulong base)
+{
+ int result = 0;
+
+ while (n > 1) {
+ n = div_up(n, base);
+ result++;
+ }
+
+ return result;
+}
+
+/*
+ * return the highest key that you could lookup
+ * from the n'th node on level l of the btree.
+ */
+static offset_t high(struct dm_table *t, int l, int n)
+{
+ for (; l < t->depth - 1; l++)
+ n = get_child(n, CHILDREN_PER_NODE - 1);
+
+ if (n >= t->counts[l])
+ return (offset_t) -1;
+
+ return get_node(t, l, n)[KEYS_PER_NODE - 1];
+}
+
+/*
+ * fills in a level of the btree based on the
+ * highs of the level below it.
+ */
+static int setup_btree_index(int l, struct dm_table *t)
+{
+ int n, k;
+ offset_t *node;
+
+ for (n = 0; n < t->counts[l]; n++) {
+ node = get_node(t, l, n);
+
+ for (k = 0; k < KEYS_PER_NODE; k++)
+ node[k] = high(t, l + 1, get_child(n, k));
+ }
+
+ return 0;
+}
+
+/*
+ * highs, and targets are managed as dynamic
+ * arrays during a table load.
+ *
+ * Grows both arrays to 'num' entries in a single vmalloc'd region
+ * (highs first, then targets), copying any existing entries across
+ * and filling the new tail of highs with (offset_t) -1.  The old
+ * region (t->highs) is freed; t->targets lives inside it, so it must
+ * not be freed separately.
+ */
+static int alloc_targets(struct dm_table *t, int num)
+{
+	offset_t *n_highs;
+	struct target *n_targets;
+	int n = t->num_targets;
+	int size = (sizeof(struct target) + sizeof(offset_t)) * num;
+
+	n_highs = vmalloc(size);
+	if (!n_highs)
+		return -ENOMEM;
+
+	/* targets array sits immediately after the highs array */
+	n_targets = (struct target *) (n_highs + num);
+
+	if (n) {
+		memcpy(n_highs, t->highs, sizeof(*n_highs) * n);
+		memcpy(n_targets, t->targets, sizeof(*n_targets) * n);
+	}
+
+	memset(n_highs + n , -1, sizeof(*n_highs) * (num - n));
+	vfree(t->highs);
+
+	t->num_allocated = num;
+	t->highs = n_highs;
+	t->targets = n_targets;
+
+	return 0;
+}
+
+struct dm_table *dm_table_create(void)
+{
+ struct dm_table *t = kmalloc(sizeof(struct dm_table), GFP_NOIO);
+
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+
+ memset(t, 0, sizeof(*t));
+ INIT_LIST_HEAD(&t->devices);
+
+ /* allocate a single nodes worth of targets to
+ begin with */
+ if (alloc_targets(t, KEYS_PER_NODE)) {
+ kfree(t);
+ t = ERR_PTR(-ENOMEM);
+ }
+
+ return t;
+}
+
+static void free_devices(struct list_head *devices)
+{
+ struct list_head *tmp, *next;
+
+ for (tmp = devices->next; tmp != devices; tmp = next) {
+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+ next = tmp->next;
+ kfree(dd);
+ }
+}
+
+/*
+ * Free a table: the btree index block, each target's private data
+ * (via its dtr), the highs/targets region, and -- defensively -- any
+ * devices a target forgot to release.
+ */
+void dm_table_destroy(struct dm_table *t)
+{
+	int i;
+
+	/* free the indexes (see dm_table_complete) */
+	if (t->depth >= 2)
+		vfree(t->index[t->depth - 2]);
+
+	/* free the targets */
+	for (i = 0; i < t->num_targets; i++) {
+		struct target *tgt = &t->targets[i];
+
+		if (tgt->type->dtr)
+			tgt->type->dtr(t, tgt->private);
+
+		dm_put_target_type(t->targets[i].type);
+	}
+
+	/* targets lives in the same vmalloc region as highs (see
+	 * alloc_targets), so this one vfree releases both */
+	vfree(t->highs);
+
+	/* free the device list */
+	if (t->devices.next != &t->devices) {
+		WARN("there are still devices present, someone isn't "
+		     "calling dm_table_remove_device");
+
+		free_devices(&t->devices);
+	}
+
+	kfree(t);
+}
+
+/*
+ * Checks to see if we need to extend
+ * highs or targets.
+ */
+static inline int check_space(struct dm_table *t)
+{
+ if (t->num_targets >= t->num_allocated)
+ return alloc_targets(t, t->num_allocated * 2);
+
+ return 0;
+}
+
+
+/*
+ * convert a device path to a kdev_t.
+ *
+ * Walks the path, checks the inode is a block device, and stores its
+ * kdev_t in *dev.
+ *
+ * NOTE(review): when path_init() returns 0 this returns 0 (success)
+ * without setting *dev -- verify the 2.4 path_init semantics cover
+ * that case, otherwise the caller reads an uninitialised kdev_t.
+ */
+int lookup_device(const char *path, kdev_t *dev)
+{
+	int r;
+	struct nameidata nd;
+	struct inode *inode;
+
+	if (!path_init(path, LOOKUP_FOLLOW, &nd))
+		return 0;
+
+	if ((r = path_walk(path, &nd)))
+		goto bad;
+
+	inode = nd.dentry->d_inode;
+	if (!inode) {
+		r = -ENOENT;
+		goto bad;
+	}
+
+	if (!S_ISBLK(inode->i_mode)) {
+		r = -EINVAL;
+		goto bad;
+	}
+
+	*dev = inode->i_rdev;
+
+	/* fall through: r is 0 here on success */
+ bad:
+	path_release(&nd);
+	return r;
+}
+
+/*
+ * see if we've already got a device in the list.
+ */
+static struct dm_dev *find_device(struct list_head *l, kdev_t dev)
+{
+ struct list_head *tmp;
+
+ list_for_each(tmp, l) {
+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+ if (dd->dev == dev)
+ return dd;
+ }
+
+ return NULL;
+}
+
+/*
+ * open a device so we can use it as a map
+ * destination.
+ *
+ * NOTE(review): if blkdev_get() fails the reference taken by bdget()
+ * does not appear to be dropped here or by the caller -- confirm
+ * whether 2.4 blkdev_get() consumes the reference on failure.
+ */
+static int open_dev(struct dm_dev *d)
+{
+	int err;
+
+	if (d->bd)
+		BUG();
+
+	if (!(d->bd = bdget(kdev_t_to_nr(d->dev))))
+		return -ENOMEM;
+
+	if ((err = blkdev_get(d->bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE)))
+		return err;
+
+	return 0;
+}
+
+/*
+ * close a device that we've been using.
+ */
+static void close_dev(struct dm_dev *d)
+{
+ if (!d->bd)
+ return;
+
+ blkdev_put(d->bd, BDEV_FILE);
+ d->bd = NULL;
+}
+
+/*
+ * If possible (ie. blk_size[major] is set), this
+ * checks an area of a destination device is
+ * valid.
+ */
+static int check_device_area(kdev_t dev, offset_t start, offset_t len)
+{
+ int *sizes;
+ offset_t dev_size;
+
+ if (!(sizes = blk_size[MAJOR(dev)]) || !(dev_size = sizes[MINOR(dev)]))
+ /* we don't know the device details,
+ * so give the benefit of the doubt */
+ return 1;
+
+ /* convert to 512-byte sectors */
+ dev_size <<= 1;
+
+ return ((start < dev_size) && (len <= (dev_size - start)));
+}
+
+/*
+ * add a device to the list, or just increment the
+ * usage count if it's already present.
+ *
+ * On success *result holds the (possibly shared) dm_dev and its use
+ * count has been incremented; dm_table_put_device() undoes this.  If
+ * the requested area falls outside the device (where blk_size lets us
+ * check) the reference is dropped again and -EINVAL returned.
+ */
+int dm_table_get_device(struct dm_table *t, const char *path,
+			offset_t start, offset_t len,
+			struct dm_dev **result)
+{
+	int r;
+	kdev_t dev;
+	struct dm_dev *dd;
+
+	/* convert the path to a device */
+	if ((r = lookup_device(path, &dev)))
+		return r;
+
+	dd = find_device(&t->devices, dev);
+	if (!dd) {
+		/* first user of this device: open it and add to the list */
+		dd = kmalloc(sizeof(*dd), GFP_KERNEL);
+		if (!dd)
+			return -ENOMEM;
+
+		dd->dev = dev;
+		dd->bd = 0;
+
+		if ((r = open_dev(dd))) {
+			kfree(dd);
+			return r;
+		}
+
+		atomic_set(&dd->count, 0);
+		list_add(&dd->list, &t->devices);
+	}
+	atomic_inc(&dd->count);
+
+	if (!check_device_area(dd->dev, start, len)) {
+		WARN("device '%s' not large enough for target", path);
+		dm_table_put_device(t, dd);
+		return -EINVAL;
+	}
+
+	*result = dd;
+
+	return 0;
+}
+
+/*
+ * decrement a devices use count and remove it if
+ * necessary.
+ *
+ * When the count reaches zero the device is closed, unlinked from the
+ * table's device list, and freed.
+ */
+void dm_table_put_device(struct dm_table *t, struct dm_dev *dd)
+{
+	if (atomic_dec_and_test(&dd->count)) {
+		close_dev(dd);
+		list_del(&dd->list);
+		kfree(dd);
+	}
+}
+
+/*
+ * adds a target to the map
+ */
+int dm_table_add_target(struct dm_table *t, offset_t high,
+ struct target_type *type, void *private)
+{
+ int r, n;
+
+ if ((r = check_space(t)))
+ return r;
+
+ n = t->num_targets++;
+ t->highs[n] = high;
+ t->targets[n].type = type;
+ t->targets[n].private = private;
+
+ return 0;
+}
+
+
+static int setup_indexes(struct dm_table *t)
+{
+ int i, total = 0;
+ offset_t *indexes;
+
+ /* allocate the space for *all* the indexes */
+ for (i = t->depth - 2; i >= 0; i--) {
+ t->counts[i] = div_up(t->counts[i + 1], CHILDREN_PER_NODE);
+ total += t->counts[i];
+ }
+
+ if (!(indexes = vmalloc(NODE_SIZE * total)))
+ return -ENOMEM;
+
+ /* set up internal nodes, bottom-up */
+ for (i = t->depth - 2, total = 0; i >= 0; i--) {
+ t->index[i] = indexes;
+ indexes += (KEYS_PER_NODE * t->counts[i]);
+ setup_btree_index(i, t);
+ }
+
+ return 0;
+}
+
+
+/*
+ * builds the btree to index the map
+ */
+int dm_table_complete(struct dm_table *t)
+{
+ int leaf_nodes, r = 0;
+
+ /* how many indexes will the btree have ? */
+ leaf_nodes = div_up(t->num_targets, KEYS_PER_NODE);
+ t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
+
+ /* leaf layer has already been set up */
+ t->counts[t->depth - 1] = leaf_nodes;
+ t->index[t->depth - 1] = t->highs;
+
+ if (t->depth >= 2)
+ r = setup_indexes(t);
+
+ return r;
+}
+
+EXPORT_SYMBOL(dm_table_get_device);
+EXPORT_SYMBOL(dm_table_put_device);
--- /dev/null
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+#include <linux/kmod.h>
+
+struct tt_internal {
+ struct target_type tt;
+
+ struct list_head list;
+ long use;
+};
+
+static LIST_HEAD(_targets);
+static rwlock_t _lock = RW_LOCK_UNLOCKED;
+
+#define DM_MOD_NAME_SIZE 32
+
+static inline struct tt_internal *__find_target_type(const char *name)
+{
+ struct list_head *tmp;
+ struct tt_internal *ti;
+
+ list_for_each(tmp, &_targets) {
+ ti = list_entry(tmp, struct tt_internal, list);
+
+ if (!strcmp(name, ti->tt.name))
+ return ti;
+ }
+
+ return NULL;
+}
+
+/*
+ * Look up a registered target type by name, bumping its use count and
+ * the owning module's refcount on first use.  Returns NULL if absent.
+ *
+ * NOTE(review): ti->use is read-modify-written while holding only the
+ * read lock, so two concurrent lookups could race on the increment --
+ * verify the intended locking (dm_put_target_type has the same
+ * pattern).
+ */
+static struct tt_internal *get_target_type(const char *name)
+{
+	struct tt_internal *ti;
+
+	read_lock(&_lock);
+	ti = __find_target_type(name);
+
+	if (ti) {
+		if (ti->use == 0 && ti->tt.module)
+			__MOD_INC_USE_COUNT(ti->tt.module);
+		ti->use++;
+	}
+	read_unlock(&_lock);
+
+	return ti;
+}
+
+static void load_module(const char *name)
+{
+ char module_name[DM_MOD_NAME_SIZE] = "dm-";
+
+ /* Length check for strcat() below */
+ if (strlen(name) > (DM_MOD_NAME_SIZE - 4))
+ return;
+
+ strcat(module_name, name);
+ request_module(module_name);
+}
+
+struct target_type *dm_get_target_type(const char *name)
+{
+ struct tt_internal *ti = get_target_type(name);
+
+ if (!ti) {
+ load_module(name);
+ ti = get_target_type(name);
+ }
+
+ return ti ? &ti->tt : NULL;
+}
+
+void dm_put_target_type(struct target_type *t)
+{
+ struct tt_internal *ti = (struct tt_internal *) t;
+
+ read_lock(&_lock);
+ if (--ti->use == 0 && ti->tt.module)
+ __MOD_DEC_USE_COUNT(ti->tt.module);
+
+ if (ti->use < 0)
+ BUG();
+ read_unlock(&_lock);
+}
+
+static struct tt_internal *alloc_target(struct target_type *t)
+{
+ struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+
+ if (ti) {
+ memset(ti, 0, sizeof(*ti));
+ ti->tt = *t;
+ }
+
+ return ti;
+}
+
+/*
+ * Register a target type.  A private copy of *t is kept (tt_internal
+ * embeds it), so the caller's structure need not stay pinned beyond
+ * the fields it references.  Returns -EEXIST if the name is taken.
+ */
+int dm_register_target(struct target_type *t)
+{
+	int rv = 0;
+	struct tt_internal *ti = alloc_target(t);
+
+	if (!ti)
+		return -ENOMEM;
+
+	write_lock(&_lock);
+	if (__find_target_type(t->name))
+		rv = -EEXIST;
+	else
+		list_add(&ti->list, &_targets);
+	write_unlock(&_lock);
+
+	/* FIX: the original leaked 'ti' on the -EEXIST path */
+	if (rv)
+		kfree(ti);
+
+	return rv;
+}
+
+/*
+ * Unregister a target type by name.  Fails with -EINVAL if it was
+ * never registered and -ETXTBSY if any table still holds a reference
+ * (ti->use non-zero).
+ */
+int dm_unregister_target(struct target_type *t)
+{
+	struct tt_internal *ti;
+
+	write_lock(&_lock);
+	if (!(ti = __find_target_type(t->name))) {
+		write_unlock(&_lock);
+		return -EINVAL;
+	}
+
+	if (ti->use) {
+		write_unlock(&_lock);
+		return -ETXTBSY;
+	}
+
+	list_del(&ti->list);
+	kfree(ti);
+
+	write_unlock(&_lock);
+	return 0;
+}
+
+/*
+ * io-err: always fails an io, useful for bringing
+ * up LV's that have holes in them.
+ */
+static int io_err_ctr(struct dm_table *t, offset_t b, offset_t l,
+ char *args, void **context)
+{
+ *context = NULL;
+ return 0;
+}
+
+static void io_err_dtr(struct dm_table *t, void *c)
+{
+ /* empty */
+}
+
+static int io_err_map(struct buffer_head *bh, int rw, void *context)
+{
+ buffer_IO_error(bh);
+ return 0;
+}
+
+static struct target_type error_target = {
+ name: "error",
+ ctr: io_err_ctr,
+ dtr: io_err_dtr,
+ map: io_err_map
+};
+
+
+int dm_target_init(void)
+{
+ return dm_register_target(&error_target);
+}
+
+EXPORT_SYMBOL(dm_register_target);
+EXPORT_SYMBOL(dm_unregister_target);
+
--- /dev/null
+/*
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/blk.h>
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include <linux/kmod.h>
+
+/* we only need this for the lv_bmap struct definition, not happy */
+#include <linux/lvm.h>
+
+#define MAX_DEVICES 64
+#define DEFAULT_READ_AHEAD 64
+#define DEVICE_NAME "device-mapper"
+
+static const char *_name = DEVICE_NAME;
+static int _version[3] = {0, 1, 0};
+static int major = 0;
+
+struct io_hook {
+ struct mapped_device *md;
+ struct target *target;
+ int rw;
+
+ void (*end_io)(struct buffer_head * bh, int uptodate);
+ void *context;
+};
+
+static kmem_cache_t *_io_hook_cache;
+
+#define rl down_read(&_dev_lock)
+#define ru up_read(&_dev_lock)
+#define wl down_write(&_dev_lock)
+#define wu up_write(&_dev_lock)
+
+static struct rw_semaphore _dev_lock;
+static struct mapped_device *_devs[MAX_DEVICES];
+
+/* block device arrays */
+static int _block_size[MAX_DEVICES];
+static int _blksize_size[MAX_DEVICES];
+static int _hardsect_size[MAX_DEVICES];
+
+static devfs_handle_t _dev_dir;
+
+static int request(request_queue_t *q, int rw, struct buffer_head *bh);
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
+
+/*
+ * setup and teardown the driver
+ */
+static int __init dm_init(void)
+{
+ int ret = -ENOMEM;
+
+ init_rwsem(&_dev_lock);
+
+ _io_hook_cache = kmem_cache_create("dm io hooks",
+ sizeof(struct io_hook),
+ 0, 0, NULL, NULL);
+
+ if (!_io_hook_cache)
+ goto err;
+
+ ret = dm_target_init();
+ if (ret < 0)
+ goto err_cache_free;
+
+ ret = dm_interface_init();
+ if (ret < 0)
+ goto err_cache_free;
+
+ ret = devfs_register_blkdev(major, _name, &dm_blk_dops);
+ if (ret < 0)
+ goto err_blkdev;
+
+ if (major == 0)
+ major = ret;
+
+ /* set up the arrays */
+ read_ahead[major] = DEFAULT_READ_AHEAD;
+ blk_size[major] = _block_size;
+ blksize_size[major] = _blksize_size;
+ hardsect_size[major] = _hardsect_size;
+
+ blk_queue_make_request(BLK_DEFAULT_QUEUE(major), request);
+
+ _dev_dir = devfs_mk_dir(0, DM_DIR, NULL);
+
+ printk(KERN_INFO "%s %d.%d.%d initialised\n", _name,
+ _version[0], _version[1], _version[2]);
+ return 0;
+
+err_blkdev:
+ printk(KERN_ERR "%s -- register_blkdev failed\n", _name);
+ dm_interface_exit();
+err_cache_free:
+ kmem_cache_destroy(_io_hook_cache);
+err:
+ return ret;
+}
+
+static void __exit dm_exit(void)
+{
+ dm_interface_exit();
+
+ if (kmem_cache_destroy(_io_hook_cache))
+ WARN("it looks like there are still some io_hooks allocated");
+
+ _io_hook_cache = NULL;
+
+ if (devfs_unregister_blkdev(major, _name) < 0)
+ printk(KERN_ERR "%s -- unregister_blkdev failed\n", _name);
+
+ read_ahead[major] = 0;
+ blk_size[major] = NULL;
+ blksize_size[major] = NULL;
+ hardsect_size[major] = NULL;
+
+ printk(KERN_INFO "%s %d.%d.%d cleaned up\n", _name,
+ _version[0], _version[1], _version[2]);
+}
+
+/*
+ * block device functions
+ */
+static int dm_blk_open(struct inode *inode, struct file *file)
+{
+ int minor = MINOR(inode->i_rdev);
+ struct mapped_device *md;
+
+ if (minor >= MAX_DEVICES)
+ return -ENXIO;
+
+ wl;
+ md = _devs[minor];
+
+ if (!md) {
+ wu;
+ return -ENXIO;
+ }
+
+ md->use_count++;
+ wu;
+
+ return 0;
+}
+
+static int dm_blk_close(struct inode *inode, struct file *file)
+{
+ int minor = MINOR(inode->i_rdev);
+ struct mapped_device *md;
+
+ if (minor >= MAX_DEVICES)
+ return -ENXIO;
+
+ wl;
+ md = _devs[minor];
+ if (!md || md->use_count < 1) {
+ WARN("reference count in mapped_device incorrect");
+ wu;
+ return -ENXIO;
+ }
+
+ md->use_count--;
+ wu;
+
+ return 0;
+}
+
+/* In 512-byte units */
+#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1)
+
+static int dm_blk_ioctl(struct inode *inode, struct file *file,
+ uint command, ulong a)
+{
+ int minor = MINOR(inode->i_rdev);
+ long size;
+
+ if (minor >= MAX_DEVICES)
+ return -ENXIO;
+
+ switch (command) {
+ case BLKSSZGET:
+ case BLKBSZGET:
+ case BLKROGET:
+ case BLKROSET:
+ case BLKRASET:
+ case BLKRAGET:
+ case BLKFLSBUF:
+#if 0
+ case BLKELVSET:
+ case BLKELVGET:
+#endif
+ return blk_ioctl(inode->i_rdev, command, a);
+ break;
+
+ case BLKGETSIZE:
+ size = VOLUME_SIZE(minor);
+ if (copy_to_user((void *) a, &size, sizeof (long)))
+ return -EFAULT;
+ break;
+
+ case BLKGETSIZE64:
+ size = VOLUME_SIZE(minor);
+ if (put_user((u64)size, (u64 *)a))
+ return -EFAULT;
+ break;
+
+ case BLKRRPART:
+ return -EINVAL;
+
+ case LV_BMAP:
+ return dm_user_bmap(inode, (struct lv_bmap *) a);
+
+ default:
+ WARN("unknown block ioctl %d", command);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static inline struct io_hook *alloc_io_hook(void)
+{
+ return kmem_cache_alloc(_io_hook_cache, GFP_NOIO);
+}
+
+static inline void free_io_hook(struct io_hook *ih)
+{
+ kmem_cache_free(_io_hook_cache, ih);
+}
+
+/*
+ * FIXME: need to decide if deferred_io's need
+ * their own slab, I say no for now since they are
+ * only used when the device is suspended.
+ */
+static inline struct deferred_io *alloc_deferred(void)
+{
+ return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
+}
+
+static inline void free_deferred(struct deferred_io *di)
+{
+ kfree(di);
+}
+
+/*
+ * call a targets optional error function if
+ * an io failed.
+ */
+static inline int call_err_fn(struct io_hook *ih, struct buffer_head *bh)
+{
+ dm_err_fn err = ih->target->type->err;
+ if (err)
+ return err(bh, ih->rw, ih->target->private);
+
+ return 0;
+}
+
+/*
+ * bh->b_end_io routine that decrements the
+ * pending count and then calls the original
+ * bh->b_end_io fn.
+ *
+ * On failure the target's optional err hook gets first refusal; a
+ * non-zero return means it took ownership of the bh, so we must not
+ * complete it here.  Otherwise restore the original end_io/private
+ * saved in the io_hook, free the hook, and complete the io.
+ */
+static void dec_pending(struct buffer_head *bh, int uptodate)
+{
+	struct io_hook *ih = bh->b_private;
+
+	if (!uptodate && call_err_fn(ih, bh))
+		return;
+
+	if (atomic_dec_and_test(&ih->md->pending))
+		/* nudge anyone waiting on suspend queue */
+		wake_up(&ih->md->wait);
+
+	bh->b_end_io = ih->end_io;
+	bh->b_private = ih->context;
+	free_io_hook(ih);
+
+	bh->b_end_io(bh, uptodate);
+}
+
+/*
+ * add the bh to the list of deferred io.
+ *
+ * Returns 1 if the io was deferred, 0 if the device turned out not to
+ * be suspended after all (caller must retry the mapping itself), or
+ * -ENOMEM.  The suspended check is redone under the write lock because
+ * the caller tested it under (and then dropped) the read lock.
+ */
+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw)
+{
+	struct deferred_io *di = alloc_deferred();
+
+	if (!di)
+		return -ENOMEM;
+
+	wl;
+	if (!md->suspended) {
+		wu;
+		return 0;
+	}
+
+	/* push onto the singly-linked deferred list */
+	di->bh = bh;
+	di->rw = rw;
+	di->next = md->deferred;
+	md->deferred = di;
+	wu;
+
+	return 1;
+}
+
+/*
+ * do the bh mapping for a given leaf
+ *
+ * Calls the target's map function.  map return values:
+ *   > 0  remapped -- hook dec_pending as b_end_io so the pending
+ *        count is balanced when the io completes;
+ *   == 0 target completed/absorbed the io itself, no hook needed;
+ *   < 0  error.
+ * Returns 1 on success, 0 on failure (caller errors the bh).
+ * Must be called with the device read lock held.
+ */
+static inline int __map_buffer(struct mapped_device *md,
+			       struct buffer_head *bh, int rw, int leaf)
+{
+	int r;
+	dm_map_fn fn;
+	void *context;
+	struct io_hook *ih = NULL;
+	struct target *ti = md->map->targets + leaf;
+
+	fn = ti->type->map;
+	context = ti->private;
+
+	/* save the original completion so dec_pending can restore it */
+	ih = alloc_io_hook();
+
+	if (!ih)
+		return 0;
+
+	ih->md = md;
+	ih->rw = rw;
+	ih->target = ti;
+	ih->end_io = bh->b_end_io;
+	ih->context = bh->b_private;
+
+	r = fn(bh, rw, context);
+
+	if (r > 0) {
+		/* hook the end io request fn */
+		atomic_inc(&md->pending);
+		bh->b_end_io = dec_pending;
+		bh->b_private = ih;
+
+	} else if (r == 0)
+		/* we don't need to hook */
+		free_io_hook(ih);
+
+	else if (r < 0) {
+		free_io_hook(ih);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * search the btree for the correct target.
+ *
+ * Walks from the root, at each level picking the first key >= the
+ * bh's sector; the final (level, node, key) identifies the leaf
+ * index into the table's targets array.
+ */
+static inline int __find_node(struct dm_table *t, struct buffer_head *bh)
+{
+	int l, n = 0, k = 0;
+	offset_t *node;
+
+	for (l = 0; l < t->depth; l++) {
+		n = get_child(n, k);
+		node = get_node(t, l, n);
+
+		for (k = 0; k < KEYS_PER_NODE; k++)
+			if (node[k] >= bh->b_rsector)
+				break;
+	}
+
+	return (KEYS_PER_NODE * n) + k;
+}
+
+/*
+ * make_request function: route a buffer head through the device's
+ * btree-mapped targets.  Returns 1 to have the block layer resubmit
+ * the remapped bh, 0 when the io was deferred or failed (bh already
+ * completed with an error in the failure case).
+ */
+static int request(request_queue_t *q, int rw, struct buffer_head *bh)
+{
+	struct mapped_device *md;
+	int r, minor = MINOR(bh->b_rdev);
+
+	if (minor >= MAX_DEVICES)
+		goto bad_no_lock;
+
+	rl;
+	md = _devs[minor];
+
+	if (!md)
+		goto bad;
+
+	/*
+	 * If we're suspended we have to queue
+	 * this io for later.
+	 */
+	while (md->suspended) {
+		ru;
+
+		/* read-ahead is best-effort: just drop it while suspended */
+		if (rw == READA)
+			goto bad_no_lock;
+
+		r = queue_io(md, bh, rw);
+
+		if (r < 0)
+			goto bad_no_lock;
+
+		else if (r > 0)
+			return 0;	/* deferred successfully */
+
+		/*
+		 * We're in a while loop, because
+		 * someone could suspend before we
+		 * get to the following read
+		 * lock
+		 */
+		rl;
+	}
+
+	if (!__map_buffer(md, bh, rw, __find_node(md->map, bh)))
+		goto bad;
+
+	ru;
+	return 1;
+
+ bad:
+	ru;
+
+ bad_no_lock:
+	buffer_IO_error(bh);
+	return 0;
+}
+
+static int check_dev_size(int minor, unsigned long block)
+{
+ /* FIXME: check this */
+ unsigned long max_sector = (_block_size[minor] << 1) + 1;
+ unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9);
+
+ return (sector > max_sector) ? 0 : 1;
+}
+
+/*
+ * creates a dummy buffer head and maps it (for lilo).
+ *
+ * Resolves (dev, block) through the device's mapping table without
+ * doing any io, returning the underlying device and block so boot
+ * loaders can locate data physically.  Fails while suspended.
+ */
+static int do_bmap(kdev_t dev, unsigned long block,
+		   kdev_t *r_dev, unsigned long *r_block)
+{
+	struct mapped_device *md;
+	struct buffer_head bh;
+	int minor = MINOR(dev), r;
+	struct target *t;
+
+	rl;
+	if ((minor >= MAX_DEVICES) || !(md = _devs[minor]) || md->suspended) {
+		r = -ENXIO;
+		goto out;
+	}
+
+	if (!check_dev_size(minor, block)) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	/* setup dummy bh */
+	memset(&bh, 0, sizeof(bh));
+	bh.b_blocknr = block;
+	bh.b_dev = bh.b_rdev = dev;
+	bh.b_size = _blksize_size[minor];
+	bh.b_rsector = block * (bh.b_size >> 9);
+
+	/* find target */
+	t = md->map->targets + __find_node(md->map, &bh);
+
+	/* do the mapping */
+	r = t->type->map(&bh, READ, t->private);
+
+	/* translate the remapped sector back into a block number */
+	*r_dev = bh.b_rdev;
+	*r_block = bh.b_rsector / (bh.b_size >> 9);
+
+ out:
+	ru;
+	return r;
+}
+
+/*
+ * marshals arguments and results between user and
+ * kernel space.
+ */
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb)
+{
+ unsigned long block, r_block;
+ kdev_t r_dev;
+ int r;
+
+ if (get_user(block, &lvb->lv_block))
+ return -EFAULT;
+
+ if ((r = do_bmap(inode->i_rdev, block, &r_dev, &r_block)))
+ return r;
+
+ if (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) ||
+ put_user(r_block, &lvb->lv_block))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * See if the device with a specific minor # is free.  Returns the
+ * minor on success, -1 if it is out of range or already in use.
+ * Caller must hold the device lock.
+ */
+static inline int __specific_dev(int minor)
+{
+	/* FIX: off-by-one -- minor == MAX_DEVICES would index past the
+	 * end of _devs[]; and return -1, not 0, so the caller does not
+	 * mistake the error for "use minor 0" */
+	if (minor >= MAX_DEVICES) {
+		WARN("request for a mapped_device > than MAX_DEVICES");
+		return -1;
+	}
+
+	if (!_devs[minor])
+		return minor;
+
+	return -1;
+}
+
+/*
+ * find the first free device.
+ */
+static inline int __any_old_dev(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_DEVICES; i++)
+ if (!_devs[i])
+ return i;
+
+ return -1;
+}
+
+/*
+ * allocate and initialise a blank device.
+ *
+ * minor < 0 means "any free minor".  Registers the new device in
+ * _devs[] under the write lock; returns NULL (0) on allocation
+ * failure or when no minor is available.
+ */
+static struct mapped_device *alloc_dev(int minor)
+{
+	struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+
+	if (!md)
+		return 0;
+
+	memset(md, 0, sizeof (*md));
+
+	wl;
+	minor = (minor < 0) ? __any_old_dev() : __specific_dev(minor);
+
+	if (minor < 0) {
+		WARN("no free devices available");
+		wu;
+		kfree(md);
+		return 0;
+	}
+
+	md->dev = MKDEV(major, minor);
+	md->name[0] = '\0';
+	md->suspended = 0;
+
+	init_waitqueue_head(&md->wait);
+
+	_devs[minor] = md;
+	wu;
+
+	return md;
+}
+
+/* release a device allocated by alloc_dev(); the caller is
+ * responsible for removing it from _devs[] first */
+static void free_dev(struct mapped_device *md)
+{
+	kfree(md);
+}
+
+/*
+ * create the devfs node for this mapped device under DM_DIR.
+ * NOTE(review): the devfs_register() result is stored but a
+ * failure (NULL handle) is not detected -- always returns 0.
+ */
+static int register_device(struct mapped_device *md)
+{
+	md->devfs_entry =
+	    devfs_register(_dev_dir, md->name, DEVFS_FL_CURRENT_OWNER,
+			   MAJOR(md->dev), MINOR(md->dev),
+			   S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
+			   &dm_blk_dops, NULL);
+
+	return 0;
+}
+
+/* tear down the devfs node created by register_device() */
+static int unregister_device(struct mapped_device *md)
+{
+	devfs_unregister(md->devfs_entry);
+	return 0;
+}
+
+/*
+ * the hardsect size for a mapped device is the
+ * smallest hard sect size from the devices it
+ * maps onto.  Returns INT_MAX for an empty device list.
+ */
+static int __find_hardsect_size(struct list_head *devices)
+{
+	int result = INT_MAX, size;
+	struct list_head *tmp;
+
+	list_for_each(tmp, devices) {
+		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+		size = get_hardsect_size(dd->dev);
+		if (size < result)
+			result = size;
+	}
+	return result;
+}
+
+/*
+ * Bind a table to the device: install the table pointer and
+ * refresh the global size/blocksize/hardsect arrays for this
+ * minor.  An empty table zeroes the sizes.  Always returns 0.
+ */
+static int __bind(struct mapped_device *md, struct dm_table *t)
+{
+	int minor = MINOR(md->dev);
+
+	md->map = t;
+
+	if (!t->num_targets) {
+		_block_size[minor] = 0;
+		_blksize_size[minor] = BLOCK_SIZE;
+		_hardsect_size[minor] = 0;
+		return 0;
+	}
+
+	/* in k: highs[] holds the last sector of each target, so the
+	 * device size in sectors is highs[last] + 1, halved for KB */
+	_block_size[minor] = (t->highs[t->num_targets - 1] + 1) >> 1;
+
+	_blksize_size[minor] = BLOCK_SIZE;
+	_hardsect_size[minor] = __find_hardsect_size(&t->devices);
+	register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]);
+
+	return 0;
+}
+
+/* undo __bind(): destroy the table and zero this minor's
+ * size entries.  Caller must hold the write lock. */
+static void __unbind(struct mapped_device *md)
+{
+	int minor = MINOR(md->dev);
+
+	dm_table_destroy(md->map);
+	md->map = NULL;
+
+	_block_size[minor] = 0;
+	_blksize_size[minor] = 0;
+	_hardsect_size[minor] = 0;
+}
+
+
+/*
+ * Linear scan of the minor table for a device whose name
+ * matches exactly; NULL when nothing matches.  Callers
+ * take the read or write lock around this.
+ */
+static struct mapped_device *__get_by_name(const char *name)
+{
+	int minor;
+
+	for (minor = 0; minor < MAX_DEVICES; minor++) {
+		struct mapped_device *md = _devs[minor];
+
+		if (md && strcmp(md->name, name) == 0)
+			return md;
+	}
+
+	return NULL;
+}
+
+/*
+ * validate a proposed device name: no '/' and not already
+ * in use.  Returns 1 when acceptable, 0 otherwise.  Must be
+ * called with the write lock held (see dm_create).
+ */
+static int check_name(const char *name)
+{
+	if (strchr(name, '/')) {
+		WARN("invalid device name");
+		return 0;
+	}
+
+	if (__get_by_name(name)) {
+		WARN("device name already in use");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * constructor for a new device.  Pass minor == -1 to let a
+ * free minor be chosen.  Returns the device or an ERR_PTR().
+ */
+struct mapped_device *dm_create(const char *name, int minor,
+				struct dm_table *table)
+{
+	int r;
+	struct mapped_device *md;
+
+	if (minor >= MAX_DEVICES)
+		return ERR_PTR(-ENXIO);
+
+	/*
+	 * alloc_dev() resolves the minor (which may be -1 here) and
+	 * publishes the device in _devs[] itself.  The old code then
+	 * redid "_devs[minor] = md" with the *raw* parameter, writing
+	 * _devs[-1] whenever the caller asked for "any minor".
+	 */
+	if (!(md = alloc_dev(minor)))
+		return ERR_PTR(-ENXIO);
+
+	wl;
+	if (!check_name(name)) {
+		r = -EINVAL;
+		goto bad;
+	}
+
+	strcpy(md->name, name);
+
+	if ((r = register_device(md)))
+		goto bad;
+
+	/* FIXME: should unregister_device() if this fails */
+	if ((r = __bind(md, table)))
+		goto bad;
+	wu;
+
+	return md;
+
+ bad:
+	/* unpublish before freeing so _devs[] never holds a
+	 * dangling pointer to the freed device */
+	_devs[MINOR(md->dev)] = NULL;
+	wu;
+	free_dev(md);
+	return ERR_PTR(r);
+}
+
+/*
+ * Destructor for the device. You cannot destroy
+ * a suspended device.
+ *
+ * Returns -EPERM when the device is suspended or still in
+ * use, the unregister_device() error, or 0 on success.
+ */
+int dm_destroy(struct mapped_device *md)
+{
+	int minor, r;
+
+	rl;
+	if (md->suspended || md->use_count) {
+		ru;
+		return -EPERM;
+	}
+
+	fsync_dev(md->dev);
+	ru;
+
+	/* use_count is rechecked under the write lock: it may
+	 * have changed between dropping rl and taking wl */
+	wl;
+	if (md->use_count) {
+		wu;
+		return -EPERM;
+	}
+
+	if ((r = unregister_device(md))) {
+		wu;
+		return r;
+	}
+
+	minor = MINOR(md->dev);
+	_devs[minor] = 0;
+	__unbind(md);
+
+	wu;
+
+	free_dev(md);
+
+	return 0;
+}
+
+
+/*
+ * requeue the deferred buffer_heads by calling
+ * generic_make_request.  Consumes (frees) the whole
+ * deferred_io list.
+ */
+static void flush_deferred_io(struct deferred_io *c)
+{
+	struct deferred_io *n;
+
+	while (c) {
+		n = c->next;	/* save: free_deferred releases c */
+		generic_make_request(c->rw, c->bh);
+		free_deferred(c);
+		c = n;
+	}
+}
+
+/*
+ * Swap in a new table (destroying old one).  The device
+ * must already be suspended (see dm_suspend), otherwise
+ * -EPERM is returned.
+ */
+int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+{
+	int r;
+
+	wl;
+
+	/* device must be suspended */
+	if (!md->suspended) {
+		wu;
+		return -EPERM;
+	}
+
+	__unbind(md);
+
+	if ((r = __bind(md, table))) {
+		wu;
+		return r;
+	}
+
+	wu;
+
+	return 0;
+}
+
+
+/*
+ * We need to be able to change a mapping table
+ * under a mounted filesystem. for example we
+ * might want to move some data in the background.
+ * Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to
+ * flush any in flight buffer_heads and ensure
+ * that any further io gets deferred.
+ */
+int dm_suspend(struct mapped_device *md)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	wl;
+	if (md->suspended) {
+		wu;
+		return -EINVAL;
+	}
+
+	md->suspended = 1;
+	wu;
+
+	/* wait for all the pending io to flush */
+	add_wait_queue(&md->wait, &wait);
+	do {
+		/*
+		 * the task state must be reset on every iteration:
+		 * schedule() returns with TASK_RUNNING, so setting
+		 * it only once before the loop (as the old code did)
+		 * turns any later iteration into a busy spin.
+		 */
+		current->state = TASK_UNINTERRUPTIBLE;
+
+		wl;
+		if (!atomic_read(&md->pending))
+			break;	/* still holding the write lock */
+
+		wu;
+		schedule();
+
+	} while (1);
+
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&md->wait, &wait);
+	wu;
+
+	return 0;
+}
+
+/*
+ * Resume a device suspended by dm_suspend() and replay any
+ * io deferred in the meantime.  -EINVAL if not suspended.
+ */
+int dm_resume(struct mapped_device *md)
+{
+	struct deferred_io *def;
+
+	wl;
+	if (!md->suspended) {
+		wu;
+		return -EINVAL;
+	}
+
+	md->suspended = 0;
+	/* detach the deferred list under the lock, replay after */
+	def = md->deferred;
+	md->deferred = NULL;
+	wu;
+
+	flush_deferred_io(def);
+
+	return 0;
+}
+
+/*
+ * Search for a device with a particular name; NULL when
+ * not found.  Takes the read lock around the table scan.
+ */
+struct mapped_device *dm_get(const char *name)
+{
+	struct mapped_device *md;
+
+	rl;
+	md = __get_by_name(name);
+	ru;
+
+	return md;
+}
+
+/* block-device entry points for mapped devices */
+struct block_device_operations dm_blk_dops = {
+	open:     dm_blk_open,
+	release:  dm_blk_close,
+	ioctl:    dm_blk_ioctl,
+	owner:    THIS_MODULE,
+};
+
+/*
+ * module hooks
+ */
+module_init(dm_init);
+module_exit(dm_exit);
+
+/* the major number can be overridden at load time: major=<n> */
+MODULE_PARM(major, "i");
+MODULE_PARM_DESC(major, "The major number of the device mapper");
+MODULE_DESCRIPTION("device-mapper driver");
+MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
+MODULE_LICENSE("GPL");
+
--- /dev/null
+/*
+ * dm.h
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This file is released under the GPL.
+ */
+
+/*
+ * Internal header file for device mapper
+ *
+ * Changelog
+ *
+ * 16/08/2001 - First version [Joe Thornber]
+ */
+
+/*
+ * This driver attempts to provide a generic way of specifying logical
+ * devices which are mapped onto other devices.
+ *
+ * It does this by mapping sections of the logical device onto 'targets'.
+ *
+ * When the logical device is accessed the make_request function looks up
+ * the correct target for the given sector, and then asks this target
+ * to do the remapping.
+ *
+ * (dm-table.c) A btree like structure is used to hold the sector
+ * range -> target mapping. Because we know all the entries in the
+ * btree in advance we can make a very compact tree, omitting pointers
+ * to child nodes, (child nodes locations can be calculated). Each
+ * node of the btree is 1 level cache line in size, this gives a small
+ * performance boost.
+ *
+ * A userland test program for the btree gave the following results on a
+ * 1 Gigahertz Athlon machine:
+ *
+ * entries in btree lookups per second
+ * ---------------- ------------------
+ * 5 25,000,000
+ * 1000 7,700,000
+ * 10,000,000 3,800,000
+ *
+ * Of course these results should be taken with a pinch of salt; the
+ * lookups were sequential and there were no other applications (other
+ * than X + emacs) running to give any pressure on the level 1 cache.
+ *
+ * Typical LVM users would find they have very few targets for each
+ * LV (probably less than 10).
+ *
+ * (dm-target.c) Target types are not hard coded, instead the
+ * register_mapping_type function should be called. A target type is
+ * specified using three functions (see the header):
+ *
+ * dm_ctr_fn - takes a string and constructs a target specific piece of
+ * context data.
+ * dm_dtr_fn - destroy contexts.
+ * dm_map_fn - function that takes a buffer_head and some previously
+ * constructed context and performs the remapping.
+ *
+ * Currently there are two trivial mappers, which are
+ * automatically registered: 'linear', and 'io_error'. Linear alone
+ * is enough to implement most LVM features (omitting striped volumes
+ * and snapshots).
+ *
+ * (dm-fs.c) The driver is controlled through a /proc interface:
+ * /proc/device-mapper/control allows you to create and remove devices
+ * by 'cat'ing a line of the following format:
+ *
+ * create <device name> [minor no]
+ * remove <device name>
+ *
+ * /proc/device-mapper/<device name> accepts the mapping table:
+ *
+ * begin
+ * <sector start> <length> <target name> <target args>...
+ * ...
+ * end
+ *
+ * The begin/end lines are nasty, they should be handled by open/close
+ * for the file.
+ *
+ * At the moment the table assumes 32 bit keys (sectors), the move to
+ * 64 bits will involve no interface changes, since the tables will be
+ * read in as ascii data. A different table implementation can
+ * therefore be provided at another time. Either just by changing offset_t
+ * to 64 bits, or maybe implementing a structure which looks up the keys in
+ * stages (ie, 32 bits at a time).
+ *
+ * More interesting targets:
+ *
+ * striped mapping; given a stripe size and a number of device regions
+ * this would stripe data across the regions. Especially useful, since
+ * we could limit each striped region to a 32 bit area and then avoid
+ * nasty 64 bit %'s.
+ *
+ * mirror mapping (reflector ?); would set off a kernel thread slowly
+ * copying data from one region to another, ensuring that any new
+ * writes got copied to both destinations correctly. Great for
+ * implementing pvmove. Not sure how userland would be notified that
+ * the copying process had completed. Possibly by reading a /proc entry
+ * for the LV. Could also use poll() for this kind of thing.
+ */
+
+
+#ifndef DM_INTERNAL_H
+#define DM_INTERNAL_H
+
+#include <linux/version.h>
+#include <linux/major.h>
+#include <linux/iobuf.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/compatmac.h>
+#include <linux/cache.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/ctype.h>
+#include <linux/device-mapper.h>
+#include <linux/list.h>
+
+#define MAX_DEPTH 16
+#define NODE_SIZE L1_CACHE_BYTES
+#define KEYS_PER_NODE (NODE_SIZE / sizeof(offset_t))
+#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
+#define DM_NAME_LEN 128
+
+/*
+ * list of devices that a metadevice uses
+ * and hence should open/close.
+ */
+struct dm_dev {
+	atomic_t count;		/* presumably a use count -- verify in dm-table.c */
+	struct list_head list;	/* link on dm_table.devices */
+
+	kdev_t dev;
+	struct block_device *bd;
+};
+
+/*
+ * io that had to be deferred while we were
+ * suspended; singly linked via 'next' and replayed by
+ * flush_deferred_io() on resume.
+ */
+struct deferred_io {
+	int rw;			/* READ or WRITE, as passed to make_request */
+	struct buffer_head *bh;
+	struct deferred_io *next;
+};
+
+/*
+ * btree leaf, these do the actual mapping: a target type
+ * plus the private context its constructor built.
+ */
+struct target {
+	struct target_type *type;
+	void *private;		/* owned by type->ctr/dtr */
+};
+
+/*
+ * the btree: maps a sector to the target responsible for it.
+ * The index levels are flattened arrays of keys; leaves live
+ * in highs[]/targets[].
+ */
+struct dm_table {
+	/* btree table */
+	int depth;
+	int counts[MAX_DEPTH];	/* in nodes */
+	offset_t *index[MAX_DEPTH];
+
+	int num_targets;
+	int num_allocated;
+	offset_t *highs;	/* last sector of each target */
+	struct target *targets;
+
+	/* a list of devices used by this table */
+	struct list_head devices;
+};
+
+/*
+ * the actual device struct: one per minor, stored in _devs[].
+ */
+struct mapped_device {
+	kdev_t dev;
+	char name[DM_NAME_LEN];
+
+	int use_count;		/* open count; blocks dm_destroy while set */
+	int suspended;		/* when set, new io is deferred */
+
+	/* a list of io's that arrived while we were suspended */
+	atomic_t pending;	/* in-flight io count, waited on by dm_suspend */
+	wait_queue_head_t wait;
+	struct deferred_io *deferred;
+
+	struct dm_table *map;	/* current mapping (see __bind/__unbind) */
+
+	/* used by dm-fs.c */
+	devfs_handle_t devfs_entry;
+};
+
+extern struct block_device_operations dm_blk_dops;
+
+
+/* dm-target.c */
+int dm_target_init(void);
+struct target_type *dm_get_target_type(const char *name);
+void dm_put_target_type(struct target_type *t);
+
+/* dm.c */
+struct mapped_device *dm_find_by_minor(int minor);
+struct mapped_device *dm_get(const char *name);
+/* the next two declarations were fused onto one line */
+struct mapped_device *dm_create(const char *name, int minor,
+				struct dm_table *);
+int dm_destroy(struct mapped_device *md);
+int dm_swap_table(struct mapped_device *md, struct dm_table *t);
+int dm_suspend(struct mapped_device *md);
+int dm_resume(struct mapped_device *md);
+
+/* dm-table.c */
+struct dm_table *dm_table_create(void);
+void dm_table_destroy(struct dm_table *t);
+
+int dm_table_add_target(struct dm_table *t, offset_t high,
+ struct target_type *type, void *private);
+int dm_table_complete(struct dm_table *t);
+
+#define WARN(f, x...) printk(KERN_WARNING "device-mapper: " f "\n" , ## x)
+
+/*
+ * calculate the index of the child node of the
+ * n'th node k'th key.
+ */
+static inline int get_child(int n, int k)
+{
+	return (n * CHILDREN_PER_NODE) + k;
+}
+
+/*
+ * returns the n'th node of level l from table t.
+ * Nodes are KEYS_PER_NODE offsets wide, packed contiguously.
+ */
+static inline offset_t *get_node(struct dm_table *t, int l, int n)
+{
+	return t->index[l] + (n * KEYS_PER_NODE);
+}
+
+int dm_interface_init(void) __init;
+void dm_interface_exit(void) __exit;
+
+#endif
--- /dev/null
+/*
+ * dmfs-error.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+/* one recorded table-parse error; msg points just past the
+ * struct in the same kmalloc'd block (see dmfs_add_error) */
+struct dmfs_error {
+	struct list_head list;
+	unsigned len;		/* length of msg as returned by sprintf */
+	char *msg;
+};
+
+/* forward declaration: oom_list and oom_error reference each other */
+static struct dmfs_error oom_error;
+
+/*
+ * a permanent single-entry list; e_start() walks this instead
+ * of the inode's error list when dmi->status is set.
+ */
+static struct list_head oom_list = {
+	next: &oom_error.list,
+	prev: &oom_error.list,
+};
+
+static struct dmfs_error oom_error = {
+	list: { next: &oom_list, prev: &oom_list },
+	len: 39,	/* strlen of msg below */
+	msg: "Out of memory during creation of table\n",
+};
+
+/*
+ * record a parse error ("<line no>: <text>\n") against an
+ * inode's error list.  On allocation failure the error is
+ * silently dropped.
+ */
+void dmfs_add_error(struct inode *inode, unsigned num, char *str)
+{
+	struct dmfs_i *dmi = DMFS_I(inode);
+	/*
+	 * "%8u" is only a *minimum* field width: num can need up
+	 * to 10 digits, so allow 10 + ": " + str + "\n" + NUL = 14
+	 * extra bytes.  The old "+ 12" assumed 8 digits and could
+	 * overrun the buffer by two bytes for large line numbers.
+	 */
+	int len = strlen(str) + sizeof(struct dmfs_error) + 14;
+	struct dmfs_error *e = kmalloc(len, GFP_KERNEL);
+	if (e) {
+		e->msg = (char *)(e + 1);
+		e->len = sprintf(e->msg, "%8u: %s\n", num, str);
+		list_add(&e->list, &dmi->errors);
+	}
+}
+
+/* free every recorded error on the inode's list */
+void dmfs_zap_errors(struct inode *inode)
+{
+	struct dmfs_i *dmi = DMFS_I(inode);
+	struct dmfs_error *e;
+
+	while(!list_empty(&dmi->errors)) {
+		e = list_entry(dmi->errors.next, struct dmfs_error, list);
+		list_del(&e->list);
+		kfree(e);	/* msg is in the same allocation */
+	}
+}
+
+/*
+ * seq_file start: take the inode semaphore and walk to the
+ * *pos'th error.  When dmi->status is set, the static OOM
+ * record is shown instead of the real list.  The semaphore
+ * is released in e_stop().
+ */
+static void *e_start(struct seq_file *e, loff_t *pos)
+{
+	struct list_head *p;
+	loff_t n = *pos;
+	struct dmfs_i *dmi = e->context;
+
+	down(&dmi->sem);
+	if (dmi->status) {
+		list_for_each(p, &oom_list)
+			if (n-- == 0)
+				return list_entry(p, struct dmfs_error, list);
+	} else {
+		list_for_each(p, &dmi->errors)
+			if (n-- == 0)
+				return list_entry(p, struct dmfs_error, list);
+	}
+
+	return NULL;
+}
+
+/* advance to the next error, NULL when either list wraps
+ * back to its head */
+static void *e_next(struct seq_file *e, void *v, loff_t *pos)
+{
+	struct dmfs_i *dmi = e->context;
+	struct list_head *p = ((struct dmfs_error *)v)->list.next;
+	(*pos)++;
+	return (p == &dmi->errors) || (p == &oom_list) ? NULL
+		: list_entry(p, struct dmfs_error, list);
+}
+
+/* release the semaphore taken in e_start() */
+static void e_stop(struct seq_file *e, void *v)
+{
+	struct dmfs_i *dmi = e->context;
+	up(&dmi->sem);
+}
+
+/* emit one pre-formatted error line */
+static int show_error(struct seq_file *e, void *v)
+{
+	struct dmfs_error *d = v;
+	seq_puts(e, d->msg);
+	return 0;
+}
+
+/* iterator for the per-LV "error" virtual file */
+struct seq_operations dmfs_error_seq_ops = {
+	start: e_start,
+	next: e_next,
+	stop: e_stop,
+	show: show_error,
+};
+
+
--- /dev/null
+/*
+ * dmfs-lv.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/* Heavily based upon ramfs */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+/* static description of one entry in an LV directory:
+ * its name, inode factory, seq iterator and dirent type */
+struct dmfs_inode_info {
+	const char *name;
+	struct inode *(*create)(struct inode *, int, struct seq_operations *, int);
+	struct seq_operations *seq_ops;
+	int type;		/* DT_* for readdir */
+};
+
+#define DMFS_SEQ(inode) ((struct seq_operations *)(inode)->u.generic_ip)
+
+extern struct inode *dmfs_create_table(struct inode *, int, struct seq_operations *, int);
+extern struct seq_operations dmfs_error_seq_ops;
+extern struct seq_operations dmfs_status_seq_ops;
+extern struct seq_operations dmfs_suspend_seq_ops;
+extern ssize_t dmfs_suspend_write(struct file *file, const char *buf, size_t size, loff_t *ppos);
+
+/*
+ * open a seq_file using the iterator stored on the inode and
+ * point its context at the *parent* directory's dmfs_i (the
+ * per-LV state lives on the directory inode).
+ */
+static int dmfs_seq_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, DMFS_SEQ(inode));
+	if (ret >= 0) {
+		struct seq_file *seq = file->private_data;
+		seq->context = DMFS_I(file->f_dentry->d_parent->d_inode);
+	}
+	return ret;
+}
+
+/* fsync on a virtual file: nothing to flush.  (Also drops the
+ * stray ';' that followed the old function body, which is not
+ * valid at file scope in strict C89/C90.) */
+static int dmfs_no_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	return 0;
+}
+
+/* "suspend": seq_file readable, and writable to change state */
+static struct file_operations dmfs_suspend_file_operations = {
+	open: dmfs_seq_open,
+	read: seq_read,
+	llseek: seq_lseek,
+	release: seq_release,
+	write: dmfs_suspend_write,
+	fsync: dmfs_no_fsync,
+};
+
+/* no inode operations on the virtual files */
+static struct inode_operations dmfs_null_inode_operations = {
+};
+
+/* read-only seq_file backed files ("error", "status") */
+static struct file_operations dmfs_seq_ro_file_operations = {
+	open: dmfs_seq_open,
+	read: seq_read,
+	llseek: seq_lseek,
+	release: seq_release,
+	fsync: dmfs_no_fsync,
+};
+
+/* build a read-only regular inode backed by the given seq
+ * iterator; NULL on allocation failure ('dev' unused here) */
+static struct inode *dmfs_create_seq_ro(struct inode *dir, int mode, struct seq_operations *seq_ops, int dev)
+{
+	struct inode *inode = dmfs_new_inode(dir->i_sb, mode | S_IFREG);
+	if (inode) {
+		inode->i_fop = &dmfs_seq_ro_file_operations;
+		inode->i_op = &dmfs_null_inode_operations;
+		DMFS_SEQ(inode) = seq_ops;
+	}
+	return inode;
+}
+
+/* build the block-special "device" inode for the mapped
+ * device number passed in 'dev' */
+static struct inode *dmfs_create_device(struct inode *dir, int mode, struct seq_operations *seq_ops, int dev)
+{
+	struct inode *inode = dmfs_new_inode(dir->i_sb, mode | S_IFBLK);
+	if (inode) {
+		init_special_inode(inode, mode | S_IFBLK, dev);
+	}
+	return inode;
+}
+
+/* like dmfs_create_seq_ro() but swaps in the writable
+ * suspend file operations */
+static struct inode *dmfs_create_suspend(struct inode *dir, int mode, struct seq_operations *seq_ops, int dev)
+{
+	struct inode *inode = dmfs_create_seq_ro(dir, mode, seq_ops, dev);
+	if (inode) {
+		inode->i_fop = &dmfs_suspend_file_operations;
+	}
+	return inode;
+}
+
+/* unlink within an LV directory: reset the mapping and drop
+ * the link count; the inode itself is reclaimed by the VFS */
+static int dmfs_lv_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+
+	inode->i_mapping = &inode->i_data;
+	inode->i_nlink--;
+	return 0;
+}
+
+/* fixed contents of every LV directory; slots 0 and 1 are
+ * "." and ".." and have no factory */
+static struct dmfs_inode_info dmfs_ii[] = {
+	{ ".", NULL, NULL, DT_DIR },
+	{ "..", NULL, NULL, DT_DIR },
+	{ "table", dmfs_create_table, NULL, DT_REG },
+	{ "error", dmfs_create_seq_ro, &dmfs_error_seq_ops, DT_REG },
+	{ "status", dmfs_create_seq_ro, &dmfs_status_seq_ops, DT_REG },
+	{ "device", dmfs_create_device, NULL, DT_BLK },
+	{ "suspend", dmfs_create_suspend, &dmfs_suspend_seq_ops, DT_REG },
+};
+
+#define NR_DMFS_II (sizeof(dmfs_ii)/sizeof(struct dmfs_inode_info))
+
+/*
+ * Match a directory entry name (not NUL terminated, hence the
+ * explicit length) against the fixed table above.  Slots 0
+ * and 1 ("." and "..") are never returned.
+ */
+static struct dmfs_inode_info *dmfs_find_by_name(const char *n, int len)
+{
+	struct dmfs_inode_info *ii;
+
+	for (ii = &dmfs_ii[2]; ii < &dmfs_ii[NR_DMFS_II]; ii++) {
+		if (strlen(ii->name) == len && memcmp(ii->name, n, len) == 0)
+			return ii;
+	}
+
+	return NULL;
+}
+
+/*
+ * lookup inside an LV directory: entries are created on
+ * demand from the dmfs_ii table; unknown names get a
+ * negative dentry (d_add with NULL inode).
+ */
+static struct dentry *dmfs_lv_lookup(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = NULL;
+	struct dmfs_inode_info *ii;
+
+	ii = dmfs_find_by_name(dentry->d_name.name, dentry->d_name.len);
+	if (ii) {
+		int dev = kdev_t_to_nr(DMFS_I(dir)->md->dev);
+		inode = ii->create(dir, 0600, ii->seq_ops, dev);
+	}
+
+	d_add(dentry, inode);
+	return NULL;
+}
+
+/*
+ * pick an inode number for readdir: "." and ".." report the
+ * real inode numbers of this directory and its parent, every
+ * other entry just reuses its table index.
+ */
+static int dmfs_inum(int entry, struct dentry *dentry)
+{
+	switch (entry) {
+	case 0:
+		return dentry->d_inode->i_ino;
+	case 1:
+		return dentry->d_parent->d_inode->i_ino;
+	default:
+		return entry;
+	}
+}
+
+/*
+ * readdir over the fixed dmfs_ii table; f_pos doubles as the
+ * table index and survives partial fills.
+ */
+static int dmfs_lv_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct dmfs_inode_info *ii;
+
+	while (filp->f_pos < NR_DMFS_II) {
+		ii = &dmfs_ii[filp->f_pos];
+		if (filldir(dirent, ii->name, strlen(ii->name), filp->f_pos,
+			    dmfs_inum(filp->f_pos, dentry), ii->type) < 0)
+			break;
+		filp->f_pos++;
+	}
+
+	return 0;
+}
+
+
+/* nothing to sync on a virtual directory */
+static int dmfs_lv_sync(struct file *file, struct dentry *dentry, int datasync)
+{
+	return 0;
+}
+
+/* directory operations for an LV directory */
+static struct file_operations dmfs_lv_file_operations = {
+	read: generic_read_dir,
+	readdir: dmfs_lv_readdir,
+	fsync: dmfs_lv_sync,
+};
+
+static struct inode_operations dmfs_lv_inode_operations = {
+	lookup: dmfs_lv_lookup,
+	unlink: dmfs_lv_unlink,
+};
+
+/*
+ * create an LV directory inode plus its mapped device.  The
+ * device starts with a fresh, empty (but completed) table and
+ * is named after the dentry.  On any failure every partially
+ * constructed object is unwound and an ERR_PTR is returned.
+ */
+struct inode *dmfs_create_lv(struct super_block *sb, int mode, struct dentry *dentry)
+{
+	struct inode *inode = dmfs_new_private_inode(sb, mode | S_IFDIR);
+	struct mapped_device *md;
+	const char *name = dentry->d_name.name;
+	char tmp_name[DM_NAME_LEN + 1];
+	struct dm_table *table;
+	int ret = -ENOMEM;
+
+	if (inode) {
+		table = dm_table_create();
+		ret = PTR_ERR(table);
+		if (!IS_ERR(table)) {
+			ret = dm_table_complete(table);
+			if (ret == 0) {
+				inode->i_fop = &dmfs_lv_file_operations;
+				inode->i_op = &dmfs_lv_inode_operations;
+				/* d_name is not NUL terminated; copy it */
+				memcpy(tmp_name, name, dentry->d_name.len);
+				tmp_name[dentry->d_name.len] = 0;
+				md = dm_create(tmp_name, -1, table);
+				if (!IS_ERR(md)) {
+					/* md now owns the table */
+					DMFS_I(inode)->md = md;
+					return inode;
+				}
+				ret = PTR_ERR(md);
+			}
+			dm_table_destroy(table);
+		}
+		iput(inode);
+	}
+
+	return ERR_PTR(ret);
+}
+
+
--- /dev/null
+/*
+ * dmfs-root.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/* Heavily based upon ramfs */
+
+#include <linux/config.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+extern struct inode *dmfs_create_lv(struct super_block *sb, int mode, struct dentry *dentry);
+
+/*
+ * valid LV names are runs of alphanumerics and underscores.
+ *
+ * The cast to unsigned char matters: passing a negative value
+ * (other than EOF) to isalnum() is undefined behaviour, and a
+ * plain char may be signed, making bytes >= 0x80 negative.
+ */
+static int is_identifier(const char *str, int len)
+{
+	while(len--) {
+		if (!isalnum((unsigned char)*str) && *str != '_')
+			return 0;
+		str++;
+	}
+	return 1;
+}
+
+/*
+ * mkdir at the dmfs root creates a new LV: validate the name
+ * (length, character set, no leading dot) then build the LV
+ * directory and its mapped device via dmfs_create_lv().
+ */
+static int dmfs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct inode *inode;
+	int rv = -ENOSPC;
+
+	if (dentry->d_name.len >= DM_NAME_LEN)
+		return -EINVAL;
+
+	if (!is_identifier(dentry->d_name.name, dentry->d_name.len))
+		return -EPERM;
+
+	/* reserve names starting with '.' */
+	if (dentry->d_name.name[0] == '.')
+		return -EINVAL;
+
+	inode = dmfs_create_lv(dir->i_sb, mode, dentry);
+	if (!IS_ERR(inode)) {
+		d_instantiate(dentry, inode);
+		dget(dentry);
+		return 0;
+	}
+	return PTR_ERR(inode);
+}
+
+/*
+ * a dentry counts as "in use" if it has an inode and is
+ * still hashed.
+ */
+static inline int positive(struct dentry *dentry)
+{
+	return dentry->d_inode && !d_unhashed(dentry);
+}
+
+/*
+ * check (under dcache_lock) that an LV directory contains no
+ * live children before allowing rmdir.
+ */
+static int empty(struct dentry *dentry)
+{
+	struct list_head *list;
+
+	spin_lock(&dcache_lock);
+	list = dentry->d_subdirs.next;
+
+	while(list != &dentry->d_subdirs) {
+		struct dentry *de = list_entry(list, struct dentry, d_child);
+
+		if (positive(de)) {
+			spin_unlock(&dcache_lock);
+			return 0;
+		}
+		list = list->next;
+	}
+	spin_unlock(&dcache_lock);
+	return 1;
+}
+
+/*
+ * rmdir at the root destroys the LV's mapped device; refused
+ * (-ENOTEMPTY) while live children exist, and dm_destroy may
+ * itself refuse (suspended / in use).
+ */
+static int dmfs_root_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int ret = -ENOTEMPTY;
+
+	if (empty(dentry)) {
+		struct inode *inode = dentry->d_inode;
+		ret = dm_destroy(DMFS_I(inode)->md);
+		if (ret == 0) {
+			DMFS_I(inode)->md = NULL;
+			inode->i_nlink--;
+			dput(dentry);
+		}
+	}
+
+	return ret;
+}
+
+/* root lookups never auto-create: hand back a negative dentry */
+static struct dentry *dmfs_root_lookup(struct inode *dir, struct dentry *dentry)
+{
+	d_add(dentry, NULL);
+	return NULL;
+}
+
+/* renaming an LV is not implemented yet */
+static int dmfs_root_rename(struct inode *old_dir, struct dentry *old_dentry,
+			    struct inode *new_dir, struct dentry *new_dentry)
+{
+	/* Can only rename - not move between directories! */
+	if (old_dir != new_dir)
+		return -EPERM;
+
+	return -EINVAL; /* FIXME: a change of LV name here */
+}
+
+/* nothing to sync on the virtual root directory */
+static int dmfs_root_sync(struct file *file, struct dentry *dentry, int datasync)
+{
+	return 0;
+}
+
+/* root directory operations: listing comes from the dcache */
+static struct file_operations dmfs_root_file_operations = {
+	read: generic_read_dir,
+	readdir: dcache_readdir,
+	fsync: dmfs_root_sync,
+};
+
+static struct inode_operations dmfs_root_inode_operations = {
+	lookup: dmfs_root_lookup,
+	mkdir: dmfs_root_mkdir,
+	rmdir: dmfs_root_rmdir,
+	rename: dmfs_root_rename,
+};
+
+/* build the root directory inode for a dmfs mount; NULL on
+ * allocation failure */
+struct inode *dmfs_create_root(struct super_block *sb, int mode)
+{
+	struct inode *inode = dmfs_new_inode(sb, mode | S_IFDIR);
+
+	if (inode) {
+		inode->i_fop = &dmfs_root_file_operations;
+		inode->i_op = &dmfs_root_inode_operations;
+	}
+
+	return inode;
+}
+
+
--- /dev/null
+/*
+ * dmfs-status.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+/* the "status" file is currently a stub: its iterator yields
+ * nothing, so reads return EOF immediately */
+static void *s_start(struct seq_file *s, loff_t *pos)
+{
+	return NULL;
+}
+
+static void *s_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	return NULL;
+}
+
+static void s_stop(struct seq_file *s, void *v)
+{
+}
+
+static int s_show(struct seq_file *s, void *v)
+{
+	return 0;
+}
+
+struct seq_operations dmfs_status_seq_ops = {
+	start: s_start,
+	next: s_next,
+	stop: s_stop,
+	show: s_show,
+};
+
+
--- /dev/null
+/*
+ * dmfs-super.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+
+#include "dmfs.h"
+#include "dm.h"
+
+#define DMFS_MAGIC 0x444D4653
+
+extern struct inode *dmfs_create_root(struct super_block *sb, int);
+
+/* minimal statfs: report magic, blocksize and the maximum
+ * name length (DM_NAME_LEN includes the NUL, hence -1) */
+static int dmfs_statfs(struct super_block *sb, struct statfs *buf)
+{
+	buf->f_type = sb->s_magic;
+	buf->f_bsize = sb->s_blocksize;
+	buf->f_namelen = DM_NAME_LEN - 1;
+
+	return 0;
+}
+
+/*
+ * final inode teardown.  LV directories carry a private
+ * dmfs_i whose md must already have been destroyed (rmdir
+ * clears it); its error list and the allocation itself are
+ * released here, balancing dmfs_new_private_inode().
+ */
+static void dmfs_delete_inode(struct inode *inode)
+{
+	if (S_ISDIR(inode->i_mode)) {
+		struct dmfs_i *dmi = DMFS_I(inode);
+
+		if (dmi) {
+			if (dmi->md)
+				BUG();
+			if (!list_empty(&dmi->errors))
+				dmfs_zap_errors(inode);
+			kfree(dmi);
+			MOD_DEC_USE_COUNT; /* Don't remove */
+		}
+	}
+
+	inode->u.generic_ip = NULL;
+	clear_inode(inode);
+}
+
+/* force_delete: inodes are freed as soon as unreferenced */
+static struct super_operations dmfs_super_operations = {
+	statfs: dmfs_statfs,
+	put_inode: force_delete,
+	delete_inode: dmfs_delete_inode,
+};
+
+/*
+ * fill in the superblock and hang the root directory off it;
+ * NULL on any failure (the VFS treats that as a failed mount).
+ */
+static struct super_block *dmfs_read_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *inode;
+	struct dentry *root;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = DMFS_MAGIC;
+	sb->s_op = &dmfs_super_operations;
+	sb->s_maxbytes = MAX_NON_LFS;
+
+	inode = dmfs_create_root(sb, 0755);
+	if (IS_ERR(inode))
+		return NULL;
+	root = d_alloc_root(inode);
+	if (!root) {
+		iput(inode);
+		return NULL;
+	}
+	sb->s_root = root;
+
+	return sb;
+}
+
+/*
+ * allocate a fresh dmfs inode with the given mode, owned by
+ * the current process; NULL on allocation failure.
+ */
+struct inode *dmfs_new_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_mode = mode;
+		inode->i_uid = current->fsuid;
+		inode->i_gid = current->fsgid;
+		inode->i_blksize = PAGE_CACHE_SIZE;
+		inode->i_blocks = 0;
+		inode->i_rdev = NODEV;
+		inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	}
+
+	return inode;
+}
+
+/*
+ * like dmfs_new_inode() but attaches a zeroed dmfs_i to
+ * u.generic_ip (used for LV directories).  The matching
+ * kfree/MOD_DEC happens in dmfs_delete_inode().
+ */
+struct inode *dmfs_new_private_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode = dmfs_new_inode(sb, mode);
+	struct dmfs_i *dmi;
+
+	if (inode) {
+		dmi = kmalloc(sizeof(struct dmfs_i), GFP_KERNEL);
+		if (dmi == NULL) {
+			iput(inode);
+			return NULL;
+		}
+		memset(dmi, 0, sizeof(struct dmfs_i));
+		init_MUTEX(&dmi->sem);
+		INIT_LIST_HEAD(&dmi->errors);
+		inode->u.generic_ip = dmi;
+		MOD_INC_USE_COUNT; /* Don't remove */
+	}
+	return inode;
+}
+
+static DECLARE_FSTYPE(dmfs_fstype, "dmfs", dmfs_read_super, FS_SINGLE);
+static struct vfsmount *dmfs_mnt;
+
+/*
+ * register and internally mount the dmfs filesystem.  The
+ * MOD_DEC after kern_mount balances the reference the mount
+ * takes, so the module can still be unloaded.
+ */
+int __init dm_interface_init(void)
+{
+	int ret;
+
+	ret = register_filesystem(&dmfs_fstype);
+	if (ret < 0)
+		goto out;
+
+	dmfs_mnt = kern_mount(&dmfs_fstype);
+	if (IS_ERR(dmfs_mnt)) {
+		ret = PTR_ERR(dmfs_mnt);
+		unregister_filesystem(&dmfs_fstype);
+	} else {
+		MOD_DEC_USE_COUNT; /* Yes, this really is correct... */
+	}
+out:
+	return ret;
+}
+
+/* undo dm_interface_init(): re-take the count dropped there,
+ * unmount the internal mount and unregister the fs type */
+void __exit dm_interface_exit(void)
+{
+	MOD_INC_USE_COUNT; /* So that it lands up being zero */
+
+	do_umount(dmfs_mnt, 0);
+
+	unregister_filesystem(&dmfs_fstype);
+
+}
+
--- /dev/null
+/*
+ * dmfs-suspend.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+
+/*
+ * single-record iterator for the "suspend" file: position 0
+ * yields a dummy token, everything else EOF.  Takes the
+ * semaphore released in s_stop().
+ */
+static void *s_start(struct seq_file *s, loff_t *pos)
+{
+	struct dmfs_i *dmi = s->context;
+	if (*pos > 0)
+		return NULL;
+	down(&dmi->sem);
+	return (void *)1;
+}
+
+/* only one record: advancing always ends the sequence */
+static void *s_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return NULL;
+}
+
+/* release the semaphore taken in s_start() */
+static void s_stop(struct seq_file *s, void *v)
+{
+	struct dmfs_i *dmi = s->context;
+	up(&dmi->sem);
+}
+
+/* Emit "1\n" when the mapped device is suspended, "0\n" otherwise. */
+static int s_show(struct seq_file *s, void *v)
+{
+	struct dmfs_i *dmi = s->context;
+
+	seq_puts(s, dmi->md->suspended ? "1\n" : "0\n");
+	return 0;
+}
+
+/* Read-side iterator for the dmfs "suspend" file. */
+struct seq_operations dmfs_suspend_seq_ops = {
+	start: s_start,
+	next: s_next,
+	stop: s_stop,
+	show: s_show,
+};
+
+/*
+ * Write handler for the "suspend" file: "1" suspends the mapped
+ * device, "0" resumes it; an optional trailing newline is allowed.
+ * Returns the number of bytes consumed, or a negative errno
+ * (including any error from dm_suspend()/dm_resume()).
+ */
+ssize_t dmfs_suspend_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+{
+	struct inode *dir = file->f_dentry->d_parent->d_inode;
+	struct dmfs_i *dmi = DMFS_I(dir);
+	int written = 0;
+	char cmd;
+
+	if (count == 0)
+		goto out;
+	if (count != 1 && count != 2)
+		return -EINVAL;
+	/* buf is a userspace pointer: it must not be dereferenced directly */
+	if (get_user(cmd, buf))
+		return -EFAULT;
+	if (cmd != '0' && cmd != '1')
+		return -EINVAL;
+
+	down(&dmi->sem);
+	if (cmd == '0')
+		written = dm_resume(dmi->md);
+	if (cmd == '1')
+		written = dm_suspend(dmi->md);
+	if (written >= 0)
+		written = count;
+	up(&dmi->sem);
+
+out:
+	return written;
+}
+
+
--- /dev/null
+/*
+ * dmfs-table.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+/* First sector not yet covered by the table (0 for an empty table). */
+static offset_t start_of_next_range(struct dm_table *t)
+{
+	if (!t->num_targets)
+		return 0;
+
+	return t->highs[t->num_targets - 1] + 1;
+}
+
+/*
+ * Parse one table line of the form
+ *	<start> <size> <target-type> [args...]
+ * and append the resulting target to 't'.  Returns NULL on success
+ * or a pointer to an error-message string on failure.
+ */
+static char *dmfs_parse_line(struct dm_table *t, char *str)
+{
+	offset_t start, size, high;
+	void *context;
+	struct target_type *ttype;
+	int rv = 0;
+	char *msg;
+	int pos = 0;
+	char target[33];
+
+	static char *err_table[] = {
+		"Missing/Invalid start argument",
+		"Missing/Invalid size argument",
+		"Missing target type"
+	};
+
+	/* offset_t is unsigned, so %u (not %d) is the matching conversion */
+	rv = sscanf(str, "%u %u %32s%n", &start, &size, target, &pos);
+	if (rv < 3) {
+		/* clamp so a negative return can never index err_table[-1] */
+		msg = err_table[rv < 0 ? 0 : rv];
+		goto out;
+	}
+	str += pos;
+	while (*str && isspace(*str))
+		str++;
+
+	msg = "Gap in table";
+	if (start != start_of_next_range(t))
+		goto out;
+
+	msg = "Target type unknown";
+	ttype = dm_get_target_type(target);
+	if (ttype) {
+		/* on failure the constructor fills 'context' with a message */
+		msg = "This message should never appear (constructor error)";
+		rv = ttype->ctr(t, start, size, str, &context);
+		msg = context;
+		if (rv == 0) {
+			msg = "Error adding target to table";
+			high = start + (size - 1);
+			if (dm_table_add_target(t, high, ttype, context) == 0)
+				return NULL;
+			/* undo the construction before dropping the type ref */
+			ttype->dtr(t, context);
+		}
+		dm_put_target_type(ttype);
+	}
+out:
+	return msg;
+}
+
+
+/*
+ * Copy at most min(dstlen, srclen) bytes from src to dst, stopping
+ * after (and including) the first newline.  *flag is set when a
+ * newline was copied.  Returns the number of bytes written to dst.
+ */
+static int dmfs_copy(char *dst, int dstlen, char *src, int srclen, int *flag)
+{
+	char *base = dst;
+	int len = min(dstlen, srclen);
+
+	while (len--) {
+		char c = *src++;
+		*dst++ = c;
+		if (c == '\n') {
+			*flag = 1;
+			break;
+		}
+	}
+	return dst - base;
+}
+
+/*
+ * A line counts as content (returns 1) if some non-whitespace
+ * character appears before any '#'.  Blank lines and lines whose
+ * first non-space character is '#' are comments (returns 0).
+ */
+static int dmfs_line_is_not_comment(char *str)
+{
+	for (; *str; str++) {
+		if (*str == '#')
+			return 0;
+		if (!isspace(*str))
+			return 1;
+	}
+	return 0;
+}
+
+/* State threaded through dmfs_read_actor() while parsing a table file. */
+struct dmfs_desc {
+	struct dm_table *table;	/* table being built */
+	struct inode *inode;	/* directory inode collecting parse errors */
+	char *tmp;		/* one-page line-assembly buffer */
+	loff_t tmpl;		/* bytes currently held in tmp */
+	unsigned long lnum;	/* current line number (1-based) */
+};
+
+/*
+ * Read actor driven by do_generic_file_read(): assembles complete
+ * lines from page-cache data into d->tmp, handing each finished
+ * non-comment line to dmfs_parse_line().  Parse failures are recorded
+ * on the directory inode via dmfs_add_error() rather than aborting.
+ * Returns the number of bytes consumed, or 0 to stop the read loop
+ * when a line exceeds one page.
+ */
+static int dmfs_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size)
+{
+	char *buf, *msg;
+	unsigned long count = desc->count, len, copied;
+	struct dmfs_desc *d = (struct dmfs_desc *)desc->buf;
+
+	if (size > count)
+		size = count;
+
+	len = size;
+	buf = kmap(page);
+	do {
+		int flag = 0;
+		/* reserve one byte in tmp for the terminating NUL */
+		copied = dmfs_copy(d->tmp + d->tmpl, PAGE_SIZE - d->tmpl - 1,
+				   buf + offset, len, &flag);
+		offset += copied;
+		len -= copied;
+		if (d->tmpl + copied == PAGE_SIZE - 1)
+			goto line_too_long;
+		d->tmpl += copied;
+		/* complete line, or the final fragment of the file */
+		if (flag || (len == 0 && count == size)) {
+			*(d->tmp + d->tmpl) = 0;
+			if (dmfs_line_is_not_comment(d->tmp)) {
+				msg = dmfs_parse_line(d->table, d->tmp);
+				if (msg) {
+					dmfs_add_error(d->inode, d->lnum, msg);
+				}
+			}
+			d->lnum++;
+			d->tmpl = 0;
+		}
+	} while(len > 0);
+	kunmap(page);
+
+	desc->count = count - size;
+	desc->written += size;
+
+	return size;
+
+line_too_long:
+	printk(KERN_INFO "dmfs_read_actor: Line %lu too long\n", d->lnum);
+	kunmap(page);
+	return 0;
+}
+
+/*
+ * Build a dm_table from the contents of a dmfs "table" file.
+ * Returns the table, or NULL for an empty file or on failure.  Any
+ * recorded parse errors cause the partially-built table to be
+ * discarded; the errors stay queued on the directory inode.
+ */
+static struct dm_table *dmfs_parse(struct inode *inode, struct file *filp)
+{
+	struct dm_table *t = NULL;
+	unsigned long page;
+	struct dmfs_desc d;
+	loff_t pos = 0;
+
+	if (inode->i_size == 0)
+		return NULL;
+
+	page = __get_free_page(GFP_NOFS);
+	if (page) {
+		t = dm_table_create();
+		if (t) {
+			read_descriptor_t desc;
+
+			desc.written = 0;
+			desc.count = inode->i_size;
+			desc.buf = (char *)&d;
+			d.table = t;
+			d.inode = inode;
+			d.tmp = (char *)page;
+			d.tmpl = 0;
+			d.lnum = 1;
+
+			do_generic_file_read(filp, &pos, &desc, dmfs_read_actor);
+			if (desc.written != inode->i_size) {
+				dm_table_destroy(t);
+				t = NULL;
+			}
+		}
+		free_page(page);
+	}
+	/* t may be NULL when allocation failed above: don't destroy NULL */
+	if (t && !list_empty(&DMFS_I(inode)->errors)) {
+		dm_table_destroy(t);
+		t = NULL;
+	}
+	return t;
+}
+
+/*
+ * Called on the last close of a "table" file opened for write:
+ * re-parse the file and, if it yields a valid table, swap it into the
+ * mapped device, quiescing a live device around the swap.  Releases
+ * the exclusive write access taken in dmfs_table_open().
+ *
+ * NOTE(review): the dm_swap_table() return value is ignored and a
+ * parse failure leaves the old table silently in place; errors are
+ * only visible via the inode error list — confirm this is intended.
+ */
+static int dmfs_table_release(struct inode *inode, struct file *f)
+{
+	struct dentry *dentry = f->f_dentry;
+	struct inode *parent = dentry->d_parent->d_inode;
+	struct dmfs_i *dmi = DMFS_I(parent);
+	struct dm_table *table;
+
+	if (f->f_mode & FMODE_WRITE) {
+
+		down(&dmi->sem);
+		dmfs_zap_errors(dentry->d_parent->d_inode);
+		table = dmfs_parse(dentry->d_parent->d_inode, f);
+
+		if (table) {
+			struct mapped_device *md = dmi->md;
+			int need_resume = 0;
+
+			/* suspend a live device around the table swap */
+			if (md->suspended == 0) {
+				dm_suspend(md);
+				need_resume = 1;
+			}
+			dm_swap_table(md, table);
+			if (need_resume) {
+				dm_resume(md);
+			}
+		}
+		up(&dmi->sem);
+
+		put_write_access(parent);
+	}
+
+	return 0;
+}
+
+/*
+ * readpage for table files: there is no backing store, so a page that
+ * is not already up to date is simply zero-filled.
+ */
+static int dmfs_readpage(struct file *file, struct page *page)
+{
+	if (!Page_Uptodate(page)) {
+		memset(kmap(page), 0, PAGE_CACHE_SIZE);
+		kunmap(page);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	}
+	UnlockPage(page);
+	return 0;
+}
+
+/*
+ * prepare_write: map the page and zero it on first use.  The kmap()
+ * taken here is balanced by the kunmap() in dmfs_commit_write().
+ */
+static int dmfs_prepare_write(struct file *file, struct page *page,
+			      unsigned offset, unsigned to)
+{
+	void *addr = kmap(page);
+	if (!Page_Uptodate(page)) {
+		memset(addr, 0, PAGE_CACHE_SIZE);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	}
+	SetPageDirty(page);
+	return 0;
+}
+
+/*
+ * commit_write: drop the kmap taken in dmfs_prepare_write() and
+ * extend i_size if this write went past the current end of file.
+ */
+static int dmfs_commit_write(struct file *file, struct page *page,
+			     unsigned offset, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
+
+	kunmap(page);
+	if (pos > inode->i_size)
+		inode->i_size = pos;
+	return 0;
+}
+
+/*
+ * There is a small race here in that two processes might call this at
+ * the same time and both fail. So its a fail safe race :-) This should
+ * move into namei.c (and thus use the spinlock and do this properly)
+ * at some stage if we continue to use this set of functions for ensuring
+ * exclusive write access to the file
+ */
+static int get_exclusive_write_access(struct inode *inode)
+{
+	if (get_write_access(inode) != 0)
+		return -1;
+
+	if (atomic_read(&inode->i_writecount) == 1)
+		return 0;
+
+	/* someone else also holds write access: back out */
+	put_write_access(inode);
+	return -1;
+}
+
+/*
+ * Opening the table file for write requires exclusive write access to
+ * the parent directory; it is released in dmfs_table_release().
+ */
+static int dmfs_table_open(struct inode *inode, struct file *file)
+{
+	struct inode *parent = file->f_dentry->d_parent->d_inode;
+
+	if ((file->f_mode & FMODE_WRITE) &&
+	    get_exclusive_write_access(parent))
+		return -EPERM;
+
+	return 0;
+}
+
+/* fsync is a no-op: table data lives only in the page cache. */
+static int dmfs_table_sync(struct file *file, struct dentry *dentry, int datasync)
+{
+	return 0;
+}
+
+/*
+ * Inode revalidate hook: mirror the parent directory's i_size into
+ * the table inode, since both share the parent's address_space
+ * (see dmfs_create_table()).
+ */
+static int dmfs_table_revalidate(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct inode *parent = dentry->d_parent->d_inode;
+
+	inode->i_size = parent->i_size;
+	return 0;
+}
+
+/* Page-cache operations for table files (no real backing store). */
+struct address_space_operations dmfs_address_space_operations = {
+	readpage: dmfs_readpage,
+	writepage: fail_writepage,
+	prepare_write: dmfs_prepare_write,
+	commit_write: dmfs_commit_write,
+};
+
+/* Plain read/write plus the parse-on-last-close hook in release. */
+static struct file_operations dmfs_table_file_operations = {
+	llseek: generic_file_llseek,
+	read: generic_file_read,
+	write: generic_file_write,
+	open: dmfs_table_open,
+	release: dmfs_table_release,
+	fsync: dmfs_table_sync,
+};
+
+static struct inode_operations dmfs_table_inode_operations = {
+	revalidate: dmfs_table_revalidate,
+};
+
+/*
+ * Create the "table" file inode for a device directory.  The inode
+ * shares the directory's address_space, so both see the same cached
+ * data (kept consistent by dmfs_table_revalidate()).
+ */
+struct inode *dmfs_create_table(struct inode *dir, int mode)
+{
+	struct inode *inode = dmfs_new_inode(dir->i_sb, mode | S_IFREG);
+
+	if (inode) {
+		inode->i_mapping = dir->i_mapping;
+		inode->i_mapping->a_ops = &dmfs_address_space_operations;
+		inode->i_fop = &dmfs_table_file_operations;
+		inode->i_op = &dmfs_table_inode_operations;
+	}
+
+	return inode;
+}
+
--- /dev/null
+#ifndef LINUX_DMFS_H
+#define LINUX_DMFS_H
+
+/* Per-device dmfs state, hung off inode->u.generic_ip (see DMFS_I). */
+struct dmfs_i {
+	struct semaphore sem;	/* serialises table loads and suspend/resume */
+	struct mapped_device *md;	/* device this directory controls */
+	struct list_head errors;	/* parse errors from the last table load */
+	int status;
+};
+
+#define DMFS_I(inode) ((struct dmfs_i *)(inode)->u.generic_ip)
+
+
+/* inode constructors, defined elsewhere in dmfs */
+extern struct inode *dmfs_new_inode(struct super_block *sb, int mode);
+extern struct inode *dmfs_new_private_inode(struct super_block *sb, int mode);
+
+/* error-list management used by table parsing */
+extern void dmfs_add_error(struct inode *inode, unsigned num, char *str);
+extern void dmfs_zap_errors(struct inode *inode);
+
+
+
+#endif /* LINUX_DMFS_H */
--- /dev/null
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/fs.h>
+
+#include "dm.h"
+#include <linux/dm-ioctl.h>
+
+/* Release a parameter block allocated by copy_params(). */
+static void free_params(struct dm_ioctl *p)
+{
+	vfree(p);
+}
+
+/*
+ * Copy a variable-sized dm_ioctl parameter block in from userland.
+ * The fixed header is read first to learn data_size, then the whole
+ * block is copied.  On success *result points at a vmalloc'd copy
+ * that the caller must release with free_params().
+ */
+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **result)
+{
+	struct dm_ioctl tmp, *dmi;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)))
+		return -EFAULT;
+
+	/* reject bogus sizes before trusting them for the second copy */
+	if (tmp.data_size < sizeof(tmp))
+		return -EINVAL;
+
+	if (!(dmi = vmalloc(tmp.data_size)))
+		return -ENOMEM;
+
+	if (copy_from_user(dmi, user, tmp.data_size)) {
+		vfree(dmi);	/* don't leak the buffer on a faulted copy */
+		return -EFAULT;
+	}
+
+	*result = dmi;
+	return 0;
+}
+
+/*
+ * check a string doesn't overrun the chunk of
+ * memory we copied from userland.
+ */
+/*
+ * Check that a string is NUL-terminated before 'end' (one past the
+ * last valid byte of the block copied from userland).
+ * Returns 1 when a terminator was found inside the buffer, else 0.
+ */
+static int valid_str(char *str, void *end)
+{
+	while ((void *) str < end) {
+		if (!*str)
+			return 1;
+		str++;
+	}
+	/* reached 'end' without a NUL: reading *str here would overrun */
+	return 0;
+}
+
+/*
+ * Locate the first target_spec, which sits immediately after the
+ * dm_ioctl header; its parameter string follows the spec.  Returns
+ * non-zero only if the parameter string terminates inside the buffer.
+ */
+static int first_target(struct dm_ioctl *a, void *end,
+			struct dm_target_spec **spec, char **params)
+{
+	*spec = (struct dm_target_spec *) (a + 1);
+	*params = (char *) (*spec + 1);
+
+	return valid_str(*params, end);
+}
+
+/*
+ * Step to the next target_spec via last->next, a byte offset from the
+ * start of the previous spec; validate its parameter string the same
+ * way as first_target().
+ * NOTE(review): 'next' itself is not range-checked against 'end'
+ * before being dereferenced — confirm callers bound it.
+ */
+static int next_target(struct dm_target_spec *last, void *end,
+		       struct dm_target_spec **spec, char **params)
+{
+	*spec = (struct dm_target_spec *)
+	    (((unsigned char *) last) + last->next);
+	*params = (char *) (*spec + 1);
+
+	return valid_str(*params, end);
+}
+
+/* Error sink for table parsing: log the message; 'private' is unused. */
+void err_fn(const char *message, void *private)
+{
+	printk(KERN_WARNING "%s\n", message);
+}
+
+/*
+ * Checks to see if there's a gap in the table.
+ * Returns true iff there is a gap.
+ */
+/*
+ * Returns true iff 'spec' does not start exactly where the table
+ * currently ends, i.e. there is a hole in the sector range.
+ */
+static int gap(struct dm_table *table, struct dm_target_spec *spec)
+{
+	offset_t expected = 0;
+
+	if (table->num_targets)
+		expected = table->highs[table->num_targets - 1] + 1;
+
+	return spec->sector_start != expected;
+}
+
+/*
+ * Walk the target_specs embedded in 'args' and add each to 'table'.
+ * Returns 0 on success or a negative errno; on failure the caller is
+ * expected to destroy the partially-populated table.
+ */
+static int populate_table(struct dm_table *table, struct dm_ioctl *args)
+{
+	int i = 0, r, first = 1;
+	struct dm_target_spec *spec;
+	char *params;
+	struct target_type *ttype;
+	void *context, *end;
+	offset_t high = 0;
+
+	if (!args->target_count) {
+		WARN("No targets specified");
+		return -EINVAL;
+	}
+
+	end = ((void *) args) + args->data_size;
+
+#define PARSE_ERROR(msg) {err_fn(msg, NULL); return -EINVAL;}
+
+	for (i = 0; i < args->target_count; i++) {
+
+		r = first ? first_target(args, end, &spec, &params) :
+		    next_target(spec, end, &spec, &params);
+
+		if (!r)
+			PARSE_ERROR("unable to find target");
+
+		/* lookup the target type */
+		if (!(ttype = dm_get_target_type(spec->target_type)))
+			PARSE_ERROR("unable to find target type");
+
+		if (gap(table, spec)) {
+			/* drop the reference from dm_get_target_type() */
+			dm_put_target_type(ttype);
+			PARSE_ERROR("gap in target ranges");
+		}
+
+		/* build the target */
+		if (ttype->ctr(table, spec->sector_start, spec->length,
+			       params, &context)) {
+			err_fn(context, NULL);
+			dm_put_target_type(ttype);
+			return -EINVAL;
+		}
+
+		/* add the target to the table */
+		high = spec->sector_start + (spec->length - 1);
+		if (dm_table_add_target(table, high, ttype, context)) {
+			/* mirror dmfs_parse_line(): destroy + unref on failure */
+			ttype->dtr(table, context);
+			dm_put_target_type(ttype);
+			PARSE_ERROR("internal error adding target to table");
+		}
+
+		first = 0;
+	}
+
+#undef PARSE_ERROR
+
+	r = dm_table_complete(table);
+	return r;
+}
+
+/*
+ * Copies device info back to user space, used by
+ * the create and info ioctls.
+ */
+/*
+ * Copy device status back to userland (used by DM_CREATE and
+ * DM_INFO).  Returns 0 on success or -EFAULT if the copy-out faults.
+ */
+static int info(const char *name, struct dm_ioctl *user)
+{
+	struct dm_ioctl param;
+	struct mapped_device *md = dm_get(name);
+
+	/* zero the whole struct so no kernel stack leaks to userland */
+	memset(&param, 0, sizeof(param));
+
+	if (!md) {
+		param.exists = 0;
+		goto out;
+	}
+
+	param.data_size = 0;
+	strncpy(param.name, md->name, sizeof(param.name));
+	param.exists = 1;
+	param.suspend = md->suspended;
+	param.open_count = md->use_count;
+	param.major = MAJOR(md->dev);
+	param.minor = MINOR(md->dev);
+	param.target_count = md->map->num_targets;
+
+ out:
+	return copy_to_user(user, &param, sizeof(param)) ? -EFAULT : 0;
+}
+
+/*
+ * DM_CREATE: build a table from the ioctl parameters, create the
+ * mapped device and report its details back to userland.
+ */
+static int create(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+	int r;
+	struct mapped_device *md;
+	struct dm_table *t;
+
+	t = dm_table_create();
+	/* must not reach 'bad': dm_table_destroy() on an ERR_PTR */
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+
+	if ((r = populate_table(t, param)))
+		goto bad;
+
+	md = dm_create(param->name, param->minor, t);
+	r = PTR_ERR(md);
+	if (IS_ERR(md))
+		goto bad;
+
+	if ((r = info(param->name, user))) {
+		dm_destroy(md);
+		/* NOTE(review): if dm_destroy() already tears down the
+		 * table, the destroy at 'bad' is a double free — confirm
+		 * table ownership in dm_create()/dm_destroy(). */
+		goto bad;
+	}
+
+	return 0;
+
+      bad:
+	dm_table_destroy(t);
+	return r;
+}
+
+/*
+ * DM_REMOVE: look up the named device and destroy it.
+ * Returns -ENXIO when no device of that name exists.
+ */
+static int remove(struct dm_ioctl *param)
+{
+	struct mapped_device *md = dm_get(param->name);
+
+	if (!md)
+		return -ENXIO;
+
+	return dm_destroy(md);
+}
+
+/*
+ * DM_SUSPEND: suspend or resume the named device according to
+ * param->suspend.  Returns -ENXIO when the device does not exist.
+ */
+static int suspend(struct dm_ioctl *param)
+{
+	struct mapped_device *md = dm_get(param->name);
+
+	if (md == NULL)
+		return -ENXIO;
+
+	if (param->suspend)
+		return dm_suspend(md);
+
+	return dm_resume(md);
+}
+
+/*
+ * DM_RELOAD: build a fresh table from the parameters and swap it into
+ * an existing device.  The new table is destroyed on any failure;
+ * the old table is handled by dm_swap_table().
+ */
+static int reload(struct dm_ioctl *param)
+{
+	int r;
+	struct mapped_device *md = dm_get(param->name);
+	struct dm_table *t;
+
+	if (!md)
+		return -ENXIO;
+
+	t = dm_table_create();
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+
+	if ((r = populate_table(t, param))) {
+		dm_table_destroy(t);
+		return r;
+	}
+
+	if ((r = dm_swap_table(md, t))) {
+		dm_table_destroy(t);
+		return r;
+	}
+
+	return 0;
+}
+
+/* Only CAP_SYS_ADMIN may open the control device. */
+static int ctl_open(struct inode *inode, struct file *file)
+{
+	if (capable(CAP_SYS_ADMIN))
+		return 0;
+
+	return -EACCES;
+}
+
+/* No per-open state to tear down for the control device. */
+static int ctl_close(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+
+/*
+ * Dispatch a device-mapper control ioctl.  The parameter block is
+ * copied in once (copy_params), handed to the per-command handler,
+ * and always freed afterwards.  DM_CREATE and DM_INFO additionally
+ * write results back through the original user pointer 'a'.
+ */
+static int ctl_ioctl(struct inode *inode, struct file *file,
+		     uint command, ulong a)
+{
+	int r;
+	struct dm_ioctl *p;
+
+	if ((r = copy_params((struct dm_ioctl *) a, &p)))
+		return r;
+
+	switch (command) {
+	case DM_CREATE:
+		r = create(p, (struct dm_ioctl *) a);
+		break;
+
+	case DM_REMOVE:
+		r = remove(p);
+		break;
+
+	case DM_SUSPEND:
+		r = suspend(p);
+		break;
+
+	case DM_RELOAD:
+		r = reload(p);
+		break;
+
+	case DM_INFO:
+		r = info(p->name, (struct dm_ioctl *) a);
+		break;
+
+	default:
+		WARN("dm_ctl_ioctl: unknown command 0x%x\n", command);
+		r = -EINVAL;
+	}
+
+	free_params(p);
+	return r;
+}
+
+
+/* File operations for the device-mapper control character device. */
+static struct file_operations _ctl_fops = {
+	open: ctl_open,
+	release: ctl_close,
+	ioctl: ctl_ioctl,
+	owner: THIS_MODULE,
+};
+
+
+static devfs_handle_t _ctl_handle;
+
+/*
+ * Register the control character device on DM_CHAR_MAJOR and create
+ * its devfs node.  Returns 0 on success or -EIO if the chrdev could
+ * not be registered.
+ * NOTE(review): the devfs_register() result is not checked — confirm
+ * a NULL handle is acceptable here.
+ */
+int dm_interface_init(void)
+{
+	int r;
+
+	if ((r = devfs_register_chrdev(DM_CHAR_MAJOR, DM_DIR,
+				       &_ctl_fops)) < 0) {
+		WARN("devfs_register_chrdev failed for dm control dev");
+		return -EIO;
+	}
+
+	_ctl_handle = devfs_register(0 , DM_DIR "/control", 0,
+				     DM_CHAR_MAJOR, 0,
+				     S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
+				     &_ctl_fops, NULL);
+
+	return r;
+}
+
+/*
+ * Unregister the control chrdev.  As the FIXME notes, the devfs entry
+ * held in _ctl_handle is never devfs_unregister()ed here.
+ */
+void dm_interface_exit(void)
+{
+	// FIXME: remove control device
+
+	if (devfs_unregister_chrdev(DM_CHAR_MAJOR, DM_DIR) < 0)
+		WARN("devfs_unregister_chrdev failed for dm control device");
+}
+
--- /dev/null
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _DM_IOCTL_H
+#define _DM_IOCTL_H
+
+#include "device-mapper.h"
+
+/*
+ * Implements a traditional ioctl interface to the
+ * device mapper. Yuck.
+ */
+
+/* One target mapping inside a dm_ioctl parameter block. */
+struct dm_target_spec {
+	int32_t status;		/* used when reading from kernel only */
+	unsigned long long sector_start;
+	unsigned long long length;
+
+	char target_type[DM_MAX_TYPE_NAME];
+
+	unsigned long next;	/* offset in bytes to next target_spec */
+
+	/*
+	 * Parameter string starts immediately
+	 * after this object. Be careful to add
+	 * padding after string to ensure correct
+	 * alignment of subsequent dm_target_spec.
+	 */
+};
+
+/*
+ * Fixed header of every dm ioctl; target_specs follow in-line.
+ * NOTE(review): DM_NAME_LEN is not defined in the visible part of
+ * device-mapper.h — confirm where it comes from.
+ */
+struct dm_ioctl {
+	unsigned long data_size;	/* the size of this structure */
+	char name[DM_NAME_LEN];
+
+	int exists;		/* out */
+	int suspend;		/* in/out */
+	int open_count;		/* out */
+	int major;		/* out */
+	int minor;		/* in/out */
+
+	int target_count;	/* in/out */
+};
+
+/* FIXME: find own numbers, 109 is pinched from LVM */
+#define DM_IOCTL 0xfd
+#define DM_CHAR_MAJOR 124
+
+#define DM_CREATE _IOWR(DM_IOCTL, 0x00, struct dm_ioctl)
+#define DM_REMOVE _IOW(DM_IOCTL, 0x01, struct dm_ioctl)
+#define DM_SUSPEND _IOW(DM_IOCTL, 0x02, struct dm_ioctl)
+#define DM_RELOAD _IOWR(DM_IOCTL, 0x03, struct dm_ioctl)
+#define DM_INFO _IOWR(DM_IOCTL, 0x04, struct dm_ioctl)
+
+#endif