Merged fs/ioctl file structure (files from LVM2/driver/device-mapper).

author Alasdair Kergon <agk@redhat.com>

Wed, 5 Dec 2001 23:21:03 +0000 (23:21 +0000)

committer Alasdair Kergon <agk@redhat.com>

Wed, 5 Dec 2001 23:21:03 +0000 (23:21 +0000)
author Alasdair Kergon <agk@redhat.com>
Wed, 5 Dec 2001 23:21:03 +0000 (23:21 +0000)
committer Alasdair Kergon <agk@redhat.com>
Wed, 5 Dec 2001 23:21:03 +0000 (23:21 +0000)
diff --git a/configure b/configure

index 81435aacefb7b138cbd1df95d0b560a93866e790..33baee7836731ea1ad0975fa1a2d7a1ae6d6dcb4 100755 (executable)
--- a/configure
+++ b/configure
@@ -2013,6 +2013,7 @@ make.tmpl                                                               \
  include/Makefile                                                        \
  dmsetup/Makefile                                                       \
  lib/Makefile                                                           \
+kernel/Makefile                                                                \
  " | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15
  EOF
  cat >> $CONFIG_STATUS <<EOF
@@ -2106,6 +2107,7 @@ make.tmpl                                                               \
  include/Makefile                                                        \
  dmsetup/Makefile                                                       \
  lib/Makefile                                                           \
+kernel/Makefile                                                                \
  "}
  EOF
  cat >> $CONFIG_STATUS <<\EOF
diff --git a/configure.in b/configure.in

index 992ae82e21cb643a4ed5e042bea2c21f5b63dfc3..935397b904bf25b3d974e8113d5198c696228854 100644 (file)
--- a/configure.in
+++ b/configure.in
@@ -98,4 +98,5 @@ make.tmpl                                                               \
  include/Makefile                                                        \
  dmsetup/Makefile                                                       \
  lib/Makefile                                                           \
+kernel/Makefile                                                                \
  )
diff --git a/kernel/Makefile.in b/kernel/Makefile.in

new file mode 100644 (file)

index 0000000..f8e7597
--- /dev/null
+++ b/kernel/Makefile.in
@@ -0,0 +1,50 @@
+#
+# Copyright (C) 2001 Sistina Software
+#
+# This LVM library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This LVM library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this LVM library; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA
+
+SHELL = /bin/sh
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+
+interface = @interface@
+kerneldir = @kerneldir@
+
+LN_S = @LN_S@
+
+# Default target: (re)create the driver symlinks in the kernel tree.
+all: symlinks
+
+# First remove any existing symlink in $(kerneldir)/drivers/md that has
+# the name of a file under common/, fs/ or ioctl/ (only symlinks are
+# removed, see the -L test), then link in the files from common/ and
+# from the configured $(interface) directory.
+symlinks:
+       for i in common/* fs/* ioctl/* ; do \
+               if [ -L $(kerneldir)/drivers/md/`basename $$i` ] ; \
+                       then $(RM) $(kerneldir)/drivers/md/`basename $$i`; \
+               fi; \
+       done
+       for i in common/* $(interface)/* ; do \
+               $(LN_S) `pwd`/$$i $(kerneldir)/drivers/md ; \
+       done
+
+# Nothing to install or clean in this directory.
+install:
+
+clean:
+
+distclean:
+       $(RM) Makefile
+
+# symlinks never creates a file of that name, so it must be declared
+# phony like the other targets; otherwise a stray file called
+# "symlinks" would stop the rule from running.
+.PHONY: install clean distclean all symlinks
+
diff --git a/kernel/common/device-mapper.h b/kernel/common/device-mapper.h

new file mode 100644 (file)

index 0000000..2b7d254
--- /dev/null
+++ b/kernel/common/device-mapper.h
@@ -0,0 +1,59 @@
+/*
+ * device-mapper.h
+ *
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef DEVICE_MAPPER_H
+#define DEVICE_MAPPER_H
+
+#define DM_DIR "device-mapper"
+#define DM_MAX_TYPE_NAME 16
+
+struct dm_table;
+struct dm_dev;
+typedef unsigned int offset_t;
+
+typedef void (*dm_error_fn)(const char *message, void *private);
+
+/*
+ * constructor, destructor and map fn types
+ */
+typedef int (*dm_ctr_fn)(struct dm_table *t, offset_t b, offset_t l,
+                        char *args, void **context);
+
+typedef void (*dm_dtr_fn)(struct dm_table *t, void *c);
+typedef int (*dm_map_fn)(struct buffer_head *bh, int rw, void *context);
+typedef int (*dm_err_fn)(struct buffer_head *bh, int rw, void *context);
+typedef char *(*dm_print_fn)(void *context);
+
+/*
+ * Constructors should call this to make sure any
+ * destination devices are handled correctly
+ * (ie. opened/closed).
+ */
+int dm_table_get_device(struct dm_table *t, const char *path,
+                       offset_t start, offset_t len,
+                       struct dm_dev **result);
+void dm_table_put_device(struct dm_table *table, struct dm_dev *d);
+
+/*
+ * Information about a target type.
+ *
+ * A target fills one of these in and hands it to
+ * dm_register_target() / dm_unregister_target().
+ */
+struct target_type {
+        const char *name;       /* name the type is registered under */
+        struct module *module;  /* module providing the type */
+        dm_ctr_fn ctr;          /* constructor, see dm_ctr_fn */
+        dm_dtr_fn dtr;          /* destructor, see dm_dtr_fn */
+        dm_map_fn map;          /* maps a buffer_head, see dm_map_fn */
+       dm_err_fn err;          /* NOTE(review): contract not visible here */
+       dm_print_fn print;      /* returns a string for the given context */
+};
+
+int dm_register_target(struct target_type *t);
+int dm_unregister_target(struct target_type *t);
+
+#endif /* DEVICE_MAPPER_H */
+
diff --git a/kernel/common/dm-linear.c b/kernel/common/dm-linear.c

new file mode 100644 (file)

index 0000000..bd11dd8
--- /dev/null
+++ b/kernel/common/dm-linear.c
@@ -0,0 +1,146 @@
+/*
+ * dm-linear.c
+ *
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/device-mapper.h>
+
+#include "dm.h"
+
+/*
+ * linear: maps a linear range of a device.
+ *
+ * Private context of one linear target, created by linear_ctr() and
+ * freed by linear_dtr().
+ */
+struct linear_c {
+       /* added to b_rsector by linear_map(); set to start - b in linear_ctr() */
+       long delta;             /* FIXME: we need a signed offset type */
+       /* destination device, obtained with dm_table_get_device() */
+       struct dm_dev *dev;
+};
+
+/*
+ * Return the next non-empty token from *p, splitting on spaces and
+ * tabs, or NULL once strsep() has nothing left to return.  strsep()
+ * is given p directly, so *p is updated by each call.
+ */
+static inline char *next_token(char **p)
+{
+        static const char *delim = " \t";
+        char *tok = strsep(p, delim);
+
+        /* runs of delimiters produce empty tokens; skip over them */
+        while (tok && *tok == 0)
+                tok = strsep(p, delim);
+
+        return tok;
+}
+
+/*
+ * Construct a linear mapping.
+ *
+ * args has the form: <dev_path> <offset>
+ *
+ * t:       table the target is being added to
+ * b:       start of the target within the mapped device
+ * l:       length of the target
+ * args:    argument string; it is tokenised in place
+ * context: on success receives the new struct linear_c; on failure it
+ *          is left pointing at a static error message
+ *
+ * Returns 0 on success, -EINVAL if an argument is missing, -ENOMEM if
+ * the context cannot be allocated, or the error returned by
+ * dm_table_get_device().
+ */
+static int linear_ctr(struct dm_table *t, offset_t b, offset_t l,
+                     char *args, void **context)
+{
+       struct linear_c *lc;
+       unsigned int start;
+       int r = -EINVAL;
+       char *tok;
+       char *path;
+       char *p = args;
+
+       *context = "No device path given";
+       path = next_token(&p);
+       if (!path)
+               goto bad;
+
+       *context = "No initial offset given";
+       tok = next_token(&p);
+       if (!tok)
+               goto bad;
+       start = simple_strtoul(tok, NULL, 10);
+
+       /*
+        * Size the allocation from the object, not from the pointer:
+        * sizeof(lc) is only the size of a pointer, which is smaller
+        * than struct linear_c and would let the stores below run past
+        * the end of the allocation.
+        */
+       *context = "Cannot allocate linear context private structure";
+       lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+       if (lc == NULL) {
+               r = -ENOMEM;
+               goto bad;
+       }
+
+       *context = "Cannot get target device";
+       r = dm_table_get_device(t, path, start, l, &lc->dev);
+       if (r)
+               goto bad_free;
+
+       /* linear_map() adds this to the sector of every buffer */
+       lc->delta = (int) start - (int) b;
+       *context = lc;
+       return 0;
+
+bad_free:
+       kfree(lc);
+bad:
+       return r;
+}
+
+/*
+ * Destructor: drops the reference on the destination device that
+ * linear_ctr() took and frees the private context.
+ */
+static void linear_dtr(struct dm_table *t, void *c)
+{
+       struct linear_c *ctx = c;
+
+       dm_table_put_device(t, ctx->dev);
+       kfree(ctx);
+}
+
+/*
+ * Redirect a buffer to the destination device, shifting its sector by
+ * the delta stored in the context.  rw is not used.  Always returns 1.
+ */
+static int linear_map(struct buffer_head *bh, int rw, void *context)
+{
+       struct linear_c *ctx = context;
+
+       bh->b_rsector += ctx->delta;
+       bh->b_rdev = ctx->dev->dev;
+
+       return 1;
+}
+
+/*
+ * Debugging use only.
+ *
+ * Formats the target's delta into a static buffer and returns that
+ * buffer, so the text is overwritten by the next call.
+ */
+static char *linear_print(void *context)
+{
+       struct linear_c *lc = (struct linear_c *) context;
+       static char buf[256];
+
+       /* delta is a signed long and can be negative: print it with %ld */
+       sprintf(buf, " %ld", lc->delta);
+       return buf;
+}
+
+static struct target_type linear_target = {
+       name: "linear",
+       module: THIS_MODULE,
+       ctr: linear_ctr,
+       dtr: linear_dtr,
+       map: linear_map,
+       print: linear_print,
+};
+
+/*
+ * Module entry point: registers the linear target type and logs an
+ * error if registration fails.  Returns the result of
+ * dm_register_target().
+ */
+static int __init linear_init(void)
+{
+       int r;
+
+       r = dm_register_target(&linear_target);
+       if (r >= 0)
+               return r;
+
+       printk(KERN_ERR
+              "Device mapper: Linear: register failed %d\n", r);
+       return r;
+}
+
+/*
+ * Module exit point: unregisters the linear target type and logs an
+ * error if that fails.
+ */
+static void __exit linear_exit(void)
+{
+       int r;
+
+       r = dm_unregister_target(&linear_target);
+       if (r >= 0)
+               return;
+
+       printk(KERN_ERR
+              "Device mapper: Linear: unregister failed %d\n", r);
+}
+
+module_init(linear_init);
+module_exit(linear_exit);
+
+MODULE_AUTHOR("Joe Thornber <thornber@uk.sistina.com>");
+MODULE_DESCRIPTION("Device Mapper: Linear mapping");
+MODULE_LICENSE("GPL");
+
diff --git a/kernel/common/dm-stripe.c b/kernel/common/dm-stripe.c

new file mode 100644 (file)

index 0000000..d128411
--- /dev/null
+++ b/kernel/common/dm-stripe.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/device-mapper.h>
+
+#include "dm.h"
+
+struct stripe {
+       struct dm_dev *dev;
+       offset_t physical_start;
+};
+
+/*
+ * Private context of one striped target, built by stripe_ctr() and
+ * freed by stripe_dtr().
+ */
+struct stripe_c {
+       /* start of the target (the 'b' passed to stripe_ctr()) */
+       offset_t logical_start;
+       /* number of entries in stripe[] */
+       uint32_t stripes;
+
+       /* The size of this target / num. stripes */
+       uint32_t stripe_width;
+
+       /* eg, we stripe in 64k chunks */
+       /* shift derived from the chunk size in stripe_ctr() */
+       uint32_t chunk_shift;
+       /* chunk size - 1, selects the offset within a chunk */
+       offset_t chunk_mask;
+
+       /* trailing array; alloc_context() sizes the allocation for it */
+       struct stripe stripe[0];
+};
+
+
+/*
+ * Allocate a stripe_c with room for 'stripes' trailing struct stripe
+ * entries.
+ *
+ * Returns NULL if the count is negative, if the total size does not
+ * fit in a size_t, or if kmalloc() fails.  The memory is returned
+ * uninitialised.
+ */
+static inline struct stripe_c *alloc_context(int stripes)
+{
+       size_t len;
+
+       /*
+        * The count is parsed from the table arguments, so refuse
+        * values for which the multiplication below would wrap round
+        * and produce an allocation that is too small for the array.
+        */
+       if (stripes < 0 ||
+           (size_t) stripes > (~(size_t) 0 - sizeof(struct stripe_c)) /
+                              sizeof(struct stripe))
+               return NULL;
+
+       len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
+       return kmalloc(len, GFP_KERNEL);
+}
+
+/*
+ * Parses a single <dev> <sector> pair and acquires the device.
+ *
+ * t:      table the device reference is taken on
+ * sc:     stripe context; sc->stripe_width must already be set
+ * stripe: index of the sc->stripe[] slot to fill in
+ * args:   text starting at the pair to parse
+ *
+ * Returns the number of characters of args that were consumed,
+ * -EINVAL if the pair cannot be parsed, or -ENXIO if the device
+ * cannot be acquired.
+ */
+static int get_stripe(struct dm_table *t, struct stripe_c *sc,
+                     int stripe, char *args)
+{
+       int n = -1;
+       char path[256];
+       unsigned long start;
+
+       /* the field width keeps sscanf() from writing past path[] */
+       if (sscanf(args, "%255s %lu %n", path, &start, &n) != 2)
+               return -EINVAL;
+
+       /*
+        * Both conversions succeeded but %n was never stored: scanning
+        * can only have stopped at the end of the string, so the whole
+        * of args was consumed.
+        */
+       if (n < 0)
+               n = strlen(args);
+
+       if (dm_table_get_device(t, path, start, sc->stripe_width,
+                               &sc->stripe[stripe].dev))
+               return -ENXIO;
+
+       sc->stripe[stripe].physical_start = start;
+       return n;
+}
+
+/*
+ * Construct a striped mapping.
+ *
+ * args has the form:
+ * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
+ *
+ * t:       table the target is being added to
+ * b:       start of the target within the mapped device
+ * l:       length of the target; must be a multiple of the stripe count
+ * context: on success receives the new struct stripe_c; on failure it
+ *          is left pointing at a static error message
+ *
+ * Returns 0 on success, -EINVAL for arguments that are malformed or
+ * inconsistent, -ENOMEM if the context cannot be allocated, or the
+ * error returned by get_stripe().  Nothing is left allocated and no
+ * device reference is held when an error is returned.
+ */
+static int stripe_ctr(struct dm_table *t, offset_t b, offset_t l,
+                     char *args, void **context)
+{
+       struct stripe_c *sc;
+       uint32_t stripes;
+       uint32_t chunk_size;
+       uint32_t i;
+       int n = -1;
+
+       *context = "couldn't parse <stripes> <chunk size>";
+       if (sscanf(args, "%u %u %n", &stripes, &chunk_size, &n) != 2)
+               return -EINVAL;
+
+       /*
+        * Both numbers were read but %n was never stored: scanning
+        * stopped at the end of the string, so all of it was consumed.
+        */
+       if (n < 0)
+               n = strlen(args);
+
+       /* a zero count would divide by zero just below */
+       *context = "number of stripes must be non-zero";
+       if (!stripes)
+               return -EINVAL;
+
+       *context = "target length is not divisable by the number of stripes";
+       if (l % stripes)
+               return -EINVAL;
+
+       /*
+        * chunk_size has to be a power of two because only its shift
+        * and mask are stored.  It is checked before the context is
+        * allocated so that rejecting it cannot leak the context.
+        */
+       *context = "invalid chunk size";
+       if (!chunk_size || (chunk_size & (chunk_size - 1)))
+               return -EINVAL;
+
+       *context = "couldn't allocate memory for striped context";
+       if (!(sc = alloc_context(stripes)))
+               return -ENOMEM;
+
+       sc->logical_start = b;
+       sc->stripes = stripes;
+       sc->stripe_width = l / stripes;
+
+       /* derive the mask, and the shift as log2(chunk_size) */
+       sc->chunk_mask = chunk_size - 1;
+       for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
+               chunk_size >>= 1;
+       sc->chunk_shift--;
+
+       /*
+        * Get the stripe destinations.
+        */
+       for (i = 0; i < stripes; i++) {
+               args += n;
+               n = get_stripe(t, sc, i, args);
+
+               *context = "couldn't parse stripe destination";
+               if (n < 0) {
+                       /* release the devices of the stripes already set up */
+                       while (i--)
+                               dm_table_put_device(t, sc->stripe[i].dev);
+                       kfree(sc);
+                       return n;
+               }
+       }
+
+       *context = sc;
+       return 0;
+}
+
+/*
+ * Destructor: drops the device reference held by every stripe, in
+ * index order, then frees the context.
+ */
+static void stripe_dtr(struct dm_table *t, void *c)
+{
+       struct stripe_c *sc = c;
+       unsigned int idx = 0;
+
+       while (idx < sc->stripes) {
+               dm_table_put_device(t, sc->stripe[idx].dev);
+               idx++;
+       }
+
+       kfree(sc);
+}
+
+/*
+ * Redirect a buffer to the stripe that holds its chunk.
+ *
+ * The offset into the target is split into a chunk number and an
+ * offset within the chunk.  The chunk number modulo the stripe count
+ * selects the stripe; the chunk number divided by the stripe count is
+ * the chunk's position on that stripe.  rw is not used.  Always
+ * returns 1.
+ */
+static int stripe_map(struct buffer_head *bh, int rw, void *context)
+{
+       struct stripe_c *sc = context;
+       offset_t rel = bh->b_rsector - sc->logical_start;
+       uint32_t chunk_nr = (uint32_t) (rel >> sc->chunk_shift);
+       uint32_t which = chunk_nr % sc->stripes;        /* 32bit modulus */
+       uint32_t row = chunk_nr / sc->stripes;
+       struct stripe *dst = &sc->stripe[which];
+
+       bh->b_rdev = dst->dev->dev;
+       bh->b_rsector = dst->physical_start +
+               (row << sc->chunk_shift) +
+               (rel & sc->chunk_mask);
+
+       return 1;
+}
+
+static struct target_type stripe_target = {
+       name: "striped",
+       module: THIS_MODULE,
+       ctr: stripe_ctr,
+       dtr: stripe_dtr,
+       map: stripe_map,
+};
+
+/*
+ * Module entry point: registers the striped target type and warns if
+ * registration fails.  Returns the result of dm_register_target().
+ */
+static int __init stripe_init(void)
+{
+       int r;
+
+       /* name the striped target in the message, matching stripe_exit() */
+       if ((r = dm_register_target(&stripe_target)) < 0)
+               WARN("striped target register failed");
+
+       return r;
+}
+
+/*
+ * Module exit point: unregisters the striped target type and warns if
+ * dm_unregister_target() reports anything other than 0.
+ */
+static void __exit stripe_exit(void)
+{
+       int r = dm_unregister_target(&stripe_target);
+
+       if (r != 0)
+               WARN("striped target unregister failed");
+}
+
+module_init(stripe_init);
+module_exit(stripe_exit);
+
+MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
+MODULE_DESCRIPTION("Device Mapper: Striped mapping");
+MODULE_LICENSE("GPL");
diff --git a/kernel/common/dm-table.c b/kernel/common/dm-table.c

new file mode 100644 (file)

index 0000000..f0a350b
--- /dev/null
+++ b/kernel/common/dm-table.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/blkdev.h>
+
+
+/* ceiling(n / size) * size, i.e. n rounded up to a multiple of size */
+static inline ulong round_up(ulong n, ulong size)
+{
+       ulong rem = n % size;
+
+       if (!rem)
+               return n;
+
+       return n + (size - rem);
+}
+
+/* ceiling(n / size), computed by rounding n up to a multiple first */
+static inline ulong div_up(ulong n, ulong size)
+{
+       ulong rounded = round_up(n, size);
+
+       return rounded / size;
+}
+
+/*
+ * Similar to ceiling(log_base(n)): counts how many times n has to be
+ * divided by base, rounding up each time, before it is no longer
+ * greater than 1.
+ */
+static uint int_log(ulong n, ulong base)
+{
+       int steps;
+
+       for (steps = 0; n > 1; steps++)
+               n = div_up(n, base);
+
+       return steps;
+}
+
+/*
+ * Return the highest key that you could look up from the n'th node on
+ * level l of the btree, or (offset_t) -1 if the node reached on the
+ * last level lies beyond that level's node count.
+ */
+static offset_t high(struct dm_table *t, int l, int n)
+{
+       /* follow the last child of each node down to the last level */
+       while (l < t->depth - 1) {
+               n = get_child(n, CHILDREN_PER_NODE - 1);
+               l++;
+       }
+
+       if (n < t->counts[l])
+               return get_node(t, l, n)[KEYS_PER_NODE - 1];
+
+       return (offset_t) -1;
+}
+
+/*
+ * Fills in level l of the btree: key k of node n becomes the highest
+ * key reachable through that node's k'th child on the level below.
+ * Always returns 0.
+ */
+static int setup_btree_index(int l, struct dm_table *t)
+{
+       int n;
+
+       for (n = 0; n < t->counts[l]; n++) {
+               offset_t *keys = get_node(t, l, n);
+               int k = 0;
+
+               while (k < KEYS_PER_NODE) {
+                       keys[k] = high(t, l + 1, get_child(n, k));
+                       k++;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * highs and targets are managed as dynamic arrays during a table
+ * load.
+ *
+ * (Re)allocates room for num entries of each.  Both arrays share one
+ * vmalloc() block, the highs first and the targets right behind them,
+ * so freeing t->highs releases both.  The entries already in use are
+ * copied across and the remaining highs are filled with 0xff bytes.
+ *
+ * Returns 0 on success, or -ENOMEM with t left untouched.
+ */
+static int alloc_targets(struct dm_table *t, int num)
+{
+       int used = t->num_targets;
+       int bytes = (sizeof(struct target) + sizeof(offset_t)) * num;
+       offset_t *highs;
+       struct target *targets;
+
+       highs = vmalloc(bytes);
+       if (!highs)
+               return -ENOMEM;
+
+       /* the target array starts right behind the num high keys */
+       targets = (struct target *) (highs + num);
+
+       if (used) {
+               memcpy(targets, t->targets, sizeof(*targets) * used);
+               memcpy(highs, t->highs, sizeof(*highs) * used);
+       }
+
+       memset(highs + used, -1, sizeof(*highs) * (num - used));
+       vfree(t->highs);
+
+       t->highs = highs;
+       t->targets = targets;
+       t->num_allocated = num;
+
+       return 0;
+}
+
+/*
+ * Allocate an empty, zeroed table with an empty device list and room
+ * for a single node's worth of targets.
+ *
+ * Returns the table, or ERR_PTR(-ENOMEM) if either allocation fails.
+ */
+struct dm_table *dm_table_create(void)
+{
+       struct dm_table *t;
+
+       t = kmalloc(sizeof(*t), GFP_NOIO);
+       if (!t)
+               return ERR_PTR(-ENOMEM);
+
+       memset(t, 0, sizeof(*t));
+       INIT_LIST_HEAD(&t->devices);
+
+       /* start with a single node's worth of targets */
+       if (!alloc_targets(t, KEYS_PER_NODE))
+               return t;
+
+       kfree(t);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Free every dm_dev on the list.  The entries are only kfree()d: they
+ * are neither unlinked nor passed to close_dev(), and the list head
+ * itself is left as it was.
+ */
+static void free_devices(struct list_head *devices)
+{
+       struct list_head *pos = devices->next;
+
+       while (pos != devices) {
+               struct dm_dev *dd = list_entry(pos, struct dm_dev, list);
+
+               /* step on before the entry is freed */
+               pos = pos->next;
+               kfree(dd);
+       }
+}
+
+/*
+ * Tear down a table: frees the btree index, runs each target's
+ * destructor (when it has one) and drops its target type reference,
+ * frees the highs/targets arrays and finally the table itself.
+ */
+void dm_table_destroy(struct dm_table *t)
+{
+       int i;
+
+       /* free the indexes (see dm_table_complete) */
+       /* setup_indexes() stores the start of its one allocation in
+          index[depth - 2], so a single vfree covers every level */
+       if (t->depth >= 2)
+               vfree(t->index[t->depth - 2]);
+
+       /* free the targets */
+       for (i = 0; i < t->num_targets; i++) {
+               struct target *tgt = &t->targets[i];
+
+               if (tgt->type->dtr)
+                       tgt->type->dtr(t, tgt->private);
+
+               dm_put_target_type(t->targets[i].type);
+       }
+
+       /* highs and targets share one allocation (see alloc_targets) */
+       vfree(t->highs);
+
+       /* free the device list */
+       /* NOTE(review): the message names dm_table_remove_device, but the
+          release function defined in this file is dm_table_put_device */
+       if (t->devices.next != &t->devices) {
+               WARN("there are still devices present, someone isn't "
+                    "calling dm_table_remove_device");
+
+               free_devices(&t->devices);
+       }
+
+       kfree(t);
+}
+
+/*
+ * Checks to see if we need to extend highs or targets, and doubles
+ * the allocation when every allocated slot is in use.
+ *
+ * Returns 0 if there is room, otherwise the result of
+ * alloc_targets().
+ */
+static inline int check_space(struct dm_table *t)
+{
+       if (t->num_targets < t->num_allocated)
+               return 0;
+
+       return alloc_targets(t, t->num_allocated * 2);
+}
+
+
+/*
+ * convert a device path to a kdev_t.
+ *
+ * On success stores the inode's i_rdev in *dev and returns 0.
+ * Returns the path_walk() error, -ENOENT if the dentry has no inode,
+ * or -EINVAL if the inode is not a block device.
+ */
+int lookup_device(const char *path, kdev_t *dev)
+{
+       int r;
+       struct nameidata nd;
+       struct inode *inode;
+
+       /*
+        * NOTE(review): this path returns 0 without writing *dev, and
+        * dm_table_get_device() uses dev whenever 0 is returned --
+        * confirm what a zero return from path_init() means here.
+        */
+       if (!path_init(path, LOOKUP_FOLLOW, &nd))
+               return 0;
+
+       if ((r = path_walk(path, &nd)))
+               goto bad;
+
+       inode = nd.dentry->d_inode;
+       if (!inode) {
+               r = -ENOENT;
+               goto bad;
+       }
+
+       if (!S_ISBLK(inode->i_mode)) {
+               r = -EINVAL;
+               goto bad;
+       }
+
+       *dev = inode->i_rdev;
+
+       /* the success path falls through with r == 0 from path_walk() */
+ bad:
+       path_release(&nd);
+       return r;
+}
+
+/*
+ * See if we've already got a device in the list.  Returns the entry
+ * whose dev equals the one asked for, or NULL if there is none.
+ */
+static struct dm_dev *find_device(struct list_head *l, kdev_t dev)
+{
+       struct list_head *pos;
+       struct dm_dev *candidate;
+
+       list_for_each(pos, l) {
+               candidate = list_entry(pos, struct dm_dev, list);
+               if (candidate->dev == dev)
+                       return candidate;
+       }
+
+       return NULL;
+}
+
+/*
+ * Open a device so we can use it as a map destination.
+ *
+ * d->bd must not be set yet (BUG() otherwise).  Returns 0 on success,
+ * -ENOMEM if bdget() yields nothing, or the error from blkdev_get();
+ * in the last case d->bd keeps the value bdget() returned.
+ */
+static int open_dev(struct dm_dev *d)
+{
+       if (d->bd)
+               BUG();
+
+       d->bd = bdget(kdev_t_to_nr(d->dev));
+       if (!d->bd)
+               return -ENOMEM;
+
+       return blkdev_get(d->bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE);
+}
+
+/*
+ * Close a device that we've been using.  Does nothing if the device
+ * has no block device attached; otherwise releases it and clears
+ * d->bd.
+ */
+static void close_dev(struct dm_dev *d)
+{
+       if (d->bd) {
+               blkdev_put(d->bd, BDEV_FILE);
+               d->bd = NULL;
+       }
+}
+
+/*
+ * If possible (ie. blk_size[major] is set), this checks that an area
+ * of a destination device is valid.
+ *
+ * Returns 1 if [start, start + len) fits on the device or if the
+ * device size is not known, 0 otherwise.
+ */
+static int check_device_area(kdev_t dev, offset_t start, offset_t len)
+{
+       int *sizes = blk_size[MAJOR(dev)];
+       offset_t dev_size;
+
+       /* we don't know the device details,
+        * so give the benefit of the doubt */
+       if (!sizes)
+               return 1;
+
+       dev_size = sizes[MINOR(dev)];
+       if (!dev_size)
+               return 1;
+
+       /* convert to 512-byte sectors */
+       dev_size <<= 1;
+
+       if (start >= dev_size)
+               return 0;
+
+       return (len <= (dev_size - start));
+}
+
+/*
+ * Add a device to the table's list, or just increment the usage count
+ * if it's already present.
+ *
+ * t:      table that owns the device list
+ * path:   path of the device node
+ * start:  first sector the caller will use
+ * len:    number of sectors the caller will use
+ * result: receives the dm_dev on success; not written on failure
+ *
+ * Returns 0 on success, the error from lookup_device() or open_dev(),
+ * -ENOMEM if no dm_dev can be allocated, or -EINVAL if the requested
+ * area does not fit on the device.
+ */
+int dm_table_get_device(struct dm_table *t, const char *path,
+                       offset_t start, offset_t len,
+                       struct dm_dev **result)
+{
+       int r;
+       kdev_t dev;
+       struct dm_dev *dd;
+
+       /* convert the path to a device */
+       r = lookup_device(path, &dev);
+       if (r)
+               return r;
+
+       dd = find_device(&t->devices, dev);
+       if (dd == NULL) {
+               /* first use within this table: open it and add it */
+               dd = kmalloc(sizeof(*dd), GFP_KERNEL);
+               if (dd == NULL)
+                       return -ENOMEM;
+
+               dd->dev = dev;
+               dd->bd = NULL;
+
+               r = open_dev(dd);
+               if (r) {
+                       kfree(dd);
+                       return r;
+               }
+
+               atomic_set(&dd->count, 0);
+               list_add(&dd->list, &t->devices);
+       }
+       atomic_inc(&dd->count);
+
+       if (check_device_area(dd->dev, start, len)) {
+               *result = dd;
+               return 0;
+       }
+
+       /* give back the reference that was just taken */
+       WARN("device '%s' not large enough for target", path);
+       dm_table_put_device(t, dd);
+       return -EINVAL;
+}
+
+/*
+ * Decrement a device's use count; when it drops to zero the device is
+ * closed, unlinked from its list and freed.  t is not used.
+ */
+void dm_table_put_device(struct dm_table *t, struct dm_dev *dd)
+{
+       if (!atomic_dec_and_test(&dd->count))
+               return;
+
+       close_dev(dd);
+       list_del(&dd->list);
+       kfree(dd);
+}
+
+/*
+ * Adds a target to the map.
+ *
+ * Appends (high, type, private) as the next entry of the table,
+ * growing the arrays first if they are full.  Returns 0 on success or
+ * the error from check_space().
+ */
+int dm_table_add_target(struct dm_table *t, offset_t high,
+                       struct target_type *type, void *private)
+{
+       int slot;
+       int r = check_space(t);
+
+       if (r)
+               return r;
+
+       slot = t->num_targets;
+       t->highs[slot] = high;
+       t->targets[slot].type = type;
+       t->targets[slot].private = private;
+       t->num_targets = slot + 1;
+
+       return 0;
+}
+
+
+/*
+ * Allocate and fill in every internal level of the btree.
+ *
+ * The node count of each level is derived from the level below it,
+ * then one vmalloc() block is carved up between the levels, starting
+ * with level depth - 2, and each level is filled in bottom-up.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int setup_indexes(struct dm_table *t)
+{
+       int level;
+       int nodes = 0;
+       offset_t *mem;
+
+       /* allocate the space for *all* the indexes */
+       for (level = t->depth - 2; level >= 0; level--) {
+               t->counts[level] = div_up(t->counts[level + 1],
+                                         CHILDREN_PER_NODE);
+               nodes += t->counts[level];
+       }
+
+       mem = vmalloc(NODE_SIZE * nodes);
+       if (!mem)
+               return -ENOMEM;
+
+       /* set up internal nodes, bottom-up */
+       for (level = t->depth - 2; level >= 0; level--) {
+               t->index[level] = mem;
+               mem += (KEYS_PER_NODE * t->counts[level]);
+               setup_btree_index(level, t);
+       }
+
+       return 0;
+}
+
+
+/*
+ * Builds the btree to index the map.
+ *
+ * Works out the depth from the number of leaf nodes, points the leaf
+ * level at t->highs and, if there is more than one level, builds the
+ * internal levels.  Returns 0 on success or the error from
+ * setup_indexes().
+ */
+int dm_table_complete(struct dm_table *t)
+{
+       int leaves;
+
+       /* how many indexes will the btree have ? */
+       leaves = div_up(t->num_targets, KEYS_PER_NODE);
+       t->depth = 1 + int_log(leaves, CHILDREN_PER_NODE);
+
+       /* leaf layer has already been set up */
+       t->counts[t->depth - 1] = leaves;
+       t->index[t->depth - 1] = t->highs;
+
+       if (t->depth < 2)
+               return 0;
+
+       return setup_indexes(t);
+}
+
+EXPORT_SYMBOL(dm_table_get_device);
+EXPORT_SYMBOL(dm_table_put_device);
diff --git a/kernel/common/dm-target.c b/kernel/common/dm-target.c

new file mode 100644 (file)

index 0000000..1b88509
--- /dev/null
+++ b/kernel/common/dm-target.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+#include <linux/kmod.h>
+
+/*
+ * Registry entry for one target type.
+ */
+struct tt_internal {
+       /* copy of the registered type; must stay the first member
+          because dm_put_target_type() casts a pointer to it back to
+          struct tt_internal */
+       struct target_type tt;
+
+        /* link in the _targets list */
+        struct list_head list;
+        /* number of references handed out by get_target_type() */
+        long use;
+};
+
+static LIST_HEAD(_targets);
+static rwlock_t _lock = RW_LOCK_UNLOCKED;
+
+#define DM_MOD_NAME_SIZE 32
+
+/*
+ * Look up a registered target type by name.  Returns the entry whose
+ * name matches exactly, or NULL.  Takes no lock itself; the callers
+ * in this file hold _lock around it.
+ */
+static inline struct tt_internal *__find_target_type(const char *name)
+{
+       struct list_head *pos;
+
+       list_for_each(pos, &_targets) {
+               struct tt_internal *entry =
+                       list_entry(pos, struct tt_internal, list);
+
+               if (strcmp(name, entry->tt.name) == 0)
+                       return entry;
+       }
+
+       return NULL;
+}
+
+/*
+ * Find a registered target type and take a reference on it.  The
+ * first reference also bumps the use count of the module that
+ * provides the type, if it has one.  Returns NULL if no type of that
+ * name is registered.
+ */
+static struct tt_internal *get_target_type(const char *name)
+{
+       struct tt_internal *found;
+
+       read_lock(&_lock);
+
+       found = __find_target_type(name);
+       if (found != NULL) {
+               if (found->tt.module && found->use == 0)
+                       __MOD_INC_USE_COUNT(found->tt.module);
+               found->use += 1;
+       }
+
+       read_unlock(&_lock);
+
+       return found;
+}
+
+/*
+ * Ask for the module "dm-<name>" to be loaded.  Names too long for
+ * the module name buffer are silently ignored.
+ */
+static void load_module(const char *name)
+{
+       char module_name[DM_MOD_NAME_SIZE] = "dm-";
+
+       /* Length check for strcat() below */
+       if (strlen(name) <= (DM_MOD_NAME_SIZE - 4)) {
+               strcat(module_name, name);
+               request_module(module_name);
+       }
+}
+
+/*
+ * Get a reference to the target type called name.  If it is not
+ * registered yet, a load of its module is requested and the lookup is
+ * tried once more.  Returns NULL if the type is still unknown.
+ */
+struct target_type *dm_get_target_type(const char *name)
+{
+       struct tt_internal *ti;
+
+       ti = get_target_type(name);
+       if (ti)
+               return &ti->tt;
+
+       load_module(name);
+
+       ti = get_target_type(name);
+       if (!ti)
+               return NULL;
+
+       return &ti->tt;
+}
+
+/*
+ * Drop a reference taken with dm_get_target_type().  Dropping the
+ * last one also releases the use count on the providing module, if
+ * there is one.  A count that goes negative is a BUG().
+ */
+void dm_put_target_type(struct target_type *t)
+{
+       /* t is the tt member at the start of a struct tt_internal */
+       struct tt_internal *ti = (struct tt_internal *) t;
+
+       read_lock(&_lock);
+
+       ti->use--;
+       if (ti->use == 0 && ti->tt.module)
+               __MOD_DEC_USE_COUNT(ti->tt.module);
+
+       if (ti->use < 0)
+               BUG();
+
+       read_unlock(&_lock);
+}
+
+/*
+ * Allocate a zeroed registry entry holding a copy of *t.  Returns
+ * NULL if the allocation fails.
+ */
+static struct tt_internal *alloc_target(struct target_type *t)
+{
+       struct tt_internal *ti;
+
+       ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+       if (!ti)
+               return NULL;
+
+       memset(ti, 0, sizeof(*ti));
+       ti->tt = *t;
+
+       return ti;
+}
+
+/*
+ * Register a target type.  A copy of *t is added to the list of
+ * known types.
+ *
+ * Returns 0 on success, -ENOMEM if the registry entry cannot be
+ * allocated, or -EEXIST if a type of that name is already registered.
+ */
+int dm_register_target(struct target_type *t)
+{
+       int rv = 0;
+       struct tt_internal *ti = alloc_target(t);
+
+       if (!ti)
+               return -ENOMEM;
+
+       write_lock(&_lock);
+       if (__find_target_type(t->name))
+               rv = -EEXIST;
+       else
+               list_add(&ti->list, &_targets);
+       write_unlock(&_lock);
+
+       /* the entry was never linked in, so nothing else will free it */
+       if (rv)
+               kfree(ti);
+
+       return rv;
+}
+
+/*
+ * Unregister a target type, looked up by t->name.
+ *
+ * Returns 0 on success, -EINVAL if no such type is registered, or
+ * -ETXTBSY if references to it are still held.
+ */
+int dm_unregister_target(struct target_type *t)
+{
+       struct tt_internal *ti;
+       int rv = 0;
+
+       write_lock(&_lock);
+
+       ti = __find_target_type(t->name);
+       if (!ti) {
+               rv = -EINVAL;
+       } else if (ti->use) {
+               rv = -ETXTBSY;
+       } else {
+               list_del(&ti->list);
+               kfree(ti);
+       }
+
+       write_unlock(&_lock);
+       return rv;
+}
+
+/*
+ * io-err: always fails an io, useful for bringing
+ * up LV's that have holes in them.
+ *
+ * Constructor: takes no arguments and needs no private data, so the
+ * context is set to NULL.  Always returns 0.
+ */
+static int io_err_ctr(struct dm_table *t, offset_t b, offset_t l,
+                     char *args, void **context)
+{
+       *context = NULL;
+       return 0;
+}
+
+/*
+ * Destructor: io_err_ctr() allocates nothing, so there is nothing to
+ * release.
+ */
+static void io_err_dtr(struct dm_table *t, void *c)
+{
+       /* empty */
+}
+
+/*
+ * Map function: fails the buffer with buffer_IO_error() and returns
+ * 0, whereas the linear and striped map functions return 1 after
+ * redirecting the buffer.
+ */
+static int io_err_map(struct buffer_head *bh, int rw, void *context)
+{
+       buffer_IO_error(bh);
+       return 0;
+}
+
+static struct target_type error_target = {
+       name: "error",
+       ctr: io_err_ctr,
+       dtr: io_err_dtr,
+       map: io_err_map
+};
+
+
+/*
+ * Registers the built-in "error" target type.  Returns the result of
+ * dm_register_target().
+ */
+int dm_target_init(void)
+{
+       return dm_register_target(&error_target);
+}
+
+EXPORT_SYMBOL(dm_register_target);
+EXPORT_SYMBOL(dm_unregister_target);
+
diff --git a/kernel/common/dm.c b/kernel/common/dm.c

new file mode 100644 (file)

index 0000000..df9473d
--- /dev/null
+++ b/kernel/common/dm.c
@@ -0,0 +1,900 @@
+/*
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/blk.h>
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include <linux/kmod.h>
+
+/* we only need this for the lv_bmap struct definition, not happy */
+#include <linux/lvm.h>
+
+#define MAX_DEVICES 64
+#define DEFAULT_READ_AHEAD 64
+#define DEVICE_NAME "device-mapper"
+
+static const char *_name = DEVICE_NAME;
+static int _version[3] = {0, 1, 0};
+static int major = 0;
+
+/*
+ * Bookkeeping attached to bh->b_private while a mapped io is in
+ * flight.  It keeps the buffer's original completion routine and
+ * private data so that dec_pending() can put them back.
+ */
+struct io_hook {
+       struct mapped_device *md;       /* device the io was issued to */
+       struct target *target;          /* target that mapped the buffer */
+       int rw;                         /* rw value given to the request fn */
+
+       /* original bh->b_end_io and bh->b_private, saved before hooking */
+       void (*end_io)(struct buffer_head * bh, int uptodate);
+       void *context;
+};
+
+static kmem_cache_t *_io_hook_cache;
+
+/* shorthand for taking/dropping _dev_lock: rl/ru for read, wl/wu for write */
+#define rl down_read(&_dev_lock)
+#define ru up_read(&_dev_lock)
+#define wl down_write(&_dev_lock)
+#define wu up_write(&_dev_lock)
+
+static struct rw_semaphore _dev_lock;
+static struct mapped_device *_devs[MAX_DEVICES];
+
+/* block device arrays */
+static int _block_size[MAX_DEVICES];
+static int _blksize_size[MAX_DEVICES];
+static int _hardsect_size[MAX_DEVICES];
+
+static devfs_handle_t _dev_dir;
+
+static int request(request_queue_t *q, int rw, struct buffer_head *bh);
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
+
+/*
+ * setup and teardown the driver
+ */
+/*
+ * Module initialisation.  Creates the io_hook slab cache, registers
+ * the built-in target, brings up the control interface, registers
+ * the block device and installs request() as the make_request
+ * function.  Returns 0 on success or a negative errno.
+ */
+static int __init dm_init(void)
+{
+       int ret = -ENOMEM;
+
+       init_rwsem(&_dev_lock);
+
+       _io_hook_cache = kmem_cache_create("dm io hooks",
+                                          sizeof(struct io_hook),
+                                          0, 0, NULL, NULL);
+
+       if (!_io_hook_cache)
+               goto err;
+
+       ret = dm_target_init();
+       if (ret < 0)
+               goto err_cache_free;
+
+       /*
+        * NOTE(review): the failure paths below do not undo the target
+        * registration made by dm_target_init() -- confirm whether that
+        * matters when the module fails to load.
+        */
+       ret = dm_interface_init();
+       if (ret < 0)
+               goto err_cache_free;
+
+       ret = devfs_register_blkdev(major, _name, &dm_blk_dops);
+       if (ret < 0)
+               goto err_blkdev;
+
+       /* no major was requested, so adopt the one returned to us */
+       if (major == 0)
+               major = ret;
+
+       /* set up the arrays */
+       read_ahead[major] = DEFAULT_READ_AHEAD;
+       blk_size[major] = _block_size;
+       blksize_size[major] = _blksize_size;
+       hardsect_size[major] = _hardsect_size;
+
+       blk_queue_make_request(BLK_DEFAULT_QUEUE(major), request);
+
+       _dev_dir = devfs_mk_dir(0, DM_DIR, NULL);
+
+       printk(KERN_INFO "%s %d.%d.%d initialised\n", _name,
+              _version[0], _version[1], _version[2]);
+       return 0;
+
+       /* unwind in reverse order of setup */
+err_blkdev:
+       printk(KERN_ERR "%s -- register_blkdev failed\n", _name);
+       dm_interface_exit();
+err_cache_free:
+       kmem_cache_destroy(_io_hook_cache);
+err:
+       return ret;
+}
+
+/*
+ * Module teardown.  Shuts down the control interface, destroys the
+ * io_hook cache, unregisters the block device and clears the
+ * per-major arrays that dm_init() filled in.
+ */
+static void __exit dm_exit(void)
+{
+       dm_interface_exit();
+
+       /* a non-zero return is treated as "objects still allocated" */
+       if (kmem_cache_destroy(_io_hook_cache))
+               WARN("it looks like there are still some io_hooks allocated");
+
+       _io_hook_cache = NULL;
+
+       if (devfs_unregister_blkdev(major, _name) < 0)
+               printk(KERN_ERR "%s -- unregister_blkdev failed\n", _name);
+
+       read_ahead[major] = 0;
+       blk_size[major] = NULL;
+       blksize_size[major] = NULL;
+       hardsect_size[major] = NULL;
+
+       printk(KERN_INFO "%s %d.%d.%d cleaned up\n", _name,
+              _version[0], _version[1], _version[2]);
+}
+
+/*
+ * block device functions
+ */
+/*
+ * Open: bump the use count of the mapped device behind this minor.
+ * Returns -ENXIO if the minor is out of range or has no device.
+ */
+static int dm_blk_open(struct inode *inode, struct file *file)
+{
+       int minor = MINOR(inode->i_rdev);
+       int r = -ENXIO;
+
+       if (minor >= MAX_DEVICES)
+               return r;
+
+       wl;
+       if (_devs[minor]) {
+               _devs[minor]->use_count++;
+               r = 0;
+       }
+       wu;
+
+       return r;
+}
+
+/*
+ * Release: drop one reference on the mapped device behind this
+ * minor.  Returns -ENXIO (with a warning) if there is no device or
+ * its use count is already below one.
+ */
+static int dm_blk_close(struct inode *inode, struct file *file)
+{
+       int minor = MINOR(inode->i_rdev);
+       struct mapped_device *md;
+       int r = 0;
+
+       if (minor >= MAX_DEVICES)
+               return -ENXIO;
+
+       wl;
+       md = _devs[minor];
+       if (md && md->use_count >= 1)
+               md->use_count--;
+       else {
+               WARN("reference count in mapped_device incorrect");
+               r = -ENXIO;
+       }
+       wu;
+
+       return r;
+}
+
+/* In 512-byte units */
+#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1)
+
+static int dm_blk_ioctl(struct inode *inode, struct file *file,
+                       uint command, ulong a)
+{
+       int minor = MINOR(inode->i_rdev);
+       long size;
+
+       if (minor >= MAX_DEVICES)
+               return -ENXIO;
+
+       switch (command) {
+       case BLKSSZGET:
+       case BLKBSZGET:
+       case BLKROGET:
+       case BLKROSET:
+       case BLKRASET:
+       case BLKRAGET:
+       case BLKFLSBUF:
+#if 0
+       case BLKELVSET:
+       case BLKELVGET:
+#endif
+               return blk_ioctl(inode->i_rdev, command, a);
+               break;
+
+       case BLKGETSIZE:
+               size = VOLUME_SIZE(minor);
+               if (copy_to_user((void *) a, &size, sizeof (long)))
+                       return -EFAULT;
+               break;
+
+       case BLKGETSIZE64:
+               size = VOLUME_SIZE(minor);
+               if (put_user((u64)size, (u64 *)a))
+                       return -EFAULT;
+               break;
+
+       case BLKRRPART:
+               return -EINVAL;
+
+       case LV_BMAP:
+               return dm_user_bmap(inode, (struct lv_bmap *) a);
+
+       default:
+               WARN("unknown block ioctl %d", command);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Allocate an io_hook from the slab cache using GFP_NOIO; may return NULL. */
+static inline struct io_hook *alloc_io_hook(void)
+{
+       return kmem_cache_alloc(_io_hook_cache, GFP_NOIO);
+}
+
+/* Return an io_hook to the slab cache. */
+static inline void free_io_hook(struct io_hook *ih)
+{
+       kmem_cache_free(_io_hook_cache, ih);
+}
+
+/*
+ * FIXME: need to decide if deferred_io's need
+ * their own slab, I say no for now since they are
+ * only used when the device is suspended.
+ */
+/* Allocate a deferred_io record with kmalloc(GFP_NOIO); may return NULL. */
+static inline struct deferred_io *alloc_deferred(void)
+{
+       return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
+}
+
+/* Free a deferred_io record obtained from alloc_deferred(). */
+static inline void free_deferred(struct deferred_io *di)
+{
+       kfree(di);
+}
+
+/*
+ * Invoke the target's optional error handler for a failed io.
+ * Returns the handler's result, or 0 if the target type has no
+ * handler.
+ */
+static inline int call_err_fn(struct io_hook *ih, struct buffer_head *bh)
+{
+       struct target *ti = ih->target;
+
+       if (!ti->type->err)
+               return 0;
+
+       return ti->type->err(bh, ih->rw, ti->private);
+}
+
+/*
+ * bh->b_end_io routine installed by __map_buffer().  It decrements
+ * the device's pending count, restores the buffer's original
+ * b_end_io and b_private, and then calls the original b_end_io.
+ *
+ * If the io failed and the target's error function returns non-zero,
+ * nothing more is done here: the hook stays attached and the pending
+ * count is left untouched.
+ */
+static void dec_pending(struct buffer_head *bh, int uptodate)
+{
+       struct io_hook *ih = bh->b_private;
+
+       if (!uptodate && call_err_fn(ih, bh))
+               return;
+
+       if (atomic_dec_and_test(&ih->md->pending))
+               /* nudge anyone waiting on suspend queue */
+               wake_up(&ih->md->wait);
+
+       /* put the original completion state back before freeing the hook */
+       bh->b_end_io = ih->end_io;
+       bh->b_private = ih->context;
+       free_io_hook(ih);
+
+       bh->b_end_io(bh, uptodate);
+}
+
+/*
+ * Add the bh to the device's list of deferred io.
+ *
+ * Returns 1 if the io was queued, 0 if the device turned out not to
+ * be suspended (nothing is queued and the caller should map the io
+ * itself), or -ENOMEM if no deferred_io record could be allocated.
+ */
+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw)
+{
+       struct deferred_io *di = alloc_deferred();
+
+       if (!di)
+               return -ENOMEM;
+
+       wl;
+       if (!md->suspended) {
+               wu;
+               /* the record was never queued, so release it here */
+               free_deferred(di);
+               return 0;
+       }
+
+       /* push onto the head of the deferred list */
+       di->bh = bh;
+       di->rw = rw;
+       di->next = md->deferred;
+       md->deferred = di;
+       wu;
+
+       return 1;
+}
+
+/*
+ * Run the map function of the target at index 'leaf' on the buffer.
+ *
+ * A positive result from the target means the io goes ahead, so the
+ * buffer's completion routine is hooked with dec_pending() and the
+ * pending count is raised.  Zero means no hook is needed.  Returns 1
+ * in both of those cases, and 0 if the hook could not be allocated
+ * or the target returned a negative value.
+ */
+static inline int __map_buffer(struct mapped_device *md,
+                              struct buffer_head *bh, int rw, int leaf)
+{
+       struct target *ti = md->map->targets + leaf;
+       struct io_hook *ih = alloc_io_hook();
+       int r;
+
+       if (!ih)
+               return 0;
+
+       /* remember the original completion state before mapping */
+       ih->md = md;
+       ih->rw = rw;
+       ih->target = ti;
+       ih->end_io = bh->b_end_io;
+       ih->context = bh->b_private;
+
+       r = ti->type->map(bh, rw, ti->private);
+
+       if (r > 0) {
+               /* hook the end io request fn */
+               atomic_inc(&md->pending);
+               bh->b_end_io = dec_pending;
+               bh->b_private = ih;
+               return 1;
+       }
+
+       /* not hooking: either nothing to track (0) or the map failed (<0) */
+       free_io_hook(ih);
+       return (r == 0) ? 1 : 0;
+}
+
+/*
+ * Search the btree for the target covering bh->b_rsector.
+ *
+ * At each level the node selected by the previous level's (n, k) is
+ * scanned for the first key that is >= the sector.  The value
+ * returned is (KEYS_PER_NODE * n) + k for the last level visited.
+ */
+static inline int __find_node(struct dm_table *t, struct buffer_head *bh)
+{
+       int l, n = 0, k = 0;
+       offset_t *node;
+
+       for (l = 0; l < t->depth; l++) {
+               /* descend to the child chosen at the previous level */
+               n = get_child(n, k);
+               node = get_node(t, l, n);
+
+               for (k = 0; k < KEYS_PER_NODE; k++)
+                       if (node[k] >= bh->b_rsector)
+                               break;
+       }
+
+       return (KEYS_PER_NODE * n) + k;
+}
+
+/*
+ * make_request function for mapped devices.
+ *
+ * Looks up the device for the buffer's minor.  While the device is
+ * suspended it tries to queue the io for later; otherwise it maps
+ * the buffer through the table.
+ *
+ * Returns 1 when the buffer was mapped.  Returns 0 when the io was
+ * deferred, and also when it was failed with buffer_IO_error() (bad
+ * minor, no device, read-ahead while suspended, queueing failure or
+ * mapping failure).
+ */
+static int request(request_queue_t *q, int rw, struct buffer_head *bh)
+{
+       struct mapped_device *md;
+       int r, minor = MINOR(bh->b_rdev);
+
+       if (minor >= MAX_DEVICES)
+               goto bad_no_lock;
+
+       rl;
+       md = _devs[minor];
+
+       if (!md)
+               goto bad;
+
+       /*
+        * If we're suspended we have to queue
+        * this io for later.
+        */
+       while (md->suspended) {
+               /* queue_io() takes the write lock, so drop the read lock */
+               ru;
+
+               /* read-ahead is failed rather than deferred */
+               if (rw == READA)
+                       goto bad_no_lock;
+
+               r = queue_io(md, bh, rw);
+
+               if (r < 0)
+                       goto bad_no_lock;
+
+               else if (r > 0)
+                       return 0; /* deferred successfully */
+
+               /*
+                * We're in a while loop, because
+                * someone could suspend before we
+                * get to the following read
+                * lock
+                */
+               rl;
+       }
+
+       if (!__map_buffer(md, bh, rw, __find_node(md->map, bh)))
+               goto bad;
+
+       ru;
+       return 1;
+
+ bad:
+       ru;
+
+ bad_no_lock:
+       buffer_IO_error(bh);
+       return 0;
+}
+
+/*
+ * Range check for a block number on the given minor.  Returns 1 if
+ * the sector just past 'block' is no greater than the device size in
+ * sectors plus one, and 0 otherwise.
+ */
+static int check_dev_size(int minor, unsigned long block)
+{
+       /* FIXME: check this */
+       /* _block_size[] is doubled here to get 512-byte sectors */
+       unsigned long max_sector = (_block_size[minor] << 1) + 1;
+       unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9);
+
+       return (sector > max_sector) ? 0 : 1;
+}
+
+/*
+ * Creates a dummy buffer head and maps it (for lilo).
+ *
+ * On return *r_dev and *r_block hold the device and block the dummy
+ * buffer was remapped to.  Returns -ENXIO if the minor is out of
+ * range, has no device or the device is suspended, -EINVAL if the
+ * block fails check_dev_size(), and otherwise the value returned by
+ * the target's map function.
+ *
+ * NOTE(review): dm_user_bmap() treats any non-zero return as an
+ * error, while __map_buffer() treats a positive map result as
+ * success -- confirm what map functions return on this path.
+ */
+static int do_bmap(kdev_t dev, unsigned long block,
+                  kdev_t *r_dev, unsigned long *r_block)
+{
+       struct mapped_device *md;
+       struct buffer_head bh;
+       int minor = MINOR(dev), r;
+       struct target *t;
+
+       rl;
+       if ((minor >= MAX_DEVICES) || !(md = _devs[minor]) || md->suspended) {
+               r = -ENXIO;
+               goto out;
+       }
+
+       if (!check_dev_size(minor, block)) {
+               r = -EINVAL;
+               goto out;
+       }
+
+       /* setup dummy bh */
+       memset(&bh, 0, sizeof(bh));
+       bh.b_blocknr = block;
+       bh.b_dev = bh.b_rdev = dev;
+       bh.b_size = _blksize_size[minor];
+       bh.b_rsector = block * (bh.b_size >> 9);
+
+       /* find target */
+       t = md->map->targets + __find_node(md->map, &bh);
+
+       /* do the mapping */
+       r = t->type->map(&bh, READ, t->private);
+
+       /* convert the remapped sector back into a block number */
+       *r_dev = bh.b_rdev;
+       *r_block = bh.b_rsector / (bh.b_size >> 9);
+
+ out:
+       ru;
+       return r;
+}
+
+/*
+ * LV_BMAP helper: fetch the block number from the user's lv_bmap,
+ * map it with do_bmap(), and write the resulting device and block
+ * back.  Returns -EFAULT on a failed user access, any non-zero
+ * do_bmap() result unchanged, and 0 otherwise.
+ */
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb)
+{
+       unsigned long block, r_block;
+       kdev_t r_dev;
+       int r;
+
+       if (get_user(block, &lvb->lv_block))
+               return -EFAULT;
+
+       r = do_bmap(inode->i_rdev, block, &r_dev, &r_block);
+       if (r)
+               return r;
+
+       if (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev))
+               return -EFAULT;
+
+       if (put_user(r_block, &lvb->lv_block))
+               return -EFAULT;
+
+       return 0;
+}
+
+/*
+ * See if the device with a specific minor # is free.
+ *
+ * Returns the minor if its slot in _devs[] is empty, or -1 if the
+ * slot is taken or the minor is not below MAX_DEVICES.  The -1
+ * matters: 0 is a valid minor, so it must never be used to signal
+ * failure.
+ */
+static inline int __specific_dev(int minor)
+{
+       /* valid indices are 0 .. MAX_DEVICES - 1 */
+       if (minor >= MAX_DEVICES) {
+               WARN("request for a mapped_device > than MAX_DEVICES");
+               return -1;
+       }
+
+       if (!_devs[minor])
+               return minor;
+
+       return -1;
+}
+
+/*
+ * Return the lowest minor whose slot in _devs[] is empty, or -1 if
+ * every slot is in use.
+ */
+static inline int __any_old_dev(void)
+{
+       int slot = 0;
+
+       while (slot < MAX_DEVICES) {
+               if (!_devs[slot])
+                       return slot;
+               slot++;
+       }
+
+       return -1;
+}
+
+/*
+ * Allocate and initialise a blank device and install it in _devs[].
+ * A negative 'minor' asks for any free slot; otherwise that specific
+ * minor is requested.  Returns NULL if memory or a free slot is not
+ * available.
+ */
+static struct mapped_device *alloc_dev(int minor)
+{
+       struct mapped_device *md;
+
+       md = kmalloc(sizeof(*md), GFP_KERNEL);
+       if (!md)
+               return NULL;
+
+       memset(md, 0, sizeof (*md));
+
+       wl;
+       if (minor < 0)
+               minor = __any_old_dev();
+       else
+               minor = __specific_dev(minor);
+
+       if (minor < 0) {
+               WARN("no free devices available");
+               wu;
+               kfree(md);
+               return NULL;
+       }
+
+       md->dev = MKDEV(major, minor);
+       md->name[0] = '\0';
+       md->suspended = 0;
+
+       init_waitqueue_head(&md->wait);
+
+       _devs[minor] = md;
+       wu;
+
+       return md;
+}
+
+/* Free the mapped_device itself; its _devs[] slot is not touched here. */
+static void free_dev(struct mapped_device *md)
+{
+       kfree(md);
+}
+
+/*
+ * Create the devfs node for the device under _dev_dir, named after
+ * md->name, and remember the handle in md->devfs_entry.  Always
+ * returns 0; the devfs_register() result is not checked.
+ */
+static int register_device(struct mapped_device *md)
+{
+       md->devfs_entry =
+               devfs_register(_dev_dir, md->name, DEVFS_FL_CURRENT_OWNER,
+                              MAJOR(md->dev), MINOR(md->dev),
+                              S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
+                              &dm_blk_dops, NULL);
+
+       return 0;
+}
+
+/* Remove the device's devfs node.  Always returns 0. */
+static int unregister_device(struct mapped_device *md)
+{
+       devfs_unregister(md->devfs_entry);
+       return 0;
+}
+
+/*
+ * The hardsect size for a mapped device is the smallest hard sector
+ * size among the devices it maps onto.  An empty list gives INT_MAX.
+ */
+static int __find_hardsect_size(struct list_head *devices)
+{
+       struct list_head *pos;
+       int smallest = INT_MAX;
+
+       list_for_each(pos, devices) {
+               struct dm_dev *dd = list_entry(pos, struct dm_dev, list);
+               int sz = get_hardsect_size(dd->dev);
+
+               if (sz < smallest)
+                       smallest = sz;
+       }
+
+       return smallest;
+}
+
+/*
+ * Bind a table to the device and publish its geometry in the
+ * per-minor arrays.  A table without targets gives a zero-sized
+ * device; otherwise the size comes from the last entry of
+ * t->highs and the disk is registered.  Always returns 0.
+ */
+static int __bind(struct mapped_device *md, struct dm_table *t)
+{
+       int minor = MINOR(md->dev);
+
+       md->map = t;
+       _blksize_size[minor] = BLOCK_SIZE;
+
+       if (t->num_targets) {
+               /* in k */
+               _block_size[minor] = (t->highs[t->num_targets - 1] + 1) >> 1;
+               _hardsect_size[minor] = __find_hardsect_size(&t->devices);
+               register_disk(NULL, md->dev, 1, &dm_blk_dops,
+                             _block_size[minor]);
+       } else {
+               _block_size[minor] = 0;
+               _hardsect_size[minor] = 0;
+       }
+
+       return 0;
+}
+
+/*
+ * Destroy the table currently bound to the device and zero the
+ * device's entries in the per-minor size arrays.
+ */
+static void __unbind(struct mapped_device *md)
+{
+       int minor = MINOR(md->dev);
+
+       dm_table_destroy(md->map);
+       md->map = NULL;
+
+       _block_size[minor] = 0;
+       _blksize_size[minor] = 0;
+       _hardsect_size[minor] = 0;
+}
+
+
+/*
+ * Scan _devs[] for a device whose name matches exactly.  Returns the
+ * first match, or NULL if there is none.
+ */
+static struct mapped_device *__get_by_name(const char *name)
+{
+       struct mapped_device *md;
+       int i;
+
+       for (i = 0; i < MAX_DEVICES; i++) {
+               md = _devs[i];
+               if (!md)
+                       continue;
+
+               if (strcmp(md->name, name) == 0)
+                       return md;
+       }
+
+       return NULL;
+}
+
+/*
+ * Validate a proposed device name.  Returns 1 if it is usable, or 0
+ * (after a warning) if it contains a '/' or is already used by
+ * another device.
+ */
+static int check_name(const char *name)
+{
+       if (strchr(name, '/')) {
+               WARN("invalid device name");
+               return 0;
+       }
+
+       if (__get_by_name(name)) {
+               WARN("device name already in use");
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Constructor for a new device.
+ *
+ * name:  device name; must fit in md->name, contain no '/' and not
+ *        already be in use.
+ * minor: minor number to use, or a negative value to take any free
+ *        one.
+ * table: table to bind to the new device.
+ *
+ * Returns the new device, or an ERR_PTR(): -ENXIO if the minor is
+ * out of range or no device could be allocated, -EINVAL for a bad
+ * name, or the error from registering/binding.  On failure the
+ * device's slot in _devs[] is released again; the table is left for
+ * the caller to dispose of.
+ */
+struct mapped_device *dm_create(const char *name, int minor,
+             struct dm_table *table)
+{
+       int r;
+       struct mapped_device *md;
+
+       if (minor >= MAX_DEVICES)
+               return ERR_PTR(-ENXIO);
+
+       /* the name is copied into a fixed size buffer below */
+       if (strlen(name) >= DM_NAME_LEN)
+               return ERR_PTR(-EINVAL);
+
+       if (!(md = alloc_dev(minor)))
+               return ERR_PTR(-ENXIO);
+
+       /*
+        * alloc_dev() picks the slot itself when minor is negative, so
+        * take the real minor from the device instead of indexing
+        * _devs[] with the caller's value.
+        */
+       minor = MINOR(md->dev);
+
+       wl;
+       if (!check_name(name)) {
+               r = -EINVAL;
+               goto bad;
+       }
+
+       strcpy(md->name, name);
+
+       if ((r = register_device(md)))
+               goto bad;
+
+       if ((r = __bind(md, table))) {
+               unregister_device(md);
+               goto bad;
+       }
+       wu;
+
+       return md;
+
+ bad:
+       /* alloc_dev() installed md in _devs[]; don't leave it dangling */
+       _devs[minor] = NULL;
+       wu;
+       free_dev(md);
+       return ERR_PTR(r);
+}
+
+/*
+ * Destructor for the device.  You cannot destroy
+ * a suspended device.
+ *
+ * Returns -EPERM if the device is suspended or still in use, the
+ * error from unregister_device() if that fails, and 0 once the
+ * device has been removed from _devs[], unbound and freed.
+ */
+int dm_destroy(struct mapped_device *md)
+{
+       int minor, r;
+
+       rl;
+       if (md->suspended || md->use_count) {
+               ru;
+               return -EPERM;
+       }
+
+       fsync_dev(md->dev);
+       ru;
+
+       wl;
+       /* the lock was dropped above, so look at the use count again */
+       if (md->use_count) {
+               wu;
+               return -EPERM;
+       }
+
+       if ((r = unregister_device(md))) {
+               wu;
+               return r;
+       }
+
+       minor = MINOR(md->dev);
+       _devs[minor] = 0;
+       __unbind(md);
+
+       wu;
+
+       free_dev(md);
+
+       return 0;
+}
+
+
+/*
+ * Resubmit every deferred buffer_head on the list through
+ * generic_make_request(), freeing each list entry as it goes.
+ */
+static void flush_deferred_io(struct deferred_io *c)
+{
+       struct deferred_io *next;
+
+       for (; c; c = next) {
+               /* grab the link first: the entry is freed below */
+               next = c->next;
+               generic_make_request(c->rw, c->bh);
+               free_deferred(c);
+       }
+}
+
+/*
+ * Replace the device's table with a new one, destroying the old
+ * table.  The device must be suspended, otherwise -EPERM is
+ * returned.  Otherwise returns the result of binding the new table.
+ */
+int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+{
+       int r;
+
+       wl;
+
+       if (!md->suspended)
+               r = -EPERM;
+       else {
+               __unbind(md);
+               r = __bind(md, table);
+       }
+
+       wu;
+
+       return r;
+}
+
+
+/*
+ * We need to be able to change a mapping table
+ * under a mounted filesystem.  For example we
+ * might want to move some data in the background.
+ * Before the table can be swapped with
+ * dm_swap_table, dm_suspend must be called to
+ * flush any in flight buffer_heads and ensure
+ * that any further io gets deferred.
+ *
+ * Returns -EINVAL if the device is already suspended, otherwise 0
+ * once the pending count has been seen to be zero.
+ */
+int dm_suspend(struct mapped_device *md)
+{
+       DECLARE_WAITQUEUE(wait, current);
+
+       wl;
+       if (md->suspended) {
+               wu;
+               return -EINVAL;
+       }
+
+       md->suspended = 1;
+       wu;
+
+       /* wait for all the pending io to flush */
+       add_wait_queue(&md->wait, &wait);
+       /*
+        * NOTE(review): the task state is set once, before the loop,
+        * and not again after schedule() returns -- confirm this cannot
+        * turn the wait into a busy loop.
+        */
+       current->state = TASK_UNINTERRUPTIBLE;
+       do {
+               wl;
+               /* leaves the loop with the write lock still held */
+               if (!atomic_read(&md->pending))
+                       break;
+
+               wu;
+               schedule();
+
+       } while (1);
+
+       current->state = TASK_RUNNING;
+       remove_wait_queue(&md->wait, &wait);
+       wu;
+
+       return 0;
+}
+
+/*
+ * Take the device out of the suspended state and resubmit the io
+ * that was deferred while it was suspended.  Returns -EINVAL if the
+ * device is not suspended, otherwise 0.
+ */
+int dm_resume(struct mapped_device *md)
+{
+       struct deferred_io *backlog = NULL;
+       int r = -EINVAL;
+
+       wl;
+       if (md->suspended) {
+               md->suspended = 0;
+
+               /* detach the list so it can be flushed without the lock */
+               backlog = md->deferred;
+               md->deferred = NULL;
+               r = 0;
+       }
+       wu;
+
+       if (!r)
+               flush_deferred_io(backlog);
+
+       return r;
+}
+
+/*
+ * Search for a device with a particular name.
+ *
+ * Returns the device or NULL.  The lookup is done under the read
+ * lock, but the use count is not changed and the lock is dropped
+ * before returning.
+ */
+struct mapped_device *dm_get(const char *name)
+{
+       struct mapped_device *md;
+
+       rl;
+       md = __get_by_name(name);
+       ru;
+
+       return md;
+}
+
+/* Block device operations: open, release and ioctl handlers for mapped devices. */
+struct block_device_operations dm_blk_dops = {
+       open:     dm_blk_open,
+       release:  dm_blk_close,
+       ioctl:    dm_blk_ioctl,
+       owner:    THIS_MODULE,
+};
+
+/*
+ * module hooks
+ */
+module_init(dm_init);
+module_exit(dm_exit);
+
+MODULE_PARM(major, "i");
+MODULE_PARM_DESC(major, "The major number of the device mapper");
+MODULE_DESCRIPTION("device-mapper driver");
+MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
+MODULE_LICENSE("GPL");
+
diff --git a/kernel/common/dm.h b/kernel/common/dm.h

new file mode 100644 (file)

index 0000000..a159049
--- /dev/null
+++ b/kernel/common/dm.h
@@ -0,0 +1,243 @@
+/*
+ * dm.h
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This file is released under the GPL.
+ */
+
+/*
+ * Internal header file for device mapper
+ *
+ * Changelog
+ *
+ *     16/08/2001 - First version [Joe Thornber]
+ */
+
+/*
+ * This driver attempts to provide a generic way of specifying logical
+ * devices which are mapped onto other devices.
+ *
+ * It does this by mapping sections of the logical device onto 'targets'.
+ *
+ * When the logical device is accessed the make_request function looks up
+ * the correct target for the given sector, and then asks this target
+ * to do the remapping.
+ *
+ * (dm-table.c) A btree like structure is used to hold the sector
+ * range -> target mapping.  Because we know all the entries in the
+ * btree in advance we can make a very compact tree, omitting pointers
+ * to child nodes, (child nodes locations can be calculated). Each
+ * node of the btree is 1 level cache line in size, this gives a small
+ * performance boost.
+ *
+ * A userland test program for the btree gave the following results on a
+ * 1 Gigahertz Athlon machine:
+ *
+ * entries in btree               lookups per second
+ * ----------------               ------------------
+ * 5                              25,000,000
+ * 1000                           7,700,000
+ * 10,000,000                     3,800,000
+ *
+ * Of course these results should be taken with a pinch of salt; the
+ * lookups were sequential and there were no other applications (other
+ * than X + emacs) running to give any pressure on the level 1 cache.
+ *
+ * Typical LVM users would find they have very few targets for each
+ * LV (probably less than 10).
+ *
+ * (dm-target.c) Target types are not hard coded, instead the
+ * dm_register_target function should be called.  A target type is
+ * specified using three functions (see the header):
+ *
+ * dm_ctr_fn - takes a string and constructs a target specific piece of
+ *             context data.
+ * dm_dtr_fn - destroy contexts.
+ * dm_map_fn - function that takes a buffer_head and some previously
+ *             constructed context and performs the remapping.
+ *
+ * Currently there are two trivial mappers, which are
+ * automatically registered: 'linear', and 'io_error'.  Linear alone
+ * is enough to implement most LVM features (omitting striped volumes
+ * and snapshots).
+ *
+ * (dm-fs.c) The driver is controlled through a /proc interface:
+ * /proc/device-mapper/control allows you to create and remove devices
+ * by 'cat'ing a line of the following format:
+ *
+ * create <device name> [minor no]
+ * remove <device name>
+ *
+ * /proc/device-mapper/<device name> accepts the mapping table:
+ *
+ * begin
+ * <sector start> <length> <target name> <target args>...
+ * ...
+ * end
+ *
+ * The begin/end lines are nasty, they should be handled by open/close
+ * for the file.
+ *
+ * At the moment the table assumes 32 bit keys (sectors), the move to
+ * 64 bits will involve no interface changes, since the tables will be
+ * read in as ascii data.  A different table implementation can
+ * therefore be provided at another time.  Either just by changing offset_t
+ * to 64 bits, or maybe implementing a structure which looks up the keys in
+ * stages (ie, 32 bits at a time).
+ *
+ * More interesting targets:
+ *
+ * striped mapping; given a stripe size and a number of device regions
+ * this would stripe data across the regions.  Especially useful, since
+ * we could limit each striped region to a 32 bit area and then avoid
+ * nasty 64 bit %'s.
+ *
+ * mirror mapping (reflector ?); would set off a kernel thread slowly
+ * copying data from one region to another, ensuring that any new
+ * writes got copied to both destinations correctly.  Great for
+ * implementing pvmove.  Not sure how userland would be notified that
+ * the copying process had completed.  Possibly by reading a /proc entry
+ * for the LV.  Could also use poll() for this kind of thing.
+ */
+
+
+#ifndef DM_INTERNAL_H
+#define DM_INTERNAL_H
+
+#include <linux/version.h>
+#include <linux/major.h>
+#include <linux/iobuf.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/compatmac.h>
+#include <linux/cache.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/ctype.h>
+#include <linux/device-mapper.h>
+#include <linux/list.h>
+
+#define MAX_DEPTH 16
+#define NODE_SIZE L1_CACHE_BYTES
+#define KEYS_PER_NODE (NODE_SIZE / sizeof(offset_t))
+#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
+#define DM_NAME_LEN 128
+
+/*
+ * list of devices that a metadevice uses
+ * and hence should open/close.
+ */
+struct dm_dev {
+       atomic_t count;         /* presumably a reference count -- TODO confirm */
+       struct list_head list;  /* link in a table's 'devices' list */
+
+       kdev_t dev;             /* device number of the underlying device */
+       struct block_device *bd;
+};
+
+/*
+ * io that had to be deferred while we were
+ * suspended; kept as a singly linked list.
+ */
+struct deferred_io {
+       int rw;                         /* rw value of the deferred request */
+       struct buffer_head *bh;         /* the deferred buffer */
+       struct deferred_io *next;       /* next entry, NULL at the end */
+};
+
+/*
+ * btree leaf, these do the actual mapping
+ */
+struct target {
+       struct target_type *type;       /* supplies the map (and optional err) fn */
+       void *private;                  /* context passed to those functions */
+};
+
+/*
+ * the btree
+ */
+struct dm_table {
+       /* btree table */
+       int depth;              /* number of levels */
+       int counts[MAX_DEPTH];  /* in nodes */
+       offset_t *index[MAX_DEPTH];     /* key array for each level */
+
+       int num_targets;        /* entries in use in highs[] and targets[] */
+       int num_allocated;      /* presumably the allocated capacity -- TODO confirm */
+       offset_t *highs;        /* per-target key; the last entry gives the device size */
+       struct target *targets; /* the leaves */
+
+       /* a list of devices used by this table */
+       struct list_head devices;
+};
+
+/*
+ * the actual device struct
+ */
+struct mapped_device {
+       kdev_t dev;                     /* device number (major, minor) */
+       char name[DM_NAME_LEN];         /* NUL terminated device name */
+
+       int use_count;                  /* raised on open, dropped on close */
+       int suspended;                  /* non-zero while io is being deferred */
+
+       /* count of hooked io still in flight, and who to wake at zero */
+       atomic_t pending;
+       wait_queue_head_t wait;
+       /* a list of io's that arrived while we were suspended */
+       struct deferred_io *deferred;
+
+       struct dm_table *map;           /* currently bound table */
+
+       /* used by dm-fs.c */
+       devfs_handle_t devfs_entry;
+};
+
+extern struct block_device_operations dm_blk_dops;
+
+
+/* dm-target.c */
+int dm_target_init(void);
+struct target_type *dm_get_target_type(const char *name);
+void dm_put_target_type(struct target_type *t);
+
+/* dm.c */
+struct mapped_device *dm_find_by_minor(int minor);
+struct mapped_device *dm_get(const char *name);
+struct mapped_device *dm_create(const char *name, int minor, struct dm_table *);int dm_destroy(struct mapped_device *md);
+int dm_swap_table(struct mapped_device *md, struct dm_table *t);
+int dm_suspend(struct mapped_device *md);
+int dm_resume(struct mapped_device *md);
+
+/* dm-table.c */
+struct dm_table *dm_table_create(void);
+void dm_table_destroy(struct dm_table *t);
+
+int dm_table_add_target(struct dm_table *t, offset_t high,
+                       struct target_type *type, void *private);
+int dm_table_complete(struct dm_table *t);
+
+#define WARN(f, x...) printk(KERN_WARNING "device-mapper: " f "\n" , ## x)
+
+/*
+ * calculate the index of the child node of the
+ * n'th node k'th key, i.e. (n * CHILDREN_PER_NODE) + k.
+ */
+static inline int get_child(int n, int k)
+{
+       return (n * CHILDREN_PER_NODE) + k;
+}
+
+/*
+ * returns the n'th node of level l from table t, as a pointer to
+ * that node's first key (nodes are KEYS_PER_NODE keys apart).
+ */
+static inline offset_t *get_node(struct dm_table *t, int l, int n)
+{
+       return t->index[l] + (n * KEYS_PER_NODE);
+}
+
+int dm_interface_init(void) __init;
+void dm_interface_exit(void) __exit;
+
+#endif
diff --git a/kernel/fs/dmfs-error.c b/kernel/fs/dmfs-error.c

new file mode 100644 (file)

index 0000000..bfe0e78
--- /dev/null
+++ b/kernel/fs/dmfs-error.c
@@ -0,0 +1,122 @@
+/*
+ * dmfs-error.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+/* One error message, linked onto an error list. */
+struct dmfs_error {
+       struct list_head list;  /* link in the owning list */
+       unsigned len;           /* length of msg, excluding the NUL */
+       char *msg;              /* the message text */
+};
+
+/*
+ * Statically built one-entry list holding a fixed out-of-memory
+ * message; e_start() walks it instead of the per-inode list when
+ * dmi->status is set.
+ */
+static struct dmfs_error oom_error;
+
+static struct list_head oom_list = {
+       next: &oom_error.list,
+       prev: &oom_error.list,
+};
+
+static struct dmfs_error oom_error = {
+       list: { next: &oom_list, prev: &oom_list },
+       len: 39,
+       msg: "Out of memory during creation of table\n",
+};
+
+/*
+ * Record an error message against the inode.
+ *
+ * The entry and its text share one allocation: the formatted
+ * message ("<num>: <str>\n") lives directly after the struct.  If
+ * the allocation fails the message is dropped.
+ */
+void dmfs_add_error(struct inode *inode, unsigned num, char *str)
+{
+       struct dmfs_i *dmi = DMFS_I(inode);
+       /*
+        * "%8u" pads to 8 characters but prints more for larger values;
+        * 3 digits per byte of 'num' bounds the width.  The extra 4
+        * bytes hold ": ", the newline and the terminating NUL.
+        */
+       size_t len = sizeof(struct dmfs_error) + strlen(str) +
+                    3 * sizeof(num) + 4;
+       struct dmfs_error *e = kmalloc(len, GFP_KERNEL);
+
+       if (e) {
+               e->msg = (char *)(e + 1);
+               e->len = sprintf(e->msg, "%8u: %s\n", num, str);
+               list_add(&e->list, &dmi->errors);
+       }
+}
+
+void dmfs_zap_errors(struct inode *inode)
+{
+       struct dmfs_i *dmi = DMFS_I(inode);
+       struct dmfs_error *e;
+
+       while(!list_empty(&dmi->errors)) {
+               e = list_entry(dmi->errors.next, struct dmfs_error, list);
+               list_del(&e->list);
+               kfree(e);
+       }
+}
+
+/*
+ * seq_file start: take dmi->sem and return the entry at position
+ * *pos, or NULL if the list is shorter than that.  The fixed
+ * out-of-memory list is walked when dmi->status is set, the inode's
+ * own error list otherwise.  The semaphore is released in e_stop().
+ */
+static void *e_start(struct seq_file *e, loff_t *pos)
+{
+       struct dmfs_i *dmi = e->context;
+       struct list_head *head, *p;
+       loff_t n = *pos;
+
+       down(&dmi->sem);
+
+       head = dmi->status ? &oom_list : &dmi->errors;
+
+       list_for_each(p, head)
+               if (n-- == 0)
+                       return list_entry(p, struct dmfs_error, list);
+
+       return NULL;
+}
+
+/*
+ * seq_file next: advance *pos and return the entry after v, or NULL
+ * once the walk is back at either list head.
+ */
+static void *e_next(struct seq_file *e, void *v, loff_t *pos)
+{
+       struct dmfs_i *dmi = e->context;
+       struct list_head *p = ((struct dmfs_error *)v)->list.next;
+
+       (*pos)++;
+
+       if (p == &dmi->errors || p == &oom_list)
+               return NULL;
+
+       return list_entry(p, struct dmfs_error, list);
+}
+
+/* seq_file stop: release the semaphore taken in e_start(). */
+static void e_stop(struct seq_file *e, void *v)
+{
+       struct dmfs_i *dmi = e->context;
+       up(&dmi->sem);
+}
+
+/* seq_file show: emit the message text of one entry.  Always returns 0. */
+static int show_error(struct seq_file *e, void *v)
+{
+       struct dmfs_error *d = v;
+       seq_puts(e, d->msg);
+       return 0;
+}
+
+/* seq_file iterator over an error list. */
+struct seq_operations dmfs_error_seq_ops = {
+       start: e_start,
+       next: e_next,
+       stop: e_stop,
+       show: show_error,
+};
+
+
diff --git a/kernel/fs/dmfs-lv.c b/kernel/fs/dmfs-lv.c

new file mode 100644 (file)

index 0000000..44b2064
--- /dev/null
+++ b/kernel/fs/dmfs-lv.c
@@ -0,0 +1,236 @@
+/*
+ * dmfs-lv.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/* Heavily based upon ramfs */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+/*
+ * Description of one fixed entry inside an LV directory: the entry name,
+ * the helper called on lookup to build its inode, the seq_file operations
+ * handed to that helper, and the d_type value reported by readdir.
+ */
+struct dmfs_inode_info {
+       const char *name;
+       struct inode *(*create)(struct inode *, int, struct seq_operations *, int);
+       struct seq_operations *seq_ops;
+       int type;
+};
+
+/* The seq_operations pointer stored in a seq-backed inode's u.generic_ip. */
+#define DMFS_SEQ(inode) ((struct seq_operations *)(inode)->u.generic_ip)
+
+/* Defined in the other dmfs-*.c files. */
+extern struct inode *dmfs_create_table(struct inode *, int, struct seq_operations *, int);
+extern struct seq_operations dmfs_error_seq_ops;
+extern struct seq_operations dmfs_status_seq_ops;
+extern struct seq_operations dmfs_suspend_seq_ops;
+extern ssize_t dmfs_suspend_write(struct file *file, const char *buf, size_t size, loff_t *ppos);
+
+/*
+ * Open a seq_file backed entry.  On success the seq_file context is set
+ * to the dmfs_i of the parent (LV) directory so the iterators can reach
+ * the per-LV state.
+ */
+static int dmfs_seq_open(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
+
+       ret = seq_open(file, DMFS_SEQ(inode));
+       if (ret < 0)
+               return ret;
+
+       seq = file->private_data;
+       seq->context = DMFS_I(file->f_dentry->d_parent->d_inode);
+       return ret;
+}
+
+/* fsync stub: nothing is written back, so always report success. */
+static int dmfs_no_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+       return 0;
+}
+
+/* "suspend": readable through seq_file, writable via dmfs_suspend_write. */
+static struct file_operations dmfs_suspend_file_operations = {
+       .open           = dmfs_seq_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+       .write          = dmfs_suspend_write,
+       .fsync          = dmfs_no_fsync,
+};
+
+/* Inode operations for the virtual files: nothing is supported. */
+static struct inode_operations dmfs_null_inode_operations = {
+};
+
+/* Read-only seq_file backed entries. */
+static struct file_operations dmfs_seq_ro_file_operations = {
+       .open           = dmfs_seq_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+       .fsync          = dmfs_no_fsync,
+};
+
+/*
+ * Build the inode for a read-only seq_file entry.  The seq_operations
+ * pointer is parked in u.generic_ip (see DMFS_SEQ).  'dev' is unused here.
+ * Returns NULL if no inode could be allocated.
+ */
+static struct inode *dmfs_create_seq_ro(struct inode *dir, int mode, struct seq_operations *seq_ops, int dev)
+{
+       struct inode *inode;
+
+       inode = dmfs_new_inode(dir->i_sb, mode | S_IFREG);
+       if (!inode)
+               return NULL;
+
+       inode->i_fop = &dmfs_seq_ro_file_operations;
+       inode->i_op = &dmfs_null_inode_operations;
+       DMFS_SEQ(inode) = seq_ops;
+       return inode;
+}
+
+/*
+ * Build the block-special inode for the "device" entry, bound to device
+ * number 'dev'.  'seq_ops' is unused.  Returns NULL on allocation failure.
+ */
+static struct inode *dmfs_create_device(struct inode *dir, int mode, struct seq_operations *seq_ops, int dev)
+{
+       int blk_mode = mode | S_IFBLK;
+       struct inode *inode = dmfs_new_inode(dir->i_sb, blk_mode);
+
+       if (inode)
+               init_special_inode(inode, blk_mode, dev);
+
+       return inode;
+}
+
+/*
+ * Build the inode for the "suspend" entry: a seq_file inode whose file
+ * operations are then replaced by the writable suspend variant.
+ */
+static struct inode *dmfs_create_suspend(struct inode *dir, int mode, struct seq_operations *seq_ops, int dev)
+{
+       struct inode *inode = dmfs_create_seq_ro(dir, mode, seq_ops, dev);
+
+       if (!inode)
+               return NULL;
+
+       inode->i_fop = &dmfs_suspend_file_operations;
+       return inode;
+}
+
+/*
+ * Unlink an entry of an LV directory.  The inode's mapping is pointed
+ * back at its own i_data (the "table" inode borrows the directory's
+ * mapping, see dmfs_create_table) before the link count is dropped.
+ */
+static int dmfs_lv_unlink(struct inode *dir, struct dentry *dentry)
+{
+       struct inode *victim = dentry->d_inode;
+
+       victim->i_mapping = &victim->i_data;
+       victim->i_nlink--;
+
+       return 0;
+}
+
+/*
+ * Fixed contents of every LV directory, in readdir order.  The first two
+ * slots ("." and "..") have no create helper; dmfs_find_by_name() skips
+ * them by starting its search at index 2.
+ */
+static struct dmfs_inode_info dmfs_ii[] = {
+       { ".", NULL, NULL, DT_DIR },
+       { "..", NULL, NULL, DT_DIR },
+       { "table", dmfs_create_table, NULL, DT_REG },
+       { "error", dmfs_create_seq_ro, &dmfs_error_seq_ops, DT_REG },
+       { "status", dmfs_create_seq_ro, &dmfs_status_seq_ops, DT_REG },
+       { "device", dmfs_create_device, NULL, DT_BLK },
+       { "suspend", dmfs_create_suspend, &dmfs_suspend_seq_ops, DT_REG },
+};
+
+/* Number of entries in dmfs_ii[]. */
+#define NR_DMFS_II (sizeof(dmfs_ii)/sizeof(struct dmfs_inode_info))
+
+/*
+ * Look up a directory entry description by name.  'n' need not be NUL
+ * terminated; 'len' is its length.  The "." and ".." slots are never
+ * returned.  Returns NULL when no entry matches.
+ */
+static struct dmfs_inode_info *dmfs_find_by_name(const char *n, int len)
+{
+       struct dmfs_inode_info *ii;
+
+       for (ii = &dmfs_ii[2]; ii < &dmfs_ii[NR_DMFS_II]; ii++) {
+               if (strlen(ii->name) == len &&
+                   memcmp(ii->name, n, len) == 0)
+                       return ii;
+       }
+
+       return NULL;
+}
+
+/*
+ * Directory lookup inside an LV directory.  Known names get a freshly
+ * created inode (mode 0600) from the matching create helper; anything
+ * else, or a failed creation, is added as a negative dentry.
+ */
+static struct dentry *dmfs_lv_lookup(struct inode *dir, struct dentry *dentry)
+{
+       struct dmfs_inode_info *ii;
+       struct inode *inode = NULL;
+
+       ii = dmfs_find_by_name(dentry->d_name.name, dentry->d_name.len);
+       if (ii != NULL)
+               inode = ii->create(dir, 0600, ii->seq_ops,
+                                  kdev_t_to_nr(DMFS_I(dir)->md->dev));
+
+       d_add(dentry, inode);
+       return NULL;
+}
+
+/*
+ * Inode number reported by readdir for slot 'entry': the directory's own
+ * number for ".", the parent's for "..", and the slot index otherwise.
+ */
+static int dmfs_inum(int entry, struct dentry *dentry)
+{
+       switch (entry) {
+       case 0:
+               return dentry->d_inode->i_ino;
+       case 1:
+               return dentry->d_parent->d_inode->i_ino;
+       default:
+               return entry;
+       }
+}
+
+/*
+ * readdir for an LV directory: report the fixed dmfs_ii[] entries,
+ * using f_pos as the index of the next entry.  Stops early when filldir
+ * reports that the caller's buffer is full.
+ */
+static int dmfs_lv_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+       struct dentry *dentry = filp->f_dentry;
+
+       for (; filp->f_pos < NR_DMFS_II; filp->f_pos++) {
+               struct dmfs_inode_info *ii = &dmfs_ii[filp->f_pos];
+               int ino = dmfs_inum(filp->f_pos, dentry);
+
+               if (filldir(dirent, ii->name, strlen(ii->name), filp->f_pos,
+                           ino, ii->type) < 0)
+                       break;
+       }
+
+       return 0;
+}
+
+
+/* fsync on an LV directory: nothing to flush, always succeeds. */
+static int dmfs_lv_sync(struct file *file, struct dentry *dentry, int datasync)
+{
+       return 0;
+}
+
+/* File operations of an LV directory. */
+static struct file_operations dmfs_lv_file_operations = {
+       .read           = generic_read_dir,
+       .readdir        = dmfs_lv_readdir,
+       .fsync          = dmfs_lv_sync,
+};
+
+/* Inode operations of an LV directory. */
+static struct inode_operations dmfs_lv_inode_operations = {
+       .lookup         = dmfs_lv_lookup,
+       .unlink         = dmfs_lv_unlink,
+};
+
+/*
+ * Create the directory inode for a new logical volume together with its
+ * mapped device.  An empty table is created and completed, then passed to
+ * dm_create() along with a NUL-terminated copy of the dentry name.
+ *
+ * Returns the new inode, or an ERR_PTR() value; on failure the table and
+ * the inode are released again.
+ */
+struct inode *dmfs_create_lv(struct super_block *sb, int mode, struct dentry *dentry)
+{
+       struct inode *inode = dmfs_new_private_inode(sb, mode | S_IFDIR);
+       char tmp_name[DM_NAME_LEN + 1];
+       struct mapped_device *md;
+       struct dm_table *table;
+       int ret;
+
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+
+       table = dm_table_create();
+       if (IS_ERR(table)) {
+               ret = PTR_ERR(table);
+               goto out_iput;
+       }
+
+       ret = dm_table_complete(table);
+       if (ret != 0)
+               goto out_table;
+
+       inode->i_fop = &dmfs_lv_file_operations;
+       inode->i_op = &dmfs_lv_inode_operations;
+
+       /* d_name.name is used with an explicit length, so copy and terminate */
+       memcpy(tmp_name, dentry->d_name.name, dentry->d_name.len);
+       tmp_name[dentry->d_name.len] = 0;
+
+       md = dm_create(tmp_name, -1, table);
+       if (IS_ERR(md)) {
+               ret = PTR_ERR(md);
+               goto out_table;
+       }
+
+       DMFS_I(inode)->md = md;
+       return inode;
+
+out_table:
+       dm_table_destroy(table);
+out_iput:
+       iput(inode);
+       return ERR_PTR(ret);
+}
+
+
diff --git a/kernel/fs/dmfs-root.c b/kernel/fs/dmfs-root.c

new file mode 100644 (file)

index 0000000..7ce093a
--- /dev/null
+++ b/kernel/fs/dmfs-root.c
@@ -0,0 +1,159 @@
+/*
+ * dmfs-root.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/* Heavily based upon ramfs */
+
+#include <linux/config.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+extern struct inode *dmfs_create_lv(struct super_block *sb, int mode, struct dentry *dentry);
+
+/*
+ * Return 1 when the first 'len' characters of 'str' are all letters,
+ * digits or underscores, 0 otherwise.
+ */
+static int is_identifier(const char *str, int len)
+{
+       for (; len--; str++) {
+               if (*str != '_' && !isalnum(*str))
+                       return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * mkdir in the dmfs root: create a new logical volume named after the
+ * directory.
+ *
+ * Returns 0 on success, -EINVAL when the name is too long (it must be
+ * shorter than DM_NAME_LEN) or starts with a dot, -EPERM when it contains
+ * anything but letters, digits and underscores, or the error reported by
+ * dmfs_create_lv().
+ */
+static int dmfs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+       struct inode *inode;
+
+       if (dentry->d_name.len >= DM_NAME_LEN)
+               return -EINVAL;
+
+       if (!is_identifier(dentry->d_name.name, dentry->d_name.len))
+               return -EPERM;
+
+       /* Already excluded by is_identifier(); kept as an explicit guard. */
+       if (dentry->d_name.name[0] == '.')
+               return -EINVAL;
+
+       inode = dmfs_create_lv(dir->i_sb, mode, dentry);
+       if (!IS_ERR(inode)) {
+               d_instantiate(dentry, inode);
+               /* extra reference keeps the dentry pinned; rmdir drops it */
+               dget(dentry);
+               return 0;
+       }
+       return PTR_ERR(inode);
+}
+
+/*
+ * A dentry counts as positive when it has an inode and is still hashed.
+ *
+ * NOTE(review): the comment that used to sit here said that a non-NULL
+ * u.generic_ip marks an inode representing a table, while NULL marks a
+ * virtual file that should be deleted along with the directory.  This
+ * function does not look at u.generic_ip - confirm whether that note is
+ * still relevant.
+ */
+static inline int positive(struct dentry *dentry)
+{
+       if (!dentry->d_inode)
+               return 0;
+
+       return !d_unhashed(dentry);
+}
+
+/*
+ * Return 1 when no child of 'dentry' is positive, 0 otherwise.  The
+ * child list is walked under dcache_lock.
+ */
+static int empty(struct dentry *dentry)
+{
+       struct list_head *pos;
+       int is_empty = 1;
+
+       spin_lock(&dcache_lock);
+       for (pos = dentry->d_subdirs.next; pos != &dentry->d_subdirs;
+            pos = pos->next) {
+               if (positive(list_entry(pos, struct dentry, d_child))) {
+                       is_empty = 0;
+                       break;
+               }
+       }
+       spin_unlock(&dcache_lock);
+
+       return is_empty;
+}
+
+/*
+ * rmdir in the dmfs root: destroy the mapped device behind an LV
+ * directory.  Returns -ENOTEMPTY while the directory still has positive
+ * children, otherwise the result of dm_destroy().  On success the md
+ * pointer is cleared, the link count dropped and the dentry released.
+ */
+static int dmfs_root_rmdir(struct inode *dir, struct dentry *dentry)
+{
+       struct inode *inode;
+       int ret;
+
+       if (!empty(dentry))
+               return -ENOTEMPTY;
+
+       inode = dentry->d_inode;
+       ret = dm_destroy(DMFS_I(inode)->md);
+       if (ret != 0)
+               return ret;
+
+       DMFS_I(inode)->md = NULL;
+       inode->i_nlink--;
+       dput(dentry);
+       return 0;
+}
+
+/*
+ * Lookup in the dmfs root: every name that reaches this function is
+ * added as a negative dentry.
+ */
+static struct dentry *dmfs_root_lookup(struct inode *dir, struct dentry *dentry)
+{
+       d_add(dentry, NULL);
+
+       return NULL;
+}
+
+/*
+ * Rename in the dmfs root.  Moving between directories is refused with
+ * -EPERM; renaming in place is not implemented yet and gives -EINVAL.
+ */
+static int dmfs_root_rename(struct inode *old_dir, struct dentry *old_dentry,
+                       struct inode *new_dir, struct dentry *new_dentry)
+{
+       /* FIXME: a change of LV name belongs in the same-directory case */
+       return (old_dir != new_dir) ? -EPERM : -EINVAL;
+}
+
+/* fsync on the dmfs root directory: nothing to flush, always succeeds. */
+static int dmfs_root_sync(struct file *file, struct dentry *dentry, int datasync)
+{
+       return 0;
+}
+
+/* File operations of the dmfs root directory. */
+static struct file_operations dmfs_root_file_operations = {
+       .read           = generic_read_dir,
+       .readdir        = dcache_readdir,
+       .fsync          = dmfs_root_sync,
+};
+
+/* Inode operations of the dmfs root directory. */
+static struct inode_operations dmfs_root_inode_operations = {
+       .lookup         = dmfs_root_lookup,
+       .mkdir          = dmfs_root_mkdir,
+       .rmdir          = dmfs_root_rmdir,
+       .rename         = dmfs_root_rename,
+};
+
+/*
+ * Allocate the root directory inode of a dmfs superblock.
+ * Returns NULL when no inode could be allocated.
+ */
+struct inode *dmfs_create_root(struct super_block *sb, int mode)
+{
+       struct inode *root = dmfs_new_inode(sb, mode | S_IFDIR);
+
+       if (!root)
+               return NULL;
+
+       root->i_fop = &dmfs_root_file_operations;
+       root->i_op = &dmfs_root_inode_operations;
+       return root;
+}
+
+
diff --git a/kernel/fs/dmfs-status.c b/kernel/fs/dmfs-status.c

new file mode 100644 (file)

index 0000000..79b73bc
--- /dev/null
+++ b/kernel/fs/dmfs-status.c
@@ -0,0 +1,55 @@
+/*
+ * dmfs-status.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+/*
+ * The "status" file is a placeholder: the iterator below never yields a
+ * record, so reading the file produces no output.
+ */
+
+/* start: there is never a first record. */
+static void *s_start(struct seq_file *s, loff_t *pos)
+{
+       return NULL;
+}
+
+/* next: there is never a following record. */
+static void *s_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       return NULL;
+}
+
+/* stop: nothing was acquired in start, so nothing to release. */
+static void s_stop(struct seq_file *s, void *v)
+{
+}
+
+/* show: emits nothing and reports success. */
+static int s_show(struct seq_file *s, void *v)
+{
+       return 0;
+}
+
+struct seq_operations dmfs_status_seq_ops = {
+       .start  = s_start,
+       .next   = s_next,
+       .stop   = s_stop,
+       .show   = s_show,
+};
+
+
diff --git a/kernel/fs/dmfs-super.c b/kernel/fs/dmfs-super.c

new file mode 100644 (file)

index 0000000..0270c26
--- /dev/null
+++ b/kernel/fs/dmfs-super.c
@@ -0,0 +1,160 @@
+/*
+ * dmfs-super.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+
+#include "dmfs.h"
+#include "dm.h"
+
+#define DMFS_MAGIC 0x444D4653
+
+extern struct inode *dmfs_create_root(struct super_block *sb, int);
+
+/*
+ * statfs: report the filesystem magic, the block size and the longest
+ * permitted name (DM_NAME_LEN - 1).  No other field is filled in.
+ */
+static int dmfs_statfs(struct super_block *sb, struct statfs *buf)
+{
+       buf->f_namelen = DM_NAME_LEN - 1;
+       buf->f_bsize = sb->s_blocksize;
+       buf->f_type = sb->s_magic;
+
+       return 0;
+}
+
+/*
+ * Final teardown of an inode.  For a directory carrying private dmfs_i
+ * data, any queued error records are discarded, the data is freed and the
+ * module use count taken in dmfs_new_private_inode() is dropped.  The
+ * mapped device must already be gone by then (BUG otherwise).
+ */
+static void dmfs_delete_inode(struct inode *inode)
+{
+       struct dmfs_i *dmi = NULL;
+
+       if (S_ISDIR(inode->i_mode))
+               dmi = DMFS_I(inode);
+
+       if (dmi != NULL) {
+               if (dmi->md)
+                       BUG();
+               if (!list_empty(&dmi->errors))
+                       dmfs_zap_errors(inode);
+               kfree(dmi);
+               MOD_DEC_USE_COUNT; /* Don't remove */
+       }
+
+       inode->u.generic_ip = NULL;
+       clear_inode(inode);
+}
+
+/* Superblock operations for dmfs. */
+static struct super_operations dmfs_super_operations = {
+       .statfs         = dmfs_statfs,
+       .put_inode      = force_delete,
+       .delete_inode   = dmfs_delete_inode,
+};
+
+/*
+ * Fill in a dmfs superblock and create its root directory.
+ * Returns 'sb' on success, NULL if the root inode or root dentry could
+ * not be created.
+ */
+static struct super_block *dmfs_read_super(struct super_block *sb, void *data, int silent)
+{
+       struct inode *inode;
+       struct dentry *root;
+
+       sb->s_blocksize = PAGE_CACHE_SIZE;
+       sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+       sb->s_magic = DMFS_MAGIC;
+       sb->s_op = &dmfs_super_operations;
+       sb->s_maxbytes = MAX_NON_LFS;
+
+       /*
+        * dmfs_create_root() reports failure with NULL, not an ERR_PTR();
+        * test for both so a failed allocation is caught here.
+        */
+       inode = dmfs_create_root(sb, 0755);
+       if (!inode || IS_ERR(inode))
+               return NULL;
+       root = d_alloc_root(inode);
+       if (!root) {
+               iput(inode);
+               return NULL;
+       }
+       sb->s_root = root;
+
+       return sb;
+}
+
+/*
+ * Allocate an inode on 'sb' and give it the common dmfs defaults: the
+ * requested mode, the caller's fsuid/fsgid, no device and current
+ * timestamps.  Returns NULL when new_inode() fails.
+ */
+struct inode *dmfs_new_inode(struct super_block *sb, int mode)
+{
+       struct inode *inode = new_inode(sb);
+
+       if (!inode)
+               return NULL;
+
+       inode->i_mode = mode;
+       inode->i_uid = current->fsuid;
+       inode->i_gid = current->fsgid;
+       inode->i_blksize = PAGE_CACHE_SIZE;
+       inode->i_blocks = 0;
+       inode->i_rdev = NODEV;
+       inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+
+       return inode;
+}
+
+/*
+ * Allocate an inode that carries a zeroed, initialised struct dmfs_i in
+ * u.generic_ip.  Each such inode holds one module use count, which
+ * dmfs_delete_inode() gives back.  Returns NULL on allocation failure.
+ */
+struct inode *dmfs_new_private_inode(struct super_block *sb, int mode)
+{
+       struct inode *inode = dmfs_new_inode(sb, mode);
+       struct dmfs_i *dmi;
+
+       if (!inode)
+               return NULL;
+
+       dmi = kmalloc(sizeof(struct dmfs_i), GFP_KERNEL);
+       if (dmi == NULL) {
+               iput(inode);
+               return NULL;
+       }
+
+       memset(dmi, 0, sizeof(struct dmfs_i));
+       init_MUTEX(&dmi->sem);
+       INIT_LIST_HEAD(&dmi->errors);
+       inode->u.generic_ip = dmi;
+       MOD_INC_USE_COUNT; /* Don't remove */
+
+       return inode;
+}
+
+/* Filesystem type "dmfs", set up by dmfs_read_super. */
+static DECLARE_FSTYPE(dmfs_fstype, "dmfs", dmfs_read_super, FS_SINGLE);
+/* Result of kern_mount() in dm_interface_init(); unmounted on exit. */
+static struct vfsmount *dmfs_mnt;
+
+/*
+ * Register dmfs and mount it internally.  Returns 0 on success or a
+ * negative error; when the mount fails the filesystem type is
+ * unregistered again.
+ */
+int __init dm_interface_init(void)
+{
+       int ret;
+
+       ret = register_filesystem(&dmfs_fstype);
+       if (ret < 0)
+               return ret;
+
+       dmfs_mnt = kern_mount(&dmfs_fstype);
+       if (IS_ERR(dmfs_mnt)) {
+               ret = PTR_ERR(dmfs_mnt);
+               unregister_filesystem(&dmfs_fstype);
+               return ret;
+       }
+
+       MOD_DEC_USE_COUNT; /* Yes, this really is correct... */
+       return ret;
+}
+
+/*
+ * Undo dm_interface_init(): re-take the use count that init dropped after
+ * the internal mount, unmount dmfs, then unregister the filesystem type.
+ */
+void __exit dm_interface_exit(void)
+{
+       MOD_INC_USE_COUNT; /* So that it lands up being zero */
+
+       do_umount(dmfs_mnt, 0);
+
+       unregister_filesystem(&dmfs_fstype);
+
+}
+
diff --git a/kernel/fs/dmfs-suspend.c b/kernel/fs/dmfs-suspend.c

new file mode 100644 (file)

index 0000000..8fc20cf
--- /dev/null
+++ b/kernel/fs/dmfs-suspend.c
@@ -0,0 +1,95 @@
+/*
+ * dmfs-suspend.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/uaccess.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+
+/*
+ * seq_file start for "suspend": the file consists of a single record, so
+ * anything past position 0 is end-of-file.
+ *
+ * The semaphore is taken before the position check because seq_file
+ * calls stop() after every start(), including one that returned NULL,
+ * and s_stop() releases the semaphore unconditionally.  Returning early
+ * without down() would leave the semaphore count one too high after each
+ * read at end-of-file.
+ */
+static void *s_start(struct seq_file *s, loff_t *pos)
+{
+       struct dmfs_i *dmi = s->context;
+
+       down(&dmi->sem);
+       if (*pos > 0)
+               return NULL;
+       return (void *)1;
+}
+
+/* seq_file next: there is only one record, so advance *pos and finish. */
+static void *s_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       ++*pos;
+
+       return NULL;
+}
+
+/* seq_file stop: release the per-LV semaphore. */
+static void s_stop(struct seq_file *s, void *v)
+{
+       up(&((struct dmfs_i *)s->context)->sem);
+}
+
+/*
+ * seq_file show: print "0\n" when the device is not suspended and
+ * "1\n" otherwise.
+ */
+static int s_show(struct seq_file *s, void *v)
+{
+       struct dmfs_i *dmi = s->context;
+
+       seq_puts(s, dmi->md->suspended == 0 ? "0\n" : "1\n");
+       return 0;
+}
+
+/* Iterator used for the per-LV "suspend" file. */
+struct seq_operations dmfs_suspend_seq_ops = {
+       .start  = s_start,
+       .next   = s_next,
+       .stop   = s_stop,
+       .show   = s_show,
+};
+
+/*
+ * Write handler for the "suspend" file.  Writing '1' suspends the mapped
+ * device and '0' resumes it; one optional trailing byte is accepted.
+ *
+ * 'buf' is a user-space pointer, so the command byte is fetched with
+ * get_user() instead of being dereferenced directly.
+ *
+ * Returns the number of bytes consumed, 0 for an empty write, -EINVAL for
+ * a bad length or command, -EFAULT if the byte cannot be read, or the
+ * negative error from dm_suspend()/dm_resume().
+ */
+ssize_t dmfs_suspend_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+{
+       struct inode *dir = file->f_dentry->d_parent->d_inode;
+       struct dmfs_i *dmi = DMFS_I(dir);
+       int written = 0;
+       char cmd;
+
+       if (count == 0)
+               goto out;
+       if (count != 1 && count != 2)
+               return -EINVAL;
+       if (get_user(cmd, buf))
+               return -EFAULT;
+       if (cmd != '0' && cmd != '1')
+               return -EINVAL;
+
+       down(&dmi->sem);
+       if (cmd == '0')
+               written = dm_resume(dmi->md);
+       else
+               written = dm_suspend(dmi->md);
+       if (written >= 0)
+               written = count;
+       up(&dmi->sem);
+
+out:
+       return written;
+}
+
+
diff --git a/kernel/fs/dmfs-table.c b/kernel/fs/dmfs-table.c

new file mode 100644 (file)

index 0000000..625ad2f
--- /dev/null
+++ b/kernel/fs/dmfs-table.c
@@ -0,0 +1,367 @@
+/*
+ * dmfs-table.c
+ *
+ * Copyright (C) 2001 Sistina Software
+ *
+ * This software is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include "dm.h"
+#include "dmfs.h"
+
+/*
+ * First offset not yet covered by the table: one past the 'high' value of
+ * the last target, or 0 for a table without targets.
+ */
+static offset_t start_of_next_range(struct dm_table *t)
+{
+       if (!t->num_targets)
+               return 0;
+
+       return t->highs[t->num_targets - 1] + 1;
+}
+
+/*
+ * Parse one table line of the form "<start> <size> <target type> <args>"
+ * and add the resulting target to table 't'.
+ *
+ * Returns NULL when the target was added, otherwise a string describing
+ * what went wrong.
+ */
+static char *dmfs_parse_line(struct dm_table *t, char *str)
+{
+       offset_t start, size, high;
+       void *context;
+       struct target_type *ttype;
+       int rv = 0;
+       char *msg;
+       int pos = 0;
+       char target[33];
+
+/* Indexed by the number of fields sscanf() managed to convert (0..2). */
+static char *err_table[] = {
+       "Missing/Invalid start argument",
+       "Missing/Invalid size argument",
+       "Missing target type"
+};
+       /* printk("dmfs_parse_line: (%s)\n", str); */
+
+       /* NOTE(review): %d is used with offset_t variables - confirm the type matches */
+       rv = sscanf(str, "%d %d %32s%n", &start, &size, target, &pos);
+       if (rv < 3) {
+               msg = err_table[rv];
+               goto out;
+       }
+       /* Skip what was parsed plus following white space; the rest goes to the constructor. */
+       str += pos;
+       while(*str && isspace(*str))
+               str++;
+
+       /* Each line must start exactly where the previous target ended. */
+       msg = "Gap in table";
+       if (start != start_of_next_range(t))
+               goto out;
+
+       msg = "Target type unknown";
+       ttype = dm_get_target_type(target);
+       if (ttype) {
+               msg = "This message should never appear (constructor error)";
+               rv = ttype->ctr(t, start, size, str, &context);
+               /* presumably the constructor leaves an error string in context on failure - verify */
+               msg = context;
+               if (rv == 0) {
+#if 0
+                       printk("dmfs_parse: %u %u %s %s\n", start, size, 
+                               ttype->name,
+                               ttype->print ? ttype->print(context) : "-");
+#endif
+                       msg = "Error adding target to table";
+                       high = start + (size - 1);
+                       /* Success returns here without dropping the target type reference. */
+                       if (dm_table_add_target(t, high, ttype, context) == 0)
+                               return NULL;
+                       ttype->dtr(t, context);
+               }
+               dm_put_target_type(ttype);
+       }
+out:
+       return msg;
+}
+
+
+/*
+ * Copy bytes from 'src' to 'dst' until a newline has been copied or the
+ * smaller of the two lengths is used up.  A copied newline is included in
+ * the count and sets *flag to 1; *flag is left alone otherwise.
+ * Returns the number of bytes stored in 'dst'.
+ */
+static int dmfs_copy(char *dst, int dstlen, char *src, int srclen, int *flag)
+{
+       int room = (dstlen < srclen) ? dstlen : srclen;
+       int done = 0;
+
+       while (room) {
+               char c = src[done];
+
+               dst[done++] = c;
+               if (c == '\n') {
+                       *flag = 1;
+                       break;
+               }
+               room--;
+       }
+
+       return done;
+}
+
+/*
+ * Return 1 when the line holds something to parse: a non-blank character
+ * that appears before any '#'.  Blank lines and lines whose first
+ * non-blank character is '#' give 0.
+ */
+static int dmfs_line_is_not_comment(char *str)
+{
+       for (; *str != '\0'; str++) {
+               if (*str == '#')
+                       return 0;
+               if (!isspace(*str))
+                       return 1;
+       }
+
+       return 0;
+}
+
+/* State handed to dmfs_read_actor() through read_descriptor_t.buf. */
+struct dmfs_desc {
+       struct dm_table *table; /* table being filled in */
+       struct inode *inode;    /* inode that receives error records */
+       char *tmp;              /* one-page buffer holding the current line */
+       loff_t tmpl;            /* number of bytes currently in tmp */
+       unsigned long lnum;     /* line number used in error records */
+};
+
+/*
+ * Read actor for do_generic_file_read(): cut the page data into lines and
+ * feed each non-comment line to dmfs_parse_line().  Partial lines are
+ * carried over between calls in d->tmp.  Parse failures are recorded with
+ * dmfs_add_error() and parsing continues with the next line.
+ *
+ * Returns the number of bytes consumed, or 0 (without updating 'desc')
+ * when a line does not fit in the one-page line buffer.
+ */
+static int dmfs_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size)
+{
+       char *buf, *msg;
+       unsigned long count = desc->count, len, copied;
+       struct dmfs_desc *d = (struct dmfs_desc *)desc->buf;
+       
+       if (size > count)
+               size = count;
+
+       len = size;
+       buf = kmap(page);
+       do { 
+               int flag = 0;
+               /* one byte of the line buffer is kept back for the terminating NUL */
+               copied = dmfs_copy(d->tmp + d->tmpl, PAGE_SIZE - d->tmpl - 1,
+                                  buf + offset, len, &flag);
+               offset += copied;
+               len -= copied;
+               if (d->tmpl + copied == PAGE_SIZE - 1)
+                       goto line_too_long;
+               d->tmpl += copied;
+               /* a line is complete at a newline, or when the last of the data has been consumed */
+               if (flag || (len == 0 && count == size)) {
+                       *(d->tmp + d->tmpl) = 0;
+                       if (dmfs_line_is_not_comment(d->tmp)) {
+                               msg = dmfs_parse_line(d->table, d->tmp);
+                               if (msg) {
+                                       dmfs_add_error(d->inode, d->lnum, msg);
+                               }
+                       }
+                       d->lnum++;
+                       d->tmpl = 0;
+               }
+       } while(len > 0);
+       kunmap(page);
+
+       desc->count = count - size;
+       desc->written += size;
+
+       return size;
+
+line_too_long:
+       /* only logged; no error record is added for this case */
+       printk(KERN_INFO "dmfs_read_actor: Line %lu too long\n", d->lnum);
+       kunmap(page);
+       return 0;
+}
+
+/*
+ * Build a new table from the text stored in the inode's page cache.
+ *
+ * Returns the new table, or NULL when the file is empty, memory runs out,
+ * the text could not be read completely, or any line produced an error
+ * record.
+ *
+ * dm_table_destroy() is only ever called with a table that was actually
+ * created.  dm_table_create() failure is tested both as NULL and as an
+ * ERR_PTR() value, because dmfs_create_lv() checks it with IS_ERR().
+ */
+static struct dm_table *dmfs_parse(struct inode *inode, struct file *filp)
+{
+       struct dm_table *t;
+       unsigned long page;
+       struct dmfs_desc d;
+       read_descriptor_t desc;
+       loff_t pos = 0;
+
+       if (inode->i_size == 0)
+               return NULL;
+
+       page = __get_free_page(GFP_NOFS);
+       if (!page)
+               return NULL;
+
+       t = dm_table_create();
+       if (!t || IS_ERR(t)) {
+               free_page(page);
+               return NULL;
+       }
+
+       desc.written = 0;
+       desc.count = inode->i_size;
+       desc.buf = (char *)&d;
+       d.table = t;
+       d.inode = inode;
+       d.tmp = (char *)page;
+       d.tmpl = 0;
+       d.lnum = 1;
+
+       do_generic_file_read(filp, &pos, &desc, dmfs_read_actor);
+       free_page(page);
+
+       /* a short read or any recorded parse error invalidates the table */
+       if (desc.written != inode->i_size ||
+           !list_empty(&DMFS_I(inode)->errors)) {
+               dm_table_destroy(t);
+               return NULL;
+       }
+
+       return t;
+}
+
+/*
+ * Release of the "table" file.  When the file was open for writing, the
+ * old error records are discarded, the text is parsed and, if parsing
+ * produced a table, it is swapped into the mapped device.  A running
+ * device is suspended around the swap and resumed afterwards.  The write
+ * access taken in dmfs_table_open() is dropped last.
+ */
+static int dmfs_table_release(struct inode *inode, struct file *f)
+{
+       struct dentry *dentry = f->f_dentry;
+       struct inode *parent = dentry->d_parent->d_inode;
+       struct dmfs_i *dmi = DMFS_I(parent);
+       struct dm_table *table;
+
+       if (f->f_mode & FMODE_WRITE) {
+
+               down(&dmi->sem);
+               dmfs_zap_errors(dentry->d_parent->d_inode);
+               table = dmfs_parse(dentry->d_parent->d_inode, f);
+
+               if (table) {
+                       struct mapped_device *md = dmi->md;
+                       int need_resume = 0;
+
+                       if (md->suspended == 0) {
+                               dm_suspend(md);
+                               need_resume = 1;
+                       }
+                       /* NOTE(review): return values of dm_suspend/dm_swap_table/dm_resume are ignored */
+                       dm_swap_table(md, table);
+                       if (need_resume) {
+                               dm_resume(md);
+                       }
+               }
+               up(&dmi->sem);
+
+                put_write_access(parent);
+       }
+
+       return 0;
+}
+
+/*
+ * readpage: there is no backing store, so a page that is not yet up to
+ * date is zero-filled and marked up to date.  The page is always unlocked.
+ */
+static int dmfs_readpage(struct file *file, struct page *page)
+{
+       if (!Page_Uptodate(page)) {
+               memset(kmap(page), 0, PAGE_CACHE_SIZE);
+               kunmap(page);
+               flush_dcache_page(page);
+               SetPageUptodate(page);
+       }
+       UnlockPage(page);
+       return 0;
+}
+
+/*
+ * prepare_write: map the page, zero it if it is not yet up to date, and
+ * mark it dirty.  The kmap() taken here is undone by the kunmap() in
+ * dmfs_commit_write().
+ */
+static int dmfs_prepare_write(struct file *file, struct page *page,
+                             unsigned offset, unsigned to)
+{
+       void *addr = kmap(page);
+       if (!Page_Uptodate(page)) {
+               memset(addr, 0, PAGE_CACHE_SIZE);
+               flush_dcache_page(page);
+               SetPageUptodate(page);
+       }
+       SetPageDirty(page);
+       return 0;
+}
+
+/*
+ * commit_write: drop the mapping taken in dmfs_prepare_write() and grow
+ * i_size of the mapping's host inode if the write went past the old end.
+ */
+static int dmfs_commit_write(struct file *file, struct page *page,
+                            unsigned offset, unsigned to)
+{
+       struct inode *inode = page->mapping->host;
+       /* file position just past the last byte written */
+       loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
+
+       kunmap(page);
+       if (pos > inode->i_size)
+               inode->i_size = pos;
+       return 0;
+}
+
+/*
+ * Take write access to the inode and insist on being the only writer.
+ * Returns 0 on success and -1 otherwise.
+ *
+ * There is a small race here in that two processes might call this at
+ * the same time and both fail. So it's a fail-safe race :-) This should
+ * move into namei.c (and thus use the spinlock and do this properly)
+ * at some stage if we continue to use this set of functions for ensuring
+ * exclusive write access to the file.
+ */
+static int get_exclusive_write_access(struct inode *inode)
+{
+       if (get_write_access(inode) != 0)
+               return -1;
+
+       if (atomic_read(&inode->i_writecount) == 1)
+               return 0;
+
+       /* somebody else holds write access too: back out */
+       put_write_access(inode);
+       return -1;
+}
+
+/*
+ * Open of the "table" file.  Opening for write requires exclusive write
+ * access to the parent (LV) directory inode; -EPERM is returned when that
+ * cannot be obtained.  Read-only opens always succeed.
+ */
+static int dmfs_table_open(struct inode *inode, struct file *file)
+{
+       struct inode *parent;
+
+       if (!(file->f_mode & FMODE_WRITE))
+               return 0;
+
+       parent = file->f_dentry->d_parent->d_inode;
+       return get_exclusive_write_access(parent) ? -EPERM : 0;
+}
+
+/* fsync on the "table" file: nothing to flush, always succeeds. */
+static int dmfs_table_sync(struct file *file, struct dentry *dentry, int datasync)
+{
+       return 0;
+}
+
+/*
+ * Revalidate the "table" inode: its size is copied from the parent
+ * directory inode, whose mapping holds the table text.
+ */
+static int dmfs_table_revalidate(struct dentry *dentry)
+{
+       dentry->d_inode->i_size = dentry->d_parent->d_inode->i_size;
+
+       return 0;
+}
+
+/* Page cache operations for the table text; pages are never written out. */
+struct address_space_operations dmfs_address_space_operations = {
+       .readpage       = dmfs_readpage,
+       .writepage      = fail_writepage,
+       .prepare_write  = dmfs_prepare_write,
+       .commit_write   = dmfs_commit_write,
+};
+
+/* File operations of the "table" file. */
+static struct file_operations dmfs_table_file_operations = {
+       .llseek         = generic_file_llseek,
+       .read           = generic_file_read,
+       .write          = generic_file_write,
+       .open           = dmfs_table_open,
+       .release        = dmfs_table_release,
+       .fsync          = dmfs_table_sync,
+};
+
+/* Inode operations of the "table" file. */
+static struct inode_operations dmfs_table_inode_operations = {
+       .revalidate     = dmfs_table_revalidate,
+};
+
+struct seq_operations;
+
+/*
+ * Build the inode for the "table" entry of an LV directory.  The inode
+ * shares the directory's address space, so the table text lives in the
+ * directory's page cache.
+ *
+ * 'seq_ops' and 'dev' are unused.  They are present so the definition
+ * matches the four-argument prototype in dmfs-lv.c, where this function is
+ * stored in, and called through, the dmfs_inode_info create pointer.
+ *
+ * Returns NULL when no inode could be allocated.
+ */
+struct inode *dmfs_create_table(struct inode *dir, int mode,
+                               struct seq_operations *seq_ops, int dev)
+{
+       struct inode *inode = dmfs_new_inode(dir->i_sb, mode | S_IFREG);
+
+       if (inode) {
+               inode->i_mapping = dir->i_mapping;
+               inode->i_mapping->a_ops = &dmfs_address_space_operations;
+               inode->i_fop = &dmfs_table_file_operations;
+               inode->i_op = &dmfs_table_inode_operations;
+       }
+
+       return inode;
+}
+
diff --git a/kernel/fs/dmfs.h b/kernel/fs/dmfs.h

new file mode 100644 (file)

index 0000000..5f1e1fa
--- /dev/null
+++ b/kernel/fs/dmfs.h
@@ -0,0 +1,22 @@
+#ifndef LINUX_DMFS_H
+#define LINUX_DMFS_H
+
+/*
+ * Per-inode private data for dmfs, reached through DMFS_I(inode).
+ *
+ * NOTE(review): field roles below are inferred from names and the
+ * declarations in this header - confirm against the users:
+ *   sem    - presumably serialises access to this structure
+ *   md     - the mapped device associated with the inode
+ *   errors - list head, presumably the list managed by
+ *            dmfs_add_error() / dmfs_zap_errors()
+ *   status - status value; meaning not visible here
+ */
+struct dmfs_i {
+        struct semaphore sem;
+        struct mapped_device *md;
+        struct list_head errors;
+       int status;
+};
+
+/* Fetch the struct dmfs_i stored in an inode's u.generic_ip pointer. */
+#define DMFS_I(inode) ((struct dmfs_i *)(inode)->u.generic_ip)
+
+
+extern struct inode *dmfs_new_inode(struct super_block *sb, int mode);
+extern struct inode *dmfs_new_private_inode(struct super_block *sb, int mode);
+
+extern void dmfs_add_error(struct inode *inode, unsigned num, char *str);
+extern void dmfs_zap_errors(struct inode *inode);
+
+
+
+#endif /* LINUX_DMFS_H */
diff --git a/kernel/ioctl/dm-ioctl.c b/kernel/ioctl/dm-ioctl.c

new file mode 100644 (file)

index 0000000..66cae85
--- /dev/null
+++ b/kernel/ioctl/dm-ioctl.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/fs.h>
+
+#include "dm.h"
+#include <linux/dm-ioctl.h>
+
+/*
+ * Release a parameter block obtained from copy_params(), which
+ * allocates it with vmalloc().
+ */
+static void free_params(struct dm_ioctl *p)
+{
+       vfree(p);
+}
+
+/*
+ * Copy an ioctl parameter block in from userland.
+ *
+ * The fixed-size header is read first to learn data_size, then a
+ * buffer of that size is vmalloc'd and the whole block is copied in.
+ * On success *result points at the kernel copy, which the caller must
+ * release with free_params().
+ *
+ * Returns 0 on success, -EFAULT if userland memory cannot be read,
+ * -EINVAL if data_size is too small to hold the header, or -ENOMEM
+ * if the allocation fails.  *result is untouched on failure.
+ */
+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **result)
+{
+       struct dm_ioctl tmp, *dmi;
+
+       if (copy_from_user(&tmp, user, sizeof(tmp)))
+               return -EFAULT;
+
+       /*
+        * Every caller reads header fields from the copy, so a block
+        * shorter than the header would be read out of bounds.
+        */
+       if (tmp.data_size < sizeof(tmp))
+               return -EINVAL;
+
+       if (!(dmi = vmalloc(tmp.data_size)))
+               return -ENOMEM;
+
+       if (copy_from_user(dmi, user, tmp.data_size)) {
+               /* don't leak the buffer on a faulting copy */
+               vfree(dmi);
+               return -EFAULT;
+       }
+
+       /*
+        * Userland may have changed data_size between the two copies;
+        * pin it to the size that was actually allocated and copied so
+        * later bounds checks use the real buffer length.
+        */
+       dmi->data_size = tmp.data_size;
+
+       *result = dmi;
+       return 0;
+}
+
+/*
+ * check a string doesn't overrun the chunk of
+ * memory we copied from userland.
+ */
+/*
+ * Returns 1 if a NUL terminator for 'str' is found strictly before
+ * 'end', 0 otherwise (including when str is already at or past end).
+ *
+ * 'end' points one past the last valid byte of the buffer, so it is
+ * never dereferenced.
+ */
+static int valid_str(char *str, void *end)
+{
+       while ((void *) str < end) {
+               if (!*str)
+                       return 1;
+               str++;
+       }
+
+       return 0;
+}
+
+/*
+ * Locate the first target spec, which starts immediately after the
+ * dm_ioctl header, and its parameter string, which starts immediately
+ * after the spec.
+ *
+ * 'end' is one past the last byte copied from userland.  Returns 1 if
+ * the spec header lies inside the buffer and the parameter string is
+ * NUL-terminated before 'end', 0 otherwise.
+ */
+static int first_target(struct dm_ioctl *a, void *end,
+                       struct dm_target_spec **spec, char **params)
+{
+       *spec = (struct dm_target_spec *) (a + 1);
+       *params = (char *) (*spec + 1);
+
+       /*
+        * The whole spec header plus at least one byte of parameter
+        * string must fit; otherwise the caller would read spec fields
+        * from outside the buffer.
+        */
+       if ((void *) *params >= end)
+               return 0;
+
+       return valid_str(*params, end);
+}
+
+/*
+ * Step from target spec 'last' to the following one using the byte
+ * offset in last->next, and locate its parameter string.
+ *
+ * last->next comes from userland, so it is validated before use: it
+ * must move forward by at least one spec header, and the new header
+ * plus at least one byte of parameter string must lie before 'end'
+ * (one past the last byte copied from userland).
+ *
+ * Returns 1 if the next spec is inside the buffer and its parameter
+ * string is NUL-terminated before 'end', 0 otherwise.  *spec and
+ * *params are left untouched when the offset is rejected.
+ */
+static int next_target(struct dm_target_spec *last, void *end,
+                      struct dm_target_spec **spec, char **params)
+{
+       unsigned long room;
+
+       if ((void *) last >= end)
+               return 0;
+
+       /* bytes from the start of 'last' to the end of the buffer */
+       room = (unsigned char *) end - (unsigned char *) last;
+
+       if (last->next < sizeof(*last) ||
+           room <= sizeof(*last) ||
+           last->next >= room - sizeof(*last))
+               return 0;
+
+       *spec = (struct dm_target_spec *)
+               (((unsigned char *) last) + last->next);
+       *params = (char *) (*spec + 1);
+
+       return valid_str(*params, end);
+}
+
+/*
+ * Error reporting callback: logs 'message' at KERN_WARNING level.
+ * The 'private' argument is unused.
+ */
+void err_fn(const char *message, void *private)
+{
+       printk(KERN_WARNING "%s\n", message);
+}
+
+/*
+ * Checks to see if there's a gap in the table.
+ * Returns true iff there is a gap.
+ */
+/*
+ * Detects a hole between the targets already in 'table' and the
+ * target described by 'spec'.
+ *
+ * An empty table requires the new target to start at sector 0;
+ * otherwise it must start one sector past the last recorded high.
+ * Returns 1 if there is a gap, 0 if the new target is contiguous.
+ */
+static int gap(struct dm_table *table, struct dm_target_spec *spec)
+{
+       if (table->num_targets)
+               return spec->sector_start !=
+                       table->highs[table->num_targets - 1] + 1;
+
+       return spec->sector_start > 0;
+}
+
+/*
+ * Build 'table' from the target specs that follow the dm_ioctl header
+ * in 'args' (a kernel copy of the userland parameter block).
+ *
+ * For each of args->target_count specs: locate it, look up its target
+ * type, check it is contiguous with the previous target, run the
+ * type's constructor and add the result to the table.  Finally the
+ * table is completed with dm_table_complete().
+ *
+ * Returns 0 on success, -EINVAL on any parse/validation failure (the
+ * reason is logged through err_fn()), or the result of
+ * dm_table_complete().
+ */
+static int populate_table(struct dm_table *table, struct dm_ioctl *args)
+{
+       int i, r;
+       struct dm_target_spec *spec;
+       char *params;
+       struct target_type *ttype;
+       void *context, *end;
+       offset_t high;
+
+       /* target_count is a signed int from userland: reject <= 0 */
+       if (args->target_count <= 0) {
+               WARN("No targets specified");
+               return -EINVAL;
+       }
+
+       /* one past the last byte copied from userland */
+       end = ((void *) args) + args->data_size;
+
+#define PARSE_ERROR(msg) do {err_fn(msg, NULL); return -EINVAL;} while (0)
+
+       for (i = 0; i < args->target_count; i++) {
+
+               r = (i == 0) ? first_target(args, end, &spec, &params) :
+                       next_target(spec, end, &spec, &params);
+
+               if (!r)
+                       PARSE_ERROR("unable to find target");
+
+               /* a zero length would make 'length - 1' below wrap */
+               if (!spec->length)
+                       PARSE_ERROR("zero length target");
+
+               /* lookup the target type */
+               if (!(ttype = dm_get_target_type(spec->target_type)))
+                       PARSE_ERROR("unable to find target type");
+
+               if (gap(table, spec))
+                       PARSE_ERROR("gap in target ranges");
+
+               /*
+                * build the target; on failure the constructor leaves
+                * its error message in 'context'
+                */
+               if (ttype->ctr(table, spec->sector_start, spec->length, params,
+                              &context))
+                       PARSE_ERROR(context);
+
+               /* add the target to the table, keyed by its last sector */
+               high = spec->sector_start + (spec->length - 1);
+               if (dm_table_add_target(table, high, ttype, context))
+                       PARSE_ERROR("internal error adding target to table");
+       }
+
+#undef PARSE_ERROR
+
+       r = dm_table_complete(table);
+       return r;
+}
+
+/*
+ * Copies device info back to user space, used by
+ * the create and info ioctls.
+ */
+/*
+ * Copies device info back to user space, used by
+ * the create and info ioctls.
+ *
+ * Looks up the device called 'name'.  If it exists, the reply has
+ * exists = 1 and carries the device's name, suspend state, open
+ * count, major/minor and target count; otherwise exists = 0 and all
+ * other fields are zero.
+ *
+ * Returns 0 on success or -EFAULT if the reply cannot be written to
+ * 'user'.
+ */
+static int info(const char *name, struct dm_ioctl *user)
+{
+       struct dm_ioctl param;
+       struct mapped_device *md = dm_get(name);
+
+       /*
+        * Zero the whole reply first: the struct is copied out in
+        * full, so unset fields and padding must not carry kernel
+        * stack contents to userland.
+        */
+       memset(&param, 0, sizeof(param));
+
+       if (!md)
+               goto out;       /* param.exists is already 0 */
+
+       /* size - 1 keeps the NUL that memset left in the last byte */
+       strncpy(param.name, md->name, sizeof(param.name) - 1);
+       param.exists = 1;
+       param.suspend = md->suspended;
+       param.open_count = md->use_count;
+       param.major = MAJOR(md->dev);
+       param.minor = MINOR(md->dev);
+       param.target_count = md->map->num_targets;
+
+ out:
+       /* copy_to_user() returns bytes left uncopied, not an errno */
+       return copy_to_user(user, &param, sizeof(param)) ? -EFAULT : 0;
+}
+
+/*
+ * DM_CREATE: build a table from 'param', create a mapped device using
+ * param->name and param->minor, then report the new device's details
+ * back to 'user' via info().
+ *
+ * Returns 0 on success or a negative error from whichever step
+ * failed.
+ */
+static int create(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+       int r;
+       struct mapped_device *md;
+       struct dm_table *t;
+
+       /*
+        * On failure 't' is an error pointer, not a table, so it must
+        * not reach dm_table_destroy(); return directly as reload()
+        * does.
+        */
+       t = dm_table_create();
+       if (IS_ERR(t))
+               return PTR_ERR(t);
+
+       if ((r = populate_table(t, param)))
+               goto bad;
+
+       md = dm_create(param->name, param->minor, t);
+       r = PTR_ERR(md);
+       if (IS_ERR(md))
+               goto bad;
+
+       if ((r = info(param->name, user))) {
+               /*
+                * NOTE(review): if dm_destroy() also frees the table
+                * owned by md, the dm_table_destroy() at 'bad' frees
+                * it a second time - confirm table ownership.
+                */
+               dm_destroy(md);
+               goto bad;
+       }
+
+       return 0;
+
+ bad:
+       dm_table_destroy(t);
+       return r;
+}
+
+/*
+ * DM_REMOVE: destroy the device named in 'param'.
+ * Returns -ENXIO if no such device exists, otherwise the result of
+ * dm_destroy().
+ */
+static int remove(struct dm_ioctl *param)
+{
+       struct mapped_device *victim = dm_get(param->name);
+
+       return victim ? dm_destroy(victim) : -ENXIO;
+}
+
+/*
+ * DM_SUSPEND: suspend the named device when param->suspend is
+ * non-zero, resume it otherwise.
+ * Returns -ENXIO if no such device exists, otherwise the result of
+ * dm_suspend() / dm_resume().
+ */
+static int suspend(struct dm_ioctl *param)
+{
+       struct mapped_device *dev = dm_get(param->name);
+
+       if (!dev)
+               return -ENXIO;
+
+       if (param->suspend)
+               return dm_suspend(dev);
+
+       return dm_resume(dev);
+}
+
+/*
+ * DM_RELOAD: build a fresh table from 'param' and swap it into the
+ * named device.
+ *
+ * Returns -ENXIO if the device does not exist, the error from
+ * dm_table_create() if no table could be created, or the error from
+ * populate_table() / dm_swap_table(); in the last two cases the new
+ * table is destroyed.  Returns 0 on success.
+ */
+static int reload(struct dm_ioctl *param)
+{
+       struct mapped_device *dev = dm_get(param->name);
+       struct dm_table *fresh;
+       int err;
+
+       if (!dev)
+               return -ENXIO;
+
+       fresh = dm_table_create();
+       if (IS_ERR(fresh))
+               return PTR_ERR(fresh);
+
+       err = populate_table(fresh, param);
+       if (!err)
+               err = dm_swap_table(dev, fresh);
+
+       /* either step failing leaves the new table unused */
+       if (err)
+               dm_table_destroy(fresh);
+
+       return err;
+}
+
+/*
+ * Open handler for the control device: callers without
+ * CAP_SYS_ADMIN are refused with -EACCES.
+ */
+static int ctl_open(struct inode *inode, struct file *file)
+{
+       return capable(CAP_SYS_ADMIN) ? 0 : -EACCES;
+}
+
+/*
+ * Release handler for the control device: nothing to tear down,
+ * always returns 0.
+ */
+static int ctl_close(struct inode *inode, struct file *file)
+{
+       return 0;
+}
+
+
+/*
+ * ioctl entry point for the control device.
+ *
+ * 'a' is the userland address of a struct dm_ioctl.  The block is
+ * copied into the kernel, dispatched on 'command', and the kernel
+ * copy is freed again before returning.  Unknown commands are logged
+ * and fail with -EINVAL.
+ */
+static int ctl_ioctl(struct inode *inode, struct file *file,
+                    uint command, ulong a)
+{
+       struct dm_ioctl *user = (struct dm_ioctl *) a;
+       struct dm_ioctl *kparam;
+       int ret = copy_params(user, &kparam);
+
+       if (ret)
+               return ret;
+
+       switch (command) {
+       case DM_CREATE:
+               ret = create(kparam, user);
+               break;
+
+       case DM_REMOVE:
+               ret = remove(kparam);
+               break;
+
+       case DM_SUSPEND:
+               ret = suspend(kparam);
+               break;
+
+       case DM_RELOAD:
+               ret = reload(kparam);
+               break;
+
+       case DM_INFO:
+               ret = info(kparam->name, user);
+               break;
+
+       default:
+               WARN("dm_ctl_ioctl: unknown command 0x%x\n", command);
+               ret = -EINVAL;
+               break;
+       }
+
+       free_params(kparam);
+       return ret;
+}
+
+
+/*
+ * File operations for the dm control character device, registered in
+ * dm_interface_init().
+ */
+static struct file_operations _ctl_fops = {
+       open:           ctl_open,
+       release:        ctl_close,
+       ioctl:          ctl_ioctl,
+       owner:          THIS_MODULE,
+};
+
+
+static devfs_handle_t _ctl_handle;
+
+/*
+ * Register the dm control character device (major DM_CHAR_MAJOR) and
+ * its devfs entry DM_DIR "/control".
+ *
+ * Returns -EIO if devfs_register_chrdev() fails, otherwise its
+ * (non-negative) result.  The devfs_register() handle is kept in
+ * _ctl_handle and is not checked.
+ */
+int dm_interface_init(void)
+{
+       int r = devfs_register_chrdev(DM_CHAR_MAJOR, DM_DIR, &_ctl_fops);
+
+       if (r < 0) {
+               WARN("devfs_register_chrdev failed for dm control dev");
+               return -EIO;
+       }
+
+       _ctl_handle = devfs_register(0, DM_DIR "/control", 0, DM_CHAR_MAJOR, 0,
+                                    S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
+                                    &_ctl_fops, NULL);
+
+       return r;
+}
+
+/*
+ * Undo dm_interface_init(): remove the devfs control entry, then
+ * unregister the character device.  A failure to unregister is
+ * logged but cannot be reported to the caller.
+ */
+void dm_interface_exit(void)
+{
+       /*
+        * Remove the control device first so no devfs entry is left
+        * pointing at _ctl_fops.
+        * NOTE(review): assumes devfs_unregister() accepts a NULL
+        * handle, since devfs_register()'s result is never checked -
+        * confirm.
+        */
+       devfs_unregister(_ctl_handle);
+
+       if (devfs_unregister_chrdev(DM_CHAR_MAJOR, DM_DIR) < 0)
+               WARN("devfs_unregister_chrdev failed for dm control device");
+}
+
diff --git a/kernel/ioctl/dm-ioctl.h b/kernel/ioctl/dm-ioctl.h

new file mode 100644 (file)

index 0000000..4f746a2
--- /dev/null
+++ b/kernel/ioctl/dm-ioctl.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef _DM_IOCTL_H
+#define _DM_IOCTL_H
+
+#include "device-mapper.h"
+
+/*
+ * Implements a traditional ioctl interface to the
+ * device mapper.  Yuck.
+ */
+
+/*
+ * Describes one target in the data that follows a struct dm_ioctl.
+ * The first spec starts immediately after the dm_ioctl header; each
+ * spec is followed by its NUL-terminated parameter string, and 'next'
+ * gives the byte offset from the start of this spec to the next one.
+ */
+struct dm_target_spec {
+       int32_t status;         /* used when reading from kernel only */
+       unsigned long long sector_start;
+       unsigned long long length;
+
+       char target_type[DM_MAX_TYPE_NAME];
+
+       unsigned long next;     /* offset in bytes to next target_spec */
+
+       /*
+        * Parameter string starts immediately
+        * after this object.  Be careful to add
+        * padding after string to ensure correct
+        * alignment of subsequent dm_target_spec.
+        */
+};
+
+/*
+ * Header of the parameter block passed to every dm ioctl.
+ *
+ * The kernel copies data_size bytes from userland starting at this
+ * header, so data_size must cover the header plus any dm_target_spec
+ * records and parameter strings that follow it.  Fields marked "out"
+ * are filled in by the kernel when device info is reported back.
+ */
+struct dm_ioctl {
+       unsigned long data_size;        /* the size of this structure */
+       char name[DM_NAME_LEN];
+
+       int exists;             /* out */
+       int suspend;            /* in/out */
+       int open_count;         /* out */
+       int major;              /* out */
+       int minor;              /* in/out */
+
+       int target_count;       /* in/out */
+};
+
+/* FIXME: find own numbers, 109 is pinched from LVM */
+#define DM_IOCTL 0xfd
+#define DM_CHAR_MAJOR 124
+
+#define        DM_CREATE _IOWR(DM_IOCTL, 0x00, struct dm_ioctl)
+#define        DM_REMOVE _IOW(DM_IOCTL, 0x01, struct dm_ioctl)
+#define        DM_SUSPEND _IOW(DM_IOCTL, 0x02, struct dm_ioctl)
+#define        DM_RELOAD _IOWR(DM_IOCTL, 0x03, struct dm_ioctl)
+#define DM_INFO _IOWR(DM_IOCTL, 0x04, struct dm_ioctl)
+
+#endif
author	Alasdair Kergon <agk@redhat.com>
	Wed, 5 Dec 2001 23:21:03 +0000 (23:21 +0000)
committer	Alasdair Kergon <agk@redhat.com>
	Wed, 5 Dec 2001 23:21:03 +0000 (23:21 +0000)
configure		patch \| blob \| blame \| history
configure.in		patch \| blob \| blame \| history
kernel/Makefile.in	[new file with mode: 0644]	patch \| blob
kernel/common/device-mapper.h	[new file with mode: 0644]	patch \| blob
kernel/common/dm-linear.c	[new file with mode: 0644]	patch \| blob
kernel/common/dm-stripe.c	[new file with mode: 0644]	patch \| blob
kernel/common/dm-table.c	[new file with mode: 0644]	patch \| blob
kernel/common/dm-target.c	[new file with mode: 0644]	patch \| blob
kernel/common/dm.c	[new file with mode: 0644]	patch \| blob
kernel/common/dm.h	[new file with mode: 0644]	patch \| blob
kernel/fs/dmfs-error.c	[new file with mode: 0644]	patch \| blob
kernel/fs/dmfs-lv.c	[new file with mode: 0644]	patch \| blob
kernel/fs/dmfs-root.c	[new file with mode: 0644]	patch \| blob
kernel/fs/dmfs-status.c	[new file with mode: 0644]	patch \| blob
kernel/fs/dmfs-super.c	[new file with mode: 0644]	patch \| blob
kernel/fs/dmfs-suspend.c	[new file with mode: 0644]	patch \| blob
kernel/fs/dmfs-table.c	[new file with mode: 0644]	patch \| blob
kernel/fs/dmfs.h	[new file with mode: 0644]	patch \| blob
kernel/ioctl/dm-ioctl.c	[new file with mode: 0644]	patch \| blob
kernel/ioctl/dm-ioctl.h	[new file with mode: 0644]	patch \| blob