-0.95.04-cvs (2002-03-07)
+0.95.05-cvs (2002-03-14)
-0.94.07-cvs (2002-03-07)
+0.94.08-cvs (2002-03-14)
diff -ruN linux-2.4.16/drivers/md/Config.in linux/drivers/md/Config.in
--- linux-2.4.16/drivers/md/Config.in Fri Sep 14 22:22:18 2001
-+++ linux/drivers/md/Config.in Mon Feb 4 13:16:55 2002
++++ linux/drivers/md/Config.in Thu Mar 14 16:32:40 2002
@@ -14,5 +14,6 @@
dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
endmenu
diff -ruN linux-2.4.16/drivers/md/Makefile linux/drivers/md/Makefile
--- linux-2.4.16/drivers/md/Makefile Thu Dec 6 15:57:55 2001
-+++ linux/drivers/md/Makefile Mon Feb 4 13:16:55 2002
-@@ -4,9 +4,11 @@
++++ linux/drivers/md/Makefile Thu Mar 14 16:32:40 2002
+@@ -4,9 +4,12 @@
O_TARGET := mddev.o
-export-objs := md.o xor.o
-+export-objs := md.o xor.o dm-table.o dm-target.o
++export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o
list-multi := lvm-mod.o
lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o
+dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \
-+ dm-linear.o dm-stripe.o
++ dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \
++ dm-origin.o dm-mirror.o kcopyd.o
# Note: link order is important. All raid personalities
# and xor.o must come before md.o, as they each initialise
-@@ -20,8 +22,12 @@
+@@ -20,8 +23,12 @@
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
obj-$(CONFIG_BLK_DEV_MD) += md.o
obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o
+ $(LD) -r -o $@ $(dm-mod-objs)
diff -ruN linux-2.4.16/drivers/md/device-mapper.h linux/drivers/md/device-mapper.h
--- linux-2.4.16/drivers/md/device-mapper.h Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/device-mapper.h Tue Jan 8 13:17:13 2002
-@@ -0,0 +1,58 @@
++++ linux/drivers/md/device-mapper.h Thu Mar 7 16:56:22 2002
+@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+#define DM_DIR "device-mapper" /* Slashes not supported */
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
++#define DM_UUID_LEN 129
+
+#ifdef __KERNEL__
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_DEVICE_MAPPER_H */
+diff -ruN linux-2.4.16/drivers/md/dm-exception-store.c linux/drivers/md/dm-exception-store.c
+--- linux-2.4.16/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-exception-store.c Thu Mar 14 16:02:50 2002
+@@ -0,0 +1,683 @@
++/*
++ * dm-exception-store.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm-snapshot.h"
++
++#if 0
++/*
++ * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
++ */
++#define SNAP_MAGIC 0x70416e53
++
++/*
++ * The on-disk version of the metadata. Only applicable to
++ * persistent snapshots.
++ * There is no backward or forward compatibility implemented; snapshots
++ * with a different disk version from the kernel's will not be usable. It is
++ * expected that "lvcreate" will blank out the start of the COW device
++ * before calling the snapshot constructor.
++ */
++#define SNAPSHOT_DISK_VERSION 1
++
++/*
++ * Metadata format: (please keep this up-to-date!)
++ * Persistent snapshots have a 1 block header (see below for structure) at
++ * the very start of the device. The COW metadata starts at
++ * .start_of_exceptions.
++ *
++ * COW metadata is stored in blocks that are "extent-size" sectors long as
++ * an array of disk_exception structures in Little-Endian format.
++ * The last entry in this array has rsector_new set to 0 (this cannot be a
++ * legal redirection as the header is here) and if rsector_org has a value
++ * it is the sector number of the next COW metadata sector on the disk. If
++ * rsector_org is also zero then this is the end of the COW metadata.
++ *
++ * The metadata is written in hardblocksize lumps rather than in units of
++ * extents for efficiency so don't expect a whole extent to be zeroed out
++ * at any time.
++ *
++ * Non-persistent snapshots simply have redirected blocks stored
++ * (in chunk_size sectors) from hard block 1 to avoid inadvertently
++ * creating a bad header.
++ */
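A worked sizing example for the layout described above (numbers assumed for illustration, not taken from the patch): with 512-byte sectors and a 16-byte struct disk_exception, an extent_size of 128 sectors gives highest_metadata_entry = (128 * 512) / 16 - 1 = 4095 usable entries per extent, and a 512-byte hard block holds md_entries_per_block = 512 / 16 = 32 entries, matching the calculations in init_persistent_snapshot() below.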
++
++/*
++ * Internal snapshot structure
++ */
++struct persistent_info {
++ /* Size of extents used for COW blocks */
++ long extent_size;
++
++ /* Number of the next free sector for COW/data */
++ unsigned long next_free_sector;
++
++ /* Where the metadata starts */
++ unsigned long start_of_exceptions;
++
++ /* Where we are currently writing the metadata */
++ unsigned long current_metadata_sector;
++
++ /* Index into disk_cow array */
++ int current_metadata_entry;
++
++ /* Index into mythical extent array */
++ int current_metadata_number;
++
++ /* Number of metadata entries in the disk_cow array */
++ int highest_metadata_entry;
++
++ /* Number of metadata entries per hard disk block */
++ int md_entries_per_block;
++
++ /* kiobuf for doing I/O to header & metadata */
++ struct kiobuf *cow_iobuf;
++
++ /*
++ * Disk extent with COW data in it, as an array of
++ * exception tables. The first one points to the next
++ * block of metadata or 0 if this is the last
++ */
++ struct disk_exception *disk_cow;
++};
++
++/*
++ * An array of these is held in each disk block. LE format
++ */
++struct disk_exception {
++ uint64_t rsector_org;
++ uint64_t rsector_new;
++};
++
++/*
++ * Structure of a (persistent) snapshot header on disk, in LE format
++ */
++struct snap_disk_header {
++ uint32_t magic;
++
++ /* Simple, incrementing version. No backward compatibility */
++ uint32_t version;
++
++ /* In 512 byte sectors */
++ uint32_t chunk_size;
++
++ /* In 512 byte sectors */
++ uint32_t extent_size;
++ uint64_t start_of_exceptions;
++ uint32_t full;
++};
++
++/*
++ * READ or WRITE some blocks to/from a device
++ */
++static int do_io(int rw, struct kiobuf *iobuf, kdev_t dev,
++ unsigned long start, int nr_sectors)
++{
++ int i, sectors_per_block, nr_blocks;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ sectors_per_block = blocksize / SECTOR_SIZE;
++
++ nr_blocks = nr_sectors / sectors_per_block;
++ start /= sectors_per_block;
++
++ for (i = 0; i < nr_blocks; i++)
++ iobuf->blocks[i] = start++;
++
++ iobuf->length = nr_sectors << 9;
++
++ status = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
++ return (status != (nr_sectors << 9));
++}
++
++/*
++ * Write the latest COW metadata block.
++ */
++static int write_metadata(struct snapshot_c *s, struct persistent_info *pi)
++{
++ kdev_t dev = s->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ int writesize = blocksize/SECTOR_SIZE;
++
++ if (do_io(WRITE, pi->cow_iobuf, dev,
++ pi->current_metadata_sector, writesize) != 0) {
++ DMERR("Error writing COW block");
++ return -1;
++ }
++
++ return 0;
++}
++
++/*
++ * Allocate a kiobuf. This is the only code nicked from the old
++ * snapshot driver and I've changed it anyway.
++ */
++static int alloc_iobuf_pages(struct kiobuf *iobuf, int nr_sectors)
++{
++ int nr_pages, err, i;
++
++ if (nr_sectors > KIO_MAX_SECTORS)
++ return -1;
++
++ nr_pages = nr_sectors / (PAGE_SIZE/SECTOR_SIZE);
++ err = expand_kiobuf(iobuf, nr_pages);
++ if (err) goto out;
++
++ err = -ENOMEM;
++ iobuf->locked = 1;
++ iobuf->nr_pages = 0;
++ for (i = 0; i < nr_pages; i++) {
++ struct page * page;
++
++ page = alloc_page(GFP_KERNEL);
++ if (!page) goto out;
++
++ iobuf->maplist[i] = page;
++ LockPage(page);
++ iobuf->nr_pages++;
++ }
++ iobuf->offset = 0;
++
++ err = 0;
++
++out:
++ return err;
++}
++
++/*
++ * Read on-disk COW metadata and populate the hash table.
++ */
++static int read_metadata(struct snapshot_c *lc, struct persistent_info *pi)
++{
++ int status;
++ int i;
++ int entry = 0;
++ int map_page = 0;
++ int nr_sectors = pi->extent_size;
++ kdev_t dev = lc->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ unsigned long cur_sector = pi->start_of_exceptions;
++ unsigned long last_sector;
++ unsigned long first_free_sector = 0;
++ int entries_per_page = PAGE_SIZE / sizeof(struct disk_exception);
++ struct disk_exception *cow_block;
++ struct kiobuf *read_iobuf;
++ int err = 0;
++ int devsize = get_dev_size(dev);
++
++ /*
++ * Allocate our own iovec for this operation 'cos the
++ * others are way too small.
++ */
++ if (alloc_kiovec(1, &read_iobuf)) {
++ DMERR("Error allocating iobuf for %s",
++ kdevname(dev));
++ return -1;
++ }
++
++ if (alloc_iobuf_pages(read_iobuf, pi->extent_size)) {
++ DMERR("Error allocating iobuf space for %s",
++ kdevname(dev));
++ free_kiovec(1, &read_iobuf);
++ return -1;
++ }
++ cow_block = page_address(read_iobuf->maplist[0]);
++
++ do {
++ /* Make sure the chain does not go off the end of
++ * the device, or backwards */
++ if (cur_sector > devsize || cur_sector < first_free_sector) {
++ DMERR("COW table chain pointers are inconsistent, "
++ "can't activate snapshot");
++ err = -1;
++ goto ret_free;
++ }
++
++ first_free_sector = max(first_free_sector,
++ cur_sector + pi->extent_size);
++ status = do_io(READ, read_iobuf, dev,
++ cur_sector, nr_sectors);
++ if (status == 0) {
++
++ map_page = 0;
++ entry = 0;
++
++ cow_block = page_address(read_iobuf->maplist[0]);
++
++ /* Now populate the hash table from this data */
++ for (i = 0; i <= pi->highest_metadata_entry &&
++ cow_block[entry].rsector_new != 0; i++) {
++
++ struct exception *ex;
++
++ ex = add_exception(lc,
++ le64_to_cpu(cow_block[entry].rsector_org),
++ le64_to_cpu(cow_block[entry].rsector_new));
++
++ first_free_sector = max(first_free_sector,
++ (unsigned long)(le64_to_cpu(cow_block[entry].rsector_new) +
++ lc->chunk_size));
++
++ /* Do we need to move onto the next page? */
++ if (++entry >= entries_per_page) {
++ entry = 0;
++ cow_block = page_address(read_iobuf->maplist[++map_page]);
++ }
++ }
++ }
++ else {
++ DMERR("Error reading COW metadata for %s",
++ kdevname(dev));
++ err = -1;
++ goto ret_free;
++ }
++ last_sector = cur_sector;
++ cur_sector = le64_to_cpu(cow_block[entry].rsector_org);
++
++ } while (cur_sector != 0);
++
++ lc->persistent = 1;
++ pi->current_metadata_sector = last_sector +
++ map_page*PAGE_SIZE/SECTOR_SIZE +
++ entry/(SECTOR_SIZE/sizeof(struct disk_exception));
++ pi->current_metadata_entry = entry;
++ pi->current_metadata_number = i;
++ pi->next_free_sector = first_free_sector;
++
++ /* Copy last block into cow_iobuf */
++ memcpy(pi->disk_cow, (char *)((long)&cow_block[entry] - ((long)&cow_block[entry] & (blocksize-1))), blocksize);
++
++ ret_free:
++ unmap_kiobuf(read_iobuf);
++ free_kiovec(1, &read_iobuf);
++
++ return err;
++}
++
++/*
++ * Read the snapshot volume header. Returns 0 only if it read OK
++ * and it was valid, 1 if no header was found, and -1 on
++ * error. All fields are checked against the snapshot structure
++ * itself to make sure we don't corrupt the data.
++ */
++static int read_header(struct snapshot_c *lc, struct persistent_info *pi)
++{
++ int status;
++ struct snap_disk_header *header;
++ kdev_t dev = lc->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ unsigned long devsize;
++
++ /* Get it */
++ status = do_io(READ, pi->cow_iobuf, dev, 0L, blocksize/SECTOR_SIZE);
++ if (status != 0) {
++ DMERR("Snapshot dev %s error reading header",
++ kdevname(dev));
++ return -1;
++ }
++
++ header = (struct snap_disk_header *) page_address(pi->cow_iobuf->maplist[0]);
++
++ /*
++ * Check the magic. It's OK if this fails; we just create a new snapshot
++ * header and start from scratch.
++ */
++ if (le32_to_cpu(header->magic) != SNAP_MAGIC) {
++ return 1;
++ }
++
++ /* Check the version matches */
++ if (le32_to_cpu(header->version) != SNAPSHOT_DISK_VERSION) {
++ DMWARN("Snapshot dev %s version mismatch. Stored: %d, driver: %d",
++ kdevname(dev), le32_to_cpu(header->version), SNAPSHOT_DISK_VERSION);
++ return -1;
++ }
++
++ /* Check the chunk sizes match */
++ if (le32_to_cpu(header->chunk_size) != lc->chunk_size) {
++ DMWARN("Snapshot dev %s chunk size mismatch. Stored: %d, requested: %d",
++ kdevname(dev), le32_to_cpu(header->chunk_size), lc->chunk_size);
++ return -1;
++ }
++
++ /* Check the extent sizes match */
++ if (le32_to_cpu(header->extent_size) != pi->extent_size) {
++ DMWARN("Snapshot dev %s extent size mismatch. Stored: %d, requested: %ld",
++ kdevname(dev), le32_to_cpu(header->extent_size), pi->extent_size);
++ return -1;
++ }
++
++ /* Get the rest of the data */
++ pi->start_of_exceptions = le64_to_cpu(header->start_of_exceptions);
++ if (header->full) {
++ DMWARN("Snapshot dev %s is full. It cannot be used", kdevname(dev));
++ lc->full = 1;
++ return -1;
++ }
++
++ /* Validate against the size of the volume */
++ devsize = get_dev_size(dev);
++ if (pi->start_of_exceptions > devsize) {
++ DMWARN("Snapshot metadata error on %s. start exceptions > device size (%ld > %ld)",
++ kdevname(dev), pi->start_of_exceptions, devsize);
++ return -1;
++ }
++
++ /* Read metadata into the hash table and update pointers */
++ return read_metadata(lc, &lc->p_info);
++}
++
++/*
++ * Write (or update) the header. The only time we should need to
++ * do an update is when the snapshot becomes full.
++ */
++static int write_header(struct snapshot_c *lc, struct persistent_info *pi)
++{
++ struct snap_disk_header *header;
++ struct kiobuf *head_iobuf;
++ kdev_t dev = lc->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ /*
++ * Allocate our own iobuf for this so we don't corrupt
++ * any of the other writes that may be going on.
++ */
++ if (alloc_kiovec(1, &head_iobuf)) {
++ DMERR("Error allocating iobuf for header on %s", kdevname(dev));
++ return -1;
++ }
++
++ if (alloc_iobuf_pages(head_iobuf, PAGE_SIZE/SECTOR_SIZE)) {
++ DMERR("Error allocating iobuf space for header on %s", kdevname(dev));
++ free_kiovec(1, &head_iobuf);
++ return -1;
++ }
++
++ header = (struct snap_disk_header *) page_address(head_iobuf->maplist[0]);
++
++ header->magic = cpu_to_le32(SNAP_MAGIC);
++ header->version = cpu_to_le32(SNAPSHOT_DISK_VERSION);
++ header->chunk_size = cpu_to_le32(lc->chunk_size);
++ header->extent_size = cpu_to_le32(pi->extent_size);
++ header->full = cpu_to_le32(lc->full);
++
++ header->start_of_exceptions = cpu_to_le64(pi->start_of_exceptions);
++
++ /* Must write at least a full block */
++ status = do_io(WRITE, head_iobuf, dev, 0, blocksize/SECTOR_SIZE);
++
++ unmap_kiobuf(head_iobuf);
++ free_kiovec(1, &head_iobuf);
++ return status;
++}
++
++
++static int init_persistent_snapshot(struct snapshot_c *lc, int blocksize,
++ unsigned long extent_size, void **context)
++{
++ struct persistent_info *pi = &lc->p_info;
++
++ int status;
++ int i;
++ int cow_sectors;
++
++ pi->extent_size = extent_size;
++ pi->next_free_sector = blocksize / SECTOR_SIZE; /* Leave the first block alone */
++ pi->disk_cow = NULL;
++
++ pi->highest_metadata_entry = (pi->extent_size*SECTOR_SIZE) / sizeof(struct disk_exception) - 1;
++ pi->md_entries_per_block = blocksize / sizeof(struct disk_exception);
++
++ /* Allocate and set up iobuf for metadata I/O */
++ *context = "Unable to allocate COW iovec";
++ if (alloc_kiovec(1, &pi->cow_iobuf))
++ return -1;
++
++ /* Allocate space for the COW buffer. It should be at least PAGE_SIZE. */
++ cow_sectors = blocksize/SECTOR_SIZE + PAGE_SIZE/SECTOR_SIZE;
++ *context = "Unable to allocate COW I/O buffer space";
++ if (alloc_iobuf_pages(pi->cow_iobuf, cow_sectors)) {
++ free_kiovec(1, &pi->cow_iobuf);
++ return -1;
++ }
++
++ for (i=0; i < pi->cow_iobuf->nr_pages; i++) {
++ memset(page_address(pi->cow_iobuf->maplist[i]), 0, PAGE_SIZE);
++ }
++
++ pi->disk_cow = page_address(pi->cow_iobuf->maplist[0]);
++
++ *context = "Error in disk header";
++ /* Check for a header on disk and create a new one if not */
++ if ( (status = read_header(lc, &lc->p_info)) == 1) {
++
++ /* Write a new header */
++ pi->start_of_exceptions = pi->next_free_sector;
++ pi->next_free_sector += pi->extent_size;
++ pi->current_metadata_sector = pi->start_of_exceptions;
++ pi->current_metadata_entry = 0;
++ pi->current_metadata_number = 0;
++
++ *context = "Unable to write snapshot header";
++ if (write_header(lc, &lc->p_info) != 0) {
++ DMERR("Error writing header to snapshot volume %s",
++ kdevname(lc->cow_dev->dev));
++ goto free_ret;
++ }
++
++ /* Write a blank metadata block to the device */
++ if (write_metadata(lc, &lc->p_info) != 0) {
++ DMERR("Error writing initial COW table to snapshot volume %s",
++ kdevname(lc->cow_dev->dev));
++ goto free_ret;
++ }
++ }
++
++ /*
++ * There is a header but it doesn't match - fail so we
++ * don't destroy what might be useful data on disk. If
++ * the user really wants to use this COW device for a
++ * snapshot then the first sector should be zeroed out
++ * first.
++ */
++ if (status == -1)
++ goto free_ret;
++
++ return 0;
++
++ free_ret:
++ unmap_kiobuf(pi->cow_iobuf);
++ free_kiovec(1, &pi->cow_iobuf);
++ return -1;
++}
++
++static void exit_persistent_snapshot(struct persistent_info *pi)
++{
++ unmap_kiobuf(pi->cow_iobuf);
++ free_kiovec(1, &pi->cow_iobuf);
++}
++
++/*
++ * Finds a suitable destination for the exception.
++ */
++static int prepare_exception(struct snapshot_c *s,
++ struct inflight_exception *e)
++{
++ offset_t dev_size;
++
++ /*
++ * Check for full snapshot. Doing the size calculation here means that
++ * the COW device can be resized without us being told
++ */
++ dev_size = get_dev_size(s->cow_dev->dev);
++ if (s->p_info.next_free_sector + s->chunk_size >= dev_size) {
++ /* Snapshot is full, we can't use it */
++ DMWARN("Snapshot %s is full (sec=%ld, size=%ld)",
++ kdevname(s->cow_dev->dev),
++ s->p_info.next_free_sector + s->chunk_size, dev_size);
++ s->full = 1;
++
++ /* Mark it full on the device */
++ if (s->persistent)
++ write_header(s, &s->p_info);
++
++ return -1;
++
++ } else {
++ e->rsector_new = s->p_info.next_free_sector;
++ s->p_info.next_free_sector += s->chunk_size;
++ }
++
++ return 0;
++}
++
++/*
++ * Add a new exception entry to the on-disk metadata.
++ */
++static int commit_exception(struct snapshot_c *sc,
++ unsigned long org, unsigned long new)
++{
++ struct persistent_info *pi = &sc->p_info;
++
++ int i = pi->current_metadata_entry++;
++ unsigned long next_md_block = pi->current_metadata_sector;
++
++ pi->current_metadata_number++;
++
++ /* Update copy of disk COW */
++ pi->disk_cow[i].rsector_org = cpu_to_le64(org);
++ pi->disk_cow[i].rsector_new = cpu_to_le64(new);
++
++ /* Have we filled this extent ? */
++ if (pi->current_metadata_number >= pi->highest_metadata_entry) {
++ /* Fill in pointer to next metadata extent */
++ i++;
++ pi->current_metadata_entry++;
++
++ next_md_block = pi->next_free_sector;
++ pi->next_free_sector += pi->extent_size;
++
++ pi->disk_cow[i].rsector_org = cpu_to_le64(next_md_block);
++ pi->disk_cow[i].rsector_new = 0;
++ }
++
++ /* Commit to disk */
++ if (write_metadata(sc, &sc->p_info)) {
++ sc->full = 1; /* Failed. don't try again */
++ return -1;
++ }
++
++ /*
++ * Write a new (empty) metadata block if we are at the
++ * end of an existing block so that read_metadata finds a
++ * terminating zero entry.
++ */
++ if (pi->current_metadata_entry == pi->md_entries_per_block) {
++ memset(pi->disk_cow, 0, PAGE_SIZE);
++ pi->current_metadata_sector = next_md_block;
++
++ /*
++ * If this is also the end of an extent then go
++ * back to the start.
++ */
++ if (pi->current_metadata_number >= pi->highest_metadata_entry) {
++ pi->current_metadata_number = 0;
++
++ } else {
++ int blocksize = get_hardsect_size(sc->cow_dev->dev);
++ pi->current_metadata_sector += blocksize/SECTOR_SIZE;
++ }
++
++ pi->current_metadata_entry = 0;
++ if (write_metadata(sc, &sc->p_info) != 0) {
++ sc->full = 1;
++ return -1;
++ }
++ }
++ return 0;
++}
++
++/*
++ * Sets the full flag in the metadata. A quick hack for now.
++ */
++static void invalidate_snapshot(struct snapshot_c *s)
++{
++ s->full = 1;
++ if (s->persistent)
++ write_header(s, &s->p_info);
++}
++
++
++#endif
++
++
++struct exception_store * dm_create_persistent(struct dm_snapshot *s,
++ int blocksize,
++ offset_t extent_size,
++ void **error)
++{
++ return NULL;
++}
++
++
++/*
++ * Implementation of the store for non-persistent snapshots.
++ */
++struct transient_c {
++ offset_t next_free;
++};
++
++void destroy_transient(struct exception_store *store)
++{
++ kfree(store->context);
++ kfree(store);
++}
++
++int prepare_transient(struct exception_store *store, struct exception *e)
++{
++ struct transient_c *tc = (struct transient_c *) store->context;
++ offset_t size = get_dev_size(store->snap->cow->dev);
++
++ if (size < (tc->next_free + store->snap->chunk_size))
++ return -1;
++
++ e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
++ tc->next_free += store->snap->chunk_size;
++ return 0;
++}
++
++struct exception_store *dm_create_transient(struct dm_snapshot *s,
++ int blocksize, void **error)
++{
++ struct exception_store *store;
++ struct transient_c *tc;
++
++ store = kmalloc(sizeof(*store), GFP_KERNEL);
++ if (!store) {
++ DMWARN("out of memory.");
++ return NULL;
++ }
++
++ memset(store, 0, sizeof(*store));
++ store->destroy = destroy_transient;
++ store->prepare_exception = prepare_transient;
++ store->snap = s;
++
++ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
++ if (!tc) {
++ kfree(store);
++ return NULL;
++ }
++
++ tc->next_free = 0;
++ store->context = tc;
++
++ return store;
++}
++
diff -ruN linux-2.4.16/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c
--- linux-2.4.16/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm-ioctl.c Mon Feb 4 13:01:21 2002
-@@ -0,0 +1,443 @@
++++ linux/drivers/md/dm-ioctl.c Thu Mar 14 15:59:31 2002
+@@ -0,0 +1,557 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/miscdevice.h>
++#include <linux/dm-ioctl.h>
++#include <linux/init.h>
++
++static void free_params(struct dm_ioctl *p)
++{
++ vfree(p);
++}
++
++static int version(struct dm_ioctl *user)
++{
++ return copy_to_user(user, DM_DRIVER_VERSION, sizeof(DM_DRIVER_VERSION));
++}
++
++static int copy_params(struct dm_ioctl *user, struct dm_ioctl **result)
++{
++ struct dm_ioctl tmp, *dmi;
++
++ if (copy_from_user(&tmp, user, sizeof(tmp)))
++ return -EFAULT;
++
++ if (strcmp(DM_IOCTL_VERSION, tmp.version)) {
++ DMWARN("dm_ctl_ioctl: struct dm_ioctl version incompatible");
++ return -EINVAL;
++ }
++
++ if (tmp.data_size < sizeof(tmp))
++ return -EINVAL;
++
++ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
++ if (!dmi)
++ return -ENOMEM;
++
++ if (copy_from_user(dmi, user, tmp.data_size)) {
++ vfree(dmi);
++ return -EFAULT;
++ }
++
++ *result = dmi;
++ return 0;
++}
++
++/*
++ * Check a string doesn't overrun the chunk of
++ * memory we copied from userland.
++ */
++static int valid_str(char *str, void *begin, void *end)
++{
++ while (((void *) str >= begin) && ((void *) str < end))
++ if (!*str++)
++ return 0;
++
++ return -EINVAL;
++}
++
++static int next_target(struct dm_target_spec *last, unsigned long next,
++ void *begin, void *end,
++ struct dm_target_spec **spec, char **params)
++{
++ *spec = (struct dm_target_spec *)
++ ((unsigned char *) last + next);
++ *params = (char *) (*spec + 1);
++
++ if (*spec < (last + 1) || ((void *) *spec > end))
++ return -EINVAL;
++
++ return valid_str(*params, begin, end);
++}
++
++/*
++ * Checks to see if there's a gap in the table.
++ * Returns true iff there is a gap.
++ */
++static int gap(struct dm_table *table, struct dm_target_spec *spec)
++{
++ if (!table->num_targets)
++ return (spec->sector_start > 0) ? 1 : 0;
++
++ if (spec->sector_start != table->highs[table->num_targets - 1] + 1)
++ return 1;
++
++ return 0;
++}
++
++static int populate_table(struct dm_table *table, struct dm_ioctl *args)
++{
++ int i = 0, r, first = 1, argc;
++ struct dm_target_spec *spec;
++ char *params, *argv[MAX_ARGS];
++ struct target_type *ttype;
++ void *context, *begin, *end;
++ offset_t highs = 0;
++
++ if (!args->target_count) {
++ DMWARN("populate_table: no targets specified");
++ return -EINVAL;
++ }
++
++ begin = (void *) args;
++ end = begin + args->data_size;
++
++#define PARSE_ERROR(msg) {DMWARN(msg); return -EINVAL;}
++
++ for (i = 0; i < args->target_count; i++) {
++
++ r = first ? next_target((struct dm_target_spec *) args,
++ args->data_start,
++ begin, end, &spec, ¶ms) :
++ next_target(spec, spec->next, begin, end, &spec, ¶ms);
++
++ if (r)
++ PARSE_ERROR("unable to find target");
++
++ /* Look up the target type */
++ ttype = dm_get_target_type(spec->target_type);
++ if (!ttype)
++ PARSE_ERROR("unable to find target type");
++
++ if (gap(table, spec))
++ PARSE_ERROR("gap in target ranges");
++
++ /* Split up the parameter list */
++ if (split_args(MAX_ARGS, &argc, argv, params) < 0)
++ PARSE_ERROR("Too many arguments");
++
++ /* Build the target */
++ if (ttype->ctr(table, spec->sector_start, spec->length,
++ argc, argv, &context)) {
++ DMWARN("%s: target constructor failed",
++ (char *) context);
++ return -EINVAL;
++ }
++
++ /* Add the target to the table */
++ highs = spec->sector_start + (spec->length - 1);
++ if (dm_table_add_target(table, highs, ttype, context))
++ PARSE_ERROR("internal error adding target to table");
++
++ first = 0;
++ }
++
++#undef PARSE_ERROR
++
++ r = dm_table_complete(table);
++ return r;
++}
++
++/*
++ * Round up the ptr to the next 'align' boundary. Obviously
++ * 'align' must be a power of 2.
++ */
++static inline void *align_ptr(void *ptr, unsigned int align)
++{
++ align--;
++ return (void *) (((unsigned long) (ptr + align)) & ~align);
++}
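As a quick illustration of align_ptr() (hypothetical addresses, not from the patch): with 8-byte alignment, a pointer value of 0x1005 becomes (0x1005 + 7) & ~7 = 0x1008, while a value already on the boundary, such as 0x1008, is returned unchanged.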
++
++/*
++ * Copies a dm_ioctl and an optional additional payload to
++ * userland.
++ */
++static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param,
++ void *data, unsigned long len)
++{
++ int r;
++ void *ptr = NULL;
++
++ strncpy(param->version, DM_IOCTL_VERSION, sizeof(param->version));
++
++ if (data) {
++ ptr = align_ptr(user + 1, sizeof(unsigned long));
++ param->data_start = ptr - (void *) user;
++ }
++
++ r = copy_to_user(user, param, sizeof(*param));
++ if (r)
++ return r;
++
++ if (data) {
++ if (param->data_start + len > param->data_size)
++ return -ENOSPC;
++ r = copy_to_user(ptr, data, len);
++ }
++
++ return r;
++}
++
++/*
++ * Fills in a dm_ioctl structure, ready for sending back to
++ * userland.
++ */
++static void __info(struct mapped_device *md, struct dm_ioctl *param)
++{
++ param->flags = DM_EXISTS_FLAG;
++ if (md->suspended)
++ param->flags |= DM_SUSPEND_FLAG;
++ if (md->read_only)
++ param->flags |= DM_READONLY_FLAG;
++
++ strncpy(param->name, md->name, sizeof(param->name));
++
++ if (md->uuid)
++ strncpy(param->uuid, md->uuid, sizeof(param->uuid));
++ else
++ param->uuid[0] = '\0';
++
++ param->open_count = md->use_count;
++ param->dev = kdev_t_to_nr(md->dev);
++ param->target_count = md->map->num_targets;
++}
++
++/*
++ * Copies device info back to user space, used by
++ * the create and info ioctls.
++ */
++static int info(struct dm_ioctl *param, struct dm_ioctl *user)
++{
++ int minor;
++ struct mapped_device *md;
++
++ param->flags = 0;
++
++ md = dm_get_name_r(param->name);
++ if (!md)
++ /*
++ * Device not found - returns cleared exists flag.
++ */
++ goto out;
++
++ minor = MINOR(md->dev);
++ __info(md, param);
++ dm_put_r(minor);
++
++ out:
++ return results_to_user(user, param, NULL, 0);
++}
++
++/*
++ * Retrieves a list of devices used by a particular dm device.
++ */
++static int dep(struct dm_ioctl *param, struct dm_ioctl *user)
++{
++ int minor, count, r;
++ struct mapped_device *md;
++ struct list_head *tmp;
++ size_t len = 0;
++ struct dm_target_deps *deps = NULL;
++
++ md = dm_get_name_r(param->name);
++ if (!md)
++ goto out;
++ minor = MINOR(md->dev);
++
++ /*
++ * Setup the basic dm_ioctl structure.
++ */
++ __info(md, param);
++
++ /*
++ * Count the devices.
++ */
++ count = 0;
++ list_for_each(tmp, &md->map->devices)
++ count++;
++
++ /*
++ * Allocate a kernel space version of the dm_target_status
++ * struct.
++ */
++ len = sizeof(*deps) + (sizeof(*deps->dev) * count);
++ deps = kmalloc(len, GFP_KERNEL);
++ if (!deps) {
++ dm_put_r(minor);
++ return -ENOMEM;
++ }
++
++ /*
++ * Fill in the devices.
++ */
++ deps->count = count;
++ count = 0;
++ list_for_each(tmp, &md->map->devices) {
++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
++ deps->dev[count++] = kdev_t_to_nr(dd->dev);
++ }
++ dm_put_r(minor);
++
++ out:
++ r = results_to_user(user, param, deps, len);
++
++ kfree(deps);
++ return r;
++}
++
++static int create(struct dm_ioctl *param, struct dm_ioctl *user)
++{
++ int r;
++ struct mapped_device *md;
++ struct dm_table *t;
++ int minor;
++
++ r = dm_table_create(&t);
++ if (r)
++ return r;
++
++ r = populate_table(t, param);
++ if (r) {
++ dm_table_destroy(t);
++ return r;
++ }
++
++ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ?
++ MINOR(to_kdev_t(param->dev)) : -1;
++
++ r = dm_create(param->name, param->uuid, minor, t);
++ if (r) {
++ dm_table_destroy(t);
++ return r;
++ }
++
++ md = dm_get_name_w(param->name);
++ if (!md)
++ /* shouldn't get here */
++ return -EINVAL;
++
++ minor = MINOR(md->dev);
++ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
++ dm_put_w(minor);
++
++ r = info(param, user);
++ return r;
++}
++
++static int remove(struct dm_ioctl *param)
++{
++ int r, minor;
++ struct mapped_device *md;
++
++ md = dm_get_name_w(param->name);
++ if (!md)
++ return -ENXIO;
++
++ minor = MINOR(md->dev);
++ r = dm_destroy(md);
++ dm_put_w(minor);
++
++ return r;
++}
++
++static int suspend(struct dm_ioctl *param)
++{
++ int r, minor;
++ struct mapped_device *md;
++
++ md = dm_get_name_w(param->name);
++ if (!md)
++ return -ENXIO;
++
++ minor = MINOR(md->dev);
++ r = (param->flags & DM_SUSPEND_FLAG) ? dm_suspend(md) : dm_resume(md);
++ dm_put_w(minor);
++
++ return r;
++}
++
++static int reload(struct dm_ioctl *param)
++{
++ int r, minor;
++ struct mapped_device *md;
++ struct dm_table *t;
++
++ r = dm_table_create(&t);
++ if (r)
++ return r;
++
++ r = populate_table(t, param);
++ if (r) {
++ dm_table_destroy(t);
++ return r;
++ }
++
++ md = dm_get_name_w(param->name);
++ if (!md) {
++ dm_table_destroy(t);
++ return -ENXIO;
++ }
++
++ minor = MINOR(md->dev);
++
++ r = dm_swap_table(md, t);
++ if (r) {
++ dm_put_w(minor);
++ dm_table_destroy(t);
++ return r;
++ }
++
++ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
++ dm_put_w(minor);
++ return 0;
++}
++
++static int rename(struct dm_ioctl *param)
++{
++ char *newname = (char *) param + param->data_start;
++
++ if (valid_str(newname, (void *) param,
++ (void *) param + param->data_size) ||
++ dm_set_name(param->name, newname)) {
++ DMWARN("Invalid new logical volume name supplied.");
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static int ctl_open(struct inode *inode, struct file *file)
++{
++ /* only root can open this */
++ if (!capable(CAP_SYS_ADMIN))
++ return -EACCES;
++
++ MOD_INC_USE_COUNT;
++
++ return 0;
++}
++
++static int ctl_close(struct inode *inode, struct file *file)
++{
++ MOD_DEC_USE_COUNT;
++ return 0;
++}
++
++static int ctl_ioctl(struct inode *inode, struct file *file,
++ uint command, ulong a)
++{
++ int r;
++ struct dm_ioctl *p;
++ uint cmd = _IOC_NR(command);
++
++ switch (cmd) {
++ case DM_REMOVE_ALL_CMD:
++ dm_destroy_all();
++ case DM_VERSION_CMD:
++ return version((struct dm_ioctl *) a);
++ default:
++ break;
++ }
++
++ r = copy_params((struct dm_ioctl *) a, &p);
++ if (r)
++ return r;
++
++ /* FIXME: Change to use size 0 next time ioctl version gets changed */
++ switch (cmd) {
++ case DM_CREATE_CMD:
++ r = create(p, (struct dm_ioctl *) a);
++ break;
++
++ case DM_REMOVE_CMD:
++ r = remove(p);
++ break;
++
++ case DM_SUSPEND_CMD:
++ r = suspend(p);
++ break;
++
++ case DM_RELOAD_CMD:
++ r = reload(p);
++ break;
++
++ case DM_INFO_CMD:
++ r = info(p, (struct dm_ioctl *) a);
++ break;
++
++ case DM_DEPS_CMD:
++ r = dep(p, (struct dm_ioctl *) a);
++ break;
++
++ case DM_RENAME_CMD:
++ r = rename(p);
++ break;
++
++ default:
++ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
++ r = -EINVAL;
++ }
++
++ free_params(p);
++ return r;
++}
++
++static struct file_operations _ctl_fops = {
++ open: ctl_open,
++ release: ctl_close,
++ ioctl: ctl_ioctl,
++ owner: THIS_MODULE,
++};
++
++static devfs_handle_t _ctl_handle;
++
++static struct miscdevice _dm_misc = {
++ minor: MISC_DYNAMIC_MINOR,
++ name: DM_NAME,
++ fops: &_ctl_fops
++};
++
++/* Create misc character device and link to DM_DIR/control */
++int __init dm_interface_init(void)
++{
++ int r;
++ char rname[64];
++
++ r = misc_register(&_dm_misc);
++ if (r) {
++ DMERR("misc_register failed for control device");
++ return r;
++ }
++
++ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
++ sizeof rname - 3);
++ if (r == -ENOSYS)
++ return 0; /* devfs not present */
++
++ if (r < 0) {
++ DMERR("devfs_generate_path failed for control device");
++ goto failed;
++ }
++
++ strncpy(rname + r, "../", 3);
++ r = devfs_mk_symlink(NULL, DM_DIR "/control",
++ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL);
++ if (r) {
++ DMERR("devfs_mk_symlink failed for control device");
++ goto failed;
++ }
++ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
++
++ return 0;
++
++ failed:
++ misc_deregister(&_dm_misc);
++ return r;
++}
++
++void __exit dm_interface_exit(void)
++{
++ if (misc_deregister(&_dm_misc) < 0)
++ DMERR("misc_deregister failed for control device");
++}
+diff -ruN linux-2.4.16/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c
+--- linux-2.4.16/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-linear.c Tue Jan 15 19:53:55 2002
+@@ -0,0 +1,105 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++
++/*
++ * Linear: maps a linear range of a device.
++ */
++struct linear_c {
++ long delta; /* FIXME: we need a signed offset type */
++ struct dm_dev *dev;
++};
++
++/*
++ * Construct a linear mapping: <dev_path> <offset>
++ */
++static int linear_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ struct linear_c *lc;
++ unsigned long start; /* FIXME: unsigned long long */
++ char *end;
++
++ if (argc != 2) {
++ *context = "dm-linear: Not enough arguments";
++ return -EINVAL;
++ }
++
++ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
++ if (lc == NULL) {
++ *context = "dm-linear: Cannot allocate linear context";
++ return -ENOMEM;
++ }
++
++ start = simple_strtoul(argv[1], &end, 10);
++ if (*end) {
++ *context = "dm-linear: Invalid device sector";
++ goto bad;
++ }
++
++ if (dm_table_get_device(t, argv[0], start, l, &lc->dev)) {
++ *context = "dm-linear: Device lookup failed";
++ goto bad;
++ }
++
++ lc->delta = (int) start - (int) b;
++ *context = lc;
++ return 0;
++
++ bad:
++ kfree(lc);
++ return -EINVAL;
++}
++
++static void linear_dtr(struct dm_table *t, void *c)
++{
++ struct linear_c *lc = (struct linear_c *) c;
++
++ dm_table_put_device(t, lc->dev);
++ kfree(c);
++}
++
++static int linear_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct linear_c *lc = (struct linear_c *) context;
++
++ bh->b_rdev = lc->dev->dev;
++ bh->b_rsector = bh->b_rsector + lc->delta;
++
++ return 1;
++}
++
++static struct target_type linear_target = {
++ name: "linear",
++ module: THIS_MODULE,
++ ctr: linear_ctr,
++ dtr: linear_dtr,
++ map: linear_map,
++};
++
++int __init dm_linear_init(void)
++{
++ int r = dm_register_target(&linear_target);
++
++ if (r < 0)
++ DMERR("linear: register failed %d", r);
++
++ return r;
++}
++
++void dm_linear_exit(void)
++{
++ int r = dm_unregister_target(&linear_target);
++
++ if (r < 0)
++ DMERR("linear: unregister failed %d", r);
++}
++
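To make the two-argument format above concrete, a hypothetical linear target in an ioctl table might carry sector_start = 0, length = 1024, target_type = "linear" and params = "/dev/hda1 2048" (device path and numbers invented for illustration). The constructor then computes delta = 2048 - 0, so linear_map() reissues an I/O for mapped sector 7 to that device at sector 2055.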
+diff -ruN linux-2.4.16/drivers/md/dm-mirror.c linux/drivers/md/dm-mirror.c
+--- linux-2.4.16/drivers/md/dm-mirror.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-mirror.c Thu Mar 14 15:53:19 2002
+@@ -0,0 +1,295 @@
+/*
-+ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ * Copyright (C) 2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++
++/* kcopyd priority of mirror operations */
++#define MIRROR_COPY_PRIORITY 5
++
++static kmem_cache_t *bh_cachep;
++
++/*
++ * Mirror: maps a mirror range of a device.
++ */
++struct mirror_c {
++ struct dm_dev *fromdev;
++ struct dm_dev *todev;
++
++ unsigned long from_delta;
++ unsigned long to_delta;
++
++ unsigned long frompos;
++ unsigned long topos;
++
++ unsigned long got_to;
++ struct rw_semaphore lock;
++ struct buffer_head *bhstring;
++ int error;
++};
++
++
++/* Called when a duplicating I/O has finished */
++static void mirror_end_io(struct buffer_head *bh, int uptodate)
++{
++ struct mirror_c *lc = (struct mirror_c *) bh->b_private;
++
++ /* Flag error if it failed */
++ if (!uptodate) {
++ DMERR("Mirror copy to %s failed", kdevname(lc->todev->dev));
++ lc->error = 1;
++ dm_notify(lc); /* TODO: interface ?? */
++ }
++ kmem_cache_free(bh_cachep, bh);
++}
++
++static void mirror_bh(struct mirror_c *mc, struct buffer_head *bh)
++{
++ struct buffer_head *dbh = kmem_cache_alloc(bh_cachep, GFP_NOIO);
++ if (dbh) {
++ *dbh = *bh;
++ dbh->b_rdev = mc->todev->dev;
++ dbh->b_rsector = bh->b_rsector - mc->from_delta
++ + mc->to_delta;
++ dbh->b_end_io = mirror_end_io;
++ dbh->b_private = mc;
++
++ generic_make_request(WRITE, dbh);
++ } else {
++ DMERR("kmem_cache_alloc failed for mirror bh");
++ mc->error = 1;
++ }
++}
++
++/* Called when the copy I/O has finished */
++static void copy_callback(copy_cb_reason_t reason, void *context, long arg)
++{
++ struct mirror_c *lc = (struct mirror_c *) context;
++ struct buffer_head *bh;
++
++ if (reason == COPY_CB_FAILED_READ || reason == COPY_CB_FAILED_WRITE) {
++ DMERR("Mirror block %s on %s failed, sector %ld",
++ reason == COPY_CB_FAILED_READ ? "read" : "write",
++ reason == COPY_CB_FAILED_READ ?
++ kdevname(lc->fromdev->dev) :
++ kdevname(lc->todev->dev), arg);
++ lc->error = 1;
++ return;
++ }
++
++ if (reason == COPY_CB_COMPLETE) {
++ /* Say we've finished */
++ dm_notify(lc); /* TODO: interface ?? */
++ }
++
++ if (reason == COPY_CB_PROGRESS) {
++ dm_notify(lc); /* TODO: interface ?? */
++ }
++
++ /* Submit, and mirror any pending BHs */
++ down_write(&lc->lock);
++ lc->got_to = arg;
++
++ bh = lc->bhstring;
++ lc->bhstring = NULL;
++ up_write(&lc->lock);
++
++ while (bh) {
++ struct buffer_head *nextbh = bh->b_reqnext;
++ bh->b_reqnext = NULL;
++ generic_make_request(WRITE, bh);
++ mirror_bh(lc, bh);
++ bh = nextbh;
++ }
++}
++
++/*
++ * Construct a mirror mapping: <dev_path1> <offset> <dev_path2> <offset> <throttle> [<priority>]
++ */
++static int mirror_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ struct mirror_c *lc;
++ unsigned long offset1, offset2;
++ char *value;
++ int priority = MIRROR_COPY_PRIORITY;
++ int throttle;
++
++ if (argc <= 4) {
++ *context = "dm-mirror: Not enough arguments";
++ return -EINVAL;
++ }
++
++ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
++ if (lc == NULL) {
++ *context = "dm-mirror: Cannot allocate mirror context";
++ return -ENOMEM;
++ }
++
++ if (dm_table_get_device(t, argv[0], 0, l, &lc->fromdev)) {
++ *context = "dm-mirror: Device lookup failed";
++ goto bad;
++ }
++
++ offset1 = simple_strtoul(argv[1], &value, 10);
++ if (value == NULL) {
++ *context = "Invalid offset for dev1";
++ dm_table_put_device(t, lc->fromdev);
++ goto bad;
++ }
++
++ if (dm_table_get_device(t, argv[2], 0, l, &lc->todev)) {
++ *context = "dm-mirror: Device lookup failed";
++ dm_table_put_device(t, lc->fromdev);
++ goto bad;
++ }
++
++ offset2 = simple_strtoul(argv[3], &value, 10);
++ if (value == NULL) {
++ *context = "Invalid offset for dev2";
++ goto bad_put;
++ }
++
++ throttle = simple_strtoul(argv[4], &value, 10);
++ if (value == NULL) {
++ *context = "Invalid throttle value";
++ goto bad_put;
++ }
++
++ if (argc > 5) {
++ priority = simple_strtoul(argv[5], &value, 10);
++ if (value == NULL) {
++ *context = "Invalid priority value";
++ goto bad_put;
++ }
++ }
++
++ lc->from_delta = (int) offset1 - (int) b;
++ lc->to_delta = (int) offset2 - (int) b;
++ lc->frompos = offset1;
++ lc->topos = offset2;
++ lc->error = 0;
++ lc->bhstring = NULL;
++ init_rwsem(&lc->lock);
++ *context = lc;
++
++ /* Tell kcopyd to do the biz */
++ if (dm_blockcopy(offset1, offset2,
++ l - offset1,
++ lc->fromdev->dev, lc->todev->dev,
++ priority, 0, copy_callback, lc)) {
++ DMERR("block copy call failed");
++ dm_table_put_device(t, lc->fromdev);
++ dm_table_put_device(t, lc->todev);
++ goto bad;
++ }
++ return 0;
++
++ bad_put:
++ dm_table_put_device(t, lc->fromdev);
++ dm_table_put_device(t, lc->todev);
++ bad:
++ kfree(lc);
++ return -EINVAL;
++}
++
++static void mirror_dtr(struct dm_table *t, void *c)
++{
++ struct mirror_c *lc = (struct mirror_c *) c;
++
++ dm_table_put_device(t, lc->fromdev);
++ dm_table_put_device(t, lc->todev);
++ kfree(c);
++}
++
++static int mirror_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct mirror_c *lc = (struct mirror_c *) context;
++
++ bh->b_rdev = lc->fromdev->dev;
++ bh->b_rsector = bh->b_rsector + lc->from_delta;
++
++ if (rw == WRITE) {
++ down_write(&lc->lock);
++
++ /*
++ * If this area is in flight then save it until it's
++ * committed to the mirror disk and then submit it and
++ * its mirror.
++ */
++ if (bh->b_rsector > lc->got_to &&
++ bh->b_rsector <= lc->got_to + KIO_MAX_SECTORS) {
++ bh->b_reqnext = lc->bhstring;
++ lc->bhstring = bh;
++ up_write(&lc->lock);
++ return 0;
++ }
++
++ /*
++ * If we've already copied this block then duplicate
++ * it to the mirror device
++ */
++ if (bh->b_rsector < lc->got_to) {
++ /* Schedule copy of I/O to other target */
++ mirror_bh(lc, bh);
++ }
++ up_write(&lc->lock);
++ }
++ return 1;
++}
++
++static struct target_type mirror_target = {
++ name: "mirror",
++ module: THIS_MODULE,
++ ctr: mirror_ctr,
++ dtr: mirror_dtr,
++ map: mirror_map,
++};
++
++int __init dm_mirror_init(void)
++{
++ int r;
++
++ bh_cachep = kmem_cache_create("dm-mirror",
++ sizeof(struct buffer_head),
++ __alignof__(struct buffer_head),
++ 0, NULL, NULL);
++ if (!bh_cachep) {
++ return -1;
++ }
++
++
++ r = dm_register_target(&mirror_target);
++ if (r < 0) {
++ DMERR("mirror: register failed %d", r);
++ kmem_cache_destroy(bh_cachep);
++ }
++ return r;
++}
++
++void dm_mirror_exit(void)
++{
++ int r = dm_unregister_target(&mirror_target);
++
++ if (r < 0)
++ DMERR("mirror: unregister failed %d", r);
++
++ kmem_cache_destroy(bh_cachep);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
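For reference, the five-or-six-argument format parsed by mirror_ctr() above would be satisfied by hypothetical params such as "/dev/hda1 0 /dev/hdb1 0 64" (source device and offset, destination device and offset, throttle), with the optional sixth argument overriding the default kcopyd priority of MIRROR_COPY_PRIORITY (5). The device names and values here are made up for illustration.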
+diff -ruN linux-2.4.16/drivers/md/dm-origin.c linux/drivers/md/dm-origin.c
+--- linux-2.4.16/drivers/md/dm-origin.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-origin.c Wed Mar 13 17:28:40 2002
+@@ -0,0 +1,105 @@
++/*
++ * dm-origin.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/blkdev.h>
++#include <linux/device-mapper.h>
++
++#include "dm.h"
++
++/*
++ * Origin: maps a linear range of a device, with hooks for snapshotting.
++ */
++
++/*
++ * Construct an origin mapping: <dev_path>
++ * The context for an origin is merely a 'struct dm_dev *'
++ * pointing to the real device.
++ */
++static int origin_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ int r;
++ struct dm_dev *dev;
++
++ if (argc != 1) {
++ *context = "dm-origin: incorrect number of arguments";
++ return -EINVAL;
++ }
++
++ r = dm_table_get_device(t, argv[0], 0, l, &dev);
++ if (r) {
++ *context = "Cannot get target device";
++ return r;
++ }
++
++ *context = dev;
++
++ return 0;
++}
++
++static void origin_dtr(struct dm_table *t, void *c)
++{
++ struct dm_dev *dev = (struct dm_dev *) c;
++
++ dm_table_put_device(t, dev);
++}
++
++static int origin_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct dm_dev *dev = (struct dm_dev *) context;
++
++ bh->b_rdev = dev->dev;
++
++ /* Only tell snapshots if this is a write */
++ return (rw == WRITE) ? dm_do_snapshot(dev, bh) : 1;
++}
++
++static struct target_type origin_target = {
++ name: "snapshot-origin",
++ module: THIS_MODULE,
++ ctr: origin_ctr,
++ dtr: origin_dtr,
++ map: origin_map,
++ err: NULL
++};
++
++int __init dm_origin_init(void)
++{
++ int r = dm_register_target(&origin_target);
++
++ if (r < 0)
++ DMERR("Device mapper: Origin: register failed %d\n", r);
++
++ return r;
++}
++
++void dm_origin_exit(void)
++{
++ int r = dm_unregister_target(&origin_target);
++
++ if (r < 0)
++ DMERR("Device mapper: Origin: unregister failed %d\n", r);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
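A hypothetical origin target therefore needs only a single parameter, e.g. params = "/dev/hda1" (device name invented for illustration): reads map straight through to that device, while origin_map() returns the result of dm_do_snapshot() for writes so that any registered snapshots get a chance to copy the original data first.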
+diff -ruN linux-2.4.16/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapshot.c
+--- linux-2.4.16/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-snapshot.c Thu Mar 14 16:08:52 2002
+@@ -0,0 +1,862 @@
++/*
++ * dm-snapshot.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
-+#include "dm.h"
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/blkdev.h>
++#include <linux/device-mapper.h>
++
++#include "dm-snapshot.h"
++
++/*
++ * Hard sector size used all over the kernel
++ */
++#define SECTOR_SIZE 512
++
++/*
++ * kcopyd priority of snapshot operations
++ */
++#define SNAPSHOT_COPY_PRIORITY 2
++
++struct pending_exception {
++ struct exception e;
++
++ /* Chain of WRITE buffer heads to submit when this COW has completed */
++ struct buffer_head *bh;
++
++ /* Pointer back to snapshot context */
++ struct dm_snapshot *snap;
++};
++
++/*
++ * Hash table mapping origin volumes to lists of snapshots and
++ * a lock to protect it
++ */
++static kmem_cache_t *exception_cachep;
++static kmem_cache_t *pending_cachep;
++
++/*
++ * One of these per registered origin, held in the snapshot_origins hash
++ */
++struct origin {
++ /* The origin device */
++ kdev_t dev;
++
++ struct list_head hash_list;
++
++ /* List of snapshots for this origin */
++ struct list_head snapshots;
++};
++
++/*
++ * Size of the hash table for origin volumes. If we make this
++ * the size of the minors list then it should be nearly perfect
++ */
++#define ORIGIN_HASH_SIZE 256
++#define ORIGIN_MASK 0xFF
++static struct list_head *_origins;
++static struct rw_semaphore _origins_lock;
++
++static int init_origin_hash(void)
++{
++ int i;
+
-+#include <linux/miscdevice.h>
-+#include <linux/dm-ioctl.h>
-+#include <linux/init.h>
++ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
++ GFP_KERNEL);
++ if (!_origins) {
++ DMERR("Device mapper: Snapshot: unable to allocate memory");
++ return -ENOMEM;
++ }
+
-+static void free_params(struct dm_ioctl *p)
++ for (i = 0; i < ORIGIN_HASH_SIZE; i++)
++ INIT_LIST_HEAD(_origins + i);
++ init_rwsem(&_origins_lock);
++
++ return 0;
++}
++
++static void exit_origin_hash(void)
+{
-+ vfree(p);
++ kfree(_origins);
+}
+
-+static int version(struct dm_ioctl *user)
++static inline unsigned int origin_hash(kdev_t dev)
+{
-+ return copy_to_user(user, DM_DRIVER_VERSION, sizeof(DM_DRIVER_VERSION));
++ return MINOR(dev) & ORIGIN_MASK;
+}
+
-+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **result)
++static struct origin *__lookup_origin(kdev_t origin)
+{
-+ struct dm_ioctl tmp, *dmi;
++ struct list_head *slist;
++ struct list_head *ol;
++ struct origin *o;
+
-+ if (copy_from_user(&tmp, user, sizeof(tmp)))
-+ return -EFAULT;
++ ol = &_origins[origin_hash(origin)];
++ list_for_each(slist, ol) {
++ o = list_entry(slist, struct origin, hash_list);
+
-+ if (strcmp(DM_IOCTL_VERSION, tmp.version)) {
-+ DMWARN("dm_ctl_ioctl: struct dm_ioctl version incompatible");
-+ return -EINVAL;
++ if (o->dev == origin)
++ return o;
+ }
+
-+ if (tmp.data_size < sizeof(tmp))
-+ return -EINVAL;
++ return NULL;
++}
+
-+ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
-+ if (!dmi)
-+ return -ENOMEM;
++static void __insert_origin(struct origin *o)
++{
++ struct list_head *sl = &_origins[origin_hash(o->dev)];
++ list_add_tail(&o->hash_list, sl);
++}
+
-+ if (copy_from_user(dmi, user, tmp.data_size)) {
-+ vfree(dmi);
-+ return -EFAULT;
++/*
++ * Make a note of the snapshot and its origin so we can look it
++ * up when the origin has a write on it.
++ */
++static int register_snapshot(struct dm_snapshot *snap)
++{
++ struct origin *o;
++ kdev_t dev = snap->origin->dev;
++
++ down_write(&_origins_lock);
++ o = __lookup_origin(dev);
++
++ if (!o) {
++ /* New origin */
++ o = kmalloc(sizeof(*o), GFP_KERNEL);
++ if (!o) {
++ up_write(&_origins_lock);
++ return -ENOMEM;
++ }
++
++ /* Initialise the struct */
++ INIT_LIST_HEAD(&o->snapshots);
++ o->dev = dev;
++
++ __insert_origin(o);
+ }
+
-+ *result = dmi;
++ list_add_tail(&snap->list, &o->snapshots);
++
++ up_write(&_origins_lock);
+ return 0;
+}
+
++static void unregister_snapshot(struct dm_snapshot *s)
++{
++ struct origin *o;
++
++ down_write(&_origins_lock);
++ o = __lookup_origin(s->origin->dev);
++
++ list_del(&s->list);
++ if (list_empty(&o->snapshots)) {
++ list_del(&o->hash_list);
++ kfree(o);
++ }
++
++ up_write(&_origins_lock);
++}
++
+/*
-+ * Check a string doesn't overrun the chunk of
-+ * memory we copied from userland.
++ * Implementation of the exception hash tables.
+ */
-+static int valid_str(char *str, void *begin, void *end)
++static int init_exception_table(struct exception_table *et, uint32_t size)
+{
-+ while (((void *) str >= begin) && ((void *) str < end))
-+ if (!*str++)
-+ return 0;
++ int i;
+
-+ return -EINVAL;
++ et->hash_mask = size - 1;
++ et->table = vmalloc(sizeof(struct list_head) * (size));
++ if (!et->table)
++ return -ENOMEM;
++
++ for (i = 0; i < size; i++)
++ INIT_LIST_HEAD(et->table + i);
++
++ return 0;
+}
+
-+static int next_target(struct dm_target_spec *last, unsigned long next,
-+ void *begin, void *end,
-+ struct dm_target_spec **spec, char **params)
++static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
+{
-+ *spec = (struct dm_target_spec *)
-+ ((unsigned char *) last + next);
-+ *params = (char *) (*spec + 1);
++ struct list_head *slot, *entry, *temp;
++ struct exception *ex;
++ int i, size;
+
-+ if (*spec < (last + 1) || ((void *)*spec > end))
-+ return -EINVAL;
++ size = et->hash_mask + 1;
++ for (i = 0; i < size; i++) {
++ slot = et->table + i;
+
-+ return valid_str(*params, begin, end);
++ list_for_each_safe(entry, temp, slot) {
++ ex = list_entry(entry, struct exception, hash_list);
++ kmem_cache_free(mem, ex);
++ }
++ }
++
++ vfree(et->table);
++}
++
++/*
++ * FIXME: check how this hash fn is performing.
++ */
++static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
++{
++ return chunk & et->hash_mask;
+}
+
-+void dm_error(const char *message)
++static void insert_exception(struct exception_table *eh, struct exception *e)
+{
-+ DMWARN("%s", message);
++ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
++ list_add(&e->hash_list, l);
++}
++
++static inline void remove_exception(struct exception *e)
++{
++ list_del(&e->hash_list);
+}
+
+/*
-+ * Checks to see if there's a gap in the table.
-+ * Returns true iff there is a gap.
++ * Return the exception data for a sector, or NULL if not
++ * remapped.
+ */
-+static int gap(struct dm_table *table, struct dm_target_spec *spec)
++static struct exception *lookup_exception(struct exception_table *et,
++ chunk_t chunk)
+{
-+ if (!table->num_targets)
-+ return (spec->sector_start > 0) ? 1 : 0;
++ struct list_head *slot, *el;
++ struct exception *e;
++
++ slot = &et->table[exception_hash(et, chunk)];
++ list_for_each(el, slot) {
++ e = list_entry(el, struct exception, hash_list);
++ if (e->old_chunk == chunk)
++ return e;
++ }
+
-+ if (spec->sector_start != table->highs[table->num_targets - 1] + 1)
-+ return 1;
++ return NULL;
++}
+
-+ return 0;
++static inline struct exception *alloc_exception(void)
++{
++ return kmem_cache_alloc(exception_cachep, GFP_NOIO);
+}
+
-+static int populate_table(struct dm_table *table, struct dm_ioctl *args)
++static inline struct pending_exception *alloc_pending_exception(void)
+{
-+ int i = 0, r, first = 1, argc;
-+ struct dm_target_spec *spec;
-+ char *params, *argv[MAX_ARGS];
-+ struct target_type *ttype;
-+ void *context, *begin, *end;
-+ offset_t highs = 0;
++ return kmem_cache_alloc(pending_cachep, GFP_NOIO);
++}
+
-+ if (!args->target_count) {
-+ DMWARN("populate_table: no targets specified");
-+ return -EINVAL;
-+ }
++static inline void free_exception(struct exception *e)
++{
++ kmem_cache_free(exception_cachep, e);
++}
+
-+ begin = (void *) args;
-+ end = begin + args->data_size;
++static inline void free_pending_exception(struct pending_exception *pe)
++{
++ kmem_cache_free(pending_cachep, pe);
++}
+
-+#define PARSE_ERROR(msg) {dm_error(msg); return -EINVAL;}
++/*
++ * Called when the copy I/O has finished
++ */
++static void copy_callback(copy_cb_reason_t reason, void *context, long arg)
++{
++ struct pending_exception *pe = (struct pending_exception *) context;
++ struct dm_snapshot *s = pe->snap;
++ struct exception *e;
+
-+ for (i = 0; i < args->target_count; i++) {
++ if (reason == COPY_CB_COMPLETE) {
++ struct buffer_head *bh;
+
-+ r = first ? next_target((struct dm_target_spec *)args,
-+ args->data_start,
-+ begin, end, &spec, ¶ms) :
-+ next_target(spec, spec->next,
-+ begin, end, &spec, ¶ms);
++ /* Update the metadata if we are persistent */
++ if (s->store->commit_exception)
++ s->store->commit_exception(s->store, &pe->e);
+
-+ if (r)
-+ PARSE_ERROR("unable to find target");
++ e = alloc_exception();
++ if (!e) {
++ /* FIXME: what do we do now ? */
++ return;
++ }
+
-+ /* Look up the target type */
-+ ttype = dm_get_target_type(spec->target_type);
-+ if (!ttype)
-+ PARSE_ERROR("unable to find target type");
++ /* Add a proper exception, and remove the
++ * inflight exception from the list */
++ down_write(&pe->snap->lock);
+
-+ if (gap(table, spec))
-+ PARSE_ERROR("gap in target ranges");
++ memcpy(e, &pe->e, sizeof(*e));
++ insert_exception(&s->complete, e);
++ remove_exception(&pe->e);
+
-+ /* Split up the parameter list */
-+ if (split_args(MAX_ARGS, &argc, argv, params) < 0)
-+ PARSE_ERROR("Too many arguments");
++ /* Submit any pending write BHs */
++ bh = pe->bh;
++ pe->bh = NULL;
++ up_write(&pe->snap->lock);
+
-+ /* Build the target */
-+ if (ttype->ctr(table, spec->sector_start, spec->length,
-+ argc, argv, &context)) {
-+ dm_error(context);
-+ PARSE_ERROR("target constructor failed");
++ kmem_cache_free(pending_cachep, pe);
++
++ while (bh) {
++ struct buffer_head *nextbh = bh->b_reqnext;
++ bh->b_reqnext = NULL;
++ generic_make_request(WRITE, bh);
++ bh = nextbh;
+ }
++ }
+
-+ /* Add the target to the table */
-+ highs = spec->sector_start + (spec->length - 1);
-+ if (dm_table_add_target(table, highs, ttype, context))
-+ PARSE_ERROR("internal error adding target to table");
++ /* Read/write error - snapshot is unusable */
++ if (reason == COPY_CB_FAILED_WRITE || reason == COPY_CB_FAILED_READ) {
++ DMERR("Error reading/writing snapshot");
+
-+ first = 0;
++ if (pe->snap->store->drop_snapshot)
++ pe->snap->store->drop_snapshot(pe->snap->store);
++ remove_exception(&pe->e);
++ kmem_cache_free(pending_cachep, pe);
+ }
++}
+
-+#undef PARSE_ERROR
++/*
++ * Hard coded magic.
++ */
++static int calc_max_buckets(void)
++{
++ unsigned long mem;
+
-+ r = dm_table_complete(table);
-+ return r;
++ mem = num_physpages << PAGE_SHIFT;
++ mem /= 50;
++ mem /= sizeof(struct list_head);
++
++ return mem;
+}
+
+/*
-+ * Copies device info back to user space, used by
-+ * the create and info ioctls.
++ * Rounds a number down to a power of 2.
+ */
-+static int info(const char *name, struct dm_ioctl *user)
++static inline uint32_t round_down(uint32_t n)
+{
-+ int minor;
-+ struct dm_ioctl param;
-+ struct mapped_device *md;
++ while (n & (n - 1))
++ n &= (n - 1);
++ return n;
++}
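For example, round_down(1000) strips the lowest set bit on each pass (1000, 992, 960, 896, 768) until only one bit remains, returning 512, the largest power of two not exceeding the input.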
+
-+ param.flags = 0;
-+ strncpy(param.version, DM_IOCTL_VERSION, sizeof(param.version));
++/*
++ * Allocate room for a suitable hash table.
++ */
++static int init_hash_tables(struct dm_snapshot *s)
++{
++ offset_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
+
-+ md = dm_get_name_r(name);
-+ if (!md)
-+ goto out;
-+ minor = MINOR(md->dev);
++ /*
++ * Calculate based on the size of the original volume or
++ * the COW volume...
++ */
++ cow_dev_size = get_dev_size(s->cow->dev);
++ origin_dev_size = get_dev_size(s->origin->dev);
++ max_buckets = calc_max_buckets();
+
-+ param.flags |= DM_EXISTS_FLAG;
-+ if (md->suspended)
-+ param.flags |= DM_SUSPEND_FLAG;
-+ if (md->read_only)
-+ param.flags |= DM_READONLY_FLAG;
++ hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size;
++ hash_size = min(hash_size, max_buckets);
+
-+ param.data_size = 0;
-+ strncpy(param.name, md->name, sizeof(param.name));
-+ param.name[sizeof(param.name) - 1] = '\0';
++ /* Round it down to a power of 2 */
++ hash_size = round_down(hash_size);
++ if (init_exception_table(&s->complete, hash_size))
++ return -ENOMEM;
+
-+ param.open_count = md->use_count;
-+ param.dev = kdev_t_to_nr(md->dev);
-+ param.target_count = md->map->num_targets;
++ /*
++ * Allocate hash table for in-flight exceptions
++ * Make this smaller than the real hash table
++ */
++ hash_size >>= 3;
++ if (!hash_size)
++ hash_size = 64;
+
-+ dm_put_r(minor);
++ if (init_exception_table(&s->pending, hash_size)) {
++ exit_exception_table(&s->complete, exception_cachep);
++ return -ENOMEM;
++ }
+
-+ out:
-+ return copy_to_user(user, ¶m, sizeof(param));
++ return 0;
+}
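To put rough numbers on this (illustrative figures only, assuming a 32-bit machine where struct list_head is 8 bytes and an origin at least as large as the COW device): with 256 MB of RAM, calc_max_buckets() allows about 268435456 / 50 / 8 = 671088 buckets. A 1 GB COW device (2097152 sectors) with a 16-sector chunk asks for 2097152 / 16 = 131072 buckets, which is under that cap and already a power of two, so the completed-exception table gets 131072 buckets and the pending table 131072 >> 3 = 16384.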
+
-+static int create(struct dm_ioctl *param, struct dm_ioctl *user)
++/*
++ * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n>
++ * <chunk-size> <extent-size>
++ */
++static int snapshot_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
+{
-+ int r;
-+ struct mapped_device *md;
-+ struct dm_table *t;
-+ int minor;
++ struct dm_snapshot *s;
++ unsigned long chunk_size;
++ unsigned long extent_size = 0L;
++ int r = -EINVAL;
++ char *persistent;
++ char *origin_path;
++ char *cow_path;
++ char *value;
++ int blocksize;
++
++ if (argc < 4) {
++ *context = "dm-snapshot: Not enough arguments";
++ r = -EINVAL;
++ goto bad;
++ }
+
-+ r = dm_table_create(&t);
-+ if (r)
-+ return r;
++ origin_path = argv[0];
++ cow_path = argv[1];
++ persistent = argv[2];
+
-+ r = populate_table(t, param);
-+ if (r) {
-+ dm_table_destroy(t);
-+ return r;
++ if ((*persistent & 0x5f) != 'P' && (*persistent & 0x5f) != 'N') {
++ *context = "Persistent flag is not P or N";
++ r = -EINVAL;
++ goto bad;
+ }
+
-+ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ?
-+ MINOR(to_kdev_t(param->dev)) : -1;
++ chunk_size = simple_strtoul(argv[3], &value, 10);
++ if (chunk_size == 0 || value == NULL) {
++ *context = "Invalid chunk size";
++ r = -EINVAL;
++ goto bad;
++ }
++
++ /* Get the extent size for persistent snapshots */
++ if ((*persistent & 0x5f) == 'P') {
++ if (argc < 5) {
++ *context = "No extent size specified";
++ r = -EINVAL;
++ goto bad;
++ }
++
++ extent_size = simple_strtoul(argv[4], &value, 10);
++ if (extent_size == 0 || value == NULL) {
++ *context = "Invalid extent size";
++ r = -EINVAL;
++ goto bad;
++ }
++ }
++
++ s = kmalloc(sizeof(*s), GFP_KERNEL);
++ if (s == NULL) {
++ *context = "Cannot allocate snapshot context private structure";
++ r = -ENOMEM;
++ goto bad;
++ }
+
-+ r = dm_create(param->name, minor, t);
++ r = dm_table_get_device(t, origin_path, 0, 0, &s->origin);
+ if (r) {
-+ dm_table_destroy(t);
-+ return r;
++ *context = "Cannot get origin device";
++ r = -EINVAL;
++ goto bad_free;
+ }
+
-+ md = dm_get_name_w(param->name);
-+ if (!md)
-+ /* shouldn't get here */
-+ return -EINVAL;
++ r = dm_table_get_device(t, cow_path, 0, 0, &s->cow);
++ if (r) {
++ dm_table_put_device(t, s->origin);
++ *context = "Cannot get COW device";
++ r = -EINVAL;
++ goto bad_free;
++ }
+
-+ minor = MINOR(md->dev);
-+ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
-+ dm_put_w(minor);
++ /* Validate the extent and chunk sizes against the device block size */
++ blocksize = get_hardsect_size(s->cow->dev);
++ if (chunk_size % (blocksize / SECTOR_SIZE)) {
++ *context = "Chunk size is not a multiple of device blocksize";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
+
-+ r = info(param->name, user);
-+ return r;
-+}
++ if (extent_size % (blocksize / SECTOR_SIZE)) {
++ *context = "Extent size is not a multiple of device blocksize";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
+
-+static int remove(struct dm_ioctl *param)
-+{
-+ int r, minor;
-+ struct mapped_device *md;
++ /* Check the sizes are small enough to fit in one kiovec */
++ if (chunk_size > KIO_MAX_SECTORS) {
++ *context = "Chunk size is too big";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ if (extent_size > KIO_MAX_SECTORS) {
++ *context = "Extent size is too big";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ /* Check chunk_size is a power of 2 */
++ if (chunk_size & (chunk_size - 1)) {
++ *context = "Chunk size is not a power of 2";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ s->chunk_size = chunk_size;
++ s->chunk_mask = chunk_size - 1;
++ for (s->chunk_shift = 0; chunk_size;
++ s->chunk_shift++, chunk_size >>= 1)
++ ;
++
++ s->valid = 1;
++ init_rwsem(&s->lock);
++
++ /* Allocate hash table for COW data */
++ if (init_hash_tables(s)) {
++ *context = "Unable to allocate hash table space";
++ r = -ENOMEM;
++ goto bad_putdev;
++ }
++
++ /*
++ * Check the persistent flag - done here because we need the iobuf
++ * to check the LV header
++ */
++#if 0
++ if ((*persistent & 0x5f) == 'P')
++ s->store = dm_create_persistent(s, blocksize,
++ extent_size, context);
++ else
++#endif
++ s->store = dm_create_transient(s, blocksize, context);
++
++ if (!s->store) {
++ *context = "Couldn't create exception store";
++ r = -EINVAL;
++ goto bad_free1;
++ }
++
++ /* Allocate the COW iobuf and set associated variables */
++ if (s->store->init &&
++ s->store->init(s->store, blocksize, extent_size, context)) {
++ *context = "Couldn't initialise exception store";
++ r = -ENOMEM;
++ goto bad_free1;
++ }
++
++ /* Flush IO to the origin device */
++ /* FIXME: what does sct have against fsync_dev ? */
++ fsync_dev(s->origin->dev);
++#if LVM_VFS_ENHANCEMENT
++ fsync_dev_lockfs(s->origin->dev);
++#endif
++
++ /* Add snapshot to the list of snapshots for this origin */
++ if (register_snapshot(s)) {
++ r = -EINVAL;
++ *context = "Cannot register snapshot origin";
++ goto bad_free2;
++ }
++
++#if LVM_VFS_ENHANCEMENT
++ unlockfs(s->origin->dev);
++#endif
++
++ *context = s;
++ return 0;
++
++ bad_free2:
++ if (s->store->destroy)
++ s->store->destroy(s->store);
+
-+ md = dm_get_name_w(param->name);
-+ if (!md)
-+ return -ENXIO;
++ bad_free1:
++ exit_exception_table(&s->pending, pending_cachep);
++ exit_exception_table(&s->complete, exception_cachep);
+
-+ minor = MINOR(md->dev);
-+ r = dm_destroy(md);
-+ dm_put_w(minor);
++ bad_putdev:
++ dm_table_put_device(t, s->cow);
++ dm_table_put_device(t, s->origin);
+
++ bad_free:
++ kfree(s);
++
++ bad:
+ return r;
+}
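To make the argument list concrete, here is a hypothetical table entry (device names and sizes invented for illustration) for a non-persistent snapshot of a 409600-sector origin using a 16-sector (8 KiB) chunk:

    0 409600 snapshot /dev/hda1 /dev/hda2 n 16

A persistent snapshot would also carry the extent size, e.g. "... p 16 128". As the checks above require, both sizes must be multiples of the COW device's hard sector size (expressed in 512-byte sectors) and no larger than one kiovec (KIO_MAX_SECTORS), and the chunk size must additionally be a power of two.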
+
-+static int suspend(struct dm_ioctl *param)
++static void snapshot_dtr(struct dm_table *t, void *context)
+{
-+ int r, minor;
-+ struct mapped_device *md;
++ struct dm_snapshot *s = (struct dm_snapshot *) context;
+
-+ md = dm_get_name_w(param->name);
-+ if (!md)
-+ return -ENXIO;
++ unregister_snapshot(s);
+
-+ minor = MINOR(md->dev);
-+ r = (param->flags & DM_SUSPEND_FLAG) ?
-+ dm_suspend(md) : dm_resume(md);
-+ dm_put_w(minor);
++ exit_exception_table(&s->pending, pending_cachep);
++ exit_exception_table(&s->complete, exception_cachep);
+
-+ return r;
++ /* Deallocate memory used */
++ if (s->store->destroy)
++ s->store->destroy(s->store);
++
++ dm_table_put_device(t, s->origin);
++ dm_table_put_device(t, s->cow);
++ kfree(s);
+}
+
-+static int reload(struct dm_ioctl *param)
++/*
++ * Performs a new copy on write.
++ */
++static int new_exception(struct dm_snapshot *s, struct buffer_head *bh)
+{
-+ int r, minor;
-+ struct mapped_device *md;
-+ struct dm_table *t;
++ struct exception *e;
++ struct pending_exception *pe;
++ chunk_t chunk;
+
-+ r = dm_table_create(&t);
-+ if (r)
-+ return r;
++ chunk = sector_to_chunk(s, bh->b_rsector);
+
-+ r = populate_table(t, param);
-+ if (r) {
-+ dm_table_destroy(t);
-+ return r;
-+ }
++ /*
++ * If the exception is in flight then we just defer the
++ * bh until this copy has completed.
++ */
+
-+ md = dm_get_name_w(param->name);
-+ if (!md) {
-+ dm_table_destroy(t);
-+ return -ENXIO;
++ /* FIXME: great big race. */
++ e = lookup_exception(&s->pending, chunk);
++ if (e) {
++ /* cast the exception to a pending exception */
++ pe = list_entry(e, struct pending_exception, e);
++ bh->b_reqnext = pe->bh;
++ pe->bh = bh;
++ return 0;
+ }
+
-+ minor = MINOR(md->dev);
-+
-+ r = dm_swap_table(md, t);
-+ if (r) {
-+ dm_put_w(minor);
-+ dm_table_destroy(t);
-+ return r;
++ pe = alloc_pending_exception();
++ if (!pe) {
++ DMWARN("Couldn't allocate pending exception.");
++ return -ENOMEM;
+ }
+
-+ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
-+ dm_put_w(minor);
-+ return 0;
-+}
-+
-+static int rename(struct dm_ioctl *param)
-+{
-+ char *newname = (char *) param + param->data_start;
++ pe->e.old_chunk = chunk;
+
-+ if (valid_str(newname, (void *) param,
-+ (void *) param + param->data_size) ||
-+ dm_set_name(param->name, newname)) {
-+ dm_error("Invalid new logical volume name supplied.");
-+ return -EINVAL;
++ if (s->store->prepare_exception &&
++ s->store->prepare_exception(s->store, &pe->e)) {
++ s->valid = 0;
++ free_pending_exception(pe); /* nothing references pe yet */
++ return -ENXIO;
++ }
+
-+ return 0;
-+}
++ bh->b_reqnext = pe->bh;
++ pe->bh = bh;
++ pe->snap = s;
+
-+static int ctl_open(struct inode *inode, struct file *file)
-+{
-+ /* only root can open this */
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EACCES;
++ insert_exception(&s->pending, &pe->e);
+
-+ MOD_INC_USE_COUNT;
++ /* Get kcopyd to do the copy */
++ dm_blockcopy(chunk_to_sector(s, pe->e.old_chunk),
++ chunk_to_sector(s, pe->e.new_chunk),
++ s->chunk_size,
++ s->origin->dev,
++ s->cow->dev, SNAPSHOT_COPY_PRIORITY, 0, copy_callback, pe);
+
-+ return 0;
++ return 1;
+}
+
-+static int ctl_close(struct inode *inode, struct file *file)
++static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
++ struct buffer_head *bh)
+{
-+ MOD_DEC_USE_COUNT;
-+ return 0;
++ bh->b_rdev = s->cow->dev;
++ bh->b_rsector = chunk_to_sector(s, e->new_chunk) +
++ (bh->b_rsector & s->chunk_mask);
+}
+
-+static int ctl_ioctl(struct inode *inode, struct file *file,
-+ uint command, ulong a)
++static int snapshot_map(struct buffer_head *bh, int rw, void *context)
+{
-+ int r;
-+ struct dm_ioctl *p;
++ struct exception *e;
++ struct dm_snapshot *s = (struct dm_snapshot *) context;
++ int r = 1;
++ chunk_t chunk;
+
-+ if (command == DM_VERSION)
-+ return version((struct dm_ioctl *) a);
++ chunk = sector_to_chunk(s, bh->b_rsector);
+
-+ r = copy_params((struct dm_ioctl *) a, &p);
-+ if (r)
-+ return r;
++ /* An invalid (e.g. full) snapshot is not usable */
++ if (!s->valid)
++ return -1;
+
-+ switch (command) {
-+ case DM_CREATE:
-+ r = create(p, (struct dm_ioctl *) a);
-+ break;
++ /*
++ * Write to snapshot - higher level takes care of RW/RO
++ * flags so we should only get this if we are
++ * writeable.
++ */
++ if (rw == WRITE) {
+
-+ case DM_REMOVE:
-+ r = remove(p);
-+ break;
++ down_write(&s->lock);
+
-+ case DM_SUSPEND:
-+ r = suspend(p);
-+ break;
++ /* If the block is already remapped - use that, else remap it */
++ e = lookup_exception(&s->complete, chunk);
++ if (e) {
++ remap_exception(s, e, bh);
++ up_write(&s->lock);
++ return 1;
++ }
+
-+ case DM_RELOAD:
-+ r = reload(p);
-+ break;
++ e = lookup_exception(&s->pending, chunk);
++ if (e) {
++ struct pending_exception *pe;
++ pe = list_entry(e, struct pending_exception, e);
++
++ /*
++ * Exception has not been committed to
++ * disk - save this bh
++ */
++ bh->b_reqnext = pe->bh;
++ pe->bh = bh;
++ up_write(&s->lock);
++ return 0;
++ }
+
-+ case DM_INFO:
-+ r = info(p->name, (struct dm_ioctl *) a);
-+ break;
++ if (new_exception(s, bh))
++ r = -1;
++ else
++ r = 0;
+
-+ case DM_RENAME:
-+ r = rename(p);
-+ break;
++ up_write(&s->lock);
+
-+ default:
-+ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
-+ r = -EINVAL;
++ } else {
++ /*
++ * FIXME: this read path scares me because we
++ * always use the origin when we have a pending
++ * exception. However I can't think of a
++ * situation where this is wrong - ejt.
++ */
++
++ /* Do reads */
++ down_read(&s->lock);
++
++ /* See if it has been remapped */
++ e = lookup_exception(&s->complete, chunk);
++ if (e)
++ remap_exception(s, e, bh);
++ else
++ bh->b_rdev = s->origin->dev;
++
++ up_read(&s->lock);
+ }
+
-+ free_params(p);
+ return r;
+}
+
-+static struct file_operations _ctl_fops = {
-+ open: ctl_open,
-+ release: ctl_close,
-+ ioctl: ctl_ioctl,
-+ owner: THIS_MODULE,
-+};
++/*
++ * Called on a write from the origin driver.
++ */
++int dm_do_snapshot(struct dm_dev *origin, struct buffer_head *bh)
++{
++ struct list_head *snap_list;
++ struct origin *o;
++ int r = 1;
++ chunk_t chunk;
++
++ down_read(&_origins_lock);
++ o = __lookup_origin(origin->dev);
++
++ if (o) {
++ struct list_head *origin_snaps = &o->snapshots;
++ struct dm_snapshot *lock_snap;
++
++ /* Lock the metadata */
++ lock_snap = list_entry(origin_snaps->next,
++ struct dm_snapshot, list);
++
++ /* Do all the snapshots on this origin */
++ list_for_each(snap_list, origin_snaps) {
++ struct dm_snapshot *snap;
++ struct exception *e;
++ snap = list_entry(snap_list, struct dm_snapshot, list);
++
++ down_write(&snap->lock);
++
++ /*
++ * Remember different snapshots can have
++ * different chunk sizes.
++ */
++ chunk = sector_to_chunk(snap, bh->b_rsector);
++
++ /* Only deal with valid snapshots */
++ if (snap->valid) {
++ /*
++ * Check the exception table to see if the
++ * block is already remapped in this snapshot;
++ * if not, start a copy-out (COW) for it.
++ */
++ e = lookup_exception(&snap->complete, chunk);
++ if (!e && !new_exception(snap, bh))
++ r = 0;
++ }
+
-+static devfs_handle_t _ctl_handle;
++ up_write(&snap->lock);
++ }
++ }
+
-+static struct miscdevice _dm_misc = {
-+ minor: MISC_DYNAMIC_MINOR,
-+ name: DM_NAME,
-+ fops: &_ctl_fops
++ up_read(&_origins_lock);
++ return r;
++}
++
++static struct target_type snapshot_target = {
++ name: "snapshot",
++ module: THIS_MODULE,
++ ctr: snapshot_ctr,
++ dtr: snapshot_dtr,
++ map: snapshot_map,
++ err: NULL
+};
+
-+/* Create misc character device and link to DM_DIR/control */
-+int __init dm_interface_init(void)
++int __init dm_snapshot_init(void)
+{
+ int r;
-+ char rname[64];
+
-+ r = misc_register(&_dm_misc);
++ r = dm_register_target(&snapshot_target);
+ if (r) {
-+ DMERR("misc_register failed for control device");
++ DMERR("snapshot target register failed %d", r);
+ return r;
+ }
+
-+ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
-+ sizeof rname - 3);
-+ if (r == -ENOSYS)
-+ return 0; /* devfs not present */
++ r = init_origin_hash();
++ if (r) {
++ DMERR("init_origin_hash failed.");
++ return r;
++ }
+
-+ if (r < 0) {
-+ DMERR("devfs_generate_path failed for control device");
-+ goto failed;
++ exception_cachep = kmem_cache_create("dm-snapshot-ex",
++ sizeof(struct exception),
++ __alignof__(struct exception),
++ 0, NULL, NULL);
++ if (!exception_cachep) {
++ exit_origin_hash();
++ return -1;
+ }
+
-+ strncpy(rname + r, "../", 3);
-+ r = devfs_mk_symlink(NULL, DM_DIR "/control",
-+ DEVFS_FL_DEFAULT, rname + r,
-+ &_ctl_handle, NULL);
-+ if (r) {
-+ DMERR("devfs_mk_symlink failed for control device");
-+ goto failed;
++ pending_cachep =
++ kmem_cache_create("dm-snapshot-in",
++ sizeof(struct pending_exception),
++ __alignof__(struct pending_exception),
++ 0, NULL, NULL);
++ if (!pending_cachep) {
++ exit_origin_hash();
++ kmem_cache_destroy(exception_cachep);
++ return -1;
+ }
-+ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
+
+ return 0;
-+
-+ failed:
-+ misc_deregister(&_dm_misc);
-+ return r;
+}
+
-+void __exit dm_interface_exit(void)
++void dm_snapshot_exit(void)
+{
-+ if (misc_deregister(&_dm_misc) < 0)
-+ DMERR("misc_deregister failed for control device");
++ int r = dm_unregister_target(&snapshot_target);
++
++ if (r < 0)
++ DMERR("snapshot target unregister failed %d", r);
++
++ exit_origin_hash();
++
++ kmem_cache_destroy(pending_cachep);
++ kmem_cache_destroy(exception_cachep);
+}
-diff -ruN linux-2.4.16/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c
---- linux-2.4.16/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm-linear.c Tue Jan 15 19:53:55 2002
-@@ -0,0 +1,105 @@
++
+/*
-+ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -ruN linux-2.4.16/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapshot.h
+--- linux-2.4.16/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-snapshot.h Thu Mar 14 16:02:50 2002
+@@ -0,0 +1,135 @@
++/*
++ * dm-snapshot.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
-+#include "dm.h"
++#ifndef DM_SNAPSHOT_H
++#define DM_SNAPSHOT_H
+
-+#include <linux/module.h>
-+#include <linux/init.h>
++#include "dm.h"
+#include <linux/blkdev.h>
+
-+/*
-+ * Linear: maps a linear range of a device.
-+ */
-+struct linear_c {
-+ long delta; /* FIXME: we need a signed offset type */
-+ struct dm_dev *dev;
++struct exception_table {
++ uint32_t hash_mask;
++ struct list_head *table;
+};
+
+/*
-+ * Construct a linear mapping: <dev_path> <offset>
++ * The snapshot code deals with largish chunks of the disk at a
++ * time. Typically 64k - 256k.
+ */
-+static int linear_ctr(struct dm_table *t, offset_t b, offset_t l,
-+ int argc, char **argv, void **context)
-+{
-+ struct linear_c *lc;
-+ unsigned long start; /* FIXME: unsigned long long */
-+ char *end;
++/* FIXME: can we get away with limiting these to a uint32_t ? */
++typedef offset_t chunk_t;
+
-+ if (argc != 2) {
-+ *context = "dm-linear: Not enough arguments";
-+ return -EINVAL;
-+ }
++struct dm_snapshot {
++ struct rw_semaphore lock;
+
-+ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
-+ if (lc == NULL) {
-+ *context = "dm-linear: Cannot allocate linear context";
-+ return -ENOMEM;
-+ }
++ struct dm_dev *origin;
++ struct dm_dev *cow;
+
-+ start = simple_strtoul(argv[1], &end, 10);
-+ if (*end) {
-+ *context = "dm-linear: Invalid device sector";
-+ goto bad;
-+ }
++ /* List of snapshots per Origin */
++ struct list_head list;
+
-+ if (dm_table_get_device(t, argv[0], start, l, &lc->dev)) {
-+ *context = "dm-linear: Device lookup failed";
-+ goto bad;
-+ }
++ /* Size of data blocks saved - must be a power of 2 */
++ chunk_t chunk_size;
++ chunk_t chunk_mask;
++ chunk_t chunk_shift;
+
-+ lc->delta = (int) start - (int) b;
-+ *context = lc;
-+ return 0;
++ /* You can't use a snapshot if this is 0 (e.g. if full) */
++ int valid;
+
-+ bad:
-+ kfree(lc);
-+ return -EINVAL;
-+}
++ struct exception_table pending;
++ struct exception_table complete;
+
-+static void linear_dtr(struct dm_table *t, void *c)
-+{
-+ struct linear_c *lc = (struct linear_c *) c;
++ /* The on disk metadata handler */
++ struct exception_store *store;
++};
+
-+ dm_table_put_device(t, lc->dev);
-+ kfree(c);
-+}
++/*
++ * An exception is used where an old chunk of data has been
++ * replaced by a new one.
++ */
++struct exception {
++ struct list_head hash_list;
+
-+static int linear_map(struct buffer_head *bh, int rw, void *context)
-+{
-+ struct linear_c *lc = (struct linear_c *) context;
++ chunk_t old_chunk;
++ chunk_t new_chunk;
++};
+
-+ bh->b_rdev = lc->dev->dev;
-+ bh->b_rsector = bh->b_rsector + lc->delta;
++/*
++ * Abstraction to handle persistent snapshots.
++ */
++struct exception_store {
+
-+ return 1;
-+}
++ /*
++ * Destroys this object when you've finished with it.
++ */
++ void (*destroy)(struct exception_store *store);
+
-+static struct target_type linear_target = {
-+ name: "linear",
-+ module: THIS_MODULE,
-+ ctr: linear_ctr,
-+ dtr: linear_dtr,
-+ map: linear_map,
++ /*
++ * Read the metadata and populate the snapshot.
++ */
++ int (*init)(struct exception_store *store,
++ int blocksize, unsigned long extent_size, void **context);
++
++ /*
++ * Find somewhere to store the next exception.
++ */
++ int (*prepare_exception)(struct exception_store *store,
++ struct exception *e);
++
++ /*
++ * Update the metadata with this exception.
++ */
++ int (*commit_exception)(struct exception_store *store,
++ struct exception *e);
++
++ /*
++ * The snapshot is invalid, note this in the metadata.
++ */
++ void (*drop_snapshot)(struct exception_store *store);
++
++ struct dm_snapshot *snap;
++ void *context;
+};
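To illustrate the contract, here is a minimal sketch of a store (all names and behaviour invented; the transient store in dm-exception-store.c is the real reference). prepare_exception() is called from the snapshot write path to choose a destination chunk, commit_exception() from the kcopyd completion callback once the data is on the COW device, and hooks that a store does not need can be left NULL. This toy version does not even check for running off the end of the COW device:

    /* Hypothetical minimal store (illustration only): hands out COW chunks
     * sequentially and keeps no on-disk metadata, so there is nothing to
     * commit and nothing to read back at init time. */
    struct toy_store_ctx {
            chunk_t next_free;
    };

    static int toy_prepare_exception(struct exception_store *store,
                                     struct exception *ex)
    {
            struct toy_store_ctx *ctx = (struct toy_store_ctx *) store->context;

            /* Pick the chunk on the COW device that the old data will go to */
            ex->new_chunk = ctx->next_free++;
            return 0;       /* non-zero tells the caller to invalidate the snapshot */
    }

    static void toy_destroy(struct exception_store *store)
    {
            kfree(store->context);
            kfree(store);
    }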
+
-+int __init dm_linear_init(void)
++/*
++ * Constructor and destructor for the default persistent
++ * store.
++ */
++struct exception_store *dm_create_persistent(struct dm_snapshot *s,
++ int blocksize,
++ offset_t extent_size,
++ void **error);
++
++struct exception_store *dm_create_transient(struct dm_snapshot *s,
++ int blocksize, void **error);
++
++/*
++ * Return the number of sectors in the device.
++ */
++static inline offset_t get_dev_size(kdev_t dev)
+{
-+ int r = dm_register_target(&linear_target);
++ int *sizes;
+
-+ if (r < 0)
-+ DMERR("linear: register failed %d", r);
++ sizes = blk_size[MAJOR(dev)];
++ if (sizes)
++ return sizes[MINOR(dev)] << 1;
+
-+ return r;
++ return 0;
+}
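(In 2.4, blk_size[][] is recorded in 1 KiB units, hence the shift left by one to convert to 512-byte sectors: a 524288 KiB device, for example, reports 1048576 sectors.)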
+
-+void dm_linear_exit(void)
++static inline chunk_t sector_to_chunk(struct dm_snapshot *s, offset_t sector)
+{
-+ int r = dm_unregister_target(&linear_target);
++ return (sector & ~s->chunk_mask) >> s->chunk_shift;
++}
+
-+ if (r < 0)
-+ DMERR("linear: unregister failed %d", r);
++static inline offset_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
++{
++ return chunk << s->chunk_shift;
+}
+
++#endif
diff -ruN linux-2.4.16/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c
--- linux-2.4.16/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970
+++ linux/drivers/md/dm-stripe.c Thu Jan 31 17:50:20 2002
+
diff -ruN linux-2.4.16/drivers/md/dm-table.c linux/drivers/md/dm-table.c
--- linux-2.4.16/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm-table.c Tue Jan 8 17:57:45 2002
++++ linux/drivers/md/dm-table.c Thu Feb 21 12:46:57 2002
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ /* free the device list */
+ if (t->devices.next != &t->devices) {
+ DMWARN("devices still present during destroy: "
-+ "dm_table_remove_device calls missing");
++ "dm_table_remove_device calls missing");
+
+ free_devices(&t->devices);
+ }
+EXPORT_SYMBOL(dm_unregister_target);
diff -ruN linux-2.4.16/drivers/md/dm.c linux/drivers/md/dm.c
--- linux-2.4.16/drivers/md/dm.c Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm.c Mon Feb 4 13:16:25 2002
-@@ -0,0 +1,1063 @@
++++ linux/drivers/md/dm.c Thu Mar 14 16:32:26 2002
+@@ -0,0 +1,1130 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+#define DEFAULT_READ_AHEAD 64
+
+static const char *_name = DM_NAME;
-+static const char *_version = "0.94.04-ioctl (2002-02-04)";
++static const char *_version = "0.94.08-ioctl-cvs (2002-03-14)";
+static const char *_email = "lvm-devel@lists.sistina.com";
+
+static int major = 0;
+ struct target *target;
+ int rw;
+
-+ void (*end_io)(struct buffer_head *bh, int uptodate);
++ void (*end_io) (struct buffer_head * bh, int uptodate);
+ void *context;
+};
+
+
+static devfs_handle_t _dev_dir;
+
-+static int request(request_queue_t *q, int rw, struct buffer_head *bh);
++static int request(request_queue_t * q, int rw, struct buffer_head *bh);
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
+
+/*
+ if (r)
+ goto out_linear;
+
-+ r = dm_interface_init();
++ r = kcopyd_init();
+ if (r)
+ goto out_stripe;
+
++ r = dm_snapshot_init();
++ if (r)
++ goto out_kcopyd;
++
++ r = dm_origin_init();
++ if (r)
++ goto out_snapshot;
++
++ r = dm_mirror_init();
++ if (r)
++ goto out_origin;
++
++ r = dm_interface_init();
++ if (r)
++ goto out_mirror;
++
+ return 0;
+
++ out_mirror:
++ dm_mirror_exit();
++ out_origin:
++ dm_origin_exit();
++ out_snapshot:
++ dm_snapshot_exit();
++ out_kcopyd:
++ kcopyd_exit();
+ out_stripe:
+ dm_stripe_exit();
+ out_linear:
+
+static void __exit dm_exit(void)
+{
++ dm_destroy_all();
+ dm_interface_exit();
+ dm_stripe_exit();
+ dm_linear_exit();
++ dm_snapshot_exit();
++ dm_origin_exit();
++ dm_mirror_exit();
++ kcopyd_exit();
+ dm_target_exit();
+ local_exit();
+}
+
+ case BLKGETSIZE:
+ size = VOLUME_SIZE(minor);
-+ if (copy_to_user((void *)a, &size, sizeof(long)))
++ if (copy_to_user((void *) a, &size, sizeof(long)))
+ return -EFAULT;
+ break;
+
+ case BLKGETSIZE64:
+ size = VOLUME_SIZE(minor);
-+ if (put_user((u64)size, (u64 *)a))
++ if (put_user((u64) size, (u64 *) a))
+ return -EFAULT;
+ break;
+
+ */
+static void dec_pending(struct buffer_head *bh, int uptodate)
+{
-+ struct io_hook *ih = bh->b_private;
++ struct io_hook *ih = bh->b_bdev_private;
+
+ if (!uptodate && call_err_fn(ih, bh))
+ return;
+ wake_up(&ih->md->wait);
+
+ bh->b_end_io = ih->end_io;
-+ bh->b_private = ih->context;
++ bh->b_bdev_private = ih->context;
+ free_io_hook(ih);
+
+ bh->b_end_io(bh, uptodate);
+ ih->rw = rw;
+ ih->target = ti;
+ ih->end_io = bh->b_end_io;
-+ ih->context = bh->b_private;
++ ih->context = bh->b_bdev_private;
+
+ r = fn(bh, rw, context);
+
+ /* hook the end io request fn */
+ atomic_inc(&md->pending);
+ bh->b_end_io = dec_pending;
-+ bh->b_private = ih;
++ bh->b_bdev_private = ih;
+
+ } else if (r == 0)
+ /* we don't need to hook */
+ return (KEYS_PER_NODE * n) + k;
+}
+
-+static int request(request_queue_t *q, int rw, struct buffer_head *bh)
++static int request(request_queue_t * q, int rw, struct buffer_head *bh)
+{
+ struct mapped_device *md;
+ int r, minor = MINOR(bh->b_rdev);
+ }
+ }
+
-+
-+ if ((r = __map_buffer(md, bh, rw, __find_node(md->map, bh))) < 0)
++ if ((r = __map_buffer(md, bh, rw, __find_node(md->map, bh))) < 0)
+ goto bad;
+
+ dm_put_r(minor);
+ return r;
+
+ if (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) ||
-+ put_user(r_block, &lvb->lv_block)) return -EFAULT;
++ put_user(r_block, &lvb->lv_block))
++ return -EFAULT;
+
+ return 0;
+}
+ * Allocate and initialise a blank device. Device is returned
+ * with a write lock held.
+ */
-+static struct mapped_device *alloc_dev(int minor)
++static struct mapped_device *alloc_dev(const char *name, const char *uuid,
++ int minor)
+{
+ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+
+
+ _devs[minor] = md;
+ md->dev = MKDEV(_major, minor);
-+ md->name[0] = '\0';
+ md->suspended = 0;
+
++ strncpy(md->name, name, sizeof(md->name) - 1);
++ md->name[sizeof(md->name) - 1] = '\0';
++
++ if (*uuid) {
++ size_t len = strnlen(uuid, DM_UUID_LEN - 1);
++
++ md->uuid = kmalloc(len + 1, GFP_KERNEL);
++ if (!md->uuid) {
++ DMWARN("unable to allocate uuid - out of memory.");
++ return NULL;
++ }
++
++ /* copy at most DM_UUID_LEN - 1 characters and terminate */
++ memcpy(md->uuid, uuid, len);
++ md->uuid[len] = '\0';
++ }
++
+ init_waitqueue_head(&md->wait);
+
+ return md;
+/*
+ * Constructor for a new device
+ */
-+int dm_create(const char *name, int minor, struct dm_table *table)
++int dm_create(const char *name, const char *uuid,
++ int minor, struct dm_table *table)
+{
+ int r;
+ struct mapped_device *md;
+ return -EINVAL;
+ }
+
-+ md = alloc_dev(minor);
++ md = alloc_dev(name, uuid, minor);
+ if (!md) {
+ spin_unlock(&_create_lock);
+ return -ENXIO;
+ }
+ minor = MINOR(md->dev);
+
-+ /* FIXME: move name allocation into alloc_dev */
-+ strcpy(md->name, name);
-+
+ r = __register_device(md);
+ if (r)
+ goto err;
+
+ err:
+ _devs[minor] = NULL;
++ if (md->uuid)
++ kfree(md->uuid);
++
+ kfree(md);
+ dm_put_w(minor);
+ spin_unlock(&_create_lock);
+}
+
+/*
-+ * Destructor for the device. You cannot destroy a suspended
++ * Destructor for the device. You cannot destroy an open
+ * device. Write lock must be held before calling.
+ */
+int dm_destroy(struct mapped_device *md)
+{
+ int minor, r;
+
-+ if (md->suspended || md->use_count)
++ if (md->use_count)
+ return -EPERM;
+
+ r = __unregister_device(md);
+ minor = MINOR(md->dev);
+ _devs[minor] = NULL;
+ __unbind(md);
++
++ if (md->uuid)
++ kfree(md->uuid);
++
+ kfree(md);
+
+ return 0;
+}
+
+/*
++ * Destroy all devices - except open ones
++ */
++void dm_destroy_all(void)
++{
++ int i;
++ struct mapped_device *md;
++
++ for (i = 0; i < MAX_DEVICES; i++) {
++ md = dm_get_w(i);
++ if (!md)
++ continue;
++
++ dm_destroy(md);
++ dm_put_w(i);
++ }
++}
++
++/*
+ * Sets or clears the read-only flag for the device. Write lock
+ * must be held.
+ */
+ if (!atomic_read(&md->pending))
+ break;
+
++ dm_put_w(minor);
+ schedule();
+
+ } while (1);
+MODULE_LICENSE("GPL");
diff -ruN linux-2.4.16/drivers/md/dm.h linux/drivers/md/dm.h
--- linux-2.4.16/drivers/md/dm.h Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm.h Mon Feb 4 13:01:21 2002
-@@ -0,0 +1,214 @@
++++ linux/drivers/md/dm.h Thu Mar 7 20:04:19 2002
+@@ -0,0 +1,233 @@
+/*
+ * Internal header file for device mapper
+ *
+#ifndef DM_INTERNAL_H
+#define DM_INTERNAL_H
+
-+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/major.h>
+struct mapped_device {
+ kdev_t dev;
+ char name[DM_NAME_LEN];
++ char *uuid;
+
+ int use_count;
+ int suspended;
+ */
+int split_args(int max, int *argc, char **argv, char *input);
+
-+
+/* dm.c */
+struct mapped_device *dm_get_r(int minor);
+struct mapped_device *dm_get_w(int minor);
+/*
+ * Call with no lock.
+ */
-+int dm_create(const char *name, int minor, struct dm_table *table);
++int dm_create(const char *name, const char *uuid,
++ int minor, struct dm_table *table);
+int dm_set_name(const char *oldname, const char *newname);
++void dm_destroy_all(void);
+
+/*
+ * You must have the write lock before calling the remaining md
+ */
+void dm_notify(void *target);
+
-+
+/* dm-table.c */
+int dm_table_create(struct dm_table **result);
+void dm_table_destroy(struct dm_table *t);
+int dm_table_complete(struct dm_table *t);
+
+/* kcopyd.c */
-+int dm_blockcopy(unsigned long fromsec, unsigned long tosec,
-+ unsigned long nr_sectors,
-+ kdev_t fromdev, kdev_t todev,
-+ int throttle, void (*callback)(int, void *), void *context);
-+
++typedef enum {
++ COPY_CB_COMPLETE,
++ COPY_CB_FAILED_READ,
++ COPY_CB_FAILED_WRITE,
++ COPY_CB_PROGRESS
++} copy_cb_reason_t;
++
++int dm_blockcopy(unsigned long fromsec, unsigned long tosec,
++ unsigned long nr_sectors, kdev_t fromdev, kdev_t todev,
++ int priority, int throttle,
++ void (*callback) (copy_cb_reason_t, void *, long),
++ void *context);
++int kcopyd_init(void);
++void kcopyd_exit(void);
++
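The following is a minimal usage sketch of the kcopyd interface declared above (a hypothetical caller, not part of the patch; the snapshot target's copy_callback() is the real in-tree user, and the function names and sector numbers here are invented):

    /* Hypothetical completion handler for a background copy. The third
     * argument is the number of sectors completed so far; kcopyd passes
     * done_sectors for both progress and final callbacks. */
    static void my_copy_done(copy_cb_reason_t reason, void *context, long sectors)
    {
            if (reason == COPY_CB_COMPLETE)
                    DMINFO("copy finished: %ld sectors", sectors);
            else if (reason == COPY_CB_FAILED_READ || reason == COPY_CB_FAILED_WRITE)
                    DMERR("background copy failed after %ld sectors", sectors);
            /* COPY_CB_PROGRESS is delivered periodically while the copy runs */
    }

    /* Queue a copy of 1024 sectors from sector 0 of 'src' to sector 2048 of
     * 'dst'; priority 0 is highest, throttle 0 means unthrottled. Returns 0
     * if the work was queued, or a negative errno. */
    static int start_copy(kdev_t src, kdev_t dst, void *context)
    {
            return dm_blockcopy(0, 2048, 1024, src, dst, 0, 0,
                                my_copy_done, context);
    }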
++/* Snapshots */
++int dm_snapshot_init(void);
++void dm_snapshot_exit(void);
++int dm_origin_init(void);
++void dm_origin_exit(void);
++
++/* dm-mirror.c */
++int dm_mirror_init(void);
++void dm_mirror_exit(void);
+
+#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
+#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x)
+void dm_stripe_exit(void);
+
+#endif
+diff -ruN linux-2.4.16/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c
+--- linux-2.4.16/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/kcopyd.c Thu Mar 14 13:45:09 2002
+@@ -0,0 +1,479 @@
++/*
++ * kcopyd.c
++ *
++ * Copyright (C) 2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/blkdev.h>
++#include <linux/device-mapper.h>
++
++#include "dm.h"
++
++/* Hard sector size used all over the kernel */
++#define SECTOR_SIZE 512
++
++/* Number of entries in the free list to start with */
++#define FREE_LIST_SIZE 32
++
++/* Slab cache for work entries when the freelist runs out */
++static kmem_cache_t *entry_cachep;
++
++/* Structure of work to do in the list */
++struct copy_work
++{
++ unsigned long fromsec;
++ unsigned long tosec;
++ unsigned long nr_sectors;
++ unsigned long done_sectors;
++ kdev_t fromdev;
++ kdev_t todev;
++ int throttle;
++ int priority; /* 0=highest */
++ void (*callback)(copy_cb_reason_t, void *, long);
++ void *context;
++ int freelist; /* Whether we came from the free list */
++ struct list_head list;
++};
++
++static LIST_HEAD(work_list);
++static LIST_HEAD(free_list);
++static struct task_struct *copy_task = NULL;
++static struct rw_semaphore work_list_lock;
++static struct rw_semaphore free_list_lock;
++static DECLARE_MUTEX(start_lock);
++static DECLARE_MUTEX(run_lock);
++static DECLARE_WAIT_QUEUE_HEAD(start_waitq);
++static DECLARE_WAIT_QUEUE_HEAD(work_waitq);
++static DECLARE_WAIT_QUEUE_HEAD(freelist_waitq);
++static struct kiobuf *iobuf;
++static int thread_exit = 0;
++static long last_jiffies = 0;
++
++/*
++ * Find a free entry on the free list or allocate a new one. This
++ * routine always returns a valid pointer, even if it has to wait
++ * for one to become available.
++ */
++static struct copy_work *get_work_struct(void)
++{
++ struct copy_work *entry = NULL;
++
++ while (!entry) {
++
++ down_write(&free_list_lock);
++ if (!list_empty(&free_list)) {
++ entry = list_entry(free_list.next, struct copy_work, list);
++ list_del(&entry->list);
++ }
++ up_write(&free_list_lock);
++
++ if (!entry) {
++ /* Nothing on the free-list - try to allocate one without doing IO */
++ entry = kmem_cache_alloc(entry_cachep, GFP_NOIO);
++
++ /* Make sure we know it didn't come from the free list */
++ if (entry) {
++ entry->freelist = 0;
++ }
++ }
++
++ /* Failed...wait for IO to finish */
++ if (!entry) {
++ DECLARE_WAITQUEUE(wq, current);
++
++ set_task_state(current, TASK_INTERRUPTIBLE);
++ add_wait_queue(&freelist_waitq, &wq);
++
++ if (list_empty(&free_list))
++ schedule();
++
++ set_task_state(current, TASK_RUNNING);
++ remove_wait_queue(&freelist_waitq, &wq);
++ }
++ }
++
++ return entry;
++}
++
++/* Allocate pages for a kiobuf. */
++static int alloc_iobuf_pages(struct kiobuf *iobuf, int nr_sectors)
++{
++ int nr_pages, err, i;
++
++ if (nr_sectors > KIO_MAX_SECTORS)
++ return -1;
++
++ nr_pages = nr_sectors / (PAGE_SIZE/SECTOR_SIZE);
++ err = expand_kiobuf(iobuf, nr_pages);
++ if (err) goto out;
++
++ err = -ENOMEM;
++ iobuf->locked = 1;
++ iobuf->nr_pages = 0;
++ for (i = 0; i < nr_pages; i++) {
++ struct page * page;
++
++ page = alloc_page(GFP_KERNEL);
++ if (!page) goto out;
++
++ iobuf->maplist[i] = page;
++ LockPage(page);
++ iobuf->nr_pages++;
++ }
++ iobuf->offset = 0;
++
++ err = 0;
++
++out:
++ return err;
++}
++
++
++/*
++ * Add a new entry to the work list, in priority + FIFO order.
++ * The work_list_lock semaphore must be held by the caller.
++ */
++static void add_to_work_list(struct copy_work *item)
++{
++ struct list_head *entry;
++
++ list_for_each(entry, &work_list) {
++ struct copy_work *cw;
++
++ cw = list_entry(entry, struct copy_work, list);
++ if (cw->priority > item->priority) {
++ __list_add(&item->list, cw->list.prev, &cw->list);
++ return;
++ }
++ }
++ list_add_tail(&item->list, &work_list);
++}
++
++/* Read in a chunk from the source device */
++static int read_blocks(struct kiobuf *iobuf, kdev_t dev, unsigned long start, int nr_sectors)
++{
++ int i, sectors_per_block, nr_blocks;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ sectors_per_block = blocksize / SECTOR_SIZE;
++
++ nr_blocks = nr_sectors / sectors_per_block;
++ start /= sectors_per_block;
++
++ for (i = 0; i < nr_blocks; i++)
++ iobuf->blocks[i] = start++;
++
++ iobuf->length = nr_sectors << 9;
++
++ status = brw_kiovec(READ, 1, &iobuf, dev, iobuf->blocks, blocksize);
++ return (status != (nr_sectors << 9));
++}
++
++/* Write out blocks */
++static int write_blocks(struct kiobuf *iobuf, kdev_t dev, unsigned long start, int nr_sectors)
++{
++ int i, sectors_per_block, nr_blocks;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ sectors_per_block = blocksize / SECTOR_SIZE;
++
++ nr_blocks = nr_sectors / sectors_per_block;
++ start /= sectors_per_block;
++
++ for (i = 0; i < nr_blocks; i++)
++ iobuf->blocks[i] = start++;
++
++ iobuf->length = nr_sectors << 9;
++
++ status = brw_kiovec(WRITE, 1, &iobuf, dev, iobuf->blocks, blocksize);
++ return (status != (nr_sectors << 9));
++}
++
++/* This is where all the real work happens */
++static int copy_kthread(void *unused)
++{
++ daemonize();
++ down(&run_lock);
++
++ strcpy(current->comm, "kcopyd");
++ copy_task = current;
++ wake_up_interruptible(&start_waitq);
++
++ do {
++ DECLARE_WAITQUEUE(wq, current);
++ struct task_struct *tsk = current;
++
++ down_write(&work_list_lock);
++
++ while (!list_empty(&work_list)) {
++
++ struct copy_work *work_item = list_entry(work_list.next, struct copy_work, list);
++ int done_sps;
++ copy_cb_reason_t callback_reason = COPY_CB_COMPLETE;
++ int preempted = 0;
++
++ list_del(&work_item->list);
++ up_write(&work_list_lock);
++
++ while (!preempted && work_item->done_sectors < work_item->nr_sectors) {
++ long nr_sectors = min((unsigned long)KIO_MAX_SECTORS,
++ work_item->nr_sectors - work_item->done_sectors);
++
++ /* Read original blocks */
++ if (read_blocks(iobuf, work_item->fromdev, work_item->fromsec + work_item->done_sectors,
++ nr_sectors)) {
++ DMERR("Read blocks from device %s failed", kdevname(work_item->fromdev));
++
++ /* Callback error */
++ callback_reason = COPY_CB_FAILED_READ;
++ goto done_copy;
++ }
++
++ /* Write them out again */
++ if (write_blocks(iobuf, work_item->todev, work_item->tosec + work_item->done_sectors,
++ nr_sectors)) {
++ DMERR("Write blocks to %s failed", kdevname(work_item->todev));
++
++ /* Callback error */
++ callback_reason = COPY_CB_FAILED_WRITE;
++ goto done_copy;
++ }
++ work_item->done_sectors += nr_sectors;
++
++ /* If we have exceeded the throttle value (in sectors/second) then
++ sleep for a while */
++ done_sps = nr_sectors*HZ/(jiffies-last_jiffies);
++ if (work_item->throttle && done_sps > work_item->throttle && done_sps) {
++ long start_jiffies = jiffies;
++ do {
++ schedule_timeout(done_sps - work_item->throttle * HZ);
++ } while (jiffies <= start_jiffies+(done_sps - work_item->throttle * HZ));
++ }
++
++ /* Do a progress callback */
++ if (work_item->callback && work_item->done_sectors < work_item->nr_sectors)
++ work_item->callback(COPY_CB_PROGRESS, work_item->context, work_item->done_sectors);
++
++ /* Look for higher priority work */
++ down_write(&work_list_lock);
++ if (!list_empty(&work_list)) {
++ struct copy_work *peek_item = list_entry(work_list.next, struct copy_work, list);
++
++ if (peek_item->priority < work_item->priority) {
++
++ /* Put this back on the list and restart to get the new one */
++ add_to_work_list(work_item);
++ preempted = 1;
++ goto restart;
++ }
++ }
++ up_write(&work_list_lock);
++ }
++
++ done_copy:
++ /* Call the callback */
++ if (work_item->callback)
++ work_item->callback(callback_reason, work_item->context, work_item->done_sectors);
++
++ /* Add it back to the free list (if it came from there)
++ and notify anybody waiting for an entry */
++ if (work_item->freelist) {
++ down_write(&free_list_lock);
++ list_add(&work_item->list, &free_list);
++ up_write(&free_list_lock);
++ }
++ else {
++ kmem_cache_free(entry_cachep, work_item);
++ }
++ wake_up_interruptible(&freelist_waitq);
++
++ /* Get the work lock again for the top of the while loop */
++ down_write(&work_list_lock);
++ restart:
++ }
++ up_write(&work_list_lock);
++
++ /* Wait for more work */
++ set_task_state(tsk, TASK_INTERRUPTIBLE);
++ add_wait_queue(&work_waitq, &wq);
++
++ if (list_empty(&work_list))
++ schedule();
++
++ set_task_state(tsk, TASK_RUNNING);
++ remove_wait_queue(&work_waitq, &wq);
++
++ } while (thread_exit == 0);
++
++ unmap_kiobuf(iobuf);
++ free_kiovec(1, &iobuf);
++
++ up(&run_lock);
++ return 0;
++}
++
++/* API entry point */
++int dm_blockcopy(unsigned long fromsec, unsigned long tosec, unsigned long nr_sectors,
++ kdev_t fromdev, kdev_t todev,
++ int priority, int throttle, void (*callback)(copy_cb_reason_t, void *, long), void *context)
++{
++ struct copy_work *newwork;
++ static pid_t thread_pid = 0;
++ long from_blocksize = get_hardsect_size(fromdev);
++ long to_blocksize = get_hardsect_size(todev);
++
++ /* Make sure the start sectors are on physical block boundaries */
++ if (fromsec % (from_blocksize/SECTOR_SIZE))
++ return -EINVAL;
++ if (tosec % (to_blocksize/SECTOR_SIZE))
++ return -EINVAL;
++
++ /* Start the thread if we don't have one already */
++ down(&start_lock);
++ if (copy_task == NULL) {
++ thread_pid = kernel_thread(copy_kthread, NULL, 0);
++ if (thread_pid > 0) {
++
++ DECLARE_WAITQUEUE(wq, current);
++ struct task_struct *tsk = current;
++
++ DMINFO("Started kcopyd thread");
++
++ /* Wait for it to complete its startup initialisation */
++ set_task_state(tsk, TASK_INTERRUPTIBLE);
++ add_wait_queue(&start_waitq, &wq);
++
++ if (copy_task == NULL)
++ schedule();
++
++ set_task_state(tsk, TASK_RUNNING);
++ remove_wait_queue(&start_waitq, &wq);
++ }
++ else {
++ DMERR("Failed to start kcopyd thread");
++ up(&start_lock);
++ return -EAGAIN;
++ }
++ }
++ up(&start_lock);
++
++ /* This will wait until one is available */
++ newwork = get_work_struct();
++
++ newwork->fromsec = fromsec;
++ newwork->tosec = tosec;
++ newwork->fromdev = fromdev;
++ newwork->todev = todev;
++ newwork->nr_sectors = nr_sectors;
++ newwork->done_sectors = 0;
++ newwork->throttle = throttle;
++ newwork->priority = priority;
++ newwork->callback = callback;
++ newwork->context = context;
++
++ down_write(&work_list_lock);
++ add_to_work_list(newwork);
++ up_write(&work_list_lock);
++
++ wake_up_interruptible(&work_waitq);
++ return 0;
++}
++
++
++/* Pre-allocate some structures for the free list */
++static int allocate_free_list(void)
++{
++ int i;
++ struct copy_work *newwork;
++
++ for (i=0; i<FREE_LIST_SIZE; i++) {
++ newwork = kmalloc(sizeof(struct copy_work), GFP_KERNEL);
++ if (!newwork)
++ return i;
++ newwork->freelist = 1;
++ list_add(&newwork->list, &free_list);
++ }
++ return i;
++}
++
++int __init kcopyd_init(void)
++{
++ init_rwsem(&work_list_lock);
++ init_rwsem(&free_list_lock);
++ init_MUTEX(&start_lock);
++ init_MUTEX(&run_lock);
++
++ if (alloc_kiovec(1, &iobuf)) {
++ DMERR("Unable to allocate kiobuf for kcopyd");
++ return -1;
++ }
++
++ if (alloc_iobuf_pages(iobuf, KIO_MAX_SECTORS)) {
++ DMERR("Unable to allocate pages for kcopyd");
++ free_kiovec(1, &iobuf);
++ return -1;
++ }
++
++ entry_cachep = kmem_cache_create("kcopyd",
++ sizeof(struct copy_work),
++ __alignof__(struct copy_work),
++ 0, NULL, NULL);
++ if (!entry_cachep) {
++ unmap_kiobuf(iobuf);
++ free_kiovec(1, &iobuf);
++ DMERR("Unable to allocate slab cache for kcopyd");
++ return -1;
++ }
++
++ if (allocate_free_list() == 0) {
++ unmap_kiobuf(iobuf);
++ free_kiovec(1, &iobuf);
++ kmem_cache_destroy(entry_cachep);
++ DMERR("Unable to allocate any work structures for the free list");
++ return -1;
++ }
++
++ return 0;
++}
++
++void kcopyd_exit(void)
++{
++ struct list_head *entry, *temp;
++
++ thread_exit = 1;
++ wake_up_interruptible(&work_waitq);
++
++ /* Wait for the thread to finish */
++ down(&run_lock);
++ up(&run_lock);
++
++ /* Free the free list */
++ list_for_each_safe(entry, temp, &free_list) {
++ struct copy_work *cw;
++ cw = list_entry(entry, struct copy_work, list);
++ list_del(&cw->list);
++ kfree(cw);
++ }
++
++ if (entry_cachep)
++ kmem_cache_destroy(entry_cachep);
++}
++
++EXPORT_SYMBOL(dm_blockcopy);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
diff -ruN linux-2.4.16/include/linux/device-mapper.h linux/include/linux/device-mapper.h
--- linux-2.4.16/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970
-+++ linux/include/linux/device-mapper.h Tue Jan 8 13:17:13 2002
-@@ -0,0 +1,58 @@
++++ linux/include/linux/device-mapper.h Thu Mar 7 16:56:22 2002
+@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+#define DM_DIR "device-mapper" /* Slashes not supported */
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
++#define DM_UUID_LEN 129
+
+#ifdef __KERNEL__
+
+#endif /* _LINUX_DEVICE_MAPPER_H */
diff -ruN linux-2.4.16/include/linux/dm-ioctl.h linux/include/linux/dm-ioctl.h
--- linux-2.4.16/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970
-+++ linux/include/linux/dm-ioctl.h Mon Feb 4 13:16:25 2002
-@@ -0,0 +1,69 @@
++++ linux/include/linux/dm-ioctl.h Thu Mar 14 16:32:26 2002
+@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * Implements a traditional ioctl interface to the device mapper.
+ */
+
++/*
++ * All ioctl arguments consist of a single chunk of memory, with
++ * this structure at the start.
++ */
++struct dm_ioctl {
++ char version[16];
++
++ unsigned long data_size; /* total size of data passed in
++ * including this struct */
++
++ unsigned long data_start; /* offset to start of data
++ * relative to start of this struct */
++
++ char name[DM_NAME_LEN]; /* device name */
++
++ unsigned int target_count; /* in/out */
++ unsigned int open_count; /* out */
++ unsigned int flags; /* in/out */
++
++ __kernel_dev_t dev; /* in/out */
++
++ char uuid[DM_UUID_LEN]; /* unique identifier for
++ * the block device */
++};
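As an illustration of that single-buffer layout, here is a hypothetical userspace sketch (all names and sizes invented, and the usual <string.h> and <sys/ioctl.h> includes assumed). The layout, a dm_target_spec at data_start followed by its NUL-terminated parameter string, is what the earlier populate_table() implementation (removed above) expected; it is stated here as an assumption rather than verified against the new ioctl code:

    /* Hypothetical helper: create a dm device with one linear target of
     * 409600 sectors mapped onto /dev/hda1. 'fd' is an open descriptor on
     * the device-mapper control node. */
    static int create_linear(int fd)
    {
            char buf[sizeof(struct dm_ioctl) + sizeof(struct dm_target_spec) + 64];
            struct dm_ioctl *dmi = (struct dm_ioctl *) buf;
            struct dm_target_spec *spec = (struct dm_target_spec *) (dmi + 1);

            memset(buf, 0, sizeof(buf));
            strncpy(dmi->version, DM_IOCTL_VERSION, sizeof(dmi->version));
            strncpy(dmi->name, "vol0", DM_NAME_LEN - 1);
            dmi->data_start = sizeof(struct dm_ioctl);
            dmi->data_size = sizeof(buf);
            dmi->target_count = 1;

            spec->sector_start = 0;
            spec->length = 409600;
            strncpy(spec->target_type, "linear", DM_MAX_TYPE_NAME - 1);
            strcpy((char *) (spec + 1), "/dev/hda1 0");  /* <dev_path> <offset> */

            return ioctl(fd, DM_CREATE, dmi);
    }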
++
++/*
++ * Used to specify tables. These structures appear after the
++ * dm_ioctl.
++ */
+struct dm_target_spec {
+ int32_t status; /* used when reading from kernel only */
+ unsigned long long sector_start;
+ */
+};
+
-+struct dm_ioctl {
-+ char version[16];
-+
-+ unsigned long data_size; /* total size of data passed in */
-+ /* including this struct */
-+
-+ unsigned long data_start; /* offset to start of data */
-+ /* relative to start of this struct */
-+
-+ char name[DM_NAME_LEN]; /* device name */
-+
-+ unsigned int target_count; /* in/out */
-+ unsigned int open_count; /* out */
-+ unsigned int flags; /* in/out */
++/*
++ * Used to retrieve the target dependencies.
++ */
++struct dm_target_deps {
++ unsigned int count;
+
-+ __kernel_dev_t dev; /* in/out */
++ __kernel_dev_t dev[0]; /* out */
+};
+
+#define DM_IOCTL 0xfd
+
-+#define DM_CREATE _IOWR(DM_IOCTL, 0x00, struct dm_ioctl)
-+#define DM_REMOVE _IOW(DM_IOCTL, 0x01, struct dm_ioctl)
-+#define DM_SUSPEND _IOW(DM_IOCTL, 0x02, struct dm_ioctl)
-+#define DM_RELOAD _IOW(DM_IOCTL, 0x03, struct dm_ioctl)
-+#define DM_INFO _IOWR(DM_IOCTL, 0x04, struct dm_ioctl)
-+#define DM_RENAME _IOW(DM_IOCTL, 0x05, struct dm_ioctl)
-+#define DM_VERSION _IOR(DM_IOCTL, 0x06, struct dm_ioctl)
++enum {
++ DM_CREATE_CMD = 0,
++ DM_REMOVE_CMD,
++ DM_SUSPEND_CMD,
++ DM_RELOAD_CMD,
++ DM_INFO_CMD,
++ DM_RENAME_CMD,
++ DM_VERSION_CMD,
++ DM_DEPS_CMD,
++ DM_REMOVE_ALL_CMD
++};
++
++#define DM_CREATE _IOWR(DM_IOCTL, DM_CREATE_CMD, struct dm_ioctl)
++#define DM_REMOVE _IOW(DM_IOCTL, DM_REMOVE_CMD, struct dm_ioctl)
++#define DM_SUSPEND _IOW(DM_IOCTL, DM_SUSPEND_CMD, struct dm_ioctl)
++#define DM_RELOAD _IOW(DM_IOCTL, DM_RELOAD_CMD, struct dm_ioctl)
++#define DM_INFO _IOWR(DM_IOCTL, DM_INFO_CMD, struct dm_ioctl)
++#define DM_RENAME _IOW(DM_IOCTL, DM_RENAME_CMD, struct dm_ioctl)
++#define DM_VERSION _IOR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
++#define DM_DEPS _IOR(DM_IOCTL, DM_DEPS_CMD, struct dm_ioctl)
++#define DM_REMOVE_ALL _IOR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+
+#define DM_IOCTL_VERSION "0.94"
-+#define DM_DRIVER_VERSION "0.94.04-ioctl (2002-02-04)"
++#define DM_DRIVER_VERSION "0.94.08-ioctl-cvs (2002-03-14)"
+
+/* Status bits */
+#define DM_READONLY_FLAG 0x00000001
+#define DM_EXISTS_FLAG 0x00000004
+#define DM_PERSISTENT_DEV_FLAG 0x00000008
+
-+#endif /* _LINUX_DM_IOCTL_H */
++#endif /* _LINUX_DM_IOCTL_H */
+diff -ruN linux-2.4.16/include/linux/fs.h linux/include/linux/fs.h
+--- linux-2.4.16/include/linux/fs.h Thu Dec 6 15:57:58 2001
++++ linux/include/linux/fs.h Thu Mar 14 16:32:40 2002
+@@ -257,7 +257,10 @@
+ char * b_data; /* pointer to data block */
+ struct page *b_page; /* the page this bh is mapped to */
+ void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
+- void *b_private; /* reserved for b_end_io */
++ void *b_private; /* reserved for b_end_io, also used by ext3 */
++ void *b_bdev_private; /* a hack to get around ext3 using b_private
++ * after handing the buffer_head to the
++ * block layer */
+
+ unsigned long b_rsector; /* Real buffer location on disk */
+ wait_queue_head_t b_wait;
diff -ruN linux-2.4.17/drivers/md/Config.in linux/drivers/md/Config.in
--- linux-2.4.17/drivers/md/Config.in Fri Sep 14 22:22:18 2001
-+++ linux/drivers/md/Config.in Mon Feb 4 13:17:12 2002
++++ linux/drivers/md/Config.in Thu Mar 14 16:32:56 2002
@@ -14,5 +14,6 @@
dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
endmenu
diff -ruN linux-2.4.17/drivers/md/Makefile linux/drivers/md/Makefile
--- linux-2.4.17/drivers/md/Makefile Thu Dec 6 15:57:55 2001
-+++ linux/drivers/md/Makefile Mon Feb 4 13:17:12 2002
-@@ -4,9 +4,11 @@
++++ linux/drivers/md/Makefile Thu Mar 14 16:32:56 2002
+@@ -4,9 +4,12 @@
O_TARGET := mddev.o
-export-objs := md.o xor.o
-+export-objs := md.o xor.o dm-table.o dm-target.o
++export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o
list-multi := lvm-mod.o
lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o
+dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \
-+ dm-linear.o dm-stripe.o
++ dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \
++ dm-origin.o dm-mirror.o kcopyd.o
# Note: link order is important. All raid personalities
# and xor.o must come before md.o, as they each initialise
-@@ -20,8 +22,12 @@
+@@ -20,8 +23,12 @@
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
obj-$(CONFIG_BLK_DEV_MD) += md.o
obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o
+ $(LD) -r -o $@ $(dm-mod-objs)
diff -ruN linux-2.4.17/drivers/md/device-mapper.h linux/drivers/md/device-mapper.h
--- linux-2.4.17/drivers/md/device-mapper.h Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/device-mapper.h Tue Jan 8 13:17:13 2002
-@@ -0,0 +1,58 @@
++++ linux/drivers/md/device-mapper.h Thu Mar 7 16:56:22 2002
+@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+#define DM_DIR "device-mapper" /* Slashes not supported */
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
++#define DM_UUID_LEN 129
+
+#ifdef __KERNEL__
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_DEVICE_MAPPER_H */
+diff -ruN linux-2.4.17/drivers/md/dm-exception-store.c linux/drivers/md/dm-exception-store.c
+--- linux-2.4.17/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-exception-store.c Thu Mar 14 16:02:50 2002
+@@ -0,0 +1,683 @@
++/*
++ * dm-snapshot.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm-snapshot.h"
++
++#if 0
++/*
++ * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
++ */
++#define SNAP_MAGIC 0x70416e53
++
++/*
++ * The on-disk version of the metadata. Only applicable to
++ * persistent snapshots.
++ * There is no backward or forward compatibility implemented, snapshots
++ * with different disk versions than the kernel will not be usable. It is
++ * expected that "lvcreate" will blank out the start of the COW device
++ * before calling the snapshot constructor.
++ */
++#define SNAPSHOT_DISK_VERSION 1
++
++/*
++ * Metadata format: (please keep this up-to-date!)
++ * Persistent snapshots have a 1 block header (see below for structure) at
++ * the very start of the device. The COW metadata starts at
++ * .start_of_exceptions.
++ *
++ * COW metadata is stored in blocks that are "extent-size" sectors long as
++ * an array of disk_exception structures in Little-Endian format.
++ * The last entry in this array has rsector_new set to 0 (this cannot be a
++ * legal redirection as the header is here) and if rsector_org has a value
++ * it is the sector number of the next COW metadata sector on the disk. If
++ * rsector_org is also zero then this is the end of the COW metadata.
++ *
++ * The metadata is written in hardblocksize lumps rather than in units of
++ * extents for efficiency so don't expect a whole extent to be zeroed out
++ * at any time.
++ *
++ * Non-persistent snapshots simply have redirected blocks stored
++ * (in chunk_size sectors) from hard block 1 to avoid inadvertently
++ * creating a bad header.
++ */
++
++/*
++ * Internal snapshot structure
++ */
++struct persistent_info {
++ /* Size of extents used for COW blocks */
++ long extent_size;
++
++ /* Number of the next free sector for COW/data */
++ unsigned long next_free_sector;
++
++ /* Where the metadata starts */
++ unsigned long start_of_exceptions;
++
++ /* Where we are currently writing the metadata */
++ unsigned long current_metadata_sector;
++
++ /* Index into disk_cow array */
++ int current_metadata_entry;
++
++ /* Index into mythical extent array */
++ int current_metadata_number;
++
++ /* Number of metadata entries in the disk_cow array */
++ int highest_metadata_entry;
++
++ /* Number of metadata entries per hard disk block */
++ int md_entries_per_block;
++
++ /* kiobuf for doing I/O to header & metadata */
++ struct kiobuf *cow_iobuf;
++
++ /*
++ * Disk extent with COW data in it. as an array of
++ * exception tables. The first one points to the next
++ * block of metadata or 0 if this is the last
++ */
++ struct disk_exception *disk_cow;
++};
++
++/*
++ * An array of these is held in each disk block. LE format
++ */
++struct disk_exception {
++ uint64_t rsector_org;
++ uint64_t rsector_new;
++};
++
++/*
++ * Structure of a (persistent) snapshot header on disk, in LE format
++ */
++struct snap_disk_header {
++ uint32_t magic;
++
++ /* Simple, incrementing version. No backward compatibility */
++ uint32_t version;
++
++ /* In 512 byte sectors */
++ uint32_t chunk_size;
++
++ /* In 512 byte sectors */
++ uint32_t extent_size;
++ uint64_t start_of_exceptions;
++ uint32_t full;
++};
++
++/*
++ * READ or WRITE some blocks to/from a device
++ */
++static int do_io(int rw, struct kiobuf *iobuf, kdev_t dev,
++ unsigned long start, int nr_sectors)
++{
++ int i, sectors_per_block, nr_blocks;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ sectors_per_block = blocksize / SECTOR_SIZE;
++
++ nr_blocks = nr_sectors / sectors_per_block;
++ start /= sectors_per_block;
++
++ for (i = 0; i < nr_blocks; i++)
++ iobuf->blocks[i] = start++;
++
++ iobuf->length = nr_sectors << 9;
++
++ status = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
++ return (status != (nr_sectors << 9));
++}
++
++/*
++ * Write the latest COW metadata block.
++ */
++static int write_metadata(struct snapshot_c *s, struct persistent_info *pi)
++{
++ kdev_t dev = s->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ int writesize = blocksize/SECTOR_SIZE;
++
++ if (do_io(WRITE, pi->cow_iobuf, dev,
++ pi->current_metadata_sector, writesize) != 0) {
++ DMERR("Error writing COW block");
++ return -1;
++ }
++
++ return 0;
++}
++
++/*
++ * Allocate a kiobuf. This is the only code nicked from the old
++ * snapshot driver and I've changed it anyway.
++ */
++static int alloc_iobuf_pages(struct kiobuf *iobuf, int nr_sectors)
++{
++ int nr_pages, err, i;
++
++ if (nr_sectors > KIO_MAX_SECTORS)
++ return -1;
++
++ nr_pages = nr_sectors / (PAGE_SIZE/SECTOR_SIZE);
++ err = expand_kiobuf(iobuf, nr_pages);
++ if (err) goto out;
++
++ err = -ENOMEM;
++ iobuf->locked = 1;
++ iobuf->nr_pages = 0;
++ for (i = 0; i < nr_pages; i++) {
++ struct page * page;
++
++ page = alloc_page(GFP_KERNEL);
++ if (!page) goto out;
++
++ iobuf->maplist[i] = page;
++ LockPage(page);
++ iobuf->nr_pages++;
++ }
++ iobuf->offset = 0;
++
++ err = 0;
++
++out:
++ return err;
++}
++
++/*
++ * Read on-disk COW metadata and populate the hash table.
++ */
++static int read_metadata(struct snapshot_c *lc, struct persistent_info *pi)
++{
++ int status;
++ int i;
++ int entry = 0;
++ int map_page = 0;
++ int nr_sectors = pi->extent_size;
++ kdev_t dev = lc->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ unsigned long cur_sector = pi->start_of_exceptions;
++ unsigned long last_sector;
++ unsigned long first_free_sector = 0;
++ int entries_per_page = PAGE_SIZE / sizeof(struct disk_exception);
++ struct disk_exception *cow_block;
++ struct kiobuf *read_iobuf;
++ int err = 0;
++ int devsize = get_dev_size(dev);
++
++ /*
++ * Allocate our own iovec for this operation 'cos the
++ * others are way too small.
++ */
++ if (alloc_kiovec(1, &read_iobuf)) {
++ DMERR("Error allocating iobuf for %s",
++ kdevname(dev));
++ return -1;
++ }
++
++ if (alloc_iobuf_pages(read_iobuf, pi->extent_size)) {
++ DMERR("Error allocating iobuf space for %s",
++ kdevname(dev));
++ free_kiovec(1, &read_iobuf);
++ return -1;
++ }
++ cow_block = page_address(read_iobuf->maplist[0]);
++
++ do {
++ /* Make sure the chain does not go off the end of
++ * the device, or backwards */
++ if (cur_sector > devsize || cur_sector < first_free_sector) {
++ DMERR("COW table chain pointers are inconsistent, "
++ "can't activate snapshot");
++ err = -1;
++ goto ret_free;
++ }
++
++ first_free_sector = max(first_free_sector,
++ cur_sector + pi->extent_size);
++ status = do_io(READ, read_iobuf, dev,
++ cur_sector, nr_sectors);
++ if (status == 0) {
++
++ map_page = 0;
++ entry = 0;
++
++ cow_block = page_address(read_iobuf->maplist[0]);
++
++ /* Now populate the hash table from this data */
++ for (i = 0; i <= pi->highest_metadata_entry &&
++ cow_block[entry].rsector_new != 0; i++) {
++
++ struct exception *ex;
++
++ ex = add_exception(lc,
++ le64_to_cpu(cow_block[entry].rsector_org),
++ le64_to_cpu(cow_block[entry].rsector_new));
++
++ first_free_sector = max(first_free_sector,
++ (unsigned long)(le64_to_cpu(cow_block[entry].rsector_new) +
++ lc->chunk_size));
++
++ /* Do we need to move onto the next page? */
++ if (++entry >= entries_per_page) {
++ entry = 0;
++ cow_block = page_address(read_iobuf->maplist[++map_page]);
++ }
++ }
++ }
++ else {
++ DMERR("Error reading COW metadata for %s",
++ kdevname(dev));
++ err = -1;
++ goto ret_free;
++ }
++ last_sector = cur_sector;
++ cur_sector = le64_to_cpu(cow_block[entry].rsector_org);
++
++ } while (cur_sector != 0);
++
++ lc->persistent = 1;
++ pi->current_metadata_sector = last_sector +
++ map_page*PAGE_SIZE/SECTOR_SIZE +
++ entry/(SECTOR_SIZE/sizeof(struct disk_exception));
++ pi->current_metadata_entry = entry;
++ pi->current_metadata_number = i;
++ pi->next_free_sector = first_free_sector;
++
++ /* Copy last block into cow_iobuf */
++ memcpy(pi->disk_cow, (char *)((long)&cow_block[entry] - ((long)&cow_block[entry] & (blocksize-1))), blocksize);
++
++ ret_free:
++ unmap_kiobuf(read_iobuf);
++ free_kiovec(1, &read_iobuf);
++
++ return err;
++}
++
++/*
++ * Read the snapshot volume header. Returns 0 only if it read OK
++ * and it was valid. Returns 1 if no header was found, -1 on
++ * error. All fields are checked against the snapshot structure
++ * itself to make sure we don't corrupt the data.
++ */
++static int read_header(struct snapshot_c *lc, struct persistent_info *pi)
++{
++ int status;
++ struct snap_disk_header *header;
++ kdev_t dev = lc->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ unsigned long devsize;
++
++ /* Get it */
++ status = do_io(READ, pi->cow_iobuf, dev, 0L, blocksize/SECTOR_SIZE);
++ if (status != 0) {
++ DMERR("Snapshot dev %s error reading header",
++ kdevname(dev));
++ return -1;
++ }
++
++ header = (struct snap_disk_header *) page_address(pi->cow_iobuf->maplist[0]);
++
++ /*
++ * Check the magic. It's OK if this fails, we just create a new snapshot header
++ * and start from scratch
++ */
++ if (le32_to_cpu(header->magic) != SNAP_MAGIC) {
++ return 1;
++ }
++
++ /* Check the version matches */
++ if (le32_to_cpu(header->version) != SNAPSHOT_DISK_VERSION) {
++ DMWARN("Snapshot dev %s version mismatch. Stored: %d, driver: %d",
++ kdevname(dev), le32_to_cpu(header->version), SNAPSHOT_DISK_VERSION);
++ return -1;
++ }
++
++ /* Check the chunk sizes match */
++ if (le32_to_cpu(header->chunk_size) != lc->chunk_size) {
++ DMWARN("Snapshot dev %s chunk size mismatch. Stored: %d, requested: %d",
++ kdevname(dev), le32_to_cpu(header->chunk_size), lc->chunk_size);
++ return -1;
++ }
++
++ /* Check the extent sizes match */
++ if (le32_to_cpu(header->extent_size) != pi->extent_size) {
++ DMWARN("Snapshot dev %s extent size mismatch. Stored: %d, requested: %ld",
++ kdevname(dev), le32_to_cpu(header->extent_size), pi->extent_size);
++ return -1;
++ }
++
++ /* Get the rest of the data */
++ pi->start_of_exceptions = le64_to_cpu(header->start_of_exceptions);
++ if (header->full) {
++ DMWARN("Snapshot dev %s is full. It cannot be used", kdevname(dev));
++ lc->full = 1;
++ return -1;
++ }
++
++ /* Validate against the size of the volume */
++ devsize = get_dev_size(dev);
++ if (pi->start_of_exceptions > devsize) {
++ DMWARN("Snapshot metadata error on %s. start exceptions > device size (%ld > %ld)",
++ kdevname(dev), pi->start_of_exceptions, devsize);
++ return -1;
++ }
++
++ /* Read metadata into the hash table and update pointers */
++ return read_metadata(lc, &lc->p_info);
++}
++
++/*
++ * Write (or update) the header. The only time we should need to
++ * do an update is when the snapshot becomes full.
++ */
++static int write_header(struct snapshot_c *lc, struct persistent_info *pi)
++{
++ struct snap_disk_header *header;
++ struct kiobuf *head_iobuf;
++ kdev_t dev = lc->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ /*
++ * Allocate our own iobuf for this so we don't corrupt
++ * any of the other writes that may be going on.
++ */
++ if (alloc_kiovec(1, &head_iobuf)) {
++ DMERR("Error allocating iobuf for header on %s", kdevname(dev));
++ return -1;
++ }
++
++ if (alloc_iobuf_pages(head_iobuf, PAGE_SIZE/SECTOR_SIZE)) {
++ DMERR("Error allocating iobuf space for header on %s", kdevname(dev));
++ free_kiovec(1, &head_iobuf);
++ return -1;
++ }
++
++ header = (struct snap_disk_header *) page_address(head_iobuf->maplist[0]);
++
++ header->magic = cpu_to_le32(SNAP_MAGIC);
++ header->version = cpu_to_le32(SNAPSHOT_DISK_VERSION);
++ header->chunk_size = cpu_to_le32(lc->chunk_size);
++ header->extent_size = cpu_to_le32(pi->extent_size);
++ header->full = cpu_to_le32(lc->full);
++
++ header->start_of_exceptions = cpu_to_le64(pi->start_of_exceptions);
++
++ /* Must write at least a full block */
++ status = do_io(WRITE, head_iobuf, dev, 0, blocksize/SECTOR_SIZE);
++
++ unmap_kiobuf(head_iobuf);
++ free_kiovec(1, &head_iobuf);
++ return status;
++}
++
++
++static int init_persistent_snapshot(struct snapshot_c *lc, int blocksize,
++ unsigned long extent_size, void **context)
++{
++ struct persistent_info *pi = &lc->p_info;
++
++ int status;
++ int i;
++ int cow_sectors;
++
++ pi->extent_size = extent_size;
++ pi->next_free_sector = blocksize / SECTOR_SIZE; /* Leave the first block alone */
++ pi->disk_cow = NULL;
++
++ pi->highest_metadata_entry = (pi->extent_size*SECTOR_SIZE) / sizeof(struct disk_exception) - 1;
++ pi->md_entries_per_block = blocksize / sizeof(struct disk_exception);
++
++ /* Allocate and set up iobuf for metadata I/O */
++ *context = "Unable to allocate COW iovec";
++ if (alloc_kiovec(1, &pi->cow_iobuf))
++ return -1;
++
++ /* Allocate space for the COW buffer. It should be at least PAGE_SIZE. */
++ cow_sectors = blocksize/SECTOR_SIZE + PAGE_SIZE/SECTOR_SIZE;
++ *context = "Unable to allocate COW I/O buffer space";
++ if (alloc_iobuf_pages(pi->cow_iobuf, cow_sectors)) {
++ free_kiovec(1, &pi->cow_iobuf);
++ return -1;
++ }
++
++ for (i=0; i < pi->cow_iobuf->nr_pages; i++) {
++ memset(page_address(pi->cow_iobuf->maplist[i]), 0, PAGE_SIZE);
++ }
++
++ pi->disk_cow = page_address(pi->cow_iobuf->maplist[0]);
++
++ *context = "Error in disk header";
++ /* Check for a header on disk and create a new one if not */
++ if ( (status = read_header(lc, &lc->p_info)) == 1) {
++
++ /* Write a new header */
++ pi->start_of_exceptions = pi->next_free_sector;
++ pi->next_free_sector += pi->extent_size;
++ pi->current_metadata_sector = pi->start_of_exceptions;
++ pi->current_metadata_entry = 0;
++ pi->current_metadata_number = 0;
++
++ *context = "Unable to write snapshot header";
++ if (write_header(lc, &lc->p_info) != 0) {
++ DMERR("Error writing header to snapshot volume %s",
++ kdevname(lc->cow_dev->dev));
++ goto free_ret;
++ }
++
++ /* Write a blank metadata block to the device */
++ if (write_metadata(lc, &lc->p_info) != 0) {
++ DMERR("Error writing initial COW table to snapshot volume %s",
++ kdevname(lc->cow_dev->dev));
++ goto free_ret;
++ }
++ }
++
++ /*
++ * There is a header but it doesn't match - fail so we
++ * don't destroy what might be useful data on disk. If
++ * the user really wants to use this COW device for a
++ * snapshot then the first sector should be zeroed out
++ * first.
++ */
++ if (status == -1)
++ goto free_ret;
++
++ return 0;
++
++ free_ret:
++ unmap_kiobuf(pi->cow_iobuf);
++ free_kiovec(1, &pi->cow_iobuf);
++ return -1;
++}
++
++static void exit_persistent_snapshot(struct persistent_info *pi)
++{
++ unmap_kiobuf(pi->cow_iobuf);
++ free_kiovec(1, &pi->cow_iobuf);
++}
++
++/*
++ * Finds a suitable destination for the exception.
++ */
++static int prepare_exception(struct snapshot_c *s,
++ struct inflight_exception *e)
++{
++ offset_t dev_size;
++
++ /*
++ * Check for full snapshot. Doing the size calculation here means that
++ * the COW device can be resized without us being told
++ */
++ dev_size = get_dev_size(s->cow_dev->dev);
++ if (s->p_info.next_free_sector + s->chunk_size >= dev_size) {
++ /* Snapshot is full, we can't use it */
++ DMWARN("Snapshot %s is full (sec=%ld, size=%ld)",
++ kdevname(s->cow_dev->dev),
++ s->p_info.next_free_sector + s->chunk_size, dev_size);
++ s->full = 1;
++
++ /* Mark it full on the device */
++ if (s->persistent)
++ write_header(s, &s->p_info);
++
++ return -1;
++
++ } else {
++ e->rsector_new = s->p_info.next_free_sector;
++ s->p_info.next_free_sector += s->chunk_size;
++ }
++
++ return 0;
++}
++
++/*
++ * Add a new exception entry to the on-disk metadata.
++ */
++static int commit_exception(struct snapshot_c *sc,
++ unsigned long org, unsigned long new)
++{
++ struct persistent_info *pi = &sc->p_info;
++
++ int i = pi->current_metadata_entry++;
++ unsigned long next_md_block = pi->current_metadata_sector;
++
++ pi->current_metadata_number++;
++
++ /* Update copy of disk COW */
++ pi->disk_cow[i].rsector_org = cpu_to_le64(org);
++ pi->disk_cow[i].rsector_new = cpu_to_le64(new);
++
++ /* Have we filled this extent ? */
++ if (pi->current_metadata_number >= pi->highest_metadata_entry) {
++ /* Fill in pointer to next metadata extent */
++ i++;
++ pi->current_metadata_entry++;
++
++ next_md_block = pi->next_free_sector;
++ pi->next_free_sector += pi->extent_size;
++
++ pi->disk_cow[i].rsector_org = cpu_to_le64(next_md_block);
++ pi->disk_cow[i].rsector_new = 0;
++ }
++
++ /* Commit to disk */
++ if (write_metadata(sc, &sc->p_info)) {
++ sc->full = 1; /* Failed, don't try again */
++ return -1;
++ }
++
++ /*
++ * Write a new (empty) metadata block if we are at the
++ * end of an existing block so that read_metadata finds a
++ * terminating zero entry.
++ */
++ if (pi->current_metadata_entry == pi->md_entries_per_block) {
++ memset(pi->disk_cow, 0, PAGE_SIZE);
++ pi->current_metadata_sector = next_md_block;
++
++ /*
++ * If this is also the end of an extent then go
++ * back to the start.
++ */
++ if (pi->current_metadata_number >= pi->highest_metadata_entry) {
++ pi->current_metadata_number = 0;
++
++ } else {
++ int blocksize = get_hardsect_size(sc->cow_dev->dev);
++ pi->current_metadata_sector += blocksize/SECTOR_SIZE;
++ }
++
++ pi->current_metadata_entry = 0;
++ if (write_metadata(sc, &sc->p_info) != 0) {
++ sc->full = 1;
++ return -1;
++ }
++ }
++ return 0;
++}
++
++/*
++ * Sets the full flag in the metadata. A quick hack for now.
++ */
++static void invalidate_snapshot(struct snapshot_c *s)
++{
++ s->full = 1;
++ if (s->persistent)
++ write_header(s, &s->p_info);
++}
++
++
++#endif
++
++
++struct exception_store * dm_create_persistent(struct dm_snapshot *s,
++ int blocksize,
++ offset_t extent_size,
++ void **error)
++{
++ return NULL;
++}
++
++
++/*
++ * Implementation of the store for non-persistent snapshots.
++ */
++struct transient_c {
++ offset_t next_free;
++};
++
++void destroy_transient(struct exception_store *store)
++{
++ kfree(store->context);
++ kfree(store);
++}
++
++int prepare_transient(struct exception_store *store, struct exception *e)
++{
++ struct transient_c *tc = (struct transient_c *) store->context;
++ offset_t size = get_dev_size(store->snap->cow->dev);
++
++ if (size < (tc->next_free + store->snap->chunk_size))
++ return -1;
++
++ e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
++ tc->next_free += store->snap->chunk_size;
++ return 0;
++}
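++
++/*
++ * Illustrative example: with a 16 sector chunk size the transient
++ * store above hands out COW chunks starting at sectors 0, 16, 32, ...
++ * of the COW device, and fails once the next chunk would run past the
++ * end of that device.
++ */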
++
++struct exception_store *dm_create_transient(struct dm_snapshot *s,
++ int blocksize, void **error)
++{
++ struct exception_store *store;
++ struct transient_c *tc;
++
++ store = kmalloc(sizeof(*store), GFP_KERNEL);
++ if (!store) {
++ DMWARN("out of memory.");
++ return NULL;
++ }
++
++ memset(store, 0, sizeof(*store));
++ store->destroy = destroy_transient;
++ store->prepare_exception = prepare_transient;
++ store->snap = s;
++
++ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
++ if (!tc) {
++ kfree(store);
++ return NULL;
++ }
++
++ tc->next_free = 0;
++ store->context = tc;
++
++ return store;
++}
++
diff -ruN linux-2.4.17/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c
--- linux-2.4.17/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm-ioctl.c Mon Feb 4 13:01:21 2002
-@@ -0,0 +1,443 @@
++++ linux/drivers/md/dm-ioctl.c Thu Mar 14 15:59:31 2002
+@@ -0,0 +1,557 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/miscdevice.h>
++#include <linux/dm-ioctl.h>
++#include <linux/init.h>
++
++static void free_params(struct dm_ioctl *p)
++{
++ vfree(p);
++}
++
++static int version(struct dm_ioctl *user)
++{
++ return copy_to_user(user, DM_DRIVER_VERSION, sizeof(DM_DRIVER_VERSION));
++}
++
++static int copy_params(struct dm_ioctl *user, struct dm_ioctl **result)
++{
++ struct dm_ioctl tmp, *dmi;
++
++ if (copy_from_user(&tmp, user, sizeof(tmp)))
++ return -EFAULT;
++
++ if (strcmp(DM_IOCTL_VERSION, tmp.version)) {
++ DMWARN("dm_ctl_ioctl: struct dm_ioctl version incompatible");
++ return -EINVAL;
++ }
++
++ if (tmp.data_size < sizeof(tmp))
++ return -EINVAL;
++
++ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
++ if (!dmi)
++ return -ENOMEM;
++
++ if (copy_from_user(dmi, user, tmp.data_size)) {
++ vfree(dmi);
++ return -EFAULT;
++ }
++
++ *result = dmi;
++ return 0;
++}
++
++/*
++ * Check a string doesn't overrun the chunk of
++ * memory we copied from userland.
++ */
++static int valid_str(char *str, void *begin, void *end)
++{
++ while (((void *) str >= begin) && ((void *) str < end))
++ if (!*str++)
++ return 0;
++
++ return -EINVAL;
++}
++
++static int next_target(struct dm_target_spec *last, unsigned long next,
++ void *begin, void *end,
++ struct dm_target_spec **spec, char **params)
++{
++ *spec = (struct dm_target_spec *)
++ ((unsigned char *) last + next);
++ *params = (char *) (*spec + 1);
++
++ if (*spec < (last + 1) || ((void *) *spec > end))
++ return -EINVAL;
++
++ return valid_str(*params, begin, end);
++}
++
++/*
++ * Checks to see if there's a gap in the table.
++ * Returns true iff there is a gap.
++ */
++static int gap(struct dm_table *table, struct dm_target_spec *spec)
++{
++ if (!table->num_targets)
++ return (spec->sector_start > 0) ? 1 : 0;
++
++ if (spec->sector_start != table->highs[table->num_targets - 1] + 1)
++ return 1;
++
++ return 0;
++}
++
++static int populate_table(struct dm_table *table, struct dm_ioctl *args)
++{
++ int i = 0, r, first = 1, argc;
++ struct dm_target_spec *spec;
++ char *params, *argv[MAX_ARGS];
++ struct target_type *ttype;
++ void *context, *begin, *end;
++ offset_t highs = 0;
++
++ if (!args->target_count) {
++ DMWARN("populate_table: no targets specified");
++ return -EINVAL;
++ }
++
++ begin = (void *) args;
++ end = begin + args->data_size;
++
++#define PARSE_ERROR(msg) {DMWARN(msg); return -EINVAL;}
++
++ for (i = 0; i < args->target_count; i++) {
++
++ r = first ? next_target((struct dm_target_spec *) args,
++ args->data_start,
++ begin, end, &spec, &params) :
++ next_target(spec, spec->next, begin, end, &spec, &params);
++
++ if (r)
++ PARSE_ERROR("unable to find target");
++
++ /* Look up the target type */
++ ttype = dm_get_target_type(spec->target_type);
++ if (!ttype)
++ PARSE_ERROR("unable to find target type");
++
++ if (gap(table, spec))
++ PARSE_ERROR("gap in target ranges");
++
++ /* Split up the parameter list */
++ if (split_args(MAX_ARGS, &argc, argv, params) < 0)
++ PARSE_ERROR("Too many arguments");
++
++ /* Build the target */
++ if (ttype->ctr(table, spec->sector_start, spec->length,
++ argc, argv, &context)) {
++ DMWARN("%s: target constructor failed",
++ (char *) context);
++ return -EINVAL;
++ }
++
++ /* Add the target to the table */
++ highs = spec->sector_start + (spec->length - 1);
++ if (dm_table_add_target(table, highs, ttype, context))
++ PARSE_ERROR("internal error adding target to table");
++
++ first = 0;
++ }
++
++#undef PARSE_ERROR
++
++ r = dm_table_complete(table);
++ return r;
++}
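++
++/*
++ * The buffer parsed above is laid out as a struct dm_ioctl followed,
++ * at data_start, by one dm_target_spec per target, each immediately
++ * followed by its nul-terminated parameter string; spec->next holds
++ * the byte offset from one spec to the next, as walked by
++ * next_target() above.
++ */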
++
++/*
++ * Round up the ptr to the next 'align' boundary. Obviously
++ * 'align' must be a power of 2.
++ */
++static inline void *align_ptr(void *ptr, unsigned int align)
++{
++ align--;
++ return (void *) (((unsigned long) (ptr + align)) & ~align);
++}
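++
++/*
++ * Example (illustrative): align_ptr((void *) 0x1003, 8) adds
++ * align - 1 = 7 to give 0x100a and masks with ~7, returning 0x1008,
++ * the next 8 byte boundary at or above the original pointer.
++ */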
++
++/*
++ * Copies a dm_ioctl and an optional additional payload to
++ * userland.
++ */
++static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param,
++ void *data, unsigned long len)
++{
++ int r;
++ void *ptr = NULL;
++
++ strncpy(param->version, DM_IOCTL_VERSION, sizeof(param->version));
++
++ if (data) {
++ ptr = align_ptr(user + 1, sizeof(unsigned long));
++ param->data_start = ptr - (void *) user;
++ }
++
++ r = copy_to_user(user, param, sizeof(*param));
++ if (r)
++ return r;
++
++ if (data) {
++ if (param->data_start + len > param->data_size)
++ return -ENOSPC;
++ r = copy_to_user(ptr, data, len);
++ }
++
++ return r;
++}
++
++/*
++ * Fills in a dm_ioctl structure, ready for sending back to
++ * userland.
++ */
++static void __info(struct mapped_device *md, struct dm_ioctl *param)
++{
++ param->flags = DM_EXISTS_FLAG;
++ if (md->suspended)
++ param->flags |= DM_SUSPEND_FLAG;
++ if (md->read_only)
++ param->flags |= DM_READONLY_FLAG;
++
++ strncpy(param->name, md->name, sizeof(param->name));
++
++ if (md->uuid)
++ strncpy(param->uuid, md->uuid, sizeof(param->uuid));
++ else
++ param->uuid[0] = '\0';
++
++ param->open_count = md->use_count;
++ param->dev = kdev_t_to_nr(md->dev);
++ param->target_count = md->map->num_targets;
++}
++
++/*
++ * Copies device info back to user space, used by
++ * the create and info ioctls.
++ */
++static int info(struct dm_ioctl *param, struct dm_ioctl *user)
++{
++ int minor;
++ struct mapped_device *md;
++
++ param->flags = 0;
++
++ md = dm_get_name_r(param->name);
++ if (!md)
++ /*
++ * Device not found - returns cleared exists flag.
++ */
++ goto out;
++
++ minor = MINOR(md->dev);
++ __info(md, param);
++ dm_put_r(minor);
++
++ out:
++ return results_to_user(user, param, NULL, 0);
++}
++
++/*
++ * Retrieves a list of devices used by a particular dm device.
++ */
++static int dep(struct dm_ioctl *param, struct dm_ioctl *user)
++{
++ int minor, count, r;
++ struct mapped_device *md;
++ struct list_head *tmp;
++ size_t len = 0;
++ struct dm_target_deps *deps = NULL;
++
++ md = dm_get_name_r(param->name);
++ if (!md)
++ goto out;
++ minor = MINOR(md->dev);
++
++ /*
++ * Setup the basic dm_ioctl structure.
++ */
++ __info(md, param);
++
++ /*
++ * Count the devices.
++ */
++ count = 0;
++ list_for_each(tmp, &md->map->devices)
++ count++;
++
++ /*
++ * Allocate a kernel space version of the dm_target_status
++ * struct.
++ */
++ len = sizeof(*deps) + (sizeof(*deps->dev) * count);
++ deps = kmalloc(len, GFP_KERNEL);
++ if (!deps) {
++ dm_put_r(minor);
++ return -ENOMEM;
++ }
++
++ /*
++ * Fill in the devices.
++ */
++ deps->count = count;
++ count = 0;
++ list_for_each(tmp, &md->map->devices) {
++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
++ deps->dev[count++] = kdev_t_to_nr(dd->dev);
++ }
++ dm_put_r(minor);
++
++ out:
++ r = results_to_user(user, param, deps, len);
++
++ kfree(deps);
++ return r;
++}
++
++static int create(struct dm_ioctl *param, struct dm_ioctl *user)
++{
++ int r;
++ struct mapped_device *md;
++ struct dm_table *t;
++ int minor;
++
++ r = dm_table_create(&t);
++ if (r)
++ return r;
++
++ r = populate_table(t, param);
++ if (r) {
++ dm_table_destroy(t);
++ return r;
++ }
++
++ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ?
++ MINOR(to_kdev_t(param->dev)) : -1;
++
++ r = dm_create(param->name, param->uuid, minor, t);
++ if (r) {
++ dm_table_destroy(t);
++ return r;
++ }
++
++ md = dm_get_name_w(param->name);
++ if (!md)
++ /* shouldn't get here */
++ return -EINVAL;
++
++ minor = MINOR(md->dev);
++ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
++ dm_put_w(minor);
++
++ r = info(param, user);
++ return r;
++}
++
++static int remove(struct dm_ioctl *param)
++{
++ int r, minor;
++ struct mapped_device *md;
++
++ md = dm_get_name_w(param->name);
++ if (!md)
++ return -ENXIO;
++
++ minor = MINOR(md->dev);
++ r = dm_destroy(md);
++ dm_put_w(minor);
++
++ return r;
++}
++
++static int suspend(struct dm_ioctl *param)
++{
++ int r, minor;
++ struct mapped_device *md;
++
++ md = dm_get_name_w(param->name);
++ if (!md)
++ return -ENXIO;
++
++ minor = MINOR(md->dev);
++ r = (param->flags & DM_SUSPEND_FLAG) ? dm_suspend(md) : dm_resume(md);
++ dm_put_w(minor);
++
++ return r;
++}
++
++static int reload(struct dm_ioctl *param)
++{
++ int r, minor;
++ struct mapped_device *md;
++ struct dm_table *t;
++
++ r = dm_table_create(&t);
++ if (r)
++ return r;
++
++ r = populate_table(t, param);
++ if (r) {
++ dm_table_destroy(t);
++ return r;
++ }
++
++ md = dm_get_name_w(param->name);
++ if (!md) {
++ dm_table_destroy(t);
++ return -ENXIO;
++ }
++
++ minor = MINOR(md->dev);
++
++ r = dm_swap_table(md, t);
++ if (r) {
++ dm_put_w(minor);
++ dm_table_destroy(t);
++ return r;
++ }
++
++ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
++ dm_put_w(minor);
++ return 0;
++}
++
++static int rename(struct dm_ioctl *param)
++{
++ char *newname = (char *) param + param->data_start;
++
++ if (valid_str(newname, (void *) param,
++ (void *) param + param->data_size) ||
++ dm_set_name(param->name, newname)) {
++ DMWARN("Invalid new logical volume name supplied.");
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static int ctl_open(struct inode *inode, struct file *file)
++{
++ /* only root can open this */
++ if (!capable(CAP_SYS_ADMIN))
++ return -EACCES;
++
++ MOD_INC_USE_COUNT;
++
++ return 0;
++}
++
++static int ctl_close(struct inode *inode, struct file *file)
++{
++ MOD_DEC_USE_COUNT;
++ return 0;
++}
++
++static int ctl_ioctl(struct inode *inode, struct file *file,
++ uint command, ulong a)
++{
++ int r;
++ struct dm_ioctl *p;
++ uint cmd = _IOC_NR(command);
++
++ switch (cmd) {
++ case DM_REMOVE_ALL_CMD:
++ dm_destroy_all();
++ case DM_VERSION_CMD:
++ return version((struct dm_ioctl *) a);
++ default:
++ break;
++ }
++
++ r = copy_params((struct dm_ioctl *) a, &p);
++ if (r)
++ return r;
++
++ /* FIXME: Change to use size 0 next time ioctl version gets changed */
++ switch (cmd) {
++ case DM_CREATE_CMD:
++ r = create(p, (struct dm_ioctl *) a);
++ break;
++
++ case DM_REMOVE_CMD:
++ r = remove(p);
++ break;
++
++ case DM_SUSPEND_CMD:
++ r = suspend(p);
++ break;
++
++ case DM_RELOAD_CMD:
++ r = reload(p);
++ break;
++
++ case DM_INFO_CMD:
++ r = info(p, (struct dm_ioctl *) a);
++ break;
++
++ case DM_DEPS_CMD:
++ r = dep(p, (struct dm_ioctl *) a);
++ break;
++
++ case DM_RENAME_CMD:
++ r = rename(p);
++ break;
++
++ default:
++ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
++ r = -EINVAL;
++ }
++
++ free_params(p);
++ return r;
++}
++
++static struct file_operations _ctl_fops = {
++ open: ctl_open,
++ release: ctl_close,
++ ioctl: ctl_ioctl,
++ owner: THIS_MODULE,
++};
++
++static devfs_handle_t _ctl_handle;
++
++static struct miscdevice _dm_misc = {
++ minor: MISC_DYNAMIC_MINOR,
++ name: DM_NAME,
++ fops: &_ctl_fops
++};
++
++/* Create misc character device and link to DM_DIR/control */
++int __init dm_interface_init(void)
++{
++ int r;
++ char rname[64];
++
++ r = misc_register(&_dm_misc);
++ if (r) {
++ DMERR("misc_register failed for control device");
++ return r;
++ }
++
++ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
++ sizeof rname - 3);
++ if (r == -ENOSYS)
++ return 0; /* devfs not present */
++
++ if (r < 0) {
++ DMERR("devfs_generate_path failed for control device");
++ goto failed;
++ }
++
++ strncpy(rname + r, "../", 3);
++ r = devfs_mk_symlink(NULL, DM_DIR "/control",
++ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL);
++ if (r) {
++ DMERR("devfs_mk_symlink failed for control device");
++ goto failed;
++ }
++ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
++
++ return 0;
++
++ failed:
++ misc_deregister(&_dm_misc);
++ return r;
++}
++
++void __exit dm_interface_exit(void)
++{
++ if (misc_deregister(&_dm_misc) < 0)
++ DMERR("misc_deregister failed for control device");
++}
+diff -ruN linux-2.4.17/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c
+--- linux-2.4.17/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-linear.c Tue Jan 15 19:53:55 2002
+@@ -0,0 +1,105 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++
++/*
++ * Linear: maps a linear range of a device.
++ */
++struct linear_c {
++ long delta; /* FIXME: we need a signed offset type */
++ struct dm_dev *dev;
++};
++
++/*
++ * Construct a linear mapping: <dev_path> <offset>
++ */
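++/*
++ * Example table line (illustrative only; the device and numbers are
++ * made up): "0 1024 linear /dev/hda1 2048" maps the first 1024
++ * sectors of the new device onto /dev/hda1 starting at sector 2048.
++ */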
++static int linear_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ struct linear_c *lc;
++ unsigned long start; /* FIXME: unsigned long long */
++ char *end;
++
++ if (argc != 2) {
++ *context = "dm-linear: Not enough arguments";
++ return -EINVAL;
++ }
++
++ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
++ if (lc == NULL) {
++ *context = "dm-linear: Cannot allocate linear context";
++ return -ENOMEM;
++ }
++
++ start = simple_strtoul(argv[1], &end, 10);
++ if (*end) {
++ *context = "dm-linear: Invalid device sector";
++ goto bad;
++ }
++
++ if (dm_table_get_device(t, argv[0], start, l, &lc->dev)) {
++ *context = "dm-linear: Device lookup failed";
++ goto bad;
++ }
++
++ lc->delta = (int) start - (int) b;
++ *context = lc;
++ return 0;
++
++ bad:
++ kfree(lc);
++ return -EINVAL;
++}
++
++static void linear_dtr(struct dm_table *t, void *c)
++{
++ struct linear_c *lc = (struct linear_c *) c;
++
++ dm_table_put_device(t, lc->dev);
++ kfree(c);
++}
++
++static int linear_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct linear_c *lc = (struct linear_c *) context;
++
++ bh->b_rdev = lc->dev->dev;
++ bh->b_rsector = bh->b_rsector + lc->delta;
++
++ return 1;
++}
++
++static struct target_type linear_target = {
++ name: "linear",
++ module: THIS_MODULE,
++ ctr: linear_ctr,
++ dtr: linear_dtr,
++ map: linear_map,
++};
++
++int __init dm_linear_init(void)
++{
++ int r = dm_register_target(&linear_target);
++
++ if (r < 0)
++ DMERR("linear: register failed %d", r);
++
++ return r;
++}
++
++void dm_linear_exit(void)
++{
++ int r = dm_unregister_target(&linear_target);
++
++ if (r < 0)
++ DMERR("linear: unregister failed %d", r);
++}
++
+diff -ruN linux-2.4.17/drivers/md/dm-mirror.c linux/drivers/md/dm-mirror.c
+--- linux-2.4.17/drivers/md/dm-mirror.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-mirror.c Thu Mar 14 15:53:19 2002
+@@ -0,0 +1,295 @@
+/*
-+ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ * Copyright (C) 2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++
++/* kcopyd priority of mirror operations */
++#define MIRROR_COPY_PRIORITY 5
++
++static kmem_cache_t *bh_cachep;
++
++/*
++ * Mirror: maps a mirror range of a device.
++ */
++struct mirror_c {
++ struct dm_dev *fromdev;
++ struct dm_dev *todev;
++
++ unsigned long from_delta;
++ unsigned long to_delta;
++
++ unsigned long frompos;
++ unsigned long topos;
++
++ unsigned long got_to;
++ struct rw_semaphore lock;
++ struct buffer_head *bhstring;
++ int error;
++};
++
++
++/* Called when a duplicating I/O has finished */
++static void mirror_end_io(struct buffer_head *bh, int uptodate)
++{
++ struct mirror_c *lc = (struct mirror_c *) bh->b_private;
++
++ /* Flag error if it failed */
++ if (!uptodate) {
++ DMERR("Mirror copy to %s failed", kdevname(lc->todev->dev));
++ lc->error = 1;
++ dm_notify(lc); /* TODO: interface ?? */
++ }
++ kmem_cache_free(bh_cachep, bh);
++}
++
++static void mirror_bh(struct mirror_c *mc, struct buffer_head *bh)
++{
++ struct buffer_head *dbh = kmem_cache_alloc(bh_cachep, GFP_NOIO);
++ if (dbh) {
++ *dbh = *bh;
++ dbh->b_rdev = mc->todev->dev;
++ dbh->b_rsector = bh->b_rsector - mc->from_delta
++ + mc->to_delta;
++ dbh->b_end_io = mirror_end_io;
++ dbh->b_private = mc;
++
++ generic_make_request(WRITE, dbh);
++ } else {
++ DMERR("kmem_cache_alloc failed for mirror bh");
++ mc->error = 1;
++ }
++}
++
++/* Called when the copy I/O has finished */
++static void copy_callback(copy_cb_reason_t reason, void *context, long arg)
++{
++ struct mirror_c *lc = (struct mirror_c *) context;
++ struct buffer_head *bh;
++
++ if (reason == COPY_CB_FAILED_READ || reason == COPY_CB_FAILED_WRITE) {
++ DMERR("Mirror block %s on %s failed, sector %ld",
++ reason == COPY_CB_FAILED_READ ? "read" : "write",
++ reason == COPY_CB_FAILED_READ ?
++ kdevname(lc->fromdev->dev) :
++ kdevname(lc->todev->dev), arg);
++ lc->error = 1;
++ return;
++ }
++
++ if (reason == COPY_CB_COMPLETE) {
++ /* Say we've finished */
++ dm_notify(lc); /* TODO: interface ?? */
++ }
++
++ if (reason == COPY_CB_PROGRESS) {
++ dm_notify(lc); /* TODO: interface ?? */
++ }
++
++ /* Submit, and mirror any pending BHs */
++ down_write(&lc->lock);
++ lc->got_to = arg;
++
++ bh = lc->bhstring;
++ lc->bhstring = NULL;
++ up_write(&lc->lock);
++
++ while (bh) {
++ struct buffer_head *nextbh = bh->b_reqnext;
++ bh->b_reqnext = NULL;
++ generic_make_request(WRITE, bh);
++ mirror_bh(lc, bh);
++ bh = nextbh;
++ }
++}
++
++/*
++ * Construct a mirror mapping: <dev_path1> <offset> <dev_path2> <offset> <throttle> [<priority>]
++ */
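++/*
++ * Example table line (illustrative only; devices and numbers are made
++ * up): "0 204800 mirror /dev/hda1 0 /dev/hdb1 0 64" mirrors 204800
++ * sectors of /dev/hda1 onto /dev/hdb1, both from offset 0, with a
++ * throttle of 64 and the default copy priority.
++ */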
++static int mirror_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ struct mirror_c *lc;
++ unsigned long offset1, offset2;
++ char *value;
++ int priority = MIRROR_COPY_PRIORITY;
++ int throttle;
++
++ if (argc <= 4) {
++ *context = "dm-mirror: Not enough arguments";
++ return -EINVAL;
++ }
++
++ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
++ if (lc == NULL) {
++ *context = "dm-mirror: Cannot allocate mirror context";
++ return -ENOMEM;
++ }
++
++ if (dm_table_get_device(t, argv[0], 0, l, &lc->fromdev)) {
++ *context = "dm-mirror: Device lookup failed";
++ goto bad;
++ }
++
++ offset1 = simple_strtoul(argv[1], &value, 10);
++ if (value == NULL) {
++ *context = "Invalid offset for dev1";
++ dm_table_put_device(t, lc->fromdev);
++ goto bad;
++ }
++
++ if (dm_table_get_device(t, argv[2], 0, l, &lc->todev)) {
++ *context = "dm-mirror: Device lookup failed";
++ dm_table_put_device(t, lc->fromdev);
++ goto bad;
++ }
++
++ offset2 = simple_strtoul(argv[3], &value, 10);
++ if (value == NULL) {
++ *context = "Invalid offset for dev2";
++ goto bad_put;
++ }
++
++ throttle = simple_strtoul(argv[4], &value, 10);
++ if (value == NULL) {
++ *context = "Invalid throttle value";
++ goto bad_put;
++ }
++
++ if (argc > 5) {
++ priority = simple_strtoul(argv[5], &value, 10);
++ if (value == NULL) {
++ *context = "Invalid priority value";
++ goto bad_put;
++ }
++ }
++
++ lc->from_delta = (int) offset1 - (int) b;
++ lc->to_delta = (int) offset2 - (int) b;
++ lc->frompos = offset1;
++ lc->topos = offset2;
++ lc->error = 0;
++ lc->bhstring = NULL;
++ init_rwsem(&lc->lock);
++ *context = lc;
++
++ /* Tell kcopyd to do the biz */
++ if (dm_blockcopy(offset1, offset2,
++ l - offset1,
++ lc->fromdev->dev, lc->todev->dev,
++ priority, 0, copy_callback, lc)) {
++ DMERR("block copy call failed");
++ dm_table_put_device(t, lc->fromdev);
++ dm_table_put_device(t, lc->todev);
++ goto bad;
++ }
++ return 0;
++
++ bad_put:
++ dm_table_put_device(t, lc->fromdev);
++ dm_table_put_device(t, lc->todev);
++ bad:
++ kfree(lc);
++ return -EINVAL;
++}
++
++static void mirror_dtr(struct dm_table *t, void *c)
++{
++ struct mirror_c *lc = (struct mirror_c *) c;
++
++ dm_table_put_device(t, lc->fromdev);
++ dm_table_put_device(t, lc->todev);
++ kfree(c);
++}
++
++static int mirror_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct mirror_c *lc = (struct mirror_c *) context;
++
++ bh->b_rdev = lc->fromdev->dev;
++ bh->b_rsector = bh->b_rsector + lc->from_delta;
++
++ if (rw == WRITE) {
++ down_write(&lc->lock);
++
++ /*
++ * If this area is in flight then save it until it's
++ * committed to the mirror disk and then submit it and
++ * its mirror.
++ */
++ if (bh->b_rsector > lc->got_to &&
++ bh->b_rsector <= lc->got_to + KIO_MAX_SECTORS) {
++ bh->b_reqnext = lc->bhstring;
++ lc->bhstring = bh;
++ up_write(&lc->lock);
++ return 0;
++ }
++
++ /*
++ * If we've already copied this block then duplicate
++ * it to the mirror device
++ */
++ if (bh->b_rsector < lc->got_to) {
++ /* Schedule copy of I/O to other target */
++ mirror_bh(lc, bh);
++ }
++ up_write(&lc->lock);
++ }
++ return 1;
++}
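++
++/*
++ * Illustrative example of the cases above: if kcopyd has copied up to
++ * sector 10000 (got_to) and KIO_MAX_SECTORS were 512, a write to
++ * sector 9000 is submitted to the origin and duplicated to the mirror,
++ * a write to sector 10200 is queued on bhstring until the in-flight
++ * copy completes, and a write to sector 20000 only goes to the origin
++ * because the background copy has not reached it yet.
++ */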
++
++static struct target_type mirror_target = {
++ name: "mirror",
++ module: THIS_MODULE,
++ ctr: mirror_ctr,
++ dtr: mirror_dtr,
++ map: mirror_map,
++};
++
++int __init dm_mirror_init(void)
++{
++ int r;
++
++ bh_cachep = kmem_cache_create("dm-mirror",
++ sizeof(struct buffer_head),
++ __alignof__(struct buffer_head),
++ 0, NULL, NULL);
++ if (!bh_cachep) {
++ return -1;
++ }
++
++
++ r = dm_register_target(&mirror_target);
++ if (r < 0) {
++ DMERR("mirror: register failed %d", r);
++ kmem_cache_destroy(bh_cachep);
++ }
++ return r;
++}
++
++void dm_mirror_exit(void)
++{
++ int r = dm_unregister_target(&mirror_target);
++
++ if (r < 0)
++ DMERR("mirror: unregister failed %d", r);
++
++ kmem_cache_destroy(bh_cachep);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -ruN linux-2.4.17/drivers/md/dm-origin.c linux/drivers/md/dm-origin.c
+--- linux-2.4.17/drivers/md/dm-origin.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-origin.c Wed Mar 13 17:28:40 2002
+@@ -0,0 +1,105 @@
++/*
++ * dm-origin.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/blkdev.h>
++#include <linux/device-mapper.h>
++
++#include "dm.h"
++
++/*
++ * Origin: maps a linear range of a device, with hooks for snapshotting.
++ */
++
++/*
++ * Construct an origin mapping: <dev_path>
++ * The context for an origin is merely a 'struct dm_dev *'
++ * pointing to the real device.
++ */
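++/*
++ * Example table line (illustrative only): "0 262144 snapshot-origin
++ * /dev/hda5" layers the origin target over the whole of /dev/hda5 so
++ * that writes to it can trigger copy-outs to any snapshots of it.
++ */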
++static int origin_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ int r;
++ struct dm_dev *dev;
++
++ if (argc != 1) {
++ *context = "dm-origin: incorrect number of arguments";
++ return -EINVAL;
++ }
++
++ r = dm_table_get_device(t, argv[0], 0, l, &dev);
++ if (r) {
++ *context = "Cannot get target device";
++ return r;
++ }
++
++ *context = dev;
++
++ return 0;
++}
++
++static void origin_dtr(struct dm_table *t, void *c)
++{
++ struct dm_dev *dev = (struct dm_dev *) c;
++
++ dm_table_put_device(t, dev);
++}
++
++static int origin_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct dm_dev *dev = (struct dm_dev *) context;
++
++ bh->b_rdev = dev->dev;
++
++ /* Only tell snapshots if this is a write */
++ return (rw == WRITE) ? dm_do_snapshot(dev, bh) : 1;
++}
++
++static struct target_type origin_target = {
++ name: "snapshot-origin",
++ module: THIS_MODULE,
++ ctr: origin_ctr,
++ dtr: origin_dtr,
++ map: origin_map,
++ err: NULL
++};
++
++int __init dm_origin_init(void)
++{
++ int r = dm_register_target(&origin_target);
++
++ if (r < 0)
++ DMERR("Device mapper: Origin: register failed %d\n", r);
++
++ return r;
++}
++
++void dm_origin_exit(void)
++{
++ int r = dm_unregister_target(&origin_target);
++
++ if (r < 0)
++ DMERR("Device mapper: Origin: unregister failed %d\n", r);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -ruN linux-2.4.17/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapshot.c
+--- linux-2.4.17/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-snapshot.c Thu Mar 14 16:08:52 2002
+@@ -0,0 +1,862 @@
++/*
++ * dm-snapshot.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
-+#include "dm.h"
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/blkdev.h>
++#include <linux/device-mapper.h>
++
++#include "dm-snapshot.h"
++
++/*
++ * Hard sector size used all over the kernel
++ */
++#define SECTOR_SIZE 512
++
++/*
++ * kcopyd priority of snapshot operations
++ */
++#define SNAPSHOT_COPY_PRIORITY 2
++
++struct pending_exception {
++ struct exception e;
++
++ /* Chain of WRITE buffer heads to submit when this COW has completed */
++ struct buffer_head *bh;
++
++ /* Pointer back to snapshot context */
++ struct dm_snapshot *snap;
++};
++
++/*
++ * Hash table mapping origin volumes to lists of snapshots and
++ * a lock to protect it
++ */
++static kmem_cache_t *exception_cachep;
++static kmem_cache_t *pending_cachep;
++
++/*
++ * One of these per registered origin, held in the snapshot_origins hash
++ */
++struct origin {
++ /* The origin device */
++ kdev_t dev;
++
++ struct list_head hash_list;
++
++ /* List of snapshots for this origin */
++ struct list_head snapshots;
++};
++
++/*
++ * Size of the hash table for origin volumes. If we make this
++ * the size of the minors list then it should be nearly perfect
++ */
++#define ORIGIN_HASH_SIZE 256
++#define ORIGIN_MASK 0xFF
++static struct list_head *_origins;
++static struct rw_semaphore _origins_lock;
++
++static int init_origin_hash(void)
++{
++ int i;
+
-+#include <linux/miscdevice.h>
-+#include <linux/dm-ioctl.h>
-+#include <linux/init.h>
++ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
++ GFP_KERNEL);
++ if (!_origins) {
++ DMERR("Device mapper: Snapshot: unable to allocate memory");
++ return -ENOMEM;
++ }
+
-+static void free_params(struct dm_ioctl *p)
++ for (i = 0; i < ORIGIN_HASH_SIZE; i++)
++ INIT_LIST_HEAD(_origins + i);
++ init_rwsem(&_origins_lock);
++
++ return 0;
++}
++
++static void exit_origin_hash(void)
+{
-+ vfree(p);
++ kfree(_origins);
+}
+
-+static int version(struct dm_ioctl *user)
++static inline unsigned int origin_hash(kdev_t dev)
+{
-+ return copy_to_user(user, DM_DRIVER_VERSION, sizeof(DM_DRIVER_VERSION));
++ return MINOR(dev) & ORIGIN_MASK;
+}
+
-+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **result)
++static struct origin *__lookup_origin(kdev_t origin)
+{
-+ struct dm_ioctl tmp, *dmi;
++ struct list_head *slist;
++ struct list_head *ol;
++ struct origin *o;
+
-+ if (copy_from_user(&tmp, user, sizeof(tmp)))
-+ return -EFAULT;
++ ol = &_origins[origin_hash(origin)];
++ list_for_each(slist, ol) {
++ o = list_entry(slist, struct origin, hash_list);
+
-+ if (strcmp(DM_IOCTL_VERSION, tmp.version)) {
-+ DMWARN("dm_ctl_ioctl: struct dm_ioctl version incompatible");
-+ return -EINVAL;
++ if (o->dev == origin)
++ return o;
+ }
+
-+ if (tmp.data_size < sizeof(tmp))
-+ return -EINVAL;
++ return NULL;
++}
+
-+ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
-+ if (!dmi)
-+ return -ENOMEM;
++static void __insert_origin(struct origin *o)
++{
++ struct list_head *sl = &_origins[origin_hash(o->dev)];
++ list_add_tail(&o->hash_list, sl);
++}
+
-+ if (copy_from_user(dmi, user, tmp.data_size)) {
-+ vfree(dmi);
-+ return -EFAULT;
++/*
++ * Make a note of the snapshot and its origin so we can look it
++ * up when the origin has a write on it.
++ */
++static int register_snapshot(struct dm_snapshot *snap)
++{
++ struct origin *o;
++ kdev_t dev = snap->origin->dev;
++
++ down_write(&_origins_lock);
++ o = __lookup_origin(dev);
++
++ if (!o) {
++ /* New origin */
++ o = kmalloc(sizeof(*o), GFP_KERNEL);
++ if (!o) {
++ up_write(&_origins_lock);
++ return -ENOMEM;
++ }
++
++ /* Initialise the struct */
++ INIT_LIST_HEAD(&o->snapshots);
++ o->dev = dev;
++
++ __insert_origin(o);
+ }
+
-+ *result = dmi;
++ list_add_tail(&snap->list, &o->snapshots);
++
++ up_write(&_origins_lock);
+ return 0;
+}
+
++static void unregister_snapshot(struct dm_snapshot *s)
++{
++ struct origin *o;
++
++ down_write(&_origins_lock);
++ o = __lookup_origin(s->origin->dev);
++
++ list_del(&s->list);
++ if (list_empty(&o->snapshots)) {
++ list_del(&o->hash_list);
++ kfree(o);
++ }
++
++ up_write(&_origins_lock);
++}
++
+/*
-+ * Check a string doesn't overrun the chunk of
-+ * memory we copied from userland.
++ * Implementation of the exception hash tables.
+ */
-+static int valid_str(char *str, void *begin, void *end)
++static int init_exception_table(struct exception_table *et, uint32_t size)
+{
-+ while (((void *) str >= begin) && ((void *) str < end))
-+ if (!*str++)
-+ return 0;
++ int i;
+
-+ return -EINVAL;
++ et->hash_mask = size - 1;
++ et->table = vmalloc(sizeof(struct list_head) * (size));
++ if (!et->table)
++ return -ENOMEM;
++
++ for (i = 0; i < size; i++)
++ INIT_LIST_HEAD(et->table + i);
++
++ return 0;
+}
+
-+static int next_target(struct dm_target_spec *last, unsigned long next,
-+ void *begin, void *end,
-+ struct dm_target_spec **spec, char **params)
++static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
+{
-+ *spec = (struct dm_target_spec *)
-+ ((unsigned char *) last + next);
-+ *params = (char *) (*spec + 1);
++ struct list_head *slot, *entry, *temp;
++ struct exception *ex;
++ int i, size;
+
-+ if (*spec < (last + 1) || ((void *)*spec > end))
-+ return -EINVAL;
++ size = et->hash_mask + 1;
++ for (i = 0; i < size; i++) {
++ slot = et->table + i;
+
-+ return valid_str(*params, begin, end);
++ list_for_each_safe(entry, temp, slot) {
++ ex = list_entry(entry, struct exception, hash_list);
++ kmem_cache_free(mem, ex);
++ }
++ }
++
++ vfree(et->table);
++}
++
++/*
++ * FIXME: check how this hash fn is performing.
++ */
++static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
++{
++ return chunk & et->hash_mask;
+}
+
-+void dm_error(const char *message)
++static void insert_exception(struct exception_table *eh, struct exception *e)
+{
-+ DMWARN("%s", message);
++ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
++ list_add(&e->hash_list, l);
++}
++
++static inline void remove_exception(struct exception *e)
++{
++ list_del(&e->hash_list);
+}
+
+/*
-+ * Checks to see if there's a gap in the table.
-+ * Returns true iff there is a gap.
++ * Return the exception data for a sector, or NULL if not
++ * remapped.
+ */
-+static int gap(struct dm_table *table, struct dm_target_spec *spec)
++static struct exception *lookup_exception(struct exception_table *et,
++ chunk_t chunk)
+{
-+ if (!table->num_targets)
-+ return (spec->sector_start > 0) ? 1 : 0;
++ struct list_head *slot, *el;
++ struct exception *e;
++
++ slot = &et->table[exception_hash(et, chunk)];
++ list_for_each(el, slot) {
++ e = list_entry(el, struct exception, hash_list);
++ if (e->old_chunk == chunk)
++ return e;
++ }
+
-+ if (spec->sector_start != table->highs[table->num_targets - 1] + 1)
-+ return 1;
++ return NULL;
++}
+
-+ return 0;
++static inline struct exception *alloc_exception(void)
++{
++ return kmem_cache_alloc(exception_cachep, GFP_NOIO);
+}
+
-+static int populate_table(struct dm_table *table, struct dm_ioctl *args)
++static inline struct pending_exception *alloc_pending_exception(void)
+{
-+ int i = 0, r, first = 1, argc;
-+ struct dm_target_spec *spec;
-+ char *params, *argv[MAX_ARGS];
-+ struct target_type *ttype;
-+ void *context, *begin, *end;
-+ offset_t highs = 0;
++ return kmem_cache_alloc(pending_cachep, GFP_NOIO);
++}
+
-+ if (!args->target_count) {
-+ DMWARN("populate_table: no targets specified");
-+ return -EINVAL;
-+ }
++static inline void free_exception(struct exception *e)
++{
++ kmem_cache_free(exception_cachep, e);
++}
+
-+ begin = (void *) args;
-+ end = begin + args->data_size;
++static inline void free_pending_exception(struct pending_exception *pe)
++{
++ kmem_cache_free(pending_cachep, pe);
++}
+
-+#define PARSE_ERROR(msg) {dm_error(msg); return -EINVAL;}
++/*
++ * Called when the copy I/O has finished
++ */
++static void copy_callback(copy_cb_reason_t reason, void *context, long arg)
++{
++ struct pending_exception *pe = (struct pending_exception *) context;
++ struct dm_snapshot *s = pe->snap;
++ struct exception *e;
+
-+ for (i = 0; i < args->target_count; i++) {
++ if (reason == COPY_CB_COMPLETE) {
++ struct buffer_head *bh;
+
-+ r = first ? next_target((struct dm_target_spec *)args,
-+ args->data_start,
-+ begin, end, &spec, &params) :
-+ next_target(spec, spec->next,
-+ begin, end, &spec, &params);
++ /* Update the metadata if we are persistent */
++ if (s->store->commit_exception)
++ s->store->commit_exception(s->store, &pe->e);
+
-+ if (r)
-+ PARSE_ERROR("unable to find target");
++ e = alloc_exception();
++ if (!e) {
++ /* FIXME: what do we do now ? */
++ return;
++ }
+
-+ /* Look up the target type */
-+ ttype = dm_get_target_type(spec->target_type);
-+ if (!ttype)
-+ PARSE_ERROR("unable to find target type");
++ /* Add a proper exception,
++ and remove the inflight exception from the list */
++ down_write(&pe->snap->lock);
+
-+ if (gap(table, spec))
-+ PARSE_ERROR("gap in target ranges");
++ memcpy(e, &pe->e, sizeof(*e));
++ insert_exception(&s->complete, e);
++ remove_exception(&pe->e);
+
-+ /* Split up the parameter list */
-+ if (split_args(MAX_ARGS, &argc, argv, params) < 0)
-+ PARSE_ERROR("Too many arguments");
++ /* Submit any pending write BHs */
++ bh = pe->bh;
++ pe->bh = NULL;
++ up_write(&pe->snap->lock);
+
-+ /* Build the target */
-+ if (ttype->ctr(table, spec->sector_start, spec->length,
-+ argc, argv, &context)) {
-+ dm_error(context);
-+ PARSE_ERROR("target constructor failed");
++ kmem_cache_free(pending_cachep, pe);
++
++ while (bh) {
++ struct buffer_head *nextbh = bh->b_reqnext;
++ bh->b_reqnext = NULL;
++ generic_make_request(WRITE, bh);
++ bh = nextbh;
+ }
++ }
+
-+ /* Add the target to the table */
-+ highs = spec->sector_start + (spec->length - 1);
-+ if (dm_table_add_target(table, highs, ttype, context))
-+ PARSE_ERROR("internal error adding target to table");
++ /* Read/write error - snapshot is unusable */
++ if (reason == COPY_CB_FAILED_WRITE || reason == COPY_CB_FAILED_READ) {
++ DMERR("Error reading/writing snapshot");
+
-+ first = 0;
++ if (pe->snap->store->drop_snapshot)
++ pe->snap->store->drop_snapshot(pe->snap->store);
++ remove_exception(&pe->e);
++ kmem_cache_free(pending_cachep, pe);
+ }
++}
+
-+#undef PARSE_ERROR
++/*
++ * Hard coded magic.
++ */
++static int calc_max_buckets(void)
++{
++ unsigned long mem;
+
-+ r = dm_table_complete(table);
-+ return r;
++ mem = num_physpages << PAGE_SHIFT;
++ mem /= 50;
++ mem /= sizeof(struct list_head);
++
++ return mem;
+}
+
+/*
-+ * Copies device info back to user space, used by
-+ * the create and info ioctls.
++ * Rounds a number down to a power of 2.
+ */
-+static int info(const char *name, struct dm_ioctl *user)
++static inline uint32_t round_down(uint32_t n)
+{
-+ int minor;
-+ struct dm_ioctl param;
-+ struct mapped_device *md;
++ while (n & (n - 1))
++ n &= (n - 1);
++ return n;
++}
+
-+ param.flags = 0;
-+ strncpy(param.version, DM_IOCTL_VERSION, sizeof(param.version));
++/*
++ * Allocate room for a suitable hash table.
++ */
++static int init_hash_tables(struct dm_snapshot *s)
++{
++ offset_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
+
-+ md = dm_get_name_r(name);
-+ if (!md)
-+ goto out;
-+ minor = MINOR(md->dev);
++ /*
++ * Calculate based on the size of the original volume or
++ * the COW volume...
++ */
++ cow_dev_size = get_dev_size(s->cow->dev);
++ origin_dev_size = get_dev_size(s->origin->dev);
++ max_buckets = calc_max_buckets();
+
-+ param.flags |= DM_EXISTS_FLAG;
-+ if (md->suspended)
-+ param.flags |= DM_SUSPEND_FLAG;
-+ if (md->read_only)
-+ param.flags |= DM_READONLY_FLAG;
++ hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size;
++ hash_size = min(hash_size, max_buckets);
+
-+ param.data_size = 0;
-+ strncpy(param.name, md->name, sizeof(param.name));
-+ param.name[sizeof(param.name) - 1] = '\0';
++ /* Round it down to a power of 2 */
++ hash_size = round_down(hash_size);
++ if (init_exception_table(&s->complete, hash_size))
++ return -ENOMEM;
+
-+ param.open_count = md->use_count;
-+ param.dev = kdev_t_to_nr(md->dev);
-+ param.target_count = md->map->num_targets;
++ /*
++ * Allocate hash table for in-flight exceptions
++ * Make this smaller than the real hash table
++ */
++ hash_size >>= 3;
++ if (!hash_size)
++ hash_size = 64;
+
-+ dm_put_r(minor);
++ if (init_exception_table(&s->pending, hash_size)) {
++ exit_exception_table(&s->complete, exception_cachep);
++ return -ENOMEM;
++ }
+
-+ out:
-+ return copy_to_user(user, &param, sizeof(param));
++ return 0;
+}
+
-+static int create(struct dm_ioctl *param, struct dm_ioctl *user)
++/*
++ * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n>
++ * <chunk-size> <extent-size>
++ */
++static int snapshot_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
+{
-+ int r;
-+ struct mapped_device *md;
-+ struct dm_table *t;
-+ int minor;
++ struct dm_snapshot *s;
++ unsigned long chunk_size;
++ unsigned long extent_size = 0L;
++ int r = -EINVAL;
++ char *persistent;
++ char *origin_path;
++ char *cow_path;
++ char *value;
++ int blocksize;
++
++ if (argc < 4) {
++ *context = "dm-snapshot: Not enough arguments";
++ r = -EINVAL;
++ goto bad;
++ }
+
-+ r = dm_table_create(&t);
-+ if (r)
-+ return r;
++ origin_path = argv[0];
++ cow_path = argv[1];
++ persistent = argv[2];
+
-+ r = populate_table(t, param);
-+ if (r) {
-+ dm_table_destroy(t);
-+ return r;
++ if ((*persistent & 0x5f) != 'P' && (*persistent & 0x5f) != 'N') {
++ *context = "Persistent flag is not P or N";
++ r = -EINVAL;
++ goto bad;
+ }
+
-+ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ?
-+ MINOR(to_kdev_t(param->dev)) : -1;
++ chunk_size = simple_strtoul(argv[3], &value, 10);
++ if (chunk_size == 0 || value == NULL) {
++ *context = "Invalid chunk size";
++ r = -EINVAL;
++ goto bad;
++ }
++
++ /* Get the extent size for persistent snapshots */
++ if ((*persistent & 0x5f) == 'P') {
++ if (argc < 5) {
++ *context = "No extent size specified";
++ r = -EINVAL;
++ goto bad;
++ }
++
++ extent_size = simple_strtoul(argv[4], &value, 10);
++ if (extent_size == 0 || value == NULL) {
++ *context = "Invalid extent size";
++ r = -EINVAL;
++ goto bad;
++ }
++ }
++
++ s = kmalloc(sizeof(*s), GFP_KERNEL);
++ if (s == NULL) {
++ *context = "Cannot allocate snapshot context private structure";
++ r = -ENOMEM;
++ goto bad;
++ }
+
-+ r = dm_create(param->name, minor, t);
++ r = dm_table_get_device(t, origin_path, 0, 0, &s->origin);
+ if (r) {
-+ dm_table_destroy(t);
-+ return r;
++ *context = "Cannot get origin device";
++ r = -EINVAL;
++ goto bad_free;
+ }
+
-+ md = dm_get_name_w(param->name);
-+ if (!md)
-+ /* shouldn't get here */
-+ return -EINVAL;
++ r = dm_table_get_device(t, cow_path, 0, 0, &s->cow);
++ if (r) {
++ dm_table_put_device(t, s->origin);
++ *context = "Cannot get COW device";
++ r = -EINVAL;
++ goto bad_free;
++ }
+
-+ minor = MINOR(md->dev);
-+ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
-+ dm_put_w(minor);
++ /* Validate the extent and chunk sizes against the device block size */
++ blocksize = get_hardsect_size(s->cow->dev);
++ if (chunk_size % (blocksize / SECTOR_SIZE)) {
++ *context = "Chunk size is not a multiple of device blocksize";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
+
-+ r = info(param->name, user);
-+ return r;
-+}
++ if (extent_size % (blocksize / SECTOR_SIZE)) {
++ *context = "Extent size is not a multiple of device blocksize";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
+
-+static int remove(struct dm_ioctl *param)
-+{
-+ int r, minor;
-+ struct mapped_device *md;
++ /* Check the sizes are small enough to fit in one kiovec */
++ if (chunk_size > KIO_MAX_SECTORS) {
++ *context = "Chunk size is too big";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ if (extent_size > KIO_MAX_SECTORS) {
++ *context = "Extent size is too big";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ /* Check chunk_size is a power of 2 */
++ if (chunk_size & (chunk_size - 1)) {
++ *context = "Chunk size is not a power of 2";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ s->chunk_size = chunk_size;
++ s->chunk_mask = chunk_size - 1;
++ for (s->chunk_shift = 0; chunk_size;
++ s->chunk_shift++, chunk_size >>= 1) ;
++
++ s->valid = 1;
++ init_rwsem(&s->lock);
++
++ /* Allocate hash table for COW data */
++ if (init_hash_tables(s)) {
++ *context = "Unable to allocate hash table space";
++ r = -ENOMEM;
++ goto bad_putdev;
++ }
++
++ /*
++ * Check the persistent flag - done here because we need the iobuf
++ * to check the LV header
++ */
++#if 0
++ if ((*persistent & 0x5f) == 'P')
++ s->store = dm_create_persistent(s, blocksize,
++ extent_size, context);
++ else
++#endif
++ s->store = dm_create_transient(s, blocksize, context);
++
++ if (!s->store) {
++ *context = "Couldn't create exception store";
++ r = -EINVAL;
++ goto bad_free1;
++ }
++
++ /* Allocate the COW iobuf and set associated variables */
++ if (s->store->init &&
++ s->store->init(s->store, blocksize, extent_size, context)) {
++ *context = "Couldn't initialise exception store";
++ r = -ENOMEM;
++ goto bad_free1;
++ }
++
++ /* Flush IO to the origin device */
++ /* FIXME: what does sct have against fsync_dev ? */
++ fsync_dev(s->origin->dev);
++#if LVM_VFS_ENHANCEMENT
++ fsync_dev_lockfs(s->origin->dev);
++#endif
++
++ /* Add snapshot to the list of snapshots for this origin */
++ if (register_snapshot(s)) {
++ r = -EINVAL;
++ *context = "Cannot register snapshot origin";
++ goto bad_free2;
++ }
++
++#if LVM_VFS_ENHANCEMENT
++ unlockfs(s->origin->dev);
++#endif
++
++ *context = s;
++ return 0;
++
++ bad_free2:
++ if (s->store->destroy)
++ s->store->destroy(s->store);
+
-+ md = dm_get_name_w(param->name);
-+ if (!md)
-+ return -ENXIO;
++ bad_free1:
++ exit_exception_table(&s->pending, pending_cachep);
++ exit_exception_table(&s->complete, exception_cachep);
+
-+ minor = MINOR(md->dev);
-+ r = dm_destroy(md);
-+ dm_put_w(minor);
++ bad_putdev:
++ dm_table_put_device(t, s->cow);
++ dm_table_put_device(t, s->origin);
+
++ bad_free:
++ kfree(s);
++
++ bad:
+ return r;
+}
+
-+static int suspend(struct dm_ioctl *param)
++static void snapshot_dtr(struct dm_table *t, void *context)
+{
-+ int r, minor;
-+ struct mapped_device *md;
++ struct dm_snapshot *s = (struct dm_snapshot *) context;
+
-+ md = dm_get_name_w(param->name);
-+ if (!md)
-+ return -ENXIO;
++ unregister_snapshot(s);
+
-+ minor = MINOR(md->dev);
-+ r = (param->flags & DM_SUSPEND_FLAG) ?
-+ dm_suspend(md) : dm_resume(md);
-+ dm_put_w(minor);
++ exit_exception_table(&s->pending, pending_cachep);
++ exit_exception_table(&s->complete, exception_cachep);
+
-+ return r;
++ /* Deallocate memory used */
++ if (s->store->destroy)
++ s->store->destroy(s->store);
++
++ dm_table_put_device(t, s->origin);
++ dm_table_put_device(t, s->cow);
++ kfree(s);
+}
+
-+static int reload(struct dm_ioctl *param)
++/*
++ * Performs a new copy on write.
++ */
++static int new_exception(struct dm_snapshot *s, struct buffer_head *bh)
+{
-+ int r, minor;
-+ struct mapped_device *md;
-+ struct dm_table *t;
++ struct exception *e;
++ struct pending_exception *pe;
++ chunk_t chunk;
+
-+ r = dm_table_create(&t);
-+ if (r)
-+ return r;
++ chunk = sector_to_chunk(s, bh->b_rsector);
+
-+ r = populate_table(t, param);
-+ if (r) {
-+ dm_table_destroy(t);
-+ return r;
-+ }
++ /*
++ * If the exception is in flight then we just defer the
++ * bh until this copy has completed.
++ */
+
-+ md = dm_get_name_w(param->name);
-+ if (!md) {
-+ dm_table_destroy(t);
-+ return -ENXIO;
++ /* FIXME: great big race. */
++ e = lookup_exception(&s->pending, chunk);
++ if (e) {
++ /* cast the exception to a pending exception */
++ pe = list_entry(e, struct pending_exception, e);
++ bh->b_reqnext = pe->bh;
++ pe->bh = bh;
++ return 0;
+ }
+
-+ minor = MINOR(md->dev);
-+
-+ r = dm_swap_table(md, t);
-+ if (r) {
-+ dm_put_w(minor);
-+ dm_table_destroy(t);
-+ return r;
++ pe = alloc_pending_exception();
++ if (!pe) {
++ DMWARN("Couldn't allocate inflight_exception.");
++ return -ENOMEM;
+ }
+
-+ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
-+ dm_put_w(minor);
-+ return 0;
-+}
-+
-+static int rename(struct dm_ioctl *param)
-+{
-+ char *newname = (char *) param + param->data_start;
++ pe->e.old_chunk = chunk;
+
-+ if (valid_str(newname, (void *) param,
-+ (void *) param + param->data_size) ||
-+ dm_set_name(param->name, newname)) {
-+ dm_error("Invalid new logical volume name supplied.");
-+ return -EINVAL;
++ if (s->store->prepare_exception &&
++ s->store->prepare_exception(s->store, &pe->e)) {
++ s->valid = 0;
++ return -ENXIO;
+ }
+
-+ return 0;
-+}
++ bh->b_reqnext = pe->bh;
++ pe->bh = bh;
++ pe->snap = s;
+
-+static int ctl_open(struct inode *inode, struct file *file)
-+{
-+ /* only root can open this */
-+ if (!capable(CAP_SYS_ADMIN))
-+ return -EACCES;
++ insert_exception(&s->pending, &pe->e);
+
-+ MOD_INC_USE_COUNT;
++ /* Get kcopyd to do the copy */
++ dm_blockcopy(chunk_to_sector(s, pe->e.old_chunk),
++ chunk_to_sector(s, pe->e.new_chunk),
++ s->chunk_size,
++ s->origin->dev,
++ s->cow->dev, SNAPSHOT_COPY_PRIORITY, 0, copy_callback, pe);
+
-+ return 0;
++ return 1;
+}
+
-+static int ctl_close(struct inode *inode, struct file *file)
++static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
++ struct buffer_head *bh)
+{
-+ MOD_DEC_USE_COUNT;
-+ return 0;
++ bh->b_rdev = s->cow->dev;
++ bh->b_rsector = chunk_to_sector(s, e->new_chunk) +
++ (bh->b_rsector & s->chunk_mask);
+}
+
-+static int ctl_ioctl(struct inode *inode, struct file *file,
-+ uint command, ulong a)
++static int snapshot_map(struct buffer_head *bh, int rw, void *context)
+{
-+ int r;
-+ struct dm_ioctl *p;
++ struct exception *e;
++ struct dm_snapshot *s = (struct dm_snapshot *) context;
++ int r = 1;
++ chunk_t chunk;
+
-+ if (command == DM_VERSION)
-+ return version((struct dm_ioctl *) a);
++ chunk = sector_to_chunk(s, bh->b_rsector);
+
-+ r = copy_params((struct dm_ioctl *) a, &p);
-+ if (r)
-+ return r;
++ /* Full snapshots are not usable */
++ if (!s->valid)
++ return -1;
+
-+ switch (command) {
-+ case DM_CREATE:
-+ r = create(p, (struct dm_ioctl *) a);
-+ break;
++ /*
++ * Write to snapshot - higher level takes care of RW/RO
++ * flags so we should only get this if we are
++ * writeable.
++ */
++ if (rw == WRITE) {
+
-+ case DM_REMOVE:
-+ r = remove(p);
-+ break;
++ down_write(&s->lock);
+
-+ case DM_SUSPEND:
-+ r = suspend(p);
-+ break;
++ /* If the block is already remapped - use that, else remap it */
++ e = lookup_exception(&s->complete, chunk);
++ if (e) {
++ remap_exception(s, e, bh);
++ up_write(&s->lock);
++ return 1;
++ }
+
-+ case DM_RELOAD:
-+ r = reload(p);
-+ break;
++ e = lookup_exception(&s->pending, chunk);
++ if (e) {
++ struct pending_exception *pe;
++ pe = list_entry(e, struct pending_exception, e);
++
++ /*
++ * Exception has not been committed to
++ * disk - save this bh
++ */
++ bh->b_reqnext = pe->bh;
++ pe->bh = bh;
++ up_write(&s->lock);
++ return 0;
++ }
+
-+ case DM_INFO:
-+ r = info(p->name, (struct dm_ioctl *) a);
-+ break;
++ if (new_exception(s, bh))
++ r = -1;
++ else
++ r = 0;
+
-+ case DM_RENAME:
-+ r = rename(p);
-+ break;
++ up_write(&s->lock);
+
-+ default:
-+ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
-+ r = -EINVAL;
++ } else {
++ /*
++ * FIXME: this read path scares me because we
++ * always use the origin when we have a pending
++ * exception. However I can't think of a
++ * situation where this is wrong - ejt.
++ */
++
++ /* Do reads */
++ down_read(&s->lock);
++
++ /* See if it has been remapped */
++ e = lookup_exception(&s->complete, chunk);
++ if (e)
++ remap_exception(s, e, bh);
++ else
++ bh->b_rdev = s->origin->dev;
++
++ up_read(&s->lock);
+ }
+
-+ free_params(p);
+ return r;
+}
+
-+static struct file_operations _ctl_fops = {
-+ open: ctl_open,
-+ release: ctl_close,
-+ ioctl: ctl_ioctl,
-+ owner: THIS_MODULE,
-+};
++/*
++ * Called on a write from the origin driver.
++ */
++int dm_do_snapshot(struct dm_dev *origin, struct buffer_head *bh)
++{
++ struct list_head *snap_list;
++ struct origin *o;
++ int r = 1;
++ chunk_t chunk;
++
++ down_read(&_origins_lock);
++ o = __lookup_origin(origin->dev);
++
++ if (o) {
++ struct list_head *origin_snaps = &o->snapshots;
++ struct dm_snapshot *lock_snap;
++
++ /* Lock the metadata */
++ lock_snap = list_entry(origin_snaps->next,
++ struct dm_snapshot, list);
++
++ /* Do all the snapshots on this origin */
++ list_for_each(snap_list, origin_snaps) {
++ struct dm_snapshot *snap;
++ struct exception *e;
++ snap = list_entry(snap_list, struct dm_snapshot, list);
++
++ down_write(&snap->lock);
++
++ /*
++ * Remember different snapshots can have
++ * different chunk sizes.
++ */
++ chunk = sector_to_chunk(snap, bh->b_rsector);
++
++ /* Only deal with valid snapshots */
++ if (snap->valid) {
++ /*
++ * Check exception table to see
++ * if block is already remapped
++ * in this snapshot and mark the
++ * snapshot as needing a COW if
++ * not
++ */
++ e = lookup_exception(&snap->complete, chunk);
++ if (!e && !new_exception(snap, bh))
++ r = 0;
++ }
+
-+static devfs_handle_t _ctl_handle;
++ up_write(&snap->lock);
++ }
++ }
+
-+static struct miscdevice _dm_misc = {
-+ minor: MISC_DYNAMIC_MINOR,
-+ name: DM_NAME,
-+ fops: &_ctl_fops
++ up_read(&_origins_lock);
++ return r;
++}
++
++static struct target_type snapshot_target = {
++ name:"snapshot",
++ module:THIS_MODULE,
++ ctr:snapshot_ctr,
++ dtr:snapshot_dtr,
++ map:snapshot_map,
++ err:NULL
+};
+
-+/* Create misc character device and link to DM_DIR/control */
-+int __init dm_interface_init(void)
++int __init dm_snapshot_init(void)
+{
+ int r;
-+ char rname[64];
+
-+ r = misc_register(&_dm_misc);
++ r = dm_register_target(&snapshot_target);
+ if (r) {
-+ DMERR("misc_register failed for control device");
++ DMERR("snapshot target register failed %d", r);
+ return r;
+ }
+
-+ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
-+ sizeof rname - 3);
-+ if (r == -ENOSYS)
-+ return 0; /* devfs not present */
++ r = init_origin_hash();
++ if (r) {
++ DMERR("init_origin_hash failed.");
++ return r;
++ }
+
-+ if (r < 0) {
-+ DMERR("devfs_generate_path failed for control device");
-+ goto failed;
++ exception_cachep = kmem_cache_create("dm-snapshot-ex",
++ sizeof(struct exception),
++ __alignof__(struct exception),
++ 0, NULL, NULL);
++ if (!exception_cachep) {
++ exit_origin_hash();
++ return -1;
+ }
+
-+ strncpy(rname + r, "../", 3);
-+ r = devfs_mk_symlink(NULL, DM_DIR "/control",
-+ DEVFS_FL_DEFAULT, rname + r,
-+ &_ctl_handle, NULL);
-+ if (r) {
-+ DMERR("devfs_mk_symlink failed for control device");
-+ goto failed;
++ pending_cachep =
++ kmem_cache_create("dm-snapshot-in",
++ sizeof(struct pending_exception),
++ __alignof__(struct pending_exception),
++ 0, NULL, NULL);
++ if (!pending_cachep) {
++ exit_origin_hash();
++ kmem_cache_destroy(exception_cachep);
++ return -1;
+ }
-+ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
+
+ return 0;
-+
-+ failed:
-+ misc_deregister(&_dm_misc);
-+ return r;
+}
+
-+void __exit dm_interface_exit(void)
++void dm_snapshot_exit(void)
+{
-+ if (misc_deregister(&_dm_misc) < 0)
-+ DMERR("misc_deregister failed for control device");
++ int r = dm_unregister_target(&snapshot_target);
++
++ if (r < 0)
++ DMERR("Device mapper: Snapshot: unregister failed %d", r);
++
++ exit_origin_hash();
++
++ kmem_cache_destroy(pending_cachep);
++ kmem_cache_destroy(exception_cachep);
+}
-diff -ruN linux-2.4.17/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c
---- linux-2.4.17/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm-linear.c Tue Jan 15 19:53:55 2002
-@@ -0,0 +1,105 @@
++
+/*
-+ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -ruN linux-2.4.17/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapshot.h
+--- linux-2.4.17/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-snapshot.h Thu Mar 14 16:02:50 2002
+@@ -0,0 +1,135 @@
++/*
++ * dm-snapshot.h
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
-+#include "dm.h"
++#ifndef DM_SNAPSHOT_H
++#define DM_SNAPSHOT_H
+
-+#include <linux/module.h>
-+#include <linux/init.h>
++#include "dm.h"
+#include <linux/blkdev.h>
+
-+/*
-+ * Linear: maps a linear range of a device.
-+ */
-+struct linear_c {
-+ long delta; /* FIXME: we need a signed offset type */
-+ struct dm_dev *dev;
++struct exception_table {
++ uint32_t hash_mask;
++ struct list_head *table;
+};
+
+/*
-+ * Construct a linear mapping: <dev_path> <offset>
++ * The snapshot code deals with largish chunks of the disk at a
++ * time. Typically 64k - 256k.
+ */
-+static int linear_ctr(struct dm_table *t, offset_t b, offset_t l,
-+ int argc, char **argv, void **context)
-+{
-+ struct linear_c *lc;
-+ unsigned long start; /* FIXME: unsigned long long */
-+ char *end;
++/* FIXME: can we get away with limiting these to a uint32_t ? */
++typedef offset_t chunk_t;
+
-+ if (argc != 2) {
-+ *context = "dm-linear: Not enough arguments";
-+ return -EINVAL;
-+ }
++struct dm_snapshot {
++ struct rw_semaphore lock;
+
-+ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
-+ if (lc == NULL) {
-+ *context = "dm-linear: Cannot allocate linear context";
-+ return -ENOMEM;
-+ }
++ struct dm_dev *origin;
++ struct dm_dev *cow;
+
-+ start = simple_strtoul(argv[1], &end, 10);
-+ if (*end) {
-+ *context = "dm-linear: Invalid device sector";
-+ goto bad;
-+ }
++ /* List of snapshots per Origin */
++ struct list_head list;
+
-+ if (dm_table_get_device(t, argv[0], start, l, &lc->dev)) {
-+ *context = "dm-linear: Device lookup failed";
-+ goto bad;
-+ }
++ /* Size of data blocks saved - must be a power of 2 */
++ chunk_t chunk_size;
++ chunk_t chunk_mask;
++ chunk_t chunk_shift;
+
-+ lc->delta = (int) start - (int) b;
-+ *context = lc;
-+ return 0;
++ /* You can't use a snapshot if this is 0 (e.g. if full) */
++ int valid;
+
-+ bad:
-+ kfree(lc);
-+ return -EINVAL;
-+}
++ struct exception_table pending;
++ struct exception_table complete;
+
-+static void linear_dtr(struct dm_table *t, void *c)
-+{
-+ struct linear_c *lc = (struct linear_c *) c;
++ /* The on disk metadata handler */
++ struct exception_store *store;
++};
+
-+ dm_table_put_device(t, lc->dev);
-+ kfree(c);
-+}
++/*
++ * An exception is used where an old chunk of data has been
++ * replaced by a new one.
++ */
++struct exception {
++ struct list_head hash_list;
+
-+static int linear_map(struct buffer_head *bh, int rw, void *context)
-+{
-+ struct linear_c *lc = (struct linear_c *) context;
++ chunk_t old_chunk;
++ chunk_t new_chunk;
++};
+
-+ bh->b_rdev = lc->dev->dev;
-+ bh->b_rsector = bh->b_rsector + lc->delta;
++/*
++ * Abstraction to handle persistent snapshots.
++ */
++struct exception_store {
+
-+ return 1;
-+}
++ /*
++ * Destroys this object when you've finished with it.
++ */
++ void (*destroy)(struct exception_store *store);
+
-+static struct target_type linear_target = {
-+ name: "linear",
-+ module: THIS_MODULE,
-+ ctr: linear_ctr,
-+ dtr: linear_dtr,
-+ map: linear_map,
++ /*
++ * Read the metadata and populate the snapshot.
++ */
++ int (*init)(struct exception_store *store,
++ int blocksize, unsigned long extent_size, void **context);
++
++ /*
++ * Find somewhere to store the next exception.
++ */
++ int (*prepare_exception)(struct exception_store *store,
++ struct exception *e);
++
++ /*
++ * Update the metadata with this exception.
++ */
++ int (*commit_exception)(struct exception_store *store,
++ struct exception *e);
++
++ /*
++ * The snapshot is invalid, note this in the metadata.
++ */
++ void (*drop_snapshot)(struct exception_store *store);
++
++ struct dm_snapshot *snap;
++ void *context;
+};
+
-+int __init dm_linear_init(void)
++/*
++ * Constructor and destructor for the default persistent
++ * store.
++ */
++struct exception_store *dm_create_persistent(struct dm_snapshot *s,
++ int blocksize,
++ offset_t extent_size,
++ void **error);
++
++struct exception_store *dm_create_transient(struct dm_snapshot *s,
++ int blocksize, void **error);
++
++/*
++ * Return the number of sectors in the device.
++ */
++static inline offset_t get_dev_size(kdev_t dev)
+{
-+ int r = dm_register_target(&linear_target);
++ int *sizes;
+
-+ if (r < 0)
-+ DMERR("linear: register failed %d", r);
++ sizes = blk_size[MAJOR(dev)];
++ if (sizes)
++ return sizes[MINOR(dev)] << 1;
+
-+ return r;
++ return 0;
+}
+
-+void dm_linear_exit(void)
++static inline chunk_t sector_to_chunk(struct dm_snapshot *s, offset_t sector)
+{
-+ int r = dm_unregister_target(&linear_target);
++ return (sector & ~s->chunk_mask) >> s->chunk_shift;
++}
+
-+ if (r < 0)
-+ DMERR("linear: unregister failed %d", r);
++static inline offset_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
++{
++ return chunk << s->chunk_shift;
+}
+
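++/*
++ * Worked example (editor's illustration, values assumed): with a chunk_size
++ * of 16 sectors, chunk_mask is 15 and chunk_shift is 4, so sector 70 maps to
++ * chunk 4 (70 & ~15 = 64, 64 >> 4 = 4) and chunk_to_sector(4) returns 64,
++ * the first sector of that chunk.
++ */
++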
++#endif
diff -ruN linux-2.4.17/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c
--- linux-2.4.17/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970
+++ linux/drivers/md/dm-stripe.c Thu Jan 31 17:50:20 2002
+
diff -ruN linux-2.4.17/drivers/md/dm-table.c linux/drivers/md/dm-table.c
--- linux-2.4.17/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm-table.c Tue Jan 8 17:57:45 2002
++++ linux/drivers/md/dm-table.c Thu Feb 21 12:46:57 2002
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ /* free the device list */
+ if (t->devices.next != &t->devices) {
+ DMWARN("devices still present during destroy: "
-+ "dm_table_remove_device calls missing");
++ "dm_table_remove_device calls missing");
+
+ free_devices(&t->devices);
+ }
+EXPORT_SYMBOL(dm_unregister_target);
diff -ruN linux-2.4.17/drivers/md/dm.c linux/drivers/md/dm.c
--- linux-2.4.17/drivers/md/dm.c Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm.c Mon Feb 4 13:16:57 2002
-@@ -0,0 +1,1063 @@
++++ linux/drivers/md/dm.c Thu Mar 14 16:32:42 2002
+@@ -0,0 +1,1130 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+#define DEFAULT_READ_AHEAD 64
+
+static const char *_name = DM_NAME;
-+static const char *_version = "0.94.04-ioctl (2002-02-04)";
++static const char *_version = "0.94.08-ioctl-cvs (2002-03-14)";
+static const char *_email = "lvm-devel@lists.sistina.com";
+
+static int major = 0;
+ struct target *target;
+ int rw;
+
-+ void (*end_io)(struct buffer_head *bh, int uptodate);
++ void (*end_io) (struct buffer_head * bh, int uptodate);
+ void *context;
+};
+
+
+static devfs_handle_t _dev_dir;
+
-+static int request(request_queue_t *q, int rw, struct buffer_head *bh);
++static int request(request_queue_t * q, int rw, struct buffer_head *bh);
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
+
+/*
+ if (r)
+ goto out_linear;
+
-+ r = dm_interface_init();
++ r = kcopyd_init();
+ if (r)
+ goto out_stripe;
+
++ r = dm_snapshot_init();
++ if (r)
++ goto out_kcopyd;
++
++ r = dm_origin_init();
++ if (r)
++ goto out_snapshot;
++
++ r = dm_mirror_init();
++ if (r)
++ goto out_origin;
++
++ r = dm_interface_init();
++ if (r)
++ goto out_mirror;
++
+ return 0;
+
++ out_mirror:
++ dm_mirror_exit();
++ out_origin:
++ dm_origin_exit();
++ out_snapshot:
++ dm_snapshot_exit();
++ out_kcopyd:
++ kcopyd_exit();
+ out_stripe:
+ dm_stripe_exit();
+ out_linear:
+
+static void __exit dm_exit(void)
+{
++ dm_destroy_all();
+ dm_interface_exit();
+ dm_stripe_exit();
+ dm_linear_exit();
++ dm_snapshot_exit();
++ dm_origin_exit();
++ dm_mirror_exit();
++ kcopyd_exit();
+ dm_target_exit();
+ local_exit();
+}
+
+ case BLKGETSIZE:
+ size = VOLUME_SIZE(minor);
-+ if (copy_to_user((void *)a, &size, sizeof(long)))
++ if (copy_to_user((void *) a, &size, sizeof(long)))
+ return -EFAULT;
+ break;
+
+ case BLKGETSIZE64:
+ size = VOLUME_SIZE(minor);
-+ if (put_user((u64)size, (u64 *)a))
++ if (put_user((u64) size, (u64 *) a))
+ return -EFAULT;
+ break;
+
+ */
+static void dec_pending(struct buffer_head *bh, int uptodate)
+{
-+ struct io_hook *ih = bh->b_private;
++ struct io_hook *ih = bh->b_bdev_private;
+
+ if (!uptodate && call_err_fn(ih, bh))
+ return;
+ wake_up(&ih->md->wait);
+
+ bh->b_end_io = ih->end_io;
-+ bh->b_private = ih->context;
++ bh->b_bdev_private = ih->context;
+ free_io_hook(ih);
+
+ bh->b_end_io(bh, uptodate);
+ ih->rw = rw;
+ ih->target = ti;
+ ih->end_io = bh->b_end_io;
-+ ih->context = bh->b_private;
++ ih->context = bh->b_bdev_private;
+
+ r = fn(bh, rw, context);
+
+ /* hook the end io request fn */
+ atomic_inc(&md->pending);
+ bh->b_end_io = dec_pending;
-+ bh->b_private = ih;
++ bh->b_bdev_private = ih;
+
+ } else if (r == 0)
+ /* we don't need to hook */
+ return (KEYS_PER_NODE * n) + k;
+}
+
-+static int request(request_queue_t *q, int rw, struct buffer_head *bh)
++static int request(request_queue_t * q, int rw, struct buffer_head *bh)
+{
+ struct mapped_device *md;
+ int r, minor = MINOR(bh->b_rdev);
+ }
+ }
+
-+
-+ if ((r = __map_buffer(md, bh, rw, __find_node(md->map, bh))) < 0)
++ if ((r = __map_buffer(md, bh, rw, __find_node(md->map, bh))) < 0)
+ goto bad;
+
+ dm_put_r(minor);
+ return r;
+
+ if (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) ||
-+ put_user(r_block, &lvb->lv_block)) return -EFAULT;
++ put_user(r_block, &lvb->lv_block))
++ return -EFAULT;
+
+ return 0;
+}
+ * Allocate and initialise a blank device. Device is returned
+ * with a write lock held.
+ */
-+static struct mapped_device *alloc_dev(int minor)
++static struct mapped_device *alloc_dev(const char *name, const char *uuid,
++ int minor)
+{
+ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+
+
+ _devs[minor] = md;
+ md->dev = MKDEV(_major, minor);
-+ md->name[0] = '\0';
+ md->suspended = 0;
+
++ strncpy(md->name, name, sizeof(md->name) - 1);
++ md->name[sizeof(md->name) - 1] = '\0';
++
++ if (*uuid) {
++ size_t len = strnlen(uuid, DM_UUID_LEN - 1);
++
++ if (!(md->uuid = kmalloc(len + 1, GFP_KERNEL))) {
++ DMWARN("unable to allocate uuid - out of memory.");
++ return NULL;
++ }
++
++ strncpy(md->uuid, uuid, len);
++ md->uuid[len] = '\0';
++ }
++
+ init_waitqueue_head(&md->wait);
+
+ return md;
+/*
+ * Constructor for a new device
+ */
-+int dm_create(const char *name, int minor, struct dm_table *table)
++int dm_create(const char *name, const char *uuid,
++ int minor, struct dm_table *table)
+{
+ int r;
+ struct mapped_device *md;
+ return -EINVAL;
+ }
+
-+ md = alloc_dev(minor);
++ md = alloc_dev(name, uuid, minor);
+ if (!md) {
+ spin_unlock(&_create_lock);
+ return -ENXIO;
+ }
+ minor = MINOR(md->dev);
+
-+ /* FIXME: move name allocation into alloc_dev */
-+ strcpy(md->name, name);
-+
+ r = __register_device(md);
+ if (r)
+ goto err;
+
+ err:
+ _devs[minor] = NULL;
++ if (md->uuid)
++ kfree(md->uuid);
++
+ kfree(md);
+ dm_put_w(minor);
+ spin_unlock(&_create_lock);
+}
+
+/*
-+ * Destructor for the device. You cannot destroy a suspended
++ * Destructor for the device. You cannot destroy an open
+ * device. Write lock must be held before calling.
+ */
+int dm_destroy(struct mapped_device *md)
+{
+ int minor, r;
+
-+ if (md->suspended || md->use_count)
++ if (md->use_count)
+ return -EPERM;
+
+ r = __unregister_device(md);
+ minor = MINOR(md->dev);
+ _devs[minor] = NULL;
+ __unbind(md);
++
++ if (md->uuid)
++ kfree(md->uuid);
++
+ kfree(md);
+
+ return 0;
+}
+
+/*
++ * Destroy all devices - except open ones
++ */
++void dm_destroy_all(void)
++{
++ int i;
++ struct mapped_device *md;
++
++ for (i = 0; i < MAX_DEVICES; i++) {
++ md = dm_get_w(i);
++ if (!md)
++ continue;
++
++ dm_destroy(md);
++ dm_put_w(i);
++ }
++}
++
++/*
+ * Sets or clears the read-only flag for the device. Write lock
+ * must be held.
+ */
+ if (!atomic_read(&md->pending))
+ break;
+
++ dm_put_w(minor);
+ schedule();
+
+ } while (1);
+MODULE_LICENSE("GPL");
diff -ruN linux-2.4.17/drivers/md/dm.h linux/drivers/md/dm.h
--- linux-2.4.17/drivers/md/dm.h Thu Jan 1 01:00:00 1970
-+++ linux/drivers/md/dm.h Mon Feb 4 13:01:21 2002
-@@ -0,0 +1,214 @@
++++ linux/drivers/md/dm.h Thu Mar 7 20:04:19 2002
+@@ -0,0 +1,233 @@
+/*
+ * Internal header file for device mapper
+ *
+#ifndef DM_INTERNAL_H
+#define DM_INTERNAL_H
+
-+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/major.h>
+struct mapped_device {
+ kdev_t dev;
+ char name[DM_NAME_LEN];
++ char *uuid;
+
+ int use_count;
+ int suspended;
+ */
+int split_args(int max, int *argc, char **argv, char *input);
+
-+
+/* dm.c */
+struct mapped_device *dm_get_r(int minor);
+struct mapped_device *dm_get_w(int minor);
+/*
+ * Call with no lock.
+ */
-+int dm_create(const char *name, int minor, struct dm_table *table);
++int dm_create(const char *name, const char *uuid,
++ int minor, struct dm_table *table);
+int dm_set_name(const char *oldname, const char *newname);
++void dm_destroy_all(void);
+
+/*
+ * You must have the write lock before calling the remaining md
+ */
+void dm_notify(void *target);
+
-+
+/* dm-table.c */
+int dm_table_create(struct dm_table **result);
+void dm_table_destroy(struct dm_table *t);
+int dm_table_complete(struct dm_table *t);
+
+/* kcopyd.c */
-+int dm_blockcopy(unsigned long fromsec, unsigned long tosec,
-+ unsigned long nr_sectors,
-+ kdev_t fromdev, kdev_t todev,
-+ int throttle, void (*callback)(int, void *), void *context);
-+
++typedef enum {
++ COPY_CB_COMPLETE,
++ COPY_CB_FAILED_READ,
++ COPY_CB_FAILED_WRITE,
++ COPY_CB_PROGRESS
++} copy_cb_reason_t;
++
++int dm_blockcopy(unsigned long fromsec, unsigned long tosec,
++ unsigned long nr_sectors, kdev_t fromdev, kdev_t todev,
++ int priority, int throttle,
++ void (*callback) (copy_cb_reason_t, void *, long),
++ void *context);
++int kcopyd_init(void);
++void kcopyd_exit(void);
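++
++/*
++ * Typical use (editor's sketch, modelled on the dm-snapshot caller):
++ *
++ * dm_blockcopy(chunk_to_sector(s, e->old_chunk),
++ * chunk_to_sector(s, e->new_chunk),
++ * s->chunk_size, s->origin->dev, s->cow->dev,
++ * SNAPSHOT_COPY_PRIORITY, 0, copy_callback, pe);
++ *
++ * The callback gets COPY_CB_PROGRESS as sectors complete and
++ * COPY_CB_COMPLETE (or a failure reason) when the copy is done.
++ */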
++
++/* Snapshots */
++int dm_snapshot_init(void);
++void dm_snapshot_exit(void);
++int dm_origin_init(void);
++void dm_origin_exit(void);
++
++/* dm-mirror.c */
++int dm_mirror_init(void);
++void dm_mirror_exit(void);
+
+#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
+#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x)
+void dm_stripe_exit(void);
+
+#endif
+diff -ruN linux-2.4.17/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c
+--- linux-2.4.17/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/kcopyd.c Thu Mar 14 13:45:09 2002
+@@ -0,0 +1,479 @@
++/*
++ * kcopyd.c
++ *
++ * Copyright (C) 2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/blkdev.h>
++#include <linux/device-mapper.h>
++
++#include "dm.h"
++
++/* Hard sector size used all over the kernel */
++#define SECTOR_SIZE 512
++
++/* Number of entries in the free list to start with */
++#define FREE_LIST_SIZE 32
++
++/* Slab cache for work entries when the freelist runs out */
++static kmem_cache_t *entry_cachep;
++
++/* Structure of work to do in the list */
++struct copy_work
++{
++ unsigned long fromsec;
++ unsigned long tosec;
++ unsigned long nr_sectors;
++ unsigned long done_sectors;
++ kdev_t fromdev;
++ kdev_t todev;
++ int throttle;
++ int priority; /* 0=highest */
++ void (*callback)(copy_cb_reason_t, void *, long);
++ void *context;
++ int freelist; /* Whether we came from the free list */
++ struct list_head list;
++};
++
++static LIST_HEAD(work_list);
++static LIST_HEAD(free_list);
++static struct task_struct *copy_task = NULL;
++static struct rw_semaphore work_list_lock;
++static struct rw_semaphore free_list_lock;
++static DECLARE_MUTEX(start_lock);
++static DECLARE_MUTEX(run_lock);
++static DECLARE_WAIT_QUEUE_HEAD(start_waitq);
++static DECLARE_WAIT_QUEUE_HEAD(work_waitq);
++static DECLARE_WAIT_QUEUE_HEAD(freelist_waitq);
++static struct kiobuf *iobuf;
++static int thread_exit = 0;
++static long last_jiffies = 0;
++
++/* Find a free entry from the free-list or allocate a new one.
++ This routine always returns a valid pointer even if it has to wait
++ for it */
++static struct copy_work *get_work_struct(void)
++{
++ struct copy_work *entry = NULL;
++
++ while (!entry) {
++
++ down_write(&free_list_lock);
++ if (!list_empty(&free_list)) {
++ entry = list_entry(free_list.next, struct copy_work, list);
++ list_del(&entry->list);
++ }
++ up_write(&free_list_lock);
++
++ if (!entry) {
++ /* Nothing on the free-list - try to allocate one without doing IO */
++ entry = kmem_cache_alloc(entry_cachep, GFP_NOIO);
++
++ /* Make sure we know it didn't come from the free list */
++ if (entry) {
++ entry->freelist = 0;
++ }
++ }
++
++ /* Failed...wait for IO to finish */
++ if (!entry) {
++ DECLARE_WAITQUEUE(wq, current);
++
++ set_task_state(current, TASK_INTERRUPTIBLE);
++ add_wait_queue(&freelist_waitq, &wq);
++
++ if (list_empty(&free_list))
++ schedule();
++
++ set_task_state(current, TASK_RUNNING);
++ remove_wait_queue(&freelist_waitq, &wq);
++ }
++ }
++
++ return entry;
++}
++
++/* Allocate pages for a kiobuf. */
++static int alloc_iobuf_pages(struct kiobuf *iobuf, int nr_sectors)
++{
++ int nr_pages, err, i;
++
++ if (nr_sectors > KIO_MAX_SECTORS)
++ return -1;
++
++ nr_pages = nr_sectors / (PAGE_SIZE/SECTOR_SIZE);
++ err = expand_kiobuf(iobuf, nr_pages);
++ if (err) goto out;
++
++ err = -ENOMEM;
++ iobuf->locked = 1;
++ iobuf->nr_pages = 0;
++ for (i = 0; i < nr_pages; i++) {
++ struct page * page;
++
++ page = alloc_page(GFP_KERNEL);
++ if (!page) goto out;
++
++ iobuf->maplist[i] = page;
++ LockPage(page);
++ iobuf->nr_pages++;
++ }
++ iobuf->offset = 0;
++
++ err = 0;
++
++out:
++ return err;
++}
++
++
++/* Add a new entry to the work list - in priority+FIFO order.
++ The work_list_lock semaphore must be held */
++static void add_to_work_list(struct copy_work *item)
++{
++ struct list_head *entry;
++
++ list_for_each(entry, &work_list) {
++ struct copy_work *cw;
++
++ cw = list_entry(entry, struct copy_work, list);
++ if (cw->priority > item->priority) {
++ __list_add(&item->list, cw->list.prev, &cw->list);
++ return;
++ }
++ }
++ list_add_tail(&item->list, &work_list);
++}
++
++/* Read in a chunk from the source device */
++static int read_blocks(struct kiobuf *iobuf, kdev_t dev, unsigned long start, int nr_sectors)
++{
++ int i, sectors_per_block, nr_blocks;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ sectors_per_block = blocksize / SECTOR_SIZE;
++
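++ /* brw_kiovec() works in device hard-sector blocks, so convert the
++ 512-byte sector range into block numbers of 'blocksize' bytes */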
++ nr_blocks = nr_sectors / sectors_per_block;
++ start /= sectors_per_block;
++
++ for (i = 0; i < nr_blocks; i++)
++ iobuf->blocks[i] = start++;
++
++ iobuf->length = nr_sectors << 9;
++
++ status = brw_kiovec(READ, 1, &iobuf, dev, iobuf->blocks, blocksize);
++ return (status != (nr_sectors << 9));
++}
++
++/* Write out blocks */
++static int write_blocks(struct kiobuf *iobuf, kdev_t dev, unsigned long start, int nr_sectors)
++{
++ int i, sectors_per_block, nr_blocks;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ sectors_per_block = blocksize / SECTOR_SIZE;
++
++ nr_blocks = nr_sectors / sectors_per_block;
++ start /= sectors_per_block;
++
++ for (i = 0; i < nr_blocks; i++)
++ iobuf->blocks[i] = start++;
++
++ iobuf->length = nr_sectors << 9;
++
++ status = brw_kiovec(WRITE, 1, &iobuf, dev, iobuf->blocks, blocksize);
++ return (status != (nr_sectors << 9));
++}
++
++/* This is where all the real work happens */
++static int copy_kthread(void *unused)
++{
++ daemonize();
++ down(&run_lock);
++
++ strcpy(current->comm, "kcopyd");
++ copy_task = current;
++ wake_up_interruptible(&start_waitq);
++
++ do {
++ DECLARE_WAITQUEUE(wq, current);
++ struct task_struct *tsk = current;
++
++ down_write(&work_list_lock);
++
++ while (!list_empty(&work_list)) {
++
++ struct copy_work *work_item = list_entry(work_list.next, struct copy_work, list);
++ int done_sps;
++ copy_cb_reason_t callback_reason = COPY_CB_COMPLETE;
++ int preempted = 0;
++
++ list_del(&work_item->list);
++ up_write(&work_list_lock);
++
++ while (!preempted && work_item->done_sectors < work_item->nr_sectors) {
++ long nr_sectors = min((unsigned long)KIO_MAX_SECTORS,
++ work_item->nr_sectors - work_item->done_sectors);
++
++ /* Read original blocks */
++ if (read_blocks(iobuf, work_item->fromdev, work_item->fromsec + work_item->done_sectors,
++ nr_sectors)) {
++ DMERR("Read blocks from device %s failed", kdevname(work_item->fromdev));
++
++ /* Callback error */
++ callback_reason = COPY_CB_FAILED_READ;
++ goto done_copy;
++ }
++
++ /* Write them out again */
++ if (write_blocks(iobuf, work_item->todev, work_item->tosec + work_item->done_sectors,
++ nr_sectors)) {
++ DMERR("Write blocks to %s failed", kdevname(work_item->todev));
++
++ /* Callback error */
++ callback_reason = COPY_CB_FAILED_WRITE;
++ goto done_copy;
++ }
++ work_item->done_sectors += nr_sectors;
++
++ /* If we have exceeded the throttle value (in sectors/second) then
++ sleep for a while */
++ done_sps = nr_sectors*HZ/(jiffies-last_jiffies);
++ if (work_item->throttle && done_sps > work_item->throttle && done_sps) {
++ long start_jiffies = jiffies;
++ do {
++ schedule_timeout(done_sps - work_item->throttle * HZ);
++ } while (jiffies <= start_jiffies+(done_sps - work_item->throttle * HZ));
++ }
++
++ /* Do a progress callback */
++ if (work_item->callback && work_item->done_sectors < work_item->nr_sectors)
++ work_item->callback(COPY_CB_PROGRESS, work_item->context, work_item->done_sectors);
++
++ /* Look for higher priority work */
++ down_write(&work_list_lock);
++ if (!list_empty(&work_list)) {
++ struct copy_work *peek_item = list_entry(work_list.next, struct copy_work, list);
++
++ if (peek_item->priority < work_item->priority) {
++
++ /* Put this back on the list and restart to get the new one */
++ add_to_work_list(work_item);
++ preempted = 1;
++ goto restart;
++ }
++ }
++ up_write(&work_list_lock);
++ }
++
++ done_copy:
++ /* Call the callback */
++ if (work_item->callback)
++ work_item->callback(callback_reason, work_item->context, work_item->done_sectors);
++
++ /* Add it back to the free list (if it came from there)
++ and notify anybody waiting for an entry */
++ if (work_item->freelist) {
++ down_write(&free_list_lock);
++ list_add(&work_item->list, &free_list);
++ up_write(&free_list_lock);
++ }
++ else {
++ kmem_cache_free(entry_cachep, work_item);
++ }
++ wake_up_interruptible(&freelist_waitq);
++
++ /* Get the work lock again for the top of the while loop */
++ down_write(&work_list_lock);
++ restart:
++ }
++ up_write(&work_list_lock);
++
++ /* Wait for more work */
++ set_task_state(tsk, TASK_INTERRUPTIBLE);
++ add_wait_queue(&work_waitq, &wq);
++
++ if (list_empty(&work_list))
++ schedule();
++
++ set_task_state(tsk, TASK_RUNNING);
++ remove_wait_queue(&work_waitq, &wq);
++
++ } while (thread_exit == 0);
++
++ unmap_kiobuf(iobuf);
++ free_kiovec(1, &iobuf);
++
++ up(&run_lock);
++ return 0;
++}
++
++/* API entry point */
++int dm_blockcopy(unsigned long fromsec, unsigned long tosec, unsigned long nr_sectors,
++ kdev_t fromdev, kdev_t todev,
++ int priority, int throttle, void (*callback)(copy_cb_reason_t, void *, long), void *context)
++{
++ struct copy_work *newwork;
++ static pid_t thread_pid = 0;
++ long from_blocksize = get_hardsect_size(fromdev);
++ long to_blocksize = get_hardsect_size(todev);
++
++ /* Make sure the start sectors are on physical block boundaries */
++ if (fromsec % (from_blocksize/SECTOR_SIZE))
++ return -EINVAL;
++ if (tosec % (to_blocksize/SECTOR_SIZE))
++ return -EINVAL;
++
++ /* Start the thread if we don't have one already */
++ down(&start_lock);
++ if (copy_task == NULL) {
++ thread_pid = kernel_thread(copy_kthread, NULL, 0);
++ if (thread_pid > 0) {
++
++ DECLARE_WAITQUEUE(wq, current);
++ struct task_struct *tsk = current;
++
++ DMINFO("Started kcopyd thread");
++
++ /* Wait for it to complete its startup initialisation */
++ set_task_state(tsk, TASK_INTERRUPTIBLE);
++ add_wait_queue(&start_waitq, &wq);
++
++ if (copy_task == NULL)
++ schedule();
++
++ set_task_state(tsk, TASK_RUNNING);
++ remove_wait_queue(&start_waitq, &wq);
++ }
++ else {
++ DMERR("Failed to start kcopyd thread");
++ up(&start_lock);
++ return -EAGAIN;
++ }
++ }
++ up(&start_lock);
++
++ /* This will wait until one is available */
++ newwork = get_work_struct();
++
++ newwork->fromsec = fromsec;
++ newwork->tosec = tosec;
++ newwork->fromdev = fromdev;
++ newwork->todev = todev;
++ newwork->nr_sectors = nr_sectors;
++ newwork->done_sectors = 0;
++ newwork->throttle = throttle;
++ newwork->priority = priority;
++ newwork->callback = callback;
++ newwork->context = context;
++
++ down_write(&work_list_lock);
++ add_to_work_list(newwork);
++ up_write(&work_list_lock);
++
++ wake_up_interruptible(&work_waitq);
++ return 0;
++}
++
++
++/* Pre-allocate some structures for the free list */
++static int allocate_free_list(void)
++{
++ int i;
++ struct copy_work *newwork;
++
++ for (i=0; i<FREE_LIST_SIZE; i++) {
++ newwork = kmalloc(sizeof(struct copy_work), GFP_KERNEL);
++ if (!newwork)
++ return i;
++ newwork->freelist = 1;
++ list_add(&newwork->list, &free_list);
++ }
++ return i;
++}
++
++int __init kcopyd_init(void)
++{
++ init_rwsem(&work_list_lock);
++ init_rwsem(&free_list_lock);
++ init_MUTEX(&start_lock);
++ init_MUTEX(&run_lock);
++
++ if (alloc_kiovec(1, &iobuf)) {
++ DMERR("Unable to allocate kiobuf for kcopyd");
++ return -1;
++ }
++
++ if (alloc_iobuf_pages(iobuf, KIO_MAX_SECTORS)) {
++ DMERR("Unable to allocate pages for kcopyd");
++ free_kiovec(1, &iobuf);
++ return -1;
++ }
++
++ entry_cachep = kmem_cache_create("kcopyd",
++ sizeof(struct copy_work),
++ __alignof__(struct copy_work),
++ 0, NULL, NULL);
++ if (!entry_cachep) {
++ unmap_kiobuf(iobuf);
++ free_kiovec(1, &iobuf);
++ DMERR("Unable to allocate slab cache for kcopyd");
++ return -1;
++ }
++
++ if (allocate_free_list() == 0) {
++ unmap_kiobuf(iobuf);
++ free_kiovec(1, &iobuf);
++ kmem_cache_destroy(entry_cachep);
++ DMERR("Unable to allocate any work structures for the free list");
++ return -1;
++ }
++
++ return 0;
++}
++
++void kcopyd_exit(void)
++{
++ struct list_head *entry, *temp;
++
++ thread_exit = 1;
++ wake_up_interruptible(&work_waitq);
++
++ /* Wait for the thread to finish */
++ down(&run_lock);
++ up(&run_lock);
++
++ /* Free the free list */
++ list_for_each_safe(entry, temp, &free_list) {
++ struct copy_work *cw;
++ cw = list_entry(entry, struct copy_work, list);
++ list_del(&cw->list);
++ kfree(cw);
++ }
++
++ if (entry_cachep)
++ kmem_cache_destroy(entry_cachep);
++}
++
++EXPORT_SYMBOL(dm_blockcopy);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
diff -ruN linux-2.4.17/include/linux/device-mapper.h linux/include/linux/device-mapper.h
--- linux-2.4.17/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970
-+++ linux/include/linux/device-mapper.h Tue Jan 8 13:17:13 2002
-@@ -0,0 +1,58 @@
++++ linux/include/linux/device-mapper.h Thu Mar 7 16:56:22 2002
+@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+#define DM_DIR "device-mapper" /* Slashes not supported */
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
++#define DM_UUID_LEN 129
+
+#ifdef __KERNEL__
+
+#endif /* _LINUX_DEVICE_MAPPER_H */
diff -ruN linux-2.4.17/include/linux/dm-ioctl.h linux/include/linux/dm-ioctl.h
--- linux-2.4.17/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970
-+++ linux/include/linux/dm-ioctl.h Mon Feb 4 13:16:57 2002
-@@ -0,0 +1,69 @@
++++ linux/include/linux/dm-ioctl.h Thu Mar 14 16:32:42 2002
+@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * Implements a traditional ioctl interface to the device mapper.
+ */
+
++/*
++ * All ioctl arguments consist of a single chunk of memory, with
++ * this structure at the start.
++ */
++struct dm_ioctl {
++ char version[16];
++
++ unsigned long data_size; /* total size of data passed in
++ * including this struct */
++
++ unsigned long data_start; /* offset to start of data
++ * relative to start of this struct */
++
++ char name[DM_NAME_LEN]; /* device name */
++
++ unsigned int target_count; /* in/out */
++ unsigned int open_count; /* out */
++ unsigned int flags; /* in/out */
++
++ __kernel_dev_t dev; /* in/out */
++
++ char uuid[DM_UUID_LEN]; /* unique identifier for
++ * the block device */
++};
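++
++/*
++ * Illustrative layout of one ioctl argument buffer (editor's sketch):
++ * the struct dm_ioctl sits at offset 0, the target specs and their
++ * parameters start at data_start, and data_size covers the whole
++ * buffer including this header.
++ */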
++
++/*
++ * Used to specify tables. These structures appear after the
++ * dm_ioctl.
++ */
+struct dm_target_spec {
+ int32_t status; /* used when reading from kernel only */
+ unsigned long long sector_start;
+ */
+};
+
-+struct dm_ioctl {
-+ char version[16];
-+
-+ unsigned long data_size; /* total size of data passed in */
-+ /* including this struct */
-+
-+ unsigned long data_start; /* offset to start of data */
-+ /* relative to start of this struct */
-+
-+ char name[DM_NAME_LEN]; /* device name */
-+
-+ unsigned int target_count; /* in/out */
-+ unsigned int open_count; /* out */
-+ unsigned int flags; /* in/out */
++/*
++ * Used to retrieve the target dependencies.
++ */
++struct dm_target_deps {
++ unsigned int count;
+
-+ __kernel_dev_t dev; /* in/out */
++ __kernel_dev_t dev[0]; /* out */
+};
+
+#define DM_IOCTL 0xfd
+
-+#define DM_CREATE _IOWR(DM_IOCTL, 0x00, struct dm_ioctl)
-+#define DM_REMOVE _IOW(DM_IOCTL, 0x01, struct dm_ioctl)
-+#define DM_SUSPEND _IOW(DM_IOCTL, 0x02, struct dm_ioctl)
-+#define DM_RELOAD _IOW(DM_IOCTL, 0x03, struct dm_ioctl)
-+#define DM_INFO _IOWR(DM_IOCTL, 0x04, struct dm_ioctl)
-+#define DM_RENAME _IOW(DM_IOCTL, 0x05, struct dm_ioctl)
-+#define DM_VERSION _IOR(DM_IOCTL, 0x06, struct dm_ioctl)
++enum {
++ DM_CREATE_CMD = 0,
++ DM_REMOVE_CMD,
++ DM_SUSPEND_CMD,
++ DM_RELOAD_CMD,
++ DM_INFO_CMD,
++ DM_RENAME_CMD,
++ DM_VERSION_CMD,
++ DM_DEPS_CMD,
++ DM_REMOVE_ALL_CMD
++};
++
++#define DM_CREATE _IOWR(DM_IOCTL, DM_CREATE_CMD, struct dm_ioctl)
++#define DM_REMOVE _IOW(DM_IOCTL, DM_REMOVE_CMD, struct dm_ioctl)
++#define DM_SUSPEND _IOW(DM_IOCTL, DM_SUSPEND_CMD, struct dm_ioctl)
++#define DM_RELOAD _IOW(DM_IOCTL, DM_RELOAD_CMD, struct dm_ioctl)
++#define DM_INFO _IOWR(DM_IOCTL, DM_INFO_CMD, struct dm_ioctl)
++#define DM_RENAME _IOW(DM_IOCTL, DM_RENAME_CMD, struct dm_ioctl)
++#define DM_VERSION _IOR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
++#define DM_DEPS _IOR(DM_IOCTL, DM_DEPS_CMD, struct dm_ioctl)
++#define DM_REMOVE_ALL _IOR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+
+#define DM_IOCTL_VERSION "0.94"
-+#define DM_DRIVER_VERSION "0.94.04-ioctl (2002-02-04)"
++#define DM_DRIVER_VERSION "0.94.08-ioctl-cvs (2002-03-14)"
+
+/* Status bits */
+#define DM_READONLY_FLAG 0x00000001
+#define DM_EXISTS_FLAG 0x00000004
+#define DM_PERSISTENT_DEV_FLAG 0x00000008
+
-+#endif /* _LINUX_DM_IOCTL_H */
++#endif /* _LINUX_DM_IOCTL_H */
+diff -ruN linux-2.4.17/include/linux/fs.h linux/include/linux/fs.h
+--- linux-2.4.17/include/linux/fs.h Wed Jan 2 19:10:48 2002
++++ linux/include/linux/fs.h Thu Mar 14 16:32:56 2002
+@@ -257,7 +257,10 @@
+ char * b_data; /* pointer to data block */
+ struct page *b_page; /* the page this bh is mapped to */
+ void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
+- void *b_private; /* reserved for b_end_io */
++ void *b_private; /* reserved for b_end_io, also used by ext3 */
++ void *b_bdev_private; /* a hack to get around ext3 using b_private
++ * after handing the buffer_head to the
++ * block layer */
+
+ unsigned long b_rsector; /* Real buffer location on disk */
+ wait_queue_head_t b_wait;
--- /dev/null
+diff -ruN linux-2.4.18/drivers/md/Config.in linux/drivers/md/Config.in
+--- linux-2.4.18/drivers/md/Config.in Fri Sep 14 22:22:18 2001
++++ linux/drivers/md/Config.in Thu Mar 14 16:33:11 2002
+@@ -14,5 +14,6 @@
+ dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
+
+ dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD
++dep_tristate ' Device mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD
+
+ endmenu
+diff -ruN linux-2.4.18/drivers/md/Makefile linux/drivers/md/Makefile
+--- linux-2.4.18/drivers/md/Makefile Thu Dec 6 15:57:55 2001
++++ linux/drivers/md/Makefile Thu Mar 14 16:33:11 2002
+@@ -4,9 +4,12 @@
+
+ O_TARGET := mddev.o
+
+-export-objs := md.o xor.o
++export-objs := md.o xor.o dm-table.o dm-target.o kcopyd.o
+ list-multi := lvm-mod.o
+ lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o
++dm-mod-objs := dm.o dm-table.o dm-target.o dm-ioctl.o \
++ dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \
++ dm-origin.o dm-mirror.o kcopyd.o
+
+ # Note: link order is important. All raid personalities
+ # and xor.o must come before md.o, as they each initialise
+@@ -20,8 +23,12 @@
+ obj-$(CONFIG_MD_MULTIPATH) += multipath.o
+ obj-$(CONFIG_BLK_DEV_MD) += md.o
+ obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o
++obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
+
+ include $(TOPDIR)/Rules.make
+
+ lvm-mod.o: $(lvm-mod-objs)
+ $(LD) -r -o $@ $(lvm-mod-objs)
++
++dm-mod.o: $(dm-mod-objs)
++ $(LD) -r -o $@ $(dm-mod-objs)
+diff -ruN linux-2.4.18/drivers/md/device-mapper.h linux/drivers/md/device-mapper.h
+--- linux-2.4.18/drivers/md/device-mapper.h Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/device-mapper.h Thu Mar 7 16:56:22 2002
+@@ -0,0 +1,59 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the LGPL.
++ */
++
++#ifndef _LINUX_DEVICE_MAPPER_H
++#define _LINUX_DEVICE_MAPPER_H
++
++#define DM_DIR "device-mapper" /* Slashes not supported */
++#define DM_MAX_TYPE_NAME 16
++#define DM_NAME_LEN 128
++#define DM_UUID_LEN 129
++
++#ifdef __KERNEL__
++
++struct dm_table;
++struct dm_dev;
++typedef unsigned long offset_t;
++
++
++/*
++ * Prototypes for functions for a target
++ */
++typedef int (*dm_ctr_fn)(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context);
++typedef void (*dm_dtr_fn)(struct dm_table *t, void *c);
++typedef int (*dm_map_fn)(struct buffer_head *bh, int rw, void *context);
++typedef int (*dm_err_fn)(struct buffer_head *bh, int rw, void *context);
++
++
++void dm_error(const char *message);
++
++/*
++ * Constructors should call these functions to ensure destination devices
++ * are opened/closed correctly
++ */
++int dm_table_get_device(struct dm_table *t, const char *path,
++ offset_t start, offset_t len, struct dm_dev **result);
++void dm_table_put_device(struct dm_table *table, struct dm_dev *d);
++
++/*
++ * Information about a target type
++ */
++struct target_type {
++ const char *name;
++ struct module *module;
++ dm_ctr_fn ctr;
++ dm_dtr_fn dtr;
++ dm_map_fn map;
++ dm_err_fn err;
++};
++
++int dm_register_target(struct target_type *t);
++int dm_unregister_target(struct target_type *t);
++
++#endif /* __KERNEL__ */
++
++#endif /* _LINUX_DEVICE_MAPPER_H */
+diff -ruN linux-2.4.18/drivers/md/dm-exception-store.c linux/drivers/md/dm-exception-store.c
+--- linux-2.4.18/drivers/md/dm-exception-store.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-exception-store.c Thu Mar 14 16:02:50 2002
+@@ -0,0 +1,683 @@
++/*
++ * dm-exception-store.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm-snapshot.h"
++
++#if 0
++/*
++ * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
++ */
++#define SNAP_MAGIC 0x70416e53
++
++/*
++ * The on-disk version of the metadata. Only applicable to
++ * persistent snapshots.
++ * There is no backward or forward compatibility implemented, snapshots
++ * with different disk versions than the kernel will not be usable. It is
++ * expected that "lvcreate" will blank out the start of the COW device
++ * before calling the snapshot constructor.
++ */
++#define SNAPSHOT_DISK_VERSION 1
++
++/*
++ * Metadata format: (please keep this up-to-date!)
++ * Persistent snapshots have a 1 block header (see below for structure) at
++ * the very start of the device. The COW metadata starts at
++ * .start_of_exceptions.
++ *
++ * COW metadata is stored in blocks that are "extent-size" sectors long as
++ * an array of disk_exception structures in Little-Endian format.
++ * The last entry in this array has rsector_new set to 0 (this cannot be a
++ * legal redirection as the header is here) and if rsector_org has a value
++ * it is the sector number of the next COW metadata sector on the disk. If
++ * rsector_org is also zero then this is the end of the COW metadata.
++ *
++ * The metadata is written in hardblocksize lumps rather than in units of
++ * extents for efficiency, so don't expect a whole extent to be zeroed out
++ * at any time.
++ *
++ * Non-persistent snapshots simply have redirected blocks stored
++ * (in chunk_size sectors) from hard block 1 to avoid inadvertently
++ * creating a bad header.
++ */
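++
++/*
++ * Rough on-disk layout implied by the above (editor's illustration):
++ *
++ * block 0: snap_disk_header
++ * start_of_exceptions: extent of little-endian disk_exception entries
++ * elsewhere: copied data chunks and further metadata extents,
++ * chained through rsector_org of each extent's last entry
++ */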
++
++/*
++ * Internal snapshot structure
++ */
++struct persistent_info {
++ /* Size of extents used for COW blocks */
++ long extent_size;
++
++ /* Number of the next free sector for COW/data */
++ unsigned long next_free_sector;
++
++ /* Where the metadata starts */
++ unsigned long start_of_exceptions;
++
++ /* Where we are currently writing the metadata */
++ unsigned long current_metadata_sector;
++
++ /* Index into disk_cow array */
++ int current_metadata_entry;
++
++ /* Index into mythical extent array */
++ int current_metadata_number;
++
++ /* Number of metadata entries in the disk_cow array */
++ int highest_metadata_entry;
++
++ /* Number of metadata entries per hard disk block */
++ int md_entries_per_block;
++
++ /* kiobuf for doing I/O to header & metadata */
++ struct kiobuf *cow_iobuf;
++
++ /*
++ * Disk extent with COW data in it. as an array of
++ * exception tables. The first one points to the next
++ * block of metadata or 0 if this is the last
++ */
++ struct disk_exception *disk_cow;
++};
++
++/*
++ * An array of these is held in each disk block. LE format
++ */
++struct disk_exception {
++ uint64_t rsector_org;
++ uint64_t rsector_new;
++};
++
++/*
++ * Structure of a (persistent) snapshot header on disk, in LE format
++ */
++struct snap_disk_header {
++ uint32_t magic;
++
++ /* Simple, incrementing version. No backward compatibility */
++ uint32_t version;
++
++ /* In 512 byte sectors */
++ uint32_t chunk_size;
++
++ /* In 512 byte sectors */
++ uint32_t extent_size;
++ uint64_t start_of_exceptions;
++ uint32_t full;
++};
++
++/*
++ * READ or WRITE some blocks to/from a device
++ */
++static int do_io(int rw, struct kiobuf *iobuf, kdev_t dev,
++ unsigned long start, int nr_sectors)
++{
++ int i, sectors_per_block, nr_blocks;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ sectors_per_block = blocksize / SECTOR_SIZE;
++
++ nr_blocks = nr_sectors / sectors_per_block;
++ start /= sectors_per_block;
++
++ for (i = 0; i < nr_blocks; i++)
++ iobuf->blocks[i] = start++;
++
++ iobuf->length = nr_sectors << 9;
++
++ status = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
++ return (status != (nr_sectors << 9));
++}
++
++/*
++ * Write the latest COW metadata block.
++ */
++static int write_metadata(struct snapshot_c *s, struct persistent_info *pi)
++{
++ kdev_t dev = s->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ int writesize = blocksize/SECTOR_SIZE;
++
++ if (do_io(WRITE, pi->cow_iobuf, dev,
++ pi->current_metadata_sector, writesize) != 0) {
++ DMERR("Error writing COW block");
++ return -1;
++ }
++
++ return 0;
++}
++
++/*
++ * Allocate a kiobuf. This is the only code nicked from the old
++ * snapshot driver and I've changed it anyway.
++ */
++static int alloc_iobuf_pages(struct kiobuf *iobuf, int nr_sectors)
++{
++ int nr_pages, err, i;
++
++ if (nr_sectors > KIO_MAX_SECTORS)
++ return -1;
++
++ nr_pages = nr_sectors / (PAGE_SIZE/SECTOR_SIZE);
++ err = expand_kiobuf(iobuf, nr_pages);
++ if (err) goto out;
++
++ err = -ENOMEM;
++ iobuf->locked = 1;
++ iobuf->nr_pages = 0;
++ for (i = 0; i < nr_pages; i++) {
++ struct page * page;
++
++ page = alloc_page(GFP_KERNEL);
++ if (!page) goto out;
++
++ iobuf->maplist[i] = page;
++ LockPage(page);
++ iobuf->nr_pages++;
++ }
++ iobuf->offset = 0;
++
++ err = 0;
++
++out:
++ return err;
++}
++
++/*
++ * Read on-disk COW metadata and populate the hash table.
++ */
++static int read_metadata(struct snapshot_c *lc, struct persistent_info *pi)
++{
++ int status;
++ int i;
++ int entry = 0;
++ int map_page = 0;
++ int nr_sectors = pi->extent_size;
++ kdev_t dev = lc->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ unsigned long cur_sector = pi->start_of_exceptions;
++ unsigned long last_sector;
++ unsigned long first_free_sector = 0;
++ int entries_per_page = PAGE_SIZE / sizeof(struct disk_exception);
++ struct disk_exception *cow_block;
++ struct kiobuf *read_iobuf;
++ int err = 0;
++ int devsize = get_dev_size(dev);
++
++ /*
++ * Allocate our own iovec for this operation 'cos the
++ * others are way too small.
++ */
++ if (alloc_kiovec(1, &read_iobuf)) {
++ DMERR("Error allocating iobuf for %s",
++ kdevname(dev));
++ return -1;
++ }
++
++ if (alloc_iobuf_pages(read_iobuf, pi->extent_size)) {
++ DMERR("Error allocating iobuf space for %s",
++ kdevname(dev));
++ free_kiovec(1, &read_iobuf);
++ return -1;
++ }
++ cow_block = page_address(read_iobuf->maplist[0]);
++
++ do {
++ /* Make sure the chain does not go off the end of
++ * the device, or backwards */
++ if (cur_sector > devsize || cur_sector < first_free_sector) {
++ DMERR("COW table chain pointers are inconsistent, "
++ "can't activate snapshot");
++ err = -1;
++ goto ret_free;
++ }
++
++ first_free_sector = max(first_free_sector,
++ cur_sector + pi->extent_size);
++ status = do_io(READ, read_iobuf, dev,
++ cur_sector, nr_sectors);
++ if (status == 0) {
++
++ map_page = 0;
++ entry = 0;
++
++ cow_block = page_address(read_iobuf->maplist[0]);
++
++ /* Now populate the hash table from this data */
++ for (i = 0; i <= pi->highest_metadata_entry &&
++ cow_block[entry].rsector_new != 0; i++) {
++
++ struct exception *ex;
++
++ ex = add_exception(lc,
++ le64_to_cpu(cow_block[entry].rsector_org),
++ le64_to_cpu(cow_block[entry].rsector_new));
++
++ first_free_sector = max(first_free_sector,
++ (unsigned long)(le64_to_cpu(cow_block[entry].rsector_new) +
++ lc->chunk_size));
++
++ /* Do we need to move onto the next page? */
++ if (++entry >= entries_per_page) {
++ entry = 0;
++ cow_block = page_address(read_iobuf->maplist[++map_page]);
++ }
++ }
++ }
++ else {
++ DMERR("Error reading COW metadata for %s",
++ kdevname(dev));
++ err = -1;
++ goto ret_free;
++ }
++ last_sector = cur_sector;
++ cur_sector = le64_to_cpu(cow_block[entry].rsector_org);
++
++ } while (cur_sector != 0);
++
++ lc->persistent = 1;
++ pi->current_metadata_sector = last_sector +
++ map_page*PAGE_SIZE/SECTOR_SIZE +
++ entry/(SECTOR_SIZE/sizeof(struct disk_exception));
++ pi->current_metadata_entry = entry;
++ pi->current_metadata_number = i;
++ pi->next_free_sector = first_free_sector;
++
++ /* Copy last block into cow_iobuf */
++ memcpy(pi->disk_cow, (char *)((long)&cow_block[entry] - ((long)&cow_block[entry] & (blocksize-1))), blocksize);
++
++ ret_free:
++ unmap_kiobuf(read_iobuf);
++ free_kiovec(1, &read_iobuf);
++
++ return err;
++}
++
++/*
++ * Read the snapshot volume header, returns 0 only if it read OK
++ * and it was valid. returns 1 if no header was found, -1 on
++ * error. All fields are checked against the snapshot structure
++ * itself to make sure we don't corrupt the data.
++ */
++static int read_header(struct snapshot_c *lc, struct persistent_info *pi)
++{
++ int status;
++ struct snap_disk_header *header;
++ kdev_t dev = lc->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ unsigned long devsize;
++
++ /* Get it */
++ status = do_io(READ, pi->cow_iobuf, dev, 0L, blocksize/SECTOR_SIZE);
++ if (status != 0) {
++ DMERR("Snapshot dev %s error reading header",
++ kdevname(dev));
++ return -1;
++ }
++
++ header = (struct snap_disk_header *) page_address(pi->cow_iobuf->maplist[0]);
++
++ /*
++ * Check the magic. It's OK if this fails, we just create a new snapshot header
++ * and start from scratch
++ */
++ if (le32_to_cpu(header->magic) != SNAP_MAGIC) {
++ return 1;
++ }
++
++ /* Check the version matches */
++ if (le32_to_cpu(header->version) != SNAPSHOT_DISK_VERSION) {
++ DMWARN("Snapshot dev %s version mismatch. Stored: %d, driver: %d",
++ kdevname(dev), le32_to_cpu(header->version), SNAPSHOT_DISK_VERSION);
++ return -1;
++ }
++
++ /* Check the chunk sizes match */
++ if (le32_to_cpu(header->chunk_size) != lc->chunk_size) {
++ DMWARN("Snapshot dev %s chunk size mismatch. Stored: %d, requested: %d",
++ kdevname(dev), le32_to_cpu(header->chunk_size), lc->chunk_size);
++ return -1;
++ }
++
++ /* Check the extent sizes match */
++ if (le32_to_cpu(header->extent_size) != pi->extent_size) {
++ DMWARN("Snapshot dev %s extent size mismatch. Stored: %d, requested: %ld",
++ kdevname(dev), le32_to_cpu(header->extent_size), pi->extent_size);
++ return -1;
++ }
++
++ /* Get the rest of the data */
++ pi->start_of_exceptions = le64_to_cpu(header->start_of_exceptions);
++ if (header->full) {
++ DMWARN("Snapshot dev %s is full. It cannot be used", kdevname(dev));
++ lc->full = 1;
++ return -1;
++ }
++
++ /* Validate against the size of the volume */
++ devsize = get_dev_size(dev);
++ if (pi->start_of_exceptions > devsize) {
++ DMWARN("Snapshot metadata error on %s. start exceptions > device size (%ld > %ld)",
++ kdevname(dev), pi->start_of_exceptions, devsize);
++ return -1;
++ }
++
++ /* Read metadata into the hash table and update pointers */
++ return read_metadata(lc, &lc->p_info);
++}
++
++/*
++ * Write (or update) the header. The only time we should need to
++ * do an update is when the snapshot becomes full.
++ */
++static int write_header(struct snapshot_c *lc, struct persistent_info *pi)
++{
++ struct snap_disk_header *header;
++ struct kiobuf *head_iobuf;
++ kdev_t dev = lc->cow_dev->dev;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ /*
++ * Allocate our own iobuf for this so we don't corrupt
++ * any of the other writes that may be going on.
++ */
++ if (alloc_kiovec(1, &head_iobuf)) {
++ DMERR("Error allocating iobuf for header on %s", kdevname(dev));
++ return -1;
++ }
++
++ if (alloc_iobuf_pages(head_iobuf, PAGE_SIZE/SECTOR_SIZE)) {
++ DMERR("Error allocating iobuf space for header on %s", kdevname(dev));
++ free_kiovec(1, &head_iobuf);
++ return -1;
++ }
++
++ header = (struct snap_disk_header *) page_address(head_iobuf->maplist[0]);
++
++ header->magic = cpu_to_le32(SNAP_MAGIC);
++ header->version = cpu_to_le32(SNAPSHOT_DISK_VERSION);
++ header->chunk_size = cpu_to_le32(lc->chunk_size);
++ header->extent_size = cpu_to_le32(pi->extent_size);
++ header->full = cpu_to_le32(lc->full);
++
++ header->start_of_exceptions = cpu_to_le64(pi->start_of_exceptions);
++
++ /* Must write at least a full block */
++ status = do_io(WRITE, head_iobuf, dev, 0, blocksize/SECTOR_SIZE);
++
++ unmap_kiobuf(head_iobuf);
++ free_kiovec(1, &head_iobuf);
++ return status;
++}
++
++
++static int init_persistent_snapshot(struct snapshot_c *lc, int blocksize,
++ unsigned long extent_size, void **context)
++{
++ struct persistent_info *pi = &lc->p_info;
++
++ int status;
++ int i;
++ int cow_sectors;
++
++ pi->extent_size = extent_size;
++ pi->next_free_sector = blocksize / SECTOR_SIZE; /* Leave the first block alone */
++ pi->disk_cow = NULL;
++
++ pi->highest_metadata_entry = (pi->extent_size*SECTOR_SIZE) / sizeof(struct disk_exception) - 1;
++ pi->md_entries_per_block = blocksize / sizeof(struct disk_exception);
++
++ /* Allocate and set up iobuf for metadata I/O */
++ *context = "Unable to allocate COW iovec";
++ if (alloc_kiovec(1, &pi->cow_iobuf))
++ return -1;
++
++ /* Allocate space for the COW buffer. It should be at least PAGE_SIZE. */
++ cow_sectors = blocksize/SECTOR_SIZE + PAGE_SIZE/SECTOR_SIZE;
++ *context = "Unable to allocate COW I/O buffer space";
++ if (alloc_iobuf_pages(pi->cow_iobuf, cow_sectors)) {
++ free_kiovec(1, &pi->cow_iobuf);
++ return -1;
++ }
++
++ for (i=0; i < pi->cow_iobuf->nr_pages; i++) {
++ memset(page_address(pi->cow_iobuf->maplist[i]), 0, PAGE_SIZE);
++ }
++
++ pi->disk_cow = page_address(pi->cow_iobuf->maplist[0]);
++
++ *context = "Error in disk header";
++ /* Check for a header on disk and create a new one if not */
++ if ( (status = read_header(lc, &lc->p_info)) == 1) {
++
++ /* Write a new header */
++ pi->start_of_exceptions = pi->next_free_sector;
++ pi->next_free_sector += pi->extent_size;
++ pi->current_metadata_sector = pi->start_of_exceptions;
++ pi->current_metadata_entry = 0;
++ pi->current_metadata_number = 0;
++
++ *context = "Unable to write snapshot header";
++ if (write_header(lc, &lc->p_info) != 0) {
++ DMERR("Error writing header to snapshot volume %s",
++ kdevname(lc->cow_dev->dev));
++ goto free_ret;
++ }
++
++ /* Write a blank metadata block to the device */
++ if (write_metadata(lc, &lc->p_info) != 0) {
++ DMERR("Error writing initial COW table to snapshot volume %s",
++ kdevname(lc->cow_dev->dev));
++ goto free_ret;
++ }
++ }
++
++ /*
++ * There is a header but it doesn't match - fail so we
++ * don't destroy what might be useful data on disk. If
++ * the user really wants to use this COW device for a
++ * snapshot then the first sector should be zeroed out
++ * first.
++ */
++ if (status == -1)
++ goto free_ret;
++
++ return 0;
++
++ free_ret:
++ unmap_kiobuf(pi->cow_iobuf);
++ free_kiovec(1, &pi->cow_iobuf);
++ return -1;
++}
++
++static void exit_persistent_snapshot(struct persistent_info *pi)
++{
++ unmap_kiobuf(pi->cow_iobuf);
++ free_kiovec(1, &pi->cow_iobuf);
++}
++
++/*
++ * Finds a suitable destination for the exception.
++ */
++static int prepare_exception(struct snapshot_c *s,
++ struct inflight_exception *e)
++{
++ offset_t dev_size;
++
++ /*
++ * Check for full snapshot. Doing the size calculation here means that
++ * the COW device can be resized without us being told
++ */
++ dev_size = get_dev_size(s->cow_dev->dev);
++ if (s->p_info.next_free_sector + s->chunk_size >= dev_size) {
++ /* Snapshot is full, we can't use it */
++ DMWARN("Snapshot %s is full (sec=%ld, size=%ld)",
++ kdevname(s->cow_dev->dev),
++ s->p_info.next_free_sector + s->chunk_size, dev_size);
++ s->full = 1;
++
++ /* Mark it full on the device */
++ if (s->persistent)
++ write_header(s, &s->p_info);
++
++ return -1;
++
++ } else {
++ e->rsector_new = s->p_info.next_free_sector;
++ s->p_info.next_free_sector += s->chunk_size;
++ }
++
++ return 0;
++}
++
++/*
++ * Add a new exception entry to the on-disk metadata.
++ */
++static int commit_exception(struct snapshot_c *sc,
++ unsigned long org, unsigned long new)
++{
++ struct persistent_info *pi = &sc->p_info;
++
++ int i = pi->current_metadata_entry++;
++ unsigned long next_md_block = pi->current_metadata_sector;
++
++ pi->current_metadata_number++;
++
++ /* Update copy of disk COW */
++ pi->disk_cow[i].rsector_org = cpu_to_le64(org);
++ pi->disk_cow[i].rsector_new = cpu_to_le64(new);
++
++ /* Have we filled this extent ? */
++ if (pi->current_metadata_number >= pi->highest_metadata_entry) {
++ /* Fill in pointer to next metadata extent */
++ i++;
++ pi->current_metadata_entry++;
++
++ next_md_block = pi->next_free_sector;
++ pi->next_free_sector += pi->extent_size;
++
++ pi->disk_cow[i].rsector_org = cpu_to_le64(next_md_block);
++ pi->disk_cow[i].rsector_new = 0;
++ }
++
++ /* Commit to disk */
++ if (write_metadata(sc, &sc->p_info)) {
++ sc->full = 1; /* Failed. don't try again */
++ return -1;
++ }
++
++ /*
++ * Write a new (empty) metadata block if we are at the
++ * end of an existing block so that read_metadata finds a
++ * terminating zero entry.
++ */
++ if (pi->current_metadata_entry == pi->md_entries_per_block) {
++ memset(pi->disk_cow, 0, PAGE_SIZE);
++ pi->current_metadata_sector = next_md_block;
++
++ /*
++ * If this is also the end of an extent then go
++ * back to the start.
++ */
++ if (pi->current_metadata_number >= pi->highest_metadata_entry) {
++ pi->current_metadata_number = 0;
++
++ } else {
++ int blocksize = get_hardsect_size(sc->cow_dev->dev);
++ pi->current_metadata_sector += blocksize/SECTOR_SIZE;
++ }
++
++ pi->current_metadata_entry = 0;
++ if (write_metadata(sc, &sc->p_info) != 0) {
++ sc->full = 1;
++ return -1;
++ }
++ }
++ return 0;
++}
++
++/*
++ * Sets the full flag in the metadata. A quick hack for now.
++ */
++static void invalidate_snapshot(struct snapshot_c *s)
++{
++ s->full = 1;
++ if (s->persistent)
++ write_header(s, &s->p_info);
++}
++
++
++#endif
++
++
++struct exception_store * dm_create_persistent(struct dm_snapshot *s,
++ int blocksize,
++ offset_t extent_size,
++ void **error)
++{
++ return NULL;
++}
++
++
++/*
++ * Implementation of the store for non-persistent snapshots.
++ */
++struct transient_c {
++ offset_t next_free;
++};
++
++void destroy_transient(struct exception_store *store)
++{
++ kfree(store->context);
++ kfree(store);
++}
++
++int prepare_transient(struct exception_store *store, struct exception *e)
++{
++ struct transient_c *tc = (struct transient_c *) store->context;
++ offset_t size = get_dev_size(store->snap->cow->dev);
++
++ if (size < (tc->next_free + store->snap->chunk_size))
++ return -1;
++
++ e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
++ tc->next_free += store->snap->chunk_size;
++ return 0;
++}
++
++struct exception_store *dm_create_transient(struct dm_snapshot *s,
++ int blocksize, void **error)
++{
++ struct exception_store *store;
++ struct transient_c *tc;
++
++ store = kmalloc(sizeof(*store), GFP_KERNEL);
++ if (!store) {
++ DMWARN("out of memory.");
++ return NULL;
++ }
++
++ memset(store, 0, sizeof(*store));
++ store->destroy = destroy_transient;
++ store->prepare_exception = prepare_transient;
++ store->snap = s;
++
++ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
++ if (!tc) {
++ kfree(store);
++ return NULL;
++ }
++
++ tc->next_free = 0;
++ store->context = tc;
++
++ return store;
++}
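++
++/*
++ * Rough usage sketch (for orientation only; dm-snapshot.c is the real
++ * caller of this store):
++ *
++ *   store = dm_create_transient(snap, blocksize, &err);
++ *   ...
++ *   if (!store->prepare_exception(store, &e))
++ *           copy chunk e.old_chunk to e.new_chunk on the COW device;
++ *   ...
++ *   store->destroy(store);
++ *
++ * The transient store keeps no on-disk metadata, so it provides
++ * neither commit_exception nor drop_snapshot.
++ */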
++
+diff -ruN linux-2.4.18/drivers/md/dm-ioctl.c linux/drivers/md/dm-ioctl.c
+--- linux-2.4.18/drivers/md/dm-ioctl.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-ioctl.c Thu Mar 14 15:59:31 2002
+@@ -0,0 +1,557 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/miscdevice.h>
++#include <linux/dm-ioctl.h>
++#include <linux/init.h>
++
++static void free_params(struct dm_ioctl *p)
++{
++ vfree(p);
++}
++
++static int version(struct dm_ioctl *user)
++{
++ return copy_to_user(user, DM_DRIVER_VERSION, sizeof(DM_DRIVER_VERSION));
++}
++
++static int copy_params(struct dm_ioctl *user, struct dm_ioctl **result)
++{
++ struct dm_ioctl tmp, *dmi;
++
++ if (copy_from_user(&tmp, user, sizeof(tmp)))
++ return -EFAULT;
++
++ if (strcmp(DM_IOCTL_VERSION, tmp.version)) {
++ DMWARN("dm_ctl_ioctl: struct dm_ioctl version incompatible");
++ return -EINVAL;
++ }
++
++ if (tmp.data_size < sizeof(tmp))
++ return -EINVAL;
++
++ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
++ if (!dmi)
++ return -ENOMEM;
++
++ if (copy_from_user(dmi, user, tmp.data_size)) {
++ vfree(dmi);
++ return -EFAULT;
++ }
++
++ *result = dmi;
++ return 0;
++}
++
++/*
++ * Check a string doesn't overrun the chunk of
++ * memory we copied from userland.
++ */
++static int valid_str(char *str, void *begin, void *end)
++{
++ while (((void *) str >= begin) && ((void *) str < end))
++ if (!*str++)
++ return 0;
++
++ return -EINVAL;
++}
++
++static int next_target(struct dm_target_spec *last, unsigned long next,
++ void *begin, void *end,
++ struct dm_target_spec **spec, char **params)
++{
++ *spec = (struct dm_target_spec *)
++ ((unsigned char *) last + next);
++ *params = (char *) (*spec + 1);
++
++ if (*spec < (last + 1) || ((void *) *spec > end))
++ return -EINVAL;
++
++ return valid_str(*params, begin, end);
++}
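++
++/*
++ * Rough sketch (illustrative) of the buffer layout the ioctl code here
++ * expects; data_start and next are byte offsets chosen by userland:
++ *
++ *   struct dm_ioctl         header
++ *   struct dm_target_spec   target 0   (data_start bytes into the buffer)
++ *   char []                 parameter string for target 0
++ *   struct dm_target_spec   target 1   ('next' bytes after target 0)
++ *   char []                 parameter string for target 1
++ *   ...
++ *
++ * next_target() above hops from one spec to the following one, and
++ * valid_str() checks that each parameter string stays inside the
++ * buffer copied from userland.
++ */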
++
++/*
++ * Checks to see if there's a gap in the table.
++ * Returns true iff there is a gap.
++ */
++static int gap(struct dm_table *table, struct dm_target_spec *spec)
++{
++ if (!table->num_targets)
++ return (spec->sector_start > 0) ? 1 : 0;
++
++ if (spec->sector_start != table->highs[table->num_targets - 1] + 1)
++ return 1;
++
++ return 0;
++}
++
++static int populate_table(struct dm_table *table, struct dm_ioctl *args)
++{
++ int i = 0, r, first = 1, argc;
++ struct dm_target_spec *spec;
++ char *params, *argv[MAX_ARGS];
++ struct target_type *ttype;
++ void *context, *begin, *end;
++ offset_t highs = 0;
++
++ if (!args->target_count) {
++ DMWARN("populate_table: no targets specified");
++ return -EINVAL;
++ }
++
++ begin = (void *) args;
++ end = begin + args->data_size;
++
++#define PARSE_ERROR(msg) {DMWARN(msg); return -EINVAL;}
++
++ for (i = 0; i < args->target_count; i++) {
++
++ r = first ? next_target((struct dm_target_spec *) args,
++ args->data_start,
++ begin, end, &spec, &params) :
++ next_target(spec, spec->next, begin, end, &spec, &params);
++
++ if (r)
++ PARSE_ERROR("unable to find target");
++
++ /* Look up the target type */
++ ttype = dm_get_target_type(spec->target_type);
++ if (!ttype)
++ PARSE_ERROR("unable to find target type");
++
++ if (gap(table, spec))
++ PARSE_ERROR("gap in target ranges");
++
++ /* Split up the parameter list */
++ if (split_args(MAX_ARGS, &argc, argv, params) < 0)
++ PARSE_ERROR("Too many arguments");
++
++ /* Build the target */
++ if (ttype->ctr(table, spec->sector_start, spec->length,
++ argc, argv, &context)) {
++ DMWARN("%s: target constructor failed",
++ (char *) context);
++ return -EINVAL;
++ }
++
++ /* Add the target to the table */
++ highs = spec->sector_start + (spec->length - 1);
++ if (dm_table_add_target(table, highs, ttype, context))
++ PARSE_ERROR("internal error adding target to table");
++
++ first = 0;
++ }
++
++#undef PARSE_ERROR
++
++ r = dm_table_complete(table);
++ return r;
++}
++
++/*
++ * Round up the ptr to the next 'align' boundary. Obviously
++ * 'align' must be a power of 2.
++ */
++static inline void *align_ptr(void *ptr, unsigned int align)
++{
++ align--;
++ return (void *) (((unsigned long) (ptr + align)) & ~align);
++}
++
++/*
++ * Copies a dm_ioctl and an optional additional payload to
++ * userland.
++ */
++static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param,
++ void *data, unsigned long len)
++{
++ int r;
++ void *ptr = NULL;
++
++ strncpy(param->version, DM_IOCTL_VERSION, sizeof(param->version));
++
++ if (data) {
++ ptr = align_ptr(user + 1, sizeof(unsigned long));
++ param->data_start = ptr - (void *) user;
++ }
++
++ r = copy_to_user(user, param, sizeof(*param));
++ if (r)
++ return r;
++
++ if (data) {
++ if (param->data_start + len > param->data_size)
++ return -ENOSPC;
++ r = copy_to_user(ptr, data, len);
++ }
++
++ return r;
++}
++
++/*
++ * Fills in a dm_ioctl structure, ready for sending back to
++ * userland.
++ */
++static void __info(struct mapped_device *md, struct dm_ioctl *param)
++{
++ param->flags = DM_EXISTS_FLAG;
++ if (md->suspended)
++ param->flags |= DM_SUSPEND_FLAG;
++ if (md->read_only)
++ param->flags |= DM_READONLY_FLAG;
++
++ strncpy(param->name, md->name, sizeof(param->name));
++
++ if (md->uuid)
++ strncpy(param->uuid, md->uuid, sizeof(param->uuid));
++ else
++ param->uuid[0] = '\0';
++
++ param->open_count = md->use_count;
++ param->dev = kdev_t_to_nr(md->dev);
++ param->target_count = md->map->num_targets;
++}
++
++/*
++ * Copies device info back to user space, used by
++ * the create and info ioctls.
++ */
++static int info(struct dm_ioctl *param, struct dm_ioctl *user)
++{
++ int minor;
++ struct mapped_device *md;
++
++ param->flags = 0;
++
++ md = dm_get_name_r(param->name);
++ if (!md)
++ /*
++ * Device not found - returns cleared exists flag.
++ */
++ goto out;
++
++ minor = MINOR(md->dev);
++ __info(md, param);
++ dm_put_r(minor);
++
++ out:
++ return results_to_user(user, param, NULL, 0);
++}
++
++/*
++ * Retrieves a list of devices used by a particular dm device.
++ */
++static int dep(struct dm_ioctl *param, struct dm_ioctl *user)
++{
++ int minor, count, r;
++ struct mapped_device *md;
++ struct list_head *tmp;
++ size_t len = 0;
++ struct dm_target_deps *deps = NULL;
++
++ md = dm_get_name_r(param->name);
++ if (!md)
++ goto out;
++ minor = MINOR(md->dev);
++
++ /*
++ * Setup the basic dm_ioctl structure.
++ */
++ __info(md, param);
++
++ /*
++ * Count the devices.
++ */
++ count = 0;
++ list_for_each(tmp, &md->map->devices)
++ count++;
++
++ /*
++ * Allocate a kernel space version of the dm_target_status
++ * struct.
++ */
++ len = sizeof(*deps) + (sizeof(*deps->dev) * count);
++ deps = kmalloc(len, GFP_KERNEL);
++ if (!deps) {
++ dm_put_r(minor);
++ return -ENOMEM;
++ }
++
++ /*
++ * Fill in the devices.
++ */
++ deps->count = count;
++ count = 0;
++ list_for_each(tmp, &md->map->devices) {
++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
++ deps->dev[count++] = kdev_t_to_nr(dd->dev);
++ }
++ dm_put_r(minor);
++
++ out:
++ r = results_to_user(user, param, deps, len);
++
++ kfree(deps);
++ return r;
++}
++
++static int create(struct dm_ioctl *param, struct dm_ioctl *user)
++{
++ int r;
++ struct mapped_device *md;
++ struct dm_table *t;
++ int minor;
++
++ r = dm_table_create(&t);
++ if (r)
++ return r;
++
++ r = populate_table(t, param);
++ if (r) {
++ dm_table_destroy(t);
++ return r;
++ }
++
++ minor = (param->flags & DM_PERSISTENT_DEV_FLAG) ?
++ MINOR(to_kdev_t(param->dev)) : -1;
++
++ r = dm_create(param->name, param->uuid, minor, t);
++ if (r) {
++ dm_table_destroy(t);
++ return r;
++ }
++
++ md = dm_get_name_w(param->name);
++ if (!md)
++ /* shouldn't get here */
++ return -EINVAL;
++
++ minor = MINOR(md->dev);
++ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
++ dm_put_w(minor);
++
++ r = info(param, user);
++ return r;
++}
++
++static int remove(struct dm_ioctl *param)
++{
++ int r, minor;
++ struct mapped_device *md;
++
++ md = dm_get_name_w(param->name);
++ if (!md)
++ return -ENXIO;
++
++ minor = MINOR(md->dev);
++ r = dm_destroy(md);
++ dm_put_w(minor);
++
++ return r;
++}
++
++static int suspend(struct dm_ioctl *param)
++{
++ int r, minor;
++ struct mapped_device *md;
++
++ md = dm_get_name_w(param->name);
++ if (!md)
++ return -ENXIO;
++
++ minor = MINOR(md->dev);
++ r = (param->flags & DM_SUSPEND_FLAG) ? dm_suspend(md) : dm_resume(md);
++ dm_put_w(minor);
++
++ return r;
++}
++
++static int reload(struct dm_ioctl *param)
++{
++ int r, minor;
++ struct mapped_device *md;
++ struct dm_table *t;
++
++ r = dm_table_create(&t);
++ if (r)
++ return r;
++
++ r = populate_table(t, param);
++ if (r) {
++ dm_table_destroy(t);
++ return r;
++ }
++
++ md = dm_get_name_w(param->name);
++ if (!md) {
++ dm_table_destroy(t);
++ return -ENXIO;
++ }
++
++ minor = MINOR(md->dev);
++
++ r = dm_swap_table(md, t);
++ if (r) {
++ dm_put_w(minor);
++ dm_table_destroy(t);
++ return r;
++ }
++
++ dm_set_ro(md, (param->flags & DM_READONLY_FLAG) ? 1 : 0);
++ dm_put_w(minor);
++ return 0;
++}
++
++static int rename(struct dm_ioctl *param)
++{
++ char *newname = (char *) param + param->data_start;
++
++ if (valid_str(newname, (void *) param,
++ (void *) param + param->data_size) ||
++ dm_set_name(param->name, newname)) {
++ DMWARN("Invalid new logical volume name supplied.");
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static int ctl_open(struct inode *inode, struct file *file)
++{
++ /* only root can open this */
++ if (!capable(CAP_SYS_ADMIN))
++ return -EACCES;
++
++ MOD_INC_USE_COUNT;
++
++ return 0;
++}
++
++static int ctl_close(struct inode *inode, struct file *file)
++{
++ MOD_DEC_USE_COUNT;
++ return 0;
++}
++
++static int ctl_ioctl(struct inode *inode, struct file *file,
++ uint command, ulong a)
++{
++ int r;
++ struct dm_ioctl *p;
++ uint cmd = _IOC_NR(command);
++
++ switch (cmd) {
++ case DM_REMOVE_ALL_CMD:
++ dm_destroy_all();
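++ /* no break: falls through and also returns the driver version */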
++ case DM_VERSION_CMD:
++ return version((struct dm_ioctl *) a);
++ default:
++ break;
++ }
++
++ r = copy_params((struct dm_ioctl *) a, &p);
++ if (r)
++ return r;
++
++ /* FIXME: Change to use size 0 next time ioctl version gets changed */
++ switch (cmd) {
++ case DM_CREATE_CMD:
++ r = create(p, (struct dm_ioctl *) a);
++ break;
++
++ case DM_REMOVE_CMD:
++ r = remove(p);
++ break;
++
++ case DM_SUSPEND_CMD:
++ r = suspend(p);
++ break;
++
++ case DM_RELOAD_CMD:
++ r = reload(p);
++ break;
++
++ case DM_INFO_CMD:
++ r = info(p, (struct dm_ioctl *) a);
++ break;
++
++ case DM_DEPS_CMD:
++ r = dep(p, (struct dm_ioctl *) a);
++ break;
++
++ case DM_RENAME_CMD:
++ r = rename(p);
++ break;
++
++ default:
++ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
++ r = -EINVAL;
++ }
++
++ free_params(p);
++ return r;
++}
++
++static struct file_operations _ctl_fops = {
++ open: ctl_open,
++ release: ctl_close,
++ ioctl: ctl_ioctl,
++ owner: THIS_MODULE,
++};
++
++static devfs_handle_t _ctl_handle;
++
++static struct miscdevice _dm_misc = {
++ minor: MISC_DYNAMIC_MINOR,
++ name: DM_NAME,
++ fops: &_ctl_fops
++};
++
++/* Create misc character device and link to DM_DIR/control */
++int __init dm_interface_init(void)
++{
++ int r;
++ char rname[64];
++
++ r = misc_register(&_dm_misc);
++ if (r) {
++ DMERR("misc_register failed for control device");
++ return r;
++ }
++
++ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
++ sizeof rname - 3);
++ if (r == -ENOSYS)
++ return 0; /* devfs not present */
++
++ if (r < 0) {
++ DMERR("devfs_generate_path failed for control device");
++ goto failed;
++ }
++
++ strncpy(rname + r, "../", 3);
++ r = devfs_mk_symlink(NULL, DM_DIR "/control",
++ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL);
++ if (r) {
++ DMERR("devfs_mk_symlink failed for control device");
++ goto failed;
++ }
++ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
++
++ return 0;
++
++ failed:
++ misc_deregister(&_dm_misc);
++ return r;
++}
++
++void __exit dm_interface_exit(void)
++{
++ if (misc_deregister(&_dm_misc) < 0)
++ DMERR("misc_deregister failed for control device");
++}
+diff -ruN linux-2.4.18/drivers/md/dm-linear.c linux/drivers/md/dm-linear.c
+--- linux-2.4.18/drivers/md/dm-linear.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-linear.c Tue Jan 15 19:53:55 2002
+@@ -0,0 +1,105 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++
++/*
++ * Linear: maps a linear range of a device.
++ */
++struct linear_c {
++ long delta; /* FIXME: we need a signed offset type */
++ struct dm_dev *dev;
++};
++
++/*
++ * Construct a linear mapping: <dev_path> <offset>
++ */
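++/*
++ * Illustrative example only (hypothetical device): a target covering
++ * sectors 0-1023 of the logical device and mapped onto /dev/hda6
++ * starting at sector 2048 reaches this constructor as
++ * argv = { "/dev/hda6", "2048" }, i.e. a table line of the form
++ * "0 1024 linear /dev/hda6 2048".
++ */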
++static int linear_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ struct linear_c *lc;
++ unsigned long start; /* FIXME: unsigned long long */
++ char *end;
++
++ if (argc != 2) {
++ *context = "dm-linear: Not enough arguments";
++ return -EINVAL;
++ }
++
++ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
++ if (lc == NULL) {
++ *context = "dm-linear: Cannot allocate linear context";
++ return -ENOMEM;
++ }
++
++ start = simple_strtoul(argv[1], &end, 10);
++ if (*end) {
++ *context = "dm-linear: Invalid device sector";
++ goto bad;
++ }
++
++ if (dm_table_get_device(t, argv[0], start, l, &lc->dev)) {
++ *context = "dm-linear: Device lookup failed";
++ goto bad;
++ }
++
++ lc->delta = (int) start - (int) b;
++ *context = lc;
++ return 0;
++
++ bad:
++ kfree(lc);
++ return -EINVAL;
++}
++
++static void linear_dtr(struct dm_table *t, void *c)
++{
++ struct linear_c *lc = (struct linear_c *) c;
++
++ dm_table_put_device(t, lc->dev);
++ kfree(c);
++}
++
++static int linear_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct linear_c *lc = (struct linear_c *) context;
++
++ bh->b_rdev = lc->dev->dev;
++ bh->b_rsector = bh->b_rsector + lc->delta;
++
++ return 1;
++}
++
++static struct target_type linear_target = {
++ name: "linear",
++ module: THIS_MODULE,
++ ctr: linear_ctr,
++ dtr: linear_dtr,
++ map: linear_map,
++};
++
++int __init dm_linear_init(void)
++{
++ int r = dm_register_target(&linear_target);
++
++ if (r < 0)
++ DMERR("linear: register failed %d", r);
++
++ return r;
++}
++
++void dm_linear_exit(void)
++{
++ int r = dm_unregister_target(&linear_target);
++
++ if (r < 0)
++ DMERR("linear: unregister failed %d", r);
++}
++
+diff -ruN linux-2.4.18/drivers/md/dm-mirror.c linux/drivers/md/dm-mirror.c
+--- linux-2.4.18/drivers/md/dm-mirror.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-mirror.c Thu Mar 14 15:53:19 2002
+@@ -0,0 +1,295 @@
++/*
++ * Copyright (C) 2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++
++/* kcopyd priority of mirror operations */
++#define MIRROR_COPY_PRIORITY 5
++
++static kmem_cache_t *bh_cachep;
++
++/*
++ * Mirror: maps a mirror range of a device.
++ */
++struct mirror_c {
++ struct dm_dev *fromdev;
++ struct dm_dev *todev;
++
++ unsigned long from_delta;
++ unsigned long to_delta;
++
++ unsigned long frompos;
++ unsigned long topos;
++
++ unsigned long got_to;
++ struct rw_semaphore lock;
++ struct buffer_head *bhstring;
++ int error;
++};
++
++
++/* Called when a duplicating I/O has finished */
++static void mirror_end_io(struct buffer_head *bh, int uptodate)
++{
++ struct mirror_c *lc = (struct mirror_c *) bh->b_private;
++
++ /* Flag error if it failed */
++ if (!uptodate) {
++ DMERR("Mirror copy to %s failed", kdevname(lc->todev->dev));
++ lc->error = 1;
++ dm_notify(lc); /* TODO: interface ?? */
++ }
++ kmem_cache_free(bh_cachep, bh);
++}
++
++static void mirror_bh(struct mirror_c *mc, struct buffer_head *bh)
++{
++ struct buffer_head *dbh = kmem_cache_alloc(bh_cachep, GFP_NOIO);
++ if (dbh) {
++ *dbh = *bh;
++ dbh->b_rdev = mc->todev->dev;
++ dbh->b_rsector = bh->b_rsector - mc->from_delta
++ + mc->to_delta;
++ dbh->b_end_io = mirror_end_io;
++ dbh->b_private = mc;
++
++ generic_make_request(WRITE, dbh);
++ } else {
++ DMERR("kmem_cache_alloc failed for mirror bh");
++ mc->error = 1;
++ }
++}
++
++/* Called when the copy I/O has finished */
++static void copy_callback(copy_cb_reason_t reason, void *context, long arg)
++{
++ struct mirror_c *lc = (struct mirror_c *) context;
++ struct buffer_head *bh;
++
++ if (reason == COPY_CB_FAILED_READ || reason == COPY_CB_FAILED_WRITE) {
++ DMERR("Mirror block %s on %s failed, sector %ld",
++ reason == COPY_CB_FAILED_READ ? "read" : "write",
++ reason == COPY_CB_FAILED_READ ?
++ kdevname(lc->fromdev->dev) :
++ kdevname(lc->todev->dev), arg);
++ lc->error = 1;
++ return;
++ }
++
++ if (reason == COPY_CB_COMPLETE) {
++ /* Say we've finished */
++ dm_notify(lc); /* TODO: interface ?? */
++ }
++
++ if (reason == COPY_CB_PROGRESS) {
++ dm_notify(lc); /* TODO: interface ?? */
++ }
++
++ /* Submit, and mirror any pending BHs */
++ down_write(&lc->lock);
++ lc->got_to = arg;
++
++ bh = lc->bhstring;
++ lc->bhstring = NULL;
++ up_write(&lc->lock);
++
++ while (bh) {
++ struct buffer_head *nextbh = bh->b_reqnext;
++ bh->b_reqnext = NULL;
++ generic_make_request(WRITE, bh);
++ mirror_bh(lc, bh);
++ bh = nextbh;
++ }
++}
++
++/*
++ * Construct a mirror mapping: <dev_path1> <offset> <dev_path2> <offset> <throttle> [<priority>]
++ */
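++/*
++ * Illustrative example only (hypothetical devices): argv might be
++ * { "/dev/hdb1", "0", "/dev/hdc1", "0", "16" }, i.e. a table line of
++ * the form "0 102400 mirror /dev/hdb1 0 /dev/hdc1 0 16", mirroring
++ * the first 102400 sectors of /dev/hdb1 onto /dev/hdc1 with a
++ * throttle of 16 and the default copy priority.
++ */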
++static int mirror_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ struct mirror_c *lc;
++ unsigned long offset1, offset2;
++ char *value;
++ int priority = MIRROR_COPY_PRIORITY;
++ int throttle;
++
++ if (argc <= 4) {
++ *context = "dm-mirror: Not enough arguments";
++ return -EINVAL;
++ }
++
++ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
++ if (lc == NULL) {
++ *context = "dm-mirror: Cannot allocate mirror context";
++ return -ENOMEM;
++ }
++
++ if (dm_table_get_device(t, argv[0], 0, l, &lc->fromdev)) {
++ *context = "dm-mirror: Device lookup failed";
++ goto bad;
++ }
++
++ offset1 = simple_strtoul(argv[1], &value, 10);
++ if (*value) {
++ *context = "Invalid offset for dev1";
++ dm_table_put_device(t, lc->fromdev);
++ goto bad;
++ }
++
++ if (dm_table_get_device(t, argv[2], 0, l, &lc->todev)) {
++ *context = "dm-mirror: Device lookup failed";
++ dm_table_put_device(t, lc->fromdev);
++ goto bad;
++ }
++
++ offset2 = simple_strtoul(argv[3], &value, 10);
++ if (*value) {
++ *context = "Invalid offset for dev2";
++ goto bad_put;
++ }
++
++ throttle = simple_strtoul(argv[4], &value, 10);
++ if (*value) {
++ *context = "Invalid throttle value";
++ goto bad_put;
++ }
++
++ if (argc > 5) {
++ priority = simple_strtoul(argv[5], &value, 10);
++ if (*value) {
++ *context = "Invalid priority value";
++ goto bad_put;
++ }
++ }
++
++ lc->from_delta = (int) offset1 - (int) b;
++ lc->to_delta = (int) offset2 - (int) b;
++ lc->frompos = offset1;
++ lc->topos = offset2;
++ lc->error = 0;
++ lc->bhstring = NULL;
++ init_rwsem(&lc->lock);
++ *context = lc;
++
++ /* Tell kcopyd to do the biz */
++ if (dm_blockcopy(offset1, offset2,
++ l - offset1,
++ lc->fromdev->dev, lc->todev->dev,
++ priority, 0, copy_callback, lc)) {
++ DMERR("block copy call failed");
++ dm_table_put_device(t, lc->fromdev);
++ dm_table_put_device(t, lc->todev);
++ goto bad;
++ }
++ return 0;
++
++ bad_put:
++ dm_table_put_device(t, lc->fromdev);
++ dm_table_put_device(t, lc->todev);
++ bad:
++ kfree(lc);
++ return -EINVAL;
++}
++
++static void mirror_dtr(struct dm_table *t, void *c)
++{
++ struct mirror_c *lc = (struct mirror_c *) c;
++
++ dm_table_put_device(t, lc->fromdev);
++ dm_table_put_device(t, lc->todev);
++ kfree(c);
++}
++
++static int mirror_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct mirror_c *lc = (struct mirror_c *) context;
++
++ bh->b_rdev = lc->fromdev->dev;
++ bh->b_rsector = bh->b_rsector + lc->from_delta;
++
++ if (rw == WRITE) {
++ down_write(&lc->lock);
++
++ /*
++ * If this area is in flight then save it until it's
++ * committed to the mirror disk and then submit it and
++ * its mirror.
++ */
++ if (bh->b_rsector > lc->got_to &&
++ bh->b_rsector <= lc->got_to + KIO_MAX_SECTORS) {
++ bh->b_reqnext = lc->bhstring;
++ lc->bhstring = bh;
++ up_write(&lc->lock);
++ return 0;
++ }
++
++ /*
++ * If we've already copied this block then duplicate
++ * it to the mirror device
++ */
++ if (bh->b_rsector < lc->got_to) {
++ /* Schedule copy of I/O to other target */
++ mirror_bh(lc, bh);
++ }
++ up_write(&lc->lock);
++ }
++ return 1;
++}
++
++static struct target_type mirror_target = {
++ name: "mirror",
++ module: THIS_MODULE,
++ ctr: mirror_ctr,
++ dtr: mirror_dtr,
++ map: mirror_map,
++};
++
++int __init dm_mirror_init(void)
++{
++ int r;
++
++ bh_cachep = kmem_cache_create("dm-mirror",
++ sizeof(struct buffer_head),
++ __alignof__(struct buffer_head),
++ 0, NULL, NULL);
++ if (!bh_cachep) {
++ return -1;
++ }
++
++
++ r = dm_register_target(&mirror_target);
++ if (r < 0) {
++ DMERR("mirror: register failed %d", r);
++ kmem_cache_destroy(bh_cachep);
++ }
++ return r;
++}
++
++void dm_mirror_exit(void)
++{
++ int r = dm_unregister_target(&mirror_target);
++
++ if (r < 0)
++ DMERR("mirror: unregister failed %d", r);
++
++ kmem_cache_destroy(bh_cachep);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -ruN linux-2.4.18/drivers/md/dm-origin.c linux/drivers/md/dm-origin.c
+--- linux-2.4.18/drivers/md/dm-origin.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-origin.c Wed Mar 13 17:28:40 2002
+@@ -0,0 +1,105 @@
++/*
++ * dm-origin.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/blkdev.h>
++#include <linux/device-mapper.h>
++
++#include "dm.h"
++
++/*
++ * Origin: maps a linear range of a device, with hooks for snapshotting.
++ */
++
++/*
++ * Construct an origin mapping: <dev_path>
++ * The context for an origin is merely a 'struct dm_dev *'
++ * pointing to the real device.
++ */
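++/*
++ * Illustrative example only (hypothetical device): argv would simply
++ * be { "/dev/hdb1" }, i.e. a table line such as
++ * "0 204800 snapshot-origin /dev/hdb1" covering the whole origin.
++ */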
++static int origin_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ int r;
++ struct dm_dev *dev;
++
++ if (argc != 1) {
++ *context = "dm-origin: incorrect number of arguments";
++ return -EINVAL;
++ }
++
++ r = dm_table_get_device(t, argv[0], 0, l, &dev);
++ if (r) {
++ *context = "Cannot get target device";
++ return r;
++ }
++
++ *context = dev;
++
++ return 0;
++}
++
++static void origin_dtr(struct dm_table *t, void *c)
++{
++ struct dm_dev *dev = (struct dm_dev *) c;
++
++ dm_table_put_device(t, dev);
++}
++
++static int origin_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct dm_dev *dev = (struct dm_dev *) context;
++
++ bh->b_rdev = dev->dev;
++
++ /* Only tell snapshots if this is a write */
++ return (rw == WRITE) ? dm_do_snapshot(dev, bh) : 1;
++}
++
++static struct target_type origin_target = {
++ name: "snapshot-origin",
++ module: THIS_MODULE,
++ ctr: origin_ctr,
++ dtr: origin_dtr,
++ map: origin_map,
++ err: NULL
++};
++
++int __init dm_origin_init(void)
++{
++ int r = dm_register_target(&origin_target);
++
++ if (r < 0)
++ DMERR("Device mapper: Origin: register failed %d\n", r);
++
++ return r;
++}
++
++void dm_origin_exit(void)
++{
++ int r = dm_unregister_target(&origin_target);
++
++ if (r < 0)
++ DMERR("Device mapper: Origin: unregister failed %d\n", r);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -ruN linux-2.4.18/drivers/md/dm-snapshot.c linux/drivers/md/dm-snapshot.c
+--- linux-2.4.18/drivers/md/dm-snapshot.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-snapshot.c Thu Mar 14 16:08:52 2002
+@@ -0,0 +1,862 @@
++/*
++ * dm-snapshot.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/blkdev.h>
++#include <linux/device-mapper.h>
++
++#include "dm-snapshot.h"
++
++/*
++ * Hard sector size used all over the kernel
++ */
++#define SECTOR_SIZE 512
++
++/*
++ * kcopyd priority of snapshot operations
++ */
++#define SNAPSHOT_COPY_PRIORITY 2
++
++struct pending_exception {
++ struct exception e;
++
++ /* Chain of WRITE buffer heads to submit when this COW has completed */
++ struct buffer_head *bh;
++
++ /* Pointer back to snapshot context */
++ struct dm_snapshot *snap;
++};
++
++/*
++ * Hash table mapping origin volumes to lists of snapshots and
++ * a lock to protect it
++ */
++static kmem_cache_t *exception_cachep;
++static kmem_cache_t *pending_cachep;
++
++/*
++ * One of these per registered origin, held in the snapshot_origins hash
++ */
++struct origin {
++ /* The origin device */
++ kdev_t dev;
++
++ struct list_head hash_list;
++
++ /* List of snapshots for this origin */
++ struct list_head snapshots;
++};
++
++/*
++ * Size of the hash table for origin volumes. If we make this
++ * the size of the minors list then it should be nearly perfect
++ */
++#define ORIGIN_HASH_SIZE 256
++#define ORIGIN_MASK 0xFF
++static struct list_head *_origins;
++static struct rw_semaphore _origins_lock;
++
++static int init_origin_hash(void)
++{
++ int i;
++
++ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
++ GFP_KERNEL);
++ if (!_origins) {
++ DMERR("Device mapper: Snapshot: unable to allocate memory");
++ return -ENOMEM;
++ }
++
++ for (i = 0; i < ORIGIN_HASH_SIZE; i++)
++ INIT_LIST_HEAD(_origins + i);
++ init_rwsem(&_origins_lock);
++
++ return 0;
++}
++
++static void exit_origin_hash(void)
++{
++ kfree(_origins);
++}
++
++static inline unsigned int origin_hash(kdev_t dev)
++{
++ return MINOR(dev) & ORIGIN_MASK;
++}
++
++static struct origin *__lookup_origin(kdev_t origin)
++{
++ struct list_head *slist;
++ struct list_head *ol;
++ struct origin *o;
++
++ ol = &_origins[origin_hash(origin)];
++ list_for_each(slist, ol) {
++ o = list_entry(slist, struct origin, hash_list);
++
++ if (o->dev == origin)
++ return o;
++ }
++
++ return NULL;
++}
++
++static void __insert_origin(struct origin *o)
++{
++ struct list_head *sl = &_origins[origin_hash(o->dev)];
++ list_add_tail(&o->hash_list, sl);
++}
++
++/*
++ * Make a note of the snapshot and its origin so we can look it
++ * up when the origin has a write on it.
++ */
++static int register_snapshot(struct dm_snapshot *snap)
++{
++ struct origin *o;
++ kdev_t dev = snap->origin->dev;
++
++ down_write(&_origins_lock);
++ o = __lookup_origin(dev);
++
++ if (!o) {
++ /* New origin */
++ o = kmalloc(sizeof(*o), GFP_KERNEL);
++ if (!o) {
++ up_write(&_origins_lock);
++ return -ENOMEM;
++ }
++
++ /* Initialise the struct */
++ INIT_LIST_HEAD(&o->snapshots);
++ o->dev = dev;
++
++ __insert_origin(o);
++ }
++
++ list_add_tail(&snap->list, &o->snapshots);
++
++ up_write(&_origins_lock);
++ return 0;
++}
++
++static void unregister_snapshot(struct dm_snapshot *s)
++{
++ struct origin *o;
++
++ down_write(&_origins_lock);
++ o = __lookup_origin(s->origin->dev);
++
++ list_del(&s->list);
++ if (list_empty(&o->snapshots)) {
++ list_del(&o->hash_list);
++ kfree(o);
++ }
++
++ up_write(&_origins_lock);
++}
++
++/*
++ * Implementation of the exception hash tables.
++ */
++static int init_exception_table(struct exception_table *et, uint32_t size)
++{
++ int i;
++
++ et->hash_mask = size - 1;
++ et->table = vmalloc(sizeof(struct list_head) * (size));
++ if (!et->table)
++ return -ENOMEM;
++
++ for (i = 0; i < size; i++)
++ INIT_LIST_HEAD(et->table + i);
++
++ return 0;
++}
++
++static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
++{
++ struct list_head *slot, *entry, *temp;
++ struct exception *ex;
++ int i, size;
++
++ size = et->hash_mask + 1;
++ for (i = 0; i < size; i++) {
++ slot = et->table + i;
++
++ list_for_each_safe(entry, temp, slot) {
++ ex = list_entry(entry, struct exception, hash_list);
++ kmem_cache_free(mem, ex);
++ }
++ }
++
++ vfree(et->table);
++}
++
++/*
++ * FIXME: check how this hash fn is performing.
++ */
++static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
++{
++ return chunk & et->hash_mask;
++}
++
++static void insert_exception(struct exception_table *eh, struct exception *e)
++{
++ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
++ list_add(&e->hash_list, l);
++}
++
++static inline void remove_exception(struct exception *e)
++{
++ list_del(&e->hash_list);
++}
++
++/*
++ * Return the exception data for a sector, or NULL if not
++ * remapped.
++ */
++static struct exception *lookup_exception(struct exception_table *et,
++ chunk_t chunk)
++{
++ struct list_head *slot, *el;
++ struct exception *e;
++
++ slot = &et->table[exception_hash(et, chunk)];
++ list_for_each(el, slot) {
++ e = list_entry(el, struct exception, hash_list);
++ if (e->old_chunk == chunk)
++ return e;
++ }
++
++ return NULL;
++}
++
++static inline struct exception *alloc_exception(void)
++{
++ return kmem_cache_alloc(exception_cachep, GFP_NOIO);
++}
++
++static inline struct pending_exception *alloc_pending_exception(void)
++{
++ return kmem_cache_alloc(pending_cachep, GFP_NOIO);
++}
++
++static inline void free_exception(struct exception *e)
++{
++ kmem_cache_free(exception_cachep, e);
++}
++
++static inline void free_pending_exception(struct pending_exception *pe)
++{
++ kmem_cache_free(pending_cachep, pe);
++}
++
++/*
++ * Called when the copy I/O has finished
++ */
++static void copy_callback(copy_cb_reason_t reason, void *context, long arg)
++{
++ struct pending_exception *pe = (struct pending_exception *) context;
++ struct dm_snapshot *s = pe->snap;
++ struct exception *e;
++
++ if (reason == COPY_CB_COMPLETE) {
++ struct buffer_head *bh;
++
++ /* Update the metadata if we are persistent */
++ if (s->store->commit_exception)
++ s->store->commit_exception(s->store, &pe->e);
++
++ e = alloc_exception();
++ if (!e) {
++ /* FIXME: what do we do now ? */
++ return;
++ }
++
++ /* Add a proper exception and remove the
++ pending exception from the hash table */
++ down_write(&pe->snap->lock);
++
++ memcpy(e, &pe->e, sizeof(*e));
++ insert_exception(&s->complete, e);
++ remove_exception(&pe->e);
++
++ /* Submit any pending write BHs */
++ bh = pe->bh;
++ pe->bh = NULL;
++ up_write(&pe->snap->lock);
++
++ kmem_cache_free(pending_cachep, pe);
++
++ while (bh) {
++ struct buffer_head *nextbh = bh->b_reqnext;
++ bh->b_reqnext = NULL;
++ generic_make_request(WRITE, bh);
++ bh = nextbh;
++ }
++ }
++
++ /* Read/write error - snapshot is unusable */
++ if (reason == COPY_CB_FAILED_WRITE || reason == COPY_CB_FAILED_READ) {
++ DMERR("Error reading/writing snapshot");
++
++ if (pe->snap->store->drop_snapshot)
++ pe->snap->store->drop_snapshot(pe->snap->store);
++ remove_exception(&pe->e);
++ kmem_cache_free(pending_cachep, pe);
++ }
++}
++
++/*
++ * Hard coded magic.
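++ *
++ * Illustrative arithmetic: on a 32-bit machine with 128MB of RAM
++ * (struct list_head is 8 bytes) this comes to about 128M / 50 / 8,
++ * i.e. roughly 335,000 hash buckets.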
++ */
++static int calc_max_buckets(void)
++{
++ unsigned long mem;
++
++ mem = num_physpages << PAGE_SHIFT;
++ mem /= 50;
++ mem /= sizeof(struct list_head);
++
++ return mem;
++}
++
++/*
++ * Rounds a number down to a power of 2.
++ */
++static inline uint32_t round_down(uint32_t n)
++{
++ while (n & (n - 1))
++ n &= (n - 1);
++ return n;
++}
++
++/*
++ * Allocate room for a suitable hash table.
++ */
++static int init_hash_tables(struct dm_snapshot *s)
++{
++ offset_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
++
++ /*
++ * Calculate based on the size of the original volume or
++ * the COW volume...
++ */
++ cow_dev_size = get_dev_size(s->cow->dev);
++ origin_dev_size = get_dev_size(s->origin->dev);
++ max_buckets = calc_max_buckets();
++
++ hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size;
++ hash_size = min(hash_size, max_buckets);
++
++ /* Round it down to a power of 2 */
++ hash_size = round_down(hash_size);
++ if (init_exception_table(&s->complete, hash_size))
++ return -ENOMEM;
++
++ /*
++ * Allocate hash table for in-flight exceptions.
++ * Make this smaller than the real hash table.
++ */
++ hash_size >>= 3;
++ if (!hash_size)
++ hash_size = 64;
++
++ if (init_exception_table(&s->pending, hash_size)) {
++ exit_exception_table(&s->complete, exception_cachep);
++ return -ENOMEM;
++ }
++
++ return 0;
++}
++
++/*
++ * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n>
++ * <chunk-size> <extent-size>
++ */
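++/*
++ * Illustrative example only (hypothetical devices): a non-persistent
++ * snapshot of /dev/hdb1 using /dev/hdc1 as COW space with 16-sector
++ * chunks would arrive here as argv = { "/dev/hdb1", "/dev/hdc1",
++ * "n", "16" } (the extent size is only required for the persistent
++ * variant), i.e. a table line like
++ * "0 204800 snapshot /dev/hdb1 /dev/hdc1 n 16".
++ */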
++static int snapshot_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ struct dm_snapshot *s;
++ unsigned long chunk_size;
++ unsigned long extent_size = 0L;
++ int r = -EINVAL;
++ char *persistent;
++ char *origin_path;
++ char *cow_path;
++ char *value;
++ int blocksize;
++
++ if (argc < 4) {
++ *context = "dm-snapshot: Not enough arguments";
++ r = -EINVAL;
++ goto bad;
++ }
++
++ origin_path = argv[0];
++ cow_path = argv[1];
++ persistent = argv[2];
++
++ if ((*persistent & 0x5f) != 'P' && (*persistent & 0x5f) != 'N') {
++ *context = "Persistent flag is not P or N";
++ r = -EINVAL;
++ goto bad;
++ }
++
++ chunk_size = simple_strtoul(argv[3], &value, 10);
++ if (chunk_size == 0 || *value) {
++ *context = "Invalid chunk size";
++ r = -EINVAL;
++ goto bad;
++ }
++
++ /* Get the extent size for persistent snapshots */
++ if ((*persistent & 0x5f) == 'P') {
++ if (argc < 5) {
++ *context = "No extent size specified";
++ r = -EINVAL;
++ goto bad;
++ }
++
++ extent_size = simple_strtoul(argv[4], &value, 10);
++ if (extent_size == 0 || *value) {
++ *context = "Invalid extent size";
++ r = -EINVAL;
++ goto bad;
++ }
++ }
++
++ s = kmalloc(sizeof(*s), GFP_KERNEL);
++ if (s == NULL) {
++ *context = "Cannot allocate snapshot context private structure";
++ r = -ENOMEM;
++ goto bad;
++ }
++
++ r = dm_table_get_device(t, origin_path, 0, 0, &s->origin);
++ if (r) {
++ *context = "Cannot get origin device";
++ r = -EINVAL;
++ goto bad_free;
++ }
++
++ r = dm_table_get_device(t, cow_path, 0, 0, &s->cow);
++ if (r) {
++ dm_table_put_device(t, s->origin);
++ *context = "Cannot get COW device";
++ r = -EINVAL;
++ goto bad_free;
++ }
++
++ /* Validate the extent and chunk sizes against the device block size */
++ blocksize = get_hardsect_size(s->cow->dev);
++ if (chunk_size % (blocksize / SECTOR_SIZE)) {
++ *context = "Chunk size is not a multiple of device blocksize";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ if (extent_size % (blocksize / SECTOR_SIZE)) {
++ *context = "Extent size is not a multiple of device blocksize";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ /* Check the sizes are small enough to fit in one kiovec */
++ if (chunk_size > KIO_MAX_SECTORS) {
++ *context = "Chunk size is too big";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ if (extent_size > KIO_MAX_SECTORS) {
++ *context = "Extent size is too big";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ /* Check chunk_size is a power of 2 */
++ if (chunk_size & (chunk_size - 1)) {
++ *context = "Chunk size is not a power of 2";
++ r = -EINVAL;
++ goto bad_putdev;
++ }
++
++ s->chunk_size = chunk_size;
++ s->chunk_mask = chunk_size - 1;
++ for (s->chunk_shift = 0; chunk_size;
++ s->chunk_shift++, chunk_size >>= 1) ;
++ s->chunk_shift--; /* the loop counts one shift too many */
++
++ s->valid = 1;
++ init_rwsem(&s->lock);
++
++ /* Allocate hash table for COW data */
++ if (init_hash_tables(s)) {
++ *context = "Unable to allocate hash table space";
++ r = -ENOMEM;
++ goto bad_putdev;
++ }
++
++ /*
++ * Check the persistent flag - done here because we need the iobuf
++ * to check the LV header
++ */
++#if 0
++ if ((*persistent & 0x5f) == 'P')
++ s->store = dm_create_persistent(s, blocksize,
++ extent_size, context);
++ else
++#endif
++ s->store = dm_create_transient(s, blocksize, context);
++
++ if (!s->store) {
++ *context = "Couldn't create exception store";
++ r = -EINVAL;
++ goto bad_free1;
++ }
++
++ /* Allocate the COW iobuf and set associated variables */
++ if (s->store->init &&
++ s->store->init(s->store, blocksize, extent_size, context)) {
++ *context = "Couldn't initialise exception store";
++ r = -ENOMEM;
++ goto bad_free1;
++ }
++
++ /* Flush IO to the origin device */
++ /* FIXME: what does sct have against fsync_dev ? */
++ fsync_dev(s->origin->dev);
++#if LVM_VFS_ENHANCEMENT
++ fsync_dev_lockfs(s->origin->dev);
++#endif
++
++ /* Add snapshot to the list of snapshots for this origin */
++ if (register_snapshot(s)) {
++ r = -EINVAL;
++ *context = "Cannot register snapshot origin";
++ goto bad_free2;
++ }
++
++#if LVM_VFS_ENHANCEMENT
++ unlockfs(s->origin->dev);
++#endif
++
++ *context = s;
++ return 0;
++
++ bad_free2:
++ if (s->store->destroy)
++ s->store->destroy(s->store);
++
++ bad_free1:
++ exit_exception_table(&s->pending, pending_cachep);
++ exit_exception_table(&s->complete, exception_cachep);
++
++ bad_putdev:
++ dm_table_put_device(t, s->cow);
++ dm_table_put_device(t, s->origin);
++
++ bad_free:
++ kfree(s);
++
++ bad:
++ return r;
++}
++
++static void snapshot_dtr(struct dm_table *t, void *context)
++{
++ struct dm_snapshot *s = (struct dm_snapshot *) context;
++
++ unregister_snapshot(s);
++
++ exit_exception_table(&s->pending, pending_cachep);
++ exit_exception_table(&s->complete, exception_cachep);
++
++ /* Deallocate memory used */
++ if (s->store->destroy)
++ s->store->destroy(s->store);
++
++ dm_table_put_device(t, s->origin);
++ dm_table_put_device(t, s->cow);
++ kfree(s);
++}
++
++/*
++ * Performs a new copy on write.
++ */
++static int new_exception(struct dm_snapshot *s, struct buffer_head *bh)
++{
++ struct exception *e;
++ struct pending_exception *pe;
++ chunk_t chunk;
++
++ chunk = sector_to_chunk(s, bh->b_rsector);
++
++ /*
++ * If the exception is in flight then we just defer the
++ * bh until this copy has completed.
++ */
++
++ /* FIXME: great big race. */
++ e = lookup_exception(&s->pending, chunk);
++ if (e) {
++ /* cast the exception to a pending exception */
++ pe = list_entry(e, struct pending_exception, e);
++ bh->b_reqnext = pe->bh;
++ pe->bh = bh;
++ return 0;
++ }
++
++ pe = alloc_pending_exception();
++ if (!pe) {
++ DMWARN("Couldn't allocate inflight_exception.");
++ return -ENOMEM;
++ }
++
++ pe->e.old_chunk = chunk;
++
++ if (s->store->prepare_exception &&
++ s->store->prepare_exception(s->store, &pe->e)) {
++ s->valid = 0;
++ return -ENXIO;
++ }
++
++ bh->b_reqnext = pe->bh;
++ pe->bh = bh;
++ pe->snap = s;
++
++ insert_exception(&s->pending, &pe->e);
++
++ /* Get kcopyd to do the copy */
++ dm_blockcopy(chunk_to_sector(s, pe->e.old_chunk),
++ chunk_to_sector(s, pe->e.new_chunk),
++ s->chunk_size,
++ s->origin->dev,
++ s->cow->dev, SNAPSHOT_COPY_PRIORITY, 0, copy_callback, pe);
++
++ return 1;
++}
++
++static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
++ struct buffer_head *bh)
++{
++ bh->b_rdev = s->cow->dev;
++ bh->b_rsector = chunk_to_sector(s, e->new_chunk) +
++ (bh->b_rsector & s->chunk_mask);
++}
++
++static int snapshot_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct exception *e;
++ struct dm_snapshot *s = (struct dm_snapshot *) context;
++ int r = 1;
++ chunk_t chunk;
++
++ chunk = sector_to_chunk(s, bh->b_rsector);
++
++ /* Full snapshots are not usable */
++ if (!s->valid)
++ return -1;
++
++ /*
++ * Write to snapshot - higher level takes care of RW/RO
++ * flags so we should only get this if we are
++ * writeable.
++ */
++ if (rw == WRITE) {
++
++ down_write(&s->lock);
++
++ /* If the block is already remapped - use that, else remap it */
++ e = lookup_exception(&s->complete, chunk);
++ if (e) {
++ remap_exception(s, e, bh);
++ up_write(&s->lock);
++ return 1;
++ }
++
++ e = lookup_exception(&s->pending, chunk);
++ if (e) {
++ struct pending_exception *pe;
++ pe = list_entry(e, struct pending_exception, e);
++
++ /*
++ * Exception has not been committed to
++ * disk - save this bh
++ */
++ bh->b_reqnext = pe->bh;
++ pe->bh = bh;
++ up_write(&s->lock);
++ return 0;
++ }
++
++ if (new_exception(s, bh))
++ r = -1;
++ else
++ r = 0;
++
++ up_write(&s->lock);
++
++ } else {
++ /*
++ * FIXME: this read path scares me because we
++ * always use the origin when we have a pending
++ * exception. However I can't think of a
++ * situation where this is wrong - ejt.
++ */
++
++ /* Do reads */
++ down_read(&s->lock);
++
++ /* See if it has been remapped */
++ e = lookup_exception(&s->complete, chunk);
++ if (e)
++ remap_exception(s, e, bh);
++ else
++ bh->b_rdev = s->origin->dev;
++
++ up_read(&s->lock);
++ }
++
++ return r;
++}
++
++/*
++ * Called on a write from the origin driver.
++ */
++int dm_do_snapshot(struct dm_dev *origin, struct buffer_head *bh)
++{
++ struct list_head *snap_list;
++ struct origin *o;
++ int r = 1;
++ chunk_t chunk;
++
++ down_read(&_origins_lock);
++ o = __lookup_origin(origin->dev);
++
++ if (o) {
++ struct list_head *origin_snaps = &o->snapshots;
++ struct dm_snapshot *lock_snap;
++
++ /* Lock the metadata */
++ lock_snap = list_entry(origin_snaps->next,
++ struct dm_snapshot, list);
++
++ /* Do all the snapshots on this origin */
++ list_for_each(snap_list, origin_snaps) {
++ struct dm_snapshot *snap;
++ struct exception *e;
++ snap = list_entry(snap_list, struct dm_snapshot, list);
++
++ down_write(&snap->lock);
++
++ /*
++ * Remember different snapshots can have
++ * different chunk sizes.
++ */
++ chunk = sector_to_chunk(snap, bh->b_rsector);
++
++ /* Only deal with valid snapshots */
++ if (snap->valid) {
++ /*
++ * Check exception table to see
++ * if block is already remapped
++ * in this snapshot and mark the
++ * snapshot as needing a COW if
++ * not
++ */
++ e = lookup_exception(&snap->complete, chunk);
++ if (!e && !new_exception(snap, bh))
++ r = 0;
++ }
++
++ up_write(&snap->lock);
++ }
++ }
++
++ up_read(&_origins_lock);
++ return r;
++}
++
++static struct target_type snapshot_target = {
++ name:"snapshot",
++ module:THIS_MODULE,
++ ctr:snapshot_ctr,
++ dtr:snapshot_dtr,
++ map:snapshot_map,
++ err:NULL
++};
++
++int __init dm_snapshot_init(void)
++{
++ int r;
++
++ r = dm_register_target(&snapshot_target);
++ if (r) {
++ DMERR("snapshot target register failed %d", r);
++ return r;
++ }
++
++ r = init_origin_hash();
++ if (r) {
++ DMERR("init_origin_hash failed.");
++ return r;
++ }
++
++ exception_cachep = kmem_cache_create("dm-snapshot-ex",
++ sizeof(struct exception),
++ __alignof__(struct exception),
++ 0, NULL, NULL);
++ if (!exception_cachep) {
++ exit_origin_hash();
++ return -1;
++ }
++
++ pending_cachep =
++ kmem_cache_create("dm-snapshot-in",
++ sizeof(struct pending_exception),
++ __alignof__(struct pending_exception),
++ 0, NULL, NULL);
++ if (!pending_cachep) {
++ exit_origin_hash();
++ kmem_cache_destroy(exception_cachep);
++ return -1;
++ }
++
++ return 0;
++}
++
++void dm_snapshot_exit(void)
++{
++ int r = dm_unregister_target(&snapshot_target);
++
++ if (r < 0)
++ DMERR("Device mapper: Snapshot: unregister failed %d", r);
++
++ exit_origin_hash();
++
++ kmem_cache_destroy(pending_cachep);
++ kmem_cache_destroy(exception_cachep);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -ruN linux-2.4.18/drivers/md/dm-snapshot.h linux/drivers/md/dm-snapshot.h
+--- linux-2.4.18/drivers/md/dm-snapshot.h Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-snapshot.h Thu Mar 14 16:02:50 2002
+@@ -0,0 +1,135 @@
++/*
++ * dm-snapshot.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#ifndef DM_SNAPSHOT_H
++#define DM_SNAPSHOT_H
++
++#include "dm.h"
++#include <linux/blkdev.h>
++
++struct exception_table {
++ uint32_t hash_mask;
++ struct list_head *table;
++};
++
++/*
++ * The snapshot code deals with largish chunks of the disk at a
++ * time. Typically 64k - 256k.
++ */
++/* FIXME: can we get away with limiting these to a uint32_t ? */
++typedef offset_t chunk_t;
++
++struct dm_snapshot {
++ struct rw_semaphore lock;
++
++ struct dm_dev *origin;
++ struct dm_dev *cow;
++
++ /* List of snapshots per Origin */
++ struct list_head list;
++
++ /* Size of data blocks saved - must be a power of 2 */
++ chunk_t chunk_size;
++ chunk_t chunk_mask;
++ chunk_t chunk_shift;
++
++ /* You can't use a snapshot if this is 0 (e.g. if full) */
++ int valid;
++
++ struct exception_table pending;
++ struct exception_table complete;
++
++ /* The on disk metadata handler */
++ struct exception_store *store;
++};
++
++/*
++ * An exception is used where an old chunk of data has been
++ * replaced by a new one.
++ */
++struct exception {
++ struct list_head hash_list;
++
++ chunk_t old_chunk;
++ chunk_t new_chunk;
++};
++
++/*
++ * Abstraction to handle persistent snapshots.
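++ *
++ * All of these hooks are optional: the snapshot code checks each
++ * pointer for NULL before calling it, so a store only needs to fill
++ * in what it actually uses (the transient store, for example, only
++ * provides destroy and prepare_exception).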
++ */
++struct exception_store {
++
++ /*
++ * Destroys this object when you've finished with it.
++ */
++ void (*destroy)(struct exception_store *store);
++
++ /*
++ * Read the metadata and populate the snapshot.
++ */
++ int (*init)(struct exception_store *store,
++ int blocksize, unsigned long extent_size, void **context);
++
++ /*
++ * Find somewhere to store the next exception.
++ */
++ int (*prepare_exception)(struct exception_store *store,
++ struct exception *e);
++
++ /*
++ * Update the metadata with this exception.
++ */
++ int (*commit_exception)(struct exception_store *store,
++ struct exception *e);
++
++ /*
++ * The snapshot is invalid, note this in the metadata.
++ */
++ void (*drop_snapshot)(struct exception_store *store);
++
++ struct dm_snapshot *snap;
++ void *context;
++};
++
++/*
++ * Constructor and destructor for the default persistent
++ * store.
++ */
++struct exception_store *dm_create_persistent(struct dm_snapshot *s,
++ int blocksize,
++ offset_t extent_size,
++ void **error);
++
++struct exception_store *dm_create_transient(struct dm_snapshot *s,
++ int blocksize, void **error);
++
++/*
++ * Return the number of sectors in the device.
++ */
++static inline offset_t get_dev_size(kdev_t dev)
++{
++ int *sizes;
++
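++ /* blk_size[] holds sizes in 1k blocks; '<< 1' converts to 512-byte sectors */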
++ sizes = blk_size[MAJOR(dev)];
++ if (sizes)
++ return sizes[MINOR(dev)] << 1;
++
++ return 0;
++}
++
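++/*
++ * Worked example (illustrative): with a 16-sector chunk
++ * (chunk_mask = 15, chunk_shift = 4), sector 37 belongs to chunk 2,
++ * and chunk 2 starts back at sector 32.
++ */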
++static inline chunk_t sector_to_chunk(struct dm_snapshot *s, offset_t sector)
++{
++ return (sector & ~s->chunk_mask) >> s->chunk_shift;
++}
++
++static inline offset_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
++{
++ return chunk << s->chunk_shift;
++}
++
++#endif
+diff -ruN linux-2.4.18/drivers/md/dm-stripe.c linux/drivers/md/dm-stripe.c
+--- linux-2.4.18/drivers/md/dm-stripe.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-stripe.c Thu Jan 31 17:50:20 2002
+@@ -0,0 +1,202 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++
++struct stripe {
++ struct dm_dev *dev;
++ offset_t physical_start;
++};
++
++struct stripe_c {
++ offset_t logical_start;
++ uint32_t stripes;
++
++ /* The size of this target / num. stripes */
++ uint32_t stripe_width;
++
++ /* stripe chunk size */
++ uint32_t chunk_shift;
++ offset_t chunk_mask;
++
++ struct stripe stripe[0];
++};
++
++static inline struct stripe_c *alloc_context(int stripes)
++{
++ size_t len = sizeof(struct stripe_c) +
++ (sizeof(struct stripe) * stripes);
++
++ return kmalloc(len, GFP_KERNEL);
++}
++
++/*
++ * Parse a single <dev> <sector> pair
++ */
++static int get_stripe(struct dm_table *t, struct stripe_c *sc,
++ int stripe, char **argv)
++{
++ char *end;
++ unsigned long start;
++
++ start = simple_strtoul(argv[1], &end, 10);
++ if (*end)
++ return -EINVAL;
++
++ if (dm_table_get_device(t, argv[0], start, sc->stripe_width,
++ &sc->stripe[stripe].dev))
++ return -ENXIO;
++
++ sc->stripe[stripe].physical_start = start;
++ return 0;
++}
++
++/*
++ * Construct a striped mapping.
++ * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
++ */
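++/*
++ * Illustrative example only (hypothetical devices): a two-way stripe
++ * with 64-sector chunks over two 1024-sector areas would arrive as
++ * argv = { "2", "64", "/dev/hdb1", "0", "/dev/hdc1", "0" }, i.e. a
++ * table line of the form "0 2048 striped 2 64 /dev/hdb1 0 /dev/hdc1 0".
++ */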
++static int stripe_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context)
++{
++ struct stripe_c *sc;
++ uint32_t stripes;
++ uint32_t chunk_size;
++ char *end;
++ int r, i;
++
++ if (argc < 2) {
++ *context = "dm-stripe: Not enough arguments";
++ return -EINVAL;
++ }
++
++ stripes = simple_strtoul(argv[0], &end, 10);
++ if (*end) {
++ *context = "dm-stripe: Invalid stripe count";
++ return -EINVAL;
++ }
++
++ chunk_size = simple_strtoul(argv[1], &end, 10);
++ if (*end) {
++ *context = "dm-stripe: Invalid chunk_size";
++ return -EINVAL;
++ }
++
++ if (l % stripes) {
++ *context = "dm-stripe: Target length not divisable by "
++ "number of stripes";
++ return -EINVAL;
++ }
++
++ sc = alloc_context(stripes);
++ if (!sc) {
++ *context = "dm-stripe: Memory allocation for striped context "
++ "failed";
++ return -ENOMEM;
++ }
++
++ sc->logical_start = b;
++ sc->stripes = stripes;
++ sc->stripe_width = l / stripes;
++
++ /*
++ * chunk_size is a power of two
++ */
++ if (!chunk_size || (chunk_size & (chunk_size - 1))) {
++ *context = "dm-stripe: Invalid chunk size";
++ kfree(sc);
++ return -EINVAL;
++ }
++
++ sc->chunk_mask = chunk_size - 1;
++ for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
++ chunk_size >>= 1;
++ sc->chunk_shift--;
++
++ /*
++ * Get the stripe destinations.
++ */
++ for (i = 0; i < stripes; i++) {
++ if (argc < 2) {
++ *context = "dm-stripe: Not enough destinations "
++ "specified";
++ kfree(sc);
++ return -EINVAL;
++ }
++
++ argv += 2;
++
++ r = get_stripe(t, sc, i, argv);
++ if (r < 0) {
++ *context = "dm-stripe: Couldn't parse stripe "
++ "destination";
++ while (i--)
++ dm_table_put_device(t, sc->stripe[i].dev);
++ kfree(sc);
++ return r;
++ }
++ }
++
++ *context = sc;
++ return 0;
++}
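++
++/*
++ * Worked example (illustrative only - the device paths are made up):
++ * the argument vector
++ *
++ * argv = { "2", "8", "/dev/hda1", "0", "/dev/hdb1", "0" }
++ *
++ * builds a two-way stripe with 8 sector (4k) chunks, stripe 0 starting
++ * at sector 0 of /dev/hda1 and stripe 1 at sector 0 of /dev/hdb1.
++ * stripe_width becomes l / 2, chunk_shift 3 and chunk_mask 7.
++ */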
++
++static void stripe_dtr(struct dm_table *t, void *c)
++{
++ unsigned int i;
++ struct stripe_c *sc = (struct stripe_c *) c;
++
++ for (i = 0; i < sc->stripes; i++)
++ dm_table_put_device(t, sc->stripe[i].dev);
++
++ kfree(sc);
++}
++
++static int stripe_map(struct buffer_head *bh, int rw, void *context)
++{
++ struct stripe_c *sc = (struct stripe_c *) context;
++
++ offset_t offset = bh->b_rsector - sc->logical_start;
++ uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift);
++ uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */
++ chunk = chunk / sc->stripes;
++
++ bh->b_rdev = sc->stripe[stripe].dev->dev;
++ bh->b_rsector = sc->stripe[stripe].physical_start +
++ (chunk << sc->chunk_shift) + (offset & sc->chunk_mask);
++ return 1;
++}
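++
++/*
++ * Mapping example (illustrative): for a two-way stripe with 8 sector
++ * chunks (chunk_shift = 3, chunk_mask = 7) and logical_start = 0, a
++ * request at sector 21 is chunk 2, which is chunk 1 of stripe 0, so it
++ * is remapped to
++ *
++ * sc->stripe[0].physical_start + (1 << 3) + (21 & 7)
++ *
++ * i.e. 13 sectors into stripe 0's area.
++ */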
++
++static struct target_type stripe_target = {
++ name: "striped",
++ module: THIS_MODULE,
++ ctr: stripe_ctr,
++ dtr: stripe_dtr,
++ map: stripe_map,
++};
++
++int __init dm_stripe_init(void)
++{
++ int r;
++
++ r = dm_register_target(&stripe_target);
++ if (r < 0)
++ DMWARN("striped target registration failed");
++
++ return r;
++}
++
++void dm_stripe_exit(void)
++{
++ if (dm_unregister_target(&stripe_target))
++ DMWARN("striped target unregistration failed");
++
++ return;
++}
++
+diff -ruN linux-2.4.18/drivers/md/dm-table.c linux/drivers/md/dm-table.c
+--- linux-2.4.18/drivers/md/dm-table.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-table.c Thu Feb 21 12:46:57 2002
+@@ -0,0 +1,404 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/blkdev.h>
++
++/* ceiling(n / size) * size */
++static inline unsigned long round_up(unsigned long n, unsigned long size)
++{
++ unsigned long r = n % size;
++ return n + (r ? (size - r) : 0);
++}
++
++/* ceiling(n / size) */
++static inline unsigned long div_up(unsigned long n, unsigned long size)
++{
++ return round_up(n, size) / size;
++}
++
++/* similar to ceiling(log_base(n)) */
++static uint int_log(unsigned long n, unsigned long base)
++{
++ int result = 0;
++
++ while (n > 1) {
++ n = div_up(n, base);
++ result++;
++ }
++
++ return result;
++}
++
++/*
++ * return the highest key that you could look up
++ * from the n'th node on level l of the btree.
++ */
++static offset_t high(struct dm_table *t, int l, int n)
++{
++ for (; l < t->depth - 1; l++)
++ n = get_child(n, CHILDREN_PER_NODE - 1);
++
++ if (n >= t->counts[l])
++ return (offset_t) - 1;
++
++ return get_node(t, l, n)[KEYS_PER_NODE - 1];
++}
++
++/*
++ * fills in a level of the btree based on the
++ * highs of the level below it.
++ */
++static int setup_btree_index(int l, struct dm_table *t)
++{
++ int n, k;
++ offset_t *node;
++
++ for (n = 0; n < t->counts[l]; n++) {
++ node = get_node(t, l, n);
++
++ for (k = 0; k < KEYS_PER_NODE; k++)
++ node[k] = high(t, l + 1, get_child(n, k));
++ }
++
++ return 0;
++}
++
++/*
++ * highs and targets are managed as dynamic
++ * arrays during a table load.
++ */
++static int alloc_targets(struct dm_table *t, int num)
++{
++ offset_t *n_highs;
++ struct target *n_targets;
++ int n = t->num_targets;
++ unsigned long size = (sizeof(struct target) + sizeof(offset_t)) * num;
++
++ n_highs = (offset_t *) vmalloc(size);
++ if (!n_highs)
++ return -ENOMEM;
++
++ memset(n_highs, 0, size);
++
++ n_targets = (struct target *) (n_highs + num);
++
++ if (n) {
++ memcpy(n_highs, t->highs, sizeof(*n_highs) * n);
++ memcpy(n_targets, t->targets, sizeof(*n_targets) * n);
++ }
++
++ memset(n_highs + n, -1, sizeof(*n_highs) * (num - n));
++ if (t->highs)
++ vfree(t->highs);
++
++ t->num_allocated = num;
++ t->highs = n_highs;
++ t->targets = n_targets;
++
++ return 0;
++}
++
++int dm_table_create(struct dm_table **result)
++{
++ struct dm_table *t = kmalloc(sizeof(struct dm_table), GFP_NOIO);
++
++ if (!t)
++ return -ENOMEM;
++
++ memset(t, 0, sizeof(*t));
++ INIT_LIST_HEAD(&t->devices);
++
++ /* allocate a single node's worth of targets to begin with */
++ if (alloc_targets(t, KEYS_PER_NODE)) {
++ kfree(t);
++ t = NULL;
++ return -ENOMEM;
++ }
++
++ *result = t;
++ return 0;
++}
++
++static void free_devices(struct list_head *devices)
++{
++ struct list_head *tmp, *next;
++
++ for (tmp = devices->next; tmp != devices; tmp = next) {
++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
++ next = tmp->next;
++ kfree(dd);
++ }
++}
++
++void dm_table_destroy(struct dm_table *t)
++{
++ int i;
++
++ /* free the indexes (see dm_table_complete) */
++ if (t->depth >= 2)
++ vfree(t->index[t->depth - 2]);
++
++ /* free the targets */
++ for (i = 0; i < t->num_targets; i++) {
++ struct target *tgt = &t->targets[i];
++
++ dm_put_target_type(t->targets[i].type);
++
++ if (tgt->type->dtr)
++ tgt->type->dtr(t, tgt->private);
++ }
++
++ vfree(t->highs);
++
++ /* free the device list */
++ if (t->devices.next != &t->devices) {
++ DMWARN("devices still present during destroy: "
++ "dm_table_remove_device calls missing");
++
++ free_devices(&t->devices);
++ }
++
++ kfree(t);
++}
++
++/*
++ * Checks to see if we need to extend highs or targets.
++ */
++static inline int check_space(struct dm_table *t)
++{
++ if (t->num_targets >= t->num_allocated)
++ return alloc_targets(t, t->num_allocated * 2);
++
++ return 0;
++}
++
++/*
++ * Convert a device path to a kdev_t.
++ */
++int lookup_device(const char *path, kdev_t *dev)
++{
++ int r;
++ struct nameidata nd;
++ struct inode *inode;
++
++ if (!path_init(path, LOOKUP_FOLLOW, &nd))
++ return 0;
++
++ if ((r = path_walk(path, &nd)))
++ goto bad;
++
++ inode = nd.dentry->d_inode;
++ if (!inode) {
++ r = -ENOENT;
++ goto bad;
++ }
++
++ if (!S_ISBLK(inode->i_mode)) {
++ r = -EINVAL;
++ goto bad;
++ }
++
++ *dev = inode->i_rdev;
++
++ bad:
++ path_release(&nd);
++ return r;
++}
++
++/*
++ * See if we've already got a device in the list.
++ */
++static struct dm_dev *find_device(struct list_head *l, kdev_t dev)
++{
++ struct list_head *tmp;
++
++ list_for_each(tmp, l) {
++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
++ if (dd->dev == dev)
++ return dd;
++ }
++
++ return NULL;
++}
++
++/*
++ * Open a device so we can use it as a map destination.
++ */
++static int open_dev(struct dm_dev *d)
++{
++ int err;
++
++ if (d->bd)
++ BUG();
++
++ if (!(d->bd = bdget(kdev_t_to_nr(d->dev))))
++ return -ENOMEM;
++
++ if ((err = blkdev_get(d->bd, FMODE_READ | FMODE_WRITE, 0, BDEV_FILE)))
++ return err;
++
++ return 0;
++}
++
++/*
++ * Close a device that we've been using.
++ */
++static void close_dev(struct dm_dev *d)
++{
++ if (!d->bd)
++ return;
++
++ blkdev_put(d->bd, BDEV_FILE);
++ d->bd = NULL;
++}
++
++/*
++ * If possible (ie. blk_size[major] is set), this
++ * checks an area of a destination device is
++ * valid.
++ */
++static int check_device_area(kdev_t dev, offset_t start, offset_t len)
++{
++ int *sizes;
++ offset_t dev_size;
++
++ if (!(sizes = blk_size[MAJOR(dev)]) || !(dev_size = sizes[MINOR(dev)]))
++ /* we don't know the device details,
++ * so give the benefit of the doubt */
++ return 1;
++
++ /* convert to 512-byte sectors */
++ dev_size <<= 1;
++
++ return ((start < dev_size) && (len <= (dev_size - start)));
++}
++
++/*
++ * Add a device to the list, or just increment the usage count
++ * if it's already present.
++ */
++int dm_table_get_device(struct dm_table *t, const char *path,
++ offset_t start, offset_t len, struct dm_dev **result)
++{
++ int r;
++ kdev_t dev;
++ struct dm_dev *dd;
++
++ /* convert the path to a device */
++ if ((r = lookup_device(path, &dev)))
++ return r;
++
++ dd = find_device(&t->devices, dev);
++ if (!dd) {
++ dd = kmalloc(sizeof(*dd), GFP_KERNEL);
++ if (!dd)
++ return -ENOMEM;
++
++ dd->dev = dev;
++ dd->bd = NULL;
++
++ if ((r = open_dev(dd))) {
++ kfree(dd);
++ return r;
++ }
++
++ atomic_set(&dd->count, 0);
++ list_add(&dd->list, &t->devices);
++ }
++ atomic_inc(&dd->count);
++
++ if (!check_device_area(dd->dev, start, len)) {
++ DMWARN("device %s too small for target", path);
++ dm_table_put_device(t, dd);
++ return -EINVAL;
++ }
++
++ *result = dd;
++
++ return 0;
++}
++
++/*
++ * Decrement a device's use count and remove it if necessary.
++ */
++void dm_table_put_device(struct dm_table *t, struct dm_dev *dd)
++{
++ if (atomic_dec_and_test(&dd->count)) {
++ close_dev(dd);
++ list_del(&dd->list);
++ kfree(dd);
++ }
++}
++
++/*
++ * Adds a target to the map
++ */
++int dm_table_add_target(struct dm_table *t, offset_t highs,
++ struct target_type *type, void *private)
++{
++ int r, n;
++
++ if ((r = check_space(t)))
++ return r;
++
++ n = t->num_targets++;
++ t->highs[n] = highs;
++ t->targets[n].type = type;
++ t->targets[n].private = private;
++
++ return 0;
++}
++
++static int setup_indexes(struct dm_table *t)
++{
++ int i, total = 0;
++ offset_t *indexes;
++
++ /* allocate the space for *all* the indexes */
++ for (i = t->depth - 2; i >= 0; i--) {
++ t->counts[i] = div_up(t->counts[i + 1], CHILDREN_PER_NODE);
++ total += t->counts[i];
++ }
++
++ indexes = (offset_t *) vmalloc((unsigned long)NODE_SIZE * total);
++ if (!indexes)
++ return -ENOMEM;
++
++ /* set up internal nodes, bottom-up */
++ for (i = t->depth - 2, total = 0; i >= 0; i--) {
++ t->index[i] = indexes;
++ indexes += (KEYS_PER_NODE * t->counts[i]);
++ setup_btree_index(i, t);
++ }
++
++ return 0;
++}
++
++/*
++ * Builds the btree to index the map
++ */
++int dm_table_complete(struct dm_table *t)
++{
++ int leaf_nodes, r = 0;
++
++ /* how many indexes will the btree have ? */
++ leaf_nodes = div_up(t->num_targets, KEYS_PER_NODE);
++ t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
++
++ /* leaf layer has already been set up */
++ t->counts[t->depth - 1] = leaf_nodes;
++ t->index[t->depth - 1] = t->highs;
++
++ if (t->depth >= 2)
++ r = setup_indexes(t);
++
++ return r;
++}
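++
++/*
++ * Sizing example (illustrative, assuming a 32 byte NODE_SIZE and a
++ * 4 byte offset_t, so KEYS_PER_NODE = 8 and CHILDREN_PER_NODE = 9):
++ * a table with 100 targets needs div_up(100, 8) = 13 leaf nodes, so
++ * depth = 1 + int_log(13, 9) = 3. setup_indexes() then builds
++ * counts[1] = div_up(13, 9) = 2 internal nodes above the leaves and
++ * counts[0] = 1 root node above those.
++ */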
++
++EXPORT_SYMBOL(dm_table_get_device);
++EXPORT_SYMBOL(dm_table_put_device);
+diff -ruN linux-2.4.18/drivers/md/dm-target.c linux/drivers/md/dm-target.c
+--- linux-2.4.18/drivers/md/dm-target.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm-target.c Tue Jan 8 17:57:45 2002
+@@ -0,0 +1,241 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/kmod.h>
++
++struct tt_internal {
++ struct target_type tt;
++
++ struct list_head list;
++ long use;
++};
++
++static LIST_HEAD(_targets);
++static rwlock_t _lock = RW_LOCK_UNLOCKED;
++
++#define DM_MOD_NAME_SIZE 32
++
++/*
++ * Destructively splits up the argument list to pass to ctr.
++ */
++int split_args(int max, int *argc, char **argv, char *input)
++{
++ char *start, *end = input, *out;
++ *argc = 0;
++
++ while (1) {
++ start = end;
++
++ /* Skip whitespace */
++ while (*start && isspace(*start))
++ start++;
++
++ if (!*start)
++ break; /* success, we hit the end */
++
++ /* 'out' is used to strip the backslash quoting */
++ end = out = start;
++ while (*end) {
++ /* Everything apart from '\0' can be quoted */
++ if (*end == '\\' && *(end + 1)) {
++ *out++ = *(end + 1);
++ end += 2;
++ continue;
++ }
++
++ if (isspace(*end))
++ break; /* end of token */
++
++ *out++ = *end++;
++ }
++
++ /* have we already filled the array ? */
++ if ((*argc + 1) > max)
++ return -EINVAL;
++
++ /* we know this is whitespace */
++ if (*end)
++ end++;
++
++ /* terminate the string and put it in the array */
++ *out = '\0';
++ argv[*argc] = start;
++ (*argc)++;
++ }
++
++ return 0;
++}
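++
++/*
++ * Example (illustrative - the device name is made up): the parameter
++ * string
++ *
++ * "/dev/my\ disk 0"
++ *
++ * splits into argc = 2 with argv[0] = "/dev/my disk" and argv[1] = "0";
++ * the backslash keeps the space inside the first token and is dropped
++ * from the output.
++ */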
++
++static inline struct tt_internal *__find_target_type(const char *name)
++{
++ struct list_head *tih;
++ struct tt_internal *ti;
++
++ list_for_each(tih, &_targets) {
++ ti = list_entry(tih, struct tt_internal, list);
++
++ if (!strcmp(name, ti->tt.name))
++ return ti;
++ }
++
++ return NULL;
++}
++
++static struct tt_internal *get_target_type(const char *name)
++{
++ struct tt_internal *ti;
++
++ read_lock(&_lock);
++ ti = __find_target_type(name);
++
++ if (ti) {
++ if (ti->use == 0 && ti->tt.module)
++ __MOD_INC_USE_COUNT(ti->tt.module);
++ ti->use++;
++ }
++ read_unlock(&_lock);
++
++ return ti;
++}
++
++static void load_module(const char *name)
++{
++ char module_name[DM_MOD_NAME_SIZE] = "dm-";
++
++ /* Length check for strcat() below */
++ if (strlen(name) > (DM_MOD_NAME_SIZE - 4))
++ return;
++
++ strcat(module_name, name);
++ request_module(module_name);
++
++ return;
++}
++
++struct target_type *dm_get_target_type(const char *name)
++{
++ struct tt_internal *ti = get_target_type(name);
++
++ if (!ti) {
++ load_module(name);
++ ti = get_target_type(name);
++ }
++
++ return ti ? &ti->tt : NULL;
++}
++
++void dm_put_target_type(struct target_type *t)
++{
++ struct tt_internal *ti = (struct tt_internal *) t;
++
++ read_lock(&_lock);
++ if (--ti->use == 0 && ti->tt.module)
++ __MOD_DEC_USE_COUNT(ti->tt.module);
++
++ if (ti->use < 0)
++ BUG();
++ read_unlock(&_lock);
++
++ return;
++}
++
++static struct tt_internal *alloc_target(struct target_type *t)
++{
++ struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
++
++ if (ti) {
++ memset(ti, 0, sizeof(*ti));
++ ti->tt = *t;
++ }
++
++ return ti;
++}
++
++int dm_register_target(struct target_type *t)
++{
++ int rv = 0;
++ struct tt_internal *ti = alloc_target(t);
++
++ if (!ti)
++ return -ENOMEM;
++
++ write_lock(&_lock);
++ if (__find_target_type(t->name))
++ rv = -EEXIST;
++ else
++ list_add(&ti->list, &_targets);
++
++ write_unlock(&_lock);
++ return rv;
++}
++
++int dm_unregister_target(struct target_type *t)
++{
++ struct tt_internal *ti;
++
++ write_lock(&_lock);
++ if (!(ti = __find_target_type(t->name))) {
++ write_unlock(&_lock);
++ return -EINVAL;
++ }
++
++ if (ti->use) {
++ write_unlock(&_lock);
++ return -ETXTBSY;
++ }
++
++ list_del(&ti->list);
++ kfree(ti);
++
++ write_unlock(&_lock);
++ return 0;
++}
++
++/*
++ * io-err: always fails an I/O, useful for bringing
++ * up LVs that have holes in them.
++ */
++static int io_err_ctr(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **args, void **context)
++{
++ *context = NULL;
++ return 0;
++}
++
++static void io_err_dtr(struct dm_table *t, void *c)
++{
++ /* empty */
++ return;
++}
++
++static int io_err_map(struct buffer_head *bh, int rw, void *context)
++{
++ buffer_IO_error(bh);
++ return 0;
++}
++
++static struct target_type error_target = {
++ name: "error",
++ ctr: io_err_ctr,
++ dtr: io_err_dtr,
++ map: io_err_map,
++};
++
++int dm_target_init(void)
++{
++ return dm_register_target(&error_target);
++}
++
++void dm_target_exit(void)
++{
++ if (dm_unregister_target(&error_target))
++ DMWARN("error target unregistration failed");
++}
++
++EXPORT_SYMBOL(dm_register_target);
++EXPORT_SYMBOL(dm_unregister_target);
+diff -ruN linux-2.4.18/drivers/md/dm.c linux/drivers/md/dm.c
+--- linux-2.4.18/drivers/md/dm.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm.c Thu Mar 14 16:32:58 2002
+@@ -0,0 +1,1130 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/blk.h>
++#include <linux/blkpg.h>
++
++/* we only need this for the lv_bmap struct definition, not happy */
++#include <linux/lvm.h>
++
++#define DEFAULT_READ_AHEAD 64
++
++static const char *_name = DM_NAME;
++static const char *_version = "0.94.08-ioctl-cvs (2002-03-14)";
++static const char *_email = "lvm-devel@lists.sistina.com";
++
++static int major = 0;
++static int _major = 0;
++
++struct io_hook {
++ struct mapped_device *md;
++ struct target *target;
++ int rw;
++
++ void (*end_io) (struct buffer_head * bh, int uptodate);
++ void *context;
++};
++
++static kmem_cache_t *_io_hook_cache;
++
++static struct mapped_device *_devs[MAX_DEVICES];
++static struct rw_semaphore _dev_locks[MAX_DEVICES];
++
++/*
++ * This lock is only held by dm_create and dm_set_name to avoid
++ * race conditions where someone else may create a device with
++ * the same name.
++ */
++static spinlock_t _create_lock = SPIN_LOCK_UNLOCKED;
++
++/* block device arrays */
++static int _block_size[MAX_DEVICES];
++static int _blksize_size[MAX_DEVICES];
++static int _hardsect_size[MAX_DEVICES];
++
++static devfs_handle_t _dev_dir;
++
++static int request(request_queue_t * q, int rw, struct buffer_head *bh);
++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
++
++/*
++ * Protect the mapped_devices referenced from _dev[]
++ */
++struct mapped_device *dm_get_r(int minor)
++{
++ struct mapped_device *md;
++
++ if (minor >= MAX_DEVICES)
++ return NULL;
++
++ down_read(_dev_locks + minor);
++ md = _devs[minor];
++ if (!md)
++ up_read(_dev_locks + minor);
++
++ return md;
++}
++
++struct mapped_device *dm_get_w(int minor)
++{
++ struct mapped_device *md;
++
++ if (minor >= MAX_DEVICES)
++ return NULL;
++
++ down_write(_dev_locks + minor);
++ md = _devs[minor];
++ if (!md)
++ up_write(_dev_locks + minor);
++
++ return md;
++}
++
++/*
++ * The interface (e.g. ioctl) will probably access the devices
++ * through these slow 'by name' locks; this needs improving at
++ * some point if people start playing with *large* numbers of dm
++ * devices.
++ */
++struct mapped_device *dm_get_name_r(const char *name)
++{
++ int i;
++ struct mapped_device *md;
++
++ for (i = 0; i < MAX_DEVICES; i++) {
++ md = dm_get_r(i);
++ if (md) {
++ if (!strcmp(md->name, name))
++ return md;
++
++ dm_put_r(i);
++ }
++ }
++
++ return NULL;
++}
++
++struct mapped_device *dm_get_name_w(const char *name)
++{
++ int i;
++ struct mapped_device *md;
++
++ /*
++ * To avoid getting write locks on all the devices, we try
++ * to promote a read lock to a write lock; this can
++ * fail, in which case we just start again.
++ */
++
++ restart:
++
++ for (i = 0; i < MAX_DEVICES; i++) {
++ md = dm_get_r(i);
++ if (md) {
++ if (strcmp(md->name, name))
++ dm_put_r(i);
++ else {
++ /* found it */
++ dm_put_r(i);
++
++ md = dm_get_w(i);
++ if (!md)
++ goto restart;
++ if (strcmp(md->name, name)) {
++ dm_put_w(i);
++ goto restart;
++ }
++
++ return md;
++
++ }
++ }
++ }
++
++ return NULL;
++}
++
++void dm_put_r(int minor)
++{
++ if (minor >= MAX_DEVICES)
++ return;
++
++ up_read(_dev_locks + minor);
++}
++
++void dm_put_w(int minor)
++{
++ if (minor >= MAX_DEVICES)
++ return;
++
++ up_write(_dev_locks + minor);
++}
++
++/*
++ * Setup and tear down the driver
++ */
++static __init void init_locks(void)
++{
++ int i;
++
++ for (i = 0; i < MAX_DEVICES; i++)
++ init_rwsem(_dev_locks + i);
++}
++
++static __init int local_init(void)
++{
++ int r;
++
++ init_locks();
++
++ /* allocate a slab for the io-hooks */
++ if (!_io_hook_cache &&
++ !(_io_hook_cache = kmem_cache_create("dm io hooks",
++ sizeof(struct io_hook),
++ 0, 0, NULL, NULL)))
++ return -ENOMEM;
++
++ _major = major;
++ r = devfs_register_blkdev(_major, _name, &dm_blk_dops);
++ if (r < 0) {
++ DMERR("register_blkdev failed");
++ kmem_cache_destroy(_io_hook_cache);
++ return r;
++ }
++
++ if (!_major)
++ _major = r;
++
++ /* set up the arrays */
++ read_ahead[_major] = DEFAULT_READ_AHEAD;
++ blk_size[_major] = _block_size;
++ blksize_size[_major] = _blksize_size;
++ hardsect_size[_major] = _hardsect_size;
++
++ blk_queue_make_request(BLK_DEFAULT_QUEUE(_major), request);
++
++ _dev_dir = devfs_mk_dir(0, DM_DIR, NULL);
++
++ DMINFO("%s initialised: %s", _version, _email);
++ return 0;
++}
++
++static void local_exit(void)
++{
++ if (kmem_cache_destroy(_io_hook_cache))
++ DMWARN("io_hooks still allocated during unregistration");
++ _io_hook_cache = NULL;
++
++ if (devfs_unregister_blkdev(_major, _name) < 0)
++ DMERR("devfs_unregister_blkdev failed");
++
++ read_ahead[_major] = 0;
++ blk_size[_major] = NULL;
++ blksize_size[_major] = NULL;
++ hardsect_size[_major] = NULL;
++ _major = 0;
++
++ DMINFO("%s cleaned up", _version);
++}
++
++static int __init dm_init(void)
++{
++ int r;
++
++ r = local_init();
++ if (r)
++ goto out;
++
++ r = dm_target_init();
++ if (r)
++ goto out_local;
++
++ r = dm_linear_init();
++ if (r)
++ goto out_target;
++
++ r = dm_stripe_init();
++ if (r)
++ goto out_linear;
++
++ r = kcopyd_init();
++ if (r)
++ goto out_stripe;
++
++ r = dm_snapshot_init();
++ if (r)
++ goto out_kcopyd;
++
++ r = dm_origin_init();
++ if (r)
++ goto out_snapshot;
++
++ r = dm_mirror_init();
++ if (r)
++ goto out_origin;
++
++ r = dm_interface_init();
++ if (r)
++ goto out_mirror;
++
++ return 0;
++
++ out_mirror:
++ dm_mirror_exit();
++ out_origin:
++ dm_origin_exit();
++ out_snapshot:
++ dm_snapshot_exit();
++ out_kcopyd:
++ kcopyd_exit();
++ out_stripe:
++ dm_stripe_exit();
++ out_linear:
++ dm_linear_exit();
++ out_target:
++ dm_target_exit();
++ out_local:
++ local_exit();
++ out:
++ return r;
++}
++
++static void __exit dm_exit(void)
++{
++ dm_destroy_all();
++ dm_interface_exit();
++ dm_stripe_exit();
++ dm_linear_exit();
++ dm_snapshot_exit();
++ dm_origin_exit();
++ dm_mirror_exit();
++ kcopyd_exit();
++ dm_target_exit();
++ local_exit();
++}
++
++/*
++ * Block device functions
++ */
++static int dm_blk_open(struct inode *inode, struct file *file)
++{
++ int minor = MINOR(inode->i_rdev);
++ struct mapped_device *md;
++
++ md = dm_get_w(minor);
++ if (!md)
++ return -ENXIO;
++
++ md->use_count++;
++ dm_put_w(minor);
++
++ return 0;
++}
++
++static int dm_blk_close(struct inode *inode, struct file *file)
++{
++ int minor = MINOR(inode->i_rdev);
++ struct mapped_device *md;
++
++ md = dm_get_w(minor);
++ if (!md)
++ return -ENXIO;
++
++ if (md->use_count < 1)
++ DMWARN("incorrect reference count found in mapped_device");
++
++ md->use_count--;
++ dm_put_w(minor);
++
++ return 0;
++}
++
++/* In 512-byte units */
++#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1)
++
++static int dm_blk_ioctl(struct inode *inode, struct file *file,
++ uint command, unsigned long a)
++{
++ int minor = MINOR(inode->i_rdev);
++ long size;
++
++ if (minor >= MAX_DEVICES)
++ return -ENXIO;
++
++ switch (command) {
++ case BLKSSZGET:
++ case BLKBSZGET:
++ case BLKROGET:
++ case BLKROSET:
++ case BLKRASET:
++ case BLKRAGET:
++ case BLKFLSBUF:
++#if 0 /* Future stacking block device */
++ case BLKELVSET:
++ case BLKELVGET:
++#endif
++ return blk_ioctl(inode->i_rdev, command, a);
++ break;
++
++ case BLKGETSIZE:
++ size = VOLUME_SIZE(minor);
++ if (copy_to_user((void *) a, &size, sizeof(long)))
++ return -EFAULT;
++ break;
++
++ case BLKGETSIZE64:
++ size = VOLUME_SIZE(minor);
++ if (put_user((u64) size, (u64 *) a))
++ return -EFAULT;
++ break;
++
++ case BLKRRPART:
++ return -EINVAL;
++
++ case LV_BMAP:
++ return dm_user_bmap(inode, (struct lv_bmap *) a);
++
++ default:
++ DMWARN("unknown block ioctl 0x%x", command);
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static inline struct io_hook *alloc_io_hook(void)
++{
++ return kmem_cache_alloc(_io_hook_cache, GFP_NOIO);
++}
++
++static inline void free_io_hook(struct io_hook *ih)
++{
++ kmem_cache_free(_io_hook_cache, ih);
++}
++
++/*
++ * FIXME: We need to decide if deferred_io structs need
++ * their own slab; I say no for now since they are
++ * only used when the device is suspended.
++ */
++static inline struct deferred_io *alloc_deferred(void)
++{
++ return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
++}
++
++static inline void free_deferred(struct deferred_io *di)
++{
++ kfree(di);
++}
++
++/*
++ * Call a target's optional error function if an I/O failed.
++ */
++static inline int call_err_fn(struct io_hook *ih, struct buffer_head *bh)
++{
++ dm_err_fn err = ih->target->type->err;
++
++ if (err)
++ return err(bh, ih->rw, ih->target->private);
++
++ return 0;
++}
++
++/*
++ * bh->b_end_io routine that decrements the pending count
++ * and then calls the original bh->b_end_io fn.
++ */
++static void dec_pending(struct buffer_head *bh, int uptodate)
++{
++ struct io_hook *ih = bh->b_bdev_private;
++
++ if (!uptodate && call_err_fn(ih, bh))
++ return;
++
++ if (atomic_dec_and_test(&ih->md->pending))
++ /* nudge anyone waiting on suspend queue */
++ wake_up(&ih->md->wait);
++
++ bh->b_end_io = ih->end_io;
++ bh->b_bdev_private = ih->context;
++ free_io_hook(ih);
++
++ bh->b_end_io(bh, uptodate);
++}
++
++/*
++ * Add the bh to the list of deferred io.
++ */
++static int queue_io(struct buffer_head *bh, int rw)
++{
++ struct deferred_io *di = alloc_deferred();
++ struct mapped_device *md;
++ int minor = MINOR(bh->b_rdev);
++
++ if (!di)
++ return -ENOMEM;
++
++ md = dm_get_w(minor);
++ if (!md) {
++ free_deferred(di);
++ return -ENXIO;
++ }
++
++ if (!md->suspended) {
++ dm_put_w(minor);
++ free_deferred(di);
++ return 1;
++ }
++
++ di->bh = bh;
++ di->rw = rw;
++ di->next = md->deferred;
++ md->deferred = di;
++
++ dm_put_w(minor);
++
++ return 0; /* deferred successfully */
++}
++
++/*
++ * Do the bh mapping for a given leaf
++ */
++static inline int __map_buffer(struct mapped_device *md,
++ struct buffer_head *bh, int rw, int leaf)
++{
++ int r;
++ dm_map_fn fn;
++ void *context;
++ struct io_hook *ih = NULL;
++ struct target *ti = md->map->targets + leaf;
++
++ fn = ti->type->map;
++ context = ti->private;
++
++ ih = alloc_io_hook();
++
++ if (!ih)
++ return -1;
++
++ ih->md = md;
++ ih->rw = rw;
++ ih->target = ti;
++ ih->end_io = bh->b_end_io;
++ ih->context = bh->b_bdev_private;
++
++ r = fn(bh, rw, context);
++
++ if (r > 0) {
++ /* hook the end io request fn */
++ atomic_inc(&md->pending);
++ bh->b_end_io = dec_pending;
++ bh->b_bdev_private = ih;
++
++ } else if (r == 0)
++ /* we don't need to hook */
++ free_io_hook(ih);
++
++ else if (r < 0) {
++ free_io_hook(ih);
++ return -1;
++ }
++
++ return r;
++}
++
++/*
++ * Search the btree for the correct target.
++ */
++static inline int __find_node(struct dm_table *t, struct buffer_head *bh)
++{
++ int l, n = 0, k = 0;
++ offset_t *node;
++
++ for (l = 0; l < t->depth; l++) {
++ n = get_child(n, k);
++ node = get_node(t, l, n);
++
++ for (k = 0; k < KEYS_PER_NODE; k++)
++ if (node[k] >= bh->b_rsector)
++ break;
++ }
++
++ return (KEYS_PER_NODE * n) + k;
++}
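++
++/*
++ * Lookup example (illustrative): a single-level table whose three
++ * targets end at sectors 99, 499 and 999 has highs = { 99, 499, 999,
++ * -1, ... } (alloc_targets fills unused slots with -1, the largest
++ * offset_t). A buffer head at sector 250 stops at k = 1 and is
++ * handled by the second target.
++ */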
++
++static int request(request_queue_t * q, int rw, struct buffer_head *bh)
++{
++ struct mapped_device *md;
++ int r, minor = MINOR(bh->b_rdev);
++
++ md = dm_get_r(minor);
++ if (!md) {
++ buffer_IO_error(bh);
++ return 0;
++ }
++
++ /*
++ * If we're suspended we have to queue
++ * this io for later.
++ */
++ while (md->suspended) {
++ dm_put_r(minor);
++
++ if (rw == READA)
++ goto bad_no_lock;
++
++ r = queue_io(bh, rw);
++
++ if (r < 0)
++ goto bad_no_lock;
++
++ else if (r == 0)
++ return 0; /* deferred successfully */
++
++ /*
++ * We're in a while loop, because someone could suspend
++ * before we get to the following read lock.
++ */
++ md = dm_get_r(minor);
++ if (!md) {
++ buffer_IO_error(bh);
++ return 0;
++ }
++ }
++
++ if ((r = __map_buffer(md, bh, rw, __find_node(md->map, bh))) < 0)
++ goto bad;
++
++ dm_put_r(minor);
++ return r;
++
++ bad:
++ dm_put_r(minor);
++
++ bad_no_lock:
++ buffer_IO_error(bh);
++ return 0;
++}
++
++static int check_dev_size(int minor, unsigned long block)
++{
++ /* FIXME: check this */
++ unsigned long max_sector = (_block_size[minor] << 1) + 1;
++ unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9);
++
++ return (sector > max_sector) ? 0 : 1;
++}
++
++/*
++ * Creates a dummy buffer head and maps it (for lilo).
++ */
++static int do_bmap(kdev_t dev, unsigned long block,
++ kdev_t * r_dev, unsigned long *r_block)
++{
++ struct mapped_device *md;
++ struct buffer_head bh;
++ int minor = MINOR(dev), r;
++ struct target *t;
++
++ md = dm_get_r(minor);
++ if (!md)
++ return -ENXIO;
++
++ if (md->suspended) {
++ dm_put_r(minor);
++ return -EPERM;
++ }
++
++ if (!check_dev_size(minor, block)) {
++ dm_put_r(minor);
++ return -EINVAL;
++ }
++
++ /* setup dummy bh */
++ memset(&bh, 0, sizeof(bh));
++ bh.b_blocknr = block;
++ bh.b_dev = bh.b_rdev = dev;
++ bh.b_size = _blksize_size[minor];
++ bh.b_rsector = block * (bh.b_size >> 9);
++
++ /* find target */
++ t = md->map->targets + __find_node(md->map, &bh);
++
++ /* do the mapping */
++ r = t->type->map(&bh, READ, t->private);
++
++ *r_dev = bh.b_rdev;
++ *r_block = bh.b_rsector / (bh.b_size >> 9);
++
++ dm_put_r(minor);
++ return r;
++}
++
++/*
++ * Marshals arguments and results between user and kernel space.
++ */
++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb)
++{
++ unsigned long block, r_block;
++ kdev_t r_dev;
++ int r;
++
++ if (get_user(block, &lvb->lv_block))
++ return -EFAULT;
++
++ if ((r = do_bmap(inode->i_rdev, block, &r_dev, &r_block)))
++ return r;
++
++ if (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) ||
++ put_user(r_block, &lvb->lv_block))
++ return -EFAULT;
++
++ return 0;
++}
++
++/*
++ * See if the device with a specific minor # is free. The write
++ * lock is held when it returns successfully.
++ */
++static inline int specific_dev(int minor, struct mapped_device *md)
++{
++ if (minor >= MAX_DEVICES) {
++ DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)",
++ MAX_DEVICES);
++ return -1;
++ }
++
++ down_write(_dev_locks + minor);
++ if (_devs[minor]) {
++ /* in use */
++ up_write(_dev_locks + minor);
++ return -1;
++ }
++
++ _devs[minor] = md;
++ return minor;
++}
++
++/*
++ * Find the first free device. Again the write lock is held on
++ * success.
++ */
++static int any_old_dev(struct mapped_device *md)
++{
++ int i;
++
++ for (i = 0; i < MAX_DEVICES; i++)
++ if (specific_dev(i, md) != -1)
++ return i;
++
++ return -1;
++}
++
++/*
++ * Allocate and initialise a blank device. Device is returned
++ * with a write lock held.
++ */
++static struct mapped_device *alloc_dev(const char *name, const char *uuid,
++ int minor)
++{
++ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
++
++ if (!md) {
++ DMWARN("unable to allocate device, out of memory.");
++ return NULL;
++ }
++
++ memset(md, 0, sizeof(*md));
++
++ /*
++ * This grabs the write lock if it succeeds.
++ */
++ minor = (minor < 0) ? any_old_dev(md) : specific_dev(minor, md);
++ if (minor < 0) {
++ kfree(md);
++ return NULL;
++ }
++
++ _devs[minor] = md;
++ md->dev = MKDEV(_major, minor);
++ md->suspended = 0;
++
++ strncpy(md->name, name, sizeof(md->name) - 1);
++ md->name[sizeof(md->name) - 1] = '\0';
++
++ if (*uuid) {
++ if (!(md->uuid = kmalloc(DM_UUID_LEN, GFP_KERNEL))) {
++ DMWARN("unable to allocate uuid - out of memory.");
++ return NULL;
++ }
++
++ strncpy(md->uuid, uuid, DM_UUID_LEN - 1);
++ md->uuid[DM_UUID_LEN - 1] = '\0';
++ }
++
++ init_waitqueue_head(&md->wait);
++
++ return md;
++}
++
++static int __register_device(struct mapped_device *md)
++{
++ md->devfs_entry =
++ devfs_register(_dev_dir, md->name, DEVFS_FL_CURRENT_OWNER,
++ MAJOR(md->dev), MINOR(md->dev),
++ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
++ &dm_blk_dops, NULL);
++
++ return 0;
++}
++
++static int __unregister_device(struct mapped_device *md)
++{
++ devfs_unregister(md->devfs_entry);
++ return 0;
++}
++
++/*
++ * The hardsect size for a mapped device is the smallest hardsect size
++ * from the devices it maps onto.
++ */
++static int __find_hardsect_size(struct list_head *devices)
++{
++ int result = INT_MAX, size;
++ struct list_head *tmp;
++
++ list_for_each(tmp, devices) {
++ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
++ size = get_hardsect_size(dd->dev);
++ if (size < result)
++ result = size;
++ }
++ return result;
++}
++
++/*
++ * Bind a table to the device.
++ */
++static int __bind(struct mapped_device *md, struct dm_table *t)
++{
++ int minor = MINOR(md->dev);
++
++ md->map = t;
++
++ if (!t->num_targets) {
++ _block_size[minor] = 0;
++ _blksize_size[minor] = BLOCK_SIZE;
++ _hardsect_size[minor] = 0;
++ return 0;
++ }
++
++ /* in k */
++ _block_size[minor] = (t->highs[t->num_targets - 1] + 1) >> 1;
++
++ _blksize_size[minor] = BLOCK_SIZE;
++ _hardsect_size[minor] = __find_hardsect_size(&t->devices);
++ register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]);
++
++ return 0;
++}
++
++static void __unbind(struct mapped_device *md)
++{
++ int minor = MINOR(md->dev);
++
++ dm_table_destroy(md->map);
++ md->map = NULL;
++
++ _block_size[minor] = 0;
++ _blksize_size[minor] = 0;
++ _hardsect_size[minor] = 0;
++}
++
++static int check_name(const char *name)
++{
++ struct mapped_device *md;
++
++ if (strchr(name, '/') || strlen(name) > DM_NAME_LEN) {
++ DMWARN("invalid device name");
++ return -1;
++ }
++
++ md = dm_get_name_r(name);
++
++ if (md) {
++ dm_put_r(MINOR(md->dev));
++ DMWARN("device name already in use");
++ return -1;
++ }
++
++ return 0;
++}
++
++/*
++ * Constructor for a new device
++ */
++int dm_create(const char *name, const char *uuid,
++ int minor, struct dm_table *table)
++{
++ int r;
++ struct mapped_device *md;
++
++ spin_lock(&_create_lock);
++ if (check_name(name) < 0) {
++ spin_unlock(&_create_lock);
++ return -EINVAL;
++ }
++
++ md = alloc_dev(name, uuid, minor);
++ if (!md) {
++ spin_unlock(&_create_lock);
++ return -ENXIO;
++ }
++ minor = MINOR(md->dev);
++
++ r = __register_device(md);
++ if (r)
++ goto err;
++
++ r = __bind(md, table);
++ if (r)
++ goto err;
++
++ dm_put_w(minor);
++ spin_unlock(&_create_lock);
++ return 0;
++
++ err:
++ _devs[minor] = NULL;
++ if (md->uuid)
++ kfree(md->uuid);
++
++ kfree(md);
++ dm_put_w(minor);
++ spin_unlock(&_create_lock);
++ return r;
++}
++
++/*
++ * Renames the device. No lock held.
++ */
++int dm_set_name(const char *oldname, const char *newname)
++{
++ int r, minor;
++ struct mapped_device *md;
++
++ spin_lock(&_create_lock);
++ if (check_name(newname) < 0) {
++ spin_unlock(&_create_lock);
++ return -EINVAL;
++ }
++
++ md = dm_get_name_w(oldname);
++ if (!md) {
++ spin_unlock(&_create_lock);
++ return -ENXIO;
++ }
++ minor = MINOR(md->dev);
++
++ r = __unregister_device(md);
++ if (r)
++ goto out;
++
++ strcpy(md->name, newname);
++ r = __register_device(md);
++
++ out:
++ dm_put_w(minor);
++ spin_unlock(&_create_lock);
++ return r;
++}
++
++/*
++ * Destructor for the device. You cannot destroy an open
++ * device. Write lock must be held before calling.
++ */
++int dm_destroy(struct mapped_device *md)
++{
++ int minor, r;
++
++ if (md->use_count)
++ return -EPERM;
++
++ r = __unregister_device(md);
++ if (r)
++ return r;
++
++ minor = MINOR(md->dev);
++ _devs[minor] = NULL;
++ __unbind(md);
++
++ if (md->uuid)
++ kfree(md->uuid);
++
++ kfree(md);
++
++ return 0;
++}
++
++/*
++ * Destroy all devices - except open ones
++ */
++void dm_destroy_all(void)
++{
++ int i;
++ struct mapped_device *md;
++
++ for (i = 0; i < MAX_DEVICES; i++) {
++ md = dm_get_w(i);
++ if (!md)
++ continue;
++
++ dm_destroy(md);
++ dm_put_w(i);
++ }
++}
++
++/*
++ * Sets or clears the read-only flag for the device. Write lock
++ * must be held.
++ */
++void dm_set_ro(struct mapped_device *md, int ro)
++{
++ md->read_only = ro;
++ set_device_ro(md->dev, ro);
++}
++
++/*
++ * A target is notifying us of some event
++ */
++void dm_notify(void *target)
++{
++}
++
++/*
++ * Requeue the deferred buffer_heads by calling generic_make_request.
++ */
++static void flush_deferred_io(struct deferred_io *c)
++{
++ struct deferred_io *n;
++
++ while (c) {
++ n = c->next;
++ generic_make_request(c->rw, c->bh);
++ free_deferred(c);
++ c = n;
++ }
++}
++
++/*
++ * Swap in a new table (destroying old one). Write lock must be
++ * held.
++ */
++int dm_swap_table(struct mapped_device *md, struct dm_table *table)
++{
++ int r;
++
++ /* device must be suspended */
++ if (!md->suspended)
++ return -EPERM;
++
++ __unbind(md);
++
++ r = __bind(md, table);
++ if (r)
++ return r;
++
++ return 0;
++}
++
++/*
++ * We need to be able to change a mapping table under a mounted
++ * filesystem. For example, we might want to move some data in
++ * the background. Before the table can be swapped with
++ * dm_swap_table, dm_suspend must be called to flush any
++ * in-flight buffer_heads and ensure that any further io gets
++ * deferred. Write lock must be held.
++ */
++int dm_suspend(struct mapped_device *md)
++{
++ int minor = MINOR(md->dev);
++ DECLARE_WAITQUEUE(wait, current);
++
++ if (md->suspended)
++ return -EINVAL;
++
++ md->suspended = 1;
++ dm_put_w(minor);
++
++ /* wait for all the pending io to flush */
++ add_wait_queue(&md->wait, &wait);
++ current->state = TASK_UNINTERRUPTIBLE;
++ do {
++ md = dm_get_w(minor);
++ if (!md) {
++ /* Caller expects to free this lock. Yuck. */
++ down_write(_dev_locks + minor);
++ return -ENXIO;
++ }
++
++ if (!atomic_read(&md->pending))
++ break;
++
++ dm_put_w(minor);
++ schedule();
++
++ } while (1);
++
++ current->state = TASK_RUNNING;
++ remove_wait_queue(&md->wait, &wait);
++
++ return 0;
++}
++
++int dm_resume(struct mapped_device *md)
++{
++ int minor = MINOR(md->dev);
++ struct deferred_io *def;
++
++ if (!md->suspended || !md->map->num_targets)
++ return -EINVAL;
++
++ md->suspended = 0;
++ def = md->deferred;
++ md->deferred = NULL;
++
++ dm_put_w(minor);
++ flush_deferred_io(def);
++ fsync_dev(md->dev);
++ if (!dm_get_w(minor)) {
++ /* FIXME: yuck */
++ down_write(_dev_locks + minor);
++ return -ENXIO;
++ }
++
++ return 0;
++}
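++
++/*
++ * Typical reload sequence (illustrative), with the write lock held on
++ * the device:
++ *
++ * dm_suspend(md); (flush in-flight io, start deferring new io)
++ * dm_swap_table(md, t); (bind the new table)
++ * dm_resume(md); (requeue the deferred io)
++ */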
++
++struct block_device_operations dm_blk_dops = {
++ open: dm_blk_open,
++ release: dm_blk_close,
++ ioctl: dm_blk_ioctl,
++ owner: THIS_MODULE
++};
++
++/*
++ * module hooks
++ */
++module_init(dm_init);
++module_exit(dm_exit);
++
++MODULE_PARM(major, "i");
++MODULE_PARM_DESC(major, "The major number of the device mapper");
++MODULE_DESCRIPTION(DM_NAME " driver");
++MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
++MODULE_LICENSE("GPL");
+diff -ruN linux-2.4.18/drivers/md/dm.h linux/drivers/md/dm.h
+--- linux-2.4.18/drivers/md/dm.h Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/dm.h Thu Mar 7 20:04:19 2002
+@@ -0,0 +1,233 @@
++/*
++ * Internal header file for device mapper
++ *
++ * Copyright (C) 2001 Sistina Software
++ *
++ * This file is released under the LGPL.
++ */
++
++#ifndef DM_INTERNAL_H
++#define DM_INTERNAL_H
++
++#include <linux/config.h>
++#include <linux/version.h>
++#include <linux/major.h>
++#include <linux/iobuf.h>
++#include <linux/module.h>
++#include <linux/fs.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++#include <linux/compatmac.h>
++#include <linux/cache.h>
++#include <linux/devfs_fs_kernel.h>
++#include <linux/ctype.h>
++#include <linux/device-mapper.h>
++#include <linux/list.h>
++#include <linux/init.h>
++
++#define DM_NAME "device-mapper" /* Name for messaging */
++#define MAX_DEPTH 16
++#define NODE_SIZE L1_CACHE_BYTES
++#define KEYS_PER_NODE (NODE_SIZE / sizeof(offset_t))
++#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
++#define MAX_ARGS 32
++#define MAX_DEVICES 256
++
++/*
++ * List of devices that a metadevice uses and should open/close.
++ */
++struct dm_dev {
++ atomic_t count;
++ struct list_head list;
++
++ kdev_t dev;
++ struct block_device *bd;
++};
++
++/*
++ * I/O that had to be deferred while we were suspended
++ */
++struct deferred_io {
++ int rw;
++ struct buffer_head *bh;
++ struct deferred_io *next;
++};
++
++/*
++ * Btree leaf - this does the actual mapping
++ */
++struct target {
++ struct target_type *type;
++ void *private;
++};
++
++/*
++ * The btree
++ */
++struct dm_table {
++ /* btree table */
++ int depth;
++ int counts[MAX_DEPTH]; /* in nodes */
++ offset_t *index[MAX_DEPTH];
++
++ int num_targets;
++ int num_allocated;
++ offset_t *highs;
++ struct target *targets;
++
++ /* a list of devices used by this table */
++ struct list_head devices;
++};
++
++/*
++ * The actual device struct
++ */
++struct mapped_device {
++ kdev_t dev;
++ char name[DM_NAME_LEN];
++ char *uuid;
++
++ int use_count;
++ int suspended;
++ int read_only;
++
++ /* a list of io's that arrived while we were suspended */
++ atomic_t pending;
++ wait_queue_head_t wait;
++ struct deferred_io *deferred;
++
++ struct dm_table *map;
++
++ /* used by dm-fs.c */
++ devfs_handle_t devfs_entry;
++};
++
++extern struct block_device_operations dm_blk_dops;
++
++/* dm-target.c */
++int dm_target_init(void);
++struct target_type *dm_get_target_type(const char *name);
++void dm_put_target_type(struct target_type *t);
++void dm_target_exit(void);
++
++/*
++ * Destructively splits argument list to pass to ctr.
++ */
++int split_args(int max, int *argc, char **argv, char *input);
++
++/* dm.c */
++struct mapped_device *dm_get_r(int minor);
++struct mapped_device *dm_get_w(int minor);
++struct mapped_device *dm_get_name_r(const char *name);
++struct mapped_device *dm_get_name_w(const char *name);
++
++void dm_put_r(int minor);
++void dm_put_w(int minor);
++
++/*
++ * Call with no lock.
++ */
++int dm_create(const char *name, const char *uuid,
++ int minor, struct dm_table *table);
++int dm_set_name(const char *oldname, const char *newname);
++void dm_destroy_all(void);
++
++/*
++ * You must have the write lock before calling the remaining md
++ * methods.
++ */
++int dm_destroy(struct mapped_device *md);
++void dm_set_ro(struct mapped_device *md, int ro);
++
++/*
++ * The device must be suspended before calling this method.
++ */
++int dm_swap_table(struct mapped_device *md, struct dm_table *t);
++
++/*
++ * A device can still be used while suspended, but I/O is deferred.
++ */
++int dm_suspend(struct mapped_device *md);
++int dm_resume(struct mapped_device *md);
++
++/*
++ * Event notification
++ */
++void dm_notify(void *target);
++
++/* dm-table.c */
++int dm_table_create(struct dm_table **result);
++void dm_table_destroy(struct dm_table *t);
++
++int dm_table_add_target(struct dm_table *t, offset_t highs,
++ struct target_type *type, void *private);
++int dm_table_complete(struct dm_table *t);
++
++/* kcopyd.c */
++typedef enum {
++ COPY_CB_COMPLETE,
++ COPY_CB_FAILED_READ,
++ COPY_CB_FAILED_WRITE,
++ COPY_CB_PROGRESS
++} copy_cb_reason_t;
++
++int dm_blockcopy(unsigned long fromsec, unsigned long tosec,
++ unsigned long nr_sectors, kdev_t fromdev, kdev_t todev,
++ int priority, int throttle,
++ void (*callback) (copy_cb_reason_t, void *, long),
++ void *context);
++int kcopyd_init(void);
++void kcopyd_exit(void);
++
++/* Snapshots */
++int dm_snapshot_init(void);
++void dm_snapshot_exit(void);
++int dm_origin_init(void);
++void dm_origin_exit(void);
++
++/* dm-mirror.c */
++int dm_mirror_init(void);
++void dm_mirror_exit(void);
++
++#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
++#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x)
++#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x)
++
++/*
++ * Calculate the index of the child node for the n'th node's k'th key.
++ */
++static inline int get_child(int n, int k)
++{
++ return (n * CHILDREN_PER_NODE) + k;
++}
++
++/*
++ * Return the n'th node of level l from table t.
++ */
++static inline offset_t *get_node(struct dm_table *t, int l, int n)
++{
++ return t->index[l] + (n * KEYS_PER_NODE);
++}
++
++/*
++ * The device-mapper can be driven through one of two interfaces;
++ * ioctl or filesystem, depending which patch you have applied.
++ */
++
++int __init dm_interface_init(void);
++void __exit dm_interface_exit(void);
++
++/* Code in dm-snapshot called by dm-origin to do snapshot COW */
++int dm_do_snapshot(struct dm_dev *origin, struct buffer_head *bh);
++
++/*
++ * Targets for linear and striped mappings
++ */
++
++int dm_linear_init(void);
++void dm_linear_exit(void);
++
++int dm_stripe_init(void);
++void dm_stripe_exit(void);
++
++#endif
+diff -ruN linux-2.4.18/drivers/md/kcopyd.c linux/drivers/md/kcopyd.c
+--- linux-2.4.18/drivers/md/kcopyd.c Thu Jan 1 01:00:00 1970
++++ linux/drivers/md/kcopyd.c Thu Mar 14 13:45:09 2002
+@@ -0,0 +1,479 @@
++/*
++ * kcopyd.c
++ *
++ * Copyright (C) 2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/blkdev.h>
++#include <linux/device-mapper.h>
++
++#include "dm.h"
++
++/* Hard sector size used all over the kernel */
++#define SECTOR_SIZE 512
++
++/* Number of entries in the free list to start with */
++#define FREE_LIST_SIZE 32
++
++/* Slab cache for work entries when the freelist runs out */
++static kmem_cache_t *entry_cachep;
++
++/* Structure of work to do in the list */
++struct copy_work
++{
++ unsigned long fromsec;
++ unsigned long tosec;
++ unsigned long nr_sectors;
++ unsigned long done_sectors;
++ kdev_t fromdev;
++ kdev_t todev;
++ int throttle;
++ int priority; /* 0=highest */
++ void (*callback)(copy_cb_reason_t, void *, long);
++ void *context;
++ int freelist; /* Whether we came from the free list */
++ struct list_head list;
++};
++
++static LIST_HEAD(work_list);
++static LIST_HEAD(free_list);
++static struct task_struct *copy_task = NULL;
++static struct rw_semaphore work_list_lock;
++static struct rw_semaphore free_list_lock;
++static DECLARE_MUTEX(start_lock);
++static DECLARE_MUTEX(run_lock);
++static DECLARE_WAIT_QUEUE_HEAD(start_waitq);
++static DECLARE_WAIT_QUEUE_HEAD(work_waitq);
++static DECLARE_WAIT_QUEUE_HEAD(freelist_waitq);
++static struct kiobuf *iobuf;
++static int thread_exit = 0;
++static long last_jiffies = 0;
++
++/* Find a free entry from the free-list or allocate a new one.
++ This routine always returns a valid pointer even if it has to wait
++ for it */
++static struct copy_work *get_work_struct(void)
++{
++ struct copy_work *entry = NULL;
++
++ while (!entry) {
++
++ down_write(&free_list_lock);
++ if (!list_empty(&free_list)) {
++ entry = list_entry(free_list.next, struct copy_work, list);
++ list_del(&entry->list);
++ }
++ up_write(&free_list_lock);
++
++ if (!entry) {
++ /* Nothing on the free-list - try to allocate one without doing IO */
++ entry = kmem_cache_alloc(entry_cachep, GFP_NOIO);
++
++ /* Make sure we know it didn't come from the free list */
++ if (entry) {
++ entry->freelist = 0;
++ }
++ }
++
++ /* Failed...wait for IO to finish */
++ if (!entry) {
++ DECLARE_WAITQUEUE(wq, current);
++
++ set_task_state(current, TASK_INTERRUPTIBLE);
++ add_wait_queue(&freelist_waitq, &wq);
++
++ if (list_empty(&free_list))
++ schedule();
++
++ set_task_state(current, TASK_RUNNING);
++ remove_wait_queue(&freelist_waitq, &wq);
++ }
++ }
++
++ return entry;
++}
++
++/* Allocate pages for a kiobuf. */
++static int alloc_iobuf_pages(struct kiobuf *iobuf, int nr_sectors)
++{
++ int nr_pages, err, i;
++
++ if (nr_sectors > KIO_MAX_SECTORS)
++ return -1;
++
++ nr_pages = nr_sectors / (PAGE_SIZE/SECTOR_SIZE);
++ err = expand_kiobuf(iobuf, nr_pages);
++ if (err) goto out;
++
++ err = -ENOMEM;
++ iobuf->locked = 1;
++ iobuf->nr_pages = 0;
++ for (i = 0; i < nr_pages; i++) {
++ struct page * page;
++
++ page = alloc_page(GFP_KERNEL);
++ if (!page) goto out;
++
++ iobuf->maplist[i] = page;
++ LockPage(page);
++ iobuf->nr_pages++;
++ }
++ iobuf->offset = 0;
++
++ err = 0;
++
++out:
++ return err;
++}
++
++
++/* Add a new entry to the work list - in priority+FIFO order.
++ The work_list_lock semaphore must be held */
++static void add_to_work_list(struct copy_work *item)
++{
++ struct list_head *entry;
++
++ list_for_each(entry, &work_list) {
++ struct copy_work *cw;
++
++ cw = list_entry(entry, struct copy_work, list);
++ if (cw->priority > item->priority) {
++ __list_add(&item->list, cw->list.prev, &cw->list);
++ return;
++ }
++ }
++ list_add_tail(&item->list, &work_list);
++}
++
++/* Read in a chunk from the source device */
++static int read_blocks(struct kiobuf *iobuf, kdev_t dev, unsigned long start, int nr_sectors)
++{
++ int i, sectors_per_block, nr_blocks;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ sectors_per_block = blocksize / SECTOR_SIZE;
++
++ nr_blocks = nr_sectors / sectors_per_block;
++ start /= sectors_per_block;
++
++ for (i = 0; i < nr_blocks; i++)
++ iobuf->blocks[i] = start++;
++
++ iobuf->length = nr_sectors << 9;
++
++ status = brw_kiovec(READ, 1, &iobuf, dev, iobuf->blocks, blocksize);
++ return (status != (nr_sectors << 9));
++}
++
++/* Write out blocks */
++static int write_blocks(struct kiobuf *iobuf, kdev_t dev, unsigned long start, int nr_sectors)
++{
++ int i, sectors_per_block, nr_blocks;
++ int blocksize = get_hardsect_size(dev);
++ int status;
++
++ sectors_per_block = blocksize / SECTOR_SIZE;
++
++ nr_blocks = nr_sectors / sectors_per_block;
++ start /= sectors_per_block;
++
++ for (i = 0; i < nr_blocks; i++)
++ iobuf->blocks[i] = start++;
++
++ iobuf->length = nr_sectors << 9;
++
++ status = brw_kiovec(WRITE, 1, &iobuf, dev, iobuf->blocks, blocksize);
++ return (status != (nr_sectors << 9));
++}
++
++/* This is where all the real work happens */
++static int copy_kthread(void *unused)
++{
++ daemonize();
++ down(&run_lock);
++
++ strcpy(current->comm, "kcopyd");
++ copy_task = current;
++ wake_up_interruptible(&start_waitq);
++
++ do {
++ DECLARE_WAITQUEUE(wq, current);
++ struct task_struct *tsk = current;
++
++ down_write(&work_list_lock);
++
++ while (!list_empty(&work_list)) {
++
++ struct copy_work *work_item = list_entry(work_list.next, struct copy_work, list);
++ int done_sps;
++ copy_cb_reason_t callback_reason = COPY_CB_COMPLETE;
++ int preempted = 0;
++
++ list_del(&work_item->list);
++ up_write(&work_list_lock);
++
++ while (!preempted && work_item->done_sectors < work_item->nr_sectors) {
++ long nr_sectors = min((unsigned long)KIO_MAX_SECTORS,
++ work_item->nr_sectors - work_item->done_sectors);
++
++ /* Read original blocks */
++ if (read_blocks(iobuf, work_item->fromdev, work_item->fromsec + work_item->done_sectors,
++ nr_sectors)) {
++ DMERR("Read blocks from device %s failed", kdevname(work_item->fromdev));
++
++ /* Callback error */
++ callback_reason = COPY_CB_FAILED_READ;
++ goto done_copy;
++ }
++
++ /* Write them out again */
++ if (write_blocks(iobuf, work_item->todev, work_item->tosec + work_item->done_sectors,
++ nr_sectors)) {
++ DMERR("Write blocks to %s failed", kdevname(work_item->todev));
++
++ /* Callback error */
++ callback_reason = COPY_CB_FAILED_WRITE;
++ goto done_copy;
++ }
++ work_item->done_sectors += nr_sectors;
++
++ /* If we have exceeded the throttle value (in sectors/second) then
++ sleep for a while */
++ done_sps = nr_sectors*HZ/(jiffies-last_jiffies);
++ if (work_item->throttle && done_sps > work_item->throttle && done_sps) {
++ long start_jiffies = jiffies;
++ do {
++ schedule_timeout(done_sps - work_item->throttle * HZ);
++ } while (jiffies <= start_jiffies+(done_sps - work_item->throttle * HZ));
++ }
++
++ /* Do a progress callback */
++ if (work_item->callback && work_item->done_sectors < work_item->nr_sectors)
++ work_item->callback(COPY_CB_PROGRESS, work_item->context, work_item->done_sectors);
++
++ /* Look for higher priority work */
++ down_write(&work_list_lock);
++ if (!list_empty(&work_list)) {
++ struct copy_work *peek_item = list_entry(work_list.next, struct copy_work, list);
++
++ if (peek_item->priority < work_item->priority) {
++
++ /* Put this back on the list and restart to get the new one */
++ add_to_work_list(work_item);
++ preempted = 1;
++ goto restart;
++ }
++ }
++ up_write(&work_list_lock);
++ }
++
++ done_copy:
++ /* Call the callback */
++ if (work_item->callback)
++ work_item->callback(callback_reason, work_item->context, work_item->done_sectors);
++
++ /* Add it back to the free list (if it came from there)
++ and notify anybody waiting for an entry */
++ if (work_item->freelist) {
++ down_write(&free_list_lock);
++ list_add(&work_item->list, &free_list);
++ up_write(&free_list_lock);
++ }
++ else {
++ kmem_cache_free(entry_cachep, work_item);
++ }
++ wake_up_interruptible(&freelist_waitq);
++
++ /* Get the work lock again for the top of the while loop */
++ down_write(&work_list_lock);
++ restart:
++ }
++ up_write(&work_list_lock);
++
++ /* Wait for more work */
++ set_task_state(tsk, TASK_INTERRUPTIBLE);
++ add_wait_queue(&work_waitq, &wq);
++
++ if (list_empty(&work_list))
++ schedule();
++
++ set_task_state(tsk, TASK_RUNNING);
++ remove_wait_queue(&work_waitq, &wq);
++
++ } while (thread_exit == 0);
++
++ unmap_kiobuf(iobuf);
++ free_kiovec(1, &iobuf);
++
++ up(&run_lock);
++ return 0;
++}
++
++/* API entry point */
++int dm_blockcopy(unsigned long fromsec, unsigned long tosec, unsigned long nr_sectors,
++ kdev_t fromdev, kdev_t todev,
++ int priority, int throttle, void (*callback)(copy_cb_reason_t, void *, long), void *context)
++{
++ struct copy_work *newwork;
++ static pid_t thread_pid = 0;
++ long from_blocksize = get_hardsect_size(fromdev);
++ long to_blocksize = get_hardsect_size(todev);
++
++ /* Make sure the start sectors are on physical block boundaries */
++ if (fromsec % (from_blocksize/SECTOR_SIZE))
++ return -EINVAL;
++ if (tosec % (to_blocksize/SECTOR_SIZE))
++ return -EINVAL;
++
++ /* Start the thread if we don't have one already */
++ down(&start_lock);
++ if (copy_task == NULL) {
++ thread_pid = kernel_thread(copy_kthread, NULL, 0);
++ if (thread_pid > 0) {
++
++ DECLARE_WAITQUEUE(wq, current);
++ struct task_struct *tsk = current;
++
++ DMINFO("Started kcopyd thread");
++
++ /* Wait for it to complete its startup initialisation */
++ set_task_state(tsk, TASK_INTERRUPTIBLE);
++ add_wait_queue(&start_waitq, &wq);
++
++ if (copy_task == NULL)
++ schedule();
++
++ set_task_state(tsk, TASK_RUNNING);
++ remove_wait_queue(&start_waitq, &wq);
++ }
++ else {
++ DMERR("Failed to start kcopyd thread");
++ up(&start_lock);
++ return -EAGAIN;
++ }
++ }
++ up(&start_lock);
++
++ /* This will wait until one is available */
++ newwork = get_work_struct();
++
++ newwork->fromsec = fromsec;
++ newwork->tosec = tosec;
++ newwork->fromdev = fromdev;
++ newwork->todev = todev;
++ newwork->nr_sectors = nr_sectors;
++ newwork->done_sectors = 0;
++ newwork->throttle = throttle;
++ newwork->priority = priority;
++ newwork->callback = callback;
++ newwork->context = context;
++
++ down_write(&work_list_lock);
++ add_to_work_list(newwork);
++ up_write(&work_list_lock);
++
++ wake_up_interruptible(&work_waitq);
++ return 0;
++}
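++
++/*
++ * Usage sketch (illustrative only; the callback, devices and completion
++ * below are hypothetical):
++ *
++ * static void copy_done(copy_cb_reason_t reason, void *context, long arg)
++ * {
++ * if (reason == COPY_CB_COMPLETE)
++ * complete((struct completion *) context);
++ * }
++ *
++ * dm_blockcopy(0, 0, 1024, from_dev, to_dev, 0, 0, copy_done, &done);
++ *
++ * copies 1024 sectors at the highest priority with throttling disabled
++ * and is told through copy_done() when the work item finishes or fails.
++ */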
++
++
++/* Pre-allocate some structures for the free list */
++static int allocate_free_list(void)
++{
++ int i;
++ struct copy_work *newwork;
++
++ for (i=0; i<FREE_LIST_SIZE; i++) {
++ newwork = kmalloc(sizeof(struct copy_work), GFP_KERNEL);
++ if (!newwork)
++ return i;
++ newwork->freelist = 1;
++ list_add(&newwork->list, &free_list);
++ }
++ return i;
++}
++
++int __init kcopyd_init(void)
++{
++ init_rwsem(&work_list_lock);
++ init_rwsem(&free_list_lock);
++ init_MUTEX(&start_lock);
++ init_MUTEX(&run_lock);
++
++ if (alloc_kiovec(1, &iobuf)) {
++ DMERR("Unable to allocate kiobuf for kcopyd");
++ return -1;
++ }
++
++ if (alloc_iobuf_pages(iobuf, KIO_MAX_SECTORS)) {
++ DMERR("Unable to allocate pages for kcopyd");
++ free_kiovec(1, &iobuf);
++ return -1;
++ }
++
++ entry_cachep = kmem_cache_create("kcopyd",
++ sizeof(struct copy_work),
++ __alignof__(struct copy_work),
++ 0, NULL, NULL);
++ if (!entry_cachep) {
++ unmap_kiobuf(iobuf);
++ free_kiovec(1, &iobuf);
++ DMERR("Unable to allocate slab cache for kcopyd");
++ return -1;
++ }
++
++ if (allocate_free_list() == 0) {
++ unmap_kiobuf(iobuf);
++ free_kiovec(1, &iobuf);
++ kmem_cache_destroy(entry_cachep);
++ DMERR("Unable to allocate any work structures for the free list");
++ return -1;
++ }
++
++ return 0;
++}
++
++void kcopyd_exit(void)
++{
++ struct list_head *entry, *temp;
++
++ thread_exit = 1;
++ wake_up_interruptible(&work_waitq);
++
++ /* Wait for the thread to finish */
++ down(&run_lock);
++ up(&run_lock);
++
++ /* Free the free list */
++ list_for_each_safe(entry, temp, &free_list) {
++ struct copy_work *cw;
++ cw = list_entry(entry, struct copy_work, list);
++ list_del(&cw->list);
++ kfree(cw);
++ }
++
++ if (entry_cachep)
++ kmem_cache_destroy(entry_cachep);
++}
++
++EXPORT_SYMBOL(dm_blockcopy);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -ruN linux-2.4.18/include/linux/device-mapper.h linux/include/linux/device-mapper.h
+--- linux-2.4.18/include/linux/device-mapper.h Thu Jan 1 01:00:00 1970
++++ linux/include/linux/device-mapper.h Thu Mar 7 16:56:22 2002
+@@ -0,0 +1,59 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the LGPL.
++ */
++
++#ifndef _LINUX_DEVICE_MAPPER_H
++#define _LINUX_DEVICE_MAPPER_H
++
++#define DM_DIR "device-mapper" /* Slashes not supported */
++#define DM_MAX_TYPE_NAME 16
++#define DM_NAME_LEN 128
++#define DM_UUID_LEN 129
++
++#ifdef __KERNEL__
++
++struct dm_table;
++struct dm_dev;
++typedef unsigned long offset_t;
++
++
++/*
++ * Prototypes for functions for a target
++ */
++typedef int (*dm_ctr_fn)(struct dm_table *t, offset_t b, offset_t l,
++ int argc, char **argv, void **context);
++typedef void (*dm_dtr_fn)(struct dm_table *t, void *c);
++typedef int (*dm_map_fn)(struct buffer_head *bh, int rw, void *context);
++typedef int (*dm_err_fn)(struct buffer_head *bh, int rw, void *context);
++
++
++void dm_error(const char *message);
++
++/*
++ * Constructors should call these functions to ensure destination devices
++ * are opened/closed correctly
++ */
++int dm_table_get_device(struct dm_table *t, const char *path,
++ offset_t start, offset_t len, struct dm_dev **result);
++void dm_table_put_device(struct dm_table *table, struct dm_dev *d);
++
++/*
++ * Information about a target type
++ */
++struct target_type {
++ const char *name;
++ struct module *module;
++ dm_ctr_fn ctr;
++ dm_dtr_fn dtr;
++ dm_map_fn map;
++ dm_err_fn err;
++};
++
++int dm_register_target(struct target_type *t);
++int dm_unregister_target(struct target_type *t);
++
++#endif /* __KERNEL__ */
++
++#endif /* _LINUX_DEVICE_MAPPER_H */
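To illustrate the target interface declared above, a minimal sketch of a do-nothing target registering itself through dm_register_target(). The map return convention and whether err may be left NULL are assumptions here; dm-linear.c in this patch is the authoritative example.

	#include <linux/module.h>
	#include <linux/init.h>
	#include <linux/device-mapper.h>

	static int example_ctr(struct dm_table *t, offset_t b, offset_t l,
			       int argc, char **argv, void **context)
	{
		*context = NULL;	/* no per-target state needed */
		return 0;
	}

	static void example_dtr(struct dm_table *t, void *c)
	{
	}

	static int example_map(struct buffer_head *bh, int rw, void *context)
	{
		return 1;	/* assumed convention: non-zero means "submit bh as-is" */
	}

	static struct target_type example_target = {
		name:	"example",
		module:	THIS_MODULE,
		ctr:	example_ctr,
		dtr:	example_dtr,
		map:	example_map,
		err:	NULL,		/* assumed optional */
	};

	static int __init example_init(void)
	{
		return dm_register_target(&example_target);
	}

	static void __exit example_exit(void)
	{
		dm_unregister_target(&example_target);
	}

	module_init(example_init);
	module_exit(example_exit);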
+diff -ruN linux-2.4.18/include/linux/dm-ioctl.h linux/include/linux/dm-ioctl.h
+--- linux-2.4.18/include/linux/dm-ioctl.h Thu Jan 1 01:00:00 1970
++++ linux/include/linux/dm-ioctl.h Thu Mar 14 16:32:58 2002
+@@ -0,0 +1,103 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the LGPL.
++ */
++
++#ifndef _LINUX_DM_IOCTL_H
++#define _LINUX_DM_IOCTL_H
++
++#include "device-mapper.h"
++
++/*
++ * Implements a traditional ioctl interface to the device mapper.
++ */
++
++/*
++ * All ioctl arguments consist of a single chunk of memory, with
++ * this structure at the start.
++ */
++struct dm_ioctl {
++ char version[16];
++
++ unsigned long data_size; /* total size of data passed in
++ * including this struct */
++
++ unsigned long data_start; /* offset to start of data
++ * relative to start of this struct */
++
++ char name[DM_NAME_LEN]; /* device name */
++
++ unsigned int target_count; /* in/out */
++ unsigned int open_count; /* out */
++ unsigned int flags; /* in/out */
++
++ __kernel_dev_t dev; /* in/out */
++
++ char uuid[DM_UUID_LEN]; /* unique identifier for
++ * the block device */
++};
++
++/*
++ * Used to specify tables. These structures appear after the
++ * dm_ioctl.
++ */
++struct dm_target_spec {
++	int32_t status;		/* used only when reading from the kernel */
++ unsigned long long sector_start;
++ unsigned long long length;
++
++ char target_type[DM_MAX_TYPE_NAME];
++
++ unsigned long next; /* offset in bytes to next target_spec */
++
++ /*
++	 * The parameter string starts immediately after this object.
++	 * Be careful to add padding after the string to ensure correct
++	 * alignment of the subsequent dm_target_spec.
++ */
++};
++
++/*
++ * Used to retrieve the target dependencies.
++ */
++struct dm_target_deps {
++ unsigned int count;
++
++ __kernel_dev_t dev[0]; /* out */
++};
++
++#define DM_IOCTL 0xfd
++
++enum {
++ DM_CREATE_CMD = 0,
++ DM_REMOVE_CMD,
++ DM_SUSPEND_CMD,
++ DM_RELOAD_CMD,
++ DM_INFO_CMD,
++ DM_RENAME_CMD,
++ DM_VERSION_CMD,
++ DM_DEPS_CMD,
++ DM_REMOVE_ALL_CMD
++};
++
++#define DM_CREATE _IOWR(DM_IOCTL, DM_CREATE_CMD, struct dm_ioctl)
++#define DM_REMOVE _IOW(DM_IOCTL, DM_REMOVE_CMD, struct dm_ioctl)
++#define DM_SUSPEND _IOW(DM_IOCTL, DM_SUSPEND_CMD, struct dm_ioctl)
++#define DM_RELOAD _IOW(DM_IOCTL, DM_RELOAD_CMD, struct dm_ioctl)
++#define DM_INFO _IOWR(DM_IOCTL, DM_INFO_CMD, struct dm_ioctl)
++#define DM_RENAME _IOW(DM_IOCTL, DM_RENAME_CMD, struct dm_ioctl)
++#define DM_VERSION _IOR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
++#define DM_DEPS _IOR(DM_IOCTL, DM_DEPS_CMD, struct dm_ioctl)
++#define DM_REMOVE_ALL _IOR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
++
++#define DM_IOCTL_VERSION "0.94"
++#define DM_DRIVER_VERSION "0.94.08-ioctl-cvs (2002-03-14)"
++
++/* Status bits */
++#define DM_READONLY_FLAG 0x00000001
++#define DM_SUSPEND_FLAG 0x00000002
++#define DM_EXISTS_FLAG 0x00000004
++#define DM_PERSISTENT_DEV_FLAG 0x00000008
++
++#endif /* _LINUX_DM_IOCTL_H */
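A userspace sketch of the single-buffer layout described above: a struct dm_ioctl header followed, at data_start, by one dm_target_spec and its parameter string. Only the layout rules stated in the header are relied on; which fields DM_CREATE actually requires, the meaning of next == 0 and the 8-byte padding are assumptions for illustration, as is the availability of the header to userspace as <linux/dm-ioctl.h>.

	#include <string.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/dm-ioctl.h>

	static void *build_create_buffer(const char *dev_name,
					 const char *target, const char *params,
					 unsigned long long len_sectors,
					 size_t *size_out)
	{
		size_t param_len = strlen(params) + 1;
		size_t spec_off  = sizeof(struct dm_ioctl);
		size_t total     = spec_off + sizeof(struct dm_target_spec) + param_len;
		char *buf;
		struct dm_ioctl *dmi;
		struct dm_target_spec *spec;

		/* keep any following dm_target_spec aligned, as the header asks */
		total = (total + 7) & ~7UL;

		buf = calloc(1, total);
		if (!buf)
			return NULL;

		dmi = (struct dm_ioctl *) buf;
		strncpy(dmi->version, DM_IOCTL_VERSION, sizeof(dmi->version) - 1);
		dmi->data_size    = total;
		dmi->data_start   = spec_off;
		dmi->target_count = 1;
		strncpy(dmi->name, dev_name, DM_NAME_LEN - 1);

		spec = (struct dm_target_spec *) (buf + spec_off);
		spec->sector_start = 0;
		spec->length       = len_sectors;
		strncpy(spec->target_type, target, DM_MAX_TYPE_NAME - 1);
		spec->next = 0;			/* assumed: marks the final target */
		memcpy(spec + 1, params, param_len);	/* parameter string follows */

		*size_out = total;
		return buf;	/* then: ioctl(fd, DM_CREATE, buf) */
	}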
+diff -ruN linux-2.4.18/include/linux/fs.h linux/include/linux/fs.h
+--- linux-2.4.18/include/linux/fs.h Mon Mar 4 17:42:19 2002
++++ linux/include/linux/fs.h Thu Mar 14 16:33:11 2002
+@@ -258,7 +258,10 @@
+ char * b_data; /* pointer to data block */
+ struct page *b_page; /* the page this bh is mapped to */
+ void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
+- void *b_private; /* reserved for b_end_io */
++ void *b_private; /* reserved for b_end_io, also used by ext3 */
++	void *b_bdev_private;	/* a hack to work around ext3 still using
++				 * b_private after the buffer_head has been
++				 * handed to the block layer */
+
+ unsigned long b_rsector; /* Real buffer location on disk */
+ wait_queue_head_t b_wait;
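A purely illustrative sketch (not taken from this patch) of why the extra pointer helps: a block-layer user such as device-mapper can hang per-buffer state off b_bdev_private and restore the original completion handler from its own b_end_io, without touching the b_private field that ext3 still owns.

	#include <linux/fs.h>
	#include <linux/slab.h>

	struct per_bh_state {
		void (*saved_end_io)(struct buffer_head *, int);
	};

	static void my_end_io(struct buffer_head *bh, int uptodate)
	{
		struct per_bh_state *s = (struct per_bh_state *) bh->b_bdev_private;

		bh->b_bdev_private = NULL;
		/* hand completion back to the original owner */
		s->saved_end_io(bh, uptodate);
		kfree(s);
	}

	static int intercept_bh(struct buffer_head *bh)
	{
		struct per_bh_state *s = kmalloc(sizeof(*s), GFP_NOIO);

		if (!s)
			return -ENOMEM;

		s->saved_end_io = bh->b_end_io;
		bh->b_bdev_private = s;		/* safe even if ext3 owns b_private */
		bh->b_end_io = my_end_io;
		return 0;
	}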