appropriate parameters in step 1, you can just run 'make apply-patches'
from the top directory.
- If you are using User Mode Linux, you may also need to apply the patch
- patches/misc/uml_config.patch
+ If you are using an old version of User Mode Linux, you may also
+ need to apply the patch patches/misc/uml_config.patch.
Configure, build and install your kernel in the normal way, selecting
'Device mapper support' from the 'Multiple devices driver support' menu.
static int _wait(int argc, char **argv)
{
- struct dm_task *dmt;
-
- if (!(dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
- return 0;
-
- if (!dm_task_set_name(dmt, argv[1]))
- goto out;
-
- if (!dm_task_run(dmt))
- goto out;
-
- dm_task_destroy(dmt);
-
- out:
- return 1;
+ return _simple(DM_DEVICE_WAITEVENT, argv[1]);
}
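The _simple() helper this hunk relies on is not shown in it; a minimal
sketch, reconstructed from the body _wait() used to carry (unlike the old
code, it also propagates the result of dm_task_run() instead of always
returning 1):

static int _simple(int task, const char *name)
{
	int r = 0;
	struct dm_task *dmt;

	if (!(dmt = dm_task_create(task)))
		return 0;

	if (!dm_task_set_name(dmt, name))
		goto out;

	r = dm_task_run(dmt);

      out:
	dm_task_destroy(dmt);
	return r;
}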
static int _status(int argc, char **argv)
int r = 0;
struct dm_task *dmt;
void *next = NULL;
- unsigned long long start, length;
+ uint64_t start, length;
char *target_type = NULL;
char *params;
int cmd;
- if (strcmp(argv[0], "status") == 0)
- cmd = DM_DEVICE_STATUS;
+ if (!strcmp(argv[0], "status"))
+ cmd = DM_DEVICE_STATUS;
else
- cmd = DM_DEVICE_TABLE;
+ cmd = DM_DEVICE_TABLE;
if (!(dmt = dm_task_create(cmd)))
return 0;
/* Fetch targets and print 'em */
do {
- next = dm_get_next_target(dmt, next, &start, &length,
- &target_type, &params);
- if (target_type) {
- printf("%lld %lld %s %s\n",
- start, length, target_type, params);
- }
+ next = dm_get_next_target(dmt, next, &start, &length,
+ &target_type, &params);
+ if (target_type) {
+ printf("%"PRIu64" %"PRIu64" %s %s\n",
+ start, length, target_type, params);
+ }
} while (next);
r = 1;
struct dm_dev;
typedef unsigned long offset_t;
+typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
/*
* Prototypes for functions for a target
*/
-typedef int (*dm_ctr_fn)(struct dm_table *t, offset_t b, offset_t l,
- int argc, char **argv, void **context);
-typedef void (*dm_dtr_fn)(struct dm_table *t, void *c);
-typedef int (*dm_map_fn)(struct buffer_head *bh, int rw, void *context);
-typedef int (*dm_err_fn)(struct buffer_head *bh, int rw, void *context);
-
+typedef int (*dm_ctr_fn) (struct dm_table *t, offset_t b, offset_t l,
+ int argc, char **argv, void **context);
+typedef void (*dm_dtr_fn) (struct dm_table *t, void *c);
+typedef int (*dm_map_fn) (struct buffer_head *bh, int rw, void *context);
+typedef int (*dm_err_fn) (struct buffer_head *bh, int rw, void *context);
+typedef int (*dm_sts_fn) (status_type_t sts_type, char *, int maxlen,
+ void *context);
+typedef int (*dm_wait_fn) (void *context, wait_queue_t *wq, int add);
void dm_error(const char *message);
dm_dtr_fn dtr;
dm_map_fn map;
dm_err_fn err;
+ dm_sts_fn sts;
+ dm_wait_fn wait;
};
int dm_register_target(struct target_type *t);
*/
#include "dm-snapshot.h"
+#include "kcopyd.h"
+#include <linux/mm.h>
#define SECTOR_SIZE 512
+#define SECTOR_SHIFT 9
-/*
- * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
- */
-#define SNAP_MAGIC 0x70416e53
-
-/*
- * The on-disk version of the metadata. Only applicable to
- * persistent snapshots.
- * There is no backward or forward compatibility implemented, snapshots
- * with different disk versions than the kernel will not be usable. It is
- * expected that "lvcreate" will blank out the start of the COW device
- * before calling the snapshot constructor.
- */
-#define SNAPSHOT_DISK_VERSION 1
+/*-----------------------------------------------------------------
+ * Persistent snapshots, by persistent we mean that the snapshot
+ * will survive a reboot.
+ *---------------------------------------------------------------*/
/*
- * Metadata format: (please keep this up-to-date!)
- * Persistent snapshots have a 1 block header (see below for structure) at
- * the very start of the device. The COW metadata starts at
- * .start_of_exceptions.
- *
- * COW metadata is stored in blocks that are "extent-size" sectors long as
- * an array of disk_exception structures in Little-Endian format.
- * The last entry in this array has rsector_new set to 0 (this cannot be a
- * legal redirection as the header is here) and if rsector_org has a value
- * it is the sector number of the next COW metadata sector on the disk. if
- * rsector_org is also zero then this is the end of the COW metadata.
+ * We need to store a record of which parts of the origin have
+ * been copied to the snapshot device. The snapshot code
+ * requires that we copy exception chunks to chunk-aligned areas
+ * of the COW store. It therefore makes sense to store the
+ * metadata in chunk-size blocks.
*
- * The metadata is written in hardblocksize lumps rather than in units of
- * extents for efficiency so don't expect a whole extent to be zeroed out
- * at any time.
+ * There is no backward or forward compatibility implemented,
+ * snapshots with different disk versions than the kernel will
+ * not be usable. It is expected that "lvcreate" will blank out
+ * the start of a fresh COW device before calling the snapshot
+ * constructor.
*
- * Non-persistent snapshots simple have redirected blocks stored
- * (in chunk_size sectors) from hard block 1 to avoid inadvertantly
- * creating a bad header.
+ * The very first metadata block, which is also the first chunk
+ * on the COW device, will include a header struct followed by
+ * exception info. All other metadata chunks will solely consist
+ * of exception info. All on disk structures are in
+ * little-endian format. The end of the exceptions info is
+ * indicated by an exception with a new_chunk of 0.
*/
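To make the layout arithmetic concrete, a small sketch (the helper names
are illustrative, and struct disk_exception is defined just below; the
patch's own versions of these calculations appear in area_io() and
persistent_init() further down):

/*
 * Metadata area 'area' lives in chunk area * (exceptions_per_area + 1);
 * the exceptions_per_area chunks following it hold copied data.
 */
static inline uint32_t calc_exceptions_per_area(uint32_t chunk_size)
{
	/* chunk_size is in sectors: e.g. 16 sectors (8K) divided by
	 * 16-byte disk_exceptions gives 512 exceptions per area. */
	return (chunk_size << SECTOR_SHIFT) / sizeof(struct disk_exception);
}

static inline uint32_t metadata_chunk(uint32_t area, uint32_t epa)
{
	return area * (epa + 1);
}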
/*
- * Internal snapshot structure
+ * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
*/
-struct persistent_info {
- /* Size of extents used for COW blocks */
- long extent_size;
-
- /* Number of the next free sector for COW/data */
- unsigned long next_free_sector;
-
- /* Where the metadata starts */
- unsigned long start_of_exceptions;
-
- /* Where we are currently writing the metadata */
- unsigned long current_metadata_sector;
-
- /* Index into disk_cow array */
- int current_metadata_entry;
-
- /* Index into mythical extent array */
- int current_metadata_number;
-
- /* Number of metadata entries in the disk_cow array */
- int highest_metadata_entry;
+#define SNAP_MAGIC 0x70416e53
- /* Number of metadata entries per hard disk block */
- int md_entries_per_block;
+/*
+ * The on-disk version of the metadata.
+ */
+#define SNAPSHOT_DISK_VERSION 1
- int full;
+#if 0
+struct disk_header {
+ uint32_t magic;
- /* kiobuf for doing I/O to header & metadata */
- struct kiobuf *cow_iobuf;
+ /*
+ * Is this snapshot valid. There is no way of recovering
+ * an invalid snapshot.
+ */
+ int valid;
/*
- * Disk extent with COW data in it. as an array of
- * exception tables. The first one points to the next
- * block of metadata or 0 if this is the last
+ * Simple, incrementing version. No backward
+ * compatibility.
*/
- struct disk_exception *disk_cow;
+ uint32_t version;
+
+ /* In sectors */
+ uint32_t chunk_size;
};
+#endif
-/*
- * An array of these is held in each disk block. LE format
- */
struct disk_exception {
- uint64_t rsector_org;
- uint64_t rsector_new;
+ uint64_t old_chunk;
+ uint64_t new_chunk;
};
/*
- * Structure of a (persistent) snapshot header on disk. in LE format
+ * The top level structure for a persistent exception store.
*/
-struct snap_disk_header {
- uint32_t magic;
+struct pstore {
+ struct dm_snapshot *snap; /* up pointer to my snapshot */
+ int valid;
+ uint32_t chunk_size;
+ uint32_t exceptions_per_area;
- /* Simple, incrementing version. no backward compatibility */
- uint32_t version;
+ /*
+ * Now that we have an asynchronous kcopyd there is no
+ * need for large chunk sizes, so it won't hurt to have a
+ * whole chunk's worth of metadata in memory at once.
+ */
+ void *area;
+ struct kiobuf *iobuf;
- /* In 512 byte sectors */
- uint32_t chunk_size;
+ /*
+ * Used to keep track of which metadata area the data in
+ * 'chunk' refers to.
+ */
+ uint32_t current_area;
- /* In 512 byte sectors */
- uint32_t extent_size;
- uint64_t start_of_exceptions;
- uint32_t full;
-};
+ /*
+ * The next free chunk for an exception.
+ */
+ uint32_t next_free;
-static inline struct persistent_info *get_info(struct exception_store *store)
-{
- return (struct persistent_info *) store->context;
-}
+ /*
+ * The index of next free exception in the current
+ * metadata area.
+ */
+ uint32_t current_committed;
+};
/*
- * READ or WRITE some blocks to/from a device
+ * For performance reasons we want to defer writing a committed
+ * exception's metadata to disk so that we can amortise away this
+ * expensive operation.
+ *
+ * For the initial version of this code we will remain with
+ * synchronous io. There are some deadlock issues with async
+ * that I haven't yet worked out.
*/
-static int do_io(int rw, struct kiobuf *iobuf, kdev_t dev,
- unsigned long start, int nr_sectors)
+static int do_io(int rw, struct kcopyd_region *where, struct kiobuf *iobuf)
{
- int i, sectors_per_block, nr_blocks;
- int blocksize = get_hardsect_size(dev);
+ int i, sectors_per_block, nr_blocks, start;
+ int blocksize = get_hardsect_size(where->dev);
int status;
sectors_per_block = blocksize / SECTOR_SIZE;
- nr_blocks = nr_sectors / sectors_per_block;
- start /= sectors_per_block;
+ nr_blocks = where->count / sectors_per_block;
+ start = where->sector / sectors_per_block;
for (i = 0; i < nr_blocks; i++)
iobuf->blocks[i] = start++;
- iobuf->length = nr_sectors << 9;
+ iobuf->length = where->count << 9;
- status = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
- return (status != (nr_sectors << 9));
+ status = brw_kiovec(rw, 1, &iobuf, where->dev, iobuf->blocks,
+ blocksize);
+ if (status != (where->count << 9))
+ return -EIO;
+
+ return 0;
}
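As a usage sketch (illustrative function; the kcopyd_region fields are
filled in exactly as area_io() below fills them):

/* Read metadata area 0, i.e. the first chunk of the COW device. */
static int read_first_area(struct pstore *ps)
{
	struct kcopyd_region where;

	where.dev = ps->snap->cow->dev;
	where.sector = 0;
	where.count = ps->chunk_size;	/* length in sectors */

	return do_io(READ, &where, ps->iobuf);
}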
/*
- * Write the latest COW metadata block.
+ * FIXME: Remove once 2.4.19 has been released.
*/
-static int write_metadata(struct dm_snapshot *s, struct persistent_info *pi)
+struct page *vmalloc_to_page(void *vmalloc_addr)
{
- kdev_t dev = s->cow->dev;
- int blocksize = get_hardsect_size(dev);
- int writesize = blocksize / SECTOR_SIZE;
-
- if (do_io(WRITE, pi->cow_iobuf, dev,
- pi->current_metadata_sector, writesize) != 0) {
- DMERR("Error writing COW block");
- return -1;
+ unsigned long addr = (unsigned long) vmalloc_addr;
+ struct page *page = NULL;
+ pmd_t *pmd;
+ pte_t *pte;
+ pgd_t *pgd;
+
+ pgd = pgd_offset_k(addr);
+ if (!pgd_none(*pgd)) {
+ pmd = pmd_offset(pgd, addr);
+ if (!pmd_none(*pmd)) {
+ pte = pte_offset(pmd, addr);
+ if (pte_present(*pte)) {
+ page = pte_page(*pte);
+ }
+ }
}
-
- return 0;
+ return page;
}
-/*
- * Allocate a kiobuf. This is the only code nicked from the old
- * snapshot driver and I've changed it anyway.
- */
-static int alloc_iobuf_pages(struct kiobuf *iobuf, int nr_sectors)
+static int allocate_iobuf(struct pstore *ps)
{
- int nr_pages, r, i;
+ int r;
+ size_t i, len, nr_pages;
+ struct page *page;
- if (nr_sectors > KIO_MAX_SECTORS)
- return -1;
+ len = ps->chunk_size * SECTOR_SIZE;
- nr_pages = nr_sectors / (PAGE_SIZE / SECTOR_SIZE);
- r = expand_kiobuf(iobuf, nr_pages);
- if (r)
- goto out;
+ /*
+ * Allocate the chunk_size block of memory that will hold
+ * a single metadata area.
+ */
+ ps->area = vmalloc(len);
+ if (!ps->area)
+ return -ENOMEM;
- r = -ENOMEM;
- iobuf->locked = 1;
- iobuf->nr_pages = 0;
- for (i = 0; i < nr_pages; i++) {
- struct page *page;
+ if (alloc_kiovec(1, &ps->iobuf)) {
+ vfree(ps->area);
+ return -ENOMEM;
+ }
- page = alloc_page(GFP_KERNEL);
- if (!page)
- goto out;
+ nr_pages = ps->chunk_size / (PAGE_SIZE / SECTOR_SIZE);
+ r = expand_kiobuf(ps->iobuf, nr_pages);
+ if (r) {
+ free_kiovec(1, &ps->iobuf);
+ vfree(ps->area);
+ return -ENOMEM;
+ }
- iobuf->maplist[i] = page;
+ /*
+ * We lock the pages for ps->area into memory since they'll be
+ * doing a lot of io.
+ *
+ * FIXME: Check that there's no race, ie. the pages can't
+ * be swapped out before we lock them, we may have to
+ * allocate them as separate pages after all :(
+ */
+ for (i = 0; i < len; i += PAGE_SIZE) {
+ page = vmalloc_to_page(ps->area + i);
LockPage(page);
- iobuf->nr_pages++;
+ ps->iobuf->maplist[ps->iobuf->nr_pages] = page;
+ ps->iobuf->nr_pages++;
}
- iobuf->offset = 0;
- r = 0;
-
- out:
- return r;
+ ps->iobuf->offset = 0;
+ return 0;
}
-/*
- * Read on-disk COW metadata and populate the hash table.
- */
-static int read_metadata(struct dm_snapshot *s, struct persistent_info *pi)
+static void free_iobuf(struct pstore *ps)
{
- int status;
int i;
- int entry = 0;
- int map_page = 0;
- int nr_sectors = pi->extent_size;
- kdev_t dev = s->cow->dev;
- int blocksize = get_hardsect_size(dev);
- unsigned long cur_sector = pi->start_of_exceptions;
- unsigned long last_sector;
- unsigned long first_free_sector = 0;
- int entries_per_page = PAGE_SIZE / sizeof(struct disk_exception);
- struct disk_exception *cow_block;
- struct kiobuf *read_iobuf;
- int r = 0;
- int devsize = get_dev_size(dev);
- /*
- * Allocate our own iovec for this operation 'cos the
- * others are way too small.
- */
- if (alloc_kiovec(1, &read_iobuf)) {
- DMERR("Error allocating iobuf for %s", kdevname(dev));
- return -1;
- }
+ for (i = 0; i < ps->iobuf->nr_pages; i++)
+ UnlockPage(ps->iobuf->maplist[i]);
- if (alloc_iobuf_pages(read_iobuf, pi->extent_size)) {
- DMERR("Error allocating iobuf space for %s", kdevname(dev));
- free_kiovec(1, &read_iobuf);
- return -1;
- }
- cow_block = page_address(read_iobuf->maplist[0]);
-
- do {
- /* Make sure the chain does not go off the end of
- * the device, or backwards */
- if (cur_sector > devsize || cur_sector < first_free_sector) {
- DMERR("COW table chain pointers are inconsistent, "
- "can't activate snapshot");
- r = -1;
- goto out;
- }
-
- first_free_sector = max(first_free_sector,
- cur_sector + pi->extent_size);
- status = do_io(READ, read_iobuf, dev, cur_sector, nr_sectors);
- if (status == 0) {
- map_page = 0;
- entry = 0;
-
- cow_block = page_address(read_iobuf->maplist[0]);
-
- /* Now populate the hash table from this data */
- for (i = 0; i <= pi->highest_metadata_entry &&
- cow_block[entry].rsector_new != 0; i++) {
-
- chunk_t old =
- sector_to_chunk(s,
- le64_to_cpu(cow_block
- [entry].
- rsector_org));
- chunk_t new =
- sector_to_chunk(s,
- le64_to_cpu(cow_block
- [entry].
- rsector_new));
-
- if ((r = dm_add_exception(s, old, new)))
- goto out;
-
- first_free_sector = max(first_free_sector,
- (unsigned
- long) (le64_to_cpu
- (cow_block
- [entry].
- rsector_new) +
- s->chunk_size));
-
- /* Do we need to move onto the next page? */
- if (++entry >= entries_per_page) {
- entry = 0;
- cow_block =
- page_address(read_iobuf->
- maplist[++map_page]);
- }
- }
- } else {
- DMERR("Error reading COW metadata for %s",
- kdevname(dev));
- r = -1;
- goto out;
- }
- last_sector = cur_sector;
- cur_sector = le64_to_cpu(cow_block[entry].rsector_org);
-
- } while (cur_sector != 0);
-
- pi->current_metadata_sector = last_sector +
- map_page * PAGE_SIZE / SECTOR_SIZE +
- entry / (SECTOR_SIZE / sizeof(struct disk_exception));
- pi->current_metadata_entry = entry;
- pi->current_metadata_number = i;
- pi->next_free_sector = first_free_sector;
-
- /* Copy last block into cow_iobuf */
- memcpy(pi->disk_cow, (char *) ((long) &cow_block[entry] -
- ((long) &cow_block[entry] &
- (blocksize - 1))), blocksize);
-
- out:
- unmap_kiobuf(read_iobuf);
- free_kiovec(1, &read_iobuf);
-
- return r;
+ free_kiovec(1, &ps->iobuf);
+ vfree(ps->area);
}
/*
- * Read the snapshot volume header, returns 0 only if it read OK
- * and it was valid. returns 1 if no header was found, -1 on
- * error. All fields are checked against the snapshot structure
- * itself to make sure we don't corrupt the data.
+ * Read or write a metadata area.
*/
-static int read_header(struct dm_snapshot *s, struct persistent_info *pi)
+static int area_io(struct pstore *ps, uint32_t area, int rw)
{
- int status;
- struct snap_disk_header *header;
- kdev_t dev = s->cow->dev;
- int blocksize = get_hardsect_size(dev);
- unsigned long devsize;
-
- /* Get it */
- status = do_io(READ, pi->cow_iobuf, dev, 0L, blocksize / SECTOR_SIZE);
- if (status != 0) {
- DMERR("Snapshot dev %s error reading header", kdevname(dev));
- return -1;
- }
+ int r;
+ struct kcopyd_region where;
- header = (struct snap_disk_header *)
- page_address(pi->cow_iobuf->maplist[0]);
-
- /*
- * Check the magic. It's OK if this fails, we just create
- * a new snapshot header and start from scratch.
- */
- if (le32_to_cpu(header->magic) != SNAP_MAGIC) {
- return 1;
- }
-
- /* Check the version matches */
- if (le32_to_cpu(header->version) != SNAPSHOT_DISK_VERSION) {
- DMWARN("Snapshot dev %s version mismatch. Stored: %d, "
- "driver: %d", kdevname(dev),
- le32_to_cpu(header->version), SNAPSHOT_DISK_VERSION);
- return -1;
- }
-
- /* Check the chunk sizes match */
- if (le32_to_cpu(header->chunk_size) != s->chunk_size) {
- DMWARN("Snapshot dev %s chunk size mismatch. Stored: %d, "
- "requested: %ld", kdevname(dev),
- le32_to_cpu(header->chunk_size), s->chunk_size);
- return -1;
- }
-
- /* Check the extent sizes match */
- if (le32_to_cpu(header->extent_size) != pi->extent_size) {
- DMWARN("Snapshot dev %s extent size mismatch. Stored: %d, "
- "requested: %ld", kdevname(dev),
- le32_to_cpu(header->extent_size), pi->extent_size);
- return -1;
- }
+ where.dev = ps->snap->cow->dev;
+ where.sector = ((ps->exceptions_per_area + 1) * ps->chunk_size) * area;
+ where.count = ps->chunk_size;
- /* Get the rest of the data */
- pi->start_of_exceptions = le64_to_cpu(header->start_of_exceptions);
- if (header->full) {
- DMWARN("Snapshot dev %s is full. It cannot be used",
- kdevname(dev));
- return -1;
- }
+ r = do_io(rw, &where, ps->iobuf);
+ if (r)
+ return r;
- /* Validate against the size of the volume */
- devsize = get_dev_size(dev);
- if (pi->start_of_exceptions > devsize) {
- DMWARN("Snapshot metadata error on %s. start exceptions > "
- "device size (%ld > %ld)",
- kdevname(dev), pi->start_of_exceptions, devsize);
- return -1;
- }
+ ps->current_area = area;
+ return 0;
+}
- /* Read metadata into the hash table and update pointers */
- return read_metadata(s, pi);
+static int zero_area(struct pstore *ps, uint32_t area)
+{
+ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);
+ return area_io(ps, area, WRITE);
}
/*
- * Write (or update) the header. The only time we should need to
- * do an update is when the snapshot becomes full.
+ * Access functions for the disk exceptions; these do the endian conversions.
*/
-static int write_header(struct dm_snapshot *s, struct persistent_info *pi)
+static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
{
- struct snap_disk_header *header;
- struct kiobuf *head_iobuf;
- kdev_t dev = s->cow->dev;
- int blocksize = get_hardsect_size(dev);
- int status;
+ if (index >= ps->exceptions_per_area)
+ return NULL;
- /*
- * Allocate our own iobuf for this so we don't corrupt
- * any of the other writes that may be going on.
- */
- if (alloc_kiovec(1, &head_iobuf)) {
- DMERR("Error allocating iobuf for header on %s", kdevname(dev));
- return -1;
- }
-
- if (alloc_iobuf_pages(head_iobuf, PAGE_SIZE / SECTOR_SIZE)) {
- DMERR("Error allocating iobuf space for header on %s",
- kdevname(dev));
- free_kiovec(1, &head_iobuf);
- return -1;
- }
+ return ((struct disk_exception *) ps->area) + index;
+}
- header = (struct snap_disk_header *)
- page_address(head_iobuf->maplist[0]);
+static int read_exception(struct pstore *ps,
+ uint32_t index, struct disk_exception *result)
+{
+ struct disk_exception *e;
- header->magic = cpu_to_le32(SNAP_MAGIC);
- header->version = cpu_to_le32(SNAPSHOT_DISK_VERSION);
- header->chunk_size = cpu_to_le32(s->chunk_size);
- header->extent_size = cpu_to_le32(pi->extent_size);
- header->full = cpu_to_le32(pi->full);
+ e = get_exception(ps, index);
+ if (!e)
+ return -EINVAL;
- header->start_of_exceptions = cpu_to_le64(pi->start_of_exceptions);
+ /* copy it */
+ memcpy(result, e, sizeof(*result));
- /* Must write at least a full block */
- status = do_io(WRITE, head_iobuf, dev, 0, blocksize / SECTOR_SIZE);
+ result->old_chunk = le64_to_cpu(result->old_chunk);
+ result->new_chunk = le64_to_cpu(result->new_chunk);
- unmap_kiobuf(head_iobuf);
- free_kiovec(1, &head_iobuf);
- return status;
+ return 0;
}
-static void destroy_persistent(struct exception_store *store)
+static int write_exception(struct pstore *ps,
+ uint32_t index, struct disk_exception *de)
{
- struct persistent_info *pi = get_info(store);
- unmap_kiobuf(pi->cow_iobuf);
- free_kiovec(1, &pi->cow_iobuf);
- kfree(pi);
+ struct disk_exception *e;
+
+ e = get_exception(ps, index);
+ if (!e)
+ return -EINVAL;
+
+ /* copy it, converting to little-endian on the way */
+ e->old_chunk = cpu_to_le64(de->old_chunk);
+ e->new_chunk = cpu_to_le64(de->new_chunk);
+
+ return 0;
}
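A quick round-trip sketch showing how the two access functions pair up
(illustrative function; the area holds little-endian values, but callers
work purely in CPU byte order):

static int exception_roundtrip(struct pstore *ps)
{
	int r;
	struct disk_exception in, out;

	in.old_chunk = 5;	/* origin chunk */
	in.new_chunk = 42;	/* its copy on the COW device */

	r = write_exception(ps, 0, &in);
	if (r)
		return r;

	return read_exception(ps, 0, &out);	/* out matches in */
}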
-static int init_persistent(struct exception_store *store, int blocksize,
- unsigned long extent_size, void **context)
+/*
+ * Registers the exceptions that are present in the current area.
+ * 'full' is filled in to indicate if the area has been
+ * filled.
+ */
+static int insert_exceptions(struct pstore *ps, int *full)
{
- struct persistent_info *pi = get_info(store);
- struct dm_snapshot *s = store->snap;
- int status;
+ int i, r;
+ struct disk_exception de;
- *context = "Error in disk header";
- /* Check for a header on disk and create a new one if not */
- if ((status = read_header(s, pi)) == 1) {
-
- /* Write a new header */
- pi->start_of_exceptions = pi->next_free_sector;
- pi->next_free_sector += pi->extent_size;
- pi->current_metadata_sector = pi->start_of_exceptions;
- pi->current_metadata_entry = 0;
- pi->current_metadata_number = 0;
-
- *context = "Unable to write snapshot header";
- if (write_header(s, pi) != 0) {
- DMERR("Error writing header to snapshot volume %s",
- kdevname(s->cow->dev));
- goto free_ret;
- }
+ /* presume the area is full */
+ *full = 1;
+
+ for (i = 0; i < ps->exceptions_per_area; i++) {
+ r = read_exception(ps, i, &de);
+
+ if (r)
+ return r;
- /* Write a blank metadata block to the device */
- if (write_metadata(s, pi) != 0) {
- DMERR("Error writing initial COW table to "
- "snapshot volume %s", kdevname(s->cow->dev));
- goto free_ret;
+ /*
+ * If the new_chunk is pointing at the start of
+ * the COW device, where the first metadata area
+ * is, we know that we've hit the end of the
+ * exceptions. Therefore the area is not full.
+ */
+ if (!de.new_chunk) {
+ ps->current_committed = i;
+ *full = 0;
+ break;
}
- }
- /*
- * There is a header but it doesn't match - fail so we
- * don't destroy what might be useful data on disk. If
- * the user really wants to use this COW device for a
- * snapshot then the first sector should be zeroed out
- * first.
- */
- if (status == -1)
- goto free_ret;
+ /*
+ * Keep track of the start of the free chunks.
+ */
+ if (ps->next_free <= de.new_chunk)
+ ps->next_free = de.new_chunk + 1;
- return 0;
+ /*
+ * Otherwise we add the exception to the snapshot.
+ */
+ r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
+ if (r)
+ return r;
+ }
- free_ret:
- unmap_kiobuf(pi->cow_iobuf);
- free_kiovec(1, &pi->cow_iobuf);
- return -1;
+ return 0;
}
-/*
- * Finds a suitable destination for the exception.
- */
-static int prepare_persistent(struct exception_store *store,
- struct exception *e)
+static int read_exceptions(struct pstore *ps)
{
- struct persistent_info *pi = get_info(store);
- struct dm_snapshot *s = store->snap;
- offset_t dev_size;
+ uint32_t area;
+ int r, full = 1;
/*
- * Check for full snapshot. Doing the size calculation here means that
- * the COW device can be resized without us being told
+ * Keep reading chunks and inserting exceptions until
+ * we find a partially full area.
*/
- dev_size = get_dev_size(s->cow->dev);
- if (pi->next_free_sector + s->chunk_size >= dev_size) {
- /* Snapshot is full, we can't use it */
- DMWARN("Snapshot %s is full (sec=%ld, size=%ld)",
- kdevname(s->cow->dev),
- pi->next_free_sector + s->chunk_size, dev_size);
- pi->full = 1;
+ for (area = 0; full; area++) {
+ r = area_io(ps, area, READ);
+ if (r)
+ return r;
- /* Mark it full on the device */
- write_header(s, pi);
+ r = insert_exceptions(ps, &full);
+ if (r)
+ return r;
- return -1;
-
- } else {
- e->new_chunk = sector_to_chunk(s, pi->next_free_sector);
- pi->next_free_sector += s->chunk_size;
}
return 0;
}
-/*
- * Add a new exception entry to the on-disk metadata.
- */
-static int commit_persistent(struct exception_store *store, struct exception *e)
+static inline struct pstore *get_info(struct exception_store *store)
{
- struct persistent_info *pi = get_info(store);
- struct dm_snapshot *s = store->snap;
+ return (struct pstore *) store->context;
+}
- int i = pi->current_metadata_entry++;
- unsigned long next_md_block = pi->current_metadata_sector;
+static void persistent_destroy(struct exception_store *store)
+{
+ struct pstore *ps = get_info(store);
- pi->current_metadata_number++;
+ free_iobuf(ps);
+ kfree(ps);
+}
- /* Update copy of disk COW */
- pi->disk_cow[i].rsector_org =
- cpu_to_le64(chunk_to_sector(s, e->old_chunk));
- pi->disk_cow[i].rsector_new =
- cpu_to_le64(chunk_to_sector(s, e->new_chunk));
+static int persistent_prepare(struct exception_store *store,
+ struct exception *e)
+{
+ struct pstore *ps = get_info(store);
+ uint32_t stride;
- /* Have we filled this extent ? */
- if (pi->current_metadata_number >= pi->highest_metadata_entry) {
- /* Fill in pointer to next metadata extent */
- i++;
- pi->current_metadata_entry++;
+ e->new_chunk = ps->next_free;
- next_md_block = pi->next_free_sector;
- pi->next_free_sector += pi->extent_size;
+ /*
+ * Move on to the next free chunk, making sure to take
+ * into account the location of the metadata chunks.
+ */
+ stride = (ps->exceptions_per_area + 1);
+ if (!(++ps->next_free % stride))
+ ps->next_free++;
- pi->disk_cow[i].rsector_org = cpu_to_le64(next_md_block);
- pi->disk_cow[i].rsector_new = 0;
- }
+ return 0;
+}
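A worked example of the stride arithmetic: with 8K chunks,
exceptions_per_area is 512 and the stride is 513, so chunks 0, 513,
1026, ... hold metadata and must never be handed out as exception
destinations; next_free therefore walks 1, 2, ..., 512, 514, ...
The skip in isolation (illustrative helper):

static uint32_t skip_metadata_chunks(uint32_t next_free, uint32_t epa)
{
	uint32_t stride = epa + 1;

	if (!(++next_free % stride))
		next_free++;	/* step over a metadata chunk */

	return next_free;
}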
- /* Commit to disk */
- if (write_metadata(s, pi)) {
- pi->full = 1; /* Failed. don't try again */
- return -1;
- }
+static void persistent_commit(struct exception_store *store,
+ struct exception *e,
+ void (*callback) (void *, int success),
+ void *callback_context)
+{
+ int r;
+ struct pstore *ps = get_info(store);
+ struct disk_exception de;
+
+ de.old_chunk = e->old_chunk;
+ de.new_chunk = e->new_chunk;
+ write_exception(ps, ps->current_committed, &de);
/*
- * Write a new (empty) metadata block if we are at the
- * end of an existing block so that read_metadata finds a
- * terminating zero entry.
+ * Write the whole area to the disk for now; later we'll
+ * try to defer the write.
*/
- if (pi->current_metadata_entry == pi->md_entries_per_block) {
- memset(pi->disk_cow, 0, PAGE_SIZE);
- pi->current_metadata_sector = next_md_block;
-
- /*
- * If this is also the end of an extent then go
- * back to the start.
- */
- if (pi->current_metadata_number >= pi->highest_metadata_entry) {
- pi->current_metadata_number = 0;
+ r = area_io(ps, ps->current_area, WRITE);
+ if (r)
+ goto bad;
- } else {
- int blocksize = get_hardsect_size(s->cow->dev);
- pi->current_metadata_sector += blocksize / SECTOR_SIZE;
- }
+ /*
+ * Notify the snapshot that the commit has actually
+ * happened.
+ */
+ callback(callback_context, 1);
- pi->current_metadata_entry = 0;
- if (write_metadata(s, pi) != 0) {
- pi->full = 1;
- return -1;
- }
+ /*
+ * Have we completely filled the current area ?
+ */
+ if (++ps->current_committed >= ps->exceptions_per_area) {
+ ps->current_committed = 0;
+ r = zero_area(ps, ps->current_area + 1);
+ if (r)
+ goto bad;
}
- return 0;
+ return;
+
+ bad:
+ ps->valid = 0;
+ callback(callback_context, 0);
}
-/*
- * Sets the full flag in the metadata. A quick hack for now.
- */
-static void drop_persistent(struct exception_store *store)
+static void persistent_drop(struct exception_store *store)
{
- get_info(store)->full = 1;
- write_header(store->snap, get_info(store));
+ struct pstore *ps = get_info(store);
+
+ /*
+ * FIXME: This function is pointless until we have the
+ * header.
+ */
+ ps->valid = 0;
}
-int dm_create_persistent(struct exception_store *store, struct dm_snapshot *s,
- int blocksize, offset_t extent_size, void **error)
+int persistent_init(struct exception_store *store, uint32_t chunk_size)
{
- struct persistent_info *pi;
- int i, r;
- int cow_sectors;
+ int r;
+ struct pstore *ps;
- pi = kmalloc(sizeof(*pi), GFP_KERNEL);
- if (!pi)
+ /* allocate the pstore */
+ ps = kmalloc(sizeof(*ps), GFP_KERNEL);
+ if (!ps)
return -ENOMEM;
- memset(store, 0, sizeof(*store));
- store->destroy = destroy_persistent;
- store->init = init_persistent;
- store->prepare_exception = prepare_persistent;
- store->commit_exception = commit_persistent;
- store->drop_snapshot = drop_persistent;
- store->snap = s;
- store->context = pi;
-
- pi->extent_size = extent_size;
-
- /* Leave the first block alone */
- pi->next_free_sector = blocksize / SECTOR_SIZE;
- pi->disk_cow = NULL;
-
- pi->highest_metadata_entry = (pi->extent_size * SECTOR_SIZE) /
- sizeof(struct disk_exception) - 1;
- pi->md_entries_per_block = blocksize / sizeof(struct disk_exception);
-
- /* Allocate and set up iobuf for metadata I/O */
- *error = "Unable to allocate COW iovec";
- if ((r = alloc_kiovec(1, &pi->cow_iobuf)))
+ ps->snap = store->snap;
+ ps->valid = 1;
+ ps->chunk_size = chunk_size;
+ ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) /
+ sizeof(struct disk_exception);
+ ps->next_free = 1;
+ ps->current_committed = 0;
+
+ /* allocate_iobuf() sizes its buffer from ps->chunk_size, so the
+ * fields above must be set up first. */
+ r = allocate_iobuf(ps);
+ if (r) {
+ kfree(ps);
+ return r;
+ }
+
/*
- * Allocate space for the COW buffer. It should be at
- * least PAGE_SIZE.
+ * Read the metadata.
*/
- cow_sectors = blocksize / SECTOR_SIZE + PAGE_SIZE / SECTOR_SIZE;
- *error = "Unable to allocate COW I/O buffer space";
- if ((r = alloc_iobuf_pages(pi->cow_iobuf, cow_sectors))) {
- free_kiovec(1, &pi->cow_iobuf);
- return r;
+ r = read_exceptions(ps);
+ if (r) {
+ free_iobuf(ps);
+ kfree(ps);
+ return r;
}
- for (i = 0; i < pi->cow_iobuf->nr_pages; i++)
- memset(page_address(pi->cow_iobuf->maplist[i]), 0, PAGE_SIZE);
+ store->destroy = persistent_destroy;
+ store->prepare_exception = persistent_prepare;
+ store->commit_exception = persistent_commit;
+ store->drop_snapshot = persistent_drop;
+ store->context = ps;
- pi->disk_cow = page_address(pi->cow_iobuf->maplist[0]);
- return 0;
+ return 0;
}
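A sketch of the expected call sequence (hypothetical wrapper;
store->snap must be valid before persistent_init() runs, since the
pstore takes its up pointer from it; compare dm_create_transient()
below):

static int setup_persistent_store(struct exception_store *store,
				  struct dm_snapshot *s, uint32_t chunk_size)
{
	memset(store, 0, sizeof(*store));
	store->snap = s;

	return persistent_init(store, chunk_size);
}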
-/*
+/*-----------------------------------------------------------------
* Implementation of the store for non-persistent snapshots.
- */
+ *---------------------------------------------------------------*/
struct transient_c {
offset_t next_free;
};
e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
tc->next_free += store->snap->chunk_size;
-#if 0
- DMWARN("Preparing exception, chunk %lu -> %lu.",
- (unsigned long) e->old_chunk, (unsigned long) e->new_chunk);
-#endif
-
return 0;
}
+static void commit_transient(struct exception_store *store,
+ struct exception *e,
+ void (*callback) (void *, int success),
+ void *callback_context)
+{
+ /* Just succeed */
+ callback(callback_context, 1);
+}
+
+static int percentfull_transient(struct exception_store *store)
+{
+ struct transient_c *tc = (struct transient_c *) store->context;
+ return (tc->next_free * 100) / get_dev_size(store->snap->cow->dev);
+}
+
int dm_create_transient(struct exception_store *store,
struct dm_snapshot *s, int blocksize, void **error)
{
memset(store, 0, sizeof(*store));
store->destroy = destroy_transient;
store->prepare_exception = prepare_transient;
+ store->commit_exception = commit_transient;
+ store->percent_full = percentfull_transient;
store->snap = s;
tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
*/
struct linear_c {
long delta; /* FIXME: we need a signed offset type */
+ long start; /* For display only */
struct dm_dev *dev;
};
}
lc->delta = (int) start - (int) b;
+ lc->start = start;
*context = lc;
return 0;
return 1;
}
+static int linear_sts(status_type_t sts_type, char *result, int maxlen,
+ void *context)
+{
+ struct linear_c *lc = (struct linear_c *) context;
+
+ switch (sts_type) {
+ case STATUSTYPE_INFO:
+ result[0] = '\0';
+ break;
+
+ case STATUSTYPE_TABLE:
+ snprintf(result, maxlen, "%s %ld", kdevname(lc->dev->dev),
+ lc->start);
+ break;
+ }
+ return 0;
+}
+
static struct target_type linear_target = {
name: "linear",
module: THIS_MODULE,
ctr: linear_ctr,
dtr: linear_dtr,
map: linear_map,
+ sts: linear_sts,
+ wait: NULL, /* No wait function */
};
int __init dm_linear_init(void)
if (r < 0)
DMERR("linear: unregister failed %d", r);
}
-
unsigned long topos;
unsigned long got_to;
+ unsigned long size; /* for %age calculation */
struct rw_semaphore lock;
struct buffer_head *bhstring;
+ wait_queue_head_t waitq;
int error;
};
if (!uptodate) {
DMERR("Mirror copy to %s failed", kdevname(lc->todev->dev));
lc->error = 1;
- dm_notify(lc); /* TODO: interface ?? */
+ wake_up_interruptible(&lc->waitq);
}
kmem_cache_free(bh_cachep, bh);
+ wake_up_interruptible(&lc->waitq);
}
static void mirror_bh(struct mirror_c *mc, struct buffer_head *bh)
lc->topos = offset2;
lc->error = 0;
lc->bhstring = NULL;
+ lc->size = l - offset1;
+ init_waitqueue_head(&lc->waitq);
init_rwsem(&lc->lock);
*context = lc;
{
struct mirror_c *lc = (struct mirror_c *) c;
+ /* Just in case anyone is still waiting... */
+ wake_up_interruptible(&lc->waitq);
+
dm_table_put_device(t, lc->fromdev);
dm_table_put_device(t, lc->todev);
kfree(c);
return 1;
}
+static int mirror_sts(status_type_t sts_type, char *result, int maxlen,
+ void *context)
+{
+ struct mirror_c *mc = (struct mirror_c *) context;
+
+ switch (sts_type) {
+ case STATUSTYPE_INFO:
+ if (mc->error)
+ snprintf(result, maxlen, "Error");
+ else
+ snprintf(result, maxlen, "%ld%%",
+ (mc->got_to -
+ mc->from_delta) * 100 / mc->size);
+ break;
+
+ case STATUSTYPE_TABLE:
+ snprintf(result, maxlen, "%s %ld %s %ld %d",
+ kdevname(mc->fromdev->dev), mc->frompos,
+ kdevname(mc->todev->dev), mc->topos, 0);
+ break;
+ }
+ return 0;
+}
+
+static int mirror_wait(void *context, wait_queue_t *wq, int add)
+{
+ struct mirror_c *mc = (struct mirror_c *) context;
+
+ if (add)
+ add_wait_queue(&mc->waitq, wq);
+ else
+ remove_wait_queue(&mc->waitq, wq);
+
+ return 0;
+}
+
static struct target_type mirror_target = {
name: "mirror",
module: THIS_MODULE,
ctr: mirror_ctr,
dtr: mirror_dtr,
map: mirror_map,
+ sts: mirror_sts,
+ wait: mirror_wait,
};
int __init dm_mirror_init(void)
sizeof(struct buffer_head),
__alignof__(struct buffer_head),
0, NULL, NULL);
- if (!bh_cachep) {
+ if (!bh_cachep)
return -1;
- }
r = dm_register_target(&mirror_target);
if (r < 0) {
#define DMDEBUG(x...)
#endif
+/*
+ * The percentage increment at which we wake up users
+ */
+#define WAKE_UP_PERCENT 5
+
/*
* Hard sector size used all over the kernel
*/
return 0;
}
-static void exit_exception_table(struct exception_table *et, kmem_cache_t * mem)
+static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
{
struct list_head *slot, *entry, *temp;
struct exception *ex;
static inline struct exception *alloc_exception(void)
{
- return kmem_cache_alloc(exception_cache, GFP_NOIO);
+ struct exception *e;
+
+ e = kmem_cache_alloc(exception_cache, GFP_NOIO);
+ if (!e)
+ e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
+
+ return e;
}
static inline void free_exception(struct exception *e)
/*
* Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n>
- * <chunk-size> <extent-size>
+ * <chunk-size>
*/
static int snapshot_ctr(struct dm_table *t, offset_t b, offset_t l,
int argc, char **argv, void **context)
goto bad_putdev;
}
+ init_waitqueue_head(&s->waitq);
s->chunk_size = chunk_size;
s->chunk_mask = chunk_size - 1;
for (s->chunk_shift = 0; chunk_size;
s->chunk_shift--;
s->valid = 1;
+ s->last_percent = 0;
init_rwsem(&s->lock);
/* Allocate hash table for COW data */
goto bad_free1;
}
- /* Allocate the COW iobuf and set associated variables */
- r = store_int_fn(s, init, blocksize, extent_size, context);
- if (r) {
- *context = "Couldn't initialise exception store";
- goto bad_free1;
- }
-
/* Flush IO to the origin device */
#if LVM_VFS_ENHANCEMENT
fsync_dev_lockfs(s->origin->dev);
{
struct dm_snapshot *s = (struct dm_snapshot *) context;
+ wake_up_interruptible(&s->waitq);
+
unregister_snapshot(s);
exit_exception_table(&s->pending, pending_cache);
bh = n;
}
+ /* FIXME: not sure we can call this from this context. */
run_task_queue(&tq_disk);
}
}
}
-/*
- * Called when the copy I/O has finished
- */
-static void copy_callback(int err, void *context)
+static void pending_complete(struct pending_exception *pe, int success)
{
- struct pending_exception *pe = (struct pending_exception *) context;
- struct dm_snapshot *s = pe->snap;
struct exception *e;
+ struct dm_snapshot *s = pe->snap;
- if (!err) {
- /* Update the metadata if we are persistent */
- store_fn(s, commit_exception, &pe->e);
-
+ if (success) {
e = alloc_exception();
if (!e) {
- /* FIXME: what do we do now ? */
+ printk("Unable to allocate exception.");
+ down_write(&s->lock);
+ store_fn(s, drop_snapshot);
+ s->valid = 0;
+ up_write(&s->lock);
return;
}
* Add a proper exception, and remove the
* inflight exception from the list.
*/
- down_write(&pe->snap->lock);
+ down_write(&s->lock);
memcpy(e, &pe->e, sizeof(*e));
insert_exception(&s->complete, e);
remove_exception(&pe->e);
/* Submit any pending write BHs */
- up_write(&pe->snap->lock);
+ up_write(&s->lock);
flush_buffers(pe->snapshot_bhs);
DMDEBUG("Exception completed successfully.");
+ /* Notify any interested parties */
+ if (s->store.percent_full) {
+ int pc = s->store.percent_full(&s->store);
+
+ if (pc >= s->last_percent + WAKE_UP_PERCENT) {
+ wake_up_interruptible(&s->waitq);
+ s->last_percent = pc - pc % WAKE_UP_PERCENT;
+ }
+ }
+
} else {
/* Read/write error - snapshot is unusable */
DMERR("Error reading/writing snapshot");
- down_write(&pe->snap->lock);
- store_fn(pe->snap, drop_snapshot);
- pe->snap->valid = 0;
+ down_write(&s->lock);
+ store_fn(s, drop_snapshot);
+ s->valid = 0;
remove_exception(&pe->e);
- up_write(&pe->snap->lock);
+ up_write(&s->lock);
error_buffers(pe->snapshot_bhs);
+ wake_up_interruptible(&s->waitq);
DMDEBUG("Exception failed.");
}
free_pending_exception(pe);
}
+static void commit_callback(void *context, int success)
+{
+ struct pending_exception *pe = (struct pending_exception *) context;
+ pending_complete(pe, success);
+}
+
+/*
+ * Called when the copy I/O has finished. kcopyd actually runs
+ * this code so don't block.
+ */
+static void copy_callback(int err, void *context)
+{
+ struct pending_exception *pe = (struct pending_exception *) context;
+ struct dm_snapshot *s = pe->snap;
+
+ if (err)
+ pending_complete(pe, 0);
+
+ else
+ /* Update the metadata if we are persistent */
+ s->store.commit_exception(&s->store, &pe->e, commit_callback,
+ pe);
+}
+
/*
* Dispatches the copy operation to kcopyd.
*/
if (!pe) {
store_fn(snap, drop_snapshot);
snap->valid = 0;
- } else {
+ } else {
if (last)
list_splice(&pe->siblings,
&last->siblings);
return r;
}
+static int snapshot_sts(status_type_t sts_type, char *result,
+ int maxlen, void *context)
+{
+ struct dm_snapshot *snap = (struct dm_snapshot *) context;
+ char cowdevname[PATH_MAX];
+ char orgdevname[PATH_MAX];
+
+ switch (sts_type) {
+ case STATUSTYPE_INFO:
+ if (!snap->valid)
+ snprintf(result, maxlen, "Invalid");
+ else {
+ if (snap->store.percent_full)
+ snprintf(result, maxlen, "%d%%",
+ snap->store.percent_full(&snap->
+ store));
+ else
+ snprintf(result, maxlen, "Unknown");
+ }
+ break;
+
+ case STATUSTYPE_TABLE:
+ /*
+ * kdevname returns a static pointer, so we need to make
+ * private copies if the output is to make sense.
+ */
+ strcpy(cowdevname, kdevname(snap->cow->dev));
+ strcpy(orgdevname, kdevname(snap->origin->dev));
+ snprintf(result, maxlen, "%s %s %c %ld", orgdevname, cowdevname,
+ 'N', /* TODO persistent snaps */
+ snap->chunk_size);
+ break;
+ }
+
+ return 0;
+}
+
+static int snapshot_wait(void *context, wait_queue_t *wq, int add)
+{
+ struct dm_snapshot *snap = (struct dm_snapshot *) context;
+
+ if (add)
+ add_wait_queue(&snap->waitq, wq);
+ else
+ remove_wait_queue(&snap->waitq, wq);
+
+ return 0;
+}
+
/*
* Called on a write from the origin driver.
*/
return (rw == WRITE) ? do_origin(dev, bh) : 1;
}
+static int origin_sts(status_type_t sts_type, char *result,
+ int maxlen, void *context)
+{
+ struct dm_dev *dev = (struct dm_dev *) context;
+
+ switch (sts_type) {
+ case STATUSTYPE_INFO:
+ result[0] = '\0';
+ break;
+
+ case STATUSTYPE_TABLE:
+ snprintf(result, maxlen, "%s", kdevname(dev->dev));
+ break;
+ }
+
+ return 0;
+}
+
static struct target_type origin_target = {
name: "snapshot-origin",
module: THIS_MODULE,
ctr: origin_ctr,
dtr: origin_dtr,
map: origin_map,
+ sts: origin_sts,
+ wait: NULL,
err: NULL
};
ctr: snapshot_ctr,
dtr: snapshot_dtr,
map: snapshot_map,
+ sts: snapshot_sts,
+ wait: snapshot_wait,
err: NULL
};
/*
* Destroys this object when you've finished with it.
*/
- void (*destroy)(struct exception_store *store);
-
- /*
- * Read the metadata and populate the snapshot.
- */
- int (*init)(struct exception_store *store,
- int blocksize, unsigned long extent_size, void **context);
+ void (*destroy) (struct exception_store *store);
/*
* Find somewhere to store the next exception.
*/
- int (*prepare_exception)(struct exception_store *store,
+ int (*prepare_exception) (struct exception_store *store,
struct exception *e);
/*
* Update the metadata with this exception.
*/
- int (*commit_exception)(struct exception_store *store,
- struct exception *e);
+ void (*commit_exception) (struct exception_store *store,
+ struct exception *e,
+ void (*callback) (void *, int success),
+ void *callback_context);
/*
* The snapshot is invalid, note this in the metadata.
*/
- void (*drop_snapshot)(struct exception_store *store);
+ void (*drop_snapshot) (struct exception_store *store);
+
+ /*
+ * Return the %age full of the snapshot
+ */
+ int (*percent_full) (struct exception_store *store);
struct dm_snapshot *snap;
void *context;
/* List of snapshots per Origin */
struct list_head list;
+ /* Processes wait on this when they want to block on status changes */
+ wait_queue_head_t waitq;
+
/* Size of data blocks saved - must be a power of 2 */
chunk_t chunk_size;
chunk_t chunk_mask;
/* You can't use a snapshot if this is 0 (e.g. if full) */
int valid;
+ /* The last percentage we notified */
+ int last_percent;
+
struct exception_table pending;
struct exception_table complete;
* store.
*/
int dm_create_persistent(struct exception_store *store,
- struct dm_snapshot *s,
- int blocksize,
- offset_t extent_size,
- void **error);
+ struct dm_snapshot *s,
+ int blocksize, offset_t extent_size, void **error);
int dm_create_transient(struct exception_store *store,
- struct dm_snapshot *s,
- int blocksize, void **error);
+ struct dm_snapshot *s, int blocksize, void **error);
/*
* Return the number of sectors in the device.
return -EINVAL;
}
- chunk_size =simple_strtoul(argv[1], &end, 10);
+ chunk_size = simple_strtoul(argv[1], &end, 10);
if (*end) {
*context = "dm-stripe: Invalid chunk_size";
return -EINVAL;
if (l % stripes) {
*context = "dm-stripe: Target length not divisable by "
- "number of stripes";
+ "number of stripes";
return -EINVAL;
}
sc = alloc_context(stripes);
if (!sc) {
*context = "dm-stripe: Memory allocation for striped context "
- "failed";
+ "failed";
return -ENOMEM;
}
for (i = 0; i < stripes; i++) {
if (argc < 2) {
*context = "dm-stripe: Not enough destinations "
- "specified";
+ "specified";
kfree(sc);
return -EINVAL;
}
r = get_stripe(t, sc, i, argv);
if (r < 0) {
*context = "dm-stripe: Couldn't parse stripe "
- "destination";
+ "destination";
while (i--)
dm_table_put_device(t, sc->stripe[i].dev);
kfree(sc);
return 1;
}
+static int stripe_sts(status_type_t sts_type, char *result, int maxlen,
+ void *context)
+{
+ struct stripe_c *sc = (struct stripe_c *) context;
+ int offset;
+ int i;
+
+ switch (sts_type) {
+ case STATUSTYPE_INFO:
+ result[0] = '\0';
+ break;
+
+ case STATUSTYPE_TABLE:
+ offset = snprintf(result, maxlen, "%d %ld",
+ sc->stripes, sc->chunk_mask + 1);
+ for (i = 0; i < sc->stripes; i++) {
+ offset +=
+ snprintf(result + offset, maxlen - offset,
+ " %s %ld",
+ kdevname(sc->stripe[i].dev->dev),
+ sc->stripe[i].physical_start);
+ }
+ break;
+ }
+ return 0;
+}
+
static struct target_type stripe_target = {
name: "striped",
module: THIS_MODULE,
ctr: stripe_ctr,
dtr: stripe_dtr,
map: stripe_map,
+ sts: stripe_sts,
+ wait: NULL,
};
int __init dm_stripe_init(void)
return;
}
-
int r;
kdev_t dev;
struct dm_dev *dd;
-
- /* convert the path to a device */
- if ((r = lookup_device(path, &dev)))
- return r;
+ int major, minor;
+
+ if (sscanf(path, "%x:%x", &major, &minor) == 2) {
+ /* Extract the major/minor numbers */
+ dev = MKDEV(major, minor);
+ } else {
+ /* convert the path to a device */
+ if ((r = lookup_device(path, &dev)))
+ return r;
+ }
dd = find_device(&t->devices, dev);
if (!dd) {
total += t->counts[i];
}
- indexes = (offset_t *) vmalloc((unsigned long)NODE_SIZE * total);
+ indexes = (offset_t *) vmalloc((unsigned long) NODE_SIZE * total);
if (!indexes)
return -ENOMEM;
ctr: io_err_ctr,
dtr: io_err_dtr,
map: io_err_map,
+ sts: NULL,
+ wait: NULL,
};
int dm_target_init(void)
static struct buffer_head *alloc_buffer(void)
{
- int state = current->state;
struct buffer_head *r;
+ unsigned long flags;
- set_current_state(TASK_UNINTERRUPTIBLE);
- spin_lock(&_buffer_lock);
+ spin_lock_irqsave(&_buffer_lock, flags);
if (!_free_buffers)
r = NULL;
r->b_reqnext = NULL;
}
- spin_unlock(&_buffer_lock);
- set_current_state(state);
+ spin_unlock_irqrestore(&_buffer_lock, flags);
return r;
}
*/
static void free_buffer(struct buffer_head *bh)
{
- spin_lock(&_buffer_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&_buffer_lock, flags);
bh->b_reqnext = _free_buffers;
_free_buffers = bh;
- spin_unlock(&_buffer_lock);
+ spin_unlock_irqrestore(&_buffer_lock, flags);
}
/*-----------------------------------------------------------------
if (!_job_cache)
return -ENOMEM;
- _job_pool = mempool_create(MIN_JOBS,
- mempool_alloc_slab,
+ _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab,
mempool_free_slab, _job_cache);
if (!_job_pool) {
kmem_cache_destroy(_job_cache);
{
struct kcopyd_job *job;
- job = mempool_alloc(_job_pool, GFP_KERNEL);;
+ job = mempool_alloc(_job_pool, GFP_KERNEL);
if (!job)
return NULL;
* Functions to push and pop a job onto the head of a given job
* list.
*/
-static inline struct kcopyd_job *__pop(struct list_head *jobs)
+static inline struct kcopyd_job *pop(struct list_head *jobs)
{
struct kcopyd_job *job = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&_job_lock, flags);
- spin_lock(&_job_lock);
if (!list_empty(jobs)) {
job = list_entry(jobs->next, struct kcopyd_job, list);
list_del(&job->list);
}
- spin_unlock(&_job_lock);
+ spin_unlock_irqrestore(&_job_lock, flags);
return job;
}
-static struct kcopyd_job *pop(struct list_head *jobs)
+static inline void push(struct list_head *jobs, struct kcopyd_job *job)
{
- int state = current->state;
- struct kcopyd_job *job;
-
- set_current_state(TASK_UNINTERRUPTIBLE);
- job = __pop(jobs);
- set_current_state(state);
- return job;
-}
+ unsigned long flags;
-static inline void __push(struct list_head *jobs, struct kcopyd_job *job)
-{
- spin_lock(&_job_lock);
+ spin_lock_irqsave(&_job_lock, flags);
list_add(&job->list, jobs);
- spin_unlock(&_job_lock);
-}
-
-static void push(struct list_head *jobs, struct kcopyd_job *job)
-{
- int state = current->state;
-
- set_current_state(TASK_UNINTERRUPTIBLE);
- __push(jobs, job);
- set_current_state(state);
+ spin_unlock_irqrestore(&_job_lock, flags);
}
/*
/* are we the last ? */
if (atomic_dec_and_test(&job->nr_incomplete)) {
- __push(&_complete_jobs, job);
+ push(&_complete_jobs, job);
wake_kcopyd();
}
static DECLARE_MUTEX(_run_lock);
static DECLARE_WAIT_QUEUE_HEAD(_job_queue);
-/*
- * A day in the life of a little daemon.
- */
-static void kcopyd_cycle(void)
+static int kcopyd(void *start_lock)
{
DECLARE_WAITQUEUE(wq, current);
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&_job_queue, &wq);
-
- do_work();
- schedule();
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&_job_queue, &wq);
-}
-
-static int kcopyd(void *start_lock)
-{
daemonize();
strcpy(current->comm, "kcopyd");
_kcopyd_task = current;
down(&_run_lock);
up((struct semaphore *) start_lock);
- while (!atomic_read(&_kcopyd_must_die))
- kcopyd_cycle();
+ add_wait_queue(&_job_queue, &wq);
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (atomic_read(&_kcopyd_must_die))
+ break;
+
+ do_work();
+ schedule();
+ }
+
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&_job_queue, &wq);
up(&_run_lock);
DMINFO("kcopyd shutting down");
if (!_copy_cache)
return -ENOMEM;
- _copy_pool = mempool_create(MIN_INFOS,
- mempool_alloc_slab,
+ _copy_pool = mempool_create(MIN_INFOS, mempool_alloc_slab,
mempool_free_slab, _copy_cache);
if (!_copy_pool) {
kmem_cache_destroy(_copy_cache);
kcopyd_io(job);
}
-int kcopyd_copy(struct kcopyd_region *from,
- struct kcopyd_region *to, kcopyd_notify_fn fn, void *context)
+int kcopyd_copy(struct kcopyd_region *from, struct kcopyd_region *to,
+ kcopyd_notify_fn fn, void *context)
{
struct copy_info *info;
struct kcopyd_job *job;
#include <linux/miscdevice.h>
#include <linux/dm-ioctl.h>
#include <linux/init.h>
+#include <linux/wait.h>
static void free_params(struct dm_ioctl *param)
{
return (*param->uuid) ? DM_LOOKUP_BY_UUID : DM_LOOKUP_BY_NAME;
}
+#define ALIGNMENT sizeof(int)
+static void *_align(void *ptr, unsigned int a)
+{
+ register unsigned long align = --a;
+
+ return (void *) (((unsigned long) ptr + align) & ~align);
+}
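A short sanity sketch of what _align() computes (assuming outbuf itself
is ALIGNMENT-aligned and sizeof(int) == 4):

static void align_example(char *outbuf)
{
	char *p;

	p = _align(outbuf + 5, ALIGNMENT);	/* outbuf + 8 */
	p = _align(p, ALIGNMENT);		/* unchanged: already aligned */
}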
+
+/*
+ * Build up the status struct for each target
+ */
+static int __status(struct mapped_device *md, struct dm_ioctl *param,
+ char *outbuf, int *len)
+{
+ int i;
+ struct dm_target_spec *spec;
+ unsigned long long sector = 0LL;
+ char *outptr;
+ status_type_t type;
+
+ if (param->flags & DM_STATUS_TABLE_FLAG)
+ type = STATUSTYPE_TABLE;
+ else
+ type = STATUSTYPE_INFO;
+
+ outptr = outbuf;
+
+ /* Get all the target info */
+ for (i = 0; i < md->map->num_targets; i++) {
+ struct target_type *tt = md->map->targets[i].type;
+ offset_t high = md->map->highs[i];
+
+ if (outptr - outbuf +
+ sizeof(struct dm_target_spec) > param->data_size)
+ return -ENOMEM;
+
+ spec = (struct dm_target_spec *) outptr;
+
+ spec->status = 0;
+ spec->sector_start = sector;
+ spec->length = high - sector + 1;
+ strncpy(spec->target_type, tt->name, sizeof(spec->target_type));
+
+ outptr += sizeof(struct dm_target_spec);
+
+ /* Get the status/table string from the target driver */
+ if (tt->sts)
+ tt->sts(type, outptr,
+ outbuf + param->data_size - outptr,
+ md->map->targets[i].private);
+ else
+ outptr[0] = '\0';
+
+ outptr += strlen(outptr) + 1;
+ outptr = _align(outptr, ALIGNMENT);
+
+ sector = high + 1;
+
+ spec->next = outptr - outbuf;
+ }
+
+ param->target_count = md->map->num_targets;
+ *len = outptr - outbuf;
+
+ return 0;
+}
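For reference, the buffer __status() builds is a sequence of records,
one per target, and _unmarshal_status() in libdm walks the same layout:

/*
 *  outbuf                                           spec->next
 *  |                                                |
 *  v                                                v
 *  +-----------------------+------------------------+---+------------ ...
 *  | struct dm_target_spec | status/table text '\0' |pad| next record
 *  +-----------------------+------------------------+---+------------ ...
 *
 * spec->next is the offset of the following record from the start of
 * the buffer; the text is padded out to ALIGNMENT.
 */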
+
+static int __wait(struct mapped_device *md, struct dm_ioctl *param)
+{
+ int waiting = 0;
+ int i;
+ DECLARE_WAITQUEUE(waitq, current);
+
+ /* Get all the target info */
+ for (i = 0; i < md->map->num_targets; i++) {
+ struct target_type *tt = md->map->targets[i].type;
+
+ set_task_state(current, TASK_INTERRUPTIBLE);
+
+ /* Add ourself to the target's wait queue */
+ if (tt->wait &&
+ (!tt->wait(md->map->targets[i].private, &waitq, 1)))
+ waiting = 1;
+ }
+
+ /* If at least one call succeeded then sleep */
+ if (waiting) {
+ schedule();
+
+ for (i = 0; i < md->map->num_targets; i++) {
+ struct target_type *tt = md->map->targets[i].type;
+
+ /* And remove ourself */
+ if (tt->wait)
+ tt->wait(md->map->targets[i].private,
+ &waitq, 0);
+ }
+ }
+
+ set_task_state(current, TASK_RUNNING);
+
+ return 0;
+}
+
+/*
+ * Return the status of a device as a text string for each
+ * target.
+ */
+static int get_status(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+ struct mapped_device *md;
+ int len = 0;
+ int ret;
+ char *outbuf = NULL;
+
+ md = dm_get_name_r(lookup_name(param), lookup_type(param));
+ if (!md)
+ /*
+ * Device not found - returns cleared exists flag.
+ */
+ goto out;
+
+ /* We haven't a clue how long the resultant data will be so
+ just allocate as much as userland has allowed us and make sure
+ we don't overrun it */
+ outbuf = kmalloc(param->data_size, GFP_KERNEL);
+ if (!outbuf)
+ goto out;
+ /*
+ * Get the status of all targets
+ */
+ __status(md, param, outbuf, &len);
+
+ /*
+ * Setup the basic dm_ioctl structure.
+ */
+ __info(md, param);
+
+ out:
+ if (md)
+ dm_put_r(md);
+
+ ret = results_to_user(user, param, outbuf, len);
+
+ if (outbuf)
+ kfree(outbuf);
+
+ return ret;
+}
+
+/*
+ * Wait for a device to report an event
+ */
+static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user)
+{
+ struct mapped_device *md;
+
+ md = dm_get_name_r(lookup_name(param), lookup_type(param));
+ if (!md)
+ /*
+ * Device not found - returns cleared exists flag.
+ */
+ goto out;
+ /*
+ * Setup the basic dm_ioctl structure.
+ */
+ __info(md, param);
+
+ /*
+ * Wait for a notification event
+ */
+ __wait(md, param);
+
+ dm_put_r(md);
+
+ out:
+ return results_to_user(user, param, NULL, 0);
+}
+
/*
* Copies device info back to user space, used by
* the create and info ioctls.
r = dep(param, user);
break;
+ case DM_GET_STATUS_CMD:
+ r = get_status(param, user);
+ break;
+
+ case DM_WAIT_EVENT_CMD:
+ r = wait_device_event(param, user);
+ break;
+
default:
DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
r = -EINVAL;
}
static struct file_operations _ctl_fops = {
- open: ctl_open,
- release: ctl_close,
- ioctl: ctl_ioctl,
- owner: THIS_MODULE,
+ open: ctl_open,
+ release: ctl_close,
+ ioctl: ctl_ioctl,
+ owner: THIS_MODULE,
};
static devfs_handle_t _ctl_handle;
static struct miscdevice _dm_misc = {
- minor: MISC_DYNAMIC_MINOR,
- name: DM_NAME,
- fops: &_ctl_fops
+ minor: MISC_DYNAMIC_MINOR,
+ name: DM_NAME,
+ fops: &_ctl_fops
};
/* Create misc character device and link to DM_DIR/control */
/*
* All ioctl arguments consist of a single chunk of memory, with
- * this structure at the start.
+ * this structure at the start. If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
*/
struct dm_ioctl {
char version[16];
unsigned long data_start; /* offset to start of data
* relative to start of this struct */
- char name[DM_NAME_LEN]; /* device name */
+ char name[DM_NAME_LEN]; /* device name */
unsigned int target_count; /* in/out */
unsigned int open_count; /* out */
- unsigned int flags; /* in/out */
+ unsigned int flags; /* in/out */
- __kernel_dev_t dev; /* in/out */
+ __kernel_dev_t dev; /* in/out */
- char uuid[DM_UUID_LEN]; /* unique identifier for
- * the block device */
+ char uuid[DM_UUID_LEN]; /* unique identifier for
+ * the block device */
};
/*
DM_RENAME_CMD,
DM_VERSION_CMD,
DM_DEPS_CMD,
- DM_REMOVE_ALL_CMD
+ DM_REMOVE_ALL_CMD,
+ DM_GET_STATUS_CMD,
+ DM_WAIT_EVENT_CMD
};
-#define DM_CREATE _IOWR(DM_IOCTL, DM_CREATE_CMD, struct dm_ioctl)
-#define DM_REMOVE _IOW(DM_IOCTL, DM_REMOVE_CMD, struct dm_ioctl)
-#define DM_SUSPEND _IOW(DM_IOCTL, DM_SUSPEND_CMD, struct dm_ioctl)
-#define DM_RELOAD _IOW(DM_IOCTL, DM_RELOAD_CMD, struct dm_ioctl)
-#define DM_INFO _IOWR(DM_IOCTL, DM_INFO_CMD, struct dm_ioctl)
-#define DM_RENAME _IOW(DM_IOCTL, DM_RENAME_CMD, struct dm_ioctl)
-#define DM_VERSION _IOR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
-#define DM_DEPS _IOR(DM_IOCTL, DM_DEPS_CMD, struct dm_ioctl)
-#define DM_REMOVE_ALL _IOR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_CREATE _IOWR(DM_IOCTL, DM_CREATE_CMD, struct dm_ioctl)
+#define DM_REMOVE _IOW(DM_IOCTL, DM_REMOVE_CMD, struct dm_ioctl)
+#define DM_SUSPEND _IOW(DM_IOCTL, DM_SUSPEND_CMD, struct dm_ioctl)
+#define DM_RELOAD _IOW(DM_IOCTL, DM_RELOAD_CMD, struct dm_ioctl)
+#define DM_INFO _IOWR(DM_IOCTL, DM_INFO_CMD, struct dm_ioctl)
+#define DM_RENAME _IOW(DM_IOCTL, DM_RENAME_CMD, struct dm_ioctl)
+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_DEPS _IOWR(DM_IOCTL, DM_DEPS_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL _IOR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_GET_STATUS _IOWR(DM_IOCTL, DM_GET_STATUS_CMD, struct dm_ioctl)
+#define DM_WAIT_EVENT _IOR(DM_IOCTL, DM_WAIT_EVENT_CMD, struct dm_ioctl)
#define DM_IOCTL_VERSION @DM_IOCTL_VERSION@
#define DM_DRIVER_VERSION @DM_DRIVER_VERSION@
#define DM_EXISTS_FLAG 0x00000004
#define DM_PERSISTENT_DEV_FLAG 0x00000008
+/*
+ * Flag passed into ioctl STATUS command to get table
+ * information rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG 0x00000010
+
#endif /* _LINUX_DM_IOCTL_H */
"waitevent"
};
-
static void *_align(void *ptr, unsigned int a)
{
register unsigned long align = --a;
return 1;
}
-void *dm_get_next_target(struct dm_task *dmt, void *next, unsigned long long *start,
- unsigned long long *length,
- char **target_type,
- char **params)
+void *dm_get_next_target(struct dm_task *dmt, void *next,
+ uint64_t *start, uint64_t *length,
+ char **target_type, char **params)
{
- struct target *t;
-
- if (!next)
- next = dmt->head;
+ struct target *t = (struct target *) next;
- t = next;
+ if (!t)
+ t = dmt->head;
- if (t) {
+ if (!t)
+ return NULL;
*start = t->start;
*length = t->length;
*target_type = t->type;
*params = t->params;
+
return t->next;
- }
- return NULL;
}
/* Unmarshall the target info returned from a status call */
-static int unmarshal_status(struct dm_task *dmt, struct dm_ioctl *dmi)
+static int _unmarshal_status(struct dm_task *dmt, struct dm_ioctl *dmi)
{
- char *outbuf = (char *)dmi + sizeof(struct dm_ioctl);
+ char *outbuf = (char *) dmi + sizeof(struct dm_ioctl);
char *outptr = outbuf;
int i;
- for (i=0; i < dmi->target_count; i++) {
- struct dm_target_spec *spec = (struct dm_target_spec *)outptr;
+ for (i = 0; i < dmi->target_count; i++) {
+ struct dm_target_spec *spec = (struct dm_target_spec *) outptr;
- if (!dm_task_add_target(dmt, spec->sector_start, spec->length,
- spec->target_type, outptr+sizeof(*spec)))
- return 0;
- outptr += sizeof(struct dm_target_spec);
- outptr += strlen(outptr) + 1;
- _align(outptr, ALIGNMENT);
+ if (!dm_task_add_target(dmt, spec->sector_start, spec->length,
+ spec->target_type,
+ outptr + sizeof(*spec)))
+ return 0;
+
+ outptr += sizeof(struct dm_target_spec);
+ outptr += strlen(outptr) + 1;
+ outptr = _align(outptr, ALIGNMENT);
}
+
return 1;
}
return 1;
}
-struct target *create_target(uint64_t start,
- uint64_t len, const char *type, const char *params)
+struct target *create_target(uint64_t start, uint64_t len, const char *type,
+ const char *params)
{
struct target *t = malloc(sizeof(*t));
break;
case DM_DEVICE_TABLE:
- dmi->flags |= DM_STATUS_TABLE_FLAG;
+ dmi->flags |= DM_STATUS_TABLE_FLAG;
command = DM_GET_STATUS;
break;
case DM_DEVICE_WAITEVENT:
- command = DM_WAIT_EVENT;
+ command = DM_WAIT_EVENT;
break;
default:
case DM_DEVICE_STATUS:
case DM_DEVICE_TABLE:
- if (!unmarshal_status(dmt, dmi))
- goto bad;
- break;
+ if (!_unmarshal_status(dmt, dmi))
+ goto bad;
+ break;
}
dmt->dmi = dmi;
/* Use this to retrieve target information returned from a STATUS call */
void *dm_get_next_target(struct dm_task *dmt,
- void *next, unsigned long long *start,
- unsigned long long *length,
- char **target_type,
- char **params);
+ void *next, uint64_t *start, uint64_t *length,
+ char **target_type, char **params);
/*
* Call this to actually run the ioctl.