sourceware.org Git - lvm2.git/commitdiff
device-mapper: Fork libdm internally.
authorJoe Thornber <ejt@redhat.com>
Mon, 14 May 2018 11:16:43 +0000 (12:16 +0100)
committerJoe Thornber <ejt@redhat.com>
Wed, 16 May 2018 12:00:50 +0000 (13:00 +0100)
The device-mapper directory now holds a copy of libdm source.  At
the moment this code is identical to libdm.  Over time code will
migrate out to appropriate places (see doc/refactoring.txt).

The libdm directory still exists, and contains the source for the
libdevmapper shared library, which we will continue to ship (though
not necessarily update).

All code using libdm should now use the version in device-mapper.

87 files changed:
Makefile.in
configure
configure.ac
daemons/clvmd/Makefile.in
daemons/cmirrord/Makefile.in
daemons/cmirrord/cluster.h
daemons/cmirrord/functions.h
daemons/dmeventd/Makefile.in
daemons/dmeventd/dmeventd.c
daemons/dmeventd/libdevmapper-event.c
daemons/dmeventd/libdevmapper-event.pc.in
daemons/dmeventd/plugins/lvm2/Makefile.in
daemons/dmeventd/plugins/mirror/Makefile.in
daemons/dmeventd/plugins/raid/Makefile.in
daemons/dmeventd/plugins/snapshot/Makefile.in
daemons/dmeventd/plugins/thin/Makefile.in
daemons/dmfilemapd/Makefile.in
daemons/dmfilemapd/dmfilemapd.c
daemons/lvmetad/Makefile.in
daemons/lvmlockd/Makefile.in
daemons/lvmlockd/lvmlockd-core.c
daemons/lvmpolld/Makefile.in
device-mapper/Makefile [deleted file]
device_mapper/Makefile [new file with mode: 0644]
device_mapper/datastruct/bitset.c [new file with mode: 0644]
device_mapper/datastruct/hash.c [new file with mode: 0644]
device_mapper/datastruct/list.c [new file with mode: 0644]
device_mapper/ioctl/libdm-iface.c [new file with mode: 0644]
device_mapper/ioctl/libdm-targets.h [new file with mode: 0644]
device_mapper/libdevmapper.h [new file with mode: 0644]
device_mapper/libdm-common.c [new file with mode: 0644]
device_mapper/libdm-common.h [new file with mode: 0644]
device_mapper/libdm-config.c [new file with mode: 0644]
device_mapper/libdm-deptree.c [new file with mode: 0644]
device_mapper/libdm-file.c [new file with mode: 0644]
device_mapper/libdm-report.c [new file with mode: 0644]
device_mapper/libdm-stats.c [new file with mode: 0644]
device_mapper/libdm-string.c [new file with mode: 0644]
device_mapper/libdm-targets.c [new file with mode: 0644]
device_mapper/libdm-timestamp.c [new file with mode: 0644]
device_mapper/misc/dm-ioctl.h [new file with mode: 0644]
device_mapper/misc/dm-log-userspace.h [new file with mode: 0644]
device_mapper/misc/dm-logging.h [new file with mode: 0644]
device_mapper/misc/dmlib.h [new file with mode: 0644]
device_mapper/misc/kdev_t.h [new file with mode: 0644]
device_mapper/mm/dbg_malloc.c [new file with mode: 0644]
device_mapper/mm/pool-debug.c [new file with mode: 0644]
device_mapper/mm/pool-fast.c [new file with mode: 0644]
device_mapper/mm/pool.c [new file with mode: 0644]
device_mapper/regex/matcher.c [new file with mode: 0644]
device_mapper/regex/parse_rx.c [new file with mode: 0644]
device_mapper/regex/parse_rx.h [new file with mode: 0644]
device_mapper/regex/ttree.c [new file with mode: 0644]
device_mapper/regex/ttree.h [new file with mode: 0644]
device_mapper/vdo/status.c [moved from device-mapper/vdo/status.c with 99% similarity]
device_mapper/vdo/target.h [moved from device-mapper/vdo/target.h with 100% similarity]
lib/config/config.h
lib/device/bcache.c
lib/device/bcache.h
lib/device/dev-cache.c
lib/metadata/pv.h
lib/metadata/vg.h
lib/misc/lib.h
lib/report/properties.h
libdaemon/client/config-util.c
libdaemon/client/daemon-client.c
libdm/Makefile.in
libdm/make.tmpl.in [new file with mode: 0644]
liblvm/Makefile.in
liblvm/lvm_misc.h
liblvm/lvm_prop.c
make.tmpl.in
scripts/Makefile.in
test/api/Makefile.in
test/unit/Makefile.in
test/unit/bitset_t.c
test/unit/config_t.c
test/unit/dmlist_t.c
test/unit/dmstatus_t.c
test/unit/framework.h
test/unit/matcher_t.c
test/unit/percent_t.c
test/unit/string_t.c
test/unit/vdo_t.c
tools/Makefile.in
tools/dmsetup.c
tools/tool.h

index 3274f174f2453d30e903dd4e9124a9ecf95947bf..a1c87468b179a9158d3f1aeb6af36ff64b6ae7d1 100644 (file)
@@ -211,8 +211,7 @@ endif
 endif
 
 include test/unit/Makefile
-
-include device-mapper/Makefile
+include device_mapper/Makefile
 
 ifneq ($(shell which ctags),)
 .PHONY: tags
index 3b88dadce3d009cc299f0dbd1cf040e76a18aa89..60f51d6451635f24131580b96cd4a819f8cd114f 100755 (executable)
--- a/configure
+++ b/configure
@@ -15559,7 +15559,7 @@ _ACEOF
 
 
 ################################################################################
-ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/Makefile lib/Makefile lib/locking/Makefile include/lvm-version.h libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat 
scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile"
+ac_config_files="$ac_config_files Makefile make.tmpl libdm/make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/Makefile lib/Makefile lib/locking/Makefile include/lvm-version.h libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat 
scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile"
 
 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
@@ -16256,6 +16256,7 @@ do
     "include/configure.h") CONFIG_HEADERS="$CONFIG_HEADERS include/configure.h" ;;
     "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
     "make.tmpl") CONFIG_FILES="$CONFIG_FILES make.tmpl" ;;
+    "libdm/make.tmpl") CONFIG_FILES="$CONFIG_FILES libdm/make.tmpl" ;;
     "daemons/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/Makefile" ;;
     "daemons/clvmd/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/clvmd/Makefile" ;;
     "daemons/cmirrord/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/cmirrord/Makefile" ;;
index a05f051c461233e387332c376a96d6ae96691cc3..8dc9c189c047788f952226a90e254d392b842a34 100644 (file)
@@ -2088,6 +2088,7 @@ dnl -- keep utility scripts running properly
 AC_CONFIG_FILES([
 Makefile
 make.tmpl
+libdm/make.tmpl
 daemons/Makefile
 daemons/clvmd/Makefile
 daemons/cmirrord/Makefile
index 83af00e20e10f9e88b59f2d870d7f30c3938e89a..47a3411e426c8740a9e1b343027312368e4a05cf 100644 (file)
@@ -74,7 +74,7 @@ TARGETS = \
 
 include $(top_builddir)/make.tmpl
 
-LIBS += $(LVMINTERNAL_LIBS) -ldevmapper $(PTHREAD_LIBS) -laio
+LIBS += $(LVMINTERNAL_LIBS) $(PTHREAD_LIBS) -laio
 CFLAGS += -fno-strict-aliasing $(EXTRA_EXEC_CFLAGS)
 
 INSTALL_TARGETS = \
index 96e0db8ce245653775be8e9ea85eb6331c82c3f9..fc0ef6d5ab81881b7b485ed505760907edd960ad 100644 (file)
@@ -26,7 +26,6 @@ TARGETS = cmirrord
 
 include $(top_builddir)/make.tmpl
 
-LIBS += -ldevmapper
 LMLIBS += $(CPG_LIBS) $(SACKPT_LIBS)
 CFLAGS += $(CPG_CFLAGS) $(SACKPT_CFLAGS) $(EXTRA_EXEC_CFLAGS)
 LDFLAGS += $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS)
index 0efbd64bfa06169f1f60fc8fba2bc5235297d1f5..54ddd79ecb32575494b72c910cebaaa20a05891a 100644 (file)
@@ -12,8 +12,8 @@
 #ifndef _LVM_CLOG_CLUSTER_H
 #define _LVM_CLOG_CLUSTER_H
 
-#include "libdm/misc/dm-log-userspace.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/misc/dm-log-userspace.h"
+#include "device_mapper/libdevmapper.h"
 
 #define DM_ULOG_RESPONSE 0x1000U /* in last byte of 32-bit value */
 #define DM_ULOG_CHECKPOINT_READY 21
index c770459a9909b41b12d5eb32277de5706a5bf90b..8a7301d018b3b73a32ee3b5e8af31f66b0e60642 100644 (file)
@@ -12,7 +12,7 @@
 #ifndef _LVM_CLOG_FUNCTIONS_H
 #define _LVM_CLOG_FUNCTIONS_H
 
-#include "libdm/misc/dm-log-userspace.h"
+#include "device_mapper/misc/dm-log-userspace.h"
 #include "cluster.h"
 
 #define LOG_RESUMED   1
index d5241ebfb25b0eec42cfcfb563dc486e3075f7da..e43bb2b2a2b10275b7340436084162adef84be5a 100644 (file)
@@ -57,13 +57,13 @@ all: device-mapper
 device-mapper: $(TARGETS)
 
 CFLAGS_dmeventd.o += $(EXTRA_EXEC_CFLAGS)
-LIBS += -ldevmapper $(PTHREAD_LIBS)
+LIBS += $(PTHREAD_LIBS)
 
 dmeventd: $(LIB_SHARED) dmeventd.o
        $(CC) $(CFLAGS) -L. $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) dmeventd.o \
-               -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(LIBS)
+               -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(top_builddir)/device_mapper/libdevice-mapper.a $(LIBS) -lm
 
-dmeventd.static: $(LIB_STATIC) dmeventd.o $(interfacebuilddir)/libdevmapper.a
+dmeventd.static: $(LIB_STATIC) dmeventd.o
        $(CC) $(CFLAGS) $(LDFLAGS) -static -L. -L$(interfacebuilddir) dmeventd.o \
                -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(LIBS) $(STATIC_LIBS)
 
@@ -73,7 +73,6 @@ endif
 
 ifneq ("$(CFLOW_CMD)", "")
 CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES))
--include $(top_builddir)/libdm/libdevmapper.cflow
 -include $(top_builddir)/lib/liblvm-internal.cflow
 -include $(top_builddir)/lib/liblvm2cmd.cflow
 -include $(top_builddir)/daemons/dmeventd/$(LIB_NAME).cflow
index 438cf1668328cc1f54d8a31feacca4a01af03ec3..533186b970bae9c812cba83200e7e91e7d471f52 100644 (file)
@@ -16,7 +16,7 @@
  * dmeventd - dm event daemon to monitor active mapped devices
  */
 
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
 
 #include "daemons/dmeventd/libdevmapper-event.h"
 #include "dmeventd.h"
index a75924c27c2e1ff936bdb1ac5554494f19edc292..7f0722831ddda1f5ad96393070b1af8e6a835025 100644 (file)
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libdm/misc/dm-logging.h"
-#include "libdm/misc/dmlib.h"
+#include "device_mapper/misc/dm-logging.h"
+#include "device_mapper/misc/dmlib.h"
 #include "daemons/dmeventd/libdevmapper-event.h"
 #include "dmeventd.h"
+#include "lib/misc/intl.h"
 
 #include <fcntl.h>
 #include <sys/file.h>
@@ -25,6 +26,7 @@
 #include <arpa/inet.h>         /* for htonl, ntohl */
 #include <pthread.h>
 #include <syslog.h>
+#include <unistd.h>
 
 static int _debug_level = 0;
 static int _use_syslog = 0;
index 839433fb8cf35f08d904125e79f106476a47f97d..fcad5bca43c9b41593736993306a108e3e849bdf 100644 (file)
@@ -8,4 +8,3 @@ Description: device-mapper event library
 Version: @DM_LIB_PATCHLEVEL@
 Cflags: -I${includedir}
 Libs: -L${libdir} -ldevmapper-event
-Requires.private: devmapper
index 956d31edcaa4e7cf75e0f335a58536d025f262a5..7e4696c68c3c167cf51f317d45d1abd5d7bbec94 100644 (file)
@@ -24,7 +24,7 @@ LIB_VERSION = $(LIB_VERSION_LVM)
 
 include $(top_builddir)/make.tmpl
 
-LIBS += @LVM2CMD_LIB@ -ldevmapper $(PTHREAD_LIBS)
+LIBS += @LVM2CMD_LIB@ $(PTHREAD_LIBS)
 
 install_lvm2: install_lib_shared
 
index 1d9666daa83160c7880285befae98b54302d2c11..22832ddf02a23e7b7f852122c317f21aa905db58 100644 (file)
@@ -30,7 +30,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow
 
 include $(top_builddir)/make.tmpl
 
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2
 
 install_lvm2: install_dm_plugin
 
index 1bca8b2aac53dec0eb57786e85960062fcaf6c82..54343b1ca1e40ae315d59a675027e667e9bba4b2 100644 (file)
@@ -29,7 +29,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow
 
 include $(top_builddir)/make.tmpl
 
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2
 
 install_lvm2: install_dm_plugin
 
index 5eb7a47f10ba90dba39babba398360ed791467e6..75f4342ad61147f308b9725c3e607a2e3202a896 100644 (file)
@@ -26,7 +26,7 @@ LIB_VERSION = $(LIB_VERSION_LVM)
 
 include $(top_builddir)/make.tmpl
 
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2
 
 install_lvm2: install_dm_plugin
 
index f54ee2da5e680d6366a846d75999cd35800905d2..9f1c2b34bd04773043934682bc30642c9d5273c0 100644 (file)
@@ -29,7 +29,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow
 
 include $(top_builddir)/make.tmpl
 
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2
 
 install_lvm2: install_dm_plugin
 
index 8a4938b22b7ffb2c16e1c4107daec24caba3e778..1afd6b8eda4e9834bbe0fa86dd1bb69325139636 100644 (file)
@@ -35,13 +35,12 @@ all: device-mapper
 device-mapper: $(TARGETS)
 
 CFLAGS_dmfilemapd.o += $(EXTRA_EXEC_CFLAGS)
-LIBS += -ldevmapper
 
 dmfilemapd: $(LIB_SHARED) dmfilemapd.o
        $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) \
                -o $@ dmfilemapd.o $(DL_LIBS) $(LIBS)
 
-dmfilemapd.static: $(LIB_STATIC) dmfilemapd.o $(interfacebuilddir)/libdevmapper.a
+dmfilemapd.static: $(LIB_STATIC) dmfilemapd.o
        $(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -static -L$(interfacebuilddir) \
                -o $@ dmfilemapd.o $(DL_LIBS) $(LIBS) $(STATIC_LIBS)
 
index 7fc95c8a4f189f340c03b5118b15bdd34d32591a..4e048fff361bb865b8490c419ed2d67b6a700ed5 100644 (file)
@@ -16,7 +16,7 @@
 
 #include "tools/tool.h"
 
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
 
 #include "lib/config/defaults.h"
 
index 1d901aabc65ad9b7e9d3feb68b9a6c8889a0c939..f652db8bfc66c5697bc45a1ded9e71b6d8b7c60d 100644 (file)
@@ -32,15 +32,17 @@ CFLAGS_lvmetactl.o += $(EXTRA_EXEC_CFLAGS)
 CFLAGS_lvmetad-core.o += $(EXTRA_EXEC_CFLAGS)
 INCLUDES += -I$(top_srcdir)/libdaemon/server
 LDFLAGS += -L$(top_builddir)/libdaemon/server $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS)
-LIBS += $(RT_LIBS) $(DAEMON_LIBS) -ldevmapper $(PTHREAD_LIBS)
+LIBS += $(RT_LIBS) $(DAEMON_LIBS) $(PTHREAD_LIBS) -lm
 
 lvmetad: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
-                   $(top_builddir)/libdaemon/server/libdaemonserver.a
-       $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) -ldaemonserver $(LIBS)
+                   $(top_builddir)/libdaemon/server/libdaemonserver.a \
+                   $(top_builddir)/device_mapper/libdevice-mapper.a
+       $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(top_builddir)/device_mapper/libdevice-mapper.a -ldaemonserver $(LIBS)
 
 lvmetactl: lvmetactl.o $(top_builddir)/libdaemon/client/libdaemonclient.a \
-       $(top_builddir)/libdaemon/server/libdaemonserver.a
-       $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmetactl.o $(LIBS)
+       $(top_builddir)/libdaemon/server/libdaemonserver.a \
+       $(top_builddir)/device_mapper/libdevice-mapper.a
+       $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmetactl.o $(top_builddir)/device_mapper/libdevice-mapper.a $(LIBS)
 
 CLEAN_TARGETS += lvmetactl.o
 
index 8f16d0652921bb242535ce673cf0e4f4960de1e0..50463999a77fe6cb91f9b21dc2eb30f95381b4a8 100644 (file)
@@ -36,7 +36,7 @@ include $(top_builddir)/make.tmpl
 CFLAGS += $(EXTRA_EXEC_CFLAGS)
 INCLUDES += -I$(top_srcdir)/libdaemon/server
 LDFLAGS += -L$(top_builddir)/libdaemon/server $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS)
-LIBS += $(RT_LIBS) $(DAEMON_LIBS) -ldevmapper $(PTHREAD_LIBS)
+LIBS += $(RT_LIBS) $(DAEMON_LIBS) $(PTHREAD_LIBS)
 
 lvmlockd: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
                    $(top_builddir)/libdaemon/server/libdaemonserver.a
index e75d02bc075630e39973ebfa6e7c987b4c6244ac..f1a2a20389989700b5344818389c72334fea1a9a 100644 (file)
@@ -19,7 +19,7 @@
 #include "lvm-version.h"
 #include "daemons/lvmetad/lvmetad-client.h"
 #include "daemons/lvmlockd/lvmlockd-client.h"
-#include "libdm/misc/dm-ioctl.h"
+#include "device_mapper/misc/dm-ioctl.h"
 
 /* #include <assert.h> */
 #include <errno.h>
index 483758dcda15625f953fdb540d4c28b47d6db693..69c4a8d867e6ee703ec1497d2fccdc93b91b3f73 100644 (file)
@@ -30,7 +30,7 @@ include $(top_builddir)/make.tmpl
 CFLAGS += $(EXTRA_EXEC_CFLAGS)
 INCLUDES += -I$(top_srcdir)/libdaemon/server
 LDFLAGS += -L$(top_builddir)/libdaemon/server $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS)
-LIBS += $(DAEMON_LIBS) -ldaemonserver -ldevmapper $(PTHREAD_LIBS)
+LIBS += $(DAEMON_LIBS) -ldaemonserver $(PTHREAD_LIBS)
 
 lvmpolld: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
                    $(top_builddir)/libdaemon/server/libdaemonserver.a
diff --git a/device-mapper/Makefile b/device-mapper/Makefile
deleted file mode 100644 (file)
index 76e19f0..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2018 Red Hat, Inc. All rights reserved.
-#
-# This file is part of LVM2.
-#
-# This copyrighted material is made available to anyone wishing to use,
-# modify, copy, or redistribute it subject to the terms and conditions
-# of the GNU General Public License v.2.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-DM_SOURCE=\
-       device-mapper/vdo/status.c
-
-DM_DEPENDS=$(subst .c,.d,$(DM_SOURCE))
-DM_OBJECTS=$(DM_SOURCE:%.c=%.o)
-CLEAN_TARGETS+=$(DM_DEPENDS) $(DM_OBJECTS)
-
--include $(DM_DEPENDS)
diff --git a/device_mapper/Makefile b/device_mapper/Makefile
new file mode 100644 (file)
index 0000000..999c3ba
--- /dev/null
@@ -0,0 +1,46 @@
+# Copyright (C) 2018 Red Hat, Inc. All rights reserved.
+#
+# This file is part of the device-mapper userspace tools.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+DEVICE_MAPPER_SOURCE=\
+       device_mapper/datastruct/bitset.c \
+       device_mapper/datastruct/hash.c \
+       device_mapper/datastruct/list.c \
+       device_mapper/libdm-common.c \
+       device_mapper/libdm-config.c \
+       device_mapper/libdm-deptree.c \
+       device_mapper/libdm-file.c \
+       device_mapper/libdm-report.c \
+       device_mapper/libdm-stats.c \
+       device_mapper/libdm-string.c \
+       device_mapper/libdm-targets.c \
+       device_mapper/libdm-timestamp.c \
+       device_mapper/mm/dbg_malloc.c \
+       device_mapper/mm/pool.c \
+       device_mapper/regex/matcher.c \
+       device_mapper/regex/parse_rx.c \
+       device_mapper/regex/ttree.c \
+       device_mapper/ioctl/libdm-iface.c
+
+DEVICE_MAPPER_DEPENDS=$(subst .c,.d,$(DEVICE_MAPPER_SOURCE))
+DEVICE_MAPPER_OBJECTS=$(subst .c,.o,$(DEVICE_MAPPER_SOURCE))
+CLEAN_TARGETS+=$(DEVICE_MAPPER_DEPENDS) $(DEVICE_MAPPER_OBJECTS)
+
+-include $(DEVICE_MAPPER_DEPENDS)
+
+$(DEVICE_MAPPER_OBJECTS): INCLUDES+=-Idevice_mapper/
+
+device_mapper/libdevice-mapper.a: $(DEVICE_MAPPER_OBJECTS)
+       @echo "    [AR] $@"
+       $(Q) $(RM) $@
+       $(Q) $(AR) rsv $@ $(DEVICE_MAPPER_OBJECTS) > /dev/null
+
+CLEAN_TARGETS+=device_mapper/libdevice-mapper.a
diff --git a/device_mapper/datastruct/bitset.c b/device_mapper/datastruct/bitset.c
new file mode 100644 (file)
index 0000000..6ae99d3
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <ctype.h>
+
+/* FIXME: calculate this. */
+#define INT_SHIFT 5
+
+dm_bitset_t dm_bitset_create(struct dm_pool *mem, unsigned num_bits)
+{
+       unsigned n = (num_bits / DM_BITS_PER_INT) + 2;
+       size_t size = sizeof(int) * n;
+       dm_bitset_t bs;
+       
+       if (mem)
+               bs = dm_pool_zalloc(mem, size);
+       else
+               bs = dm_zalloc(size);
+
+       if (!bs)
+               return NULL;
+
+       *bs = num_bits;
+
+       return bs;
+}
+
+void dm_bitset_destroy(dm_bitset_t bs)
+{
+       dm_free(bs);
+}
+
+int dm_bitset_equal(dm_bitset_t in1, dm_bitset_t in2)
+{
+       int i;
+
+       for (i = (in1[0] / DM_BITS_PER_INT) + 1; i; i--)
+               if (in1[i] != in2[i])
+                       return 0;
+
+       return 1;
+}
+
+void dm_bit_and(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2)
+{
+       int i;
+
+       for (i = (in1[0] / DM_BITS_PER_INT) + 1; i; i--)
+               out[i] = in1[i] & in2[i];
+}
+void dm_bit_union(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2)
+{
+       int i;
+       for (i = (in1[0] / DM_BITS_PER_INT) + 1; i; i--)
+               out[i] = in1[i] | in2[i];
+}
+
+static int _test_word(uint32_t test, int bit)
+{
+       uint32_t tb = test >> bit;
+
+       return (tb ? ffs(tb) + bit - 1 : -1);
+}
+
+static int _test_word_rev(uint32_t test, int bit)
+{
+       uint32_t tb = test << (DM_BITS_PER_INT - 1 - bit);
+
+       return (tb ? bit - clz(tb) : -1);
+}
+
+int dm_bit_get_next(dm_bitset_t bs, int last_bit)
+{
+       int bit, word;
+       uint32_t test;
+
+       last_bit++;             /* otherwise we'll return the same bit again */
+
+       /*
+        * bs[0] holds number of bits
+        */
+       while (last_bit < (int) bs[0]) {
+               word = last_bit >> INT_SHIFT;
+               test = bs[word + 1];
+               bit = last_bit & (DM_BITS_PER_INT - 1);
+
+               if ((bit = _test_word(test, bit)) >= 0)
+                       return (word * DM_BITS_PER_INT) + bit;
+
+               last_bit = last_bit - (last_bit & (DM_BITS_PER_INT - 1)) +
+                   DM_BITS_PER_INT;
+       }
+
+       return -1;
+}
+
+int dm_bit_get_prev(dm_bitset_t bs, int last_bit)
+{
+       int bit, word;
+       uint32_t test;
+
+       last_bit--;             /* otherwise we'll return the same bit again */
+
+       /*
+        * bs[0] holds number of bits
+        */
+       while (last_bit >= 0) {
+               word = last_bit >> INT_SHIFT;
+               test = bs[word + 1];
+               bit = last_bit & (DM_BITS_PER_INT - 1);
+
+               if ((bit = _test_word_rev(test, bit)) >= 0)
+                       return (word * DM_BITS_PER_INT) + bit;
+
+               last_bit = (last_bit & ~(DM_BITS_PER_INT - 1)) - 1;
+       }
+
+       return -1;
+}
+
+int dm_bit_get_first(dm_bitset_t bs)
+{
+       return dm_bit_get_next(bs, -1);
+}
+
+int dm_bit_get_last(dm_bitset_t bs)
+{
+       return dm_bit_get_prev(bs, bs[0] + 1);
+}
+
+/*
+ * Based on the Linux kernel __bitmap_parselist from lib/bitmap.c
+ */
+dm_bitset_t dm_bitset_parse_list(const char *str, struct dm_pool *mem,
+                                size_t min_num_bits)
+{
+       unsigned a, b;
+       int c, old_c, totaldigits, ndigits, nmaskbits;
+       int at_start, in_range;
+       dm_bitset_t mask = NULL;
+       const char *start = str;
+       size_t len;
+
+scan:
+       len = strlen(str);
+       totaldigits = c = 0;
+       nmaskbits = 0;
+       do {
+               at_start = 1;
+               in_range = 0;
+               a = b = 0;
+               ndigits = totaldigits;
+
+               /* Get the next value or range of values */
+               while (len) {
+                       old_c = c;
+                       c = *str++;
+                       len--;
+                       if (isspace(c))
+                               continue;
+
+                       /* A '\0' or a ',' signal the end of a value or range */
+                       if (c == '\0' || c == ',')
+                               break;
+                       /*
+                       * whitespaces between digits are not allowed,
+                       * but it's ok if whitespaces are on head or tail.
+                       * when old_c is whilespace,
+                       * if totaldigits == ndigits, whitespace is on head.
+                       * if whitespace is on tail, it should not run here.
+                       * as c was ',' or '\0',
+                       * the last code line has broken the current loop.
+                       */
+                       if ((totaldigits != ndigits) && isspace(old_c))
+                               goto_bad;
+
+                       if (c == '-') {
+                               if (at_start || in_range)
+                                       goto_bad;
+                               b = 0;
+                               in_range = 1;
+                               at_start = 1;
+                               continue;
+                       }
+
+                       if (!isdigit(c))
+                               goto_bad;
+
+                       b = b * 10 + (c - '0');
+                       if (!in_range)
+                               a = b;
+                       at_start = 0;
+                       totaldigits++;
+               }
+               if (ndigits == totaldigits)
+                       continue;
+               /* if no digit is after '-', it's wrong */
+               if (at_start && in_range)
+                       goto_bad;
+               if (!(a <= b))
+                       goto_bad;
+               if (b >= nmaskbits)
+                       nmaskbits = b + 1;
+               while ((a <= b) && mask) {
+                       dm_bit_set(mask, a);
+                       a++;
+               }
+       } while (len && c == ',');
+
+       if (!mask) {
+               if (min_num_bits && (nmaskbits < min_num_bits))
+                       nmaskbits = min_num_bits;
+
+               if (!(mask = dm_bitset_create(mem, nmaskbits)))
+                       goto_bad;
+               str = start;
+               goto scan;
+       }
+
+       return mask;
+bad:
+       if (mask) {
+               if (mem)
+                       dm_pool_free(mem, mask);
+               else
+                       dm_bitset_destroy(mask);
+       }
+       return NULL;
+}
+
+#if defined(__GNUC__)
+/*
+ * Maintain backward compatibility with older versions that did not
+ * accept a 'min_num_bits' argument to dm_bitset_parse_list().
+ */
+dm_bitset_t dm_bitset_parse_list_v1_02_129(const char *str, struct dm_pool *mem);
+dm_bitset_t dm_bitset_parse_list_v1_02_129(const char *str, struct dm_pool *mem)
+{
+       return dm_bitset_parse_list(str, mem, 0);
+}
+
+#else /* if defined(__GNUC__) */
+
+#endif
diff --git a/device_mapper/datastruct/hash.c b/device_mapper/datastruct/hash.c
new file mode 100644 (file)
index 0000000..9b9c939
--- /dev/null
@@ -0,0 +1,392 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+/* One key/value entry, chained off its hash slot. */
+struct dm_hash_node {
+       struct dm_hash_node *next;      /* next entry in this slot's chain */
+       void *data;                     /* caller-owned value pointer */
+       unsigned data_len;              /* value length; used by *_with_val functions */
+       unsigned keylen;                /* key length in bytes */
+       char key[0];                    /* key bytes, allocated with the node (GNU zero-length array) */
+};
+
+/* Chained hash table; num_slots is kept a power of two (see dm_hash_create). */
+struct dm_hash_table {
+       unsigned num_nodes;             /* total entries across all chains */
+       unsigned num_slots;             /* power of two -> slot = hash & (num_slots - 1) */
+       struct dm_hash_node **slots;    /* array of chain heads */
+};
+
+/* Permutation of the Integers 0 through 255 */
+/* Substitution table used by _hash() to scramble each key byte. */
+static unsigned char _nums[] = {
+       1, 14, 110, 25, 97, 174, 132, 119, 138, 170, 125, 118, 27, 233, 140, 51,
+       87, 197, 177, 107, 234, 169, 56, 68, 30, 7, 173, 73, 188, 40, 36, 65,
+       49, 213, 104, 190, 57, 211, 148, 223, 48, 115, 15, 2, 67, 186, 210, 28,
+       12, 181, 103, 70, 22, 58, 75, 78, 183, 167, 238, 157, 124, 147, 172,
+       144,
+       176, 161, 141, 86, 60, 66, 128, 83, 156, 241, 79, 46, 168, 198, 41, 254,
+       178, 85, 253, 237, 250, 154, 133, 88, 35, 206, 95, 116, 252, 192, 54,
+       221,
+       102, 218, 255, 240, 82, 106, 158, 201, 61, 3, 89, 9, 42, 155, 159, 93,
+       166, 80, 50, 34, 175, 195, 100, 99, 26, 150, 16, 145, 4, 33, 8, 189,
+       121, 64, 77, 72, 208, 245, 130, 122, 143, 55, 105, 134, 29, 164, 185,
+       194,
+       193, 239, 101, 242, 5, 171, 126, 11, 74, 59, 137, 228, 108, 191, 232,
+       139,
+       6, 24, 81, 20, 127, 17, 91, 92, 251, 151, 225, 207, 21, 98, 113, 112,
+       84, 226, 18, 214, 199, 187, 13, 32, 94, 220, 224, 212, 247, 204, 196,
+       43,
+       249, 236, 45, 244, 111, 182, 153, 136, 129, 90, 217, 202, 19, 165, 231,
+       71,
+       230, 142, 96, 227, 62, 179, 246, 114, 162, 53, 160, 215, 205, 180, 47,
+       109,
+       44, 38, 31, 149, 135, 0, 216, 52, 63, 23, 37, 69, 39, 117, 146, 184,
+       163, 200, 222, 235, 248, 243, 219, 10, 152, 131, 123, 229, 203, 76, 120,
+       209
+};
+
+/*
+ * Allocate a node with the key bytes appended in a single allocation.
+ * Returns NULL on allocation failure.  'next', 'data' and 'data_len'
+ * are left for the caller to fill in.
+ */
+static struct dm_hash_node *_create_node(const char *str, unsigned len)
+{
+       struct dm_hash_node *n;
+
+       if (!(n = dm_malloc(sizeof(*n) + len)))
+               return NULL;
+
+       n->keylen = len;
+       memcpy(n->key, str, len);
+
+       return n;
+}
+
+/*
+ * Hash 'len' bytes at 'str'.  A PJW-style shift-and-fold hash where each
+ * input byte is first substituted through the _nums permutation table.
+ */
+static unsigned long _hash(const char *str, unsigned len)
+{
+       unsigned long h = 0, g;
+       unsigned i;
+
+       for (i = 0; i < len; i++) {
+               h <<= 4;
+               h += _nums[(unsigned char) *str++];
+               /* Fold bits 16-19 back into the low bits to avoid losing them. */
+               g = h & ((unsigned long) 0xf << 16u);
+               if (g) {
+                       h ^= g >> 16u;
+                       h ^= g >> 5u;
+               }
+       }
+
+       return h;
+}
+
+/*
+ * Create a hash table with at least 'size_hint' slots (rounded up to a
+ * power of two, minimum 16).  Returns NULL on allocation failure.
+ */
+struct dm_hash_table *dm_hash_create(unsigned size_hint)
+{
+       unsigned new_size = 16u;
+       size_t len;
+       struct dm_hash_table *hc;
+
+       if (!(hc = dm_zalloc(sizeof(*hc))))
+               return_0;
+
+       /* round size hint up to a power of two */
+       while (new_size < size_hint)
+               new_size <<= 1;
+
+       hc->num_slots = new_size;
+       len = sizeof(*(hc->slots)) * new_size;
+
+       /* Slot array is zeroed so every chain starts empty. */
+       if ((hc->slots = dm_zalloc(len)))
+               return hc;
+
+       stack;
+       dm_free(hc);
+       return 0;
+}
+
+/* Release every node in every chain; the slot array itself is untouched. */
+static void _free_nodes(struct dm_hash_table *t)
+{
+       unsigned i;
+       struct dm_hash_node *c, *next;
+
+       for (i = 0; i < t->num_slots; i++) {
+               c = t->slots[i];
+               while (c) {
+                       /* Grab the link before freeing the node it lives in. */
+                       next = c->next;
+                       dm_free(c);
+                       c = next;
+               }
+       }
+}
+
+/* Free all nodes, the slot array, and the table itself. */
+void dm_hash_destroy(struct dm_hash_table *t)
+{
+       _free_nodes(t);
+       dm_free(t->slots);
+       dm_free(t);
+}
+
+/*
+ * Locate a key's chain position.  Returns the address of the link that
+ * points at the matching node, or of the chain's terminating NULL link
+ * when the key is absent -- callers can insert or unlink through it.
+ */
+static struct dm_hash_node **_find(struct dm_hash_table *t, const void *key,
+                                  uint32_t len)
+{
+       /* num_slots is a power of two, so masking selects the slot. */
+       unsigned h = _hash(key, len) & (t->num_slots - 1);
+       struct dm_hash_node **c;
+
+       for (c = &t->slots[h]; *c; c = &((*c)->next))
+               if ((*c)->keylen == len && !memcmp(key, (*c)->key, len))
+                       break;
+
+       return c;
+}
+
+/* Look up a binary key; returns the stored data pointer or NULL. */
+void *dm_hash_lookup_binary(struct dm_hash_table *t, const void *key,
+                           uint32_t len)
+{
+       struct dm_hash_node *n = *_find(t, key, len);
+
+       if (!n)
+               return 0;
+
+       return n->data;
+}
+
+/*
+ * Insert 'data' under a binary key.  If the key already exists its data
+ * pointer is replaced in place (no new node, num_nodes unchanged).
+ * Returns 1 on success, 0 on allocation failure.
+ */
+int dm_hash_insert_binary(struct dm_hash_table *t, const void *key,
+                         uint32_t len, void *data)
+{
+       struct dm_hash_node **c = _find(t, key, len);
+
+       if (*c)
+               (*c)->data = data;
+       else {
+               struct dm_hash_node *n = _create_node(key, len);
+
+               if (!n)
+                       return 0;
+
+               n->data = data;
+               n->next = 0;
+               /* _find returned the chain's NULL link; hook the node in there. */
+               *c = n;
+               t->num_nodes++;
+       }
+
+       return 1;
+}
+
+/* Remove a binary key's entry if present; the data pointer is not freed. */
+void dm_hash_remove_binary(struct dm_hash_table *t, const void *key,
+                       uint32_t len)
+{
+       struct dm_hash_node **link = _find(t, key, len);
+       struct dm_hash_node *doomed = *link;
+
+       if (!doomed)
+               return;
+
+       /* Unlink from the chain before freeing the node. */
+       *link = doomed->next;
+       dm_free(doomed);
+       t->num_nodes--;
+}
+
+/* String-key lookup; the trailing NUL is part of the stored key. */
+void *dm_hash_lookup(struct dm_hash_table *t, const char *key)
+{
+       return dm_hash_lookup_binary(t, key, strlen(key) + 1);
+}
+
+/* String-key insert; returns 1 on success, 0 on allocation failure. */
+int dm_hash_insert(struct dm_hash_table *t, const char *key, void *data)
+{
+       return dm_hash_insert_binary(t, key, strlen(key) + 1, data);
+}
+
+/* String-key removal; no-op when the key is absent. */
+void dm_hash_remove(struct dm_hash_table *t, const char *key)
+{
+       dm_hash_remove_binary(t, key, strlen(key) + 1);
+}
+
+/*
+ * Like _find(), but an entry only matches when both the key and the
+ * stored value (compared byte-wise against 'val'/'val_len') agree.
+ * Used by the *_allow_multiple API where one key may have many entries.
+ * Returns NULL (not a chain link) when no entry matches.
+ */
+static struct dm_hash_node **_find_str_with_val(struct dm_hash_table *t,
+                                               const void *key, const void *val,
+                                               uint32_t len, uint32_t val_len)
+{
+       struct dm_hash_node **c;
+       unsigned h;
+       
+       h = _hash(key, len) & (t->num_slots - 1);
+
+       for (c = &t->slots[h]; *c; c = &((*c)->next)) {
+               if ((*c)->keylen != len)
+                       continue;
+
+               if (!memcmp(key, (*c)->key, len) && (*c)->data) {
+                       if (((*c)->data_len == val_len) &&
+                           !memcmp(val, (*c)->data, val_len))
+                               return c;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * Insert an entry without replacing existing entries that share the key:
+ * the new node is simply pushed onto the head of its chain.  Stores the
+ * value pointer and its length so *_with_val lookups can match on both.
+ * Returns 1 on success, 0 on allocation failure.
+ */
+int dm_hash_insert_allow_multiple(struct dm_hash_table *t, const char *key,
+                                 const void *val, uint32_t val_len)
+{
+       struct dm_hash_node *n;
+       int len = strlen(key) + 1;      /* include the trailing NUL */
+       unsigned h;
+
+       if (!(n = _create_node(key, len)))
+               return 0;
+
+       n->data = (void *)val;
+       n->data_len = val_len;
+
+       h = _hash(key, len) & (t->num_slots - 1);
+
+       /* Push onto the head of the chain (works for empty chains too). */
+       n->next = t->slots[h];
+       t->slots[h] = n;
+
+       t->num_nodes++;
+       return 1;
+}
+
+/*
+ * Look through multiple entries with the same key for one that has a
+ * matching val and return that.  If none have matching val, return NULL.
+ */
+void *dm_hash_lookup_with_val(struct dm_hash_table *t, const char *key,
+                             const void *val, uint32_t val_len)
+{
+       struct dm_hash_node **c;
+
+       c = _find_str_with_val(t, key, val, strlen(key) + 1, val_len);
+
+       return (c && *c) ? (*c)->data : 0;
+}
+
+/*
+ * Look through multiple entries with the same key for one that has a
+ * matching val and remove that.  The stored data pointer is not freed.
+ */
+void dm_hash_remove_with_val(struct dm_hash_table *t, const char *key,
+                            const void *val, uint32_t val_len)
+{
+       struct dm_hash_node **c;
+
+       c = _find_str_with_val(t, key, val, strlen(key) + 1, val_len);
+
+       if (c && *c) {
+               struct dm_hash_node *old = *c;
+               /* Unlink before freeing. */
+               *c = (*c)->next;
+               dm_free(old);
+               t->num_nodes--;
+       }
+}
+
+/*
+ * Look up the value for a key and count how many
+ * entries have the same key.
+ *
+ * If no entries have key, return NULL and set count to 0.
+ *
+ * If one entry has the key, the function returns the val,
+ * and sets count to 1.
+ *
+ * If N entries have the key, the function returns the val
+ * from the first entry, and sets count to N.
+ */
+void *dm_hash_lookup_with_count(struct dm_hash_table *t, const char *key, int *count)
+{
+       struct dm_hash_node **c;
+       struct dm_hash_node *first_match = NULL;
+       uint32_t len = strlen(key) + 1;
+       unsigned h = _hash(key, len) & (t->num_slots - 1);
+
+       *count = 0;
+
+       /* Walk the whole chain: count every match, remember the first. */
+       for (c = &t->slots[h]; *c; c = &((*c)->next)) {
+               if ((*c)->keylen == len && !memcmp(key, (*c)->key, len)) {
+                       (*count)++;
+                       if (!first_match)
+                               first_match = *c;
+               }
+       }
+
+       return first_match ? first_match->data : NULL;
+}
+
+/* Number of entries currently stored (duplicates counted individually). */
+unsigned dm_hash_get_num_entries(struct dm_hash_table *t)
+{
+       return t->num_nodes;
+}
+
+/*
+ * Call 'f' with each entry's data pointer.  The next link is read before
+ * the callback runs, so 'f' may safely free or remove the current node.
+ */
+void dm_hash_iter(struct dm_hash_table *t, dm_hash_iterate_fn f)
+{
+       struct dm_hash_node *c, *n;
+       unsigned i;
+
+       for (i = 0; i < t->num_slots; i++)
+               for (c = t->slots[i]; c; c = n) {
+                       n = c->next;
+                       f(c->data);
+               }
+}
+
+/* Empty the table but keep it usable: free all nodes, clear all slots. */
+void dm_hash_wipe(struct dm_hash_table *t)
+{
+       _free_nodes(t);
+       memset(t->slots, 0, sizeof(struct dm_hash_node *) * t->num_slots);
+       t->num_nodes = 0u;
+}
+
+/* Key bytes of a node (points into the node's own allocation). */
+char *dm_hash_get_key(struct dm_hash_table *t __attribute__((unused)),
+                     struct dm_hash_node *n)
+{
+       return n->key;
+}
+
+/* Data pointer stored in a node. */
+void *dm_hash_get_data(struct dm_hash_table *t __attribute__((unused)),
+                      struct dm_hash_node *n)
+{
+       return n->data;
+}
+
+/* First node of the first non-empty chain at slot >= 's', or NULL. */
+static struct dm_hash_node *_next_slot(struct dm_hash_table *t, unsigned s)
+{
+       unsigned i;
+
+       for (i = s; i < t->num_slots; i++)
+               if (t->slots[i])
+                       return t->slots[i];
+
+       return NULL;
+}
+
+/* Start an iteration: first node in slot order, or NULL if table empty. */
+struct dm_hash_node *dm_hash_get_first(struct dm_hash_table *t)
+{
+       return _next_slot(t, 0);
+}
+
+/*
+ * Continue an iteration: next node in n's chain, else the first node of
+ * a later non-empty slot (n's slot is recomputed from its key).
+ */
+struct dm_hash_node *dm_hash_get_next(struct dm_hash_table *t, struct dm_hash_node *n)
+{
+       unsigned h = _hash(n->key, n->keylen) & (t->num_slots - 1);
+
+       return n->next ? n->next : _next_slot(t, h + 1);
+}
diff --git a/device_mapper/datastruct/list.c b/device_mapper/datastruct/list.c
new file mode 100644 (file)
index 0000000..86c3e4e
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include <assert.h>
+
+/*
+ * Initialise a list before use.
+ * The list head's next and previous pointers point back to itself.
+ * An initialised head is therefore an empty list.
+ */
+void dm_list_init(struct dm_list *head)
+{
+       head->n = head->p = head;
+}
+
+/*
+ * Insert an element before 'head'.
+ * If 'head' is the list head, this adds an element to the end of the list.
+ */
+void dm_list_add(struct dm_list *head, struct dm_list *elem)
+{
+       /* Catch use of an uninitialised head. */
+       assert(head->n);
+
+       /* Wire the new element first, then splice the neighbours to it. */
+       elem->n = head;
+       elem->p = head->p;
+
+       head->p->n = elem;
+       head->p = elem;
+}
+
+/*
+ * Insert an element after 'head'.
+ * If 'head' is the list head, this adds an element to the front of the list.
+ */
+void dm_list_add_h(struct dm_list *head, struct dm_list *elem)
+{
+       /* Catch use of an uninitialised head. */
+       assert(head->n);
+
+       /* Wire the new element first, then splice the neighbours to it. */
+       elem->n = head->n;
+       elem->p = head;
+
+       head->n->p = elem;
+       head->n = elem;
+}
+
+/*
+ * Delete an element from its list.
+ * Note that this doesn't change the element itself - it may still be safe
+ * to follow its pointers.
+ */
+void dm_list_del(struct dm_list *elem)
+{
+       elem->n->p = elem->p;
+       elem->p->n = elem->n;
+}
+
+/*
+ * Remove an element from its current list and append it before 'head'
+ * (i.e. to the end when 'head' is the list head).
+ */
+void dm_list_move(struct dm_list *head, struct dm_list *elem)
+{
+	dm_list_del(elem);
+	dm_list_add(head, elem);
+}
+
+/*
+ * Is the list empty?
+ * True when the head's next pointer points back at the head itself.
+ */
+int dm_list_empty(const struct dm_list *head)
+{
+       return head->n == head;
+}
+
+/*
+ * Is this the first element of the list?
+ */
+int dm_list_start(const struct dm_list *head, const struct dm_list *elem)
+{
+       return elem->p == head;
+}
+
+/*
+ * Is this the last element of the list?
+ */
+int dm_list_end(const struct dm_list *head, const struct dm_list *elem)
+{
+       return elem->n == head;
+}
+
+/*
+ * Return first element of the list or NULL if empty
+ */
+struct dm_list *dm_list_first(const struct dm_list *head)
+{
+       return (dm_list_empty(head) ? NULL : head->n);
+}
+
+/*
+ * Return last element of the list or NULL if empty
+ */
+struct dm_list *dm_list_last(const struct dm_list *head)
+{
+       return (dm_list_empty(head) ? NULL : head->p);
+}
+
+/*
+ * Return the previous element of the list, or NULL if we've reached the start.
+ */
+struct dm_list *dm_list_prev(const struct dm_list *head, const struct dm_list *elem)
+{
+       return (dm_list_start(head, elem) ? NULL : elem->p);
+}
+
+/*
+ * Return the next element of the list, or NULL if we've reached the end.
+ */
+struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *elem)
+{
+       return (dm_list_end(head, elem) ? NULL : elem->n);
+}
+
+/*
+ * Return the number of elements in a list by walking it.
+ * O(n) - prefer tracking a count externally if called often.
+ */
+unsigned int dm_list_size(const struct dm_list *head)
+{
+       unsigned int s = 0;
+       const struct dm_list *v;
+
+       dm_list_iterate(v, head)
+           s++;
+
+       return s;
+}
+
+/*
+ * Join two lists together.
+ * This moves all the elements of the list 'head1' to the end of the list
+ * 'head', leaving 'head1' empty.
+ */
+void dm_list_splice(struct dm_list *head, struct dm_list *head1)
+{
+       /* Both heads must be initialised. */
+       assert(head->n);
+       assert(head1->n);
+
+       if (dm_list_empty(head1))
+           return;
+
+       /* Link head1's chain between head's last element and head itself. */
+       head1->p->n = head;
+       head1->n->p = head->p;
+
+       head->p->n = head1->n;
+       head->p = head1->p;
+
+       /* head1 must be re-emptied, or it would alias the spliced nodes. */
+       dm_list_init(head1);
+}
diff --git a/device_mapper/ioctl/libdm-iface.c b/device_mapper/ioctl/libdm-iface.c
new file mode 100644 (file)
index 0000000..4825f1d
--- /dev/null
@@ -0,0 +1,2196 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2013 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "libdm-targets.h"
+#include "libdm-common.h"
+
+#include <stddef.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <sys/utsname.h>
+#include <limits.h>
+#include <unistd.h>
+
+#ifdef __linux__
+#  include "misc/kdev_t.h"
+#  include <linux/limits.h>
+#else
+#  define MAJOR(x) major((x))
+#  define MINOR(x) minor((x))
+#  define MKDEV(x,y) makedev((x),(y))
+#endif
+
+#include "misc/dm-ioctl.h"
+
+/*
+ * Ensure build compatibility.  
+ * The hard-coded versions here are the highest present 
+ * in the _cmd_data arrays.
+ */
+
+#if !((DM_VERSION_MAJOR == 4 && DM_VERSION_MINOR >= 6))
+#error The version of dm-ioctl.h included is incompatible.
+#endif
+
+/* FIXME This should be exported in device-mapper.h */
+#define DM_NAME "device-mapper"
+
+#define PROC_MISC "/proc/misc"
+#define PROC_DEVICES "/proc/devices"
+#define MISC_NAME "misc"
+
+#define NUMBER_OF_MAJORS 4096
+
+/*
+ * Static minor number assigned since kernel version 2.6.36.
+ * The original definition is in kernel's include/linux/miscdevice.h.
+ * This number is also visible in modules.devname exported by depmod
+ * utility (support included in module-init-tools version >= 3.12).
+ */
+#define MAPPER_CTRL_MINOR 236
+#define MISC_MAJOR 10
+
+/* dm major version no for running kernel */
+static unsigned _dm_version = DM_VERSION_MAJOR;
+static unsigned _dm_version_minor = 0;
+static unsigned _dm_version_patchlevel = 0;
+static int _log_suppress = 0;
+static struct dm_timestamp *_dm_ioctl_timestamp = NULL;
+
+/*
+ * If the kernel dm driver only supports one major number
+ * we store it in _dm_device_major.  Otherwise we indicate
+ * which major numbers have been claimed by device-mapper
+ * in _dm_bitset.
+ */
+static unsigned _dm_multiple_major_support = 1;
+static dm_bitset_t _dm_bitset = NULL;
+static uint32_t _dm_device_major = 0;
+
+static int _control_fd = -1;
+static int _hold_control_fd_open = 0;
+static int _version_checked = 0;
+static int _version_ok = 1;
+static unsigned _ioctl_buffer_double_factor = 0;
+
+const int _dm_compat = 0;
+
+/* *INDENT-OFF* */
+/*
+ * Maps each libdm command name to its ioctl number and the minimum
+ * dm-ioctl protocol version {major, minor, patch} it requires.
+ */
+static struct cmd_data _cmd_data_v4[] = {
+       {"create",      DM_DEV_CREATE,          {4, 0, 0}},
+       {"reload",      DM_TABLE_LOAD,          {4, 0, 0}},
+       {"remove",      DM_DEV_REMOVE,          {4, 0, 0}},
+       {"remove_all",  DM_REMOVE_ALL,          {4, 0, 0}},
+       {"suspend",     DM_DEV_SUSPEND,         {4, 0, 0}},
+       {"resume",      DM_DEV_SUSPEND,         {4, 0, 0}},
+       {"info",        DM_DEV_STATUS,          {4, 0, 0}},
+       {"deps",        DM_TABLE_DEPS,          {4, 0, 0}},
+       {"rename",      DM_DEV_RENAME,          {4, 0, 0}},
+       {"version",     DM_VERSION,             {4, 0, 0}},
+       {"status",      DM_TABLE_STATUS,        {4, 0, 0}},
+       {"table",       DM_TABLE_STATUS,        {4, 0, 0}},
+       {"waitevent",   DM_DEV_WAIT,            {4, 0, 0}},
+       {"names",       DM_LIST_DEVICES,        {4, 0, 0}},
+       {"clear",       DM_TABLE_CLEAR,         {4, 0, 0}},
+       {"mknodes",     DM_DEV_STATUS,          {4, 0, 0}},
+#ifdef DM_LIST_VERSIONS
+       {"versions",    DM_LIST_VERSIONS,       {4, 1, 0}},
+#endif
+#ifdef DM_TARGET_MSG
+       {"message",     DM_TARGET_MSG,          {4, 2, 0}},
+#endif
+#ifdef DM_DEV_SET_GEOMETRY
+       {"setgeometry", DM_DEV_SET_GEOMETRY,    {4, 6, 0}},
+#endif
+};
+/* *INDENT-ON* */
+
+#define ALIGNMENT 8
+
+/* FIXME Rejig library to record & use errno instead */
+#ifndef DM_EXISTS_FLAG
+#  define DM_EXISTS_FLAG 0x00000004
+#endif
+
+/* Round 'ptr' up to the next multiple of 'a' (callers pass a power of two). */
+static char *_align(char *ptr, unsigned int a)
+{
+       unsigned long mask = a - 1;
+
+       return (char *) (((unsigned long) ptr + mask) & ~mask);
+}
+
+#ifdef DM_IOCTLS
+static unsigned _kernel_major = 0;
+static unsigned _kernel_minor = 0;
+static unsigned _kernel_release = 0;
+
+/*
+ * Cache the running kernel's version numbers into _kernel_major/minor/
+ * release on first call.  Returns 1 on success, 0 on failure.
+ */
+static int _uname(void)
+{
+       static int _uts_set = 0;
+       struct utsname _uts;
+       int parts;
+
+       /* Already parsed on an earlier call. */
+       if (_uts_set)
+               return 1;
+
+       if (uname(&_uts)) {
+               log_error("uname failed: %s", strerror(errno));
+               return 0;
+       }
+
+       parts = sscanf(_uts.release, "%u.%u.%u",
+                      &_kernel_major, &_kernel_minor, &_kernel_release);
+
+       /* Kernels with a major number of 2 always had 3 parts. */
+       if (parts < 1 || (_kernel_major < 3 && parts < 3)) {
+               log_error("Could not determine kernel version used.");
+               return 0;
+       }
+
+       _uts_set = 1;
+       return 1;
+}
+
+/*
+ * Copy the cached kernel version into the output parameters.
+ * Returns 1 on success, 0 if the version could not be determined.
+ */
+int get_uname_version(unsigned *major, unsigned *minor, unsigned *release)
+{
+       if (!_uname())
+               return_0;
+
+       *major = _kernel_major;
+       *minor = _kernel_minor;
+       *release = _kernel_release;
+
+       return 1;
+}
+/*
+ * Set number to NULL to populate _dm_bitset - otherwise first
+ * match is returned.
+ * Returns:
+ *     0 - error
+ *     1 - success - number found
+ *     2 - success - number not found (only if require_module_loaded=0)
+ */
+static int _get_proc_number(const char *file, const char *name,
+                           uint32_t *number, int require_module_loaded)
+{
+       FILE *fl;
+       char nm[256];
+       char *line = NULL;
+       size_t len = 0; /* getline() requires *n initialised when it allocates */
+       uint32_t num;
+
+       if (!(fl = fopen(file, "r"))) {
+               log_sys_error("fopen", file);
+               return 0;
+       }
+
+       while (getline(&line, &len, fl) != -1) {
+               /* %u: 'num' is a uint32_t - %d here was undefined behaviour */
+               if (sscanf(line, "%u %255s\n", &num, &nm[0]) == 2) {
+                       if (!strcmp(name, nm)) {
+                               if (number) {
+                                       *number = num;
+                                       if (fclose(fl))
+                                               log_sys_error("fclose", file);
+                                       free(line);
+                                       return 1;
+                               }
+                               /* number==NULL: record every dm major in the bitset */
+                               dm_bit_set(_dm_bitset, num);
+                       }
+               }
+       }
+       if (fclose(fl))
+               log_sys_error("fclose", file);
+       free(line);
+
+       if (number) {
+               if (require_module_loaded) {
+                       log_error("%s: No entry for %s found", file, name);
+                       return 0;
+               }
+
+               return 2;
+       }
+
+       return 1;
+}
+
+/*
+ * Look up the misc major and device-mapper misc minor from /proc.
+ * Used on kernels older than 2.6.36 (see _open_control).
+ */
+static int _control_device_number(uint32_t *major, uint32_t *minor)
+{
+       if (!_get_proc_number(PROC_DEVICES, MISC_NAME, major, 1) ||
+           !_get_proc_number(PROC_MISC, DM_NAME, minor, 1)) {
+               *major = 0;
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Returns 1 if it exists on returning; 0 if it doesn't; -1 if it's wrong.
+ * A node of the wrong type or wrong device number is unlinked (0 is then
+ * returned so the caller recreates it); -1 means the unlink failed.
+ */
+static int _control_exists(const char *control, uint32_t major, uint32_t minor)
+{
+       struct stat buf;
+
+       if (stat(control, &buf) < 0) {
+               if (errno != ENOENT)
+                       log_sys_error("stat", control);
+               return 0;
+       }
+
+       if (!S_ISCHR(buf.st_mode)) {
+               log_verbose("%s: Wrong inode type", control);
+               if (!unlink(control))
+                       return 0;
+               log_sys_error("unlink", control);
+               return -1;
+       }
+
+       if (major && buf.st_rdev != MKDEV((dev_t)major, (dev_t)minor)) {
+               /* Log the actual device number: st_rdev, not st_mode. */
+               log_verbose("%s: Wrong device number: (%u, %u) instead of "
+                           "(%u, %u)", control,
+                           MAJOR(buf.st_rdev), MINOR(buf.st_rdev),
+                           major, minor);
+               if (!unlink(control))
+                       return 0;
+               log_sys_error("unlink", control);
+               return -1;
+       }
+
+       return 1;
+}
+
+/*
+ * Ensure the control node exists with the given major/minor, creating
+ * the dm directory and the character device node if necessary.
+ * Returns 1 on success, 0 on failure.
+ */
+static int _create_control(const char *control, uint32_t major, uint32_t minor)
+{
+       int ret;
+       mode_t old_umask;
+
+       /*
+        * Return if the control already exists with intended major/minor
+        * or there's an error unlinking an apparently incorrect one.
+        */
+       ret = _control_exists(control, major, minor);
+       if (ret == -1)
+               return_0;       /* Failed to unlink existing incorrect node */
+       if (ret)
+               return 1;       /* Already exists and correct */
+
+       /* Create the dm directory with the right selinux context and umask. */
+       (void) dm_prepare_selinux_context(dm_dir(), S_IFDIR);
+       old_umask = umask(DM_DEV_DIR_UMASK);
+       ret = dm_create_dir(dm_dir());
+       umask(old_umask);
+       (void) dm_prepare_selinux_context(NULL, 0);
+
+       if (!ret)
+               return_0;
+
+       log_verbose("Creating device %s (%u, %u)", control, major, minor);
+
+       /* Owner read/write only; restore umask and selinux context after. */
+       (void) dm_prepare_selinux_context(control, S_IFCHR);
+       old_umask = umask(DM_CONTROL_NODE_UMASK);
+       if (mknod(control, S_IFCHR | S_IRUSR | S_IWUSR,
+                 MKDEV((dev_t)major, (dev_t)minor)) < 0)  {
+               log_sys_error("mknod", control);
+               ret = 0;
+       }
+       umask(old_umask);
+       (void) dm_prepare_selinux_context(NULL, 0);
+
+       return ret;
+}
+#endif
+
+/*
+ * FIXME Update bitset in long-running process if dm claims new major numbers.
+ */
+/*
+ * Discover which major number(s) belong to device-mapper: either a single
+ * major (_dm_device_major, 2.6+ kernels) or a bitset of majors (_dm_bitset).
+ *
+ * If require_module_loaded=0, caller is responsible to check
+ * whether _dm_device_major or _dm_bitset is really set. If
+ * it's not, it means the module is not loaded.
+ */
+static int _create_dm_bitset(int require_module_loaded)
+{
+       int r;
+
+#ifdef DM_IOCTLS
+       /* Already discovered on an earlier call. */
+       if (_dm_bitset || _dm_device_major)
+               return 1;
+
+       if (!_uname())
+               return 0;
+
+       /*
+        * 2.6 kernels are limited to one major number.
+        * Assume 2.4 kernels are patched not to.
+        * FIXME Check _dm_version and _dm_version_minor if 2.6 changes this.
+        */
+       if (KERNEL_VERSION(_kernel_major, _kernel_minor, _kernel_release) >=
+           KERNEL_VERSION(2, 6, 0))
+               _dm_multiple_major_support = 0;
+
+       if (!_dm_multiple_major_support) {
+               if (!_get_proc_number(PROC_DEVICES, DM_NAME, &_dm_device_major,
+                                     require_module_loaded))
+                       return 0;
+               return 1;
+       }
+
+       /* Multiple major numbers supported */
+       if (!(_dm_bitset = dm_bitset_create(NULL, NUMBER_OF_MAJORS)))
+               return 0;
+
+       /* number=NULL makes _get_proc_number fill _dm_bitset directly. */
+       r = _get_proc_number(PROC_DEVICES, DM_NAME, NULL, require_module_loaded);
+       if (!r || r == 2) {
+               dm_bitset_destroy(_dm_bitset);
+               _dm_bitset = NULL;
+               /*
+                * It's not an error if we didn't find anything and we
+                * didn't require module to be loaded at the same time.
+                */
+               return r == 2;
+       }
+
+       return 1;
+#else
+       return 0;
+#endif
+}
+
+/*
+ * Does 'major' belong to device-mapper?  Returns 0 when the dm module
+ * is not loaded (bitset/major lookup unavailable).
+ */
+int dm_is_dm_major(uint32_t major)
+{
+       /* Best-effort: don't require the module to be loaded. */
+       if (!_create_dm_bitset(0))
+               return 0;
+
+       if (_dm_multiple_major_support) {
+               if (!_dm_bitset)
+                       return 0;
+               return dm_bit(_dm_bitset, major) ? 1 : 0;
+       }
+
+       if (!_dm_device_major)
+               return 0;
+
+       return (major == _dm_device_major) ? 1 : 0;
+}
+
+/* Close the cached control-device fd if open; resets it to -1. */
+static void _close_control_fd(void)
+{
+       if (_control_fd != -1) {
+               if (close(_control_fd) < 0)
+                       log_sys_error("close", "_control_fd");
+               _control_fd = -1;
+       }
+}
+
+#ifdef DM_IOCTLS
+/* Open the control node read/write and stash the fd in _control_fd. */
+static int _open_and_assign_control_fd(const char *control)
+{
+       if ((_control_fd = open(control, O_RDWR)) < 0) {
+               log_sys_error("open", control);
+               return 0;
+       }
+
+       return 1;
+}
+#endif
+
+/*
+ * Open (creating if necessary) the device-mapper control node and
+ * discover dm major numbers.  Idempotent: returns 1 immediately when
+ * the fd is already open.  Without DM_IOCTLS this is a no-op success.
+ */
+static int _open_control(void)
+{
+#ifdef DM_IOCTLS
+       char control[PATH_MAX];
+       uint32_t major = MISC_MAJOR;
+       uint32_t minor = MAPPER_CTRL_MINOR;
+
+       if (_control_fd != -1)
+               return 1;
+
+       if (!_uname())
+               return 0;
+
+       if (dm_snprintf(control, sizeof(control), "%s/%s", dm_dir(), DM_CONTROL_NODE) < 0)
+               goto_bad;
+
+       /*
+        * Prior to 2.6.36 the minor number should be looked up in /proc.
+        */
+       if ((KERNEL_VERSION(_kernel_major, _kernel_minor, _kernel_release) <
+            KERNEL_VERSION(2, 6, 36)) &&
+           !_control_device_number(&major, &minor))
+               goto_bad;
+
+       /*
+        * Create the node with correct major and minor if not already done.
+        * Udev may already have created /dev/mapper/control
+        * from the modules.devname file generated by depmod.
+        */
+       if (!_create_control(control, major, minor))
+               goto_bad;
+
+       /*
+        * As of 2.6.36 kernels, the open can trigger autoloading dm-mod.
+        */
+       if (!_open_and_assign_control_fd(control))
+               goto_bad;
+       
+       /* Module is loaded now, so majors must be discoverable (arg=1). */
+       if (!_create_dm_bitset(1)) {
+               log_error("Failed to set up list of device-mapper major numbers");
+               return 0;
+       }
+
+       return 1;
+
+bad:
+       log_error("Failure to communicate with kernel device-mapper driver.");
+       if (!geteuid())
+               log_error("Check that device-mapper is available in the kernel.");
+       return 0;
+#else
+       return 1;
+#endif
+}
+
+/*
+ * Zero a NUL-terminated string before freeing it, so its contents
+ * (e.g. table parameters) do not linger in freed memory.
+ */
+static void _dm_zfree_string(char *string)
+{
+       if (!string)
+               return;
+
+       memset(string, 0, strlen(string));
+       dm_free(string);
+}
+
+/* Zero a dm_ioctl buffer (data_size bytes) before freeing it. */
+static void _dm_zfree_dmi(struct dm_ioctl *dmi)
+{
+       if (dmi) {
+               memset(dmi, 0, dmi->data_size);
+               dm_free(dmi);
+       }
+}
+
+/*
+ * Free the task's target list.  Params are scrubbed before freeing
+ * as they may contain sensitive data (e.g. crypt keys).
+ */
+static void _dm_task_free_targets(struct dm_task *dmt)
+{
+       struct target *t, *n;
+
+       for (t = dmt->head; t; t = n) {
+               n = t->next;
+               _dm_zfree_string(t->params);
+               dm_free(t->type);
+               dm_free(t);
+       }
+
+       dmt->head = dmt->tail = NULL;
+}
+
+/* Free a dm_task and everything it owns (targets, ioctl buffer, strings). */
+void dm_task_destroy(struct dm_task *dmt)
+{
+       _dm_task_free_targets(dmt);
+       _dm_zfree_dmi(dmt->dmi.v4);
+       dm_free(dmt->dev_name);
+       dm_free(dmt->mangled_dev_name);
+       dm_free(dmt->newname);
+       dm_free(dmt->message);
+       dm_free(dmt->geometry);
+       dm_free(dmt->uuid);
+       dm_free(dmt->mangled_uuid);
+       dm_free(dmt);
+}
+
+/*
+ * Protocol Version 4 functions.
+ */
+
+/*
+ * Format the driver version reported by a completed task into 'version'
+ * (may be NULL to just cache the minor/patchlevel).  Returns 1 on
+ * success, 0 if no ioctl result is present or the buffer is too small.
+ */
+int dm_task_get_driver_version(struct dm_task *dmt, char *version, size_t size)
+{
+       unsigned *v;
+
+       if (!dmt->dmi.v4) {
+               if (version)
+                       version[0] = '\0';
+               return 0;
+       }
+
+       /* Cache minor/patchlevel for feature checks elsewhere. */
+       v = dmt->dmi.v4->version;
+       _dm_version_minor = v[1];
+       _dm_version_patchlevel = v[2];
+       if (version &&
+           (snprintf(version, size, "%u.%u.%u", v[0], v[1], v[2]) < 0)) {
+               /* Fixed typo: "to short" -> "too short". */
+               log_error("Buffer for version is too short.");
+               if (size > 0)
+                       version[0] = '\0';
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Run a DM_DEVICE_VERSION task and copy the driver version into
+ * 'version'.  Logging may be suppressed for the probe (used when the
+ * first protocol attempt is allowed to fail).  Returns dm_task_run()'s
+ * result.
+ */
+static int _check_version(char *version, size_t size, int log_suppress)
+{
+       struct dm_task *task;
+       int r;
+
+       if (!(task = dm_task_create(DM_DEVICE_VERSION))) {
+               log_error("Failed to get device-mapper version");
+               version[0] = '\0';
+               return 0;
+       }
+
+       if (log_suppress)
+               _log_suppress = 1;
+
+       r = dm_task_run(task);
+       if (!dm_task_get_driver_version(task, version, size))
+               stack;
+       dm_task_destroy(task);
+       _log_suppress = 0;
+
+       return r;
+}
+
+/*
+ * Find out device-mapper's major version number the first time 
+ * this is called and whether or not we support it.
+ * The result is cached in _version_ok for subsequent calls.
+ */
+int dm_check_version(void)
+{
+       char libversion[64] = "", dmversion[64] = "";
+       const char *compat = "";
+
+       if (_version_checked)
+               return _version_ok;
+
+       _version_checked = 1;
+
+       /* Suppress error logging on the first probe only in compat mode. */
+       if (_check_version(dmversion, sizeof(dmversion), _dm_compat))
+               return 1;
+
+       /* _dm_compat is a compile-time constant 0 here, so this fails hard. */
+       if (!_dm_compat)
+               goto_bad;
+
+       log_verbose("device-mapper ioctl protocol version %u failed. "
+                   "Trying protocol version 1.", _dm_version);
+       _dm_version = 1;
+       if (_check_version(dmversion, sizeof(dmversion), 0)) {
+               log_verbose("Using device-mapper ioctl protocol version 1");
+               return 1;
+       }
+
+       compat = "(compat)";
+
+      bad:
+       dm_get_library_version(libversion, sizeof(libversion));
+
+       log_error("Incompatible libdevmapper %s%s and kernel driver %s.",
+                 *libversion ? libversion : "(unknown version)", compat,
+                 *dmversion ? dmversion : "(unknown version)");
+
+       _version_ok = 0;
+       return 0;
+}
+
/* udev cookie semaphores need kernel dm ioctl version >= 4.15. */
int dm_cookie_supported(void)
{
	return (dm_check_version() &&
		_dm_version >= 4 &&
		_dm_version_minor >= 15);
}
+
/*
 * Whether the kernel can query the inactive (loaded but not yet
 * resumed) table slot: upstream from 4.16, plus the 4.11.6 - 4.11.40
 * patchlevel range carrying the RHEL 5.7 backport.
 */
static int _dm_inactive_supported(void)
{
	int inactive_supported = 0;

	if (dm_check_version() && _dm_version >= 4) {
		if (_dm_version_minor >= 16)
			inactive_supported = 1; /* upstream */
		else if (_dm_version_minor == 11 &&
			 (_dm_version_patchlevel >= 6 &&
			  _dm_version_patchlevel <= 40)) {
			inactive_supported = 1; /* RHEL 5.7 */
		}
	}

	return inactive_supported;
}
+
+int dm_message_supports_precise_timestamps(void)
+{
+       /*
+        * 4.32.0 supports "precise_timestamps" and "histogram:" options
+        * to @stats_create messages but lacks the ability to report
+        * these properties via a subsequent @stats_list: require at
+        * least 4.33.0 in order to use these features.
+        */
+       if (dm_check_version() && _dm_version >= 4)
+               if (_dm_version_minor >= 33)
+                       return 1;
+       return 0;
+}
+
+void *dm_get_next_target(struct dm_task *dmt, void *next,
+                        uint64_t *start, uint64_t *length,
+                        char **target_type, char **params)
+{
+       struct target *t = (struct target *) next;
+
+       if (!t)
+               t = dmt->head;
+
+       if (!t) {
+               *start = 0;
+               *length = 0;
+               *target_type = 0;
+               *params = 0;
+               return NULL;
+       }
+
+       *start = t->start;
+       *length = t->length;
+       *target_type = t->type;
+       *params = t->params;
+
+       return t->next;
+}
+
/* Unmarshall the target info returned from a status call */
static int _unmarshal_status(struct dm_task *dmt, struct dm_ioctl *dmi)
{
	char *outbuf = (char *) dmi + dmi->data_start;
	char *outptr = outbuf;
	uint32_t i;
	struct dm_target_spec *spec;

	/* Rebuild dmt's target list from the packed kernel buffer. */
	_dm_task_free_targets(dmt);

	for (i = 0; i < dmi->target_count; i++) {
		spec = (struct dm_target_spec *) outptr;
		/* The params string immediately follows the fixed spec. */
		if (!dm_task_add_target(dmt, spec->sector_start,
					spec->length,
					spec->target_type,
					outptr + sizeof(*spec))) {
			return 0;
		}

		/*
		 * spec->next is an offset from the start of the data area.
		 * NOTE(review): trusted as-is from the kernel — no bounds
		 * check against dmi->data_size.
		 */
		outptr = outbuf + spec->next;
	}

	return 1;
}
+
/*
 * Format "major:minor" into buf.  Returns 1 on success, 0 when the
 * buffer is too small (anything under 8 bytes is rejected up front).
 */
int dm_format_dev(char *buf, int bufsize, uint32_t dev_major,
		  uint32_t dev_minor)
{
	int n;

	if (bufsize < 8)
		return 0;

	n = snprintf(buf, (size_t) bufsize, "%u:%u", dev_major, dev_minor);

	/* n >= bufsize means snprintf truncated the output. */
	return (n < 0 || n >= bufsize) ? 0 : 1;
}
+
+int dm_task_get_info(struct dm_task *dmt, struct dm_info *info)
+{
+       if (!dmt->dmi.v4)
+               return 0;
+
+       memset(info, 0, sizeof(*info));
+
+       info->exists = dmt->dmi.v4->flags & DM_EXISTS_FLAG ? 1 : 0;
+       if (!info->exists)
+               return 1;
+
+       info->suspended = dmt->dmi.v4->flags & DM_SUSPEND_FLAG ? 1 : 0;
+       info->read_only = dmt->dmi.v4->flags & DM_READONLY_FLAG ? 1 : 0;
+       info->live_table = dmt->dmi.v4->flags & DM_ACTIVE_PRESENT_FLAG ? 1 : 0;
+       info->inactive_table = dmt->dmi.v4->flags & DM_INACTIVE_PRESENT_FLAG ?
+           1 : 0;
+       info->deferred_remove = dmt->dmi.v4->flags & DM_DEFERRED_REMOVE;
+       info->internal_suspend = (dmt->dmi.v4->flags & DM_INTERNAL_SUSPEND_FLAG) ? 1 : 0;
+       info->target_count = dmt->dmi.v4->target_count;
+       info->open_count = dmt->dmi.v4->open_count;
+       info->event_nr = dmt->dmi.v4->event_nr;
+       info->major = MAJOR(dmt->dmi.v4->dev);
+       info->minor = MINOR(dmt->dmi.v4->dev);
+
+       return 1;
+}
+
/*
 * Look up the read-ahead value for the device described by dmt's last
 * ioctl result.  Returns 0 on failure (no ioctl result, device does
 * not exist, or no usable device name).
 * NOTE(review): return type is uint32_t but the value is a 0/1 success
 * code — confirm against get_dev_node_read_ahead()'s contract.
 */
uint32_t dm_task_get_read_ahead(const struct dm_task *dmt, uint32_t *read_ahead)
{
	const char *dev_name;

	*read_ahead = 0;

	if (!dmt->dmi.v4 || !(dmt->dmi.v4->flags & DM_EXISTS_FLAG))
		return 0;

	/* Prefer the name reported by the kernel; fall back to the name
	 * recorded in the task itself. */
	if (*dmt->dmi.v4->name)
		dev_name = dmt->dmi.v4->name;
	else if (!(dev_name = DEV_NAME(dmt))) {
		log_error("Get read ahead request failed: device name unrecorded.");
		return 0;
	}

	return get_dev_node_read_ahead(dev_name, MAJOR(dmt->dmi.v4->dev),
				       MINOR(dmt->dmi.v4->dev), read_ahead);
}
+
+struct dm_deps *dm_task_get_deps(struct dm_task *dmt)
+{
+       return (struct dm_deps *) (((char *) dmt->dmi.v4) +
+                                  dmt->dmi.v4->data_start);
+}
+
+struct dm_names *dm_task_get_names(struct dm_task *dmt)
+{
+       return (struct dm_names *) (((char *) dmt->dmi.v4) +
+                                   dmt->dmi.v4->data_start);
+}
+
+struct dm_versions *dm_task_get_versions(struct dm_task *dmt)
+{
+       return (struct dm_versions *) (((char *) dmt->dmi.v4) +
+                                      dmt->dmi.v4->data_start);
+}
+
/*
 * Return the NUL-terminated message response embedded in the last
 * ioctl result, or NULL if none was returned or the payload looks
 * corrupt.  The pointer aliases dmt's ioctl buffer and is only valid
 * until the task is re-run or destroyed.
 */
const char *dm_task_get_message_response(struct dm_task *dmt)
{
	const char *start, *end;

	if (!(dmt->dmi.v4->flags & DM_DATA_OUT_FLAG))
		return NULL;

	start = (const char *) dmt->dmi.v4 + dmt->dmi.v4->data_start;
	end = (const char *) dmt->dmi.v4 + dmt->dmi.v4->data_size;

	/* Defend against an inconsistent data_start/data_size pair. */
	if (end < start) {
		log_error(INTERNAL_ERROR "Corrupted message structure returned: start %d > end %d", (int)dmt->dmi.v4->data_start, (int)dmt->dmi.v4->data_size);
		return NULL;
	}

	/* The response must be NUL-terminated within the buffer. */
	if (!memchr(start, 0, end - start)) {
		log_error(INTERNAL_ERROR "Message response doesn't contain terminating NUL character");
		return NULL;
	}

	return start;
}
+
/* Mark the task's table as read-only; always succeeds. */
int dm_task_set_ro(struct dm_task *dmt)
{
	dmt->read_only = 1;
	return 1;
}
+
/* Record the read-ahead value and flags to apply; always succeeds. */
int dm_task_set_read_ahead(struct dm_task *dmt, uint32_t read_ahead,
			   uint32_t read_ahead_flags)
{
	dmt->read_ahead = read_ahead;
	dmt->read_ahead_flags = read_ahead_flags;

	return 1;
}
+
/* Ask dm_task_run to skip a reload whose table matches the live one. */
int dm_task_suppress_identical_reload(struct dm_task *dmt)
{
	dmt->suppress_identical_reload = 1;
	return 1;
}
+
+int dm_task_set_add_node(struct dm_task *dmt, dm_add_node_t add_node)
+{
+       switch (add_node) {
+       case DM_ADD_NODE_ON_RESUME:
+       case DM_ADD_NODE_ON_CREATE:
+               dmt->add_node = add_node;
+               return 1;
+       default:
+               log_error("Unknown add node parameter");
+               return 0;
+       }
+}
+
/*
 * Record a new UUID for a rename operation.  The UUID is mangled
 * (hex-escaped) according to the current name-mangling mode before
 * being stored in dmt->newname with new_uuid set.  Returns 0 on an
 * overlong UUID, disallowed mixed mangling, mangling failure, or
 * allocation failure.
 */
int dm_task_set_newuuid(struct dm_task *dmt, const char *newuuid)
{
	dm_string_mangling_t mangling_mode = dm_get_name_mangling_mode();
	char mangled_uuid[DM_UUID_LEN];
	int r = 0;

	if (strlen(newuuid) >= DM_UUID_LEN) {
		log_error("Uuid \"%s\" too long", newuuid);
		return 0;
	}

	if (!check_multiple_mangled_string_allowed(newuuid, "new UUID", mangling_mode))
		return_0;

	/* r > 0 means mangling actually changed the string. */
	if (mangling_mode != DM_STRING_MANGLING_NONE &&
	    (r = mangle_string(newuuid, "new UUID", strlen(newuuid), mangled_uuid,
			       sizeof(mangled_uuid), mangling_mode)) < 0) {
		log_error("Failed to mangle new device UUID \"%s\"", newuuid);
		return 0;
	}

	if (r) {
		log_debug_activation("New device uuid mangled [%s]: %s --> %s",
				     mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex",
				     newuuid, mangled_uuid);
		newuuid = mangled_uuid;
	}

	/* Replace any previously recorded new name/uuid. */
	dm_free(dmt->newname);
	if (!(dmt->newname = dm_strdup(newuuid))) {
		log_error("dm_task_set_newuuid: strdup(%s) failed", newuuid);
		return 0;
	}
	dmt->new_uuid = 1;

	return 1;
}
+
/*
 * Store a copy of the message to send with a DM_DEVICE_TARGET_MSG
 * task, replacing any previous one.  Returns 0 on allocation failure.
 */
int dm_task_set_message(struct dm_task *dmt, const char *message)
{
	dm_free(dmt->message);
	if (!(dmt->message = dm_strdup(message))) {
		log_error("dm_task_set_message: strdup failed");
		return 0;
	}

	return 1;
}
+
/* Record the target sector a message is addressed to; always succeeds. */
int dm_task_set_sector(struct dm_task *dmt, uint64_t sector)
{
	dmt->sector = sector;

	return 1;
}
+
/*
 * Store the device geometry as a single "C H S start" string,
 * replacing any previous value.  Returns 0 on allocation failure.
 */
int dm_task_set_geometry(struct dm_task *dmt, const char *cylinders, const char *heads,
			 const char *sectors, const char *start)
{
	dm_free(dmt->geometry);
	if (dm_asprintf(&(dmt->geometry), "%s %s %s %s",
			cylinders, heads, sectors, start) < 0) {
		log_error("dm_task_set_geometry: sprintf failed");
		return 0;
	}

	return 1;
}
+
/* Request DM_NOFLUSH_FLAG on suspend; always succeeds. */
int dm_task_no_flush(struct dm_task *dmt)
{
	dmt->no_flush = 1;

	return 1;
}
+
/* Skip querying the device open count in the ioctl; always succeeds. */
int dm_task_no_open_count(struct dm_task *dmt)
{
	dmt->no_open_count = 1;

	return 1;
}
+
/* Request DM_SKIP_LOCKFS_FLAG (no fs freeze on suspend); always succeeds. */
int dm_task_skip_lockfs(struct dm_task *dmt)
{
	dmt->skip_lockfs = 1;

	return 1;
}
+
/* Request DM_SECURE_DATA_FLAG (wipe ioctl buffers after use); always succeeds. */
int dm_task_secure_data(struct dm_task *dmt)
{
	dmt->secure_data = 1;

	return 1;
}
+
/* Retry a failing remove (device busy) before giving up; always succeeds. */
int dm_task_retry_remove(struct dm_task *dmt)
{
	dmt->retry_remove = 1;

	return 1;
}
+
/* Request DM_DEFERRED_REMOVE (remove once last opener closes); always succeeds. */
int dm_task_deferred_remove(struct dm_task *dmt)
{
	dmt->deferred_remove = 1;

	return 1;
}
+
/* Query the inactive table slot instead of the live one; always succeeds. */
int dm_task_query_inactive_table(struct dm_task *dmt)
{
	dmt->query_inactive_table = 1;

	return 1;
}
+
/* Record the event number (also carries udev cookie bits); always succeeds. */
int dm_task_set_event_nr(struct dm_task *dmt, uint32_t event_nr)
{
	dmt->event_nr = event_nr;

	return 1;
}
+
/*
 * Request that dm_task_run() timestamps its ioctl.  The shared
 * _dm_ioctl_timestamp object is lazily allocated on first use and
 * kept for the life of the process.  Returns 0 on allocation failure.
 */
int dm_task_set_record_timestamp(struct dm_task *dmt)
{
	if (!_dm_ioctl_timestamp)
		_dm_ioctl_timestamp = dm_timestamp_alloc();

	if (!_dm_ioctl_timestamp)
		return_0;

	dmt->record_timestamp = 1;

	return 1;
}
+
/* Return the shared ioctl timestamp, or NULL if recording wasn't requested. */
struct dm_timestamp *dm_task_get_ioctl_timestamp(struct dm_task *dmt)
{
	return dmt->record_timestamp ? _dm_ioctl_timestamp : NULL;
}
+
/*
 * Allocate a struct target with duplicated type and params strings.
 * Returns NULL on an overlong type name or allocation failure; on the
 * error path partial allocations are released — params through
 * _dm_zfree_string (a zeroing free, per the helper's naming, since
 * params may carry sensitive data such as crypt keys).
 */
struct target *create_target(uint64_t start, uint64_t len, const char *type,
			     const char *params)
{
	struct target *t;

	if (strlen(type) >= DM_MAX_TYPE_NAME) {
		log_error("Target type name %s is too long.", type);
		return NULL;
	}

	if (!(t = dm_zalloc(sizeof(*t)))) {
		log_error("create_target: malloc(%" PRIsize_t ") failed",
			  sizeof(*t));
		return NULL;
	}

	if (!(t->params = dm_strdup(params))) {
		log_error("create_target: strdup(params) failed");
		goto bad;
	}

	if (!(t->type = dm_strdup(type))) {
		log_error("create_target: strdup(type) failed");
		goto bad;
	}

	t->start = start;
	t->length = len;
	return t;

      bad:
	_dm_zfree_string(t->params);
	dm_free(t->type);
	dm_free(t);
	return NULL;
}
+
/*
 * Serialise one target into the ioctl buffer at 'out' (bounded by
 * 'end'): a struct dm_target_spec header followed by the params
 * string, with every '\' doubled for the kernel's argument parser.
 * The header's 'next' field is filled in last, once the aligned end
 * of this record is known.  Returns the new write position, or NULL
 * if the record does not fit.
 */
static char *_add_target(struct target *t, char *out, char *end)
{
	char *out_sp = out;
	struct dm_target_spec sp;
	size_t sp_size = sizeof(struct dm_target_spec);
	unsigned int backslash_count = 0;
	int len;
	char *pt;

	if (strlen(t->type) >= sizeof(sp.target_type)) {
		log_error("Target type name %s is too long.", t->type);
		return NULL;
	}

	sp.status = 0;
	sp.sector_start = t->start;
	sp.length = t->length;
	strncpy(sp.target_type, t->type, sizeof(sp.target_type) - 1);
	sp.target_type[sizeof(sp.target_type) - 1] = '\0';

	/* Reserve room for the header; it is copied in at the end. */
	out += sp_size;
	pt = t->params;

	/* Each backslash expands to two bytes in the output. */
	while (*pt)
		if (*pt++ == '\\')
			backslash_count++;
	len = strlen(t->params) + backslash_count;

	if ((out >= end) || (out + len + 1) >= end) {
		log_error("Ran out of memory building ioctl parameter");
		return NULL;
	}

	if (backslash_count) {
		/* replace "\" with "\\" */
		pt = t->params;
		do {
			if (*pt == '\\')
				*out++ = '\\';
			*out++ = *pt++;
		} while (*pt);
		*out++ = '\0';
	}
	else {
		strcpy(out, t->params);
		out += len + 1;
	}

	/* align next block */
	out = _align(out, ALIGNMENT);

	sp.next = out - out_sp;
	memcpy(out_sp, &sp, sp_size);

	return out;
}
+
+static int _lookup_dev_name(uint64_t dev, char *buf, size_t len)
+{
+       struct dm_names *names;
+       unsigned next = 0;
+       struct dm_task *dmt;
+       int r = 0;
+       if (!(dmt = dm_task_create(DM_DEVICE_LIST)))
+               return 0;
+       if (!dm_task_run(dmt))
+               goto out;
+
+       if (!(names = dm_task_get_names(dmt)))
+               goto out;
+       if (!names->dev)
+               goto out;
+       do {
+               names = (struct dm_names *)((char *) names + next);
+               if (names->dev == dev) {
+                       strncpy(buf, names->name, len);
+                       r = 1;
+                       break;
+               }
+               next = names->next;
+       } while (next);
+
+      out:
+       dm_task_destroy(dmt);
+       return r;
+}
+
/*
 * Marshal the task into one contiguous struct dm_ioctl buffer ready
 * for the kernel: the header, then each target spec + params string
 * (aligned), then any newname / message / geometry payload.
 * 'repeat_count' doubles the buffer for each previous attempt that
 * came back too small.  Returns NULL on an invalid combination of
 * fields or on allocation failure.
 */
static struct dm_ioctl *_flatten(struct dm_task *dmt, unsigned repeat_count)
{
	const size_t min_size = 16 * 1024;
	const int (*version)[3];

	struct dm_ioctl *dmi;
	struct target *t;
	struct dm_target_msg *tmsg;
	size_t len = sizeof(struct dm_ioctl);
	char *b, *e;
	int count = 0;

	/* Size the target section; ALIGNMENT padding per target. */
	for (t = dmt->head; t; t = t->next) {
		len += sizeof(struct dm_target_spec);
		len += strlen(t->params) + 1 + ALIGNMENT;
		count++;
	}

	/* Reject mutually exclusive field combinations up front. */
	if (count && (dmt->sector || dmt->message)) {
		log_error("targets and message are incompatible");
		return NULL;
	}

	if (count && dmt->newname) {
		log_error("targets and rename are incompatible");
		return NULL;
	}

	if (count && dmt->geometry) {
		log_error("targets and geometry are incompatible");
		return NULL;
	}

	if (dmt->newname && (dmt->sector || dmt->message)) {
		log_error("message and rename are incompatible");
		return NULL;
	}

	if (dmt->newname && dmt->geometry) {
		log_error("geometry and rename are incompatible");
		return NULL;
	}

	if (dmt->geometry && (dmt->sector || dmt->message)) {
		log_error("geometry and message are incompatible");
		return NULL;
	}

	if (dmt->sector && !dmt->message) {
		log_error("message is required with sector");
		return NULL;
	}

	if (dmt->newname)
		len += strlen(dmt->newname) + 1;

	if (dmt->message)
		len += sizeof(struct dm_target_msg) + strlen(dmt->message) + 1;

	if (dmt->geometry)
		len += strlen(dmt->geometry) + 1;

	/*
	 * Give len a minimum size so that we have space to store
	 * dependencies or status information.
	 */
	if (len < min_size)
		len = min_size;

	/* Increase buffer size if repeating because buffer was too small */
	while (repeat_count--)
		len *= 2;

	if (!(dmi = dm_zalloc(len)))
		return NULL;

	/* Per-command ioctl protocol version from the command table. */
	version = &_cmd_data_v4[dmt->type].version;

	dmi->version[0] = (*version)[0];
	dmi->version[1] = (*version)[1];
	dmi->version[2] = (*version)[2];

	dmi->data_size = len;
	dmi->data_start = sizeof(struct dm_ioctl);

	if (dmt->minor >= 0) {
		/* Single-major kernels: force the canonical dm major. */
		if (!_dm_multiple_major_support && dmt->allow_default_major_fallback &&
		    dmt->major != (int) _dm_device_major) {
			log_verbose("Overriding major number of %d "
				    "with %u for persistent device.",
				    dmt->major, _dm_device_major);
			dmt->major = _dm_device_major;
		}

		if (dmt->major <= 0) {
			log_error("Missing major number for persistent device.");
			goto bad;
		}

		dmi->flags |= DM_PERSISTENT_DEV_FLAG;
		dmi->dev = MKDEV((dev_t)dmt->major, (dev_t)dmt->minor);
	}

	/* Does driver support device number referencing? */
	if (_dm_version_minor < 3 && !DEV_NAME(dmt) && !DEV_UUID(dmt) && dmi->dev) {
		if (!_lookup_dev_name(dmi->dev, dmi->name, sizeof(dmi->name))) {
			log_error("Unable to find name for device (%" PRIu32
				  ":%" PRIu32 ")", dmt->major, dmt->minor);
			goto bad;
		}
		log_verbose("device (%" PRIu32 ":%" PRIu32 ") is %s "
			    "for compatibility with old kernel",
			    dmt->major, dmt->minor, dmi->name);
	}

	/* FIXME Until resume ioctl supplies name, use dev_name for readahead */
	/* NOTE(review): strncpy here relies on DEV_NAME/DEV_UUID fitting the
	 * fixed kernel fields — no explicit termination on truncation. */
	if (DEV_NAME(dmt) && (dmt->type != DM_DEVICE_RESUME || dmt->minor < 0 ||
			      dmt->major < 0))
		strncpy(dmi->name, DEV_NAME(dmt), sizeof(dmi->name));

	if (DEV_UUID(dmt))
		strncpy(dmi->uuid, DEV_UUID(dmt), sizeof(dmi->uuid));

	/* Translate task fields into ioctl flags. */
	if (dmt->type == DM_DEVICE_SUSPEND)
		dmi->flags |= DM_SUSPEND_FLAG;
	if (dmt->no_flush) {
		if (_dm_version_minor < 12)
			log_verbose("No flush flag unsupported by kernel. "
				    "Buffers will be flushed.");
		else
			dmi->flags |= DM_NOFLUSH_FLAG;
	}
	if (dmt->read_only)
		dmi->flags |= DM_READONLY_FLAG;
	if (dmt->skip_lockfs)
		dmi->flags |= DM_SKIP_LOCKFS_FLAG;
	if (dmt->deferred_remove && (dmt->type == DM_DEVICE_REMOVE || dmt->type == DM_DEVICE_REMOVE_ALL))
		dmi->flags |= DM_DEFERRED_REMOVE;

	if (dmt->secure_data) {
		if (_dm_version_minor < 20)
			log_verbose("Secure data flag unsupported by kernel. "
				    "Buffers will not be wiped after use.");
		dmi->flags |= DM_SECURE_DATA_FLAG;
	}
	if (dmt->query_inactive_table) {
		if (!_dm_inactive_supported())
			log_warn("WARNING: Inactive table query unsupported "
				 "by kernel.  It will use live table.");
		dmi->flags |= DM_QUERY_INACTIVE_TABLE_FLAG;
	}
	if (dmt->new_uuid) {
		if (_dm_version_minor < 19) {
			log_error("WARNING: Setting UUID unsupported by "
				  "kernel.  Aborting operation.");
			goto bad;
		}
		dmi->flags |= DM_UUID_FLAG;
	}

	dmi->target_count = count;
	dmi->event_nr = dmt->event_nr;

	/* Append the variable-length payload after the header. */
	b = (char *) (dmi + 1);
	e = (char *) dmi + len;

	for (t = dmt->head; t; t = t->next)
		if (!(b = _add_target(t, b, e)))
			goto_bad;

	if (dmt->newname)
		strcpy(b, dmt->newname);

	if (dmt->message) {
		tmsg = (struct dm_target_msg *) b;
		tmsg->sector = dmt->sector;
		strcpy(tmsg->message, dmt->message);
	}

	if (dmt->geometry)
		strcpy(b, dmt->geometry);

	return dmi;

      bad:
	_dm_zfree_dmi(dmi);
	return NULL;
}
+
/*
 * Run the task once for every entry found in the device-mapper
 * directory (skipping ".", ".." and the control node).  Failures on
 * individual names are recorded but iteration continues; returns 1
 * only if every name succeeded.
 */
static int _process_mapper_dir(struct dm_task *dmt)
{
	struct dirent *dirent;
	DIR *d;
	const char *dir;
	int r = 1;

	dir = dm_dir();
	if (!(d = opendir(dir))) {
		log_sys_error("opendir", dir);
		return 0;
	}

	while ((dirent = readdir(d))) {
		if (!strcmp(dirent->d_name, ".") ||
		    !strcmp(dirent->d_name, "..") ||
		    !strcmp(dirent->d_name, "control"))
			continue;
		if (!dm_task_set_name(dmt, dirent->d_name)) {
			r = 0;
			stack;
			continue; /* try next name */
		}
		if (!dm_task_run(dmt)) {
			r = 0;
			stack;	/* keep going */
		}
	}

	if (closedir(d))
		log_sys_error("closedir", dir);

	return r;
}
+
/*
 * Run the task once for every device reported by DM_DEVICE_LIST.
 * Unlike _process_mapper_dir, a failure to set a name aborts the
 * walk; a failed run only clears the result and continues.
 */
static int _process_all_v4(struct dm_task *dmt)
{
	struct dm_task *task;
	struct dm_names *names;
	unsigned next = 0;
	int r = 1;

	if (!(task = dm_task_create(DM_DEVICE_LIST)))
		return 0;

	if (!dm_task_run(task)) {
		r = 0;
		goto out;
	}

	if (!(names = dm_task_get_names(task))) {
		r = 0;
		goto out;
	}

	/* dev == 0 in the first record marks an empty list. */
	if (!names->dev)
		goto out;

	do {
		/* Records are chained by byte offset in 'next'. */
		names = (struct dm_names *)((char *) names + next);
		if (!dm_task_set_name(dmt, names->name)) {
			r = 0;
			goto out;
		}
		if (!dm_task_run(dmt))
			r = 0;
		next = names->next;
	} while (next);

      out:
	dm_task_destroy(task);
	return r;
}
+
/*
 * DM_DEVICE_MKNODES: first sweep the /dev/mapper directory (removing
 * stale nodes — its result is deliberately ignored), then recreate
 * nodes for every device the kernel reports.
 */
static int _mknodes_v4(struct dm_task *dmt)
{
	(void) _process_mapper_dir(dmt);

	return _process_all_v4(dmt);
}
+
+/*
+ * If an operation that uses a cookie fails, decrement the
+ * semaphore instead of udev.
+ */
static int _udev_complete(struct dm_task *dmt)
{
	uint16_t base;

	/* Only act when a cookie was set and has a non-flag base value. */
	if (dmt->cookie_set &&
	    (base = dmt->event_nr & ~DM_UDEV_FLAGS_MASK))
		/* strip flags from the cookie and use cookie magic instead */
		return dm_udev_complete(base | (DM_COOKIE_MAGIC <<
						DM_UDEV_FLAGS_SHIFT));

	return 1;
}
+
#ifdef DM_IOCTLS
/*
 * Return whether the kernel reported that the last ioctl generated a
 * uevent.  Kernels older than dm 4.17 cannot report this, so assume
 * a uevent was generated in that case.
 */
static int _check_uevent_generated(struct dm_ioctl *dmi)
{
	if (!dm_check_version() ||
	    _dm_version < 4 ||
	    _dm_version_minor < 17)
		/* can't check, assume uevent is generated */
		return 1;

	return dmi->flags & DM_UEVENT_GENERATED_FLAG;
}
#endif
+
/*
 * Implement DM_DEVICE_CREATE-with-table as three kernel operations:
 * CREATE (fresh task), RELOAD of the table (fresh task borrowing
 * dmt's target list), then RESUME using the original task so its
 * result/info is what the caller sees.  If the reload or resume
 * fails the freshly created device is removed again, keeping udev
 * synchronisation consistent throughout.
 */
static int _create_and_load_v4(struct dm_task *dmt)
{
	struct dm_task *task;
	int r;
	uint32_t cookie;

	/* Use new task struct to create the device */
	if (!(task = dm_task_create(DM_DEVICE_CREATE))) {
		_udev_complete(dmt);
		return_0;
	}

	/* Copy across relevant fields */
	if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name))
		goto_bad;

	if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid))
		goto_bad;

	task->major = dmt->major;
	task->minor = dmt->minor;
	task->uid = dmt->uid;
	task->gid = dmt->gid;
	task->mode = dmt->mode;
	/* FIXME: Just for udev_check in dm_task_run. Can we avoid this? */
	task->event_nr = dmt->event_nr & DM_UDEV_FLAGS_MASK;
	task->cookie_set = dmt->cookie_set;
	task->add_node = dmt->add_node;

	if (!dm_task_run(task))
		goto_bad;

	dm_task_destroy(task);

	/* Next load the table */
	if (!(task = dm_task_create(DM_DEVICE_RELOAD))) {
		stack;
		_udev_complete(dmt);
		goto revert;
	}

	/* Copy across relevant fields */
	if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) {
		stack;
		dm_task_destroy(task);
		_udev_complete(dmt);
		goto revert;
	}

	task->read_only = dmt->read_only;
	/* Borrow (not copy) dmt's target list for the reload... */
	task->head = dmt->head;
	task->tail = dmt->tail;
	task->secure_data = dmt->secure_data;

	r = dm_task_run(task);

	/* ...and give it back before destroying the task, so the
	 * targets are not freed twice. */
	task->head = NULL;
	task->tail = NULL;
	dm_task_destroy(task);

	if (!r) {
		stack;
		_udev_complete(dmt);
		goto revert;
	}

	/* Use the original structure last so the info will be correct */
	dmt->type = DM_DEVICE_RESUME;
	/* Drop uuid: resume addresses the device by name. */
	dm_free(dmt->uuid);
	dmt->uuid = NULL;
	dm_free(dmt->mangled_uuid);
	dmt->mangled_uuid = NULL;

	if (dm_task_run(dmt))
		return 1;

      revert:
	dmt->type = DM_DEVICE_REMOVE;
	dm_free(dmt->uuid);
	dmt->uuid = NULL;
	dm_free(dmt->mangled_uuid);
	dmt->mangled_uuid = NULL;

	/*
	 * Also udev-synchronize "remove" dm task that is a part of this revert!
	 * But only if the original dm task was supposed to be synchronized.
	 */
	if (dmt->cookie_set) {
		cookie = (dmt->event_nr & ~DM_UDEV_FLAGS_MASK) |
			 (DM_COOKIE_MAGIC << DM_UDEV_FLAGS_SHIFT);
		if (!dm_task_set_cookie(dmt, &cookie,
					(dmt->event_nr & DM_UDEV_FLAGS_MASK) >>
					DM_UDEV_FLAGS_SHIFT))
			stack; /* keep going */
	}

	if (!dm_task_run(dmt))
		log_error("Failed to revert device creation.");

	return 0;

      bad:
	dm_task_destroy(task);
	_udev_complete(dmt);

	return 0;
}
+
/* Size (in sectors) of the live table recorded by the last suppressed
 * reload comparison; 0 if none was recorded. */
uint64_t dm_task_get_existing_table_size(struct dm_task *dmt)
{
	return dmt->existing_table_size;
}
+
/*
 * DM_DEVICE_RELOAD with suppress_identical_reload set: fetch the live
 * table and, if it matches the requested one target-for-target (and
 * the read-only flag agrees), skip the reload entirely — adopting the
 * fetched ioctl result so callers still see valid info.  Otherwise
 * perform the ordinary reload.  Also records the live table's size
 * for dm_task_get_existing_table_size().
 */
static int _reload_with_suppression_v4(struct dm_task *dmt)
{
	struct dm_task *task;
	struct target *t1, *t2;
	size_t len;
	int r;

	/* New task to get existing table information */
	if (!(task = dm_task_create(DM_DEVICE_TABLE))) {
		log_error("Failed to create device-mapper task struct");
		return 0;
	}

	/* Copy across relevant fields */
	if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) {
		dm_task_destroy(task);
		return 0;
	}

	if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid)) {
		dm_task_destroy(task);
		return 0;
	}

	task->major = dmt->major;
	task->minor = dmt->minor;

	r = dm_task_run(task);

	if (!r) {
		dm_task_destroy(task);
		return r;
	}

	/* Store existing table size */
	t2 = task->head;
	while (t2 && t2->next)
		t2 = t2->next;
	dmt->existing_table_size = t2 ? t2->start + t2->length : 0;

	if (((task->dmi.v4->flags & DM_READONLY_FLAG) ? 1 : 0) != dmt->read_only)
		goto no_match;

	t1 = dmt->head;
	t2 = task->head;

	while (t1 && t2) {
		/* Trim trailing spaces the kernel may append to params. */
		len = strlen(t2->params);
		while (len-- > 0 && t2->params[len] == ' ')
			t2->params[len] = '\0';
		if ((t1->start != t2->start) ||
		    (t1->length != t2->length) ||
		    (strcmp(t1->type, t2->type)) ||
		    (strcmp(t1->params, t2->params)))
			goto no_match;
		t1 = t1->next;
		t2 = t2->next;
	}

	/* Both lists exhausted together => identical tables: adopt the
	 * fetched ioctl result instead of reloading. */
	if (!t1 && !t2) {
		dmt->dmi.v4 = task->dmi.v4;
		task->dmi.v4 = NULL;
		dm_task_destroy(task);
		return 1;
	}

no_match:
	dm_task_destroy(task);

	/* Now do the original reload */
	dmt->suppress_identical_reload = 0;
	r = dm_task_run(dmt);

	return r;
}
+
/*
 * Recursively walk the dependency tree below the device described by
 * dmt (or, when 'device' is non-zero, by that dev_t) and warn about
 * any dm dependency that is already suspended — suspending on top of
 * it could trap I/O between the two devices.  Returns 1 when the
 * check completed (warnings included), 0 on lookup failure.
 */
static int _check_children_not_suspended_v4(struct dm_task *dmt, uint64_t device)
{
	struct dm_task *task;
	struct dm_info info;
	struct dm_deps *deps;
	int r = 0;
	uint32_t i;

	/* Find dependencies */
	if (!(task = dm_task_create(DM_DEVICE_DEPS)))
		return 0;

	/* Copy across or set relevant fields */
	if (device) {
		task->major = MAJOR(device);
		task->minor = MINOR(device);
	} else {
		if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name))
			goto out;

		if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid))
			goto out;

		task->major = dmt->major;
		task->minor = dmt->minor;
	}

	task->uid = dmt->uid;
	task->gid = dmt->gid;
	task->mode = dmt->mode;
	/* FIXME: Just for udev_check in dm_task_run. Can we avoid this? */
	task->event_nr = dmt->event_nr & DM_UDEV_FLAGS_MASK;
	task->cookie_set = dmt->cookie_set;
	task->add_node = dmt->add_node;

	if (!(r = dm_task_run(task)))
		goto out;

	if (!dm_task_get_info(task, &info) || !info.exists)
		goto out;

	/*
	 * Warn if any of the devices this device depends upon are already
	 * suspended: I/O could become trapped between the two devices.
	 */
	if (info.suspended) {
		if (!device)
			log_debug_activation("Attempting to suspend a device that is already suspended "
					     "(%u:%u)", info.major, info.minor);
		else
			log_error(INTERNAL_ERROR "Attempt to suspend device %s%s%s%.0d%s%.0d%s%s"
				  "that uses already-suspended device (%u:%u)",
				  DEV_NAME(dmt) ? : "", DEV_UUID(dmt) ? : "",
				  dmt->major > 0 ? "(" : "",
				  dmt->major > 0 ? dmt->major : 0,
				  dmt->major > 0 ? ":" : "",
				  dmt->minor > 0 ? dmt->minor : 0,
				  dmt->major > 0 && dmt->minor == 0 ? "0" : "",
				  dmt->major > 0 ? ") " : "",
				  info.major, info.minor);

		/* No need for further recursion */
		r = 1;
		goto out;
	}

	if (!(deps = dm_task_get_deps(task)))
		goto out;

	for (i = 0; i < deps->count; i++) {
		/* Only recurse with dm devices */
		if (MAJOR(deps->device[i]) != _dm_device_major)
			continue;

		if (!_check_children_not_suspended_v4(task, deps->device[i]))
			goto out;
	}

	r = 1;

out:
	dm_task_destroy(task);

	return r;
}
+
+/*
+ * DM_DEVICE_SUSPEND entry point used when checks are enabled: walk the
+ * device's dependencies first and refuse to suspend if any of them is
+ * already suspended, so no I/O can be trapped between two suspended
+ * devices.  Returns 1 on success, 0 on failure.
+ */
+static int _suspend_with_validation_v4(struct dm_task *dmt)
+{
+       /* Clear the flag first, or dm_task_run() below would recurse here. */
+       dmt->enable_checks = 0;
+
+       /* Validate the dependency tree, then issue the real suspend. */
+       return _check_children_not_suspended_v4(dmt, 0) ? dm_task_run(dmt) : 0;
+}
+
+/*
+ * Return a log-safe form of an ioctl message string: secret-bearing
+ * messages (currently only cryptsetup's "key set <key>") are truncated
+ * to a fixed marker, and NULL becomes the empty string.
+ */
+static const char *_sanitise_message(char *message)
+{
+       /* This traps what cryptsetup sends us. */
+       /* FIXME: Check for whitespace variations. */
+       if (message && !strncasecmp(message, "key set", 7))
+               return "key set";
+
+       return message ? message : "";
+}
+
+#ifdef DM_IOCTLS
+/*
+ * Unmangle (decode) one possibly-mangled string returned by the kernel,
+ * in place.  'str_name' is used only for log messages; 'buf'/'buf_size'
+ * provide scratch space for the decoded form.
+ * Returns 1 on success or when nothing needed doing, 0 on failure.
+ */
+static int _do_dm_ioctl_unmangle_string(char *str, const char *str_name,
+                                       char *buf, size_t buf_size,
+                                       dm_string_mangling_t mode)
+{
+       int r;
+
+       /* Mangling disabled: leave the string untouched. */
+       if (mode == DM_STRING_MANGLING_NONE)
+               return 1;
+
+       if (!check_multiple_mangled_string_allowed(str, str_name, mode))
+               return_0;
+
+       if ((r = unmangle_string(str, str_name, strlen(str), buf, buf_size, mode)) < 0) {
+               log_debug_activation("_do_dm_ioctl_unmangle_string: failed to "
+                                    "unmangle %s \"%s\"", str_name, str);
+               return 0;
+       }
+
+       /* r > 0 means the string really was mangled: copy the decoded form back. */
+       if (r)
+               memcpy(str, buf, strlen(buf) + 1);
+
+       return 1;
+}
+
+/*
+ * Unmangle the device name in an ioctl reply, and for DM_DEVICE_LIST
+ * also every name in the trailing dm_names list (entries chained via
+ * the 'next' byte offset; a zero offset terminates the list).
+ *
+ * NOTE(review): 'r' is overwritten on each list iteration, so a failure
+ * on one entry is lost if a later entry succeeds — confirm intended.
+ */
+static int _dm_ioctl_unmangle_names(int type, struct dm_ioctl *dmi)
+{
+       char buf[DM_NAME_LEN];
+       struct dm_names *names;
+       unsigned next = 0;
+       char *name;
+       int r = 1;
+
+       if ((name = dmi->name))
+               r = _do_dm_ioctl_unmangle_string(name, "name", buf, sizeof(buf),
+                                                dm_get_name_mangling_mode());
+
+       /* names->dev == 0 marks an empty list. */
+       if (type == DM_DEVICE_LIST &&
+           ((names = ((struct dm_names *) ((char *)dmi + dmi->data_start)))) &&
+           names->dev) {
+               do {
+                       names = (struct dm_names *)((char *) names + next);
+                       r = _do_dm_ioctl_unmangle_string(names->name, "name",
+                                                        buf, sizeof(buf),
+                                                        dm_get_name_mangling_mode());
+                       next = names->next;
+               } while (next);
+       }
+
+       return r;
+}
+
+/*
+ * Unmangle the UUID in an ioctl reply, if present.
+ * 'type' is currently unused here: the caller already skips this call
+ * for DM_DEVICE_REMOVE (see _do_dm_ioctl).
+ */
+static int _dm_ioctl_unmangle_uuids(int type, struct dm_ioctl *dmi)
+{
+       char buf[DM_UUID_LEN];
+       char *uuid = dmi->uuid;
+
+       if (uuid)
+               return _do_dm_ioctl_unmangle_string(uuid, "UUID", buf, sizeof(buf),
+                                                   dm_get_name_mangling_mode());
+
+       return 1;
+}
+#endif
+
+/*
+ * Marshal 'dmt' into a struct dm_ioctl, set up udev-related flags,
+ * issue the device-mapper ioctl and unmangle names/UUIDs in the reply.
+ *
+ * 'buffer_repeat_count' scales the reply buffer (grown by the caller on
+ * DM_BUFFER_FULL); 'retry_repeat_count' is only logged.  Returns the
+ * reply buffer (freed by the caller with _dm_zfree_dmi) or NULL on
+ * failure, in which case '*retryable' is set iff errno was EBUSY.
+ */
+static struct dm_ioctl *_do_dm_ioctl(struct dm_task *dmt, unsigned command,
+                                    unsigned buffer_repeat_count,
+                                    unsigned retry_repeat_count,
+                                    int *retryable)
+{
+       struct dm_ioctl *dmi;
+       int ioctl_with_uevent;
+       int r;
+
+       dmt->ioctl_errno = 0;
+
+       dmi = _flatten(dmt, buffer_repeat_count);
+       if (!dmi) {
+               log_error("Couldn't create ioctl argument.");
+               return NULL;
+       }
+
+       if (dmt->type == DM_DEVICE_TABLE)
+               dmi->flags |= DM_STATUS_TABLE_FLAG;
+
+       dmi->flags |= DM_EXISTS_FLAG;   /* FIXME */
+
+       if (dmt->no_open_count)
+               dmi->flags |= DM_SKIP_BDGET_FLAG;
+
+       /* These three commands cause the kernel to emit a uevent. */
+       ioctl_with_uevent = dmt->type == DM_DEVICE_RESUME ||
+                           dmt->type == DM_DEVICE_REMOVE ||
+                           dmt->type == DM_DEVICE_RENAME;
+
+       if (ioctl_with_uevent && dm_cookie_supported()) {
+               /*
+                * Always mark events coming from libdevmapper as
+                * "primary sourced". This is needed to distinguish
+                * any spurious events so we can act appropriately.
+                * This needs to be applied even when udev_sync is
+                * not used because udev flags could be used alone.
+                */
+               dmi->event_nr |= DM_UDEV_PRIMARY_SOURCE_FLAG <<
+                                DM_UDEV_FLAGS_SHIFT;
+
+               /*
+                * Prevent udev vs. libdevmapper race when processing nodes
+                * and symlinks. This can happen when the udev rules are
+                * installed and udev synchronisation code is enabled in
+                * libdevmapper but the software using libdevmapper does not
+                * make use of it (by not calling dm_task_set_cookie before).
+                * We need to instruct the udev rules not to be applied at
+                * all in this situation so we can gracefully fallback to
+                * libdevmapper's node and symlink creation code.
+                */
+               if (!dmt->cookie_set && dm_udev_get_sync_support()) {
+                       log_debug_activation("Cookie value is not set while trying to call %s "
+                                            "ioctl. Please, consider using libdevmapper's udev "
+                                            "synchronisation interface or disable it explicitly "
+                                            "by calling dm_udev_set_sync_support(0).",
+                                            dmt->type == DM_DEVICE_RESUME ? "DM_DEVICE_RESUME" :
+                                            dmt->type == DM_DEVICE_REMOVE ? "DM_DEVICE_REMOVE" :
+                                                                            "DM_DEVICE_RENAME");
+                       log_debug_activation("Switching off device-mapper and all subsystem related "
+                                            "udev rules. Falling back to libdevmapper node creation.");
+                       /*
+                        * Disable general dm and subsystem rules but keep
+                        * dm disk rules if not flagged out explicitly before.
+                        * We need /dev/disk content for the software that expects it.
+                       */
+                       dmi->event_nr |= (DM_UDEV_DISABLE_DM_RULES_FLAG |
+                                         DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG) <<
+                                        DM_UDEV_FLAGS_SHIFT;
+               }
+       }
+
+       /* One-line trace of the exact request about to be issued. */
+       log_debug_activation("dm %s %s%s %s%s%s %s%.0d%s%.0d%s"
+                            "%s[ %s%s%s%s%s%s%s%s%s] %.0" PRIu64 " %s [%u] (*%u)",
+                            _cmd_data_v4[dmt->type].name,
+                            dmt->new_uuid ? "UUID " : "",
+                            dmi->name, dmi->uuid, dmt->newname ? " " : "",
+                            dmt->newname ? dmt->newname : "",
+                            dmt->major > 0 ? "(" : "",
+                            dmt->major > 0 ? dmt->major : 0,
+                            dmt->major > 0 ? ":" : "",
+                            dmt->minor > 0 ? dmt->minor : 0,
+                            dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+                            dmt->major > 0 ? ") " : "",
+                            dmt->no_open_count ? "noopencount " : "opencount ",
+                            dmt->no_flush ? "noflush " : "flush ",
+                            dmt->read_only ? "readonly " : "",
+                            dmt->skip_lockfs ? "skiplockfs " : "",
+                            dmt->retry_remove ? "retryremove " : "",
+                            dmt->deferred_remove ? "deferredremove " : "",
+                            dmt->secure_data ? "securedata " : "",
+                            dmt->query_inactive_table ? "inactive " : "",
+                            dmt->enable_checks ? "enablechecks " : "",
+                            dmt->sector, _sanitise_message(dmt->message),
+                            dmi->data_size, retry_repeat_count);
+#ifdef DM_IOCTLS
+       r = ioctl(_control_fd, command, dmi);
+
+       /* Timestamp as close to the ioctl return as possible. */
+       if (dmt->record_timestamp)
+               if (!dm_timestamp_get(_dm_ioctl_timestamp))
+                       stack;
+
+       if (r < 0 && dmt->expected_errno != errno) {
+               dmt->ioctl_errno = errno;
+               /* ENXIO from an existence query just means "no such device". */
+               if (dmt->ioctl_errno == ENXIO && ((dmt->type == DM_DEVICE_INFO) ||
+                                                 (dmt->type == DM_DEVICE_MKNODES) ||
+                                                 (dmt->type == DM_DEVICE_STATUS)))
+                       dmi->flags &= ~DM_EXISTS_FLAG;  /* FIXME */
+               else {
+                       /* Suppressed logging and EINTR are only reported at verbose level. */
+                       if (_log_suppress || dmt->ioctl_errno == EINTR)
+                               log_verbose("device-mapper: %s ioctl on %s %s%s%.0d%s%.0d%s%s "
+                                           "failed: %s",
+                                           _cmd_data_v4[dmt->type].name,
+                                           dmi->name, dmi->uuid,
+                                           dmt->major > 0 ? "(" : "",
+                                           dmt->major > 0 ? dmt->major : 0,
+                                           dmt->major > 0 ? ":" : "",
+                                           dmt->minor > 0 ? dmt->minor : 0,
+                                           dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+                                           dmt->major > 0 ? ")" : "",
+                                           strerror(dmt->ioctl_errno));
+                       else
+                               log_error("device-mapper: %s ioctl on %s %s%s%.0d%s%.0d%s%s "
+                                         "failed: %s",
+                                         _cmd_data_v4[dmt->type].name,
+                                         dmi->name, dmi->uuid,
+                                         dmt->major > 0 ? "(" : "",
+                                         dmt->major > 0 ? dmt->major : 0,
+                                         dmt->major > 0 ? ":" : "",
+                                         dmt->minor > 0 ? dmt->minor : 0,
+                                         dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+                                         dmt->major > 0 ? ")" : "",
+                                         strerror(dmt->ioctl_errno));
+
+                       /*
+                        * It's sometimes worth retrying after EBUSY in case
+                        * it's a transient failure caused by an asynchronous
+                        * process quickly scanning the device.
+                        */
+                       *retryable = dmt->ioctl_errno == EBUSY;
+
+                       goto error;
+               }
+       }
+
+       if (ioctl_with_uevent && dm_udev_get_sync_support() &&
+           !_check_uevent_generated(dmi)) {
+               log_debug_activation("Uevent not generated! Calling udev_complete "
+                                    "internally to avoid process lock-up.")
+               _udev_complete(dmt);
+       }
+
+       if (!_dm_ioctl_unmangle_names(dmt->type, dmi))
+               goto error;
+
+       /* A removed device has no UUID left to unmangle. */
+       if (dmt->type != DM_DEVICE_REMOVE &&
+           !_dm_ioctl_unmangle_uuids(dmt->type, dmi))
+               goto error;
+
+#else /* Userspace alternative for testing */
+       goto error;
+#endif
+       return dmi;
+
+error:
+       _dm_zfree_dmi(dmi);
+       return NULL;
+}
+
+/* Flush any deferred /dev node add/rename/remove operations. */
+void dm_task_update_nodes(void)
+{
+       update_devs();
+}
+
+/* Retry budget for transient (EBUSY) failures of DM_DEVICE_REMOVE. */
+#define DM_IOCTL_RETRIES 25
+#define DM_RETRY_USLEEP_DELAY 200000
+
+/* Return the errno captured from the last failed ioctl for 'dmt' (0 if none). */
+int dm_task_get_errno(struct dm_task *dmt)
+{
+       return dmt->ioctl_errno;
+}
+
+/*
+ * Execute the request described by 'dmt'.  Composite operations
+ * (create+load, mknodes, reload-with-suppression, checked suspend) are
+ * routed to dedicated helpers; the plain ioctl path retries on full
+ * reply buffers and on transient REMOVE failures, then mirrors the
+ * result into /dev nodes where udev is not relied upon.
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_task_run(struct dm_task *dmt)
+{
+       struct dm_ioctl *dmi;
+       unsigned command;
+       int check_udev;
+       int rely_on_udev;
+       int suspended_counter;
+       unsigned ioctl_retry = 1;
+       int retryable = 0;
+       const char *dev_name = DEV_NAME(dmt);
+       const char *dev_uuid = DEV_UUID(dmt);
+
+       if ((unsigned) dmt->type >= DM_ARRAY_SIZE(_cmd_data_v4)) {
+               log_error(INTERNAL_ERROR "unknown device-mapper task %d",
+                         dmt->type);
+               return 0;
+       }
+
+       command = _cmd_data_v4[dmt->type].cmd;
+
+       /* Old-style creation had a table supplied */
+       if (dmt->type == DM_DEVICE_CREATE && dmt->head)
+               return _create_and_load_v4(dmt);
+
+       if (dmt->type == DM_DEVICE_MKNODES && !dev_name &&
+           !dev_uuid && dmt->major <= 0)
+               return _mknodes_v4(dmt);
+
+       if ((dmt->type == DM_DEVICE_RELOAD) && dmt->suppress_identical_reload)
+               return _reload_with_suppression_v4(dmt);
+
+       if ((dmt->type == DM_DEVICE_SUSPEND) && dmt->enable_checks)
+               return _suspend_with_validation_v4(dmt);
+
+       if (!_open_control()) {
+               _udev_complete(dmt);
+               return_0;
+       }
+
+       /* Loading a table while devices are suspended risks deadlock. */
+       if ((suspended_counter = dm_get_suspended_counter()) &&
+           dmt->type == DM_DEVICE_RELOAD)
+               log_error(INTERNAL_ERROR "Performing unsafe table load while %d device(s) "
+                         "are known to be suspended: "
+                         "%s%s%s %s%.0d%s%.0d%s%s",
+                         suspended_counter,
+                         dev_name ? : "",
+                         dev_uuid ? " UUID " : "",
+                         dev_uuid ? : "",
+                         dmt->major > 0 ? "(" : "",
+                         dmt->major > 0 ? dmt->major : 0,
+                         dmt->major > 0 ? ":" : "",
+                         dmt->minor > 0 ? dmt->minor : 0,
+                         dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+                         dmt->major > 0 ? ") " : "");
+
+       /* FIXME Detect and warn if cookie set but should not be. */
+repeat_ioctl:
+       if (!(dmi = _do_dm_ioctl(dmt, command, _ioctl_buffer_double_factor,
+                                ioctl_retry, &retryable))) {
+               /*
+                * Async udev rules that scan devices commonly cause transient
+                * failures.  Normally you'd expect the user to have made sure
+                * nothing was using the device before issuing REMOVE, so it's
+                * worth retrying in case the failure is indeed transient.
+                */
+               if (retryable && dmt->type == DM_DEVICE_REMOVE &&
+                   dmt->retry_remove && ++ioctl_retry <= DM_IOCTL_RETRIES) {
+                       usleep(DM_RETRY_USLEEP_DELAY);
+                       goto repeat_ioctl;
+               }
+
+               _udev_complete(dmt);
+               return 0;
+       }
+
+       /* Reply didn't fit: grow the buffer and reissue for data-returning commands. */
+       if (dmi->flags & DM_BUFFER_FULL_FLAG) {
+               switch (dmt->type) {
+               case DM_DEVICE_LIST_VERSIONS:
+               case DM_DEVICE_LIST:
+               case DM_DEVICE_DEPS:
+               case DM_DEVICE_STATUS:
+               case DM_DEVICE_TABLE:
+               case DM_DEVICE_WAITEVENT:
+               case DM_DEVICE_TARGET_MSG:
+                       _ioctl_buffer_double_factor++;
+                       _dm_zfree_dmi(dmi);
+                       goto repeat_ioctl;
+               default:
+                       log_error("WARNING: libdevmapper buffer too small for data");
+               }
+       }
+
+       /*
+        * Are we expecting a udev operation to occur that we need to check for?
+        */
+       check_udev = dmt->cookie_set &&
+                    !(dmt->event_nr >> DM_UDEV_FLAGS_SHIFT &
+                      DM_UDEV_DISABLE_DM_RULES_FLAG);
+
+       rely_on_udev = dmt->cookie_set ? (dmt->event_nr >> DM_UDEV_FLAGS_SHIFT &
+                                         DM_UDEV_DISABLE_LIBRARY_FALLBACK) : 0;
+
+       /* Mirror the successful ioctl into the /dev tree as appropriate. */
+       switch (dmt->type) {
+       case DM_DEVICE_CREATE:
+               if ((dmt->add_node == DM_ADD_NODE_ON_CREATE) &&
+                   dev_name && *dev_name && !rely_on_udev)
+                       add_dev_node(dev_name, MAJOR(dmi->dev),
+                                    MINOR(dmi->dev), dmt->uid, dmt->gid,
+                                    dmt->mode, check_udev, rely_on_udev);
+               break;
+       case DM_DEVICE_REMOVE:
+               /* FIXME Kernel needs to fill in dmi->name */
+               if (dev_name && !rely_on_udev)
+                       rm_dev_node(dev_name, check_udev, rely_on_udev);
+               break;
+
+       case DM_DEVICE_RENAME:
+               /* FIXME Kernel needs to fill in dmi->name */
+               if (!dmt->new_uuid && dev_name)
+                       rename_dev_node(dev_name, dmt->newname,
+                                       check_udev, rely_on_udev);
+               break;
+
+       case DM_DEVICE_RESUME:
+               if ((dmt->add_node == DM_ADD_NODE_ON_RESUME) &&
+                   dev_name && *dev_name)
+                       add_dev_node(dev_name, MAJOR(dmi->dev),
+                                    MINOR(dmi->dev), dmt->uid, dmt->gid,
+                                    dmt->mode, check_udev, rely_on_udev);
+               /* FIXME Kernel needs to fill in dmi->name */
+               set_dev_node_read_ahead(dev_name,
+                                       MAJOR(dmi->dev), MINOR(dmi->dev),
+                                       dmt->read_ahead, dmt->read_ahead_flags);
+               break;
+       
+       case DM_DEVICE_MKNODES:
+               if (dmi->flags & DM_EXISTS_FLAG)
+                       add_dev_node(dmi->name, MAJOR(dmi->dev),
+                                    MINOR(dmi->dev), dmt->uid,
+                                    dmt->gid, dmt->mode, 0, rely_on_udev);
+               else if (dev_name)
+                       rm_dev_node(dev_name, 0, rely_on_udev);
+               break;
+
+       case DM_DEVICE_STATUS:
+       case DM_DEVICE_TABLE:
+       case DM_DEVICE_WAITEVENT:
+               if (!_unmarshal_status(dmt, dmi))
+                       goto bad;
+               break;
+       }
+
+       /* Was structure reused? */
+       _dm_zfree_dmi(dmt->dmi.v4);
+       dmt->dmi.v4 = dmi;
+       return 1;
+
+      bad:
+       _dm_zfree_dmi(dmi);
+       return 0;
+}
+
+/* Keep (non-zero) or stop keeping (zero) the control device open across dm_lib_release(). */
+void dm_hold_control_dev(int hold_open)
+{
+       _hold_control_fd_open = hold_open ? 1 : 0;
+
+       log_debug("Hold of control device is now %sset.",
+                 _hold_control_fd_open ? "" : "un");
+}
+
+/*
+ * Release resources held between ioctls: close the control fd (unless
+ * dm_hold_control_dev() asked to keep it open), drop the cached ioctl
+ * timestamp and flush deferred /dev node operations.
+ */
+void dm_lib_release(void)
+{
+       if (!_hold_control_fd_open)
+               _close_control_fd();
+       dm_timestamp_destroy(_dm_ioctl_timestamp);
+       _dm_ioctl_timestamp = NULL;
+       update_devs();
+}
+
+void dm_pools_check_leaks(void);
+
+/*
+ * Final library teardown: release resources, report any devices left
+ * suspended, free the cached bitset, run memory-leak diagnostics and
+ * reset the driver-version check state.  Safe to call more than once.
+ */
+void dm_lib_exit(void)
+{
+       int suspended_counter;
+       static unsigned _exited = 0;
+
+       /* Only the first call does the work. */
+       if (_exited++)
+               return;
+
+       if ((suspended_counter = dm_get_suspended_counter()))
+               log_error("libdevmapper exiting with %d device(s) still suspended.", suspended_counter);
+
+       dm_lib_release();
+       selinux_release();
+       if (_dm_bitset)
+               dm_bitset_destroy(_dm_bitset);
+       _dm_bitset = NULL;
+       dm_pools_check_leaks();
+       dm_dump_memory();
+       /* Allow a fresh version check if the library is used again. */
+       _version_ok = 1;
+       _version_checked = 0;
+}
+
+#if defined(__GNUC__)
+/*
+ * Maintain binary backward compatibility.
+ * Version script mechanism works with 'gcc' compatible compilers only.
+ */
+
+/*
+ * This following code is here to retain ABI compatibility after adding
+ * the field deferred_remove to struct dm_info in version 1.02.89.
+ *
+ * Binaries linked against version 1.02.88 of libdevmapper or earlier
+ * will use this function that returns dm_info without the
+ * deferred_remove field.
+ *
+ * Binaries compiled against version 1.02.89 onwards will use
+ * the new function dm_task_get_info_with_deferred_remove due to the
+ * #define.
+ *
+ * N.B. Keep this function at the end of the file to make sure that
+ * no code in this file accidentally calls it.
+ */
+
+int dm_task_get_info_base(struct dm_task *dmt, struct dm_info *info);
+int dm_task_get_info_base(struct dm_task *dmt, struct dm_info *info)
+{
+       struct dm_info new_info;
+
+       if (!dm_task_get_info(dmt, &new_info))
+               return 0;
+
+       /* Copy only the prefix of the struct known to pre-1.02.89 callers. */
+       memcpy(info, &new_info, offsetof(struct dm_info, deferred_remove));
+
+       return 1;
+}
+
+int dm_task_get_info_with_deferred_remove(struct dm_task *dmt, struct dm_info *info);
+int dm_task_get_info_with_deferred_remove(struct dm_task *dmt, struct dm_info *info)
+{
+       struct dm_info new_info;
+
+       if (!dm_task_get_info(dmt, &new_info))
+               return 0;
+
+       /* Includes deferred_remove, but stops before internal_suspend. */
+       memcpy(info, &new_info, offsetof(struct dm_info, internal_suspend));
+
+       return 1;
+}
+#endif
diff --git a/device_mapper/ioctl/libdm-targets.h b/device_mapper/ioctl/libdm-targets.h
new file mode 100644 (file)
index 0000000..b5b20d5
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef LIB_DMTARGETS_H
+#define LIB_DMTARGETS_H
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+struct dm_ioctl;
+
+/* One target line of a device table: <start> <length> <type> <params>. */
+struct target {
+       uint64_t start;
+       uint64_t length;
+       char *type;
+       char *params;
+
+       struct target *next;    /* singly-linked list, in table order */
+};
+
+/*
+ * Internal state of one device-mapper request (opaque to library users).
+ * Filled in by the dm_task_set_* calls and consumed by dm_task_run().
+ */
+struct dm_task {
+       int type;                       /* DM_DEVICE_*; indexes the internal command table */
+       char *dev_name;
+       char *mangled_dev_name;         /* mangled form of dev_name -- TODO confirm when set */
+
+       struct target *head, *tail;     /* target list for create/reload */
+
+       int read_only;
+       uint32_t event_nr;              /* also carries DM_UDEV_* flags above DM_UDEV_FLAGS_SHIFT */
+       int major;
+       int minor;
+       int allow_default_major_fallback;
+       uid_t uid;                      /* ownership/mode applied to the /dev node */
+       gid_t gid;
+       mode_t mode;
+       uint32_t read_ahead;
+       uint32_t read_ahead_flags;
+       union {
+               struct dm_ioctl *v4;    /* reply buffer from the last ioctl run */
+       } dmi;
+       char *newname;                  /* target name for DM_DEVICE_RENAME */
+       char *message;                  /* payload for DM_DEVICE_TARGET_MSG */
+       char *geometry;
+       uint64_t sector;
+       int no_flush;
+       int no_open_count;
+       int skip_lockfs;
+       int query_inactive_table;
+       int suppress_identical_reload;  /* skip reload when table is unchanged */
+       dm_add_node_t add_node;         /* create the /dev node on CREATE or on RESUME */
+       uint64_t existing_table_size;
+       int cookie_set;                 /* udev-sync cookie was supplied */
+       int new_uuid;
+       int secure_data;
+       int retry_remove;               /* retry DM_DEVICE_REMOVE on transient EBUSY */
+       int deferred_remove;
+       int enable_checks;              /* extra safety checks, e.g. validated suspend */
+       int expected_errno;             /* this errno from the ioctl is not treated as failure */
+       int ioctl_errno;                /* errno captured from the last failed ioctl */
+
+       int record_timestamp;           /* capture a timestamp right after the ioctl */
+
+       char *uuid;
+       char *mangled_uuid;             /* mangled form of uuid -- TODO confirm when set */
+};
+
+/* Static description of one ioctl command: name, request number, min version. */
+struct cmd_data {
+       const char *name;
+       const unsigned cmd;
+       const int version[3];
+};
+
+int dm_check_version(void);
+uint64_t dm_task_get_existing_table_size(struct dm_task *dmt);
+
+#endif
diff --git a/device_mapper/libdevmapper.h b/device_mapper/libdevmapper.h
new file mode 100644 (file)
index 0000000..2438f74
--- /dev/null
@@ -0,0 +1,3755 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006 Rackable Systems All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef LIB_DEVICE_MAPPER_H
+#define LIB_DEVICE_MAPPER_H
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#ifdef __linux__
+#  include <linux/types.h>
+#endif
+
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifndef __GNUC__
+# define __typeof__ typeof
+#endif
+
+/* Macros to make string defines */
+#define DM_TO_STRING_EXP(A) #A
+#define DM_TO_STRING(A) DM_TO_STRING_EXP(A)
+
+#define DM_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*****************************************************************
+ * The first section of this file provides direct access to the
+ * individual device-mapper ioctls.  Since it is quite laborious to
+ * build the ioctl arguments for the device-mapper, people are
+ * encouraged to use this library.
+ ****************************************************************/
+
+/*
+ * The library user may wish to register their own
+ * logging function.  By default errors go to stderr.
+ * Use dm_log_with_errno_init(NULL) to restore the default log fn.
+ * Error messages may have a non-zero errno.
+ * Debug messages may have a non-zero class.
+ * Aborts on internal error when env DM_ABORT_ON_INTERNAL_ERRORS is 1
+ */
+
+typedef void (*dm_log_with_errno_fn) (int level, const char *file, int line,
+                                     int dm_errno_or_class, const char *f, ...)
+    __attribute__ ((format(printf, 5, 6)));
+
+void dm_log_with_errno_init(dm_log_with_errno_fn fn);
+void dm_log_init_verbose(int level);
+
+/*
+ * Original version of this function.
+ * dm_errno is set to 0.
+ *
+ * Deprecated: Use the _with_errno_ versions above instead.
+ */
+typedef void (*dm_log_fn) (int level, const char *file, int line,
+                          const char *f, ...)
+    __attribute__ ((format(printf, 4, 5)));
+
+void dm_log_init(dm_log_fn fn);
+/*
+ * For backward-compatibility, indicate that dm_log_init() was used
+ * to set a non-default value of dm_log().
+ */
+int dm_log_is_non_default(void);
+
+/*
+ * Number of devices currently in suspended state (via the library).
+ */
+int dm_get_suspended_counter(void);
+
+/*
+ * Task types accepted by dm_task_create().  The values are used as
+ * indices into the library's internal command table, so their order
+ * must stay in step with that table.
+ */
+enum {
+       DM_DEVICE_CREATE,
+       DM_DEVICE_RELOAD,
+       DM_DEVICE_REMOVE,
+       DM_DEVICE_REMOVE_ALL,
+
+       DM_DEVICE_SUSPEND,
+       DM_DEVICE_RESUME,
+
+       DM_DEVICE_INFO,
+       DM_DEVICE_DEPS,
+       DM_DEVICE_RENAME,
+
+       DM_DEVICE_VERSION,
+
+       DM_DEVICE_STATUS,
+       DM_DEVICE_TABLE,
+       DM_DEVICE_WAITEVENT,
+
+       DM_DEVICE_LIST,
+
+       DM_DEVICE_CLEAR,
+
+       DM_DEVICE_MKNODES,
+
+       DM_DEVICE_LIST_VERSIONS,
+       
+       DM_DEVICE_TARGET_MSG,
+
+       DM_DEVICE_SET_GEOMETRY
+};
+
+/*
+ * You will need to build a struct dm_task for
+ * each ioctl command you want to execute.
+ */
+
+struct dm_pool;
+struct dm_task;
+struct dm_timestamp;
+
+struct dm_task *dm_task_create(int type);
+void dm_task_destroy(struct dm_task *dmt);
+
+int dm_task_set_name(struct dm_task *dmt, const char *name);
+int dm_task_set_uuid(struct dm_task *dmt, const char *uuid);
+
+/*
+ * Retrieve attributes after an info.
+ * Device properties reported via dm_task_get_info(); callers should
+ * check 'exists' before trusting the remaining fields.
+ */
+struct dm_info {
+       int exists;
+       int suspended;
+       int live_table;
+       int inactive_table;
+       int32_t open_count;
+       uint32_t event_nr;
+       uint32_t major;
+       uint32_t minor;         /* minor device number */
+       int read_only;          /* 0:read-write; 1:read-only */
+
+       int32_t target_count;
+
+       int deferred_remove;    /* added in 1.02.89; see the ABI-compat shims in libdm-iface.c */
+       int internal_suspend;
+};
+
+/* Dependency list returned by DM_DEVICE_DEPS: 'count' packed dev numbers follow. */
+struct dm_deps {
+       uint32_t count;
+       uint32_t filler;
+       uint64_t device[0];     /* flexible array (GNU zero-length member) */
+};
+
+/* One entry of a DM_DEVICE_LIST reply; entries are chained via 'next'. */
+struct dm_names {
+       uint64_t dev;
+       uint32_t next;          /* Offset to next struct from start of this struct */
+       char name[0];
+};
+
+/* One entry of a DM_DEVICE_LIST_VERSIONS reply. */
+struct dm_versions {
+       uint32_t next;          /* Offset to next struct from start of this struct */
+       uint32_t version[3];
+
+       char name[0];
+};
+
+int dm_get_library_version(char *version, size_t size);
+int dm_task_get_driver_version(struct dm_task *dmt, char *version, size_t size);
+int dm_task_get_info(struct dm_task *dmt, struct dm_info *dmi);
+
+/*
+ * This function returns dm device's UUID based on the value
+ * of the mangling mode set during preceding dm_task_run call:
+ *   - unmangled UUID for DM_STRING_MANGLING_{AUTO, HEX},
+ *   - UUID without any changes for DM_STRING_MANGLING_NONE.
+ *
+ * To get mangled or unmangled form of the UUID directly, use
+ * dm_task_get_uuid_mangled or dm_task_get_uuid_unmangled function.
+ */
+const char *dm_task_get_uuid(const struct dm_task *dmt);
+
+struct dm_deps *dm_task_get_deps(struct dm_task *dmt);
+struct dm_versions *dm_task_get_versions(struct dm_task *dmt);
+const char *dm_task_get_message_response(struct dm_task *dmt);
+
+/*
+ * These functions return device-mapper names based on the value
+ * of the mangling mode set during preceding dm_task_run call:
+ *   - unmangled name for DM_STRING_MANGLING_{AUTO, HEX},
+ *   - name without any changes for DM_STRING_MANGLING_NONE.
+ *
+ * To get mangled or unmangled form of the name directly, use
+ * dm_task_get_name_mangled or dm_task_get_name_unmangled function.
+ */
+const char *dm_task_get_name(const struct dm_task *dmt);
+struct dm_names *dm_task_get_names(struct dm_task *dmt);
+
+int dm_task_set_ro(struct dm_task *dmt);
+int dm_task_set_newname(struct dm_task *dmt, const char *newname);
+int dm_task_set_newuuid(struct dm_task *dmt, const char *newuuid);
+int dm_task_set_minor(struct dm_task *dmt, int minor);
+int dm_task_set_major(struct dm_task *dmt, int major);
+int dm_task_set_major_minor(struct dm_task *dmt, int major, int minor, int allow_default_major_fallback);
+int dm_task_set_uid(struct dm_task *dmt, uid_t uid);
+int dm_task_set_gid(struct dm_task *dmt, gid_t gid);
+int dm_task_set_mode(struct dm_task *dmt, mode_t mode);
+/* See also description for DM_UDEV_DISABLE_LIBRARY_FALLBACK flag! */
+int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags);
+int dm_task_set_event_nr(struct dm_task *dmt, uint32_t event_nr);
+int dm_task_set_geometry(struct dm_task *dmt, const char *cylinders, const char *heads, const char *sectors, const char *start);
+int dm_task_set_message(struct dm_task *dmt, const char *message);
+int dm_task_set_sector(struct dm_task *dmt, uint64_t sector);
+int dm_task_no_flush(struct dm_task *dmt);
+int dm_task_no_open_count(struct dm_task *dmt);
+int dm_task_skip_lockfs(struct dm_task *dmt);
+int dm_task_query_inactive_table(struct dm_task *dmt);
+int dm_task_suppress_identical_reload(struct dm_task *dmt);
+int dm_task_secure_data(struct dm_task *dmt);
+int dm_task_retry_remove(struct dm_task *dmt);
+int dm_task_deferred_remove(struct dm_task *dmt);
+
+/*
+ * Record timestamp immediately after the ioctl returns.
+ */
+int dm_task_set_record_timestamp(struct dm_task *dmt);
+struct dm_timestamp *dm_task_get_ioctl_timestamp(struct dm_task *dmt);
+
+/*
+ * Enable checks for common mistakes such as issuing ioctls in an unsafe order.
+ */
+int dm_task_enable_checks(struct dm_task *dmt);
+
+typedef enum {
+       DM_ADD_NODE_ON_RESUME, /* add /dev/mapper node with dmsetup resume */
+       DM_ADD_NODE_ON_CREATE  /* add /dev/mapper node with dmsetup create */
+} dm_add_node_t;
+int dm_task_set_add_node(struct dm_task *dmt, dm_add_node_t add_node);
+
+/*
+ * Control read_ahead.
+ */
+#define DM_READ_AHEAD_AUTO UINT32_MAX  /* Use kernel default readahead */
+#define DM_READ_AHEAD_NONE 0           /* Disable readahead */
+
+#define DM_READ_AHEAD_MINIMUM_FLAG     0x1     /* Value supplied is minimum */
+
+/*
+ * Read ahead is set with DM_DEVICE_CREATE with a table or DM_DEVICE_RESUME.
+ */
+int dm_task_set_read_ahead(struct dm_task *dmt, uint32_t read_ahead,
+                          uint32_t read_ahead_flags);
+uint32_t dm_task_get_read_ahead(const struct dm_task *dmt,
+                               uint32_t *read_ahead);
+
+/*
+ * Use these to prepare for a create or reload.
+ */
+int dm_task_add_target(struct dm_task *dmt,
+                      uint64_t start,
+                      uint64_t size, const char *ttype, const char *params);
+
+/*
+ * Format major/minor numbers correctly for input to driver.
+ */
+#define DM_FORMAT_DEV_BUFSIZE  13      /* Minimum bufsize to handle worst case. */
+int dm_format_dev(char *buf, int bufsize, uint32_t dev_major, uint32_t dev_minor);
+
+/* Use this to retrieve target information returned from a STATUS call */
+void *dm_get_next_target(struct dm_task *dmt,
+                        void *next, uint64_t *start, uint64_t *length,
+                        char **target_type, char **params);
+
+/*
+ * Following dm_get_status_* functions will allocate appropriate status structure
+ * from passed mempool together with the necessary character arrays.
+ * Destroying the mempool will release all associated allocations.
+ */
+
+/* Parse params from STATUS call for mirror target */
+typedef enum {
+       DM_STATUS_MIRROR_ALIVE        = 'A',/* No failures */
+       DM_STATUS_MIRROR_FLUSH_FAILED = 'F',/* Mirror out-of-sync */
+       DM_STATUS_MIRROR_WRITE_FAILED = 'D',/* Mirror out-of-sync */
+       DM_STATUS_MIRROR_SYNC_FAILED  = 'S',/* Mirror out-of-sync */
+       DM_STATUS_MIRROR_READ_FAILED  = 'R',/* Mirror data unaffected */
+       DM_STATUS_MIRROR_UNCLASSIFIED = 'U' /* Bug */
+} dm_status_mirror_health_t;
+
+struct dm_status_mirror {
+       uint64_t total_regions;
+       uint64_t insync_regions;
+       uint32_t dev_count;             /* # of devs[] elements (<= 8) */
+       struct {
+               dm_status_mirror_health_t health;
+               uint32_t major;
+               uint32_t minor;
+       } *devs;                        /* array with individual legs */
+       const char *log_type;           /* core, disk,.... */
+       uint32_t log_count;             /* # of logs[] elements */
+       struct {
+               dm_status_mirror_health_t health;
+               uint32_t major;
+               uint32_t minor;
+       } *logs;                        /* array with individual logs */
+};
+
+int dm_get_status_mirror(struct dm_pool *mem, const char *params,
+                        struct dm_status_mirror **status);
+
+/* Parse params from STATUS call for raid target */
+struct dm_status_raid {
+       uint64_t reserved;
+       uint64_t total_regions;         /* sectors */
+       uint64_t insync_regions;        /* sectors */
+       uint64_t mismatch_count;
+       uint32_t dev_count;
+       char *raid_type;
+       /* A - alive,  a - alive not in-sync,  D - dead/failed */
+       char *dev_health;
+       /* idle, frozen, resync, recover, check, repair */
+       char *sync_action;
+       uint64_t data_offset; /* RAID out-of-place reshaping */
+};
+
+int dm_get_status_raid(struct dm_pool *mem, const char *params,
+                      struct dm_status_raid **status);
+
+/* Parse params from STATUS call for cache target */
+struct dm_status_cache {
+       uint64_t version;  /* zero for now */
+
+       uint32_t metadata_block_size;   /* in 512B sectors */
+       uint32_t block_size;            /* AKA 'chunk_size' */
+
+       uint64_t metadata_used_blocks;
+       uint64_t metadata_total_blocks;
+
+       uint64_t used_blocks;
+       uint64_t dirty_blocks;
+       uint64_t total_blocks;
+
+       uint64_t read_hits;
+       uint64_t read_misses;
+       uint64_t write_hits;
+       uint64_t write_misses;
+
+       uint64_t demotions;
+       uint64_t promotions;
+
+       uint64_t feature_flags;         /* DM_CACHE_FEATURE_? */
+
+       int core_argc;
+       char **core_argv;
+
+       char *policy_name;
+       int policy_argc;
+       char **policy_argv;
+
+       unsigned error : 1;             /* detected error (switches to fail soon) */
+       unsigned fail : 1;              /* all I/O fails */
+       unsigned needs_check : 1;       /* metadata needs check */
+       unsigned read_only : 1;         /* metadata may not be changed */
+       uint32_t reserved : 28;
+};
+
+int dm_get_status_cache(struct dm_pool *mem, const char *params,
+                       struct dm_status_cache **status);
+
+/*
+ * Parse params from STATUS call for snapshot target
+ *
+ * Snapshot target's format:
+ * <= 1.7.0: <used_sectors>/<total_sectors>
+ * >= 1.8.0: <used_sectors>/<total_sectors> <metadata_sectors>
+ */
+struct dm_status_snapshot {
+       uint64_t used_sectors;          /* in 512b units */
+       uint64_t total_sectors;
+       uint64_t metadata_sectors;
+       unsigned has_metadata_sectors : 1; /* set when metadata_sectors is present */
+       unsigned invalid : 1;           /* set when snapshot is invalidated */
+       unsigned merge_failed : 1;      /* set when snapshot merge failed */
+       unsigned overflow : 1;          /* set when snapshot overflows */
+};
+
+int dm_get_status_snapshot(struct dm_pool *mem, const char *params,
+                          struct dm_status_snapshot **status);
+
+/* Parse params from STATUS call for thin_pool target */
+typedef enum {
+       DM_THIN_DISCARDS_IGNORE,
+       DM_THIN_DISCARDS_NO_PASSDOWN,
+       DM_THIN_DISCARDS_PASSDOWN
+} dm_thin_discards_t;
+
+struct dm_status_thin_pool {
+       uint64_t transaction_id;
+       uint64_t used_metadata_blocks;
+       uint64_t total_metadata_blocks;
+       uint64_t used_data_blocks;
+       uint64_t total_data_blocks;
+       uint64_t held_metadata_root;
+       uint32_t read_only;             /* metadata may not be changed */
+       dm_thin_discards_t discards;
+       uint32_t fail : 1;              /* all I/O fails */
+       uint32_t error_if_no_space : 1; /* otherwise queue_if_no_space */
+       uint32_t out_of_data_space : 1; /* metadata may be changed, but data may not be allocated (no rw) */
+       uint32_t needs_check : 1;       /* metadata needs check */
+       uint32_t error : 1;             /* detected error (switches to fail soon) */
+       uint32_t reserved : 27;
+};
+
+int dm_get_status_thin_pool(struct dm_pool *mem, const char *params,
+                           struct dm_status_thin_pool **status);
+
+/* Parse params from STATUS call for thin target */
+struct dm_status_thin {
+       uint64_t mapped_sectors;
+       uint64_t highest_mapped_sector;
+       uint32_t fail : 1;              /* Thin volume fails I/O */
+       uint32_t reserved : 31;
+};
+
+int dm_get_status_thin(struct dm_pool *mem, const char *params,
+                      struct dm_status_thin **status);
+
+/*
+ * device-mapper statistics support
+ */
+
+/*
+ * Statistics handle.
+ *
+ * Operations on dm_stats objects include managing statistics regions
+ * and obtaining and manipulating current counter values from the
+ * kernel. Methods are provided to return basic count values and to
+ * derive time-based metrics when a suitable interval estimate is
+ * provided.
+ *
+ * Internally the dm_stats handle contains a pointer to a table of one
+ * or more dm_stats_region objects representing the regions registered
+ * with the dm_stats_create_region() method. These in turn point to a
+ * table of one or more dm_stats_counters objects containing the
+ * counter sets for each defined area within the region:
+ *
+ * dm_stats->dm_stats_region[nr_regions]->dm_stats_counters[nr_areas]
+ *
+ * This structure is private to the library and may change in future
+ * versions: all users should make use of the public interface and treat
+ * the dm_stats type as an opaque handle.
+ *
+ * Regions and counter sets are stored in order of increasing region_id.
+ * Depending on region specifications and the sequence of create and
+ * delete operations this may not correspond to increasing sector
+ * number: users of the library should not assume that this is the case
+ * unless region creation is deliberately managed to ensure this (by
+ * always creating regions in strict order of ascending sector address).
+ *
+ * Regions may also overlap so the same sector range may be included in
+ * more than one region or area: applications should be prepared to deal
+ * with this or manage regions such that it does not occur.
+ */
+struct dm_stats;
+
+/*
+ * Histogram handle.
+ *
+ * A histogram object represents the latency histogram values and bin
+ * boundaries of the histogram associated with a particular area.
+ *
+ * Operations on the handle allow the number of bins, bin boundaries,
+ * counts and relative proportions to be obtained as well as the
+ * conversion of a histogram or its bounds to a compact string
+ * representation.
+ */
+struct dm_histogram;
+
+/*
+ * Allocate a dm_stats handle to use for subsequent device-mapper
+ * statistics operations. A program_id may be specified and will be
+ * used by default for subsequent operations on this handle.
+ *
+ * If program_id is NULL or the empty string a program_id will be
+ * automatically set to the value contained in /proc/self/comm.
+ */
+struct dm_stats *dm_stats_create(const char *program_id);
+
+/*
+ * Bind a dm_stats handle to the specified device major and minor
+ * values. Any previous binding is cleared and any preexisting counter
+ * data contained in the handle is released.
+ */
+int dm_stats_bind_devno(struct dm_stats *dms, int major, int minor);
+
+/*
+ * Bind a dm_stats handle to the specified device name.
+ * Any previous binding is cleared and any preexisting counter
+ * data contained in the handle is released.
+ */
+int dm_stats_bind_name(struct dm_stats *dms, const char *name);
+
+/*
+ * Bind a dm_stats handle to the specified device UUID.
+ * Any previous binding is cleared and any preexisting counter
+ * data contained in the handle is released.
+ */
+int dm_stats_bind_uuid(struct dm_stats *dms, const char *uuid);
+
+/*
+ * Bind a dm_stats handle to the device backing the file referenced
+ * by the specified file descriptor.
+ *
+ * File descriptor fd must reference a regular file, open for reading,
+ * in a local file system, backed by a device-mapper device, that
+ * supports the FIEMAP ioctl, and that returns data describing the
+ * physical location of extents.
+ */
+int dm_stats_bind_from_fd(struct dm_stats *dms, int fd);
+/*
+ * Test whether the running kernel supports the precise_timestamps
+ * feature. Presence of this feature also implies histogram support.
+ * The library will check this call internally and fails any attempt
+ * to use nanosecond counters or histograms on kernels that fail to
+ * meet this check.
+ */
+int dm_message_supports_precise_timestamps(void);
+
+/*
+ * Precise timestamps and histogram support.
+ * 
+ * Test for the presence of precise_timestamps and histogram support.
+ */
+int dm_stats_driver_supports_precise(void);
+int dm_stats_driver_supports_histogram(void);
+
+/*
+ * Returns 1 if the specified region has the precise_timestamps feature
+ * enabled (i.e. produces nanosecond-precision counter values) or 0 for
+ * a region using the default millisecond precision.
+ */
+int dm_stats_get_region_precise_timestamps(const struct dm_stats *dms,
+                                          uint64_t region_id);
+
+/*
+ * Returns 1 if the region at the current cursor location has the
+ * precise_timestamps feature enabled (i.e. produces
+ * nanosecond-precision counter values) or 0 for a region using the
+ * default millisecond precision.
+ */
+int dm_stats_get_current_region_precise_timestamps(const struct dm_stats *dms);
+
+#define DM_STATS_ALL_PROGRAMS ""
+/*
+ * Parse the response from a @stats_list message. dm_stats_list will
+ * allocate the necessary dm_stats and dm_stats region structures from
+ * the embedded dm_pool. No counter data will be obtained (the counters
+ * members of dm_stats_region objects are set to NULL).
+ *
+ * A program_id may optionally be supplied; if the argument is non-NULL
+ * only regions with a matching program_id value will be considered. If
+ * the argument is NULL then the default program_id associated with the
+ * dm_stats handle will be used. Passing the special value
+ * DM_STATS_ALL_PROGRAMS will cause all regions to be queried
+ * regardless of region program_id.
+ */
+int dm_stats_list(struct dm_stats *dms, const char *program_id);
+
+#define DM_STATS_REGIONS_ALL UINT64_MAX
+/*
+ * Populate a dm_stats object with statistics for one or more regions of
+ * the specified device.
+ *
+ * A program_id may optionally be supplied; if the argument is non-NULL
+ * only regions with a matching program_id value will be considered. If
+ * the argument is NULL then the default program_id associated with the
+ * dm_stats handle will be used. Passing the special value
+ * DM_STATS_ALL_PROGRAMS will cause all regions to be queried
+ * regardless of region program_id.
+ *
+ * Passing the special value DM_STATS_REGIONS_ALL as the region_id
+ * argument will attempt to retrieve all regions selected by the
+ * program_id argument.
+ *
+ * If region_id is used to request a single region_id to be populated
+ * the program_id is ignored.
+ */
+int dm_stats_populate(struct dm_stats *dms, const char *program_id,
+                     uint64_t region_id);
+
+/*
+ * Create a new statistics region on the device bound to dms.
+ *
+ * start and len specify the region start and length in 512b sectors.
+ * Passing zero for both start and len will create a region spanning
+ * the entire device.
+ *
+ * Step determines how to subdivide the region into discrete counter
+ * sets: a positive value specifies the size of areas into which the
+ * region should be split while a negative value will split the region
+ * into a number of areas equal to the absolute value of step:
+ *
+ * - a region with one area spanning the entire device:
+ *
+ *   dm_stats_create_region(dms, 0, 0, -1, p, a);
+ *
+ * - a region with areas of 1MiB:
+ *
+ *   dm_stats_create_region(dms, 0, 0, 1 << 11, p, a);
+ *
+ * - one 1MiB region starting at 1024 sectors with two areas:
+ *
+ *   dm_stats_create_region(dms, 1024, 1 << 11, -2, p, a);
+ *
+ * If precise is non-zero attempt to create a region with nanosecond
+ * precision counters using the kernel precise_timestamps feature.
+ *
+ * precise - A flag to request nanosecond precision counters
+ * to be used for this region.
+ *
+ * histogram_bounds - specify the boundaries of a latency histogram to
+ * be tracked for the region. The values are expressed as an array of
+ * uint64_t terminated with a zero. Values must be in order of ascending
+ * magnitude and specify the upper bounds of successive histogram bins
+ * in nanoseconds (with an implicit lower bound of zero on the first bin
+ * and an implicit upper bound of infinity on the final bin). For
+ * example:
+ *
+ *   uint64_t bounds_ary[] = { 1000, 2000, 3000, 0 };
+ *
+ * Specifies a histogram with four bins: 0-1000ns, 1000-2000ns,
+ * 2000-3000ns and >3000ns.
+ *
+ * The smallest latency value that can be tracked for a region not using
+ * precise_timestamps is 1ms: attempting to create a region with
+ * histogram boundaries < 1ms will cause the precise_timestamps feature
+ * to be enabled for that region automatically if it was not requested
+ * explicitly.
+ *
+ * program_id is an optional string argument that identifies the
+ * program creating the region. If program_id is NULL or the empty
+ * string the default program_id stored in the handle will be used.
+ *
+ * user_data is an optional string argument that is added to the
+ * content of the aux_data field stored with the statistics region by
+ * the kernel.
+ *
+ * The library may also use this space internally, for example, to
+ * store a group descriptor or other metadata: in this case the
+ * library will strip any internal data fields from the value before
+ * it is returned via a call to dm_stats_get_region_aux_data().
+ *
+ * The user data stored is not accessed by the library or kernel and
+ * may be used to store an arbitrary data word (embedded whitespace is
+ * not permitted).
+ *
+ * An application using both the library and direct access to the
+ * @stats_list device-mapper message may see the internal values stored
+ * in this field by the library. In such cases any string up to and
+ * including the first '#' in the field must be treated as an opaque
+ * value and preserved across any external modification of aux_data.
+ *
+ * The region_id of the newly-created region is returned in *region_id
+ * if it is non-NULL.
+ */
+int dm_stats_create_region(struct dm_stats *dms, uint64_t *region_id,
+                          uint64_t start, uint64_t len, int64_t step,
+                          int precise, struct dm_histogram *bounds,
+                          const char *program_id, const char *user_data);
+
+/*
+ * Delete the specified statistics region. This will also mark the
+ * region as not-present and discard any existing statistics data.
+ */
+int dm_stats_delete_region(struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Clear the specified statistics region. This requests the kernel to
+ * zero all counter values (except in-flight I/O). Note that this
+ * operation is not atomic with respect to reads of the counters; any IO
+ * events occurring between the last print operation and the clear will
+ * be lost. This can be avoided by using the atomic print-and-clear
+ * function of the dm_stats_print_region() call or by using the higher
+ * level dm_stats_populate() interface.
+ */
+int dm_stats_clear_region(struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Print the current counter values for the specified statistics region
+ * and return them as a string. The memory for the string buffer will
+ * be allocated from the dm_stats handle's private pool and should be
+ * returned by calling dm_stats_buffer_destroy() when no longer
+ * required. The pointer will become invalid following any call that
+ * clears or reinitializes the handle (destroy, list, populate, bind).
+ *
+ * This allows applications that wish to access the raw message response
+ * to obtain it via a dm_stats handle; no parsing of the textual counter
+ * data is carried out by this function.
+ *
+ * Most users are recommended to use the dm_stats_populate() call
+ * instead since this will automatically parse the statistics data into
+ * numeric form accessible via the dm_stats_get_*() counter access
+ * methods.
+ *
+ * A subset of the data lines may be requested by setting the
+ * start_line and num_lines parameters. If both are zero all data
+ * lines are returned.
+ *
+ * If the clear parameter is non-zero the operation will also
+ * atomically reset all counter values to zero (except in-flight IO).
+ */
+char *dm_stats_print_region(struct dm_stats *dms, uint64_t region_id,
+                           unsigned start_line, unsigned num_lines,
+                           unsigned clear);
+
+/*
+ * Destroy a statistics response buffer obtained from a call to
+ * dm_stats_print_region().
+ */
+void dm_stats_buffer_destroy(struct dm_stats *dms, char *buffer);
+
+/*
+ * Determine the number of regions contained in a dm_stats handle
+ * following a dm_stats_list() or dm_stats_populate() call.
+ *
+ * The value returned is the number of registered regions visible with the
+ * program_id value used for the list or populate operation and may not be
+ * equal to the highest present region_id (either due to program_id
+ * filtering or gaps in the sequence of region_id values).
+ *
+ * Always returns zero on an empty handle.
+ */
+uint64_t dm_stats_get_nr_regions(const struct dm_stats *dms);
+
+/*
+ * Determine the number of groups contained in a dm_stats handle
+ * following a dm_stats_list() or dm_stats_populate() call.
+ *
+ * The value returned is the number of registered groups visible with the
+ * program_id value used for the list or populate operation and may not be
+ * equal to the highest present group_id (either due to program_id
+ * filtering or gaps in the sequence of group_id values).
+ *
+ * Always returns zero on an empty handle.
+ */
+uint64_t dm_stats_get_nr_groups(const struct dm_stats *dms);
+
+/*
+ * Test whether region_id is present in this dm_stats handle.
+ */
+int dm_stats_region_present(const struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Returns the number of areas (counter sets) contained in the specified
+ * region_id of the supplied dm_stats handle.
+ */
+uint64_t dm_stats_get_region_nr_areas(const struct dm_stats *dms,
+                                     uint64_t region_id);
+
+/*
+ * Returns the total number of areas (counter sets) in all regions of the
+ * given dm_stats object.
+ */
+uint64_t dm_stats_get_nr_areas(const struct dm_stats *dms);
+
+/*
+ * Test whether group_id is present in this dm_stats handle.
+ */
+int dm_stats_group_present(const struct dm_stats *dms, uint64_t group_id);
+
+/*
+ * Return the number of bins in the histogram configuration for the
+ * specified region or zero if no histogram specification is configured.
+ * Valid following a dm_stats_list() or dm_stats_populate() operation.
+ */
+int dm_stats_get_region_nr_histogram_bins(const struct dm_stats *dms,
+                                         uint64_t region_id);
+
+/*
+ * Parse a histogram string with optional unit suffixes into a
+ * dm_histogram bounds description.
+ *
+ * A histogram string is a string of numbers "n1,n2,n3,..." that
+ * represent the boundaries of a histogram. The first and final bins
+ * have implicit lower and upper bounds of zero and infinity
+ * respectively and boundary values must occur in order of ascending
+ * magnitude.  Unless a unit suffix is given all values are specified in
+ * nanoseconds.
+ *
+ * For example, if bounds_str="300,600,900", the region will be created
+ * with a histogram containing four bins. Each report will include four
+ * numbers a:b:c:d. a is the number of requests that took between 0 and
+ * 300ns to complete, b is the number of requests that took 300-600ns to
+ * complete, c is the number of requests that took 600-900ns to complete
+ * and d is the number of requests that took more than 900ns to
+ * complete.
+ *
+ * An optional unit suffix of 's', 'ms', 'us', or 'ns' may be used to
+ * specify units of seconds, milliseconds, microseconds, or nanoseconds:
+ *
+ *   bounds_str="1ns,1us,1ms,1s"
+ *   bounds_str="500us,1ms,1500us,2ms"
+ *   bounds_str="200ms,400ms,600ms,800ms,1s"
+ *
+ * The smallest valid unit of time for a histogram specification depends
+ * on whether the region uses precise timestamps: for a region with the
+ * default millisecond precision the smallest possible histogram boundary
+ * magnitude is one millisecond: attempting to use a histogram with a
+ * boundary less than one millisecond when creating a region will cause
+ * the region to be created with the precise_timestamps feature enabled.
+ *
+ * On success a pointer to the struct dm_histogram representing the
+ * bounds values is returned, or NULL in the case of error. The returned
+ * pointer should be freed using dm_free() when no longer required.
+ */
+struct dm_histogram *dm_histogram_bounds_from_string(const char *bounds_str);
+
+/*
+ * Parse a zero terminated array of uint64_t into a dm_histogram bounds
+ * description.
+ *
+ * Each value in the array specifies the upper bound of a bin in the
+ * latency histogram in nanoseconds. Values must appear in ascending
+ * order of magnitude.
+ *
+ * The smallest valid unit of time for a histogram specification depends
+ * on whether the region uses precise timestamps: for a region with the
+ * default millisecond precision the smallest possible histogram boundary
+ * magnitude is one millisecond: attempting to use a histogram with a
+ * boundary less than one millisecond when creating a region will cause
+ * the region to be created with the precise_timestamps feature enabled.
+ */
+struct dm_histogram *dm_histogram_bounds_from_uint64(const uint64_t *bounds);
+
+/*
+ * Destroy the histogram bounds array obtained from a call to
+ * dm_histogram_bounds_from_string().
+ */
+void dm_histogram_bounds_destroy(struct dm_histogram *bounds);
+
+/*
+ * Destroy a dm_stats object and all associated regions, counter
+ * sets and histograms.
+ */
+void dm_stats_destroy(struct dm_stats *dms);
+
+/*
+ * Counter sampling interval
+ */
+
+/*
+ * Set the sampling interval for counter data to the specified value in
+ * either nanoseconds or milliseconds.
+ *
+ * The interval is used to calculate time-based metrics from the basic
+ * counter data: an interval must be set before calling any of the
+ * metric methods.
+ *
+ * For best accuracy the duration should be measured and updated at the
+ * end of each interval.
+ *
+ * All values are stored internally with nanosecond precision and are
+ * converted to or from ms when the millisecond interfaces are used.
+ */
+void dm_stats_set_sampling_interval_ns(struct dm_stats *dms,
+                                      uint64_t interval_ns);
+
+void dm_stats_set_sampling_interval_ms(struct dm_stats *dms,
+                                      uint64_t interval_ms);
+
+/*
+ * Retrieve the configured sampling interval in either nanoseconds or
+ * milliseconds.
+ */
+uint64_t dm_stats_get_sampling_interval_ns(const struct dm_stats *dms);
+uint64_t dm_stats_get_sampling_interval_ms(const struct dm_stats *dms);
+
+/*
+ * Override program_id. This may be used to change the default
+ * program_id value for an existing handle. If the allow_empty argument
+ * is non-zero a NULL or empty program_id is permitted.
+ *
+ * Use with caution! Most users of the library should set a valid,
+ * non-NULL program_id for every statistics region created. Failing to
+ * do so may result in confusing state when multiple programs are
+ * creating and managing statistics regions.
+ *
+ * All users of the library are encouraged to choose an unambiguous,
+ * unique program_id: this could be based on PID (for programs that
+ * create, report, and delete regions in a single process), session id,
+ * executable name, or some other distinguishing string.
+ *
+ * Use of the empty string as a program_id does not simplify use of the
+ * library or the command line tools and use of this value is strongly
+ * discouraged.
+ */
+int dm_stats_set_program_id(struct dm_stats *dms, int allow_empty,
+                           const char *program_id);
+
+/*
+ * Region properties: size, length & area_len.
+ *
+ * Region start and length are returned in units of 512b as specified
+ * at region creation time. The area_len value gives the size of areas
+ * into which the region has been subdivided. For regions with a single
+ * area spanning the range this value is equal to the region length.
+ *
+ * For regions created with a specified number of areas the value
+ * represents the size of the areas into which the kernel divided the
+ * region excluding any rounding of the last area size. The number of
+ * areas may be obtained using the dm_stats_nr_areas_region() call.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_region_start(const struct dm_stats *dms, uint64_t *start,
+                             uint64_t region_id);
+
+int dm_stats_get_region_len(const struct dm_stats *dms, uint64_t *len,
+                           uint64_t region_id);
+
+int dm_stats_get_region_area_len(const struct dm_stats *dms,
+                                uint64_t *len, uint64_t region_id);
+
+/*
+ * Area properties: start, offset and length.
+ *
+ * The area length is always equal to the area length of the region
+ * that contains it and is obtained from dm_stats_get_region_area_len().
+ *
+ * The start of an area is a function of the area_id and the containing
+ * region's start and area length: it gives the absolute offset into the
+ * containing device of the beginning of the area.
+ *
+ * The offset expresses the area's relative offset into the current
+ * region. I.e. the area start minus the start offset of the containing
+ * region.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_area_start(const struct dm_stats *dms, uint64_t *start,
+                           uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_area_offset(const struct dm_stats *dms, uint64_t *offset,
+                            uint64_t region_id, uint64_t area_id);
+
+/*
+ * Retrieve program_id and user aux_data for a specific region.
+ *
+ * Only valid following a call to dm_stats_list().
+ */
+
+/*
+ * Retrieve program_id for the specified region.
+ *
+ * The returned pointer does not need to be freed separately from the
+ * dm_stats handle but will become invalid after a dm_stats_destroy(),
+ * dm_stats_list(), dm_stats_populate(), or dm_stats_bind*() of the
+ * handle from which it was obtained.
+ */
+const char *dm_stats_get_region_program_id(const struct dm_stats *dms,
+                                          uint64_t region_id);
+
+/*
+ * Retrieve user aux_data set for the specified region. This function
+ * will return any stored user aux_data as a string in the memory
+ * pointed to by the aux_data argument.
+ *
+ * Any library internal aux_data fields, such as DMS_GROUP descriptors,
+ * are stripped before the value is returned.
+ *
+ * The returned pointer does not need to be freed separately from the
+ * dm_stats handle but will become invalid after a dm_stats_destroy(),
+ * dm_stats_list(), dm_stats_populate(), or dm_stats_bind*() of the
+ * handle from which it was obtained.
+ */
+const char *dm_stats_get_region_aux_data(const struct dm_stats *dms,
+                                        uint64_t region_id);
+
+typedef enum {
+       DM_STATS_OBJECT_TYPE_NONE,
+       DM_STATS_OBJECT_TYPE_AREA,
+       DM_STATS_OBJECT_TYPE_REGION,
+       DM_STATS_OBJECT_TYPE_GROUP
+} dm_stats_obj_type_t;
+
+/*
+ * Statistics cursor
+ *
+ * A dm_stats handle maintains an optional cursor into the statistics
+ * tables that it stores. Iterators are provided to visit each region,
+ * area, or group in a handle and accessor methods are provided to
+ * obtain properties and values for the object at the current cursor
+ * position.
+ *
+ * Using the cursor simplifies walking all regions or groups when
+ * the tables are sparse (i.e. contains some present and some
+ * non-present region_id or group_id values either due to program_id
+ * filtering or the ordering of region and group creation and deletion).
+ *
+ * Simple macros are provided to visit each area, region, or group,
+ * contained in a handle and applications are encouraged to use these
+ * where possible.
+ */
+
+/*
+ * Walk flags are used to initialise a dm_stats handle's cursor control
+ * and to select region or group aggregation when calling a metric or
+ * counter property method with immediate group, region, and area ID
+ * values.
+ *
+ * Walk flags are stored in the uppermost word of a uint64_t so that
+ * a region_id or group_id may be encoded in the lower bits. This
+ * allows an aggregate region_id or group_id to be specified when
+ * retrieving counter or metric values.
+ *
+ * Flags may be ORred together when used to initialise a dm_stats_walk:
+ * the resulting walk will visit instance of each type specified by
+ * the flag combination.
+ */
+#define DM_STATS_WALK_AREA   0x1000000000000ULL
+#define DM_STATS_WALK_REGION 0x2000000000000ULL
+#define DM_STATS_WALK_GROUP  0x4000000000000ULL
+
+#define DM_STATS_WALK_ALL    0x7000000000000ULL
+#define DM_STATS_WALK_DEFAULT (DM_STATS_WALK_AREA | DM_STATS_WALK_REGION)
+
+/*
+ * Skip regions from a DM_STATS_WALK_REGION that contain only a single
+ * area: in this case the region's aggregate values are identical to
+ * the values of the single contained area. Setting this flag will
+ * suppress these duplicate entries during a dm_stats_walk_* with the
+ * DM_STATS_WALK_REGION flag set.
+ */
+#define DM_STATS_WALK_SKIP_SINGLE_AREA   0x8000000000000ULL
+
+/*
+ * Initialise the cursor control of a dm_stats handle for the specified
+ * walk type(s). Including a walk flag in the flags argument will cause
+ * any subsequent walk to visit that type of object (until the next
+ * call to dm_stats_walk_init()).
+ */
+int dm_stats_walk_init(struct dm_stats *dms, uint64_t flags);
+
+/*
+ * Set the cursor of a dm_stats handle to address the first present
+ * group, region, or area of the currently configured walk. It is
+ * valid to attempt to walk a NULL stats handle or a handle containing
+ * no present regions; in this case any call to dm_stats_walk_next()
+ * becomes a no-op and all calls to dm_stats_walk_end() return true.
+ */
+void dm_stats_walk_start(struct dm_stats *dms);
+
+/*
+ * Advance the statistics cursor to the next area, or to the next
+ * present region if at the end of the current region. If the end of
+ * the region, area, or group tables is reached a subsequent call to
+ * dm_stats_walk_end() will return 1 and dm_stats_object_type() called
+ * on the location will return DM_STATS_OBJECT_TYPE_NONE.
+ */
+void dm_stats_walk_next(struct dm_stats *dms);
+
+/*
+ * Force the statistics cursor to advance to the next region. This will
+ * stop any in-progress area walk (by clearing DM_STATS_WALK_AREA) and
+ * advance the cursor to the next present region, the first present
+ * group (if DM_STATS_WALK_GROUP is set), or to the end. In this case a
+ * subsequent call to dm_stats_walk_end() will return 1 and a call to
+ * dm_stats_object_type() for the location will return
+ * DM_STATS_OBJECT_TYPE_NONE.
+ */
+void dm_stats_walk_next_region(struct dm_stats *dms);
+
+/*
+ * Test whether the end of a statistics walk has been reached.
+ */
+int dm_stats_walk_end(struct dm_stats *dms);
+
+/*
+ * Return the type of object at the location specified by region_id
+ * and area_id. If either region_id or area_id uses one of the special
+ * values DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT the
+ * corresponding region or area identifier will be taken from the
+ * current cursor location. If the cursor location or the value encoded
+ * by region_id and area_id indicates an aggregate region or group,
+ * this will be reflected in the value returned.
+ */
+dm_stats_obj_type_t dm_stats_object_type(const struct dm_stats *dms,
+                                        uint64_t region_id,
+                                        uint64_t area_id);
+
+/*
+ * Return the type of object at the current stats cursor location.
+ */
+dm_stats_obj_type_t dm_stats_current_object_type(const struct dm_stats *dms);
+
+/*
+ * Stats iterators
+ *
+ * C 'for' and 'do'/'while' style iterators for dm_stats data.
+ *
+ * It is not safe to call any function that modifies the region table
+ * within the loop body (i.e. dm_stats_list(), dm_stats_populate(),
+ * dm_stats_init(), or dm_stats_destroy()).
+ *
+ * All counter and property (dm_stats_get_*) access methods, as well as
+ * dm_stats_populate_region() can be safely called from loops.
+ *
+ */
+
+/*
+ * Iterate over the regions table visiting each region.
+ *
+ * If the region table is empty or unpopulated the loop body will not be
+ * executed.
+ */
+#define dm_stats_foreach_region(dms)                           \
+for (dm_stats_walk_init((dms), DM_STATS_WALK_REGION),          \
+     dm_stats_walk_start((dms));                               \
+     !dm_stats_walk_end((dms)); dm_stats_walk_next_region((dms)))
+
+/*
+ * Iterate over the regions table visiting each area.
+ *
+ * If the region table is empty or unpopulated the loop body will not
+ * be executed.
+ */
+#define dm_stats_foreach_area(dms)                             \
+for (dm_stats_walk_init((dms), DM_STATS_WALK_AREA),            \
+     dm_stats_walk_start((dms));                               \
+     !dm_stats_walk_end((dms)); dm_stats_walk_next((dms)))
+
+/*
+ * Iterate over the regions table visiting each group. Metric and
+ * counter methods will return values for the group.
+ *
+ * If the group table is empty or unpopulated the loop body will not
+ * be executed.
+ */
+#define dm_stats_foreach_group(dms)                            \
+for (dm_stats_walk_init((dms), DM_STATS_WALK_GROUP),           \
+     dm_stats_walk_start(dms);                                 \
+     !dm_stats_walk_end(dms);                                  \
+     dm_stats_walk_next(dms))
+
+/*
+ * Start a walk iterating over the regions contained in dm_stats handle
+ * 'dms'.
+ *
+ * The body of the loop should call dm_stats_walk_next() or
+ * dm_stats_walk_next_region() to advance to the next element.
+ *
+ * The loop body is executed at least once even if the stats handle is
+ * empty.
+ */
+#define dm_stats_walk_do(dms)                                  \
+do {                                                           \
+       dm_stats_walk_start((dms));                             \
+       do
+
+/*
+ * Start a 'while' style loop or end a 'do..while' loop iterating over the
+ * regions contained in dm_stats handle 'dms'.
+ */
+#define dm_stats_walk_while(dms)                               \
+       while(!dm_stats_walk_end((dms)));                       \
+} while (0)
+
+/*
+ * Cursor relative property methods
+ *
+ * Calls with the prefix dm_stats_get_current_* operate relative to the
+ * current cursor location, returning properties for the current region
+ * or area of the supplied dm_stats handle.
+ *
+ */
+
+/*
+ * Returns the number of areas (counter sets) contained in the current
+ * region of the supplied dm_stats handle.
+ */
+uint64_t dm_stats_get_current_nr_areas(const struct dm_stats *dms);
+
+/*
+ * Retrieve the current values of the stats cursor.
+ */
+uint64_t dm_stats_get_current_region(const struct dm_stats *dms);
+uint64_t dm_stats_get_current_area(const struct dm_stats *dms);
+
+/*
+ * Current region properties: size, length & area_len.
+ *
+ * See the comments for the equivalent dm_stats_get_* versions for a
+ * complete description of these methods.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_current_region_start(const struct dm_stats *dms,
+                                     uint64_t *start);
+
+int dm_stats_get_current_region_len(const struct dm_stats *dms,
+                                   uint64_t *len);
+
+int dm_stats_get_current_region_area_len(const struct dm_stats *dms,
+                                        uint64_t *area_len);
+
+/*
+ * Current area properties: start and length.
+ *
+ * See the comments for the equivalent dm_stats_get_* versions for a
+ * complete description of these methods.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_current_area_start(const struct dm_stats *dms,
+                                   uint64_t *start);
+
+int dm_stats_get_current_area_offset(const struct dm_stats *dms,
+                                    uint64_t *offset);
+
+int dm_stats_get_current_area_len(const struct dm_stats *dms,
+                                      uint64_t *start);
+
+/*
+ * Return a pointer to the program_id string for region at the current
+ * cursor location.
+ */
+const char *dm_stats_get_current_region_program_id(const struct dm_stats *dms);
+
+/*
+ * Return a pointer to the user aux_data string for the region at the
+ * current cursor location.
+ */
+const char *dm_stats_get_current_region_aux_data(const struct dm_stats *dms);
+
+/*
+ * Statistics groups and data aggregation.
+ */
+
+/*
+ * Create a new group in stats handle dms from the group descriptor
+ * passed in group. The group descriptor is a string containing a list
+ * of region_id values that will be included in the group. The first
+ * region_id found will be the group leader. Ranges of identifiers may
+ * be expressed as "M-N", where M and N are the start and end region_id
+ * values for the range.
+ */
+int dm_stats_create_group(struct dm_stats *dms, const char *group,
+                         const char *alias, uint64_t *group_id);
+
+/*
+ * Remove the specified group_id. If the remove argument is zero the
+ * group will be removed but the regions that it contained will remain.
+ * If remove is non-zero then all regions that belong to the group will
+ * also be removed.
+ */
+int dm_stats_delete_group(struct dm_stats *dms, uint64_t group_id, int remove);
+
+/*
+ * Set an alias for this group or region. The alias will be returned
+ * instead of the normal dm-stats name for this region or group.
+ */
+int dm_stats_set_alias(struct dm_stats *dms, uint64_t group_id,
+                      const char *alias);
+
+/*
+ * Returns a pointer to the currently configured alias for id, or the
+ * name of the dm device the handle is bound to if no alias has been
+ * set. The pointer will be freed automatically when a new alias is set
+ * or when the stats handle is cleared.
+ */
+const char *dm_stats_get_alias(const struct dm_stats *dms, uint64_t id);
+
+#define DM_STATS_GROUP_NONE UINT64_MAX
+/*
+ * Return the group_id that the specified region_id belongs to, or the
+ * special value DM_STATS_GROUP_NONE if the region does not belong
+ * to any group.
+ */
+uint64_t dm_stats_get_group_id(const struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Store a pointer to a string describing the regions that are members
+ * of the group specified by group_id in the memory pointed to by buf.
+ * The string is in the same format as the 'group' argument to
+ * dm_stats_create_group().
+ *
+ * The pointer does not need to be freed explicitly by the caller: it
+ * will become invalid following a subsequent dm_stats_list(),
+ * dm_stats_populate() or dm_stats_destroy() of the corresponding
+ * dm_stats handle.
+ */
+int dm_stats_get_group_descriptor(const struct dm_stats *dms,
+                                 uint64_t group_id, char **buf);
+
+/*
+ * Create regions that correspond to the extents of a file in the
+ * filesystem and optionally place them into a group.
+ *
+ * File descriptor fd must reference a regular file, open for reading,
+ * in a local file system that supports the FIEMAP ioctl, and that
+ * returns data describing the physical location of extents.
+ *
+ * The file descriptor can be closed by the caller following the call
+ * to dm_stats_create_regions_from_fd().
+ *
+ * Unless nogroup is non-zero the regions will be placed into a group
+ * and the group alias set to the value supplied (if alias is NULL no
+ * group alias will be assigned).
+ *
+ * On success the function returns a pointer to an array of uint64_t
+ * containing the IDs of the newly created regions. The region_id
+ * array is terminated by the value DM_STATS_REGION_NOT_PRESENT and
+ * should be freed using dm_free() when no longer required.
+ *
+ * On error NULL is returned.
+ *
+ * Following a call to dm_stats_create_regions_from_fd() the handle
+ * is guaranteed to be in a listed state, and to contain any region
+ * and group identifiers created by the operation.
+ *
+ * The group_id for the new group is equal to the region_id value in
+ * the first array element.
+ */
+uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd,
+                                         int group, int precise,
+                                         struct dm_histogram *bounds,
+                                         const char *alias);
+/*
+ * Update a group of regions that correspond to the extents of a file
+ * in the filesystem, adding and removing regions to account for
+ * allocation changes in the underlying file.
+ *
+ * File descriptor fd must reference a regular file, open for reading,
+ * in a local file system that supports the FIEMAP ioctl, and that
+ * returns data describing the physical location of extents.
+ *
+ * The file descriptor can be closed by the caller following the call
+ * to dm_stats_update_regions_from_fd().
+ *
+ * On success the function returns a pointer to an array of uint64_t
+ * containing the IDs of the updated regions (including any existing
+ * regions that were not modified by the call).
+ *
+ * The region_id array is terminated by the special value
+ * DM_STATS_REGION_NOT_PRESENT and should be freed using dm_free()
+ * when no longer required.
+ *
+ * On error NULL is returned.
+ *
+ * Following a call to dm_stats_update_regions_from_fd() the handle
+ * is guaranteed to be in a listed state, and to contain any region
+ * and group identifiers created by the operation.
+ *
+ * This function cannot be used with file mapped regions that are
+ * not members of a group: either group the regions, or remove them
+ * and re-map them with dm_stats_create_regions_from_fd().
+ */
+uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd,
+                                         uint64_t group_id);
+
+
+/*
+ * The file map monitoring daemon can monitor files in two distinct
+ * ways: the mode affects the behaviour of the daemon when a file
+ * under monitoring is renamed or unlinked, and the conditions which
+ * cause the daemon to terminate.
+ *
+ * In both modes, the daemon will always shut down when the group
+ * being monitored is deleted.
+ *
+ * Follow inode:
+ * The daemon follows the inode of the file, as it was at the time the
+ * daemon started. The file descriptor referencing the file is kept
+ * open at all times, and the daemon will exit when it detects that
+ * the file has been unlinked and it is the last holder of a reference
+ * to the file.
+ *
+ * This mode is useful if the file is expected to be renamed, or moved
+ * within the file system, while it is being monitored.
+ *
+ * Follow path:
+ * The daemon follows the path that was given on the daemon command
+ * line. The file descriptor referencing the file is re-opened on each
+ * iteration of the daemon, and the daemon will exit if no file exists
+ * at this location (a tolerance is allowed so that a brief delay
+ * between unlink() and creat() is permitted).
+ *
+ * This mode is useful if the file is updated by unlinking the original
+ * and placing a new file at the same path.
+ */
+
+typedef enum {
+       DM_FILEMAPD_FOLLOW_INODE,
+       DM_FILEMAPD_FOLLOW_PATH,
+       DM_FILEMAPD_FOLLOW_NONE
+} dm_filemapd_mode_t;
+
+/*
+ * Parse a string representation of a dmfilemapd mode.
+ *
+ * Returns a valid dm_filemapd_mode_t value on success, or
+ * DM_FILEMAPD_FOLLOW_NONE on error.
+ */
+dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str);
+
+/*
+ * Start the dmfilemapd filemap monitoring daemon for the specified
+ * file descriptor, group, and file system path. The daemon will
+ * monitor the file for allocation changes, and when a change is
+ * detected, call dm_stats_update_regions_from_fd() to update the
+ * mapped regions for the file.
+ *
+ * The path provided to dm_stats_start_filemapd() must be an absolute
+ * path, and should reflect the path of 'fd' at the time that it was
+ * opened.
+ *
+ * The mode parameter controls the behaviour of the daemon when the
+ * file being monitored is unlinked or moved: see the comments for
+ * dm_filemapd_mode_t for a full description and possible values.
+ *
+ * The daemon can be stopped at any time by sending SIGTERM to the
+ * daemon pid.
+ */
+int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path,
+                           dm_filemapd_mode_t mode, unsigned foreground,
+                           unsigned verbose);
+
+/*
+ * Call this to actually run the ioctl.
+ */
+int dm_task_run(struct dm_task *dmt);
+
+/*
+ * The errno from the last device-mapper ioctl performed by dm_task_run.
+ */
+int dm_task_get_errno(struct dm_task *dmt);
+
+/*
+ * Call this to make or remove the device nodes associated with previously
+ * issued commands.
+ */
+void dm_task_update_nodes(void);
+
+/*
+ * Mangling support
+ *
+ * Character whitelist: 0-9, A-Z, a-z, #+-.:=@_
+ * HEX mangling format: \xNN, NN being the hex value of the character.
+ * (whitelist and format supported by udev)
+*/
+typedef enum {
+       DM_STRING_MANGLING_NONE, /* do not mangle at all */
+       DM_STRING_MANGLING_AUTO, /* mangle only if not already mangled with hex, error when mixed */
+       DM_STRING_MANGLING_HEX   /* always mangle with hex encoding, no matter what the input is */
+} dm_string_mangling_t;
+
+/*
+ * Set/get mangling mode used for device-mapper names and uuids.
+ */
+int dm_set_name_mangling_mode(dm_string_mangling_t name_mangling);
+dm_string_mangling_t dm_get_name_mangling_mode(void);
+
+/*
+ * Get mangled/unmangled form of the device-mapper name or uuid
+ * irrespective of the global setting (set by dm_set_name_mangling_mode).
+ * The name or uuid returned needs to be freed after use by calling dm_free!
+ */
+char *dm_task_get_name_mangled(const struct dm_task *dmt);
+char *dm_task_get_name_unmangled(const struct dm_task *dmt);
+char *dm_task_get_uuid_mangled(const struct dm_task *dmt);
+char *dm_task_get_uuid_unmangled(const struct dm_task *dmt);
+
+/*
+ * Configure the device-mapper directory
+ */
+int dm_set_dev_dir(const char *dir);
+const char *dm_dir(void);
+
+/*
+ * Configure sysfs directory, /sys by default
+ */
+int dm_set_sysfs_dir(const char *dir);
+const char *dm_sysfs_dir(void);
+
+/*
+ * Configure default UUID prefix string.
+ * Conventionally this is a short capitalised prefix indicating the subsystem
+ * that is managing the devices, e.g. "LVM-" or "MPATH-".
+ * To support stacks of devices from different subsystems, recursive functions
+ * stop recursing if they reach a device with a different prefix.
+ */
+int dm_set_uuid_prefix(const char *uuid_prefix);
+const char *dm_uuid_prefix(void);
+
+/*
+ * Determine whether a major number belongs to device-mapper or not.
+ */
+int dm_is_dm_major(uint32_t major);
+
+/*
+ * Get associated device name for given major and minor number by reading
+ * the sysfs content. If this is a dm device, get associated dm name, the one
+ * that appears in /dev/mapper. DM names could be resolved this way only if
+ * kernel used >= 2.6.29, kernel name is found otherwise (e.g. dm-0).
+ * If prefer_kernel_name is set, the kernel name is always preferred over
+ * device-mapper name for dm devices no matter what the kernel version is.
+ * For non-dm devices, we always get associated kernel name, e.g sda, md0 etc.
+ * Returns 0 on error or if sysfs is not used (or configured incorrectly),
+ * otherwise returns 1 and the supplied buffer holds the device name.
+ */
+int dm_device_get_name(uint32_t major, uint32_t minor,
+                      int prefer_kernel_name,
+                      char *buf, size_t buf_size);
+
+/*
+ * Determine whether a device has any holders (devices
+ * using this device). If sysfs is not used (or configured
+ * incorrectly), returns 0.
+ */
+int dm_device_has_holders(uint32_t major, uint32_t minor);
+
+/*
+ * Determine whether a device contains mounted filesystem.
+ * If sysfs is not used (or configured incorrectly), returns 0.
+ */
+int dm_device_has_mounted_fs(uint32_t major, uint32_t minor);
+
+
+/*
+ * Callback is invoked for individual mountinfo lines,
+ * minor, major and mount target are parsed and unmangled.
+ */
+typedef int (*dm_mountinfo_line_callback_fn) (char *line, unsigned maj, unsigned min,
+                                             char *target, void *cb_data);
+
+/*
+ * Read all lines from /proc/self/mountinfo,
+ * for each line calls read_fn callback.
+ */
+int dm_mountinfo_read(dm_mountinfo_line_callback_fn read_fn, void *cb_data);
+
+/*
+ * Initialise library
+ */
+void dm_lib_init(void) __attribute__((constructor));
+
+/*
+ * Release library resources
+ */
+void dm_lib_release(void);
+void dm_lib_exit(void) __attribute__((destructor));
+
+/* An optimisation for clients making repeated calls involving dm ioctls */
+void dm_hold_control_dev(int hold_open);
+
+/*
+ * Use NULL for all devices.
+ */
+int dm_mknodes(const char *name);
+int dm_driver_version(char *version, size_t size);
+
+/******************************************************
+ * Functions to build and manipulate trees of devices *
+ ******************************************************/
+struct dm_tree;
+struct dm_tree_node;
+
+/*
+ * Initialise an empty dependency tree.
+ *
+ * The tree consists of a root node together with one node for each mapped
+ * device which has child nodes for each device referenced in its table.
+ *
+ * Every node in the tree has one or more children and one or more parents.
+ *
+ * The root node is the parent/child of every node that doesn't have other
+ * parents/children.
+ */
+struct dm_tree *dm_tree_create(void);
+void dm_tree_free(struct dm_tree *tree);
+
+/*
+ * List of suffixes to be ignored when matching uuids against existing devices.
+ */
+void dm_tree_set_optional_uuid_suffixes(struct dm_tree *dtree, const char **optional_uuid_suffixes);
+
+/*
+ * Add nodes to the tree for a given device and all the devices it uses.
+ */
+int dm_tree_add_dev(struct dm_tree *tree, uint32_t major, uint32_t minor);
+int dm_tree_add_dev_with_udev_flags(struct dm_tree *tree, uint32_t major,
+                                   uint32_t minor, uint16_t udev_flags);
+
+/*
+ * Add a new node to the tree if it doesn't already exist.
+ */
+struct dm_tree_node *dm_tree_add_new_dev(struct dm_tree *tree,
+                                        const char *name,
+                                        const char *uuid,
+                                        uint32_t major, uint32_t minor,
+                                        int read_only,
+                                        int clear_inactive,
+                                        void *context);
+struct dm_tree_node *dm_tree_add_new_dev_with_udev_flags(struct dm_tree *tree,
+                                                        const char *name,
+                                                        const char *uuid,
+                                                        uint32_t major,
+                                                        uint32_t minor,
+                                                        int read_only,
+                                                        int clear_inactive,
+                                                        void *context,
+                                                        uint16_t udev_flags);
+
+/*
+ * Search for a node in the tree.
+ * Set major and minor to 0 or uuid to NULL to get the root node.
+ */
+struct dm_tree_node *dm_tree_find_node(struct dm_tree *tree,
+                                      uint32_t major,
+                                      uint32_t minor);
+struct dm_tree_node *dm_tree_find_node_by_uuid(struct dm_tree *tree,
+                                              const char *uuid);
+
+/*
+ * Use this to walk through all children of a given node.
+ * Set handle to NULL in first call.
+ * Returns NULL after the last child.
+ * Set inverted to use inverted tree.
+ */
+struct dm_tree_node *dm_tree_next_child(void **handle,
+                                       const struct dm_tree_node *parent,
+                                       uint32_t inverted);
+
+/*
+ * Get properties of a node.
+ */
+const char *dm_tree_node_get_name(const struct dm_tree_node *node);
+const char *dm_tree_node_get_uuid(const struct dm_tree_node *node);
+const struct dm_info *dm_tree_node_get_info(const struct dm_tree_node *node);
+void *dm_tree_node_get_context(const struct dm_tree_node *node);
+/*
+ * Returns  0 when the size of the node and all its children is unchanged.
+ * Returns  1 when the node or any of its children has increased size.
+ * Returns -1 when the node or any of its children has reduced size.
+ */
+int dm_tree_node_size_changed(const struct dm_tree_node *dnode);
+
+/*
+ * Returns the number of children of the given node (excluding the root node).
+ * Set inverted for the number of parents.
+ */
+int dm_tree_node_num_children(const struct dm_tree_node *node, uint32_t inverted);
+
+/*
+ * Deactivate a device plus all dependencies.
+ * Ignores devices that don't have a uuid starting with uuid_prefix.
+ */
+int dm_tree_deactivate_children(struct dm_tree_node *dnode,
+                               const char *uuid_prefix,
+                               size_t uuid_prefix_len);
+/*
+ * Preload/create a device plus all dependencies.
+ * Ignores devices that don't have a uuid starting with uuid_prefix.
+ */
+int dm_tree_preload_children(struct dm_tree_node *dnode,
+                            const char *uuid_prefix,
+                            size_t uuid_prefix_len);
+
+/*
+ * Resume a device plus all dependencies.
+ * Ignores devices that don't have a uuid starting with uuid_prefix.
+ */
+int dm_tree_activate_children(struct dm_tree_node *dnode,
+                             const char *uuid_prefix,
+                             size_t uuid_prefix_len);
+
+/*
+ * Suspend a device plus all dependencies.
+ * Ignores devices that don't have a uuid starting with uuid_prefix.
+ */
+int dm_tree_suspend_children(struct dm_tree_node *dnode,
+                            const char *uuid_prefix,
+                            size_t uuid_prefix_len);
+
+/*
+ * Skip the filesystem sync when suspending.
+ * Does nothing with other functions.
+ * Use this when no snapshots are involved.
+ */
+void dm_tree_skip_lockfs(struct dm_tree_node *dnode);
+
+/*
+ * Set the 'noflush' flag when suspending devices.
+ * If the kernel supports it, instead of erroring outstanding I/O that
+ * cannot be completed, the I/O is queued and resubmitted when the
+ * device is resumed.  This affects multipath devices when all paths
+ * have failed and queue_if_no_path is set, and mirror devices when
+ * block_on_error is set and the mirror log has failed.
+ */
+void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode);
+
+/*
+ * Retry removal of each device if not successful.
+ */
+void dm_tree_retry_remove(struct dm_tree_node *dnode);
+
+/*
+ * Is the uuid prefix present in the tree?
+ * Only returns 0 if every node was checked successfully.
+ * Returns 1 if the tree walk has to be aborted.
+ */
+int dm_tree_children_use_uuid(struct dm_tree_node *dnode,
+                             const char *uuid_prefix,
+                             size_t uuid_prefix_len);
+
+/*
+ * Construct tables for new nodes before activating them.
+ */
+int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node *dnode,
+                                           uint64_t size,
+                                           const char *origin_uuid);
+int dm_tree_node_add_snapshot_target(struct dm_tree_node *node,
+                                    uint64_t size,
+                                    const char *origin_uuid,
+                                    const char *cow_uuid,
+                                    int persistent,
+                                    uint32_t chunk_size);
+int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node *node,
+                                          uint64_t size,
+                                          const char *origin_uuid,
+                                          const char *cow_uuid,
+                                          const char *merge_uuid,
+                                          uint32_t chunk_size);
+int dm_tree_node_add_error_target(struct dm_tree_node *node,
+                                 uint64_t size);
+int dm_tree_node_add_zero_target(struct dm_tree_node *node,
+                                uint64_t size);
+int dm_tree_node_add_linear_target(struct dm_tree_node *node,
+                                  uint64_t size);
+int dm_tree_node_add_striped_target(struct dm_tree_node *node,
+                                   uint64_t size,
+                                   uint32_t stripe_size);
+
+#define DM_CRYPT_IV_DEFAULT    UINT64_C(-1)    /* iv_offset == seg offset */
+/*
+ * Function accepts one string in cipher specification
+ * (chainmode and iv should be NULL because included in cipher string)
+ *   or
+ * separate arguments which will be joined to "cipher-chainmode-iv"
+ */
+int dm_tree_node_add_crypt_target(struct dm_tree_node *node,
+                                 uint64_t size,
+                                 const char *cipher,
+                                 const char *chainmode,
+                                 const char *iv,
+                                 uint64_t iv_offset,
+                                 const char *key);
+int dm_tree_node_add_mirror_target(struct dm_tree_node *node,
+                                  uint64_t size);
+
+/* Mirror log flags */
+#define DM_NOSYNC              0x00000001      /* Known already in sync */
+#define DM_FORCESYNC           0x00000002      /* Force resync */
+#define DM_BLOCK_ON_ERROR      0x00000004      /* On error, suspend I/O */
+#define DM_CORELOG             0x00000008      /* In-memory log */
+
+int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node,
+                                      uint32_t region_size,
+                                      unsigned clustered,
+                                      const char *log_uuid,
+                                      unsigned area_count,
+                                      uint32_t flags);
+
+int dm_tree_node_add_raid_target(struct dm_tree_node *node,
+                                uint64_t size,
+                                const char *raid_type,
+                                uint32_t region_size,
+                                uint32_t stripe_size,
+                                uint64_t rebuilds,
+                                uint64_t flags);
+
+/*
+ * Defines below are based on kernel's dm-cache.c defines
+ * DM_CACHE_MIN_DATA_BLOCK_SIZE (32 * 1024 >> SECTOR_SHIFT)
+ * DM_CACHE_MAX_DATA_BLOCK_SIZE (1024 * 1024 * 1024 >> SECTOR_SHIFT)
+ */
+#define DM_CACHE_MIN_DATA_BLOCK_SIZE (UINT32_C(64))
+#define DM_CACHE_MAX_DATA_BLOCK_SIZE (UINT32_C(2097152))
+/*
+ * Max supported size for cache pool metadata device.
+ * Limitation is hardcoded into the kernel and bigger device sizes
+ * are not accepted.
+ *
+ * Limit defined in drivers/md/dm-cache-metadata.h
+ */
+#define DM_CACHE_METADATA_MAX_SECTORS DM_THIN_METADATA_MAX_SECTORS
+
+/*
+ * Define number of elements in rebuild and writemostly arrays
+ * 'of struct dm_tree_node_raid_params'.
+ */
+
+struct dm_tree_node_raid_params {
+       const char *raid_type;
+
+       uint32_t stripes;
+       uint32_t mirrors;
+       uint32_t region_size;
+       uint32_t stripe_size;
+
+       /*
+        * 'rebuilds' and 'writemostly' are bitfields that signify
+        * which devices in the array are to be rebuilt or marked
+        * writemostly.  The kernel supports up to 253 legs.
+        * We limit ourselves by choosing a lower value
+        * for DEFAULT_RAID{1}_MAX_IMAGES in defaults.h.
+        */
+       uint64_t rebuilds;
+       uint64_t writemostly;
+       uint32_t writebehind;       /* I/Os (kernel default COUNTER_MAX / 2) */
+       uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */
+       uint32_t max_recovery_rate; /* kB/sec/disk */
+       uint32_t min_recovery_rate; /* kB/sec/disk */
+       uint32_t stripe_cache;      /* sectors */
+
+       uint64_t flags;             /* [no]sync */
+       uint32_t reserved2;
+};
+
+/*
+ * Version 2 of the above node raid params struct to keep API compatibility.
+ *
+ * Extended for more than 64 legs (max 253 in the MD kernel runtime!),
+ * delta_disks for disk add/remove reshaping,
+ * data_offset for out-of-place reshaping
+ * and data_copies for odd number of raid10 legs.
+ */
+#define        RAID_BITMAP_SIZE 4 /* 4 * 64 bit elements in rebuilds/writemostly arrays */
+/*
+ * Extended (v2) parameters for a dm-raid target line; see the comment
+ * block above for how this differs from struct dm_tree_node_raid_params.
+ */
+struct dm_tree_node_raid_params_v2 {
+       const char *raid_type;      /* dm-raid target type name string */
+
+       uint32_t stripes;           /* number of stripes */
+       uint32_t mirrors;           /* number of mirrors */
+       uint32_t region_size;       /* sync region size (NOTE(review): presumably sectors - confirm) */
+       uint32_t stripe_size;       /* stripe chunk size (NOTE(review): presumably sectors - confirm) */
+
+       int delta_disks; /* +/- number of disks to add/remove (reshaping) */
+       int data_offset; /* data offset to set (out-of-place reshaping) */
+
+       /*
+        * 'rebuilds' and 'writemostly' are bitfields that signify
+        * which devices in the array are to be rebuilt or marked
+        * writemostly.  The kernel supports up to 253 legs.
+        * We limit ourselves by choosing a lower value
+        * for DEFAULT_RAID_MAX_IMAGES.
+        */
+       uint64_t rebuilds[RAID_BITMAP_SIZE];    /* one bit per leg; RAID_BITMAP_SIZE * 64 bits total */
+       uint64_t writemostly[RAID_BITMAP_SIZE]; /* one bit per leg; RAID_BITMAP_SIZE * 64 bits total */
+       uint32_t writebehind;       /* I/Os (kernel default COUNTER_MAX / 2) */
+       uint32_t data_copies;       /* RAID # of data copies */
+       uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */
+       uint32_t max_recovery_rate; /* kB/sec/disk */
+       uint32_t min_recovery_rate; /* kB/sec/disk */
+       uint32_t stripe_cache;      /* sectors */
+
+       uint64_t flags;             /* [no]sync */
+};
+
+int dm_tree_node_add_raid_target_with_params(struct dm_tree_node *node,
+                                            uint64_t size,
+                                            const struct dm_tree_node_raid_params *p);
+
+/* Version 2 API function taking dm_tree_node_raid_params_v2 for aforementioned extensions. */
+int dm_tree_node_add_raid_target_with_params_v2(struct dm_tree_node *node,
+                                               uint64_t size,
+                                               const struct dm_tree_node_raid_params_v2 *p);
+
+/* Cache feature_flags */
+#define DM_CACHE_FEATURE_WRITEBACK    0x00000001
+#define DM_CACHE_FEATURE_WRITETHROUGH 0x00000002
+#define DM_CACHE_FEATURE_PASSTHROUGH  0x00000004
+#define DM_CACHE_FEATURE_METADATA2    0x00000008 /* cache v1.10 */
+
+struct dm_config_node;
+/*
+ * Use for passing cache policy and all its args e.g.:
+ *
+ * policy_settings {
+ *    migration_threshold=2048
+ *    sequential_threshold=100
+ *    ...
+ * }
+ *
+ * For policy without any parameters use NULL.
+ */
+int dm_tree_node_add_cache_target(struct dm_tree_node *node,
+                                 uint64_t size,
+                                 uint64_t feature_flags, /* DM_CACHE_FEATURE_* */
+                                 const char *metadata_uuid,
+                                 const char *data_uuid,
+                                 const char *origin_uuid,
+                                 const char *policy_name,
+                                 const struct dm_config_node *policy_settings,
+                                 uint32_t data_block_size);
+
+/*
+ * FIXME Add individual cache policy pairs  <key> = value, like:
+ * int dm_tree_node_add_cache_policy_arg(struct dm_tree_node *dnode,
+ *                                   const char *key, uint64_t value);
+ */
+
+/*
+ * Replicator operation mode
+ * Note: API for Replicator is not yet stable
+ */
+typedef enum {
+       DM_REPLICATOR_SYNC,                     /* Synchronous replication */
+       DM_REPLICATOR_ASYNC_WARN,               /* Warn if async replicator is slow */
+       DM_REPLICATOR_ASYNC_STALL,              /* Stall replicator if not fast enough */
+       DM_REPLICATOR_ASYNC_DROP,               /* Drop sites out of sync */
+       DM_REPLICATOR_ASYNC_FAIL,               /* Fail replicator if slow */
+       NUM_DM_REPLICATOR_MODES                 /* Sentinel: number of modes, not a valid mode */
+} dm_replicator_mode_t;
+
+int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
+                                      uint64_t size,
+                                      const char *rlog_uuid,
+                                      const char *rlog_type,
+                                      unsigned rsite_index,
+                                      dm_replicator_mode_t mode,
+                                      uint32_t async_timeout,
+                                      uint64_t fall_behind_data,
+                                      uint32_t fall_behind_ios);
+
+int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node,
+                                          uint64_t size,
+                                          const char *replicator_uuid, /* Replicator control device */
+                                          uint64_t rdevice_index,
+                                          const char *rdev_uuid,       /* Rimage device name/uuid */
+                                          unsigned rsite_index,
+                                          const char *slog_uuid,
+                                          uint32_t slog_flags,         /* Mirror log flags */
+                                          uint32_t slog_region_size);
+/* End of Replicator API */
+
+/*
+ * FIXME: Defines below are based on kernel's dm-thin.c defines
+ * DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
+ * DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
+ */
+#define DM_THIN_MIN_DATA_BLOCK_SIZE (UINT32_C(128))
+#define DM_THIN_MAX_DATA_BLOCK_SIZE (UINT32_C(2097152))
+/*
+ * Max supported size for thin pool metadata device (17112760320 bytes)
+ * Limitation is hardcoded into the kernel and bigger device size
+ * is not accepted.
+ * drivers/md/dm-thin-metadata.h THIN_METADATA_MAX_SECTORS
+ */
+#define DM_THIN_MAX_METADATA_SIZE   (UINT64_C(255) * (1 << 14) * (4096 / (1 << 9)) - 256 * 1024)
+
+int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node,
+                                     uint64_t size,
+                                     uint64_t transaction_id,
+                                     const char *metadata_uuid,
+                                     const char *pool_uuid,
+                                     uint32_t data_block_size,
+                                     uint64_t low_water_mark,
+                                     unsigned skip_block_zeroing);
+
+/* Supported messages for thin provision target */
+/* NOTE(review): the values named in each entry's comment appear to map to
+ * the id1/id2 arguments of dm_tree_node_add_thin_pool_message() - confirm. */
+typedef enum {
+       DM_THIN_MESSAGE_CREATE_SNAP,            /* device_id, origin_id */
+       DM_THIN_MESSAGE_CREATE_THIN,            /* device_id */
+       DM_THIN_MESSAGE_DELETE,                 /* device_id */
+       DM_THIN_MESSAGE_SET_TRANSACTION_ID,     /* current_id, new_id */
+       DM_THIN_MESSAGE_RESERVE_METADATA_SNAP,  /* target version >= 1.1 */
+       DM_THIN_MESSAGE_RELEASE_METADATA_SNAP,  /* target version >= 1.1 */
+} dm_thin_message_t;
+
+int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node,
+                                      dm_thin_message_t type,
+                                      uint64_t id1, uint64_t id2);
+
+/*
+ * Set thin pool discard features
+ *   ignore      - Disable support for discards
+ *   no_passdown - Don't pass discards down to underlying data device,
+ *                 just remove the mapping
+ * Feature is available since version 1.1 of the thin target.
+ */
+int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node,
+                                      unsigned ignore,
+                                      unsigned no_passdown);
+/*
+ * Set error if no space, instead of queueing for thin pool.
+ */
+int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node,
+                                                unsigned error_if_no_space);
+/* Start thin pool with metadata in read-only mode */
+int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node,
+                                        unsigned read_only);
+/*
+ * FIXME: Defines below are based on kernel's dm-thin.c defines
+ * MAX_DEV_ID ((1 << 24) - 1)
+ */
+#define DM_THIN_MAX_DEVICE_ID (UINT32_C((1 << 24) - 1))
+int dm_tree_node_add_thin_target(struct dm_tree_node *node,
+                                uint64_t size,
+                                const char *pool_uuid,
+                                uint32_t device_id);
+
+int dm_tree_node_set_thin_external_origin(struct dm_tree_node *node,
+                                         const char *external_uuid);
+
+void dm_tree_node_set_udev_flags(struct dm_tree_node *node, uint16_t udev_flags);
+
+void dm_tree_node_set_presuspend_node(struct dm_tree_node *node,
+                                     struct dm_tree_node *presuspend_node);
+
+int dm_tree_node_add_target_area(struct dm_tree_node *node,
+                                   const char *dev_name,
+                                   const char *dlid,
+                                   uint64_t offset);
+
+/*
+ * Only for temporarily-missing raid devices where changes are tracked.
+ */
+int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset);
+
+/*
+ * Set readahead (in sectors) after loading the node.
+ */
+void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode,
+                                uint32_t read_ahead,
+                                uint32_t read_ahead_flags);
+
+/*
+ * Set node callback hook before de/activation.
+ * Callback is called before 'activation' of node for activation tree,
+ * or 'deactivation' of node for deactivation tree.
+ */
+typedef enum {
+       DM_NODE_CALLBACK_PRELOADED,   /* Node has preload deps */
+       DM_NODE_CALLBACK_DEACTIVATED, /* Node is deactivated */
+} dm_node_callback_t;
+/* Callback invoked per node; see dm_tree_node_set_callback() below.
+ * NOTE(review): the int return presumably follows the library's usual
+ * 0 = failure / nonzero = success convention - confirm. */
+typedef int (*dm_node_callback_fn) (struct dm_tree_node *node,
+                                   dm_node_callback_t type, void *cb_data);
+void dm_tree_node_set_callback(struct dm_tree_node *node,
+                              dm_node_callback_fn cb, void *cb_data);
+
+void dm_tree_set_cookie(struct dm_tree_node *node, uint32_t cookie);
+uint32_t dm_tree_get_cookie(struct dm_tree_node *node);
+
+/*****************************************************************************
+ * Library functions
+ *****************************************************************************/
+
+/*******************
+ * Memory management
+ *******************/
+
+/*
+ * Never use these functions directly - use the macros following instead.
+ */
+void *dm_malloc_wrapper(size_t s, const char *file, int line)
+       __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_malloc_aligned_wrapper(size_t s, size_t a, const char *file, int line)
+       __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_zalloc_wrapper(size_t s, const char *file, int line)
+       __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_realloc_wrapper(void *p, unsigned int s, const char *file, int line)
+       __attribute__((__warn_unused_result__));
+void dm_free_wrapper(void *ptr);
+char *dm_strdup_wrapper(const char *s, const char *file, int line)
+       __attribute__((__warn_unused_result__));
+int dm_dump_memory_wrapper(void);
+void dm_bounds_check_wrapper(void);
+
+/*
+ * Call-site wrappers: each macro forwards __FILE__/__LINE__ of the
+ * caller to the *_wrapper functions declared above, so allocations
+ * can be attributed to their origin (memory debugging).
+ */
+#define dm_malloc(s) dm_malloc_wrapper((s), __FILE__, __LINE__)
+#define dm_malloc_aligned(s, a) dm_malloc_aligned_wrapper((s), (a),  __FILE__, __LINE__)
+#define dm_zalloc(s) dm_zalloc_wrapper((s), __FILE__, __LINE__)
+#define dm_strdup(s) dm_strdup_wrapper((s), __FILE__, __LINE__)
+#define dm_free(p) dm_free_wrapper(p)
+#define dm_realloc(p, s) dm_realloc_wrapper((p), (s), __FILE__, __LINE__)
+#define dm_dump_memory() dm_dump_memory_wrapper()
+#define dm_bounds_check() dm_bounds_check_wrapper()
+
+/*
+ * The pool allocator is useful when you are going to allocate
+ * lots of memory, use the memory for a bit, and then free the
+ * memory in one go.  A surprising amount of code has this usage
+ * profile.
+ *
+ * You should think of the pool as an infinite, contiguous chunk
+ * of memory.  The front of this chunk of memory contains
+ * allocated objects, the second half is free.  dm_pool_alloc grabs
+ * the next 'size' bytes from the free half, in effect moving it
+ * into the allocated half.  This operation is very efficient.
+ *
+ * dm_pool_free frees the allocated object *and* all objects
+ * allocated after it.  It is important to note this semantic
+ * difference from malloc/free.  This is also extremely
+ * efficient, since a single dm_pool_free can dispose of a large
+ * complex object.
+ *
+ * dm_pool_destroy frees all allocated memory.
+ *
+ * eg, If you are building a binary tree in your program, and
+ * know that you are only ever going to insert into your tree,
+ * and not delete (eg, maintaining a symbol table for a
+ * compiler).  You can create yourself a pool, allocate the nodes
+ * from it, and when the tree becomes redundant call dm_pool_destroy
+ * (no nasty iterating through the tree to free nodes).
+ *
+ * eg, On the other hand if you wanted to repeatedly insert and
+ * remove objects into the tree, you would be better off
+ * allocating the nodes from a free list; you cannot free a
+ * single arbitrary node with pool.
+ */
+
+struct dm_pool;
+
+/* constructor and destructor */
+struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint)
+       __attribute__((__warn_unused_result__));
+void dm_pool_destroy(struct dm_pool *p);
+
+/* simple allocation/free routines */
+void *dm_pool_alloc(struct dm_pool *p, size_t s)
+       __attribute__((__warn_unused_result__));
+void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment)
+       __attribute__((__warn_unused_result__));
+void dm_pool_empty(struct dm_pool *p);
+void dm_pool_free(struct dm_pool *p, void *ptr);
+
+/*
+ * To aid debugging, a pool can be locked. Any modifications made
+ * to the content of the pool while it is locked can be detected.
+ * Default compilation is using a crc checksum to notice modifications.
+ * The pool locking is using the mprotect with the compilation flag
+ * DEBUG_ENFORCE_POOL_LOCKING to enforce the memory protection.
+ */
+/* query pool lock status */
+int dm_pool_locked(struct dm_pool *p);
+/* mark pool as locked */
+int dm_pool_lock(struct dm_pool *p, int crc)
+       __attribute__((__warn_unused_result__));
+/* mark pool as unlocked */
+int dm_pool_unlock(struct dm_pool *p, int crc)
+       __attribute__((__warn_unused_result__));
+
+/*
+ * Object building routines:
+ *
+ * These allow you to 'grow' an object, useful for
+ * building strings, or filling in dynamic
+ * arrays.
+ *
+ * It's probably best explained with an example:
+ *
+ * char *build_string(struct dm_pool *mem)
+ * {
+ *      int i;
+ *      char buffer[16];
+ *
+ *      if (!dm_pool_begin_object(mem, 128))
+ *              return NULL;
+ *
+ *      for (i = 0; i < 50; i++) {
+ *              snprintf(buffer, sizeof(buffer), "%d, ", i);
+ *              if (!dm_pool_grow_object(mem, buffer, 0))
+ *                      goto bad;
+ *      }
+ *
+ *     // add null
+ *      if (!dm_pool_grow_object(mem, "\0", 1))
+ *              goto bad;
+ *
+ *      return dm_pool_end_object(mem);
+ *
+ * bad:
+ *
+ *      dm_pool_abandon_object(mem);
+ *      return NULL;
+ *}
+ *
+ * So start an object by calling dm_pool_begin_object
+ * with a guess at the final object size - if in
+ * doubt make the guess too small.
+ *
+ * Then append chunks of data to your object with
+ * dm_pool_grow_object.  Finally get your object with
+ * a call to dm_pool_end_object.
+ *
+ * Setting delta to 0 means it will use strlen(extra).
+ */
+int dm_pool_begin_object(struct dm_pool *p, size_t hint);
+int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta);
+void *dm_pool_end_object(struct dm_pool *p);
+void dm_pool_abandon_object(struct dm_pool *p);
+
+/* utilities */
+char *dm_pool_strdup(struct dm_pool *p, const char *str)
+       __attribute__((__warn_unused_result__));
+char *dm_pool_strndup(struct dm_pool *p, const char *str, size_t n)
+       __attribute__((__warn_unused_result__));
+void *dm_pool_zalloc(struct dm_pool *p, size_t s)
+       __attribute__((__warn_unused_result__));
+
+/******************
+ * bitset functions
+ ******************/
+
+typedef uint32_t *dm_bitset_t;
+
+dm_bitset_t dm_bitset_create(struct dm_pool *mem, unsigned num_bits);
+void dm_bitset_destroy(dm_bitset_t bs);
+
+int dm_bitset_equal(dm_bitset_t in1, dm_bitset_t in2);
+
+void dm_bit_and(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2);
+void dm_bit_union(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2);
+int dm_bit_get_first(dm_bitset_t bs);
+int dm_bit_get_next(dm_bitset_t bs, int last_bit);
+int dm_bit_get_last(dm_bitset_t bs);
+int dm_bit_get_prev(dm_bitset_t bs, int last_bit);
+
+#define DM_BITS_PER_INT (sizeof(int) * CHAR_BIT)
+
+/*
+ * Bitset layout: bs[0] stores the set's size in bits (note the
+ * "(*(bs) / DM_BITS_PER_INT) + 1" word counts used by the *_all and
+ * copy macros below); the bit words themselves start at bs[1], which
+ * is why every per-bit accessor indexes with "+ 1".
+ */
+/* Test bit i (nonzero if set). */
+#define dm_bit(bs, i) \
+   ((bs)[((i) / DM_BITS_PER_INT) + 1] & (0x1 << ((i) & (DM_BITS_PER_INT - 1))))
+
+/* Set bit i. */
+#define dm_bit_set(bs, i) \
+   ((bs)[((i) / DM_BITS_PER_INT) + 1] |= (0x1 << ((i) & (DM_BITS_PER_INT - 1))))
+
+/* Clear bit i. */
+#define dm_bit_clear(bs, i) \
+   ((bs)[((i) / DM_BITS_PER_INT) + 1] &= ~(0x1 << ((i) & (DM_BITS_PER_INT - 1))))
+
+/* Set every bit in the set. */
+#define dm_bit_set_all(bs) \
+   memset((bs) + 1, -1, ((*(bs) / DM_BITS_PER_INT) + 1) * sizeof(int))
+
+/* Clear every bit in the set. */
+#define dm_bit_clear_all(bs) \
+   memset((bs) + 1, 0, ((*(bs) / DM_BITS_PER_INT) + 1) * sizeof(int))
+
+/* Copy bits from bs2 into bs1 (size taken from bs2's bit count). */
+#define dm_bit_copy(bs1, bs2) \
+   memcpy((bs1) + 1, (bs2) + 1, ((*(bs2) / DM_BITS_PER_INT) + 1) * sizeof(int))
+
+/*
+ * Parse a string representation of a bitset into a dm_bitset_t. The
+ * notation used is identical to the kernel bitmap parser (cpuset etc.)
+ * and supports both lists ("1,2,3") and ranges ("1-2,5-8"). If the mem
+ * parameter is NULL memory for the bitset will be allocated using
+ * dm_malloc(). Otherwise the bitset will be allocated using the supplied
+ * dm_pool.
+ */
+dm_bitset_t dm_bitset_parse_list(const char *str, struct dm_pool *mem,
+                                size_t min_num_bits);
+
+/* Returns number of set bits (population count) in a 32-bit value. */
+static inline unsigned hweight32(uint32_t i)
+{
+       /* SWAR reduction: each step sums adjacent bit-count fields of
+        * doubled width (1-bit pairs -> 2-bit -> 4-bit -> 8-bit -> 16-bit),
+        * leaving the total count in the low bits. */
+       unsigned r = (i & 0x55555555) + ((i >> 1) & 0x55555555);
+
+       r =    (r & 0x33333333) + ((r >>  2) & 0x33333333);
+       r =    (r & 0x0F0F0F0F) + ((r >>  4) & 0x0F0F0F0F);
+       r =    (r & 0x00FF00FF) + ((r >>  8) & 0x00FF00FF);
+       return (r & 0x0000FFFF) + ((r >> 16) & 0x0000FFFF);
+}
+
+/****************
+ * hash functions
+ ****************/
+
+struct dm_hash_table;
+struct dm_hash_node;
+
+typedef void (*dm_hash_iterate_fn) (void *data);
+
+struct dm_hash_table *dm_hash_create(unsigned size_hint)
+       __attribute__((__warn_unused_result__));
+void dm_hash_destroy(struct dm_hash_table *t);
+void dm_hash_wipe(struct dm_hash_table *t);
+
+void *dm_hash_lookup(struct dm_hash_table *t, const char *key);
+int dm_hash_insert(struct dm_hash_table *t, const char *key, void *data);
+void dm_hash_remove(struct dm_hash_table *t, const char *key);
+
+void *dm_hash_lookup_binary(struct dm_hash_table *t, const void *key, uint32_t len);
+int dm_hash_insert_binary(struct dm_hash_table *t, const void *key, uint32_t len,
+                         void *data);
+void dm_hash_remove_binary(struct dm_hash_table *t, const void *key, uint32_t len);
+
+unsigned dm_hash_get_num_entries(struct dm_hash_table *t);
+void dm_hash_iter(struct dm_hash_table *t, dm_hash_iterate_fn f);
+
+char *dm_hash_get_key(struct dm_hash_table *t, struct dm_hash_node *n);
+void *dm_hash_get_data(struct dm_hash_table *t, struct dm_hash_node *n);
+struct dm_hash_node *dm_hash_get_first(struct dm_hash_table *t);
+struct dm_hash_node *dm_hash_get_next(struct dm_hash_table *t, struct dm_hash_node *n);
+
+/*
+ * dm_hash_insert() replaces the value of an existing
+ * entry with a matching key if one exists.  Otherwise
+ * it adds a new entry.
+ *
+ * dm_hash_insert_with_val() inserts a new entry if
+ * another entry with the same key already exists.
+ * val_len is the size of the data being inserted.
+ *
+ * If two entries with the same key exist,
+ * (added using dm_hash_insert_allow_multiple), then:
+ * . dm_hash_lookup() returns the first one it finds, and
+ *   dm_hash_lookup_with_val() returns the one with a matching
+ *   val_len/val.
+ * . dm_hash_remove() removes the first one it finds, and
+ *   dm_hash_remove_with_val() removes the one with a matching
+ *   val_len/val.
+ *
+ * If a single entry with a given key exists, and it has
+ * zero val_len, then:
+ * . dm_hash_lookup() returns it
+ * . dm_hash_lookup_with_val(val_len=0) returns it
+ * . dm_hash_remove() removes it
+ * . dm_hash_remove_with_val(val_len=0) removes it
+ *
+ * dm_hash_lookup_with_count() is a single call that will
+ * both lookup a key's value and check if there is more
+ * than one entry with the given key.
+ *
+ * (It is not meant to retrieve all the entries with the
+ * given key.  In the common case where a single entry exists
+ * for the key, it is useful to have a single call that will
+ * both look up the value and indicate if multiple values
+ * exist for the key.)
+ *
+ * dm_hash_lookup_with_count:
+ * . If no entries exist, the function returns NULL, and
+ *   the count is set to 0.
+ * . If only one entry exists, the value of that entry is
+ *   returned and count is set to 1.
+ * . If N entries exists, the value of the first entry is
+ *   returned and count is set to N.
+ */
+
+void *dm_hash_lookup_with_val(struct dm_hash_table *t, const char *key,
+                              const void *val, uint32_t val_len);
+void dm_hash_remove_with_val(struct dm_hash_table *t, const char *key,
+                             const void *val, uint32_t val_len);
+int dm_hash_insert_allow_multiple(struct dm_hash_table *t, const char *key,
+                                  const void *val, uint32_t val_len);
+void *dm_hash_lookup_with_count(struct dm_hash_table *t, const char *key, int *count);
+
+
+#define dm_hash_iterate(v, h) \
+       for (v = dm_hash_get_first((h)); v; \
+            v = dm_hash_get_next((h), v))
+
+/****************
+ * list functions
+ ****************/
+
+/*
+ * A list consists of a list head plus elements.
+ * Each element has 'next' and 'previous' pointers.
+ * The list head's pointers point to the first and the last element.
+ */
+
+/* Doubly-linked list node / head (see the description above). */
+struct dm_list {
+       struct dm_list *n, *p;  /* next and previous element pointers */
+};
+
+/*
+ * String list.
+ */
+struct dm_str_list {
+       struct dm_list list;    /* embedded list linkage */
+       const char *str;        /* string carried by this node */
+};
+
+/*
+ * Initialise a list before use.
+ * The list head's next and previous pointers point back to itself.
+ */
+#define DM_LIST_HEAD_INIT(name)         { &(name), &(name) }
+#define DM_LIST_INIT(name)     struct dm_list name = DM_LIST_HEAD_INIT(name)
+void dm_list_init(struct dm_list *head);
+
+/*
+ * Insert an element before 'head'.
+ * If 'head' is the list head, this adds an element to the end of the list.
+ */
+void dm_list_add(struct dm_list *head, struct dm_list *elem);
+
+/*
+ * Insert an element after 'head'.
+ * If 'head' is the list head, this adds an element to the front of the list.
+ */
+void dm_list_add_h(struct dm_list *head, struct dm_list *elem);
+
+/*
+ * Delete an element from its list.
+ * Note that this doesn't change the element itself - it may still be safe
+ * to follow its pointers.
+ */
+void dm_list_del(struct dm_list *elem);
+
+/*
+ * Remove an element from existing list and insert before 'head'.
+ */
+void dm_list_move(struct dm_list *head, struct dm_list *elem);
+
+/*
+ * Join 'head1' to the end of 'head'.
+ */
+void dm_list_splice(struct dm_list *head, struct dm_list *head1);
+
+/*
+ * Is the list empty?
+ */
+int dm_list_empty(const struct dm_list *head);
+
+/*
+ * Is this the first element of the list?
+ */
+int dm_list_start(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Is this the last element of the list?
+ */
+int dm_list_end(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Return first element of the list or NULL if empty
+ */
+struct dm_list *dm_list_first(const struct dm_list *head);
+
+/*
+ * Return last element of the list or NULL if empty
+ */
+struct dm_list *dm_list_last(const struct dm_list *head);
+
+/*
+ * Return the previous element of the list, or NULL if we've reached the start.
+ */
+struct dm_list *dm_list_prev(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Return the next element of the list, or NULL if we've reached the end.
+ */
+struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Given the address v of an instance of 'struct dm_list' called 'head'
+ * contained in a structure of type t, return the containing structure.
+ */
+#define dm_list_struct_base(v, t, head) \
+    ((t *)((const char *)(v) - (const char *)&((t *) 0)->head))
+
+/*
+ * Given the address v of an instance of 'struct dm_list list' contained in
+ * a structure of type t, return the containing structure.
+ */
+#define dm_list_item(v, t) dm_list_struct_base((v), t, list)
+
+/*
+ * Given the address v of one known element e in a known structure of type t,
+ * return another element f.
+ */
+#define dm_struct_field(v, t, e, f) \
+    (((t *)((uintptr_t)(v) - (uintptr_t)&((t *) 0)->e))->f)
+
+/*
+ * Given the address v of a known element e in a known structure of type t,
+ * return the list head 'list'
+ */
+#define dm_list_head(v, t, e) dm_struct_field(v, t, e, list)
+
+/*
+ * Set v to each element of a list in turn.
+ */
+#define dm_list_iterate(v, head) \
+       for (v = (head)->n; v != head; v = v->n)
+
+/*
+ * Set v to each element in a list in turn, starting from the element
+ * in front of 'start'.
+ * You can use this to 'unwind' a list_iterate and back out actions on
+ * already-processed elements.
+ * If 'start' is 'head' it walks the list backwards.
+ */
+#define dm_list_uniterate(v, head, start) \
+       for (v = (start)->p; v != head; v = v->p)
+
+/*
+ * A safe way to walk a list and delete and free some elements along
+ * the way.
+ * t must be defined as a temporary variable of the same type as v.
+ */
+#define dm_list_iterate_safe(v, t, head) \
+       for (v = (head)->n, t = v->n; v != head; v = t, t = v->n)
+
+/*
+ * Walk a list, setting 'v' in turn to the containing structure of each item.
+ * The containing structure should be the same type as 'v'.
+ * The 'struct dm_list' variable within the containing structure is 'field'.
+ */
+#define dm_list_iterate_items_gen(v, head, field) \
+       for (v = dm_list_struct_base((head)->n, __typeof__(*v), field); \
+            &v->field != (head); \
+            v = dm_list_struct_base(v->field.n, __typeof__(*v), field))
+
+/*
+ * Walk a list, setting 'v' in turn to the containing structure of each item.
+ * The containing structure should be the same type as 'v'.
+ * The list should be 'struct dm_list list' within the containing structure.
+ */
+#define dm_list_iterate_items(v, head) dm_list_iterate_items_gen(v, (head), list)
+
+/*
+ * Walk a list, setting 'v' in turn to the containing structure of each item.
+ * The containing structure should be the same type as 'v'.
+ * The 'struct dm_list' variable within the containing structure is 'field'.
+ * t must be defined as a temporary variable of the same type as v.
+ */
+#define dm_list_iterate_items_gen_safe(v, t, head, field) \
+       for (v = dm_list_struct_base((head)->n, __typeof__(*v), field), \
+            t = dm_list_struct_base(v->field.n, __typeof__(*v), field); \
+            &v->field != (head); \
+            v = t, t = dm_list_struct_base(v->field.n, __typeof__(*v), field))
+/*
+ * Walk a list, setting 'v' in turn to the containing structure of each item.
+ * The containing structure should be the same type as 'v'.
+ * The list should be 'struct dm_list list' within the containing structure.
+ * t must be defined as a temporary variable of the same type as v.
+ */
+#define dm_list_iterate_items_safe(v, t, head) \
+       dm_list_iterate_items_gen_safe(v, t, (head), list)
+
+/*
+ * Walk a list backwards, setting 'v' in turn to the containing structure
+ * of each item.
+ * The containing structure should be the same type as 'v'.
+ * The 'struct dm_list' variable within the containing structure is 'field'.
+ */
+#define dm_list_iterate_back_items_gen(v, head, field) \
+       for (v = dm_list_struct_base((head)->p, __typeof__(*v), field); \
+            &v->field != (head); \
+            v = dm_list_struct_base(v->field.p, __typeof__(*v), field))
+
+/*
+ * Walk a list backwards, setting 'v' in turn to the containing structure
+ * of each item.
+ * The containing structure should be the same type as 'v'.
+ * The list should be 'struct dm_list list' within the containing structure.
+ */
+#define dm_list_iterate_back_items(v, head) dm_list_iterate_back_items_gen(v, (head), list)
+
+/*
+ * Return the number of elements in a list by walking it.
+ */
+unsigned int dm_list_size(const struct dm_list *head);
+
+/*********
+ * selinux
+ *********/
+
+/*
+ * Obtain SELinux security context assigned for the path and set this
+ * context for creating a new file system object. This security context
+ * is global and it is used until reset to default policy behaviour
+ * by calling 'dm_prepare_selinux_context(NULL, 0)'.
+ */
+int dm_prepare_selinux_context(const char *path, mode_t mode);
+/*
+ * Set SELinux context for existing file system object.
+ */
+int dm_set_selinux_context(const char *path, mode_t mode);
+
+/*********************
+ * string manipulation
+ *********************/
+
+/*
+ * Break up the name of a mapped device into its constituent
+ * Volume Group, Logical Volume and Layer (if present).
+ * If mem is supplied, the result is allocated from the mempool.
+ * Otherwise the strings are changed in situ.
+ */
+int dm_split_lvm_name(struct dm_pool *mem, const char *dmname,
+                     char **vgname, char **lvname, char **layer);
+
+/*
+ * Destructively split buffer into NULL-separated words in argv.
+ * Returns number of words.
+ */
+int dm_split_words(char *buffer, unsigned max,
+                  unsigned ignore_comments, /* Not implemented */
+                  char **argv);
+
+/*
+ * Returns -1 if buffer too small
+ */
+int dm_snprintf(char *buf, size_t bufsize, const char *format, ...)
+    __attribute__ ((format(printf, 3, 4)));
+
+/*
+ * Returns pointer to the last component of the path.
+ */
+const char *dm_basename(const char *path);
+
+/*
+ * Returns number of occurrences of 'c' in 'str' of length 'size'.
+ */
+unsigned dm_count_chars(const char *str, size_t len, const int c);
+
+/*
+ * Length of string after escaping double quotes and backslashes.
+ */
+size_t dm_escaped_len(const char *str);
+
+/*
+ * <vg>-<lv>-<layer> or if !layer just <vg>-<lv>.
+ */
+char *dm_build_dm_name(struct dm_pool *mem, const char *vgname,
+                      const char *lvname, const char *layer);
+char *dm_build_dm_uuid(struct dm_pool *mem, const char *prefix, const char *lvid, const char *layer);
+
+/*
+ * Copies a string, quoting double quotes with backslashes.
+ */
+char *dm_escape_double_quotes(char *out, const char *src);
+
+/*
+ * Undo quoting in situ.
+ */
+void dm_unescape_double_quotes(char *src);
+
+/*
+ * Unescape colons and "at" signs in situ and save the substrings
+ * starting at the position of the first unescaped colon and the
+ * first unescaped "at" sign. This is normally used to unescape
+ * device names used as PVs.
+ */
+void dm_unescape_colons_and_at_signs(char *src,
+                                    char **substr_first_unquoted_colon,
+                                    char **substr_first_unquoted_at_sign);
+
+/*
+ * Replacement for strncpy() function.
+ *
+ * Copies no more than n bytes from string pointed by src to the buffer
+ * pointed by dest and ensure string is finished with '\0'.
+ * Returns 0 if the whole string does not fit.
+ */
+int dm_strncpy(char *dest, const char *src, size_t n);
+
+/*
+ * Recognize unit specifier in the 'units' arg and return a factor
+ * representing that unit. If the 'units' contains a prefix with digits,
+ * the 'units' is considered to be a custom unit.
+ *
+ * Also, set 'unit_type' output arg to the character that represents
+ * the unit specified. The 'unit_type' character equals to the unit
+ * character itself recognized in the 'units' arg for canonical units.
+ * Otherwise, the 'unit_type' character is set to 'U' for custom unit.
+ *
+ * An example for k/K canonical units and 8k/8K custom units:
+ *
+ *   units  unit_type  return value (factor)
+ *   k      k          1024
+ *   K      K          1000
+ *   8k     U          1024*8
+ *   8K     U          1000*8
+ *   etc...
+ *
+ * Recognized units:
+ *
+ *   h/H - human readable (returns 1 for both)
+ *   b/B - byte (returns 1 for both)
+ *   s/S - sector (returns 512 for both)
+ *   k/K - kilo (returns 1024/1000 respectively)
+ *   m/M - mega (returns 1024^2/1000^2 respectively)
+ *   g/G - giga (returns 1024^3/1000^3 respectively)
+ *   t/T - tera (returns 1024^4/1000^4 respectively)
+ *   p/P - peta (returns 1024^5/1000^5 respectively)
+ *   e/E - exa (returns 1024^6/1000^6 respectively)
+ *
+ * Only one units character is allowed in the 'units' arg
+ * if strict mode is enabled by 'strict' arg.
+ *
+ * The 'endptr' output arg, if not NULL, saves the pointer
+ * in the 'units' string which follows the unit specifier
+ * recognized (IOW the position where the parsing of the
+ * unit specifier stopped).
+ *
+ * Returns the unit factor or 0 if no unit is recognized.
+ */
+uint64_t dm_units_to_factor(const char *units, char *unit_type,
+                           int strict, const char **endptr);
+
+/*
+ * Type of unit specifier used by dm_size_to_string().
+ */
+typedef enum {
+       DM_SIZE_LONG = 0,       /* Megabyte */
+       DM_SIZE_SHORT = 1,      /* MB or MiB */
+       DM_SIZE_UNIT = 2        /* M or m */
+} dm_size_suffix_t;
+
+/*
+ * Convert a size (in 512-byte sectors) into a printable string using units of unit_type.
+ * An upper-case unit_type indicates output units based on powers of 1000 are
+ * required; a lower-case unit_type indicates powers of 1024.
+ * For correct operation, unit_factor must be one of:
+ *     0 - the correct value will be calculated internally;
+ *   or the output from dm_units_to_factor() corresponding to unit_type;
+ *   or 'u' or 'U', an arbitrary number of bytes to use as the power base.
+ * Set include_suffix to 1 to include a suffix of suffix_type.
+ * Set use_si_units to 0 for suffixes that don't distinguish between 1000 and 1024.
+ * Set use_si_units to 1 for a suffix that does distinguish.
+ */
+const char *dm_size_to_string(struct dm_pool *mem, uint64_t size,
+                             char unit_type, int use_si_units,
+                             uint64_t unit_factor, int include_suffix,
+                             dm_size_suffix_t suffix_type);
+
+/**************************
+ * file/stream manipulation
+ **************************/
+
+/*
+ * Create a directory (with parent directories if necessary).
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_create_dir(const char *dir);
+
+int dm_is_empty_dir(const char *dir);
+
+/*
+ * Close a stream, with nicer error checking than fclose's.
+ * Derived from gnulib's close-stream.c.
+ *
+ * Close "stream".  Return 0 if successful, and EOF (setting errno)
+ * otherwise.  Upon failure, set errno to 0 if the error number
+ * cannot be determined.  Useful mainly for writable streams.
+ */
+int dm_fclose(FILE *stream);
+
+/*
+ * Returns size of a buffer which is allocated with dm_malloc.
+ * Pointer to the buffer is stored in *buf.
+ * Returns -1 on failure leaving buf undefined.
+ */
+int dm_asprintf(char **buf, const char *format, ...)
+    __attribute__ ((format(printf, 2, 3)));
+int dm_vasprintf(char **buf, const char *format, va_list ap)
+    __attribute__ ((format(printf, 2, 0)));
+
+/*
+ * create lockfile (pidfile) - create and lock a lock file
+ * @lockfile: location of lock file
+ *
+ * Returns: 1 on success, 0 otherwise, errno is handled internally
+ */
+int dm_create_lockfile(const char* lockfile);
+
+/*
+ * Query whether a daemon is running based on its lockfile
+ *
+ * Returns: 1 if running, 0 if not
+ */
+int dm_daemon_is_running(const char* lockfile);
+
+/*********************
+ * regular expressions
+ *********************/
+struct dm_regex;
+
+/*
+ * Initialise an array of num patterns for matching.
+ * Uses memory from mem.
+ */
+struct dm_regex *dm_regex_create(struct dm_pool *mem, const char * const *patterns,
+                                unsigned num_patterns);
+
+/*
+ * Match string s against the patterns.
+ * Returns the index of the highest pattern in the array that matches,
+ * or -1 if none match.
+ */
+int dm_regex_match(struct dm_regex *regex, const char *s);
+
+/*
+ * This is useful for regression testing only.  The idea is if two
+ * fingerprints are different, then the two dfas are certainly not
+ * isomorphic.  If two fingerprints _are_ the same then it's very likely
+ * that the dfas are isomorphic.
+ *
+ * This function must be called before any matching is done.
+ */
+uint32_t dm_regex_fingerprint(struct dm_regex *regex);
+
+/******************
+ * percent handling
+ ******************/
+/*
+ * A fixed-point representation of percent values. One percent equals
+ * DM_PERCENT_1 as defined below. Values that are not multiples of DM_PERCENT_1
+ * represent fractions, with precision of 1/1000000 of a percent. See
+ * dm_percent_to_float for a conversion to a floating-point representation.
+ *
+ * You should always use dm_make_percent when building dm_percent_t values. The
+ * implementation of dm_make_percent is biased towards the middle: it ensures that
+ * the result is DM_PERCENT_0 or DM_PERCENT_100 if and only if this is the actual
+ * value -- it never rounds any intermediate value (> 0 or < 100) to either 0
+ * or 100.
+*/
+#define DM_PERCENT_CHAR '%'
+
+typedef enum {
+       DM_PERCENT_0 = 0,
+       DM_PERCENT_1 = 1000000,                 /* fixed-point scale factor: one percent */
+       DM_PERCENT_100 = 100 * DM_PERCENT_1,
+       DM_PERCENT_INVALID = -1,                /* negative sentinel: no valid percentage */
+       DM_PERCENT_FAILED = -2                  /* negative sentinel: calculation failed */
+} dm_percent_range_t;
+
+/* Percentage in DM_PERCENT_1 fixed-point units (see comment above). */
+typedef int32_t dm_percent_t;
+
+float dm_percent_to_float(dm_percent_t percent);
+/*
+ * Return adjusted/rounded float for better percent value printing.
+ * Function ensures for given precision of digits:
+ * 100.0% returns only when the value is DM_PERCENT_100
+ *        for close smaller values rounds to nearest smaller value
+ * 0.0% returns only for value DM_PERCENT_0
+ *        for close bigger values rounds to nearest bigger value
+ * In all other cases returns same value as dm_percent_to_float()
+ */
+float dm_percent_to_round_float(dm_percent_t percent, unsigned digits);
+dm_percent_t dm_make_percent(uint64_t numerator, uint64_t denominator);
+
+/********************
+ * timestamp handling
+ ********************/
+
+/*
+ * Create a dm_timestamp object to use with dm_timestamp_get.
+ */
+struct dm_timestamp *dm_timestamp_alloc(void);
+
+/*
+ * Update dm_timestamp object to represent the current time.
+ */
+int dm_timestamp_get(struct dm_timestamp *ts);
+
+/*
+ * Copy a timestamp from ts_old to ts_new.
+ */
+void dm_timestamp_copy(struct dm_timestamp *ts_new, struct dm_timestamp *ts_old);
+
+/*
+ * Compare two timestamps.
+ *
+ * Return: -1 if ts1 is less than ts2
+ *         0 if ts1 is equal to ts2
+ *          1 if ts1 is greater than ts2
+ */
+int dm_timestamp_compare(struct dm_timestamp *ts1, struct dm_timestamp *ts2);
+
+/*
+ * Return the absolute difference in nanoseconds between
+ * the dm_timestamp objects ts1 and ts2.
+ *
+ * Callers that need to know whether ts1 is before, equal to, or after ts2
+ * in addition to the magnitude should use dm_timestamp_compare.
+ */
+uint64_t dm_timestamp_delta(struct dm_timestamp *ts1, struct dm_timestamp *ts2);
+
+/*
+ * Destroy a dm_timestamp object.
+ */
+void dm_timestamp_destroy(struct dm_timestamp *ts);
+
+/*********************
+ * reporting functions
+ *********************/
+
+/*
+ * Descriptor for one reportable object type; an array of these is
+ * passed as the 'types' argument to dm_report_init().
+ */
+struct dm_report_object_type {
+       uint32_t id;                    /* Powers of 2 */
+       const char *desc;
+       const char *prefix;             /* field id string prefix (optional) */
+       /* FIXME: convert to proper usage of const pointers here */
+       void *(*data_fn)(void *object); /* callback from report_object() */
+};
+
+struct dm_report_field;
+
+/*
+ * dm_report_field_type flags
+ */
+#define DM_REPORT_FIELD_MASK                           0x00000FFF
+#define DM_REPORT_FIELD_ALIGN_MASK                     0x0000000F
+#define DM_REPORT_FIELD_ALIGN_LEFT                     0x00000001
+#define DM_REPORT_FIELD_ALIGN_RIGHT                    0x00000002
+#define DM_REPORT_FIELD_TYPE_MASK                      0x00000FF0
+#define DM_REPORT_FIELD_TYPE_NONE                      0x00000000
+#define DM_REPORT_FIELD_TYPE_STRING                    0x00000010
+#define DM_REPORT_FIELD_TYPE_NUMBER                    0x00000020
+#define DM_REPORT_FIELD_TYPE_SIZE                      0x00000040
+#define DM_REPORT_FIELD_TYPE_PERCENT                   0x00000080
+#define DM_REPORT_FIELD_TYPE_STRING_LIST               0x00000100
+#define DM_REPORT_FIELD_TYPE_TIME                      0x00000200
+
+/* For use with reserved values only! */
+#define DM_REPORT_FIELD_RESERVED_VALUE_MASK            0x0000000F
+#define DM_REPORT_FIELD_RESERVED_VALUE_NAMED           0x00000001 /* only named value, less strict form of reservation */
+#define DM_REPORT_FIELD_RESERVED_VALUE_RANGE           0x00000002 /* value is range - low and high value defined */
+#define DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE   0x00000004 /* value is computed in runtime */
+#define DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES     0x00000008 /* value names are recognized in runtime */
+
+#define DM_REPORT_FIELD_TYPE_ID_LEN 32
+#define DM_REPORT_FIELD_TYPE_HEADING_LEN 32
+
+struct dm_report;
+
+/*
+ * Descriptor for a single report field; an array of these is passed as
+ * the 'fields' argument to dm_report_init().
+ */
+struct dm_report_field_type {
+       uint32_t type;          /* object type id */
+       uint32_t flags;         /* DM_REPORT_FIELD_* */
+       uint32_t offset;        /* byte offset in the object */
+       int32_t width;          /* default width */
+       /* string used to specify the field */
+       const char id[DM_REPORT_FIELD_TYPE_ID_LEN];
+       /* string printed in header */
+       const char heading[DM_REPORT_FIELD_TYPE_HEADING_LEN];
+       int (*report_fn)(struct dm_report *rh, struct dm_pool *mem,
+                        struct dm_report_field *field, const void *data,
+                        void *private_data);
+       const char *desc;       /* description of the field */
+};
+
+/*
+ * Per-field reserved value.
+ */
+struct dm_report_field_reserved_value {
+       /* field_num is the position of the field in 'fields'
+          array passed to dm_report_init_with_selection */
+       uint32_t field_num;
+       /* the value is of the same type as the field
+          identified by field_num */
+       const void *value;
+};
+
+/*
+ * Reserved value is a 'value' that is used directly if any of the 'names' is hit
+ * or in case of fuzzy names, if such fuzzy name matches.
+ *
+ * If type is any of DM_REPORT_FIELD_TYPE_*, the reserved value is recognized
+ * for all fields of that type.
+ *
+ * If type is DM_REPORT_FIELD_TYPE_NONE, the reserved value is recognized
+ * for the exact field specified - hence the type of the value is automatically
+ * the same as the type of the field itself.
+ *
+ * The array of reserved values is used to initialize reporting with
+ * selection enabled (see also dm_report_init_with_selection function).
+ */
+struct dm_report_reserved_value {
+       const uint32_t type;            /* DM_REPORT_FIELD_RESERVED_VALUE_* and DM_REPORT_FIELD_TYPE_*  */
+       const void *value;              /* reserved value:
+                                               uint64_t for DM_REPORT_FIELD_TYPE_NUMBER
+                                               uint64_t for DM_REPORT_FIELD_TYPE_SIZE (number of 512-byte sectors)
+                                               uint64_t for DM_REPORT_FIELD_TYPE_PERCENT
+                                               const char* for DM_REPORT_FIELD_TYPE_STRING
+                                               struct dm_report_field_reserved_value for DM_REPORT_FIELD_TYPE_NONE
+                                               dm_report_reserved_handler* if DM_REPORT_FIELD_RESERVED_VALUE_{DYNAMIC_VALUE,FUZZY_NAMES} is used */
+       const char **names;             /* null-terminated array of static names for this reserved value */
+       const char *description;        /* description of the reserved value */
+};
+
+/*
+ * Available actions for dm_report_reserved_handler.
+ * (The comment previously referred to "dm_report_reserved_value_handler",
+ * which does not exist; the handler typedef below is dm_report_reserved_handler.)
+ */
+typedef enum {
+       DM_REPORT_RESERVED_PARSE_FUZZY_NAME,
+       DM_REPORT_RESERVED_GET_DYNAMIC_VALUE    /* no trailing comma: C90-clean, matches other enums in this header */
+} dm_report_reserved_action_t;
+
+/*
+ * Generic reserved value handler to process reserved value names and/or values.
+ *
+ * Actions and their input/output:
+ *
+ *     DM_REPORT_RESERVED_PARSE_FUZZY_NAME
+ *             data_in:  const char *fuzzy_name
+ *             data_out: const char *canonical_name, NULL if fuzzy_name not recognized
+ *
+ *     DM_REPORT_RESERVED_GET_DYNAMIC_VALUE
+ *             data_in:  const char *canonical_name
+ *             data_out: void *value, NULL if canonical_name not recognized
+ *
+ * All actions return:
+ *
+ *     -1 if action not implemented
+ *     0 on error
+ *     1 on success
+ */
+typedef int (*dm_report_reserved_handler) (struct dm_report *rh,
+                                          struct dm_pool *mem,
+                                          uint32_t field_num,
+                                          dm_report_reserved_action_t action,
+                                          const void *data_in,
+                                          const void **data_out);
+
+/*
+ * The dm_report_value_cache_{set,get} are helper functions to store and retrieve
+ * various values used during reporting (dm_report_field_type.report_fn) and/or
+ * selection processing (dm_report_reserved_handler instances) to avoid
+ * recalculation of these values or to share values among calls.
+ */
+int dm_report_value_cache_set(struct dm_report *rh, const char *name, const void *data);
+const void *dm_report_value_cache_get(struct dm_report *rh, const char *name);
+/*
+ * dm_report_init output_flags
+ */
+#define DM_REPORT_OUTPUT_MASK                  0x000000FF
+#define DM_REPORT_OUTPUT_ALIGNED               0x00000001
+#define DM_REPORT_OUTPUT_BUFFERED              0x00000002
+#define DM_REPORT_OUTPUT_HEADINGS              0x00000004
+#define DM_REPORT_OUTPUT_FIELD_NAME_PREFIX     0x00000008
+#define DM_REPORT_OUTPUT_FIELD_UNQUOTED                0x00000010
+#define DM_REPORT_OUTPUT_COLUMNS_AS_ROWS       0x00000020
+#define DM_REPORT_OUTPUT_MULTIPLE_TIMES                0x00000040
+
+struct dm_report *dm_report_init(uint32_t *report_types,
+                                const struct dm_report_object_type *types,
+                                const struct dm_report_field_type *fields,
+                                const char *output_fields,
+                                const char *output_separator,
+                                uint32_t output_flags,
+                                const char *sort_keys,
+                                void *private_data);
+struct dm_report *dm_report_init_with_selection(uint32_t *report_types,
+                                               const struct dm_report_object_type *types,
+                                               const struct dm_report_field_type *fields,
+                                               const char *output_fields,
+                                               const char *output_separator,
+                                               uint32_t output_flags,
+                                               const char *sort_keys,
+                                               const char *selection,
+                                               const struct dm_report_reserved_value reserved_values[],
+                                               void *private_data);
+/*
+ * Report an object, pass it through the selection criteria if they
+ * are present and display the result on output if it passes the criteria.
+ */
+int dm_report_object(struct dm_report *rh, void *object);
+/*
+ * The same as dm_report_object, but display the result on output only if
+ * 'do_output' arg is set. Also, save the result of selection in 'selected'
+ * arg if it's not NULL (either 1 if the object passes, otherwise 0).
+ */
+int dm_report_object_is_selected(struct dm_report *rh, void *object, int do_output, int *selected);
+
+/*
+ * Compact report output so that if field value is empty for all rows in
+ * the report, drop the field from output completely (including headers).
+ * Compact output is applicable only if report is buffered, otherwise
+ * this function has no effect.
+ */
+int dm_report_compact_fields(struct dm_report *rh);
+
+/*
+ * The same as dm_report_compact_fields, but for selected fields only.
+ * The "fields" arg is comma separated list of field names (the same format
+ * as used for "output_fields" arg in dm_report_init fn).
+ */
+int dm_report_compact_given_fields(struct dm_report *rh, const char *fields);
+
+/*
+ * Returns 1 if there is no data waiting to be output.
+ */
+int dm_report_is_empty(struct dm_report *rh);
+
+/*
+ * Destroy report content without doing output.
+ */
+void dm_report_destroy_rows(struct dm_report *rh);
+
+int dm_report_output(struct dm_report *rh);
+
+/*
+ * Output the report headings for a columns-based report, even if they
+ * have already been shown. Useful for repeating reports that wish to
+ * issue a periodic reminder of the column headings.
+ */
+int dm_report_column_headings(struct dm_report *rh);
+
+void dm_report_free(struct dm_report *rh);
+
+/*
+ * Prefix added to each field name with DM_REPORT_OUTPUT_FIELD_NAME_PREFIX
+ */
+int dm_report_set_output_field_name_prefix(struct dm_report *rh,
+                                          const char *report_prefix);
+
+int dm_report_set_selection(struct dm_report *rh, const char *selection);
+
+/*
+ * Report functions are provided for simple data types.
+ * They take care of allocating copies of the data.
+ */
+int dm_report_field_string(struct dm_report *rh, struct dm_report_field *field,
+                          const char *const *data);
+int dm_report_field_string_list(struct dm_report *rh, struct dm_report_field *field,
+                               const struct dm_list *data, const char *delimiter);
+int dm_report_field_string_list_unsorted(struct dm_report *rh, struct dm_report_field *field,
+                                        const struct dm_list *data, const char *delimiter);
+int dm_report_field_int32(struct dm_report *rh, struct dm_report_field *field,
+                         const int32_t *data);
+int dm_report_field_uint32(struct dm_report *rh, struct dm_report_field *field,
+                          const uint32_t *data);
+int dm_report_field_int(struct dm_report *rh, struct dm_report_field *field,
+                       const int *data);
+int dm_report_field_uint64(struct dm_report *rh, struct dm_report_field *field,
+                          const uint64_t *data);
+int dm_report_field_percent(struct dm_report *rh, struct dm_report_field *field,
+                           const dm_percent_t *data);
+
+/*
+ * For custom fields, allocate the data in 'mem' and use
+ * dm_report_field_set_value().
+ * 'sortvalue' may be NULL if it matches 'value'
+ */
+void dm_report_field_set_value(struct dm_report_field *field, const void *value,
+                              const void *sortvalue);
+
+/*
+ * Report group support.
+ */
+struct dm_report_group;
+
+/* Output format of a report group (see dm_report_group_create). */
+typedef enum {
+       DM_REPORT_GROUP_SINGLE,
+       DM_REPORT_GROUP_BASIC,
+       DM_REPORT_GROUP_JSON
+} dm_report_group_type_t;
+
+struct dm_report_group *dm_report_group_create(dm_report_group_type_t type, void *data);
+int dm_report_group_push(struct dm_report_group *group, struct dm_report *report, void *data);
+int dm_report_group_pop(struct dm_report_group *group);
+int dm_report_group_output_and_pop_all(struct dm_report_group *group);
+int dm_report_group_destroy(struct dm_report_group *group);
+
+/*
+ * Stats counter access methods
+ *
+ * Each method returns the corresponding stats counter value from the
+ * supplied dm_stats handle for the specified region_id and area_id.
+ * If either region_id or area_id uses one of the special values
+ * DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then the region
+ * or area is selected according to the current state of the dm_stats
+ * handle's embedded cursor.
+ *
+ * Two methods are provided to access counter values: a named function
+ * for each available counter field and a single function that accepts
+ * an enum value specifying the required field. New code is encouraged
+ * to use the enum based interface as calls to the named functions are
+ * implemented using the enum method internally.
+ *
+ * See the kernel documentation for complete descriptions of each
+ * counter field:
+ *
+ * Documentation/device-mapper/statistics.txt
+ * Documentation/iostats.txt
+ *
+ * reads: the number of reads completed
+ * reads_merged: the number of reads merged
+ * read_sectors: the number of sectors read
+ * read_nsecs: the number of nanoseconds spent reading
+ * writes: the number of writes completed
+ * writes_merged: the number of writes merged
+ * write_sectors: the number of sectors written
+ * write_nsecs: the number of nanoseconds spent writing
+ * io_in_progress: the number of I/Os currently in progress
+ * io_nsecs: the number of nanoseconds spent doing I/Os
+ * weighted_io_nsecs: the weighted number of nanoseconds spent doing I/Os
+ * total_read_nsecs: the total time spent reading in nanoseconds
+ * total_write_nsecs: the total time spent writing in nanoseconds
+ */
+
+#define DM_STATS_REGION_CURRENT UINT64_MAX
+#define DM_STATS_AREA_CURRENT UINT64_MAX
+
+/*
+ * Counter field identifiers accepted by dm_stats_get_counter().
+ * Each value corresponds to one of the fields described in the
+ * "Stats counter access methods" comment above.
+ */
+typedef enum {
+       DM_STATS_READS_COUNT,
+       DM_STATS_READS_MERGED_COUNT,
+       DM_STATS_READ_SECTORS_COUNT,
+       DM_STATS_READ_NSECS,
+       DM_STATS_WRITES_COUNT,
+       DM_STATS_WRITES_MERGED_COUNT,
+       DM_STATS_WRITE_SECTORS_COUNT,
+       DM_STATS_WRITE_NSECS,
+       DM_STATS_IO_IN_PROGRESS_COUNT,
+       DM_STATS_IO_NSECS,
+       DM_STATS_WEIGHTED_IO_NSECS,
+       DM_STATS_TOTAL_READ_NSECS,
+       DM_STATS_TOTAL_WRITE_NSECS,
+       DM_STATS_NR_COUNTERS    /* number of counters, not a counter itself */
+} dm_stats_counter_t;
+
+uint64_t dm_stats_get_counter(const struct dm_stats *dms,
+                             dm_stats_counter_t counter,
+                             uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_reads(const struct dm_stats *dms,
+                           uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_reads_merged(const struct dm_stats *dms,
+                                  uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_read_sectors(const struct dm_stats *dms,
+                                  uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_read_nsecs(const struct dm_stats *dms,
+                                uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_writes(const struct dm_stats *dms,
+                            uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_writes_merged(const struct dm_stats *dms,
+                                   uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_write_sectors(const struct dm_stats *dms,
+                                   uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_write_nsecs(const struct dm_stats *dms,
+                                 uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_io_in_progress(const struct dm_stats *dms,
+                                    uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_io_nsecs(const struct dm_stats *dms,
+                              uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_weighted_io_nsecs(const struct dm_stats *dms,
+                                       uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_total_read_nsecs(const struct dm_stats *dms,
+                                      uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_total_write_nsecs(const struct dm_stats *dms,
+                                       uint64_t region_id, uint64_t area_id);
+
+/*
+ * Derived statistics access methods
+ *
+ * Each method returns the corresponding value calculated from the
+ * counters stored in the supplied dm_stats handle for the specified
+ * region_id and area_id. If either region_id or area_id uses one of the
+ * special values DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then
+ * the region or area is selected according to the current state of the
+ * dm_stats handle's embedded cursor.
+ *
+ * The set of metrics is based on the fields provided by the Linux
+ * iostats program.
+ *
+ * rd_merges_per_sec: the number of reads merged per second
+ * wr_merges_per_sec: the number of writes merged per second
+ * reads_per_sec: the number of reads completed per second
+ * writes_per_sec: the number of writes completed per second
+ * read_sectors_per_sec: the number of sectors read per second
+ * write_sectors_per_sec: the number of sectors written per second
+ * average_request_size: the average size of requests submitted
+ * service_time: the average service time (in ns) for requests issued
+ * average_queue_size: the average queue length
+ * average_wait_time: the average time for requests to be served (in ns)
+ * average_rd_wait_time: the average read wait time
+ * average_wr_wait_time: the average write wait time
+ */
+
+/*
+ * Derived metric identifiers accepted by dm_stats_get_metric().
+ * NOTE(review): DM_STATS_THROUGHPUT and DM_STATS_UTILIZATION are not
+ * described in the metric list comment above — presumably they map to
+ * dm_stats_get_throughput()/dm_stats_get_utilization(); confirm.
+ */
+typedef enum {
+       DM_STATS_RD_MERGES_PER_SEC,
+       DM_STATS_WR_MERGES_PER_SEC,
+       DM_STATS_READS_PER_SEC,
+       DM_STATS_WRITES_PER_SEC,
+       DM_STATS_READ_SECTORS_PER_SEC,
+       DM_STATS_WRITE_SECTORS_PER_SEC,
+       DM_STATS_AVERAGE_REQUEST_SIZE,
+       DM_STATS_AVERAGE_QUEUE_SIZE,
+       DM_STATS_AVERAGE_WAIT_TIME,
+       DM_STATS_AVERAGE_RD_WAIT_TIME,
+       DM_STATS_AVERAGE_WR_WAIT_TIME,
+       DM_STATS_SERVICE_TIME,
+       DM_STATS_THROUGHPUT,
+       DM_STATS_UTILIZATION,
+       DM_STATS_NR_METRICS     /* number of metrics, not a metric itself */
+} dm_stats_metric_t;
+
+int dm_stats_get_metric(const struct dm_stats *dms, int metric,
+                       uint64_t region_id, uint64_t area_id, double *value);
+
+int dm_stats_get_rd_merges_per_sec(const struct dm_stats *dms, double *rrqm,
+                                  uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_wr_merges_per_sec(const struct dm_stats *dms, double *rrqm,
+                                  uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_reads_per_sec(const struct dm_stats *dms, double *rd_s,
+                              uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_writes_per_sec(const struct dm_stats *dms, double *wr_s,
+                               uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_read_sectors_per_sec(const struct dm_stats *dms,
+                                     double *rsec_s, uint64_t region_id,
+                                     uint64_t area_id);
+
+int dm_stats_get_write_sectors_per_sec(const struct dm_stats *dms,
+                                      double *wr_s, uint64_t region_id,
+                                      uint64_t area_id);
+
+int dm_stats_get_average_request_size(const struct dm_stats *dms,
+                                     double *arqsz, uint64_t region_id,
+                                     uint64_t area_id);
+
+int dm_stats_get_service_time(const struct dm_stats *dms, double *svctm,
+                             uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_average_queue_size(const struct dm_stats *dms, double *qusz,
+                                   uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_average_wait_time(const struct dm_stats *dms, double *await,
+                                  uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_average_rd_wait_time(const struct dm_stats *dms,
+                                     double *await, uint64_t region_id,
+                                     uint64_t area_id);
+
+int dm_stats_get_average_wr_wait_time(const struct dm_stats *dms,
+                                     double *await, uint64_t region_id,
+                                     uint64_t area_id);
+
+int dm_stats_get_throughput(const struct dm_stats *dms, double *tput,
+                           uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_utilization(const struct dm_stats *dms, dm_percent_t *util,
+                            uint64_t region_id, uint64_t area_id);
+
+/*
+ * Statistics histogram access methods.
+ *
+ * Methods to access latency histograms for regions that have them
+ * enabled. Each histogram contains a configurable number of bins
+ * spanning a user defined latency interval.
+ *
+ * The bin count, upper and lower bin bounds, and bin values are
+ * made available via the following area methods.
+ *
+ * Methods to obtain a simple string representation of the histogram
+ * and its bounds are also provided.
+ */
+
+/*
+ * Retrieve a pointer to the histogram associated with the specified
+ * area. If the area does not have a histogram configured this function
+ * returns NULL.
+ *
+ * The pointer does not need to be freed explicitly by the caller: it
+ * will become invalid following a subsequent dm_stats_list(),
+ * dm_stats_populate() or dm_stats_destroy() of the corresponding
+ * dm_stats handle.
+ *
+ * If region_id or area_id is one of the special values
+ * DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT the current cursor
+ * value is used to select the region or area.
+ */
+struct dm_histogram *dm_stats_get_histogram(const struct dm_stats *dms,
+                                           uint64_t region_id,
+                                           uint64_t area_id);
+
+/*
+ * Return the number of bins in the specified histogram handle.
+ */
+int dm_histogram_get_nr_bins(const struct dm_histogram *dmh);
+
+/*
+ * Get the lower bound of the specified bin of the histogram for the
+ * area specified by region_id and area_id. The value is returned in
+ * nanoseconds.
+ */
+uint64_t dm_histogram_get_bin_lower(const struct dm_histogram *dmh, int bin);
+
+/*
+ * Get the upper bound of the specified bin of the histogram for the
+ * area specified by region_id and area_id. The value is returned in
+ * nanoseconds.
+ */
+uint64_t dm_histogram_get_bin_upper(const struct dm_histogram *dmh, int bin);
+
+/*
+ * Get the width of the specified bin of the histogram for the area
+ * specified by region_id and area_id. The width is equal to the bin
+ * upper bound minus the lower bound and yields the range of latency
+ * values covered by this bin. The value is returned in nanoseconds.
+ */
+uint64_t dm_histogram_get_bin_width(const struct dm_histogram *dmh, int bin);
+
+/*
+ * Get the value of the specified bin of the histogram for the area
+ * specified by region_id and area_id.
+ */
+uint64_t dm_histogram_get_bin_count(const struct dm_histogram *dmh, int bin);
+
+/*
+ * Get the percentage (relative frequency) of the specified bin of the
+ * histogram for the area specified by region_id and area_id.
+ */
+dm_percent_t dm_histogram_get_bin_percent(const struct dm_histogram *dmh,
+                                         int bin);
+
+/*
+ * Return the total observations (sum of bin counts) for the histogram
+ * of the area specified by region_id and area_id.
+ */
+uint64_t dm_histogram_get_sum(const struct dm_histogram *dmh);
+
+/*
+ * Histogram formatting flags for dm_histogram_to_string().
+ */
+#define DM_HISTOGRAM_SUFFIX  0x1
+#define DM_HISTOGRAM_VALUES  0x2
+#define DM_HISTOGRAM_PERCENT 0x4       /* was "0X4": lower-case hex for consistency with the rest of the header */
+#define DM_HISTOGRAM_BOUNDS_LOWER 0x10
+#define DM_HISTOGRAM_BOUNDS_UPPER 0x20
+/* LOWER|UPPER combined: show each bin as a "lo-up" range. */
+#define DM_HISTOGRAM_BOUNDS_RANGE 0x30
+
+/*
+ * Return a string representation of the supplied histogram's values and
+ * bin boundaries.
+ *
+ * The bin argument selects the bin to format. If this argument is less
+ * than zero all bins will be included in the resulting string.
+ *
+ * width specifies a minimum width for the field in characters; if it is
+ * zero the width will be determined automatically based on the options
+ * selected for formatting. A value less than zero disables field width
+ * control: bin boundaries and values will be output with a minimum
+ * amount of whitespace.
+ *
+ * flags is a collection of flag arguments that control the string format:
+ *
+ * DM_HISTOGRAM_VALUES  - Include bin values in the string.
+ * DM_HISTOGRAM_SUFFIX  - Include time unit suffixes when printing bounds.
+ * DM_HISTOGRAM_PERCENT - Format bin values as a percentage.
+ *
+ * DM_HISTOGRAM_BOUNDS_LOWER - Include the lower bound of each bin.
+ * DM_HISTOGRAM_BOUNDS_UPPER - Include the upper bound of each bin.
+ * DM_HISTOGRAM_BOUNDS_RANGE - Show the span of each bin as "lo-up".
+ *
+ * The returned pointer does not need to be freed explicitly by the
+ * caller: it will become invalid following a subsequent
+ * dm_stats_list(), dm_stats_populate() or dm_stats_destroy() of the
+ * corresponding dm_stats handle.
+ */
+const char *dm_histogram_to_string(const struct dm_histogram *dmh, int bin,
+                                  int width, int flags);
+
+/*************************
+ * config file parse/print
+ *************************/
+/* Type tag selecting which member of dm_config_value's union 'v' is valid. */
+typedef enum {
+       DM_CFG_INT,
+       DM_CFG_FLOAT,
+       DM_CFG_STRING,
+       DM_CFG_EMPTY_ARRAY
+} dm_config_value_type_t;
+
+/* One typed value; array values are chained through 'next'. */
+struct dm_config_value {
+       dm_config_value_type_t type;
+
+       union {
+               int64_t i;
+               float f;
+               double d;               /* Unused. */
+               const char *str;
+       } v;
+
+       struct dm_config_value *next;   /* For arrays */
+       uint32_t format_flags;          /* DM_CONFIG_VALUE_FMT_* output hints */
+};
+
+/* A key together with its value(s) and position in the config tree. */
+struct dm_config_node {
+       const char *key;
+       struct dm_config_node *parent, *sib, *child;
+       struct dm_config_value *v;
+       int id;
+};
+
+/* A parsed configuration; trees may be stacked via 'cascade' (see
+ * dm_config_insert_cascaded_tree(): the first tree is searched first). */
+struct dm_config_tree {
+       struct dm_config_node *root;
+       struct dm_config_tree *cascade;
+       struct dm_pool *mem;            /* Pool backing nodes and values */
+       void *custom;                   /* Caller-private data, see dm_config_set_custom() */
+};
+
+struct dm_config_tree *dm_config_create(void);
+struct dm_config_tree *dm_config_from_string(const char *config_settings);
+int dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end);
+int dm_config_parse_without_dup_node_check(struct dm_config_tree *cft, const char *start, const char *end);
+
+void *dm_config_get_custom(struct dm_config_tree *cft);
+void dm_config_set_custom(struct dm_config_tree *cft, void *custom);
+
+/*
+ * When searching, first_cft is checked before second_cft.
+ */
+struct dm_config_tree *dm_config_insert_cascaded_tree(struct dm_config_tree *first_cft, struct dm_config_tree *second_cft);
+
+/*
+ * If there's a cascaded dm_config_tree, remove the top layer
+ * and return the layer below.  Otherwise return NULL.
+ */
+struct dm_config_tree *dm_config_remove_cascaded_tree(struct dm_config_tree *cft);
+
+/*
+ * Create a new, uncascaded config tree equivalent to the input cascade.
+ */
+struct dm_config_tree *dm_config_flatten(struct dm_config_tree *cft);
+
+void dm_config_destroy(struct dm_config_tree *cft);
+
+/* Simple output line by line. */
+typedef int (*dm_putline_fn)(const char *line, void *baton);
+/* More advanced output with config node reference. */
+typedef int (*dm_config_node_out_fn)(const struct dm_config_node *cn, const char *line, void *baton);
+
+/*
+ * Specification for advanced config node output.
+ */
+struct dm_config_node_out_spec {
+       dm_config_node_out_fn prefix_fn; /* called before processing config node lines */
+       dm_config_node_out_fn line_fn; /* called for each config node line */
+       dm_config_node_out_fn suffix_fn; /* called after processing config node lines */
+};
+
+/* Write the node and any subsequent siblings it has. */
+int dm_config_write_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton);
+int dm_config_write_node_out(const struct dm_config_node *cn, const struct dm_config_node_out_spec *out_spec, void *baton);
+
+/* Write given node only without subsequent siblings. */
+int dm_config_write_one_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton);
+int dm_config_write_one_node_out(const struct dm_config_node *cn, const struct dm_config_node_out_spec *out_spec, void *baton);
+
+struct dm_config_node *dm_config_find_node(const struct dm_config_node *cn, const char *path);
+int dm_config_has_node(const struct dm_config_node *cn, const char *path);
+int dm_config_remove_node(struct dm_config_node *parent, struct dm_config_node *remove);
+const char *dm_config_find_str(const struct dm_config_node *cn, const char *path, const char *fail);
+const char *dm_config_find_str_allow_empty(const struct dm_config_node *cn, const char *path, const char *fail);
+int dm_config_find_int(const struct dm_config_node *cn, const char *path, int fail);
+int64_t dm_config_find_int64(const struct dm_config_node *cn, const char *path, int64_t fail);
+float dm_config_find_float(const struct dm_config_node *cn, const char *path, float fail);
+
+const struct dm_config_node *dm_config_tree_find_node(const struct dm_config_tree *cft, const char *path);
+const char *dm_config_tree_find_str(const struct dm_config_tree *cft, const char *path, const char *fail);
+const char *dm_config_tree_find_str_allow_empty(const struct dm_config_tree *cft, const char *path, const char *fail);
+int dm_config_tree_find_int(const struct dm_config_tree *cft, const char *path, int fail);
+int64_t dm_config_tree_find_int64(const struct dm_config_tree *cft, const char *path, int64_t fail);
+float dm_config_tree_find_float(const struct dm_config_tree *cft, const char *path, float fail);
+int dm_config_tree_find_bool(const struct dm_config_tree *cft, const char *path, int fail);
+
+/*
+ * Understands (0, ~0), (y, n), (yes, no), (on,
+ * off), (true, false).
+ */
+int dm_config_find_bool(const struct dm_config_node *cn, const char *path, int fail);
+int dm_config_value_is_bool(const struct dm_config_value *v);
+
+int dm_config_get_uint32(const struct dm_config_node *cn, const char *path, uint32_t *result);
+int dm_config_get_uint64(const struct dm_config_node *cn, const char *path, uint64_t *result);
+int dm_config_get_str(const struct dm_config_node *cn, const char *path, const char **result);
+int dm_config_get_list(const struct dm_config_node *cn, const char *path, const struct dm_config_value **result);
+int dm_config_get_section(const struct dm_config_node *cn, const char *path, const struct dm_config_node **result);
+
+unsigned dm_config_maybe_section(const char *str, unsigned len);
+
+const char *dm_config_parent_name(const struct dm_config_node *n);
+
+struct dm_config_node *dm_config_clone_node_with_mem(struct dm_pool *mem, const struct dm_config_node *node, int siblings);
+struct dm_config_node *dm_config_create_node(struct dm_config_tree *cft, const char *key);
+struct dm_config_value *dm_config_create_value(struct dm_config_tree *cft);
+struct dm_config_node *dm_config_clone_node(struct dm_config_tree *cft, const struct dm_config_node *cn, int siblings);
+
+/*
+ * Common formatting flags applicable to all config node types (lower 16 bits).
+ */
+#define DM_CONFIG_VALUE_FMT_COMMON_ARRAY             0x00000001 /* value is array */
+#define DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES      0x00000002 /* add spaces in "key = value" pairs in constrast to "key=value" for better readability */
+
+/*
+ * Type-related config node formatting flags (higher 16 bits).
+ */
+/* int-related formatting flags */
+#define DM_CONFIG_VALUE_FMT_INT_OCTAL                0x00010000 /* print number in octal form */
+
+/* string-related formatting flags */
+#define DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES         0x00010000 /* do not print quotes around string value */
+
+void dm_config_value_set_format_flags(struct dm_config_value *cv, uint32_t format_flags);
+uint32_t dm_config_value_get_format_flags(struct dm_config_value *cv);
+
+struct dm_pool *dm_config_memory(struct dm_config_tree *cft);
+
+/* Udev device directory. */
+#define DM_UDEV_DEV_DIR "/dev/"
+
+/* Cookie prefixes.
+ *
+ * The cookie value consists of a prefix (16 bits) and a base (16 bits).
+ * We can use the prefix to store the flags. These flags are sent to
+ * kernel within given dm task. When returned back to userspace in
+ * DM_COOKIE udev environment variable, we can control several aspects
+ * of udev rules we use by decoding the cookie prefix. When doing the
+ * notification, we replace the cookie prefix with DM_COOKIE_MAGIC,
+ * so we notify the right semaphore.
+ *
+ * It is still possible to use cookies for passing the flags to udev
+ * rules even when udev_sync is disabled. The base part of the cookie
+ * will be zero (there's no notification semaphore) and prefix will be
+ * set then. However, having udev_sync enabled is highly recommended.
+ */
+#define DM_COOKIE_MAGIC 0x0D4D
+#define DM_UDEV_FLAGS_MASK 0xFFFF0000
+#define DM_UDEV_FLAGS_SHIFT 16
+
+/*
+ * DM_UDEV_DISABLE_DM_RULES_FLAG is set in case we need to disable
+ * basic device-mapper udev rules that create symlinks in /dev/<DM_DIR>
+ * directory. However, we can't reliably prevent creating default
+ * nodes by udev (commonly /dev/dm-X, where X is a number).
+ */
+#define DM_UDEV_DISABLE_DM_RULES_FLAG 0x0001
+/*
+ * DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG is set in case we need to disable
+ * subsystem udev rules, but still we need the general DM udev rules to
+ * be applied (to create the nodes and symlinks under /dev and /dev/disk).
+ */
+#define DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG 0x0002
+/*
+ * DM_UDEV_DISABLE_DISK_RULES_FLAG is set in case we need to disable
+ * general DM rules that set symlinks in /dev/disk directory.
+ */
+#define DM_UDEV_DISABLE_DISK_RULES_FLAG 0x0004
+/*
+ * DM_UDEV_DISABLE_OTHER_RULES_FLAG is set in case we need to disable
+ * all the other rules that are not general device-mapper nor subsystem
+ * related (the rules belong to other software or packages). All foreign
+ * rules should check this flag directly and they should ignore further
+ * rule processing for such event.
+ */
+#define DM_UDEV_DISABLE_OTHER_RULES_FLAG 0x0008
+/*
+ * DM_UDEV_LOW_PRIORITY_FLAG is set in case we need to instruct the
+ * udev rules to give low priority to the device that is currently
+ * processed. For example, this provides a way to select which symlinks
+ * could be overwritten by high priority ones if their names are equal.
+ * Common situation is a name based on FS UUID while using origin and
+ * snapshot devices.
+ */
+#define DM_UDEV_LOW_PRIORITY_FLAG 0x0010
+/*
+ * DM_UDEV_DISABLE_LIBRARY_FALLBACK is set in case we need to disable
+ * libdevmapper's node management. We will rely on udev completely
+ * and there will be no fallback action provided by libdevmapper if
+ * udev does something improperly. Using the library fallback code has
+ * a consequence that you need to take into account: any device node
+ * or symlink created without udev is not recorded in udev database
+ * which other applications may read to get complete list of devices.
+ * For this reason, use of DM_UDEV_DISABLE_LIBRARY_FALLBACK is
+ * recommended on systems where udev is used. Keep library fallback
+ * enabled just for exceptional cases where you need to debug udev-related
+ * problems. If you hit such problems, please contact us through upstream
+ * LVM2 development mailing list (see also README file). This flag is
+ * currently not set by default in libdevmapper so you need to set it
+ * explicitly if you're sure that udev is behaving correctly on your
+ * setups.
+ */
+#define DM_UDEV_DISABLE_LIBRARY_FALLBACK 0x0020
+/*
+ * DM_UDEV_PRIMARY_SOURCE_FLAG is automatically appended by
+ * libdevmapper for all ioctls generating udev uevents. Once used in
+ * udev rules, we know if this is a real "primary sourced" event or not.
+ * We need to distinguish real events originated in libdevmapper from
+ * any spurious events to gather all missing information (e.g. events
+ * generated as a result of "udevadm trigger" command or as a result
+ * of the "watch" udev rule).
+ */
+#define DM_UDEV_PRIMARY_SOURCE_FLAG 0x0040
+
+/*
+ * Udev flags reserved for use by any device-mapper subsystem.
+ */
+#define DM_SUBSYSTEM_UDEV_FLAG0 0x0100
+#define DM_SUBSYSTEM_UDEV_FLAG1 0x0200
+#define DM_SUBSYSTEM_UDEV_FLAG2 0x0400
+#define DM_SUBSYSTEM_UDEV_FLAG3 0x0800
+#define DM_SUBSYSTEM_UDEV_FLAG4 0x1000
+#define DM_SUBSYSTEM_UDEV_FLAG5 0x2000
+#define DM_SUBSYSTEM_UDEV_FLAG6 0x4000
+#define DM_SUBSYSTEM_UDEV_FLAG7 0x8000
+
+int dm_cookie_supported(void);
+
+/*
+ * Udev synchronisation functions.
+ */
+void dm_udev_set_sync_support(int sync_with_udev);
+int dm_udev_get_sync_support(void);
+void dm_udev_set_checking(int checking);
+int dm_udev_get_checking(void);
+
+/*
+ * Default value to get new auto generated cookie created
+ */
+#define DM_COOKIE_AUTO_CREATE 0
+int dm_udev_create_cookie(uint32_t *cookie);
+int dm_udev_complete(uint32_t cookie);
+int dm_udev_wait(uint32_t cookie);
+
+/*
+ * dm_udev_wait_immediate
+ * If *ready is 1 on return, the wait is complete.
+ * If *ready is 0 on return, the wait is incomplete and either
+ * this function or dm_udev_wait() must be called again.
+ * Returns 0 on error, when neither function should be called again.
+ */
+int dm_udev_wait_immediate(uint32_t cookie, int *ready);
+
+#define DM_DEV_DIR_UMASK 0022
+#define DM_CONTROL_NODE_UMASK 0177
+
+#ifdef __cplusplus
+}
+#endif
+#endif                         /* LIB_DEVICE_MAPPER_H */
diff --git a/device_mapper/libdm-common.c b/device_mapper/libdm-common.c
new file mode 100644 (file)
index 0000000..bcf12cb
--- /dev/null
@@ -0,0 +1,2691 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "device_mapper/misc/dmlib.h"
+#include "ioctl/libdm-targets.h"
+#include "libdm-common.h"
+#include "misc/kdev_t.h"
+#include "misc/dm-ioctl.h"
+
+#include <stdarg.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+
+#ifdef UDEV_SYNC_SUPPORT
+#  include <sys/types.h>
+#  include <sys/ipc.h>
+#  include <sys/sem.h>
+#  include <libudev.h>
+#endif
+
+#ifdef __linux__
+#  include <linux/fs.h>
+#endif
+
+#ifdef HAVE_SELINUX
+#  include <selinux/selinux.h>
+#endif
+#ifdef HAVE_SELINUX_LABEL_H
+#  include <selinux/label.h>
+#endif
+
+#define DM_DEFAULT_NAME_MANGLING_MODE_ENV_VAR_NAME "DM_DEFAULT_NAME_MANGLING_MODE"
+
+#define DEV_DIR "/dev/"
+
+#ifdef UDEV_SYNC_SUPPORT
+#ifdef _SEM_SEMUN_UNDEFINED
+union semun
+{
+       int val;                        /* value for SETVAL */
+       struct semid_ds *buf;           /* buffer for IPC_STAT & IPC_SET */
+       unsigned short int *array;      /* array for GETALL & SETALL */
+       struct seminfo *__buf;          /* buffer for IPC_INFO */
+};
+#endif
+#endif
+
+/* Module-wide mutable state (not protected against concurrent use). */
+static char _dm_dir[PATH_MAX] = DEV_DIR DM_DIR;
+static char _sysfs_dir[PATH_MAX] = "/sys/";
+static char _path0[PATH_MAX];           /* path buffer, safe 4kB on stack */
+static const char _mountinfo[] = "/proc/self/mountinfo";
+
+/* Longest UUID prefix accepted, excluding the trailing NUL. */
+#define DM_MAX_UUID_PREFIX_LEN 15
+static char _default_uuid_prefix[DM_MAX_UUID_PREFIX_LEN + 1] = "LVM-";
+
+static int _verbose = 0;                /* set via dm_log_init_verbose() */
+static int _suspended_dev_counter = 0;  /* see inc_suspended()/dec_suspended() */
+static dm_string_mangling_t _name_mangling_mode = DEFAULT_DM_NAME_MANGLING;
+
+#ifdef HAVE_SELINUX_LABEL_H
+static struct selabel_handle *_selabel_handle = NULL;
+#endif
+
+static int _udev_disabled = 0;          /* set from env DM_DISABLE_UDEV in dm_lib_init() */
+
+#ifdef UDEV_SYNC_SUPPORT
+/* NOTE(review): -1 presumably means "not determined yet" — confirm against users. */
+static int _semaphore_supported = -1;
+static int _udev_running = -1;
+static int _sync_with_udev = 1;
+static int _udev_checking = 1;
+#endif
+
+/*
+ * One-time library initialisation driven by environment variables:
+ * DM_DISABLE_UDEV turns udev interaction off, and
+ * DM_DEFAULT_NAME_MANGLING_MODE selects the none/auto/hex mangling mode
+ * (unrecognised values leave the compiled-in default in place).
+ */
+void dm_lib_init(void)
+{
+       static const struct {
+               const char *name;
+               dm_string_mangling_t mode;
+       } _modes[] = {
+               { "none", DM_STRING_MANGLING_NONE },
+               { "auto", DM_STRING_MANGLING_AUTO },
+               { "hex",  DM_STRING_MANGLING_HEX },
+       };
+       const char *env;
+       unsigned i;
+
+       if (getenv("DM_DISABLE_UDEV"))
+               _udev_disabled = 1;
+
+       /* Reset to the compiled-in default, then honour any env override. */
+       _name_mangling_mode = DEFAULT_DM_NAME_MANGLING;
+       if ((env = getenv(DM_DEFAULT_NAME_MANGLING_MODE_ENV_VAR_NAME)))
+               for (i = 0; i < sizeof(_modes) / sizeof(_modes[0]); i++)
+                       if (!strcasecmp(env, _modes[i].name)) {
+                               _name_mangling_mode = _modes[i].mode;
+                               break;
+                       }
+}
+
+/*
+ * Library users can provide their own logging
+ * function.
+ */
+
+/*
+ * Built-in logger: writes to stdout/stderr, honouring _verbose and the
+ * DM_DEBUG_WITH_LINE_NUMBERS / DM_ABORT_ON_INTERNAL_ERRORS environment
+ * variables.  'dm_errno_or_class' is accepted for signature
+ * compatibility but is not used here.
+ */
+__attribute__((format(printf, 5, 0)))
+static void _default_log_line(int level, const char *file,
+                             int line, int dm_errno_or_class,
+                             const char *f, va_list ap)
+{
+       /* -1 means the controlling environment variable was not read yet. */
+       static int _abort_on_internal_errors = -1;
+       static int _debug_with_line_numbers = -1;
+       FILE *out = log_stderr(level) ? stderr : stdout;
+
+       level = log_level(level);
+
+       /* Warnings and errors always print; debug only when _verbose is set. */
+       if (level <= _LOG_WARN || _verbose) {
+               if (level < _LOG_WARN)
+                       out = stderr;
+
+               if (_debug_with_line_numbers < 0)
+                       /* Set when env DM_DEBUG_WITH_LINE_NUMBERS is not "0" */
+                       _debug_with_line_numbers =
+                               strcmp(getenv("DM_DEBUG_WITH_LINE_NUMBERS") ? : "0", "0");
+
+               if (_debug_with_line_numbers)
+                       fprintf(out, "%s:%d     ", file, line);
+
+               vfprintf(out, f, ap);
+               fputc('\n', out);
+       }
+
+       if (_abort_on_internal_errors < 0)
+               /* Set when env DM_ABORT_ON_INTERNAL_ERRORS is not "0" */
+               _abort_on_internal_errors =
+                       strcmp(getenv("DM_ABORT_ON_INTERNAL_ERRORS") ? : "0", "0");
+
+       /* Turn internal-error messages into a hard abort when requested. */
+       if (_abort_on_internal_errors &&
+           !strncmp(f, INTERNAL_ERROR, sizeof(INTERNAL_ERROR) - 1))
+               abort();
+}
+
+/* Default errno-aware logger: packages varargs and forwards them,
+ * together with dm_errno_or_class, to _default_log_line(). */
+__attribute__((format(printf, 5, 6)))
+static void _default_log_with_errno(int level,
+           const char *file, int line, int dm_errno_or_class,
+           const char *f, ...)
+{
+       va_list ap;
+
+       va_start(ap, f);
+       _default_log_line(level, file, line, dm_errno_or_class, f, ap);
+       va_end(ap);
+}
+
+/* Default old-style logger (no errno argument): forwards to
+ * _default_log_line() with an errno/class of 0. */
+__attribute__((format(printf, 4, 5)))
+static void _default_log(int level, const char *file,
+                        int line, const char *f, ...)
+{
+       va_list ap;
+
+       va_start(ap, f);
+       _default_log_line(level, file, line, 0, f, ap);
+       va_end(ap);
+}
+
+/* Active logging callbacks; replaced via dm_log_init() /
+ * dm_log_with_errno_init(). */
+dm_log_fn dm_log = _default_log;
+dm_log_with_errno_fn dm_log_with_errno = _default_log_with_errno;
+
+/*
+ * Wrapper function to reformat new messages to and
+ * old style logging which had not used errno parameter
+ *
+ * As we cannot simply pass '...' to old function we
+ * need to process arg list locally and just pass '%s' + buffer
+ */
+/*
+ * Adapter: flatten an errno-style log call into a plain string and
+ * forward it to the old-style dm_log callback, which has no errno
+ * parameter (the errno/class value is dropped).
+ */
+__attribute__((format(printf, 5, 6)))
+static void _log_to_default_log(int level,
+           const char *file, int line, int dm_errno_or_class,
+           const char *f, ...)
+{
+       char msg[2 * PATH_MAX + 256]; /* big enough for most messages */
+       va_list args;
+       int len;
+
+       va_start(args, f);
+       len = vsnprintf(msg, sizeof(msg), f, args);
+       va_end(args);
+
+       if (len <= 0) /* Nothing formatted (or formatting error): drop it */
+               return;
+
+       /* The message may have been truncated to fit msg[]. */
+       dm_log(level, file, line, "%s", msg);
+}
+
+/*
+ * Wrapper function take 'old' style message without errno
+ * and log it via new logging function with errno arg
+ *
+ * This minor case may happen if new libdm is used with old
+ * recompiled tool that decided to use new logging,
+ * but still would like to use old binary plugins.
+ */
+__attribute__((format(printf, 4, 5)))
+static void _log_to_default_log_with_errno(int level,
+           const char *file, int line, const char *f, ...)
+{
+       int n;
+       va_list ap;
+       char buf[2 * PATH_MAX + 256]; /* big enough for most messages */
+
+       va_start(ap, f);
+       n = vsnprintf(buf, sizeof(buf), f, ap);
+       va_end(ap);
+
+       if (n > 0) /* Could be truncated; errno/class is reported as 0 */
+               dm_log_with_errno(level, file, line, 0, "%s", buf);
+}
+
+/*
+ * Install a caller-supplied old-style log callback; passing NULL
+ * restores the built-in defaults.  The errno-aware entry point is
+ * routed through a wrapper so both interfaces stay consistent.
+ */
+void dm_log_init(dm_log_fn fn)
+{
+       dm_log = fn ? fn : _default_log;
+       dm_log_with_errno = fn ? _log_to_default_log : _default_log_with_errno;
+}
+
+/* Return 1 if either logging callback has been replaced by the caller,
+ * 0 while both built-in defaults are still installed. */
+int dm_log_is_non_default(void)
+{
+       return (dm_log != _default_log) ||
+              (dm_log_with_errno != _default_log_with_errno);
+}
+
+/*
+ * Install a caller-supplied errno-aware log callback; passing NULL
+ * restores the built-in defaults.  The old-style entry point is routed
+ * through a wrapper that forwards with an errno/class of 0.
+ */
+void dm_log_with_errno_init(dm_log_with_errno_fn fn)
+{
+       dm_log = fn ? _log_to_default_log_with_errno : _default_log;
+       dm_log_with_errno = fn ? fn : _default_log_with_errno;
+}
+
+/* Set the verbosity threshold consulted by the default logger. */
+void dm_log_init_verbose(int level)
+{
+       _verbose = level;
+}
+
+/*
+ * Fill 'buffer' (of size 'len') with the device path for 'dev_name'.
+ * A name already containing '/' is copied verbatim; otherwise it is
+ * placed under the dm device directory.  Returns 1 on success, 0 on
+ * failure (logged).
+ */
+static int _build_dev_path(char *buffer, size_t len, const char *dev_name)
+{
+       int ok;
+
+       /* If there's a /, assume caller knows what they're doing */
+       if (strchr(dev_name, '/'))
+               ok = dm_strncpy(buffer, dev_name, len);
+       else
+               ok = dm_snprintf(buffer, len, "%s/%s",
+                                _dm_dir, dev_name) >= 0;
+
+       if (!ok)
+               log_error("Failed to build dev path for \"%s\".", dev_name);
+
+       return ok;
+}
+
+/* Copy the compile-time library version string into 'version';
+ * return value is that of dm_strncpy() (0 on truncation). */
+int dm_get_library_version(char *version, size_t size)
+{
+       return dm_strncpy(version, DM_LIB_VERSION, size);
+}
+
+/* Increment the count of devices this process currently holds suspended. */
+void inc_suspended(void)
+{
+       _suspended_dev_counter++;
+       log_debug_activation("Suspended device counter increased to %d", _suspended_dev_counter);
+}
+
+/* Decrement the suspended-device counter, refusing to drop below zero. */
+void dec_suspended(void)
+{
+       if (_suspended_dev_counter) {
+               _suspended_dev_counter--;
+               log_debug_activation("Suspended device counter reduced to %d", _suspended_dev_counter);
+       } else
+               log_error("Attempted to decrement suspended device counter below zero.");
+}
+
+/* Current number of devices suspended by this process. */
+int dm_get_suspended_counter(void)
+{
+       return _suspended_dev_counter;
+}
+
+/* Select the global name mangling policy; always succeeds (returns 1). */
+int dm_set_name_mangling_mode(dm_string_mangling_t name_mangling_mode)
+{
+       _name_mangling_mode = name_mangling_mode;
+
+       return 1;
+}
+
+/* Report the name mangling policy currently in force. */
+dm_string_mangling_t dm_get_name_mangling_mode(void)
+{
+       return _name_mangling_mode;
+}
+
+/*
+ * Allocate and initialise a dm_task for operation 'type' (DM_DEVICE_*).
+ * Returns NULL if allocation fails or dm_check_version() refuses
+ * (driver/library compatibility check — defined elsewhere).
+ */
+struct dm_task *dm_task_create(int type)
+{
+       struct dm_task *dmt = dm_zalloc(sizeof(*dmt));
+
+       if (!dmt) {
+               log_error("dm_task_create: malloc(%" PRIsize_t ") failed",
+                         sizeof(*dmt));
+               return NULL;
+       }
+
+       if (!dm_check_version()) {
+               dm_free(dmt);
+               return_NULL;
+       }
+
+       /* dm_zalloc() already zeroed the struct; the explicit zero
+        * assignments below just document the defaults. */
+       dmt->type = type;
+       dmt->minor = -1;
+       dmt->major = -1;
+       dmt->allow_default_major_fallback = 1;
+       dmt->uid = DM_DEVICE_UID;
+       dmt->gid = DM_DEVICE_GID;
+       dmt->mode = DM_DEVICE_MODE;
+       dmt->no_open_count = 0;
+       dmt->read_ahead = DM_READ_AHEAD_AUTO;
+       dmt->read_ahead_flags = 0;
+       dmt->event_nr = 0;
+       dmt->cookie_set = 0;
+       dmt->query_inactive_table = 0;
+       dmt->new_uuid = 0;
+       dmt->secure_data = 0;
+       dmt->record_timestamp = 0;
+
+       return dmt;
+}
+
+/*
+ * Find the name associated with a given device number by scanning _dm_dir.
+ *
+ * On success copies the (NUL-terminated, possibly truncated) name into
+ * 'buf' and returns 1; returns 0 when no entry matches or the directory
+ * cannot be read.
+ */
+static int _find_dm_name_of_device(dev_t st_rdev, char *buf, size_t buf_len)
+{
+       const char *name;
+       char path[PATH_MAX];
+       struct dirent *dirent;
+       DIR *d;
+       struct stat st;
+       int r = 0;
+
+       if (!(d = opendir(_dm_dir))) {
+               log_sys_error("opendir", _dm_dir);
+               return 0;
+       }
+
+       while ((dirent = readdir(d))) {
+               name = dirent->d_name;
+
+               if (!strcmp(name, ".") || !strcmp(name, ".."))
+                       continue;
+
+               if (dm_snprintf(path, sizeof(path), "%s/%s", _dm_dir,
+                               name) == -1) {
+                       log_error("Couldn't create path for %s", name);
+                       continue;
+               }
+
+               /* Entries that cannot be stat'd are silently skipped. */
+               if (stat(path, &st))
+                       continue;
+
+               if (st.st_rdev == st_rdev) {
+                       /* Use dm_strncpy: plain strncpy() would leave buf
+                        * unterminated when name fills buf_len. */
+                       (void) dm_strncpy(buf, name, buf_len);
+                       r = 1;
+                       break;
+               }
+       }
+
+       if (closedir(d))
+               log_sys_error("closedir", _dm_dir);
+
+       return r;
+}
+
+/*
+ * DM itself accepts any character in a device name; this whitelist
+ * exists purely so names interoperate cleanly with udev.
+ */
+static int _is_whitelisted_char(char c)
+{
+       return (c >= '0' && c <= '9') ||
+              (c >= 'A' && c <= 'Z') ||
+              (c >= 'a' && c <= 'z') ||
+              (strchr("#+-.:=@_", c) != NULL);
+}
+
+/*
+ * In auto mangling mode a string must not already contain the mangled
+ * form of a backslash ("\x5cx"): that would mean mangling it a second
+ * time.  Returns 1 when the string is acceptable, 0 otherwise (logged).
+ */
+int check_multiple_mangled_string_allowed(const char *str, const char *str_name,
+                                        dm_string_mangling_t mode)
+{
+       if (mode != DM_STRING_MANGLING_AUTO || !strstr(str, "\\x5cx"))
+               return 1;
+
+       log_error("The %s \"%s\" seems to be mangled more than once. "
+                 "This is not allowed in auto mode.", str_name, str);
+       return 0;
+}
+
+/*
+ * Mangle all characters in the input string which are not on a whitelist
+ * with '\xNN' format where NN is the hex value of the character.
+ */
+/*
+ * Returns -1 on error, 0 when no character needed mangling, 1 when at
+ * least one character was mangled.  'buf' receives the mangled form;
+ * buf_len must be at least DM_NAME_LEN.  In AUTO mode a string that
+ * mixes already-mangled and unmangled characters is rejected.
+ */
+int mangle_string(const char *str, const char *str_name, size_t len,
+                 char *buf, size_t buf_len, dm_string_mangling_t mode)
+{
+       int need_mangling = -1; /* -1 don't know yet, 0 no, 1 yes */
+       size_t i, j;            /* i indexes str, j indexes buf */
+
+       if (!str || !buf)
+               return -1;
+
+       /* Is there anything to do at all? */
+       if (!*str || !len)
+               return 0;
+
+       if (buf_len < DM_NAME_LEN) {
+               log_error(INTERNAL_ERROR "mangle_string: supplied buffer too small");
+               return -1;
+       }
+
+       /* NONE behaves like AUTO here; the caller filters NONE out earlier
+        * (see _dm_task_set_name) — so this is just a defensive default. */
+       if (mode == DM_STRING_MANGLING_NONE)
+               mode = DM_STRING_MANGLING_AUTO;
+
+       for (i = 0, j = 0; str[i]; i++) {
+               if (mode == DM_STRING_MANGLING_AUTO) {
+                       /*
+                        * Detect already mangled part of the string and keep it.
+                        * Return error on mixture of mangled/not mangled!
+                        */
+                       if (str[i] == '\\' && str[i+1] == 'x') {
+                               /* A "\xNN" sequence needs 4 source chars. */
+                               if ((len - i < 4) || (need_mangling == 1))
+                                       goto bad1;
+                               if (buf_len - j < 4)
+                                       goto bad2;
+
+                               memcpy(&buf[j], &str[i], 4);
+                               i+=3; j+=4;
+
+                               need_mangling = 0;
+                               continue;
+                       }
+               }
+
+               if (_is_whitelisted_char(str[i])) {
+                       /* whitelisted, keep it. */
+                       if (buf_len - j < 1)
+                               goto bad2;
+                       buf[j] = str[i];
+                       j++;
+               } else {
+                       /*
+                        * Not on a whitelist, mangle it.
+                        * Return error on mixture of mangled/not mangled
+                        * unless a DM_STRING_MANGLING_HEX is used!.
+                        */
+                       if ((mode != DM_STRING_MANGLING_HEX) && (need_mangling == 0))
+                               goto bad1;
+                       if (buf_len - j < 4)
+                               goto bad2;
+
+                       sprintf(&buf[j], "\\x%02x", (unsigned char) str[i]);
+                       j+=4;
+
+                       need_mangling = 1;
+               }
+       }
+
+       if (buf_len - j < 1)
+               goto bad2;
+       buf[j] = '\0';
+
+       /* All chars in the string whitelisted? */
+       if (need_mangling == -1)
+               need_mangling = 0;
+
+       return need_mangling;
+
+bad1:  /* mixed mangled/unmangled input */
+       log_error("The %s \"%s\" contains mixed mangled and unmangled "
+                 "characters or it's already mangled improperly.", str_name, str);
+       return -1;
+bad2:  /* output would not fit in buf */
+       log_error("Mangled form of the %s too long for \"%s\".", str_name, str);
+       return -1;
+}
+
+/*
+ * Try to unmangle supplied string.
+ * Return value: -1 on error, 0 when no unmangling needed, 1 when unmangling applied
+ */
+int unmangle_string(const char *str, const char *str_name, size_t len,
+                   char *buf, size_t buf_len, dm_string_mangling_t mode)
+{
+       /* Any mode but NONE rejects characters outside the whitelist. */
+       int strict = mode != DM_STRING_MANGLING_NONE;
+       char str_rest[DM_NAME_LEN];
+       size_t i, j;            /* i indexes str, j indexes buf */
+       int code;               /* decoded hex value of one "\xNN" */
+       int r = 0;
+
+       if (!str || !buf)
+               return -1;
+
+       /* Is there anything to do at all? */
+       if (!*str || !len)
+               return 0;
+
+       if (buf_len < DM_NAME_LEN) {
+               log_error(INTERNAL_ERROR "unmangle_string: supplied buffer too small");
+               return -1;
+       }
+
+       for (i = 0, j = 0; str[i]; i++, j++) {
+               if (strict && !(_is_whitelisted_char(str[i]) || str[i]=='\\')) {
+                       log_error("The %s \"%s\" should be mangled but "
+                                 "it contains blacklisted characters.", str_name, str);
+                       j=0; r=-1;
+                       goto out;
+               }
+
+               if (str[i] == '\\' && str[i+1] == 'x') {
+                       /* "%2x" reads the two hex digits after "\x".
+                        * NOTE(review): "%s" scans the remainder into
+                        * str_rest[DM_NAME_LEN] unbounded — assumes input is
+                        * always shorter than DM_NAME_LEN; TODO confirm all
+                        * callers enforce that. */
+                       if (!sscanf(&str[i+2], "%2x%s", &code, str_rest)) {
+                               log_debug_activation("Hex encoding mismatch detected in %s \"%s\" "
+                                                    "while trying to unmangle it.", str_name, str);
+                               goto out;
+                       }
+                       buf[j] = (unsigned char) code;
+
+                       /* skip the encoded part we've just decoded! */
+                       i+= 3;
+
+                       /* unmangling applied */
+                       r = 1;
+               } else
+                       buf[j] = str[i];
+       }
+
+out:
+       buf[j] = '\0';
+       return r;
+}
+
+/*
+ * Store 'name' in dmt->dev_name, applying the given mangling mode.
+ * dmt->mangled_dev_name is set only when mangling actually changed the
+ * name.  Returns 1 on success, 0 on error (logged).
+ */
+static int _dm_task_set_name(struct dm_task *dmt, const char *name,
+                            dm_string_mangling_t mangling_mode)
+{
+       char mangled_name[DM_NAME_LEN];
+       int r = 0;
+
+       /* Drop any names stored by a previous call. */
+       dm_free(dmt->dev_name);
+       dmt->dev_name = NULL;
+       dm_free(dmt->mangled_dev_name);
+       dmt->mangled_dev_name = NULL;
+
+       if (strlen(name) >= DM_NAME_LEN) {
+               log_error("Name \"%s\" too long.", name);
+               return 0;
+       }
+
+       if (!check_multiple_mangled_string_allowed(name, "name", mangling_mode))
+               return_0;
+
+       /* r: <0 error, 0 nothing mangled, 1 mangling applied. */
+       if (mangling_mode != DM_STRING_MANGLING_NONE &&
+           (r = mangle_string(name, "name", strlen(name), mangled_name,
+                              sizeof(mangled_name), mangling_mode)) < 0) {
+               log_error("Failed to mangle device name \"%s\".", name);
+               return 0;
+       }
+
+       /* Store mangled_dev_name only if it differs from dev_name! */
+       if (r) {
+               log_debug_activation("Device name mangled [%s]: %s --> %s",
+                                    mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex",
+                                    name, mangled_name);
+               if (!(dmt->mangled_dev_name = dm_strdup(mangled_name))) {
+                       log_error("_dm_task_set_name: dm_strdup(%s) failed", mangled_name);
+                       return 0;
+               }
+       }
+
+       if (!(dmt->dev_name = dm_strdup(name))) {
+               log_error("_dm_task_set_name: strdup(%s) failed", name);
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Resolve 'path' (a string containing '/') to an existing dm device and
+ * store its name on the task.  'name' is the final path component.
+ * The result is stored without mangling since the node already exists.
+ * Returns 1 on success, 0 on failure.
+ */
+static int _dm_task_set_name_from_path(struct dm_task *dmt, const char *path,
+                                      const char *name)
+{
+       char buf[PATH_MAX];
+       struct stat st1, st2;
+       const char *final_name = NULL;
+       size_t len;
+
+       /* A device being created must be given a plain name, not a path. */
+       if (dmt->type == DM_DEVICE_CREATE) {
+               log_error("Name \"%s\" invalid. It contains \"/\".", path);
+               return 0;
+       }
+
+       if (!stat(path, &st1)) {
+               /*
+                * Found directly.
+                * If supplied path points to same device as last component
+                * under /dev/mapper, use that name directly.
+                */
+               if (dm_snprintf(buf, sizeof(buf), "%s/%s", _dm_dir, name) == -1) {
+                       log_error("Couldn't create path for %s", name);
+                       return 0;
+               }
+
+               if (!stat(buf, &st2) && (st1.st_rdev == st2.st_rdev))
+                       final_name = name;
+       } else {
+               /* Not found. */
+               /* If there is exactly one '/' try a prefix of /dev */
+               if ((len = strlen(path)) < 3 || path[0] == '/' ||
+                   dm_count_chars(path, len, '/') != 1) {
+                       log_error("Device %s not found", path);
+                       return 0;
+               }
+               if (dm_snprintf(buf, sizeof(buf), "%s/../%s", _dm_dir, path) == -1) {
+                       log_error("Couldn't create /dev path for %s", path);
+                       return 0;
+               }
+               if (stat(buf, &st1)) {
+                       log_error("Device %s not found", path);
+                       return 0;
+               }
+               /* Found */
+       }
+
+       /*
+        * If we don't have the dm name yet, Call _find_dm_name_of_device() to
+        * scan _dm_dir for a match.
+        */
+       if (!final_name) {
+               if (_find_dm_name_of_device(st1.st_rdev, buf, sizeof(buf)))
+                       final_name = buf;
+               else {
+                       log_error("Device %s not found", name);
+                       return 0;
+               }
+       }
+
+       /* This is an already existing path - do not mangle! */
+       return _dm_task_set_name(dmt, final_name, DM_STRING_MANGLING_NONE);
+}
+
+/* Set the device name on a task; a '/' means an existing device path. */
+int dm_task_set_name(struct dm_task *dmt, const char *name)
+{
+       const char *slash = strrchr(name, '/');
+
+       /* Path supplied for an existing device? */
+       if (slash)
+               return _dm_task_set_name_from_path(dmt, name, slash + 1);
+
+       /* Plain name: apply the globally configured mangling mode. */
+       return _dm_task_set_name(dmt, name, dm_get_name_mangling_mode());
+}
+
+/* Return the device name held in the v4 ioctl result buffer. */
+const char *dm_task_get_name(const struct dm_task *dmt)
+{
+       return dmt->dmi.v4->name;
+}
+
+/*
+ * Return a dm_malloc'd mangled copy of 'str', or NULL on error.
+ * 'buf'/'buf_size' provide scratch space for the mangled form.
+ */
+static char *_task_get_string_mangled(const char *str, const char *str_name,
+                                     char *buf, size_t buf_size,
+                                     dm_string_mangling_t mode)
+{
+       char *result;
+       int mangled;
+
+       mangled = mangle_string(str, str_name, strlen(str), buf, buf_size, mode);
+       if (mangled < 0)
+               return NULL;
+
+       /* Duplicate the mangled form only when mangling changed the string. */
+       result = mangled ? dm_strdup(buf) : dm_strdup(str);
+       if (!result)
+               log_error("_task_get_string_mangled: dm_strdup failed");
+
+       return result;
+}
+
+/*
+ * Return a dm_malloc'd unmangled copy of 'str', or NULL on error.
+ * 'buf'/'buf_size' provide scratch space for the unmangled form.
+ */
+static char *_task_get_string_unmangled(const char *str, const char *str_name,
+                                       char *buf, size_t buf_size,
+                                       dm_string_mangling_t mode)
+{
+       char *rs;
+       int r = 0;
+
+       /*
+        * Unless the mode used is 'none', the string
+        * is *already* unmangled on ioctl return!
+        */
+       if (mode == DM_STRING_MANGLING_NONE &&
+           (r = unmangle_string(str, str_name, strlen(str), buf, buf_size, mode)) < 0)
+               return NULL;
+
+       /* r > 0: unmangling modified the string - duplicate the buffer. */
+       if (!(rs = r ? dm_strdup(buf) : dm_strdup(str)))
+               log_error("_task_get_string_unmangled: dm_strdup failed");
+
+       return rs;
+}
+
+/* Return a dm_malloc'd mangled form of the task's name, or NULL. */
+char *dm_task_get_name_mangled(const struct dm_task *dmt)
+{
+       char buf[DM_NAME_LEN];
+       const char *name = dm_task_get_name(dmt);
+       char *result = _task_get_string_mangled(name, "name", buf, sizeof(buf),
+                                               dm_get_name_mangling_mode());
+
+       if (!result)
+               log_error("Failed to mangle device name \"%s\".", name);
+
+       return result;
+}
+
+/* Return a dm_malloc'd unmangled form of the task's name, or NULL. */
+char *dm_task_get_name_unmangled(const struct dm_task *dmt)
+{
+       char buf[DM_NAME_LEN];
+       const char *name = dm_task_get_name(dmt);
+       char *result = _task_get_string_unmangled(name, "name", buf, sizeof(buf),
+                                                 dm_get_name_mangling_mode());
+
+       if (!result)
+               log_error("Failed to unmangle device name \"%s\".", name);
+
+       return result;
+}
+
+/* Return the device uuid held in the v4 ioctl result buffer. */
+const char *dm_task_get_uuid(const struct dm_task *dmt)
+{
+       return dmt->dmi.v4->uuid;
+}
+
+/* Return a dm_malloc'd mangled form of the task's uuid, or NULL. */
+char *dm_task_get_uuid_mangled(const struct dm_task *dmt)
+{
+       char buf[DM_UUID_LEN];
+       const char *uuid = dm_task_get_uuid(dmt);
+       char *result = _task_get_string_mangled(uuid, "UUID", buf, sizeof(buf),
+                                               dm_get_name_mangling_mode());
+
+       if (!result)
+               log_error("Failed to mangle device uuid \"%s\".", uuid);
+
+       return result;
+}
+
+/* Return a dm_malloc'd unmangled form of the task's uuid, or NULL. */
+char *dm_task_get_uuid_unmangled(const struct dm_task *dmt)
+{
+       char buf[DM_UUID_LEN];
+       const char *uuid = dm_task_get_uuid(dmt);
+       char *result = _task_get_string_unmangled(uuid, "UUID", buf, sizeof(buf),
+                                                 dm_get_name_mangling_mode());
+
+       if (!result)
+               log_error("Failed to unmangle device uuid \"%s\".", uuid);
+
+       return result;
+}
+
+/*
+ * Set the target name for a rename operation.  The name must not
+ * contain '/', must be non-empty and shorter than DM_NAME_LEN, and is
+ * mangled according to the global mangling mode before being stored.
+ * Returns 1 on success, 0 on error.
+ */
+int dm_task_set_newname(struct dm_task *dmt, const char *newname)
+{
+       dm_string_mangling_t mangling_mode = dm_get_name_mangling_mode();
+       char mangled_name[DM_NAME_LEN];
+       int r = 0;
+
+       if (strchr(newname, '/')) {
+               log_error("Name \"%s\" invalid. It contains \"/\".", newname);
+               return 0;
+       }
+
+       if (strlen(newname) >= DM_NAME_LEN) {
+               log_error("Name \"%s\" too long", newname);
+               return 0;
+       }
+
+       if (!*newname) {
+               log_error("Non empty new name is required.");
+               return 0;
+       }
+
+       if (!check_multiple_mangled_string_allowed(newname, "new name", mangling_mode))
+               return_0;
+
+       /* r > 0 means mangling modified the name; r < 0 is an error. */
+       if (mangling_mode != DM_STRING_MANGLING_NONE &&
+           (r = mangle_string(newname, "new name", strlen(newname), mangled_name,
+                              sizeof(mangled_name), mangling_mode)) < 0) {
+               log_error("Failed to mangle new device name \"%s\"", newname);
+               return 0;
+       }
+
+       if (r) {
+               log_debug_activation("New device name mangled [%s]: %s --> %s",
+                                    mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex",
+                                    newname, mangled_name);
+               newname = mangled_name;
+       }
+
+       dm_free(dmt->newname);
+       if (!(dmt->newname = dm_strdup(newname))) {
+               log_error("dm_task_set_newname: strdup(%s) failed", newname);
+               return 0;
+       }
+
+       /* This newname carries a device name, not a uuid - clear the flag. */
+       dmt->new_uuid = 0;
+
+       return 1;
+}
+
+/*
+ * Store 'uuid' on the task, mangled according to the global mangling
+ * mode.  The mangled form is kept separately in dmt->mangled_uuid only
+ * when mangling changed the string; dmt->uuid always keeps the
+ * caller's original.  Returns 1 on success, 0 on error.
+ */
+int dm_task_set_uuid(struct dm_task *dmt, const char *uuid)
+{
+       char mangled_uuid[DM_UUID_LEN];
+       dm_string_mangling_t mangling_mode = dm_get_name_mangling_mode();
+       int r = 0;
+
+       /* Drop any uuid stored by an earlier call. */
+       dm_free(dmt->uuid);
+       dmt->uuid = NULL;
+       dm_free(dmt->mangled_uuid);
+       dmt->mangled_uuid = NULL;
+
+       if (!check_multiple_mangled_string_allowed(uuid, "UUID", mangling_mode))
+               return_0;
+
+       /* r > 0 means mangling modified the uuid; r < 0 is an error. */
+       if (mangling_mode != DM_STRING_MANGLING_NONE &&
+           (r = mangle_string(uuid, "UUID", strlen(uuid), mangled_uuid,
+                              sizeof(mangled_uuid), mangling_mode)) < 0) {
+               log_error("Failed to mangle device uuid \"%s\".", uuid);
+               return 0;
+       }
+
+       if (r) {
+               log_debug_activation("Device uuid mangled [%s]: %s --> %s",
+                                    mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex",
+                                    uuid, mangled_uuid);
+
+               if (!(dmt->mangled_uuid = dm_strdup(mangled_uuid))) {
+                       log_error("dm_task_set_uuid: dm_strdup(%s) failed", mangled_uuid);
+                       return 0;
+               }
+       }
+
+       if (!(dmt->uuid = dm_strdup(uuid))) {
+               log_error("dm_task_set_uuid: strdup(%s) failed", uuid);
+               return 0;
+       }
+
+       return 1;
+}
+
+/* Select the device by major number; disables default-major fallback. */
+int dm_task_set_major(struct dm_task *dmt, int major)
+{
+       dmt->allow_default_major_fallback = 0;
+       dmt->major = major;
+       return 1;
+}
+
+/* Select the device by minor number. */
+int dm_task_set_minor(struct dm_task *dmt, int minor)
+{
+       dmt->minor = minor;
+       return 1;
+}
+
+/* Set both device numbers and choose the default-major fallback policy. */
+int dm_task_set_major_minor(struct dm_task *dmt, int major, int minor,
+                           int allow_default_major_fallback)
+{
+       dmt->allow_default_major_fallback = allow_default_major_fallback;
+       dmt->major = major;
+       dmt->minor = minor;
+       return 1;
+}
+
+/* Set the owner uid to apply to the device node. */
+int dm_task_set_uid(struct dm_task *dmt, uid_t uid)
+{
+       dmt->uid = uid;
+       return 1;
+}
+
+/* Set the group gid to apply to the device node. */
+int dm_task_set_gid(struct dm_task *dmt, gid_t gid)
+{
+       dmt->gid = gid;
+       return 1;
+}
+
+/* Set the permission bits to apply to the device node. */
+int dm_task_set_mode(struct dm_task *dmt, mode_t mode)
+{
+       dmt->mode = mode;
+       return 1;
+}
+
+/* Turn on extra result checking for this task. */
+int dm_task_enable_checks(struct dm_task *dmt)
+{
+       dmt->enable_checks = 1;
+       return 1;
+}
+
+/*
+ * Append one target segment (start/size sectors, type and parameter
+ * string) to the task's target list.  Returns 1 on success.
+ */
+int dm_task_add_target(struct dm_task *dmt, uint64_t start, uint64_t size,
+                      const char *ttype, const char *params)
+{
+       struct target *t;
+
+       if (!(t = create_target(start, size, ttype, params)))
+               return_0;
+
+       /* Link onto the tail, or start the list if it is empty. */
+       if (dmt->head) {
+               dmt->tail->next = t;
+               dmt->tail = t;
+       } else
+               dmt->head = dmt->tail = t;
+
+       return 1;
+}
+
+#ifdef HAVE_SELINUX
+/*
+ * Look up the SELinux security context configured for 'path' with file
+ * type 'mode'.  Uses the selabel API when available (handle cached in
+ * _selabel_handle), otherwise falls back to matchpathcon().
+ * Returns 1 and sets *scontext on success, 0 on failure.
+ */
+static int _selabel_lookup(const char *path, mode_t mode,
+                          security_context_t *scontext)
+{
+#ifdef HAVE_SELINUX_LABEL_H
+       /* Open the labeling handle lazily and keep it for later calls. */
+       if (!_selabel_handle &&
+           !(_selabel_handle = selabel_open(SELABEL_CTX_FILE, NULL, 0))) {
+               log_error("selabel_open failed: %s", strerror(errno));
+               return 0;
+       }
+
+       if (selabel_lookup(_selabel_handle, scontext, path, mode)) {
+               log_debug_activation("selabel_lookup failed for %s: %s",
+                                    path, strerror(errno));
+               return 0;
+       }
+#else
+       if (matchpathcon(path, mode, scontext)) {
+               log_debug_activation("matchpathcon failed for %s: %s",
+                                    path, strerror(errno));
+               return 0;
+       }
+#endif
+       return 1;
+}
+#endif
+
+#ifdef HAVE_SELINUX
+/* Cache the result of is_selinux_enabled() across calls. */
+static int _is_selinux_enabled(void)
+{
+       static int _tested = 0;
+       static int _enabled;
+
+       if (!_tested) {
+               _tested = 1;
+               _enabled = is_selinux_enabled();
+       }
+
+       return _enabled;
+}
+#endif
+
+/*
+ * Set the SELinux file-creation context to the one configured for
+ * 'path' (or reset it to the default when 'path' is NULL).  A no-op
+ * when SELinux is disabled or not compiled in.  Returns 1 on success.
+ */
+int dm_prepare_selinux_context(const char *path, mode_t mode)
+{
+#ifdef HAVE_SELINUX
+       security_context_t scontext = NULL;
+
+       if (_is_selinux_enabled() <= 0)
+               return 1;
+
+       if (path) {
+               if (!_selabel_lookup(path, mode, &scontext))
+                       return_0;
+
+               log_debug_activation("Preparing SELinux context for %s to %s.", path, scontext);
+       }
+       else
+               log_debug_activation("Resetting SELinux context to default value.");
+
+       /* scontext == NULL resets the create context to the default. */
+       if (setfscreatecon(scontext) < 0) {
+               log_sys_error("setfscreatecon", (path ? : "SELinux context reset"));
+               freecon(scontext);
+               return 0;
+       }
+
+       freecon(scontext);
+#endif
+       return 1;
+}
+
+/*
+ * Apply the configured SELinux context to an existing 'path'.
+ * A no-op when SELinux is disabled or not compiled in.
+ * Returns 1 on success.
+ */
+int dm_set_selinux_context(const char *path, mode_t mode)
+{
+#ifdef HAVE_SELINUX
+       security_context_t scontext = NULL;
+
+       if (_is_selinux_enabled() <= 0)
+               return 1;
+
+       if (!_selabel_lookup(path, mode, &scontext))
+               return_0;
+
+       log_debug_activation("Setting SELinux context for %s to %s.", path, scontext);
+
+       /* ENOTSUP: filesystem does not support labels - not an error. */
+       if ((lsetfilecon(path, scontext) < 0) && (errno != ENOTSUP)) {
+               log_sys_error("lsetfilecon", path);
+               freecon(scontext);
+               return 0;
+       }
+
+       freecon(scontext);
+#endif
+       return 1;
+}
+
+/* Release the cached selabel handle (if any). */
+void selinux_release(void)
+{
+#ifdef HAVE_SELINUX_LABEL_H
+       if (_selabel_handle)
+               selabel_close(_selabel_handle);
+       _selabel_handle = NULL;
+#endif
+}
+
+/*
+ * Whether a fallback warning should be emitted for a node operation
+ * that udev was expected to perform: only if the caller asked for a
+ * warning and udev sync support plus udev checking are both enabled.
+ * (Reformatted: body was indented with 4 spaces, unlike the rest of
+ * the file.)
+ */
+static int _warn_if_op_needed(int warn_if_udev_failed)
+{
+       return warn_if_udev_failed && dm_udev_get_sync_support() &&
+              dm_udev_get_checking();
+}
+
+/*
+ * Create (or adopt) the /dev node for 'dev_name' as a block device
+ * major:minor with the given owner, group and mode.  An existing node
+ * with the right device number is left untouched; a wrong one is
+ * unlinked and recreated.  Returns 1 on success, 0 on failure.
+ */
+static int _add_dev_node(const char *dev_name, uint32_t major, uint32_t minor,
+                        uid_t uid, gid_t gid, mode_t mode, int warn_if_udev_failed)
+{
+       char path[PATH_MAX];
+       struct stat info;
+       dev_t dev = MKDEV((dev_t)major, (dev_t)minor);
+       mode_t old_mask;
+
+       if (!_build_dev_path(path, sizeof(path), dev_name))
+               return_0;
+
+       if (stat(path, &info) >= 0) {
+               if (!S_ISBLK(info.st_mode)) {
+                       log_error("A non-block device file at '%s' "
+                                 "is already present", path);
+                       return 0;
+               }
+
+               /* If right inode already exists we don't touch uid etc. */
+               if (info.st_rdev == dev)
+                       return 1;
+
+               if (unlink(path) < 0) {
+                       log_error("Unable to unlink device node for '%s'",
+                                 dev_name);
+                       return 0;
+               }
+       } else if (_warn_if_op_needed(warn_if_udev_failed))
+               log_warn("%s not set up by udev: Falling back to direct "
+                        "node creation.", path);
+
+       /* Create with a clean umask and the SELinux context for the path. */
+       (void) dm_prepare_selinux_context(path, S_IFBLK);
+       old_mask = umask(0);
+
+       /* The node may already have been created by udev. So ignore EEXIST. */
+       if (mknod(path, S_IFBLK | mode, dev) < 0 && errno != EEXIST) {
+               log_error("%s: mknod for %s failed: %s", path, dev_name, strerror(errno));
+               umask(old_mask);
+               (void) dm_prepare_selinux_context(NULL, 0);
+               return 0;
+       }
+       umask(old_mask);
+       (void) dm_prepare_selinux_context(NULL, 0);
+
+       if (chown(path, uid, gid) < 0) {
+               log_sys_error("chown", path);
+               return 0;
+       }
+
+       log_debug_activation("Created %s", path);
+
+       return 1;
+}
+
+/*
+ * Remove the /dev node for 'dev_name'.  Returns 1 on success or when
+ * the node is already gone, 0 on failure.
+ */
+static int _rm_dev_node(const char *dev_name, int warn_if_udev_failed)
+{
+       char path[PATH_MAX];
+       struct stat info;
+
+       if (!_build_dev_path(path, sizeof(path), dev_name))
+               return_0;
+       /* Nothing to do if the node does not exist. */
+       if (lstat(path, &info) < 0)
+               return 1;
+       else if (_warn_if_op_needed(warn_if_udev_failed))
+               log_warn("Node %s was not removed by udev. "
+                        "Falling back to direct node removal.", path);
+
+       /* udev may already have deleted the node. Ignore ENOENT. */
+       if (unlink(path) < 0 && errno != ENOENT) {
+               log_error("Unable to unlink device node for '%s'", dev_name);
+               return 0;
+       }
+
+       log_debug_activation("Removed %s", path);
+
+       return 1;
+}
+
+/*
+ * Rename the /dev node 'old_name' to 'new_name', coping with nodes
+ * that udev may already have added, removed or renamed.
+ * Returns 1 on success, 0 on failure.
+ */
+static int _rename_dev_node(const char *old_name, const char *new_name,
+                           int warn_if_udev_failed)
+{
+       char oldpath[PATH_MAX];
+       char newpath[PATH_MAX];
+       struct stat info, info2;
+       struct stat *info_block_dev;
+
+       if (!_build_dev_path(oldpath, sizeof(oldpath), old_name) ||
+           !_build_dev_path(newpath, sizeof(newpath), new_name))
+               return_0;
+
+       if (lstat(newpath, &info) == 0) {
+               /* Follow a symlink to check the actual block device behind it. */
+               if (S_ISLNK(info.st_mode)) {
+                       if (stat(newpath, &info2) == 0)
+                               info_block_dev = &info2;
+                       else {
+                               log_sys_error("stat", newpath);
+                               return 0;
+                       }
+               } else
+                       info_block_dev = &info;
+
+               if (!S_ISBLK(info_block_dev->st_mode)) {
+                       log_error("A non-block device file at '%s' "
+                                 "is already present", newpath);
+                       return 0;
+               }
+               else if (_warn_if_op_needed(warn_if_udev_failed)) {
+                       if (lstat(oldpath, &info) < 0 &&
+                                errno == ENOENT)
+                               /* assume udev already deleted this */
+                               return 1;
+
+                       log_warn("The node %s should have been renamed to %s "
+                                "by udev but old node is still present. "
+                                "Falling back to direct old node removal.",
+                                oldpath, newpath);
+                       return _rm_dev_node(old_name, 0);
+               }
+
+               if (unlink(newpath) < 0) {
+                       if (errno == EPERM) {
+                               /* devfs, entry has already been renamed */
+                               return 1;
+                       }
+                       log_error("Unable to unlink device node for '%s'",
+                                 new_name);
+                       return 0;
+               }
+       }
+       else if (_warn_if_op_needed(warn_if_udev_failed))
+               log_warn("The node %s should have been renamed to %s "
+                        "by udev but new node is not present. "
+                        "Falling back to direct node rename.",
+                        oldpath, newpath);
+
+       /* udev may already have renamed the node. Ignore ENOENT. */
+       /* FIXME: when renaming to target mangling mode "none" with udev
+        * while there are some blacklisted characters in the node name,
+        * udev will remove the old_node, but fails to properly rename
+        * to new_node. The libdevmapper code tries to call
+        * rename(old_node,new_node), but that won't do anything
+        * since the old node is already removed by udev.
+        * For example renaming 'a\x20b' to 'a b':
+        *   - udev removes 'a\x20b'
+        *   - udev creates 'a' and 'b' (since it considers the ' ' as a delimiter
+        *   - libdevmapper checks udev has done the rename properly
+        *   - libdevmapper calls stat(new_node) and it does not see it
+        *   - libdevmapper calls rename(old_node,new_node)
+        *   - the rename is a NOP since the old_node does not exist anymore
+        *
+        * However, this situation is very rare - why would anyone need
+        * to rename to an unsupported mode??? So a fix for this would be
+        * just for completeness.
+        */
+       if (rename(oldpath, newpath) < 0 && errno != ENOENT) {
+               log_error("Unable to rename device node from '%s' to '%s'",
+                         old_name, new_name);
+               return 0;
+       }
+
+       log_debug_activation("Renamed %s to %s", oldpath, newpath);
+
+       return 1;
+}
+
+#ifdef __linux__
+/* Open the /dev node for 'dev_name' read-only; return the fd or -1. */
+static int _open_dev_node(const char *dev_name)
+{
+       int fd = -1;
+       char path[PATH_MAX];
+
+       if (!_build_dev_path(path, sizeof(path), dev_name))
+               return fd;
+
+       if ((fd = open(path, O_RDONLY, 0)) < 0)
+               log_sys_error("open", path);
+
+       return fd;
+}
+
+/*
+ * Read the current read-ahead of a device into *read_ahead (in
+ * 512-byte sectors).  Prefers sysfs read_ahead_kb (converted via
+ * kB * 2 = sectors) when the major number is known, otherwise falls
+ * back to the BLKRAGET ioctl on the opened node.
+ * Returns 1 on success, 0 on failure.
+ */
+int get_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+                           uint32_t *read_ahead)
+{
+       char buf[24];
+       int len;
+       int r = 1;
+       int fd;
+       long read_ahead_long;
+
+       /*
+        * If we know the device number, use sysfs if we can.
+        * Otherwise use BLKRAGET ioctl.
+        */
+       if (*_sysfs_dir && major != 0) {
+               if (dm_snprintf(_path0, sizeof(_path0), "%sdev/block/%" PRIu32
+                               ":%" PRIu32 "/bdi/read_ahead_kb", _sysfs_dir,
+                               major, minor) < 0) {
+                       log_error("Failed to build sysfs_path.");
+                       return 0;
+               }
+
+               if ((fd = open(_path0, O_RDONLY, 0)) != -1) {
+                       /* Reading from sysfs, expecting number\n */
+                       if ((len = read(fd, buf, sizeof(buf) - 1)) < 1) {
+                               log_sys_error("read", _path0);
+                               r = 0;
+                       } else {
+                               buf[len] = 0; /* kill \n and ensure \0 */
+                               *read_ahead = atoi(buf) * 2;
+                               log_debug_activation("%s (%d:%d): read ahead is %" PRIu32,
+                                                    dev_name, major, minor, *read_ahead);
+                       }
+
+                       if (close(fd))
+                               log_sys_debug("close", _path0);
+
+                       return r;
+               }
+
+               log_sys_debug("open", _path0);
+               /* Fall back to use dev_name */
+       }
+
+       /*
+        * Open/close dev_name may block the process
+        * (i.e. overfilled thin pool volume)
+        */
+       if (!*dev_name) {
+               log_error("Empty device name passed to BLKRAGET");
+               return 0;
+       }
+
+       if ((fd = _open_dev_node(dev_name)) < 0)
+               return_0;
+
+       if (ioctl(fd, BLKRAGET, &read_ahead_long)) {
+               log_sys_error("BLKRAGET", dev_name);
+               *read_ahead = 0;
+               r = 0;
+       } else {
+               *read_ahead = (uint32_t) read_ahead_long;
+               log_debug_activation("%s: read ahead is %" PRIu32, dev_name, *read_ahead);
+       }
+
+       if (close(fd))
+               log_sys_debug("close", dev_name);
+
+       return r;
+}
+
+/*
+ * Set the read-ahead of a device to 'read_ahead' 512-byte sectors.
+ * Prefers sysfs read_ahead_kb (sectors rounded up to whole kB) when
+ * the major number is known, otherwise falls back to the BLKRASET
+ * ioctl on the opened node.  Returns 1 on success, 0 on failure.
+ *
+ * Fix: the empty-name error message said "BLKRAGET" (copy-paste from
+ * get_dev_node_read_ahead) although this path uses BLKRASET.
+ */
+static int _set_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+                          uint32_t read_ahead)
+{
+       char buf[24];
+       int len;
+       int r = 1;
+       int fd;
+       long read_ahead_long = (long) read_ahead;
+
+       log_debug_activation("%s (%d:%d): Setting read ahead to %" PRIu32, dev_name,
+                            major, minor, read_ahead);
+
+       /*
+        * If we know the device number, use sysfs if we can.
+        * Otherwise use BLKRASET ioctl. RA is set after resume.
+        */
+       if (*_sysfs_dir && major != 0) {
+               if (dm_snprintf(_path0, sizeof(_path0), "%sdev/block/%" PRIu32
+                               ":%" PRIu32 "/bdi/read_ahead_kb",
+                               _sysfs_dir, major, minor) < 0) {
+                       log_error("Failed to build sysfs_path.");
+                       return 0;
+               }
+
+               /* Sysfs is kB based, round up to kB */
+               if ((len = dm_snprintf(buf, sizeof(buf), FMTu32,
+                                      (read_ahead + 1) / 2)) < 0) {
+                       log_error("Failed to build size in kB.");
+                       return 0;
+               }
+
+               if ((fd = open(_path0, O_WRONLY, 0)) != -1) {
+                       if (write(fd, buf, len) < len) {
+                               log_sys_error("write", _path0);
+                               r = 0;
+                       }
+
+                       if (close(fd))
+                               log_sys_debug("close", _path0);
+
+                       return r;
+               }
+
+               log_sys_debug("open", _path0);
+               /* Fall back to use dev_name */
+       }
+
+       if (!*dev_name) {
+               log_error("Empty device name passed to BLKRASET");
+               return 0;
+       }
+
+       if ((fd = _open_dev_node(dev_name)) < 0)
+               return_0;
+
+       if (ioctl(fd, BLKRASET, read_ahead_long)) {
+               log_sys_error("BLKRASET", dev_name);
+               r = 0;
+       }
+
+       if (close(fd))
+               log_sys_debug("close", dev_name);
+
+       return r;
+}
+
+/*
+ * Apply a read-ahead setting to a node, honouring the special values
+ * DM_READ_AHEAD_AUTO (leave the kernel value untouched) and
+ * DM_READ_AHEAD_NONE (set 0), and DM_READ_AHEAD_MINIMUM_FLAG (only
+ * raise, never lower, the current kernel value).
+ * Fix: removed stray trailing whitespace after a string-continuation
+ * line; code is otherwise unchanged.
+ */
+static int _set_dev_node_read_ahead(const char *dev_name,
+                                   uint32_t major, uint32_t minor,
+                                   uint32_t read_ahead, uint32_t read_ahead_flags)
+{
+       uint32_t current_read_ahead;
+
+       if (read_ahead == DM_READ_AHEAD_AUTO)
+               return 1;
+
+       if (read_ahead == DM_READ_AHEAD_NONE)
+               read_ahead = 0;
+
+       if (read_ahead_flags & DM_READ_AHEAD_MINIMUM_FLAG) {
+               if (!get_dev_node_read_ahead(dev_name, major, minor, &current_read_ahead))
+                       return_0;
+
+               if (current_read_ahead >= read_ahead) {
+                       log_debug_activation("%s: retaining kernel read ahead of %" PRIu32
+                                 " (requested %" PRIu32 ")",
+                                 dev_name, current_read_ahead, read_ahead);
+                       return 1;
+               }
+       }
+
+       return _set_read_ahead(dev_name, major, minor, read_ahead);
+}
+
+#else
+
+int get_dev_node_read_ahead(const char *dev_name, uint32_t *read_ahead)
+{
+       /*
+        * Non-Linux build: read-ahead is unsupported, report 0.
+        * NOTE(review): this variant takes fewer arguments than the
+        * __linux__ one above - confirm the header declaration matches.
+        */
+       *read_ahead = 0;
+       return 1;
+}
+
+static int _set_dev_node_read_ahead(const char *dev_name,
+                                   uint32_t major, uint32_t minor,
+                                   uint32_t read_ahead, uint32_t read_ahead_flags)
+{
+       /* Read-ahead tuning is a no-op on non-Linux platforms. */
+       return 1;
+}
+#endif
+
+/* Kinds of deferred /dev node operations (see _do_node_op()). */
+typedef enum {
+       NODE_ADD,
+       NODE_DEL,
+       NODE_RENAME,
+       NODE_READ_AHEAD,
+       NUM_NODES
+} node_op_t;
+
+static int _do_node_op(node_op_t type, const char *dev_name, uint32_t major,
+                      uint32_t minor, uid_t uid, gid_t gid, mode_t mode,
+                      const char *old_name, uint32_t read_ahead,
+                      uint32_t read_ahead_flags, int warn_if_udev_failed)
+{
+       switch (type) {
+       case NODE_ADD:
+               return _add_dev_node(dev_name, major, minor, uid, gid,
+                                    mode, warn_if_udev_failed);
+       case NODE_DEL:
+               return _rm_dev_node(dev_name, warn_if_udev_failed);
+       case NODE_RENAME:
+               return _rename_dev_node(old_name, dev_name, warn_if_udev_failed);
+       case NODE_READ_AHEAD:
+               return _set_dev_node_read_ahead(dev_name, major, minor,
+                                               read_ahead, read_ahead_flags);
+       default:
+               ; /* NOTREACHED */
+       }
+
+       return 1;
+}
+
+/* Stacked node operations, executed later by _pop_node_ops(). */
+static DM_LIST_INIT(_node_ops);
+/* Number of stacked operations per node_op_t type. */
+static int _count_node_ops[NUM_NODES];
+
+/* One stacked node operation plus all parameters needed to replay it. */
+struct node_op_parms {
+       struct dm_list list;
+       node_op_t type;
+       char *dev_name;         /* points into names[] */
+       uint32_t major;
+       uint32_t minor;
+       uid_t uid;
+       gid_t gid;
+       mode_t mode;
+       uint32_t read_ahead;
+       uint32_t read_ahead_flags;
+       char *old_name;         /* points into names[] (NODE_RENAME) */
+       int warn_if_udev_failed;
+       unsigned rely_on_udev;
+       char names[0];          /* flexible storage for dev_name/old_name */
+};
+
+/*
+ * Copy 'str' (including its NUL) into the pool at *pos, publish the
+ * copy's address via *ptr, and advance *pos past it.
+ */
+static void _store_str(char **pos, char **ptr, const char *str)
+{
+       size_t n = strlen(str) + 1;
+
+       memcpy(*pos, str, n);
+       *ptr = *pos;
+       *pos += n;
+}
+
+/* Unstack one op: drop its type counter, unlink it and free it. */
+static void _del_node_op(struct node_op_parms *nop)
+{
+       _count_node_ops[nop->type]--;
+       dm_list_del(&nop->list);
+       dm_free(nop);
+}
+
+/* Return 1 if any node op of a type other than 'type' is stacked. */
+static int _other_node_ops(node_op_t type)
+{
+       unsigned i;
+
+       for (i = 0; i < NUM_NODES; i++) {
+               if (i == type)
+                       continue;
+               if (_count_node_ops[i])
+                       return 1;
+       }
+
+       return 0;
+}
+
+/* Log one stacked node operation together with its udev-related flags. */
+static void _log_node_op(const char *action_str, struct node_op_parms *nop)
+{
+       const char *rely = nop->rely_on_udev ? " [trust_udev]" : "" ;
+       const char *verify = nop->warn_if_udev_failed ? " [verify_udev]" : "";
+
+       switch (nop->type) {
+       case NODE_ADD:
+               log_debug_activation("%s: %s NODE_ADD (%" PRIu32 ",%" PRIu32 ") %u:%u 0%o%s%s",
+                                    nop->dev_name, action_str, nop->major, nop->minor, nop->uid, nop->gid, nop->mode,
+                                    rely, verify);
+               break;
+       case NODE_DEL:
+               log_debug_activation("%s: %s NODE_DEL%s%s", nop->dev_name, action_str, rely, verify);
+               break;
+       case NODE_RENAME:
+               log_debug_activation("%s: %s NODE_RENAME to %s%s%s", nop->old_name, action_str, nop->dev_name, rely, verify);
+               break;
+       case NODE_READ_AHEAD:
+               log_debug_activation("%s: %s NODE_READ_AHEAD %" PRIu32 " (flags=%" PRIu32 ")%s%s",
+                                    nop->dev_name, action_str, nop->read_ahead, nop->read_ahead_flags, rely, verify);
+               break;
+       default:
+               ; /* NOTREACHED */
+       }
+}
+
+/*
+ * Queue a node operation for later execution, first cancelling any
+ * stacked operations it supersedes (see the inline comments on each
+ * branch).  dev_name and old_name are copied into the allocation's
+ * trailing names[] storage.  Returns 1 on success, 0 on failure.
+ */
+static int _stack_node_op(node_op_t type, const char *dev_name, uint32_t major,
+                         uint32_t minor, uid_t uid, gid_t gid, mode_t mode,
+                         const char *old_name, uint32_t read_ahead,
+                         uint32_t read_ahead_flags, int warn_if_udev_failed,
+                         unsigned rely_on_udev)
+{
+       struct node_op_parms *nop;
+       struct dm_list *noph, *nopht;
+       size_t len = strlen(dev_name) + strlen(old_name) + 2;
+       char *pos;
+
+       /*
+        * Note: warn_if_udev_failed must have valid content
+        */
+       if ((type == NODE_DEL) && _other_node_ops(type))
+               /*
+                * Ignore any outstanding operations on the node if deleting it.
+                */
+               dm_list_iterate_safe(noph, nopht, &_node_ops) {
+                       nop = dm_list_item(noph, struct node_op_parms);
+                       if (!strcmp(dev_name, nop->dev_name)) {
+                               _log_node_op("Unstacking", nop);
+                               _del_node_op(nop);
+                               if (!_other_node_ops(type))
+                                       break; /* no other non DEL ops */
+                       }
+               }
+       else if ((type == NODE_ADD) && _count_node_ops[NODE_DEL])
+               /*
+                * Ignore previous DEL operation on added node.
+                * (No other operations for this device then DEL could be stacked here).
+                */
+               dm_list_iterate_safe(noph, nopht, &_node_ops) {
+                       nop = dm_list_item(noph, struct node_op_parms);
+                       if ((nop->type == NODE_DEL) &&
+                           !strcmp(dev_name, nop->dev_name)) {
+                               _log_node_op("Unstacking", nop);
+                               _del_node_op(nop);
+                               break; /* no other DEL ops */
+                       }
+               }
+       else if (type == NODE_RENAME)
+               /*
+                * Ignore any outstanding operations if renaming it.
+                *
+                * Currently  RENAME operation happens through 'suspend -> resume'.
+                * On 'resume' device is added with read_ahead settings, so it is
+                * safe to remove any stacked ADD, RENAME, READ_AHEAD operation
+                * There cannot be any DEL operation on the renamed device.
+                */
+               dm_list_iterate_safe(noph, nopht, &_node_ops) {
+                       nop = dm_list_item(noph, struct node_op_parms);
+                       if (!strcmp(old_name, nop->dev_name)) {
+                               _log_node_op("Unstacking", nop);
+                               _del_node_op(nop);
+                       }
+               }
+       else if (type == NODE_READ_AHEAD) {
+               /* udev doesn't process readahead */
+               rely_on_udev = 0;
+               warn_if_udev_failed = 0;
+       }
+
+       if (!(nop = dm_malloc(sizeof(*nop) + len))) {
+               log_error("Insufficient memory to stack mknod operation");
+               return 0;
+       }
+
+       pos = nop->names;
+       nop->type = type;
+       nop->major = major;
+       nop->minor = minor;
+       nop->uid = uid;
+       nop->gid = gid;
+       nop->mode = mode;
+       nop->read_ahead = read_ahead;
+       nop->read_ahead_flags = read_ahead_flags;
+       nop->rely_on_udev = rely_on_udev;
+
+       /*
+        * Clear warn_if_udev_failed if rely_on_udev is set.  It doesn't get
+        * checked in this case - this just removes the flag from log messages.
+        */
+       nop->warn_if_udev_failed = rely_on_udev ? 0 : warn_if_udev_failed;
+
+       /* Copy both names into the trailing names[] storage. */
+       _store_str(&pos, &nop->dev_name, dev_name);
+       _store_str(&pos, &nop->old_name, old_name);
+
+       _count_node_ops[type]++;
+       dm_list_add(&_node_ops, &nop->list);
+
+       _log_node_op("Stacking", nop);
+
+       return 1;
+}
+
+/*
+ * Drain the stacked node operations queue: each entry that does not rely
+ * on udev is executed via _do_node_op(); entries relying on udev are only
+ * logged as skipped.  Every entry is removed from the queue either way.
+ */
+static void _pop_node_ops(void)
+{
+       struct dm_list *noph, *nopht;
+       struct node_op_parms *nop;
+
+       dm_list_iterate_safe(noph, nopht, &_node_ops) {
+               nop = dm_list_item(noph, struct node_op_parms);
+               if (!nop->rely_on_udev) {
+                       _log_node_op("Processing", nop);
+                       _do_node_op(nop->type, nop->dev_name, nop->major, nop->minor,
+                                   nop->uid, nop->gid, nop->mode, nop->old_name,
+                                   nop->read_ahead, nop->read_ahead_flags,
+                                   nop->warn_if_udev_failed);
+               } else
+                       _log_node_op("Skipping", nop);
+               _del_node_op(nop);
+       }
+}
+
+/*
+ * Queue creation of the /dev node for major:minor with the given
+ * ownership and mode (processed later by update_devs()).
+ */
+int add_dev_node(const char *dev_name, uint32_t major, uint32_t minor,
+                uid_t uid, gid_t gid, mode_t mode, int check_udev, unsigned rely_on_udev)
+{
+       return _stack_node_op(NODE_ADD, dev_name, major, minor, uid,
+                             gid, mode, "", 0, 0, check_udev, rely_on_udev);
+}
+
+/* Queue renaming of a /dev node from old_name to new_name. */
+int rename_dev_node(const char *old_name, const char *new_name, int check_udev, unsigned rely_on_udev)
+{
+       return _stack_node_op(NODE_RENAME, new_name, 0, 0, 0,
+                             0, 0, old_name, 0, 0, check_udev, rely_on_udev);
+}
+
+/* Queue removal of the /dev node named dev_name. */
+int rm_dev_node(const char *dev_name, int check_udev, unsigned rely_on_udev)
+{
+       return _stack_node_op(NODE_DEL, dev_name, 0, 0, 0,
+                             0, 0, "", 0, 0, check_udev, rely_on_udev);
+}
+
+/*
+ * Queue a read-ahead adjustment for the device.  DM_READ_AHEAD_AUTO means
+ * "leave the kernel default alone", so it is a successful no-op.
+ */
+int set_dev_node_read_ahead(const char *dev_name,
+                            uint32_t major, uint32_t minor,
+                           uint32_t read_ahead, uint32_t read_ahead_flags)
+{
+       if (read_ahead == DM_READ_AHEAD_AUTO)
+               return 1;
+
+       return _stack_node_op(NODE_READ_AHEAD, dev_name, major, minor, 0, 0,
+                              0, "", read_ahead, read_ahead_flags, 0, 0);
+}
+
+/* Process all stacked node operations now. */
+void update_devs(void)
+{
+       _pop_node_ops();
+}
+
+/*
+ * Copy absolute path 'src' into 'dir' (capacity max_len), ensuring a
+ * trailing '/' and appending 'suffix' (if non-NULL).  Rejects relative
+ * paths and results that do not fit.  Returns 1 on success, 0 otherwise.
+ */
+static int _canonicalize_and_set_dir(const char *src, const char *suffix, size_t max_len, char *dir)
+{
+       size_t len;
+       const char *slash;
+
+       if (*src != '/') {
+               log_debug_activation("Invalid directory value, %s: "
+                                    "not an absolute name.", src);
+               return 0;
+       }
+
+       len = strlen(src);
+       slash = src[len-1] == '/' ? "" : "/";
+
+       if (dm_snprintf(dir, max_len, "%s%s%s", src, slash, suffix ? suffix : "") < 0) {
+               log_debug_activation("Invalid directory value, %s: name too long.", src);
+               return 0;
+       }
+
+       return 1;
+}
+
+/* Set the device directory (e.g. /dev/) used for node paths; DM_DIR is appended. */
+int dm_set_dev_dir(const char *dev_dir)
+{
+       return _canonicalize_and_set_dir(dev_dir, DM_DIR, sizeof _dm_dir, _dm_dir);
+}
+
+/* Return the configured device-mapper device directory. */
+const char *dm_dir(void)
+{
+       return _dm_dir;
+}
+
+/*
+ * Set the sysfs mount point used for /sys lookups.  NULL or "" disables
+ * sysfs-based queries (callers check for an empty _sysfs_dir).
+ */
+int dm_set_sysfs_dir(const char *sysfs_dir)
+{
+       if (!sysfs_dir || !*sysfs_dir) {
+               _sysfs_dir[0] = '\0';
+               return 1;
+       }
+
+       return _canonicalize_and_set_dir(sysfs_dir, NULL, sizeof _sysfs_dir, _sysfs_dir);
+}
+
+/* Return the configured sysfs directory ("" when disabled). */
+const char *dm_sysfs_dir(void)
+{
+       return _sysfs_dir;
+}
+
+/*
+ * Replace existing uuid_prefix provided it isn't too long.
+ * (Bounded by DM_MAX_UUID_PREFIX_LEN; _default_uuid_prefix is sized for it.)
+ */
+int dm_set_uuid_prefix(const char *uuid_prefix)
+{
+       if (!uuid_prefix)
+               return_0;
+
+       if (strlen(uuid_prefix) > DM_MAX_UUID_PREFIX_LEN) {
+               log_error("New uuid prefix %s too long.", uuid_prefix);
+               return 0;
+       }
+
+       strcpy(_default_uuid_prefix, uuid_prefix);
+
+       return 1;
+}
+
+/* Return the current UUID prefix string. */
+const char *dm_uuid_prefix(void)
+{
+       return _default_uuid_prefix;
+}
+
+/* Return nonzero iff 'a' is an octal digit character ('0'..'7'). */
+static int _is_octal(int a)
+{
+       return (((a) & ~7) == '0');
+}
+
+/* Convert mangled mountinfo into normal ASCII string:
+ * decode each "\ooo" octal escape (as written by the kernel for
+ * special characters) into the corresponding byte; copy the rest
+ * verbatim.  'buf' must be at least as large as 'src'. */
+static void _unmangle_mountinfo_string(const char *src, char *buf)
+{
+       while (*src) {
+               if ((*src == '\\') &&
+                   _is_octal(src[1]) && _is_octal(src[2]) && _is_octal(src[3])) {
+                       *buf++ = 64 * (src[1] & 7) + 8 * (src[2] & 7) + (src[3] & 7);
+                       src += 4;
+               } else
+                       *buf++ = *src++;
+       }
+       *buf = '\0';
+}
+
+/*
+ * Parse one line of mountinfo and unmangle the target line.
+ *
+ * Extracts the mounted device's major:minor into *maj/*min and the
+ * unmangled mount target into 'buf' (caller provides >= PATH_MAX bytes).
+ *
+ * btrfs reports fake device numbers (major 0) in mountinfo; when the line
+ * still contains a /dev/mapper name, query the real major:minor with a
+ * DM_DEVICE_INFO task and substitute it.
+ *
+ * Returns 1 on success, 0 on parse failure or task-creation failure.
+ */
+static int _mountinfo_parse_line(const char *line, unsigned *maj, unsigned *min, char *buf)
+{
+       char root[PATH_MAX + 1]; /* sscanf needs extra '\0' */
+       char target[PATH_MAX + 1];
+       char *devmapper;
+       struct dm_task *dmt;
+       struct dm_info info;
+       unsigned i;
+
+       /* TODO: maybe detect availability of  %ms  glib support ? */
+       if (sscanf(line, "%*u %*u %u:%u %" DM_TO_STRING(PATH_MAX)
+                  "s %" DM_TO_STRING(PATH_MAX) "s",
+                  maj, min, root, target) < 4) {
+               log_error("Failed to parse mountinfo line.");
+               return 0;
+       }
+
+       /* btrfs fakes device numbers, but there is still /dev/mapper name
+        * placed in mountinfo, so try to detect proper major:minor via this */
+       if (*maj == 0 && (devmapper = strstr(line, "/dev/mapper/"))) {
+               if (!(dmt = dm_task_create(DM_DEVICE_INFO))) {
+                       log_error("Mount info task creation failed.");
+                       return 0;
+               }
+               devmapper += 12; /* skip fixed prefix */
+               /* Copy DM name up to next space, reserving room for the
+                * terminating '\0' (bound is sizeof(root) - 1, otherwise
+                * root[i] = 0 below could write one past the array). */
+               for (i = 0; devmapper[i] && devmapper[i] != ' ' && i < sizeof(root) - 1; ++i)
+                       root[i] = devmapper[i];
+               root[i] = 0;
+               _unmangle_mountinfo_string(root, buf);
+               buf[DM_NAME_LEN] = 0; /* cut away */
+
+               if (dm_task_set_name(dmt, buf) &&
+                   dm_task_no_open_count(dmt) &&
+                   dm_task_run(dmt) &&
+                   dm_task_get_info(dmt, &info)) {
+                       log_debug("Replacing mountinfo device (%u:%u) with matching DM device %s (%u:%u).",
+                                 *maj, *min, buf, info.major, info.minor);
+                       *maj = info.major;
+                       *min = info.minor;
+               }
+               dm_task_destroy(dmt);
+       }
+
+       _unmangle_mountinfo_string(target, buf);
+
+       return 1;
+}
+
+/*
+ * Function to operate on individual mountinfo lines:
+ * major, minor and mount target are parsed and unmangled, then passed
+ * to read_fn() together with the raw line and cb_data.  Iteration stops
+ * and 0 is returned on the first parse failure or when read_fn()
+ * returns 0; 0 is also returned if mountinfo cannot be opened.
+ */
+int dm_mountinfo_read(dm_mountinfo_line_callback_fn read_fn, void *cb_data)
+{
+       FILE *minfo;
+       char buffer[2 * PATH_MAX];
+       char target[PATH_MAX];
+       unsigned maj, min;
+       int r = 1;
+
+       if (!(minfo = fopen(_mountinfo, "r"))) {
+               /* Missing mountinfo is only debug-worthy (e.g. old kernels) */
+               if (errno != ENOENT)
+                       log_sys_error("fopen", _mountinfo);
+               else
+                       log_sys_debug("fopen", _mountinfo);
+               return 0;
+       }
+
+       while (!feof(minfo) && fgets(buffer, sizeof(buffer), minfo))
+               if (!_mountinfo_parse_line(buffer, &maj, &min, target) ||
+                   !read_fn(buffer, maj, min, target, cb_data)) {
+                       stack;
+                       r = 0;
+                       break;
+               }
+
+       if (fclose(minfo))
+               log_sys_error("fclose", _mountinfo);
+
+       return r;
+}
+
+/*
+ * Read the DM name of device major:minor from
+ * <sysfs>/dev/block/<major>:<minor>/dm/name into 'buf' (capacity
+ * buf_size), stripping the trailing newline.  Returns 1 on success,
+ * 0 on allocation, open, read or buffer-size failure.
+ */
+static int _sysfs_get_dm_name(uint32_t major, uint32_t minor, char *buf, size_t buf_size)
+{
+       char *sysfs_path, *temp_buf = NULL;
+       FILE *fp = NULL;
+       int r = 0;
+       size_t len;
+
+       if (!(sysfs_path = dm_malloc(PATH_MAX)) ||
+           !(temp_buf = dm_malloc(PATH_MAX))) {
+               log_error("_sysfs_get_dm_name: failed to allocate temporary buffers");
+               goto bad;
+       }
+
+       if (dm_snprintf(sysfs_path, PATH_MAX, "%sdev/block/%" PRIu32 ":%" PRIu32
+                       "/dm/name", _sysfs_dir, major, minor) < 0) {
+               log_error("_sysfs_get_dm_name: dm_snprintf failed");
+               goto bad;
+       }
+
+       if (!(fp = fopen(sysfs_path, "r"))) {
+               /* ENOENT simply means it is not a DM device */
+               if (errno != ENOENT)
+                       log_sys_error("fopen", sysfs_path);
+               else
+                       log_sys_debug("fopen", sysfs_path);
+               goto bad;
+       }
+
+       if (!fgets(temp_buf, PATH_MAX, fp)) {
+               log_sys_error("fgets", sysfs_path);
+               goto bad;
+       }
+
+       len = strlen(temp_buf);
+
+       if (len > buf_size) {
+               log_error("_sysfs_get_dm_name: supplied buffer too small");
+               goto bad;
+       }
+
+       temp_buf[len ? len - 1 : 0] = '\0'; /* \n */
+       strcpy(buf, temp_buf);
+       r = 1;
+bad:
+       if (fp && fclose(fp))
+               log_sys_error("fclose", sysfs_path);
+
+       dm_free(temp_buf);
+       dm_free(sysfs_path);
+
+       return r;
+}
+
+/*
+ * Resolve the kernel device name (e.g. "dm-3", "sda") of major:minor by
+ * reading the <sysfs>/dev/block/<major>:<minor> symlink and taking its
+ * last path component.  Copies the name into 'buf' (capacity buf_size).
+ * Returns 1 on success, 0 on failure.
+ */
+static int _sysfs_get_kernel_name(uint32_t major, uint32_t minor, char *buf, size_t buf_size)
+{
+       char *name, *sysfs_path, *temp_buf = NULL;
+       ssize_t size;
+       size_t len;
+       int r = 0;
+
+       if (!(sysfs_path = dm_malloc(PATH_MAX)) ||
+           !(temp_buf = dm_malloc(PATH_MAX))) {
+               log_error("_sysfs_get_kernel_name: failed to allocate temporary buffers");
+               goto bad;
+       }
+
+       if (dm_snprintf(sysfs_path, PATH_MAX, "%sdev/block/%" PRIu32 ":%" PRIu32,
+                       _sysfs_dir, major, minor) < 0) {
+               log_error("_sysfs_get_kernel_name: dm_snprintf failed");
+               goto bad;
+       }
+
+       if ((size = readlink(sysfs_path, temp_buf, PATH_MAX - 1)) < 0) {
+               if (errno != ENOENT)
+                       log_sys_error("readlink", sysfs_path);
+               else
+                       log_sys_debug("readlink", sysfs_path);
+               goto bad;
+       }
+       /* readlink() does not NUL-terminate */
+       temp_buf[size] = '\0';
+
+       if (!(name = strrchr(temp_buf, '/'))) {
+               log_error("Could not locate device kernel name in sysfs path %s", temp_buf);
+               goto bad;
+       }
+       name += 1;
+       /* name length including terminating '\0' */
+       len = size - (name - temp_buf) + 1;
+
+       if (len > buf_size) {
+               log_error("_sysfs_get_kernel_name: output buffer too small");
+               goto bad;
+       }
+
+       strcpy(buf, name);
+       r = 1;
+bad:
+       dm_free(temp_buf);
+       dm_free(sysfs_path);
+
+       return r;
+}
+
+/*
+ * Look up a device name for major:minor via sysfs.  For DM devices with
+ * prefer_kernel_name = 0, try the DM name first and fall back to the
+ * kernel name.  Returns 0 when sysfs is disabled or lookup fails.
+ */
+int dm_device_get_name(uint32_t major, uint32_t minor, int prefer_kernel_name,
+                      char *buf, size_t buf_size)
+{
+       if (!*_sysfs_dir)
+               return 0;
+
+       /*
+        * device-mapper devices and prefer_kernel_name = 0
+        * get dm name by reading /sys/dev/block/major:minor/dm/name,
+        * fallback to _sysfs_get_kernel_name if not successful
+        */
+       if (dm_is_dm_major(major) && !prefer_kernel_name) {
+               if (_sysfs_get_dm_name(major, minor, buf, buf_size))
+                       return 1;
+               else
+                       stack;
+       }
+
+       /*
+        * non-device-mapper devices or prefer_kernel_name = 1
+        * get kernel name using readlink /sys/dev/block/major:minor -> .../dm-X
+        */
+       return _sysfs_get_kernel_name(major, minor, buf, buf_size);
+}
+
+/*
+ * Return 1 if <sysfs>/dev/block/<major>:<minor>/holders exists and is
+ * non-empty, i.e. some other device sits on top of this one.
+ * Returns 0 when sysfs is disabled or the directory is absent/empty.
+ */
+int dm_device_has_holders(uint32_t major, uint32_t minor)
+{
+       char sysfs_path[PATH_MAX];
+       struct stat st;
+
+       if (!*_sysfs_dir)
+               return 0;
+
+       if (dm_snprintf(sysfs_path, PATH_MAX, "%sdev/block/%" PRIu32
+                       ":%" PRIu32 "/holders", _sysfs_dir, major, minor) < 0) {
+               log_warn("WARNING: sysfs_path dm_snprintf failed.");
+               return 0;
+       }
+
+       if (stat(sysfs_path, &st)) {
+               if (errno != ENOENT)
+                       log_sys_debug("stat", sysfs_path);
+               return 0;
+       }
+
+       return !dm_is_empty_dir(sysfs_path);
+}
+
+/*
+ * Return 1 if any entry <sysfs>/fs/<fs_name>/<kernel_dev_name> exists,
+ * i.e. some filesystem registered in sysfs claims the device.
+ * Used as a secondary mounted-fs check alongside mountinfo.
+ */
+static int _mounted_fs_on_device(const char *kernel_dev_name)
+{
+       char sysfs_path[PATH_MAX];
+       struct dirent *dirent;
+       DIR *d;
+       struct stat st;
+       int r = 0;
+
+       if (dm_snprintf(sysfs_path, PATH_MAX, "%sfs", _sysfs_dir) < 0) {
+               log_warn("WARNING: sysfs_path dm_snprintf failed.");
+               return 0;
+       }
+
+       if (!(d = opendir(sysfs_path))) {
+               if (errno != ENOENT)
+                       log_sys_debug("opendir", sysfs_path);
+               return 0;
+       }
+
+       /* Probe each filesystem type directory for the device name */
+       while ((dirent = readdir(d))) {
+               if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+                       continue;
+
+               if (dm_snprintf(sysfs_path, PATH_MAX, "%sfs/%s/%s",
+                               _sysfs_dir, dirent->d_name, kernel_dev_name) < 0) {
+                       log_warn("WARNING: sysfs_path dm_snprintf failed.");
+                       break;
+               }
+
+               if (!stat(sysfs_path, &st)) {
+                       /* found! */
+                       r = 1;
+                       break;
+               }
+               else if (errno != ENOENT) {
+                       log_sys_debug("stat", sysfs_path);
+                       break;
+               }
+       }
+
+       if (closedir(d))
+               log_sys_debug("closedir", kernel_dev_name);
+
+       return r;
+}
+
+/* Callback state for dm_device_has_mounted_fs()'s mountinfo scan. */
+struct mountinfo_s {
+       unsigned maj;   /* device major being searched for */
+       unsigned min;   /* device minor being searched for */
+       int mounted;    /* set to 1 when a matching mount is found */
+};
+
+/*
+ * dm_mountinfo_read() callback: flag cb_data->mounted when the line's
+ * major:minor matches the device being searched for.  Always returns 1
+ * so the whole mountinfo file is scanned.
+ */
+static int _device_has_mounted_fs(char *buffer, unsigned major, unsigned minor,
+                                 char *target, void *cb_data)
+{
+       struct mountinfo_s *data = cb_data;
+       char kernel_dev_name[PATH_MAX];
+
+       if ((major == data->maj) && (minor == data->min)) {
+               /* Kernel name is only for the log message; tolerate failure */
+               if (!dm_device_get_name(major, minor, 1, kernel_dev_name,
+                                       sizeof(kernel_dev_name))) {
+                       stack;
+                       *kernel_dev_name = '\0';
+               }
+               log_verbose("Device %s (%u:%u) appears to be mounted on %s.",
+                           kernel_dev_name, major, minor, target);
+               data->mounted = 1;
+       }
+
+       return 1;
+}
+
+/*
+ * Return 1 if device major:minor appears to have a mounted filesystem:
+ * first scan mountinfo, then fall back to checking
+ * /sys/fs/<fs_name>/<kernel_dev_name> presence.
+ */
+int dm_device_has_mounted_fs(uint32_t major, uint32_t minor)
+{
+       char kernel_dev_name[PATH_MAX];
+       struct mountinfo_s data = {
+               .maj = major,
+               .min = minor,
+       };
+
+       if (!dm_mountinfo_read(_device_has_mounted_fs, &data))
+               stack;
+
+       if (data.mounted)
+               return 1;
+       /*
+        * TODO: Verify dm_mountinfo_read() is superset
+        * and remove sysfs check (namespaces)
+        */
+       /* Get kernel device name first */
+       if (!dm_device_get_name(major, minor, 1, kernel_dev_name, PATH_MAX))
+               return 0;
+
+       /* Check /sys/fs/<fs_name>/<kernel_dev_name> presence */
+       return _mounted_fs_on_device(kernel_dev_name);
+}
+
+/*
+ * Run a DM_DEVICE_MKNODES task to (re)create /dev nodes — for the named
+ * device only, or for all DM devices when name is NULL.
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_mknodes(const char *name)
+{
+       struct dm_task *dmt;
+       int r = 0;
+
+       if (!(dmt = dm_task_create(DM_DEVICE_MKNODES)))
+               return_0;
+
+       if (name && !dm_task_set_name(dmt, name))
+               goto out;
+
+       if (!dm_task_no_open_count(dmt))
+               goto out;
+
+       r = dm_task_run(dmt);
+
+out:
+       dm_task_destroy(dmt);
+       return r;
+}
+
+/*
+ * Fetch the kernel device-mapper driver version string into 'version'
+ * (capacity 'size').  Returns 1 on success, 0 on failure.
+ * NOTE(review): a dm_task_run() failure is only logged here and the
+ * version is still queried afterwards — presumably relying on
+ * dm_task_get_driver_version() failing in that case; confirm intent.
+ */
+int dm_driver_version(char *version, size_t size)
+{
+       struct dm_task *dmt;
+       int r = 0;
+
+       if (!(dmt = dm_task_create(DM_DEVICE_VERSION)))
+               return_0;
+
+       if (!dm_task_run(dmt))
+               log_error("Failed to get driver version");
+
+       if (!dm_task_get_driver_version(dmt, version, size))
+               goto out;
+
+       r = 1;
+
+out:
+       dm_task_destroy(dmt);
+       return r;
+}
+
+/*
+ * Encode the udev control flags into the task's event_nr field
+ * (upper bits, shifted by DM_UDEV_FLAGS_SHIFT).  No-op when the kernel
+ * driver does not support cookies.
+ */
+static void _set_cookie_flags(struct dm_task *dmt, uint16_t flags)
+{
+       if (!dm_cookie_supported())
+               return;
+
+       if (_udev_disabled) {
+               /*
+                * If udev is disabled, hardcode this functionality:
+                *   - we want libdm to create the nodes
+                *   - we don't want the /dev/mapper and any subsystem
+                *     related content to be created by udev if udev
+                *     rules are installed
+                */
+               flags &= ~DM_UDEV_DISABLE_LIBRARY_FALLBACK;
+               flags |= DM_UDEV_DISABLE_DM_RULES_FLAG | DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG;
+       }
+
+       dmt->event_nr = flags << DM_UDEV_FLAGS_SHIFT;
+}
+
+#ifndef UDEV_SYNC_SUPPORT
+/*
+ * Build without udev synchronisation: all cookie/semaphore operations
+ * are stubs that report "no sync support" and fall back to libdm
+ * processing the stacked node ops itself (update_devs()).
+ */
+void dm_udev_set_sync_support(int sync_with_udev)
+{
+}
+
+int dm_udev_get_sync_support(void)
+{
+       return 0;
+}
+
+void dm_udev_set_checking(int checking)
+{
+}
+
+int dm_udev_get_checking(void)
+{
+       return 0;
+}
+
+/* Without sync support a cookie is always 0; only the flags are stored. */
+int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags)
+{
+       _set_cookie_flags(dmt, flags);
+
+       *cookie = 0;
+       dmt->cookie_set = 1;
+
+       return 1;
+}
+
+int dm_udev_complete(uint32_t cookie)
+{
+       return 1;
+}
+
+/* No udev to wait for — process the stacked node ops directly. */
+int dm_udev_wait(uint32_t cookie)
+{
+       update_devs();
+
+       return 1;
+}
+
+int dm_udev_wait_immediate(uint32_t cookie, int *ready)
+{
+       update_devs();
+       *ready = 1;
+
+       return 1;
+}
+
+#else          /* UDEV_SYNC_SUPPORT */
+
+/*
+ * Return 1 if System V semaphores are available (probed via
+ * semctl(SEM_INFO)); warn and return 0 otherwise, which disables udev
+ * synchronisation.
+ */
+static int _check_semaphore_is_supported(void)
+{
+       int maxid;
+       union semun arg;
+       struct seminfo seminfo;
+
+       arg.__buf = &seminfo;
+       maxid = semctl(0, 0, SEM_INFO, arg);
+
+       if (maxid < 0) {
+               log_warn("Kernel not configured for semaphores (System V IPC). "
+                        "Not using udev synchronisation code.");
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Return 1 if the udev daemon is active (via libudev's queue API),
+ * 0 if it is not running or its state cannot be determined.
+ */
+static int _check_udev_is_running(void)
+{
+       struct udev *udev;
+       struct udev_queue *udev_queue;
+       int r;
+
+       if (!(udev = udev_new()))
+               goto_bad;
+
+       if (!(udev_queue = udev_queue_new(udev))) {
+               udev_unref(udev);
+               goto_bad;
+       }
+
+       if (!(r = udev_queue_get_udev_is_active(udev_queue)))
+               log_debug_activation("Udev is not running. "
+                                    "Not using udev synchronisation code.");
+
+       udev_queue_unref(udev_queue);
+       udev_unref(udev);
+
+       return r;
+
+bad:
+       log_error("Could not get udev state. Assuming udev is not running.");
+       return 0;
+}
+
+/*
+ * Lazily probe semaphore support and udev state exactly once
+ * (the cached globals start at -1 meaning "unknown").
+ */
+static void _check_udev_sync_requirements_once(void)
+{
+       if (_semaphore_supported < 0)
+               _semaphore_supported = _check_semaphore_is_supported();
+
+       if (_udev_running < 0) {
+               _udev_running = _check_udev_is_running();
+               if (_udev_disabled && _udev_running)
+                       log_warn("Udev is running and DM_DISABLE_UDEV environment variable is set. "
+                                "Bypassing udev, device-mapper library will manage device "
+                                "nodes in device directory.");
+       }
+}
+
+/* Enable/disable udev synchronisation (after probing requirements). */
+void dm_udev_set_sync_support(int sync_with_udev)
+{
+       _check_udev_sync_requirements_once();
+       _sync_with_udev = sync_with_udev;
+}
+
+/*
+ * Udev sync is effective only when it is not disabled, semaphores work,
+ * the kernel supports cookies, udev runs, and the caller enabled it.
+ */
+int dm_udev_get_sync_support(void)
+{
+       _check_udev_sync_requirements_once();
+
+       return !_udev_disabled && _semaphore_supported &&
+               dm_cookie_supported() &&_udev_running && _sync_with_udev;
+}
+
+/* Toggle verification of udev operations (logged for visibility). */
+void dm_udev_set_checking(int checking)
+{
+       if ((_udev_checking = checking))
+               log_debug_activation("DM udev checking enabled");
+       else
+               log_debug_activation("DM udev checking disabled");
+}
+
+/* Return whether udev checking is enabled. */
+int dm_udev_get_checking(void)
+{
+       return _udev_checking;
+}
+
+/*
+ * Look up the System V semaphore backing 'cookie' and store its id in
+ * *semid.  The cookie's upper 16 bits must equal DM_COOKIE_MAGIC (the
+ * cookie itself is the semget() key).  Returns 1 on success, 0 with a
+ * specific error message otherwise.
+ */
+static int _get_cookie_sem(uint32_t cookie, int *semid)
+{
+       if (cookie >> 16 != DM_COOKIE_MAGIC) {
+               log_error("Could not continue to access notification "
+                         "semaphore identified by cookie value %"
+                         PRIu32 " (0x%x). Incorrect cookie prefix.",
+                         cookie, cookie);
+               return 0;
+       }
+
+       if ((*semid = semget((key_t) cookie, 1, 0)) >= 0)
+               return 1;
+
+       switch (errno) {
+               case ENOENT:
+                       log_error("Could not find notification "
+                                 "semaphore identified by cookie "
+                                 "value %" PRIu32 " (0x%x)",
+                                 cookie, cookie);
+                       break;
+               case EACCES:
+                       /* fixed typo: "notificaton" -> "notification" */
+                       log_error("No permission to access "
+                                 "notification semaphore identified "
+                                 "by cookie value %" PRIu32 " (0x%x)",
+                                 cookie, cookie);
+                       break;
+               default:
+                       log_error("Failed to access notification "
+                                  "semaphore identified by cookie "
+                                  "value %" PRIu32 " (0x%x): %s",
+                                 cookie, cookie, strerror(errno));
+                       break;
+       }
+
+       return 0;
+}
+
+/*
+ * Increment the notification semaphore for 'cookie' by one (one more
+ * party to wait for) and log the resulting value.  Returns 1 on
+ * success, 0 on semop/semctl failure.
+ * (Also drops stray trailing whitespace present in the original.)
+ */
+static int _udev_notify_sem_inc(uint32_t cookie, int semid)
+{
+       struct sembuf sb = {0, 1, 0};
+       int val;
+
+       if (semop(semid, &sb, 1) < 0) {
+               log_error("semid %d: semop failed for cookie 0x%" PRIx32 ": %s",
+                         semid, cookie, strerror(errno));
+               return 0;
+       }
+
+       /* Re-read the value purely for the debug message below */
+       if ((val = semctl(semid, 0, GETVAL)) < 0) {
+               log_error("semid %d: sem_ctl GETVAL failed for "
+                         "cookie 0x%" PRIx32 ": %s",
+                         semid, cookie, strerror(errno));
+               return 0;
+       }
+
+       log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) incremented to %d",
+                 cookie, semid, val);
+
+       return 1;
+}
+
+/*
+ * Decrement the notification semaphore for 'cookie' without blocking
+ * (IPC_NOWAIT); EAGAIN means the semaphore was already at zero, i.e.
+ * an incorrect state.  Returns 1 on success, 0 on failure.
+ * NOTE: 'val' is read before semop, so the logged "decremented to"
+ * value is computed as val - 1 and may race with other processes.
+ */
+static int _udev_notify_sem_dec(uint32_t cookie, int semid)
+{
+       struct sembuf sb = {0, -1, IPC_NOWAIT};
+       int val;
+
+       if ((val = semctl(semid, 0, GETVAL)) < 0) {
+               log_error("semid %d: sem_ctl GETVAL failed for "
+                         "cookie 0x%" PRIx32 ": %s",
+                         semid, cookie, strerror(errno));
+               return 0;
+       }
+
+       if (semop(semid, &sb, 1) < 0) {
+               switch (errno) {
+                       case EAGAIN:
+                               log_error("semid %d: semop failed for cookie "
+                                         "0x%" PRIx32 ": "
+                                         "incorrect semaphore state",
+                                         semid, cookie);
+                               break;
+                       default:
+                               log_error("semid %d: semop failed for cookie "
+                                         "0x%" PRIx32 ": %s",
+                                         semid, cookie, strerror(errno));
+                               break;
+               }
+               return 0;
+       }
+
+       log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) decremented to %d",
+                            cookie, semid, val - 1);
+
+       return 1;
+}
+
+/*
+ * Remove the notification semaphore set for 'cookie' from the system
+ * (IPC_RMID).  Returns 1 on success, 0 on failure.
+ */
+static int _udev_notify_sem_destroy(uint32_t cookie, int semid)
+{
+       if (semctl(semid, 0, IPC_RMID, 0) < 0) {
+               log_error("Could not cleanup notification semaphore "
+                         "identified by cookie value %" PRIu32 " (0x%x): %s",
+                         cookie, cookie, strerror(errno));
+               return 0;
+       }
+
+       log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) destroyed", cookie,
+                            semid);
+
+       return 1;
+}
+
+/*
+ * Create a new notification semaphore with a unique, non-zero random
+ * cookie (DM_COOKIE_MAGIC in the upper 16 bits, random lower 16 bits
+ * from /dev/urandom), initialised to 1.  On success stores the cookie
+ * and semaphore id into *cookie / *semid and returns 1; on failure
+ * sets *cookie to 0 and returns 0.
+ *
+ * Fix: the original leaked the just-created semaphore when the final
+ * GETVAL (debug readback) failed — only the SETVAL failure path called
+ * _udev_notify_sem_destroy().  Destroy it on that path too so no
+ * orphaned semaphore is left in the system.
+ */
+static int _udev_notify_sem_create(uint32_t *cookie, int *semid)
+{
+       int fd;
+       int gen_semid;
+       int val;
+       uint16_t base_cookie;
+       uint32_t gen_cookie;
+       union semun sem_arg;
+
+       if ((fd = open("/dev/urandom", O_RDONLY)) < 0) {
+               log_error("Failed to open /dev/urandom "
+                         "to create random cookie value");
+               *cookie = 0;
+               return 0;
+       }
+
+       /* Generate random cookie value. Be sure it is unique and non-zero. */
+       do {
+               /* FIXME Handle non-error returns from read(). Move _io() into libdm? */
+               if (read(fd, &base_cookie, sizeof(base_cookie)) != sizeof(base_cookie)) {
+                       log_error("Failed to initialize notification cookie");
+                       goto bad;
+               }
+
+               gen_cookie = DM_COOKIE_MAGIC << 16 | base_cookie;
+
+               if (base_cookie && (gen_semid = semget((key_t) gen_cookie,
+                                   1, 0600 | IPC_CREAT | IPC_EXCL)) < 0) {
+                       switch (errno) {
+                               case EEXIST:
+                                       /* if the semaphore key exists, we
+                                        * simply generate another random one */
+                                       base_cookie = 0;
+                                       break;
+                               case ENOMEM:
+                                       log_error("Not enough memory to create "
+                                                 "notification semaphore");
+                                       goto bad;
+                               case ENOSPC:
+                                       log_error("Limit for the maximum number "
+                                                 "of semaphores reached. You can "
+                                                 "check and set the limits in "
+                                                 "/proc/sys/kernel/sem.");
+                                       goto bad;
+                               default:
+                                       log_error("Failed to create notification "
+                                                 "semaphore: %s", strerror(errno));
+                                       goto bad;
+                       }
+               }
+       } while (!base_cookie);
+
+       log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) created",
+                            gen_cookie, gen_semid);
+
+       sem_arg.val = 1;
+
+       if (semctl(gen_semid, 0, SETVAL, sem_arg) < 0) {
+               log_error("semid %d: semctl failed: %s", gen_semid, strerror(errno));
+               /* We have to destroy just created semaphore
+                * so it won't stay in the system. */
+               (void) _udev_notify_sem_destroy(gen_cookie, gen_semid);
+               goto bad;
+       }
+
+       if ((val = semctl(gen_semid, 0, GETVAL)) < 0) {
+               log_error("semid %d: sem_ctl GETVAL failed for "
+                         "cookie 0x%" PRIx32 ": %s",
+                         gen_semid, gen_cookie, strerror(errno));
+               /* Likewise, do not leave the freshly created
+                * semaphore behind on this failure path. */
+               (void) _udev_notify_sem_destroy(gen_cookie, gen_semid);
+               goto bad;
+       }
+
+       log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) incremented to %d",
+                            gen_cookie, gen_semid, val);
+
+       if (close(fd))
+               stack;
+
+       *semid = gen_semid;
+       *cookie = gen_cookie;
+
+       return 1;
+
+bad:
+       if (close(fd))
+               stack;
+
+       *cookie = 0;
+
+       return 0;
+}
+
+/*
+ * Create a new udev notification cookie for the caller.  When udev sync
+ * is unavailable the cookie is 0 and 1 is returned (success, no sync).
+ */
+int dm_udev_create_cookie(uint32_t *cookie)
+{
+       int semid;
+
+       if (!dm_udev_get_sync_support()) {
+               *cookie = 0;
+               return 1;
+       }
+
+       return _udev_notify_sem_create(cookie, &semid);
+}
+
+/* Map a DM_DEVICE_* task type to its display name for log messages. */
+static const char *_task_type_disp(int type)
+{
+       switch(type) {
+       case DM_DEVICE_CREATE:
+               return "CREATE";
+        case DM_DEVICE_RELOAD:
+               return "RELOAD";
+        case DM_DEVICE_REMOVE:
+               return "REMOVE";
+        case DM_DEVICE_REMOVE_ALL:
+               return "REMOVE_ALL";
+        case DM_DEVICE_SUSPEND:
+               return "SUSPEND";
+        case DM_DEVICE_RESUME:
+               return "RESUME";
+        case DM_DEVICE_INFO:
+               return "INFO";
+        case DM_DEVICE_DEPS:
+               return "DEPS";
+        case DM_DEVICE_RENAME:
+               return "RENAME";
+        case DM_DEVICE_VERSION:
+               return "VERSION";
+        case DM_DEVICE_STATUS:
+               return "STATUS";
+        case DM_DEVICE_TABLE:
+               return "TABLE";
+        case DM_DEVICE_WAITEVENT:
+               return "WAITEVENT";
+        case DM_DEVICE_LIST:
+               return "LIST";
+        case DM_DEVICE_CLEAR:
+               return "CLEAR";
+        case DM_DEVICE_MKNODES:
+               return "MKNODES";
+        case DM_DEVICE_LIST_VERSIONS:
+               return "LIST_VERSIONS";
+        case DM_DEVICE_TARGET_MSG:
+               return "TARGET_MSG";
+        case DM_DEVICE_SET_GEOMETRY:
+               return "SET_GEOMETRY";
+       }
+       return "unknown";
+}
+
+/*
+ * Attach a udev notification cookie to the task: reuse an existing
+ * cookie's semaphore or create a new one, increment it (one more
+ * waiter), and store the cookie's low bits plus the udev flags in
+ * the task's event_nr.  Without sync support this degenerates to a
+ * zero cookie.  Returns 1 on success, 0 on failure (event_nr reset).
+ */
+int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags)
+{
+       int semid;
+
+       _set_cookie_flags(dmt, flags);
+
+       if (!dm_udev_get_sync_support()) {
+               *cookie = 0;
+               dmt->cookie_set = 1;
+               return 1;
+       }
+
+       if (*cookie) {
+               if (!_get_cookie_sem(*cookie, &semid))
+                       goto_bad;
+       } else if (!_udev_notify_sem_create(cookie, &semid))
+               goto_bad;
+
+       if (!_udev_notify_sem_inc(*cookie, semid)) {
+               log_error("Could not set notification semaphore "
+                         "identified by cookie value %" PRIu32 " (0x%x)",
+                         *cookie, *cookie);
+               goto bad;
+       }
+
+       /* Low bits carry the cookie; high bits carry the udev flags */
+       dmt->event_nr |= ~DM_UDEV_FLAGS_MASK & *cookie;
+       dmt->cookie_set = 1;
+
+       log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) assigned to "
+                            "%s task(%d) with flags%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s (0x%" PRIx16 ")",
+                            *cookie, semid, _task_type_disp(dmt->type), dmt->type,
+                            (flags & DM_UDEV_DISABLE_DM_RULES_FLAG) ? " DISABLE_DM_RULES" : "",
+                            (flags & DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG) ? " DISABLE_SUBSYSTEM_RULES" : "",
+                            (flags & DM_UDEV_DISABLE_DISK_RULES_FLAG) ? " DISABLE_DISK_RULES" : "",
+                            (flags & DM_UDEV_DISABLE_OTHER_RULES_FLAG) ? " DISABLE_OTHER_RULES" : "",
+                            (flags & DM_UDEV_LOW_PRIORITY_FLAG) ? " LOW_PRIORITY" : "",
+                            (flags & DM_UDEV_DISABLE_LIBRARY_FALLBACK) ? " DISABLE_LIBRARY_FALLBACK" : "",
+                            (flags & DM_UDEV_PRIMARY_SOURCE_FLAG) ? " PRIMARY_SOURCE" : "",
+                            (flags & DM_SUBSYSTEM_UDEV_FLAG0) ? " SUBSYSTEM_0" : " ",
+                            (flags & DM_SUBSYSTEM_UDEV_FLAG1) ? " SUBSYSTEM_1" : " ",
+                            (flags & DM_SUBSYSTEM_UDEV_FLAG2) ? " SUBSYSTEM_2" : " ",
+                            (flags & DM_SUBSYSTEM_UDEV_FLAG3) ? " SUBSYSTEM_3" : " ",
+                            (flags & DM_SUBSYSTEM_UDEV_FLAG4) ? " SUBSYSTEM_4" : " ",
+                            (flags & DM_SUBSYSTEM_UDEV_FLAG5) ? " SUBSYSTEM_5" : " ",
+                            (flags & DM_SUBSYSTEM_UDEV_FLAG6) ? " SUBSYSTEM_6" : " ",
+                            (flags & DM_SUBSYSTEM_UDEV_FLAG7) ? " SUBSYSTEM_7" : " ",
+                            flags);
+
+       return 1;
+
+bad:
+       dmt->event_nr = 0;
+       return 0;
+}
+
+/*
+ * Signal completion of one task associated with 'cookie' by decrementing
+ * the notification semaphore, waking any process blocked in dm_udev_wait().
+ *
+ * A zero cookie or disabled sync support is a successful no-op.
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_udev_complete(uint32_t cookie)
+{
+       int semid;
+
+       if (!cookie || !dm_udev_get_sync_support())
+               return 1;
+
+       if (!_get_cookie_sem(cookie, &semid))
+               return_0;
+
+       if (!_udev_notify_sem_dec(cookie, semid)) {
+               log_error("Could not signal waiting process using notification "
+                         "semaphore identified by cookie value %" PRIu32 " (0x%x)",
+                         cookie, cookie);
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * If *nowait is set, return immediately leaving it set if the semaphore
+ * is not ready to be decremented to 0.  *nowait is cleared if the wait
+ * succeeds.
+ */
+static int _udev_wait(uint32_t cookie, int *nowait)
+{
+       int semid;
+       struct sembuf sb = {0, 0, 0};   /* sem_op 0 == wait for zero */
+       int val;
+
+       /* Nothing to wait on: zero cookie or sync support disabled. */
+       if (!cookie || !dm_udev_get_sync_support())
+               return 1;
+
+       if (!_get_cookie_sem(cookie, &semid))
+               return_0;
+
+       /* Return immediately if the semaphore value exceeds 1? */
+       if (*nowait) {
+               if ((val = semctl(semid, 0, GETVAL)) < 0) {
+                       log_error("semid %d: sem_ctl GETVAL failed for "
+                                 "cookie 0x%" PRIx32 ": %s",
+                                 semid, cookie, strerror(errno));
+                       return 0;
+               }
+
+               /* Still more than our own reference: not ready yet. */
+               if (val > 1)
+                       return 1;
+
+               *nowait = 0;
+       }
+
+       /* Drop our own reference so the count can reach zero. */
+       if (!_udev_notify_sem_dec(cookie, semid)) {
+               log_error("Failed to set a proper state for notification "
+                         "semaphore identified by cookie value %" PRIu32 " (0x%x) "
+                         "to initialize waiting for incoming notifications.",
+                         cookie, cookie);
+               (void) _udev_notify_sem_destroy(cookie, semid);
+               return 0;
+       }
+
+       log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) waiting for zero",
+                            cookie, semid);
+
+repeat_wait:
+       if (semop(semid, &sb, 1) < 0) {
+               if (errno == EINTR)
+                       goto repeat_wait;       /* interrupted: retry the wait */
+               else if (errno == EIDRM)
+                       return 1;               /* semaphore already removed elsewhere */
+
+               log_error("Could not set wait state for notification semaphore "
+                         "identified by cookie value %" PRIu32 " (0x%x): %s",
+                         cookie, cookie, strerror(errno));
+               (void) _udev_notify_sem_destroy(cookie, semid);
+               return 0;
+       }
+
+       /* Wait complete: tear the semaphore down. */
+       return _udev_notify_sem_destroy(cookie, semid);
+}
+
+/*
+ * Block until all tasks sharing 'cookie' have completed, then refresh
+ * the in-memory view of device nodes.  Returns 1 on success, 0 on failure.
+ */
+int dm_udev_wait(uint32_t cookie)
+{
+       int nowait = 0;
+       int r = _udev_wait(cookie, &nowait);
+
+       /* Run even on failure so pending node updates are not lost. */
+       update_devs();
+
+       return r;
+}
+
+/*
+ * Non-blocking variant of dm_udev_wait().  If the cookie is not yet ready,
+ * return success with *ready = 0 and do not wait.  Otherwise complete the
+ * wait, refresh device nodes and set *ready = 1.
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_udev_wait_immediate(uint32_t cookie, int *ready)
+{
+       int nowait = 1;
+       int r = _udev_wait(cookie, &nowait);
+
+       /* nowait still set: semaphore not ready, report not-ready. */
+       if (r && nowait) {
+               *ready = 0;
+               return 1;
+       }
+
+       update_devs();
+       *ready = 1;
+
+       return r;
+}
+#endif         /* UDEV_SYNC_SUPPORT */
diff --git a/device_mapper/libdm-common.h b/device_mapper/libdm-common.h
new file mode 100644 (file)
index 0000000..010d876
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef LIB_DMCOMMON_H
+#define LIB_DMCOMMON_H
+
+#include "libdevmapper.h"
+
+#define DM_DEFAULT_NAME_MANGLING_MODE_ENV_VAR_NAME "DM_DEFAULT_NAME_MANGLING_MODE"
+
+/*
+ * Prefer the mangled name/uuid when present (GNU "?:" keeps the
+ * non-empty left operand).  NOTE: the DEV_UUID parameter was previously
+ * spelled DMT while the expansion used lowercase dmt, so the macro only
+ * worked when the argument happened to be named "dmt" — parameter fixed
+ * to match the expansion.
+ */
+#define DEV_NAME(dmt) (dmt->mangled_dev_name ? : dmt->dev_name)
+#define DEV_UUID(dmt) (dmt->mangled_uuid ? : dmt->uuid)
+
+/* Mangle/unmangle device names and uuids according to 'mode'. */
+int mangle_string(const char *str, const char *str_name, size_t len,
+                 char *buf, size_t buf_len, dm_string_mangling_t mode);
+
+int unmangle_string(const char *str, const char *str_name, size_t len,
+                   char *buf, size_t buf_len, dm_string_mangling_t mode);
+
+int check_multiple_mangled_string_allowed(const char *str, const char *str_name,
+                                         dm_string_mangling_t mode);
+
+struct target *create_target(uint64_t start,
+                            uint64_t len,
+                            const char *type, const char *params);
+
+/* /dev node maintenance; 'rely_on_udev' leaves node handling to udev. */
+int add_dev_node(const char *dev_name, uint32_t minor, uint32_t major,
+                uid_t uid, gid_t gid, mode_t mode, int check_udev, unsigned rely_on_udev);
+int rm_dev_node(const char *dev_name, int check_udev, unsigned rely_on_udev);
+int rename_dev_node(const char *old_name, const char *new_name,
+                   int check_udev, unsigned rely_on_udev);
+int get_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+                           uint32_t *read_ahead);
+int set_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+                           uint32_t read_ahead, uint32_t read_ahead_flags);
+/* Flush any queued device-node updates. */
+void update_devs(void);
+void selinux_release(void);
+
+/* Track the count of suspended devices. */
+void inc_suspended(void);
+void dec_suspended(void);
+
+int parse_thin_pool_status(const char *params, struct dm_status_thin_pool *s);
+
+int get_uname_version(unsigned *major, unsigned *minor, unsigned *release);
+
+#endif
diff --git a/device_mapper/libdm-config.c b/device_mapper/libdm-config.c
new file mode 100644 (file)
index 0000000..fd4d929
--- /dev/null
@@ -0,0 +1,1486 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <stdarg.h>
+
+/* Section delimiters in the config syntax: name { ... } */
+#define SECTION_B_CHAR '{'
+#define SECTION_E_CHAR '}'
+
+/* Token types produced by the tokeniser (_get_token). */
+enum {
+       TOK_INT,
+       TOK_FLOAT,
+       TOK_STRING,             /* Single quotes */
+       TOK_STRING_ESCAPED,     /* Double quotes */
+       TOK_STRING_BARE,        /* No quotes */
+       TOK_EQ,
+       TOK_SECTION_B,
+       TOK_SECTION_E,
+       TOK_ARRAY_B,
+       TOK_ARRAY_E,
+       TOK_IDENTIFIER,
+       TOK_COMMA,
+       TOK_EOF
+};
+
+/* Recursive-descent parser state over an in-memory buffer [fb, fe). */
+struct parser {
+       const char *fb, *fe;            /* file limits */
+
+       int t;                  /* token limits and type */
+       const char *tb, *te;
+
+       int line;               /* line number we are on */
+
+       struct dm_pool *mem;
+       int no_dup_node_check;  /* whether to disable dup node checking */
+};
+
+/* Sink used when serialising a config tree back to text. */
+struct config_output {
+       struct dm_pool *mem;
+       dm_putline_fn putline;
+       const struct dm_config_node_out_spec *spec;
+       void *baton;
+};
+
+static void _get_token(struct parser *p, int tok_prev);
+static void _eat_space(struct parser *p);
+static struct dm_config_node *_file(struct parser *p);
+static struct dm_config_node *_section(struct parser *p, struct dm_config_node *parent);
+static struct dm_config_value *_value(struct parser *p);
+static struct dm_config_value *_type(struct parser *p);
+static int _match_aux(struct parser *p, int t);
+static struct dm_config_value *_create_value(struct dm_pool *mem);
+static struct dm_config_node *_create_node(struct dm_pool *mem);
+static char *_dup_tok(struct parser *p);
+static char *_dup_token(struct dm_pool *mem, const char *b, const char *e);
+
+/* Path separator used by _find_or_make_node(), e.g. "section/key". */
+static const int _sep = '/';
+
+#define MAX_INDENT 32
+
+/* Consume token 't' or fail the enclosing parse function with 0/NULL. */
+#define match(t) do {\
+   if (!_match_aux(p, (t))) {\
+       log_error("Parse error at byte %" PRIptrdiff_t " (line %d): unexpected token", \
+                 p->tb - p->fb + 1, p->line); \
+      return 0;\
+   } \
+} while(0)
+
+/*
+ * Compare NUL-terminated 'str' against the token spanning [b, e).
+ * Returns 1 only when both are fully consumed, i.e. exact match.
+ */
+static int _tok_match(const char *str, const char *b, const char *e)
+{
+       while (*str && (b != e)) {
+               if (*str++ != *b++)
+                       return 0;
+       }
+
+       /* Match only if neither side has characters left over. */
+       return !(*str || (b != e));
+}
+
+/*
+ * Allocate an empty config tree backed by its own memory pool.
+ * All nodes/values are later allocated from cft->mem and freed
+ * together by dm_config_destroy().  Returns NULL on allocation failure.
+ */
+struct dm_config_tree *dm_config_create(void)
+{
+       struct dm_config_tree *cft;
+       struct dm_pool *mem = dm_pool_create("config", 10 * 1024);
+
+       if (!mem) {
+               log_error("Failed to allocate config pool.");
+               return 0;
+       }
+
+       if (!(cft = dm_pool_zalloc(mem, sizeof(*cft)))) {
+               log_error("Failed to allocate config tree.");
+               dm_pool_destroy(mem);
+               return 0;
+       }
+       cft->mem = mem;
+
+       return cft;
+}
+
+/* Attach an opaque caller-owned pointer to the tree. */
+void dm_config_set_custom(struct dm_config_tree *cft, void *custom)
+{
+       cft->custom = custom;
+}
+
+/* Retrieve the pointer previously set with dm_config_set_custom(). */
+void *dm_config_get_custom(struct dm_config_tree *cft)
+{
+       return cft->custom;
+}
+
+/* Free the tree and everything allocated from its pool in one go. */
+void dm_config_destroy(struct dm_config_tree *cft)
+{
+       dm_pool_destroy(cft->mem);
+}
+
+/*
+ * If there's a cascaded dm_config_tree, remove and return it, otherwise
+ * return NULL.
+ */
+struct dm_config_tree *dm_config_remove_cascaded_tree(struct dm_config_tree *cft)
+{
+       struct dm_config_tree *second_cft;
+
+       /* Tolerate a NULL tree. */
+       if (!cft)
+               return NULL;
+
+       second_cft = cft->cascade;
+       cft->cascade = NULL;
+
+       return second_cft;
+}
+
+/*
+ * When searching, first_cft is checked before second_cft.
+ */
+struct dm_config_tree *dm_config_insert_cascaded_tree(struct dm_config_tree *first_cft, struct dm_config_tree *second_cft)
+{
+       first_cft->cascade = second_cft;
+
+       return first_cft;
+}
+
+/*
+ * Recursively reverse every sibling list in the tree rooted at 'head'
+ * and return the new head.  The parser prepends nodes as it reads them
+ * (see _make_node), so this restores original file order.
+ */
+static struct dm_config_node *_config_reverse(struct dm_config_node *head)
+{
+       struct dm_config_node *left = head, *middle = NULL, *right = NULL;
+
+       /* Classic three-pointer in-place list reversal. */
+       while (left) {
+               right = middle;
+               middle = left;
+               left = left->sib;
+               middle->sib = right;
+               middle->child = _config_reverse(middle->child);
+       }
+
+       return middle;
+}
+
+/*
+ * Parse the text in [start, end) into cft->root.  The parser state is
+ * allocated from the tree's own pool; 'no_dup_node_check' skips the
+ * duplicate-node scan in _find_or_make_node (faster for trusted input).
+ * Returns 1 on success, 0 on failure.
+ */
+static int _do_dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end, int no_dup_node_check)
+{
+       /* TODO? if (start == end) return 1; */
+
+       struct parser *p;
+       if (!(p = dm_pool_alloc(cft->mem, sizeof(*p))))
+               return_0;
+
+       p->mem = cft->mem;
+       p->fb = start;
+       p->fe = end;
+       p->tb = p->te = p->fb;
+       p->line = 1;
+       p->no_dup_node_check = no_dup_node_check;
+
+       /* Prime the tokeniser; TOK_SECTION_E means "no value expected next". */
+       _get_token(p, TOK_SECTION_E);
+       if (!(cft->root = _file(p)))
+               return_0;
+
+       /* Nodes were prepended while parsing; restore file order. */
+       cft->root = _config_reverse(cft->root);
+
+       return 1;
+}
+
+/* Parse with duplicate-node checking enabled (the default). */
+int dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end)
+{
+       return _do_dm_config_parse(cft, start, end, 0);
+}
+
+/* Parse without the duplicate-node scan. */
+int dm_config_parse_without_dup_node_check(struct dm_config_tree *cft, const char *start, const char *end)
+{
+       return _do_dm_config_parse(cft, start, end, 1);
+}
+
+/*
+ * Convenience wrapper: create a tree and parse a NUL-terminated string
+ * into it.  Returns NULL on failure (the partially built tree is freed).
+ */
+struct dm_config_tree *dm_config_from_string(const char *config_settings)
+{
+       struct dm_config_tree *cft;
+
+       if (!(cft = dm_config_create()))
+               return_NULL;
+
+       if (!dm_config_parse(cft, config_settings, config_settings + strlen(config_settings))) {
+               dm_config_destroy(cft);
+               return_NULL;
+       }
+
+       return cft;
+}
+
+/*
+ * Begin accumulating one output line in the pool object buffer.
+ * Paired with _line_append()/_line_end().  Returns 1 on success.
+ */
+static int _line_start(struct config_output *out)
+{
+       if (!dm_pool_begin_object(out->mem, 128)) {
+               log_error("dm_pool_begin_object failed for config line");
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * printf-style append to the line started by _line_start().
+ * Formats into a 4096-byte stack buffer first and only falls back to a
+ * dynamically allocated buffer when the result would not fit.
+ * Returns 1 on success, 0 on failure.
+ */
+__attribute__ ((format(printf, 2, 3)))
+static int _line_append(struct config_output *out, const char *fmt, ...)
+{
+       char buf[4096];
+       char *dyn_buf = NULL;
+       va_list ap;
+       int n;
+
+       /*
+        * We should be fine with the 4096 char buffer 99% of the time,
+        * but if we need to go beyond that, allocate the buffer dynamically.
+        */
+
+       va_start(ap, fmt);
+       n = vsnprintf(buf, sizeof(buf), fmt, ap);
+       va_end(ap);
+
+       if (n < 0) {
+               log_error("vsnprintf failed for config line");
+               return 0;
+       }
+
+       /* vsnprintf returns the would-be length; detect truncation. */
+       if (n > (int) sizeof buf - 1) {
+               /*
+                * Fixed size buffer with sizeof buf is not enough,
+                * so try dynamically allocated buffer now...
+                */
+               va_start(ap, fmt);
+               n = dm_vasprintf(&dyn_buf, fmt, ap);
+               va_end(ap);
+
+               if (n < 0) {
+                       log_error("dm_vasprintf failed for config line");
+                       return 0;
+               }
+       }
+
+       /* GNU "?:": use dyn_buf when set, the stack buffer otherwise. */
+       if (!dm_pool_grow_object(out->mem, dyn_buf ? : buf, 0)) {
+               log_error("dm_pool_grow_object failed for config line");
+               dm_free(dyn_buf);
+               return 0;
+       }
+
+       dm_free(dyn_buf);
+
+       return 1;
+}
+
+/* Shorthand used by the writer; propagates failure with return_0. */
+#define line_append(args...) do {if (!_line_append(out, args)) {return_0;}} while (0)
+
+/*
+ * Terminate the current line and hand it to the configured sink(s):
+ * the plain putline callback and/or the per-node out_spec line_fn.
+ * NB: returns 0 (failure) when no sink at all is configured.
+ */
+static int _line_end(const struct dm_config_node *cn, struct config_output *out)
+{
+       const char *line;
+
+       /* NUL-terminate the accumulated object. */
+       if (!dm_pool_grow_object(out->mem, "\0", 1)) {
+               log_error("dm_pool_grow_object failed for config line");
+               return 0;
+       }
+
+       line = dm_pool_end_object(out->mem);
+
+       if (!out->putline && !out->spec)
+               return 0;
+
+       if (out->putline)
+               out->putline(line, out->baton);
+
+       if (out->spec && out->spec->line_fn)
+               out->spec->line_fn(cn, line, out->baton);
+
+       return 1;
+}
+
+/*
+ * Append one config value to the current output line, honouring the
+ * value's format_flags (quoting, octal ints, extra spaces).
+ * NB: an unknown value type is logged but still reported as success.
+ */
+static int _write_value(struct config_output *out, const struct dm_config_value *v)
+{
+       char *buf;
+       const char *s;
+
+       switch (v->type) {
+       case DM_CFG_STRING:
+               /* Stack-allocate space for the escaped copy of the string. */
+               buf = alloca(dm_escaped_len(v->v.str));
+               s = (v->format_flags & DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES) ? "" : "\"";
+               line_append("%s%s%s", s, dm_escape_double_quotes(buf, v->v.str), s);
+               break;
+
+       case DM_CFG_FLOAT:
+               line_append("%f", v->v.f);
+               break;
+
+       case DM_CFG_INT:
+               if (v->format_flags & DM_CONFIG_VALUE_FMT_INT_OCTAL)
+                       line_append("0%" PRIo64, v->v.i);
+               else
+                       line_append(FMTd64, v->v.i);
+               break;
+
+       case DM_CFG_EMPTY_ARRAY:
+               s = (v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES) ? " " : "";
+               line_append("[%s]", s);
+               break;
+
+       default:
+               log_error("_write_value: Unknown value type: %d", v->type);
+
+       }
+
+       return 1;
+}
+
+/*
+ * Serialise node 'n' (and, unless 'only_one', its following siblings)
+ * at the given nesting 'level'.  Sections recurse with level + 1;
+ * values are written as "key = value" or "key = [v1, v2, ...]".
+ * Indentation is one tab per level, capped at MAX_INDENT.
+ */
+static int _write_config(const struct dm_config_node *n, int only_one,
+                        struct config_output *out, int level)
+{
+       const char *extra_space;
+       int format_array;
+       char space[MAX_INDENT + 1];
+       int l = (level < MAX_INDENT) ? level : MAX_INDENT;
+       int i;
+       char *escaped_key = NULL;
+
+       if (!n)
+               return 1;
+
+       /* Build the indentation prefix for this level. */
+       for (i = 0; i < l; i++)
+               space[i] = '\t';
+       space[i] = '\0';
+
+       do {
+               extra_space = (n->v && (n->v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES)) ? " " : "";
+               format_array = (n->v && (n->v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_ARRAY));
+
+               if (out->spec && out->spec->prefix_fn)
+                       out->spec->prefix_fn(n, space, out->baton);
+
+               if (!_line_start(out))
+                       return_0;
+               /* Keys containing #, " or ! must be quoted and escaped.
+                * NOTE(review): alloca inside this do/while grows the stack
+                * until the function returns — fine for config-sized input. */
+               if (strchr(n->key, '#') || strchr(n->key, '"') || strchr(n->key, '!')) {
+                       escaped_key = alloca(dm_escaped_len(n->key) + 2);
+                       *escaped_key = '"';
+                       dm_escape_double_quotes(escaped_key + 1, n->key);
+                       strcat(escaped_key, "\"");
+               }
+               line_append("%s%s", space, escaped_key ? escaped_key : n->key);
+               escaped_key = NULL;
+               if (!n->v) {
+                       /* it's a sub section */
+                       line_append(" {");
+                       if (!_line_end(n, out))
+                               return_0;
+                       if (!_write_config(n->child, 0, out, level + 1))
+                               return_0;
+                       if (!_line_start(out))
+                               return_0;
+                       line_append("%s}", space);
+               } else {
+                       /* it's a value */
+                       const struct dm_config_value *v = n->v;
+                       line_append("%s=%s", extra_space, extra_space);
+                       if (v->next) {
+                               /* Multi-element list: always bracketed. */
+                               line_append("[%s", extra_space);
+                               while (v && v->type != DM_CFG_EMPTY_ARRAY) {
+                                       if (!_write_value(out, v))
+                                               return_0;
+                                       v = v->next;
+                                       if (v && v->type != DM_CFG_EMPTY_ARRAY)
+                                               line_append(",%s", extra_space);
+                               }
+                               line_append("%s]", extra_space);
+                       } else {
+                               /* Single value; bracket only if flagged as array. */
+                               if (format_array && (v->type != DM_CFG_EMPTY_ARRAY))
+                                       line_append("[%s", extra_space);
+                               if (!_write_value(out, v))
+                                       return_0;
+                               if (format_array && (v->type != DM_CFG_EMPTY_ARRAY))
+                                       line_append("%s]", extra_space);
+                       }
+               }
+               if (!_line_end(n, out))
+                       return_0;
+
+               if (out->spec && out->spec->suffix_fn)
+                       out->spec->suffix_fn(n, space, out->baton);
+
+               n = n->sib;
+       } while (n && !only_one);
+       /* FIXME: add error checking */
+       return 1;
+}
+
+/*
+ * Common driver for the four public dm_config_write_* entry points:
+ * sets up a temporary output pool, serialises 'cn' (one node or the
+ * whole sibling chain) and always destroys the pool before returning.
+ */
+static int _write_node(const struct dm_config_node *cn, int only_one,
+                      dm_putline_fn putline,
+                      const struct dm_config_node_out_spec *out_spec,
+                      void *baton)
+{
+       struct config_output out = {
+               .mem = dm_pool_create("config_output", 1024),
+               .putline = putline,
+               .spec = out_spec,
+               .baton = baton
+       };
+
+       if (!out.mem)
+               return_0;
+
+       if (!_write_config(cn, only_one, &out, 0)) {
+               dm_pool_destroy(out.mem);
+               return_0;
+       }
+       dm_pool_destroy(out.mem);
+       return 1;
+}
+
+/* Write a single node via the plain line callback. */
+int dm_config_write_one_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton)
+{
+       return _write_node(cn, 1, putline, NULL, baton);
+}
+
+/* Write the node and all its siblings via the plain line callback. */
+int dm_config_write_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton)
+{
+       return _write_node(cn, 0, putline, NULL, baton);
+}
+
+/* Write a single node via the richer out_spec callbacks. */
+int dm_config_write_one_node_out(const struct dm_config_node *cn,
+                                const struct dm_config_node_out_spec *out_spec,
+                                void *baton)
+{
+       return _write_node(cn, 1, NULL, out_spec, baton);
+}
+
+/* Write the node and all its siblings via the out_spec callbacks. */
+int dm_config_write_node_out(const struct dm_config_node *cn,
+                            const struct dm_config_node_out_spec *out_spec,
+                            void *baton)
+{
+       return _write_node(cn, 0, NULL, out_spec, baton);
+}
+
+/*
+ * parser
+ */
+/*
+ * Duplicate the current string token with its surrounding quote
+ * characters stripped.  Returns a pool-allocated copy or NULL on a
+ * malformed (too short) token or allocation failure.
+ */
+static char *_dup_string_tok(struct parser *p)
+{
+       char *str;
+
+       p->tb++, p->te--;       /* strip "'s */
+
+       if (p->te < p->tb) {
+               log_error("Parse error at byte %" PRIptrdiff_t " (line %d): "
+                         "expected a string token.",
+                         p->tb - p->fb + 1, p->line);
+               return NULL;
+       }
+
+       if (!(str = _dup_tok(p)))
+               return_NULL;
+
+       /* Restore te so the tokeniser continues past the closing quote. */
+       p->te++;
+
+       return str;
+}
+
+/*
+ * Top-level grammar rule: parse sections/assignments until EOF under a
+ * temporary "<root>" node and return its child list.
+ */
+static struct dm_config_node *_file(struct parser *p)
+{
+       struct dm_config_node root = { 0 };
+       root.key = "<root>";
+
+       while (p->t != TOK_EOF)
+               if (!_section(p, &root))
+                       return_NULL;
+       return root.child;
+}
+
+/*
+ * Allocate a node keyed by [key_b, key_e) and, when 'parent' is given,
+ * prepend it to parent's child list (order is fixed up later by
+ * _config_reverse).
+ */
+static struct dm_config_node *_make_node(struct dm_pool *mem,
+                                        const char *key_b, const char *key_e,
+                                        struct dm_config_node *parent)
+{
+       struct dm_config_node *n;
+
+       if (!(n = _create_node(mem)))
+               return_NULL;
+
+       n->key = _dup_token(mem, key_b, key_e);
+       if (parent) {
+               n->parent = parent;
+               n->sib = parent->child;
+               parent->child = n;
+       }
+       return n;
+}
+
+/* when mem is not NULL, we create the path if it doesn't exist yet */
+/*
+ * Walk 'path' ('/'-separated segments) below 'parent', descending one
+ * level per segment.  Existing nodes are matched (warning on duplicates
+ * unless no_dup_node_check); missing ones are created when 'mem' is set.
+ * Returns the node for the final segment, or NULL.
+ */
+static struct dm_config_node *_find_or_make_node(struct dm_pool *mem,
+                                                struct dm_config_node *parent,
+                                                const char *path,
+                                                int no_dup_node_check)
+{
+       const char *e;
+       struct dm_config_node *cn = parent ? parent->child : NULL;
+       struct dm_config_node *cn_found = NULL;
+
+       while (cn || mem) {
+               /* trim any leading slashes */
+               while (*path && (*path == _sep))
+                       path++;
+
+               /* find the end of this segment */
+               for (e = path; *e && (*e != _sep); e++) ;
+
+               /* hunt for the node */
+               cn_found = NULL;
+
+               if (!no_dup_node_check) {
+                       /* Scan the whole sibling list so duplicates get reported. */
+                       while (cn) {
+                               if (_tok_match(cn->key, path, e)) {
+                                       /* Inefficient */
+                                       if (!cn_found)
+                                               cn_found = cn;
+                                       else
+                                               log_warn("WARNING: Ignoring duplicate"
+                                                        " config node: %s ("
+                                                        "seeking %s)", cn->key, path);
+                               }
+
+                               cn = cn->sib;
+                       }
+               }
+
+               /* Not found: create the segment if we are allowed to. */
+               if (!cn_found && mem) {
+                       if (!(cn_found = _make_node(mem, path, e, parent)))
+                               return_NULL;
+               }
+
+               /* More path left: descend; otherwise we are done. */
+               if (cn_found && *e) {
+                       parent = cn_found;
+                       cn = cn_found->child;
+               } else
+                       return cn_found;
+               path = e;
+       }
+
+       return NULL;
+}
+
+/*
+ * Parse one section or assignment under 'parent':
+ * either  identifier { section* }  or  identifier = value.
+ * The identifier may be bare, single- or double-quoted.
+ * Returns the node, or NULL on parse/allocation error.
+ */
+static struct dm_config_node *_section(struct parser *p, struct dm_config_node *parent)
+{
+       /* IDENTIFIER SECTION_B_CHAR VALUE* SECTION_E_CHAR */
+
+       struct dm_config_node *root;
+       struct dm_config_value *value;
+       char *str;
+
+       if (p->t == TOK_STRING_ESCAPED) {
+               if (!(str = _dup_string_tok(p)))
+                       return_NULL;
+               /* Double-quoted: resolve backslash escapes in place. */
+               dm_unescape_double_quotes(str);
+
+               match(TOK_STRING_ESCAPED);
+       } else if (p->t == TOK_STRING) {
+               if (!(str = _dup_string_tok(p)))
+                       return_NULL;
+
+               match(TOK_STRING);
+       } else {
+               if (!(str = _dup_tok(p)))
+                       return_NULL;
+
+               match(TOK_IDENTIFIER);
+       }
+
+       if (!strlen(str)) {
+               log_error("Parse error at byte %" PRIptrdiff_t " (line %d): empty section identifier",
+                         p->tb - p->fb + 1, p->line);
+               return NULL;
+       }
+
+       if (!(root = _find_or_make_node(p->mem, parent, str, p->no_dup_node_check)))
+               return_NULL;
+
+       if (p->t == TOK_SECTION_B) {
+               /* Nested section: recurse until the closing brace. */
+               match(TOK_SECTION_B);
+               while (p->t != TOK_SECTION_E) {
+                       if (!(_section(p, root)))
+                               return_NULL;
+               }
+               match(TOK_SECTION_E);
+       } else {
+               /* Assignment: key = value. */
+               match(TOK_EQ);
+               if (!(value = _value(p)))
+                       return_NULL;
+               if (root->v)
+                       log_warn("WARNING: Ignoring duplicate"
+                                " config value: %s", str);
+               root->v = value;
+       }
+
+       return root;
+}
+
+/*
+ * Parse a value: either a bracketed, comma-separated array of scalars
+ * or a single scalar.  An empty array is represented by one value of
+ * type DM_CFG_EMPTY_ARRAY.  Returns the head of the value list or NULL.
+ */
+static struct dm_config_value *_value(struct parser *p)
+{
+       /* '[' TYPE* ']' | TYPE */
+       struct dm_config_value *h = NULL, *l, *ll = NULL;
+       if (p->t == TOK_ARRAY_B) {
+               match(TOK_ARRAY_B);
+               while (p->t != TOK_ARRAY_E) {
+                       if (!(l = _type(p)))
+                               return_NULL;
+
+                       /* Append to the singly linked list (h = head, ll = tail). */
+                       if (!h)
+                               h = l;
+                       else
+                               ll->next = l;
+                       ll = l;
+
+                       if (p->t == TOK_COMMA)
+                               match(TOK_COMMA);
+               }
+               match(TOK_ARRAY_E);
+               /*
+                * Special case for an empty array.
+                */
+               if (!h) {
+                       if (!(h = _create_value(p->mem))) {
+                               log_error("Failed to allocate value");
+                               return NULL;
+                       }
+
+                       h->type = DM_CFG_EMPTY_ARRAY;
+               }
+
+       } else
+               if (!(h = _type(p)))
+                       return_NULL;
+
+       return h;
+}
+
+/*
+ * Parse a single scalar value according to the current token type:
+ * integer, float, or one of the three string flavours.  The value is
+ * allocated from the parser's pool.  Returns NULL on error.
+ */
+static struct dm_config_value *_type(struct parser *p)
+{
+       /* [+-]{0,1}[0-9]+ | [0-9]*\.[0-9]* | ".*" */
+       struct dm_config_value *v = _create_value(p->mem);
+       char *str;
+
+       if (!v) {
+               log_error("Failed to allocate type value");
+               return NULL;
+       }
+
+       switch (p->t) {
+       case TOK_INT:
+               v->type = DM_CFG_INT;
+               /* strtoll reports range errors via errno, so clear it first. */
+               errno = 0;
+               v->v.i = strtoll(p->tb, NULL, 0);       /* FIXME: check error */
+               if (errno) {
+                       log_error("Failed to read int token.");
+                       return NULL;
+               }
+               match(TOK_INT);
+               break;
+
+       case TOK_FLOAT:
+               v->type = DM_CFG_FLOAT;
+               errno = 0;
+               v->v.f = strtod(p->tb, NULL);   /* FIXME: check error */
+               if (errno) {
+                       log_error("Failed to read float token.");
+                       return NULL;
+               }
+               match(TOK_FLOAT);
+               break;
+
+       case TOK_STRING:
+               /* Single-quoted: taken literally, no escape processing. */
+               v->type = DM_CFG_STRING;
+
+               if (!(v->v.str = _dup_string_tok(p)))
+                       return_NULL;
+
+               match(TOK_STRING);
+               break;
+
+       case TOK_STRING_BARE:
+               /* Unquoted string in value position. */
+               v->type = DM_CFG_STRING;
+
+               if (!(v->v.str = _dup_tok(p)))
+                       return_NULL;
+
+               match(TOK_STRING_BARE);
+               break;
+
+       case TOK_STRING_ESCAPED:
+               /* Double-quoted: resolve backslash escapes in place. */
+               v->type = DM_CFG_STRING;
+
+               if (!(str = _dup_string_tok(p)))
+                       return_NULL;
+               dm_unescape_double_quotes(str);
+               v->v.str = str;
+               match(TOK_STRING_ESCAPED);
+               break;
+
+       default:
+               log_error("Parse error at byte %" PRIptrdiff_t " (line %d): expected a value",
+                         p->tb - p->fb + 1, p->line);
+               return NULL;
+       }
+       return v;
+}
+
+/*
+ * Consume the current token if it has type 't' and advance to the next
+ * one.  Returns 0 (without consuming) on a type mismatch.
+ */
+static int _match_aux(struct parser *p, int t)
+{
+       if (p->t != t)
+               return 0;
+
+       _get_token(p, t);
+       return 1;
+}
+
+/*
+ * tokeniser
+ */
+/*
+ * Advance to the next token: set p->t to its type and [p->tb, p->te) to
+ * its extent.  'tok_prev' is the previously consumed token; after '=',
+ * '[' or ',' the next word is read as a value (numbers, bare strings)
+ * rather than as an identifier.
+ */
+static void _get_token(struct parser *p, int tok_prev)
+{
+       int values_allowed = 0;
+
+       const char *te;
+
+       p->tb = p->te;
+       _eat_space(p);
+       if (p->tb == p->fe || !*p->tb) {
+               p->t = TOK_EOF;
+               return;
+       }
+
+       /* Should next token be interpreted as value instead of identifier? */
+       if (tok_prev == TOK_EQ || tok_prev == TOK_ARRAY_B ||
+           tok_prev == TOK_COMMA)
+               values_allowed = 1;
+
+       p->t = TOK_INT;         /* fudge so the fall through for
+                                  floats works */
+
+       te = p->te;
+       switch (*te) {
+       case SECTION_B_CHAR:
+               p->t = TOK_SECTION_B;
+               te++;
+               break;
+
+       case SECTION_E_CHAR:
+               p->t = TOK_SECTION_E;
+               te++;
+               break;
+
+       case '[':
+               p->t = TOK_ARRAY_B;
+               te++;
+               break;
+
+       case ']':
+               p->t = TOK_ARRAY_E;
+               te++;
+               break;
+
+       case ',':
+               p->t = TOK_COMMA;
+               te++;
+               break;
+
+       case '=':
+               p->t = TOK_EQ;
+               te++;
+               break;
+
+       case '"':
+               /* Double-quoted string; a backslash escapes the next char. */
+               p->t = TOK_STRING_ESCAPED;
+               te++;
+               while ((te != p->fe) && (*te) && (*te != '"')) {
+                       if ((*te == '\\') && (te + 1 != p->fe) &&
+                           *(te + 1))
+                               te++;
+                       te++;
+               }
+
+               /* Step over the closing quote if present. */
+               if ((te != p->fe) && (*te))
+                       te++;
+               break;
+
+       case '\'':
+               /* Single-quoted string; no escape handling. */
+               p->t = TOK_STRING;
+               te++;
+               while ((te != p->fe) && (*te) && (*te != '\''))
+                       te++;
+
+               if ((te != p->fe) && (*te))
+                       te++;
+               break;
+
+       case '.':
+               p->t = TOK_FLOAT;
+               /* Fall through */
+       case '0':
+       case '1':
+       case '2':
+       case '3':
+       case '4':
+       case '5':
+       case '6':
+       case '7':
+       case '8':
+       case '9':
+       case '+':
+       case '-':
+               if (values_allowed) {
+                       /* Scan digits; the first '.' upgrades INT to FLOAT. */
+                       while (++te != p->fe) {
+                               if (!isdigit((int) *te)) {
+                                       if (*te == '.') {
+                                               if (p->t != TOK_FLOAT) {
+                                                       p->t = TOK_FLOAT;
+                                                       continue;
+                                               }
+                                       }
+                                       break;
+                               }
+                       }
+                       break;
+               }
+               /* fall through */
+
+       default:
+               /* Word token: identifier, or bare string in value position. */
+               p->t = TOK_IDENTIFIER;
+               while ((te != p->fe) && (*te) && !isspace(*te) &&
+                      (*te != '#') && (*te != '=') &&
+                      (*te != SECTION_B_CHAR) &&
+                      (*te != SECTION_E_CHAR))
+                       te++;
+               if (values_allowed)
+                       p->t = TOK_STRING_BARE;
+               break;
+       }
+
+       p->te = te;
+}
+
+static void _eat_space(struct parser *p)
+{
+       while (p->tb != p->fe) {
+               if (*p->te == '#')
+                       while ((p->te != p->fe) && (*p->te != '\n') && (*p->te))
+                               ++p->te;
+
+               else if (!isspace(*p->te))
+                       break;
+
+               while ((p->te != p->fe) && isspace(*p->te)) {
+                       if (*p->te == '\n')
+                               ++p->line;
+                       ++p->te;
+               }
+
+               p->tb = p->te;
+       }
+}
+
/*
 * memory management
 */
/* Allocate a zero-initialised dm_config_value from 'mem'; NULL on failure. */
static struct dm_config_value *_create_value(struct dm_pool *mem)
{
       return dm_pool_zalloc(mem, sizeof(struct dm_config_value));
}

/* Allocate a zero-initialised dm_config_node from 'mem'; NULL on failure. */
static struct dm_config_node *_create_node(struct dm_pool *mem)
{
       return dm_pool_zalloc(mem, sizeof(struct dm_config_node));
}
+
+static char *_dup_token(struct dm_pool *mem, const char *b, const char *e)
+{
+       size_t len = e - b;
+       char *str = dm_pool_alloc(mem, len + 1);
+       if (!str) {
+               log_error("Failed to duplicate token.");
+               return 0;
+       }
+       memcpy(str, b, len);
+       str[len] = '\0';
+       return str;
+}
+
+static char *_dup_tok(struct parser *p)
+{
+       return _dup_token(p->mem, p->tb, p->te);
+}
+
/*
 * Utility functions
 */

/*
 * node_lookup_fn is either:
 *   _find_config_node to perform a lookup starting from a given config_node 
 *   in a config_tree;
 * or
 *   _find_first_config_node to find the first config_node in a set of 
 *   cascaded trees.
 */
typedef const struct dm_config_node *node_lookup_fn(const void *start, const char *path);

/* Look up 'path' among the children of config node 'start'.  A dummy
 * parent node lets the search begin at 'start' itself; the NULL pool
 * presumably makes _find_or_make_node lookup-only (no node creation)
 * -- TODO confirm against _find_or_make_node. */
static const struct dm_config_node *_find_config_node(const void *start, const char *path) {
       struct dm_config_node dummy = { .child = (void *) start };
       return _find_or_make_node(NULL, &dummy, path, 0);
}

/* Walk a cascade of config trees and return the first node matching 'path'. */
static const struct dm_config_node *_find_first_config_node(const void *start, const char *path)
{
       const struct dm_config_tree *cft = start;
       const struct dm_config_node *cn = NULL;

       while (cft) {
               if ((cn = _find_config_node(cft->root, path)))
                       return cn;
               cft = cft->cascade;
       }

       return NULL;
}
+
/*
 * Shared string lookup: returns the value at 'path', or 'fail' when the
 * node is missing or unusable.  An empty string value is only accepted
 * when 'allow_empty' is set.
 */
static const char *_find_config_str(const void *start, node_lookup_fn find_fn,
                                   const char *path, const char *fail, int allow_empty)
{
       const struct dm_config_node *n = find_fn(start, path);

       /* Empty strings are ignored unless allow_empty is set */
       if (n && n->v) {
               if ((n->v->type == DM_CFG_STRING) &&
                   (allow_empty || (*n->v->v.str))) {
                       /* log_very_verbose("Setting %s to %s", path, n->v->v.str); */
                       return n->v->v.str;
               }
               /* Warn about a non-string value, or about a rejected empty
                * string when a default exists. */
               if ((n->v->type != DM_CFG_STRING) || (!allow_empty && fail))
                       log_warn("WARNING: Ignoring unsupported value for %s.", path);
       }

       if (fail)
               log_very_verbose("%s not found in config: defaulting to %s",
                                path, fail);
       return fail;
}
+
/* String lookup below node 'cn'; empty string values are rejected. */
const char *dm_config_find_str(const struct dm_config_node *cn,
                              const char *path, const char *fail)
{
       return _find_config_str(cn, _find_config_node, path, fail, 0);
}

/* As dm_config_find_str(), but an empty string is a valid result. */
const char *dm_config_find_str_allow_empty(const struct dm_config_node *cn,
                                          const char *path, const char *fail)
{
       return _find_config_str(cn, _find_config_node, path, fail, 1);
}
+
+static int64_t _find_config_int64(const void *start, node_lookup_fn find,
+                                 const char *path, int64_t fail)
+{
+       const struct dm_config_node *n = find(start, path);
+
+       if (n && n->v && n->v->type == DM_CFG_INT) {
+               /* log_very_verbose("Setting %s to %" PRId64, path, n->v->v.i); */
+               return n->v->v.i;
+       }
+
+       log_very_verbose("%s not found in config: defaulting to %" PRId64,
+                        path, fail);
+       return fail;
+}
+
+static float _find_config_float(const void *start, node_lookup_fn find,
+                               const char *path, float fail)
+{
+       const struct dm_config_node *n = find(start, path);
+
+       if (n && n->v && n->v->type == DM_CFG_FLOAT) {
+               /* log_very_verbose("Setting %s to %f", path, n->v->v.f); */
+               return n->v->v.f;
+       }
+
+       log_very_verbose("%s not found in config: defaulting to %f",
+                        path, fail);
+
+       return fail;
+
+}
+
/* Case-insensitive membership test in a NULL-terminated string array. */
static int _str_in_array(const char *str, const char * const values[])
{
       const char * const *v;

       for (v = values; *v; v++)
               if (!strcasecmp(str, *v))
                       return 1;

       return 0;
}

/*
 * Interpret a boolean config string: "y"/"yes"/"on"/"true" => 1,
 * "n"/"no"/"off"/"false" => 0 (case-insensitive), otherwise 'fail'.
 */
static int _str_to_bool(const char *str, int fail)
{
       static const char * const _yes[] = { "y", "yes", "on", "true", NULL };
       static const char * const _no[]  = { "n", "no", "off", "false", NULL };

       if (_str_in_array(str, _yes))
               return 1;

       if (_str_in_array(str, _no))
               return 0;

       return fail;
}
+
+static int _find_config_bool(const void *start, node_lookup_fn find,
+                            const char *path, int fail)
+{
+       const struct dm_config_node *n = find(start, path);
+       const struct dm_config_value *v;
+       int b;
+
+       if (n) {
+               v = n->v;
+
+               switch (v->type) {
+               case DM_CFG_INT:
+                       b = v->v.i ? 1 : 0;
+                       /* log_very_verbose("Setting %s to %d", path, b); */
+                       return b;
+
+               case DM_CFG_STRING:
+                       b = _str_to_bool(v->v.str, fail);
+                       /* log_very_verbose("Setting %s to %d", path, b); */
+                       return b;
+               default:
+                       ;
+               }
+       }
+
+       log_very_verbose("%s not found in config: defaulting to %d",
+                        path, fail);
+
+       return fail;
+}
+
/***********************************
 * node-based lookup
 **/

/* Find the node at 'path' below 'cn'; NULL if absent. */
struct dm_config_node *dm_config_find_node(const struct dm_config_node *cn,
                                          const char *path)
{
       return (struct dm_config_node *) _find_config_node(cn, path);
}

/* Integer lookup below 'cn'; returns 'fail' when absent or not an int. */
int dm_config_find_int(const struct dm_config_node *cn, const char *path, int fail)
{
       /* FIXME Add log_error message on overflow */
       return (int) _find_config_int64(cn, _find_config_node, path, (int64_t) fail);
}

/* 64-bit integer lookup below 'cn'. */
int64_t dm_config_find_int64(const struct dm_config_node *cn, const char *path, int64_t fail)
{
       return _find_config_int64(cn, _find_config_node, path, fail);
}

/* Float lookup below 'cn'. */
float dm_config_find_float(const struct dm_config_node *cn, const char *path,
                          float fail)
{
       return _find_config_float(cn, _find_config_node, path, fail);
}

/* Boolean lookup below 'cn'. */
int dm_config_find_bool(const struct dm_config_node *cn, const char *path, int fail)
{
       return _find_config_bool(cn, _find_config_node, path, fail);
}
+
+int dm_config_value_is_bool(const struct dm_config_value *v) {
+       if (!v)
+               return 0;
+
+       switch(v->type) {
+               case DM_CFG_INT:
+                       return 1;
+               case DM_CFG_STRING:
+                       return _str_to_bool(v->v.str, -1) != -1;
+               default:
+                       return 0;
+       }
+}
+
/***********************************
 * tree-based lookup
 **/

/* As dm_config_find_node(), but searches every tree in the cascade. */
const struct dm_config_node *dm_config_tree_find_node(const struct dm_config_tree *cft,
                                                     const char *path)
{
       return _find_first_config_node(cft, path);
}

/* Cascaded string lookup; empty string values are rejected. */
const char *dm_config_tree_find_str(const struct dm_config_tree *cft, const char *path,
                                   const char *fail)
{
       return _find_config_str(cft, _find_first_config_node, path, fail, 0);
}

/* Cascaded string lookup; an empty string is a valid result. */
const char *dm_config_tree_find_str_allow_empty(const struct dm_config_tree *cft, const char *path,
                                               const char *fail)
{
       return _find_config_str(cft, _find_first_config_node, path, fail, 1);
}

/* Cascaded integer lookup. */
int dm_config_tree_find_int(const struct dm_config_tree *cft, const char *path, int fail)
{
       /* FIXME Add log_error message on overflow */
       return (int) _find_config_int64(cft, _find_first_config_node, path, (int64_t) fail);
}

/* Cascaded 64-bit integer lookup. */
int64_t dm_config_tree_find_int64(const struct dm_config_tree *cft, const char *path, int64_t fail)
{
       return _find_config_int64(cft, _find_first_config_node, path, fail);
}

/* Cascaded float lookup. */
float dm_config_tree_find_float(const struct dm_config_tree *cft, const char *path,
                               float fail)
{
       return _find_config_float(cft, _find_first_config_node, path, fail);
}

/* Cascaded boolean lookup. */
int dm_config_tree_find_bool(const struct dm_config_tree *cft, const char *path, int fail)
{
       return _find_config_bool(cft, _find_first_config_node, path, fail);
}
+
+/************************************/
+
+
+int dm_config_get_uint32(const struct dm_config_node *cn, const char *path,
+                        uint32_t *result)
+{
+       const struct dm_config_node *n;
+
+       n = _find_config_node(cn, path);
+
+       if (!n || !n->v || n->v->type != DM_CFG_INT)
+               return 0;
+
+       if (result)
+               *result = n->v->v.i;
+       return 1;
+}
+
+int dm_config_get_uint64(const struct dm_config_node *cn, const char *path,
+                        uint64_t *result)
+{
+       const struct dm_config_node *n;
+
+       n = _find_config_node(cn, path);
+
+       if (!n || !n->v || n->v->type != DM_CFG_INT)
+               return 0;
+
+       if (result)
+               *result = (uint64_t) n->v->v.i;
+       return 1;
+}
+
+int dm_config_get_str(const struct dm_config_node *cn, const char *path,
+                     const char **result)
+{
+       const struct dm_config_node *n;
+
+       n = _find_config_node(cn, path);
+
+       if (!n || !n->v || n->v->type != DM_CFG_STRING)
+               return 0;
+
+       if (result)
+               *result = n->v->v.str;
+       return 1;
+}
+
+int dm_config_get_list(const struct dm_config_node *cn, const char *path,
+                      const struct dm_config_value **result)
+{
+       const struct dm_config_node *n;
+
+       n = _find_config_node(cn, path);
+       /* TODO when we represent single-item lists consistently, add a check
+        * for n->v->next != NULL */
+       if (!n || !n->v)
+               return 0;
+
+       if (result)
+               *result = n->v;
+       return 1;
+}
+
+int dm_config_get_section(const struct dm_config_node *cn, const char *path,
+                         const struct dm_config_node **result)
+{
+       const struct dm_config_node *n;
+
+       n = _find_config_node(cn, path);
+       if (!n || n->v)
+               return 0;
+
+       if (result)
+               *result = n;
+       return 1;
+}
+
+int dm_config_has_node(const struct dm_config_node *cn, const char *path)
+{
+       return _find_config_node(cn, path) ? 1 : 0;
+}
+
/*
 * Convert a token type to the char it represents.
 */
static char _token_type_to_char(int type)
{
       switch (type) {
               case TOK_SECTION_B:
                       return SECTION_B_CHAR;
               case TOK_SECTION_E:
                       return SECTION_E_CHAR;
               default:
                       return 0;       /* no single-char representation */
       }
}

/*
 * Returns:
 *  # of 'type' tokens in 'str'.
 */
static unsigned _count_tokens(const char *str, unsigned len, int type)
{
       char c;

       c = _token_type_to_char(type);

       return dm_count_chars(str, len, c);
}
+
+const char *dm_config_parent_name(const struct dm_config_node *n)
+{
+       return (n->parent ? n->parent->key : "(root)");
+}
+/*
+ * Heuristic function to make a quick guess as to whether a text
+ * region probably contains a valid config "section".  (Useful for
+ * scanning areas of the disk for old metadata.)
+ * Config sections contain various tokens, may contain other sections
+ * and strings, and are delimited by begin (type 'TOK_SECTION_B') and
+ * end (type 'TOK_SECTION_E') tokens.  As a quick heuristic, we just
+ * count the number of begin and end tokens, and see if they are
+ * non-zero and the counts match.
+ * Full validation of the section should be done with another function
+ * (for example, read_config_fd).
+ *
+ * Returns:
+ *  0 - probably is not a valid config section
+ *  1 - probably _is_ a valid config section
+ */
+unsigned dm_config_maybe_section(const char *str, unsigned len)
+{
+       int begin_count;
+       int end_count;
+
+       begin_count = _count_tokens(str, len, TOK_SECTION_B);
+       end_count = _count_tokens(str, len, TOK_SECTION_E);
+
+       if (begin_count && end_count && (begin_count == end_count))
+               return 1;
+       else
+               return 0;
+}
+
/*
 * Deep-copy config value 'v' (and, recursively, the rest of its list via
 * ->next) into pool 'mem'.  String payloads are duplicated into the
 * pool; other payloads are copied by value.  Returns NULL on allocation
 * failure; partial allocations remain in 'mem' and are released with the
 * pool.
 */
__attribute__((nonnull(1, 2)))
static struct dm_config_value *_clone_config_value(struct dm_pool *mem,
                                                  const struct dm_config_value *v)
{
       struct dm_config_value *new_cv;

       if (!(new_cv = _create_value(mem))) {
               log_error("Failed to clone config value.");
               return NULL;
       }

       new_cv->type = v->type;
       if (v->type == DM_CFG_STRING) {
               if (!(new_cv->v.str = dm_pool_strdup(mem, v->v.str))) {
                       log_error("Failed to clone config string value.");
                       return NULL;
               }
       } else
               new_cv->v = v->v;

       /* Clone the remainder of the value list. */
       if (v->next && !(new_cv->next = _clone_config_value(mem, v->next)))
               return_NULL;

       return new_cv;
}
+
/*
 * Deep-copy config node 'cn' into pool 'mem': key, value list and child
 * subtree are cloned; siblings too when 'siblings' is set.  The clone's
 * parent link is left NULL.  Returns NULL on failure.
 */
struct dm_config_node *dm_config_clone_node_with_mem(struct dm_pool *mem, const struct dm_config_node *cn, int siblings)
{
       struct dm_config_node *new_cn;

       if (!cn) {
               log_error("Cannot clone NULL config node.");
               return NULL;
       }

       if (!(new_cn = _create_node(mem))) {
               log_error("Failed to clone config node.");
               return NULL;
       }

       if ((cn->key && !(new_cn->key = dm_pool_strdup(mem, cn->key)))) {
               log_error("Failed to clone config node key.");
               return NULL;
       }

       new_cn->id = cn->id;

       if ((cn->v && !(new_cn->v = _clone_config_value(mem, cn->v))) ||
           (cn->child && !(new_cn->child = dm_config_clone_node_with_mem(mem, cn->child, 1))) ||
           (siblings && cn->sib && !(new_cn->sib = dm_config_clone_node_with_mem(mem, cn->sib, siblings))))
               return_NULL; /* 'new_cn' released with mem pool */

       return new_cn;
}

/* Clone 'node' into 'cft's memory pool. */
struct dm_config_node *dm_config_clone_node(struct dm_config_tree *cft, const struct dm_config_node *node, int sib)
{
       return dm_config_clone_node_with_mem(cft->mem, node, sib);
}
+
+struct dm_config_node *dm_config_create_node(struct dm_config_tree *cft, const char *key)
+{
+       struct dm_config_node *cn;
+
+       if (!(cn = _create_node(cft->mem))) {
+               log_error("Failed to create config node.");
+               return NULL;
+       }
+       if (!(cn->key = dm_pool_strdup(cft->mem, key))) {
+               log_error("Failed to create config node's key.");
+               return NULL;
+       }
+       cn->parent = NULL;
+       cn->v = NULL;
+
+       return cn;
+}
+
/* Allocate a new, zeroed config value in 'cft's pool; NULL on failure. */
struct dm_config_value *dm_config_create_value(struct dm_config_tree *cft)
{
       return _create_value(cft->mem);
}

/* Set the formatting flags of 'cv'; a NULL 'cv' is a no-op. */
void dm_config_value_set_format_flags(struct dm_config_value *cv, uint32_t format_flags)
{
       if (!cv)
               return;

       cv->format_flags = format_flags;
}

/* Return the formatting flags of 'cv'; 0 when 'cv' is NULL. */
uint32_t dm_config_value_get_format_flags(struct dm_config_value *cv)
{
       if (!cv)
               return 0;

       return cv->format_flags;
}

/* Expose the tree's backing memory pool. */
struct dm_pool *dm_config_memory(struct dm_config_tree *cft)
{
       return cft->mem;
}
+
/*
 * dm_config_flatten() callback: copy the value found at 'path' in 'node'
 * into the output tree passed as 'baton', creating intermediate section
 * nodes as needed.  The temporary 'dummy' parent lets _find_or_make_node
 * extend cft->root even while it is still NULL.
 */
static int _override_path(const char *path, struct dm_config_node *node, void *baton)
{
       struct dm_config_tree *cft = baton;
       struct dm_config_node dummy, *target;
       dummy.child = cft->root;
       if (!(target = _find_or_make_node(cft->mem, &dummy, path, 0)))
               return_0;
       if (!(target->v = _clone_config_value(cft->mem, node->v)))
               return_0;
       /* _find_or_make_node may have replaced the root via the dummy. */
       cft->root = dummy.child;
       return 1;
}
+
/*
 * Depth-first walk over 'cn' and its siblings: sections are recursed
 * into, and 'cb' is invoked (with 'baton') on each leaf node with its
 * full slash-separated path.  Stops and returns 0 on the first failure.
 * NOTE(review): on a dm_asprintf() failure we return without freeing
 * 'sub' -- presumably dm_asprintf leaves it unset on error; confirm.
 */
static int _enumerate(const char *path, struct dm_config_node *cn, int (*cb)(const char *, struct dm_config_node *, void *), void *baton)
{
       char *sub = NULL;

       while (cn) {
               if (dm_asprintf(&sub, "%s/%s", path, cn->key) < 0)
                       return_0;
               if (cn->child) {
                       /* Section: recurse with the extended path. */
                       if (!_enumerate(sub, cn->child, cb, baton))
                               goto_bad;
               } else
                       if (!cb(sub, cn, baton))
                               goto_bad;
               dm_free(sub);
               cn = cn->sib;
       }
       return 1;
bad:
       dm_free(sub);
       return 0;
}
+
/*
 * Flatten a cascade of config trees into one new tree.  Trees are
 * applied from the tail of the cascade towards 'cft', so values from
 * trees nearer the head override those further down.
 * NOTE(review): the return value of _enumerate() is ignored, so an
 * allocation failure silently yields an incomplete result -- confirm
 * this is intended.
 */
struct dm_config_tree *dm_config_flatten(struct dm_config_tree *cft)
{
       struct dm_config_tree *res = dm_config_create(), *done = NULL, *current = NULL;

       if (!res)
               return_NULL;

       while (done != cft) {
               /* Find the deepest tree not yet processed. */
               current = cft;
               while (current->cascade != done)
                       current = current->cascade;
               _enumerate("", current->root, _override_path, res);
               done = current;
       }

       return res;
}
+
+int dm_config_remove_node(struct dm_config_node *parent, struct dm_config_node *rem_node)
+{
+       struct dm_config_node *cn = parent->child, *last = NULL;
+       while (cn) {
+               if (cn == rem_node) {
+                       if (last)
+                               last->sib = cn->sib;
+                       else
+                               parent->child = cn->sib;
+                       return 1;
+               }
+               last = cn;
+               cn = cn->sib;
+       }
+       return 0;
+}
diff --git a/device_mapper/libdm-deptree.c b/device_mapper/libdm-deptree.c
new file mode 100644 (file)
index 0000000..fab3e06
--- /dev/null
@@ -0,0 +1,3853 @@
+/*
+ * Copyright (C) 2005-2017 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "ioctl/libdm-targets.h"
+#include "libdm-common.h"
+#include "misc/kdev_t.h"
+#include "misc/dm-ioctl.h"
+
+#include <stdarg.h>
+#include <sys/param.h>
+#include <sys/utsname.h>
+
+#define MAX_TARGET_PARAMSIZE 500000
+
/* Supported segment types */
enum {
       SEG_CACHE,
       SEG_CRYPT,
       SEG_ERROR,
       SEG_LINEAR,
       SEG_MIRRORED,
       SEG_SNAPSHOT,
       SEG_SNAPSHOT_ORIGIN,
       SEG_SNAPSHOT_MERGE,
       SEG_STRIPED,
       SEG_ZERO,
       SEG_THIN_POOL,
       SEG_THIN,
       SEG_RAID0,
       SEG_RAID0_META,
       SEG_RAID1,
       SEG_RAID10,
       SEG_RAID4,
       SEG_RAID5_N,
       SEG_RAID5_LA,
       SEG_RAID5_RA,
       SEG_RAID5_LS,
       SEG_RAID5_RS,
       SEG_RAID6_N_6,
       SEG_RAID6_ZR,
       SEG_RAID6_NR,
       SEG_RAID6_NC,
       SEG_RAID6_LS_6,
       SEG_RAID6_RS_6,
       SEG_RAID6_LA_6,
       SEG_RAID6_RA_6,
};

/* FIXME Add crypt and multipath support */

/*
 * Maps each segment type to its kernel target name.  The fixed
 * target[16] must hold the longest name plus NUL ("snapshot-origin"
 * uses all 16 bytes).
 */
static const struct {
       unsigned type;
       const char target[16];
} _dm_segtypes[] = {
       { SEG_CACHE, "cache" },
       { SEG_CRYPT, "crypt" },
       { SEG_ERROR, "error" },
       { SEG_LINEAR, "linear" },
       { SEG_MIRRORED, "mirror" },
       { SEG_SNAPSHOT, "snapshot" },
       { SEG_SNAPSHOT_ORIGIN, "snapshot-origin" },
       { SEG_SNAPSHOT_MERGE, "snapshot-merge" },
       { SEG_STRIPED, "striped" },
       { SEG_ZERO, "zero"},
       { SEG_THIN_POOL, "thin-pool"},
       { SEG_THIN, "thin"},
       { SEG_RAID0, "raid0"},
       { SEG_RAID0_META, "raid0_meta"},
       { SEG_RAID1, "raid1"},
       { SEG_RAID10, "raid10"},
       { SEG_RAID4, "raid4"},
       { SEG_RAID5_N,  "raid5_n"},
       { SEG_RAID5_LA, "raid5_la"},
       { SEG_RAID5_RA, "raid5_ra"},
       { SEG_RAID5_LS, "raid5_ls"},
       { SEG_RAID5_RS, "raid5_rs"},
       { SEG_RAID6_N_6,"raid6_n_6"},
       { SEG_RAID6_ZR, "raid6_zr"},
       { SEG_RAID6_NR, "raid6_nr"},
       { SEG_RAID6_NC, "raid6_nc"},
       { SEG_RAID6_LS_6, "raid6_ls_6"},
       { SEG_RAID6_RS_6, "raid6_rs_6"},
       { SEG_RAID6_LA_6, "raid6_la_6"},
       { SEG_RAID6_RA_6, "raid6_ra_6"},


       /*
        * WARNING: Since 'raid' target overloads this 1:1 mapping table
        * for search do not add new enum elements past them!
        */
       { SEG_RAID5_LS, "raid5"}, /* same as "raid5_ls" (default for MD also) */
       { SEG_RAID6_ZR, "raid6"}, /* same as "raid6_zr" */
       { SEG_RAID10, "raid10_near"}, /* same as "raid10" */
};
+
/* Some segment types have a list of areas of other devices attached */
struct seg_area {
       struct dm_list list;

       /* The tree node supplying this area */
       struct dm_tree_node *dev_node;

       /* Start offset within dev_node */
       uint64_t offset;
};

/* In-memory form of a thin-pool target message; 'type' selects the union member */
struct dm_thin_message {
       dm_thin_message_t type;
       union {
               struct {
                       uint32_t device_id;
                       uint32_t origin_id;
               } m_create_snap;
               struct {
                       uint32_t device_id;
               } m_create_thin;
               struct {
                       uint32_t device_id;
               } m_delete;
               struct {
                       uint64_t current_id;
                       uint64_t new_id;
               } m_set_transaction_id;
       } u;
};

/* A message queued on a thin-pool segment plus the errno its sender expects */
struct thin_message {
       struct dm_list list;
       struct dm_thin_message message;
       int expected_errno;
};

/* Per-segment properties */
struct load_segment {
       struct dm_list list;

       unsigned type;                  /* One of the SEG_* enum values */

       uint64_t size;

       unsigned area_count;            /* Linear + Striped + Mirrored + Crypt */
       struct dm_list areas;           /* Linear + Striped + Mirrored + Crypt */

       uint32_t stripe_size;           /* Striped + raid */

       int persistent;                 /* Snapshot */
       uint32_t chunk_size;            /* Snapshot */
       struct dm_tree_node *cow;       /* Snapshot */
       struct dm_tree_node *origin;    /* Snapshot + Snapshot origin + Cache */
       struct dm_tree_node *merge;     /* Snapshot */

       struct dm_tree_node *log;       /* Mirror */
       uint32_t region_size;           /* Mirror + raid */
       unsigned clustered;             /* Mirror */
       unsigned mirror_area_count;     /* Mirror */
       uint32_t flags;                 /* Mirror + raid + Cache */
       char *uuid;                     /* Clustered mirror log */

       const char *policy_name;        /* Cache */
       unsigned policy_argc;           /* Cache */
       struct dm_config_node *policy_settings; /* Cache */

       const char *cipher;             /* Crypt */
       const char *chainmode;          /* Crypt */
       const char *iv;                 /* Crypt */
       uint64_t iv_offset;             /* Crypt */
       const char *key;                /* Crypt */

       int delta_disks;                /* raid reshape number of disks */
       int data_offset;                /* raid reshape data offset on disk to set */
       uint64_t rebuilds[RAID_BITMAP_SIZE];    /* raid */
       uint64_t writemostly[RAID_BITMAP_SIZE]; /* raid */
       uint32_t writebehind;           /* raid */
       uint32_t max_recovery_rate;     /* raid kB/sec/disk */
       uint32_t min_recovery_rate;     /* raid kB/sec/disk */
       uint32_t data_copies;           /* raid10 data_copies */

       struct dm_tree_node *metadata;  /* Thin_pool + Cache */
       struct dm_tree_node *pool;      /* Thin_pool, Thin */
       struct dm_tree_node *external;  /* Thin */
       struct dm_list thin_messages;   /* Thin_pool */
       uint64_t transaction_id;        /* Thin_pool */
       uint64_t low_water_mark;        /* Thin_pool */
       uint32_t data_block_size;       /* Thin_pool + cache */
       unsigned skip_block_zeroing;    /* Thin_pool */
       unsigned ignore_discard;        /* Thin_pool target vsn 1.1 */
       unsigned no_discard_passdown;   /* Thin_pool target vsn 1.1 */
       unsigned error_if_no_space;     /* Thin pool target vsn 1.10 */
       unsigned read_only;             /* Thin pool target vsn 1.3 */
       uint32_t device_id;             /* Thin */

};
+
/* Per-device properties */
struct load_properties {
       int read_only;
       uint32_t major;
       uint32_t minor;

       uint32_t read_ahead;
       uint32_t read_ahead_flags;

       unsigned segment_count;         /* Number of entries on 'segs' */
       int size_changed;
       struct dm_list segs;            /* List of struct load_segment */

       const char *new_name;           /* Target of a pending rename, if any */

       /* If immediate_dev_node is set to 1, try to create the dev node
        * as soon as possible (e.g. in preload stage even during traversal
        * and processing of dm tree). This will also flush all stacked dev
        * node operations, synchronizing with udev.
        */
       unsigned immediate_dev_node;

       /*
        * If the device size changed from zero and this is set,
        * don't resume the device immediately, even if the device
        * has parents.  This works provided the parents do not
        * validate the device size and is required by pvmove to
        * avoid starting the mirror resync operation too early.
        */
       unsigned delay_resume_if_new;

       /*
        * Preload tree normally only loads and not resume, but there is
        * automatic resume when target is extended, as it's believed
        * there can be no i/o flying to this 'new' extedend space
        * from any device above. Reason is that preloaded target above
        * may actually need to see its bigger subdevice before it
        * gets suspended. As long as devices are simple linears
        * there is no problem to resume bigger device in preload (before commit).
        * However complex targets like thin-pool (raid,cache...)
        * they shall not be resumed before their commit.
        */
       unsigned delay_resume_if_extended;

       /*
        * Call node_send_messages(), set to 2 if there are messages
        * When != 0, it validates matching transaction id, thus thin-pools
        * where transation_id is passed as 0 are never validated, this
        * allows external managment of thin-pool TID.
        */
       unsigned send_messages;
       /* Skip suspending node's children, used when sending messages to thin-pool */
       int skip_suspend;
};

/* Two of these used to join two nodes with uses and used_by. */
struct dm_tree_link {
       struct dm_list list;
       struct dm_tree_node *node;
};

/* One device (or the artificial root) in the dependency tree. */
struct dm_tree_node {
       struct dm_tree *dtree;          /* Owning tree */

       const char *name;               /* Device name */
       const char *uuid;               /* Device uuid */
       struct dm_info info;

       struct dm_list uses;            /* Nodes this node uses */
       struct dm_list used_by;         /* Nodes that use this node */

       int activation_priority;        /* 0 gets activated first */
       int implicit_deps;              /* 1 device only implicitly referenced */

       uint16_t udev_flags;            /* Udev control flags */

       void *context;                  /* External supplied context */

       struct load_properties props;   /* For creation/table (re)load */

       /*
        * If presuspend of child node is needed
        * Note: only direct child is allowed
        */
       struct dm_tree_node *presuspend_node;

       /* Callback */
       dm_node_callback_fn callback;
       void *callback_data;

       /*
        * TODO:
        *      Add advanced code which tracks of send ioctls and their
        *      proper revert operation for more advanced recovery
        *      Current code serves mostly only to recovery when
        *      thin pool metadata check fails and command would
        *      have left active thin data and metadata subvolumes.
        */
       struct dm_list activated;       /* Head of activated nodes for preload revert */
       struct dm_list activated_list;  /* List of activated nodes for preload revert */
};

/* The dependency tree itself: devices hashed by (major,minor) and by uuid. */
struct dm_tree {
       struct dm_pool *mem;            /* All tree allocations come from here */
       struct dm_hash_table *devs;     /* Nodes hashed by device number */
       struct dm_hash_table *uuids;    /* Nodes hashed by uuid */
       struct dm_tree_node root;       /* Artificial root node */
       int skip_lockfs;                /* 1 skips lockfs (for non-snapshots) */
       int no_flush;                   /* 1 sets noflush (mirrors/multipath) */
       int retry_remove;               /* 1 retries remove if not successful */
       uint32_t cookie;
       char buf[DM_NAME_LEN + 32];     /* print buffer for device_name (major:minor) */
       const char **optional_uuid_suffixes;    /* uuid suffixes ignored when matching */
};
+
+/*
+ * Tree functions.
+ */
+struct dm_tree *dm_tree_create(void)
+{
+       struct dm_pool *mem;
+       struct dm_tree *tree;
+
+       /* The tree and all its nodes live in a dedicated pool. */
+       if (!(mem = dm_pool_create("dtree", 1024)) ||
+           !(tree = dm_pool_zalloc(mem, sizeof(*tree)))) {
+               log_error("Failed to allocate dtree.");
+               if (mem)
+                       dm_pool_destroy(mem);
+               return NULL;
+       }
+
+       tree->mem = mem;
+       tree->root.dtree = tree;
+       dm_list_init(&tree->root.uses);
+       dm_list_init(&tree->root.used_by);
+       dm_list_init(&tree->root.activated);
+       tree->skip_lockfs = 0;
+       tree->no_flush = 0;
+       tree->optional_uuid_suffixes = NULL;
+
+       /* Device-number hash: keys are packed dev_t values. */
+       if (!(tree->devs = dm_hash_create(8))) {
+               log_error("dtree hash creation failed");
+               dm_pool_destroy(tree->mem);
+               return NULL;
+       }
+
+       /* Uuid hash: keys are uuid strings. */
+       if (!(tree->uuids = dm_hash_create(32))) {
+               log_error("dtree uuid hash creation failed");
+               dm_hash_destroy(tree->devs);
+               dm_pool_destroy(tree->mem);
+               return NULL;
+       }
+
+       return tree;
+}
+
+void dm_tree_free(struct dm_tree *dtree)
+{
+       /* Destroying the pool releases every node allocated from it. */
+       if (dtree) {
+               dm_hash_destroy(dtree->uuids);
+               dm_hash_destroy(dtree->devs);
+               dm_pool_destroy(dtree->mem);
+       }
+}
+
+/* Store a udev cookie in the tree the node belongs to. */
+void dm_tree_set_cookie(struct dm_tree_node *node, uint32_t cookie)
+{
+       node->dtree->cookie = cookie;
+}
+
+/* Retrieve the udev cookie stored in the node's tree. */
+uint32_t dm_tree_get_cookie(struct dm_tree_node *node)
+{
+       return node->dtree->cookie;
+}
+
+/* Tree-wide flag: skip lockfs when suspending (for non-snapshots). */
+void dm_tree_skip_lockfs(struct dm_tree_node *dnode)
+{
+       dnode->dtree->skip_lockfs = 1;
+}
+
+/* Tree-wide flag: use noflush suspend (mirrors/multipath). */
+void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode)
+{
+       dnode->dtree->no_flush = 1;
+}
+
+/* Tree-wide flag: retry a remove if it is not immediately successful. */
+void dm_tree_retry_remove(struct dm_tree_node *dnode)
+{
+       dnode->dtree->retry_remove = 1;
+}
+
+/*
+ * Node functions.
+ */
+static int _nodes_are_linked(const struct dm_tree_node *parent,
+                            const struct dm_tree_node *child)
+{
+       struct dm_tree_link *link;
+
+       /* Scan the parent's dependency list for this child. */
+       dm_list_iterate_items(link, &parent->uses)
+               if (link->node == child)
+                       return 1;
+
+       return 0;
+}
+
+/* Append a new link referencing node to the given list. */
+static int _link(struct dm_list *list, struct dm_tree_node *node)
+{
+       struct dm_tree_link *entry;
+
+       entry = dm_pool_alloc(node->dtree->mem, sizeof(*entry));
+       if (!entry) {
+               log_error("dtree link allocation failed");
+               return 0;
+       }
+
+       entry->node = node;
+       dm_list_add(list, &entry->list);
+
+       return 1;
+}
+
+/* Record the parent/child relationship in both directions (idempotent). */
+static int _link_nodes(struct dm_tree_node *parent,
+                      struct dm_tree_node *child)
+{
+       if (_nodes_are_linked(parent, child))
+               return 1;
+
+       /* Short-circuits exactly as the two separate checks would. */
+       return _link(&parent->uses, child) &&
+              _link(&child->used_by, parent);
+}
+
+/* Remove the first link referencing node from the list, if present. */
+static void _unlink(struct dm_list *list, struct dm_tree_node *node)
+{
+       struct dm_tree_link *entry;
+
+       dm_list_iterate_items(entry, list)
+               if (entry->node == node) {
+                       dm_list_del(&entry->list);
+                       return;
+               }
+}
+
+/* Dissolve the parent/child relationship in both directions, if linked. */
+static void _unlink_nodes(struct dm_tree_node *parent,
+                         struct dm_tree_node *child)
+{
+       if (_nodes_are_linked(parent, child)) {
+               _unlink(&parent->uses, child);
+               _unlink(&child->used_by, parent);
+       }
+}
+
+/* Make node a child of the root, i.e. mark it as a top-level node. */
+static int _add_to_toplevel(struct dm_tree_node *node)
+{
+       return _link_nodes(&node->dtree->root, node);
+}
+
+/* Detach node from the root's child list. */
+static void _remove_from_toplevel(struct dm_tree_node *node)
+{
+       _unlink_nodes(&node->dtree->root, node);
+}
+
+/* Make the root a child of node, i.e. mark it as a bottom-level node. */
+static int _add_to_bottomlevel(struct dm_tree_node *node)
+{
+       return _link_nodes(node, &node->dtree->root);
+}
+
+/* Detach the root from node's child list. */
+static void _remove_from_bottomlevel(struct dm_tree_node *node)
+{
+       _unlink_nodes(node, &node->dtree->root);
+}
+
+/*
+ * Link parent -> child, maintaining the invariant that only nodes
+ * without real parents hang off the root (top level) and only nodes
+ * without real children link to the root (bottom level).
+ */
+static int _link_tree_nodes(struct dm_tree_node *parent, struct dm_tree_node *child)
+{
+       /* Don't link to root node if child already has a parent */
+       if (parent == &parent->dtree->root) {
+               if (dm_tree_node_num_children(child, 1))
+                       return 1;
+       } else
+               _remove_from_toplevel(child);
+
+       /* Symmetrically: don't link root beneath a parent with children. */
+       if (child == &child->dtree->root) {
+               if (dm_tree_node_num_children(parent, 0))
+                       return 1;
+       } else
+               _remove_from_bottomlevel(parent);
+
+       return _link_nodes(parent, child);
+}
+
+/*
+ * Allocate a node from the tree's pool and register it in the device
+ * hash and (when uuid is non-empty) the uuid hash.  info is copied;
+ * name and uuid are pool-duplicated.  Returns NULL on failure.
+ */
+static struct dm_tree_node *_create_dm_tree_node(struct dm_tree *dtree,
+                                                const char *name,
+                                                const char *uuid,
+                                                struct dm_info *info,
+                                                void *context,
+                                                uint16_t udev_flags)
+{
+       struct dm_tree_node *node;
+       dev_t dev;
+
+       if (!(node = dm_pool_zalloc(dtree->mem, sizeof(*node))) ||
+           !(node->name = dm_pool_strdup(dtree->mem, name)) ||
+           !(node->uuid = dm_pool_strdup(dtree->mem, uuid))) {
+               log_error("_create_dm_tree_node alloc failed.");
+               return NULL;
+       }
+
+       node->dtree = dtree;
+       node->info = *info;
+       node->context = context;
+       node->udev_flags = udev_flags;
+
+       dm_list_init(&node->uses);
+       dm_list_init(&node->used_by);
+       dm_list_init(&node->activated);
+       dm_list_init(&node->props.segs);
+
+       /* The device hash is keyed by the packed dev_t value. */
+       dev = MKDEV((dev_t)info->major, (dev_t)info->minor);
+
+       if (!dm_hash_insert_binary(dtree->devs, (const char *) &dev,
+                                  sizeof(dev), node)) {
+               log_error("dtree node hash insertion failed");
+               dm_pool_free(dtree->mem, node);
+               return NULL;
+       }
+
+       /* Roll back the dev-hash entry if uuid registration fails. */
+       if (*uuid && !dm_hash_insert(dtree->uuids, uuid, node)) {
+               log_error("dtree uuid hash insertion failed");
+               dm_hash_remove_binary(dtree->devs, (const char *) &dev,
+                                     sizeof(dev));
+               dm_pool_free(dtree->mem, node);
+               return NULL;
+       }
+
+       return node;
+}
+
+/* Look up a node by (major, minor); NULL if not in the tree. */
+static struct dm_tree_node *_find_dm_tree_node(struct dm_tree *dtree,
+                                              uint32_t major, uint32_t minor)
+{
+       dev_t dev = MKDEV((dev_t)major, (dev_t)minor);
+
+       return dm_hash_lookup_binary(dtree->devs, (const char *) &dev,
+                                    sizeof(dev));
+}
+
+/*
+ * Supply a NULL-terminated list of uuid suffixes that may be ignored
+ * when matching uuids.  The list is referenced, not copied, so it
+ * must outlive the tree.
+ */
+void dm_tree_set_optional_uuid_suffixes(struct dm_tree *dtree, const char **optional_uuid_suffixes)
+{
+       dtree->optional_uuid_suffixes = optional_uuid_suffixes;
+}
+
+/*
+ * Look up a node by uuid.  Tries an exact match first, then a match
+ * with any configured optional "-suffix" stripped, and finally a
+ * match with the default uuid prefix removed (transition support for
+ * devices activated before the prefix was introduced).
+ */
+static struct dm_tree_node *_find_dm_tree_node_by_uuid(struct dm_tree *dtree,
+                                                      const char *uuid)
+{
+       struct dm_tree_node *node;
+       const char *default_uuid_prefix;
+       size_t default_uuid_prefix_len;
+       const char *suffix, *suffix_position;
+       char uuid_without_suffix[DM_UUID_LEN];
+       unsigned i = 0;
+       const char **suffix_list = dtree->optional_uuid_suffixes;
+
+       if ((node = dm_hash_lookup(dtree->uuids, uuid))) {
+               log_debug("Matched uuid %s in deptree.", uuid);
+               return node;
+       }
+
+       default_uuid_prefix = dm_uuid_prefix();
+       default_uuid_prefix_len = strlen(default_uuid_prefix);
+
+       /* Use standard strrchr() rather than the legacy rindex(),
+        * which was removed from POSIX.1-2008. */
+       if (suffix_list && (suffix_position = strrchr(uuid, '-'))) {
+               while ((suffix = suffix_list[i++])) {
+                       if (strcmp(suffix_position + 1, suffix))
+                               continue;
+
+                       /* Copy then terminate at the suffix separator;
+                        * assumes uuid fits in DM_UUID_LEN — TODO confirm. */
+                       (void) strncpy(uuid_without_suffix, uuid, sizeof(uuid_without_suffix));
+                       uuid_without_suffix[suffix_position - uuid] = '\0';
+
+                       if ((node = dm_hash_lookup(dtree->uuids, uuid_without_suffix))) {
+                               log_debug("Matched uuid %s (missing suffix -%s) in deptree.", uuid_without_suffix, suffix);
+                               return node;
+                       }
+
+                       break;
+               }
+       }
+
+       /* Fallback only applies to uuids carrying the default prefix. */
+       if (strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
+               return NULL;
+
+       if ((node = dm_hash_lookup(dtree->uuids, uuid + default_uuid_prefix_len))) {
+               log_debug("Matched uuid %s (missing prefix) in deptree.", uuid + default_uuid_prefix_len);
+               return node;
+       }
+
+       log_debug("Not matched uuid %s in deptree.", uuid);
+       return NULL;
+}
+
+/* Return node's device_name (major:minor) for debug messages */
+static const char *_node_name(struct dm_tree_node *dnode)
+{
+       /* Formats into the tree-wide scratch buffer; falls back to the
+        * bare name if formatting fails. */
+       if (dm_snprintf(dnode->dtree->buf, sizeof(dnode->dtree->buf),
+                       "%s (" FMTu32 ":" FMTu32 ")",
+                       dnode->name ? dnode->name : "",
+                       dnode->info.major, dnode->info.minor) < 0) {
+               stack;
+               return dnode->name;
+       }
+
+       return dnode->dtree->buf;
+}
+
+/* Replace the node's udev control flags, logging any change. */
+void dm_tree_node_set_udev_flags(struct dm_tree_node *dnode, uint16_t udev_flags)
+
+{
+       if (udev_flags != dnode->udev_flags)
+               log_debug_activation("Resetting %s udev_flags from 0x%x to 0x%x.",
+                                    _node_name(dnode),
+                                    dnode->udev_flags, udev_flags);
+       dnode->udev_flags = udev_flags;
+}
+
+/* Record requested read-ahead settings in the node's load properties. */
+void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode,
+                                uint32_t read_ahead,
+                                uint32_t read_ahead_flags)
+{
+       dnode->props.read_ahead = read_ahead;
+       dnode->props.read_ahead_flags = read_ahead_flags;
+}
+
+/* Record a node that must be presuspended first (direct child only). */
+void dm_tree_node_set_presuspend_node(struct dm_tree_node *node,
+                                     struct dm_tree_node *presuspend_node)
+{
+       node->presuspend_node = presuspend_node;
+}
+
+/* Node name, or "" when the device does not exist. */
+const char *dm_tree_node_get_name(const struct dm_tree_node *node)
+{
+       return node->info.exists ? node->name : "";
+}
+
+/* Node uuid, or "" when the device does not exist. */
+const char *dm_tree_node_get_uuid(const struct dm_tree_node *node)
+{
+       return node->info.exists ? node->uuid : "";
+}
+
+/* The dm_info stored in the node (captured when the node was built). */
+const struct dm_info *dm_tree_node_get_info(const struct dm_tree_node *node)
+{
+       return &node->info;
+}
+
+/* Caller-supplied context pointer. */
+void *dm_tree_node_get_context(const struct dm_tree_node *node)
+{
+       return node->context;
+}
+
+/* Reports the node's props.size_changed value. */
+int dm_tree_node_size_changed(const struct dm_tree_node *dnode)
+{
+       return dnode->props.size_changed;
+}
+
+/*
+ * Count real children (or, inverted, real parents) of a node.
+ * A link to/from the root does not count as a real relationship.
+ */
+int dm_tree_node_num_children(const struct dm_tree_node *node, uint32_t inverted)
+{
+       const struct dm_list *list;
+
+       if (inverted) {
+               if (_nodes_are_linked(&node->dtree->root, node))
+                       return 0;
+               list = &node->used_by;
+       } else {
+               if (_nodes_are_linked(node, &node->dtree->root))
+                       return 0;
+               list = &node->uses;
+       }
+
+       return dm_list_size(list);
+}
+
+/*
+ * Returns 1 if no prefix supplied
+ */
+static int _uuid_prefix_matches(const char *uuid, const char *uuid_prefix, size_t uuid_prefix_len)
+{
+       const char *default_uuid_prefix = dm_uuid_prefix();
+       size_t default_uuid_prefix_len = strlen(default_uuid_prefix);
+
+       if (!uuid_prefix)
+               return 1;
+
+       /* Direct prefix match. */
+       if (!strncmp(uuid, uuid_prefix, uuid_prefix_len))
+               return 1;
+
+       /* Handle transition: active device uuids might be missing the prefix */
+       if (uuid_prefix_len <= 4)
+               return 0;
+
+       /* No fallback if the device uuid already carries the default prefix. */
+       if (!strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
+               return 0;
+
+       /* Fallback only makes sense when the supplied prefix itself
+        * starts with the default prefix. */
+       if (strncmp(uuid_prefix, default_uuid_prefix, default_uuid_prefix_len))
+               return 0;
+
+       /* Retry the match with the default prefix stripped off uuid_prefix. */
+       if (!strncmp(uuid, uuid_prefix + default_uuid_prefix_len, uuid_prefix_len - default_uuid_prefix_len))
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Returns 1 if no children.
+ */
+static int _children_suspended(struct dm_tree_node *node,
+                              uint32_t inverted,
+                              const char *uuid_prefix,
+                              size_t uuid_prefix_len)
+{
+       struct dm_list *list;
+       struct dm_tree_link *dlink;
+       const struct dm_info *dinfo;
+       const char *uuid;
+
+       /* Pick the traversal direction; a node linked to the root has
+        * no real children (or parents) to check. */
+       if (inverted) {
+               if (_nodes_are_linked(&node->dtree->root, node))
+                       return 1;
+               list = &node->used_by;
+       } else {
+               if (_nodes_are_linked(node, &node->dtree->root))
+                       return 1;
+               list = &node->uses;
+       }
+
+       dm_list_iterate_items(dlink, list) {
+               if (!(uuid = dm_tree_node_get_uuid(dlink->node))) {
+                       stack;
+                       continue;
+               }
+
+               /* Ignore if it doesn't belong to this VG */
+               if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+                       continue;
+
+               /* Ignore if parent node wants to presuspend this node */
+               if (dlink->node->presuspend_node == node)
+                       continue;
+
+               if (!(dinfo = dm_tree_node_get_info(dlink->node)))
+                       return_0;       /* FIXME Is this normal? */
+
+               /* One unsuspended child is enough to answer no. */
+               if (!dinfo->suspended)
+                       return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Set major and minor to zero for root of tree.
+ */
+struct dm_tree_node *dm_tree_find_node(struct dm_tree *dtree,
+                                      uint32_t major,
+                                      uint32_t minor)
+{
+       /* major and minor both zero select the root of the tree. */
+       return (major || minor) ? _find_dm_tree_node(dtree, major, minor)
+                               : &dtree->root;
+}
+
+/*
+ * Set uuid to NULL for root of tree.
+ */
+struct dm_tree_node *dm_tree_find_node_by_uuid(struct dm_tree *dtree,
+                                              const char *uuid)
+{
+       /* A NULL or empty uuid selects the root of the tree. */
+       if (uuid && *uuid)
+               return _find_dm_tree_node_by_uuid(dtree, uuid);
+
+       return &dtree->root;
+}
+
+/*
+ * First time set *handle to NULL.
+ * Set inverted to invert the tree.
+ */
+struct dm_tree_node *dm_tree_next_child(void **handle,
+                                       const struct dm_tree_node *parent,
+                                       uint32_t inverted)
+{
+       struct dm_list **pos = (struct dm_list **) handle;
+       const struct dm_list *head = inverted ? &parent->used_by : &parent->uses;
+
+       /* NULL position means start of iteration. */
+       *pos = *pos ? dm_list_next(head, *pos) : dm_list_first(head);
+
+       return *pos ? dm_list_item(*pos, struct dm_tree_link)->node : NULL;
+}
+
+/*
+ * Obtain info, name, uuid and the dependency list for major:minor via
+ * a DM_DEVICE_DEPS task.  Non-dm devices get only the device number
+ * filled in (*deps stays NULL).  On success for a dm device, *dmt
+ * holds the completed task and owns the returned strings/deps; the
+ * caller must destroy it.  On failure *dmt is left NULL.
+ * Note: the mem parameter is currently unused.
+ */
+static int _deps(struct dm_task **dmt, struct dm_pool *mem, uint32_t major, uint32_t minor,
+                const char **name, const char **uuid, unsigned inactive_table,
+                struct dm_info *info, struct dm_deps **deps)
+{
+       memset(info, 0, sizeof(*info));
+       *name = "";
+       *uuid = "";
+       *deps = NULL;
+
+       /* Not a dm device: record the number and return success. */
+       if (!dm_is_dm_major(major)) {
+               info->major = major;
+               info->minor = minor;
+               return 1;
+       }
+
+       if (!(*dmt = dm_task_create(DM_DEVICE_DEPS)))
+               return_0;
+
+       if (!dm_task_set_major(*dmt, major) || !dm_task_set_minor(*dmt, minor)) {
+               log_error("_deps: failed to set major:minor for (" FMTu32 ":" FMTu32 ").",
+                         major, minor);
+               goto failed;
+       }
+
+       if (inactive_table && !dm_task_query_inactive_table(*dmt)) {
+               log_error("_deps: failed to set inactive table for (%" PRIu32 ":%" PRIu32 ")",
+                         major, minor);
+               goto failed;
+       }
+
+       if (!dm_task_run(*dmt)) {
+               log_error("_deps: task run failed for (%" PRIu32 ":%" PRIu32 ")",
+                         major, minor);
+               goto failed;
+       }
+
+       if (!dm_task_get_info(*dmt, info)) {
+               log_error("_deps: failed to get info for (%" PRIu32 ":%" PRIu32 ")",
+                         major, minor);
+               goto failed;
+       }
+
+       if (info->exists) {
+               /* Sanity-check kernel-reported numbers against the request. */
+               if (info->major != major) {
+                       log_error("Inconsistent dtree major number: %u != %u",
+                                 major, info->major);
+                       goto failed;
+               }
+               if (info->minor != minor) {
+                       log_error("Inconsistent dtree minor number: %u != %u",
+                                 minor, info->minor);
+                       goto failed;
+               }
+               *name = dm_task_get_name(*dmt);
+               *uuid = dm_task_get_uuid(*dmt);
+               *deps = dm_task_get_deps(*dmt);
+       }
+
+       return 1;
+
+failed:
+       dm_task_destroy(*dmt);
+       *dmt = NULL;
+
+       return 0;
+}
+
+/*
+ * Query dm_info for a device by number.
+ * (NOTE(review): previous comment here described deactivation, but the
+ * function below only collects device information.)
+ */
+/*
+ * Run DM_DEVICE_INFO for major:minor and fill *info.
+ * When name/uuid are non-NULL, the reported strings are duplicated
+ * into mem.  open_count collection is suppressed unless
+ * with_open_count is set.  Returns 1 on success.
+ */
+static int _info_by_dev(uint32_t major, uint32_t minor, int with_open_count,
+                       struct dm_info *info, struct dm_pool *mem,
+                       const char **name, const char **uuid)
+{
+       struct dm_task *dmt;
+       int r = 0;
+
+       if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+               return_0;
+
+       if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+               log_error("_info_by_dev: Failed to set device number.");
+               goto out;
+       }
+
+       /* Skipping the open count makes the ioctl cheaper. */
+       if (!with_open_count && !dm_task_no_open_count(dmt))
+               log_warn("WARNING: Failed to disable open_count.");
+
+       if (!dm_task_run(dmt))
+               goto_out;
+
+       if (!dm_task_get_info(dmt, info))
+               goto_out;
+
+       if (name && !(*name = dm_pool_strdup(mem, dm_task_get_name(dmt)))) {
+               log_error("name pool_strdup failed");
+               goto out;
+       }
+
+       if (uuid && !(*uuid = dm_pool_strdup(mem, dm_task_get_uuid(dmt)))) {
+               log_error("uuid pool_strdup failed");
+               goto out;
+       }
+
+       r = 1;
+out:
+       dm_task_destroy(dmt);
+
+       return r;
+}
+
+/*
+ * Check whether a device is free to be deactivated.
+ * Without sysfs support only the open count is consulted; otherwise
+ * holders and mounted filesystems are checked too.  Returns 1 when
+ * the device is not in use, 0 (with an error logged) otherwise.
+ */
+static int _check_device_not_in_use(const char *name, struct dm_info *info)
+{
+       const char *reason;
+
+       if (!info->exists)
+               return 1;
+
+       /* If sysfs is not used, use open_count information only. */
+       if (!*dm_sysfs_dir()) {
+               if (!info->open_count)
+                       return 1;
+               reason = "in use";
+       } else if (dm_device_has_holders(info->major, info->minor))
+               reason = "is used by another device";
+       else if (dm_device_has_mounted_fs(info->major, info->minor))
+               reason = "contains a filesystem in use"; /* fixed typo: was "constains" */
+       else
+               return 1;
+
+       log_error("Device %s (" FMTu32 ":" FMTu32 ") %s.",
+                 name, info->major, info->minor, reason);
+       return 0;
+}
+
+/* Check if all parent nodes of given node have open_count == 0 */
+static int _node_has_closed_parents(struct dm_tree_node *node,
+                                   const char *uuid_prefix,
+                                   size_t uuid_prefix_len)
+{
+       struct dm_tree_link *dlink;
+       const struct dm_info *dinfo;
+       struct dm_info info;
+       const char *uuid;
+
+       /* Iterate through parents of this node */
+       dm_list_iterate_items(dlink, &node->used_by) {
+               if (!(uuid = dm_tree_node_get_uuid(dlink->node))) {
+                       stack;
+                       continue;
+               }
+
+               /* Ignore if it doesn't belong to this VG */
+               if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+                       continue;
+
+               if (!(dinfo = dm_tree_node_get_info(dlink->node)))
+                       return_0;       /* FIXME Is this normal? */
+
+               /* Refresh open_count */
+               if (!_info_by_dev(dinfo->major, dinfo->minor, 1, &info, NULL, NULL, NULL))
+                       return_0;
+
+               /* A parent that has gone away cannot hold the device open. */
+               if (!info.exists)
+                       continue;
+
+               if (info.open_count) {
+                       log_debug_activation("Node %s %d:%d has open_count %d", uuid_prefix,
+                                            dinfo->major, dinfo->minor, info.open_count);
+                       return 0;
+               }
+       }
+
+       return 1;
+}
+
+/*
+ * Remove a device via DM_DEVICE_REMOVE.  Optionally registers a udev
+ * cookie and enables kernel-side remove retries.  The /dev node is
+ * additionally removed by hand (see FIXME below).
+ */
+static int _deactivate_node(const char *name, uint32_t major, uint32_t minor,
+                           uint32_t *cookie, uint16_t udev_flags, int retry)
+{
+       struct dm_task *dmt;
+       int r = 0;
+
+       log_verbose("Removing %s (%" PRIu32 ":%" PRIu32 ")", name, major, minor);
+
+       if (!(dmt = dm_task_create(DM_DEVICE_REMOVE))) {
+               log_error("Deactivation dm_task creation failed for %s", name);
+               return 0;
+       }
+
+       if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+               log_error("Failed to set device number for %s deactivation", name);
+               goto out;
+       }
+
+       if (!dm_task_no_open_count(dmt))
+               log_warn("WARNING: Failed to disable open_count.");
+
+       if (cookie)
+               if (!dm_task_set_cookie(dmt, cookie, udev_flags))
+                       goto out;
+
+       if (retry)
+               dm_task_retry_remove(dmt);
+
+       r = dm_task_run(dmt);
+
+       /* FIXME Until kernel returns actual name so dm-iface.c can handle it */
+       rm_dev_node(name, dmt->cookie_set && !(udev_flags & DM_UDEV_DISABLE_DM_RULES_FLAG),
+                   dmt->cookie_set && (udev_flags & DM_UDEV_DISABLE_LIBRARY_FALLBACK));
+
+       /* FIXME Remove node from tree or mark invalid? */
+
+out:
+       dm_task_destroy(dmt);
+
+       return r;
+}
+
+/*
+ * Clear a node's inactive table, then deactivate any 'orphan' devices
+ * the inactive table referred to: devices not in the tree, with no
+ * live table, no openers and carrying the default uuid prefix.
+ */
+static int _node_clear_table(struct dm_tree_node *dnode, uint16_t udev_flags)
+{
+       struct dm_task *dmt = NULL, *deps_dmt = NULL;
+       struct dm_info *info = &dnode->info, deps_info;
+       struct dm_deps *deps = NULL;
+       const char *name, *uuid, *depname, *depuuid;
+       const char *default_uuid_prefix;
+       size_t default_uuid_prefix_len;
+       uint32_t i;
+       int r = 0;
+
+       if (!(name = dm_tree_node_get_name(dnode))) {
+               log_error("_node_clear_table failed: missing name");
+               return 0;
+       }
+
+       /* Is there a table? */
+       if (!info->exists || !info->inactive_table)
+               return 1;
+
+       /* Get devices used by inactive table that's about to be deleted. */
+       if (!_deps(&deps_dmt, dnode->dtree->mem, info->major, info->minor, &depname, &depuuid, 1, info, &deps)) {
+               log_error("Failed to obtain dependencies for %s before clearing table.", name);
+               return 0;
+       }
+
+       log_verbose("Clearing inactive table %s (%" PRIu32 ":%" PRIu32 ")",
+                   name, info->major, info->minor);
+
+       if (!(dmt = dm_task_create(DM_DEVICE_CLEAR))) {
+               log_error("Table clear dm_task creation failed for %s", name);
+               goto out;
+       }
+
+       if (!dm_task_set_major(dmt, info->major) ||
+           !dm_task_set_minor(dmt, info->minor)) {
+               log_error("Failed to set device number for %s table clear", name);
+               goto out;
+       }
+
+       r = dm_task_run(dmt);
+
+       /* Refresh dnode->info to reflect the cleared table. */
+       if (!dm_task_get_info(dmt, info)) {
+               log_error("_node_clear_table failed: info missing after running task for %s", name);
+               r = 0;
+       }
+
+       if (!r || !deps)
+               goto_out;
+
+       /*
+        * Remove (incomplete) devices that the inactive table referred to but
+        * which are not in the tree, no longer referenced and don't have a live
+        * table.
+        */
+       default_uuid_prefix = dm_uuid_prefix();
+       default_uuid_prefix_len = strlen(default_uuid_prefix);
+
+       for (i = 0; i < deps->count; i++) {
+               /* If already in tree, assume it's under control */
+               if (_find_dm_tree_node(dnode->dtree, MAJOR(deps->device[i]), MINOR(deps->device[i])))
+                       continue;
+
+               if (!_info_by_dev(MAJOR(deps->device[i]), MINOR(deps->device[i]), 1,
+                                 &deps_info, dnode->dtree->mem, &name, &uuid))
+                       goto_out;
+
+               /* Proceed if device is an 'orphan' - unreferenced and without a live table. */
+               if (!deps_info.exists || deps_info.live_table || deps_info.open_count)
+                       continue;
+
+               /* Only touch devices carrying the default uuid prefix. */
+               if (strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
+                       continue;
+
+               /* Remove device. */
+               if (!_deactivate_node(name, deps_info.major, deps_info.minor, &dnode->dtree->cookie, udev_flags, 0)) {
+                       log_error("Failed to deactivate no-longer-used device %s (%"
+                                 PRIu32 ":%" PRIu32 ")", name, deps_info.major, deps_info.minor);
+               } else if (deps_info.suspended)
+                       dec_suspended();
+       }
+
+out:
+       if (dmt)
+               dm_task_destroy(dmt);
+
+       if (deps_dmt)
+               dm_task_destroy(deps_dmt);
+
+       return r;
+}
+
+/*
+ * Add (or look up) a device node by uuid.  New nodes are attached to
+ * the root until a table is supplied.  If an existing node carries a
+ * different name, a rename is scheduled via props.new_name.  When
+ * clear_inactive is set, any inactive table is cleared first.
+ * Returns the node or NULL on error.
+ */
+struct dm_tree_node *dm_tree_add_new_dev_with_udev_flags(struct dm_tree *dtree,
+                                                        const char *name,
+                                                        const char *uuid,
+                                                        uint32_t major,
+                                                        uint32_t minor,
+                                                        int read_only,
+                                                        int clear_inactive,
+                                                        void *context,
+                                                        uint16_t udev_flags)
+{
+       struct dm_tree_node *dnode;
+       struct dm_info info = { 0 };
+
+       if (!name || !uuid) {
+               log_error("Cannot add device without name and uuid.");
+               return NULL;
+       }
+
+       /* Do we need to add node to tree? */
+       if (!(dnode = dm_tree_find_node_by_uuid(dtree, uuid))) {
+               if (!(dnode = _create_dm_tree_node(dtree, name, uuid, &info,
+                                                  context, 0)))
+                       return_NULL;
+
+               /* Attach to root node until a table is supplied */
+               if (!_add_to_toplevel(dnode) || !_add_to_bottomlevel(dnode))
+                       return_NULL;
+
+               dnode->props.major = major;
+               dnode->props.minor = minor;
+       } else if (strcmp(name, dnode->name)) {
+               /* Do we need to rename node? */
+               if (!(dnode->props.new_name = dm_pool_strdup(dtree->mem, name))) {
+                       log_error("name pool_strdup failed");
+                       return NULL;
+               }
+       }
+
+       dnode->props.read_only = read_only ? 1 : 0;
+       dnode->props.read_ahead = DM_READ_AHEAD_AUTO;
+       dnode->props.read_ahead_flags = 0;
+
+       if (clear_inactive && !_node_clear_table(dnode, udev_flags))
+               return_NULL;
+
+       dnode->context = context;
+       dnode->udev_flags = udev_flags;
+
+       return dnode;
+}
+
+/* Convenience wrapper: add a new device with no udev flags. */
+struct dm_tree_node *dm_tree_add_new_dev(struct dm_tree *dtree, const char *name,
+                                        const char *uuid, uint32_t major, uint32_t minor,
+                                        int read_only, int clear_inactive, void *context)
+{
+       return dm_tree_add_new_dev_with_udev_flags(dtree, name, uuid, major, minor,
+                                                  read_only, clear_inactive, context, 0);
+}
+
+/*
+ * Ensure device major:minor is in the tree as a child of parent,
+ * recursively adding its kernel-reported dependencies.  Dependencies
+ * added implicitly have their udev rules disabled.  Returns the node,
+ * or NULL on error.
+ */
+static struct dm_tree_node *_add_dev(struct dm_tree *dtree,
+                                    struct dm_tree_node *parent,
+                                    uint32_t major, uint32_t minor,
+                                    uint16_t udev_flags,
+                                    int implicit_deps)
+{
+       struct dm_task *dmt = NULL;
+       struct dm_info info;
+       struct dm_deps *deps = NULL;
+       const char *name = NULL;
+       const char *uuid = NULL;
+       struct dm_tree_node *node = NULL;
+       uint32_t i;
+       int new = 0;
+
+       /* Already in tree? */
+       if (!(node = _find_dm_tree_node(dtree, major, minor))) {
+               if (!_deps(&dmt, dtree->mem, major, minor, &name, &uuid, 0, &info, &deps))
+                       return_NULL;
+
+               if (!(node = _create_dm_tree_node(dtree, name, uuid, &info,
+                                                 NULL, udev_flags)))
+                       goto_out;
+               new = 1;
+               node->implicit_deps = implicit_deps;
+       } else if (!implicit_deps && node->implicit_deps) {
+               /* Node becomes explicitly referenced: adopt caller's flags. */
+               node->udev_flags = udev_flags;
+               node->implicit_deps = 0;
+       }
+
+       if (!_link_tree_nodes(parent, node)) {
+               node = NULL;
+               goto_out;
+       }
+
+       /* If node was already in tree, no need to recurse. */
+       if (!new)
+               goto out;
+
+       /* Can't recurse if not a mapped device or there are no dependencies */
+       if (!node->info.exists || !deps || !deps->count) {
+               if (!_add_to_bottomlevel(node)) {
+                       stack;
+                       node = NULL;
+               }
+               goto out;
+       }
+
+       /* Add dependencies to tree */
+       for (i = 0; i < deps->count; i++)
+               /* Implicit devices are by default temporary */
+               if (!_add_dev(dtree, node, MAJOR(deps->device[i]),
+                             MINOR(deps->device[i]), udev_flags |
+                             DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG |
+                             DM_UDEV_DISABLE_DISK_RULES_FLAG |
+                             DM_UDEV_DISABLE_OTHER_RULES_FLAG, 1)) {
+                       node = NULL;
+                       goto_out;
+               }
+
+out:
+       if (dmt)
+               dm_task_destroy(dmt);
+
+       return node;
+}
+
+/* Add a device (and its dependencies) beneath the tree's root. */
+int dm_tree_add_dev(struct dm_tree *dtree, uint32_t major, uint32_t minor)
+{
+       return _add_dev(dtree, &dtree->root, major, minor, 0, 0) ? 1 : 0;
+}
+
+/* As dm_tree_add_dev(), with explicit udev flags for the node. */
+int dm_tree_add_dev_with_udev_flags(struct dm_tree *dtree, uint32_t major,
+                                   uint32_t minor, uint16_t udev_flags)
+{
+       return _add_dev(dtree, &dtree->root, major, minor, udev_flags, 0) ? 1 : 0;
+}
+
+/*
+ * Rename a device via DM_DEVICE_RENAME, registering a udev cookie.
+ * major/minor are used for logging only; the task targets old_name.
+ */
+static int _rename_node(const char *old_name, const char *new_name, uint32_t major,
+                       uint32_t minor, uint32_t *cookie, uint16_t udev_flags)
+{
+       struct dm_task *dmt;
+       int r = 0;
+
+       log_verbose("Renaming %s (%" PRIu32 ":%" PRIu32 ") to %s", old_name, major, minor, new_name);
+
+       if (!(dmt = dm_task_create(DM_DEVICE_RENAME))) {
+               log_error("Rename dm_task creation failed for %s", old_name);
+               return 0;
+       }
+
+       if (!dm_task_set_name(dmt, old_name)) {
+               log_error("Failed to set name for %s rename.", old_name);
+               goto out;
+       }
+
+       if (!dm_task_set_newname(dmt, new_name))
+               goto_out;
+
+       if (!dm_task_no_open_count(dmt))
+               log_warn("WARNING: Failed to disable open_count.");
+
+       if (!dm_task_set_cookie(dmt, cookie, udev_flags))
+               goto out;
+
+       r = dm_task_run(dmt);
+
+out:
+       dm_task_destroy(dmt);
+
+       return r;
+}
+
+/* FIXME Merge with _suspend_node? */
+/*
+ * Issue a DM_DEVICE_RESUME ioctl task for the device.
+ * The task is addressed by both name and major:minor (see the FIXME in
+ * the body about the kernel filling in the name).  Sets read-ahead and
+ * a udev cookie; on success fills *newinfo from the completed task and,
+ * if already_suspended, decrements the global suspended-device counter.
+ * Returns 1 on success, 0 on failure.
+ */
+static int _resume_node(const char *name, uint32_t major, uint32_t minor,
+                       uint32_t read_ahead, uint32_t read_ahead_flags,
+                       struct dm_info *newinfo, uint32_t *cookie,
+                       uint16_t udev_flags, int already_suspended)
+{
+       struct dm_task *dmt;
+       int r = 0;
+
+       log_verbose("Resuming %s (" FMTu32 ":" FMTu32 ").", name, major, minor);
+
+       if (!(dmt = dm_task_create(DM_DEVICE_RESUME))) {
+               /* NOTE(review): message text says "Suspend" though this is the resume path */
+               log_debug_activation("Suspend dm_task creation failed for %s.", name);
+               return 0;
+       }
+
+       /* FIXME Kernel should fill in name on return instead */
+       if (!dm_task_set_name(dmt, name)) {
+               log_debug_activation("Failed to set device name for %s resumption.", name);
+               goto out;
+       }
+
+       if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+               log_error("Failed to set device number for %s resumption.", name);
+               goto out;
+       }
+
+       /* open_count and read-ahead failures are non-fatal: warn and continue */
+       if (!dm_task_no_open_count(dmt))
+               log_warn("WARNING: Failed to disable open_count.");
+
+       if (!dm_task_set_read_ahead(dmt, read_ahead, read_ahead_flags))
+               log_warn("WARNING: Failed to set read ahead.");
+
+       if (!dm_task_set_cookie(dmt, cookie, udev_flags))
+               goto_out;
+
+       if (!(r = dm_task_run(dmt)))
+               goto_out;
+
+       /* The device was suspended before; it is no longer, so keep the counter honest */
+       if (already_suspended)
+               dec_suspended();
+
+       if (!(r = dm_task_get_info(dmt, newinfo)))
+               stack;
+
+out:
+       dm_task_destroy(dmt);
+
+       return r;
+}
+
+/*
+ * Issue a DM_DEVICE_SUSPEND ioctl task addressed by major:minor (name
+ * is used only for logging).  skip_lockfs suppresses the filesystem
+ * sync; no_flush suppresses the device flush.  On success increments
+ * the global suspended-device counter and fills *newinfo.
+ * Returns 1 on success, 0 on failure.
+ */
+static int _suspend_node(const char *name, uint32_t major, uint32_t minor,
+                        int skip_lockfs, int no_flush, struct dm_info *newinfo)
+{
+       struct dm_task *dmt;
+       int r = 0;
+
+       log_verbose("Suspending %s (%" PRIu32 ":%" PRIu32 ")%s%s",
+                   name, major, minor,
+                   skip_lockfs ? "" : " with filesystem sync",
+                   no_flush ? "" : " with device flush");
+
+       if (!(dmt = dm_task_create(DM_DEVICE_SUSPEND))) {
+               log_error("Suspend dm_task creation failed for %s", name);
+               return 0;
+       }
+
+       if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+               log_error("Failed to set device number for %s suspension.", name);
+               goto out;
+       }
+
+       /* The flag-setting failures below are non-fatal: warn and continue */
+       if (!dm_task_no_open_count(dmt))
+               log_warn("WARNING: Failed to disable open_count.");
+
+       if (skip_lockfs && !dm_task_skip_lockfs(dmt))
+               log_warn("WARNING: Failed to set skip_lockfs flag.");
+
+       if (no_flush && !dm_task_no_flush(dmt))
+               log_warn("WARNING: Failed to set no_flush flag.");
+
+       if ((r = dm_task_run(dmt))) {
+               inc_suspended();
+               r = dm_task_get_info(dmt, newinfo);
+       }
+out:
+       dm_task_destroy(dmt);
+
+       return r;
+}
+
<br>
+/*
+ * Query the kernel status of dnode, which must carry a single
+ * "thin-pool" target, and parse its status line into *s.
+ * Only the first target returned by the status ioctl is examined.
+ * Returns 1 on success, 0 on failure (wrong target type, parse error
+ * or ioctl failure).
+ */
+static int _thin_pool_get_status(struct dm_tree_node *dnode,
+                                struct dm_status_thin_pool *s)
+{
+       struct dm_task *dmt;
+       int r = 0;
+       uint64_t start, length;
+       char *type = NULL;
+       char *params = NULL;
+
+       if (!(dmt = dm_task_create(DM_DEVICE_STATUS)))
+               return_0;
+
+       if (!dm_task_set_major(dmt, dnode->info.major) ||
+           !dm_task_set_minor(dmt, dnode->info.minor)) {
+               log_error("Failed to set major minor.");
+               goto out;
+       }
+
+       if (!dm_task_no_flush(dmt))
+               log_warn("WARNING: Can't set no_flush flag."); /* Non fatal */
+
+       if (!dm_task_run(dmt))
+               goto_out;
+
+       /* Only the first (and expected only) target is inspected */
+       dm_get_next_target(dmt, NULL, &start, &length, &type, &params);
+
+       if (!type || (strcmp(type, "thin-pool") != 0)) {
+               log_error("Expected thin-pool target for %s and got %s.",
+                         _node_name(dnode), type ? : "no target");
+               goto out;
+       }
+
+       if (!parse_thin_pool_status(params, s))
+               goto_out;
+
+       log_debug_activation("Found transaction id %" PRIu64 " for thin pool %s "
+                            "with status line: %s.",
+                            s->transaction_id, _node_name(dnode), params);
+
+       r = 1;
+out:
+       dm_task_destroy(dmt);
+
+       return r;
+}
+
+/*
+ * Format the queued thin message tm into its textual kernel form and
+ * deliver it to the thin-pool device dnode via a DM_DEVICE_TARGET_MSG
+ * ioctl task.  tm->expected_errno is passed through to the task so an
+ * anticipated kernel errno is not treated as failure.
+ * Returns 1 on success, 0 on failure (unknown message type, formatting
+ * error or ioctl failure).
+ */
+static int _thin_pool_node_message(struct dm_tree_node *dnode, struct thin_message *tm)
+{
+       struct dm_task *dmt;
+       struct dm_thin_message *m = &tm->message;
+       char buf[64];
+       int r;
+
+       /* r first holds the dm_snprintf result (< 0 on overflow/error) */
+       switch (m->type) {
+       case DM_THIN_MESSAGE_CREATE_SNAP:
+               r = dm_snprintf(buf, sizeof(buf), "create_snap %u %u",
+                               m->u.m_create_snap.device_id,
+                               m->u.m_create_snap.origin_id);
+               break;
+       case DM_THIN_MESSAGE_CREATE_THIN:
+               r = dm_snprintf(buf, sizeof(buf), "create_thin %u",
+                               m->u.m_create_thin.device_id);
+               break;
+       case DM_THIN_MESSAGE_DELETE:
+               r = dm_snprintf(buf, sizeof(buf), "delete %u",
+                               m->u.m_delete.device_id);
+               break;
+       case DM_THIN_MESSAGE_SET_TRANSACTION_ID:
+               r = dm_snprintf(buf, sizeof(buf),
+                               "set_transaction_id %" PRIu64 " %" PRIu64,
+                               m->u.m_set_transaction_id.current_id,
+                               m->u.m_set_transaction_id.new_id);
+               break;
+       case DM_THIN_MESSAGE_RESERVE_METADATA_SNAP: /* target vsn 1.1 */
+               r = dm_snprintf(buf, sizeof(buf), "reserve_metadata_snap");
+               break;
+       case DM_THIN_MESSAGE_RELEASE_METADATA_SNAP: /* target vsn 1.1 */
+               r = dm_snprintf(buf, sizeof(buf), "release_metadata_snap");
+               break;
+       default:
+               r = -1;
+       }
+
+       if (r < 0) {
+               log_error("Failed to prepare message.");
+               return 0;
+       }
+
+       /* From here on r is reused as the 0/1 return code */
+       r = 0;
+
+       if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG)))
+               return_0;
+
+       if (!dm_task_set_major(dmt, dnode->info.major) ||
+           !dm_task_set_minor(dmt, dnode->info.minor)) {
+               log_error("Failed to set message major minor.");
+               goto out;
+       }
+
+       if (!dm_task_set_message(dmt, buf))
+               goto_out;
+
+       /* Internal functionality of dm_task */
+       dmt->expected_errno = tm->expected_errno;
+
+       if (!dm_task_run(dmt)) {
+               log_error("Failed to process thin pool message \"%s\".", buf);
+               goto out;
+       }
+
+       r = 1;
+out:
+       dm_task_destroy(dmt);
+
+       return r;
+}
+
+/*
+ * Return the most recently added load_segment of node, or NULL
+ * (after logging an error) when the node has no segments.
+ */
+static struct load_segment *_get_last_load_segment(struct dm_tree_node *node)
+{
+       if (dm_list_empty(&node->props.segs)) {
+               log_error("Node %s is missing a segment.", _node_name(node));
+               return NULL;
+       }
+
+       return dm_list_item(dm_list_last(&node->props.segs), struct load_segment);
+}
+
+/* For preload pass only validate pool's transaction_id */
+/*
+ * Compare the thin-pool's kernel transaction_id against the metadata's
+ * expected seg->transaction_id and, when 'send' is set, post any queued
+ * thin messages to close the gap.  Non-thin-pool nodes, non-existent
+ * devices and nodes outside the uuid prefix are ignored (return 1).
+ * Returns 1 on success / nothing to do, 0 on id mismatch or failure.
+ */
+static int _node_send_messages(struct dm_tree_node *dnode,
+                              const char *uuid_prefix,
+                              size_t uuid_prefix_len,
+                              int send)
+{
+       struct load_segment *seg;
+       struct thin_message *tmsg;
+       struct dm_status_thin_pool stp;
+       const char *uuid;
+       int have_messages;
+
+       if (!dnode->info.exists)
+               return 1;
+
+       if (!(seg = _get_last_load_segment(dnode)))
+               return_0;
+
+       if (seg->type != SEG_THIN_POOL)
+               return 1;
+
+       if (!(uuid = dm_tree_node_get_uuid(dnode)))
+               return_0;
+
+       if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len)) {
+               log_debug_activation("UUID \"%s\" does not match.", uuid);
+               return 1;
+       }
+
+       if (!_thin_pool_get_status(dnode, &stp))
+               return_0;
+
+       have_messages = !dm_list_empty(&seg->thin_messages) ? 1 : 0;
+       /* Kernel already at the expected id: nothing left to send */
+       if (stp.transaction_id == seg->transaction_id) {
+               dnode->props.send_messages = 0; /* messages already committed */
+               if (have_messages)
+                       log_debug_activation("Thin pool %s transaction_id matches %"
+                                            PRIu64 ", skipping messages.",
+                                            _node_name(dnode), stp.transaction_id);
+               return 1;
+       }
+
+       /* Error if there are no stacked messages or id mismatches */
+       if ((stp.transaction_id + 1) != seg->transaction_id) {
+               log_error("Thin pool %s transaction_id is %" PRIu64 ", while expected %" PRIu64 ".",
+                         _node_name(dnode), stp.transaction_id, seg->transaction_id - have_messages);
+               return 0;
+       }
+
+       if (!have_messages || !send)
+               return 1; /* transaction_id is matching */
+
+       dm_list_iterate_items(tmsg, &seg->thin_messages) {
+               if (!(_thin_pool_node_message(dnode, tmsg)))
+                       return_0;
+               /* After bumping the id, re-read status to verify the kernel took it */
+               if (tmsg->message.type == DM_THIN_MESSAGE_SET_TRANSACTION_ID) {
+                       if (!_thin_pool_get_status(dnode, &stp))
+                               return_0;
+                       if (stp.transaction_id != tmsg->message.u.m_set_transaction_id.new_id) {
+                               log_error("Thin pool %s transaction_id is %" PRIu64
+                                         " and does not match expected  %" PRIu64 ".",
+                                         _node_name(dnode), stp.transaction_id,
+                                         tmsg->message.u.m_set_transaction_id.new_id);
+                               return 0;
+                       }
+               }
+       }
+
+       dnode->props.send_messages = 0; /* messages posted */
+
+       return 1;
+}
+
+/*
+ * FIXME Don't attempt to deactivate known internal dependencies.
+ */
+/*
+ * Recursive worker for dm_tree_deactivate_children().  'level' is the
+ * recursion depth: 0 means top-level children; deeper levels relax the
+ * open_count checks because internal nodes may legitimately be held
+ * open by their parents.  Deactivation continues past individual
+ * failures, accumulating the result in r.
+ * Returns 1 if every matching child was deactivated, 0 otherwise.
+ */
+static int _dm_tree_deactivate_children(struct dm_tree_node *dnode,
+                                       const char *uuid_prefix,
+                                       size_t uuid_prefix_len,
+                                       unsigned level)
+{
+       int r = 1;
+       void *handle = NULL;
+       struct dm_tree_node *child = dnode;
+       struct dm_info info;
+       const struct dm_info *dinfo;
+       const char *name;
+       const char *uuid;
+
+       while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+               if (!(dinfo = dm_tree_node_get_info(child))) {
+                       stack;
+                       continue;
+               }
+
+               if (!(name = dm_tree_node_get_name(child))) {
+                       stack;
+                       continue;
+               }
+
+               if (!(uuid = dm_tree_node_get_uuid(child))) {
+                       stack;
+                       continue;
+               }
+
+               /* Ignore if it doesn't belong to this VG */
+               if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+                       continue;
+
+               /* Refresh open_count */
+               if (!_info_by_dev(dinfo->major, dinfo->minor, 1, &info, NULL, NULL, NULL))
+                       return_0;
+
+               if (!info.exists)
+                       continue;
+
+               if (info.open_count) {
+                       /* Skip internal non-toplevel opened nodes */
+                       if (level)
+                               continue;
+
+                       /* When retry is not allowed, error */
+                       if (!child->dtree->retry_remove) {
+                               log_error("Unable to deactivate open %s (" FMTu32 ":"
+                                         FMTu32 ").", name, info.major, info.minor);
+                               r = 0;
+                               continue;
+                       }
+
+                       /* Check toplevel node for holders/mounted fs */
+                       if (!_check_device_not_in_use(name, &info)) {
+                               stack;
+                               r = 0;
+                               continue;
+                       }
+                       /* Go on with retry */
+               }
+
+               /* Also checking open_count in parent nodes of presuspend_node */
+               if ((child->presuspend_node &&
+                    !_node_has_closed_parents(child->presuspend_node,
+                                              uuid_prefix, uuid_prefix_len))) {
+                       /* Only report error from (likely non-internal) dependency at top level */
+                       if (!level) {
+                               log_error("Unable to deactivate open %s (" FMTu32 ":"
+                                         FMTu32 ").", name, info.major, info.minor);
+                               r = 0;
+                       }
+                       continue;
+               }
+
+               /* Suspend child node first if requested */
+               if (child->presuspend_node &&
+                   !dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len))
+                       continue;
+
+               /* Retries only apply at the top level */
+               if (!_deactivate_node(name, info.major, info.minor,
+                                     &child->dtree->cookie, child->udev_flags,
+                                     (level == 0) ? child->dtree->retry_remove : 0)) {
+                       log_error("Unable to deactivate %s (" FMTu32 ":"
+                                 FMTu32 ").", name, info.major, info.minor);
+                       r = 0;
+                       continue;
+               }
+
+               if (info.suspended && info.live_table)
+                       dec_suspended();
+
+               /* NOTE: the if-body is only 'stack;' — callback failure is
+                * deliberately ignored, as the FIXME below explains */
+               if (child->callback &&
+                   !child->callback(child, DM_NODE_CALLBACK_DEACTIVATED,
+                                    child->callback_data))
+                       stack;
+                       /* FIXME Deactivation must currently ignore failure
+                        * here so that lvremove can continue: we need an
+                        * alternative way to handle this state without
+                        * setting r=0.  Or better, skip calling thin_check
+                        * entirely if the device is about to be removed. */
+
+               if (dm_tree_node_num_children(child, 0) &&
+                   !_dm_tree_deactivate_children(child, uuid_prefix, uuid_prefix_len, level + 1))
+                       return_0;
+       }
+
+       return r;
+}
+
+/*
+ * Public entry: deactivate all children of dnode whose uuid matches
+ * the given prefix, starting recursion at level 0 (top level).
+ */
+int dm_tree_deactivate_children(struct dm_tree_node *dnode,
+                               const char *uuid_prefix,
+                               size_t uuid_prefix_len)
+{
+       return _dm_tree_deactivate_children(dnode, uuid_prefix, uuid_prefix_len, 0);
+}
+
+/*
+ * Suspend all children of dnode whose uuid matches the given prefix.
+ * Two passes: first suspend the matching nodes at this level (skipping
+ * nodes whose immediate parents are not yet suspended); then recurse
+ * into child nodes.  A thin-pool child with pending messages has its
+ * messages sent instead of being suspended, and its subtree is skipped
+ * via props.skip_suspend.  Returns 1 on success, 0 if any suspend or
+ * message delivery failed (processing continues past failures).
+ */
+int dm_tree_suspend_children(struct dm_tree_node *dnode,
+                            const char *uuid_prefix,
+                            size_t uuid_prefix_len)
+{
+       int r = 1;
+       void *handle = NULL;
+       struct dm_tree_node *child = dnode;
+       struct dm_info info, newinfo;
+       const struct dm_info *dinfo;
+       const char *name;
+       const char *uuid;
+
+       /* Suspend nodes at this level of the tree */
+       while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+               if (!(dinfo = dm_tree_node_get_info(child))) {
+                       stack;
+                       continue;
+               }
+
+               if (!(name = dm_tree_node_get_name(child))) {
+                       stack;
+                       continue;
+               }
+
+               if (!(uuid = dm_tree_node_get_uuid(child))) {
+                       stack;
+                       continue;
+               }
+
+               /* Ignore if it doesn't belong to this VG */
+               if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+                       continue;
+
+               /* Ensure immediate parents are already suspended */
+               if (!_children_suspended(child, 1, uuid_prefix, uuid_prefix_len))
+                       continue;
+
+               if (!_info_by_dev(dinfo->major, dinfo->minor, 0, &info, NULL, NULL, NULL))
+                       return_0;
+
+               if (!info.exists || info.suspended)
+                       continue;
+
+               /* If child has some real messages send them */
+               if ((child->props.send_messages > 1) && r) {
+                       if (!(r = _node_send_messages(child, uuid_prefix, uuid_prefix_len, 1)))
+                               stack;
+                       else {
+                               log_debug_activation("Sent messages to thin-pool %s and "
+                                                    "skipping suspend of its children.",
+                                                    _node_name(child));
+                               child->props.skip_suspend++;
+                       }
+                       continue;
+               }
+
+               if (!_suspend_node(name, info.major, info.minor,
+                                  child->dtree->skip_lockfs,
+                                  child->dtree->no_flush, &newinfo)) {
+                       log_error("Unable to suspend %s (" FMTu32 ":"
+                                 FMTu32 ")", name, info.major, info.minor);
+                       r = 0;
+                       continue;
+               }
+
+               /* Update cached info */
+               child->info = newinfo;
+       }
+
+       /* Then suspend any child nodes */
+       handle = NULL;
+
+       while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+               if (child->props.skip_suspend)
+                       continue;
+
+               if (!(uuid = dm_tree_node_get_uuid(child))) {
+                       stack;
+                       continue;
+               }
+
+               /* Ignore if it doesn't belong to this VG */
+               if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+                       continue;
+
+               if (dm_tree_node_num_children(child, 0))
+                       if (!dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len))
+                               return_0;
+       }
+
+       return r;
+}
+
+/*
+ * _rename_conflict_exists
+ * @dnode
+ * @node
+ * @resolvable
+ *
+ * Check if there is a rename conflict with existing peers in
+ * this tree.  'resolvable' is set if the conflicting node will
+ * also be undergoing a rename.  (Allowing that node to rename
+ * first would clear the conflict.)
+ *
+ * Returns: 1 if conflict, 0 otherwise
+ */
+static int _rename_conflict_exists(struct dm_tree_node *parent,
+                                struct dm_tree_node *node,
+                                int *resolvable)
+{
+       void *handle = NULL;
+       const char *name = dm_tree_node_get_name(node);
+       const char *sibling_name;
+       struct dm_tree_node *sibling;
+
+       *resolvable = 0;
+
+       if (!name)
+               return_0;
+
+       while ((sibling = dm_tree_next_child(&handle, parent, 0))) {
+               if (sibling == node)
+                       continue;
+
+               if (!(sibling_name = dm_tree_node_get_name(sibling))) {
+                       stack;
+                       continue;
+               }
+
+               /* node->props.new_name is expected non-NULL: the caller
+                * (dm_tree_activate_children) only calls this for nodes
+                * with a pending rename */
+               if (!strcmp(node->props.new_name, sibling_name)) {
+                       if (sibling->props.new_name)
+                               *resolvable = 1;
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Activate (resume) all children of dnode whose uuid matches the given
+ * prefix.  Children are activated depth-first, then nodes at this level
+ * are processed in activation-priority order (0..2): pending renames
+ * are applied first (deferred when a resolvable name conflict exists),
+ * then nodes with an inactive table or in suspended state are resumed.
+ * Finally any pending thin-pool messages on dnode itself are sent.
+ * Returns 1 on success, 0 if any resume or message delivery failed.
+ */
+int dm_tree_activate_children(struct dm_tree_node *dnode,
+                                const char *uuid_prefix,
+                                size_t uuid_prefix_len)
+{
+       int r = 1;
+       int resolvable_name_conflict, awaiting_peer_rename = 0;
+       void *handle = NULL;
+       struct dm_tree_node *child = dnode;
+       const char *name;
+       const char *uuid;
+       int priority;
+
+       /* Activate children first */
+       while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+               if (!(uuid = dm_tree_node_get_uuid(child))) {
+                       stack;
+                       continue;
+               }
+
+               if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+                       continue;
+
+               if (dm_tree_node_num_children(child, 0))
+                       if (!dm_tree_activate_children(child, uuid_prefix, uuid_prefix_len))
+                               return_0;
+       }
+
+       handle = NULL;
+
+       for (priority = 0; priority < 3; priority++) {
+               awaiting_peer_rename = 0;
+               while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+                       if (priority != child->activation_priority)
+                               continue;
+
+                       if (!(uuid = dm_tree_node_get_uuid(child))) {
+                               stack;
+                               continue;
+                       }
+
+                       if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+                               continue;
+
+                       if (!(name = dm_tree_node_get_name(child))) {
+                               stack;
+                               continue;
+                       }
+
+                       /* Rename? */
+                       if (child->props.new_name) {
+                               /* Defer when a peer will free the name by renaming first */
+                               if (_rename_conflict_exists(dnode, child, &resolvable_name_conflict) &&
+                                   resolvable_name_conflict) {
+                                       awaiting_peer_rename++;
+                                       continue;
+                               }
+                               if (!_rename_node(name, child->props.new_name, child->info.major,
+                                                 child->info.minor, &child->dtree->cookie,
+                                                 child->udev_flags)) {
+                                       log_error("Failed to rename %s (%" PRIu32
+                                                 ":%" PRIu32 ") to %s", name, child->info.major,
+                                                 child->info.minor, child->props.new_name);
+                                       return 0;
+                               }
+                               child->name = child->props.new_name;
+                               child->props.new_name = NULL;
+                       }
+
+                       if (!child->info.inactive_table && !child->info.suspended)
+                               continue;
+
+                       if (!_resume_node(child->name, child->info.major, child->info.minor,
+                                         child->props.read_ahead, child->props.read_ahead_flags,
+                                         &child->info, &child->dtree->cookie, child->udev_flags, child->info.suspended)) {
+                               log_error("Unable to resume %s.", _node_name(child));
+                               r = 0;
+                               continue;
+                       }
+               }
+               /* Some renames were deferred for peers: repeat this priority level */
+               if (awaiting_peer_rename)
+                       priority--; /* redo priority level */
+       }
+
+       /*
+        * FIXME: Implement delayed error reporting
+        * activation should be stopped only in the case,
+        * the submission of transation_id message fails,
+        * resume should continue further, just whole command
+        * has to report failure.
+        */
+       if (r && (dnode->props.send_messages > 1) &&
+           !(r = _node_send_messages(dnode, uuid_prefix, uuid_prefix_len, 1)))
+               stack;
+
+       return r;
+}
+
+/*
+ * Issue a DM_DEVICE_CREATE ioctl task to create the device for dnode,
+ * with its name and uuid, an optionally fixed major/minor (only when
+ * props.major is set), and read-only mode when requested.  On success
+ * dnode->info is refreshed from the completed task.
+ * Returns 1 on success, 0 on failure.
+ */
+static int _create_node(struct dm_tree_node *dnode)
+{
+       int r = 0;
+       struct dm_task *dmt;
+
+       log_verbose("Creating %s", dnode->name);
+
+       if (!(dmt = dm_task_create(DM_DEVICE_CREATE))) {
+               log_error("Create dm_task creation failed for %s", dnode->name);
+               return 0;
+       }
+
+       if (!dm_task_set_name(dmt, dnode->name)) {
+               log_error("Failed to set device name for %s", dnode->name);
+               goto out;
+       }
+
+       if (!dm_task_set_uuid(dmt, dnode->uuid)) {
+               log_error("Failed to set uuid for %s", dnode->name);
+               goto out;
+       }
+
+       /* Only request a specific device number when one was assigned */
+       if (dnode->props.major &&
+           (!dm_task_set_major(dmt, dnode->props.major) ||
+            !dm_task_set_minor(dmt, dnode->props.minor))) {
+               log_error("Failed to set device number for %s creation.", dnode->name);
+               goto out;
+       }
+
+       if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
+               log_error("Failed to set read only flag for %s", dnode->name);
+               goto out;
+       }
+
+       if (!dm_task_no_open_count(dmt))
+               log_warn("WARNING: Failed to disable open_count.");
+
+       if ((r = dm_task_run(dmt))) {
+               if (!(r = dm_task_get_info(dmt, &dnode->info)))
+                       /*
+                        * This should not be possible to occur.  However,
+                        * we print an error message anyway for the more
+                        * absurd cases (e.g. memory corruption) so there
+                        * is never any question as to which one failed.
+                        */
+                       log_error(INTERNAL_ERROR
+                                 "Unable to get DM task info for %s.",
+                                 dnode->name);
+       }
+out:
+       dm_task_destroy(dmt);
+
+       return r;
+}
+
+/*
+ * _remove_node
+ *
+ * This function is only used to remove a DM device that has failed
+ * to load any table.
+ *
+ * Returns 1 on success (or if the device does not exist); 0 if the
+ * device still has a table loaded (internal error) or deactivation
+ * fails.
+ */
+static int _remove_node(struct dm_tree_node *dnode)
+{
+       if (!dnode->info.exists)
+               return 1;
+
+       if (dnode->info.live_table || dnode->info.inactive_table) {
+               log_error(INTERNAL_ERROR
+                         "_remove_node called on device with loaded table(s).");
+               return 0;
+       }
+
+       /* No retry (last arg 0): a table-less device should remove cleanly */
+       if (!_deactivate_node(dnode->name, dnode->info.major, dnode->info.minor,
+                             &dnode->dtree->cookie, dnode->udev_flags, 0)) {
+               log_error("Failed to clean-up device with no table: %s.",
+                         _node_name(dnode));
+               return 0;
+       }
+       return 1;
+}
+
+/*
+ * Format node's device number into devbuf via dm_format_dev() for use
+ * in a dm table line.  Returns 1 on success, 0 on failure (e.g. buffer
+ * too small).
+ */
+static int _build_dev_string(char *devbuf, size_t bufsize, struct dm_tree_node *node)
+{
+       if (!dm_format_dev(devbuf, bufsize, node->info.major, node->info.minor)) {
+               log_error("Failed to format %s device number for %s as dm "
+                         "target (%u,%u)",
+                         node->name, node->uuid, node->info.major, node->info.minor);
+               return 0;
+       }
+
+       return 1;
+}
+
+/* Simplify string emitting code: append printf-style text to the
+ * 'params' buffer at offset p (advancing p), where 'params' and
+ * 'paramsize' are in scope at the call site.  On overflow/format
+ * failure it makes the ENCLOSING function return -1. */
+#define EMIT_PARAMS(p, str...)\
+do {\
+       int w;\
+       if ((w = dm_snprintf(params + p, paramsize - (size_t) p, str)) < 0) {\
+               stack; /* Out of space */\
+               return -1;\
+       }\
+       p += w;\
+} while (0)
+
+/*
+ * _emit_areas_line
+ *
+ * Append the per-area device list for seg to the params buffer at *pos.
+ * For raid segment types each area emits just the device (or " -" when
+ * dev_node is missing); all other types emit "device offset" pairs.
+ *
+ * Returns: 1 on success, 0 on failure
+ */
+static int _emit_areas_line(struct dm_task *dmt __attribute__((unused)),
+                           struct load_segment *seg, char *params,
+                           size_t paramsize, int *pos)
+{
+       struct seg_area *area;
+       char devbuf[DM_FORMAT_DEV_BUFSIZE];
+       unsigned first_time = 1;
+
+       dm_list_iterate_items(area, &seg->areas) {
+               switch (seg->type) {
+               case SEG_RAID0:
+               case SEG_RAID0_META:
+               case SEG_RAID1:
+               case SEG_RAID10:
+               case SEG_RAID4:
+               case SEG_RAID5_N:
+               case SEG_RAID5_LA:
+               case SEG_RAID5_RA:
+               case SEG_RAID5_LS:
+               case SEG_RAID5_RS:
+               case SEG_RAID6_N_6:
+               case SEG_RAID6_ZR:
+               case SEG_RAID6_NR:
+               case SEG_RAID6_NC:
+               case SEG_RAID6_LS_6:
+               case SEG_RAID6_RS_6:
+               case SEG_RAID6_LA_6:
+               case SEG_RAID6_RA_6:
+                       /* Raid: a missing image slot is written as "-" */
+                       if (!area->dev_node) {
+                               EMIT_PARAMS(*pos, " -");
+                               break;
+                       }
+                       if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
+                               return_0;
+
+                       EMIT_PARAMS(*pos, " %s", devbuf);
+                       break;
+               default:
+                       if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
+                               return_0;
+
+                       /* No leading space before the very first pair */
+                       EMIT_PARAMS(*pos, "%s%s %" PRIu64, first_time ? "" : " ",
+                                   devbuf, area->offset);
+               }
+
+               first_time = 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Build the dm "mirror" target parameter line for seg into params:
+ * log type and its arguments, region size, [no]sync flags, the mirror
+ * areas, and trailing feature args.  Kernel/module version decides
+ * whether errors are requested via the legacy "block_on_error" log arg
+ * (< 2.6.22) or the "handle_errors" feature arg, and whether clustered
+ * mirrors use the userspace log type (>= 2.6.31).
+ *
+ * Returns: 1 on success, 0 on failure
+ */
+static int _mirror_emit_segment_line(struct dm_task *dmt, struct load_segment *seg,
+                                    char *params, size_t paramsize)
+{
+       int block_on_error = 0;
+       int handle_errors = 0;
+       int dm_log_userspace = 0;
+       unsigned log_parm_count;
+       int pos = 0;
+       char logbuf[DM_FORMAT_DEV_BUFSIZE];
+       const char *logtype;
+       unsigned kmaj = 0, kmin = 0, krel = 0;
+
+       if (!get_uname_version(&kmaj, &kmin, &krel))
+               return_0;
+
+       if ((seg->flags & DM_BLOCK_ON_ERROR)) {
+               /*
+                * Originally, block_on_error was an argument to the log
+                * portion of the mirror CTR table.  It was renamed to
+                * "handle_errors" and now resides in the 'features'
+                * section of the mirror CTR table (i.e. at the end).
+                *
+                * We can identify whether to use "block_on_error" or
+                * "handle_errors" by the dm-mirror module's version
+                * number (>= 1.12) or by the kernel version (>= 2.6.22).
+                */
+               if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 22))
+                       handle_errors = 1;
+               else
+                       block_on_error = 1;
+       }
+
+       if (seg->clustered) {
+               /* Cluster mirrors require a UUID */
+               if (!seg->uuid)
+                       return_0;
+
+               /*
+                * Cluster mirrors used to have their own log
+                * types.  Now they are accessed through the
+                * userspace log type.
+                *
+                * The dm-log-userspace module was added to the
+                * 2.6.31 kernel.
+                */
+               if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 31))
+                       dm_log_userspace = 1;
+       }
+
+       /* Region size */
+       log_parm_count = 1;
+
+       /* [no]sync, block_on_error etc. */
+       log_parm_count += hweight32(seg->flags);
+
+       /* "handle_errors" is a feature arg now */
+       if (handle_errors)
+               log_parm_count--;
+
+       /* DM_CORELOG does not count in the param list */
+       if (seg->flags & DM_CORELOG)
+               log_parm_count--;
+
+       if (seg->clustered) {
+               log_parm_count++; /* For UUID */
+
+               if (!dm_log_userspace)
+                       EMIT_PARAMS(pos, "clustered-");
+               else
+                       /* For clustered-* type field inserted later */
+                       log_parm_count++;
+       }
+
+       /* Disk log needs one extra parameter: the log device */
+       if (!seg->log)
+               logtype = "core";
+       else {
+               logtype = "disk";
+               log_parm_count++;
+               if (!_build_dev_string(logbuf, sizeof(logbuf), seg->log))
+                       return_0;
+       }
+
+       if (dm_log_userspace)
+               EMIT_PARAMS(pos, "userspace %u %s clustered-%s",
+                           log_parm_count, seg->uuid, logtype);
+       else
+               EMIT_PARAMS(pos, "%s %u", logtype, log_parm_count);
+
+       if (seg->log)
+               EMIT_PARAMS(pos, " %s", logbuf);
+
+       EMIT_PARAMS(pos, " %u", seg->region_size);
+
+       if (seg->clustered && !dm_log_userspace)
+               EMIT_PARAMS(pos, " %s", seg->uuid);
+
+       if ((seg->flags & DM_NOSYNC))
+               EMIT_PARAMS(pos, " nosync");
+       else if ((seg->flags & DM_FORCESYNC))
+               EMIT_PARAMS(pos, " sync");
+
+       if (block_on_error)
+               EMIT_PARAMS(pos, " block_on_error");
+
+       EMIT_PARAMS(pos, " %u ", seg->mirror_area_count);
+
+       if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0)
+               return_0;
+
+       if (handle_errors)
+               EMIT_PARAMS(pos, " 1 handle_errors");
+
+       return 1;
+}
+
/* A present (non-zero) optional value occupies two table slots: keyword + value. */
static int _2_if_value(unsigned p)
{
	if (p)
		return 2;

	return 0;
}
+
+/* Return number of bits passed in @bits assuming 2 * 64 bit size */
+static int _get_params_count(const uint64_t *bits)
+{
+       int r = 0;
+       int i = RAID_BITMAP_SIZE;
+
+       while (i--) {
+               r += 2 * hweight32(bits[i] & 0xFFFFFFFF);
+               r += 2 * hweight32(bits[i] >> 32);
+       }
+
+       return r;
+}
+
/*
 * Get target version (major, minor and patchlevel) for @target_name.
 *
 * Runs DM_DEVICE_LIST_VERSIONS and scans the returned records for
 * @target_name.  Returns 1 on success; if the ioctl itself fails the
 * version is reported as 0.0.0 and 1 is still returned (the failure is
 * assumed to mean LIST_VERSIONS is unsupported by the kernel).
 * Returns 0 only when the dm_task cannot be created or the target
 * name is not found in the list.
 *
 * FIXME: this function is derived from liblvm.
 *        Integrate with move of liblvm functions
 *        to libdm in future library layer purge
 *        (e.g. expose as API dm_target_version()?)
 */
static int _target_version(const char *target_name, uint32_t *maj,
			   uint32_t *min, uint32_t *patchlevel)
{
	int r = 0;
	struct dm_task *dmt;
	struct dm_versions *target, *last_target = NULL;

	log_very_verbose("Getting target version for %s", target_name);
	if (!(dmt = dm_task_create(DM_DEVICE_LIST_VERSIONS)))
		return_0;

	if (!dm_task_run(dmt)) {
		log_debug_activation("Failed to get %s target versions", target_name);
		/* Assume this was because LIST_VERSIONS isn't supported */
		*maj = *min = *patchlevel = 0;
		r = 1;
	} else
		/*
		 * Walk the variable-length records; the list terminates when
		 * the 'next' offset stops advancing (target == last_target).
		 */
		for (target = dm_task_get_versions(dmt);
		     target != last_target;
		     last_target = target, target = (struct dm_versions *)((char *) target + target->next))
			if (!strcmp(target_name, target->name)) {
				*maj = target->version[0];
				*min = target->version[1];
				*patchlevel = target->version[2];
				log_very_verbose("Found %s target "
						 "v%" PRIu32 ".%" PRIu32 ".%" PRIu32 ".",
						 target_name, *maj, *min, *patchlevel);
				r = 1;
				break;
			}

	dm_task_destroy(dmt);

	return r;
}
+
/*
 * Emit the table line for a raid segment:
 *   "<raid type> <#params> <stripe size> [optional args...]
 *    <#dev pairs> <metadata/data dev pairs...>"
 *
 * The ordering of the optional arguments depends on the raid target
 * version (see the comments at the two branches below), so the loaded
 * target's version is queried first.
 */
static int _raid_emit_segment_line(struct dm_task *dmt, uint32_t major,
				   uint32_t minor, struct load_segment *seg,
				   uint64_t *seg_start, char *params,
				   size_t paramsize)
{
	uint32_t i;
	uint32_t area_count = seg->area_count / 2;	/* metadata/data pairs */
	uint32_t maj, min, patchlevel;
	int param_count = 1; /* mandatory 'chunk size'/'stripe size' arg */
	int pos = 0;
	unsigned type;

	/* Areas must come in metadata/data pairs */
	if (seg->area_count % 2)
		return 0;

	/* "nosync"/"sync" is a single optional argument */
	if ((seg->flags & DM_NOSYNC) || (seg->flags & DM_FORCESYNC))
		param_count++;

	/* Each non-zero value contributes a "<keyword> <value>" pair */
	param_count += _2_if_value(seg->data_offset) +
		       _2_if_value(seg->delta_disks) +
		       _2_if_value(seg->region_size) +
		       _2_if_value(seg->writebehind) +
		       _2_if_value(seg->min_recovery_rate) +
		       _2_if_value(seg->max_recovery_rate) +
		       _2_if_value(seg->data_copies > 1);

	/* rebuilds and writemostly are BITMAP_SIZE * 64 bits */
	param_count += _get_params_count(seg->rebuilds);
	param_count += _get_params_count(seg->writemostly);

	if ((seg->type == SEG_RAID1) && seg->stripe_size)
		log_info("WARNING: Ignoring RAID1 stripe size");

	/* Kernel only expects "raid0", not "raid0_meta" */
	type = seg->type;
	if (type == SEG_RAID0_META)
		type = SEG_RAID0;

	EMIT_PARAMS(pos, "%s %d %u",
		    type == SEG_RAID10 ? "raid10" : _dm_segtypes[type].target,
		    param_count, seg->stripe_size);

	if (!_target_version("raid", &maj, &min, &patchlevel))
		return_0;

	/*
	 * Target version prior to 1.9.0 and >= 1.11.0 emit
	 * order of parameters as of kernel target documentation
	 */
	if (maj > 1 || (maj == 1 && (min < 9 || min >= 11))) {
		if (seg->flags & DM_NOSYNC)
			EMIT_PARAMS(pos, " nosync");
		else if (seg->flags & DM_FORCESYNC)
			EMIT_PARAMS(pos, " sync");

		/* Bit i set in the rebuilds bitmap => rebuild device i */
		for (i = 0; i < area_count; i++)
			if (seg->rebuilds[i/64] & (1ULL << (i%64)))
				EMIT_PARAMS(pos, " rebuild %u", i);

		if (seg->min_recovery_rate)
			EMIT_PARAMS(pos, " min_recovery_rate %u",
				    seg->min_recovery_rate);

		if (seg->max_recovery_rate)
			EMIT_PARAMS(pos, " max_recovery_rate %u",
				    seg->max_recovery_rate);

		/* Bit i set in the writemostly bitmap => device i is write_mostly */
		for (i = 0; i < area_count; i++)
			if (seg->writemostly[i/64] & (1ULL << (i%64)))
				EMIT_PARAMS(pos, " write_mostly %u", i);

		if (seg->writebehind)
			EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind);

		if (seg->region_size)
			EMIT_PARAMS(pos, " region_size %u", seg->region_size);

		if (seg->data_copies > 1 && type == SEG_RAID10)
			EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies);

		if (seg->delta_disks)
			EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks);

		/* If seg-data_offset == 1, kernel needs a zero offset to adjust to it */
		if (seg->data_offset)
			EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 0 : seg->data_offset);

	/* Target version >= 1.9.0 && < 1.11.0 had a table line parameter ordering flaw */
	} else {
		if (seg->data_copies > 1 && type == SEG_RAID10)
			EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies);

		if (seg->flags & DM_NOSYNC)
			EMIT_PARAMS(pos, " nosync");
		else if (seg->flags & DM_FORCESYNC)
			EMIT_PARAMS(pos, " sync");

		if (seg->region_size)
			EMIT_PARAMS(pos, " region_size %u", seg->region_size);

		/* If seg-data_offset == 1, kernel needs a zero offset to adjust to it */
		if (seg->data_offset)
			EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 0 : seg->data_offset);

		if (seg->delta_disks)
			EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks);

		for (i = 0; i < area_count; i++)
			if (seg->rebuilds[i/64] & (1ULL << (i%64)))
				EMIT_PARAMS(pos, " rebuild %u", i);

		for (i = 0; i < area_count; i++)
			if (seg->writemostly[i/64] & (1ULL << (i%64)))
				EMIT_PARAMS(pos, " write_mostly %u", i);

		if (seg->writebehind)
			EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind);

		if (seg->max_recovery_rate)
			EMIT_PARAMS(pos, " max_recovery_rate %u",
				    seg->max_recovery_rate);

		if (seg->min_recovery_rate)
			EMIT_PARAMS(pos, " min_recovery_rate %u",
				    seg->min_recovery_rate);
	}

	/* Print number of metadata/data device pairs */
	EMIT_PARAMS(pos, " %u", area_count);

	if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0)
		return_0;

	return 1;
}
+
/*
 * Emit the cache target table line:
 *   "<metadata dev> <data dev> <origin dev> <block size>
 *    <#features> [features...] <policy> <#policy args> [key value ...]"
 */
static int _cache_emit_segment_line(struct dm_task *dmt,
				    struct load_segment *seg,
				    char *params, size_t paramsize)
{
	int pos = 0;
	/* unsigned feature_count; */
	char data[DM_FORMAT_DEV_BUFSIZE];
	char metadata[DM_FORMAT_DEV_BUFSIZE];
	char origin[DM_FORMAT_DEV_BUFSIZE];
	const char *name;
	struct dm_config_node *cn;

	/* Cache Dev */
	if (!_build_dev_string(data, sizeof(data), seg->pool))
		return_0;

	/* Metadata Dev */
	if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata))
		return_0;

	/* Origin Dev */
	if (!_build_dev_string(origin, sizeof(origin), seg->origin))
		return_0;

	EMIT_PARAMS(pos, "%s %s %s", metadata, data, origin);

	/* Data block size */
	EMIT_PARAMS(pos, " %u", seg->data_block_size);

	/* Features: count (2 when metadata2 format requested, else 1)
	 * followed below by exactly one io-mode keyword. */
	/* feature_count = hweight32(seg->flags); */
	/* EMIT_PARAMS(pos, " %u", feature_count); */
	if (seg->flags & DM_CACHE_FEATURE_METADATA2)
		EMIT_PARAMS(pos, " 2 metadata2 ");
	else
		EMIT_PARAMS(pos, " 1 ");

	/* IO mode: passthrough/writeback take precedence over the default */
	if (seg->flags & DM_CACHE_FEATURE_PASSTHROUGH)
		EMIT_PARAMS(pos, "passthrough");
	else if (seg->flags & DM_CACHE_FEATURE_WRITEBACK)
		EMIT_PARAMS(pos, "writeback");
	else
		EMIT_PARAMS(pos, "writethrough");

	/* Cache Policy */
	name = seg->policy_name ? : "default";

	EMIT_PARAMS(pos, " %s", name);

	/* Each policy setting is a "<key> <value>" pair, hence argc * 2 */
	EMIT_PARAMS(pos, " %u", seg->policy_argc * 2);
	if (seg->policy_settings)
		for (cn = seg->policy_settings->child; cn; cn = cn->sib)
			EMIT_PARAMS(pos, " %s %" PRIu64, cn->key, cn->v->v.i);

	return 1;
}
+
+static int _thin_pool_emit_segment_line(struct dm_task *dmt,
+                                       struct load_segment *seg,
+                                       char *params, size_t paramsize)
+{
+       int pos = 0;
+       char pool[DM_FORMAT_DEV_BUFSIZE], metadata[DM_FORMAT_DEV_BUFSIZE];
+       int features = (seg->error_if_no_space ? 1 : 0) +
+                (seg->read_only ? 1 : 0) +
+                (seg->ignore_discard ? 1 : 0) +
+                (seg->no_discard_passdown ? 1 : 0) +
+                (seg->skip_block_zeroing ? 1 : 0);
+
+       if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata))
+               return_0;
+
+       if (!_build_dev_string(pool, sizeof(pool), seg->pool))
+               return_0;
+
+       EMIT_PARAMS(pos, "%s %s %d %" PRIu64 " %d%s%s%s%s%s", metadata, pool,
+                   seg->data_block_size, seg->low_water_mark, features,
+                   seg->skip_block_zeroing ? " skip_block_zeroing" : "",
+                   seg->ignore_discard ? " ignore_discard" : "",
+                   seg->no_discard_passdown ? " no_discard_passdown" : "",
+                   seg->error_if_no_space ? " error_if_no_space" : "",
+                   seg->read_only ? " read_only" : ""
+                  );
+
+       return 1;
+}
+
+static int _thin_emit_segment_line(struct dm_task *dmt,
+                                  struct load_segment *seg,
+                                  char *params, size_t paramsize)
+{
+       int pos = 0;
+       char pool[DM_FORMAT_DEV_BUFSIZE];
+       char external[DM_FORMAT_DEV_BUFSIZE + 1];
+
+       if (!_build_dev_string(pool, sizeof(pool), seg->pool))
+               return_0;
+
+       if (!seg->external)
+               *external = 0;
+       else {
+               *external = ' ';
+               if (!_build_dev_string(external + 1, sizeof(external) - 1,
+                                      seg->external))
+                       return_0;
+       }
+
+       EMIT_PARAMS(pos, "%s %d%s", pool, seg->device_id, external);
+
+       return 1;
+}
+
/*
 * Build the parameter string for one segment and add the target to @dmt.
 *
 * Complex targets (mirror, raid, thin, cache, ...) delegate to their
 * dedicated _*_emit_segment_line() helper; simple ones are emitted
 * inline.  On success *seg_start is advanced past this segment.
 */
static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
			      uint32_t minor, struct load_segment *seg,
			      uint64_t *seg_start, char *params,
			      size_t paramsize)
{
	int pos = 0;
	int r;
	int target_type_is_raid = 0;
	char originbuf[DM_FORMAT_DEV_BUFSIZE], cowbuf[DM_FORMAT_DEV_BUFSIZE];

	/* First pass: target-type specific parameters */
	switch(seg->type) {
	case SEG_ERROR:
	case SEG_ZERO:
	case SEG_LINEAR:
		break;
	case SEG_MIRRORED:
		/* Mirrors are pretty complicated - now in separate function */
		r = _mirror_emit_segment_line(dmt, seg, params, paramsize);
		if (!r)
			return_0;
		break;
	case SEG_SNAPSHOT:
	case SEG_SNAPSHOT_MERGE:
		if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
			return_0;
		if (!_build_dev_string(cowbuf, sizeof(cowbuf), seg->cow))
			return_0;
		/* "<origin> <cow> <P|N> <chunk size>" - P = persistent */
		EMIT_PARAMS(pos, "%s %s %c %d", originbuf, cowbuf,
			    seg->persistent ? 'P' : 'N', seg->chunk_size);
		break;
	case SEG_SNAPSHOT_ORIGIN:
		if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
			return_0;
		EMIT_PARAMS(pos, "%s", originbuf);
		break;
	case SEG_STRIPED:
		EMIT_PARAMS(pos, "%u %u ", seg->area_count, seg->stripe_size);
		break;
	case SEG_CRYPT:
		/* "<cipher>[-<chainmode>][-<iv>] <key> <iv offset> " */
		EMIT_PARAMS(pos, "%s%s%s%s%s %s %" PRIu64 " ", seg->cipher,
			    seg->chainmode ? "-" : "", seg->chainmode ?: "",
			    seg->iv ? "-" : "", seg->iv ?: "", seg->key,
			    seg->iv_offset != DM_CRYPT_IV_DEFAULT ?
			    seg->iv_offset : *seg_start);
		break;
	case SEG_RAID0:
	case SEG_RAID0_META:
	case SEG_RAID1:
	case SEG_RAID10:
	case SEG_RAID4:
	case SEG_RAID5_N:
	case SEG_RAID5_LA:
	case SEG_RAID5_RA:
	case SEG_RAID5_LS:
	case SEG_RAID5_RS:
	case SEG_RAID6_N_6:
	case SEG_RAID6_ZR:
	case SEG_RAID6_NR:
	case SEG_RAID6_NC:
	case SEG_RAID6_LS_6:
	case SEG_RAID6_RS_6:
	case SEG_RAID6_LA_6:
	case SEG_RAID6_RA_6:
		target_type_is_raid = 1;
		r = _raid_emit_segment_line(dmt, major, minor, seg, seg_start,
					    params, paramsize);
		if (!r)
			return_0;

		break;
	case SEG_THIN_POOL:
		if (!_thin_pool_emit_segment_line(dmt, seg, params, paramsize))
			return_0;
		break;
	case SEG_THIN:
		if (!_thin_emit_segment_line(dmt, seg, params, paramsize))
			return_0;
		break;
	case SEG_CACHE:
		if (!_cache_emit_segment_line(dmt, seg, params, paramsize))
			return_0;
		break;
	}

	/* Second pass: targets whose device-area list forms the line's tail */
	switch(seg->type) {
	case SEG_ERROR:
	case SEG_SNAPSHOT:
	case SEG_SNAPSHOT_ORIGIN:
	case SEG_SNAPSHOT_MERGE:
	case SEG_ZERO:
	case SEG_THIN_POOL:
	case SEG_THIN:
	case SEG_CACHE:
		break;
	case SEG_CRYPT:
	case SEG_LINEAR:
	case SEG_STRIPED:
		if ((r = _emit_areas_line(dmt, seg, params, paramsize, &pos)) <= 0) {
			stack;
			return r;
		}
		if (!params[0]) {
			log_error("No parameters supplied for %s target "
				  "%u:%u.", _dm_segtypes[seg->type].target,
				  major, minor);
			return 0;
		}
		break;
	}

	log_debug_activation("Adding target to (%" PRIu32 ":%" PRIu32 "): %" PRIu64
			     " %" PRIu64 " %s %s", major, minor,
			     *seg_start, seg->size, target_type_is_raid ? "raid" :
			     _dm_segtypes[seg->type].target, params);

	/* All raid variants load as kernel target type "raid" */
	if (!dm_task_add_target(dmt, *seg_start, seg->size,
				target_type_is_raid ? "raid" :
				_dm_segtypes[seg->type].target, params))
		return_0;

	*seg_start += seg->size;

	return 1;
}
+
+#undef EMIT_PARAMS
+
/*
 * Emit one segment's table line into a freshly allocated buffer.
 *
 * A negative return from _emit_segment_line() means the params buffer
 * was too small (see the "Insufficient space" log below); retry with a
 * doubled buffer up to MAX_TARGET_PARAMSIZE.  Returns 1 on success,
 * 0 on failure.
 */
static int _emit_segment(struct dm_task *dmt, uint32_t major, uint32_t minor,
			 struct load_segment *seg, uint64_t *seg_start)
{
	char *params;
	size_t paramsize = 4096; /* FIXME: too small for long RAID lines when > 64 devices supported */
	int ret;

	do {
		if (!(params = dm_malloc(paramsize))) {
			log_error("Insufficient space for target parameters.");
			return 0;
		}

		params[0] = '\0';
		ret = _emit_segment_line(dmt, major, minor, seg, seg_start,
					 params, paramsize);
		dm_free(params);

		if (!ret)
			stack;

		/* ret == 1 success, ret == 0 hard failure: both are final */
		if (ret >= 0)
			return ret;

		/* Negative: buffer overflow - double the buffer and retry */
		log_debug_activation("Insufficient space in params[%" PRIsize_t
				     "] for target parameters.", paramsize);

		paramsize *= 2;
	} while (paramsize < MAX_TARGET_PARAMSIZE);

	log_error("Target parameter size too big. Aborting.");
	return 0;
}
+
/*
 * Load the table built from dnode's segment list into the device's
 * inactive slot via DM_DEVICE_RELOAD.
 *
 * On success also records whether the device size changed in
 * dnode->props.size_changed (-1 shrink, 0 unchanged/ignored, 1 grow)
 * so the caller knows whether a resume is required.
 */
static int _load_node(struct dm_tree_node *dnode)
{
	int r = 0;
	struct dm_task *dmt;
	struct load_segment *seg;
	uint64_t seg_start = 0, existing_table_size;

	log_verbose("Loading table for %s.", _node_name(dnode));

	if (!(dmt = dm_task_create(DM_DEVICE_RELOAD))) {
		log_error("Reload dm_task creation failed for %s.", _node_name(dnode));
		return 0;
	}

	if (!dm_task_set_major(dmt, dnode->info.major) ||
	    !dm_task_set_minor(dmt, dnode->info.minor)) {
		log_error("Failed to set device number for %s reload.", _node_name(dnode));
		goto out;
	}

	if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
		log_error("Failed to set read only flag for %s.", _node_name(dnode));
		goto out;
	}

	if (!dm_task_no_open_count(dmt))
		log_warn("WARNING: Failed to disable open_count.");

	/* Emit one table line per segment; seg_start accumulates the offsets */
	dm_list_iterate_items(seg, &dnode->props.segs)
		if (!_emit_segment(dmt, dnode->info.major, dnode->info.minor,
				   seg, &seg_start))
			goto_out;

	if (!dm_task_suppress_identical_reload(dmt))
		log_warn("WARNING: Failed to suppress reload of identical tables.");

	if ((r = dm_task_run(dmt))) {
		r = dm_task_get_info(dmt, &dnode->info);
		if (r && !dnode->info.inactive_table)
			log_verbose("Suppressed %s identical table reload.",
				    _node_name(dnode));

		/* Compare new total size (seg_start) against the live table */
		existing_table_size = dm_task_get_existing_table_size(dmt);
		if ((dnode->props.size_changed =
		     (existing_table_size == seg_start) ? 0 :
		     (existing_table_size > seg_start) ? -1 : 1)) {
			/*
			 * Kernel usually skips size validation on zero-length devices
			 * now so no need to preload them.
			 */
			/* FIXME In which kernel version did this begin? */
			if (!existing_table_size && dnode->props.delay_resume_if_new)
				dnode->props.size_changed = 0;

			log_debug_activation("Table size changed from %" PRIu64 " to %"
					     PRIu64 " for %s.%s", existing_table_size,
					     seg_start, _node_name(dnode),
					     dnode->props.size_changed ? "" : " (Ignoring.)");

			/*
			 * FIXME: code here has known design problem.
			 *  LVM2 does NOT resize thin-pool on top of other LV in 2 steps -
			 *  where raid would be resized with 1st. transaction
			 *  followed by 2nd. thin-pool resize - RHBZ #1285063
			 */
			if (existing_table_size && dnode->props.delay_resume_if_extended) {
				log_debug_activation("Resume of table of extended device %s delayed.",
						     _node_name(dnode));
				dnode->props.size_changed = 0;
			}
		}
	}

	/* Segments are consumed; reset count for any future table build */
	dnode->props.segment_count = 0;

out:
	dm_task_destroy(dmt);

	return r;
}
+
/*
 * Currently try to deactivate only nodes created during preload.
 * New node is always attached to the front of activated_list.
 *
 * Recursively deactivates every node on @parent's activated list
 * (dropping any pending callback first so it cannot fire for a
 * device being reverted).  Returns 1 on success, 0 on the first
 * deactivation failure.
 */
static int _dm_tree_revert_activated(struct dm_tree_node *parent)
{
	struct dm_tree_node *child;

	dm_list_iterate_items_gen(child, &parent->activated, activated_list) {
		log_debug_activation("Reverting %s.", _node_name(child));
		if (child->callback) {
			log_debug_activation("Dropping callback for %s.", _node_name(child));
			child->callback = NULL;
		}
		if (!_deactivate_node(child->name, child->info.major, child->info.minor,
				      &child->dtree->cookie, child->udev_flags, 0)) {
			log_error("Unable to deactivate %s.", _node_name(child));
			return 0;
		}
		/* Revert anything this child activated in turn */
		if (!_dm_tree_revert_activated(child))
			return_0;
	}

	return 1;
}
+
+int dm_tree_preload_children(struct dm_tree_node *dnode,
+                            const char *uuid_prefix,
+                            size_t uuid_prefix_len)
+{
+       int r = 1, node_created = 0;
+       void *handle = NULL;
+       struct dm_tree_node *child;
+       int update_devs_flag = 0;
+
+       /* Preload children first */
+       while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+               /* Propagate delay of resume from parent node */
+               if (dnode->props.delay_resume_if_new > 1)
+                       child->props.delay_resume_if_new = dnode->props.delay_resume_if_new;
+
+               /* Skip existing non-device-mapper devices */
+               if (!child->info.exists && child->info.major)
+                       continue;
+
+               /* Ignore if it doesn't belong to this VG */
+               if (child->info.exists &&
+                   !_uuid_prefix_matches(child->uuid, uuid_prefix, uuid_prefix_len))
+                       continue;
+
+               if (dm_tree_node_num_children(child, 0))
+                       if (!dm_tree_preload_children(child, uuid_prefix, uuid_prefix_len))
+                               return_0;
+
+               /* FIXME Cope if name exists with no uuid? */
+               if (!child->info.exists && !(node_created = _create_node(child)))
+                       return_0;
+
+               /* Propagate delayed resume from exteded child node */
+               if (child->props.delay_resume_if_extended)
+                       dnode->props.delay_resume_if_extended = 1;
+
+               if (!child->info.inactive_table &&
+                   child->props.segment_count &&
+                   !_load_node(child)) {
+                       /*
+                        * If the table load does not succeed, we remove the
+                        * device in the kernel that would otherwise have an
+                        * empty table.  This makes the create + load of the
+                        * device atomic.  However, if other dependencies have
+                        * already been created and loaded; this code is
+                        * insufficient to remove those - only the node
+                        * encountering the table load failure is removed.
+                        */
+                       if (node_created && !_remove_node(child))
+                               return_0;
+                       return_0;
+               }
+
+               /* No resume for a device without parents or with unchanged or smaller size */
+               if (!dm_tree_node_num_children(child, 1) || (child->props.size_changed <= 0))
+                       continue;
+
+               if (!child->info.inactive_table && !child->info.suspended)
+                       continue;
+
+               if (!_resume_node(child->name, child->info.major, child->info.minor,
+                                 child->props.read_ahead, child->props.read_ahead_flags,
+                                 &child->info, &child->dtree->cookie, child->udev_flags,
+                                 child->info.suspended)) {
+                       log_error("Unable to resume %s.", _node_name(child));
+                       /* If the device was not previously active, we might as well remove this node. */
+                       if (!child->info.live_table &&
+                           !_deactivate_node(child->name, child->info.major, child->info.minor,
+                                             &child->dtree->cookie, child->udev_flags, 0))
+                               log_error("Unable to deactivate %s.", _node_name(child));
+                       r = 0;
+                       /* Each child is handled independently */
+                       continue;
+               }
+
+               if (node_created) {
+                       /* Collect newly introduced devices for revert */
+                       dm_list_add_h(&dnode->activated, &child->activated_list);
+
+                       /* When creating new node also check transaction_id. */
+                       if (child->props.send_messages &&
+                           !_node_send_messages(child, uuid_prefix, uuid_prefix_len, 0)) {
+                               stack;
+                               if (!dm_udev_wait(dm_tree_get_cookie(dnode)))
+                                       stack;
+                               dm_tree_set_cookie(dnode, 0);
+                               (void) _dm_tree_revert_activated(dnode);
+                               r = 0;
+                               continue;
+                       }
+               }
+
+               /*
+                * Prepare for immediate synchronization with udev and flush all stacked
+                * dev node operations if requested by immediate_dev_node property. But
+                * finish processing current level in the tree first.
+                */
+               if (child->props.immediate_dev_node)
+                       update_devs_flag = 1;
+       }
+
+       if (update_devs_flag ||
+           (r && !dnode->info.exists && dnode->callback)) {
+               if (!dm_udev_wait(dm_tree_get_cookie(dnode)))
+                       stack;
+               dm_tree_set_cookie(dnode, 0);
+
+               if (r && !dnode->info.exists && dnode->callback &&
+                   !dnode->callback(dnode, DM_NODE_CALLBACK_PRELOADED,
+                                    dnode->callback_data))
+               {
+                       /* Try to deactivate what has been activated in preload phase */
+                       (void) _dm_tree_revert_activated(dnode);
+                       return_0;
+               }
+       }
+
+       return r;
+}
+
+/*
+ * Returns 1 if unsure.
+ */
+int dm_tree_children_use_uuid(struct dm_tree_node *dnode,
+                                const char *uuid_prefix,
+                                size_t uuid_prefix_len)
+{
+       void *handle = NULL;
+       struct dm_tree_node *child = dnode;
+       const char *uuid;
+
+       while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+               if (!(uuid = dm_tree_node_get_uuid(child))) {
+                       log_warn("WARNING: Failed to get uuid for dtree node %s.",
+                                _node_name(child));
+                       return 1;
+               }
+
+               if (_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+                       return 1;
+
+               if (dm_tree_node_num_children(child, 0))
+                       dm_tree_children_use_uuid(child, uuid_prefix, uuid_prefix_len);
+       }
+
+       return 0;
+}
+
+/*
+ * Target functions
+ */
+static struct load_segment *_add_segment(struct dm_tree_node *dnode, unsigned type, uint64_t size)
+{
+       struct load_segment *seg;
+
+       if (!(seg = dm_pool_zalloc(dnode->dtree->mem, sizeof(*seg)))) {
+               log_error("dtree node segment allocation failed");
+               return NULL;
+       }
+
+       seg->type = type;
+       seg->size = size;
+       dm_list_init(&seg->areas);
+       dm_list_add(&dnode->props.segs, &seg->list);
+       dnode->props.segment_count++;
+
+       return seg;
+}
+
/*
 * Add a snapshot-origin target segment of @size to @dnode, linked to
 * the origin device identified by @origin_uuid (which must already be
 * present in the tree).  Returns 1 on success, 0 on failure.
 */
int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node *dnode,
					    uint64_t size,
					    const char *origin_uuid)
{
	struct load_segment *seg;
	struct dm_tree_node *origin_node;

	if (!(seg = _add_segment(dnode, SEG_SNAPSHOT_ORIGIN, size)))
		return_0;

	if (!(origin_node = dm_tree_find_node_by_uuid(dnode->dtree, origin_uuid))) {
		log_error("Couldn't find snapshot origin uuid %s.", origin_uuid);
		return 0;
	}

	seg->origin = origin_node;
	if (!_link_tree_nodes(dnode, origin_node))
		return_0;

	/* Resume snapshot origins after new snapshots */
	dnode->activation_priority = 1;

	/*
	 * Don't resume the origin immediately in case it is a non-trivial
	 * target that must not be active more than once concurrently!
	 */
	origin_node->props.delay_resume_if_new = 1;

	return 1;
}
+
+/*
+ * Common helper for snapshot and snapshot-merge segments.
+ *
+ * A non-NULL merge_uuid selects SEG_SNAPSHOT_MERGE instead of SEG_SNAPSHOT.
+ * The merging snapshot node (if still in the tree) is remembered but
+ * deliberately not linked, so the activation priorities set below remain
+ * effective.  Returns 1 on success, 0 on error.
+ */
+static int _add_snapshot_target(struct dm_tree_node *node,
+                               uint64_t size,
+                               const char *origin_uuid,
+                               const char *cow_uuid,
+                               const char *merge_uuid,
+                               int persistent,
+                               uint32_t chunk_size)
+{
+       struct load_segment *seg;
+       struct dm_tree_node *origin_node, *cow_node, *merge_node;
+       unsigned seg_type;
+
+       seg_type = !merge_uuid ? SEG_SNAPSHOT : SEG_SNAPSHOT_MERGE;
+
+       if (!(seg = _add_segment(node, seg_type, size)))
+               return_0;
+
+       /* Both origin and COW devices must already exist in the tree. */
+       if (!(origin_node = dm_tree_find_node_by_uuid(node->dtree, origin_uuid))) {
+               log_error("Couldn't find snapshot origin uuid %s.", origin_uuid);
+               return 0;
+       }
+
+       seg->origin = origin_node;
+       if (!_link_tree_nodes(node, origin_node))
+               return_0;
+
+       if (!(cow_node = dm_tree_find_node_by_uuid(node->dtree, cow_uuid))) {
+               log_error("Couldn't find snapshot COW device uuid %s.", cow_uuid);
+               return 0;
+       }
+
+       seg->cow = cow_node;
+       if (!_link_tree_nodes(node, cow_node))
+               return_0;
+
+       seg->persistent = persistent ? 1 : 0;
+       seg->chunk_size = chunk_size;
+
+       if (merge_uuid) {
+               if (!(merge_node = dm_tree_find_node_by_uuid(node->dtree, merge_uuid))) {
+                       /* not a pure error, merging snapshot may have been deactivated */
+                       log_verbose("Couldn't find merging snapshot uuid %s.", merge_uuid);
+               } else {
+                       seg->merge = merge_node;
+                       /* must not link merging snapshot, would undermine activation_priority below */
+               }
+
+               /* Resume snapshot-merge (acting origin) after other snapshots */
+               node->activation_priority = 1;
+               if (seg->merge) {
+                       /* Resume merging snapshot after snapshot-merge */
+                       seg->merge->activation_priority = 2;
+               }
+       }
+
+       return 1;
+}
+
+
+/* Add a plain snapshot segment (no merge); see _add_snapshot_target(). */
+int dm_tree_node_add_snapshot_target(struct dm_tree_node *node,
+                                    uint64_t size,
+                                    const char *origin_uuid,
+                                    const char *cow_uuid,
+                                    int persistent,
+                                    uint32_t chunk_size)
+{
+       return _add_snapshot_target(node, size, origin_uuid, cow_uuid,
+                                   NULL, persistent, chunk_size);
+}
+
+/* Add a snapshot-merge segment; persistence is implied (always 1). */
+int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node *node,
+                                          uint64_t size,
+                                          const char *origin_uuid,
+                                          const char *cow_uuid,
+                                          const char *merge_uuid,
+                                          uint32_t chunk_size)
+{
+       return _add_snapshot_target(node, size, origin_uuid, cow_uuid,
+                                   merge_uuid, 1, chunk_size);
+}
+
+/* Add an "error" target segment of the given size. */
+int dm_tree_node_add_error_target(struct dm_tree_node *node,
+                                 uint64_t size)
+{
+       if (!_add_segment(node, SEG_ERROR, size))
+               return_0;
+
+       return 1;
+}
+
+/* Add a "zero" target segment of the given size. */
+int dm_tree_node_add_zero_target(struct dm_tree_node *node,
+                                uint64_t size)
+{
+       if (!_add_segment(node, SEG_ZERO, size))
+               return_0;
+
+       return 1;
+}
+
+/* Add a "linear" target segment; its area is attached separately via
+ * dm_tree_node_add_target_area(). */
+int dm_tree_node_add_linear_target(struct dm_tree_node *node,
+                                  uint64_t size)
+{
+       if (!_add_segment(node, SEG_LINEAR, size))
+               return_0;
+
+       return 1;
+}
+
+/* Add a "striped" target segment; the stripe areas themselves are added
+ * afterwards via dm_tree_node_add_target_area(). */
+int dm_tree_node_add_striped_target(struct dm_tree_node *node,
+                                   uint64_t size,
+                                   uint32_t stripe_size)
+{
+       struct load_segment *striped;
+
+       striped = _add_segment(node, SEG_STRIPED, size);
+       if (!striped)
+               return_0;
+
+       striped->stripe_size = stripe_size;
+
+       return 1;
+}
+
+/* Add a "crypt" target segment.  The cipher/chainmode/iv/key strings are
+ * referenced, not copied, into the segment. */
+int dm_tree_node_add_crypt_target(struct dm_tree_node *node,
+                                 uint64_t size,
+                                 const char *cipher,
+                                 const char *chainmode,
+                                 const char *iv,
+                                 uint64_t iv_offset,
+                                 const char *key)
+{
+       struct load_segment *crypt_seg;
+
+       crypt_seg = _add_segment(node, SEG_CRYPT, size);
+       if (!crypt_seg)
+               return_0;
+
+       crypt_seg->key = key;
+       crypt_seg->iv_offset = iv_offset;
+       crypt_seg->iv = iv;
+       crypt_seg->chainmode = chainmode;
+       crypt_seg->cipher = cipher;
+
+       return 1;
+}
+
+/*
+ * Attach mirror-log parameters to the node's last loaded segment.
+ *
+ * log_uuid == NULL leaves seg->log unset.  With DM_CORELOG the uuid is
+ * only recorded (a "pvmove" substring in it selects the stronger resume
+ * delay, see below); otherwise the log device node is looked up, linked
+ * below this node, and its resume delay is cleared so the kernel can
+ * validate the disk log's size.  Returns 1 on success, 0 on error.
+ */
+int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node,
+                                      uint32_t region_size,
+                                      unsigned clustered,
+                                      const char *log_uuid,
+                                      unsigned area_count,
+                                      uint32_t flags)
+{
+       struct dm_tree_node *log_node = NULL;
+       struct load_segment *seg;
+
+       if (!(seg = _get_last_load_segment(node)))
+               return_0;
+
+       if (log_uuid) {
+               if (!(seg->uuid = dm_pool_strdup(node->dtree->mem, log_uuid))) {
+                       log_error("log uuid pool_strdup failed");
+                       return 0;
+               }
+               if ((flags & DM_CORELOG))
+                       /* For pvmove: immediate resume (for size validation) isn't needed. */
+                       /* pvmove flag passed via unused UUID and its suffix */
+                       node->props.delay_resume_if_new = strstr(log_uuid, "pvmove") ? 2 : 1;
+               else {
+                       if (!(log_node = dm_tree_find_node_by_uuid(node->dtree, log_uuid))) {
+                               log_error("Couldn't find mirror log uuid %s.", log_uuid);
+                               return 0;
+                       }
+
+                       if (clustered)
+                               log_node->props.immediate_dev_node = 1;
+
+                       /* The kernel validates the size of disk logs. */
+                       /* FIXME Propagate to any devices below */
+                       log_node->props.delay_resume_if_new = 0;
+
+                       if (!_link_tree_nodes(node, log_node))
+                               return_0;
+               }
+       }
+
+       seg->log = log_node;
+       seg->region_size = region_size;
+       seg->clustered = clustered;
+       seg->mirror_area_count = area_count;
+       seg->flags = flags;
+
+       return 1;
+}
+
+/* Add a "mirror" target segment; its log is set via
+ * dm_tree_node_add_mirror_target_log() and its legs via
+ * dm_tree_node_add_target_area(). */
+int dm_tree_node_add_mirror_target(struct dm_tree_node *node,
+                                  uint64_t size)
+{
+       if (!_add_segment(node, SEG_MIRRORED, size))
+               return_0;
+
+       return 1;
+}
+
+/*
+ * Add a raid segment whose type is selected by name: p->raid_type must
+ * match a target name in _dm_segtypes[].
+ *
+ * Only the first 64-bit word of the rebuilds/writemostly bitmaps can be
+ * populated through this (v1) parameter struct; use the _v2 variant for
+ * larger raid sets.  Returns 1 on success, 0 on error.
+ */
+int dm_tree_node_add_raid_target_with_params(struct dm_tree_node *node,
+                                            uint64_t size,
+                                            const struct dm_tree_node_raid_params *p)
+{
+       unsigned i;
+       struct load_segment *seg = NULL;
+
+       /* Map the raid type name to its SEG_* type via the segtype table. */
+       for (i = 0; i < DM_ARRAY_SIZE(_dm_segtypes) && !seg; ++i)
+               if (!strcmp(p->raid_type, _dm_segtypes[i].target))
+                       if (!(seg = _add_segment(node,
+                                                _dm_segtypes[i].type, size)))
+                               return_0;
+       if (!seg) {
+               log_error("Unsupported raid type %s.", p->raid_type);
+               return 0;
+       }
+
+       seg->region_size = p->region_size;
+       seg->stripe_size = p->stripe_size;
+       /* Areas (raid legs) are appended later via dm_tree_node_add_target_area(). */
+       seg->area_count = 0;
+       memset(seg->rebuilds, 0, sizeof(seg->rebuilds));
+       seg->rebuilds[0] = p->rebuilds;
+       memset(seg->writemostly, 0, sizeof(seg->writemostly));
+       seg->writemostly[0] = p->writemostly;
+       seg->writebehind = p->writebehind;
+       seg->min_recovery_rate = p->min_recovery_rate;
+       seg->max_recovery_rate = p->max_recovery_rate;
+       seg->flags = p->flags;
+
+       return 1;
+}
+
+/* Legacy raid entry point: packs the individual arguments into a
+ * dm_tree_node_raid_params struct and forwards to the _with_params
+ * variant. */
+int dm_tree_node_add_raid_target(struct dm_tree_node *node,
+                                uint64_t size,
+                                const char *raid_type,
+                                uint32_t region_size,
+                                uint32_t stripe_size,
+                                uint64_t rebuilds,
+                                uint64_t flags)
+{
+       struct dm_tree_node_raid_params params = {
+               .flags = flags,
+               .rebuilds = rebuilds,
+               .stripe_size = stripe_size,
+               .region_size = region_size,
+               .raid_type = raid_type
+       };
+
+       return dm_tree_node_add_raid_target_with_params(node, size, &params);
+}
+
+/*
+ * Version 2 of dm_tree_node_add_raid_target() allowing for:
+ *
+ * - maximum 253 legs in a raid set (MD kernel limitation)
+ * - delta_disks for disk add/remove reshaping
+ * - data_offset for out-of-place reshaping
+ * - data_copies to cope with odd numbers of raid10 disks
+ */
+int dm_tree_node_add_raid_target_with_params_v2(struct dm_tree_node *node,
+                                               uint64_t size,
+                                               const struct dm_tree_node_raid_params_v2 *p)
+{
+       unsigned i;
+       struct load_segment *seg = NULL;
+
+       /* Map the raid type name to its SEG_* type via the segtype table. */
+       for (i = 0; i < DM_ARRAY_SIZE(_dm_segtypes) && !seg; ++i)
+               if (!strcmp(p->raid_type, _dm_segtypes[i].target))
+                       if (!(seg = _add_segment(node,
+                                                _dm_segtypes[i].type, size)))
+                               return_0;
+       if (!seg) {
+               log_error("Unsupported raid type %s.", p->raid_type);
+               return 0;
+       }
+
+       seg->region_size = p->region_size;
+       seg->stripe_size = p->stripe_size;
+       /* Areas (raid legs) are appended later via dm_tree_node_add_target_area(). */
+       seg->area_count = 0;
+       seg->delta_disks = p->delta_disks;
+       seg->data_offset = p->data_offset;
+       /* Unlike v1, the full rebuilds/writemostly bitmaps are copied. */
+       memcpy(seg->rebuilds, p->rebuilds, sizeof(seg->rebuilds));
+       memcpy(seg->writemostly, p->writemostly, sizeof(seg->writemostly));
+       seg->writebehind = p->writebehind;
+       seg->data_copies = p->data_copies;
+       seg->min_recovery_rate = p->min_recovery_rate;
+       seg->max_recovery_rate = p->max_recovery_rate;
+       seg->flags = p->flags;
+
+       return 1;
+}
+
+/*
+ * Add a "cache" target segment to node.
+ *
+ * feature_flags   DM_CACHE_FEATURE_* bits; at most one mode bit
+ *                 (writethrough/writeback/passthrough) may be set.
+ * metadata_uuid, data_uuid, origin_uuid
+ *                 uuids of tree nodes for the cache metadata, cache data
+ *                 and origin devices; all must already exist in the tree.
+ * policy_name     cache policy name; "cleaner" forces writethrough mode.
+ * policy_settings optional config tree of <key> = <int> policy parameters.
+ * data_block_size in sectors; must lie within
+ *                 DM_CACHE_{MIN,MAX}_DATA_BLOCK_SIZE.
+ *
+ * Returns 1 on success, 0 on error.
+ */
+int dm_tree_node_add_cache_target(struct dm_tree_node *node,
+                                 uint64_t size,
+                                 uint64_t feature_flags, /* DM_CACHE_FEATURE_* */
+                                 const char *metadata_uuid,
+                                 const char *data_uuid,
+                                 const char *origin_uuid,
+                                 const char *policy_name,
+                                 const struct dm_config_node *policy_settings,
+                                 uint32_t data_block_size)
+{
+       struct dm_config_node *cn;
+       struct load_segment *seg;
+       static const uint64_t _modemask =
+               DM_CACHE_FEATURE_PASSTHROUGH |
+               DM_CACHE_FEATURE_WRITETHROUGH |
+               DM_CACHE_FEATURE_WRITEBACK;
+
+       /* Detect unknown (bigger) feature bit */
+       if (feature_flags >= (DM_CACHE_FEATURE_METADATA2 * 2)) {
+               log_error("Unsupported cache's feature flags set " FMTu64 ".",
+                         feature_flags);
+               return 0;
+       }
+
+       switch (feature_flags & _modemask) {
+       case DM_CACHE_FEATURE_PASSTHROUGH:
+       case DM_CACHE_FEATURE_WRITEBACK:
+               if (strcmp(policy_name, "cleaner") == 0) {
+                       /* Enforce writethrough mode for cleaner policy.
+                        * FIX: clear only the mode bits ('&=' was '=', which
+                        * set every bit outside the mask). */
+                       feature_flags &= ~_modemask;
+                       feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH;
+               }
+                /* Fall through */
+       case DM_CACHE_FEATURE_WRITETHROUGH:
+               break;
+       default:
+               /* Zero or multiple mode bits set. */
+               log_error("Invalid cache's feature flag " FMTu64 ".",
+                         feature_flags);
+               return 0;
+       }
+
+       if (data_block_size < DM_CACHE_MIN_DATA_BLOCK_SIZE) {
+               log_error("Data block size %u is lower then %u sectors.",
+                         data_block_size, DM_CACHE_MIN_DATA_BLOCK_SIZE);
+               return 0;
+       }
+
+       if (data_block_size > DM_CACHE_MAX_DATA_BLOCK_SIZE) {
+               log_error("Data block size %u is higher then %u sectors.",
+                         data_block_size, DM_CACHE_MAX_DATA_BLOCK_SIZE);
+               return 0;
+       }
+
+       if (!(seg = _add_segment(node, SEG_CACHE, size)))
+               return_0;
+
+       if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree,
+                                                   data_uuid))) {
+               log_error("Missing cache's data uuid %s.",
+                         data_uuid);
+               return 0;
+       }
+       if (!_link_tree_nodes(node, seg->pool))
+               return_0;
+
+       if (!(seg->metadata = dm_tree_find_node_by_uuid(node->dtree,
+                                                       metadata_uuid))) {
+               log_error("Missing cache's metadata uuid %s.",
+                         metadata_uuid);
+               return 0;
+       }
+       if (!_link_tree_nodes(node, seg->metadata))
+               return_0;
+
+       if (!(seg->origin = dm_tree_find_node_by_uuid(node->dtree,
+                                                     origin_uuid))) {
+               /* FIX: report the origin uuid, not the metadata uuid. */
+               log_error("Missing cache's origin uuid %s.",
+                         origin_uuid);
+               return 0;
+       }
+       if (!_link_tree_nodes(node, seg->origin))
+               return_0;
+
+       seg->data_block_size = data_block_size;
+       seg->flags = feature_flags;
+       seg->policy_name = policy_name;
+
+       /* FIXME: better validation missing */
+       if (policy_settings) {
+               if (!(seg->policy_settings = dm_config_clone_node_with_mem(node->dtree->mem, policy_settings, 0)))
+                       return_0;
+
+               for (cn = seg->policy_settings->child; cn; cn = cn->sib) {
+                       if (!cn->v || (cn->v->type != DM_CFG_INT)) {
+                               /* For now only <key> = <int> pairs are supported */
+                               log_error("Cache policy parameter %s is without integer value.", cn->key);
+                               return 0;
+                       }
+                       seg->policy_argc++;
+               }
+       }
+
+       return 1;
+}
+
+/* Stub kept for API compatibility: replicator segments are not supported;
+ * always logs an error and returns 0. */
+int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
+                                      uint64_t size,
+                                      const char *rlog_uuid,
+                                      const char *rlog_type,
+                                      unsigned rsite_index,
+                                      dm_replicator_mode_t mode,
+                                      uint32_t async_timeout,
+                                      uint64_t fall_behind_data,
+                                      uint32_t fall_behind_ios)
+{
+       log_error("Replicator segment is unsupported.");
+       return 0;
+}
+
+/* Appends device node to Replicator.
+ * Stub kept for API compatibility: replicator targets are not supported;
+ * always logs an error and returns 0. */
+int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node,
+                                          uint64_t size,
+                                          const char *replicator_uuid,
+                                          uint64_t rdevice_index,
+                                          const char *rdev_uuid,
+                                          unsigned rsite_index,
+                                          const char *slog_uuid,
+                                          uint32_t slog_flags,
+                                          uint32_t slog_region_size)
+{
+       /* FIX: error message typo "targer" -> "target". */
+       log_error("Replicator target is unsupported.");
+       return 0;
+}
+
+/*
+ * Return the node's load segment after verifying the node carries exactly
+ * one segment and that it is of the expected 'type'.  Returns NULL on
+ * mismatch or when no segment exists.
+ *
+ * NOTE(review): the error paths index _dm_segtypes[] directly by segment
+ * type, which assumes the table is ordered by the SEG_* enum — confirm.
+ */
+static struct load_segment *_get_single_load_segment(struct dm_tree_node *node,
+                                                    unsigned type)
+{
+       struct load_segment *seg;
+
+       if (!(seg = _get_last_load_segment(node)))
+               return_NULL;
+
+       /* Never used past _load_node(), so can test segment_count */
+       if (node->props.segment_count != 1) {
+               log_error("Node %s must have only one segment.",
+                         _dm_segtypes[type].target);
+               return NULL;
+       }
+
+       if (seg->type != type) {
+               log_error("Node %s has segment type %s.",
+                         _dm_segtypes[type].target,
+                         _dm_segtypes[seg->type].target);
+               return NULL;
+       }
+
+       return seg;
+}
+
+/* Validate that a thin device_id does not exceed DM_THIN_MAX_DEVICE_ID.
+ * Returns 1 when valid, 0 (with an error logged) otherwise. */
+static int _thin_validate_device_id(uint32_t device_id)
+{
+       if (device_id <= DM_THIN_MAX_DEVICE_ID)
+               return 1;
+
+       log_error("Device id %u is higher then %u.",
+                 device_id, DM_THIN_MAX_DEVICE_ID);
+
+       return 0;
+}
+
+/*
+ * Add a "thin-pool" target segment to node.
+ *
+ * metadata_uuid/pool_uuid must name existing tree nodes (metadata and
+ * data devices); both are linked below this node.  data_block_size is in
+ * sectors and must lie within DM_THIN_{MIN,MAX}_DATA_BLOCK_SIZE.
+ * Pool messages are only sent when transaction_id > 0 (send_messages).
+ *
+ * Returns 1 on success, 0 on error.
+ */
+int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node,
+                                     uint64_t size,
+                                     uint64_t transaction_id,
+                                     const char *metadata_uuid,
+                                     const char *pool_uuid,
+                                     uint32_t data_block_size,
+                                     uint64_t low_water_mark,
+                                     unsigned skip_block_zeroing)
+{
+       struct load_segment *seg, *mseg;
+       uint64_t devsize = 0;   /* Accumulated size of the metadata device's segments */
+
+       if (data_block_size < DM_THIN_MIN_DATA_BLOCK_SIZE) {
+               log_error("Data block size %u is lower then %u sectors.",
+                         data_block_size, DM_THIN_MIN_DATA_BLOCK_SIZE);
+               return 0;
+       }
+
+       if (data_block_size > DM_THIN_MAX_DATA_BLOCK_SIZE) {
+               log_error("Data block size %u is higher then %u sectors.",
+                         data_block_size, DM_THIN_MAX_DATA_BLOCK_SIZE);
+               return 0;
+       }
+
+       if (!(seg = _add_segment(node, SEG_THIN_POOL, size)))
+               return_0;
+
+       if (!(seg->metadata = dm_tree_find_node_by_uuid(node->dtree, metadata_uuid))) {
+               log_error("Missing metadata uuid %s.", metadata_uuid);
+               return 0;
+       }
+
+       if (!_link_tree_nodes(node, seg->metadata))
+               return_0;
+
+       /* Clamp the metadata device to DM_THIN_MAX_METADATA_SIZE by trimming
+        * the segment that crosses the limit. */
+       /* FIXME: more complex target may need more tweaks */
+       dm_list_iterate_items(mseg, &seg->metadata->props.segs) {
+               devsize += mseg->size;
+               if (devsize > DM_THIN_MAX_METADATA_SIZE) {
+                       log_debug_activation("Ignoring %" PRIu64 " of device.",
+                                            devsize - DM_THIN_MAX_METADATA_SIZE);
+                       mseg->size -= (devsize - DM_THIN_MAX_METADATA_SIZE);
+                       devsize = DM_THIN_MAX_METADATA_SIZE;
+                       /* FIXME: drop remaining segs */
+               }
+       }
+
+       if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) {
+               log_error("Missing pool uuid %s.", pool_uuid);
+               return 0;
+       }
+
+       if (!_link_tree_nodes(node, seg->pool))
+               return_0;
+
+       /* Clean flag delay_resume_if_new - so corelog gets resumed */
+       seg->metadata->props.delay_resume_if_new = 0;
+       seg->pool->props.delay_resume_if_new = 0;
+
+       /* Preload must not resume extended running thin-pool before it's committed */
+       node->props.delay_resume_if_extended = 1;
+
+       /* Validate only transaction_id > 0 when activating thin-pool */
+       node->props.send_messages = transaction_id ? 1 : 0;
+       seg->transaction_id = transaction_id;
+       seg->low_water_mark = low_water_mark;
+       seg->data_block_size = data_block_size;
+       seg->skip_block_zeroing = skip_block_zeroing;
+       dm_list_init(&seg->thin_messages);
+
+       return 1;
+}
+
+/*
+ * Queue a thin-pool message on the node's single thin-pool segment.
+ *
+ * id1/id2 are interpreted according to 'type': device id and origin id
+ * for snapshot creation, device id for create/delete, current and new
+ * transaction ids for SET_TRANSACTION_ID.  On success the node is marked
+ * as having messages to send (props.send_messages = 2).
+ *
+ * expected_errno records an errno tolerated for that message — presumably
+ * when the message was already applied earlier; TODO confirm against the
+ * message-sending code.
+ */
+int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node,
+                                      dm_thin_message_t type,
+                                      uint64_t id1, uint64_t id2)
+{
+       struct thin_message *tm;
+       struct load_segment *seg;
+
+       if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
+               return_0;
+
+       if (!(tm = dm_pool_zalloc(node->dtree->mem, sizeof (*tm)))) {
+               log_error("Failed to allocate thin message.");
+               return 0;
+       }
+
+       switch (type) {
+       case DM_THIN_MESSAGE_CREATE_SNAP:
+               /* If the thin origin is active, it must be suspended first! */
+               if (id1 == id2) {
+                       log_error("Cannot use same device id for origin and its snapshot.");
+                       return 0;
+               }
+               if (!_thin_validate_device_id(id1) ||
+                   !_thin_validate_device_id(id2))
+                       return_0;
+               tm->message.u.m_create_snap.device_id = id1;
+               tm->message.u.m_create_snap.origin_id = id2;
+               break;
+       case DM_THIN_MESSAGE_CREATE_THIN:
+               if (!_thin_validate_device_id(id1))
+                       return_0;
+               tm->message.u.m_create_thin.device_id = id1;
+               tm->expected_errno = EEXIST;
+               break;
+       case DM_THIN_MESSAGE_DELETE:
+               if (!_thin_validate_device_id(id1))
+                       return_0;
+               tm->message.u.m_delete.device_id = id1;
+               tm->expected_errno = ENODATA;
+               break;
+       case DM_THIN_MESSAGE_SET_TRANSACTION_ID:
+               if ((id1 + 1) != id2) {
+                       log_error("New transaction id must be sequential.");
+                       return 0; /* FIXME: Maybe too strict here? */
+               }
+               if (id2 != seg->transaction_id) {
+                       log_error("Current transaction id is different from thin pool.");
+                       return 0; /* FIXME: Maybe too strict here? */
+               }
+               tm->message.u.m_set_transaction_id.current_id = id1;
+               tm->message.u.m_set_transaction_id.new_id = id2;
+               break;
+       default:
+               log_error("Unsupported message type %d.", (int) type);
+               return 0;
+       }
+
+       tm->message.type = type;
+       dm_list_add(&seg->thin_messages, &tm->list);
+       /* Higher value >1 identifies there are really some messages */
+       node->props.send_messages = 2;
+
+       return 1;
+}
+
+/* Record discard handling flags on the node's single thin-pool segment. */
+int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node,
+                                      unsigned ignore,
+                                      unsigned no_passdown)
+{
+       struct load_segment *pool_seg;
+
+       pool_seg = _get_single_load_segment(node, SEG_THIN_POOL);
+       if (!pool_seg)
+               return_0;
+
+       pool_seg->no_discard_passdown = no_passdown;
+       pool_seg->ignore_discard = ignore;
+
+       return 1;
+}
+
+/* Record the error_if_no_space flag on the node's single thin-pool segment. */
+int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node,
+                                                unsigned error_if_no_space)
+{
+       struct load_segment *pool_seg;
+
+       pool_seg = _get_single_load_segment(node, SEG_THIN_POOL);
+       if (!pool_seg)
+               return_0;
+
+       pool_seg->error_if_no_space = error_if_no_space;
+
+       return 1;
+}
+
+/* Record the read_only flag on the node's single thin-pool segment. */
+int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node,
+                                        unsigned read_only)
+{
+       struct load_segment *pool_seg;
+
+       pool_seg = _get_single_load_segment(node, SEG_THIN_POOL);
+       if (!pool_seg)
+               return_0;
+
+       pool_seg->read_only = read_only;
+
+       return 1;
+}
+
+/*
+ * Add a "thin" target segment of the given size, backed by the thin-pool
+ * node identified by pool_uuid and addressed by device_id within it.
+ * The pool node must already exist in the tree; it is linked below node.
+ * Returns 1 on success, 0 on error.
+ */
+int dm_tree_node_add_thin_target(struct dm_tree_node *node,
+                                uint64_t size,
+                                const char *pool_uuid,
+                                uint32_t device_id)
+{
+       struct dm_tree_node *pool;
+       struct load_segment *seg;
+
+       if (!(pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) {
+               log_error("Missing thin pool uuid %s.", pool_uuid);
+               return 0;
+       }
+
+       if (!_link_tree_nodes(node, pool))
+               return_0;
+
+       /* Validate before allocating the segment. */
+       if (!_thin_validate_device_id(device_id))
+               return_0;
+
+       if (!(seg = _add_segment(node, SEG_THIN, size)))
+               return_0;
+
+       seg->pool = pool;
+       seg->device_id = device_id;
+
+       return 1;
+}
+
+/*
+ * Attach an external origin node (looked up by uuid) to the node's single
+ * thin segment.  The external origin must already exist in the tree; it
+ * is linked below node.  Returns 1 on success, 0 on error.
+ */
+int dm_tree_node_set_thin_external_origin(struct dm_tree_node *node,
+                                         const char *external_uuid)
+{
+       struct dm_tree_node *external;
+       struct load_segment *seg;
+
+       if (!(seg = _get_single_load_segment(node, SEG_THIN)))
+               return_0;
+
+       if (!(external = dm_tree_find_node_by_uuid(node->dtree,
+                                                  external_uuid))) {
+               log_error("Missing thin external origin uuid %s.",
+                         external_uuid);
+               return 0;
+       }
+
+       if (!_link_tree_nodes(node, external))
+               return_0;
+
+       seg->external = external;
+
+       return 1;
+}
+
+/* Allocate a seg_area for dev_node/offset and append it to seg's area
+ * list.  dev_node may be NULL (used for "null" areas). */
+static int _add_area(struct dm_tree_node *node, struct load_segment *seg, struct dm_tree_node *dev_node, uint64_t offset)
+{
+       struct seg_area *new_area;
+
+       new_area = dm_pool_zalloc(node->dtree->mem, sizeof (*new_area));
+       if (!new_area) {
+               log_error("Failed to allocate target segment area.");
+               return 0;
+       }
+
+       new_area->offset = offset;
+       new_area->dev_node = dev_node;
+
+       seg->area_count++;
+       dm_list_add(&seg->areas, &new_area->list);
+
+       return 1;
+}
+
+/*
+ * Append a device area to the node's last loaded segment.
+ *
+ * The device is resolved either by uuid (looked up in the tree and
+ * linked below node) or, when uuid is NULL/empty, by dev_name (must be
+ * an existing block device; its node is added to the tree via _add_dev).
+ * Returns 1 on success, 0 on error.
+ */
+int dm_tree_node_add_target_area(struct dm_tree_node *node,
+                                const char *dev_name,
+                                const char *uuid,
+                                uint64_t offset)
+{
+       struct load_segment *seg;
+       struct stat info;
+       struct dm_tree_node *dev_node;
+
+       if ((!dev_name || !*dev_name) && (!uuid || !*uuid)) {
+               log_error("dm_tree_node_add_target_area called without device");
+               return 0;
+       }
+
+       if (uuid) {
+               if (!(dev_node = dm_tree_find_node_by_uuid(node->dtree, uuid))) {
+                       log_error("Couldn't find area uuid %s.", uuid);
+                       return 0;
+               }
+               if (!_link_tree_nodes(node, dev_node))
+                       return_0;
+       } else {
+               if (stat(dev_name, &info) < 0) {
+                       log_error("Device %s not found.", dev_name);
+                       return 0;
+               }
+
+               if (!S_ISBLK(info.st_mode)) {
+                       log_error("Device %s is not a block device.", dev_name);
+                       return 0;
+               }
+
+               /* FIXME Check correct macro use */
+               if (!(dev_node = _add_dev(node->dtree, node, MAJOR(info.st_rdev),
+                                         MINOR(info.st_rdev), 0, 0)))
+                       return_0;
+       }
+
+       if (!(seg = _get_last_load_segment(node)))
+               return_0;
+
+       if (!_add_area(node, seg, dev_node, offset))
+               return_0;
+
+       return 1;
+}
+
+/*
+ * Append an area with no backing device (dev_node == NULL) to the node's
+ * last loaded segment.  Only permitted for the raid segment types listed
+ * below.  Returns 1 on success, 0 on error.
+ */
+int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset)
+{
+       struct load_segment *seg;
+
+       if (!(seg = _get_last_load_segment(node)))
+               return_0;
+
+       switch (seg->type) {
+       case SEG_RAID0:
+       case SEG_RAID0_META:
+       case SEG_RAID1:
+       case SEG_RAID4:
+       case SEG_RAID5_N:
+       case SEG_RAID5_LA:
+       case SEG_RAID5_RA:
+       case SEG_RAID5_LS:
+       case SEG_RAID5_RS:
+       case SEG_RAID6_N_6:
+       case SEG_RAID6_ZR:
+       case SEG_RAID6_NR:
+       case SEG_RAID6_NC:
+       case SEG_RAID6_LS_6:
+       case SEG_RAID6_RS_6:
+       case SEG_RAID6_LA_6:
+       case SEG_RAID6_RA_6:
+               break;
+       default:
+               log_error("dm_tree_node_add_null_area() called on an unsupported segment type");
+               return 0;
+       }
+
+       if (!_add_area(node, seg, NULL, offset))
+               return_0;
+
+       return 1;
+}
+
+/* Store a per-node callback function together with its opaque data
+ * pointer on dnode. */
+void dm_tree_node_set_callback(struct dm_tree_node *dnode,
+                              dm_node_callback_fn cb, void *data)
+{
+       dnode->callback_data = data;
+       dnode->callback = cb;
+}
+
+#if defined(__GNUC__)
+/*
+ * Backward compatible implementations.
+ *
+ * Keep these at the end of the file to make sure that
+ * no code in this file accidentally calls it.
+ */
+
+/* Backward compatible dm_tree_node_size_changed() implementations. */
+int dm_tree_node_size_changed_base(const struct dm_tree_node *dnode);
+int dm_tree_node_size_changed_base(const struct dm_tree_node *dnode)
+{
+       /* The base variant only reports whether the size changed at all,
+        * not whether it shrank or grew. */
+       return !!dm_tree_node_size_changed(dnode);
+}
+
+/*
+ * Retain ABI compatibility after adding the DM_CACHE_FEATURE_METADATA2
+ * in version 1.02.138.
+ *
+ * Binaries compiled against version 1.02.138 onwards will use
+ * the new function dm_tree_node_add_cache_target which detects unknown
+ * feature flags and returns error for them.
+ */
+int dm_tree_node_add_cache_target_base(struct dm_tree_node *node,
+                                      uint64_t size,
+                                      uint64_t feature_flags, /* DM_CACHE_FEATURE_* */
+                                      const char *metadata_uuid,
+                                      const char *data_uuid,
+                                      const char *origin_uuid,
+                                      const char *policy_name,
+                                      const struct dm_config_node *policy_settings,
+                                      uint32_t data_block_size);
+int dm_tree_node_add_cache_target_base(struct dm_tree_node *node,
+                                      uint64_t size,
+                                      uint64_t feature_flags,
+                                      const char *metadata_uuid,
+                                      const char *data_uuid,
+                                      const char *origin_uuid,
+                                      const char *policy_name,
+                                      const struct dm_config_node *policy_settings,
+                                      uint32_t data_block_size)
+{
+       /* Old version supported only these FEATURE bits, others were ignored so masked them */
+       static const uint64_t _mask =
+               DM_CACHE_FEATURE_WRITEBACK |
+               DM_CACHE_FEATURE_WRITETHROUGH |
+               DM_CACHE_FEATURE_PASSTHROUGH;
+
+       /* Unknown/new feature bits (e.g. METADATA2) are silently dropped here. */
+       return dm_tree_node_add_cache_target(node, size, feature_flags & _mask,
+                                            metadata_uuid, data_uuid, origin_uuid,
+                                            policy_name, policy_settings, data_block_size);
+}
+#endif
diff --git a/device_mapper/libdm-file.c b/device_mapper/libdm-file.c
new file mode 100644 (file)
index 0000000..5c6d223
--- /dev/null
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <sys/file.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+
+/* Return 1 iff @path exists and is a directory; log and return 0
+ * otherwise. */
+static int _is_dir(const char *path)
+{
+       struct stat info;
+
+       if (stat(path, &info) < 0) {
+               log_sys_error("stat", path);
+               return 0;
+       }
+
+       if (S_ISDIR(info.st_mode))
+               return 1;
+
+       log_error("Existing path %s is not "
+                 "a directory.", path);
+       return 0;
+}
+
+/*
+ * Create directory @dir together with any missing parent components
+ * (mode 0777, subject to umask).  Returns 1 on success, 0 on failure.
+ * EEXIST is tolerated when the existing path is a directory; mkdir
+ * failures on read-only filesystems (EROFS) are not logged.
+ */
+static int _create_dir_recursive(const char *dir)
+{
+       char *orig, *s;
+       int rc, r = 0;
+
+       log_verbose("Creating directory \"%s\"", dir);
+       /* Create parent directories */
+       /* Walk a mutable copy, terminating at each '/' in turn so every
+        * parent prefix gets created before the next component. */
+       orig = s = dm_strdup(dir);
+       if (!s) {
+               log_error("Failed to duplicate directory name.");
+               return 0;
+       }
+
+       while ((s = strchr(s, '/')) != NULL) {
+               *s = '\0';
+               if (*orig) {
+                       rc = mkdir(orig, 0777);
+                       if (rc < 0) {
+                               if (errno == EEXIST) {
+                                       /* NOTE(review): goto_out presumably logs and jumps to out - confirm macro */
+                                       if (!_is_dir(orig))
+                                               goto_out;
+                               } else {
+                                       if (errno != EROFS)
+                                               log_sys_error("mkdir", orig);
+                                       goto out;
+                               }
+                       }
+               }
+               /* Restore the separator and continue past it. */
+               *s++ = '/';
+       }
+
+       /* Create final directory */
+       rc = mkdir(dir, 0777);
+       if (rc < 0) {
+               if (errno == EEXIST) {
+                       if (!_is_dir(dir))
+                               goto_out;
+               } else {
+                       if (errno != EROFS)
+                               log_sys_error("mkdir", orig);
+                       goto out;
+               }
+       }
+
+       r = 1;
+out:
+       dm_free(orig);
+       return r;
+}
+
+/* Ensure directory @dir exists, creating missing parents as needed.
+ * An empty path or an already-existing directory is success. */
+int dm_create_dir(const char *dir)
+{
+       struct stat st;
+
+       /* Nothing to do for an empty path. */
+       if (!*dir)
+               return 1;
+
+       /* Already present as a directory? */
+       if (stat(dir, &st) == 0 && S_ISDIR(st.st_mode))
+               return 1;
+
+       if (!_create_dir_recursive(dir))
+               return_0;
+
+       return 1;
+}
+
+/* Return 1 if directory @dir contains no entries besides "." and
+ * "..", 0 otherwise (or when it cannot be opened). */
+int dm_is_empty_dir(const char *dir)
+{
+       DIR *d;
+       struct dirent *entry;
+
+       if (!(d = opendir(dir))) {
+               log_sys_error("opendir", dir);
+               return 0;
+       }
+
+       /* Stop at the first entry that is neither "." nor "..". */
+       while ((entry = readdir(d)))
+               if (strcmp(entry->d_name, ".") && strcmp(entry->d_name, ".."))
+                       break;
+
+       if (closedir(d))
+               log_sys_error("closedir", dir);
+
+       /* entry is non-NULL only when a real entry was found. */
+       return entry ? 0 : 1;
+}
+
+int dm_fclose(FILE *stream)
+{
+       int prev_fail = ferror(stream);
+       int fclose_fail = fclose(stream);
+
+       /* If there was a previous failure, but fclose succeeded,
+          clear errno, since ferror does not set it, and its value
+          may be unrelated to the ferror-reported failure.  */
+       if (prev_fail && !fclose_fail)
+               errno = 0;
+
+       return prev_fail || fclose_fail ? EOF : 0;
+}
+
+/*
+ * Create @lockfile, take an exclusive fcntl() write lock on it and
+ * record our pid in it.  On success the descriptor is deliberately
+ * left open (marked close-on-exec) so the lock stays held for the
+ * process lifetime.  Returns 1 on success, 0 on failure.
+ */
+int dm_create_lockfile(const char *lockfile)
+{
+       int fd, value;
+       size_t bufferlen;
+       ssize_t write_out;
+       struct flock lock;
+       char buffer[50];
+       int retries = 0;
+
+       if ((fd = open(lockfile, O_CREAT | O_WRONLY,
+                      (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))) < 0) {
+               log_error("Cannot open lockfile [%s], error was [%s]",
+                         lockfile, strerror(errno));
+               return 0;
+       }
+
+       lock.l_type = F_WRLCK;
+       lock.l_start = 0;
+       lock.l_whence = SEEK_SET;
+       lock.l_len = 0;
+retry_fcntl:
+       if (fcntl(fd, F_SETLK, &lock) < 0) {
+               switch (errno) {
+               case EINTR:
+                       goto retry_fcntl;
+               case EACCES:
+               case EAGAIN:
+                       /* Lock held by someone else: retry up to 20
+                        * times, 1ms apart, before giving up. */
+                       if (retries == 20) {
+                               log_error("Cannot lock lockfile [%s], error was [%s]",
+                                         lockfile, strerror(errno));
+                               break;
+                       } else {
+                               ++ retries;
+                               usleep(1000);
+                               goto retry_fcntl;
+                       }
+               default:
+                       log_error("process is already running");
+               }
+
+               goto fail_close;
+       }
+
+       if (ftruncate(fd, 0) < 0) {
+               log_error("Cannot truncate pidfile [%s], error was [%s]",
+                         lockfile, strerror(errno));
+
+               goto fail_close_unlink;
+       }
+
+       /* pid_t is signed; cast to match the "%u" conversion. */
+       snprintf(buffer, sizeof(buffer), "%u\n", (unsigned) getpid());
+
+       bufferlen = strlen(buffer);
+       write_out = write(fd, buffer, bufferlen);
+
+       if ((write_out < 0) || (write_out == 0 && errno)) {
+               log_error("Cannot write pid to pidfile [%s], error was [%s]",
+                         lockfile, strerror(errno));
+
+               goto fail_close_unlink;
+       }
+
+       if ((write_out == 0) || ((size_t)write_out < bufferlen)) {
+               /* write_out is non-negative here, so the size_t cast
+                * matching PRIsize_t is safe. */
+               log_error("Cannot write pid to pidfile [%s], shortwrite of "
+                         "[%" PRIsize_t "] bytes, expected [%" PRIsize_t "]\n",
+                         lockfile, (size_t) write_out, bufferlen);
+
+               goto fail_close_unlink;
+       }
+
+       if ((value = fcntl(fd, F_GETFD, 0)) < 0) {
+               log_error("Cannot get close-on-exec flag from pidfile [%s], "
+                         "error was [%s]", lockfile, strerror(errno));
+
+               goto fail_close_unlink;
+       }
+       value |= FD_CLOEXEC;
+       if (fcntl(fd, F_SETFD, value) < 0) {
+               log_error("Cannot set close-on-exec flag from pidfile [%s], "
+                         "error was [%s]", lockfile, strerror(errno));
+
+               goto fail_close_unlink;
+       }
+
+       /* Keep fd open: closing it would release the lock. */
+       return 1;
+
+fail_close_unlink:
+       if (unlink(lockfile))
+               log_sys_debug("unlink", lockfile);
+fail_close:
+       if (close(fd))
+               log_sys_debug("close", lockfile);
+
+       return 0;
+}
+
+/* Return 1 if another process currently holds the write lock on
+ * @lockfile (i.e. a daemon is running), 0 otherwise. */
+int dm_daemon_is_running(const char* lockfile)
+{
+       struct flock lock;
+       int fd;
+
+       if ((fd = open(lockfile, O_RDONLY)) < 0)
+               return 0;
+
+       /* Probe for the whole-file write lock a daemon would hold. */
+       lock.l_type = F_WRLCK;
+       lock.l_whence = SEEK_SET;
+       lock.l_start = 0;
+       lock.l_len = 0;
+
+       if (fcntl(fd, F_GETLK, &lock) < 0) {
+               log_error("Cannot check lock status of lockfile [%s], error was [%s]",
+                         lockfile, strerror(errno));
+               if (close(fd))
+                       stack;
+               return 0;
+       }
+
+       if (close(fd))
+               stack;
+
+       /* F_UNLCK back from F_GETLK means nobody holds the lock. */
+       return (lock.l_type == F_UNLCK) ? 0 : 1;
+}
diff --git a/device_mapper/libdm-report.c b/device_mapper/libdm-report.c
new file mode 100644 (file)
index 0000000..3a48c3f
--- /dev/null
@@ -0,0 +1,5104 @@
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <ctype.h>
+#include <math.h>  /* fabs() */
+#include <float.h> /* DBL_EPSILON */
+#include <time.h>
+
+/*
+ * Internal flags
+ */
+#define RH_SORT_REQUIRED       0x00000100
+#define RH_HEADINGS_PRINTED    0x00000200
+#define RH_FIELD_CALC_NEEDED   0x00000400
+#define RH_ALREADY_REPORTED    0x00000800
+
+struct selection {
+       struct dm_pool *mem;
+       struct selection_node *selection_root;
+       int add_new_fields;
+};
+
+struct report_group_item;
+
+struct dm_report {
+       struct dm_pool *mem;
+
+       /**
+        * Cache the first row allocated so that all rows and fields
+        * can be disposed of in a single dm_pool_free() call.
+        */
+       struct row *first_row;
+
+       /* To report all available types */
+#define REPORT_TYPES_ALL       UINT32_MAX
+       uint32_t report_types;
+       const char *output_field_name_prefix;
+       const char *field_prefix;
+       uint32_t flags;
+       const char *separator;
+
+       uint32_t keys_count;
+
+       /* Ordered list of fields needed for this report */
+       struct dm_list field_props;
+
+       /* Rows of report data */
+       struct dm_list rows;
+
+       /* Array of field definitions */
+       const struct dm_report_field_type *fields;
+       const char **canonical_field_ids;
+       const struct dm_report_object_type *types;
+
+       /* To store caller private data */
+       void *private;
+
+       /* Selection handle */
+       struct selection *selection;
+
+       /* Null-terminated array of reserved values */
+       const struct dm_report_reserved_value *reserved_values;
+       struct dm_hash_table *value_cache;
+
+       struct report_group_item *group_item;
+};
+
+struct dm_report_group {
+       dm_report_group_type_t type;
+       struct dm_pool *mem;
+       struct dm_list items;
+       int indent;
+};
+
+struct report_group_item {
+       struct dm_list list;
+       struct dm_report_group *group;
+       struct dm_report *report;
+       union {
+               uint32_t orig_report_flags;
+               uint32_t finished_count;
+       } store;
+       struct report_group_item *parent;
+       unsigned output_done:1;
+       unsigned needs_closing:1;
+       void *data;
+};
+
+/*
+ * Internal per-field flags
+ */
+#define FLD_HIDDEN     0x00001000
+#define FLD_SORT_KEY   0x00002000
+#define FLD_ASCENDING  0x00004000
+#define FLD_DESCENDING 0x00008000
+#define FLD_COMPACTED  0x00010000
+#define FLD_COMPACT_ONE 0x00020000
+
+struct field_properties {
+       struct dm_list list;
+       uint32_t field_num;
+       uint32_t sort_posn;
+       int32_t initial_width;
+       int32_t width; /* current width: adjusted by dm_report_object() */
+       const struct dm_report_object_type *type;
+       uint32_t flags;
+       int implicit;
+};
+
+/*
+ * Report selection
+ */
+struct op_def {
+       const char *string;
+       uint32_t flags;
+       const char *desc;
+};
+
+#define FLD_CMP_MASK           0x0FF00000
+#define FLD_CMP_UNCOMPARABLE   0x00100000
+#define FLD_CMP_EQUAL          0x00200000
+#define FLD_CMP_NOT            0x00400000
+#define FLD_CMP_GT             0x00800000
+#define FLD_CMP_LT             0x01000000
+#define FLD_CMP_REGEX          0x02000000
+#define FLD_CMP_NUMBER         0x04000000
+#define FLD_CMP_TIME           0x08000000
+/*
+ * #define FLD_CMP_STRING 0x10000000
+ * We could define FLD_CMP_STRING here for completeness here,
+ * but it's not needed - we can check operator compatibility with
+ * field type by using FLD_CMP_REGEX, FLD_CMP_NUMBER and
+ * FLD_CMP_TIME flags only.
+ */
+
+/*
+ * When defining operators, always define longer one before
+ * shorter one if one is a prefix of another!
+ * (e.g. =~ comes before =)
+*/
+static struct op_def _op_cmp[] = {
+       { "=~", FLD_CMP_REGEX, "Matching regular expression. [regex]" },
+       { "!~", FLD_CMP_REGEX|FLD_CMP_NOT, "Not matching regular expression. [regex]" },
+       { "=", FLD_CMP_EQUAL, "Equal to. [number, size, percent, string, string list, time]" },
+       { "!=", FLD_CMP_NOT|FLD_CMP_EQUAL, "Not equal to. [number, size, percent, string, string_list, time]" },
+       { ">=", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL, "Greater than or equal to. [number, size, percent, time]" },
+       { ">", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_GT, "Greater than. [number, size, percent, time]" },
+       { "<=", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL, "Less than or equal to. [number, size, percent, time]" },
+       { "<", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_LT, "Less than. [number, size, percent, time]" },
+       { "since", FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL, "Since specified time (same as '>='). [time]" },
+       { "after", FLD_CMP_TIME|FLD_CMP_GT, "After specified time (same as '>'). [time]"},
+       { "until", FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL, "Until specified time (same as '<='). [time]"},
+       { "before", FLD_CMP_TIME|FLD_CMP_LT, "Before specified time (same as '<'). [time]"},
+       { NULL, 0, NULL }
+};
+
+#define SEL_MASK               0x000000FF
+#define SEL_ITEM               0x00000001
+#define SEL_AND                0x00000002
+#define SEL_OR                 0x00000004
+
+#define SEL_MODIFIER_MASK      0x00000F00
+#define SEL_MODIFIER_NOT       0x00000100
+
+#define SEL_PRECEDENCE_MASK    0x0000F000
+#define SEL_PRECEDENCE_PS      0x00001000
+#define SEL_PRECEDENCE_PE      0x00002000
+
+#define SEL_LIST_MASK          0x000F0000
+#define SEL_LIST_LS            0x00010000
+#define SEL_LIST_LE            0x00020000
+#define SEL_LIST_SUBSET_LS     0x00040000
+#define SEL_LIST_SUBSET_LE     0x00080000
+
+static struct op_def _op_log[] = {
+       { "&&", SEL_AND, "All fields must match" },
+       { ",", SEL_AND, "All fields must match" },
+       { "||", SEL_OR, "At least one field must match" },
+       { "#", SEL_OR, "At least one field must match" },
+       { "!", SEL_MODIFIER_NOT, "Logical negation" },
+       { "(", SEL_PRECEDENCE_PS, "Left parenthesis" },
+       { ")", SEL_PRECEDENCE_PE, "Right parenthesis" },
+       { "[", SEL_LIST_LS, "List start" },
+       { "]", SEL_LIST_LE, "List end"},
+       { "{", SEL_LIST_SUBSET_LS, "List subset start"},
+       { "}", SEL_LIST_SUBSET_LE, "List subset end"},
+       { NULL,  0, NULL},
+};
+
+struct selection_str_list {
+       struct dm_str_list str_list;
+       unsigned type;                  /* either SEL_AND or SEL_OR */
+};
+
+struct field_selection_value {
+       union {
+               const char *s;
+               uint64_t i;
+               time_t t;
+               double d;
+               struct dm_regex *r;
+               struct selection_str_list *l;
+       } v;
+       struct field_selection_value *next;
+};
+
+struct field_selection {
+       struct field_properties *fp;
+       uint32_t flags;
+       struct field_selection_value *value;
+};
+
+struct selection_node {
+       struct dm_list list;
+       uint32_t type;
+       union {
+               struct field_selection *item;
+               struct dm_list set;
+       } selection;
+};
+
+struct reserved_value_wrapper {
+       const char *matched_name;
+       const struct dm_report_reserved_value *reserved;
+       const void *value;
+};
+
+/*
+ * Report data field
+ */
+struct dm_report_field {
+       struct dm_list list;
+       struct field_properties *props;
+
+       const char *report_string;      /* Formatted ready for display */
+       const void *sort_value;         /* Raw value for sorting */
+};
+
+struct row {
+       struct dm_list list;
+       struct dm_report *rh;
+       struct dm_list fields;                    /* Fields in display order */
+       struct dm_report_field *(*sort_fields)[]; /* Fields in sort order */
+       int selected;
+       struct dm_report_field *field_sel_status;
+};
+
+/*
+ * Implicit report types and fields.
+ */
+#define SPECIAL_REPORT_TYPE 0x80000000
+#define SPECIAL_FIELD_SELECTED_ID "selected"
+#define SPECIAL_FIELD_HELP_ID "help"
+#define SPECIAL_FIELD_HELP_ALT_ID "?"
+
+/* data_fn hook for the implicit "special" report type: there is no
+ * backing object, so always yield NULL. */
+static void *_null_returning_fn(void *obj __attribute__((unused)))
+{
+       return NULL;
+}
+
+/* Report function for pseudo-fields that produce no output of their
+ * own (the "help"/"?" fields): does nothing and reports success. */
+static int _no_report_fn(struct dm_report *rh __attribute__((unused)),
+                        struct dm_pool *mem __attribute__((unused)),
+                        struct dm_report_field *field __attribute__((unused)),
+                        const void *data __attribute__((unused)),
+                        void *private __attribute__((unused)))
+{
+       return 1;
+}
+
+/* Display function for the implicit "selected" field: reports the
+ * row's selected flag (set when it passed selection criteria) as an
+ * integer field. */
+static int _selected_disp(struct dm_report *rh,
+                         struct dm_pool *mem __attribute__((unused)),
+                         struct dm_report_field *field,
+                         const void *data,
+                         void *private __attribute__((unused)))
+{
+       const struct row *row = (const struct row *)data;
+       return dm_report_field_int(rh, field, &row->selected);
+}
+
+static const struct dm_report_object_type _implicit_special_report_types[] = {
+       { SPECIAL_REPORT_TYPE, "Special", "special_", _null_returning_fn },
+       { 0, "", "", NULL }
+};
+
+static const struct dm_report_field_type _implicit_special_report_fields[] = {
+       { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ID, "Help", _no_report_fn, "Show help." },
+       { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ALT_ID, "Help", _no_report_fn, "Show help." },
+       { 0, 0, 0, 0, "", "", 0, 0}
+};
+
+static const struct dm_report_field_type _implicit_special_report_fields_with_selection[] = {
+       { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER, 0, 8, SPECIAL_FIELD_SELECTED_ID, "Selected", _selected_disp, "Set if item passes selection criteria." },
+       { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ID, "Help", _no_report_fn, "Show help." },
+       { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ALT_ID, "Help", _no_report_fn, "Show help." },
+       { 0, 0, 0, 0, "", "", 0, 0}
+};
+
+static const struct dm_report_object_type *_implicit_report_types = _implicit_special_report_types;
+static const struct dm_report_field_type *_implicit_report_fields = _implicit_special_report_fields;
+
+/* Resolve a report_type id to its object-type descriptor.  Implicit
+ * (built-in) types are searched before caller-registered ones; both
+ * arrays are terminated by an entry with a NULL data_fn.  Returns
+ * NULL when the id is unknown. */
+static const struct dm_report_object_type *_find_type(struct dm_report *rh,
+                                                     uint32_t report_type)
+{
+       const struct dm_report_object_type *t;
+
+       for (t = _implicit_report_types; t->data_fn; t++)
+               if (t->id == report_type)
+                       return t;
+
+       for (t = rh->types; t->data_fn; t++)
+               if (t->id == report_type)
+                       return t;
+
+       return NULL;
+}
+
+/*
+ * Data-munging functions to prepare each data type for display and sorting
+ */
+
+/* Set a string field: duplicate *data into the report pool; the copy
+ * serves both as the displayed string and as the sort key. */
+int dm_report_field_string(struct dm_report *rh,
+                          struct dm_report_field *field, const char *const *data)
+{
+       char *copy;
+
+       copy = dm_pool_strdup(rh->mem, *data);
+       if (!copy) {
+               log_error("dm_report_field_string: dm_pool_strdup failed");
+               return 0;
+       }
+
+       field->report_string = copy;
+       field->sort_value = (const void *) field->report_string;
+
+       return 1;
+}
+
+/* Set a percent field: the display string has two decimal places,
+ * DM_PERCENT_INVALID reports as the empty string, and the raw
+ * dm_percent_t value (widened to uint64_t) is the sort key. */
+int dm_report_field_percent(struct dm_report *rh,
+                           struct dm_report_field *field,
+                           const dm_percent_t *data)
+{
+       char *repstr;
+       uint64_t *sortval;
+
+       if (!(sortval = dm_pool_alloc(rh->mem, sizeof(uint64_t)))) {
+               log_error("dm_report_field_percent: dm_pool_alloc failed for sort_value.");
+               return 0;
+       }
+
+       *sortval = (uint64_t)(*data);
+
+       /* Invalid percentages display as empty but still sort. */
+       if (*data == DM_PERCENT_INVALID) {
+               dm_report_field_set_value(field, "", sortval);
+               return 1;
+       }
+
+       if (!(repstr = dm_pool_alloc(rh->mem, 8))) {
+               /* NOTE(review): freeing sortval here presumably also releases
+                * any later allocations per dm_pool semantics - confirm */
+               dm_pool_free(rh->mem, sortval);
+               log_error("dm_report_field_percent: dm_pool_alloc failed for percent report string.");
+               return 0;
+       }
+
+       if (dm_snprintf(repstr, 7, "%.2f", dm_percent_to_round_float(*data, 2)) < 0) {
+               dm_pool_free(rh->mem, sortval);
+               log_error("dm_report_field_percent: percentage too large.");
+               return 0;
+       }
+
+       dm_report_field_set_value(field, repstr, sortval);
+       return 1;
+}
+
+/* Position and length of one element inside the joined report_string. */
+struct str_list_sort_value_item {
+       unsigned pos;
+       size_t len;
+};
+
+/* Sort key for a string-list field: the joined string plus an index
+ * array; items[0].len holds the element count (see
+ * _report_field_string_list for the layout). */
+struct str_list_sort_value {
+       const char *value;
+       struct str_list_sort_value_item *items;
+};
+
+/* Temporary per-element record used while sorting the list. */
+struct str_list_sort_item {
+       const char *str;
+       struct str_list_sort_value_item item;
+};
+
+/* qsort() comparator: order str_list_sort_items lexicographically by
+ * their string member. */
+static int _str_list_sort_item_cmp(const void *a, const void *b)
+{
+       const struct str_list_sort_item *lhs = (const struct str_list_sort_item *) a;
+       const struct str_list_sort_item *rhs = (const struct str_list_sort_item *) b;
+
+       return strcmp(lhs->str, rhs->str);
+}
+
+/*
+ * Build report_string and sort_value for a string-list field.
+ * @data is a dm_list of struct dm_str_list; elements are joined with
+ * @delimiter (defaults to ",").  When @sort is set the displayed
+ * string is sorted too; the sort_value index is always sorted.
+ * Returns 1 on success, 0 on failure.
+ */
+static int _report_field_string_list(struct dm_report *rh,
+                                    struct dm_report_field *field,
+                                    const struct dm_list *data,
+                                    const char *delimiter,
+                                    int sort)
+{
+       static const char _string_list_grow_object_failed_msg[] = "dm_report_field_string_list: dm_pool_grow_object_failed";
+       struct str_list_sort_value *sort_value = NULL;
+       unsigned int list_size, pos, i;
+       struct str_list_sort_item *arr = NULL;
+       struct dm_str_list *sl;
+       size_t delimiter_len, len;
+       void *object;
+       int r = 0;
+
+       if (!(sort_value = dm_pool_zalloc(rh->mem, sizeof(struct str_list_sort_value)))) {
+               log_error("dm_report_field_string_list: dm_pool_zalloc failed for sort_value");
+               return 0;
+       }
+
+       list_size = dm_list_size(data);
+
+       /*
+        * Sort value stores the pointer to the report_string and then
+        * position and length for each list element within the report_string.
+        * The first element stores number of elements in 'len' (therefore
+        * list_size + 1 is used below for the extra element).
+        * For example, with this input:
+        *   sort = 0;  (we don't want to report sorted)
+        *   report_string = "abc,xy,defgh";  (this is reported)
+        *
+        * ...we end up with:
+        *   sort_value->value = report_string; (we'll use the original report_string for indices)
+        *   sort_value->items[0] = {0,3};  (we have 3 items)
+        *   sort_value->items[1] = {0,3};  ("abc")
+        *   sort_value->items[2] = {7,5};  ("defgh")
+        *   sort_value->items[3] = {4,2};  ("xy")
+        *
+        *   The items alone are always sorted while in report_string they can be
+        *   sorted or not (based on "sort" arg) - it depends on how we prefer to
+        *   display the list. Having items sorted internally helps with searching
+        *   through them.
+        */
+       if (!(sort_value->items = dm_pool_zalloc(rh->mem, (list_size + 1) * sizeof(struct str_list_sort_value_item)))) {
+               /* Message typo fixed: "fiel" -> "field". */
+               log_error("dm_report_field_string_list: dm_pool_zalloc failed for sort value items");
+               goto out;
+       }
+       sort_value->items[0].len = list_size;
+
+       /* zero items */
+       if (!list_size) {
+               sort_value->value = field->report_string = "";
+               field->sort_value = sort_value;
+               return 1;
+       }
+
+       /* one item */
+       if (list_size == 1) {
+               sl = (struct dm_str_list *) dm_list_first(data);
+               if (!sl ||
+                   !(sort_value->value = field->report_string = dm_pool_strdup(rh->mem, sl->str))) {
+                       log_error("dm_report_field_string_list: dm_pool_strdup failed");
+                       goto out;
+               }
+               sort_value->items[1].pos = 0;
+               sort_value->items[1].len = strlen(sl->str);
+               field->sort_value = sort_value;
+               return 1;
+       }
+
+       /* more than one item - sort the list */
+       if (!(arr = dm_malloc(sizeof(struct str_list_sort_item) * list_size))) {
+               log_error("dm_report_field_string_list: dm_malloc failed");
+               goto out;
+       }
+
+       if (!(dm_pool_begin_object(rh->mem, 256))) {
+               log_error(_string_list_grow_object_failed_msg);
+               goto out;
+       }
+
+       if (!delimiter)
+               delimiter = ",";
+       delimiter_len = strlen(delimiter);
+
+       i = pos = len = 0;
+       dm_list_iterate_items(sl, data) {
+               arr[i].str = sl->str;
+               if (!sort) {
+                       /* sorted output not required - report the list as it is */
+                       len = strlen(sl->str);
+                       if (!dm_pool_grow_object(rh->mem, arr[i].str, len) ||
+                           (i+1 != list_size && !dm_pool_grow_object(rh->mem, delimiter, delimiter_len))) {
+                               log_error(_string_list_grow_object_failed_msg);
+                               goto out;
+                       }
+                       arr[i].item.pos = pos;
+                       arr[i].item.len = len;
+                       pos = i+1 == list_size ? pos+len : pos+len+delimiter_len;
+               }
+               i++;
+       }
+
+       qsort(arr, i, sizeof(struct str_list_sort_item), _str_list_sort_item_cmp);
+
+       for (i = 0, pos = 0; i < list_size; i++) {
+               if (sort) {
+                       /* sorted output required - report the list as sorted */
+                       len = strlen(arr[i].str);
+                       if (!dm_pool_grow_object(rh->mem, arr[i].str, len) ||
+                           (i+1 != list_size && !dm_pool_grow_object(rh->mem, delimiter, delimiter_len))) {
+                               log_error(_string_list_grow_object_failed_msg);
+                               goto out;
+                       }
+                       /*
+                        * Save position and length of the string
+                        * element in report_string for sort_value.
+                        * Use i+1 here since items[0] stores list size!!!
+                        */
+                       sort_value->items[i+1].pos = pos;
+                       sort_value->items[i+1].len = len;
+                       pos = i+1 == list_size ? pos+len : pos+len+delimiter_len;
+               } else {
+                       sort_value->items[i+1].pos = arr[i].item.pos;
+                       sort_value->items[i+1].len = arr[i].item.len;
+               }
+       }
+
+       if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
+               log_error(_string_list_grow_object_failed_msg);
+               goto out;
+       }
+
+       object = dm_pool_end_object(rh->mem);
+       sort_value->value = object;
+       field->sort_value = sort_value;
+       field->report_string = object;
+       r = 1;
+out:
+       /* NOTE(review): error paths taken after dm_pool_begin_object()
+        * rely on dm_pool_free(sort_value) also discarding the
+        * in-progress object - confirm against dm_pool semantics. */
+       if (!r && sort_value)
+               dm_pool_free(rh->mem, sort_value);
+       dm_free(arr);
+
+       return r;
+}
+
+/* Set a string-list field with both the displayed string and the
+ * internal sort index sorted. */
+int dm_report_field_string_list(struct dm_report *rh,
+                               struct dm_report_field *field,
+                               const struct dm_list *data,
+                               const char *delimiter)
+{
+       return _report_field_string_list(rh, field, data, delimiter, 1);
+}
+
+/* Set a string-list field, keeping the displayed string in its
+ * original (unsorted) order. */
+int dm_report_field_string_list_unsorted(struct dm_report *rh,
+                                        struct dm_report_field *field,
+                                        const struct dm_list *data,
+                                        const char *delimiter)
+{
+       /*
+        * The raw value is always sorted, just the string reported is unsorted.
+        * Having the raw value always sorted helps when matching selection list
+        * with selection criteria.
+        */
+       return _report_field_string_list(rh, field, data, delimiter, 0);
+}
+
+/* Set an int field: format it for display ("-2147483648" needs 11
+ * chars, so a 13-byte buffer is ample) and keep the value widened to
+ * uint64_t as the sort key. */
+int dm_report_field_int(struct dm_report *rh,
+                       struct dm_report_field *field, const int *data)
+{
+       const int value = *data;
+       char *buf;
+       uint64_t *skey;
+
+       if (!(buf = dm_pool_zalloc(rh->mem, 13))) {
+               log_error("dm_report_field_int: dm_pool_alloc failed");
+               return 0;
+       }
+
+       if (!(skey = dm_pool_alloc(rh->mem, sizeof(int64_t)))) {
+               log_error("dm_report_field_int: dm_pool_alloc failed");
+               return 0;
+       }
+
+       if (dm_snprintf(buf, 12, "%d", value) < 0) {
+               log_error("dm_report_field_int: int too big: %d", value);
+               return 0;
+       }
+
+       /* NOTE(review): negative values wrap when stored in the
+        * unsigned sort key - matches the original behaviour. */
+       *skey = (uint64_t) value;
+       field->sort_value = skey;
+       field->report_string = buf;
+
+       return 1;
+}
+
+/* Set a uint32 field: format for display (max 10 digits + NUL fits
+ * the 12-byte buffer) and store the value widened to uint64_t as the
+ * sort key. */
+int dm_report_field_uint32(struct dm_report *rh,
+                          struct dm_report_field *field, const uint32_t *data)
+{
+       const uint32_t value = *data;
+       uint64_t *sortval;
+       char *repstr;
+
+       if (!(repstr = dm_pool_zalloc(rh->mem, 12))) {
+               log_error("dm_report_field_uint32: dm_pool_alloc failed");
+               return 0;
+       }
+
+       if (!(sortval = dm_pool_alloc(rh->mem, sizeof(uint64_t)))) {
+               log_error("dm_report_field_uint32: dm_pool_alloc failed");
+               return 0;
+       }
+
+       if (dm_snprintf(repstr, 11, "%u", value) < 0) {
+               log_error("dm_report_field_uint32: uint32 too big: %u", value);
+               return 0;
+       }
+
+       *sortval = (uint64_t) value;
+       field->sort_value = sortval;
+       field->report_string = repstr;
+
+       return 1;
+}
+
+/* Set an int32 field: 13 bytes covers "-2147483648" plus NUL; the
+ * sort key stores the value widened to uint64_t. */
+int dm_report_field_int32(struct dm_report *rh,
+                         struct dm_report_field *field, const int32_t *data)
+{
+       const int32_t value = *data;
+       char *repr;
+       uint64_t *skey;
+
+       if (!(repr = dm_pool_zalloc(rh->mem, 13))) {
+               log_error("dm_report_field_int32: dm_pool_alloc failed");
+               return 0;
+       }
+
+       if (!(skey = dm_pool_alloc(rh->mem, sizeof(int64_t)))) {
+               log_error("dm_report_field_int32: dm_pool_alloc failed");
+               return 0;
+       }
+
+       if (dm_snprintf(repr, 12, "%d", value) < 0) {
+               log_error("dm_report_field_int32: int32 too big: %d", value);
+               return 0;
+       }
+
+       /* NOTE(review): negative values wrap in the unsigned sort key,
+        * preserving the original sort behaviour. */
+       *skey = (uint64_t) value;
+       field->sort_value = skey;
+       field->report_string = repr;
+
+       return 1;
+}
+
+/* Set a uint64 field: format for display (max 20 digits + NUL fits
+ * the 22-byte buffer) and keep the raw value as the sort key. */
+int dm_report_field_uint64(struct dm_report *rh,
+                          struct dm_report_field *field, const uint64_t *data)
+{
+       const uint64_t value = *data;
+       uint64_t *sortval;
+       char *repstr;
+
+       if (!(repstr = dm_pool_zalloc(rh->mem, 22))) {
+               log_error("dm_report_field_uint64: dm_pool_alloc failed");
+               return 0;
+       }
+
+       if (!(sortval = dm_pool_alloc(rh->mem, sizeof(uint64_t)))) {
+               log_error("dm_report_field_uint64: dm_pool_alloc failed");
+               return 0;
+       }
+
+       if (dm_snprintf(repstr, 21, FMTu64 , value) < 0) {
+               log_error("dm_report_field_uint64: uint64 too big: %" PRIu64, value);
+               return 0;
+       }
+
+       *sortval = value;
+       field->sort_value = sortval;
+       field->report_string = repstr;
+
+       return 1;
+}
+
+/*
+ * Helper functions for custom report functions
+ */
+/*
+ * Custom-field helper: install a pre-formatted report string and optional
+ * sort value.  If sortvalue is NULL the string itself is used as the sort
+ * key (GNU "?:" elides the middle operand), which is flagged as an internal
+ * error for numeric fields since string order differs from numeric order.
+ */
+void dm_report_field_set_value(struct dm_report_field *field, const void *value, const void *sortvalue)
+{
+       field->report_string = (const char *) value;
+       field->sort_value = sortvalue ? : value;
+
+       if ((field->sort_value == value) &&
+           (field->props->flags & DM_REPORT_FIELD_TYPE_NUMBER))
+               log_warn(INTERNAL_ERROR "Using string as sort value for numerical field.");
+}
+
+/* Map a DM_REPORT_FIELD_TYPE_* value onto its human-readable name. */
+static const char *_get_field_type_name(unsigned field_type)
+{
+       if (field_type == DM_REPORT_FIELD_TYPE_STRING)
+               return "string";
+       if (field_type == DM_REPORT_FIELD_TYPE_NUMBER)
+               return "number";
+       if (field_type == DM_REPORT_FIELD_TYPE_SIZE)
+               return "size";
+       if (field_type == DM_REPORT_FIELD_TYPE_PERCENT)
+               return "percent";
+       if (field_type == DM_REPORT_FIELD_TYPE_TIME)
+               return "time";
+       if (field_type == DM_REPORT_FIELD_TYPE_STRING_LIST)
+               return "string list";
+
+       return "unknown";
+}
+
+/*
+ * show help message
+ */
+/* Return the length of the longest field id in a registered field array. */
+static size_t _get_longest_field_id_len(const struct dm_report_field_type *fields)
+{
+       size_t len, longest = 0;
+       uint32_t i;
+
+       for (i = 0; fields[i].report_fn; i++) {
+               len = strlen(fields[i].id);
+               if (len > longest)
+                       longest = len;
+       }
+
+       return longest;
+}
+
+/*
+ * Print one help section per object type: a "<desc> Fields" heading, an
+ * optional "<prefix>all" entry, then one line per field padded to id_len.
+ * Sections are detected by comparing desc *pointers*, so the field array
+ * is expected to be grouped by type.
+ */
+static void _display_fields_more(struct dm_report *rh,
+                                const struct dm_report_field_type *fields,
+                                size_t id_len, int display_all_fields_item,
+                                int display_field_types)
+{
+       uint32_t f;
+       const struct dm_report_object_type *type;
+       const char *desc, *last_desc = "";
+
+       /* Widen id_len so both the field ids and "<prefix>all" entries fit. */
+       for (f = 0; fields[f].report_fn; f++)
+               if (strlen(fields[f].id) > id_len)
+                       id_len = strlen(fields[f].id);
+
+       for (type = rh->types; type->data_fn; type++)
+               if (strlen(type->prefix) + 3 > id_len)
+                       id_len = strlen(type->prefix) + 3;
+
+       for (f = 0; fields[f].report_fn; f++) {
+               if ((type = _find_type(rh, fields[f].type)) && type->desc)
+                       desc = type->desc;
+               else
+                       desc = " ";
+               /* Pointer (not string) comparison: a new desc starts a section. */
+               if (desc != last_desc) {
+                       if (*last_desc)
+                               log_warn(" ");
+                       log_warn("%s Fields", desc);
+                       log_warn("%*.*s", (int) strlen(desc) + 7,
+                                (int) strlen(desc) + 7,
+                                "-------------------------------------------------------------------------------");
+                       /* Guard against an unregistered field type: _find_type()
+                        * above may have returned NULL, in which case type must
+                        * not be dereferenced here. */
+                       if (display_all_fields_item && type && type->id != SPECIAL_REPORT_TYPE)
+                               log_warn("  %sall%-*s - %s", type->prefix,
+                                        (int) (id_len - 3 - strlen(type->prefix)), "",
+                                        "All fields in this section.");
+               }
+               /* FIXME Add line-wrapping at terminal width (or 80 cols) */
+               log_warn("  %-*s - %s%s%s%s%s", (int) id_len, fields[f].id, fields[f].desc,
+                                             display_field_types ? " [" : "",
+                                             display_field_types ? fields[f].flags & FLD_CMP_UNCOMPARABLE ? "unselectable " : "" : "",
+                                             display_field_types ? _get_field_type_name(fields[f].flags & DM_REPORT_FIELD_TYPE_MASK) : "",
+                                             display_field_types ? "]" : "");
+               last_desc = desc;
+       }
+}
+
+/*
+ * show help message
+ */
+/*
+ * Emit the full help listing: the concrete report fields followed by the
+ * implicit (special) fields, both aligned to the longest field id found
+ * in either table.
+ */
+static void _display_fields(struct dm_report *rh, int display_all_fields_item,
+                           int display_field_types)
+{
+       size_t implicit_len, regular_len, id_len;
+
+       implicit_len = _get_longest_field_id_len(_implicit_report_fields);
+       regular_len = _get_longest_field_id_len(rh->fields);
+       id_len = (implicit_len > regular_len) ? implicit_len : regular_len;
+
+       _display_fields_more(rh, rh->fields, id_len, display_all_fields_item,
+                            display_field_types);
+       log_warn(" ");
+       _display_fields_more(rh, _implicit_report_fields, id_len,
+                            display_all_fields_item, display_field_types);
+}
+
+/*
+ * Initialise report handle
+ */
+/*
+ * Initialise a field_properties entry from the registered field table
+ * (implicit selects the library's internal table over rh->fields).
+ * Returns 0 if the field's object type is not registered with rh.
+ */
+static int _copy_field(struct dm_report *rh, struct field_properties *dest,
+                      uint32_t field_num, int implicit)
+{
+       const struct dm_report_field_type *fields = implicit ? _implicit_report_fields
+                                                            : rh->fields;
+
+       dest->field_num = field_num;
+       dest->initial_width = fields[field_num].width;
+       dest->width = fields[field_num].width; /* adjusted in _do_report_object() */
+       dest->flags = fields[field_num].flags & DM_REPORT_FIELD_MASK;
+       dest->implicit = implicit;
+
+       /* set object type method */
+       dest->type = _find_type(rh, fields[field_num].type);
+       if (!dest->type) {
+               log_error("dm_report: field not match: %s",
+                         fields[field_num].id);
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Allocate and register field_properties for one output field, appending
+ * it to rh->field_props.  Returns the new entry or NULL on failure.
+ */
+static struct field_properties * _add_field(struct dm_report *rh,
+                                           uint32_t field_num, int implicit,
+                                           uint32_t flags)
+{
+       struct field_properties *fp;
+
+       if (!(fp = dm_pool_zalloc(rh->mem, sizeof(*fp)))) {
+               log_error("dm_report: struct field_properties allocation "
+                         "failed");
+               return NULL;
+       }
+
+       if (!_copy_field(rh, fp, field_num, implicit)) {
+               stack;
+               dm_pool_free(rh->mem, fp);
+               return NULL;
+       }
+
+       fp->flags |= flags;
+
+       /*
+        * Place hidden fields at the front so dm_list_end() will
+        * tell us when we've reached the last visible field.
+        */
+       if (fp->flags & FLD_HIDDEN)
+               dm_list_add_h(&rh->field_props, &fp->list);
+       else
+               dm_list_add(&rh->field_props, &fp->list);
+
+       return fp;
+}
+
+/*
+ * Copy up to flen chars of field into canonical_field, dropping every
+ * underscore.  *differs (if non-NULL) reports whether anything was
+ * dropped.  Fails (returning 0) if the stripped name would not fit in
+ * fcanonical_len including the terminating NUL.
+ */
+static int _get_canonical_field_name(const char *field,
+                                    size_t flen,
+                                    char *canonical_field,
+                                    size_t fcanonical_len,
+                                    int *differs)
+{
+       size_t i;
+       int diff = 0;
+
+       for (i = 0; *field && flen; field++, flen--) {
+               if (*field == '_') {
+                       diff = 1;
+                       continue;
+               }
+               if ((i + 1) >= fcanonical_len) {
+                       canonical_field[0] = '\0';
+                       /* NOTE(review): prints from the current position and
+                        * relies on field being NUL-terminated beyond flen -
+                        * confirm all callers pass NUL-terminated input. */
+                       log_error("%s: field name too long.", field);
+                       return 0;
+               }
+               canonical_field[i++] = *field;
+       }
+
+       canonical_field[i] = '\0';
+       if (differs)
+               *differs = diff;
+       return 1;
+}
+
+/*
+ * Compare canonical_name1 against canonical_name2 or prefix
+ * plus canonical_name2. Canonical name is a name where all
+ * superfluous characters are removed (underscores for now).
+ * Both names are always null-terminated.
+ */
+static int _is_same_field(const char *canonical_name1, const char *canonical_name2,
+                         const char *prefix)
+{
+       size_t prefix_len;
+
+       /* Exact match? */
+       if (!strcasecmp(canonical_name1, canonical_name2))
+               return 1;
+
+       /* Match including prefix? */
+       /* strlen - 1 drops the prefix's trailing separator (e.g. "lv_" -> "lv")
+        * so it can be compared against canonical (underscore-free) names.
+        * NOTE(review): assumes prefix is non-empty - an empty prefix would
+        * make prefix_len wrap to SIZE_MAX; confirm callers guarantee this. */
+       prefix_len = strlen(prefix) - 1;
+       if (!strncasecmp(prefix, canonical_name1, prefix_len) &&
+           !strcasecmp(canonical_name1 + prefix_len, canonical_name2))
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Check for a report type prefix + "all" match.
+ */
+/*
+ * OR into *report_types the id of every object type whose prefix matches
+ * field ("<prefix>all", or just "<prefix>" when unprefixed_all_matched is
+ * set).  The field name is canonicalized (underscores stripped) first.
+ */
+static void _all_match_combine(const struct dm_report_object_type *types,
+                              unsigned unprefixed_all_matched,
+                              const char *field, size_t flen,
+                              uint32_t *report_types)
+{
+       char field_canon[DM_REPORT_FIELD_TYPE_ID_LEN];
+       const struct dm_report_object_type *t;
+       size_t prefix_len;
+
+       if (!_get_canonical_field_name(field, flen, field_canon, sizeof(field_canon), NULL))
+               return;
+       flen = strlen(field_canon);
+
+       for (t = types; t->data_fn; t++) {
+               /* Drop the prefix's trailing separator (see _is_same_field). */
+               prefix_len = strlen(t->prefix) - 1;
+
+               if (!strncasecmp(t->prefix, field_canon, prefix_len) &&
+                   ((unprefixed_all_matched && (flen == prefix_len)) ||
+                    (!strncasecmp(field_canon + prefix_len, "all", 3) &&
+                     (flen == prefix_len + 3))))
+                       *report_types |= t->id;
+       }
+}
+
+/*
+ * Resolve an "all" (or "<prefix>all") field spec to a bitmask of report
+ * types; returns 0 when the spec matches nothing.
+ */
+static uint32_t _all_match(struct dm_report *rh, const char *field, size_t flen)
+{
+       uint32_t report_types = 0;
+       unsigned unprefixed_all_matched = 0;
+
+       if (!strncasecmp(field, "all", 3) && flen == 3) {
+               /* If there's no report prefix, match all report types */
+               /* (GNU "?:" - fall back to every known type when none set.) */
+               if (!(flen = strlen(rh->field_prefix)))
+                       return rh->report_types ? : REPORT_TYPES_ALL;
+
+               /* otherwise include all fields beginning with the report prefix. */
+               unprefixed_all_matched = 1;
+               field = rh->field_prefix;
+               report_types = rh->report_types;
+       }
+
+       /* Combine all report types that have a matching prefix. */
+       _all_match_combine(rh->types, unprefixed_all_matched, field, flen, &report_types);
+
+       return report_types;
+}
+
+/*
+ * Register every non-implicit field whose type intersects the given
+ * type mask.  Returns 0 on the first registration failure.
+ */
+static int _add_all_fields(struct dm_report *rh, uint32_t type)
+{
+       uint32_t i;
+
+       for (i = 0; rh->fields[i].report_fn; i++) {
+               if (!(rh->fields[i].type & type))
+                       continue;
+               if (!_add_field(rh, i, 0, 0))
+                       return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Look up a field by (possibly underscored, possibly prefixed) name.
+ * Implicit fields are searched first.  On success stores the field index
+ * in *f_ret and whether it came from the implicit table in *implicit.
+ */
+static int _get_field(struct dm_report *rh, const char *field, size_t flen,
+                     uint32_t *f_ret, int *implicit)
+{
+       char field_canon[DM_REPORT_FIELD_TYPE_ID_LEN];
+       uint32_t f;
+
+       if (!flen)
+               return 0;
+
+       if (!_get_canonical_field_name(field, flen, field_canon, sizeof(field_canon), NULL))
+               return_0;
+
+       for (f = 0; _implicit_report_fields[f].report_fn; f++) {
+               if (_is_same_field(_implicit_report_fields[f].id, field_canon, rh->field_prefix)) {
+                       *f_ret = f;
+                       *implicit = 1;
+                       return 1;
+               }
+       }
+
+       /* Regular fields compare against pre-canonicalized ids
+        * (see _canonicalize_field_ids). */
+       for (f = 0; rh->fields[f].report_fn; f++) {
+               if (_is_same_field(rh->canonical_field_ids[f], field_canon, rh->field_prefix)) {
+                       *f_ret = f;
+                       *implicit = 0;
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Process one field spec from the output field list.  With
+ * report_type_only set, only accumulate rh->report_types; otherwise
+ * register the field(s) for output.  Handles both a single field name
+ * and the "all"/"<prefix>all" form.  Returns 0 if nothing matched.
+ */
+static int _field_match(struct dm_report *rh, const char *field, size_t flen,
+                       unsigned report_type_only)
+{
+       uint32_t f, type;
+       int implicit;
+
+       if (!flen)
+               return 0;
+
+       if ((_get_field(rh, field, flen, &f, &implicit))) {
+               if (report_type_only) {
+                       rh->report_types |= implicit ? _implicit_report_fields[f].type
+                                                    : rh->fields[f].type;
+                       return 1;
+               }
+
+               return _add_field(rh, f, implicit, 0) ? 1 : 0;
+       }
+
+       if ((type = _all_match(rh, field, flen))) {
+               if (report_type_only) {
+                       rh->report_types |= type;
+                       return 1;
+               }
+
+               return  _add_all_fields(rh, type);
+       }
+
+       return 0;
+}
+
+/*
+ * Mark a field as a sort key (flags carry FLD_ASCENDING/FLD_DESCENDING).
+ * A field not already selected for output is added hidden.  Duplicate
+ * sort keys are ignored with a warning.  With report_type_only set, only
+ * rh->report_types is updated.
+ */
+static int _add_sort_key(struct dm_report *rh, uint32_t field_num, int implicit,
+                        uint32_t flags, unsigned report_type_only)
+{
+       struct field_properties *fp, *found = NULL;
+       const struct dm_report_field_type *fields = implicit ? _implicit_report_fields
+                                                            : rh->fields;
+
+       dm_list_iterate_items(fp, &rh->field_props) {
+               if ((fp->implicit == implicit) && (fp->field_num == field_num)) {
+                       found = fp;
+                       break;
+               }
+       }
+
+       if (!found) {
+               if (report_type_only)
+                       rh->report_types |= fields[field_num].type;
+               else if (!(found = _add_field(rh, field_num, implicit, FLD_HIDDEN)))
+                       return_0;
+       }
+
+       if (report_type_only)
+               return 1;
+
+       if (found->flags & FLD_SORT_KEY) {
+               log_warn("dm_report: Ignoring duplicate sort field: %s.",
+                        fields[field_num].id);
+               return 1;
+       }
+
+       found->flags |= FLD_SORT_KEY;
+       /* Sort keys are applied in the order they were specified. */
+       found->sort_posn = rh->keys_count++;
+       found->flags |= flags;
+
+       return 1;
+}
+
+/*
+ * Process one sort key spec: an optional '+'/'-' direction prefix
+ * (default ascending) followed by a field name.  Returns 0 when the
+ * name matches no field.
+ */
+static int _key_match(struct dm_report *rh, const char *key, size_t len,
+                     unsigned report_type_only)
+{
+       char key_canon[DM_REPORT_FIELD_TYPE_ID_LEN];
+       uint32_t f;
+       uint32_t flags;
+
+       if (!len)
+               return 0;
+
+       if (*key == '+') {
+               key++;
+               len--;
+               flags = FLD_ASCENDING;
+       } else if (*key == '-') {
+               key++;
+               len--;
+               flags = FLD_DESCENDING;
+       } else
+               flags = FLD_ASCENDING;
+
+       if (!len) {
+               log_error("dm_report: Missing sort field name");
+               return 0;
+       }
+
+       if (!_get_canonical_field_name(key, len, key_canon, sizeof(key_canon), NULL))
+               return_0;
+
+       /* Implicit fields take precedence, mirroring _get_field(). */
+       for (f = 0; _implicit_report_fields[f].report_fn; f++)
+               if (_is_same_field(_implicit_report_fields[f].id, key_canon, rh->field_prefix))
+                       return _add_sort_key(rh, f, 1, flags, report_type_only);
+
+       for (f = 0; rh->fields[f].report_fn; f++)
+               if (_is_same_field(rh->canonical_field_ids[f], key_canon, rh->field_prefix))
+                       return _add_sort_key(rh, f, 0, flags, report_type_only);
+
+       return 0;
+}
+
+/*
+ * Parse a comma-separated output field list, registering each field via
+ * _field_match().  On an unknown field, prints the help listing and
+ * fails.  NOTE(review): unlike _parse_keys(), a NULL format is not
+ * checked before dereference - confirm callers never pass NULL.
+ */
+static int _parse_fields(struct dm_report *rh, const char *format,
+                        unsigned report_type_only)
+{
+       const char *ws;         /* Word start */
+       const char *we = format;        /* Word end */
+
+       while (*we) {
+               /* Allow consecutive commas */
+               while (*we && *we == ',')
+                       we++;
+
+               /* start of the field name */
+               ws = we;
+               while (*we && *we != ',')
+                       we++;
+
+               if (!_field_match(rh, ws, (size_t) (we - ws), report_type_only)) {
+                       _display_fields(rh, 1, 0);
+                       log_warn(" ");
+                       log_error("Unrecognised field: %.*s", (int) (we - ws), ws);
+                       return 0;
+               }
+       }
+
+       return 1;
+}
+
+/*
+ * Parse a comma-separated sort key list via _key_match().  A NULL keys
+ * string means "no sort keys" and succeeds trivially.
+ */
+static int _parse_keys(struct dm_report *rh, const char *keys,
+                      unsigned report_type_only)
+{
+       const char *ws;         /* Word start */
+       const char *we = keys;  /* Word end */
+
+       if (!keys)
+               return 1;
+
+       while (*we) {
+               /* Allow consecutive commas */
+               while (*we && *we == ',')
+                       we++;
+               ws = we;
+               while (*we && *we != ',')
+                       we++;
+               if (!_key_match(rh, ws, (size_t) (we - ws), report_type_only)) {
+                       _display_fields(rh, 1, 0);
+                       log_warn(" ");
+                       log_error("dm_report: Unrecognised field: %.*s", (int) (we - ws), ws);
+                       return 0;
+               }
+       }
+
+       return 1;
+}
+
+/*
+ * Return 1 (logging an internal error) if any caller-supplied report type
+ * shares an id bit with one of the library's implicit report types.
+ */
+static int _contains_reserved_report_type(const struct dm_report_object_type *types)
+{
+       const struct dm_report_object_type *t, *reserved;
+
+       for (t = types; t->data_fn; t++)
+               for (reserved = _implicit_report_types; reserved->data_fn; reserved++)
+                       if (reserved->id & t->id) {
+                               log_error(INTERNAL_ERROR "dm_report_init: definition of report "
+                                         "types given contains reserved identifier");
+                               return 1;
+                       }
+
+       return 0;
+}
+
+/*
+ * Hand the accumulated report types back to the dm_report_init() caller,
+ * masking out the implicit (library-internal) types which the caller
+ * cannot interpret or check.
+ */
+static void _dm_report_init_update_types(struct dm_report *rh, uint32_t *report_types)
+{
+       const struct dm_report_object_type *t;
+       uint32_t types;
+
+       if (!report_types)
+               return;
+
+       types = rh->report_types;
+       for (t = _implicit_report_types; t->data_fn; t++)
+               types &= ~t->id;
+
+       *report_types = types;
+}
+
+/*
+ * Return 1 if the selected fields include either spelling of the special
+ * implicit "help" field.
+ */
+static int _help_requested(struct dm_report *rh)
+{
+       struct field_properties *fp;
+
+       dm_list_iterate_items(fp, &rh->field_props) {
+               if (fp->implicit &&
+                   (!strcmp(_implicit_report_fields[fp->field_num].id, SPECIAL_FIELD_HELP_ID) ||
+                    !strcmp(_implicit_report_fields[fp->field_num].id, SPECIAL_FIELD_HELP_ALT_ID)))
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Build rh->canonical_field_ids: for each registered field id, the same id
+ * with all underscores removed.  Ids already canonical are referenced
+ * directly; others get a stripped copy allocated from rh->mem.
+ * Returns 1 on success, 0 on allocation/canonicalization failure.
+ */
+static int _canonicalize_field_ids(struct dm_report *rh)
+{
+       size_t registered_field_count = 0, i;
+       char canonical_field[DM_REPORT_FIELD_TYPE_ID_LEN];
+       char *canonical_field_dup;
+       int differs;
+
+       /* Count fields up to the terminating entry (empty id). */
+       while (*rh->fields[registered_field_count].id)
+               registered_field_count++;
+
+       if (!(rh->canonical_field_ids = dm_pool_alloc(rh->mem, registered_field_count * sizeof(const char *)))) {
+               log_error("_canonicalize_field_ids: dm_pool_alloc failed");
+               return 0;
+       }
+
+       for (i = 0; i < registered_field_count; i++) {
+               if (!_get_canonical_field_name(rh->fields[i].id, strlen(rh->fields[i].id),
+                                              canonical_field, sizeof(canonical_field), &differs))
+                       return_0;
+
+               if (differs) {
+                       if (!(canonical_field_dup = dm_pool_strdup(rh->mem, canonical_field))) {
+                               /* Fixed message: previously named a non-existent
+                                * function and the wrong allocator. */
+                               log_error("_canonicalize_field_ids: dm_pool_strdup failed.");
+                               return 0;
+                       }
+                       rh->canonical_field_ids[i] = canonical_field_dup;
+               } else
+                       rh->canonical_field_ids[i] = rh->fields[i].id;
+       }
+
+       return 1;
+}
+
+/*
+ * Create a report handle.  Rejects report types that clash with the
+ * library's reserved implicit types, normalizes the output flags, then
+ * parses the field and sort-key lists (twice: first pass accumulates
+ * report types only, second pass registers fields).  On return,
+ * *report_types (if given) holds the types actually needed, and the
+ * handle may already carry RH_ALREADY_REPORTED if help was requested.
+ * Returns NULL on failure.
+ */
+struct dm_report *dm_report_init(uint32_t *report_types,
+                                const struct dm_report_object_type *types,
+                                const struct dm_report_field_type *fields,
+                                const char *output_fields,
+                                const char *output_separator,
+                                uint32_t output_flags,
+                                const char *sort_keys,
+                                void *private_data)
+{
+       struct dm_report *rh;
+       const struct dm_report_object_type *type;
+
+       if (_contains_reserved_report_type(types))
+               return_NULL;
+
+       if (!(rh = dm_zalloc(sizeof(*rh)))) {
+               log_error("dm_report_init: dm_malloc failed");
+               return NULL;
+       }
+
+       /*
+        * rh->report_types is updated in _parse_fields() and _parse_keys()
+        * to contain all types corresponding to the fields specified by
+        * fields or keys.
+        */
+       if (report_types)
+               rh->report_types = *report_types;
+
+       rh->separator = output_separator;
+       rh->fields = fields;
+       rh->types = types;
+       rh->private = private_data;
+
+       rh->flags |= output_flags & DM_REPORT_OUTPUT_MASK;
+
+       /* With columns_as_rows we must buffer and not align. */
+       if (output_flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS) {
+               if (!(output_flags & DM_REPORT_OUTPUT_BUFFERED))
+                       rh->flags |= DM_REPORT_OUTPUT_BUFFERED;
+               if (output_flags & DM_REPORT_OUTPUT_ALIGNED)
+                       rh->flags &= ~DM_REPORT_OUTPUT_ALIGNED;
+       }
+
+       if (output_flags & DM_REPORT_OUTPUT_BUFFERED)
+               rh->flags |= RH_SORT_REQUIRED;
+
+       rh->flags |= RH_FIELD_CALC_NEEDED;
+
+       dm_list_init(&rh->field_props);
+       dm_list_init(&rh->rows);
+
+       /* Field-name prefix comes from the first type matching report_types. */
+       if ((type = _find_type(rh, rh->report_types)) && type->prefix)
+               rh->field_prefix = type->prefix;
+       else
+               rh->field_prefix = "";
+
+       if (!(rh->mem = dm_pool_create("report", 10 * 1024))) {
+               log_error("dm_report_init: allocation of memory pool failed");
+               dm_free(rh);
+               return NULL;
+       }
+
+       if (!_canonicalize_field_ids(rh)) {
+               dm_report_free(rh);
+               return NULL;
+       }
+
+       /*
+        * To keep the code needed to add the "all" field to a minimum, we parse
+        * the field lists twice.  The first time we only update the report type.
+        * FIXME Use one pass instead and expand the "all" field afterwards.
+        */
+       if (!_parse_fields(rh, output_fields, 1) ||
+           !_parse_keys(rh, sort_keys, 1)) {
+               dm_report_free(rh);
+               return NULL;
+       }
+
+       /* Generate list of fields for output based on format string & flags */
+       if (!_parse_fields(rh, output_fields, 0) ||
+           !_parse_keys(rh, sort_keys, 0)) {
+               dm_report_free(rh);
+               return NULL;
+       }
+
+       /*
+        * Return updated types value for further compatibility check by caller.
+        */
+       _dm_report_init_update_types(rh, report_types);
+
+       if (_help_requested(rh)) {
+               _display_fields(rh, 1, 0);
+               log_warn(" ");
+               rh->flags |= RH_ALREADY_REPORTED;
+       }
+
+       return rh;
+}
+
+/*
+ * Destroy a report handle: selection pool, value cache, main pool, then
+ * the handle itself (which was allocated with dm_zalloc, not from a pool).
+ */
+void dm_report_free(struct dm_report *rh)
+{
+       if (rh->selection)
+               dm_pool_destroy(rh->selection->mem);
+       if (rh->value_cache)
+               dm_hash_destroy(rh->value_cache);
+       dm_pool_destroy(rh->mem);
+       dm_free(rh);
+}
+
+/*
+ * Upper-case a NUL-terminated string in place and return it.
+ * The cast through unsigned char avoids undefined behaviour in
+ * toupper() when plain char is signed and holds a negative value.
+ */
+static char *_toupperstr(char *str)
+{
+       char *u = str;
+
+       do
+               *u = toupper((unsigned char) *u);
+       while (*u++);
+
+       return str;
+}
+
+/*
+ * Set the prefix prepended (upper-cased) to every field name in
+ * FIELD_NAME_PREFIX output mode.  The prefix is duplicated into rh->mem.
+ * Returns 1 on success, 0 on allocation failure.
+ */
+int dm_report_set_output_field_name_prefix(struct dm_report *rh, const char *output_field_name_prefix)
+{
+       char *prefix;
+
+       if (!(prefix = dm_pool_strdup(rh->mem, output_field_name_prefix))) {
+               log_error("dm_report_set_output_field_name_prefix: dm_pool_strdup failed");
+               return 0;
+       }
+
+       rh->output_field_name_prefix = _toupperstr(prefix);
+       
+       return 1;
+}
+
+/*
+ * Create a row of data for an object
+ */
+/*
+ * Locate one field's raw data within an object: the type's data_fn maps
+ * the object to a base pointer, then the field's registered byte offset
+ * is applied.  Returns NULL if data_fn yields nothing.
+ */
+static void *_report_get_field_data(struct dm_report *rh,
+                                   struct field_properties *fp, void *object)
+{
+       const struct dm_report_field_type *fields = fp->implicit ? _implicit_report_fields
+                                                                : rh->fields;
+
+       char *ret = fp->type->data_fn(object);
+
+       if (!ret)
+               return NULL;
+
+       /* char * arithmetic: offset is in bytes from the base pointer. */
+       return (void *)(ret + fields[fp->field_num].offset);
+}
+
+/*
+ * Implicit fields take their data from the row itself; only the special
+ * "selected" field has any backing data - all others yield NULL.
+ */
+static void *_report_get_implicit_field_data(struct dm_report *rh __attribute__((unused)),
+                                            struct field_properties *fp, struct row *row)
+{
+       const char *field_id = _implicit_report_fields[fp->field_num].id;
+
+       if (strcmp(field_id, SPECIAL_FIELD_SELECTED_ID))
+               return NULL;
+
+       return row;
+}
+
+/*
+ * Tolerant double comparisons used for size-field selection.
+ * NOTE(review): DBL_EPSILON is an *absolute* tolerance here, which is only
+ * meaningful for magnitudes near 1.0 - presumably acceptable for the value
+ * ranges reported; confirm.
+ */
+static int _dbl_equal(double d1, double d2)
+{
+       return fabs(d1 - d2) < DBL_EPSILON;
+}
+
+/* d1 > d2, excluding near-equality. */
+static int _dbl_greater(double d1, double d2)
+{
+       return (d1 > d2) && !_dbl_equal(d1, d2);
+}
+
+/* d1 < d2, excluding near-equality. */
+static int _dbl_less(double d1, double d2)
+{
+       return (d1 < d2) && !_dbl_equal(d1, d2);
+}
+
+static int _dbl_greater_or_equal(double d1, double d2)
+{
+       return _dbl_greater(d1, d2) || _dbl_equal(d1, d2);
+}
+
+static int _dbl_less_or_equal(double d1, double d2)
+{
+       return _dbl_less(d1, d2) || _dbl_equal(d1, d2);
+}
+
+/*
+ * Casting helpers for the reserved-value comparison code below:
+ * reinterpret an opaque const void * as the named scalar type, or as
+ * an element of an array of that type.
+ */
+#define _uint64 *(const uint64_t *)
+#define _uint64arr(var,index) ((const uint64_t *)(var))[(index)]
+#define _str (const char *)
+#define _dbl *(const double *)
+#define _dblarr(var,index) ((const double *)(var))[(index)]
+
+/*
+ * Test whether either the field's actual value (val) or the selection's
+ * value (fs, optional) hits a strictly reserved value (res_val), where
+ * either side may be a range.  Used to exclude reserved values from
+ * relative (<, >, ...) comparisons.
+ */
+static int _do_check_value_is_strictly_reserved(unsigned type, const void *res_val, int res_range,
+                                               const void *val, struct field_selection *fs)
+{
+       /* A selection with a chained second value is a range. */
+       int sel_range = fs ? fs->value->next != NULL : 0;
+
+       switch (type & DM_REPORT_FIELD_TYPE_MASK) {
+               case DM_REPORT_FIELD_TYPE_NUMBER:
+                       if (res_range && sel_range) {
+                               /* both reserved value and selection value are ranges */
+                               if (((_uint64 val >= _uint64arr(res_val,0)) && (_uint64 val <= _uint64arr(res_val,1))) ||
+                                   (fs && ((fs->value->v.i == _uint64arr(res_val,0)) && (fs->value->next->v.i == _uint64arr(res_val,1)))))
+                                       return 1;
+                       } else if (res_range) {
+                               /* only reserved value is a range */
+                               if (((_uint64 val >= _uint64arr(res_val,0)) && (_uint64 val <= _uint64arr(res_val,1))) ||
+                                   (fs && ((fs->value->v.i >= _uint64arr(res_val,0)) && (fs->value->v.i <= _uint64arr(res_val,1)))))
+                                       return 1;
+                       } else if (sel_range) {
+                               /* only selection value is a range */
+                               /* NOTE(review): ">= x && <= x" collapses to
+                                * "== x" - presumably intentional shorthand for
+                                * a scalar reserved value; confirm. */
+                               if (((_uint64 val >= _uint64 res_val) && (_uint64 val <= _uint64 res_val)) ||
+                                   (fs && ((fs->value->v.i >= _uint64 res_val) && (fs->value->next->v.i <= _uint64 res_val))))
+                                       return 1;
+                       } else {
+                               /* neither selection value nor reserved value is a range */
+                               if ((_uint64 val == _uint64 res_val) ||
+                                   (fs && (fs->value->v.i == _uint64 res_val)))
+                                       return 1;
+                       }
+                       break;
+
+               case DM_REPORT_FIELD_TYPE_STRING:
+                       /* there are no ranges for string type yet */
+                       if ((!strcmp(_str val, _str res_val)) ||
+                           (fs && (!strcmp(fs->value->v.s, _str res_val))))
+                               return 1;
+                       break;
+
+               case DM_REPORT_FIELD_TYPE_SIZE:
+                       if (res_range && sel_range) {
+                               /* both reserved value and selection value are ranges */
+                               if ((_dbl_greater_or_equal(_dbl val, _dblarr(res_val,0)) && _dbl_less_or_equal(_dbl val, _dblarr(res_val,1))) ||
+                                   (fs && (_dbl_equal(fs->value->v.d, _dblarr(res_val,0)) && (_dbl_equal(fs->value->next->v.d, _dblarr(res_val,1))))))
+                                       return 1;
+                       } else if (res_range) {
+                               /* only reserved value is a range */
+                               if ((_dbl_greater_or_equal(_dbl val, _dblarr(res_val,0)) && _dbl_less_or_equal(_dbl val, _dblarr(res_val,1))) ||
+                                   (fs && (_dbl_greater_or_equal(fs->value->v.d, _dblarr(res_val,0)) && _dbl_less_or_equal(fs->value->v.d, _dblarr(res_val,1)))))
+                                       return 1;
+                       } else if (sel_range) {
+                               /* only selection value is a range */
+                               if ((_dbl_greater_or_equal(_dbl val, _dbl res_val) && (_dbl_less_or_equal(_dbl val, _dbl res_val))) ||
+                                   (fs && (_dbl_greater_or_equal(fs->value->v.d, _dbl res_val) && _dbl_less_or_equal(fs->value->next->v.d, _dbl res_val))))
+                                       return 1;
+                       } else {
+                               /* neither selection value nor reserved value is a range */
+                               if ((_dbl_equal(_dbl val, _dbl res_val)) ||
+                                   (fs && (_dbl_equal(fs->value->v.d, _dbl res_val))))
+                                       return 1;
+                       }
+                       break;
+
+               case DM_REPORT_FIELD_TYPE_STRING_LIST:
+                       /* FIXME Add comparison for string list */
+                       break;
+               case DM_REPORT_FIELD_TYPE_TIME:
+                       /* FIXME Add comparison for time */
+                       break;
+       }
+
+       return 0;
+}
+
+/*
+ * Used to check whether a value of certain type used in selection is reserved.
+ * Walks rh->reserved_values (NULL-value terminated), skipping "named"
+ * (weak) reservations.  Field-scoped reservations (type NONE) match by
+ * field_num; type-scoped ones match by type mask.
+ */
+static int _check_value_is_strictly_reserved(struct dm_report *rh, uint32_t field_num, unsigned type,
+                                            const void *val, struct field_selection *fs)
+{
+       const struct dm_report_reserved_value *iter = rh->reserved_values;
+       const struct dm_report_field_reserved_value *frv;
+       int res_range;
+
+       if (!iter)
+               return 0;
+
+       while (iter->value) {
+               /* Only check strict reserved values, not the weaker form ("named" reserved value). */
+               if (!(iter->type & DM_REPORT_FIELD_RESERVED_VALUE_NAMED)) {
+                       res_range = iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE;
+                       if ((iter->type & DM_REPORT_FIELD_TYPE_MASK) == DM_REPORT_FIELD_TYPE_NONE) {
+                               /* Reservation bound to one specific field. */
+                               frv = (const struct dm_report_field_reserved_value *) iter->value;
+                               if (frv->field_num == field_num && _do_check_value_is_strictly_reserved(type, frv->value, res_range, val, fs))
+                                       return 1;
+                       } else if (iter->type & type && _do_check_value_is_strictly_reserved(type, iter->value, res_range, val, fs))
+                               return 1;
+               }
+               iter++;
+       }
+
+       return 0;
+}
+
+/*
+ * Evaluate an integer field against a selection.  A chained second value
+ * makes the selection a range [sel1, sel2].  Relative comparisons refuse
+ * to match strictly reserved values.
+ */
+static int _cmp_field_int(struct dm_report *rh, uint32_t field_num, const char *field_id,
+                         uint64_t val, struct field_selection *fs)
+{
+       int range = fs->value->next != NULL;
+       const uint64_t sel1 = fs->value->v.i;
+       const uint64_t sel2 = range ? fs->value->next->v.i : 0;
+
+       switch(fs->flags & FLD_CMP_MASK) {
+               case FLD_CMP_EQUAL:
+                       return range ? ((val >= sel1) && (val <= sel2)) : val == sel1;
+
+               case FLD_CMP_NOT|FLD_CMP_EQUAL:
+                       return range ? !((val >= sel1) && (val <= sel2)) : val != sel1;
+
+               case FLD_CMP_NUMBER|FLD_CMP_GT:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
+                               return 0;
+                       /* "> range" means beyond the range's upper bound. */
+                       return range ? val > sel2 : val > sel1;
+
+               case FLD_CMP_NUMBER|FLD_CMP_GT|FLD_CMP_EQUAL:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
+                               return 0;
+                       return val >= sel1;
+
+               case FLD_CMP_NUMBER|FLD_CMP_LT:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
+                               return 0;
+                       return val < sel1;
+
+               case FLD_CMP_NUMBER|FLD_CMP_LT|FLD_CMP_EQUAL:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
+                               return 0;
+                       /* "<= range" means at or below the range's upper bound. */
+                       return range ? val <= sel2 : val <= sel1;
+
+               default:
+                       log_error(INTERNAL_ERROR "_cmp_field_int: unsupported number "
+                                 "comparison type for field %s", field_id);
+       }
+
+       return 0;
+}
+
+/*
+ * Compare floating-point field value 'val' against selection 'fs'
+ * using the epsilon-based _dbl_* helpers.  A chained second value
+ * denotes a range [sel1, sel2].  Returns 1 on match, 0 otherwise.
+ * Reserved-value checks use DM_REPORT_FIELD_TYPE_SIZE since size
+ * fields are the ones carried as double here.
+ */
+static int _cmp_field_double(struct dm_report *rh, uint32_t field_num, const char *field_id,
+                            double val, struct field_selection *fs)
+{
+       int range = fs->value->next != NULL;
+       double sel1 = fs->value->v.d;
+       double sel2 = range ? fs->value->next->v.d : 0;
+
+       switch(fs->flags & FLD_CMP_MASK) {
+               case FLD_CMP_EQUAL:
+                       return range ? (_dbl_greater_or_equal(val, sel1) && _dbl_less_or_equal(val, sel2))
+                                    : _dbl_equal(val, sel1);
+
+               case FLD_CMP_NOT|FLD_CMP_EQUAL:
+                       return range ? !(_dbl_greater_or_equal(val, sel1) && _dbl_less_or_equal(val, sel2))
+                                    : !_dbl_equal(val, sel1);
+
+               case FLD_CMP_NUMBER|FLD_CMP_GT:
+                       /* Relative comparisons never match strictly reserved values. */
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
+                               return 0;
+                       return range ? _dbl_greater(val, sel2)
+                                    : _dbl_greater(val, sel1);
+
+               case FLD_CMP_NUMBER|FLD_CMP_GT|FLD_CMP_EQUAL:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
+                               return 0;
+                       return _dbl_greater_or_equal(val, sel1);
+
+               case FLD_CMP_NUMBER|FLD_CMP_LT:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
+                               return 0;
+                       return _dbl_less(val, sel1);
+
+               case FLD_CMP_NUMBER|FLD_CMP_LT|FLD_CMP_EQUAL:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
+                               return 0;
+                       return range ? _dbl_less_or_equal(val, sel2) : _dbl_less_or_equal(val, sel1); 
+
+               default:
+                       log_error(INTERNAL_ERROR "_cmp_field_double: unsupported number "
+                                 "comparison type for selection field %s", field_id);
+       }
+
+       return 0;
+}
+
+/*
+ * Compare a string field value 'val' against the single selection
+ * string.  Supports only (in)equality; returns 1 on match, 0 otherwise.
+ */
+static int _cmp_field_string(struct dm_report *rh __attribute__((unused)),
+                            uint32_t field_num, const char *field_id,
+                            const char *val, struct field_selection *fs)
+{
+       const char *selection_str = fs->value->v.s;
+       uint32_t cmp_type = fs->flags & FLD_CMP_MASK;
+
+       if (cmp_type == FLD_CMP_EQUAL)
+               return !strcmp(val, selection_str);
+
+       if (cmp_type == (FLD_CMP_NOT|FLD_CMP_EQUAL))
+               return strcmp(val, selection_str);
+
+       log_error(INTERNAL_ERROR "_cmp_field_string: unsupported string "
+                 "comparison type for selection field %s", field_id);
+       return 0;
+}
+
+/*
+ * Compare a time field value 'val' against selection 'fs'.
+ * A chained second value (fs->value->next) turns the selection
+ * into a range [sel1, sel2].  Returns 1 on match, 0 otherwise.
+ */
+static int _cmp_field_time(struct dm_report *rh,
+                          uint32_t field_num, const char *field_id,
+                          time_t val, struct field_selection *fs)
+{
+       int range = fs->value->next != NULL;
+       time_t sel1 = fs->value->v.t;
+       time_t sel2 = range ? fs->value->next->v.t : 0;
+
+       switch(fs->flags & FLD_CMP_MASK) {
+               case FLD_CMP_EQUAL:
+                       return range ? ((val >= sel1) && (val <= sel2)) : val == sel1;
+               case FLD_CMP_NOT|FLD_CMP_EQUAL:
+                       /* Fix: a negated range must negate the whole range test.
+                        * Previously this matched *inside* the range, i.e. it behaved
+                        * like plain EQUAL (cf. the correct form in _cmp_field_int). */
+                       return range ? !((val >= sel1) && (val <= sel2)) : val != sel1;
+               case FLD_CMP_TIME|FLD_CMP_GT:
+                       /* Relative comparisons never match strictly reserved values. */
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+                               return 0;
+                       return range ? val > sel2 : val > sel1;
+               case FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+                               return 0;
+                       return val >= sel1;
+               case FLD_CMP_TIME|FLD_CMP_LT:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+                               return 0;
+                       return val < sel1;
+               case FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL:
+                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+                               return 0;
+                       return range ? val <= sel2 : val <= sel1;
+               default:
+                       log_error(INTERNAL_ERROR "_cmp_field_time: unsupported time "
+                                 "comparison type for field %s", field_id);
+       }
+
+       return 0;
+}
+
+/* Matches if all items from selection string list match list value strictly 1:1. */
+static int _cmp_field_string_list_strict_all(const struct str_list_sort_value *val,
+                                            const struct selection_str_list *sel)
+{
+       unsigned int sel_list_size = dm_list_size(&sel->str_list.list);
+       struct dm_str_list *sel_item;
+       unsigned int i = 1;
+
+       /* val->items[0].len holds the item count; items[1..N] are
+        * (pos,len) slices into the concatenated string val->value. */
+       if (!val->items[0].len) {
+               if (sel_list_size == 1) {
+                       /* match blank string list with selection defined as blank string only */
+                       sel_item = dm_list_item(dm_list_first(&sel->str_list.list), struct dm_str_list);
+                       return !strcmp(sel_item->str, "");
+               }
+               return 0;
+       }
+
+       /* if item count differs, it's clear the lists do not match */
+       if (val->items[0].len != sel_list_size)
+               return 0;
+
+       /* both lists are sorted so they either match 1:1 or not */
+       dm_list_iterate_items(sel_item, &sel->str_list.list) {
+               if ((strlen(sel_item->str) != val->items[i].len) ||
+                   strncmp(sel_item->str, val->value + val->items[i].pos, val->items[i].len))
+                       return 0;
+               i++;
+       }
+
+       return 1;
+}
+
+/* Matches if all items from selection string list match a subset of list value. */
+static int _cmp_field_string_list_subset_all(const struct str_list_sort_value *val,
+                                            const struct selection_str_list *sel)
+{
+       unsigned int sel_list_size = dm_list_size(&sel->str_list.list);
+       struct dm_str_list *sel_item;
+       unsigned int i, last_found = 1;
+       int r = 0;
+
+       /* val->items[0].len holds the item count; an empty value list
+        * only matches a selection consisting of the blank string. */
+       if (!val->items[0].len) {
+               if (sel_list_size == 1) {
+                       /* match blank string list with selection defined as blank string only */
+                       sel_item = dm_list_item(dm_list_first(&sel->str_list.list), struct dm_str_list);
+                       return !strcmp(sel_item->str, "");
+               }
+               return 0;
+       }
+
+       /* check selection is a subset of the value */
+       dm_list_iterate_items(sel_item, &sel->str_list.list) {
+               r = 0;
+               /* Both lists are sorted, so resume scanning from the last
+                * matched value item rather than from the beginning. */
+               for (i = last_found; i <= val->items[0].len; i++) {
+                       if ((strlen(sel_item->str) == val->items[i].len) &&
+                           !strncmp(sel_item->str, val->value + val->items[i].pos, val->items[i].len)) {
+                               last_found = i;
+                               r = 1;
+                       }
+               }
+               /* Current selection item not present in the value => no match. */
+               if (!r)
+                       break;
+       }
+
+       return r;
+}
+
+/* Matches if any item from selection string list matches list value. */
+static int _cmp_field_string_list_any(const struct str_list_sort_value *val,
+                                     const struct selection_str_list *sel)
+{
+       struct dm_str_list *sel_item;
+       unsigned int i;
+
+       /* match blank string list with selection that contains blank string */
+       if (!val->items[0].len) {
+               dm_list_iterate_items(sel_item, &sel->str_list.list) {
+                       if (!strcmp(sel_item->str, ""))
+                               return 1;
+               }
+               return 0;
+       }
+
+       /* val->items[0].len holds the item count; items[1..N] are
+        * (pos,len) slices into the concatenated string val->value. */
+       dm_list_iterate_items(sel_item, &sel->str_list.list) {
+               /*
+                * TODO: Optimize this so we don't need to compare the whole lists' content.
+                *       Make use of the fact that the lists are sorted!
+                */
+               for (i = 1; i <= val->items[0].len; i++) {
+                       if ((strlen(sel_item->str) == val->items[i].len) &&
+                           !strncmp(sel_item->str, val->value + val->items[i].pos, val->items[i].len))
+                               return 1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Compare a string-list field against a string-list selection.
+ * The selection type encodes both the list form ([] strict vs {} subset)
+ * and the combinator (AND = all items, OR = any item).  The result is
+ * inverted if FLD_CMP_NOT is set.  Returns 1 on match, 0 otherwise.
+ */
+static int _cmp_field_string_list(struct dm_report *rh __attribute__((unused)),
+                                 uint32_t field_num, const char *field_id,
+                                 const struct str_list_sort_value *val,
+                                 struct field_selection *fs)
+{
+       const struct selection_str_list *sel = fs->value->v.l;
+       int subset, r;
+
+       switch (sel->type & SEL_LIST_MASK) {
+               case SEL_LIST_LS:
+                       subset = 0;
+                       break;
+               case SEL_LIST_SUBSET_LS:
+                       subset = 1;
+                       break;
+               default:
+                       log_error(INTERNAL_ERROR "_cmp_field_string_list: unknown list type");
+                       return 0;
+       }
+
+       switch (sel->type & SEL_MASK) {
+               case SEL_AND:
+                       r = subset ? _cmp_field_string_list_subset_all(val, sel)
+                                  : _cmp_field_string_list_strict_all(val, sel);
+                       break;
+               case SEL_OR:
+                       r = _cmp_field_string_list_any(val, sel);
+                       break;
+               default:
+                       log_error(INTERNAL_ERROR "_cmp_field_string_list: unsupported string "
+                                 "list type found, expecting either AND or OR list for "
+                                 "selection field %s", field_id);
+                       return 0;
+       }
+
+       return fs->flags & FLD_CMP_NOT ? !r : r;
+}
+
+/* Match string 's' against the precompiled selection regex; FLD_CMP_NOT inverts. */
+static int _cmp_field_regex(const char *s, struct field_selection *fs)
+{
+       int hit;
+
+       hit = (dm_regex_match(fs->value->v.r, s) >= 0);
+       if (fs->flags & FLD_CMP_NOT)
+               return !hit;
+
+       return hit;
+}
+
+/*
+ * Evaluate a single selection item 'fs' against report field 'f',
+ * dispatching on the field's declared type (the field's sort_value
+ * carries the raw typed value).  Returns 1 on match, 0 otherwise.
+ */
+static int _compare_selection_field(struct dm_report *rh,
+                                   struct dm_report_field *f,
+                                   struct field_selection *fs)
+{
+       const struct dm_report_field_type *fields = f->props->implicit ? _implicit_report_fields
+                                                                      : rh->fields;
+       const char *field_id = fields[f->props->field_num].id;
+       int r = 0;
+
+       if (!f->sort_value) {
+               log_error("_compare_selection_field: field without value :%d",
+                         f->props->field_num);
+               return 0;
+       }
+
+       /* Regex comparison works on the string form regardless of type. */
+       if (fs->flags & FLD_CMP_REGEX)
+               r = _cmp_field_regex((const char *) f->sort_value, fs);
+       else {
+               switch(f->props->flags & DM_REPORT_FIELD_TYPE_MASK) {
+                       case DM_REPORT_FIELD_TYPE_PERCENT:
+                               /*
+                                * Check against real percent values only.
+                                * That means DM_PERCENT_0 <= percent <= DM_PERCENT_100.
+                                */
+                               if (*(const uint64_t *) f->sort_value > DM_PERCENT_100)
+                                       return 0;
+                               /* fall through */
+                       case DM_REPORT_FIELD_TYPE_NUMBER:
+                               r = _cmp_field_int(rh, f->props->field_num, field_id, *(const uint64_t *) f->sort_value, fs);
+                               break;
+                       case DM_REPORT_FIELD_TYPE_SIZE:
+                               r = _cmp_field_double(rh, f->props->field_num, field_id, *(const double *) f->sort_value, fs);
+                               break;
+                       case DM_REPORT_FIELD_TYPE_STRING:
+                               r = _cmp_field_string(rh, f->props->field_num, field_id, (const char *) f->sort_value, fs);
+                               break;
+                       case DM_REPORT_FIELD_TYPE_STRING_LIST:
+                               r = _cmp_field_string_list(rh, f->props->field_num, field_id, (const struct str_list_sort_value *) f->sort_value, fs);
+                               break;
+                       case DM_REPORT_FIELD_TYPE_TIME:
+                               r = _cmp_field_time(rh, f->props->field_num, field_id, *(const time_t *) f->sort_value, fs);
+                               break;
+                       default:
+                               log_error(INTERNAL_ERROR "_compare_selection_field: unknown field type for field %s", field_id);
+               }
+       }
+
+       return r;
+}
+
+/*
+ * Recursively evaluate selection tree node 'sn' against the row's
+ * 'fields'.  SEL_ITEM leaves compare one field; SEL_OR/SEL_AND nodes
+ * combine child results with short-circuiting.  A SEL_MODIFIER_NOT
+ * on the node inverts the result.  Returns 1 if selected, 0 if not.
+ */
+static int _check_selection(struct dm_report *rh, struct selection_node *sn,
+                           struct dm_list *fields)
+{
+       int r;
+       struct selection_node *iter_n;
+       struct dm_report_field *f;
+
+       switch (sn->type & SEL_MASK) {
+               case SEL_ITEM:
+                       r = 1;
+                       /* Every row field bound to this selection item must match. */
+                       dm_list_iterate_items(f, fields) {
+                               if (sn->selection.item->fp != f->props)
+                                       continue;
+                               if (!_compare_selection_field(rh, f, sn->selection.item))
+                                       r = 0;
+                       }
+                       break;
+               case SEL_OR:
+                       r = 0;
+                       dm_list_iterate_items(iter_n, &sn->selection.set)
+                               if ((r |= _check_selection(rh, iter_n, fields)))
+                                       break;
+                       break;
+               case SEL_AND:
+                       r = 1;
+                       dm_list_iterate_items(iter_n, &sn->selection.set)
+                               if (!(r &= _check_selection(rh, iter_n, fields)))
+                                       break;
+                       break;
+               default:
+                       log_error("Unsupported selection type");
+                       return 0;
+       }
+
+       return (sn->type & SEL_MODIFIER_NOT) ? !r : r;
+}
+
+/* Evaluate the report's selection tree for one row; no criteria => selected. */
+static int _check_report_selection(struct dm_report *rh, struct dm_list *fields)
+{
+       if (rh->selection && rh->selection->selection_root)
+               return _check_selection(rh, rh->selection->selection_root, fields);
+
+       return 1;
+}
+
+/*
+ * Build a report row for 'object': allocate the row, fill each field via
+ * its report_fn, apply selection criteria, and either queue/emit the row
+ * (do_output) or just report the selection result via '*selected'.
+ * Returns 1 on success, 0 on error.
+ */
+static int _do_report_object(struct dm_report *rh, void *object, int do_output, int *selected)
+{
+       const struct dm_report_field_type *fields;
+       struct field_properties *fp;
+       struct row *row = NULL;
+       struct dm_report_field *field;
+       void *data = NULL;
+       int r = 0;
+
+       if (!rh) {
+               log_error(INTERNAL_ERROR "_do_report_object: dm_report handler is NULL.");
+               return 0;
+       }
+
+       if (!do_output && !selected) {
+               log_error(INTERNAL_ERROR "_do_report_object: output not requested and "
+                                        "selected output variable is NULL too.");
+               return 0;
+       }
+
+       /* Single-shot reports that already produced output are a no-op. */
+       if (rh->flags & RH_ALREADY_REPORTED)
+               return 1;
+
+       if (!(row = dm_pool_zalloc(rh->mem, sizeof(*row)))) {
+               log_error("_do_report_object: struct row allocation failed");
+               return 0;
+       }
+
+       if (!rh->first_row)
+               rh->first_row = row;
+
+       row->rh = rh;
+
+       if ((rh->flags & RH_SORT_REQUIRED) &&
+           !(row->sort_fields =
+               dm_pool_zalloc(rh->mem, sizeof(struct dm_report_field *) *
+                              rh->keys_count))) {
+               log_error("_do_report_object: "
+                         "row sort value structure allocation failed");
+               goto out;
+       }
+
+       dm_list_init(&row->fields);
+       row->selected = 1;
+
+       /* For each field to be displayed, call its report_fn */
+       dm_list_iterate_items(fp, &rh->field_props) {
+               if (!(field = dm_pool_zalloc(rh->mem, sizeof(*field)))) {
+                       log_error("_do_report_object: "
+                                 "struct dm_report_field allocation failed");
+                       goto out;
+               }
+
+               if (fp->implicit) {
+                       fields = _implicit_report_fields;
+                       /* Remember the "selected" field; it is filled in later,
+                        * once the selection result for the row is known. */
+                       if (!strcmp(fields[fp->field_num].id, SPECIAL_FIELD_SELECTED_ID))
+                               row->field_sel_status = field;
+               } else
+                       fields = rh->fields;
+
+               field->props = fp;
+
+               data = fp->implicit ? _report_get_implicit_field_data(rh, fp, row)
+                                   : _report_get_field_data(rh, fp, object);
+               if (!data) {
+                       log_error("_do_report_object: "
+                                 "no data assigned to field %s",
+                                 fields[fp->field_num].id);
+                       goto out;
+               }
+
+               if (!fields[fp->field_num].report_fn(rh, rh->mem,
+                                                        field, data,
+                                                        rh->private)) {
+                       log_error("_do_report_object: "
+                                 "report function failed for field %s",
+                                 fields[fp->field_num].id);
+                       goto out;
+               }
+
+               dm_list_add(&row->fields, &field->list);
+       }
+
+       r = 1;
+
+       if (!_check_report_selection(rh, &row->fields)) {
+               row->selected = 0;
+
+               /*
+                * If the row is not selected, we still keep it for output if either:
+                *   - we're displaying special "selected" field in the row,
+                *   - or the report is supposed to be on output multiple times
+                *     where each output can have a new selection defined.
+                */
+               if (!row->field_sel_status && !(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
+                       goto out;
+
+               if (row->field_sel_status) {
+                       /*
+                        * If field with id "selected" is reported,
+                        * report the row although it does not pass
+                        * the selection criteria.
+                        * The "selected" field reports the result
+                        * of the selection.
+                        */
+                       /* NOTE(review): report_fn return value is ignored here —
+                        * presumably rendering "selected" cannot fail; confirm. */
+                       _implicit_report_fields[row->field_sel_status->props->field_num].report_fn(rh,
+                                                       rh->mem, row->field_sel_status, row, rh->private);
+                       /*
+                        * If the "selected" field is not displayed, e.g.
+                        * because it is part of the sort field list,
+                        * skip the display of the row as usual unless
+                        * we plan to do the output multiple times.
+                        */
+                       if ((row->field_sel_status->props->flags & FLD_HIDDEN) &&
+                           !(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
+                               goto out;
+               }
+       }
+
+       if (!do_output)
+               goto out;
+
+       dm_list_add(&rh->rows, &row->list);
+
+       /* Unbuffered reports emit each row as soon as it is complete. */
+       if (!(rh->flags & DM_REPORT_OUTPUT_BUFFERED))
+               return dm_report_output(rh);
+out:
+       /* Hand back the selection result before possibly freeing the row. */
+       if (selected)
+               *selected = row->selected;
+       if (!do_output || !r)
+               dm_pool_free(rh->mem, row);
+       return r;
+}
+
+/*
+ * Hide fields whose report value is blank in every buffered row.
+ * With 'global' set all fields are candidates; otherwise only fields
+ * previously flagged FLD_COMPACT_ONE (see dm_report_compact_given_fields).
+ * Returns 1 on success (including nothing-to-do), 0 on error.
+ */
+static int _do_report_compact_fields(struct dm_report *rh, int global)
+{
+       struct dm_report_field *field;
+       struct field_properties *fp;
+       struct row *row;
+
+       if (!rh) {
+               /* Fix: previous message named a nonexistent function
+                * ("dm_report_enable_compact_output"). */
+               log_error("_do_report_compact_fields: dm report handler is NULL.");
+               return 0;
+       }
+
+       /* Compaction inspects buffered rows; without them there is nothing to do. */
+       if (!(rh->flags & DM_REPORT_OUTPUT_BUFFERED) ||
+             dm_list_empty(&rh->rows))
+               return 1;
+
+       /*
+        * At first, mark all fields with FLD_HIDDEN flag.
+        * Also, mark field with FLD_COMPACTED flag, but only
+        * the ones that didn't have FLD_HIDDEN set before.
+        * This prevents losing the original FLD_HIDDEN flag
+        * in next step...
+        */
+       dm_list_iterate_items(fp, &rh->field_props) {
+               if (fp->flags & FLD_HIDDEN)
+                       continue;
+               if (global || (fp->flags & FLD_COMPACT_ONE))
+                       fp->flags |= (FLD_COMPACTED | FLD_HIDDEN);
+       }
+
+       /*
+        * ...check each field in a row and if its report value
+        * is not empty, drop the FLD_COMPACTED and FLD_HIDDEN
+        * flag if FLD_COMPACTED flag is set. It's important
+        * to keep FLD_HIDDEN flag for the fields that were
+        * already marked with FLD_HIDDEN before - these don't
+        * have FLD_COMPACTED set - check this condition!
+        */
+       dm_list_iterate_items(row, &rh->rows) {
+               dm_list_iterate_items(field, &row->fields) {
+                       if ((field->report_string && *field->report_string) &&
+                           (field->props->flags & FLD_COMPACTED))
+                               field->props->flags &= ~(FLD_COMPACTED | FLD_HIDDEN);
+               }
+       }
+
+       /*
+        * The fields left with FLD_COMPACTED and FLD_HIDDEN flag are
+        * the ones which have blank value in all rows. The FLD_HIDDEN
+        * will cause such field to not be reported on output at all.
+        */
+
+       return 1;
+}
+
+/* Public API: compact (hide) all fields that are blank in every buffered row. */
+int dm_report_compact_fields(struct dm_report *rh)
+{
+       return _do_report_compact_fields(rh, 1);
+}
+
+/*
+ * Resolve field name 'field' (length 'flen') and, if it is among the
+ * displayed fields, flag it for compaction.  Returns 1 if the name is
+ * a known field, 0 otherwise.
+ */
+static int _field_to_compact_match(struct dm_report *rh, const char *field, size_t flen)
+{
+       struct field_properties *prop;
+       uint32_t field_num;
+       int implicit;
+
+       if (!_get_field(rh, field, flen, &field_num, &implicit))
+               return 0;
+
+       /* At most one displayed field matches; flag it and stop. */
+       dm_list_iterate_items(prop, &rh->field_props) {
+               if ((prop->implicit == implicit) && (prop->field_num == field_num)) {
+                       prop->flags |= FLD_COMPACT_ONE;
+                       break;
+               }
+       }
+
+       return 1;
+}
+
+/*
+ * Parse a comma-separated list of field names and flag each one for
+ * compaction.  A NULL list is a no-op success.  Returns 0 (with an
+ * error logged) on the first unrecognized field name.
+ */
+static int _parse_fields_to_compact(struct dm_report *rh, const char *fields)
+{
+       const char *ws;           /* Word start */
+       const char *we = fields;  /* Word end */
+
+       if (!fields)
+               return 1;
+
+       while (*we) {
+               /* Skip any consecutive separators, then scan one name. */
+               while (*we && *we == ',')
+                       we++;
+               ws = we;
+               while (*we && *we != ',')
+                       we++;
+               if (!_field_to_compact_match(rh, ws, (size_t) (we - ws))) {
+                       log_error("dm_report: Unrecognized field: %.*s", (int) (we - ws), ws);
+                       return 0;
+               }
+       }
+
+       return 1;
+}
+
+/* Public API: compact only the given comma-separated fields when blank in all rows. */
+int dm_report_compact_given_fields(struct dm_report *rh, const char *fields)
+{
+       if (!_parse_fields_to_compact(rh, fields))
+               return_0;
+
+       return _do_report_compact_fields(rh, 0);
+}
+
+/* Public API: report one object, producing output, ignoring selection result. */
+int dm_report_object(struct dm_report *rh, void *object)
+{
+       return _do_report_object(rh, object, 1, NULL);
+}
+
+/* Public API: report one object, optionally emitting output, returning selection status. */
+int dm_report_object_is_selected(struct dm_report *rh, void *object, int do_output, int *selected)
+{
+       return _do_report_object(rh, object, do_output, selected);
+}
+
+/*
+ * Selection parsing
+ */
+
+/*
+ * Other tokens (FIELD, VALUE, STRING, NUMBER, REGEX)
+ *     FIELD := <strings of alphabet, number and '_'>
+ *     VALUE := NUMBER | STRING
+ *     REGEX := <strings quoted by '"', '\'', '(', '{', '[' or unquoted>
+ *     NUMBER := <strings of [0-9]> (because sort_value is unsigned)
+ *     STRING := <strings quoted by '"', '\'' or unquoted>
+ */
+
+/* Return a pointer to the first non-whitespace character of 's'. */
+static const char * _skip_space(const char *s)
+{
+       const char *p = s;
+
+       while (*p && isspace(*p))
+               p++;
+
+       return p;
+}
+
+/*
+ * Try to match the start of 's' (after skipping whitespace) against the
+ * operator table 't'.  If 'expect' is non-zero, only operators carrying
+ * at least one of those flags are considered.  On a match, *end points
+ * just past the operator string and its flags are returned; otherwise
+ * *end is the whitespace-skipped input and 0 is returned.
+ */
+static int _tok_op(struct op_def *t, const char *s, const char **end,
+                  uint32_t expect)
+{
+       size_t len;
+
+       s = _skip_space(s);
+
+       for (; t->string; t++) {
+               if (expect && !(t->flags & expect))
+                       continue;
+
+               len = strlen(t->string);
+               if (!strncmp(s, t->string, len)) {
+                       if (end)
+                               *end = s + len;
+                       return t->flags;
+               }
+       }
+
+       if (end)
+               *end = s;
+       return 0;
+}
+
+/* Match a logical/grouping operator (from _op_log) at the start of 's'. */
+static int _tok_op_log(const char *s, const char **end, uint32_t expect)
+{
+       return _tok_op(_op_log, s, end, expect);
+}
+
+/* Match a comparison operator (from _op_cmp) at the start of 's'. */
+static int _tok_op_cmp(const char *s, const char **end)
+{
+       return _tok_op(_op_cmp, s, end, 0);
+}
+
+/*
+ * If **s is a single or double quote, consume it and return the quote
+ * character; otherwise leave *s untouched and return 0.
+ */
+static char _get_and_skip_quote_char(char const **s)
+{
+       char quote = **s;
+
+       if (quote != '"' && quote != '\'')
+               return 0;
+
+       (*s)++;
+       return quote;
+}
+
+ /*
+  *
+  * Input:
+  *   s             - a pointer to the parsed string
+  * Output:
+  *   begin         - a pointer to the beginning of the token
+  *   end           - a pointer to the end of the token + 1
+  *                   or undefined if return value is NULL
+  *   return value  - a starting point of the next parsing or
+  *                   NULL if 's' doesn't match with token type
+  *                   (the parsing should be terminated)
+  */
+static const char *_tok_value_number(const char *s,
+                                    const char **begin, const char **end)
+
+{
+       int is_float = 0;
+
+       *begin = s;
+       while ((!is_float && (*s == '.') && ++is_float) || isdigit(*s))
+               s++;
+       *end = s;
+
+       if (*begin == *end)
+               return NULL;
+
+       return s;
+}
+
+/*
+ * Input:
+ *   s               - a pointer to the parsed string
+ *   endchar         - terminating character
+ *   end_op_flags    - terminating operator flags (see _op_log)
+ *                     (if endchar is non-zero then endflags is ignored)
+ * Output:
+ *   begin           - a pointer to the beginning of the token
+ *   end             - a pointer to the end of the token + 1
+ *   end_op_flag_hit - the flag from endflags hit during parsing
+ *   return value    - a starting point of the next parsing
+ */
+static const char *_tok_value_string(const char *s,
+                                    const char **begin, const char **end,
+                                    const char endchar, uint32_t end_op_flags,
+                                    uint32_t *end_op_flag_hit)
+{
+       uint32_t flag_hit = 0;
+
+       *begin = s;
+
+       /*
+        * If endchar is defined, scan the string till
+        * the endchar or the end of string is hit.
+        * This is in case the string is quoted and we
+        * know exact character that is the stopper.
+        */
+       if (endchar) {
+               while (*s && *s != endchar)
+                       s++;
+               /* Hitting '\0' instead of the closing quote is an error. */
+               if (*s != endchar) {
+                       log_error("Missing end quote.");
+                       return NULL;
+               }
+               *end = s;
+               s++;
+       } else {
+               /*
+                * If endchar is not defined then endchar is/are the
+                * operator/s as defined by 'endflags' arg or space char.
+                * This is in case the string is not quoted and
+                * we don't know which character is the exact stopper.
+                */
+               while (*s) {
+                       if ((flag_hit = _tok_op(_op_log, s, NULL, end_op_flags)) || *s == ' ')
+                               break;
+                       s++;
+               }
+               *end = s;
+               /*
+                * If we hit one of the strings as defined by 'endflags'
+                * and if 'endflag_hit' arg is provided, save the exact
+                * string flag that was hit.
+                * (flag_hit stays 0 when a space or end of string stopped us.)
+                */
+               if (end_op_flag_hit)
+                       *end_op_flag_hit = flag_hit;
+       }
+
+       return s;
+}
+
+/*
+ * Try to map string 's' (length 'len') onto one of the reserved value's
+ * registered names; with DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES the
+ * registered handler is also consulted to resolve inexact names.
+ * Returns the canonical name on success, NULL if not recognized.
+ */
+static const char *_reserved_name(struct dm_report *rh,
+                                 const struct dm_report_reserved_value *reserved,
+                                 const struct dm_report_field_reserved_value *frv,
+                                 uint32_t field_num, const char *s, size_t len)
+{
+       dm_report_reserved_handler handler;
+       const char *canonical_name;
+       const char **name;
+       char *tmp_s;
+       char c;
+       int r;
+
+       /* Exact match against the NULL-terminated list of names. */
+       name = reserved->names;
+       while (*name) {
+               if ((strlen(*name) == len) && !strncmp(*name, s, len))
+                       return *name;
+               name++;
+       }
+
+       if (reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES) {
+               handler = (dm_report_reserved_handler) (frv ? frv->value : reserved->value);
+               /* Temporarily NUL-terminate the token in place for the handler;
+                * the original character is restored below. */
+               c = s[len];
+               tmp_s = (char *) s;
+               tmp_s[len] = '\0';
+               if ((r = handler(rh, rh->selection->mem, field_num,
+                                DM_REPORT_RESERVED_PARSE_FUZZY_NAME,
+                                tmp_s, (const void **) &canonical_name)) <= 0) {
+                       if (r == -1)
+                               log_error(INTERNAL_ERROR "%s reserved value handler for field %s has missing "
+                                         "implementation of DM_REPORT_RESERVED_PARSE_FUZZY_NAME action",
+                                         (reserved->type & DM_REPORT_FIELD_TYPE_MASK) ? "type-specific" : "field-specific",
+                                          rh->fields[field_num].id);
+                       else
+                               /* Fix: corrected typo in user-visible message ("occured"). */
+                               log_error("Error occurred while processing %s reserved value handler for field %s",
+                                         (reserved->type & DM_REPORT_FIELD_TYPE_MASK) ? "type-specific" : "field-specific",
+                                          rh->fields[field_num].id);
+               }
+               tmp_s[len] = c;
+               if (r && canonical_name)
+                       return canonical_name;
+       }
+
+       return NULL;
+}
+
+/*
+ * Used to replace a string representation of the reserved value
+ * found in selection with the exact reserved value of certain type.
+ *
+ * On a match, *begin/*end delimit the matched token, rvw->reserved and
+ * rvw->matched_name are filled in, and the position just past the token
+ * is returned.  Otherwise 's' is returned unchanged and rvw->reserved
+ * stays NULL.
+ */
+static const char *_get_reserved(struct dm_report *rh, unsigned type,
+                                uint32_t field_num, int implicit,
+                                const char *s, const char **begin, const char **end,
+                                struct reserved_value_wrapper *rvw)
+{
+       const struct dm_report_reserved_value *iter = implicit ? NULL : rh->reserved_values;
+       const struct dm_report_field_reserved_value *frv;
+       const char *tmp_begin, *tmp_end, *tmp_s = s;
+       const char *name = NULL;
+       char c;
+
+       rvw->reserved = NULL;
+
+       /* Implicit fields have no user-registered reserved values. */
+       if (!iter)
+               return s;
+
+       /* Tokenize speculatively; only commit 's' if a reserved name matches. */
+       c = _get_and_skip_quote_char(&tmp_s);
+       if (!(tmp_s = _tok_value_string(tmp_s, &tmp_begin, &tmp_end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL)))
+               return s;
+
+       /* The reserved value array is terminated by a NULL 'value'. */
+       while (iter->value) {
+               if (!(iter->type & DM_REPORT_FIELD_TYPE_MASK)) {
+                       /* DM_REPORT_FIELD_TYPE_NONE - per-field reserved value */
+                       frv = (const struct dm_report_field_reserved_value *) iter->value;
+                       if ((frv->field_num == field_num) && (name = _reserved_name(rh, iter, frv, field_num,
+                                                                                   tmp_begin, tmp_end - tmp_begin)))
+                               break;
+               } else if (iter->type & type) {
+                       /* DM_REPORT_FIELD_TYPE_* - per-type reserved value */
+                       if ((name = _reserved_name(rh, iter, NULL, field_num,
+                                                  tmp_begin, tmp_end - tmp_begin)))
+                               break;
+               }
+               iter++;
+       }
+
+       if (name) {
+               /* found! */
+               *begin = tmp_begin;
+               *end = tmp_end;
+               s = tmp_s;
+               rvw->reserved = iter;
+               rvw->matched_name = name;
+       }
+
+       return s;
+}
+
+/* Convert a dm_percent_t fixed-point value to a plain float percentage. */
+float dm_percent_to_float(dm_percent_t percent)
+{
+       float f = (float) percent / DM_PERCENT_1;
+
+       /* The "+ 0.f" normalizes a negative zero so -0.00 is never returned. */
+       return f + 0.f;
+}
+
+/*
+ * Convert a percent value to float while keeping values that are very
+ * close to 0% or 100% away from the exact boundary when rounded to
+ * 'digits' decimal places - the boundaries are reserved for true
+ * DM_PERCENT_0 / DM_PERCENT_100.
+ */
+float dm_percent_to_round_float(dm_percent_t percent, unsigned digits)
+{
+       static const float power10[] = {
+               1.f, .1f, .01f, .001f, .0001f, .00001f, .000001f,
+               .0000001f, .00000001f, .000000001f,
+               .0000000001f
+       };
+       float r;
+       float f = dm_percent_to_float(percent);
+
+       if (digits >= DM_ARRAY_SIZE(power10))
+               digits = DM_ARRAY_SIZE(power10) - 1; /* no better precision */
+
+       /* Smallest percent step representable at the requested precision. */
+       r = DM_PERCENT_1 * power10[digits];
+
+       /* Clamp near-0% up to one step, near-100% down by one step. */
+       if ((percent < r) && (percent > DM_PERCENT_0))
+               f = power10[digits];
+       else if ((percent > (DM_PERCENT_100 - r)) && (percent < DM_PERCENT_100))
+               f = (float) (DM_PERCENT_100 - r) / DM_PERCENT_1;
+
+       return f;
+}
+
+/*
+ * Build a dm_percent_t from a numerator/denominator pair.  Exact 0% and
+ * 100% are returned only for the true zero and equal-value cases; any
+ * other ratio is nudged off the boundaries by one unit.
+ */
+dm_percent_t dm_make_percent(uint64_t numerator, uint64_t denominator)
+{
+       dm_percent_t percent;
+
+       /* Division by zero is mapped to 100% rather than treated as an error. */
+       if (!denominator)
+               return DM_PERCENT_100; /* FIXME? */
+       if (!numerator)
+               return DM_PERCENT_0;
+       if (numerator == denominator)
+               return DM_PERCENT_100;
+
+       percent = DM_PERCENT_100 * ((double) numerator / (double) denominator);
+
+       /* A partial ratio must never round to exactly 0% or 100%. */
+       if (percent == DM_PERCENT_100)
+               return DM_PERCENT_100 - 1;
+       if (percent == DM_PERCENT_0)
+               return DM_PERCENT_0 + 1;
+
+       return percent;
+}
+
+/*
+ * Store 'data' under 'name' in the report's value cache, creating the
+ * cache hash on first use.  Returns 1 on success, 0 on failure.
+ */
+int dm_report_value_cache_set(struct dm_report *rh, const char *name, const void *data)
+{
+       /* Lazily create the hash table backing the cache. */
+       if (!rh->value_cache && (!(rh->value_cache = dm_hash_create(64)))) {
+               log_error("Failed to create cache for values used during reporting.");
+               return 0;
+       }
+
+       return dm_hash_insert(rh->value_cache, name, (void *) data);
+}
+
+/* Look up 'name' in the report's value cache; NULL if absent or no cache yet. */
+const void *dm_report_value_cache_get(struct dm_report *rh, const char *name)
+{
+       /* A cache that was never created holds nothing. */
+       if (!rh->value_cache)
+               return NULL;
+
+       return dm_hash_lookup(rh->value_cache, name);
+}
+
+/*
+ * Used to check whether the reserved_values definition passed to
+ * dm_report_init_with_selection contains only supported reserved value types.
+ *
+ * Returns 1 if all entries are supported, 0 (with an internal error
+ * logged) on the first unsupported entry.
+ */
+static int _check_reserved_values_supported(const struct dm_report_field_type fields[],
+                                           const struct dm_report_reserved_value reserved_values[])
+{
+       const struct dm_report_reserved_value *iter;
+       const struct dm_report_field_reserved_value *field_res;
+       const struct dm_report_field_type *field;
+       /* Types that may carry reserved values at all. */
+       static uint32_t supported_reserved_types = DM_REPORT_FIELD_TYPE_NUMBER |
+                                                  DM_REPORT_FIELD_TYPE_SIZE |
+                                                  DM_REPORT_FIELD_TYPE_PERCENT |
+                                                  DM_REPORT_FIELD_TYPE_STRING |
+                                                  DM_REPORT_FIELD_TYPE_TIME;
+       /* Of those, only comparable types may define range reserved values. */
+       static uint32_t supported_reserved_types_with_range = DM_REPORT_FIELD_RESERVED_VALUE_RANGE |
+                                                             DM_REPORT_FIELD_TYPE_NUMBER |
+                                                             DM_REPORT_FIELD_TYPE_SIZE |
+                                                             DM_REPORT_FIELD_TYPE_PERCENT |
+                                                             DM_REPORT_FIELD_TYPE_TIME;
+
+
+       /* No reserved values defined at all is trivially OK. */
+       if (!reserved_values)
+               return 1;
+
+       iter = reserved_values;
+
+       /* The array is terminated by an entry with a NULL 'value'. */
+       while (iter->value) {
+               if (iter->type & DM_REPORT_FIELD_TYPE_MASK) {
+                       /* Type-specific (global) reserved value. */
+                       if (!(iter->type & supported_reserved_types) ||
+                           ((iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE) &&
+                            !(iter->type & supported_reserved_types_with_range))) {
+                               log_error(INTERNAL_ERROR "_check_reserved_values_supported: "
+                                         "global reserved value for type 0x%x not supported",
+                                          iter->type);
+                               return 0;
+                       }
+               } else {
+                       /* Field-specific reserved value: check the field's own type. */
+                       field_res = (const struct dm_report_field_reserved_value *) iter->value;
+                       field = &fields[field_res->field_num];
+                       if (!(field->flags & supported_reserved_types) ||
+                           ((iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE) &&
+                            !(iter->type & supported_reserved_types_with_range))) {
+                               log_error(INTERNAL_ERROR "_check_reserved_values_supported: "
+                                         "field-specific reserved value of type 0x%x for "
+                                         "field %s not supported",
+                                          field->flags & DM_REPORT_FIELD_TYPE_MASK, field->id);
+                               return 0;
+                       }
+               }
+               iter++;
+       }
+
+       return 1;
+}
+
+/*
+ * Tokenize a regular expression value for a selection field.
+ *
+ * Input:
+ *   ft              - field type for which the value is parsed
+ *   s               - a pointer to the parsed string
+ * Output:
+ *   begin           - a pointer to the beginning of the token
+ *   end             - a pointer to the end of the token + 1
+ *   flags           - parsing flags
+ */
+static const char *_tok_value_regex(struct dm_report *rh,
+                                   const struct dm_report_field_type *ft,
+                                   const char *s, const char **begin,
+                                   const char **end, uint32_t *flags,
+                                   struct reserved_value_wrapper *rvw)
+{
+       char c = 0;
+
+       rvw->reserved = NULL;
+
+       s = _skip_space(s);
+
+       if (!*s) {
+               log_error("Regular expression expected for selection field %s", ft->id);
+               return NULL;
+       }
+
+       /* Pick the closing delimiter if the regex is enclosed in one. */
+       if (*s == '(')
+               c = ')';
+       else if (*s == '{')
+               c = '}';
+       else if (*s == '[')
+               c = ']';
+       else if (*s == '"' || *s == '\'')
+               c = *s;
+
+       /* Skip the opening delimiter (if any) and grab the regex body. */
+       if (!(s = _tok_value_string(c ? s + 1 : s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) {
+               log_error("Failed to parse regex value for selection field %s.", ft->id);
+               return NULL;
+       }
+
+       *flags |= DM_REPORT_FIELD_TYPE_STRING;
+       return s;
+}
+
+/* qsort comparator: order struct dm_str_list items by their strings. */
+static int _str_list_item_cmp(const void *a, const void *b)
+{
+       const struct dm_str_list * const *sl_a = (const struct dm_str_list * const *) a;
+       const struct dm_str_list * const *sl_b = (const struct dm_str_list * const *) b;
+
+       return strcmp((*sl_a)->str, (*sl_b)->str);
+}
+
+/*
+ * Allocate a dm_str_list item for the token delimited by [begin, end)
+ * and append it to 'list'.  An empty token is stored as "".
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int _add_item_to_string_list(struct dm_pool *mem, const char *begin,
+                                   const char *end, struct dm_list *list)
+{
+       struct dm_str_list *item;
+
+       if (!(item = dm_pool_zalloc(mem, sizeof(*item))) ||
+           !(item->str = begin == end ? "" : dm_pool_strndup(mem, begin, end - begin))) {
+               log_error("_add_item_to_string_list: memory allocation failed for string list item");
+               return 0;
+       }
+       dm_list_add(list, &item->list);
+
+       return 1;
+}
+
+/*
+ * Tokenize a string-list value: either a single bare item, or several
+ * items enclosed in [] (strict list) or {} (subset list), separated by
+ * one consistent logical operator (all AND or all OR).  The resulting
+ * items are sorted.
+ *
+ * Input:
+ *   ft              - field type for which the value is parsed
+ *   mem             - memory pool to allocate from
+ *   s               - a pointer to the parsed string
+ * Output:
+ *   begin           - a pointer to the beginning of the token (whole list)
+ *   end             - a pointer to the end of the token + 1 (whole list)
+ *   sel_str_list    - the list of strings parsed
+ */
+static const char *_tok_value_string_list(const struct dm_report_field_type *ft,
+                                         struct dm_pool *mem, const char *s,
+                                         const char **begin, const char **end,
+                                         struct selection_str_list **sel_str_list)
+{
+       static const char _str_list_item_parsing_failed[] = "Failed to parse string list value "
+                                                           "for selection field %s.";
+       struct selection_str_list *ssl = NULL;
+       struct dm_str_list *item;
+       const char *begin_item = NULL, *end_item = NULL, *tmp;
+       uint32_t op_flags, end_op_flag_expected, end_op_flag_hit = 0;
+       struct dm_str_list **arr;
+       size_t list_size;
+       unsigned int i;
+       int list_end = 0;
+       char c;
+
+       if (!(ssl = dm_pool_alloc(mem, sizeof(*ssl)))) {
+               log_error("_tok_value_string_list: memory allocation failed for selection list");
+               goto bad;
+       }
+       dm_list_init(&ssl->str_list.list);
+       ssl->type = 0;
+       *begin = s;
+
+       if (!(op_flags = _tok_op_log(s, &tmp, SEL_LIST_LS | SEL_LIST_SUBSET_LS))) {
+               /* Only one item - SEL_LIST_{SUBSET_}LS and SEL_LIST_{SUBSET_}LE not used */
+               c = _get_and_skip_quote_char(&s);
+               if (!(s = _tok_value_string(s, &begin_item, &end_item, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) {
+                       log_error(_str_list_item_parsing_failed, ft->id);
+                       goto bad;
+               }
+               if (!_add_item_to_string_list(mem, begin_item, end_item, &ssl->str_list.list))
+                       goto_bad;
+               ssl->type = SEL_OR | SEL_LIST_LS;
+               goto out;
+       }
+
+       /* More than one item - items enclosed in SEL_LIST_LS and SEL_LIST_LE
+        * or SEL_LIST_SUBSET_LS and SEL_LIST_SUBSET_LE.
+        * Each element is terminated by AND or OR operator or 'list end'.
+        * The first operator hit is then the one allowed for the whole list,
+        * no mixing allowed!
+        */
+
+       /* Are we using [] or {} for the list? */
+       end_op_flag_expected = (op_flags == SEL_LIST_LS) ? SEL_LIST_LE : SEL_LIST_SUBSET_LE;
+
+       /* After each item, accept a closing bracket or a logical operator. */
+       op_flags = SEL_LIST_LE | SEL_LIST_SUBSET_LE | SEL_AND | SEL_OR;
+       s++;
+       while (*s) {
+               s = _skip_space(s);
+               c = _get_and_skip_quote_char(&s);
+               if (!(s = _tok_value_string(s, &begin_item, &end_item, c, op_flags, NULL))) {
+                       log_error(_str_list_item_parsing_failed, ft->id);
+                       goto bad;
+               }
+               s = _skip_space(s);
+
+               if (!(end_op_flag_hit = _tok_op_log(s, &tmp, op_flags))) {
+                       log_error("Invalid operator in selection list.");
+                       goto bad;
+               }
+
+               if (end_op_flag_hit & (SEL_LIST_LE | SEL_LIST_SUBSET_LE)) {
+                       list_end = 1;
+                       /* The closing bracket must match the opening one. */
+                       if (end_op_flag_hit != end_op_flag_expected) {
+                               for (i = 0; _op_log[i].string; i++)
+                                       if (_op_log[i].flags == end_op_flag_expected)
+                                               break;
+                               log_error("List ended with incorrect character, "
+                                         "expecting \'%s\'.", _op_log[i].string);
+                               goto bad;
+                       }
+               }
+
+               if (ssl->type) {
+                       /* Subsequent operators must match the first one seen. */
+                       if (!list_end && !(ssl->type & end_op_flag_hit)) {
+                               log_error("Only one type of logical operator allowed "
+                                         "in selection list at a time.");
+                               goto bad;
+                       }
+               } else {
+                       /* First operator decides the list's logic; a one-item
+                        * bracketed list defaults by bracket kind. */
+                       if (list_end)
+                               ssl->type = end_op_flag_expected == SEL_LIST_LE ? SEL_AND : SEL_OR;
+                       else
+                               ssl->type = end_op_flag_hit;
+               }
+
+               if (!_add_item_to_string_list(mem, begin_item, end_item, &ssl->str_list.list))
+                       goto_bad;
+
+               s = tmp;
+
+               if (list_end)
+                       break;
+       }
+
+       if (!(end_op_flag_hit & (SEL_LIST_LE | SEL_LIST_SUBSET_LE))) {
+               log_error("Missing list end for selection field %s", ft->id);
+               goto bad;
+       }
+
+       /* Store information whether [] or {} was used. */
+       if (end_op_flag_expected == SEL_LIST_LE)
+               ssl->type |= SEL_LIST_LS;
+       else
+               ssl->type |= SEL_LIST_SUBSET_LS;
+
+       /* Sort the list. */
+       if (!(list_size = dm_list_size(&ssl->str_list.list))) {
+               log_error(INTERNAL_ERROR "_tok_value_string_list: list has no items");
+               goto bad;
+       } else if (list_size == 1)
+               goto out;
+       /* Sort via a temporary array of item pointers, then relink the list. */
+       if (!(arr = dm_malloc(sizeof(item) * list_size))) {
+               log_error("_tok_value_string_list: memory allocation failed for sort array");
+               goto bad;
+       }
+
+       i = 0;
+       dm_list_iterate_items(item, &ssl->str_list.list)
+               arr[i++] = item;
+       qsort(arr, list_size, sizeof(item), _str_list_item_cmp);
+       dm_list_init(&ssl->str_list.list);
+       for (i = 0; i < list_size; i++)
+               dm_list_add(&ssl->str_list.list, &arr[i]->list);
+
+       dm_free(arr);
+out:
+       *end = s;
+        if (sel_str_list)
+               *sel_str_list = ssl;
+
+       return s;
+bad:
+       *end = s;
+       if (ssl)
+               dm_pool_free(mem, ssl);
+        if (sel_str_list)
+               *sel_str_list = NULL;
+       return s;
+}
+
+/* Parsed time selection value: a point in time or a [t1, t2] interval. */
+struct time_value {
+       int range;      /* non-zero if t1..t2 describe a range */
+       time_t t1;      /* start (or the single point in time) */
+       time_t t2;      /* end of the range; unused if !range */
+};
+
+/* Common message for numeric selection values exceeding a field's range. */
+static const char *_out_of_range_msg = "Field selection value %s out of supported range for field %s.";
+
+/*
+ * Standard formatted date and time - ISO8601.
+ *
+ * date time timezone
+ *
+ * date:
+ * YYYY-MM-DD (or shortly YYYYMMDD)
+ * YYYY-MM (shortly YYYYMM), auto DD=1
+ * YYYY, auto MM=01 and DD=01
+ *
+ * time:
+ * hh:mm:ss (or shortly hhmmss)
+ * hh:mm (or shortly hhmm), auto ss=0
+ * hh (or shortly hh), auto mm=0, auto ss=0
+ *
+ * timezone:
+ * +hh:mm or -hh:mm (or shortly +hhmm or -hhmm)
+ * +hh or -hh
+*/
+
+/* Field delimiters used by the ISO8601-like date/time parser above. */
+#define DELIM_DATE '-'
+#define DELIM_TIME ':'
+
+/* Days per month in a non-leap year; February is adjusted for leap years. */
+static int _days_in_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+/*
+ * Gregorian leap year test: divisible by 4, except centuries,
+ * unless also divisible by 400.
+ */
+static int _is_leap_year(long year)
+{
+       if (year % 400 == 0)
+               return 1;
+       if (year % 100 == 0)
+               return 0;
+       return year % 4 == 0;
+}
+
+/*
+ * Number of days in 'month' of 'year', accounting for leap-year February.
+ * NOTE(review): 'month' must be 1-12 and 'year' an absolute year -
+ * month 0 would index _days_in_month[-1]; callers must validate first.
+ */
+static int _get_days_in_month(long month, long year)
+{
+       return (month == 2 && _is_leap_year(year)) ? _days_in_month[month-1] + 1
+                                                  : _days_in_month[month-1];
+}
+
+/*
+ * Precision of a parsed date/time value, from finest (second) to
+ * coarsest (year).  Determines how wide the matched time range is.
+ */
+typedef enum {
+       RANGE_NONE,
+       RANGE_SECOND,
+       RANGE_MINUTE,
+       RANGE_HOUR,
+       RANGE_DAY,
+       RANGE_MONTH,
+       RANGE_YEAR
+} time_range_t;
+
+/*
+ * Parse the date part of an ISO8601-like string: YYYY-MM-DD, YYYY-MM,
+ * YYYY, or the compact YYYYMMDD/YYYYMM forms.  Fills in tm_year/tm_mon/
+ * tm_mday and sets *range to the precision given.  Returns the position
+ * after the date with trailing spaces skipped, or NULL on error.
+ */
+static char *_get_date(char *str, struct tm *tm, time_range_t *range)
+{
+       static const char incorrect_date_format_msg[] = "Incorrect date format.";
+       time_range_t tmp_range = RANGE_NONE;
+       long n1, n2 = -1, n3 = -1;
+       char *s = str, *end;
+       size_t len = 0;
+
+       /* Cast to unsigned char: isdigit is undefined for negative char values. */
+       if (!isdigit((unsigned char) *s))
+               /* we need a year at least */
+               return NULL;
+
+       n1 = strtol(s, &end, 10);
+       if (*end == DELIM_DATE) {
+               len += (4 - (end - s)); /* diff in length from standard YYYY */
+               s = end + 1;
+               if (isdigit((unsigned char) *s)) {
+                       n2 = strtol(s, &end, 10);
+                       len += (2 - (end - s)); /* diff in length from standard MM */
+                       if (*end == DELIM_DATE) {
+                               s = end + 1;
+                               n3 = strtol(s, &end, 10);
+                               len += (2 - (end - s)); /* diff in length from standard DD */
+                       }
+               }
+       }
+
+       /* Normalized length distinguishes the compact (delimiter-less) forms. */
+       len = len + end - str;
+
+       /* variations from standard YYYY-MM-DD */
+       if (n3 == -1) {
+               if (n2 == -1) {
+                       if (len == 4) {
+                               /* YYYY */
+                               tmp_range = RANGE_YEAR;
+                               n3 = n2 = 1;
+                       } else if (len == 6) {
+                               /* YYYYMM */
+                               tmp_range = RANGE_MONTH;
+                               n3 = 1;
+                               n2 = n1 % 100;
+                               n1 = n1 / 100;
+                       } else if (len == 8) {
+                               tmp_range = RANGE_DAY;
+                               /* YYYYMMDD */
+                               n3 = n1 % 100;
+                               n2 = (n1 / 100) % 100;
+                               n1 = n1 / 10000;
+                       } else {
+                               log_error(incorrect_date_format_msg);
+                               return NULL;
+                       }
+               } else {
+                       if (len == 7) {
+                               tmp_range = RANGE_MONTH;
+                               /* YYYY-MM */
+                               n3 = 1;
+                       } else {
+                               log_error(incorrect_date_format_msg);
+                               return NULL;
+                       }
+               }
+       }
+
+       if (n2 < 1 || n2 > 12) {
+               log_error("Specified month out of range.");
+               return NULL;
+       }
+
+       if (n3 < 1 || n3 > _get_days_in_month(n2, n1)) {
+               log_error("Specified day out of range.");
+               return NULL;
+       }
+
+       if (tmp_range == RANGE_NONE)
+               tmp_range = RANGE_DAY;
+
+       /* struct tm convention: years since 1900, months 0-11. */
+       tm->tm_year = n1 - 1900;
+       tm->tm_mon = n2 - 1;
+       tm->tm_mday = n3;
+       *range = tmp_range;
+
+       return (char *) _skip_space(end);
+}
+
+/*
+ * Parse the optional time part: HH:MM:SS, HH:MM, HH or the compact
+ * HHMMSS/HHMM forms.  If no time is present, hours/minutes/seconds are
+ * zeroed.  Fills tm_hour/tm_min/tm_sec and refines *range.  Returns the
+ * position after the time with trailing spaces skipped, or NULL on error.
+ */
+static char *_get_time(char *str, struct tm *tm, time_range_t *range)
+{
+       static const char incorrect_time_format_msg[] = "Incorrect time format.";
+       time_range_t tmp_range = RANGE_NONE;
+       long n1, n2 = -1, n3 = -1;
+       char *s = str, *end;
+       size_t len = 0;
+
+       /* Cast to unsigned char: isdigit is undefined for negative char values. */
+       if (!isdigit((unsigned char) *s)) {
+               /* time is not compulsory */
+               tm->tm_hour = tm->tm_min = tm->tm_sec = 0;
+               return (char *) _skip_space(s);
+       }
+
+       n1 = strtol(s, &end, 10);
+       if (*end == DELIM_TIME) {
+               len += (2 - (end - s)); /* diff in length from standard HH */
+               s = end + 1;
+               if (isdigit((unsigned char) *s)) {
+                       n2 = strtol(s, &end, 10);
+                       len += (2 - (end - s)); /* diff in length from standard MM */
+                       if (*end == DELIM_TIME) {
+                               s = end + 1;
+                               n3 = strtol(s, &end, 10);
+                               len += (2 - (end - s)); /* diff in length from standard SS */
+                       }
+               }
+       }
+
+       /* Normalized length distinguishes the compact (delimiter-less) forms. */
+       len = len + end - str;
+
+       /* variations from standard HH:MM:SS */
+       if (n3 == -1) {
+               if (n2 == -1) {
+                       if (len == 2) {
+                               /* HH */
+                               tmp_range = RANGE_HOUR;
+                               n3 = n2 = 0;
+                       } else if (len == 4) {
+                               /* HHMM */
+                               tmp_range = RANGE_MINUTE;
+                               n3 = 0;
+                               n2 = n1 % 100;
+                               n1 = n1 / 100;
+                       } else if (len == 6) {
+                               /* HHMMSS */
+                               tmp_range = RANGE_SECOND;
+                               n3 = n1 % 100;
+                               n2 = (n1 / 100) % 100;
+                               n1 = n1 / 10000;
+                       } else {
+                               log_error(incorrect_time_format_msg);
+                               return NULL;
+                       }
+               } else {
+                       if (len == 5) {
+                               /* HH:MM */
+                               tmp_range = RANGE_MINUTE;
+                               n3 = 0;
+                       } else {
+                               log_error(incorrect_time_format_msg);
+                               return NULL;
+                       }
+               }
+       }
+
+       if (n1 < 0 || n1 > 23) {
+               log_error("Specified hours out of range.");
+               return NULL;
+       }
+
+       /* NOTE(review): 60 is accepted for minutes as well as seconds here -
+        * presumably mirroring the leap-second allowance; confirm intent. */
+       if (n2 < 0 || n2 > 60) {
+               log_error("Specified minutes out of range.");
+               return NULL;
+       }
+
+       if (n3 < 0 || n3 > 60) {
+               log_error("Specified seconds out of range.");
+               return NULL;
+       }
+
+       /* Just time without exact date is incomplete! */
+       if (*range != RANGE_DAY) {
+               log_error("Full date specification needed.");
+               return NULL;
+       }
+
+       tm->tm_hour = n1;
+       tm->tm_min = n2;
+       tm->tm_sec = n3;
+       *range = tmp_range;
+
+       return (char *) _skip_space(end);
+}
+
+/*
+ * Parse an optional timezone suffix: +HH:MM, -HH:MM, +HHMM, -HHMM,
+ * +HH or -HH.  On success *offset is the timezone offset in seconds
+ * and *tz_supplied is set; with no timezone present both stay zero.
+ * Returns the position after the timezone, or NULL on a malformed one.
+ *
+ * The offset is always an absolute offset against GMT!
+ */
+static char *_get_tz(char *str, int *tz_supplied, int *offset)
+{
+       long n1, n2 = -1;
+       char *s = str, *end;
+       int sign = 1; /* +HH:MM by default */
+       size_t len = 0;
+
+       *tz_supplied = 0;
+       *offset = 0;
+
+       /* Cast to unsigned char: isdigit is undefined for negative char values. */
+       if (!isdigit((unsigned char) *s)) {
+               if (*s == '+')  {
+                       sign = 1;
+                       s = s + 1;
+               } else if (*s == '-') {
+                       sign = -1;
+                       s = s + 1;
+               } else
+                       return (char *) _skip_space(s);
+       }
+
+       n1 = strtol(s, &end, 10);
+       if (*end == DELIM_TIME) {
+               len = (2 - (end - s)); /* diff in length from standard HH */
+               s = end + 1;
+               if (isdigit((unsigned char) *s)) {
+                       n2 = strtol(s, &end, 10);
+                       len = (2 - (end - s)); /* diff in length from standard MM */
+               }
+       }
+
+       /* Normalized length distinguishes the compact (delimiter-less) forms. */
+       len = len + end - s;
+
+       /* variations from standard HH:MM */
+       if (n2 == -1) {
+               if (len == 2) {
+                       /* HH */
+                       n2 = 0;
+               } else if (len == 4) {
+                       /* HHMM */
+                       n2 = n1 % 100;
+                       n1 = n1 / 100;
+               } else
+                       return NULL;
+       }
+
+       if (n2 < 0 || n2 > 60)
+               return NULL;
+
+       /* Real-world timezones span -12 to +14 hours from GMT. */
+       if (n1 < 0 || n1 > 14)
+               return NULL;
+
+       /* timezone offset in seconds */
+       *offset = sign * ((n1 * 3600) + (n2 * 60));
+       *tz_supplied = 1;
+       return (char *) _skip_space(end);
+}
+
+/*
+ * Absolute offset (in seconds) of the local timezone from GMT
+ * for the given local time value.
+ */
+static int _local_tz_offset(time_t t_local)
+{
+       struct tm gmt_tm;
+       time_t gmt_t;
+
+       gmtime_r(&t_local, &gmt_tm);
+       gmt_t = mktime(&gmt_tm);
+
+       /*
+        * mktime interprets the broken-down GMT time as local time and
+        * applies a DST adjustment; subtract that adjustment back so the
+        * difference below is the plain absolute offset of our timezone.
+        */
+       if (gmt_tm.tm_isdst)
+               gmt_t -= 3600;
+
+       return t_local - gmt_t;
+}
+
+/*
+ * Turn a parsed broken-down time plus its precision into a concrete
+ * [t1, t2] interval of time_t seconds.  'tm' is the lower bound; the
+ * upper bound is obtained by bumping the least significant specified
+ * unit by one, with the switch falling through to carry into the next
+ * coarser unit on overflow.
+ */
+static void _get_final_time(time_range_t range, struct tm *tm,
+                           int tz_supplied, int offset,
+                           struct time_value *tval)
+{
+
+       struct tm tm_up = *tm;
+
+       switch (range) {
+               case RANGE_SECOND:
+                       if (tm_up.tm_sec < 59) {
+                               tm_up.tm_sec += 1;
+                               break;
+                       }
+                       /* fall through */
+               case RANGE_MINUTE:
+                       if (tm_up.tm_min < 59) {
+                               tm_up.tm_min += 1;
+                               break;
+                       }
+                       /* fall through */
+               case RANGE_HOUR:
+                       if (tm_up.tm_hour < 23) {
+                               tm_up.tm_hour += 1;
+                               break;
+                       }
+                       /* fall through */
+               case RANGE_DAY:
+                       /*
+                        * struct tm stores months as 0-11 and years offset by
+                        * 1900 while _get_days_in_month expects a 1-12 month
+                        * and an absolute year - convert here, otherwise
+                        * January would index _days_in_month[-1] and leap
+                        * years would be computed from the wrong year.
+                        */
+                       if (tm_up.tm_mday < _get_days_in_month(tm_up.tm_mon + 1, tm_up.tm_year + 1900)) {
+                               tm_up.tm_mday += 1;
+                               break;
+                       }
+                       /* fall through */
+               case RANGE_MONTH:
+                       if (tm_up.tm_mon < 11) {
+                               tm_up.tm_mon += 1;
+                               break;
+                       }
+                       /* fall through */
+               case RANGE_YEAR:
+                       tm_up.tm_year += 1;
+                       break;
+               case RANGE_NONE:
+                       /* nothing to do here */
+                       break;
+       }
+
+       tval->range = (range != RANGE_NONE);
+       tval->t1 = mktime(tm);
+       tval->t2 = mktime(&tm_up) - 1;
+
+       if (tz_supplied) {
+               /*
+                * The 'offset' is with respect to the GMT.
+                * Calculate what the offset is with respect
+                * to our local timezone and adjust times
+                * so they represent time in our local timezone.
+                */
+               offset -= _local_tz_offset(tval->t1);
+               tval->t1 -= offset;
+               tval->t2 -= offset;
+       }
+}
+
+/*
+ * Parse an ISO8601-like "date [time] [timezone]" string into tval.
+ * Returns 1 on success, 0 on parse error or trailing garbage.
+ */
+static int _parse_formatted_date_time(char *str, struct time_value *tval)
+{
+       time_range_t range = RANGE_NONE;
+       struct tm tm = {0};
+       int gmt_offset;
+       int tz_supplied;
+
+       /* Mark all fields unknown; the parsers below fill in what's given. */
+       tm.tm_year = tm.tm_mday = tm.tm_mon = -1;
+       tm.tm_hour = tm.tm_min = tm.tm_sec = -1;
+       tm.tm_isdst = tm.tm_wday = tm.tm_yday = -1;
+
+       if (!(str = _get_date(str, &tm, &range)))
+               return 0;
+
+       if (!(str = _get_time(str, &tm, &range)))
+               return 0;
+
+       if (!(str = _get_tz(str, &tz_supplied, &gmt_offset)))
+               return 0;
+
+       /* Anything left over means the string was not fully consumed. */
+       if (*str)
+               return 0;
+
+       _get_final_time(range, &tm, tz_supplied, gmt_offset, tval);
+
+       return 1;
+}
+
+/*
+ * Tokenize a time value for a selection field: either "@<seconds since
+ * epoch>" or a formatted date/time string.  Fills tval and *begin/*end;
+ * returns the position after the token or NULL on error.
+ */
+static const char *_tok_value_time(const struct dm_report_field_type *ft,
+                                  struct dm_pool *mem, const char *s,
+                                  const char **begin, const char **end,
+                                  struct time_value *tval)
+{
+       char *time_str = NULL;
+       const char *r = NULL;
+       uint64_t t;
+       char c;
+
+       s = _skip_space(s);
+
+       if (*s == '@') {
+               /* Absolute time value in number of seconds since epoch. */
+               if (!(s = _tok_value_number(s+1, begin, end)))
+                       goto_out;
+
+               if (!(time_str = dm_pool_strndup(mem, *begin, *end - *begin))) {
+                       log_error("_tok_value_time: dm_pool_strndup failed");
+                       goto out;
+               }
+
+               /* strtoull saturates at ULLONG_MAX; check errno to detect overflow. */
+               errno = 0;
+               if (((t = strtoull(time_str, NULL, 10)) == ULLONG_MAX) && errno == ERANGE) {
+                       log_error(_out_of_range_msg, time_str, ft->id);
+                       goto out;
+               }
+
+               tval->range = 0;
+               tval->t1 = (time_t) t;
+               tval->t2 = 0;
+               r = s;
+       } else {
+               c = _get_and_skip_quote_char(&s);
+               if (!(s = _tok_value_string(s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL)))
+                       goto_out;
+
+               if (!(time_str = dm_pool_strndup(mem, *begin, *end - *begin))) {
+                       log_error("tok_value_time: dm_pool_strndup failed");
+                       goto out;
+               }
+
+               if (!_parse_formatted_date_time(time_str, tval))
+                       goto_out;
+               r = s;
+       }
+out:
+       /* time_str is scratch space only - return it to the pool either way. */
+       if (time_str)
+               dm_pool_free(mem, time_str);
+       return r;
+}
+
+/*
+ * Parse a single selection value token, dispatching on the field's type.
+ *
+ * Input:
+ *   rh              - report handle (used to look up reserved values)
+ *   ft              - field type for which the value is parsed
+ *   field_num       - index of the field the value belongs to
+ *   implicit        - non-zero if the field is an implicit report field
+ *   s               - a pointer to the parsed string
+ *   mem             - memory pool to allocate from
+ * Output:
+ *   begin           - a pointer to the beginning of the token
+ *   end             - a pointer to the end of the token + 1
+ *   flags           - parsing flags
+ *   rvw             - reserved value wrapper, set if a reserved value matched
+ *   custom          - custom data specific to token type
+ *                     (e.g. size unit factor)
+ */
+static const char *_tok_value(struct dm_report *rh,
+                             const struct dm_report_field_type *ft,
+                             uint32_t field_num, int implicit,
+                             const char *s,
+                             const char **begin, const char **end,
+                             uint32_t *flags,
+                             struct reserved_value_wrapper *rvw,
+                             struct dm_pool *mem, void *custom)
+{
+       int expected_type = ft->flags & DM_REPORT_FIELD_TYPE_MASK;
+       struct selection_str_list **str_list;
+       struct time_value *tval;
+       uint64_t *factor;
+       const char *tmp;
+       char c;
+
+       s = _skip_space(s);
+
+       /* A matching reserved value takes precedence over a literal token. */
+       s = _get_reserved(rh, expected_type, field_num, implicit, s, begin, end, rvw);
+       if (rvw->reserved) {
+               /*
+                * FLD_CMP_NUMBER shares operators with FLD_CMP_TIME,
+                * so adjust flags here based on expected type.
+                */
+               if (expected_type == DM_REPORT_FIELD_TYPE_TIME)
+                       *flags &= ~FLD_CMP_NUMBER;
+               else if (expected_type == DM_REPORT_FIELD_TYPE_NUMBER)
+                       *flags &= ~FLD_CMP_TIME;
+               *flags |= expected_type;
+               return s;
+       }
+
+       switch (expected_type) {
+
+               case DM_REPORT_FIELD_TYPE_STRING:
+                       c = _get_and_skip_quote_char(&s);
+                       if (!(s = _tok_value_string(s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) {
+                               log_error("Failed to parse string value "
+                                         "for selection field %s.", ft->id);
+                               return NULL;
+                       }
+                       *flags |= DM_REPORT_FIELD_TYPE_STRING;
+                       break;
+
+               case DM_REPORT_FIELD_TYPE_STRING_LIST:
+                       /* For string lists, 'custom' carries the output list pointer. */
+                       if (!(str_list = (struct selection_str_list **) custom))
+                               goto_bad;
+
+                       s = _tok_value_string_list(ft, mem, s, begin, end, str_list);
+                       if (!(*str_list)) {
+                               log_error("Failed to parse string list value "
+                                         "for selection field %s.", ft->id);
+                               return NULL;
+                       }
+                       *flags |= DM_REPORT_FIELD_TYPE_STRING_LIST;
+                       break;
+
+               case DM_REPORT_FIELD_TYPE_NUMBER:
+                       /* fall through */
+               case DM_REPORT_FIELD_TYPE_SIZE:
+                       /* fall through */
+               case DM_REPORT_FIELD_TYPE_PERCENT:
+                       if (!(s = _tok_value_number(s, begin, end))) {
+                               log_error("Failed to parse numeric value "
+                                         "for selection field %s.", ft->id);
+                               return NULL;
+                       }
+
+                       /* A '%' suffix is accepted only for percent fields. */
+                       if (*s == DM_PERCENT_CHAR) {
+                               s++;
+                               c = DM_PERCENT_CHAR;
+                               if (expected_type != DM_REPORT_FIELD_TYPE_PERCENT) {
+                                       log_error("Found percent value but %s value "
+                                                 "expected for selection field %s.",
+                                                 expected_type == DM_REPORT_FIELD_TYPE_NUMBER ?
+                                                       "numeric" : "size", ft->id);
+                                       return NULL;
+                               }
+                       } else {
+                               /* For number/size/percent, 'custom' carries the size unit factor. */
+                               if (!(factor = (uint64_t *) custom))
+                                       goto_bad;
+
+                               if ((*factor = dm_units_to_factor(s, &c, 0, &tmp))) {
+                                       s = tmp;
+                                       if (expected_type != DM_REPORT_FIELD_TYPE_SIZE) {
+                                               log_error("Found size unit specifier "
+                                                         "but %s value expected for "
+                                                         "selection field %s.",
+                                                         expected_type == DM_REPORT_FIELD_TYPE_NUMBER ?
+                                                         "numeric" : "percent", ft->id);
+                                               return NULL;
+                                       }
+                               } else if (expected_type == DM_REPORT_FIELD_TYPE_SIZE) {
+                                       /*
+                                        * If size unit is not defined in the selection
+                                        * and the type expected is size, use 'm'
+                                        * (1 MiB) for the unit by default. This is the
+                                        * same behaviour as seen in lvcreate -L <size>.
+                                        */
+                                       *factor = 1024*1024;
+                               }
+                       }
+
+                       *flags |= expected_type;
+                       /*
+                        * FLD_CMP_NUMBER shares operators with FLD_CMP_TIME,
+                        * but we have NUMBER here, so remove FLD_CMP_TIME.
+                        */
+                       *flags &= ~FLD_CMP_TIME;
+                       break;
+
+               case DM_REPORT_FIELD_TYPE_TIME:
+                       /* For time fields, 'custom' carries the parsed time value. */
+                       if (!(tval = (struct time_value *) custom))
+                               goto_bad;
+
+                       if (!(s = _tok_value_time(ft, mem, s, begin, end, tval))) {
+                               log_error("Failed to parse time value "
+                                         "for selection field %s.", ft->id);
+                               return NULL;
+                       }
+
+                       *flags |= DM_REPORT_FIELD_TYPE_TIME;
+                       /*
+                        * FLD_CMP_TIME shares operators with FLD_CMP_NUMBER,
+                        * but we have TIME here, so remove FLD_CMP_NUMBER.
+                        */
+                       *flags &= ~FLD_CMP_NUMBER;
+                       break;
+       }
+
+       return s;
+bad:
+       log_error(INTERNAL_ERROR "Forbidden NULL custom detected.");
+
+       return NULL;
+}
+
+/*
+ * Parse a field name token: a non-empty run of [a-zA-Z0-9_-] characters,
+ * leading whitespace skipped.
+ *
+ * Input:
+ *   s               - a pointer to the parsed string
+ * Output:
+ *   begin           - a pointer to the beginning of the token
+ *   end             - a pointer to the end of the token + 1
+ *
+ * Returns the position just past the token, or NULL if no field name
+ * character was found at this position.
+ */
+static const char *_tok_field_name(const char *s,
+                                   const char **begin, const char **end)
+{
+       char c;
+       s = _skip_space(s);
+
+       *begin = s;
+       /* Cast to unsigned char: passing a plain (possibly negative) char
+        * to isalnum() is undefined behaviour per the C standard. */
+       while ((c = *s) &&
+              (isalnum((unsigned char) c) || c == '_' || c == '-'))
+               s++;
+       *end = s;
+
+       /* Empty token - no valid field name here. */
+       if (*begin == *end)
+               return NULL;
+
+       return s;
+}
+
+/*
+ * Resolve the actual value for a reserved value matched during parsing.
+ *
+ * For plain reserved values the value is taken directly (type-wide) or
+ * from the per-field wrapper.  For DYNAMIC_VALUE/FUZZY_NAMES reserved
+ * values, the stored value is a handler function which is invoked with
+ * DM_REPORT_RESERVED_GET_DYNAMIC_VALUE to produce the value.
+ *
+ * On success rvw->value is set (NULL if there is no reserved value).
+ * Returns 1 on success, 0 if the handler failed.
+ */
+static int _get_reserved_value(struct dm_report *rh, uint32_t field_num,
+                              struct reserved_value_wrapper *rvw)
+{
+       const void *tmp_value;
+       dm_report_reserved_handler handler;
+       int r;
+
+       if (!rvw->reserved) {
+               rvw->value = NULL;
+               return 1;
+       }
+
+       if (rvw->reserved->type & DM_REPORT_FIELD_TYPE_MASK)
+               /* type reserved value */
+               tmp_value = rvw->reserved->value;
+       else
+               /* per-field reserved value */
+               tmp_value = ((const struct dm_report_field_reserved_value *) rvw->reserved->value)->value;
+
+       if (rvw->reserved->type & (DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE | DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES)) {
+               handler = (dm_report_reserved_handler) tmp_value;
+               if ((r = handler(rh, rh->selection->mem, field_num,
+                                DM_REPORT_RESERVED_GET_DYNAMIC_VALUE,
+                                rvw->matched_name, &tmp_value)) <= 0) {
+                       /* Fixed: adjacent literals previously concatenated to
+                        * "has missingimplementation" - space was missing. */
+                       if (r == -1)
+                               log_error(INTERNAL_ERROR "%s reserved value handler for field %s has missing "
+                                         "implementation of DM_REPORT_RESERVED_GET_DYNAMIC_VALUE action",
+                                         (rvw->reserved->type) & DM_REPORT_FIELD_TYPE_MASK ? "type-specific" : "field-specific",
+                                         rh->fields[field_num].id);
+                       else
+                               log_error("Error occurred while processing %s reserved value handler for field %s",
+                                         (rvw->reserved->type) & DM_REPORT_FIELD_TYPE_MASK ? "type-specific" : "field-specific",
+                                         rh->fields[field_num].id);
+                       return 0;
+               }
+       }
+
+       rvw->value = tmp_value;
+       return 1;
+}
+
+/*
+ * Build a field_selection for field 'field_num', comparing against the
+ * value 'v' of length 'len' using comparison flags 'flags'.
+ *
+ * If the field is not part of the current report, it is added as a hidden
+ * field when rh->selection->add_new_fields is set; otherwise it is an
+ * error.  A reserved value matched earlier (rvw) overrides the literal
+ * value.  'custom' carries the type-specific parse result (size unit
+ * factor, string list or time value).  Returns NULL on error.
+ */
+static struct field_selection *_create_field_selection(struct dm_report *rh,
+                                                      uint32_t field_num,
+                                                      int implicit,
+                                                      const char *v,
+                                                      size_t len,
+                                                      uint32_t flags,
+                                                      struct reserved_value_wrapper *rvw,
+                                                      void *custom)
+{
+       static const char *_field_selection_value_alloc_failed_msg = "dm_report: struct field_selection_value allocation failed for selection field %s";
+       const struct dm_report_field_type *fields = implicit ? _implicit_report_fields
+                                                            : rh->fields;
+       struct field_properties *fp, *found = NULL;
+       struct field_selection *fs;
+       const char *field_id;
+       struct time_value *tval;
+       uint64_t factor;
+       char *s;
+
+       /* Look the field up among fields already used for display or sorting. */
+       dm_list_iterate_items(fp, &rh->field_props) {
+               if ((fp->implicit == implicit) && (fp->field_num == field_num)) {
+                       found = fp;
+                       break;
+               }
+       }
+
+       /* The field is neither used in display options nor sort keys. */
+       if (!found) {
+               if (rh->selection->add_new_fields) {
+                       if (!(found = _add_field(rh, field_num, implicit, FLD_HIDDEN)))
+                               return NULL;
+                       rh->report_types |= fields[field_num].type;
+               } else {
+                       log_error("Unable to create selection with field \'%s\' "
+                                 "which is not included in current report.",
+                                 implicit ? _implicit_report_fields[field_num].id
+                                          : rh->fields[field_num].id);
+                       return NULL;
+               }
+       }
+
+       field_id = fields[found->field_num].id;
+
+       if (!(found->flags & flags & DM_REPORT_FIELD_TYPE_MASK)) {
+               log_error("dm_report: incompatible comparison "
+                         "type for selection field %s", field_id);
+               return NULL;
+       }
+
+       /* set up selection */
+       if (!(fs = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection)))) {
+               log_error("dm_report: struct field_selection "
+                         "allocation failed for selection field %s", field_id);
+               return NULL;
+       }
+
+       if (!(fs->value = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection_value)))) {
+               log_error(_field_selection_value_alloc_failed_msg, field_id);
+               goto error;
+       }
+
+       /* Range comparisons (reserved-value ranges and time ranges) need a
+        * second value chained via fs->value->next. */
+       if (((rvw->reserved && (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)) ||
+            (((flags & DM_REPORT_FIELD_TYPE_MASK) == DM_REPORT_FIELD_TYPE_TIME) &&
+             custom && ((struct time_value *) custom)->range))
+                &&
+           !(fs->value->next = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection_value)))) {
+               log_error(_field_selection_value_alloc_failed_msg, field_id);
+               goto error;
+       }
+
+       fs->fp = found;
+       fs->flags = flags;
+
+       if (!_get_reserved_value(rh, field_num, rvw)) {
+               log_error("dm_report: could not get reserved value "
+                         "while processing selection field %s", field_id);
+               goto error;
+       }
+
+       /* store comparison operand */
+       if (flags & FLD_CMP_REGEX) {
+               /* REGEX */
+               if (!(s = dm_malloc(len + 1))) {
+                       log_error("dm_report: dm_malloc failed to store "
+                                 "regex value for selection field %s", field_id);
+                       goto error;
+               }
+               memcpy(s, v, len);
+               s[len] = '\0';
+
+               fs->value->v.r = dm_regex_create(rh->selection->mem, (const char * const *) &s, 1);
+               dm_free(s);
+               if (!fs->value->v.r) {
+                       log_error("dm_report: failed to create regex "
+                                 "matcher for selection field %s", field_id);
+                       goto error;
+               }
+       } else {
+               /* STRING, NUMBER, SIZE, PERCENT, STRING_LIST, TIME */
+               /* Fixed log message: previously read "dm_pool_strndup for value"
+                * with the word "failed" missing. */
+               if (!(s = dm_pool_strndup(rh->selection->mem, v, len))) {
+                       log_error("dm_report: dm_pool_strndup failed for value "
+                                 "of selection field %s", field_id);
+                       goto error;
+               }
+
+               switch (flags & DM_REPORT_FIELD_TYPE_MASK) {
+                       case DM_REPORT_FIELD_TYPE_STRING:
+                               if (rvw->value) {
+                                       fs->value->v.s = (const char *) rvw->value;
+                                       if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+                                               fs->value->next->v.s = (((const char * const *) rvw->value)[1]);
+                                       dm_pool_free(rh->selection->mem, s);
+                               } else {
+                                       fs->value->v.s = s;
+                                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_STRING, fs->value->v.s, NULL)) {
+                                               log_error("String value %s found in selection is reserved.", fs->value->v.s);
+                                               goto error;
+                                       }
+                               }
+                               break;
+                       case DM_REPORT_FIELD_TYPE_NUMBER:
+                               if (rvw->value) {
+                                       fs->value->v.i = *(const uint64_t *) rvw->value;
+                                       if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+                                               fs->value->next->v.i = (((const uint64_t *) rvw->value)[1]);
+                               } else {
+                                       errno = 0;
+                                       if (((fs->value->v.i = strtoull(s, NULL, 10)) == ULLONG_MAX) &&
+                                                (errno == ERANGE)) {
+                                               log_error(_out_of_range_msg, s, field_id);
+                                               goto error;
+                                       }
+                                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &fs->value->v.i, NULL)) {
+                                               log_error("Numeric value %" PRIu64 " found in selection is reserved.", fs->value->v.i);
+                                               goto error;
+                                       }
+                               }
+                               dm_pool_free(rh->selection->mem, s);
+                               break;
+                       case DM_REPORT_FIELD_TYPE_SIZE:
+                               if (rvw->value) {
+                                       fs->value->v.d = *(const double *) rvw->value;
+                                       if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+                                               fs->value->next->v.d = (((const double *) rvw->value)[1]);
+                               } else {
+                                       errno = 0;
+                                       fs->value->v.d = strtod(s, NULL);
+                                       if (errno == ERANGE) {
+                                               log_error(_out_of_range_msg, s, field_id);
+                                               goto error;
+                                       }
+                                       if (custom && (factor = *((const uint64_t *)custom)))
+                                               fs->value->v.d *= factor;
+                                       fs->value->v.d /= 512; /* store size in sectors! */
+                                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &fs->value->v.d, NULL)) {
+                                               log_error("Size value %f found in selection is reserved.", fs->value->v.d);
+                                               goto error;
+                                       }
+                               }
+                               dm_pool_free(rh->selection->mem, s);
+                               break;
+                       case DM_REPORT_FIELD_TYPE_PERCENT:
+                               if (rvw->value) {
+                                       fs->value->v.i = *(const uint64_t *) rvw->value;
+                                       if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+                                               fs->value->next->v.i = (((const uint64_t *) rvw->value)[1]);
+                               } else {
+                                       errno = 0;
+                                       fs->value->v.d = strtod(s, NULL);
+                                       if ((errno == ERANGE) || (fs->value->v.d < 0) || (fs->value->v.d > 100)) {
+                                               log_error(_out_of_range_msg, s, field_id);
+                                               goto error;
+                                       }
+
+                                       fs->value->v.i = (dm_percent_t) (DM_PERCENT_1 * fs->value->v.d);
+
+                                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_PERCENT, &fs->value->v.i, NULL)) {
+                                               log_error("Percent value %s found in selection is reserved.", s);
+                                               goto error;
+                                       }
+                               }
+                               break;
+                       case DM_REPORT_FIELD_TYPE_STRING_LIST:
+                               if (!custom)
+                                        goto_bad;
+                               fs->value->v.l = *(struct selection_str_list **)custom;
+                               if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_STRING_LIST, fs->value->v.l, NULL)) {
+                                       log_error("String list value found in selection is reserved.");
+                                       goto error;
+                               }
+                               break;
+                       case DM_REPORT_FIELD_TYPE_TIME:
+                               if (rvw->value) {
+                                       fs->value->v.t = *(const time_t *) rvw->value;
+                                       if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+                                               fs->value->next->v.t = (((const time_t *) rvw->value)[1]);
+                               } else {
+                                       if (!(tval = (struct time_value *) custom))
+                                               goto_bad;
+                                       fs->value->v.t = tval->t1;
+                                       if (tval->range)
+                                               fs->value->next->v.t = tval->t2;
+                                       if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &fs->value->v.t, NULL)) {
+                                               log_error("Time value found in selection is reserved.");
+                                               goto error;
+                                       }
+                               }
+                               break;
+                       default:
+                               log_error(INTERNAL_ERROR "_create_field_selection: "
+                                         "unknown type of selection field %s", field_id);
+                               goto error;
+               }
+       }
+
+       return fs;
+bad:
+       /* Fixed typo: "Forbiden" -> "Forbidden". */
+       log_error(INTERNAL_ERROR "Forbidden NULL custom detected.");
+error:
+       dm_pool_free(rh->selection->mem, fs);
+
+       return NULL;
+}
+
+/*
+ * Allocate a zero-initialised selection node of the given type from the
+ * pool 'mem'.  Non-item nodes hold a set of child nodes, so their
+ * selection.set list is initialised as well.  Returns NULL on allocation
+ * failure.
+ */
+static struct selection_node *_alloc_selection_node(struct dm_pool *mem, uint32_t type)
+{
+       struct selection_node *node = dm_pool_zalloc(mem, sizeof(*node));
+
+       if (!node) {
+               log_error("dm_report: struct selection_node allocation failed");
+               return NULL;
+       }
+
+       node->type = type;
+       dm_list_init(&node->list);
+       if (!(type & SEL_ITEM))
+               dm_list_init(&node->selection.set);
+
+       return node;
+}
+
+/*
+ * Print selection syntax help to the log: the operand kinds, any
+ * reserved values registered with this report handle, and the
+ * comparison and logical/grouping operator tables.
+ */
+static void _display_selection_help(struct dm_report *rh)
+{
+       static const char _grow_object_failed_msg[] = "_display_selection_help: dm_pool_grow_object failed";
+       struct op_def *t;
+       const struct dm_report_reserved_value *rv;
+       size_t len_all, len_final = 0;
+       const char **rvs;
+       char *rvs_all;
+
+       log_warn("Selection operands");
+       log_warn("------------------");
+       log_warn("  field               - Reporting field.");
+       log_warn("  number              - Non-negative integer value.");
+       log_warn("  size                - Floating point value with units, 'm' unit used by default if not specified.");
+       log_warn("  percent             - Non-negative integer with or without %% suffix.");
+       log_warn("  string              - Characters quoted by \' or \" or unquoted.");
+       log_warn("  string list         - Strings enclosed by [ ] or { } and elements delimited by either");
+       log_warn("                        \"all items must match\" or \"at least one item must match\" operator.");
+       log_warn("  regular expression  - Characters quoted by \' or \" or unquoted.");
+       log_warn(" ");
+       if (rh->reserved_values) {
+               log_warn("Reserved values");
+               log_warn("---------------");
+
+               /* First pass: find the widest name list (len_final) so the
+                * description column lines up across all reserved values. */
+               for (rv = rh->reserved_values; rv->type; rv++) {
+                       for (len_all = 0, rvs = rv->names; *rvs; rvs++)
+                               len_all += strlen(*rvs) + 2;
+                       if (len_all > len_final)
+                               len_final = len_all;
+               }
+
+               /* Second pass: build each comma-separated name list in the
+                * pool and print it with its description and type name. */
+               for (rv = rh->reserved_values; rv->type; rv++) {
+                       if (!dm_pool_begin_object(rh->mem, 256)) {
+                               log_error("_display_selection_help: dm_pool_begin_object failed");
+                               break;
+                       }
+                       for (rvs = rv->names; *rvs; rvs++) {
+                               if (((rvs != rv->names) && !dm_pool_grow_object(rh->mem, ", ", 2)) ||
+                                   !dm_pool_grow_object(rh->mem, *rvs, strlen(*rvs))) {
+                                       log_error(_grow_object_failed_msg);
+                                       goto out_reserved_values;
+                               }
+                       }
+                       if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
+                               log_error(_grow_object_failed_msg);
+                               goto out_reserved_values;
+                       }
+                       rvs_all = dm_pool_end_object(rh->mem);
+
+                       log_warn("  %-*s - %s [%s]", (int) len_final, rvs_all, rv->description,
+                                                    _get_field_type_name(rv->type));
+                       dm_pool_free(rh->mem, rvs_all);
+               }
+               log_warn(" ");
+       }
+out_reserved_values:
+       /* Operator tables are printed even if reserved value output failed. */
+       log_warn("Selection operators");
+       log_warn("-------------------");
+       log_warn("  Comparison operators:");
+       t = _op_cmp;
+       for (; t->string; t++)
+               log_warn("    %6s  - %s", t->string, t->desc);
+       log_warn(" ");
+       log_warn("  Logical and grouping operators:");
+       t = _op_log;
+       for (; t->string; t++)
+               log_warn("    %4s  - %s", t->string, t->desc);
+       log_warn(" ");
+}
+
+/* Shared messages for reporting selection syntax errors. */
+static const char _sel_syntax_error_at_msg[] = "Selection syntax error at '%s'.";
+static const char _sel_help_ref_msg[] = "Use \'help\' for selection to get more help.";
+
+/*
+ * Selection parser
+ *
+ * _parse_* functions
+ *
+ *   Input:
+ *     s             - a pointer to the parsed string
+ *   Output:
+ *     next          - a pointer used for next _parse_*'s input,
+ *                     next == s if return value is NULL
+ *     return value  - a filter node pointer,
+ *                     NULL if s doesn't match
+ */
+
+/*
+ * Parse one simple selection item: field name, comparison operator, value.
+ *
+ * SELECTION := FIELD_NAME OP_CMP STRING |
+ *              FIELD_NAME OP_CMP NUMBER  |
+ *              FIELD_NAME OP_REGEX REGEX
+ */
+static struct selection_node *_parse_selection(struct dm_report *rh,
+                                              const char *s,
+                                              const char **next)
+{
+       struct field_selection *fs;
+       struct selection_node *sn;
+       const char *ws, *we; /* field name */
+       const char *vs, *ve; /* value */
+       const char *last;
+       uint32_t flags, field_num;
+       int implicit;
+       const struct dm_report_field_type *ft;
+       struct selection_str_list *str_list;
+       struct reserved_value_wrapper rvw = {0};
+       struct time_value tval;
+       uint64_t factor;
+       void *custom = NULL;
+       char *tmp;
+       char c;
+
+       /* field name */
+       if (!(last = _tok_field_name(s, &ws, &we))) {
+               log_error("Expecting field name");
+               goto bad;
+       }
+
+       /* check if the field with given name exists */
+       if (!_get_field(rh, ws, (size_t) (we - ws), &field_num, &implicit)) {
+               /* Temporarily NUL-terminate the input at the end of the field
+                * name so it can be printed; the byte is restored below. */
+               c = we[0];
+               tmp = (char *) we;
+               tmp[0] = '\0';
+               _display_fields(rh, 0, 1);
+               log_warn(" ");
+               log_error("Unrecognised selection field: %s", ws);
+               tmp[0] = c;
+               goto bad;
+       }
+
+       if (implicit) {
+               ft = &_implicit_report_fields[field_num];
+               if (ft->flags & FLD_CMP_UNCOMPARABLE) {
+                       /* Same temporary NUL-termination trick as above. */
+                       c = we[0];
+                       tmp = (char *) we;
+                       tmp[0] = '\0';
+                       _display_fields(rh, 0, 1);
+                       log_warn(" ");
+                       log_error("Selection field is uncomparable: %s.", ws);
+                       tmp[0] = c;
+                       goto bad;
+               }
+       } else
+               ft = &rh->fields[field_num];
+
+       /* comparison operator */
+       if (!(flags = _tok_op_cmp(we, &last))) {
+               _display_selection_help(rh);
+               log_error("Unrecognised comparison operator: %s", we);
+               goto bad;
+       }
+       if (!last) {
+               _display_selection_help(rh);
+               log_error("Missing value after operator");
+               goto bad;
+       }
+
+       /* comparison value */
+       if (flags & FLD_CMP_REGEX) {
+               /*
+                * REGEX value
+                */
+               if (!(last = _tok_value_regex(rh, ft, last, &vs, &ve, &flags, &rvw)))
+                       goto_bad;
+       } else {
+               /*
+                * STRING, NUMBER, SIZE, PERCENT, STRING_LIST, TIME value
+                */
+               /* Check the operator is compatible with the field's type. */
+               if (flags & FLD_CMP_NUMBER) {
+                       if (!(ft->flags & (DM_REPORT_FIELD_TYPE_NUMBER |
+                                          DM_REPORT_FIELD_TYPE_SIZE |
+                                          DM_REPORT_FIELD_TYPE_PERCENT |
+                                          DM_REPORT_FIELD_TYPE_TIME))) {
+                               _display_selection_help(rh);
+                               log_error("Operator can be used only with number, size, time or percent fields: %s", ws);
+                               goto bad;
+                       }
+               } else if (flags & FLD_CMP_TIME) {
+                       if (!(ft->flags & DM_REPORT_FIELD_TYPE_TIME)) {
+                               _display_selection_help(rh);
+                               log_error("Operator can be used only with time fields: %s", ws);
+                               goto bad;
+                       }
+               }
+
+               /* Choose the type-specific output buffer to pass to
+                * _tok_value as 'custom'. */
+               if (ft->flags == DM_REPORT_FIELD_TYPE_SIZE ||
+                   ft->flags == DM_REPORT_FIELD_TYPE_NUMBER ||
+                   ft->flags == DM_REPORT_FIELD_TYPE_PERCENT)
+                       custom = &factor;
+               else if (ft->flags & DM_REPORT_FIELD_TYPE_TIME)
+                       custom = &tval;
+               else if (ft->flags == DM_REPORT_FIELD_TYPE_STRING_LIST)
+                       custom = &str_list;
+               else
+                       custom = NULL;
+               if (!(last = _tok_value(rh, ft, field_num, implicit,
+                                       last, &vs, &ve, &flags,
+                                       &rvw, rh->selection->mem, custom)))
+                       goto_bad;
+       }
+
+       *next = _skip_space(last);
+
+       /* create selection */
+       if (!(fs = _create_field_selection(rh, field_num, implicit, vs, (size_t) (ve - vs), flags, &rvw, custom)))
+               return_NULL;
+
+       /* create selection node */
+       if (!(sn = _alloc_selection_node(rh->selection->mem, SEL_ITEM)))
+               return_NULL;
+
+       /* add selection to selection node */
+       sn->selection.item = fs;
+
+       return sn;
+bad:
+       log_error(_sel_syntax_error_at_msg, s);
+       log_error(_sel_help_ref_msg);
+       *next = s;
+       return NULL;
+}
+
+/* Forward declaration: _parse_ex recurses into the OR-expression parser
+ * for parenthesized subexpressions. */
+static struct selection_node *_parse_or_ex(struct dm_report *rh,
+                                          const char *s,
+                                          const char **next,
+                                          struct selection_node *or_sn);
+
+/* EX := '!' '(' OR_EXPRESSION ')' | '(' OR_EXPRESSION ')' | SELECTION */
+static struct selection_node *_parse_ex(struct dm_report *rh,
+                                       const char *s,
+                                       const char **next)
+{
+       static const char _ps_expected_msg[] = "Syntax error: left parenthesis expected at \'%s\'";
+       static const char _pe_expected_msg[] = "Syntax error: right parenthesis expected at \'%s\'";
+       struct selection_node *sn = NULL;
+       uint32_t t;
+       const char *tmp;
+
+       t = _tok_op_log(s, next, SEL_MODIFIER_NOT | SEL_PRECEDENCE_PS);
+       if (t == SEL_MODIFIER_NOT) {
+               /* '!' '(' EXPRESSION ')' */
+               if (!_tok_op_log(*next, &tmp, SEL_PRECEDENCE_PS)) {
+                       log_error(_ps_expected_msg, *next);
+                       goto error;
+               }
+               if (!(sn = _parse_or_ex(rh, tmp, next, NULL)))
+                       goto error;
+               /* Mark the whole parenthesized subexpression as negated. */
+               sn->type |= SEL_MODIFIER_NOT;
+               if (!_tok_op_log(*next, &tmp, SEL_PRECEDENCE_PE)) {
+                       log_error(_pe_expected_msg, *next);
+                       goto error;
+               }
+               *next = tmp;
+       } else if (t == SEL_PRECEDENCE_PS) {
+               /* '(' EXPRESSION ')' */
+               if (!(sn = _parse_or_ex(rh, *next, &tmp, NULL)))
+                       goto error;
+               if (!_tok_op_log(tmp, next, SEL_PRECEDENCE_PE)) {
+                       log_error(_pe_expected_msg, *next);
+                       goto error;
+               }
+       } else if ((s = _skip_space(s))) {
+               /* SELECTION */
+               sn = _parse_selection(rh, s, next);
+       } else {
+               sn = NULL;
+               *next = s;
+       }
+
+       return sn;
+error:
+       /* On syntax error, leave *next at the start of the failed input. */
+       *next = s;
+       return NULL;
+}
+
+/* AND_EXPRESSION := EX (AND_OP AND_EXPRESSION) */
+/*
+ * Parse a chain of EX terms joined by the AND operator, folding them
+ * into a single SEL_AND set node (allocated on first join).  A lone
+ * term is returned as-is, without a SEL_AND wrapper.  On error, *next
+ * is reset to s and NULL is returned.
+ */
+static struct selection_node *_parse_and_ex(struct dm_report *rh,
+                                           const char *s,
+                                           const char **next,
+                                           struct selection_node *and_sn)
+{
+       struct selection_node *n;
+       const char *tmp;
+
+       n = _parse_ex(rh, s, next);
+       if (!n)
+               goto error;
+
+       if (!_tok_op_log(*next, &tmp, SEL_AND)) {
+               /* No further AND operator: terminate the chain. */
+               if (!and_sn)
+                       return n;
+               dm_list_add(&and_sn->selection.set, &n->list);
+               return and_sn;
+       }
+
+       if (!and_sn) {
+               if (!(and_sn = _alloc_selection_node(rh->selection->mem, SEL_AND)))
+                       goto error;
+       }
+       dm_list_add(&and_sn->selection.set, &n->list);
+
+       /* Tail-recurse on the remainder after the AND operator. */
+       return _parse_and_ex(rh, tmp, next, and_sn);
+error:
+       *next = s;
+       return NULL;
+}
+
+/* OR_EXPRESSION := AND_EXPRESSION (OR_OP OR_EXPRESSION) */
+/*
+ * Parse a chain of AND_EXPRESSION terms joined by the OR operator,
+ * folding them into a SEL_OR set node (the caller may pass one in via
+ * or_sn, otherwise it is allocated on first join).  A lone term is
+ * returned as-is.  On error, *next is reset to s and NULL is returned.
+ */
+static struct selection_node *_parse_or_ex(struct dm_report *rh,
+                                          const char *s,
+                                          const char **next,
+                                          struct selection_node *or_sn)
+{
+       struct selection_node *n;
+       const char *tmp;
+
+       n = _parse_and_ex(rh, s, next, NULL);
+       if (!n)
+               goto error;
+
+       if (!_tok_op_log(*next, &tmp, SEL_OR)) {
+               /* No further OR operator: terminate the chain. */
+               if (!or_sn)
+                       return n;
+               dm_list_add(&or_sn->selection.set, &n->list);
+               return or_sn;
+       }
+
+       if (!or_sn) {
+               if (!(or_sn = _alloc_selection_node(rh->selection->mem, SEL_OR)))
+                       goto error;
+       }
+       dm_list_add(&or_sn->selection.set, &n->list);
+
+       /* Tail-recurse on the remainder after the OR operator. */
+       return _parse_or_ex(rh, tmp, next, or_sn);
+error:
+       *next = s;
+       return NULL;
+}
+
+/*
+ * Allocate rh->selection from the report's pool together with a private
+ * memory pool for selection parse trees.  On failure any partially
+ * allocated structure is released; returns 1 on success, 0 on error.
+ */
+static int _alloc_rh_selection(struct dm_report *rh)
+{
+       if (!(rh->selection = dm_pool_zalloc(rh->mem, sizeof(struct selection))) ||
+           !(rh->selection->mem = dm_pool_create("report selection", 10 * 1024))) {
+               log_error("Failed to allocate report selection structure.");
+               if (rh->selection)
+                       dm_pool_free(rh->mem, rh->selection);
+               return 0;
+       }
+
+       return 1;
+}
+
+#define SPECIAL_SELECTION_ALL "all"
+
+/*
+ * Parse the selection string and install the resulting parse tree as
+ * rh->selection->selection_root, discarding any previous tree.  A NULL,
+ * empty or "all" selection clears the selection (everything matches).
+ * add_new_fields controls whether fields referenced only by the
+ * selection are added to the report.  Returns 1 on success, 0 on error.
+ */
+static int _report_set_selection(struct dm_report *rh, const char *selection, int add_new_fields)
+{
+       struct selection_node *root = NULL;
+       const char *fin, *next;
+
+       if (rh->selection) {
+               if (rh->selection->selection_root)
+                       /* Trash any previous selection. */
+                       dm_pool_free(rh->selection->mem, rh->selection->selection_root);
+               rh->selection->selection_root = NULL;
+       } else {
+               if (!_alloc_rh_selection(rh))
+                       goto_bad;
+       }
+
+       if (!selection || !selection[0] || !strcasecmp(selection, SPECIAL_SELECTION_ALL))
+               return 1;
+
+       rh->selection->add_new_fields = add_new_fields;
+
+       if (!(root = _alloc_selection_node(rh->selection->mem, SEL_OR)))
+               return 0;
+
+       if (!_parse_or_ex(rh, selection, &fin, root))
+               goto_bad;
+
+       /* Whole string must be consumed - trailing text is a syntax error. */
+       next = _skip_space(fin);
+       if (*next) {
+               log_error("Expecting logical operator");
+               log_error(_sel_syntax_error_at_msg, next);
+               log_error(_sel_help_ref_msg);
+               goto bad;
+       }
+
+       rh->selection->selection_root = root;
+       return 1;
+bad:
+       dm_pool_free(rh->selection->mem, root);
+       return 0;
+}
+
+/*
+ * Restore each field's width to its initial value and mark the report
+ * as needing a field-width recalculation before the next output.
+ */
+static void _reset_field_props(struct dm_report *rh)
+{
+       struct field_properties *fp;
+       dm_list_iterate_items(fp, &rh->field_props)
+               fp->width = fp->initial_width;
+       rh->flags |= RH_FIELD_CALC_NEEDED;
+}
+
+/*
+ * Replace the report's selection criteria and re-evaluate all rows
+ * already collected: each row's 'selected' flag is recomputed and, if
+ * present, its implicit selection-status field is re-reported.
+ * Returns 1 on success, 0 on error.
+ */
+int dm_report_set_selection(struct dm_report *rh, const char *selection)
+{
+       struct row *row;
+
+       if (!_report_set_selection(rh, selection, 0))
+               return_0;
+
+       _reset_field_props(rh);
+
+       dm_list_iterate_items(row, &rh->rows) {
+               row->selected = _check_report_selection(rh, &row->fields);
+               if (row->field_sel_status)
+                       _implicit_report_fields[row->field_sel_status->props->field_num].report_fn(rh,
+                                                       rh->mem, row->field_sel_status, row, rh->private);
+       }
+
+       return 1;
+}
+
+/*
+ * Like dm_report_init(), but additionally registers reserved values and
+ * installs selection criteria.  Special selection strings "help"/"?"
+ * display field and selection help and mark the report as already
+ * reported instead of selecting anything.  Unsupported reserved value
+ * types cause selection to be skipped (the report itself still works).
+ * Returns the new report handle, or NULL on error.
+ */
+struct dm_report *dm_report_init_with_selection(uint32_t *report_types,
+                                               const struct dm_report_object_type *types,
+                                               const struct dm_report_field_type *fields,
+                                               const char *output_fields,
+                                               const char *output_separator,
+                                               uint32_t output_flags,
+                                               const char *sort_keys,
+                                               const char *selection,
+                                               const struct dm_report_reserved_value reserved_values[],
+                                               void *private_data)
+{
+       struct dm_report *rh;
+
+       /* Switch implicit fields to the set that includes selection status. */
+       _implicit_report_fields = _implicit_special_report_fields_with_selection;
+
+       if (!(rh = dm_report_init(report_types, types, fields, output_fields,
+                       output_separator, output_flags, sort_keys, private_data)))
+               return NULL;
+
+       if (!selection || !selection[0]) {
+               rh->selection = NULL;
+               return rh;
+       }
+
+       if (!_check_reserved_values_supported(fields, reserved_values)) {
+               log_error(INTERNAL_ERROR "dm_report_init_with_selection: "
+                         "trying to register unsupported reserved value type, "
+                         "skipping report selection");
+               return rh;
+       }
+       rh->reserved_values = reserved_values;
+
+       if (!strcasecmp(selection, SPECIAL_FIELD_HELP_ID) ||
+           !strcmp(selection, SPECIAL_FIELD_HELP_ALT_ID)) {
+               _display_fields(rh, 0, 1);
+               log_warn(" ");
+               _display_selection_help(rh);
+               rh->flags |= RH_ALREADY_REPORTED;
+               return rh;
+       }
+
+       if (!_report_set_selection(rh, selection, 1))
+               goto_bad;
+
+       _dm_report_init_update_types(rh, report_types);
+
+       return rh;
+bad:
+       dm_report_free(rh);
+       return NULL;
+}
+
+/*
+ * Print row of headings
+ *
+ * Builds the heading line in the report pool (aligned to each field's
+ * width when DM_REPORT_OUTPUT_ALIGNED is set, raw otherwise), prints it
+ * and releases the temporary allocations.  Hidden fields are skipped.
+ * Always marks headings as printed; returns 1 on success, 0 on error.
+ */
+static int _report_headings(struct dm_report *rh)
+{
+       const struct dm_report_field_type *fields;
+       struct field_properties *fp;
+       const char *heading;
+       char *buf = NULL;
+       size_t buf_size = 0;
+
+       rh->flags |= RH_HEADINGS_PRINTED;
+
+       if (!(rh->flags & DM_REPORT_OUTPUT_HEADINGS))
+               return 1;
+
+       if (!dm_pool_begin_object(rh->mem, 128)) {
+               log_error("dm_report: "
+                         "dm_pool_begin_object failed for headings");
+               return 0;
+       }
+
+       /* Size the scratch buffer to the widest field. */
+       dm_list_iterate_items(fp, &rh->field_props) {
+               if ((int) buf_size < fp->width)
+                       buf_size = (size_t) fp->width;
+       }
+       /* Including trailing '\0'! */
+       buf_size++;
+
+       if (!(buf = dm_malloc(buf_size))) {
+               log_error("dm_report: Could not allocate memory for heading buffer.");
+               goto bad;
+       }
+
+       /* First heading line */
+       dm_list_iterate_items(fp, &rh->field_props) {
+               if (fp->flags & FLD_HIDDEN)
+                       continue;
+
+               fields = fp->implicit ? _implicit_report_fields : rh->fields;
+
+               heading = fields[fp->field_num].heading;
+               if (rh->flags & DM_REPORT_OUTPUT_ALIGNED) {
+                       if (dm_snprintf(buf, buf_size, "%-*.*s",
+                                        fp->width, fp->width, heading) < 0) {
+                               log_error("dm_report: snprintf heading failed");
+                               goto bad;
+                       }
+                       if (!dm_pool_grow_object(rh->mem, buf, fp->width)) {
+                               log_error("dm_report: Failed to generate report headings for printing");
+                               goto bad;
+                       }
+               } else if (!dm_pool_grow_object(rh->mem, heading, 0)) {
+                       log_error("dm_report: Failed to generate report headings for printing");
+                       goto bad;
+               }
+
+               /* Separator between headings, but not after the last one. */
+               if (!dm_list_end(&rh->field_props, &fp->list))
+                       if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) {
+                               log_error("dm_report: Failed to generate report headings for printing");
+                               goto bad;
+                       }
+       }
+       if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
+               log_error("dm_report: Failed to generate report headings for printing");
+               goto bad;
+       }
+
+       /* print all headings */
+       heading = (char *) dm_pool_end_object(rh->mem);
+       log_print("%s", heading);
+
+       dm_pool_free(rh->mem, (void *)heading);
+       dm_free(buf);
+
+       return 1;
+
+      bad:
+       dm_free(buf);
+       dm_pool_abandon_object(rh->mem);
+       return 0;
+}
+
+/*
+ * A row is displayed if it matched the selection, or if it carries the
+ * implicit selection-status field (which reports the match result itself).
+ */
+static int _should_display_row(struct row *row)
+{
+       return row->field_sel_status || row->selected;
+}
+
+/*
+ * Walk all rows to (re)compute per-field display widths from the rows
+ * that will actually be displayed, and refresh the sort-field pointer
+ * array for sort-key fields.  Clears RH_FIELD_CALC_NEEDED when done.
+ */
+static void _recalculate_fields(struct dm_report *rh)
+{
+       struct row *row;
+       struct dm_report_field *field;
+       int len;
+
+       dm_list_iterate_items(row, &rh->rows) {
+               dm_list_iterate_items(field, &row->fields) {
+                       if ((rh->flags & RH_SORT_REQUIRED) &&
+                           (field->props->flags & FLD_SORT_KEY)) {
+                               (*row->sort_fields)[field->props->sort_posn] = field;
+                       }
+
+                       /* Only displayed rows contribute to column widths. */
+                       if (_should_display_row(row)) {
+                               len = (int) strlen(field->report_string);
+                               if ((len > field->props->width))
+                                       field->props->width = len;
+
+                       }
+               }
+       }
+
+       rh->flags &= ~RH_FIELD_CALC_NEEDED;
+}
+
+/*
+ * Public entry point: print the column headings now, recalculating
+ * field widths first if needed.  Returns 1 on success, 0 on error.
+ */
+int dm_report_column_headings(struct dm_report *rh)
+{
+       /* Columns-as-rows does not use _report_headings. */
+       if (rh->flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS)
+               return 1;
+
+       if (rh->flags & RH_FIELD_CALC_NEEDED)
+               _recalculate_fields(rh);
+
+       return _report_headings(rh);
+}
+
+/*
+ * Sort rows of data
+ *
+ * qsort() comparator over struct row pointers.  Compares the two rows'
+ * sort fields in key order: numeric/size/time fields compare their
+ * uint64_t sort_value, everything else compares sort_value as a string.
+ * FLD_ASCENDING/FLD_DESCENDING on each key selects the direction; equal
+ * keys fall through to the next key.
+ */
+static int _row_compare(const void *a, const void *b)
+{
+       const struct row *rowa = *(const struct row * const *) a;
+       const struct row *rowb = *(const struct row * const *) b;
+       const struct dm_report_field *sfa, *sfb;
+       uint32_t cnt;
+
+       for (cnt = 0; cnt < rowa->rh->keys_count; cnt++) {
+               sfa = (*rowa->sort_fields)[cnt];
+               sfb = (*rowb->sort_fields)[cnt];
+               if ((sfa->props->flags & DM_REPORT_FIELD_TYPE_NUMBER) ||
+                   (sfa->props->flags & DM_REPORT_FIELD_TYPE_SIZE) ||
+                   (sfa->props->flags & DM_REPORT_FIELD_TYPE_TIME)) {
+                       const uint64_t numa =
+                           *(const uint64_t *) sfa->sort_value;
+                       const uint64_t numb =
+                           *(const uint64_t *) sfb->sort_value;
+
+                       if (numa == numb)
+                               continue;
+
+                       if (sfa->props->flags & FLD_ASCENDING) {
+                               return (numa > numb) ? 1 : -1;
+                       } else {        /* FLD_DESCENDING */
+                               return (numa < numb) ? 1 : -1;
+                       }
+               } else {
+                       /* DM_REPORT_FIELD_TYPE_STRING
+                        * DM_REPORT_FIELD_TYPE_STRING_LIST */
+                       const char *stra = (const char *) sfa->sort_value;
+                       const char *strb = (const char *) sfb->sort_value;
+                       int cmp = strcmp(stra, strb);
+
+                       if (!cmp)
+                               continue;
+
+                       if (sfa->props->flags & FLD_ASCENDING) {
+                               return (cmp > 0) ? 1 : -1;
+                       } else {        /* FLD_DESCENDING */
+                               return (cmp < 0) ? 1 : -1;
+                       }
+               }
+       }
+
+       return 0;               /* Identical */
+}
+
+/*
+ * Sort rh->rows using _row_compare: gather the rows into a temporary
+ * pointer array, qsort() it, then rebuild the list in sorted order
+ * (head-insertion in reverse index order preserves the sort).
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int _sort_rows(struct dm_report *rh)
+{
+       struct row *(*rows)[];
+       uint32_t count = 0;
+       struct row *row;
+
+       if (!(rows = dm_pool_alloc(rh->mem, sizeof(**rows) *
+                               dm_list_size(&rh->rows)))) {
+               log_error("dm_report: sort array allocation failed");
+               return 0;
+       }
+
+       dm_list_iterate_items(row, &rh->rows)
+               (*rows)[count++] = row;
+
+       qsort(rows, count, sizeof(**rows), _row_compare);
+
+       dm_list_init(&rh->rows);
+       while (count--)
+               dm_list_add_h(&rh->rows, &(*rows)[count]->list);
+
+       return 1;
+}
+
+#define STANDARD_QUOTE         "\'"
+#define STANDARD_PAIR          "="
+
+#define JSON_INDENT_UNIT       4
+#define JSON_SPACE             " "
+#define JSON_QUOTE             "\""
+#define JSON_PAIR              ":"
+#define JSON_SEPARATOR         ","
+#define JSON_OBJECT_START      "{"
+#define JSON_OBJECT_END        "}"
+#define JSON_ARRAY_START       "["
+#define JSON_ARRAY_END         "]"
+#define JSON_ESCAPE_CHAR       "\\"
+
+#define UNABLE_TO_EXTEND_OUTPUT_LINE_MSG "dm_report: Unable to extend output line"
+
+/* Is this report a member of a DM_REPORT_GROUP_BASIC group? */
+static int _is_basic_report(struct dm_report *rh)
+{
+       return rh->group_item &&
+              (rh->group_item->group->type == DM_REPORT_GROUP_BASIC);
+}
+
+/* Is this report a member of a DM_REPORT_GROUP_JSON group? */
+static int _is_json_report(struct dm_report *rh)
+{
+       return rh->group_item &&
+              (rh->group_item->group->type == DM_REPORT_GROUP_JSON);
+}
+
+/*
+ * Produce report output
+ *
+ * Append one field's value to the output line being built in rh->mem.
+ * Depending on report flags this emits:
+ *   - JSON:    "id":"value" with embedded quotes backslash-escaped
+ *              (only in the unaligned path);
+ *   - prefix:  PREFIX_FIELDNAME='value' (quote optional);
+ *   - aligned: the value padded/truncated to the field width, left- or
+ *              right-aligned per field flags (numbers/sizes default to
+ *              right alignment);
+ *   - plain:   the raw report string.
+ * Returns 1 on success, 0 on error.
+ */
+static int _output_field(struct dm_report *rh, struct dm_report_field *field)
+{
+       const struct dm_report_field_type *fields = field->props->implicit ? _implicit_report_fields
+                                                                          : rh->fields;
+       char *field_id;
+       int32_t width;
+       uint32_t align;
+       const char *repstr;
+       const char *p1_repstr, *p2_repstr;
+       char *buf = NULL;
+       size_t buf_size = 0;
+
+       if (_is_json_report(rh)) {
+               /* Emit "id":" - the value and closing quote follow below. */
+               if (!dm_pool_grow_object(rh->mem, JSON_QUOTE, 1) ||
+                   !dm_pool_grow_object(rh->mem, fields[field->props->field_num].id, 0) ||
+                   !dm_pool_grow_object(rh->mem, JSON_QUOTE, 1) ||
+                   !dm_pool_grow_object(rh->mem, JSON_PAIR, 1) ||
+                   !dm_pool_grow_object(rh->mem, JSON_QUOTE, 1)) {
+                       /* NOTE(review): literal message duplicates UNABLE_TO_EXTEND_OUTPUT_LINE_MSG. */
+                       log_error("dm_report: Unable to extend output line");
+                       return 0;
+               }
+       } else if (rh->flags & DM_REPORT_OUTPUT_FIELD_NAME_PREFIX) {
+               /* Copy needed because _toupperstr() modifies in place. */
+               if (!(field_id = dm_strdup(fields[field->props->field_num].id))) {
+                       log_error("dm_report: Failed to copy field name");
+                       return 0;
+               }
+
+               if (!dm_pool_grow_object(rh->mem, rh->output_field_name_prefix, 0)) {
+                       log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                       dm_free(field_id);
+                       return 0;
+               }
+
+               if (!dm_pool_grow_object(rh->mem, _toupperstr(field_id), 0)) {
+                       log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                       dm_free(field_id);
+                       return 0;
+               }
+
+               dm_free(field_id);
+
+               if (!dm_pool_grow_object(rh->mem, STANDARD_PAIR, 1)) {
+                       log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                       return 0;
+               }
+
+               if (!(rh->flags & DM_REPORT_OUTPUT_FIELD_UNQUOTED) &&
+                   !dm_pool_grow_object(rh->mem, STANDARD_QUOTE, 1)) {
+                       log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                       return 0;
+               }
+       }
+
+       repstr = field->report_string;
+       width = field->props->width;
+       if (!(rh->flags & DM_REPORT_OUTPUT_ALIGNED)) {
+               if (_is_json_report(rh)) {
+                       /* Escape any JSON_QUOTE that may appear in reported string. */
+                       p1_repstr = repstr;
+                       while ((p2_repstr = strstr(p1_repstr, JSON_QUOTE))) {
+                               if (p2_repstr > p1_repstr) {
+                                       if (!dm_pool_grow_object(rh->mem, p1_repstr, p2_repstr - p1_repstr)) {
+                                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                                               return 0;
+                                       }
+                               }
+                               if (!dm_pool_grow_object(rh->mem, JSON_ESCAPE_CHAR, 1) ||
+                                   !dm_pool_grow_object(rh->mem, JSON_QUOTE, 1)) {
+                                       log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                                       return 0;
+                               }
+                               p1_repstr = p2_repstr + 1;
+                       }
+
+                       /* Remainder after the last quote (or the whole string). */
+                       if (!dm_pool_grow_object(rh->mem, p1_repstr, 0)) {
+                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                               return 0;
+                       }
+               } else {
+                       if (!dm_pool_grow_object(rh->mem, repstr, 0)) {
+                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                               return 0;
+                       }
+               }
+       } else {
+               if (!(align = field->props->flags & DM_REPORT_FIELD_ALIGN_MASK))
+                       align = ((field->props->flags & DM_REPORT_FIELD_TYPE_NUMBER) ||
+                                (field->props->flags & DM_REPORT_FIELD_TYPE_SIZE)) ? 
+                               DM_REPORT_FIELD_ALIGN_RIGHT : DM_REPORT_FIELD_ALIGN_LEFT;
+
+               /* Including trailing '\0'! */
+               buf_size = width + 1;
+               if (!(buf = dm_malloc(buf_size))) {
+                       log_error("dm_report: Could not allocate memory for output line buffer.");
+                       return 0;
+               }
+
+               if (align & DM_REPORT_FIELD_ALIGN_LEFT) {
+                       if (dm_snprintf(buf, buf_size, "%-*.*s",
+                                        width, width, repstr) < 0) {
+                               log_error("dm_report: left-aligned snprintf() failed");
+                               goto bad;
+                       }
+                       if (!dm_pool_grow_object(rh->mem, buf, width)) {
+                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                               goto bad;
+                       }
+               } else if (align & DM_REPORT_FIELD_ALIGN_RIGHT) {
+                       if (dm_snprintf(buf, buf_size, "%*.*s",
+                                        width, width, repstr) < 0) {
+                               log_error("dm_report: right-aligned snprintf() failed");
+                               goto bad;
+                       }
+                       if (!dm_pool_grow_object(rh->mem, buf, width)) {
+                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                               goto bad;
+                       }
+               }
+       }
+
+       /* Closing quote for prefixed or JSON output. */
+       if (rh->flags & DM_REPORT_OUTPUT_FIELD_NAME_PREFIX) {
+               if (!(rh->flags & DM_REPORT_OUTPUT_FIELD_UNQUOTED)) {
+                       if (!dm_pool_grow_object(rh->mem, STANDARD_QUOTE, 1)) {
+                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                               goto bad;
+                       }
+               }
+       } else if (_is_json_report(rh)) {
+               if (!dm_pool_grow_object(rh->mem, JSON_QUOTE, 1)) {
+                       log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                       goto bad;
+               }
+       }
+
+       dm_free(buf);
+       return 1;
+
+bad:
+       dm_free(buf);
+       return 0;
+}
+
+/*
+ * Release all row data and reset the row list and field widths so the
+ * report handle can collect a fresh set of rows.
+ */
+static void _destroy_rows(struct dm_report *rh)
+{
+       /*
+        * free the first row allocated to this report: since this is a
+        * pool allocation this will also free all subsequently allocated
+        * rows from the report and any associated string data.
+        */
+       if (rh->first_row)
+               dm_pool_free(rh->mem, rh->first_row);
+       rh->first_row = NULL;
+       dm_list_init(&rh->rows);
+
+       /* Reset field widths to original values. */
+       _reset_field_props(rh);
+}
+
+/*
+ * Transposed output: print one line per FIELD, each line containing
+ * that field's value from every row (optionally prefixed with the
+ * field heading).  Fields are consumed from the head of each row's
+ * field list as the outer loop advances; hidden fields are dropped.
+ * All rows are destroyed when done.  Returns 1 on success, 0 on error.
+ */
+static int _output_as_rows(struct dm_report *rh)
+{
+       const struct dm_report_field_type *fields;
+       struct field_properties *fp;
+       struct dm_report_field *field;
+       struct row *row;
+
+       dm_list_iterate_items(fp, &rh->field_props) {
+               if (fp->flags & FLD_HIDDEN) {
+                       /* Discard this field from every row without printing. */
+                       dm_list_iterate_items(row, &rh->rows) {
+                               field = dm_list_item(dm_list_first(&row->fields), struct dm_report_field);
+                               dm_list_del(&field->list);
+                       }
+                       continue;
+               }
+
+               fields = fp->implicit ? _implicit_report_fields : rh->fields;
+
+               if (!dm_pool_begin_object(rh->mem, 512)) {
+                       log_error("dm_report: Unable to allocate output line");
+                       return 0;
+               }
+
+               if ((rh->flags & DM_REPORT_OUTPUT_HEADINGS)) {
+                       if (!dm_pool_grow_object(rh->mem, fields[fp->field_num].heading, 0)) {
+                               log_error("dm_report: Failed to extend row for field name")#;
+                               goto bad;
+                       }
+                       if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) {
+                               log_error("dm_report: Failed to extend row with separator");
+                               goto bad;
+                       }
+               }
+
+               dm_list_iterate_items(row, &rh->rows) {
+                       if ((field = dm_list_item(dm_list_first(&row->fields), struct dm_report_field))) {
+                               if (!_output_field(rh, field))
+                                       goto bad;
+                               dm_list_del(&field->list);
+                       }
+
+                       /* Separator between values, but not after the last row. */
+                       if (!dm_list_end(&rh->rows, &row->list))
+                               if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) {
+                                       log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                                       goto bad;
+                               }
+               }
+
+               if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
+                       log_error("dm_report: Failed to terminate row");
+                       goto bad;
+               }
+               log_print("%s", (char *) dm_pool_end_object(rh->mem));
+       }
+
+       _destroy_rows(rh);
+
+       return 1;
+
+      bad:
+       dm_pool_abandon_object(rh->mem);
+       return 0;
+}
+
+/*
+ * Standard output: print one line per ROW, fields separated by the
+ * report separator (or wrapped as a JSON object with comma/space
+ * separators when the report belongs to a JSON group).  Rows failing
+ * the selection and hidden fields are skipped.  Unless
+ * DM_REPORT_OUTPUT_MULTIPLE_TIMES is set, printed fields/rows are
+ * removed and all rows destroyed at the end.  Returns 1 on success,
+ * 0 on error.
+ */
+static int _output_as_columns(struct dm_report *rh)
+{
+       struct dm_list *fh, *rowh, *ftmp, *rtmp;
+       struct row *row = NULL;
+       struct dm_report_field *field;
+       struct dm_list *last_row;
+       int do_field_delim;
+       char *line;
+
+       /* If headings not printed yet, calculate field widths and print them */
+       if (!(rh->flags & RH_HEADINGS_PRINTED))
+               _report_headings(rh);
+
+       /* Print and clear buffer */
+       last_row = dm_list_last(&rh->rows);
+       dm_list_iterate_safe(rowh, rtmp, &rh->rows) {
+               row = dm_list_item(rowh, struct row);
+
+               if (!_should_display_row(row))
+                       continue;
+
+               if (!dm_pool_begin_object(rh->mem, 512)) {
+                       log_error("dm_report: Unable to allocate output line");
+                       return 0;
+               }
+
+               if (_is_json_report(rh)) {
+                       if (!dm_pool_grow_object(rh->mem, JSON_OBJECT_START, 0)) {
+                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                               goto bad;
+                       }
+               }
+
+               /* Delimiter is emitted before every field except the first. */
+               do_field_delim = 0;
+
+               dm_list_iterate_safe(fh, ftmp, &row->fields) {
+                       field = dm_list_item(fh, struct dm_report_field);
+                       if (field->props->flags & FLD_HIDDEN)
+                               continue;
+
+                       if (do_field_delim) {
+                               if (_is_json_report(rh)) {
+                                       if (!dm_pool_grow_object(rh->mem, JSON_SEPARATOR, 0) ||
+                                           !dm_pool_grow_object(rh->mem, JSON_SPACE, 0)) {
+                                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                                               goto bad;
+                                       }
+                               } else {
+                                       if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) {
+                                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                                               goto bad;
+                                       }
+                               }
+                       } else
+                               do_field_delim = 1;
+
+                       if (!_output_field(rh, field))
+                               goto bad;
+
+                       if (!(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
+                               dm_list_del(&field->list);
+               }
+
+               if (_is_json_report(rh)) {
+                       if (!dm_pool_grow_object(rh->mem, JSON_OBJECT_END, 0)) {
+                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                               goto bad;
+                       }
+                       /* Comma between JSON row objects, but not after the last. */
+                       if (rowh != last_row &&
+                           !dm_pool_grow_object(rh->mem, JSON_SEPARATOR, 0)) {
+                               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+                               goto bad;
+                       }
+               }
+
+               if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
+                       log_error("dm_report: Unable to terminate output line");
+                       goto bad;
+               }
+
+               line = (char *) dm_pool_end_object(rh->mem);
+               log_print("%*s", rh->group_item ? rh->group_item->group->indent + (int) strlen(line) : 0, line);
+               if (!(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
+                       dm_list_del(&row->list);
+       }
+
+       if (!(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
+               _destroy_rows(rh);
+
+       return 1;
+
+      bad:
+       dm_pool_abandon_object(rh->mem);
+       return 0;
+}
+
+/* Return 1 if the report currently holds no rows, 0 otherwise. */
+int dm_report_is_empty(struct dm_report *rh)
+{
+       return dm_list_empty(&rh->rows) ? 1 : 0;
+}
+
+/*
+ * Return the item at the top of the group's report stack (the most
+ * recently pushed one), or NULL if the group is NULL or empty.
+ */
+static struct report_group_item *_get_topmost_report_group_item(struct dm_report_group *group)
+{
+       struct report_group_item *item;
+
+       if (group && !dm_list_empty(&group->items))
+               item = dm_list_item(dm_list_first(&group->items), struct report_group_item);
+       else
+               item = NULL;
+
+       return item;
+}
+
+/*
+ * Emit the opening '{' of the whole JSON document exactly once per
+ * group (indent == 0 means nothing has been printed yet).
+ */
+static void _json_output_start(struct dm_report_group *group)
+{
+       if (!group->indent) {
+               log_print(JSON_OBJECT_START);
+               group->indent += JSON_INDENT_UNIT;
+       }
+}
+
+/*
+ * Print the '"name": [' line that opens this item's JSON array,
+ * preceded by a comma if a sibling has already finished, and by an
+ * extra '{' (with deeper indent) when a nested named object is needed.
+ * Increases the group indent for the array contents.
+ * Returns 1 on success, 0 on error.
+ */
+static int _json_output_array_start(struct dm_pool *mem, struct report_group_item *item)
+{
+       const char *name = (const char *) item->data;
+       char *output;
+
+       if (!dm_pool_begin_object(mem, 32)) {
+               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+               return 0;
+       }
+
+       if (!dm_pool_grow_object(mem, JSON_QUOTE, 1) ||
+           !dm_pool_grow_object(mem, name, 0) ||
+           !dm_pool_grow_object(mem, JSON_QUOTE JSON_PAIR JSON_SPACE JSON_ARRAY_START, 0) ||
+           !dm_pool_grow_object(mem, "\0", 1) ||
+           !(output = dm_pool_end_object(mem))) {
+               log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+               goto bad;
+       }
+
+       /* A sibling already closed before us: separate with a comma. */
+       if (item->parent->store.finished_count > 0)
+               log_print("%*s", item->group->indent + (int) sizeof(JSON_SEPARATOR) - 1, JSON_SEPARATOR);
+
+       if (item->parent->parent && item->parent->data) {
+               log_print("%*s", item->group->indent + (int) sizeof(JSON_OBJECT_START) - 1, JSON_OBJECT_START);
+               item->group->indent += JSON_INDENT_UNIT;
+       }
+
+       log_print("%*s", item->group->indent + (int) strlen(output), output);
+       item->group->indent += JSON_INDENT_UNIT;
+
+       dm_pool_free(mem, output);
+       return 1;
+bad:
+       dm_pool_abandon_object(mem);
+       return 0;
+}
+
+/*
+ * Prepare the JSON group state before this report emits output: start
+ * the document if needed, verify the report is at the top of the group
+ * stack and not already mid-output, then open its array and mark it as
+ * needing a closing bracket.  Returns 1 on success, 0 on error.
+ */
+static int _prepare_json_report_output(struct dm_report *rh)
+{
+       _json_output_start(rh->group_item->group);
+
+       /* Nothing new to emit for an already-output, empty report. */
+       if (rh->group_item->output_done && dm_list_empty(&rh->rows))
+               return 1;
+
+       /*
+        * If this report is in JSON group, it must be at the
+        * top of the stack of reports so the output from
+        * different reports do not interleave with each other.
+        */
+       if (_get_topmost_report_group_item(rh->group_item->group) != rh->group_item) {
+               log_error("dm_report: dm_report_output: interleaved reports detected for JSON output");
+               return 0;
+       }
+
+       if (rh->group_item->needs_closing) {
+               log_error("dm_report: dm_report_output: unfinished JSON output detected");
+               return 0;
+       }
+
+       if (!_json_output_array_start(rh->mem, rh->group_item))
+               return_0;
+
+       rh->group_item->needs_closing = 1;
+       return 1;
+}
+
+/*
+ * For DM_REPORT_GROUP_BASIC output: print the report name followed by a
+ * '=' underline of the same length (and a blank separator line when a
+ * sibling report was already printed).  Returns 1 on success.
+ */
+static int _print_basic_report_header(struct dm_report *rh)
+{
+       const char *report_name = (const char *) rh->group_item->data;
+       size_t len = strlen(report_name);
+       char *underline;
+
+       /* +1 for the terminating NUL left in place by zalloc. */
+       if (!(underline = dm_pool_zalloc(rh->mem, len + 1)))
+               return_0;
+
+       memset(underline, '=', len);
+
+       if (rh->group_item->parent->store.finished_count > 0)
+               log_print("%s", "");
+       log_print("%s", report_name);
+       log_print("%s", underline);
+
+       dm_pool_free(rh->mem, underline);
+       return 1;
+}
+
+/*
+ * Render the report's buffered rows: prepare JSON framing if needed,
+ * recalculate/sort pending fields and rows, print a basic header for
+ * basic groups, then emit the data as rows or columns per rh->flags.
+ * On success marks the group item as having produced output.
+ */
+int dm_report_output(struct dm_report *rh)
+{
+       int r = 0;
+
+       if (_is_json_report(rh) &&
+           !_prepare_json_report_output(rh))
+               return_0;
+
+       /* No rows: success, but nothing to print. */
+       if (dm_list_empty(&rh->rows)) {
+               r = 1;
+               goto out;
+       }
+
+       if (rh->flags & RH_FIELD_CALC_NEEDED)
+               _recalculate_fields(rh);
+
+       if ((rh->flags & RH_SORT_REQUIRED))
+               _sort_rows(rh);
+
+       if (_is_basic_report(rh) && !_print_basic_report_header(rh))
+               goto_out;
+
+       if ((rh->flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS))
+               r = _output_as_rows(rh);
+       else
+               r = _output_as_columns(rh);
+out:
+       if (r && rh->group_item)
+               rh->group_item->output_done = 1;
+       return r;
+}
+
+/* Public wrapper: discard all rows accumulated in rh without destroying rh. */
+void dm_report_destroy_rows(struct dm_report *rh)
+{
+       _destroy_rows(rh);
+}
+
+/*
+ * Allocate a new report group of the given type from a private mem pool.
+ * A root (parent-less) item is pushed up front so every later
+ * dm_report_group_push() has a parent to attach to.
+ * Returns the group, or NULL on allocation failure (pool destroyed).
+ */
+struct dm_report_group *dm_report_group_create(dm_report_group_type_t type, void *data)
+{
+       struct dm_report_group *group;
+       struct dm_pool *mem;
+       struct report_group_item *item;
+
+       if (!(mem = dm_pool_create("report_group", 1024))) {
+               log_error("dm_report: dm_report_init_group: failed to allocate mem pool");
+               return NULL;
+       }
+
+       if (!(group = dm_pool_zalloc(mem, sizeof(*group)))) {
+               log_error("dm_report: failed to allocate report group structure");
+               goto bad;
+       }
+
+       group->mem = mem;
+       group->type = type;
+       dm_list_init(&group->items);
+
+       if (!(item = dm_pool_zalloc(mem, sizeof(*item)))) {
+               /* fixed typo: "faile" -> "failed" */
+               log_error("dm_report: failed to allocate root report group item");
+               goto bad;
+       }
+
+       dm_list_add_h(&group->items, &item->list);
+
+       return group;
+bad:
+       dm_pool_destroy(mem);
+       return NULL;
+}
+
+/*
+ * Push handler for DM_REPORT_GROUP_SINGLE: allow at most one report in the
+ * group at a time.  The new item is already on the list, so a count above
+ * one means a second report is being added.
+ * 'data' is unused; the parameter keeps the push handlers' shapes parallel.
+ */
+static int _report_group_push_single(struct report_group_item *item, void *data)
+{
+       struct report_group_item *item_iter;
+       unsigned count = 0;
+
+       dm_list_iterate_items(item_iter, &item->group->items) {
+               if (item_iter->report)
+                       count++;
+       }
+
+       if (count > 1) {
+               log_error("dm_report: unable to add more than one report "
+                         "to current report group");
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Push handler for DM_REPORT_GROUP_BASIC.  Unbuffered reports cannot be
+ * replayed, so drop the MULTIPLE_TIMES flag for them; for a non-report
+ * item with no name, print a blank separator after finished siblings.
+ */
+static int _report_group_push_basic(struct report_group_item *item, const char *name)
+{
+       if (item->report) {
+               if (!(item->report->flags & DM_REPORT_OUTPUT_BUFFERED))
+                       item->report->flags &= ~(DM_REPORT_OUTPUT_MULTIPLE_TIMES);
+       } else {
+               if (!name && item->parent->store.finished_count > 0)
+                       log_print("%s", "");
+       }
+
+       return 1;
+}
+
+/*
+ * Push handler for DM_REPORT_GROUP_JSON.  Reports are forced into
+ * buffered, unaligned, headerless column output (required for valid
+ * JSON).  Non-report items open a named array or an anonymous object
+ * immediately; unnamed objects are rejected at the top level since a
+ * JSON document needs keys directly under the root.
+ */
+static int _report_group_push_json(struct report_group_item *item, const char *name)
+{
+       if (name && !(item->data = dm_pool_strdup(item->group->mem, name))) {
+               log_error("dm_report: failed to duplicate json item name");
+               return 0;
+       }
+
+       if (item->report) {
+               item->report->flags &= ~(DM_REPORT_OUTPUT_ALIGNED |
+                                        DM_REPORT_OUTPUT_HEADINGS |
+                                        DM_REPORT_OUTPUT_COLUMNS_AS_ROWS);
+               item->report->flags |= DM_REPORT_OUTPUT_BUFFERED;
+       } else {
+               _json_output_start(item->group);
+               if (name) {
+                       if (!_json_output_array_start(item->group->mem, item))
+                               return_0;
+               } else {
+                       if (!item->parent->parent) {
+                               log_error("dm_report: can't use unnamed object at top level of JSON output");
+                               return 0;
+                       }
+                       /* Separate from a finished sibling, then open '{'. */
+                       if (item->parent->store.finished_count > 0)
+                               log_print("%*s", item->group->indent + (int) sizeof(JSON_SEPARATOR) - 1, JSON_SEPARATOR);
+                       log_print("%*s", item->group->indent + (int) sizeof(JSON_OBJECT_START) - 1, JSON_OBJECT_START);
+                       item->group->indent += JSON_INDENT_UNIT;
+               }
+
+               /* Framing printed now; the pop must emit the closers. */
+               item->output_done = 1;
+               item->needs_closing = 1;
+       }
+
+       return 1;
+}
+
+/*
+ * Push a new item (a report, or a named/anonymous container when
+ * 'report' is NULL) onto the group's item stack and run the
+ * type-specific push handler.  'data' is the item name for BASIC and
+ * JSON groups.  A NULL group is accepted as a no-op (returns 1).
+ * On handler failure the item is unlinked and freed again.
+ */
+int dm_report_group_push(struct dm_report_group *group, struct dm_report *report, void *data)
+{
+       struct report_group_item *item, *tmp_item;
+
+       if (!group)
+               return 1;
+
+       if (!(item = dm_pool_zalloc(group->mem, sizeof(*item)))) {
+               log_error("dm_report: dm_report_group_push: group item allocation failed");
+               return 0;
+       }
+
+       if ((item->report = report)) {
+               /* Remember caller's flags; pop restores them. */
+               item->store.orig_report_flags = report->flags;
+               report->group_item = item;
+       }
+
+       item->group = group;
+       item->data = data;
+
+       /* Parent is the nearest non-report item on the stack. */
+       dm_list_iterate_items(tmp_item, &group->items) {
+               if (!tmp_item->report) {
+                       item->parent = tmp_item;
+                       break;
+               }
+       }
+
+       dm_list_add_h(&group->items, &item->list);
+
+       switch (group->type) {
+               case DM_REPORT_GROUP_SINGLE:
+                       if (!_report_group_push_single(item, data))
+                               goto_bad;
+                       break;
+               case DM_REPORT_GROUP_BASIC:
+                       if (!_report_group_push_basic(item, data))
+                               goto_bad;
+                       break;
+               case DM_REPORT_GROUP_JSON:
+                       if (!_report_group_push_json(item, data))
+                               goto_bad;
+                       break;
+               default:
+                       goto_bad;
+       }
+
+       return 1;
+bad:
+       dm_list_del(&item->list);
+       dm_pool_free(group->mem, item);
+       return 0;
+}
+
+/* Pop handler for SINGLE groups: nothing type-specific to emit or undo. */
+static int _report_group_pop_single(struct report_group_item *item)
+{
+       return 1;
+}
+
+/* Pop handler for BASIC groups: nothing type-specific to emit or undo. */
+static int _report_group_pop_basic(struct report_group_item *item)
+{
+       return 1;
+}
+
+/*
+ * Pop handler for JSON groups: close the framing opened on push/output —
+ * the ']' for a named array and, when the parent is a named non-root
+ * item, its enclosing '}' — undoing the corresponding indent bumps.
+ * Only acts when output was produced and closing is still pending.
+ */
+static int _report_group_pop_json(struct report_group_item *item)
+{
+       if (item->output_done && item->needs_closing) {
+               if (item->data) {
+                       item->group->indent -= JSON_INDENT_UNIT;
+                       log_print("%*s", item->group->indent + (int) sizeof(JSON_ARRAY_END) - 1, JSON_ARRAY_END);
+               }
+               if (item->parent->data && item->parent->parent) {
+                       item->group->indent -= JSON_INDENT_UNIT;
+                       log_print("%*s", item->group->indent + (int) sizeof(JSON_OBJECT_END) - 1, JSON_OBJECT_END);
+               }
+               item->needs_closing = 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Pop the topmost item from the group: run the type-specific pop handler,
+ * unlink the item, restore a popped report's original flags, bump the
+ * parent's finished_count (drives separators/commas between siblings)
+ * and free the item.  A NULL group is accepted as a no-op (returns 1).
+ */
+int dm_report_group_pop(struct dm_report_group *group)
+{
+       struct report_group_item *item;
+
+       if (!(group))
+               return 1;
+
+       if (!(item = _get_topmost_report_group_item(group))) {
+               log_error("dm_report: dm_report_group_pop: group has no items");
+               return 0;
+       }
+
+       switch (group->type) {
+               case DM_REPORT_GROUP_SINGLE:
+                       if (!_report_group_pop_single(item))
+                               return_0;
+                       break;
+               case DM_REPORT_GROUP_BASIC:
+                       if (!_report_group_pop_basic(item))
+                               return_0;
+                       break;
+               case DM_REPORT_GROUP_JSON:
+                       if (!_report_group_pop_json(item))
+                               return_0;
+                       break;
+               default:
+                       return 0;
+        }
+
+       dm_list_del(&item->list);
+
+       if (item->report) {
+               /* Give the report back its pre-push flags. */
+               item->report->flags = item->store.orig_report_flags;
+               item->report->group_item = NULL;
+       }
+
+       if (item->parent)
+               item->parent->store.finished_count++;
+
+       dm_pool_free(group->mem, item);
+       return 1;
+}
+
+/*
+ * Flush any pending report output and pop every non-root item, then for
+ * JSON groups print the final closing '}' of the document.  The root
+ * item stays on the stack with its finished_count reset so the group
+ * can be reused.
+ */
+int dm_report_group_output_and_pop_all(struct dm_report_group *group)
+{
+       struct report_group_item *item, *tmp_item;
+
+       dm_list_iterate_items_safe(item, tmp_item, &group->items) {
+               /* Keep the root item; just reset its sibling counter. */
+               if (!item->parent) {
+                       item->store.finished_count = 0;
+                       continue;
+               }
+               if (item->report && !dm_report_output(item->report))
+                       return_0;
+               if (!dm_report_group_pop(group))
+                       return_0;
+       }
+
+       if (group->type == DM_REPORT_GROUP_JSON) {
+               /* Ensure '{' was printed even for an empty document. */
+               _json_output_start(group);
+               log_print(JSON_OBJECT_END);
+               group->indent -= JSON_INDENT_UNIT;
+       }
+
+       return 1;
+}
+
+/*
+ * Flush/pop everything in the group and destroy its memory pool.
+ * Returns 0 if the final flush failed, 1 otherwise; the pool is
+ * destroyed in both cases.  NULL group is a no-op (returns 1).
+ */
+int dm_report_group_destroy(struct dm_report_group *group)
+{
+       int r = 1;
+
+       if (!group)
+               return 1;
+
+       if (!dm_report_group_output_and_pop_all(group))
+               r = 0;
+
+       dm_pool_destroy(group->mem);
+       return r;
+}
diff --git a/device_mapper/libdm-stats.c b/device_mapper/libdm-stats.c
new file mode 100644 (file)
index 0000000..6cd08a7
--- /dev/null
@@ -0,0 +1,5095 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
+ *
+ * _stats_get_extents_for_file() based in part on filefrag_fiemap() from
+ * e2fsprogs/misc/filefrag.c. Copyright 2003 by Theodore Ts'o.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "misc/kdev_t.h"
+
+#include "math.h" /* log10() */
+
+#include <sys/sysmacros.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h> /* fstatfs */
+#include <unistd.h>
+
+#ifdef __linux__
+  #include <linux/fs.h> /* FS_IOC_FIEMAP */
+#endif
+
+#ifdef HAVE_LINUX_FIEMAP_H
+  #include <linux/fiemap.h> /* fiemap */
+#endif
+
+#ifdef HAVE_LINUX_MAGIC_H
+  #include <linux/magic.h> /* BTRFS_SUPER_MAGIC */
+#endif
+
+#define DM_STATS_REGION_NOT_PRESENT UINT64_MAX
+#define DM_STATS_GROUP_NOT_PRESENT DM_STATS_GROUP_NONE
+
+#define NSEC_PER_USEC   1000L
+#define NSEC_PER_MSEC   1000000L
+#define NSEC_PER_SEC    1000000000L
+
+#define PRECISE_ARG "precise_timestamps"
+#define HISTOGRAM_ARG "histogram:"
+
+#define STATS_ROW_BUF_LEN 4096
+#define STATS_MSG_BUF_LEN 1024
+#define STATS_FIE_BUF_LEN 2048
+
+#define SECTOR_SHIFT 9L
+
+/* Histogram bin: one latency bucket of a region's histogram. */
+struct dm_histogram_bin {
+       uint64_t upper; /* Upper bound on this bin. */
+       uint64_t count; /* Count value for this bin. */
+};
+
+/* A latency histogram: header plus a flexible array of bins. */
+struct dm_histogram {
+       /* The stats handle this histogram belongs to. */
+       const struct dm_stats *dms;
+       /* The region this histogram belongs to. */
+       const struct dm_stats_region *region;
+       uint64_t sum; /* Sum of histogram bin counts. */
+       int nr_bins; /* Number of histogram bins assigned. */
+       struct dm_histogram_bin bins[0]; /* Flexible array member (C89-style). */
+};
+
+/*
+ * See Documentation/device-mapper/statistics.txt for full descriptions
+ * of the device-mapper statistics counter fields.
+ *
+ * NOTE(review): the *_nsecs fields are named for nanoseconds but the
+ * original comments say milliseconds; the stored unit depends on the
+ * region's precise_timestamps setting / timescale — confirm per field.
+ */
+struct dm_stats_counters {
+       uint64_t reads;             /* Num reads completed */
+       uint64_t reads_merged;      /* Num reads merged */
+       uint64_t read_sectors;      /* Num sectors read */
+       uint64_t read_nsecs;        /* Num milliseconds spent reading */
+       uint64_t writes;            /* Num writes completed */
+       uint64_t writes_merged;     /* Num writes merged */
+       uint64_t write_sectors;     /* Num sectors written */
+       uint64_t write_nsecs;       /* Num milliseconds spent writing */
+       uint64_t io_in_progress;    /* Num I/Os currently in progress */
+       uint64_t io_nsecs;          /* Num milliseconds spent doing I/Os */
+       uint64_t weighted_io_nsecs; /* Weighted num milliseconds doing I/Os */
+       uint64_t total_read_nsecs;  /* Total time spent reading in milliseconds */
+       uint64_t total_write_nsecs; /* Total time spent writing in milliseconds */
+       struct dm_histogram *histogram; /* Histogram. */
+};
+
+/* One @stats region on the bound device, with its per-area counters. */
+struct dm_stats_region {
+       uint64_t region_id; /* as returned by @stats_list */
+       uint64_t group_id;  /* owning group, or DM_STATS_GROUP_NOT_PRESENT */
+       uint64_t start;     /* region start, in sectors */
+       uint64_t len;       /* region length, in sectors */
+       uint64_t step;      /* area size, in sectors (0 = one area) */
+       char *program_id;   /* dm_malloc'd; freed in _stats_region_destroy() */
+       char *aux_data;     /* dm_malloc'd; may embed a DMS_GROUP descriptor */
+       uint64_t timescale; /* precise_timestamps is per-region */
+       struct dm_histogram *bounds; /* histogram configuration */
+       struct dm_histogram *histogram; /* aggregate cache */
+       struct dm_stats_counters *counters; /* one entry per area */
+};
+
+/* A group of regions aggregated under one id (leader = first member). */
+struct dm_stats_group {
+       uint64_t group_id;   /* equals the leader region's region_id */
+       const char *alias;   /* optional name, dm_malloc'd */
+       dm_bitset_t regions; /* bitset of member region_ids */
+       struct dm_histogram *histogram; /* aggregate histogram cache */
+};
+
+/* A dm-stats handle: device binding, region/group tables and the walk cursor. */
+struct dm_stats {
+       /* device binding */
+       int bind_major;  /* device major that this dm_stats object is bound to */
+       int bind_minor;  /* device minor that this dm_stats object is bound to */
+       char *bind_name; /* device-mapper device name */
+       char *bind_uuid; /* device-mapper UUID */
+       char *program_id; /* default program_id for this handle */
+       const char *name; /* cached device_name used for reporting */
+       struct dm_pool *mem; /* memory pool for region and counter tables */
+       struct dm_pool *hist_mem; /* separate pool for histogram tables */
+       struct dm_pool *group_mem; /* separate pool for group tables */
+       uint64_t nr_regions; /* total number of present regions */
+       uint64_t max_region; /* size of the regions table */
+       uint64_t interval_ns;  /* sampling interval in nanoseconds */
+       uint64_t timescale; /* default sample value multiplier */
+       int precise; /* use precise_timestamps when creating regions */
+       struct dm_stats_region *regions;
+       struct dm_stats_group *groups;
+       /* statistics cursor */
+       uint64_t walk_flags; /* walk control flags */
+       uint64_t cur_flags;  /* flags for the walk in progress */
+       uint64_t cur_group;  /* current group position of the cursor */
+       uint64_t cur_region; /* current region position of the cursor */
+       uint64_t cur_area;   /* current area position of the cursor */
+};
+
+#define PROC_SELF_COMM "/proc/self/comm"
+/*
+ * Derive a default program_id from the current process name
+ * (/proc/self/comm).  Returns a dm_malloc'd string or NULL on error.
+ * NOTE(review): fgets() keeps the trailing '\n' from the comm file,
+ * so the returned id includes it — confirm this is intended.
+ */
+static char *_program_id_from_proc(void)
+{
+       FILE *comm = NULL;
+       char buf[STATS_ROW_BUF_LEN];
+
+       if (!(comm = fopen(PROC_SELF_COMM, "r")))
+               return_NULL;
+
+       if (!fgets(buf, sizeof(buf), comm)) {
+               log_error("Could not read from %s", PROC_SELF_COMM);
+               if (fclose(comm))
+                       stack;
+               return NULL;
+       }
+
+       if (fclose(comm))
+               stack;
+
+       return dm_strdup(buf);
+}
+
+/*
+ * Number of areas a region of 'len' sectors is divided into with area
+ * size 'step' sectors.  Matches the kernel's
+ * drivers/md/dm-stats.c::message_stats_create(): any partial area at
+ * the end of the region counts as one additional complete area.
+ */
+static uint64_t _nr_areas(uint64_t len, uint64_t step)
+{
+       /* Default is one area. */
+       if (!len || !step)
+               return 1;
+       /*
+        * Ceiling division written as div+mod rather than
+        * (len + step - 1) / step, so that very large 'len' values
+        * cannot wrap around UINT64_MAX before the division.
+        */
+       return len / step + (len % step ? 1 : 0);
+}
+
+/* Convenience wrapper: number of areas in 'region' (see _nr_areas()). */
+static uint64_t _nr_areas_region(struct dm_stats_region *region)
+{
+       return _nr_areas(region->len, region->step);
+}
+
+/*
+ * Allocate and initialise a dm_stats handle.  program_id defaults to
+ * the process comm name when NULL/empty.  Three pools are created so
+ * that regions, histograms and groups can be freed independently.
+ * Returns NULL on failure with everything allocated so far released.
+ */
+struct dm_stats *dm_stats_create(const char *program_id)
+{
+       size_t hist_hint = sizeof(struct dm_histogram_bin);
+       size_t group_hint = sizeof(struct dm_stats_group);
+       struct dm_stats *dms = NULL;
+
+       if (!(dms = dm_zalloc(sizeof(*dms))))
+               return_NULL;
+
+       /* FIXME: better hint. */
+       if (!(dms->mem = dm_pool_create("stats_pool", 4096))) {
+               dm_free(dms);
+               return_NULL;
+       }
+
+       if (!(dms->hist_mem = dm_pool_create("histogram_pool", hist_hint)))
+               goto_bad;
+
+       if (!(dms->group_mem = dm_pool_create("group_pool", group_hint)))
+               goto_bad;
+
+       if (!program_id || !strlen(program_id))
+               dms->program_id = _program_id_from_proc();
+       else
+               dms->program_id = dm_strdup(program_id);
+
+       if (!dms->program_id) {
+               log_error("Could not allocate memory for program_id");
+               goto bad;
+       }
+
+       /* Unbound until a dm_stats_bind_*() call. */
+       dms->bind_major = -1;
+       dms->bind_minor = -1;
+       dms->bind_name = NULL;
+       dms->bind_uuid = NULL;
+
+       dms->name = NULL;
+
+       /* by default all regions use msec precision */
+       dms->timescale = NSEC_PER_MSEC;
+       dms->precise = 0;
+
+       dms->nr_regions = DM_STATS_REGION_NOT_PRESENT;
+       dms->max_region = DM_STATS_REGION_NOT_PRESENT;
+       dms->regions = NULL;
+
+       /* maintain compatibility with earlier walk version */
+       dms->walk_flags = dms->cur_flags = DM_STATS_WALK_DEFAULT;
+
+       return dms;
+
+bad:
+       /* dms->mem always exists here; the later pools may not. */
+       dm_pool_destroy(dms->mem);
+       if (dms->hist_mem)
+               dm_pool_destroy(dms->hist_mem);
+       if (dms->group_mem)
+               dm_pool_destroy(dms->group_mem);
+       dm_free(dms);
+       return NULL;
+}
+
+/* Return 1 if 'region' is a populated region-table slot, 0 otherwise. */
+static int _stats_region_present(const struct dm_stats_region *region)
+{
+       return region->region_id != DM_STATS_REGION_NOT_PRESENT;
+}
+
+/* Return 1 if 'group' is a populated group-table slot, 0 otherwise. */
+static int _stats_group_present(const struct dm_stats_group *group)
+{
+       return group->group_id != DM_STATS_GROUP_NOT_PRESENT;
+}
+
+/*
+ * Test whether a stats group id is present: the id must be valid,
+ * within the regions table bounds, and the corresponding group-table
+ * slot populated.  Safe to call with a NULL handle (returns 0).
+ */
+static int _stats_group_id_present(const struct dm_stats *dms, uint64_t id)
+{
+       struct dm_stats_group *group = NULL;
+
+       if (id == DM_STATS_GROUP_NOT_PRESENT)
+               return 0;
+
+       if (!dms)
+               return_0;
+
+       if (!dms->regions)
+               return 0;
+
+       /* Group ids share the region id space, so max_region bounds both. */
+       if (id > dms->max_region)
+               return 0;
+
+       group = &dms->groups[id];
+
+       return _stats_group_present(group);
+}
+
+/*
+ * Test whether the given region_id is a member of any group: the region
+ * must be present and carry a valid group_id.  Returns non-zero when
+ * grouped, 0 otherwise.
+ */
+static uint64_t _stats_region_is_grouped(const struct dm_stats* dms,
+                                        uint64_t region_id)
+{
+       uint64_t group_id;
+
+       if (region_id == DM_STATS_GROUP_NOT_PRESENT)
+               return 0;
+
+       if (!_stats_region_present(&dms->regions[region_id]))
+               return 0;
+
+       group_id = dms->regions[region_id].group_id;
+
+       return group_id != DM_STATS_GROUP_NOT_PRESENT;
+}
+
+/*
+ * Release 'region's histogram tables from the histogram pool.  Freeing
+ * the first area's histogram releases everything allocated after it,
+ * per dm_pool semantics.  No-op for an unpopulated region.
+ */
+static void _stats_histograms_destroy(struct dm_pool *mem,
+                                     struct dm_stats_region *region)
+{
+       /* Unpopulated handle. */
+       if (!region->counters)
+               return;
+
+       /*
+        * Free everything in the pool back to the first histogram.
+        */
+       if (region->counters[0].histogram)
+               dm_pool_free(mem, region->counters[0].histogram);
+}
+
+/*
+ * Reset a region slot to the not-present state, freeing its dm_malloc'd
+ * strings.  Counter and bounds tables are pool-allocated and are freed
+ * with the regions table, not here.
+ */
+static void _stats_region_destroy(struct dm_stats_region *region)
+{
+       if (!_stats_region_present(region))
+               return;
+
+       region->start = region->len = region->step = 0;
+       region->timescale = 0;
+
+       /*
+        * Don't free counters and histogram bounds here: they are
+        * dropped from the pool along with the corresponding
+        * regions table.
+        *
+        * The following objects are all allocated with dm_malloc.
+        */
+
+       region->counters = NULL;
+       region->bounds = NULL;
+
+       dm_free(region->program_id);
+       region->program_id = NULL;
+       dm_free(region->aux_data);
+       region->aux_data = NULL;
+       region->region_id = DM_STATS_REGION_NOT_PRESENT;
+}
+
+/*
+ * Destroy every region slot and free the regions table itself.
+ * Slots are destroyed in reverse index order to respect dm_pool
+ * allocation order; the loop stops when the unsigned index wraps
+ * to DM_STATS_REGION_NOT_PRESENT (UINT64_MAX).
+ */
+static void _stats_regions_destroy(struct dm_stats *dms)
+{
+       struct dm_pool *mem = dms->mem;
+       uint64_t i;
+
+       if (!dms->regions)
+               return;
+
+       /* walk backwards to obey pool order */
+       for (i = dms->max_region; (i != DM_STATS_REGION_NOT_PRESENT); i--) {
+               _stats_histograms_destroy(dms->hist_mem, &dms->regions[i]);
+               _stats_region_destroy(&dms->regions[i]);
+       }
+
+       dm_pool_free(mem, dms->regions);
+       dms->regions = NULL;
+}
+
+/*
+ * Reset a group slot to the not-present state, freeing its alias string
+ * and member bitset.  The aggregate histogram is pool-allocated and is
+ * released with its pool, so only the pointer is cleared here.
+ */
+static void _stats_group_destroy(struct dm_stats_group *group)
+{
+       if (!_stats_group_present(group))
+               return;
+
+       group->histogram = NULL;
+
+       if (group->alias) {
+               dm_free((char *) group->alias);
+               group->alias = NULL;
+       }
+       if (group->regions) {
+               dm_bitset_destroy(group->regions);
+               group->regions = NULL;
+       }
+       group->group_id = DM_STATS_GROUP_NOT_PRESENT;
+}
+
+/*
+ * Destroy every group slot (reverse index order, as for regions) and
+ * free the groups table from its pool.
+ */
+static void _stats_groups_destroy(struct dm_stats *dms)
+{
+       uint64_t i;
+
+       if (!dms->groups)
+               return;
+
+       for (i = dms->max_region; (i != DM_STATS_REGION_NOT_PRESENT); i--)
+               _stats_group_destroy(&dms->groups[i]);
+       dm_pool_free(dms->group_mem, dms->groups);
+       dms->groups = NULL;
+}
+
+/*
+ * Point 'dmt' at the device this handle is bound to, preferring name,
+ * then uuid, then major:minor.  Fails (with stack trace via return_0)
+ * when the handle carries no binding at all.
+ */
+static int _set_stats_device(struct dm_stats *dms, struct dm_task *dmt)
+{
+       if (dms->bind_name)
+               return dm_task_set_name(dmt, dms->bind_name);
+       if (dms->bind_uuid)
+               return dm_task_set_uuid(dmt, dms->bind_uuid);
+       if (dms->bind_major > 0)
+               return dm_task_set_major(dmt, dms->bind_major)
+                       && dm_task_set_minor(dmt, dms->bind_minor);
+       return_0;
+}
+
+/* Return 1 if the handle is bound to some device; log an error and
+ * return 0 otherwise. */
+static int _stats_bound(const struct dm_stats *dms)
+{
+       if (dms->bind_major > 0 || dms->bind_name || dms->bind_uuid)
+               return 1;
+       /* %p format specifier expects a void pointer. */
+       log_error("Stats handle at %p is not bound.", dms);
+       return 0;
+}
+
+/*
+ * Drop the current device binding: pool-allocated name/uuid strings are
+ * returned to dms->mem, the dm_malloc'd cached device name is freed,
+ * and major/minor are reset to the unbound value (-1).
+ */
+static void _stats_clear_binding(struct dm_stats *dms)
+{
+       if (dms->bind_name)
+               dm_pool_free(dms->mem, dms->bind_name);
+       if (dms->bind_uuid)
+               dm_pool_free(dms->mem, dms->bind_uuid);
+       dm_free((char *) dms->name);
+
+       dms->bind_name = dms->bind_uuid = NULL;
+       dms->bind_major = dms->bind_minor = -1;
+       dms->name = NULL;
+}
+
+/*
+ * Bind the handle to a device by major:minor.  Any previous binding and
+ * all cached region/group tables are discarded first.  Always returns 1.
+ */
+int dm_stats_bind_devno(struct dm_stats *dms, int major, int minor)
+{
+       _stats_clear_binding(dms);
+       _stats_regions_destroy(dms);
+       _stats_groups_destroy(dms);
+
+       dms->bind_major = major;
+       dms->bind_minor = minor;
+
+       return 1;
+}
+
+/*
+ * Bind the handle to a device by dm name, discarding any previous
+ * binding and cached tables.  Returns 0 on allocation failure.
+ */
+int dm_stats_bind_name(struct dm_stats *dms, const char *name)
+{
+       _stats_clear_binding(dms);
+       _stats_regions_destroy(dms);
+       _stats_groups_destroy(dms);
+
+       if (!(dms->bind_name = dm_pool_strdup(dms->mem, name)))
+               return_0;
+
+       return 1;
+}
+
+/*
+ * Bind the handle to a device by dm UUID, discarding any previous
+ * binding and cached tables.  Returns 0 on allocation failure.
+ */
+int dm_stats_bind_uuid(struct dm_stats *dms, const char *uuid)
+{
+       _stats_clear_binding(dms);
+       _stats_regions_destroy(dms);
+       _stats_groups_destroy(dms);
+
+       if (!(dms->bind_uuid = dm_pool_strdup(dms->mem, uuid)))
+               return_0;
+
+       return 1;
+}
+
+/*
+ * Bind the handle to the device underlying an open file descriptor,
+ * using st_dev (the device holding the file) from fstat(2).
+ * NOTE(review): this file uses tab indentation elsewhere but spaces
+ * here — no functional impact.
+ */
+int dm_stats_bind_from_fd(struct dm_stats *dms, int fd)
+{
+        int major, minor;
+        struct stat buf;
+
+        if (fstat(fd, &buf)) {
+                log_error("fstat failed for fd %d.", fd);
+                return 0;
+        }
+
+        major = (int) MAJOR(buf.st_dev);
+        minor = (int) MINOR(buf.st_dev);
+
+        if (!dm_stats_bind_devno(dms, major, minor))
+                return_0;
+        return 1;
+}
+
+/*
+ * Return 1 if precise_timestamps can be used: either the handle already
+ * has precise enabled (so the driver must support it) or the driver
+ * message interface reports support.  dms may be NULL.
+ */
+static int _stats_check_precise_timestamps(const struct dm_stats *dms)
+{
+       /* Already checked? */
+       if (dms && dms->precise)
+               return 1;
+
+       return dm_message_supports_precise_timestamps();
+}
+
+/* Public: does the running driver support precise_timestamps? */
+int dm_stats_driver_supports_precise(void)
+{
+       return _stats_check_precise_timestamps(NULL);
+}
+
+/*
+ * Public: does the running driver support histograms?  The precise-
+ * timestamps check is reused here — histogram support is assumed to
+ * coincide with it (NOTE(review): confirm against driver versioning).
+ */
+int dm_stats_driver_supports_histogram(void)
+{
+       return _stats_check_precise_timestamps(NULL);
+}
+
+/*
+ * Write the comma-separated list of histogram boundary values (each
+ * divided by 'scale') into the pre-sized hist_arg buffer.  Returns 0
+ * (with stack trace) if a value does not fit.
+ */
+static int _fill_hist_arg(char *hist_arg, size_t hist_len, uint64_t scale,
+                         struct dm_histogram *bounds)
+{
+       int i, l, len = 0, nr_bins;
+       char *arg = hist_arg;
+       uint64_t value;
+
+       nr_bins = bounds->nr_bins;
+
+       for (i = 0; i < nr_bins; i++) {
+               value = bounds->bins[i].upper / scale;
+               /* No trailing ',' after the final boundary. */
+               if ((l = dm_snprintf(arg, hist_len - len, FMTu64"%s", value,
+                                    (i == (nr_bins - 1)) ? "" : ",")) < 0)
+                       return_0;
+               len += l;
+               arg += l;
+       }
+       return 1;
+}
+
+/*
+ * Size and allocate the buffer for the histogram boundary string:
+ * each boundary contributes its decimal digit count (log10 of the
+ * scaled value, plus one) and a ',' separator, plus the final NUL.
+ * Stores the computed size in *len and returns the dm_zalloc'd buffer.
+ */
+static void *_get_hist_arg(struct dm_histogram *bounds, uint64_t scale,
+                          size_t *len)
+{
+       struct dm_histogram_bin *entry, *bins;
+       size_t hist_len = 1; /* terminating '\0' */
+       double value;
+
+       entry = bins = bounds->bins;
+
+       /* Walk the bins from last to first. */
+       entry += bounds->nr_bins - 1;
+       while(entry >= bins) {
+               value = (double) (entry--)->upper;
+               /* Use lround to avoid size_t -> double cast warning. */
+               hist_len += 1 + (size_t) lround(log10(value / scale));
+               if (entry != bins)
+                       hist_len++; /* ',' */
+       }
+
+       *len = hist_len;
+
+       return dm_zalloc(hist_len);
+}
+
+/*
+ * Build the histogram boundary string for 'bounds', scaled to msec, or
+ * to nsec when any boundary requires sub-millisecond precision (in
+ * which case *precise is also set to 1).  Boundaries must be in
+ * increasing order.  Returns a dm_malloc'd string the caller must
+ * dm_free(), or NULL on error.
+ */
+static char *_build_histogram_arg(struct dm_histogram *bounds, int *precise)
+{
+       struct dm_histogram_bin *entry, *bins;
+       size_t hist_len;
+       char *hist_arg;
+       uint64_t scale;
+
+       entry = bins = bounds->bins;
+
+       /* Empty histogram is invalid. */
+       if (!bounds->nr_bins) {
+               log_error("Cannot format empty histogram description.");
+               return NULL;
+       }
+
+       /* Validate entries and set *precise if precision < 1ms. */
+       entry += bounds->nr_bins - 1;
+       while (entry >= bins) {
+               if (entry != bins) {
+                       if (entry->upper < (entry - 1)->upper) {
+                               log_error("Histogram boundaries must be in "
+                                         "order of increasing magnitude.");
+                               return NULL; /* was 'return 0': pointer-returning function */
+                       }
+               }
+
+               /*
+                * Only enable precise_timestamps automatically if any
+                * value in the histogram bounds uses precision < 1ms.
+                */
+               if (((entry--)->upper % NSEC_PER_MSEC) && !*precise)
+                       *precise = 1;
+       }
+
+       scale = (*precise) ? 1 : NSEC_PER_MSEC;
+
+       /* Calculate hist_len and allocate a character buffer. */
+       if (!(hist_arg = _get_hist_arg(bounds, scale, &hist_len))) {
+               log_error("Could not allocate memory for histogram argument.");
+               return NULL; /* was 'return 0': pointer-returning function */
+       }
+
+       /* Fill hist_arg with boundary strings. */
+       if (!_fill_hist_arg(hist_arg, hist_len, scale, bounds))
+               goto_bad;
+
+       return hist_arg;
+
+bad:
+       log_error("Could not build histogram arguments.");
+       dm_free(hist_arg);
+
+       return NULL;
+}
+
+/*
+ * Send a @stats message 'msg' to the bound device and run the task.
+ * Returns the completed dm_task (caller destroys it to read results),
+ * or NULL on failure.
+ */
+static struct dm_task *_stats_send_message(struct dm_stats *dms, char *msg)
+{
+       struct dm_task *dmt;
+
+       if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG)))
+               return_NULL; /* was return_0 in a pointer-returning function */
+
+       if (!_set_stats_device(dms, dmt))
+               goto_bad;
+
+       if (!dm_task_set_message(dmt, msg))
+               goto_bad;
+
+       if (!dm_task_run(dmt))
+               goto_bad;
+
+       return dmt;
+
+bad:
+       dm_task_destroy(dmt);
+       return NULL;
+}
+
+/*
+ * Cache the dm device_name for the device bound to dms.
+ * Queries the driver via DM_DEVICE_INFO and dm_strdup's the name into
+ * dms->name (freed by _stats_clear_binding()).  Idempotent: returns 1
+ * immediately when a name is already cached.
+ */
+static int _stats_set_name_cache(struct dm_stats *dms)
+{
+       struct dm_task *dmt;
+
+       if (dms->name)
+               return 1;
+
+       if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+               return_0;
+
+       if (!_set_stats_device(dms, dmt))
+               goto_bad;
+
+       if (!dm_task_run(dmt))
+               goto_bad;
+
+       if (!(dms->name = dm_strdup(dm_task_get_name(dmt))))
+               goto_bad;
+
+       dm_task_destroy(dmt);
+
+       return 1;
+
+bad:
+       log_error("Could not retrieve device-mapper name for device.");
+       dm_task_destroy(dmt);
+       return 0;
+}
+
+/*
+ * update region group_id values
+ *
+ * For every present group, stamp its group_id onto each member region
+ * found in the group's member bitset.
+ */
+static void _stats_update_groups(struct dm_stats *dms)
+{
+       struct dm_stats_group *group;
+       uint64_t group_id, i;
+
+       for (group_id = 0; group_id < dms->max_region + 1; group_id++) {
+               if (!_stats_group_id_present(dms, group_id))
+                       continue;
+
+               group = &dms->groups[group_id];
+
+               for (i = dm_bit_get_first(group->regions);
+                    i != DM_STATS_GROUP_NOT_PRESENT;
+                    i = dm_bit_get_next(group->regions, i))
+                       dms->regions[i].group_id = group_id;
+       }
+}
+
+/*
+ * Warn about and drop any member of 'group' whose region no longer
+ * exists on the device.  The group_id is the first set bit (the group
+ * leader).  dm_bit_get_first() returns a negative value for an empty
+ * bitset, which skips the loop entirely.
+ */
+static void _check_group_regions_present(struct dm_stats *dms,
+                                        struct dm_stats_group *group)
+{
+       dm_bitset_t regions = group->regions;
+       int64_t i, group_id;
+
+       group_id = i = dm_bit_get_first(regions);
+
+       /*
+        * Was 'i > 0', which skipped all validation whenever the group
+        * leader was region 0; 'i >= 0' checks those members too while
+        * still terminating on the empty-bitset sentinel (-1).
+        */
+       for (; i >= 0; i = dm_bit_get_next(regions, i))
+               if (!_stats_region_present(&dms->regions[i])) {
+                       log_warn("Group descriptor " FMTd64 " contains "
+                                "non-existent region_id " FMTd64 ".",
+                                group_id, i);
+                       dm_bit_clear(regions, i);
+               }
+}
+
+/*
+ * Parse a DMS_GROUP group descriptor embedded in a region's aux_data.
+ *
+ * DMS_GROUP="ALIAS:MEMBERS"
+ *
+ * ALIAS: group alias
+ * MEMBERS: list of group member region ids.
+ *
+ */
+#define DMS_GROUP_TAG "DMS_GROUP="
+#define DMS_GROUP_TAG_LEN (sizeof(DMS_GROUP_TAG) - 1)
+#define DMS_GROUP_SEP ':'
+#define DMS_AUX_SEP "#"
+
+/*
+ * Parse a DMS_GROUP group descriptor embedded in region->aux_data.
+ *
+ * On success the group alias, member bitmap, and group_id are stored
+ * in *group and the descriptor is stripped from region->aux_data,
+ * leaving only any user-supplied aux_data.
+ *
+ * Returns 1 on success (including "no group tag present"), 0 on error.
+ */
+static int _parse_aux_data_group(struct dm_stats *dms,
+                                struct dm_stats_region *region,
+                                struct dm_stats_group *group)
+{
+       char *alias, *c, *end;
+       dm_bitset_t regions;
+
+       memset(group, 0, sizeof(*group));
+       group->group_id = DM_STATS_GROUP_NOT_PRESENT;
+
+       /* find start of group tag */
+       c = strstr(region->aux_data, DMS_GROUP_TAG);
+       if (!c)
+               return 1; /* no group is not an error */
+
+       /* use the tag-length macro instead of a runtime strlen() */
+       alias = c + DMS_GROUP_TAG_LEN;
+
+       c = strchr(c, DMS_GROUP_SEP);
+
+       if (!c) {
+               log_error("Found malformed group tag while reading aux_data");
+               return 0;
+       }
+
+       /* terminate alias and advance to members */
+       *(c++) = '\0';
+
+       log_debug("Read alias '%s' from aux_data", alias);
+
+       /* if user aux_data follows make sure we have a terminated
+        * string to pass to dm_bitset_parse_list().  When no separator
+        * is present, leave end pointing at the terminating NUL: the
+        * previous unconditional *(end++) = '\0' advanced end one byte
+        * past the allocation and later read out of bounds.
+        */
+       end = strstr(c, DMS_AUX_SEP);
+       if (end)
+               /* terminate member list and step over the separator */
+               *(end++) = '\0';
+       else
+               end = c + strlen(c);
+
+       if (!(regions = dm_bitset_parse_list(c, NULL, 0))) {
+               log_error("Could not parse member list while "
+                         "reading group aux_data");
+               return 0;
+       }
+
+       /* A group descriptor is stored in its lowest-numbered member
+        * region: that region_id is the group_id.
+        */
+       group->group_id = dm_bit_get_first(regions);
+       if (group->group_id != region->region_id) {
+               log_error("Found invalid group descriptor in region " FMTu64
+                         " aux_data.", region->region_id);
+               group->group_id = DM_STATS_GROUP_NOT_PRESENT;
+               goto bad;
+       }
+
+       group->regions = regions;
+       group->alias = NULL;
+       if (strlen(alias)) {
+               group->alias = dm_strdup(alias);
+               if (!group->alias) {
+                       log_error("Could not allocate memory for group alias");
+                       goto bad;
+               }
+       }
+
+       /* separate group tag from user aux_data: "-" denotes empty */
+       if ((strlen(end) > 1) || strncmp(end, "-", 1))
+               c = dm_strdup(end);
+       else
+               c = dm_strdup("");
+
+       if (!c) {
+               log_error("Could not allocate memory for user aux_data");
+               goto bad_alias;
+       }
+
+       dm_free(region->aux_data);
+       region->aux_data = c;
+
+       log_debug("Found group_id " FMTu64 ": alias=\"%s\"", group->group_id,
+                 (group->alias) ? group->alias : "");
+
+       return 1;
+
+bad_alias:
+       dm_free((char *) group->alias);
+bad:
+       dm_bitset_destroy(regions);
+       return 0;
+}
+
+/*
+ * Parse a histogram specification returned by the kernel in a
+ * @stats_list response.
+ */
+/*
+ * Parse a "histogram:b1,b2,..." boundary specification from an
+ * @stats_list row and store the bin boundaries (scaled by
+ * region->timescale) in region->bounds.  A final catch-all bin with
+ * an upper bound of UINT64_MAX is always appended.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static int _stats_parse_histogram_spec(struct dm_stats *dms,
+                                      struct dm_stats_region *region,
+                                      const char *histogram)
+{
+       static const char _valid_chars[] = "0123456789,";
+       uint64_t scale = region->timescale, this_val = 0;
+       struct dm_pool *mem = dms->hist_mem;
+       struct dm_histogram_bin cur;
+       struct dm_histogram hist;
+       int nr_bins = 1; /* one for the final catch-all bin */
+       const char *c, *v, *val_start;
+       char *p, *endptr = NULL;
+
+       /* Advance past "histogram:". */
+       histogram = strchr(histogram, ':');
+       if (!histogram) {
+               log_error("Could not parse histogram description.");
+               return 0;
+       }
+       histogram++;
+
+       /* @stats_list rows are newline terminated. */
+       if ((p = strchr(histogram, '\n')))
+               *p = '\0';
+
+       if (!dm_pool_begin_object(mem, sizeof(cur)))
+               return_0;
+
+       memset(&hist, 0, sizeof(hist));
+
+       hist.nr_bins = 0; /* fix later */
+       hist.region = region;
+       hist.dms = dms;
+
+       if (!dm_pool_grow_object(mem, &hist, sizeof(hist)))
+               goto_bad;
+
+       c = histogram;
+       do {
+               for (v = _valid_chars; *v; v++)
+                       if (*c == *v)
+                               break;
+               if (!*v) {
+                       stack;
+                       goto badchar;
+               }
+
+               if (*c == ',') {
+                       /* empty field: ",," or leading "," */
+                       log_error("Invalid histogram description: %s",
+                                 histogram);
+                       goto bad;
+               } else {
+                       val_start = c;
+                       endptr = NULL;
+
+                       errno = 0;
+                       this_val = strtoull(val_start, &endptr, 10);
+                       if (errno || !endptr) {
+                               log_error("Could not parse histogram boundary.");
+                               goto bad;
+                       }
+
+                       c = endptr; /* Advance to units, comma, or end. */
+
+                       if (*c == ',')
+                               c++;
+                       /*
+                        * A space terminates the histogram argument (see
+                        * the loop condition below); the previous test,
+                        * "*c || (*c == ' ')", wrongly rejected it.
+                        */
+                       else if (*c && (*c != ' ')) { /* Expected ',', ' ' or NUL. */
+                               stack;
+                               goto badchar;
+                       }
+
+                       if (*c == ',')
+                               c++;
+
+                       cur.upper = scale * this_val;
+                       cur.count = 0;
+
+                       if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+                               goto_bad;
+
+                       nr_bins++;
+               }
+       } while (*c && (*c != ' '));
+
+       /* final upper bound. */
+       cur.upper = UINT64_MAX;
+       if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+               goto_bad;
+
+       region->bounds = dm_pool_end_object(mem);
+
+       if (!region->bounds)
+               return_0;
+
+       region->bounds->nr_bins = nr_bins;
+
+       log_debug("Added region histogram spec with %d entries.", nr_bins);
+       return 1;
+
+badchar:
+       log_error("Invalid character in histogram: '%c' (0x%x)", *c, *c);
+bad:
+       dm_pool_abandon_object(mem);
+       return 0;
+}
+
+/*
+ * Parse a single row of an @stats_list response into *region.
+ *
+ * Row format:
+ *   <region_id>: <start_sector>+<length> <step> <program_id> [aux_data] [stats_args]
+ *
+ * On success region->program_id and region->aux_data hold heap copies
+ * owned by the caller.  Returns 1 on success, 0 on failure.
+ */
+static int _stats_parse_list_region(struct dm_stats *dms,
+                                   struct dm_stats_region *region, char *line)
+{
+       char *p = NULL, string_data[STATS_ROW_BUF_LEN];
+       char *program_id, *aux_data, *stats_args;
+       char *empty_string = (char *) "";
+       int r;
+
+       memset(string_data, 0, sizeof(string_data));
+
+       /*
+        * Parse fixed fields, line format:
+        *
+        * <region_id>: <start_sector>+<length> <step> <string data>
+        *
+        * Maximum string data size is 4096 - 1 bytes.
+        */
+       r = sscanf(line, FMTu64 ": " FMTu64 "+" FMTu64 " " FMTu64 " %4095c",
+                  &region->region_id, &region->start, &region->len,
+                  &region->step, string_data);
+
+       if (r != 5)
+               return_0;
+
+       /* program_id is guaranteed to be first. */
+       program_id = string_data;
+
+       /*
+        * FIXME: support embedded '\ ' in string data:
+        *   s/strchr/_find_unescaped_space()/
+        */
+       if ((p = strchr(string_data, ' '))) {
+               /* terminate program_id string. */
+               *p = '\0';
+               /* a "-" field denotes an empty program_id */
+               if (!strncmp(program_id, "-", 1))
+                       program_id = empty_string;
+               aux_data = p + 1;
+               if ((p = strchr(aux_data, ' '))) {
+                       /* terminate aux_data string. */
+                       *p = '\0';
+                       stats_args = p + 1;
+               } else
+                       stats_args = empty_string;
+
+               /* no aux_data? */
+               if (!strncmp(aux_data, "-", 1))
+                       aux_data = empty_string;
+               else
+                       /* remove trailing newline
+                        * NOTE(review): the last character is stripped
+                        * unconditionally — when stats_args are present
+                        * the '\n' belongs to stats_args, so this looks
+                        * like it drops a real aux_data character;
+                        * confirm against kernel @stats_list output. */
+                       aux_data[strlen(aux_data) - 1] = '\0';
+       } else
+               aux_data = stats_args = empty_string;
+
+       /* precise regions use timescale 1; others scale ms -> ns */
+       if (strstr(stats_args, PRECISE_ARG))
+               region->timescale = 1;
+       else
+               region->timescale = NSEC_PER_MSEC;
+
+       if ((p = strstr(stats_args, HISTOGRAM_ARG))) {
+               if (!_stats_parse_histogram_spec(dms, region, p))
+                       return_0;
+       } else
+               region->bounds = NULL;
+
+       /* clear aggregate cache */
+       region->histogram = NULL;
+
+       region->group_id = DM_STATS_GROUP_NOT_PRESENT;
+
+       if (!(region->program_id = dm_strdup(program_id)))
+               return_0;
+       if (!(region->aux_data = dm_strdup(aux_data))) {
+               /* avoid leaking the program_id copy on failure */
+               dm_free(region->program_id);
+               return_0;
+       }
+
+       region->counters = NULL;
+       return 1;
+}
+
+/*
+ * Parse a complete @stats_list response, rebuilding dms->regions and
+ * dms->groups.  Holes in the region_id space are padded with
+ * "not present" placeholder entries so that a region_id can index the
+ * tables directly.  Returns 1 on success, 0 on failure.
+ */
+static int _stats_parse_list(struct dm_stats *dms, const char *resp)
+{
+       uint64_t max_region = 0, nr_regions = 0;
+       struct dm_stats_region cur, fill;
+       struct dm_stats_group cur_group;
+       struct dm_pool *mem = dms->mem, *group_mem = dms->group_mem;
+       char line[STATS_ROW_BUF_LEN];
+       FILE *list_rows;
+
+       if (!resp) {
+               log_error("Could not parse NULL @stats_list response.");
+               return 0;
+       }
+
+       /* discard any tables from a previous @stats_list */
+       _stats_regions_destroy(dms);
+       _stats_groups_destroy(dms);
+
+       /* no regions */
+       if (!strlen(resp)) {
+               dms->nr_regions = dms->max_region = 0;
+               dms->regions = NULL;
+               return 1;
+       }
+
+       /*
+        * dm_task_get_message_response() returns a 'const char *' but
+        * since fmemopen also permits "w" it expects a 'char *'.
+        */
+       if (!(list_rows = fmemopen((char *)resp, strlen(resp), "r")))
+               return_0;
+
+       /* begin region table */
+       if (!dm_pool_begin_object(mem, 1024))
+               goto_bad;
+
+       /* begin group table */
+       if (!dm_pool_begin_object(group_mem, 32))
+               goto_bad;
+
+       while(fgets(line, sizeof(line), list_rows)) {
+
+               /* default: this region belongs to no group */
+               cur_group.group_id = DM_STATS_GROUP_NOT_PRESENT;
+               cur_group.regions = NULL;
+               cur_group.alias = NULL;
+
+               if (!_stats_parse_list_region(dms, &cur, line))
+                       goto_bad;
+
+               /* handle holes in the list of region_ids */
+               if (cur.region_id > max_region) {
+                       memset(&fill, 0, sizeof(fill));
+                       memset(&cur_group, 0, sizeof(cur_group));
+                       fill.region_id = DM_STATS_REGION_NOT_PRESENT;
+                       cur_group.group_id = DM_STATS_GROUP_NOT_PRESENT;
+                       do {
+                               if (!dm_pool_grow_object(mem, &fill, sizeof(fill)))
+                                       goto_bad;
+                               if (!dm_pool_grow_object(group_mem, &cur_group,
+                                                        sizeof(cur_group)))
+                                       goto_bad;
+                       } while (max_region++ < (cur.region_id - 1));
+               }
+
+               if (cur.aux_data)
+                       if (!_parse_aux_data_group(dms, &cur, &cur_group))
+                               log_error("Failed to parse group descriptor "
+                                         "from region_id " FMTu64 " aux_data:"
+                                         "'%s'", cur.region_id, cur.aux_data);
+                               /* deliberately no continue/goto: a bad group
+                                * descriptor only loses grouping; the region
+                                * itself is still recorded below. */
+
+               if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+                       goto_bad;
+
+               if (!dm_pool_grow_object(group_mem, &cur_group,
+                                        sizeof(cur_group)))
+                       goto_bad;
+
+               max_region++;
+               nr_regions++;
+       }
+
+       if (!nr_regions)
+               /* no region data read from @stats_list */
+               goto bad;
+
+       dms->nr_regions = nr_regions;
+       dms->max_region = max_region - 1;
+       dms->regions = dm_pool_end_object(mem);
+       dms->groups = dm_pool_end_object(group_mem);
+
+       /* drop group member bits that reference deleted regions */
+       dm_stats_foreach_group(dms)
+               _check_group_regions_present(dms, &dms->groups[dms->cur_group]);
+
+       _stats_update_groups(dms);
+
+       if (fclose(list_rows))
+               stack;
+
+       return 1;
+
+bad:
+       if (fclose(list_rows))
+               stack;
+       dm_pool_abandon_object(mem);
+       dm_pool_abandon_object(group_mem);
+
+       return 0;
+}
+
+/*
+ * Issue an @stats_list message for program_id (or the handle's default
+ * when program_id is NULL) and rebuild the handle's region and group
+ * tables from the response.  Returns 1 on success, 0 on failure.
+ */
+int dm_stats_list(struct dm_stats *dms, const char *program_id)
+{
+       struct dm_task *dmt;
+       char msg[STATS_MSG_BUF_LEN];
+
+       if (!_stats_bound(dms))
+               return_0;
+
+       /* allow zero-length program_id for list */
+       if (!program_id)
+               program_id = dms->program_id;
+
+       if (!_stats_set_name_cache(dms))
+               return_0;
+
+       /* drop any stale region table before re-listing */
+       if (dms->regions)
+               _stats_regions_destroy(dms);
+
+       if (dm_snprintf(msg, sizeof(msg), "@stats_list %s", program_id) < 0) {
+               log_error("Failed to prepare stats message.");
+               return 0;
+       }
+
+       if (!(dmt = _stats_send_message(dms, msg)))
+               return_0;
+
+       if (_stats_parse_list(dms, dm_task_get_message_response(dmt))) {
+               dm_task_destroy(dmt);
+               return 1;
+       }
+
+       log_error("Could not parse @stats_list response.");
+       dm_task_destroy(dmt);
+       return 0;
+}
+
+/*
+ * Parse histogram data returned from a @stats_print operation.
+ */
+/*
+ * Parse colon-separated histogram bin counts returned from a
+ * @stats_print operation, pairing each count with the corresponding
+ * boundary from region->bounds, and store the result (with the bin
+ * total in ->sum) via *histogram.  Returns 1 on success, 0 on failure.
+ */
+static int _stats_parse_histogram(struct dm_pool *mem, char *hist_str,
+                                 struct dm_histogram **histogram,
+                                 struct dm_stats_region *region)
+{
+       static const char _valid_chars[] = "0123456789:";
+       struct dm_histogram *bounds = region->bounds;
+       struct dm_histogram hist = {
+               .nr_bins = region->bounds->nr_bins
+       };
+       const char *c, *v, *val_start;
+       struct dm_histogram_bin cur;
+       uint64_t sum = 0, this_val;
+       char *endptr = NULL;
+       int bin = 0;
+
+       c = hist_str;
+
+       if (!dm_pool_begin_object(mem, sizeof(cur)))
+               return_0;
+
+       if (!dm_pool_grow_object(mem, &hist, sizeof(hist)))
+               goto_bad;
+
+       do {
+               memset(&cur, 0, sizeof(cur));
+               for (v = _valid_chars; *v; v++)
+                       if (*c == *v)
+                               break;
+               if (!*v)
+                       goto badchar;
+
+               if (*c == ',')
+                       goto badchar;
+               else {
+                       val_start = c;
+                       endptr = NULL;
+
+                       errno = 0;
+                       this_val = strtoull(val_start, &endptr, 10);
+                       if (errno || !endptr) {
+                               log_error("Could not parse histogram value.");
+                               goto bad;
+                       }
+                       c = endptr; /* Advance to colon, or end. */
+
+                       if (*c == ':')
+                               c++;
+                       /*
+                        * Was '*c & (*c != '\n')': a bitwise AND that
+                        * only tested the low bit of *c and so let some
+                        * invalid characters through.
+                        */
+                       else if (*c && (*c != '\n'))
+                               /* Expected ':', '\n', or NULL. */
+                               goto badchar;
+
+                       if (*c == ':')
+                               c++;
+
+                       /* never read past the declared bounds table */
+                       if (bin >= bounds->nr_bins) {
+                               log_error("Unexpected extra value in histogram.");
+                               goto bad;
+                       }
+
+                       cur.upper = bounds->bins[bin].upper;
+                       cur.count = this_val;
+                       sum += this_val;
+
+                       if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+                               goto_bad;
+
+                       bin++;
+               }
+       } while (*c && (*c != '\n'));
+
+       log_debug("Added region histogram data with %d entries.", hist.nr_bins);
+
+       *histogram = dm_pool_end_object(mem);
+       (*histogram)->sum = sum;
+
+       return 1;
+
+badchar:
+       log_error("Invalid character in histogram data: '%c' (0x%x)", *c, *c);
+bad:
+       dm_pool_abandon_object(mem);
+       return 0;
+}
+
+/*
+ * Parse an @stats_print response for a single region: one row of
+ * counters (plus optional histogram data) per step-sized area.
+ * Counter time values are multiplied by timescale so that stored
+ * values are uniform regardless of region precision.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static int _stats_parse_region(struct dm_stats *dms, const char *resp,
+                              struct dm_stats_region *region,
+                              uint64_t timescale)
+{
+       struct dm_histogram *hist = NULL;
+       struct dm_pool *mem = dms->mem;
+       struct dm_stats_counters cur;
+       FILE *stats_rows = NULL;
+       uint64_t start = 0, len = 0;
+       char row[STATS_ROW_BUF_LEN];
+       int r;
+
+       if (!resp) {
+               log_error("Could not parse empty @stats_print response.");
+               return 0;
+       }
+
+       /* sentinel: "first row not yet seen" — tested below */
+       region->start = UINT64_MAX;
+
+       if (!dm_pool_begin_object(mem, 512))
+               goto_bad;
+
+       /*
+        * dm_task_get_message_response() returns a 'const char *' but
+        * since fmemopen also permits "w" it expects a 'char *'.
+        */
+       stats_rows = fmemopen((char *)resp, strlen(resp), "r");
+       if (!stats_rows)
+               goto_bad;
+
+       /*
+        * Output format for each step-sized area of a region:
+        *
+        * <start_sector>+<length> counters
+        *
+        * The first 11 counters have the same meaning as
+        * /sys/block/ * /stat or /proc/diskstats.
+        *
+        * Please refer to Documentation/iostats.txt for details.
+        *
+        * 1. the number of reads completed
+        * 2. the number of reads merged
+        * 3. the number of sectors read
+        * 4. the number of milliseconds spent reading
+        * 5. the number of writes completed
+        * 6. the number of writes merged
+        * 7. the number of sectors written
+        * 8. the number of milliseconds spent writing
+        * 9. the number of I/Os currently in progress
+        * 10. the number of milliseconds spent doing I/Os
+        * 11. the weighted number of milliseconds spent doing I/Os
+        *
+        * Additional counters:
+        * 12. the total time spent reading in milliseconds
+        * 13. the total time spent writing in milliseconds
+        *
+       */
+       while (fgets(row, sizeof(row), stats_rows)) {
+               r = sscanf(row, FMTu64 "+" FMTu64 /* start+len */
+                          /* reads */
+                          FMTu64 " " FMTu64 " " FMTu64 " " FMTu64 " "
+                          /* writes */
+                          FMTu64 " " FMTu64 " " FMTu64 " " FMTu64 " "
+                          /* in flight & io nsecs */
+                          FMTu64 " " FMTu64 " " FMTu64 " "
+                          /* tot read/write nsecs */
+                          FMTu64 " " FMTu64, &start, &len,
+                          &cur.reads, &cur.reads_merged, &cur.read_sectors,
+                          &cur.read_nsecs,
+                          &cur.writes, &cur.writes_merged, &cur.write_sectors,
+                          &cur.write_nsecs,
+                          &cur.io_in_progress,
+                          &cur.io_nsecs, &cur.weighted_io_nsecs,
+                          &cur.total_read_nsecs, &cur.total_write_nsecs);
+               if (r != 15) {
+                       log_error("Could not parse @stats_print row.");
+                       goto bad;
+               }
+
+               /* scale time values up if needed */
+               if (timescale != 1) {
+                       cur.read_nsecs *= timescale;
+                       cur.write_nsecs *= timescale;
+                       cur.io_nsecs *= timescale;
+                       cur.weighted_io_nsecs *= timescale;
+                       cur.total_read_nsecs *= timescale;
+                       cur.total_write_nsecs *= timescale;
+               }
+
+               if (region->bounds) {
+                       /* Find first histogram separator. */
+                       char *hist_str = strchr(row, ':');
+                       if (!hist_str) {
+                               log_error("Could not parse histogram value.");
+                               goto bad;
+                       }
+                       /* Find space preceding histogram. */
+                       while (hist_str && *(hist_str - 1) != ' ')
+                               hist_str--;
+
+                       /* Use a separate pool for histogram objects since we
+                        * are growing the area table and each area's histogram
+                        * table simultaneously.
+                        */
+                       if (!_stats_parse_histogram(dms->hist_mem, hist_str,
+                                                   &hist, region))
+                               goto_bad;
+                       hist->dms = dms;
+                       hist->region = region;
+               }
+
+               /* hist stays NULL when the region has no bounds */
+               cur.histogram = hist;
+
+               if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+                       goto_bad;
+
+               if (region->start == UINT64_MAX) {
+                       /* first row: record region geometry */
+                       region->start = start;
+                       region->step = len; /* area size is always uniform. */
+               }
+       }
+
+       if (region->start == UINT64_MAX)
+               /* no area data read from @stats_print */
+               goto bad;
+
+       /* start/len now describe the final area: derive the region length */
+       region->len = (start + len) - region->start;
+       region->timescale = timescale;
+       region->counters = dm_pool_end_object(mem);
+
+       if (fclose(stats_rows))
+               stack;
+
+       return 1;
+
+bad:
+       if (stats_rows)
+               if (fclose(stats_rows))
+                       stack;
+       dm_pool_abandon_object(mem);
+
+       return 0;
+}
+
+/*
+ * Advance the cursor to the next present (and, when skipping is in
+ * effect, multi-area) region — or to the next area within the current
+ * region during an area walk.  cur_g is unreferenced in this function;
+ * it is accepted to keep a uniform cursor-walk signature.
+ */
+static void _stats_walk_next_present(const struct dm_stats *dms,
+                                    uint64_t *flags,
+                                    uint64_t *cur_r, uint64_t *cur_a,
+                                    uint64_t *cur_g)
+{
+       struct dm_stats_region *cur = NULL;
+
+       /* start of walk: region loop advances *cur_r to 0. */
+       if (*cur_r != DM_STATS_REGION_NOT_PRESENT)
+               cur = &dms->regions[*cur_r];
+
+       /* within current region? */
+       if (cur && (*flags & DM_STATS_WALK_AREA)) {
+               if (++(*cur_a) < _nr_areas_region(cur))
+                       return;
+               else
+                       /* areas exhausted: reset and fall through to
+                        * advance to the next region */
+                       *cur_a = 0;
+       }
+
+       /* advance to next present, non-skipped region or end */
+       while (++(*cur_r) <= dms->max_region) {
+               cur = &dms->regions[*cur_r];
+               if (!_stats_region_present(cur))
+                       continue;
+               /* SKIP_SINGLE_AREA only applies to non-area walks */
+               if ((*flags & DM_STATS_WALK_SKIP_SINGLE_AREA))
+                       if (!(*flags & DM_STATS_WALK_AREA))
+                               if (_nr_areas_region(cur) < 2)
+                                       continue;
+               /* matching region found */
+               break;
+       }
+       return;
+}
+
+/*
+ * Advance the cursor one step according to the active walk flags:
+ * per-area for area walks, per-region for region walks, per-group
+ * for group walks.  A NULL handle or empty region table is a no-op.
+ */
+static void _stats_walk_next(const struct dm_stats *dms, uint64_t *flags,
+                            uint64_t *cur_r, uint64_t *cur_a, uint64_t *cur_g)
+{
+       if (!dms || !dms->regions)
+               return;
+
+       if (*flags & DM_STATS_WALK_AREA) {
+               /* advance to next area, region, or end */
+               _stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g);
+               return;
+       }
+
+       if (*flags & DM_STATS_WALK_REGION) {
+               /* enable region aggregation */
+               *cur_a = DM_STATS_WALK_REGION;
+               _stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g);
+               return;
+       }
+
+       if (*flags & DM_STATS_WALK_GROUP) {
+               /* enable group aggregation */
+               *cur_r = *cur_a = DM_STATS_WALK_GROUP;
+               /* NOTE(review): presence is tested before the bounds
+                * check, so _stats_group_id_present() can be called with
+                * *cur_g == max_region + 1 at the end of the walk —
+                * assumes that helper rejects out-of-range ids; confirm. */
+               while (!_stats_group_id_present(dms, ++(*cur_g))
+                      && (*cur_g) < dms->max_region + 1)
+                       ; /* advance to next present group or end */
+               return;
+       }
+
+       log_error("stats_walk_next called with empty walk flags");
+}
+
+/*
+ * Position the cursor at the first grouped region for a group walk,
+ * or clear DM_STATS_WALK_GROUP from the walk flags when no grouped
+ * region exists.  No-op unless a group walk was requested.
+ */
+static void _group_walk_start(const struct dm_stats *dms, uint64_t *flags,
+                             uint64_t *cur_r, uint64_t *cur_a, uint64_t *cur_g)
+{
+       if (!(*flags & DM_STATS_WALK_GROUP))
+               return;
+
+       *cur_a = *cur_r = DM_STATS_WALK_GROUP;
+
+       /* scan for the first region that belongs to a group */
+       for (*cur_g = 0; *cur_g <= dms->max_region; (*cur_g)++)
+               if (_stats_region_is_grouped(dms, *cur_g))
+                       return;
+
+       /* no groups to walk */
+       *flags &= ~DM_STATS_WALK_GROUP;
+}
+
+/*
+ * Initialise the walk cursor according to the walk flags and position
+ * it on the first object to be visited.  No-op when the handle has no
+ * region table.
+ */
+static void _stats_walk_start(const struct dm_stats *dms, uint64_t *flags,
+                             uint64_t *cur_r, uint64_t *cur_a,
+                             uint64_t *cur_g)
+{
+       log_debug("starting stats walk with %s %s %s %s",
+                 (*flags & DM_STATS_WALK_AREA) ? "AREA" : "",
+                 (*flags & DM_STATS_WALK_REGION) ? "REGION" : "",
+                 (*flags & DM_STATS_WALK_GROUP) ? "GROUP" : "",
+                 (*flags & DM_STATS_WALK_SKIP_SINGLE_AREA) ? "SKIP" : "");
+
+       if (!dms->regions)
+               return;
+
+       /* a pure group walk is handled separately */
+       if (!(*flags & (DM_STATS_WALK_AREA | DM_STATS_WALK_REGION))) {
+               _group_walk_start(dms, flags, cur_r, cur_a, cur_g);
+               return;
+       }
+
+       /* initialise cursor state */
+       *cur_r = DM_STATS_REGION_NOT_PRESENT;
+       *cur_g = DM_STATS_GROUP_NOT_PRESENT;
+       *cur_a = (*flags & DM_STATS_WALK_AREA) ? 0 : DM_STATS_WALK_REGION;
+
+       /* advance to first present, non-skipped region */
+       _stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g);
+}
+
+/* All flag bits accepted by dm_stats_walk_init(). */
+#define DM_STATS_WALK_MASK (DM_STATS_WALK_AREA                 \
+                           | DM_STATS_WALK_REGION              \
+                           | DM_STATS_WALK_GROUP               \
+                           | DM_STATS_WALK_SKIP_SINGLE_AREA)
+
+/*
+ * Validate and store the walk flags used by subsequent walks on this
+ * handle.  Returns 1 on success, 0 on a NULL handle or unknown flags.
+ */
+int dm_stats_walk_init(struct dm_stats *dms, uint64_t flags)
+{
+       if (!dms)
+               return_0;
+
+       /* reject any bit outside the supported flag mask */
+       if (flags & ~DM_STATS_WALK_MASK) {
+               log_error("Unknown value in walk flags: 0x" FMTx64,
+                         (uint64_t) (flags & ~DM_STATS_WALK_MASK));
+               return 0;
+       }
+
+       dms->walk_flags = flags;
+       log_debug("dm_stats_walk_init: initialised flags to " FMTx64, flags);
+       return 1;
+}
+
+/*
+ * Begin a walk using the flags stored by dm_stats_walk_init().
+ * No-op on a NULL handle or when no regions have been listed.
+ */
+void dm_stats_walk_start(struct dm_stats *dms)
+{
+       if (!dms)
+               return;
+       if (!dms->regions)
+               return;
+
+       /* each walk starts from a private copy of the stored flags */
+       dms->cur_flags = dms->walk_flags;
+       _stats_walk_start(dms, &dms->cur_flags, &dms->cur_region,
+                         &dms->cur_area, &dms->cur_group);
+}
+
+/*
+ * Advance the handle's walk cursor one step according to the current
+ * walk flags (per-area, per-region, or per-group).
+ */
+void dm_stats_walk_next(struct dm_stats *dms)
+{
+       _stats_walk_next(dms, &dms->cur_flags,
+                        &dms->cur_region, &dms->cur_area,
+                        &dms->cur_group);
+}
+
+/*
+ * Advance the walk cursor directly to the next region, skipping any
+ * remaining areas of the current region.
+ *
+ * Guard the handle before touching cur_flags: the other walk entry
+ * points (dm_stats_walk_start(), and _stats_walk_next() used by
+ * dm_stats_walk_next()) already treat a NULL handle as a no-op, but
+ * this function dereferenced dms unconditionally.
+ */
+void dm_stats_walk_next_region(struct dm_stats *dms)
+{
+       if (!dms)
+               return;
+
+       dms->cur_flags &= ~DM_STATS_WALK_AREA;
+       _stats_walk_next(dms, &dms->cur_flags,
+                        &dms->cur_region, &dms->cur_area,
+                        &dms->cur_group);
+}
+
+/*
+ * Return 1 if any region at or after *cur_r is present and not
+ * excluded by the current walk flags, or 0 otherwise.
+ */
+static uint64_t _stats_walk_any_unskipped(const struct dm_stats *dms,
+                                         uint64_t *flags,
+                                         uint64_t *cur_r, uint64_t *cur_a)
+{
+       struct dm_stats_region *reg;
+       uint64_t r;
+       int skip_single;
+
+       if (*cur_r > dms->max_region)
+               return 0;
+
+       /* single-area regions are skipped only for non-area walks */
+       skip_single = (*flags & DM_STATS_WALK_SKIP_SINGLE_AREA)
+                      && !(*flags & DM_STATS_WALK_AREA);
+
+       for (r = *cur_r; r <= dms->max_region; r++) {
+               reg = &dms->regions[r];
+               if (!_stats_region_present(reg))
+                       continue;
+               if (skip_single && (_nr_areas_region(reg) < 2))
+                       continue;
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Handle the end of the area-walk phase: once no unskipped regions
+ * remain, drop DM_STATS_WALK_AREA and chain into the region and/or
+ * group walk phases selected by the remaining flags.
+ */
+static void _stats_walk_end_areas(const struct dm_stats *dms, uint64_t *flags,
+                                 uint64_t *cur_r, uint64_t *cur_a,
+                                 uint64_t *cur_g)
+{
+       int end = !_stats_walk_any_unskipped(dms, flags, cur_r, cur_a);
+
+       if (!(*flags & DM_STATS_WALK_AREA))
+               return;
+
+       if (!end)
+               return;
+
+       /* area walk complete */
+       *flags &= ~DM_STATS_WALK_AREA;
+       if (*flags & DM_STATS_WALK_REGION) {
+               /* start region walk */
+               *cur_a = DM_STATS_WALK_REGION;
+               *cur_r = DM_STATS_REGION_NOT_PRESENT;
+               _stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g);
+               if (!_stats_walk_any_unskipped(dms, flags, cur_r, cur_a)) {
+                       /* no more regions */
+                       *flags &= ~DM_STATS_WALK_REGION;
+                       if (!(*flags & DM_STATS_WALK_GROUP))
+                               *cur_r = dms->max_region;
+               }
+       }
+
+       if (*flags & DM_STATS_WALK_REGION)
+               return;
+
+       /* finally begin the group walk phase if requested */
+       if (*flags & DM_STATS_WALK_GROUP)
+               _group_walk_start(dms, flags, cur_r, cur_a, cur_g);
+}
+
+/*
+ * Test whether the walk has visited its last object, clearing each
+ * phase flag as its phase completes.  The walk is over when no flags
+ * other than SKIP_SINGLE_AREA remain set; returns 1 in that case,
+ * 0 while objects remain.
+ */
+static int _stats_walk_end(const struct dm_stats *dms, uint64_t *flags,
+                          uint64_t *cur_r, uint64_t *cur_a, uint64_t *cur_g)
+{
+       if (*flags & DM_STATS_WALK_AREA) {
+               /* may clear AREA and chain into region/group phases */
+               _stats_walk_end_areas(dms, flags, cur_r, cur_a, cur_g);
+               goto out;
+       }
+
+       if (*flags & DM_STATS_WALK_REGION) {
+               if (!_stats_walk_any_unskipped(dms, flags, cur_r, cur_a)) {
+                       *flags &= ~DM_STATS_WALK_REGION;
+                       _group_walk_start(dms, flags, cur_r, cur_a, cur_g);
+               }
+               goto out;
+       }
+
+       if (*flags & DM_STATS_WALK_GROUP) {
+               /* groups remain while the cursor is in range */
+               if (*cur_g <= dms->max_region)
+                       goto out;
+               *flags &= ~DM_STATS_WALK_GROUP;
+       }
+out:
+       return !(*flags & ~DM_STATS_WALK_SKIP_SINGLE_AREA);
+}
+
+/*
+ * Return 1 when the current walk has visited its last object (and
+ * re-arm the walk flags for a new walk), or 0 while objects remain.
+ * A NULL handle is treated as an ended walk.
+ */
+int dm_stats_walk_end(struct dm_stats *dms)
+{
+       if (!dms)
+               return 1;
+
+       if (!_stats_walk_end(dms, &dms->cur_flags,
+                            &dms->cur_region, &dms->cur_area,
+                            &dms->cur_group))
+               return 0;
+
+       /* walk complete: restore flags ready for the next walk */
+       dms->cur_flags = dms->walk_flags;
+       return 1;
+}
+
+/*
+ * Return the type of object (group, region, area, or none) addressed
+ * by the region_id/area_id pair.  DM_STATS_REGION_CURRENT and
+ * DM_STATS_AREA_CURRENT select the walk cursor position.
+ */
+dm_stats_obj_type_t dm_stats_object_type(const struct dm_stats *dms,
+                                        uint64_t region_id,
+                                        uint64_t area_id)
+{
+       uint64_t group_id;
+
+       if (region_id == DM_STATS_REGION_CURRENT)
+               region_id = dms->cur_region;
+       if (area_id == DM_STATS_AREA_CURRENT)
+               area_id = dms->cur_area;
+
+       /* no region */
+       if (region_id == DM_STATS_REGION_NOT_PRESENT)
+               return DM_STATS_OBJECT_TYPE_NONE;
+
+       if (region_id & DM_STATS_WALK_GROUP) {
+               /*
+                * Either the bare marker (group taken from the walk
+                * cursor) or a group_id encoded into the region_id.
+                */
+               group_id = (region_id == DM_STATS_WALK_GROUP)
+                           ? dms->cur_group
+                           : (region_id & ~DM_STATS_WALK_GROUP);
+
+               return _stats_group_id_present(dms, group_id)
+                       ? DM_STATS_OBJECT_TYPE_GROUP
+                       : DM_STATS_OBJECT_TYPE_NONE;
+       }
+
+       /* end of table */
+       if (region_id > dms->max_region)
+               return DM_STATS_OBJECT_TYPE_NONE;
+
+       /* aggregate region */
+       if (area_id & DM_STATS_WALK_REGION)
+               return DM_STATS_OBJECT_TYPE_REGION;
+
+       /* plain region_id and area_id */
+       return DM_STATS_OBJECT_TYPE_AREA;
+}
+
+/*
+ * Object type at the current walk cursor position.
+ */
+dm_stats_obj_type_t dm_stats_current_object_type(const struct dm_stats *dms)
+{
+       /* dm_stats_object_type() decodes the cursor region/area */
+       return dm_stats_object_type(dms, DM_STATS_REGION_CURRENT,
+                                   DM_STATS_AREA_CURRENT);
+}
+
+/*
+ * Return the number of areas contained in the given region. A group
+ * or aggregate region always reports a single area since it cannot
+ * be subdivided.
+ */
+uint64_t dm_stats_get_region_nr_areas(const struct dm_stats *dms,
+                                     uint64_t region_id)
+{
+       /* groups or aggregate regions cannot be subdivided */
+       if (region_id & DM_STATS_WALK_GROUP)
+               return 1;
+
+       return _nr_areas_region(&dms->regions[region_id]);
+}
+
+/*
+ * Return the number of areas in the region at the current cursor
+ * position; a group or aggregate region counts as one area.
+ */
+uint64_t dm_stats_get_current_nr_areas(const struct dm_stats *dms)
+{
+       uint64_t region = dms->cur_region;
+
+       /* groups or aggregate regions cannot be subdivided */
+       return (region & DM_STATS_WALK_GROUP)
+               ? 1 : dm_stats_get_region_nr_areas(dms, region);
+}
+
+/*
+ * Return the total number of areas in this handle, computed by
+ * walking every region in area mode. Returns zero when no regions
+ * have been listed.
+ *
+ * NOTE(review): the walk below maintains a private cursor
+ * (cur_region/cur_area/cur_group) but each step adds
+ * dm_stats_get_current_nr_areas(), which reads the handle's embedded
+ * cursor (dms->cur_region), not the private one — confirm the two
+ * cursors are expected to agree here.
+ */
+uint64_t dm_stats_get_nr_areas(const struct dm_stats *dms)
+{
+       uint64_t nr_areas = 0, flags = DM_STATS_WALK_AREA;
+       /* use a separate cursor */
+       uint64_t cur_region = 0, cur_area = 0, cur_group = 0;
+
+       /* no regions to visit? */
+       if (!dms->regions)
+               return 0;
+
+       flags = DM_STATS_WALK_AREA;
+       _stats_walk_start(dms, &flags, &cur_region, &cur_area, &cur_group);
+       do {
+               nr_areas += dm_stats_get_current_nr_areas(dms);
+               _stats_walk_next(dms, &flags,
+                                &cur_region, &cur_area,
+                                &cur_group);
+       } while (!_stats_walk_end(dms, &flags,
+                                 &cur_region, &cur_area,
+                                 &cur_group));
+       return nr_areas;
+}
+
+/*
+ * Public wrapper: report whether group_id exists in this handle's
+ * group table.
+ */
+int dm_stats_group_present(const struct dm_stats *dms, uint64_t group_id)
+{
+       int present;
+
+       present = _stats_group_id_present(dms, group_id);
+
+       return present;
+}
+
+/*
+ * Return the number of histogram bins configured for region_id, or
+ * zero if the region has no histogram (or is a group, which is not
+ * yet supported). DM_STATS_REGION_CURRENT selects the region at the
+ * cursor position.
+ */
+int dm_stats_get_region_nr_histogram_bins(const struct dm_stats *dms,
+                                         uint64_t region_id)
+{
+       const struct dm_stats_region *region;
+
+       if (region_id == DM_STATS_REGION_CURRENT)
+               region_id = dms->cur_region;
+
+       /* FIXME: support group histograms if all region bounds match */
+       if (region_id & DM_STATS_WALK_GROUP)
+               return 0;
+
+       region = &dms->regions[region_id];
+       if (!region->bounds)
+               return 0;
+
+       return region->bounds->nr_bins;
+}
+
+/*
+ * Fill buf with a list of set regions in the regions bitmap. Consecutive
+ * ranges of set region IDs are output using "M-N" range notation.
+ *
+ * The number of bytes consumed is returned or zero on error.
+ */
+static size_t _stats_group_tag_fill(const struct dm_stats *dms,
+                                   dm_bitset_t regions,
+                                   char *buf, size_t buflen)
+{
+       int i, j, r, next, last = 0;
+       size_t used = 0;
+
+       last = dm_bit_get_last(regions);
+
+       i = dm_bit_get_first(regions);
+       for(; i >= 0; i = dm_bit_get_next(regions, i)) {
+               /* find range end */
+               j = i;
+               do
+                       next = j + 1;
+               while ((j = dm_bit_get_next(regions, j)) == next);
+
+               /* set to last set bit */
+               j = next - 1;
+
+               /* handle range vs. single region */
+               if (i != j)
+                       r = dm_snprintf(buf, buflen, FMTu64 "-" FMTu64 "%s",
+                                       (uint64_t) i, (uint64_t) j,
+                                       (j == last) ? "" : ",");
+               else
+                       r = dm_snprintf(buf, buflen, FMTu64 "%s", (uint64_t) i,
+                                       (i == last) ? "" : ",");
+               if (r < 0)
+                       goto_bad;
+
+               i = next; /* skip handled bits if in range */
+
+               /*
+                * Advance past the formatted output and shrink the
+                * remaining space. Fix: buflen was previously passed
+                * unchanged on every iteration while buf advanced, so
+                * dm_snprintf could not detect truncation and could
+                * write past the end of the original buffer.
+                */
+               buf += r;
+               buflen -= r;
+               used += r;
+       }
+
+       return used;
+bad:
+       log_error("Could not format group list.");
+       return 0;
+}
+
+/*
+ * Calculate the space required to hold a string description of the group
+ * described by the regions bitset using comma separated list in range
+ * notation ("A,B,C,M-N").
+ */
+static size_t _stats_group_tag_len(const struct dm_stats *dms,
+                                  dm_bitset_t regions)
+{
+       int64_t i, j, next, nr_regions = 0;
+       size_t buflen = 0, id_len = 0;
+
+       /* check region ids and find last set bit */
+       i = dm_bit_get_first(regions);
+       for (; i >= 0; i = dm_bit_get_next(regions, i)) {
+               /* length of region_id or range start in characters */
+               id_len = (i) ? 1 + (size_t) log10(i) : 1;
+               buflen += id_len;
+               /* scan forward to find the end of a consecutive run */
+               j = i;
+               do
+                       next = j + 1;
+               while ((j = dm_bit_get_next(regions, j)) == next);
+
+               /* set to last set bit */
+               j = next - 1;
+
+               nr_regions += j - i + 1;
+
+               /* handle range */
+               if (i != j) {
+                       /* j is always > i, which is always >= 0 */
+                       id_len = 1 + (size_t) log10(j);
+                       buflen += id_len + 1; /* range end plus "-" */
+               }
+               /* one extra byte per entry: "," separator or final NUL */
+               buflen++;
+               i = next; /* skip bits if handling range */
+       }
+       return buflen;
+}
+
+/*
+ * Build a DMS_GROUP="..." tag for the group specified by group_id,
+ * to be stored in the corresponding region's aux_data field.
+ *
+ * Returns a dm_malloc'd string (caller frees with dm_free()), or
+ * NULL on error.
+ *
+ * NOTE(review): return_0 is used in a pointer-returning function;
+ * presumably it expands to "return 0" (i.e. NULL here) — consider
+ * return_NULL for clarity.
+ */
+static char *_build_group_tag(struct dm_stats *dms, uint64_t group_id)
+{
+       char *aux_string, *buf;
+       dm_bitset_t regions;
+       const char *alias;
+       size_t buflen = 0;
+       int r;
+
+       regions = dms->groups[group_id].regions;
+       alias = dms->groups[group_id].alias;
+
+       /* space for the member-region list ("A,B,M-N") */
+       buflen = _stats_group_tag_len(dms, regions);
+
+       if (!buflen)
+               return_0;
+
+       buflen += DMS_GROUP_TAG_LEN;
+       buflen += 1 + (alias ? strlen(alias) : 0); /* 'alias:' */
+
+       buf = aux_string = dm_malloc(buflen);
+       if (!buf) {
+               log_error("Could not allocate memory for aux_data string.");
+               return NULL;
+       }
+
+       /* leading DMS_GROUP tag prefix */
+       if (!dm_strncpy(buf, DMS_GROUP_TAG, DMS_GROUP_TAG_LEN + 1))
+               goto_bad;
+
+       buf += DMS_GROUP_TAG_LEN;
+       buflen -= DMS_GROUP_TAG_LEN;
+
+       /* optional alias, then the group separator character */
+       r = dm_snprintf(buf, buflen, "%s%c", alias ? alias : "", DMS_GROUP_SEP);
+       if (r < 0)
+               goto_bad;
+
+       buf += r;
+       buflen -= r;
+
+       /* member-region list fills the remainder of the buffer */
+       r = _stats_group_tag_fill(dms, regions, buf, buflen);
+       if (!r)
+               goto_bad;
+
+       return aux_string;
+bad:
+       log_error("Could not format group aux_data.");
+       dm_free(aux_string);
+       return NULL;
+}
+
+/*
+ * Store updated aux_data for a region. The aux_data is passed to the
+ * kernel using the @stats_set_aux message. Any required group tag is
+ * generated from the current group table and included in the message.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static int _stats_set_aux(struct dm_stats *dms,
+                         uint64_t region_id, const char *aux_data)
+{
+       const char *group_tag = NULL;
+       struct dm_task *dmt = NULL;
+       char msg[STATS_MSG_BUF_LEN];
+
+       /* group data required? */
+       if (_stats_group_id_present(dms, region_id)) {
+               group_tag = _build_group_tag(dms, region_id);
+               if (!group_tag) {
+                       log_error("Could not build group descriptor for "
+                                 "region ID " FMTu64, region_id);
+                       goto bad;
+               }
+       }
+
+       /* empty aux_data is sent as the "-" placeholder */
+       if (dm_snprintf(msg, sizeof(msg), "@stats_set_aux " FMTu64 " %s%s%s ",
+                       region_id, (group_tag) ? group_tag : "",
+                       (group_tag) ? DMS_AUX_SEP : "",
+                       (strlen(aux_data)) ? aux_data : "-") < 0) {
+               log_error("Could not prepare @stats_set_aux message");
+               goto bad;
+       }
+
+       if (!(dmt = _stats_send_message(dms, msg)))
+               goto_bad;
+
+       dm_free((char *) group_tag);
+
+       /* no response to a @stats_set_aux message */
+       dm_task_destroy(dmt);
+
+       return 1;
+bad:
+       /* group_tag may be NULL here; dm_free is assumed NULL-safe */
+       dm_free((char *) group_tag);
+       return 0;
+}
+
+/*
+ * Maximum length of a "start+end" range string:
+ * Two 20 digit uint64_t, '+', and NULL.
+ */
+#define RANGE_LEN 42
+/*
+ * Build and send a @stats_create message for the given start/len
+ * extent, area step, precision, histogram, program_id and aux_data,
+ * parsing the new region's ID from the kernel response into
+ * *region_id (when non-NULL).
+ *
+ * A negative step denotes "number of areas" rather than area size
+ * (encoded with a "/" prefix in the message). Returns 1 on success,
+ * 0 on failure.
+ */
+static int _stats_create_region(struct dm_stats *dms, uint64_t *region_id,
+                               uint64_t start, uint64_t len, int64_t step,
+                               int precise, const char *hist_arg,
+                               const char *program_id, const char *aux_data)
+{
+       char msg[STATS_MSG_BUF_LEN], range[RANGE_LEN], *endptr = NULL;
+       const char *err_fmt = "Could not prepare @stats_create %s.";
+       const char *precise_str = PRECISE_ARG;
+       const char *resp, *opt_args = NULL;
+       struct dm_task *dmt = NULL;
+       int r = 0, nr_opt = 0;
+
+       if (!_stats_bound(dms))
+               return_0;
+
+       /* fall back to the handle's configured program_id */
+       if (!program_id || !strlen(program_id))
+               program_id = dms->program_id;
+
+       /* "start+len" range; a whole-device region is sent as "-" below */
+       if (start || len) {
+               if (dm_snprintf(range, sizeof(range), FMTu64 "+" FMTu64,
+                               start, len) < 0) {
+                       log_error(err_fmt, "range");
+                       return 0;
+               }
+       }
+
+       /* negative precise means "use the handle default" */
+       if (precise < 0)
+               precise = dms->precise;
+
+       if (precise)
+               nr_opt++;
+       else
+               precise_str = "";
+
+       if (hist_arg)
+               nr_opt++;
+       else
+               hist_arg = "";
+
+       /* optional-argument block: "<count> [precise] [histogram:...]" */
+       if (nr_opt) {
+               if ((dm_asprintf((char **)&opt_args, "%d %s %s%s", nr_opt,
+                                precise_str,
+                                (strlen(hist_arg)) ? HISTOGRAM_ARG : "",
+                                hist_arg)) < 0) {
+                       log_error(err_fmt, PRECISE_ARG " option.");
+                       return 0;
+               }
+       } else
+               opt_args = dm_strdup("");
+
+       if (dm_snprintf(msg, sizeof(msg), "@stats_create %s %s" FMTu64
+                       " %s %s %s", (start || len) ? range : "-",
+                       (step < 0) ? "/" : "",
+                       (uint64_t)llabs(step),
+                       opt_args, program_id, aux_data) < 0) {
+               log_error(err_fmt, "message");
+               dm_free((void *) opt_args);
+               return 0;
+       }
+
+       if (!(dmt = _stats_send_message(dms, msg)))
+               goto_out;
+
+       resp = dm_task_get_message_response(dmt);
+       if (!resp) {
+               log_error("Could not parse empty @stats_create response.");
+               goto out;
+       }
+
+       /* the kernel response is the new region's decimal ID */
+       if (region_id) {
+               errno = 0;
+               *region_id = strtoull(resp, &endptr, 10);
+               if (errno || resp == endptr)
+                       goto_out;
+       }
+
+       r = 1;
+
+out:
+       if (dmt)
+               dm_task_destroy(dmt);
+       dm_free((void *) opt_args);
+
+       return r;
+}
+
+/*
+ * Public wrapper for _stats_create_region(): validates that precise
+ * timestamps are available when nanosecond counters or histograms
+ * are requested, and translates the histogram bounds into the
+ * kernel's histogram argument string.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_stats_create_region(struct dm_stats *dms, uint64_t *region_id,
+                          uint64_t start, uint64_t len, int64_t step,
+                          int precise, struct dm_histogram *bounds,
+                          const char *program_id, const char *user_data)
+{
+       char *hist_arg = NULL;
+       int r = 0;
+
+       /* Nanosecond counters and histograms both need precise_timestamps. */
+       if ((precise || bounds) && !_stats_check_precise_timestamps(dms))
+               return_0;
+
+       if (bounds) {
+               /* _build_histogram_arg enables precise if vals < 1ms. */
+               if (!(hist_arg = _build_histogram_arg(bounds, &precise)))
+                       goto_out;
+       }
+
+       r = _stats_create_region(dms, region_id, start, len, step,
+                                precise, hist_arg, program_id, user_data);
+       dm_free(hist_arg);
+
+out:
+       return r;
+}
+
+/*
+ * Detach every member region of the given group by resetting each
+ * member's group_id to DM_STATS_GROUP_NOT_PRESENT.
+ */
+static void _stats_clear_group_regions(struct dm_stats *dms, uint64_t group_id)
+{
+       struct dm_stats_group *group = &dms->groups[group_id];
+       uint64_t bit;
+
+       bit = dm_bit_get_first(group->regions);
+       while (bit != DM_STATS_GROUP_NOT_PRESENT) {
+               dms->regions[bit].group_id = DM_STATS_GROUP_NOT_PRESENT;
+               bit = dm_bit_get_next(group->regions, bit);
+       }
+}
+
+/*
+ * Remove region_id from the group it belongs to, destroying the
+ * group if the region is the group leader, and rewrite the leader's
+ * aux_data to reflect the new membership.
+ *
+ * Returns 1 on success, 0 on failure (including when the region is
+ * not a member of any group).
+ */
+static int _stats_remove_region_id_from_group(struct dm_stats *dms,
+                                             uint64_t region_id)
+{
+       struct dm_stats_region *region = &dms->regions[region_id];
+       uint64_t group_id = region->group_id;
+       dm_bitset_t regions;
+
+       /*
+        * Validate membership before touching the group table. Fix:
+        * the previous code fetched dms->groups[group_id].regions in
+        * its initialisers, i.e. before this check, so an ungrouped
+        * region (group_id == DM_STATS_GROUP_NOT_PRESENT) indexed the
+        * group table with an invalid subscript.
+        */
+       if (!_stats_region_is_grouped(dms, region_id))
+               return_0;
+
+       regions = dms->groups[group_id].regions;
+       dm_bit_clear(regions, region_id);
+
+       /* removing group leader? */
+       if (region_id == group_id) {
+               _stats_clear_group_regions(dms, group_id);
+               _stats_group_destroy(&dms->groups[group_id]);
+       }
+
+       return _stats_set_aux(dms, group_id, dms->regions[group_id].aux_data);
+}
+
+/*
+ * Send a @stats_delete message for region_id, first detaching the
+ * region from its group (if any) so no stale group descriptor is
+ * left behind. Returns 1 on success, 0 on failure.
+ */
+static int _stats_delete_region(struct dm_stats *dms, uint64_t region_id)
+{
+       char msg[STATS_MSG_BUF_LEN];
+       struct dm_task *dmt;
+
+       /* update group membership before the region disappears */
+       if (_stats_region_is_grouped(dms, region_id))
+               if (!_stats_remove_region_id_from_group(dms, region_id)) {
+                       log_error("Could not remove region ID " FMTu64 " from "
+                                 "group ID " FMTu64,
+                                 region_id, dms->regions[region_id].group_id);
+                       return 0;
+               }
+
+       if (dm_snprintf(msg, sizeof(msg), "@stats_delete " FMTu64, region_id) < 0) {
+               log_error("Could not prepare @stats_delete message.");
+               return 0;
+       }
+
+       dmt = _stats_send_message(dms, msg);
+       if (!dmt)
+               return_0;
+       dm_task_destroy(dmt);
+
+       return 1;
+}
+
+/*
+ * Delete the stats region region_id from the bound device. The
+ * handle is listed on demand (see below) so that group membership
+ * can be updated before deletion. Returns 1 on success, 0 on
+ * failure.
+ */
+int dm_stats_delete_region(struct dm_stats *dms, uint64_t region_id)
+{
+       int listed = 0;
+
+       if (!_stats_bound(dms))
+               return_0;
+
+       /*
+        * To correctly delete a region, that may be part of a group, a
+        * listed handle is required, since the region may need to be
+        * removed from another region's group descriptor; earlier
+        * versions of the region deletion interface do not have this
+        * requirement since there are no dependencies between regions.
+        *
+        * Listing a previously unlisted handle has numerous
+        * side-effects on other calls and operations (e.g. stats
+        * walks), especially when returning to a function that depends
+        * on the state of the region table, or statistics cursor.
+        *
+        * To avoid changing the semantics of the API, and the need for
+        * a versioned symbol, maintain a flag indicating when a listing
+        * has been carried out, and drop the region table before
+        * returning.
+        *
+        * This ensures compatibility with programs compiled against
+        * earlier versions of libdm.
+        */
+       if (!dms->regions && !(listed = dm_stats_list(dms, dms->program_id))) {
+               log_error("Could not obtain region list while deleting "
+                         "region ID " FMTu64, region_id);
+               goto bad;
+       }
+
+       if (!dm_stats_get_nr_regions(dms)) {
+               log_error("Could not delete region ID " FMTu64 ": "
+                         "no regions found", region_id);
+               goto bad;
+       }
+
+       /* includes invalid and special region_id values */
+       if (!dm_stats_region_present(dms, region_id)) {
+               log_error("Region ID " FMTu64 " does not exist", region_id);
+               goto bad;
+       }
+
+       if (!_stats_delete_region(dms, region_id))
+               goto bad;
+
+       if (!listed)
+               /* wipe region and mark as not present */
+               _stats_region_destroy(&dms->regions[region_id]);
+       else
+               /* return handle to prior state */
+               _stats_regions_destroy(dms);
+
+       return 1;
+bad:
+       /* drop any region table this call created on demand */
+       if (listed)
+               _stats_regions_destroy(dms);
+
+       return 0;
+}
+
+/*
+ * Zero the counters of region_id on the bound device via a
+ * @stats_clear message. Returns 1 on success, 0 on failure.
+ */
+int dm_stats_clear_region(struct dm_stats *dms, uint64_t region_id)
+{
+       struct dm_task *dmt;
+       char msg[STATS_MSG_BUF_LEN];
+
+       if (!_stats_bound(dms))
+               return_0;
+
+       if (dm_snprintf(msg, sizeof(msg), "@stats_clear " FMTu64, region_id) < 0) {
+               log_error("Could not prepare @stats_clear message.");
+               return 0;
+       }
+
+       if (!(dmt = _stats_send_message(dms, msg)))
+               return_0;
+
+       dm_task_destroy(dmt);
+
+       return 1;
+}
+
+/*
+ * Issue @stats_print (or @stats_print_clear when clear is set) for
+ * region_id and return the completed task so the caller can read
+ * the message response. Returns NULL on failure; the caller must
+ * dm_task_destroy() the result.
+ */
+static struct dm_task *_stats_print_region(struct dm_stats *dms,
+                                   uint64_t region_id, unsigned start_line,
+                                   unsigned num_lines, unsigned clear)
+{
+       /* @stats_print[_clear] <region_id> [<start_line> <num_lines>] */
+       const char *err_fmt = "Could not prepare @stats_print %s.";
+       char msg[STATS_MSG_BUF_LEN], lines[RANGE_LEN];
+       struct dm_task *dmt = NULL;
+
+       /* lines[] is only formatted (and only used below) when a
+        * row range was requested; otherwise it stays unused. */
+       if (start_line || num_lines)
+               if (dm_snprintf(lines, sizeof(lines),
+                               "%u %u", start_line, num_lines) < 0) {
+                       log_error(err_fmt, "row specification");
+                       return NULL;
+               }
+
+       if (dm_snprintf(msg, sizeof(msg), "@stats_print%s " FMTu64 " %s",
+                       (clear) ? "_clear" : "",
+                       region_id, (start_line || num_lines) ? lines : "") < 0) {
+               log_error(err_fmt, "message");
+               return NULL;
+       }
+
+       if (!(dmt = _stats_send_message(dms, msg)))
+               return_NULL;
+
+       return dmt;
+}
+
+/*
+ * Return the kernel's @stats_print output for region_id as a
+ * pool-allocated string, or NULL on error. start_line/num_lines
+ * select a row range (0 0 means all rows); clear also zeroes the
+ * counters. Free the result with dm_stats_buffer_destroy().
+ *
+ * Consistency fix: use return_NULL (as _stats_print_region does) in
+ * this pointer-returning function instead of return_0; the returned
+ * value is NULL either way.
+ */
+char *dm_stats_print_region(struct dm_stats *dms, uint64_t region_id,
+                           unsigned start_line, unsigned num_lines,
+                           unsigned clear)
+{
+       char *resp = NULL;
+       struct dm_task *dmt = NULL;
+       const char *response;
+
+       if (!_stats_bound(dms))
+               return_NULL;
+
+       /*
+        * FIXME: 'print' can be emulated for groups or aggregate regions
+        * by populating the handle and emitting aggregate counter data
+        * in the kernel print format.
+        */
+       if (region_id == DM_STATS_WALK_GROUP)
+               return_NULL;
+
+       dmt = _stats_print_region(dms, region_id,
+                                 start_line, num_lines, clear);
+
+       if (!dmt)
+               return_NULL;
+
+       if (!(response = dm_task_get_message_response(dmt)))
+               goto_out;
+
+       if (!(resp = dm_pool_strdup(dms->mem, response)))
+               log_error("Could not allocate memory for response buffer.");
+out:
+       dm_task_destroy(dmt);
+
+       return resp;
+}
+
+/*
+ * Release a buffer returned by dm_stats_print_region() back to the
+ * handle's memory pool.
+ */
+void dm_stats_buffer_destroy(struct dm_stats *dms, char *buffer)
+{
+       dm_pool_free(dms->mem, buffer);
+}
+
+/*
+ * Return the number of regions in the handle, or zero for a NULL
+ * handle or one that has not been listed yet.
+ */
+uint64_t dm_stats_get_nr_regions(const struct dm_stats *dms)
+{
+       if (!dms)
+               return_0;
+
+       return dms->regions ? dms->nr_regions : 0;
+}
+
+/*
+ * Count the groups present in the handle by scanning the group
+ * table for valid group_id entries. Returns zero for a NULL handle
+ * or one with no regions or groups.
+ */
+uint64_t dm_stats_get_nr_groups(const struct dm_stats *dms)
+{
+       uint64_t id, count = 0;
+
+       if (!dms)
+               return_0;
+
+       /* no regions or groups? */
+       if (!dms->regions || !dms->groups)
+               return 0;
+
+       for (id = 0; id <= dms->max_region; id++) {
+               if (dms->groups[id].group_id != DM_STATS_GROUP_NOT_PRESENT)
+                       count++;
+       }
+
+       return count;
+}
+
+/**
+ * Test whether region_id is present in this set of stats data.
+ * Returns 0 for an unlisted handle or an out-of-range region_id.
+ */
+int dm_stats_region_present(const struct dm_stats *dms, uint64_t region_id)
+{
+       if (!dms->regions)
+               return_0;
+
+       return (region_id > dms->max_region)
+               ? 0 : _stats_region_present(&dms->regions[region_id]);
+}
+
+/*
+ * Parse a @stats_print response into the region table slot for
+ * region_id. The handle must have been listed first so that
+ * dms->regions is allocated. Returns 1 on success, 0 on failure.
+ */
+static int _dm_stats_populate_region(struct dm_stats *dms, uint64_t region_id,
+                                    const char *resp)
+{
+       struct dm_stats_region *region;
+
+       if (!_stats_bound(dms))
+               return_0;
+
+       /*
+        * Fix: test the region table itself. The previous check
+        * tested &dms->regions[region_id], which is non-NULL for any
+        * non-zero region_id even when dms->regions is NULL, so an
+        * unlisted handle was never caught here.
+        */
+       if (!dms->regions) {
+               log_error("Cannot populate empty handle before dm_stats_list().");
+               return 0;
+       }
+
+       region = &dms->regions[region_id];
+
+       if (!_stats_parse_region(dms, resp, region, region->timescale)) {
+               log_error("Could not parse @stats_print message response.");
+               return 0;
+       }
+       region->region_id = region_id;
+       return 1;
+}
+
+/*
+ * Populate counter data for one region, or for every region when
+ * region_id is DM_STATS_REGIONS_ALL. Counters are fetched with
+ * @stats_print_clear, so each populate also zeroes the kernel
+ * counters. Returns 1 on success, 0 on failure (the region table is
+ * dropped on failure).
+ */
+int dm_stats_populate(struct dm_stats *dms, const char *program_id,
+                     uint64_t region_id)
+{
+       int all_regions = (region_id == DM_STATS_REGIONS_ALL);
+       struct dm_task *dmt = NULL; /* @stats_print task */
+       uint64_t saved_flags; /* saved walk flags */
+       const char *resp;
+
+       /*
+        * We are about to destroy and re-create the region table, so it
+        * is safe to use the cursor embedded in the stats handle: just
+        * save a copy of the current walk_flags to restore later.
+        */
+       saved_flags = dms->walk_flags;
+
+       if (!_stats_bound(dms))
+               return_0;
+
+       /* group identifiers cannot be populated directly */
+       if ((!all_regions) && (region_id & DM_STATS_WALK_GROUP)) {
+               log_error("Invalid region_id for dm_stats_populate: "
+                         "DM_STATS_WALK_GROUP");
+               return 0;
+       }
+
+       if (!dms->nr_regions) {
+               log_error("No regions registered.");
+               return 0;
+       }
+
+       /* allow zero-length program_id for populate */
+       if (!program_id)
+               program_id = dms->program_id;
+
+       if (all_regions && !dm_stats_list(dms, program_id)) {
+               log_error("Could not parse @stats_list response.");
+               goto bad;
+       } else if (!_stats_set_name_cache(dms)) {
+               goto_bad;
+       }
+
+       /* walk region-by-region using the handle's embedded cursor */
+       dms->walk_flags = DM_STATS_WALK_REGION;
+       dm_stats_walk_start(dms);
+       do {
+               region_id = (all_regions)
+                            ? dm_stats_get_current_region(dms) : region_id;
+
+               /* obtain all lines and clear counter values */
+               if (!(dmt = _stats_print_region(dms, region_id, 0, 0, 1)))
+                       goto_bad;
+
+               resp = dm_task_get_message_response(dmt);
+               if (!_dm_stats_populate_region(dms, region_id, resp)) {
+                       dm_task_destroy(dmt);
+                       goto_bad;
+               }
+
+               dm_task_destroy(dmt);
+               dm_stats_walk_next(dms);
+
+       } while (all_regions && !dm_stats_walk_end(dms));
+
+       dms->walk_flags = saved_flags;
+       return 1;
+
+bad:
+       dms->walk_flags = saved_flags;
+       _stats_regions_destroy(dms);
+       dms->regions = NULL;
+       return 0;
+}
+
+/**
+ * destroy a dm_stats object and all associated regions and counter sets.
+ * Safe to call with a NULL handle.
+ */
+void dm_stats_destroy(struct dm_stats *dms)
+{
+       if (!dms)
+               return;
+
+       /* tear down tables first, then the pools that back them */
+       _stats_regions_destroy(dms);
+       _stats_groups_destroy(dms);
+       _stats_clear_binding(dms);
+       dm_pool_destroy(dms->mem);
+       dm_pool_destroy(dms->hist_mem);
+       dm_pool_destroy(dms->group_mem);
+       dm_free(dms->program_id);
+       dm_free((char *) dms->name);
+       dm_free(dms);
+}
+
+/*
+ * Walk each area that is a member of region_id rid.
+ * i is a caller-supplied index variable holding the current area_id
+ * (call sites use int or uint64_t).
+ */
+#define _foreach_region_area(dms, rid, i)                              \
+for ((i) = 0; (i) < _nr_areas_region(&dms->regions[(rid)]); (i)++)     \
+
+/*
+ * Walk each region that is a member of group_id gid.
+ * i is a caller-supplied index variable holding the current
+ * region_id.
+ */
+#define _foreach_group_region(dms, gid, i)                     \
+for ((i) = dm_bit_get_first((dms)->groups[(gid)].regions);     \
+     (i) != DM_STATS_GROUP_NOT_PRESENT;                                \
+     (i) = dm_bit_get_next((dms)->groups[(gid)].regions, (i))) \
+
+/*
+ * Walk each region that is a member of group_id gid visiting each
+ * area within the region.
+ * i holds the current region_id; j holds the current area_id.
+ */
+#define _foreach_group_area(dms, gid, i, j)                    \
+_foreach_group_region(dms, gid, i)                             \
+       _foreach_region_area(dms, i, j)
+
+/*
+ * Fetch a single raw counter value from one area's counter set.
+ * Logs an error and returns 0 for an out-of-range counter index.
+ */
+static uint64_t _stats_get_counter(const struct dm_stats *dms,
+                                  const struct dm_stats_counters *area,
+                                  dm_stats_counter_t counter)
+{
+       switch(counter) {
+       case DM_STATS_READS_COUNT:
+               return area->reads;
+       case DM_STATS_READS_MERGED_COUNT:
+               return area->reads_merged;
+       case DM_STATS_READ_SECTORS_COUNT:
+               return area->read_sectors;
+       case DM_STATS_READ_NSECS:
+               return area->read_nsecs;
+       case DM_STATS_WRITES_COUNT:
+               return area->writes;
+       case DM_STATS_WRITES_MERGED_COUNT:
+               return area->writes_merged;
+       case DM_STATS_WRITE_SECTORS_COUNT:
+               return area->write_sectors;
+       case DM_STATS_WRITE_NSECS:
+               return area->write_nsecs;
+       case DM_STATS_IO_IN_PROGRESS_COUNT:
+               return area->io_in_progress;
+       case DM_STATS_IO_NSECS:
+               return area->io_nsecs;
+       case DM_STATS_WEIGHTED_IO_NSECS:
+               return area->weighted_io_nsecs;
+       case DM_STATS_TOTAL_READ_NSECS:
+               return area->total_read_nsecs;
+       case DM_STATS_TOTAL_WRITE_NSECS:
+               return area->total_write_nsecs;
+       case DM_STATS_NR_COUNTERS:
+       default:
+               log_error("Attempt to read invalid counter: %d", counter);
+       }
+       return 0;
+}
+
+/*
+ * Return the value of a single counter for the given region and
+ * area, summing over member areas/regions when the identifiers
+ * select a group or an aggregate region.
+ */
+uint64_t dm_stats_get_counter(const struct dm_stats *dms,
+                             dm_stats_counter_t counter,
+                             uint64_t region_id, uint64_t area_id)
+{
+       uint64_t i, j, sum = 0; /* aggregation */
+       int sum_regions = 0;
+       struct dm_stats_region *region;
+       struct dm_stats_counters *area;
+
+       region_id = (region_id == DM_STATS_REGION_CURRENT)
+                    ? dms->cur_region : region_id ;
+       /*
+        * Fix: compare area_id against DM_STATS_AREA_CURRENT (this
+        * previously tested DM_STATS_REGION_CURRENT), matching the
+        * handling in dm_stats_object_type().
+        */
+       area_id = (area_id == DM_STATS_AREA_CURRENT)
+                  ? dms->cur_area : area_id ;
+
+       sum_regions = !!(region_id & DM_STATS_WALK_GROUP);
+
+       if (region_id == DM_STATS_WALK_GROUP)
+               /* group walk using the cursor */
+               region_id = dms->cur_group;
+       else if (region_id & DM_STATS_WALK_GROUP)
+               /* group walk using immediate group_id */
+               region_id &= ~DM_STATS_WALK_GROUP;
+       region = &dms->regions[region_id];
+
+       /*
+        * All statistics aggregation takes place here: aggregate metrics
+        * are calculated as normal using the aggregated counter values
+        * returned for the region or group specified.
+        */
+
+       if (_stats_region_is_grouped(dms, region_id) && (sum_regions)) {
+               /* group */
+               if (area_id & DM_STATS_WALK_GROUP)
+                       _foreach_group_area(dms, region->group_id, i, j) {
+                               area = &dms->regions[i].counters[j];
+                               sum += _stats_get_counter(dms, area, counter);
+                       }
+               else
+                       _foreach_group_region(dms, region->group_id, i) {
+                               area = &dms->regions[i].counters[area_id];
+                               sum += _stats_get_counter(dms, area, counter);
+                       }
+       } else if (area_id == DM_STATS_WALK_REGION) {
+               /* aggregate region */
+               _foreach_region_area(dms, region_id, j) {
+                       area = &dms->regions[region_id].counters[j];
+                       sum += _stats_get_counter(dms, area, counter);
+               }
+       } else {
+               /* plain region / area */
+               area = &region->counters[area_id];
+               sum = _stats_get_counter(dms, area, counter);
+       }
+
+       return sum;
+}
+
+/*
+ * Methods for accessing named counter fields. All methods share the
+ * following naming scheme and prototype:
+ *
+ * uint64_t dm_stats_get_COUNTER(const struct dm_stats *, uint64_t, uint64_t)
+ *
+ * Where the two integer arguments are the region_id and area_id
+ * respectively.
+ *
+ * name is the name of the counter (lower case)
+ * counter is the part of the enum name following DM_STATS_ (upper case)
+ */
+/* Generate one public accessor per counter; see the comment above. */
+#define MK_STATS_GET_COUNTER_FN(name, counter)                         \
+uint64_t dm_stats_get_ ## name(const struct dm_stats *dms,             \
+                              uint64_t region_id, uint64_t area_id)    \
+{                                                                      \
+       return dm_stats_get_counter(dms, DM_STATS_ ## counter,          \
+                                   region_id, area_id);                \
+}
+
+MK_STATS_GET_COUNTER_FN(reads, READS_COUNT)
+MK_STATS_GET_COUNTER_FN(reads_merged, READS_MERGED_COUNT)
+MK_STATS_GET_COUNTER_FN(read_sectors, READ_SECTORS_COUNT)
+MK_STATS_GET_COUNTER_FN(read_nsecs, READ_NSECS)
+MK_STATS_GET_COUNTER_FN(writes, WRITES_COUNT)
+MK_STATS_GET_COUNTER_FN(writes_merged, WRITES_MERGED_COUNT)
+MK_STATS_GET_COUNTER_FN(write_sectors, WRITE_SECTORS_COUNT)
+MK_STATS_GET_COUNTER_FN(write_nsecs, WRITE_NSECS)
+MK_STATS_GET_COUNTER_FN(io_in_progress, IO_IN_PROGRESS_COUNT)
+MK_STATS_GET_COUNTER_FN(io_nsecs, IO_NSECS)
+MK_STATS_GET_COUNTER_FN(weighted_io_nsecs, WEIGHTED_IO_NSECS)
+MK_STATS_GET_COUNTER_FN(total_read_nsecs, TOTAL_READ_NSECS)
+MK_STATS_GET_COUNTER_FN(total_write_nsecs, TOTAL_WRITE_NSECS)
+/* macro is local to this generator block */
+#undef MK_STATS_GET_COUNTER_FN
+
+/*
+ * Floating point stats metric functions
+ *
+ * Called from dm_stats_get_metric() to calculate the value of
+ * the requested metric.
+ *
+ * int _metric_name(const struct dm_stats *dms,
+ *                 struct dm_stats_counters *c,
+ *                 double *value);
+ *
+ * Calculate a metric value from the counter data for the given
+ * identifiers and store it in the memory pointed to by value,
+ * applying group or region aggregation if enabled.
+ *
+ * Return one on success or zero on failure.
+ *
+ * To add a new metric:
+ *
+ * o Add a new name to the dm_stats_metric_t enum.
+ * o Create a _metric_fn() to calculate the new metric.
+ * o Add _metric_fn to the _metrics function table
+ *   (entries in enum order).
+ * o Do not add a new named public function for the metric -
+ *   users of new metrics are encouraged to convert to the enum
+ *   based metric interface.
+ *
+ */
+
+/*
+ * Metric: merged read requests over the sampling interval, divided
+ * by the interval in nanoseconds. Always returns 1.
+ *
+ * NOTE(review): unlike _reads_per_sec() below, no NSEC_PER_SEC
+ * factor is applied, so the value is per-nanosecond rather than
+ * per-second — confirm this matches consumers' expectations.
+ */
+static int _rd_merges_per_sec(const struct dm_stats *dms, double *rrqm,
+                             uint64_t region_id, uint64_t area_id)
+{
+       double mrgs;
+       mrgs = (double) dm_stats_get_counter(dms, DM_STATS_READS_MERGED_COUNT,
+                                            region_id, area_id);
+
+       *rrqm = mrgs / (double) dms->interval_ns;
+
+       return 1;
+}
+
+/*
+ * Metric: merged write requests over the sampling interval, divided
+ * by the interval in nanoseconds. Always returns 1.
+ *
+ * NOTE(review): as with _rd_merges_per_sec(), no NSEC_PER_SEC factor
+ * is applied here — confirm the per-nanosecond scale is intended.
+ */
+static int _wr_merges_per_sec(const struct dm_stats *dms, double *wrqm,
+                             uint64_t region_id, uint64_t area_id)
+{
+       double mrgs;
+       mrgs = (double) dm_stats_get_counter(dms, DM_STATS_WRITES_MERGED_COUNT,
+                                            region_id, area_id);
+
+       *wrqm = mrgs / (double) dms->interval_ns;
+
+       return 1;
+}
+
+/* r/s: read IOs counted per second of the sampling interval. */
+static int _reads_per_sec(const struct dm_stats *dms, double *rd_s,
+                         uint64_t region_id, uint64_t area_id)
+{
+       double nr_reads;
+
+       nr_reads = (double) dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+                                                region_id, area_id);
+       *rd_s = nr_reads * (double) NSEC_PER_SEC / (double) dms->interval_ns;
+
+       return 1;
+}
+
+/* w/s: write IOs counted per second of the sampling interval. */
+static int _writes_per_sec(const struct dm_stats *dms, double *wr_s,
+                          uint64_t region_id, uint64_t area_id)
+{
+       double nr_writes;
+
+       nr_writes = (double) dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+                                                 region_id, area_id);
+       *wr_s = nr_writes * (double) NSEC_PER_SEC / (double) dms->interval_ns;
+
+       return 1;
+}
+
+/* rsec/s: sectors read per second of the sampling interval. */
+static int _read_sectors_per_sec(const struct dm_stats *dms, double *rsec_s,
+                                uint64_t region_id, uint64_t area_id)
+{
+       double nr_sectors;
+
+       nr_sectors = (double) dm_stats_get_counter(dms,
+                                                  DM_STATS_READ_SECTORS_COUNT,
+                                                  region_id, area_id);
+
+       *rsec_s = nr_sectors * (double) NSEC_PER_SEC / (double) dms->interval_ns;
+
+       return 1;
+}
+
+/* wsec/s: sectors written per second of the sampling interval. */
+static int _write_sectors_per_sec(const struct dm_stats *dms, double *wsec_s,
+                                 uint64_t region_id, uint64_t area_id)
+{
+       double nr_sectors;
+
+       nr_sectors = (double) dm_stats_get_counter(dms,
+                                                  DM_STATS_WRITE_SECTORS_COUNT,
+                                                  region_id, area_id);
+
+       *wsec_s = nr_sectors * (double) NSEC_PER_SEC / (double) dms->interval_ns;
+
+       return 1;
+}
+
+/*
+ * arqsz: mean sectors per IO (reads + writes) over the interval;
+ * zero when no IO was counted.
+ */
+static int _average_request_size(const struct dm_stats *dms, double *arqsz,
+                                uint64_t region_id, uint64_t area_id)
+{
+       double nr_ios, nr_sectors;
+
+       nr_ios = (double) (dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+                                               region_id, area_id)
+                          + dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+                                                 region_id, area_id));
+       nr_sectors = (double) (dm_stats_get_counter(dms, DM_STATS_READ_SECTORS_COUNT,
+                                                   region_id, area_id)
+                              + dm_stats_get_counter(dms, DM_STATS_WRITE_SECTORS_COUNT,
+                                                     region_id, area_id));
+
+       *arqsz = (nr_ios > 0.0) ? nr_sectors / nr_ios : 0.0;
+
+       return 1;
+}
+
+/*
+ * qusz: average queue depth — weighted IO time divided by the length
+ * of the sampling interval.
+ */
+static int _average_queue_size(const struct dm_stats *dms, double *qusz,
+                              uint64_t region_id, uint64_t area_id)
+{
+       double ticks;
+
+       ticks = (double) dm_stats_get_counter(dms, DM_STATS_WEIGHTED_IO_NSECS,
+                                             region_id, area_id);
+
+       *qusz = (ticks > 0.0) ? ticks / (double) dms->interval_ns : 0.0;
+
+       return 1;
+}
+
+/*
+ * await: mean time per IO — combined read and write time divided by
+ * the combined read and write IO count; zero when no IO was counted.
+ */
+static int _average_wait_time(const struct dm_stats *dms, double *await,
+                             uint64_t region_id, uint64_t area_id)
+{
+       uint64_t ticks, ios;
+
+       ticks = dm_stats_get_counter(dms, DM_STATS_READ_NSECS,
+                                    region_id, area_id)
+               + dm_stats_get_counter(dms, DM_STATS_WRITE_NSECS,
+                                      region_id, area_id);
+
+       ios = dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+                                  region_id, area_id)
+             + dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+                                    region_id, area_id);
+
+       *await = (ios > 0) ? (double) ticks / (double) ios : 0.0;
+
+       return 1;
+}
+
+/* r_await: mean time per read IO; zero when no reads were counted. */
+static int _average_rd_wait_time(const struct dm_stats *dms, double *await,
+                                uint64_t region_id, uint64_t area_id)
+{
+       uint64_t ticks, ios;
+
+       ticks = dm_stats_get_counter(dms, DM_STATS_READ_NSECS,
+                                    region_id, area_id);
+       ios = dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+                                  region_id, area_id);
+
+       /*
+        * A non-zero tick count should imply a non-zero IO count
+        * (unless a kernel bug exists), but guard both values before
+        * dividing (Coverity).
+        */
+       *await = (ticks > 0 && ios > 0) ? (double) ticks / (double) ios : 0.0;
+
+       return 1;
+}
+
+/* w_await: mean time per write IO; zero when no writes were counted. */
+static int _average_wr_wait_time(const struct dm_stats *dms, double *await,
+                                uint64_t region_id, uint64_t area_id)
+{
+       uint64_t ticks, ios;
+
+       ticks = dm_stats_get_counter(dms, DM_STATS_WRITE_NSECS,
+                                    region_id, area_id);
+       ios = dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+                                  region_id, area_id);
+
+       /*
+        * A non-zero tick count should imply a non-zero IO count
+        * (unless a kernel bug exists), but guard both values before
+        * dividing (Coverity).
+        */
+       *await = (ticks > 0 && ios > 0) ? (double) ticks / (double) ios : 0.0;
+
+       return 1;
+}
+
+/* tput: combined read and write IOs per second of the interval. */
+static int _throughput(const struct dm_stats *dms, double *tput,
+                      uint64_t region_id, uint64_t area_id)
+{
+       uint64_t ios;
+
+       ios = dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+                                  region_id, area_id)
+             + dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+                                    region_id, area_id);
+
+       *tput = ((double) NSEC_PER_SEC * (double) ios)
+               / (double) (dms->interval_ns);
+
+       return 1;
+}
+
+/*
+ * %util: fraction of the sampling interval during which the device
+ * reported IO time (DM_STATS_IO_NSECS), clamped to 1.0.  Returns 0
+ * (with backtrace) when no sampling interval has been set.
+ */
+static int _utilization(const struct dm_stats *dms, double *util,
+                       uint64_t region_id, uint64_t area_id)
+{
+       uint64_t io_nsecs, interval_ns = dms->interval_ns;
+
+       /**
+        * If io_nsec > interval_ns there is something wrong with the clock
+        * for the last interval; do not allow a value > 100% utilization
+        * to be passed to a dm_make_percent() call. We expect to see these
+        * at startup if counters have not been cleared before the first read.
+        *
+        * A zero interval_ns is also an error since metrics cannot be
+        * calculated without a defined interval - return zero and emit a
+        * backtrace in this case.
+        */
+       io_nsecs = dm_stats_get_counter(dms, DM_STATS_IO_NSECS,
+                                       region_id, area_id);
+
+       if (!interval_ns) {
+               *util = 0.0;
+               return_0;
+       }
+
+       /* Clamp to the interval so utilization never exceeds 100%. */
+       io_nsecs = ((io_nsecs < interval_ns) ? io_nsecs : interval_ns);
+
+       *util = (double) io_nsecs / (double) interval_ns;
+
+       return 1;
+}
+
+/*
+ * svctm: derived service time per IO, computed from the throughput
+ * and utilization metrics above.
+ */
+static int _service_time(const struct dm_stats *dms, double *svctm,
+                        uint64_t region_id, uint64_t area_id)
+{
+       double tput, util;
+
+       if (!_throughput(dms, &tput, region_id, area_id))
+               return 0;
+
+       if (!_utilization(dms, &util, region_id, area_id))
+               return 0;
+
+       /* _utilization() yields a fraction; scale to a percentage. */
+       util *= 100;
+
+       /* avoid NAN with zero counter values */
+       if ( (uint64_t) tput == 0 || (uint64_t) util == 0) {
+               *svctm = 0.0;
+               return 1;
+       }
+
+       /*
+        * NOTE(review): util is a double in [0.0..100.0] here, but
+        * dm_percent_to_float() takes an integer dm_percent_t argument,
+        * so the value is implicitly truncated before scaling — confirm
+        * the intended units of the result.
+        */
+       *svctm = ((double) NSEC_PER_SEC * dm_percent_to_float(util))
+                 / (100.0 * tput);
+
+       return 1;
+}
+
+/*
+ * Metric function table, in dm_stats_metric_t enum order:
+ *      DM_STATS_RD_MERGES_PER_SEC,
+ *      DM_STATS_WR_MERGES_PER_SEC,
+ *      DM_STATS_READS_PER_SEC,
+ *      DM_STATS_WRITES_PER_SEC,
+ *      DM_STATS_READ_SECTORS_PER_SEC,
+ *      DM_STATS_WRITE_SECTORS_PER_SEC,
+ *      DM_STATS_AVERAGE_REQUEST_SIZE,
+ *      DM_STATS_AVERAGE_QUEUE_SIZE,
+ *      DM_STATS_AVERAGE_WAIT_TIME,
+ *      DM_STATS_AVERAGE_RD_WAIT_TIME,
+ *      DM_STATS_AVERAGE_WR_WAIT_TIME,
+ *      DM_STATS_SERVICE_TIME,
+ *      DM_STATS_THROUGHPUT,
+ *      DM_STATS_UTILIZATION
+ */
+
+typedef int (*_metric_fn_t)(const struct dm_stats *, double *,
+                           uint64_t, uint64_t);
+
+/*
+ * NOTE(review): this table is only dispatched from dm_stats_get_metric()
+ * below — consider declaring it static to avoid exporting the symbol.
+ */
+_metric_fn_t _metrics[DM_STATS_NR_METRICS] = {
+       _rd_merges_per_sec,
+       _wr_merges_per_sec,
+       _reads_per_sec,
+       _writes_per_sec,
+       _read_sectors_per_sec,
+       _write_sectors_per_sec,
+       _average_request_size,
+       _average_queue_size,
+       _average_wait_time,
+       _average_rd_wait_time,
+       _average_wr_wait_time,
+       _service_time,
+       _throughput,
+       _utilization
+};
+
+/*
+ * Calculate the requested metric for the given region and area and
+ * store it in *value.  Returns one on success, zero on error (no
+ * sampling interval set, or an invalid metric number).
+ */
+int dm_stats_get_metric(const struct dm_stats *dms, int metric,
+                       uint64_t region_id, uint64_t area_id, double *value)
+{
+       if (!dms->interval_ns)
+               return_0;
+
+       /* Validate the metric before touching the dispatch table. */
+       if (metric < 0 || metric >= DM_STATS_NR_METRICS) {
+               log_error("Attempt to read invalid metric: %d", metric);
+               return 0;
+       }
+
+       /*
+        * Decode DM_STATS_{REGION,AREA}_CURRENT here; counters will then
+        * be returned for the actual current region and area.
+        *
+        * DM_STATS_WALK_GROUP is passed through to the counter methods -
+        * aggregates for the group are returned and used to calculate
+        * the metric for the group totals.
+        *
+        * FIX: compare area_id against DM_STATS_AREA_CURRENT; the old
+        * code tested it against DM_STATS_REGION_CURRENT and only worked
+        * because the two constants share a value.
+        */
+       region_id = (region_id == DM_STATS_REGION_CURRENT)
+                    ? dms->cur_region : region_id ;
+       area_id = (area_id == DM_STATS_AREA_CURRENT)
+                  ? dms->cur_area : area_id ;
+
+       return _metrics[metric](dms, value, region_id, area_id);
+}
+
+/**
+ * Methods for accessing stats metrics. All methods share the
+ * following naming scheme and prototype:
+ *
+ * int dm_stats_get_<name>(const struct dm_stats *dms, double *value,
+ *                         uint64_t region_id, uint64_t area_id)
+ *
+ * Where the two uint64_t arguments are the region_id and area_id
+ * respectively.
+ *
+ * name is the name of the metric (lower case)
+ * metric is the part of the enum name following DM_STATS_ (upper case)
+ * meta is the identifier used for the value out-parameter
+ */
+#define MK_STATS_GET_METRIC_FN(name, metric, meta)                     \
+int dm_stats_get_ ## name(const struct dm_stats *dms, double *meta,    \
+                         uint64_t region_id, uint64_t area_id)         \
+{                                                                      \
+       return dm_stats_get_metric(dms, DM_STATS_ ## metric,            \
+                                  region_id, area_id, meta);           \
+}
+
+/*
+ * Expand the named per-metric accessors from the template above.
+ * Utilization is deliberately absent: it keeps its historical
+ * dm_percent_t interface and is defined explicitly below.
+ */
+MK_STATS_GET_METRIC_FN(rd_merges_per_sec, RD_MERGES_PER_SEC, rrqm)
+MK_STATS_GET_METRIC_FN(wr_merges_per_sec, WR_MERGES_PER_SEC, wrqm)
+MK_STATS_GET_METRIC_FN(reads_per_sec, READS_PER_SEC, rd_s)
+MK_STATS_GET_METRIC_FN(writes_per_sec, WRITES_PER_SEC, wr_s)
+MK_STATS_GET_METRIC_FN(read_sectors_per_sec, READ_SECTORS_PER_SEC, rsec_s)
+MK_STATS_GET_METRIC_FN(write_sectors_per_sec, WRITE_SECTORS_PER_SEC, wsec_s)
+MK_STATS_GET_METRIC_FN(average_request_size, AVERAGE_REQUEST_SIZE, arqsz)
+MK_STATS_GET_METRIC_FN(average_queue_size, AVERAGE_QUEUE_SIZE, qusz)
+MK_STATS_GET_METRIC_FN(average_wait_time, AVERAGE_WAIT_TIME, await)
+MK_STATS_GET_METRIC_FN(average_rd_wait_time, AVERAGE_RD_WAIT_TIME, await)
+MK_STATS_GET_METRIC_FN(average_wr_wait_time, AVERAGE_WR_WAIT_TIME, await)
+MK_STATS_GET_METRIC_FN(service_time, SERVICE_TIME, svctm)
+MK_STATS_GET_METRIC_FN(throughput, THROUGHPUT, tput)
+
+/*
+ * Utilization is an exception since it used the dm_percent_t type in the
+ * original named function based interface: preserve this behaviour for
+ * backwards compatibility with existing users.
+ *
+ * The same metric may be accessed as a double via the enum based metric
+ * interface.
+ *
+ * Returns one on success, zero (with backtrace) on failure.
+ */
+int dm_stats_get_utilization(const struct dm_stats *dms, dm_percent_t *util,
+                            uint64_t region_id, uint64_t area_id)
+{
+       double _util;
+
+       if (!dm_stats_get_metric(dms, DM_STATS_UTILIZATION,
+                                region_id, area_id, &_util))
+               return_0;
+       /* scale up utilization value in the range [0.00..1.00] */
+       *util = dm_make_percent(DM_PERCENT_1 * _util, DM_PERCENT_1);
+       return 1;
+}
+
+/* Set the handle's sampling interval from a value in milliseconds. */
+void dm_stats_set_sampling_interval_ms(struct dm_stats *dms, uint64_t interval_ms)
+{
+       /* All times use nsecs internally. */
+       dms->interval_ns = interval_ms * NSEC_PER_MSEC;
+}
+
+/* Set the handle's sampling interval directly in nanoseconds. */
+void dm_stats_set_sampling_interval_ns(struct dm_stats *dms, uint64_t interval_ns)
+{
+       dms->interval_ns = interval_ns;
+}
+
+/* Return the sampling interval in milliseconds (truncating division). */
+uint64_t dm_stats_get_sampling_interval_ms(const struct dm_stats *dms)
+{
+       /* All times use nsecs internally. */
+       return (dms->interval_ns / NSEC_PER_MSEC);
+}
+
+/* Return the sampling interval in nanoseconds. */
+uint64_t dm_stats_get_sampling_interval_ns(const struct dm_stats *dms)
+{
+       /* All times use nsecs internally. */
+       return (dms->interval_ns);
+}
+
+/*
+ * Replace the handle's program_id string.  A NULL or empty id is only
+ * accepted when allow_empty is set (and is stored as "").  Returns one
+ * on success, zero on error.
+ */
+int dm_stats_set_program_id(struct dm_stats *dms, int allow_empty,
+                           const char *program_id)
+{
+       const char *new_id = program_id ? program_id : "";
+
+       if (!allow_empty && !*new_id) {
+               log_error("Empty program_id not permitted without "
+                         "allow_empty=1");
+               return 0;
+       }
+
+       dm_free(dms->program_id);
+
+       if (!(dms->program_id = dm_strdup(new_id)))
+               return_0;
+
+       return 1;
+}
+
+/* Return the region ID at the handle's current cursor position. */
+uint64_t dm_stats_get_current_region(const struct dm_stats *dms)
+{
+       return dms->cur_region;
+}
+
+/* Return the current area ID with any walk flag bits masked off. */
+uint64_t dm_stats_get_current_area(const struct dm_stats *dms)
+{
+       return dms->cur_area & ~DM_STATS_WALK_ALL;
+}
+
+/*
+ * Store the start sector of region_id in *start, decoding aggregate
+ * region and group walk flags.  Returns one on success, zero (with
+ * backtrace) when the handle has no region table.
+ */
+int dm_stats_get_region_start(const struct dm_stats *dms, uint64_t *start,
+                             uint64_t region_id)
+{
+       if (!dms || !dms->regions)
+               return_0;
+
+       /* start is unchanged when aggregating areas */
+       if (region_id & DM_STATS_WALK_REGION)
+               region_id &= ~DM_STATS_WALK_REGION;
+
+       /* use start of first region as group start */
+       if (region_id & DM_STATS_WALK_GROUP) {
+               if (region_id == DM_STATS_WALK_GROUP)
+                       region_id = dms->cur_group;
+               else
+                       region_id &= ~DM_STATS_WALK_GROUP;
+       }
+
+       *start = dms->regions[region_id].start;
+       return 1;
+}
+
+/*
+ * Store the length of region_id in *len, decoding aggregate region
+ * and group walk flags.  A group's length is the sum of its member
+ * regions' lengths.  Returns one on success, zero on error.
+ */
+int dm_stats_get_region_len(const struct dm_stats *dms, uint64_t *len,
+                           uint64_t region_id)
+{
+       uint64_t i;
+
+       if (!dms || !dms->regions)
+               return_0;
+
+       *len = 0;
+
+       /* length is unchanged when aggregating areas */
+       if (region_id & DM_STATS_WALK_REGION)
+               region_id &= ~DM_STATS_WALK_REGION;
+
+       if (region_id & DM_STATS_WALK_GROUP) {
+               /* decode region / group ID */
+               if (region_id == DM_STATS_WALK_GROUP)
+                       region_id = dms->cur_group;
+               else
+                       region_id &= ~DM_STATS_WALK_GROUP;
+
+               /*
+                * use sum of region sizes as group size
+                *
+                * FIX: iterate the group decoded from region_id rather
+                * than dms->cur_group; the old code summed the current
+                * group's regions even when the caller asked for a
+                * different group.
+                */
+               if (_stats_region_is_grouped(dms, region_id))
+                       _foreach_group_region(dms, region_id, i)
+                               *len += dms->regions[i].len;
+               else {
+                       log_error("Group ID " FMTu64 " does not exist",
+                                 region_id);
+                       return 0;
+               }
+       } else
+               *len = dms->regions[region_id].len;
+
+       return 1;
+}
+
+/*
+ * Store the area length (the region step) of region_id in *len.
+ * Groups and aggregated regions are not subdivided, so their area
+ * length equals the whole region/group length.
+ */
+int dm_stats_get_region_area_len(const struct dm_stats *dms, uint64_t *len,
+                                uint64_t region_id)
+{
+       if (!dms || !dms->regions)
+               return_0;
+
+       /* groups are not subdivided - area size equals group size */
+       if (region_id & (DM_STATS_WALK_GROUP | DM_STATS_WALK_REGION))
+               /* get_region_len will decode region_id */
+               return dm_stats_get_region_len(dms, len, region_id);
+
+       *len = dms->regions[region_id].step;
+       return 1;
+}
+
+/* Convenience wrapper: region start for the handle's current region. */
+int dm_stats_get_current_region_start(const struct dm_stats *dms,
+                                     uint64_t *start)
+{
+       return dm_stats_get_region_start(dms, start, dms->cur_region);
+}
+
+/* Convenience wrapper: region length for the handle's current region. */
+int dm_stats_get_current_region_len(const struct dm_stats *dms,
+                                   uint64_t *len)
+{
+       return dm_stats_get_region_len(dms, len, dms->cur_region);
+}
+
+/* Convenience wrapper: area length for the handle's current region. */
+int dm_stats_get_current_region_area_len(const struct dm_stats *dms,
+                                        uint64_t *step)
+{
+       return dm_stats_get_region_area_len(dms, step, dms->cur_region);
+}
+
+/*
+ * Store the start sector of the given area in *start.  For group or
+ * whole-region aggregates this is the region start.  Returns one on
+ * success, zero (with backtrace) when the handle has no region table.
+ */
+int dm_stats_get_area_start(const struct dm_stats *dms, uint64_t *start,
+                           uint64_t region_id, uint64_t area_id)
+{
+       const struct dm_stats_region *r;
+
+       if (!dms || !dms->regions)
+               return_0;
+
+       /* group or region area start equals region start */
+       if (region_id & (DM_STATS_WALK_GROUP | DM_STATS_WALK_REGION))
+               return dm_stats_get_region_start(dms, start, region_id);
+
+       r = &dms->regions[region_id];
+       *start = r->start + area_id * r->step;
+       return 1;
+}
+
+/*
+ * Store the offset of the given area within its region in *offset.
+ * Groups and aggregate regions have no per-area offset (it is zero).
+ */
+int dm_stats_get_area_offset(const struct dm_stats *dms, uint64_t *offset,
+                            uint64_t region_id, uint64_t area_id)
+{
+       if (!dms || !dms->regions)
+               return_0;
+
+       *offset = (region_id & (DM_STATS_WALK_GROUP | DM_STATS_WALK_REGION))
+                  ? 0 : dms->regions[region_id].step * area_id;
+
+       return 1;
+}
+
+/* Convenience wrapper: area start for the handle's current position. */
+int dm_stats_get_current_area_start(const struct dm_stats *dms,
+                                   uint64_t *start)
+{
+       return dm_stats_get_area_start(dms, start,
+                                      dms->cur_region, dms->cur_area);
+}
+
+/* Convenience wrapper: area offset for the handle's current position. */
+int dm_stats_get_current_area_offset(const struct dm_stats *dms,
+                                         uint64_t *offset)
+{
+       return dm_stats_get_area_offset(dms, offset,
+                                      dms->cur_region, dms->cur_area);
+}
+
+/* Convenience wrapper: area length for the handle's current region. */
+int dm_stats_get_current_area_len(const struct dm_stats *dms,
+                                 uint64_t *len)
+{
+       return dm_stats_get_region_area_len(dms, len, dms->cur_region);
+}
+
+/*
+ * Return the program_id of region_id (never NULL; "" when unset).
+ * Group identifiers report the handle-wide program_id.
+ */
+const char *dm_stats_get_region_program_id(const struct dm_stats *dms,
+                                          uint64_t region_id)
+{
+       const char *id;
+
+       if (region_id & DM_STATS_WALK_GROUP)
+               return dms->program_id;
+
+       /* strip the aggregate-region walk flag if present */
+       region_id &= ~DM_STATS_WALK_REGION;
+
+       id = dms->regions[region_id].program_id;
+       return id ? id : "";
+}
+
+/*
+ * Return the aux_data string of region_id (never NULL; "" when unset
+ * or when a group identifier is given).
+ */
+const char *dm_stats_get_region_aux_data(const struct dm_stats *dms,
+                                        uint64_t region_id)
+{
+       const char *aux;
+
+       if (region_id & DM_STATS_WALK_GROUP)
+               return "";
+
+       /* strip the aggregate-region walk flag if present */
+       region_id &= ~DM_STATS_WALK_REGION;
+
+       aux = dms->regions[region_id].aux_data;
+       return aux ? aux : "";
+}
+
+/*
+ * Attach a user-defined alias to the group identified by group_id.
+ *
+ * group_id must refer to a grouped region and must be the group
+ * leader (regions[group_id].group_id == group_id); DM_STATS_WALK_GROUP
+ * identifiers are decoded first.  The alias is stored in the group and
+ * re-written through _stats_set_aux().  On failure the previous alias
+ * is restored.  Returns one on success, zero on error.
+ */
+int dm_stats_set_alias(struct dm_stats *dms, uint64_t group_id, const char *alias)
+{
+       struct dm_stats_group *group = NULL;
+       const char *old_alias = NULL;
+
+       if (!dms->regions || !dms->groups || !alias)
+               return_0;
+
+       if (!_stats_region_is_grouped(dms, group_id)) {
+               log_error("Cannot set alias for ungrouped region ID "
+                         FMTu64, group_id);
+               return 0;
+       }
+
+       if (group_id & DM_STATS_WALK_GROUP) {
+               if (group_id == DM_STATS_WALK_GROUP)
+                       group_id = dms->cur_group;
+               else
+                       group_id &= ~DM_STATS_WALK_GROUP;
+       }
+
+       if (group_id != dms->regions[group_id].group_id) {
+               /* dm_stats_set_alias() must be called on the group ID. */
+               log_error("Cannot set alias for group member " FMTu64 ".",
+                         group_id);
+               return 0;
+       }
+
+       group = &dms->groups[group_id];
+       old_alias = group->alias;
+
+       group->alias = dm_strdup(alias);
+       if (!group->alias) {
+               log_error("Could not allocate memory for alias.");
+               goto bad;
+       }
+
+       if (!_stats_set_aux(dms, group_id, dms->regions[group_id].aux_data)) {
+               log_error("Could not set new aux_data");
+               goto bad;
+       }
+
+       dm_free((char *) old_alias);
+
+       return 1;
+
+bad:
+       /* free the new alias (may be NULL) and restore the old one */
+       dm_free((char *) group->alias);
+       group->alias = old_alias;
+       return 0;
+}
+
+/*
+ * Return the alias of the group containing id, or the handle's name
+ * when the region is not grouped or its group has no alias set.
+ * DM_STATS_REGION_CURRENT and group walk identifiers are decoded.
+ */
+const char *dm_stats_get_alias(const struct dm_stats *dms, uint64_t id)
+{
+       const struct dm_stats_region *region;
+
+       id = (id == DM_STATS_REGION_CURRENT) ? dms->cur_region : id;
+
+       if (id & DM_STATS_WALK_GROUP) {
+               if (id == DM_STATS_WALK_GROUP)
+                       id = dms->cur_group;
+               else
+                       id &= ~DM_STATS_WALK_GROUP;
+       }
+
+       region = &dms->regions[id];
+       if (!_stats_region_is_grouped(dms, id)
+           || !dms->groups[region->group_id].alias)
+               return dms->name;
+
+       return dms->groups[region->group_id].alias;
+}
+
+/* Convenience wrapper: program_id of the handle's current region. */
+const char *dm_stats_get_current_region_program_id(const struct dm_stats *dms)
+{
+       return dm_stats_get_region_program_id(dms, dms->cur_region);
+}
+
+/* Convenience wrapper: aux_data of the handle's current region. */
+const char *dm_stats_get_current_region_aux_data(const struct dm_stats *dms)
+{
+       return dm_stats_get_region_aux_data(dms, dms->cur_region);
+}
+
+/*
+ * Return 1 if region_id uses precise timestamps (timescale == 1) and
+ * 0 otherwise.  DM_STATS_REGION_CURRENT and group walk identifiers
+ * are decoded first.
+ */
+int dm_stats_get_region_precise_timestamps(const struct dm_stats *dms,
+                                          uint64_t region_id)
+{
+       struct dm_stats_region *region;
+
+       if (region_id == DM_STATS_REGION_CURRENT)
+               region_id = dms->cur_region;
+
+       if (region_id == DM_STATS_WALK_GROUP)
+               region_id = dms->cur_group;
+       else if (region_id & DM_STATS_WALK_GROUP)
+               region_id &= ~DM_STATS_WALK_GROUP;
+
+       region = &dms->regions[region_id];
+       return region->timescale == 1;
+}
+
+/* Convenience wrapper: precise-timestamp flag of the current region. */
+int dm_stats_get_current_region_precise_timestamps(const struct dm_stats *dms)
+{
+       return dm_stats_get_region_precise_timestamps(dms,
+                                                     DM_STATS_REGION_CURRENT);
+}
+
+/*
+ * Histogram access methods.
+ */
+
+/*
+ * Accumulate the bin counts of (region_id, area_id)'s histogram into
+ * dmh_aggr.  The caller must ensure both histograms use the same bin
+ * boundaries and bin count.
+ */
+static void _sum_histogram_bins(const struct dm_stats *dms,
+                               struct dm_histogram *dmh_aggr,
+                               uint64_t region_id, uint64_t area_id)
+{
+       struct dm_stats_region *region;
+       struct dm_histogram_bin *bins;
+       struct dm_histogram *dmh_cur;
+       int bin;
+
+       region = &dms->regions[region_id];
+       dmh_cur = region->counters[area_id].histogram;
+       bins = dmh_aggr->bins;
+
+       for (bin = 0; bin < dmh_aggr->nr_bins; bin++)
+               bins[bin].count += dmh_cur->bins[bin].count;
+}
+
+/*
+ * Create an aggregate histogram for a sub-divided region or a group.
+ *
+ * area_id == DM_STATS_WALK_REGION requests aggregation over all areas
+ * of region_id; otherwise region_id is treated as a group ID and bins
+ * are summed over every area of every region in the group.  A region
+ * or group without counter data returns its bounds histogram.  The
+ * aggregate is cached on the region/group so repeated lookups are
+ * cheap.  Returns NULL on error.
+ */
+static struct dm_histogram *_aggregate_histogram(const struct dm_stats *dms,
+                                                uint64_t region_id,
+                                                uint64_t area_id)
+{
+       struct dm_histogram *dmh_aggr, *dmh_cur, **dmh_cachep;
+       uint64_t group_id = DM_STATS_GROUP_NOT_PRESENT;
+       int bin, nr_bins, group = 1;
+       size_t hist_size;
+
+       if (area_id == DM_STATS_WALK_REGION) {
+               /* region aggregation */
+               group = 0;
+               if (!_stats_region_present(&dms->regions[region_id]))
+                       return_NULL;
+
+               if (!dms->regions[region_id].bounds)
+                       return_NULL;
+
+               /* no counter data: fall back to the bounds histogram */
+               if (!dms->regions[region_id].counters)
+                       return dms->regions[region_id].bounds;
+
+               /* return the cached aggregate if one exists */
+               if (dms->regions[region_id].histogram)
+                       return dms->regions[region_id].histogram;
+
+               dmh_cur = dms->regions[region_id].counters[0].histogram;
+               dmh_cachep = &dms->regions[region_id].histogram;
+               nr_bins = dms->regions[region_id].bounds->nr_bins;
+       } else {
+               /* group aggregation */
+               group_id = region_id;
+               area_id = DM_STATS_WALK_GROUP;
+               if (!_stats_group_id_present(dms, group_id))
+                       return_NULL;
+
+               if (!dms->regions[group_id].bounds)
+                       return_NULL;
+
+               /* no counter data: fall back to the bounds histogram */
+               if (!dms->regions[group_id].counters)
+                       return dms->regions[group_id].bounds;
+
+               /* return the cached aggregate if one exists */
+               if (dms->groups[group_id].histogram)
+                       return dms->groups[group_id].histogram;
+
+               dmh_cur = dms->regions[group_id].counters[0].histogram;
+               dmh_cachep = &dms->groups[group_id].histogram;
+               nr_bins = dms->regions[group_id].bounds->nr_bins;
+       }
+
+       hist_size = sizeof(*dmh_aggr)
+                    + nr_bins * sizeof(struct dm_histogram_bin);
+
+       if (!(dmh_aggr = dm_pool_zalloc(dms->hist_mem, hist_size))) {
+               log_error("Could not allocate group histogram");
+               /* 0 == NULL in this pointer context */
+               return 0;
+       }
+
+       dmh_aggr->nr_bins = dmh_cur->nr_bins;
+       dmh_aggr->dms = dms;
+
+       /* sum every area's bins into the aggregate */
+       if (!group)
+               _foreach_region_area(dms, region_id, area_id) {
+                       _sum_histogram_bins(dms, dmh_aggr, region_id, area_id);
+               }
+       else {
+               _foreach_group_area(dms, group_id, region_id, area_id) {
+                       _sum_histogram_bins(dms, dmh_aggr, region_id, area_id);
+               }
+       }
+
+       /* copy bin boundaries and compute the total count */
+       for (bin = 0; bin < nr_bins; bin++) {
+               dmh_aggr->sum += dmh_aggr->bins[bin].count;
+               dmh_aggr->bins[bin].upper = dmh_cur->bins[bin].upper;
+       }
+
+       /* cache aggregate histogram for subsequent access */
+       *dmh_cachep = dmh_aggr;
+
+       return dmh_aggr;
+}
+
+/*
+ * Return the histogram for the given region and area, decoding
+ * DM_STATS_{REGION,AREA}_CURRENT and the group/region walk flags.
+ * Group identifiers and whole-region walks return an aggregated
+ * histogram; a region without counter data returns its bounds
+ * histogram instead.
+ */
+struct dm_histogram *dm_stats_get_histogram(const struct dm_stats *dms,
+                                           uint64_t region_id,
+                                           uint64_t area_id)
+{
+       int aggr = 0;
+
+       if (region_id == DM_STATS_REGION_CURRENT) {
+               region_id = dms->cur_region;
+               if (region_id & DM_STATS_WALK_GROUP) {
+                       region_id = dms->cur_group;
+                       aggr = 1;
+               }
+       } else if (region_id & DM_STATS_WALK_GROUP) {
+               region_id &= ~DM_STATS_WALK_GROUP;
+               aggr = 1;
+       }
+
+       area_id = (area_id == DM_STATS_AREA_CURRENT)
+                  ? dms->cur_area : area_id ;
+
+       /* whole-region walks also require aggregation */
+       if (area_id == DM_STATS_WALK_REGION)
+               aggr = 1;
+
+       if (aggr)
+               return _aggregate_histogram(dms, region_id, area_id);
+
+       if (region_id & DM_STATS_WALK_REGION)
+               region_id &= ~DM_STATS_WALK_REGION;
+
+       /* no counter data: fall back to the bounds histogram */
+       if (!dms->regions[region_id].counters)
+               return dms->regions[region_id].bounds;
+
+       return dms->regions[region_id].counters[area_id].histogram;
+}
+
+/* Return the number of bins in the histogram. */
+int dm_histogram_get_nr_bins(const struct dm_histogram *dmh)
+{
+       return dmh->nr_bins;
+}
+
+/*
+ * Lower bound of bin: zero for the first bin, otherwise the previous
+ * bin's upper bound.
+ */
+uint64_t dm_histogram_get_bin_lower(const struct dm_histogram *dmh, int bin)
+{
+       if (bin == 0)
+               return 0;
+
+       return dmh->bins[bin - 1].upper;
+}
+
+/* Return the upper bound of bin. */
+uint64_t dm_histogram_get_bin_upper(const struct dm_histogram *dmh, int bin)
+{
+       return dmh->bins[bin].upper;
+}
+
+/* Width of bin: the distance between its lower and upper bounds. */
+uint64_t dm_histogram_get_bin_width(const struct dm_histogram *dmh, int bin)
+{
+       return dm_histogram_get_bin_upper(dmh, bin)
+               - dm_histogram_get_bin_lower(dmh, bin);
+}
+
+/* Return the count recorded in bin. */
+uint64_t dm_histogram_get_bin_count(const struct dm_histogram *dmh, int bin)
+{
+       return dmh->bins[bin].count;
+}
+
+/* Return the sum of all bin counts in the histogram. */
+uint64_t dm_histogram_get_sum(const struct dm_histogram *dmh)
+{
+       return dmh->sum;
+}
+
+/*
+ * Return bin's count as a dm_percent_t fraction of the histogram sum,
+ * or DM_PERCENT_0 when the sum, count, or bin width is zero.
+ *
+ * FIX: pass the counter straight to dm_make_percent(); the previous
+ * uint64_t -> double -> uint64_t round-trip silently lost precision
+ * for counts above 2^53 and served no purpose.
+ */
+dm_percent_t dm_histogram_get_bin_percent(const struct dm_histogram *dmh,
+                                         int bin)
+{
+       uint64_t value = dm_histogram_get_bin_count(dmh, bin);
+       uint64_t width = dm_histogram_get_bin_width(dmh, bin);
+       uint64_t total = dm_histogram_get_sum(dmh);
+
+       if (!total || !value || !width)
+               return DM_PERCENT_0;
+
+       return dm_make_percent(value, total);
+}
+
+/*
+ * Histogram string helper functions: used to construct histogram and
+ * bin boundary strings from numeric data.
+ */
+
+/*
+ * Allocate a zeroed, unbound histogram with nr_bins bins.  Only used
+ * to hold bounds values passed as arguments to
+ * dm_stats_create_region(); the result belongs to no region or
+ * dm_stats handle.
+ */
+static struct dm_histogram *_alloc_dm_histogram(int nr_bins)
+{
+       size_t size = sizeof(struct dm_histogram)
+                      + (unsigned) nr_bins * sizeof(struct dm_histogram_bin);
+
+       return dm_zalloc(size);
+}
+
+/*
+ * Parse a histogram bounds string supplied by the user. The string
+ * consists of a list of numbers, "n1,n2,n3,..." with optional 'ns',
+ * 'us', 'ms', or 's' unit suffixes.
+ *
+ * Returns a new histogram (allocated with dm_zalloc()) whose bin
+ * boundaries hold the parsed values converted to nanoseconds, or
+ * NULL on error.
+ */
+struct dm_histogram *dm_histogram_bounds_from_string(const char *bounds_str)
+{
+       static const char _valid_chars[] = "0123456789,muns";
+       uint64_t this_val = 0, mult = 1;
+       const char *c, *v, *val_start;
+       struct dm_histogram_bin *cur;
+       struct dm_histogram *dmh;
+       int nr_entries = 1;
+       char *endptr;
+
+       c = bounds_str;
+
+       /* Count number of bounds entries. */
+       while(*c)
+               if (*(c++) == ',')
+                       nr_entries++;
+
+       c = bounds_str;
+
+       if (!(dmh = _alloc_dm_histogram(nr_entries)))
+               return_NULL;
+
+       dmh->nr_bins = nr_entries;
+
+       cur = dmh->bins;
+
+       do {
+               for (v = _valid_chars; *v; v++)
+                       if (*c == *v)
+                               break;
+
+               if (!*v) {
+                       stack;
+                       goto badchar;
+               }
+
+               if (*c == ',') {
+                       log_error("Empty histogram bin not allowed: %s",
+                                 bounds_str);
+                       goto bad;
+               } else {
+                       val_start = c;
+                       endptr = NULL;
+
+                       /*
+                        * FIX: reset the unit multiplier for each value;
+                        * previously a suffix such as "ms" leaked into
+                        * every following unsuffixed value in the list.
+                        */
+                       mult = 1;
+
+                       this_val = strtoull(val_start, &endptr, 10);
+                       /*
+                        * FIX: strtoull() never leaves endptr NULL, so
+                        * test for an empty digit sequence instead;
+                        * digit-less tokens such as "ms" are now
+                        * rejected rather than silently parsed as 0.
+                        */
+                       if (endptr == val_start) {
+                               log_error("Could not parse histogram bound.");
+                               goto bad;
+                       }
+                       c = endptr; /* Advance to units, comma, or end. */
+
+                       if (*c == 's') {
+                               mult = NSEC_PER_SEC;
+                               c++; /* Advance over 's'. */
+                       } else if (*(c + 1) == 's') {
+                               if (*c == 'm')
+                                       mult = NSEC_PER_MSEC;
+                               else if (*c == 'u')
+                                       mult = NSEC_PER_USEC;
+                               else if (*c == 'n')
+                                       mult = 1;
+                               else {
+                                       stack;
+                                       goto badchar;
+                               }
+                               c += 2; /* Advance over 'ms', 'us', or 'ns'. */
+                       } else if (*c == ',')
+                               c++;
+                       else if (*c) { /* Expected ',' or NULL. */
+                               stack;
+                               goto badchar;
+                       }
+
+                       if (*c == ',')
+                               c++;
+                       this_val *= mult;
+                       (cur++)->upper = this_val;
+               }
+       } while (*c);
+
+       /* Bounds histograms have no owner. */
+       dmh->dms = NULL;
+       dmh->region = NULL;
+
+       return dmh;
+
+badchar:
+       log_error("Invalid character in histogram: %c", *c);
+bad:
+       dm_free(dmh);
+       return NULL;
+}
+
+struct dm_histogram *dm_histogram_bounds_from_uint64(const uint64_t *bounds)
+{
+       struct dm_histogram_bin *bin;
+       struct dm_histogram *dmh;
+       const uint64_t *b;
+       int nr_bins = 0;
+
+       if (!bounds || !bounds[0]) {
+               log_error("Could not parse empty histogram bounds array");
+               return 0;
+       }
+
+       /* Count the zero-terminated bounds values. */
+       for (b = bounds; *b; b++)
+               nr_bins++;
+
+       if (!(dmh = _alloc_dm_histogram(nr_bins)))
+               return_0;
+
+       dmh->nr_bins = nr_bins;
+
+       /* Copy each bound into the corresponding bin upper limit. */
+       bin = dmh->bins;
+       for (b = bounds; *b; b++)
+               (bin++)->upper = *b;
+
+       /* Bounds histograms have no owner. */
+       dmh->dms = NULL;
+       dmh->region = NULL;
+
+       return dmh;
+}
+
+void dm_histogram_bounds_destroy(struct dm_histogram *bounds)
+{
+       if (!bounds)
+               return;
+
+       /* A bounds-only histogram must not be attached to a stats
+        * handle or region: warn (but still free) if this one is. */
+       if (bounds->dms || bounds->region) {
+               log_error("Freeing invalid histogram bounds pointer %p.",
+                         (void *) bounds);
+               stack;
+       }
+
+       /* Cast away const-ness for dm_free(). */
+       dm_free((void *) bounds);
+}
+
+/*
+ * Reduce a bound expressed in nanoseconds to the coarsest time unit
+ * that divides it exactly, returning the matching suffix string.
+ */
+static void _scale_bound_value_to_suffix(uint64_t *bound, const char **suffix)
+{
+       *suffix = "ns";
+
+       if (!(*bound % NSEC_PER_SEC)) {
+               *bound /= NSEC_PER_SEC;
+               *suffix = "s";
+               return;
+       }
+
+       if (!(*bound % NSEC_PER_MSEC)) {
+               *bound /= NSEC_PER_MSEC;
+               *suffix = "ms";
+               return;
+       }
+
+       if (!(*bound % NSEC_PER_USEC)) {
+               *bound /= NSEC_PER_USEC;
+               *suffix = "us";
+       }
+}
+
+/* Mask selecting the bounds-presentation bits of the report flags. */
+#define DM_HISTOGRAM_BOUNDS_MASK 0x30
+/* Scratch buffer size for a single formatted bound string. */
+#define BOUNDS_LEN 64
+
+/*
+ * Format one histogram bin boundary (or bin range) into buf.
+ *
+ * lower/upper are the bin limits in nanoseconds; flags selects which
+ * bounds to print (lower, upper or a range), whether to append time
+ * unit suffixes and whether a value separator is needed; width pads
+ * the bound string.  Returns the dm_snprintf() result, or 0 on error
+ * (including flags that request no bounds at all).
+ */
+static int _make_bounds_string(char *buf, size_t size, uint64_t lower,
+                              uint64_t upper, int flags, int width)
+{
+       char bound_buf[BOUNDS_LEN];
+       const char *l_suff = NULL;
+       const char *u_suff = NULL;
+       const char *sep = "";
+       int bounds = flags & DM_HISTOGRAM_BOUNDS_MASK;
+
+       if (!bounds)
+               return_0;
+
+       *buf = '\0';
+
+       /* Scale values to the largest exact unit when suffixes are on. */
+       if (flags & DM_HISTOGRAM_SUFFIX) {
+               _scale_bound_value_to_suffix(&lower, &l_suff);
+               _scale_bound_value_to_suffix(&upper, &u_suff);
+       } else
+               l_suff = u_suff = "";
+
+       /* A ':' separates the bounds string from the bin value. */
+       if (flags & DM_HISTOGRAM_VALUES)
+               sep = ":";
+
+       if (bounds > DM_HISTOGRAM_BOUNDS_LOWER) {
+               /* Handle infinite uppermost bound. */
+               if (upper == UINT64_MAX) {
+                       if (dm_snprintf(bound_buf, sizeof(bound_buf),
+                                        ">" FMTu64 "%s", lower, l_suff) < 0)
+                               goto_out;
+                       /* Only display an 'upper' string for final bin. */
+                       bounds = DM_HISTOGRAM_BOUNDS_UPPER;
+               } else {
+                       if (dm_snprintf(bound_buf, sizeof(bound_buf),
+                                        FMTu64 "%s", upper, u_suff) < 0)
+                               goto_out;
+               }
+       } else if (bounds == DM_HISTOGRAM_BOUNDS_LOWER) {
+               if ((dm_snprintf(bound_buf, sizeof(bound_buf), FMTu64 "%s",
+                                lower, l_suff)) < 0)
+                       goto_out;
+       }
+
+       switch (bounds) {
+       case DM_HISTOGRAM_BOUNDS_LOWER:
+       case DM_HISTOGRAM_BOUNDS_UPPER:
+               /* Single bound: pad to width and append separator. */
+               return dm_snprintf(buf, size, "%*s%s", width, bound_buf, sep);
+       case DM_HISTOGRAM_BOUNDS_RANGE:
+               /* Range: "lower-upper" with optional suffixes. */
+               return dm_snprintf(buf, size,  FMTu64 "%s-%s%s",
+                                  lower, l_suff, bound_buf, sep);
+       }
+out:
+       return 0;
+}
+
+#define BOUND_WIDTH_NOSUFFIX 10 /* 999999999 nsecs */
+#define BOUND_WIDTH 6 /* bounds string up to 9999xs */
+#define COUNT_WIDTH 6 /* count string: up to 9999 */
+#define PERCENT_WIDTH 6 /* percent string : 0.00-100.00% */
+#define DM_HISTOGRAM_VALUES_MASK 0x06
+
+/*
+ * Format bin of histogram dmh (or all bins when bin is negative) as a
+ * string built in the owning handle's histogram pool.  flags selects
+ * bounds, count, percent and suffix formatting; width pads the value
+ * columns (0 disables padding).  Returns the pool-allocated string,
+ * or NULL on error.
+ */
+const char *dm_histogram_to_string(const struct dm_histogram *dmh, int bin,
+                                  int width, int flags)
+{
+       char buf[BOUNDS_LEN], bounds_buf[BOUNDS_LEN];
+       int minwidth, bounds, values, start, last;
+       uint64_t lower, upper, val_u64; /* bounds of the current bin. */
+       /* Use the histogram pool for string building. */
+       struct dm_pool *mem = dmh->dms->hist_mem;
+       const char *sep = "";
+       int bounds_width;
+       ssize_t len = 0;
+       float val_flt;
+
+       bounds = flags & DM_HISTOGRAM_BOUNDS_MASK;
+       values = flags & DM_HISTOGRAM_VALUES;
+
+       /* A negative bin requests every bin in the histogram. */
+       if (bin < 0) {
+               start = 0;
+               last = dmh->nr_bins - 1;
+       } else
+               start = last = bin;
+
+       minwidth = width;
+
+       /* Pick a default column width when none was requested. */
+       if (width < 0 || !values)
+               width = minwidth = 0; /* no padding */
+       else if (flags & DM_HISTOGRAM_PERCENT)
+               width = minwidth = (width) ? : PERCENT_WIDTH;
+       else if (flags & DM_HISTOGRAM_VALUES)
+               width = minwidth = (width) ? : COUNT_WIDTH;
+
+       if (values && !width)
+               sep = ":";
+
+       /* Set bounds string to the empty string. */
+       bounds_buf[0] = '\0';
+
+       if (!dm_pool_begin_object(mem, 64))
+               return_0;
+
+       for (bin = start; bin <= last; bin++) {
+               if (bounds) {
+                       /* Default bounds width depends on time suffixes. */
+                       bounds_width = (!(flags & DM_HISTOGRAM_SUFFIX))
+                                       ? BOUND_WIDTH_NOSUFFIX
+                                       : BOUND_WIDTH ;
+
+                       bounds_width = (!width) ? width : bounds_width;
+
+                       lower = dm_histogram_get_bin_lower(dmh, bin);
+                       upper = dm_histogram_get_bin_upper(dmh, bin);
+
+                       len = sizeof(bounds_buf);
+                       len = _make_bounds_string(bounds_buf, len,
+                                                 lower, upper, flags,
+                                                 bounds_width);
+                       /*
+                        * Comma separates "bounds: value" pairs unless
+                        * --noheadings is used.
+                        */
+                       sep = (width || !values) ? "," : ":";
+
+                       /* Adjust width by real bounds length if set. */
+                       width -= (width) ? (len - (bounds_width + 1)) : 0;
+
+                       /* -ve width indicates specified width was overrun. */
+                       width = (width > 0) ? width : 0;
+               }
+
+               /* No trailing separator after the final bin. */
+               if (bin == last)
+                       sep = "";
+
+               if (flags & DM_HISTOGRAM_PERCENT) {
+                       dm_percent_t pr;
+                       pr = dm_histogram_get_bin_percent(dmh, bin);
+                       val_flt = dm_percent_to_float(pr);
+                       len = dm_snprintf(buf, sizeof(buf), "%s%*.2f%%%s",
+                                         bounds_buf, width, val_flt, sep);
+               } else if (values) {
+                       val_u64 = dmh->bins[bin].count;
+                       len = dm_snprintf(buf, sizeof(buf), "%s%*"PRIu64"%s",
+                                         bounds_buf, width, val_u64, sep);
+               } else if (bounds)
+                       len = dm_snprintf(buf, sizeof(buf), "%s%s", bounds_buf,
+                                         sep);
+               else {
+                       *buf = '\0';
+                       len = 0;
+               }
+
+               if (len < 0)
+                       goto_bad;
+
+               width = minwidth; /* re-set histogram column width. */
+               if (!dm_pool_grow_object(mem, buf, (size_t) len))
+                       goto_bad;
+       }
+
+       /* NUL-terminate the accumulated pool object. */
+       if (!dm_pool_grow_object(mem, "\0", 1))
+               goto_bad;
+
+       return (const char *) dm_pool_end_object(mem);
+
+bad:
+       dm_pool_abandon_object(mem);
+       return NULL;
+}
+
+/*
+ * A lightweight representation of an extent (region, area, file
+ * system block or extent etc.). A table of extents can be used
+ * to sort and to efficiently find holes or overlaps among a set
+ * of tuples of the form (id, start, len).
+ */
+struct _extent {
+       struct dm_list list;    /* list linkage; NOTE(review): code below
+                                  appears to rely on this being the first
+                                  member — confirm before reordering */
+       uint64_t id;            /* region ID of this extent */
+       uint64_t start;         /* start sector */
+       uint64_t len;           /* length in sectors */
+};
+
+/* last address in an extent */
+#define _extent_end(a) ((a)->start + (a)->len - 1)
+
+/* a and b must be sorted by increasing start sector */
+/* NOTE(review): uses '>' so extents that share only their boundary
+ * sector (end == b->start) are not reported as overlapping, although
+ * _extent_end() is inclusive — confirm this is intended. */
+#define _extents_overlap(a, b) (_extent_end(a) > (b)->start)
+
+/*
+ * qsort comparator ordering struct _extent entries by ascending
+ * start sector.
+ */
+static int _extent_start_compare(const void *p1, const void *p2)
+{
+       const struct _extent *lhs = (const struct _extent *) p1;
+       const struct _extent *rhs = (const struct _extent *) p2;
+
+       if (lhs->start == rhs->start)
+               return 0;
+
+       return (lhs->start < rhs->start) ? -1 : 1;
+}
+
+/*
+ * Install a group in dms whose leader is the lowest region ID set in
+ * regions.  On success the group takes ownership of the regions
+ * bitset and (a copy of) the optional alias, and the group tag is
+ * written to the leader's aux_data.
+ */
+static int _stats_create_group(struct dm_stats *dms, dm_bitset_t regions,
+                              const char *alias, uint64_t *group_id)
+{
+       struct dm_stats_group *group;
+       *group_id = dm_bit_get_first(regions);
+
+       /* group has no regions? */
+       if (*group_id == DM_STATS_GROUP_NOT_PRESENT)
+               return_0;
+
+       group = &dms->groups[*group_id];
+
+       if (group->regions) {
+               /* Fixed: message previously read "...whilecreating group ID
+                * bitmap<id>" with no spaces. */
+               log_error(INTERNAL_ERROR "Unexpected group state while "
+                         "creating group ID bitmap " FMTu64, *group_id);
+               return 0;
+       }
+
+       group->group_id = *group_id;
+
+       if (alias) {
+               /* Duplicate the alias so the group owns its own copy;
+                * fail cleanly if the allocation fails rather than
+                * silently recording a NULL alias. */
+               if (!(group->alias = dm_strdup(alias))) {
+                       log_error("Could not allocate group alias string.");
+                       return 0;
+               }
+       } else
+               group->alias = NULL;
+
+       /* Regions are installed after the alias so that an alias
+        * failure leaves the caller sole owner of the bitset. */
+       group->regions = regions;
+
+       /* force an update of the group tag stored in aux_data */
+       if (!_stats_set_aux(dms, *group_id, dms->regions[*group_id].aux_data))
+               return 0;
+
+       return 1;
+}
+
+/*
+ * Check the regions in a candidate group bitmap for overlap: returns
+ * 1 if no pair of regions overlaps and 0 otherwise.  Each overlapping
+ * pair found is reported via log_warn() since overlapping regions
+ * count the same events twice.
+ */
+static int _stats_group_check_overlap(const struct dm_stats *dms,
+                                     dm_bitset_t regions, int count)
+{
+       struct dm_list ext_list = DM_LIST_HEAD_INIT(ext_list);
+       struct _extent *ext, *tmp, *next, *map = NULL;
+       size_t map_size = (dms->max_region + 1) * sizeof(*map);
+       int i = 0, id, overlap, merged;
+
+       map = dm_pool_alloc(dms->mem, map_size);
+       if (!map) {
+               log_error("Could not allocate memory for region map");
+               return 0;
+       }
+
+       /* build a table of extents in order of region_id */
+       for (id = dm_bit_get_first(regions); id >= 0;
+            id = dm_bit_get_next(regions, id)) {
+               dm_list_init(&map[i].list);
+               map[i].id = id;
+               map[i].start = dms->regions[id].start;
+               map[i].len = dms->regions[id].len;
+               i++;
+       }
+
+       /* A single region cannot overlap itself. */
+       if (i == 1) {
+               dm_pool_free(dms->mem, map);
+               return 1;
+       }
+
+       /* sort by extent.start */
+       qsort(map, count, sizeof(*map), _extent_start_compare);
+
+       for (i = 0; i < count; i++)
+               dm_list_add(&ext_list, &map[i].list);
+
+       overlap = 0;
+merge:
+       /*
+        * Repeatedly merge overlapping neighbours until a pass makes no
+        * change; each merge replaces two extents with their union so
+        * chains of overlaps are only reported once per pair.
+        */
+       merged = 0;
+       dm_list_iterate_items_safe(ext, tmp, &ext_list) {
+               /*
+                * NOTE(review): the !next test below relies on
+                * dm_list_next() yielding NULL at the end of the list
+                * and on 'list' being the first member of struct
+                * _extent (so dm_list_item(NULL, ...) stays NULL) —
+                * confirm both hold.
+                */
+               next = dm_list_item(dm_list_next(&ext_list, &ext->list),
+                                   struct _extent);
+               if (!next)
+                       continue;
+
+               if (_extents_overlap(ext, next)) {
+                       log_warn("WARNING: region IDs " FMTu64 " and "
+                                FMTu64 " overlap. Some events will be "
+                                "counted twice.", ext->id, next->id);
+                       /* merge larger extent into smaller */
+                       if (_extent_end(ext) > _extent_end(next)) {
+                               next->id = ext->id;
+                               next->len = ext->len;
+                       }
+                       if (ext->start < next->start)
+                               next->start = ext->start;
+                       dm_list_del(&ext->list);
+                       overlap = merged = 1;
+               }
+       }
+       /* continue until no merge candidates remain */
+       if (merged)
+               goto merge;
+
+       dm_pool_free(dms->mem, map);
+       return (overlap == 0);
+}
+
+/* Duplicate the bin count and each upper boundary from one histogram
+ * into another. */
+static void _stats_copy_histogram_bounds(struct dm_histogram *to,
+                                        struct dm_histogram *from)
+{
+       int bin;
+
+       to->nr_bins = from->nr_bins;
+
+       for (bin = from->nr_bins - 1; bin >= 0; bin--)
+               to->bins[bin].upper = from->bins[bin].upper;
+}
+
+/*
+ * Compare histogram bounds h1 and h2: return 1 if both are non-NULL,
+ * have the same number of bins, and every bin boundary value matches;
+ * return 0 otherwise.
+ */
+static int _stats_check_histogram_bounds(struct dm_histogram *h1,
+                                        struct dm_histogram *h2)
+{
+       int bin;
+
+       if (!h1 || !h2 || h1->nr_bins != h2->nr_bins)
+               return 0;
+
+       for (bin = 0; bin < h1->nr_bins; bin++)
+               if (h1->bins[bin].upper != h2->bins[bin].upper)
+                       return 0;
+
+       return 1;
+}
+
+/*
+ * Create a new group in stats handle dms from the group description
+ * passed in members, with an optional alias.  On success the group's
+ * ID is returned in group_id and the group owns the parsed bitset.
+ */
+int dm_stats_create_group(struct dm_stats *dms, const char *members,
+                         const char *alias, uint64_t *group_id)
+{
+       struct dm_histogram *check = NULL, *bounds;
+       int i, count = 0, precise = 0;
+       dm_bitset_t regions;
+
+       if (!dms->regions || !dms->groups) {
+               log_error("Could not create group: no regions found.");
+               return 0;
+       }
+
+       if (!(regions = dm_bitset_parse_list(members, NULL, 0))) {
+               log_error("Could not parse list: '%s'", members);
+               return 0;
+       }
+
+       /*
+        * Scratch histogram used to compare region histogram bounds.
+        * NOTE(review): allocated without space for bins[] although
+        * _stats_copy_histogram_bounds() writes check->bins[] — confirm
+        * struct dm_histogram reserves bin storage.
+        */
+       if (!(check = dm_pool_zalloc(dms->hist_mem, sizeof(*check)))) {
+               log_error("Could not allocate memory for bounds check");
+               goto bad;
+       }
+
+       /* too many bits? (regions[0] holds the bitset size) */
+       if ((*regions - 1) > dms->max_region) {
+               log_error("Invalid region ID: %d", *regions - 1);
+               goto bad;
+       }
+
+       /*
+        * Check that each region_id in the bitmap meets the group
+        * constraints: present, not already grouped, and if any
+        * histogram is present that they all have the same bounds.
+        */
+       for (i = dm_bit_get_first(regions); i >= 0;
+            i = dm_bit_get_next(regions, i)) {
+               if (!dm_stats_region_present(dms, i)) {
+                       log_error("Region ID %d does not exist", i);
+                       goto bad;
+               }
+               if (_stats_region_is_grouped(dms, i)) {
+                       log_error("Region ID %d already a member of group ID "
+                                 FMTu64, i, dms->regions[i].group_id);
+                       goto bad;
+               }
+               if (dms->regions[i].timescale == 1)
+                       precise++;
+
+               /* check for matching histogram bounds */
+               bounds = dms->regions[i].bounds;
+               if (bounds && !check->nr_bins)
+                       /* First histogram seen: record its bounds. */
+                       _stats_copy_histogram_bounds(check, bounds);
+               else if (bounds) {
+                       if (!_stats_check_histogram_bounds(check, bounds)) {
+                               log_error("All region histogram bounds "
+                                         "must match exactly");
+                               goto bad;
+                       }
+               }
+               count++;
+       }
+
+       if (precise && (precise != count))
+               log_warn("WARNING: Grouping regions with different clock resolution: "
+                        "precision may be lost.");
+
+       /* Overlap is allowed but surprising: note it in verbose logs. */
+       if (!_stats_group_check_overlap(dms, regions, count))
+               log_very_verbose("Creating group with overlapping regions.");
+
+       /* On success the group takes ownership of the regions bitset. */
+       if (!_stats_create_group(dms, regions, alias, group_id))
+               goto bad;
+
+       dm_pool_free(dms->hist_mem, check);
+       return 1;
+
+bad:
+       /* check is NULL if its own allocation failed: do not hand a
+        * NULL pointer to dm_pool_free(). */
+       if (check)
+               dm_pool_free(dms->hist_mem, check);
+       dm_bitset_destroy(regions);
+       return 0;
+}
+
+/*
+ * Remove the specified group_id.
+ *
+ * When remove_regions is non-zero every member region (including the
+ * group leader) is also deleted; otherwise only the grouping metadata
+ * is removed and the regions themselves remain.
+ */
+int dm_stats_delete_group(struct dm_stats *dms, uint64_t group_id,
+                         int remove_regions)
+{
+       struct dm_stats_region *leader;
+       dm_bitset_t regions;
+       uint64_t i;
+
+       if (group_id > dms->max_region) {
+               log_error("Invalid group ID: " FMTu64, group_id);
+               return 0;
+       }
+
+       if (!_stats_group_id_present(dms, group_id)) {
+               log_error("Group ID " FMTu64 " does not exist", group_id);
+               return 0;
+       }
+
+       /* The leader's region ID doubles as the group ID. */
+       regions = dms->groups[group_id].regions;
+       leader = &dms->regions[group_id];
+
+       /* delete all but the group leader */
+       /* regions[0] holds the bitset size, so walk down from the top
+        * bit; deletion failures are logged but do not stop the loop. */
+       for (i = (*regions - 1); i > leader->region_id; i--) {
+               if (dm_bit(regions, i)) {
+                       dm_bit_clear(regions, i);
+                       if (remove_regions && !dm_stats_delete_region(dms, i))
+                               log_warn("WARNING: Failed to delete region "
+                                        FMTu64 " on %s.", i, dms->name);
+               }
+       }
+
+       /* clear group and mark as not present */
+       _stats_clear_group_regions(dms, group_id);
+       _stats_group_destroy(&dms->groups[group_id]);
+
+       /* delete leader or clear aux_data */
+       if (remove_regions)
+               return dm_stats_delete_region(dms, group_id);
+       else if (!_stats_set_aux(dms, group_id, leader->aux_data))
+               return 0;
+
+       return 1;
+}
+
+/* Resolve region_id (or the current-region cursor) to its group ID,
+ * stripping any walk flag bits first. */
+uint64_t dm_stats_get_group_id(const struct dm_stats *dms, uint64_t region_id)
+{
+       if (region_id == DM_STATS_REGION_CURRENT)
+               region_id = dms->cur_region;
+
+       if (region_id & DM_STATS_WALK_GROUP)
+               return (region_id == DM_STATS_WALK_GROUP)
+                       ? dms->cur_group
+                       : (region_id & ~DM_STATS_WALK_GROUP);
+
+       /* Clearing an unset flag bit is a no-op. */
+       region_id &= ~DM_STATS_WALK_REGION;
+
+       return dms->regions[region_id].group_id;
+}
+
+/*
+ * Build the textual member descriptor for group_id in a buffer
+ * allocated from the handle's pool, returned via buf.  Returns 1 on
+ * success; on failure returns 0 with *buf set to NULL.
+ */
+int dm_stats_get_group_descriptor(const struct dm_stats *dms,
+                                 uint64_t group_id, char **buf)
+{
+       dm_bitset_t regions = dms->groups[group_id].regions;
+       size_t buflen;
+
+       buflen = _stats_group_tag_len(dms, regions);
+
+       *buf = dm_pool_alloc(dms->mem, buflen);
+       if (!*buf) {
+               log_error("Could not allocate memory for regions string");
+               return 0;
+       }
+
+       if (!_stats_group_tag_fill(dms, regions, *buf, buflen)) {
+               /* Do not leak the pool allocation or hand the caller a
+                * partially-filled descriptor. */
+               dm_pool_free(dms->mem, *buf);
+               *buf = NULL;
+               return 0;
+       }
+
+       return 1;
+}
+
+#ifdef HAVE_LINUX_FIEMAP_H
+/*
+ * Resize the group bitmap corresponding to group_id so that it can
+ * contain at least num_regions members.
+ */
+static int _stats_resize_group(struct dm_stats_group *group,
+                              uint64_t num_regions)
+{
+       uint64_t last_bit = dm_bit_get_last(group->regions);
+       dm_bitset_t resized;
+
+       if (last_bit >= num_regions) {
+               log_error("Cannot resize group bitmap to " FMTu64
+                         " with bit " FMTu64 " set.", num_regions, last_bit);
+               return 0;
+       }
+
+       log_very_verbose("Resizing group bitmap from " FMTu32 " to " FMTu64
+                        " (last_bit: " FMTu64 ").", group->regions[0],
+                        num_regions, last_bit);
+
+       /* Allocate the larger bitmap before releasing the old one. */
+       if (!(resized = dm_bitset_create(NULL, (unsigned) num_regions))) {
+               log_error("Could not allocate memory for new group bitmap.");
+               return 0;
+       }
+
+       dm_bit_copy(resized, group->regions);
+       dm_bitset_destroy(group->regions);
+       group->regions = resized;
+       return 1;
+}
+
+/*
+ * Group a table of region_ids corresponding to the extents of a file.
+ *
+ * Builds a bitmap and a textual descriptor from region_ids[0..count-1]
+ * and creates a group with the given alias.  On success the group
+ * takes ownership of the bitmap.
+ */
+static int _stats_group_file_regions(struct dm_stats *dms, uint64_t *region_ids,
+                                    uint64_t count, const char *alias)
+{
+       dm_bitset_t regions = dm_bitset_create(NULL, dms->nr_regions);
+       uint64_t i, group_id = DM_STATS_GROUP_NOT_PRESENT;
+       char *members = NULL;
+       size_t buflen;
+
+       if (!regions) {
+               log_error("Cannot map file: failed to allocate group bitmap.");
+               return 0;
+       }
+
+       for (i = 0; i < count; i++)
+               dm_bit_set(regions, region_ids[i]);
+
+       /* Size and fill the textual group descriptor. */
+       buflen = _stats_group_tag_len(dms, regions);
+       members = dm_malloc(buflen);
+
+       if (!members) {
+               log_error("Cannot map file: failed to allocate group "
+                         "descriptor.");
+               dm_bitset_destroy(regions);
+               return 0;
+       }
+
+       if (!_stats_group_tag_fill(dms, regions, members, buflen))
+               goto bad;
+
+       /*
+        * overlaps should not be possible: overlapping file extents
+        * returned by FIEMAP imply a kernel bug or a corrupt fs.
+        */
+       if (!_stats_group_check_overlap(dms, regions, count))
+               log_very_verbose("Creating group with overlapping regions.");
+
+       /* NOTE(review): if _stats_create_group() fails after it has
+        * installed 'regions' in the group, destroying the bitset in
+        * the bad path below may double-free — confirm its failure
+        * paths. */
+       if (!_stats_create_group(dms, regions, alias, &group_id))
+               goto bad;
+
+       /* The group now owns 'regions'; only 'members' is freed here. */
+       dm_free(members);
+       return 1;
+bad:
+       dm_bitset_destroy(regions);
+       dm_free(members);
+       return 0;
+}
+
+/* Append one extent record (id, start, len in 512b sectors) to the
+ * pool object being grown; fd is used only for logging. */
+static int _stats_add_file_extent(int fd, struct dm_pool *mem, uint64_t id,
+                                 struct fiemap_extent *fm_ext)
+{
+       struct _extent extent;
+
+       /* final address of list is unknown */
+       memset(&extent.list, 0, sizeof(extent.list));
+
+       extent.id = id;
+       /* convert bytes to dm (512b) sectors */
+       extent.start = fm_ext->fe_physical >> SECTOR_SHIFT;
+       extent.len = fm_ext->fe_length >> SECTOR_SHIFT;
+
+       log_very_verbose("Extent " FMTu64 " on fd %d at " FMTu64 "+"
+                        FMTu64, extent.id, fd, extent.start, extent.len);
+
+       if (!dm_pool_grow_object(mem, &extent, sizeof(extent))) {
+               log_error("Cannot map file: failed to grow extent map.");
+               return 0;
+       }
+
+       return 1;
+}
+
+/* test for the boundary of an extent */
+#define ext_boundary(ext, exp)         \
+((ext).fe_logical != 0) &&             \
+((ext).fe_physical != (exp))
+
+/*
+ * Copy fields from fiemap_extent 'from' to the fiemap_extent
+ * pointed to by 'to'.
+ */
+#define ext_copy(to, from)     \
+do {                           \
+       *(to) = *(from);        \
+} while (0)
+
+/*
+ * Accumulate the extents returned by one FIEMAP call into the pool
+ * object table, merging logically-contiguous physical extents via
+ * fm_pending.  Returns the number of extents added by this call;
+ * *eof is set to 1 at end-of-file or -1 on error.
+ */
+static uint64_t _stats_map_extents(int fd, struct dm_pool *mem,
+                                  struct fiemap *fiemap,
+                                  struct fiemap_extent *fm_ext,
+                                  struct fiemap_extent *fm_last,
+                                  struct fiemap_extent *fm_pending,
+                                  uint64_t next_extent,
+                                  int *eof)
+{
+       uint64_t expected = 0, nr_extents = next_extent;
+       unsigned int i;
+
+       /*
+        * Loop over the returned extents adding the fm_pending extent
+        * to the table of extents each time a discontinuity (or eof)
+        * is detected.
+        *
+        * We use a pointer to fm_pending in the caller since it is
+        * possible that logical extents comprising a single physical
+        * extent are returned by successive FIEMAP calls.
+        */
+       for (i = 0; i < fiemap->fm_mapped_extents; i++) {
+               expected = fm_last->fe_physical + fm_last->fe_length;
+
+               if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST)
+                       *eof = 1;
+
+               /* cannot map extents that are not yet allocated. */
+               if (fm_ext[i].fe_flags
+                   & (FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DELALLOC))
+                       continue;
+
+               /*
+                * Begin a new extent if the current physical address differs
+                * from the expected address yielded by fm_last.fe_physical +
+                * fm_last.fe_length.
+                *
+                * A logical discontinuity is seen at the start of the file if
+                * unwritten space exists before the first extent: do not add
+                * any extent record until we have accumulated a non-zero length
+                * in fm_pending.
+                */
+               if (fm_pending->fe_length &&
+                   ext_boundary(fm_ext[i], expected)) {
+                       if (!_stats_add_file_extent(fd, mem, nr_extents,
+                                                   fm_pending))
+                               goto_bad;
+                       nr_extents++;
+                       /* Begin a new pending extent. */
+                       ext_copy(fm_pending, fm_ext + i);
+               } else {
+                       expected = 0;
+                       /* Begin a new pending extent for extent 0. If there is
+                        * a hole at the start of the file, the first allocated
+                        * extent will have a non-zero fe_logical. Detect this
+                        * case by testing fm_pending->fe_length: if no length
+                        * has been accumulated we are handling the first
+                        * physical extent of the file.
+                        */
+                       if (!fm_pending->fe_length || fm_ext[i].fe_logical == 0)
+                               ext_copy(fm_pending, fm_ext + i);
+                       else
+                               /* accumulate this logical extent's length */
+                               fm_pending->fe_length += fm_ext[i].fe_length;
+               }
+               *fm_last = fm_ext[i];
+       }
+
+       /*
+        * If the file only has a single extent, no boundary is ever
+        * detected to trigger addition of the first extent.
+        */
+       /* The caller guarantees fm_mapped_extents >= 1, so i >= 1 here.
+        * NOTE(review): this _stats_add_file_extent() return value is
+        * ignored, unlike the call above — confirm intentional. */
+       if (*eof || (fm_ext[i - 1].fe_logical == 0)) {
+               _stats_add_file_extent(fd, mem, nr_extents, fm_pending);
+               nr_extents++;
+       }
+
+       /* Resume the next FIEMAP call after the last logical extent. */
+       fiemap->fm_start = (fm_ext[i - 1].fe_logical +
+                           fm_ext[i - 1].fe_length);
+
+       /* return the number of extents found in this call. */
+       return nr_extents - next_extent;
+bad:
+       /* signal mapping error to caller */
+       *eof = -1;
+       return 0;
+}
+
+/*
+ * Read the extents of an open file descriptor into a table of struct _extent.
+ *
+ * On entry *count is ignored; on success it returns the number of
+ * extents found and the pool object holding the table is returned.
+ * Returns NULL (with *count == 0) on error.
+ *
+ * Based on e2fsprogs/misc/filefrag.c::filefrag_fiemap().
+ *
+ * Copyright 2003 by Theodore Ts'o.
+ *
+ */
+static struct _extent *_stats_get_extents_for_file(struct dm_pool *mem, int fd,
+                                                  uint64_t *count)
+{
+       struct fiemap_extent fm_last = {0}, fm_pending = {0}, *fm_ext = NULL;
+       struct fiemap *fiemap = NULL;
+       int eof = 0, nr_extents = 0;
+       struct _extent *extents;
+       unsigned long flags = 0;
+       uint64_t *buf;
+
+       /* grow temporary extent table in the pool */
+       if (!dm_pool_begin_object(mem, sizeof(*extents)))
+               return NULL;
+
+       /* zeroed so fiemap->fm_start begins at offset 0 */
+       buf = dm_zalloc(STATS_FIE_BUF_LEN);
+       if (!buf) {
+               log_error("Could not allocate memory for FIEMAP buffer.");
+               goto bad;
+       }
+
+       /* initialise pointers into the ioctl buffer. */
+       fiemap = (struct fiemap *) buf;
+       fm_ext = &fiemap->fm_extents[0];
+
+       /* space available per ioctl */
+       *count = (STATS_FIE_BUF_LEN - sizeof(*fiemap))
+                 / sizeof(struct fiemap_extent);
+
+       flags = FIEMAP_FLAG_SYNC;
+
+       do {
+               /* start of ioctl loop - zero size and set count to bufsize */
+               fiemap->fm_length = ~0ULL;
+               fiemap->fm_flags = flags;
+               fiemap->fm_extent_count = *count;
+
+               /* get count-sized chunk of extents */
+               if (ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap) < 0) {
+                       /* EBADR: kernel rejected the requested flags. */
+                       if (errno == EBADR)
+                               log_err_once("FIEMAP failed with unknown "
+                                            "flags %x.", fiemap->fm_flags);
+                       goto bad;
+               }
+
+               /* If 0 extents are returned, more ioctls are not needed */
+               if (fiemap->fm_mapped_extents == 0)
+                       break;
+
+               /* _stats_map_extents() advances fiemap->fm_start. */
+               nr_extents += _stats_map_extents(fd, mem, fiemap, fm_ext,
+                                                &fm_last, &fm_pending,
+                                                nr_extents, &eof);
+
+               /* check for extent mapping error */
+               if (eof < 0)
+                       goto bad;
+
+       } while (eof == 0);
+
+       if (!nr_extents) {
+               log_error("Cannot map file: no allocated extents.");
+               goto bad;
+       }
+
+       /* return total number of extents */
+       *count = nr_extents;
+       extents = dm_pool_end_object(mem);
+
+       /* free FIEMAP buffer. */
+       dm_free(buf);
+
+       return extents;
+
+bad:
+       *count = 0;
+       dm_pool_abandon_object(mem);
+       dm_free(buf);
+       return NULL;
+}
+
+#define MATCH_EXTENT(e, s, l) \
+(((e).start == (s)) && ((e).len == (l)))
+
+/* Linear scan of the extent table for an entry with matching start
+ * sector and length; returns NULL when no extent matches. */
+static struct _extent *_find_extent(uint64_t nr_extents, struct _extent *extents,
+                                   uint64_t start, uint64_t len)
+{
+       struct _extent *ext = extents;
+       size_t n;
+
+       for (n = 0; n < nr_extents; n++, ext++)
+               if (MATCH_EXTENT(*ext, start, len))
+                       return ext;
+
+       return NULL;
+}
+
+/*
+ * Clean up a table of region_id values that were created during a
+ * failed dm_stats_create_regions_from_fd, or dm_stats_update_regions_from_fd
+ * operation.
+ *
+ * Each of regions[0..nr_regions-1] is removed via _stats_delete_region();
+ * failures are logged and the loop continues so that as many regions as
+ * possible are released.
+ */
+static void _stats_cleanup_region_ids(struct dm_stats *dms, uint64_t *regions,
+                                     uint64_t nr_regions)
+{
+       uint64_t i;
+
+       for (i = 0; i < nr_regions; i++)
+               if (!_stats_delete_region(dms, regions[i]))
+                       /* Report the failing region ID, not the table index. */
+                       log_error("Could not delete region " FMTu64 ".",
+                                 regions[i]);
+}
+
+/*
+ * First update pass: prune no-longer-allocated extents from the group
+ * and build a table of the remaining extents so that their creation
+ * can be skipped in the second pass.
+ *
+ * On success the retained extents are returned via *old_extents (a
+ * table allocated from mem) and the count of retained extents is the
+ * non-negative return value; -1 is returned on error with any partial
+ * extent table abandoned.
+ *
+ * NOTE(review): a dm_pool_begin_object() failure returns 0, not -1;
+ * the caller only treats negative values as errors - confirm intended.
+ */
+static int _stats_unmap_regions(struct dm_stats *dms, uint64_t group_id,
+                               struct dm_pool *mem, struct _extent *extents,
+                               struct _extent **old_extents, uint64_t *count,
+                               int *regroup)
+{
+       struct dm_stats_region *region = NULL;
+       struct dm_stats_group *group = NULL;
+       uint64_t nr_kept, nr_old;
+       struct _extent ext;
+       int64_t i;
+
+       group = &dms->groups[group_id];
+
+       log_very_verbose("Checking for changed file extents in group ID "
+                        FMTu64, group_id);
+
+       if (!dm_pool_begin_object(mem, sizeof(**old_extents))) {
+               log_error("Could not allocate extent table.");
+               return 0;
+       }
+
+       nr_kept = nr_old = 0; /* counts of old and retained extents */
+
+       /*
+        * First pass: delete de-allocated extents and set regroup=1 if
+        * deleting the current group leader.
+        */
+       i = dm_bit_get_last(group->regions);
+       for (; i >= 0; i = dm_bit_get_prev(group->regions, i)) {
+               region = &dms->regions[i];
+               nr_old++;
+
+               /* Region still backed by an allocated extent? Keep it. */
+               if (extents && _find_extent(*count, extents,
+                                 region->start, region->len)) {
+                       ext.start = region->start;
+                       ext.len = region->len;
+                       ext.id = i;
+                       nr_kept++;
+
+                       if (!dm_pool_grow_object(mem, &ext, sizeof(ext)))
+                               goto out;
+
+                       log_very_verbose("Kept region " FMTu64, i);
+               } else {
+
+                       /* Deleting the group leader forces a regroup later. */
+                       if (i == group_id)
+                               *regroup = 1;
+
+                       if (!_stats_delete_region(dms, i)) {
+                               log_error("Could not remove region ID " FMTu64,
+                                         i);
+                               goto out;
+                       }
+
+                       log_very_verbose("Deleted region " FMTu64, i);
+               }
+       }
+
+       *old_extents = dm_pool_end_object(mem);
+       if (!*old_extents) {
+               log_error("Could not finalize region extent table.");
+               goto out;
+       }
+       /*
+        * NOTE(review): FMTd64 is used with uint64_t counters here (and
+        * FMTu64 with int64_t i above) - same width, mixed signedness;
+        * confirm this matches the FMT macro conventions.
+        */
+       log_very_verbose("Kept " FMTd64 " of " FMTd64 " old extents",
+                        nr_kept, nr_old);
+       log_very_verbose("Found " FMTu64 " new extents",
+                        *count - nr_kept);
+
+       return (int) nr_kept;
+out:
+       dm_pool_abandon_object(mem);
+       return -1;
+}
+
+/*
+ * Create or update a set of regions representing the extents of a file
+ * and return a table of uint64_t region_id values. The number of regions
+ * created is returned in the memory pointed to by count (which must be
+ * non-NULL).
+ *
+ * If group_id is not equal to DM_STATS_GROUP_NOT_PRESENT, it is assumed
+ * that group_id corresponds to a group containing existing regions that
+ * were mapped to this file at an earlier time: regions will be added or
+ * removed to reflect the current status of the file.
+ *
+ * The returned table is dm_malloc()ed and terminated with a
+ * DM_STATS_REGION_NOT_PRESENT sentinel entry; NULL is returned on
+ * error (with *count reset to 0).
+ */
+static uint64_t *_stats_map_file_regions(struct dm_stats *dms, int fd,
+                                        struct dm_histogram *bounds,
+                                        int precise, uint64_t group_id,
+                                        uint64_t *count, int *regroup)
+{
+       struct _extent *extents = NULL, *old_extents = NULL;
+       uint64_t *regions = NULL, fail_region, i, num_bits;
+       struct dm_stats_group *group = NULL;
+       struct dm_pool *extent_mem = NULL;
+       struct _extent *old_ext;
+       char *hist_arg = NULL;
+       struct statfs fsbuf;
+       int64_t nr_kept = 0;
+       struct stat buf;
+       int update;
+
+       *count = 0;
+       update = _stats_group_id_present(dms, group_id);
+
+#ifdef BTRFS_SUPER_MAGIC
+       /* btrfs FIEMAP data is logical, not physical: refuse to map. */
+       if (fstatfs(fd, &fsbuf)) {
+               log_error("fstatfs failed for fd %d", fd);
+               return 0;
+       }
+
+       if (fsbuf.f_type == BTRFS_SUPER_MAGIC) {
+               log_error("Cannot map file: btrfs does not provide "
+                         "physical FIEMAP extent data.");
+               return 0;
+       }
+#endif
+
+       if (fstat(fd, &buf)) {
+               log_error("fstat failed for fd %d", fd);
+               return 0;
+       }
+
+       if (!(buf.st_mode & S_IFREG)) {
+               log_error("Not a regular file");
+               return 0;
+       }
+
+       if (!dm_is_dm_major(major(buf.st_dev))) {
+               log_error("Cannot map file: not a device-mapper device.");
+               return 0;
+       }
+
+       /*
+        * If regroup is set here, we are creating a new filemap: otherwise
+        * we are updating a group with a valid group identifier in group_id.
+        */
+       if (update)
+               log_very_verbose("Updating extents from fd %d with group ID "
+                                FMTu64 " on (%d:%d)", fd, group_id,
+                                major(buf.st_dev), minor(buf.st_dev));
+       else
+               log_very_verbose("Mapping extents from fd %d on (%d:%d)",
+                                fd, major(buf.st_dev), minor(buf.st_dev));
+
+       /* Use a temporary, private pool for the extent table. This avoids
+         * hijacking the dms->mem (region table) pool which would lead to
+         * interleaving temporary allocations with dm_stats_list() data,
+         * causing complications in the error path.
+         */
+       if (!(extent_mem = dm_pool_create("extents", sizeof(*extents))))
+               return_NULL;
+
+       if (!(extents = _stats_get_extents_for_file(extent_mem, fd, count))) {
+               log_very_verbose("No extents found in fd %d", fd);
+               /* An update may legitimately see a fully truncated file. */
+               if (!update)
+                       goto out;
+       }
+
+       if (update) {
+               group = &dms->groups[group_id];
+               if ((nr_kept = _stats_unmap_regions(dms, group_id, extent_mem,
+                                                    extents, &old_extents,
+                                                    count, regroup)) < 0)
+                       goto_out;
+       }
+
+        if (bounds)
+                if (!(hist_arg = _build_histogram_arg(bounds, &precise)))
+                        goto_out;
+
+       /* make space for end-of-table marker */
+       if (!(regions = dm_malloc((1 + *count) * sizeof(*regions)))) {
+               log_error("Could not allocate memory for region IDs.");
+               goto_out;
+       }
+
+       /*
+        * Second pass (first for non-update case): create regions for
+        * all extents not retained from the prior mapping, and insert
+        * retained regions into the table of region_id values.
+        *
+        * If a regroup is not scheduled, set group bits for newly
+        * created regions in the group leader bitmap.
+        */
+       for (i = 0; i < *count; i++) {
+               if (update) {
+                       /* Retained extent: reuse its existing region ID. */
+                       if ((old_ext = _find_extent((uint64_t) nr_kept,
+                                                   old_extents,
+                                                   extents[i].start,
+                                                   extents[i].len))) {
+                               regions[i] = old_ext->id;
+                               continue;
+                       }
+               }
+               if (!_stats_create_region(dms, regions + i, extents[i].start,
+                                         extents[i].len, -1, precise, hist_arg,
+                                         dms->program_id, "")) {
+                       log_error("Failed to create region " FMTu64 " of "
+                                 FMTu64 " at " FMTu64 ".", i, *count,
+                                 extents[i].start);
+                       goto out_remove;
+               }
+
+               log_very_verbose("Created new region mapping " FMTu64 "+" FMTu64
+                                " with region ID " FMTu64, extents[i].start,
+                                extents[i].len, regions[i]);
+
+               if (!*regroup && update) {
+                       /* expand group bitmap */
+                       if (regions[i] > (group->regions[0] - 1)) {
+                               num_bits = regions[i] + *count;
+                               if (!_stats_resize_group(group, num_bits)) {
+                                       log_error("Failed to resize group "
+                                                 "bitmap.");
+                                       goto out_remove;
+                               }
+                       }
+                       dm_bit_set(group->regions, regions[i]);
+               }
+
+       }
+       regions[*count] = DM_STATS_REGION_NOT_PRESENT;
+
+       /* Update group leader aux_data for new group members. */
+       if (!*regroup && update)
+               if (!_stats_set_aux(dms, group_id,
+                                   dms->regions[group_id].aux_data))
+                       log_error("Failed to update group aux_data.");
+
+       if (bounds)
+               dm_free(hist_arg);
+
+       /* the extent table will be empty if the file has been truncated. */
+       if (extents)
+               dm_pool_free(extent_mem, extents);
+
+       dm_pool_destroy(extent_mem);
+
+       return regions;
+
+out_remove:
+       /* New region creation may begin to fail part-way through creating
+        * a set of file mapped regions: in this case we need to roll back
+        * the regions that were already created and return the handle to
+        * a consistent state. A listed handle is required for this: use a
+        * single list operation and call _stats_delete_region() directly
+        * to avoid a @stats_list ioctl and list parsing for each region.
+        */
+       if (!dm_stats_list(dms, NULL))
+               goto out;
+
+       fail_region = i;
+       _stats_cleanup_region_ids(dms, regions, fail_region);
+       *count = 0;
+
+out:
+       /* hist_arg/regions may be NULL here - presumably dm_free(NULL)
+        * is a no-op, as with free(3); confirm against dmlib. */
+       dm_pool_destroy(extent_mem);
+       dm_free(hist_arg);
+       dm_free(regions);
+       return NULL;
+}
+
+/*
+ * Create regions mapping the allocated extents of fd and, if group is
+ * set, group them (optionally under an alias). Returns a table of
+ * region IDs terminated by DM_STATS_REGION_NOT_PRESENT, or NULL on
+ * error; on grouping failure the newly created regions are rolled
+ * back via _stats_cleanup_region_ids().
+ */
+uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd,
+                                         int group, int precise,
+                                         struct dm_histogram *bounds,
+                                         const char *alias)
+{
+       uint64_t *regions, count;
+       int regroup = 1;
+
+       /* An alias is only meaningful as a group name. */
+       if (alias && !group) {
+               log_error("Cannot set alias without grouping regions.");
+               return NULL;
+       }
+
+       if (!(regions = _stats_map_file_regions(dms, fd, bounds, precise,
+                                               DM_STATS_GROUP_NOT_PRESENT,
+                                               &count, &regroup)))
+               return NULL;
+
+       if (!group)
+               return regions;
+
+       /* refresh handle */
+       if (!dm_stats_list(dms, NULL))
+               goto_out;
+
+       if (!_stats_group_file_regions(dms, regions, count, alias))
+               goto_out;
+
+       return regions;
+out:
+       _stats_cleanup_region_ids(dms, regions, count);
+       dm_free(regions);
+       return NULL;
+}
+
+/*
+ * Re-map the regions of an existing file mapped group to reflect the
+ * file's current allocation: deallocated extents are pruned and new
+ * extents get fresh regions. Returns the updated table of region IDs
+ * (DM_STATS_REGION_NOT_PRESENT terminated) or NULL on error.
+ */
+uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd,
+                                         uint64_t group_id)
+{
+       struct dm_histogram *bounds = NULL;
+       int nr_bins, precise, regroup;
+       uint64_t *regions, count = 0;
+       const char *alias = NULL;
+
+       /* Ensure a listed handle so group/region state is available. */
+       if (!dms->regions || !dm_stats_group_present(dms, group_id)) {
+               if (!dm_stats_list(dms, dms->program_id)) {
+                       log_error("Could not obtain region list while "
+                                 "updating group " FMTu64 ".", group_id);
+                       return NULL;
+               }
+       }
+
+       if (!dm_stats_group_present(dms, group_id)) {
+               log_error("Group ID " FMTu64 " does not exist.", group_id);
+               return NULL;
+       }
+
+       /*
+        * If the extent corresponding to the group leader's region has been
+        * deallocated, _stats_map_file_regions() will remove the region and
+        * the group. In this case, regroup will be set by the call and the
+        * group will be re-created using saved values.
+        */
+       regroup = 0;
+
+       /*
+        * A copy of the alias is needed to re-create the group when regroup=1.
+        */
+       if (dms->groups[group_id].alias) {
+               alias = dm_strdup(dms->groups[group_id].alias);
+               if (!alias) {
+                       log_error("Failed to allocate group alias string.");
+                       return NULL;
+               }
+       }
+
+       if (dms->regions[group_id].bounds) {
+               /*
+                * A copy of the histogram bounds must be passed to
+                * _stats_map_file_regions() to be used when creating new
+                * regions: it is not safe to use the copy in the current group
+                * leader since it may be destroyed during the first group
+                * update pass.
+                */
+               nr_bins = dms->regions[group_id].bounds->nr_bins;
+               bounds = _alloc_dm_histogram(nr_bins);
+               if (!bounds) {
+                       log_error("Could not allocate memory for group "
+                                 "histogram bounds.");
+                       goto out;
+               }
+               _stats_copy_histogram_bounds(bounds,
+                                            dms->regions[group_id].bounds);
+       }
+
+       precise = (dms->regions[group_id].timescale == 1);
+
+       regions = _stats_map_file_regions(dms, fd, bounds, precise,
+                                         group_id, &count, &regroup);
+
+       /*
+        * On failure _stats_map_file_regions() resets count to 0, so the
+        * cleanup at 'bad' is a no-op when regions is NULL.
+        */
+       if (!regions)
+               goto bad;
+
+       if (!dm_stats_list(dms, NULL))
+               goto bad;
+
+       /* regroup if there are regions to group */
+       if (regroup && (*regions != DM_STATS_REGION_NOT_PRESENT))
+               if (!_stats_group_file_regions(dms, regions, count, alias))
+                       goto bad;
+
+       dm_free(bounds);
+       dm_free((char *) alias);
+       return regions;
+bad:
+       _stats_cleanup_region_ids(dms, regions, count);
+       dm_free(bounds);
+       dm_free(regions);
+out:
+       dm_free((char *) alias);
+       return NULL;
+}
+#else /* !HAVE_LINUX_FIEMAP */
+/*
+ * Stub implementations used when the FIEMAP ioctl is unavailable:
+ * always fail with an error. 'return 0' yields NULL for the pointer
+ * return type.
+ */
+uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd,
+                                         int group, int precise,
+                                         struct dm_histogram *bounds,
+                                         const char *alias)
+{
+       log_error("File mapping requires FIEMAP ioctl support.");
+       return 0;
+}
+
+uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd,
+                                         uint64_t group_id)
+{
+       log_error("File mapping requires FIEMAP ioctl support.");
+       return 0;
+}
+#endif /* HAVE_LINUX_FIEMAP */
+
+#ifdef DMFILEMAPD
+/*
+ * Mode names, in the same order as the dm_filemapd_mode_t values
+ * starting at DM_FILEMAPD_FOLLOW_INODE: the mode is computed from the
+ * matched entry's offset into this table.
+ */
+static const char *_filemapd_mode_names[] = {
+       "inode",
+       "path",
+       NULL
+};
+
+/*
+ * Parse a dmfilemapd mode string ("inode" or "path"). A NULL mode_str
+ * defaults to DM_FILEMAPD_FOLLOW_INODE; an unrecognised string logs an
+ * error and returns DM_FILEMAPD_FOLLOW_NONE.
+ */
+dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str)
+{
+       dm_filemapd_mode_t mode = DM_FILEMAPD_FOLLOW_INODE;
+       const char **mode_name;
+
+       if (mode_str) {
+               for (mode_name = _filemapd_mode_names; *mode_name; mode_name++)
+                       if (!strcmp(*mode_name, mode_str))
+                               break;
+               if (*mode_name)
+                       mode = DM_FILEMAPD_FOLLOW_INODE
+                               + (mode_name - _filemapd_mode_names);
+               else {
+                       log_error("Could not parse dmfilemapd mode: %s",
+                                 mode_str);
+                       return DM_FILEMAPD_FOLLOW_NONE;
+               }
+       }
+       return mode;
+}
+
+#define DM_FILEMAPD "dmfilemapd"
+#define NR_FILEMAPD_ARGS 7 /* includes argv[0] */
+/*
+ * Start dmfilemapd to monitor the specified file descriptor, and to
+ * update the group given by 'group_id' when the file's allocation
+ * changes.
+ *
+ * usage: dmfilemapd <fd> <group_id> <mode> [<foreground>[<log_level>]]
+ *
+ * Returns 1 on success (daemon forked, or exec'd in foreground mode)
+ * and 0 on argument or fork/exec failure.
+ */
+int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path,
+                           dm_filemapd_mode_t mode, unsigned foreground,
+                           unsigned verbose)
+{
+       /*
+        * NOTE(review): fd_str/group_str are 8 bytes; very large fd or
+        * group_id values would not fit, in which case dm_snprintf()
+        * returns -1 and the call fails cleanly - confirm acceptable.
+        */
+       char fd_str[8], group_str[8], fg_str[2], verb_str[2];
+       const char *mode_str = _filemapd_mode_names[mode];
+       char *args[NR_FILEMAPD_ARGS + 1];
+       pid_t pid = 0;
+       int argc = 0;
+
+       if (fd < 0) {
+               log_error("dmfilemapd file descriptor must be "
+                         "non-negative: %d", fd);
+               return 0;
+       }
+
+       if (path[0] != '/') {
+               log_error("Path argument must specify an absolute path.");
+               return 0;
+       }
+
+       if (mode > DM_FILEMAPD_FOLLOW_PATH) {
+               log_error("Invalid dmfilemapd mode argument: "
+                         "Must be DM_FILEMAPD_FOLLOW_INODE or "
+                         "DM_FILEMAPD_FOLLOW_PATH");
+               return 0;
+       }
+
+       if (foreground > 1) {
+               log_error("Invalid dmfilemapd foreground argument. "
+                         "Must be 0 or 1: %d.", foreground);
+               return 0;
+       }
+
+       if (verbose > 3) {
+               log_error("Invalid dmfilemapd verbose argument. "
+                         "Must be 0..3: %d.", verbose);
+               return 0;
+       }
+
+       /* set argv[0] */
+       args[argc++] = (char *) DM_FILEMAPD;
+
+       /* set <fd> */
+       if ((dm_snprintf(fd_str, sizeof(fd_str), "%d", fd)) < 0) {
+               log_error("Could not format fd argument.");
+               return 0;
+       }
+       args[argc++] = fd_str;
+
+       /* set <group_id> */
+       if ((dm_snprintf(group_str, sizeof(group_str), FMTu64, group_id)) < 0) {
+               log_error("Could not format group_id argument.");
+               return 0;
+       }
+       args[argc++] = group_str;
+
+       /* set <path> */
+       args[argc++] = (char *) path;
+
+       /* set <mode> */
+       args[argc++] = (char *) mode_str;
+
+       /* set <foreground> */
+       if ((dm_snprintf(fg_str, sizeof(fg_str), "%u", foreground)) < 0) {
+               log_error("Could not format foreground argument.");
+               return 0;
+       }
+       args[argc++] = fg_str;
+
+       /* set <verbose> */
+       if ((dm_snprintf(verb_str, sizeof(verb_str), "%u", verbose)) < 0) {
+               log_error("Could not format verbose argument.");
+               return 0;
+       }
+       args[argc++] = verb_str;
+
+       /* terminate args[argc] */
+       args[argc] = NULL;
+
+       log_very_verbose("Spawning daemon as '%s %d " FMTu64 " %s %s %u %u'",
+                        *args, fd, group_id, path, mode_str,
+                        foreground, verbose);
+
+       /* Daemon mode: fork and exec in the child. */
+       if (!foreground && ((pid = fork()) < 0)) {
+               log_error("Failed to fork dmfilemapd process.");
+               return 0;
+       }
+
+       /* Parent: report the forked child's pid and return. */
+       if (pid > 0) {
+               log_very_verbose("Forked dmfilemapd process as pid %d", pid);
+               return 1;
+       }
+
+       /* Child (or foreground caller): replace the image with dmfilemapd. */
+       execvp(args[0], args);
+       log_sys_error("execvp", args[0]);
+       /* A forked child must not return into the caller's code. */
+       if (!foreground)
+               _exit(127);
+       return 0;
+}
+# else /* !DMFILEMAPD */
+/*
+ * Stubs used when dmfilemapd support is compiled out.
+ * NOTE(review): the stray ';' after the closing brace below is
+ * harmless but could be dropped upstream.
+ */
+dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str)
+{
+       return 0;
+};
+
+int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path,
+                           dm_filemapd_mode_t mode, unsigned foreground,
+                           unsigned verbose)
+{
+       log_error("dmfilemapd support disabled.");
+       return 0;
+}
+#endif /* DMFILEMAPD */
+
+/*
+ * Backward compatible dm_stats_create_region() implementations.
+ *
+ * Keep these at the end of the file to avoid adding clutter around the
+ * current dm_stats_create_region() version.
+ *
+ * These wrap _stats_create_region() with the older, narrower argument
+ * lists; presumably they back versioned symbols for old library ABIs -
+ * confirm against the library's symbol-version map.
+ */
+
+#if defined(__GNUC__)
+int dm_stats_create_region_v1_02_106(struct dm_stats *dms, uint64_t *region_id,
+                                    uint64_t start, uint64_t len, int64_t step,
+                                    int precise, const char *program_id,
+                                    const char *aux_data);
+int dm_stats_create_region_v1_02_106(struct dm_stats *dms, uint64_t *region_id,
+                                    uint64_t start, uint64_t len, int64_t step,
+                                    int precise, const char *program_id,
+                                    const char *aux_data)
+{
+       /* 1.02.106 lacks histogram argument. */
+       return _stats_create_region(dms, region_id, start, len, step, precise,
+                                   NULL, program_id, aux_data);
+}
+
+int dm_stats_create_region_v1_02_104(struct dm_stats *dms, uint64_t *region_id,
+                                    uint64_t start, uint64_t len, int64_t step,
+                                    const char *program_id, const char *aux_data);
+int dm_stats_create_region_v1_02_104(struct dm_stats *dms, uint64_t *region_id,
+                                    uint64_t start, uint64_t len, int64_t step,
+                                    const char *program_id, const char *aux_data)
+{
+       /* 1.02.104 lacks histogram and precise arguments. */
+       return _stats_create_region(dms, region_id, start, len, step, 0, NULL,
+                                   program_id, aux_data);
+}
+#endif
diff --git a/device_mapper/libdm-string.c b/device_mapper/libdm-string.c
new file mode 100644 (file)
index 0000000..8bd6c2d
--- /dev/null
@@ -0,0 +1,718 @@
+/*
+ * Copyright (C) 2006-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <math.h>  /* fabs() */
+#include <float.h> /* DBL_EPSILON */
+
+/*
+ * consume characters while they match the predicate function.
+ * Returns a pointer to the first character that fails the predicate
+ * (or the terminating NUL).
+ */
+static char *_consume(char *buffer, int (*fn) (int))
+{
+       while (*buffer && fn(*buffer))
+               buffer++;
+
+       return buffer;
+}
+
+/* Predicate: true for any non-whitespace (word) character. */
+static int _isword(int c)
+{
+       return !isspace(c);
+}
+
+/*
+ * Split buffer into NULL-separated words in argv.
+ * Returns number of words.
+ *
+ * The buffer is modified in place: each word is NUL-terminated and
+ * argv[0..n-1] point into it. At most 'max' words are extracted.
+ * 'ignore_comments' is currently unused.
+ */
+int dm_split_words(char *buffer, unsigned max,
+                  unsigned ignore_comments __attribute__((unused)),
+                  char **argv)
+{
+       unsigned arg;
+
+       for (arg = 0; arg < max; arg++) {
+               /* Skip leading whitespace, then span the word. */
+               buffer = _consume(buffer, isspace);
+               if (!*buffer)
+                       break;
+
+               argv[arg] = buffer;
+               buffer = _consume(buffer, _isword);
+
+               /* Terminate the word unless we hit end-of-buffer. */
+               if (*buffer) {
+                       *buffer = '\0';
+                       buffer++;
+               }
+       }
+
+       return arg;
+}
+
+/*
+ * Remove hyphen quoting from a component of a name.
+ * NULL-terminates the component and returns start of next component.
+ *
+ * "--" collapses to "-" in place; a single unquoted "-" marks the end
+ * of the component.
+ */
+static char *_unquote(char *component)
+{
+       char *c = component;
+       char *o = c;
+       char *r;
+
+       while (*c) {
+               if (*(c + 1)) {
+                       if (*c == '-') {
+                               if (*(c + 1) == '-')
+                                       c++;    /* "--" -> emit one '-' */
+                               else
+                                       break;  /* component separator */
+                       }
+               }
+               *o = *c;
+               o++;
+               c++;
+       }
+
+       /* Skip the separator (if any) to find the next component. */
+       r = (*c) ? c + 1 : c;
+       *o = '\0';
+
+       return r;
+}
+
+/*
+ * Split a hyphen-quoted dm name into VG, LV and layer components.
+ *
+ * When mem is non-NULL, dmname is duplicated from the pool into
+ * *vgname first; when mem is NULL, *vgname must already point at a
+ * writable copy of the name (it is unquoted in place).
+ * Returns 1 on success, 0 on error.
+ */
+int dm_split_lvm_name(struct dm_pool *mem, const char *dmname,
+                     char **vgname, char **lvname, char **layer)
+{
+       if (!vgname || !lvname || !layer) {
+               log_error(INTERNAL_ERROR "dm_split_lvm_name: Forbidden NULL parameter detected.");
+               return 0;
+       }
+
+       if (mem && (!dmname || !(*vgname = dm_pool_strdup(mem, dmname)))) {
+               log_error("Failed to duplicate lvm name.");
+               return 0;
+       } else if (!*vgname) {
+               log_error("Missing lvm name for split.");
+               return 0;
+       }
+
+       /* Each _unquote() terminates one component and returns the next. */
+       _unquote(*layer = _unquote(*lvname = _unquote(*vgname)));
+
+       return 1;
+}
+
+/*
+ * On error, up to glibc 2.0.6, snprintf returned -1 if buffer was too small;
+ * From glibc 2.1 it returns number of chars (excl. trailing null) that would
+ * have been written had there been room.
+ *
+ * dm_snprintf reverts to the old behaviour: it returns -1 on any
+ * truncation or formatting error, else the number of characters
+ * written (excluding the terminating NUL).
+ */
+int dm_snprintf(char *buf, size_t bufsize, const char *format, ...)
+{
+       int n;
+       va_list ap;
+
+       va_start(ap, format);
+       n = vsnprintf(buf, bufsize, format, ap);
+       va_end(ap);
+
+       /* n == bufsize means the output was truncated. */
+       if (n < 0 || ((unsigned) n >= bufsize))
+               return -1;
+
+       return n;
+}
+
+/* Return the filename part of path (text after the last '/'). */
+const char *dm_basename(const char *path)
+{
+       const char *p = strrchr(path, '/');
+
+       return p ? p + 1 : path;
+}
+
+/*
+ * vasprintf()-style formatting using dm_malloc().
+ *
+ * On success *result points to a newly allocated formatted string and
+ * the return value is the string length including the terminating NUL
+ * (n + 1); on allocation failure -1 is returned and *result is NULL.
+ * The caller frees *result. va_copy() is used so 'aq' is not consumed.
+ */
+int dm_vasprintf(char **result, const char *format, va_list aq)
+{
+       int i, n, size = 16;
+       va_list ap;
+       char *buf = dm_malloc(size);
+
+       *result = 0;
+
+       if (!buf)
+               return -1;
+
+       /* Grow the buffer until vsnprintf() fits. */
+       for (i = 0;; i++) {
+               va_copy(ap, aq);
+               n = vsnprintf(buf, size, format, ap);
+               va_end(ap);
+
+               if (0 <= n && n < size)
+                       break;
+
+               dm_free(buf);
+               /* Up to glibc 2.0.6 returns -1 */
+               size = (n < 0) ? size * 2 : n + 1;
+               if (!(buf = dm_malloc(size)))
+                       return -1;
+       }
+
+       if (i > 1) {
+               /* Reallocated more than once? Shrink to the exact size. */
+               if (!(*result = dm_strdup(buf))) {
+                       dm_free(buf);
+                       return -1;
+               }
+               dm_free(buf);
+       } else
+               *result = buf;
+
+       return n + 1;
+}
+
+/* Variadic wrapper around dm_vasprintf(); same return contract. */
+int dm_asprintf(char **result, const char *format, ...)
+{
+       int r;
+       va_list ap;
+       va_start(ap, format);
+       r = dm_vasprintf(result, format, ap);
+       va_end(ap);
+       return r;
+}
+
+/*
+ * Count occurrences of 'c1' or 'c2' in 'str' until we reach a null char.
+ *
+ * Returns:
+ *  len - incremented for each char we encounter.
+ *  count - incremented for each occurrence of 'c1' or 'c2'.
+ */
+static void _count_chars(const char *str, size_t *len, int *count,
+                        const int c1, const int c2)
+{
+       const char *ptr;
+
+       for (ptr = str; *ptr; ptr++, (*len)++)
+               if (*ptr == c1 || *ptr == c2)
+                       (*count)++;
+}
+
+/*
+ * Count occurrences of 'c' in 'str' of length 'len'.
+ *
+ * Returns:
+ *   Number of occurrences of 'c'
+ */
+unsigned dm_count_chars(const char *str, size_t len, const int c)
+{
+       size_t i;
+       unsigned count = 0;
+
+       for (i = 0; i < len; i++)
+               if (str[i] == c)
+                       count++;
+
+       return count;
+}
+
+/*
+ * Length of string after escaping double quotes and backslashes.
+ * Each '"' or '\' costs one extra byte; len starts at 1 to account
+ * for the terminating NUL.
+ */
+size_t dm_escaped_len(const char *str)
+{
+       size_t len = 1;
+       int count = 0;
+
+       _count_chars(str, &len, &count, '\"', '\\');
+
+       return count + len;
+}
+
+/*
+ * Copies a string, quoting orig_char with quote_char.
+ * Optionally also quote quote_char.
+ *
+ * *out is advanced past the copied text; the output is NOT
+ * NUL-terminated here - the caller terminates it.
+ */
+static void _quote_characters(char **out, const char *src,
+                             const int orig_char, const int quote_char,
+                             int quote_quote_char)
+{
+       while (*src) {
+               if (*src == orig_char ||
+                   (*src == quote_char && quote_quote_char))
+                       *(*out)++ = quote_char;
+
+               *(*out)++ = *src++;
+       }
+}
+
+/*
+ * Undo quoting of orig_char (and doubled quote_char) in src, in place.
+ * The string is only rewritten from the first quoted sequence onward.
+ */
+static void _unquote_one_character(char *src, const char orig_char,
+                                  const char quote_char)
+{
+       char *out;
+       char s, n;
+
+       /* Optimise for the common case where no changes are needed. */
+       while ((s = *src++)) {
+               if (s == quote_char &&
+                   ((n = *src) == orig_char || n == quote_char)) {
+                       /* First quoted pair found: start compacting here. */
+                       out = src++;
+                       *(out - 1) = n;
+
+                       while ((s = *src++)) {
+                               if (s == quote_char &&
+                                   ((n = *src) == orig_char || n == quote_char)) {
+                                       s = n;
+                                       src++;
+                               }
+                               *out = s;
+                               out++;
+                       }
+
+                       *out = '\0';
+                       return;
+               }
+       }
+}
+
+/*
+ * Unquote each character given in orig_char array and unquote quote_char
+ * as well. Also save the first occurrence of each character from orig_char
+ * that was found unquoted in arr_substr_first_unquoted array. This way we can
+ * process several characters in one go.
+ *
+ * The string is rewritten in place; arr_substr_first_unquoted may be
+ * NULL when the caller does not need the positions.
+ */
+static void _unquote_characters(char *src, const char *orig_chars,
+                               size_t num_orig_chars,
+                               const char quote_char,
+                               char *arr_substr_first_unquoted[])
+{
+       char *out = src;
+       char c, s, n;
+       unsigned i;
+
+       while ((s = *src++)) {
+               for (i = 0; i < num_orig_chars; i++) {
+                       c = orig_chars[i];
+                       if (s == quote_char &&
+                           ((n = *src) == c || n == quote_char)) {
+                               s = n;
+                               src++;
+                               break;
+                       }
+                       /* Record the first unquoted occurrence of c. */
+                       if (arr_substr_first_unquoted && (s == c) &&
+                           !arr_substr_first_unquoted[i])
+                               arr_substr_first_unquoted[i] = out;
+               };
+               *out++ = s;
+       }
+
+       *out = '\0';
+}
+
+/*
+ * Copies a string, quoting hyphens with hyphens ('-' becomes "--").
+ */
+static void _quote_hyphens(char **out, const char *src)
+{
+       _quote_characters(out, src, '-', '-', 0);
+}
+
+/*
+ * <vg>-<lv>-<layer> or if !layer just <vg>-<lv>.
+ *
+ * Hyphens inside each component are doubled so the name can later be
+ * split unambiguously. len starts at 1 for the NUL and hyphens at 1
+ * for the vg/lv separator. Allocated from mem; returns NULL on
+ * allocation failure.
+ */
+char *dm_build_dm_name(struct dm_pool *mem, const char *vgname,
+                      const char *lvname, const char *layer)
+{
+       size_t len = 1;
+       int hyphens = 1;
+       char *r, *out;
+
+       _count_chars(vgname, &len, &hyphens, '-', 0);
+       _count_chars(lvname, &len, &hyphens, '-', 0);
+
+       if (layer && *layer) {
+               _count_chars(layer, &len, &hyphens, '-', 0);
+               hyphens++;
+       }
+
+       len += hyphens;
+
+       if (!(r = dm_pool_alloc(mem, len))) {
+               log_error("build_dm_name: Allocation failed for %" PRIsize_t
+                         " for %s %s %s.", len, vgname, lvname, layer);
+               return NULL;
+       }
+
+       out = r;
+       _quote_hyphens(&out, vgname);
+       *out++ = '-';
+       _quote_hyphens(&out, lvname);
+
+       if (layer && *layer) {
+               /* No hyphen if the layer begins with _ e.g. _mlog */
+               if (*layer != '_')
+                       *out++ = '-';
+               _quote_hyphens(&out, layer);
+       }
+       *out = '\0';
+
+       return r;
+}
+
+char *dm_build_dm_uuid(struct dm_pool *mem, const char *uuid_prefix, const char *lvid, const char *layer)
+{
+       char *dmuuid;
+       size_t len;
+
+       if (!layer)
+               layer = "";
+
+       len = strlen(uuid_prefix) + strlen(lvid) + strlen(layer) + 2;
+
+       if (!(dmuuid = dm_pool_alloc(mem, len))) {
+               log_error("build_dm_name: Allocation failed for %" PRIsize_t
+                         " %s %s.", len, lvid, layer);
+               return NULL;
+       }
+
+       sprintf(dmuuid, "%s%s%s%s", uuid_prefix, lvid, (*layer) ? "-" : "", layer);
+
+       return dmuuid;
+}
+
/*
 * Copies a string, quoting double quotes with backslashes.
 *
 * The caller must supply an 'out' buffer large enough for the escaped
 * result (worst case 2 * strlen(src) + 1, if every character needed
 * escaping).  Returns 'out' for convenience.
 */
char *dm_escape_double_quotes(char *out, const char *src)
{
	char *buf = out;

	/* Final argument 1: presumably makes _quote_characters() escape a
	 * literal backslash as well — confirm against its definition. */
	_quote_characters(&buf, src, '\"', '\\', 1);
	*buf = '\0';

	return out;
}
+
/*
 * Undo quoting in situ.
 *
 * Collapses backslash-escaped '"' (and escaped backslashes) in place;
 * the inverse of dm_escape_double_quotes().
 */
void dm_unescape_double_quotes(char *src)
{
	_unquote_one_character(src, '\"', '\\');
}
+
+/*
+ * Unescape colons and "at" signs in situ and save the substrings
+ * starting at the position of the first unescaped colon and the
+ * first unescaped "at" sign. This is normally used to unescape
+ * device names used as PVs.
+ */
+void dm_unescape_colons_and_at_signs(char *src,
+                                    char **substr_first_unquoted_colon,
+                                    char **substr_first_unquoted_at_sign)
+{
+       const char *orig_chars = ":@";
+       char *arr_substr_first_unquoted[] = {NULL, NULL, NULL};
+
+       _unquote_characters(src, orig_chars, 2, '\\', arr_substr_first_unquoted);
+
+       if (substr_first_unquoted_colon)
+               *substr_first_unquoted_colon = arr_substr_first_unquoted[0];
+
+       if (substr_first_unquoted_at_sign)
+               *substr_first_unquoted_at_sign = arr_substr_first_unquoted[1];
+}
+
/*
 * Copy src into dest (at most n bytes including the NUL).
 * Returns 1 when the whole string fitted, 0 when it was truncated
 * (dest is still NUL-terminated provided n > 0).
 */
int dm_strncpy(char *dest, const char *src, size_t n)
{
	/* memccpy() stops after copying the terminating NUL, so a non-NULL
	 * return means src (including its NUL) fitted completely. */
	if (memccpy(dest, src, 0, n) != NULL)
		return 1;

	/* Truncated: force NUL-termination of what was copied. */
	if (n)
		dest[n - 1] = '\0';

	return 0;
}
+
/* Test if the doubles are close enough to be considered equal */
static int _close_enough(double d1, double d2)
{
	double diff = d1 - d2;

	if (diff < 0.)
		diff = -diff;

	/* Strictly-less-than keeps NaN comparisons false, as fabs() would. */
	return diff < DBL_EPSILON;
}
+
/* Indices into the size_str table below: each BASE_* is the row of the
 * largest ("Exa...") prefix in its group. */
#define BASE_UNKNOWN 0
#define BASE_SHARED 1
#define BASE_1024 8
#define BASE_1000 15
#define BASE_SPECIAL 21
#define NUM_UNIT_PREFIXES 6
#define NUM_SPECIAL 3

#define SIZE_BUF 128

/*
 * Render 'size' (given in 512-byte sectors; note the 'size *= 512' below)
 * as a human-readable string allocated from 'mem'.
 *
 * unit_type selects the unit (see the size_str table); 'h'/'H'/'r'/'R'
 * pick a unit automatically ("human readable"), with 'r'/'R' adding a
 * '<' prefix when rounding loses precision.  use_si_units selects the
 * case-sensitive 1000-vs-1024 tables.  A non-zero unit_factor overrides
 * the factor derived from unit_type.  suffix_type picks the long/short/
 * single-char suffix column; include_suffix controls whether it is
 * appended at all (forced on in the human-readable path).
 *
 * Returns "" (not NULL) on allocation or internal error.
 */
const char *dm_size_to_string(struct dm_pool *mem, uint64_t size,
			      char unit_type, int use_si_units,
			      uint64_t unit_factor, int include_suffix,
			      dm_size_suffix_t suffix_type)
{
	unsigned base = BASE_UNKNOWN;
	unsigned s;
	int precision;
	double d;
	uint64_t byte = UINT64_C(0);
	uint64_t units = UINT64_C(1024);
	char *size_buf = NULL;
	char new_unit_type = '\0', unit_type_buf[2];
	const char *prefix = "";
	/* Rows: {long suffix, short suffix, single-char unit}. */
	const char * const size_str[][3] = {
		/* BASE_UNKNOWN */
		{"         ", "   ", " "},	/* [0] */

		/* BASE_SHARED - Used if use_si_units = 0 */
		{" Exabyte", " EB", "E"},	/* [1] */
		{" Petabyte", " PB", "P"},	/* [2] */
		{" Terabyte", " TB", "T"},	/* [3] */
		{" Gigabyte", " GB", "G"},	/* [4] */
		{" Megabyte", " MB", "M"},	/* [5] */
		{" Kilobyte", " KB", "K"},	/* [6] */
		{" Byte    ", " B", "B"},	/* [7] */

		/* BASE_1024 - Used if use_si_units = 1 */
		{" Exbibyte", " EiB", "e"},	/* [8] */
		{" Pebibyte", " PiB", "p"},	/* [9] */
		{" Tebibyte", " TiB", "t"},	/* [10] */
		{" Gibibyte", " GiB", "g"},	/* [11] */
		{" Mebibyte", " MiB", "m"},	/* [12] */
		{" Kibibyte", " KiB", "k"},	/* [13] */
		{" Byte    ", " B", "b"},	/* [14] */

		/* BASE_1000 - Used if use_si_units = 1 */
		{" Exabyte",  " EB", "E"},	/* [15] */
		{" Petabyte", " PB", "P"},	/* [16] */
		{" Terabyte", " TB", "T"},	/* [17] */
		{" Gigabyte", " GB", "G"},	/* [18] */
		{" Megabyte", " MB", "M"},	/* [19] */
		{" Kilobyte", " kB", "K"},	/* [20] */

		/* BASE_SPECIAL */
		{" Byte    ", " B ", "B"},	/* [21] (shared with BASE_1000) */
		{" Units   ", " Un", "U"},	/* [22] */
		{" Sectors ", " Se", "S"},	/* [23] */
	};

	if (!(size_buf = dm_pool_alloc(mem, SIZE_BUF)))	{
		log_error("no memory for size display buffer");
		return "";
	}

	if (!use_si_units) {
		/* Case-independent match */
		for (s = 0; s < NUM_UNIT_PREFIXES; s++)
			if (toupper((int) unit_type) ==
			    *size_str[BASE_SHARED + s][2]) {
				base = BASE_SHARED;
				break;
			}
	} else {
		/* Case-dependent match for powers of 1000 */
		for (s = 0; s < NUM_UNIT_PREFIXES; s++)
			if (unit_type == *size_str[BASE_1000 + s][2]) {
				base = BASE_1000;
				break;
			}

		/* Case-dependent match for powers of 1024 */
		if (base == BASE_UNKNOWN)
			for (s = 0; s < NUM_UNIT_PREFIXES; s++)
			if (unit_type == *size_str[BASE_1024 + s][2]) {
				base = BASE_1024;
				break;
			}
	}

	if (base == BASE_UNKNOWN)
		/* Check for special units - s, b or u */
		for (s = 0; s < NUM_SPECIAL; s++)
			if (toupper((int) unit_type) ==
			    *size_str[BASE_SPECIAL + s][2]) {
				base = BASE_SPECIAL;
				break;
			}

	if (size == UINT64_C(0)) {
		if (base == BASE_UNKNOWN)
			s = 0;
		sprintf(size_buf, "0%s", include_suffix ? size_str[base + s][suffix_type] : "");
		return size_buf;
	}

	/* Convert sectors to bytes. */
	size *= UINT64_C(512);

	if (base != BASE_UNKNOWN) {
		if (!unit_factor) {
			unit_type_buf[0] = unit_type;
			unit_type_buf[1] = '\0';
			if (!(unit_factor = dm_units_to_factor(&unit_type_buf[0], &new_unit_type, 1, NULL)) ||
			    unit_type != new_unit_type) {
				/* The two functions should match (and unrecognised units get treated like 'h'). */
				log_error(INTERNAL_ERROR "Inconsistent units: %c and %c.", unit_type, new_unit_type);
				return "";
			}
		}
		byte = unit_factor;
	} else {
		/* Human-readable style */
		if (unit_type == 'H' || unit_type == 'R') {
			units = UINT64_C(1000);
			base = BASE_1000;
		} else {
			units = UINT64_C(1024);
			base = BASE_1024;
		}

		if (!use_si_units)
			base = BASE_SHARED;

		/* Start at the Exa prefix and divide down until size >= byte. */
		byte = units * units * units * units * units * units;

		for (s = 0; s < NUM_UNIT_PREFIXES && size < byte; s++)
			byte /= units;

		if ((s < NUM_UNIT_PREFIXES) &&
		    ((unit_type == 'R') || (unit_type == 'r'))) {
			/* When the rounding would cause difference, add '<' prefix
			 * i.e.  2043M is more than 1.9949G, prints <2.00G
			 * This version is for 2 digits fixed precision */
			d = 100. * (double) size / byte;
			if (!_close_enough(floorl(d), nearbyintl(d)))
				prefix = "<";
		}

		include_suffix = 1;
	}

	/* FIXME Make precision configurable */
	switch (toupper(*size_str[base + s][DM_SIZE_UNIT])) {
	case 'B':
	case 'S':
		/* Bytes and sectors are integral - no decimals. */
		precision = 0;
		break;
	default:
		precision = 2;
	}

	snprintf(size_buf, SIZE_BUF, "%s%.*f%s", prefix, precision,
		 (double) size / byte, include_suffix ? size_str[base + s][suffix_type] : "");

	return size_buf;
}
+
/*
 * Parse a units string such as "4m", "K" or "s" into a byte factor.
 *
 * An optional leading number multiplies the unit (fractions are honoured
 * via floating point; pure integers stay in integer arithmetic).  Lower
 * case unit letters are powers of 1024, upper case powers of 1000 —
 * except the shared special cases b/B (bytes), s/S (512-byte sectors)
 * and h/H/r/R (human-readable, factor 1).
 *
 * *unit_type receives the recognised unit character, or 'U' when a
 * multiplier other than 1 was given.  In strict mode only a single unit
 * character is permitted.  If endptr is set it points past the consumed
 * text.  Returns the factor in bytes, or 0 on error.
 */
uint64_t dm_units_to_factor(const char *units, char *unit_type,
			    int strict, const char **endptr)
{
	char *ptr = NULL;
	uint64_t v;
	double custom_value = 0;
	uint64_t multiplier;

	if (endptr)
		*endptr = units;

	if (isdigit(*units)) {
		custom_value = strtod(units, &ptr);
		if (ptr == units)
			return 0;
		/* If the value is integral, clear custom_value so the result
		 * below is computed with exact integer arithmetic. */
		v = (uint64_t) strtoull(units, NULL, 10);
		if (_close_enough((double) v, custom_value))
			custom_value = 0;	/* Use integer arithmetic */
		units = ptr;
	} else
		v = 1;

	/* Only one units char permitted in strict mode. */
	if (strict && units[0] && units[1])
		return 0;

	if (v == 1)
		*unit_type = *units;
	else
		*unit_type = 'U';

	switch (*units) {
	case 'h':
	case 'H':
	case 'r':
	case 'R':
		/* Human-readable: no fixed factor, and any multiplier is ignored. */
		multiplier = v = UINT64_C(1);
		*unit_type = *units;
		break;
	case 'b':
	case 'B':
		multiplier = UINT64_C(1);
		break;
#define KILO UINT64_C(1024)
	case 's':
	case 'S':
		/* Sector: 512 bytes. */
		multiplier = (KILO/2);
		break;
	case 'k':
		multiplier = KILO;
		break;
	case 'm':
		multiplier = KILO * KILO;
		break;
	case 'g':
		multiplier = KILO * KILO * KILO;
		break;
	case 't':
		multiplier = KILO * KILO * KILO * KILO;
		break;
	case 'p':
		multiplier = KILO * KILO * KILO * KILO * KILO;
		break;
	case 'e':
		multiplier = KILO * KILO * KILO * KILO * KILO * KILO;
		break;
#undef KILO
#define KILO UINT64_C(1000)
	case 'K':
		multiplier = KILO;
		break;
	case 'M':
		multiplier = KILO * KILO;
		break;
	case 'G':
		multiplier = KILO * KILO * KILO;
		break;
	case 'T':
		multiplier = KILO * KILO * KILO * KILO;
		break;
	case 'P':
		multiplier = KILO * KILO * KILO * KILO * KILO;
		break;
	case 'E':
		multiplier = KILO * KILO * KILO * KILO * KILO * KILO;
		break;
#undef KILO
	default:
		return 0;
	}

	if (endptr)
		*endptr = units + 1;

	if (_close_enough(custom_value, 0.))
		return v * multiplier; /* Use integer arithmetic */
	else
		return (uint64_t) (custom_value * multiplier);
}
diff --git a/device_mapper/libdm-targets.c b/device_mapper/libdm-targets.c
new file mode 100644 (file)
index 0000000..5ab4701
--- /dev/null
@@ -0,0 +1,565 @@
+/*
+ * Copyright (C) 2005-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "libdm-common.h"
+
/*
 * Parse the snapshot target's status line into a pool-allocated
 * dm_status_snapshot.
 *
 * Numeric form is "<used>/<total>[ <metadata_sectors>]" — the third
 * field is optional (two matched fields are accepted, and
 * has_metadata_sectors records whether it was present).  Otherwise the
 * literal states "Invalid", "Merge failed" or "Overflow" are recognised.
 *
 * Returns 1 and sets *status on success; 0 on NULL/unparsable params.
 */
int dm_get_status_snapshot(struct dm_pool *mem, const char *params,
			   struct dm_status_snapshot **status)
{
	struct dm_status_snapshot *s;
	int r;

	if (!params) {
		log_error("Failed to parse invalid snapshot params.");
		return 0;
	}

	if (!(s = dm_pool_zalloc(mem, sizeof(*s)))) {
		log_error("Failed to allocate snapshot status structure.");
		return 0;
	}

	r = sscanf(params, FMTu64 "/" FMTu64 " " FMTu64,
		   &s->used_sectors, &s->total_sectors,
		   &s->metadata_sectors);

	if (r == 3 || r == 2)
		s->has_metadata_sectors = (r == 3);
	else if (!strcmp(params, "Invalid"))
		s->invalid = 1;
	else if (!strcmp(params, "Merge failed"))
		s->merge_failed = 1;
	else if (!strcmp(params, "Overflow"))
		s->overflow = 1;
	else {
		dm_pool_free(mem, s);
		log_error("Failed to parse snapshot params: %s.", params);
		return 0;
	}

	*status = s;

	return 1;
}
+
/*
 * Skip nr fields each delimited by a single space.
 * Returns a pointer just past the nr-th space, or NULL if the
 * string runs out of fields first.
 * FIXME Don't assume single space.
 */
static const char *_skip_fields(const char *p, unsigned nr)
{
	for (; p && nr; nr--) {
		p = strchr(p, ' ');
		if (p)
			p++;	/* step over the delimiter */
	}

	return p;
}
+
/*
 * Count number of single-space delimited fields.
 * Number of fields is number of spaces plus one;
 * an empty or NULL string has zero fields.
 */
static unsigned _count_fields(const char *p)
{
	unsigned nr;

	if (!p || !*p)
		return 0;

	for (nr = 1; (p = _skip_fields(p, 1)); nr++)
		;

	return nr;
}
+
+/*
+ * Various RAID status versions include:
+ * Versions < 1.5.0 (4 fields):
+ *   <raid_type> <#devs> <health_str> <sync_ratio>
+ * Versions 1.5.0+  (6 fields):
+ *   <raid_type> <#devs> <health_str> <sync_ratio> <sync_action> <mismatch_cnt>
+ * Versions 1.9.0+  (7 fields):
+ *   <raid_type> <#devs> <health_str> <sync_ratio> <sync_action> <mismatch_cnt> <data_offset>
+ */
+int dm_get_status_raid(struct dm_pool *mem, const char *params,
+                      struct dm_status_raid **status)
+{
+       int i;
+       unsigned num_fields;
+       const char *p, *pp, *msg_fields = "";
+       struct dm_status_raid *s = NULL;
+       unsigned a = 0;
+
+       if ((num_fields = _count_fields(params)) < 4)
+               goto_bad;
+
+       /* Second field holds the device count */
+       msg_fields = "<#devs> ";
+       if (!(p = _skip_fields(params, 1)) || (sscanf(p, "%d", &i) != 1))
+               goto_bad;
+
+       msg_fields = "";
+       if (!(s = dm_pool_zalloc(mem, sizeof(struct dm_status_raid))))
+               goto_bad;
+
+       if (!(s->raid_type = dm_pool_zalloc(mem, p - params)))
+               goto_bad; /* memory is freed when pool is destroyed */
+
+       if (!(s->dev_health = dm_pool_zalloc(mem, i + 1))) /* Space for health chars */
+               goto_bad;
+
+       msg_fields = "<raid_type> <#devices> <health_chars> and <sync_ratio> ";
+       if (sscanf(params, "%s %u %s " FMTu64 "/" FMTu64,
+                  s->raid_type,
+                  &s->dev_count,
+                  s->dev_health,
+                  &s->insync_regions,
+                  &s->total_regions) != 5)
+               goto_bad;
+
+       /*
+        * All pre-1.5.0 version parameters are read.  Now we check
+        * for additional 1.5.0+ parameters (i.e. num_fields at least 6).
+        *
+        * Note that 'sync_action' will be NULL (and mismatch_count
+        * will be 0) if the kernel returns a pre-1.5.0 status.
+        */
+       if (num_fields < 6)
+               goto out;
+
+       msg_fields = "<sync_action> and <mismatch_cnt> ";
+
+       /* Skip pre-1.5.0 params */
+       if (!(p = _skip_fields(params, 4)) || !(pp = _skip_fields(p, 1)))
+               goto_bad;
+
+       if (!(s->sync_action = dm_pool_zalloc(mem, pp - p)))
+               goto_bad;
+
+       if (sscanf(p, "%s " FMTu64, s->sync_action, &s->mismatch_count) != 2)
+               goto_bad;
+
+       if (num_fields < 7)
+               goto out;
+
+       /*
+        * All pre-1.9.0 version parameters are read.  Now we check
+        * for additional 1.9.0+ parameters (i.e. nr_fields at least 7).
+        *
+        * Note that data_offset will be 0 if the
+        * kernel returns a pre-1.9.0 status.
+        */
+       msg_fields = "<data_offset>";
+       if (!(p = _skip_fields(params, 6))) /* skip pre-1.9.0 params */
+               goto bad;
+       if (sscanf(p, FMTu64, &s->data_offset) != 1)
+               goto bad;
+
+out:
+       *status = s;
+
+       if (s->insync_regions == s->total_regions) {
+               /* FIXME: kernel gives misleading info here
+                * Trying to recognize a true state */
+               while (i-- > 0)
+                       if (s->dev_health[i] == 'a')
+                               a++; /* Count number of 'a' */
+
+               if (a && a < s->dev_count) {
+                       /* SOME legs are in 'a' */
+                       if (!strcasecmp(s->sync_action, "recover")
+                           || !strcasecmp(s->sync_action, "idle"))
+                               /* Kernel may possibly start some action
+                                * in near-by future, do not report 100% */
+                               s->insync_regions--;
+               }
+       }
+
+       return 1;
+
+bad:
+       log_error("Failed to parse %sraid params: %s", msg_fields, params);
+
+       if (s)
+               dm_pool_free(mem, s);
+
+       *status = NULL;
+
+       return 0;
+}
+
/*
 * Cache target status line layout:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty> <#features> <features>*
 * <#core args> <core args>* <policy name> <#policy args> <policy args>*
 *
 * metadata block size      : Fixed block size for each metadata block in
 *                            sectors
 * #used metadata blocks    : Number of metadata blocks used
 * #total metadata blocks   : Total number of metadata blocks
 * cache block size         : Configurable block size for the cache device
 *                            in sectors
 * #used cache blocks       : Number of blocks resident in the cache
 * #total cache blocks      : Total number of cache blocks
 * #read hits               : Number of times a READ bio has been mapped
 *                            to the cache
 * #read misses             : Number of times a READ bio has been mapped
 *                            to the origin
 * #write hits              : Number of times a WRITE bio has been mapped
 *                            to the cache
 * #write misses            : Number of times a WRITE bio has been
 *                            mapped to the origin
 * #demotions               : Number of times a block has been removed
 *                            from the cache
 * #promotions              : Number of times a block has been moved to
 *                            the cache
 * #dirty                   : Number of blocks in the cache that differ
 *                            from the origin
 * #feature args            : Number of feature args to follow
 * feature args             : 'writethrough' (optional)
 * #core args               : Number of core arguments (must be even)
 * core args                : Key/value pairs for tuning the core
 *                            e.g. migration_threshold
 * policy name              : Name of the policy
 * #policy args             : Number of policy arguments to follow (must be even)
 * policy args              : Key/value pairs
 *                            e.g. sequential_threshold
 *
 * Returns 1 and sets *status (pool-allocated) on success;
 * 0 with *status = NULL on failure.
 */
int dm_get_status_cache(struct dm_pool *mem, const char *params,
			struct dm_status_cache **status)
{
	int i, feature_argc;
	char *str;
	const char *p, *pp;
	struct dm_status_cache *s;

	if (!(s = dm_pool_zalloc(mem, sizeof(struct dm_status_cache))))
		return_0;

	/* "Error"/"Fail" replace the whole status line on kernel failure. */
	if (strstr(params, "Error")) {
		s->error = 1;
		s->fail = 1; /*  This is also I/O fail state */
		goto out;
	}

	if (strstr(params, "Fail")) {
		s->fail = 1;
		goto out;
	}

	/* Read in args that have definitive placement */
	if (sscanf(params,
		   " " FMTu32
		   " " FMTu64 "/" FMTu64
		   " " FMTu32
		   " " FMTu64 "/" FMTu64
		   " " FMTu64 " " FMTu64
		   " " FMTu64 " " FMTu64
		   " " FMTu64 " " FMTu64
		   " " FMTu64
		   " %d",
		   &s->metadata_block_size,
		   &s->metadata_used_blocks, &s->metadata_total_blocks,
		   &s->block_size, /* AKA, chunk_size */
		   &s->used_blocks, &s->total_blocks,
		   &s->read_hits, &s->read_misses,
		   &s->write_hits, &s->write_misses,
		   &s->demotions, &s->promotions,
		   &s->dirty_blocks,
		   &feature_argc) != 14)
		goto bad;

	/* Now jump to "features" section: the 12 fixed fields above
	 * (the "a/b" pairs count as one field each) precede it. */
	if (!(p = _skip_fields(params, 12)))
		goto bad;

	/* Read in features */
	for (i = 0; i < feature_argc; i++) {
		if (!strncmp(p, "writethrough ", 13))
			s->feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH;
		else if (!strncmp(p, "writeback ", 10))
			s->feature_flags |= DM_CACHE_FEATURE_WRITEBACK;
		else if (!strncmp(p, "passthrough ", 12))
			s->feature_flags |= DM_CACHE_FEATURE_PASSTHROUGH;
		else if (!strncmp(p, "metadata2 ", 10))
			s->feature_flags |= DM_CACHE_FEATURE_METADATA2;
		else
			log_error("Unknown feature in status: %s", params);

		if (!(p = _skip_fields(p, 1)))
			goto bad;
	}

	/* Read in core_args. */
	if (sscanf(p, "%d ", &s->core_argc) != 1)
		goto bad;
	if ((s->core_argc > 0) &&
	    (!(s->core_argv = dm_pool_zalloc(mem, sizeof(char *) * s->core_argc)) ||
	     !(p = _skip_fields(p, 1)) ||
	     !(str = dm_pool_strdup(mem, p)) ||
	     !(p = _skip_fields(p, (unsigned) s->core_argc)) ||
	     (dm_split_words(str, s->core_argc, 0, s->core_argv) != s->core_argc)))
		goto bad;

	/* Read in policy args */
	pp = p;
	if (!(p = _skip_fields(p, 1)) ||
	    !(s->policy_name = dm_pool_zalloc(mem, (p - pp))))
		goto bad;
	if (sscanf(pp, "%s %d", s->policy_name, &s->policy_argc) != 2)
		goto bad;
	if (s->policy_argc &&
	    (!(s->policy_argv = dm_pool_zalloc(mem, sizeof(char *) * s->policy_argc)) ||
	     !(p = _skip_fields(p, 1)) ||
	     !(str = dm_pool_strdup(mem, p)) ||
	     (dm_split_words(str, s->policy_argc, 0, s->policy_argv) != s->policy_argc)))
		goto bad;

	/* TODO: improve this parser */
	if (strstr(p, " ro"))
		s->read_only = 1;

	if (strstr(p, " needs_check"))
		s->needs_check = 1;
out:
	*status = s;
	return 1;

bad:
	log_error("Failed to parse cache params: %s", params);
	dm_pool_free(mem, s);
	*status = NULL;

	return 0;
}
+}
+
/*
 * Parse a thin-pool target status line into *s (zeroed first).
 *
 * Numeric form:
 *   "<transaction_id> <used_meta>/<total_meta> <used_data>/<total_data>"
 * followed by optional flag words (discard mode, ro, out_of_data_space,
 * error_if_no_space, needs_check).  The literal "Error"/"Fail" states
 * set the corresponding flags and still return success.
 *
 * Returns 1 on success, 0 on NULL or unparsable params.
 */
int parse_thin_pool_status(const char *params, struct dm_status_thin_pool *s)
{
	int pos;

	memset(s, 0, sizeof(*s));

	if (!params) {
		log_error("Failed to parse invalid thin params.");
		return 0;
	}

	if (strstr(params, "Error")) {
		s->error = 1;
		s->fail = 1; /*  This is also I/O fail state */
		return 1;
	}

	if (strstr(params, "Fail")) {
		s->fail = 1;
		return 1;
	}

	/* FIXME: add support for held metadata root */
	/* %n records where the fixed fields end so only the flag words
	 * are scanned below. */
	if (sscanf(params, FMTu64 " " FMTu64 "/" FMTu64 " " FMTu64 "/" FMTu64 "%n",
		   &s->transaction_id,
		   &s->used_metadata_blocks,
		   &s->total_metadata_blocks,
		   &s->used_data_blocks,
		   &s->total_data_blocks, &pos) < 5) {
		log_error("Failed to parse thin pool params: %s.", params);
		return 0;
	}

	/* New status flags */
	if (strstr(params + pos, "no_discard_passdown"))
		s->discards = DM_THIN_DISCARDS_NO_PASSDOWN;
	else if (strstr(params + pos, "ignore_discard"))
		s->discards = DM_THIN_DISCARDS_IGNORE;
	else /* default discard_passdown */
		s->discards = DM_THIN_DISCARDS_PASSDOWN;

	/* Default is 'writable' (rw) data */
	if (strstr(params + pos, "out_of_data_space"))
		s->out_of_data_space = 1;
	else if (strstr(params + pos, "ro "))
		s->read_only = 1;

	/* Default is 'queue_if_no_space' */
	if (strstr(params + pos, "error_if_no_space"))
		s->error_if_no_space = 1;

	if (strstr(params + pos, "needs_check"))
		s->needs_check = 1;

	return 1;
}
+
+int dm_get_status_thin_pool(struct dm_pool *mem, const char *params,
+                           struct dm_status_thin_pool **status)
+{
+       struct dm_status_thin_pool *s;
+
+       if (!(s = dm_pool_alloc(mem, sizeof(struct dm_status_thin_pool)))) {
+               log_error("Failed to allocate thin_pool status structure.");
+               return 0;
+       }
+
+       if (!parse_thin_pool_status(params, s)) {
+               dm_pool_free(mem, s);
+               return_0;
+       }
+
+       *status = s;
+
+       return 1;
+}
+
+int dm_get_status_thin(struct dm_pool *mem, const char *params,
+                      struct dm_status_thin **status)
+{
+       struct dm_status_thin *s;
+
+       if (!(s = dm_pool_zalloc(mem, sizeof(struct dm_status_thin)))) {
+               log_error("Failed to allocate thin status structure.");
+               return 0;
+       }
+
+       if (strchr(params, '-')) {
+               /* nothing to parse */
+       } else if (strstr(params, "Fail")) {
+               s->fail = 1;
+       } else if (sscanf(params, FMTu64 " " FMTu64,
+                  &s->mapped_sectors,
+                  &s->highest_mapped_sector) != 2) {
+               dm_pool_free(mem, s);
+               log_error("Failed to parse thin params: %s.", params);
+               return 0;
+       }
+
+       *status = s;
+
+       return 1;
+}
+
/*
 * dm core parms:           0 409600 mirror
 * Mirror core parms:       2 253:4 253:5 400/400
 * New-style failure params: 1 AA
 * New-style log params:     3 cluster 253:3 A
 *                      or  3 disk 253:3 A
 *                      or  1 core
 */
#define DM_MIRROR_MAX_IMAGES 8 /* limited by kernel DM_KCOPYD_MAX_REGIONS */

/*
 * Parse a mirror target status line into a pool-allocated
 * dm_status_mirror: per-image dev numbers and health characters,
 * sync ratio, then the log type (and per-log dev/health for disk logs).
 *
 * Returns 1 and sets *status on success; 0 with *status = NULL on
 * parse or allocation failure.
 * NOTE: "More then" in the log message below is a typo in a runtime
 * string, left unchanged here.
 */
int dm_get_status_mirror(struct dm_pool *mem, const char *params,
			 struct dm_status_mirror **status)
{
	struct dm_status_mirror *s;
	const char *p, *pos = params;
	unsigned num_devs, argc, i;
	int used;	/* chars consumed by each sscanf (%n) */

	if (!(s = dm_pool_zalloc(mem, sizeof(*s)))) {
		log_error("Failed to alloc mem pool to parse mirror status.");
		return 0;
	}

	if (sscanf(pos, "%u %n", &num_devs, &used) != 1)
		goto_out;
	pos += used;

	if (num_devs > DM_MIRROR_MAX_IMAGES) {
		log_error(INTERNAL_ERROR "More then " DM_TO_STRING(DM_MIRROR_MAX_IMAGES)
			  " reported in mirror status.");
		goto out;
	}

	if (!(s->devs = dm_pool_alloc(mem, num_devs * sizeof(*(s->devs))))) {
		log_error("Allocation of devs failed.");
		goto out;
	}

	/* One "major:minor" pair per mirror image. */
	for (i = 0; i < num_devs; ++i, pos += used)
		if (sscanf(pos, "%u:%u %n",
			   &(s->devs[i].major), &(s->devs[i].minor), &used) != 2)
			goto_out;

	if (sscanf(pos, FMTu64 "/" FMTu64 "%n",
		   &s->insync_regions, &s->total_regions, &used) != 2)
		goto_out;
	pos += used;

	if (sscanf(pos, "%u %n", &argc, &used) != 1)
		goto_out;
	pos += used;

	/* The health string is one character per image, read in place. */
	for (i = 0; i < num_devs ; ++i)
		s->devs[i].health = pos[i];

	if (!(pos = _skip_fields(pos, argc)))
		goto_out;

	if (strncmp(pos, "userspace", 9) == 0) {
		pos += 9;
		/* FIXME: support status of userspace mirror implementation */
	}

	if (sscanf(pos, "%u %n", &argc, &used) != 1)
		goto_out;
	pos += used;

	if (argc == 1) {
		/* core, cluster-core */
		if (!(s->log_type = dm_pool_strdup(mem, pos))) {
			log_error("Allocation of log type string failed.");
			goto out;
		}
	} else {
		if (!(p = _skip_fields(pos, 1)))
			goto_out;

		/* disk, cluster-disk */
		if (!(s->log_type = dm_pool_strndup(mem, pos, p - pos - 1))) {
			log_error("Allocation of log type string failed.");
			goto out;
		}
		pos = p;

		if ((argc > 2) && !strcmp(s->log_type, "disk")) {
			/* Remaining args: log dev pair(s) then their health chars. */
			s->log_count = argc - 2;

			if (!(s->logs = dm_pool_alloc(mem, s->log_count * sizeof(*(s->logs))))) {
				log_error("Allocation of logs failed.");
				goto out;
			}

			for (i = 0; i < s->log_count; ++i, pos += used)
				if (sscanf(pos, "%u:%u %n",
					   &s->logs[i].major, &s->logs[i].minor, &used) != 2)
					goto_out;

			for (i = 0; i < s->log_count; ++i)
				s->logs[i].health = pos[i];
		}
	}

	s->dev_count = num_devs;
	*status = s;

	return 1;
out:
	log_error("Failed to parse mirror status %s.", params);
	dm_pool_free(mem, s);
	*status = NULL;

	return 0;
}
diff --git a/device_mapper/libdm-timestamp.c b/device_mapper/libdm-timestamp.c
new file mode 100644 (file)
index 0000000..c2d0ad8
--- /dev/null
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2006 Rackable Systems All rights reserved.
+ * Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Abstract out the time methods used so they can be adjusted later -
+ * the results of these routines should stay in-core.  
+ */
+
+#include "misc/dmlib.h"
+
+#include <stdlib.h>
+
+#define NSEC_PER_USEC  UINT64_C(1000)
+#define NSEC_PER_MSEC  UINT64_C(1000000)
+#define NSEC_PER_SEC   UINT64_C(1000000000)
+
+/*
+ * The realtime section uses clock_gettime with the CLOCK_MONOTONIC
+ * parameter to prevent issues with time warps
+ * This implementation requires librt.
+ */
+#ifdef HAVE_REALTIME
+
+#include <time.h>
+
+struct dm_timestamp {
+       struct timespec t;
+};
+
+static uint64_t _timestamp_to_uint64(struct dm_timestamp *ts)
+{
+       uint64_t stamp = 0;
+
+       stamp += (uint64_t) ts->t.tv_sec * NSEC_PER_SEC;
+       stamp += (uint64_t) ts->t.tv_nsec;
+
+       return stamp;
+}
+
+struct dm_timestamp *dm_timestamp_alloc(void)
+{
+       struct dm_timestamp *ts = NULL;
+
+       if (!(ts = dm_zalloc(sizeof(*ts))))
+               stack;
+
+       return ts;
+}
+
+int dm_timestamp_get(struct dm_timestamp *ts)
+{
+       if (!ts)
+               return 0;
+
+       if (clock_gettime(CLOCK_MONOTONIC, &ts->t)) {
+               log_sys_error("clock_gettime", "get_timestamp");
+               ts->t.tv_sec = 0;
+               ts->t.tv_nsec = 0;
+               return 0;
+       }
+
+       return 1;
+}
+
+#else /* ! HAVE_REALTIME */
+
+/*
+ * The !realtime section just uses gettimeofday and is therefore subject
+ * to ntp-type time warps - not sure if we should allow that.
+ */
+
+#include <sys/time.h>
+
+struct dm_timestamp {
+       struct timeval t;
+};
+
+static uint64_t _timestamp_to_uint64(struct dm_timestamp *ts)
+{
+       uint64_t stamp = 0;
+
+       stamp += ts->t.tv_sec * NSEC_PER_SEC;
+       stamp += ts->t.tv_usec * NSEC_PER_USEC;
+
+       return stamp;
+}
+
+struct dm_timestamp *dm_timestamp_alloc(void)
+{
+       struct dm_timestamp *ts;
+
+       if (!(ts = dm_malloc(sizeof(*ts))))
+               stack;
+
+       return ts;
+}
+
+int dm_timestamp_get(struct dm_timestamp *ts)
+{
+       if (!ts)
+               return 0;
+
+       if (gettimeofday(&ts->t, NULL)) {
+               log_sys_error("gettimeofday", "get_timestamp");
+               ts->t.tv_sec = 0;
+               ts->t.tv_usec = 0;
+               return 0;
+       }
+
+       return 1;
+}
+
+#endif /* HAVE_REALTIME */
+
+/*
+ * Compare two timestamps.
+ *
+ * Return: -1 if ts1 is less than ts2
+ *          0 if ts1 is equal to ts2
+ *          1 if ts1 is greater than ts2
+ */
+int dm_timestamp_compare(struct dm_timestamp *ts1, struct dm_timestamp *ts2)
+{
+       uint64_t t1, t2;
+
+       t1 = _timestamp_to_uint64(ts1);
+       t2 = _timestamp_to_uint64(ts2);
+
+       if (t2 < t1)
+               return 1;
+
+       if (t1 < t2)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Return the absolute difference in nanoseconds between
+ * the dm_timestamp objects ts1 and ts2.
+ *
+ * Callers that need to know whether ts1 is before, equal to, or after ts2
+ * in addition to the magnitude should use dm_timestamp_compare.
+ */
+uint64_t dm_timestamp_delta(struct dm_timestamp *ts1, struct dm_timestamp *ts2)
+{
+       uint64_t t1, t2;
+
+       t1 = _timestamp_to_uint64(ts1);
+       t2 = _timestamp_to_uint64(ts2);
+
+       if (t1 > t2)
+               return t1 - t2;
+
+       return t2 - t1;
+}
+
+void dm_timestamp_copy(struct dm_timestamp *ts_new, struct dm_timestamp *ts_old)
+{
+       *ts_new = *ts_old;
+}
+
+void dm_timestamp_destroy(struct dm_timestamp *ts)
+{
+       dm_free(ts);
+}
diff --git a/device_mapper/misc/dm-ioctl.h b/device_mapper/misc/dm-ioctl.h
new file mode 100644 (file)
index 0000000..79f574c
--- /dev/null
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2004 - 2017 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_V4_H
+#define _LINUX_DM_IOCTL_V4_H
+
+#ifdef __linux__
+#  include <linux/types.h>
+#endif
+
+#define DM_DIR "mapper"                /* Slashes not supported */
+#define DM_CONTROL_NODE "control"
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables.  Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device, neither the 'active' or 'inactive' table
+ * slots will be filled.  The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device or set its uuid if none was previously supplied.
+ *
+ * DM_SUSPEND:
+ * This performs both suspend and resume, depending which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to
+ * the device has completed.  Further io will be deferred until
+ * the device is resumed.
+ * Resume: It is no longer an error to issue this command on an
+ * unsuspended device.  If a table is present in the 'inactive'
+ * slot, it will be moved to the active slot, then the old table
+ * from the active slot will be _destroyed_.  Finally the device
+ * is resumed.
+ *
+ * DM_DEV_STATUS:
+ * Retrieves the status for the table in the 'active' slot.
+ *
+ * DM_DEV_WAIT:
+ * Wait for a significant event to occur to the device.  This
+ * could either be caused by an event triggered by one of the
+ * targets of the table in the 'active' slot, or a table change.
+ *
+ * DM_TABLE_LOAD:
+ * Load a table into the 'inactive' slot for the device.  The
+ * device does _not_ need to be suspended prior to this command.
+ *
+ * DM_TABLE_CLEAR:
+ * Destroy any table in the 'inactive' slot (ie. abort).
+ *
+ * DM_TABLE_DEPS:
+ * Return a set of device dependencies for the 'active' table.
+ *
+ * DM_TABLE_STATUS:
+ * Return the targets status for the 'active' table.
+ *
+ * DM_TARGET_MSG:
+ * Pass a message string to the target at a specific offset of a device.
+ *
+ * DM_DEV_SET_GEOMETRY:
+ * Set the geometry of a device by passing in a string in this format:
+ *
+ * "cylinders heads sectors_per_track start_sector"
+ *
+ * Beware that CHS geometry is nearly obsolete and only provided
+ * for compatibility with dm devices that can be booted by a PC
+ * BIOS.  See struct hd_geometry for range limits.  Also note that
+ * the geometry is erased if the device size changes.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start.  If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+       /*
+        * The version number is made up of three parts:
+        * major - no backward or forward compatibility,
+        * minor - only backwards compatible,
+        * patch - both backwards and forwards compatible.
+        *
+        * All clients of the ioctl interface should fill in the
+        * version number of the interface that they were
+        * compiled with.
+        *
+        * All recognised ioctl commands (ie. those that don't
+        * return -ENOTTY) fill out this field, even if the
+        * command failed.
+        */
+       uint32_t version[3];    /* in/out */
+       uint32_t data_size;     /* total size of data passed in
+                                * including this struct */
+
+       uint32_t data_start;    /* offset to start of data
+                                * relative to start of this struct */
+
+       uint32_t target_count;  /* in/out */
+       int32_t open_count;     /* out */
+       uint32_t flags;         /* in/out */
+
+       /*
+        * event_nr holds either the event number (input and output) or the
+        * udev cookie value (input only).
+        * The DM_DEV_WAIT ioctl takes an event number as input.
+        * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+        * use the field as a cookie to return in the DM_COOKIE
+        * variable with the uevents they issue.
+        * For output, the ioctls return the event number, not the cookie.
+        */
+       uint32_t event_nr;              /* in/out */
+       uint32_t padding;
+
+       uint64_t dev;           /* in/out */
+
+       char name[DM_NAME_LEN]; /* device name */
+       char uuid[DM_UUID_LEN]; /* unique identifier for
+                                * the block device */
+       char data[7];           /* padding or data */
+};
+
+/*
+ * Used to specify tables.  These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+       uint64_t sector_start;
+       uint64_t length;
+       int32_t status;         /* used when reading from kernel only */
+
+       /*
+        * Location of the next dm_target_spec.
+        * - When specifying targets on a DM_TABLE_LOAD command, this value is
+        *   the number of bytes from the start of the "current" dm_target_spec
+        *   to the start of the "next" dm_target_spec.
+        * - When retrieving targets on a DM_TABLE_STATUS command, this value
+        *   is the number of bytes from the start of the first dm_target_spec
+        *   (that follows the dm_ioctl struct) to the start of the "next"
+        *   dm_target_spec.
+        */
+       uint32_t next;
+
+       char target_type[DM_MAX_TYPE_NAME];
+
+       /*
+        * Parameter string starts immediately after this object.
+        * Be careful to add padding after string to ensure correct
+        * alignment of subsequent dm_target_spec.
+        */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+       uint32_t count; /* Array size */
+       uint32_t padding;       /* unused */
+       uint64_t dev[0];        /* out */
+};
+
+/*
+ * Used to get a list of all dm devices.
+ */
+struct dm_name_list {
+       uint64_t dev;
+       uint32_t next;          /* offset to the next record from
+                                  the _start_ of this */
+       char name[0];
+};
+
+/*
+ * Used to retrieve the target versions
+ */
+struct dm_target_versions {
+        uint32_t next;
+        uint32_t version[3];
+
+        char name[0];
+};
+
+/*
+ * Used to pass message to a target
+ */
+struct dm_target_msg {
+       uint64_t sector;        /* Device sector */
+
+       char message[0];
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+       /* Top level cmds */
+       DM_VERSION_CMD = 0,
+       DM_REMOVE_ALL_CMD,
+       DM_LIST_DEVICES_CMD,
+
+       /* device level cmds */
+       DM_DEV_CREATE_CMD,
+       DM_DEV_REMOVE_CMD,
+       DM_DEV_RENAME_CMD,
+       DM_DEV_SUSPEND_CMD,
+       DM_DEV_STATUS_CMD,
+       DM_DEV_WAIT_CMD,
+
+       /* Table level cmds */
+       DM_TABLE_LOAD_CMD,
+       DM_TABLE_CLEAR_CMD,
+       DM_TABLE_DEPS_CMD,
+       DM_TABLE_STATUS_CMD,
+
+       /* Added later */
+       DM_LIST_VERSIONS_CMD,
+       DM_TARGET_MSG_CMD,
+       DM_DEV_SET_GEOMETRY_CMD,
+       DM_DEV_ARM_POLL_CMD,
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_LIST_DEVICES  _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE    _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME    _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+#define DM_DEV_WAIT      _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+#define DM_DEV_ARM_POLL  _IOWR(DM_IOCTL, DM_DEV_ARM_POLL_CMD, struct dm_ioctl)
+
+#define DM_TABLE_LOAD    _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
+#define DM_TABLE_CLEAR   _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
+#define DM_TABLE_DEPS    _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
+#define DM_TABLE_STATUS  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
+
+#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
+
+#define DM_TARGET_MSG   _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
+#define DM_DEV_SET_GEOMETRY    _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR       4
+#define DM_VERSION_MINOR       36
+#define DM_VERSION_PATCHLEVEL  0
+#define DM_VERSION_EXTRA       "-ioctl (2017-06-09)"
+
+/* Status bits */
+#define DM_READONLY_FLAG       (1 << 0) /* In/Out */
+#define DM_SUSPEND_FLAG                (1 << 1) /* In/Out */
+#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG   (1 << 4) /* In */
+
+/*
+ * Flags that indicate whether a table is present in either of
+ * the two table slots that a device has.
+ */
+#define DM_ACTIVE_PRESENT_FLAG   (1 << 5) /* Out */
+#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
+
+/*
+ * Indicates that the buffer passed in wasn't big enough for the
+ * results.
+ */
+#define DM_BUFFER_FULL_FLAG    (1 << 8) /* Out */
+
+/*
+ * This flag is now ignored.
+ */
+#define DM_SKIP_BDGET_FLAG     (1 << 9) /* In */
+
+/*
+ * Set this to avoid attempting to freeze any filesystem when suspending.
+ */
+#define DM_SKIP_LOCKFS_FLAG    (1 << 10) /* In */
+
+/*
+ * Set this to suspend without flushing queued ios.
+ * Also disables flushing uncommitted changes in the thin target before
+ * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT.
+ */
+#define DM_NOFLUSH_FLAG                (1 << 11) /* In */
+
+/*
+ * If set, any table information returned will relate to the inactive
+ * table instead of the live one.  Always check DM_INACTIVE_PRESENT_FLAG
+ * is set before using the data returned.
+ */
+#define DM_QUERY_INACTIVE_TABLE_FLAG   (1 << 12) /* In */
+
+/*
+ * If set, a uevent was generated for which the caller may need to wait.
+ */
+#define DM_UEVENT_GENERATED_FLAG       (1 << 13) /* Out */
+
+/*
+ * If set, rename changes the uuid not the name.  Only permitted
+ * if no uuid was previously supplied: an existing uuid cannot be changed.
+ */
+#define DM_UUID_FLAG                   (1 << 14) /* In */
+
+/*
+ * If set, all buffers are wiped after use. Use when sending
+ * or requesting sensitive data such as an encryption key.
+ */
+#define DM_SECURE_DATA_FLAG            (1 << 15) /* In */
+
+/*
+ * If set, a message generated output data.
+ */
+#define DM_DATA_OUT_FLAG               (1 << 16) /* Out */
+
+/*
+ * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if
+ * the device cannot be removed immediately because it is still in use
+ * it should instead be scheduled for removal when it gets closed.
+ *
+ * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this
+ * flag indicates that the device is scheduled to be removed when it
+ * gets closed.
+ */
+#define DM_DEFERRED_REMOVE             (1 << 17) /* In/Out */
+
+/*
+ * If set, the device is suspended internally.
+ */
+#define DM_INTERNAL_SUSPEND_FLAG       (1 << 18) /* Out */
+
+#endif                         /* _LINUX_DM_IOCTL_V4_H */
diff --git a/device_mapper/misc/dm-log-userspace.h b/device_mapper/misc/dm-log-userspace.h
new file mode 100644 (file)
index 0000000..a770ae6
--- /dev/null
@@ -0,0 +1,418 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef __DM_LOG_USERSPACE_H__
+#define __DM_LOG_USERSPACE_H__
+
+#include <inttypes.h>
+
+#include "dm-ioctl.h" /* For DM_UUID_LEN */
+
+/*
+ * The device-mapper userspace log module consists of a kernel component and
+ * a user-space component.  The kernel component implements the API defined
+ * in dm-dirty-log.h.  Its purpose is simply to pass the parameters and
+ * return values of those API functions between kernel and user-space.
+ *
+ * Below are defined the 'request_types' - DM_ULOG_CTR, DM_ULOG_DTR, etc.
+ * These request types represent the different functions in the device-mapper
+ * dirty log API.  Each of these is described in more detail below.
+ *
+ * The user-space program must listen for requests from the kernel (representing
+ * the various API functions) and process them.
+ *
+ * User-space begins by setting up the communication link (error checking
+ * removed for clarity):
+ *     fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ *     addr.nl_family = AF_NETLINK;
+ *     addr.nl_groups = CN_IDX_DM;
+ *     addr.nl_pid = 0;
+ *     r = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
+ *     opt = addr.nl_groups;
+ *     setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
+ *
+ * User-space will then wait to receive requests from the kernel, which it
+ * will process as described below.  The requests are received in the form,
+ * ((struct dm_ulog_request) + (additional data)).  Depending on the request
+ * type, there may or may not be 'additional data'.  In the descriptions below,
+ * you will see 'Payload-to-userspace' and 'Payload-to-kernel'.  The
+ * 'Payload-to-userspace' is what the kernel sends in 'additional data' as
+ * necessary parameters to complete the request.  The 'Payload-to-kernel' is
+ * the 'additional data' returned to the kernel that contains the necessary
+ * results of the request.  The 'data_size' field in the dm_ulog_request
+ * structure denotes the availability and amount of payload data.
+ */
+
+/*
+ * DM_ULOG_CTR corresponds to (found in dm-dirty-log.h):
+ * int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
+ *           unsigned argc, char **argv);
+ *
+ * Payload-to-userspace:
+ *     A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *     The name of the device that is used as the backing store for the log
+ *      data.  'dm_get_device' will be called on this device.  ('dm_put_device'
+ *      will be called on this device automatically after calling DM_ULOG_DTR.)
+ *      If there is no device needed for log data, 'data_size' in the
+ *      dm_ulog_request struct should be 0.
+ *
+ * The UUID contained in the dm_ulog_request structure is the reference that
+ * will be used by all request types to a specific log.  The constructor must
+ * record this association with the instance created.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field, filling the
+ * data field with the log device if necessary, and setting 'data_size'
+ * appropriately.
+ */
+#define DM_ULOG_CTR                    1
+
+/*
+ * DM_ULOG_DTR corresponds to (found in dm-dirty-log.h):
+ * void (*dtr)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *     None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being destroyed.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_DTR                    2
+
+/*
+ * DM_ULOG_PRESUSPEND corresponds to (found in dm-dirty-log.h):
+ * int (*presuspend)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being presuspended.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_PRESUSPEND             3
+
+/*
+ * DM_ULOG_POSTSUSPEND corresponds to (found in dm-dirty-log.h):
+ * int (*postsuspend)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being postsuspended.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_POSTSUSPEND            4
+
+/*
+ * DM_ULOG_RESUME corresponds to (found in dm-dirty-log.h):
+ * int (*resume)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being resumed.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_RESUME                 5
+
+/*
+ * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h):
+ * uint32_t (*get_region_size)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     uint64_t - contains the region size
+ *
+ * The region size is something that was determined at constructor time.
+ * It is returned in the payload area and 'data_size' is set to
+ * reflect this.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field appropriately.
+ */
+#define DM_ULOG_GET_REGION_SIZE        6
+
+/*
+ * DM_ULOG_IS_CLEAN corresponds to (found in dm-dirty-log.h):
+ * int (*is_clean)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *     uint64_t - the region to get clean status on
+ * Payload-to-kernel:
+ *     int64_t  - 1 if clean, 0 otherwise
+ *
+ * Payload is sizeof(uint64_t) and contains the region for which the clean
+ * status is being made.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - filling the payload with 0 (not clean) or
+ * 1 (clean), setting 'data_size' and 'error' appropriately.
+ */
+#define DM_ULOG_IS_CLEAN               7
+
+/*
+ * DM_ULOG_IN_SYNC corresponds to (found in dm-dirty-log.h):
+ * int (*in_sync)(struct dm_dirty_log *log, region_t region,
+ *               int can_block);
+ *
+ * Payload-to-userspace:
+ *     uint64_t - the region to get sync status on
+ * Payload-to-kernel:
+ *     int64_t - 1 if in-sync, 0 otherwise
+ *
+ * Exactly the same as 'is_clean' above, except this time asking "has the
+ * region been recovered?" vs. "is the region not being modified?"
+ */
+#define DM_ULOG_IN_SYNC                8
+
+/*
+ * DM_ULOG_FLUSH corresponds to (found in dm-dirty-log.h):
+ * int (*flush)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     None.
+ *
+ * No incoming or outgoing payload.  Simply flush log state to disk.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_FLUSH                  9
+
+/*
+ * DM_ULOG_MARK_REGION corresponds to (found in dm-dirty-log.h):
+ * void (*mark_region)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *     uint64_t [] - region(s) to mark
+ * Payload-to-kernel:
+ *     None.
+ *
+ * Incoming payload contains the one or more regions to mark dirty.
+ * The number of regions contained in the payload can be determined from
+ * 'data_size/sizeof(uint64_t)'.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_MARK_REGION           10
+
+/*
+ * DM_ULOG_CLEAR_REGION corresponds to (found in dm-dirty-log.h):
+ * void (*clear_region)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *     uint64_t [] - region(s) to clear
+ * Payload-to-kernel:
+ *     None.
+ *
+ * Incoming payload contains the one or more regions to mark clean.
+ * The number of regions contained in the payload can be determined from
+ * 'data_size/sizeof(uint64_t)'.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_CLEAR_REGION          11
+
+/*
+ * DM_ULOG_GET_RESYNC_WORK corresponds to (found in dm-dirty-log.h):
+ * int (*get_resync_work)(struct dm_dirty_log *log, region_t *region);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     {
+ *             int64_t i; -- 1 if recovery necessary, 0 otherwise
+ *             uint64_t r; -- The region to recover if i=1
+ *     }
+ * 'data_size' should be set appropriately.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field appropriately.
+ */
+#define DM_ULOG_GET_RESYNC_WORK       12
+
+/*
+ * DM_ULOG_SET_REGION_SYNC corresponds to (found in dm-dirty-log.h):
+ * void (*set_region_sync)(struct dm_dirty_log *log,
+ *                        region_t region, int in_sync);
+ *
+ * Payload-to-userspace:
+ *     {
+ *             uint64_t - region to set sync state on
+ *             int64_t  - 0 if not-in-sync, 1 if in-sync
+ *     }
+ * Payload-to-kernel:
+ *     None.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_SET_REGION_SYNC       13
+
+/*
+ * DM_ULOG_GET_SYNC_COUNT corresponds to (found in dm-dirty-log.h):
+ * region_t (*get_sync_count)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     uint64_t - the number of in-sync regions
+ *
+ * No incoming payload.  Kernel-bound payload contains the number of
+ * regions that are in-sync (in a size_t).
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_GET_SYNC_COUNT        14
+
+/*
+ * DM_ULOG_STATUS_INFO corresponds to (found in dm-dirty-log.h):
+ * int (*status)(struct dm_dirty_log *log, STATUSTYPE_INFO,
+ *              char *result, unsigned maxlen);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     Character string containing STATUSTYPE_INFO
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_STATUS_INFO           15
+
+/*
+ * DM_ULOG_STATUS_TABLE corresponds to (found in dm-dirty-log.h):
+ * int (*status)(struct dm_dirty_log *log, STATUSTYPE_TABLE,
+ *              char *result, unsigned maxlen);
+ *
+ * Payload-to-userspace:
+ *     None.
+ * Payload-to-kernel:
+ *     Character string containing STATUSTYPE_TABLE
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_STATUS_TABLE          16
+
+/*
+ * DM_ULOG_IS_REMOTE_RECOVERING corresponds to (found in dm-dirty-log.h):
+ * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *     uint64_t - region to determine recovery status on
+ * Payload-to-kernel:
+ *     {
+ *             int64_t is_recovering;  -- 0 if no, 1 if yes
+ *             uint64_t in_sync_hint;  -- lowest region still needing resync
+ *     }
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_IS_REMOTE_RECOVERING  17
+
+/*
+ * (DM_ULOG_REQUEST_MASK & request_type) to get the request type
+ *
+ * Payload-to-userspace:
+ *     A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *     None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * We are reserving 8 bits of the 32-bit 'request_type' field for the
+ * various request types above.  The remaining 24-bits are currently
+ * set to zero and are reserved for future use and compatibility concerns.
+ *
+ * User-space should always use DM_ULOG_REQUEST_TYPE to acquire the
+ * request type from the 'request_type' field to maintain forward compatibility.
+ */
+#define DM_ULOG_REQUEST_MASK 0xFF
+#define DM_ULOG_REQUEST_TYPE(request_type) \
+       (DM_ULOG_REQUEST_MASK & (request_type))
+
+/*
+ * DM_ULOG_REQUEST_VERSION is incremented when there is a
+ * change to the way information is passed between kernel
+ * and userspace.  This could be a structure change of
+ * dm_ulog_request or a change in the way requests are
+ * issued/handled.  Changes are outlined here:
+ *     version 1:  Initial implementation
+ *     version 2:  DM_ULOG_CTR allowed to return a string containing a
+ *                 device name that is to be registered with DM via
+ *                 'dm_get_device'.
+ */
+#define DM_ULOG_REQUEST_VERSION 2
+
+struct dm_ulog_request {
+       /*
+        * The local unique identifier (luid) and the universally unique
+        * identifier (uuid) are used to tie a request to a specific
+        * mirror log.  A single machine log could probably make do with
+        * just the 'luid', but a cluster-aware log must use the 'uuid' and
+        * the 'luid'.  The uuid is what is required for node to node
+        * communication concerning a particular log, but the 'luid' helps
+        * differentiate between logs that are being swapped and have the
+        * same 'uuid'.  (Think "live" and "inactive" device-mapper tables.)
+        */
+       uint64_t luid;
+       char uuid[DM_UUID_LEN];
+       char padding[3];        /* Padding because DM_UUID_LEN = 129 */
+
+       uint32_t version;       /* See DM_ULOG_REQUEST_VERSION */
+       int32_t error;          /* Used to report back processing errors */
+
+       uint32_t seq;           /* Sequence number for request */
+       uint32_t request_type;  /* DM_ULOG_* defined above */
+       uint32_t data_size;     /* How much data (not including this struct) */
+
+       char data[];
+};
+
+#endif /* __DM_LOG_USERSPACE_H__ */
diff --git a/device_mapper/misc/dm-logging.h b/device_mapper/misc/dm-logging.h
new file mode 100644 (file)
index 0000000..a35480e
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2016 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DM_LOGGING_H
+#define _DM_LOGGING_H
+
+#include "libdevmapper.h"
+
+extern dm_log_with_errno_fn dm_log_with_errno;
+
+#define LOG_MESG(l, f, ln, e, x...) \
+       dm_log_with_errno(l, f, ln, e, ## x)
+
+#define LOG_LINE(l, x...) LOG_MESG(l, __FILE__, __LINE__, 0, ## x)
+#define LOG_LINE_WITH_ERRNO(l, e, x...) LOG_MESG(l, __FILE__, __LINE__, e, ## x)
+
+/* Debug messages may have a type instead of an errno */
+#define LOG_LINE_WITH_CLASS(l, c, x...) LOG_MESG(l, __FILE__, __LINE__, c, ## x)
+
+#include "lib/log/log.h"
+
+#endif
diff --git a/device_mapper/misc/dmlib.h b/device_mapper/misc/dmlib.h
new file mode 100644 (file)
index 0000000..ba376bc
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This file must be included first by every device-mapper library source file.
+ */
+#ifndef _DM_LIB_H
+#define _DM_LIB_H
+
+// FIXME: get rid of this whole file
+#include "configure.h"
+
+#define _REENTRANT
+#define _GNU_SOURCE
+
+#include "libdevmapper.h"
+#include "lib/misc/util.h"
+#include "dm-logging.h"
+
+#endif
diff --git a/device_mapper/misc/kdev_t.h b/device_mapper/misc/kdev_t.h
new file mode 100644 (file)
index 0000000..f88bb0a
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _LIBDM_KDEV_H
+#define _LIBDM_KDEV_H
+
+#define MAJOR(dev)      ((dev & 0xfff00) >> 8)
+#define MINOR(dev)      ((dev & 0xff) | ((dev >> 12) & 0xfff00))
+#define MKDEV(ma,mi)    ((mi & 0xff) | (ma << 8) | ((mi & ~0xff) << 12))
+
+#endif
diff --git a/device_mapper/mm/dbg_malloc.c b/device_mapper/mm/dbg_malloc.c
new file mode 100644 (file)
index 0000000..a17203c
--- /dev/null
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#ifdef VALGRIND_POOL
+#include "memcheck.h"
+#endif
+#include <assert.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+void *dm_malloc_aux(size_t s, const char *file, int line)
+        __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_malloc_aux_debug(size_t s, const char *file, int line)
+        __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+static void *_dm_malloc_aligned_aux(size_t s, size_t a, const char *file, int line)
+        __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_zalloc_aux(size_t s, const char *file, int line)
+        __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_zalloc_aux_debug(size_t s, const char *file, int line)
+        __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_realloc_aux(void *p, unsigned int s, const char *file, int line)
+        __attribute__((__warn_unused_result__));
+void dm_free_aux(void *p);
+char *dm_strdup_aux(const char *str, const char *file, int line)
+        __attribute__((__warn_unused_result__));
+int dm_dump_memory_debug(void);
+void dm_bounds_check_debug(void);
+
+char *dm_strdup_aux(const char *str, const char *file, int line)
+{
+       char *ret;
+
+       if (!str) {
+               log_error(INTERNAL_ERROR "dm_strdup called with NULL pointer");
+               return NULL;
+       }
+
+       if ((ret = dm_malloc_aux_debug(strlen(str) + 1, file, line)))
+               strcpy(ret, str);
+
+       return ret;
+}
+
+struct memblock {
+       struct memblock *prev, *next;   /* All allocated blocks are linked */
+       size_t length;          /* Size of the requested block */
+       int id;                 /* Index of the block */
+       const char *file;       /* File that allocated */
+       int line;               /* Line that allocated */
+       void *magic;            /* Address of this block */
+} __attribute__((aligned(8)));
+
+static struct {
+       unsigned block_serialno;/* Non-decreasing serialno of block */
+       unsigned blocks_allocated; /* Current number of blocks allocated */
+       unsigned blocks_max;    /* Max no of concurrently-allocated blocks */
+       unsigned int bytes, mbytes;
+
+} _mem_stats = {
+0, 0, 0, 0, 0};
+
+static struct memblock *_head = 0;
+static struct memblock *_tail = 0;
+
+void *dm_malloc_aux_debug(size_t s, const char *file, int line)
+{
+       struct memblock *nb;
+       size_t tsize = s + sizeof(*nb) + sizeof(unsigned long);
+
+       if (s > 50000000) {
+               log_error("Huge memory allocation (size %" PRIsize_t
+                         ") rejected - metadata corruption?", s);
+               return 0;
+       }
+
+       if (!(nb = malloc(tsize))) {
+               log_error("couldn't allocate any memory, size = %" PRIsize_t,
+                         s);
+               return 0;
+       }
+
+       /* set up the file and line info */
+       nb->file = file;
+       nb->line = line;
+
+       dm_bounds_check();
+
+       /* setup fields */
+       nb->magic = nb + 1;
+       nb->length = s;
+       nb->id = ++_mem_stats.block_serialno;
+       nb->next = 0;
+
+       /* stomp a pretty pattern across the new memory
+          and fill in the boundary bytes */
+       {
+               char *ptr = (char *) (nb + 1);
+               size_t i;
+               for (i = 0; i < s; i++)
+                       *ptr++ = i & 0x1 ? (char) 0xba : (char) 0xbe;
+
+               for (i = 0; i < sizeof(unsigned long); i++)
+                       *ptr++ = (char) nb->id;
+       }
+
+       nb->prev = _tail;
+
+       /* link to tail of the list */
+       if (!_head)
+               _head = _tail = nb;
+       else {
+               _tail->next = nb;
+               _tail = nb;
+       }
+
+       _mem_stats.blocks_allocated++;
+       if (_mem_stats.blocks_allocated > _mem_stats.blocks_max)
+               _mem_stats.blocks_max = _mem_stats.blocks_allocated;
+
+       _mem_stats.bytes += s;
+       if (_mem_stats.bytes > _mem_stats.mbytes)
+               _mem_stats.mbytes = _mem_stats.bytes;
+
+       /* log_debug_mem("Allocated: %u %u %u", nb->id, _mem_stats.blocks_allocated,
+                 _mem_stats.bytes); */
+#ifdef VALGRIND_POOL
+       VALGRIND_MAKE_MEM_UNDEFINED(nb + 1, s);
+#endif
+       return nb + 1;
+}
+
+void *dm_zalloc_aux_debug(size_t s, const char *file, int line)
+{
+       void *ptr = dm_malloc_aux_debug(s, file, line);
+
+       if (ptr)
+               memset(ptr, 0, s);
+
+       return ptr;
+}
+
+void dm_free_aux(void *p)
+{
+       char *ptr;
+       size_t i;
+       struct memblock *mb = ((struct memblock *) p) - 1;
+       if (!p)
+               return;
+
+       dm_bounds_check();
+
+       /* sanity check */
+       assert(mb->magic == p);
+#ifdef VALGRIND_POOL
+       VALGRIND_MAKE_MEM_DEFINED(p, mb->length);
+#endif
+       /* check data at the far boundary */
+       ptr = (char *) p + mb->length;
+       for (i = 0; i < sizeof(unsigned long); i++)
+               if (ptr[i] != (char) mb->id)
+                       assert(!"Damage at far end of block");
+
+       /* have we freed this before ? */
+       assert(mb->id != 0);
+
+       /* unlink */
+       if (mb->prev)
+               mb->prev->next = mb->next;
+       else
+               _head = mb->next;
+
+       if (mb->next)
+               mb->next->prev = mb->prev;
+       else
+               _tail = mb->prev;
+
+       mb->id = 0;
+
+       /* stomp a different pattern across the memory */
+       ptr = p;
+       for (i = 0; i < mb->length; i++)
+               ptr[i] = i & 1 ? (char) 0xde : (char) 0xad;
+
+       assert(_mem_stats.blocks_allocated);
+       _mem_stats.blocks_allocated--;
+       _mem_stats.bytes -= mb->length;
+
+       /* free the memory */
+       free(mb);
+}
+
+void *dm_realloc_aux(void *p, unsigned int s, const char *file, int line)
+{
+       void *r;
+       struct memblock *mb = ((struct memblock *) p) - 1;
+
+       r = dm_malloc_aux_debug(s, file, line);
+
+       if (r && p) {
+               memcpy(r, p, mb->length);
+               dm_free_aux(p);
+       }
+
+       return r;
+}
+
+int dm_dump_memory_debug(void)
+{
+       unsigned long tot = 0;
+       struct memblock *mb;
+       char str[32];
+
+       if (_head)
+               log_very_verbose("You have a memory leak:");
+
+       for (mb = _head; mb; mb = mb->next) {
+#ifdef VALGRIND_POOL
+               /*
+                * We can't look at the memory in case it has had
+                * VALGRIND_MAKE_MEM_NOACCESS called on it.
+                */
+               str[0] = '\0';
+#else
+               size_t c;
+
+               for (c = 0; c < sizeof(str) - 1; c++) {
+                       if (c >= mb->length)
+                               str[c] = ' ';
+                       else if (((char *)mb->magic)[c] == '\0')
+                               str[c] = '\0';
+                       else if (((char *)mb->magic)[c] < ' ')
+                               str[c] = '?';
+                       else
+                               str[c] = ((char *)mb->magic)[c];
+               }
+               str[sizeof(str) - 1] = '\0';
+#endif
+
+               LOG_MESG(_LOG_INFO, mb->file, mb->line, 0,
+                        "block %d at %p, size %" PRIsize_t "\t [%s]",
+                        mb->id, mb->magic, mb->length, str);
+               tot += mb->length;
+       }
+
+       if (_head)
+               log_very_verbose("%ld bytes leaked in total", tot);
+
+       return 1;
+}
+
+void dm_bounds_check_debug(void)
+{
+       struct memblock *mb = _head;
+       while (mb) {
+               size_t i;
+               char *ptr = ((char *) (mb + 1)) + mb->length;
+               for (i = 0; i < sizeof(unsigned long); i++)
+                       if (*ptr++ != (char) mb->id)
+                               assert(!"Memory smash");
+
+               mb = mb->next;
+       }
+}
+
+void *dm_malloc_aux(size_t s, const char *file __attribute__((unused)),
+                   int line __attribute__((unused)))
+{
+       if (s > 50000000) {
+               log_error("Huge memory allocation (size %" PRIsize_t
+                         ") rejected - metadata corruption?", s);
+               return 0;
+       }
+
+       return malloc(s);
+}
+
+/* Allocate size s with alignment a (or page size if 0) */
+static void *_dm_malloc_aligned_aux(size_t s, size_t a, const char *file __attribute__((unused)),
+                                   int line __attribute__((unused)))
+{
+       void *memptr;
+       int r;
+
+       if (!a)
+               a = getpagesize();
+
+       if (s > 50000000) {
+               log_error("Huge memory allocation (size %" PRIsize_t
+                         ") rejected - metadata corruption?", s);
+               return 0;
+       }
+
+       if ((r = posix_memalign(&memptr, a, s))) {
+               log_error("Failed to allocate %" PRIsize_t " bytes aligned to %" PRIsize_t ": %s", s, a, strerror(r));
+               return 0;
+       }
+
+       return memptr;
+}
+
+void *dm_zalloc_aux(size_t s, const char *file, int line)
+{
+       void *ptr = dm_malloc_aux(s, file, line);
+
+       if (ptr)
+               memset(ptr, 0, s);
+
+       return ptr;
+}
+
+#ifdef DEBUG_MEM
+
+void *dm_malloc_wrapper(size_t s, const char *file, int line)
+{
+       return dm_malloc_aux_debug(s, file, line);
+}
+
+void *dm_malloc_aligned_wrapper(size_t s, size_t a, const char *file, int line)
+{
+       /* FIXME Implement alignment when debugging - currently just ignored */
+       return _dm_malloc_aux_debug(s, file, line);
+}
+
+void *dm_zalloc_wrapper(size_t s, const char *file, int line)
+{
+       return dm_zalloc_aux_debug(s, file, line);
+}
+
+char *dm_strdup_wrapper(const char *str, const char *file, int line)
+{
+       return dm_strdup_aux(str, file, line);
+}
+
+void dm_free_wrapper(void *ptr)
+{
+       dm_free_aux(ptr);
+}
+
+void *dm_realloc_wrapper(void *p, unsigned int s, const char *file, int line)
+{
+       return dm_realloc_aux(p, s, file, line);
+}
+
+int dm_dump_memory_wrapper(void)
+{
+       return dm_dump_memory_debug();
+}
+
+void dm_bounds_check_wrapper(void)
+{
+       dm_bounds_check_debug();
+}
+
+#else /* !DEBUG_MEM */
+
+void *dm_malloc_wrapper(size_t s, const char *file, int line)
+{
+       return dm_malloc_aux(s, file, line);
+}
+
+void *dm_malloc_aligned_wrapper(size_t s, size_t a, const char *file, int line)
+{
+       return _dm_malloc_aligned_aux(s, a, file, line);
+}
+
+void *dm_zalloc_wrapper(size_t s, const char *file, int line)
+{
+       return dm_zalloc_aux(s, file, line);
+}
+
+char *dm_strdup_wrapper(const char *str,
+                       const char *file __attribute__((unused)),
+                       int line __attribute__((unused)))
+{
+       return strdup(str);
+}
+
+void dm_free_wrapper(void *ptr)
+{
+       free(ptr);
+}
+
+void *dm_realloc_wrapper(void *p, unsigned int s, 
+                        const char *file __attribute__((unused)),
+                        int line __attribute__((unused)))
+{
+       return realloc(p, s);
+}
+
+int dm_dump_memory_wrapper(void)
+{
+       return 1;
+}
+
+void dm_bounds_check_wrapper(void)
+{
+}
+
+#endif /* DEBUG_MEM */
diff --git a/device_mapper/mm/pool-debug.c b/device_mapper/mm/pool-debug.c
new file mode 100644 (file)
index 0000000..c523238
--- /dev/null
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dmlib.h"
+#include <assert.h>
+
+struct block {
+       struct block *next;
+       size_t size;
+       void *data;
+};
+
+typedef struct {
+       unsigned block_serialno;        /* Non-decreasing serialno of block */
+       unsigned blocks_allocated;      /* Current number of blocks allocated */
+       unsigned blocks_max;    /* Max no of concurrently-allocated blocks */
+       unsigned int bytes, maxbytes;
+} pool_stats;
+
+struct dm_pool {
+       struct dm_list list;
+       const char *name;
+       void *orig_pool;        /* to pair it with first allocation call */
+       unsigned locked;
+       long crc;
+
+       int begun;
+       struct block *object;
+
+       struct block *blocks;
+       struct block *tail;
+
+       pool_stats stats;
+};
+
+/* by default things come out aligned for doubles */
+#define DEFAULT_ALIGNMENT __alignof__ (double)
+
+struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint)
+{
+       struct dm_pool *mem = dm_zalloc(sizeof(*mem));
+
+       if (!mem) {
+               log_error("Couldn't create memory pool %s (size %"
+                         PRIsize_t ")", name, sizeof(*mem));
+               return NULL;
+       }
+
+       mem->name = name;
+       mem->orig_pool = mem;
+
+#ifdef DEBUG_POOL
+       log_debug_mem("Created mempool %s at %p", name, mem);
+#endif
+
+       dm_list_add(&_dm_pools, &mem->list);
+       return mem;
+}
+
+static void _free_blocks(struct dm_pool *p, struct block *b)
+{
+       struct block *n;
+
+       if (p->locked)
+               log_error(INTERNAL_ERROR "_free_blocks from locked pool %s",
+                         p->name);
+
+       while (b) {
+               p->stats.bytes -= b->size;
+               p->stats.blocks_allocated--;
+
+               n = b->next;
+               dm_free(b->data);
+               dm_free(b);
+               b = n;
+       }
+}
+
+static void _pool_stats(struct dm_pool *p, const char *action)
+{
+#ifdef DEBUG_POOL
+       log_debug_mem("%s mempool %s at %p: %u/%u bytes, %u/%u blocks, "
+                     "%u allocations)", action, p->name, p, p->stats.bytes,
+                     p->stats.maxbytes, p->stats.blocks_allocated,
+                     p->stats.blocks_max, p->stats.block_serialno);
+#else
+       ;
+#endif
+}
+
+void dm_pool_destroy(struct dm_pool *p)
+{
+       _pool_stats(p, "Destroying");
+       _free_blocks(p, p->blocks);
+       dm_list_del(&p->list);
+       dm_free(p);
+}
+
+void *dm_pool_alloc(struct dm_pool *p, size_t s)
+{
+       return dm_pool_alloc_aligned(p, s, DEFAULT_ALIGNMENT);
+}
+
+static void _append_block(struct dm_pool *p, struct block *b)
+{
+       if (p->locked)
+               log_error(INTERNAL_ERROR "_append_blocks to locked pool %s",
+                         p->name);
+
+       if (p->tail) {
+               p->tail->next = b;
+               p->tail = b;
+       } else
+               p->blocks = p->tail = b;
+
+       p->stats.block_serialno++;
+       p->stats.blocks_allocated++;
+       if (p->stats.blocks_allocated > p->stats.blocks_max)
+               p->stats.blocks_max = p->stats.blocks_allocated;
+
+       p->stats.bytes += b->size;
+       if (p->stats.bytes > p->stats.maxbytes)
+               p->stats.maxbytes = p->stats.bytes;
+}
+
+static struct block *_new_block(size_t s, unsigned alignment)
+{
+       /* FIXME: I'm currently ignoring the alignment arg. */
+       size_t len = sizeof(struct block) + s;
+       struct block *b = dm_malloc(len);
+
+       /*
+        * Too lazy to implement alignment for debug version, and
+        * I don't think LVM will use anything but default
+        * align.
+        */
+       assert(alignment <= DEFAULT_ALIGNMENT);
+
+       if (!b) {
+               log_error("Out of memory");
+               return NULL;
+       }
+
+       if (!(b->data = dm_malloc(s))) {
+               log_error("Out of memory");
+               dm_free(b);
+               return NULL;
+       }
+
+       b->next = NULL;
+       b->size = s;
+
+       return b;
+}
+
+void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment)
+{
+       struct block *b = _new_block(s, alignment);
+
+       if (!b)
+               return_NULL;
+
+       _append_block(p, b);
+
+       return b->data;
+}
+
+void dm_pool_empty(struct dm_pool *p)
+{
+       _pool_stats(p, "Emptying");
+       _free_blocks(p, p->blocks);
+       p->blocks = p->tail = NULL;
+}
+
+void dm_pool_free(struct dm_pool *p, void *ptr)
+{
+       struct block *b, *prev = NULL;
+
+       _pool_stats(p, "Freeing (before)");
+
+       for (b = p->blocks; b; b = b->next) {
+               if (b->data == ptr)
+                       break;
+               prev = b;
+       }
+
+       /*
+        * If this fires then you tried to free a
+        * pointer that either wasn't from this
+        * pool, or isn't the start of a block.
+        */
+       assert(b);
+
+       _free_blocks(p, b);
+
+       if (prev) {
+               p->tail = prev;
+               prev->next = NULL;
+       } else
+               p->blocks = p->tail = NULL;
+
+       _pool_stats(p, "Freeing (after)");
+}
+
+int dm_pool_begin_object(struct dm_pool *p, size_t init_size)
+{
+       assert(!p->begun);
+       p->begun = 1;
+       return 1;
+}
+
+int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta)
+{
+       struct block *new;
+       size_t new_size;
+
+       if (p->locked)
+               log_error(INTERNAL_ERROR "Grow objects in locked pool %s",
+                         p->name);
+
+       if (!delta)
+               delta = strlen(extra);
+
+       assert(p->begun);
+
+       if (p->object)
+               new_size = delta + p->object->size;
+       else
+               new_size = delta;
+
+       if (!(new = _new_block(new_size, DEFAULT_ALIGNMENT))) {
+               log_error("Couldn't extend object.");
+               return 0;
+       }
+
+       if (p->object) {
+               memcpy(new->data, p->object->data, p->object->size);
+               dm_free(p->object->data);
+               dm_free(p->object);
+       }
+       p->object = new;
+
+       memcpy((char*)new->data + new_size - delta, extra, delta);
+
+       return 1;
+}
+
+void *dm_pool_end_object(struct dm_pool *p)
+{
+       assert(p->begun);
+       _append_block(p, p->object);
+
+       p->begun = 0;
+       p->object = NULL;
+       return p->tail->data;
+}
+
+void dm_pool_abandon_object(struct dm_pool *p)
+{
+       assert(p->begun);
+       dm_free(p->object);
+       p->begun = 0;
+       p->object = NULL;
+}
+
+static long _pool_crc(const struct dm_pool *p)
+{
+#ifndef DEBUG_ENFORCE_POOL_LOCKING
+#warning pool crc not implemented with pool debug
+#endif
+       return 0;
+}
+
+static int _pool_protect(struct dm_pool *p, int prot)
+{
+#ifdef DEBUG_ENFORCE_POOL_LOCKING
+#warning pool mprotect not implemented with pool debug
+#endif
+       return 1;
+}
diff --git a/device_mapper/mm/pool-fast.c b/device_mapper/mm/pool-fast.c
new file mode 100644 (file)
index 0000000..895872e
--- /dev/null
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef VALGRIND_POOL
+#include "memcheck.h"
+#endif
+
+#include "misc/dmlib.h"
+#include <stddef.h>    /* For musl libc */
+#include <malloc.h>
+
+struct chunk {
+       char *begin, *end;
+       struct chunk *prev;
+} __attribute__((aligned(8)));
+
+struct dm_pool {
+       struct dm_list list;
+       struct chunk *chunk, *spare_chunk;      /* spare_chunk is a one entry free
+                                                  list to stop 'bobbling' */
+       const char *name;
+       size_t chunk_size;
+       size_t object_len;
+       unsigned object_alignment;
+       int locked;
+       long crc;
+};
+
+static void _align_chunk(struct chunk *c, unsigned alignment);
+static struct chunk *_new_chunk(struct dm_pool *p, size_t s);
+static void _free_chunk(struct chunk *c);
+
+/* by default things come out aligned for doubles */
+#define DEFAULT_ALIGNMENT __alignof__ (double)
+
+struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint)
+{
+       size_t new_size = 1024;
+       struct dm_pool *p = dm_zalloc(sizeof(*p));
+
+       if (!p) {
+               log_error("Couldn't create memory pool %s (size %"
+                         PRIsize_t ")", name, sizeof(*p));
+               return 0;
+       }
+
+       p->name = name;
+       /* round chunk_hint up to the next power of 2 */
+       p->chunk_size = chunk_hint + sizeof(struct chunk);
+       while (new_size < p->chunk_size)
+               new_size <<= 1;
+       p->chunk_size = new_size;
+       pthread_mutex_lock(&_dm_pools_mutex);
+       dm_list_add(&_dm_pools, &p->list);
+       pthread_mutex_unlock(&_dm_pools_mutex);
+       return p;
+}
+
+void dm_pool_destroy(struct dm_pool *p)
+{
+       struct chunk *c, *pr;
+       _free_chunk(p->spare_chunk);
+       c = p->chunk;
+       while (c) {
+               pr = c->prev;
+               _free_chunk(c);
+               c = pr;
+       }
+
+       pthread_mutex_lock(&_dm_pools_mutex);
+       dm_list_del(&p->list);
+       pthread_mutex_unlock(&_dm_pools_mutex);
+       dm_free(p);
+}
+
+void *dm_pool_alloc(struct dm_pool *p, size_t s)
+{
+       return dm_pool_alloc_aligned(p, s, DEFAULT_ALIGNMENT);
+}
+
+void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment)
+{
+       struct chunk *c = p->chunk;
+       void *r;
+
+       /* realign begin */
+       if (c)
+               _align_chunk(c, alignment);
+
+       /* have we got room ? */
+       if (!c || (c->begin > c->end) || ((c->end - c->begin) < (int) s)) {
+               /* allocate new chunk */
+               size_t needed = s + alignment + sizeof(struct chunk);
+               c = _new_chunk(p, (needed > p->chunk_size) ?
+                              needed : p->chunk_size);
+
+               if (!c)
+                       return_NULL;
+
+               _align_chunk(c, alignment);
+       }
+
+       r = c->begin;
+       c->begin += s;
+
+#ifdef VALGRIND_POOL
+       VALGRIND_MAKE_MEM_UNDEFINED(r, s);
+#endif
+
+       return r;
+}
+
+void dm_pool_empty(struct dm_pool *p)
+{
+       struct chunk *c;
+
+       for (c = p->chunk; c && c->prev; c = c->prev)
+               ;
+
+       if (c)
+               dm_pool_free(p, (char *) (c + 1));
+}
+
+void dm_pool_free(struct dm_pool *p, void *ptr)
+{
+       struct chunk *c = p->chunk;
+
+       while (c) {
+               if (((char *) c < (char *) ptr) &&
+                   ((char *) c->end > (char *) ptr)) {
+                       c->begin = ptr;
+#ifdef VALGRIND_POOL
+                       VALGRIND_MAKE_MEM_NOACCESS(c->begin, c->end - c->begin);
+#endif
+                       break;
+               }
+
+               if (p->spare_chunk)
+                       _free_chunk(p->spare_chunk);
+
+               c->begin = (char *) (c + 1);
+#ifdef VALGRIND_POOL
+                VALGRIND_MAKE_MEM_NOACCESS(c->begin, c->end - c->begin);
+#endif
+
+               p->spare_chunk = c;
+               c = c->prev;
+       }
+
+       if (!c)
+               log_error(INTERNAL_ERROR "pool_free asked to free pointer "
+                         "not in pool");
+       else
+               p->chunk = c;
+}
+
+int dm_pool_begin_object(struct dm_pool *p, size_t hint)
+{
+       struct chunk *c = p->chunk;
+       const size_t align = DEFAULT_ALIGNMENT;
+
+       p->object_len = 0;
+       p->object_alignment = align;
+
+       if (c)
+               _align_chunk(c, align);
+
+       if (!c || (c->begin > c->end) || ((c->end - c->begin) < (int) hint)) {
+               /* allocate a new chunk */
+               c = _new_chunk(p,
+                              hint > (p->chunk_size - sizeof(struct chunk)) ?
+                              hint + sizeof(struct chunk) + align :
+                              p->chunk_size);
+
+               if (!c)
+                       return 0;
+
+               _align_chunk(c, align);
+       }
+
+       return 1;
+}
+
+int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta)
+{
+       struct chunk *c = p->chunk, *nc;
+
+       if (!delta)
+               delta = strlen(extra);
+
+       if ((c->end - (c->begin + p->object_len)) < (int) delta) {
+               /* move into a new chunk */
+               if (p->object_len + delta > (p->chunk_size / 2))
+                       nc = _new_chunk(p, (p->object_len + delta) * 2);
+               else
+                       nc = _new_chunk(p, p->chunk_size);
+
+               if (!nc)
+                       return 0;
+
+               _align_chunk(p->chunk, p->object_alignment);
+
+#ifdef VALGRIND_POOL
+               VALGRIND_MAKE_MEM_UNDEFINED(p->chunk->begin, p->object_len);
+#endif
+
+               memcpy(p->chunk->begin, c->begin, p->object_len);
+
+#ifdef VALGRIND_POOL
+               VALGRIND_MAKE_MEM_NOACCESS(c->begin, p->object_len);
+#endif
+
+               c = p->chunk;
+       }
+
+#ifdef VALGRIND_POOL
+       VALGRIND_MAKE_MEM_UNDEFINED(p->chunk->begin + p->object_len, delta);
+#endif
+
+       memcpy(c->begin + p->object_len, extra, delta);
+       p->object_len += delta;
+       return 1;
+}
+
+void *dm_pool_end_object(struct dm_pool *p)
+{
+       struct chunk *c = p->chunk;
+       void *r = c->begin;
+       c->begin += p->object_len;
+       p->object_len = 0u;
+       p->object_alignment = DEFAULT_ALIGNMENT;
+       return r;
+}
+
+void dm_pool_abandon_object(struct dm_pool *p)
+{
+#ifdef VALGRIND_POOL
+       VALGRIND_MAKE_MEM_NOACCESS(p->chunk, p->object_len);
+#endif
+       p->object_len = 0;
+       p->object_alignment = DEFAULT_ALIGNMENT;
+}
+
+static void _align_chunk(struct chunk *c, unsigned alignment)
+{
+       c->begin += alignment - ((unsigned long) c->begin & (alignment - 1));
+}
+
+static struct chunk *_new_chunk(struct dm_pool *p, size_t s)
+{
+       struct chunk *c;
+
+       if (p->spare_chunk &&
+           ((p->spare_chunk->end - p->spare_chunk->begin) >= (ptrdiff_t)s)) {
+               /* reuse old chunk */
+               c = p->spare_chunk;
+               p->spare_chunk = 0;
+       } else {
+#ifdef DEBUG_ENFORCE_POOL_LOCKING
+               if (!_pagesize) {
+                       _pagesize = getpagesize(); /* lvm_pagesize(); */
+                       _pagesize_mask = _pagesize - 1;
+               }
+               /*
+                * Allocate a page-aligned size so that malloc can work.
+                * Otherwise a page fault would be triggered by writes to
+                * malloc's internal pointers in pool-unrelated memory.
+                */
+#  define aligned_malloc(s)    (posix_memalign((void**)&c, _pagesize, \
+                                               ALIGN_ON_PAGE(s)) == 0)
+#else
+#  define aligned_malloc(s)    (c = dm_malloc(s))
+#endif /* DEBUG_ENFORCE_POOL_LOCKING */
+               if (!aligned_malloc(s)) {
+#undef aligned_malloc
+                       log_error("Out of memory.  Requested %" PRIsize_t
+                                 " bytes.", s);
+                       return NULL;
+               }
+
+               c->begin = (char *) (c + 1);
+               c->end = (char *) c + s;
+
+#ifdef VALGRIND_POOL
+               VALGRIND_MAKE_MEM_NOACCESS(c->begin, c->end - c->begin);
+#endif
+       }
+
+       c->prev = p->chunk;
+       p->chunk = c;
+       return c;
+}
+
+static void _free_chunk(struct chunk *c)
+{
+#ifdef VALGRIND_POOL
+#  ifdef DEBUG_MEM
+       if (c)
+               VALGRIND_MAKE_MEM_UNDEFINED(c + 1, c->end - (char *) (c + 1));
+#  endif
+#endif
+#ifdef DEBUG_ENFORCE_POOL_LOCKING
+       /* since DEBUG_MEM is using own memory list */
+       free(c); /* for posix_memalign() */
+#else
+       dm_free(c);
+#endif
+}
+
+
+/**
+ * Calc crc/hash from pool's memory chunks with internal pointers
+ */
+static long _pool_crc(const struct dm_pool *p)
+{
+       long crc_hash = 0;
+#ifndef DEBUG_ENFORCE_POOL_LOCKING
+       const struct chunk *c;
+       const long *ptr, *end;
+
+       for (c = p->chunk; c; c = c->prev) {
+               end = (const long *) (c->begin < c->end ? (long) c->begin & ~7: (long) c->end);
+               ptr = (const long *) c;
+#ifdef VALGRIND_POOL
+               VALGRIND_MAKE_MEM_DEFINED(ptr, (end - ptr) * sizeof(*end));
+#endif
+               while (ptr < end) {
+                       crc_hash += *ptr++;
+                       crc_hash += (crc_hash << 10);
+                       crc_hash ^= (crc_hash >> 6);
+               }
+       }
+#endif /* DEBUG_ENFORCE_POOL_LOCKING */
+
+       return crc_hash;
+}
+
+static int _pool_protect(struct dm_pool *p, int prot)
+{
+#ifdef DEBUG_ENFORCE_POOL_LOCKING
+       struct chunk *c;
+
+       for (c = p->chunk; c; c = c->prev) {
+               if (mprotect(c, (size_t) ((c->end - (char *) c) - 1), prot) != 0) {
+                       log_sys_error("mprotect", "");
+                       return 0;
+               }
+       }
+#endif
+       return 1;
+}
diff --git a/device_mapper/mm/pool.c b/device_mapper/mm/pool.c
new file mode 100644 (file)
index 0000000..a710704
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include <sys/mman.h>
+#include <pthread.h>
+
+/* Registry of all live pools, used by dm_pools_check_leaks() at shutdown;
+ * guarded by _dm_pools_mutex since pools may be created from any thread. */
+static DM_LIST_INIT(_dm_pools);
+static pthread_mutex_t _dm_pools_mutex = PTHREAD_MUTEX_INITIALIZER;
+void dm_pools_check_leaks(void);
+
+#ifdef DEBUG_ENFORCE_POOL_LOCKING
+#ifdef DEBUG_POOL
+#error Do not use DEBUG_POOL with DEBUG_ENFORCE_POOL_LOCKING
+#endif
+
+/*
+ * Use mprotect system call to ensure all locked pages are not writable.
+ * Generates segmentation fault with write access to the locked pool.
+ *
+ * - Implementation is using posix_memalign() to get page aligned
+ *   memory blocks (could be implemented also through malloc).
+ * - Only pool-fast is properly handled for now.
+ * - Checksum is slower compared to mprotect.
+ */
+/* Page size (and its mask) cached on first use by the allocator. */
+static size_t _pagesize = 0;
+static size_t _pagesize_mask = 0;
+/* Round 'size' up to the next multiple of the page size. */
+#define ALIGN_ON_PAGE(size) (((size) + (_pagesize_mask)) & ~(_pagesize_mask))
+#endif
+
+#ifdef DEBUG_POOL
+#include "pool-debug.c"
+#else
+#include "pool-fast.c"
+#endif
+
+/*
+ * Duplicate a NUL-terminated string into pool 'p'.
+ * Returns the copy, or NULL if the pool allocation fails.
+ */
+char *dm_pool_strdup(struct dm_pool *p, const char *str)
+{
+       size_t size = strlen(str) + 1;  /* include the terminating NUL */
+       char *copy = dm_pool_alloc(p, size);
+
+       if (copy)
+               memcpy(copy, str, size);
+
+       return copy;
+}
+
+/*
+ * Copy at most 'n' characters of 'str' into pool 'p', always
+ * NUL-terminating the result.  Returns NULL on allocation failure.
+ */
+char *dm_pool_strndup(struct dm_pool *p, const char *str, size_t n)
+{
+       char *copy = dm_pool_alloc(p, n + 1);
+
+       if (!copy)
+               return NULL;
+
+       strncpy(copy, str, n);
+       copy[n] = '\0';
+
+       return copy;
+}
+
+/*
+ * Allocate 's' bytes from pool 'p' and zero-fill them.
+ * Returns NULL when the underlying pool allocation fails.
+ */
+void *dm_pool_zalloc(struct dm_pool *p, size_t s)
+{
+       void *ptr;
+
+       if ((ptr = dm_pool_alloc(p, s)))
+               memset(ptr, 0, s);
+
+       return ptr;
+}
+
+/*
+ * Report any pools still registered in _dm_pools at shutdown.
+ * Logs one line per leaked pool and a final INTERNAL_ERROR summary;
+ * silently returns when no pools remain.  Thread-safe via _dm_pools_mutex.
+ */
+void dm_pools_check_leaks(void)
+{
+       struct dm_pool *p;
+
+       pthread_mutex_lock(&_dm_pools_mutex);
+       if (dm_list_empty(&_dm_pools)) {
+               pthread_mutex_unlock(&_dm_pools_mutex);
+               return;
+       }
+
+       log_error("You have a memory leak (not released memory pool):");
+       dm_list_iterate_items(p, &_dm_pools) {
+#ifdef DEBUG_POOL
+               /* pool-debug tracks per-pool statistics and the original pool */
+               log_error(" [%p] %s (%u bytes)",
+                         p->orig_pool,
+                         p->name, p->stats.bytes);
+#else
+               log_error(" [%p] %s", p, p->name);
+#endif
+       }
+       pthread_mutex_unlock(&_dm_pools_mutex);
+       log_error(INTERNAL_ERROR "Unreleased memory pool(s) found.");
+}
+
+/**
+ * Status of locked pool.
+ *
+ * \param p
+ * Pool to be tested for lock status.
+ *
+ * \return
+ * 1 when the pool is locked, 0 otherwise.
+ */
+int dm_pool_locked(struct dm_pool *p)
+{
+       /* Simple flag query; set/cleared by dm_pool_lock()/dm_pool_unlock(). */
+       return p->locked;
+}
+
+/**
+ * Lock memory pool.
+ *
+ * \param p
+ * Pool to be locked.
+ *
+ * \param crc
+ * Bool specifies whether to store the pool crc/hash checksum.
+ *
+ * \return
+ * 1 (success) when the pool was properly locked, 0 otherwise.
+ */
+int dm_pool_lock(struct dm_pool *p, int crc)
+{
+       if (p->locked) {
+               log_error(INTERNAL_ERROR "Pool %s is already locked.",
+                         p->name);
+               return 0;
+       }
+
+       if (crc)
+               p->crc = _pool_crc(p);  /* Get crc for pool */
+
+       if (!_pool_protect(p, PROT_READ)) {
+               /* Partial protection may have been applied; roll back to
+                * read-write so the pool remains usable after failure. */
+               _pool_protect(p, PROT_READ | PROT_WRITE);
+               return_0;
+       }
+
+       p->locked = 1;
+
+       log_debug_mem("Pool %s is locked.", p->name);
+
+       return 1;
+}
+
+/**
+ * Unlock memory pool.
+ *
+ * \param p
+ * Pool to be unlocked.
+ *
+ * \param crc
+ * Bool enables compare of the pool crc/hash with the stored value
+ * at pool lock. The pool is not properly unlocked if there is a mismatch.
+ *
+ * \return
+ * 1 (success) when the pool was properly unlocked, 0 otherwise.
+ */
+int dm_pool_unlock(struct dm_pool *p, int crc)
+{
+       if (!p->locked) {
+               log_error(INTERNAL_ERROR "Pool %s is already unlocked.",
+                         p->name);
+               return 0;
+       }
+
+       /* Clear the flag first; pages must be writable before any further
+        * pool use regardless of the checksum result below. */
+       p->locked = 0;
+
+       if (!_pool_protect(p, PROT_READ | PROT_WRITE))
+               return_0;
+
+       log_debug_mem("Pool %s is unlocked.", p->name);
+
+       /* Detect writes that happened while the pool was locked. */
+       if (crc && (p->crc != _pool_crc(p))) {
+               log_error(INTERNAL_ERROR "Pool %s crc mismatch.", p->name);
+               return 0;
+       }
+
+       return 1;
+}
diff --git a/device_mapper/regex/matcher.c b/device_mapper/regex/matcher.c
new file mode 100644 (file)
index 0000000..375c1ab
--- /dev/null
@@ -0,0 +1,575 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "parse_rx.h"
+#include "ttree.h"
+#include "assert.h"
+
+/* One DFA state, built lazily from subsets of regex charset positions. */
+struct dfa_state {
+       struct dfa_state *next;         /* queue link while state is pending */
+       int final;                      /* matched pattern number, -1 = not yet computed */
+       dm_bitset_t bits;               /* set of charset positions this state represents */
+       struct dfa_state *lookup[256];  /* transition per input byte, NULL = not computed */
+};
+
+struct dm_regex {              /* Instance variables for the lexer */
+       struct dfa_state *start;        /* initial DFA state */
+       unsigned num_nodes;             /* nodes in the parse tree */
+        unsigned num_charsets;         /* CHARSET leaves in the parse tree */
+       int nodes_entered;              /* fill cursor for 'nodes' */
+       struct rx_node **nodes;
+        int charsets_entered;          /* fill cursor for 'charsets' */
+        struct rx_node **charsets;     /* CHARSET leaves indexed by charset_index */
+       struct dm_pool *scratch, *mem;  /* scratch: build-time only; mem: lifetime of regex */
+
+        /* stuff for on the fly dfa calculation */
+        dm_bitset_t charmap[256];      /* for each byte, which charsets contain it */
+        dm_bitset_t dfa_copy;          /* scratch bitset for _calc_state() */
+        struct ttree *tt;              /* maps position-sets to existing DFA states */
+        dm_bitset_t bs;                /* scratch: accumulated followpos union */
+        struct dfa_state *h, *t;       /* head/tail of the pending-state queue */
+};
+
+/* Total number of nodes in the parse tree rooted at 'rx' (inclusive). */
+static int _count_nodes(struct rx_node *rx)
+{
+       return 1 +
+               (rx->left ? _count_nodes(rx->left) : 0) +
+               (rx->right ? _count_nodes(rx->right) : 0);
+}
+
+/* Number of CHARSET leaves in the parse tree rooted at 'rx'. */
+static unsigned _count_charsets(struct rx_node *rx)
+{
+        unsigned count = 0;
+
+        if (rx->type == CHARSET)
+                return 1;
+
+        if (rx->left)
+                count += _count_charsets(rx->left);
+        if (rx->right)
+                count += _count_charsets(rx->right);
+
+        return count;
+}
+
+/* Depth-first walk assigning each CHARSET leaf a sequential charset_index,
+ * incrementing *i as leaves are visited (left subtree before right). */
+static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i)
+{
+        if (rx->type == CHARSET)
+                rx->charset_index = (*i)++;
+        else {
+                if (rx->left)
+                        _enumerate_charsets_internal(rx->left, i);
+                if (rx->right)
+                        _enumerate_charsets_internal(rx->right, i);
+        }
+}
+
+/* Number all CHARSET leaves of the tree starting from index 0. */
+static void _enumerate_charsets(struct rx_node *rx)
+{
+        unsigned i = 0;
+        _enumerate_charsets_internal(rx, &i);
+}
+
+/*
+ * Post-order walk recording every tree node in m->nodes (children before
+ * parents, so later passes can process bottom-up) and every CHARSET leaf
+ * in m->charsets, in charset_index order.
+ */
+static void _fill_table(struct dm_regex *m, struct rx_node *rx)
+{
+       /* An OR node must always have both alternatives. */
+       assert((rx->type != OR) || (rx->left && rx->right));
+
+       if (rx->left)
+               _fill_table(m, rx->left);
+
+       if (rx->right)
+               _fill_table(m, rx->right);
+
+       m->nodes[m->nodes_entered++] = rx;
+        if (rx->type == CHARSET)
+                m->charsets[m->charsets_entered++] = rx;
+}
+
+/*
+ * Allocate the firstpos/lastpos/followpos bitsets (one bit per charset
+ * leaf) for every node, from the scratch pool.  Returns 1 on success,
+ * 0 on allocation failure.
+ */
+static int _create_bitsets(struct dm_regex *m)
+{
+       unsigned i;
+       struct rx_node *n;
+
+       for (i = 0; i < m->num_nodes; i++) {
+               n = m->nodes[i];
+               if (!(n->firstpos = dm_bitset_create(m->scratch, m->num_charsets)))
+                       return_0;
+               if (!(n->lastpos = dm_bitset_create(m->scratch, m->num_charsets)))
+                       return_0;
+               if (!(n->followpos = dm_bitset_create(m->scratch, m->num_charsets)))
+                       return_0;
+       }
+
+       return 1;
+}
+
+/*
+ * Compute nullable, firstpos, lastpos and followpos for every node using
+ * the classic regex-to-DFA construction (Aho/Sethi/Ullman).  Relies on
+ * m->nodes being in post-order (children before parents, see _fill_table).
+ * Also assigns each pattern's terminating TARGET_TRANS charset a 1-based
+ * 'final' number identifying which pattern matched.
+ */
+static void _calc_functions(struct dm_regex *m)
+{
+       unsigned i, j, final = 1;
+       struct rx_node *rx, *c1, *c2;
+
+       for (i = 0; i < m->num_nodes; i++) {
+               rx = m->nodes[i];
+               c1 = rx->left;
+               c2 = rx->right;
+
+               /* Leaves containing TARGET_TRANS mark the end of a pattern;
+                * number them in post-order, starting from 1. */
+               if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS))
+                       rx->final = final++;
+
+               switch (rx->type) {
+               case CAT:
+                       if (c1->nullable)
+                               dm_bit_union(rx->firstpos,
+                                         c1->firstpos, c2->firstpos);
+                       else
+                               dm_bit_copy(rx->firstpos, c1->firstpos);
+
+                       if (c2->nullable)
+                               dm_bit_union(rx->lastpos,
+                                         c1->lastpos, c2->lastpos);
+                       else
+                               dm_bit_copy(rx->lastpos, c2->lastpos);
+
+                       rx->nullable = c1->nullable && c2->nullable;
+                       break;
+
+               case PLUS:
+                       dm_bit_copy(rx->firstpos, c1->firstpos);
+                       dm_bit_copy(rx->lastpos, c1->lastpos);
+                       rx->nullable = c1->nullable;
+                       break;
+
+               case OR:
+                       dm_bit_union(rx->firstpos, c1->firstpos, c2->firstpos);
+                       dm_bit_union(rx->lastpos, c1->lastpos, c2->lastpos);
+                       rx->nullable = c1->nullable || c2->nullable;
+                       break;
+
+               case QUEST:
+               case STAR:
+                       dm_bit_copy(rx->firstpos, c1->firstpos);
+                       dm_bit_copy(rx->lastpos, c1->lastpos);
+                       rx->nullable = 1;
+                       break;
+
+               case CHARSET:
+                       dm_bit_set(rx->firstpos, rx->charset_index);
+                       dm_bit_set(rx->lastpos, rx->charset_index);
+                       rx->nullable = 0;
+                       break;
+
+               default:
+                       log_error(INTERNAL_ERROR "Unknown calc node type");
+               }
+
+               /*
+                * followpos has its own switch
+                * because PLUS and STAR do the
+                * same thing.
+                */
+               switch (rx->type) {
+               case CAT:
+                       /* Positions ending the left side are followed by
+                        * positions starting the right side. */
+                       for (j = 0; j < m->num_charsets; j++) {
+                                struct rx_node *n = m->charsets[j];
+                               if (dm_bit(c1->lastpos, j))
+                                       dm_bit_union(n->followpos,
+                                                     n->followpos, c2->firstpos);
+                       }
+                       break;
+
+               case PLUS:
+               case STAR:
+                       /* Repetition loops the last positions back to the
+                        * first positions of the same subexpression. */
+                       for (j = 0; j < m->num_charsets; j++) {
+                                struct rx_node *n = m->charsets[j];
+                               if (dm_bit(rx->lastpos, j))
+                                       dm_bit_union(n->followpos,
+                                                     n->followpos, rx->firstpos);
+                       }
+                       break;
+               }
+       }
+}
+
+/* Allocate a zeroed DFA state from 'mem'; NULL on allocation failure. */
+static struct dfa_state *_create_dfa_state(struct dm_pool *mem)
+{
+       return dm_pool_zalloc(mem, sizeof(struct dfa_state));
+}
+
+/*
+ * Prepare 'dfa' for the pending queue: give it a private copy of 'bits'
+ * (the position set it represents) and mark its final state as not yet
+ * computed (-1).  Returns 'dfa', or NULL on allocation failure.
+ */
+static struct dfa_state *_create_state_queue(struct dm_pool *mem,
+                                             struct dfa_state *dfa,
+                                             dm_bitset_t bits)
+{
+       if (!(dfa->bits = dm_bitset_create(mem, bits[0])))  /* first element is the size */
+               return_NULL;
+
+       dm_bit_copy(dfa->bits, bits);
+       dfa->next = 0;
+       dfa->final = -1;
+
+       return dfa;
+}
+
+/*
+ * Lazily compute the transition of 'dfa' on input byte 'a': intersect the
+ * state's position set with the charsets containing 'a', union their
+ * followpos sets, and look up (or create and queue) the resulting state.
+ * Expects m->bs to be clear on entry; leaves it clear on exit.
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int _calc_state(struct dm_regex *m, struct dfa_state *dfa, int a)
+{
+        int set_bits = 0, i;
+        dm_bitset_t dfa_bits = dfa->bits;
+        dm_bit_and(m->dfa_copy, m->charmap[a], dfa_bits);
+
+        /* iterate through all the states in firstpos */
+        for (i = dm_bit_get_first(m->dfa_copy); i >= 0; i = dm_bit_get_next(m->dfa_copy, i)) {
+                /* TARGET_TRANS terminates a pattern: record which one. */
+                if (a == TARGET_TRANS)
+                        dfa->final = m->charsets[i]->final;
+
+                dm_bit_union(m->bs, m->bs, m->charsets[i]->followpos);
+                set_bits = 1;
+        }
+
+        if (set_bits) {
+                struct dfa_state *tmp;
+                /* +1 skips the size word at the front of the bitset. */
+                struct dfa_state *ldfa = ttree_lookup(m->tt, m->bs + 1);
+                if (!ldfa) {
+                        /* push */
+                       if (!(ldfa = _create_dfa_state(m->mem)))
+                               return_0;
+
+                       ttree_insert(m->tt, m->bs + 1, ldfa);
+                       if (!(tmp = _create_state_queue(m->scratch, ldfa, m->bs)))
+                               return_0;
+                        if (!m->h)
+                                m->h = m->t = tmp;
+                        else {
+                                m->t->next = tmp;
+                                m->t = tmp;
+                        }
+                }
+
+                dfa->lookup[a] = ldfa;
+                dm_bit_clear_all(m->bs);
+        }
+
+       return 1;
+}
+
+/*
+ * Set up everything needed for on-demand DFA construction: the ttree of
+ * known states, scratch bitsets, the per-byte charmap, the start state
+ * (from the root's firstpos) and the pending-state queue.
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int _calc_states(struct dm_regex *m, struct rx_node *rx)
+{
+       /* Width of a position bitset, in ints (ttree key size). */
+       unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1;
+       struct dfa_state *dfa;
+       struct rx_node *n;
+       unsigned i;
+       int a;
+
+       if (!(m->tt = ttree_create(m->scratch, iwidth)))
+               return_0;
+
+       if (!(m->bs = dm_bitset_create(m->scratch, m->num_charsets)))
+               return_0;
+
+        /* build some char maps */
+        for (a = 0; a < 256; a++)
+               if (!(m->charmap[a] = dm_bitset_create(m->scratch, m->num_charsets)))
+                       return_0;
+
+        /* For each byte, record which charset leaves can consume it. */
+        for (i = 0; i < m->num_nodes; i++) {
+               n = m->nodes[i];
+                        if (n->type == CHARSET) {
+                        for (a = dm_bit_get_first(n->charset);
+                             a >= 0; a = dm_bit_get_next(n->charset, a))
+                                dm_bit_set(m->charmap[a], n->charset_index);
+                }
+        }
+
+       /* create first state */
+       if (!(dfa = _create_dfa_state(m->mem)))
+               return_0;
+
+       m->start = dfa;
+       /* +1 skips the bitset size word when keying the ttree. */
+       ttree_insert(m->tt, rx->firstpos + 1, dfa);
+
+       /* prime the queue */
+       if (!(m->h = m->t = _create_state_queue(m->scratch, dfa, rx->firstpos)))
+               return_0;
+
+       if (!(m->dfa_copy = dm_bitset_create(m->scratch, m->num_charsets)))
+               return_0;
+
+       return 1;
+}
+
+/*
+ * Forces all the dfa states to be calculated up front, ie. what
+ * _calc_states() used to do before we switched to calculating on demand.
+ *
+ * Needed by dm_regex_fingerprint(), which must see the complete automaton.
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int _force_states(struct dm_regex *m)
+{
+        int a;
+
+        /* keep processing until there's nothing in the queue */
+        struct dfa_state *s;
+        while ((s = m->h)) {
+                /* pop state off front of the queue */
+                m->h = m->h->next;
+
+                /* iterate through all the inputs for this state */
+                dm_bit_clear_all(m->bs);
+                for (a = 0; a < 256; a++)
+                       if (!_calc_state(m, s, a))
+                               return_0;
+        }
+
+        return 1;
+}
+
+/*
+ * Build a multi-pattern matcher over 'patterns' (num_patterns of them).
+ * The patterns are joined into a single alternation of the form
+ * "(.*(P)<TARGET_TRANS>)|..." so one DFA can report which pattern matched.
+ * All allocations come from 'mem' (scratch data included — see m->scratch).
+ * Returns the matcher, or NULL on parse/allocation failure.
+ */
+struct dm_regex *dm_regex_create(struct dm_pool *mem, const char * const *patterns,
+                                unsigned num_patterns)
+{
+       char *all, *ptr;
+       unsigned i;
+       size_t len = 0;
+       struct rx_node *rx;
+       struct dm_regex *m;
+       struct dm_pool *scratch = mem;
+
+       if (!(m = dm_pool_zalloc(mem, sizeof(*m))))
+               return_NULL;
+
+       /* join the regexps together, delimiting with zero */
+       for (i = 0; i < num_patterns; i++)
+               len += strlen(patterns[i]) + 8;  /* 8 covers "(.*()?)|" wrapper chars */
+
+       ptr = all = dm_pool_alloc(scratch, len + 1);
+
+       if (!all)
+               goto_bad;
+
+       /* Wrap each pattern and terminate it with the TARGET_TRANS marker. */
+       for (i = 0; i < num_patterns; i++) {
+               ptr += sprintf(ptr, "(.*(%s)%c)", patterns[i], TARGET_TRANS);
+               if (i < (num_patterns - 1))
+                       *ptr++ = '|';
+       }
+
+       /* parse this expression */
+       if (!(rx = rx_parse_tok(scratch, all, ptr))) {
+               log_error("Couldn't parse regex");
+               goto bad;
+       }
+
+       m->mem = mem;
+       m->scratch = scratch;
+       m->num_nodes = _count_nodes(rx);
+       m->num_charsets = _count_charsets(rx);
+       _enumerate_charsets(rx);
+       if (!(m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes)))
+               goto_bad;
+
+       if (!(m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets)))
+               goto_bad;
+
+       _fill_table(m, rx);
+
+       if (!_create_bitsets(m))
+               goto_bad;
+
+       _calc_functions(m);
+
+       if (!_calc_states(m, rx))
+               goto_bad;
+
+       return m;
+
+      bad:
+       dm_pool_free(mem, m);
+
+       return NULL;
+}
+
+/*
+ * Advance the matcher one input byte: follow (computing on demand) the
+ * transition from 'cs' on 'c', resolve the new state's pattern number if
+ * still unknown, and raise *r to the highest pattern matched so far.
+ * Returns the new state, or NULL when no transition exists (dead end).
+ */
+static struct dfa_state *_step_matcher(struct dm_regex *m, int c, struct dfa_state *cs, int *r)
+{
+        struct dfa_state *ns;
+
+       if (!(ns = cs->lookup[(unsigned char) c])) {
+               if (!_calc_state(m, cs, (unsigned char) c))
+                        return_NULL;
+
+               /* Still no transition after computing: genuine dead end. */
+               if (!(ns = cs->lookup[(unsigned char) c]))
+                       return NULL;
+       }
+
+        // yuck, we have to special case the target trans
+       if ((ns->final == -1) &&
+           !_calc_state(m, ns, TARGET_TRANS))
+                return_NULL;
+
+       if (ns->final && (ns->final > *r))
+               *r = ns->final;
+
+       return ns;
+}
+
+/*
+ * Match string 's' against the compiled pattern set.  The input is
+ * bracketed with virtual HAT_CHAR/DOLLAR_CHAR bytes so ^ and $ anchors
+ * work.  Returns the 0-based index of the matching pattern, or -1 when
+ * nothing matches.
+ */
+int dm_regex_match(struct dm_regex *regex, const char *s)
+{
+       struct dfa_state *cs = regex->start;
+       int r = 0;
+
+        dm_bit_clear_all(regex->bs);
+       if (!(cs = _step_matcher(regex, HAT_CHAR, cs, &r)))
+               goto out;
+
+       for (; *s; s++)
+               if (!(cs = _step_matcher(regex, *s, cs, &r)))
+                       goto out;
+
+       _step_matcher(regex, DOLLAR_CHAR, cs, &r);
+
+      out:
+       /* subtract 1 to get back to zero index */
+       return r - 1;
+}
+
+/*
+ * The next block of code concerns calculating a fingerprint for the dfa.
+ *
+ * We're not calculating a minimal dfa in _calculate_state (maybe a future
+ * improvement).  As such it's possible that two non-isomorphic dfas
+ * recognise the same language.  This can only really happen if you start
+ * with equivalent, but different regexes (for example the simplifier in
+ * parse_rx.c may have changed).
+ *
+ * The code is inefficient; repeatedly searching a singly linked list for
+ * previously seen nodes.  Not worried since this is test code.
+ */
+/* Singly-linked record of a DFA state and the id assigned to it. */
+struct node_list {
+        unsigned node_id;       /* unique id, assigned from 1 upwards */
+        struct dfa_state *node;
+        struct node_list *next;
+};
+
+/* Traversal bookkeeping for dm_regex_fingerprint(). */
+struct printer {
+        struct dm_pool *mem;            /* pool backing the node_list entries */
+        struct node_list *pending;      /* discovered but not yet processed */
+        struct node_list *processed;    /* already folded into the fingerprint */
+        unsigned next_index;            /* last id handed out */
+};
+
+/* Scatter the bits of 'n' by multiplying by the prime 2^32 - 5. */
+static uint32_t _randomise(uint32_t n)
+{
+        return n * (uint32_t) 0xfffffffbUL;     /* 0xfffffffb == 2^32 - 5 */
+}
+
+/*
+ * Linear search of list 'n' for 'node'.  On a hit, store the node's id
+ * through 'i' and return 1; otherwise return 0 leaving *i untouched.
+ */
+static int _seen(struct node_list *n, struct dfa_state *node, uint32_t *i)
+{
+        for (; n; n = n->next)
+                if (n->node == node) {
+                        *i = n->node_id;
+                        return 1;
+                }
+
+        return 0;
+}
+
+/*
+ * Push node if it's not been seen before, returning a unique index.
+ *
+ * Already-known nodes (pending or processed) return their existing id;
+ * new nodes are prepended to the pending list.  Returns 0 only on
+ * allocation failure (ids start at 1, so 0 is unambiguous).
+ */
+static uint32_t _push_node(struct printer *p, struct dfa_state *node)
+{
+        uint32_t i;
+       struct node_list *n;
+
+        if (_seen(p->pending, node, &i) ||
+            _seen(p->processed, node, &i))
+                return i;
+
+       if (!(n = dm_pool_alloc(p->mem, sizeof(*n))))
+               return_0;
+
+       n->node_id = ++p->next_index; /* start from 1, keep 0 as error code */
+       n->node = node;
+       n->next = p->pending;
+       p->pending = n;
+
+       return n->node_id;
+}
+
+/*
+ * Pop the front node off the pending list, moving its record onto the
+ * processed list (so its previously assigned index stays known).
+ * Returns NULL when nothing is pending.
+ */
+static struct dfa_state *_pop_node(struct printer *p)
+{
+        struct dfa_state *node = NULL;
+       struct node_list *n;
+
+       if (p->pending) {
+               n = p->pending;
+                p->pending = n->next;
+                n->next = p->processed;
+                p->processed = n;
+
+                node = n->node;
+        }
+
+        return node;
+}
+
+/* Fold 'n2' into accumulator 'n1': rotate n1 left 8 bits, xor scrambled n2. */
+static uint32_t _combine(uint32_t n1, uint32_t n2)
+{
+        uint32_t rotated = (n1 << 8) | (n1 >> 24);
+        return rotated ^ _randomise(n2);
+}
+
+/*
+ * Fold every reachable DFA state into a single 32-bit value: for each
+ * pending state, mix in its final number and the id of every one of its
+ * 256 transitions (discovering new states via _push_node as it goes).
+ * Deterministic for a given automaton since traversal order is fixed.
+ */
+static uint32_t _fingerprint(struct printer *p)
+{
+        int c;
+        uint32_t result = 0;
+        struct dfa_state *node;
+
+        while ((node = _pop_node(p))) {
+                /* Unresolved final (-1) is folded in as 0. */
+                result = _combine(result, (node->final < 0) ? 0 : node->final);
+                for (c = 0; c < 256; c++)
+                        result = _combine(result,
+                                          _push_node(p, node->lookup[c]));
+        }
+
+        return result;
+}
+
+/*
+ * Compute a fingerprint of the complete DFA (forcing full construction
+ * first).  Used by tests to compare automata.  Returns 0 on failure —
+ * note 0 is therefore ambiguous with a genuine zero fingerprint.
+ */
+uint32_t dm_regex_fingerprint(struct dm_regex *regex)
+{
+        struct printer p;
+        uint32_t result = 0;
+        struct dm_pool *mem = dm_pool_create("regex fingerprint", 1024);
+
+       if (!mem)
+               return_0;
+
+       if (!_force_states(regex))
+               goto_out;
+
+        p.mem = mem;
+        p.pending = NULL;
+        p.processed = NULL;
+        p.next_index = 0;
+
+       if (!_push_node(&p, regex->start))
+               goto_out;
+
+       result = _fingerprint(&p);
+out:
+        dm_pool_destroy(mem);
+
+        return result;
+}
diff --git a/device_mapper/regex/parse_rx.c b/device_mapper/regex/parse_rx.c
new file mode 100644 (file)
index 0000000..cc83bfe
--- /dev/null
@@ -0,0 +1,667 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "parse_rx.h"
+
+#ifdef DEBUG
+#include <ctype.h>
+
+__attribute__ ((__unused__))
+/*
+ * Debug helper (DEBUG builds only): print the parse tree back out as a
+ * regex-like string on stdout.  'show_nodes' forces parentheses around
+ * every subexpression; 'depth' tracks recursion so only the outermost
+ * call emits the trailing newline.
+ */
+static void _regex_print(struct rx_node *rx, int depth, unsigned show_nodes)
+{
+       int i, numchars;
+
+       if (rx->left) {
+               /* Parenthesise unless the child is a plain charset or an
+                * unambiguous CAT chain. */
+               if (rx->left->type != CHARSET && (show_nodes || (!((rx->type == CAT || rx->type == OR) && rx->left->type == CAT))))
+                       printf("(");
+
+               _regex_print(rx->left, depth + 1, show_nodes);
+
+               if (rx->left->type != CHARSET && (show_nodes || (!((rx->type == CAT || rx->type == OR) && rx->left->type == CAT))))
+                       printf(")");
+       }
+
+       /* display info about the node */
+       switch (rx->type) {
+       case CAT:
+               break;
+
+       case OR:
+               printf("|");
+               break;
+
+       case STAR:
+               printf("*");
+               break;
+
+       case PLUS:
+               printf("+");
+               break;
+
+       case QUEST:
+               printf("?");
+               break;
+
+       case CHARSET:
+               numchars = 0;
+               for (i = 0; i < 256; i++)
+                       if (dm_bit(rx->charset, i) && (isprint(i) || i == HAT_CHAR || i == DOLLAR_CHAR))
+                               numchars++;
+               /* 97 printable members == the '.' (any) charset. */
+               if (numchars == 97) {
+                       printf(".");
+                       break;
+               }
+               if (numchars > 1)
+                       printf("[");
+               for (i = 0; i < 256; i++)
+                       if (dm_bit(rx->charset, i)) {
+                               if (isprint(i))
+                                       printf("%c", (char) i);
+                               else if (i == HAT_CHAR)
+                                       printf("^");
+                               else if (i == DOLLAR_CHAR)
+                                       printf("$");
+                       }
+               if (numchars > 1)
+                       printf("]");
+               break;
+
+       default:
+               fprintf(stderr, "Unknown type");
+       }
+
+       if (rx->right) {
+               if (rx->right->type != CHARSET && (show_nodes || (!(rx->type == CAT && rx->right->type == CAT) && rx->right->right)))
+                       printf("(");
+               _regex_print(rx->right, depth + 1, show_nodes);
+               if (rx->right->type != CHARSET && (show_nodes || (!(rx->type == CAT && rx->right->type == CAT) && rx->right->right)))
+                       printf(")");
+       }
+
+       if (!depth)
+               printf("\n");
+}
+#endif /* DEBUG */
+
+struct parse_sp {              /* scratch pad for the parsing process */
+       struct dm_pool *mem;
+       int type;               /* token type, 0 indicates a charset */
+       dm_bitset_t charset;    /* The current charset */
+       const char *cursor;     /* where we are in the regex */
+       const char *rx_end;     /* one past the end of the expression being parsed */
+};
+
+static struct rx_node *_or_term(struct parse_sp *ps);
+
+/*
+ * Emit a single-character charset token for 'c' and advance the cursor
+ * past 'ptr'.  Used for the ^ and $ pseudo-characters.
+ */
+static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr)
+{
+       ps->type = 0;
+       ps->cursor = ptr + 1;
+       dm_bit_clear_all(ps->charset);
+       dm_bit_set(ps->charset, c);
+}
+
+/*
+ * Get the next token from the regular expression.
+ * Returns: 1 success, 0 end of input, -1 error.
+ */
+/*
+ * Get the next token from the regular expression.
+ * Returns: 1 success, 0 end of input, -1 error.
+ *
+ * On success ps->type is either an operator character ('(' ')' '*' '+'
+ * '?' '|') or 0, meaning ps->charset holds the token's character set;
+ * ps->cursor is advanced past the consumed text.
+ */
+static int _rx_get_token(struct parse_sp *ps)
+{
+       int neg = 0, range = 0;
+       /* NOTE(review): c/lc are plain char; range endpoints with bytes
+        * > 127 depend on char signedness — confirm upstream behaviour. */
+       char c, lc = 0;
+       const char *ptr = ps->cursor;
+       if (ptr == ps->rx_end) {        /* end of input ? */
+               ps->type = -1;
+               return 0;
+       }
+
+       switch (*ptr) {
+               /* charsets and ncharsets */
+       case '[':
+               ptr++;
+               if (*ptr == '^') {
+                       /* Negated set: start full and clear members below. */
+                       dm_bit_set_all(ps->charset);
+
+                       /* never transition on zero */
+                       dm_bit_clear(ps->charset, 0);
+                       neg = 1;
+                       ptr++;
+
+               } else
+                       dm_bit_clear_all(ps->charset);
+
+               while ((ptr < ps->rx_end) && (*ptr != ']')) {
+                       if (*ptr == '\\') {
+                               /* an escaped character */
+                               ptr++;
+                               switch (*ptr) {
+                               case 'n':
+                                       c = '\n';
+                                       break;
+                               case 'r':
+                                       c = '\r';
+                                       break;
+                               case 't':
+                                       c = '\t';
+                                       break;
+                               default:
+                                       c = *ptr;
+                               }
+                       } else if (*ptr == '-' && lc) {
+                               /* we've got a range on our hands */
+                               range = 1;
+                               ptr++;
+                               if (ptr == ps->rx_end) {
+                                       /* Fixed: adjacent literals previously
+                                        * concatenated without a space. */
+                                       log_error("Incomplete range specification");
+                                       return -1;
+                               }
+                               c = *ptr;
+                       } else
+                               c = *ptr;
+
+                       if (range) {
+                               /* add lc - c into the bitset */
+                               if (lc > c) {
+                                       /* Accept reversed ranges by swapping. */
+                                       char tmp = c;
+                                       c = lc;
+                                       lc = tmp;
+                               }
+
+                               for (; lc <= c; lc++) {
+                                       if (neg)
+                                               dm_bit_clear(ps->charset, lc);
+                                       else
+                                               dm_bit_set(ps->charset, lc);
+                               }
+                               range = 0;
+                       } else {
+                               /* add c into the bitset */
+                               if (neg)
+                                       dm_bit_clear(ps->charset, c);
+                               else
+                                       dm_bit_set(ps->charset, c);
+                       }
+                       ptr++;
+                       lc = c;
+               }
+
+               /* Ran off the end without a closing ']'. */
+               if (ptr >= ps->rx_end) {
+                       ps->type = -1;
+                       return -1;
+               }
+
+               ps->type = 0;
+               ps->cursor = ptr + 1;
+               break;
+
+               /* These characters are special, we just return their ASCII
+                  codes as the type.  Sorted into ascending order to help the
+                  compiler */
+       case '(':
+       case ')':
+       case '*':
+       case '+':
+       case '?':
+       case '|':
+               ps->type = (int) *ptr;
+               ps->cursor = ptr + 1;
+               break;
+
+       case '^':
+               _single_char(ps, HAT_CHAR, ptr);
+               break;
+
+       case '$':
+               _single_char(ps, DOLLAR_CHAR, ptr);
+               break;
+
+       case '.':
+               /* The 'all but newline' character set */
+               ps->type = 0;
+               ps->cursor = ptr + 1;
+               dm_bit_set_all(ps->charset);
+               dm_bit_clear(ps->charset, (int) '\n');
+               dm_bit_clear(ps->charset, (int) '\r');
+               dm_bit_clear(ps->charset, 0);
+               break;
+
+       case '\\':
+               /* escaped character */
+               ptr++;
+               if (ptr >= ps->rx_end) {
+                       log_error("Badly quoted character at end "
+                                 "of expression");
+                       ps->type = -1;
+                       return -1;
+               }
+
+               ps->type = 0;
+               ps->cursor = ptr + 1;
+               dm_bit_clear_all(ps->charset);
+               switch (*ptr) {
+               case 'n':
+                       dm_bit_set(ps->charset, (int) '\n');
+                       break;
+               case 'r':
+                       dm_bit_set(ps->charset, (int) '\r');
+                       break;
+               case 't':
+                       dm_bit_set(ps->charset, (int) '\t');
+                       break;
+               default:
+                       dm_bit_set(ps->charset, (int) *ptr);
+               }
+               break;
+
+       default:
+               /* add a single character to the bitset */
+               ps->type = 0;
+               ps->cursor = ptr + 1;
+               dm_bit_clear_all(ps->charset);
+               dm_bit_set(ps->charset, (int) (unsigned char) *ptr);
+               break;
+       }
+
+       return 1;
+}
+
+/*
+ * Allocate an rx_node of the given type from the pool, with children l and r.
+ * A CHARSET node also gets a fresh 256-bit charset allocated.
+ * Returns NULL on allocation failure (a partially built node is freed back
+ * to the pool).
+ */
+static struct rx_node *_node(struct dm_pool *mem, int type,
+                            struct rx_node *l, struct rx_node *r)
+{
+       struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
+
+       if (n) {
+               /* Only CHARSET leaves carry a bitset of matched characters. */
+               if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) {
+                       dm_pool_free(mem, n);
+                       return NULL;
+               }
+
+               n->type = type;
+               n->left = l;
+               n->right = r;
+       }
+
+       return n;
+}
+
+/*
+ * term := CHARSET | '(' or_term ')'
+ * Consumes the tokens it matches.  Returns NULL either on error or when the
+ * current token does not start a term (the caller uses this to detect the
+ * end of a concatenation).
+ */
+static struct rx_node *_term(struct parse_sp *ps)
+{
+       struct rx_node *n;
+
+       switch (ps->type) {
+       case 0:
+               if (!(n = _node(ps->mem, CHARSET, NULL, NULL)))
+                       return_NULL;
+
+               dm_bit_copy(n->charset, ps->charset);
+               _rx_get_token(ps);      /* match charset */
+               break;
+
+       case '(':
+               _rx_get_token(ps);      /* match '(' */
+               n = _or_term(ps);
+               if (ps->type != ')') {
+                       log_error("missing ')' in regular expression");
+                       return 0;
+               }
+               _rx_get_token(ps);      /* match ')' */
+               break;
+
+       default:
+               n = 0;  /* current token does not start a term */
+       }
+
+       return n;
+}
+
+/*
+ * closure_term := term { '*' | '+' | '?' }
+ * Wraps the parsed term in a STAR/PLUS/QUEST node for each postfix
+ * closure operator that follows it.
+ */
+static struct rx_node *_closure_term(struct parse_sp *ps)
+{
+       struct rx_node *l, *n;
+
+       if (!(l = _term(ps)))
+               return NULL;
+
+       for (;;) {
+               switch (ps->type) {
+               case '*':
+                       n = _node(ps->mem, STAR, l, NULL);
+                       break;
+
+               case '+':
+                       n = _node(ps->mem, PLUS, l, NULL);
+                       break;
+
+               case '?':
+                       n = _node(ps->mem, QUEST, l, NULL);
+                       break;
+
+               default:
+                       /* no more closure operators */
+                       return l;
+               }
+
+               if (!n)
+                       return_NULL;
+
+               _rx_get_token(ps);      /* match the closure operator */
+               l = n;
+       }
+
+       /* NOTE(review): unreachable — the loop above only exits via return. */
+       return n;
+}
+
+/*
+ * cat_term := closure_term [cat_term]
+ * Concatenation is implicit: keep building right-leaning CAT nodes until
+ * a '|' or a non-term token stops the sequence.
+ */
+static struct rx_node *_cat_term(struct parse_sp *ps)
+{
+       struct rx_node *l, *r, *n;
+
+       if (!(l = _closure_term(ps)))
+               return NULL;
+
+       if (ps->type == '|')
+               return l;
+
+       if (!(r = _cat_term(ps)))
+               return l;
+
+       /* On allocation failure n is NULL; 'stack' records the error path
+          and NULL is returned below. */
+       if (!(n = _node(ps->mem, CAT, l, r)))
+               stack;
+
+       return n;
+}
+
+/*
+ * or_term := cat_term [ '|' or_term ]
+ * Top level of the grammar; alternation binds loosest.
+ */
+static struct rx_node *_or_term(struct parse_sp *ps)
+{
+       struct rx_node *l, *r, *n;
+
+       if (!(l = _cat_term(ps)))
+               return NULL;
+
+       if (ps->type != '|')
+               return l;
+
+       _rx_get_token(ps);              /* match '|' */
+
+       if (!(r = _or_term(ps))) {
+               log_error("Badly formed 'or' expression");
+               return NULL;
+       }
+
+       /* On allocation failure n is NULL; 'stack' records the error path
+          and NULL is returned below. */
+       if (!(n = _node(ps->mem, OR, l, r)))
+               stack;
+
+       return n;
+}
+
+/*----------------------------------------------------------------*/
+
+/* Macros for left and right nodes.  Inverted if 'leftmost' is set. */
+#define LEFT(a) (leftmost ? (a)->left : (a)->right)
+#define RIGHT(a) (leftmost ? (a)->right : (a)->left)
+
+/*
+ * The optimiser spots common prefixes on either side of an 'or' node, and
+ * lifts them outside the 'or' with a 'cat'.
+ */
+/*
+ * Count the nodes along the LEFT spine of the subtree, stopping at a
+ * CHARSET leaf, a missing child, or (when scanning for rightmost common
+ * suffixes) an OR node.
+ */
+static unsigned _depth(struct rx_node *r, unsigned leftmost)
+{
+       int count = 1;
+
+       while (r->type != CHARSET && LEFT(r) && (leftmost || r->type != OR)) {
+               count++;
+               r = LEFT(r);
+       }
+
+       return count;
+}
+
+/*
+ * FIXME: a unique key could be built up as part of the parse, to make the
+ * comparison quick.  Alternatively we could use cons-hashing, and then
+ * this would simply be a pointer comparison.
+ */
+/*
+ * Recursive structural equality of two subtrees; CHARSET leaves compare
+ * their bitsets.  Returns 1 when equal, 0 otherwise.
+ */
+static int _nodes_equal(struct rx_node *l, struct rx_node *r)
+{
+       if (l->type != r->type)
+               return 0;
+
+       switch (l->type) {
+       case CAT:
+       case OR:
+               return _nodes_equal(l->left, r->left) &&
+                       _nodes_equal(l->right, r->right);
+
+       case STAR:
+       case PLUS:
+       case QUEST:
+               return _nodes_equal(l->left, r->left);
+
+       case CHARSET:
+               /*
+                * Never change anything containing TARGET_TRANS
+                * used by matcher as boundary marker between concatenated
+                * expressions.
+                */
+               return (!dm_bit(l->charset, TARGET_TRANS) && dm_bitset_equal(l->charset, r->charset));
+       }
+
+       /* NOTREACHED */
+       return_0;
+}
+
+/*
+ * Walk down both sides of an OR node looking for a pair of CAT nodes at
+ * equal depth whose LEFT children are structurally equal (a common prefix,
+ * or suffix when 'leftmost' is 0).  On success *l and *r point at those
+ * CAT nodes and 1 is returned; otherwise 0.
+ */
+static int _find_leftmost_common(struct rx_node *or,
+                                 struct rx_node **l,
+                                 struct rx_node **r,
+                                unsigned leftmost)
+{
+       struct rx_node *left = or->left, *right = or->right;
+       unsigned left_depth = _depth(left, leftmost);
+       unsigned right_depth = _depth(right, leftmost);
+
+       /* Descend the deeper side until both spines are the same length. */
+       while (left_depth > right_depth && left->type != OR) {
+               left = LEFT(left);
+               left_depth--;
+       }
+
+       while (right_depth > left_depth && right->type != OR) {
+               right = LEFT(right);
+               right_depth--;
+       }
+
+       if (left_depth != right_depth)
+               return 0;
+
+       /* March both spines in lockstep looking for matching CAT children. */
+       while (left_depth) {
+               if (left->type == CAT && right->type == CAT) {
+                       if (_nodes_equal(LEFT(left), LEFT(right))) {
+                               *l = left;
+                               *r = right;
+                               return 1;
+                       }
+               }
+               /* Never factor across a nested OR boundary. */
+               if (left->type == OR || right->type == OR)
+                       break;
+               left = LEFT(left);
+               right = LEFT(right);
+               left_depth--;
+       }
+
+       return 0;
+}
+
+/* If top node is OR, rotate (leftmost example) from ((ab)|((ac)|d)) to (((ab)|(ac))|d) */
+/* Rotation is done in place by relinking children; returns 1 if performed. */
+static int _rotate_ors(struct rx_node *r, unsigned leftmost)
+{
+       struct rx_node *old_node;
+
+       /* Only applicable when an OR has another OR as its RIGHT child. */
+       if (r->type != OR || RIGHT(r)->type != OR)
+               return 0;
+
+       old_node = RIGHT(r);
+
+       if (leftmost) {
+               r->right = RIGHT(old_node);
+               old_node->right = LEFT(old_node);
+               old_node->left = LEFT(r);
+               r->left = old_node;
+       } else {
+               r->left = RIGHT(old_node);
+               old_node->left = LEFT(old_node);
+               old_node->right = LEFT(r);
+               r->right = old_node;
+       }
+
+       return 1;
+}
+
+/*
+ * Lift the common factor found by _find_leftmost_common out of the OR:
+ * build a new CAT of the shared child and the whole OR subtree 'r', then
+ * splice the shared child out of each CAT node by overwriting the CAT
+ * with its remaining child (memcpy of the node contents).
+ * Returns the new root, or NULL on allocation failure.
+ */
+static struct rx_node *_exchange_nodes(struct dm_pool *mem, struct rx_node *r,
+                                      struct rx_node *left_cat, struct rx_node *right_cat,
+                                      unsigned leftmost)
+{
+       struct rx_node *new_r;
+
+       if (leftmost)
+               new_r = _node(mem, CAT, LEFT(left_cat), r);
+       else
+               new_r = _node(mem, CAT, r, LEFT(right_cat));
+
+       if (!new_r)
+               return_NULL;
+
+       memcpy(left_cat, RIGHT(left_cat), sizeof(*left_cat));
+       memcpy(right_cat, RIGHT(right_cat), sizeof(*right_cat));
+
+       return new_r;
+}
+
+/*
+ * One optimisation pass over the whole tree.  Recurses depth-first, then
+ * tries to factor common prefixes/suffixes out of each OR node.  Sets
+ * *changed when the tree shape was altered so the caller repeats the pass.
+ * Returns the (possibly new) subtree root, or NULL on allocation failure.
+ */
+static struct rx_node *_pass(struct dm_pool *mem,
+                             struct rx_node *r,
+                             int *changed)
+{
+       struct rx_node *left, *right;
+
+       /*
+        * walk the tree, optimising every 'or' node.
+        */
+       switch (r->type) {
+       case CAT:
+               if (!(r->left = _pass(mem, r->left, changed)))
+                       return_NULL;
+
+               if (!(r->right = _pass(mem, r->right, changed)))
+                       return_NULL;
+
+               break;
+
+       case STAR:
+       case PLUS:
+       case QUEST:
+               if (!(r->left = _pass(mem, r->left, changed)))
+                       return_NULL;
+
+               break;
+       case OR:
+               /* It's important we optimise sub nodes first */
+               if (!(r->left = _pass(mem, r->left, changed)))
+                       return_NULL;
+
+               if (!(r->right = _pass(mem, r->right, changed)))
+                       return_NULL;
+               /*
+                * If rotate_ors changes the tree, left and right are stale,
+                * so just set 'changed' to repeat the search.
+                *
+                * FIXME Check we can't 'bounce' between left and right rotations here.
+                */
+               if (_find_leftmost_common(r, &left, &right, 1)) {
+                       if (!_rotate_ors(r, 1))
+                               r = _exchange_nodes(mem, r, left, right, 1);
+                       *changed = 1;
+               } else if (_find_leftmost_common(r, &left, &right, 0)) {
+                       if (!_rotate_ors(r, 0))
+                               r = _exchange_nodes(mem, r, left, right, 0);
+                       *changed = 1;
+               }
+               break;
+
+       case CHARSET:
+               /* leaf: nothing to do */
+               break;
+       }
+
+       return r;
+}
+
+/*
+ * Repeat _pass until the tree stops changing (or an allocation fails,
+ * in which case NULL is returned).
+ */
+static struct rx_node *_optimise(struct dm_pool *mem, struct rx_node *r)
+{
+       /*
+        * We're looking for (or (... (cat <foo> a)) (... (cat <foo> b)))
+        * and want to turn it into (cat <foo> (or (... a) (... b)))
+        *
+        * (fa)|(fb) becomes f(a|b)
+        */
+
+       /*
+        * Initially done as an inefficient multipass algorithm.
+        */
+       int changed;
+
+       do {
+               changed = 0;
+               r = _pass(mem, r, &changed);
+       } while (r && changed);
+
+       return r;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Parse the regex text between 'begin' and 'end' (exclusive) into an
+ * optimised syntax tree allocated from 'mem'.  Logs and returns NULL on
+ * parse or allocation failure.  Note: on success the temporary parse
+ * state remains allocated in 'mem'.
+ */
+struct rx_node *rx_parse_tok(struct dm_pool *mem,
+                            const char *begin, const char *end)
+{
+       struct rx_node *r;
+       struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps));
+
+       if (!ps)
+               return_NULL;
+
+       ps->mem = mem;
+       if (!(ps->charset = dm_bitset_create(mem, 256))) {
+               log_error("Regex charset allocation failed");
+               dm_pool_free(mem, ps);
+               return NULL;
+       }
+       ps->cursor = begin;
+       ps->rx_end = end;
+       _rx_get_token(ps);              /* load the first token */
+
+       if (!(r = _or_term(ps))) {
+               log_error("Parse error in regex");
+               dm_pool_free(mem, ps);
+               return NULL;
+       }
+
+       if (!(r = _optimise(mem, r))) {
+               log_error("Regex optimisation error");
+               dm_pool_free(mem, ps);
+               return NULL;
+       }
+
+       return r;
+}
+
+/* Convenience wrapper: parse a NUL-terminated regex string. */
+struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str)
+{
+       return rx_parse_tok(mem, str, str + strlen(str));
+}
diff --git a/device_mapper/regex/parse_rx.h b/device_mapper/regex/parse_rx.h
new file mode 100644 (file)
index 0000000..0897060
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DM_PARSE_REGEX_H
+#define _DM_PARSE_REGEX_H
+
+/* Node types of the parsed regular-expression syntax tree. */
+enum {
+       CAT,
+       STAR,
+       PLUS,
+       OR,
+       QUEST,
+       CHARSET
+};
+
+/*
+ * We're never going to be running the regex on non-printable
+ * chars, so we can use a couple of these chars to represent the
+ * start and end of a string.
+ */
+#define HAT_CHAR 0x2
+#define DOLLAR_CHAR 0x3
+
+/* Used by the matcher as a boundary marker between concatenated
+   expressions; charsets containing it are never altered by the optimiser. */
+#define TARGET_TRANS '\0'
+
+struct rx_node {
+       int type;
+       dm_bitset_t charset;
+       struct rx_node *left, *right;
+
+       /* used to build the dfa for the tokeniser */
+        unsigned charset_index;
+       int nullable, final;
+       dm_bitset_t firstpos;
+       dm_bitset_t lastpos;
+       dm_bitset_t followpos;
+};
+
+struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str);
+struct rx_node *rx_parse_tok(struct dm_pool *mem,
+                            const char *begin, const char *end);
+
+#endif
diff --git a/device_mapper/regex/ttree.c b/device_mapper/regex/ttree.c
new file mode 100644 (file)
index 0000000..62c5bf7
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "ttree.h"
+
+/* Ternary-tree node: l/r order siblings on key component k; m descends
+   to the next key component; data is stored at the final component. */
+struct node {
+       unsigned k;
+       struct node *l, *m, *r;
+       void *data;
+};
+
+/* Ternary search tree over fixed-length keys of klen unsigned ints,
+   allocated from a dm_pool. */
+struct ttree {
+       int klen;
+       struct dm_pool *mem;
+       struct node *root;
+};
+
+/*
+ * Binary-search the sibling chain for key component k.  Returns the slot
+ * holding the matching node's middle child, or the empty slot where a
+ * node for k would be inserted.
+ */
+__attribute__((nonnull(1)))
+static struct node **_lookup_single(struct node **c, unsigned int k)
+{
+       while (*c) {
+               if (k < (*c)->k)
+                       c = &((*c)->l);
+
+               else if (k > (*c)->k)
+                       c = &((*c)->r);
+
+               else {
+                       /* exact match: descend to the next key component */
+                       c = &((*c)->m);
+                       break;
+               }
+       }
+
+       return c;
+}
+
+/*
+ * Look up a key of tt->klen unsigned ints.  Returns the stored data
+ * pointer, or NULL if the key is absent.
+ */
+void *ttree_lookup(struct ttree *tt, unsigned *key)
+{
+       struct node **c = &tt->root;
+       int count = tt->klen;
+
+       while (*c && count) {
+               c = _lookup_single(c, *key++);
+               count--;
+       }
+
+       return *c ? (*c)->data : NULL;
+}
+
+/* Allocate a zeroed node with key component k; NULL on pool failure. */
+static struct node *_tree_node(struct dm_pool *mem, unsigned int k)
+{
+       struct node *n = dm_pool_zalloc(mem, sizeof(*n));
+
+       if (n)
+               n->k = k;
+
+       return n;
+}
+
+/*
+ * Insert 'data' under a key of tt->klen unsigned ints, creating any
+ * missing nodes along the way.  Returns 1 on success, 0 on allocation
+ * failure.  An existing entry's data pointer is overwritten.
+ */
+int ttree_insert(struct ttree *tt, unsigned int *key, void *data)
+{
+       struct node **c = &tt->root;
+       int count = tt->klen;
+       unsigned int k;
+
+       /* Descend existing nodes as far as possible. */
+       do {
+               k = *key++;
+               c = _lookup_single(c, k);
+               count--;
+
+       } while (*c && count);
+
+       if (!*c) {
+               /* The walk stopped early: build the rest of the key chain.
+                  count was decremented once past the missing node, so
+                  restore it before the fill loop. */
+               count++;
+
+               while (count--) {
+                       if (!(*c = _tree_node(tt->mem, k)))
+                               return_0;
+
+                       if (count) {
+                               k = *key++;
+                               c = &((*c)->m);
+                       }
+               }
+       }
+       (*c)->data = data;
+
+       return 1;
+}
+
+/*
+ * Create an empty ternary tree for keys of 'klen' unsigned ints,
+ * allocated from 'mem'.  Returns NULL on allocation failure.
+ */
+struct ttree *ttree_create(struct dm_pool *mem, unsigned int klen)
+{
+       struct ttree *tt;
+
+       if (!(tt = dm_pool_zalloc(mem, sizeof(*tt))))
+               return_NULL;
+
+       tt->klen = klen;
+       tt->mem = mem;
+       return tt;
+}
diff --git a/device_mapper/regex/ttree.h b/device_mapper/regex/ttree.h
new file mode 100644 (file)
index 0000000..8b62181
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.  
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DM_TTREE_H
+#define _DM_TTREE_H
+
+/* Ternary search tree keyed by fixed-length arrays of unsigned ints. */
+struct ttree;
+
+/* Create an empty tree for keys of 'klen' unsigned ints. */
+struct ttree *ttree_create(struct dm_pool *mem, unsigned int klen);
+
+/* Return the data stored under 'key', or NULL if absent. */
+void *ttree_lookup(struct ttree *tt, unsigned *key);
+/* Store 'data' under 'key'; returns 1 on success, 0 on failure. */
+int ttree_insert(struct ttree *tt, unsigned *key, void *data);
+
+#endif
similarity index 99%
rename from device-mapper/vdo/status.c
rename to device_mapper/vdo/status.c
index 1739a7e70ca64633b079fd996005e64922571806..4bd15e4624b04707920981bd46f65e23dc0eb14a 100644 (file)
@@ -1,7 +1,7 @@
 #include "target.h"
 
 // For DM_ARRAY_SIZE!
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 #include <ctype.h>
 #include <stdlib.h>
index b797c78d878d9689bf7acc8c1dccd1b4ea3ad91b..146aae152c6a6e2269c6d7062ba834cc61eca0ba 100644 (file)
@@ -16,7 +16,7 @@
 #ifndef _LVM_CONFIG_H
 #define _LVM_CONFIG_H
 
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 #include "lib/device/device.h"
 
 /* 16 bits: 3 bits for major, 4 bits for minor, 9 bits for patchlevel */
index aa6bb7aceb50040c1189d19286af1dab249dbc58..d213758e7034f1b07178a7e6504fba4b091fce8f 100644 (file)
@@ -15,7 +15,7 @@
 #define _GNU_SOURCE
 
 #include "lib/device/bcache.h"
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
 #include "lib/log/log.h"
 
 #include <errno.h>
index cf3d6688d1418878142b88a769d60ba19f29d6b8..8f328c76c0abe786b0e2ed81653ec61b30a59375 100644 (file)
@@ -15,7 +15,7 @@
 #ifndef BCACHE_H
 #define BCACHE_H
 
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 #include <linux/fs.h>
 #include <stdint.h>
index f3c23260d7317c179a80ed226d74b3ba9a094acf..38026a38109a3e9dfb11de2d8ba2e9c1b45d7c62 100644 (file)
@@ -17,7 +17,7 @@
 #include "lib/datastruct/btree.h"
 #include "lib/config/config.h"
 #include "lib/commands/toolcontext.h"
-#include "libdm/misc/dm-ioctl.h"
+#include "device_mapper/misc/dm-ioctl.h"
 #include "lib/misc/lvm-string.h"
 
 #ifdef UDEV_SYNC_SUPPORT
index 23d2bd759978055d90ca1bc2c6baa6280dcc86bd..d5d91ce0f98a57686b08307c2dd311ad8247e980 100644 (file)
@@ -16,7 +16,7 @@
 #define _LVM_PV_H
 
 #include "lib/uuid/uuid.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 struct device;
 struct format_type;
index b37c01d8333071629152f1496fa043ff54832aa8..3d24bba7dcc624031ea7f5f5bccde6f1d275667e 100644 (file)
@@ -16,7 +16,7 @@
 #define _LVM_VG_H
 
 #include "lib/uuid/uuid.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 struct cmd_context;
 struct format_instance;
index 13c7110f79f74585ea75d601c4cd45892745cd92..3ae3aacc089285daaca9ded09a91ca0e2d6a68d4 100644 (file)
@@ -79,7 +79,7 @@
 
 
 #include "lib/misc/intl.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 #include "lib/misc/util.h"
 
 #ifdef DM
index f52705e9a80f75ef382c1e03d20a766de466ae0b..38b61110c3ab3ae2fd8a1fc0a944dc6083c45722 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef _LVM_PROPERTIES_H
 #define _LVM_PROPERTIES_H
 
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 #include "lib/metadata/metadata.h"
 #include "lib/report/report.h"
 #include "lib/properties/prop_common.h"
index 12baa998b1d3a5dae01aef7f331d14d8ac559006..f8526d75c1d4bc28ff678b748d006ecd098cd812 100644 (file)
@@ -17,7 +17,7 @@
 #include "tools/tool.h"
 
 #include "libdaemon/client/daemon-io.h"
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
 
 #include <math.h>  /* fabs() */
 #include <float.h> /* DBL_EPSILON */
index b5dfbf1309683ec716812d61e4431664c34d6619..28d7c04a32faed48d1e85180c804b5e3d16abcdf 100644 (file)
@@ -18,7 +18,7 @@
 
 #include "libdaemon/client/daemon-io.h"
 #include "libdaemon/client/daemon-client.h"
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
 
 #include <sys/un.h>
 #include <sys/socket.h>
index 66ec39513d3586b54d82d02473b0433e448f2d0e..eeef0801dab28cabad620a666528ff0da5d60fd6 100644 (file)
@@ -52,7 +52,7 @@ CFLOW_LIST_TARGET = libdevmapper.cflow
 EXPORTED_HEADER = $(srcdir)/libdevmapper.h
 EXPORTED_FN_PREFIX = dm
 
-include $(top_builddir)/make.tmpl
+include $(top_builddir)/libdm/make.tmpl
 
 PROGS_CFLAGS = $(UDEV_CFLAGS)
 
diff --git a/libdm/make.tmpl.in b/libdm/make.tmpl.in
new file mode 100644 (file)
index 0000000..7e3f4a4
--- /dev/null
@@ -0,0 +1,578 @@
+# @configure_input@
+#
+# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Q expands to '@' unless the user runs 'make V=1' — presumably used as a
+# recipe prefix so commands are echoed only in verbose builds.
+ifeq ($(V),1)
+       Q=
+else
+       Q=@
+endif
+
+SHELL = @SHELL@
+
+@SET_MAKE@
+
+# Allow environment to override any built-in default value for CC.
+# If there is a built-in default, CC is NOT set to @CC@ here.
+CC ?= @CC@
+
+# If $(CC) holds the usual built-in default value of 'cc' then replace it with
+# the configured value.
+# (To avoid this and force the use of 'cc' from the environment, supply its
+# full path.)
+ifeq ($(CC), cc)
+  CC = @CC@
+endif
+
+RANLIB = @RANLIB@
+INSTALL = @INSTALL@
+MKDIR_P = @MKDIR_P@
+MSGFMT = @MSGFMT@
+LCOV = @LCOV@
+GENHTML = @GENHTML@
+LN_S = @LN_S@
+SED = @SED@
+CFLOW_CMD = @CFLOW_CMD@
+AWK = @AWK@
+CHMOD = @CHMOD@
+EGREP = @EGREP@
+GREP = @GREP@
+SORT = @SORT@
+WC = @WC@
+AR = @AR@
+RM = rm -f
+
+PYTHON2 = @PYTHON2@
+PYTHON3 = @PYTHON3@
+PYCOMPILE = $(top_srcdir)/autoconf/py-compile
+
+LIBS = @LIBS@
+# Extra libraries always linked with static binaries
+STATIC_LIBS = $(SELINUX_LIBS) $(UDEV_LIBS) $(BLKID_LIBS)
+DEFS += @DEFS@
+# FIXME set this only where it's needed, not globally?
+CFLAGS ?= @COPTIMISE_FLAG@ @CFLAGS@
+LDFLAGS ?= @LDFLAGS@
+CLDFLAGS += @CLDFLAGS@
+ELDFLAGS += @ELDFLAGS@
+LDDEPS += @LDDEPS@
+LIB_SUFFIX = @LIB_SUFFIX@
+LVMINTERNAL_LIBS = -llvm-internal $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS)
+DL_LIBS = @DL_LIBS@
+RT_LIBS = @RT_LIBS@
+M_LIBS = @M_LIBS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+READLINE_LIBS = @READLINE_LIBS@
+SELINUX_LIBS = @SELINUX_LIBS@
+UDEV_CFLAGS = @UDEV_CFLAGS@
+UDEV_LIBS = @UDEV_LIBS@
+BLKID_CFLAGS = @BLKID_CFLAGS@
+BLKID_LIBS = @BLKID_LIBS@
+SYSTEMD_LIBS = @SYSTEMD_LIBS@
+VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
+
+# Setup directory variables
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+udev_prefix = @udev_prefix@
+sysconfdir = @sysconfdir@
+rootdir = $(DESTDIR)/
+bindir = $(DESTDIR)@bindir@
+confdir = $(DESTDIR)@CONFDIR@/lvm
+profiledir = $(confdir)/@DEFAULT_PROFILE_SUBDIR@
+includedir = $(DESTDIR)@includedir@
+libdir = $(DESTDIR)@libdir@
+libexecdir = $(DESTDIR)@libexecdir@
+usrlibdir = $(DESTDIR)@usrlibdir@
+sbindir = $(DESTDIR)@sbindir@
+usrsbindir = $(DESTDIR)@usrsbindir@
+datarootdir = @datarootdir@
+datadir = $(DESTDIR)@datadir@
+infodir = $(DESTDIR)@infodir@
+mandir = $(DESTDIR)@mandir@
+localedir = $(DESTDIR)@localedir@
+staticdir = $(DESTDIR)@STATICDIR@
+udevdir = $(DESTDIR)@udevdir@
+pkgconfigdir = $(usrlibdir)/pkgconfig
+initdir = $(DESTDIR)$(sysconfdir)/rc.d/init.d
+dbusconfdir = $(DESTDIR)$(sysconfdir)/dbus-1/system.d
+dbusservicedir = $(datadir)/dbus-1/system-services
+systemd_unit_dir = $(DESTDIR)@systemdsystemunitdir@
+systemd_generator_dir = $(DESTDIR)$(SYSTEMD_GENERATOR_DIR)
+systemd_dir = $(DESTDIR)@systemdutildir@
+tmpfiles_dir = $(DESTDIR)@tmpfilesdir@
+ocf_scriptdir = $(DESTDIR)@OCFDIR@
+pythonprefix = $(DESTDIR)$(prefix)
+
+# N.B. No $(DESTDIR) prefix here.
+python2dir = @PYTHON2DIR@
+python3dir = @PYTHON3DIR@
+
+USRLIB_RELPATH = $(shell echo $(abspath $(usrlibdir) $(libdir)) | \
+  $(AWK) -f $(top_srcdir)/scripts/relpath.awk)
+
+SYSTEMD_GENERATOR_DIR = @systemdutildir@/system-generators
+DEFAULT_SYS_DIR = @DEFAULT_SYS_DIR@
+DEFAULT_ARCHIVE_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_ARCHIVE_SUBDIR@
+DEFAULT_BACKUP_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_BACKUP_SUBDIR@
+DEFAULT_CACHE_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_CACHE_SUBDIR@
+DEFAULT_PROFILE_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_PROFILE_SUBDIR@
+DEFAULT_LOCK_DIR = @DEFAULT_LOCK_DIR@
+DEFAULT_RUN_DIR = @DEFAULT_RUN_DIR@
+DEFAULT_PID_DIR = @DEFAULT_PID_DIR@
+DEFAULT_MANGLING = @MANGLING@
+
+# Setup vpath search paths for some suffixes
+vpath %.c $(srcdir)
+vpath %.cpp $(srcdir)
+vpath %.in $(srcdir)
+vpath %.po $(srcdir)
+vpath %.exported_symbols $(srcdir)
+
+interface = @interface@
+interfacebuilddir = $(top_builddir)/libdm/$(interface)
+rpmbuilddir = $(abs_top_builddir)/build
+
+# The number of jobs to run, if blank, defaults to the make standard
+ifndef MAKEFLAGS
+MAKEFLAGS = @JOBS@
+endif
+
+# Handle installation of files
+ifeq ("@WRITE_INSTALL@", "yes")
+# leaving defaults
+M_INSTALL_SCRIPT =
+M_INSTALL_DATA = -m 644
+else
+M_INSTALL_PROGRAM = -m 555
+M_INSTALL_DATA = -m 444
+endif
+INSTALL_PROGRAM = $(INSTALL) $(M_INSTALL_PROGRAM) $(STRIP)
+INSTALL_DATA = $(INSTALL) -p $(M_INSTALL_DATA)
+INSTALL_WDATA = $(INSTALL) -p -m 644
+
+INSTALL_DIR = $(INSTALL) -m 755 -d
+INSTALL_ROOT_DIR = $(INSTALL) -m 700 -d
+INSTALL_ROOT_DATA = $(INSTALL) -m 600
+INSTALL_SCRIPT = $(INSTALL) -p $(M_INSTALL_PROGRAM)
+
+.SUFFIXES:
+.SUFFIXES: .c .cpp .d .o .so .a .po .pot .mo .dylib
+
+ifeq ("$(notdir $(CC))", "gcc")
+WFLAGS +=\
+ -Wall\
+ -Wcast-align\
+ -Wfloat-equal\
+ -Wformat-security\
+ -Winline\
+ -Wmissing-format-attribute\
+ -Wmissing-include-dirs\
+ -Wmissing-noreturn\
+ -Wpointer-arith\
+ -Wredundant-decls\
+ -Wshadow\
+ -Wundef\
+ -Wwrite-strings
+
+WCFLAGS +=\
+ -Wmissing-declarations\
+ -Wmissing-prototypes\
+ -Wnested-externs\
+ -Wold-style-definition\
+ -Wstrict-prototypes\
+ -Wuninitialized
+
+ifeq ("@HAVE_WJUMP@", "yes")
+WCFLAGS += -Wjump-misses-init
+endif
+
+ifeq ("@HAVE_WCLOBBERED@", "yes")
+WFLAGS +=\
+ -Wclobbered\
+ -Wempty-body\
+ -Wignored-qualifiers\
+ -Wlogical-op\
+ -Wtype-limits
+
+WCFLAGS +=\
+ -Wmissing-parameter-type\
+ -Wold-style-declaration\
+ -Woverride-init
+endif
+
+ifeq ("@HAVE_WSYNCNAND@", "yes")
+WFLAGS += -Wsync-nand
+endif
+endif
+
+ifneq ("@STATIC_LINK@", "yes")
+ifeq ("@HAVE_PIE@", "yes")
+ifeq ("@HAVE_FULL_RELRO@", "yes")
+  EXTRA_EXEC_CFLAGS += -fPIE
+  EXTRA_EXEC_LDFLAGS += -Wl,-z,relro,-z,now -pie -fPIE
+  CLDFLAGS += -Wl,-z,relro
+endif
+endif
+endif
+
+#WFLAGS += -W -Wno-sign-compare -Wno-unused-parameter -Wno-missing-field-initializers
+#WFLAGS += -Wsign-compare -Wunused-parameter -Wmissing-field-initializers
+#WFLAGS += -Wconversion -Wbad-function-cast -Wcast-qual -Waggregate-return -Wpacked
+#WFLAGS += -pedantic -std=gnu99
+#DEFS += -DDEBUG_CRC32
+
+#
+# Avoid recursive extension of CFLAGS
+# by checking whether CFLAGS already has fPIC string
+#
+ifeq (,$(findstring fPIC,$(CFLAGS)))
+
+CFLAGS += -fPIC
+
+ifeq ("@DEBUG@", "yes")
+ifeq (,$(findstring -g,$(CFLAGS)))
+  CFLAGS += -g
+endif
+  CFLAGS += -fno-omit-frame-pointer
+  DEFS += -DDEBUG
+  # memory debugging is not thread-safe yet
+  ifneq ("@BUILD_DMEVENTD@", "yes")
+  ifneq ("@BUILD_DMFILEMAPD@", "yes")
+  ifneq ("@BUILD_LVMLOCKD@", "yes")
+  ifneq ("@BUILD_LVMPOLLD@", "yes")
+  ifneq ("@BUILD_LVMETAD@", "yes")
+  ifeq ("@CLVMD@", "none")
+    DEFS += -DDEBUG_MEM
+  endif
+  endif
+  endif
+  endif
+  endif
+  endif
+endif
+
+# end of fPIC protection
+endif
+
+DEFS += -D_BUILDING_LVM
+
+LDFLAGS += -L$(top_builddir)/libdm -L$(top_builddir)/lib
+CLDFLAGS += -L$(top_builddir)/libdm -L$(top_builddir)/lib
+
+DAEMON_LIBS = -ldaemonclient
+LDFLAGS += -L$(top_builddir)/libdaemon/client
+CLDFLAGS += -L$(top_builddir)/libdaemon/client
+
+ifeq ("@BUILD_DMEVENTD@", "yes")
+  DMEVENT_LIBS = -ldevmapper-event
+  LDFLAGS += -L$(top_builddir)/daemons/dmeventd
+  CLDFLAGS += -L$(top_builddir)/daemons/dmeventd
+endif
+
+# Combining DEBUG_POOL with DEBUG_ENFORCE_POOL_LOCKING is not supported.
+#DEFS += -DDEBUG_POOL
+# Default pool locking uses the crc checksum.  With mprotect memory
+# enforcement compiled in, faulty memory writes can be found easily.
+#DEFS += -DDEBUG_ENFORCE_POOL_LOCKING
+#DEFS += -DBOUNDS_CHECK
+
+# LVM is not supposed to use mmap while devices are suspended.
+# This code causes a core dump if it gets called.
+#DEFS += -DDEBUG_MEMLOCK
+
+#CFLAGS += -pg
+#LDFLAGS += -pg
+
+STRIP=
+#STRIP = -s
+
+LVM_VERSION := $(shell cat $(top_srcdir)/VERSION)
+
+LIB_VERSION_LVM := $(shell $(AWK) -F '.' '{printf "%s.%s",$$1,$$2}' $(top_srcdir)/VERSION)
+
+LIB_VERSION_DM := $(shell $(AWK) -F '.' '{printf "%s.%s",$$1,$$2}' $(top_srcdir)/VERSION_DM)
+
+LIB_VERSION_APP := $(shell $(AWK) -F '[(). ]' '{printf "%s.%s",$$1,$$4}' $(top_srcdir)/VERSION)
+
+INCLUDES += -I$(top_srcdir) -I$(srcdir) -I$(top_builddir)/include
+
+# NOTE(review): this template is included by users as
+# $(top_builddir)/libdm/make.tmpl, yet DEPS still names
+# $(top_builddir)/make.tmpl — confirm which path is intended here.
+DEPS = $(top_builddir)/make.tmpl $(top_srcdir)/VERSION \
+       $(top_builddir)/Makefile
+
+OBJECTS = $(SOURCES:%.c=%.o) $(CXXSOURCES:%.cpp=%.o)
+POTFILES = $(SOURCES:%.c=%.pot)
+
+.PHONY: all pofile distclean clean cleandir cflow device-mapper
+.PHONY: install install_cluster install_device-mapper install_lvm2
+.PHONY: install_dbus_service
+.PHONY: install_lib_shared install_dm_plugin install_lvm2_plugin
+.PHONY: install_ocf install_systemd_generators install_all_man all_man man help
+.PHONY: python_bindings install_python_bindings
+.PHONY: $(SUBDIRS) $(SUBDIRS.install) $(SUBDIRS.clean) $(SUBDIRS.distclean)
+.PHONY: $(SUBDIRS.pofile) $(SUBDIRS.install_cluster) $(SUBDIRS.cflow)
+.PHONY: $(SUBDIRS.device-mapper) $(SUBDIRS.install-device-mapper)
+.PHONY: $(SUBDIRS.generate) generate
+
+SUBDIRS.device-mapper := $(SUBDIRS:=.device-mapper)
+SUBDIRS.install := $(SUBDIRS:=.install)
+SUBDIRS.install_cluster := $(SUBDIRS:=.install_cluster)
+SUBDIRS.install_device-mapper := $(SUBDIRS:=.install_device-mapper)
+SUBDIRS.install_lvm2 := $(SUBDIRS:=.install_lvm2)
+SUBDIRS.install_ocf := $(SUBDIRS:=.install_ocf)
+SUBDIRS.pofile := $(SUBDIRS:=.pofile)
+SUBDIRS.cflow := $(SUBDIRS:=.cflow)
+SUBDIRS.clean := $(SUBDIRS:=.clean)
+SUBDIRS.distclean := $(SUBDIRS:=.distclean)
+
+TARGETS += $(LIB_SHARED) $(LIB_STATIC)
+
+all: $(SUBDIRS) $(TARGETS)
+
+install: all $(SUBDIRS.install)
+install_cluster: all $(SUBDIRS.install_cluster)
+install_device-mapper: $(SUBDIRS.install_device-mapper)
+install_lvm2: $(SUBDIRS.install_lvm2)
+install_ocf: $(SUBDIRS.install_ocf)
+cflow: $(SUBDIRS.cflow)
+
+$(SUBDIRS): $(SUBDIRS.device-mapper)
+       $(MAKE) -C $@
+
+$(SUBDIRS.device-mapper):
+       $(MAKE) -C $(@:.device-mapper=) device-mapper
+
+$(SUBDIRS.install): $(SUBDIRS)
+       $(MAKE) -C $(@:.install=) install
+
+$(SUBDIRS.install_cluster): $(SUBDIRS)
+       $(MAKE) -C $(@:.install_cluster=) install_cluster
+
+$(SUBDIRS.install_device-mapper): device-mapper
+       $(MAKE) -C $(@:.install_device-mapper=) install_device-mapper
+
+$(SUBDIRS.install_lvm2): $(SUBDIRS)
+       $(MAKE) -C $(@:.install_lvm2=) install_lvm2
+
+$(SUBDIRS.install_ocf):
+       $(MAKE) -C $(@:.install_ocf=) install_ocf
+
+$(SUBDIRS.clean):
+       -$(MAKE) -C $(@:.clean=) clean
+
+$(SUBDIRS.distclean):
+       -$(MAKE) -C $(@:.distclean=) distclean
+
+$(SUBDIRS.cflow):
+       $(MAKE) -C $(@:.cflow=) cflow
+
+ifeq ("@INTL@", "yes")
+pofile: $(SUBDIRS.pofile) $(POTFILES)
+
+$(SUBDIRS.pofile):
+       $(MAKE) -C $(@:.pofile=) pofile
+endif
+
+$(SUBDIRS.generate):
+       $(MAKE) -C $(@:.generate=) generate
+
+ifneq ("$(CFLOW_LIST_TARGET)", "")
+CLEAN_CFLOW += $(CFLOW_LIST_TARGET)
+$(CFLOW_LIST_TARGET): $(CFLOW_LIST)
+       echo "CFLOW_SOURCES += $(addprefix \
+               \$$(top_srcdir)$(subst $(top_srcdir),,$(srcdir))/, $(CFLOW_LIST))" > $@
+cflow: $(CFLOW_LIST_TARGET)
+endif
+
+ifneq ("$(CFLOW_TARGET)", "")
+CLEAN_CFLOW += \
+       $(CFLOW_TARGET).cflow \
+       $(CFLOW_TARGET).xref \
+       $(CFLOW_TARGET).tree \
+       $(CFLOW_TARGET).rtree \
+       $(CFLOW_TARGET).rxref
+
+ifneq ("$(CFLOW_CMD)", "")
+CFLOW_FLAGS +=\
+ --cpp="$(CC) -E" \
+ --symbol _ISbit:wrapper \
+ --symbol __attribute__:wrapper \
+ --symbol __const__:wrapper \
+ --symbol __const:type \
+ --symbol __restrict:type \
+ --symbol __extension__:wrapper \
+ --symbol __nonnull:wrapper \
+ --symbol __nothrow__:wrapper \
+ --symbol __pure__:wrapper \
+ --symbol __REDIRECT:wrapper \
+ --symbol __REDIRECT_NTH:wrapper \
+ --symbol __wur:wrapper \
+ -I$(top_srcdir)/libdm \
+ -I$(top_srcdir)/libdm/ioctl \
+ -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2/ \
+ $(INCLUDES) $(DEFS)
+
+$(CFLOW_TARGET).cflow: $(CFLOW_SOURCES)
+       $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) $(CFLOW_SOURCES)
+$(CFLOW_TARGET).rxref: $(CFLOW_SOURCES)
+       $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) -r --omit-arguments $(CFLOW_SOURCES)
+$(CFLOW_TARGET).tree: $(CFLOW_SOURCES)
+       $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) --omit-arguments -T -b $(CFLOW_SOURCES)
+$(CFLOW_TARGET).xref: $(CFLOW_SOURCES)
+       $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) --omit-arguments -x $(CFLOW_SOURCES)
+#$(CFLOW_TARGET).rtree:  $(CFLOW_SOURCES)
+#      $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) -r --omit-arguments -T -b $(CFLOW_SOURCES)
+cflow: $(CFLOW_TARGET).cflow $(CFLOW_TARGET).tree $(CFLOW_TARGET).rxref $(CFLOW_TARGET).xref
+#$(CFLOW_TARGET).rtree
+endif
+endif
+
+.LIBPATTERNS = lib%.so lib%.a
+
+DEPFLAGS=-MT $@ -MMD -MP -MF $*.d
+
+# still needed in 2018 for 32bit builds
+DEFS+=-D_FILE_OFFSET_BITS=64
+
+%.o: %.c
+       @echo "    [CC] $<"
+       $(Q) $(CC) $(DEPFLAGS) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(WCFLAGS) $(CFLAGS) $(CFLAGS_$@) $< -o $@
+
+%.o: %.cpp
+       @echo "    [CXX] $<"
+       $(Q) $(CXX) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(CXXFLAGS) $(CXXFLAGS_$@) $< -o $@
+
+%.pot: %.c Makefile
+       @echo "    [CC] $@"
+       $(Q) $(CC) -E $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) -include $(top_builddir)/include/pogen.h $(DEFS) $(WFLAGS) $(CFLAGS) $< >$@
+
+%.so: %.o
+       @echo "    [CC] $<"
+       $(Q) $(CC) -c $(CFLAGS) $(CLDFLAGS) $< $(LIBS) -o $@
+
+ifneq (,$(LIB_SHARED))
+
+TARGETS += $(LIB_SHARED).$(LIB_VERSION)
+$(LIB_SHARED).$(LIB_VERSION): $(OBJECTS) $(LDDEPS)
+       @echo "    [CC] $@"
+ifeq ("@LIB_SUFFIX@","so")
+       $(Q) $(CC) -shared -Wl,-soname,$(notdir $@) \
+               $(CFLAGS) $(CLDFLAGS) $(OBJECTS) $(LIBS) -o $@
+endif
+ifeq ("@LIB_SUFFIX@","dylib")
+       $(Q) $(CC) -dynamiclib -dylib_current_version,$(LIB_VERSION) \
+               $(CFLAGS) $(CLDFLAGS) $(OBJECTS) $(LIBS) -o $@
+endif
+
+$(LIB_SHARED): $(LIB_SHARED).$(LIB_VERSION)
+       @echo "    [LN] $<"
+       $(Q) $(LN_S) -f $(<F) $@
+
+CLEAN_TARGETS += $(LDDEPS) .exported_symbols_generated
+
+install_lib_shared: $(LIB_SHARED)
+       @echo "    [INSTALL] $<"
+       $(Q) $(INSTALL_PROGRAM) -D $< $(libdir)/$(<F).$(LIB_VERSION)
+       $(Q) $(INSTALL_DIR) $(usrlibdir)
+       $(Q) $(LN_S) -f $(USRLIB_RELPATH)$(<F).$(LIB_VERSION) $(usrlibdir)/$(<F)
+
+# FIXME: plugins are installed to subdirs
+#        and for compatibility links in libdir are created
+#        when the code is fixed links could be removed.
+install_dm_plugin: $(LIB_SHARED)
+       @echo "    [INSTALL] $<"
+       $(Q) $(INSTALL_PROGRAM) -D $< $(libdir)/device-mapper/$(<F)
+       $(Q) $(LN_S) -f device-mapper/$(<F) $(libdir)/$(<F)
+
+install_lvm2_plugin: $(LIB_SHARED)
+       @echo "    [INSTALL] $<"
+       $(Q) $(INSTALL_PROGRAM) -D $< $(libdir)/lvm2/$(<F)
+       $(Q) $(LN_S) -f lvm2/$(<F) $(libdir)/$(<F)
+       $(Q) $(LN_S) -f $(<F) $(libdir)/$(<F).$(LIB_VERSION)
+endif
+
+$(LIB_STATIC): $(OBJECTS)
+       @echo "    [AR] $@"
+       $(Q) $(RM) $@
+       $(Q) $(AR) rsv $@ $(OBJECTS) > /dev/null
+
+%.d:
+.PRECIOUS: %.d
+
+%.mo: %.po
+       @echo "    [MSGFMT] $<"
+       $(Q) $(MSGFMT) -o $@ $<
+
+CLEAN_TARGETS += \
+       $(SOURCES:%.c=%.d) $(SOURCES:%.c=%.gcno) $(SOURCES:%.c=%.gcda) \
+       $(SOURCES2:%.c=%.o) $(SOURCES2:%.c=%.d) $(SOURCES2:%.c=%.gcno) $(SOURCES2:%.c=%.gcda) \
+       $(POTFILES) $(CLEAN_CFLOW)
+
+cleandir:
+ifneq (,$(firstword $(CLEAN_DIRS)))
+       $(RM) -r $(CLEAN_DIRS)
+endif
+       $(RM) $(OBJECTS) $(TARGETS) $(CLEAN_TARGETS) core
+
+clean: $(SUBDIRS.clean) cleandir
+
+distclean: cleandir $(SUBDIRS.distclean)
+ifneq (,$(firstword $(DISTCLEAN_DIRS)))
+       $(RM) -r $(DISTCLEAN_DIRS)
+endif
+       $(RM) $(DISTCLEAN_TARGETS) Makefile
+
+.exported_symbols_generated: $(EXPORTED_HEADER) .exported_symbols $(DEPS)
+       $(Q) set -e; \
+       ( cat $(srcdir)/.exported_symbols; \
+         if test -n "$(EXPORTED_HEADER)"; then \
+               $(CC) -E -P $(INCLUDES) $(DEFS) $(EXPORTED_HEADER) | \
+               $(SED) -ne "/^typedef|}/!s/.*[ *]\($(EXPORTED_FN_PREFIX)_[a-z0-9_]*\)(.*/\1/p"; \
+         fi \
+       ) > $@
+
+EXPORTED_UC := $(shell echo $(EXPORTED_FN_PREFIX) | tr '[a-z]' '[A-Z]')
+EXPORTED_SYMBOLS := $(wildcard $(srcdir)/.exported_symbols.Base $(srcdir)/.exported_symbols.$(EXPORTED_UC)_[0-9_]*[0-9])
+
+.export.sym: .exported_symbols_generated $(EXPORTED_SYMBOLS)
+ifeq (,$(firstword $(EXPORTED_SYMBOLS)))
+       $(Q) set -e; (echo "Base {"; echo "     global:";\
+                $(SED) "s/^/           /;s/$$/;/" $<;\
+                echo "};";\
+                echo "Local {"; echo " local:"; echo "         *;"; echo "};";\
+                ) > $@
+else
+       $(Q) set -e;\
+       R=$$($(SORT) $^ | uniq -u);\
+       test -z "$$R" || { echo "Mismatch between symbols in shared library and lists in .exported_symbols.* files: $$R"; false; } ;\
+       ( for i in $$(echo $(EXPORTED_SYMBOLS) | tr ' ' '\n' | $(SORT) -rnt_ -k5 ); do\
+               echo "$${i##*.} {"; echo "      global:";\
+               $(SED) "s/^/            /;s/$$/;/" $$i;\
+               echo "};";\
+         done;\
+         echo "Local {"; echo "        local:"; echo "         *;"; echo "};";\
+       ) > $@
+endif
+
+ifeq ("@USE_TRACKING@","yes")
+ifeq (,$(findstring $(MAKECMDGOALS),cscope.out cflow clean distclean lcov \
+ help check check_local check_cluster check_lvmetad check_lvmpolld))
+    ifdef SOURCES
+       -include $(SOURCES:.c=.d) $(CXXSOURCES:.cpp=.d)
+    endif
+    ifdef SOURCES2
+       -include $(SOURCES2:.c=.d)
+    endif
+endif
+endif
index be3049a9ea8dab835b7bb89c9ad8325401da5dbd..210ee37600e5615aea8330015dacb8a1a6ea6ba0 100644 (file)
@@ -43,7 +43,7 @@ LDDEPS += $(top_builddir)/lib/liblvm-internal.a
 include $(top_builddir)/make.tmpl
 
 LDFLAGS += -L$(top_builddir)/lib -L$(top_builddir)/daemons/dmeventd
-LIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio
+LIBS += $(LVMINTERNAL_LIBS) -laio
 
 .PHONY: install_dynamic install_static install_include install_pkgconfig
 
index 62f91ce50105170494151f981cfb746db113dfcd..b83a4430506aea3718a9ab11686d9eba724ef24e 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef _LVM2APP_MISC_H
 #define _LVM2APP_MISC_H
 
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 #include "liblvm/lvm2app.h"
 #include "lib/metadata/metadata-exported.h"
 #include "lib/commands/toolcontext.h"
index 1997596679c6790321b2ac8b3000afa35dbbda25..5d274a6010e5a4409951f8a4cf7b8b81d55f2a1a 100644 (file)
@@ -13,7 +13,7 @@
  */
 
 #include "lvm_prop.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 #include "lib/metadata/metadata.h"
 
 /* lv create parameters */
index 7e3f4a481f7c923e2090aea7ba18a7842236f124..c4d9d583ff575ada1f4843ad97b4f5bb14da8859 100644 (file)
@@ -68,7 +68,9 @@ CLDFLAGS += @CLDFLAGS@
 ELDFLAGS += @ELDFLAGS@
 LDDEPS += @LDDEPS@
 LIB_SUFFIX = @LIB_SUFFIX@
-LVMINTERNAL_LIBS = -llvm-internal $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS)
+LVMINTERNAL_LIBS=\
+       -llvm-internal \
+       $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS)
 DL_LIBS = @DL_LIBS@
 RT_LIBS = @RT_LIBS@
 M_LIBS = @M_LIBS@
@@ -338,7 +340,7 @@ SUBDIRS.distclean := $(SUBDIRS:=.distclean)
 
 TARGETS += $(LIB_SHARED) $(LIB_STATIC)
 
-all: $(SUBDIRS) $(TARGETS)
+all: $(top_builddir)/device_mapper/libdevice-mapper.a $(SUBDIRS) $(TARGETS)
 
 install: all $(SUBDIRS.install)
 install_cluster: all $(SUBDIRS.install_cluster)
@@ -347,7 +349,7 @@ install_lvm2: $(SUBDIRS.install_lvm2)
 install_ocf: $(SUBDIRS.install_ocf)
 cflow: $(SUBDIRS.cflow)
 
-$(SUBDIRS): $(SUBDIRS.device-mapper)
+$(SUBDIRS): $(SUBDIRS.device-mapper) $(top_builddir)/device_mapper/libdevice-mapper.a
        $(MAKE) -C $@
 
 $(SUBDIRS.device-mapper):
index 720ae9f0e1bb6ccafe8e46e6d753ab0afc475076..eeac88156eb57bedd3e3f314d851b17eb29271bf 100644 (file)
@@ -23,12 +23,12 @@ endif
 include $(top_builddir)/make.tmpl
 
 ifeq ("@APPLIB@", "yes")
-       DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so $(top_builddir)/libdm/libdevmapper.so
+       DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so
        LDFLAGS += -L$(top_builddir)/liblvm
 ifeq ("@BUILD_DMEVENTD@", "yes")
        LDFLAGS += -Wl,-rpath-link,$(top_builddir)/daemons/dmeventd
 endif
-       LVMLIBS = @LVM2APP_LIB@ -ldevmapper -laio
+       LVMLIBS = @LVM2APP_LIB@ -laio
 endif
 
 LVM_SCRIPTS = lvmdump.sh lvmconf.sh
index e953675a0d6389a79de2d811457e1bc6363e1929..6661149d9145a183a6a78ed914eaf4000ae58352 100644 (file)
@@ -41,9 +41,12 @@ endif
 include $(top_builddir)/make.tmpl
 
 DEFS += -D_REENTRANT
-DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so $(top_builddir)/libdm/libdevmapper.so
-LDFLAGS += -L$(top_builddir)/liblvm -L$(top_builddir)/daemons/dmeventd
-LIBS += @LVM2APP_LIB@ $(DMEVENT_LIBS) -ldevmapper
+DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so
+LDFLAGS+=\
+       -L$(top_builddir)/liblvm \
+       -L$(top_builddir)/daemons/dmeventd \
+	-L$(top_builddir)/device_mapper
+LIBS += @LVM2APP_LIB@ $(DMEVENT_LIBS)
 
 %.t: %.o $(DEPLIBS)
        $(CC) -o $@ $(<) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) $(LIBS)
index 9d18608828e2ea89f4e4508ad494d0214a6426e7..54b7d723cb2746f994b32896561bb5cfbb085b74 100644 (file)
@@ -12,7 +12,7 @@
 
 UNIT_SOURCE=\
        base/data-struct/radix-tree.c \
-       device-mapper/vdo/status.c \
+       device_mapper/vdo/status.c \
        \
        test/unit/bcache_t.c \
        test/unit/bcache_utils_t.c \
@@ -32,9 +32,9 @@ UNIT_SOURCE=\
 UNIT_DEPENDS=$(subst .c,.d,$(UNIT_SOURCE))
 UNIT_OBJECTS=$(UNIT_SOURCE:%.c=%.o)
 CLEAN_TARGETS+=$(UNIT_DEPENDS) $(UNIT_OBJECTS)
-UNIT_LDLIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio
+UNIT_LDLIBS += $(LVMINTERNAL_LIBS) -laio
 
-test/unit/unit-test: $(UNIT_OBJECTS) libdm/libdevmapper.$(LIB_SUFFIX) lib/liblvm-internal.a
+test/unit/unit-test: $(UNIT_OBJECTS) device_mapper/libdevice-mapper.a lib/liblvm-internal.a
        @echo "    [LD] $@"
        $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) -L$(top_builddir)/libdm \
              -o $@ $(UNIT_OBJECTS) $(UNIT_LDLIBS)
index 66e0d321df603db3dd512fd4b23ae995db6b9926..9b18fcbf05fe4e7b7c3ab2c34426b37fb46d2dcb 100644 (file)
@@ -13,7 +13,7 @@
  */
 
 #include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 enum {
         NR_BITS = 137
index e988706d234e986ef7c8dade1c0f26305b1c035d..21af55146d4d5491da977bba9560625eceac5dea 100644 (file)
@@ -13,7 +13,7 @@
  */
 
 #include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 static void *_mem_init(void)
 {
index 5a4951e1a0f62d3c8e92048016d86c9f3f9fc1e4..8a9948f72d0de537782884cbb52760b1a53cb004 100644 (file)
@@ -13,7 +13,7 @@
  */
 
 #include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 static void test_dmlist_splice(void *fixture)
 {
index 43fb0bf8266b6ee525de04306557ac90298def80..f50dd75c459a6971743818ce077a2f414044bdbe 100644 (file)
@@ -13,7 +13,7 @@
  */
 
 #include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 static void *_mem_init(void)
 {
index a90a6e2bdc1decc0712776b9cacde228eb2f0e51..0a8a5f27bf2d825475a7c9b5c19e62d7a0e98eb2 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef TEST_UNIT_FRAMEWORK_H
 #define TEST_UNIT_FRAMEWORK_H
 
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 #include <stdbool.h>
 #include <stdint.h>
index 8405a347f89f264c1523e2722fc442297c3e2a57..296c78ad111436b5d8f840a34738435b76203d53 100644 (file)
@@ -14,7 +14,7 @@
  */
 
 #include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 #include "matcher_data.h"
 
index fc168d4e1e7a1790e7094e84839aeb796eef852f..43414809aa9eb511d79a6fb84fb4086011901ef6 100644 (file)
@@ -13,7 +13,7 @@
  */
 
 #include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 #include <stdio.h>
 #include <string.h>
index 74886f0bdde1515b1533128181acfe18d39b3659..3557247e8f889a8ac5e7101b7d336f3b3b404644 100644 (file)
@@ -13,7 +13,7 @@
  */
 
 #include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 
 #include <stdio.h>
 #include <string.h>
index 21ecd1ad8c29f43a7d6b1099b1da640c0530365c..9c41887eee8181ff8fe815032a23ef50fcc28859 100644 (file)
@@ -12,7 +12,7 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "device-mapper/vdo/target.h"
+#include "device_mapper/vdo/target.h"
 #include "framework.h"
 #include "units.h"
 
index 103b76732f3d349f50f6fd40d293f2c361e72caa..d4514a289a614f764527943f622463355b7042b3 100644 (file)
@@ -95,7 +95,7 @@ ifeq ("@STATIC_LINK@", "yes")
   INSTALL_CMDLIB_TARGETS += install_cmdlib_static
 endif
 
-LVMLIBS = $(LVMINTERNAL_LIBS) -ldevmapper -laio
+LVMLIBS = $(LVMINTERNAL_LIBS) -laio
 LIB_VERSION = $(LIB_VERSION_LVM)
 
 CLEAN_TARGETS = liblvm2cmd.$(LIB_SUFFIX) $(TARGETS_DM) \
@@ -122,15 +122,15 @@ device-mapper: $(TARGETS_DM)
 
 CFLAGS_dmsetup.o += $(UDEV_CFLAGS) $(EXTRA_EXEC_CFLAGS)
 
-dmsetup: dmsetup.o $(top_builddir)/libdm/libdevmapper.$(LIB_SUFFIX)
+dmsetup: dmsetup.o $(top_builddir)/device_mapper/libdevice-mapper.a
        @echo "    [CC] $@"
        $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) \
-             -o $@ dmsetup.o -ldevmapper $(LIBS)
+             -o $@ $+ $(LIBS) -lm
 
-dmsetup.static: dmsetup.o $(interfacebuilddir)/libdevmapper.a
+dmsetup.static: dmsetup.o $(top_builddir)/device_mapper/libdevice-mapper.a
        @echo "    [CC] $@"
        $(Q) $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) \
-             -o $@ dmsetup.o -ldevmapper $(M_LIBS) $(PTHREAD_LIBS) $(STATIC_LIBS) $(LIBS)
+             -o $@ $+ $(M_LIBS) $(PTHREAD_LIBS) $(STATIC_LIBS) $(LIBS)
 
 all: device-mapper
 
@@ -138,10 +138,10 @@ CFLAGS_lvm.o += $(EXTRA_EXEC_CFLAGS)
 
 INCLUDES += -I$(top_builddir)/tools
 
-lvm: $(OBJECTS) lvm.o $(top_builddir)/lib/liblvm-internal.a
+lvm: $(OBJECTS) lvm.o $(top_builddir)/lib/liblvm-internal.a $(top_builddir)/device_mapper/libdevice-mapper.a
        @echo "    [CC] $@"
-       $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -o $@ $(OBJECTS) lvm.o \
-               $(LVMLIBS) $(READLINE_LIBS) $(LIBS)
+       $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -o $@ $+ \
+               $(LVMLIBS) $(READLINE_LIBS) $(LIBS) -lm
 
 DEFS_man-generator.o += -DMAN_PAGE_GENERATOR
 
@@ -157,7 +157,7 @@ ifeq ("@BUILD_LVMETAD@", "yes")
 lvm: $(top_builddir)/libdaemon/client/libdaemonclient.a
 endif
 
-lvm.static: $(OBJECTS) lvm-static.o $(top_builddir)/lib/liblvm-internal.a  $(interfacebuilddir)/libdevmapper.a
+lvm.static: $(OBJECTS) lvm-static.o $(top_builddir)/lib/liblvm-internal.a  $(top_builddir)/device_mapper/libdevice-mapper.a
        @echo "    [CC] $@"
        $(Q) $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) -o $@ \
              $(OBJECTS) lvm-static.o $(LVMLIBS) $(STATIC_LIBS) $(LIBS)
@@ -222,7 +222,6 @@ $(SOURCES:%.c=%.o) $(SOURCES2:%.c=%.o): command-lines-input.h command-count.h cm
 
 ifneq ("$(CFLOW_CMD)", "")
 CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES))
--include $(top_builddir)/libdm/libdevmapper.cflow
 -include $(top_builddir)/lib/liblvm-internal.cflow
 endif
 
index d3080bba8220e338283eb402ae038a07230ba92e..5486ed38a915c890c767417b7bedd29951bbe9d8 100644 (file)
@@ -17,7 +17,7 @@
  */
 #include "tools/tool.h"
 
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
 
 #include <ctype.h>
 #include <dirent.h>
index 656234c34bfe38823798bdd27ed0792349552295..51d530c76576f02809e5f2876b9f6be164bb1f8c 100644 (file)
@@ -24,7 +24,7 @@
 
 #include <unistd.h>
 
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
 #include "lib/misc/util.h"
 
 #endif /* _LVM_TOOL_H */
This page took 1.110645 seconds and 5 git commands to generate.