2 * Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved.
4 * This file is part of the device-mapper userspace tools.
6 * This copyrighted material is made available to anyone wishing to use,
7 * modify, copy, or redistribute it subject to the terms and conditions
8 * of the GNU Lesser General Public License v.2.1.
10 * You should have received a copy of the GNU Lesser General Public License
11 * along with this program; if not, write to the Free Software Foundation,
12 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 #include "libdm-targets.h"
17 #include "libdm-common.h"
22 #include <sys/param.h>
23 #include <sys/utsname.h>
25 #define MAX_TARGET_PARAMSIZE 500000
27 /* FIXME Fix interface so this is used only by LVM */
28 #define UUID_PREFIX "LVM-"
30 #define REPLICATOR_LOCAL_SITE 0
32 /* Supported segment types */
47 /* FIXME Add crypt and multipath support */
53 { SEG_CRYPT
, "crypt" },
54 { SEG_ERROR
, "error" },
55 { SEG_LINEAR
, "linear" },
56 { SEG_MIRRORED
, "mirror" },
57 { SEG_REPLICATOR
, "replicator" },
58 { SEG_REPLICATOR_DEV
, "replicator-dev" },
59 { SEG_SNAPSHOT
, "snapshot" },
60 { SEG_SNAPSHOT_ORIGIN
, "snapshot-origin" },
61 { SEG_SNAPSHOT_MERGE
, "snapshot-merge" },
62 { SEG_STRIPED
, "striped" },
66 /* Some segment types have a list of areas of other devices attached */
70 struct dm_tree_node
*dev_node
;
74 unsigned rsite_index
; /* Replicator site index */
75 struct dm_tree_node
*slog
; /* Replicator sync log node */
76 uint64_t region_size
; /* Replicator sync log size */
77 uint32_t flags
; /* Replicator sync log flags */
80 /* Replicator-log has a list of sites */
81 /* FIXME: maybe move to seg_area too? */
82 struct replicator_site
{
86 dm_replicator_mode_t mode
;
87 uint32_t async_timeout
;
88 uint32_t fall_behind_ios
;
89 uint64_t fall_behind_data
;
92 /* Per-segment properties */
100 unsigned area_count
; /* Linear + Striped + Mirrored + Crypt + Replicator */
101 struct dm_list areas
; /* Linear + Striped + Mirrored + Crypt + Replicator */
103 uint32_t stripe_size
; /* Striped */
105 int persistent
; /* Snapshot */
106 uint32_t chunk_size
; /* Snapshot */
107 struct dm_tree_node
*cow
; /* Snapshot */
108 struct dm_tree_node
*origin
; /* Snapshot + Snapshot origin */
109 struct dm_tree_node
*merge
; /* Snapshot */
111 struct dm_tree_node
*log
; /* Mirror + Replicator */
112 uint32_t region_size
; /* Mirror */
113 unsigned clustered
; /* Mirror */
114 unsigned mirror_area_count
; /* Mirror */
115 uint32_t flags
; /* Mirror log */
116 char *uuid
; /* Clustered mirror log */
118 const char *cipher
; /* Crypt */
119 const char *chainmode
; /* Crypt */
120 const char *iv
; /* Crypt */
121 uint64_t iv_offset
; /* Crypt */
122 const char *key
; /* Crypt */
124 const char *rlog_type
; /* Replicator */
125 struct dm_list rsites
; /* Replicator */
126 unsigned rsite_count
; /* Replicator */
127 unsigned rdevice_count
; /* Replicator */
128 struct dm_tree_node
*replicator
;/* Replicator-dev */
129 uint64_t rdevice_index
; /* Replicator-dev */
132 /* Per-device properties */
133 struct load_properties
{
139 uint32_t read_ahead_flags
;
141 unsigned segment_count
;
142 unsigned size_changed
;
145 const char *new_name
;
147 /* If immediate_dev_node is set to 1, try to create the dev node
148 * as soon as possible (e.g. in preload stage even during traversal
149 * and processing of dm tree). This will also flush all stacked dev
150 * node operations, synchronizing with udev.
152 unsigned immediate_dev_node
;
155 * If the device size changed from zero and this is set,
156 * don't resume the device immediately, even if the device
157 * has parents. This works provided the parents do not
158 * validate the device size and is required by pvmove to
159 * avoid starting the mirror resync operation too early.
161 unsigned delay_resume_if_new
;
164 /* Two of these used to join two nodes with uses and used_by. */
165 struct dm_tree_link
{
167 struct dm_tree_node
*node
;
170 struct dm_tree_node
{
171 struct dm_tree
*dtree
;
177 struct dm_list uses
; /* Nodes this node uses */
178 struct dm_list used_by
; /* Nodes that use this node */
180 int activation_priority
; /* 0 gets activated first */
182 uint16_t udev_flags
; /* Udev control flags */
184 void *context
; /* External supplied context */
186 struct load_properties props
; /* For creation/table (re)load */
189 * If presuspend of child node is needed
190 * Note: only direct child is allowed
192 struct dm_tree_node
*presuspend_node
;
197 struct dm_hash_table
*devs
;
198 struct dm_hash_table
*uuids
;
199 struct dm_tree_node root
;
200 int skip_lockfs
; /* 1 skips lockfs (for non-snapshots) */
201 int no_flush
; /* 1 sets noflush (mirrors/multipath) */
205 struct dm_tree
*dm_tree_create(void)
207 struct dm_tree
*dtree
;
209 if (!(dtree
= dm_zalloc(sizeof(*dtree
)))) {
210 log_error("dm_tree_create malloc failed");
214 dtree
->root
.dtree
= dtree
;
215 dm_list_init(&dtree
->root
.uses
);
216 dm_list_init(&dtree
->root
.used_by
);
217 dtree
->skip_lockfs
= 0;
220 if (!(dtree
->mem
= dm_pool_create("dtree", 1024))) {
221 log_error("dtree pool creation failed");
226 if (!(dtree
->devs
= dm_hash_create(8))) {
227 log_error("dtree hash creation failed");
228 dm_pool_destroy(dtree
->mem
);
233 if (!(dtree
->uuids
= dm_hash_create(32))) {
234 log_error("dtree uuid hash creation failed");
235 dm_hash_destroy(dtree
->devs
);
236 dm_pool_destroy(dtree
->mem
);
244 void dm_tree_free(struct dm_tree
*dtree
)
249 dm_hash_destroy(dtree
->uuids
);
250 dm_hash_destroy(dtree
->devs
);
251 dm_pool_destroy(dtree
->mem
);
255 static int _nodes_are_linked(const struct dm_tree_node
*parent
,
256 const struct dm_tree_node
*child
)
258 struct dm_tree_link
*dlink
;
260 dm_list_iterate_items(dlink
, &parent
->uses
)
261 if (dlink
->node
== child
)
267 static int _link(struct dm_list
*list
, struct dm_tree_node
*node
)
269 struct dm_tree_link
*dlink
;
271 if (!(dlink
= dm_pool_alloc(node
->dtree
->mem
, sizeof(*dlink
)))) {
272 log_error("dtree link allocation failed");
277 dm_list_add(list
, &dlink
->list
);
282 static int _link_nodes(struct dm_tree_node
*parent
,
283 struct dm_tree_node
*child
)
285 if (_nodes_are_linked(parent
, child
))
288 if (!_link(&parent
->uses
, child
))
291 if (!_link(&child
->used_by
, parent
))
297 static void _unlink(struct dm_list
*list
, struct dm_tree_node
*node
)
299 struct dm_tree_link
*dlink
;
301 dm_list_iterate_items(dlink
, list
)
302 if (dlink
->node
== node
) {
303 dm_list_del(&dlink
->list
);
308 static void _unlink_nodes(struct dm_tree_node
*parent
,
309 struct dm_tree_node
*child
)
311 if (!_nodes_are_linked(parent
, child
))
314 _unlink(&parent
->uses
, child
);
315 _unlink(&child
->used_by
, parent
);
318 static int _add_to_toplevel(struct dm_tree_node
*node
)
320 return _link_nodes(&node
->dtree
->root
, node
);
323 static void _remove_from_toplevel(struct dm_tree_node
*node
)
325 _unlink_nodes(&node
->dtree
->root
, node
);
328 static int _add_to_bottomlevel(struct dm_tree_node
*node
)
330 return _link_nodes(node
, &node
->dtree
->root
);
333 static void _remove_from_bottomlevel(struct dm_tree_node
*node
)
335 _unlink_nodes(node
, &node
->dtree
->root
);
338 static int _link_tree_nodes(struct dm_tree_node
*parent
, struct dm_tree_node
*child
)
340 /* Don't link to root node if child already has a parent */
341 if (parent
== &parent
->dtree
->root
) {
342 if (dm_tree_node_num_children(child
, 1))
345 _remove_from_toplevel(child
);
347 if (child
== &child
->dtree
->root
) {
348 if (dm_tree_node_num_children(parent
, 0))
351 _remove_from_bottomlevel(parent
);
353 return _link_nodes(parent
, child
);
356 static struct dm_tree_node
*_create_dm_tree_node(struct dm_tree
*dtree
,
359 struct dm_info
*info
,
363 struct dm_tree_node
*node
;
366 if (!(node
= dm_pool_zalloc(dtree
->mem
, sizeof(*node
)))) {
367 log_error("_create_dm_tree_node alloc failed");
376 node
->context
= context
;
377 node
->udev_flags
= udev_flags
;
378 node
->activation_priority
= 0;
380 dm_list_init(&node
->uses
);
381 dm_list_init(&node
->used_by
);
382 dm_list_init(&node
->props
.segs
);
384 dev
= MKDEV(info
->major
, info
->minor
);
386 if (!dm_hash_insert_binary(dtree
->devs
, (const char *) &dev
,
387 sizeof(dev
), node
)) {
388 log_error("dtree node hash insertion failed");
389 dm_pool_free(dtree
->mem
, node
);
394 !dm_hash_insert(dtree
->uuids
, uuid
, node
)) {
395 log_error("dtree uuid hash insertion failed");
396 dm_hash_remove_binary(dtree
->devs
, (const char *) &dev
,
398 dm_pool_free(dtree
->mem
, node
);
405 static struct dm_tree_node
*_find_dm_tree_node(struct dm_tree
*dtree
,
406 uint32_t major
, uint32_t minor
)
408 uint64_t dev
= MKDEV(major
, minor
);
410 return dm_hash_lookup_binary(dtree
->devs
, (const char *) &dev
,
414 static struct dm_tree_node
*_find_dm_tree_node_by_uuid(struct dm_tree
*dtree
,
417 struct dm_tree_node
*node
;
419 if ((node
= dm_hash_lookup(dtree
->uuids
, uuid
)))
422 if (strncmp(uuid
, UUID_PREFIX
, sizeof(UUID_PREFIX
) - 1))
425 return dm_hash_lookup(dtree
->uuids
, uuid
+ sizeof(UUID_PREFIX
) - 1);
428 static int _deps(struct dm_task
**dmt
, struct dm_pool
*mem
, uint32_t major
, uint32_t minor
,
429 const char **name
, const char **uuid
,
430 struct dm_info
*info
, struct dm_deps
**deps
)
432 memset(info
, 0, sizeof(*info
));
434 if (!dm_is_dm_major(major
)) {
441 info
->live_table
= 0;
442 info
->inactive_table
= 0;
447 if (!(*dmt
= dm_task_create(DM_DEVICE_DEPS
))) {
448 log_error("deps dm_task creation failed");
452 if (!dm_task_set_major(*dmt
, major
)) {
453 log_error("_deps: failed to set major for (%" PRIu32
":%" PRIu32
")",
458 if (!dm_task_set_minor(*dmt
, minor
)) {
459 log_error("_deps: failed to set minor for (%" PRIu32
":%" PRIu32
")",
464 if (!dm_task_run(*dmt
)) {
465 log_error("_deps: task run failed for (%" PRIu32
":%" PRIu32
")",
470 if (!dm_task_get_info(*dmt
, info
)) {
471 log_error("_deps: failed to get info for (%" PRIu32
":%" PRIu32
")",
481 if (info
->major
!= major
) {
482 log_error("Inconsistent dtree major number: %u != %u",
486 if (info
->minor
!= minor
) {
487 log_error("Inconsistent dtree minor number: %u != %u",
491 if (!(*name
= dm_pool_strdup(mem
, dm_task_get_name(*dmt
)))) {
492 log_error("name pool_strdup failed");
495 if (!(*uuid
= dm_pool_strdup(mem
, dm_task_get_uuid(*dmt
)))) {
496 log_error("uuid pool_strdup failed");
499 *deps
= dm_task_get_deps(*dmt
);
505 dm_task_destroy(*dmt
);
509 static struct dm_tree_node
*_add_dev(struct dm_tree
*dtree
,
510 struct dm_tree_node
*parent
,
511 uint32_t major
, uint32_t minor
,
514 struct dm_task
*dmt
= NULL
;
516 struct dm_deps
*deps
= NULL
;
517 const char *name
= NULL
;
518 const char *uuid
= NULL
;
519 struct dm_tree_node
*node
= NULL
;
523 /* Already in tree? */
524 if (!(node
= _find_dm_tree_node(dtree
, major
, minor
))) {
525 if (!_deps(&dmt
, dtree
->mem
, major
, minor
, &name
, &uuid
, &info
, &deps
))
528 if (!(node
= _create_dm_tree_node(dtree
, name
, uuid
, &info
,
534 if (!_link_tree_nodes(parent
, node
)) {
539 /* If node was already in tree, no need to recurse. */
543 /* Can't recurse if not a mapped device or there are no dependencies */
544 if (!node
->info
.exists
|| !deps
->count
) {
545 if (!_add_to_bottomlevel(node
)) {
552 /* Add dependencies to tree */
553 for (i
= 0; i
< deps
->count
; i
++)
554 if (!_add_dev(dtree
, node
, MAJOR(deps
->device
[i
]),
555 MINOR(deps
->device
[i
]), udev_flags
)) {
562 dm_task_destroy(dmt
);
567 static int _node_clear_table(struct dm_tree_node
*dnode
)
570 struct dm_info
*info
;
574 if (!(info
= &dnode
->info
)) {
575 log_error("_node_clear_table failed: missing info");
579 if (!(name
= dm_tree_node_get_name(dnode
))) {
580 log_error("_node_clear_table failed: missing name");
584 /* Is there a table? */
585 if (!info
->exists
|| !info
->inactive_table
)
588 log_verbose("Clearing inactive table %s (%" PRIu32
":%" PRIu32
")",
589 name
, info
->major
, info
->minor
);
591 if (!(dmt
= dm_task_create(DM_DEVICE_CLEAR
))) {
592 log_error("Table clear dm_task creation failed for %s", name
);
596 if (!dm_task_set_major(dmt
, info
->major
) ||
597 !dm_task_set_minor(dmt
, info
->minor
)) {
598 log_error("Failed to set device number for %s table clear", name
);
599 dm_task_destroy(dmt
);
603 r
= dm_task_run(dmt
);
605 if (!dm_task_get_info(dmt
, info
)) {
606 log_error("_node_clear_table failed: info missing after running task for %s", name
);
610 dm_task_destroy(dmt
);
615 struct dm_tree_node
*dm_tree_add_new_dev(struct dm_tree
*dtree
,
618 uint32_t major
, uint32_t minor
,
623 struct dm_tree_node
*dnode
;
628 /* Do we need to add node to tree? */
629 if (!(dnode
= dm_tree_find_node_by_uuid(dtree
, uuid
))) {
630 if (!(name2
= dm_pool_strdup(dtree
->mem
, name
))) {
631 log_error("name pool_strdup failed");
634 if (!(uuid2
= dm_pool_strdup(dtree
->mem
, uuid
))) {
635 log_error("uuid pool_strdup failed");
643 info
.inactive_table
= 0;
646 if (!(dnode
= _create_dm_tree_node(dtree
, name2
, uuid2
, &info
,
650 /* Attach to root node until a table is supplied */
651 if (!_add_to_toplevel(dnode
) || !_add_to_bottomlevel(dnode
))
654 dnode
->props
.major
= major
;
655 dnode
->props
.minor
= minor
;
656 dnode
->props
.new_name
= NULL
;
657 dnode
->props
.size_changed
= 0;
658 } else if (strcmp(name
, dnode
->name
)) {
659 /* Do we need to rename node? */
660 if (!(dnode
->props
.new_name
= dm_pool_strdup(dtree
->mem
, name
))) {
661 log_error("name pool_strdup failed");
666 dnode
->props
.read_only
= read_only
? 1 : 0;
667 dnode
->props
.read_ahead
= DM_READ_AHEAD_AUTO
;
668 dnode
->props
.read_ahead_flags
= 0;
670 if (clear_inactive
&& !_node_clear_table(dnode
))
673 dnode
->context
= context
;
674 dnode
->udev_flags
= 0;
679 struct dm_tree_node
*dm_tree_add_new_dev_with_udev_flags(struct dm_tree
*dtree
,
689 struct dm_tree_node
*node
;
691 if ((node
= dm_tree_add_new_dev(dtree
, name
, uuid
, major
, minor
, read_only
,
692 clear_inactive
, context
)))
693 node
->udev_flags
= udev_flags
;
699 void dm_tree_node_set_read_ahead(struct dm_tree_node
*dnode
,
701 uint32_t read_ahead_flags
)
703 dnode
->props
.read_ahead
= read_ahead
;
704 dnode
->props
.read_ahead_flags
= read_ahead_flags
;
707 void dm_tree_node_set_presuspend_node(struct dm_tree_node
*node
,
708 struct dm_tree_node
*presuspend_node
)
710 node
->presuspend_node
= presuspend_node
;
713 int dm_tree_add_dev(struct dm_tree
*dtree
, uint32_t major
, uint32_t minor
)
715 return _add_dev(dtree
, &dtree
->root
, major
, minor
, 0) ? 1 : 0;
718 int dm_tree_add_dev_with_udev_flags(struct dm_tree
*dtree
, uint32_t major
,
719 uint32_t minor
, uint16_t udev_flags
)
721 return _add_dev(dtree
, &dtree
->root
, major
, minor
, udev_flags
) ? 1 : 0;
724 const char *dm_tree_node_get_name(const struct dm_tree_node
*node
)
726 return node
->info
.exists
? node
->name
: "";
729 const char *dm_tree_node_get_uuid(const struct dm_tree_node
*node
)
731 return node
->info
.exists
? node
->uuid
: "";
734 const struct dm_info
*dm_tree_node_get_info(const struct dm_tree_node
*node
)
739 void *dm_tree_node_get_context(const struct dm_tree_node
*node
)
741 return node
->context
;
744 int dm_tree_node_size_changed(const struct dm_tree_node
*dnode
)
746 return dnode
->props
.size_changed
;
749 int dm_tree_node_num_children(const struct dm_tree_node
*node
, uint32_t inverted
)
752 if (_nodes_are_linked(&node
->dtree
->root
, node
))
754 return dm_list_size(&node
->used_by
);
757 if (_nodes_are_linked(node
, &node
->dtree
->root
))
760 return dm_list_size(&node
->uses
);
764 * Returns 1 if no prefix supplied
766 static int _uuid_prefix_matches(const char *uuid
, const char *uuid_prefix
, size_t uuid_prefix_len
)
771 if (!strncmp(uuid
, uuid_prefix
, uuid_prefix_len
))
774 /* Handle transition: active device uuids might be missing the prefix */
775 if (uuid_prefix_len
<= 4)
778 if (!strncmp(uuid
, UUID_PREFIX
, sizeof(UUID_PREFIX
) - 1))
781 if (strncmp(uuid_prefix
, UUID_PREFIX
, sizeof(UUID_PREFIX
) - 1))
784 if (!strncmp(uuid
, uuid_prefix
+ sizeof(UUID_PREFIX
) - 1, uuid_prefix_len
- (sizeof(UUID_PREFIX
) - 1)))
791 * Returns 1 if no children.
793 static int _children_suspended(struct dm_tree_node
*node
,
795 const char *uuid_prefix
,
796 size_t uuid_prefix_len
)
798 struct dm_list
*list
;
799 struct dm_tree_link
*dlink
;
800 const struct dm_info
*dinfo
;
804 if (_nodes_are_linked(&node
->dtree
->root
, node
))
806 list
= &node
->used_by
;
808 if (_nodes_are_linked(node
, &node
->dtree
->root
))
813 dm_list_iterate_items(dlink
, list
) {
814 if (!(uuid
= dm_tree_node_get_uuid(dlink
->node
))) {
819 /* Ignore if it doesn't belong to this VG */
820 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
823 /* Ignore if parent node wants to presuspend this node */
824 if (dlink
->node
->presuspend_node
== node
)
827 if (!(dinfo
= dm_tree_node_get_info(dlink
->node
))) {
828 stack
; /* FIXME Is this normal? */
832 if (!dinfo
->suspended
)
840 * Set major and minor to zero for root of tree.
842 struct dm_tree_node
*dm_tree_find_node(struct dm_tree
*dtree
,
846 if (!major
&& !minor
)
849 return _find_dm_tree_node(dtree
, major
, minor
);
853 * Set uuid to NULL for root of tree.
855 struct dm_tree_node
*dm_tree_find_node_by_uuid(struct dm_tree
*dtree
,
861 return _find_dm_tree_node_by_uuid(dtree
, uuid
);
865 * First time set *handle to NULL.
866 * Set inverted to invert the tree.
868 struct dm_tree_node
*dm_tree_next_child(void **handle
,
869 const struct dm_tree_node
*parent
,
872 struct dm_list
**dlink
= (struct dm_list
**) handle
;
873 const struct dm_list
*use_list
;
876 use_list
= &parent
->used_by
;
878 use_list
= &parent
->uses
;
881 *dlink
= dm_list_first(use_list
);
883 *dlink
= dm_list_next(use_list
, *dlink
);
885 return (*dlink
) ? dm_list_item(*dlink
, struct dm_tree_link
)->node
: NULL
;
889 * Deactivate a device with its dependencies if the uuid prefix matches.
891 static int _info_by_dev(uint32_t major
, uint32_t minor
, int with_open_count
,
892 struct dm_info
*info
)
897 if (!(dmt
= dm_task_create(DM_DEVICE_INFO
))) {
898 log_error("_info_by_dev: dm_task creation failed");
902 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
903 log_error("_info_by_dev: Failed to set device number");
904 dm_task_destroy(dmt
);
908 if (!with_open_count
&& !dm_task_no_open_count(dmt
))
909 log_error("Failed to disable open_count");
911 if ((r
= dm_task_run(dmt
)))
912 r
= dm_task_get_info(dmt
, info
);
914 dm_task_destroy(dmt
);
919 /* Check if all parent nodes of given node have open_count == 0 */
920 static int _node_has_closed_parents(struct dm_tree_node
*node
,
921 const char *uuid_prefix
,
922 size_t uuid_prefix_len
)
924 struct dm_tree_link
*dlink
;
925 const struct dm_info
*dinfo
;
929 /* Iterate through parents of this node */
930 dm_list_iterate_items(dlink
, &node
->used_by
) {
931 if (!(uuid
= dm_tree_node_get_uuid(dlink
->node
))) {
936 /* Ignore if it doesn't belong to this VG */
937 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
940 if (!(dinfo
= dm_tree_node_get_info(dlink
->node
))) {
941 stack
; /* FIXME Is this normal? */
945 /* Refresh open_count */
946 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 1, &info
) ||
950 if (info
.open_count
) {
951 log_debug("Node %s %d:%d has open_count %d", uuid_prefix
,
952 dinfo
->major
, dinfo
->minor
, info
.open_count
);
960 static int _deactivate_node(const char *name
, uint32_t major
, uint32_t minor
,
961 uint32_t *cookie
, uint16_t udev_flags
)
966 log_verbose("Removing %s (%" PRIu32
":%" PRIu32
")", name
, major
, minor
);
968 if (!(dmt
= dm_task_create(DM_DEVICE_REMOVE
))) {
969 log_error("Deactivation dm_task creation failed for %s", name
);
973 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
974 log_error("Failed to set device number for %s deactivation", name
);
978 if (!dm_task_no_open_count(dmt
))
979 log_error("Failed to disable open_count");
981 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
984 r
= dm_task_run(dmt
);
986 /* FIXME Until kernel returns actual name so dm-iface.c can handle it */
987 rm_dev_node(name
, dmt
->cookie_set
&& !(udev_flags
& DM_UDEV_DISABLE_DM_RULES_FLAG
),
988 dmt
->cookie_set
&& !(udev_flags
& DM_UDEV_DISABLE_LIBRARY_FALLBACK
));
990 /* FIXME Remove node from tree or mark invalid? */
993 dm_task_destroy(dmt
);
998 static int _rename_node(const char *old_name
, const char *new_name
, uint32_t major
,
999 uint32_t minor
, uint32_t *cookie
, uint16_t udev_flags
)
1001 struct dm_task
*dmt
;
1004 log_verbose("Renaming %s (%" PRIu32
":%" PRIu32
") to %s", old_name
, major
, minor
, new_name
);
1006 if (!(dmt
= dm_task_create(DM_DEVICE_RENAME
))) {
1007 log_error("Rename dm_task creation failed for %s", old_name
);
1011 if (!dm_task_set_name(dmt
, old_name
)) {
1012 log_error("Failed to set name for %s rename.", old_name
);
1016 if (!dm_task_set_newname(dmt
, new_name
))
1019 if (!dm_task_no_open_count(dmt
))
1020 log_error("Failed to disable open_count");
1022 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1025 r
= dm_task_run(dmt
);
1028 dm_task_destroy(dmt
);
1033 /* FIXME Merge with _suspend_node? */
1034 static int _resume_node(const char *name
, uint32_t major
, uint32_t minor
,
1035 uint32_t read_ahead
, uint32_t read_ahead_flags
,
1036 struct dm_info
*newinfo
, uint32_t *cookie
,
1037 uint16_t udev_flags
, int already_suspended
)
1039 struct dm_task
*dmt
;
1042 log_verbose("Resuming %s (%" PRIu32
":%" PRIu32
")", name
, major
, minor
);
1044 if (!(dmt
= dm_task_create(DM_DEVICE_RESUME
))) {
1045 log_error("Suspend dm_task creation failed for %s", name
);
1049 /* FIXME Kernel should fill in name on return instead */
1050 if (!dm_task_set_name(dmt
, name
)) {
1051 log_error("Failed to set readahead device name for %s", name
);
1055 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
1056 log_error("Failed to set device number for %s resumption.", name
);
1060 if (!dm_task_no_open_count(dmt
))
1061 log_error("Failed to disable open_count");
1063 if (!dm_task_set_read_ahead(dmt
, read_ahead
, read_ahead_flags
))
1064 log_error("Failed to set read ahead");
1066 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1069 if ((r
= dm_task_run(dmt
))) {
1070 if (already_suspended
)
1072 r
= dm_task_get_info(dmt
, newinfo
);
1076 dm_task_destroy(dmt
);
1081 static int _suspend_node(const char *name
, uint32_t major
, uint32_t minor
,
1082 int skip_lockfs
, int no_flush
, struct dm_info
*newinfo
)
1084 struct dm_task
*dmt
;
1087 log_verbose("Suspending %s (%" PRIu32
":%" PRIu32
")%s%s",
1089 skip_lockfs
? "" : " with filesystem sync",
1090 no_flush
? "" : " with device flush");
1092 if (!(dmt
= dm_task_create(DM_DEVICE_SUSPEND
))) {
1093 log_error("Suspend dm_task creation failed for %s", name
);
1097 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
1098 log_error("Failed to set device number for %s suspension.", name
);
1099 dm_task_destroy(dmt
);
1103 if (!dm_task_no_open_count(dmt
))
1104 log_error("Failed to disable open_count");
1106 if (skip_lockfs
&& !dm_task_skip_lockfs(dmt
))
1107 log_error("Failed to set skip_lockfs flag.");
1109 if (no_flush
&& !dm_task_no_flush(dmt
))
1110 log_error("Failed to set no_flush flag.");
1112 if ((r
= dm_task_run(dmt
))) {
1114 r
= dm_task_get_info(dmt
, newinfo
);
1117 dm_task_destroy(dmt
);
1123 * FIXME Don't attempt to deactivate known internal dependencies.
1125 static int _dm_tree_deactivate_children(struct dm_tree_node
*dnode
,
1126 const char *uuid_prefix
,
1127 size_t uuid_prefix_len
,
1131 void *handle
= NULL
;
1132 struct dm_tree_node
*child
= dnode
;
1133 struct dm_info info
;
1134 const struct dm_info
*dinfo
;
1138 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1139 if (!(dinfo
= dm_tree_node_get_info(child
))) {
1144 if (!(name
= dm_tree_node_get_name(child
))) {
1149 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1154 /* Ignore if it doesn't belong to this VG */
1155 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1158 /* Refresh open_count */
1159 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 1, &info
) ||
1163 /* Also checking open_count in parent nodes of presuspend_node */
1164 if (info
.open_count
||
1165 (child
->presuspend_node
&&
1166 !_node_has_closed_parents(child
->presuspend_node
,
1167 uuid_prefix
, uuid_prefix_len
))) {
1168 /* Only report error from (likely non-internal) dependency at top level */
1170 log_error("Unable to deactivate open %s (%" PRIu32
1171 ":%" PRIu32
")", name
, info
.major
,
1178 /* Suspend child node first if requested */
1179 if (child
->presuspend_node
&&
1180 !dm_tree_suspend_children(child
, uuid_prefix
, uuid_prefix_len
))
1183 if (!_deactivate_node(name
, info
.major
, info
.minor
,
1184 &child
->dtree
->cookie
, child
->udev_flags
)) {
1185 log_error("Unable to deactivate %s (%" PRIu32
1186 ":%" PRIu32
")", name
, info
.major
,
1190 } else if (info
.suspended
)
1193 if (dm_tree_node_num_children(child
, 0)) {
1194 if (!_dm_tree_deactivate_children(child
, uuid_prefix
, uuid_prefix_len
, level
+ 1))
/*
 * Public entry point: deactivate all children of 'dnode' whose uuids
 * match the prefix, recursing from level 0.
 */
int dm_tree_deactivate_children(struct dm_tree_node *dnode,
				const char *uuid_prefix,
				size_t uuid_prefix_len)
{
	return _dm_tree_deactivate_children(dnode, uuid_prefix, uuid_prefix_len, 0);
}
1209 void dm_tree_skip_lockfs(struct dm_tree_node
*dnode
)
1211 dnode
->dtree
->skip_lockfs
= 1;
1214 void dm_tree_use_no_flush_suspend(struct dm_tree_node
*dnode
)
1216 dnode
->dtree
->no_flush
= 1;
1219 int dm_tree_suspend_children(struct dm_tree_node
*dnode
,
1220 const char *uuid_prefix
,
1221 size_t uuid_prefix_len
)
1224 void *handle
= NULL
;
1225 struct dm_tree_node
*child
= dnode
;
1226 struct dm_info info
, newinfo
;
1227 const struct dm_info
*dinfo
;
1231 /* Suspend nodes at this level of the tree */
1232 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1233 if (!(dinfo
= dm_tree_node_get_info(child
))) {
1238 if (!(name
= dm_tree_node_get_name(child
))) {
1243 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1248 /* Ignore if it doesn't belong to this VG */
1249 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1252 /* Ensure immediate parents are already suspended */
1253 if (!_children_suspended(child
, 1, uuid_prefix
, uuid_prefix_len
))
1256 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 0, &info
) ||
1257 !info
.exists
|| info
.suspended
)
1260 if (!_suspend_node(name
, info
.major
, info
.minor
,
1261 child
->dtree
->skip_lockfs
,
1262 child
->dtree
->no_flush
, &newinfo
)) {
1263 log_error("Unable to suspend %s (%" PRIu32
1264 ":%" PRIu32
")", name
, info
.major
,
1270 /* Update cached info */
1271 child
->info
= newinfo
;
1274 /* Then suspend any child nodes */
1277 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1278 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1283 /* Ignore if it doesn't belong to this VG */
1284 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1287 if (dm_tree_node_num_children(child
, 0))
1288 if (!dm_tree_suspend_children(child
, uuid_prefix
, uuid_prefix_len
))
1295 int dm_tree_activate_children(struct dm_tree_node
*dnode
,
1296 const char *uuid_prefix
,
1297 size_t uuid_prefix_len
)
1300 void *handle
= NULL
;
1301 struct dm_tree_node
*child
= dnode
;
1302 struct dm_info newinfo
;
1307 /* Activate children first */
1308 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1309 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1314 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1317 if (dm_tree_node_num_children(child
, 0))
1318 if (!dm_tree_activate_children(child
, uuid_prefix
, uuid_prefix_len
))
1324 for (priority
= 0; priority
< 3; priority
++) {
1325 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1326 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1331 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1334 if (priority
!= child
->activation_priority
)
1337 if (!(name
= dm_tree_node_get_name(child
))) {
1343 if (child
->props
.new_name
) {
1344 if (!_rename_node(name
, child
->props
.new_name
, child
->info
.major
,
1345 child
->info
.minor
, &child
->dtree
->cookie
,
1346 child
->udev_flags
)) {
1347 log_error("Failed to rename %s (%" PRIu32
1348 ":%" PRIu32
") to %s", name
, child
->info
.major
,
1349 child
->info
.minor
, child
->props
.new_name
);
1352 child
->name
= child
->props
.new_name
;
1353 child
->props
.new_name
= NULL
;
1356 if (!child
->info
.inactive_table
&& !child
->info
.suspended
)
1359 if (!_resume_node(child
->name
, child
->info
.major
, child
->info
.minor
,
1360 child
->props
.read_ahead
, child
->props
.read_ahead_flags
,
1361 &newinfo
, &child
->dtree
->cookie
, child
->udev_flags
, child
->info
.suspended
)) {
1362 log_error("Unable to resume %s (%" PRIu32
1363 ":%" PRIu32
")", child
->name
, child
->info
.major
,
1369 /* Update cached info */
1370 child
->info
= newinfo
;
1379 static int _create_node(struct dm_tree_node
*dnode
)
1382 struct dm_task
*dmt
;
1384 log_verbose("Creating %s", dnode
->name
);
1386 if (!(dmt
= dm_task_create(DM_DEVICE_CREATE
))) {
1387 log_error("Create dm_task creation failed for %s", dnode
->name
);
1391 if (!dm_task_set_name(dmt
, dnode
->name
)) {
1392 log_error("Failed to set device name for %s", dnode
->name
);
1396 if (!dm_task_set_uuid(dmt
, dnode
->uuid
)) {
1397 log_error("Failed to set uuid for %s", dnode
->name
);
1401 if (dnode
->props
.major
&&
1402 (!dm_task_set_major(dmt
, dnode
->props
.major
) ||
1403 !dm_task_set_minor(dmt
, dnode
->props
.minor
))) {
1404 log_error("Failed to set device number for %s creation.", dnode
->name
);
1408 if (dnode
->props
.read_only
&& !dm_task_set_ro(dmt
)) {
1409 log_error("Failed to set read only flag for %s", dnode
->name
);
1413 if (!dm_task_no_open_count(dmt
))
1414 log_error("Failed to disable open_count");
1416 if ((r
= dm_task_run(dmt
)))
1417 r
= dm_task_get_info(dmt
, &dnode
->info
);
1420 dm_task_destroy(dmt
);
1426 static int _build_dev_string(char *devbuf
, size_t bufsize
, struct dm_tree_node
*node
)
1428 if (!dm_format_dev(devbuf
, bufsize
, node
->info
.major
, node
->info
.minor
)) {
1429 log_error("Failed to format %s device number for %s as dm "
1431 node
->name
, node
->uuid
, node
->info
.major
, node
->info
.minor
);
1438 /* simplify string emiting code */
1439 #define EMIT_PARAMS(p, str...)\
1442 if ((w = dm_snprintf(params + p, paramsize - (size_t) p, str)) < 0) {\
1443 stack; /* Out of space */\
1452 * Returns: 1 on success, 0 on failure
1454 static int _emit_areas_line(struct dm_task
*dmt
__attribute__((unused
)),
1455 struct load_segment
*seg
, char *params
,
1456 size_t paramsize
, int *pos
)
1458 struct seg_area
*area
;
1459 char devbuf
[DM_FORMAT_DEV_BUFSIZE
];
1460 unsigned first_time
= 1;
1461 const char *logtype
, *synctype
;
1462 unsigned log_parm_count
;
1464 dm_list_iterate_items(area
, &seg
->areas
) {
1465 if (!_build_dev_string(devbuf
, sizeof(devbuf
), area
->dev_node
))
1468 switch (seg
->type
) {
1469 case SEG_REPLICATOR_DEV
:
1470 EMIT_PARAMS(*pos
, " %d 1 %s", area
->rsite_index
, devbuf
);
1472 EMIT_PARAMS(*pos
, " nolog 0");
1474 /* Remote devices */
1475 log_parm_count
= (area
->flags
&
1476 (DM_NOSYNC
| DM_FORCESYNC
)) ? 2 : 1;
1479 devbuf
[0] = 0; /* Only core log parameters */
1482 devbuf
[0] = ' '; /* Extra space before device name */
1483 if (!_build_dev_string(devbuf
+ 1,
1488 log_parm_count
++; /* Extra sync log device name parameter */
1491 EMIT_PARAMS(*pos
, " %s %u%s %" PRIu64
, logtype
,
1492 log_parm_count
, devbuf
, area
->region_size
);
1494 synctype
= (area
->flags
& DM_NOSYNC
) ?
1495 " nosync" : (area
->flags
& DM_FORCESYNC
) ?
1499 EMIT_PARAMS(*pos
, "%s", synctype
);
1503 EMIT_PARAMS(*pos
, "%s%s %" PRIu64
, first_time
? "" : " ",
1504 devbuf
, area
->offset
);
/*
 * Emit the "replicator" target table line: the replicator log device
 * followed by one parameter group per remote site.
 *
 * NOTE(review): this excerpt is incomplete -- error returns and closing
 * braces are missing; only comments have been added.
 */
static int _replicator_emit_segment_line(const struct load_segment *seg, char *params,
					 size_t paramsize, int *pos)
	const struct load_segment *rlog_seg;
	struct replicator_site *rsite;
	char rlogbuf[DM_FORMAT_DEV_BUFSIZE];
	unsigned parm_count;

	/* A formattable replicator log device is mandatory. */
	if (!seg->log || !_build_dev_string(rlogbuf, sizeof(rlogbuf), seg->log))

	/* Log size is taken from the log device's last queued segment. */
	rlog_seg = dm_list_item(dm_list_last(&seg->log->props.segs),
				struct load_segment);

	/* "<log_type> 4 <log_dev> 0 auto <log_size>" */
	EMIT_PARAMS(*pos, "%s 4 %s 0 auto %" PRIu64,
		    seg->rlog_type, rlogbuf, rlog_seg->size);

	dm_list_iterate_items(rsite, &seg->rsites) {
		/* 4 site parameters when any async tuning value is set, else 2 */
		parm_count = (rsite->fall_behind_data
			      || rsite->fall_behind_ios
			      || rsite->async_timeout) ? 4 : 2;

		EMIT_PARAMS(*pos, " blockdev %u %u %s", parm_count, rsite->rsite_index,
			    (rsite->mode == DM_REPLICATOR_SYNC) ? "synchronous" : "asynchronous");

		/* At most one async constraint is emitted, in priority order. */
		if (rsite->fall_behind_data)
			EMIT_PARAMS(*pos, " data %" PRIu64, rsite->fall_behind_data);
		else if (rsite->fall_behind_ios)
			EMIT_PARAMS(*pos, " ios %" PRIu32, rsite->fall_behind_ios);
		else if (rsite->async_timeout)
			EMIT_PARAMS(*pos, " timeout %" PRIu32, rsite->async_timeout);
/*
 * Emit the "mirror" target table line: log parameters, mirror legs and
 * trailing feature arguments.  Complicated enough for its own function.
 *
 * Returns: 1 on success, 0 on failure
 *
 * NOTE(review): this excerpt is incomplete -- declarations of r/pos/
 * uts/parts, error returns, the log type selection and several braces
 * are missing; only comments have been added to the surviving code.
 */
static int _mirror_emit_segment_line(struct dm_task *dmt, uint32_t major,
				     uint32_t minor, struct load_segment *seg,
				     uint64_t *seg_start, char *params,
	int block_on_error = 0;
	int handle_errors = 0;
	int dm_log_userspace = 0;
	unsigned log_parm_count;
	char logbuf[DM_FORMAT_DEV_BUFSIZE];
	const char *logtype;
	unsigned kmaj = 0, kmin = 0, krel = 0;

	if (uname(&uts) == -1) {
		log_error("Cannot read kernel release version.");

	/* Kernels with a major number of 2 always had 3 parts. */
	parts = sscanf(uts.release, "%u.%u.%u", &kmaj, &kmin, &krel);
	if (parts < 1 || (kmaj < 3 && parts < 3)) {
		log_error("Wrong kernel release version %s.", uts.release);

	if ((seg->flags & DM_BLOCK_ON_ERROR)) {
		/*
		 * Originally, block_on_error was an argument to the log
		 * portion of the mirror CTR table. It was renamed to
		 * "handle_errors" and now resides in the 'features'
		 * section of the mirror CTR table (i.e. at the end).
		 *
		 * We can identify whether to use "block_on_error" or
		 * "handle_errors" by the dm-mirror module's version
		 * number (>= 1.12) or by the kernel version (>= 2.6.22).
		 */
		if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 22))

	if (seg->clustered) {
		/* Cluster mirrors require a UUID */
		/*
		 * Cluster mirrors used to have their own log
		 * types. Now they are accessed through the
		 * userspace log type.
		 *
		 * The dm-log-userspace module was added to the
		 */
		if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 31))
			dm_log_userspace = 1;

	/* [no]sync, block_on_error etc. */
	log_parm_count += hweight32(seg->flags);

	/* "handle_errors" is a feature arg now */

	/* DM_CORELOG does not count in the param list */
	if (seg->flags & DM_CORELOG)

	if (seg->clustered) {
		log_parm_count++; /* For UUID */

		if (!dm_log_userspace)
			EMIT_PARAMS(pos, "clustered-");
		/* For clustered-* type field inserted later */

	if (!_build_dev_string(logbuf, sizeof(logbuf), seg->log))

	if (dm_log_userspace)
		EMIT_PARAMS(pos, "userspace %u %s clustered-%s",
			    log_parm_count, seg->uuid, logtype);
		EMIT_PARAMS(pos, "%s %u", logtype, log_parm_count);

		EMIT_PARAMS(pos, " %s", logbuf);

	EMIT_PARAMS(pos, " %u", seg->region_size);

	/* Non-userspace cluster logs carry the UUID as a log argument. */
	if (seg->clustered && !dm_log_userspace)
		EMIT_PARAMS(pos, " %s", seg->uuid);

	if ((seg->flags & DM_NOSYNC))
		EMIT_PARAMS(pos, " nosync");
	else if ((seg->flags & DM_FORCESYNC))
		EMIT_PARAMS(pos, " sync");

		EMIT_PARAMS(pos, " block_on_error");

	/* Number of mirror legs, then the leg (area) list. */
	EMIT_PARAMS(pos, " %u ", seg->mirror_area_count);

	if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0)

		EMIT_PARAMS(pos, " 1 handle_errors");
/*
 * Emit one complete table line for @seg, add it to @dmt via
 * dm_task_add_target() and advance *seg_start by the segment size.
 *
 * NOTE(review): this excerpt is incomplete -- the switch scaffolding,
 * local declarations (r, pos), several case labels and error returns
 * are missing; only comments have been added to the surviving code.
 */
static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
			      uint32_t minor, struct load_segment *seg,
			      uint64_t *seg_start, char *params,
	char originbuf[DM_FORMAT_DEV_BUFSIZE], cowbuf[DM_FORMAT_DEV_BUFSIZE];

		/* Mirrors are pretty complicated - now in separate function */
		r = _mirror_emit_segment_line(dmt, major, minor, seg, seg_start,
	case SEG_REPLICATOR:
		if ((r = _replicator_emit_segment_line(seg, params, paramsize,
	case SEG_REPLICATOR_DEV:
		if (!seg->replicator || !_build_dev_string(originbuf,
		/* "<replicator_dev> <device_index>" */
		EMIT_PARAMS(pos, "%s %" PRIu64, originbuf, seg->rdevice_index);
	case SEG_SNAPSHOT_MERGE:
		if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
		if (!_build_dev_string(cowbuf, sizeof(cowbuf), seg->cow))
		/* "<origin> <cow> P|N <chunk_size>" (P = persistent) */
		EMIT_PARAMS(pos, "%s %s %c %d", originbuf, cowbuf,
			    seg->persistent ? 'P' : 'N', seg->chunk_size);
	case SEG_SNAPSHOT_ORIGIN:
		if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
		EMIT_PARAMS(pos, "%s", originbuf);
		/* striped: "<#stripes> <stripe_size> " followed by the area list */
		EMIT_PARAMS(pos, "%u %u ", seg->area_count, seg->stripe_size);
		/* crypt: "<cipher>[-<chainmode>][-<iv>] <key> <iv_offset> " */
		EMIT_PARAMS(pos, "%s%s%s%s%s %s %" PRIu64 " ", seg->cipher,
			    seg->chainmode ? "-" : "", seg->chainmode ?: "",
			    seg->iv ? "-" : "", seg->iv ?: "", seg->key,
			    seg->iv_offset != DM_CRYPT_IV_DEFAULT ?
			    seg->iv_offset : *seg_start);

	/* Segment types whose trailing parameters are the area list. */
	case SEG_REPLICATOR:
	case SEG_SNAPSHOT_ORIGIN:
	case SEG_SNAPSHOT_MERGE:
	case SEG_REPLICATOR_DEV:
		if ((r = _emit_areas_line(dmt, seg, params, paramsize, &pos)) <= 0) {

	log_debug("Adding target to (%" PRIu32 ":%" PRIu32 "): %" PRIu64
		  " %" PRIu64 " %s %s", major, minor,
		  *seg_start, seg->size, dm_segtypes[seg->type].target, params);

	if (!dm_task_add_target(dmt, *seg_start, seg->size, dm_segtypes[seg->type].target, params))

	*seg_start += seg->size;
1774 static int _emit_segment(struct dm_task
*dmt
, uint32_t major
, uint32_t minor
,
1775 struct load_segment
*seg
, uint64_t *seg_start
)
1778 size_t paramsize
= 4096;
1782 if (!(params
= dm_malloc(paramsize
))) {
1783 log_error("Insufficient space for target parameters.");
1788 ret
= _emit_segment_line(dmt
, major
, minor
, seg
, seg_start
,
1798 log_debug("Insufficient space in params[%" PRIsize_t
1799 "] for target parameters.", paramsize
);
1802 } while (paramsize
< MAX_TARGET_PARAMSIZE
);
1804 log_error("Target parameter size too big. Aborting.");
/*
 * Load (DM_DEVICE_RELOAD) the table for @dnode: emit one target line
 * per queued load_segment, run the task and record whether the table
 * size changed relative to the live table.
 *
 * NOTE(review): this excerpt is incomplete -- error returns, the
 * cleanup path and some closing braces are missing; comments only.
 */
static int _load_node(struct dm_tree_node *dnode)
	struct dm_task *dmt;
	struct load_segment *seg;
	uint64_t seg_start = 0, existing_table_size;

	log_verbose("Loading %s table (%" PRIu32 ":%" PRIu32 ")", dnode->name,
		    dnode->info.major, dnode->info.minor);

	if (!(dmt = dm_task_create(DM_DEVICE_RELOAD))) {
		log_error("Reload dm_task creation failed for %s", dnode->name);

	if (!dm_task_set_major(dmt, dnode->info.major) ||
	    !dm_task_set_minor(dmt, dnode->info.minor)) {
		log_error("Failed to set device number for %s reload.", dnode->name);

	if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
		log_error("Failed to set read only flag for %s", dnode->name);

	/* Non-fatal: proceed even if open_count can't be disabled. */
	if (!dm_task_no_open_count(dmt))
		log_error("Failed to disable open_count");

	/* Emit one target line per queued segment; seg_start advances. */
	dm_list_iterate_items(seg, &dnode->props.segs)
		if (!_emit_segment(dmt, dnode->info.major, dnode->info.minor,

	if (!dm_task_suppress_identical_reload(dmt))
		log_error("Failed to suppress reload of identical tables.");

	if ((r = dm_task_run(dmt))) {
		r = dm_task_get_info(dmt, &dnode->info);
		/* No inactive table => the identical reload was suppressed. */
		if (r && !dnode->info.inactive_table)
			log_verbose("Suppressed %s identical table reload.",

		existing_table_size = dm_task_get_existing_table_size(dmt);
		if ((dnode->props.size_changed =
		     (existing_table_size == seg_start) ? 0 : 1)) {
			log_debug("Table size changed from %" PRIu64 " to %"
				  PRIu64 " for %s", existing_table_size,
				  seg_start, dnode->name);
			/*
			 * Kernel usually skips size validation on zero-length devices
			 * now so no need to preload them.
			 */
			/* FIXME In which kernel version did this begin? */
			if (!existing_table_size && dnode->props.delay_resume_if_new)
				dnode->props.size_changed = 0;

	/* All queued segments consumed; reset for the next load cycle. */
	dnode->props.segment_count = 0;

	dm_task_destroy(dmt);
/*
 * Depth-first: create, load and (when required) resume every child of
 * @dnode whose uuid matches @uuid_prefix.  Finally waits on udev if any
 * child requested immediate dev-node synchronization.
 *
 * NOTE(review): this excerpt is incomplete -- 'continue' statements,
 * error returns, the final return and some braces are missing;
 * comments only have been added.
 */
int dm_tree_preload_children(struct dm_tree_node *dnode,
			     const char *uuid_prefix,
			     size_t uuid_prefix_len)
	void *handle = NULL;
	struct dm_tree_node *child;
	struct dm_info newinfo;
	int update_devs_flag = 0;

	/* Preload children first */
	while ((child = dm_tree_next_child(&handle, dnode, 0))) {
		/* Skip existing non-device-mapper devices */
		if (!child->info.exists && child->info.major)

		/* Ignore if it doesn't belong to this VG */
		if (child->info.exists &&
		    !_uuid_prefix_matches(child->uuid, uuid_prefix, uuid_prefix_len))

		/* Recurse into grandchildren before handling this child. */
		if (dm_tree_node_num_children(child, 0))
			if (!dm_tree_preload_children(child, uuid_prefix, uuid_prefix_len))

		/* FIXME Cope if name exists with no uuid? */
		if (!child->info.exists) {
			if (!_create_node(child)) {

		/* Load the queued table unless one is already inactive. */
		if (!child->info.inactive_table && child->props.segment_count) {
			if (!_load_node(child)) {

		/* Propagate device size change change */
		if (child->props.size_changed)
			dnode->props.size_changed = 1;

		/* Resume device immediately if it has parents and its size changed */
		if (!dm_tree_node_num_children(child, 1) || !child->props.size_changed)

		if (!child->info.inactive_table && !child->info.suspended)

		if (!_resume_node(child->name, child->info.major, child->info.minor,
				  child->props.read_ahead, child->props.read_ahead_flags,
				  &newinfo, &child->dtree->cookie, child->udev_flags,
				  child->info.suspended)) {
			log_error("Unable to resume %s (%" PRIu32
				  ":%" PRIu32 ")", child->name, child->info.major,

		/* Update cached info */
		child->info = newinfo;

		/*
		 * Prepare for immediate synchronization with udev and flush all stacked
		 * dev node operations if requested by immediate_dev_node property. But
		 * finish processing current level in the tree first.
		 */
		if (child->props.immediate_dev_node)
			update_devs_flag = 1;

	if (update_devs_flag) {
		if (!dm_udev_wait(dm_tree_get_cookie(dnode)))
		dm_tree_set_cookie(dnode, 0);
1962 * Returns 1 if unsure.
1964 int dm_tree_children_use_uuid(struct dm_tree_node
*dnode
,
1965 const char *uuid_prefix
,
1966 size_t uuid_prefix_len
)
1968 void *handle
= NULL
;
1969 struct dm_tree_node
*child
= dnode
;
1972 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1973 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1974 log_error("Failed to get uuid for dtree node.");
1978 if (_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1981 if (dm_tree_node_num_children(child
, 0))
1982 dm_tree_children_use_uuid(child
, uuid_prefix
, uuid_prefix_len
);
1991 static struct load_segment
*_add_segment(struct dm_tree_node
*dnode
, unsigned type
, uint64_t size
)
1993 struct load_segment
*seg
;
1995 if (!(seg
= dm_pool_zalloc(dnode
->dtree
->mem
, sizeof(*seg
)))) {
1996 log_error("dtree node segment allocation failed");
2002 seg
->area_count
= 0;
2003 dm_list_init(&seg
->areas
);
2004 seg
->stripe_size
= 0;
2005 seg
->persistent
= 0;
2006 seg
->chunk_size
= 0;
2011 dm_list_add(&dnode
->props
.segs
, &seg
->list
);
2012 dnode
->props
.segment_count
++;
2017 int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node
*dnode
,
2019 const char *origin_uuid
)
2021 struct load_segment
*seg
;
2022 struct dm_tree_node
*origin_node
;
2024 if (!(seg
= _add_segment(dnode
, SEG_SNAPSHOT_ORIGIN
, size
)))
2027 if (!(origin_node
= dm_tree_find_node_by_uuid(dnode
->dtree
, origin_uuid
))) {
2028 log_error("Couldn't find snapshot origin uuid %s.", origin_uuid
);
2032 seg
->origin
= origin_node
;
2033 if (!_link_tree_nodes(dnode
, origin_node
))
2036 /* Resume snapshot origins after new snapshots */
2037 dnode
->activation_priority
= 1;
2042 static int _add_snapshot_target(struct dm_tree_node
*node
,
2044 const char *origin_uuid
,
2045 const char *cow_uuid
,
2046 const char *merge_uuid
,
2048 uint32_t chunk_size
)
2050 struct load_segment
*seg
;
2051 struct dm_tree_node
*origin_node
, *cow_node
, *merge_node
;
2054 seg_type
= !merge_uuid
? SEG_SNAPSHOT
: SEG_SNAPSHOT_MERGE
;
2056 if (!(seg
= _add_segment(node
, seg_type
, size
)))
2059 if (!(origin_node
= dm_tree_find_node_by_uuid(node
->dtree
, origin_uuid
))) {
2060 log_error("Couldn't find snapshot origin uuid %s.", origin_uuid
);
2064 seg
->origin
= origin_node
;
2065 if (!_link_tree_nodes(node
, origin_node
))
2068 if (!(cow_node
= dm_tree_find_node_by_uuid(node
->dtree
, cow_uuid
))) {
2069 log_error("Couldn't find snapshot COW device uuid %s.", cow_uuid
);
2073 seg
->cow
= cow_node
;
2074 if (!_link_tree_nodes(node
, cow_node
))
2077 seg
->persistent
= persistent
? 1 : 0;
2078 seg
->chunk_size
= chunk_size
;
2081 if (!(merge_node
= dm_tree_find_node_by_uuid(node
->dtree
, merge_uuid
))) {
2082 /* not a pure error, merging snapshot may have been deactivated */
2083 log_verbose("Couldn't find merging snapshot uuid %s.", merge_uuid
);
2085 seg
->merge
= merge_node
;
2086 /* must not link merging snapshot, would undermine activation_priority below */
2089 /* Resume snapshot-merge (acting origin) after other snapshots */
2090 node
->activation_priority
= 1;
2092 /* Resume merging snapshot after snapshot-merge */
2093 seg
->merge
->activation_priority
= 2;
2101 int dm_tree_node_add_snapshot_target(struct dm_tree_node
*node
,
2103 const char *origin_uuid
,
2104 const char *cow_uuid
,
2106 uint32_t chunk_size
)
2108 return _add_snapshot_target(node
, size
, origin_uuid
, cow_uuid
,
2109 NULL
, persistent
, chunk_size
);
2112 int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node
*node
,
2114 const char *origin_uuid
,
2115 const char *cow_uuid
,
2116 const char *merge_uuid
,
2117 uint32_t chunk_size
)
2119 return _add_snapshot_target(node
, size
, origin_uuid
, cow_uuid
,
2120 merge_uuid
, 1, chunk_size
);
2123 int dm_tree_node_add_error_target(struct dm_tree_node
*node
,
2126 if (!_add_segment(node
, SEG_ERROR
, size
))
2132 int dm_tree_node_add_zero_target(struct dm_tree_node
*node
,
2135 if (!_add_segment(node
, SEG_ZERO
, size
))
2141 int dm_tree_node_add_linear_target(struct dm_tree_node
*node
,
2144 if (!_add_segment(node
, SEG_LINEAR
, size
))
2150 int dm_tree_node_add_striped_target(struct dm_tree_node
*node
,
2152 uint32_t stripe_size
)
2154 struct load_segment
*seg
;
2156 if (!(seg
= _add_segment(node
, SEG_STRIPED
, size
)))
2159 seg
->stripe_size
= stripe_size
;
2164 int dm_tree_node_add_crypt_target(struct dm_tree_node
*node
,
2167 const char *chainmode
,
2172 struct load_segment
*seg
;
2174 if (!(seg
= _add_segment(node
, SEG_CRYPT
, size
)))
2177 seg
->cipher
= cipher
;
2178 seg
->chainmode
= chainmode
;
2180 seg
->iv_offset
= iv_offset
;
2186 int dm_tree_node_add_mirror_target_log(struct dm_tree_node
*node
,
2187 uint32_t region_size
,
2189 const char *log_uuid
,
2190 unsigned area_count
,
2193 struct dm_tree_node
*log_node
= NULL
;
2194 struct load_segment
*seg
;
2196 if (!node
->props
.segment_count
) {
2197 log_error(INTERNAL_ERROR
"Attempt to add target area to missing segment.");
2201 seg
= dm_list_item(dm_list_last(&node
->props
.segs
), struct load_segment
);
2204 if (!(seg
->uuid
= dm_pool_strdup(node
->dtree
->mem
, log_uuid
))) {
2205 log_error("log uuid pool_strdup failed");
2208 if ((flags
& DM_CORELOG
))
2209 /* For pvmove: immediate resume (for size validation) isn't needed. */
2210 node
->props
.delay_resume_if_new
= 1;
2212 if (!(log_node
= dm_tree_find_node_by_uuid(node
->dtree
, log_uuid
))) {
2213 log_error("Couldn't find mirror log uuid %s.", log_uuid
);
2218 log_node
->props
.immediate_dev_node
= 1;
2220 /* The kernel validates the size of disk logs. */
2221 /* FIXME Propagate to any devices below */
2222 log_node
->props
.delay_resume_if_new
= 0;
2224 if (!_link_tree_nodes(node
, log_node
))
2229 seg
->log
= log_node
;
2230 seg
->region_size
= region_size
;
2231 seg
->clustered
= clustered
;
2232 seg
->mirror_area_count
= area_count
;
2238 int dm_tree_node_add_mirror_target(struct dm_tree_node
*node
,
2241 if (!_add_segment(node
, SEG_MIRRORED
, size
))
/*
 * Queue a "replicator" target on @node.
 *
 * rsite_index == REPLICATOR_LOCAL_SITE (0) creates the replicator
 * segment itself and links the replicator log device; any other index
 * appends a remote-site descriptor to the already-created segment.
 *
 * NOTE(review): this excerpt is incomplete -- the 'uint64_t size'
 * parameter, error returns and some braces are missing; only comments
 * have been added to the surviving code.
 */
int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
				       const char *rlog_uuid,
				       const char *rlog_type,
				       unsigned rsite_index,
				       dm_replicator_mode_t mode,
				       uint32_t async_timeout,
				       uint64_t fall_behind_data,
				       uint32_t fall_behind_ios)
	struct load_segment *rseg;
	struct replicator_site *rsite;

	/* Local site0 - adds replicator segment and links rlog device */
	if (rsite_index == REPLICATOR_LOCAL_SITE) {
		if (node->props.segment_count) {
			log_error(INTERNAL_ERROR "Attempt to add replicator segment to already used node.");

		if (!(rseg = _add_segment(node, SEG_REPLICATOR, size)))

		if (!(rseg->log = dm_tree_find_node_by_uuid(node->dtree, rlog_uuid))) {
			log_error("Missing replicator log uuid %s.", rlog_uuid);

		if (!_link_tree_nodes(node, rseg->log))

		/* Only the "ringbuffer" log type is supported. */
		if (strcmp(rlog_type, "ringbuffer") != 0) {
			log_error("Unsupported replicator log type %s.", rlog_type);

		if (!(rseg->rlog_type = dm_pool_strdup(node->dtree->mem, rlog_type)))

		dm_list_init(&rseg->rsites);
		rseg->rdevice_count = 0;
		/* Resume the replicator after its log device. */
		node->activation_priority = 1;

	/* Add site to segment */
	/* Async tuning values are meaningless for a synchronous site. */
	if (mode == DM_REPLICATOR_SYNC
	    && (async_timeout || fall_behind_ios || fall_behind_data)) {
		/* NOTE(review): "synchronnous" typo is in the runtime message; left as-is. */
		log_error("Async parameters passed for synchronnous replicator.");

	/* The replicator segment (site0) must already exist and be alone. */
	if (node->props.segment_count != 1) {
		log_error(INTERNAL_ERROR "Attempt to add remote site area before setting replicator log.");

	rseg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);
	if (rseg->type != SEG_REPLICATOR) {
		log_error(INTERNAL_ERROR "Attempt to use non replicator segment %s.",
			  dm_segtypes[rseg->type].target);

	if (!(rsite = dm_pool_zalloc(node->dtree->mem, sizeof(*rsite)))) {
		log_error("Failed to allocate remote site segment.");

	dm_list_add(&rseg->rsites, &rsite->list);
	rseg->rsite_count++;

	rsite->async_timeout = async_timeout;
	rsite->fall_behind_data = fall_behind_data;
	rsite->fall_behind_ios = fall_behind_ios;
	rsite->rsite_index = rsite_index;
/* Appends device node to Replicator */
/*
 * Queue a "replicator-dev" target on @node, or append a remote-site
 * area (with optional sync log) to an existing replicator-dev segment.
 *
 * NOTE(review): this excerpt is incomplete -- the 'uint64_t size'
 * parameter, error returns, the else-branch introducer and some braces
 * are missing; only comments have been added to the surviving code.
 */
int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node,
					   const char *replicator_uuid,
					   uint64_t rdevice_index,
					   const char *rdev_uuid,
					   unsigned rsite_index,
					   const char *slog_uuid,
					   uint32_t slog_flags,
					   uint32_t slog_region_size)
	struct seg_area *area;
	struct load_segment *rseg;
	struct load_segment *rep_seg;

	if (rsite_index == REPLICATOR_LOCAL_SITE) {
		/* Site index for local target */
		if (!(rseg = _add_segment(node, SEG_REPLICATOR_DEV, size)))

		if (!(rseg->replicator = dm_tree_find_node_by_uuid(node->dtree, replicator_uuid))) {
			log_error("Missing replicator uuid %s.", replicator_uuid);

		/* Local slink0 for replicator must be always initialized first */
		if (rseg->replicator->props.segment_count != 1) {
			log_error(INTERNAL_ERROR "Attempt to use non replicator segment.");

		rep_seg = dm_list_item(dm_list_last(&rseg->replicator->props.segs), struct load_segment);
		if (rep_seg->type != SEG_REPLICATOR) {
			log_error(INTERNAL_ERROR "Attempt to use non replicator segment %s.",
				  dm_segtypes[rep_seg->type].target);
		/* Count this device on the parent replicator segment. */
		rep_seg->rdevice_count++;

		if (!_link_tree_nodes(node, rseg->replicator))

		rseg->rdevice_index = rdevice_index;

		/* Remote site: reuse the existing replicator-dev segment. */
		/* Local slink0 for replicator must be always initialized first */
		if (node->props.segment_count != 1) {
			log_error(INTERNAL_ERROR "Attempt to use non replicator-dev segment.");

		rseg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);
		if (rseg->type != SEG_REPLICATOR_DEV) {
			log_error(INTERNAL_ERROR "Attempt to use non replicator-dev segment %s.",
				  dm_segtypes[rseg->type].target);

	/* A sync log device is mandatory unless the log lives in core. */
	if (!(slog_flags & DM_CORELOG) && !slog_uuid) {
		log_error("Unspecified sync log uuid.");

	/* Attach the replicated device itself as a target area. */
	if (!dm_tree_node_add_target_area(node, NULL, rdev_uuid, 0))

	area = dm_list_item(dm_list_last(&rseg->areas), struct seg_area);

	if (!(slog_flags & DM_CORELOG)) {
		if (!(area->slog = dm_tree_find_node_by_uuid(node->dtree, slog_uuid))) {
			log_error("Couldn't find sync log uuid %s.", slog_uuid);

		if (!_link_tree_nodes(node, area->slog))

	area->flags = slog_flags;
	area->region_size = slog_region_size;
	area->rsite_index = rsite_index;
2412 static int _add_area(struct dm_tree_node
*node
, struct load_segment
*seg
, struct dm_tree_node
*dev_node
, uint64_t offset
)
2414 struct seg_area
*area
;
2416 if (!(area
= dm_pool_zalloc(node
->dtree
->mem
, sizeof (*area
)))) {
2417 log_error("Failed to allocate target segment area.");
2421 area
->dev_node
= dev_node
;
2422 area
->offset
= offset
;
2424 dm_list_add(&seg
->areas
, &area
->list
);
2430 int dm_tree_node_add_target_area(struct dm_tree_node
*node
,
2431 const char *dev_name
,
2435 struct load_segment
*seg
;
2437 struct dm_tree_node
*dev_node
;
2439 if ((!dev_name
|| !*dev_name
) && (!uuid
|| !*uuid
)) {
2440 log_error("dm_tree_node_add_target_area called without device");
2445 if (!(dev_node
= dm_tree_find_node_by_uuid(node
->dtree
, uuid
))) {
2446 log_error("Couldn't find area uuid %s.", uuid
);
2449 if (!_link_tree_nodes(node
, dev_node
))
2452 if (stat(dev_name
, &info
) < 0) {
2453 log_error("Device %s not found.", dev_name
);
2457 if (!S_ISBLK(info
.st_mode
)) {
2458 log_error("Device %s is not a block device.", dev_name
);
2462 /* FIXME Check correct macro use */
2463 if (!(dev_node
= _add_dev(node
->dtree
, node
, MAJOR(info
.st_rdev
),
2464 MINOR(info
.st_rdev
), 0)))
2468 if (!node
->props
.segment_count
) {
2469 log_error(INTERNAL_ERROR
"Attempt to add target area to missing segment.");
2473 seg
= dm_list_item(dm_list_last(&node
->props
.segs
), struct load_segment
);
2475 if (!_add_area(node
, seg
, dev_node
, offset
))
2481 void dm_tree_set_cookie(struct dm_tree_node
*node
, uint32_t cookie
)
2483 node
->dtree
->cookie
= cookie
;
2486 uint32_t dm_tree_get_cookie(struct dm_tree_node
*node
)
2488 return node
->dtree
->cookie
;