/*
 * Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
 *
 * This file is part of the device-mapper userspace tools.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
16 #include "libdm-targets.h"
17 #include "libdm-common.h"
22 #include <sys/param.h>
23 #include <sys/utsname.h>
25 #define MAX_TARGET_PARAMSIZE 500000
27 #define REPLICATOR_LOCAL_SITE 0
29 /* Supported segment types */
56 /* FIXME Add crypt and multipath support */
62 { SEG_CRYPT
, "crypt" },
63 { SEG_ERROR
, "error" },
64 { SEG_LINEAR
, "linear" },
65 { SEG_MIRRORED
, "mirror" },
66 { SEG_REPLICATOR
, "replicator" },
67 { SEG_REPLICATOR_DEV
, "replicator-dev" },
68 { SEG_SNAPSHOT
, "snapshot" },
69 { SEG_SNAPSHOT_ORIGIN
, "snapshot-origin" },
70 { SEG_SNAPSHOT_MERGE
, "snapshot-merge" },
71 { SEG_STRIPED
, "striped" },
73 { SEG_THIN_POOL
, "thin-pool"},
75 { SEG_RAID1
, "raid1"},
76 { SEG_RAID4
, "raid4"},
77 { SEG_RAID5_LA
, "raid5_la"},
78 { SEG_RAID5_RA
, "raid5_ra"},
79 { SEG_RAID5_LS
, "raid5_ls"},
80 { SEG_RAID5_RS
, "raid5_rs"},
81 { SEG_RAID6_ZR
, "raid6_zr"},
82 { SEG_RAID6_NR
, "raid6_nr"},
83 { SEG_RAID6_NC
, "raid6_nc"},
86 *WARNING: Since 'raid' target overloads this 1:1 mapping table
87 * for search do not add new enum elements past them!
89 { SEG_RAID5_LS
, "raid5"}, /* same as "raid5_ls" (default for MD also) */
90 { SEG_RAID6_ZR
, "raid6"}, /* same as "raid6_zr" */
94 /* Some segment types have a list of areas of other devices attached */
98 struct dm_tree_node
*dev_node
;
102 unsigned rsite_index
; /* Replicator site index */
103 struct dm_tree_node
*slog
; /* Replicator sync log node */
104 uint64_t region_size
; /* Replicator sync log size */
105 uint32_t flags
; /* Replicator sync log flags */
108 struct dm_thin_message
{
109 dm_thin_message_t type
;
124 } m_set_transaction_id
;
128 struct thin_message
{
130 struct dm_thin_message message
;
134 /* Replicator-log has a list of sites */
135 /* FIXME: maybe move to seg_area too? */
136 struct replicator_site
{
139 unsigned rsite_index
;
140 dm_replicator_mode_t mode
;
141 uint32_t async_timeout
;
142 uint32_t fall_behind_ios
;
143 uint64_t fall_behind_data
;
146 /* Per-segment properties */
147 struct load_segment
{
154 unsigned area_count
; /* Linear + Striped + Mirrored + Crypt + Replicator */
155 struct dm_list areas
; /* Linear + Striped + Mirrored + Crypt + Replicator */
157 uint32_t stripe_size
; /* Striped + raid */
159 int persistent
; /* Snapshot */
160 uint32_t chunk_size
; /* Snapshot */
161 struct dm_tree_node
*cow
; /* Snapshot */
162 struct dm_tree_node
*origin
; /* Snapshot + Snapshot origin */
163 struct dm_tree_node
*merge
; /* Snapshot */
165 struct dm_tree_node
*log
; /* Mirror + Replicator */
166 uint32_t region_size
; /* Mirror + raid */
167 unsigned clustered
; /* Mirror */
168 unsigned mirror_area_count
; /* Mirror */
169 uint32_t flags
; /* Mirror log */
170 char *uuid
; /* Clustered mirror log */
172 const char *cipher
; /* Crypt */
173 const char *chainmode
; /* Crypt */
174 const char *iv
; /* Crypt */
175 uint64_t iv_offset
; /* Crypt */
176 const char *key
; /* Crypt */
178 const char *rlog_type
; /* Replicator */
179 struct dm_list rsites
; /* Replicator */
180 unsigned rsite_count
; /* Replicator */
181 unsigned rdevice_count
; /* Replicator */
182 struct dm_tree_node
*replicator
;/* Replicator-dev */
183 uint64_t rdevice_index
; /* Replicator-dev */
185 uint64_t rebuilds
; /* raid */
187 struct dm_tree_node
*metadata
; /* Thin_pool */
188 struct dm_tree_node
*pool
; /* Thin_pool, Thin */
189 struct dm_list thin_messages
; /* Thin_pool */
190 uint64_t transaction_id
; /* Thin_pool */
191 uint64_t low_water_mark
; /* Thin_pool */
192 uint32_t data_block_size
; /* Thin_pool */
193 unsigned skip_block_zeroing
; /* Thin_pool */
194 uint32_t device_id
; /* Thin */
198 /* Per-device properties */
199 struct load_properties
{
205 uint32_t read_ahead_flags
;
207 unsigned segment_count
;
208 unsigned size_changed
;
211 const char *new_name
;
213 /* If immediate_dev_node is set to 1, try to create the dev node
214 * as soon as possible (e.g. in preload stage even during traversal
215 * and processing of dm tree). This will also flush all stacked dev
216 * node operations, synchronizing with udev.
218 unsigned immediate_dev_node
;
221 * If the device size changed from zero and this is set,
222 * don't resume the device immediately, even if the device
223 * has parents. This works provided the parents do not
224 * validate the device size and is required by pvmove to
225 * avoid starting the mirror resync operation too early.
227 unsigned delay_resume_if_new
;
229 /* Send messages for this node in preload */
230 unsigned send_messages
;
233 /* Two of these used to join two nodes with uses and used_by. */
234 struct dm_tree_link
{
236 struct dm_tree_node
*node
;
239 struct dm_tree_node
{
240 struct dm_tree
*dtree
;
246 struct dm_list uses
; /* Nodes this node uses */
247 struct dm_list used_by
; /* Nodes that use this node */
249 int activation_priority
; /* 0 gets activated first */
251 uint16_t udev_flags
; /* Udev control flags */
253 void *context
; /* External supplied context */
255 struct load_properties props
; /* For creation/table (re)load */
258 * If presuspend of child node is needed
259 * Note: only direct child is allowed
261 struct dm_tree_node
*presuspend_node
;
264 dm_node_callback_fn callback
;
270 struct dm_hash_table
*devs
;
271 struct dm_hash_table
*uuids
;
272 struct dm_tree_node root
;
273 int skip_lockfs
; /* 1 skips lockfs (for non-snapshots) */
274 int no_flush
; /* 1 sets noflush (mirrors/multipath) */
275 int retry_remove
; /* 1 retries remove if not successful */
282 struct dm_tree
*dm_tree_create(void)
284 struct dm_pool
*dmem
;
285 struct dm_tree
*dtree
;
287 if (!(dmem
= dm_pool_create("dtree", 1024)) ||
288 !(dtree
= dm_pool_zalloc(dmem
, sizeof(*dtree
)))) {
289 log_error("Failed to allocate dtree.");
291 dm_pool_destroy(dmem
);
295 dtree
->root
.dtree
= dtree
;
296 dm_list_init(&dtree
->root
.uses
);
297 dm_list_init(&dtree
->root
.used_by
);
298 dtree
->skip_lockfs
= 0;
302 if (!(dtree
->devs
= dm_hash_create(8))) {
303 log_error("dtree hash creation failed");
304 dm_pool_destroy(dtree
->mem
);
308 if (!(dtree
->uuids
= dm_hash_create(32))) {
309 log_error("dtree uuid hash creation failed");
310 dm_hash_destroy(dtree
->devs
);
311 dm_pool_destroy(dtree
->mem
);
318 void dm_tree_free(struct dm_tree
*dtree
)
323 dm_hash_destroy(dtree
->uuids
);
324 dm_hash_destroy(dtree
->devs
);
325 dm_pool_destroy(dtree
->mem
);
328 void dm_tree_set_cookie(struct dm_tree_node
*node
, uint32_t cookie
)
330 node
->dtree
->cookie
= cookie
;
333 uint32_t dm_tree_get_cookie(struct dm_tree_node
*node
)
335 return node
->dtree
->cookie
;
338 void dm_tree_skip_lockfs(struct dm_tree_node
*dnode
)
340 dnode
->dtree
->skip_lockfs
= 1;
343 void dm_tree_use_no_flush_suspend(struct dm_tree_node
*dnode
)
345 dnode
->dtree
->no_flush
= 1;
348 void dm_tree_retry_remove(struct dm_tree_node
*dnode
)
350 dnode
->dtree
->retry_remove
= 1;
356 static int _nodes_are_linked(const struct dm_tree_node
*parent
,
357 const struct dm_tree_node
*child
)
359 struct dm_tree_link
*dlink
;
361 dm_list_iterate_items(dlink
, &parent
->uses
)
362 if (dlink
->node
== child
)
368 static int _link(struct dm_list
*list
, struct dm_tree_node
*node
)
370 struct dm_tree_link
*dlink
;
372 if (!(dlink
= dm_pool_alloc(node
->dtree
->mem
, sizeof(*dlink
)))) {
373 log_error("dtree link allocation failed");
378 dm_list_add(list
, &dlink
->list
);
383 static int _link_nodes(struct dm_tree_node
*parent
,
384 struct dm_tree_node
*child
)
386 if (_nodes_are_linked(parent
, child
))
389 if (!_link(&parent
->uses
, child
))
392 if (!_link(&child
->used_by
, parent
))
398 static void _unlink(struct dm_list
*list
, struct dm_tree_node
*node
)
400 struct dm_tree_link
*dlink
;
402 dm_list_iterate_items(dlink
, list
)
403 if (dlink
->node
== node
) {
404 dm_list_del(&dlink
->list
);
409 static void _unlink_nodes(struct dm_tree_node
*parent
,
410 struct dm_tree_node
*child
)
412 if (!_nodes_are_linked(parent
, child
))
415 _unlink(&parent
->uses
, child
);
416 _unlink(&child
->used_by
, parent
);
419 static int _add_to_toplevel(struct dm_tree_node
*node
)
421 return _link_nodes(&node
->dtree
->root
, node
);
424 static void _remove_from_toplevel(struct dm_tree_node
*node
)
426 _unlink_nodes(&node
->dtree
->root
, node
);
429 static int _add_to_bottomlevel(struct dm_tree_node
*node
)
431 return _link_nodes(node
, &node
->dtree
->root
);
434 static void _remove_from_bottomlevel(struct dm_tree_node
*node
)
436 _unlink_nodes(node
, &node
->dtree
->root
);
439 static int _link_tree_nodes(struct dm_tree_node
*parent
, struct dm_tree_node
*child
)
441 /* Don't link to root node if child already has a parent */
442 if (parent
== &parent
->dtree
->root
) {
443 if (dm_tree_node_num_children(child
, 1))
446 _remove_from_toplevel(child
);
448 if (child
== &child
->dtree
->root
) {
449 if (dm_tree_node_num_children(parent
, 0))
452 _remove_from_bottomlevel(parent
);
454 return _link_nodes(parent
, child
);
457 static struct dm_tree_node
*_create_dm_tree_node(struct dm_tree
*dtree
,
460 struct dm_info
*info
,
464 struct dm_tree_node
*node
;
467 if (!(node
= dm_pool_zalloc(dtree
->mem
, sizeof(*node
)))) {
468 log_error("_create_dm_tree_node alloc failed");
477 node
->context
= context
;
478 node
->udev_flags
= udev_flags
;
479 node
->activation_priority
= 0;
481 dm_list_init(&node
->uses
);
482 dm_list_init(&node
->used_by
);
483 dm_list_init(&node
->props
.segs
);
485 dev
= MKDEV(info
->major
, info
->minor
);
487 if (!dm_hash_insert_binary(dtree
->devs
, (const char *) &dev
,
488 sizeof(dev
), node
)) {
489 log_error("dtree node hash insertion failed");
490 dm_pool_free(dtree
->mem
, node
);
495 !dm_hash_insert(dtree
->uuids
, uuid
, node
)) {
496 log_error("dtree uuid hash insertion failed");
497 dm_hash_remove_binary(dtree
->devs
, (const char *) &dev
,
499 dm_pool_free(dtree
->mem
, node
);
506 static struct dm_tree_node
*_find_dm_tree_node(struct dm_tree
*dtree
,
507 uint32_t major
, uint32_t minor
)
509 uint64_t dev
= MKDEV(major
, minor
);
511 return dm_hash_lookup_binary(dtree
->devs
, (const char *) &dev
,
515 static struct dm_tree_node
*_find_dm_tree_node_by_uuid(struct dm_tree
*dtree
,
518 struct dm_tree_node
*node
;
519 const char *default_uuid_prefix
;
520 size_t default_uuid_prefix_len
;
522 if ((node
= dm_hash_lookup(dtree
->uuids
, uuid
)))
525 default_uuid_prefix
= dm_uuid_prefix();
526 default_uuid_prefix_len
= strlen(default_uuid_prefix
);
528 if (strncmp(uuid
, default_uuid_prefix
, default_uuid_prefix_len
))
531 return dm_hash_lookup(dtree
->uuids
, uuid
+ default_uuid_prefix_len
);
534 void dm_tree_node_set_udev_flags(struct dm_tree_node
*dnode
, uint16_t udev_flags
)
537 struct dm_info
*dinfo
= &dnode
->info
;
539 if (udev_flags
!= dnode
->udev_flags
)
540 log_debug("Resetting %s (%" PRIu32
":%" PRIu32
541 ") udev_flags from 0x%x to 0x%x",
542 dnode
->name
, dinfo
->major
, dinfo
->minor
,
543 dnode
->udev_flags
, udev_flags
);
544 dnode
->udev_flags
= udev_flags
;
547 void dm_tree_node_set_read_ahead(struct dm_tree_node
*dnode
,
549 uint32_t read_ahead_flags
)
551 dnode
->props
.read_ahead
= read_ahead
;
552 dnode
->props
.read_ahead_flags
= read_ahead_flags
;
555 void dm_tree_node_set_presuspend_node(struct dm_tree_node
*node
,
556 struct dm_tree_node
*presuspend_node
)
558 node
->presuspend_node
= presuspend_node
;
561 const char *dm_tree_node_get_name(const struct dm_tree_node
*node
)
563 return node
->info
.exists
? node
->name
: "";
566 const char *dm_tree_node_get_uuid(const struct dm_tree_node
*node
)
568 return node
->info
.exists
? node
->uuid
: "";
571 const struct dm_info
*dm_tree_node_get_info(const struct dm_tree_node
*node
)
576 void *dm_tree_node_get_context(const struct dm_tree_node
*node
)
578 return node
->context
;
581 int dm_tree_node_size_changed(const struct dm_tree_node
*dnode
)
583 return dnode
->props
.size_changed
;
586 int dm_tree_node_num_children(const struct dm_tree_node
*node
, uint32_t inverted
)
589 if (_nodes_are_linked(&node
->dtree
->root
, node
))
591 return dm_list_size(&node
->used_by
);
594 if (_nodes_are_linked(node
, &node
->dtree
->root
))
597 return dm_list_size(&node
->uses
);
/*
 * Returns 1 if no prefix supplied, or if 'uuid' matches 'uuid_prefix'.
 * Also handles the transition case where either the uuid or the prefix is
 * missing the default subsystem prefix (dm_uuid_prefix()).
 * NOTE(review): several return lines were lost in extraction and
 * reconstructed from the libdm upstream — verify the return values.
 */
static int _uuid_prefix_matches(const char *uuid, const char *uuid_prefix, size_t uuid_prefix_len)
{
	const char *default_uuid_prefix = dm_uuid_prefix();
	size_t default_uuid_prefix_len = strlen(default_uuid_prefix);

	if (!uuid_prefix)
		return 1;

	if (!strncmp(uuid, uuid_prefix, uuid_prefix_len))
		return 1;

	/* Handle transition: active device uuids might be missing the prefix */
	if (uuid_prefix_len <= 4)
		return 0;

	if (!strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
		return 0;

	if (strncmp(uuid_prefix, default_uuid_prefix, default_uuid_prefix_len))
		return 0;

	if (!strncmp(uuid, uuid_prefix + default_uuid_prefix_len,
		     uuid_prefix_len - default_uuid_prefix_len))
		return 1;

	return 0;
}
631 * Returns 1 if no children.
633 static int _children_suspended(struct dm_tree_node
*node
,
635 const char *uuid_prefix
,
636 size_t uuid_prefix_len
)
638 struct dm_list
*list
;
639 struct dm_tree_link
*dlink
;
640 const struct dm_info
*dinfo
;
644 if (_nodes_are_linked(&node
->dtree
->root
, node
))
646 list
= &node
->used_by
;
648 if (_nodes_are_linked(node
, &node
->dtree
->root
))
653 dm_list_iterate_items(dlink
, list
) {
654 if (!(uuid
= dm_tree_node_get_uuid(dlink
->node
))) {
659 /* Ignore if it doesn't belong to this VG */
660 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
663 /* Ignore if parent node wants to presuspend this node */
664 if (dlink
->node
->presuspend_node
== node
)
667 if (!(dinfo
= dm_tree_node_get_info(dlink
->node
))) {
668 stack
; /* FIXME Is this normal? */
672 if (!dinfo
->suspended
)
680 * Set major and minor to zero for root of tree.
682 struct dm_tree_node
*dm_tree_find_node(struct dm_tree
*dtree
,
686 if (!major
&& !minor
)
689 return _find_dm_tree_node(dtree
, major
, minor
);
693 * Set uuid to NULL for root of tree.
695 struct dm_tree_node
*dm_tree_find_node_by_uuid(struct dm_tree
*dtree
,
701 return _find_dm_tree_node_by_uuid(dtree
, uuid
);
705 * First time set *handle to NULL.
706 * Set inverted to invert the tree.
708 struct dm_tree_node
*dm_tree_next_child(void **handle
,
709 const struct dm_tree_node
*parent
,
712 struct dm_list
**dlink
= (struct dm_list
**) handle
;
713 const struct dm_list
*use_list
;
716 use_list
= &parent
->used_by
;
718 use_list
= &parent
->uses
;
721 *dlink
= dm_list_first(use_list
);
723 *dlink
= dm_list_next(use_list
, *dlink
);
725 return (*dlink
) ? dm_list_item(*dlink
, struct dm_tree_link
)->node
: NULL
;
728 static int _deps(struct dm_task
**dmt
, struct dm_pool
*mem
, uint32_t major
, uint32_t minor
,
729 const char **name
, const char **uuid
, unsigned inactive_table
,
730 struct dm_info
*info
, struct dm_deps
**deps
)
732 memset(info
, 0, sizeof(*info
));
734 if (!dm_is_dm_major(major
)) {
745 if (!(*dmt
= dm_task_create(DM_DEVICE_DEPS
))) {
746 log_error("deps dm_task creation failed");
750 if (!dm_task_set_major(*dmt
, major
)) {
751 log_error("_deps: failed to set major for (%" PRIu32
":%" PRIu32
")",
756 if (!dm_task_set_minor(*dmt
, minor
)) {
757 log_error("_deps: failed to set minor for (%" PRIu32
":%" PRIu32
")",
762 if (inactive_table
&& !dm_task_query_inactive_table(*dmt
)) {
763 log_error("_deps: failed to set inactive table for (%" PRIu32
":%" PRIu32
")",
768 if (!dm_task_run(*dmt
)) {
769 log_error("_deps: task run failed for (%" PRIu32
":%" PRIu32
")",
774 if (!dm_task_get_info(*dmt
, info
)) {
775 log_error("_deps: failed to get info for (%" PRIu32
":%" PRIu32
")",
787 if (info
->major
!= major
) {
788 log_error("Inconsistent dtree major number: %u != %u",
792 if (info
->minor
!= minor
) {
793 log_error("Inconsistent dtree minor number: %u != %u",
797 if (name
&& !(*name
= dm_pool_strdup(mem
, dm_task_get_name(*dmt
)))) {
798 log_error("name pool_strdup failed");
801 if (uuid
&& !(*uuid
= dm_pool_strdup(mem
, dm_task_get_uuid(*dmt
)))) {
802 log_error("uuid pool_strdup failed");
805 *deps
= dm_task_get_deps(*dmt
);
811 dm_task_destroy(*dmt
);
816 * Deactivate a device with its dependencies if the uuid prefix matches.
818 static int _info_by_dev(uint32_t major
, uint32_t minor
, int with_open_count
,
819 struct dm_info
*info
, struct dm_pool
*mem
,
820 const char **name
, const char **uuid
)
825 if (!(dmt
= dm_task_create(DM_DEVICE_INFO
))) {
826 log_error("_info_by_dev: dm_task creation failed");
830 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
831 log_error("_info_by_dev: Failed to set device number");
832 dm_task_destroy(dmt
);
836 if (!with_open_count
&& !dm_task_no_open_count(dmt
))
837 log_error("Failed to disable open_count");
839 if (!(r
= dm_task_run(dmt
)))
842 if (!(r
= dm_task_get_info(dmt
, info
)))
845 if (name
&& !(*name
= dm_pool_strdup(mem
, dm_task_get_name(dmt
)))) {
846 log_error("name pool_strdup failed");
851 if (uuid
&& !(*uuid
= dm_pool_strdup(mem
, dm_task_get_uuid(dmt
)))) {
852 log_error("uuid pool_strdup failed");
858 dm_task_destroy(dmt
);
863 static int _check_device_not_in_use(const char *name
, struct dm_info
*info
)
868 /* If sysfs is not used, use open_count information only. */
869 if (!*dm_sysfs_dir()) {
870 if (info
->open_count
) {
871 log_error("Device %s (%" PRIu32
":%" PRIu32
") in use",
872 name
, info
->major
, info
->minor
);
879 if (dm_device_has_holders(info
->major
, info
->minor
)) {
880 log_error("Device %s (%" PRIu32
":%" PRIu32
") is used "
881 "by another device.", name
, info
->major
, info
->minor
);
885 if (dm_device_has_mounted_fs(info
->major
, info
->minor
)) {
886 log_error("Device %s (%" PRIu32
":%" PRIu32
") contains "
887 "a filesystem in use.", name
, info
->major
, info
->minor
);
894 /* Check if all parent nodes of given node have open_count == 0 */
895 static int _node_has_closed_parents(struct dm_tree_node
*node
,
896 const char *uuid_prefix
,
897 size_t uuid_prefix_len
)
899 struct dm_tree_link
*dlink
;
900 const struct dm_info
*dinfo
;
904 /* Iterate through parents of this node */
905 dm_list_iterate_items(dlink
, &node
->used_by
) {
906 if (!(uuid
= dm_tree_node_get_uuid(dlink
->node
))) {
911 /* Ignore if it doesn't belong to this VG */
912 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
915 if (!(dinfo
= dm_tree_node_get_info(dlink
->node
))) {
916 stack
; /* FIXME Is this normal? */
920 /* Refresh open_count */
921 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 1, &info
, NULL
, NULL
, NULL
) ||
925 if (info
.open_count
) {
926 log_debug("Node %s %d:%d has open_count %d", uuid_prefix
,
927 dinfo
->major
, dinfo
->minor
, info
.open_count
);
935 static int _deactivate_node(const char *name
, uint32_t major
, uint32_t minor
,
936 uint32_t *cookie
, uint16_t udev_flags
, int retry
)
941 log_verbose("Removing %s (%" PRIu32
":%" PRIu32
")", name
, major
, minor
);
943 if (!(dmt
= dm_task_create(DM_DEVICE_REMOVE
))) {
944 log_error("Deactivation dm_task creation failed for %s", name
);
948 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
949 log_error("Failed to set device number for %s deactivation", name
);
953 if (!dm_task_no_open_count(dmt
))
954 log_error("Failed to disable open_count");
957 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
961 dm_task_retry_remove(dmt
);
963 r
= dm_task_run(dmt
);
965 /* FIXME Until kernel returns actual name so dm-iface.c can handle it */
966 rm_dev_node(name
, dmt
->cookie_set
&& !(udev_flags
& DM_UDEV_DISABLE_DM_RULES_FLAG
),
967 dmt
->cookie_set
&& (udev_flags
& DM_UDEV_DISABLE_LIBRARY_FALLBACK
));
969 /* FIXME Remove node from tree or mark invalid? */
972 dm_task_destroy(dmt
);
977 static int _node_clear_table(struct dm_tree_node
*dnode
, uint16_t udev_flags
)
979 struct dm_task
*dmt
= NULL
, *deps_dmt
= NULL
;
980 struct dm_info
*info
, deps_info
;
981 struct dm_deps
*deps
= NULL
;
982 const char *name
, *uuid
;
983 const char *default_uuid_prefix
;
984 size_t default_uuid_prefix_len
;
988 if (!(info
= &dnode
->info
)) {
989 log_error("_node_clear_table failed: missing info");
993 if (!(name
= dm_tree_node_get_name(dnode
))) {
994 log_error("_node_clear_table failed: missing name");
998 /* Is there a table? */
999 if (!info
->exists
|| !info
->inactive_table
)
1002 /* Get devices used by inactive table that's about to be deleted. */
1003 if (!_deps(&deps_dmt
, dnode
->dtree
->mem
, info
->major
, info
->minor
, NULL
, NULL
, 1, info
, &deps
)) {
1004 log_error("Failed to obtain dependencies for %s before clearing table.", name
);
1008 log_verbose("Clearing inactive table %s (%" PRIu32
":%" PRIu32
")",
1009 name
, info
->major
, info
->minor
);
1011 if (!(dmt
= dm_task_create(DM_DEVICE_CLEAR
))) {
1012 log_error("Table clear dm_task creation failed for %s", name
);
1016 if (!dm_task_set_major(dmt
, info
->major
) ||
1017 !dm_task_set_minor(dmt
, info
->minor
)) {
1018 log_error("Failed to set device number for %s table clear", name
);
1022 r
= dm_task_run(dmt
);
1024 if (!dm_task_get_info(dmt
, info
)) {
1025 log_error("_node_clear_table failed: info missing after running task for %s", name
);
1033 * Remove (incomplete) devices that the inactive table referred to but
1034 * which are not in the tree, no longer referenced and don't have a live
1037 default_uuid_prefix
= dm_uuid_prefix();
1038 default_uuid_prefix_len
= strlen(default_uuid_prefix
);
1040 for (i
= 0; i
< deps
->count
; i
++) {
1041 /* If already in tree, assume it's under control */
1042 if (_find_dm_tree_node(dnode
->dtree
, MAJOR(deps
->device
[i
]), MINOR(deps
->device
[i
])))
1045 if (!_info_by_dev(MAJOR(deps
->device
[i
]), MINOR(deps
->device
[i
]), 1,
1046 &deps_info
, dnode
->dtree
->mem
, &name
, &uuid
))
1049 /* Proceed if device is an 'orphan' - unreferenced and without a live table. */
1050 if (!deps_info
.exists
|| deps_info
.live_table
|| deps_info
.open_count
)
1053 if (strncmp(uuid
, default_uuid_prefix
, default_uuid_prefix_len
))
1056 /* Remove device. */
1057 if (!_deactivate_node(name
, deps_info
.major
, deps_info
.minor
, &dnode
->dtree
->cookie
, udev_flags
, 0)) {
1058 log_error("Failed to deactivate no-longer-used device %s (%"
1059 PRIu32
":%" PRIu32
")", name
, deps_info
.major
, deps_info
.minor
);
1060 } else if (deps_info
.suspended
)
1066 dm_task_destroy(dmt
);
1069 dm_task_destroy(deps_dmt
);
1074 struct dm_tree_node
*dm_tree_add_new_dev_with_udev_flags(struct dm_tree
*dtree
,
1082 uint16_t udev_flags
)
1084 struct dm_tree_node
*dnode
;
1085 struct dm_info info
;
1089 if (!name
|| !uuid
) {
1090 log_error("Cannot add device without name and uuid.");
1094 /* Do we need to add node to tree? */
1095 if (!(dnode
= dm_tree_find_node_by_uuid(dtree
, uuid
))) {
1096 if (!(name2
= dm_pool_strdup(dtree
->mem
, name
))) {
1097 log_error("name pool_strdup failed");
1100 if (!(uuid2
= dm_pool_strdup(dtree
->mem
, uuid
))) {
1101 log_error("uuid pool_strdup failed");
1105 memset(&info
, 0, sizeof(info
));
1107 if (!(dnode
= _create_dm_tree_node(dtree
, name2
, uuid2
, &info
,
1111 /* Attach to root node until a table is supplied */
1112 if (!_add_to_toplevel(dnode
) || !_add_to_bottomlevel(dnode
))
1115 dnode
->props
.major
= major
;
1116 dnode
->props
.minor
= minor
;
1117 dnode
->props
.new_name
= NULL
;
1118 dnode
->props
.size_changed
= 0;
1119 } else if (strcmp(name
, dnode
->name
)) {
1120 /* Do we need to rename node? */
1121 if (!(dnode
->props
.new_name
= dm_pool_strdup(dtree
->mem
, name
))) {
1122 log_error("name pool_strdup failed");
1127 dnode
->props
.read_only
= read_only
? 1 : 0;
1128 dnode
->props
.read_ahead
= DM_READ_AHEAD_AUTO
;
1129 dnode
->props
.read_ahead_flags
= 0;
1131 if (clear_inactive
&& !_node_clear_table(dnode
, udev_flags
))
1134 dnode
->context
= context
;
1135 dnode
->udev_flags
= udev_flags
;
/* Convenience wrapper: add a new device with zero udev flags. */
struct dm_tree_node *dm_tree_add_new_dev(struct dm_tree *dtree, const char *name,
					 const char *uuid, uint32_t major, uint32_t minor,
					 int read_only, int clear_inactive, void *context)
{
	return dm_tree_add_new_dev_with_udev_flags(dtree, name, uuid, major, minor,
						   read_only, clear_inactive, context, 0);
}
1148 static struct dm_tree_node
*_add_dev(struct dm_tree
*dtree
,
1149 struct dm_tree_node
*parent
,
1150 uint32_t major
, uint32_t minor
,
1151 uint16_t udev_flags
)
1153 struct dm_task
*dmt
= NULL
;
1154 struct dm_info info
;
1155 struct dm_deps
*deps
= NULL
;
1156 const char *name
= NULL
;
1157 const char *uuid
= NULL
;
1158 struct dm_tree_node
*node
= NULL
;
1162 /* Already in tree? */
1163 if (!(node
= _find_dm_tree_node(dtree
, major
, minor
))) {
1164 if (!_deps(&dmt
, dtree
->mem
, major
, minor
, &name
, &uuid
, 0, &info
, &deps
))
1167 if (!(node
= _create_dm_tree_node(dtree
, name
, uuid
, &info
,
1173 if (!_link_tree_nodes(parent
, node
)) {
1178 /* If node was already in tree, no need to recurse. */
1182 /* Can't recurse if not a mapped device or there are no dependencies */
1183 if (!node
->info
.exists
|| !deps
|| !deps
->count
) {
1184 if (!_add_to_bottomlevel(node
)) {
1191 /* Add dependencies to tree */
1192 for (i
= 0; i
< deps
->count
; i
++)
1193 if (!_add_dev(dtree
, node
, MAJOR(deps
->device
[i
]),
1194 MINOR(deps
->device
[i
]), udev_flags
)) {
1201 dm_task_destroy(dmt
);
1206 int dm_tree_add_dev(struct dm_tree
*dtree
, uint32_t major
, uint32_t minor
)
1208 return _add_dev(dtree
, &dtree
->root
, major
, minor
, 0) ? 1 : 0;
1211 int dm_tree_add_dev_with_udev_flags(struct dm_tree
*dtree
, uint32_t major
,
1212 uint32_t minor
, uint16_t udev_flags
)
1214 return _add_dev(dtree
, &dtree
->root
, major
, minor
, udev_flags
) ? 1 : 0;
1217 static int _rename_node(const char *old_name
, const char *new_name
, uint32_t major
,
1218 uint32_t minor
, uint32_t *cookie
, uint16_t udev_flags
)
1220 struct dm_task
*dmt
;
1223 log_verbose("Renaming %s (%" PRIu32
":%" PRIu32
") to %s", old_name
, major
, minor
, new_name
);
1225 if (!(dmt
= dm_task_create(DM_DEVICE_RENAME
))) {
1226 log_error("Rename dm_task creation failed for %s", old_name
);
1230 if (!dm_task_set_name(dmt
, old_name
)) {
1231 log_error("Failed to set name for %s rename.", old_name
);
1235 if (!dm_task_set_newname(dmt
, new_name
))
1238 if (!dm_task_no_open_count(dmt
))
1239 log_error("Failed to disable open_count");
1241 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1244 r
= dm_task_run(dmt
);
1247 dm_task_destroy(dmt
);
1252 /* FIXME Merge with _suspend_node? */
1253 static int _resume_node(const char *name
, uint32_t major
, uint32_t minor
,
1254 uint32_t read_ahead
, uint32_t read_ahead_flags
,
1255 struct dm_info
*newinfo
, uint32_t *cookie
,
1256 uint16_t udev_flags
, int already_suspended
)
1258 struct dm_task
*dmt
;
1261 log_verbose("Resuming %s (%" PRIu32
":%" PRIu32
")", name
, major
, minor
);
1263 if (!(dmt
= dm_task_create(DM_DEVICE_RESUME
))) {
1264 log_debug("Suspend dm_task creation failed for %s.", name
);
1268 /* FIXME Kernel should fill in name on return instead */
1269 if (!dm_task_set_name(dmt
, name
)) {
1270 log_debug("Failed to set device name for %s resumption.", name
);
1274 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
1275 log_error("Failed to set device number for %s resumption.", name
);
1279 if (!dm_task_no_open_count(dmt
))
1280 log_error("Failed to disable open_count");
1282 if (!dm_task_set_read_ahead(dmt
, read_ahead
, read_ahead_flags
))
1283 log_error("Failed to set read ahead");
1285 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1288 if (!(r
= dm_task_run(dmt
)))
1291 if (already_suspended
)
1294 if (!(r
= dm_task_get_info(dmt
, newinfo
)))
1298 dm_task_destroy(dmt
);
1303 static int _suspend_node(const char *name
, uint32_t major
, uint32_t minor
,
1304 int skip_lockfs
, int no_flush
, struct dm_info
*newinfo
)
1306 struct dm_task
*dmt
;
1309 log_verbose("Suspending %s (%" PRIu32
":%" PRIu32
")%s%s",
1311 skip_lockfs
? "" : " with filesystem sync",
1312 no_flush
? "" : " with device flush");
1314 if (!(dmt
= dm_task_create(DM_DEVICE_SUSPEND
))) {
1315 log_error("Suspend dm_task creation failed for %s", name
);
1319 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
1320 log_error("Failed to set device number for %s suspension.", name
);
1321 dm_task_destroy(dmt
);
1325 if (!dm_task_no_open_count(dmt
))
1326 log_error("Failed to disable open_count");
1328 if (skip_lockfs
&& !dm_task_skip_lockfs(dmt
))
1329 log_error("Failed to set skip_lockfs flag.");
1331 if (no_flush
&& !dm_task_no_flush(dmt
))
1332 log_error("Failed to set no_flush flag.");
1334 if ((r
= dm_task_run(dmt
))) {
1336 r
= dm_task_get_info(dmt
, newinfo
);
1339 dm_task_destroy(dmt
);
1344 static int _thin_pool_status_transaction_id(struct dm_tree_node
*dnode
, uint64_t *transaction_id
)
1346 struct dm_task
*dmt
;
1348 uint64_t start
, length
;
1350 char *params
= NULL
;
1352 if (!(dmt
= dm_task_create(DM_DEVICE_STATUS
)))
1355 if (!dm_task_set_major(dmt
, dnode
->info
.major
) ||
1356 !dm_task_set_minor(dmt
, dnode
->info
.minor
)) {
1357 log_error("Failed to set major minor.");
1361 if (!dm_task_run(dmt
))
1364 dm_get_next_target(dmt
, NULL
, &start
, &length
, &type
, ¶ms
);
1366 if (type
&& (strcmp(type
, "thin-pool") != 0)) {
1367 log_error("Expected thin-pool target for %d:%d and got %s.",
1368 dnode
->info
.major
, dnode
->info
.minor
, type
);
1372 if (!params
|| (sscanf(params
, "%" PRIu64
, transaction_id
) != 1)) {
1373 log_error("Failed to parse transaction_id from %s.", params
);
1377 log_debug("Thin pool transaction id: %" PRIu64
" status: %s.", *transaction_id
, params
);
1381 dm_task_destroy(dmt
);
1386 static int _thin_pool_node_message(struct dm_tree_node
*dnode
, struct thin_message
*tm
)
1388 struct dm_task
*dmt
;
1389 struct dm_thin_message
*m
= &tm
->message
;
1394 case DM_THIN_MESSAGE_CREATE_SNAP
:
1395 r
= dm_snprintf(buf
, sizeof(buf
), "create_snap %u %u",
1396 m
->u
.m_create_snap
.device_id
,
1397 m
->u
.m_create_snap
.origin_id
);
1399 case DM_THIN_MESSAGE_CREATE_THIN
:
1400 r
= dm_snprintf(buf
, sizeof(buf
), "create_thin %u",
1401 m
->u
.m_create_thin
.device_id
);
1403 case DM_THIN_MESSAGE_DELETE
:
1404 r
= dm_snprintf(buf
, sizeof(buf
), "delete %u",
1405 m
->u
.m_delete
.device_id
);
1407 case DM_THIN_MESSAGE_SET_TRANSACTION_ID
:
1408 r
= dm_snprintf(buf
, sizeof(buf
),
1409 "set_transaction_id %" PRIu64
" %" PRIu64
,
1410 m
->u
.m_set_transaction_id
.current_id
,
1411 m
->u
.m_set_transaction_id
.new_id
);
1418 log_error("Failed to prepare message.");
1424 if (!(dmt
= dm_task_create(DM_DEVICE_TARGET_MSG
)))
1427 if (!dm_task_set_major(dmt
, dnode
->info
.major
) ||
1428 !dm_task_set_minor(dmt
, dnode
->info
.minor
)) {
1429 log_error("Failed to set message major minor.");
1433 if (!dm_task_set_message(dmt
, buf
))
1436 /* Internal functionality of dm_task */
1437 dmt
->expected_errno
= tm
->expected_errno
;
1439 if (!dm_task_run(dmt
))
1444 dm_task_destroy(dmt
);
1449 static int _node_send_messages(struct dm_tree_node
*dnode
,
1450 const char *uuid_prefix
,
1451 size_t uuid_prefix_len
)
1453 struct load_segment
*seg
;
1454 struct thin_message
*tmsg
;
1458 if (!dnode
->info
.exists
|| (dm_list_size(&dnode
->props
.segs
) != 1))
1461 seg
= dm_list_item(dm_list_last(&dnode
->props
.segs
), struct load_segment
);
1462 if (seg
->type
!= SEG_THIN_POOL
)
1465 if (!(uuid
= dm_tree_node_get_uuid(dnode
)))
1468 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
)) {
1469 log_debug("UUID \"%s\" does not match.", uuid
);
1473 if (!_thin_pool_status_transaction_id(dnode
, &trans_id
))
1476 if (trans_id
== seg
->transaction_id
)
1477 return 1; /* In sync - skip messages */
1479 if (trans_id
!= (seg
->transaction_id
- 1)) {
1480 log_error("Thin pool transaction_id=%" PRIu64
", while expected: %" PRIu64
".",
1481 trans_id
, seg
->transaction_id
- 1);
1482 goto bad
; /* Nothing to send */
1485 dm_list_iterate_items(tmsg
, &seg
->thin_messages
)
1486 if (!(_thin_pool_node_message(dnode
, tmsg
)))
1491 /* Try to deactivate */
1492 if (!(dm_tree_deactivate_children(dnode
, uuid_prefix
, uuid_prefix_len
)))
1493 log_error("Failed to deactivate %s", dnode
->name
);
1499 * FIXME Don't attempt to deactivate known internal dependencies.
1501 static int _dm_tree_deactivate_children(struct dm_tree_node
*dnode
,
1502 const char *uuid_prefix
,
1503 size_t uuid_prefix_len
,
1507 void *handle
= NULL
;
1508 struct dm_tree_node
*child
= dnode
;
1509 struct dm_info info
;
1510 const struct dm_info
*dinfo
;
1514 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1515 if (!(dinfo
= dm_tree_node_get_info(child
))) {
1520 if (!(name
= dm_tree_node_get_name(child
))) {
1525 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1530 /* Ignore if it doesn't belong to this VG */
1531 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1534 /* Refresh open_count */
1535 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 1, &info
, NULL
, NULL
, NULL
) ||
1539 if (info
.open_count
) {
1540 /* Skip internal non-toplevel opened nodes */
1544 /* When retry is not allowed, error */
1545 if (!child
->dtree
->retry_remove
) {
1546 log_error("Unable to deactivate open %s (%" PRIu32
1547 ":%" PRIu32
")", name
, info
.major
, info
.minor
);
1552 /* Check toplevel node for holders/mounted fs */
1553 if (!_check_device_not_in_use(name
, &info
)) {
1558 /* Go on with retry */
1561 /* Also checking open_count in parent nodes of presuspend_node */
1562 if ((child
->presuspend_node
&&
1563 !_node_has_closed_parents(child
->presuspend_node
,
1564 uuid_prefix
, uuid_prefix_len
))) {
1565 /* Only report error from (likely non-internal) dependency at top level */
1567 log_error("Unable to deactivate open %s (%" PRIu32
1568 ":%" PRIu32
")", name
, info
.major
,
1575 /* Suspend child node first if requested */
1576 if (child
->presuspend_node
&&
1577 !dm_tree_suspend_children(child
, uuid_prefix
, uuid_prefix_len
))
1580 if (!_deactivate_node(name
, info
.major
, info
.minor
,
1581 &child
->dtree
->cookie
, child
->udev_flags
,
1582 (level
== 0) ? child
->dtree
->retry_remove
: 0)) {
1583 log_error("Unable to deactivate %s (%" PRIu32
1584 ":%" PRIu32
")", name
, info
.major
,
1588 } else if (info
.suspended
)
1591 if (child
->callback
&&
1592 !child
->callback(child
, DM_NODE_CALLBACK_DEACTIVATED
,
1593 child
->callback_data
))
1595 // FIXME: We need to let lvremove pass,
1596 // so for now deactivation ignores check result
1597 //r = 0; // FIXME: _node_clear_table() without callback ?
1599 if (dm_tree_node_num_children(child
, 0) &&
1600 !_dm_tree_deactivate_children(child
, uuid_prefix
, uuid_prefix_len
, level
+ 1))
1607 int dm_tree_deactivate_children(struct dm_tree_node
*dnode
,
1608 const char *uuid_prefix
,
1609 size_t uuid_prefix_len
)
1611 return _dm_tree_deactivate_children(dnode
, uuid_prefix
, uuid_prefix_len
, 0);
1614 int dm_tree_suspend_children(struct dm_tree_node
*dnode
,
1615 const char *uuid_prefix
,
1616 size_t uuid_prefix_len
)
1619 void *handle
= NULL
;
1620 struct dm_tree_node
*child
= dnode
;
1621 struct dm_info info
, newinfo
;
1622 const struct dm_info
*dinfo
;
1626 /* Suspend nodes at this level of the tree */
1627 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1628 if (!(dinfo
= dm_tree_node_get_info(child
))) {
1633 if (!(name
= dm_tree_node_get_name(child
))) {
1638 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1643 /* Ignore if it doesn't belong to this VG */
1644 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1647 /* Ensure immediate parents are already suspended */
1648 if (!_children_suspended(child
, 1, uuid_prefix
, uuid_prefix_len
))
1651 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 0, &info
, NULL
, NULL
, NULL
) ||
1652 !info
.exists
|| info
.suspended
)
1655 if (!_suspend_node(name
, info
.major
, info
.minor
,
1656 child
->dtree
->skip_lockfs
,
1657 child
->dtree
->no_flush
, &newinfo
)) {
1658 log_error("Unable to suspend %s (%" PRIu32
1659 ":%" PRIu32
")", name
, info
.major
,
1665 /* Update cached info */
1666 child
->info
= newinfo
;
1669 /* Then suspend any child nodes */
1672 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1673 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1678 /* Ignore if it doesn't belong to this VG */
1679 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1682 if (dm_tree_node_num_children(child
, 0))
1683 if (!dm_tree_suspend_children(child
, uuid_prefix
, uuid_prefix_len
))
1690 int dm_tree_activate_children(struct dm_tree_node
*dnode
,
1691 const char *uuid_prefix
,
1692 size_t uuid_prefix_len
)
1695 void *handle
= NULL
;
1696 struct dm_tree_node
*child
= dnode
;
1697 struct dm_info newinfo
;
1702 /* Activate children first */
1703 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1704 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1709 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1712 if (dm_tree_node_num_children(child
, 0))
1713 if (!dm_tree_activate_children(child
, uuid_prefix
, uuid_prefix_len
))
1719 for (priority
= 0; priority
< 3; priority
++) {
1720 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1721 if (priority
!= child
->activation_priority
)
1724 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1729 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1732 if (!(name
= dm_tree_node_get_name(child
))) {
1738 if (child
->props
.new_name
) {
1739 if (!_rename_node(name
, child
->props
.new_name
, child
->info
.major
,
1740 child
->info
.minor
, &child
->dtree
->cookie
,
1741 child
->udev_flags
)) {
1742 log_error("Failed to rename %s (%" PRIu32
1743 ":%" PRIu32
") to %s", name
, child
->info
.major
,
1744 child
->info
.minor
, child
->props
.new_name
);
1747 child
->name
= child
->props
.new_name
;
1748 child
->props
.new_name
= NULL
;
1751 if (!child
->info
.inactive_table
&& !child
->info
.suspended
)
1754 if (!_resume_node(child
->name
, child
->info
.major
, child
->info
.minor
,
1755 child
->props
.read_ahead
, child
->props
.read_ahead_flags
,
1756 &newinfo
, &child
->dtree
->cookie
, child
->udev_flags
, child
->info
.suspended
)) {
1757 log_error("Unable to resume %s (%" PRIu32
1758 ":%" PRIu32
")", child
->name
, child
->info
.major
,
1764 /* Update cached info */
1765 child
->info
= newinfo
;
1770 * FIXME: Implement delayed error reporting
1771 * activation should be stopped only in the case,
1772 * the submission of transation_id message fails,
1773 * resume should continue further, just whole command
1774 * has to report failure.
1776 if (r
&& dnode
->props
.send_messages
&&
1777 !(r
= _node_send_messages(dnode
, uuid_prefix
, uuid_prefix_len
)))
1785 static int _create_node(struct dm_tree_node
*dnode
)
1788 struct dm_task
*dmt
;
1790 log_verbose("Creating %s", dnode
->name
);
1792 if (!(dmt
= dm_task_create(DM_DEVICE_CREATE
))) {
1793 log_error("Create dm_task creation failed for %s", dnode
->name
);
1797 if (!dm_task_set_name(dmt
, dnode
->name
)) {
1798 log_error("Failed to set device name for %s", dnode
->name
);
1802 if (!dm_task_set_uuid(dmt
, dnode
->uuid
)) {
1803 log_error("Failed to set uuid for %s", dnode
->name
);
1807 if (dnode
->props
.major
&&
1808 (!dm_task_set_major(dmt
, dnode
->props
.major
) ||
1809 !dm_task_set_minor(dmt
, dnode
->props
.minor
))) {
1810 log_error("Failed to set device number for %s creation.", dnode
->name
);
1814 if (dnode
->props
.read_only
&& !dm_task_set_ro(dmt
)) {
1815 log_error("Failed to set read only flag for %s", dnode
->name
);
1819 if (!dm_task_no_open_count(dmt
))
1820 log_error("Failed to disable open_count");
1822 if ((r
= dm_task_run(dmt
)))
1823 r
= dm_task_get_info(dmt
, &dnode
->info
);
1826 dm_task_destroy(dmt
);
1832 static int _build_dev_string(char *devbuf
, size_t bufsize
, struct dm_tree_node
*node
)
1834 if (!dm_format_dev(devbuf
, bufsize
, node
->info
.major
, node
->info
.minor
)) {
1835 log_error("Failed to format %s device number for %s as dm "
1837 node
->name
, node
->uuid
, node
->info
.major
, node
->info
.minor
);
1844 /* simplify string emiting code */
1845 #define EMIT_PARAMS(p, str...)\
1848 if ((w = dm_snprintf(params + p, paramsize - (size_t) p, str)) < 0) {\
1849 stack; /* Out of space */\
1858 * Returns: 1 on success, 0 on failure
1860 static int _emit_areas_line(struct dm_task
*dmt
__attribute__((unused
)),
1861 struct load_segment
*seg
, char *params
,
1862 size_t paramsize
, int *pos
)
1864 struct seg_area
*area
;
1865 char devbuf
[DM_FORMAT_DEV_BUFSIZE
];
1866 unsigned first_time
= 1;
1867 const char *logtype
, *synctype
;
1868 unsigned log_parm_count
;
1870 dm_list_iterate_items(area
, &seg
->areas
) {
1871 switch (seg
->type
) {
1872 case SEG_REPLICATOR_DEV
:
1873 if (!_build_dev_string(devbuf
, sizeof(devbuf
), area
->dev_node
))
1876 EMIT_PARAMS(*pos
, " %d 1 %s", area
->rsite_index
, devbuf
);
1878 EMIT_PARAMS(*pos
, " nolog 0");
1880 /* Remote devices */
1881 log_parm_count
= (area
->flags
&
1882 (DM_NOSYNC
| DM_FORCESYNC
)) ? 2 : 1;
1885 devbuf
[0] = 0; /* Only core log parameters */
1888 devbuf
[0] = ' '; /* Extra space before device name */
1889 if (!_build_dev_string(devbuf
+ 1,
1894 log_parm_count
++; /* Extra sync log device name parameter */
1897 EMIT_PARAMS(*pos
, " %s %u%s %" PRIu64
, logtype
,
1898 log_parm_count
, devbuf
, area
->region_size
);
1900 synctype
= (area
->flags
& DM_NOSYNC
) ?
1901 " nosync" : (area
->flags
& DM_FORCESYNC
) ?
1905 EMIT_PARAMS(*pos
, "%s", synctype
);
1917 if (!area
->dev_node
) {
1918 EMIT_PARAMS(*pos
, " -");
1921 if (!_build_dev_string(devbuf
, sizeof(devbuf
), area
->dev_node
))
1924 EMIT_PARAMS(*pos
, " %s", devbuf
);
1927 if (!_build_dev_string(devbuf
, sizeof(devbuf
), area
->dev_node
))
1930 EMIT_PARAMS(*pos
, "%s%s %" PRIu64
, first_time
? "" : " ",
1931 devbuf
, area
->offset
);
1940 static int _replicator_emit_segment_line(const struct load_segment
*seg
, char *params
,
1941 size_t paramsize
, int *pos
)
1943 const struct load_segment
*rlog_seg
;
1944 struct replicator_site
*rsite
;
1945 char rlogbuf
[DM_FORMAT_DEV_BUFSIZE
];
1946 unsigned parm_count
;
1948 if (!seg
->log
|| !_build_dev_string(rlogbuf
, sizeof(rlogbuf
), seg
->log
))
1951 rlog_seg
= dm_list_item(dm_list_last(&seg
->log
->props
.segs
),
1952 struct load_segment
);
1954 EMIT_PARAMS(*pos
, "%s 4 %s 0 auto %" PRIu64
,
1955 seg
->rlog_type
, rlogbuf
, rlog_seg
->size
);
1957 dm_list_iterate_items(rsite
, &seg
->rsites
) {
1958 parm_count
= (rsite
->fall_behind_data
1959 || rsite
->fall_behind_ios
1960 || rsite
->async_timeout
) ? 4 : 2;
1962 EMIT_PARAMS(*pos
, " blockdev %u %u %s", parm_count
, rsite
->rsite_index
,
1963 (rsite
->mode
== DM_REPLICATOR_SYNC
) ? "synchronous" : "asynchronous");
1965 if (rsite
->fall_behind_data
)
1966 EMIT_PARAMS(*pos
, " data %" PRIu64
, rsite
->fall_behind_data
);
1967 else if (rsite
->fall_behind_ios
)
1968 EMIT_PARAMS(*pos
, " ios %" PRIu32
, rsite
->fall_behind_ios
);
1969 else if (rsite
->async_timeout
)
1970 EMIT_PARAMS(*pos
, " timeout %" PRIu32
, rsite
->async_timeout
);
1977 * Returns: 1 on success, 0 on failure
1979 static int _mirror_emit_segment_line(struct dm_task
*dmt
, struct load_segment
*seg
,
1980 char *params
, size_t paramsize
)
1982 int block_on_error
= 0;
1983 int handle_errors
= 0;
1984 int dm_log_userspace
= 0;
1986 unsigned log_parm_count
;
1988 char logbuf
[DM_FORMAT_DEV_BUFSIZE
];
1989 const char *logtype
;
1990 unsigned kmaj
= 0, kmin
= 0, krel
= 0;
1992 if (uname(&uts
) == -1) {
1993 log_error("Cannot read kernel release version.");
1997 /* Kernels with a major number of 2 always had 3 parts. */
1998 parts
= sscanf(uts
.release
, "%u.%u.%u", &kmaj
, &kmin
, &krel
);
1999 if (parts
< 1 || (kmaj
< 3 && parts
< 3)) {
2000 log_error("Wrong kernel release version %s.", uts
.release
);
2004 if ((seg
->flags
& DM_BLOCK_ON_ERROR
)) {
2006 * Originally, block_on_error was an argument to the log
2007 * portion of the mirror CTR table. It was renamed to
2008 * "handle_errors" and now resides in the 'features'
2009 * section of the mirror CTR table (i.e. at the end).
2011 * We can identify whether to use "block_on_error" or
2012 * "handle_errors" by the dm-mirror module's version
2013 * number (>= 1.12) or by the kernel version (>= 2.6.22).
2015 if (KERNEL_VERSION(kmaj
, kmin
, krel
) >= KERNEL_VERSION(2, 6, 22))
2021 if (seg
->clustered
) {
2022 /* Cluster mirrors require a UUID */
2027 * Cluster mirrors used to have their own log
2028 * types. Now they are accessed through the
2029 * userspace log type.
2031 * The dm-log-userspace module was added to the
2034 if (KERNEL_VERSION(kmaj
, kmin
, krel
) >= KERNEL_VERSION(2, 6, 31))
2035 dm_log_userspace
= 1;
2041 /* [no]sync, block_on_error etc. */
2042 log_parm_count
+= hweight32(seg
->flags
);
2044 /* "handle_errors" is a feature arg now */
2048 /* DM_CORELOG does not count in the param list */
2049 if (seg
->flags
& DM_CORELOG
)
2052 if (seg
->clustered
) {
2053 log_parm_count
++; /* For UUID */
2055 if (!dm_log_userspace
)
2056 EMIT_PARAMS(pos
, "clustered-");
2058 /* For clustered-* type field inserted later */
2067 if (!_build_dev_string(logbuf
, sizeof(logbuf
), seg
->log
))
2071 if (dm_log_userspace
)
2072 EMIT_PARAMS(pos
, "userspace %u %s clustered-%s",
2073 log_parm_count
, seg
->uuid
, logtype
);
2075 EMIT_PARAMS(pos
, "%s %u", logtype
, log_parm_count
);
2078 EMIT_PARAMS(pos
, " %s", logbuf
);
2080 EMIT_PARAMS(pos
, " %u", seg
->region_size
);
2082 if (seg
->clustered
&& !dm_log_userspace
)
2083 EMIT_PARAMS(pos
, " %s", seg
->uuid
);
2085 if ((seg
->flags
& DM_NOSYNC
))
2086 EMIT_PARAMS(pos
, " nosync");
2087 else if ((seg
->flags
& DM_FORCESYNC
))
2088 EMIT_PARAMS(pos
, " sync");
2091 EMIT_PARAMS(pos
, " block_on_error");
2093 EMIT_PARAMS(pos
, " %u ", seg
->mirror_area_count
);
2095 if (_emit_areas_line(dmt
, seg
, params
, paramsize
, &pos
) <= 0)
2099 EMIT_PARAMS(pos
, " 1 handle_errors");
2104 static int _raid_emit_segment_line(struct dm_task
*dmt
, uint32_t major
,
2105 uint32_t minor
, struct load_segment
*seg
,
2106 uint64_t *seg_start
, char *params
,
2110 int param_count
= 1; /* mandatory 'chunk size'/'stripe size' arg */
2113 if ((seg
->flags
& DM_NOSYNC
) || (seg
->flags
& DM_FORCESYNC
))
2116 if (seg
->region_size
)
2119 /* rebuilds is 64-bit */
2120 param_count
+= 2 * hweight32(seg
->rebuilds
& 0xFFFFFFFF);
2121 param_count
+= 2 * hweight32(seg
->rebuilds
>> 32);
2123 if ((seg
->type
== SEG_RAID1
) && seg
->stripe_size
)
2124 log_error("WARNING: Ignoring RAID1 stripe size");
2126 EMIT_PARAMS(pos
, "%s %d %u", dm_segtypes
[seg
->type
].target
,
2127 param_count
, seg
->stripe_size
);
2129 if (seg
->flags
& DM_NOSYNC
)
2130 EMIT_PARAMS(pos
, " nosync");
2131 else if (seg
->flags
& DM_FORCESYNC
)
2132 EMIT_PARAMS(pos
, " sync");
2134 if (seg
->region_size
)
2135 EMIT_PARAMS(pos
, " region_size %u", seg
->region_size
);
2137 for (i
= 0; i
< (seg
->area_count
/ 2); i
++)
2138 if (seg
->rebuilds
& (1 << i
))
2139 EMIT_PARAMS(pos
, " rebuild %u", i
);
2141 /* Print number of metadata/data device pairs */
2142 EMIT_PARAMS(pos
, " %u", seg
->area_count
/2);
2144 if (_emit_areas_line(dmt
, seg
, params
, paramsize
, &pos
) <= 0)
2150 static int _emit_segment_line(struct dm_task
*dmt
, uint32_t major
,
2151 uint32_t minor
, struct load_segment
*seg
,
2152 uint64_t *seg_start
, char *params
,
2157 int target_type_is_raid
= 0;
2158 char originbuf
[DM_FORMAT_DEV_BUFSIZE
], cowbuf
[DM_FORMAT_DEV_BUFSIZE
];
2159 char pool
[DM_FORMAT_DEV_BUFSIZE
], metadata
[DM_FORMAT_DEV_BUFSIZE
];
2167 /* Mirrors are pretty complicated - now in separate function */
2168 r
= _mirror_emit_segment_line(dmt
, seg
, params
, paramsize
);
2172 case SEG_REPLICATOR
:
2173 if ((r
= _replicator_emit_segment_line(seg
, params
, paramsize
,
2179 case SEG_REPLICATOR_DEV
:
2180 if (!seg
->replicator
|| !_build_dev_string(originbuf
,
2185 EMIT_PARAMS(pos
, "%s %" PRIu64
, originbuf
, seg
->rdevice_index
);
2188 case SEG_SNAPSHOT_MERGE
:
2189 if (!_build_dev_string(originbuf
, sizeof(originbuf
), seg
->origin
))
2191 if (!_build_dev_string(cowbuf
, sizeof(cowbuf
), seg
->cow
))
2193 EMIT_PARAMS(pos
, "%s %s %c %d", originbuf
, cowbuf
,
2194 seg
->persistent
? 'P' : 'N', seg
->chunk_size
);
2196 case SEG_SNAPSHOT_ORIGIN
:
2197 if (!_build_dev_string(originbuf
, sizeof(originbuf
), seg
->origin
))
2199 EMIT_PARAMS(pos
, "%s", originbuf
);
2202 EMIT_PARAMS(pos
, "%u %u ", seg
->area_count
, seg
->stripe_size
);
2205 EMIT_PARAMS(pos
, "%s%s%s%s%s %s %" PRIu64
" ", seg
->cipher
,
2206 seg
->chainmode
? "-" : "", seg
->chainmode
?: "",
2207 seg
->iv
? "-" : "", seg
->iv
?: "", seg
->key
,
2208 seg
->iv_offset
!= DM_CRYPT_IV_DEFAULT
?
2209 seg
->iv_offset
: *seg_start
);
2220 target_type_is_raid
= 1;
2221 r
= _raid_emit_segment_line(dmt
, major
, minor
, seg
, seg_start
,
2228 if (!_build_dev_string(metadata
, sizeof(metadata
), seg
->metadata
))
2230 if (!_build_dev_string(pool
, sizeof(pool
), seg
->pool
))
2232 EMIT_PARAMS(pos
, "%s %s %d %" PRIu64
" %s", metadata
, pool
,
2233 seg
->data_block_size
, seg
->low_water_mark
,
2234 seg
->skip_block_zeroing
? "1 skip_block_zeroing" : "0");
2237 if (!_build_dev_string(pool
, sizeof(pool
), seg
->pool
))
2239 EMIT_PARAMS(pos
, "%s %d", pool
, seg
->device_id
);
2245 case SEG_REPLICATOR
:
2247 case SEG_SNAPSHOT_ORIGIN
:
2248 case SEG_SNAPSHOT_MERGE
:
2255 case SEG_REPLICATOR_DEV
:
2257 if ((r
= _emit_areas_line(dmt
, seg
, params
, paramsize
, &pos
)) <= 0) {
2262 log_error("No parameters supplied for %s target "
2263 "%u:%u.", dm_segtypes
[seg
->type
].target
,
2270 log_debug("Adding target to (%" PRIu32
":%" PRIu32
"): %" PRIu64
2271 " %" PRIu64
" %s %s", major
, minor
,
2272 *seg_start
, seg
->size
, target_type_is_raid
? "raid" :
2273 dm_segtypes
[seg
->type
].target
, params
);
2275 if (!dm_task_add_target(dmt
, *seg_start
, seg
->size
,
2276 target_type_is_raid
? "raid" :
2277 dm_segtypes
[seg
->type
].target
, params
))
2280 *seg_start
+= seg
->size
;
2287 static int _emit_segment(struct dm_task
*dmt
, uint32_t major
, uint32_t minor
,
2288 struct load_segment
*seg
, uint64_t *seg_start
)
2291 size_t paramsize
= 4096;
2295 if (!(params
= dm_malloc(paramsize
))) {
2296 log_error("Insufficient space for target parameters.");
2301 ret
= _emit_segment_line(dmt
, major
, minor
, seg
, seg_start
,
2311 log_debug("Insufficient space in params[%" PRIsize_t
2312 "] for target parameters.", paramsize
);
2315 } while (paramsize
< MAX_TARGET_PARAMSIZE
);
2317 log_error("Target parameter size too big. Aborting.");
2321 static int _load_node(struct dm_tree_node
*dnode
)
2324 struct dm_task
*dmt
;
2325 struct load_segment
*seg
;
2326 uint64_t seg_start
= 0, existing_table_size
;
2328 log_verbose("Loading %s table (%" PRIu32
":%" PRIu32
")", dnode
->name
,
2329 dnode
->info
.major
, dnode
->info
.minor
);
2331 if (!(dmt
= dm_task_create(DM_DEVICE_RELOAD
))) {
2332 log_error("Reload dm_task creation failed for %s", dnode
->name
);
2336 if (!dm_task_set_major(dmt
, dnode
->info
.major
) ||
2337 !dm_task_set_minor(dmt
, dnode
->info
.minor
)) {
2338 log_error("Failed to set device number for %s reload.", dnode
->name
);
2342 if (dnode
->props
.read_only
&& !dm_task_set_ro(dmt
)) {
2343 log_error("Failed to set read only flag for %s", dnode
->name
);
2347 if (!dm_task_no_open_count(dmt
))
2348 log_error("Failed to disable open_count");
2350 dm_list_iterate_items(seg
, &dnode
->props
.segs
)
2351 if (!_emit_segment(dmt
, dnode
->info
.major
, dnode
->info
.minor
,
2355 if (!dm_task_suppress_identical_reload(dmt
))
2356 log_error("Failed to suppress reload of identical tables.");
2358 if ((r
= dm_task_run(dmt
))) {
2359 r
= dm_task_get_info(dmt
, &dnode
->info
);
2360 if (r
&& !dnode
->info
.inactive_table
)
2361 log_verbose("Suppressed %s (%" PRIu32
":%" PRIu32
2362 ") identical table reload.",
2364 dnode
->info
.major
, dnode
->info
.minor
);
2366 existing_table_size
= dm_task_get_existing_table_size(dmt
);
2367 if ((dnode
->props
.size_changed
=
2368 (existing_table_size
== seg_start
) ? 0 : 1)) {
2370 * Kernel usually skips size validation on zero-length devices
2371 * now so no need to preload them.
2373 /* FIXME In which kernel version did this begin? */
2374 if (!existing_table_size
&& dnode
->props
.delay_resume_if_new
)
2375 dnode
->props
.size_changed
= 0;
2377 log_debug("Table size changed from %" PRIu64
" to %"
2378 PRIu64
" for %s (%" PRIu32
":%" PRIu32
").%s",
2379 existing_table_size
, seg_start
, dnode
->name
,
2380 dnode
->info
.major
, dnode
->info
.minor
,
2381 dnode
->props
.size_changed
? "" : " (Ignoring.)");
2385 dnode
->props
.segment_count
= 0;
2388 dm_task_destroy(dmt
);
2393 int dm_tree_preload_children(struct dm_tree_node
*dnode
,
2394 const char *uuid_prefix
,
2395 size_t uuid_prefix_len
)
2398 void *handle
= NULL
;
2399 struct dm_tree_node
*child
;
2400 struct dm_info newinfo
;
2401 int update_devs_flag
= 0;
2403 /* Preload children first */
2404 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
2405 /* Skip existing non-device-mapper devices */
2406 if (!child
->info
.exists
&& child
->info
.major
)
2409 /* Ignore if it doesn't belong to this VG */
2410 if (child
->info
.exists
&&
2411 !_uuid_prefix_matches(child
->uuid
, uuid_prefix
, uuid_prefix_len
))
2414 if (dm_tree_node_num_children(child
, 0))
2415 if (!dm_tree_preload_children(child
, uuid_prefix
, uuid_prefix_len
))
2418 /* FIXME Cope if name exists with no uuid? */
2419 if (!child
->info
.exists
&& !_create_node(child
))
2422 if (!child
->info
.inactive_table
&&
2423 child
->props
.segment_count
&&
2427 /* Propagate device size change change */
2428 if (child
->props
.size_changed
)
2429 dnode
->props
.size_changed
= 1;
2431 /* Resume device immediately if it has parents and its size changed */
2432 if (!dm_tree_node_num_children(child
, 1) || !child
->props
.size_changed
)
2435 if (!child
->info
.inactive_table
&& !child
->info
.suspended
)
2438 if (!_resume_node(child
->name
, child
->info
.major
, child
->info
.minor
,
2439 child
->props
.read_ahead
, child
->props
.read_ahead_flags
,
2440 &newinfo
, &child
->dtree
->cookie
, child
->udev_flags
,
2441 child
->info
.suspended
)) {
2442 log_error("Unable to resume %s (%" PRIu32
2443 ":%" PRIu32
")", child
->name
, child
->info
.major
,
2449 /* Update cached info */
2450 child
->info
= newinfo
;
2452 * Prepare for immediate synchronization with udev and flush all stacked
2453 * dev node operations if requested by immediate_dev_node property. But
2454 * finish processing current level in the tree first.
2456 if (child
->props
.immediate_dev_node
)
2457 update_devs_flag
= 1;
2460 if (update_devs_flag
||
2461 (!dnode
->info
.exists
&& dnode
->callback
)) {
2462 if (!dm_udev_wait(dm_tree_get_cookie(dnode
)))
2464 dm_tree_set_cookie(dnode
, 0);
2466 if (!dnode
->info
.exists
&& dnode
->callback
&&
2467 !dnode
->callback(child
, DM_NODE_CALLBACK_PRELOADED
,
2468 dnode
->callback_data
))
2476 * Returns 1 if unsure.
2478 int dm_tree_children_use_uuid(struct dm_tree_node
*dnode
,
2479 const char *uuid_prefix
,
2480 size_t uuid_prefix_len
)
2482 void *handle
= NULL
;
2483 struct dm_tree_node
*child
= dnode
;
2486 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
2487 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
2488 log_error("Failed to get uuid for dtree node.");
2492 if (_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
2495 if (dm_tree_node_num_children(child
, 0))
2496 dm_tree_children_use_uuid(child
, uuid_prefix
, uuid_prefix_len
);
2505 static struct load_segment
*_add_segment(struct dm_tree_node
*dnode
, unsigned type
, uint64_t size
)
2507 struct load_segment
*seg
;
2509 if (!(seg
= dm_pool_zalloc(dnode
->dtree
->mem
, sizeof(*seg
)))) {
2510 log_error("dtree node segment allocation failed");
2516 seg
->area_count
= 0;
2517 dm_list_init(&seg
->areas
);
2518 seg
->stripe_size
= 0;
2519 seg
->persistent
= 0;
2520 seg
->chunk_size
= 0;
2525 dm_list_add(&dnode
->props
.segs
, &seg
->list
);
2526 dnode
->props
.segment_count
++;
2531 int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node
*dnode
,
2533 const char *origin_uuid
)
2535 struct load_segment
*seg
;
2536 struct dm_tree_node
*origin_node
;
2538 if (!(seg
= _add_segment(dnode
, SEG_SNAPSHOT_ORIGIN
, size
)))
2541 if (!(origin_node
= dm_tree_find_node_by_uuid(dnode
->dtree
, origin_uuid
))) {
2542 log_error("Couldn't find snapshot origin uuid %s.", origin_uuid
);
2546 seg
->origin
= origin_node
;
2547 if (!_link_tree_nodes(dnode
, origin_node
))
2550 /* Resume snapshot origins after new snapshots */
2551 dnode
->activation_priority
= 1;
2554 * Don't resume the origin immediately in case it is a non-trivial
2555 * target that must not be active more than once concurrently!
2557 origin_node
->props
.delay_resume_if_new
= 1;
2562 static int _add_snapshot_target(struct dm_tree_node
*node
,
2564 const char *origin_uuid
,
2565 const char *cow_uuid
,
2566 const char *merge_uuid
,
2568 uint32_t chunk_size
)
2570 struct load_segment
*seg
;
2571 struct dm_tree_node
*origin_node
, *cow_node
, *merge_node
;
2574 seg_type
= !merge_uuid
? SEG_SNAPSHOT
: SEG_SNAPSHOT_MERGE
;
2576 if (!(seg
= _add_segment(node
, seg_type
, size
)))
2579 if (!(origin_node
= dm_tree_find_node_by_uuid(node
->dtree
, origin_uuid
))) {
2580 log_error("Couldn't find snapshot origin uuid %s.", origin_uuid
);
2584 seg
->origin
= origin_node
;
2585 if (!_link_tree_nodes(node
, origin_node
))
2588 if (!(cow_node
= dm_tree_find_node_by_uuid(node
->dtree
, cow_uuid
))) {
2589 log_error("Couldn't find snapshot COW device uuid %s.", cow_uuid
);
2593 seg
->cow
= cow_node
;
2594 if (!_link_tree_nodes(node
, cow_node
))
2597 seg
->persistent
= persistent
? 1 : 0;
2598 seg
->chunk_size
= chunk_size
;
2601 if (!(merge_node
= dm_tree_find_node_by_uuid(node
->dtree
, merge_uuid
))) {
2602 /* not a pure error, merging snapshot may have been deactivated */
2603 log_verbose("Couldn't find merging snapshot uuid %s.", merge_uuid
);
2605 seg
->merge
= merge_node
;
2606 /* must not link merging snapshot, would undermine activation_priority below */
2609 /* Resume snapshot-merge (acting origin) after other snapshots */
2610 node
->activation_priority
= 1;
2612 /* Resume merging snapshot after snapshot-merge */
2613 seg
->merge
->activation_priority
= 2;
2621 int dm_tree_node_add_snapshot_target(struct dm_tree_node
*node
,
2623 const char *origin_uuid
,
2624 const char *cow_uuid
,
2626 uint32_t chunk_size
)
2628 return _add_snapshot_target(node
, size
, origin_uuid
, cow_uuid
,
2629 NULL
, persistent
, chunk_size
);
2632 int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node
*node
,
2634 const char *origin_uuid
,
2635 const char *cow_uuid
,
2636 const char *merge_uuid
,
2637 uint32_t chunk_size
)
2639 return _add_snapshot_target(node
, size
, origin_uuid
, cow_uuid
,
2640 merge_uuid
, 1, chunk_size
);
2643 int dm_tree_node_add_error_target(struct dm_tree_node
*node
,
2646 if (!_add_segment(node
, SEG_ERROR
, size
))
2652 int dm_tree_node_add_zero_target(struct dm_tree_node
*node
,
2655 if (!_add_segment(node
, SEG_ZERO
, size
))
2661 int dm_tree_node_add_linear_target(struct dm_tree_node
*node
,
2664 if (!_add_segment(node
, SEG_LINEAR
, size
))
2670 int dm_tree_node_add_striped_target(struct dm_tree_node
*node
,
2672 uint32_t stripe_size
)
2674 struct load_segment
*seg
;
2676 if (!(seg
= _add_segment(node
, SEG_STRIPED
, size
)))
2679 seg
->stripe_size
= stripe_size
;
2684 int dm_tree_node_add_crypt_target(struct dm_tree_node
*node
,
2687 const char *chainmode
,
2692 struct load_segment
*seg
;
2694 if (!(seg
= _add_segment(node
, SEG_CRYPT
, size
)))
2697 seg
->cipher
= cipher
;
2698 seg
->chainmode
= chainmode
;
2700 seg
->iv_offset
= iv_offset
;
2706 int dm_tree_node_add_mirror_target_log(struct dm_tree_node
*node
,
2707 uint32_t region_size
,
2709 const char *log_uuid
,
2710 unsigned area_count
,
2713 struct dm_tree_node
*log_node
= NULL
;
2714 struct load_segment
*seg
;
2716 if (!node
->props
.segment_count
) {
2717 log_error(INTERNAL_ERROR
"Attempt to add target area to missing segment.");
2721 seg
= dm_list_item(dm_list_last(&node
->props
.segs
), struct load_segment
);
2724 if (!(seg
->uuid
= dm_pool_strdup(node
->dtree
->mem
, log_uuid
))) {
2725 log_error("log uuid pool_strdup failed");
2728 if ((flags
& DM_CORELOG
))
2729 /* For pvmove: immediate resume (for size validation) isn't needed. */
2730 node
->props
.delay_resume_if_new
= 1;
2732 if (!(log_node
= dm_tree_find_node_by_uuid(node
->dtree
, log_uuid
))) {
2733 log_error("Couldn't find mirror log uuid %s.", log_uuid
);
2738 log_node
->props
.immediate_dev_node
= 1;
2740 /* The kernel validates the size of disk logs. */
2741 /* FIXME Propagate to any devices below */
2742 log_node
->props
.delay_resume_if_new
= 0;
2744 if (!_link_tree_nodes(node
, log_node
))
2749 seg
->log
= log_node
;
2750 seg
->region_size
= region_size
;
2751 seg
->clustered
= clustered
;
2752 seg
->mirror_area_count
= area_count
;
2758 int dm_tree_node_add_mirror_target(struct dm_tree_node
*node
,
2761 if (!_add_segment(node
, SEG_MIRRORED
, size
))
2767 int dm_tree_node_add_raid_target(struct dm_tree_node
*node
,
2769 const char *raid_type
,
2770 uint32_t region_size
,
2771 uint32_t stripe_size
,
2776 struct load_segment
*seg
= NULL
;
2778 for (i
= 0; dm_segtypes
[i
].target
&& !seg
; i
++)
2779 if (!strcmp(raid_type
, dm_segtypes
[i
].target
))
2780 if (!(seg
= _add_segment(node
,
2781 dm_segtypes
[i
].type
, size
)))
2787 seg
->region_size
= region_size
;
2788 seg
->stripe_size
= stripe_size
;
2789 seg
->area_count
= 0;
2790 seg
->rebuilds
= rebuilds
;
2796 int dm_tree_node_add_replicator_target(struct dm_tree_node
*node
,
2798 const char *rlog_uuid
,
2799 const char *rlog_type
,
2800 unsigned rsite_index
,
2801 dm_replicator_mode_t mode
,
2802 uint32_t async_timeout
,
2803 uint64_t fall_behind_data
,
2804 uint32_t fall_behind_ios
)
2806 struct load_segment
*rseg
;
2807 struct replicator_site
*rsite
;
2809 /* Local site0 - adds replicator segment and links rlog device */
2810 if (rsite_index
== REPLICATOR_LOCAL_SITE
) {
2811 if (node
->props
.segment_count
) {
2812 log_error(INTERNAL_ERROR
"Attempt to add replicator segment to already used node.");
2816 if (!(rseg
= _add_segment(node
, SEG_REPLICATOR
, size
)))
2819 if (!(rseg
->log
= dm_tree_find_node_by_uuid(node
->dtree
, rlog_uuid
))) {
2820 log_error("Missing replicator log uuid %s.", rlog_uuid
);
2824 if (!_link_tree_nodes(node
, rseg
->log
))
2827 if (strcmp(rlog_type
, "ringbuffer") != 0) {
2828 log_error("Unsupported replicator log type %s.", rlog_type
);
2832 if (!(rseg
->rlog_type
= dm_pool_strdup(node
->dtree
->mem
, rlog_type
)))
2835 dm_list_init(&rseg
->rsites
);
2836 rseg
->rdevice_count
= 0;
2837 node
->activation_priority
= 1;
2840 /* Add site to segment */
2841 if (mode
== DM_REPLICATOR_SYNC
2842 && (async_timeout
|| fall_behind_ios
|| fall_behind_data
)) {
2843 log_error("Async parameters passed for synchronnous replicator.");
2847 if (node
->props
.segment_count
!= 1) {
2848 log_error(INTERNAL_ERROR
"Attempt to add remote site area before setting replicator log.");
2852 rseg
= dm_list_item(dm_list_last(&node
->props
.segs
), struct load_segment
);
2853 if (rseg
->type
!= SEG_REPLICATOR
) {
2854 log_error(INTERNAL_ERROR
"Attempt to use non replicator segment %s.",
2855 dm_segtypes
[rseg
->type
].target
);
2859 if (!(rsite
= dm_pool_zalloc(node
->dtree
->mem
, sizeof(*rsite
)))) {
2860 log_error("Failed to allocate remote site segment.");
2864 dm_list_add(&rseg
->rsites
, &rsite
->list
);
2865 rseg
->rsite_count
++;
2868 rsite
->async_timeout
= async_timeout
;
2869 rsite
->fall_behind_data
= fall_behind_data
;
2870 rsite
->fall_behind_ios
= fall_behind_ios
;
2871 rsite
->rsite_index
= rsite_index
;
2876 /* Appends device node to Replicator */
2877 int dm_tree_node_add_replicator_dev_target(struct dm_tree_node
*node
,
2879 const char *replicator_uuid
,
2880 uint64_t rdevice_index
,
2881 const char *rdev_uuid
,
2882 unsigned rsite_index
,
2883 const char *slog_uuid
,
2884 uint32_t slog_flags
,
2885 uint32_t slog_region_size
)
2887 struct seg_area
*area
;
2888 struct load_segment
*rseg
;
2889 struct load_segment
*rep_seg
;
2891 if (rsite_index
== REPLICATOR_LOCAL_SITE
) {
2892 /* Site index for local target */
2893 if (!(rseg
= _add_segment(node
, SEG_REPLICATOR_DEV
, size
)))
2896 if (!(rseg
->replicator
= dm_tree_find_node_by_uuid(node
->dtree
, replicator_uuid
))) {
2897 log_error("Missing replicator uuid %s.", replicator_uuid
);
2901 /* Local slink0 for replicator must be always initialized first */
2902 if (rseg
->replicator
->props
.segment_count
!= 1) {
2903 log_error(INTERNAL_ERROR
"Attempt to use non replicator segment.");
2907 rep_seg
= dm_list_item(dm_list_last(&rseg
->replicator
->props
.segs
), struct load_segment
);
2908 if (rep_seg
->type
!= SEG_REPLICATOR
) {
2909 log_error(INTERNAL_ERROR
"Attempt to use non replicator segment %s.",
2910 dm_segtypes
[rep_seg
->type
].target
);
2913 rep_seg
->rdevice_count
++;
2915 if (!_link_tree_nodes(node
, rseg
->replicator
))
2918 rseg
->rdevice_index
= rdevice_index
;
2920 /* Local slink0 for replicator must be always initialized first */
2921 if (node
->props
.segment_count
!= 1) {
2922 log_error(INTERNAL_ERROR
"Attempt to use non replicator-dev segment.");
2926 rseg
= dm_list_item(dm_list_last(&node
->props
.segs
), struct load_segment
);
2927 if (rseg
->type
!= SEG_REPLICATOR_DEV
) {
2928 log_error(INTERNAL_ERROR
"Attempt to use non replicator-dev segment %s.",
2929 dm_segtypes
[rseg
->type
].target
);
2934 if (!(slog_flags
& DM_CORELOG
) && !slog_uuid
) {
2935 log_error("Unspecified sync log uuid.");
2939 if (!dm_tree_node_add_target_area(node
, NULL
, rdev_uuid
, 0))
2942 area
= dm_list_item(dm_list_last(&rseg
->areas
), struct seg_area
);
2944 if (!(slog_flags
& DM_CORELOG
)) {
2945 if (!(area
->slog
= dm_tree_find_node_by_uuid(node
->dtree
, slog_uuid
))) {
2946 log_error("Couldn't find sync log uuid %s.", slog_uuid
);
2950 if (!_link_tree_nodes(node
, area
->slog
))
2954 area
->flags
= slog_flags
;
2955 area
->region_size
= slog_region_size
;
2956 area
->rsite_index
= rsite_index
;
2961 static int _thin_validate_device_id(uint32_t device_id
)
2963 if (device_id
> DM_THIN_MAX_DEVICE_ID
) {
2964 log_error("Device id %u is higher then %u.",
2965 device_id
, DM_THIN_MAX_DEVICE_ID
);
2972 int dm_tree_node_add_thin_pool_target(struct dm_tree_node
*node
,
2974 uint64_t transaction_id
,
2975 const char *metadata_uuid
,
2976 const char *pool_uuid
,
2977 uint32_t data_block_size
,
2978 uint64_t low_water_mark
,
2979 unsigned skip_block_zeroing
)
2981 struct load_segment
*seg
, *mseg
;
2982 uint64_t devsize
= 0;
2984 * Max supported size for thin pool metadata device
2985 * Limitation is hardcoded into kernel and bigger
2986 * device size is not accepted. (16978542592)
2988 const uint64_t max_metadata_size
=
2989 255ULL * (1 << 14) * (4096 / (1 << 9)) - 256 * 1024;
2991 if (data_block_size
< DM_THIN_MIN_DATA_BLOCK_SIZE
) {
2992 log_error("Data block size %u is lower then %u sectors.",
2993 data_block_size
, DM_THIN_MIN_DATA_BLOCK_SIZE
);
2997 if (data_block_size
> DM_THIN_MAX_DATA_BLOCK_SIZE
) {
2998 log_error("Data block size %u is higher then %u sectors.",
2999 data_block_size
, DM_THIN_MAX_DATA_BLOCK_SIZE
);
3003 if (!(seg
= _add_segment(node
, SEG_THIN_POOL
, size
)))
3006 if (!(seg
->metadata
= dm_tree_find_node_by_uuid(node
->dtree
, metadata_uuid
))) {
3007 log_error("Missing metadata uuid %s.", metadata_uuid
);
3011 if (!_link_tree_nodes(node
, seg
->metadata
))
3014 /* FIXME: more complex target may need more tweaks */
3015 dm_list_iterate_items(mseg
, &seg
->metadata
->props
.segs
) {
3016 devsize
+= mseg
->size
;
3017 if (devsize
> max_metadata_size
) {
3018 log_debug("Ignoring %" PRIu64
" of device.",
3019 devsize
- max_metadata_size
);
3020 mseg
->size
-= (devsize
- max_metadata_size
);
3021 devsize
= max_metadata_size
;
3022 /* FIXME: drop remaining segs */
3026 if (!(seg
->pool
= dm_tree_find_node_by_uuid(node
->dtree
, pool_uuid
))) {
3027 log_error("Missing pool uuid %s.", pool_uuid
);
3031 if (!_link_tree_nodes(node
, seg
->pool
))
3034 node
->props
.send_messages
= 1;
3035 seg
->transaction_id
= transaction_id
;
3036 seg
->low_water_mark
= low_water_mark
;
3037 seg
->data_block_size
= data_block_size
;
3038 seg
->skip_block_zeroing
= skip_block_zeroing
;
3039 dm_list_init(&seg
->thin_messages
);
3044 int dm_tree_node_add_thin_pool_message(struct dm_tree_node
*node
,
3045 dm_thin_message_t type
,
3046 uint64_t id1
, uint64_t id2
)
3048 struct load_segment
*seg
;
3049 struct thin_message
*tm
;
3051 if (node
->props
.segment_count
!= 1) {
3052 log_error("Thin pool node must have only one segment.");
3056 seg
= dm_list_item(dm_list_last(&node
->props
.segs
), struct load_segment
);
3057 if (seg
->type
!= SEG_THIN_POOL
) {
3058 log_error("Thin pool node has segment type %s.",
3059 dm_segtypes
[seg
->type
].target
);
3063 if (!(tm
= dm_pool_zalloc(node
->dtree
->mem
, sizeof (*tm
)))) {
3064 log_error("Failed to allocate thin message.");
3069 case DM_THIN_MESSAGE_CREATE_SNAP
:
3070 /* If the thin origin is active, it must be suspend first! */
3072 log_error("Cannot use same device id for origin and its snapshot.");
3075 if (!_thin_validate_device_id(id1
) ||
3076 !_thin_validate_device_id(id2
))
3078 tm
->message
.u
.m_create_snap
.device_id
= id1
;
3079 tm
->message
.u
.m_create_snap
.origin_id
= id2
;
3081 case DM_THIN_MESSAGE_CREATE_THIN
:
3082 if (!_thin_validate_device_id(id1
))
3084 tm
->message
.u
.m_create_thin
.device_id
= id1
;
3085 tm
->expected_errno
= EEXIST
;
3087 case DM_THIN_MESSAGE_DELETE
:
3088 if (!_thin_validate_device_id(id1
))
3090 tm
->message
.u
.m_delete
.device_id
= id1
;
3091 tm
->expected_errno
= ENODATA
;
3093 case DM_THIN_MESSAGE_SET_TRANSACTION_ID
:
3094 if ((id1
+ 1) != id2
) {
3095 log_error("New transaction id must be sequential.");
3096 return 0; /* FIXME: Maybe too strict here? */
3098 if (id2
!= seg
->transaction_id
) {
3099 log_error("Current transaction id is different from thin pool.");
3100 return 0; /* FIXME: Maybe too strict here? */
3102 tm
->message
.u
.m_set_transaction_id
.current_id
= id1
;
3103 tm
->message
.u
.m_set_transaction_id
.new_id
= id2
;
3106 log_error("Unsupported message type %d.", (int) type
);
3110 tm
->message
.type
= type
;
3111 dm_list_add(&seg
->thin_messages
, &tm
->list
);
3116 int dm_tree_node_add_thin_target(struct dm_tree_node
*node
,
3118 const char *pool_uuid
,
3121 struct dm_tree_node
*pool
;
3122 struct load_segment
*seg
;
3124 if (!(pool
= dm_tree_find_node_by_uuid(node
->dtree
, pool_uuid
))) {
3125 log_error("Missing thin pool uuid %s.", pool_uuid
);
3129 if (!_link_tree_nodes(node
, pool
))
3132 if (!_thin_validate_device_id(device_id
))
3135 if (!(seg
= _add_segment(node
, SEG_THIN
, size
)))
3139 seg
->device_id
= device_id
;
3145 int dm_get_status_thin_pool(struct dm_pool
*mem
, const char *params
,
3146 struct dm_status_thin_pool
**status
)
3148 struct dm_status_thin_pool
*s
;
3150 if (!(s
= dm_pool_zalloc(mem
, sizeof(struct dm_status_thin_pool
)))) {
3151 log_error("Failed to allocate thin_pool status structure.");
3155 /* FIXME: add support for held metadata root */
3156 if (sscanf(params
, "%" PRIu64
" %" PRIu64
"/%" PRIu64
" %" PRIu64
"/%" PRIu64
,
3158 &s
->used_metadata_blocks
,
3159 &s
->total_metadata_blocks
,
3160 &s
->used_data_blocks
,
3161 &s
->total_data_blocks
) != 5) {
3162 log_error("Failed to parse thin pool params: %s.", params
);
3171 int dm_get_status_thin(struct dm_pool
*mem
, const char *params
,
3172 struct dm_status_thin
**status
)
3174 struct dm_status_thin
*s
;
3176 if (!(s
= dm_pool_zalloc(mem
, sizeof(struct dm_status_thin
)))) {
3177 log_error("Failed to allocate thin status structure.");
3181 if (strchr(params
, '-')) {
3182 s
->mapped_sectors
= 0;
3183 s
->highest_mapped_sector
= 0;
3184 } else if (sscanf(params
, "%" PRIu64
" %" PRIu64
,
3186 &s
->highest_mapped_sector
) != 2) {
3187 log_error("Failed to parse thin params: %s.", params
);
3196 static int _add_area(struct dm_tree_node
*node
, struct load_segment
*seg
, struct dm_tree_node
*dev_node
, uint64_t offset
)
3198 struct seg_area
*area
;
3200 if (!(area
= dm_pool_zalloc(node
->dtree
->mem
, sizeof (*area
)))) {
3201 log_error("Failed to allocate target segment area.");
3205 area
->dev_node
= dev_node
;
3206 area
->offset
= offset
;
3208 dm_list_add(&seg
->areas
, &area
->list
);
3214 int dm_tree_node_add_target_area(struct dm_tree_node
*node
,
3215 const char *dev_name
,
3219 struct load_segment
*seg
;
3221 struct dm_tree_node
*dev_node
;
3223 if ((!dev_name
|| !*dev_name
) && (!uuid
|| !*uuid
)) {
3224 log_error("dm_tree_node_add_target_area called without device");
3229 if (!(dev_node
= dm_tree_find_node_by_uuid(node
->dtree
, uuid
))) {
3230 log_error("Couldn't find area uuid %s.", uuid
);
3233 if (!_link_tree_nodes(node
, dev_node
))
3236 if (stat(dev_name
, &info
) < 0) {
3237 log_error("Device %s not found.", dev_name
);
3241 if (!S_ISBLK(info
.st_mode
)) {
3242 log_error("Device %s is not a block device.", dev_name
);
3246 /* FIXME Check correct macro use */
3247 if (!(dev_node
= _add_dev(node
->dtree
, node
, MAJOR(info
.st_rdev
),
3248 MINOR(info
.st_rdev
), 0)))
3252 if (!node
->props
.segment_count
) {
3253 log_error(INTERNAL_ERROR
"Attempt to add target area to missing segment.");
3257 seg
= dm_list_item(dm_list_last(&node
->props
.segs
), struct load_segment
);
3259 if (!_add_area(node
, seg
, dev_node
, offset
))
3265 int dm_tree_node_add_null_area(struct dm_tree_node
*node
, uint64_t offset
)
3267 struct load_segment
*seg
;
3269 seg
= dm_list_item(dm_list_last(&node
->props
.segs
), struct load_segment
);
3271 switch (seg
->type
) {
3283 log_error("dm_tree_node_add_null_area() called on an unsupported segment type");
3287 if (!_add_area(node
, seg
, NULL
, offset
))
3293 void dm_tree_node_set_callback(struct dm_tree_node
*dnode
,
3294 dm_node_callback_fn cb
, void *data
)
3296 dnode
->callback
= cb
;
3297 dnode
->callback_data
= data
;