/*
 * Copyright (C) 2005-2017 Red Hat, Inc. All rights reserved.
 *
 * This file is part of the device-mapper userspace tools.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
15 #include "libdm/misc/dmlib.h"
16 #include "libdm-targets.h"
17 #include "libdm-common.h"
18 #include "libdm/misc/kdev_t.h"
19 #include "libdm/misc/dm-ioctl.h"
22 #include <sys/utsname.h>
24 #define MAX_TARGET_PARAMSIZE 500000
26 /* Supported segment types */
60 /* FIXME Add crypt and multipath support */
64 const char target
[16];
66 { SEG_CACHE
, "cache" },
67 { SEG_CRYPT
, "crypt" },
68 { SEG_ERROR
, "error" },
69 { SEG_LINEAR
, "linear" },
70 { SEG_MIRRORED
, "mirror" },
71 { SEG_SNAPSHOT
, "snapshot" },
72 { SEG_SNAPSHOT_ORIGIN
, "snapshot-origin" },
73 { SEG_SNAPSHOT_MERGE
, "snapshot-merge" },
74 { SEG_STRIPED
, "striped" },
76 { SEG_THIN_POOL
, "thin-pool"},
78 { SEG_RAID0
, "raid0"},
79 { SEG_RAID0_META
, "raid0_meta"},
80 { SEG_RAID1
, "raid1"},
81 { SEG_RAID10
, "raid10"},
82 { SEG_RAID4
, "raid4"},
83 { SEG_RAID5_N
, "raid5_n"},
84 { SEG_RAID5_LA
, "raid5_la"},
85 { SEG_RAID5_RA
, "raid5_ra"},
86 { SEG_RAID5_LS
, "raid5_ls"},
87 { SEG_RAID5_RS
, "raid5_rs"},
88 { SEG_RAID6_N_6
,"raid6_n_6"},
89 { SEG_RAID6_ZR
, "raid6_zr"},
90 { SEG_RAID6_NR
, "raid6_nr"},
91 { SEG_RAID6_NC
, "raid6_nc"},
92 { SEG_RAID6_LS_6
, "raid6_ls_6"},
93 { SEG_RAID6_RS_6
, "raid6_rs_6"},
94 { SEG_RAID6_LA_6
, "raid6_la_6"},
95 { SEG_RAID6_RA_6
, "raid6_ra_6"},
99 * WARNING: Since 'raid' target overloads this 1:1 mapping table
100 * for search do not add new enum elements past them!
102 { SEG_RAID5_LS
, "raid5"}, /* same as "raid5_ls" (default for MD also) */
103 { SEG_RAID6_ZR
, "raid6"}, /* same as "raid6_zr" */
104 { SEG_RAID10
, "raid10_near"}, /* same as "raid10" */
107 /* Some segment types have a list of areas of other devices attached */
111 struct dm_tree_node
*dev_node
;
116 struct dm_thin_message
{
117 dm_thin_message_t type
;
132 } m_set_transaction_id
;
136 struct thin_message
{
138 struct dm_thin_message message
;
142 /* Per-segment properties */
143 struct load_segment
{
150 unsigned area_count
; /* Linear + Striped + Mirrored + Crypt */
151 struct dm_list areas
; /* Linear + Striped + Mirrored + Crypt */
153 uint32_t stripe_size
; /* Striped + raid */
155 int persistent
; /* Snapshot */
156 uint32_t chunk_size
; /* Snapshot */
157 struct dm_tree_node
*cow
; /* Snapshot */
158 struct dm_tree_node
*origin
; /* Snapshot + Snapshot origin + Cache */
159 struct dm_tree_node
*merge
; /* Snapshot */
161 struct dm_tree_node
*log
; /* Mirror */
162 uint32_t region_size
; /* Mirror + raid */
163 unsigned clustered
; /* Mirror */
164 unsigned mirror_area_count
; /* Mirror */
165 uint64_t flags
; /* Mirror + Raid + Cache */
166 char *uuid
; /* Clustered mirror log */
168 const char *policy_name
; /* Cache */
169 unsigned policy_argc
; /* Cache */
170 struct dm_config_node
*policy_settings
; /* Cache */
172 const char *cipher
; /* Crypt */
173 const char *chainmode
; /* Crypt */
174 const char *iv
; /* Crypt */
175 uint64_t iv_offset
; /* Crypt */
176 const char *key
; /* Crypt */
178 int delta_disks
; /* raid reshape number of disks */
179 int data_offset
; /* raid reshape data offset on disk to set */
180 uint64_t rebuilds
[RAID_BITMAP_SIZE
]; /* raid */
181 uint64_t writemostly
[RAID_BITMAP_SIZE
]; /* raid */
182 uint32_t writebehind
; /* raid */
183 uint32_t max_recovery_rate
; /* raid kB/sec/disk */
184 uint32_t min_recovery_rate
; /* raid kB/sec/disk */
185 uint32_t data_copies
; /* raid10 data_copies */
187 struct dm_tree_node
*metadata
; /* Thin_pool + Cache */
188 struct dm_tree_node
*pool
; /* Thin_pool, Thin */
189 struct dm_tree_node
*external
; /* Thin */
190 struct dm_list thin_messages
; /* Thin_pool */
191 uint64_t transaction_id
; /* Thin_pool */
192 uint64_t low_water_mark
; /* Thin_pool */
193 uint32_t data_block_size
; /* Thin_pool + cache */
194 uint32_t migration_threshold
; /* Cache */
195 unsigned skip_block_zeroing
; /* Thin_pool */
196 unsigned ignore_discard
; /* Thin_pool target vsn 1.1 */
197 unsigned no_discard_passdown
; /* Thin_pool target vsn 1.1 */
198 unsigned error_if_no_space
; /* Thin pool target vsn 1.10 */
199 unsigned read_only
; /* Thin pool target vsn 1.3 */
200 uint32_t device_id
; /* Thin */
204 /* Per-device properties */
205 struct load_properties
{
211 uint32_t read_ahead_flags
;
213 unsigned segment_count
;
217 const char *new_name
;
219 /* If immediate_dev_node is set to 1, try to create the dev node
220 * as soon as possible (e.g. in preload stage even during traversal
221 * and processing of dm tree). This will also flush all stacked dev
222 * node operations, synchronizing with udev.
224 unsigned immediate_dev_node
;
227 * If the device size changed from zero and this is set,
228 * don't resume the device immediately, even if the device
229 * has parents. This works provided the parents do not
230 * validate the device size and is required by pvmove to
231 * avoid starting the mirror resync operation too early.
233 unsigned delay_resume_if_new
;
236 * Preload tree normally only loads and not resume, but there is
237 * automatic resume when target is extended, as it's believed
238 * there can be no i/o flying to this 'new' extended space
239 * from any device above. Reason is that preloaded target above
240 * may actually need to see its bigger subdevice before it
241 * gets suspended. As long as devices are simple linears
242 * there is no problem to resume bigger device in preload (before commit).
243 * However complex targets like thin-pool (raid,cache...)
244 * they shall not be resumed before their commit.
246 unsigned delay_resume_if_extended
;
249 * Call node_send_messages(), set to 2 if there are messages
250 * When != 0, it validates matching transaction id, thus thin-pools
251 * where transaction_id is passed as 0 are never validated, this
252 * allows external management of thin-pool TID.
254 unsigned send_messages
;
255 /* Skip suspending node's children, used when sending messages to thin-pool */
258 /* Suspend and Resume siblings after node activation with udev flags*/
259 unsigned reactivate_siblings
;
260 uint16_t reactivate_udev_flags
;
263 /* Two of these used to join two nodes with uses and used_by. */
264 struct dm_tree_link
{
266 struct dm_tree_node
*node
;
269 struct dm_tree_node
{
270 struct dm_tree
*dtree
;
276 struct dm_list uses
; /* Nodes this node uses */
277 struct dm_list used_by
; /* Nodes that use this node */
279 int activation_priority
; /* 0 gets activated first */
280 int implicit_deps
; /* 1 device only implicitly referenced */
282 uint16_t udev_flags
; /* Udev control flags */
284 void *context
; /* External supplied context */
286 struct load_properties props
; /* For creation/table (re)load */
289 * If presuspend of child node is needed
290 * Note: only direct child is allowed
292 struct dm_tree_node
*presuspend_node
;
295 dm_node_callback_fn callback
;
298 int activated
; /* tracks activation during preload */
303 struct dm_hash_table
*devs
;
304 struct dm_hash_table
*uuids
;
305 struct dm_tree_node root
;
306 int skip_lockfs
; /* 1 skips lockfs (for non-snapshots) */
307 int no_flush
; /* 1 sets noflush (mirrors/multipath) */
308 int retry_remove
; /* 1 retries remove if not successful */
310 char buf
[DM_NAME_LEN
+ 32]; /* print buffer for device_name (major:minor) */
311 const char * const *optional_uuid_suffixes
; /* uuid suffixes ignored when matching */
317 struct dm_tree
*dm_tree_create(void)
319 struct dm_pool
*dmem
;
320 struct dm_tree
*dtree
;
322 if (!(dmem
= dm_pool_create("dtree", 1024)) ||
323 !(dtree
= dm_pool_zalloc(dmem
, sizeof(*dtree
)))) {
324 log_error("Failed to allocate dtree.");
326 dm_pool_destroy(dmem
);
330 dtree
->root
.dtree
= dtree
;
331 dm_list_init(&dtree
->root
.uses
);
332 dm_list_init(&dtree
->root
.used_by
);
333 dtree
->skip_lockfs
= 0;
336 dtree
->optional_uuid_suffixes
= NULL
;
338 if (!(dtree
->devs
= dm_hash_create(8))) {
339 log_error("dtree hash creation failed");
340 dm_pool_destroy(dtree
->mem
);
344 if (!(dtree
->uuids
= dm_hash_create(32))) {
345 log_error("dtree uuid hash creation failed");
346 dm_hash_destroy(dtree
->devs
);
347 dm_pool_destroy(dtree
->mem
);
354 void dm_tree_free(struct dm_tree
*dtree
)
359 dm_hash_destroy(dtree
->uuids
);
360 dm_hash_destroy(dtree
->devs
);
361 dm_pool_destroy(dtree
->mem
);
364 void dm_tree_set_cookie(struct dm_tree_node
*node
, uint32_t cookie
)
366 node
->dtree
->cookie
= cookie
;
369 uint32_t dm_tree_get_cookie(struct dm_tree_node
*node
)
371 return node
->dtree
->cookie
;
374 void dm_tree_skip_lockfs(struct dm_tree_node
*dnode
)
376 dnode
->dtree
->skip_lockfs
= 1;
379 void dm_tree_use_no_flush_suspend(struct dm_tree_node
*dnode
)
381 dnode
->dtree
->no_flush
= 1;
384 void dm_tree_retry_remove(struct dm_tree_node
*dnode
)
386 dnode
->dtree
->retry_remove
= 1;
392 static int _nodes_are_linked(const struct dm_tree_node
*parent
,
393 const struct dm_tree_node
*child
)
395 struct dm_tree_link
*dlink
;
397 dm_list_iterate_items(dlink
, &parent
->uses
)
398 if (dlink
->node
== child
)
404 static int _link(struct dm_list
*list
, struct dm_tree_node
*node
)
406 struct dm_tree_link
*dlink
;
408 if (!(dlink
= dm_pool_alloc(node
->dtree
->mem
, sizeof(*dlink
)))) {
409 log_error("dtree link allocation failed");
414 dm_list_add(list
, &dlink
->list
);
419 static int _link_nodes(struct dm_tree_node
*parent
,
420 struct dm_tree_node
*child
)
422 if (_nodes_are_linked(parent
, child
))
425 if (!_link(&parent
->uses
, child
))
428 if (!_link(&child
->used_by
, parent
))
434 static void _unlink(struct dm_list
*list
, struct dm_tree_node
*node
)
436 struct dm_tree_link
*dlink
;
438 dm_list_iterate_items(dlink
, list
)
439 if (dlink
->node
== node
) {
440 dm_list_del(&dlink
->list
);
445 static void _unlink_nodes(struct dm_tree_node
*parent
,
446 struct dm_tree_node
*child
)
448 if (!_nodes_are_linked(parent
, child
))
451 _unlink(&parent
->uses
, child
);
452 _unlink(&child
->used_by
, parent
);
455 static int _add_to_toplevel(struct dm_tree_node
*node
)
457 return _link_nodes(&node
->dtree
->root
, node
);
460 static void _remove_from_toplevel(struct dm_tree_node
*node
)
462 _unlink_nodes(&node
->dtree
->root
, node
);
465 static int _add_to_bottomlevel(struct dm_tree_node
*node
)
467 return _link_nodes(node
, &node
->dtree
->root
);
470 static void _remove_from_bottomlevel(struct dm_tree_node
*node
)
472 _unlink_nodes(node
, &node
->dtree
->root
);
475 static int _link_tree_nodes(struct dm_tree_node
*parent
, struct dm_tree_node
*child
)
477 /* Don't link to root node if child already has a parent */
478 if (parent
== &parent
->dtree
->root
) {
479 if (dm_tree_node_num_children(child
, 1))
482 _remove_from_toplevel(child
);
484 if (child
== &child
->dtree
->root
) {
485 if (dm_tree_node_num_children(parent
, 0))
488 _remove_from_bottomlevel(parent
);
490 return _link_nodes(parent
, child
);
493 static struct dm_tree_node
*_create_dm_tree_node(struct dm_tree
*dtree
,
496 struct dm_info
*info
,
500 struct dm_tree_node
*node
;
503 if (!(node
= dm_pool_zalloc(dtree
->mem
, sizeof(*node
))) ||
504 !(node
->name
= dm_pool_strdup(dtree
->mem
, name
)) ||
505 !(node
->uuid
= dm_pool_strdup(dtree
->mem
, uuid
))) {
506 log_error("_create_dm_tree_node alloc failed.");
512 node
->context
= context
;
513 node
->udev_flags
= udev_flags
;
515 dm_list_init(&node
->uses
);
516 dm_list_init(&node
->used_by
);
517 dm_list_init(&node
->props
.segs
);
519 dev
= MKDEV(info
->major
, info
->minor
);
521 if (!dm_hash_insert_binary(dtree
->devs
, (const char *) &dev
,
522 sizeof(dev
), node
)) {
523 log_error("dtree node hash insertion failed");
524 dm_pool_free(dtree
->mem
, node
);
528 if (*uuid
&& !dm_hash_insert(dtree
->uuids
, uuid
, node
)) {
529 log_error("dtree uuid hash insertion failed");
530 dm_hash_remove_binary(dtree
->devs
, (const char *) &dev
,
532 dm_pool_free(dtree
->mem
, node
);
539 static struct dm_tree_node
*_find_dm_tree_node(struct dm_tree
*dtree
,
540 uint32_t major
, uint32_t minor
)
542 dev_t dev
= MKDEV(major
, minor
);
544 return dm_hash_lookup_binary(dtree
->devs
, (const char *) &dev
,
548 void dm_tree_set_optional_uuid_suffixes(struct dm_tree
*dtree
, const char **optional_uuid_suffixes
)
550 dtree
->optional_uuid_suffixes
= optional_uuid_suffixes
;
553 static const char *_node_name(struct dm_tree_node
*dnode
);
554 static struct dm_tree_node
*_find_dm_tree_node_by_uuid(struct dm_tree
*dtree
,
557 struct dm_tree_node
*node
;
558 const char *default_uuid_prefix
;
559 size_t default_uuid_prefix_len
;
560 const char *suffix
, *suffix_position
;
561 char uuid_without_suffix
[DM_UUID_LEN
+ 1];
563 const char * const *suffix_list
= dtree
->optional_uuid_suffixes
;
565 if ((node
= dm_hash_lookup(dtree
->uuids
, uuid
))) {
566 log_debug_activation("Matched uuid %s %s in deptree.", uuid
, _node_name(node
));
570 if (suffix_list
&& (suffix_position
= strrchr(uuid
, '-'))) {
571 while ((suffix
= suffix_list
[i
++])) {
572 if (strcmp(suffix_position
+ 1, suffix
))
575 dm_strncpy(uuid_without_suffix
, uuid
, sizeof(uuid_without_suffix
));
576 uuid_without_suffix
[suffix_position
- uuid
] = '\0';
578 if ((node
= dm_hash_lookup(dtree
->uuids
, uuid_without_suffix
))) {
579 log_debug_activation("Matched uuid %s %s (missing suffix -%s) in deptree.",
580 uuid_without_suffix
, _node_name(node
), suffix
);
588 default_uuid_prefix
= dm_uuid_prefix();
589 default_uuid_prefix_len
= strlen(default_uuid_prefix
);
591 if ((strncmp(uuid
, default_uuid_prefix
, default_uuid_prefix_len
) == 0) &&
592 (node
= dm_hash_lookup(dtree
->uuids
, uuid
+ default_uuid_prefix_len
))) {
593 log_debug_activation("Matched uuid %s %s (missing prefix) in deptree.",
594 uuid
+ default_uuid_prefix_len
, _node_name(node
));
598 log_debug_activation("Not matched uuid %s in deptree.", uuid
);
602 /* Return node's device_name (major:minor) for debug messages */
603 static const char *_node_name(struct dm_tree_node
*dnode
)
605 if (dm_snprintf(dnode
->dtree
->buf
, sizeof(dnode
->dtree
->buf
),
606 "%s (" FMTu32
":" FMTu32
")",
607 dnode
->name
? dnode
->name
: "",
608 dnode
->info
.major
, dnode
->info
.minor
) < 0) {
613 return dnode
->dtree
->buf
;
616 void dm_tree_node_set_udev_flags(struct dm_tree_node
*dnode
, uint16_t udev_flags
)
619 if (udev_flags
!= dnode
->udev_flags
)
620 log_debug_activation("Resetting %s udev_flags from 0x%x to 0x%x.",
622 dnode
->udev_flags
, udev_flags
);
623 dnode
->udev_flags
= udev_flags
;
626 void dm_tree_node_set_read_ahead(struct dm_tree_node
*dnode
,
628 uint32_t read_ahead_flags
)
630 dnode
->props
.read_ahead
= read_ahead
;
631 dnode
->props
.read_ahead_flags
= read_ahead_flags
;
634 void dm_tree_node_set_presuspend_node(struct dm_tree_node
*node
,
635 struct dm_tree_node
*presuspend_node
)
637 node
->presuspend_node
= presuspend_node
;
640 const char *dm_tree_node_get_name(const struct dm_tree_node
*node
)
642 return node
->info
.exists
? node
->name
: "";
645 const char *dm_tree_node_get_uuid(const struct dm_tree_node
*node
)
647 return node
->info
.exists
? node
->uuid
: "";
650 const struct dm_info
*dm_tree_node_get_info(const struct dm_tree_node
*node
)
655 void *dm_tree_node_get_context(const struct dm_tree_node
*node
)
657 return node
->context
;
660 int dm_tree_node_size_changed(const struct dm_tree_node
*dnode
)
662 return dnode
->props
.size_changed
;
665 int dm_tree_node_num_children(const struct dm_tree_node
*node
, uint32_t inverted
)
668 if (_nodes_are_linked(&node
->dtree
->root
, node
))
670 return dm_list_size(&node
->used_by
);
673 if (_nodes_are_linked(node
, &node
->dtree
->root
))
676 return dm_list_size(&node
->uses
);
/*
 * Returns 1 if no prefix supplied, or if 'uuid' matches 'uuid_prefix' -
 * tolerating, during transition, uuids that lack the default prefix
 * returned by dm_uuid_prefix().
 */
static int _uuid_prefix_matches(const char *uuid, const char *uuid_prefix, size_t uuid_prefix_len)
{
	const char *default_uuid_prefix = dm_uuid_prefix();
	size_t default_uuid_prefix_len = strlen(default_uuid_prefix);

	if (!uuid_prefix)
		return 1;

	if (!strncmp(uuid, uuid_prefix, uuid_prefix_len))
		return 1;

	/* Handle transition: active device uuids might be missing the prefix */
	if (uuid_prefix_len <= 4)
		return 0;

	if (!strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
		return 0;

	if (strncmp(uuid_prefix, default_uuid_prefix, default_uuid_prefix_len))
		return 0;

	if (!strncmp(uuid, uuid_prefix + default_uuid_prefix_len,
		     uuid_prefix_len - default_uuid_prefix_len))
		return 1;

	return 0;
}
710 * Returns 1 if no children.
712 static int _children_suspended(struct dm_tree_node
*node
,
714 const char *uuid_prefix
,
715 size_t uuid_prefix_len
)
717 struct dm_list
*list
;
718 struct dm_tree_link
*dlink
;
719 const struct dm_info
*dinfo
;
723 if (_nodes_are_linked(&node
->dtree
->root
, node
))
725 list
= &node
->used_by
;
727 if (_nodes_are_linked(node
, &node
->dtree
->root
))
732 dm_list_iterate_items(dlink
, list
) {
733 if (!(uuid
= dm_tree_node_get_uuid(dlink
->node
))) {
738 /* Ignore if it doesn't belong to this VG */
739 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
742 /* Ignore if parent node wants to presuspend this node */
743 if (dlink
->node
->presuspend_node
== node
)
746 if (!(dinfo
= dm_tree_node_get_info(dlink
->node
)))
747 return_0
; /* FIXME Is this normal? */
749 if (!dinfo
->suspended
)
757 * Set major and minor to zero for root of tree.
759 struct dm_tree_node
*dm_tree_find_node(struct dm_tree
*dtree
,
763 if (!major
&& !minor
)
766 return _find_dm_tree_node(dtree
, major
, minor
);
770 * Set uuid to NULL for root of tree.
772 struct dm_tree_node
*dm_tree_find_node_by_uuid(struct dm_tree
*dtree
,
778 return _find_dm_tree_node_by_uuid(dtree
, uuid
);
782 * First time set *handle to NULL.
783 * Set inverted to invert the tree.
785 struct dm_tree_node
*dm_tree_next_child(void **handle
,
786 const struct dm_tree_node
*parent
,
789 struct dm_list
**dlink
= (struct dm_list
**) handle
;
790 const struct dm_list
*use_list
;
793 use_list
= &parent
->used_by
;
795 use_list
= &parent
->uses
;
798 *dlink
= dm_list_first(use_list
);
800 *dlink
= dm_list_next(use_list
, *dlink
);
802 return (*dlink
) ? dm_list_item(*dlink
, struct dm_tree_link
)->node
: NULL
;
805 static int _deps(struct dm_task
**dmt
, struct dm_pool
*mem
, uint32_t major
, uint32_t minor
,
806 const char **name
, const char **uuid
, unsigned inactive_table
,
807 struct dm_info
*info
, struct dm_deps
**deps
)
809 memset(info
, 0, sizeof(*info
));
814 if (!dm_is_dm_major(major
)) {
820 if (!(*dmt
= dm_task_create(DM_DEVICE_DEPS
)))
823 if (!dm_task_set_major(*dmt
, major
) || !dm_task_set_minor(*dmt
, minor
)) {
824 log_error("_deps: failed to set major:minor for (" FMTu32
":" FMTu32
").",
829 if (inactive_table
&& !dm_task_query_inactive_table(*dmt
)) {
830 log_error("_deps: failed to set inactive table for (%" PRIu32
":%" PRIu32
")",
835 if (!dm_task_run(*dmt
)) {
836 log_error("_deps: task run failed for (%" PRIu32
":%" PRIu32
")",
841 if (!dm_task_get_info(*dmt
, info
)) {
842 log_error("_deps: failed to get info for (%" PRIu32
":%" PRIu32
")",
848 if (info
->major
!= major
) {
849 log_error("Inconsistent dtree major number: %u != %u",
853 if (info
->minor
!= minor
) {
854 log_error("Inconsistent dtree minor number: %u != %u",
858 *name
= dm_task_get_name(*dmt
);
859 *uuid
= dm_task_get_uuid(*dmt
);
860 *deps
= dm_task_get_deps(*dmt
);
866 dm_task_destroy(*dmt
);
873 * Deactivate a device with its dependencies if the uuid prefix matches.
875 static int _info_by_dev(uint32_t major
, uint32_t minor
, int with_open_count
,
876 struct dm_info
*info
, struct dm_pool
*mem
,
877 const char **name
, const char **uuid
)
882 if (!(dmt
= dm_task_create(DM_DEVICE_INFO
)))
885 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
886 log_error("_info_by_dev: Failed to set device number.");
890 if (!with_open_count
&& !dm_task_no_open_count(dmt
))
891 log_warn("WARNING: Failed to disable open_count.");
893 if (!dm_task_run(dmt
))
896 if (!dm_task_get_info(dmt
, info
))
899 if (name
&& !(*name
= dm_pool_strdup(mem
, dm_task_get_name(dmt
)))) {
900 log_error("name pool_strdup failed");
904 if (uuid
&& !(*uuid
= dm_pool_strdup(mem
, dm_task_get_uuid(dmt
)))) {
905 log_error("uuid pool_strdup failed");
911 dm_task_destroy(dmt
);
916 static int _check_device_not_in_use(const char *name
, struct dm_info
*info
)
923 /* If sysfs is not used, use open_count information only. */
924 if (!*dm_sysfs_dir()) {
925 if (!info
->open_count
)
928 } else if (dm_device_has_holders(info
->major
, info
->minor
))
929 reason
= "is used by another device";
930 else if (dm_device_has_mounted_fs(info
->major
, info
->minor
))
931 reason
= "contains a filesystem in use";
935 log_error("Device %s (" FMTu32
":" FMTu32
") %s.",
936 name
, info
->major
, info
->minor
, reason
);
940 /* Check if all parent nodes of given node have open_count == 0 */
941 static int _node_has_closed_parents(struct dm_tree_node
*node
,
942 const char *uuid_prefix
,
943 size_t uuid_prefix_len
)
945 struct dm_tree_link
*dlink
;
946 const struct dm_info
*dinfo
;
950 /* Iterate through parents of this node */
951 dm_list_iterate_items(dlink
, &node
->used_by
) {
952 if (!(uuid
= dm_tree_node_get_uuid(dlink
->node
))) {
957 /* Ignore if it doesn't belong to this VG */
958 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
961 if (!(dinfo
= dm_tree_node_get_info(dlink
->node
)))
962 return_0
; /* FIXME Is this normal? */
964 /* Refresh open_count */
965 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 1, &info
, NULL
, NULL
, NULL
))
971 if (info
.open_count
) {
972 log_debug_activation("Node %s %d:%d has open_count %d", uuid_prefix
,
973 dinfo
->major
, dinfo
->minor
, info
.open_count
);
981 static int _deactivate_node(const char *name
, uint32_t major
, uint32_t minor
,
982 uint32_t *cookie
, uint16_t udev_flags
, int retry
)
987 log_verbose("Removing %s (%" PRIu32
":%" PRIu32
")", name
, major
, minor
);
989 if (!(dmt
= dm_task_create(DM_DEVICE_REMOVE
))) {
990 log_error("Deactivation dm_task creation failed for %s", name
);
994 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
995 log_error("Failed to set device number for %s deactivation", name
);
999 if (!dm_task_no_open_count(dmt
))
1000 log_warn("WARNING: Failed to disable open_count.");
1003 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1007 dm_task_retry_remove(dmt
);
1009 r
= dm_task_run(dmt
);
1011 /* FIXME Until kernel returns actual name so dm-iface.c can handle it */
1012 rm_dev_node(name
, dmt
->cookie_set
&& !(udev_flags
& DM_UDEV_DISABLE_DM_RULES_FLAG
),
1013 dmt
->cookie_set
&& (udev_flags
& DM_UDEV_DISABLE_LIBRARY_FALLBACK
));
1015 /* FIXME Remove node from tree or mark invalid? */
1018 dm_task_destroy(dmt
);
1023 static int _node_clear_table(struct dm_tree_node
*dnode
, uint16_t udev_flags
)
1025 struct dm_task
*dmt
= NULL
, *deps_dmt
= NULL
;
1026 struct dm_info
*info
= &dnode
->info
, deps_info
;
1027 struct dm_deps
*deps
= NULL
;
1028 const char *name
, *uuid
, *depname
, *depuuid
;
1029 const char *default_uuid_prefix
;
1030 size_t default_uuid_prefix_len
;
1034 if (!(name
= dm_tree_node_get_name(dnode
))) {
1035 log_error("_node_clear_table failed: missing name");
1039 /* Is there a table? */
1040 if (!info
->exists
|| !info
->inactive_table
)
1043 /* Get devices used by inactive table that's about to be deleted. */
1044 if (!_deps(&deps_dmt
, dnode
->dtree
->mem
, info
->major
, info
->minor
, &depname
, &depuuid
, 1, info
, &deps
)) {
1045 log_error("Failed to obtain dependencies for %s before clearing table.", name
);
1049 log_verbose("Clearing inactive table %s (%" PRIu32
":%" PRIu32
")",
1050 name
, info
->major
, info
->minor
);
1052 if (!(dmt
= dm_task_create(DM_DEVICE_CLEAR
))) {
1053 log_error("Table clear dm_task creation failed for %s", name
);
1057 if (!dm_task_set_major(dmt
, info
->major
) ||
1058 !dm_task_set_minor(dmt
, info
->minor
)) {
1059 log_error("Failed to set device number for %s table clear", name
);
1063 r
= dm_task_run(dmt
);
1065 if (!dm_task_get_info(dmt
, info
)) {
1066 log_error("_node_clear_table failed: info missing after running task for %s", name
);
1074 * Remove (incomplete) devices that the inactive table referred to but
1075 * which are not in the tree, no longer referenced and don't have a live
1078 default_uuid_prefix
= dm_uuid_prefix();
1079 default_uuid_prefix_len
= strlen(default_uuid_prefix
);
1081 for (i
= 0; i
< deps
->count
; i
++) {
1082 /* If already in tree, assume it's under control */
1083 if (_find_dm_tree_node(dnode
->dtree
, MAJOR(deps
->device
[i
]), MINOR(deps
->device
[i
])))
1086 if (!_info_by_dev(MAJOR(deps
->device
[i
]), MINOR(deps
->device
[i
]), 1,
1087 &deps_info
, dnode
->dtree
->mem
, &name
, &uuid
))
1090 /* Proceed if device is an 'orphan' - unreferenced and without a live table. */
1091 if (!deps_info
.exists
|| deps_info
.live_table
|| deps_info
.open_count
)
1094 if (strncmp(uuid
, default_uuid_prefix
, default_uuid_prefix_len
))
1097 /* Remove device. */
1098 if (!_deactivate_node(name
, deps_info
.major
, deps_info
.minor
, &dnode
->dtree
->cookie
, udev_flags
, 0)) {
1099 log_error("Failed to deactivate no-longer-used device %s (%"
1100 PRIu32
":%" PRIu32
")", name
, deps_info
.major
, deps_info
.minor
);
1101 } else if (deps_info
.suspended
)
1107 dm_task_destroy(dmt
);
1110 dm_task_destroy(deps_dmt
);
1115 struct dm_tree_node
*dm_tree_add_new_dev_with_udev_flags(struct dm_tree
*dtree
,
1123 uint16_t udev_flags
)
1125 struct dm_tree_node
*dnode
;
1126 struct dm_info info
= { 0 };
1128 if (!name
|| !uuid
) {
1129 log_error("Cannot add device without name and uuid.");
1133 /* Do we need to add node to tree? */
1134 if (!(dnode
= dm_tree_find_node_by_uuid(dtree
, uuid
))) {
1135 if (!(dnode
= _create_dm_tree_node(dtree
, name
, uuid
, &info
,
1139 /* Attach to root node until a table is supplied */
1140 if (!_add_to_toplevel(dnode
) || !_add_to_bottomlevel(dnode
))
1143 dnode
->props
.major
= major
;
1144 dnode
->props
.minor
= minor
;
1145 } else if (strcmp(name
, dnode
->name
)) {
1146 /* Do we need to rename node? */
1147 if (!(dnode
->props
.new_name
= dm_pool_strdup(dtree
->mem
, name
))) {
1148 log_error("name pool_strdup failed");
1153 dnode
->props
.read_only
= read_only
? 1 : 0;
1154 dnode
->props
.read_ahead
= DM_READ_AHEAD_AUTO
;
1155 dnode
->props
.read_ahead_flags
= 0;
1157 if (clear_inactive
&& !_node_clear_table(dnode
, udev_flags
))
1160 dnode
->context
= context
;
1161 dnode
->udev_flags
= udev_flags
;
1166 struct dm_tree_node
*dm_tree_add_new_dev(struct dm_tree
*dtree
, const char *name
,
1167 const char *uuid
, uint32_t major
, uint32_t minor
,
1168 int read_only
, int clear_inactive
, void *context
)
1170 return dm_tree_add_new_dev_with_udev_flags(dtree
, name
, uuid
, major
, minor
,
1171 read_only
, clear_inactive
, context
, 0);
1174 static struct dm_tree_node
*_add_dev(struct dm_tree
*dtree
,
1175 struct dm_tree_node
*parent
,
1176 uint32_t major
, uint32_t minor
,
1177 uint16_t udev_flags
,
1180 struct dm_task
*dmt
= NULL
;
1181 struct dm_info info
;
1182 struct dm_deps
*deps
= NULL
;
1183 const char *name
= NULL
;
1184 const char *uuid
= NULL
;
1185 struct dm_tree_node
*node
= NULL
;
1189 /* Already in tree? */
1190 if (!(node
= _find_dm_tree_node(dtree
, major
, minor
))) {
1191 if (!_deps(&dmt
, dtree
->mem
, major
, minor
, &name
, &uuid
, 0, &info
, &deps
))
1194 if (!(node
= _create_dm_tree_node(dtree
, name
, uuid
, &info
,
1198 node
->implicit_deps
= implicit_deps
;
1199 } else if (!implicit_deps
&& node
->implicit_deps
) {
1200 node
->udev_flags
= udev_flags
;
1201 node
->implicit_deps
= 0;
1204 if (!_link_tree_nodes(parent
, node
)) {
1209 /* If node was already in tree, no need to recurse. */
1213 /* Can't recurse if not a mapped device or there are no dependencies */
1214 if (!node
->info
.exists
|| !deps
|| !deps
->count
) {
1215 if (!_add_to_bottomlevel(node
)) {
1222 /* Add dependencies to tree */
1223 for (i
= 0; i
< deps
->count
; i
++)
1224 /* Implicit devices are by default temporary */
1225 if (!_add_dev(dtree
, node
, MAJOR(deps
->device
[i
]),
1226 MINOR(deps
->device
[i
]), udev_flags
|
1227 DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG
|
1228 DM_UDEV_DISABLE_DISK_RULES_FLAG
|
1229 DM_UDEV_DISABLE_OTHER_RULES_FLAG
, 1)) {
1236 dm_task_destroy(dmt
);
1241 int dm_tree_add_dev(struct dm_tree
*dtree
, uint32_t major
, uint32_t minor
)
1243 return _add_dev(dtree
, &dtree
->root
, major
, minor
, 0, 0) ? 1 : 0;
1246 int dm_tree_add_dev_with_udev_flags(struct dm_tree
*dtree
, uint32_t major
,
1247 uint32_t minor
, uint16_t udev_flags
)
1249 return _add_dev(dtree
, &dtree
->root
, major
, minor
, udev_flags
, 0) ? 1 : 0;
1252 static int _rename_node(const char *old_name
, const char *new_name
, uint32_t major
,
1253 uint32_t minor
, uint32_t *cookie
, uint16_t udev_flags
)
1255 struct dm_task
*dmt
;
1258 log_verbose("Renaming %s (%" PRIu32
":%" PRIu32
") to %s", old_name
, major
, minor
, new_name
);
1260 if (!(dmt
= dm_task_create(DM_DEVICE_RENAME
))) {
1261 log_error("Rename dm_task creation failed for %s", old_name
);
1265 if (!dm_task_set_name(dmt
, old_name
)) {
1266 log_error("Failed to set name for %s rename.", old_name
);
1270 if (!dm_task_set_newname(dmt
, new_name
))
1273 if (!dm_task_no_open_count(dmt
))
1274 log_warn("WARNING: Failed to disable open_count.");
1276 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1279 r
= dm_task_run(dmt
);
1282 dm_task_destroy(dmt
);
1287 /* FIXME Merge with _suspend_node? */
1288 static int _resume_node(const char *name
, uint32_t major
, uint32_t minor
,
1289 uint32_t read_ahead
, uint32_t read_ahead_flags
,
1290 struct dm_info
*newinfo
, uint32_t *cookie
,
1291 uint16_t udev_flags
, int already_suspended
)
1293 struct dm_task
*dmt
;
1296 log_verbose("Resuming %s (" FMTu32
":" FMTu32
").", name
, major
, minor
);
1298 if (!(dmt
= dm_task_create(DM_DEVICE_RESUME
))) {
1299 log_debug_activation("Suspend dm_task creation failed for %s.", name
);
1303 /* FIXME Kernel should fill in name on return instead */
1304 if (!dm_task_set_name(dmt
, name
)) {
1305 log_debug_activation("Failed to set device name for %s resumption.", name
);
1309 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
1310 log_error("Failed to set device number for %s resumption.", name
);
1314 if (!dm_task_no_open_count(dmt
))
1315 log_warn("WARNING: Failed to disable open_count.");
1317 if (!dm_task_set_read_ahead(dmt
, read_ahead
, read_ahead_flags
))
1318 log_warn("WARNING: Failed to set read ahead.");
1320 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1323 if (!(r
= dm_task_run(dmt
)))
1326 if (already_suspended
)
1329 if (!(r
= dm_task_get_info(dmt
, newinfo
)))
1333 dm_task_destroy(dmt
);
1338 static int _suspend_node(const char *name
, uint32_t major
, uint32_t minor
,
1339 int skip_lockfs
, int no_flush
, struct dm_info
*newinfo
)
1341 struct dm_task
*dmt
;
1344 log_verbose("Suspending %s (%" PRIu32
":%" PRIu32
")%s%s",
1346 skip_lockfs
? "" : " with filesystem sync",
1347 no_flush
? "" : " with device flush");
1349 if (!(dmt
= dm_task_create(DM_DEVICE_SUSPEND
))) {
1350 log_error("Suspend dm_task creation failed for %s", name
);
1354 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
1355 log_error("Failed to set device number for %s suspension.", name
);
1359 if (!dm_task_no_open_count(dmt
))
1360 log_warn("WARNING: Failed to disable open_count.");
1362 if (skip_lockfs
&& !dm_task_skip_lockfs(dmt
))
1363 log_warn("WARNING: Failed to set skip_lockfs flag.");
1365 if (no_flush
&& !dm_task_no_flush(dmt
))
1366 log_warn("WARNING: Failed to set no_flush flag.");
1368 if ((r
= dm_task_run(dmt
))) {
1370 r
= dm_task_get_info(dmt
, newinfo
);
1373 dm_task_destroy(dmt
);
1378 static int _thin_pool_get_status(struct dm_tree_node
*dnode
,
1379 struct dm_status_thin_pool
*s
)
1381 struct dm_task
*dmt
;
1383 uint64_t start
, length
;
1385 char *params
= NULL
;
1387 if (!(dmt
= dm_task_create(DM_DEVICE_STATUS
)))
1390 if (!dm_task_set_major(dmt
, dnode
->info
.major
) ||
1391 !dm_task_set_minor(dmt
, dnode
->info
.minor
)) {
1392 log_error("Failed to set major minor.");
1396 if (!dm_task_no_flush(dmt
))
1397 log_warn("WARNING: Can't set no_flush flag."); /* Non fatal */
1399 if (!dm_task_run(dmt
))
1402 dm_get_next_target(dmt
, NULL
, &start
, &length
, &type
, ¶ms
);
1404 if (!type
|| (strcmp(type
, "thin-pool") != 0)) {
1405 log_error("Expected thin-pool target for %s and got %s.",
1406 _node_name(dnode
), type
? : "no target");
1410 if (!parse_thin_pool_status(params
, s
))
1413 log_debug_activation("Found transaction id %" PRIu64
" for thin pool %s "
1414 "with status line: %s.",
1415 s
->transaction_id
, _node_name(dnode
), params
);
1419 dm_task_destroy(dmt
);
1424 static int _node_message(uint32_t major
, uint32_t minor
,
1425 int expected_errno
, const char *message
)
1427 struct dm_task
*dmt
;
1430 if (!(dmt
= dm_task_create(DM_DEVICE_TARGET_MSG
)))
1433 if (!dm_task_set_major(dmt
, major
) ||
1434 !dm_task_set_minor(dmt
, minor
)) {
1435 log_error("Failed to set message major minor.");
1439 if (!dm_task_set_message(dmt
, message
))
1442 /* Internal functionality of dm_task */
1443 dmt
->expected_errno
= expected_errno
;
1445 if (!dm_task_run(dmt
)) {
1446 log_error("Failed to process message \"%s\".", message
);
1452 dm_task_destroy(dmt
);
1457 static int _thin_pool_node_message(struct dm_tree_node
*dnode
, struct thin_message
*tm
)
1459 struct dm_thin_message
*m
= &tm
->message
;
1464 case DM_THIN_MESSAGE_CREATE_SNAP
:
1465 r
= dm_snprintf(buf
, sizeof(buf
), "create_snap %u %u",
1466 m
->u
.m_create_snap
.device_id
,
1467 m
->u
.m_create_snap
.origin_id
);
1469 case DM_THIN_MESSAGE_CREATE_THIN
:
1470 r
= dm_snprintf(buf
, sizeof(buf
), "create_thin %u",
1471 m
->u
.m_create_thin
.device_id
);
1473 case DM_THIN_MESSAGE_DELETE
:
1474 r
= dm_snprintf(buf
, sizeof(buf
), "delete %u",
1475 m
->u
.m_delete
.device_id
);
1477 case DM_THIN_MESSAGE_SET_TRANSACTION_ID
:
1478 r
= dm_snprintf(buf
, sizeof(buf
),
1479 "set_transaction_id %" PRIu64
" %" PRIu64
,
1480 m
->u
.m_set_transaction_id
.current_id
,
1481 m
->u
.m_set_transaction_id
.new_id
);
1483 case DM_THIN_MESSAGE_RESERVE_METADATA_SNAP
: /* target vsn 1.1 */
1484 r
= dm_snprintf(buf
, sizeof(buf
), "reserve_metadata_snap");
1486 case DM_THIN_MESSAGE_RELEASE_METADATA_SNAP
: /* target vsn 1.1 */
1487 r
= dm_snprintf(buf
, sizeof(buf
), "release_metadata_snap");
1494 log_error("Failed to prepare message.");
1498 if (!_node_message(dnode
->info
.major
, dnode
->info
.minor
,
1499 tm
->expected_errno
, buf
)) {
1501 case DM_THIN_MESSAGE_CREATE_SNAP
:
1502 case DM_THIN_MESSAGE_CREATE_THIN
:
1503 if (errno
== EEXIST
) {
1505 * ATM errno from ioctl() is preserved through code error path chain
1506 * If this would ever change, another way need to be used to
1507 * obtain result from failed DM message
1509 log_error("Thin pool %s already contain thin device with device_id %u.",
1510 _node_name(dnode
), m
->u
.m_create_snap
.device_id
);
1514 * Give some useful advice how to solve this problem,
1515 * until lvconvert --repair can handle this automatically
1517 log_error("Manual intervention may be required to remove device dev_id=%u in thin pool metadata.",
1518 m
->u
.m_create_snap
.device_id
);
1519 log_error("Optionally new thin volume with device_id=%u can be manually added into a volume group.",
1520 m
->u
.m_create_snap
.device_id
);
1521 log_warn("WARNING: When uncertain how to do this, contact support!");
1534 static struct load_segment
*_get_last_load_segment(struct dm_tree_node
*node
)
1536 if (dm_list_empty(&node
->props
.segs
)) {
1537 log_error("Node %s is missing a segment.", _node_name(node
));
1541 return dm_list_item(dm_list_last(&node
->props
.segs
), struct load_segment
);
1544 /* For preload pass only validate pool's transaction_id */
1545 static int _node_send_messages(struct dm_tree_node
*dnode
,
1546 const char *uuid_prefix
,
1547 size_t uuid_prefix_len
,
1550 struct load_segment
*seg
;
1551 struct thin_message
*tmsg
;
1552 struct dm_status_thin_pool stp
;
1556 if (!dnode
->info
.exists
)
1559 if (!(seg
= _get_last_load_segment(dnode
)))
1562 if (seg
->type
!= SEG_THIN_POOL
)
1565 if (!(uuid
= dm_tree_node_get_uuid(dnode
)))
1568 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
)) {
1569 log_debug_activation("UUID \"%s\" does not match.", uuid
);
1573 if (!_thin_pool_get_status(dnode
, &stp
))
1576 have_messages
= !dm_list_empty(&seg
->thin_messages
) ? 1 : 0;
1577 if (stp
.transaction_id
== seg
->transaction_id
) {
1578 dnode
->props
.send_messages
= 0; /* messages already committed */
1580 log_debug_activation("Thin pool %s transaction_id matches %"
1581 PRIu64
", skipping messages.",
1582 _node_name(dnode
), stp
.transaction_id
);
1586 /* Error if there are no stacked messages or id mismatches */
1587 if ((stp
.transaction_id
+ 1) != seg
->transaction_id
) {
1588 log_error("Thin pool %s transaction_id is %" PRIu64
", while expected %" PRIu64
".",
1589 _node_name(dnode
), stp
.transaction_id
, seg
->transaction_id
- have_messages
);
1593 if (!have_messages
|| !send
)
1594 return 1; /* transaction_id is matching */
1596 if (stp
.fail
|| stp
.read_only
|| stp
.needs_check
) {
1597 log_error("Cannot send messages to thin pool %s%s%s%s.",
1599 stp
.fail
? " in failed state" : "",
1600 stp
.read_only
? " with read only metadata" : "",
1601 stp
.needs_check
? " which needs check first" : "");
1605 dm_list_iterate_items(tmsg
, &seg
->thin_messages
) {
1606 if (!(_thin_pool_node_message(dnode
, tmsg
)))
1608 if (tmsg
->message
.type
== DM_THIN_MESSAGE_SET_TRANSACTION_ID
) {
1609 if (!_thin_pool_get_status(dnode
, &stp
))
1611 if (stp
.transaction_id
!= tmsg
->message
.u
.m_set_transaction_id
.new_id
) {
1612 log_error("Thin pool %s transaction_id is %" PRIu64
1613 " and does not match expected %" PRIu64
".",
1614 _node_name(dnode
), stp
.transaction_id
,
1615 tmsg
->message
.u
.m_set_transaction_id
.new_id
);
1621 dnode
->props
.send_messages
= 0; /* messages posted */
1627 * FIXME Don't attempt to deactivate known internal dependencies.
1629 static int _dm_tree_deactivate_children(struct dm_tree_node
*dnode
,
1630 const char *uuid_prefix
,
1631 size_t uuid_prefix_len
,
1635 void *handle
= NULL
;
1636 struct dm_tree_node
*child
= dnode
;
1637 struct dm_info info
;
1638 const struct dm_info
*dinfo
;
1642 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1643 if (!(dinfo
= dm_tree_node_get_info(child
))) {
1648 if (!(name
= dm_tree_node_get_name(child
))) {
1653 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1658 /* Ignore if it doesn't belong to this VG */
1659 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1662 /* Refresh open_count */
1663 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 1, &info
, NULL
, NULL
, NULL
))
1669 if (info
.open_count
) {
1670 /* Skip internal non-toplevel opened nodes */
1674 /* When retry is not allowed, error */
1675 if (!child
->dtree
->retry_remove
) {
1676 log_error("Unable to deactivate open %s (" FMTu32
":"
1677 FMTu32
").", name
, info
.major
, info
.minor
);
1682 /* Check toplevel node for holders/mounted fs */
1683 if (!_check_device_not_in_use(name
, &info
)) {
1688 /* Go on with retry */
1691 /* Also checking open_count in parent nodes of presuspend_node */
1692 if ((child
->presuspend_node
&&
1693 !_node_has_closed_parents(child
->presuspend_node
,
1694 uuid_prefix
, uuid_prefix_len
))) {
1695 /* Only report error from (likely non-internal) dependency at top level */
1697 log_error("Unable to deactivate open %s (" FMTu32
":"
1698 FMTu32
").", name
, info
.major
, info
.minor
);
1704 /* Suspend child node first if requested */
1705 if (child
->presuspend_node
&&
1706 !dm_tree_suspend_children(child
, uuid_prefix
, uuid_prefix_len
))
1709 if (!_deactivate_node(name
, info
.major
, info
.minor
,
1710 &child
->dtree
->cookie
, child
->udev_flags
,
1711 (level
== 0) ? child
->dtree
->retry_remove
: 0)) {
1712 log_error("Unable to deactivate %s (" FMTu32
":"
1713 FMTu32
").", name
, info
.major
, info
.minor
);
1718 if (info
.suspended
&& info
.live_table
)
1721 if (child
->callback
&&
1722 !child
->callback(child
, DM_NODE_CALLBACK_DEACTIVATED
,
1723 child
->callback_data
))
1725 /* FIXME Deactivation must currently ignore failure
1726 * here so that lvremove can continue: we need an
1727 * alternative way to handle this state without
1728 * setting r=0. Or better, skip calling thin_check
1729 * entirely if the device is about to be removed. */
1731 if (dm_tree_node_num_children(child
, 0) &&
1732 !_dm_tree_deactivate_children(child
, uuid_prefix
, uuid_prefix_len
, level
+ 1))
/*
 * Public entry point: deactivate all children of @dnode matching
 * @uuid_prefix, starting the recursion at level 0.
 */
int dm_tree_deactivate_children(struct dm_tree_node *dnode,
				const char *uuid_prefix,
				size_t uuid_prefix_len)
{
	return _dm_tree_deactivate_children(dnode, uuid_prefix, uuid_prefix_len, 0);
}
1746 int dm_tree_suspend_children(struct dm_tree_node
*dnode
,
1747 const char *uuid_prefix
,
1748 size_t uuid_prefix_len
)
1751 void *handle
= NULL
;
1752 struct dm_tree_node
*child
= dnode
;
1753 struct dm_info info
, newinfo
;
1754 const struct dm_info
*dinfo
;
1758 /* Suspend nodes at this level of the tree */
1759 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1760 if (!(dinfo
= dm_tree_node_get_info(child
))) {
1765 if (!(name
= dm_tree_node_get_name(child
))) {
1770 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1775 /* Ignore if it doesn't belong to this VG */
1776 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1779 /* Ensure immediate parents are already suspended */
1780 if (!_children_suspended(child
, 1, uuid_prefix
, uuid_prefix_len
))
1783 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 0, &info
, NULL
, NULL
, NULL
))
1786 if (!info
.exists
|| info
.suspended
)
1789 /* If child has some real messages send them */
1790 if ((child
->props
.send_messages
> 1) && r
) {
1791 if (!(r
= _node_send_messages(child
, uuid_prefix
, uuid_prefix_len
, 1)))
1794 log_debug_activation("Sent messages to thin-pool %s and "
1795 "skipping suspend of its children.",
1797 child
->props
.skip_suspend
++;
1802 if (!_suspend_node(name
, info
.major
, info
.minor
,
1803 child
->dtree
->skip_lockfs
,
1804 child
->dtree
->no_flush
, &newinfo
)) {
1805 log_error("Unable to suspend %s (" FMTu32
":"
1806 FMTu32
")", name
, info
.major
, info
.minor
);
1811 /* Update cached info */
1812 child
->info
= newinfo
;
1815 /* Then suspend any child nodes */
1818 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1819 if (child
->props
.skip_suspend
)
1822 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1827 /* Ignore if it doesn't belong to this VG */
1828 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1831 if (dm_tree_node_num_children(child
, 0))
1832 if (!dm_tree_suspend_children(child
, uuid_prefix
, uuid_prefix_len
))
1840 * _rename_conflict_exists
1845 * Check if there is a rename conflict with existing peers in
1846 * this tree. 'resolvable' is set if the conflicting node will
1847 * also be undergoing a rename. (Allowing that node to rename
1848 * first would clear the conflict.)
1850 * Returns: 1 if conflict, 0 otherwise
1852 static int _rename_conflict_exists(struct dm_tree_node
*parent
,
1853 struct dm_tree_node
*node
,
1856 void *handle
= NULL
;
1857 const char *name
= dm_tree_node_get_name(node
);
1858 const char *sibling_name
;
1859 struct dm_tree_node
*sibling
;
1866 while ((sibling
= dm_tree_next_child(&handle
, parent
, 0))) {
1867 if (sibling
== node
)
1870 if (!(sibling_name
= dm_tree_node_get_name(sibling
))) {
1875 if (!strcmp(node
->props
.new_name
, sibling_name
)) {
1876 if (sibling
->props
.new_name
)
1886 * Reactivation of sibling nodes
1888 * Function is used when activating origin and its thick snapshots
1889 * to ensure udev is processing first the origin LV and all the
1890 * snapshot LVs are processed afterwards.
1892 static int _reactivate_siblings(struct dm_tree_node
*dnode
,
1893 const char *uuid_prefix
,
1894 size_t uuid_prefix_len
)
1896 struct dm_tree_node
*child
;
1898 void *handle
= NULL
;
1901 /* Wait for udev before reactivating siblings */
1902 if (!dm_udev_wait(dm_tree_get_cookie(dnode
)))
1905 dm_tree_set_cookie(dnode
, 0);
1907 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1908 if (child
->props
.reactivate_siblings
) {
1909 /* Skip 'leading' device in this group, marked with flag */
1910 child
->props
.reactivate_siblings
= 0;
1914 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1919 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1922 if (!_suspend_node(child
->name
, child
->info
.major
, child
->info
.minor
,
1923 child
->dtree
->skip_lockfs
,
1924 child
->dtree
->no_flush
, &child
->info
)) {
1925 log_error("Unable to suspend %s (" FMTu32
1926 ":" FMTu32
")", child
->name
,
1927 child
->info
.major
, child
->info
.minor
);
1931 if (!_resume_node(child
->name
, child
->info
.major
, child
->info
.minor
,
1932 child
->props
.read_ahead
, child
->props
.read_ahead_flags
,
1933 &child
->info
, &child
->dtree
->cookie
,
1934 child
->props
.reactivate_udev_flags
, // use these flags
1935 child
->info
.suspended
)) {
1936 log_error("Failed to suspend %s (" FMTu32
1937 ":" FMTu32
")", child
->name
,
1938 child
->info
.major
, child
->info
.minor
);
1947 int dm_tree_activate_children(struct dm_tree_node
*dnode
,
1948 const char *uuid_prefix
,
1949 size_t uuid_prefix_len
)
1952 int resolvable_name_conflict
, awaiting_peer_rename
= 0;
1953 void *handle
= NULL
;
1954 struct dm_tree_node
*child
= dnode
;
1959 /* Activate children first */
1960 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1961 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1966 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1969 if (dm_tree_node_num_children(child
, 0))
1970 if (!dm_tree_activate_children(child
, uuid_prefix
, uuid_prefix_len
))
1976 for (priority
= 0; priority
< 3; priority
++) {
1977 awaiting_peer_rename
= 0;
1978 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1979 if (priority
!= child
->activation_priority
)
1982 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1987 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1990 if (!(name
= dm_tree_node_get_name(child
))) {
1996 if (child
->props
.new_name
) {
1997 if (_rename_conflict_exists(dnode
, child
, &resolvable_name_conflict
) &&
1998 resolvable_name_conflict
) {
1999 awaiting_peer_rename
++;
2002 if (!_rename_node(name
, child
->props
.new_name
, child
->info
.major
,
2003 child
->info
.minor
, &child
->dtree
->cookie
,
2004 child
->udev_flags
)) {
2005 log_error("Failed to rename %s (%" PRIu32
2006 ":%" PRIu32
") to %s", name
, child
->info
.major
,
2007 child
->info
.minor
, child
->props
.new_name
);
2010 child
->name
= child
->props
.new_name
;
2011 child
->props
.new_name
= NULL
;
2014 if (!child
->info
.inactive_table
&& !child
->info
.suspended
)
2017 if (!_resume_node(child
->name
, child
->info
.major
, child
->info
.minor
,
2018 child
->props
.read_ahead
, child
->props
.read_ahead_flags
,
2019 &child
->info
, &child
->dtree
->cookie
, child
->udev_flags
, child
->info
.suspended
)) {
2020 log_error("Unable to resume %s.", _node_name(child
));
2026 * FIXME: Implement delayed error reporting
2027 * activation should be stopped only in the case,
2028 * the submission of transaction_id message fails,
2029 * resume should continue further, just whole command
2030 * has to report failure.
2032 if (r
&& (child
->props
.send_messages
> 1) &&
2033 !(r
= _node_send_messages(child
, uuid_prefix
, uuid_prefix_len
, 1)))
2036 /* Reactivate only for fresh activated origin */
2037 if (r
&& child
->props
.reactivate_siblings
&&
2038 (!(r
= _reactivate_siblings(dnode
, uuid_prefix
, uuid_prefix_len
))))
2041 if (awaiting_peer_rename
)
2042 priority
--; /* redo priority level */
2048 static int _create_node(struct dm_tree_node
*dnode
, struct dm_tree_node
*parent
)
2051 struct dm_task
*dmt
;
2053 log_verbose("Creating %s", dnode
->name
);
2055 if (!(dmt
= dm_task_create(DM_DEVICE_CREATE
))) {
2056 log_error("Create dm_task creation failed for %s", dnode
->name
);
2060 if (!dm_task_set_name(dmt
, dnode
->name
)) {
2061 log_error("Failed to set device name for %s", dnode
->name
);
2065 if (!dm_task_set_uuid(dmt
, dnode
->uuid
)) {
2066 log_error("Failed to set uuid for %s", dnode
->name
);
2070 if (dnode
->props
.major
&&
2071 (!dm_task_set_major(dmt
, dnode
->props
.major
) ||
2072 !dm_task_set_minor(dmt
, dnode
->props
.minor
))) {
2073 log_error("Failed to set device number for %s creation.", dnode
->name
);
2077 if (dnode
->props
.read_only
&& !dm_task_set_ro(dmt
)) {
2078 log_error("Failed to set read only flag for %s", dnode
->name
);
2082 if (!dm_task_no_open_count(dmt
))
2083 log_warn("WARNING: Failed to disable open_count.");
2085 if ((r
= dm_task_run(dmt
))) {
2086 if (!(r
= dm_task_get_info(dmt
, &dnode
->info
)))
2088 * This should not be possible to occur. However,
2089 * we print an error message anyway for the more
2090 * absurd cases (e.g. memory corruption) so there
2091 * is never any question as to which one failed.
2093 log_error(INTERNAL_ERROR
2094 "Unable to get DM task info for %s.",
2099 dnode
->activated
= 1;
2101 dm_task_destroy(dmt
);
2106 static int _build_dev_string(char *devbuf
, size_t bufsize
, struct dm_tree_node
*node
)
2108 if (!dm_format_dev(devbuf
, bufsize
, node
->info
.major
, node
->info
.minor
)) {
2109 log_error("Failed to format %s device number for %s as dm "
2111 node
->name
, node
->uuid
, node
->info
.major
, node
->info
.minor
);
/* simplify string emitting code */
/*
 * Append formatted text at offset p of the in-scope 'params' buffer
 * (bounded by 'paramsize') and advance p.  On overflow/error it makes
 * the enclosing function return -1 — callers translate that into a
 * buffer-doubling retry (see _emit_segment).
 */
#define EMIT_PARAMS(p, str...)\
do {\
	int w;\
	if ((w = dm_snprintf(params + p, paramsize - (size_t) p, str)) < 0) {\
		stack; /* Out of space */\
		return -1;\
	}\
	p += w;\
} while (0)
2132 * Returns: 1 on success, 0 on failure
2134 static int _emit_areas_line(struct dm_task
*dmt
__attribute__((unused
)),
2135 struct load_segment
*seg
, char *params
,
2136 size_t paramsize
, int *pos
)
2138 struct seg_area
*area
;
2139 char devbuf
[DM_FORMAT_DEV_BUFSIZE
];
2140 unsigned first_time
= 1;
2142 dm_list_iterate_items(area
, &seg
->areas
) {
2143 switch (seg
->type
) {
2145 case SEG_RAID0_META
:
2158 case SEG_RAID6_LS_6
:
2159 case SEG_RAID6_RS_6
:
2160 case SEG_RAID6_LA_6
:
2161 case SEG_RAID6_RA_6
:
2162 if (!area
->dev_node
) {
2163 EMIT_PARAMS(*pos
, " -");
2166 if (!_build_dev_string(devbuf
, sizeof(devbuf
), area
->dev_node
))
2169 EMIT_PARAMS(*pos
, " %s", devbuf
);
2172 if (!_build_dev_string(devbuf
, sizeof(devbuf
), area
->dev_node
))
2175 EMIT_PARAMS(*pos
, "%s%s %" PRIu64
, first_time
? "" : " ",
2176 devbuf
, area
->offset
);
2186 * Returns: 1 on success, 0 on failure
2188 static int _mirror_emit_segment_line(struct dm_task
*dmt
, struct load_segment
*seg
,
2189 char *params
, size_t paramsize
)
2191 int block_on_error
= 0;
2192 int handle_errors
= 0;
2193 int dm_log_userspace
= 0;
2194 unsigned log_parm_count
;
2196 char logbuf
[DM_FORMAT_DEV_BUFSIZE
];
2197 const char *logtype
;
2198 unsigned kmaj
= 0, kmin
= 0, krel
= 0;
2200 if (!get_uname_version(&kmaj
, &kmin
, &krel
))
2203 if ((seg
->flags
& DM_BLOCK_ON_ERROR
)) {
2205 * Originally, block_on_error was an argument to the log
2206 * portion of the mirror CTR table. It was renamed to
2207 * "handle_errors" and now resides in the 'features'
2208 * section of the mirror CTR table (i.e. at the end).
2210 * We can identify whether to use "block_on_error" or
2211 * "handle_errors" by the dm-mirror module's version
2212 * number (>= 1.12) or by the kernel version (>= 2.6.22).
2214 if (KERNEL_VERSION(kmaj
, kmin
, krel
) >= KERNEL_VERSION(2, 6, 22))
2220 if (seg
->clustered
) {
2221 /* Cluster mirrors require a UUID */
2226 * Cluster mirrors used to have their own log
2227 * types. Now they are accessed through the
2228 * userspace log type.
2230 * The dm-log-userspace module was added to the
2233 if (KERNEL_VERSION(kmaj
, kmin
, krel
) >= KERNEL_VERSION(2, 6, 31))
2234 dm_log_userspace
= 1;
2240 /* [no]sync, block_on_error etc. */
2241 log_parm_count
+= hweight32(seg
->flags
);
2243 /* "handle_errors" is a feature arg now */
2247 /* DM_CORELOG does not count in the param list */
2248 if (seg
->flags
& DM_CORELOG
)
2251 if (seg
->clustered
) {
2252 log_parm_count
++; /* For UUID */
2254 if (!dm_log_userspace
)
2255 EMIT_PARAMS(pos
, "clustered-");
2257 /* For clustered-* type field inserted later */
2266 if (!_build_dev_string(logbuf
, sizeof(logbuf
), seg
->log
))
2270 if (dm_log_userspace
)
2271 EMIT_PARAMS(pos
, "userspace %u %s clustered-%s",
2272 log_parm_count
, seg
->uuid
, logtype
);
2274 EMIT_PARAMS(pos
, "%s %u", logtype
, log_parm_count
);
2277 EMIT_PARAMS(pos
, " %s", logbuf
);
2279 EMIT_PARAMS(pos
, " %u", seg
->region_size
);
2281 if (seg
->clustered
&& !dm_log_userspace
)
2282 EMIT_PARAMS(pos
, " %s", seg
->uuid
);
2284 if ((seg
->flags
& DM_NOSYNC
))
2285 EMIT_PARAMS(pos
, " nosync");
2286 else if ((seg
->flags
& DM_FORCESYNC
))
2287 EMIT_PARAMS(pos
, " sync");
2290 EMIT_PARAMS(pos
, " block_on_error");
2292 EMIT_PARAMS(pos
, " %u ", seg
->mirror_area_count
);
2294 if (!_emit_areas_line(dmt
, seg
, params
, paramsize
, &pos
))
2298 EMIT_PARAMS(pos
, " 1 handle_errors");
/*
 * Return 2 when @p is non-zero, else 0 — RAID table options are emitted
 * as "name value" pairs, so each present option costs two parameters.
 * (Body was dropped by the extraction; restored from the callers in
 * _raid_emit_segment_line.)
 */
static int _2_if_value(unsigned p)
{
	return p ? 2 : 0;
}
2308 /* Return number of bits passed in @bits assuming 2 * 64 bit size */
2309 static int _get_params_count(const uint64_t *bits
)
2312 int i
= RAID_BITMAP_SIZE
;
2315 r
+= 2 * hweight32(bits
[i
] & 0xFFFFFFFF);
2316 r
+= 2 * hweight32(bits
[i
] >> 32);
2323 * Get target version (major, minor and patchlevel) for @target_name
2325 * FIXME: this function is derived from liblvm.
2326 * Integrate with move of liblvm functions
2327 * to libdm in future library layer purge
2328 * (e.g. expose as API dm_target_version()?)
2330 static int _target_version(const char *target_name
, uint32_t *maj
,
2331 uint32_t *min
, uint32_t *patchlevel
)
2334 struct dm_task
*dmt
;
2335 struct dm_versions
*target
, *last_target
= NULL
;
2337 log_very_verbose("Getting target version for %s", target_name
);
2338 if (!(dmt
= dm_task_create(DM_DEVICE_LIST_VERSIONS
)))
2341 if (!dm_task_run(dmt
)) {
2342 log_debug_activation("Failed to get %s target versions", target_name
);
2343 /* Assume this was because LIST_VERSIONS isn't supported */
2344 *maj
= *min
= *patchlevel
= 0;
2347 for (target
= dm_task_get_versions(dmt
);
2348 target
!= last_target
;
2349 last_target
= target
, target
= (struct dm_versions
*)((char *) target
+ target
->next
))
2350 if (!strcmp(target_name
, target
->name
)) {
2351 *maj
= target
->version
[0];
2352 *min
= target
->version
[1];
2353 *patchlevel
= target
->version
[2];
2354 log_very_verbose("Found %s target "
2355 "v%" PRIu32
".%" PRIu32
".%" PRIu32
".",
2356 target_name
, *maj
, *min
, *patchlevel
);
2361 dm_task_destroy(dmt
);
2366 static int _raid_emit_segment_line(struct dm_task
*dmt
, uint32_t major
,
2367 uint32_t minor
, struct load_segment
*seg
,
2368 uint64_t *seg_start
, char *params
,
2372 uint32_t area_count
= seg
->area_count
/ 2;
2373 uint32_t maj
, min
, patchlevel
;
2374 int param_count
= 1; /* mandatory 'chunk size'/'stripe size' arg */
2378 if (seg
->area_count
% 2)
2381 if ((seg
->flags
& DM_NOSYNC
) || (seg
->flags
& DM_FORCESYNC
))
2384 param_count
+= _2_if_value(seg
->data_offset
) +
2385 _2_if_value(seg
->delta_disks
) +
2386 _2_if_value(seg
->region_size
) +
2387 _2_if_value(seg
->writebehind
) +
2388 _2_if_value(seg
->min_recovery_rate
) +
2389 _2_if_value(seg
->max_recovery_rate
) +
2390 _2_if_value(seg
->data_copies
> 1);
2392 /* rebuilds and writemostly are BITMAP_SIZE * 64 bits */
2393 param_count
+= _get_params_count(seg
->rebuilds
);
2394 param_count
+= _get_params_count(seg
->writemostly
);
2396 if ((seg
->type
== SEG_RAID1
) && seg
->stripe_size
)
2397 log_info("WARNING: Ignoring RAID1 stripe size");
2399 /* Kernel only expects "raid0", not "raid0_meta" */
2401 if (type
== SEG_RAID0_META
)
2404 EMIT_PARAMS(pos
, "%s %d %u",
2405 type
== SEG_RAID10
? "raid10" : _dm_segtypes
[type
].target
,
2406 param_count
, seg
->stripe_size
);
2408 if (!_target_version("raid", &maj
, &min
, &patchlevel
))
2412 * Target version prior to 1.9.0 and >= 1.11.0 emit
2413 * order of parameters as of kernel target documentation
2415 if (maj
> 1 || (maj
== 1 && (min
< 9 || min
>= 11))) {
2416 if (seg
->flags
& DM_NOSYNC
)
2417 EMIT_PARAMS(pos
, " nosync");
2418 else if (seg
->flags
& DM_FORCESYNC
)
2419 EMIT_PARAMS(pos
, " sync");
2421 for (i
= 0; i
< area_count
; i
++)
2422 if (seg
->rebuilds
[i
/64] & (1ULL << (i
%64)))
2423 EMIT_PARAMS(pos
, " rebuild %u", i
);
2425 if (seg
->min_recovery_rate
)
2426 EMIT_PARAMS(pos
, " min_recovery_rate %u",
2427 seg
->min_recovery_rate
);
2429 if (seg
->max_recovery_rate
)
2430 EMIT_PARAMS(pos
, " max_recovery_rate %u",
2431 seg
->max_recovery_rate
);
2433 for (i
= 0; i
< area_count
; i
++)
2434 if (seg
->writemostly
[i
/64] & (1ULL << (i
%64)))
2435 EMIT_PARAMS(pos
, " write_mostly %u", i
);
2437 if (seg
->writebehind
)
2438 EMIT_PARAMS(pos
, " max_write_behind %u", seg
->writebehind
);
2440 if (seg
->region_size
)
2441 EMIT_PARAMS(pos
, " region_size %u", seg
->region_size
);
2443 if (seg
->data_copies
> 1 && type
== SEG_RAID10
)
2444 EMIT_PARAMS(pos
, " raid10_copies %u", seg
->data_copies
);
2446 if (seg
->delta_disks
)
2447 EMIT_PARAMS(pos
, " delta_disks %d", seg
->delta_disks
);
2449 /* If seg-data_offset == 1, kernel needs a zero offset to adjust to it */
2450 if (seg
->data_offset
)
2451 EMIT_PARAMS(pos
, " data_offset %d", seg
->data_offset
== 1 ? 0 : seg
->data_offset
);
2453 /* Target version >= 1.9.0 && < 1.11.0 had a table line parameter ordering flaw */
2455 if (seg
->data_copies
> 1 && type
== SEG_RAID10
)
2456 EMIT_PARAMS(pos
, " raid10_copies %u", seg
->data_copies
);
2458 if (seg
->flags
& DM_NOSYNC
)
2459 EMIT_PARAMS(pos
, " nosync");
2460 else if (seg
->flags
& DM_FORCESYNC
)
2461 EMIT_PARAMS(pos
, " sync");
2463 if (seg
->region_size
)
2464 EMIT_PARAMS(pos
, " region_size %u", seg
->region_size
);
2466 /* If seg-data_offset == 1, kernel needs a zero offset to adjust to it */
2467 if (seg
->data_offset
)
2468 EMIT_PARAMS(pos
, " data_offset %d", seg
->data_offset
== 1 ? 0 : seg
->data_offset
);
2470 if (seg
->delta_disks
)
2471 EMIT_PARAMS(pos
, " delta_disks %d", seg
->delta_disks
);
2473 for (i
= 0; i
< area_count
; i
++)
2474 if (seg
->rebuilds
[i
/64] & (1ULL << (i
%64)))
2475 EMIT_PARAMS(pos
, " rebuild %u", i
);
2477 for (i
= 0; i
< area_count
; i
++)
2478 if (seg
->writemostly
[i
/64] & (1ULL << (i
%64)))
2479 EMIT_PARAMS(pos
, " write_mostly %u", i
);
2481 if (seg
->writebehind
)
2482 EMIT_PARAMS(pos
, " max_write_behind %u", seg
->writebehind
);
2484 if (seg
->max_recovery_rate
)
2485 EMIT_PARAMS(pos
, " max_recovery_rate %u",
2486 seg
->max_recovery_rate
);
2488 if (seg
->min_recovery_rate
)
2489 EMIT_PARAMS(pos
, " min_recovery_rate %u",
2490 seg
->min_recovery_rate
);
2493 /* Print number of metadata/data device pairs */
2494 EMIT_PARAMS(pos
, " %u", area_count
);
2496 if (!_emit_areas_line(dmt
, seg
, params
, paramsize
, &pos
))
2502 static int _cache_emit_segment_line(struct dm_task
*dmt
,
2503 struct load_segment
*seg
,
2504 char *params
, size_t paramsize
)
2507 /* unsigned feature_count; */
2508 char data
[DM_FORMAT_DEV_BUFSIZE
];
2509 char metadata
[DM_FORMAT_DEV_BUFSIZE
];
2510 char origin
[DM_FORMAT_DEV_BUFSIZE
];
2512 struct dm_config_node
*cn
;
2515 if (!_build_dev_string(data
, sizeof(data
), seg
->pool
))
2519 if (!_build_dev_string(metadata
, sizeof(metadata
), seg
->metadata
))
2523 if (!_build_dev_string(origin
, sizeof(origin
), seg
->origin
))
2526 EMIT_PARAMS(pos
, "%s %s %s", metadata
, data
, origin
);
2528 /* Data block size */
2529 EMIT_PARAMS(pos
, " %u", seg
->data_block_size
);
2532 /* feature_count = hweight32(seg->flags); */
2533 /* EMIT_PARAMS(pos, " %u", feature_count); */
2534 if (seg
->flags
& DM_CACHE_FEATURE_METADATA2
)
2535 EMIT_PARAMS(pos
, " 2 metadata2 ");
2537 EMIT_PARAMS(pos
, " 1 ");
2539 if (seg
->flags
& DM_CACHE_FEATURE_PASSTHROUGH
)
2540 EMIT_PARAMS(pos
, "passthrough");
2541 else if (seg
->flags
& DM_CACHE_FEATURE_WRITEBACK
)
2542 EMIT_PARAMS(pos
, "writeback");
2544 EMIT_PARAMS(pos
, "writethrough");
2547 name
= seg
->policy_name
? : "default";
2549 EMIT_PARAMS(pos
, " %s", name
);
2551 /* Do not pass migration_threshold 2048 which is default */
2552 EMIT_PARAMS(pos
, " %u", (seg
->policy_argc
+ ((seg
->migration_threshold
!= 2048) ? 1 : 0)) * 2);
2553 if (seg
->migration_threshold
!= 2048)
2554 EMIT_PARAMS(pos
, " migration_threshold %u", seg
->migration_threshold
);
2555 if (seg
->policy_settings
)
2556 for (cn
= seg
->policy_settings
->child
; cn
; cn
= cn
->sib
)
2557 if (cn
->v
) /* Skip deleted entry */
2558 EMIT_PARAMS(pos
, " %s %" PRIu64
, cn
->key
, cn
->v
->v
.i
);
2563 static int _thin_pool_emit_segment_line(struct dm_task
*dmt
,
2564 struct load_segment
*seg
,
2565 char *params
, size_t paramsize
)
2568 char pool
[DM_FORMAT_DEV_BUFSIZE
], metadata
[DM_FORMAT_DEV_BUFSIZE
];
2569 int features
= (seg
->error_if_no_space
? 1 : 0) +
2570 (seg
->read_only
? 1 : 0) +
2571 (seg
->ignore_discard
? 1 : 0) +
2572 (seg
->no_discard_passdown
? 1 : 0) +
2573 (seg
->skip_block_zeroing
? 1 : 0);
2575 if (!_build_dev_string(metadata
, sizeof(metadata
), seg
->metadata
))
2578 if (!_build_dev_string(pool
, sizeof(pool
), seg
->pool
))
2581 EMIT_PARAMS(pos
, "%s %s %d %" PRIu64
" %d%s%s%s%s%s", metadata
, pool
,
2582 seg
->data_block_size
, seg
->low_water_mark
, features
,
2583 seg
->skip_block_zeroing
? " skip_block_zeroing" : "",
2584 seg
->ignore_discard
? " ignore_discard" : "",
2585 seg
->no_discard_passdown
? " no_discard_passdown" : "",
2586 seg
->error_if_no_space
? " error_if_no_space" : "",
2587 seg
->read_only
? " read_only" : ""
2593 static int _thin_emit_segment_line(struct dm_task
*dmt
,
2594 struct load_segment
*seg
,
2595 char *params
, size_t paramsize
)
2598 char pool
[DM_FORMAT_DEV_BUFSIZE
];
2599 char external
[DM_FORMAT_DEV_BUFSIZE
+ 1];
2601 if (!_build_dev_string(pool
, sizeof(pool
), seg
->pool
))
2608 if (!_build_dev_string(external
+ 1, sizeof(external
) - 1,
2613 EMIT_PARAMS(pos
, "%s %d%s", pool
, seg
->device_id
, external
);
2618 static int _emit_segment_line(struct dm_task
*dmt
, uint32_t major
,
2619 uint32_t minor
, struct load_segment
*seg
,
2620 uint64_t *seg_start
, char *params
,
2624 int target_type_is_raid
= 0;
2625 char originbuf
[DM_FORMAT_DEV_BUFSIZE
], cowbuf
[DM_FORMAT_DEV_BUFSIZE
];
2633 /* Mirrors are pretty complicated - now in separate function */
2634 if (!_mirror_emit_segment_line(dmt
, seg
, params
, paramsize
))
2638 case SEG_SNAPSHOT_MERGE
:
2639 if (!_build_dev_string(originbuf
, sizeof(originbuf
), seg
->origin
))
2641 if (!_build_dev_string(cowbuf
, sizeof(cowbuf
), seg
->cow
))
2643 EMIT_PARAMS(pos
, "%s %s %c %d", originbuf
, cowbuf
,
2644 seg
->persistent
? 'P' : 'N', seg
->chunk_size
);
2646 case SEG_SNAPSHOT_ORIGIN
:
2647 if (!_build_dev_string(originbuf
, sizeof(originbuf
), seg
->origin
))
2649 EMIT_PARAMS(pos
, "%s", originbuf
);
2652 EMIT_PARAMS(pos
, "%u %u ", seg
->area_count
, seg
->stripe_size
);
2655 EMIT_PARAMS(pos
, "%s%s%s%s%s %s %" PRIu64
" ", seg
->cipher
,
2656 seg
->chainmode
? "-" : "", seg
->chainmode
?: "",
2657 seg
->iv
? "-" : "", seg
->iv
?: "", seg
->key
,
2658 seg
->iv_offset
!= DM_CRYPT_IV_DEFAULT
?
2659 seg
->iv_offset
: *seg_start
);
2662 case SEG_RAID0_META
:
2675 case SEG_RAID6_LS_6
:
2676 case SEG_RAID6_RS_6
:
2677 case SEG_RAID6_LA_6
:
2678 case SEG_RAID6_RA_6
:
2679 target_type_is_raid
= 1;
2680 if (!_raid_emit_segment_line(dmt
, major
, minor
, seg
, seg_start
,
2686 if (!_thin_pool_emit_segment_line(dmt
, seg
, params
, paramsize
))
2690 if (!_thin_emit_segment_line(dmt
, seg
, params
, paramsize
))
2694 if (!_cache_emit_segment_line(dmt
, seg
, params
, paramsize
))
2702 case SEG_SNAPSHOT_ORIGIN
:
2703 case SEG_SNAPSHOT_MERGE
:
2712 if (!_emit_areas_line(dmt
, seg
, params
, paramsize
, &pos
))
2716 log_error("No parameters supplied for %s target "
2717 "%u:%u.", _dm_segtypes
[seg
->type
].target
,
2724 log_debug_activation("Adding target to (%" PRIu32
":%" PRIu32
"): %" PRIu64
2725 " %" PRIu64
" %s %s", major
, minor
,
2726 *seg_start
, seg
->size
, target_type_is_raid
? "raid" :
2727 _dm_segtypes
[seg
->type
].target
, params
);
2729 if (!dm_task_add_target(dmt
, *seg_start
, seg
->size
,
2730 target_type_is_raid
? "raid" :
2731 _dm_segtypes
[seg
->type
].target
, params
))
2734 *seg_start
+= seg
->size
;
2741 static int _emit_segment(struct dm_task
*dmt
, uint32_t major
, uint32_t minor
,
2742 struct load_segment
*seg
, uint64_t *seg_start
)
2745 size_t paramsize
= 4096; /* FIXME: too small for long RAID lines when > 64 devices supported */
2749 if (!(params
= dm_malloc(paramsize
))) {
2750 log_error("Insufficient space for target parameters.");
2755 ret
= _emit_segment_line(dmt
, major
, minor
, seg
, seg_start
,
2765 log_debug_activation("Insufficient space in params[%" PRIsize_t
2766 "] for target parameters.", paramsize
);
2769 } while (paramsize
< MAX_TARGET_PARAMSIZE
);
2771 log_error("Target parameter size too big. Aborting.");
2775 static int _load_node(struct dm_tree_node
*dnode
)
2778 struct dm_task
*dmt
;
2779 struct load_segment
*seg
;
2780 uint64_t seg_start
= 0, existing_table_size
;
2782 log_verbose("Loading table for %s.", _node_name(dnode
));
2784 if (!(dmt
= dm_task_create(DM_DEVICE_RELOAD
))) {
2785 log_error("Reload dm_task creation failed for %s.", _node_name(dnode
));
2789 if (!dm_task_set_major(dmt
, dnode
->info
.major
) ||
2790 !dm_task_set_minor(dmt
, dnode
->info
.minor
)) {
2791 log_error("Failed to set device number for %s reload.", _node_name(dnode
));
2795 if (dnode
->props
.read_only
&& !dm_task_set_ro(dmt
)) {
2796 log_error("Failed to set read only flag for %s.", _node_name(dnode
));
2800 if (!dm_task_no_open_count(dmt
))
2801 log_warn("WARNING: Failed to disable open_count.");
2803 dm_list_iterate_items(seg
, &dnode
->props
.segs
)
2804 if (!_emit_segment(dmt
, dnode
->info
.major
, dnode
->info
.minor
,
2808 if (!dm_task_suppress_identical_reload(dmt
))
2809 log_warn("WARNING: Failed to suppress reload of identical tables.");
2811 if ((r
= dm_task_run(dmt
))) {
2812 r
= dm_task_get_info(dmt
, &dnode
->info
);
2813 if (r
&& !dnode
->info
.inactive_table
)
2814 log_verbose("Suppressed %s identical table reload.",
2817 existing_table_size
= dm_task_get_existing_table_size(dmt
);
2818 if ((dnode
->props
.size_changed
=
2819 (existing_table_size
== seg_start
) ? 0 :
2820 (existing_table_size
> seg_start
) ? -1 : 1)) {
2822 * Kernel usually skips size validation on zero-length devices
2823 * now so no need to preload them.
2825 /* FIXME In which kernel version did this begin? */
2826 if (!existing_table_size
&& dnode
->props
.delay_resume_if_new
)
2827 dnode
->props
.size_changed
= 0;
2829 log_debug_activation("Table size changed from %" PRIu64
" to %"
2830 PRIu64
" for %s.%s", existing_table_size
,
2831 seg_start
, _node_name(dnode
),
2832 dnode
->props
.size_changed
? "" : " (Ignoring.)");
2835 * FIXME: code here has known design problem.
2836 * LVM2 does NOT resize thin-pool on top of other LV in 2 steps -
2837 * where raid would be resized with 1st. transaction
2838 * followed by 2nd. thin-pool resize - RHBZ #1285063
2840 if (existing_table_size
&& dnode
->props
.delay_resume_if_extended
) {
2841 log_debug_activation("Resume of table of extended device %s delayed.",
2843 dnode
->props
.size_changed
= 0;
2848 dnode
->props
.segment_count
= 0;
2851 dm_task_destroy(dmt
);
2856 /* Try to deactivate only nodes created during preload. */
2857 static int _dm_tree_revert_activated(struct dm_tree_node
*dnode
)
2859 void *handle
= NULL
;
2860 struct dm_tree_node
*child
;
2862 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
2863 if (child
->activated
) {
2864 if (child
->callback
) {
2865 log_debug_activation("Dropping callback for %s.", _node_name(child
));
2866 child
->callback
= NULL
;
2869 log_debug_activation("Reverting %s.", _node_name(child
));
2870 if (!_deactivate_node(child
->name
, child
->info
.major
, child
->info
.minor
,
2871 &child
->dtree
->cookie
, child
->udev_flags
, 0)) {
2872 log_debug_activation("Unable to deactivate %s.", _node_name(child
));
2877 if (dm_tree_node_num_children(child
, 0) &&
2878 !_dm_tree_revert_activated(child
))
2885 static int _dm_tree_wait_and_revert_activated(struct dm_tree_node
*dnode
)
2887 if (!dm_udev_wait(dm_tree_get_cookie(dnode
)))
2890 dm_tree_set_cookie(dnode
, 0);
2892 return _dm_tree_revert_activated(dnode
);
2895 int dm_tree_preload_children(struct dm_tree_node
*dnode
,
2896 const char *uuid_prefix
,
2897 size_t uuid_prefix_len
)
2899 int r
= 1, node_created
= 0;
2900 void *handle
= NULL
;
2901 struct dm_tree_node
*child
;
2902 int update_devs_flag
= 0;
2904 /* Preload children first */
2905 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
2906 /* Propagate delay of resume from parent node */
2907 if (dnode
->props
.delay_resume_if_new
> 1)
2908 child
->props
.delay_resume_if_new
= dnode
->props
.delay_resume_if_new
;
2910 /* Skip existing non-device-mapper devices */
2911 if (!child
->info
.exists
&& child
->info
.major
)
2914 /* Ignore if it doesn't belong to this VG */
2915 if (child
->info
.exists
&&
2916 !_uuid_prefix_matches(child
->uuid
, uuid_prefix
, uuid_prefix_len
))
2919 if (dm_tree_node_num_children(child
, 0))
2920 if (!dm_tree_preload_children(child
, uuid_prefix
, uuid_prefix_len
))
2923 /* FIXME Cope if name exists with no uuid? */
2924 if (!child
->info
.exists
&& !(node_created
= _create_node(child
, dnode
)))
2927 /* Propagate delayed resume from extended child node */
2928 if (child
->props
.delay_resume_if_extended
)
2929 dnode
->props
.delay_resume_if_extended
= 1;
2931 if (!child
->info
.inactive_table
&&
2932 child
->props
.segment_count
&&
2933 !_load_node(child
)) {
2936 * If the table load fails, try to device in the kernel
2937 * together with other created and preloaded devices.
2939 if (!_dm_tree_wait_and_revert_activated(dnode
))
2945 /* No resume for a device without parents or with unchanged or smaller size */
2946 if (!dm_tree_node_num_children(child
, 1) || (child
->props
.size_changed
<= 0))
2949 if (!child
->info
.inactive_table
&& !child
->info
.suspended
)
2952 if (!_resume_node(child
->name
, child
->info
.major
, child
->info
.minor
,
2953 child
->props
.read_ahead
, child
->props
.read_ahead_flags
,
2954 &child
->info
, &child
->dtree
->cookie
, child
->udev_flags
,
2955 child
->info
.suspended
)) {
2956 log_error("Unable to resume %s.", _node_name(child
));
2957 if (!_dm_tree_wait_and_revert_activated(dnode
))
2964 /* When creating new node also check transaction_id. */
2965 if (child
->props
.send_messages
&&
2966 !_node_send_messages(child
, uuid_prefix
, uuid_prefix_len
, 0)) {
2968 if (!_dm_tree_wait_and_revert_activated(dnode
))
2976 * Prepare for immediate synchronization with udev and flush all stacked
2977 * dev node operations if requested by immediate_dev_node property. But
2978 * finish processing current level in the tree first.
2980 if (child
->props
.immediate_dev_node
)
2981 update_devs_flag
= 1;
2984 if (update_devs_flag
||
2985 (r
&& !dnode
->info
.exists
&& dnode
->callback
)) {
2986 if (!dm_udev_wait(dm_tree_get_cookie(dnode
)))
2988 dm_tree_set_cookie(dnode
, 0);
2990 if (r
&& !dnode
->info
.exists
&& dnode
->callback
&&
2991 !dnode
->callback(dnode
, DM_NODE_CALLBACK_PRELOADED
,
2992 dnode
->callback_data
))
2994 /* Try to deactivate what has been activated in preload phase */
2995 (void) _dm_tree_revert_activated(dnode
);
3004 * Returns 1 if unsure.
3006 int dm_tree_children_use_uuid(struct dm_tree_node
*dnode
,
3007 const char *uuid_prefix
,
3008 size_t uuid_prefix_len
)
3010 void *handle
= NULL
;
3011 struct dm_tree_node
*child
= dnode
;
3014 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
3015 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
3016 log_warn("WARNING: Failed to get uuid for dtree node %s.",
3021 if (_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
3024 if (dm_tree_node_num_children(child
, 0))
3025 dm_tree_children_use_uuid(child
, uuid_prefix
, uuid_prefix_len
);
3034 static struct load_segment
*_add_segment(struct dm_tree_node
*dnode
, unsigned type
, uint64_t size
)
3036 struct load_segment
*seg
;
3038 if (!(seg
= dm_pool_zalloc(dnode
->dtree
->mem
, sizeof(*seg
)))) {
3039 log_error("dtree node segment allocation failed");
3045 dm_list_init(&seg
->areas
);
3046 dm_list_add(&dnode
->props
.segs
, &seg
->list
);
3047 dnode
->props
.segment_count
++;
3052 int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node
*dnode
,
3054 const char *origin_uuid
)
3056 struct load_segment
*seg
;
3057 struct dm_tree_node
*origin_node
;
3059 if (!(seg
= _add_segment(dnode
, SEG_SNAPSHOT_ORIGIN
, size
)))
3062 if (!(origin_node
= dm_tree_find_node_by_uuid(dnode
->dtree
, origin_uuid
))) {
3063 log_error("Couldn't find snapshot origin uuid %s.", origin_uuid
);
3067 seg
->origin
= origin_node
;
3068 if (!_link_tree_nodes(dnode
, origin_node
))
3071 /* Resume snapshot origins after new snapshots */
3072 dnode
->activation_priority
= 1;
3074 if (!dnode
->info
.exists
)
3075 /* Reactivate siblings for this origin after being resumed */
3076 dnode
->props
.reactivate_siblings
= 1;
3079 * Don't resume the origin immediately in case it is a non-trivial
3080 * target that must not be active more than once concurrently!
3082 origin_node
->props
.delay_resume_if_new
= 1;
3087 static int _add_snapshot_target(struct dm_tree_node
*node
,
3089 const char *origin_uuid
,
3090 const char *cow_uuid
,
3091 const char *merge_uuid
,
3093 uint32_t chunk_size
)
3095 struct load_segment
*seg
;
3096 struct dm_tree_node
*origin_node
, *cow_node
, *merge_node
;
3099 seg_type
= !merge_uuid
? SEG_SNAPSHOT
: SEG_SNAPSHOT_MERGE
;
3101 if (!(seg
= _add_segment(node
, seg_type
, size
)))
3104 if (!(origin_node
= dm_tree_find_node_by_uuid(node
->dtree
, origin_uuid
))) {
3105 log_error("Couldn't find snapshot origin uuid %s.", origin_uuid
);
3109 seg
->origin
= origin_node
;
3110 if (!_link_tree_nodes(node
, origin_node
))
3113 if (!(cow_node
= dm_tree_find_node_by_uuid(node
->dtree
, cow_uuid
))) {
3114 log_error("Couldn't find snapshot COW device uuid %s.", cow_uuid
);
3118 seg
->cow
= cow_node
;
3119 if (!_link_tree_nodes(node
, cow_node
))
3122 seg
->persistent
= persistent
? 1 : 0;
3123 seg
->chunk_size
= chunk_size
;
3126 if (!(merge_node
= dm_tree_find_node_by_uuid(node
->dtree
, merge_uuid
))) {
3127 /* not a pure error, merging snapshot may have been deactivated */
3128 log_verbose("Couldn't find merging snapshot uuid %s.", merge_uuid
);
3130 seg
->merge
= merge_node
;
3131 /* must not link merging snapshot, would undermine activation_priority below */
3134 /* Resume snapshot-merge (acting origin) after other snapshots */
3135 node
->activation_priority
= 1;
3137 /* Resume merging snapshot after snapshot-merge */
3138 seg
->merge
->activation_priority
= 2;
3140 } else if (!origin_node
->info
.exists
) {
3141 /* Keep original udev_flags for reactivation. */
3142 node
->props
.reactivate_udev_flags
= node
->udev_flags
;
3144 /* Reactivation is needed if the origin's -real device is not in DM table.
3145 * For this case after the resume of its origin LV we resume its snapshots
3146 * with updated udev_flags to completely avoid udev scanning for the first resume.
3147 * Reactivation then resumes snapshots with original udev_flags.
3149 node
->udev_flags
|= DM_SUBSYSTEM_UDEV_FLAG0
|
3150 DM_UDEV_DISABLE_DISK_RULES_FLAG
|
3151 DM_UDEV_DISABLE_OTHER_RULES_FLAG
;
3152 log_debug_activation("Using udev_flags 0x%x for activation of %s.",
3153 node
->udev_flags
, node
->name
);
3160 int dm_tree_node_add_snapshot_target(struct dm_tree_node
*node
,
3162 const char *origin_uuid
,
3163 const char *cow_uuid
,
3165 uint32_t chunk_size
)
3167 return _add_snapshot_target(node
, size
, origin_uuid
, cow_uuid
,
3168 NULL
, persistent
, chunk_size
);
3171 int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node
*node
,
3173 const char *origin_uuid
,
3174 const char *cow_uuid
,
3175 const char *merge_uuid
,
3176 uint32_t chunk_size
)
3178 return _add_snapshot_target(node
, size
, origin_uuid
, cow_uuid
,
3179 merge_uuid
, 1, chunk_size
);
3182 int dm_tree_node_add_error_target(struct dm_tree_node
*node
,
3185 if (!_add_segment(node
, SEG_ERROR
, size
))
3191 int dm_tree_node_add_zero_target(struct dm_tree_node
*node
,
3194 if (!_add_segment(node
, SEG_ZERO
, size
))
3200 int dm_tree_node_add_linear_target(struct dm_tree_node
*node
,
3203 if (!_add_segment(node
, SEG_LINEAR
, size
))
3209 int dm_tree_node_add_striped_target(struct dm_tree_node
*node
,
3211 uint32_t stripe_size
)
3213 struct load_segment
*seg
;
3215 if (!(seg
= _add_segment(node
, SEG_STRIPED
, size
)))
3218 seg
->stripe_size
= stripe_size
;
3223 int dm_tree_node_add_crypt_target(struct dm_tree_node
*node
,
3226 const char *chainmode
,
3231 struct load_segment
*seg
;
3233 if (!(seg
= _add_segment(node
, SEG_CRYPT
, size
)))
3236 seg
->cipher
= cipher
;
3237 seg
->chainmode
= chainmode
;
3239 seg
->iv_offset
= iv_offset
;
3245 int dm_tree_node_add_mirror_target_log(struct dm_tree_node
*node
,
3246 uint32_t region_size
,
3248 const char *log_uuid
,
3249 unsigned area_count
,
3252 struct dm_tree_node
*log_node
= NULL
;
3253 struct load_segment
*seg
;
3255 if (!(seg
= _get_last_load_segment(node
)))
3259 if (!(seg
->uuid
= dm_pool_strdup(node
->dtree
->mem
, log_uuid
))) {
3260 log_error("log uuid pool_strdup failed");
3263 if ((flags
& DM_CORELOG
))
3264 /* For pvmove: immediate resume (for size validation) isn't needed. */
3265 /* pvmove flag passed via unused UUID and its suffix */
3266 node
->props
.delay_resume_if_new
= strstr(log_uuid
, "pvmove") ? 2 : 1;
3268 if (!(log_node
= dm_tree_find_node_by_uuid(node
->dtree
, log_uuid
))) {
3269 log_error("Couldn't find mirror log uuid %s.", log_uuid
);
3274 log_node
->props
.immediate_dev_node
= 1;
3276 /* The kernel validates the size of disk logs. */
3277 /* FIXME Propagate to any devices below */
3278 log_node
->props
.delay_resume_if_new
= 0;
3280 if (!_link_tree_nodes(node
, log_node
))
3285 seg
->log
= log_node
;
3286 seg
->region_size
= region_size
;
3287 seg
->clustered
= clustered
;
3288 seg
->mirror_area_count
= area_count
;
3294 int dm_tree_node_add_mirror_target(struct dm_tree_node
*node
,
3297 if (!_add_segment(node
, SEG_MIRRORED
, size
))
3303 int dm_tree_node_add_raid_target_with_params(struct dm_tree_node
*node
,
3305 const struct dm_tree_node_raid_params
*p
)
3308 struct load_segment
*seg
= NULL
;
3310 for (i
= 0; i
< DM_ARRAY_SIZE(_dm_segtypes
) && !seg
; ++i
)
3311 if (!strcmp(p
->raid_type
, _dm_segtypes
[i
].target
))
3312 if (!(seg
= _add_segment(node
,
3313 _dm_segtypes
[i
].type
, size
)))
3316 log_error("Unsupported raid type %s.", p
->raid_type
);
3320 seg
->region_size
= p
->region_size
;
3321 seg
->stripe_size
= p
->stripe_size
;
3322 seg
->area_count
= 0;
3323 memset(seg
->rebuilds
, 0, sizeof(seg
->rebuilds
));
3324 seg
->rebuilds
[0] = p
->rebuilds
;
3325 memset(seg
->writemostly
, 0, sizeof(seg
->writemostly
));
3326 seg
->writemostly
[0] = p
->writemostly
;
3327 seg
->writebehind
= p
->writebehind
;
3328 seg
->min_recovery_rate
= p
->min_recovery_rate
;
3329 seg
->max_recovery_rate
= p
->max_recovery_rate
;
3330 seg
->flags
= p
->flags
;
3335 int dm_tree_node_add_raid_target(struct dm_tree_node
*node
,
3337 const char *raid_type
,
3338 uint32_t region_size
,
3339 uint32_t stripe_size
,
3343 struct dm_tree_node_raid_params params
= {
3344 .raid_type
= raid_type
,
3345 .region_size
= region_size
,
3346 .stripe_size
= stripe_size
,
3347 .rebuilds
= rebuilds
,
3351 return dm_tree_node_add_raid_target_with_params(node
, size
, ¶ms
);
3355 * Version 2 of dm_tree_node_add_raid_target() allowing for:
3357 * - maximum 253 legs in a raid set (MD kernel limitation)
3358 * - delta_disks for disk add/remove reshaping
3359 * - data_offset for out-of-place reshaping
3360 * - data_copies to cope with odd numbers of raid10 disks
3362 int dm_tree_node_add_raid_target_with_params_v2(struct dm_tree_node
*node
,
3364 const struct dm_tree_node_raid_params_v2
*p
)
3367 struct load_segment
*seg
= NULL
;
3369 for (i
= 0; i
< DM_ARRAY_SIZE(_dm_segtypes
) && !seg
; ++i
)
3370 if (!strcmp(p
->raid_type
, _dm_segtypes
[i
].target
))
3371 if (!(seg
= _add_segment(node
,
3372 _dm_segtypes
[i
].type
, size
)))
3375 log_error("Unsupported raid type %s.", p
->raid_type
);
3379 seg
->region_size
= p
->region_size
;
3380 seg
->stripe_size
= p
->stripe_size
;
3381 seg
->area_count
= 0;
3382 seg
->delta_disks
= p
->delta_disks
;
3383 seg
->data_offset
= p
->data_offset
;
3384 memcpy(seg
->rebuilds
, p
->rebuilds
, sizeof(seg
->rebuilds
));
3385 memcpy(seg
->writemostly
, p
->writemostly
, sizeof(seg
->writemostly
));
3386 seg
->writebehind
= p
->writebehind
;
3387 seg
->data_copies
= p
->data_copies
;
3388 seg
->min_recovery_rate
= p
->min_recovery_rate
;
3389 seg
->max_recovery_rate
= p
->max_recovery_rate
;
3390 seg
->flags
= p
->flags
;
3395 DM_EXPORT_NEW_SYMBOL(int, dm_tree_node_add_cache_target
, 1_02_138
)
3396 (struct dm_tree_node
*node
,
3398 uint64_t feature_flags
, /* DM_CACHE_FEATURE_* */
3399 const char *metadata_uuid
,
3400 const char *data_uuid
,
3401 const char *origin_uuid
,
3402 const char *policy_name
,
3403 const struct dm_config_node
*policy_settings
,
3404 uint32_t data_block_size
)
3406 struct dm_config_node
*cn
;
3407 struct load_segment
*seg
;
3408 const uint64_t _modemask
=
3409 DM_CACHE_FEATURE_PASSTHROUGH
|
3410 DM_CACHE_FEATURE_WRITETHROUGH
|
3411 DM_CACHE_FEATURE_WRITEBACK
;
3413 /* Detect unknown (bigger) feature bit */
3414 if (feature_flags
>= (DM_CACHE_FEATURE_METADATA2
* 2)) {
3415 log_error("Unsupported cache's feature flags set " FMTu64
".",
3420 switch (feature_flags
& _modemask
) {
3421 case DM_CACHE_FEATURE_PASSTHROUGH
:
3422 case DM_CACHE_FEATURE_WRITEBACK
:
3423 if (strcmp(policy_name
, "cleaner") == 0) {
3424 /* Enforce writethrough mode for cleaner policy */
3425 feature_flags
= ~_modemask
;
3426 feature_flags
|= DM_CACHE_FEATURE_WRITETHROUGH
;
3429 case DM_CACHE_FEATURE_WRITETHROUGH
:
3432 log_error("Invalid cache's feature flag " FMTu64
".",
3437 if (data_block_size
< DM_CACHE_MIN_DATA_BLOCK_SIZE
) {
3438 log_error("Data block size %u is lower then %u sectors.",
3439 data_block_size
, DM_CACHE_MIN_DATA_BLOCK_SIZE
);
3443 if (data_block_size
> DM_CACHE_MAX_DATA_BLOCK_SIZE
) {
3444 log_error("Data block size %u is higher then %u sectors.",
3445 data_block_size
, DM_CACHE_MAX_DATA_BLOCK_SIZE
);
3449 if (!(seg
= _add_segment(node
, SEG_CACHE
, size
)))
3452 if (!(seg
->pool
= dm_tree_find_node_by_uuid(node
->dtree
,
3454 log_error("Missing cache's data uuid %s.",
3458 if (!_link_tree_nodes(node
, seg
->pool
))
3461 if (!(seg
->metadata
= dm_tree_find_node_by_uuid(node
->dtree
,
3463 log_error("Missing cache's metadata uuid %s.",
3467 if (!_link_tree_nodes(node
, seg
->metadata
))
3470 if (!(seg
->origin
= dm_tree_find_node_by_uuid(node
->dtree
,
3472 log_error("Missing cache's origin uuid %s.",
3476 if (!_link_tree_nodes(node
, seg
->origin
))
3479 seg
->data_block_size
= data_block_size
;
3480 seg
->flags
= feature_flags
;
3481 seg
->policy_name
= policy_name
;
3482 seg
->migration_threshold
= 2048; /* Default migration threshold 1MiB */
3484 /* FIXME: better validation missing */
3485 if (policy_settings
) {
3486 if (!(seg
->policy_settings
= dm_config_clone_node_with_mem(node
->dtree
->mem
, policy_settings
, 0)))
3489 for (cn
= seg
->policy_settings
->child
; cn
; cn
= cn
->sib
) {
3490 if (!cn
->v
|| (cn
->v
->type
!= DM_CFG_INT
)) {
3491 /* For now only <key> = <int> pairs are supported */
3492 log_error("Cache policy parameter %s is without integer value.", cn
->key
);
3495 if (strcmp(cn
->key
, "migration_threshold") == 0) {
3496 seg
->migration_threshold
= cn
->v
->v
.i
;
3497 cn
->v
= NULL
; /* skip this entry */
3503 /* Always some throughput available for cache to proceed */
3504 if (seg
->migration_threshold
< data_block_size
* 8)
3505 seg
->migration_threshold
= data_block_size
* 8;
3510 int dm_tree_node_add_replicator_target(struct dm_tree_node
*node
,
3512 const char *rlog_uuid
,
3513 const char *rlog_type
,
3514 unsigned rsite_index
,
3515 dm_replicator_mode_t mode
,
3516 uint32_t async_timeout
,
3517 uint64_t fall_behind_data
,
3518 uint32_t fall_behind_ios
)
3520 log_error("Replicator segment is unsupported.");
3524 /* Appends device node to Replicator */
3525 int dm_tree_node_add_replicator_dev_target(struct dm_tree_node
*node
,
3527 const char *replicator_uuid
,
3528 uint64_t rdevice_index
,
3529 const char *rdev_uuid
,
3530 unsigned rsite_index
,
3531 const char *slog_uuid
,
3532 uint32_t slog_flags
,
3533 uint32_t slog_region_size
)
3535 log_error("Replicator target is unsupported.");
3539 static struct load_segment
*_get_single_load_segment(struct dm_tree_node
*node
,
3542 struct load_segment
*seg
;
3544 if (!(seg
= _get_last_load_segment(node
)))
3547 /* Never used past _load_node(), so can test segment_count */
3548 if (node
->props
.segment_count
!= 1) {
3549 log_error("Node %s must have only one segment.",
3550 _dm_segtypes
[type
].target
);
3554 if (seg
->type
!= type
) {
3555 log_error("Node %s has segment type %s.",
3556 _dm_segtypes
[type
].target
,
3557 _dm_segtypes
[seg
->type
].target
);
3564 static int _thin_validate_device_id(uint32_t device_id
)
3566 if (device_id
> DM_THIN_MAX_DEVICE_ID
) {
3567 log_error("Device id %u is higher then %u.",
3568 device_id
, DM_THIN_MAX_DEVICE_ID
);
3575 int dm_tree_node_add_thin_pool_target(struct dm_tree_node
*node
,
3577 uint64_t transaction_id
,
3578 const char *metadata_uuid
,
3579 const char *pool_uuid
,
3580 uint32_t data_block_size
,
3581 uint64_t low_water_mark
,
3582 unsigned skip_block_zeroing
)
3584 return dm_tree_node_add_thin_pool_target_v1(node
, size
, transaction_id
,
3585 metadata_uuid
, pool_uuid
,
3592 int dm_tree_node_add_thin_pool_target_v1(struct dm_tree_node
*node
,
3594 uint64_t transaction_id
,
3595 const char *metadata_uuid
,
3596 const char *pool_uuid
,
3597 uint32_t data_block_size
,
3598 uint64_t low_water_mark
,
3599 unsigned skip_block_zeroing
,
3600 unsigned crop_metadata
)
3602 struct load_segment
*seg
, *mseg
;
3603 uint64_t devsize
= 0;
3605 if (data_block_size
< DM_THIN_MIN_DATA_BLOCK_SIZE
) {
3606 log_error("Data block size %u is lower then %u sectors.",
3607 data_block_size
, DM_THIN_MIN_DATA_BLOCK_SIZE
);
3611 if (data_block_size
> DM_THIN_MAX_DATA_BLOCK_SIZE
) {
3612 log_error("Data block size %u is higher then %u sectors.",
3613 data_block_size
, DM_THIN_MAX_DATA_BLOCK_SIZE
);
3617 if (!(seg
= _add_segment(node
, SEG_THIN_POOL
, size
)))
3620 if (!(seg
->metadata
= dm_tree_find_node_by_uuid(node
->dtree
, metadata_uuid
))) {
3621 log_error("Missing metadata uuid %s.", metadata_uuid
);
3625 if (!_link_tree_nodes(node
, seg
->metadata
))
3629 /* FIXME: more complex target may need more tweaks */
3630 dm_list_iterate_items(mseg
, &seg
->metadata
->props
.segs
) {
3631 devsize
+= mseg
->size
;
3632 if (devsize
> DM_THIN_MAX_METADATA_SIZE
) {
3633 log_debug_activation("Ignoring %" PRIu64
" of device.",
3634 devsize
- DM_THIN_MAX_METADATA_SIZE
);
3635 mseg
->size
-= (devsize
- DM_THIN_MAX_METADATA_SIZE
);
3636 devsize
= DM_THIN_MAX_METADATA_SIZE
;
3637 /* FIXME: drop remaining segs */
3641 if (!(seg
->pool
= dm_tree_find_node_by_uuid(node
->dtree
, pool_uuid
))) {
3642 log_error("Missing pool uuid %s.", pool_uuid
);
3646 if (!_link_tree_nodes(node
, seg
->pool
))
3649 /* Clean flag delay_resume_if_new - so corelog gets resumed */
3650 seg
->metadata
->props
.delay_resume_if_new
= 0;
3651 seg
->pool
->props
.delay_resume_if_new
= 0;
3653 /* Preload must not resume extended running thin-pool before it's committed */
3654 node
->props
.delay_resume_if_extended
= 1;
3656 /* Validate only transaction_id > 0 when activating thin-pool */
3657 node
->props
.send_messages
= transaction_id
? 1 : 0;
3658 seg
->transaction_id
= transaction_id
;
3659 seg
->low_water_mark
= low_water_mark
;
3660 seg
->data_block_size
= data_block_size
;
3661 seg
->skip_block_zeroing
= skip_block_zeroing
;
3662 dm_list_init(&seg
->thin_messages
);
3667 int dm_tree_node_add_thin_pool_message(struct dm_tree_node
*node
,
3668 dm_thin_message_t type
,
3669 uint64_t id1
, uint64_t id2
)
3671 struct thin_message
*tm
;
3672 struct load_segment
*seg
;
3674 if (!(seg
= _get_single_load_segment(node
, SEG_THIN_POOL
)))
3677 if (!(tm
= dm_pool_zalloc(node
->dtree
->mem
, sizeof (*tm
)))) {
3678 log_error("Failed to allocate thin message.");
3683 case DM_THIN_MESSAGE_CREATE_SNAP
:
3684 /* If the thin origin is active, it must be suspend first! */
3686 log_error("Cannot use same device id for origin and its snapshot.");
3689 if (!_thin_validate_device_id(id1
) ||
3690 !_thin_validate_device_id(id2
))
3692 tm
->message
.u
.m_create_snap
.device_id
= id1
;
3693 tm
->message
.u
.m_create_snap
.origin_id
= id2
;
3695 case DM_THIN_MESSAGE_CREATE_THIN
:
3696 if (!_thin_validate_device_id(id1
))
3698 tm
->message
.u
.m_create_thin
.device_id
= id1
;
3699 tm
->expected_errno
= EEXIST
;
3701 case DM_THIN_MESSAGE_DELETE
:
3702 if (!_thin_validate_device_id(id1
))
3704 tm
->message
.u
.m_delete
.device_id
= id1
;
3705 tm
->expected_errno
= ENODATA
;
3707 case DM_THIN_MESSAGE_SET_TRANSACTION_ID
:
3708 if ((id1
+ 1) != id2
) {
3709 log_error("New transaction id must be sequential.");
3710 return 0; /* FIXME: Maybe too strict here? */
3712 if (id2
!= seg
->transaction_id
) {
3713 log_error("Current transaction id is different from thin pool.");
3714 return 0; /* FIXME: Maybe too strict here? */
3716 tm
->message
.u
.m_set_transaction_id
.current_id
= id1
;
3717 tm
->message
.u
.m_set_transaction_id
.new_id
= id2
;
3720 log_error("Unsupported message type %d.", (int) type
);
3724 tm
->message
.type
= type
;
3725 dm_list_add(&seg
->thin_messages
, &tm
->list
);
3726 /* Higher value >1 identifies there are really some messages */
3727 node
->props
.send_messages
= 2;
3732 int dm_tree_node_set_thin_pool_discard(struct dm_tree_node
*node
,
3734 unsigned no_passdown
)
3736 struct load_segment
*seg
;
3738 if (!(seg
= _get_single_load_segment(node
, SEG_THIN_POOL
)))
3741 seg
->ignore_discard
= ignore
;
3742 seg
->no_discard_passdown
= no_passdown
;
3747 int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node
*node
,
3748 unsigned error_if_no_space
)
3750 struct load_segment
*seg
;
3752 if (!(seg
= _get_single_load_segment(node
, SEG_THIN_POOL
)))
3755 seg
->error_if_no_space
= error_if_no_space
;
3760 int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node
*node
,
3763 struct load_segment
*seg
;
3765 if (!(seg
= _get_single_load_segment(node
, SEG_THIN_POOL
)))
3768 seg
->read_only
= read_only
;
3773 int dm_tree_node_add_thin_target(struct dm_tree_node
*node
,
3775 const char *pool_uuid
,
3778 struct dm_tree_node
*pool
;
3779 struct load_segment
*seg
;
3781 if (!(pool
= dm_tree_find_node_by_uuid(node
->dtree
, pool_uuid
))) {
3782 log_error("Missing thin pool uuid %s.", pool_uuid
);
3786 if (!_link_tree_nodes(node
, pool
))
3789 if (!_thin_validate_device_id(device_id
))
3792 if (!(seg
= _add_segment(node
, SEG_THIN
, size
)))
3796 seg
->device_id
= device_id
;
3801 int dm_tree_node_set_thin_external_origin(struct dm_tree_node
*node
,
3802 const char *external_uuid
)
3804 struct dm_tree_node
*external
;
3805 struct load_segment
*seg
;
3807 if (!(seg
= _get_single_load_segment(node
, SEG_THIN
)))
3810 if (!(external
= dm_tree_find_node_by_uuid(node
->dtree
,
3812 log_error("Missing thin external origin uuid %s.",
3817 if (!_link_tree_nodes(node
, external
))
3820 seg
->external
= external
;
3825 static int _add_area(struct dm_tree_node
*node
, struct load_segment
*seg
, struct dm_tree_node
*dev_node
, uint64_t offset
)
3827 struct seg_area
*area
;
3829 if (!(area
= dm_pool_zalloc(node
->dtree
->mem
, sizeof (*area
)))) {
3830 log_error("Failed to allocate target segment area.");
3834 area
->dev_node
= dev_node
;
3835 area
->offset
= offset
;
3837 dm_list_add(&seg
->areas
, &area
->list
);
3843 int dm_tree_node_add_target_area(struct dm_tree_node
*node
,
3844 const char *dev_name
,
3848 struct load_segment
*seg
;
3850 struct dm_tree_node
*dev_node
;
3852 if ((!dev_name
|| !*dev_name
) && (!uuid
|| !*uuid
)) {
3853 log_error("dm_tree_node_add_target_area called without device");
3858 if (!(dev_node
= dm_tree_find_node_by_uuid(node
->dtree
, uuid
))) {
3859 log_error("Couldn't find area uuid %s.", uuid
);
3862 if (!_link_tree_nodes(node
, dev_node
))
3865 if (stat(dev_name
, &info
) < 0) {
3866 log_error("Device %s not found.", dev_name
);
3870 if (!S_ISBLK(info
.st_mode
)) {
3871 log_error("Device %s is not a block device.", dev_name
);
3875 /* FIXME Check correct macro use */
3876 if (!(dev_node
= _add_dev(node
->dtree
, node
, MAJOR(info
.st_rdev
),
3877 MINOR(info
.st_rdev
), 0, 0)))
3881 if (!(seg
= _get_last_load_segment(node
)))
3884 if (!_add_area(node
, seg
, dev_node
, offset
))
3890 int dm_tree_node_add_null_area(struct dm_tree_node
*node
, uint64_t offset
)
3892 struct load_segment
*seg
;
3894 if (!(seg
= _get_last_load_segment(node
)))
3897 switch (seg
->type
) {
3899 case SEG_RAID0_META
:
3911 case SEG_RAID6_LS_6
:
3912 case SEG_RAID6_RS_6
:
3913 case SEG_RAID6_LA_6
:
3914 case SEG_RAID6_RA_6
:
3917 log_error("dm_tree_node_add_null_area() called on an unsupported segment type");
3921 if (!_add_area(node
, seg
, NULL
, offset
))
3927 void dm_tree_node_set_callback(struct dm_tree_node
*dnode
,
3928 dm_node_callback_fn cb
, void *data
)
3930 dnode
->callback
= cb
;
3931 dnode
->callback_data
= data
;
3934 #if defined(GNU_SYMVER)
3936 * Backward compatible implementations.
3938 * Keep these at the end of the file to make sure that
3939 * no code in this file accidentally calls it.
3942 /* Backward compatible dm_tree_node_size_changed() implementations. */
3943 DM_EXPORT_SYMBOL_BASE(dm_tree_node_size_changed
)
3944 int dm_tree_node_size_changed_base(const struct dm_tree_node
*dnode
);
3945 int dm_tree_node_size_changed_base(const struct dm_tree_node
*dnode
)
3947 /* Base does not make difference between smaller and bigger */
3948 return dm_tree_node_size_changed(dnode
) ? 1 : 0;
3952 * Retain ABI compatibility after adding the DM_CACHE_FEATURE_METADATA2
3953 * in version 1.02.138.
3955 * Binaries compiled against version 1.02.138 onwards will use
3956 * the new function dm_tree_node_add_cache_target which detects unknown
3957 * feature flags and returns error for them.
3959 DM_EXPORT_SYMBOL_BASE(dm_tree_node_add_cache_target
)
3960 int dm_tree_node_add_cache_target_base(struct dm_tree_node
*node
,
3962 uint64_t feature_flags
, /* DM_CACHE_FEATURE_* */
3963 const char *metadata_uuid
,
3964 const char *data_uuid
,
3965 const char *origin_uuid
,
3966 const char *policy_name
,
3967 const struct dm_config_node
*policy_settings
,
3968 uint32_t data_block_size
);
3969 int dm_tree_node_add_cache_target_base(struct dm_tree_node
*node
,
3971 uint64_t feature_flags
,
3972 const char *metadata_uuid
,
3973 const char *data_uuid
,
3974 const char *origin_uuid
,
3975 const char *policy_name
,
3976 const struct dm_config_node
*policy_settings
,
3977 uint32_t data_block_size
)
3979 /* Old version supported only these FEATURE bits, others were ignored so masked them */
3980 const uint64_t mask
=
3981 DM_CACHE_FEATURE_WRITEBACK
|
3982 DM_CACHE_FEATURE_WRITETHROUGH
|
3983 DM_CACHE_FEATURE_PASSTHROUGH
;
3985 return dm_tree_node_add_cache_target(node
, size
, feature_flags
& mask
,
3986 metadata_uuid
, data_uuid
, origin_uuid
,
3987 policy_name
, policy_settings
, data_block_size
);