2 * Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
4 * This file is part of the device-mapper userspace tools.
6 * This copyrighted material is made available to anyone wishing to use,
7 * modify, copy, or redistribute it subject to the terms and conditions
8 * of the GNU Lesser General Public License v.2.1.
10 * You should have received a copy of the GNU Lesser General Public License
11 * along with this program; if not, write to the Free Software Foundation,
12 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 #include "libdm-targets.h"
17 #include "libdm-common.h"
22 #include <sys/param.h>
23 #include <sys/utsname.h>
25 #define MAX_TARGET_PARAMSIZE 500000
27 /* FIXME Fix interface so this is used only by LVM */
28 #define UUID_PREFIX "LVM-"
30 #define REPLICATOR_LOCAL_SITE 0
32 /* Supported segment types */
59 /* FIXME Add crypt and multipath support */
65 { SEG_CRYPT
, "crypt" },
66 { SEG_ERROR
, "error" },
67 { SEG_LINEAR
, "linear" },
68 { SEG_MIRRORED
, "mirror" },
69 { SEG_REPLICATOR
, "replicator" },
70 { SEG_REPLICATOR_DEV
, "replicator-dev" },
71 { SEG_SNAPSHOT
, "snapshot" },
72 { SEG_SNAPSHOT_ORIGIN
, "snapshot-origin" },
73 { SEG_SNAPSHOT_MERGE
, "snapshot-merge" },
74 { SEG_STRIPED
, "striped" },
76 { SEG_THIN_POOL
, "thin-pool"},
78 { SEG_RAID1
, "raid1"},
79 { SEG_RAID4
, "raid4"},
80 { SEG_RAID5_LA
, "raid5_la"},
81 { SEG_RAID5_RA
, "raid5_ra"},
82 { SEG_RAID5_LS
, "raid5_ls"},
83 { SEG_RAID5_RS
, "raid5_rs"},
84 { SEG_RAID6_ZR
, "raid6_zr"},
85 { SEG_RAID6_NR
, "raid6_nr"},
86 { SEG_RAID6_NC
, "raid6_nc"},
89 *WARNING: Since 'raid' target overloads this 1:1 mapping table
90 * for search do not add new enum elements past them!
92 { SEG_RAID5_LS
, "raid5"}, /* same as "raid5_ls" (default for MD also) */
93 { SEG_RAID6_ZR
, "raid6"}, /* same as "raid6_zr" */
97 /* Some segment types have a list of areas of other devices attached */
101 struct dm_tree_node
*dev_node
;
105 unsigned rsite_index
; /* Replicator site index */
106 struct dm_tree_node
*slog
; /* Replicator sync log node */
107 uint64_t region_size
; /* Replicator sync log size */
108 uint32_t flags
; /* Replicator sync log flags */
111 struct thin_message
{
113 struct dm_thin_message message
;
117 /* Replicator-log has a list of sites */
118 /* FIXME: maybe move to seg_area too? */
119 struct replicator_site
{
122 unsigned rsite_index
;
123 dm_replicator_mode_t mode
;
124 uint32_t async_timeout
;
125 uint32_t fall_behind_ios
;
126 uint64_t fall_behind_data
;
129 /* Per-segment properties */
130 struct load_segment
{
137 unsigned area_count
; /* Linear + Striped + Mirrored + Crypt + Replicator */
138 struct dm_list areas
; /* Linear + Striped + Mirrored + Crypt + Replicator */
140 uint32_t stripe_size
; /* Striped + raid */
142 int persistent
; /* Snapshot */
143 uint32_t chunk_size
; /* Snapshot */
144 struct dm_tree_node
*cow
; /* Snapshot */
145 struct dm_tree_node
*origin
; /* Snapshot + Snapshot origin */
146 struct dm_tree_node
*merge
; /* Snapshot */
148 struct dm_tree_node
*log
; /* Mirror + Replicator */
149 uint32_t region_size
; /* Mirror + raid */
150 unsigned clustered
; /* Mirror */
151 unsigned mirror_area_count
; /* Mirror */
152 uint32_t flags
; /* Mirror log */
153 char *uuid
; /* Clustered mirror log */
155 const char *cipher
; /* Crypt */
156 const char *chainmode
; /* Crypt */
157 const char *iv
; /* Crypt */
158 uint64_t iv_offset
; /* Crypt */
159 const char *key
; /* Crypt */
161 const char *rlog_type
; /* Replicator */
162 struct dm_list rsites
; /* Replicator */
163 unsigned rsite_count
; /* Replicator */
164 unsigned rdevice_count
; /* Replicator */
165 struct dm_tree_node
*replicator
;/* Replicator-dev */
166 uint64_t rdevice_index
; /* Replicator-dev */
168 uint64_t rebuilds
; /* raid */
170 struct dm_tree_node
*metadata
; /* Thin_pool */
171 struct dm_tree_node
*pool
; /* Thin_pool, Thin */
172 struct dm_list thin_messages
; /* Thin_pool */
173 uint64_t transaction_id
; /* Thin_pool */
174 uint64_t low_water_mark
; /* Thin_pool */
175 uint32_t data_block_size
; /* Thin_pool */
176 unsigned skip_block_zeroing
; /* Thin_pool */
177 uint32_t device_id
; /* Thin */
181 /* Per-device properties */
182 struct load_properties
{
188 uint32_t read_ahead_flags
;
190 unsigned segment_count
;
191 unsigned size_changed
;
194 const char *new_name
;
196 /* If immediate_dev_node is set to 1, try to create the dev node
197 * as soon as possible (e.g. in preload stage even during traversal
198 * and processing of dm tree). This will also flush all stacked dev
199 * node operations, synchronizing with udev.
201 unsigned immediate_dev_node
;
204 * If the device size changed from zero and this is set,
205 * don't resume the device immediately, even if the device
206 * has parents. This works provided the parents do not
207 * validate the device size and is required by pvmove to
208 * avoid starting the mirror resync operation too early.
210 unsigned delay_resume_if_new
;
212 /* Send messages for this node in preload */
213 unsigned send_messages
;
216 /* Two of these used to join two nodes with uses and used_by. */
217 struct dm_tree_link
{
219 struct dm_tree_node
*node
;
222 struct dm_tree_node
{
223 struct dm_tree
*dtree
;
229 struct dm_list uses
; /* Nodes this node uses */
230 struct dm_list used_by
; /* Nodes that use this node */
232 int activation_priority
; /* 0 gets activated first */
234 uint16_t udev_flags
; /* Udev control flags */
236 void *context
; /* External supplied context */
238 struct load_properties props
; /* For creation/table (re)load */
241 * If presuspend of child node is needed
242 * Note: only direct child is allowed
244 struct dm_tree_node
*presuspend_node
;
249 struct dm_hash_table
*devs
;
250 struct dm_hash_table
*uuids
;
251 struct dm_tree_node root
;
252 int skip_lockfs
; /* 1 skips lockfs (for non-snapshots) */
253 int no_flush
; /* 1 sets noflush (mirrors/multipath) */
254 int retry_remove
; /* 1 retries remove if not successful */
258 struct dm_tree
*dm_tree_create(void)
260 struct dm_pool
*dmem
;
261 struct dm_tree
*dtree
;
263 if (!(dmem
= dm_pool_create("dtree", 1024)) ||
264 !(dtree
= dm_pool_zalloc(dmem
, sizeof(*dtree
)))) {
265 log_error("Failed to allocate dtree.");
267 dm_pool_destroy(dmem
);
271 dtree
->root
.dtree
= dtree
;
272 dm_list_init(&dtree
->root
.uses
);
273 dm_list_init(&dtree
->root
.used_by
);
274 dtree
->skip_lockfs
= 0;
278 if (!(dtree
->devs
= dm_hash_create(8))) {
279 log_error("dtree hash creation failed");
280 dm_pool_destroy(dtree
->mem
);
284 if (!(dtree
->uuids
= dm_hash_create(32))) {
285 log_error("dtree uuid hash creation failed");
286 dm_hash_destroy(dtree
->devs
);
287 dm_pool_destroy(dtree
->mem
);
294 void dm_tree_free(struct dm_tree
*dtree
)
299 dm_hash_destroy(dtree
->uuids
);
300 dm_hash_destroy(dtree
->devs
);
301 dm_pool_destroy(dtree
->mem
);
304 static int _nodes_are_linked(const struct dm_tree_node
*parent
,
305 const struct dm_tree_node
*child
)
307 struct dm_tree_link
*dlink
;
309 dm_list_iterate_items(dlink
, &parent
->uses
)
310 if (dlink
->node
== child
)
316 static int _link(struct dm_list
*list
, struct dm_tree_node
*node
)
318 struct dm_tree_link
*dlink
;
320 if (!(dlink
= dm_pool_alloc(node
->dtree
->mem
, sizeof(*dlink
)))) {
321 log_error("dtree link allocation failed");
326 dm_list_add(list
, &dlink
->list
);
331 static int _link_nodes(struct dm_tree_node
*parent
,
332 struct dm_tree_node
*child
)
334 if (_nodes_are_linked(parent
, child
))
337 if (!_link(&parent
->uses
, child
))
340 if (!_link(&child
->used_by
, parent
))
346 static void _unlink(struct dm_list
*list
, struct dm_tree_node
*node
)
348 struct dm_tree_link
*dlink
;
350 dm_list_iterate_items(dlink
, list
)
351 if (dlink
->node
== node
) {
352 dm_list_del(&dlink
->list
);
357 static void _unlink_nodes(struct dm_tree_node
*parent
,
358 struct dm_tree_node
*child
)
360 if (!_nodes_are_linked(parent
, child
))
363 _unlink(&parent
->uses
, child
);
364 _unlink(&child
->used_by
, parent
);
367 static int _add_to_toplevel(struct dm_tree_node
*node
)
369 return _link_nodes(&node
->dtree
->root
, node
);
372 static void _remove_from_toplevel(struct dm_tree_node
*node
)
374 _unlink_nodes(&node
->dtree
->root
, node
);
377 static int _add_to_bottomlevel(struct dm_tree_node
*node
)
379 return _link_nodes(node
, &node
->dtree
->root
);
382 static void _remove_from_bottomlevel(struct dm_tree_node
*node
)
384 _unlink_nodes(node
, &node
->dtree
->root
);
387 static int _link_tree_nodes(struct dm_tree_node
*parent
, struct dm_tree_node
*child
)
389 /* Don't link to root node if child already has a parent */
390 if (parent
== &parent
->dtree
->root
) {
391 if (dm_tree_node_num_children(child
, 1))
394 _remove_from_toplevel(child
);
396 if (child
== &child
->dtree
->root
) {
397 if (dm_tree_node_num_children(parent
, 0))
400 _remove_from_bottomlevel(parent
);
402 return _link_nodes(parent
, child
);
405 static struct dm_tree_node
*_create_dm_tree_node(struct dm_tree
*dtree
,
408 struct dm_info
*info
,
412 struct dm_tree_node
*node
;
415 if (!(node
= dm_pool_zalloc(dtree
->mem
, sizeof(*node
)))) {
416 log_error("_create_dm_tree_node alloc failed");
425 node
->context
= context
;
426 node
->udev_flags
= udev_flags
;
427 node
->activation_priority
= 0;
429 dm_list_init(&node
->uses
);
430 dm_list_init(&node
->used_by
);
431 dm_list_init(&node
->props
.segs
);
433 dev
= MKDEV(info
->major
, info
->minor
);
435 if (!dm_hash_insert_binary(dtree
->devs
, (const char *) &dev
,
436 sizeof(dev
), node
)) {
437 log_error("dtree node hash insertion failed");
438 dm_pool_free(dtree
->mem
, node
);
443 !dm_hash_insert(dtree
->uuids
, uuid
, node
)) {
444 log_error("dtree uuid hash insertion failed");
445 dm_hash_remove_binary(dtree
->devs
, (const char *) &dev
,
447 dm_pool_free(dtree
->mem
, node
);
454 static struct dm_tree_node
*_find_dm_tree_node(struct dm_tree
*dtree
,
455 uint32_t major
, uint32_t minor
)
457 uint64_t dev
= MKDEV(major
, minor
);
459 return dm_hash_lookup_binary(dtree
->devs
, (const char *) &dev
,
463 static struct dm_tree_node
*_find_dm_tree_node_by_uuid(struct dm_tree
*dtree
,
466 struct dm_tree_node
*node
;
468 if ((node
= dm_hash_lookup(dtree
->uuids
, uuid
)))
471 if (strncmp(uuid
, UUID_PREFIX
, sizeof(UUID_PREFIX
) - 1))
474 return dm_hash_lookup(dtree
->uuids
, uuid
+ sizeof(UUID_PREFIX
) - 1);
477 static int _deps(struct dm_task
**dmt
, struct dm_pool
*mem
, uint32_t major
, uint32_t minor
,
478 const char **name
, const char **uuid
,
479 struct dm_info
*info
, struct dm_deps
**deps
)
481 memset(info
, 0, sizeof(*info
));
483 if (!dm_is_dm_major(major
)) {
490 info
->live_table
= 0;
491 info
->inactive_table
= 0;
496 if (!(*dmt
= dm_task_create(DM_DEVICE_DEPS
))) {
497 log_error("deps dm_task creation failed");
501 if (!dm_task_set_major(*dmt
, major
)) {
502 log_error("_deps: failed to set major for (%" PRIu32
":%" PRIu32
")",
507 if (!dm_task_set_minor(*dmt
, minor
)) {
508 log_error("_deps: failed to set minor for (%" PRIu32
":%" PRIu32
")",
513 if (!dm_task_run(*dmt
)) {
514 log_error("_deps: task run failed for (%" PRIu32
":%" PRIu32
")",
519 if (!dm_task_get_info(*dmt
, info
)) {
520 log_error("_deps: failed to get info for (%" PRIu32
":%" PRIu32
")",
530 if (info
->major
!= major
) {
531 log_error("Inconsistent dtree major number: %u != %u",
535 if (info
->minor
!= minor
) {
536 log_error("Inconsistent dtree minor number: %u != %u",
540 if (!(*name
= dm_pool_strdup(mem
, dm_task_get_name(*dmt
)))) {
541 log_error("name pool_strdup failed");
544 if (!(*uuid
= dm_pool_strdup(mem
, dm_task_get_uuid(*dmt
)))) {
545 log_error("uuid pool_strdup failed");
548 *deps
= dm_task_get_deps(*dmt
);
554 dm_task_destroy(*dmt
);
558 static struct dm_tree_node
*_add_dev(struct dm_tree
*dtree
,
559 struct dm_tree_node
*parent
,
560 uint32_t major
, uint32_t minor
,
563 struct dm_task
*dmt
= NULL
;
565 struct dm_deps
*deps
= NULL
;
566 const char *name
= NULL
;
567 const char *uuid
= NULL
;
568 struct dm_tree_node
*node
= NULL
;
572 /* Already in tree? */
573 if (!(node
= _find_dm_tree_node(dtree
, major
, minor
))) {
574 if (!_deps(&dmt
, dtree
->mem
, major
, minor
, &name
, &uuid
, &info
, &deps
))
577 if (!(node
= _create_dm_tree_node(dtree
, name
, uuid
, &info
,
583 if (!_link_tree_nodes(parent
, node
)) {
588 /* If node was already in tree, no need to recurse. */
592 /* Can't recurse if not a mapped device or there are no dependencies */
593 if (!node
->info
.exists
|| !deps
->count
) {
594 if (!_add_to_bottomlevel(node
)) {
601 /* Add dependencies to tree */
602 for (i
= 0; i
< deps
->count
; i
++)
603 if (!_add_dev(dtree
, node
, MAJOR(deps
->device
[i
]),
604 MINOR(deps
->device
[i
]), udev_flags
)) {
611 dm_task_destroy(dmt
);
616 static int _node_clear_table(struct dm_tree_node
*dnode
)
619 struct dm_info
*info
;
623 if (!(info
= &dnode
->info
)) {
624 log_error("_node_clear_table failed: missing info");
628 if (!(name
= dm_tree_node_get_name(dnode
))) {
629 log_error("_node_clear_table failed: missing name");
633 /* Is there a table? */
634 if (!info
->exists
|| !info
->inactive_table
)
637 // FIXME Get inactive deps. If any dev referenced has 1 opener and no live table, remove it after the clear.
639 log_verbose("Clearing inactive table %s (%" PRIu32
":%" PRIu32
")",
640 name
, info
->major
, info
->minor
);
642 if (!(dmt
= dm_task_create(DM_DEVICE_CLEAR
))) {
643 log_error("Table clear dm_task creation failed for %s", name
);
647 if (!dm_task_set_major(dmt
, info
->major
) ||
648 !dm_task_set_minor(dmt
, info
->minor
)) {
649 log_error("Failed to set device number for %s table clear", name
);
650 dm_task_destroy(dmt
);
654 r
= dm_task_run(dmt
);
656 if (!dm_task_get_info(dmt
, info
)) {
657 log_error("_node_clear_table failed: info missing after running task for %s", name
);
661 dm_task_destroy(dmt
);
666 struct dm_tree_node
*dm_tree_add_new_dev(struct dm_tree
*dtree
,
669 uint32_t major
, uint32_t minor
,
674 struct dm_tree_node
*dnode
;
679 /* Do we need to add node to tree? */
680 if (!(dnode
= dm_tree_find_node_by_uuid(dtree
, uuid
))) {
681 if (!(name2
= dm_pool_strdup(dtree
->mem
, name
))) {
682 log_error("name pool_strdup failed");
685 if (!(uuid2
= dm_pool_strdup(dtree
->mem
, uuid
))) {
686 log_error("uuid pool_strdup failed");
694 info
.inactive_table
= 0;
697 if (!(dnode
= _create_dm_tree_node(dtree
, name2
, uuid2
, &info
,
701 /* Attach to root node until a table is supplied */
702 if (!_add_to_toplevel(dnode
) || !_add_to_bottomlevel(dnode
))
705 dnode
->props
.major
= major
;
706 dnode
->props
.minor
= minor
;
707 dnode
->props
.new_name
= NULL
;
708 dnode
->props
.size_changed
= 0;
709 } else if (strcmp(name
, dnode
->name
)) {
710 /* Do we need to rename node? */
711 if (!(dnode
->props
.new_name
= dm_pool_strdup(dtree
->mem
, name
))) {
712 log_error("name pool_strdup failed");
717 dnode
->props
.read_only
= read_only
? 1 : 0;
718 dnode
->props
.read_ahead
= DM_READ_AHEAD_AUTO
;
719 dnode
->props
.read_ahead_flags
= 0;
721 if (clear_inactive
&& !_node_clear_table(dnode
))
724 dnode
->context
= context
;
725 dnode
->udev_flags
= 0;
730 struct dm_tree_node
*dm_tree_add_new_dev_with_udev_flags(struct dm_tree
*dtree
,
740 struct dm_tree_node
*node
;
742 if ((node
= dm_tree_add_new_dev(dtree
, name
, uuid
, major
, minor
, read_only
,
743 clear_inactive
, context
)))
744 node
->udev_flags
= udev_flags
;
749 void dm_tree_node_set_udev_flags(struct dm_tree_node
*dnode
, uint16_t udev_flags
)
752 struct dm_info
*dinfo
= &dnode
->info
;
754 if (udev_flags
!= dnode
->udev_flags
)
755 log_debug("Resetting %s (%" PRIu32
":%" PRIu32
756 ") udev_flags from 0x%x to 0x%x",
757 dnode
->name
, dinfo
->major
, dinfo
->minor
,
758 dnode
->udev_flags
, udev_flags
);
759 dnode
->udev_flags
= udev_flags
;
762 void dm_tree_node_set_read_ahead(struct dm_tree_node
*dnode
,
764 uint32_t read_ahead_flags
)
766 dnode
->props
.read_ahead
= read_ahead
;
767 dnode
->props
.read_ahead_flags
= read_ahead_flags
;
770 void dm_tree_node_set_presuspend_node(struct dm_tree_node
*node
,
771 struct dm_tree_node
*presuspend_node
)
773 node
->presuspend_node
= presuspend_node
;
776 int dm_tree_add_dev(struct dm_tree
*dtree
, uint32_t major
, uint32_t minor
)
778 return _add_dev(dtree
, &dtree
->root
, major
, minor
, 0) ? 1 : 0;
781 int dm_tree_add_dev_with_udev_flags(struct dm_tree
*dtree
, uint32_t major
,
782 uint32_t minor
, uint16_t udev_flags
)
784 return _add_dev(dtree
, &dtree
->root
, major
, minor
, udev_flags
) ? 1 : 0;
787 const char *dm_tree_node_get_name(const struct dm_tree_node
*node
)
789 return node
->info
.exists
? node
->name
: "";
792 const char *dm_tree_node_get_uuid(const struct dm_tree_node
*node
)
794 return node
->info
.exists
? node
->uuid
: "";
797 const struct dm_info
*dm_tree_node_get_info(const struct dm_tree_node
*node
)
802 void *dm_tree_node_get_context(const struct dm_tree_node
*node
)
804 return node
->context
;
807 int dm_tree_node_size_changed(const struct dm_tree_node
*dnode
)
809 return dnode
->props
.size_changed
;
812 int dm_tree_node_num_children(const struct dm_tree_node
*node
, uint32_t inverted
)
815 if (_nodes_are_linked(&node
->dtree
->root
, node
))
817 return dm_list_size(&node
->used_by
);
820 if (_nodes_are_linked(node
, &node
->dtree
->root
))
823 return dm_list_size(&node
->uses
);
827 * Returns 1 if no prefix supplied
829 static int _uuid_prefix_matches(const char *uuid
, const char *uuid_prefix
, size_t uuid_prefix_len
)
834 if (!strncmp(uuid
, uuid_prefix
, uuid_prefix_len
))
837 /* Handle transition: active device uuids might be missing the prefix */
838 if (uuid_prefix_len
<= 4)
841 if (!strncmp(uuid
, UUID_PREFIX
, sizeof(UUID_PREFIX
) - 1))
844 if (strncmp(uuid_prefix
, UUID_PREFIX
, sizeof(UUID_PREFIX
) - 1))
847 if (!strncmp(uuid
, uuid_prefix
+ sizeof(UUID_PREFIX
) - 1, uuid_prefix_len
- (sizeof(UUID_PREFIX
) - 1)))
854 * Returns 1 if no children.
856 static int _children_suspended(struct dm_tree_node
*node
,
858 const char *uuid_prefix
,
859 size_t uuid_prefix_len
)
861 struct dm_list
*list
;
862 struct dm_tree_link
*dlink
;
863 const struct dm_info
*dinfo
;
867 if (_nodes_are_linked(&node
->dtree
->root
, node
))
869 list
= &node
->used_by
;
871 if (_nodes_are_linked(node
, &node
->dtree
->root
))
876 dm_list_iterate_items(dlink
, list
) {
877 if (!(uuid
= dm_tree_node_get_uuid(dlink
->node
))) {
882 /* Ignore if it doesn't belong to this VG */
883 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
886 /* Ignore if parent node wants to presuspend this node */
887 if (dlink
->node
->presuspend_node
== node
)
890 if (!(dinfo
= dm_tree_node_get_info(dlink
->node
))) {
891 stack
; /* FIXME Is this normal? */
895 if (!dinfo
->suspended
)
903 * Set major and minor to zero for root of tree.
905 struct dm_tree_node
*dm_tree_find_node(struct dm_tree
*dtree
,
909 if (!major
&& !minor
)
912 return _find_dm_tree_node(dtree
, major
, minor
);
916 * Set uuid to NULL for root of tree.
918 struct dm_tree_node
*dm_tree_find_node_by_uuid(struct dm_tree
*dtree
,
924 return _find_dm_tree_node_by_uuid(dtree
, uuid
);
928 * First time set *handle to NULL.
929 * Set inverted to invert the tree.
931 struct dm_tree_node
*dm_tree_next_child(void **handle
,
932 const struct dm_tree_node
*parent
,
935 struct dm_list
**dlink
= (struct dm_list
**) handle
;
936 const struct dm_list
*use_list
;
939 use_list
= &parent
->used_by
;
941 use_list
= &parent
->uses
;
944 *dlink
= dm_list_first(use_list
);
946 *dlink
= dm_list_next(use_list
, *dlink
);
948 return (*dlink
) ? dm_list_item(*dlink
, struct dm_tree_link
)->node
: NULL
;
952 * Deactivate a device with its dependencies if the uuid prefix matches.
954 static int _info_by_dev(uint32_t major
, uint32_t minor
, int with_open_count
,
955 struct dm_info
*info
)
960 if (!(dmt
= dm_task_create(DM_DEVICE_INFO
))) {
961 log_error("_info_by_dev: dm_task creation failed");
965 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
966 log_error("_info_by_dev: Failed to set device number");
967 dm_task_destroy(dmt
);
971 if (!with_open_count
&& !dm_task_no_open_count(dmt
))
972 log_error("Failed to disable open_count");
974 if ((r
= dm_task_run(dmt
)))
975 r
= dm_task_get_info(dmt
, info
);
977 dm_task_destroy(dmt
);
982 static int _check_device_not_in_use(const char *name
, struct dm_info
*info
)
987 /* If sysfs is not used, use open_count information only. */
988 if (!*dm_sysfs_dir()) {
989 if (info
->open_count
) {
990 log_error("Device %s (%" PRIu32
":%" PRIu32
") in use",
991 name
, info
->major
, info
->minor
);
998 if (dm_device_has_holders(info
->major
, info
->minor
)) {
999 log_error("Device %s (%" PRIu32
":%" PRIu32
") is used "
1000 "by another device.", name
, info
->major
, info
->minor
);
1004 if (dm_device_has_mounted_fs(info
->major
, info
->minor
)) {
1005 log_error("Device %s (%" PRIu32
":%" PRIu32
") contains "
1006 "a filesystem in use.", name
, info
->major
, info
->minor
);
1013 /* Check if all parent nodes of given node have open_count == 0 */
1014 static int _node_has_closed_parents(struct dm_tree_node
*node
,
1015 const char *uuid_prefix
,
1016 size_t uuid_prefix_len
)
1018 struct dm_tree_link
*dlink
;
1019 const struct dm_info
*dinfo
;
1020 struct dm_info info
;
1023 /* Iterate through parents of this node */
1024 dm_list_iterate_items(dlink
, &node
->used_by
) {
1025 if (!(uuid
= dm_tree_node_get_uuid(dlink
->node
))) {
1030 /* Ignore if it doesn't belong to this VG */
1031 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1034 if (!(dinfo
= dm_tree_node_get_info(dlink
->node
))) {
1035 stack
; /* FIXME Is this normal? */
1039 /* Refresh open_count */
1040 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 1, &info
) ||
1044 if (info
.open_count
) {
1045 log_debug("Node %s %d:%d has open_count %d", uuid_prefix
,
1046 dinfo
->major
, dinfo
->minor
, info
.open_count
);
1054 static int _deactivate_node(const char *name
, uint32_t major
, uint32_t minor
,
1055 uint32_t *cookie
, uint16_t udev_flags
, int retry
)
1057 struct dm_task
*dmt
;
1060 log_verbose("Removing %s (%" PRIu32
":%" PRIu32
")", name
, major
, minor
);
1062 if (!(dmt
= dm_task_create(DM_DEVICE_REMOVE
))) {
1063 log_error("Deactivation dm_task creation failed for %s", name
);
1067 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
1068 log_error("Failed to set device number for %s deactivation", name
);
1072 if (!dm_task_no_open_count(dmt
))
1073 log_error("Failed to disable open_count");
1075 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1080 dm_task_retry_remove(dmt
);
1082 r
= dm_task_run(dmt
);
1084 /* FIXME Until kernel returns actual name so dm-iface.c can handle it */
1085 rm_dev_node(name
, dmt
->cookie_set
&& !(udev_flags
& DM_UDEV_DISABLE_DM_RULES_FLAG
),
1086 dmt
->cookie_set
&& (udev_flags
& DM_UDEV_DISABLE_LIBRARY_FALLBACK
));
1088 /* FIXME Remove node from tree or mark invalid? */
1091 dm_task_destroy(dmt
);
1096 static int _rename_node(const char *old_name
, const char *new_name
, uint32_t major
,
1097 uint32_t minor
, uint32_t *cookie
, uint16_t udev_flags
)
1099 struct dm_task
*dmt
;
1102 log_verbose("Renaming %s (%" PRIu32
":%" PRIu32
") to %s", old_name
, major
, minor
, new_name
);
1104 if (!(dmt
= dm_task_create(DM_DEVICE_RENAME
))) {
1105 log_error("Rename dm_task creation failed for %s", old_name
);
1109 if (!dm_task_set_name(dmt
, old_name
)) {
1110 log_error("Failed to set name for %s rename.", old_name
);
1114 if (!dm_task_set_newname(dmt
, new_name
))
1117 if (!dm_task_no_open_count(dmt
))
1118 log_error("Failed to disable open_count");
1120 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1123 r
= dm_task_run(dmt
);
1126 dm_task_destroy(dmt
);
1131 /* FIXME Merge with _suspend_node? */
1132 static int _resume_node(const char *name
, uint32_t major
, uint32_t minor
,
1133 uint32_t read_ahead
, uint32_t read_ahead_flags
,
1134 struct dm_info
*newinfo
, uint32_t *cookie
,
1135 uint16_t udev_flags
, int already_suspended
)
1137 struct dm_task
*dmt
;
1140 log_verbose("Resuming %s (%" PRIu32
":%" PRIu32
")", name
, major
, minor
);
1142 if (!(dmt
= dm_task_create(DM_DEVICE_RESUME
))) {
1143 log_debug("Suspend dm_task creation failed for %s.", name
);
1147 /* FIXME Kernel should fill in name on return instead */
1148 if (!dm_task_set_name(dmt
, name
)) {
1149 log_debug("Failed to set device name for %s resumption.", name
);
1153 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
1154 log_error("Failed to set device number for %s resumption.", name
);
1158 if (!dm_task_no_open_count(dmt
))
1159 log_error("Failed to disable open_count");
1161 if (!dm_task_set_read_ahead(dmt
, read_ahead
, read_ahead_flags
))
1162 log_error("Failed to set read ahead");
1164 if (!dm_task_set_cookie(dmt
, cookie
, udev_flags
))
1167 if (!(r
= dm_task_run(dmt
)))
1170 if (already_suspended
)
1173 if (!(r
= dm_task_get_info(dmt
, newinfo
)))
1177 dm_task_destroy(dmt
);
1182 static int _suspend_node(const char *name
, uint32_t major
, uint32_t minor
,
1183 int skip_lockfs
, int no_flush
, struct dm_info
*newinfo
)
1185 struct dm_task
*dmt
;
1188 log_verbose("Suspending %s (%" PRIu32
":%" PRIu32
")%s%s",
1190 skip_lockfs
? "" : " with filesystem sync",
1191 no_flush
? "" : " with device flush");
1193 if (!(dmt
= dm_task_create(DM_DEVICE_SUSPEND
))) {
1194 log_error("Suspend dm_task creation failed for %s", name
);
1198 if (!dm_task_set_major(dmt
, major
) || !dm_task_set_minor(dmt
, minor
)) {
1199 log_error("Failed to set device number for %s suspension.", name
);
1200 dm_task_destroy(dmt
);
1204 if (!dm_task_no_open_count(dmt
))
1205 log_error("Failed to disable open_count");
1207 if (skip_lockfs
&& !dm_task_skip_lockfs(dmt
))
1208 log_error("Failed to set skip_lockfs flag.");
1210 if (no_flush
&& !dm_task_no_flush(dmt
))
1211 log_error("Failed to set no_flush flag.");
1213 if ((r
= dm_task_run(dmt
))) {
1215 r
= dm_task_get_info(dmt
, newinfo
);
1218 dm_task_destroy(dmt
);
1223 static int _thin_pool_status_transaction_id(struct dm_tree_node
*dnode
, uint64_t *transaction_id
)
1225 struct dm_task
*dmt
;
1227 uint64_t start
, length
;
1229 char *params
= NULL
;
1231 if (!(dmt
= dm_task_create(DM_DEVICE_STATUS
)))
1234 if (!dm_task_set_major(dmt
, dnode
->info
.major
) ||
1235 !dm_task_set_minor(dmt
, dnode
->info
.minor
)) {
1236 log_error("Failed to set major minor.");
1240 if (!dm_task_run(dmt
))
1243 dm_get_next_target(dmt
, NULL
, &start
, &length
, &type
, ¶ms
);
1245 if (type
&& (strcmp(type
, "thin-pool") != 0)) {
1246 log_error("Expected thin-pool target for %d:%d and got %s.",
1247 dnode
->info
.major
, dnode
->info
.minor
, type
);
1251 if (!params
|| (sscanf(params
, "%" PRIu64
, transaction_id
) != 1)) {
1252 log_error("Failed to parse transaction_id from %s.", params
);
1256 log_debug("Thin pool transaction id: %" PRIu64
" status: %s.", *transaction_id
, params
);
1260 dm_task_destroy(dmt
);
1265 static int _thin_pool_node_message(struct dm_tree_node
*dnode
, struct thin_message
*tm
)
1267 struct dm_task
*dmt
;
1268 struct dm_thin_message
*m
= &tm
->message
;
1273 case DM_THIN_MESSAGE_CREATE_SNAP
:
1274 r
= dm_snprintf(buf
, sizeof(buf
), "create_snap %u %u",
1275 m
->u
.m_create_snap
.device_id
,
1276 m
->u
.m_create_snap
.origin_id
);
1278 case DM_THIN_MESSAGE_CREATE_THIN
:
1279 r
= dm_snprintf(buf
, sizeof(buf
), "create_thin %u",
1280 m
->u
.m_create_thin
.device_id
);
1282 case DM_THIN_MESSAGE_DELETE
:
1283 r
= dm_snprintf(buf
, sizeof(buf
), "delete %u",
1284 m
->u
.m_delete
.device_id
);
1286 case DM_THIN_MESSAGE_TRIM
:
1287 r
= dm_snprintf(buf
, sizeof(buf
), "trim %u %" PRIu64
,
1288 m
->u
.m_trim
.device_id
,
1289 m
->u
.m_trim
.new_size
);
1291 case DM_THIN_MESSAGE_SET_TRANSACTION_ID
:
1292 r
= dm_snprintf(buf
, sizeof(buf
),
1293 "set_transaction_id %" PRIu64
" %" PRIu64
,
1294 m
->u
.m_set_transaction_id
.current_id
,
1295 m
->u
.m_set_transaction_id
.new_id
);
1302 log_error("Failed to prepare message.");
1308 if (!(dmt
= dm_task_create(DM_DEVICE_TARGET_MSG
)))
1311 if (!dm_task_set_major(dmt
, dnode
->info
.major
) ||
1312 !dm_task_set_minor(dmt
, dnode
->info
.minor
)) {
1313 log_error("Failed to set message major minor.");
1317 if (!dm_task_set_message(dmt
, buf
))
1320 /* Internal functionality of dm_task */
1321 dmt
->expected_errno
= tm
->expected_errno
;
1323 if (!dm_task_run(dmt
))
1328 dm_task_destroy(dmt
);
1333 static int _node_send_messages(struct dm_tree_node
*dnode
,
1334 const char *uuid_prefix
,
1335 size_t uuid_prefix_len
)
1337 struct load_segment
*seg
;
1338 struct thin_message
*tmsg
;
1342 if (!dnode
->info
.exists
|| (dm_list_size(&dnode
->props
.segs
) != 1))
1345 seg
= dm_list_item(dm_list_last(&dnode
->props
.segs
), struct load_segment
);
1346 if (seg
->type
!= SEG_THIN_POOL
)
1349 if (!(uuid
= dm_tree_node_get_uuid(dnode
)))
1352 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
)) {
1353 log_debug("UUID \"%s\" does not match.", uuid
);
1357 if (!_thin_pool_status_transaction_id(dnode
, &trans_id
))
1360 if (trans_id
== seg
->transaction_id
)
1361 return 1; /* In sync - skip messages */
1363 if (trans_id
!= (seg
->transaction_id
- 1)) {
1364 log_error("Thin pool transaction_id=%" PRIu64
", while expected: %" PRIu64
".",
1365 trans_id
, seg
->transaction_id
- 1);
1366 goto bad
; /* Nothing to send */
1369 dm_list_iterate_items(tmsg
, &seg
->thin_messages
)
1370 if (!(_thin_pool_node_message(dnode
, tmsg
)))
1375 /* Try to deactivate */
1376 if (!(dm_tree_deactivate_children(dnode
, uuid_prefix
, uuid_prefix_len
)))
1377 log_error("Failed to deactivate %s", dnode
->name
);
1383 * FIXME Don't attempt to deactivate known internal dependencies.
1385 static int _dm_tree_deactivate_children(struct dm_tree_node
*dnode
,
1386 const char *uuid_prefix
,
1387 size_t uuid_prefix_len
,
1391 void *handle
= NULL
;
1392 struct dm_tree_node
*child
= dnode
;
1393 struct dm_info info
;
1394 const struct dm_info
*dinfo
;
1398 while ((child
= dm_tree_next_child(&handle
, dnode
, 0))) {
1399 if (!(dinfo
= dm_tree_node_get_info(child
))) {
1404 if (!(name
= dm_tree_node_get_name(child
))) {
1409 if (!(uuid
= dm_tree_node_get_uuid(child
))) {
1414 /* Ignore if it doesn't belong to this VG */
1415 if (!_uuid_prefix_matches(uuid
, uuid_prefix
, uuid_prefix_len
))
1418 /* Refresh open_count */
1419 if (!_info_by_dev(dinfo
->major
, dinfo
->minor
, 1, &info
) ||
1423 if (info
.open_count
) {
1424 /* Skip internal non-toplevel opened nodes */
1428 /* When retry is not allowed, error */
1429 if (!child
->dtree
->retry_remove
) {
1430 log_error("Unable to deactivate open %s (%" PRIu32
1431 ":%" PRIu32
")", name
, info
.major
, info
.minor
);
1436 /* Check toplevel node for holders/mounted fs */
1437 if (!_check_device_not_in_use(name
, &info
)) {
1442 /* Go on with retry */
1445 /* Also checking open_count in parent nodes of presuspend_node */
1446 if ((child
->presuspend_node
&&
1447 !_node_has_closed_parents(child
->presuspend_node
,
1448 uuid_prefix
, uuid_prefix_len
))) {
1449 /* Only report error from (likely non-internal) dependency at top level */
1451 log_error("Unable to deactivate open %s (%" PRIu32
1452 ":%" PRIu32
")", name
, info
.major
,
1459 /* Suspend child node first if requested */
1460 if (child
->presuspend_node
&&
1461 !dm_tree_suspend_children(child
, uuid_prefix
, uuid_prefix_len
))
1464 if (!_deactivate_node(name
, info
.major
, info
.minor
,
1465 &child
->dtree
->cookie
, child
->udev_flags
,
1466 (level
== 0) ? child
->dtree
->retry_remove
: 0)) {
1467 log_error("Unable to deactivate %s (%" PRIu32
1468 ":%" PRIu32
")", name
, info
.major
,
1472 } else if (info
.suspended
)
1475 if (dm_tree_node_num_children(child
, 0)) {
1476 if (!_dm_tree_deactivate_children(child
, uuid_prefix
, uuid_prefix_len
, level
+ 1))
/*
 * Public entry point: deactivate matching children, starting the
 * recursion at level 0 (retry-remove only applies at the top level).
 */
int dm_tree_deactivate_children(struct dm_tree_node *dnode,
				const char *uuid_prefix,
				size_t uuid_prefix_len)
{
	return _dm_tree_deactivate_children(dnode, uuid_prefix, uuid_prefix_len, 0);
}
/*
 * Request that the lockfs/filesystem-sync step be skipped when nodes of
 * this tree are suspended (the flag is read by the suspend path via
 * child->dtree->skip_lockfs).
 */
void dm_tree_skip_lockfs(struct dm_tree_node *dnode)
{
	dnode->dtree->skip_lockfs = 1;
}
/*
 * Request a no-flush suspend for nodes of this tree (the flag is read by
 * the suspend path via child->dtree->no_flush).
 */
void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode)
{
	dnode->dtree->no_flush = 1;
}
/*
 * Allow the deactivation code to retry removal of open devices (the flag
 * is consulted in _dm_tree_deactivate_children / _deactivate_node).
 */
void dm_tree_retry_remove(struct dm_tree_node *dnode)
{
	dnode->dtree->retry_remove = 1;
}
/*
 * Suspend every child of 'dnode' whose uuid matches uuid_prefix, then
 * recurse into grandchildren.  Immediate parents are verified suspended
 * first (_children_suspended); devices that do not exist or are already
 * suspended are skipped.
 *
 * NOTE(review): this extract omits several original lines (some
 * declarations, error paths, braces); each gap is marked [elided in
 * extract] — do not assume their exact form from this view.
 */
int dm_tree_suspend_children(struct dm_tree_node *dnode,
			     const char *uuid_prefix,
			     size_t uuid_prefix_len)
{
	void *handle = NULL;
	struct dm_tree_node *child = dnode;
	struct dm_info info, newinfo;
	const struct dm_info *dinfo;
	/* [elided in extract: 'name', 'uuid' and return-code declarations] */

	/* Suspend nodes at this level of the tree */
	while ((child = dm_tree_next_child(&handle, dnode, 0))) {
		if (!(dinfo = dm_tree_node_get_info(child))) {
			/* [elided in extract] */
		if (!(name = dm_tree_node_get_name(child))) {
			/* [elided in extract] */
		if (!(uuid = dm_tree_node_get_uuid(child))) {
			/* [elided in extract] */

		/* Ignore if it doesn't belong to this VG */
		if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
			/* [elided in extract] */

		/* Ensure immediate parents are already suspended */
		if (!_children_suspended(child, 1, uuid_prefix, uuid_prefix_len))
			/* [elided in extract] */

		/* Skip devices that are absent or already suspended. */
		if (!_info_by_dev(dinfo->major, dinfo->minor, 0, &info) ||
		    !info.exists || info.suspended)
			/* [elided in extract] */

		if (!_suspend_node(name, info.major, info.minor,
				   child->dtree->skip_lockfs,
				   child->dtree->no_flush, &newinfo)) {
			log_error("Unable to suspend %s (%" PRIu32
				  ":%" PRIu32 ")", name, info.major,
			/* [elided in extract: info.minor); error handling] */

		/* Update cached info */
		child->info = newinfo;
	/* [elided in extract: loop close, handle reset] */

	/* Then suspend any child nodes */
	while ((child = dm_tree_next_child(&handle, dnode, 0))) {
		if (!(uuid = dm_tree_node_get_uuid(child))) {
			/* [elided in extract] */

		/* Ignore if it doesn't belong to this VG */
		if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
			/* [elided in extract] */

		if (dm_tree_node_num_children(child, 0))
			if (!dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len))
				/* [elided in extract] */
	/* [elided in extract: loop close, return] */
}
/*
 * Activate matching children of 'dnode': recurse depth-first, then resume
 * nodes at this level in three passes ordered by activation_priority
 * (0..2, lowest first), applying any pending rename (props.new_name) on
 * the way.  Nodes with no inactive table and not suspended need no resume.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
int dm_tree_activate_children(struct dm_tree_node *dnode,
			      const char *uuid_prefix,
			      size_t uuid_prefix_len)
{
	void *handle = NULL;
	struct dm_tree_node *child = dnode;
	struct dm_info newinfo;
	/* [elided in extract: 'name', 'uuid', 'priority', return-code declarations] */

	/* Activate children first */
	while ((child = dm_tree_next_child(&handle, dnode, 0))) {
		if (!(uuid = dm_tree_node_get_uuid(child))) {
			/* [elided in extract] */

		if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
			/* [elided in extract] */

		if (dm_tree_node_num_children(child, 0))
			if (!dm_tree_activate_children(child, uuid_prefix, uuid_prefix_len))
				/* [elided in extract] */

	/* Three passes: lower activation_priority is resumed first. */
	for (priority = 0; priority < 3; priority++) {
		while ((child = dm_tree_next_child(&handle, dnode, 0))) {
			if (priority != child->activation_priority)
				/* [elided in extract] */

			if (!(uuid = dm_tree_node_get_uuid(child))) {
				/* [elided in extract] */

			if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
				/* [elided in extract] */

			if (!(name = dm_tree_node_get_name(child))) {
				/* [elided in extract] */

			/* Apply a pending rename before resuming. */
			if (child->props.new_name) {
				if (!_rename_node(name, child->props.new_name, child->info.major,
						  child->info.minor, &child->dtree->cookie,
						  child->udev_flags)) {
					log_error("Failed to rename %s (%" PRIu32
						  ":%" PRIu32 ") to %s", name, child->info.major,
						  child->info.minor, child->props.new_name);
					/* [elided in extract] */
				child->name = child->props.new_name;
				child->props.new_name = NULL;
			/* [elided in extract] */

			/* Nothing to resume for an active, unsuspended node. */
			if (!child->info.inactive_table && !child->info.suspended)
				/* [elided in extract] */

			if (!_resume_node(child->name, child->info.major, child->info.minor,
					  child->props.read_ahead, child->props.read_ahead_flags,
					  &newinfo, &child->dtree->cookie, child->udev_flags, child->info.suspended)) {
				log_error("Unable to resume %s (%" PRIu32
					  ":%" PRIu32 ")", child->name, child->info.major,
				/* [elided in extract: child->info.minor); error handling] */

			/* Update cached info */
			child->info = newinfo;
	/* [elided in extract: loop closes, return] */
}
/*
 * Create the device-mapper device for 'dnode' via a DM_DEVICE_CREATE task:
 * set name, uuid, optional fixed major/minor (props.major) and read-only
 * flag, run the task and refresh dnode->info from the result.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
static int _create_node(struct dm_tree_node *dnode)
{
	/* [elided in extract: result variable declaration] */
	struct dm_task *dmt;

	log_verbose("Creating %s", dnode->name);

	if (!(dmt = dm_task_create(DM_DEVICE_CREATE))) {
		log_error("Create dm_task creation failed for %s", dnode->name);
		/* [elided in extract] */

	if (!dm_task_set_name(dmt, dnode->name)) {
		log_error("Failed to set device name for %s", dnode->name);
		/* [elided in extract] */

	if (!dm_task_set_uuid(dmt, dnode->uuid)) {
		log_error("Failed to set uuid for %s", dnode->name);
		/* [elided in extract] */

	/* Only force a device number when one was requested. */
	if (dnode->props.major &&
	    (!dm_task_set_major(dmt, dnode->props.major) ||
	     !dm_task_set_minor(dmt, dnode->props.minor))) {
		log_error("Failed to set device number for %s creation.", dnode->name);
		/* [elided in extract] */

	if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
		log_error("Failed to set read only flag for %s", dnode->name);
		/* [elided in extract] */

	/* Non-fatal: only logged. */
	if (!dm_task_no_open_count(dmt))
		log_error("Failed to disable open_count");

	if ((r = dm_task_run(dmt)))
		r = dm_task_get_info(dmt, &dnode->info);

	/* [elided in extract: cleanup label] */
	dm_task_destroy(dmt);
	/* [elided in extract: return] */
}
/*
 * Format node's major:minor (via dm_format_dev) into devbuf for use in
 * table parameter strings.  Returns 1 on success, 0 on failure.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
static int _build_dev_string(char *devbuf, size_t bufsize, struct dm_tree_node *node)
{
	if (!dm_format_dev(devbuf, bufsize, node->info.major, node->info.minor)) {
		log_error("Failed to format %s device number for %s as dm "
			  /* [elided in extract: remainder of format string] */
			  node->name, node->uuid, node->info.major, node->info.minor);
		/* [elided in extract: return statements] */
}
/* simplify string emitting code */
/*
 * EMIT_PARAMS(p, fmt, ...): append a printf-style fragment at offset p of
 * the local 'params' buffer using dm_snprintf; on overflow it invokes
 * stack (out of space).  NOTE(review): this extract truncates the macro —
 * the 'w' declaration, the error return and the advance of p are not
 * visible here; do not assume their exact form from this view.
 */
#define EMIT_PARAMS(p, str...)\
	if ((w = dm_snprintf(params + p, paramsize - (size_t) p, str)) < 0) {\
		stack; /* Out of space */\
1739 * Returns: 1 on success, 0 on failure
/*
 * Emit the per-area portion of a segment's table line, iterating
 * seg->areas.  Behaviour is per segment type: replicator-dev areas emit
 * "<rsite_index> 1 <dev>" plus a sync-log description ("nolog 0" or a log
 * type with optional device and nosync/sync flag); other visible branches
 * emit the area device (or "-" when absent) and, in the default case,
 * "<dev> <offset>".  Returns: 1 on success, 0 on failure.
 *
 * NOTE(review): extract omits lines (case labels, breaks, error paths);
 * gaps marked [elided in extract].
 */
static int _emit_areas_line(struct dm_task *dmt __attribute__((unused)),
			    struct load_segment *seg, char *params,
			    size_t paramsize, int *pos)
{
	struct seg_area *area;
	char devbuf[DM_FORMAT_DEV_BUFSIZE];
	unsigned first_time = 1;
	const char *logtype, *synctype;
	unsigned log_parm_count;

	dm_list_iterate_items(area, &seg->areas) {
		switch (seg->type) {
		case SEG_REPLICATOR_DEV:
			if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
				/* [elided in extract] */
			EMIT_PARAMS(*pos, " %d 1 %s", area->rsite_index, devbuf);
			/* [elided in extract: local-site / core-log branch] */
				EMIT_PARAMS(*pos, " nolog 0");
			/* [elided in extract: else branch begins] */
				/* Remote devices */
				log_parm_count = (area->flags &
						  (DM_NOSYNC | DM_FORCESYNC)) ? 2 : 1;
				/* [elided in extract: core vs disk sync-log selection] */
					devbuf[0] = 0;	/* Only core log parameters */
				/* [elided in extract] */
					devbuf[0] = ' '; /* Extra space before device name */
					if (!_build_dev_string(devbuf + 1,
					/* [elided in extract: sizeof and dev node args, error path] */
					log_parm_count++; /* Extra sync log device name parameter */
				EMIT_PARAMS(*pos, " %s %u%s %" PRIu64, logtype,
					    log_parm_count, devbuf, area->region_size);
				synctype = (area->flags & DM_NOSYNC) ?
					" nosync" : (area->flags & DM_FORCESYNC) ?
					/* [elided in extract: remaining ternary arms] */
					EMIT_PARAMS(*pos, "%s", synctype);
			/* [elided in extract: break; following case label(s)] */
			if (!area->dev_node) {
				EMIT_PARAMS(*pos, " -");
				/* [elided in extract] */
			if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
				/* [elided in extract] */
			EMIT_PARAMS(*pos, " %s", devbuf);
			/* [elided in extract: break; default case] */
			if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
				/* [elided in extract] */
			EMIT_PARAMS(*pos, "%s%s %" PRIu64, first_time ? "" : " ",
				    devbuf, area->offset);
	/* [elided in extract: first_time reset, loop/switch closes, return] */
}
/*
 * Emit the table line for a "replicator" segment: the replicator log
 * device with fixed args ("<rlog_type> 4 <rlogdev> 0 auto <rlog_size>"),
 * then one " blockdev <parm_count> <rsite_index> <sync-mode>" clause per
 * remote site, with at most one of data/ios/timeout fall-behind options
 * (hence parm_count 4 when any is set, else 2).
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
static int _replicator_emit_segment_line(const struct load_segment *seg, char *params,
					 size_t paramsize, int *pos)
{
	const struct load_segment *rlog_seg;
	struct replicator_site *rsite;
	char rlogbuf[DM_FORMAT_DEV_BUFSIZE];
	unsigned parm_count;

	if (!seg->log || !_build_dev_string(rlogbuf, sizeof(rlogbuf), seg->log))
		/* [elided in extract] */

	/* Size of the replicator log comes from its last-loaded segment. */
	rlog_seg = dm_list_item(dm_list_last(&seg->log->props.segs),
				struct load_segment);

	EMIT_PARAMS(*pos, "%s 4 %s 0 auto %" PRIu64,
		    seg->rlog_type, rlogbuf, rlog_seg->size);

	dm_list_iterate_items(rsite, &seg->rsites) {
		parm_count = (rsite->fall_behind_data
			      || rsite->fall_behind_ios
			      || rsite->async_timeout) ? 4 : 2;

		EMIT_PARAMS(*pos, " blockdev %u %u %s", parm_count, rsite->rsite_index,
			    (rsite->mode == DM_REPLICATOR_SYNC) ? "synchronous" : "asynchronous");

		if (rsite->fall_behind_data)
			EMIT_PARAMS(*pos, " data %" PRIu64, rsite->fall_behind_data);
		else if (rsite->fall_behind_ios)
			EMIT_PARAMS(*pos, " ios %" PRIu32, rsite->fall_behind_ios);
		else if (rsite->async_timeout)
			EMIT_PARAMS(*pos, " timeout %" PRIu32, rsite->async_timeout);
	}

	/* [elided in extract: return] */
}
1858 * Returns: 1 on success, 0 on failure
/*
 * Build the table parameter line for a mirror segment: log description
 * ("core"/"disk", possibly routed through the userspace log for cluster
 * mirrors), region size, sync flags, mirror leg count and areas, plus the
 * "handle_errors" feature arg.  The block_on_error vs handle_errors choice
 * depends on the running kernel version (>= 2.6.22) and clustered mirrors
 * switch to dm-log-userspace from kernel 2.6.31.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
static int _mirror_emit_segment_line(struct dm_task *dmt, struct load_segment *seg,
				     char *params, size_t paramsize)
{
	int block_on_error = 0;
	int handle_errors = 0;
	int dm_log_userspace = 0;
	/* [elided in extract: struct utsname uts; pos/parts declarations] */
	unsigned log_parm_count;
	char logbuf[DM_FORMAT_DEV_BUFSIZE];
	const char *logtype;
	unsigned kmaj = 0, kmin = 0, krel = 0;

	if (uname(&uts) == -1) {
		log_error("Cannot read kernel release version.");
		/* [elided in extract] */

	/* Kernels with a major number of 2 always had 3 parts. */
	parts = sscanf(uts.release, "%u.%u.%u", &kmaj, &kmin, &krel);
	if (parts < 1 || (kmaj < 3 && parts < 3)) {
		log_error("Wrong kernel release version %s.", uts.release);
		/* [elided in extract] */

	if ((seg->flags & DM_BLOCK_ON_ERROR)) {
		/*
		 * Originally, block_on_error was an argument to the log
		 * portion of the mirror CTR table. It was renamed to
		 * "handle_errors" and now resides in the 'features'
		 * section of the mirror CTR table (i.e. at the end).
		 *
		 * We can identify whether to use "block_on_error" or
		 * "handle_errors" by the dm-mirror module's version
		 * number (>= 1.12) or by the kernel version (>= 2.6.22).
		 */
		if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 22))
			/* [elided in extract: flag assignment(s)] */

	if (seg->clustered) {
		/* Cluster mirrors require a UUID */
		/* [elided in extract: uuid presence check] */

		/*
		 * Cluster mirrors used to have their own log
		 * types. Now they are accessed through the
		 * userspace log type.
		 *
		 * The dm-log-userspace module was added to the
		 * kernel... [elided in extract]
		 */
		if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 31))
			dm_log_userspace = 1;
	/* [elided in extract: base log_parm_count setup] */

	/* [no]sync, block_on_error etc. */
	log_parm_count += hweight32(seg->flags);

	/* "handle_errors" is a feature arg now */
	/* [elided in extract: adjustment] */

	/* DM_CORELOG does not count in the param list */
	if (seg->flags & DM_CORELOG)
		/* [elided in extract] */

	if (seg->clustered) {
		log_parm_count++; /* For UUID */

		if (!dm_log_userspace)
			EMIT_PARAMS(pos, "clustered-");
		/* [elided in extract: else */
			/* For clustered-* type field inserted later */

	/* [elided in extract: core vs disk logtype selection] */
	if (!_build_dev_string(logbuf, sizeof(logbuf), seg->log))
		/* [elided in extract] */

	if (dm_log_userspace)
		EMIT_PARAMS(pos, "userspace %u %s clustered-%s",
			    log_parm_count, seg->uuid, logtype);
	/* [elided in extract: else] */
		EMIT_PARAMS(pos, "%s %u", logtype, log_parm_count);

	/* [elided in extract: disk-log condition] */
		EMIT_PARAMS(pos, " %s", logbuf);

	EMIT_PARAMS(pos, " %u", seg->region_size);

	if (seg->clustered && !dm_log_userspace)
		EMIT_PARAMS(pos, " %s", seg->uuid);

	if ((seg->flags & DM_NOSYNC))
		EMIT_PARAMS(pos, " nosync");
	else if ((seg->flags & DM_FORCESYNC))
		EMIT_PARAMS(pos, " sync");

	/* [elided in extract: block_on_error condition] */
		EMIT_PARAMS(pos, " block_on_error");

	EMIT_PARAMS(pos, " %u ", seg->mirror_area_count);

	if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0)
		/* [elided in extract] */

	/* [elided in extract: handle_errors condition] */
		EMIT_PARAMS(pos, " 1 handle_errors");

	/* [elided in extract: return] */
}
/*
 * Build the table parameter line for a raid segment: "<type> <param_count>
 * <stripe_size>", optional nosync/sync and region_size, one "rebuild <i>"
 * pair per set bit in seg->rebuilds, then the metadata/data pair count and
 * the areas.  param_count is precomputed to match exactly what is emitted.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
static int _raid_emit_segment_line(struct dm_task *dmt, uint32_t major,
				   uint32_t minor, struct load_segment *seg,
				   uint64_t *seg_start, char *params,
				   /* [elided in extract: size_t paramsize)] */
{
	/* [elided in extract: loop index / pos declarations] */
	int param_count = 1; /* mandatory 'chunk size'/'stripe size' arg */

	if ((seg->flags & DM_NOSYNC) || (seg->flags & DM_FORCESYNC))
		/* [elided in extract: param_count adjustment] */

	if (seg->region_size)
		/* [elided in extract: param_count adjustment] */

	/* rebuilds is 64-bit */
	param_count += 2 * hweight32(seg->rebuilds & 0xFFFFFFFF);
	param_count += 2 * hweight32(seg->rebuilds >> 32);

	if ((seg->type == SEG_RAID1) && seg->stripe_size)
		log_error("WARNING: Ignoring RAID1 stripe size");

	EMIT_PARAMS(pos, "%s %d %u", dm_segtypes[seg->type].target,
		    param_count, seg->stripe_size);

	if (seg->flags & DM_NOSYNC)
		EMIT_PARAMS(pos, " nosync");
	else if (seg->flags & DM_FORCESYNC)
		EMIT_PARAMS(pos, " sync");

	if (seg->region_size)
		EMIT_PARAMS(pos, " region_size %u", seg->region_size);

	/*
	 * NOTE(review): '1 << i' is int-width arithmetic while the comment
	 * above says rebuilds is 64-bit — if area_count/2 can exceed 32
	 * this under-reports rebuilds (would need 1ULL << i).  Confirm the
	 * possible range of area_count before changing.
	 */
	for (i = 0; i < (seg->area_count / 2); i++)
		if (seg->rebuilds & (1 << i))
			EMIT_PARAMS(pos, " rebuild %u", i);

	/* Print number of metadata/data device pairs */
	EMIT_PARAMS(pos, " %u", seg->area_count/2);

	if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0)
		/* [elided in extract] */

	/* [elided in extract: return] */
}
/*
 * Emit one complete target line for 'seg' and add it to the dm_task:
 * dispatch per segment type to build 'params' (mirror/replicator/raid via
 * helpers, snapshots/striped/crypt/thin inline), append areas for the
 * types that carry them, log the final line and call dm_task_add_target,
 * then advance *seg_start by seg->size.  Raid types are all registered
 * under the single "raid" target name (see dm_segtypes table note).
 *
 * NOTE(review): extract omits many lines (case labels, breaks, error
 * paths); gaps marked [elided in extract].
 */
static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
			      uint32_t minor, struct load_segment *seg,
			      uint64_t *seg_start, char *params,
			      /* [elided in extract: size_t paramsize)] */
{
	/* [elided in extract: pos / r declarations] */
	int target_type_is_raid = 0;
	char originbuf[DM_FORMAT_DEV_BUFSIZE], cowbuf[DM_FORMAT_DEV_BUFSIZE];
	char pool[DM_FORMAT_DEV_BUFSIZE], metadata[DM_FORMAT_DEV_BUFSIZE];

	/* [elided in extract: switch open; mirrored case label] */
	/* Mirrors are pretty complicated - now in separate function */
	r = _mirror_emit_segment_line(dmt, seg, params, paramsize);
	/* [elided in extract] */
	case SEG_REPLICATOR:
		if ((r = _replicator_emit_segment_line(seg, params, paramsize,
		/* [elided in extract: &pos)) <= 0) ...] */
	case SEG_REPLICATOR_DEV:
		if (!seg->replicator || !_build_dev_string(originbuf,
		/* [elided in extract: size/replicator args, error path] */
		EMIT_PARAMS(pos, "%s %" PRIu64, originbuf, seg->rdevice_index);
	/* [elided in extract: snapshot case label] */
	case SEG_SNAPSHOT_MERGE:
		if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
			/* [elided in extract] */
		if (!_build_dev_string(cowbuf, sizeof(cowbuf), seg->cow))
			/* [elided in extract] */
		EMIT_PARAMS(pos, "%s %s %c %d", originbuf, cowbuf,
			    seg->persistent ? 'P' : 'N', seg->chunk_size);
		/* [elided in extract] */
	case SEG_SNAPSHOT_ORIGIN:
		if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
			/* [elided in extract] */
		EMIT_PARAMS(pos, "%s", originbuf);
		/* [elided in extract: striped case label] */
		EMIT_PARAMS(pos, "%u %u ", seg->area_count, seg->stripe_size);
		/* [elided in extract: crypt case label] */
		EMIT_PARAMS(pos, "%s%s%s%s%s %s %" PRIu64 " ", seg->cipher,
			    seg->chainmode ? "-" : "", seg->chainmode ?: "",
			    seg->iv ? "-" : "", seg->iv ?: "", seg->key,
			    seg->iv_offset != DM_CRYPT_IV_DEFAULT ?
			    seg->iv_offset : *seg_start);
		/* [elided in extract: raid case labels] */
		target_type_is_raid = 1;
		r = _raid_emit_segment_line(dmt, major, minor, seg, seg_start,
		/* [elided in extract: params, paramsize); check] */
	/* [elided in extract: thin-pool case label] */
		if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata))
			/* [elided in extract] */
		if (!_build_dev_string(pool, sizeof(pool), seg->pool))
			/* [elided in extract] */
		EMIT_PARAMS(pos, "%s %s %d %" PRIu64 " %s", metadata, pool,
			    seg->data_block_size, seg->low_water_mark,
			    seg->skip_block_zeroing ? "1 skip_block_zeroing" : "0");
		/* [elided in extract: thin case label] */
		if (!_build_dev_string(pool, sizeof(pool), seg->pool))
			/* [elided in extract] */
		EMIT_PARAMS(pos, "%s %d", pool, seg->device_id);
	/* [elided in extract: switch close; second switch over seg->type] */
	case SEG_REPLICATOR:
	/* [elided in extract: more case labels] */
	case SEG_SNAPSHOT_ORIGIN:
	case SEG_SNAPSHOT_MERGE:
	/* [elided in extract: more case labels] */
	case SEG_REPLICATOR_DEV:
		/* Segment types with target areas append them here. */
		if ((r = _emit_areas_line(dmt, seg, params, paramsize, &pos)) <= 0) {
			/* [elided in extract] */
		/* [elided in extract: empty-params check] */
			log_error("No parameters supplied for %s target "
				  "%u:%u.", dm_segtypes[seg->type].target,
			/* [elided in extract: major, minor); error path] */

	log_debug("Adding target to (%" PRIu32 ":%" PRIu32 "): %" PRIu64
		  " %" PRIu64 " %s %s", major, minor,
		  *seg_start, seg->size, target_type_is_raid ? "raid" :
		  dm_segtypes[seg->type].target, params);

	if (!dm_task_add_target(dmt, *seg_start, seg->size,
				target_type_is_raid ? "raid" :
				dm_segtypes[seg->type].target, params))
		/* [elided in extract] */

	*seg_start += seg->size;

	/* [elided in extract: return] */
}
/*
 * Emit one segment's target line, growing the params buffer (starting at
 * 4096 bytes) until _emit_segment_line fits or MAX_TARGET_PARAMSIZE is
 * reached.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
static int _emit_segment(struct dm_task *dmt, uint32_t major, uint32_t minor,
			 struct load_segment *seg, uint64_t *seg_start)
{
	/* [elided in extract: params / ret declarations, loop open] */
	size_t paramsize = 4096;

	if (!(params = dm_malloc(paramsize))) {
		log_error("Insufficient space for target parameters.");
		/* [elided in extract] */

	ret = _emit_segment_line(dmt, major, minor, seg, seg_start,
	/* [elided in extract: params, paramsize); free, success/error exits] */

	log_debug("Insufficient space in params[%" PRIsize_t
		  "] for target parameters.", paramsize);

	/* [elided in extract: paramsize growth] */
	} while (paramsize < MAX_TARGET_PARAMSIZE);

	log_error("Target parameter size too big. Aborting.");
	/* [elided in extract: return] */
}
/*
 * Load the inactive table for 'dnode' via a DM_DEVICE_RELOAD task: emit
 * every queued segment, suppress reloads of identical tables, refresh
 * dnode->info and record whether the table size changed (size_changed),
 * with a special case skipping preload of brand-new zero-length devices
 * when delay_resume_if_new is set.  Resets props.segment_count when done.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
static int _load_node(struct dm_tree_node *dnode)
{
	/* [elided in extract: result variable declaration] */
	struct dm_task *dmt;
	struct load_segment *seg;
	uint64_t seg_start = 0, existing_table_size;

	log_verbose("Loading %s table (%" PRIu32 ":%" PRIu32 ")", dnode->name,
		    dnode->info.major, dnode->info.minor);

	if (!(dmt = dm_task_create(DM_DEVICE_RELOAD))) {
		log_error("Reload dm_task creation failed for %s", dnode->name);
		/* [elided in extract] */

	if (!dm_task_set_major(dmt, dnode->info.major) ||
	    !dm_task_set_minor(dmt, dnode->info.minor)) {
		log_error("Failed to set device number for %s reload.", dnode->name);
		/* [elided in extract] */

	if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
		log_error("Failed to set read only flag for %s", dnode->name);
		/* [elided in extract] */

	/* Non-fatal: only logged. */
	if (!dm_task_no_open_count(dmt))
		log_error("Failed to disable open_count");

	dm_list_iterate_items(seg, &dnode->props.segs)
		if (!_emit_segment(dmt, dnode->info.major, dnode->info.minor,
		/* [elided in extract: seg, &seg_start)) error path] */

	if (!dm_task_suppress_identical_reload(dmt))
		log_error("Failed to suppress reload of identical tables.");

	if ((r = dm_task_run(dmt))) {
		r = dm_task_get_info(dmt, &dnode->info);
		if (r && !dnode->info.inactive_table)
			log_verbose("Suppressed %s identical table reload.",
			/* [elided in extract: dnode->name);] */

		existing_table_size = dm_task_get_existing_table_size(dmt);
		if ((dnode->props.size_changed =
		     (existing_table_size == seg_start) ? 0 : 1)) {
			log_debug("Table size changed from %" PRIu64 " to %"
				  PRIu64 " for %s", existing_table_size,
				  seg_start, dnode->name);
			/*
			 * Kernel usually skips size validation on zero-length devices
			 * now so no need to preload them.
			 */
			/* FIXME In which kernel version did this begin? */
			if (!existing_table_size && dnode->props.delay_resume_if_new)
				dnode->props.size_changed = 0;
		/* [elided in extract] */

	dnode->props.segment_count = 0;
	/* [elided in extract: cleanup label] */
	dm_task_destroy(dmt);
	/* [elided in extract: return] */
}
/*
 * Preload matching children of 'dnode' (recursing depth-first): create
 * missing dm devices, load queued tables, and immediately resume any
 * child whose table size changed and which has parents.  Thin-pool
 * messages are sent afterwards; a failure there deactivates the subtree.
 * udev synchronisation is flushed once per level when any child requested
 * immediate_dev_node.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
int dm_tree_preload_children(struct dm_tree_node *dnode,
			     const char *uuid_prefix,
			     size_t uuid_prefix_len)
{
	/* [elided in extract: result variable declaration] */
	void *handle = NULL;
	struct dm_tree_node *child;
	struct dm_info newinfo;
	int update_devs_flag = 0;

	/* Preload children first */
	while ((child = dm_tree_next_child(&handle, dnode, 0))) {
		/* Skip existing non-device-mapper devices */
		if (!child->info.exists && child->info.major)
			/* [elided in extract] */

		/* Ignore if it doesn't belong to this VG */
		if (child->info.exists &&
		    !_uuid_prefix_matches(child->uuid, uuid_prefix, uuid_prefix_len))
			/* [elided in extract] */

		if (dm_tree_node_num_children(child, 0))
			if (!dm_tree_preload_children(child, uuid_prefix, uuid_prefix_len))
				/* [elided in extract] */

		/* FIXME Cope if name exists with no uuid? */
		if (!child->info.exists && !_create_node(child))
			/* [elided in extract] */

		if (!child->info.inactive_table &&
		    child->props.segment_count &&
		    /* [elided in extract: table load call and error path] */

		/* Propagate device size change */
		if (child->props.size_changed)
			dnode->props.size_changed = 1;

		/* Resume device immediately if it has parents and its size changed */
		if (!dm_tree_node_num_children(child, 1) || !child->props.size_changed)
			/* [elided in extract] */

		if (!child->info.inactive_table && !child->info.suspended)
			/* [elided in extract] */

		if (!_resume_node(child->name, child->info.major, child->info.minor,
				  child->props.read_ahead, child->props.read_ahead_flags,
				  &newinfo, &child->dtree->cookie, child->udev_flags,
				  child->info.suspended)) {
			log_error("Unable to resume %s (%" PRIu32
				  ":%" PRIu32 ")", child->name, child->info.major,
			/* [elided in extract: child->info.minor); error handling] */

		/* Update cached info */
		child->info = newinfo;
		/* [elided in extract] */
		if (child->props.send_messages &&
		    !(r = _node_send_messages(child, uuid_prefix, uuid_prefix_len))) {
			/* [elided in extract] */

		/*
		 * Prepare for immediate synchronization with udev and flush all stacked
		 * dev node operations if requested by immediate_dev_node property. But
		 * finish processing current level in the tree first.
		 */
		if (child->props.immediate_dev_node)
			update_devs_flag = 1;
	}

	if (r && dnode->props.send_messages &&
	    !(r = _node_send_messages(dnode, uuid_prefix, uuid_prefix_len)))
		/* [elided in extract] */

	if (update_devs_flag) {
		if (!dm_udev_wait(dm_tree_get_cookie(dnode)))
			/* [elided in extract] */
		dm_tree_set_cookie(dnode, 0);
	/* [elided in extract] */

	if (r && !_node_send_messages(dnode, uuid_prefix, uuid_prefix_len)) {
		/* Messages failed: tear the subtree back down. */
		if (!(dm_tree_deactivate_children(dnode, uuid_prefix, uuid_prefix_len)))
			log_error("Failed to deactivate %s", dnode->name);
		/* [elided in extract] */

	/* [elided in extract: return] */
}
2362 * Returns 1 if unsure.
/*
 * Report whether any child of 'dnode' has a uuid matching uuid_prefix.
 * Per the comment above this function, it returns 1 if unsure (e.g. when
 * a child's uuid cannot be fetched).
 *
 * NOTE(review): the recursive call's result is discarded here, so matches
 * found only in deeper descendants are not propagated to the caller —
 * looks like a bug; confirm against callers before relying on deep
 * matching.  Extract omits lines; gaps marked [elided in extract].
 */
int dm_tree_children_use_uuid(struct dm_tree_node *dnode,
			      const char *uuid_prefix,
			      size_t uuid_prefix_len)
{
	void *handle = NULL;
	struct dm_tree_node *child = dnode;
	/* [elided in extract: uuid declaration] */

	while ((child = dm_tree_next_child(&handle, dnode, 0))) {
		if (!(uuid = dm_tree_node_get_uuid(child))) {
			log_error("Failed to get uuid for dtree node.");
			/* [elided in extract] */

		if (_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
			/* [elided in extract] */

		if (dm_tree_node_num_children(child, 0))
			dm_tree_children_use_uuid(child, uuid_prefix, uuid_prefix_len);
	/* [elided in extract: loop close, return] */
}
/*
 * Allocate a new load_segment of 'type'/'size' from the tree's pool,
 * initialise its fields, append it to dnode->props.segs and bump
 * props.segment_count.  Returns the new segment or (on allocation
 * failure) an error path not visible in this extract.
 *
 * NOTE(review): dm_pool_zalloc already zero-fills, so the explicit
 * zero-assignments below are redundant but harmless.
 */
static struct load_segment *_add_segment(struct dm_tree_node *dnode, unsigned type, uint64_t size)
{
	struct load_segment *seg;

	if (!(seg = dm_pool_zalloc(dnode->dtree->mem, sizeof(*seg)))) {
		log_error("dtree node segment allocation failed");
		/* [elided in extract] */

	/* [elided in extract: type/size assignments] */
	seg->area_count = 0;
	dm_list_init(&seg->areas);
	seg->stripe_size = 0;
	seg->persistent = 0;
	seg->chunk_size = 0;
	/* [elided in extract: further field initialisation] */

	dm_list_add(&dnode->props.segs, &seg->list);
	dnode->props.segment_count++;

	/* [elided in extract: return] */
}
/*
 * Add a snapshot-origin segment to 'dnode', resolving and linking the
 * origin device by uuid.  Origins get activation_priority 1 so they are
 * resumed after new snapshots.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node *dnode,
					    /* [elided in extract: size parameter] */
					    const char *origin_uuid)
{
	struct load_segment *seg;
	struct dm_tree_node *origin_node;

	if (!(seg = _add_segment(dnode, SEG_SNAPSHOT_ORIGIN, size)))
		/* [elided in extract] */

	if (!(origin_node = dm_tree_find_node_by_uuid(dnode->dtree, origin_uuid))) {
		log_error("Couldn't find snapshot origin uuid %s.", origin_uuid);
		/* [elided in extract] */

	seg->origin = origin_node;
	if (!_link_tree_nodes(dnode, origin_node))
		/* [elided in extract] */

	/* Resume snapshot origins after new snapshots */
	dnode->activation_priority = 1;

	/* [elided in extract: return] */
}
/*
 * Common helper for snapshot and snapshot-merge segments: segment type is
 * chosen by the presence of merge_uuid.  Resolves and links origin and
 * COW devices; a missing merging snapshot is only logged (it may have
 * been deactivated).  Activation priorities order resume as: other
 * snapshots, then snapshot-merge (priority 1), then the merging snapshot
 * (priority 2).
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
static int _add_snapshot_target(struct dm_tree_node *node,
				/* [elided in extract: size parameter] */
				const char *origin_uuid,
				const char *cow_uuid,
				const char *merge_uuid,
				/* [elided in extract: persistent parameter] */
				uint32_t chunk_size)
{
	struct load_segment *seg;
	struct dm_tree_node *origin_node, *cow_node, *merge_node;
	/* [elided in extract: seg_type declaration] */

	seg_type = !merge_uuid ? SEG_SNAPSHOT : SEG_SNAPSHOT_MERGE;

	if (!(seg = _add_segment(node, seg_type, size)))
		/* [elided in extract] */

	if (!(origin_node = dm_tree_find_node_by_uuid(node->dtree, origin_uuid))) {
		log_error("Couldn't find snapshot origin uuid %s.", origin_uuid);
		/* [elided in extract] */

	seg->origin = origin_node;
	if (!_link_tree_nodes(node, origin_node))
		/* [elided in extract] */

	if (!(cow_node = dm_tree_find_node_by_uuid(node->dtree, cow_uuid))) {
		log_error("Couldn't find snapshot COW device uuid %s.", cow_uuid);
		/* [elided in extract] */

	seg->cow = cow_node;
	if (!_link_tree_nodes(node, cow_node))
		/* [elided in extract] */

	seg->persistent = persistent ? 1 : 0;
	seg->chunk_size = chunk_size;

	/* [elided in extract: merge_uuid guard] */
	if (!(merge_node = dm_tree_find_node_by_uuid(node->dtree, merge_uuid))) {
		/* not a pure error, merging snapshot may have been deactivated */
		log_verbose("Couldn't find merging snapshot uuid %s.", merge_uuid);
	/* [elided in extract: else branch] */
		seg->merge = merge_node;
		/* must not link merging snapshot, would undermine activation_priority below */
	/* [elided in extract] */

	/* Resume snapshot-merge (acting origin) after other snapshots */
	node->activation_priority = 1;
	/* [elided in extract: seg->merge guard] */
		/* Resume merging snapshot after snapshot-merge */
		seg->merge->activation_priority = 2;

	/* [elided in extract: return] */
}
/*
 * Add a plain (non-merging) snapshot segment: delegates to
 * _add_snapshot_target with merge_uuid = NULL.
 */
int dm_tree_node_add_snapshot_target(struct dm_tree_node *node,
				     /* [elided in extract: size parameter] */
				     const char *origin_uuid,
				     const char *cow_uuid,
				     /* [elided in extract: persistent parameter] */
				     uint32_t chunk_size)
{
	return _add_snapshot_target(node, size, origin_uuid, cow_uuid,
				    NULL, persistent, chunk_size);
}
/*
 * Add a snapshot-merge segment: delegates to _add_snapshot_target with a
 * merge uuid and persistent forced to 1.
 */
int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node *node,
					   /* [elided in extract: size parameter] */
					   const char *origin_uuid,
					   const char *cow_uuid,
					   const char *merge_uuid,
					   uint32_t chunk_size)
{
	return _add_snapshot_target(node, size, origin_uuid, cow_uuid,
				    merge_uuid, 1, chunk_size);
}
/* Add an "error" segment of the given size to 'node'. */
int dm_tree_node_add_error_target(struct dm_tree_node *node,
				  /* [elided in extract: size parameter] */
{
	if (!_add_segment(node, SEG_ERROR, size))
		/* [elided in extract] */

	/* [elided in extract: return] */
}
/* Add a "zero" segment of the given size to 'node'. */
int dm_tree_node_add_zero_target(struct dm_tree_node *node,
				 /* [elided in extract: size parameter] */
{
	if (!_add_segment(node, SEG_ZERO, size))
		/* [elided in extract] */

	/* [elided in extract: return] */
}
/* Add a "linear" segment of the given size to 'node'. */
int dm_tree_node_add_linear_target(struct dm_tree_node *node,
				   /* [elided in extract: size parameter] */
{
	if (!_add_segment(node, SEG_LINEAR, size))
		/* [elided in extract] */

	/* [elided in extract: return] */
}
/* Add a "striped" segment with the given stripe size to 'node'. */
int dm_tree_node_add_striped_target(struct dm_tree_node *node,
				    /* [elided in extract: size parameter] */
				    uint32_t stripe_size)
{
	struct load_segment *seg;

	if (!(seg = _add_segment(node, SEG_STRIPED, size)))
		/* [elided in extract] */

	seg->stripe_size = stripe_size;

	/* [elided in extract: return] */
}
/*
 * Add a "crypt" segment to 'node', recording cipher/chainmode/iv offset
 * (and, per the emit code, iv and key — their assignments are not visible
 * in this extract).
 */
int dm_tree_node_add_crypt_target(struct dm_tree_node *node,
				  /* [elided in extract: size and cipher parameters] */
				  const char *chainmode,
				  /* [elided in extract: iv, iv_offset, key parameters] */
{
	struct load_segment *seg;

	if (!(seg = _add_segment(node, SEG_CRYPT, size)))
		/* [elided in extract] */

	seg->cipher = cipher;
	seg->chainmode = chainmode;
	/* [elided in extract: iv assignment] */
	seg->iv_offset = iv_offset;
	/* [elided in extract: key assignment, return] */
}
/*
 * Attach log parameters to the most recently added mirror segment of
 * 'node': stores the log uuid, and for a disk log resolves/links the log
 * device (forcing its immediate dev node and clearing
 * delay_resume_if_new, since the kernel validates disk log sizes).  A
 * core log instead sets delay_resume_if_new on the node (pvmove case).
 * Must be called after a segment exists (segment_count != 0).
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node,
				       uint32_t region_size,
				       /* [elided in extract: clustered parameter] */
				       const char *log_uuid,
				       unsigned area_count,
				       /* [elided in extract: flags parameter] */
{
	struct dm_tree_node *log_node = NULL;
	struct load_segment *seg;

	if (!node->props.segment_count) {
		log_error(INTERNAL_ERROR "Attempt to add target area to missing segment.");
		/* [elided in extract] */

	seg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);

	/* [elided in extract: log_uuid guard] */
	if (!(seg->uuid = dm_pool_strdup(node->dtree->mem, log_uuid))) {
		log_error("log uuid pool_strdup failed");
		/* [elided in extract] */

	if ((flags & DM_CORELOG))
		/* For pvmove: immediate resume (for size validation) isn't needed. */
		node->props.delay_resume_if_new = 1;
	/* [elided in extract: disk-log branch begins] */
	if (!(log_node = dm_tree_find_node_by_uuid(node->dtree, log_uuid))) {
		log_error("Couldn't find mirror log uuid %s.", log_uuid);
		/* [elided in extract] */

	/* [elided in extract] */
	log_node->props.immediate_dev_node = 1;

	/* The kernel validates the size of disk logs. */
	/* FIXME Propagate to any devices below */
	log_node->props.delay_resume_if_new = 0;

	if (!_link_tree_nodes(node, log_node))
		/* [elided in extract] */

	seg->log = log_node;
	seg->region_size = region_size;
	seg->clustered = clustered;
	seg->mirror_area_count = area_count;

	/* [elided in extract: return] */
}
/* Add a "mirror" segment of the given size to 'node'. */
int dm_tree_node_add_mirror_target(struct dm_tree_node *node,
				   /* [elided in extract: size parameter] */
{
	if (!_add_segment(node, SEG_MIRRORED, size))
		/* [elided in extract] */

	/* [elided in extract: return] */
}
/*
 * Add a raid segment to 'node', mapping raid_type to a segment type by
 * scanning dm_segtypes (which is why the table's 1:1 section must stay
 * intact — see the warning at its definition).  Records region/stripe
 * sizes and the rebuilds bitmask.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
int dm_tree_node_add_raid_target(struct dm_tree_node *node,
				 /* [elided in extract: size parameter] */
				 const char *raid_type,
				 uint32_t region_size,
				 uint32_t stripe_size,
				 /* [elided in extract: rebuilds and trailing parameters] */
{
	/* [elided in extract: loop index declaration] */
	struct load_segment *seg = NULL;

	for (i = 0; dm_segtypes[i].target && !seg; i++)
		if (!strcmp(raid_type, dm_segtypes[i].target))
			if (!(seg = _add_segment(node,
						 dm_segtypes[i].type, size)))
				/* [elided in extract] */

	/* [elided in extract: unmatched raid_type handling] */
	seg->region_size = region_size;
	seg->stripe_size = stripe_size;
	seg->area_count = 0;
	seg->rebuilds = rebuilds;

	/* [elided in extract: return] */
}
/*
 * Add replicator configuration to 'node'.  For the local site
 * (REPLICATOR_LOCAL_SITE) this creates the SEG_REPLICATOR segment on a
 * fresh node, resolves/links the ringbuffer log device and records its
 * type.  For remote sites it validates that only synchronous-compatible
 * parameters are passed, locates the existing replicator segment and
 * appends a replicator_site entry with the fall-behind settings.
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 * "synchronnous" in the error message is a typo in a runtime string and
 * is deliberately left untouched here.
 */
int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
				       /* [elided in extract: size parameter] */
				       const char *rlog_uuid,
				       const char *rlog_type,
				       unsigned rsite_index,
				       dm_replicator_mode_t mode,
				       uint32_t async_timeout,
				       uint64_t fall_behind_data,
				       uint32_t fall_behind_ios)
{
	struct load_segment *rseg;
	struct replicator_site *rsite;

	/* Local site0 - adds replicator segment and links rlog device */
	if (rsite_index == REPLICATOR_LOCAL_SITE) {
		if (node->props.segment_count) {
			log_error(INTERNAL_ERROR "Attempt to add replicator segment to already used node.");
			/* [elided in extract] */

		if (!(rseg = _add_segment(node, SEG_REPLICATOR, size)))
			/* [elided in extract] */

		if (!(rseg->log = dm_tree_find_node_by_uuid(node->dtree, rlog_uuid))) {
			log_error("Missing replicator log uuid %s.", rlog_uuid);
			/* [elided in extract] */

		if (!_link_tree_nodes(node, rseg->log))
			/* [elided in extract] */

		if (strcmp(rlog_type, "ringbuffer") != 0) {
			log_error("Unsupported replicator log type %s.", rlog_type);
			/* [elided in extract] */

		if (!(rseg->rlog_type = dm_pool_strdup(node->dtree->mem, rlog_type)))
			/* [elided in extract] */

		dm_list_init(&rseg->rsites);
		rseg->rdevice_count = 0;
		node->activation_priority = 1;
	/* [elided in extract: early return / else for remote sites] */

	/* Add site to segment */
	if (mode == DM_REPLICATOR_SYNC
	    && (async_timeout || fall_behind_ios || fall_behind_data)) {
		log_error("Async parameters passed for synchronnous replicator.");
		/* [elided in extract] */

	if (node->props.segment_count != 1) {
		log_error(INTERNAL_ERROR "Attempt to add remote site area before setting replicator log.");
		/* [elided in extract] */

	rseg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);
	if (rseg->type != SEG_REPLICATOR) {
		log_error(INTERNAL_ERROR "Attempt to use non replicator segment %s.",
			  dm_segtypes[rseg->type].target);
		/* [elided in extract] */

	if (!(rsite = dm_pool_zalloc(node->dtree->mem, sizeof(*rsite)))) {
		log_error("Failed to allocate remote site segment.");
		/* [elided in extract] */

	dm_list_add(&rseg->rsites, &rsite->list);
	rseg->rsite_count++;

	/* [elided in extract: mode assignment] */
	rsite->async_timeout = async_timeout;
	rsite->fall_behind_data = fall_behind_data;
	rsite->fall_behind_ios = fall_behind_ios;
	rsite->rsite_index = rsite_index;

	/* [elided in extract: return] */
}
/* Appends device node to Replicator */
/*
 * For the local site this creates a SEG_REPLICATOR_DEV segment, resolves
 * and links the replicator node (whose single segment must already be
 * SEG_REPLICATOR), bumps its rdevice_count and records rdevice_index.
 * For remote sites it reuses the existing replicator-dev segment.  In
 * both cases it then appends a target area for rdev_uuid and attaches
 * sync-log settings (a resolved slog device unless DM_CORELOG).
 *
 * NOTE(review): extract omits lines; gaps marked [elided in extract].
 */
int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node,
					   /* [elided in extract: size parameter] */
					   const char *replicator_uuid,
					   uint64_t rdevice_index,
					   const char *rdev_uuid,
					   unsigned rsite_index,
					   const char *slog_uuid,
					   uint32_t slog_flags,
					   uint32_t slog_region_size)
{
	struct seg_area *area;
	struct load_segment *rseg;
	struct load_segment *rep_seg;

	if (rsite_index == REPLICATOR_LOCAL_SITE) {
		/* Site index for local target */
		if (!(rseg = _add_segment(node, SEG_REPLICATOR_DEV, size)))
			/* [elided in extract] */

		if (!(rseg->replicator = dm_tree_find_node_by_uuid(node->dtree, replicator_uuid))) {
			log_error("Missing replicator uuid %s.", replicator_uuid);
			/* [elided in extract] */

		/* Local slink0 for replicator must be always initialized first */
		if (rseg->replicator->props.segment_count != 1) {
			log_error(INTERNAL_ERROR "Attempt to use non replicator segment.");
			/* [elided in extract] */

		rep_seg = dm_list_item(dm_list_last(&rseg->replicator->props.segs), struct load_segment);
		if (rep_seg->type != SEG_REPLICATOR) {
			log_error(INTERNAL_ERROR "Attempt to use non replicator segment %s.",
				  dm_segtypes[rep_seg->type].target);
			/* [elided in extract] */
		rep_seg->rdevice_count++;

		if (!_link_tree_nodes(node, rseg->replicator))
			/* [elided in extract] */

		rseg->rdevice_index = rdevice_index;
	/* [elided in extract: else branch for remote sites] */
		/* Local slink0 for replicator must be always initialized first */
		if (node->props.segment_count != 1) {
			log_error(INTERNAL_ERROR "Attempt to use non replicator-dev segment.");
			/* [elided in extract] */

		rseg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);
		if (rseg->type != SEG_REPLICATOR_DEV) {
			log_error(INTERNAL_ERROR "Attempt to use non replicator-dev segment %s.",
				  dm_segtypes[rseg->type].target);
			/* [elided in extract] */
	/* [elided in extract] */

	if (!(slog_flags & DM_CORELOG) && !slog_uuid) {
		log_error("Unspecified sync log uuid.");
		/* [elided in extract] */

	if (!dm_tree_node_add_target_area(node, NULL, rdev_uuid, 0))
		/* [elided in extract] */

	area = dm_list_item(dm_list_last(&rseg->areas), struct seg_area);

	if (!(slog_flags & DM_CORELOG)) {
		if (!(area->slog = dm_tree_find_node_by_uuid(node->dtree, slog_uuid))) {
			log_error("Couldn't find sync log uuid %s.", slog_uuid);
			/* [elided in extract] */

		if (!_link_tree_nodes(node, area->slog))
			/* [elided in extract] */
	/* [elided in extract] */

	area->flags = slog_flags;
	area->region_size = slog_region_size;
	area->rsite_index = rsite_index;

	/* [elided in extract: return] */
}
2840 static int _thin_validate_device_id(uint32_t device_id
)
2842 if (device_id
> DM_THIN_MAX_DEVICE_ID
) {
2843 log_error("Device id %u is higher then %u.",
2844 device_id
, DM_THIN_MAX_DEVICE_ID
);
/*
 * Add a thin-pool segment to 'node': validate data_block_size against the
 * DM_THIN_MIN/MAX_DATA_BLOCK_SIZE limits, resolve and link the metadata
 * and data (pool) devices by uuid, enable message sending for the node
 * and record pool parameters (transaction id, low-water mark, block size,
 * zeroing policy) plus an empty thin-message queue.
 *
 * NOTE(review): "lower then"/"higher then" in the error messages are
 * typos in runtime strings ("than"); left untouched in this
 * comments-only pass.  Extract omits lines; gaps marked [elided in
 * extract].
 */
int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node,
				      /* [elided in extract: size parameter] */
				      uint64_t transaction_id,
				      const char *metadata_uuid,
				      const char *pool_uuid,
				      uint32_t data_block_size,
				      uint64_t low_water_mark,
				      unsigned skip_block_zeroing)
{
	struct load_segment *seg;

	if (data_block_size < DM_THIN_MIN_DATA_BLOCK_SIZE) {
		log_error("Data block size %u is lower then %u sectors.",
			  data_block_size, DM_THIN_MIN_DATA_BLOCK_SIZE);
		/* [elided in extract] */

	if (data_block_size > DM_THIN_MAX_DATA_BLOCK_SIZE) {
		log_error("Data block size %u is higher then %u sectors.",
			  data_block_size, DM_THIN_MAX_DATA_BLOCK_SIZE);
		/* [elided in extract] */

	if (!(seg = _add_segment(node, SEG_THIN_POOL, size)))
		/* [elided in extract] */

	if (!(seg->metadata = dm_tree_find_node_by_uuid(node->dtree, metadata_uuid))) {
		log_error("Missing metadata uuid %s.", metadata_uuid);
		/* [elided in extract] */

	if (!_link_tree_nodes(node, seg->metadata))
		/* [elided in extract] */

	if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) {
		log_error("Missing pool uuid %s.", pool_uuid);
		/* [elided in extract] */

	if (!_link_tree_nodes(node, seg->pool))
		/* [elided in extract] */

	node->props.send_messages = 1;
	seg->transaction_id = transaction_id;
	seg->low_water_mark = low_water_mark;
	seg->data_block_size = data_block_size;
	seg->skip_block_zeroing = skip_block_zeroing;
	dm_list_init(&seg->thin_messages);

	/* [elided in extract: return] */
}
/*
 * Queue a thin-pool message (create_thin, create_snap, delete, trim,
 * set_transaction_id) on the node's single thin-pool segment.  The
 * message is validated and copied; delivery happens later.
 *
 * node    - a node whose only segment is a thin pool
 * message - message to copy onto the segment's thin_messages list
 *
 * Returns 1 on success, 0 on validation or allocation failure.
 */
int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node,
				       const struct dm_thin_message *message)
{
	struct load_segment *seg;
	struct thin_message *tm;

	/* Messages may only be attached to a node with exactly one segment */
	if (node->props.segment_count != 1) {
		log_error("Thin pool node must have only one segment.");
		return 0;
	}

	seg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);
	if (seg->type != SEG_THIN_POOL) {
		log_error("Thin pool node has segment type %s.",
			  dm_segtypes[seg->type].target);
		return 0;
	}

	if (!(tm = dm_pool_zalloc(node->dtree->mem, sizeof (*tm)))) {
		log_error("Failed to allocate thin message.");
		return 0;
	}

	switch (message->type) {
	case DM_THIN_MESSAGE_CREATE_SNAP:
		/* If the thin origin is active, it must be suspended first! */
		if (message->u.m_create_snap.device_id == message->u.m_create_snap.origin_id) {
			log_error("Cannot use same device id for origin and its snapshot.");
			return 0;
		}
		if (!_thin_validate_device_id(message->u.m_create_snap.device_id) ||
		    !_thin_validate_device_id(message->u.m_create_snap.origin_id))
			return_0;
		tm->message.u.m_create_snap = message->u.m_create_snap;
		break;
	case DM_THIN_MESSAGE_CREATE_THIN:
		if (!_thin_validate_device_id(message->u.m_create_thin.device_id))
			return_0;
		tm->message.u.m_create_thin = message->u.m_create_thin;
		/* NOTE(review): EEXIST appears to be tolerated on delivery
		 * (device already created) - confirm against message sender */
		tm->expected_errno = EEXIST;
		break;
	case DM_THIN_MESSAGE_DELETE:
		if (!_thin_validate_device_id(message->u.m_delete.device_id))
			return_0;
		tm->message.u.m_delete = message->u.m_delete;
		/* NOTE(review): ENODATA appears to be tolerated on delivery
		 * (device already deleted) - confirm against message sender */
		tm->expected_errno = ENODATA;
		break;
	case DM_THIN_MESSAGE_TRIM:
		if (!_thin_validate_device_id(message->u.m_trim.device_id))
			return_0;
		tm->message.u.m_trim = message->u.m_trim;
		break;
	case DM_THIN_MESSAGE_SET_TRANSACTION_ID:
		/* Transaction id must advance by exactly one */
		if (message->u.m_set_transaction_id.current_id !=
		    (message->u.m_set_transaction_id.new_id - 1)) {
			log_error("New transaction_id must be sequential.");
			return 0; /* FIXME: Maybe too strict here? */
		}
		tm->message.u.m_set_transaction_id = message->u.m_set_transaction_id;
		break;
	default:
		log_error("Unsupported message type %d.", (int) message->type);
		return 0;
	}

	tm->message.type = message->type;
	dm_list_add(&seg->thin_messages, &tm->list);

	return 1;
}
/*
 * Add a thin (provisioned volume) target segment to a dm tree node.
 *
 * node           - the thin volume device node being loaded
 * size           - target length in sectors
 * thin_pool_uuid - uuid of the thin pool node (must already be in the tree)
 * device_id      - thin device id within the pool
 *                  (must be <= DM_THIN_MAX_DEVICE_ID)
 *
 * Returns 1 on success, 0 on failure.
 */
int dm_tree_node_add_thin_target(struct dm_tree_node *node,
				 uint64_t size,
				 const char *thin_pool_uuid,
				 uint32_t device_id)
{
	struct load_segment *seg;

	if (!_thin_validate_device_id(device_id))
		return_0;

	if (!(seg = _add_segment(node, SEG_THIN, size)))
		return_0;

	if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree, thin_pool_uuid))) {
		log_error("Missing thin pool uuid %s.", thin_pool_uuid);
		return 0;
	}

	if (!_link_tree_nodes(node, seg->pool))
		return_0;

	seg->device_id = device_id;

	return 1;
}
3000 static int _add_area(struct dm_tree_node
*node
, struct load_segment
*seg
, struct dm_tree_node
*dev_node
, uint64_t offset
)
3002 struct seg_area
*area
;
3004 if (!(area
= dm_pool_zalloc(node
->dtree
->mem
, sizeof (*area
)))) {
3005 log_error("Failed to allocate target segment area.");
3009 area
->dev_node
= dev_node
;
3010 area
->offset
= offset
;
3012 dm_list_add(&seg
->areas
, &area
->list
);
/*
 * Attach an area (underlying device + start offset) to the last segment
 * of a node.  The device is identified either by uuid (preferred: an
 * existing node in the tree) or by dev_name (a block device path that
 * is stat()ed and added to the tree by major:minor).
 *
 * Returns 1 on success, 0 on failure.
 */
int dm_tree_node_add_target_area(struct dm_tree_node *node,
				 const char *dev_name,
				 const char *uuid,
				 uint64_t offset)
{
	struct load_segment *seg;
	struct stat info;
	struct dm_tree_node *dev_node;

	/* At least one of dev_name/uuid must be a non-empty string */
	if ((!dev_name || !*dev_name) && (!uuid || !*uuid)) {
		log_error("dm_tree_node_add_target_area called without device");
		return 0;
	}

	if (uuid) {
		/* Device is expected to already be a node in this tree */
		if (!(dev_node = dm_tree_find_node_by_uuid(node->dtree, uuid))) {
			log_error("Couldn't find area uuid %s.", uuid);
			return 0;
		}
		if (!_link_tree_nodes(node, dev_node))
			return_0;
	} else {
		/* Look the device up on disk and add it by major:minor */
		if (stat(dev_name, &info) < 0) {
			log_error("Device %s not found.", dev_name);
			return 0;
		}

		if (!S_ISBLK(info.st_mode)) {
			log_error("Device %s is not a block device.", dev_name);
			return 0;
		}

		/* FIXME Check correct macro use */
		if (!(dev_node = _add_dev(node->dtree, node, MAJOR(info.st_rdev),
					  MINOR(info.st_rdev), 0)))
			return_0;
	}

	/* An area can only be attached to an existing segment */
	if (!node->props.segment_count) {
		log_error(INTERNAL_ERROR "Attempt to add target area to missing segment.");
		return 0;
	}

	seg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);

	if (!_add_area(node, seg, dev_node, offset))
		return_0;

	return 1;
}
/*
 * Attach a "null" area (no backing device node, only an offset) to the
 * node's last segment.  Only permitted for segment types that accept
 * areas without a device.
 *
 * Returns 1 on success, 0 on unsupported segment type or failure.
 */
int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset)
{
	struct load_segment *seg;

	seg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);

	/* NOTE(review): only raid segments appear to support null areas
	 * (used for missing raid images) - confirm the exact case list */
	switch (seg->type) {
	case SEG_RAID1:
	case SEG_RAID4:
	case SEG_RAID5_LA:
	case SEG_RAID5_RA:
	case SEG_RAID5_LS:
	case SEG_RAID5_RS:
	case SEG_RAID6_ZR:
	case SEG_RAID6_NR:
	case SEG_RAID6_NC:
		break;
	default:
		log_error("dm_tree_node_add_null_area() called on an unsupported segment type");
		return 0;
	}

	if (!_add_area(node, seg, NULL, offset))
		return_0;

	return 1;
}
3097 void dm_tree_set_cookie(struct dm_tree_node
*node
, uint32_t cookie
)
3099 node
->dtree
->cookie
= cookie
;
3102 uint32_t dm_tree_get_cookie(struct dm_tree_node
*node
)
3104 return node
->dtree
->cookie
;