sourceware.org Git - lvm2.git/blame - libdm/libdm-deptree.c
integrity: add --integritysettings for tuning
3d0480ed 1/*
60ddd05f 2 * Copyright (C) 2005-2017 Red Hat, Inc. All rights reserved.
3d0480ed
AK
3 *
4 * This file is part of the device-mapper userspace tools.
5 *
6 * This copyrighted material is made available to anyone wishing to use,
7 * modify, copy, or redistribute it subject to the terms and conditions
8 * of the GNU Lesser General Public License v.2.1.
9 *
10 * You should have received a copy of the GNU Lesser General Public License
11 * along with this program; if not, write to the Free Software Foundation,
fcbef05a 12 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3d0480ed
AK
13 */
14
7f97c7ea 15#include "libdm/misc/dmlib.h"
3d0480ed
AK
16#include "libdm-targets.h"
17#include "libdm-common.h"
7f97c7ea
JT
18#include "libdm/misc/kdev_t.h"
19#include "libdm/misc/dm-ioctl.h"
3d0480ed
AK
20
21#include <stdarg.h>
8f26e18c 22#include <sys/utsname.h>
3d0480ed 23
165e4a11
AK
24#define MAX_TARGET_PARAMSIZE 500000
25
26/* Supported segment types */
27enum {
1ff7e214 28 SEG_CACHE,
12ca060e
MB
29 SEG_CRYPT,
30 SEG_ERROR,
165e4a11
AK
31 SEG_LINEAR,
32 SEG_MIRRORED,
33 SEG_SNAPSHOT,
34 SEG_SNAPSHOT_ORIGIN,
aa6f4e51 35 SEG_SNAPSHOT_MERGE,
165e4a11
AK
36 SEG_STRIPED,
37 SEG_ZERO,
4251236e
ZK
38 SEG_THIN_POOL,
39 SEG_THIN,
bf8d0098 40 SEG_RAID0,
d8c2677a 41 SEG_RAID0_META,
cac52ca4 42 SEG_RAID1,
4047e4df 43 SEG_RAID10,
cac52ca4 44 SEG_RAID4,
60ddd05f 45 SEG_RAID5_N,
cac52ca4
JEB
46 SEG_RAID5_LA,
47 SEG_RAID5_RA,
48 SEG_RAID5_LS,
49 SEG_RAID5_RS,
3673ce48 50 SEG_RAID6_N_6,
cac52ca4
JEB
51 SEG_RAID6_ZR,
52 SEG_RAID6_NR,
53 SEG_RAID6_NC,
a4bbaa3b
HM
54 SEG_RAID6_LS_6,
55 SEG_RAID6_RS_6,
56 SEG_RAID6_LA_6,
57 SEG_RAID6_RA_6,
165e4a11 58};
b4f1578f 59
165e4a11
AK
60/* FIXME Add crypt and multipath support */
61
6190ded5 62static const struct {
165e4a11 63 unsigned type;
6190ded5
ZK
64 const char target[16];
65} _dm_segtypes[] = {
1ff7e214 66 { SEG_CACHE, "cache" },
12ca060e 67 { SEG_CRYPT, "crypt" },
165e4a11
AK
68 { SEG_ERROR, "error" },
69 { SEG_LINEAR, "linear" },
70 { SEG_MIRRORED, "mirror" },
71 { SEG_SNAPSHOT, "snapshot" },
72 { SEG_SNAPSHOT_ORIGIN, "snapshot-origin" },
aa6f4e51 73 { SEG_SNAPSHOT_MERGE, "snapshot-merge" },
165e4a11
AK
74 { SEG_STRIPED, "striped" },
75 { SEG_ZERO, "zero"},
4251236e
ZK
76 { SEG_THIN_POOL, "thin-pool"},
77 { SEG_THIN, "thin"},
bf8d0098 78 { SEG_RAID0, "raid0"},
d8c2677a 79 { SEG_RAID0_META, "raid0_meta"},
cac52ca4 80 { SEG_RAID1, "raid1"},
4047e4df 81 { SEG_RAID10, "raid10"},
cac52ca4 82 { SEG_RAID4, "raid4"},
60ddd05f 83 { SEG_RAID5_N, "raid5_n"},
cac52ca4
JEB
84 { SEG_RAID5_LA, "raid5_la"},
85 { SEG_RAID5_RA, "raid5_ra"},
86 { SEG_RAID5_LS, "raid5_ls"},
87 { SEG_RAID5_RS, "raid5_rs"},
3673ce48 88 { SEG_RAID6_N_6,"raid6_n_6"},
cac52ca4
JEB
89 { SEG_RAID6_ZR, "raid6_zr"},
90 { SEG_RAID6_NR, "raid6_nr"},
91 { SEG_RAID6_NC, "raid6_nc"},
a4bbaa3b
HM
92 { SEG_RAID6_LS_6, "raid6_ls_6"},
93 { SEG_RAID6_RS_6, "raid6_rs_6"},
94 { SEG_RAID6_LA_6, "raid6_la_6"},
95 { SEG_RAID6_RA_6, "raid6_ra_6"},
96
ee05be08
ZK
97
98 /*
4e60e624 99 * WARNING: Since the 'raid' target overloads this 1:1 mapping table
ee05be08
ZK
100 * for searching, do not add new enum elements past these aliases!
101 */
cac52ca4
JEB
102 { SEG_RAID5_LS, "raid5"}, /* same as "raid5_ls" (default for MD also) */
103 { SEG_RAID6_ZR, "raid6"}, /* same as "raid6_zr" */
01b5820d 104 { SEG_RAID10, "raid10_near"}, /* same as "raid10" */
165e4a11
AK
105};
106
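/*
 * Editorial sketch, not part of the upstream file: the table above is
 * scanned linearly, so a type -> name search returns the first matching
 * row; that is why the duplicate "raid5"/"raid6"/"raid10_near" alias rows
 * must stay at the end.  A minimal name -> type lookup under the same
 * assumption (DM_ARRAY_SIZE comes from libdevmapper.h; the helper itself
 * is hypothetical):
 */
static unsigned _example_segtype_from_name(const char *name)
{
        unsigned i;

        for (i = 0; i < DM_ARRAY_SIZE(_dm_segtypes); ++i)
                if (!strcmp(name, _dm_segtypes[i].target))
                        return _dm_segtypes[i].type;

        return SEG_ERROR;       /* fallback chosen for this sketch only */
}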
107/* Some segment types have a list of areas of other devices attached */
108struct seg_area {
2c44337b 109 struct dm_list list;
165e4a11 110
b4f1578f 111 struct dm_tree_node *dev_node;
165e4a11
AK
112
113 uint64_t offset;
b262f3e1
ZK
114};
115
2e732e96
ZK
116struct dm_thin_message {
117 dm_thin_message_t type;
118 union {
119 struct {
120 uint32_t device_id;
121 uint32_t origin_id;
122 } m_create_snap;
123 struct {
124 uint32_t device_id;
125 } m_create_thin;
126 struct {
127 uint32_t device_id;
128 } m_delete;
129 struct {
130 uint64_t current_id;
131 uint64_t new_id;
132 } m_set_transaction_id;
2e732e96
ZK
133 } u;
134};
135
25e6ab87
ZK
136struct thin_message {
137 struct dm_list list;
138 struct dm_thin_message message;
660a42bc 139 int expected_errno;
25e6ab87
ZK
140};
141
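/*
 * Editorial sketch, not part of the upstream file: filling in one of the
 * messages above before it is queued on load_segment.thin_messages.  The
 * field names come straight from the union; the helper is hypothetical.
 */
static void _example_fill_create_snap(struct dm_thin_message *m,
                                      uint32_t device_id, uint32_t origin_id)
{
        m->type = DM_THIN_MESSAGE_CREATE_SNAP;
        m->u.m_create_snap.device_id = device_id;
        m->u.m_create_snap.origin_id = origin_id;
}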
165e4a11
AK
142/* Per-segment properties */
143struct load_segment {
2c44337b 144 struct dm_list list;
165e4a11
AK
145
146 unsigned type;
147
148 uint64_t size;
149
e447d7ca
ZK
150 unsigned area_count; /* Linear + Striped + Mirrored + Crypt */
151 struct dm_list areas; /* Linear + Striped + Mirrored + Crypt */
165e4a11 152
cac52ca4 153 uint32_t stripe_size; /* Striped + raid */
165e4a11
AK
154
155 int persistent; /* Snapshot */
79e9bde0 156 uint32_t chunk_size; /* Snapshot */
b4f1578f 157 struct dm_tree_node *cow; /* Snapshot */
1ff7e214 158 struct dm_tree_node *origin; /* Snapshot + Snapshot origin + Cache */
aa6f4e51 159 struct dm_tree_node *merge; /* Snapshot */
165e4a11 160
e447d7ca 161 struct dm_tree_node *log; /* Mirror */
cac52ca4 162 uint32_t region_size; /* Mirror + raid */
165e4a11
AK
163 unsigned clustered; /* Mirror */
164 unsigned mirror_area_count; /* Mirror */
93a63309 165 uint64_t flags; /* Mirror + Raid + Cache */
67b25ed4 166 char *uuid; /* Clustered mirror log */
12ca060e 167
e2ea3cd7 168 const char *policy_name; /* Cache */
c651c614 169 unsigned policy_argc; /* Cache */
20b22cd0 170 struct dm_config_node *policy_settings; /* Cache */
1ff7e214 171
12ca060e
MB
172 const char *cipher; /* Crypt */
173 const char *chainmode; /* Crypt */
174 const char *iv; /* Crypt */
175 uint64_t iv_offset; /* Crypt */
176 const char *key; /* Crypt */
b262f3e1 177
e2354ea3
HM
178 int delta_disks; /* raid reshape number of disks */
179 int data_offset; /* raid reshape data offset on disk to set */
180 uint64_t rebuilds[RAID_BITMAP_SIZE]; /* raid */
181 uint64_t writemostly[RAID_BITMAP_SIZE]; /* raid */
562c678e
JB
182 uint32_t writebehind; /* raid */
183 uint32_t max_recovery_rate; /* raid kB/sec/disk */
184 uint32_t min_recovery_rate; /* raid kB/sec/disk */
e2354ea3 185 uint32_t data_copies; /* raid10 data_copies */
4251236e 186
1ff7e214 187 struct dm_tree_node *metadata; /* Thin_pool + Cache */
4251236e 188 struct dm_tree_node *pool; /* Thin_pool, Thin */
dcd4afc7 189 struct dm_tree_node *external; /* Thin */
25e6ab87 190 struct dm_list thin_messages; /* Thin_pool */
bbcd37e4 191 uint64_t transaction_id; /* Thin_pool */
e9156c2b 192 uint64_t low_water_mark; /* Thin_pool */
79e9bde0 193 uint32_t data_block_size; /* Thin_pool + cache */
74ae1c5b 194 uint32_t migration_threshold; /* Cache */
460c5991 195 unsigned skip_block_zeroing; /* Thin_pool */
4dab0d31
AK
196 unsigned ignore_discard; /* Thin_pool target vsn 1.1 */
197 unsigned no_discard_passdown; /* Thin_pool target vsn 1.1 */
2908ab3e 198 unsigned error_if_no_space; /* Thin pool target vsn 1.10 */
69132f55 199 unsigned read_only; /* Thin pool target vsn 1.3 */
4251236e
ZK
200 uint32_t device_id; /* Thin */
201
165e4a11
AK
202};
203
204/* Per-device properties */
205struct load_properties {
206 int read_only;
207 uint32_t major;
208 uint32_t minor;
209
52b84409
AK
210 uint32_t read_ahead;
211 uint32_t read_ahead_flags;
212
165e4a11 213 unsigned segment_count;
9ef820a2 214 int size_changed;
2c44337b 215 struct dm_list segs;
165e4a11
AK
216
217 const char *new_name;
566515c0
PR
218
219 /* If immediate_dev_node is set to 1, try to create the dev node
220 * as soon as possible (e.g. in preload stage even during traversal
221 * and processing of dm tree). This will also flush all stacked dev
222 * node operations, synchronizing with udev.
223 */
df390f17
AK
224 unsigned immediate_dev_node;
225
226 /*
227 * If the device size changed from zero and this is set,
228 * don't resume the device immediately, even if the device
229 * has parents. This works provided the parents do not
230 * validate the device size and is required by pvmove to
231 * avoid starting the mirror resync operation too early.
232 */
233 unsigned delay_resume_if_new;
bbcd37e4 234
82ae02bc
ZK
235 /*
236 * Preload of a tree normally only loads tables and does not resume, but
237 * there is an automatic resume when a target is extended, as it is believed
238 * there can be no i/o in flight to this 'new' extended space
239 * from any device above. The reason is that a preloaded target above
240 * may actually need to see its bigger subdevice before it
241 * gets suspended. As long as devices are simple linears
242 * there is no problem resuming the bigger device in preload (before commit).
243 * However, complex targets like thin-pool (raid, cache...)
244 * shall not be resumed before their commit.
245 */
246 unsigned delay_resume_if_extended;
247
a900d150
ZK
248 /*
249 * Call node_send_messages(); set to 2 if there are messages.
250 * When != 0, it validates a matching transaction id, thus thin-pools
251 * where transaction_id is passed as 0 are never validated; this
252 * allows external management of the thin-pool TID.
253 */
bbcd37e4 254 unsigned send_messages;
5bef18f2
ZK
255 /* Skip suspending node's children, used when sending messages to thin-pool */
256 int skip_suspend;
9b78f7ee
ZK
257
258 /* Suspend and resume siblings after node activation with udev flags */
259 unsigned reactivate_siblings;
260 uint16_t reactivate_udev_flags;
165e4a11
AK
261};
262
263 /* Two of these are used to join two nodes with uses and used_by. */
b4f1578f 264struct dm_tree_link {
2c44337b 265 struct dm_list list;
b4f1578f 266 struct dm_tree_node *node;
165e4a11
AK
267};
268
b4f1578f
AK
269struct dm_tree_node {
270 struct dm_tree *dtree;
3d0480ed 271
40e5fd8b
AK
272 const char *name;
273 const char *uuid;
274 struct dm_info info;
3d0480ed 275
40e5fd8b
AK
276 struct dm_list uses; /* Nodes this node uses */
277 struct dm_list used_by; /* Nodes that use this node */
165e4a11 278
56c28292 279 int activation_priority; /* 0 gets activated first */
bd2500e6 280 int implicit_deps; /* 1 device only implicitly referenced */
56c28292 281
f16aea9e
PR
282 uint16_t udev_flags; /* Udev control flags */
283
165e4a11
AK
284 void *context; /* External supplied context */
285
286 struct load_properties props; /* For creation/table (re)load */
76d1aec8
ZK
287
288 /*
289 * If presuspend of child node is needed
290 * Note: only direct child is allowed
291 */
292 struct dm_tree_node *presuspend_node;
7e35dfff
ZK
293
294 /* Callback */
295 dm_node_callback_fn callback;
296 void *callback_data;
0638d1d8 297
a8ee82ed 298 int activated; /* tracks activation during preload */
3d0480ed
AK
299};
300
b4f1578f 301struct dm_tree {
a3f6b2ce
AK
302 struct dm_pool *mem;
303 struct dm_hash_table *devs;
165e4a11 304 struct dm_hash_table *uuids;
b4f1578f 305 struct dm_tree_node root;
c55b1410 306 int skip_lockfs; /* 1 skips lockfs (for non-snapshots) */
787200ef
PR
307 int no_flush; /* 1 sets noflush (mirrors/multipath) */
308 int retry_remove; /* 1 retries remove if not successful */
bd90c6b2 309 uint32_t cookie;
04ae5007 310 char buf[DM_NAME_LEN + 32]; /* print buffer for device_name (major:minor) */
9b2f9d64 311 const char * const *optional_uuid_suffixes; /* uuid suffixes ignored when matching */
3d0480ed
AK
312};
313
5c9eae96
AK
314/*
315 * Tree functions.
316 */
b4f1578f 317struct dm_tree *dm_tree_create(void)
3d0480ed 318{
0395dd22 319 struct dm_pool *dmem;
b4f1578f 320 struct dm_tree *dtree;
3d0480ed 321
0395dd22
ZK
322 if (!(dmem = dm_pool_create("dtree", 1024)) ||
323 !(dtree = dm_pool_zalloc(dmem, sizeof(*dtree)))) {
324 log_error("Failed to allocate dtree.");
325 if (dmem)
326 dm_pool_destroy(dmem);
3d0480ed
AK
327 return NULL;
328 }
329
b4f1578f 330 dtree->root.dtree = dtree;
2c44337b
AK
331 dm_list_init(&dtree->root.uses);
332 dm_list_init(&dtree->root.used_by);
c55b1410 333 dtree->skip_lockfs = 0;
b9ffd32c 334 dtree->no_flush = 0;
0395dd22 335 dtree->mem = dmem;
7cff640d 336 dtree->optional_uuid_suffixes = NULL;
3d0480ed 337
b4f1578f
AK
338 if (!(dtree->devs = dm_hash_create(8))) {
339 log_error("dtree hash creation failed");
340 dm_pool_destroy(dtree->mem);
3d0480ed
AK
341 return NULL;
342 }
343
b4f1578f
AK
344 if (!(dtree->uuids = dm_hash_create(32))) {
345 log_error("dtree uuid hash creation failed");
346 dm_hash_destroy(dtree->devs);
347 dm_pool_destroy(dtree->mem);
165e4a11
AK
348 return NULL;
349 }
350
b4f1578f 351 return dtree;
3d0480ed
AK
352}
353
b4f1578f 354void dm_tree_free(struct dm_tree *dtree)
3d0480ed 355{
b4f1578f 356 if (!dtree)
3d0480ed
AK
357 return;
358
b4f1578f
AK
359 dm_hash_destroy(dtree->uuids);
360 dm_hash_destroy(dtree->devs);
361 dm_pool_destroy(dtree->mem);
3d0480ed
AK
362}
363
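/*
 * Editorial sketch, not part of the upstream file: the minimal lifecycle
 * of a dm_tree as seen by callers of the two functions above.  All names
 * used are defined in this file except the hypothetical wrapper itself.
 */
static int _example_tree_lifecycle(void)
{
        struct dm_tree *dtree;

        if (!(dtree = dm_tree_create()))
                return_0;

        /* ... dm_tree_add_dev(), preload/activate/deactivate children ... */

        dm_tree_free(dtree);    /* destroys the pool, both hashes and all nodes */

        return 1;
}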
5c9eae96
AK
364void dm_tree_set_cookie(struct dm_tree_node *node, uint32_t cookie)
365{
366 node->dtree->cookie = cookie;
367}
368
369uint32_t dm_tree_get_cookie(struct dm_tree_node *node)
370{
371 return node->dtree->cookie;
372}
373
374void dm_tree_skip_lockfs(struct dm_tree_node *dnode)
375{
376 dnode->dtree->skip_lockfs = 1;
377}
378
379void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode)
380{
381 dnode->dtree->no_flush = 1;
382}
383
384void dm_tree_retry_remove(struct dm_tree_node *dnode)
385{
386 dnode->dtree->retry_remove = 1;
387}
388
389/*
390 * Node functions.
391 */
04bde319
ZK
392static int _nodes_are_linked(const struct dm_tree_node *parent,
393 const struct dm_tree_node *child)
3d0480ed 394{
b4f1578f 395 struct dm_tree_link *dlink;
3d0480ed 396
2c44337b 397 dm_list_iterate_items(dlink, &parent->uses)
3d0480ed
AK
398 if (dlink->node == child)
399 return 1;
3d0480ed
AK
400
401 return 0;
402}
403
2c44337b 404static int _link(struct dm_list *list, struct dm_tree_node *node)
3d0480ed 405{
b4f1578f 406 struct dm_tree_link *dlink;
3d0480ed 407
b4f1578f
AK
408 if (!(dlink = dm_pool_alloc(node->dtree->mem, sizeof(*dlink)))) {
409 log_error("dtree link allocation failed");
3d0480ed
AK
410 return 0;
411 }
412
413 dlink->node = node;
2c44337b 414 dm_list_add(list, &dlink->list);
3d0480ed
AK
415
416 return 1;
417}
418
b4f1578f
AK
419static int _link_nodes(struct dm_tree_node *parent,
420 struct dm_tree_node *child)
3d0480ed
AK
421{
422 if (_nodes_are_linked(parent, child))
423 return 1;
424
425 if (!_link(&parent->uses, child))
426 return 0;
427
428 if (!_link(&child->used_by, parent))
429 return 0;
430
431 return 1;
432}
433
2c44337b 434static void _unlink(struct dm_list *list, struct dm_tree_node *node)
3d0480ed 435{
b4f1578f 436 struct dm_tree_link *dlink;
3d0480ed 437
2c44337b 438 dm_list_iterate_items(dlink, list)
3d0480ed 439 if (dlink->node == node) {
2c44337b 440 dm_list_del(&dlink->list);
3d0480ed
AK
441 break;
442 }
3d0480ed
AK
443}
444
b4f1578f
AK
445static void _unlink_nodes(struct dm_tree_node *parent,
446 struct dm_tree_node *child)
3d0480ed
AK
447{
448 if (!_nodes_are_linked(parent, child))
449 return;
450
451 _unlink(&parent->uses, child);
452 _unlink(&child->used_by, parent);
453}
454
b4f1578f 455static int _add_to_toplevel(struct dm_tree_node *node)
165e4a11 456{
b4f1578f 457 return _link_nodes(&node->dtree->root, node);
165e4a11
AK
458}
459
b4f1578f 460static void _remove_from_toplevel(struct dm_tree_node *node)
3d0480ed 461{
b1ebf028 462 _unlink_nodes(&node->dtree->root, node);
3d0480ed
AK
463}
464
b4f1578f 465static int _add_to_bottomlevel(struct dm_tree_node *node)
3d0480ed 466{
b4f1578f 467 return _link_nodes(node, &node->dtree->root);
3d0480ed
AK
468}
469
b4f1578f 470static void _remove_from_bottomlevel(struct dm_tree_node *node)
165e4a11 471{
b1ebf028 472 _unlink_nodes(node, &node->dtree->root);
165e4a11
AK
473}
474
b4f1578f 475static int _link_tree_nodes(struct dm_tree_node *parent, struct dm_tree_node *child)
165e4a11
AK
476{
477 /* Don't link to root node if child already has a parent */
f77736ca 478 if (parent == &parent->dtree->root) {
b4f1578f 479 if (dm_tree_node_num_children(child, 1))
165e4a11
AK
480 return 1;
481 } else
482 _remove_from_toplevel(child);
483
f77736ca 484 if (child == &child->dtree->root) {
b4f1578f 485 if (dm_tree_node_num_children(parent, 0))
165e4a11
AK
486 return 1;
487 } else
488 _remove_from_bottomlevel(parent);
489
490 return _link_nodes(parent, child);
491}
492
b4f1578f 493static struct dm_tree_node *_create_dm_tree_node(struct dm_tree *dtree,
3d0480ed
AK
494 const char *name,
495 const char *uuid,
165e4a11 496 struct dm_info *info,
f16aea9e
PR
497 void *context,
498 uint16_t udev_flags)
3d0480ed 499{
b4f1578f 500 struct dm_tree_node *node;
6fc4c99b 501 dev_t dev;
3d0480ed 502
0e177cc7
ZK
503 if (!(node = dm_pool_zalloc(dtree->mem, sizeof(*node))) ||
504 !(node->name = dm_pool_strdup(dtree->mem, name)) ||
505 !(node->uuid = dm_pool_strdup(dtree->mem, uuid))) {
506 log_error("_create_dm_tree_node alloc failed.");
3d0480ed
AK
507 return NULL;
508 }
509
b4f1578f 510 node->dtree = dtree;
3d0480ed 511 node->info = *info;
165e4a11 512 node->context = context;
f16aea9e 513 node->udev_flags = udev_flags;
3d0480ed 514
2c44337b
AK
515 dm_list_init(&node->uses);
516 dm_list_init(&node->used_by);
517 dm_list_init(&node->props.segs);
3d0480ed 518
aa8b2d6a 519 dev = MKDEV(info->major, info->minor);
3d0480ed 520
b4f1578f 521 if (!dm_hash_insert_binary(dtree->devs, (const char *) &dev,
406b566c 522 sizeof(dev), node)) {
b4f1578f
AK
523 log_error("dtree node hash insertion failed");
524 dm_pool_free(dtree->mem, node);
3d0480ed
AK
525 return NULL;
526 }
527
406b566c 528 if (*uuid && !dm_hash_insert(dtree->uuids, uuid, node)) {
b4f1578f
AK
529 log_error("dtree uuid hash insertion failed");
530 dm_hash_remove_binary(dtree->devs, (const char *) &dev,
165e4a11 531 sizeof(dev));
b4f1578f 532 dm_pool_free(dtree->mem, node);
165e4a11
AK
533 return NULL;
534 }
535
3d0480ed
AK
536 return node;
537}
538
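/*
 * Editorial note: a node lives in both lookup structures at once -- the
 * devs hash keyed by the packed dev_t from MKDEV(major, minor) and the
 * uuids hash keyed by the uuid string (skipped when the uuid is empty).
 * That is why the function above removes the devs entry again when the
 * uuids insertion fails.
 */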
b4f1578f 539static struct dm_tree_node *_find_dm_tree_node(struct dm_tree *dtree,
3d0480ed
AK
540 uint32_t major, uint32_t minor)
541{
aa8b2d6a 542 dev_t dev = MKDEV(major, minor);
3d0480ed 543
b4f1578f 544 return dm_hash_lookup_binary(dtree->devs, (const char *) &dev,
a5087866 545 sizeof(dev));
3d0480ed
AK
546}
547
7cff640d
AK
548void dm_tree_set_optional_uuid_suffixes(struct dm_tree *dtree, const char **optional_uuid_suffixes)
549{
550 dtree->optional_uuid_suffixes = optional_uuid_suffixes;
551}
552
68fdae11 553static const char *_node_name(struct dm_tree_node *dnode);
b4f1578f 554static struct dm_tree_node *_find_dm_tree_node_by_uuid(struct dm_tree *dtree,
165e4a11
AK
555 const char *uuid)
556{
87f98002 557 struct dm_tree_node *node;
2e5ff5d1
AK
558 const char *default_uuid_prefix;
559 size_t default_uuid_prefix_len;
7cff640d 560 const char *suffix, *suffix_position;
d4905724 561 char uuid_without_suffix[DM_UUID_LEN + 1];
7cff640d 562 unsigned i = 0;
9b2f9d64 563 const char * const *suffix_list = dtree->optional_uuid_suffixes;
87f98002 564
7cff640d 565 if ((node = dm_hash_lookup(dtree->uuids, uuid))) {
68fdae11 566 log_debug_activation("Matched uuid %s %s in deptree.", uuid, _node_name(node));
87f98002 567 return node;
7cff640d 568 }
87f98002 569
9971459d 570 if (suffix_list && (suffix_position = strrchr(uuid, '-'))) {
7cff640d
AK
571 while ((suffix = suffix_list[i++])) {
572 if (strcmp(suffix_position + 1, suffix))
573 continue;
574
d4905724 575 dm_strncpy(uuid_without_suffix, uuid, sizeof(uuid_without_suffix));
7cff640d
AK
576 uuid_without_suffix[suffix_position - uuid] = '\0';
577
578 if ((node = dm_hash_lookup(dtree->uuids, uuid_without_suffix))) {
68fdae11
ZK
579 log_debug_activation("Matched uuid %s %s (missing suffix -%s) in deptree.",
580 uuid_without_suffix, _node_name(node), suffix);
7cff640d
AK
581 return node;
582 }
583
584 break;
585 }
586 }
587
68fdae11
ZK
588 default_uuid_prefix = dm_uuid_prefix();
589 default_uuid_prefix_len = strlen(default_uuid_prefix);
87f98002 590
68fdae11
ZK
591 if ((strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len) == 0) &&
592 (node = dm_hash_lookup(dtree->uuids, uuid + default_uuid_prefix_len))) {
593 log_debug_activation("Matched uuid %s %s (missing prefix) in deptree.",
594 uuid + default_uuid_prefix_len, _node_name(node));
7cff640d
AK
595 return node;
596 }
597
68fdae11 598 log_debug_activation("Not matched uuid %s in deptree.", uuid);
7cff640d 599 return NULL;
165e4a11
AK
600}
601
04ae5007
ZK
602/* Return node's device_name (major:minor) for debug messages */
603static const char *_node_name(struct dm_tree_node *dnode)
604{
605 if (dm_snprintf(dnode->dtree->buf, sizeof(dnode->dtree->buf),
2a01e3d4
ZK
606 "%s (" FMTu32 ":" FMTu32 ")",
607 dnode->name ? dnode->name : "",
608 dnode->info.major, dnode->info.minor) < 0) {
04ae5007
ZK
609 stack;
610 return dnode->name;
611 }
612
613 return dnode->dtree->buf;
614}
615
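/*
 * Editorial note: _node_name() formats into the single buf[] embedded in
 * struct dm_tree, so the returned pointer is only valid until the next
 * call on the same tree -- fine for the immediate log_* uses throughout
 * this file, but not a string to keep.
 */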
5c9eae96
AK
616void dm_tree_node_set_udev_flags(struct dm_tree_node *dnode, uint16_t udev_flags)
617
618{
5c9eae96 619 if (udev_flags != dnode->udev_flags)
2a01e3d4
ZK
620 log_debug_activation("Resetting %s udev_flags from 0x%x to 0x%x.",
621 _node_name(dnode),
06abb2dd 622 dnode->udev_flags, udev_flags);
5c9eae96
AK
623 dnode->udev_flags = udev_flags;
624}
625
626void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode,
627 uint32_t read_ahead,
628 uint32_t read_ahead_flags)
629{
630 dnode->props.read_ahead = read_ahead;
631 dnode->props.read_ahead_flags = read_ahead_flags;
632}
633
634void dm_tree_node_set_presuspend_node(struct dm_tree_node *node,
635 struct dm_tree_node *presuspend_node)
636{
637 node->presuspend_node = presuspend_node;
638}
639
640const char *dm_tree_node_get_name(const struct dm_tree_node *node)
641{
642 return node->info.exists ? node->name : "";
643}
644
645const char *dm_tree_node_get_uuid(const struct dm_tree_node *node)
646{
647 return node->info.exists ? node->uuid : "";
648}
649
650const struct dm_info *dm_tree_node_get_info(const struct dm_tree_node *node)
651{
652 return &node->info;
653}
654
655void *dm_tree_node_get_context(const struct dm_tree_node *node)
656{
657 return node->context;
658}
659
660int dm_tree_node_size_changed(const struct dm_tree_node *dnode)
661{
662 return dnode->props.size_changed;
663}
664
665int dm_tree_node_num_children(const struct dm_tree_node *node, uint32_t inverted)
666{
667 if (inverted) {
668 if (_nodes_are_linked(&node->dtree->root, node))
669 return 0;
670 return dm_list_size(&node->used_by);
671 }
672
673 if (_nodes_are_linked(node, &node->dtree->root))
674 return 0;
675
676 return dm_list_size(&node->uses);
677}
678
679/*
680 * Returns 1 if no prefix supplied
681 */
682static int _uuid_prefix_matches(const char *uuid, const char *uuid_prefix, size_t uuid_prefix_len)
683{
684 const char *default_uuid_prefix = dm_uuid_prefix();
685 size_t default_uuid_prefix_len = strlen(default_uuid_prefix);
686
687 if (!uuid_prefix)
688 return 1;
689
690 if (!strncmp(uuid, uuid_prefix, uuid_prefix_len))
691 return 1;
692
693 /* Handle transition: active device uuids might be missing the prefix */
694 if (uuid_prefix_len <= 4)
695 return 0;
696
697 if (!strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
698 return 0;
699
700 if (strncmp(uuid_prefix, default_uuid_prefix, default_uuid_prefix_len))
701 return 0;
702
703 if (!strncmp(uuid, uuid_prefix + default_uuid_prefix_len, uuid_prefix_len - default_uuid_prefix_len))
704 return 1;
705
706 return 0;
707}
708
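/*
 * Editorial note, assuming the default prefix from dm_uuid_prefix() is
 * "LVM-": the transition branch above lets an active uuid recorded
 * without the prefix still match a prefixed search, e.g.
 *
 *   uuid        = "pvJh..."        (active device, prefix missing)
 *   uuid_prefix = "LVM-pvJh..."    (caller-supplied, prefix present)
 *
 * The final strncmp() compares uuid against uuid_prefix plus the default
 * prefix length, so the pair above matches.  Prefixes no longer than four
 * characters bail out early since they cannot carry the default prefix.
 */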
709/*
710 * Returns 1 if no children.
711 */
712static int _children_suspended(struct dm_tree_node *node,
713 uint32_t inverted,
714 const char *uuid_prefix,
715 size_t uuid_prefix_len)
716{
717 struct dm_list *list;
718 struct dm_tree_link *dlink;
719 const struct dm_info *dinfo;
720 const char *uuid;
721
722 if (inverted) {
723 if (_nodes_are_linked(&node->dtree->root, node))
724 return 1;
725 list = &node->used_by;
726 } else {
727 if (_nodes_are_linked(node, &node->dtree->root))
728 return 1;
729 list = &node->uses;
730 }
731
732 dm_list_iterate_items(dlink, list) {
733 if (!(uuid = dm_tree_node_get_uuid(dlink->node))) {
734 stack;
735 continue;
736 }
737
738 /* Ignore if it doesn't belong to this VG */
739 if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
740 continue;
741
742 /* Ignore if parent node wants to presuspend this node */
743 if (dlink->node->presuspend_node == node)
744 continue;
745
47419d21
ZK
746 if (!(dinfo = dm_tree_node_get_info(dlink->node)))
747 return_0; /* FIXME Is this normal? */
5c9eae96
AK
748
749 if (!dinfo->suspended)
750 return 0;
751 }
752
753 return 1;
754}
755
756/*
757 * Set major and minor to zero for root of tree.
758 */
759struct dm_tree_node *dm_tree_find_node(struct dm_tree *dtree,
a5087866
ZK
760 uint32_t major,
761 uint32_t minor)
5c9eae96
AK
762{
763 if (!major && !minor)
764 return &dtree->root;
765
766 return _find_dm_tree_node(dtree, major, minor);
767}
768
769/*
770 * Set uuid to NULL for root of tree.
771 */
772struct dm_tree_node *dm_tree_find_node_by_uuid(struct dm_tree *dtree,
a5087866 773 const char *uuid)
5c9eae96
AK
774{
775 if (!uuid || !*uuid)
776 return &dtree->root;
777
778 return _find_dm_tree_node_by_uuid(dtree, uuid);
779}
780
781/*
782 * First time set *handle to NULL.
783 * Set inverted to invert the tree.
784 */
785struct dm_tree_node *dm_tree_next_child(void **handle,
786 const struct dm_tree_node *parent,
787 uint32_t inverted)
788{
789 struct dm_list **dlink = (struct dm_list **) handle;
790 const struct dm_list *use_list;
791
792 if (inverted)
793 use_list = &parent->used_by;
794 else
795 use_list = &parent->uses;
796
797 if (!*dlink)
798 *dlink = dm_list_first(use_list);
799 else
800 *dlink = dm_list_next(use_list, *dlink);
801
802 return (*dlink) ? dm_list_item(*dlink, struct dm_tree_link)->node : NULL;
803}
804
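/*
 * Editorial sketch, not part of the upstream file: the iterator above is
 * handle-based; the walk used throughout this file looks like this
 * (hypothetical helper, inverted = 0 walks the "uses" list):
 */
static void _example_walk_children(struct dm_tree_node *parent)
{
        void *handle = NULL;
        struct dm_tree_node *child;

        while ((child = dm_tree_next_child(&handle, parent, 0)))
                log_debug_activation("Child: %s.",
                                     dm_tree_node_get_name(child));
}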
a3f6b2ce 805static int _deps(struct dm_task **dmt, struct dm_pool *mem, uint32_t major, uint32_t minor,
2e5ff5d1 806 const char **name, const char **uuid, unsigned inactive_table,
3d0480ed
AK
807 struct dm_info *info, struct dm_deps **deps)
808{
809 memset(info, 0, sizeof(*info));
7379a262
ZK
810 *name = "";
811 *uuid = "";
812 *deps = NULL;
3d0480ed
AK
813
814 if (!dm_is_dm_major(major)) {
3d0480ed
AK
815 info->major = major;
816 info->minor = minor;
3d0480ed
AK
817 return 1;
818 }
819
925fec6e
ZK
820 if (!(*dmt = dm_task_create(DM_DEVICE_DEPS)))
821 return_0;
3d0480ed 822
7379a262
ZK
823 if (!dm_task_set_major(*dmt, major) || !dm_task_set_minor(*dmt, minor)) {
824 log_error("_deps: failed to set major:minor for (" FMTu32 ":" FMTu32 ").",
b4f1578f 825 major, minor);
3d0480ed 826 goto failed;
b4f1578f 827 }
3d0480ed 828
2e5ff5d1
AK
829 if (inactive_table && !dm_task_query_inactive_table(*dmt)) {
830 log_error("_deps: failed to set inactive table for (%" PRIu32 ":%" PRIu32 ")",
831 major, minor);
832 goto failed;
833 }
834
b4f1578f
AK
835 if (!dm_task_run(*dmt)) {
836 log_error("_deps: task run failed for (%" PRIu32 ":%" PRIu32 ")",
837 major, minor);
3d0480ed 838 goto failed;
b4f1578f 839 }
3d0480ed 840
b4f1578f
AK
841 if (!dm_task_get_info(*dmt, info)) {
842 log_error("_deps: failed to get info for (%" PRIu32 ":%" PRIu32 ")",
843 major, minor);
3d0480ed 844 goto failed;
b4f1578f 845 }
3d0480ed 846
7379a262 847 if (info->exists) {
3d0480ed 848 if (info->major != major) {
b4f1578f 849 log_error("Inconsistent dtree major number: %u != %u",
3d0480ed
AK
850 major, info->major);
851 goto failed;
852 }
853 if (info->minor != minor) {
b4f1578f 854 log_error("Inconsistent dtree minor number: %u != %u",
3d0480ed
AK
855 minor, info->minor);
856 goto failed;
857 }
0e177cc7
ZK
858 *name = dm_task_get_name(*dmt);
859 *uuid = dm_task_get_uuid(*dmt);
3d0480ed
AK
860 *deps = dm_task_get_deps(*dmt);
861 }
862
863 return 1;
864
865failed:
866 dm_task_destroy(*dmt);
7379a262
ZK
867 *dmt = NULL;
868
3d0480ed
AK
869 return 0;
870}
871
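/*
 * Editorial sketch, not part of the upstream file: consuming the deps
 * list obtained by _deps().  Each entry is a packed device number, split
 * with the MAJOR()/MINOR() macros from kdev_t.h exactly as the callers
 * below do; the helper is hypothetical.
 */
static void _example_print_deps(const struct dm_deps *deps)
{
        uint32_t i;

        for (i = 0; i < deps->count; i++)
                log_debug_activation("Dependency %" PRIu32 ": %u:%u.", i,
                                     (unsigned) MAJOR(deps->device[i]),
                                     (unsigned) MINOR(deps->device[i]));
}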
5c9eae96
AK
872/*
873 * Obtain device info (and optionally its name and uuid) by major:minor.
874 */
875static int _info_by_dev(uint32_t major, uint32_t minor, int with_open_count,
876 struct dm_info *info, struct dm_pool *mem,
877 const char **name, const char **uuid)
3d0480ed 878{
5c9eae96 879 struct dm_task *dmt;
e3366787 880 int r = 0;
3d0480ed 881
925fec6e
ZK
882 if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
883 return_0;
3d0480ed 884
5c9eae96 885 if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
e3366787
ZK
886 log_error("_info_by_dev: Failed to set device number.");
887 goto out;
5c9eae96
AK
888 }
889
890 if (!with_open_count && !dm_task_no_open_count(dmt))
2593777f 891 log_warn("WARNING: Failed to disable open_count.");
5c9eae96 892
e3366787 893 if (!dm_task_run(dmt))
5c9eae96
AK
894 goto_out;
895
e3366787 896 if (!dm_task_get_info(dmt, info))
5c9eae96
AK
897 goto_out;
898
899 if (name && !(*name = dm_pool_strdup(mem, dm_task_get_name(dmt)))) {
900 log_error("name pool_strdup failed");
3ba3bc0d 901 goto out;
165e4a11 902 }
3d0480ed 903
5c9eae96
AK
904 if (uuid && !(*uuid = dm_pool_strdup(mem, dm_task_get_uuid(dmt)))) {
905 log_error("uuid pool_strdup failed");
3ba3bc0d 906 goto out;
5c9eae96 907 }
3d0480ed 908
e3366787 909 r = 1;
5c9eae96
AK
910out:
911 dm_task_destroy(dmt);
912
913 return r;
914}
915
916static int _check_device_not_in_use(const char *name, struct dm_info *info)
917{
1f6d79ab
ZK
918 const char *reason;
919
5c9eae96
AK
920 if (!info->exists)
921 return 1;
922
923 /* If sysfs is not used, use open_count information only. */
924 if (!*dm_sysfs_dir()) {
1f6d79ab
ZK
925 if (!info->open_count)
926 return 1;
927 reason = "in use";
928 } else if (dm_device_has_holders(info->major, info->minor))
929 reason = "is used by another device";
930 else if (dm_device_has_mounted_fs(info->major, info->minor))
931 reason = "contains a filesystem in use";
932 else
5c9eae96 933 return 1;
5c9eae96 934
1f6d79ab
ZK
935 log_error("Device %s (" FMTu32 ":" FMTu32 ") %s.",
936 name, info->major, info->minor, reason);
937 return 0;
5c9eae96
AK
938}
939
940/* Check if all parent nodes of given node have open_count == 0 */
941static int _node_has_closed_parents(struct dm_tree_node *node,
942 const char *uuid_prefix,
943 size_t uuid_prefix_len)
944{
945 struct dm_tree_link *dlink;
946 const struct dm_info *dinfo;
947 struct dm_info info;
948 const char *uuid;
949
950 /* Iterate through parents of this node */
951 dm_list_iterate_items(dlink, &node->used_by) {
952 if (!(uuid = dm_tree_node_get_uuid(dlink->node))) {
b4f1578f 953 stack;
5c9eae96 954 continue;
b4f1578f 955 }
5c9eae96
AK
956
957 /* Ignore if it doesn't belong to this VG */
958 if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
959 continue;
960
47419d21
ZK
961 if (!(dinfo = dm_tree_node_get_info(dlink->node)))
962 return_0; /* FIXME Is this normal? */
5c9eae96
AK
963
964 /* Refresh open_count */
63368a50
ZK
965 if (!_info_by_dev(dinfo->major, dinfo->minor, 1, &info, NULL, NULL, NULL))
966 return_0;
967
968 if (!info.exists)
5c9eae96
AK
969 continue;
970
971 if (info.open_count) {
06abb2dd
AK
972 log_debug_activation("Node %s %d:%d has open_count %d", uuid_prefix,
973 dinfo->major, dinfo->minor, info.open_count);
5c9eae96
AK
974 return 0;
975 }
976 }
977
978 return 1;
979}
980
981static int _deactivate_node(const char *name, uint32_t major, uint32_t minor,
982 uint32_t *cookie, uint16_t udev_flags, int retry)
983{
984 struct dm_task *dmt;
985 int r = 0;
986
987 log_verbose("Removing %s (%" PRIu32 ":%" PRIu32 ")", name, major, minor);
988
989 if (!(dmt = dm_task_create(DM_DEVICE_REMOVE))) {
990 log_error("Deactivation dm_task creation failed for %s", name);
991 return 0;
992 }
993
994 if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
995 log_error("Failed to set device number for %s deactivation", name);
165e4a11 996 goto out;
3d0480ed
AK
997 }
998
5c9eae96 999 if (!dm_task_no_open_count(dmt))
2593777f 1000 log_warn("WARNING: Failed to disable open_count.");
5c9eae96
AK
1001
1002 if (cookie)
1003 if (!dm_task_set_cookie(dmt, cookie, udev_flags))
1004 goto out;
1005
1006 if (retry)
1007 dm_task_retry_remove(dmt);
1008
1009 r = dm_task_run(dmt);
1010
1011 /* FIXME Until kernel returns actual name so dm-iface.c can handle it */
1012 rm_dev_node(name, dmt->cookie_set && !(udev_flags & DM_UDEV_DISABLE_DM_RULES_FLAG),
1013 dmt->cookie_set && (udev_flags & DM_UDEV_DISABLE_LIBRARY_FALLBACK));
1014
1015 /* FIXME Remove node from tree or mark invalid? */
3d0480ed 1016
3d0480ed 1017out:
5c9eae96 1018 dm_task_destroy(dmt);
3d0480ed 1019
5c9eae96 1020 return r;
165e4a11
AK
1021}
1022
2e5ff5d1 1023static int _node_clear_table(struct dm_tree_node *dnode, uint16_t udev_flags)
165e4a11 1024{
2e5ff5d1 1025 struct dm_task *dmt = NULL, *deps_dmt = NULL;
10f37345 1026 struct dm_info *info = &dnode->info, deps_info;
2e5ff5d1 1027 struct dm_deps *deps = NULL;
0e177cc7 1028 const char *name, *uuid, *depname, *depuuid;
2e5ff5d1
AK
1029 const char *default_uuid_prefix;
1030 size_t default_uuid_prefix_len;
1031 uint32_t i;
1032 int r = 0;
165e4a11 1033
b4f1578f
AK
1034 if (!(name = dm_tree_node_get_name(dnode))) {
1035 log_error("_node_clear_table failed: missing name");
165e4a11
AK
1036 return 0;
1037 }
1038
1039 /* Is there a table? */
1040 if (!info->exists || !info->inactive_table)
1041 return 1;
1042
2e5ff5d1 1043 /* Get devices used by inactive table that's about to be deleted. */
0e177cc7 1044 if (!_deps(&deps_dmt, dnode->dtree->mem, info->major, info->minor, &depname, &depuuid, 1, info, &deps)) {
2e5ff5d1
AK
1045 log_error("Failed to obtain dependencies for %s before clearing table.", name);
1046 return 0;
1047 }
10d0d9c7 1048
165e4a11
AK
1049 log_verbose("Clearing inactive table %s (%" PRIu32 ":%" PRIu32 ")",
1050 name, info->major, info->minor);
1051
1052 if (!(dmt = dm_task_create(DM_DEVICE_CLEAR))) {
165e4a11 1053 log_error("Table clear dm_task creation failed for %s", name);
3ba3bc0d 1054 goto out;
165e4a11
AK
1055 }
1056
1057 if (!dm_task_set_major(dmt, info->major) ||
1058 !dm_task_set_minor(dmt, info->minor)) {
1059 log_error("Failed to set device number for %s table clear", name);
3ba3bc0d 1060 goto out;
165e4a11
AK
1061 }
1062
1063 r = dm_task_run(dmt);
1064
1065 if (!dm_task_get_info(dmt, info)) {
b4f1578f 1066 log_error("_node_clear_table failed: info missing after running task for %s", name);
165e4a11
AK
1067 r = 0;
1068 }
1069
2e5ff5d1
AK
1070 if (!r || !deps)
1071 goto_out;
1072
1073 /*
1074 * Remove (incomplete) devices that the inactive table referred to but
1075 * which are not in the tree, no longer referenced and don't have a live
1076 * table.
1077 */
1078 default_uuid_prefix = dm_uuid_prefix();
1079 default_uuid_prefix_len = strlen(default_uuid_prefix);
1080
1081 for (i = 0; i < deps->count; i++) {
1082 /* If already in tree, assume it's under control */
1083 if (_find_dm_tree_node(dnode->dtree, MAJOR(deps->device[i]), MINOR(deps->device[i])))
5c9eae96 1084 continue;
db208f51 1085
5c9eae96
AK
1086 if (!_info_by_dev(MAJOR(deps->device[i]), MINOR(deps->device[i]), 1,
1087 &deps_info, dnode->dtree->mem, &name, &uuid))
63368a50 1088 goto_out;
2e5ff5d1 1089
5c9eae96
AK
1090 /* Proceed if device is an 'orphan' - unreferenced and without a live table. */
1091 if (!deps_info.exists || deps_info.live_table || deps_info.open_count)
1092 continue;
3e8c6b73 1093
5c9eae96
AK
1094 if (strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
1095 continue;
2e5ff5d1 1096
5c9eae96
AK
1097 /* Remove device. */
1098 if (!_deactivate_node(name, deps_info.major, deps_info.minor, &dnode->dtree->cookie, udev_flags, 0)) {
1099 log_error("Failed to deactivate no-longer-used device %s (%"
1100 PRIu32 ":%" PRIu32 ")", name, deps_info.major, deps_info.minor);
1101 } else if (deps_info.suspended)
1102 dec_suspended();
2e5ff5d1
AK
1103 }
1104
1105out:
5c9eae96
AK
1106 if (dmt)
1107 dm_task_destroy(dmt);
1108
1109 if (deps_dmt)
1110 dm_task_destroy(deps_dmt);
3e8c6b73
AK
1111
1112 return r;
1113}
1114
5c9eae96
AK
1115struct dm_tree_node *dm_tree_add_new_dev_with_udev_flags(struct dm_tree *dtree,
1116 const char *name,
1117 const char *uuid,
1118 uint32_t major,
1119 uint32_t minor,
1120 int read_only,
1121 int clear_inactive,
1122 void *context,
1123 uint16_t udev_flags)
125712be 1124{
5c9eae96 1125 struct dm_tree_node *dnode;
6f3cd635 1126 struct dm_info info = { 0 };
125712be 1127
3b5834d7
ZK
1128 if (!name || !uuid) {
1129 log_error("Cannot add device without name and uuid.");
1130 return NULL;
1131 }
1132
5c9eae96
AK
1133 /* Do we need to add node to tree? */
1134 if (!(dnode = dm_tree_find_node_by_uuid(dtree, uuid))) {
0e177cc7 1135 if (!(dnode = _create_dm_tree_node(dtree, name, uuid, &info,
5c9eae96
AK
1136 context, 0)))
1137 return_NULL;
125712be 1138
5c9eae96
AK
1139 /* Attach to root node until a table is supplied */
1140 if (!_add_to_toplevel(dnode) || !_add_to_bottomlevel(dnode))
1141 return_NULL;
f3ef15ef 1142
5c9eae96
AK
1143 dnode->props.major = major;
1144 dnode->props.minor = minor;
5c9eae96
AK
1145 } else if (strcmp(name, dnode->name)) {
1146 /* Do we need to rename node? */
1147 if (!(dnode->props.new_name = dm_pool_strdup(dtree->mem, name))) {
1148 log_error("name pool_strdup failed");
1149 return NULL;
f3ef15ef 1150 }
5c9eae96 1151 }
f3ef15ef 1152
5c9eae96
AK
1153 dnode->props.read_only = read_only ? 1 : 0;
1154 dnode->props.read_ahead = DM_READ_AHEAD_AUTO;
1155 dnode->props.read_ahead_flags = 0;
f3ef15ef 1156
5c9eae96
AK
1157 if (clear_inactive && !_node_clear_table(dnode, udev_flags))
1158 return_NULL;
f3ef15ef 1159
5c9eae96
AK
1160 dnode->context = context;
1161 dnode->udev_flags = udev_flags;
f3ef15ef 1162
5c9eae96
AK
1163 return dnode;
1164}
f3ef15ef 1165
5c9eae96
AK
1166struct dm_tree_node *dm_tree_add_new_dev(struct dm_tree *dtree, const char *name,
1167 const char *uuid, uint32_t major, uint32_t minor,
1168 int read_only, int clear_inactive, void *context)
1169{
1170 return dm_tree_add_new_dev_with_udev_flags(dtree, name, uuid, major, minor,
1171 read_only, clear_inactive, context, 0);
f3ef15ef
ZK
1172}
1173
5c9eae96
AK
1174static struct dm_tree_node *_add_dev(struct dm_tree *dtree,
1175 struct dm_tree_node *parent,
1176 uint32_t major, uint32_t minor,
bd2500e6
ZK
1177 uint16_t udev_flags,
1178 int implicit_deps)
3e8c6b73 1179{
5c9eae96
AK
1180 struct dm_task *dmt = NULL;
1181 struct dm_info info;
1182 struct dm_deps *deps = NULL;
1183 const char *name = NULL;
1184 const char *uuid = NULL;
1185 struct dm_tree_node *node = NULL;
1186 uint32_t i;
1187 int new = 0;
3e8c6b73 1188
5c9eae96
AK
1189 /* Already in tree? */
1190 if (!(node = _find_dm_tree_node(dtree, major, minor))) {
1191 if (!_deps(&dmt, dtree->mem, major, minor, &name, &uuid, 0, &info, &deps))
1192 return_NULL;
3e8c6b73 1193
5c9eae96
AK
1194 if (!(node = _create_dm_tree_node(dtree, name, uuid, &info,
1195 NULL, udev_flags)))
1196 goto_out;
1197 new = 1;
bd2500e6
ZK
1198 node->implicit_deps = implicit_deps;
1199 } else if (!implicit_deps && node->implicit_deps) {
1200 node->udev_flags = udev_flags;
1201 node->implicit_deps = 0;
3e8c6b73
AK
1202 }
1203
5c9eae96
AK
1204 if (!_link_tree_nodes(parent, node)) {
1205 node = NULL;
1206 goto_out;
3e8c6b73
AK
1207 }
1208
5c9eae96
AK
1209 /* If node was already in tree, no need to recurse. */
1210 if (!new)
1211 goto out;
787200ef 1212
5c9eae96 1213 /* Can't recurse if not a mapped device or there are no dependencies */
4d95ccc6 1214 if (!node->info.exists || !deps || !deps->count) {
5c9eae96
AK
1215 if (!_add_to_bottomlevel(node)) {
1216 stack;
1217 node = NULL;
1218 }
1219 goto out;
1220 }
787200ef 1221
5c9eae96
AK
1222 /* Add dependencies to tree */
1223 for (i = 0; i < deps->count; i++)
bd2500e6 1224 /* Implicit devices are by default temporary */
5c9eae96 1225 if (!_add_dev(dtree, node, MAJOR(deps->device[i]),
bd2500e6
ZK
1226 MINOR(deps->device[i]), udev_flags |
1227 DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG |
1228 DM_UDEV_DISABLE_DISK_RULES_FLAG |
1229 DM_UDEV_DISABLE_OTHER_RULES_FLAG, 1)) {
5c9eae96
AK
1230 node = NULL;
1231 goto_out;
1232 }
3e8c6b73 1233
5c9eae96
AK
1234out:
1235 if (dmt)
1236 dm_task_destroy(dmt);
165e4a11 1237
5c9eae96
AK
1238 return node;
1239}
db208f51 1240
5c9eae96
AK
1241int dm_tree_add_dev(struct dm_tree *dtree, uint32_t major, uint32_t minor)
1242{
bd2500e6 1243 return _add_dev(dtree, &dtree->root, major, minor, 0, 0) ? 1 : 0;
5c9eae96 1244}
db208f51 1245
5c9eae96
AK
1246int dm_tree_add_dev_with_udev_flags(struct dm_tree *dtree, uint32_t major,
1247 uint32_t minor, uint16_t udev_flags)
1248{
bd2500e6 1249 return _add_dev(dtree, &dtree->root, major, minor, udev_flags, 0) ? 1 : 0;
db208f51
AK
1250}
1251
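/*
 * Editorial sketch, not part of the upstream file: building a tree of
 * existing devices.  One call per top-level device is enough, because
 * _add_dev() above recursively pulls in every dependency it finds.
 */
static int _example_build_tree(struct dm_tree *dtree,
                               uint32_t major, uint32_t minor)
{
        if (!dm_tree_add_dev(dtree, major, minor)) {
                log_error("Failed to add %u:%u to the tree.", major, minor);
                return 0;
        }

        return 1;
}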
bd90c6b2 1252static int _rename_node(const char *old_name, const char *new_name, uint32_t major,
f16aea9e 1253 uint32_t minor, uint32_t *cookie, uint16_t udev_flags)
165e4a11
AK
1254{
1255 struct dm_task *dmt;
1256 int r = 0;
1257
1258 log_verbose("Renaming %s (%" PRIu32 ":%" PRIu32 ") to %s", old_name, major, minor, new_name);
1259
1260 if (!(dmt = dm_task_create(DM_DEVICE_RENAME))) {
1261 log_error("Rename dm_task creation failed for %s", old_name);
1262 return 0;
1263 }
1264
1265 if (!dm_task_set_name(dmt, old_name)) {
1266 log_error("Failed to set name for %s rename.", old_name);
1267 goto out;
1268 }
1269
b4f1578f 1270 if (!dm_task_set_newname(dmt, new_name))
40e5fd8b 1271 goto_out;
165e4a11
AK
1272
1273 if (!dm_task_no_open_count(dmt))
2593777f 1274 log_warn("WARNING: Failed to disable open_count.");
165e4a11 1275
f16aea9e 1276 if (!dm_task_set_cookie(dmt, cookie, udev_flags))
bd90c6b2
AK
1277 goto out;
1278
165e4a11
AK
1279 r = dm_task_run(dmt);
1280
1281out:
1282 dm_task_destroy(dmt);
1283
1284 return r;
1285}
1286
165e4a11
AK
1287/* FIXME Merge with _suspend_node? */
1288static int _resume_node(const char *name, uint32_t major, uint32_t minor,
52b84409 1289 uint32_t read_ahead, uint32_t read_ahead_flags,
f16aea9e 1290 struct dm_info *newinfo, uint32_t *cookie,
1840aa09 1291 uint16_t udev_flags, int already_suspended)
165e4a11
AK
1292{
1293 struct dm_task *dmt;
bd90c6b2 1294 int r = 0;
165e4a11 1295
5abf6b7c 1296 log_verbose("Resuming %s (" FMTu32 ":" FMTu32 ").", name, major, minor);
165e4a11
AK
1297
1298 if (!(dmt = dm_task_create(DM_DEVICE_RESUME))) {
06abb2dd 1299 log_debug_activation("Suspend dm_task creation failed for %s.", name);
165e4a11
AK
1300 return 0;
1301 }
1302
0b7d16bc
AK
1303 /* FIXME Kernel should fill in name on return instead */
1304 if (!dm_task_set_name(dmt, name)) {
06abb2dd 1305 log_debug_activation("Failed to set device name for %s resumption.", name);
bd90c6b2 1306 goto out;
0b7d16bc
AK
1307 }
1308
165e4a11
AK
1309 if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
1310 log_error("Failed to set device number for %s resumption.", name);
bd90c6b2 1311 goto out;
165e4a11
AK
1312 }
1313
1314 if (!dm_task_no_open_count(dmt))
2593777f 1315 log_warn("WARNING: Failed to disable open_count.");
165e4a11 1316
52b84409 1317 if (!dm_task_set_read_ahead(dmt, read_ahead, read_ahead_flags))
2593777f 1318 log_warn("WARNING: Failed to set read ahead.");
52b84409 1319
f16aea9e 1320 if (!dm_task_set_cookie(dmt, cookie, udev_flags))
9a8f192a 1321 goto_out;
bd90c6b2 1322
9a8f192a
ZK
1323 if (!(r = dm_task_run(dmt)))
1324 goto_out;
1325
1326 if (already_suspended)
1327 dec_suspended();
1328
1329 if (!(r = dm_task_get_info(dmt, newinfo)))
1330 stack;
165e4a11 1331
bd90c6b2 1332out:
165e4a11
AK
1333 dm_task_destroy(dmt);
1334
1335 return r;
1336}
1337
db208f51 1338static int _suspend_node(const char *name, uint32_t major, uint32_t minor,
b9ffd32c 1339 int skip_lockfs, int no_flush, struct dm_info *newinfo)
db208f51
AK
1340{
1341 struct dm_task *dmt;
e3366787 1342 int r = 0;
db208f51 1343
b9ffd32c
AK
1344 log_verbose("Suspending %s (%" PRIu32 ":%" PRIu32 ")%s%s",
1345 name, major, minor,
1346 skip_lockfs ? "" : " with filesystem sync",
6e1898a5 1347 no_flush ? "" : " with device flush");
db208f51
AK
1348
1349 if (!(dmt = dm_task_create(DM_DEVICE_SUSPEND))) {
1350 log_error("Suspend dm_task creation failed for %s", name);
1351 return 0;
1352 }
1353
1354 if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
1355 log_error("Failed to set device number for %s suspension.", name);
e3366787 1356 goto out;
db208f51
AK
1357 }
1358
1359 if (!dm_task_no_open_count(dmt))
2593777f 1360 log_warn("WARNING: Failed to disable open_count.");
db208f51 1361
c55b1410 1362 if (skip_lockfs && !dm_task_skip_lockfs(dmt))
2593777f 1363 log_warn("WARNING: Failed to set skip_lockfs flag.");
c55b1410 1364
b9ffd32c 1365 if (no_flush && !dm_task_no_flush(dmt))
2593777f 1366 log_warn("WARNING: Failed to set no_flush flag.");
b9ffd32c 1367
1840aa09
AK
1368 if ((r = dm_task_run(dmt))) {
1369 inc_suspended();
db208f51 1370 r = dm_task_get_info(dmt, newinfo);
1840aa09 1371 }
e3366787 1372out:
3e8c6b73
AK
1373 dm_task_destroy(dmt);
1374
1375 return r;
1376}
1377
a3c7e326
ZK
1378static int _thin_pool_get_status(struct dm_tree_node *dnode,
1379 struct dm_status_thin_pool *s)
e0ea24be
ZK
1380{
1381 struct dm_task *dmt;
1382 int r = 0;
1383 uint64_t start, length;
1384 char *type = NULL;
1385 char *params = NULL;
e0ea24be 1386
25e6ab87
ZK
1387 if (!(dmt = dm_task_create(DM_DEVICE_STATUS)))
1388 return_0;
e0ea24be 1389
25e6ab87
ZK
1390 if (!dm_task_set_major(dmt, dnode->info.major) ||
1391 !dm_task_set_minor(dmt, dnode->info.minor)) {
1392 log_error("Failed to set major minor.");
1393 goto out;
e0ea24be
ZK
1394 }
1395
a4870c79 1396 if (!dm_task_no_flush(dmt))
2593777f 1397 log_warn("WARNING: Can't set no_flush flag."); /* Non fatal */
a4870c79 1398
25e6ab87
ZK
1399 if (!dm_task_run(dmt))
1400 goto_out;
1401
1402 dm_get_next_target(dmt, NULL, &start, &length, &type, &params);
1403
c3e224ad 1404 if (!type || (strcmp(type, "thin-pool") != 0)) {
2a01e3d4 1405 log_error("Expected thin-pool target for %s and got %s.",
c3e224ad 1406 _node_name(dnode), type ? : "no target");
e0ea24be
ZK
1407 goto out;
1408 }
1409
0173c260 1410 if (!parse_thin_pool_status(params, s))
a3c7e326 1411 goto_out;
e0ea24be 1412
21c0b113
ZK
1413 log_debug_activation("Found transaction id %" PRIu64 " for thin pool %s "
1414 "with status line: %s.",
a3c7e326 1415 s->transaction_id, _node_name(dnode), params);
e0ea24be 1416
25e6ab87
ZK
1417 r = 1;
1418out:
1419 dm_task_destroy(dmt);
e0ea24be 1420
25e6ab87
ZK
1421 return r;
1422}
e0ea24be 1423
4c1caa7e
ZK
1424static int _node_message(uint32_t major, uint32_t minor,
1425 int expected_errno, const char *message)
25e6ab87
ZK
1426{
1427 struct dm_task *dmt;
4c1caa7e
ZK
1428 int r = 0;
1429
1430 if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG)))
1431 return_0;
1432
1433 if (!dm_task_set_major(dmt, major) ||
1434 !dm_task_set_minor(dmt, minor)) {
1435 log_error("Failed to set message major minor.");
1436 goto out;
1437 }
1438
1439 if (!dm_task_set_message(dmt, message))
1440 goto_out;
1441
1442 /* Internal functionality of dm_task */
1443 dmt->expected_errno = expected_errno;
1444
1445 if (!dm_task_run(dmt)) {
1446 log_error("Failed to process message \"%s\".", message);
1447 goto out;
1448 }
1449
1450 r = 1;
1451out:
1452 dm_task_destroy(dmt);
1453
1454 return r;
1455}
1456
1457static int _thin_pool_node_message(struct dm_tree_node *dnode, struct thin_message *tm)
1458{
25e6ab87
ZK
1459 struct dm_thin_message *m = &tm->message;
1460 char buf[64];
1461 int r;
e0ea24be 1462
25e6ab87
ZK
1463 switch (m->type) {
1464 case DM_THIN_MESSAGE_CREATE_SNAP:
1465 r = dm_snprintf(buf, sizeof(buf), "create_snap %u %u",
1466 m->u.m_create_snap.device_id,
1467 m->u.m_create_snap.origin_id);
1468 break;
1469 case DM_THIN_MESSAGE_CREATE_THIN:
1470 r = dm_snprintf(buf, sizeof(buf), "create_thin %u",
1471 m->u.m_create_thin.device_id);
1472 break;
1473 case DM_THIN_MESSAGE_DELETE:
1474 r = dm_snprintf(buf, sizeof(buf), "delete %u",
1475 m->u.m_delete.device_id);
1476 break;
25e6ab87
ZK
1477 case DM_THIN_MESSAGE_SET_TRANSACTION_ID:
1478 r = dm_snprintf(buf, sizeof(buf),
1479 "set_transaction_id %" PRIu64 " %" PRIu64,
1480 m->u.m_set_transaction_id.current_id,
1481 m->u.m_set_transaction_id.new_id);
1482 break;
4dab0d31 1483 case DM_THIN_MESSAGE_RESERVE_METADATA_SNAP: /* target vsn 1.1 */
c4db22bd
ZK
1484 r = dm_snprintf(buf, sizeof(buf), "reserve_metadata_snap");
1485 break;
4dab0d31 1486 case DM_THIN_MESSAGE_RELEASE_METADATA_SNAP: /* target vsn 1.1 */
c4db22bd
ZK
1487 r = dm_snprintf(buf, sizeof(buf), "release_metadata_snap");
1488 break;
25de9add
ZK
1489 default:
1490 r = -1;
25e6ab87
ZK
1491 }
1492
25de9add 1493 if (r < 0) {
25e6ab87
ZK
1494 log_error("Failed to prepare message.");
1495 return 0;
1496 }
1497
4c1caa7e 1498 if (!_node_message(dnode->info.major, dnode->info.minor,
4b0565b8
ZK
1499 tm->expected_errno, buf)) {
1500 switch (m->type) {
1501 case DM_THIN_MESSAGE_CREATE_SNAP:
1502 case DM_THIN_MESSAGE_CREATE_THIN:
1503 if (errno == EEXIST) {
1504 /*
1505 * ATM errno from ioctl() is preserved through the code error path chain.
1506 * If this were ever to change, another way would need to be used to
1507 * obtain the result from a failed DM message.
1508 */
1509 log_error("Thin pool %s already contains a thin device with device_id %u.",
1510 _node_name(dnode), m->u.m_create_snap.device_id);
1511 /*
1512 * TODO:
1513 *
1514 * Give some useful advice on how to solve this problem,
1515 * until lvconvert --repair can handle this automatically.
1516 */
1517 log_error("Manual intervention may be required to remove device dev_id=%u in thin pool metadata.",
1518 m->u.m_create_snap.device_id);
1519 log_error("Optionally new thin volume with device_id=%u can be manually added into a volume group.",
1520 m->u.m_create_snap.device_id);
1521 log_warn("WARNING: When uncertain how to do this, contact support!");
1522 return 0;
1523 }
1524 /* fall through */
1525 default:
1526 return_0;
1527 }
1528
1529 }
25e6ab87 1530
4c1caa7e 1531 return 1;
e0ea24be
ZK
1532}
1533
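/*
 * Editorial note: the strings built above form the thin-pool target's
 * message interface, so the same operations can be issued by hand with
 * dmsetup (illustrative device and ids, sector 0 as the message target):
 *
 *   dmsetup message <pool-device> 0 "create_snap 2 1"
 *   dmsetup message <pool-device> 0 "set_transaction_id 5 6"
 */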
4a4ea47f
ZK
1534static struct load_segment *_get_last_load_segment(struct dm_tree_node *node)
1535{
1536 if (dm_list_empty(&node->props.segs)) {
1537 log_error("Node %s is missing a segment.", _node_name(node));
1538 return NULL;
1539 }
1540
1541 return dm_list_item(dm_list_last(&node->props.segs), struct load_segment);
1542}
1543
203affff 1544/* For the preload pass, only validate the pool's transaction_id */
11f64f0a
ZK
1545static int _node_send_messages(struct dm_tree_node *dnode,
1546 const char *uuid_prefix,
203affff
ZK
1547 size_t uuid_prefix_len,
1548 int send)
25e6ab87
ZK
1549{
1550 struct load_segment *seg;
1551 struct thin_message *tmsg;
2208ebfe 1552 struct dm_status_thin_pool stp;
25e6ab87 1553 const char *uuid;
c7b7cb60 1554 int have_messages;
25e6ab87 1555
4a4ea47f 1556 if (!dnode->info.exists)
25e6ab87
ZK
1557 return 1;
1558
4a4ea47f
ZK
1559 if (!(seg = _get_last_load_segment(dnode)))
1560 return_0;
1561
25e6ab87
ZK
1562 if (seg->type != SEG_THIN_POOL)
1563 return 1;
1564
1565 if (!(uuid = dm_tree_node_get_uuid(dnode)))
1566 return_0;
1567
1568 if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len)) {
06abb2dd 1569 log_debug_activation("UUID \"%s\" does not match.", uuid);
25e6ab87
ZK
1570 return 1;
1571 }
1572
a3c7e326 1573 if (!_thin_pool_get_status(dnode, &stp))
c7b7cb60 1574 return_0;
25e6ab87 1575
c7b7cb60 1576 have_messages = !dm_list_empty(&seg->thin_messages) ? 1 : 0;
a3c7e326 1577 if (stp.transaction_id == seg->transaction_id) {
c7b7cb60
ZK
1578 dnode->props.send_messages = 0; /* messages already committed */
1579 if (have_messages)
21c0b113
ZK
1580 log_debug_activation("Thin pool %s transaction_id matches %"
1581 PRIu64 ", skipping messages.",
a3c7e326 1582 _node_name(dnode), stp.transaction_id);
c7b7cb60 1583 return 1;
5658ec2b 1584 }
25e6ab87 1585
c7b7cb60 1586 /* Error if there are no stacked messages or id mismatches */
a3c7e326 1587 if ((stp.transaction_id + 1) != seg->transaction_id) {
21c0b113 1588 log_error("Thin pool %s transaction_id is %" PRIu64 ", while expected %" PRIu64 ".",
a3c7e326 1589 _node_name(dnode), stp.transaction_id, seg->transaction_id - have_messages);
c7b7cb60 1590 return 0;
25e6ab87
ZK
1591 }
1592
c356991f 1593 if (!have_messages || !send)
203affff
ZK
1594 return 1; /* transaction_id is matching */
1595
b2a326b5
ZK
1596 if (stp.fail || stp.read_only || stp.needs_check) {
1597 log_error("Cannot send messages to thin pool %s%s%s%s.",
1598 _node_name(dnode),
1599 stp.fail ? " in failed state" : "",
1600 stp.read_only ? " with read only metadata" : "",
1601 stp.needs_check ? " which needs check first" : "");
1602 return 0;
1603 }
1604
8f518cf1 1605 dm_list_iterate_items(tmsg, &seg->thin_messages) {
25e6ab87 1606 if (!(_thin_pool_node_message(dnode, tmsg)))
c7b7cb60 1607 return_0;
8f518cf1 1608 if (tmsg->message.type == DM_THIN_MESSAGE_SET_TRANSACTION_ID) {
a3c7e326 1609 if (!_thin_pool_get_status(dnode, &stp))
8f518cf1 1610 return_0;
a3c7e326 1611 if (stp.transaction_id != tmsg->message.u.m_set_transaction_id.new_id) {
21c0b113 1612 log_error("Thin pool %s transaction_id is %" PRIu64
8f518cf1 1613 " and does not match expected %" PRIu64 ".",
a3c7e326 1614 _node_name(dnode), stp.transaction_id,
8f518cf1
ZK
1615 tmsg->message.u.m_set_transaction_id.new_id);
1616 return 0;
1617 }
1618 }
1619 }
25e6ab87 1620
c7b7cb60 1621 dnode->props.send_messages = 0; /* messages posted */
bbcd37e4 1622
c7b7cb60 1623 return 1;
25e6ab87
ZK
1624}
1625
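/*
 * Editorial note on the transaction_id arithmetic above: a pool whose
 * kernel-side id is N may only carry queued messages when the metadata
 * expects N + 1.  The set_transaction_id message then moves the kernel
 * from N to N + 1; the status is re-read inside the loop and must match
 * the new id before send_messages is cleared.
 */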
18e0f934
AK
1626/*
1627 * FIXME Don't attempt to deactivate known internal dependencies.
1628 */
1629static int _dm_tree_deactivate_children(struct dm_tree_node *dnode,
1630 const char *uuid_prefix,
1631 size_t uuid_prefix_len,
1632 unsigned level)
3e8c6b73 1633{
b7eb2ad0 1634 int r = 1;
3e8c6b73 1635 void *handle = NULL;
b4f1578f 1636 struct dm_tree_node *child = dnode;
3e8c6b73
AK
1637 struct dm_info info;
1638 const struct dm_info *dinfo;
1639 const char *name;
1640 const char *uuid;
1641
b4f1578f
AK
1642 while ((child = dm_tree_next_child(&handle, dnode, 0))) {
1643 if (!(dinfo = dm_tree_node_get_info(child))) {
3e8c6b73
AK
1644 stack;
1645 continue;
1646 }
1647
b4f1578f 1648 if (!(name = dm_tree_node_get_name(child))) {
3e8c6b73
AK
1649 stack;
1650 continue;
1651 }
1652
b4f1578f 1653 if (!(uuid = dm_tree_node_get_uuid(child))) {
3e8c6b73
AK
1654 stack;
1655 continue;
1656 }
1657
1658 /* Ignore if it doesn't belong to this VG */
2b69db1f 1659 if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
3e8c6b73 1660 continue;
3e8c6b73
AK
1661
1662 /* Refresh open_count */
63368a50
ZK
1663 if (!_info_by_dev(dinfo->major, dinfo->minor, 1, &info, NULL, NULL, NULL))
1664 return_0;
1665
1666 if (!info.exists)
3e8c6b73
AK
1667 continue;
1668
4ce43894
ZK
1669 if (info.open_count) {
1670 /* Skip internal non-toplevel opened nodes */
1671 if (level)
1672 continue;
1673
1674 /* When retry is not allowed, error */
1675 if (!child->dtree->retry_remove) {
5abf6b7c
ZK
1676 log_error("Unable to deactivate open %s (" FMTu32 ":"
1677 FMTu32 ").", name, info.major, info.minor);
4ce43894
ZK
1678 r = 0;
1679 continue;
1680 }
1681
1682 /* Check toplevel node for holders/mounted fs */
1683 if (!_check_device_not_in_use(name, &info)) {
1684 stack;
1685 r = 0;
1686 continue;
1687 }
1688 /* Go on with retry */
1689 }
125712be 1690
f3ef15ef 1691 /* Also checking open_count in parent nodes of presuspend_node */
125712be 1692 if ((child->presuspend_node &&
f3ef15ef
ZK
1693 !_node_has_closed_parents(child->presuspend_node,
1694 uuid_prefix, uuid_prefix_len))) {
18e0f934
AK
1695 /* Only report error from (likely non-internal) dependency at top level */
1696 if (!level) {
5abf6b7c
ZK
1697 log_error("Unable to deactivate open %s (" FMTu32 ":"
1698 FMTu32 ").", name, info.major, info.minor);
18e0f934
AK
1699 r = 0;
1700 }
f55021f4
AK
1701 continue;
1702 }
1703
76d1aec8
ZK
1704 /* Suspend child node first if requested */
1705 if (child->presuspend_node &&
1706 !dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len))
1707 continue;
1708
f16aea9e 1709 if (!_deactivate_node(name, info.major, info.minor,
787200ef 1710 &child->dtree->cookie, child->udev_flags,
4ce43894 1711 (level == 0) ? child->dtree->retry_remove : 0)) {
5abf6b7c
ZK
1712 log_error("Unable to deactivate %s (" FMTu32 ":"
1713 FMTu32 ").", name, info.major, info.minor);
b7eb2ad0 1714 r = 0;
3e8c6b73 1715 continue;
a920bc1a
ZK
1716 }
1717
1718 if (info.suspended && info.live_table)
f4249251 1719 dec_suspended();
3e8c6b73 1720
7e35dfff
ZK
1721 if (child->callback &&
1722 !child->callback(child, DM_NODE_CALLBACK_DEACTIVATED,
b3103ef3 1723 child->callback_data))
462de06d 1724 stack;
396377bc
AK
1725 /* FIXME Deactivation must currently ignore failure
1726 * here so that lvremove can continue: we need an
1727 * alternative way to handle this state without
1728 * setting r=0. Or better, skip calling thin_check
1729 * entirely if the device is about to be removed. */
7e35dfff 1730
b3103ef3
ZK
1731 if (dm_tree_node_num_children(child, 0) &&
1732 !_dm_tree_deactivate_children(child, uuid_prefix, uuid_prefix_len, level + 1))
1733 return_0;
3e8c6b73
AK
1734 }
1735
b7eb2ad0 1736 return r;
3e8c6b73 1737}
db208f51 1738
18e0f934 1739int dm_tree_deactivate_children(struct dm_tree_node *dnode,
d98511c7
ZK
1740 const char *uuid_prefix,
1741 size_t uuid_prefix_len)
18e0f934
AK
1742{
1743 return _dm_tree_deactivate_children(dnode, uuid_prefix, uuid_prefix_len, 0);
1744}
1745
b4f1578f 1746int dm_tree_suspend_children(struct dm_tree_node *dnode,
08e64ce5
ZK
1747 const char *uuid_prefix,
1748 size_t uuid_prefix_len)
db208f51 1749{
68085c93 1750 int r = 1;
db208f51 1751 void *handle = NULL;
b4f1578f 1752 struct dm_tree_node *child = dnode;
db208f51
AK
1753 struct dm_info info, newinfo;
1754 const struct dm_info *dinfo;
1755 const char *name;
1756 const char *uuid;
1757
690a5da2 1758 /* Suspend nodes at this level of the tree */
b4f1578f
AK
1759 while ((child = dm_tree_next_child(&handle, dnode, 0))) {
1760 if (!(dinfo = dm_tree_node_get_info(child))) {
db208f51
AK
1761 stack;
1762 continue;
1763 }
1764
b4f1578f 1765 if (!(name = dm_tree_node_get_name(child))) {
db208f51
AK
1766 stack;
1767 continue;
1768 }
1769
b4f1578f 1770 if (!(uuid = dm_tree_node_get_uuid(child))) {
db208f51
AK
1771 stack;
1772 continue;
1773 }
1774
1775 /* Ignore if it doesn't belong to this VG */
2b69db1f 1776 if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
db208f51
AK
1777 continue;
1778
690a5da2
AK
1779 /* Ensure immediate parents are already suspended */
1780 if (!_children_suspended(child, 1, uuid_prefix, uuid_prefix_len))
1781 continue;
1782
63368a50
ZK
1783 if (!_info_by_dev(dinfo->major, dinfo->minor, 0, &info, NULL, NULL, NULL))
1784 return_0;
1785
1786 if (!info.exists || info.suspended)
db208f51
AK
1787 continue;
1788
5bef18f2
ZK
1789 /* If child has some real messages, send them */
1790 if ((child->props.send_messages > 1) && r) {
1791 if (!(r = _node_send_messages(child, uuid_prefix, uuid_prefix_len, 1)))
1792 stack;
1793 else {
a156fc9a 1794 log_debug_activation("Sent messages to thin-pool %s and "
5bef18f2
ZK
1795 "skipping suspend of its children.",
1796 _node_name(child));
1797 child->props.skip_suspend++;
1798 }
1799 continue;
1800 }
1801
c55b1410 1802 if (!_suspend_node(name, info.major, info.minor,
b9ffd32c
AK
1803 child->dtree->skip_lockfs,
1804 child->dtree->no_flush, &newinfo)) {
5abf6b7c
ZK
1805 log_error("Unable to suspend %s (" FMTu32 ":"
1806 FMTu32 ")", name, info.major, info.minor);
68085c93 1807 r = 0;
db208f51
AK
1808 continue;
1809 }
1810
1811 /* Update cached info */
1812 child->info = newinfo;
690a5da2
AK
1813 }
1814
1815 /* Then suspend any child nodes */
1816 handle = NULL;
1817
b4f1578f 1818 while ((child = dm_tree_next_child(&handle, dnode, 0))) {
5bef18f2
ZK
1819 if (child->props.skip_suspend)
1820 continue;
1821
b4f1578f 1822 if (!(uuid = dm_tree_node_get_uuid(child))) {
690a5da2
AK
1823 stack;
1824 continue;
1825 }
1826
1827 /* Ignore if it doesn't belong to this VG */
87f98002 1828 if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
690a5da2 1829 continue;
db208f51 1830
b4f1578f 1831 if (dm_tree_node_num_children(child, 0))
68085c93
MS
1832 if (!dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len))
1833 return_0;
db208f51
AK
1834 }
1835
68085c93 1836 return r;
db208f51
AK
1837}
1838
ca514351
JB
1839/*
1840 * _rename_conflict_exists
1841 * @parent
1842 * @node
1843 * @resolvable
1844 *
1845 * Check if there is a rename conflict with existing peers in
1846 * this tree. 'resolvable' is set if the conflicting node will
1847 * also be undergoing a rename. (Allowing that node to rename
1848 * first would clear the conflict.)
1849 *
1850 * Returns: 1 if conflict, 0 otherwise
1851 */
1852static int _rename_conflict_exists(struct dm_tree_node *parent,
1853 struct dm_tree_node *node,
1854 int *resolvable)
1855{
1856 void *handle = NULL;
1857 const char *name = dm_tree_node_get_name(node);
1858 const char *sibling_name;
1859 struct dm_tree_node *sibling;
1860
1861 *resolvable = 0;
1862
1863 if (!name)
1864 return_0;
1865
1866 while ((sibling = dm_tree_next_child(&handle, parent, 0))) {
1867 if (sibling == node)
1868 continue;
1869
1870 if (!(sibling_name = dm_tree_node_get_name(sibling))) {
1871 stack;
1872 continue;
1873 }
1874
1875 if (!strcmp(node->props.new_name, sibling_name)) {
1876 if (sibling->props.new_name)
1877 *resolvable = 1;
1878 return 1;
1879 }
1880 }
1881
1882 return 0;
1883}
1884
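/*
 * Worked example: if node A is queued to be renamed to "B" while peer B
 * is itself queued for a rename, the conflict is flagged resolvable -
 * dm_tree_activate_children() then defers A's rename and redoes the
 * priority pass once B has moved out of the way.
 */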
9b78f7ee
ZK
1885/*
1886 * Reactivation of sibling nodes
1887 *
1888 * This function is used when activating an origin and its thick
1889 * snapshots, to ensure udev processes the origin LV first and all
1890 * the snapshot LVs afterwards.
1891 */
1892static int _reactivate_siblings(struct dm_tree_node *dnode,
1893 const char *uuid_prefix,
1894 size_t uuid_prefix_len)
1895{
1896 struct dm_tree_node *child;
1897 const char *uuid;
1898 void *handle = NULL;
1899 int r = 1;
1900
1901 /* Wait for udev before reactivating siblings */
1902 if (!dm_udev_wait(dm_tree_get_cookie(dnode)))
1903 stack;
1904
1905 dm_tree_set_cookie(dnode, 0);
1906
1907 while ((child = dm_tree_next_child(&handle, dnode, 0))) {
1908 if (child->props.reactivate_siblings) {
1909 /* Skip 'leading' device in this group, marked with flag */
1910 child->props.reactivate_siblings = 0;
1911 continue;
1912 }
1913
1914 if (!(uuid = dm_tree_node_get_uuid(child))) {
1915 stack;
1916 continue;
1917 }
1918
1919 if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
1920 continue;
1921
1922 if (!_suspend_node(child->name, child->info.major, child->info.minor,
1923 child->dtree->skip_lockfs,
1924 child->dtree->no_flush, &child->info)) {
1925 log_error("Unable to suspend %s (" FMTu32
1926 ":" FMTu32 ")", child->name,
1927 child->info.major, child->info.minor);
1928 r = 0;
1929 continue;
1930 }
1931 if (!_resume_node(child->name, child->info.major, child->info.minor,
1932 child->props.read_ahead, child->props.read_ahead_flags,
1933 &child->info, &child->dtree->cookie,
1934 child->props.reactivate_udev_flags, // use these flags
1935 child->info.suspended)) {
1936 log_error("Failed to suspend %s (" FMTu32
1937 ":" FMTu32 ")", child->name,
1938 child->info.major, child->info.minor);
1939 r = 0;
1940 continue;
1941 }
1942 }
1943
1944 return r;
1945}
1946
b4f1578f 1947int dm_tree_activate_children(struct dm_tree_node *dnode,
db208f51
AK
1948 const char *uuid_prefix,
1949 size_t uuid_prefix_len)
1950{
2ca6b865 1951 int r = 1;
ca514351 1952 int resolvable_name_conflict, awaiting_peer_rename = 0;
db208f51 1953 void *handle = NULL;
b4f1578f 1954 struct dm_tree_node *child = dnode;
165e4a11 1955 const char *name;
db208f51 1956 const char *uuid;
56c28292 1957 int priority;
db208f51 1958
165e4a11 1959 /* Activate children first */
b4f1578f
AK
1960 while ((child = dm_tree_next_child(&handle, dnode, 0))) {
1961 if (!(uuid = dm_tree_node_get_uuid(child))) {
165e4a11
AK
1962 stack;
1963 continue;
db208f51
AK
1964 }
1965
908db078
AK
1966 if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
1967 continue;
db208f51 1968
b4f1578f 1969 if (dm_tree_node_num_children(child, 0))
2ca6b865
MS
1970 if (!dm_tree_activate_children(child, uuid_prefix, uuid_prefix_len))
1971 return_0;
56c28292 1972 }
165e4a11 1973
56c28292 1974 handle = NULL;
165e4a11 1975
aa6f4e51 1976 for (priority = 0; priority < 3; priority++) {
ca514351 1977 awaiting_peer_rename = 0;
56c28292 1978 while ((child = dm_tree_next_child(&handle, dnode, 0))) {
a5a31ce9
ZK
1979 if (priority != child->activation_priority)
1980 continue;
1981
56c28292
AK
1982 if (!(uuid = dm_tree_node_get_uuid(child))) {
1983 stack;
1984 continue;
165e4a11 1985 }
165e4a11 1986
56c28292
AK
1987 if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
1988 continue;
165e4a11 1989
56c28292
AK
1990 if (!(name = dm_tree_node_get_name(child))) {
1991 stack;
1992 continue;
1993 }
1994
1995 /* Rename? */
1996 if (child->props.new_name) {
ca514351
JB
1997 if (_rename_conflict_exists(dnode, child, &resolvable_name_conflict) &&
1998 resolvable_name_conflict) {
1999 awaiting_peer_rename++;
2000 continue;
2001 }
bd90c6b2 2002 if (!_rename_node(name, child->props.new_name, child->info.major,
f16aea9e
PR
2003 child->info.minor, &child->dtree->cookie,
2004 child->udev_flags)) {
56c28292
AK
2005 log_error("Failed to rename %s (%" PRIu32
2006 ":%" PRIu32 ") to %s", name, child->info.major,
2007 child->info.minor, child->props.new_name);
2008 return 0;
2009 }
2010 child->name = child->props.new_name;
2011 child->props.new_name = NULL;
2012 }
2013
2014 if (!child->info.inactive_table && !child->info.suspended)
2015 continue;
2016
bafa2f39 2017 if (!_resume_node(child->name, child->info.major, child->info.minor,
bd90c6b2 2018 child->props.read_ahead, child->props.read_ahead_flags,
820b1b98 2019 &child->info, &child->dtree->cookie, child->udev_flags, child->info.suspended)) {
2a01e3d4 2020 log_error("Unable to resume %s.", _node_name(child));
2ca6b865 2021 r = 0;
56c28292
AK
2022 continue;
2023 }
756066a2
ZK
2024
2025 /*
2026 * FIXME: Implement delayed error reporting
2027 * activation should be stopped only in the case
2028 * the submission of the transaction_id message fails;
2029 * resume should continue further, just the whole command
2030 * has to report failure.
2031 */
2032 if (r && (child->props.send_messages > 1) &&
2033 !(r = _node_send_messages(child, uuid_prefix, uuid_prefix_len, 1)))
2034 stack;
9b78f7ee
ZK
2035
2036 /* Reactivate only for fresh activated origin */
2037 if (r && child->props.reactivate_siblings &&
2038 (!(r = _reactivate_siblings(dnode, uuid_prefix, uuid_prefix_len))))
2039 stack;
56c28292 2040 }
ca514351
JB
2041 if (awaiting_peer_rename)
2042 priority--; /* redo priority level */
db208f51
AK
2043 }
2044
2ca6b865 2045 return r;
165e4a11
AK
2046}
2047
a17ec7e0 2048static int _create_node(struct dm_tree_node *dnode, struct dm_tree_node *parent)
165e4a11
AK
2049{
2050 int r = 0;
2051 struct dm_task *dmt;
2052
2053 log_verbose("Creating %s", dnode->name);
2054
2055 if (!(dmt = dm_task_create(DM_DEVICE_CREATE))) {
2056 log_error("Create dm_task creation failed for %s", dnode->name);
2057 return 0;
2058 }
2059
2060 if (!dm_task_set_name(dmt, dnode->name)) {
2061 log_error("Failed to set device name for %s", dnode->name);
2062 goto out;
2063 }
2064
2065 if (!dm_task_set_uuid(dmt, dnode->uuid)) {
2066 log_error("Failed to set uuid for %s", dnode->name);
2067 goto out;
2068 }
2069
2070 if (dnode->props.major &&
2071 (!dm_task_set_major(dmt, dnode->props.major) ||
2072 !dm_task_set_minor(dmt, dnode->props.minor))) {
2073 log_error("Failed to set device number for %s creation.", dnode->name);
2074 goto out;
2075 }
2076
2077 if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
2078 log_error("Failed to set read only flag for %s", dnode->name);
2079 goto out;
2080 }
2081
2082 if (!dm_task_no_open_count(dmt))
2593777f 2083 log_warn("WARNING: Failed to disable open_count.");
165e4a11 2084
442820aa
JB
2085 if ((r = dm_task_run(dmt))) {
2086 if (!(r = dm_task_get_info(dmt, &dnode->info)))
2087 /*
2088 * This should never occur. However,
2089 * we print an error message anyway for the more
2090 * absurd cases (e.g. memory corruption) so there
2091 * is never any question as to which one failed.
2092 */
2093 log_error(INTERNAL_ERROR
2094 "Unable to get DM task info for %s.",
2095 dnode->name);
2096 }
a17ec7e0
ZK
2097
2098 if (r)
a8ee82ed 2099 dnode->activated = 1;
165e4a11
AK
2100out:
2101 dm_task_destroy(dmt);
2102
2103 return r;
2104}
2105
b4f1578f 2106static int _build_dev_string(char *devbuf, size_t bufsize, struct dm_tree_node *node)
165e4a11
AK
2107{
2108 if (!dm_format_dev(devbuf, bufsize, node->info.major, node->info.minor)) {
40e5fd8b
AK
2109 log_error("Failed to format %s device number for %s as dm "
2110 "target (%u,%u)",
2111 node->name, node->uuid, node->info.major, node->info.minor);
2112 return 0;
165e4a11
AK
2113 }
2114
2115 return 1;
2116}
2117
ffa9b6a5
ZK
2118 /* Simplify string emitting code */
2119#define EMIT_PARAMS(p, str...)\
7b6c011c
AK
2120do {\
2121 int w;\
2122 if ((w = dm_snprintf(params + p, paramsize - (size_t) p, str)) < 0) {\
2123 stack; /* Out of space */\
2124 return -1;\
2125 }\
2126 p += w;\
2127} while (0)
ffa9b6a5 2128
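/*
 * A minimal sketch of how EMIT_PARAMS is used by the _emit_*_line
 * helpers below: @pos tracks the write offset, and on overflow the
 * macro returns -1 from the enclosing helper so that _emit_segment()
 * can retry with a doubled buffer. The helper name is hypothetical.
 */
#if 0	/* illustrative only */
static int _example_emit_line(char *params, size_t paramsize)
{
	int pos = 0;

	EMIT_PARAMS(pos, "%s %u", "core", 2);	/* params = "core 2" */
	EMIT_PARAMS(pos, " %u", 1024);		/* params = "core 2 1024" */

	return 1;
}
#endif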
3c74075f
JEB
2129/*
2130 * _emit_areas_line
2131 *
2132 * Returns: 1 on success, 0 on failure
2133 */
08f1ddea 2134static int _emit_areas_line(struct dm_task *dmt __attribute__((unused)),
4dcaa230
AK
2135 struct load_segment *seg, char *params,
2136 size_t paramsize, int *pos)
165e4a11
AK
2137{
2138 struct seg_area *area;
7d7d93ac 2139 char devbuf[DM_FORMAT_DEV_BUFSIZE];
609faae9 2140 unsigned first_time = 1;
165e4a11 2141
2c44337b 2142 dm_list_iterate_items(area, &seg->areas) {
b262f3e1 2143 switch (seg->type) {
bf8d0098 2144 case SEG_RAID0:
d8c2677a 2145 case SEG_RAID0_META:
cac52ca4 2146 case SEG_RAID1:
4047e4df 2147 case SEG_RAID10:
cac52ca4 2148 case SEG_RAID4:
60ddd05f 2149 case SEG_RAID5_N:
cac52ca4
JEB
2150 case SEG_RAID5_LA:
2151 case SEG_RAID5_RA:
2152 case SEG_RAID5_LS:
2153 case SEG_RAID5_RS:
3673ce48 2154 case SEG_RAID6_N_6:
cac52ca4
JEB
2155 case SEG_RAID6_ZR:
2156 case SEG_RAID6_NR:
2157 case SEG_RAID6_NC:
a4bbaa3b
HM
2158 case SEG_RAID6_LS_6:
2159 case SEG_RAID6_RS_6:
2160 case SEG_RAID6_LA_6:
2161 case SEG_RAID6_RA_6:
6d04311e
JEB
2162 if (!area->dev_node) {
2163 EMIT_PARAMS(*pos, " -");
2164 break;
2165 }
2166 if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
2167 return_0;
2168
cac52ca4
JEB
2169 EMIT_PARAMS(*pos, " %s", devbuf);
2170 break;
b262f3e1 2171 default:
6d04311e
JEB
2172 if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
2173 return_0;
2174
b262f3e1
ZK
2175 EMIT_PARAMS(*pos, "%s%s %" PRIu64, first_time ? "" : " ",
2176 devbuf, area->offset);
2177 }
609faae9
AK
2178
2179 first_time = 0;
165e4a11
AK
2180 }
2181
2182 return 1;
2183}
2184
3c74075f 2185/*
3c74075f
JEB
2186 * Returns: 1 on success, 0 on failure
2187 */
beecb1e1
ZK
2188static int _mirror_emit_segment_line(struct dm_task *dmt, struct load_segment *seg,
2189 char *params, size_t paramsize)
165e4a11 2190{
8f26e18c
JEB
2191 int block_on_error = 0;
2192 int handle_errors = 0;
2193 int dm_log_userspace = 0;
dbcb64b8 2194 unsigned log_parm_count;
1287edf6 2195 int pos = 0;
7d7d93ac 2196 char logbuf[DM_FORMAT_DEV_BUFSIZE];
dbcb64b8 2197 const char *logtype;
b39fdcf4 2198 unsigned kmaj = 0, kmin = 0, krel = 0;
165e4a11 2199
1287edf6
ZK
2200 if (!get_uname_version(&kmaj, &kmin, &krel))
2201 return_0;
67b25ed4 2202
8f26e18c
JEB
2203 if ((seg->flags & DM_BLOCK_ON_ERROR)) {
2204 /*
2205 * Originally, block_on_error was an argument to the log
2206 * portion of the mirror CTR table. It was renamed to
2207 * "handle_errors" and now resides in the 'features'
2208 * section of the mirror CTR table (i.e. at the end).
2209 *
2210 * We can identify whether to use "block_on_error" or
2211 * "handle_errors" by the dm-mirror module's version
2212 * number (>= 1.12) or by the kernel version (>= 2.6.22).
2213 */
ba61f848 2214 if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 22))
8f26e18c
JEB
2215 handle_errors = 1;
2216 else
2217 block_on_error = 1;
2218 }
2219
2220 if (seg->clustered) {
2221 /* Cluster mirrors require a UUID */
2222 if (!seg->uuid)
2223 return_0;
2224
2225 /*
2226 * Cluster mirrors used to have their own log
2227 * types. Now they are accessed through the
2228 * userspace log type.
2229 *
2230 * The dm-log-userspace module was added to the
2231 * 2.6.31 kernel.
2232 */
ba61f848 2233 if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 31))
8f26e18c
JEB
2234 dm_log_userspace = 1;
2235 }
2236
2237 /* Region size */
2238 log_parm_count = 1;
2239
2240 /* [no]sync, block_on_error etc. */
2241 log_parm_count += hweight32(seg->flags);
311d6d81 2242
8f26e18c
JEB
2243 /* "handle_errors" is a feature arg now */
2244 if (handle_errors)
2245 log_parm_count--;
2246
2247 /* DM_CORELOG does not count in the param list */
2248 if (seg->flags & DM_CORELOG)
2249 log_parm_count--;
2250
2251 if (seg->clustered) {
2252 log_parm_count++; /* For UUID */
2253
2254 if (!dm_log_userspace)
ffa9b6a5 2255 EMIT_PARAMS(pos, "clustered-");
49b95a5e
JEB
2256 else
2257 /* For clustered-* type field inserted later */
2258 log_parm_count++;
8f26e18c 2259 }
dbcb64b8 2260
8f26e18c
JEB
2261 if (!seg->log)
2262 logtype = "core";
2263 else {
2264 logtype = "disk";
2265 log_parm_count++;
2266 if (!_build_dev_string(logbuf, sizeof(logbuf), seg->log))
2267 return_0;
2268 }
dbcb64b8 2269
8f26e18c
JEB
2270 if (dm_log_userspace)
2271 EMIT_PARAMS(pos, "userspace %u %s clustered-%s",
2272 log_parm_count, seg->uuid, logtype);
2273 else
ffa9b6a5 2274 EMIT_PARAMS(pos, "%s %u", logtype, log_parm_count);
dbcb64b8 2275
8f26e18c
JEB
2276 if (seg->log)
2277 EMIT_PARAMS(pos, " %s", logbuf);
2278
2279 EMIT_PARAMS(pos, " %u", seg->region_size);
dbcb64b8 2280
8f26e18c
JEB
2281 if (seg->clustered && !dm_log_userspace)
2282 EMIT_PARAMS(pos, " %s", seg->uuid);
67b25ed4 2283
8f26e18c
JEB
2284 if ((seg->flags & DM_NOSYNC))
2285 EMIT_PARAMS(pos, " nosync");
2286 else if ((seg->flags & DM_FORCESYNC))
2287 EMIT_PARAMS(pos, " sync");
dbcb64b8 2288
8f26e18c
JEB
2289 if (block_on_error)
2290 EMIT_PARAMS(pos, " block_on_error");
2291
2292 EMIT_PARAMS(pos, " %u ", seg->mirror_area_count);
2293
82bffa99 2294 if (!_emit_areas_line(dmt, seg, params, paramsize, &pos))
3c74075f 2295 return_0;
dbcb64b8 2296
8f26e18c
JEB
2297 if (handle_errors)
2298 EMIT_PARAMS(pos, " 1 handle_errors");
ffa9b6a5 2299
3c74075f 2300 return 1;
8f26e18c 2301}
4e60e624 2302
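/*
 * An example of a table line this helper can emit on a >= 2.6.22
 * kernel (device numbers are hypothetical): a 2-leg mirror with a
 * disk log, 1024-sector regions and error handling enabled:
 *
 *	disk 2 253:3 1024 2 253:4 0 253:5 0 1 handle_errors
 */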
e2354ea3
HM
2303static int _2_if_value(unsigned p)
2304{
2305 return p ? 2 : 0;
2306}
96a62101 2307
e2354ea3 2308 /* Return 2 * (number of set bits in @bits); @bits is RAID_BITMAP_SIZE * 64 bits wide */
b37e4e3f 2309static int _get_params_count(const uint64_t *bits)
96a62101
HM
2310{
2311 int r = 0;
e2354ea3 2312 int i = RAID_BITMAP_SIZE;
96a62101 2313
e2354ea3
HM
2314 while (i--) {
2315 r += 2 * hweight32(bits[i] & 0xFFFFFFFF);
2316 r += 2 * hweight32(bits[i] >> 32);
2317 }
96a62101
HM
2318
2319 return r;
2320}
8f26e18c 2321
b84bf3e8
HM
2322/*
2323 * Get target version (major, minor and patchlevel) for @target_name
2324 *
fe3b9bb7 2325 * FIXME: this function is derived from liblvm.
b84bf3e8
HM
2326 * Integrate with move of liblvm functions
2327 * to libdm in future library layer purge
2328 * (e.g. expose as API dm_target_version()?)
2329 */
2330static int _target_version(const char *target_name, uint32_t *maj,
2331 uint32_t *min, uint32_t *patchlevel)
2332{
2333 int r = 0;
2334 struct dm_task *dmt;
2335 struct dm_versions *target, *last_target = NULL;
2336
2337 log_very_verbose("Getting target version for %s", target_name);
2338 if (!(dmt = dm_task_create(DM_DEVICE_LIST_VERSIONS)))
2339 return_0;
2340
2341 if (!dm_task_run(dmt)) {
2342 log_debug_activation("Failed to get %s target versions", target_name);
2343 /* Assume this was because LIST_VERSIONS isn't supported */
635e7e0c 2344 *maj = *min = *patchlevel = 0;
b84bf3e8 2345 r = 1;
b84bf3e8
HM
2346 } else
2347 for (target = dm_task_get_versions(dmt);
2348 target != last_target;
2349 last_target = target, target = (struct dm_versions *)((char *) target + target->next))
2350 if (!strcmp(target_name, target->name)) {
2351 *maj = target->version[0];
2352 *min = target->version[1];
2353 *patchlevel = target->version[2];
2354 log_very_verbose("Found %s target "
2355 "v%" PRIu32 ".%" PRIu32 ".%" PRIu32 ".",
2356 target_name, *maj, *min, *patchlevel);
2357 r = 1;
2358 break;
2359 }
2360
2361 dm_task_destroy(dmt);
2362
2363 return r;
2364}
2365
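/*
 * A minimal usage sketch: the raid emitter below calls this to pick a
 * parameter ordering. Variable names here are hypothetical.
 */
#if 0	/* illustrative only */
	uint32_t maj, min, patchlevel;

	if (!_target_version("raid", &maj, &min, &patchlevel))
		return_0;

	if (maj > 1 || (maj == 1 && (min < 9 || min >= 11)))
		/* documented parameter order */;
	else
		/* 1.9.0 <= version < 1.11.0: flawed ordering */;
#endif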
cac52ca4
JEB
2366static int _raid_emit_segment_line(struct dm_task *dmt, uint32_t major,
2367 uint32_t minor, struct load_segment *seg,
2368 uint64_t *seg_start, char *params,
2369 size_t paramsize)
2370{
ad2432dc 2371 uint32_t i;
e2354ea3 2372 uint32_t area_count = seg->area_count / 2;
b84bf3e8 2373 uint32_t maj, min, patchlevel;
cac52ca4
JEB
2374 int param_count = 1; /* mandatory 'chunk size'/'stripe size' arg */
2375 int pos = 0;
e2354ea3
HM
2376 unsigned type;
2377
2378 if (seg->area_count % 2)
2379 return 0;
cac52ca4
JEB
2380
2381 if ((seg->flags & DM_NOSYNC) || (seg->flags & DM_FORCESYNC))
2382 param_count++;
2383
e2354ea3
HM
2384 param_count += _2_if_value(seg->data_offset) +
2385 _2_if_value(seg->delta_disks) +
2386 _2_if_value(seg->region_size) +
2387 _2_if_value(seg->writebehind) +
2388 _2_if_value(seg->min_recovery_rate) +
2389 _2_if_value(seg->max_recovery_rate) +
2390 _2_if_value(seg->data_copies > 1);
f439e65b 2391
e2354ea3 2392 /* rebuilds and writemostly are BITMAP_SIZE * 64 bits */
96a62101
HM
2393 param_count += _get_params_count(seg->rebuilds);
2394 param_count += _get_params_count(seg->writemostly);
2e0740f7 2395
e2354ea3
HM
2396 if ((seg->type == SEG_RAID1) && seg->stripe_size)
2397 log_info("WARNING: Ignoring RAID1 stripe size");
cac52ca4 2398
d8c2677a 2399 /* Kernel only expects "raid0", not "raid0_meta" */
e2354ea3 2400 type = seg->type;
d8c2677a
AK
2401 if (type == SEG_RAID0_META)
2402 type = SEG_RAID0;
e2354ea3
HM
2403
2404 EMIT_PARAMS(pos, "%s %d %u",
e2354ea3 2405 type == SEG_RAID10 ? "raid10" : _dm_segtypes[type].target,
cac52ca4
JEB
2406 param_count, seg->stripe_size);
2407
b84bf3e8
HM
2408 if (!_target_version("raid", &maj, &min, &patchlevel))
2409 return_0;
2410
2411 /*
2412 * Target versions prior to 1.9.0 and >= 1.11.0 emit
2413 * parameters in the order given by the kernel target documentation
2414 */
2415 if (maj > 1 || (maj == 1 && (min < 9 || min >= 11))) {
2416 if (seg->flags & DM_NOSYNC)
2417 EMIT_PARAMS(pos, " nosync");
2418 else if (seg->flags & DM_FORCESYNC)
2419 EMIT_PARAMS(pos, " sync");
2420
2421 for (i = 0; i < area_count; i++)
2422 if (seg->rebuilds[i/64] & (1ULL << (i%64)))
2423 EMIT_PARAMS(pos, " rebuild %u", i);
2424
2425 if (seg->min_recovery_rate)
2426 EMIT_PARAMS(pos, " min_recovery_rate %u",
2427 seg->min_recovery_rate);
2428
2429 if (seg->max_recovery_rate)
2430 EMIT_PARAMS(pos, " max_recovery_rate %u",
2431 seg->max_recovery_rate);
2432
2433 for (i = 0; i < area_count; i++)
2434 if (seg->writemostly[i/64] & (1ULL << (i%64)))
2435 EMIT_PARAMS(pos, " write_mostly %u", i);
2436
2437 if (seg->writebehind)
2438 EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind);
2439
2440 if (seg->region_size)
2441 EMIT_PARAMS(pos, " region_size %u", seg->region_size);
2442
2443 if (seg->data_copies > 1 && type == SEG_RAID10)
2444 EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies);
396377bc 2445
b84bf3e8
HM
2446 if (seg->delta_disks)
2447 EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks);
e2354ea3 2448
b84bf3e8
HM
2449 /* If seg->data_offset == 1, the kernel needs a zero offset to adjust to it */
2450 if (seg->data_offset)
2451 EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 0 : seg->data_offset);
e2354ea3 2452
b84bf3e8
HM
2453 /* Target version >= 1.9.0 && < 1.11.0 had a table line parameter ordering flaw */
2454 } else {
2455 if (seg->data_copies > 1 && type == SEG_RAID10)
2456 EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies);
e2354ea3 2457
b84bf3e8
HM
2458 if (seg->flags & DM_NOSYNC)
2459 EMIT_PARAMS(pos, " nosync");
2460 else if (seg->flags & DM_FORCESYNC)
2461 EMIT_PARAMS(pos, " sync");
1e4462db 2462
b84bf3e8
HM
2463 if (seg->region_size)
2464 EMIT_PARAMS(pos, " region_size %u", seg->region_size);
1e4462db 2465
b84bf3e8
HM
2466 /* If seg->data_offset == 1, the kernel needs a zero offset to adjust to it */
2467 if (seg->data_offset)
2468 EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 0 : seg->data_offset);
1e4462db 2469
b84bf3e8
HM
2470 if (seg->delta_disks)
2471 EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks);
1e4462db 2472
b84bf3e8
HM
2473 for (i = 0; i < area_count; i++)
2474 if (seg->rebuilds[i/64] & (1ULL << (i%64)))
2475 EMIT_PARAMS(pos, " rebuild %u", i);
1e4462db 2476
b84bf3e8
HM
2477 for (i = 0; i < area_count; i++)
2478 if (seg->writemostly[i/64] & (1ULL << (i%64)))
2479 EMIT_PARAMS(pos, " write_mostly %u", i);
1e4462db 2480
b84bf3e8
HM
2481 if (seg->writebehind)
2482 EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind);
1e4462db 2483
b84bf3e8
HM
2484 if (seg->max_recovery_rate)
2485 EMIT_PARAMS(pos, " max_recovery_rate %u",
2486 seg->max_recovery_rate);
2487
2488 if (seg->min_recovery_rate)
2489 EMIT_PARAMS(pos, " min_recovery_rate %u",
2490 seg->min_recovery_rate);
2491 }
e2354ea3 2492
cac52ca4 2493 /* Print number of metadata/data device pairs */
e2354ea3 2494 EMIT_PARAMS(pos, " %u", area_count);
cac52ca4 2495
82bffa99 2496 if (!_emit_areas_line(dmt, seg, params, paramsize, &pos))
cac52ca4
JEB
2497 return_0;
2498
2499 return 1;
2500}
2501
1ff7e214
JB
2502static int _cache_emit_segment_line(struct dm_task *dmt,
2503 struct load_segment *seg,
2504 char *params, size_t paramsize)
2505{
c651c614 2506 int pos = 0;
82401953 2507 /* unsigned feature_count; */
1ff7e214
JB
2508 char data[DM_FORMAT_DEV_BUFSIZE];
2509 char metadata[DM_FORMAT_DEV_BUFSIZE];
2510 char origin[DM_FORMAT_DEV_BUFSIZE];
82401953
ZK
2511 const char *name;
2512 struct dm_config_node *cn;
1ff7e214 2513
82401953
ZK
2514 /* Cache Dev */
2515 if (!_build_dev_string(data, sizeof(data), seg->pool))
1ff7e214 2516 return_0;
1ff7e214 2517
82401953
ZK
2518 /* Metadata Dev */
2519 if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata))
1ff7e214 2520 return_0;
1ff7e214
JB
2521
2522 /* Origin Dev */
82401953 2523 if (!_build_dev_string(origin, sizeof(origin), seg->origin))
1ff7e214 2524 return_0;
da268eb4 2525
ddbf0075 2526 EMIT_PARAMS(pos, "%s %s %s", metadata, data, origin);
1ff7e214 2527
79e9bde0
ZK
2528 /* Data block size */
2529 EMIT_PARAMS(pos, " %u", seg->data_block_size);
1ff7e214
JB
2530
2531 /* Features */
82401953
ZK
2532 /* feature_count = hweight32(seg->flags); */
2533 /* EMIT_PARAMS(pos, " %u", feature_count); */
ddd5a768
ZK
2534 if (seg->flags & DM_CACHE_FEATURE_METADATA2)
2535 EMIT_PARAMS(pos, " 2 metadata2 ");
2536 else
2537 EMIT_PARAMS(pos, " 1 ");
2538
82401953 2539 if (seg->flags & DM_CACHE_FEATURE_PASSTHROUGH)
ddd5a768
ZK
2540 EMIT_PARAMS(pos, "passthrough");
2541 else if (seg->flags & DM_CACHE_FEATURE_WRITEBACK)
2542 EMIT_PARAMS(pos, "writeback");
2543 else
2544 EMIT_PARAMS(pos, "writethrough");
1ff7e214
JB
2545
2546 /* Cache Policy */
20b22cd0 2547 name = seg->policy_name ? : "default";
82401953
ZK
2548
2549 EMIT_PARAMS(pos, " %s", name);
2550
74ae1c5b 2551 /* Do not pass migration_threshold 2048 which is default */
6c4cd7b2 2552 EMIT_PARAMS(pos, " %u", (seg->policy_argc + ((seg->migration_threshold != 2048) ? 1 : 0)) * 2);
74ae1c5b
ZK
2553 if (seg->migration_threshold != 2048)
2554 EMIT_PARAMS(pos, " migration_threshold %u", seg->migration_threshold);
20b22cd0
ZK
2555 if (seg->policy_settings)
2556 for (cn = seg->policy_settings->child; cn; cn = cn->sib)
74ae1c5b
ZK
2557 if (cn->v) /* Skip deleted entry */
2558 EMIT_PARAMS(pos, " %s %" PRIu64, cn->key, cn->v->v.i);
1ff7e214
JB
2559
2560 return 1;
2561}
2562
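/*
 * An example of an emitted cache line (hypothetical device numbers),
 * using a 128-sector block size, writethrough and the default policy
 * with no extra settings:
 *
 *	253:4 253:3 253:2 128 1 writethrough default 0
 *
 * i.e. <metadata dev> <cache dev> <origin dev> <block size>
 * <#features> <features...> <policy> <#policy args> [args...]
 */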
dcd4afc7
ZK
2563static int _thin_pool_emit_segment_line(struct dm_task *dmt,
2564 struct load_segment *seg,
2565 char *params, size_t paramsize)
2566{
2567 int pos = 0;
2568 char pool[DM_FORMAT_DEV_BUFSIZE], metadata[DM_FORMAT_DEV_BUFSIZE];
2908ab3e 2569 int features = (seg->error_if_no_space ? 1 : 0) +
69132f55 2570 (seg->read_only ? 1 : 0) +
2908ab3e
ZK
2571 (seg->ignore_discard ? 1 : 0) +
2572 (seg->no_discard_passdown ? 1 : 0) +
2573 (seg->skip_block_zeroing ? 1 : 0);
dcd4afc7
ZK
2574
2575 if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata))
2576 return_0;
2577
2578 if (!_build_dev_string(pool, sizeof(pool), seg->pool))
2579 return_0;
2580
69132f55 2581 EMIT_PARAMS(pos, "%s %s %d %" PRIu64 " %d%s%s%s%s%s", metadata, pool,
dcd4afc7
ZK
2582 seg->data_block_size, seg->low_water_mark, features,
2583 seg->skip_block_zeroing ? " skip_block_zeroing" : "",
2584 seg->ignore_discard ? " ignore_discard" : "",
f9162274
T
2585 seg->no_discard_passdown ? " no_discard_passdown" : "",
2586 seg->error_if_no_space ? " error_if_no_space" : "",
2587 seg->read_only ? " read_only" : ""
dcd4afc7
ZK
2588 );
2589
2590 return 1;
2591}
2592
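/*
 * An example of an emitted thin-pool line (hypothetical device
 * numbers): 128-sector data blocks, low water mark 0 and one feature
 * argument:
 *
 *	253:3 253:4 128 0 1 skip_block_zeroing
 */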
2593static int _thin_emit_segment_line(struct dm_task *dmt,
2594 struct load_segment *seg,
2595 char *params, size_t paramsize)
2596{
2597 int pos = 0;
2598 char pool[DM_FORMAT_DEV_BUFSIZE];
2599 char external[DM_FORMAT_DEV_BUFSIZE + 1];
2600
2601 if (!_build_dev_string(pool, sizeof(pool), seg->pool))
2602 return_0;
2603
2604 if (!seg->external)
2605 *external = 0;
2606 else {
2607 *external = ' ';
2608 if (!_build_dev_string(external + 1, sizeof(external) - 1,
2609 seg->external))
2610 return_0;
2611 }
2612
2613 EMIT_PARAMS(pos, "%s %d%s", pool, seg->device_id, external);
2614
2615 return 1;
2616}
2617
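/*
 * Emitted thin lines look like "253:4 1" (pool device, device id) or
 * "253:4 1 253:7" when an external origin is present; the device
 * numbers here are hypothetical.
 */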
8f26e18c
JEB
2618static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
2619 uint32_t minor, struct load_segment *seg,
2620 uint64_t *seg_start, char *params,
2621 size_t paramsize)
2622{
2623 int pos = 0;
cac52ca4 2624 int target_type_is_raid = 0;
8f26e18c 2625 char originbuf[DM_FORMAT_DEV_BUFSIZE], cowbuf[DM_FORMAT_DEV_BUFSIZE];
dbcb64b8 2626
8f26e18c
JEB
2627 switch(seg->type) {
2628 case SEG_ERROR:
2629 case SEG_ZERO:
2630 case SEG_LINEAR:
2631 break;
2632 case SEG_MIRRORED:
2633 /* Mirrors are pretty complicated - now in separate function */
82bffa99 2634 if (!_mirror_emit_segment_line(dmt, seg, params, paramsize))
3c74075f 2635 return_0;
165e4a11
AK
2636 break;
2637 case SEG_SNAPSHOT:
aa6f4e51 2638 case SEG_SNAPSHOT_MERGE:
b4f1578f
AK
2639 if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
2640 return_0;
2641 if (!_build_dev_string(cowbuf, sizeof(cowbuf), seg->cow))
2642 return_0;
ffa9b6a5
ZK
2643 EMIT_PARAMS(pos, "%s %s %c %d", originbuf, cowbuf,
2644 seg->persistent ? 'P' : 'N', seg->chunk_size);
165e4a11
AK
2645 break;
2646 case SEG_SNAPSHOT_ORIGIN:
b4f1578f
AK
2647 if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
2648 return_0;
ffa9b6a5 2649 EMIT_PARAMS(pos, "%s", originbuf);
165e4a11
AK
2650 break;
2651 case SEG_STRIPED:
609faae9 2652 EMIT_PARAMS(pos, "%u %u ", seg->area_count, seg->stripe_size);
165e4a11 2653 break;
12ca060e 2654 case SEG_CRYPT:
609faae9 2655 EMIT_PARAMS(pos, "%s%s%s%s%s %s %" PRIu64 " ", seg->cipher,
12ca060e
MB
2656 seg->chainmode ? "-" : "", seg->chainmode ?: "",
2657 seg->iv ? "-" : "", seg->iv ?: "", seg->key,
2658 seg->iv_offset != DM_CRYPT_IV_DEFAULT ?
2659 seg->iv_offset : *seg_start);
2660 break;
bf8d0098 2661 case SEG_RAID0:
d8c2677a 2662 case SEG_RAID0_META:
cac52ca4 2663 case SEG_RAID1:
4047e4df 2664 case SEG_RAID10:
cac52ca4 2665 case SEG_RAID4:
60ddd05f 2666 case SEG_RAID5_N:
cac52ca4
JEB
2667 case SEG_RAID5_LA:
2668 case SEG_RAID5_RA:
2669 case SEG_RAID5_LS:
2670 case SEG_RAID5_RS:
3673ce48 2671 case SEG_RAID6_N_6:
cac52ca4
JEB
2672 case SEG_RAID6_ZR:
2673 case SEG_RAID6_NR:
2674 case SEG_RAID6_NC:
a4bbaa3b
HM
2675 case SEG_RAID6_LS_6:
2676 case SEG_RAID6_RS_6:
2677 case SEG_RAID6_LA_6:
2678 case SEG_RAID6_RA_6:
cac52ca4 2679 target_type_is_raid = 1;
82bffa99
ZK
2680 if (!_raid_emit_segment_line(dmt, major, minor, seg, seg_start,
2681 params, paramsize))
cac52ca4
JEB
2682 return_0;
2683
2684 break;
4251236e 2685 case SEG_THIN_POOL:
dcd4afc7 2686 if (!_thin_pool_emit_segment_line(dmt, seg, params, paramsize))
4251236e 2687 return_0;
4251236e
ZK
2688 break;
2689 case SEG_THIN:
dcd4afc7 2690 if (!_thin_emit_segment_line(dmt, seg, params, paramsize))
4251236e 2691 return_0;
4251236e 2692 break;
1ff7e214
JB
2693 case SEG_CACHE:
2694 if (!_cache_emit_segment_line(dmt, seg, params, paramsize))
2695 return_0;
2696 break;
165e4a11
AK
2697 }
2698
2699 switch(seg->type) {
2700 case SEG_ERROR:
2701 case SEG_SNAPSHOT:
2702 case SEG_SNAPSHOT_ORIGIN:
aa6f4e51 2703 case SEG_SNAPSHOT_MERGE:
165e4a11 2704 case SEG_ZERO:
4251236e
ZK
2705 case SEG_THIN_POOL:
2706 case SEG_THIN:
1ff7e214 2707 case SEG_CACHE:
165e4a11 2708 break;
12ca060e 2709 case SEG_CRYPT:
165e4a11 2710 case SEG_LINEAR:
165e4a11 2711 case SEG_STRIPED:
82bffa99
ZK
2712 if (!_emit_areas_line(dmt, seg, params, paramsize, &pos))
2713 return_0;
2714
b6793963
AK
2715 if (!params[0]) {
2716 log_error("No parameters supplied for %s target "
6190ded5 2717 "%u:%u.", _dm_segtypes[seg->type].target,
812e10ac 2718 major, minor);
b6793963
AK
2719 return 0;
2720 }
165e4a11
AK
2721 break;
2722 }
2723
06abb2dd
AK
2724 log_debug_activation("Adding target to (%" PRIu32 ":%" PRIu32 "): %" PRIu64
2725 " %" PRIu64 " %s %s", major, minor,
2726 *seg_start, seg->size, target_type_is_raid ? "raid" :
6190ded5 2727 _dm_segtypes[seg->type].target, params);
165e4a11 2728
cac52ca4
JEB
2729 if (!dm_task_add_target(dmt, *seg_start, seg->size,
2730 target_type_is_raid ? "raid" :
6190ded5 2731 _dm_segtypes[seg->type].target, params))
b4f1578f 2732 return_0;
165e4a11
AK
2733
2734 *seg_start += seg->size;
2735
2736 return 1;
2737}
2738
ffa9b6a5
ZK
2739#undef EMIT_PARAMS
2740
4b2cae46
AK
2741static int _emit_segment(struct dm_task *dmt, uint32_t major, uint32_t minor,
2742 struct load_segment *seg, uint64_t *seg_start)
165e4a11
AK
2743{
2744 char *params;
b84bf3e8 2745 size_t paramsize = 4096; /* FIXME: too small for long RAID lines when > 64 devices supported */
165e4a11
AK
2746 int ret;
2747
2748 do {
2749 if (!(params = dm_malloc(paramsize))) {
2750 log_error("Insufficient space for target parameters.");
2751 return 0;
2752 }
2753
12ea7cb1 2754 params[0] = '\0';
4b2cae46
AK
2755 ret = _emit_segment_line(dmt, major, minor, seg, seg_start,
2756 params, paramsize);
165e4a11
AK
2757 dm_free(params);
2758
2759 if (!ret)
2760 stack;
2761
2762 if (ret >= 0)
2763 return ret;
2764
06abb2dd
AK
2765 log_debug_activation("Insufficient space in params[%" PRIsize_t
2766 "] for target parameters.", paramsize);
165e4a11
AK
2767
2768 paramsize *= 2;
2769 } while (paramsize < MAX_TARGET_PARAMSIZE);
2770
2771 log_error("Target parameter size too big. Aborting.");
2772 return 0;
2773}
2774
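/*
 * Worked example of the retry loop above: _emit_segment_line()
 * returns -1 only when EMIT_PARAMS runs out of room, so paramsize
 * doubles per pass - 4096, 8192, 16384, ... - until the line fits or
 * paramsize reaches MAX_TARGET_PARAMSIZE (500000).
 */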
b4f1578f 2775static int _load_node(struct dm_tree_node *dnode)
165e4a11
AK
2776{
2777 int r = 0;
2778 struct dm_task *dmt;
2779 struct load_segment *seg;
df390f17 2780 uint64_t seg_start = 0, existing_table_size;
165e4a11 2781
2a01e3d4 2782 log_verbose("Loading table for %s.", _node_name(dnode));
165e4a11
AK
2783
2784 if (!(dmt = dm_task_create(DM_DEVICE_RELOAD))) {
2a01e3d4 2785 log_error("Reload dm_task creation failed for %s.", _node_name(dnode));
165e4a11
AK
2786 return 0;
2787 }
2788
2789 if (!dm_task_set_major(dmt, dnode->info.major) ||
2790 !dm_task_set_minor(dmt, dnode->info.minor)) {
2a01e3d4 2791 log_error("Failed to set device number for %s reload.", _node_name(dnode));
165e4a11
AK
2792 goto out;
2793 }
2794
2795 if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
2a01e3d4 2796 log_error("Failed to set read only flag for %s.", _node_name(dnode));
165e4a11
AK
2797 goto out;
2798 }
2799
2800 if (!dm_task_no_open_count(dmt))
76322d3b 2801 log_warn("WARNING: Failed to disable open_count.");
165e4a11 2802
2c44337b 2803 dm_list_iterate_items(seg, &dnode->props.segs)
4b2cae46
AK
2804 if (!_emit_segment(dmt, dnode->info.major, dnode->info.minor,
2805 seg, &seg_start))
b4f1578f 2806 goto_out;
165e4a11 2807
faeea370 2808 if (!dm_task_suppress_identical_reload(dmt))
76322d3b 2809 log_warn("WARNING: Failed to suppress reload of identical tables.");
ec289b64
AK
2810
2811 if ((r = dm_task_run(dmt))) {
165e4a11 2812 r = dm_task_get_info(dmt, &dnode->info);
ec289b64 2813 if (r && !dnode->info.inactive_table)
2a01e3d4
ZK
2814 log_verbose("Suppressed %s identical table reload.",
2815 _node_name(dnode));
bb875bb9 2816
df390f17 2817 existing_table_size = dm_task_get_existing_table_size(dmt);
bb875bb9 2818 if ((dnode->props.size_changed =
9ef820a2
ZK
2819 (existing_table_size == seg_start) ? 0 :
2820 (existing_table_size > seg_start) ? -1 : 1)) {
df390f17
AK
2821 /*
2822 * Kernel usually skips size validation on zero-length devices
2823 * now so no need to preload them.
2824 */
2825 /* FIXME In which kernel version did this begin? */
2826 if (!existing_table_size && dnode->props.delay_resume_if_new)
2827 dnode->props.size_changed = 0;
b96c2133 2828
06abb2dd 2829 log_debug_activation("Table size changed from %" PRIu64 " to %"
2a01e3d4
ZK
2830 PRIu64 " for %s.%s", existing_table_size,
2831 seg_start, _node_name(dnode),
06abb2dd 2832 dnode->props.size_changed ? "" : " (Ignoring.)");
82ae02bc
ZK
2833
2834 /*
2835 * FIXME: the code here has a known design problem.
2836 * LVM2 does NOT resize a thin-pool on top of another LV in 2 steps -
2837 * where the raid would be resized in a 1st transaction
2838 * followed by a 2nd thin-pool resize - RHBZ #1285063
2839 */
2840 if (existing_table_size && dnode->props.delay_resume_if_extended) {
2841 log_debug_activation("Resume of table of extended device %s delayed.",
2842 _node_name(dnode));
2843 dnode->props.size_changed = 0;
2844 }
df390f17 2845 }
ec289b64 2846 }
165e4a11
AK
2847
2848 dnode->props.segment_count = 0;
2849
2850out:
2851 dm_task_destroy(dmt);
2852
2853 return r;
165e4a11
AK
2854}
2855
a8ee82ed
ZK
2856/* Try to deactivate only nodes created during preload. */
2857static int _dm_tree_revert_activated(struct dm_tree_node *dnode)
0638d1d8 2858{
a8ee82ed 2859 void *handle = NULL;
0638d1d8
ZK
2860 struct dm_tree_node *child;
2861
a8ee82ed
ZK
2862 while ((child = dm_tree_next_child(&handle, dnode, 0))) {
2863 if (child->activated) {
2864 if (child->callback) {
2865 log_debug_activation("Dropping callback for %s.", _node_name(child));
2866 child->callback = NULL;
2867 }
2868
2869 log_debug_activation("Reverting %s.", _node_name(child));
2870 if (!_deactivate_node(child->name, child->info.major, child->info.minor,
2871 &child->dtree->cookie, child->udev_flags, 0)) {
2872 log_debug_activation("Unable to deactivate %s.", _node_name(child));
2873 return 0;
2874 }
0638d1d8 2875 }
a8ee82ed
ZK
2876
2877 if (dm_tree_node_num_children(child, 0) &&
2878 !_dm_tree_revert_activated(child))
6116333c 2879 return_0;
0638d1d8
ZK
2880 }
2881
2882 return 1;
2883}
2884
a17ec7e0
ZK
2885static int _dm_tree_wait_and_revert_activated(struct dm_tree_node *dnode)
2886{
2887 if (!dm_udev_wait(dm_tree_get_cookie(dnode)))
2888 stack;
2889
2890 dm_tree_set_cookie(dnode, 0);
2891
2892 return _dm_tree_revert_activated(dnode);
2893}
2894
b4f1578f 2895int dm_tree_preload_children(struct dm_tree_node *dnode,
bb875bb9
AK
2896 const char *uuid_prefix,
2897 size_t uuid_prefix_len)
165e4a11 2898{
442820aa 2899 int r = 1, node_created = 0;
165e4a11 2900 void *handle = NULL;
b4f1578f 2901 struct dm_tree_node *child;
566515c0 2902 int update_devs_flag = 0;
165e4a11
AK
2903
2904 /* Preload children first */
b4f1578f 2905 while ((child = dm_tree_next_child(&handle, dnode, 0))) {
aa68b898 2906 /* Propagate delay of resume from parent node */
f70404ad
ZK
2907 if (dnode->props.delay_resume_if_new > 1)
2908 child->props.delay_resume_if_new = dnode->props.delay_resume_if_new;
aa68b898 2909
165e4a11
AK
2910 /* Skip existing non-device-mapper devices */
2911 if (!child->info.exists && child->info.major)
2912 continue;
2913
2914 /* Ignore if it doesn't belong to this VG */
87f98002
AK
2915 if (child->info.exists &&
2916 !_uuid_prefix_matches(child->uuid, uuid_prefix, uuid_prefix_len))
165e4a11
AK
2917 continue;
2918
b4f1578f 2919 if (dm_tree_node_num_children(child, 0))
2ca6b865
MS
2920 if (!dm_tree_preload_children(child, uuid_prefix, uuid_prefix_len))
2921 return_0;
165e4a11 2922
165e4a11 2923 /* FIXME Cope if name exists with no uuid? */
a17ec7e0 2924 if (!child->info.exists && !(node_created = _create_node(child, dnode)))
3d6782b3 2925 return_0;
165e4a11 2926
82ae02bc
ZK
2927 /* Propagate delayed resume from extended child node */
2928 if (child->props.delay_resume_if_extended)
2929 dnode->props.delay_resume_if_extended = 1;
2930
3d6782b3
ZK
2931 if (!child->info.inactive_table &&
2932 child->props.segment_count &&
442820aa 2933 !_load_node(child)) {
a17ec7e0 2934 stack;
442820aa 2935 /*
a17ec7e0
ZK
2936 * If the table load fails, try to revert the device in the kernel
2937 * together with other created and preloaded devices.
442820aa 2938 */
a17ec7e0
ZK
2939 if (!_dm_tree_wait_and_revert_activated(dnode))
2940 stack;
2941 r = 0;
2942 continue;
442820aa 2943 }
165e4a11 2944
c908a8b1
ZK
2945 /* No resume for a device without parents or with unchanged or smaller size */
2946 if (!dm_tree_node_num_children(child, 1) || (child->props.size_changed <= 0))
165e4a11
AK
2947 continue;
2948
7707ea90
AK
2949 if (!child->info.inactive_table && !child->info.suspended)
2950 continue;
2951
fc795d87 2952 if (!_resume_node(child->name, child->info.major, child->info.minor,
bd90c6b2 2953 child->props.read_ahead, child->props.read_ahead_flags,
820b1b98 2954 &child->info, &child->dtree->cookie, child->udev_flags,
1840aa09 2955 child->info.suspended)) {
2a01e3d4 2956 log_error("Unable to resume %s.", _node_name(child));
a17ec7e0
ZK
2957 if (!_dm_tree_wait_and_revert_activated(dnode))
2958 stack;
2ca6b865 2959 r = 0;
165e4a11
AK
2960 continue;
2961 }
2962
820b1b98 2963 if (node_created) {
203affff
ZK
2964 /* When creating new node also check transaction_id. */
2965 if (child->props.send_messages &&
2966 !_node_send_messages(child, uuid_prefix, uuid_prefix_len, 0)) {
2967 stack;
a17ec7e0 2968 if (!_dm_tree_wait_and_revert_activated(dnode))
203affff 2969 stack;
203affff
ZK
2970 r = 0;
2971 continue;
2972 }
6116333c 2973 }
0638d1d8 2974
566515c0
PR
2975 /*
2976 * Prepare for immediate synchronization with udev and flush all stacked
2977 * dev node operations if requested by immediate_dev_node property. But
2978 * finish processing current level in the tree first.
2979 */
2980 if (child->props.immediate_dev_node)
2981 update_devs_flag = 1;
165e4a11
AK
2982 }
2983
7e35dfff 2984 if (update_devs_flag ||
1911c616 2985 (r && !dnode->info.exists && dnode->callback)) {
566515c0
PR
2986 if (!dm_udev_wait(dm_tree_get_cookie(dnode)))
2987 stack;
2988 dm_tree_set_cookie(dnode, 0);
7e35dfff 2989
1911c616 2990 if (r && !dnode->info.exists && dnode->callback &&
af7297c7 2991 !dnode->callback(dnode, DM_NODE_CALLBACK_PRELOADED,
7e35dfff 2992 dnode->callback_data))
0638d1d8
ZK
2993 {
2994 /* Try to deactivate what has been activated in preload phase */
2995 (void) _dm_tree_revert_activated(dnode);
7e35dfff 2996 return_0;
0638d1d8 2997 }
566515c0
PR
2998 }
2999
2ca6b865 3000 return r;
165e4a11
AK
3001}
3002
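/*
 * A minimal sketch of the usual sequence for one tree; the function
 * name is hypothetical and error handling is trimmed:
 */
#if 0	/* illustrative only */
static int _example_activate(struct dm_tree *dtree, const char *uuid_prefix,
			     size_t uuid_prefix_len)
{
	struct dm_tree_node *root = dm_tree_find_node(dtree, 0, 0);

	/* Create devices and load tables bottom-up */
	if (!dm_tree_preload_children(root, uuid_prefix, uuid_prefix_len))
		return_0;

	/* Rename and resume by activation_priority */
	if (!dm_tree_activate_children(root, uuid_prefix, uuid_prefix_len))
		return_0;

	/* Synchronize with udev rule processing */
	if (!dm_udev_wait(dm_tree_get_cookie(root)))
		stack;

	return 1;
}
#endif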
165e4a11
AK
3003/*
3004 * Returns 1 if unsure.
3005 */
b4f1578f 3006int dm_tree_children_use_uuid(struct dm_tree_node *dnode,
165e4a11
AK
3007 const char *uuid_prefix,
3008 size_t uuid_prefix_len)
3009{
3010 void *handle = NULL;
b4f1578f 3011 struct dm_tree_node *child = dnode;
165e4a11
AK
3012 const char *uuid;
3013
b4f1578f
AK
3014 while ((child = dm_tree_next_child(&handle, dnode, 0))) {
3015 if (!(uuid = dm_tree_node_get_uuid(child))) {
1f73cadd
ZK
3016 log_warn("WARNING: Failed to get uuid for dtree node %s.",
3017 _node_name(child));
165e4a11
AK
3018 return 1;
3019 }
3020
87f98002 3021 if (_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
165e4a11
AK
3022 return 1;
3023
b4f1578f
AK
3024 if (dm_tree_node_num_children(child, 0))
3025 dm_tree_children_use_uuid(child, uuid_prefix, uuid_prefix_len);
165e4a11
AK
3026 }
3027
3028 return 0;
3029}
3030
3031/*
3032 * Target functions
3033 */
b4f1578f 3034static struct load_segment *_add_segment(struct dm_tree_node *dnode, unsigned type, uint64_t size)
165e4a11
AK
3035{
3036 struct load_segment *seg;
3037
b4f1578f
AK
3038 if (!(seg = dm_pool_zalloc(dnode->dtree->mem, sizeof(*seg)))) {
3039 log_error("dtree node segment allocation failed");
165e4a11
AK
3040 return NULL;
3041 }
3042
3043 seg->type = type;
3044 seg->size = size;
2c44337b 3045 dm_list_init(&seg->areas);
2c44337b 3046 dm_list_add(&dnode->props.segs, &seg->list);
165e4a11
AK
3047 dnode->props.segment_count++;
3048
3049 return seg;
3050}
3051
b4f1578f 3052int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node *dnode,
a5087866
ZK
3053 uint64_t size,
3054 const char *origin_uuid)
165e4a11
AK
3055{
3056 struct load_segment *seg;
b4f1578f 3057 struct dm_tree_node *origin_node;
165e4a11 3058
b4f1578f
AK
3059 if (!(seg = _add_segment(dnode, SEG_SNAPSHOT_ORIGIN, size)))
3060 return_0;
165e4a11 3061
b4f1578f 3062 if (!(origin_node = dm_tree_find_node_by_uuid(dnode->dtree, origin_uuid))) {
165e4a11
AK
3063 log_error("Couldn't find snapshot origin uuid %s.", origin_uuid);
3064 return 0;
3065 }
3066
3067 seg->origin = origin_node;
b4f1578f
AK
3068 if (!_link_tree_nodes(dnode, origin_node))
3069 return_0;
165e4a11 3070
56c28292
AK
3071 /* Resume snapshot origins after new snapshots */
3072 dnode->activation_priority = 1;
3073
9b78f7ee
ZK
3074 if (!dnode->info.exists)
3075 /* Reactivate siblings for this origin after being resumed */
3076 dnode->props.reactivate_siblings = 1;
3077
f1aabd5c
AK
3078 /*
3079 * Don't resume the origin immediately in case it is a non-trivial
3080 * target that must not be active more than once concurrently!
3081 */
3082 origin_node->props.delay_resume_if_new = 1;
3083
165e4a11
AK
3084 return 1;
3085}
3086
aa6f4e51 3087static int _add_snapshot_target(struct dm_tree_node *node,
a5087866
ZK
3088 uint64_t size,
3089 const char *origin_uuid,
3090 const char *cow_uuid,
3091 const char *merge_uuid,
3092 int persistent,
3093 uint32_t chunk_size)
165e4a11
AK
3094{
3095 struct load_segment *seg;
aa6f4e51
MS
3096 struct dm_tree_node *origin_node, *cow_node, *merge_node;
3097 unsigned seg_type;
3098
3099 seg_type = !merge_uuid ? SEG_SNAPSHOT : SEG_SNAPSHOT_MERGE;
165e4a11 3100
aa6f4e51 3101 if (!(seg = _add_segment(node, seg_type, size)))
b4f1578f 3102 return_0;
165e4a11 3103
b4f1578f 3104 if (!(origin_node = dm_tree_find_node_by_uuid(node->dtree, origin_uuid))) {
165e4a11
AK
3105 log_error("Couldn't find snapshot origin uuid %s.", origin_uuid);
3106 return 0;
3107 }
3108
3109 seg->origin = origin_node;
b4f1578f
AK
3110 if (!_link_tree_nodes(node, origin_node))
3111 return_0;
165e4a11 3112
b4f1578f 3113 if (!(cow_node = dm_tree_find_node_by_uuid(node->dtree, cow_uuid))) {
aa6f4e51 3114 log_error("Couldn't find snapshot COW device uuid %s.", cow_uuid);
165e4a11
AK
3115 return 0;
3116 }
3117
3118 seg->cow = cow_node;
b4f1578f
AK
3119 if (!_link_tree_nodes(node, cow_node))
3120 return_0;
165e4a11
AK
3121
3122 seg->persistent = persistent ? 1 : 0;
3123 seg->chunk_size = chunk_size;
3124
aa6f4e51
MS
3125 if (merge_uuid) {
3126 if (!(merge_node = dm_tree_find_node_by_uuid(node->dtree, merge_uuid))) {
3127 /* not a pure error, merging snapshot may have been deactivated */
3128 log_verbose("Couldn't find merging snapshot uuid %s.", merge_uuid);
3129 } else {
3130 seg->merge = merge_node;
3131 /* must not link merging snapshot, would undermine activation_priority below */
3132 }
3133
3134 /* Resume snapshot-merge (acting origin) after other snapshots */
3135 node->activation_priority = 1;
3136 if (seg->merge) {
3137 /* Resume merging snapshot after snapshot-merge */
3138 seg->merge->activation_priority = 2;
3139 }
9b78f7ee
ZK
3140 } else if (!origin_node->info.exists) {
3141 /* Keep original udev_flags for reactivation. */
3142 node->props.reactivate_udev_flags = node->udev_flags;
3143
3144 /* Reactivation is needed if the origin's -real device is not in the DM table.
3145 * In this case, after the resume of the origin LV, we resume its snapshots
3146 * with updated udev_flags to avoid udev scanning entirely on the first resume.
3147 * Reactivation then resumes the snapshots with the original udev_flags.
3148 */
3149 node->udev_flags |= DM_SUBSYSTEM_UDEV_FLAG0 |
3150 DM_UDEV_DISABLE_DISK_RULES_FLAG |
3151 DM_UDEV_DISABLE_OTHER_RULES_FLAG;
3152 log_debug_activation("Using udev_flags 0x%x for activation of %s.",
3153 node->udev_flags, node->name);
aa6f4e51
MS
3154 }
3155
165e4a11
AK
3156 return 1;
3157}
3158
aa6f4e51
MS
3159
3160int dm_tree_node_add_snapshot_target(struct dm_tree_node *node,
3161 uint64_t size,
3162 const char *origin_uuid,
3163 const char *cow_uuid,
3164 int persistent,
3165 uint32_t chunk_size)
3166{
3167 return _add_snapshot_target(node, size, origin_uuid, cow_uuid,
3168 NULL, persistent, chunk_size);
3169}
3170
3171int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node *node,
3172 uint64_t size,
3173 const char *origin_uuid,
3174 const char *cow_uuid,
3175 const char *merge_uuid,
3176 uint32_t chunk_size)
3177{
3178 return _add_snapshot_target(node, size, origin_uuid, cow_uuid,
3179 merge_uuid, 1, chunk_size);
3180}
3181
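/*
 * A minimal sketch of adding a persistent snapshot segment; both UUIDs
 * are hypothetical placeholders for the origin's -real device and the
 * snapshot's -cow device:
 */
#if 0	/* illustrative only */
	if (!dm_tree_node_add_snapshot_target(snap_node, size,
					      "LVM-<origin>-real",
					      "LVM-<snap>-cow",
					      1 /* persistent */,
					      8 /* chunk size, sectors */))
		return_0;
#endif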
b4f1578f 3182int dm_tree_node_add_error_target(struct dm_tree_node *node,
a5087866 3183 uint64_t size)
165e4a11 3184{
b4f1578f
AK
3185 if (!_add_segment(node, SEG_ERROR, size))
3186 return_0;
165e4a11
AK
3187
3188 return 1;
3189}
3190
b4f1578f 3191int dm_tree_node_add_zero_target(struct dm_tree_node *node,
a5087866 3192 uint64_t size)
165e4a11 3193{
b4f1578f
AK
3194 if (!_add_segment(node, SEG_ZERO, size))
3195 return_0;
165e4a11
AK
3196
3197 return 1;
3198}
3199
b4f1578f 3200int dm_tree_node_add_linear_target(struct dm_tree_node *node,
a5087866 3201 uint64_t size)
165e4a11 3202{
b4f1578f
AK
3203 if (!_add_segment(node, SEG_LINEAR, size))
3204 return_0;
165e4a11
AK
3205
3206 return 1;
3207}
3208
b4f1578f 3209int dm_tree_node_add_striped_target(struct dm_tree_node *node,
a5087866
ZK
3210 uint64_t size,
3211 uint32_t stripe_size)
165e4a11
AK
3212{
3213 struct load_segment *seg;
3214
b4f1578f
AK
3215 if (!(seg = _add_segment(node, SEG_STRIPED, size)))
3216 return_0;
165e4a11
AK
3217
3218 seg->stripe_size = stripe_size;
3219
3220 return 1;
3221}
3222
12ca060e
MB
3223int dm_tree_node_add_crypt_target(struct dm_tree_node *node,
3224 uint64_t size,
3225 const char *cipher,
3226 const char *chainmode,
3227 const char *iv,
3228 uint64_t iv_offset,
3229 const char *key)
3230{
3231 struct load_segment *seg;
3232
3233 if (!(seg = _add_segment(node, SEG_CRYPT, size)))
3234 return_0;
3235
3236 seg->cipher = cipher;
3237 seg->chainmode = chainmode;
3238 seg->iv = iv;
3239 seg->iv_offset = iv_offset;
3240 seg->key = key;
3241
3242 return 1;
3243}
3244
b4f1578f 3245int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node,
a5087866
ZK
3246 uint32_t region_size,
3247 unsigned clustered,
3248 const char *log_uuid,
3249 unsigned area_count,
3250 uint32_t flags)
165e4a11 3251{
908db078 3252 struct dm_tree_node *log_node = NULL;
165e4a11
AK
3253 struct load_segment *seg;
3254
4a4ea47f
ZK
3255 if (!(seg = _get_last_load_segment(node)))
3256 return_0;
165e4a11 3257
24b026e3 3258 if (log_uuid) {
67b25ed4
AK
3259 if (!(seg->uuid = dm_pool_strdup(node->dtree->mem, log_uuid))) {
3260 log_error("log uuid pool_strdup failed");
3261 return 0;
3262 }
df390f17
AK
3263 if ((flags & DM_CORELOG))
3264 /* For pvmove: immediate resume (for size validation) isn't needed. */
f70404ad
ZK
3265 /* pvmove flag passed via unused UUID and its suffix */
3266 node->props.delay_resume_if_new = strstr(log_uuid, "pvmove") ? 2 : 1;
df390f17 3267 else {
9723090c
AK
3268 if (!(log_node = dm_tree_find_node_by_uuid(node->dtree, log_uuid))) {
3269 log_error("Couldn't find mirror log uuid %s.", log_uuid);
3270 return 0;
3271 }
3272
566515c0
PR
3273 if (clustered)
3274 log_node->props.immediate_dev_node = 1;
3275
0a99713e
AK
3276 /* The kernel validates the size of disk logs. */
3277 /* FIXME Propagate to any devices below */
3278 log_node->props.delay_resume_if_new = 0;
3279
9723090c
AK
3280 if (!_link_tree_nodes(node, log_node))
3281 return_0;
3282 }
165e4a11
AK
3283 }
3284
3285 seg->log = log_node;
165e4a11
AK
3286 seg->region_size = region_size;
3287 seg->clustered = clustered;
3288 seg->mirror_area_count = area_count;
dbcb64b8 3289 seg->flags = flags;
165e4a11
AK
3290
3291 return 1;
3292}
3293
b4f1578f 3294int dm_tree_node_add_mirror_target(struct dm_tree_node *node,
a5087866 3295 uint64_t size)
165e4a11 3296{
cbecd3cd 3297 if (!_add_segment(node, SEG_MIRRORED, size))
b4f1578f 3298 return_0;
165e4a11
AK
3299
3300 return 1;
3301}
3302
2e0740f7
JB
3303int dm_tree_node_add_raid_target_with_params(struct dm_tree_node *node,
3304 uint64_t size,
d582be43 3305 const struct dm_tree_node_raid_params *p)
cac52ca4 3306{
f12e3da6 3307 unsigned i;
cac52ca4
JEB
3308 struct load_segment *seg = NULL;
3309
6190ded5
ZK
3310 for (i = 0; i < DM_ARRAY_SIZE(_dm_segtypes) && !seg; ++i)
3311 if (!strcmp(p->raid_type, _dm_segtypes[i].target))
cac52ca4 3312 if (!(seg = _add_segment(node,
6190ded5 3313 _dm_segtypes[i].type, size)))
cac52ca4 3314 return_0;
d582be43
ZK
3315 if (!seg) {
3316 log_error("Unsupported raid type %s.", p->raid_type);
3317 return 0;
3318 }
b2fa9b43 3319
2e0740f7
JB
3320 seg->region_size = p->region_size;
3321 seg->stripe_size = p->stripe_size;
cac52ca4 3322 seg->area_count = 0;
80a6de61
HM
3323 memset(seg->rebuilds, 0, sizeof(seg->rebuilds));
3324 seg->rebuilds[0] = p->rebuilds;
3325 memset(seg->writemostly, 0, sizeof(seg->writemostly));
3326 seg->writemostly[0] = p->writemostly;
2e0740f7 3327 seg->writebehind = p->writebehind;
562c678e
JB
3328 seg->min_recovery_rate = p->min_recovery_rate;
3329 seg->max_recovery_rate = p->max_recovery_rate;
2e0740f7 3330 seg->flags = p->flags;
cac52ca4
JEB
3331
3332 return 1;
3333}
3334
2e0740f7
JB
3335int dm_tree_node_add_raid_target(struct dm_tree_node *node,
3336 uint64_t size,
3337 const char *raid_type,
3338 uint32_t region_size,
3339 uint32_t stripe_size,
80a6de61 3340 uint64_t rebuilds,
2e0740f7
JB
3341 uint64_t flags)
3342{
6e2f7062 3343 struct dm_tree_node_raid_params params = {
80a6de61
HM
3344 .raid_type = raid_type,
3345 .region_size = region_size,
3346 .stripe_size = stripe_size,
3347 .rebuilds = rebuilds,
3348 .flags = flags
3349 };
3350
3351 return dm_tree_node_add_raid_target_with_params(node, size, &params);
3352}
3353
3354/*
3355 * Version 2 of dm_tree_node_add_raid_target() allowing for:
3356 *
3357 * - maximum 253 legs in a raid set (MD kernel limitation)
3358 * - delta_disks for disk add/remove reshaping
3359 * - data_offset for out-of-place reshaping
3360 * - data_copies to cope with odd numbers of raid10 disks
3361 */
3362int dm_tree_node_add_raid_target_with_params_v2(struct dm_tree_node *node,
3363 uint64_t size,
3364 const struct dm_tree_node_raid_params_v2 *p)
3365{
3366 unsigned i;
3367 struct load_segment *seg = NULL;
3368
3369 for (i = 0; i < DM_ARRAY_SIZE(_dm_segtypes) && !seg; ++i)
3370 if (!strcmp(p->raid_type, _dm_segtypes[i].target))
3371 if (!(seg = _add_segment(node,
3372 _dm_segtypes[i].type, size)))
3373 return_0;
3374 if (!seg) {
3375 log_error("Unsupported raid type %s.", p->raid_type);
3376 return 0;
3377 }
3378
3379 seg->region_size = p->region_size;
3380 seg->stripe_size = p->stripe_size;
3381 seg->area_count = 0;
3382 seg->delta_disks = p->delta_disks;
3383 seg->data_offset = p->data_offset;
3384 memcpy(seg->rebuilds, p->rebuilds, sizeof(seg->rebuilds));
3385 memcpy(seg->writemostly, p->writemostly, sizeof(seg->writemostly));
3386 seg->writebehind = p->writebehind;
3387 seg->data_copies = p->data_copies;
3388 seg->min_recovery_rate = p->min_recovery_rate;
3389 seg->max_recovery_rate = p->max_recovery_rate;
3390 seg->flags = p->flags;
3391
3392 return 1;
3393}
3394
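/*
 * A minimal sketch of the v2 call; all field values are hypothetical
 * and the unset bitmap fields stay zero-initialized:
 */
#if 0	/* illustrative only */
	struct dm_tree_node_raid_params_v2 p = {
		.raid_type = "raid10",
		.region_size = 1024,
		.stripe_size = 128,
		.data_copies = 2,
	};

	if (!dm_tree_node_add_raid_target_with_params_v2(node, size, &p))
		return_0;
#endif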
a8480f0f
ZK
3395DM_EXPORT_NEW_SYMBOL(int, dm_tree_node_add_cache_target, 1_02_138)
3396 (struct dm_tree_node *node,
3397 uint64_t size,
3398 uint64_t feature_flags, /* DM_CACHE_FEATURE_* */
3399 const char *metadata_uuid,
3400 const char *data_uuid,
3401 const char *origin_uuid,
3402 const char *policy_name,
3403 const struct dm_config_node *policy_settings,
3404 uint32_t data_block_size)
1ff7e214 3405{
82401953 3406 struct dm_config_node *cn;
6190ded5 3407 struct load_segment *seg;
026344e8 3408 const uint64_t _modemask =
bf79fb1a
ZK
3409 DM_CACHE_FEATURE_PASSTHROUGH |
3410 DM_CACHE_FEATURE_WRITETHROUGH |
3411 DM_CACHE_FEATURE_WRITEBACK;
3412
ddd5a768
ZK
3413 /* Detect unknown (bigger) feature bit */
3414 if (feature_flags >= (DM_CACHE_FEATURE_METADATA2 * 2)) {
3415 log_error("Unsupported cache's feature flags set " FMTu64 ".",
3416 feature_flags);
3417 return 0;
3418 }
3419
bf79fb1a
ZK
3420 switch (feature_flags & _modemask) {
3421 case DM_CACHE_FEATURE_PASSTHROUGH:
3422 case DM_CACHE_FEATURE_WRITEBACK:
3423 if (strcmp(policy_name, "cleaner") == 0) {
3424 /* Enforce writethrough mode for cleaner policy */
3425 feature_flags = ~_modemask;
3426 feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH;
3427 }
3428 /* Fall through */
3429 case DM_CACHE_FEATURE_WRITETHROUGH:
3430 break;
3431 default:
3432 log_error("Invalid cache's feature flag " FMTu64 ".",
3433 feature_flags);
3434 return 0;
08f047eb
ZK
3435 }
3436
3437 if (data_block_size < DM_CACHE_MIN_DATA_BLOCK_SIZE) {
3438 log_error("Data block size %u is lower then %u sectors.",
3439 data_block_size, DM_CACHE_MIN_DATA_BLOCK_SIZE);
3440 return 0;
3441 }
3442
3443 if (data_block_size > DM_CACHE_MAX_DATA_BLOCK_SIZE) {
3444 log_error("Data block size %u is higher then %u sectors.",
3445 data_block_size, DM_CACHE_MAX_DATA_BLOCK_SIZE);
3446 return 0;
3447 }
3448
6190ded5 3449 if (!(seg = _add_segment(node, SEG_CACHE, size)))
1ff7e214
JB
3450 return_0;
3451
3452 if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree,
da268eb4
ZK
3453 data_uuid))) {
3454 log_error("Missing cache's data uuid %s.",
3455 data_uuid);
1ff7e214
JB
3456 return 0;
3457 }
3458 if (!_link_tree_nodes(node, seg->pool))
3459 return_0;
3460
3461 if (!(seg->metadata = dm_tree_find_node_by_uuid(node->dtree,
da268eb4
ZK
3462 metadata_uuid))) {
3463 log_error("Missing cache's metadata uuid %s.",
3464 metadata_uuid);
1ff7e214
JB
3465 return 0;
3466 }
3467 if (!_link_tree_nodes(node, seg->metadata))
3468 return_0;
3469
82401953
ZK
3470 if (!(seg->origin = dm_tree_find_node_by_uuid(node->dtree,
3471 origin_uuid))) {
3472 log_error("Missing cache's origin uuid %s.",
3473 metadata_uuid);
3474 return 0;
3475 }
3476 if (!_link_tree_nodes(node, seg->origin))
3477 return_0;
3478
79e9bde0 3479 seg->data_block_size = data_block_size;
bf79fb1a 3480 seg->flags = feature_flags;
82401953 3481 seg->policy_name = policy_name;
74ae1c5b 3482 seg->migration_threshold = 2048; /* Default migration threshold 1MiB */
1ff7e214 3483
82401953 3484 /* FIXME: better validation missing */
20b22cd0
ZK
3485 if (policy_settings) {
3486 if (!(seg->policy_settings = dm_config_clone_node_with_mem(node->dtree->mem, policy_settings, 0)))
82401953 3487 return_0;
1ff7e214 3488
20b22cd0 3489 for (cn = seg->policy_settings->child; cn; cn = cn->sib) {
82401953
ZK
3490 if (!cn->v || (cn->v->type != DM_CFG_INT)) {
3491 /* For now only <key> = <int> pairs are supported */
3492 log_error("Cache policy parameter %s is without integer value.", cn->key);
3493 return 0;
3494 }
74ae1c5b
ZK
3495 if (strcmp(cn->key, "migration_threshold") == 0) {
3496 seg->migration_threshold = cn->v->v.i;
3497 cn->v = NULL; /* skip this entry */
3498 } else
3499 seg->policy_argc++;
82401953
ZK
3500 }
3501 }
1ff7e214 3502
74ae1c5b
ZK
 3503	/* Ensure some throughput is always available for the cache to proceed */
3504 if (seg->migration_threshold < data_block_size * 8)
3505 seg->migration_threshold = data_block_size * 8;
3506
1ff7e214
JB
3507 return 1;
3508}
3509
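/*
 * Editor's note - an illustrative sketch, not part of the original source:
 * loading a cache segment in writethrough mode with the "smq" policy and
 * 64KiB (128-sector) data blocks.  Exactly one mode bit from the mask
 * above may be set, and bits at or above DM_CACHE_FEATURE_METADATA2 * 2
 * are rejected.  The uuids are hypothetical and must already exist as
 * nodes in the same dm_tree.
 */
#if 0
static int _example_add_cache(struct dm_tree_node *node, uint64_t size)
{
	return dm_tree_node_add_cache_target(node, size,
					     DM_CACHE_FEATURE_WRITETHROUGH,
					     "CMETA-UUID", "CDATA-UUID",
					     "CORIG-UUID", "smq",
					     NULL /* no policy settings */,
					     128 /* data_block_size */);
}
#endif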
b262f3e1
ZK
3510int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
3511 uint64_t size,
3512 const char *rlog_uuid,
3513 const char *rlog_type,
3514 unsigned rsite_index,
3515 dm_replicator_mode_t mode,
3516 uint32_t async_timeout,
3517 uint64_t fall_behind_data,
3518 uint32_t fall_behind_ios)
3519{
e447d7ca
ZK
3520 log_error("Replicator segment is unsupported.");
3521 return 0;
b262f3e1
ZK
3522}
3523
3524/* Appends device node to Replicator */
3525int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node,
3526 uint64_t size,
3527 const char *replicator_uuid,
3528 uint64_t rdevice_index,
3529 const char *rdev_uuid,
3530 unsigned rsite_index,
3531 const char *slog_uuid,
3532 uint32_t slog_flags,
3533 uint32_t slog_region_size)
3534{
e447d7ca
ZK
3535 log_error("Replicator targer is unsupported.");
3536 return 0;
b262f3e1
ZK
3537}
3538
dcd4afc7
ZK
3539static struct load_segment *_get_single_load_segment(struct dm_tree_node *node,
3540 unsigned type)
3541{
3542 struct load_segment *seg;
3543
4a4ea47f
ZK
3544 if (!(seg = _get_last_load_segment(node)))
3545 return_NULL;
3546
3547 /* Never used past _load_node(), so can test segment_count */
dcd4afc7
ZK
3548 if (node->props.segment_count != 1) {
3549 log_error("Node %s must have only one segment.",
6190ded5 3550 _dm_segtypes[type].target);
dcd4afc7
ZK
3551 return NULL;
3552 }
3553
dcd4afc7
ZK
3554 if (seg->type != type) {
3555 log_error("Node %s has segment type %s.",
6190ded5
ZK
3556 _dm_segtypes[type].target,
3557 _dm_segtypes[seg->type].target);
dcd4afc7
ZK
3558 return NULL;
3559 }
3560
3561 return seg;
3562}
3563
5668fe04
ZK
3564static int _thin_validate_device_id(uint32_t device_id)
3565{
3566 if (device_id > DM_THIN_MAX_DEVICE_ID) {
3567 log_error("Device id %u is higher then %u.",
3568 device_id, DM_THIN_MAX_DEVICE_ID);
3569 return 0;
3570 }
3571
3572 return 1;
3573}
3574
4251236e
ZK
3575int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node,
3576 uint64_t size,
e0ea24be 3577 uint64_t transaction_id,
4251236e 3578 const char *metadata_uuid,
5668fd6a 3579 const char *pool_uuid,
4251236e 3580 uint32_t data_block_size,
e9156c2b 3581 uint64_t low_water_mark,
460c5991 3582 unsigned skip_block_zeroing)
48030389
ZK
3583{
3584 return dm_tree_node_add_thin_pool_target_v1(node, size, transaction_id,
3585 metadata_uuid, pool_uuid,
3586 data_block_size,
3587 low_water_mark,
3588 skip_block_zeroing,
3589 1);
3590}
3591
3592int dm_tree_node_add_thin_pool_target_v1(struct dm_tree_node *node,
3593 uint64_t size,
3594 uint64_t transaction_id,
3595 const char *metadata_uuid,
3596 const char *pool_uuid,
3597 uint32_t data_block_size,
3598 uint64_t low_water_mark,
3599 unsigned skip_block_zeroing,
3600 unsigned crop_metadata)
4251236e 3601{
7162a25b
ZK
3602 struct load_segment *seg, *mseg;
3603 uint64_t devsize = 0;
4251236e 3604
3f53c059 3605 if (data_block_size < DM_THIN_MIN_DATA_BLOCK_SIZE) {
565a4bfc 3606		log_error("Data block size %u is lower than %u sectors.",
3f53c059 3607 data_block_size, DM_THIN_MIN_DATA_BLOCK_SIZE);
4251236e
ZK
3608 return 0;
3609 }
3610
3f53c059 3611 if (data_block_size > DM_THIN_MAX_DATA_BLOCK_SIZE) {
565a4bfc 3612		log_error("Data block size %u is higher than %u sectors.",
3f53c059 3613 data_block_size, DM_THIN_MAX_DATA_BLOCK_SIZE);
4251236e
ZK
3614 return 0;
3615 }
3616
3617 if (!(seg = _add_segment(node, SEG_THIN_POOL, size)))
3618 return_0;
3619
3620 if (!(seg->metadata = dm_tree_find_node_by_uuid(node->dtree, metadata_uuid))) {
3621 log_error("Missing metadata uuid %s.", metadata_uuid);
3622 return 0;
3623 }
3624
3625 if (!_link_tree_nodes(node, seg->metadata))
3626 return_0;
3627
48030389
ZK
3628 if (crop_metadata)
3629 /* FIXME: more complex target may need more tweaks */
3630 dm_list_iterate_items(mseg, &seg->metadata->props.segs) {
3631 devsize += mseg->size;
3632 if (devsize > DM_THIN_MAX_METADATA_SIZE) {
 3633				log_debug_activation("Ignoring %" PRIu64 " sectors of device.",
3634 devsize - DM_THIN_MAX_METADATA_SIZE);
3635 mseg->size -= (devsize - DM_THIN_MAX_METADATA_SIZE);
3636 devsize = DM_THIN_MAX_METADATA_SIZE;
3637 /* FIXME: drop remaining segs */
3638 }
7162a25b 3639 }
7162a25b 3640
4251236e
ZK
3641 if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) {
3642 log_error("Missing pool uuid %s.", pool_uuid);
3643 return 0;
3644 }
3645
3646 if (!_link_tree_nodes(node, seg->pool))
3647 return_0;
3648
1946a453
ZK
 3649	/* Clear the delay_resume_if_new flag so the metadata and data devices get resumed */
3650 seg->metadata->props.delay_resume_if_new = 0;
3651 seg->pool->props.delay_resume_if_new = 0;
3652
82ae02bc
ZK
 3653	/* Preload must not resume an extended running thin-pool before the extension is committed */
3654 node->props.delay_resume_if_extended = 1;
3655
89233544
ZK
3656 /* Validate only transaction_id > 0 when activating thin-pool */
3657 node->props.send_messages = transaction_id ? 1 : 0;
bbcd37e4 3658 seg->transaction_id = transaction_id;
e9156c2b 3659 seg->low_water_mark = low_water_mark;
e0ea24be 3660 seg->data_block_size = data_block_size;
460c5991 3661 seg->skip_block_zeroing = skip_block_zeroing;
25e6ab87
ZK
3662 dm_list_init(&seg->thin_messages);
3663
3664 return 1;
3665}
3666
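/*
 * Editor's note - an illustrative sketch, not part of the original source:
 * a minimal thin-pool load.  DM_THIN_MIN_DATA_BLOCK_SIZE (128 sectors,
 * 64KiB) is the smallest legal data_block_size; transaction_id 0 means no
 * message validation happens at activation (see send_messages above).
 * The uuids are hypothetical and must already be nodes in the same dm_tree.
 */
#if 0
static int _example_add_thin_pool(struct dm_tree_node *node, uint64_t size)
{
	return dm_tree_node_add_thin_pool_target(node, size,
						 0 /* transaction_id */,
						 "TMETA-UUID", "TDATA-UUID",
						 DM_THIN_MIN_DATA_BLOCK_SIZE,
						 0 /* low_water_mark */,
						 0 /* keep block zeroing */);
}
#endif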
3667int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node,
2e732e96
ZK
3668 dm_thin_message_t type,
3669 uint64_t id1, uint64_t id2)
25e6ab87 3670{
25e6ab87 3671 struct thin_message *tm;
dcd4afc7 3672 struct load_segment *seg;
25e6ab87 3673
dcd4afc7
ZK
3674 if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
3675 return_0;
25e6ab87
ZK
3676
3677 if (!(tm = dm_pool_zalloc(node->dtree->mem, sizeof (*tm)))) {
3678 log_error("Failed to allocate thin message.");
3679 return 0;
3680 }
3681
2e732e96 3682 switch (type) {
25e6ab87 3683 case DM_THIN_MESSAGE_CREATE_SNAP:
759b9592 3684		/* If the thin origin is active, it must be suspended first! */
2e732e96 3685 if (id1 == id2) {
759b9592 3686 log_error("Cannot use same device id for origin and its snapshot.");
25e6ab87
ZK
3687 return 0;
3688 }
2e732e96
ZK
3689 if (!_thin_validate_device_id(id1) ||
3690 !_thin_validate_device_id(id2))
25e6ab87 3691 return_0;
2e732e96
ZK
3692 tm->message.u.m_create_snap.device_id = id1;
3693 tm->message.u.m_create_snap.origin_id = id2;
25e6ab87
ZK
3694 break;
3695 case DM_THIN_MESSAGE_CREATE_THIN:
2e732e96 3696 if (!_thin_validate_device_id(id1))
25e6ab87 3697 return_0;
2e732e96 3698 tm->message.u.m_create_thin.device_id = id1;
660a42bc 3699 tm->expected_errno = EEXIST;
25e6ab87
ZK
3700 break;
3701 case DM_THIN_MESSAGE_DELETE:
2e732e96 3702 if (!_thin_validate_device_id(id1))
25e6ab87 3703 return_0;
2e732e96 3704 tm->message.u.m_delete.device_id = id1;
660a42bc 3705 tm->expected_errno = ENODATA;
25e6ab87 3706 break;
25e6ab87 3707 case DM_THIN_MESSAGE_SET_TRANSACTION_ID:
19e3f8c3 3708 if ((id1 + 1) != id2) {
2e732e96
ZK
3709 log_error("New transaction id must be sequential.");
3710 return 0; /* FIXME: Maybe too strict here? */
3711 }
19e3f8c3 3712 if (id2 != seg->transaction_id) {
2e732e96 3713 log_error("Current transaction id is different from thin pool.");
25e6ab87
ZK
3714 return 0; /* FIXME: Maybe too strict here? */
3715 }
2e732e96
ZK
3716 tm->message.u.m_set_transaction_id.current_id = id1;
3717 tm->message.u.m_set_transaction_id.new_id = id2;
25e6ab87
ZK
3718 break;
3719 default:
2e732e96 3720 log_error("Unsupported message type %d.", (int) type);
25e6ab87
ZK
3721 return 0;
3722 }
3723
2e732e96 3724 tm->message.type = type;
25e6ab87 3725 dm_list_add(&seg->thin_messages, &tm->list);
a900d150
ZK
 3726	/* A value >1 indicates there really are messages to send */
3727 node->props.send_messages = 2;
4251236e
ZK
3728
3729 return 1;
3730}
3731
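/*
 * Editor's note - an illustrative sketch, not part of the original source:
 * queueing messages to be sent when the thin-pool is resumed.  Per the
 * checks above, a snapshot id must differ from its origin, every id must
 * pass _thin_validate_device_id(), and a transaction id bump must be
 * sequential and match the transaction_id the pool was loaded with (here
 * assumed to be 1).  All ids are hypothetical.
 */
#if 0
static int _example_queue_pool_messages(struct dm_tree_node *pool_node)
{
	if (!dm_tree_node_add_thin_pool_message(pool_node,
						DM_THIN_MESSAGE_CREATE_THIN,
						1 /* new device_id */, 0))
		return_0;

	/* Bump transaction id 0 -> 1; id2 must equal id1 + 1 */
	return dm_tree_node_add_thin_pool_message(pool_node,
						  DM_THIN_MESSAGE_SET_TRANSACTION_ID,
						  0, 1);
}
#endif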
dcd4afc7
ZK
3732int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node,
3733 unsigned ignore,
3734 unsigned no_passdown)
3735{
3736 struct load_segment *seg;
3737
3738 if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
3739 return_0;
3740
3741 seg->ignore_discard = ignore;
3742 seg->no_discard_passdown = no_passdown;
3743
3744 return 1;
3745}
3746
2908ab3e
ZK
3747int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node,
3748 unsigned error_if_no_space)
3749{
3750 struct load_segment *seg;
3751
3752 if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
3753 return_0;
3754
3755 seg->error_if_no_space = error_if_no_space;
3756
3757 return 1;
3758}
3759
69132f55
ZK
3760int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node,
3761 unsigned read_only)
3762{
3763 struct load_segment *seg;
3764
3765 if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
3766 return_0;
3767
3768 seg->read_only = read_only;
3769
3770 return 1;
3771}
3772
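/*
 * Editor's note - an illustrative sketch, not part of the original source:
 * the setters above and below adjust the single thin-pool load segment
 * created by dm_tree_node_add_thin_pool_target(); they all fail unless
 * the node has exactly one SEG_THIN_POOL segment.
 */
#if 0
static int _example_tune_pool(struct dm_tree_node *pool_node)
{
	if (!dm_tree_node_set_thin_pool_discard(pool_node,
						0 /* ignore */,
						1 /* no_passdown */))
		return_0;

	return dm_tree_node_set_thin_pool_error_if_no_space(pool_node, 1);
}
#endif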
4251236e
ZK
3773int dm_tree_node_add_thin_target(struct dm_tree_node *node,
3774 uint64_t size,
4d25c81b 3775 const char *pool_uuid,
4251236e
ZK
3776 uint32_t device_id)
3777{
4d25c81b 3778 struct dm_tree_node *pool;
4251236e
ZK
3779 struct load_segment *seg;
3780
4d25c81b
ZK
3781 if (!(pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) {
3782 log_error("Missing thin pool uuid %s.", pool_uuid);
4251236e
ZK
3783 return 0;
3784 }
3785
4d25c81b 3786 if (!_link_tree_nodes(node, pool))
4251236e
ZK
3787 return_0;
3788
6744c143
ZK
3789 if (!_thin_validate_device_id(device_id))
3790 return_0;
4d25c81b 3791
6744c143
ZK
3792 if (!(seg = _add_segment(node, SEG_THIN, size)))
3793 return_0;
4d25c81b 3794
6744c143
ZK
3795 seg->pool = pool;
3796 seg->device_id = device_id;
1419bf1c 3797
4251236e
ZK
3798 return 1;
3799}
3800
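/*
 * Editor's note - an illustrative sketch, not part of the original source:
 * a thin volume referencing its pool by uuid.  The pool node must already
 * exist in the same dm_tree, and device_id may not exceed
 * DM_THIN_MAX_DEVICE_ID.  The uuid and id are hypothetical.
 */
#if 0
static int _example_add_thin(struct dm_tree_node *node, uint64_t size)
{
	return dm_tree_node_add_thin_target(node, size,
					    "POOL-UUID", 1 /* device_id */);
}
#endif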
dcd4afc7
ZK
3801int dm_tree_node_set_thin_external_origin(struct dm_tree_node *node,
3802 const char *external_uuid)
3803{
3804 struct dm_tree_node *external;
3805 struct load_segment *seg;
3806
3807 if (!(seg = _get_single_load_segment(node, SEG_THIN)))
3808 return_0;
3809
3810 if (!(external = dm_tree_find_node_by_uuid(node->dtree,
3811 external_uuid))) {
3812 log_error("Missing thin external origin uuid %s.",
3813 external_uuid);
3814 return 0;
3815 }
3816
3817 if (!_link_tree_nodes(node, external))
3818 return_0;
3819
3820 seg->external = external;
3821
3822 return 1;
3823}
077c4d1a 3824
b4f1578f 3825static int _add_area(struct dm_tree_node *node, struct load_segment *seg, struct dm_tree_node *dev_node, uint64_t offset)
165e4a11
AK
3826{
3827 struct seg_area *area;
3828
b4f1578f 3829 if (!(area = dm_pool_zalloc(node->dtree->mem, sizeof (*area)))) {
165e4a11
AK
3830 log_error("Failed to allocate target segment area.");
3831 return 0;
3832 }
3833
3834 area->dev_node = dev_node;
3835 area->offset = offset;
3836
2c44337b 3837 dm_list_add(&seg->areas, &area->list);
165e4a11
AK
3838 seg->area_count++;
3839
3840 return 1;
3841}
3842
b4f1578f 3843int dm_tree_node_add_target_area(struct dm_tree_node *node,
a5087866
ZK
3844 const char *dev_name,
3845 const char *uuid,
3846 uint64_t offset)
165e4a11
AK
3847{
3848 struct load_segment *seg;
3849 struct stat info;
b4f1578f 3850 struct dm_tree_node *dev_node;
165e4a11
AK
3851
3852 if ((!dev_name || !*dev_name) && (!uuid || !*uuid)) {
b4f1578f 3853 log_error("dm_tree_node_add_target_area called without device");
165e4a11
AK
3854 return 0;
3855 }
3856
3857 if (uuid) {
b4f1578f 3858 if (!(dev_node = dm_tree_find_node_by_uuid(node->dtree, uuid))) {
165e4a11
AK
3859 log_error("Couldn't find area uuid %s.", uuid);
3860 return 0;
3861 }
b4f1578f
AK
3862 if (!_link_tree_nodes(node, dev_node))
3863 return_0;
165e4a11 3864 } else {
6d04311e 3865 if (stat(dev_name, &info) < 0) {
165e4a11
AK
3866 log_error("Device %s not found.", dev_name);
3867 return 0;
3868 }
3869
40e5fd8b 3870 if (!S_ISBLK(info.st_mode)) {
165e4a11
AK
3871 log_error("Device %s is not a block device.", dev_name);
3872 return 0;
3873 }
3874
3875 /* FIXME Check correct macro use */
cda69e17 3876 if (!(dev_node = _add_dev(node->dtree, node, MAJOR(info.st_rdev),
bd2500e6 3877 MINOR(info.st_rdev), 0, 0)))
b4f1578f 3878 return_0;
165e4a11
AK
3879 }
3880
4a4ea47f
ZK
3881 if (!(seg = _get_last_load_segment(node)))
3882 return_0;
165e4a11 3883
b4f1578f
AK
3884 if (!_add_area(node, seg, dev_node, offset))
3885 return_0;
165e4a11
AK
3886
3887 return 1;
db208f51 3888}
bd90c6b2 3889
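/*
 * Editor's note - an illustrative sketch, not part of the original source:
 * appending areas to the most recently added segment, first by uuid of a
 * node already in the tree, then by block-device path (which is stat()ed
 * and added by major:minor).  Names and offsets are hypothetical.
 */
#if 0
static int _example_add_areas(struct dm_tree_node *node)
{
	if (!dm_tree_node_add_target_area(node, NULL /* dev_name */,
					  "DATA-UUID", 0 /* offset */))
		return_0;

	return dm_tree_node_add_target_area(node, "/dev/sdb",
					    NULL /* uuid */,
					    2048 /* offset in sectors */);
}
#endif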
6d04311e
JEB
3890int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset)
3891{
3892 struct load_segment *seg;
3893
4a4ea47f
ZK
3894 if (!(seg = _get_last_load_segment(node)))
3895 return_0;
6d04311e 3896
415c0690 3897 switch (seg->type) {
bf8d0098 3898 case SEG_RAID0:
d8c2677a 3899 case SEG_RAID0_META:
415c0690
AK
3900 case SEG_RAID1:
3901 case SEG_RAID4:
60ddd05f 3902 case SEG_RAID5_N:
415c0690
AK
3903 case SEG_RAID5_LA:
3904 case SEG_RAID5_RA:
3905 case SEG_RAID5_LS:
3906 case SEG_RAID5_RS:
3673ce48 3907 case SEG_RAID6_N_6:
415c0690
AK
3908 case SEG_RAID6_ZR:
3909 case SEG_RAID6_NR:
3910 case SEG_RAID6_NC:
a4bbaa3b
HM
3911 case SEG_RAID6_LS_6:
3912 case SEG_RAID6_RS_6:
3913 case SEG_RAID6_LA_6:
3914 case SEG_RAID6_RA_6:
415c0690
AK
3915 break;
3916 default:
3917 log_error("dm_tree_node_add_null_area() called on an unsupported segment type");
3918 return 0;
3919 }
3920
6d04311e
JEB
3921 if (!_add_area(node, seg, NULL, offset))
3922 return_0;
3923
3924 return 1;
3925}
7e35dfff
ZK
3926
3927void dm_tree_node_set_callback(struct dm_tree_node *dnode,
3928 dm_node_callback_fn cb, void *data)
3929{
3930 dnode->callback = cb;
3931 dnode->callback_data = data;
3932}
9ef820a2 3933
1cedbaf1 3934#if defined(GNU_SYMVER)
9ef820a2 3935/*
bb20fac4 3936 * Backward compatible implementations.
9ef820a2 3937 *
bb20fac4
ZK
3938 * Keep these at the end of the file to make sure that
 3939 * no code in this file accidentally calls them.
9ef820a2 3940 */
bb20fac4
ZK
3941
3942/* Backward compatible dm_tree_node_size_changed() implementations. */
a8480f0f 3943DM_EXPORT_SYMBOL_BASE(dm_tree_node_size_changed)
9ef820a2 3944int dm_tree_node_size_changed_base(const struct dm_tree_node *dnode);
9ef820a2
ZK
3945int dm_tree_node_size_changed_base(const struct dm_tree_node *dnode)
3946{
 3947	/* The base version does not distinguish between smaller and bigger */
3948 return dm_tree_node_size_changed(dnode) ? 1 : 0;
3949}
bb20fac4
ZK
3950
3951/*
3952 * Retain ABI compatibility after adding the DM_CACHE_FEATURE_METADATA2
3953 * in version 1.02.138.
3954 *
3955 * Binaries compiled against version 1.02.138 onwards will use
3956 * the new function dm_tree_node_add_cache_target which detects unknown
3957 * feature flags and returns error for them.
3958 */
a8480f0f 3959DM_EXPORT_SYMBOL_BASE(dm_tree_node_add_cache_target)
bb20fac4
ZK
3960int dm_tree_node_add_cache_target_base(struct dm_tree_node *node,
3961 uint64_t size,
3962 uint64_t feature_flags, /* DM_CACHE_FEATURE_* */
3963 const char *metadata_uuid,
3964 const char *data_uuid,
3965 const char *origin_uuid,
3966 const char *policy_name,
3967 const struct dm_config_node *policy_settings,
3968 uint32_t data_block_size);
bb20fac4
ZK
3969int dm_tree_node_add_cache_target_base(struct dm_tree_node *node,
3970 uint64_t size,
3971 uint64_t feature_flags,
3972 const char *metadata_uuid,
3973 const char *data_uuid,
3974 const char *origin_uuid,
3975 const char *policy_name,
3976 const struct dm_config_node *policy_settings,
3977 uint32_t data_block_size)
3978{
 3979	/* The old version supported only these FEATURE bits; others were ignored, so mask them out */
026344e8 3980 const uint64_t mask =
bb20fac4
ZK
3981 DM_CACHE_FEATURE_WRITEBACK |
3982 DM_CACHE_FEATURE_WRITETHROUGH |
3983 DM_CACHE_FEATURE_PASSTHROUGH;
3984
026344e8 3985 return dm_tree_node_add_cache_target(node, size, feature_flags & mask,
bb20fac4
ZK
3986 metadata_uuid, data_uuid, origin_uuid,
3987 policy_name, policy_settings, data_block_size);
3988}
9ef820a2 3989#endif