lvm2.git: lib/metadata/lv_manip.c
1 /*
2 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
3 * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
4 *
5 * This file is part of LVM2.
6 *
7 * This copyrighted material is made available to anyone wishing to use,
8 * modify, copy, or redistribute it subject to the terms and conditions
9 * of the GNU Lesser General Public License v.2.1.
10 *
11 * You should have received a copy of the GNU Lesser General Public License
12 * along with this program; if not, write to the Free Software Foundation,
13 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14 */
15
16 #include "lib.h"
17 #include "metadata.h"
18 #include "locking.h"
19 #include "pv_map.h"
20 #include "lvm-string.h"
21 #include "toolcontext.h"
22 #include "lv_alloc.h"
23 #include "pv_alloc.h"
24 #include "display.h"
25 #include "segtype.h"
26 #include "archiver.h"
27 #include "activate.h"
28 #include "str_list.h"
29 #include "defaults.h"
30
31 typedef enum {
32 	PREFERRED,	/* Area satisfies the policy constraint - reserve it immediately */
33 	USE_AREA,	/* Candidate area - store it for later sorting and selection */
34 	NEXT_PV,	/* Skip the remaining areas on this PV and move to the next PV */
35 	NEXT_AREA	/* Skip this area and try the next one on the same PV */
36 } area_use_t;
37
38 /* FIXME: remove RAID_METADATA_AREA_LEN macro after defining 'raid_log_extents' */
39 #define RAID_METADATA_AREA_LEN 1
40
41 /* FIXME These ended up getting used differently from first intended. Refactor. */
42 /* Only one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG, A_CLING_TO_ALLOCED may be set */
43 #define A_CONTIGUOUS_TO_LVSEG 0x01 /* Must be contiguous to an existing segment */
44 #define A_CLING_TO_LVSEG 0x02 /* Must use same disks as existing LV segment */
45 #define A_CLING_TO_ALLOCED 0x04 /* Must use same disks as already-allocated segment */
46
47 #define A_CLING_BY_TAGS 0x08 /* Must match tags against existing segment */
48 #define A_CAN_SPLIT 0x10
49
50 /*
51 * Constant parameters during a single allocation attempt.
52 */
53 struct alloc_parms {
54 alloc_policy_t alloc;
55 unsigned flags; /* Holds A_* */
56 struct lv_segment *prev_lvseg;
57 uint32_t extents_still_needed;
58 };
59
60 /*
61 * Holds varying state of each allocation attempt.
62 */
63 struct alloc_state {
64 struct pv_area_used *areas;
65 uint32_t areas_size;
66 uint32_t log_area_count_still_needed; /* Number of areas still needing to be allocated for the log */
67 uint32_t allocated; /* Total number of extents allocated so far */
68 };
69
70 struct lv_names {
71 const char *old;
72 const char *new;
73 };
74
75 int add_seg_to_segs_using_this_lv(struct logical_volume *lv,
76 struct lv_segment *seg)
77 {
78 struct seg_list *sl;
79
80 dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
81 if (sl->seg == seg) {
82 sl->count++;
83 return 1;
84 }
85 }
86
87 	log_very_verbose("Adding %s:%" PRIu32 " as a user of %s",
88 seg->lv->name, seg->le, lv->name);
89
90 if (!(sl = dm_pool_zalloc(lv->vg->vgmem, sizeof(*sl)))) {
91 log_error("Failed to allocate segment list");
92 return 0;
93 }
94
95 sl->count = 1;
96 sl->seg = seg;
97 dm_list_add(&lv->segs_using_this_lv, &sl->list);
98
99 return 1;
100 }
101
102 int remove_seg_from_segs_using_this_lv(struct logical_volume *lv,
103 struct lv_segment *seg)
104 {
105 struct seg_list *sl;
106
107 dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
108 if (sl->seg != seg)
109 continue;
110 if (sl->count > 1)
111 sl->count--;
112 else {
113 log_very_verbose("%s:%" PRIu32 " is no longer a user "
114 "of %s", seg->lv->name, seg->le,
115 lv->name);
116 dm_list_del(&sl->list);
117 }
118 return 1;
119 }
120
121 return 0;
122 }
123
124 /*
125 * This is a function specialized for the common case where there is
126 * only one segment which uses the LV.
127 * e.g. the LV is a layer inserted by insert_layer_for_lv().
128 *
129 * In general, walk through lv->segs_using_this_lv.
130 */
131 struct lv_segment *get_only_segment_using_this_lv(struct logical_volume *lv)
132 {
133 struct seg_list *sl;
134
135 if (dm_list_size(&lv->segs_using_this_lv) != 1) {
136 log_error("%s is expected to have only one segment using it, "
137 "while it has %d", lv->name,
138 dm_list_size(&lv->segs_using_this_lv));
139 return NULL;
140 }
141
142 dm_list_iterate_items(sl, &lv->segs_using_this_lv)
143 break; /* first item */
144
145 if (sl->count != 1) {
146 log_error("%s is expected to have only one segment using it, "
147 "while %s:%" PRIu32 " uses it %d times",
148 lv->name, sl->seg->lv->name, sl->seg->le, sl->count);
149 return NULL;
150 }
151
152 return sl->seg;
153 }
154
155 /*
156 * PVs used by a segment of an LV
157 */
158 struct seg_pvs {
159 struct dm_list list;
160
161 struct dm_list pvs; /* struct pv_list */
162
163 uint32_t le;
164 uint32_t len;
165 };
166
167 static struct seg_pvs *_find_seg_pvs_by_le(struct dm_list *list, uint32_t le)
168 {
169 struct seg_pvs *spvs;
170
171 dm_list_iterate_items(spvs, list)
172 if (le >= spvs->le && le < spvs->le + spvs->len)
173 return spvs;
174
175 return NULL;
176 }
177
178 /*
179 * Find first unused LV number.
180 */
181 uint32_t find_free_lvnum(struct logical_volume *lv)
182 {
183 int lvnum_used[MAX_RESTRICTED_LVS + 1];
184 uint32_t i = 0;
185 struct lv_list *lvl;
186 int lvnum;
187
188 memset(&lvnum_used, 0, sizeof(lvnum_used));
189
190 dm_list_iterate_items(lvl, &lv->vg->lvs) {
191 lvnum = lvnum_from_lvid(&lvl->lv->lvid);
192 if (lvnum <= MAX_RESTRICTED_LVS)
193 lvnum_used[lvnum] = 1;
194 }
195
196 while (lvnum_used[i])
197 i++;
198
199 /* FIXME What if none are free? */
200
201 return i;
202 }
203
204 /*
205 * All lv_segments get created here.
206 */
207 struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
208 struct logical_volume *lv,
209 uint32_t le, uint32_t len,
210 uint64_t status,
211 uint32_t stripe_size,
212 struct logical_volume *log_lv,
213 struct logical_volume *thin_pool_lv,
214 uint32_t area_count,
215 uint32_t area_len,
216 uint32_t chunk_size,
217 uint32_t region_size,
218 uint32_t extents_copied,
219 struct lv_segment *pvmove_source_seg)
220 {
221 struct lv_segment *seg;
222 struct dm_pool *mem = lv->vg->vgmem;
223 uint32_t areas_sz = area_count * sizeof(*seg->areas);
224
225 if (!segtype) {
226 log_error(INTERNAL_ERROR "alloc_lv_segment: Missing segtype.");
227 return NULL;
228 }
229
230 if (!(seg = dm_pool_zalloc(mem, sizeof(*seg))))
231 return_NULL;
232
233 if (!(seg->areas = dm_pool_zalloc(mem, areas_sz))) {
234 dm_pool_free(mem, seg);
235 return_NULL;
236 }
237
238 if (segtype_is_raid(segtype) &&
239 !(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) {
240 dm_pool_free(mem, seg); /* frees everything alloced since seg */
241 return_NULL;
242 }
243
244 seg->segtype = segtype;
245 seg->lv = lv;
246 seg->le = le;
247 seg->len = len;
248 seg->status = status;
249 seg->stripe_size = stripe_size;
250 seg->area_count = area_count;
251 seg->area_len = area_len;
252 seg->chunk_size = chunk_size;
253 seg->region_size = region_size;
254 seg->extents_copied = extents_copied;
255 seg->pvmove_source_seg = pvmove_source_seg;
256 dm_list_init(&seg->tags);
257 dm_list_init(&seg->thin_messages);
258
259 if (thin_pool_lv) {
260 		/* If thin_pool_lv is itself a thin volume, a thin snapshot of it is being created */
261 if (lv_is_thin_volume(thin_pool_lv)) {
262 seg->transaction_id = first_seg(first_seg(thin_pool_lv)->pool_lv)->transaction_id;
263 if (!attach_pool_lv(seg, first_seg(thin_pool_lv)->pool_lv, thin_pool_lv))
264 return_NULL;
265 } else {
266 seg->transaction_id = first_seg(thin_pool_lv)->transaction_id;
267 if (!attach_pool_lv(seg, thin_pool_lv, NULL))
268 return_NULL;
269 }
270 }
271
272 if (log_lv && !attach_mirror_log(seg, log_lv))
273 return_NULL;
274
275 return seg;
276 }
277
278 struct lv_segment *alloc_snapshot_seg(struct logical_volume *lv,
279 uint64_t status, uint32_t old_le_count)
280 {
281 struct lv_segment *seg;
282 const struct segment_type *segtype;
283
284 segtype = get_segtype_from_string(lv->vg->cmd, "snapshot");
285 if (!segtype) {
286 log_error("Failed to find snapshot segtype");
287 return NULL;
288 }
289
290 if (!(seg = alloc_lv_segment(segtype, lv, old_le_count,
291 lv->le_count - old_le_count, status, 0,
292 NULL, NULL, 0, lv->le_count - old_le_count,
293 0, 0, 0, NULL))) {
294 log_error("Couldn't allocate new snapshot segment.");
295 return NULL;
296 }
297
298 dm_list_add(&lv->segments, &seg->list);
299 lv->status |= VIRTUAL;
300
301 return seg;
302 }
303
304 void release_lv_segment_area(struct lv_segment *seg, uint32_t s,
305 uint32_t area_reduction)
306 {
307 if (seg_type(seg, s) == AREA_UNASSIGNED)
308 return;
309
310 if (seg_type(seg, s) == AREA_PV) {
311 if (release_pv_segment(seg_pvseg(seg, s), area_reduction) &&
312 seg->area_len == area_reduction)
313 seg_type(seg, s) = AREA_UNASSIGNED;
314 return;
315 }
316
317 if ((seg_lv(seg, s)->status & MIRROR_IMAGE) ||
318 (seg_lv(seg, s)->status & THIN_POOL_DATA)) {
319 if (!lv_reduce(seg_lv(seg, s), area_reduction))
320 stack; /* FIXME: any upper level reporting */
321 return;
322 }
323
324 if (seg_lv(seg, s)->status & RAID_IMAGE) {
325 /*
326 * FIXME: Use lv_reduce not lv_remove
327 * We use lv_remove for now, because I haven't figured out
328 * why lv_reduce won't remove the LV.
329 lv_reduce(seg_lv(seg, s), area_reduction);
330 */
331 if (area_reduction != seg->area_len) {
332 log_error("Unable to reduce RAID LV - operation not implemented.");
333 return;
334 } else {
335 if (!lv_remove(seg_lv(seg, s))) {
336 log_error("Failed to remove RAID image %s",
337 seg_lv(seg, s)->name);
338 return;
339 }
340 }
341
342 /* Remove metadata area if image has been removed */
343 if (area_reduction == seg->area_len) {
344 if (!lv_reduce(seg_metalv(seg, s),
345 seg_metalv(seg, s)->le_count)) {
346 log_error("Failed to remove RAID meta-device %s",
347 seg_metalv(seg, s)->name);
348 return;
349 }
350 }
351 return;
352 }
353
354 if (area_reduction == seg->area_len) {
355 log_very_verbose("Remove %s:%" PRIu32 "[%" PRIu32 "] from "
356 "the top of LV %s:%" PRIu32,
357 seg->lv->name, seg->le, s,
358 seg_lv(seg, s)->name, seg_le(seg, s));
359
360 remove_seg_from_segs_using_this_lv(seg_lv(seg, s), seg);
361 seg_lv(seg, s) = NULL;
362 seg_le(seg, s) = 0;
363 seg_type(seg, s) = AREA_UNASSIGNED;
364 }
365 }
366
367 /*
368 * Move a segment area from one segment to another
369 */
370 int move_lv_segment_area(struct lv_segment *seg_to, uint32_t area_to,
371 struct lv_segment *seg_from, uint32_t area_from)
372 {
373 struct physical_volume *pv;
374 struct logical_volume *lv;
375 uint32_t pe, le;
376
377 switch (seg_type(seg_from, area_from)) {
378 case AREA_PV:
379 pv = seg_pv(seg_from, area_from);
380 pe = seg_pe(seg_from, area_from);
381
382 release_lv_segment_area(seg_from, area_from,
383 seg_from->area_len);
384 release_lv_segment_area(seg_to, area_to, seg_to->area_len);
385
386 if (!set_lv_segment_area_pv(seg_to, area_to, pv, pe))
387 return_0;
388
389 break;
390
391 case AREA_LV:
392 lv = seg_lv(seg_from, area_from);
393 le = seg_le(seg_from, area_from);
394
395 release_lv_segment_area(seg_from, area_from,
396 seg_from->area_len);
397 release_lv_segment_area(seg_to, area_to, seg_to->area_len);
398
399 if (!set_lv_segment_area_lv(seg_to, area_to, lv, le, 0))
400 return_0;
401
402 break;
403
404 case AREA_UNASSIGNED:
405 release_lv_segment_area(seg_to, area_to, seg_to->area_len);
406 }
407
408 return 1;
409 }
410
411 /*
412 * Link part of a PV to an LV segment.
413 */
414 int set_lv_segment_area_pv(struct lv_segment *seg, uint32_t area_num,
415 struct physical_volume *pv, uint32_t pe)
416 {
417 seg->areas[area_num].type = AREA_PV;
418
419 if (!(seg_pvseg(seg, area_num) =
420 assign_peg_to_lvseg(pv, pe, seg->area_len, seg, area_num)))
421 return_0;
422
423 return 1;
424 }
425
426 /*
427 * Link one LV segment to another. Assumes sizes already match.
428 */
429 int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
430 struct logical_volume *lv, uint32_t le,
431 uint64_t status)
432 {
433 log_very_verbose("Stack %s:%" PRIu32 "[%" PRIu32 "] on LV %s:%" PRIu32,
434 seg->lv->name, seg->le, area_num, lv->name, le);
435
436 if (status & RAID_META) {
437 seg->meta_areas[area_num].type = AREA_LV;
438 seg_metalv(seg, area_num) = lv;
439 if (le) {
440 log_error(INTERNAL_ERROR "Meta le != 0");
441 return 0;
442 }
443 seg_metale(seg, area_num) = 0;
444 } else {
445 seg->areas[area_num].type = AREA_LV;
446 seg_lv(seg, area_num) = lv;
447 seg_le(seg, area_num) = le;
448 }
449 lv->status |= status;
450
451 if (!add_seg_to_segs_using_this_lv(lv, seg))
452 return_0;
453
454 return 1;
455 }
456
457 /*
458 * Prepare for adding parallel areas to an existing segment.
459 */
460 static int _lv_segment_add_areas(struct logical_volume *lv,
461 struct lv_segment *seg,
462 uint32_t new_area_count)
463 {
464 struct lv_segment_area *newareas;
465 uint32_t areas_sz = new_area_count * sizeof(*newareas);
466
467 if (!(newareas = dm_pool_zalloc(lv->vg->cmd->mem, areas_sz)))
468 return_0;
469
470 memcpy(newareas, seg->areas, seg->area_count * sizeof(*seg->areas));
471
472 seg->areas = newareas;
473 seg->area_count = new_area_count;
474
475 return 1;
476 }
477
478 /*
479 * Reduce the size of an lv_segment. New size can be zero.
480 */
481 static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
482 {
483 uint32_t area_reduction, s;
484
485 /* Caller must ensure exact divisibility */
486 if (seg_is_striped(seg)) {
487 if (reduction % seg->area_count) {
488 log_error("Segment extent reduction %" PRIu32
489 " not divisible by #stripes %" PRIu32,
490 reduction, seg->area_count);
491 return 0;
492 }
493 area_reduction = (reduction / seg->area_count);
494 } else
495 area_reduction = reduction;
496
497 for (s = 0; s < seg->area_count; s++)
498 release_lv_segment_area(seg, s, area_reduction);
499
500 seg->len -= reduction;
501 seg->area_len -= area_reduction;
502
503 return 1;
504 }
505
506 /*
507 * Entry point for all LV reductions in size.
508 */
509 static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
510 {
511 struct lv_segment *seg;
512 uint32_t count = extents;
513 uint32_t reduction;
514
515 dm_list_iterate_back_items(seg, &lv->segments) {
516 if (!count)
517 break;
518
519 if (seg->len <= count) {
520 /* remove this segment completely */
521 /* FIXME Check this is safe */
522 if (seg->log_lv && !lv_remove(seg->log_lv))
523 return_0;
524
525 if (seg->metadata_lv && !lv_remove(seg->metadata_lv))
526 return_0;
527
528 if (seg->pool_lv) {
529 if (!detach_pool_lv(seg))
530 return_0;
531 }
532
533 dm_list_del(&seg->list);
534 reduction = seg->len;
535 } else
536 reduction = count;
537
538 if (!_lv_segment_reduce(seg, reduction))
539 return_0;
540 count -= reduction;
541 }
542
543 lv->le_count -= extents;
544 lv->size = (uint64_t) lv->le_count * lv->vg->extent_size;
545
546 if (!delete)
547 return 1;
548
549 /* Remove the LV if it is now empty */
550 if (!lv->le_count && !unlink_lv_from_vg(lv))
551 return_0;
552 else if (lv->vg->fid->fmt->ops->lv_setup &&
553 !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
554 return_0;
555
556 return 1;
557 }
558
559 /*
560 * Empty an LV.
561 */
562 int lv_empty(struct logical_volume *lv)
563 {
564 return _lv_reduce(lv, lv->le_count, 0);
565 }
566
567 /*
568 * Empty an LV and add error segment.
569 */
570 int replace_lv_with_error_segment(struct logical_volume *lv)
571 {
572 uint32_t len = lv->le_count;
573
574 if (len && !lv_empty(lv))
575 return_0;
576
577 /* Minimum size required for a table. */
578 if (!len)
579 len = 1;
580
581 /*
582 	 * Since we are replacing whatever was there with
583 * an error segment, we should also clear any flags
584 * that suggest it is anything other than "error".
585 */
586 lv->status &= ~(MIRRORED|PVMOVE);
587
588 /* FIXME: Should we bug if we find a log_lv attached? */
589
590 if (!lv_add_virtual_segment(lv, 0, len, get_segtype_from_string(lv->vg->cmd, "error"), NULL))
591 return_0;
592
593 return 1;
594 }
595
596 /*
597 * Remove given number of extents from LV.
598 */
599 int lv_reduce(struct logical_volume *lv, uint32_t extents)
600 {
601 return _lv_reduce(lv, extents, 1);
602 }
603
604 /*
605 * Completely remove an LV.
606 */
607 int lv_remove(struct logical_volume *lv)
608 {
609
610 if (!lv_reduce(lv, lv->le_count))
611 return_0;
612
613 return 1;
614 }
615
616 /*
617 * A set of contiguous physical extents allocated
618 */
619 struct alloced_area {
620 struct dm_list list;
621
622 struct physical_volume *pv;
623 uint32_t pe;
624 uint32_t len;
625 };
626
627 /*
628 * Details of an allocation attempt
629 */
630 struct alloc_handle {
631 struct cmd_context *cmd;
632 struct dm_pool *mem;
633
634 alloc_policy_t alloc; /* Overall policy */
635 uint32_t new_extents; /* Number of new extents required */
636 uint32_t area_count; /* Number of parallel areas */
637 uint32_t parity_count; /* Adds to area_count, but not area_multiple */
638 uint32_t area_multiple; /* seg->len = area_len * area_multiple */
639 uint32_t log_area_count; /* Number of parallel logs */
640 uint32_t metadata_area_count; /* Number of parallel metadata areas */
641 uint32_t log_len; /* Length of log/metadata_area */
642 uint32_t region_size; /* Mirror region size */
643 uint32_t total_area_len; /* Total number of parallel extents */
644
645 unsigned maximise_cling;
646 unsigned mirror_logs_separate; /* Force mirror logs on separate PVs? */
647
648 /*
649 * RAID devices require a metadata area that accompanies each
650 * device. During initial creation, it is best to look for space
651 * that is new_extents + log_len and then split that between two
652 	 * allocated areas when found. 'alloc_and_split_meta' indicates
653 	 * that this combined allocate-and-split behaviour is wanted.
654 */
655 unsigned alloc_and_split_meta;
656
657 const struct dm_config_node *cling_tag_list_cn;
658
659 struct dm_list *parallel_areas; /* PVs to avoid */
660
661 /*
662 * Contains area_count lists of areas allocated to data stripes
663 * followed by log_area_count lists of areas allocated to log stripes.
664 */
665 struct dm_list alloced_areas[0];
666 };
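
/*
 * A minimal sketch of the alloced_areas layout (values assumed for
 * illustration): for a 2-way mirror with one mirror log, area_count is 2
 * and log_area_count is 1, so alloced_areas[0] and alloced_areas[1] hold
 * the lists of areas allocated to the two data images and alloced_areas[2]
 * holds the areas allocated to the log.
 */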
667
668 static uint32_t _calc_area_multiple(const struct segment_type *segtype,
669 const uint32_t area_count, const uint32_t stripes)
670 {
671 if (!area_count)
672 return 1;
673
674 /* Striped */
675 if (segtype_is_striped(segtype))
676 return area_count;
677
678 /* Mirrored stripes */
679 if (stripes)
680 return stripes;
681
682 /* Mirrored */
683 return 1;
684 }
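
/*
 * Illustrative examples (a sketch, not exhaustive): for a striped segment
 * with 3 stripes the multiple is 3, so seg->len = 3 * area_len; for a
 * plain 2-way mirror the multiple is 1, so each image area is as long as
 * the whole segment; for mirrored stripes the 'stripes' argument is used.
 */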
685
686 /*
687 * Returns log device size in extents, algorithm from kernel code
688 */
689 #define BYTE_SHIFT 3
690 static uint32_t mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint32_t area_len)
691 {
692 size_t area_size, bitset_size, log_size, region_count;
693
694 area_size = area_len * pe_size;
695 region_count = dm_div_up(area_size, region_size);
696
697 /* Work out how many "unsigned long"s we need to hold the bitset. */
698 bitset_size = dm_round_up(region_count, sizeof(uint32_t) << BYTE_SHIFT);
699 bitset_size >>= BYTE_SHIFT;
700
701 /* Log device holds both header and bitset. */
702 log_size = dm_round_up((MIRROR_LOG_OFFSET << SECTOR_SHIFT) + bitset_size, 1 << SECTOR_SHIFT);
703 log_size >>= SECTOR_SHIFT;
704 log_size = dm_div_up(log_size, pe_size);
705
706 /*
707 * Kernel requires a mirror to be at least 1 region large. So,
708 * if our mirror log is itself a mirror, it must be at least
709 * 1 region large. This restriction may not be necessary for
710 * non-mirrored logs, but we apply the rule anyway.
711 *
712 * (The other option is to make the region size of the log
713 * mirror smaller than the mirror it is acting as a log for,
714 * but that really complicates things. It's much easier to
715 * keep the region_size the same for both.)
716 */
717 return (log_size > (region_size / pe_size)) ? log_size :
718 (region_size / pe_size);
719 }
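
/*
 * Worked example (a sketch with assumed values): with 4MiB extents
 * (pe_size = 8192 sectors), a region_size of 1024 sectors (512KiB) and
 * area_len = 25600 extents (100GiB), region_count is 204800 and the
 * bitset needs 25600 bytes; assuming MIRROR_LOG_OFFSET is 2 sectors,
 * the log rounds up to 52 sectors, which fits in a single extent, and
 * since region_size / pe_size is 0 the function returns 1.
 */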
720
721 /*
722 * Preparation for a specific allocation attempt
723 * stripes and mirrors refer to the parallel areas used for data.
724 * If log_area_count > 1 it is always mirrored (not striped).
725 */
726 static struct alloc_handle *_alloc_init(struct cmd_context *cmd,
727 struct dm_pool *mem,
728 const struct segment_type *segtype,
729 alloc_policy_t alloc,
730 uint32_t new_extents,
731 uint32_t mirrors,
732 uint32_t stripes,
733 uint32_t metadata_area_count,
734 uint32_t extent_size,
735 uint32_t region_size,
736 struct dm_list *parallel_areas)
737 {
738 struct alloc_handle *ah;
739 uint32_t s, area_count, alloc_count, parity_count;
740 size_t size = 0;
741
742 /* FIXME Caller should ensure this */
743 if (mirrors && !stripes)
744 stripes = 1;
745
746 if (segtype_is_virtual(segtype))
747 area_count = 0;
748 else if (mirrors > 1)
749 area_count = mirrors * stripes;
750 else
751 area_count = stripes;
752
753 size = sizeof(*ah);
754
755 /*
756 * It is a requirement that RAID 4/5/6 are created with a number of
757 	 * stripes that is greater than the number of parity devices. (e.g.
758 * RAID4/5 must have at least 2 stripes and RAID6 must have at least
759 * 3.) It is also a constraint that, when replacing individual devices
760 * in a RAID 4/5/6 array, no more devices can be replaced than
761 * there are parity devices. (Otherwise, there would not be enough
762 * redundancy to maintain the array.) Understanding these two
763 * constraints allows us to infer whether the caller of this function
764 * is intending to allocate an entire array or just replacement
765 * component devices. In the former case, we must account for the
766 	 * necessary parity_count. In the latter case, we do not need to
767 * account for the extra parity devices because the array already
768 * exists and they only want replacement drives.
769 */
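	/*
	 * For example: creating a 3-stripe raid5 LV gives area_count 3,
	 * which exceeds parity_devs 1, so parity_count becomes 1; replacing
	 * a single raid5 image gives area_count 1 <= parity_devs 1, so
	 * parity_count stays 0.
	 */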
770 parity_count = (area_count <= segtype->parity_devs) ? 0 :
771 segtype->parity_devs;
772 alloc_count = area_count + parity_count;
773 if (segtype_is_raid(segtype) && metadata_area_count)
774 /* RAID has a meta area for each device */
775 alloc_count *= 2;
776 else
777 /* mirrors specify their exact log count */
778 alloc_count += metadata_area_count;
779
780 size += sizeof(ah->alloced_areas[0]) * alloc_count;
781
782 if (!(ah = dm_pool_zalloc(mem, size))) {
783 log_error("allocation handle allocation failed");
784 return NULL;
785 }
786
787 ah->cmd = cmd;
788
789 if (segtype_is_virtual(segtype))
790 return ah;
791
792 if (!(area_count + metadata_area_count)) {
793 log_error(INTERNAL_ERROR "_alloc_init called for non-virtual segment with no disk space.");
794 return NULL;
795 }
796
797 if (!(ah->mem = dm_pool_create("allocation", 1024))) {
798 log_error("allocation pool creation failed");
799 return NULL;
800 }
801
802 if (mirrors || stripes)
803 ah->new_extents = new_extents;
804 else
805 ah->new_extents = 0;
806 ah->area_count = area_count;
807 ah->parity_count = parity_count;
808 ah->region_size = region_size;
809 ah->alloc = alloc;
810 ah->area_multiple = _calc_area_multiple(segtype, area_count, stripes);
811 ah->mirror_logs_separate = find_config_tree_bool(cmd, "allocation/mirror_logs_require_separate_pvs",
812 DEFAULT_MIRROR_LOGS_REQUIRE_SEPARATE_PVS);
813
814 if (segtype_is_raid(segtype)) {
815 if (metadata_area_count) {
816 if (metadata_area_count != area_count)
817 log_error(INTERNAL_ERROR
818 "Bad metadata_area_count");
819 ah->metadata_area_count = area_count;
820 ah->alloc_and_split_meta = 1;
821
822 ah->log_len = RAID_METADATA_AREA_LEN;
823
824 /*
825 * We need 'log_len' extents for each
826 * RAID device's metadata_area
827 */
828 ah->new_extents += (ah->log_len * ah->area_multiple);
829 } else {
830 ah->log_area_count = 0;
831 ah->log_len = 0;
832 }
833 } else if (segtype_is_thin_pool(segtype)) {
834 ah->log_area_count = metadata_area_count;
835 /* thin_pool uses region_size to pass metadata size in extents */
836 ah->log_len = ah->region_size;
837 ah->region_size = 0;
838 ah->mirror_logs_separate =
839 find_config_tree_bool(cmd, "allocation/thin_pool_metadata_require_separate_pvs",
840 DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS);
841 } else {
842 ah->log_area_count = metadata_area_count;
843 ah->log_len = !metadata_area_count ? 0 :
844 mirror_log_extents(ah->region_size, extent_size,
845 new_extents / ah->area_multiple);
846 }
847
848 for (s = 0; s < alloc_count; s++)
849 dm_list_init(&ah->alloced_areas[s]);
850
851 ah->parallel_areas = parallel_areas;
852
853 ah->cling_tag_list_cn = find_config_tree_node(cmd, "allocation/cling_tag_list");
854
855 ah->maximise_cling = find_config_tree_bool(cmd, "allocation/maximise_cling", DEFAULT_MAXIMISE_CLING);
856
857 return ah;
858 }
859
860 void alloc_destroy(struct alloc_handle *ah)
861 {
862 if (ah->mem)
863 dm_pool_destroy(ah->mem);
864 }
865
866 /* Is there enough total space or should we give up immediately? */
867 static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms,
868 uint32_t allocated, uint32_t extents_still_needed)
869 {
870 uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple;
871 uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple;
872 uint32_t metadata_extents_needed = ah->metadata_area_count * RAID_METADATA_AREA_LEN; /* One each */
873 uint32_t total_extents_needed = area_extents_needed + parity_extents_needed + metadata_extents_needed;
874 uint32_t free_pes = pv_maps_size(pvms);
875
876 if (total_extents_needed > free_pes) {
877 log_error("Insufficient free space: %" PRIu32 " extents needed,"
878 " but only %" PRIu32 " available",
879 total_extents_needed, free_pes);
880 return 0;
881 }
882
883 return 1;
884 }
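
/*
 * A quick sketch with assumed numbers: a 2-way mirror (area_count 2,
 * area_multiple 1, no parity, no RAID metadata) still needing 100 extents
 * requires at least 200 free physical extents across the PV maps, so the
 * allocation is refused up front if fewer are available.
 */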
885
886 /* For striped mirrors, all the areas are counted, through the mirror layer */
887 static uint32_t _stripes_per_mimage(struct lv_segment *seg)
888 {
889 struct lv_segment *last_lvseg;
890
891 if (seg_is_mirrored(seg) && seg->area_count && seg_type(seg, 0) == AREA_LV) {
892 last_lvseg = dm_list_item(dm_list_last(&seg_lv(seg, 0)->segments), struct lv_segment);
893 if (seg_is_striped(last_lvseg))
894 return last_lvseg->area_count;
895 }
896
897 return 1;
898 }
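
/*
 * For example (a sketch): in a striped mirror whose images are themselves
 * striped LVs across 3 PVs, the last segment of the first image is striped
 * with area_count 3, so this returns 3; in all other cases it returns 1.
 */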
899
900 static void _init_alloc_parms(struct alloc_handle *ah, struct alloc_parms *alloc_parms, alloc_policy_t alloc,
901 struct lv_segment *prev_lvseg, unsigned can_split,
902 uint32_t allocated, uint32_t extents_still_needed)
903 {
904 alloc_parms->alloc = alloc;
905 alloc_parms->prev_lvseg = prev_lvseg;
906 alloc_parms->flags = 0;
907 alloc_parms->extents_still_needed = extents_still_needed;
908
909 /* Are there any preceding segments we must follow on from? */
910 if (alloc_parms->prev_lvseg) {
911 if (alloc_parms->alloc == ALLOC_CONTIGUOUS)
912 alloc_parms->flags |= A_CONTIGUOUS_TO_LVSEG;
913 else if ((alloc_parms->alloc == ALLOC_CLING) || (alloc_parms->alloc == ALLOC_CLING_BY_TAGS))
914 alloc_parms->flags |= A_CLING_TO_LVSEG;
915 } else
916 /*
917 * A cling allocation that follows a successful contiguous allocation
918 * must use the same PVs (or else fail).
919 */
920 if ((alloc_parms->alloc == ALLOC_CLING) || (alloc_parms->alloc == ALLOC_CLING_BY_TAGS))
921 alloc_parms->flags |= A_CLING_TO_ALLOCED;
922
923 if (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)
924 alloc_parms->flags |= A_CLING_BY_TAGS;
925
926 /*
927 * For normal allocations, if any extents have already been found
928 * for allocation, prefer to place further extents on the same disks as
929 * have already been used.
930 */
931 if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL && allocated != alloc_parms->extents_still_needed)
932 alloc_parms->flags |= A_CLING_TO_ALLOCED;
933
934 if (can_split)
935 alloc_parms->flags |= A_CAN_SPLIT;
936 }
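
/*
 * In short: when a previous segment exists, ALLOC_CONTIGUOUS demands space
 * contiguous to it and ALLOC_CLING / ALLOC_CLING_BY_TAGS demand the same
 * PVs; without one, the cling policies must reuse PVs already chosen in
 * this allocation; ALLOC_CLING_BY_TAGS additionally matches by tags, and
 * ALLOC_NORMAL with maximise_cling prefers disks that have already received
 * extents during this attempt.
 */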
937
938 static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas)
939 {
940 struct seg_pvs *spvs;
941 struct pv_list *pvl;
942 char *pvnames;
943
944 if (!parallel_areas)
945 return 1;
946
947 dm_list_iterate_items(spvs, parallel_areas) {
948 if (!dm_pool_begin_object(mem, 256)) {
949 log_error("dm_pool_begin_object failed");
950 return 0;
951 }
952
953 dm_list_iterate_items(pvl, &spvs->pvs) {
954 if (!dm_pool_grow_object(mem, pv_dev_name(pvl->pv), strlen(pv_dev_name(pvl->pv)))) {
955 log_error("dm_pool_grow_object failed");
956 dm_pool_abandon_object(mem);
957 return 0;
958 }
959 if (!dm_pool_grow_object(mem, " ", 1)) {
960 log_error("dm_pool_grow_object failed");
961 dm_pool_abandon_object(mem);
962 return 0;
963 }
964 }
965
966 if (!dm_pool_grow_object(mem, "\0", 1)) {
967 log_error("dm_pool_grow_object failed");
968 dm_pool_abandon_object(mem);
969 return 0;
970 }
971
972 pvnames = dm_pool_end_object(mem);
973 log_debug("Parallel PVs at LE %" PRIu32 " length %" PRIu32 ": %s",
974 spvs->le, spvs->len, pvnames);
975 dm_pool_free(mem, pvnames);
976 }
977
978 return 1;
979 }
980
981 static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
982 uint32_t area_count,
983 uint32_t stripe_size,
984 const struct segment_type *segtype,
985 struct alloced_area *aa,
986 uint32_t region_size)
987 {
988 uint32_t s, extents, area_multiple;
989 struct lv_segment *seg;
990
991 area_multiple = _calc_area_multiple(segtype, area_count, 0);
992
993 if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count,
994 aa[0].len * area_multiple,
995 status, stripe_size, NULL, NULL,
996 area_count,
997 aa[0].len, 0u, region_size, 0u, NULL))) {
998 log_error("Couldn't allocate new LV segment.");
999 return 0;
1000 }
1001
1002 for (s = 0; s < area_count; s++)
1003 if (!set_lv_segment_area_pv(seg, s, aa[s].pv, aa[s].pe))
1004 return_0;
1005
1006 dm_list_add(&lv->segments, &seg->list);
1007
1008 extents = aa[0].len * area_multiple;
1009 lv->le_count += extents;
1010 lv->size += (uint64_t) extents *lv->vg->extent_size;
1011
1012 if (segtype_is_mirrored(segtype))
1013 lv->status |= MIRRORED;
1014
1015 return 1;
1016 }
1017
1018 static int _setup_alloced_segments(struct logical_volume *lv,
1019 struct dm_list *alloced_areas,
1020 uint32_t area_count,
1021 uint64_t status,
1022 uint32_t stripe_size,
1023 const struct segment_type *segtype,
1024 uint32_t region_size)
1025 {
1026 struct alloced_area *aa;
1027
1028 dm_list_iterate_items(aa, &alloced_areas[0]) {
1029 if (!_setup_alloced_segment(lv, status, area_count,
1030 stripe_size, segtype, aa,
1031 region_size))
1032 return_0;
1033 }
1034
1035 return 1;
1036 }
1037
1038 /*
1039 * This function takes a list of pv_areas and adds them to allocated_areas.
1040 * If the complete area is not needed then it gets split.
1041 * The part used is removed from the pv_map so it can't be allocated twice.
1042 */
1043 static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocate,
1044 struct alloc_state *alloc_state, uint32_t ix_log_offset)
1045 {
1046 uint32_t area_len, len;
1047 uint32_t s;
1048 uint32_t ix_log_skip = 0; /* How many areas to skip in middle of array to reach log areas */
1049 uint32_t total_area_count;
1050 struct alloced_area *aa;
1051 struct pv_area *pva;
1052
1053 total_area_count = ah->area_count + alloc_state->log_area_count_still_needed;
1054 total_area_count += ah->parity_count;
1055 if (!total_area_count) {
1056 log_error(INTERNAL_ERROR "_alloc_parallel_area called without any allocation to do.");
1057 return 1;
1058 }
1059
1060 area_len = max_to_allocate / ah->area_multiple;
1061
1062 /* Reduce area_len to the smallest of the areas */
1063 for (s = 0; s < ah->area_count + ah->parity_count; s++)
1064 if (area_len > alloc_state->areas[s].used)
1065 area_len = alloc_state->areas[s].used;
1066
1067 len = (ah->alloc_and_split_meta) ? total_area_count * 2 : total_area_count;
1068 len *= sizeof(*aa);
1069 if (!(aa = dm_pool_alloc(ah->mem, len))) {
1070 log_error("alloced_area allocation failed");
1071 return 0;
1072 }
1073
1074 /*
1075 	 * The areas array consists of area_count areas for data stripes, then
1076 * ix_log_skip areas to skip, then log_area_count areas to use for the
1077 * log, then some areas too small for the log.
1078 */
1079 len = area_len;
1080 for (s = 0; s < total_area_count; s++) {
1081 if (s == (ah->area_count + ah->parity_count)) {
1082 ix_log_skip = ix_log_offset - ah->area_count;
1083 len = ah->log_len;
1084 }
1085
1086 pva = alloc_state->areas[s + ix_log_skip].pva;
1087 if (ah->alloc_and_split_meta) {
1088 /*
1089 * The metadata area goes at the front of the allocated
1090 * space for now, but could easily go at the end (or
1091 * middle!).
1092 *
1093 * Even though we split these two from the same
1094 * allocation, we store the images at the beginning
1095 * of the areas array and the metadata at the end.
1096 */
1097 s += ah->area_count + ah->parity_count;
1098 aa[s].pv = pva->map->pv;
1099 aa[s].pe = pva->start;
1100 aa[s].len = ah->log_len;
1101
1102 log_debug("Allocating parallel metadata area %" PRIu32
1103 " on %s start PE %" PRIu32
1104 " length %" PRIu32 ".",
1105 (s - (ah->area_count + ah->parity_count)),
1106 pv_dev_name(aa[s].pv), aa[s].pe,
1107 ah->log_len);
1108
1109 consume_pv_area(pva, ah->log_len);
1110 dm_list_add(&ah->alloced_areas[s], &aa[s].list);
1111 s -= ah->area_count + ah->parity_count;
1112 }
1113 aa[s].pv = pva->map->pv;
1114 aa[s].pe = pva->start;
1115 aa[s].len = (ah->alloc_and_split_meta) ? len - ah->log_len : len;
1116
1117 log_debug("Allocating parallel area %" PRIu32
1118 " on %s start PE %" PRIu32 " length %" PRIu32 ".",
1119 s, pv_dev_name(aa[s].pv), aa[s].pe, aa[s].len);
1120
1121 consume_pv_area(pva, aa[s].len);
1122
1123 dm_list_add(&ah->alloced_areas[s], &aa[s].list);
1124 }
1125
1126 /* Only need to alloc metadata from the first batch */
1127 ah->alloc_and_split_meta = 0;
1128
1129 ah->total_area_len += area_len;
1130
1131 alloc_state->allocated += area_len * ah->area_multiple;
1132
1133 return 1;
1134 }
1135
1136 /*
1137 * Call fn for each AREA_PV used by the LV segment at lv:le of length *max_seg_len.
1138 * If any constituent area contains more than one segment, max_seg_len is
1139 * reduced to cover only the first.
1140 * fn should return 0 on error, 1 to continue scanning or >1 to terminate without error.
1141 * In the last case, this function passes on the return code.
1142 */
1143 static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
1144 uint32_t le, uint32_t len, struct lv_segment *seg,
1145 uint32_t *max_seg_len,
1146 uint32_t first_area, uint32_t max_areas,
1147 int top_level_area_index,
1148 int only_single_area_segments,
1149 int (*fn)(struct cmd_context *cmd,
1150 struct pv_segment *peg, uint32_t s,
1151 void *data),
1152 void *data)
1153 {
1154 uint32_t s;
1155 uint32_t remaining_seg_len, area_len, area_multiple;
1156 uint32_t stripes_per_mimage = 1;
1157 int r = 1;
1158
1159 if (!seg && !(seg = find_seg_by_le(lv, le))) {
1160 log_error("Failed to find segment for %s extent %" PRIu32,
1161 lv->name, le);
1162 return 0;
1163 }
1164
1165 /* Remaining logical length of segment */
1166 remaining_seg_len = seg->len - (le - seg->le);
1167
1168 if (remaining_seg_len > len)
1169 remaining_seg_len = len;
1170
1171 if (max_seg_len && *max_seg_len > remaining_seg_len)
1172 *max_seg_len = remaining_seg_len;
1173
1174 area_multiple = _calc_area_multiple(seg->segtype, seg->area_count, 0);
1175 area_len = remaining_seg_len / area_multiple ? : 1;
1176
1177 /* For striped mirrors, all the areas are counted, through the mirror layer */
1178 if (top_level_area_index == -1)
1179 stripes_per_mimage = _stripes_per_mimage(seg);
1180
1181 for (s = first_area;
1182 s < seg->area_count && (!max_areas || s <= max_areas);
1183 s++) {
1184 if (seg_type(seg, s) == AREA_LV) {
1185 if (!(r = _for_each_pv(cmd, seg_lv(seg, s),
1186 seg_le(seg, s) +
1187 (le - seg->le) / area_multiple,
1188 area_len, NULL, max_seg_len, 0,
1189 (stripes_per_mimage == 1) && only_single_area_segments ? 1U : 0U,
1190 (top_level_area_index != -1) ? top_level_area_index : (int) (s * stripes_per_mimage),
1191 only_single_area_segments, fn,
1192 data)))
1193 stack;
1194 } else if (seg_type(seg, s) == AREA_PV)
1195 if (!(r = fn(cmd, seg_pvseg(seg, s), top_level_area_index != -1 ? (uint32_t) top_level_area_index + s : s, data)))
1196 stack;
1197 if (r != 1)
1198 return r;
1199 }
1200
1201 /* FIXME only_single_area_segments used as workaround to skip log LV - needs new param? */
1202 if (!only_single_area_segments && seg_is_mirrored(seg) && seg->log_lv) {
1203 if (!(r = _for_each_pv(cmd, seg->log_lv, 0, seg->log_lv->le_count, NULL,
1204 NULL, 0, 0, 0, only_single_area_segments,
1205 fn, data)))
1206 stack;
1207 if (r != 1)
1208 return r;
1209 }
1210
1211 /* FIXME Add snapshot cow LVs etc. */
1212
1213 return 1;
1214 }
1215
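/*
 * qsort(3) comparison function: orders pv_area_used entries by 'used' in
 * descending order, so the largest areas sort first.
 */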
1216 static int _comp_area(const void *l, const void *r)
1217 {
1218 const struct pv_area_used *lhs = (const struct pv_area_used *) l;
1219 const struct pv_area_used *rhs = (const struct pv_area_used *) r;
1220
1221 if (lhs->used < rhs->used)
1222 return 1;
1223
1224 else if (lhs->used > rhs->used)
1225 return -1;
1226
1227 return 0;
1228 }
1229
1230 /*
1231 * Search for pvseg that matches condition
1232 */
1233 struct pv_match {
1234 int (*condition)(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva);
1235
1236 struct pv_area_used *areas;
1237 struct pv_area *pva;
1238 uint32_t areas_size;
1239 const struct dm_config_node *cling_tag_list_cn;
1240 int s; /* Area index of match */
1241 };
1242
1243 /*
1244 * Is PV area on the same PV?
1245 */
1246 static int _is_same_pv(struct pv_match *pvmatch __attribute((unused)), struct pv_segment *pvseg, struct pv_area *pva)
1247 {
1248 if (pvseg->pv != pva->map->pv)
1249 return 0;
1250
1251 return 1;
1252 }
1253
1254 /*
1255 * Does PV area have a tag listed in allocation/cling_tag_list that
1256 * matches a tag of the PV of the existing segment?
1257 */
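/*
 * An illustrative lvm.conf fragment (the tag names here are arbitrary):
 *
 *     allocation {
 *         cling_tag_list = [ "@site_a", "@site_b" ]
 *     }
 *
 * With this setting, free space on a PV tagged "site_a" may be used to
 * extend a segment that already lies on a PV tagged "site_a"; the special
 * entry "@*" matches any tag against any tag.
 */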
1258 static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn, struct physical_volume *pv1, struct physical_volume *pv2)
1259 {
1260 const struct dm_config_value *cv;
1261 const char *str;
1262 const char *tag_matched;
1263
1264 for (cv = cling_tag_list_cn->v; cv; cv = cv->next) {
1265 if (cv->type != DM_CFG_STRING) {
1266 log_error("Ignoring invalid string in config file entry "
1267 "allocation/cling_tag_list");
1268 continue;
1269 }
1270 str = cv->v.str;
1271 if (!*str) {
1272 log_error("Ignoring empty string in config file entry "
1273 "allocation/cling_tag_list");
1274 continue;
1275 }
1276
1277 if (*str != '@') {
1278 log_error("Ignoring string not starting with @ in config file entry "
1279 "allocation/cling_tag_list: %s", str);
1280 continue;
1281 }
1282
1283 str++;
1284
1285 if (!*str) {
1286 log_error("Ignoring empty tag in config file entry "
1287 "allocation/cling_tag_list");
1288 continue;
1289 }
1290
1291 /* Wildcard matches any tag against any tag. */
1292 if (!strcmp(str, "*")) {
1293 if (!str_list_match_list(&pv1->tags, &pv2->tags, &tag_matched))
1294 continue;
1295 else {
1296 log_debug("Matched allocation PV tag %s on existing %s with free space on %s.",
1297 tag_matched, pv_dev_name(pv1), pv_dev_name(pv2));
1298 return 1;
1299 }
1300 }
1301
1302 if (!str_list_match_item(&pv1->tags, str) ||
1303 !str_list_match_item(&pv2->tags, str))
1304 continue;
1305 else {
1306 log_debug("Matched allocation PV tag %s on existing %s with free space on %s.",
1307 str, pv_dev_name(pv1), pv_dev_name(pv2));
1308 return 1;
1309 }
1310 }
1311
1312 return 0;
1313 }
1314
1315 static int _has_matching_pv_tag(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva)
1316 {
1317 return _pvs_have_matching_tag(pvmatch->cling_tag_list_cn, pvseg->pv, pva->map->pv);
1318 }
1319
1320 /*
1321 * Is PV area contiguous to PV segment?
1322 */
1323 static int _is_contiguous(struct pv_match *pvmatch __attribute((unused)), struct pv_segment *pvseg, struct pv_area *pva)
1324 {
1325 if (pvseg->pv != pva->map->pv)
1326 return 0;
1327
1328 if (pvseg->pe + pvseg->len != pva->start)
1329 return 0;
1330
1331 return 1;
1332 }
1333
1334 static void _reserve_area(struct pv_area_used *area_used, struct pv_area *pva, uint32_t required,
1335 uint32_t ix_pva, uint32_t unreserved)
1336 {
1337 log_debug("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
1338 " length %" PRIu32 " leaving %" PRIu32 ".",
1339 area_used->pva ? "Changing " : "Considering",
1340 ix_pva - 1, area_used->pva ? "to" : "as",
1341 dev_name(pva->map->pv->dev), pva->start, required, unreserved);
1342
1343 area_used->pva = pva;
1344 area_used->used = required;
1345 }
1346
1347 static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
1348 struct pv_segment *pvseg, uint32_t s,
1349 void *data)
1350 {
1351 struct pv_match *pvmatch = data;
1352
1353 if (pvmatch->areas[s].pva)
1354 return 1; /* Area already assigned */
1355
1356 if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva))
1357 return 1; /* Continue */
1358
1359 if (s >= pvmatch->areas_size)
1360 return 1;
1361
1362 /*
1363 * Only used for cling and contiguous policies (which only make one allocation per PV)
1364 * so it's safe to say all the available space is used.
1365 */
1366 _reserve_area(&pvmatch->areas[s], pvmatch->pva, pvmatch->pva->count, s + 1, 0);
1367
1368 return 2; /* Finished */
1369 }
1370
1371 /*
1372 * Is pva on same PV as any existing areas?
1373 */
1374 static int _check_cling(struct alloc_handle *ah,
1375 const struct dm_config_node *cling_tag_list_cn,
1376 struct lv_segment *prev_lvseg, struct pv_area *pva,
1377 struct alloc_state *alloc_state)
1378 {
1379 struct pv_match pvmatch;
1380 int r;
1381 uint32_t le, len;
1382
1383 pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv;
1384 pvmatch.areas = alloc_state->areas;
1385 pvmatch.areas_size = alloc_state->areas_size;
1386 pvmatch.pva = pva;
1387 pvmatch.cling_tag_list_cn = cling_tag_list_cn;
1388
1389 if (ah->maximise_cling) {
1390 /* Check entire LV */
1391 le = 0;
1392 len = prev_lvseg->le + prev_lvseg->len;
1393 } else {
1394 /* Only check 1 LE at end of previous LV segment */
1395 le = prev_lvseg->le + prev_lvseg->len - 1;
1396 len = 1;
1397 }
1398
1399 /* FIXME Cope with stacks by flattening */
1400 if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv, le, len, NULL, NULL,
1401 0, 0, -1, 1,
1402 _is_condition, &pvmatch)))
1403 stack;
1404
1405 if (r != 2)
1406 return 0;
1407
1408 return 1;
1409 }
1410
1411 /*
1412 * Is pva contiguous to any existing areas or on the same PV?
1413 */
1414 static int _check_contiguous(struct cmd_context *cmd,
1415 struct lv_segment *prev_lvseg, struct pv_area *pva,
1416 struct alloc_state *alloc_state)
1417 {
1418 struct pv_match pvmatch;
1419 int r;
1420
1421 pvmatch.condition = _is_contiguous;
1422 pvmatch.areas = alloc_state->areas;
1423 pvmatch.areas_size = alloc_state->areas_size;
1424 pvmatch.pva = pva;
1425 pvmatch.cling_tag_list_cn = NULL;
1426
1427 /* FIXME Cope with stacks by flattening */
1428 if (!(r = _for_each_pv(cmd, prev_lvseg->lv,
1429 prev_lvseg->le + prev_lvseg->len - 1, 1, NULL, NULL,
1430 0, 0, -1, 1,
1431 _is_condition, &pvmatch)))
1432 stack;
1433
1434 if (r != 2)
1435 return 0;
1436
1437 return 1;
1438 }
1439
1440 /*
1441 * Is pva on same PV as any areas already used in this allocation attempt?
1442 */
1443 static int _check_cling_to_alloced(struct alloc_handle *ah, const struct dm_config_node *cling_tag_list_cn,
1444 struct pv_area *pva, struct alloc_state *alloc_state)
1445 {
1446 unsigned s;
1447 struct alloced_area *aa;
1448
1449 /*
1450 * Ignore log areas. They are always allocated whole as part of the
1451 * first allocation. If they aren't yet set, we know we've nothing to do.
1452 */
1453 if (alloc_state->log_area_count_still_needed)
1454 return 0;
1455
1456 for (s = 0; s < ah->area_count; s++) {
1457 if (alloc_state->areas[s].pva)
1458 continue; /* Area already assigned */
1459 dm_list_iterate_items(aa, &ah->alloced_areas[s]) {
1460 if ((!cling_tag_list_cn && (pva->map->pv == aa[0].pv)) ||
1461 (cling_tag_list_cn && _pvs_have_matching_tag(cling_tag_list_cn, pva->map->pv, aa[0].pv))) {
1462 _reserve_area(&alloc_state->areas[s], pva, pva->count, s + 1, 0);
1463 return 1;
1464 }
1465 }
1466 }
1467
1468 return 0;
1469 }
1470
1471 static int _pv_is_parallel(struct physical_volume *pv, struct dm_list *parallel_pvs)
1472 {
1473 struct pv_list *pvl;
1474
1475 dm_list_iterate_items(pvl, parallel_pvs)
1476 if (pv == pvl->pv)
1477 return 1;
1478
1479 return 0;
1480 }
1481
1482 /*
1483 * Decide whether or not to try allocation from supplied area pva.
1484 * alloc_state->areas may get modified.
1485 */
1486 static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint32_t still_needed,
1487 const struct alloc_parms *alloc_parms, struct alloc_state *alloc_state,
1488 unsigned already_found_one, unsigned iteration_count, unsigned log_iteration_count)
1489 {
1490 unsigned s;
1491
1492 /* Skip fully-reserved areas (which are not currently removed from the list). */
1493 if (!pva->unreserved)
1494 return NEXT_AREA;
1495
1496 /* FIXME Should this test be removed? */
1497 if (iteration_count)
1498 /*
1499 * Don't use an area twice.
1500 */
1501 for (s = 0; s < alloc_state->areas_size; s++)
1502 if (alloc_state->areas[s].pva == pva)
1503 return NEXT_AREA;
1504
1505 /* If maximise_cling is set, perform several checks, otherwise perform exactly one. */
1506 if (!iteration_count && !log_iteration_count && alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG | A_CLING_TO_ALLOCED)) {
1507 /* Contiguous? */
1508 if (((alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG) || (ah->maximise_cling && alloc_parms->prev_lvseg)) &&
1509 _check_contiguous(ah->cmd, alloc_parms->prev_lvseg, pva, alloc_state))
1510 return PREFERRED;
1511
1512 /* Try next area on same PV if looking for contiguous space */
1513 if (alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG)
1514 return NEXT_AREA;
1515
1516 /* Cling to prev_lvseg? */
1517 if (((alloc_parms->flags & A_CLING_TO_LVSEG) || (ah->maximise_cling && alloc_parms->prev_lvseg)) &&
1518 _check_cling(ah, NULL, alloc_parms->prev_lvseg, pva, alloc_state))
1519 /* If this PV is suitable, use this first area */
1520 return PREFERRED;
1521
1522 /* Cling_to_alloced? */
1523 if ((alloc_parms->flags & A_CLING_TO_ALLOCED) &&
1524 _check_cling_to_alloced(ah, NULL, pva, alloc_state))
1525 return PREFERRED;
1526
1527 /* Cling_by_tags? */
1528 if (!(alloc_parms->flags & A_CLING_BY_TAGS) || !ah->cling_tag_list_cn)
1529 return NEXT_PV;
1530
1531 if (alloc_parms->prev_lvseg) {
1532 if (_check_cling(ah, ah->cling_tag_list_cn, alloc_parms->prev_lvseg, pva, alloc_state))
1533 return PREFERRED;
1534 } else if (_check_cling_to_alloced(ah, ah->cling_tag_list_cn, pva, alloc_state))
1535 return PREFERRED;
1536
1537 		/* All areas on this PV give the same result, so there is no point checking any more of them */
1538 return NEXT_PV;
1539 }
1540
1541 /* Normal/Anywhere */
1542
1543 /* Is it big enough on its own? */
1544 if (pva->unreserved * ah->area_multiple < still_needed &&
1545 ((!(alloc_parms->flags & A_CAN_SPLIT) && !ah->log_area_count) ||
1546 (already_found_one && alloc_parms->alloc != ALLOC_ANYWHERE)))
1547 return NEXT_PV;
1548
1549 return USE_AREA;
1550 }
1551
1552 /*
1553 * Decide how many extents we're trying to obtain from a given area.
1554 * Removes the extents from further consideration.
1555 */
1556 static uint32_t _calc_required_extents(struct alloc_handle *ah, struct pv_area *pva, unsigned ix_pva, uint32_t max_to_allocate, alloc_policy_t alloc)
1557 {
1558 uint32_t required = max_to_allocate / ah->area_multiple;
1559
1560 /*
1561 * Update amount unreserved - effectively splitting an area
1562 * into two or more parts. If the whole stripe doesn't fit,
1563 * reduce amount we're looking for.
1564 */
1565 if (alloc == ALLOC_ANYWHERE) {
1566 if (ix_pva - 1 >= ah->area_count)
1567 required = ah->log_len;
1568 } else if (required < ah->log_len)
1569 required = ah->log_len;
1570
1571 if (required >= pva->unreserved) {
1572 required = pva->unreserved;
1573 pva->unreserved = 0;
1574 } else {
1575 pva->unreserved -= required;
1576 reinsert_changed_pv_area(pva);
1577 }
1578
1579 return required;
1580 }
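
/*
 * For example (a sketch, ignoring the log_len adjustment): with
 * area_multiple 2 and max_to_allocate 100, each area needs 50 extents;
 * if the chosen pv_area only has 30 extents unreserved, required drops
 * to 30 and the area becomes fully reserved.
 */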
1581
1582 static int _reserve_required_area(struct alloc_handle *ah, uint32_t max_to_allocate,
1583 unsigned ix_pva, struct pv_area *pva,
1584 struct alloc_state *alloc_state, alloc_policy_t alloc)
1585 {
1586 uint32_t required = _calc_required_extents(ah, pva, ix_pva, max_to_allocate, alloc);
1587 uint32_t s;
1588
1589 /* Expand areas array if needed after an area was split. */
1590 if (ix_pva > alloc_state->areas_size) {
1591 alloc_state->areas_size *= 2;
1592 if (!(alloc_state->areas = dm_realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) {
1593 log_error("Memory reallocation for parallel areas failed.");
1594 return 0;
1595 }
1596 for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++)
1597 alloc_state->areas[s].pva = NULL;
1598 }
1599
1600 _reserve_area(&alloc_state->areas[ix_pva - 1], pva, required, ix_pva, pva->unreserved);
1601
1602 return 1;
1603 }
1604
1605 static void _clear_areas(struct alloc_state *alloc_state)
1606 {
1607 uint32_t s;
1608
1609 for (s = 0; s < alloc_state->areas_size; s++)
1610 alloc_state->areas[s].pva = NULL;
1611 }
1612
1613 static void _reset_unreserved(struct dm_list *pvms)
1614 {
1615 struct pv_map *pvm;
1616 struct pv_area *pva;
1617
1618 dm_list_iterate_items(pvm, pvms)
1619 dm_list_iterate_items(pva, &pvm->areas)
1620 if (pva->unreserved != pva->count) {
1621 pva->unreserved = pva->count;
1622 reinsert_changed_pv_area(pva);
1623 }
1624 }
1625
1626 static void _report_needed_allocation_space(struct alloc_handle *ah,
1627 struct alloc_state *alloc_state)
1628 {
1629 const char *metadata_type;
1630 uint32_t parallel_areas_count, parallel_area_size;
1631 uint32_t metadata_count, metadata_size;
1632
1633 parallel_area_size = (ah->new_extents - alloc_state->allocated) / ah->area_multiple -
1634 ((ah->alloc_and_split_meta) ? ah->log_len : 0);
1635
1636 parallel_areas_count = ah->area_count + ah->parity_count;
1637
1638 metadata_size = ah->log_len;
1639 if (ah->alloc_and_split_meta) {
1640 metadata_type = "RAID metadata area";
1641 metadata_count = parallel_areas_count;
1642 } else {
1643 metadata_type = "mirror log";
1644 metadata_count = alloc_state->log_area_count_still_needed;
1645 }
1646
1647 log_debug("Still need %" PRIu32 " total extents:",
1648 parallel_area_size * parallel_areas_count + metadata_size * metadata_count);
1649 log_debug(" %" PRIu32 " (%" PRIu32 " data/%" PRIu32
1650 " parity) parallel areas of %" PRIu32 " extents each",
1651 parallel_areas_count, ah->area_count, ah->parity_count, parallel_area_size);
1652 log_debug(" %" PRIu32 " %ss of %" PRIu32 " extents each",
1653 metadata_count, metadata_type, metadata_size);
1654 }
1655 /*
1656 * Returns 1 regardless of whether any space was found, except on error.
1657 */
1658 static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc_parms *alloc_parms,
1659 struct dm_list *pvms, struct alloc_state *alloc_state,
1660 struct dm_list *parallel_pvs, uint32_t max_to_allocate)
1661 {
1662 unsigned ix = 0;
1663 unsigned last_ix;
1664 struct pv_map *pvm;
1665 struct pv_area *pva;
1666 unsigned preferred_count = 0;
1667 unsigned already_found_one;
1668 unsigned ix_offset = 0; /* Offset for non-preferred allocations */
1669 unsigned ix_log_offset; /* Offset to start of areas to use for log */
1670 unsigned too_small_for_log_count; /* How many too small for log? */
1671 unsigned iteration_count = 0; /* cling_to_alloced may need 2 iterations */
1672 unsigned log_iteration_count = 0; /* extra iteration for logs on data devices */
1673 struct alloced_area *aa;
1674 uint32_t s;
1675 uint32_t devices_needed = ah->area_count + ah->parity_count;
1676
1677 /* ix_offset holds the number of parallel allocations that must be contiguous/cling */
1678 /* At most one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG or A_CLING_TO_ALLOCED may be set */
1679 if (alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG))
1680 ix_offset = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count;
1681
1682 if (alloc_parms->flags & A_CLING_TO_ALLOCED)
1683 ix_offset = ah->area_count;
1684
1685 if (alloc_parms->alloc == ALLOC_NORMAL || (alloc_parms->flags & A_CLING_TO_ALLOCED))
1686 log_debug("Cling_to_allocated is %sset",
1687 alloc_parms->flags & A_CLING_TO_ALLOCED ? "" : "not ");
1688
1689 _clear_areas(alloc_state);
1690 _reset_unreserved(pvms);
1691
1692 _report_needed_allocation_space(ah, alloc_state);
1693
1694 /* ix holds the number of areas found on other PVs */
1695 do {
1696 if (log_iteration_count) {
1697 log_debug("Found %u areas for %" PRIu32 " parallel areas and %" PRIu32 " log areas so far.", ix, devices_needed, alloc_state->log_area_count_still_needed);
1698 } else if (iteration_count)
1699 log_debug("Filled %u out of %u preferred areas so far.", preferred_count, ix_offset);
1700
1701 /*
1702 * Provide for escape from the loop if no progress is made.
1703 * This should not happen: ALLOC_ANYWHERE should be able to use
1704 * all available space. (If there aren't enough extents, the code
1705 * should not reach this point.)
1706 */
1707 last_ix = ix;
1708
1709 /*
1710 * Put the smallest area of each PV that is at least the
1711 * size we need into areas array. If there isn't one
1712 * that fits completely and we're allowed more than one
1713 * LV segment, then take the largest remaining instead.
1714 */
1715 dm_list_iterate_items(pvm, pvms) {
1716 /* PV-level checks */
1717 if (dm_list_empty(&pvm->areas))
1718 continue; /* Next PV */
1719
1720 if (alloc_parms->alloc != ALLOC_ANYWHERE) {
1721 /* Don't allocate onto the log PVs */
1722 if (ah->log_area_count)
1723 dm_list_iterate_items(aa, &ah->alloced_areas[ah->area_count])
1724 for (s = 0; s < ah->log_area_count; s++)
1725 if (!aa[s].pv)
1726 goto next_pv;
1727
1728 /* FIXME Split into log and non-log parallel_pvs and only check the log ones if log_iteration? */
1729 				/* (I've temporarily disabled the check.) */
1730 /* Avoid PVs used by existing parallel areas */
1731 if (!log_iteration_count && parallel_pvs && _pv_is_parallel(pvm->pv, parallel_pvs))
1732 goto next_pv;
1733
1734 /*
1735 * Avoid PVs already set aside for log.
1736 * We only reach here if there were enough PVs for the main areas but
1737 * not enough for the logs.
1738 */
1739 if (log_iteration_count) {
1740 for (s = devices_needed; s < ix + ix_offset; s++)
1741 if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
1742 goto next_pv;
1743 /* On a second pass, avoid PVs already used in an uncommitted area */
1744 } else if (iteration_count)
1745 for (s = 0; s < devices_needed; s++)
1746 if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
1747 goto next_pv;
1748 }
1749
1750 already_found_one = 0;
1751 /* First area in each list is the largest */
1752 dm_list_iterate_items(pva, &pvm->areas) {
1753 /*
1754 * There are two types of allocations, which can't be mixed at present.
1755 * PREFERRED are stored immediately in a specific parallel slot.
1756 * USE_AREA are stored for later, then sorted and chosen from.
1757 */
1758 switch(_check_pva(ah, pva, max_to_allocate, alloc_parms,
1759 alloc_state, already_found_one, iteration_count, log_iteration_count)) {
1760
1761 case PREFERRED:
1762 preferred_count++;
1763 /* Fall through */
1764
1765 case NEXT_PV:
1766 goto next_pv;
1767
1768 case NEXT_AREA:
1769 continue;
1770
1771 case USE_AREA:
1772 /*
1773 * Except with ALLOC_ANYWHERE, replace first area with this
1774 * one which is smaller but still big enough.
1775 */
1776 if (!already_found_one ||
1777 alloc_parms->alloc == ALLOC_ANYWHERE) {
1778 ix++;
1779 already_found_one = 1;
1780 }
1781
1782 /* Reserve required amount of pva */
1783 if (!_reserve_required_area(ah, max_to_allocate, ix + ix_offset,
1784 pva, alloc_state, alloc_parms->alloc))
1785 return_0;
1786 }
1787
1788 }
1789
1790 next_pv:
1791 /* With ALLOC_ANYWHERE we ignore further PVs once we have at least enough areas */
1792 /* With cling and contiguous we stop if we found a match for *all* the areas */
1793 /* FIXME Rename these variables! */
1794 if ((alloc_parms->alloc == ALLOC_ANYWHERE &&
1795 ix + ix_offset >= devices_needed + alloc_state->log_area_count_still_needed) ||
1796 (preferred_count == ix_offset &&
1797 (ix_offset == devices_needed + alloc_state->log_area_count_still_needed)))
1798 break;
1799 }
1800 } while ((alloc_parms->alloc == ALLOC_ANYWHERE && last_ix != ix && ix < devices_needed + alloc_state->log_area_count_still_needed) ||
1801 /* With cling_to_alloced and normal, if there were gaps in the preferred areas, have a second iteration */
1802 (alloc_parms->alloc == ALLOC_NORMAL && preferred_count &&
1803 (preferred_count < ix_offset || alloc_state->log_area_count_still_needed) &&
1804 (alloc_parms->flags & A_CLING_TO_ALLOCED) && !iteration_count++) ||
1805 /* Extra iteration needed to fill log areas on PVs already used? */
1806 (alloc_parms->alloc == ALLOC_NORMAL && preferred_count == ix_offset && !ah->mirror_logs_separate &&
1807 (ix + preferred_count >= devices_needed) &&
1808 (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed) && !log_iteration_count++));
1809
1810 if (preferred_count < ix_offset && !(alloc_parms->flags & A_CLING_TO_ALLOCED))
1811 return 1;
1812
1813 if (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed)
1814 return 1;
1815
1816 /* Sort the areas so we allocate from the biggest */
1817 if (log_iteration_count) {
1818 if (ix > devices_needed + 1) {
1819 log_debug("Sorting %u log areas", ix - devices_needed);
1820 qsort(alloc_state->areas + devices_needed, ix - devices_needed, sizeof(*alloc_state->areas),
1821 _comp_area);
1822 }
1823 } else if (ix > 1) {
1824 log_debug("Sorting %u areas", ix);
1825 qsort(alloc_state->areas + ix_offset, ix, sizeof(*alloc_state->areas),
1826 _comp_area);
1827 }
1828
1829 /* If there are gaps in our preferred areas, fill them from the sorted part of the array */
1830 if (preferred_count && preferred_count != ix_offset) {
1831 for (s = 0; s < devices_needed; s++)
1832 if (!alloc_state->areas[s].pva) {
1833 alloc_state->areas[s].pva = alloc_state->areas[ix_offset].pva;
1834 alloc_state->areas[s].used = alloc_state->areas[ix_offset].used;
1835 alloc_state->areas[ix_offset++].pva = NULL;
1836 }
1837 }
1838
1839 /*
1840 * First time around, if there's a log, allocate it on the
1841 * smallest device that has space for it.
1842 */
1843 too_small_for_log_count = 0;
1844 ix_log_offset = 0;
1845
1846 /* FIXME This logic is due to its heritage and can be simplified! */
1847 if (alloc_state->log_area_count_still_needed) {
1848 /* How many areas are too small for the log? */
1849 while (too_small_for_log_count < ix_offset + ix &&
1850 (*(alloc_state->areas + ix_offset + ix - 1 -
1851 too_small_for_log_count)).used < ah->log_len)
1852 too_small_for_log_count++;
1853 ix_log_offset = ix_offset + ix - too_small_for_log_count - ah->log_area_count;
1854 }
1855
1856 if (ix + ix_offset < devices_needed +
1857 (alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
1858 too_small_for_log_count : 0))
1859 return 1;
1860
1861 /*
1862 * Finally add the space identified to the list of areas to be used.
1863 */
1864 if (!_alloc_parallel_area(ah, max_to_allocate, alloc_state, ix_log_offset))
1865 return_0;
1866
1867 /*
1868 * The log is always allocated the first time.
1869 */
1870 alloc_state->log_area_count_still_needed = 0;
1871
1872 return 1;
1873 }
1874
1875 /*
1876 * Choose sets of parallel areas to use, respecting any constraints
1877 * supplied in alloc_parms.
1878 */
1879 static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, struct alloc_parms *alloc_parms,
1880 struct dm_list *pvms, struct alloc_state *alloc_state)
1881 {
1882 uint32_t max_tmp;
1883 uint32_t max_to_allocate; /* Maximum extents to allocate this time */
1884 uint32_t old_allocated;
1885 uint32_t next_le;
1886 struct seg_pvs *spvs;
1887 struct dm_list *parallel_pvs;
1888
1889 /* FIXME This algorithm needs a lot of cleaning up! */
1890 /* FIXME anywhere doesn't find all space yet */
1891 do {
1892 parallel_pvs = NULL;
1893 max_to_allocate = alloc_parms->extents_still_needed - alloc_state->allocated;
1894
1895 /*
1896 * If there are existing parallel PVs, avoid them and reduce
1897 * the maximum we can allocate in one go accordingly.
1898 */
1899 if (ah->parallel_areas) {
1900 next_le = (alloc_parms->prev_lvseg ? alloc_parms->prev_lvseg->le + alloc_parms->prev_lvseg->len : 0) + alloc_state->allocated / ah->area_multiple;
1901 dm_list_iterate_items(spvs, ah->parallel_areas) {
1902 if (next_le >= spvs->le + spvs->len)
1903 continue;
1904
1905 max_tmp = max_to_allocate +
1906 alloc_state->allocated;
1907
1908 /*
1909 * Because a request that groups metadata and
1910 * data together will be split, we must adjust
1911 * the comparison accordingly.
1912 */
1913 if (ah->alloc_and_split_meta)
1914 max_tmp -= ah->log_len;
1915 if (max_tmp > (spvs->le + spvs->len) * ah->area_multiple) {
1916 max_to_allocate = (spvs->le + spvs->len) * ah->area_multiple - alloc_state->allocated;
1917 max_to_allocate += ah->alloc_and_split_meta ? ah->log_len : 0;
1918 }
1919 parallel_pvs = &spvs->pvs;
1920 break;
1921 }
1922 }
1923
1924 old_allocated = alloc_state->allocated;
1925
1926 if (!_find_some_parallel_space(ah, alloc_parms, pvms, alloc_state, parallel_pvs, max_to_allocate))
1927 return_0;
1928
1929 /*
1930 * If we didn't allocate anything this time with ALLOC_NORMAL and had
1931 * A_CLING_TO_ALLOCED set, try again without it.
1932 *
1933 * For ALLOC_NORMAL, if we did allocate something without the
1934 * flag set, set it and continue so that further allocations
1935 * remain on the same disks where possible.
1936 */
1937 if (old_allocated == alloc_state->allocated) {
1938 if ((alloc_parms->alloc == ALLOC_NORMAL) && (alloc_parms->flags & A_CLING_TO_ALLOCED))
1939 alloc_parms->flags &= ~A_CLING_TO_ALLOCED;
1940 else
1941 break; /* Give up */
1942 } else if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL &&
1943 !(alloc_parms->flags & A_CLING_TO_ALLOCED))
1944 alloc_parms->flags |= A_CLING_TO_ALLOCED;
1945 } while ((alloc_parms->alloc != ALLOC_CONTIGUOUS) && alloc_state->allocated != alloc_parms->extents_still_needed && (alloc_parms->flags & A_CAN_SPLIT));
1946
1947 return 1;
1948 }
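/*
 * Illustrative sketch (not part of the original file): the ALLOC_NORMAL
 * retry pattern described above, isolated from the allocator internals.
 * If a pass makes no progress while A_CLING_TO_ALLOCED is set, the flag
 * is dropped and the pass repeated; if a pass does make progress without
 * the flag, it is set so later extents stay on the same disks (when
 * maximise_cling is enabled).  _try_one_pass() is a hypothetical stand-in
 * for _find_some_parallel_space(); the block is excluded from the build.
 */
#if 0
static void _cling_retry_sketch(struct alloc_handle *ah,
				struct alloc_parms *alloc_parms,
				struct alloc_state *alloc_state)
{
	uint32_t old_allocated;

	do {
		old_allocated = alloc_state->allocated;

		if (!_try_one_pass(ah, alloc_parms, alloc_state))	/* hypothetical */
			break;

		if (old_allocated == alloc_state->allocated) {
			if (!(alloc_parms->flags & A_CLING_TO_ALLOCED))
				break;	/* No progress and nothing left to relax */
			alloc_parms->flags &= ~A_CLING_TO_ALLOCED;
		} else if (!(alloc_parms->flags & A_CLING_TO_ALLOCED))
			alloc_parms->flags |= A_CLING_TO_ALLOCED;
	} while (alloc_state->allocated < alloc_parms->extents_still_needed);
}
#endif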
1949
1950 /*
1951 * Allocate several segments, each the same size, in parallel.
1952 * If mirrored_pv and mirrored_pe are supplied, they are used as
1953 * the first area, and additional areas are allocated parallel to it.
1954 */
1955 static int _allocate(struct alloc_handle *ah,
1956 struct volume_group *vg,
1957 struct logical_volume *lv,
1958 unsigned can_split,
1959 struct dm_list *allocatable_pvs)
1960 {
1961 uint32_t old_allocated;
1962 struct lv_segment *prev_lvseg = NULL;
1963 int r = 0;
1964 struct dm_list *pvms;
1965 alloc_policy_t alloc;
1966 struct alloc_parms alloc_parms;
1967 struct alloc_state alloc_state;
1968
1969 alloc_state.allocated = lv ? lv->le_count : 0;
1970
1971 if (alloc_state.allocated >= ah->new_extents && !ah->log_area_count) {
1972 log_error("_allocate called with no work to do!");
1973 return 1;
1974 }
1975
1976 if (ah->area_multiple > 1 &&
1977 (ah->new_extents - alloc_state.allocated) % ah->area_multiple) {
1978 log_error("Number of extents requested (%d) needs to be divisible by %d.",
1979 ah->new_extents - alloc_state.allocated,
1980 ah->area_multiple);
1981 return 0;
1982 }
1983
1984 alloc_state.log_area_count_still_needed = ah->log_area_count;
1985
1986 if (ah->alloc == ALLOC_CONTIGUOUS)
1987 can_split = 0;
1988
1989 if (lv && !dm_list_empty(&lv->segments))
1990 prev_lvseg = dm_list_item(dm_list_last(&lv->segments),
1991 struct lv_segment);
1992 /*
1993 * Build the sets of available areas on the pv's.
1994 */
1995 if (!(pvms = create_pv_maps(ah->mem, vg, allocatable_pvs)))
1996 return_0;
1997
1998 if (!_log_parallel_areas(ah->mem, ah->parallel_areas))
1999 stack;
2000
2001 alloc_state.areas_size = dm_list_size(pvms);
2002 if (alloc_state.areas_size &&
2003 alloc_state.areas_size < (ah->area_count + ah->parity_count + ah->log_area_count)) {
2004 if (ah->alloc != ALLOC_ANYWHERE && ah->mirror_logs_separate) {
2005 log_error("Not enough PVs with free space available "
2006 "for parallel allocation.");
2007 log_error("Consider --alloc anywhere if desperate.");
2008 return 0;
2009 }
2010 alloc_state.areas_size = ah->area_count + ah->parity_count + ah->log_area_count;
2011 }
2012
2013 /* Upper bound if none of the PVs in prev_lvseg is in pvms */
2014 /* FIXME Work size out properly */
2015 if (prev_lvseg)
2016 alloc_state.areas_size += _stripes_per_mimage(prev_lvseg) * prev_lvseg->area_count;
2017
2018 /* Allocate an array of pv_areas to hold the largest space on each PV */
2019 if (!(alloc_state.areas = dm_malloc(sizeof(*alloc_state.areas) * alloc_state.areas_size))) {
2020 log_error("Couldn't allocate areas array.");
2021 return 0;
2022 }
2023
2024 /*
2025 * cling includes implicit cling_by_tags
2026 * but it does nothing unless the lvm.conf setting is present.
2027 */
2028 if (ah->alloc == ALLOC_CLING)
2029 ah->alloc = ALLOC_CLING_BY_TAGS;
2030
2031 /* Attempt each defined allocation policy in turn */
2032 for (alloc = ALLOC_CONTIGUOUS; alloc <= ah->alloc; alloc++) {
2033 /* Skip cling_by_tags if no list defined */
2034 if (alloc == ALLOC_CLING_BY_TAGS && !ah->cling_tag_list_cn)
2035 continue;
2036 old_allocated = alloc_state.allocated;
2037 log_debug("Trying allocation using %s policy.", get_alloc_string(alloc));
2038
2039 if (!_sufficient_pes_free(ah, pvms, alloc_state.allocated, ah->new_extents))
2040 goto_out;
2041
2042 _init_alloc_parms(ah, &alloc_parms, alloc, prev_lvseg,
2043 can_split, alloc_state.allocated,
2044 ah->new_extents);
2045
2046 if (!_find_max_parallel_space_for_one_policy(ah, &alloc_parms, pvms, &alloc_state))
2047 goto_out;
2048
2049 if ((alloc_state.allocated == ah->new_extents && !alloc_state.log_area_count_still_needed) ||
2050 (!can_split && (alloc_state.allocated != old_allocated)))
2051 break;
2052 }
2053
2054 if (alloc_state.allocated != ah->new_extents) {
2055 log_error("Insufficient suitable %sallocatable extents "
2056 "for logical volume %s: %u more required",
2057 can_split ? "" : "contiguous ",
2058 lv ? lv->name : "",
2059 (ah->new_extents - alloc_state.allocated) * ah->area_count
2060 / ah->area_multiple);
2061 goto out;
2062 }
2063
2064 if (alloc_state.log_area_count_still_needed) {
2065 log_error("Insufficient free space for log allocation "
2066 "for logical volume %s.",
2067 lv ? lv->name : "");
2068 goto out;
2069 }
2070
2071 r = 1;
2072
2073 out:
2074 dm_free(alloc_state.areas);
2075 return r;
2076 }
2077
2078 int lv_add_virtual_segment(struct logical_volume *lv, uint64_t status,
2079 uint32_t extents, const struct segment_type *segtype,
2080 const char *thin_pool_name)
2081 {
2082 struct lv_segment *seg;
2083 struct logical_volume *thin_pool_lv = NULL;
2084 struct lv_list *lvl;
2085 uint32_t size;
2086
2087 if (thin_pool_name) {
2088 if (!(lvl = find_lv_in_vg(lv->vg, thin_pool_name))) {
2089 log_error("Unable to find existing pool LV %s in VG %s.",
2090 thin_pool_name, lv->vg->name);
2091 return 0;
2092 }
2093 thin_pool_lv = lvl->lv;
2094 size = first_seg(thin_pool_lv)->chunk_size;
2095 if (lv->vg->extent_size < size) {
2096 /* Align extents on chunk boundary size */
2097 size = ((uint64_t)lv->vg->extent_size * extents + size - 1) /
2098 size * size / lv->vg->extent_size;
2099 if (size != extents) {
2100 log_print("Rounding size (%d extents) up to chunk boundary "
2101 "size (%d extents).", extents, size);
2102 extents = size;
2103 }
2104 }
2105 }
2106
2107 if (!dm_list_empty(&lv->segments) &&
2108 (seg = last_seg(lv)) && (seg->segtype == segtype)) {
2109 seg->area_len += extents;
2110 seg->len += extents;
2111 } else {
2112 if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents,
2113 status, 0, NULL, thin_pool_lv, 0,
2114 extents, 0, 0, 0, NULL))) {
2115 log_error("Couldn't allocate new zero segment.");
2116 return 0;
2117 }
2118 lv->status |= VIRTUAL;
2119 dm_list_add(&lv->segments, &seg->list);
2120 }
2121
2122 lv->le_count += extents;
2123 lv->size += (uint64_t) extents * lv->vg->extent_size;
2124
2125 return 1;
2126 }
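/*
 * Worked example (illustrative, not part of the original file) of the
 * chunk-boundary rounding above: with an extent size of 128 sectors
 * (64KiB) and a thin pool chunk size of 512 sectors (256KiB), a request
 * for 10 extents (1280 sectors) is rounded up to a whole number of
 * chunks:
 *
 *   (128 * 10 + 512 - 1) / 512 = 3 chunks
 *   3 * 512 / 128              = 12 extents
 *
 * so 'extents' becomes 12 and "Rounding size (10 extents) up to chunk
 * boundary size (12 extents)." is printed.
 */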
2127
2128 /*
2129 * Entry point for all extent allocations.
2130 */
2131 struct alloc_handle *allocate_extents(struct volume_group *vg,
2132 struct logical_volume *lv,
2133 const struct segment_type *segtype,
2134 uint32_t stripes,
2135 uint32_t mirrors, uint32_t log_count,
2136 uint32_t region_size, uint32_t extents,
2137 struct dm_list *allocatable_pvs,
2138 alloc_policy_t alloc,
2139 struct dm_list *parallel_areas)
2140 {
2141 struct alloc_handle *ah;
2142 uint32_t new_extents;
2143
2144 if (segtype_is_virtual(segtype)) {
2145 log_error("allocate_extents does not handle virtual segments");
2146 return NULL;
2147 }
2148
2149 if (!allocatable_pvs) {
2150 log_error(INTERNAL_ERROR "Missing allocatable pvs.");
2151 return NULL;
2152 }
2153
2154 if (vg->fid->fmt->ops->segtype_supported &&
2155 !vg->fid->fmt->ops->segtype_supported(vg->fid, segtype)) {
2156 log_error("Metadata format (%s) does not support required "
2157 "LV segment type (%s).", vg->fid->fmt->name,
2158 segtype->name);
2159 log_error("Consider changing the metadata format by running "
2160 "vgconvert.");
2161 return NULL;
2162 }
2163
2164 if (alloc >= ALLOC_INHERIT)
2165 alloc = vg->alloc;
2166
2167 new_extents = (lv ? lv->le_count : 0) + extents;
2168 if (!(ah = _alloc_init(vg->cmd, vg->cmd->mem, segtype, alloc,
2169 new_extents, mirrors, stripes, log_count,
2170 vg->extent_size, region_size,
2171 parallel_areas)))
2172 return_NULL;
2173
2174 if (!_allocate(ah, vg, lv, 1, allocatable_pvs)) {
2175 alloc_destroy(ah);
2176 return_NULL;
2177 }
2178
2179 return ah;
2180 }
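/*
 * Illustrative sketch (not part of the original file): the typical
 * allocate / add-segment / destroy sequence behind a simple striped
 * extension.  lv_extend() below is the real single-step wrapper; this
 * only shows the shape of the calls.  Passing ALLOC_INHERIT makes
 * allocate_extents() fall back to the VG's allocation policy.  Excluded
 * from the build.
 */
#if 0
static int _extend_striped_sketch(struct logical_volume *lv,
				  const struct segment_type *segtype,
				  uint32_t stripes, uint32_t stripe_size,
				  uint32_t extents,
				  struct dm_list *allocatable_pvs)
{
	struct alloc_handle *ah;
	int r = 1;

	if (!(ah = allocate_extents(lv->vg, lv, segtype, stripes,
				    1 /* mirrors */, 0 /* log_count */,
				    0 /* region_size */, extents,
				    allocatable_pvs, ALLOC_INHERIT, NULL)))
		return_0;

	if (!lv_add_segment(ah, 0, ah->area_count, lv, segtype,
			    stripe_size, 0u, 0))
		r = 0;

	alloc_destroy(ah);

	return r;
}
#endif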
2181
2182 /*
2183 * Add new segments to an LV from supplied list of areas.
2184 */
2185 int lv_add_segment(struct alloc_handle *ah,
2186 uint32_t first_area, uint32_t num_areas,
2187 struct logical_volume *lv,
2188 const struct segment_type *segtype,
2189 uint32_t stripe_size,
2190 uint64_t status,
2191 uint32_t region_size)
2192 {
2193 if (!segtype) {
2194 log_error("Missing segtype in lv_add_segment().");
2195 return 0;
2196 }
2197
2198 if (segtype_is_virtual(segtype)) {
2199 log_error("lv_add_segment cannot handle virtual segments");
2200 return 0;
2201 }
2202
2203 if ((status & MIRROR_LOG) && dm_list_size(&lv->segments)) {
2204 log_error("Log segments can only be added to an empty LV");
2205 return 0;
2206 }
2207
2208 if (!_setup_alloced_segments(lv, &ah->alloced_areas[first_area],
2209 num_areas, status,
2210 stripe_size, segtype,
2211 region_size))
2212 return_0;
2213
2214 if ((segtype->flags & SEG_CAN_SPLIT) && !lv_merge_segments(lv)) {
2215 log_error("Couldn't merge segments after extending "
2216 "logical volume.");
2217 return 0;
2218 }
2219
2220 if (lv->vg->fid->fmt->ops->lv_setup &&
2221 !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
2222 return_0;
2223
2224 return 1;
2225 }
2226
2227 /*
2228 * "mirror" segment type doesn't support split.
2229 * So, when adding mirrors to linear LV segment, first split it,
2230 * then convert it to "mirror" and add areas.
2231 */
2232 static struct lv_segment *_convert_seg_to_mirror(struct lv_segment *seg,
2233 uint32_t region_size,
2234 struct logical_volume *log_lv)
2235 {
2236 struct lv_segment *newseg;
2237 uint32_t s;
2238
2239 if (!seg_is_striped(seg)) {
2240 log_error("Can't convert non-striped segment to mirrored.");
2241 return NULL;
2242 }
2243
2244 if (seg->area_count > 1) {
2245 log_error("Can't convert striped segment with multiple areas "
2246 "to mirrored.");
2247 return NULL;
2248 }
2249
2250 if (!(newseg = alloc_lv_segment(get_segtype_from_string(seg->lv->vg->cmd, "mirror"),
2251 seg->lv, seg->le, seg->len,
2252 seg->status, seg->stripe_size,
2253 log_lv, NULL,
2254 seg->area_count, seg->area_len,
2255 seg->chunk_size, region_size,
2256 seg->extents_copied, NULL))) {
2257 log_error("Couldn't allocate converted LV segment");
2258 return NULL;
2259 }
2260
2261 for (s = 0; s < seg->area_count; s++)
2262 if (!move_lv_segment_area(newseg, s, seg, s))
2263 return_NULL;
2264
2265 seg->pvmove_source_seg = NULL; /* Not maintained after allocation */
2266
2267 dm_list_add(&seg->list, &newseg->list);
2268 dm_list_del(&seg->list);
2269
2270 return newseg;
2271 }
2272
2273 /*
2274 * Add new areas to mirrored segments
2275 */
2276 int lv_add_mirror_areas(struct alloc_handle *ah,
2277 struct logical_volume *lv, uint32_t le,
2278 uint32_t region_size)
2279 {
2280 struct alloced_area *aa;
2281 struct lv_segment *seg;
2282 uint32_t current_le = le;
2283 uint32_t s, old_area_count, new_area_count;
2284
2285 dm_list_iterate_items(aa, &ah->alloced_areas[0]) {
2286 if (!(seg = find_seg_by_le(lv, current_le))) {
2287 log_error("Failed to find segment for %s extent %"
2288 PRIu32, lv->name, current_le);
2289 return 0;
2290 }
2291
2292 /* Allocator assures aa[0].len <= seg->area_len */
2293 if (aa[0].len < seg->area_len) {
2294 if (!lv_split_segment(lv, seg->le + aa[0].len)) {
2295 log_error("Failed to split segment at %s "
2296 "extent %" PRIu32, lv->name, le);
2297 return 0;
2298 }
2299 }
2300
2301 if (!seg_is_mirrored(seg) &&
2302 (!(seg = _convert_seg_to_mirror(seg, region_size, NULL))))
2303 return_0;
2304
2305 old_area_count = seg->area_count;
2306 new_area_count = old_area_count + ah->area_count;
2307
2308 if (!_lv_segment_add_areas(lv, seg, new_area_count))
2309 return_0;
2310
2311 for (s = 0; s < ah->area_count; s++) {
2312 if (!set_lv_segment_area_pv(seg, s + old_area_count,
2313 aa[s].pv, aa[s].pe))
2314 return_0;
2315 }
2316
2317 current_le += seg->area_len;
2318 }
2319
2320 lv->status |= MIRRORED;
2321
2322 if (lv->vg->fid->fmt->ops->lv_setup &&
2323 !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
2324 return_0;
2325
2326 return 1;
2327 }
2328
2329 /*
2330 * Add mirror image LVs to mirrored segments
2331 */
2332 int lv_add_mirror_lvs(struct logical_volume *lv,
2333 struct logical_volume **sub_lvs,
2334 uint32_t num_extra_areas,
2335 uint64_t status, uint32_t region_size)
2336 {
2337 struct lv_segment *seg;
2338 uint32_t old_area_count, new_area_count;
2339 uint32_t m;
2340 struct segment_type *mirror_segtype;
2341
2342 seg = first_seg(lv);
2343
2344 if (dm_list_size(&lv->segments) != 1 || seg_type(seg, 0) != AREA_LV) {
2345 log_error("Mirror layer must be inserted before adding mirrors");
2346 return 0;
2347 }
2348
2349 mirror_segtype = get_segtype_from_string(lv->vg->cmd, "mirror");
2350 if (seg->segtype != mirror_segtype)
2351 if (!(seg = _convert_seg_to_mirror(seg, region_size, NULL)))
2352 return_0;
2353
2354 if (region_size && region_size != seg->region_size) {
2355 log_error("Conflicting region_size");
2356 return 0;
2357 }
2358
2359 old_area_count = seg->area_count;
2360 new_area_count = old_area_count + num_extra_areas;
2361
2362 if (!_lv_segment_add_areas(lv, seg, new_area_count)) {
2363 log_error("Failed to allocate widened LV segment for %s.",
2364 lv->name);
2365 return 0;
2366 }
2367
2368 for (m = 0; m < old_area_count; m++)
2369 seg_lv(seg, m)->status |= status;
2370
2371 for (m = old_area_count; m < new_area_count; m++) {
2372 if (!set_lv_segment_area_lv(seg, m, sub_lvs[m - old_area_count],
2373 0, status))
2374 return_0;
2375 lv_set_hidden(sub_lvs[m - old_area_count]);
2376 }
2377
2378 lv->status |= MIRRORED;
2379
2380 return 1;
2381 }
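/*
 * Illustrative sketch (not part of the original file): attaching two
 * already-prepared mirror image LVs to an LV whose mirror layer has been
 * inserted.  The images are assumed to have been created and extended
 * elsewhere (as the mirror code does).  Excluded from the build.
 */
#if 0
static int _attach_mimages_sketch(struct logical_volume *lv,
				  struct logical_volume *mimage0,
				  struct logical_volume *mimage1,
				  uint32_t region_size)
{
	struct logical_volume *sub_lvs[2];

	sub_lvs[0] = mimage0;
	sub_lvs[1] = mimage1;

	return lv_add_mirror_lvs(lv, sub_lvs, 2, MIRROR_IMAGE, region_size);
}
#endif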
2382
2383 /*
2384 * Turn an empty LV into a mirror log.
2385 *
2386 * FIXME: Mirrored logs are built inefficiently.
2387 * A mirrored log currently uses the same layout that a mirror
2388 * LV uses. The mirror layer sits on top of AREA_LVs which form the
2389 * legs, rather than on AREA_PVs. This is done to allow re-use of the
2390 * various mirror functions to also handle the mirrored LV that makes
2391 * up the log.
2392 *
2393 * If we used AREA_PVs under the mirror layer of a log, we could
2394 * assemble it all at once by calling 'lv_add_segment' with the
2395 * appropriate segtype (mirror/stripe), like this:
2396 * lv_add_segment(ah, ah->area_count, ah->log_area_count,
2397 * log_lv, segtype, 0, MIRROR_LOG, 0);
2398 *
2399 * For now, we use the same mechanism to build a mirrored log as we
2400 * do for building a mirrored LV: 1) create initial LV, 2) add a
2401 * mirror layer, and 3) add the remaining copy LVs
2402 */
2403 int lv_add_log_segment(struct alloc_handle *ah, uint32_t first_area,
2404 struct logical_volume *log_lv, uint64_t status)
2405 {
2406
2407 return lv_add_segment(ah, ah->area_count + first_area, 1, log_lv,
2408 get_segtype_from_string(log_lv->vg->cmd,
2409 "striped"),
2410 0, status, 0);
2411 }
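/*
 * Illustrative sketch (not part of the original file): giving a freshly
 * created, still-empty log LV its initial segment from space set aside
 * by allocate_extents() with log_count > 0.  A real caller (the mirror
 * code) then stacks a mirror layer on top when a mirrored log is wanted,
 * as described in the comment above.  Flags and the name are only
 * examples.  Excluded from the build.
 */
#if 0
static struct logical_volume *_make_log_sketch(struct alloc_handle *ah,
						struct volume_group *vg,
						const char *log_name)
{
	struct logical_volume *log_lv;

	if (!(log_lv = lv_create_empty(log_name, NULL, LVM_READ | LVM_WRITE,
				       ALLOC_INHERIT, vg)))
		return_NULL;

	if (!lv_add_log_segment(ah, 0, log_lv, MIRROR_LOG))
		return_NULL;

	return log_lv;
}
#endif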
2412
2413 static int _lv_insert_empty_sublvs(struct logical_volume *lv,
2414 const struct segment_type *segtype,
2415 uint32_t stripe_size, uint32_t region_size,
2416 uint32_t devices)
2417 {
2418 struct logical_volume *sub_lv;
2419 uint32_t i;
2420 uint64_t sub_lv_status = 0;
2421 const char *layer_name;
2422 size_t len = strlen(lv->name) + 32;
2423 char img_name[len];
2424 struct lv_segment *mapseg;
2425
2426 if (lv->le_count || !dm_list_empty(&lv->segments)) {
2427 log_error(INTERNAL_ERROR
2428 "Non-empty LV passed to _lv_insert_empty_sublv");
2429 return 0;
2430 }
2431
2432 if (segtype_is_raid(segtype)) {
2433 lv->status |= RAID;
2434 sub_lv_status = RAID_IMAGE;
2435 layer_name = "rimage";
2436 } else if (segtype_is_mirrored(segtype)) {
2437 lv->status |= MIRRORED;
2438 sub_lv_status = MIRROR_IMAGE;
2439 layer_name = "mimage";
2440 } else
2441 return_0;
2442
2443 /*
2444 * First, create our top-level segment for our top-level LV
2445 */
2446 if (!(mapseg = alloc_lv_segment(segtype, lv, 0, 0, lv->status,
2447 stripe_size, NULL, NULL,
2448 devices, 0, 0, region_size, 0, NULL))) {
2449 log_error("Failed to create mapping segment for %s", lv->name);
2450 return 0;
2451 }
2452
2453 /*
2454 * Next, create all of our sub_lv's and link them in.
2455 */
2456 for (i = 0; i < devices; i++) {
2457 /* Data LVs */
2458 if (devices > 1) {
2459 if (dm_snprintf(img_name, len, "%s_%s_%u",
2460 lv->name, layer_name, i) < 0)
2461 return_0;
2462 } else {
2463 if (dm_snprintf(img_name, len, "%s_%s",
2464 lv->name, layer_name) < 0)
2465 return_0;
2466 }
2467
2468 /* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
2469 if (!(sub_lv = lv_create_empty(img_name, NULL,
2470 LVM_READ | LVM_WRITE,
2471 lv->alloc, lv->vg)))
2472 return_0;
2473
2474 if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, sub_lv_status))
2475 return_0;
2476
2477 /* Metadata LVs for raid */
2478 if (segtype_is_raid(segtype)) {
2479 if (dm_snprintf(img_name, len, "%s_rmeta_%u", lv->name, i) < 0)
2480 return_0;
2481 } else
2482 continue;
2483
2484 /* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
2485 if (!(sub_lv = lv_create_empty(img_name, NULL,
2486 LVM_READ | LVM_WRITE,
2487 lv->alloc, lv->vg)))
2488 return_0;
2489
2490 if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, RAID_META))
2491 return_0;
2492 }
2493
2494 dm_list_add(&lv->segments, &mapseg->list);
2495
2496 return 1;
2497 }
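/*
 * Illustrative example (not part of the original file) of the sub LV
 * names the function above produces for a top-level LV called "lv0":
 *
 *   raid, 3 devices:    lv0_rimage_0, lv0_rimage_1, lv0_rimage_2
 *                       lv0_rmeta_0,  lv0_rmeta_1,  lv0_rmeta_2
 *   mirror, 2 devices:  lv0_mimage_0, lv0_mimage_1
 *   single device:      lv0_rimage or lv0_mimage (no index on the image)
 */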
2498
2499 static int _lv_extend_layered_lv(struct alloc_handle *ah,
2500 struct logical_volume *lv,
2501 uint32_t extents, uint32_t first_area,
2502 uint32_t stripes, uint32_t stripe_size)
2503 {
2504 const struct segment_type *segtype;
2505 struct logical_volume *sub_lv, *meta_lv;
2506 struct lv_segment *seg;
2507 uint32_t fa, s;
2508 int clear_metadata = 0;
2509
2510 segtype = get_segtype_from_string(lv->vg->cmd, "striped");
2511
2512 /*
2513 * The component devices of a "striped" LV all go in the same
2514 * LV. However, RAID has an LV for each device - making the
2515 * 'stripes' and 'stripe_size' parameters meaningless.
2516 */
2517 if (seg_is_raid(first_seg(lv))) {
2518 stripes = 1;
2519 stripe_size = 0;
2520 }
2521
2522 seg = first_seg(lv);
2523 for (fa = first_area, s = 0; s < seg->area_count; s++) {
2524 if (is_temporary_mirror_layer(seg_lv(seg, s))) {
2525 if (!_lv_extend_layered_lv(ah, seg_lv(seg, s), extents,
2526 fa, stripes, stripe_size))
2527 return_0;
2528 fa += lv_mirror_count(seg_lv(seg, s));
2529 continue;
2530 }
2531
2532 sub_lv = seg_lv(seg, s);
2533 if (!lv_add_segment(ah, fa, stripes, sub_lv, segtype,
2534 stripe_size, sub_lv->status, 0)) {
2535 log_error("Aborting. Failed to extend %s in %s.",
2536 sub_lv->name, lv->name);
2537 return 0;
2538 }
2539
2540 /* Extend metadata LVs only on initial creation */
2541 if (seg_is_raid(seg) && !lv->le_count) {
2542 if (!seg->meta_areas) {
2543 log_error("No meta_areas for RAID type");
2544 return 0;
2545 }
2546
2547 meta_lv = seg_metalv(seg, s);
2548 if (!lv_add_segment(ah, fa + seg->area_count, 1,
2549 meta_lv, segtype, 0,
2550 meta_lv->status, 0)) {
2551 log_error("Failed to extend %s in %s.",
2552 meta_lv->name, lv->name);
2553 return 0;
2554 }
2555 lv_set_visible(meta_lv);
2556 clear_metadata = 1;
2557 }
2558
2559 fa += stripes;
2560 }
2561
2562 if (clear_metadata) {
2563 /*
2564 * We must clear the metadata areas upon creation.
2565 */
2566 if (!vg_write(lv->vg) || !vg_commit(lv->vg))
2567 return_0;
2568
2569 for (s = 0; s < seg->area_count; s++) {
2570 meta_lv = seg_metalv(seg, s);
2571 if (!activate_lv(meta_lv->vg->cmd, meta_lv)) {
2572 log_error("Failed to activate %s/%s for clearing",
2573 meta_lv->vg->name, meta_lv->name);
2574 return 0;
2575 }
2576
2577 log_verbose("Clearing metadata area of %s/%s",
2578 meta_lv->vg->name, meta_lv->name);
2579 /*
2580 * Rather than wiping the whole of meta_lv, we can simply
2581 * wipe '1' (just the start of the device) to remove the
2582 * superblock of any previous RAID devices. It is much quicker.
2583 */
2584 if (!set_lv(meta_lv->vg->cmd, meta_lv, 1, 0)) {
2585 log_error("Failed to zero %s/%s",
2586 meta_lv->vg->name, meta_lv->name);
2587 return 0;
2588 }
2589
2590 if (!deactivate_lv(meta_lv->vg->cmd, meta_lv)) {
2591 log_error("Failed to deactivate %s/%s",
2592 meta_lv->vg->name, meta_lv->name);
2593 return 0;
2594 }
2595 lv_set_hidden(meta_lv);
2596 }
2597 }
2598
2599 seg->area_len += extents;
2600 seg->len += extents;
2601 lv->le_count += extents;
2602 lv->size += (uint64_t) extents * lv->vg->extent_size;
2603
2604 return 1;
2605 }
2606
2607 /*
2608 * Entry point for single-step LV allocation + extension.
2609 */
2610 int lv_extend(struct logical_volume *lv,
2611 const struct segment_type *segtype,
2612 uint32_t stripes, uint32_t stripe_size,
2613 uint32_t mirrors, uint32_t region_size,
2614 uint32_t extents, const char *thin_pool_name,
2615 struct dm_list *allocatable_pvs, alloc_policy_t alloc)
2616 {
2617 int r = 1;
2618 int log_count = 0;
2619 struct alloc_handle *ah;
2620 uint32_t sub_lv_count;
2621
2622 log_very_verbose("Extending segment type, %s", segtype->name);
2623
2624 if (segtype_is_virtual(segtype))
2625 return lv_add_virtual_segment(lv, 0u, extents, segtype, thin_pool_name);
2626
2627 if (!lv->le_count && segtype_is_thin_pool(segtype)) {
2628 /* Thin pool allocation treats its metadata device like a mirror log. */
2629 /* FIXME Allow pool and data on same device with NORMAL */
2630 /* FIXME Support striped metadata pool */
2631 log_count = 1;
2632 } else if (segtype_is_raid(segtype) && !lv->le_count)
2633 log_count = mirrors * stripes;
2634 /* FIXME log_count should be 1 for mirrors */
2635
2636 if (!(ah = allocate_extents(lv->vg, lv, segtype, stripes, mirrors,
2637 log_count, region_size, extents,
2638 allocatable_pvs, alloc, NULL)))
2639 return_0;
2640
2641 if (segtype_is_thin_pool(segtype)) {
2642 if (!lv->le_count) {
2643 if (!(r = extend_pool(lv, segtype, ah, stripes, stripe_size)))
2644 stack;
2645 } else if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0,
2646 stripes, stripe_size)))
2647 stack;
2648 } else if (!segtype_is_mirrored(segtype) && !segtype_is_raid(segtype)) {
2649 if (!(r = lv_add_segment(ah, 0, ah->area_count, lv, segtype,
2650 stripe_size, 0u, 0)))
2651 stack;
2652 } else {
2653 /*
2654 * For RAID, all the devices are AREA_LV.
2655 * However, for 'mirror on stripe' using non-RAID targets,
2656 * the mirror legs are AREA_LV while the stripes underneath
2657 * are AREA_PV.
2658 */
2659 if (segtype_is_raid(segtype))
2660 sub_lv_count = mirrors * stripes + segtype->parity_devs;
2661 else
2662 sub_lv_count = mirrors;
2663
2664 if (!lv->le_count &&
2665 !(r = _lv_insert_empty_sublvs(lv, segtype, stripe_size,
2666 region_size, sub_lv_count))) {
2667 log_error("Failed to insert layer for %s", lv->name);
2668 goto out;
2669 }
2670
2671 if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0,
2672 stripes, stripe_size)))
2673 goto_out;
2674
2675 /*
2676 * If we are expanding an existing mirror, we can skip the
2677 * resync of the extension if the LV is currently in-sync
2678 * and the LV has the LV_NOTSYNCED flag set.
2679 */
2680 if ((lv->le_count != extents) &&
2681 segtype_is_mirrored(segtype) &&
2682 (lv->status & LV_NOTSYNCED)) {
2683 percent_t sync_percent = PERCENT_INVALID;
2684
2685 if (!lv_is_active(lv)) {
2686 log_print("%s/%s is not active."
2687 " Unable to get sync percent.",
2688 lv->vg->name, lv->name);
2689 if (yes_no_prompt("Do full resync of extended "
2690 "portion of %s/%s? [y/n]: ",
2691 lv->vg->name, lv->name) == 'y')
2692 goto out;
2693 r = 0;
2694 goto out;
2695 }
2696
2697 if (!(r = lv_mirror_percent(lv->vg->cmd, lv, 0,
2698 &sync_percent, NULL))) {
2699 log_error("Failed to get sync percent for %s/%s",
2700 lv->vg->name, lv->name);
2701 goto out;
2702 } else if (sync_percent == PERCENT_100) {
2703 log_verbose("Skipping initial resync for "
2704 "extended portion of %s/%s",
2705 lv->vg->name, lv->name);
2706 init_mirror_in_sync(1);
2707 lv->status |= LV_NOTSYNCED;
2708 } else {
2709 log_error("%s/%s cannot be extended while"
2710 " it is recovering.",
2711 lv->vg->name, lv->name);
2712 r = 0;
2713 goto out;
2714 }
2715 }
2716 }
2717
2718 out:
2719 alloc_destroy(ah);
2720 return r;
2721 }
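/*
 * Illustrative sketch (not part of the original file): extending an
 * existing non-mirrored LV by 10 extents as two stripes with a 128-sector
 * (64KiB) stripe size, allowing allocation from every PV in the VG.  All
 * parameter values are made up for the example.  Excluded from the build.
 */
#if 0
static int _grow_lv_sketch(struct logical_volume *lv)
{
	const struct segment_type *segtype;

	if (!(segtype = get_segtype_from_string(lv->vg->cmd, "striped")))
		return_0;

	return lv_extend(lv, segtype, 2 /* stripes */, 128 /* stripe_size */,
			 0 /* mirrors */, 0 /* region_size */,
			 10 /* extents */, NULL /* thin_pool_name */,
			 &lv->vg->pvs, ALLOC_INHERIT);
}
#endif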
2722
2723 /*
2724 * Minimal LV renaming function.
2725 * Metadata transaction should be made by caller.
2726 * Assumes new_name is allocated from cmd->mem pool.
2727 */
2728 static int _rename_single_lv(struct logical_volume *lv, char *new_name)
2729 {
2730 struct volume_group *vg = lv->vg;
2731
2732 if (find_lv_in_vg(vg, new_name)) {
2733 log_error("Logical volume \"%s\" already exists in "
2734 "volume group \"%s\"", new_name, vg->name);
2735 return 0;
2736 }
2737
2738 if (lv->status & LOCKED) {
2739 log_error("Cannot rename locked LV %s", lv->name);
2740 return 0;
2741 }
2742
2743 lv->name = new_name;
2744
2745 return 1;
2746 }
2747
2748 /*
2749 * Rename sub LV.
2750 * 'lv_name_old' and 'lv_name_new' are old and new names of the main LV.
2751 */
2752 static int _rename_sub_lv(struct cmd_context *cmd,
2753 struct logical_volume *lv,
2754 const char *lv_name_old, const char *lv_name_new)
2755 {
2756 const char *suffix;
2757 char *new_name;
2758 size_t len;
2759
2760 /*
2761 * A sub LV name starts with lv_name_old + '_'.
2762 * The suffix follows lv_name_old and includes '_'.
2763 */
2764 len = strlen(lv_name_old);
2765 if (strncmp(lv->name, lv_name_old, len) || lv->name[len] != '_') {
2766 log_error("Cannot rename \"%s\": name format not recognized "
2767 "for internal LV \"%s\"",
2768 lv_name_old, lv->name);
2769 return 0;
2770 }
2771 suffix = lv->name + len;
2772
2773 /*
2774 * Compose a new name for sub lv:
2775 * e.g. new name is "lvol1_mlog"
2776 * if the sub LV is "lvol0_mlog" and
2777 * a new name for main LV is "lvol1"
2778 */
2779 len = strlen(lv_name_new) + strlen(suffix) + 1;
2780 new_name = dm_pool_alloc(cmd->mem, len);
2781 if (!new_name) {
2782 log_error("Failed to allocate space for new name");
2783 return 0;
2784 }
2785 if (dm_snprintf(new_name, len, "%s%s", lv_name_new, suffix) < 0) {
2786 log_error("Failed to create new name");
2787 return 0;
2788 }
2789
2790 /* Rename it */
2791 return _rename_single_lv(lv, new_name);
2792 }
2793
2794 /* Callback for for_each_sub_lv */
2795 static int _rename_cb(struct cmd_context *cmd, struct logical_volume *lv,
2796 void *data)
2797 {
2798 struct lv_names *lv_names = (struct lv_names *) data;
2799
2800 return _rename_sub_lv(cmd, lv, lv_names->old, lv_names->new);
2801 }
2802
2803 /*
2804 * Loop down sub LVs and call fn for each.
2805 * fn is responsible for logging the necessary information on failure.
2806 */
2807 int for_each_sub_lv(struct cmd_context *cmd, struct logical_volume *lv,
2808 int (*fn)(struct cmd_context *cmd,
2809 struct logical_volume *lv, void *data),
2810 void *data)
2811 {
2812 struct logical_volume *org;
2813 struct lv_segment *seg;
2814 uint32_t s;
2815
2816 if (lv_is_cow(lv) && lv_is_virtual_origin(org = origin_from_cow(lv))) {
2817 if (!fn(cmd, org, data))
2818 return_0;
2819 if (!for_each_sub_lv(cmd, org, fn, data))
2820 return_0;
2821 }
2822
2823 dm_list_iterate_items(seg, &lv->segments) {
2824 if (seg->log_lv) {
2825 if (!fn(cmd, seg->log_lv, data))
2826 return_0;
2827 if (!for_each_sub_lv(cmd, seg->log_lv, fn, data))
2828 return_0;
2829 }
2830
2831 if (seg->metadata_lv) {
2832 if (!fn(cmd, seg->metadata_lv, data))
2833 return_0;
2834 if (!for_each_sub_lv(cmd, seg->metadata_lv, fn, data))
2835 return_0;
2836 }
2837
2838 for (s = 0; s < seg->area_count; s++) {
2839 if (seg_type(seg, s) != AREA_LV)
2840 continue;
2841 if (!fn(cmd, seg_lv(seg, s), data))
2842 return_0;
2843 if (!for_each_sub_lv(cmd, seg_lv(seg, s), fn, data))
2844 return_0;
2845 }
2846
2847 if (!seg_is_raid(seg))
2848 continue;
2849
2850 /* RAID has meta_areas */
2851 for (s = 0; s < seg->area_count; s++) {
2852 if (seg_metatype(seg, s) != AREA_LV)
2853 continue;
2854 if (!fn(cmd, seg_metalv(seg, s), data))
2855 return_0;
2856 if (!for_each_sub_lv(cmd, seg_metalv(seg, s), fn, data))
2857 return_0;
2858 }
2859 }
2860
2861 return 1;
2862 }
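/*
 * Illustrative sketch (not part of the original file): a minimal
 * for_each_sub_lv() callback that counts the sub LVs below an LV.  The
 * callback signature matches the fn parameter above.  Excluded from the
 * build.
 */
#if 0
static int _count_sub_lv_cb(struct cmd_context *cmd __attribute__((unused)),
			    struct logical_volume *lv __attribute__((unused)),
			    void *data)
{
	(*(uint32_t *) data)++;

	return 1;
}

static uint32_t _count_sub_lvs_sketch(struct cmd_context *cmd,
				      struct logical_volume *lv)
{
	uint32_t count = 0;

	if (!for_each_sub_lv(cmd, lv, _count_sub_lv_cb, &count))
		stack;

	return count;
}
#endif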
2863
2864
2865 /*
2866 * Core of LV renaming routine.
2867 * VG must be locked by caller.
2868 */
2869 int lv_rename(struct cmd_context *cmd, struct logical_volume *lv,
2870 const char *new_name)
2871 {
2872 struct volume_group *vg = lv->vg;
2873 struct lv_names lv_names;
2874 DM_LIST_INIT(lvs_changed);
2875 struct lv_list lvl, lvl2, *lvlp;
2876 int r = 0;
2877
2878 /* rename is not allowed on sub LVs */
2879 if (!lv_is_visible(lv)) {
2880 log_error("Cannot rename internal LV \"%s\".", lv->name);
2881 return 0;
2882 }
2883
2884 if (find_lv_in_vg(vg, new_name)) {
2885 log_error("Logical volume \"%s\" already exists in "
2886 "volume group \"%s\"", new_name, vg->name);
2887 return 0;
2888 }
2889
2890 if (lv->status & LOCKED) {
2891 log_error("Cannot rename locked LV %s", lv->name);
2892 return 0;
2893 }
2894
2895 if (!archive(vg))
2896 return 0;
2897
2898 /* rename sub LVs */
2899 lv_names.old = lv->name;
2900 lv_names.new = new_name;
2901 if (!for_each_sub_lv(cmd, lv, _rename_cb, (void *) &lv_names))
2902 return 0;
2903
2904 /* rename main LV */
2905 if (!(lv->name = dm_pool_strdup(cmd->mem, new_name))) {
2906 log_error("Failed to allocate space for new name");
2907 return 0;
2908 }
2909
2910 lvl.lv = lv;
2911 dm_list_add(&lvs_changed, &lvl.list);
2912
2913 /* rename active virtual origin too */
2914 if (lv_is_cow(lv) && lv_is_virtual_origin(lvl2.lv = origin_from_cow(lv)))
2915 dm_list_add_h(&lvs_changed, &lvl2.list);
2916
2917 log_verbose("Writing out updated volume group");
2918 if (!vg_write(vg))
2919 return 0;
2920
2921 if (!suspend_lvs(cmd, &lvs_changed, vg))
2922 goto_out;
2923
2924 if (!(r = vg_commit(vg)))
2925 stack;
2926
2927 /*
2928 * FIXME: resume LVs in reverse order to prevent memory
2929 * lock imbalance when resuming virtual snapshot origin
2930 * (resume of snapshot resumes origin too)
2931 */
2932 dm_list_iterate_back_items(lvlp, &lvs_changed)
2933 if (!resume_lv(cmd, lvlp->lv))
2934 stack;
2935 out:
2936 backup(vg);
2937 return r;
2938 }
2939
2940 char *generate_lv_name(struct volume_group *vg, const char *format,
2941 char *buffer, size_t len)
2942 {
2943 struct lv_list *lvl;
2944 int high = -1, i;
2945
2946 dm_list_iterate_items(lvl, &vg->lvs) {
2947 if (sscanf(lvl->lv->name, format, &i) != 1)
2948 continue;
2949
2950 if (i > high)
2951 high = i;
2952 }
2953
2954 if (dm_snprintf(buffer, len, format, high + 1) < 0)
2955 return NULL;
2956
2957 return buffer;
2958 }
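/*
 * Illustrative sketch (not part of the original file): picking the next
 * free "lvol%d" name in a VG, the same pattern lv_create_empty() below
 * relies on when the requested name contains "%d".  If lvol0 and lvol2
 * already exist, "lvol3" is returned in buf.  Excluded from the build.
 */
#if 0
static const char *_next_lvol_name_sketch(struct volume_group *vg,
					  char *buf, size_t len)
{
	return generate_lv_name(vg, "lvol%d", buf, len);
}
#endif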
2959
2960 int vg_max_lv_reached(struct volume_group *vg)
2961 {
2962 if (!vg->max_lv)
2963 return 0;
2964
2965 if (vg->max_lv > vg_visible_lvs(vg))
2966 return 0;
2967
2968 log_verbose("Maximum number of logical volumes (%u) reached "
2969 "in volume group %s", vg->max_lv, vg->name);
2970
2971 return 1;
2972 }
2973
2974 struct logical_volume *alloc_lv(struct dm_pool *mem)
2975 {
2976 struct logical_volume *lv;
2977
2978 if (!(lv = dm_pool_zalloc(mem, sizeof(*lv)))) {
2979 log_error("Unable to allocate logical volume structure");
2980 return NULL;
2981 }
2982
2983 lv->snapshot = NULL;
2984 dm_list_init(&lv->snapshot_segs);
2985 dm_list_init(&lv->segments);
2986 dm_list_init(&lv->tags);
2987 dm_list_init(&lv->segs_using_this_lv);
2988 dm_list_init(&lv->rsites);
2989
2990 return lv;
2991 }
2992
2993 /*
2994 * Create a new empty LV.
2995 */
2996 struct logical_volume *lv_create_empty(const char *name,
2997 union lvid *lvid,
2998 uint64_t status,
2999 alloc_policy_t alloc,
3000 struct volume_group *vg)
3001 {
3002 struct format_instance *fi = vg->fid;
3003 struct logical_volume *lv;
3004 char dname[NAME_LEN];
3005
3006 if (vg_max_lv_reached(vg))
3007 stack;
3008
3009 if (strstr(name, "%d") &&
3010 !(name = generate_lv_name(vg, name, dname, sizeof(dname)))) {
3011 log_error("Failed to generate unique name for the new "
3012 "logical volume");
3013 return NULL;
3014 } else if (find_lv_in_vg(vg, name)) {
3015 log_error("Unable to create LV %s in Volume Group %s: "
3016 "name already in use.", name, vg->name);
3017 return NULL;
3018 }
3019
3020 log_verbose("Creating logical volume %s", name);
3021
3022 if (!(lv = alloc_lv(vg->vgmem)))
3023 return_NULL;
3024
3025 if (!(lv->name = dm_pool_strdup(vg->vgmem, name)))
3026 goto_bad;
3027
3028 lv->status = status;
3029 lv->alloc = alloc;
3030 lv->read_ahead = vg->cmd->default_settings.read_ahead;
3031 lv->major = -1;
3032 lv->minor = -1;
3033 lv->size = UINT64_C(0);
3034 lv->le_count = 0;
3035
3036 if (lvid)
3037 lv->lvid = *lvid;
3038
3039 if (!link_lv_to_vg(vg, lv))
3040 goto_bad;
3041
3042 if (!lv_set_creation(lv, NULL, 0))
3043 goto_bad;
3044
3045 if (fi->fmt->ops->lv_setup && !fi->fmt->ops->lv_setup(fi, lv))
3046 goto_bad;
3047
3048 return lv;
3049 bad:
3050 dm_pool_free(vg->vgmem, lv);
3051 return NULL;
3052 }
3053
3054 static int _add_pvs(struct cmd_context *cmd, struct pv_segment *peg,
3055 uint32_t s __attribute__((unused)), void *data)
3056 {
3057 struct seg_pvs *spvs = (struct seg_pvs *) data;
3058 struct pv_list *pvl;
3059
3060 /* Don't add again if it's already on list. */
3061 if (find_pv_in_pv_list(&spvs->pvs, peg->pv))
3062 return 1;
3063
3064 if (!(pvl = dm_pool_alloc(cmd->mem, sizeof(*pvl)))) {
3065 log_error("pv_list allocation failed");
3066 return 0;
3067 }
3068
3069 pvl->pv = peg->pv;
3070
3071 dm_list_add(&spvs->pvs, &pvl->list);
3072
3073 return 1;
3074 }
3075
3076 /*
3077 * Construct dm_list of segments of LVs showing which PVs they use.
3078 * For pvmove we use the *parent* LV so we can pick up stripes & existing mirrors etc.
3079 */
3080 struct dm_list *build_parallel_areas_from_lv(struct logical_volume *lv,
3081 unsigned use_pvmove_parent_lv)
3082 {
3083 struct cmd_context *cmd = lv->vg->cmd;
3084 struct dm_list *parallel_areas;
3085 struct seg_pvs *spvs;
3086 uint32_t current_le = 0;
3087 uint32_t raid_multiple;
3088 struct lv_segment *seg = first_seg(lv);
3089
3090 if (!(parallel_areas = dm_pool_alloc(cmd->mem, sizeof(*parallel_areas)))) {
3091 log_error("parallel_areas allocation failed");
3092 return NULL;
3093 }
3094
3095 dm_list_init(parallel_areas);
3096
3097 do {
3098 if (!(spvs = dm_pool_zalloc(cmd->mem, sizeof(*spvs)))) {
3099 log_error("allocation failed");
3100 return NULL;
3101 }
3102
3103 dm_list_init(&spvs->pvs);
3104
3105 spvs->le = current_le;
3106 spvs->len = lv->le_count - current_le;
3107
3108 dm_list_add(parallel_areas, &spvs->list);
3109
3110 if (use_pvmove_parent_lv && !(seg = find_seg_by_le(lv, current_le))) {
3111 log_error("Failed to find segment for %s extent %" PRIu32,
3112 lv->name, current_le);
3113 return 0;
3114 }
3115
3116 /* Find next segment end */
3117 /* FIXME Unnecessary nesting! */
3118 if (!_for_each_pv(cmd, use_pvmove_parent_lv ? seg->pvmove_source_seg->lv : lv,
3119 use_pvmove_parent_lv ? seg->pvmove_source_seg->le : current_le,
3120 use_pvmove_parent_lv ? spvs->len * _calc_area_multiple(seg->pvmove_source_seg->segtype, seg->pvmove_source_seg->area_count, 0) : spvs->len,
3121 use_pvmove_parent_lv ? seg->pvmove_source_seg : NULL,
3122 &spvs->len,
3123 0, 0, -1, 0, _add_pvs, (void *) spvs))
3124 return_NULL;
3125
3126 current_le = spvs->le + spvs->len;
3127 raid_multiple = (seg->segtype->parity_devs) ?
3128 seg->area_count - seg->segtype->parity_devs : 1;
3129 } while ((current_le * raid_multiple) < lv->le_count);
3130
3131 /* FIXME Merge adjacent segments with identical PV lists (avoids need for contiguous allocation attempts between successful allocations) */
3132
3133 return parallel_areas;
3134 }
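/*
 * Illustrative sketch (not part of the original file): walking the list
 * returned by build_parallel_areas_from_lv() and logging which PVs each
 * extent range of the LV uses.  Excluded from the build.
 */
#if 0
static int _log_lv_pv_usage_sketch(struct logical_volume *lv)
{
	struct dm_list *parallel_areas;
	struct seg_pvs *spvs;
	struct pv_list *pvl;

	if (!(parallel_areas = build_parallel_areas_from_lv(lv, 0)))
		return_0;

	dm_list_iterate_items(spvs, parallel_areas) {
		log_debug("%s: extents %" PRIu32 "-%" PRIu32 " use:",
			  lv->name, spvs->le, spvs->le + spvs->len - 1);
		dm_list_iterate_items(pvl, &spvs->pvs)
			log_debug("  %s", pv_dev_name(pvl->pv));
	}

	return 1;
}
#endif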
3135
3136 int link_lv_to_vg(struct volume_group *vg, struct logical_volume *lv)
3137 {
3138 struct lv_list *lvl;
3139
3140 if (vg_max_lv_reached(vg))
3141 stack;
3142
3143 if (!(lvl = dm_pool_zalloc(vg->vgmem, sizeof(*lvl))))
3144 return_0;
3145
3146 lvl->lv = lv;
3147 lv->vg = vg;
3148 dm_list_add(&vg->lvs, &lvl->list);
3149
3150 return 1;
3151 }
3152
3153 int unlink_lv_from_vg(struct logical_volume *lv)
3154 {
3155 struct lv_list *lvl;
3156
3157 if (!(lvl = find_lv_in_vg(lv->vg, lv->name)))
3158 return_0;
3159
3160 dm_list_del(&lvl->list);
3161
3162 return 1;
3163 }
3164
3165 void lv_set_visible(struct logical_volume *lv)
3166 {
3167 if (lv_is_visible(lv))
3168 return;
3169
3170 lv->status |= VISIBLE_LV;
3171
3172 log_debug("LV %s in VG %s is now visible.", lv->name, lv->vg->name);
3173 }
3174
3175 void lv_set_hidden(struct logical_volume *lv)
3176 {
3177 if (!lv_is_visible(lv))
3178 return;
3179
3180 lv->status &= ~VISIBLE_LV;
3181
3182 log_debug("LV %s in VG %s is now hidden.", lv->name, lv->vg->name);
3183 }
3184
3185 int lv_remove_single(struct cmd_context *cmd, struct logical_volume *lv,
3186 const force_t force)
3187 {
3188 struct volume_group *vg;
3189 struct lvinfo info;
3190 struct logical_volume *format1_origin = NULL;
3191 int format1_reload_required = 0;
3192 int visible;
3193 struct logical_volume *pool_lv = NULL;
3194
3195 vg = lv->vg;
3196
3197 if (!vg_check_status(vg, LVM_WRITE))
3198 return_0;
3199
3200 if (lv_is_origin(lv)) {
3201 log_error("Can't remove logical volume \"%s\" under snapshot",
3202 lv->name);
3203 return 0;
3204 }
3205
3206 if (lv->status & MIRROR_IMAGE) {
3207 log_error("Can't remove logical volume %s used by a mirror",
3208 lv->name);
3209 return 0;
3210 }
3211
3212 if (lv->status & MIRROR_LOG) {
3213 log_error("Can't remove logical volume %s used as mirror log",
3214 lv->name);
3215 return 0;
3216 }
3217
3218 if (lv->status & (RAID_META | RAID_IMAGE)) {
3219 log_error("Can't remove logical volume %s used as RAID device",
3220 lv->name);
3221 return 0;
3222 }
3223
3224 if (lv_is_thin_pool_data(lv) || lv_is_thin_pool_metadata(lv)) {
3225 log_error("Can't remove logical volume %s used by a thin pool.",
3226 lv->name);
3227 return 0;
3228 } else if (lv_is_thin_volume(lv))
3229 pool_lv = first_seg(lv)->pool_lv;
3230
3231 if (lv->status & LOCKED) {
3232 log_error("Can't remove locked LV %s", lv->name);
3233 return 0;
3234 }
3235
3236 /* FIXME Ensure not referred to by another existing LVs */
3237
3238 if (lv_info(cmd, lv, 0, &info, 1, 0)) {
3239 if (!lv_check_not_in_use(cmd, lv, &info))
3240 return_0;
3241
3242 if ((force == PROMPT) &&
3243 lv_is_visible(lv) &&
3244 lv_is_active(lv) &&
3245 yes_no_prompt("Do you really want to remove active "
3246 "%slogical volume %s? [y/n]: ",
3247 vg_is_clustered(vg) ? "clustered " : "",
3248 lv->name) == 'n') {
3249 log_error("Logical volume %s not removed", lv->name);
3250 return 0;
3251 }
3252 }
3253
3254 if (!archive(vg))
3255 return 0;
3256
3257 if (lv_is_cow(lv)) {
3258 /* Old format1 code */
3259 if (!(lv->vg->fid->fmt->features & FMT_MDAS))
3260 format1_origin = origin_from_cow(lv);
3261
3262 log_verbose("Removing snapshot %s", lv->name);
3263 /* vg_remove_snapshot() will preload origin/former snapshots */
3264 if (!vg_remove_snapshot(lv))
3265 return_0;
3266 }
3267
3268 /* FIXME Review and fix the snapshot error paths! */
3269 if (!deactivate_lv(cmd, lv)) {
3270 log_error("Unable to deactivate logical volume \"%s\"",
3271 lv->name);
3272 return 0;
3273 }
3274
3275 /* Clear thin pool stacked messages */
3276 if (pool_lv && !pool_has_message(first_seg(pool_lv), lv, 0) &&
3277 !update_pool_lv(pool_lv, 1)) {
3278 log_error("Failed to update thin pool %s.", pool_lv->name);
3279 return 0;
3280 }
3281
3282 visible = lv_is_visible(lv);
3283
3284 log_verbose("Releasing logical volume \"%s\"", lv->name);
3285 if (!lv_remove(lv)) {
3286 log_error("Error releasing logical volume \"%s\"", lv->name);
3287 return 0;
3288 }
3289
3290 /*
3291 * Old format1 code: If no snapshots left reload without -real.
3292 */
3293 if (format1_origin && !lv_is_origin(format1_origin)) {
3294 log_warn("WARNING: Support for snapshots with old LVM1-style metadata is deprecated.");
3295 log_warn("WARNING: Please use lvconvert to update to lvm2 metadata at your convenience.");
3296 format1_reload_required = 1;
3297 }
3298
3299 /* store it on disks */
3300 if (!vg_write(vg))
3301 return_0;
3302
3303 /* format1 */
3304 if (format1_reload_required && !suspend_lv(cmd, format1_origin))
3305 log_error("Failed to refresh %s without snapshot.", format1_origin->name);
3306
3307 if (!vg_commit(vg))
3308 return_0;
3309
3310 /* format1 */
3311 if (format1_reload_required && !resume_lv(cmd, format1_origin)) {
3312 log_error("Failed to resume %s.", format1_origin->name);
3313 return 0;
3314 }
3315
3316 /* Release unneeded blocks in thin pool */
3317 /* TODO: defer when multiple LVs released at once */
3318 if (pool_lv && !update_pool_lv(pool_lv, 1)) {
3319 log_error("Failed to update thin pool %s.", pool_lv->name);
3320 return 0;
3321 }
3322
3323 backup(vg);
3324
3325 if (visible)
3326 log_print("Logical volume \"%s\" successfully removed", lv->name);
3327
3328 return 1;
3329 }
3330
3331 /*
3332 * Remove LVs together with their dependencies - LV leaf nodes should be removed first
3333 */
3334 int lv_remove_with_dependencies(struct cmd_context *cmd, struct logical_volume *lv,
3335 const force_t force, unsigned level)
3336 {
3337 percent_t snap_percent;
3338 struct dm_list *snh, *snht;
3339 struct seg_list *sl, *tsl;
3340 struct lvinfo info;
3341
3342 if (lv_is_cow(lv)) {
3343 /*
3344 * A merging snapshot cannot be removed directly unless
3345 * it has been invalidated or removal of the failed merge is requested.
3346 */
3347 if (lv_is_merging_cow(lv) && !level) {
3348 if (lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) &&
3349 info.exists && info.live_table) {
3350 if (!lv_snapshot_percent(lv, &snap_percent)) {
3351 log_error("Failed to obtain merging snapshot progress percentage for logical volume %s.",
3352 lv->name);
3353 return 0;
3354 }
3355 if ((snap_percent != PERCENT_INVALID) &&
3356 (snap_percent != PERCENT_MERGE_FAILED)) {
3357 log_error("Can't remove merging snapshot logical volume \"%s\"",
3358 lv->name);
3359 return 0;
3360 } else if ((snap_percent == PERCENT_MERGE_FAILED) &&
3361 (force == PROMPT) &&
3362 yes_no_prompt("Removing snapshot \"%s\" that failed to merge may leave origin \"%s\" inconsistent. "
3363 "Proceed? [y/n]: ", lv->name, origin_from_cow(lv)->name) == 'n') {
3364 log_error("Logical volume %s not removed.", lv->name);
3365 return 0;
3366 }
3367 }
3368 }
3369 }
3370
3371 if (lv_is_origin(lv)) {
3372 /* Remove snapshot LVs first */
3373 if ((force == PROMPT) &&
3374 /* Active snapshot already needs to confirm each active LV */
3375 !lv_is_active(lv) &&
3376 yes_no_prompt("Removing origin %s will also remove %u "
3377 "snapshots(s). Proceed? [y/n]: ",
3378 lv->name, lv->origin_count) == 'n') {
3379 log_error("Logical volume %s not removed.", lv->name);
3380 return 0;
3381 }
3382
3383 dm_list_iterate_safe(snh, snht, &lv->snapshot_segs)
3384 if (!lv_remove_with_dependencies(cmd, dm_list_struct_base(snh, struct lv_segment,
3385 origin_list)->cow,
3386 force, level + 1))
3387 return_0;
3388 }
3389
3390 if (lv_is_used_thin_pool(lv)) {
3391 /* Remove thin LVs first */
3392 if ((force == PROMPT) &&
3393 yes_no_prompt("Removing pool %s will also remove %u "
3394 "thin volume(s). OK? [y/n]: ", lv->name,
3395 /* Note: Snapshots not included */
3396 dm_list_size(&lv->segs_using_this_lv)) == 'n') {
3397 log_error("Logical volume %s not removed.", lv->name);
3398 return 0;
3399 }
3400
3401 dm_list_iterate_items_safe(sl, tsl, &lv->segs_using_this_lv)
3402 if (!lv_remove_with_dependencies(cmd, sl->seg->lv,
3403 force, level + 1))
3404 return_0;
3405 }
3406
3407 return lv_remove_single(cmd, lv, force);
3408 }
3409
3410 /*
3411 * insert_layer_for_segments_on_pv() inserts a layer segment for a segment area.
3412 * However, layer modification could split the underlying layer segment.
3413 * This function splits the parent area as needed to keep the 1:1 relationship
3414 * between the parent area and the underlying layer segment.
3415 * Since the layer LV might have other layers below, build_parallel_areas()
3416 * is used to find the lowest-level segment boundaries.
3417 */
3418 static int _split_parent_area(struct lv_segment *seg, uint32_t s,
3419 struct dm_list *layer_seg_pvs)
3420 {
3421 uint32_t parent_area_len, parent_le, layer_le;
3422 uint32_t area_multiple;
3423 struct seg_pvs *spvs;
3424
3425 if (seg_is_striped(seg))
3426 area_multiple = seg->area_count;
3427 else
3428 area_multiple = 1;
3429
3430 parent_area_len = seg->area_len;
3431 parent_le = seg->le;
3432 layer_le = seg_le(seg, s);
3433
3434 while (parent_area_len > 0) {
3435 /* Find the layer segment pointed at */
3436 if (!(spvs = _find_seg_pvs_by_le(layer_seg_pvs, layer_le))) {
3437 log_error("layer segment for %s:%" PRIu32 " not found",
3438 seg->lv->name, parent_le);
3439 return 0;
3440 }
3441
3442 if (spvs->le != layer_le) {
3443 log_error("Incompatible layer boundary: "
3444 "%s:%" PRIu32 "[%" PRIu32 "] on %s:%" PRIu32,
3445 seg->lv->name, parent_le, s,
3446 seg_lv(seg, s)->name, layer_le);
3447 return 0;
3448 }
3449
3450 if (spvs->len < parent_area_len) {
3451 parent_le += spvs->len * area_multiple;
3452 if (!lv_split_segment(seg->lv, parent_le))
3453 return_0;
3454 }
3455
3456 parent_area_len -= spvs->len;
3457 layer_le += spvs->len;
3458 }
3459
3460 return 1;
3461 }
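/*
 * Worked example (illustrative, not part of the original file): suppose a
 * parent segment of area_len 100 maps onto a layer LV whose parallel-area
 * list has boundaries at layer LEs 0-59 and 60-99.  The loop above finds
 * the first layer segment (len 60 < 100), advances parent_le by
 * 60 * area_multiple and calls lv_split_segment() there, leaving two
 * parent segments that each map 1:1 onto one layer segment; the second
 * iteration then matches exactly and no further split is needed.
 */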
3462
3463 /*
3464 * Split the parent LV segments if the layer LV below them is split.
3465 */
3466 int split_parent_segments_for_layer(struct cmd_context *cmd,
3467 struct logical_volume *layer_lv)
3468 {
3469 struct lv_list *lvl;
3470 struct logical_volume *parent_lv;
3471 struct lv_segment *seg;
3472 uint32_t s;
3473 struct dm_list *parallel_areas;
3474
3475 if (!(parallel_areas = build_parallel_areas_from_lv(layer_lv, 0)))
3476 return_0;
3477
3478 /* Loop through all LVs except itself */
3479 dm_list_iterate_items(lvl, &layer_lv->vg->lvs) {
3480 parent_lv = lvl->lv;
3481 if (parent_lv == layer_lv)
3482 continue;
3483
3484 /* Find all segments that point at the layer LV */
3485 dm_list_iterate_items(seg, &parent_lv->segments) {
3486 for (s = 0; s < seg->area_count; s++) {
3487 if (seg_type(seg, s) != AREA_LV ||
3488 seg_lv(seg, s) != layer_lv)
3489 continue;
3490
3491 if (!_split_parent_area(seg, s, parallel_areas))
3492 return_0;
3493 }
3494 }
3495 }
3496
3497 return 1;
3498 }
3499
3500 /* Remove a layer from the LV */
3501 int remove_layers_for_segments(struct cmd_context *cmd,
3502 struct logical_volume *lv,
3503 struct logical_volume *layer_lv,
3504 uint64_t status_mask, struct dm_list *lvs_changed)
3505 {
3506 struct lv_segment *seg, *lseg;
3507 uint32_t s;
3508 int lv_changed = 0;
3509 struct lv_list *lvl;
3510
3511 log_very_verbose("Removing layer %s for segments of %s",
3512 layer_lv->name, lv->name);
3513
3514 /* Find all segments that point at the temporary mirror */
3515 dm_list_iterate_items(seg, &lv->segments) {
3516 for (s = 0; s < seg->area_count; s++) {
3517 if (seg_type(seg, s) != AREA_LV ||
3518 seg_lv(seg, s) != layer_lv)
3519 continue;
3520
3521 /* Find the layer segment pointed at */
3522 if (!(lseg = find_seg_by_le(layer_lv, seg_le(seg, s)))) {
3523 log_error("Layer segment found: %s:%" PRIu32,
3524 layer_lv->name, seg_le(seg, s));
3525 return 0;
3526 }
3527
3528 /* Check the segment params are compatible */
3529 if (!seg_is_striped(lseg) || lseg->area_count != 1) {
3530 log_error("Layer is not linear: %s:%" PRIu32,
3531 layer_lv->name, lseg->le);
3532 return 0;
3533 }
3534 if ((lseg->status & status_mask) != status_mask) {
3535 log_error("Layer status does not match: "
3536 "%s:%" PRIu32 " status: 0x%" PRIx64 "/0x%" PRIx64,
3537 layer_lv->name, lseg->le,
3538 lseg->status, status_mask);
3539 return 0;
3540 }
3541 if (lseg->le != seg_le(seg, s) ||
3542 lseg->area_len != seg->area_len) {
3543 log_error("Layer boundary mismatch: "
3544 "%s:%" PRIu32 "-%" PRIu32 " on "
3545 "%s:%" PRIu32 " / "
3546 "%" PRIu32 "-%" PRIu32 " / ",
3547 lv->name, seg->le, seg->area_len,
3548 layer_lv->name, seg_le(seg, s),
3549 lseg->le, lseg->area_len);
3550 return 0;
3551 }
3552
3553 if (!move_lv_segment_area(seg, s, lseg, 0))
3554 return_0;
3555
3556 /* Replace mirror with error segment */
3557 if (!(lseg->segtype =
3558 get_segtype_from_string(lv->vg->cmd, "error"))) {
3559 log_error("Missing error segtype");
3560 return 0;
3561 }
3562 lseg->area_count = 0;
3563
3564 /* First time, add LV to list of LVs affected */
3565 if (!lv_changed && lvs_changed) {
3566 if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
3567 log_error("lv_list alloc failed");
3568 return 0;
3569 }
3570 lvl->lv = lv;
3571 dm_list_add(lvs_changed, &lvl->list);
3572 lv_changed = 1;
3573 }
3574 }
3575 }
3576 if (lv_changed && !lv_merge_segments(lv))
3577 stack;
3578
3579 return 1;
3580 }
3581
3582 /* Remove a layer */
3583 int remove_layers_for_segments_all(struct cmd_context *cmd,
3584 struct logical_volume *layer_lv,
3585 uint64_t status_mask,
3586 struct dm_list *lvs_changed)
3587 {
3588 struct lv_list *lvl;
3589 struct logical_volume *lv1;
3590
3591 /* Loop through all LVs except the temporary mirror */
3592 dm_list_iterate_items(lvl, &layer_lv->vg->lvs) {
3593 lv1 = lvl->lv;
3594 if (lv1 == layer_lv)
3595 continue;
3596
3597 if (!remove_layers_for_segments(cmd, lv1, layer_lv,
3598 status_mask, lvs_changed))
3599 return_0;
3600 }
3601
3602 if (!lv_empty(layer_lv))
3603 return_0;
3604
3605 return 1;
3606 }
3607
3608 int move_lv_segments(struct logical_volume *lv_to,
3609 struct logical_volume *lv_from,
3610 uint64_t set_status, uint64_t reset_status)
3611 {
3612 struct lv_segment *seg;
3613
3614 dm_list_iterate_items(seg, &lv_to->segments)
3615 if (seg->origin) {
3616 log_error("Can't move snapshot segment.");
3617 return 0;
3618 }
3619
3620 dm_list_init(&lv_to->segments);
3621 dm_list_splice(&lv_to->segments, &lv_from->segments);
3622
3623 dm_list_iterate_items(seg, &lv_to->segments) {
3624 seg->lv = lv_to;
3625 seg->status &= ~reset_status;
3626 seg->status |= set_status;
3627 }
3628
3629 lv_to->le_count = lv_from->le_count;
3630 lv_to->size = lv_from->size;
3631
3632 lv_from->le_count = 0;
3633 lv_from->size = 0;
3634
3635 return 1;
3636 }
3637
3638 /* Remove a layer from the LV */
3639 int remove_layer_from_lv(struct logical_volume *lv,
3640 struct logical_volume *layer_lv)
3641 {
3642 struct logical_volume *parent;
3643 struct lv_segment *parent_seg;
3644 struct segment_type *segtype;
3645
3646 log_very_verbose("Removing layer %s for %s", layer_lv->name, lv->name);
3647
3648 if (!(parent_seg = get_only_segment_using_this_lv(layer_lv))) {
3649 log_error("Failed to find layer %s in %s",
3650 layer_lv->name, lv->name);
3651 return 0;
3652 }
3653 parent = parent_seg->lv;
3654
3655 /*
3656 * Before removal, the layer should be cleaned up,
3657 * i.e. additional segments and areas should have been removed.
3658 */
3659 if (dm_list_size(&parent->segments) != 1 ||
3660 parent_seg->area_count != 1 ||
3661 seg_type(parent_seg, 0) != AREA_LV ||
3662 layer_lv != seg_lv(parent_seg, 0) ||
3663 parent->le_count != layer_lv->le_count)
3664 return_0;
3665
3666 if (!lv_empty(parent))
3667 return_0;
3668
3669 if (!move_lv_segments(parent, layer_lv, 0, 0))
3670 return_0;
3671
3672 /* Replace the empty layer with error segment */
3673 segtype = get_segtype_from_string(lv->vg->cmd, "error");
3674 if (!lv_add_virtual_segment(layer_lv, 0, parent->le_count, segtype, NULL))
3675 return_0;
3676
3677 return 1;
3678 }
3679
3680 /*
3681 * Create and insert a linear LV "above" lv_where.
3682 * After the insertion, a new LV named lv_where->name + suffix is created
3683 * and all segments of lv_where are moved to the new LV.
3684 * lv_where will have a single segment which maps linearly to the new LV.
3685 */
3686 struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
3687 struct logical_volume *lv_where,
3688 uint64_t status,
3689 const char *layer_suffix)
3690 {
3691 int r;
3692 char *name;
3693 size_t len;
3694 struct str_list *sl;
3695 struct logical_volume *layer_lv;
3696 struct segment_type *segtype;
3697 struct lv_segment *mapseg;
3698 unsigned exclusive = 0;
3699
3700 /* create an empty layer LV */
3701 len = strlen(lv_where->name) + 32;
3702 if (!(name = alloca(len))) {
3703 log_error("layer name allocation failed. "
3704 "Remove new LV and retry.");
3705 return NULL;
3706 }
3707
3708 if (dm_snprintf(name, len, "%s%s", lv_where->name, layer_suffix) < 0) {
3709 log_error("layer name allocation failed. "
3710 "Remove new LV and retry.");
3711 return NULL;
3712 }
3713
3714 if (!(layer_lv = lv_create_empty(name, NULL, LVM_READ | LVM_WRITE,
3715 ALLOC_INHERIT, lv_where->vg))) {
3716 log_error("Creation of layer LV failed");
3717 return NULL;
3718 }
3719
3720 if (lv_is_active_exclusive_locally(lv_where))
3721 exclusive = 1;
3722
3723 if (lv_is_active(lv_where) && strstr(name, "_mimagetmp")) {
3724 log_very_verbose("Creating transient LV %s for mirror conversion in VG %s.", name, lv_where->vg->name);
3725
3726 segtype = get_segtype_from_string(cmd, "error");
3727
3728 if (!lv_add_virtual_segment(layer_lv, 0, lv_where->le_count, segtype, NULL)) {
3729 log_error("Creation of transient LV %s for mirror conversion in VG %s failed.", name, lv_where->vg->name);
3730 return NULL;
3731 }
3732
3733 /* Temporary tags for activation of the transient LV */
3734 dm_list_iterate_items(sl, &lv_where->tags)
3735 if (!str_list_add(cmd->mem, &layer_lv->tags, sl->str)) {
3736 log_error("Aborting. Unable to tag"
3737 " transient mirror layer.");
3738 return NULL;
3739 }
3740
3741 if (!vg_write(lv_where->vg)) {
3742 log_error("Failed to write intermediate VG %s metadata for mirror conversion.", lv_where->vg->name);
3743 return NULL;
3744 }
3745
3746 if (!vg_commit(lv_where->vg)) {
3747 log_error("Failed to commit intermediate VG %s metadata for mirror conversion.", lv_where->vg->name);
3748 vg_revert(lv_where->vg);
3749 return NULL;
3750 }
3751
3752 if (exclusive)
3753 r = activate_lv_excl(cmd, layer_lv);
3754 else
3755 r = activate_lv(cmd, layer_lv);
3756
3757 if (!r) {
3758 log_error("Failed to resume transient LV"
3759 " %s for mirror conversion in VG %s.",
3760 name, lv_where->vg->name);
3761 return NULL;
3762 }
3763
3764 /* Remove the temporary tags */
3765 dm_list_iterate_items(sl, &lv_where->tags)
3766 str_list_del(&layer_lv->tags, sl->str);
3767
3768 }
3769
3770 log_very_verbose("Inserting layer %s for %s",
3771 layer_lv->name, lv_where->name);
3772
3773 if (!move_lv_segments(layer_lv, lv_where, 0, 0))
3774 return_NULL;
3775
3776 if (!(segtype = get_segtype_from_string(cmd, "striped")))
3777 return_NULL;
3778
3779 /* allocate a new linear segment */
3780 if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, layer_lv->le_count,
3781 status, 0, NULL, NULL, 1, layer_lv->le_count,
3782 0, 0, 0, NULL)))
3783 return_NULL;
3784
3785 /* map the new segment's only area to the new layer LV */
3786 if (!set_lv_segment_area_lv(mapseg, 0, layer_lv, 0, 0))
3787 return_NULL;
3788
3789 /* add the new segment to the original LV (lv_where) */
3790 dm_list_add(&lv_where->segments, &mapseg->list);
3791 lv_where->le_count = layer_lv->le_count;
3792 lv_where->size = (uint64_t) lv_where->le_count * lv_where->vg->extent_size;
3793
3794 return layer_lv;
3795 }
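/*
 * Usage sketch (illustrative, not part of the original source): insert a
 * layer above an existing LV and commit the metadata.  The "_example"
 * suffix and the zero status flag are placeholders; real callers pass
 * whatever their conversion needs.
 *
 *	struct logical_volume *layer_lv;
 *
 *	if (!(layer_lv = insert_layer_for_lv(cmd, lv, 0, "_example")))
 *		return_NULL;
 *
 *	if (!vg_write(lv->vg) || !vg_commit(lv->vg))
 *		return_NULL;
 *
 * Afterwards lv has a single striped segment whose only area is AREA_LV
 * pointing at layer_lv, and layer_lv holds the original segments.
 */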
3796
3797 /*
3798 * Extend and insert a linear layer LV beneath the source segment area.
3799 */
3800 static int _extend_layer_lv_for_segment(struct logical_volume *layer_lv,
3801 struct lv_segment *seg, uint32_t s,
3802 uint64_t status)
3803 {
3804 struct lv_segment *mapseg;
3805 struct segment_type *segtype;
3806 struct physical_volume *src_pv = seg_pv(seg, s);
3807 uint32_t src_pe = seg_pe(seg, s);
3808
3809 if (seg_type(seg, s) != AREA_PV && seg_type(seg, s) != AREA_LV)
3810 return_0;
3811
3812 if (!(segtype = get_segtype_from_string(layer_lv->vg->cmd, "striped")))
3813 return_0;
3814
3815 /* FIXME Incomplete message? Needs more context */
3816 log_very_verbose("Inserting %s:%" PRIu32 "-%" PRIu32 " of %s/%s",
3817 pv_dev_name(src_pv),
3818 src_pe, src_pe + seg->area_len - 1,
3819 seg->lv->vg->name, seg->lv->name);
3820
3821 /* allocate a new segment */
3822 if (!(mapseg = alloc_lv_segment(segtype, layer_lv, layer_lv->le_count,
3823 seg->area_len, status, 0,
3824 NULL, NULL, 1, seg->area_len, 0, 0, 0, seg)))
3825 return_0;
3826
3827 /* map the new segment to the original underlying area */
3828 if (!move_lv_segment_area(mapseg, 0, seg, s))
3829 return_0;
3830
3831 /* add the new segment to the layer LV */
3832 dm_list_add(&layer_lv->segments, &mapseg->list);
3833 layer_lv->le_count += seg->area_len;
3834 layer_lv->size += (uint64_t) seg->area_len * layer_lv->vg->extent_size;
3835
3836 /* map the original area to the new segment */
3837 if (!set_lv_segment_area_lv(seg, s, layer_lv, mapseg->le, 0))
3838 return_0;
3839
3840 return 1;
3841 }
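/*
 * Worked example (sketch): if area s of seg maps PEs 100-149 of its PV
 * (area_len 50), _extend_layer_lv_for_segment() appends a 50-extent
 * striped segment to the end of layer_lv, moves the PV mapping into that
 * new segment, and repoints area s at the newly added extents of
 * layer_lv, growing layer_lv->le_count by 50.
 */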
3842
3843 /*
3844 * Match the segment area to PEs in the pvl
3845 * (the segment area boundary should be aligned to PE ranges by
3846 * _align_segment_boundary_to_pe_range() so that there is no partial overlap.)
3847 */
3848 static int _match_seg_area_to_pe_range(struct lv_segment *seg, uint32_t s,
3849 struct pv_list *pvl)
3850 {
3851 struct pe_range *per;
3852 uint32_t pe_start, per_end;
3853
3854 if (!pvl)
3855 return 1;
3856
3857 if (seg_type(seg, s) != AREA_PV || seg_dev(seg, s) != pvl->pv->dev)
3858 return 0;
3859
3860 pe_start = seg_pe(seg, s);
3861
3862 /* Do these PEs match any of the PEs in pvl? */
3863 dm_list_iterate_items(per, pvl->pe_ranges) {
3864 per_end = per->start + per->count - 1;
3865
3866 if ((pe_start < per->start) || (pe_start > per_end))
3867 continue;
3868
3869 /* FIXME Missing context in this message - add LV/seg details */
3870 log_debug("Matched PE range %s:%" PRIu32 "-%" PRIu32 " against "
3871 "%s %" PRIu32 " len %" PRIu32, dev_name(pvl->pv->dev),
3872 per->start, per_end, dev_name(seg_dev(seg, s)),
3873 seg_pe(seg, s), seg->area_len);
3874
3875 return 1;
3876 }
3877
3878 return 0;
3879 }
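/*
 * Worked example (sketch): with a pe_range of start 20 and count 20
 * (PEs 20-39), a segment area starting at PE 30 on the same device
 * matches (20 <= 30 <= 39), while one starting at PE 40 does not.
 */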
3880
3881 /*
3882 * For each segment in lv_where that uses a PV in pvl directly,
3883 * split the segment if it spans more than one underlying PV.
3884 */
3885 static int _align_segment_boundary_to_pe_range(struct logical_volume *lv_where,
3886 struct pv_list *pvl)
3887 {
3888 struct lv_segment *seg;
3889 struct pe_range *per;
3890 uint32_t pe_start, pe_end, per_end, stripe_multiplier, s;
3891
3892 if (!pvl)
3893 return 1;
3894
3895 /* Split LV segments to match PE ranges */
3896 dm_list_iterate_items(seg, &lv_where->segments) {
3897 for (s = 0; s < seg->area_count; s++) {
3898 if (seg_type(seg, s) != AREA_PV ||
3899 seg_dev(seg, s) != pvl->pv->dev)
3900 continue;
3901
3902 /* Does this area overlap any of the PE ranges in pvl? */
3903 dm_list_iterate_items(per, pvl->pe_ranges) {
3904 pe_start = seg_pe(seg, s);
3905 pe_end = pe_start + seg->area_len - 1;
3906 per_end = per->start + per->count - 1;
3907
3908 /* No overlap? */
3909 if ((pe_end < per->start) ||
3910 (pe_start > per_end))
3911 continue;
3912
3913 if (seg_is_striped(seg))
3914 stripe_multiplier = seg->area_count;
3915 else
3916 stripe_multiplier = 1;
3917
3918 if ((per->start != pe_start &&
3919 per->start > pe_start) &&
3920 !lv_split_segment(lv_where, seg->le +
3921 (per->start - pe_start) *
3922 stripe_multiplier))
3923 return_0;
3924
3925 if ((per_end != pe_end &&
3926 per_end < pe_end) &&
3927 !lv_split_segment(lv_where, seg->le +
3928 (per_end - pe_start + 1) *
3929 stripe_multiplier))
3930 return_0;
3931 }
3932 }
3933 }
3934
3935 return 1;
3936 }
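/*
 * Worked example (sketch): a linear segment covering PEs 0-99 of the PV
 * (stripe_multiplier 1) intersected with a pe_range of PEs 20-59 is split
 * twice, at seg->le + 20 and at seg->le + 60, so every resulting segment
 * lies either wholly inside or wholly outside the range before any layer
 * is inserted.
 */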
3937
3938 /*
3939 * Scan lv_where for segments on a PV in pvl, and for each one found
3940 * append a linear segment to layer_lv and insert it between the two.
3941 *
3942 * If pvl is NULL, a layer is placed under the whole of lv_where.
3943 * If the layer is inserted, lv_where is added to lvs_changed.
3944 */
3945 int insert_layer_for_segments_on_pv(struct cmd_context *cmd,
3946 struct logical_volume *lv_where,
3947 struct logical_volume *layer_lv,
3948 uint64_t status,
3949 struct pv_list *pvl,
3950 struct dm_list *lvs_changed)
3951 {
3952 struct lv_segment *seg;
3953 struct lv_list *lvl;
3954 int lv_used = 0;
3955 uint32_t s;
3956
3957 log_very_verbose("Inserting layer %s for segments of %s on %s",
3958 layer_lv->name, lv_where->name,
3959 pvl ? pv_dev_name(pvl->pv) : "any");
3960
3961 if (!_align_segment_boundary_to_pe_range(lv_where, pvl))
3962 return_0;
3963
3964 /* Work through all segments on the supplied PV */
3965 dm_list_iterate_items(seg, &lv_where->segments) {
3966 for (s = 0; s < seg->area_count; s++) {
3967 if (!_match_seg_area_to_pe_range(seg, s, pvl))
3968 continue;
3969
3970 /* First time, add LV to list of LVs affected */
3971 if (!lv_used && lvs_changed) {
3972 if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
3973 log_error("lv_list alloc failed");
3974 return 0;
3975 }
3976 lvl->lv = lv_where;
3977 dm_list_add(lvs_changed, &lvl->list);
3978 lv_used = 1;
3979 }
3980
3981 if (!_extend_layer_lv_for_segment(layer_lv, seg, s,
3982 status)) {
3983 log_error("Failed to insert segment in layer "
3984 "LV %s under %s:%" PRIu32 "-%" PRIu32,
3985 layer_lv->name, lv_where->name,
3986 seg->le, seg->le + seg->len);
3987 return 0;
3988 }
3989 }
3990 }
3991
3992 return 1;
3993 }
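/*
 * Usage sketch (illustrative, not part of the original source): place a
 * temporary layer under every segment of lv that lives on the PV in pvl,
 * collecting the touched LVs in lvs_changed.  The name, allocation policy
 * and zero status flag below are assumptions for the example; the real
 * caller sets these up according to its own needs.
 *
 *	struct dm_list lvs_changed;
 *	struct logical_volume *layer_lv;
 *
 *	dm_list_init(&lvs_changed);
 *
 *	if (!(layer_lv = lv_create_empty("pvmove%d", NULL,
 *					 LVM_READ | LVM_WRITE,
 *					 ALLOC_INHERIT, vg)))
 *		return_0;
 *
 *	if (!insert_layer_for_segments_on_pv(cmd, lv, layer_lv, 0,
 *					     pvl, &lvs_changed))
 *		return_0;
 */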
3994
3995 /*
3996 * Initialize the LV with 'value'.
3997 */
3998 int set_lv(struct cmd_context *cmd, struct logical_volume *lv,
3999 uint64_t sectors, int value)
4000 {
4001 struct device *dev;
4002 char *name;
4003
4004 /*
4005 * FIXME:
4006 * <clausen> also, more than 4k
4007 * <clausen> say, reiserfs puts its superblock 32k in, IIRC
4008 * <ejt_> k, I'll drop a fixme to that effect
4009 * (I know the device is at least 4k, but not 32k)
4010 */
4011 if (!(name = dm_pool_alloc(cmd->mem, PATH_MAX))) {
4012 log_error("Name allocation failed - device not cleared");
4013 return 0;
4014 }
4015
4016 if (dm_snprintf(name, PATH_MAX, "%s%s/%s", cmd->dev_dir,
4017 lv->vg->name, lv->name) < 0) {
4018 log_error("Name too long - device not cleared (%s)", lv->name);
4019 return 0;
4020 }
4021
4022 sync_local_dev_names(cmd); /* Wait until devices are available */
4023
4024 log_verbose("Clearing start of logical volume \"%s\"", lv->name);
4025
4026 if (!(dev = dev_cache_get(name, NULL))) {
4027 log_error("%s: not found: device not cleared", name);
4028 return 0;
4029 }
4030
4031 if (!dev_open_quiet(dev))
4032 return_0;
4033
4034 if (!sectors)
4035 sectors = UINT64_C(4096) >> SECTOR_SHIFT;
4036
4037 if (sectors > lv->size)
4038 sectors = lv->size;
4039
4040 if (!dev_set(dev, UINT64_C(0), (size_t) sectors << SECTOR_SHIFT, value))
4041 stack;
4042
4043 dev_flush(dev);
4044
4045 if (!dev_close_immediate(dev))
4046 stack;
4047
4048 return 1;
4049 }
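/*
 * Usage sketch (illustrative, not part of the original source): wipe the
 * start of a freshly activated LV, as _lv_create_an_lv() does further down
 * in this file.  Passing 0 sectors clears the default 4KiB; a non-zero
 * value clears that many sectors instead (capped at the LV size).
 *
 *	if (!set_lv(cmd, lv, UINT64_C(0), 0))
 *		log_error("Failed to wipe start of new LV %s.", lv->name);
 */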
4050
4051 static struct logical_volume *_create_virtual_origin(struct cmd_context *cmd,
4052 struct volume_group *vg,
4053 const char *lv_name,
4054 uint32_t permission,
4055 uint64_t voriginextents)
4056 {
4057 const struct segment_type *segtype;
4058 size_t len;
4059 char *vorigin_name;
4060 struct logical_volume *lv;
4061
4062 if (!(segtype = get_segtype_from_string(cmd, "zero"))) {
4063 log_error("Zero segment type for virtual origin not found");
4064 return NULL;
4065 }
4066
4067 len = strlen(lv_name) + 32;
4068 if (!(vorigin_name = alloca(len)) ||
4069 dm_snprintf(vorigin_name, len, "%s_vorigin", lv_name) < 0) {
4070 log_error("Virtual origin name allocation failed.");
4071 return NULL;
4072 }
4073
4074 if (!(lv = lv_create_empty(vorigin_name, NULL, permission,
4075 ALLOC_INHERIT, vg)))
4076 return_NULL;
4077
4078 if (!lv_extend(lv, segtype, 1, 0, 1, 0, voriginextents,
4079 NULL, NULL, ALLOC_INHERIT))
4080 return_NULL;
4081
4082 /* store vg on disk(s) */
4083 if (!vg_write(vg) || !vg_commit(vg))
4084 return_NULL;
4085
4086 backup(vg);
4087
4088 return lv;
4089 }
4090
4091 /* Thin notes:
4092 * If lp->thin OR lp->activate is AY*, activate the pool if not already active.
4093 * If lp->thin, create thin LV within the pool - as a snapshot if lp->snapshot.
4094 * If lp->activate is AY*, activate it.
4095 * If lp->activate was AN* and the pool was originally inactive, deactivate it.
4096 */
4097 static struct logical_volume *_lv_create_an_lv(struct volume_group *vg, struct lvcreate_params *lp,
4098 const char *new_lv_name)
4099 {
4100 struct cmd_context *cmd = vg->cmd;
4101 uint32_t size_rest;
4102 uint64_t status = UINT64_C(0);
4103 struct logical_volume *lv, *org = NULL;
4104 struct logical_volume *pool_lv;
4105 struct lv_list *lvl;
4106 int origin_active = 0;
4107 struct lvinfo info;
4108
4109 if (new_lv_name && find_lv_in_vg(vg, new_lv_name)) {
4110 log_error("Logical volume \"%s\" already exists in "
4111 "volume group \"%s\"", new_lv_name, lp->vg_name);
4112 return NULL;
4113 }
4114
4115 if (vg_max_lv_reached(vg)) {
4116 log_error("Maximum number of logical volumes (%u) reached "
4117 "in volume group %s", vg->max_lv, vg->name);
4118 return NULL;
4119 }
4120
4121 if ((segtype_is_mirrored(lp->segtype) ||
4122 segtype_is_raid(lp->segtype) || segtype_is_thin(lp->segtype)) &&
4123 !(vg->fid->fmt->features & FMT_SEGMENTS)) {
4124 log_error("Metadata does not support %s segments.",
4125 lp->segtype->name);
4126 return NULL;
4127 }
4128
4129 if (lp->read_ahead != DM_READ_AHEAD_AUTO &&
4130 lp->read_ahead != DM_READ_AHEAD_NONE &&
4131 (vg->fid->fmt->features & FMT_RESTRICTED_READAHEAD) &&
4132 (lp->read_ahead < 2 || lp->read_ahead > 120)) {
4133 log_error("Metadata only supports readahead values between 2 and 120.");
4134 return NULL;
4135 }
4136
4137 if (lp->stripe_size > vg->extent_size) {
4138 log_error("Reducing requested stripe size %s to maximum, "
4139 "physical extent size %s",
4140 display_size(cmd, (uint64_t) lp->stripe_size),
4141 display_size(cmd, (uint64_t) vg->extent_size));
4142 lp->stripe_size = vg->extent_size;
4143 }
4144
4145 /* Need to check the vg's format to verify this - the cmd format isn't set up properly yet */
4146 if (lp->stripes > 1 &&
4147 !(vg->fid->fmt->features & FMT_UNLIMITED_STRIPESIZE) &&
4148 (lp->stripe_size > STRIPE_SIZE_MAX)) {
4149 log_error("Stripe size may not exceed %s",
4150 display_size(cmd, (uint64_t) STRIPE_SIZE_MAX));
4151 return NULL;
4152 }
4153
4154 if ((size_rest = lp->extents % lp->stripes)) {
4155 log_print("Rounding size (%d extents) up to stripe boundary "
4156 "size (%d extents)", lp->extents,
4157 lp->extents - size_rest + lp->stripes);
4158 lp->extents = lp->extents - size_rest + lp->stripes;
4159 }
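	/*
	 * Worked example (sketch): lp->extents = 10 with lp->stripes = 3
	 * gives size_rest = 1, so the request is rounded up to
	 * 10 - 1 + 3 = 12 extents, i.e. 4 per stripe.
	 */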
4160
4161 /* Does LV need to be zeroed? Thin handles this as a per-pool in-kernel setting. */
4162 if (lp->zero && !segtype_is_thin(lp->segtype) && !activation()) {
4163 log_error("Can't wipe start of new LV without using "
4164 "device-mapper kernel driver");
4165 return NULL;
4166 }
4167
4168 status |= lp->permission | VISIBLE_LV;
4169
4170 if (lp->snapshot && lp->thin) {
4171 if (!(org = find_lv(vg, lp->origin))) {
4172 log_error("Couldn't find origin volume '%s'.",
4173 lp->origin);
4174 return NULL;
4175 }
4176
4177 if (org->status & LOCKED) {
4178 log_error("Snapshots of locked devices are not supported.");
4179 return NULL;
4180 }
4181
4182 lp->voriginextents = org->le_count;
4183 } else if (lp->snapshot) {
4184 if (!activation()) {
4185 log_error("Can't create snapshot without using "
4186 "device-mapper kernel driver");
4187 return NULL;
4188 }
4189
4190 /* Must zero cow */
4191 status |= LVM_WRITE;
4192
4193 if (lp->voriginsize)
4194 origin_active = 1;
4195 else {
4196
4197 if (!(org = find_lv(vg, lp->origin))) {
4198 log_error("Couldn't find origin volume '%s'.",
4199 lp->origin);
4200 return NULL;
4201 }
4202 if (lv_is_virtual_origin(org)) {
4203 log_error("Can't share virtual origins. "
4204 "Use --virtualsize.");
4205 return NULL;
4206 }
4207 if (lv_is_cow(org)) {
4208 log_error("Snapshots of snapshots are not "
4209 "supported yet.");
4210 return NULL;
4211 }
4212 if (org->status & LOCKED) {
4213 log_error("Snapshots of locked devices are not "
4214 "supported yet");
4215 return NULL;
4216 }
4217 if (lv_is_merging_origin(org)) {
4218 log_error("Snapshots of an origin that has a "
4219 "merging snapshot are not supported");
4220 return NULL;
4221 }
4222
4223 if (lv_is_thin_type(org) && !lv_is_thin_volume(org)) {
4224 log_error("Snapshots of thin pool %sdevices "
4225 "are not supported.",
4226 lv_is_thin_pool_data(org) ? "data " :
4227 lv_is_thin_pool_metadata(org) ?
4228 "metadata " : "");
4229 return NULL;
4230 }
4231
4232 if (lv_is_mirror_type(org) &&
4233 !seg_is_raid(first_seg(org))) {
4234 log_error("Snapshots of \"mirror\" segment types"
4235 " are not supported");
4236 return NULL;
4237 }
4238
4239 if (!lv_info(cmd, org, 0, &info, 0, 0)) {
4240 log_error("Check for existence of active snapshot "
4241 "origin '%s' failed.", org->name);
4242 return NULL;
4243 }
4244 origin_active = info.exists;
4245
4246 if (vg_is_clustered(vg) &&
4247 !lv_is_active_exclusive_locally(org)) {
4248 log_error("%s must be active exclusively to"
4249 " create snapshot", org->name);
4250 return NULL;
4251 }
4252 }
4253 }
4254
4255 if (!seg_is_thin_volume(lp) && !lp->extents) {
4256 log_error("Unable to create new logical volume with no extents");
4257 return NULL;
4258 }
4259
4260 if (seg_is_thin_pool(lp) &&
4261 ((uint64_t)lp->extents * vg->extent_size < lp->chunk_size)) {
4262 log_error("Unable to create thin pool smaller than 1 chunk.");
4263 return NULL;
4264 }
4265
4266 if (lp->snapshot && !lp->thin && ((uint64_t)lp->extents * vg->extent_size < 2 * lp->chunk_size)) {
4267 log_error("Unable to create a snapshot smaller than 2 chunks.");
4268 return NULL;
4269 }
4270
4271 if (!seg_is_virtual(lp) &&
4272 vg->free_count < lp->extents) {
4273 log_error("Volume group \"%s\" has insufficient free space "
4274 "(%u extents): %u required.",
4275 vg->name, vg->free_count, lp->extents);
4276 return NULL;
4277 }
4278
4279 if (lp->stripes > dm_list_size(lp->pvh) && lp->alloc != ALLOC_ANYWHERE) {
4280 log_error("Number of stripes (%u) must not exceed "
4281 "number of physical volumes (%d)", lp->stripes,
4282 dm_list_size(lp->pvh));
4283 return NULL;
4284 }
4285
4286 if (!activation() &&
4287 (seg_is_mirrored(lp) ||
4288 seg_is_raid(lp) ||
4289 seg_is_thin_pool(lp))) {
4290 /*
4291 * FIXME: For thin pool add some code to allow delayed
4292 * initialization of empty thin pool volume.
4293 * i.e. using some LV flag, fake message,...
4294 * and testing for metadata pool header signature?
4295 */
4296 log_error("Can't create %s without using "
4297 "device-mapper kernel driver.",
4298 segtype_is_raid(lp->segtype) ? lp->segtype->name :
4299 segtype_is_mirrored(lp->segtype) ? "mirror" :
4300 "thin pool volume");
4301 return NULL;
4302 }
4303
4304 /* The snapshot segment gets created later */
4305 if (lp->snapshot && !lp->thin &&
4306 !(lp->segtype = get_segtype_from_string(cmd, "striped")))
4307 return_NULL;
4308
4309 if (!archive(vg))
4310 return_NULL;
4311
4312 if (!dm_list_empty(&lp->tags)) {
4313 if (!(vg->fid->fmt->features & FMT_TAGS)) {
4314 log_error("Volume group %s does not support tags",
4315 vg->name);
4316 return NULL;
4317 }
4318 }
4319
4320 if (seg_is_thin_volume(lp) &&
4321 ((lp->activate == CHANGE_AY) ||
4322 (lp->activate == CHANGE_AE) ||
4323 (lp->activate == CHANGE_ALY))) {
4324 /* Ensure all stacked messages are submitted */
4325 if (!(lvl = find_lv_in_vg(vg, lp->pool))) {
4326 log_error("Unable to find existing pool LV %s in VG %s.",
4327 lp->pool, vg->name);
4328 return NULL;
4329 }
4330 if (!update_pool_lv(lvl->lv, 1))
4331 return_NULL;
4332 }
4333
4334 if (segtype_is_mirrored(lp->segtype) || segtype_is_raid(lp->segtype)) {
4335 init_mirror_in_sync(lp->nosync);
4336
4337 if (lp->nosync) {
4338 log_warn("WARNING: New %s won't be synchronised. "
4339 "Don't read what you didn't write!",
4340 lp->segtype->name);
4341 status |= LV_NOTSYNCED;
4342 }
4343
4344 lp->region_size = adjusted_mirror_region_size(vg->extent_size,
4345 lp->extents,
4346 lp->region_size);
4347 }
4348
4349 if (!(lv = lv_create_empty(new_lv_name ? : "lvol%d", NULL,
4350 status, lp->alloc, vg)))
4351 return_NULL;
4352
4353 if (lp->read_ahead != lv->read_ahead) {
4354 log_verbose("Setting read ahead sectors");
4355 lv->read_ahead = lp->read_ahead;
4356 }
4357
4358 if (!seg_is_thin_pool(lp) && lp->minor >= 0) {
4359 lv->major = lp->major;
4360 lv->minor = lp->minor;
4361 lv->status |= FIXED_MINOR;
4362 log_verbose("Setting device number to (%d, %d)", lv->major,
4363 lv->minor);
4364 }
4365
4366 dm_list_splice(&lv->tags, &lp->tags);
4367
4368 if (!lv_extend(lv, lp->segtype,
4369 lp->stripes, lp->stripe_size,
4370 lp->mirrors,
4371 seg_is_thin_pool(lp) ? lp->poolmetadataextents : lp->region_size,
4372 seg_is_thin_volume(lp) ? lp->voriginextents : lp->extents,
4373 seg_is_thin_volume(lp) ? (org ? org->name : lp->pool) : NULL, lp->pvh, lp->alloc))
4374 return_NULL;
4375
4376 if (seg_is_thin_pool(lp)) {
4377 first_seg(lv)->zero_new_blocks = lp->zero ? 1 : 0;
4378 first_seg(lv)->chunk_size = lp->chunk_size;
4379 /* FIXME: use lowwatermark via lvm.conf global for all thinpools ? */
4380 first_seg(lv)->low_water_mark = 0;
4381 } else if (seg_is_thin_volume(lp)) {
4382 pool_lv = first_seg(lv)->pool_lv;
4383
4384 if (!(first_seg(lv)->device_id =
4385 get_free_pool_device_id(first_seg(pool_lv)))) {
4386 stack;
4387 goto revert_new_lv;
4388 }
4389
4390 if (!attach_pool_message(first_seg(pool_lv),
4391 DM_THIN_MESSAGE_CREATE_THIN, lv, 0, 0)) {
4392 stack;
4393 goto revert_new_lv;
4394 }
4395 }
4396
4397 /* FIXME Log allocation and attachment should have happened inside lv_extend. */
4398 if (lp->log_count &&
4399 !seg_is_raid(first_seg(lv)) && seg_is_mirrored(first_seg(lv))) {
4400 if (!add_mirror_log(cmd, lv, lp->log_count,
4401 first_seg(lv)->region_size,
4402 lp->pvh, lp->alloc)) {
4403 stack;
4404 goto revert_new_lv;
4405 }
4406 }
4407
4408 /* store vg on disk(s) */
4409 if (!vg_write(vg) || !vg_commit(vg))
4410 return_NULL;
4411
4412 backup(vg);
4413
4414 if (test_mode()) {
4415 log_verbose("Test mode: Skipping activation and zeroing.");
4416 goto out;
4417 }
4418
4419 if (seg_is_thin(lp)) {
4420 /* For snapshot, suspend active thin origin first */
4421 if (org && lv_is_active(org)) {
4422 if (!pool_below_threshold(first_seg(first_seg(org)->pool_lv))) {
4423 log_error("Cannot create thin snapshot. Pool %s/%s is filled "
4424 "over the autoextend threshold.",
4425 org->vg->name, first_seg(org)->pool_lv->name);
4426 goto revert_new_lv;
4427 }
4428 if (!suspend_lv_origin(cmd, org)) {
4429 log_error("Failed to suspend thin snapshot origin %s/%s.",
4430 org->vg->name, org->name);
4431 goto revert_new_lv;
4432 }
4433 if (!resume_lv_origin(cmd, org)) { /* deptree updates thin-pool */
4434 log_error("Failed to resume thin snapshot origin %s/%s.",
4435 org->vg->name, org->name);
4436 goto revert_new_lv;
4437 }
4438 /* At this point remove pool messages, snapshot is active */
4439 if (!update_pool_lv(first_seg(org)->pool_lv, 0)) {
4440 stack;
4441 goto deactivate_and_revert_new_lv;
4442 }
4443 }
4444 if (((lp->activate == CHANGE_AY) ||
4445 (lp->activate == CHANGE_AE) ||
4446 (lp->activate == CHANGE_ALY))) {
4447 /* At this point send message to kernel thin mda */
4448 pool_lv = lv_is_thin_pool(lv) ? lv : first_seg(lv)->pool_lv;
4449 if (!update_pool_lv(pool_lv, 1)) {
4450 stack;
4451 goto deactivate_and_revert_new_lv;
4452 }
4453 if (!activate_lv_excl(cmd, lv)) {
4454 log_error("Aborting. Failed to activate thin %s.",
4455 lv->name);
4456 goto deactivate_and_revert_new_lv;
4457 }
4458 }
4459 } else if (lp->snapshot) {
4460 if (!activate_lv_excl(cmd, lv)) {
4461 log_error("Aborting. Failed to activate snapshot "
4462 "exception store.");
4463 goto revert_new_lv;
4464 }
4465 } else if ((lp->activate == CHANGE_AY && !activate_lv(cmd, lv)) ||
4466 (lp->activate == CHANGE_AE && !activate_lv_excl(cmd, lv)) ||
4467 (lp->activate == CHANGE_ALY && !activate_lv_local(cmd, lv))) {
4468 log_error("Failed to activate new LV.");
4469 if (lp->zero)
4470 goto deactivate_and_revert_new_lv;
4471 return NULL;
4472 }
4473
4474 if (!seg_is_thin(lp) && !lp->zero && !lp->snapshot)
4475 log_warn("WARNING: \"%s\" not zeroed", lv->name);
4476 else if ((!seg_is_thin(lp) ||
4477 (lv_is_thin_volume(lv) &&
4478 !first_seg(first_seg(lv)->pool_lv)->zero_new_blocks)) &&
4479 !set_lv(cmd, lv, UINT64_C(0), 0)) {
4480 log_error("Aborting. Failed to wipe %s.",
4481 lp->snapshot ? "snapshot exception store" :
4482 "start of new LV");
4483 goto deactivate_and_revert_new_lv;
4484 }
4485
4486 if (lp->snapshot && !lp->thin) {
4487 /* Reset permission after zeroing */
4488 if (!(lp->permission & LVM_WRITE))
4489 lv->status &= ~LVM_WRITE;
4490
4491 /* COW area must be deactivated if origin is not active */
4492 if (!origin_active && !deactivate_lv(cmd, lv)) {
4493 log_error("Aborting. Couldn't deactivate snapshot "
4494 "COW area. Manual intervention required.");
4495 return NULL;
4496 }
4497
4498 /* A virtual origin must be activated explicitly. */
4499 if (lp->voriginsize &&
4500 (!(org = _create_virtual_origin(cmd, vg, lv->name,
4501 lp->permission,
4502 lp->voriginextents)) ||
4503 !activate_lv_excl(cmd, org))) {
4504 log_error("Couldn't create virtual origin for LV %s",
4505 lv->name);
4506 if (org && !lv_remove(org))
4507 stack;
4508 goto deactivate_and_revert_new_lv;
4509 }
4510
4511 /* cow LV remains active and becomes snapshot LV */
4512
4513 if (!vg_add_snapshot(org, lv, NULL,
4514 org->le_count, lp->chunk_size)) {
4515 log_error("Couldn't create snapshot.");
4516 goto deactivate_and_revert_new_lv;
4517 }
4518
4519 /* store vg on disk(s) */
4520 if (!vg_write(vg))
4521 return_NULL;
4522
4523 if (!suspend_lv(cmd, org)) {
4524 log_error("Failed to suspend origin %s", org->name);
4525 vg_revert(vg);
4526 return NULL;
4527 }
4528
4529 if (!vg_commit(vg))
4530 return_NULL;
4531
4532 if (!resume_lv(cmd, org)) {
4533 log_error("Problem reactivating origin %s", org->name);
4534 return NULL;
4535 }
4536 }
4537 /* FIXME out of sequence */
4538 backup(vg);
4539
4540 out:
4541 return lv;
4542
4543 deactivate_and_revert_new_lv:
4544 if (!deactivate_lv(cmd, lv)) {
4545 log_error("Unable to deactivate failed new LV. "
4546 "Manual intervention required.");
4547 return NULL;
4548 }
4549
4550 revert_new_lv:
4551 /* FIXME Better to revert to backup of metadata? */
4552 if (!lv_remove(lv) || !vg_write(vg) || !vg_commit(vg))
4553 log_error("Manual intervention may be required to remove "
4554 "abandoned LV(s) before retrying.");
4555 else
4556 backup(vg);
4557
4558 return NULL;
4559 }
4560
4561 int lv_create_single(struct volume_group *vg,
4562 struct lvcreate_params *lp)
4563 {
4564 struct logical_volume *lv;
4565
4566 /* Create thin pool first if necessary */
4567 if (lp->create_thin_pool) {
4568 if (!seg_is_thin_pool(lp) &&
4569 !(lp->segtype = get_segtype_from_string(vg->cmd, "thin-pool")))
4570 return_0;
4571
4572 if (!(lv = _lv_create_an_lv(vg, lp, lp->pool)))
4573 return_0;
4574
4575 if (!lp->thin)
4576 goto out;
4577
4578 lp->pool = lv->name;
4579
4580 if (!(lp->segtype = get_segtype_from_string(vg->cmd, "thin")))
4581 return_0;
4582 }
4583
4584 if (!(lv = _lv_create_an_lv(vg, lp, lp->lv_name)))
4585 return_0;
4586
4587 out:
4588 log_print("Logical volume \"%s\" created", lv->name);
4589
4590 return 1;
4591 }
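/*
 * Usage sketch (illustrative, not part of the original source): a minimal
 * lvcreate_params setup that creates a thin pool and then a thin LV inside
 * it, following the "Thin notes" above _lv_create_an_lv().  Only a few
 * fields are shown and the names and sizes are placeholders; a real caller
 * fills in many more (permission, chunk_size, read_ahead, ...).
 *
 *	struct lvcreate_params lp = { 0 };
 *
 *	lp.create_thin_pool = 1;
 *	lp.thin = 1;
 *	lp.pool = "pool0";
 *	lp.lv_name = "thin0";
 *	lp.extents = 100;
 *	lp.stripes = 1;
 *	lp.alloc = ALLOC_INHERIT;
 *	lp.pvh = &vg->pvs;
 *	lp.activate = CHANGE_AY;
 *	lp.segtype = get_segtype_from_string(vg->cmd, "thin");
 *
 *	if (!lv_create_single(vg, &lp))
 *		return_0;
 */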