/*
 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include "lvm-string.h"
#include "toolcontext.h"
/* FIXME: remove RAID_METADATA_AREA_LEN macro after defining 'raid_log_extents' */
#define RAID_METADATA_AREA_LEN 1
/* FIXME These ended up getting used differently from first intended.  Refactor. */
/* Only one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG, A_CLING_TO_ALLOCED may be set */
#define A_CONTIGUOUS_TO_LVSEG	0x01	/* Must be contiguous to an existing segment */
#define A_CLING_TO_LVSEG	0x02	/* Must use same disks as existing LV segment */
#define A_CLING_TO_ALLOCED	0x04	/* Must use same disks as already-allocated segment */

#define A_CLING_BY_TAGS		0x08	/* Must match tags against existing segment */
#define A_CAN_SPLIT		0x10
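
/*
 * Editor's note (illustrative; see _init_alloc_parms() below): with a
 * preceding segment, the contiguous policy sets A_CONTIGUOUS_TO_LVSEG
 * and the cling policies set A_CLING_TO_LVSEG; without one, the cling
 * policies set A_CLING_TO_ALLOCED instead.  cling_by_tags additionally
 * sets A_CLING_BY_TAGS, and A_CAN_SPLIT is set whenever the caller
 * allows the request to be split across several segments.
 */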
/*
 * Constant parameters during a single allocation attempt.
 */
struct alloc_parms {
	alloc_policy_t alloc;
	unsigned flags;		/* Holds A_* */
	struct lv_segment *prev_lvseg;
	uint32_t extents_still_needed;
};
/*
 * Holds varying state of each allocation attempt.
 */
struct alloc_state {
	struct pv_area_used *areas;
	uint32_t areas_size;
	uint32_t log_area_count_still_needed;	/* Number of areas still needing to be allocated for the log */
	uint32_t allocated;	/* Total number of extents allocated so far */
};
int add_seg_to_segs_using_this_lv(struct logical_volume *lv,
				  struct lv_segment *seg)
{
	struct seg_list *sl;

	dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
		if (sl->seg == seg) {
			sl->count++;
			return 1;
		}
	}

	log_very_verbose("Adding %s:%" PRIu32 " as a user of %s",
			 seg->lv->name, seg->le, lv->name);

	if (!(sl = dm_pool_zalloc(lv->vg->vgmem, sizeof(*sl)))) {
		log_error("Failed to allocate segment list");
		return 0;
	}

	sl->count = 1;
	sl->seg = seg;

	dm_list_add(&lv->segs_using_this_lv, &sl->list);

	return 1;
}
int remove_seg_from_segs_using_this_lv(struct logical_volume *lv,
				       struct lv_segment *seg)
{
	struct seg_list *sl;

	dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
		if (sl->seg != seg)
			continue;
		if (sl->count > 1)
			sl->count--;
		else {
			log_very_verbose("%s:%" PRIu32 " is no longer a user "
					 "of %s", seg->lv->name, seg->le,
					 lv->name);
			dm_list_del(&sl->list);
		}
		return 1;
	}

	return 0;
}
/*
 * This is a function specialized for the common case where there is
 * only one segment which uses the LV.
 * e.g. the LV is a layer inserted by insert_layer_for_lv().
 *
 * In general, walk through lv->segs_using_this_lv.
 */
struct lv_segment *get_only_segment_using_this_lv(struct logical_volume *lv)
{
	struct seg_list *sl;

	if (dm_list_size(&lv->segs_using_this_lv) != 1) {
		log_error("%s is expected to have only one segment using it, "
			  "while it has %d", lv->name,
			  dm_list_size(&lv->segs_using_this_lv));
		return NULL;
	}

	dm_list_iterate_items(sl, &lv->segs_using_this_lv)
		break;	/* first item */

	if (sl->count != 1) {
		log_error("%s is expected to have only one segment using it, "
			  "while %s:%" PRIu32 " uses it %d times",
			  lv->name, sl->seg->lv->name, sl->seg->le, sl->count);
		return NULL;
	}

	return sl->seg;
}
/*
 * PVs used by a segment of an LV
 */
struct seg_pvs {
	struct dm_list list;

	struct dm_list pvs;	/* struct pv_list */

	uint32_t le;
	uint32_t len;
};
static struct seg_pvs *_find_seg_pvs_by_le(struct dm_list *list, uint32_t le)
{
	struct seg_pvs *spvs;

	dm_list_iterate_items(spvs, list)
		if (le >= spvs->le && le < spvs->le + spvs->len)
			return spvs;

	return NULL;
}
/*
 * Find first unused LV number.
 */
uint32_t find_free_lvnum(struct logical_volume *lv)
{
	int lvnum_used[MAX_RESTRICTED_LVS + 1];
	uint32_t i = 0;
	struct lv_list *lvl;
	int lvnum;

	memset(&lvnum_used, 0, sizeof(lvnum_used));

	dm_list_iterate_items(lvl, &lv->vg->lvs) {
		lvnum = lvnum_from_lvid(&lvl->lv->lvid);
		if (lvnum <= MAX_RESTRICTED_LVS)
			lvnum_used[lvnum] = 1;
	}

	while (lvnum_used[i])
		i++;

	/* FIXME What if none are free? */

	return i;
}
/*
 * All lv_segments get created here.
 */
struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
				    struct logical_volume *lv,
				    uint32_t le, uint32_t len,
				    uint64_t status,
				    uint32_t stripe_size,
				    struct logical_volume *log_lv,
				    struct logical_volume *thin_pool_lv,
				    uint32_t area_count,
				    uint32_t area_len,
				    uint32_t chunk_size,
				    uint32_t region_size,
				    uint32_t extents_copied,
				    struct lv_segment *pvmove_source_seg)
{
	struct lv_segment *seg;
	struct dm_pool *mem = lv->vg->vgmem;
	uint32_t areas_sz = area_count * sizeof(*seg->areas);

	if (!segtype) {
		log_error(INTERNAL_ERROR "alloc_lv_segment: Missing segtype.");
		return NULL;
	}

	if (!(seg = dm_pool_zalloc(mem, sizeof(*seg))))
		return_NULL;

	if (!(seg->areas = dm_pool_zalloc(mem, areas_sz))) {
		dm_pool_free(mem, seg);
		return_NULL;
	}

	if (segtype_is_raid(segtype) &&
	    !(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) {
		dm_pool_free(mem, seg); /* frees everything alloced since seg */
		return_NULL;
	}

	seg->segtype = segtype;
	seg->lv = lv;
	seg->le = le;
	seg->len = len;
	seg->status = status;
	seg->stripe_size = stripe_size;
	seg->area_count = area_count;
	seg->area_len = area_len;
	seg->chunk_size = chunk_size;
	seg->region_size = region_size;
	seg->extents_copied = extents_copied;
	seg->pvmove_source_seg = pvmove_source_seg;
	dm_list_init(&seg->tags);
	dm_list_init(&seg->thin_messages);

	if (thin_pool_lv) {
		/* If thin_pool_lv is itself a thin volume, a thin snapshot is being created */
		if (lv_is_thin_volume(thin_pool_lv)) {
			seg->transaction_id = first_seg(first_seg(thin_pool_lv)->pool_lv)->transaction_id;
			if (!attach_pool_lv(seg, first_seg(thin_pool_lv)->pool_lv, thin_pool_lv))
				return_NULL;
		} else {
			seg->transaction_id = first_seg(thin_pool_lv)->transaction_id;
			if (!attach_pool_lv(seg, thin_pool_lv, NULL))
				return_NULL;
		}
	}

	if (log_lv && !attach_mirror_log(seg, log_lv))
		return_NULL;

	return seg;
}
struct lv_segment *alloc_snapshot_seg(struct logical_volume *lv,
				      uint64_t status, uint32_t old_le_count)
{
	struct lv_segment *seg;
	const struct segment_type *segtype;

	segtype = get_segtype_from_string(lv->vg->cmd, "snapshot");
	if (!segtype) {
		log_error("Failed to find snapshot segtype");
		return NULL;
	}

	if (!(seg = alloc_lv_segment(segtype, lv, old_le_count,
				     lv->le_count - old_le_count, status, 0,
				     NULL, NULL, 0, lv->le_count - old_le_count,
				     0, 0, 0, NULL))) {
		log_error("Couldn't allocate new snapshot segment.");
		return NULL;
	}

	dm_list_add(&lv->segments, &seg->list);
	lv->status |= VIRTUAL;

	return seg;
}
void release_lv_segment_area(struct lv_segment *seg, uint32_t s,
			     uint32_t area_reduction)
{
	if (seg_type(seg, s) == AREA_UNASSIGNED)
		return;

	if (seg_type(seg, s) == AREA_PV) {
		if (release_pv_segment(seg_pvseg(seg, s), area_reduction) &&
		    seg->area_len == area_reduction)
			seg_type(seg, s) = AREA_UNASSIGNED;
		return;
	}

	if ((seg_lv(seg, s)->status & MIRROR_IMAGE) ||
	    (seg_lv(seg, s)->status & THIN_POOL_DATA)) {
		if (!lv_reduce(seg_lv(seg, s), area_reduction))
			stack;	/* FIXME: any upper level reporting */
		return;
	}

	if (seg_lv(seg, s)->status & RAID_IMAGE) {
		/*
		 * FIXME: Use lv_reduce not lv_remove
		 *  We use lv_remove for now, because I haven't figured out
		 *  why lv_reduce won't remove the LV.
		 *	lv_reduce(seg_lv(seg, s), area_reduction);
		 */
		if (area_reduction != seg->area_len) {
			log_error("Unable to reduce RAID LV - operation not implemented.");
			return;
		}

		if (!lv_remove(seg_lv(seg, s))) {
			log_error("Failed to remove RAID image %s",
				  seg_lv(seg, s)->name);
			return;
		}

		/* Remove metadata area if image has been removed */
		if (area_reduction == seg->area_len) {
			if (!lv_reduce(seg_metalv(seg, s),
				       seg_metalv(seg, s)->le_count)) {
				log_error("Failed to remove RAID meta-device %s",
					  seg_metalv(seg, s)->name);
				return;
			}
		}
		return;
	}

	if (area_reduction == seg->area_len) {
		log_very_verbose("Remove %s:%" PRIu32 "[%" PRIu32 "] from "
				 "the top of LV %s:%" PRIu32,
				 seg->lv->name, seg->le, s,
				 seg_lv(seg, s)->name, seg_le(seg, s));

		remove_seg_from_segs_using_this_lv(seg_lv(seg, s), seg);
		seg_lv(seg, s) = NULL;
		seg_le(seg, s) = 0;
		seg_type(seg, s) = AREA_UNASSIGNED;
	}
}
/*
 * Move a segment area from one segment to another
 */
int move_lv_segment_area(struct lv_segment *seg_to, uint32_t area_to,
			 struct lv_segment *seg_from, uint32_t area_from)
{
	struct physical_volume *pv;
	struct logical_volume *lv;
	uint32_t pe, le;

	switch (seg_type(seg_from, area_from)) {
	case AREA_PV:
		pv = seg_pv(seg_from, area_from);
		pe = seg_pe(seg_from, area_from);

		release_lv_segment_area(seg_from, area_from,
					seg_from->area_len);
		release_lv_segment_area(seg_to, area_to, seg_to->area_len);

		if (!set_lv_segment_area_pv(seg_to, area_to, pv, pe))
			return_0;

		break;

	case AREA_LV:
		lv = seg_lv(seg_from, area_from);
		le = seg_le(seg_from, area_from);

		release_lv_segment_area(seg_from, area_from,
					seg_from->area_len);
		release_lv_segment_area(seg_to, area_to, seg_to->area_len);

		if (!set_lv_segment_area_lv(seg_to, area_to, lv, le, 0))
			return_0;

		break;

	case AREA_UNASSIGNED:
		release_lv_segment_area(seg_to, area_to, seg_to->area_len);
	}

	return 1;
}
/*
 * Link part of a PV to an LV segment.
 */
int set_lv_segment_area_pv(struct lv_segment *seg, uint32_t area_num,
			   struct physical_volume *pv, uint32_t pe)
{
	seg->areas[area_num].type = AREA_PV;

	if (!(seg_pvseg(seg, area_num) =
	      assign_peg_to_lvseg(pv, pe, seg->area_len, seg, area_num)))
		return_0;

	return 1;
}
/*
 * Link one LV segment to another.  Assumes sizes already match.
 */
int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
			   struct logical_volume *lv, uint32_t le,
			   uint64_t status)
{
	log_very_verbose("Stack %s:%" PRIu32 "[%" PRIu32 "] on LV %s:%" PRIu32,
			 seg->lv->name, seg->le, area_num, lv->name, le);

	if (status & RAID_META) {
		seg->meta_areas[area_num].type = AREA_LV;
		seg_metalv(seg, area_num) = lv;
		if (le) {
			log_error(INTERNAL_ERROR "Meta le != 0");
			return 0;
		}
		seg_metale(seg, area_num) = 0;
	} else {
		seg->areas[area_num].type = AREA_LV;
		seg_lv(seg, area_num) = lv;
		seg_le(seg, area_num) = le;
	}
	lv->status |= status;

	if (!add_seg_to_segs_using_this_lv(lv, seg))
		return_0;

	return 1;
}
/*
 * Prepare for adding parallel areas to an existing segment.
 */
static int _lv_segment_add_areas(struct logical_volume *lv,
				 struct lv_segment *seg,
				 uint32_t new_area_count)
{
	struct lv_segment_area *newareas;
	uint32_t areas_sz = new_area_count * sizeof(*newareas);

	if (!(newareas = dm_pool_zalloc(lv->vg->cmd->mem, areas_sz)))
		return_0;

	memcpy(newareas, seg->areas, seg->area_count * sizeof(*seg->areas));

	seg->areas = newareas;
	seg->area_count = new_area_count;

	return 1;
}
/*
 * Reduce the size of an lv_segment.  New size can be zero.
 */
static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
{
	uint32_t area_reduction, s;

	/* Caller must ensure exact divisibility */
	if (seg_is_striped(seg)) {
		if (reduction % seg->area_count) {
			log_error("Segment extent reduction %" PRIu32
				  " not divisible by #stripes %" PRIu32,
				  reduction, seg->area_count);
			return 0;
		}
		area_reduction = (reduction / seg->area_count);
	} else
		area_reduction = reduction;

	for (s = 0; s < seg->area_count; s++)
		release_lv_segment_area(seg, s, area_reduction);

	seg->len -= reduction;
	seg->area_len -= area_reduction;

	return 1;
}
/*
 * Entry point for all LV reductions in size.
 */
static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
{
	struct lv_segment *seg;
	uint32_t count = extents;
	uint32_t reduction;

	dm_list_iterate_back_items(seg, &lv->segments) {
		if (!count)
			break;

		if (seg->len <= count) {
			/* remove this segment completely */
			/* FIXME Check this is safe */
			if (seg->log_lv && !lv_remove(seg->log_lv))
				return_0;

			if (seg->metadata_lv && !lv_remove(seg->metadata_lv))
				return_0;

			if (seg->pool_lv) {
				if (!detach_pool_lv(seg))
					return_0;
			}

			dm_list_del(&seg->list);
			reduction = seg->len;
		} else
			reduction = count;

		if (!_lv_segment_reduce(seg, reduction))
			return_0;
		count -= reduction;
	}

	lv->le_count -= extents;
	lv->size = (uint64_t) lv->le_count * lv->vg->extent_size;

	if (!delete)
		return 1;

	/* Remove the LV if it is now empty */
	if (!lv->le_count && !unlink_lv_from_vg(lv))
		return_0;
	else if (lv->vg->fid->fmt->ops->lv_setup &&
		 !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
		return_0;

	return 1;
}
int lv_empty(struct logical_volume *lv)
{
	return _lv_reduce(lv, lv->le_count, 0);
}
/*
 * Empty an LV and add error segment.
 */
int replace_lv_with_error_segment(struct logical_volume *lv)
{
	uint32_t len = lv->le_count;

	if (len && !lv_empty(lv))
		return_0;

	/* Minimum size required for a table. */
	if (!len)
		len = 1;

	/*
	 * Since we are replacing whatever was there with
	 * an error segment, we should also clear any flags
	 * that suggest it is anything other than "error".
	 */
	lv->status &= ~(MIRRORED|PVMOVE);

	/* FIXME: Should we bug if we find a log_lv attached? */

	if (!lv_add_virtual_segment(lv, 0, len, get_segtype_from_string(lv->vg->cmd, "error"), NULL))
		return_0;

	return 1;
}
/*
 * Remove given number of extents from LV.
 */
int lv_reduce(struct logical_volume *lv, uint32_t extents)
{
	return _lv_reduce(lv, extents, 1);
}
/*
 * Completely remove an LV.
 */
int lv_remove(struct logical_volume *lv)
{
	if (!lv_reduce(lv, lv->le_count))
		return_0;

	return 1;
}
/*
 * A set of contiguous physical extents allocated
 */
struct alloced_area {
	struct dm_list list;

	struct physical_volume *pv;
	uint32_t pe;
	uint32_t len;
};
/*
 * Details of an allocation attempt
 */
struct alloc_handle {
	struct cmd_context *cmd;
	struct dm_pool *mem;

	alloc_policy_t alloc;		/* Overall policy */
	uint32_t new_extents;		/* Number of new extents required */
	uint32_t area_count;		/* Number of parallel areas */
	uint32_t parity_count;		/* Adds to area_count, but not area_multiple */
	uint32_t area_multiple;		/* seg->len = area_len * area_multiple */
	uint32_t log_area_count;	/* Number of parallel logs */
	uint32_t metadata_area_count;	/* Number of parallel metadata areas */
	uint32_t log_len;		/* Length of log/metadata_area */
	uint32_t region_size;		/* Mirror region size */
	uint32_t total_area_len;	/* Total number of parallel extents */

	unsigned maximise_cling;
	unsigned mirror_logs_separate;	/* Force mirror logs on separate PVs? */

	/*
	 * RAID devices require a metadata area that accompanies each
	 * device.  During initial creation, it is best to look for space
	 * that is new_extents + log_len and then split that between two
	 * allocated areas when found.  'alloc_and_split_meta' indicates
	 * that this is the desired dynamic.
	 */
	unsigned alloc_and_split_meta;

	const struct dm_config_node *cling_tag_list_cn;

	struct dm_list *parallel_areas;	/* PVs to avoid */

	/*
	 * Contains area_count lists of areas allocated to data stripes
	 * followed by log_area_count lists of areas allocated to log stripes.
	 */
	struct dm_list alloced_areas[0];
};
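
/*
 * Editor's example of the alloc_and_split_meta dynamic above (not in
 * the original source): creating a 2-image RAID1 LV of 100 extents
 * with log_len = 1 searches for two parallel areas of 101 extents
 * each; each area found is split into 1 metadata extent plus 100 data
 * extents, so an image and its metadata LV land on the same PV.
 */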
static uint32_t _calc_area_multiple(const struct segment_type *segtype,
				    const uint32_t area_count, const uint32_t stripes)
{
	if (!area_count)
		return 1;

	/* Striped */
	if (segtype_is_striped(segtype))
		return area_count;

	/* Parity RAID (e.g. RAID 4/5/6) */
	if (segtype_is_raid(segtype) && segtype->parity_devs) {
		/* Replacement drive allocation: no area_multiple applies */
		if (area_count <= segtype->parity_devs)
			return 1;

		return area_count - segtype->parity_devs;
	}

	/* Mirrored stripes */
	if (stripes)
		return stripes;

	/* Mirrored */
	return 1;
}
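
/*
 * Editor's examples (not from the original source): a 3-stripe striped
 * segment has area_multiple = 3, so seg->len = area_len * 3; a plain
 * 2-way mirror has area_multiple = 1, because each image holds a full
 * copy and seg->len equals area_len.
 */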
/*
 * Returns log device size in extents, algorithm from kernel code
 */
#define BYTE_SHIFT 3
static uint32_t mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint32_t area_len)
{
	size_t area_size, bitset_size, log_size, region_count;

	area_size = area_len * pe_size;
	region_count = dm_div_up(area_size, region_size);

	/* Work out how many "unsigned long"s we need to hold the bitset. */
	bitset_size = dm_round_up(region_count, sizeof(uint32_t) << BYTE_SHIFT);
	bitset_size >>= BYTE_SHIFT;

	/* Log device holds both header and bitset. */
	log_size = dm_round_up((MIRROR_LOG_OFFSET << SECTOR_SHIFT) + bitset_size, 1 << SECTOR_SHIFT);
	log_size >>= SECTOR_SHIFT;
	log_size = dm_div_up(log_size, pe_size);

	/*
	 * Kernel requires a mirror to be at least 1 region large.  So,
	 * if our mirror log is itself a mirror, it must be at least
	 * 1 region large.  This restriction may not be necessary for
	 * non-mirrored logs, but we apply the rule anyway.
	 *
	 * (The other option is to make the region size of the log
	 * mirror smaller than the mirror it is acting as a log for,
	 * but that really complicates things.  It's much easier to
	 * keep the region_size the same for both.)
	 */
	return (log_size > (region_size / pe_size)) ? log_size :
		(region_size / pe_size);
}
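
/*
 * Editor's worked example (not from the original source): with 4MiB
 * extents (pe_size = 8192 sectors), region_size = 1024 sectors
 * (512KiB) and area_len = 2560 extents (10GiB), region_count is
 * 20480, so the bitset needs 2560 bytes; the log header offset plus
 * that bitset rounds up to a few sectors, i.e. a single extent, and
 * since region_size / pe_size is 0 here, the function returns 1.
 */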
/*
 * Preparation for a specific allocation attempt
 * stripes and mirrors refer to the parallel areas used for data.
 * If log_area_count > 1 it is always mirrored (not striped).
 */
static struct alloc_handle *_alloc_init(struct cmd_context *cmd,
					struct dm_pool *mem,
					const struct segment_type *segtype,
					alloc_policy_t alloc,
					uint32_t new_extents,
					uint32_t mirrors,
					uint32_t stripes,
					uint32_t metadata_area_count,
					uint32_t extent_size,
					uint32_t region_size,
					struct dm_list *parallel_areas)
{
	struct alloc_handle *ah;
	uint32_t s, area_count, alloc_count, parity_count;
	size_t size = 0;

	/* FIXME Caller should ensure this */
	if (mirrors && !stripes)
		stripes = 1;

	if (segtype_is_virtual(segtype))
		area_count = 0;
	else if (mirrors > 1)
		area_count = mirrors * stripes;
	else
		area_count = stripes;

	size = sizeof(*ah);

	/*
	 * It is a requirement that RAID 4/5/6 are created with a number of
	 * stripes that is greater than the number of parity devices.  (e.g.
	 * RAID4/5 must have at least 2 stripes and RAID6 must have at least
	 * 3.)  It is also a constraint that, when replacing individual devices
	 * in a RAID 4/5/6 array, no more devices can be replaced than
	 * there are parity devices.  (Otherwise, there would not be enough
	 * redundancy to maintain the array.)  Understanding these two
	 * constraints allows us to infer whether the caller of this function
	 * is intending to allocate an entire array or just replacement
	 * component devices.  In the former case, we must account for the
	 * necessary parity_count.  In the latter case, we do not need to
	 * account for the extra parity devices because the array already
	 * exists and they only want replacement drives.
	 */
	parity_count = (area_count <= segtype->parity_devs) ? 0 :
		segtype->parity_devs;
	alloc_count = area_count + parity_count;
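
	/*
	 * Editor's example of the inference above (not in the original
	 * source): creating a raid5 LV with 3 stripes gives area_count = 3 >
	 * parity_devs = 1, so parity_count = 1 and alloc_count = 4;
	 * replacing a single image of an existing raid5 gives
	 * area_count = 1 <= parity_devs = 1, so parity_count stays 0.
	 */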
	if (segtype_is_raid(segtype) && metadata_area_count)
		/* RAID has a meta area for each device */
		alloc_count *= 2;
	else
		/* mirrors specify their exact log count */
		alloc_count += metadata_area_count;

	size += sizeof(ah->alloced_areas[0]) * alloc_count;

	if (!(ah = dm_pool_zalloc(mem, size))) {
		log_error("allocation handle allocation failed");
		return NULL;
	}

	ah->cmd = cmd;

	if (segtype_is_virtual(segtype))
		return ah;

	if (!(area_count + metadata_area_count)) {
		log_error(INTERNAL_ERROR "_alloc_init called for non-virtual segment with no disk space.");
		return NULL;
	}

	if (!(ah->mem = dm_pool_create("allocation", 1024))) {
		log_error("allocation pool creation failed");
		return NULL;
	}

	if (mirrors || stripes)
		ah->new_extents = new_extents;
	else
		ah->new_extents = 0;

	ah->area_count = area_count;
	ah->parity_count = parity_count;
	ah->region_size = region_size;
	ah->alloc = alloc;
	ah->area_multiple = _calc_area_multiple(segtype, area_count, stripes);
	ah->mirror_logs_separate = find_config_tree_bool(cmd, "allocation/mirror_logs_require_separate_pvs",
							 DEFAULT_MIRROR_LOGS_REQUIRE_SEPARATE_PVS);

	if (segtype_is_raid(segtype)) {
		if (metadata_area_count) {
			if (metadata_area_count != area_count)
				log_error(INTERNAL_ERROR
					  "Bad metadata_area_count");
			ah->metadata_area_count = area_count;
			ah->alloc_and_split_meta = 1;

			ah->log_len = RAID_METADATA_AREA_LEN;

			/*
			 * We need 'log_len' extents for each
			 * RAID device's metadata_area
			 */
			ah->new_extents += (ah->log_len * ah->area_multiple);
		} else {
			ah->log_area_count = 0;
			ah->log_len = 0;
		}
	} else if (segtype_is_thin_pool(segtype)) {
		ah->log_area_count = metadata_area_count;
		/* thin_pool uses region_size to pass metadata size in extents */
		ah->log_len = ah->region_size;
		ah->region_size = 0;
		ah->mirror_logs_separate =
			find_config_tree_bool(cmd, "allocation/thin_pool_metadata_require_separate_pvs",
					      DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS);
	} else {
		ah->log_area_count = metadata_area_count;
		ah->log_len = !metadata_area_count ? 0 :
			mirror_log_extents(ah->region_size, extent_size,
					   new_extents / ah->area_multiple);
	}

	for (s = 0; s < alloc_count; s++)
		dm_list_init(&ah->alloced_areas[s]);

	ah->parallel_areas = parallel_areas;

	ah->cling_tag_list_cn = find_config_tree_node(cmd, "allocation/cling_tag_list");

	ah->maximise_cling = find_config_tree_bool(cmd, "allocation/maximise_cling", DEFAULT_MAXIMISE_CLING);

	return ah;
}

void alloc_destroy(struct alloc_handle *ah)
{
	if (ah->mem)
		dm_pool_destroy(ah->mem);
}
/* Is there enough total space or should we give up immediately? */
static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms,
				uint32_t allocated, uint32_t extents_still_needed)
{
	uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple;
	uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple;
	uint32_t metadata_extents_needed = ah->metadata_area_count * RAID_METADATA_AREA_LEN; /* One each */
	uint32_t total_extents_needed = area_extents_needed + parity_extents_needed + metadata_extents_needed;
	uint32_t free_pes = pv_maps_size(pvms);

	if (total_extents_needed > free_pes) {
		log_error("Insufficient free space: %" PRIu32 " extents needed,"
			  " but only %" PRIu32 " available",
			  total_extents_needed, free_pes);
		return 0;
	}

	return 1;
}
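
/*
 * Editor's example (not from the original source): extending a 2-way
 * mirror (area_count = 2, parity_count = 0, area_multiple = 1) by 100
 * extents needs 100 * 2 / 1 = 200 free PEs; metadata_area_count is
 * only set for RAID, so mirror log extents are not counted by this
 * quick check.
 */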
/* For striped mirrors, all the areas are counted, through the mirror layer */
static uint32_t _stripes_per_mimage(struct lv_segment *seg)
{
	struct lv_segment *last_lvseg;

	if (seg_is_mirrored(seg) && seg->area_count && seg_type(seg, 0) == AREA_LV) {
		last_lvseg = dm_list_item(dm_list_last(&seg_lv(seg, 0)->segments), struct lv_segment);
		if (seg_is_striped(last_lvseg))
			return last_lvseg->area_count;
	}

	return 1;
}
static void _init_alloc_parms(struct alloc_handle *ah, struct alloc_parms *alloc_parms, alloc_policy_t alloc,
			      struct lv_segment *prev_lvseg, unsigned can_split,
			      uint32_t allocated, uint32_t extents_still_needed)
{
	alloc_parms->alloc = alloc;
	alloc_parms->prev_lvseg = prev_lvseg;
	alloc_parms->flags = 0;
	alloc_parms->extents_still_needed = extents_still_needed;

	/* Are there any preceding segments we must follow on from? */
	if (alloc_parms->prev_lvseg) {
		if (alloc_parms->alloc == ALLOC_CONTIGUOUS)
			alloc_parms->flags |= A_CONTIGUOUS_TO_LVSEG;
		else if ((alloc_parms->alloc == ALLOC_CLING) || (alloc_parms->alloc == ALLOC_CLING_BY_TAGS))
			alloc_parms->flags |= A_CLING_TO_LVSEG;
	} else
		/*
		 * A cling allocation that follows a successful contiguous allocation
		 * must use the same PVs (or else fail).
		 */
		if ((alloc_parms->alloc == ALLOC_CLING) || (alloc_parms->alloc == ALLOC_CLING_BY_TAGS))
			alloc_parms->flags |= A_CLING_TO_ALLOCED;

	if (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)
		alloc_parms->flags |= A_CLING_BY_TAGS;

	/*
	 * For normal allocations, if any extents have already been found
	 * for allocation, prefer to place further extents on the same disks as
	 * have already been used.
	 */
	if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL && allocated != alloc_parms->extents_still_needed)
		alloc_parms->flags |= A_CLING_TO_ALLOCED;

	if (can_split)
		alloc_parms->flags |= A_CAN_SPLIT;
}
static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas)
{
	struct seg_pvs *spvs;
	struct pv_list *pvl;
	char *pvnames;

	if (!parallel_areas)
		return 1;

	dm_list_iterate_items(spvs, parallel_areas) {
		if (!dm_pool_begin_object(mem, 256)) {
			log_error("dm_pool_begin_object failed");
			return 0;
		}

		dm_list_iterate_items(pvl, &spvs->pvs) {
			if (!dm_pool_grow_object(mem, pv_dev_name(pvl->pv), strlen(pv_dev_name(pvl->pv)))) {
				log_error("dm_pool_grow_object failed");
				dm_pool_abandon_object(mem);
				return 0;
			}
			if (!dm_pool_grow_object(mem, " ", 1)) {
				log_error("dm_pool_grow_object failed");
				dm_pool_abandon_object(mem);
				return 0;
			}
		}

		if (!dm_pool_grow_object(mem, "\0", 1)) {
			log_error("dm_pool_grow_object failed");
			dm_pool_abandon_object(mem);
			return 0;
		}

		pvnames = dm_pool_end_object(mem);
		log_debug("Parallel PVs at LE %" PRIu32 " length %" PRIu32 ": %s",
			  spvs->le, spvs->len, pvnames);
		dm_pool_free(mem, pvnames);
	}

	return 1;
}
static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
				  uint32_t area_count,
				  uint32_t stripe_size,
				  const struct segment_type *segtype,
				  struct alloced_area *aa,
				  uint32_t region_size)
{
	uint32_t s, extents, area_multiple;
	struct lv_segment *seg;

	area_multiple = _calc_area_multiple(segtype, area_count, 0);

	if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count,
				     aa[0].len * area_multiple,
				     status, stripe_size, NULL, NULL,
				     area_count,
				     aa[0].len, 0u, region_size, 0u, NULL))) {
		log_error("Couldn't allocate new LV segment.");
		return 0;
	}

	for (s = 0; s < area_count; s++)
		if (!set_lv_segment_area_pv(seg, s, aa[s].pv, aa[s].pe))
			return_0;

	dm_list_add(&lv->segments, &seg->list);

	extents = aa[0].len * area_multiple;
	lv->le_count += extents;
	lv->size += (uint64_t) extents * lv->vg->extent_size;

	if (segtype_is_mirrored(segtype))
		lv->status |= MIRRORED;

	return 1;
}
static int _setup_alloced_segments(struct logical_volume *lv,
				   struct dm_list *alloced_areas,
				   uint32_t area_count,
				   uint64_t status,
				   uint32_t stripe_size,
				   const struct segment_type *segtype,
				   uint32_t region_size)
{
	struct alloced_area *aa;

	dm_list_iterate_items(aa, &alloced_areas[0]) {
		if (!_setup_alloced_segment(lv, status, area_count,
					    stripe_size, segtype, aa,
					    region_size))
			return_0;
	}

	return 1;
}
/*
 * This function takes a list of pv_areas and adds them to allocated_areas.
 * If the complete area is not needed then it gets split.
 * The part used is removed from the pv_map so it can't be allocated twice.
 */
static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocate,
				struct alloc_state *alloc_state, uint32_t ix_log_offset)
{
	uint32_t area_len, len;
	uint32_t s;
	uint32_t ix_log_skip = 0; /* How many areas to skip in middle of array to reach log areas */
	uint32_t total_area_count;
	struct alloced_area *aa;
	struct pv_area *pva;

	total_area_count = ah->area_count + alloc_state->log_area_count_still_needed;
	total_area_count += ah->parity_count;
	if (!total_area_count) {
		log_error(INTERNAL_ERROR "_alloc_parallel_area called without any allocation to do.");
		return 1;
	}

	area_len = max_to_allocate / ah->area_multiple;

	/* Reduce area_len to the smallest of the areas */
	for (s = 0; s < ah->area_count + ah->parity_count; s++)
		if (area_len > alloc_state->areas[s].used)
			area_len = alloc_state->areas[s].used;

	len = (ah->alloc_and_split_meta) ? total_area_count * 2 : total_area_count;
	len *= sizeof(*aa);
	if (!(aa = dm_pool_alloc(ah->mem, len))) {
		log_error("alloced_area allocation failed");
		return 0;
	}

	/*
	 * Areas consist of area_count areas for data stripes, then
	 * ix_log_skip areas to skip, then log_area_count areas to use for the
	 * log, then some areas too small for the log.
	 */
	len = area_len;
	for (s = 0; s < total_area_count; s++) {
		if (s == (ah->area_count + ah->parity_count)) {
			ix_log_skip = ix_log_offset - ah->area_count;
			len = ah->log_len;
		}

		pva = alloc_state->areas[s + ix_log_skip].pva;
		if (ah->alloc_and_split_meta) {
			/*
			 * The metadata area goes at the front of the allocated
			 * space for now, but could easily go at the end (or
			 * middle!).
			 *
			 * Even though we split these two from the same
			 * allocation, we store the images at the beginning
			 * of the areas array and the metadata at the end.
			 */
			s += ah->area_count + ah->parity_count;
			aa[s].pv = pva->map->pv;
			aa[s].pe = pva->start;
			aa[s].len = ah->log_len;

			log_debug("Allocating parallel metadata area %" PRIu32
				  " on %s start PE %" PRIu32
				  " length %" PRIu32 ".",
				  (s - (ah->area_count + ah->parity_count)),
				  pv_dev_name(aa[s].pv), aa[s].pe,
				  ah->log_len);

			consume_pv_area(pva, ah->log_len);
			dm_list_add(&ah->alloced_areas[s], &aa[s].list);
			s -= ah->area_count + ah->parity_count;
		}
		aa[s].pv = pva->map->pv;
		aa[s].pe = pva->start;
		aa[s].len = (ah->alloc_and_split_meta) ? len - ah->log_len : len;

		log_debug("Allocating parallel area %" PRIu32
			  " on %s start PE %" PRIu32 " length %" PRIu32 ".",
			  s, pv_dev_name(aa[s].pv), aa[s].pe, aa[s].len);

		consume_pv_area(pva, aa[s].len);

		dm_list_add(&ah->alloced_areas[s], &aa[s].list);
	}

	/* Only need to alloc metadata from the first batch */
	ah->alloc_and_split_meta = 0;

	ah->total_area_len += area_len;

	alloc_state->allocated += area_len * ah->area_multiple;

	return 1;
}
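
/*
 * Editor's illustration of the array layout above (not from the
 * original source): for a 2-stripe LV plus one mirror log, the sorted
 * areas array might be [data0, data1, unused, log0, too-small...];
 * with ix_log_offset = 3 and area_count = 2, ix_log_skip = 1 area is
 * skipped between the data areas and the log area.
 */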
/*
 * Call fn for each AREA_PV used by the LV segment at lv:le of length *max_seg_len.
 * If any constituent area contains more than one segment, max_seg_len is
 * reduced to cover only the first.
 * fn should return 0 on error, 1 to continue scanning or >1 to terminate without error.
 * In the last case, this function passes on the return code.
 */
static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
			uint32_t le, uint32_t len, struct lv_segment *seg,
			uint32_t *max_seg_len,
			uint32_t first_area, uint32_t max_areas,
			int top_level_area_index,
			int only_single_area_segments,
			int (*fn)(struct cmd_context *cmd,
				  struct pv_segment *peg, uint32_t s,
				  void *data),
			void *data)
{
	uint32_t s;
	uint32_t remaining_seg_len, area_len, area_multiple;
	uint32_t stripes_per_mimage = 1;
	int r = 1;

	if (!seg && !(seg = find_seg_by_le(lv, le))) {
		log_error("Failed to find segment for %s extent %" PRIu32,
			  lv->name, le);
		return 0;
	}

	/* Remaining logical length of segment */
	remaining_seg_len = seg->len - (le - seg->le);

	if (remaining_seg_len > len)
		remaining_seg_len = len;

	if (max_seg_len && *max_seg_len > remaining_seg_len)
		*max_seg_len = remaining_seg_len;

	area_multiple = _calc_area_multiple(seg->segtype, seg->area_count, 0);
	area_len = remaining_seg_len / area_multiple ? : 1;

	/* For striped mirrors, all the areas are counted, through the mirror layer */
	if (top_level_area_index == -1)
		stripes_per_mimage = _stripes_per_mimage(seg);

	for (s = first_area;
	     s < seg->area_count && (!max_areas || s <= max_areas);
	     s++) {
		if (seg_type(seg, s) == AREA_LV) {
			if (!(r = _for_each_pv(cmd, seg_lv(seg, s),
					       seg_le(seg, s) +
					       (le - seg->le) / area_multiple,
					       area_len, NULL, max_seg_len, 0,
					       (stripes_per_mimage == 1) && only_single_area_segments ? 1U : 0U,
					       (top_level_area_index != -1) ? top_level_area_index : (int) (s * stripes_per_mimage),
					       only_single_area_segments, fn,
					       data)))
				stack;
		} else if (seg_type(seg, s) == AREA_PV)
			if (!(r = fn(cmd, seg_pvseg(seg, s), top_level_area_index != -1 ? (uint32_t) top_level_area_index + s : s, data)))
				stack;
		if (r != 1)
			return r;
	}

	/* FIXME only_single_area_segments used as workaround to skip log LV - needs new param? */
	if (!only_single_area_segments && seg_is_mirrored(seg) && seg->log_lv) {
		if (!(r = _for_each_pv(cmd, seg->log_lv, 0, seg->log_lv->le_count, NULL,
				       NULL, 0, 0, 0, only_single_area_segments,
				       fn, data)))
			stack;
		if (r != 1)
			return r;
	}

	/* FIXME Add snapshot cow LVs etc. */

	return 1;
}
static int _comp_area(const void *l, const void *r)
{
	const struct pv_area_used *lhs = (const struct pv_area_used *) l;
	const struct pv_area_used *rhs = (const struct pv_area_used *) r;

	if (lhs->used < rhs->used)
		return 1;
	else if (lhs->used > rhs->used)
		return -1;

	return 0;
}
/*
 * Search for pvseg that matches condition
 */
struct pv_match {
	int (*condition)(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva);

	struct pv_area_used *areas;
	struct pv_area *pva;
	uint32_t areas_size;
	const struct dm_config_node *cling_tag_list_cn;
	int s;	/* Area index of match */
};
/*
 * Is PV area on the same PV?
 */
static int _is_same_pv(struct pv_match *pvmatch __attribute((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
	if (pvseg->pv != pva->map->pv)
		return 0;

	return 1;
}
/*
 * Does PV area have a tag listed in allocation/cling_tag_list that
 * matches a tag of the PV of the existing segment?
 */
static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn, struct physical_volume *pv1, struct physical_volume *pv2)
{
	const struct dm_config_value *cv;
	const char *str;
	const char *tag_matched;

	for (cv = cling_tag_list_cn->v; cv; cv = cv->next) {
		if (cv->type != DM_CFG_STRING) {
			log_error("Ignoring invalid string in config file entry "
				  "allocation/cling_tag_list");
			continue;
		}

		str = cv->v.str;
		if (!*str) {
			log_error("Ignoring empty string in config file entry "
				  "allocation/cling_tag_list");
			continue;
		}

		if (*str != '@') {
			log_error("Ignoring string not starting with @ in config file entry "
				  "allocation/cling_tag_list: %s", str);
			continue;
		}

		str++;

		if (!*str) {
			log_error("Ignoring empty tag in config file entry "
				  "allocation/cling_tag_list");
			continue;
		}

		/* Wildcard matches any tag against any tag. */
		if (!strcmp(str, "*")) {
			if (!str_list_match_list(&pv1->tags, &pv2->tags, &tag_matched))
				continue;

			log_debug("Matched allocation PV tag %s on existing %s with free space on %s.",
				  tag_matched, pv_dev_name(pv1), pv_dev_name(pv2));
			return 1;
		}

		if (!str_list_match_item(&pv1->tags, str) ||
		    !str_list_match_item(&pv2->tags, str))
			continue;

		log_debug("Matched allocation PV tag %s on existing %s with free space on %s.",
			  str, pv_dev_name(pv1), pv_dev_name(pv2));
		return 1;
	}

	return 0;
}
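
/*
 * Editor's illustrative lvm.conf snippet for the tag matching above
 * (not part of this file):
 *
 *     allocation {
 *         cling_tag_list = [ "@site_a", "@site_b" ]
 *     }
 *
 * Extents then cling to PVs sharing the @site_a or @site_b tag with
 * the PV of the existing segment; [ "@*" ] matches any common tag.
 */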
static int _has_matching_pv_tag(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva)
{
	return _pvs_have_matching_tag(pvmatch->cling_tag_list_cn, pvseg->pv, pva->map->pv);
}
/*
 * Is PV area contiguous to PV segment?
 */
static int _is_contiguous(struct pv_match *pvmatch __attribute((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
	if (pvseg->pv != pva->map->pv)
		return 0;

	if (pvseg->pe + pvseg->len != pva->start)
		return 0;

	return 1;
}
static void _reserve_area(struct pv_area_used *area_used, struct pv_area *pva, uint32_t required,
			  uint32_t ix_pva, uint32_t unreserved)
{
	log_debug("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
		  " length %" PRIu32 " leaving %" PRIu32 ".",
		  area_used->pva ? "Changing   " : "Considering",
		  ix_pva - 1, area_used->pva ? "to" : "as",
		  dev_name(pva->map->pv->dev), pva->start, required, unreserved);

	area_used->pva = pva;
	area_used->used = required;
}
static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
			 struct pv_segment *pvseg, uint32_t s,
			 void *data)
{
	struct pv_match *pvmatch = data;

	if (pvmatch->areas[s].pva)
		return 1;	/* Area already assigned */

	if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva))
		return 1;	/* Continue */

	if (s >= pvmatch->areas_size)
		return 1;

	/*
	 * Only used for cling and contiguous policies (which only make one allocation per PV)
	 * so it's safe to say all the available space is used.
	 */
	_reserve_area(&pvmatch->areas[s], pvmatch->pva, pvmatch->pva->count, s + 1, 0);

	return 2;	/* Finished */
}
/*
 * Is pva on same PV as any existing areas?
 */
static int _check_cling(struct alloc_handle *ah,
			const struct dm_config_node *cling_tag_list_cn,
			struct lv_segment *prev_lvseg, struct pv_area *pva,
			struct alloc_state *alloc_state)
{
	struct pv_match pvmatch;
	int r;
	uint32_t le, len;

	pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv;
	pvmatch.areas = alloc_state->areas;
	pvmatch.areas_size = alloc_state->areas_size;
	pvmatch.pva = pva;
	pvmatch.cling_tag_list_cn = cling_tag_list_cn;

	if (ah->maximise_cling) {
		/* Check entire LV */
		le = 0;
		len = prev_lvseg->le + prev_lvseg->len;
	} else {
		/* Only check 1 LE at end of previous LV segment */
		le = prev_lvseg->le + prev_lvseg->len - 1;
		len = 1;
	}

	/* FIXME Cope with stacks by flattening */
	if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv, le, len, NULL, NULL,
			       0, 0, -1, 1,
			       _is_condition, &pvmatch)))
		stack;

	if (r != 2)
		return 0;

	return 1;
}
/*
 * Is pva contiguous to any existing areas or on the same PV?
 */
static int _check_contiguous(struct cmd_context *cmd,
			     struct lv_segment *prev_lvseg, struct pv_area *pva,
			     struct alloc_state *alloc_state)
{
	struct pv_match pvmatch;
	int r;

	pvmatch.condition = _is_contiguous;
	pvmatch.areas = alloc_state->areas;
	pvmatch.areas_size = alloc_state->areas_size;
	pvmatch.pva = pva;
	pvmatch.cling_tag_list_cn = NULL;

	/* FIXME Cope with stacks by flattening */
	if (!(r = _for_each_pv(cmd, prev_lvseg->lv,
			       prev_lvseg->le + prev_lvseg->len - 1, 1, NULL, NULL,
			       0, 0, -1, 1,
			       _is_condition, &pvmatch)))
		stack;

	if (r != 2)
		return 0;

	return 1;
}
/*
 * Is pva on same PV as any areas already used in this allocation attempt?
 */
static int _check_cling_to_alloced(struct alloc_handle *ah, const struct dm_config_node *cling_tag_list_cn,
				   struct pv_area *pva, struct alloc_state *alloc_state)
{
	unsigned s;
	struct alloced_area *aa;

	/*
	 * Ignore log areas.  They are always allocated whole as part of the
	 * first allocation.  If they aren't yet set, we know we've nothing to do.
	 */
	if (alloc_state->log_area_count_still_needed)
		return 0;

	for (s = 0; s < ah->area_count; s++) {
		if (alloc_state->areas[s].pva)
			continue;	/* Area already assigned */
		dm_list_iterate_items(aa, &ah->alloced_areas[s]) {
			if ((!cling_tag_list_cn && (pva->map->pv == aa[0].pv)) ||
			    (cling_tag_list_cn && _pvs_have_matching_tag(cling_tag_list_cn, pva->map->pv, aa[0].pv))) {
				_reserve_area(&alloc_state->areas[s], pva, pva->count, s + 1, 0);
				return 1;
			}
		}
	}

	return 0;
}
static int _pv_is_parallel(struct physical_volume *pv, struct dm_list *parallel_pvs)
{
	struct pv_list *pvl;

	dm_list_iterate_items(pvl, parallel_pvs)
		if (pv == pvl->pv)
			return 1;

	return 0;
}
typedef enum { NEXT_PV, NEXT_AREA, USE_AREA, PREFERRED } area_use_t;

/*
 * Decide whether or not to try allocation from supplied area pva.
 * alloc_state->areas may get modified.
 */
static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint32_t still_needed,
			     const struct alloc_parms *alloc_parms, struct alloc_state *alloc_state,
			     unsigned already_found_one, unsigned iteration_count, unsigned log_iteration_count)
{
	unsigned s;

	/* Skip fully-reserved areas (which are not currently removed from the list). */
	if (!pva->unreserved)
		return NEXT_AREA;

	/* FIXME Should this test be removed? */
	if (iteration_count)
		/*
		 * Don't use an area twice.
		 */
		for (s = 0; s < alloc_state->areas_size; s++)
			if (alloc_state->areas[s].pva == pva)
				return NEXT_AREA;

	/* If maximise_cling is set, perform several checks, otherwise perform exactly one. */
	if (!iteration_count && !log_iteration_count && alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG | A_CLING_TO_ALLOCED)) {
		/* Contiguous? */
		if (((alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG) || (ah->maximise_cling && alloc_parms->prev_lvseg)) &&
		    _check_contiguous(ah->cmd, alloc_parms->prev_lvseg, pva, alloc_state))
			return PREFERRED;

		/* Try next area on same PV if looking for contiguous space */
		if (alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG)
			return NEXT_AREA;

		/* Cling to prev_lvseg? */
		if (((alloc_parms->flags & A_CLING_TO_LVSEG) || (ah->maximise_cling && alloc_parms->prev_lvseg)) &&
		    _check_cling(ah, NULL, alloc_parms->prev_lvseg, pva, alloc_state))
			/* If this PV is suitable, use this first area */
			return PREFERRED;

		/* Cling_to_alloced? */
		if ((alloc_parms->flags & A_CLING_TO_ALLOCED) &&
		    _check_cling_to_alloced(ah, NULL, pva, alloc_state))
			return PREFERRED;

		/* Cling_by_tags? */
		if (!(alloc_parms->flags & A_CLING_BY_TAGS) || !ah->cling_tag_list_cn)
			return NEXT_PV;

		if (alloc_parms->prev_lvseg) {
			if (_check_cling(ah, ah->cling_tag_list_cn, alloc_parms->prev_lvseg, pva, alloc_state))
				return PREFERRED;
		} else if (_check_cling_to_alloced(ah, ah->cling_tag_list_cn, pva, alloc_state))
			return PREFERRED;

		/* All areas on this PV give same result so pointless checking more */
		return NEXT_PV;
	}

	/* Normal/Anywhere */

	/* Is it big enough on its own? */
	if (pva->unreserved * ah->area_multiple < still_needed &&
	    ((!(alloc_parms->flags & A_CAN_SPLIT) && !ah->log_area_count) ||
	     (already_found_one && alloc_parms->alloc != ALLOC_ANYWHERE)))
		return NEXT_PV;

	return USE_AREA;
}
/*
 * Decide how many extents we're trying to obtain from a given area.
 * Removes the extents from further consideration.
 */
static uint32_t _calc_required_extents(struct alloc_handle *ah, struct pv_area *pva, unsigned ix_pva, uint32_t max_to_allocate, alloc_policy_t alloc)
{
	uint32_t required = max_to_allocate / ah->area_multiple;

	/*
	 * Update amount unreserved - effectively splitting an area
	 * into two or more parts.  If the whole stripe doesn't fit,
	 * reduce amount we're looking for.
	 */
	if (alloc == ALLOC_ANYWHERE) {
		if (ix_pva - 1 >= ah->area_count)
			required = ah->log_len;
	} else if (required < ah->log_len)
		required = ah->log_len;

	if (required >= pva->unreserved) {
		required = pva->unreserved;
		pva->unreserved = 0;
	} else {
		pva->unreserved -= required;
		reinsert_changed_pv_area(pva);
	}

	return required;
}
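
/*
 * Editor's worked example (not from the original source): with
 * max_to_allocate = 99 on a 3-stripe LV (area_multiple = 3, no log),
 * required starts at 33 extents; if the area has only 20 unreserved
 * extents, required drops to 20 and the area becomes fully reserved,
 * leaving the remaining 13 extents per stripe for a later pass (the
 * segment gets split).
 */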
static int _reserve_required_area(struct alloc_handle *ah, uint32_t max_to_allocate,
				  unsigned ix_pva, struct pv_area *pva,
				  struct alloc_state *alloc_state, alloc_policy_t alloc)
{
	uint32_t required = _calc_required_extents(ah, pva, ix_pva, max_to_allocate, alloc);
	uint32_t s;

	/* Expand areas array if needed after an area was split. */
	if (ix_pva > alloc_state->areas_size) {
		alloc_state->areas_size *= 2;
		if (!(alloc_state->areas = dm_realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) {
			log_error("Memory reallocation for parallel areas failed.");
			return 0;
		}
		for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++)
			alloc_state->areas[s].pva = NULL;
	}

	_reserve_area(&alloc_state->areas[ix_pva - 1], pva, required, ix_pva, pva->unreserved);

	return 1;
}
static void _clear_areas(struct alloc_state *alloc_state)
{
	unsigned s;

	for (s = 0; s < alloc_state->areas_size; s++)
		alloc_state->areas[s].pva = NULL;
}
static void _reset_unreserved(struct dm_list *pvms)
{
	struct pv_map *pvm;
	struct pv_area *pva;

	dm_list_iterate_items(pvm, pvms)
		dm_list_iterate_items(pva, &pvm->areas)
			if (pva->unreserved != pva->count) {
				pva->unreserved = pva->count;
				reinsert_changed_pv_area(pva);
			}
}
static void _report_needed_allocation_space(struct alloc_handle *ah,
					    struct alloc_state *alloc_state)
{
	const char *metadata_type;
	uint32_t parallel_areas_count, parallel_area_size;
	uint32_t metadata_count, metadata_size;

	parallel_area_size = (ah->new_extents - alloc_state->allocated) / ah->area_multiple -
		((ah->alloc_and_split_meta) ? ah->log_len : 0);

	parallel_areas_count = ah->area_count + ah->parity_count;

	metadata_size = ah->log_len;
	if (ah->alloc_and_split_meta) {
		metadata_type = "RAID metadata area";
		metadata_count = parallel_areas_count;
	} else {
		metadata_type = "mirror log";
		metadata_count = alloc_state->log_area_count_still_needed;
	}

	log_debug("Still need %" PRIu32 " total extents:",
		  parallel_area_size * parallel_areas_count + metadata_size * metadata_count);
	log_debug("  %" PRIu32 " (%" PRIu32 " data/%" PRIu32
		  " parity) parallel areas of %" PRIu32 " extents each",
		  parallel_areas_count, ah->area_count, ah->parity_count, parallel_area_size);
	log_debug("  %" PRIu32 " %ss of %" PRIu32 " extents each",
		  metadata_count, metadata_type, metadata_size);
}
/*
 * Returns 1 regardless of whether any space was found, except on error.
 */
static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc_parms *alloc_parms,
				     struct dm_list *pvms, struct alloc_state *alloc_state,
				     struct dm_list *parallel_pvs, uint32_t max_to_allocate)
{
	unsigned ix = 0;
	unsigned last_ix;
	struct pv_map *pvm;
	struct pv_area *pva;
	unsigned preferred_count = 0;
	unsigned already_found_one;
	unsigned ix_offset = 0;	/* Offset for non-preferred allocations */
	unsigned ix_log_offset;	/* Offset to start of areas to use for log */
	unsigned too_small_for_log_count;	/* How many too small for log? */
	unsigned iteration_count = 0;	/* cling_to_alloced may need 2 iterations */
	unsigned log_iteration_count = 0;	/* extra iteration for logs on data devices */
	struct alloced_area *aa;
	uint32_t s;
	uint32_t devices_needed = ah->area_count + ah->parity_count;

	/* ix_offset holds the number of parallel allocations that must be contiguous/cling */
	/* At most one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG or A_CLING_TO_ALLOCED may be set */
	if (alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG))
		ix_offset = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count;

	if (alloc_parms->flags & A_CLING_TO_ALLOCED)
		ix_offset = ah->area_count;

	if (alloc_parms->alloc == ALLOC_NORMAL || (alloc_parms->flags & A_CLING_TO_ALLOCED))
		log_debug("Cling_to_allocated is %sset",
			  alloc_parms->flags & A_CLING_TO_ALLOCED ? "" : "not ");

	_clear_areas(alloc_state);
	_reset_unreserved(pvms);

	_report_needed_allocation_space(ah, alloc_state);

	/* ix holds the number of areas found on other PVs */
	do {
		if (log_iteration_count) {
			log_debug("Found %u areas for %" PRIu32 " parallel areas and %" PRIu32 " log areas so far.", ix, devices_needed, alloc_state->log_area_count_still_needed);
		} else if (iteration_count)
			log_debug("Filled %u out of %u preferred areas so far.", preferred_count, ix_offset);

		/*
		 * Provide for escape from the loop if no progress is made.
		 * This should not happen: ALLOC_ANYWHERE should be able to use
		 * all available space.  (If there aren't enough extents, the code
		 * should not reach this point.)
		 */
		last_ix = ix;

		/*
		 * Put the smallest area of each PV that is at least the
		 * size we need into areas array.  If there isn't one
		 * that fits completely and we're allowed more than one
		 * LV segment, then take the largest remaining instead.
		 */
		dm_list_iterate_items(pvm, pvms) {
			/* PV-level checks */
			if (dm_list_empty(&pvm->areas))
				continue;	/* Next PV */

			if (alloc_parms->alloc != ALLOC_ANYWHERE) {
				/* Don't allocate onto the log PVs */
				if (ah->log_area_count)
					dm_list_iterate_items(aa, &ah->alloced_areas[ah->area_count])
						for (s = 0; s < ah->log_area_count; s++)
							if (aa[s].pv == pvm->pv)
								goto next_pv;

				/* FIXME Split into log and non-log parallel_pvs and only check the log ones if log_iteration? */
				/* (I've temporarily disabled the check.) */
				/* Avoid PVs used by existing parallel areas */
				if (!log_iteration_count && parallel_pvs && _pv_is_parallel(pvm->pv, parallel_pvs))
					goto next_pv;

				/*
				 * Avoid PVs already set aside for log.
				 * We only reach here if there were enough PVs for the main areas but
				 * not enough for the logs.
				 */
				if (log_iteration_count) {
					for (s = devices_needed; s < ix + ix_offset; s++)
						if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
							goto next_pv;
				/* On a second pass, avoid PVs already used in an uncommitted area */
				} else if (iteration_count)
					for (s = 0; s < devices_needed; s++)
						if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
							goto next_pv;
			}

			already_found_one = 0;
			/* First area in each list is the largest */
			dm_list_iterate_items(pva, &pvm->areas) {
				/*
				 * There are two types of allocations, which can't be mixed at present.
				 * PREFERRED are stored immediately in a specific parallel slot.
				 * USE_AREA are stored for later, then sorted and chosen from.
				 */
				switch (_check_pva(ah, pva, max_to_allocate, alloc_parms,
						   alloc_state, already_found_one, iteration_count, log_iteration_count)) {

				case PREFERRED:
					preferred_count++;
					/* Fall through */

				case NEXT_PV:
					goto next_pv;

				case NEXT_AREA:
					continue;

				case USE_AREA:
					/*
					 * Except with ALLOC_ANYWHERE, replace first area with this
					 * one which is smaller but still big enough.
					 */
					if (!already_found_one ||
					    alloc_parms->alloc == ALLOC_ANYWHERE) {
						ix++;
						already_found_one = 1;
					}

					/* Reserve required amount of pva */
					if (!_reserve_required_area(ah, max_to_allocate, ix + ix_offset,
								    pva, alloc_state, alloc_parms->alloc))
						return_0;
				}
			}

		next_pv:
			/* With ALLOC_ANYWHERE we ignore further PVs once we have at least enough areas */
			/* With cling and contiguous we stop if we found a match for *all* the areas */
			/* FIXME Rename these variables! */
			if ((alloc_parms->alloc == ALLOC_ANYWHERE &&
			     ix + ix_offset >= devices_needed + alloc_state->log_area_count_still_needed) ||
			    (preferred_count == ix_offset &&
			     (ix_offset == devices_needed + alloc_state->log_area_count_still_needed)))
				break;
		}
	} while ((alloc_parms->alloc == ALLOC_ANYWHERE && last_ix != ix && ix < devices_needed + alloc_state->log_area_count_still_needed) ||
		 /* With cling_to_alloced and normal, if there were gaps in the preferred areas, have a second iteration */
		 (alloc_parms->alloc == ALLOC_NORMAL && preferred_count &&
		  (preferred_count < ix_offset || alloc_state->log_area_count_still_needed) &&
		  (alloc_parms->flags & A_CLING_TO_ALLOCED) && !iteration_count++) ||
		 /* Extra iteration needed to fill log areas on PVs already used? */
		 (alloc_parms->alloc == ALLOC_NORMAL && preferred_count == ix_offset && !ah->mirror_logs_separate &&
		  (ix + preferred_count >= devices_needed) &&
		  (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed) && !log_iteration_count++));

	if (preferred_count < ix_offset && !(alloc_parms->flags & A_CLING_TO_ALLOCED))
		return 1;

	if (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed)
		return 1;

	/* Sort the areas so we allocate from the biggest */
	if (log_iteration_count) {
		if (ix > devices_needed + 1) {
			log_debug("Sorting %u log areas", ix - devices_needed);
			qsort(alloc_state->areas + devices_needed, ix - devices_needed, sizeof(*alloc_state->areas),
			      _comp_area);
		}
	} else if (ix > 1) {
		log_debug("Sorting %u areas", ix);
		qsort(alloc_state->areas + ix_offset, ix, sizeof(*alloc_state->areas),
		      _comp_area);
	}

	/* If there are gaps in our preferred areas, fill them from the sorted part of the array */
	if (preferred_count && preferred_count != ix_offset) {
		for (s = 0; s < devices_needed; s++)
			if (!alloc_state->areas[s].pva) {
				alloc_state->areas[s].pva = alloc_state->areas[ix_offset].pva;
				alloc_state->areas[s].used = alloc_state->areas[ix_offset].used;
				alloc_state->areas[ix_offset++].pva = NULL;
			}
	}

	/*
	 * First time around, if there's a log, allocate it on the
	 * smallest device that has space for it.
	 */
	too_small_for_log_count = 0;
	ix_log_offset = 0;

	/* FIXME This logic is due to its heritage and can be simplified! */
	if (alloc_state->log_area_count_still_needed) {
		/* How many areas are too small for the log? */
		while (too_small_for_log_count < ix_offset + ix &&
		       (*(alloc_state->areas + ix_offset + ix - 1 -
			  too_small_for_log_count)).used < ah->log_len)
			too_small_for_log_count++;
		ix_log_offset = ix_offset + ix - too_small_for_log_count - ah->log_area_count;
	}

	if (ix + ix_offset < devices_needed +
	    (alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
							too_small_for_log_count : 0))
		return 1;

	/*
	 * Finally add the space identified to the list of areas to be used.
	 */
	if (!_alloc_parallel_area(ah, max_to_allocate, alloc_state, ix_log_offset))
		return_0;

	/*
	 * Log is always allocated first time.
	 */
	alloc_state->log_area_count_still_needed = 0;

	return 1;
}
/*
 * Choose sets of parallel areas to use, respecting any constraints
 * supplied in alloc_parms.
 */
static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, struct alloc_parms *alloc_parms,
						   struct dm_list *pvms, struct alloc_state *alloc_state)
{
	uint32_t max_tmp;
	uint32_t max_to_allocate;	/* Maximum extents to allocate this time */
	uint32_t old_allocated;
	uint32_t next_le;
	struct seg_pvs *spvs;
	struct dm_list *parallel_pvs;

	/* FIXME This algorithm needs a lot of cleaning up! */
	/* FIXME anywhere doesn't find all space yet */
	do {
		parallel_pvs = NULL;
		max_to_allocate = alloc_parms->extents_still_needed - alloc_state->allocated;

		/*
		 * If there are existing parallel PVs, avoid them and reduce
		 * the maximum we can allocate in one go accordingly.
		 */
		if (ah->parallel_areas) {
			next_le = (alloc_parms->prev_lvseg ? alloc_parms->prev_lvseg->le + alloc_parms->prev_lvseg->len : 0) + alloc_state->allocated / ah->area_multiple;
			dm_list_iterate_items(spvs, ah->parallel_areas) {
				if (next_le >= spvs->le + spvs->len)
					continue;

				max_tmp = max_to_allocate +
					alloc_state->allocated;

				/*
				 * Because a request that groups metadata and
				 * data together will be split, we must adjust
				 * the comparison accordingly.
				 */
				if (ah->alloc_and_split_meta)
					max_tmp -= ah->log_len;
				if (max_tmp > (spvs->le + spvs->len) * ah->area_multiple) {
					max_to_allocate = (spvs->le + spvs->len) * ah->area_multiple - alloc_state->allocated;
					max_to_allocate += ah->alloc_and_split_meta ? ah->log_len : 0;
				}
				parallel_pvs = &spvs->pvs;
				break;
			}
		}

		old_allocated = alloc_state->allocated;

		if (!_find_some_parallel_space(ah, alloc_parms, pvms, alloc_state, parallel_pvs, max_to_allocate))
			return_0;

		/*
		 * If we didn't allocate anything this time with ALLOC_NORMAL and had
		 * A_CLING_TO_ALLOCED set, try again without it.
		 *
		 * For ALLOC_NORMAL, if we did allocate something without the
		 * flag set, set it and continue so that further allocations
		 * remain on the same disks where possible.
		 */
		if (old_allocated == alloc_state->allocated) {
			if ((alloc_parms->alloc == ALLOC_NORMAL) && (alloc_parms->flags & A_CLING_TO_ALLOCED))
				alloc_parms->flags &= ~A_CLING_TO_ALLOCED;
			else
				break;	/* Give up */
		} else if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL &&
			   !(alloc_parms->flags & A_CLING_TO_ALLOCED))
			alloc_parms->flags |= A_CLING_TO_ALLOCED;
	} while ((alloc_parms->alloc != ALLOC_CONTIGUOUS) && alloc_state->allocated != alloc_parms->extents_still_needed && (alloc_parms->flags & A_CAN_SPLIT));

	return 1;
}
1951 * Allocate several segments, each the same size, in parallel.
1952 * If mirrored_pv and mirrored_pe are supplied, it is used as
1953 * the first area, and additional areas are allocated parallel to it.
static int _allocate(struct alloc_handle *ah,
		     struct volume_group *vg,
		     struct logical_volume *lv,
		     unsigned can_split,
		     struct dm_list *allocatable_pvs)
{
	uint32_t old_allocated;
	struct lv_segment *prev_lvseg = NULL;
	int r = 0;
	struct dm_list *pvms;
	alloc_policy_t alloc;
	struct alloc_parms alloc_parms;
	struct alloc_state alloc_state;

	alloc_state.allocated = lv ? lv->le_count : 0;

	if (alloc_state.allocated >= ah->new_extents && !ah->log_area_count) {
		log_error("_allocate called with no work to do!");
		return 1;
	}

	if (ah->area_multiple > 1 &&
	    (ah->new_extents - alloc_state.allocated) % ah->area_multiple) {
		log_error("Number of extents requested (%d) needs to be divisible by %d.",
			  ah->new_extents - alloc_state.allocated,
			  ah->area_multiple);
		return 0;
	}

	alloc_state.log_area_count_still_needed = ah->log_area_count;

	if (ah->alloc == ALLOC_CONTIGUOUS)
		can_split = 0;

	if (lv && !dm_list_empty(&lv->segments))
		prev_lvseg = dm_list_item(dm_list_last(&lv->segments),
					  struct lv_segment);
	/*
	 * Build the sets of available areas on the pv's.
	 */
	if (!(pvms = create_pv_maps(ah->mem, vg, allocatable_pvs)))
		return_0;

	if (!_log_parallel_areas(ah->mem, ah->parallel_areas))
		stack;

	alloc_state.areas_size = dm_list_size(pvms);
	if (alloc_state.areas_size &&
	    alloc_state.areas_size < (ah->area_count + ah->parity_count + ah->log_area_count)) {
		if (ah->alloc != ALLOC_ANYWHERE && ah->mirror_logs_separate) {
			log_error("Not enough PVs with free space available "
				  "for parallel allocation.");
			log_error("Consider --alloc anywhere if desperate.");
			return 0;
		}
		alloc_state.areas_size = ah->area_count + ah->parity_count + ah->log_area_count;
	}

	/* Upper bound if none of the PVs in prev_lvseg is in pvms */
	/* FIXME Work size out properly */
	if (prev_lvseg)
		alloc_state.areas_size += _stripes_per_mimage(prev_lvseg) * prev_lvseg->area_count;

	/* Allocate an array of pv_areas to hold the largest space on each PV */
	if (!(alloc_state.areas = dm_malloc(sizeof(*alloc_state.areas) * alloc_state.areas_size))) {
		log_error("Couldn't allocate areas array.");
		return 0;
	}

	/*
	 * cling includes implicit cling_by_tags
	 * but it does nothing unless the lvm.conf setting is present.
	 */
	if (ah->alloc == ALLOC_CLING)
		ah->alloc = ALLOC_CLING_BY_TAGS;

	/* Attempt each defined allocation policy in turn */
	for (alloc = ALLOC_CONTIGUOUS; alloc <= ah->alloc; alloc++) {
		/* Skip cling_by_tags if no list defined */
		if (alloc == ALLOC_CLING_BY_TAGS && !ah->cling_tag_list_cn)
			continue;

		old_allocated = alloc_state.allocated;
		log_debug("Trying allocation using %s policy.", get_alloc_string(alloc));

		if (!_sufficient_pes_free(ah, pvms, alloc_state.allocated, ah->new_extents))
			goto_out;

		_init_alloc_parms(ah, &alloc_parms, alloc, prev_lvseg,
				  can_split, alloc_state.allocated,
				  ah->new_extents);

		if (!_find_max_parallel_space_for_one_policy(ah, &alloc_parms, pvms, &alloc_state))
			goto_out;

		if ((alloc_state.allocated == ah->new_extents && !alloc_state.log_area_count_still_needed) ||
		    (!can_split && (alloc_state.allocated != old_allocated)))
			break;
	}

	if (alloc_state.allocated != ah->new_extents) {
		log_error("Insufficient suitable %sallocatable extents "
			  "for logical volume %s: %u more required",
			  can_split ? "" : "contiguous ",
			  lv ? lv->name : "",
			  (ah->new_extents - alloc_state.allocated) * ah->area_count
			  / ah->area_multiple);
		goto out;
	}

	if (alloc_state.log_area_count_still_needed) {
		log_error("Insufficient free space for log allocation "
			  "for logical volume %s.",
			  lv ? lv->name : "");
		goto out;
	}

	r = 1;

out:
	dm_free(alloc_state.areas);

	return r;
}
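/*
 * Illustration (not from the original source): with --alloc normal, the
 * policy loop above effectively tries
 *
 *	ALLOC_CONTIGUOUS -> ALLOC_CLING -> ALLOC_CLING_BY_TAGS -> ALLOC_NORMAL
 *
 * in that order, stopping at the first policy that satisfies the whole
 * request, so stricter placements are preferred whenever space permits.
 * (ALLOC_CLING_BY_TAGS is skipped unless allocation/cling_tag_list is
 * configured.)
 */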
int lv_add_virtual_segment(struct logical_volume *lv, uint64_t status,
			   uint32_t extents, const struct segment_type *segtype,
			   const char *thin_pool_name)
{
	struct lv_segment *seg;
	struct logical_volume *thin_pool_lv = NULL;
	struct lv_list *lvl;
	uint32_t size;

	if (thin_pool_name) {
		if (!(lvl = find_lv_in_vg(lv->vg, thin_pool_name))) {
			log_error("Unable to find existing pool LV %s in VG %s.",
				  thin_pool_name, lv->vg->name);
			return 0;
		}
		thin_pool_lv = lvl->lv;
		size = first_seg(thin_pool_lv)->chunk_size;
		if (lv->vg->extent_size < size) {
			/* Align extents on chunk boundary size */
			size = ((uint64_t)lv->vg->extent_size * extents + size - 1) /
				size * size / lv->vg->extent_size;
			if (size != extents) {
				log_print("Rounding size (%d extents) up to chunk boundary "
					  "size (%d extents).", extents, size);
				extents = size;
			}
		}
	}

	if (!dm_list_empty(&lv->segments) &&
	    (seg = last_seg(lv)) && (seg->segtype == segtype)) {
		seg->area_len += extents;
		seg->len += extents;
	} else {
		if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents,
					     status, 0, NULL, thin_pool_lv, 0,
					     extents, 0, 0, 0, NULL))) {
			log_error("Couldn't allocate new zero segment.");
			return 0;
		}
		lv->status |= VIRTUAL;
		dm_list_add(&lv->segments, &seg->list);
	}

	lv->le_count += extents;
	lv->size += (uint64_t) extents * lv->vg->extent_size;

	return 1;
}
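/*
 * Worked example of the chunk-boundary rounding above (illustrative, not
 * from the original source): with a 64KiB extent (128 sectors) and a
 * 256KiB pool chunk (512 sectors), a request for 5 extents becomes
 *
 *	size = (128 * 5 + 511) / 512 * 512 / 128 = 8
 *
 * so the virtual size is rounded up from 5 to 8 extents, i.e. two whole
 * chunks.
 */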
/*
 * Entry point for all extent allocations.
 */
struct alloc_handle *allocate_extents(struct volume_group *vg,
				      struct logical_volume *lv,
				      const struct segment_type *segtype,
				      uint32_t stripes,
				      uint32_t mirrors, uint32_t log_count,
				      uint32_t region_size, uint32_t extents,
				      struct dm_list *allocatable_pvs,
				      alloc_policy_t alloc,
				      struct dm_list *parallel_areas)
{
	struct alloc_handle *ah;
	uint32_t new_extents;

	if (segtype_is_virtual(segtype)) {
		log_error("allocate_extents does not handle virtual segments");
		return NULL;
	}

	if (!allocatable_pvs) {
		log_error(INTERNAL_ERROR "Missing allocatable pvs.");
		return NULL;
	}

	if (vg->fid->fmt->ops->segtype_supported &&
	    !vg->fid->fmt->ops->segtype_supported(vg->fid, segtype)) {
		log_error("Metadata format (%s) does not support required "
			  "LV segment type (%s).", vg->fid->fmt->name,
			  segtype->name);
		log_error("Consider changing the metadata format by running "
			  "vgconvert.");
		return NULL;
	}

	if (alloc >= ALLOC_INHERIT)
		alloc = vg->alloc;

	new_extents = (lv ? lv->le_count : 0) + extents;
	if (!(ah = _alloc_init(vg->cmd, vg->cmd->mem, segtype, alloc,
			       new_extents, mirrors, stripes, log_count,
			       vg->extent_size, region_size,
			       parallel_areas)))
		return_NULL;

	if (!_allocate(ah, vg, lv, 1, allocatable_pvs)) {
		alloc_destroy(ah);
		return_NULL;
	}

	return ah;
}
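/*
 * A minimal usage sketch (hypothetical caller, modelled on lv_extend()
 * further below; not part of the original source):
 *
 *	struct alloc_handle *ah;
 *
 *	if (!(ah = allocate_extents(vg, lv, segtype, stripes, mirrors,
 *				    log_count, region_size, extents,
 *				    allocatable_pvs, alloc, NULL)))
 *		return_0;
 *	if (!lv_add_segment(ah, 0, ah->area_count, lv, segtype,
 *			    stripe_size, 0u, 0))
 *		stack;
 *	alloc_destroy(ah);
 *
 * Whatever happens, the handle must be released with alloc_destroy()
 * once its alloced_areas have been consumed.
 */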
/*
 * Add new segments to an LV from supplied list of areas.
 */
int lv_add_segment(struct alloc_handle *ah,
		   uint32_t first_area, uint32_t num_areas,
		   struct logical_volume *lv,
		   const struct segment_type *segtype,
		   uint32_t stripe_size,
		   uint64_t status,
		   uint32_t region_size)
{
	if (!segtype) {
		log_error("Missing segtype in lv_add_segment().");
		return 0;
	}

	if (segtype_is_virtual(segtype)) {
		log_error("lv_add_segment cannot handle virtual segments");
		return 0;
	}

	if ((status & MIRROR_LOG) && dm_list_size(&lv->segments)) {
		log_error("Log segments can only be added to an empty LV");
		return 0;
	}

	if (!_setup_alloced_segments(lv, &ah->alloced_areas[first_area],
				     num_areas, status,
				     stripe_size, segtype,
				     region_size))
		return_0;

	if ((segtype->flags & SEG_CAN_SPLIT) && !lv_merge_segments(lv)) {
		log_error("Couldn't merge segments after extending "
			  "logical volume.");
		return 0;
	}

	if (lv->vg->fid->fmt->ops->lv_setup &&
	    !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
		return_0;

	return 1;
}
/*
 * "mirror" segment type doesn't support split.
 * So, when adding mirrors to a linear LV segment, first split it,
 * then convert it to "mirror" and add areas.
 */
static struct lv_segment *_convert_seg_to_mirror(struct lv_segment *seg,
						 uint32_t region_size,
						 struct logical_volume *log_lv)
{
	struct lv_segment *newseg;
	uint32_t s;

	if (!seg_is_striped(seg)) {
		log_error("Can't convert non-striped segment to mirrored.");
		return NULL;
	}

	if (seg->area_count > 1) {
		log_error("Can't convert striped segment with multiple areas "
			  "to mirrored.");
		return NULL;
	}

	if (!(newseg = alloc_lv_segment(get_segtype_from_string(seg->lv->vg->cmd, "mirror"),
					seg->lv, seg->le, seg->len,
					seg->status, seg->stripe_size,
					log_lv, NULL,
					seg->area_count, seg->area_len,
					seg->chunk_size, region_size,
					seg->extents_copied, NULL))) {
		log_error("Couldn't allocate converted LV segment");
		return NULL;
	}

	for (s = 0; s < seg->area_count; s++)
		if (!move_lv_segment_area(newseg, s, seg, s))
			return_NULL;

	seg->pvmove_source_seg = NULL; /* Not maintained after allocation */

	dm_list_add(&seg->list, &newseg->list);
	dm_list_del(&seg->list);

	return newseg;
}
/*
 * Add new areas to mirrored segments
 */
int lv_add_mirror_areas(struct alloc_handle *ah,
			struct logical_volume *lv, uint32_t le,
			uint32_t region_size)
{
	struct alloced_area *aa;
	struct lv_segment *seg;
	uint32_t current_le = le;
	uint32_t s, old_area_count, new_area_count;

	dm_list_iterate_items(aa, &ah->alloced_areas[0]) {
		if (!(seg = find_seg_by_le(lv, current_le))) {
			log_error("Failed to find segment for %s extent %"
				  PRIu32, lv->name, current_le);
			return 0;
		}

		/* Allocator assures aa[0].len <= seg->area_len */
		if (aa[0].len < seg->area_len) {
			if (!lv_split_segment(lv, seg->le + aa[0].len)) {
				log_error("Failed to split segment at %s "
					  "extent %" PRIu32, lv->name, le);
				return 0;
			}
		}

		if (!seg_is_mirrored(seg) &&
		    (!(seg = _convert_seg_to_mirror(seg, region_size, NULL))))
			return_0;

		old_area_count = seg->area_count;
		new_area_count = old_area_count + ah->area_count;

		if (!_lv_segment_add_areas(lv, seg, new_area_count))
			return_0;

		for (s = 0; s < ah->area_count; s++) {
			if (!set_lv_segment_area_pv(seg, s + old_area_count,
						    aa[s].pv, aa[s].pe))
				return_0;
		}

		current_le += seg->area_len;
	}

	lv->status |= MIRRORED;

	if (lv->vg->fid->fmt->ops->lv_setup &&
	    !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
		return_0;

	return 1;
}
/*
 * Add mirror image LVs to mirrored segments
 */
int lv_add_mirror_lvs(struct logical_volume *lv,
		      struct logical_volume **sub_lvs,
		      uint32_t num_extra_areas,
		      uint64_t status, uint32_t region_size)
{
	struct lv_segment *seg;
	uint32_t old_area_count, new_area_count;
	uint32_t m;
	struct segment_type *mirror_segtype;

	seg = first_seg(lv);

	if (dm_list_size(&lv->segments) != 1 || seg_type(seg, 0) != AREA_LV) {
		log_error("Mirror layer must be inserted before adding mirrors");
		return 0;
	}

	mirror_segtype = get_segtype_from_string(lv->vg->cmd, "mirror");
	if (seg->segtype != mirror_segtype)
		if (!(seg = _convert_seg_to_mirror(seg, region_size, NULL)))
			return_0;

	if (region_size && region_size != seg->region_size) {
		log_error("Conflicting region_size");
		return 0;
	}

	old_area_count = seg->area_count;
	new_area_count = old_area_count + num_extra_areas;

	if (!_lv_segment_add_areas(lv, seg, new_area_count)) {
		log_error("Failed to allocate widened LV segment for %s.",
			  lv->name);
		return 0;
	}

	for (m = 0; m < old_area_count; m++)
		seg_lv(seg, m)->status |= status;

	for (m = old_area_count; m < new_area_count; m++) {
		if (!set_lv_segment_area_lv(seg, m, sub_lvs[m - old_area_count],
					    0, status))
			return_0;
		lv_set_hidden(sub_lvs[m - old_area_count]);
	}

	lv->status |= MIRRORED;

	return 1;
}
/*
 * Turn an empty LV into a mirror log.
 *
 * FIXME: Mirrored logs are built inefficiently.
 * A mirrored log currently uses the same layout that a mirror
 * LV uses.  The mirror layer sits on top of AREA_LVs which form the
 * legs, rather than on AREA_PVs.  This is done to allow re-use of the
 * various mirror functions to also handle the mirrored LV that makes
 * up the log.
 *
 * If we used AREA_PVs under the mirror layer of a log, we could
 * assemble it all at once by calling 'lv_add_segment' with the
 * appropriate segtype (mirror/stripe), like this:
 *	lv_add_segment(ah, ah->area_count, ah->log_area_count,
 *		       log_lv, segtype, 0, MIRROR_LOG, 0);
 *
 * For now, we use the same mechanism to build a mirrored log as we
 * do for building a mirrored LV: 1) create initial LV, 2) add a
 * mirror layer, and 3) add the remaining copy LVs.
 */
int lv_add_log_segment(struct alloc_handle *ah, uint32_t first_area,
		       struct logical_volume *log_lv, uint64_t status)
{
	return lv_add_segment(ah, ah->area_count + first_area, 1, log_lv,
			      get_segtype_from_string(log_lv->vg->cmd,
						      "striped"),
			      0, status, 0);
}
static int _lv_insert_empty_sublvs(struct logical_volume *lv,
				   const struct segment_type *segtype,
				   uint32_t stripe_size, uint32_t region_size,
				   uint32_t devices)
{
	struct logical_volume *sub_lv;
	uint32_t i;
	uint64_t sub_lv_status = 0;
	const char *layer_name;
	size_t len = strlen(lv->name) + 32;
	char img_name[len];
	struct lv_segment *mapseg;

	if (lv->le_count || !dm_list_empty(&lv->segments)) {
		log_error(INTERNAL_ERROR
			  "Non-empty LV passed to _lv_insert_empty_sublv");
		return 0;
	}

	if (segtype_is_raid(segtype)) {
		lv->status |= RAID;
		sub_lv_status = RAID_IMAGE;
		layer_name = "rimage";
	} else if (segtype_is_mirrored(segtype)) {
		lv->status |= MIRRORED;
		sub_lv_status = MIRROR_IMAGE;
		layer_name = "mimage";
	} else
		return_0;

	/*
	 * First, create our top-level segment for our top-level LV
	 */
	if (!(mapseg = alloc_lv_segment(segtype, lv, 0, 0, lv->status,
					stripe_size, NULL, NULL,
					devices, 0, 0, region_size, 0, NULL))) {
		log_error("Failed to create mapping segment for %s", lv->name);
		return 0;
	}

	/*
	 * Next, create all of our sub_lv's and link them in.
	 */
	for (i = 0; i < devices; i++) {
		/* Data LVs */
		if (devices > 1) {
			if (dm_snprintf(img_name, len, "%s_%s_%u",
					lv->name, layer_name, i) < 0)
				return_0;
		} else {
			if (dm_snprintf(img_name, len, "%s_%s",
					lv->name, layer_name) < 0)
				return_0;
		}

		/* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
		if (!(sub_lv = lv_create_empty(img_name, NULL,
					       LVM_READ | LVM_WRITE,
					       lv->alloc, lv->vg)))
			return_0;

		if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, sub_lv_status))
			return_0;

		/* Metadata LVs for raid */
		if (segtype_is_raid(segtype)) {
			if (dm_snprintf(img_name, len, "%s_rmeta_%u", lv->name, i) < 0)
				return_0;

			/* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
			if (!(sub_lv = lv_create_empty(img_name, NULL,
						       LVM_READ | LVM_WRITE,
						       lv->alloc, lv->vg)))
				return_0;

			if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, RAID_META))
				return_0;
		}
	}

	dm_list_add(&lv->segments, &mapseg->list);

	return 1;
}
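/*
 * Example of the resulting layout (illustrative): creating a two-device
 * RAID1 LV "lvol0" produces hidden sub LVs
 *
 *	lvol0_rimage_0, lvol0_rimage_1	(data, RAID_IMAGE)
 *	lvol0_rmeta_0,  lvol0_rmeta_1	(metadata, RAID_META)
 *
 * while a "mirror" segtype would instead create lvol0_mimage_0 and
 * lvol0_mimage_1 with no metadata sub LVs.
 */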
static int _lv_extend_layered_lv(struct alloc_handle *ah,
				 struct logical_volume *lv,
				 uint32_t extents, uint32_t first_area,
				 uint32_t stripes, uint32_t stripe_size)
{
	const struct segment_type *segtype;
	struct logical_volume *sub_lv, *meta_lv;
	struct lv_segment *seg;
	uint32_t fa, s;
	int clear_metadata = 0;

	segtype = get_segtype_from_string(lv->vg->cmd, "striped");

	/*
	 * The component devices of a "striped" LV all go in the same
	 * LV.  However, RAID has an LV for each device - making the
	 * 'stripes' and 'stripe_size' parameters meaningless.
	 */
	if (seg_is_raid(first_seg(lv))) {
		stripes = 1;
		stripe_size = 0;
	}

	seg = first_seg(lv);
	for (fa = first_area, s = 0; s < seg->area_count; s++) {
		if (is_temporary_mirror_layer(seg_lv(seg, s))) {
			if (!_lv_extend_layered_lv(ah, seg_lv(seg, s), extents,
						   fa, stripes, stripe_size))
				return_0;
			fa += lv_mirror_count(seg_lv(seg, s));
			continue;
		}

		sub_lv = seg_lv(seg, s);
		if (!lv_add_segment(ah, fa, stripes, sub_lv, segtype,
				    stripe_size, sub_lv->status, 0)) {
			log_error("Aborting. Failed to extend %s in %s.",
				  sub_lv->name, lv->name);
			return 0;
		}

		/* Extend metadata LVs only on initial creation */
		if (seg_is_raid(seg) && !lv->le_count) {
			if (!seg->meta_areas) {
				log_error("No meta_areas for RAID type");
				return 0;
			}

			meta_lv = seg_metalv(seg, s);
			if (!lv_add_segment(ah, fa + seg->area_count, 1,
					    meta_lv, segtype, 0,
					    meta_lv->status, 0)) {
				log_error("Failed to extend %s in %s.",
					  meta_lv->name, lv->name);
				return 0;
			}
			lv_set_visible(meta_lv);
			clear_metadata = 1;
		}

		fa += stripes;
	}

	if (clear_metadata) {
		/*
		 * We must clear the metadata areas upon creation.
		 */
		if (!vg_write(lv->vg) || !vg_commit(lv->vg))
			return_0;

		for (s = 0; s < seg->area_count; s++) {
			meta_lv = seg_metalv(seg, s);
			if (!activate_lv(meta_lv->vg->cmd, meta_lv)) {
				log_error("Failed to activate %s/%s for clearing",
					  meta_lv->vg->name, meta_lv->name);
				return 0;
			}

			log_verbose("Clearing metadata area of %s/%s",
				    meta_lv->vg->name, meta_lv->name);
			/*
			 * Rather than wiping meta_lv->size, we can simply
			 * wipe '1' to remove the superblock of any previous
			 * RAID devices.  It is much quicker.
			 */
			if (!set_lv(meta_lv->vg->cmd, meta_lv, 1, 0)) {
				log_error("Failed to zero %s/%s",
					  meta_lv->vg->name, meta_lv->name);
				return 0;
			}

			if (!deactivate_lv(meta_lv->vg->cmd, meta_lv)) {
				log_error("Failed to deactivate %s/%s",
					  meta_lv->vg->name, meta_lv->name);
				return 0;
			}
			lv_set_hidden(meta_lv);
		}
	}

	seg->area_len += extents;
	seg->len += extents;
	lv->le_count += extents;
	lv->size += (uint64_t) extents * lv->vg->extent_size;

	return 1;
}
/*
 * Entry point for single-step LV allocation + extension.
 */
int lv_extend(struct logical_volume *lv,
	      const struct segment_type *segtype,
	      uint32_t stripes, uint32_t stripe_size,
	      uint32_t mirrors, uint32_t region_size,
	      uint32_t extents, const char *thin_pool_name,
	      struct dm_list *allocatable_pvs, alloc_policy_t alloc)
{
	int r = 1;
	int log_count = 0;
	struct alloc_handle *ah;
	uint32_t sub_lv_count;

	log_very_verbose("Extending segment type, %s", segtype->name);

	if (segtype_is_virtual(segtype))
		return lv_add_virtual_segment(lv, 0u, extents, segtype, thin_pool_name);

	if (!lv->le_count && segtype_is_thin_pool(segtype)) {
		/* Thin pool allocation treats its metadata device like a mirror log. */
		/* FIXME Allow pool and data on same device with NORMAL */
		/* FIXME Support striped metadata pool */
		log_count = 1;
	} else if (segtype_is_raid(segtype) && !lv->le_count)
		log_count = mirrors * stripes;
	/* FIXME log_count should be 1 for mirrors */

	if (!(ah = allocate_extents(lv->vg, lv, segtype, stripes, mirrors,
				    log_count, region_size, extents,
				    allocatable_pvs, alloc, NULL)))
		return_0;

	if (segtype_is_thin_pool(segtype)) {
		if (!lv->le_count) {
			if (!(r = extend_pool(lv, segtype, ah, stripes, stripe_size)))
				stack;
		} else if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0,
						       stripes, stripe_size)))
			stack;
	} else if (!segtype_is_mirrored(segtype) && !segtype_is_raid(segtype)) {
		if (!(r = lv_add_segment(ah, 0, ah->area_count, lv, segtype,
					 stripe_size, 0u, 0)))
			stack;
	} else {
		/*
		 * For RAID, all the devices are AREA_LV.
		 * However, for 'mirror on stripe' using non-RAID targets,
		 * the mirror legs are AREA_LV while the stripes underneath
		 * are AREA_PV.
		 */
		if (segtype_is_raid(segtype))
			sub_lv_count = mirrors * stripes + segtype->parity_devs;
		else
			sub_lv_count = mirrors;

		if (!lv->le_count &&
		    !(r = _lv_insert_empty_sublvs(lv, segtype, stripe_size,
						  region_size, sub_lv_count))) {
			log_error("Failed to insert layer for %s", lv->name);
			goto out;
		}

		if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0,
						stripes, stripe_size)))
			goto out;

		/*
		 * If we are expanding an existing mirror, we can skip the
		 * resync of the extension if the LV is currently in-sync
		 * and the LV has the LV_NOTSYNCED flag set.
		 */
		if ((lv->le_count != extents) &&
		    segtype_is_mirrored(segtype) &&
		    (lv->status & LV_NOTSYNCED)) {
			percent_t sync_percent = PERCENT_INVALID;

			if (!lv_is_active(lv)) {
				log_print("%s/%s is not active."
					  " Unable to get sync percent.",
					  lv->vg->name, lv->name);
				if (yes_no_prompt("Do full resync of extended "
						  "portion of %s/%s? [y/n]: ",
						  lv->vg->name, lv->name) == 'y')
					goto out;
				r = 0;
				goto out;
			}

			if (!(r = lv_mirror_percent(lv->vg->cmd, lv, 0,
						    &sync_percent, NULL))) {
				log_error("Failed to get sync percent for %s/%s",
					  lv->vg->name, lv->name);
				goto out;
			} else if (sync_percent == PERCENT_100) {
				log_verbose("Skipping initial resync for "
					    "extended portion of %s/%s",
					    lv->vg->name, lv->name);
				init_mirror_in_sync(1);
				lv->status |= LV_NOTSYNCED;
			} else {
				log_error("%s/%s cannot be extended while"
					  " it is recovering.",
					  lv->vg->name, lv->name);
				r = 0;
				goto out;
			}
		}
	}

out:
	alloc_destroy(ah);

	return r;
}
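/*
 * A minimal sketch of a caller (hypothetical; not from the original
 * source): growing a plain linear LV by 10 extents.  allocatable_pvs is
 * assumed to be a struct dm_list of struct pv_list built by the caller.
 *
 *	const struct segment_type *segtype =
 *		get_segtype_from_string(lv->vg->cmd, "striped");
 *
 *	if (!segtype ||
 *	    !lv_extend(lv, segtype, 1, 0, 1, 0, 10,
 *		       NULL, allocatable_pvs, ALLOC_INHERIT))
 *		return_0;
 */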
/*
 * Minimal LV renaming function.
 * Metadata transaction should be made by caller.
 * Assumes new_name is allocated from cmd->mem pool.
 */
static int _rename_single_lv(struct logical_volume *lv, char *new_name)
{
	struct volume_group *vg = lv->vg;

	if (find_lv_in_vg(vg, new_name)) {
		log_error("Logical volume \"%s\" already exists in "
			  "volume group \"%s\"", new_name, vg->name);
		return 0;
	}

	if (lv->status & LOCKED) {
		log_error("Cannot rename locked LV %s", lv->name);
		return 0;
	}

	lv->name = new_name;

	return 1;
}
/*
 * Rename sub LV.
 * 'lv_name_old' and 'lv_name_new' are old and new names of the main LV.
 */
static int _rename_sub_lv(struct cmd_context *cmd,
			  struct logical_volume *lv,
			  const char *lv_name_old, const char *lv_name_new)
{
	const char *suffix;
	char *new_name;
	size_t len;

	/*
	 * A sub LV name starts with lv_name_old + '_'.
	 * The suffix follows lv_name_old and includes '_'.
	 */
	len = strlen(lv_name_old);
	if (strncmp(lv->name, lv_name_old, len) || lv->name[len] != '_') {
		log_error("Cannot rename \"%s\": name format not recognized "
			  "for internal LV \"%s\"",
			  lv_name_old, lv->name);
		return 0;
	}
	suffix = lv->name + len;

	/*
	 * Compose a new name for sub lv:
	 *   e.g. new name is "lvol1_mlog"
	 *	  if the sub LV is "lvol0_mlog" and
	 *	  a new name for main LV is "lvol1"
	 */
	len = strlen(lv_name_new) + strlen(suffix) + 1;
	new_name = dm_pool_alloc(cmd->mem, len);
	if (!new_name) {
		log_error("Failed to allocate space for new name");
		return 0;
	}
	if (dm_snprintf(new_name, len, "%s%s", lv_name_new, suffix) < 0) {
		log_error("Failed to create new name");
		return 0;
	}

	/* Rename it */
	return _rename_single_lv(lv, new_name);
}
/* Callback for for_each_sub_lv */
static int _rename_cb(struct cmd_context *cmd, struct logical_volume *lv,
		      void *data)
{
	struct lv_names *lv_names = (struct lv_names *) data;

	return _rename_sub_lv(cmd, lv, lv_names->old, lv_names->new);
}
/*
 * Loop down sub LVs and call fn for each.
 * fn is responsible for logging the necessary information on failure.
 */
int for_each_sub_lv(struct cmd_context *cmd, struct logical_volume *lv,
		    int (*fn)(struct cmd_context *cmd,
			      struct logical_volume *lv, void *data),
		    void *data)
{
	struct logical_volume *org;
	struct lv_segment *seg;
	uint32_t s;

	if (lv_is_cow(lv) && lv_is_virtual_origin(org = origin_from_cow(lv))) {
		if (!fn(cmd, org, data))
			return_0;
		if (!for_each_sub_lv(cmd, org, fn, data))
			return_0;
	}

	dm_list_iterate_items(seg, &lv->segments) {
		if (seg->log_lv) {
			if (!fn(cmd, seg->log_lv, data))
				return_0;
			if (!for_each_sub_lv(cmd, seg->log_lv, fn, data))
				return_0;
		}

		if (seg->metadata_lv) {
			if (!fn(cmd, seg->metadata_lv, data))
				return_0;
			if (!for_each_sub_lv(cmd, seg->metadata_lv, fn, data))
				return_0;
		}

		for (s = 0; s < seg->area_count; s++) {
			if (seg_type(seg, s) != AREA_LV)
				continue;
			if (!fn(cmd, seg_lv(seg, s), data))
				return_0;
			if (!for_each_sub_lv(cmd, seg_lv(seg, s), fn, data))
				return_0;
		}

		if (!seg_is_raid(seg))
			continue;

		/* RAID has meta_areas */
		for (s = 0; s < seg->area_count; s++) {
			if (seg_metatype(seg, s) != AREA_LV)
				continue;
			if (!fn(cmd, seg_metalv(seg, s), data))
				return_0;
			if (!for_each_sub_lv(cmd, seg_metalv(seg, s), fn, data))
				return_0;
		}
	}

	return 1;
}
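/*
 * Example callback (hypothetical, for illustration only): counting every
 * sub LV reachable from an LV.
 *
 *	static int _count_sub_lv(struct cmd_context *cmd,
 *				 struct logical_volume *lv, void *data)
 *	{
 *		(*(uint32_t *) data)++;
 *		return 1;
 *	}
 *
 *	uint32_t count = 0;
 *	if (!for_each_sub_lv(cmd, lv, _count_sub_lv, &count))
 *		stack;
 *
 * Returning 0 from the callback aborts the walk, which is what the
 * rename code above relies on for error propagation.
 */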
/*
 * Core of LV renaming routine.
 * VG must be locked by caller.
 */
int lv_rename(struct cmd_context *cmd, struct logical_volume *lv,
	      const char *new_name)
{
	struct volume_group *vg = lv->vg;
	struct lv_names lv_names;
	DM_LIST_INIT(lvs_changed);
	struct lv_list lvl, lvl2, *lvlp;
	int r = 0;

	/* rename is not allowed on sub LVs */
	if (!lv_is_visible(lv)) {
		log_error("Cannot rename internal LV \"%s\".", lv->name);
		return 0;
	}

	if (find_lv_in_vg(vg, new_name)) {
		log_error("Logical volume \"%s\" already exists in "
			  "volume group \"%s\"", new_name, vg->name);
		return 0;
	}

	if (lv->status & LOCKED) {
		log_error("Cannot rename locked LV %s", lv->name);
		return 0;
	}

	if (!archive(vg))
		return 0;

	/* rename sub LVs */
	lv_names.old = lv->name;
	lv_names.new = new_name;
	if (!for_each_sub_lv(cmd, lv, _rename_cb, (void *) &lv_names))
		return 0;

	/* rename main LV */
	if (!(lv->name = dm_pool_strdup(cmd->mem, new_name))) {
		log_error("Failed to allocate space for new name");
		return 0;
	}

	lvl.lv = lv;
	dm_list_add(&lvs_changed, &lvl.list);

	/* rename active virtual origin too */
	if (lv_is_cow(lv) && lv_is_virtual_origin(lvl2.lv = origin_from_cow(lv)))
		dm_list_add_h(&lvs_changed, &lvl2.list);

	log_verbose("Writing out updated volume group");
	if (!vg_write(vg))
		return 0;

	if (!suspend_lvs(cmd, &lvs_changed, vg))
		goto_out;

	if (!(r = vg_commit(vg)))
		stack;

	/*
	 * FIXME: resume LVs in reverse order to prevent memory
	 * lock imbalance when resuming virtual snapshot origin
	 * (resume of snapshot resumes origin too)
	 */
	dm_list_iterate_back_items(lvlp, &lvs_changed)
		if (!resume_lv(cmd, lvlp->lv))
			stack;
out:
	backup(vg);
	return r;
}
char *generate_lv_name(struct volume_group *vg, const char *format,
		       char *buffer, size_t len)
{
	struct lv_list *lvl;
	int high = -1, i;

	dm_list_iterate_items(lvl, &vg->lvs) {
		if (sscanf(lvl->lv->name, format, &i) != 1)
			continue;

		if (i > high)
			high = i;
	}

	if (dm_snprintf(buffer, len, format, high + 1) < 0)
		return NULL;

	return buffer;
}
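/*
 * Example (illustrative): with existing LVs "lvol0" and "lvol3" and the
 * format "lvol%d", sscanf() extracts i = 0 and i = 3, high ends up as 3,
 * and the generated name is "lvol4".  Names that do not match the format
 * are simply skipped.
 */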
int vg_max_lv_reached(struct volume_group *vg)
{
	if (!vg->max_lv)
		return 0;

	if (vg->max_lv > vg_visible_lvs(vg))
		return 0;

	log_verbose("Maximum number of logical volumes (%u) reached "
		    "in volume group %s", vg->max_lv, vg->name);

	return 1;
}
struct logical_volume *alloc_lv(struct dm_pool *mem)
{
	struct logical_volume *lv;

	if (!(lv = dm_pool_zalloc(mem, sizeof(*lv)))) {
		log_error("Unable to allocate logical volume structure");
		return NULL;
	}

	lv->snapshot = NULL;
	dm_list_init(&lv->snapshot_segs);
	dm_list_init(&lv->segments);
	dm_list_init(&lv->tags);
	dm_list_init(&lv->segs_using_this_lv);
	dm_list_init(&lv->rsites);

	return lv;
}
/*
 * Create a new empty LV.
 */
struct logical_volume *lv_create_empty(const char *name,
				       union lvid *lvid,
				       uint64_t status,
				       alloc_policy_t alloc,
				       struct volume_group *vg)
{
	struct format_instance *fi = vg->fid;
	struct logical_volume *lv;
	char dname[NAME_LEN];

	if (vg_max_lv_reached(vg))
		stack;

	if (strstr(name, "%d") &&
	    !(name = generate_lv_name(vg, name, dname, sizeof(dname)))) {
		log_error("Failed to generate unique name for the new "
			  "logical volume");
		return NULL;
	} else if (find_lv_in_vg(vg, name)) {
		log_error("Unable to create LV %s in Volume Group %s: "
			  "name already in use.", name, vg->name);
		return NULL;
	}

	log_verbose("Creating logical volume %s", name);

	if (!(lv = alloc_lv(vg->vgmem)))
		return_NULL;

	if (!(lv->name = dm_pool_strdup(vg->vgmem, name)))
		goto_bad;

	lv->status = status;
	lv->alloc = alloc;
	lv->read_ahead = vg->cmd->default_settings.read_ahead;
	lv->major = -1;
	lv->minor = -1;
	lv->size = UINT64_C(0);
	lv->le_count = 0;

	if (lvid)
		lv->lvid = *lvid;

	if (!link_lv_to_vg(vg, lv))
		goto_bad;

	if (!lv_set_creation(lv, NULL, 0))
		goto_bad;

	if (fi->fmt->ops->lv_setup && !fi->fmt->ops->lv_setup(fi, lv))
		goto_bad;

	return lv;
bad:
	dm_pool_free(vg->vgmem, lv);
	return NULL;
}
static int _add_pvs(struct cmd_context *cmd, struct pv_segment *peg,
		    uint32_t s __attribute__((unused)), void *data)
{
	struct seg_pvs *spvs = (struct seg_pvs *) data;
	struct pv_list *pvl;

	/* Don't add again if it's already on list. */
	if (find_pv_in_pv_list(&spvs->pvs, peg->pv))
		return 1;

	if (!(pvl = dm_pool_alloc(cmd->mem, sizeof(*pvl)))) {
		log_error("pv_list allocation failed");
		return 0;
	}

	pvl->pv = peg->pv;

	dm_list_add(&spvs->pvs, &pvl->list);

	return 1;
}
/*
 * Construct dm_list of segments of LVs showing which PVs they use.
 * For pvmove we use the *parent* LV so we can pick up stripes & existing mirrors etc.
 */
struct dm_list *build_parallel_areas_from_lv(struct logical_volume *lv,
					     unsigned use_pvmove_parent_lv)
{
	struct cmd_context *cmd = lv->vg->cmd;
	struct dm_list *parallel_areas;
	struct seg_pvs *spvs;
	uint32_t current_le = 0;
	uint32_t raid_multiple;
	struct lv_segment *seg = first_seg(lv);

	if (!(parallel_areas = dm_pool_alloc(cmd->mem, sizeof(*parallel_areas)))) {
		log_error("parallel_areas allocation failed");
		return NULL;
	}

	dm_list_init(parallel_areas);

	do {
		if (!(spvs = dm_pool_zalloc(cmd->mem, sizeof(*spvs)))) {
			log_error("allocation failed");
			return NULL;
		}

		dm_list_init(&spvs->pvs);

		spvs->le = current_le;
		spvs->len = lv->le_count - current_le;

		dm_list_add(parallel_areas, &spvs->list);

		if (use_pvmove_parent_lv && !(seg = find_seg_by_le(lv, current_le))) {
			log_error("Failed to find segment for %s extent %" PRIu32,
				  lv->name, current_le);
			return NULL;
		}

		/* Find next segment end */
		/* FIXME Unnecessary nesting! */
		if (!_for_each_pv(cmd, use_pvmove_parent_lv ? seg->pvmove_source_seg->lv : lv,
				  use_pvmove_parent_lv ? seg->pvmove_source_seg->le : current_le,
				  use_pvmove_parent_lv ? spvs->len * _calc_area_multiple(seg->pvmove_source_seg->segtype, seg->pvmove_source_seg->area_count, 0) : spvs->len,
				  use_pvmove_parent_lv ? seg->pvmove_source_seg : NULL,
				  &spvs->len,
				  0, 0, -1, 0, _add_pvs, (void *) spvs))
			return_NULL;

		current_le = spvs->le + spvs->len;
		raid_multiple = (seg->segtype->parity_devs) ?
			seg->area_count - seg->segtype->parity_devs : 1;
	} while ((current_le * raid_multiple) < lv->le_count);

	/* FIXME Merge adjacent segments with identical PV lists (avoids need for contiguous allocation attempts between successful allocations) */

	return parallel_areas;
}
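/*
 * Example of the result (illustrative): for an LV whose extents 0-99 map
 * to /dev/sda and whose extents 100-199 map to /dev/sdb, the list holds
 * two seg_pvs entries, roughly { le = 0, len = 100, pvs = [sda] } and
 * { le = 100, len = 100, pvs = [sdb] }.  The allocator consults this to
 * keep new parallel areas off the PVs already used at each LE range.
 */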
int link_lv_to_vg(struct volume_group *vg, struct logical_volume *lv)
{
	struct lv_list *lvl;

	if (vg_max_lv_reached(vg))
		stack;

	if (!(lvl = dm_pool_zalloc(vg->vgmem, sizeof(*lvl))))
		return_0;

	lvl->lv = lv;
	lv->vg = vg;
	dm_list_add(&vg->lvs, &lvl->list);

	return 1;
}
int unlink_lv_from_vg(struct logical_volume *lv)
{
	struct lv_list *lvl;

	if (!(lvl = find_lv_in_vg(lv->vg, lv->name)))
		return_0;

	dm_list_del(&lvl->list);

	return 1;
}
void lv_set_visible(struct logical_volume *lv)
{
	if (lv_is_visible(lv))
		return;

	lv->status |= VISIBLE_LV;

	log_debug("LV %s in VG %s is now visible.", lv->name, lv->vg->name);
}

void lv_set_hidden(struct logical_volume *lv)
{
	if (!lv_is_visible(lv))
		return;

	lv->status &= ~VISIBLE_LV;

	log_debug("LV %s in VG %s is now hidden.", lv->name, lv->vg->name);
}
int lv_remove_single(struct cmd_context *cmd, struct logical_volume *lv,
		     const force_t force)
{
	struct volume_group *vg;
	struct lvinfo info;
	struct logical_volume *format1_origin = NULL;
	int format1_reload_required = 0;
	int visible;
	struct logical_volume *pool_lv = NULL;

	vg = lv->vg;

	if (!vg_check_status(vg, LVM_WRITE))
		return 0;

	if (lv_is_origin(lv)) {
		log_error("Can't remove logical volume \"%s\" under snapshot",
			  lv->name);
		return 0;
	}

	if (lv->status & MIRROR_IMAGE) {
		log_error("Can't remove logical volume %s used by a mirror",
			  lv->name);
		return 0;
	}

	if (lv->status & MIRROR_LOG) {
		log_error("Can't remove logical volume %s used as mirror log",
			  lv->name);
		return 0;
	}

	if (lv->status & (RAID_META | RAID_IMAGE)) {
		log_error("Can't remove logical volume %s used as RAID device",
			  lv->name);
		return 0;
	}

	if (lv_is_thin_pool_data(lv) || lv_is_thin_pool_metadata(lv)) {
		log_error("Can't remove logical volume %s used by a thin pool.",
			  lv->name);
		return 0;
	} else if (lv_is_thin_volume(lv))
		pool_lv = first_seg(lv)->pool_lv;

	if (lv->status & LOCKED) {
		log_error("Can't remove locked LV %s", lv->name);
		return 0;
	}

	/* FIXME Ensure not referred to by another existing LVs */

	if (lv_info(cmd, lv, 0, &info, 1, 0)) {
		if (!lv_check_not_in_use(cmd, lv, &info))
			return_0;

		if ((force == PROMPT) &&
		    lv_is_visible(lv) &&
		    lv_is_active(lv) &&
		    yes_no_prompt("Do you really want to remove active "
				  "%slogical volume %s? [y/n]: ",
				  vg_is_clustered(vg) ? "clustered " : "",
				  lv->name) == 'n') {
			log_error("Logical volume %s not removed", lv->name);
			return 0;
		}
	}

	if (!archive(vg))
		return 0;

	if (lv_is_cow(lv)) {
		/* Old format1 code */
		if (!(lv->vg->fid->fmt->features & FMT_MDAS))
			format1_origin = origin_from_cow(lv);

		log_verbose("Removing snapshot %s", lv->name);
		/* vg_remove_snapshot() will preload origin/former snapshots */
		if (!vg_remove_snapshot(lv))
			return_0;
	}

	/* FIXME Review and fix the snapshot error paths! */
	if (!deactivate_lv(cmd, lv)) {
		log_error("Unable to deactivate logical volume \"%s\"",
			  lv->name);
		return 0;
	}

	/* Clear thin pool stacked messages */
	if (pool_lv && !pool_has_message(first_seg(pool_lv), lv, 0) &&
	    !update_pool_lv(pool_lv, 1)) {
		log_error("Failed to update thin pool %s.", pool_lv->name);
		return 0;
	}

	visible = lv_is_visible(lv);

	log_verbose("Releasing logical volume \"%s\"", lv->name);
	if (!lv_remove(lv)) {
		log_error("Error releasing logical volume \"%s\"", lv->name);
		return 0;
	}

	/*
	 * Old format1 code: If no snapshots left reload without -real.
	 */
	if (format1_origin && !lv_is_origin(format1_origin)) {
		log_warn("WARNING: Support for snapshots with old LVM1-style metadata is deprecated.");
		log_warn("WARNING: Please use lvconvert to update to lvm2 metadata at your convenience.");
		format1_reload_required = 1;
	}

	/* store it on disks */
	if (!vg_write(vg))
		return 0;

	/* format1 */
	if (format1_reload_required && !suspend_lv(cmd, format1_origin))
		log_error("Failed to refresh %s without snapshot.", format1_origin->name);

	if (!vg_commit(vg))
		return 0;

	/* format1 */
	if (format1_reload_required && !resume_lv(cmd, format1_origin)) {
		log_error("Failed to resume %s.", format1_origin->name);
		return 0;
	}

	/* Release unneeded blocks in thin pool */
	/* TODO: defer when multiple LVs released at once */
	if (pool_lv && !update_pool_lv(pool_lv, 1)) {
		log_error("Failed to update thin pool %s.", pool_lv->name);
		return 0;
	}

	backup(vg);

	if (visible)
		log_print("Logical volume \"%s\" successfully removed", lv->name);

	return 1;
}
/*
 * Remove an LV with its dependencies - LV leaf nodes should be removed first.
 */
int lv_remove_with_dependencies(struct cmd_context *cmd, struct logical_volume *lv,
				const force_t force, unsigned level)
{
	percent_t snap_percent;
	struct dm_list *snh, *snht;
	struct seg_list *sl, *tsl;
	struct lvinfo info;

	if (lv_is_cow(lv)) {
		/*
		 * A merging snapshot cannot be removed directly unless
		 * it has been invalidated or failed merge removal is requested.
		 */
		if (lv_is_merging_cow(lv) && !level) {
			if (lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) &&
			    info.exists && info.live_table) {
				if (!lv_snapshot_percent(lv, &snap_percent)) {
					log_error("Failed to obtain merging snapshot progress percentage for logical volume %s.",
						  lv->name);
					return 0;
				}
				if ((snap_percent != PERCENT_INVALID) &&
				    (snap_percent != PERCENT_MERGE_FAILED)) {
					log_error("Can't remove merging snapshot logical volume \"%s\"",
						  lv->name);
					return 0;
				} else if ((snap_percent == PERCENT_MERGE_FAILED) &&
					   (force == PROMPT) &&
					   yes_no_prompt("Removing snapshot \"%s\" that failed to merge may leave origin \"%s\" inconsistent. "
							 "Proceed? [y/n]: ", lv->name, origin_from_cow(lv)->name) == 'n') {
					log_error("Logical volume %s not removed.", lv->name);
					return 0;
				}
			}
		}
	}

	if (lv_is_origin(lv)) {
		/* Remove snapshot LVs first */
		if ((force == PROMPT) &&
		    /* Active snapshot already needs to confirm each active LV */
		    !lv_is_active(lv) &&
		    yes_no_prompt("Removing origin %s will also remove %u "
				  "snapshot(s). Proceed? [y/n]: ",
				  lv->name, lv->origin_count) == 'n') {
			log_error("Logical volume %s not removed.", lv->name);
			return 0;
		}

		dm_list_iterate_safe(snh, snht, &lv->snapshot_segs)
			if (!lv_remove_with_dependencies(cmd, dm_list_struct_base(snh, struct lv_segment,
										   origin_list)->cow,
							 force, level + 1))
				return_0;
	}

	if (lv_is_used_thin_pool(lv)) {
		/* Remove thin LVs first */
		if ((force == PROMPT) &&
		    yes_no_prompt("Removing pool %s will also remove %u "
				  "thin volume(s). OK? [y/n]: ", lv->name,
				  /* Note: Snapshots not included */
				  dm_list_size(&lv->segs_using_this_lv)) == 'n') {
			log_error("Logical volume %s not removed.", lv->name);
			return 0;
		}

		dm_list_iterate_items_safe(sl, tsl, &lv->segs_using_this_lv)
			if (!lv_remove_with_dependencies(cmd, sl->seg->lv,
							 force, level + 1))
				return_0;
	}

	return lv_remove_single(cmd, lv, force);
}
/*
 * insert_layer_for_segments_on_pv() inserts a layer segment for a segment area.
 * However, layer modification could split the underlying layer segment.
 * This function splits the parent area so as to keep the 1:1 relationship
 * between the parent area and the underlying layer segment.
 * Since the layer LV might have other layers below, build_parallel_areas()
 * is used to find the lowest-level segment boundaries.
 */
static int _split_parent_area(struct lv_segment *seg, uint32_t s,
			      struct dm_list *layer_seg_pvs)
{
	uint32_t parent_area_len, parent_le, layer_le;
	uint32_t area_multiple;
	struct seg_pvs *spvs;

	if (seg_is_striped(seg))
		area_multiple = seg->area_count;
	else
		area_multiple = 1;

	parent_area_len = seg->area_len;
	parent_le = seg->le;
	layer_le = seg_le(seg, s);

	while (parent_area_len > 0) {
		/* Find the layer segment pointed at */
		if (!(spvs = _find_seg_pvs_by_le(layer_seg_pvs, layer_le))) {
			log_error("layer segment for %s:%" PRIu32 " not found",
				  seg->lv->name, parent_le);
			return 0;
		}

		if (spvs->le != layer_le) {
			log_error("Incompatible layer boundary: "
				  "%s:%" PRIu32 "[%" PRIu32 "] on %s:%" PRIu32,
				  seg->lv->name, parent_le, s,
				  seg_lv(seg, s)->name, layer_le);
			return 0;
		}

		if (spvs->len < parent_area_len) {
			parent_le += spvs->len * area_multiple;
			if (!lv_split_segment(seg->lv, parent_le))
				return_0;
		}

		parent_area_len -= spvs->len;
		layer_le += spvs->len;
	}

	return 1;
}
/*
 * Split the parent LV segments if the layer LV below it is split.
 */
int split_parent_segments_for_layer(struct cmd_context *cmd,
				    struct logical_volume *layer_lv)
{
	struct lv_list *lvl;
	struct logical_volume *parent_lv;
	struct lv_segment *seg;
	uint32_t s;
	struct dm_list *parallel_areas;

	if (!(parallel_areas = build_parallel_areas_from_lv(layer_lv, 0)))
		return_0;

	/* Loop through all LVs except itself */
	dm_list_iterate_items(lvl, &layer_lv->vg->lvs) {
		parent_lv = lvl->lv;
		if (parent_lv == layer_lv)
			continue;

		/* Find all segments that point at the layer LV */
		dm_list_iterate_items(seg, &parent_lv->segments) {
			for (s = 0; s < seg->area_count; s++) {
				if (seg_type(seg, s) != AREA_LV ||
				    seg_lv(seg, s) != layer_lv)
					continue;

				if (!_split_parent_area(seg, s, parallel_areas))
					return_0;
			}
		}
	}

	return 1;
}
/* Remove a layer from the LV */
int remove_layers_for_segments(struct cmd_context *cmd,
			       struct logical_volume *lv,
			       struct logical_volume *layer_lv,
			       uint64_t status_mask, struct dm_list *lvs_changed)
{
	struct lv_segment *seg, *lseg;
	uint32_t s;
	int lv_changed = 0;
	struct lv_list *lvl;

	log_very_verbose("Removing layer %s for segments of %s",
			 layer_lv->name, lv->name);

	/* Find all segments that point at the temporary mirror */
	dm_list_iterate_items(seg, &lv->segments) {
		for (s = 0; s < seg->area_count; s++) {
			if (seg_type(seg, s) != AREA_LV ||
			    seg_lv(seg, s) != layer_lv)
				continue;

			/* Find the layer segment pointed at */
			if (!(lseg = find_seg_by_le(layer_lv, seg_le(seg, s)))) {
				log_error("Layer segment not found: %s:%" PRIu32,
					  layer_lv->name, seg_le(seg, s));
				return 0;
			}

			/* Check the segment params are compatible */
			if (!seg_is_striped(lseg) || lseg->area_count != 1) {
				log_error("Layer is not linear: %s:%" PRIu32,
					  layer_lv->name, lseg->le);
				return 0;
			}
			if ((lseg->status & status_mask) != status_mask) {
				log_error("Layer status does not match: "
					  "%s:%" PRIu32 " status: 0x%" PRIx64 "/0x%" PRIx64,
					  layer_lv->name, lseg->le,
					  lseg->status, status_mask);
				return 0;
			}
			if (lseg->le != seg_le(seg, s) ||
			    lseg->area_len != seg->area_len) {
				log_error("Layer boundary mismatch: "
					  "%s:%" PRIu32 "-%" PRIu32 " on "
					  "%s:%" PRIu32 " / "
					  "%" PRIu32 "-%" PRIu32 " / ",
					  lv->name, seg->le, seg->area_len,
					  layer_lv->name, seg_le(seg, s),
					  lseg->le, lseg->area_len);
				return 0;
			}

			if (!move_lv_segment_area(seg, s, lseg, 0))
				return_0;

			/* Replace mirror with error segment */
			if (!(lseg->segtype =
			      get_segtype_from_string(lv->vg->cmd, "error"))) {
				log_error("Missing error segtype");
				return 0;
			}
			lseg->area_count = 0;

			/* First time, add LV to list of LVs affected */
			if (!lv_changed && lvs_changed) {
				if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
					log_error("lv_list alloc failed");
					return 0;
				}
				lvl->lv = lv;
				dm_list_add(lvs_changed, &lvl->list);
				lv_changed = 1;
			}
		}
	}
	if (lv_changed && !lv_merge_segments(lv))
		return_0;

	return 1;
}
/* Remove a layer */
int remove_layers_for_segments_all(struct cmd_context *cmd,
				   struct logical_volume *layer_lv,
				   uint64_t status_mask,
				   struct dm_list *lvs_changed)
{
	struct lv_list *lvl;
	struct logical_volume *lv1;

	/* Loop through all LVs except the temporary mirror */
	dm_list_iterate_items(lvl, &layer_lv->vg->lvs) {
		lv1 = lvl->lv;
		if (lv1 == layer_lv)
			continue;

		if (!remove_layers_for_segments(cmd, lv1, layer_lv,
						status_mask, lvs_changed))
			return_0;
	}

	if (!lv_empty(layer_lv))
		return_0;

	return 1;
}
int move_lv_segments(struct logical_volume *lv_to,
		     struct logical_volume *lv_from,
		     uint64_t set_status, uint64_t reset_status)
{
	struct lv_segment *seg;

	dm_list_iterate_items(seg, &lv_to->segments)
		if (seg->origin) {
			log_error("Can't move snapshot segment.");
			return 0;
		}

	dm_list_init(&lv_to->segments);
	dm_list_splice(&lv_to->segments, &lv_from->segments);

	dm_list_iterate_items(seg, &lv_to->segments) {
		seg->lv = lv_to;

		seg->status &= ~reset_status;
		seg->status |= set_status;
	}

	lv_to->le_count = lv_from->le_count;
	lv_to->size = lv_from->size;

	lv_from->le_count = 0;
	lv_from->size = 0;

	return 1;
}
/* Remove a layer from the LV */
int remove_layer_from_lv(struct logical_volume *lv,
			 struct logical_volume *layer_lv)
{
	struct logical_volume *parent;
	struct lv_segment *parent_seg;
	struct segment_type *segtype;

	log_very_verbose("Removing layer %s for %s", layer_lv->name, lv->name);

	if (!(parent_seg = get_only_segment_using_this_lv(layer_lv))) {
		log_error("Failed to find layer %s in %s",
			  layer_lv->name, lv->name);
		return 0;
	}
	parent = parent_seg->lv;

	/*
	 * Before removal, the layer should be cleaned up,
	 * i.e. additional segments and areas should have been removed.
	 */
	if (dm_list_size(&parent->segments) != 1 ||
	    parent_seg->area_count != 1 ||
	    seg_type(parent_seg, 0) != AREA_LV ||
	    layer_lv != seg_lv(parent_seg, 0) ||
	    parent->le_count != layer_lv->le_count)
		return_0;

	if (!lv_empty(parent))
		return_0;

	if (!move_lv_segments(parent, layer_lv, 0, 0))
		return_0;

	/* Replace the empty layer with error segment */
	segtype = get_segtype_from_string(lv->vg->cmd, "error");
	if (!lv_add_virtual_segment(layer_lv, 0, parent->le_count, segtype, NULL))
		return_0;

	return 1;
}
/*
 * Create and insert a linear LV "above" lv_where.
 * After the insertion, a new LV named lv_where->name + suffix is created
 * and all segments of lv_where are moved to the new LV.
 * lv_where will have a single segment which maps linearly to the new LV.
 */
struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
					   struct logical_volume *lv_where,
					   uint64_t status,
					   const char *layer_suffix)
{
	int r;
	char *name;
	size_t len;
	struct str_list *sl;
	struct logical_volume *layer_lv;
	struct segment_type *segtype;
	struct lv_segment *mapseg;
	unsigned exclusive = 0;

	/* create an empty layer LV */
	len = strlen(lv_where->name) + 32;
	if (!(name = alloca(len))) {
		log_error("layer name allocation failed. "
			  "Remove new LV and retry.");
		return NULL;
	}

	if (dm_snprintf(name, len, "%s%s", lv_where->name, layer_suffix) < 0) {
		log_error("layer name allocation failed. "
			  "Remove new LV and retry.");
		return NULL;
	}

	if (!(layer_lv = lv_create_empty(name, NULL, LVM_READ | LVM_WRITE,
					 ALLOC_INHERIT, lv_where->vg))) {
		log_error("Creation of layer LV failed");
		return NULL;
	}

	if (lv_is_active_exclusive_locally(lv_where))
		exclusive = 1;

	if (lv_is_active(lv_where) && strstr(name, "_mimagetmp")) {
		log_very_verbose("Creating transient LV %s for mirror conversion in VG %s.", name, lv_where->vg->name);

		segtype = get_segtype_from_string(cmd, "error");

		if (!lv_add_virtual_segment(layer_lv, 0, lv_where->le_count, segtype, NULL)) {
			log_error("Creation of transient LV %s for mirror conversion in VG %s failed.", name, lv_where->vg->name);
			return NULL;
		}

		/* Temporary tags for activation of the transient LV */
		dm_list_iterate_items(sl, &lv_where->tags)
			if (!str_list_add(cmd->mem, &layer_lv->tags, sl->str)) {
				log_error("Aborting. Unable to tag"
					  " transient mirror layer.");
				return NULL;
			}

		if (!vg_write(lv_where->vg)) {
			log_error("Failed to write intermediate VG %s metadata for mirror conversion.", lv_where->vg->name);
			return NULL;
		}

		if (!vg_commit(lv_where->vg)) {
			log_error("Failed to commit intermediate VG %s metadata for mirror conversion.", lv_where->vg->name);
			vg_revert(lv_where->vg);
			return NULL;
		}

		if (exclusive)
			r = activate_lv_excl(cmd, layer_lv);
		else
			r = activate_lv(cmd, layer_lv);

		if (!r) {
			log_error("Failed to resume transient LV"
				  " %s for mirror conversion in VG %s.",
				  name, lv_where->vg->name);
			return NULL;
		}

		/* Remove the temporary tags */
		dm_list_iterate_items(sl, &lv_where->tags)
			str_list_del(&layer_lv->tags, sl->str);
	}

	log_very_verbose("Inserting layer %s for %s",
			 layer_lv->name, lv_where->name);

	if (!move_lv_segments(layer_lv, lv_where, 0, 0))
		return_NULL;

	if (!(segtype = get_segtype_from_string(cmd, "striped")))
		return_NULL;

	/* allocate a new linear segment */
	if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, layer_lv->le_count,
					status, 0, NULL, NULL, 1, layer_lv->le_count,
					0, 0, 0, NULL)))
		return_NULL;

	/* map the new segment to the original underlying area */
	if (!set_lv_segment_area_lv(mapseg, 0, layer_lv, 0, 0))
		return_NULL;

	/* add the new segment to lv_where */
	dm_list_add(&lv_where->segments, &mapseg->list);
	lv_where->le_count = layer_lv->le_count;
	lv_where->size = (uint64_t) lv_where->le_count * lv_where->vg->extent_size;

	return layer_lv;
}
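/*
 * Before/after sketch (illustrative): inserting a layer with suffix
 * "_mimagetmp" over lvol0 turns
 *
 *	lvol0  [original segments -> PVs]
 *
 * into
 *
 *	lvol0            [one striped segment -> lvol0_mimagetmp]
 *	lvol0_mimagetmp  [original segments -> PVs]
 *
 * so the parent keeps its name and identity while the new layer LV takes
 * over the physical mapping.
 */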
/*
 * Extend and insert a linear layer LV beneath the source segment area.
 */
static int _extend_layer_lv_for_segment(struct logical_volume *layer_lv,
					struct lv_segment *seg, uint32_t s,
					uint64_t status)
{
	struct lv_segment *mapseg;
	struct segment_type *segtype;
	struct physical_volume *src_pv = seg_pv(seg, s);
	uint32_t src_pe = seg_pe(seg, s);

	if (seg_type(seg, s) != AREA_PV && seg_type(seg, s) != AREA_LV)
		return_0;

	if (!(segtype = get_segtype_from_string(layer_lv->vg->cmd, "striped")))
		return_0;

	/* FIXME Incomplete message? Needs more context */
	log_very_verbose("Inserting %s:%" PRIu32 "-%" PRIu32 " of %s/%s",
			 pv_dev_name(src_pv),
			 src_pe, src_pe + seg->area_len - 1,
			 seg->lv->vg->name, seg->lv->name);

	/* allocate a new segment */
	if (!(mapseg = alloc_lv_segment(segtype, layer_lv, layer_lv->le_count,
					seg->area_len, status, 0,
					NULL, NULL, 1, seg->area_len, 0, 0, 0, seg)))
		return_0;

	/* map the new segment to the original underlying area */
	if (!move_lv_segment_area(mapseg, 0, seg, s))
		return_0;

	/* add the new segment to the layer LV */
	dm_list_add(&layer_lv->segments, &mapseg->list);
	layer_lv->le_count += seg->area_len;
	layer_lv->size += (uint64_t) seg->area_len * layer_lv->vg->extent_size;

	/* map the original area to the new segment */
	if (!set_lv_segment_area_lv(seg, s, layer_lv, mapseg->le, 0))
		return_0;

	return 1;
}
/*
 * Match the segment area to PEs in the pvl
 * (the segment area boundary should be aligned to PE ranges by
 *  _adjust_layer_segments() so that there is no partial overlap.)
 */
static int _match_seg_area_to_pe_range(struct lv_segment *seg, uint32_t s,
				       struct pv_list *pvl)
{
	struct pe_range *per;
	uint32_t pe_start, per_end;

	if (!pvl)
		return 1;

	if (seg_type(seg, s) != AREA_PV || seg_dev(seg, s) != pvl->pv->dev)
		return 0;

	pe_start = seg_pe(seg, s);

	/* Do these PEs match to any of the PEs in pvl? */
	dm_list_iterate_items(per, pvl->pe_ranges) {
		per_end = per->start + per->count - 1;

		if ((pe_start < per->start) || (pe_start > per_end))
			continue;

		/* FIXME Missing context in this message - add LV/seg details */
		log_debug("Matched PE range %s:%" PRIu32 "-%" PRIu32 " against "
			  "%s %" PRIu32 " len %" PRIu32, dev_name(pvl->pv->dev),
			  per->start, per_end, dev_name(seg_dev(seg, s)),
			  seg_pe(seg, s), seg->area_len);

		return 1;
	}

	return 0;
}
/*
 * For each segment in lv_where that uses a PV in pvl directly,
 * split the segment if it spans more than one underlying PV.
 */
static int _align_segment_boundary_to_pe_range(struct logical_volume *lv_where,
					       struct pv_list *pvl)
{
	struct lv_segment *seg;
	struct pe_range *per;
	uint32_t pe_start, pe_end, per_end, stripe_multiplier, s;

	if (!pvl)
		return 1;

	/* Split LV segments to match PE ranges */
	dm_list_iterate_items(seg, &lv_where->segments) {
		for (s = 0; s < seg->area_count; s++) {
			if (seg_type(seg, s) != AREA_PV ||
			    seg_dev(seg, s) != pvl->pv->dev)
				continue;

			/* Do these PEs match with the condition? */
			dm_list_iterate_items(per, pvl->pe_ranges) {
				pe_start = seg_pe(seg, s);
				pe_end = pe_start + seg->area_len - 1;
				per_end = per->start + per->count - 1;

				/* No overlap? */
				if ((pe_end < per->start) ||
				    (pe_start > per_end))
					continue;

				if (seg_is_striped(seg))
					stripe_multiplier = seg->area_count;
				else
					stripe_multiplier = 1;

				if ((per->start != pe_start &&
				     per->start > pe_start) &&
				    !lv_split_segment(lv_where, seg->le +
						      (per->start - pe_start) *
						      stripe_multiplier))
					return_0;

				if ((per_end != pe_end &&
				     per_end < pe_end) &&
				    !lv_split_segment(lv_where, seg->le +
						      (per_end - pe_start + 1) *
						      stripe_multiplier))
					return_0;
			}
		}
	}

	return 1;
}
/*
 * Scan lv_where for segments on a PV in pvl, and for each one found
 * append a linear segment to lv_layer and insert it between the two.
 *
 * If pvl is empty, a layer is placed under the whole of lv_where.
 * If the layer is inserted, lv_where is added to lvs_changed.
 */
int insert_layer_for_segments_on_pv(struct cmd_context *cmd,
				    struct logical_volume *lv_where,
				    struct logical_volume *layer_lv,
				    uint64_t status,
				    struct pv_list *pvl,
				    struct dm_list *lvs_changed)
{
	struct lv_segment *seg;
	struct lv_list *lvl;
	int lv_used = 0;
	uint32_t s;

	log_very_verbose("Inserting layer %s for segments of %s on %s",
			 layer_lv->name, lv_where->name,
			 pvl ? pv_dev_name(pvl->pv) : "any");

	if (!_align_segment_boundary_to_pe_range(lv_where, pvl))
		return_0;

	/* Work through all segments on the supplied PV */
	dm_list_iterate_items(seg, &lv_where->segments) {
		for (s = 0; s < seg->area_count; s++) {
			if (!_match_seg_area_to_pe_range(seg, s, pvl))
				continue;

			/* First time, add LV to list of LVs affected */
			if (!lv_used && lvs_changed) {
				if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
					log_error("lv_list alloc failed");
					return 0;
				}
				lvl->lv = lv_where;
				dm_list_add(lvs_changed, &lvl->list);
				lv_used = 1;
			}

			if (!_extend_layer_lv_for_segment(layer_lv, seg, s,
							  status)) {
				log_error("Failed to insert segment in layer "
					  "LV %s under %s:%" PRIu32 "-%" PRIu32,
					  layer_lv->name, lv_where->name,
					  seg->le, seg->le + seg->len);
				return 0;
			}
		}
	}

	return 1;
}
/*
 * Initialize the LV with 'value'.
 */
int set_lv(struct cmd_context *cmd, struct logical_volume *lv,
	   uint64_t sectors, int value)
{
	struct device *dev;
	char *name;

	/*
	 * FIXME:
	 * <clausen> also, more than 4k
	 * <clausen> say, reiserfs puts it's superblock 32k in, IIRC
	 * <ejt_> k, I'll drop a fixme to that effect
	 *	  (I know the device is at least 4k, but not 32k)
	 */
	if (!(name = dm_pool_alloc(cmd->mem, PATH_MAX))) {
		log_error("Name allocation failed - device not cleared");
		return 0;
	}

	if (dm_snprintf(name, PATH_MAX, "%s%s/%s", cmd->dev_dir,
			lv->vg->name, lv->name) < 0) {
		log_error("Name too long - device not cleared (%s)", lv->name);
		return 0;
	}

	sync_local_dev_names(cmd);  /* Wait until devices are available */

	log_verbose("Clearing start of logical volume \"%s\"", lv->name);

	if (!(dev = dev_cache_get(name, NULL))) {
		log_error("%s: not found: device not cleared", name);
		return 0;
	}

	if (!dev_open_quiet(dev))
		return_0;

	if (!sectors)
		sectors = UINT64_C(4096) >> SECTOR_SHIFT;

	if (sectors > lv->size)
		sectors = lv->size;

	if (!dev_set(dev, UINT64_C(0), (size_t) sectors << SECTOR_SHIFT, value))
		stack;

	dev_flush(dev);

	if (!dev_close_immediate(dev))
		stack;

	return 1;
}
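/*
 * Usage note (illustrative): passing sectors = 0 falls back to wiping
 * the first 4KiB, e.g.
 *
 *	if (!set_lv(cmd, lv, UINT64_C(0), 0))
 *		return_0;
 *
 * whereas the RAID metadata clearing in _lv_extend_layered_lv() above
 * deliberately passes sectors = 1 to wipe only the first sector.  As the
 * FIXME notes, 4KiB may still be too little for filesystems that keep
 * their superblock further in.
 */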
static struct logical_volume *_create_virtual_origin(struct cmd_context *cmd,
						     struct volume_group *vg,
						     const char *lv_name,
						     uint32_t permission,
						     uint64_t voriginextents)
{
	const struct segment_type *segtype;
	size_t len;
	char *vorigin_name;
	struct logical_volume *lv;

	if (!(segtype = get_segtype_from_string(cmd, "zero"))) {
		log_error("Zero segment type for virtual origin not found");
		return NULL;
	}

	len = strlen(lv_name) + 32;
	if (!(vorigin_name = alloca(len)) ||
	    dm_snprintf(vorigin_name, len, "%s_vorigin", lv_name) < 0) {
		log_error("Virtual origin name allocation failed.");
		return NULL;
	}

	if (!(lv = lv_create_empty(vorigin_name, NULL, permission,
				   ALLOC_INHERIT, vg)))
		return_NULL;

	if (!lv_extend(lv, segtype, 1, 0, 1, 0, voriginextents,
		       NULL, NULL, ALLOC_INHERIT))
		return_NULL;

	/* store vg on disk(s) */
	if (!vg_write(vg) || !vg_commit(vg))
		return_NULL;

	backup(vg);

	return lv;
}
4092 * If lp->thin OR lp->activate is AY*, activate the pool if not already active.
4093 * If lp->thin, create thin LV within the pool - as a snapshot if lp->snapshot.
4094 * If lp->activate is AY*, activate it.
4095 * If lp->activate was AN* and the pool was originally inactive, deactivate it.
4097 static struct logical_volume
*_lv_create_an_lv(struct volume_group
*vg
, struct lvcreate_params
*lp
,
4098 const char *new_lv_name
)
4100 struct cmd_context
*cmd
= vg
->cmd
;
4102 uint64_t status
= UINT64_C(0);
4103 struct logical_volume
*lv
, *org
= NULL
;
4104 struct logical_volume
*pool_lv
;
4105 struct lv_list
*lvl
;
4106 int origin_active
= 0;
4109 if (new_lv_name
&& find_lv_in_vg(vg
, new_lv_name
)) {
4110 log_error("Logical volume \"%s\" already exists in "
4111 "volume group \"%s\"", new_lv_name
, lp
->vg_name
);
4115 if (vg_max_lv_reached(vg
)) {
4116 log_error("Maximum number of logical volumes (%u) reached "
4117 "in volume group %s", vg
->max_lv
, vg
->name
);
4121 if ((segtype_is_mirrored(lp
->segtype
) ||
4122 segtype_is_raid(lp
->segtype
) || segtype_is_thin(lp
->segtype
)) &&
4123 !(vg
->fid
->fmt
->features
& FMT_SEGMENTS
)) {
4124 log_error("Metadata does not support %s segments.",
4129 if (lp
->read_ahead
!= DM_READ_AHEAD_AUTO
&&
4130 lp
->read_ahead
!= DM_READ_AHEAD_NONE
&&
4131 (vg
->fid
->fmt
->features
& FMT_RESTRICTED_READAHEAD
) &&
4132 (lp
->read_ahead
< 2 || lp
->read_ahead
> 120)) {
4133 log_error("Metadata only supports readahead values between 2 and 120.");
4137 if (lp
->stripe_size
> vg
->extent_size
) {
4138 log_error("Reducing requested stripe size %s to maximum, "
4139 "physical extent size %s",
4140 display_size(cmd
, (uint64_t) lp
->stripe_size
),
4141 display_size(cmd
, (uint64_t) vg
->extent_size
));
4142 lp
->stripe_size
= vg
->extent_size
;
4145 /* Need to check the vg's format to verify this - the cmd format isn't setup properly yet */
4146 if (lp
->stripes
> 1 &&
4147 !(vg
->fid
->fmt
->features
& FMT_UNLIMITED_STRIPESIZE
) &&
4148 (lp
->stripe_size
> STRIPE_SIZE_MAX
)) {
4149 log_error("Stripe size may not exceed %s",
4150 display_size(cmd
, (uint64_t) STRIPE_SIZE_MAX
));
4154 if ((size_rest
= lp
->extents
% lp
->stripes
)) {
4155 log_print("Rounding size (%d extents) up to stripe boundary "
4156 "size (%d extents)", lp
->extents
,
4157 lp
->extents
- size_rest
+ lp
->stripes
);
4158 lp
->extents
= lp
->extents
- size_rest
+ lp
->stripes
;
4161 /* Does LV need to be zeroed? Thin handles this as a per-pool in-kernel setting. */
4162 if (lp
->zero
&& !segtype_is_thin(lp
->segtype
) && !activation()) {
4163 log_error("Can't wipe start of new LV without using "
4164 "device-mapper kernel driver");
4168 status
|= lp
->permission
| VISIBLE_LV
;
4170 if (lp
->snapshot
&& lp
->thin
) {
4171 if (!(org
= find_lv(vg
, lp
->origin
))) {
4172 log_error("Couldn't find origin volume '%s'.",
4177 if (org
->status
& LOCKED
) {
4178 log_error("Snapshots of locked devices are not supported.");
4182 lp
->voriginextents
= org
->le_count
;
4183 } else if (lp
->snapshot
) {
4184 if (!activation()) {
4185 log_error("Can't create snapshot without using "
4186 "device-mapper kernel driver");
4191 status
|= LVM_WRITE
;
4193 if (lp
->voriginsize
)
4197 if (!(org
= find_lv(vg
, lp
->origin
))) {
4198 log_error("Couldn't find origin volume '%s'.",
4202 if (lv_is_virtual_origin(org
)) {
4203 log_error("Can't share virtual origins. "
4204 "Use --virtualsize.");
4207 if (lv_is_cow(org
)) {
4208 log_error("Snapshots of snapshots are not "
4212 if (org
->status
& LOCKED
) {
4213 log_error("Snapshots of locked devices are not "
4217 if (lv_is_merging_origin(org
)) {
4218 log_error("Snapshots of an origin that has a "
4219 "merging snapshot is not supported");
4223 if (lv_is_thin_type(org
) && !lv_is_thin_volume(org
)) {
4224 log_error("Snapshots of thin pool %sdevices "
4225 "are not supported.",
4226 lv_is_thin_pool_data(org
) ? "data " :
4227 lv_is_thin_pool_metadata(org
) ?
4232 if (lv_is_mirror_type(org
) &&
4233 !seg_is_raid(first_seg(org
))) {
4234 log_error("Snapshots of \"mirror\" segment types"
4235 " are not supported");
4239 if (!lv_info(cmd
, org
, 0, &info
, 0, 0)) {
4240 log_error("Check for existence of active snapshot "
4241 "origin '%s' failed.", org
->name
);
4244 origin_active
= info
.exists
;
4246 if (vg_is_clustered(vg
) &&
4247 !lv_is_active_exclusive_locally(org
)) {
4248 log_error("%s must be active exclusively to"
4249 " create snapshot", org
->name
);
4255 if (!seg_is_thin_volume(lp
) && !lp
->extents
) {
4256 log_error("Unable to create new logical volume with no extents");
4260 if (seg_is_thin_pool(lp
) &&
4261 ((uint64_t)lp
->extents
* vg
->extent_size
< lp
->chunk_size
)) {
4262 log_error("Unable to create thin pool smaller than 1 chunk.");
4266 if (lp
->snapshot
&& !lp
->thin
&& ((uint64_t)lp
->extents
* vg
->extent_size
< 2 * lp
->chunk_size
)) {
4267 log_error("Unable to create a snapshot smaller than 2 chunks.");
4271 if (!seg_is_virtual(lp
) &&
4272 vg
->free_count
< lp
->extents
) {
4273 log_error("Volume group \"%s\" has insufficient free space "
4274 "(%u extents): %u required.",
4275 vg
->name
, vg
->free_count
, lp
->extents
);
4279 if (lp
->stripes
> dm_list_size(lp
->pvh
) && lp
->alloc
!= ALLOC_ANYWHERE
) {
4280 log_error("Number of stripes (%u) must not exceed "
4281 "number of physical volumes (%d)", lp
->stripes
,
4282 dm_list_size(lp
->pvh
));
4286 if (!activation() &&
4287 (seg_is_mirrored(lp
) ||
4289 seg_is_thin_pool(lp
))) {
4291 * FIXME: For thin pool add some code to allow delayed
4292 * initialization of empty thin pool volume.
4293 * i.e. using some LV flag, fake message,...
4294 * and testing for metadata pool header signature?
4296 log_error("Can't create %s without using "
4297 "device-mapper kernel driver.",
4298 segtype_is_raid(lp
->segtype
) ? lp
->segtype
->name
:
4299 segtype_is_mirrored(lp
->segtype
) ? "mirror" :
4300 "thin pool volume");
4304 /* The snapshot segment gets created later */
4305 if (lp
->snapshot
&& !lp
->thin
&&
4306 !(lp
->segtype
= get_segtype_from_string(cmd
, "striped")))
	if (!dm_list_empty(&lp->tags)) {
		if (!(vg->fid->fmt->features & FMT_TAGS)) {
			log_error("Volume group %s does not support tags",
				  vg->name);
			return NULL;
		}
	}
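
	/*
	 * A thin volume that will be activated immediately needs its pool's
	 * stacked transaction messages delivered to the kernel first, hence
	 * the update_pool_lv() call below.
	 */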
	if (seg_is_thin_volume(lp) &&
	    ((lp->activate == CHANGE_AY) ||
	     (lp->activate == CHANGE_AE) ||
	     (lp->activate == CHANGE_ALY))) {
		/* Ensure all stacked messages are submitted */
		if (!(lvl = find_lv_in_vg(vg, lp->pool))) {
			log_error("Unable to find existing pool LV %s in VG %s.",
				  lp->pool, vg->name);
			return NULL;
		}

		if (!update_pool_lv(lvl->lv, 1))
			return_NULL;
	}
	if (segtype_is_mirrored(lp->segtype) || segtype_is_raid(lp->segtype)) {
		init_mirror_in_sync(lp->nosync);

		if (lp->nosync) {
			log_warn("WARNING: New %s won't be synchronised. "
				 "Don't read what you didn't write!",
				 lp->segtype->name);
			status |= LV_NOTSYNCED;
		}

		lp->region_size = adjusted_mirror_region_size(vg->extent_size,
							      lp->extents,
							      lp->region_size);
	}
	if (!(lv = lv_create_empty(new_lv_name ? : "lvol%d", NULL,
				   status, lp->alloc, vg)))
		return_NULL;
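
	/*
	 * From here the new LV exists in the in-memory VG metadata only;
	 * nothing reaches disk or the kernel until vg_write()/vg_commit()
	 * below.
	 */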
	if (lp->read_ahead != lv->read_ahead) {
		log_verbose("Setting read ahead sectors");
		lv->read_ahead = lp->read_ahead;
	}

	if (!seg_is_thin_pool(lp) && lp->minor >= 0) {
		lv->major = lp->major;
		lv->minor = lp->minor;
		lv->status |= FIXED_MINOR;
		log_verbose("Setting device number to (%d, %d)", lv->major,
			    lv->minor);
	}

	dm_list_splice(&lv->tags, &lp->tags);
	if (!lv_extend(lv, lp->segtype,
		       lp->stripes, lp->stripe_size,
		       lp->mirrors,
		       seg_is_thin_pool(lp) ? lp->poolmetadataextents : lp->region_size,
		       seg_is_thin_volume(lp) ? lp->voriginextents : lp->extents,
		       seg_is_thin_volume(lp) ? (org ? org->name : lp->pool) : NULL,
		       lp->pvh, lp->alloc))
		return_NULL;
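
	/*
	 * lv_extend() has now allocated the extents and created the first
	 * segment; what follows is per-segtype tuning of that segment.
	 */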
	if (seg_is_thin_pool(lp)) {
		first_seg(lv)->zero_new_blocks = lp->zero ? 1 : 0;
		first_seg(lv)->chunk_size = lp->chunk_size;
		/* FIXME: use lowwatermark via lvm.conf global for all thinpools ? */
		first_seg(lv)->low_water_mark = 0;
	} else if (seg_is_thin_volume(lp)) {
		pool_lv = first_seg(lv)->pool_lv;

		if (!(first_seg(lv)->device_id =
		      get_free_pool_device_id(first_seg(pool_lv)))) {
			stack;
			goto revert_new_lv;
		}

		if (!attach_pool_message(first_seg(pool_lv),
					 DM_THIN_MESSAGE_CREATE_THIN, lv, 0, 0)) {
			stack;
			goto revert_new_lv;
		}
	}
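
	/*
	 * Note the thin device does not exist in the kernel yet: a
	 * CREATE_THIN message has merely been queued on the pool segment
	 * and is sent when the pool is next updated (update_pool_lv()
	 * during activation below).
	 */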
	/* FIXME Log allocation and attachment should have happened inside lv_extend. */
	if (lp->log_count &&
	    !seg_is_raid(first_seg(lv)) && seg_is_mirrored(first_seg(lv))) {
		if (!add_mirror_log(cmd, lv, lp->log_count,
				    first_seg(lv)->region_size,
				    lp->pvh, lp->alloc)) {
			stack;
			goto revert_new_lv;
		}
	}
	/* store vg on disk(s) */
	if (!vg_write(vg) || !vg_commit(vg))
		return_NULL;

	backup(vg);

	if (test_mode()) {
		log_verbose("Test mode: Skipping activation and zeroing.");
		goto out;
	}
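
	/*
	 * The VG metadata is committed from this point on, so failure paths
	 * below must undo the half-created LV: revert_new_lv removes it and
	 * rewrites the metadata, deactivate_and_revert_new_lv deactivates
	 * it first.
	 */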
	if (seg_is_thin(lp)) {
		/* For snapshot, suspend active thin origin first */
		if (org && lv_is_active(org)) {
			if (!pool_below_threshold(first_seg(first_seg(org)->pool_lv))) {
				log_error("Cannot create thin snapshot. Pool %s/%s is filled "
					  "over the autoextend threshold.",
					  org->vg->name, first_seg(org)->pool_lv->name);
				goto revert_new_lv;
			}

			if (!suspend_lv_origin(cmd, org)) {
				log_error("Failed to suspend thin snapshot origin %s/%s.",
					  org->vg->name, org->name);
				goto revert_new_lv;
			}

			if (!resume_lv_origin(cmd, org)) { /* deptree updates thin-pool */
				log_error("Failed to resume thin snapshot origin %s/%s.",
					  org->vg->name, org->name);
				goto revert_new_lv;
			}

			/* At this point remove pool messages, snapshot is active */
			if (!update_pool_lv(first_seg(org)->pool_lv, 0)) {
				stack;
				goto deactivate_and_revert_new_lv;
			}
		}
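
		/*
		 * The suspend/resume cycle above quiesces I/O to the active
		 * thin origin so the snapshot message captures a consistent
		 * image; the resume rebuilds the device tree, which also
		 * updates the thin-pool device.
		 */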
		if ((lp->activate == CHANGE_AY) ||
		    (lp->activate == CHANGE_AE) ||
		    (lp->activate == CHANGE_ALY)) {
			/* At this point send message to kernel thin mda */
			pool_lv = lv_is_thin_pool(lv) ? lv : first_seg(lv)->pool_lv;
			if (!update_pool_lv(pool_lv, 1)) {
				stack;
				goto deactivate_and_revert_new_lv;
			}

			if (!activate_lv_excl(cmd, lv)) {
				log_error("Aborting. Failed to activate thin %s.",
					  lv->name);
				goto deactivate_and_revert_new_lv;
			}
		}
	} else if (lp->snapshot) {
		if (!activate_lv_excl(cmd, lv)) {
			log_error("Aborting. Failed to activate snapshot "
				  "exception store.");
			goto revert_new_lv;
		}
	} else if ((lp->activate == CHANGE_AY && !activate_lv(cmd, lv)) ||
		   (lp->activate == CHANGE_AE && !activate_lv_excl(cmd, lv)) ||
		   (lp->activate == CHANGE_ALY && !activate_lv_local(cmd, lv))) {
		log_error("Failed to activate new LV.");
		if (lp->zero)
			goto deactivate_and_revert_new_lv;
		return NULL;
	}
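
	/*
	 * Wipe the start of the new LV (or the snapshot exception store)
	 * unless zeroing was declined; a thin volume is wiped only when its
	 * pool does not already zero new blocks.
	 */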
	if (!seg_is_thin(lp) && !lp->zero && !lp->snapshot)
		log_warn("WARNING: \"%s\" not zeroed", lv->name);
	else if ((!seg_is_thin(lp) ||
		  (lv_is_thin_volume(lv) &&
		   !first_seg(first_seg(lv)->pool_lv)->zero_new_blocks)) &&
		 !set_lv(cmd, lv, UINT64_C(0), 0)) {
		log_error("Aborting. Failed to wipe %s.",
			  lp->snapshot ? "snapshot exception store" :
					 "start of new LV");
		goto deactivate_and_revert_new_lv;
	}
	if (lp->snapshot && !lp->thin) {
		/* Reset permission after zeroing */
		if (!(lp->permission & LVM_WRITE))
			lv->status &= ~LVM_WRITE;

		/* COW area must be deactivated if origin is not active */
		if (!origin_active && !deactivate_lv(cmd, lv)) {
			log_error("Aborting. Couldn't deactivate snapshot "
				  "COW area. Manual intervention required.");
			return NULL;
		}
		/* A virtual origin must be activated explicitly. */
		if (lp->voriginsize &&
		    (!(org = _create_virtual_origin(cmd, vg, lv->name,
						    lp->permission,
						    lp->voriginextents)) ||
		     !activate_lv_excl(cmd, org))) {
			log_error("Couldn't create virtual origin for LV %s",
				  lv->name);
			if (org && !lv_remove(org))
				stack;
			goto deactivate_and_revert_new_lv;
		}
		/* cow LV remains active and becomes snapshot LV */
		if (!vg_add_snapshot(org, lv, NULL,
				     org->le_count, lp->chunk_size)) {
			log_error("Couldn't create snapshot.");
			goto deactivate_and_revert_new_lv;
		}
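
		/*
		 * Commit ordering for the new snapshot: write the metadata,
		 * suspend the origin, commit, then resume, so the kernel
		 * switches the origin to snapshot mode without racing
		 * in-flight I/O.
		 */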
		/* store vg on disk(s) */
		if (!vg_write(vg))
			return_NULL;

		if (!suspend_lv(cmd, org)) {
			log_error("Failed to suspend origin %s", org->name);
			vg_revert(vg);
			return NULL;
		}

		if (!vg_commit(vg))
			return_NULL;

		if (!resume_lv(cmd, org)) {
			log_error("Problem reactivating origin %s", org->name);
			return NULL;
		}
	}
	/* FIXME out of sequence */
	backup(vg);

out:
	return lv;

deactivate_and_revert_new_lv:
	if (!deactivate_lv(cmd, lv)) {
		log_error("Unable to deactivate failed new LV. "
			  "Manual intervention required.");
		return NULL;
	}

revert_new_lv:
	/* FIXME Better to revert to backup of metadata? */
	if (!lv_remove(lv) || !vg_write(vg) || !vg_commit(vg))
		log_error("Manual intervention may be required to remove "
			  "abandoned LV(s) before retrying.");
	else
		backup(vg);

	return NULL;
}
int lv_create_single(struct volume_group *vg,
		     struct lvcreate_params *lp)
{
	struct logical_volume *lv;

	/* Create thin pool first if necessary */
	if (lp->create_thin_pool) {
		if (!seg_is_thin_pool(lp) &&
		    !(lp->segtype = get_segtype_from_string(vg->cmd, "thin-pool")))
			return_0;

		if (!(lv = _lv_create_an_lv(vg, lp, lp->pool)))
			return_0;

		if (!lp->thin)
			goto out;

		lp->pool = lv->name;

		if (!(lp->segtype = get_segtype_from_string(vg->cmd, "thin")))
			return_0;
	}

	if (!(lv = _lv_create_an_lv(vg, lp, lp->lv_name)))
		return_0;

out:
	log_print("Logical volume \"%s\" created", lv->name);

	return 1;
}
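
/*
 * Illustrative flow, not part of this file: tools/lvcreate.c fills
 * struct lvcreate_params from the command line and calls
 * lv_create_single(vg, lp).  When both create_thin_pool and thin are
 * set, the first _lv_create_an_lv() call above creates the pool and
 * the second creates the thin volume inside it; otherwise a single
 * call creates the requested LV directly.
 */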