/*
 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "lib.h"
#include "metadata.h"
#include "toolcontext.h"
#include "segtype.h"
#include "display.h"
#include "archiver.h"
#include "activate.h"
#include "lv_alloc.h"
#include "lvm-string.h"
#include "str_list.h"
#include "memlock.h"

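/* Default region size used when a RAID1 array is created from a linear LV */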
#define RAID_REGION_SIZE 1024

static int _lv_is_raid_with_tracking(const struct logical_volume *lv,
				     struct logical_volume **tracking)
{
	uint32_t s;
	struct lv_segment *seg;

	*tracking = NULL;
	seg = first_seg(lv);

	if (!(lv->status & RAID))
		return 0;

	for (s = 0; s < seg->area_count; s++)
		if (lv_is_visible(seg_lv(seg, s)) &&
		    !(seg_lv(seg, s)->status & LVM_WRITE))
			*tracking = seg_lv(seg, s);

	return *tracking ? 1 : 0;
}

int lv_is_raid_with_tracking(const struct logical_volume *lv)
{
	struct logical_volume *tracking;

	return _lv_is_raid_with_tracking(lv, &tracking);
}

uint32_t lv_raid_image_count(const struct logical_volume *lv)
{
	struct lv_segment *seg = first_seg(lv);

	if (!seg_is_raid(seg))
		return 1;

	return seg->area_count;
}

/*
 * Resume sub-LVs first, then top-level LV
 */
static int _bottom_up_resume(struct logical_volume *lv)
{
	uint32_t s;
	struct lv_segment *seg = first_seg(lv);

	if (seg_is_raid(seg) && (seg->area_count > 1)) {
		for (s = 0; s < seg->area_count; s++)
			if (!resume_lv(lv->vg->cmd, seg_lv(seg, s)) ||
			    !resume_lv(lv->vg->cmd, seg_metalv(seg, s)))
				return_0;
	}

	return resume_lv(lv->vg->cmd, lv);
}

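/*
 * _activate_sublv_preserving_excl
 * @top_lv: The RAID LV whose activation mode should be preserved
 * @sub_lv: The sub-LV to activate
 *
 * Activate 'sub_lv' exclusively if 'top_lv' is active exclusively on
 * this node; otherwise, activate it normally.
 */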
static int _activate_sublv_preserving_excl(struct logical_volume *top_lv,
					   struct logical_volume *sub_lv)
{
	struct cmd_context *cmd = top_lv->vg->cmd;

	/* If top RAID was EX, use EX */
	if (lv_is_active_exclusive_locally(top_lv)) {
		if (!activate_lv_excl(cmd, sub_lv))
			return_0;
	} else {
		if (!activate_lv(cmd, sub_lv))
			return_0;
	}
	return 1;
}

/*
 * _lv_is_on_pv
 * @lv:
 * @pv:
 *
 * If any of the component devices of the LV are on the given PV, 1
 * is returned; otherwise 0.  For example, if one of the images of a RAID
 * (or its metadata device) is on the PV, 1 would be returned for the
 * top-level LV.
 * If you wish to check the images themselves, you should pass them.
 *
 * FIXME: This should be made more generic, possibly use 'for_each_sub_lv',
 * and be put in lv_manip.c.  'for_each_sub_lv' does not yet allow us to
 * short-circuit execution or pass back the values we need, though...
 */
static int _lv_is_on_pv(struct logical_volume *lv, struct physical_volume *pv)
{
	uint32_t s;
	struct physical_volume *pv2;
	struct lv_segment *seg;

	if (!lv)
		return 0;

	seg = first_seg(lv);
	if (!seg)
		return 0;

	/* Check mirror log */
	if (_lv_is_on_pv(seg->log_lv, pv))
		return 1;

	/* Check stack of LVs */
	dm_list_iterate_items(seg, &lv->segments) {
		for (s = 0; s < seg->area_count; s++) {
			if (seg_type(seg, s) == AREA_PV) {
				pv2 = seg_pv(seg, s);
				if (id_equal(&pv->id, &pv2->id))
					return 1;
				if (pv->dev && pv2->dev &&
				    (pv->dev->dev == pv2->dev->dev))
					return 1;
			}

			if ((seg_type(seg, s) == AREA_LV) &&
			    _lv_is_on_pv(seg_lv(seg, s), pv))
				return 1;

			if (!seg_is_raid(seg))
				continue;

			/* This is RAID, so we know the meta_area is AREA_LV */
			if (_lv_is_on_pv(seg_metalv(seg, s), pv))
				return 1;
		}
	}

	return 0;
}

static int _lv_is_on_pvs(struct logical_volume *lv, struct dm_list *pvs)
{
	struct pv_list *pvl;

	dm_list_iterate_items(pvl, pvs)
		if (_lv_is_on_pv(lv, pvl->pv)) {
			log_debug("%s is on %s", lv->name,
				  pv_dev_name(pvl->pv));
			return 1;
		} else
			log_debug("%s is not on %s", lv->name,
				  pv_dev_name(pvl->pv));
	return 0;
}

static int _get_pv_list_for_lv(struct logical_volume *lv, struct dm_list *pvs)
{
	uint32_t s;
	struct pv_list *pvl;
	struct lv_segment *seg = first_seg(lv);

	if (!seg_is_linear(seg)) {
		log_error(INTERNAL_ERROR
			  "_get_pv_list_for_lv only handles linear volumes");
		return 0;
	}

	log_debug("Getting list of PVs that %s/%s is on:",
		  lv->vg->name, lv->name);

	dm_list_iterate_items(seg, &lv->segments) {
		for (s = 0; s < seg->area_count; s++) {
			if (seg_type(seg, s) != AREA_PV) {
				log_error(INTERNAL_ERROR
					  "Linear seg_type should be AREA_PV");
				return 0;
			}

			if (!(pvl = dm_pool_zalloc(lv->vg->cmd->mem,
						   sizeof(*pvl)))) {
				log_error("Failed to allocate memory");
				return 0;
			}

			pvl->pv = seg_pv(seg, s);
			log_debug("  %s/%s is on %s", lv->vg->name, lv->name,
				  pv_dev_name(pvl->pv));
			dm_list_add(pvs, &pvl->list);
		}
	}

	return 1;
}

/*
 * _raid_in_sync
 * @lv
 *
 * _raid_in_sync works for all types of RAID segtypes, as well
 * as 'mirror' segtype.  (This is because 'lv_raid_percent' is
 * simply a wrapper around 'lv_mirror_percent'.)
 *
 * Returns: 1 if in-sync, 0 otherwise.
 */
static int _raid_in_sync(struct logical_volume *lv)
{
	percent_t sync_percent;

	if (!lv_raid_percent(lv, &sync_percent)) {
		log_error("Unable to determine sync status of %s/%s.",
			  lv->vg->name, lv->name);
		return 0;
	}

	return (sync_percent == PERCENT_100) ? 1 : 0;
}

/*
 * _raid_remove_top_layer
 * @lv
 * @removal_list
 *
 * Remove top layer of RAID LV in order to convert to linear.
 * This function makes no on-disk changes.  The residual LVs
 * returned in 'removal_list' must be freed by the caller.
 *
 * Returns: 1 on success, 0 on failure
 */
static int _raid_remove_top_layer(struct logical_volume *lv,
				  struct dm_list *removal_list)
{
	struct lv_list *lvl_array, *lvl;
	struct lv_segment *seg = first_seg(lv);

	if (!seg_is_mirrored(seg)) {
		log_error(INTERNAL_ERROR
			  "Unable to remove RAID layer from segment type %s",
			  seg->segtype->name);
		return 0;
	}

	if (seg->area_count != 1) {
		log_error(INTERNAL_ERROR
			  "Unable to remove RAID layer when there"
			  " is more than one sub-lv");
		return 0;
	}

	lvl_array = dm_pool_alloc(lv->vg->vgmem, 2 * sizeof(*lvl));
	if (!lvl_array) {
		log_error("Memory allocation failed.");
		return 0;
	}

	/* Add last metadata area to removal_list */
	lvl_array[0].lv = seg_metalv(seg, 0);
	lv_set_visible(seg_metalv(seg, 0));
	remove_seg_from_segs_using_this_lv(seg_metalv(seg, 0), seg);
	seg_metatype(seg, 0) = AREA_UNASSIGNED;
	dm_list_add(removal_list, &(lvl_array[0].list));

	/* Remove RAID layer and add residual LV to removal_list */
	seg_lv(seg, 0)->status &= ~RAID_IMAGE;
	lv_set_visible(seg_lv(seg, 0));
	lvl_array[1].lv = seg_lv(seg, 0);
	dm_list_add(removal_list, &(lvl_array[1].list));

	if (!remove_layer_from_lv(lv, seg_lv(seg, 0)))
		return_0;

	lv->status &= ~(MIRRORED | RAID);
	return 1;
}

/*
 * _clear_lv
 * @lv
 *
 * If LV is active:
 *        clear first block of device
 * otherwise:
 *        activate, clear, deactivate
 *
 * Returns: 1 on success, 0 on failure
 */
static int _clear_lv(struct logical_volume *lv)
{
	int was_active = lv_is_active(lv);

	if (!was_active && !activate_lv(lv->vg->cmd, lv)) {
		log_error("Failed to activate %s for clearing",
			  lv->name);
		return 0;
	}

	log_verbose("Clearing metadata area of %s/%s",
		    lv->vg->name, lv->name);
	/*
	 * Rather than wiping lv->size, we can simply
	 * wipe the first sector to remove the superblock of any previous
	 * RAID devices.  It is much quicker.
	 */
	if (!set_lv(lv->vg->cmd, lv, 1, 0)) {
		log_error("Failed to zero %s", lv->name);
		return 0;
	}

	if (!was_active && !deactivate_lv(lv->vg->cmd, lv)) {
		log_error("Failed to deactivate %s", lv->name);
		return 0;
	}

	return 1;
}

/* Makes on-disk metadata changes */
static int _clear_lvs(struct dm_list *lv_list)
{
	struct lv_list *lvl;
	struct volume_group *vg = NULL;

	if (dm_list_empty(lv_list)) {
		log_debug(INTERNAL_ERROR "Empty list of LVs given for clearing");
		return 1;
	}

	dm_list_iterate_items(lvl, lv_list) {
		if (!lv_is_visible(lvl->lv)) {
			log_error(INTERNAL_ERROR
				  "LVs must be set visible before clearing");
			return 0;
		}
		vg = lvl->lv->vg;
	}

	/*
	 * FIXME: only vg_[write|commit] if LVs are not already written
	 * as visible in the LVM metadata (which is never the case yet).
	 */
	if (!vg || !vg_write(vg) || !vg_commit(vg))
		return_0;

	dm_list_iterate_items(lvl, lv_list)
		if (!_clear_lv(lvl->lv))
			return 0;

	return 1;
}

/*
 * _shift_and_rename_image_components
 * @seg: Top-level RAID segment
 *
 * Shift all higher indexed segment areas down to fill in gaps where
 * there are 'AREA_UNASSIGNED' areas and rename data/metadata LVs so
 * that their names match their new index.  When finished, set
 * seg->area_count to the new reduced total.
 *
 * Returns: 1 on success, 0 on failure
 */
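/*
 * Example (illustrative): if image 1 of a 3-way array was extracted,
 * the areas are {0, AREA_UNASSIGNED, 2}.  Shifting renames
 * *_rimage_2/*_rmeta_2 to *_rimage_1/*_rmeta_1 and reduces
 * seg->area_count from 3 to 2.
 */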
static int _shift_and_rename_image_components(struct lv_segment *seg)
{
	int len;
	char *shift_name;
	uint32_t s, missing;
	struct cmd_context *cmd = seg->lv->vg->cmd;

	/*
	 * All LVs must be properly named for their index before
	 * shifting begins.  (e.g. Index '0' must contain *_rimage_0 and
	 * *_rmeta_0.  Index 'n' must contain *_rimage_n and *_rmeta_n.)
	 */

	if (!seg_is_raid(seg))
		return_0;

	if (seg->area_count > 10) {
		/*
		 * FIXME: Handling more would mean I'd have
		 * to handle double digits
		 */
		log_error("Unable to handle arrays with more than 10 devices");
		return 0;
	}

	log_very_verbose("Shifting images in %s", seg->lv->name);

	for (s = 0, missing = 0; s < seg->area_count; s++) {
		if (seg_type(seg, s) == AREA_UNASSIGNED) {
			if (seg_metatype(seg, s) != AREA_UNASSIGNED) {
				log_error(INTERNAL_ERROR "Metadata segment area"
					  " #%d should be AREA_UNASSIGNED", s);
				return 0;
			}
			missing++;
			continue;
		}
		if (!missing)
			continue;

		log_very_verbose("Shifting %s and %s by %u",
				 seg_metalv(seg, s)->name,
				 seg_lv(seg, s)->name, missing);

		/* Alter rmeta name */
		shift_name = dm_pool_strdup(cmd->mem, seg_metalv(seg, s)->name);
		if (!shift_name) {
			log_error("Memory allocation failed.");
			return 0;
		}
		len = strlen(shift_name) - 1;
		shift_name[len] -= missing;
		seg_metalv(seg, s)->name = shift_name;

		/* Alter rimage name */
		shift_name = dm_pool_strdup(cmd->mem, seg_lv(seg, s)->name);
		if (!shift_name) {
			log_error("Memory allocation failed.");
			return 0;
		}
		len = strlen(shift_name) - 1;
		shift_name[len] -= missing;
		seg_lv(seg, s)->name = shift_name;

		seg->areas[s - missing] = seg->areas[s];
		seg->meta_areas[s - missing] = seg->meta_areas[s];
	}

	seg->area_count -= missing;
	return 1;
}

/*
 * Create an LV of specified type.  Set visible after creation.
 * This function does not make metadata changes.
 */
static int _alloc_image_component(struct logical_volume *lv,
				  const char *alt_base_name,
				  struct alloc_handle *ah, uint32_t first_area,
				  uint64_t type, struct logical_volume **new_lv)
{
	uint64_t status;
	size_t len = strlen(lv->name) + 32;
	char img_name[len];
	const char *base_name = (alt_base_name) ? alt_base_name : lv->name;
	struct logical_volume *tmp_lv;
	const struct segment_type *segtype;

	if (type == RAID_META) {
		if (dm_snprintf(img_name, len, "%s_rmeta_%%d", base_name) < 0)
			return_0;
	} else if (type == RAID_IMAGE) {
		if (dm_snprintf(img_name, len, "%s_rimage_%%d", base_name) < 0)
			return_0;
	} else {
		log_error(INTERNAL_ERROR
			  "Bad type provided to _alloc_raid_component");
		return 0;
	}

	if (!ah) {
		first_area = 0;
		log_error(INTERNAL_ERROR
			  "Stand-alone %s area allocation not implemented",
			  (type == RAID_META) ? "metadata" : "data");
		return 0;
	}

	status = LVM_READ | LVM_WRITE | LV_REBUILD | type;
	tmp_lv = lv_create_empty(img_name, NULL, status, ALLOC_INHERIT, lv->vg);
	if (!tmp_lv) {
		log_error("Failed to allocate new raid component, %s", img_name);
		return 0;
	}

	segtype = get_segtype_from_string(lv->vg->cmd, "striped");
	if (!lv_add_segment(ah, first_area, 1, tmp_lv, segtype, 0, status, 0)) {
		log_error("Failed to add segment to LV, %s", img_name);
		return 0;
	}

	lv_set_visible(tmp_lv);
	*new_lv = tmp_lv;
	return 1;
}

static int _alloc_image_components(struct logical_volume *lv,
				   struct dm_list *pvs, uint32_t count,
				   struct dm_list *new_meta_lvs,
				   struct dm_list *new_data_lvs)
{
	uint32_t s;
	uint32_t region_size;
	uint32_t extents;
	struct lv_segment *seg = first_seg(lv);
	const struct segment_type *segtype;
	struct alloc_handle *ah;
	struct dm_list *parallel_areas;
	struct logical_volume *tmp_lv;
	struct lv_list *lvl_array;

	lvl_array = dm_pool_alloc(lv->vg->vgmem,
				  sizeof(*lvl_array) * count * 2);
	if (!lvl_array)
		return_0;

	if (!(parallel_areas = build_parallel_areas_from_lv(lv, 0)))
		return_0;

	if (seg_is_linear(seg))
		region_size = RAID_REGION_SIZE;
	else
		region_size = seg->region_size;

	if (seg_is_raid(seg))
		segtype = seg->segtype;
	else if (!(segtype = get_segtype_from_string(lv->vg->cmd, "raid1")))
		return_0;

	/*
	 * The number of extents is based on the RAID type.  For RAID1,
	 * each of the rimages is the same size - 'le_count'.  However
	 * for RAID 4/5/6, the stripes add together (NOT including the parity
	 * devices) to equal 'le_count'.  Thus, when we are allocating
	 * individual devices, we must specify how large the individual device
	 * is along with the number we want ('count').
	 */
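	/*
	 * Worked example (illustrative): a 5-device RAID5 LV (4 data
	 * stripes, parity_devs = 1) with le_count = 400 allocates
	 * 400 / (5 - 1) = 100 extents per new image, while a RAID1 LV
	 * with le_count = 400 allocates all 400 extents per image.
	 */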
	extents = (segtype->parity_devs) ?
		(lv->le_count / (seg->area_count - segtype->parity_devs)) :
		lv->le_count;

	if (!(ah = allocate_extents(lv->vg, NULL, segtype, 0, count, count,
				    region_size, extents, pvs,
				    lv->alloc, parallel_areas)))
		return_0;

	for (s = 0; s < count; s++) {
		/*
		 * The allocation areas are grouped together.  First
		 * come the rimage allocated areas, then come the metadata
		 * allocated areas.  Thus, the metadata areas are pulled
		 * from 's + count'.
		 */
		if (!_alloc_image_component(lv, NULL, ah, s + count,
					    RAID_META, &tmp_lv))
			return_0;
		lvl_array[s + count].lv = tmp_lv;
		dm_list_add(new_meta_lvs, &(lvl_array[s + count].list));

		if (!_alloc_image_component(lv, NULL, ah, s,
					    RAID_IMAGE, &tmp_lv))
			return_0;
		lvl_array[s].lv = tmp_lv;
		dm_list_add(new_data_lvs, &(lvl_array[s].list));
	}
	alloc_destroy(ah);
	return 1;
}

/*
 * _alloc_rmeta_for_lv
 * @data_lv
 * @meta_lv
 *
 * Allocate a RAID metadata device for the given LV (which is or will
 * be the associated RAID data device).  The new metadata device must
 * be allocated from the same PV(s) as the data device.
 */
static int _alloc_rmeta_for_lv(struct logical_volume *data_lv,
			       struct logical_volume **meta_lv)
{
	struct dm_list allocatable_pvs;
	struct alloc_handle *ah;
	struct lv_segment *seg = first_seg(data_lv);
	char *p, base_name[strlen(data_lv->name) + 1];

	dm_list_init(&allocatable_pvs);

	if (!seg_is_linear(seg)) {
		log_error(INTERNAL_ERROR "Unable to allocate RAID metadata "
			  "area for non-linear LV, %s", data_lv->name);
		return 0;
	}

	sprintf(base_name, "%s", data_lv->name);
	if ((p = strstr(base_name, "_mimage_")))
		*p = '\0';

	if (!_get_pv_list_for_lv(data_lv, &allocatable_pvs)) {
		log_error("Failed to build list of PVs for %s/%s",
			  data_lv->vg->name, data_lv->name);
		return 0;
	}

	if (!(ah = allocate_extents(data_lv->vg, NULL, seg->segtype, 0, 1, 0,
				    seg->region_size,
				    1 /*RAID_METADATA_AREA_LEN*/,
				    &allocatable_pvs, data_lv->alloc, NULL)))
		return_0;

	if (!_alloc_image_component(data_lv, base_name, ah, 0,
				    RAID_META, meta_lv))
		return_0;

	alloc_destroy(ah);
	return 1;
}

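/*
 * _raid_add_images
 * @lv
 * @new_count: The absolute count of images (e.g. '2' for a 2-way mirror)
 * @pvs: The list of PVs from which the new images may be allocated
 *
 * Grow 'lv' (a RAID LV, or a linear LV being converted to RAID1) to
 * 'new_count' images, allocating and clearing the necessary rmeta/rimage
 * sub-LVs, and commit the result.
 *
 * Returns: 1 on success, 0 on failure
 */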
static int _raid_add_images(struct logical_volume *lv,
			    uint32_t new_count, struct dm_list *pvs)
{
	int rebuild_flag_cleared = 0;
	uint32_t s;
	uint32_t old_count = lv_raid_image_count(lv);
	uint32_t count = new_count - old_count;
	uint64_t status_mask = -1;
	struct cmd_context *cmd = lv->vg->cmd;
	struct lv_segment *seg = first_seg(lv);
	struct dm_list meta_lvs, data_lvs;
	struct lv_list *lvl;
	struct lv_segment_area *new_areas;

	dm_list_init(&meta_lvs); /* For image addition */
	dm_list_init(&data_lvs); /* For image addition */

	/*
	 * If the segtype is linear, then we must allocate a metadata
	 * LV to accompany it.
	 */
	if (seg_is_linear(seg)) {
		/* A complete resync will be done, no need to mark each sub-lv */
		status_mask = ~(LV_REBUILD);

		if (!(lvl = dm_pool_alloc(lv->vg->vgmem, sizeof(*lvl)))) {
			log_error("Memory allocation failed");
			return 0;
		}

		if (!_alloc_rmeta_for_lv(lv, &lvl->lv))
			return_0;

		dm_list_add(&meta_lvs, &lvl->list);
	} else if (!seg_is_raid(seg)) {
		log_error("Unable to add RAID images to %s of segment type %s",
			  lv->name, seg->segtype->name);
		return 0;
	}

	if (!_alloc_image_components(lv, pvs, count, &meta_lvs, &data_lvs)) {
		log_error("Failed to allocate new image components");
		return 0;
	}

	/*
	 * If linear, we must correct data LV names.  They are off-by-one
	 * because the linear volume hasn't taken its proper name of "_rimage_0"
	 * yet.  This action must be done before '_clear_lvs' because it
	 * commits the LVM metadata before clearing the LVs.
	 */
	if (seg_is_linear(seg)) {
		char *name;
		size_t len;
		struct dm_list *l;
		struct lv_list *lvl_tmp;

		dm_list_iterate(l, &data_lvs) {
			if (l == dm_list_last(&data_lvs)) {
				lvl = dm_list_item(l, struct lv_list);
				len = strlen(lv->name) + strlen("_rimage_XXX");
				if (!(name = dm_pool_alloc(lv->vg->vgmem, len))) {
					log_error("Failed to allocate rimage name.");
					return 0;
				}
				sprintf(name, "%s_rimage_%u", lv->name, count);
				lvl->lv->name = name;
				continue;
			}
			lvl = dm_list_item(l, struct lv_list);
			lvl_tmp = dm_list_item(l->n, struct lv_list);
			lvl->lv->name = lvl_tmp->lv->name;
		}
	}

	/* Metadata LVs must be cleared before being added to the array */
	if (!_clear_lvs(&meta_lvs))
		goto fail;

	if (seg_is_linear(seg)) {
		first_seg(lv)->status |= RAID_IMAGE;
		if (!insert_layer_for_lv(lv->vg->cmd, lv,
					 RAID | LVM_READ | LVM_WRITE,
					 "_rimage_0"))
			return_0;

		lv->status |= RAID;
		seg = first_seg(lv);
		seg_lv(seg, 0)->status |= RAID_IMAGE | LVM_READ | LVM_WRITE;
		seg->region_size = RAID_REGION_SIZE;
		seg->segtype = get_segtype_from_string(lv->vg->cmd, "raid1");
		if (!seg->segtype)
			return_0;
	}
/*
FIXME: It would be proper to activate the new LVs here, instead of having
them activated by the suspend.  However, this causes residual device nodes
to be left for these sub-lvs.
	dm_list_iterate_items(lvl, &meta_lvs)
		if (!do_correct_activate(lv, lvl->lv))
			return_0;
	dm_list_iterate_items(lvl, &data_lvs)
		if (!do_correct_activate(lv, lvl->lv))
			return_0;
*/
	/* Expand areas array */
	if (!(new_areas = dm_pool_zalloc(lv->vg->cmd->mem,
					 new_count * sizeof(*new_areas))))
		goto fail;
	memcpy(new_areas, seg->areas, seg->area_count * sizeof(*seg->areas));
	seg->areas = new_areas;

	/* Expand meta_areas array */
	if (!(new_areas = dm_pool_zalloc(lv->vg->cmd->mem,
					 new_count * sizeof(*new_areas))))
		goto fail;
	if (seg->meta_areas)
		memcpy(new_areas, seg->meta_areas,
		       seg->area_count * sizeof(*seg->meta_areas));
	seg->meta_areas = new_areas;
	seg->area_count = new_count;

	/* Add extra meta area when converting from linear */
	s = (old_count == 1) ? 0 : old_count;

	/* Set segment areas for metadata sub_lvs */
	dm_list_iterate_items(lvl, &meta_lvs) {
		log_debug("Adding %s to %s",
			  lvl->lv->name, lv->name);
		lvl->lv->status &= status_mask;
		first_seg(lvl->lv)->status &= status_mask;
		if (!set_lv_segment_area_lv(seg, s, lvl->lv, 0,
					    lvl->lv->status)) {
			log_error("Failed to add %s to %s",
				  lvl->lv->name, lv->name);
			goto fail;
		}
		s++;
	}

	s = old_count;

	/* Set segment areas for data sub_lvs */
	dm_list_iterate_items(lvl, &data_lvs) {
		log_debug("Adding %s to %s",
			  lvl->lv->name, lv->name);
		lvl->lv->status &= status_mask;
		first_seg(lvl->lv)->status &= status_mask;
		if (!set_lv_segment_area_lv(seg, s, lvl->lv, 0,
					    lvl->lv->status)) {
			log_error("Failed to add %s to %s",
				  lvl->lv->name, lv->name);
			goto fail;
		}
		s++;
	}

	/*
	 * FIXME: Failure handling during these points is harder.
	 */
	dm_list_iterate_items(lvl, &meta_lvs)
		lv_set_hidden(lvl->lv);
	dm_list_iterate_items(lvl, &data_lvs)
		lv_set_hidden(lvl->lv);

	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv_origin(cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!resume_lv_origin(cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Now that the 'REBUILD' has made its way to the kernel, we must
	 * remove the flag so that the individual devices are not rebuilt
	 * upon every activation.
	 */
	seg = first_seg(lv);
	for (s = 0; s < seg->area_count; s++) {
		if ((seg_lv(seg, s)->status & LV_REBUILD) ||
		    (seg_metalv(seg, s)->status & LV_REBUILD)) {
			seg_metalv(seg, s)->status &= ~LV_REBUILD;
			seg_lv(seg, s)->status &= ~LV_REBUILD;
			rebuild_flag_cleared = 1;
		}
	}
	if (rebuild_flag_cleared &&
	    (!vg_write(lv->vg) || !vg_commit(lv->vg))) {
		log_error("Failed to clear REBUILD flag for %s/%s components",
			  lv->vg->name, lv->name);
		return 0;
	}

	return 1;

fail:
	/* Cleanly remove newly-allocated LVs that failed insertion attempt */
	dm_list_iterate_items(lvl, &meta_lvs)
		if (!lv_remove(lvl->lv))
			return_0;
	dm_list_iterate_items(lvl, &data_lvs)
		if (!lv_remove(lvl->lv))
			return_0;
	return_0;
}

/*
 * _extract_image_components
 * @seg
 * @idx: The index in the areas array to remove
 * @extracted_rmeta: The displaced metadata LV
 * @extracted_rimage: The displaced data LV
 *
 * This function extracts the image components - setting the respective
 * 'extracted' pointers.  It appends '_extracted' to the LVs' names, so that
 * there are no future conflicts.  It does /not/ commit the results.
 * (IOW, erroring-out requires no unwinding of operations.)
 *
 * This function does /not/ attempt to:
 *    1) shift the 'areas' or 'meta_areas' arrays.
 *       The '[meta_]areas' are left as AREA_UNASSIGNED.
 *    2) Adjust the seg->area_count
 *    3) Name the extracted LVs appropriately (appends '_extracted' to names)
 * These actions must be performed by the caller.
 *
 * Returns: 1 on success, 0 on failure
 */
static int _extract_image_components(struct lv_segment *seg, uint32_t idx,
				     struct logical_volume **extracted_rmeta,
				     struct logical_volume **extracted_rimage)
{
	int len;
	char *tmp_name;
	struct volume_group *vg = seg->lv->vg;
	struct logical_volume *data_lv = seg_lv(seg, idx);
	struct logical_volume *meta_lv = seg_metalv(seg, idx);

	log_very_verbose("Extracting image components %s and %s from %s",
			 data_lv->name, meta_lv->name, seg->lv->name);

	data_lv->status &= ~RAID_IMAGE;
	meta_lv->status &= ~RAID_META;
	lv_set_visible(data_lv);
	lv_set_visible(meta_lv);

	/* release removes data and meta areas */
	remove_seg_from_segs_using_this_lv(data_lv, seg);
	remove_seg_from_segs_using_this_lv(meta_lv, seg);

	seg_type(seg, idx) = AREA_UNASSIGNED;
	seg_metatype(seg, idx) = AREA_UNASSIGNED;

	len = strlen(meta_lv->name) + strlen("_extracted") + 1;
	tmp_name = dm_pool_alloc(vg->vgmem, len);
	if (!tmp_name)
		return_0;
	sprintf(tmp_name, "%s_extracted", meta_lv->name);
	meta_lv->name = tmp_name;

	len = strlen(data_lv->name) + strlen("_extracted") + 1;
	tmp_name = dm_pool_alloc(vg->vgmem, len);
	if (!tmp_name)
		return_0;
	sprintf(tmp_name, "%s_extracted", data_lv->name);
	data_lv->name = tmp_name;

	*extracted_rmeta = meta_lv;
	*extracted_rimage = data_lv;

	return 1;
}

/*
 * _raid_extract_images
 * @lv
 * @new_count: The absolute count of images (e.g. '2' for a 2-way mirror)
 * @target_pvs: The list of PVs that are candidates for removal
 * @shift: If set, use _shift_and_rename_image_components().
 *         Otherwise, leave the [meta_]areas as AREA_UNASSIGNED and
 *         seg->area_count unchanged.
 * @extracted_[meta|data]_lvs: The LVs removed from the array.  If 'shift'
 *                             is set, then there will likely be name conflicts.
 *
 * This function extracts _both_ portions of the indexed image.  It
 * does /not/ commit the results.  (IOW, erroring-out requires no unwinding
 * of operations.)
 *
 * Returns: 1 on success, 0 on failure
 */
static int _raid_extract_images(struct logical_volume *lv, uint32_t new_count,
				struct dm_list *target_pvs, int shift,
				struct dm_list *extracted_meta_lvs,
				struct dm_list *extracted_data_lvs)
{
	int s, extract, lvl_idx = 0;
	struct lv_list *lvl_array;
	struct lv_segment *seg = first_seg(lv);
	struct logical_volume *rmeta_lv, *rimage_lv;

	extract = seg->area_count - new_count;
	log_verbose("Extracting %u %s from %s/%s", extract,
		    (extract > 1) ? "images" : "image",
		    lv->vg->name, lv->name);

	lvl_array = dm_pool_alloc(lv->vg->vgmem,
				  sizeof(*lvl_array) * extract * 2);
	if (!lvl_array)
		return_0;

	for (s = seg->area_count - 1; (s >= 0) && extract; s--) {
		if (!_lv_is_on_pvs(seg_lv(seg, s), target_pvs) ||
		    !_lv_is_on_pvs(seg_metalv(seg, s), target_pvs))
			continue;
		if (!_raid_in_sync(lv) &&
		    (!seg_is_mirrored(seg) || (s == 0))) {
			log_error("Unable to extract %sRAID image"
				  " while RAID array is not in-sync",
				  seg_is_mirrored(seg) ? "primary " : "");
			return 0;
		}

		if (!_extract_image_components(seg, s, &rmeta_lv, &rimage_lv)) {
			log_error("Failed to extract %s from %s",
				  seg_lv(seg, s)->name, lv->name);
			return 0;
		}

		if (shift && !_shift_and_rename_image_components(seg)) {
			log_error("Failed to shift and rename image components");
			return 0;
		}

		lvl_array[lvl_idx].lv = rmeta_lv;
		lvl_array[lvl_idx + 1].lv = rimage_lv;
		dm_list_add(extracted_meta_lvs, &(lvl_array[lvl_idx++].list));
		dm_list_add(extracted_data_lvs, &(lvl_array[lvl_idx++].list));

		extract--;
	}
	if (extract) {
		log_error("Unable to extract enough images to satisfy request");
		return 0;
	}

	return 1;
}

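/*
 * _raid_remove_images
 * @lv
 * @new_count: The absolute count of images to reduce to
 * @pvs: The list of PVs that are candidates for removal
 *
 * Extract the images of 'lv' residing on 'pvs', commit the change, and
 * remove the extracted LVs.  If 'new_count' is 1, the top RAID layer
 * is removed as well, converting the LV back to linear.
 *
 * Returns: 1 on success, 0 on failure
 */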
static int _raid_remove_images(struct logical_volume *lv,
			       uint32_t new_count, struct dm_list *pvs)
{
	struct dm_list removal_list;
	struct lv_list *lvl;

	dm_list_init(&removal_list);

	if (!_raid_extract_images(lv, new_count, pvs, 1,
				  &removal_list, &removal_list)) {
		log_error("Failed to extract images from %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	/* Convert to linear? */
	if ((new_count == 1) && !_raid_remove_top_layer(lv, &removal_list)) {
		log_error("Failed to remove RAID layer after linear conversion");
		return 0;
	}

	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv(lv->vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	/*
	 * We resume the extracted sub-LVs first so they are renamed
	 * and won't conflict with the remaining (possibly shifted)
	 * sub-LVs.
	 */
	dm_list_iterate_items(lvl, &removal_list) {
		if (!resume_lv(lv->vg->cmd, lvl->lv)) {
			log_error("Failed to resume extracted LVs");
			return 0;
		}
	}

	/*
	 * Resume the remaining LVs
	 * We must start by resuming the sub-LVs first (which would
	 * otherwise be handled automatically) because the shifting
	 * of positions could otherwise cause name collisions.  For
	 * example, if position 0 of a 3-way array is removed, positions
	 * 1 and 2 must be shifted and renamed 0 and 1.  If position 2
	 * tries to rename first, it will collide with the existing
	 * position 1.
	 */
	if (!_bottom_up_resume(lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Eliminate the extracted LVs
	 */
	sync_local_dev_names(lv->vg->cmd);
	if (!dm_list_empty(&removal_list)) {
		dm_list_iterate_items(lvl, &removal_list) {
			if (!deactivate_lv(lv->vg->cmd, lvl->lv))
				return_0;
			if (!lv_remove(lvl->lv))
				return_0;
		}

		if (!vg_write(lv->vg) || !vg_commit(lv->vg))
			return_0;
	}

	return 1;
}

/*
 * lv_raid_change_image_count
 * @lv
 * @new_count: The absolute count of images (e.g. '2' for a 2-way mirror)
 * @pvs: The list of PVs that are candidates for removal (or empty list)
 *
 * RAID arrays have 'images' which are composed of two parts:
 *   - 'rimage': The data/parity holding portion
 *   - 'rmeta' : The metadata holding portion (i.e. superblock/bitmap area)
 * This function adds or removes _both_ portions of the image and commits
 * the results.
 *
 * Returns: 1 on success, 0 on failure
 */
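/*
 * Illustrative use (assuming the caller built 'pvs'): converting a
 * linear LV into a 2-way RAID1 array:
 *     if (!lv_raid_change_image_count(lv, 2, pvs))
 *             return_0;
 */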
int lv_raid_change_image_count(struct logical_volume *lv,
			       uint32_t new_count, struct dm_list *pvs)
{
	uint32_t old_count = lv_raid_image_count(lv);

	if (old_count == new_count) {
		log_error("%s/%s already has image count of %d",
			  lv->vg->name, lv->name, new_count);
		return 1;
	}

	if (old_count > new_count)
		return _raid_remove_images(lv, new_count, pvs);

	return _raid_add_images(lv, new_count, pvs);
}

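/*
 * lv_raid_split
 * @lv
 * @split_name: Name to give the newly split-off LV
 * @new_count: The absolute count of images remaining in 'lv'
 * @splittable_pvs: The list of PVs that are candidates for removal
 *
 * Split a single image (and its metadata sub-LV) off of a mirrored
 * RAID LV, making it an independent, visible LV named 'split_name'.
 *
 * Returns: 1 on success, 0 on failure
 */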
int lv_raid_split(struct logical_volume *lv, const char *split_name,
		  uint32_t new_count, struct dm_list *splittable_pvs)
{
	const char *old_name;
	struct lv_list *lvl;
	struct dm_list removal_list, data_list;
	struct cmd_context *cmd = lv->vg->cmd;
	uint32_t old_count = lv_raid_image_count(lv);
	struct logical_volume *tracking;
	struct dm_list tracking_pvs;

	dm_list_init(&removal_list);
	dm_list_init(&data_list);

	if ((old_count - new_count) != 1) {
		log_error("Unable to split more than one image from %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!seg_is_mirrored(first_seg(lv))) {
		log_error("Unable to split logical volume of segment type, %s",
			  first_seg(lv)->segtype->name);
		return 0;
	}

	if (find_lv_in_vg(lv->vg, split_name)) {
		log_error("Logical Volume \"%s\" already exists in %s",
			  split_name, lv->vg->name);
		return 0;
	}

	if (!_raid_in_sync(lv)) {
		log_error("Unable to split %s/%s while it is not in-sync.",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * We only allow a split while there is tracking if it is to
	 * complete the split of the tracking sub-LV
	 */
	if (_lv_is_raid_with_tracking(lv, &tracking)) {
		if (!_lv_is_on_pvs(tracking, splittable_pvs)) {
			log_error("Unable to split additional image from %s "
				  "while tracking changes for %s",
				  lv->name, tracking->name);
			return 0;
		} else {
			/* Ensure we only split the tracking image */
			dm_list_init(&tracking_pvs);
			splittable_pvs = &tracking_pvs;
			if (!_get_pv_list_for_lv(tracking, splittable_pvs))
				return_0;
		}
	}

	if (!_raid_extract_images(lv, new_count, splittable_pvs, 1,
				  &removal_list, &data_list)) {
		log_error("Failed to extract images from %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	/* Convert to linear? */
	if ((new_count == 1) && !_raid_remove_top_layer(lv, &removal_list)) {
		log_error("Failed to remove RAID layer after linear conversion");
		return 0;
	}

	/* Get first item */
	dm_list_iterate_items(lvl, &data_list)
		break;

	old_name = lvl->lv->name;
	lvl->lv->name = split_name;

	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv(cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	/*
	 * First resume the newly split LV and LVs on the removal list.
	 * This is necessary so that there are no name collisions due to
	 * the original RAID LV having possibly had sub-LVs that have been
	 * shifted and renamed.
	 */
	if (!resume_lv(cmd, lvl->lv))
		return_0;
	dm_list_iterate_items(lvl, &removal_list)
		if (!resume_lv(cmd, lvl->lv))
			return_0;

	/*
	 * Resume the remaining LVs
	 * We must start by resuming the sub-LVs first (which would
	 * otherwise be handled automatically) because the shifting
	 * of positions could otherwise cause name collisions.  For
	 * example, if position 0 of a 3-way array is split, positions
	 * 1 and 2 must be shifted and renamed 0 and 1.  If position 2
	 * tries to rename first, it will collide with the existing
	 * position 1.
	 */
	if (!_bottom_up_resume(lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Eliminate the residual LVs
	 */
	dm_list_iterate_items(lvl, &removal_list) {
		if (!deactivate_lv(cmd, lvl->lv))
			return_0;

		if (!lv_remove(lvl->lv))
			return_0;
	}

	if (!vg_write(lv->vg) || !vg_commit(lv->vg))
		return_0;

	return 1;
}

/*
 * lv_raid_split_and_track
 * @lv
 * @splittable_pvs
 *
 * Only allows a single image to be split while tracking.  The image
 * never actually leaves the mirror.  It is simply made visible.  This
 * action triggers two things: 1) users are able to access the (data) image
 * and 2) lower layers replace images marked with a visible flag with
 * error targets.
 *
 * Returns: 1 on success, 0 on error
 */
int lv_raid_split_and_track(struct logical_volume *lv,
			    struct dm_list *splittable_pvs)
{
	int s;
	struct lv_segment *seg = first_seg(lv);

	if (!seg_is_mirrored(seg)) {
		log_error("Unable to split images from non-mirrored RAID");
		return 0;
	}

	if (!_raid_in_sync(lv)) {
		log_error("Unable to split image from %s/%s while not in-sync",
			  lv->vg->name, lv->name);
		return 0;
	}

	/* Cannot track two split images at once */
	if (lv_is_raid_with_tracking(lv)) {
		log_error("Cannot track more than one split image at a time");
		return 0;
	}

	for (s = seg->area_count - 1; s >= 0; s--) {
		if (!_lv_is_on_pvs(seg_lv(seg, s), splittable_pvs))
			continue;
		lv_set_visible(seg_lv(seg, s));
		seg_lv(seg, s)->status &= ~LVM_WRITE;
		break;
	}

	if (s >= seg->area_count) {
		log_error("Unable to find image to satisfy request");
		return 0;
	}

	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv(lv->vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	log_print("%s split from %s for read-only purposes.",
		  seg_lv(seg, s)->name, lv->name);

	/* Resume original LV */
	if (!resume_lv(lv->vg->cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	/* Activate the split (and tracking) LV */
	if (!_activate_sublv_preserving_excl(lv, seg_lv(seg, s)))
		return 0;

	log_print("Use 'lvconvert --merge %s/%s' to merge back into %s",
		  lv->vg->name, seg_lv(seg, s)->name, lv->name);
	return 1;
}

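/*
 * lv_raid_merge
 * @image_lv: A read-only image LV previously split off (and tracked)
 *            with lv_raid_split_and_track()
 *
 * Merge a split-off image back into its parent RAID array and commit
 * the change.
 *
 * Returns: 1 on success, 0 on failure
 */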
int lv_raid_merge(struct logical_volume *image_lv)
{
	uint32_t s;
	char *p, *lv_name;
	struct lv_list *lvl;
	struct logical_volume *lv;
	struct logical_volume *meta_lv = NULL;
	struct lv_segment *seg;
	struct volume_group *vg = image_lv->vg;

	lv_name = dm_pool_strdup(vg->vgmem, image_lv->name);
	if (!lv_name)
		return_0;

	if (!(p = strstr(lv_name, "_rimage_"))) {
		log_error("Unable to merge non-mirror image %s/%s",
			  vg->name, image_lv->name);
		return 0;
	}
	*p = '\0'; /* lv_name is now that of top-level RAID */

	if (image_lv->status & LVM_WRITE) {
		log_error("%s/%s is not read-only - refusing to merge",
			  vg->name, image_lv->name);
		return 0;
	}

	if (!(lvl = find_lv_in_vg(vg, lv_name))) {
		log_error("Unable to find containing RAID array for %s/%s",
			  vg->name, image_lv->name);
		return 0;
	}
	lv = lvl->lv;
	seg = first_seg(lv);
	for (s = 0; s < seg->area_count; s++) {
		if (seg_lv(seg, s) == image_lv) {
			meta_lv = seg_metalv(seg, s);
		}
	}
	if (!meta_lv)
		return_0;

	if (!deactivate_lv(vg->cmd, meta_lv)) {
		log_error("Failed to deactivate %s", meta_lv->name);
		return 0;
	}

	if (!deactivate_lv(vg->cmd, image_lv)) {
		log_error("Failed to deactivate %s/%s before merging",
			  vg->name, image_lv->name);
		return 0;
	}
	lv_set_hidden(image_lv);
	image_lv->status |= (lv->status & LVM_WRITE);
	image_lv->status |= RAID_IMAGE;

	if (!vg_write(vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, vg->name);
		return 0;
	}

	if (!suspend_lv(vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, vg->name);
		return 0;
	}

	if (!resume_lv(vg->cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  vg->name, lv->name);
		return 0;
	}

	log_print("%s/%s successfully merged back into %s/%s",
		  vg->name, image_lv->name,
		  vg->name, lv->name);
	return 1;
}

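/*
 * _convert_mirror_to_raid1
 * @lv
 * @new_segtype: The raid1 segment type
 *
 * In-place conversion of a 'mirror' LV: allocate and clear an rmeta
 * sub-LV for every image, remove the mirror log, rename the images to
 * the *_rimage_* convention, and switch the segment type to raid1.
 *
 * Returns: 1 on success, 0 on failure
 */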
static int _convert_mirror_to_raid1(struct logical_volume *lv,
				    const struct segment_type *new_segtype)
{
	uint32_t s;
	struct lv_segment *seg = first_seg(lv);
	struct lv_list lvl_array[seg->area_count], *lvl;
	struct dm_list meta_lvs;
	struct lv_segment_area *meta_areas;

	dm_list_init(&meta_lvs);

	if (!_raid_in_sync(lv)) {
		log_error("Unable to convert %s/%s while it is not in-sync",
			  lv->vg->name, lv->name);
		return 0;
	}

	meta_areas = dm_pool_zalloc(lv->vg->vgmem,
				    lv_mirror_count(lv) * sizeof(*meta_areas));
	if (!meta_areas) {
		log_error("Failed to allocate memory");
		return 0;
	}

	for (s = 0; s < seg->area_count; s++) {
		log_debug("Allocating new metadata LV for %s",
			  seg_lv(seg, s)->name);
		if (!_alloc_rmeta_for_lv(seg_lv(seg, s), &(lvl_array[s].lv))) {
			log_error("Failed to allocate metadata LV for %s in %s",
				  seg_lv(seg, s)->name, lv->name);
			return 0;
		}
		dm_list_add(&meta_lvs, &(lvl_array[s].list));
	}

	log_debug("Clearing newly allocated metadata LVs");
	if (!_clear_lvs(&meta_lvs)) {
		log_error("Failed to initialize metadata LVs");
		return 0;
	}

	if (seg->log_lv) {
		log_debug("Removing mirror log, %s", seg->log_lv->name);
		if (!remove_mirror_log(lv->vg->cmd, lv, NULL, 0)) {
			log_error("Failed to remove mirror log");
			return 0;
		}
	}

	seg->meta_areas = meta_areas;
	s = 0;

	dm_list_iterate_items(lvl, &meta_lvs) {
		log_debug("Adding %s to %s", lvl->lv->name, lv->name);

		/* Images are known to be in-sync */
		lvl->lv->status &= ~LV_REBUILD;
		first_seg(lvl->lv)->status &= ~LV_REBUILD;
		lv_set_hidden(lvl->lv);

		if (!set_lv_segment_area_lv(seg, s, lvl->lv, 0,
					    lvl->lv->status)) {
			log_error("Failed to add %s to %s",
				  lvl->lv->name, lv->name);
			return 0;
		}
		s++;
	}

	for (s = 0; s < seg->area_count; s++) {
		char *new_name;

		new_name = dm_pool_zalloc(lv->vg->vgmem,
					  strlen(lv->name) +
					  strlen("_rimage_XXn"));
		if (!new_name) {
			log_error("Failed to rename mirror images");
			return 0;
		}

		sprintf(new_name, "%s_rimage_%u", lv->name, s);
		log_debug("Renaming %s to %s", seg_lv(seg, s)->name, new_name);
		seg_lv(seg, s)->name = new_name;
		seg_lv(seg, s)->status &= ~MIRROR_IMAGE;
		seg_lv(seg, s)->status |= RAID_IMAGE;
	}
	init_mirror_in_sync(1);

	log_debug("Setting new segtype for %s", lv->name);
	seg->segtype = new_segtype;
	lv->status &= ~MIRRORED;
	lv->status |= RAID;
	seg->status |= RAID;

	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv(lv->vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!resume_lv(lv->vg->cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	return 1;
}

/*
 * lv_raid_reshape
 * @lv
 * @new_segtype
 *
 * Convert an LV from one RAID type (or 'mirror' segtype) to another.
 *
 * Returns: 1 on success, 0 on failure
 */
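/*
 * Illustrative use, converting an existing 'mirror' LV to 'raid1':
 *     const struct segment_type *segtype =
 *             get_segtype_from_string(lv->vg->cmd, "raid1");
 *     if (!segtype || !lv_raid_reshape(lv, segtype))
 *             return_0;
 */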
int lv_raid_reshape(struct logical_volume *lv,
		    const struct segment_type *new_segtype)
{
	struct lv_segment *seg = first_seg(lv);

	if (!new_segtype) {
		log_error(INTERNAL_ERROR "New segtype not specified");
		return 0;
	}

	if (!strcmp(seg->segtype->name, "mirror") &&
	    (!strcmp(new_segtype->name, "raid1")))
		return _convert_mirror_to_raid1(lv, new_segtype);

	log_error("Converting the segment type for %s/%s from %s to %s"
		  " is not yet supported.", lv->vg->name, lv->name,
		  seg->segtype->name, new_segtype->name);
	return 0;
}

/*
 * lv_raid_replace
 * @lv
 * @remove_pvs: The PVs whose images should be replaced
 * @allocate_pvs: The PVs from which replacement images may be allocated
 *
 * Replace the image components that reside on the specified PVs.
 */
int lv_raid_replace(struct logical_volume *lv,
		    struct dm_list *remove_pvs,
		    struct dm_list *allocate_pvs)
{
	uint32_t s, sd, match_count = 0;
	struct dm_list old_meta_lvs, old_data_lvs;
	struct dm_list new_meta_lvs, new_data_lvs;
	struct lv_segment *raid_seg = first_seg(lv);
	struct lv_list *lvl;
	char *tmp_names[raid_seg->area_count * 2];

	dm_list_init(&old_meta_lvs);
	dm_list_init(&old_data_lvs);
	dm_list_init(&new_meta_lvs);
	dm_list_init(&new_data_lvs);

	/*
	 * How many sub-LVs are being removed?
	 */
	for (s = 0; s < raid_seg->area_count; s++) {
		if ((seg_type(raid_seg, s) == AREA_UNASSIGNED) ||
		    (seg_metatype(raid_seg, s) == AREA_UNASSIGNED)) {
			log_error("Unable to replace RAID images while the "
				  "array has unassigned areas");
			return 0;
		}

		if (_lv_is_on_pvs(seg_lv(raid_seg, s), remove_pvs) ||
		    _lv_is_on_pvs(seg_metalv(raid_seg, s), remove_pvs))
			match_count++;
	}

	if (!match_count) {
		log_verbose("%s/%s does not contain devices specified"
			    " for replacement", lv->vg->name, lv->name);
		return 1;
	} else if (match_count == raid_seg->area_count) {
		log_error("Unable to remove all PVs from %s/%s at once.",
			  lv->vg->name, lv->name);
		return 0;
	} else if (raid_seg->segtype->parity_devs &&
		   (match_count > raid_seg->segtype->parity_devs)) {
		log_error("Unable to replace more than %u PVs from (%s) %s/%s",
			  raid_seg->segtype->parity_devs,
			  raid_seg->segtype->name, lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Allocate the new image components first
	 * - This makes it easy to avoid all currently used devs
	 * - We can immediately tell if there is enough space
	 *
	 * - We need to change the LV names when we insert them.
	 */
	if (!_alloc_image_components(lv, allocate_pvs, match_count,
				     &new_meta_lvs, &new_data_lvs)) {
		log_error("Failed to allocate replacement images for %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Remove the old images
	 * - If we did this before the allocate, we wouldn't have to rename
	 *   the allocated images, but it'd be much harder to avoid the right
	 *   PVs during allocation.
	 */
	if (!_raid_extract_images(lv, raid_seg->area_count - match_count,
				  remove_pvs, 0,
				  &old_meta_lvs, &old_data_lvs)) {
		log_error("Failed to remove the specified images from %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Skip metadata operation normally done to clear the metadata sub-LVs.
	 *
	 * The LV_REBUILD flag is set on the new sub-LVs,
	 * so they will be rebuilt and we don't need to clear the metadata dev.
	 */

	for (s = 0; s < raid_seg->area_count; s++) {
		tmp_names[s] = NULL;
		sd = s + raid_seg->area_count;
		tmp_names[sd] = NULL;

		if ((seg_type(raid_seg, s) == AREA_UNASSIGNED) &&
		    (seg_metatype(raid_seg, s) == AREA_UNASSIGNED)) {
			/* Adjust the new metadata LV name */
			lvl = dm_list_item(dm_list_first(&new_meta_lvs),
					   struct lv_list);
			dm_list_del(&lvl->list);
			tmp_names[s] = dm_pool_alloc(lv->vg->vgmem,
						     strlen(lvl->lv->name) + 1);
			if (!tmp_names[s])
				return_0;
			if (dm_snprintf(tmp_names[s], strlen(lvl->lv->name) + 1,
					"%s_rmeta_%u", lv->name, s) < 0)
				return_0;
			if (!set_lv_segment_area_lv(raid_seg, s, lvl->lv, 0,
						    lvl->lv->status)) {
				log_error("Failed to add %s to %s",
					  lvl->lv->name, lv->name);
				return 0;
			}
			lv_set_hidden(lvl->lv);

			/* Adjust the new data LV name */
			lvl = dm_list_item(dm_list_first(&new_data_lvs),
					   struct lv_list);
			dm_list_del(&lvl->list);
			tmp_names[sd] = dm_pool_alloc(lv->vg->vgmem,
						      strlen(lvl->lv->name) + 1);
			if (!tmp_names[sd])
				return_0;
			if (dm_snprintf(tmp_names[sd], strlen(lvl->lv->name) + 1,
					"%s_rimage_%u", lv->name, s) < 0)
				return_0;
			if (!set_lv_segment_area_lv(raid_seg, s, lvl->lv, 0,
						    lvl->lv->status)) {
				log_error("Failed to add %s to %s",
					  lvl->lv->name, lv->name);
				return 0;
			}
			lv_set_hidden(lvl->lv);
		}
	}

	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv_origin(lv->vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!resume_lv_origin(lv->vg->cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	dm_list_iterate_items(lvl, &old_meta_lvs) {
		if (!deactivate_lv(lv->vg->cmd, lvl->lv))
			return_0;
		if (!lv_remove(lvl->lv))
			return_0;
	}
	dm_list_iterate_items(lvl, &old_data_lvs) {
		if (!deactivate_lv(lv->vg->cmd, lvl->lv))
			return_0;
		if (!lv_remove(lvl->lv))
			return_0;
	}

	/* Update new sub-LVs to correct name and clear REBUILD flag */
	for (s = 0; s < raid_seg->area_count; s++) {
		sd = s + raid_seg->area_count;
		if (tmp_names[s] && tmp_names[sd]) {
			seg_metalv(raid_seg, s)->name = tmp_names[s];
			seg_lv(raid_seg, s)->name = tmp_names[sd];
			seg_metalv(raid_seg, s)->status &= ~LV_REBUILD;
			seg_lv(raid_seg, s)->status &= ~LV_REBUILD;
		}
	}

	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv_origin(lv->vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!resume_lv_origin(lv->vg->cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	return 1;
}