]> sourceware.org Git - lvm2.git/blob - lib/metadata/raid_manip.c
Fix code that performs RAID device replacement while under snapshot.
[lvm2.git] / lib / metadata / raid_manip.c
1 /*
2 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
3 *
4 * This file is part of LVM2.
5 *
6 * This copyrighted material is made available to anyone wishing to use,
7 * modify, copy, or redistribute it subject to the terms and conditions
8 * of the GNU Lesser General Public License v.2.1.
9 *
10 * You should have received a copy of the GNU Lesser General Public License
11 * along with this program; if not, write to the Free Software Foundation,
12 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
13 */
14
15 #include "lib.h"
16 #include "metadata.h"
17 #include "toolcontext.h"
18 #include "segtype.h"
19 #include "display.h"
20 #include "archiver.h"
21 #include "activate.h"
22 #include "lv_alloc.h"
23 #include "lvm-string.h"
24 #include "str_list.h"
25 #include "memlock.h"
26
/* Default region size assigned to newly-created RAID LVs;
 * presumably in sectors — TODO confirm against region_size users */
#define RAID_REGION_SIZE 1024
28
29 static int _lv_is_raid_with_tracking(const struct logical_volume *lv,
30 struct logical_volume **tracking)
31 {
32 uint32_t s;
33 struct lv_segment *seg;
34
35 *tracking = NULL;
36 seg = first_seg(lv);
37
38 if (!(lv->status & RAID))
39 return 0;
40
41 for (s = 0; s < seg->area_count; s++)
42 if (lv_is_visible(seg_lv(seg, s)) &&
43 !(seg_lv(seg, s)->status & LVM_WRITE))
44 *tracking = seg_lv(seg, s);
45
46
47 return *tracking ? 1 : 0;
48 }
49
50 int lv_is_raid_with_tracking(const struct logical_volume *lv)
51 {
52 struct logical_volume *tracking;
53
54 return _lv_is_raid_with_tracking(lv, &tracking);
55 }
56
57 uint32_t lv_raid_image_count(const struct logical_volume *lv)
58 {
59 struct lv_segment *seg = first_seg(lv);
60
61 if (!seg_is_raid(seg))
62 return 1;
63
64 return seg->area_count;
65 }
66
67 /*
68 * Resume sub-LVs first, then top-level LV
69 */
70 static int _bottom_up_resume(struct logical_volume *lv)
71 {
72 uint32_t s;
73 struct lv_segment *seg = first_seg(lv);
74
75 if (seg_is_raid(seg) && (seg->area_count > 1)) {
76 for (s = 0; s < seg->area_count; s++)
77 if (!resume_lv(lv->vg->cmd, seg_lv(seg, s)) ||
78 !resume_lv(lv->vg->cmd, seg_metalv(seg, s)))
79 return_0;
80 }
81
82 return resume_lv(lv->vg->cmd, lv);
83 }
84
85 static int _activate_sublv_preserving_excl(struct logical_volume *top_lv,
86 struct logical_volume *sub_lv)
87 {
88 struct cmd_context *cmd = top_lv->vg->cmd;
89
90 /* If top RAID was EX, use EX */
91 if (lv_is_active_exclusive_locally(top_lv)) {
92 if (!activate_lv_excl(cmd, sub_lv))
93 return_0;
94 } else {
95 if (!activate_lv(cmd, sub_lv))
96 return_0;
97 }
98 return 1;
99 }
100
/*
 * _lv_is_on_pv
 * @lv: the LV (or sub-LV) to inspect; NULL is accepted and returns 0
 * @pv: the PV to look for
 *
 * If any of the component devices of the LV are on the given PV, 1
 * is returned; otherwise 0.  For example if one of the images of a RAID
 * (or its metadata device) is on the PV, 1 would be returned for the
 * top-level LV.
 * If you wish to check the images themselves, you should pass them.
 *
 * FIXME: This should be made more generic, possibly use 'for_each_sub_lv',
 * and be put in lv_manip.c.  'for_each_sub_lv' does not yet allow us to
 * short-circuit execution or pass back the values we need yet though...
 *
 * Returns: 1 if any component of @lv resides on @pv, 0 otherwise.
 */
static int _lv_is_on_pv(struct logical_volume *lv, struct physical_volume *pv)
{
	uint32_t s;
	struct physical_volume *pv2;
	struct lv_segment *seg;

	if (!lv)
		return 0;

	seg = first_seg(lv);
	if (!seg)
		return 0;

	/* Check mirror log (recurses; NULL log_lv is handled above) */
	if (_lv_is_on_pv(seg->log_lv, pv))
		return 1;

	/* Check stack of LVs */
	dm_list_iterate_items(seg, &lv->segments) {
		for (s = 0; s < seg->area_count; s++) {
			if (seg_type(seg, s) == AREA_PV) {
				pv2 = seg_pv(seg, s);
				/* Match by PV UUID first, then by device number */
				if (id_equal(&pv->id, &pv2->id))
					return 1;
				if (pv->dev && pv2->dev &&
				    (pv->dev->dev == pv2->dev->dev))
					return 1;
			}

			/* Recurse into stacked sub-LVs */
			if ((seg_type(seg, s) == AREA_LV) &&
			    _lv_is_on_pv(seg_lv(seg, s), pv))
				return 1;

			if (!seg_is_raid(seg))
				continue;

			/* This is RAID, so we know the meta_area is AREA_LV */
			if (_lv_is_on_pv(seg_metalv(seg, s), pv))
				return 1;
		}
	}

	return 0;
}
160
161 static int _lv_is_on_pvs(struct logical_volume *lv, struct dm_list *pvs)
162 {
163 struct pv_list *pvl;
164
165 dm_list_iterate_items(pvl, pvs)
166 if (_lv_is_on_pv(lv, pvl->pv)) {
167 log_debug("%s is on %s", lv->name,
168 pv_dev_name(pvl->pv));
169 return 1;
170 } else
171 log_debug("%s is not on %s", lv->name,
172 pv_dev_name(pvl->pv));
173 return 0;
174 }
175
176 static int _get_pv_list_for_lv(struct logical_volume *lv, struct dm_list *pvs)
177 {
178 uint32_t s;
179 struct pv_list *pvl;
180 struct lv_segment *seg = first_seg(lv);
181
182 if (!seg_is_linear(seg)) {
183 log_error(INTERNAL_ERROR
184 "_get_pv_list_for_lv only handles linear volumes");
185 return 0;
186 }
187
188 log_debug("Getting list of PVs that %s/%s is on:",
189 lv->vg->name, lv->name);
190
191 dm_list_iterate_items(seg, &lv->segments) {
192 for (s = 0; s < seg->area_count; s++) {
193 if (seg_type(seg, s) != AREA_PV) {
194 log_error(INTERNAL_ERROR
195 "Linear seg_type should be AREA_PV");
196 return 0;
197 }
198
199 if (!(pvl = dm_pool_zalloc(lv->vg->cmd->mem,
200 sizeof(*pvl)))) {
201 log_error("Failed to allocate memory");
202 return 0;
203 }
204
205 pvl->pv = seg_pv(seg, s);
206 log_debug(" %s/%s is on %s", lv->vg->name, lv->name,
207 pv_dev_name(pvl->pv));
208 dm_list_add(pvs, &pvl->list);
209 }
210 }
211
212 return 1;
213 }
214
215 /*
216 * _raid_in_sync
217 * @lv
218 *
219 * _raid_in_sync works for all types of RAID segtypes, as well
220 * as 'mirror' segtype. (This is because 'lv_raid_percent' is
221 * simply a wrapper around 'lv_mirror_percent'.
222 *
223 * Returns: 1 if in-sync, 0 otherwise.
224 */
225 static int _raid_in_sync(struct logical_volume *lv)
226 {
227 percent_t sync_percent;
228
229 if (!lv_raid_percent(lv, &sync_percent)) {
230 log_error("Unable to determine sync status of %s/%s.",
231 lv->vg->name, lv->name);
232 return 0;
233 }
234
235 return (sync_percent == PERCENT_100) ? 1 : 0;
236 }
237
/*
 * _raid_remove_top_layer
 * @lv
 * @removal_list
 *
 * Remove top layer of RAID LV in order to convert to linear.
 * This function makes no on-disk changes.  The residual LVs
 * returned in 'removal_list' must be freed by the caller.
 *
 * Only valid on a mirrored segtype that has already been reduced to a
 * single remaining image (area_count == 1).
 *
 * Returns: 1 on success, 0 on failure
 */
static int _raid_remove_top_layer(struct logical_volume *lv,
				  struct dm_list *removal_list)
{
	/* 'lvl' itself is only used for sizeof below */
	struct lv_list *lvl_array, *lvl;
	struct lv_segment *seg = first_seg(lv);

	if (!seg_is_mirrored(seg)) {
		log_error(INTERNAL_ERROR
			  "Unable to remove RAID layer from segment type %s",
			  seg->segtype->name);
		return 0;
	}

	if (seg->area_count != 1) {
		log_error(INTERNAL_ERROR
			  "Unable to remove RAID layer when there"
			  " is more than one sub-lv");
		return 0;
	}

	/* Two entries: one for the metadata LV, one for the residual image */
	lvl_array = dm_pool_alloc(lv->vg->vgmem, 2 * sizeof(*lvl));
	if (!lvl_array) {
		log_error("Memory allocation failed.");
		return 0;
	}

	/* Add last metadata area to removal_list */
	lvl_array[0].lv = seg_metalv(seg, 0);
	lv_set_visible(seg_metalv(seg, 0));
	remove_seg_from_segs_using_this_lv(seg_metalv(seg, 0), seg);
	seg_metatype(seg, 0) = AREA_UNASSIGNED;
	dm_list_add(removal_list, &(lvl_array[0].list));

	/* Remove RAID layer and add residual LV to removal_list*/
	seg_lv(seg, 0)->status &= ~RAID_IMAGE;
	lv_set_visible(seg_lv(seg, 0));
	lvl_array[1].lv = seg_lv(seg, 0);
	dm_list_add(removal_list, &(lvl_array[1].list));

	if (!remove_layer_from_lv(lv, seg_lv(seg, 0)))
		return_0;

	/* Top-level LV is no longer mirrored/RAID */
	lv->status &= ~(MIRRORED | RAID);
	return 1;
}
294
295 /*
296 * _clear_lv
297 * @lv
298 *
299 * If LV is active:
300 * clear first block of device
301 * otherwise:
302 * activate, clear, deactivate
303 *
304 * Returns: 1 on success, 0 on failure
305 */
306 static int _clear_lv(struct logical_volume *lv)
307 {
308 int was_active = lv_is_active(lv);
309
310 if (!was_active && !activate_lv(lv->vg->cmd, lv)) {
311 log_error("Failed to activate %s for clearing",
312 lv->name);
313 return 0;
314 }
315
316 log_verbose("Clearing metadata area of %s/%s",
317 lv->vg->name, lv->name);
318 /*
319 * Rather than wiping lv->size, we can simply
320 * wipe the first sector to remove the superblock of any previous
321 * RAID devices. It is much quicker.
322 */
323 if (!set_lv(lv->vg->cmd, lv, 1, 0)) {
324 log_error("Failed to zero %s", lv->name);
325 return 0;
326 }
327
328 if (!was_active && !deactivate_lv(lv->vg->cmd, lv)) {
329 log_error("Failed to deactivate %s", lv->name);
330 return 0;
331 }
332
333 return 1;
334 }
335
/*
 * Clear (zero the start of) every LV in @lv_list.
 *
 * Makes on-disk metadata changes: the caller must already have made the
 * LVs visible, and the VG is written/committed here before the LVs are
 * wiped.  All listed LVs are expected to belong to the same VG.
 *
 * Returns: 1 on success, 0 on failure
 */
static int _clear_lvs(struct dm_list *lv_list)
{
	struct lv_list *lvl;
	struct volume_group *vg = NULL;

	if (dm_list_empty(lv_list)) {
		log_debug(INTERNAL_ERROR "Empty list of LVs given for clearing");
		return 1;
	}

	dm_list_iterate_items(lvl, lv_list) {
		if (!lv_is_visible(lvl->lv)) {
			log_error(INTERNAL_ERROR
				  "LVs must be set visible before clearing");
			return 0;
		}
		/* Remember the (shared) VG for the write/commit below */
		vg = lvl->lv->vg;
	}

	/*
	 * FIXME: only vg_[write|commit] if LVs are not already written
	 * as visible in the LVM metadata (which is never the case yet).
	 */
	if (!vg || !vg_write(vg) || !vg_commit(vg))
		return_0;

	dm_list_iterate_items(lvl, lv_list)
		if (!_clear_lv(lvl->lv))
			return 0;

	return 1;
}
369
370 /*
371 * _shift_and_rename_image_components
372 * @seg: Top-level RAID segment
373 *
374 * Shift all higher indexed segment areas down to fill in gaps where
375 * there are 'AREA_UNASSIGNED' areas and rename data/metadata LVs so
376 * that their names match their new index. When finished, set
377 * seg->area_count to new reduced total.
378 *
379 * Returns: 1 on success, 0 on failure
380 */
381 static int _shift_and_rename_image_components(struct lv_segment *seg)
382 {
383 int len;
384 char *shift_name;
385 uint32_t s, missing;
386 struct cmd_context *cmd = seg->lv->vg->cmd;
387
388 /*
389 * All LVs must be properly named for their index before
390 * shifting begins. (e.g. Index '0' must contain *_rimage_0 and
391 * *_rmeta_0. Index 'n' must contain *_rimage_n and *_rmeta_n.)
392 */
393
394 if (!seg_is_raid(seg))
395 return_0;
396
397 if (seg->area_count > 10) {
398 /*
399 * FIXME: Handling more would mean I'd have
400 * to handle double digits
401 */
402 log_error("Unable handle arrays with more than 10 devices");
403 return 0;
404 }
405
406 log_very_verbose("Shifting images in %s", seg->lv->name);
407
408 for (s = 0, missing = 0; s < seg->area_count; s++) {
409 if (seg_type(seg, s) == AREA_UNASSIGNED) {
410 if (seg_metatype(seg, s) != AREA_UNASSIGNED) {
411 log_error(INTERNAL_ERROR "Metadata segment area"
412 " #%d should be AREA_UNASSIGNED", s);
413 return 0;
414 }
415 missing++;
416 continue;
417 }
418 if (!missing)
419 continue;
420
421 log_very_verbose("Shifting %s and %s by %u",
422 seg_metalv(seg, s)->name,
423 seg_lv(seg, s)->name, missing);
424
425 /* Alter rmeta name */
426 shift_name = dm_pool_strdup(cmd->mem, seg_metalv(seg, s)->name);
427 if (!shift_name) {
428 log_error("Memory allocation failed.");
429 return 0;
430 }
431 len = strlen(shift_name) - 1;
432 shift_name[len] -= missing;
433 seg_metalv(seg, s)->name = shift_name;
434
435 /* Alter rimage name */
436 shift_name = dm_pool_strdup(cmd->mem, seg_lv(seg, s)->name);
437 if (!shift_name) {
438 log_error("Memory allocation failed.");
439 return 0;
440 }
441 len = strlen(shift_name) - 1;
442 shift_name[len] -= missing;
443 seg_lv(seg, s)->name = shift_name;
444
445 seg->areas[s - missing] = seg->areas[s];
446 seg->meta_areas[s - missing] = seg->meta_areas[s];
447 }
448
449 seg->area_count -= missing;
450 return 1;
451 }
452
453 /*
454 * Create an LV of specified type. Set visible after creation.
455 * This function does not make metadata changes.
456 */
457 static int _alloc_image_component(struct logical_volume *lv,
458 const char *alt_base_name,
459 struct alloc_handle *ah, uint32_t first_area,
460 uint64_t type, struct logical_volume **new_lv)
461 {
462 uint64_t status;
463 size_t len = strlen(lv->name) + 32;
464 char img_name[len];
465 const char *base_name = (alt_base_name) ? alt_base_name : lv->name;
466 struct logical_volume *tmp_lv;
467 const struct segment_type *segtype;
468
469 if (type == RAID_META) {
470 if (dm_snprintf(img_name, len, "%s_rmeta_%%d", base_name) < 0)
471 return_0;
472 } else if (type == RAID_IMAGE) {
473 if (dm_snprintf(img_name, len, "%s_rimage_%%d", base_name) < 0)
474 return_0;
475 } else {
476 log_error(INTERNAL_ERROR
477 "Bad type provided to _alloc_raid_component");
478 return 0;
479 }
480
481 if (!ah) {
482 first_area = 0;
483 log_error(INTERNAL_ERROR
484 "Stand-alone %s area allocation not implemented",
485 (type == RAID_META) ? "metadata" : "data");
486 return 0;
487 }
488
489 status = LVM_READ | LVM_WRITE | LV_REBUILD | type;
490 tmp_lv = lv_create_empty(img_name, NULL, status, ALLOC_INHERIT, lv->vg);
491 if (!tmp_lv) {
492 log_error("Failed to allocate new raid component, %s", img_name);
493 return 0;
494 }
495
496 segtype = get_segtype_from_string(lv->vg->cmd, "striped");
497 if (!lv_add_segment(ah, first_area, 1, tmp_lv, segtype, 0, status, 0)) {
498 log_error("Failed to add segment to LV, %s", img_name);
499 return 0;
500 }
501
502 lv_set_visible(tmp_lv);
503 *new_lv = tmp_lv;
504 return 1;
505 }
506
507 static int _alloc_image_components(struct logical_volume *lv,
508 struct dm_list *pvs, uint32_t count,
509 struct dm_list *new_meta_lvs,
510 struct dm_list *new_data_lvs)
511 {
512 uint32_t s;
513 uint32_t region_size;
514 uint32_t extents;
515 struct lv_segment *seg = first_seg(lv);
516 const struct segment_type *segtype;
517 struct alloc_handle *ah;
518 struct dm_list *parallel_areas;
519 struct logical_volume *tmp_lv;
520 struct lv_list *lvl_array;
521
522 lvl_array = dm_pool_alloc(lv->vg->vgmem,
523 sizeof(*lvl_array) * count * 2);
524 if (!lvl_array)
525 return_0;
526
527 if (!(parallel_areas = build_parallel_areas_from_lv(lv, 0)))
528 return_0;
529
530 if (seg_is_linear(seg))
531 region_size = RAID_REGION_SIZE;
532 else
533 region_size = seg->region_size;
534
535 if (seg_is_raid(seg))
536 segtype = seg->segtype;
537 else if (!(segtype = get_segtype_from_string(lv->vg->cmd, "raid1")))
538 return_0;
539
540 /*
541 * The number of extents is based on the RAID type. For RAID1,
542 * each of the rimages is the same size - 'le_count'. However
543 * for RAID 4/5/6, the stripes add together (NOT including the parity
544 * devices) to equal 'le_count'. Thus, when we are allocating
545 * individual devies, we must specify how large the individual device
546 * is along with the number we want ('count').
547 */
548 extents = (segtype->parity_devs) ?
549 (lv->le_count / (seg->area_count - segtype->parity_devs)) :
550 lv->le_count;
551
552 if (!(ah = allocate_extents(lv->vg, NULL, segtype, 0, count, count,
553 region_size, extents, pvs,
554 lv->alloc, parallel_areas)))
555 return_0;
556
557 for (s = 0; s < count; s++) {
558 /*
559 * The allocation areas are grouped together. First
560 * come the rimage allocated areas, then come the metadata
561 * allocated areas. Thus, the metadata areas are pulled
562 * from 's + count'.
563 */
564 if (!_alloc_image_component(lv, NULL, ah, s + count,
565 RAID_META, &tmp_lv))
566 return_0;
567 lvl_array[s + count].lv = tmp_lv;
568 dm_list_add(new_meta_lvs, &(lvl_array[s + count].list));
569
570 if (!_alloc_image_component(lv, NULL, ah, s,
571 RAID_IMAGE, &tmp_lv))
572 return_0;
573 lvl_array[s].lv = tmp_lv;
574 dm_list_add(new_data_lvs, &(lvl_array[s].list));
575 }
576 alloc_destroy(ah);
577 return 1;
578 }
579
580 /*
581 * _alloc_rmeta_for_lv
582 * @lv
583 *
584 * Allocate a RAID metadata device for the given LV (which is or will
585 * be the associated RAID data device). The new metadata device must
586 * be allocated from the same PV(s) as the data device.
587 */
588 static int _alloc_rmeta_for_lv(struct logical_volume *data_lv,
589 struct logical_volume **meta_lv)
590 {
591 struct dm_list allocatable_pvs;
592 struct alloc_handle *ah;
593 struct lv_segment *seg = first_seg(data_lv);
594 char *p, base_name[strlen(data_lv->name) + 1];
595
596 dm_list_init(&allocatable_pvs);
597
598 if (!seg_is_linear(seg)) {
599 log_error(INTERNAL_ERROR "Unable to allocate RAID metadata "
600 "area for non-linear LV, %s", data_lv->name);
601 return 0;
602 }
603
604 sprintf(base_name, "%s", data_lv->name);
605 if ((p = strstr(base_name, "_mimage_")))
606 *p = '\0';
607
608 if (!_get_pv_list_for_lv(data_lv, &allocatable_pvs)) {
609 log_error("Failed to build list of PVs for %s/%s",
610 data_lv->vg->name, data_lv->name);
611 return 0;
612 }
613
614 if (!(ah = allocate_extents(data_lv->vg, NULL, seg->segtype, 0, 1, 0,
615 seg->region_size,
616 1 /*RAID_METADATA_AREA_LEN*/,
617 &allocatable_pvs, data_lv->alloc, NULL)))
618 return_0;
619
620 if (!_alloc_image_component(data_lv, base_name, ah, 0,
621 RAID_META, meta_lv))
622 return_0;
623
624 alloc_destroy(ah);
625 return 1;
626 }
627
/*
 * _raid_add_images
 * @lv: the LV to up-convert (linear or RAID)
 * @new_count: the absolute number of images the LV should have afterwards
 * @pvs: allocatable PVs for the new images
 *
 * Allocate and insert 'new_count - old_count' new rmeta/rimage pairs.
 * A linear LV is first given a metadata LV and layered into a raid1 LV.
 * Commits the metadata.
 *
 * NOTE(review): suspend_lv_origin/resume_lv_origin are used here rather
 * than plain suspend/resume - presumably so the correct device tree is
 * handled when this RAID LV is under a snapshot; confirm against callers.
 *
 * Returns: 1 on success, 0 on failure
 */
static int _raid_add_images(struct logical_volume *lv,
			    uint32_t new_count, struct dm_list *pvs)
{
	int rebuild_flag_cleared = 0;
	uint32_t s;
	uint32_t old_count = lv_raid_image_count(lv);
	uint32_t count = new_count - old_count;
	uint64_t status_mask = -1;
	struct cmd_context *cmd = lv->vg->cmd;
	struct lv_segment *seg = first_seg(lv);
	struct dm_list meta_lvs, data_lvs;
	struct lv_list *lvl;
	struct lv_segment_area *new_areas;

	dm_list_init(&meta_lvs); /* For image addition */
	dm_list_init(&data_lvs); /* For image addition */

	/*
	 * If the segtype is linear, then we must allocate a metadata
	 * LV to accompany it.
	 */
	if (seg_is_linear(seg)) {
		/* A complete resync will be done, no need to mark each sub-lv */
		status_mask = ~(LV_REBUILD);

		if (!(lvl = dm_pool_alloc(lv->vg->vgmem, sizeof(*lvl)))) {
			log_error("Memory allocation failed");
			return 0;
		}

		if (!_alloc_rmeta_for_lv(lv, &lvl->lv))
			return_0;

		dm_list_add(&meta_lvs, &lvl->list);
	} else if (!seg_is_raid(seg)) {
		log_error("Unable to add RAID images to %s of segment type %s",
			  lv->name, seg->segtype->name);
		return 0;
	}

	if (!_alloc_image_components(lv, pvs, count, &meta_lvs, &data_lvs)) {
		log_error("Failed to allocate new image components");
		return 0;
	}

	/*
	 * If linear, we must correct data LV names.  They are off-by-one
	 * because the linear volume hasn't taken its proper name of "_rimage_0"
	 * yet.  This action must be done before '_clear_lvs' because it
	 * commits the LVM metadata before clearing the LVs.
	 */
	if (seg_is_linear(seg)) {
		char *name;
		size_t len;
		struct dm_list *l;
		struct lv_list *lvl_tmp;

		dm_list_iterate(l, &data_lvs) {
			if (l == dm_list_last(&data_lvs)) {
				/* Last entry gets the highest index name */
				lvl = dm_list_item(l, struct lv_list);
				len = strlen(lv->name) + strlen("_rimage_XXX");
				if (!(name = dm_pool_alloc(lv->vg->vgmem, len))) {
					log_error("Failed to allocate rimage name.");
					return 0;
				}
				sprintf(name, "%s_rimage_%u", lv->name, count);
				lvl->lv->name = name;
				continue;
			}
			/* Every other entry takes the name of its successor */
			lvl = dm_list_item(l, struct lv_list);
			lvl_tmp = dm_list_item(l->n, struct lv_list);
			lvl->lv->name = lvl_tmp->lv->name;
		}
	}

	/* Metadata LVs must be cleared before being added to the array */
	if (!_clear_lvs(&meta_lvs))
		goto fail;

	/* Convert the linear LV into a single-image raid1 layer */
	if (seg_is_linear(seg)) {
		first_seg(lv)->status |= RAID_IMAGE;
		if (!insert_layer_for_lv(lv->vg->cmd, lv,
					 RAID | LVM_READ | LVM_WRITE,
					 "_rimage_0"))
			return_0;

		lv->status |= RAID;
		seg = first_seg(lv);
		seg_lv(seg, 0)->status |= RAID_IMAGE | LVM_READ | LVM_WRITE;
		seg->region_size = RAID_REGION_SIZE;
		seg->segtype = get_segtype_from_string(lv->vg->cmd, "raid1");
		if (!seg->segtype)
			return_0;
	}
	/*
	FIXME: It would be proper to activate the new LVs here, instead of having
	them activated by the suspend.  However, this causes residual device nodes
	to be left for these sub-lvs.
	dm_list_iterate_items(lvl, &meta_lvs)
		if (!do_correct_activate(lv, lvl->lv))
			return_0;
	dm_list_iterate_items(lvl, &data_lvs)
		if (!do_correct_activate(lv, lvl->lv))
			return_0;
	*/
	/* Expand areas array */
	if (!(new_areas = dm_pool_zalloc(lv->vg->cmd->mem,
					 new_count * sizeof(*new_areas))))
		goto fail;
	memcpy(new_areas, seg->areas, seg->area_count * sizeof(*seg->areas));
	seg->areas = new_areas;

	/* Expand meta_areas array */
	if (!(new_areas = dm_pool_zalloc(lv->vg->cmd->mem,
					 new_count * sizeof(*new_areas))))
		goto fail;
	if (seg->meta_areas)
		memcpy(new_areas, seg->meta_areas,
		       seg->area_count * sizeof(*seg->meta_areas));
	seg->meta_areas = new_areas;
	seg->area_count = new_count;

	/* Add extra meta area when converting from linear */
	s = (old_count == 1) ? 0 : old_count;

	/* Set segment areas for metadata sub_lvs */
	dm_list_iterate_items(lvl, &meta_lvs) {
		log_debug("Adding %s to %s",
			  lvl->lv->name, lv->name);
		/* status_mask drops LV_REBUILD for a full-resync up-convert */
		lvl->lv->status &= status_mask;
		first_seg(lvl->lv)->status &= status_mask;
		if (!set_lv_segment_area_lv(seg, s, lvl->lv, 0,
					    lvl->lv->status)) {
			log_error("Failed to add %s to %s",
				  lvl->lv->name, lv->name);
			goto fail;
		}
		s++;
	}

	s = old_count;

	/* Set segment areas for data sub_lvs */
	dm_list_iterate_items(lvl, &data_lvs) {
		log_debug("Adding %s to %s",
			  lvl->lv->name, lv->name);
		lvl->lv->status &= status_mask;
		first_seg(lvl->lv)->status &= status_mask;
		if (!set_lv_segment_area_lv(seg, s, lvl->lv, 0,
					    lvl->lv->status)) {
			log_error("Failed to add %s to %s",
				  lvl->lv->name, lv->name);
			goto fail;
		}
		s++;
	}

	/*
	 * FIXME: Failure handling during these points is harder.
	 */
	dm_list_iterate_items(lvl, &meta_lvs)
		lv_set_hidden(lvl->lv);
	dm_list_iterate_items(lvl, &data_lvs)
		lv_set_hidden(lvl->lv);

	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv_origin(cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!resume_lv_origin(cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Now that the 'REBUILD' has made its way to the kernel, we must
	 * remove the flag so that the individual devices are not rebuilt
	 * upon every activation.
	 */
	seg = first_seg(lv);
	for (s = 0; s < seg->area_count; s++) {
		if ((seg_lv(seg, s)->status & LV_REBUILD) ||
		    (seg_metalv(seg, s)->status & LV_REBUILD)) {
			seg_metalv(seg, s)->status &= ~LV_REBUILD;
			seg_lv(seg, s)->status &= ~LV_REBUILD;
			rebuild_flag_cleared = 1;
		}
	}
	if (rebuild_flag_cleared &&
	    (!vg_write(lv->vg) || !vg_commit(lv->vg))) {
		log_error("Failed to clear REBUILD flag for %s/%s components",
			  lv->vg->name, lv->name);
		return 0;
	}

	return 1;

fail:
	/* Cleanly remove newly-allocated LVs that failed insertion attempt */

	dm_list_iterate_items(lvl, &meta_lvs)
		if (!lv_remove(lvl->lv))
			return_0;
	dm_list_iterate_items(lvl, &data_lvs)
		if (!lv_remove(lvl->lv))
			return_0;
	return_0;
}
851
/*
 * _extract_image_components
 * @seg
 * @idx: The index in the areas array to remove
 * @extracted_rmeta: The displaced metadata LV
 * @extracted_rimage: The displaced data LV
 *
 * This function extracts the image components - setting the respective
 * 'extracted' pointers.  It appends '_extracted' to the LVs' names, so that
 * there are not future conflicts.  It does /not/ commit the results.
 * (IOW, erroring-out requires no unwinding of operations.)
 *
 * This function does /not/ attempt to:
 *    1) shift the 'areas' or 'meta_areas' arrays.
 *       The '[meta_]areas' are left as AREA_UNASSIGNED.
 *    2) Adjust the seg->area_count
 *    3) Name the extracted LVs appropriately (appends '_extracted' to names)
 * These actions must be performed by the caller.
 *
 * Returns: 1 on success, 0 on failure
 */
static int _extract_image_components(struct lv_segment *seg, uint32_t idx,
				     struct logical_volume **extracted_rmeta,
				     struct logical_volume **extracted_rimage)
{
	int len;
	char *tmp_name;
	struct volume_group *vg = seg->lv->vg;
	struct logical_volume *data_lv = seg_lv(seg, idx);
	struct logical_volume *meta_lv = seg_metalv(seg, idx);

	log_very_verbose("Extracting image components %s and %s from %s",
			 data_lv->name, meta_lv->name, seg->lv->name);

	/* Detach from the array: no longer RAID components, now visible LVs */
	data_lv->status &= ~RAID_IMAGE;
	meta_lv->status &= ~RAID_META;
	lv_set_visible(data_lv);
	lv_set_visible(meta_lv);

	/* release removes data and meta areas */
	remove_seg_from_segs_using_this_lv(data_lv, seg);
	remove_seg_from_segs_using_this_lv(meta_lv, seg);

	seg_type(seg, idx) = AREA_UNASSIGNED;
	seg_metatype(seg, idx) = AREA_UNASSIGNED;

	/* Rename to "<name>_extracted" to avoid clashes after shifting */
	len = strlen(meta_lv->name) + strlen("_extracted") + 1;
	tmp_name = dm_pool_alloc(vg->vgmem, len);
	if (!tmp_name)
		return_0;
	sprintf(tmp_name, "%s_extracted", meta_lv->name);
	meta_lv->name = tmp_name;

	len = strlen(data_lv->name) + strlen("_extracted") + 1;
	tmp_name = dm_pool_alloc(vg->vgmem, len);
	if (!tmp_name)
		return_0;
	sprintf(tmp_name, "%s_extracted", data_lv->name);
	data_lv->name = tmp_name;

	*extracted_rmeta = meta_lv;
	*extracted_rimage = data_lv;

	return 1;
}
917
918 /*
919 * _raid_extract_images
920 * @lv
921 * @new_count: The absolute count of images (e.g. '2' for a 2-way mirror)
922 * @target_pvs: The list of PVs that are candidates for removal
923 * @shift: If set, use _shift_and_rename_image_components().
924 * Otherwise, leave the [meta_]areas as AREA_UNASSIGNED and
925 * seg->area_count unchanged.
926 * @extracted_[meta|data]_lvs: The LVs removed from the array. If 'shift'
927 * is set, then there will likely be name conflicts.
928 *
929 * This function extracts _both_ portions of the indexed image. It
930 * does /not/ commit the results. (IOW, erroring-out requires no unwinding
931 * of operations.)
932 *
933 * Returns: 1 on success, 0 on failure
934 */
935 static int _raid_extract_images(struct logical_volume *lv, uint32_t new_count,
936 struct dm_list *target_pvs, int shift,
937 struct dm_list *extracted_meta_lvs,
938 struct dm_list *extracted_data_lvs)
939 {
940 int s, extract, lvl_idx = 0;
941 struct lv_list *lvl_array;
942 struct lv_segment *seg = first_seg(lv);
943 struct logical_volume *rmeta_lv, *rimage_lv;
944
945 extract = seg->area_count - new_count;
946 log_verbose("Extracting %u %s from %s/%s", extract,
947 (extract > 1) ? "images" : "image",
948 lv->vg->name, lv->name);
949
950 lvl_array = dm_pool_alloc(lv->vg->vgmem,
951 sizeof(*lvl_array) * extract * 2);
952 if (!lvl_array)
953 return_0;
954
955 for (s = seg->area_count - 1; (s >= 0) && extract; s--) {
956 if (!_lv_is_on_pvs(seg_lv(seg, s), target_pvs) ||
957 !_lv_is_on_pvs(seg_metalv(seg, s), target_pvs))
958 continue;
959 if (!_raid_in_sync(lv) &&
960 (!seg_is_mirrored(seg) || (s == 0))) {
961 log_error("Unable to extract %sRAID image"
962 " while RAID array is not in-sync",
963 seg_is_mirrored(seg) ? "primary " : "");
964 return 0;
965 }
966
967 if (!_extract_image_components(seg, s, &rmeta_lv, &rimage_lv)) {
968 log_error("Failed to extract %s from %s",
969 seg_lv(seg, s)->name, lv->name);
970 return 0;
971 }
972
973 if (shift && !_shift_and_rename_image_components(seg)) {
974 log_error("Failed to shift and rename image components");
975 return 0;
976 }
977
978 lvl_array[lvl_idx].lv = rmeta_lv;
979 lvl_array[lvl_idx + 1].lv = rimage_lv;
980 dm_list_add(extracted_meta_lvs, &(lvl_array[lvl_idx++].list));
981 dm_list_add(extracted_data_lvs, &(lvl_array[lvl_idx++].list));
982
983 extract--;
984 }
985 if (extract) {
986 log_error("Unable to extract enough images to satisfy request");
987 return 0;
988 }
989
990 return 1;
991 }
992
/*
 * _raid_remove_images
 * @lv
 * @new_count: the absolute remaining image count (1 => convert to linear)
 * @pvs: candidate PVs from which images should be removed
 *
 * Extract the surplus rmeta/rimage pairs, commit the metadata, resume
 * everything in an order that avoids device-name collisions, and finally
 * deactivate and remove the extracted LVs.
 *
 * Returns: 1 on success, 0 on failure
 */
static int _raid_remove_images(struct logical_volume *lv,
			       uint32_t new_count, struct dm_list *pvs)
{
	struct dm_list removal_list;
	struct lv_list *lvl;

	dm_list_init(&removal_list);

	if (!_raid_extract_images(lv, new_count, pvs, 1,
				  &removal_list, &removal_list)) {
		log_error("Failed to extract images from %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	/* Convert to linear? */
	if ((new_count == 1) && !_raid_remove_top_layer(lv, &removal_list)) {
		log_error("Failed to remove RAID layer after linear conversion");
		return 0;
	}

	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv(lv->vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	/*
	 * We resume the extracted sub-LVs first so they are renamed
	 * and won't conflict with the remaining (possibly shifted)
	 * sub-LVs.
	 */
	dm_list_iterate_items(lvl, &removal_list) {
		if (!resume_lv(lv->vg->cmd, lvl->lv)) {
			log_error("Failed to resume extracted LVs");
			return 0;
		}
	}

	/*
	 * Resume the remaining LVs
	 * We must start by resuming the sub-LVs first (which would
	 * otherwise be handled automatically) because the shifting
	 * of positions could otherwise cause name collisions.  For
	 * example, if position 0 of a 3-way array is removed, position
	 * 1 and 2 must be shifted and renamed 0 and 1.  If position 2
	 * tries to rename first, it will collide with the existing
	 * position 1.
	 */
	if (!_bottom_up_resume(lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Eliminate the extracted LVs
	 */
	sync_local_dev_names(lv->vg->cmd);
	if (!dm_list_empty(&removal_list)) {
		dm_list_iterate_items(lvl, &removal_list) {
			if (!deactivate_lv(lv->vg->cmd, lvl->lv))
				return_0;
			if (!lv_remove(lvl->lv))
				return_0;
		}

		/* Commit the removal of the extracted LVs */
		if (!vg_write(lv->vg) || !vg_commit(lv->vg))
			return_0;
	}

	return 1;
}
1078
1079 /*
1080 * lv_raid_change_image_count
1081 * @lv
1082 * @new_count: The absolute count of images (e.g. '2' for a 2-way mirror)
1083 * @pvs: The list of PVs that are candidates for removal (or empty list)
1084 *
1085 * RAID arrays have 'images' which are composed of two parts, they are:
1086 * - 'rimage': The data/parity holding portion
1087 * - 'rmeta' : The metadata holding portion (i.e. superblock/bitmap area)
1088 * This function adds or removes _both_ portions of the image and commits
1089 * the results.
1090 *
1091 * Returns: 1 on success, 0 on failure
1092 */
1093 int lv_raid_change_image_count(struct logical_volume *lv,
1094 uint32_t new_count, struct dm_list *pvs)
1095 {
1096 uint32_t old_count = lv_raid_image_count(lv);
1097
1098 if (old_count == new_count) {
1099 log_error("%s/%s already has image count of %d",
1100 lv->vg->name, lv->name, new_count);
1101 return 1;
1102 }
1103
1104 if (old_count > new_count)
1105 return _raid_remove_images(lv, new_count, pvs);
1106
1107 return _raid_add_images(lv, new_count, pvs);
1108 }
1109
1110 int lv_raid_split(struct logical_volume *lv, const char *split_name,
1111 uint32_t new_count, struct dm_list *splittable_pvs)
1112 {
1113 const char *old_name;
1114 struct lv_list *lvl;
1115 struct dm_list removal_list, data_list;
1116 struct cmd_context *cmd = lv->vg->cmd;
1117 uint32_t old_count = lv_raid_image_count(lv);
1118 struct logical_volume *tracking;
1119 struct dm_list tracking_pvs;
1120
1121 dm_list_init(&removal_list);
1122 dm_list_init(&data_list);
1123
1124 if ((old_count - new_count) != 1) {
1125 log_error("Unable to split more than one image from %s/%s",
1126 lv->vg->name, lv->name);
1127 return 0;
1128 }
1129
1130 if (!seg_is_mirrored(first_seg(lv))) {
1131 log_error("Unable to split logical volume of segment type, %s",
1132 first_seg(lv)->segtype->name);
1133 return 0;
1134 }
1135
1136 if (find_lv_in_vg(lv->vg, split_name)) {
1137 log_error("Logical Volume \"%s\" already exists in %s",
1138 split_name, lv->vg->name);
1139 return 0;
1140 }
1141
1142 if (!_raid_in_sync(lv)) {
1143 log_error("Unable to split %s/%s while it is not in-sync.",
1144 lv->vg->name, lv->name);
1145 return 0;
1146 }
1147
1148 /*
1149 * We only allow a split while there is tracking if it is to
1150 * complete the split of the tracking sub-LV
1151 */
1152 if (_lv_is_raid_with_tracking(lv, &tracking)) {
1153 if (!_lv_is_on_pvs(tracking, splittable_pvs)) {
1154 log_error("Unable to split additional image from %s "
1155 "while tracking changes for %s",
1156 lv->name, tracking->name);
1157 return 0;
1158 } else {
1159 /* Ensure we only split the tracking image */
1160 dm_list_init(&tracking_pvs);
1161 splittable_pvs = &tracking_pvs;
1162 if (!_get_pv_list_for_lv(tracking, splittable_pvs))
1163 return_0;
1164 }
1165 }
1166
1167 if (!_raid_extract_images(lv, new_count, splittable_pvs, 1,
1168 &removal_list, &data_list)) {
1169 log_error("Failed to extract images from %s/%s",
1170 lv->vg->name, lv->name);
1171 return 0;
1172 }
1173
1174 /* Convert to linear? */
1175 if ((new_count == 1) && !_raid_remove_top_layer(lv, &removal_list)) {
1176 log_error("Failed to remove RAID layer after linear conversion");
1177 return 0;
1178 }
1179
1180 /* Get first item */
1181 dm_list_iterate_items(lvl, &data_list)
1182 break;
1183
1184 old_name = lvl->lv->name;
1185 lvl->lv->name = split_name;
1186
1187 if (!vg_write(lv->vg)) {
1188 log_error("Failed to write changes to %s in %s",
1189 lv->name, lv->vg->name);
1190 return 0;
1191 }
1192
1193 if (!suspend_lv(cmd, lv)) {
1194 log_error("Failed to suspend %s/%s before committing changes",
1195 lv->vg->name, lv->name);
1196 return 0;
1197 }
1198
1199 if (!vg_commit(lv->vg)) {
1200 log_error("Failed to commit changes to %s in %s",
1201 lv->name, lv->vg->name);
1202 return 0;
1203 }
1204
1205 /*
1206 * First resume the newly split LV and LVs on the removal list.
1207 * This is necessary so that there are no name collisions due to
1208 * the original RAID LV having possibly had sub-LVs that have been
1209 * shifted and renamed.
1210 */
1211 if (!resume_lv(cmd, lvl->lv))
1212 return_0;
1213 dm_list_iterate_items(lvl, &removal_list)
1214 if (!resume_lv(cmd, lvl->lv))
1215 return_0;
1216
1217 /*
1218 * Resume the remaining LVs
1219 * We must start by resuming the sub-LVs first (which would
1220 * otherwise be handled automatically) because the shifting
1221 * of positions could otherwise cause name collisions. For
1222 * example, if position 0 of a 3-way array is split, position
1223 * 1 and 2 must be shifted and renamed 0 and 1. If position 2
1224 * tries to rename first, it will collide with the existing
1225 * position 1.
1226 */
1227 if (!_bottom_up_resume(lv)) {
1228 log_error("Failed to resume %s/%s after committing changes",
1229 lv->vg->name, lv->name);
1230 return 0;
1231 }
1232
1233 /*
1234 * Eliminate the residual LVs
1235 */
1236 dm_list_iterate_items(lvl, &removal_list) {
1237 if (!deactivate_lv(cmd, lvl->lv))
1238 return_0;
1239
1240 if (!lv_remove(lvl->lv))
1241 return_0;
1242 }
1243
1244 if (!vg_write(lv->vg) || !vg_commit(lv->vg))
1245 return_0;
1246
1247 return 1;
1248 }
1249
1250 /*
1251 * lv_raid_split_and_track
1252 * @lv
1253 * @splittable_pvs
1254 *
1255 * Only allows a single image to be split while tracking. The image
1256 * never actually leaves the mirror. It is simply made visible. This
1257 * action triggers two things: 1) users are able to access the (data) image
1258 * and 2) lower layers replace images marked with a visible flag with
1259 * error targets.
1260 *
1261 * Returns: 1 on success, 0 on error
1262 */
1263 int lv_raid_split_and_track(struct logical_volume *lv,
1264 struct dm_list *splittable_pvs)
1265 {
1266 int s;
1267 struct lv_segment *seg = first_seg(lv);
1268
1269 if (!seg_is_mirrored(seg)) {
1270 log_error("Unable to split images from non-mirrored RAID");
1271 return 0;
1272 }
1273
1274 if (!_raid_in_sync(lv)) {
1275 log_error("Unable to split image from %s/%s while not in-sync",
1276 lv->vg->name, lv->name);
1277 return 0;
1278 }
1279
1280 /* Cannot track two split images at once */
1281 if (lv_is_raid_with_tracking(lv)) {
1282 log_error("Cannot track more than one split image at a time");
1283 return 0;
1284 }
1285
1286 for (s = seg->area_count - 1; s >= 0; s--) {
1287 if (!_lv_is_on_pvs(seg_lv(seg, s), splittable_pvs))
1288 continue;
1289 lv_set_visible(seg_lv(seg, s));
1290 seg_lv(seg, s)->status &= ~LVM_WRITE;
1291 break;
1292 }
1293
1294 if (s >= seg->area_count) {
1295 log_error("Unable to find image to satisfy request");
1296 return 0;
1297 }
1298
1299 if (!vg_write(lv->vg)) {
1300 log_error("Failed to write changes to %s in %s",
1301 lv->name, lv->vg->name);
1302 return 0;
1303 }
1304
1305 if (!suspend_lv(lv->vg->cmd, lv)) {
1306 log_error("Failed to suspend %s/%s before committing changes",
1307 lv->vg->name, lv->name);
1308 return 0;
1309 }
1310
1311 if (!vg_commit(lv->vg)) {
1312 log_error("Failed to commit changes to %s in %s",
1313 lv->name, lv->vg->name);
1314 return 0;
1315 }
1316
1317 log_print("%s split from %s for read-only purposes.",
1318 seg_lv(seg, s)->name, lv->name);
1319
1320 /* Resume original LV */
1321 if (!resume_lv(lv->vg->cmd, lv)) {
1322 log_error("Failed to resume %s/%s after committing changes",
1323 lv->vg->name, lv->name);
1324 return 0;
1325 }
1326
1327 /* Activate the split (and tracking) LV */
1328 if (!_activate_sublv_preserving_excl(lv, seg_lv(seg, s)))
1329 return 0;
1330
1331 log_print("Use 'lvconvert --merge %s/%s' to merge back into %s",
1332 lv->vg->name, seg_lv(seg, s)->name, lv->name);
1333 return 1;
1334 }
1335
/*
 * lv_raid_merge
 * @image_lv: a previously split-off, read-only image LV whose name still
 *            carries the "<lv>_rimage_<n>" pattern of its parent array.
 *
 * Merge a tracked split image back into its RAID1 array.  The image never
 * left the array's segment areas (see lv_raid_split_and_track), so merging
 * consists of re-hiding it, restoring its write status, and committing.
 *
 * Returns: 1 on success, 0 on failure
 */
int lv_raid_merge(struct logical_volume *image_lv)
{
	uint32_t s;
	char *p, *lv_name;
	struct lv_list *lvl;
	struct logical_volume *lv;
	struct logical_volume *meta_lv = NULL;
	struct lv_segment *seg;
	struct volume_group *vg = image_lv->vg;

	/* Work on a copy so we can truncate it in place below */
	lv_name = dm_pool_strdup(vg->vgmem, image_lv->name);
	if (!lv_name)
		return_0;

	if (!(p = strstr(lv_name, "_rimage_"))) {
		log_error("Unable to merge non-mirror image %s/%s",
			  vg->name, image_lv->name);
		return 0;
	}
	*p = '\0'; /* lv_name is now that of top-level RAID */

	/* A tracked image was made read-only at split time; anything
	 * writable was not split with --trackchanges. */
	if (image_lv->status & LVM_WRITE) {
		log_error("%s/%s is not read-only - refusing to merge",
			  vg->name, image_lv->name);
		return 0;
	}

	if (!(lvl = find_lv_in_vg(vg, lv_name))) {
		log_error("Unable to find containing RAID array for %s/%s",
			  vg->name, image_lv->name);
		return 0;
	}
	lv = lvl->lv;
	seg = first_seg(lv);
	/* Locate the metadata sub-LV paired with image_lv (no early break;
	 * the last match wins, though areas are unique so at most one hits) */
	for (s = 0; s < seg->area_count; s++) {
		if (seg_lv(seg, s) == image_lv) {
			meta_lv = seg_metalv(seg, s);
		}
	}
	if (!meta_lv)
		return_0;

	/* Deactivate both halves before re-hiding them in the array */
	if (!deactivate_lv(vg->cmd, meta_lv)) {
		log_error("Failed to deactivate %s", meta_lv->name);
		return 0;
	}

	if (!deactivate_lv(vg->cmd, image_lv)) {
		log_error("Failed to deactivate %s/%s before merging",
			  vg->name, image_lv->name);
		return 0;
	}
	lv_set_hidden(image_lv);
	/* Restore write permission from the parent and re-mark as RAID image */
	image_lv->status |= (lv->status & LVM_WRITE);
	image_lv->status |= RAID_IMAGE;

	/* Standard metadata commit cycle: write, suspend, commit, resume */
	if (!vg_write(vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, vg->name);
		return 0;
	}

	if (!suspend_lv(vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, vg->name);
		return 0;
	}

	if (!resume_lv(vg->cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  vg->name, lv->name);
		return 0;
	}

	log_print("%s/%s successfully merged back into %s/%s",
		  vg->name, image_lv->name,
		  vg->name, lv->name);
	return 1;
}
1421
1422 static int _convert_mirror_to_raid1(struct logical_volume *lv,
1423 const struct segment_type *new_segtype)
1424 {
1425 uint32_t s;
1426 struct lv_segment *seg = first_seg(lv);
1427 struct lv_list lvl_array[seg->area_count], *lvl;
1428 struct dm_list meta_lvs;
1429 struct lv_segment_area *meta_areas;
1430
1431 dm_list_init(&meta_lvs);
1432
1433 if (!_raid_in_sync(lv)) {
1434 log_error("Unable to convert %s/%s while it is not in-sync",
1435 lv->vg->name, lv->name);
1436 return 0;
1437 }
1438
1439 meta_areas = dm_pool_zalloc(lv->vg->vgmem,
1440 lv_mirror_count(lv) * sizeof(*meta_areas));
1441 if (!meta_areas) {
1442 log_error("Failed to allocate memory");
1443 return 0;
1444 }
1445
1446 for (s = 0; s < seg->area_count; s++) {
1447 log_debug("Allocating new metadata LV for %s",
1448 seg_lv(seg, s)->name);
1449 if (!_alloc_rmeta_for_lv(seg_lv(seg, s), &(lvl_array[s].lv))) {
1450 log_error("Failed to allocate metadata LV for %s in %s",
1451 seg_lv(seg, s)->name, lv->name);
1452 return 0;
1453 }
1454 dm_list_add(&meta_lvs, &(lvl_array[s].list));
1455 }
1456
1457 log_debug("Clearing newly allocated metadata LVs");
1458 if (!_clear_lvs(&meta_lvs)) {
1459 log_error("Failed to initialize metadata LVs");
1460 return 0;
1461 }
1462
1463 if (seg->log_lv) {
1464 log_debug("Removing mirror log, %s", seg->log_lv->name);
1465 if (!remove_mirror_log(lv->vg->cmd, lv, NULL, 0)) {
1466 log_error("Failed to remove mirror log");
1467 return 0;
1468 }
1469 }
1470
1471 seg->meta_areas = meta_areas;
1472 s = 0;
1473
1474 dm_list_iterate_items(lvl, &meta_lvs) {
1475 log_debug("Adding %s to %s", lvl->lv->name, lv->name);
1476
1477 /* Images are known to be in-sync */
1478 lvl->lv->status &= ~LV_REBUILD;
1479 first_seg(lvl->lv)->status &= ~LV_REBUILD;
1480 lv_set_hidden(lvl->lv);
1481
1482 if (!set_lv_segment_area_lv(seg, s, lvl->lv, 0,
1483 lvl->lv->status)) {
1484 log_error("Failed to add %s to %s",
1485 lvl->lv->name, lv->name);
1486 return 0;
1487 }
1488 s++;
1489 }
1490
1491 for (s = 0; s < seg->area_count; s++) {
1492 char *new_name;
1493
1494 new_name = dm_pool_zalloc(lv->vg->vgmem,
1495 strlen(lv->name) +
1496 strlen("_rimage_XXn"));
1497 if (!new_name) {
1498 log_error("Failed to rename mirror images");
1499 return 0;
1500 }
1501
1502 sprintf(new_name, "%s_rimage_%u", lv->name, s);
1503 log_debug("Renaming %s to %s", seg_lv(seg, s)->name, new_name);
1504 seg_lv(seg, s)->name = new_name;
1505 seg_lv(seg, s)->status &= ~MIRROR_IMAGE;
1506 seg_lv(seg, s)->status |= RAID_IMAGE;
1507 }
1508 init_mirror_in_sync(1);
1509
1510 log_debug("Setting new segtype for %s", lv->name);
1511 seg->segtype = new_segtype;
1512 lv->status &= ~MIRRORED;
1513 lv->status |= RAID;
1514 seg->status |= RAID;
1515
1516 if (!vg_write(lv->vg)) {
1517 log_error("Failed to write changes to %s in %s",
1518 lv->name, lv->vg->name);
1519 return 0;
1520 }
1521
1522 if (!suspend_lv(lv->vg->cmd, lv)) {
1523 log_error("Failed to suspend %s/%s before committing changes",
1524 lv->vg->name, lv->name);
1525 return 0;
1526 }
1527
1528 if (!vg_commit(lv->vg)) {
1529 log_error("Failed to commit changes to %s in %s",
1530 lv->name, lv->vg->name);
1531 return 0;
1532 }
1533
1534 if (!resume_lv(lv->vg->cmd, lv)) {
1535 log_error("Failed to resume %s/%s after committing changes",
1536 lv->vg->name, lv->name);
1537 return 0;
1538 }
1539
1540 return 1;
1541 }
1542
1543 /*
1544 * lv_raid_reshape
1545 * @lv
1546 * @new_segtype
1547 *
1548 * Convert an LV from one RAID type (or 'mirror' segtype) to another.
1549 *
1550 * Returns: 1 on success, 0 on failure
1551 */
1552 int lv_raid_reshape(struct logical_volume *lv,
1553 const struct segment_type *new_segtype)
1554 {
1555 struct lv_segment *seg = first_seg(lv);
1556
1557 if (!new_segtype) {
1558 log_error(INTERNAL_ERROR "New segtype not specified");
1559 return 0;
1560 }
1561
1562 if (!strcmp(seg->segtype->name, "mirror") &&
1563 (!strcmp(new_segtype->name, "raid1")))
1564 return _convert_mirror_to_raid1(lv, new_segtype);
1565
1566 log_error("Converting the segment type for %s/%s from %s to %s"
1567 " is not yet supported.", lv->vg->name, lv->name,
1568 seg->segtype->name, new_segtype->name);
1569 return 0;
1570 }
1571
/*
 * lv_raid_replace
 * @lv
 * @remove_pvs: PVs whose images (rimage and/or rmeta) are to be replaced
 * @allocate_pvs: candidate PVs for the replacement images
 *
 * Replace the specified PVs: allocate fresh rimage/rmeta pairs, extract
 * every image that touches @remove_pvs, wire the new pairs into the freed
 * slots under temporary names, commit, then rename them into place in a
 * second commit cycle.  The new sub-LVs keep LV_REBUILD through the first
 * commit so the kernel resynchronizes them.
 *
 * Returns: 1 on success, 0 on failure
 */
int lv_raid_replace(struct logical_volume *lv,
		    struct dm_list *remove_pvs,
		    struct dm_list *allocate_pvs)
{
	uint32_t s, sd, match_count = 0;
	struct dm_list old_meta_lvs, old_data_lvs;
	struct dm_list new_meta_lvs, new_data_lvs;
	struct lv_segment *raid_seg = first_seg(lv);
	struct lv_list *lvl;
	/* VLA: slots [0..area_count) hold rmeta names, the rest rimage names */
	char *tmp_names[raid_seg->area_count * 2];

	dm_list_init(&old_meta_lvs);
	dm_list_init(&old_data_lvs);
	dm_list_init(&new_meta_lvs);
	dm_list_init(&new_data_lvs);

	/*
	 * How many sub-LVs are being removed?
	 */
	for (s = 0; s < raid_seg->area_count; s++) {
		if ((seg_type(raid_seg, s) == AREA_UNASSIGNED) ||
		    (seg_metatype(raid_seg, s) == AREA_UNASSIGNED)) {
			log_error("Unable to replace RAID images while the "
				  "array has unassigned areas");
			return 0;
		}

		/* A hit on either half of the pair replaces the whole image */
		if (_lv_is_on_pvs(seg_lv(raid_seg, s), remove_pvs) ||
		    _lv_is_on_pvs(seg_metalv(raid_seg, s), remove_pvs))
			match_count++;
	}

	if (!match_count) {
		log_verbose("%s/%s does not contain devices specified"
			    " for replacement", lv->vg->name, lv->name);
		return 1;
	} else if (match_count == raid_seg->area_count) {
		log_error("Unable to remove all PVs from %s/%s at once.",
			  lv->vg->name, lv->name);
		return 0;
	} else if (raid_seg->segtype->parity_devs &&
		   (match_count > raid_seg->segtype->parity_devs)) {
		/* Parity RAID can only survive losing parity_devs images */
		log_error("Unable to replace more than %u PVs from (%s) %s/%s",
			  raid_seg->segtype->parity_devs,
			  raid_seg->segtype->name, lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Allocate the new image components first
	 * - This makes it easy to avoid all currently used devs
	 * - We can immediately tell if there is enough space
	 *
	 * - We need to change the LV names when we insert them.
	 */
	if (!_alloc_image_components(lv, allocate_pvs, match_count,
				     &new_meta_lvs, &new_data_lvs)) {
		log_error("Failed to allocate replacement images for %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Remove the old images
	 * - If we did this before the allocate, we wouldn't have to rename
	 *   the allocated images, but it'd be much harder to avoid the right
	 *   PVs during allocation.
	 */
	if (!_raid_extract_images(lv, raid_seg->area_count - match_count,
				  remove_pvs, 0,
				  &old_meta_lvs, &old_data_lvs)) {
		log_error("Failed to remove the specified images from %s/%s",
			  lv->vg->name, lv->name);
		return 0;
	}

	/*
	 * Skip metadata operation normally done to clear the metadata sub-LVs.
	 *
	 * The LV_REBUILD flag is set on the new sub-LVs,
	 * so they will be rebuilt and we don't need to clear the metadata dev.
	 */

	/* Wire the new pairs into the areas the extraction left unassigned.
	 * The final names are stashed in tmp_names[] and applied only after
	 * the old LVs are gone, to avoid name collisions. */
	for (s = 0; s < raid_seg->area_count; s++) {
		tmp_names[s] = NULL;
		sd = s + raid_seg->area_count;
		tmp_names[sd] = NULL;

		if ((seg_type(raid_seg, s) == AREA_UNASSIGNED) &&
		    (seg_metatype(raid_seg, s) == AREA_UNASSIGNED)) {
			/* Adjust the new metadata LV name */
			lvl = dm_list_item(dm_list_first(&new_meta_lvs),
					   struct lv_list);
			dm_list_del(&lvl->list);
			tmp_names[s] = dm_pool_alloc(lv->vg->vgmem,
						     strlen(lvl->lv->name) + 1);
			if (!tmp_names[s])
				return_0;
			if (dm_snprintf(tmp_names[s], strlen(lvl->lv->name) + 1,
					"%s_rmeta_%u", lv->name, s) < 0)
				return_0;
			if (!set_lv_segment_area_lv(raid_seg, s, lvl->lv, 0,
						    lvl->lv->status)) {
				log_error("Failed to add %s to %s",
					  lvl->lv->name, lv->name);
				return 0;
			}
			lv_set_hidden(lvl->lv);

			/* Adjust the new data LV name */
			lvl = dm_list_item(dm_list_first(&new_data_lvs),
					   struct lv_list);
			dm_list_del(&lvl->list);
			tmp_names[sd] = dm_pool_alloc(lv->vg->vgmem,
						      strlen(lvl->lv->name) + 1);
			if (!tmp_names[sd])
				return_0;
			if (dm_snprintf(tmp_names[sd], strlen(lvl->lv->name) + 1,
					"%s_rimage_%u", lv->name, s) < 0)
				return_0;
			if (!set_lv_segment_area_lv(raid_seg, s, lvl->lv, 0,
						    lvl->lv->status)) {
				log_error("Failed to add %s to %s",
					  lvl->lv->name, lv->name);
				return 0;
			}
			lv_set_hidden(lvl->lv);
		}
	}

	/* First commit cycle: swap in the replacement images.
	 * NOTE(review): the *_origin suspend/resume variants are used so the
	 * operation is handled correctly when this LV is a snapshot origin -
	 * confirm semantics against activate.c. */
	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv_origin(lv->vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!resume_lv_origin(lv->vg->cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	/* The old images are now detached from the array; remove them */
	dm_list_iterate_items(lvl, &old_meta_lvs) {
		if (!deactivate_lv(lv->vg->cmd, lvl->lv))
			return_0;
		if (!lv_remove(lvl->lv))
			return_0;
	}
	dm_list_iterate_items(lvl, &old_data_lvs) {
		if (!deactivate_lv(lv->vg->cmd, lvl->lv))
			return_0;
		if (!lv_remove(lvl->lv))
			return_0;
	}

	/* Update new sub-LVs to correct name and clear REBUILD flag */
	for (s = 0; s < raid_seg->area_count; s++) {
		sd = s + raid_seg->area_count;
		if (tmp_names[s] && tmp_names[sd]) {
			seg_metalv(raid_seg, s)->name = tmp_names[s];
			seg_lv(raid_seg, s)->name = tmp_names[sd];
			seg_metalv(raid_seg, s)->status &= ~LV_REBUILD;
			seg_lv(raid_seg, s)->status &= ~LV_REBUILD;
		}
	}

	/* Second commit cycle: make the renames and flag changes permanent */
	if (!vg_write(lv->vg)) {
		log_error("Failed to write changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!suspend_lv_origin(lv->vg->cmd, lv)) {
		log_error("Failed to suspend %s/%s before committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!vg_commit(lv->vg)) {
		log_error("Failed to commit changes to %s in %s",
			  lv->name, lv->vg->name);
		return 0;
	}

	if (!resume_lv_origin(lv->vg->cmd, lv)) {
		log_error("Failed to resume %s/%s after committing changes",
			  lv->vg->name, lv->name);
		return 0;
	}

	return 1;
}
This page took 0.117218 seconds and 5 git commands to generate.