sourceware.org Git - lvm2.git/commitdiff
Fix for bug 637936: killing both redundant logs causes deadlock
author Jonathan Earl Brassow <jbrassow@redhat.com>
Thu, 14 Oct 2010 20:03:12 +0000 (20:03 +0000)
committer Jonathan Earl Brassow <jbrassow@redhat.com>
Thu, 14 Oct 2010 20:03:12 +0000 (20:03 +0000)
Problem:
When both legs of a mirrored log fail, neither the log nor the parent
mirror can proceed.  The repair code must be careful to replace the
log with an error target before operating on the parent - otherwise,
the parent can get stuck trying to suspend because it can't push through
any writes.  The steps to replace the log device with an error target
were incomplete and resulted in the replacement not happening at all!

The code originally had all the necessary logic to complete the
replacement task, but that logic was pulled out in an effort to clean up
that section of code while fixing another bug:
<offending commit msg>
In addition, I added following three changes.

- Removed tmp_orphan_lvs handling procedure
  It seems that _delete_lv() can handle detached_log_lv properly
  without adding mirror legs in mirrored log to tmp_orphan_lvs.
  Therefore, I removed the procedure.

- Removed vg_write()/vg_commit()
  Metadata is saved by vg_write()/vg_commit() just after detached_log_lv
  is handled. Therefore, I removed vg_write()/vg_commit().
</offending commit msg>

http://sources.redhat.com/cgi-bin/cvsweb.cgi/LVM2/lib/metadata/mirror.c?cvsroot=lvm2&f=h#rev1.130

I've reverted the "clean-up" changes associated with that fix, but kept
the part of that commit that actually fixed the bug it was addressing.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Reviewed-by: Petr Rockai <prockai@redhat.com>
WHATS_NEW
lib/metadata/lv_manip.c
lib/metadata/mirror.c

index 61e9714916c408f720382aed90f413ddbe5660db..e03e7014ea8301f831d741e5b0bc5832158a0235 100644 (file)
--- a/WHATS_NEW
+++ b/WHATS_NEW
@@ -1,5 +1,6 @@
 Version 2.02.75 - 
 =====================================
+  Fix hang when repairing a mirrored-log that had both devs fail.
   Convey need for snapshot-merge target in lvconvert error message and man page.
   Add "devices/disable_after_error_count" to lvm.conf.
   Give correct error message when creating a too-small snapshot.
index 5044376eb4dbb4c13c5e206192697ccb8250e0f0..e4349cc2c492a1ad17a349f51d91920f07223fd2 100644 (file)
@@ -462,6 +462,15 @@ int replace_lv_with_error_segment(struct logical_volume *lv)
        if (!lv_empty(lv))
                return_0;
 
+       /*
+        * Since we are replacing the whatever-was-there with
+        * an error segment, we should also clear any flags
+        * that suggest it is anything other than "error".
+        */
+       lv->status &= ~MIRRORED;
+
+       /* FIXME: Should we bug if we find a log_lv attached? */
+
        if (!lv_add_virtual_segment(lv, 0, len,
                                    get_segtype_from_string(lv->vg->cmd,
                                                            "error")))
index 083e9cd6c089bbbec23127a1b8a01d82ca32334b..6224132ea143ed301b3ea7bd57db3b9e42307c4c 100644 (file)
@@ -896,18 +896,40 @@ static int _remove_mirror_images(struct logical_volume *lv,
         */
        if (detached_log_lv && lv_is_mirrored(detached_log_lv) &&
            (detached_log_lv->status & PARTIAL_LV)) {
+               struct lv_segment *seg = first_seg(detached_log_lv);
+
                log_very_verbose("%s being removed due to failures",
                                 detached_log_lv->name);
 
+               /*
+                * We are going to replace the mirror with an
+                * error segment, but before we do, we must remember
+                * all of the LVs that must be deleted later (i.e.
+                * the sub-lv's)
+                */
+               for (m = 0; m < seg->area_count; m++) {
+                       seg_lv(seg, m)->status &= ~MIRROR_IMAGE;
+                       lv_set_visible(seg_lv(seg, m));
+                       if (!(lvl = dm_pool_alloc(lv->vg->cmd->mem,
+                                                 sizeof(*lvl)))) {
+                               log_error("dm_pool_alloc failed");
+                               return 0;
+                       }
+                       lvl->lv = seg_lv(seg, m);
+                       dm_list_add(&tmp_orphan_lvs, &lvl->list);
+               }
+
                if (!replace_lv_with_error_segment(detached_log_lv)) {
                        log_error("Failed error target substitution for %s",
                                  detached_log_lv->name);
                        return 0;
                }
 
-               /*
-                * Flush all I/Os held by mirrored log.
-                */
+               if (!vg_write(detached_log_lv->vg)) {
+                       log_error("intermediate VG write failed.");
+                       return 0;
+               }
+
                if (!suspend_lv(detached_log_lv->vg->cmd,
                                detached_log_lv)) {
                        log_error("Failed to suspend %s",
@@ -915,8 +937,14 @@ static int _remove_mirror_images(struct logical_volume *lv,
                        return 0;
                }
 
-               if (!resume_lv(detached_log_lv->vg->cmd,
-                              detached_log_lv)) {
+               if (!vg_commit(detached_log_lv->vg)) {
+                       if (!resume_lv(detached_log_lv->vg->cmd,
+                                      detached_log_lv))
+                               stack;
+                       return_0;
+               }
+
+               if (!resume_lv(detached_log_lv->vg->cmd, detached_log_lv)) {
                        log_error("Failed to resume %s",
                                  detached_log_lv->name);
                        return_0;
This page took 0.053702 seconds and 5 git commands to generate.