sourceware.org Git - lvm2.git/commitdiff
lvmlockd: use lvmlock LV size 1470849140
author David Teigland <teigland@redhat.com>
Wed, 25 Sep 2024 21:18:32 +0000 (16:18 -0500)
committer David Teigland <teigland@redhat.com>
Thu, 26 Sep 2024 13:53:09 +0000 (08:53 -0500)
Previously, lvmlockd detected the end of the lvmlock LV
by doing i/o to it until an i/o error was returned.
This triggered sanlock warning messages, so use the LV
size to avoid accessing beyond the end of the device.

Previously, every lvcreate would refresh the lvmlock LV
in case another machine had extended it.  This involves
a lot of unnecessary work in most cases, so now compare
the LV size and device size to detect when a refresh is
needed.

daemons/lvmlockd/lvmlockd-core.c
daemons/lvmlockd/lvmlockd-internal.h
daemons/lvmlockd/lvmlockd-sanlock.c
lib/locking/lvmlockd.c

index 733e7182118d71d3d26d001bceba0c33cd229bcd..4b2d28c22608c4d08c9fadf1001a11d6b4886795 100644 (file)
@@ -1177,12 +1177,12 @@ static void lm_rem_resource(struct lockspace *ls, struct resource *r)
                lm_rem_resource_idm(ls, r);
 }
 
-static int lm_find_free_lock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size)
+static int lm_find_free_lock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size)
 {
        if (ls->lm_type == LD_LM_DLM)
                return 0;
        else if (ls->lm_type == LD_LM_SANLOCK)
-               return lm_find_free_lock_sanlock(ls, free_offset, sector_size, align_size);
+               return lm_find_free_lock_sanlock(ls, lv_size_bytes, free_offset, sector_size, align_size);
        else if (ls->lm_type == LD_LM_IDM)
                return 0;
        return -1;
@@ -2717,7 +2717,7 @@ static void *lockspace_thread_main(void *arg_in)
                                int align_size = 0;
 
                                log_debug("S %s find free lock", ls->name);
-                               rv = lm_find_free_lock(ls, &free_offset, &sector_size, &align_size);
+                               rv = lm_find_free_lock(ls, act->lv_size_bytes, &free_offset, &sector_size, &align_size);
                                log_debug("S %s find free lock %d offset %llu sector_size %d align_size %d",
                                          ls->name, rv, (unsigned long long)free_offset, sector_size, align_size);
                                ls->free_lock_offset = free_offset;
@@ -5032,6 +5032,8 @@ static void client_recv_action(struct client *cl)
        if (val)
                act->host_id = val;
 
+       act->lv_size_bytes = (uint64_t)dm_config_find_int64(req.cft->root, "lv_size_bytes", 0);
+
        /* Create PV list for idm */
        if (lm == LD_LM_IDM) {
                memset(&pvs, 0x0, sizeof(pvs));
index 6034349b2887c2a078bc3647ddd679335b79a987..fbbefbeaa50d8f30e330c0f2af5861249a9c09a1 100644 (file)
@@ -132,6 +132,7 @@ struct action {
        uint32_t flags;                 /* LD_AF_ */
        uint32_t version;
        uint64_t host_id;
+       uint64_t lv_size_bytes;
        int8_t op;                      /* operation type LD_OP_ */
        int8_t rt;                      /* resource type LD_RT_ */
        int8_t mode;                    /* lock mode LD_LK_ */
@@ -527,7 +528,7 @@ int lm_gl_is_enabled(struct lockspace *ls);
 int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin);
 int lm_data_size_sanlock(void);
 int lm_is_running_sanlock(void);
-int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size);
+int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size);
 
 static inline int lm_support_sanlock(void)
 {
@@ -630,7 +631,7 @@ static inline int lm_is_running_sanlock(void)
        return 0;
 }
 
-static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size)
+static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size)
 {
        return -1;
 }
index 7492dbc90344153a759b203073618c7b88459d14..55743cffbcbbe6380ad472a051c4815d6cfc39ef 100644 (file)
@@ -339,14 +339,16 @@ fail:
        return rv;
 }
 
-static void _read_sysfs_size(dev_t devno, const char *name, unsigned int *val)
+static void _read_sysfs_size(dev_t devno, const char *name, uint64_t *val)
 {
        char path[PATH_MAX];
        char buf[32];
        FILE *fp;
        size_t len;
 
-       snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/queue/%s",
+       *val = 0;
+
+       snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/%s",
                 (int)major(devno), (int)minor(devno), name);
 
        if (!(fp = fopen(path, "r")))
@@ -359,20 +361,19 @@ static void _read_sysfs_size(dev_t devno, const char *name, unsigned int *val)
                buf[--len] = '\0';
 
        if (strlen(buf))
-               *val = atoi(buf);
+               *val = strtoull(buf, NULL, 0);
 out:
-       if (fclose(fp))
-               log_debug("Failed to fclose host id file %s (%s).", path, strerror(errno));
-
+       (void)fclose(fp);
 }
 
 /* Select sector/align size for a new VG based on what the device reports for
    sector size of the lvmlock LV. */
 
-static int get_sizes_device(char *path, int *sector_size, int *align_size)
+static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, int *align_size)
 {
        unsigned int physical_block_size = 0;
        unsigned int logical_block_size = 0;
+       uint64_t val;
        struct stat st;
        int rv;
 
@@ -382,8 +383,14 @@ static int get_sizes_device(char *path, int *sector_size, int *align_size)
                return -1;
        }
 
-       _read_sysfs_size(st.st_rdev, "physical_block_size", &physical_block_size);
-       _read_sysfs_size(st.st_rdev, "logical_block_size", &logical_block_size);
+       _read_sysfs_size(st.st_rdev, "size", &val);
+       *dev_size = val * 512;
+
+       _read_sysfs_size(st.st_rdev, "queue/physical_block_size", &val);
+       physical_block_size = (unsigned int)val;
+
+       _read_sysfs_size(st.st_rdev, "queue/logical_block_size", &val);
+       logical_block_size = (unsigned int)val;
 
        if ((physical_block_size == 512) && (logical_block_size == 512)) {
                *sector_size = 512;
@@ -508,6 +515,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
        uint32_t daemon_version;
        uint32_t daemon_proto;
        uint64_t offset;
+       uint64_t dev_size;
        int sector_size = 0;
        int align_size = 0;
        int i, rv;
@@ -555,7 +563,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
                  daemon_version, daemon_proto);
 
        /* Nothing formatted on disk yet, use what the device reports. */
-       rv = get_sizes_device(disk.path, &sector_size, &align_size);
+       rv = get_sizes_device(disk.path, &dev_size, &sector_size, &align_size);
        if (rv < 0) {
                if (rv == -EACCES) {
                        log_error("S %s init_vg_san sanlock error -EACCES: no permission to access %s",
@@ -568,6 +576,9 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
                }
        }
 
+       log_debug("S %s init_vg_san %s dev_size %llu sector_size %u align_size %u",
+                 ls_name, disk.path, (unsigned long long)dev_size, sector_size, align_size);
+
        strcpy_name_len(ss.name, ls_name, SANLK_NAME_LEN);
        memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN);
        ss.host_id_disk.offset = 0;
@@ -658,6 +669,9 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
        log_debug("S %s init_vg_san clearing lv lease areas", ls_name);
 
        for (i = 0; ; i++) {
+               if (dev_size && (offset + align_size > dev_size))
+                       break;
+
                rd.rs.disks[0].offset = offset;
 
                rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
@@ -1197,7 +1211,7 @@ int lm_gl_is_enabled(struct lockspace *ls)
  * been disabled.)
  */
 
-int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size)
+int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size)
 {
        struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
        struct sanlk_resourced rd;
@@ -1244,9 +1258,31 @@ int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *
 
                memset(rd.rs.name, 0, SANLK_NAME_LEN);
 
+               /*
+                * End of the device. Older lvm versions didn't pass lv_size_bytes
+                * and just relied on sanlock_read_resource returning an error when
+                * reading beyond the device.
+                */
+               if (lv_size_bytes && (offset + lms->align_size > lv_size_bytes)) {
+                       /* end of the device */
+                       log_debug("S %s find_free_lock_san read limit offset %llu lv_size_bytes %llu",
+                                 ls->name, (unsigned long long)offset, (unsigned long long)lv_size_bytes);
+
+                       /* remember the NO SPACE offset, if no free area left,
+                        * search from this offset after extend */
+                       *free_offset = offset;
+
+                       offset = lms->align_size * LV_LOCK_BEGIN;
+                       round = 1;
+                       continue;
+               }
+
                rv = sanlock_read_resource(&rd.rs, 0);
                if (rv == -EMSGSIZE || rv == -ENOSPC) {
-                       /* This indicates the end of the device is reached. */
+                       /*
+                        * These errors indicate the end of the device is reached.
+                        * Still check this in case lv_size_bytes is not provided.
+                        */
                        log_debug("S %s find_free_lock_san read limit offset %llu",
                                  ls->name, (unsigned long long)offset);
 
index 6ea93d7475fab19ab258f87416cc15c1b5a1521b..746aacfe55dcccd888840f9fefacb112d1d0fbf5 100644 (file)
@@ -18,6 +18,7 @@
 #include "daemons/lvmlockd/lvmlockd-client.h"
 
 #include <mntent.h>
+#include <sys/ioctl.h>
 
 static daemon_handle _lvmlockd;
 static const char *_lvmlockd_socket = NULL;
@@ -493,7 +494,7 @@ static int _lockd_request(struct cmd_context *cmd,
 static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
                              const char *lock_lv_name, int num_mb)
 {
-       uint32_t lv_size_bytes;
+       uint64_t lv_size_bytes;
        uint32_t extent_bytes;
        uint32_t total_extents;
        struct logical_volume *lv;
@@ -511,6 +512,15 @@ static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
                .zero = 1,
        };
 
+       /*
+        * Make the lvmlock lv a multiple of 8 MB, i.e. a multiple of any
+        * sanlock align_size, to avoid having unused space at the end of the
+        * lvmlock LV.
+        */
+
+       if (num_mb % 8)
+               num_mb += (8 - (num_mb % 8));
+
        lv_size_bytes = num_mb * ONE_MB_IN_BYTES;  /* size of sanlock LV in bytes */
        extent_bytes = vg->extent_size * SECTOR_SIZE; /* size of one extent in bytes */
        total_extents = dm_div_up(lv_size_bytes, extent_bytes); /* number of extents in sanlock LV */
@@ -518,7 +528,8 @@ static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
 
        lv_size_bytes = total_extents * extent_bytes;
        num_mb = lv_size_bytes / ONE_MB_IN_BYTES;
-       log_debug("Creating lvmlock LV for sanlock with size %um %ub %u extents", num_mb, lv_size_bytes, lp.extents);
+       log_debug("Creating lvmlock LV for sanlock with size %um %llub %u extents",
+                 num_mb, (unsigned long long)lv_size_bytes, lp.extents);
 
        dm_list_init(&lp.tags);
 
@@ -547,11 +558,9 @@ static int _remove_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
        return 1;
 }
 
-static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, unsigned extend_mb)
+static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, unsigned extend_mb, char *lvmlock_path)
 {
        struct device *dev;
-       char path[PATH_MAX];
-       char *name;
        uint64_t old_size_bytes;
        uint64_t new_size_bytes;
        uint32_t extend_bytes;
@@ -594,23 +603,14 @@ static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
 
        new_size_bytes = lv->size * SECTOR_SIZE;
 
-       if (!(name = dm_build_dm_name(lv->vg->cmd->mem, lv->vg->name, lv->name, NULL)))
-               return_0;
-
-       if (dm_snprintf(path, sizeof(path), "%s/%s", dm_dir(), name) < 0) {
-               log_error("Extend sanlock LV %s name too long - extended size not zeroed.",
-                         display_lvname(lv));
-               return 0;
-       }
-
        log_debug("Extend sanlock LV zeroing %u bytes from offset %llu to %llu",
                  (uint32_t)(new_size_bytes - old_size_bytes),
                  (unsigned long long)old_size_bytes,
                  (unsigned long long)new_size_bytes);
 
-       log_print_unless_silent("Zeroing %u MiB on extended internal lvmlock LV...", extend_mb);
+       log_debug("Zeroing %u MiB on extended internal lvmlock LV...", extend_mb);
 
-       if (!(dev = dev_cache_get(cmd, path, NULL))) {
+       if (!(dev = dev_cache_get(cmd, lvmlock_path, NULL))) {
                log_error("Extend sanlock LV %s cannot find device.", display_lvname(lv));
                return 0;
        }
@@ -653,16 +653,27 @@ static int _refresh_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
 
 int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
 {
+       struct logical_volume *lv = vg->sanlock_lv;
        daemon_reply reply;
+       char *lvmlock_name;
+       char lvmlock_path[PATH_MAX];
        unsigned extend_mb;
+       uint64_t lv_size_bytes;
+       uint64_t dm_size_bytes;
        int result;
        int ret;
+       int fd;
 
        if (!_use_lvmlockd)
                return 1;
        if (!_lvmlockd_connected)
                return 0;
 
+       if (!lv) {
+               log_error("No internal lvmlock LV found.");
+               return 0;
+       }
+
        extend_mb = (unsigned) find_config_tree_int(cmd, global_sanlock_lv_extend_CFG, NULL);
 
        /*
@@ -672,17 +683,46 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
        if (!extend_mb)
                return 1;
 
+       lv_size_bytes = lv->size * SECTOR_SIZE;
+
+       if (!(lvmlock_name = dm_build_dm_name(cmd->mem, vg->name, lv->name, NULL)))
+               return_0;
+
+       if (dm_snprintf(lvmlock_path, sizeof(lvmlock_path), "%s/%s", dm_dir(), lvmlock_name) < 0) {
+               log_error("Handle sanlock LV %s path too long.", lvmlock_name);
+               return 0;
+       }
+
+       fd = open(lvmlock_path, O_RDONLY);
+       if (fd < 0) {
+               log_error("Cannot open sanlock LV %s.", lvmlock_path);
+               return 0;
+       }
+
+       if (ioctl(fd, BLKGETSIZE64, &dm_size_bytes) < 0) {
+               log_error("Cannot get size of sanlock LV %s.", lvmlock_path);
+               if (close(fd))
+                       stack;
+               return 0;
+       }
+
+       if (close(fd))
+               stack;
+
        /*
-        * Another host may have extended the lvmlock LV already.
-        * Refresh so that we'll find the new space they added
-        * when we search for new space.
-        *
-        * FIXME: we should be able to check if the lvmlock size
-        * in VG metadata is smaller than lvmlock size reported
-        * by the kernel, and avoid refresh if they match.
+        * Another host may have extended the lvmlock LV.
+        * If so the lvmlock LV size in metadata will be
+        * larger than our active lvmlock LV, and we need
+        * to refresh our lvmlock LV to use the new space.
         */
-       if (!_refresh_sanlock_lv(cmd, vg))
-               return 0;
+       if (lv_size_bytes > dm_size_bytes) {
+               log_debug("Refresh sanlock lv %llu dm %llu",
+                         (unsigned long long)lv_size_bytes,
+                         (unsigned long long)dm_size_bytes);
+
+               if (!_refresh_sanlock_lv(cmd, vg))
+                       return 0;
+       }
 
        /*
         * Ask lvmlockd/sanlock to look for an unused lock.
@@ -690,6 +730,7 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
        reply = _lockd_send("find_free_lock",
                        "pid = " FMTd64, (int64_t) getpid(),
                        "vg_name = %s", vg->name,
+                       "lv_size_bytes = " FMTd64, (int64_t) lv_size_bytes,
                        NULL);
 
        if (!_lockd_result(reply, &result, NULL)) {
@@ -700,7 +741,7 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
 
        /* No space on the lvmlock lv for a new lease. */
        if (result == -EMSGSIZE)
-               ret = _extend_sanlock_lv(cmd, vg, extend_mb);
+               ret = _extend_sanlock_lv(cmd, vg, extend_mb, lvmlock_path);
 
        daemon_reply_destroy(reply);
 
This page took 0.051932 seconds and 5 git commands to generate.