From: David Teigland Date: Wed, 25 Sep 2024 21:18:32 +0000 (-0500) Subject: lvmlockd: use lvmlock LV size X-Git-Tag: v2_03_27~23 X-Git-Url: https://sourceware.org/git/?a=commitdiff_plain;h=refs%2Fpipelines%2F1470849140;p=lvm2.git lvmlockd: use lvmlock LV size Previously, lvmlockd detected the end of the lvmlock LV by doing i/o to it until an i/o error was returned. This triggered sanlock warning messages, so use the LV size to avoid accessing beyond the end of the device. Previously, every lvcreate would refresh the lvmlock LV in case another machine had extended it. This involves a lot of unnecessary work in most cases, so now compare the LV size and device size to detect when a refresh is needed. --- diff --git a/daemons/lvmlockd/lvmlockd-core.c b/daemons/lvmlockd/lvmlockd-core.c index 733e71821..4b2d28c22 100644 --- a/daemons/lvmlockd/lvmlockd-core.c +++ b/daemons/lvmlockd/lvmlockd-core.c @@ -1177,12 +1177,12 @@ static void lm_rem_resource(struct lockspace *ls, struct resource *r) lm_rem_resource_idm(ls, r); } -static int lm_find_free_lock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size) +static int lm_find_free_lock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size) { if (ls->lm_type == LD_LM_DLM) return 0; else if (ls->lm_type == LD_LM_SANLOCK) - return lm_find_free_lock_sanlock(ls, free_offset, sector_size, align_size); + return lm_find_free_lock_sanlock(ls, lv_size_bytes, free_offset, sector_size, align_size); else if (ls->lm_type == LD_LM_IDM) return 0; return -1; @@ -2717,7 +2717,7 @@ static void *lockspace_thread_main(void *arg_in) int align_size = 0; log_debug("S %s find free lock", ls->name); - rv = lm_find_free_lock(ls, &free_offset, &sector_size, &align_size); + rv = lm_find_free_lock(ls, act->lv_size_bytes, &free_offset, &sector_size, &align_size); log_debug("S %s find free lock %d offset %llu sector_size %d align_size %d", ls->name, rv, (unsigned long long)free_offset, 
sector_size, align_size); ls->free_lock_offset = free_offset; @@ -5032,6 +5032,8 @@ static void client_recv_action(struct client *cl) if (val) act->host_id = val; + act->lv_size_bytes = (uint64_t)dm_config_find_int64(req.cft->root, "lv_size_bytes", 0); + /* Create PV list for idm */ if (lm == LD_LM_IDM) { memset(&pvs, 0x0, sizeof(pvs)); diff --git a/daemons/lvmlockd/lvmlockd-internal.h b/daemons/lvmlockd/lvmlockd-internal.h index 6034349b2..fbbefbeaa 100644 --- a/daemons/lvmlockd/lvmlockd-internal.h +++ b/daemons/lvmlockd/lvmlockd-internal.h @@ -132,6 +132,7 @@ struct action { uint32_t flags; /* LD_AF_ */ uint32_t version; uint64_t host_id; + uint64_t lv_size_bytes; int8_t op; /* operation type LD_OP_ */ int8_t rt; /* resource type LD_RT_ */ int8_t mode; /* lock mode LD_LK_ */ @@ -527,7 +528,7 @@ int lm_gl_is_enabled(struct lockspace *ls); int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin); int lm_data_size_sanlock(void); int lm_is_running_sanlock(void); -int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size); +int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size); static inline int lm_support_sanlock(void) { @@ -630,7 +631,7 @@ static inline int lm_is_running_sanlock(void) return 0; } -static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size) +static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size) { return -1; } diff --git a/daemons/lvmlockd/lvmlockd-sanlock.c b/daemons/lvmlockd/lvmlockd-sanlock.c index 7492dbc90..55743cffb 100644 --- a/daemons/lvmlockd/lvmlockd-sanlock.c +++ b/daemons/lvmlockd/lvmlockd-sanlock.c @@ -339,14 +339,16 @@ fail: return rv; } -static void _read_sysfs_size(dev_t devno, const char *name, unsigned int *val) +static void _read_sysfs_size(dev_t 
devno, const char *name, uint64_t *val) { char path[PATH_MAX]; char buf[32]; FILE *fp; size_t len; - snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/queue/%s", + *val = 0; + + snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/%s", (int)major(devno), (int)minor(devno), name); if (!(fp = fopen(path, "r"))) @@ -359,20 +361,19 @@ static void _read_sysfs_size(dev_t devno, const char *name, unsigned int *val) buf[--len] = '\0'; if (strlen(buf)) - *val = atoi(buf); + *val = strtoull(buf, NULL, 0); out: - if (fclose(fp)) - log_debug("Failed to fclose host id file %s (%s).", path, strerror(errno)); - + (void)fclose(fp); } /* Select sector/align size for a new VG based on what the device reports for sector size of the lvmlock LV. */ -static int get_sizes_device(char *path, int *sector_size, int *align_size) +static int get_sizes_device(char *path, uint64_t *dev_size, int *sector_size, int *align_size) { unsigned int physical_block_size = 0; unsigned int logical_block_size = 0; + uint64_t val; struct stat st; int rv; @@ -382,8 +383,14 @@ static int get_sizes_device(char *path, int *sector_size, int *align_size) return -1; } - _read_sysfs_size(st.st_rdev, "physical_block_size", &physical_block_size); - _read_sysfs_size(st.st_rdev, "logical_block_size", &logical_block_size); + _read_sysfs_size(st.st_rdev, "size", &val); + *dev_size = val * 512; + + _read_sysfs_size(st.st_rdev, "queue/physical_block_size", &val); + physical_block_size = (unsigned int)val; + + _read_sysfs_size(st.st_rdev, "queue/logical_block_size", &val); + logical_block_size = (unsigned int)val; if ((physical_block_size == 512) && (logical_block_size == 512)) { *sector_size = 512; @@ -508,6 +515,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar uint32_t daemon_version; uint32_t daemon_proto; uint64_t offset; + uint64_t dev_size; int sector_size = 0; int align_size = 0; int i, rv; @@ -555,7 +563,7 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char 
*vg_ar daemon_version, daemon_proto); /* Nothing formatted on disk yet, use what the device reports. */ - rv = get_sizes_device(disk.path, &sector_size, &align_size); + rv = get_sizes_device(disk.path, &dev_size, &sector_size, &align_size); if (rv < 0) { if (rv == -EACCES) { log_error("S %s init_vg_san sanlock error -EACCES: no permission to access %s", @@ -568,6 +576,9 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar } } + log_debug("S %s init_vg_san %s dev_size %llu sector_size %u align_size %u", + ls_name, disk.path, (unsigned long long)dev_size, sector_size, align_size); + strcpy_name_len(ss.name, ls_name, SANLK_NAME_LEN); memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN); ss.host_id_disk.offset = 0; @@ -658,6 +669,9 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar log_debug("S %s init_vg_san clearing lv lease areas", ls_name); for (i = 0; ; i++) { + if (dev_size && (offset + align_size > dev_size)) + break; + rd.rs.disks[0].offset = offset; rv = sanlock_write_resource(&rd.rs, 0, 0, 0); @@ -1197,7 +1211,7 @@ int lm_gl_is_enabled(struct lockspace *ls) * been disabled.) */ -int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size) +int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t lv_size_bytes, uint64_t *free_offset, int *sector_size, int *align_size) { struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data; struct sanlk_resourced rd; @@ -1244,9 +1258,31 @@ int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int * memset(rd.rs.name, 0, SANLK_NAME_LEN); + /* + * End of the device. Older lvm versions didn't pass lv_size_bytes + * and just relied on sanlock_read_resource returning an error when + * reading beyond the device. 
+ */ + if (lv_size_bytes && (offset + lms->align_size > lv_size_bytes)) { + /* end of the device */ + log_debug("S %s find_free_lock_san read limit offset %llu lv_size_bytes %llu", + ls->name, (unsigned long long)offset, (unsigned long long)lv_size_bytes); + + /* remember the NO SPACE offset, if no free area left, + * search from this offset after extend */ + *free_offset = offset; + + offset = lms->align_size * LV_LOCK_BEGIN; + round = 1; + continue; + } + rv = sanlock_read_resource(&rd.rs, 0); if (rv == -EMSGSIZE || rv == -ENOSPC) { - /* This indicates the end of the device is reached. */ + /* + * These errors indicate the end of the device is reached. + * Still check this in case lv_size_bytes is not provided. + */ log_debug("S %s find_free_lock_san read limit offset %llu", ls->name, (unsigned long long)offset); diff --git a/lib/locking/lvmlockd.c b/lib/locking/lvmlockd.c index 6ea93d747..746aacfe5 100644 --- a/lib/locking/lvmlockd.c +++ b/lib/locking/lvmlockd.c @@ -18,6 +18,7 @@ #include "daemons/lvmlockd/lvmlockd-client.h" #include +#include static daemon_handle _lvmlockd; static const char *_lvmlockd_socket = NULL; @@ -493,7 +494,7 @@ static int _lockd_request(struct cmd_context *cmd, static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, const char *lock_lv_name, int num_mb) { - uint32_t lv_size_bytes; + uint64_t lv_size_bytes; uint32_t extent_bytes; uint32_t total_extents; struct logical_volume *lv; @@ -511,6 +512,15 @@ static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, .zero = 1, }; + /* + * Make the lvmlock lv a multiple of 8 MB, i.e. a multiple of any + * sanlock align_size, to avoid having unused space at the end of the + * lvmlock LV. 
+ */ + + if (num_mb % 8) + num_mb += (8 - (num_mb % 8)); + lv_size_bytes = num_mb * ONE_MB_IN_BYTES; /* size of sanlock LV in bytes */ extent_bytes = vg->extent_size * SECTOR_SIZE; /* size of one extent in bytes */ total_extents = dm_div_up(lv_size_bytes, extent_bytes); /* number of extents in sanlock LV */ @@ -518,7 +528,8 @@ static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, lv_size_bytes = total_extents * extent_bytes; num_mb = lv_size_bytes / ONE_MB_IN_BYTES; - log_debug("Creating lvmlock LV for sanlock with size %um %ub %u extents", num_mb, lv_size_bytes, lp.extents); + log_debug("Creating lvmlock LV for sanlock with size %um %llub %u extents", + num_mb, (unsigned long long)lv_size_bytes, lp.extents); dm_list_init(&lp.tags); @@ -547,11 +558,9 @@ static int _remove_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) return 1; } -static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, unsigned extend_mb) +static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, unsigned extend_mb, char *lvmlock_path) { struct device *dev; - char path[PATH_MAX]; - char *name; uint64_t old_size_bytes; uint64_t new_size_bytes; uint32_t extend_bytes; @@ -594,23 +603,14 @@ static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, new_size_bytes = lv->size * SECTOR_SIZE; - if (!(name = dm_build_dm_name(lv->vg->cmd->mem, lv->vg->name, lv->name, NULL))) - return_0; - - if (dm_snprintf(path, sizeof(path), "%s/%s", dm_dir(), name) < 0) { - log_error("Extend sanlock LV %s name too long - extended size not zeroed.", - display_lvname(lv)); - return 0; - } - log_debug("Extend sanlock LV zeroing %u bytes from offset %llu to %llu", (uint32_t)(new_size_bytes - old_size_bytes), (unsigned long long)old_size_bytes, (unsigned long long)new_size_bytes); - log_print_unless_silent("Zeroing %u MiB on extended internal lvmlock LV...", extend_mb); + log_debug("Zeroing %u MiB on extended internal 
lvmlock LV...", extend_mb); - if (!(dev = dev_cache_get(cmd, path, NULL))) { + if (!(dev = dev_cache_get(cmd, lvmlock_path, NULL))) { log_error("Extend sanlock LV %s cannot find device.", display_lvname(lv)); return 0; } @@ -653,16 +653,27 @@ static int _refresh_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) { + struct logical_volume *lv = vg->sanlock_lv; daemon_reply reply; + char *lvmlock_name; + char lvmlock_path[PATH_MAX]; unsigned extend_mb; + uint64_t lv_size_bytes; + uint64_t dm_size_bytes; int result; int ret; + int fd; if (!_use_lvmlockd) return 1; if (!_lvmlockd_connected) return 0; + if (!lv) { + log_error("No internal lvmlock LV found."); + return 0; + } + extend_mb = (unsigned) find_config_tree_int(cmd, global_sanlock_lv_extend_CFG, NULL); /* @@ -672,17 +683,46 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) if (!extend_mb) return 1; + lv_size_bytes = lv->size * SECTOR_SIZE; + + if (!(lvmlock_name = dm_build_dm_name(cmd->mem, vg->name, lv->name, NULL))) + return_0; + + if (dm_snprintf(lvmlock_path, sizeof(lvmlock_path), "%s/%s", dm_dir(), lvmlock_name) < 0) { + log_error("Handle sanlock LV %s path too long.", lvmlock_name); + return 0; + } + + fd = open(lvmlock_path, O_RDONLY); + if (fd < 0) { + log_error("Cannot open sanlock LV %s.", lvmlock_path); + return 0; + } + + if (ioctl(fd, BLKGETSIZE64, &dm_size_bytes) < 0) { + log_error("Cannot get size of sanlock LV %s.", lvmlock_path); + if (close(fd)) + stack; + return 0; + } + + if (close(fd)) + stack; + /* - * Another host may have extended the lvmlock LV already. - * Refresh so that we'll find the new space they added - * when we search for new space. - * - * FIXME: we should be able to check if the lvmlock size - * in VG metadata is smaller than lvmlock size reported - * by the kernel, and avoid refresh if they match. + * Another host may have extended the lvmlock LV. 
+ * If so the lvmlock LV size in metadata will be + * larger than our active lvmlock LV, and we need + * to refresh our lvmlock LV to use the new space. */ - if (!_refresh_sanlock_lv(cmd, vg)) - return 0; + if (lv_size_bytes > dm_size_bytes) { + log_debug("Refresh sanlock lv %llu dm %llu", + (unsigned long long)lv_size_bytes, + (unsigned long long)dm_size_bytes); + + if (!_refresh_sanlock_lv(cmd, vg)) + return 0; + } /* * Ask lvmlockd/sanlock to look for an unused lock. @@ -690,6 +730,7 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) reply = _lockd_send("find_free_lock", "pid = " FMTd64, (int64_t) getpid(), "vg_name = %s", vg->name, + "lv_size_bytes = " FMTd64, (int64_t) lv_size_bytes, NULL); if (!_lockd_result(reply, &result, NULL)) { @@ -700,7 +741,7 @@ int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg) /* No space on the lvmlock lv for a new lease. */ if (result == -EMSGSIZE) - ret = _extend_sanlock_lv(cmd, vg, extend_mb); + ret = _extend_sanlock_lv(cmd, vg, extend_mb, lvmlock_path); daemon_reply_destroy(reply);