#include "sanlock_admin.h"
#include "sanlock_resource.h"
+/* FIXME: these are copied from sanlock.h only until
+ an updated version of sanlock is available with them. */
+#define SANLK_RES_ALIGN1M 0x00000010
+#define SANLK_RES_ALIGN2M 0x00000020
+#define SANLK_RES_ALIGN4M 0x00000040
+#define SANLK_RES_ALIGN8M 0x00000080
+#define SANLK_RES_SECTOR512 0x00000100
+#define SANLK_RES_SECTOR4K 0x00000200
+#define SANLK_LSF_ALIGN1M 0x00000010
+#define SANLK_LSF_ALIGN2M 0x00000020
+#define SANLK_LSF_ALIGN4M 0x00000040
+#define SANLK_LSF_ALIGN8M 0x00000080
+#define SANLK_LSF_SECTOR512 0x00000100
+#define SANLK_LSF_SECTOR4K 0x00000200
+
#include <stddef.h>
#include <poll.h>
#include <errno.h>
#include <syslog.h>
+#include <blkid/blkid.h>
+#include <linux/kdev_t.h>
+
+#define ONE_MB 1048576
/*
-------------------------------------------------------------------------------
struct lm_sanlock {
struct sanlk_lockspace ss;
+ int sector_size;
int align_size;
int sock; /* sanlock daemon connection */
};
* ...
*/
-#define LS_BEGIN 0
#define GL_LOCK_BEGIN UINT64_C(65)
#define VG_LOCK_BEGIN UINT64_C(66)
#define LV_LOCK_BEGIN UINT64_C(67)
return rv;
}
+static void _read_sysfs_size(dev_t devno, const char *name, unsigned int *val)
+{
+ char path[PATH_MAX];
+ char buf[32];
+ FILE *fp;
+ size_t len;
+
+ snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/queue/%s",
+ (int)MAJOR(devno), (int)MINOR(devno), name);
+
+ if (!(fp = fopen(path, "r")))
+ return;
+
+ if (!fgets(buf, sizeof(buf), fp))
+ goto out;
+
+ if ((len = strlen(buf)) && buf[len - 1] == '\n')
+ buf[--len] = '\0';
+
+ if (strlen(buf))
+ *val = atoi(buf);
+out:
+ fclose(fp);
+}
+
+/* Select sector/align size for a new VG based on what the device reports for
+ sector size of the lvmlock LV. */
+
+static int get_sizes_device(char *path, int *sector_size, int *align_size)
+{
+ unsigned int physical_block_size = 0;
+ unsigned int logical_block_size = 0;
+ struct stat st;
+ int rv;
+
+ rv = stat(path, &st);
+ if (rv < 0) {
+ log_error("Failed to stat device to get block size %s %d", path, errno);
+ return -1;
+ }
+
+ _read_sysfs_size(st.st_rdev, "physical_block_size", &physical_block_size);
+ _read_sysfs_size(st.st_rdev, "logical_block_size", &logical_block_size);
+
+ if ((physical_block_size == 512) && (logical_block_size == 512)) {
+ *sector_size = 512;
+ *align_size = ONE_MB;
+ return 0;
+ }
+
+ if ((physical_block_size == 4096) && (logical_block_size == 4096)) {
+ *sector_size = 4096;
+ *align_size = 8 * ONE_MB;
+ return 0;
+ }
+
+ if (physical_block_size && (physical_block_size != 512) && (physical_block_size != 4096)) {
+ log_warn("WARNING: invalid block sizes physical %u logical %u for %s",
+ physical_block_size, logical_block_size, path);
+ physical_block_size = 0;
+ }
+
+ if (logical_block_size && (logical_block_size != 512) && (logical_block_size != 4096)) {
+ log_warn("WARNING: invalid block sizes physical %u logical %u for %s",
+ physical_block_size, logical_block_size, path);
+ logical_block_size = 0;
+ }
+
+ if (!physical_block_size && !logical_block_size) {
+ log_error("Failed to get a block size for %s", path);
+ return -1;
+ }
+
+ if (!physical_block_size || !logical_block_size) {
+ log_warn("WARNING: incomplete block size information physical %u logical %u for %s",
+ physical_block_size, logical_block_size, path);
+ if (!physical_block_size)
+ physical_block_size = logical_block_size;
+ if (!logical_block_size)
+ logical_block_size = physical_block_size;
+ }
+
+ if ((logical_block_size == 4096) && (physical_block_size == 512)) {
+ log_warn("WARNING: mixed block sizes physical %u logical %u (using 4096) for %s",
+ physical_block_size, logical_block_size, path);
+ *sector_size = 4096;
+ *align_size = 8 * ONE_MB;
+ return 0;
+ }
+
+ if ((physical_block_size == 4096) && (logical_block_size == 512)) {
+ log_warn("WARNING: mixed block sizes physical %u logical %u (using 4096) for %s",
+ physical_block_size, logical_block_size, path);
+ *sector_size = 4096;
+ *align_size = 8 * ONE_MB;
+ return 0;
+ }
+
+ if (physical_block_size == 512) {
+ *sector_size = 512;
+ *align_size = ONE_MB;
+ return 0;
+ }
+
+ if (physical_block_size == 4096) {
+ *sector_size = 4096;
+ *align_size = 8 * ONE_MB;
+ return 0;
+ }
+
+ log_error("Failed to get a block size for %s", path);
+ return -1;
+}
+
+
+/* Get the sector/align sizes that were used to create an existing VG.
+ sanlock encoded this in the lockspace/resource structs on disk. */
+
+static int get_sizes_lockspace(char *path, int *sector_size, int *align_size)
+{
+ struct sanlk_lockspace ss;
+ uint32_t io_timeout = 0;
+ int rv;
+
+ memset(&ss, 0, sizeof(ss));
+ memcpy(ss.host_id_disk.path, path, SANLK_PATH_LEN);
+ ss.host_id_disk.offset = 0;
+
+ rv = sanlock_read_lockspace(&ss, 0, &io_timeout);
+ if (rv < 0) {
+ log_error("get_sizes_lockspace %s error %d", path, rv);
+ return rv;
+ }
+
+ if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN8M)) {
+ *sector_size = 4096;
+ *align_size = 8 * ONE_MB;
+ } else if ((ss.flags & SANLK_LSF_SECTOR512) && (ss.flags & SANLK_LSF_ALIGN1M)) {
+ *sector_size = 512;
+ *align_size = ONE_MB;
+ }
+
+ log_debug("get_sizes_lockspace found %d %d", *sector_size, *align_size);
+ return 0;
+}
+
/*
* vgcreate
*
uint32_t daemon_version;
uint32_t daemon_proto;
uint64_t offset;
- int align_size;
+ int sector_size = 0;
+ int align_size = 0;
int i, rv;
memset(&ss, 0, sizeof(ss));
log_debug("sanlock daemon version %08x proto %08x",
daemon_version, daemon_proto);
- rv = sanlock_align(&disk);
- if (rv <= 0) {
+ /* Nothing formatted on disk yet, use what the device reports. */
+ rv = get_sizes_device(disk.path, §or_size, &align_size);
+ if (rv < 0) {
if (rv == -EACCES) {
log_error("S %s init_vg_san sanlock error -EACCES: no permission to access %s",
ls_name, disk.path);
return -EDEVOPEN;
} else {
- log_error("S %s init_vg_san sanlock error %d trying to get align size of %s",
+ log_error("S %s init_vg_san sanlock error %d trying to get sector/align size of %s",
ls_name, rv, disk.path);
return -EARGS;
}
- } else
- align_size = rv;
+ }
strncpy(ss.name, ls_name, SANLK_NAME_LEN);
memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN);
- ss.host_id_disk.offset = LS_BEGIN * align_size;
+ ss.host_id_disk.offset = 0;
+ ss.flags = (sector_size == 4096) ? (SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M) :
+ (SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M);
rv = sanlock_write_lockspace(&ss, 0, 0, sanlock_io_timeout);
if (rv < 0) {
memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
rd.rs.disks[0].offset = align_size * GL_LOCK_BEGIN;
rd.rs.num_disks = 1;
+ rd.rs.flags = (sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) :
+ (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
if (rv < 0) {
memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
rd.rs.disks[0].offset = align_size * VG_LOCK_BEGIN;
rd.rs.num_disks = 1;
+ rd.rs.flags = (sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) :
+ (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
if (rv < 0) {
memset(&rd, 0, sizeof(rd));
rd.rs.num_disks = 1;
+ rd.rs.flags = (sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) :
+ (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
strncpy(rd.rs.lockspace_name, ls_name, SANLK_NAME_LEN);
strcpy(rd.rs.name, "#unused");
*/
int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name,
- char *vg_args, char *lv_args, uint64_t free_offset)
+ char *vg_args, char *lv_args,
+ int sector_size, int align_size, uint64_t free_offset)
{
struct sanlk_resourced rd;
char lock_lv_name[MAX_ARGS+1];
char lock_args_version[MAX_ARGS+1];
uint64_t offset;
- int align_size;
int rv;
memset(&rd, 0, sizeof(rd));
LV_LOCK_ARGS_MAJOR, LV_LOCK_ARGS_MINOR, LV_LOCK_ARGS_PATCH);
if (daemon_test) {
- align_size = 1048576;
+ align_size = ONE_MB;
snprintf(lv_args, MAX_ARGS, "%s:%llu",
lock_args_version,
(unsigned long long)((align_size * LV_LOCK_BEGIN) + (align_size * daemon_test_lv_count)));
if ((rv = build_dm_path(rd.rs.disks[0].path, SANLK_PATH_LEN, vg_name, lock_lv_name)))
return rv;
- align_size = sanlock_align(&rd.rs.disks[0]);
- if (align_size <= 0) {
- log_error("S %s init_lv_san align error %d", ls_name, align_size);
- return -EINVAL;
+ /*
+ * These should not usually be zero, maybe only the first time this function is called?
+ * We need to use the same sector/align sizes that are already being used.
+ */
+ if (!sector_size || !align_size) {
+ rv = get_sizes_lockspace(rd.rs.disks[0].path, §or_size, &align_size);
+ if (rv < 0) {
+ log_error("S %s init_lv_san read_lockspace error %d %s",
+ ls_name, rv, rd.rs.disks[0].path);
+ return rv;
+ }
+
+ if (sector_size)
+ log_debug("S %s init_lv_san found ls sector_size %d align_size %d", ls_name, sector_size, align_size);
+ else {
+ /* use the old method */
+ align_size = sanlock_align(&rd.rs.disks[0]);
+ if (align_size <= 0) {
+ log_error("S %s init_lv_san align error %d", ls_name, align_size);
+ return -EINVAL;
+ }
+ sector_size = (align_size == ONE_MB) ? 512 : 4096;
+ log_debug("S %s init_lv_san found old sector_size %d align_size %d", ls_name, sector_size, align_size);
+ }
}
+ rd.rs.flags = (sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) :
+ (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
+
if (free_offset)
offset = free_offset;
else
ls_name, lv_name, (unsigned long long)offset);
strncpy(rd.rs.name, lv_name, SANLK_NAME_LEN);
+ rd.rs.flags = (sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) :
+ (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
if (!rv) {
char lock_lv_name[MAX_ARGS+1];
uint64_t offset;
uint32_t io_timeout;
- int align_size;
+ int sector_size = 0;
+ int align_size = 0;
int i, rv;
memset(&disk, 0, sizeof(disk));
/* FIXME: device is not always ready for us here */
sleep(1);
- align_size = sanlock_align(&disk);
- if (align_size <= 0) {
- log_error("S %s rename_vg_san bad align size %d %s",
- ls_name, align_size, disk.path);
- return -EINVAL;
- }
-
/*
* Lockspace
*/
memset(&ss, 0, sizeof(ss));
memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN);
- ss.host_id_disk.offset = LS_BEGIN * align_size;
+ ss.host_id_disk.offset = 0;
rv = sanlock_read_lockspace(&ss, 0, &io_timeout);
if (rv < 0) {
return rv;
}
+ if ((ss.flags & SANLK_LSF_SECTOR4K) && (ss.flags & SANLK_LSF_ALIGN8M)) {
+ sector_size = 4096;
+ align_size = 8 * ONE_MB;
+ } else if ((ss.flags & SANLK_LSF_SECTOR512) && (ss.flags & SANLK_LSF_ALIGN1M)) {
+ sector_size = 512;
+ align_size = ONE_MB;
+ } else {
+ /* use the old method */
+ align_size = sanlock_align(&ss.host_id_disk);
+ if (align_size <= 0) {
+ log_error("S %s rename_vg_san unknown sector/align size for %s",
+ ls_name, ss.host_id_disk.path);
+ return -1;
+ }
+ sector_size = (align_size == ONE_MB) ? 512 : 4096;
+ }
+
+ if (!sector_size || !align_size)
+ return -1;
+
strncpy(ss.name, ls_name, SANLK_NAME_LEN);
rv = sanlock_write_lockspace(&ss, 0, 0, sanlock_io_timeout);
rd1.rs.num_disks = 1;
strncpy(rd1.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1);
rd1.rs.disks[0].offset = lms->align_size * GL_LOCK_BEGIN;
+
+ rd1.rs.flags = (lms->sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) :
+ (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
+ rd2.rs.flags = (lms->sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) :
+ (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
rv = sanlock_acquire(lms->sock, -1, 0, 1, &rs1, NULL);
if (rv < 0) {
rd.rs.num_disks = 1;
strncpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1);
rd.rs.disks[0].offset = lms->align_size * GL_LOCK_BEGIN;
+ rd.rs.flags = (lms->sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) :
+ (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
if (rv < 0) {
rv = sanlock_read_resource(&rd.rs, 0);
if (rv < 0) {
- log_error("gl_is_enabled read_resource error %d", rv);
+ log_error("gl_is_enabled read_resource align_size %d offset %llu error %d",
+ lms->align_size, (unsigned long long)offset, rv);
return rv;
}
* been disabled.)
*/
-int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset)
+int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset, int *sector_size, int *align_size)
{
struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
struct sanlk_resourced rd;
int round = 0;
if (daemon_test) {
- *free_offset = (1048576 * LV_LOCK_BEGIN) + (1048576 * (daemon_test_lv_count + 1));
+ *free_offset = (ONE_MB * LV_LOCK_BEGIN) + (ONE_MB * (daemon_test_lv_count + 1));
+ *sector_size = 512;
+ *align_size = ONE_MB;
return 0;
}
+ *sector_size = lms->sector_size;
+ *align_size = lms->align_size;
+
memset(&rd, 0, sizeof(rd));
strncpy(rd.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
rd.rs.num_disks = 1;
strncpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1);
+ rd.rs.flags = (lms->sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) :
+ (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
if (ls->free_lock_offset)
offset = ls->free_lock_offset;
char disk_path[SANLK_PATH_LEN];
char killpath[SANLK_PATH_LEN];
char killargs[SANLK_PATH_LEN];
+ int sector_size = 0;
+ int align_size = 0;
int gl_found;
int ret, rv;
goto fail;
}
- lms->align_size = sanlock_align(&lms->ss.host_id_disk);
- if (lms->align_size <= 0) {
- log_error("S %s prepare_lockspace_san align error %d", lsname, lms->align_size);
+ rv = get_sizes_lockspace(disk_path, §or_size, &align_size);
+ if (rv < 0) {
+ log_error("S %s prepare_lockspace_san cannot get sector/align sizes %d", lsname, rv);
ret = -EMANAGER;
goto fail;
}
+ if (!sector_size) {
+ log_debug("S %s prepare_lockspace_san using old size method", lsname);
+ /* use the old method */
+ align_size = sanlock_align(&lms->ss.host_id_disk);
+ if (align_size <= 0) {
+ log_error("S %s prepare_lockspace_san align error %d", lsname, align_size);
+ return -EINVAL;
+ }
+ sector_size = (align_size == ONE_MB) ? 512 : 4096;
+ log_debug("S %s prepare_lockspace_san found old sector_size %d align_size %d", lsname, sector_size, align_size);
+ }
+
+ log_debug("S %s prepare_lockspace_san sizes %d %d", lsname, sector_size, align_size);
+
+ lms->align_size = align_size;
+ lms->sector_size = sector_size;
+
+ lms->ss.flags = (sector_size == 4096) ? (SANLK_LSF_SECTOR4K | SANLK_LSF_ALIGN8M) :
+ (SANLK_LSF_SECTOR512 | SANLK_LSF_ALIGN1M);
+
gl_found = gl_is_enabled(ls, lms);
if (gl_found < 0) {
log_error("S %s prepare_lockspace_san gl_enabled error %d", lsname, gl_found);
strncpy(rds->rs.name, r->name, SANLK_NAME_LEN);
rds->rs.num_disks = 1;
memcpy(rds->rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN);
+ rds->rs.flags = (lms->sector_size == 4096) ? (SANLK_RES_SECTOR4K | SANLK_RES_ALIGN8M) : (SANLK_RES_SECTOR512 | SANLK_RES_ALIGN1M);
if (r->type == LD_RT_GL)
rds->rs.disks[0].offset = GL_LOCK_BEGIN * lms->align_size;