From 976b95d9290ad0e9ef10bdd35fe3ed08270666b8 Mon Sep 17 00:00:00 2001 From: Petr Rockai Date: Wed, 13 Oct 2010 15:40:38 +0000 Subject: [PATCH] Limit repeated accesses to broken devices. Signed-off-by: Takahiro Yasui Reviewed-by: Petr Rockai --- doc/example.conf.in | 5 +++++ lib/commands/toolcontext.c | 4 ++++ lib/config/defaults.h | 3 +++ lib/device/dev-cache.c | 19 +++++++++++++++++++ lib/device/dev-cache.h | 2 ++ lib/device/dev-io.c | 34 ++++++++++++++++++++++++++++++++-- lib/device/device.h | 2 ++ lib/locking/locking.c | 1 + lib/misc/lvm-globals.c | 11 +++++++++++ lib/misc/lvm-globals.h | 4 ++++ man/lvm.conf.5.in | 5 +++++ 11 files changed, 88 insertions(+), 2 deletions(-) diff --git a/doc/example.conf.in b/doc/example.conf.in index 17fca0520..6bd21ed29 100644 --- a/doc/example.conf.in +++ b/doc/example.conf.in @@ -136,6 +136,11 @@ devices { # in recovery situations. ignore_suspended_devices = 0 + # During each LVM operation any errors received from a device are counted. + # If this counter reaches the number here, no further I/O is sent to the + # device. + disable_after_error_count = 0 + # Allow use of pvcreate --uuid without requiring --restorefile. 
require_restorefile_with_uuid = 1 } diff --git a/lib/commands/toolcontext.c b/lib/commands/toolcontext.c index 289a0fcab..a9f5bd681 100644 --- a/lib/commands/toolcontext.c +++ b/lib/commands/toolcontext.c @@ -560,6 +560,10 @@ static int _init_dev_cache(struct cmd_context *cmd) const struct config_node *cn; struct config_value *cv; + init_dev_disable_after_error_count( + find_config_tree_int(cmd, "devices/disable_after_error_count", + DEFAULT_DISABLE_AFTER_ERROR_COUNT)); + if (!dev_cache_init(cmd)) return_0; diff --git a/lib/config/defaults.h b/lib/config/defaults.h index e4db65e9d..cbb30fa43 100644 --- a/lib/config/defaults.h +++ b/lib/config/defaults.h @@ -33,6 +33,7 @@ #define DEFAULT_MD_COMPONENT_DETECTION 1 #define DEFAULT_MD_CHUNK_ALIGNMENT 1 #define DEFAULT_IGNORE_SUSPENDED_DEVICES 1 +#define DEFAULT_DISABLE_AFTER_ERROR_COUNT 0 #define DEFAULT_REQUIRE_RESTOREFILE_WITH_UUID 1 #define DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION 1 #define DEFAULT_DATA_ALIGNMENT_DETECTION 1 @@ -117,6 +118,8 @@ # define DEFAULT_MAX_HISTORY 100 #endif +#define DEFAULT_MAX_ERROR_COUNT NO_DEV_ERROR_COUNT_LIMIT + #define DEFAULT_REP_ALIGNED 1 #define DEFAULT_REP_BUFFERED 1 #define DEFAULT_REP_COLUMNS_AS_ROWS 0 diff --git a/lib/device/dev-cache.c b/lib/device/dev-cache.c index d43fcc02c..ef93f4d69 100644 --- a/lib/device/dev-cache.c +++ b/lib/device/dev-cache.c @@ -104,6 +104,8 @@ struct device *dev_create_file(const char *filename, struct device *dev, dev->dev = 0; dev->fd = -1; dev->open_count = 0; + dev->error_count = 0; + dev->max_error_count = NO_DEV_ERROR_COUNT_LIMIT; dev->block_size = -1; dev->read_ahead = -1; memset(dev->pvid, 0, sizeof(dev->pvid)); @@ -125,6 +127,7 @@ static struct device *_dev_create(dev_t d) dev->dev = d; dev->fd = -1; dev->open_count = 0; + dev->max_error_count = dev_disable_after_error_count(); dev->block_size = -1; dev->read_ahead = -1; dev->end = UINT64_C(0); @@ -845,6 +848,22 @@ struct device *dev_iter_get(struct dev_iter *iter) return NULL; } +void 
dev_reset_error_count(struct cmd_context *cmd) +{ + struct dev_iter *iter; + struct device *dev; + + if (!(iter = dev_iter_create(cmd->filter, 0))) { + log_error("Resetting device error count failed"); + return; + } + + for (dev = dev_iter_get(iter); dev; dev = dev_iter_get(iter)) + dev->error_count = 0; + + dev_iter_destroy(iter); +} + int dev_fd(struct device *dev) { return dev->fd; diff --git a/lib/device/dev-cache.h b/lib/device/dev-cache.h index 0ade053c5..c1c86d6af 100644 --- a/lib/device/dev-cache.h +++ b/lib/device/dev-cache.h @@ -53,4 +53,6 @@ struct dev_iter *dev_iter_create(struct dev_filter *f, int dev_scan); void dev_iter_destroy(struct dev_iter *iter); struct device *dev_iter_get(struct dev_iter *iter); +void dev_reset_error_count(struct cmd_context *cmd); + #endif diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c index 1995bdead..eb80a8942 100644 --- a/lib/device/dev-io.c +++ b/lib/device/dev-io.c @@ -603,18 +603,40 @@ void dev_close_all(void) } } +static inline int _dev_is_valid(struct device *dev) +{ + return (dev->max_error_count == NO_DEV_ERROR_COUNT_LIMIT || + dev->error_count < dev->max_error_count); +} + +static void _dev_inc_error_count(struct device *dev) +{ + if (++dev->error_count == dev->max_error_count) + log_warn("WARNING: Error counts reached a limit of %d. 
" + "Device %s was disabled", + dev->max_error_count, dev_name(dev)); +} + int dev_read(struct device *dev, uint64_t offset, size_t len, void *buffer) { struct device_area where; + int ret; if (!dev->open_count) return_0; + if (!_dev_is_valid(dev)) + return 0; + where.dev = dev; where.start = offset; where.size = len; - return _aligned_io(&where, buffer, 0); + ret = _aligned_io(&where, buffer, 0); + if (!ret) + _dev_inc_error_count(dev); + + return ret; } /* @@ -670,17 +692,25 @@ int dev_append(struct device *dev, size_t len, void *buffer) int dev_write(struct device *dev, uint64_t offset, size_t len, void *buffer) { struct device_area where; + int ret; if (!dev->open_count) return_0; + if (!_dev_is_valid(dev)) + return 0; + where.dev = dev; where.start = offset; where.size = len; dev->flags |= DEV_ACCESSED_W; - return _aligned_io(&where, buffer, 1); + ret = _aligned_io(&where, buffer, 1); + if (!ret) + _dev_inc_error_count(dev); + + return ret; } int dev_set(struct device *dev, uint64_t offset, size_t len, int value) diff --git a/lib/device/device.h b/lib/device/device.h index 5a5995086..694f503f5 100644 --- a/lib/device/device.h +++ b/lib/device/device.h @@ -39,6 +39,8 @@ struct device { /* private */ int fd; int open_count; + int error_count; + int max_error_count; int block_size; int read_ahead; uint32_t flags; diff --git a/lib/locking/locking.c b/lib/locking/locking.c index e64d227fa..94c8e8bab 100644 --- a/lib/locking/locking.c +++ b/lib/locking/locking.c @@ -382,6 +382,7 @@ static int _lock_vol(struct cmd_context *cmd, const char *resource, else lvmcache_lock_vgname(resource, (flags & LCK_TYPE_MASK) == LCK_READ); + dev_reset_error_count(cmd); } _update_vg_lock_count(resource, flags); diff --git a/lib/misc/lvm-globals.c b/lib/misc/lvm-globals.c index 795dd54d4..9da61fe05 100644 --- a/lib/misc/lvm-globals.c +++ b/lib/misc/lvm-globals.c @@ -41,6 +41,7 @@ static int _error_message_produced = 0; static unsigned _is_static = 0; static int _udev_checking = 1; 
static char _sysfs_dir_path[PATH_MAX] = ""; +static int _dev_disable_after_error_count = DEFAULT_DISABLE_AFTER_ERROR_COUNT; void init_verbose(int level) { @@ -122,6 +123,11 @@ void init_udev_checking(int checking) log_debug("LVM udev checking disabled"); } +void init_dev_disable_after_error_count(int value) +{ + _dev_disable_after_error_count = value; +} + void set_cmd_name(const char *cmd) { strncpy(_cmd_name, cmd, sizeof(_cmd_name)); @@ -236,3 +242,8 @@ const char *sysfs_dir_path() { return _sysfs_dir_path; } + +int dev_disable_after_error_count(void) +{ + return _dev_disable_after_error_count; +} diff --git a/lib/misc/lvm-globals.h b/lib/misc/lvm-globals.h index 507393cf9..2fabbc7c1 100644 --- a/lib/misc/lvm-globals.h +++ b/lib/misc/lvm-globals.h @@ -37,6 +37,7 @@ void init_ignore_suspended_devices(int ignore); void init_error_message_produced(int produced); void init_is_static(unsigned value); void init_udev_checking(int checking); +void init_dev_disable_after_error_count(int value); void set_cmd_name(const char *cmd_name); void set_sysfs_dir_path(const char *path); @@ -62,4 +63,7 @@ const char *sysfs_dir_path(void); #define DMEVENTD_MONITOR_IGNORE -1 int dmeventd_monitor_mode(void); +#define NO_DEV_ERROR_COUNT_LIMIT 0 +int dev_disable_after_error_count(void); + #endif diff --git a/man/lvm.conf.5.in b/man/lvm.conf.5.in index ca951b500..35595baee 100644 --- a/man/lvm.conf.5.in +++ b/man/lvm.conf.5.in @@ -165,6 +165,11 @@ use \fBpvs -o +pe_start\fP . It will be a multiple of the requested \fBdata_alignment\fP plus the alignment_offset from \fBdata_alignment_offset_detection\fP (if enabled) or the pvcreate commandline. +.IP +\fBdisable_after_error_count\fP \(em Maximum number of error counts per device +before disabling devices. This option prevents a broken device from +being accessed repeatedly. If set to 0, no access control to devices is +done. .TP \fBlog\fP \(em Default log settings .IP -- 2.43.5