FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website that aims to archive Linux mailing lists and to make them easily accessible to Linux users and developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Device-mapper Development

 
 
LinkBack Thread Tools
 
Old 06-02-2008, 07:50 AM
Mike Christie
 
Default dm-mpath: don't fail paths on first error

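If we get a transient error then we may not want to fail the path
right away. With this patch, the path is failed only when an error
occurs more than a timeout (currently hard coded to 5 seconds) after
the first error, with no successful IO in between.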

I am not sure how valuable this is. If users just set the no_path_retry
option then we end up with similar results. Without the patch, but with
no_path_retry set, the IO is quickly sent to the new path and has a
smaller chance of getting sent to a queue that is blocked. With the patch
we might avoid
some of the path failure messages that scare users. But most users
are not setting no_path_retry. Will they set this new timer?

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
---
drivers/md/dm-mpath.c | 36 ++++++++++++++++++++++++++++++++++--
1 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index e7ee59e..4a24219 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -25,12 +25,19 @@
#define DM_MSG_PREFIX "multipath"
#define MESG_STR(x) x, sizeof(x)

+/*
+ * TODO: pass this in instead of hard coding it
+ */
+#define DM_DEV_LOSS_TMO 5 * HZ
+
/* Path properties */
struct pgpath {
struct list_head list;

struct priority_group *pg; /* Owning PG */
unsigned fail_count; /* Cumulative failure count */
+ unsigned curr_fail_count;
+ unsigned long fail_start;

struct dm_path path;
};
@@ -313,6 +320,14 @@ static int map_io(struct multipath *m, struct bio *bio,

spin_lock_irqsave(&m->lock, flags);

+ /*
+ * If the path is experiencing problems but is not marked failed,
+ * then throttle it until IO starts to execute correctly again.
+ */
+ if (m->current_pgpath && m->current_pgpath->curr_fail_count > 0 &&
+ m->repeat_count > 1)
+ m->repeat_count = 2;
+
/* Do we need to select a new pgpath? */
if (!m->current_pgpath ||
(!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
@@ -847,7 +862,15 @@ static int fail_path(struct pgpath *pgpath)
if (!pgpath->path.is_active)
goto out;

- DMWARN("Failing path %s.", pgpath->path.dev->name);
+ if (!pgpath->curr_fail_count) {
+ pgpath->fail_start = jiffies;
+ goto choose_new_path;
+ } else if (time_after_eq(pgpath->fail_start + DM_DEV_LOSS_TMO,
+ jiffies))
+ goto choose_new_path;
+
+ DMWARN("Failing Path %s current fail count %d.",
+ pgpath->path.dev->name, pgpath->curr_fail_count);

pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
pgpath->path.is_active = 0;
@@ -855,6 +878,9 @@ static int fail_path(struct pgpath *pgpath)

m->nr_valid_paths--;

+choose_new_path:
+ pgpath->curr_fail_count++;
+
if (pgpath == m->current_pgpath)
m->current_pgpath = NULL;

@@ -880,6 +906,9 @@ static int reinstate_path(struct pgpath *pgpath)

spin_lock_irqsave(&m->lock, flags);

+ pgpath->fail_start = 0;
+ pgpath->curr_fail_count = 0;
+
if (pgpath->path.is_active)
goto out;

@@ -1073,8 +1102,11 @@ static int do_end_io(struct multipath *m, struct bio *bio,
unsigned err_flags = MP_FAIL_PATH; /* Default behavior */
unsigned long flags;

- if (!error)
+ if (!error) {
+ mpio->pgpath->curr_fail_count = 0;
+ mpio->pgpath->fail_start = 0;
return 0; /* I/O complete */
+ }

if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
return error;
--
1.5.4.1


--------------070800010905090700030600--

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 

Thread Tools




All times are GMT. The time now is 06:30 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org