02-15-2008, 09:33 PM
Kiyoshi Ueda

dm-mpath: convert to request-based

This patch converts the dm-multipath target from bio-based to request-based.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
---
drivers/md/dm-mpath.c | 228 +++++++++++++++++++++++++++++++++-------------
drivers/md/dm-rq-record.h | 36 +++++++
2 files changed, 204 insertions(+), 60 deletions(-)

Index: 2.6.25-rc1/drivers/md/dm-mpath.c
===================================================================
--- 2.6.25-rc1.orig/drivers/md/dm-mpath.c
+++ 2.6.25-rc1/drivers/md/dm-mpath.c
@@ -8,8 +8,7 @@
#include "dm.h"
#include "dm-path-selector.h"
#include "dm-hw-handler.h"
-#include "dm-bio-list.h"
-#include "dm-bio-record.h"
+#include "dm-rq-record.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
@@ -80,7 +79,7 @@ struct multipath {
unsigned pg_init_count; /* Number of times pg_init called */

struct work_struct process_queued_ios;
- struct bio_list queued_ios;
+ struct list_head queued_ios;
unsigned queue_size;

struct work_struct trigger_event;
@@ -89,22 +88,22 @@ struct multipath {
* We must use a mempool of dm_mpath_io structs so that we
* can resubmit bios on error.
*/
- mempool_t *mpio_pool;
+ mempool_t *mpio_pool; /* REMOVE ME */
};

/*
* Context information attached to each bio we process.
*/
-struct dm_mpath_io {
+struct dm_mpath_io { /* REMOVE ME */
struct pgpath *pgpath;
- struct dm_bio_details details;
+ struct dm_rq_details details;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256 /* Mempool size */

-static struct kmem_cache *_mpio_cache;
+static struct kmem_cache *_mpio_cache; /* REMOVE ME */

static struct workqueue_struct *kmultipathd;
static void process_queued_ios(struct work_struct *work);
@@ -174,6 +173,7 @@ static struct multipath *alloc_multipath
m = kzalloc(sizeof(*m), GFP_KERNEL);
if (m) {
INIT_LIST_HEAD(&m->priority_groups);
+ INIT_LIST_HEAD(&m->queued_ios);
spin_lock_init(&m->lock);
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
@@ -304,12 +304,13 @@ static int __must_push_back(struct multi
dm_noflush_suspending(m->ti));
}

-static int map_io(struct multipath *m, struct bio *bio,
+static int map_io(struct multipath *m, struct request *clone,
struct dm_mpath_io *mpio, unsigned was_queued)
{
int r = DM_MAPIO_REMAPPED;
unsigned long flags;
struct pgpath *pgpath;
+ struct block_device *bdev;

spin_lock_irqsave(&m->lock, flags);

@@ -326,19 +327,28 @@ static int map_io(struct multipath *m, s
if ((pgpath && m->queue_io) ||
(!pgpath && m->queue_if_no_path)) {
/* Queue for the daemon to resubmit */
- bio_list_add(&m->queued_ios, bio);
+ list_add_tail(&clone->queuelist, &m->queued_ios);
m->queue_size++;
if ((m->pg_init_required && !m->pg_init_in_progress) ||
!m->queue_io)
queue_work(kmultipathd, &m->process_queued_ios);
pgpath = NULL;
+ clone->q = NULL;
+ clone->rq_disk = NULL;
r = DM_MAPIO_SUBMITTED;
- } else if (pgpath)
- bio->bi_bdev = pgpath->path.dev->bdev;
- else if (__must_push_back(m))
+ } else if (pgpath) {
+ bdev = pgpath->path.dev->bdev;
+ clone->q = bdev_get_queue(bdev);
+ clone->rq_disk = bdev->bd_disk;
+ } else if (__must_push_back(m)) {
+ clone->q = NULL;
+ clone->rq_disk = NULL;
r = DM_MAPIO_REQUEUE;
- else
+ } else {
+ clone->q = NULL;
+ clone->rq_disk = NULL;
r = -EIO; /* Failed */
+ }

mpio->pgpath = pgpath;

@@ -378,30 +388,28 @@ static void dispatch_queued_ios(struct m
{
int r;
unsigned long flags;
- struct bio *bio = NULL, *next;
struct dm_mpath_io *mpio;
union map_info *info;
+ struct request *clone, *n;
+ LIST_HEAD(cl);

spin_lock_irqsave(&m->lock, flags);
- bio = bio_list_get(&m->queued_ios);
+ list_splice_init(&m->queued_ios, &cl);
spin_unlock_irqrestore(&m->lock, flags);

- while (bio) {
- next = bio->bi_next;
- bio->bi_next = NULL;
+ list_for_each_entry_safe(clone, n, &cl, queuelist) {
+ list_del_init(&clone->queuelist);

- info = dm_get_mapinfo(bio);
+ info = dm_get_rq_mapinfo(clone);
mpio = info->ptr;

- r = map_io(m, bio, mpio, 1);
+ r = map_io(m, clone, mpio, 1);
if (r < 0)
- bio_endio(bio, r);
+ blk_end_request(clone, r, blk_rq_bytes(clone));
else if (r == DM_MAPIO_REMAPPED)
- generic_make_request(bio);
+ dm_dispatch_request(clone->q, clone);
else if (r == DM_MAPIO_REQUEUE)
- bio_endio(bio, -EIO);
-
- bio = next;
+ blk_end_request(clone, -EIO, blk_rq_bytes(clone));
}
}

@@ -813,21 +821,26 @@ static void multipath_dtr(struct dm_targ
}

/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;

- mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
- dm_bio_record(&mpio->details, bio);
+ mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
+ if (!mpio)
+ /* ENOMEM, requeue */
+ return DM_MAPIO_REQUEUE;
+ memset(mpio, 0, sizeof(*mpio));

+ clone->cmd_flags |= REQ_FAILFAST;
+ dm_rq_record(&mpio->details, clone);
map_context->ptr = mpio;
- bio->bi_rw |= (1 << BIO_RW_FAILFAST);
- r = map_io(m, bio, mpio, 0);
+
+ r = map_io(m, clone, mpio, 0);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);

@@ -1063,25 +1076,37 @@ void dm_pg_init_complete(struct dm_path
spin_unlock_irqrestore(&m->lock, flags);
}

+static void multipath_queue_in_tgt(struct dm_target *ti, struct request *clone,
+ union map_info *map_context)
+{
+ struct multipath *m = (struct multipath *) ti->private;
+ struct dm_mpath_io *mpio = (struct dm_mpath_io *) map_context->ptr;
+ unsigned long flags = 0UL;
+
+ dm_rq_restore(&mpio->details, clone);
+
+ /* queue for the daemon to resubmit or fail */
+ spin_lock_irqsave(&m->lock, flags);
+ list_add_tail(&clone->queuelist, &m->queued_ios);
+ m->queue_size++;
+ if (!m->queue_io)
+ queue_work(kmultipathd, &m->process_queued_ios);
+ spin_unlock_irqrestore(&m->lock, flags);
+}
+
/*
* end_io handling
*/
-static int do_end_io(struct multipath *m, struct bio *bio,
- int error, struct dm_mpath_io *mpio)
+static int do_end_io(struct multipath *m, struct request *clone, int error,
+ struct dm_mpath_io *mpio)
{
struct hw_handler *hwh = &m->hw_handler;
unsigned err_flags = MP_FAIL_PATH; /* Default behavior */
- unsigned long flags;
+ unsigned long flags = 0UL;

- if (!error)
+ if (!clone->errors && !error)
return 0; /* I/O complete */

- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
- return error;
-
- if (error == -EOPNOTSUPP)
- return error;
-
spin_lock_irqsave(&m->lock, flags);
if (!m->nr_valid_paths) {
if (__must_push_back(m)) {
@@ -1097,8 +1122,8 @@ static int do_end_io(struct multipath *m
}
spin_unlock_irqrestore(&m->lock, flags);

- if (hwh->type && hwh->type->error)
- err_flags = hwh->type->error(hwh, bio);
+ if (hwh->type && hwh->type->error_rq)
+ err_flags = hwh->type->error_rq(hwh, clone);

if (mpio->pgpath) {
if (err_flags & MP_FAIL_PATH)
@@ -1112,21 +1137,14 @@ static int do_end_io(struct multipath *m
return -EIO;

requeue:
- dm_bio_restore(&mpio->details, bio);
-
- /* queue for the daemon to resubmit or fail */
- spin_lock_irqsave(&m->lock, flags);
- bio_list_add(&m->queued_ios, bio);
- m->queue_size++;
- if (!m->queue_io)
- queue_work(kmultipathd, &m->process_queued_ios);
- spin_unlock_irqrestore(&m->lock, flags);
-
return DM_ENDIO_INCOMPLETE; /* io not complete */
}

-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
- int error, union map_info *map_context)
+/*
+ * clone->q's lock must be held
+ */
+static int multipath_end_io_first(struct dm_target *ti, struct request *clone,
+ int error, union map_info *map_context)
{
struct multipath *m = ti->private;
struct dm_mpath_io *mpio = map_context->ptr;
@@ -1134,18 +1152,28 @@ static int multipath_end_io(struct dm_ta
struct path_selector *ps;
int r;

- r = do_end_io(m, bio, error, mpio);
+ r = do_end_io(m, clone, error, mpio);
if (pgpath) {
ps = &pgpath->pg->ps;
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path);
}
- if (r != DM_ENDIO_INCOMPLETE)
- mempool_free(mpio, m->mpio_pool);

return r;
}

+static int multipath_end_io(struct dm_target *ti, struct request *clone,
+ int error, union map_info *map_context)
+{
+ struct multipath *m = (struct multipath *) ti->private;
+ struct dm_mpath_io *mpio = (struct dm_mpath_io *) map_context->ptr;
+
+ if (mpio)
+ mempool_free(mpio, m->mpio_pool);
+
+ return 0;
+}
+
/*
* Suspend can't complete until all the I/O is processed so if
* the last path fails we must error any remaining I/O.
@@ -1380,6 +1408,83 @@ static int multipath_ioctl(struct dm_tar
bdev->bd_disk, cmd, arg);
}

+static int __pgpath_congested(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ if (dm_underlying_device_congested(q))
+ return 1;
+
+ return 0;
+}
+
+#if 0
+#define READY_PATH_EXIST 0
+#define ALL_PATH_CONGESTED 1
+#define NO_ACTIVE_PATH 2
+
+static int __pg_congested(struct priority_group *pg)
+{
+ int r = READY_PATH_EXIST, has_active = 0, has_ready = 0;
+ struct pgpath *pgpath;
+
+ list_for_each_entry(pgpath, &pg->list, list) {
+ if (pgpath->path.is_active) {
+ has_active = 1;
+
+ if (!__pgpath_congested(pgpath))
+ has_ready = 1;
+ }
+ }
+
+ if (!has_active)
+ r = NO_ACTIVE_PATH;
+ else if (!has_ready)
+ r = ALL_PATH_CONGESTED;
+
+ return r;
+}
+#endif
+
+static int multipath_congested(struct dm_target *ti)
+{
+ int r = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ unsigned long flags = 0UL;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ if (m->current_pgpath && m->repeat_count > 1) {
+ /* m->current_pgpath will surely be used at the next mapping time. */
+ if (__pgpath_congested(m->current_pgpath))
+ r = 1;
+
+ goto out;
+ }
+
+ /*
+ * Reaching here means that path selection will be executed
+ * at the next mapping time.
+ * We run the path selection here and check the congestion status
+ * of the next path.
+ * We also increment repeat_count to avoid selecting a path again
+ * in map_io(). (This is a hack for the pre-decrement of repeat_count
+ * in map_io(); this repeat_count bug needs to be fixed.)
+ */
+ __choose_pgpath(m);
+ if (m->current_pgpath) {
+ if (__pgpath_congested(m->current_pgpath))
+ r = 1;
+
+ m->repeat_count++;
+ }
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return r;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1389,13 +1494,16 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io_first = multipath_end_io_first,
+ .rq_end_io = multipath_end_io,
+ .queue_in_tgt = multipath_queue_in_tgt,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .congested = multipath_congested,
};

static int __init dm_multipath_init(void)
Index: 2.6.25-rc1/drivers/md/dm-rq-record.h
===================================================================
--- /dev/null
+++ 2.6.25-rc1/drivers/md/dm-rq-record.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2007 NEC Corporation.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_RQ_RECORD_H
+#define DM_RQ_RECORD_H
+
+#include <linux/blkdev.h>
+
+/*
+ * There are lots of mutable fields in the request struct that get
+ * changed by the lower levels of the block layer. Some targets,
+ * such as multipath, may wish to resubmit a request on error. The
+ * functions in this file help the target record and restore the
+ * original request state.
+ */
+struct dm_rq_details {
+ unsigned int cmd_flags;
+ int ref_count;
+};
+
+static inline void dm_rq_record(struct dm_rq_details *rd, struct request *rq)
+{
+ rd->cmd_flags = rq->cmd_flags;
+ rd->ref_count = rq->ref_count;
+}
+
+static inline void dm_rq_restore(struct dm_rq_details *rd, struct request *rq)
+{
+ rq->cmd_flags = rd->cmd_flags;
+ rq->ref_count = rd->ref_count;
+}
+
+#endif
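
For illustration, a hypothetical usage sketch of the two helpers above,
modeled on the dm-mpath.c hunks in this patch (the function names here
are made up for the example; they are not part of the patch):

static void sketch_map(struct dm_rq_details *details, struct request *clone)
{
	dm_rq_record(details, clone);		/* save cmd_flags/ref_count */
	clone->cmd_flags |= REQ_FAILFAST;	/* lower layers may change flags */
}

static void sketch_requeue(struct dm_rq_details *details, struct request *clone)
{
	dm_rq_restore(details, clone);		/* undo lower-layer changes */
	/* ...then put the clone back on the target's queued_ios list. */
}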

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
03-19-2008, 10:12 PM
Kiyoshi Ueda

dm-mpath: convert to request-based

This patch converts the dm-multipath target from bio-based to request-based.

In I/O completion, the clone request doesn't have clone bios,
so the target driver can't resubmit it as it is, even if the target
driver wants to retry it using another path.
To resubmit the clone request, memory allocation for clone bios is
needed. There is no big benefit to resubmitting the clone request.

So the target driver doesn't requeue a clone in the target queue.
Instead, the target driver asks dm core to requeue and remap
the original request of the clone (do_end_io()).
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting the clone.


Actual path selection is done at congestion-check time
(multipath_congested()), since we'd like to check the path that
will definitely be used for the next I/O.
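
For illustration only (this sketch is not part of the patch), the
completion decision that the request-based conversion settles on can be
distilled roughly as follows, with the hardware-handler error handling
and locking omitted for brevity:

static int do_end_io_sketch(struct multipath *m, struct request *clone,
			    int error)
{
	if (!error && !clone->errors)
		return 0;		/* I/O completed successfully */

	if (error == -EOPNOTSUPP)
		return error;		/* retrying other paths won't help */

	if (!m->nr_valid_paths && !m->queue_if_no_path &&
	    !__must_push_back(m))
		return -EIO;		/* nothing left to try: fail the I/O */

	/* Ask dm core to requeue and remap the original request. */
	return DM_ENDIO_REQUEUE;
}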

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
---
drivers/md/dm-mpath.c | 175 +++++++++++++++++++++++++++++++-------------------
1 files changed, 109 insertions(+), 66 deletions(-)

Index: 2.6.25-rc5/drivers/md/dm-mpath.c
===================================================================
--- 2.6.25-rc5.orig/drivers/md/dm-mpath.c
+++ 2.6.25-rc5/drivers/md/dm-mpath.c
@@ -8,8 +8,6 @@
#include "dm.h"
#include "dm-path-selector.h"
#include "dm-hw-handler.h"
-#include "dm-bio-list.h"
-#include "dm-bio-record.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
@@ -80,7 +78,7 @@ struct multipath {
unsigned pg_init_count; /* Number of times pg_init called */

struct work_struct process_queued_ios;
- struct bio_list queued_ios;
+ struct list_head queued_ios;
unsigned queue_size;

struct work_struct trigger_event;
@@ -97,7 +95,6 @@ struct multipath {
*/
struct dm_mpath_io {
struct pgpath *pgpath;
- struct dm_bio_details details;
};

typedef int (*action_fn) (struct pgpath *pgpath);
@@ -174,6 +171,7 @@ static struct multipath *alloc_multipath
m = kzalloc(sizeof(*m), GFP_KERNEL);
if (m) {
INIT_LIST_HEAD(&m->priority_groups);
+ INIT_LIST_HEAD(&m->queued_ios);
spin_lock_init(&m->lock);
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
@@ -304,12 +302,13 @@ static int __must_push_back(struct multi
dm_noflush_suspending(m->ti));
}

-static int map_io(struct multipath *m, struct bio *bio,
+static int map_io(struct multipath *m, struct request *clone,
struct dm_mpath_io *mpio, unsigned was_queued)
{
int r = DM_MAPIO_REMAPPED;
unsigned long flags;
struct pgpath *pgpath;
+ struct block_device *bdev;

spin_lock_irqsave(&m->lock, flags);

@@ -326,19 +325,28 @@ static int map_io(struct multipath *m, s
if ((pgpath && m->queue_io) ||
(!pgpath && m->queue_if_no_path)) {
/* Queue for the daemon to resubmit */
- bio_list_add(&m->queued_ios, bio);
+ list_add_tail(&clone->queuelist, &m->queued_ios);
m->queue_size++;
if ((m->pg_init_required && !m->pg_init_in_progress) ||
!m->queue_io)
queue_work(kmultipathd, &m->process_queued_ios);
pgpath = NULL;
+ clone->q = NULL;
+ clone->rq_disk = NULL;
r = DM_MAPIO_SUBMITTED;
- } else if (pgpath)
- bio->bi_bdev = pgpath->path.dev->bdev;
- else if (__must_push_back(m))
+ } else if (pgpath) {
+ bdev = pgpath->path.dev->bdev;
+ clone->q = bdev_get_queue(bdev);
+ clone->rq_disk = bdev->bd_disk;
+ } else if (__must_push_back(m)) {
+ clone->q = NULL;
+ clone->rq_disk = NULL;
r = DM_MAPIO_REQUEUE;
- else
+ } else {
+ clone->q = NULL;
+ clone->rq_disk = NULL;
r = -EIO; /* Failed */
+ }

mpio->pgpath = pgpath;

@@ -378,30 +386,29 @@ static void dispatch_queued_ios(struct m
{
int r;
unsigned long flags;
- struct bio *bio = NULL, *next;
struct dm_mpath_io *mpio;
union map_info *info;
+ struct request *clone, *n;
+ LIST_HEAD(cl);

spin_lock_irqsave(&m->lock, flags);
- bio = bio_list_get(&m->queued_ios);
+ list_splice_init(&m->queued_ios, &cl);
spin_unlock_irqrestore(&m->lock, flags);

- while (bio) {
- next = bio->bi_next;
- bio->bi_next = NULL;
+ list_for_each_entry_safe(clone, n, &cl, queuelist) {
+ list_del_init(&clone->queuelist);

- info = dm_get_mapinfo(bio);
+ info = dm_get_rq_mapinfo(clone);
mpio = info->ptr;

- r = map_io(m, bio, mpio, 1);
- if (r < 0)
- bio_endio(bio, r);
- else if (r == DM_MAPIO_REMAPPED)
- generic_make_request(bio);
- else if (r == DM_MAPIO_REQUEUE)
- bio_endio(bio, -EIO);
-
- bio = next;
+ r = map_io(m, clone, mpio, 1);
+ if (r < 0 || r == DM_MAPIO_REQUEUE) {
+ mempool_free(mpio, m->mpio_pool);
+ if (r == DM_MAPIO_REQUEUE)
+ r = DM_ENDIO_REQUEUE;
+ dm_end_request(clone, r);
+ } else if (r == DM_MAPIO_REMAPPED)
+ dm_dispatch_request(clone);
}
}

@@ -813,21 +820,25 @@ static void multipath_dtr(struct dm_targ
}

/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;

- mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
- dm_bio_record(&mpio->details, bio);
+ mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
+ if (!mpio)
+ /* ENOMEM, requeue */
+ return DM_MAPIO_REQUEUE;
+ memset(mpio, 0, sizeof(*mpio));

map_context->ptr = mpio;
- bio->bi_rw |= (1 << BIO_RW_FAILFAST);
- r = map_io(m, bio, mpio, 0);
+ clone->cmd_flags |= REQ_FAILFAST;
+
+ r = map_io(m, clone, mpio, 0);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);

@@ -1066,39 +1077,34 @@ void dm_pg_init_complete(struct dm_path
/*
* end_io handling
*/
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
int error, struct dm_mpath_io *mpio)
{
struct hw_handler *hwh = &m->hw_handler;
unsigned err_flags = MP_FAIL_PATH; /* Default behavior */
unsigned long flags;
+ int r;

- if (!error)
+ if (!error && !clone->errors)
return 0; /* I/O complete */

- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
- return error;
-
if (error == -EOPNOTSUPP)
return error;

spin_lock_irqsave(&m->lock, flags);
if (!m->nr_valid_paths) {
- if (__must_push_back(m)) {
- spin_unlock_irqrestore(&m->lock, flags);
- return DM_ENDIO_REQUEUE;
- } else if (!m->queue_if_no_path) {
- spin_unlock_irqrestore(&m->lock, flags);
- return -EIO;
- } else {
- spin_unlock_irqrestore(&m->lock, flags);
- goto requeue;
- }
+ if (__must_push_back(m) || m->queue_if_no_path)
+ r = DM_ENDIO_REQUEUE;
+ else
+ r = -EIO;
+
+ spin_unlock_irqrestore(&m->lock, flags);
+ return r;
}
spin_unlock_irqrestore(&m->lock, flags);

- if (hwh->type && hwh->type->error)
- err_flags = hwh->type->error(hwh, bio);
+ if (hwh->type && hwh->type->error_rq)
+ err_flags = hwh->type->error_rq(hwh, clone);

if (mpio->pgpath) {
if (err_flags & MP_FAIL_PATH)
@@ -1111,21 +1117,10 @@ static int do_end_io(struct multipath *m
if (err_flags & MP_ERROR_IO)
return -EIO;

- requeue:
- dm_bio_restore(&mpio->details, bio);
-
- /* queue for the daemon to resubmit or fail */
- spin_lock_irqsave(&m->lock, flags);
- bio_list_add(&m->queued_ios, bio);
- m->queue_size++;
- if (!m->queue_io)
- queue_work(kmultipathd, &m->process_queued_ios);
- spin_unlock_irqrestore(&m->lock, flags);
-
- return DM_ENDIO_INCOMPLETE; /* io not complete */
+ return DM_ENDIO_REQUEUE;
}

-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+static int multipath_end_io(struct dm_target *ti, struct request *clone,
int error, union map_info *map_context)
{
struct multipath *m = ti->private;
@@ -1134,14 +1129,13 @@ static int multipath_end_io(struct dm_ta
struct path_selector *ps;
int r;

- r = do_end_io(m, bio, error, mpio);
+ r = do_end_io(m, clone, error, mpio);
if (pgpath) {
ps = &pgpath->pg->ps;
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path);
}
- if (r != DM_ENDIO_INCOMPLETE)
- mempool_free(mpio, m->mpio_pool);
+ mempool_free(mpio, m->mpio_pool);

return r;
}
@@ -1380,6 +1374,54 @@ static int multipath_ioctl(struct dm_tar
bdev->bd_disk, cmd, arg);
}

+static int __pgpath_congested(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ if (dm_underlying_device_congested(q))
+ return 1;
+
+ return 0;
+}
+
+static int multipath_congested(struct dm_target *ti)
+{
+ int congested = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ if (m->current_pgpath && m->repeat_count > 1) {
+ /* m->current_pgpath will surely be used at the next mapping time. */
+ if (__pgpath_congested(m->current_pgpath))
+ congested = 1;
+
+ goto out;
+ }
+
+ /*
+ * Reaching here means that path selection will be executed
+ * at the next mapping time.
+ * We run the path selection here and check the congestion status
+ * of the next path.
+ * We also increment repeat_count to avoid selecting a path again
+ * in map_io().
+ */
+ __choose_pgpath(m);
+ if (m->current_pgpath) {
+ if (__pgpath_congested(m->current_pgpath))
+ congested = 1;
+
+ m->repeat_count++;
+ }
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return congested;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1389,13 +1431,14 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io = multipath_end_io,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .congested = multipath_congested,
};

static int __init dm_multipath_init(void)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
09-12-2008, 02:47 PM
Kiyoshi Ueda

dm-mpath: convert to request-based

This patch converts the dm-multipath target from bio-based to request-based.

Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in detail
below.

I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O completion, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.

In do_end_io() of bio-based, the clone bio has all the memory needed
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
That mechanism has almost no overhead.

On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and that adds some overhead.
To avoid that overhead just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core to queue and remap
the original request of the clone request, since the only overhead
for queueing is then freeing the memory of the clone request.

As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.


multipath_busy()
----------------
The target driver returns "busy" only in the following case:
o the target driver will map I/Os if its map() function is called,
and
o the mapped I/Os would wait on the underlying device's queue due to
congestion if the map() function were called now.

In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants
(e.g. the target driver can't map I/Os now, so it wants to kill them).
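
A minimal sketch of that contract (illustrative only; the two helper
functions are hypothetical placeholders, not part of the patch):

static int busy_contract_sketch(struct multipath *m)
{
	/*
	 * If we can't (or don't want to) map I/Os now, we still want
	 * dm core to hand them to us, e.g. to queue or kill them.
	 */
	if (!target_can_map_ios(m))		/* hypothetical helper */
		return 0;

	/*
	 * Mapping is possible, but the I/Os would only wait on a busy
	 * underlying queue: tell dm core we are busy for now.
	 */
	if (all_usable_paths_busy(m))		/* hypothetical helper */
		return 1;

	return 0;				/* ready for dispatch */
}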


Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
---
drivers/md/dm-mpath.c | 195 +++++++++++++++++++++++++++++++++-----------------
1 files changed, 130 insertions(+), 65 deletions(-)

Index: 2.6.27-rc6/drivers/md/dm-mpath.c
===================================================================
--- 2.6.27-rc6.orig/drivers/md/dm-mpath.c
+++ 2.6.27-rc6/drivers/md/dm-mpath.c
@@ -7,8 +7,6 @@

#include "dm.h"
#include "dm-path-selector.h"
-#include "dm-bio-list.h"
-#include "dm-bio-record.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
@@ -82,7 +80,7 @@ struct multipath {
unsigned pg_init_count; /* Number of times pg_init called */

struct work_struct process_queued_ios;
- struct bio_list queued_ios;
+ struct list_head queued_ios;
unsigned queue_size;

struct work_struct trigger_event;
@@ -99,7 +97,6 @@ struct multipath {
*/
struct dm_mpath_io {
struct pgpath *pgpath;
- struct dm_bio_details details;
};

typedef int (*action_fn) (struct pgpath *pgpath);
@@ -180,6 +177,7 @@ static struct multipath *alloc_multipath
m = kzalloc(sizeof(*m), GFP_KERNEL);
if (m) {
INIT_LIST_HEAD(&m->priority_groups);
+ INIT_LIST_HEAD(&m->queued_ios);
spin_lock_init(&m->lock);
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
@@ -304,12 +302,13 @@ static int __must_push_back(struct multi
dm_noflush_suspending(m->ti));
}

-static int map_io(struct multipath *m, struct bio *bio,
+static int map_io(struct multipath *m, struct request *clone,
struct dm_mpath_io *mpio, unsigned was_queued)
{
int r = DM_MAPIO_REMAPPED;
unsigned long flags;
struct pgpath *pgpath;
+ struct block_device *bdev;

spin_lock_irqsave(&m->lock, flags);

@@ -326,16 +325,18 @@ static int map_io(struct multipath *m, s
if ((pgpath && m->queue_io) ||
(!pgpath && m->queue_if_no_path)) {
/* Queue for the daemon to resubmit */
- bio_list_add(&m->queued_ios, bio);
+ list_add_tail(&clone->queuelist, &m->queued_ios);
m->queue_size++;
if ((m->pg_init_required && !m->pg_init_in_progress) ||
!m->queue_io)
queue_work(kmultipathd, &m->process_queued_ios);
pgpath = NULL;
r = DM_MAPIO_SUBMITTED;
- } else if (pgpath)
- bio->bi_bdev = pgpath->path.dev->bdev;
- else if (__must_push_back(m))
+ } else if (pgpath) {
+ bdev = pgpath->path.dev->bdev;
+ clone->q = bdev_get_queue(bdev);
+ clone->rq_disk = bdev->bd_disk;
+ } else if (__must_push_back(m))
r = DM_MAPIO_REQUEUE;
else
r = -EIO; /* Failed */
@@ -378,30 +379,31 @@ static void dispatch_queued_ios(struct m
{
int r;
unsigned long flags;
- struct bio *bio = NULL, *next;
struct dm_mpath_io *mpio;
union map_info *info;
+ struct request *clone, *n;
+ LIST_HEAD(cl);

spin_lock_irqsave(&m->lock, flags);
- bio = bio_list_get(&m->queued_ios);
+ list_splice_init(&m->queued_ios, &cl);
spin_unlock_irqrestore(&m->lock, flags);

- while (bio) {
- next = bio->bi_next;
- bio->bi_next = NULL;
+ list_for_each_entry_safe(clone, n, &cl, queuelist) {
+ list_del_init(&clone->queuelist);

- info = dm_get_mapinfo(bio);
+ info = dm_get_rq_mapinfo(clone);
mpio = info->ptr;

- r = map_io(m, bio, mpio, 1);
- if (r < 0)
- bio_endio(bio, r);
- else if (r == DM_MAPIO_REMAPPED)
- generic_make_request(bio);
- else if (r == DM_MAPIO_REQUEUE)
- bio_endio(bio, -EIO);
-
- bio = next;
+ r = map_io(m, clone, mpio, 1);
+ if (r < 0) {
+ mempool_free(mpio, m->mpio_pool);
+ dm_kill_request(clone, r);
+ } else if (r == DM_MAPIO_REMAPPED)
+ dm_dispatch_request(clone);
+ else if (r == DM_MAPIO_REQUEUE) {
+ mempool_free(mpio, m->mpio_pool);
+ dm_requeue_request(clone);
+ }
}
}

@@ -817,21 +819,24 @@ static void multipath_dtr(struct dm_targ
}

/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;

- mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
- dm_bio_record(&mpio->details, bio);
+ mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
+ if (!mpio)
+ /* ENOMEM, requeue */
+ return DM_MAPIO_REQUEUE;
+ memset(mpio, 0, sizeof(*mpio));

map_context->ptr = mpio;
- bio->bi_rw |= (1 << BIO_RW_FAILFAST);
- r = map_io(m, bio, mpio, 0);
+ clone->cmd_flags |= REQ_FAILFAST;
+ r = map_io(m, clone, mpio, 0);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);

@@ -1105,53 +1110,41 @@ static void activate_path(struct work_st
/*
* end_io handling
*/
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
int error, struct dm_mpath_io *mpio)
{
+ /*
+ * We don't queue any clone request inside the multipath target
+ * during end I/O handling, since those clone requests don't have
+ * bio clones. If we queue them inside the multipath target,
+ * we need to make bio clones, which requires memory allocation.
+ * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+ * don't have bio clones.)
+ * Instead of queueing the clone request here, we queue the original
+ * request into dm core, which will remake a clone request and
+ * clone bios for it and resubmit it later.
+ */
+ int r = DM_ENDIO_REQUEUE;
unsigned long flags;

- if (!error)
+ if (!error && !clone->errors)
return 0; /* I/O complete */

- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
- return error;
-
if (error == -EOPNOTSUPP)
return error;

- spin_lock_irqsave(&m->lock, flags);
- if (!m->nr_valid_paths) {
- if (__must_push_back(m)) {
- spin_unlock_irqrestore(&m->lock, flags);
- return DM_ENDIO_REQUEUE;
- } else if (!m->queue_if_no_path) {
- spin_unlock_irqrestore(&m->lock, flags);
- return -EIO;
- } else {
- spin_unlock_irqrestore(&m->lock, flags);
- goto requeue;
- }
- }
- spin_unlock_irqrestore(&m->lock, flags);
-
if (mpio->pgpath)
fail_path(mpio->pgpath);

- requeue:
- dm_bio_restore(&mpio->details, bio);
-
- /* queue for the daemon to resubmit or fail */
spin_lock_irqsave(&m->lock, flags);
- bio_list_add(&m->queued_ios, bio);
- m->queue_size++;
- if (!m->queue_io)
- queue_work(kmultipathd, &m->process_queued_ios);
+ if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
+ r = -EIO;
spin_unlock_irqrestore(&m->lock, flags);

- return DM_ENDIO_INCOMPLETE; /* io not complete */
+ return r;
}

-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+static int multipath_end_io(struct dm_target *ti, struct request *clone,
int error, union map_info *map_context)
{
struct multipath *m = ti->private;
@@ -1160,14 +1153,13 @@ static int multipath_end_io(struct dm_ta
struct path_selector *ps;
int r;

- r = do_end_io(m, bio, error, mpio);
+ r = do_end_io(m, clone, error, mpio);
if (pgpath) {
ps = &pgpath->pg->ps;
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path);
}
- if (r != DM_ENDIO_INCOMPLETE)
- mempool_free(mpio, m->mpio_pool);
+ mempool_free(mpio, m->mpio_pool);

return r;
}
@@ -1403,6 +1395,78 @@ static int multipath_ioctl(struct dm_tar
bdev->bd_disk, cmd, arg);
}

+static int __pgpath_congested(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ if (dm_underlying_device_congested(q))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * We return "busy", only when we can map I/Os but underlying devices
+ * are congested (so even if we map I/Os now, the I/Os will wait on
+ * the underlying queue).
+ * In other words, if we want to kill I/Os or queue them inside us
+ * due to map unavailability, we don't return "busy". Otherwise,
+ * dm core won't give us the I/Os and we can't do what we want.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+ int busy = 0, has_active = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ struct priority_group *pg;
+ struct pgpath *pgpath;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ /* Guess which priority_group will be used at next mapping time */
+ if (unlikely(!m->current_pgpath && m->next_pg))
+ pg = m->next_pg;
+ else if (likely(m->current_pg))
+ pg = m->current_pg;
+ else
+ /*
+ * We don't know which pg will be used at next mapping time.
+ * We don't call __choose_pgpath() here to avoid triggering
+ * pg_init just by congestion checking.
+ * So we don't know whether underlying devices we will be using
+ * at next mapping time are congested or not. Just try mapping.
+ */
+ goto out;
+
+ /*
+ * If there is at least one uncongested active path, the path selector
+ * will be able to select it. So we consider such a pg as uncongested.
+ */
+ busy = 1;
+ list_for_each_entry(pgpath, &pg->pgpaths, list)
+ if (pgpath->is_active) {
+ has_active = 1;
+
+ if (!__pgpath_congested(pgpath)) {
+ busy = 0;
+ break;
+ }
+ }
+
+ if (!has_active)
+ /*
+ * No active path in this pg, so this pg won't be used and
+ * the current_pg will be changed at next mapping time.
+ * We need to try mapping to determine it.
+ */
+ busy = 0;
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return busy;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1412,13 +1476,14 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io = multipath_end_io,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .busy = multipath_busy,
};

static int __init dm_multipath_init(void)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
10-03-2008, 03:19 PM
Kiyoshi Ueda

dm-mpath: convert to request-based

This patch converts the dm-multipath target from bio-based to request-based.

Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in detail
below.

I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O completion, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.

In do_end_io() of bio-based, the clone bio has all the memory needed
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
That mechanism has almost no overhead.

On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and that adds some overhead.
To avoid that overhead just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core to queue and remap
the original request of the clone request, since the only overhead
for queueing is then freeing the memory of the clone request.

As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.


multipath_busy()
----------------
The target driver returns "busy" only in the following case:
o the target driver will map I/Os if its map() function is called,
and
o the mapped I/Os would wait on the underlying device's queue due to
congestion if the map() function were called now.

In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants
(e.g. the target driver can't map I/Os now, so it wants to kill them).


Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
---
drivers/md/dm-mpath.c | 192 +++++++++++++++++++++++++++++++++-----------------
1 files changed, 127 insertions(+), 65 deletions(-)

Index: 2.6.27-rc8/drivers/md/dm-mpath.c
===================================================================
--- 2.6.27-rc8.orig/drivers/md/dm-mpath.c
+++ 2.6.27-rc8/drivers/md/dm-mpath.c
@@ -7,8 +7,6 @@

#include "dm.h"
#include "dm-path-selector.h"
-#include "dm-bio-list.h"
-#include "dm-bio-record.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
@@ -83,7 +81,7 @@ struct multipath {
unsigned pg_init_count; /* Number of times pg_init called */

struct work_struct process_queued_ios;
- struct bio_list queued_ios;
+ struct list_head queued_ios;
unsigned queue_size;

struct work_struct trigger_event;
@@ -100,7 +98,6 @@ struct multipath {
*/
struct dm_mpath_io {
struct pgpath *pgpath;
- struct dm_bio_details details;
};

typedef int (*action_fn) (struct pgpath *pgpath);
@@ -186,6 +183,7 @@ static struct multipath *alloc_multipath
m = kzalloc(sizeof(*m), GFP_KERNEL);
if (m) {
INIT_LIST_HEAD(&m->priority_groups);
+ INIT_LIST_HEAD(&m->queued_ios);
spin_lock_init(&m->lock);
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
@@ -310,12 +308,13 @@ static int __must_push_back(struct multi
dm_noflush_suspending(m->ti));
}

-static int map_io(struct multipath *m, struct bio *bio,
+static int map_io(struct multipath *m, struct request *clone,
struct dm_mpath_io *mpio, unsigned was_queued)
{
int r = DM_MAPIO_REMAPPED;
unsigned long flags;
struct pgpath *pgpath;
+ struct block_device *bdev;

spin_lock_irqsave(&m->lock, flags);

@@ -332,16 +331,18 @@ static int map_io(struct multipath *m, s
if ((pgpath && m->queue_io) ||
(!pgpath && m->queue_if_no_path)) {
/* Queue for the daemon to resubmit */
- bio_list_add(&m->queued_ios, bio);
+ list_add_tail(&clone->queuelist, &m->queued_ios);
m->queue_size++;
if ((m->pg_init_required && !m->pg_init_in_progress) ||
!m->queue_io)
queue_work(kmultipathd, &m->process_queued_ios);
pgpath = NULL;
r = DM_MAPIO_SUBMITTED;
- } else if (pgpath)
- bio->bi_bdev = pgpath->path.dev->bdev;
- else if (__must_push_back(m))
+ } else if (pgpath) {
+ bdev = pgpath->path.dev->bdev;
+ clone->q = bdev_get_queue(bdev);
+ clone->rq_disk = bdev->bd_disk;
+ } else if (__must_push_back(m))
r = DM_MAPIO_REQUEUE;
else
r = -EIO; /* Failed */
@@ -384,30 +385,31 @@ static void dispatch_queued_ios(struct m
{
int r;
unsigned long flags;
- struct bio *bio = NULL, *next;
struct dm_mpath_io *mpio;
union map_info *info;
+ struct request *clone, *n;
+ LIST_HEAD(cl);

spin_lock_irqsave(&m->lock, flags);
- bio = bio_list_get(&m->queued_ios);
+ list_splice_init(&m->queued_ios, &cl);
spin_unlock_irqrestore(&m->lock, flags);

- while (bio) {
- next = bio->bi_next;
- bio->bi_next = NULL;
+ list_for_each_entry_safe(clone, n, &cl, queuelist) {
+ list_del_init(&clone->queuelist);

- info = dm_get_mapinfo(bio);
+ info = dm_get_rq_mapinfo(clone);
mpio = info->ptr;

- r = map_io(m, bio, mpio, 1);
- if (r < 0)
- bio_endio(bio, r);
- else if (r == DM_MAPIO_REMAPPED)
- generic_make_request(bio);
- else if (r == DM_MAPIO_REQUEUE)
- bio_endio(bio, -EIO);
-
- bio = next;
+ r = map_io(m, clone, mpio, 1);
+ if (r < 0) {
+ mempool_free(mpio, m->mpio_pool);
+ dm_kill_request(clone, r);
+ } else if (r == DM_MAPIO_REMAPPED)
+ dm_dispatch_request(clone);
+ else if (r == DM_MAPIO_REQUEUE) {
+ mempool_free(mpio, m->mpio_pool);
+ dm_requeue_request(clone);
+ }
}
}

@@ -824,21 +826,24 @@ static void multipath_dtr(struct dm_targ
}

/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;

- mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
- dm_bio_record(&mpio->details, bio);
+ mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
+ if (!mpio)
+ /* ENOMEM, requeue */
+ return DM_MAPIO_REQUEUE;
+ memset(mpio, 0, sizeof(*mpio));

map_context->ptr = mpio;
- bio->bi_rw |= (1 << BIO_RW_FAILFAST);
- r = map_io(m, bio, mpio, 0);
+ clone->cmd_flags |= REQ_FAILFAST;
+ r = map_io(m, clone, mpio, 0);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);

@@ -1119,53 +1124,41 @@ static void activate_path(struct work_st
/*
* end_io handling
*/
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
int error, struct dm_mpath_io *mpio)
{
+ /*
+ * We don't queue any clone request inside the multipath target
+ * during end I/O handling, since those clone requests don't have
+ * bio clones. If we queue them inside the multipath target,
+ * we need to make bio clones, which requires memory allocation.
+ * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+ * don't have bio clones.)
+ * Instead of queueing the clone request here, we queue the original
+ * request into dm core, which will remake a clone request and
+ * clone bios for it and resubmit it later.
+ */
+ int r = DM_ENDIO_REQUEUE;
unsigned long flags;

- if (!error)
+ if (!error && !clone->errors)
return 0; /* I/O complete */

- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
- return error;
-
if (error == -EOPNOTSUPP)
return error;

- spin_lock_irqsave(&m->lock, flags);
- if (!m->nr_valid_paths) {
- if (__must_push_back(m)) {
- spin_unlock_irqrestore(&m->lock, flags);
- return DM_ENDIO_REQUEUE;
- } else if (!m->queue_if_no_path) {
- spin_unlock_irqrestore(&m->lock, flags);
- return -EIO;
- } else {
- spin_unlock_irqrestore(&m->lock, flags);
- goto requeue;
- }
- }
- spin_unlock_irqrestore(&m->lock, flags);
-
if (mpio->pgpath)
fail_path(mpio->pgpath);

- requeue:
- dm_bio_restore(&mpio->details, bio);
-
- /* queue for the daemon to resubmit or fail */
spin_lock_irqsave(&m->lock, flags);
- bio_list_add(&m->queued_ios, bio);
- m->queue_size++;
- if (!m->queue_io)
- queue_work(kmultipathd, &m->process_queued_ios);
+ if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
+ r = -EIO;
spin_unlock_irqrestore(&m->lock, flags);

- return DM_ENDIO_INCOMPLETE; /* io not complete */
+ return r;
}

-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+static int multipath_end_io(struct dm_target *ti, struct request *clone,
int error, union map_info *map_context)
{
struct multipath *m = ti->private;
@@ -1174,14 +1167,13 @@ static int multipath_end_io(struct dm_ta
struct path_selector *ps;
int r;

- r = do_end_io(m, bio, error, mpio);
+ r = do_end_io(m, clone, error, mpio);
if (pgpath) {
ps = &pgpath->pg->ps;
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path);
}
- if (r != DM_ENDIO_INCOMPLETE)
- mempool_free(mpio, m->mpio_pool);
+ mempool_free(mpio, m->mpio_pool);

return r;
}
@@ -1417,6 +1409,75 @@ static int multipath_ioctl(struct dm_tar
bdev->bd_disk, cmd, arg);
}

+static int __pgpath_busy(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ return dm_underlying_device_busy(q);
+}
+
+/*
+ * We return "busy", only when we can map I/Os but underlying devices
+ * are busy (so even if we map I/Os now, the I/Os will wait on
+ * the underlying queue).
+ * In other words, if we want to kill I/Os or queue them inside us
+ * due to map unavailability, we don't return "busy". Otherwise,
+ * dm core won't give us the I/Os and we can't do what we want.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+ int busy = 0, has_active = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ struct priority_group *pg;
+ struct pgpath *pgpath;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ /* Guess which priority_group will be used at next mapping time */
+ if (unlikely(!m->current_pgpath && m->next_pg))
+ pg = m->next_pg;
+ else if (likely(m->current_pg))
+ pg = m->current_pg;
+ else
+ /*
+ * We don't know which pg will be used at next mapping time.
+ * We don't call __choose_pgpath() here to avoid triggering
+ * pg_init just by busy checking.
+ * So we don't know whether underlying devices we will be using
+ * at next mapping time are busy or not. Just try mapping.
+ */
+ goto out;
+
+ /*
+ * If there is at least one non-busy active path, the path selector
+ * will be able to select it. So we consider such a pg as not busy.
+ */
+ busy = 1;
+ list_for_each_entry(pgpath, &pg->pgpaths, list)
+ if (pgpath->is_active) {
+ has_active = 1;
+
+ if (!__pgpath_busy(pgpath)) {
+ busy = 0;
+ break;
+ }
+ }
+
+ if (!has_active)
+ /*
+ * No active path in this pg, so this pg won't be used and
+ * the current_pg will be changed at next mapping time.
+ * We need to try mapping to determine it.
+ */
+ busy = 0;
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return busy;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1426,13 +1487,14 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io = multipath_end_io,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .busy = multipath_busy,
};

static int __init dm_multipath_init(void)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
10-24-2008, 07:55 AM
"Nikanth K"

dm-mpath: convert to request-based

On Fri, Sep 12, 2008 at 8:17 PM, Kiyoshi Ueda <k-ueda@ct.jp.nec.com> wrote:
> This patch converts dm-multipath target to request-based from bio-based.
>

Can we keep both the bio-based and the request-based dm-multipath?

Thanks
Nikanth Karthikesan

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
10-28-2008, 03:03 PM
Kiyoshi Ueda

dm-mpath: convert to request-based

Hi Nikanth,

On Fri, 24 Oct 2008 13:25:33 +0530, "Nikanth K" wrote:
> On Fri, Sep 12, 2008 at 8:17 PM, Kiyoshi Ueda wrote:
> > This patch converts dm-multipath target to request-based from bio-based.
>
> Can we keep both the bio-based as well as request-based dm-multipath?

Why do you want to keep bio-based dm-multipath?
In realistic environments, dm-multipath is usually at the bottom
of the device stack. So I think existing working configurations
are still possible with request-based dm-multipath.

If we keep both types of multipath targets, the maintenance cost
would double, and an additional user interface might be needed to
specify which type is used. I can't see any major benefit worth
the cost of keeping bio-based dm-multipath.

I know dm developers sometimes use linear/error maps under multipath
just for testing purpose. If that is your requirement, I can make
request-based targets for linear and error (e.g. named like linear_rq
and error_rq).

Thanks,
Kiyoshi Ueda

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
03-18-2009, 07:55 AM
Kiyoshi Ueda

dm-mpath: convert to request-based

This patch converts the dm-multipath target from bio-based to request-based.

Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in detail
below.

I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O completion, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.

In do_end_io() of bio-based, the clone bio has all the memory needed
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
That mechanism has almost no overhead.

On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and that adds some overhead.
To avoid that overhead just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core to queue and remap
the original request of the clone request, since the only overhead
for queueing is then freeing the memory of the clone request.

As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.


multipath_busy()
----------------
The target driver returns "busy" only in the following case:
o the target driver will map I/Os if its map() function is called,
and
o the mapped I/Os would wait on the underlying device's queue due to
congestion if the map() function were called now.

In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants
(e.g. the target driver can't map I/Os now, so it wants to kill them).


Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Alasdair G Kergon <agk@redhat.com>
---
drivers/md/dm-mpath.c | 194 ++++++++++++++++++++++++++++++++------------------
1 file changed, 128 insertions(+), 66 deletions(-)

Index: 2.6.29-rc8/drivers/md/dm-mpath.c
===================================================================
--- 2.6.29-rc8.orig/drivers/md/dm-mpath.c
+++ 2.6.29-rc8/drivers/md/dm-mpath.c
@@ -8,8 +8,6 @@
#include <linux/device-mapper.h>

#include "dm-path-selector.h"
-#include "dm-bio-list.h"
-#include "dm-bio-record.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
@@ -85,7 +83,7 @@ struct multipath {
unsigned pg_init_count; /* Number of times pg_init called */

struct work_struct process_queued_ios;
- struct bio_list queued_ios;
+ struct list_head queued_ios;
unsigned queue_size;

struct work_struct trigger_event;
@@ -102,7 +100,6 @@ struct multipath {
*/
struct dm_mpath_io {
struct pgpath *pgpath;
- struct dm_bio_details details;
size_t nr_bytes;
};

@@ -200,6 +197,7 @@ static struct multipath *alloc_multipath
m = kzalloc(sizeof(*m), GFP_KERNEL);
if (m) {
INIT_LIST_HEAD(&m->priority_groups);
+ INIT_LIST_HEAD(&m->queued_ios);
spin_lock_init(&m->lock);
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
@@ -325,13 +323,14 @@ static int __must_push_back(struct multi
dm_noflush_suspending(m->ti));
}

-static int map_io(struct multipath *m, struct bio *bio,
+static int map_io(struct multipath *m, struct request *clone,
struct dm_mpath_io *mpio, unsigned was_queued)
{
int r = DM_MAPIO_REMAPPED;
- size_t nr_bytes = bio->bi_size;
+ size_t nr_bytes = blk_rq_bytes(clone);
unsigned long flags;
struct pgpath *pgpath;
+ struct block_device *bdev;

spin_lock_irqsave(&m->lock, flags);

@@ -348,16 +347,18 @@ static int map_io(struct multipath *m, s
if ((pgpath && m->queue_io) ||
(!pgpath && m->queue_if_no_path)) {
/* Queue for the daemon to resubmit */
- bio_list_add(&m->queued_ios, bio);
+ list_add_tail(&clone->queuelist, &m->queued_ios);
m->queue_size++;
if ((m->pg_init_required && !m->pg_init_in_progress) ||
!m->queue_io)
queue_work(kmultipathd, &m->process_queued_ios);
pgpath = NULL;
r = DM_MAPIO_SUBMITTED;
- } else if (pgpath)
- bio->bi_bdev = pgpath->path.dev->bdev;
- else if (__must_push_back(m))
+ } else if (pgpath) {
+ bdev = pgpath->path.dev->bdev;
+ clone->q = bdev_get_queue(bdev);
+ clone->rq_disk = bdev->bd_disk;
+ } else if (__must_push_back(m))
r = DM_MAPIO_REQUEUE;
else
r = -EIO; /* Failed */
@@ -405,30 +406,31 @@ static void dispatch_queued_ios(struct m
{
int r;
unsigned long flags;
- struct bio *bio = NULL, *next;
struct dm_mpath_io *mpio;
union map_info *info;
+ struct request *clone, *n;
+ LIST_HEAD(cl);

spin_lock_irqsave(&m->lock, flags);
- bio = bio_list_get(&m->queued_ios);
+ list_splice_init(&m->queued_ios, &cl);
spin_unlock_irqrestore(&m->lock, flags);

- while (bio) {
- next = bio->bi_next;
- bio->bi_next = NULL;
+ list_for_each_entry_safe(clone, n, &cl, queuelist) {
+ list_del_init(&clone->queuelist);

- info = dm_get_mapinfo(bio);
+ info = dm_get_rq_mapinfo(clone);
mpio = info->ptr;

- r = map_io(m, bio, mpio, 1);
- if (r < 0)
- bio_endio(bio, r);
- else if (r == DM_MAPIO_REMAPPED)
- generic_make_request(bio);
- else if (r == DM_MAPIO_REQUEUE)
- bio_endio(bio, -EIO);
-
- bio = next;
+ r = map_io(m, clone, mpio, 1);
+ if (r < 0) {
+ mempool_free(mpio, m->mpio_pool);
+ dm_kill_unmapped_request(clone, r);
+ } else if (r == DM_MAPIO_REMAPPED)
+ dm_dispatch_request(clone);
+ else if (r == DM_MAPIO_REQUEUE) {
+ mempool_free(mpio, m->mpio_pool);
+ dm_requeue_unmapped_request(clone);
+ }
}
}

@@ -849,21 +851,24 @@ static void multipath_dtr(struct dm_targ
}

/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;

- mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
- dm_bio_record(&mpio->details, bio);
+ mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
+ if (!mpio)
+ /* ENOMEM, requeue */
+ return DM_MAPIO_REQUEUE;
+ memset(mpio, 0, sizeof(*mpio));

map_context->ptr = mpio;
- bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
- r = map_io(m, bio, mpio, 0);
+ clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+ r = map_io(m, clone, mpio, 0);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);

@@ -1145,53 +1150,41 @@ static void activate_path(struct work_st
/*
* end_io handling
*/
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
int error, struct dm_mpath_io *mpio)
{
+ /*
+ * We don't queue any clone request inside the multipath target
+ * during end I/O handling, since those clone requests don't have
+ * bio clones. If we queue them inside the multipath target,
+ * we need to make bio clones, which requires memory allocation.
+ * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+ * don't have bio clones.)
+ * Instead of queueing the clone request here, we queue the original
+ * request into dm core, which will remake a clone request and
+ * clone bios for it and resubmit it later.
+ */
+ int r = DM_ENDIO_REQUEUE;
unsigned long flags;

- if (!error)
+ if (!error && !clone->errors)
return 0; /* I/O complete */

- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
- return error;
-
if (error == -EOPNOTSUPP)
return error;

- spin_lock_irqsave(&m->lock, flags);
- if (!m->nr_valid_paths) {
- if (__must_push_back(m)) {
- spin_unlock_irqrestore(&m->lock, flags);
- return DM_ENDIO_REQUEUE;
- } else if (!m->queue_if_no_path) {
- spin_unlock_irqrestore(&m->lock, flags);
- return -EIO;
- } else {
- spin_unlock_irqrestore(&m->lock, flags);
- goto requeue;
- }
- }
- spin_unlock_irqrestore(&m->lock, flags);
-
if (mpio->pgpath)
fail_path(mpio->pgpath);

- requeue:
- dm_bio_restore(&mpio->details, bio);
-
- /* queue for the daemon to resubmit or fail */
spin_lock_irqsave(&m->lock, flags);
- bio_list_add(&m->queued_ios, bio);
- m->queue_size++;
- if (!m->queue_io)
- queue_work(kmultipathd, &m->process_queued_ios);
+ if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
+ r = -EIO;
spin_unlock_irqrestore(&m->lock, flags);

- return DM_ENDIO_INCOMPLETE; /* io not complete */
+ return r;
}

-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+static int multipath_end_io(struct dm_target *ti, struct request *clone,
int error, union map_info *map_context)
{
struct multipath *m = ti->private;
@@ -1200,14 +1193,13 @@ static int multipath_end_io(struct dm_ta
struct path_selector *ps;
int r;

- r = do_end_io(m, bio, error, mpio);
+ r = do_end_io(m, clone, error, mpio);
if (pgpath) {
ps = &pgpath->pg->ps;
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
- if (r != DM_ENDIO_INCOMPLETE)
- mempool_free(mpio, m->mpio_pool);
+ mempool_free(mpio, m->mpio_pool);

return r;
}
@@ -1437,6 +1429,75 @@ static int multipath_ioctl(struct dm_tar
return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}

+static int __pgpath_busy(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ return dm_underlying_device_busy(q);
+}
+
+/*
+ * We return "busy", only when we can map I/Os but underlying devices
+ * are busy (so even if we map I/Os now, the I/Os will wait on
+ * the underlying queue).
+ * In other words, if we want to kill I/Os or queue them inside us
+ * due to map unavailability, we don't return "busy". Otherwise,
+ * dm core won't give us the I/Os and we can't do what we want.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+ int busy = 0, has_active = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ struct priority_group *pg;
+ struct pgpath *pgpath;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ /* Guess which priority_group will be used at next mapping time */
+ if (unlikely(!m->current_pgpath && m->next_pg))
+ pg = m->next_pg;
+ else if (likely(m->current_pg))
+ pg = m->current_pg;
+ else
+ /*
+ * We don't know which pg will be used at next mapping time.
+ * We don't call __choose_pgpath() here to avoid triggering
+ * pg_init just by busy checking.
+ * So we don't know whether underlying devices we will be using
+ * at next mapping time are busy or not. Just try mapping.
+ */
+ goto out;
+
+ /*
+ * If there is at least one non-busy active path, the path selector
+ * will be able to select it. So we consider such a pg as not busy.
+ */
+ busy = 1;
+ list_for_each_entry(pgpath, &pg->pgpaths, list)
+ if (pgpath->is_active) {
+ has_active = 1;
+
+ if (!__pgpath_busy(pgpath)) {
+ busy = 0;
+ break;
+ }
+ }
+
+ if (!has_active)
+ /*
+ * No active path in this pg, so this pg won't be used and
+ * the current_pg will be changed at next mapping time.
+ * We need to try mapping to determine it.
+ */
+ busy = 0;
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return busy;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1446,13 +1507,14 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io = multipath_end_io,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .busy = multipath_busy,
};

static int __init dm_multipath_init(void)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
