struct work_struct trigger_event;
@@ -89,22 +88,22 @@ struct multipath {
* We must use a mempool of dm_mpath_io structs so that we
* can resubmit bios on error.
*/
- mempool_t *mpio_pool;
+ mempool_t *mpio_pool; /* REMOVE ME */
};
/*
* Context information attached to each bio we process.
*/
-struct dm_mpath_io {
+struct dm_mpath_io { /* REMOVE ME */
struct pgpath *pgpath;
- struct dm_bio_details details;
+ struct dm_rq_details details;
};
/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;
+static void multipath_queue_in_tgt(struct dm_target *ti, struct request *clone,
+ union map_info *map_context)
+{
+ struct multipath *m = (struct multipath *) ti->private;
+ struct dm_mpath_io *mpio = (struct dm_mpath_io *) map_context->ptr;
+ unsigned long flags = 0UL;
+
+ dm_rq_restore(&mpio->details, clone);
+
+ /* queue for the daemon to resubmit or fail */
+ spin_lock_irqsave(&m->lock, flags);
+ list_add_tail(&clone->queuelist, &m->queued_ios);
+ m->queue_size++;
+ if (!m->queue_io)
+ queue_work(kmultipathd, &m->process_queued_ios);
+ spin_unlock_irqrestore(&m->lock, flags);
+}
+
/*
* end_io handling
*/
-static int do_end_io(struct multipath *m, struct bio *bio,
- int error, struct dm_mpath_io *mpio)
+static int do_end_io(struct multipath *m, struct request *clone, int error,
+ struct dm_mpath_io *mpio)
{
struct hw_handler *hwh = &m->hw_handler;
unsigned err_flags = MP_FAIL_PATH; /* Default behavior */
- unsigned long flags;
+ unsigned long flags = 0UL;
- if (!error)
+ if (!clone->errors && !error)
return 0; /* I/O complete */
- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
- return error;
-
- if (error == -EOPNOTSUPP)
- return error;
-
spin_lock_irqsave(&m->lock, flags);
if (!m->nr_valid_paths) {
if (__must_push_back(m)) {
@@ -1097,8 +1122,8 @@ static int do_end_io(struct multipath *m
}
spin_unlock_irqrestore(&m->lock, flags);
- if (hwh->type && hwh->type->error)
- err_flags = hwh->type->error(hwh, bio);
+ if (hwh->type && hwh->type->error_rq)
+ err_flags = hwh->type->error_rq(hwh, clone);
if (mpio->pgpath) {
if (err_flags & MP_FAIL_PATH)
@@ -1112,21 +1137,14 @@ static int do_end_io(struct multipath *m
return -EIO;
requeue:
- dm_bio_restore(&mpio->details, bio);
-
- /* queue for the daemon to resubmit or fail */
- spin_lock_irqsave(&m->lock, flags);
- bio_list_add(&m->queued_ios, bio);
- m->queue_size++;
- if (!m->queue_io)
- queue_work(kmultipathd, &m->process_queued_ios);
- spin_unlock_irqrestore(&m->lock, flags);
-
return DM_ENDIO_INCOMPLETE; /* io not complete */
}
-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
- int error, union map_info *map_context)
+/*
+ * clone->q's lock must be held
+ */
+static int multipath_end_io_first(struct dm_target *ti, struct request *clone,
+ int error, union map_info *map_context)
{
struct multipath *m = ti->private;
struct dm_mpath_io *mpio = map_context->ptr;
@@ -1134,18 +1152,28 @@ static int multipath_end_io(struct dm_ta
struct path_selector *ps;
int r;
- r = do_end_io(m, bio, error, mpio);
+ r = do_end_io(m, clone, error, mpio);
if (pgpath) {
ps = &pgpath->pg->ps;
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path);
}
- if (r != DM_ENDIO_INCOMPLETE)
- mempool_free(mpio, m->mpio_pool);
return r;
}
+static int multipath_end_io(struct dm_target *ti, struct request *clone,
+ int error, union map_info *map_context)
+{
+ struct multipath *m = (struct multipath *) ti->private;
+ struct dm_mpath_io *mpio = (struct dm_mpath_io *) map_context->ptr;
+
+ if (mpio)
+ mempool_free(mpio, m->mpio_pool);
+
+ return 0;
+}
+
/*
* Suspend can't complete until all the I/O is processed so if
* the last path fails we must error any remaining I/O.
@@ -1380,6 +1408,83 @@ static int multipath_ioctl(struct dm_tar
bdev->bd_disk, cmd, arg);
}
+static int __pgpath_congested(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ if (dm_underlying_device_congested(q))
+ return 1;
+
+ return 0;
+}
+
+#if 0
+#define READY_PATH_EXIST 0
+#define ALL_PATH_CONGESTED 1
+#define NO_ACTIVE_PATH 2
+
+static int __pg_congested(struct priority_group *pg)
+{
+ int r = READY_PATH_EXIST, has_active = 0, has_ready = 0;
+ struct pgpath *pgpath;
+
+ list_for_each_entry(pgpath, &pg->list, list) {
+ if (pgpath->path.is_active) {
+ has_active = 1;
+
+ if (!__pgpath_congested(pgpath))
+ has_ready = 1;
+ }
+ }
+
+ if (!has_active)
+ r = NO_ACTIVE_PATH;
+ else if (!has_ready)
+ r = ALL_PATH_CONGESTED;
+
+ return r;
+}
+#endif
+
+static int multipath_congested(struct dm_target *ti)
+{
+ int r = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ unsigned long flags = 0UL;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ if (m->current_pgpath && m->repeat_count > 1) {
+ /* m->current_pgpath is surely used at next mapping time. */
+ if (__pgpath_congested(m->current_pgpath))
+ r = 1;
+
+ goto out;
+ }
+
+ /*
+ * We are here means that path selection is executed
+ * at next mapping time.
+ * We run the path selection here and check congestion status
+ * of the next path.
+ * And increment repeat_count to avoid path selection again
+ * in map_io(). (This is a hack for pre-decrementing repeat_count
+ * in map_io(). Needs to be fixed this repeat_count bug.)
+ */
+ __choose_pgpath(m);
+ if (m->current_pgpath) {
+ if (__pgpath_congested(m->current_pgpath))
+ r = 1;
+
+ m->repeat_count++;
+ }
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return r;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1389,13 +1494,16 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io_first = multipath_end_io_first,
+ .rq_end_io = multipath_end_io,
+ .queue_in_tgt = multipath_queue_in_tgt,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .congested = multipath_congested,
};
static int __init dm_multipath_init(void)
Index: 2.6.25-rc1/drivers/md/dm-rq-record.h
================================================== =================
--- /dev/null
+++ 2.6.25-rc1/drivers/md/dm-rq-record.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2007 NEC Corporation.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_RQ_RECORD_H
+#define DM_RQ_RECORD_H
+
+#include <linux/blkdev.h>
+
+/*
+ * There are lots of mutable fields in the request struct that get
+ * changed by the lower levels of the block layer. Some targets,
+ * such as multipath, may wish to resubmit a request on error. The
+ * functions in this file help the target record and restore the
+ * original request state.
+ */
+struct dm_rq_details {
+ unsigned int cmd_flags;
+ int ref_count;
+};
+
+static inline void dm_rq_record(struct dm_rq_details *rd, struct request *rq)
+{
+ rd->cmd_flags = rq->cmd_flags;
+ rd->ref_count = rq->ref_count;
+}
+
+static inline void dm_rq_restore(struct dm_rq_details *rd, struct request *rq)
+{
+ rq->cmd_flags = rd->cmd_flags;
+ rq->ref_count = rd->ref_count;
+}
+
+#endif
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
03-19-2008, 10:12 PM
Kiyoshi Ueda
dm-mpath: convert to request-based
This patch converts dm-multipath target to request-based from bio-based.
In I/O completion, the clone request doesn't have clone bios,
so the target driver can't resubmit it as it is, even if the target
driver wants to retry it using another path.
To resubmit the clone request, memory allocation for clone bios is
needed. There is no big benefit to resubmit the clone request.
So the target driver doesn't requeue a clone in the target queue.
Instead, the target driver ask dm core for requeueing and remapping
the original request of the clone (do_end_io()).
As a result, the target driver don't need to record/restore
the information of the original request for resubmitting the clone.
Actual path selection is done at congestion state checking time
(multipath_congested()), since we'd like to check the path which
will be definitely used for next I/O.
/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;
+static int __pgpath_congested(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ if (dm_underlying_device_congested(q))
+ return 1;
+
+ return 0;
+}
+
+static int multipath_congested(struct dm_target *ti)
+{
+ int congested = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ if (m->current_pgpath && m->repeat_count > 1) {
+ /* m->current_pgpath is surely used at next mapping time. */
+ if (__pgpath_congested(m->current_pgpath))
+ congested = 1;
+
+ goto out;
+ }
+
+ /*
+ * We are here means that path selection will be executed
+ * at next mapping time.
+ * We run the path selection here and check congestion status
+ * of the next path.
+ * And increment repeat_count to avoid path selection again
+ * in map_io().
+ */
+ __choose_pgpath(m);
+ if (m->current_pgpath) {
+ if (__pgpath_congested(m->current_pgpath))
+ congested = 1;
+
+ m->repeat_count++;
+ }
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return congested;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1389,13 +1431,14 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io = multipath_end_io,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .congested = multipath_congested,
};
static int __init dm_multipath_init(void)
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
09-12-2008, 02:47 PM
Kiyoshi Ueda
dm-mpath: convert to request-based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;
map_context->ptr = mpio;
- bio->bi_rw |= (1 << BIO_RW_FAILFAST);
- r = map_io(m, bio, mpio, 0);
+ clone->cmd_flags |= REQ_FAILFAST;
+ r = map_io(m, clone, mpio, 0);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);
@@ -1105,53 +1110,41 @@ static void activate_path(struct work_st
/*
* end_io handling
*/
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
int error, struct dm_mpath_io *mpio)
{
+ /*
+ * We don't queue any clone request inside the multipath target
+ * during end I/O handling, since those clone requests don't have
+ * bio clones. If we queue them inside the multipath target,
+ * we need to make bio clones, that requires memory allocation.
+ * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+ * don't have bio clones.)
+ * Instead of queueing the clone request here, we queue the original
+ * request into dm core, which will remake a clone request and
+ * clone bios for it and resubmit it later.
+ */
+ int r = DM_ENDIO_REQUEUE;
unsigned long flags;
- if (!error)
+ if (!error && !clone->errors)
return 0; /* I/O complete */
- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
- return error;
-
if (error == -EOPNOTSUPP)
return error;
+static int __pgpath_congested(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ if (dm_underlying_device_congested(q))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * We return "busy", only when we can map I/Os but underlying devices
+ * are congested (so even if we map I/Os now, the I/Os will wait on
+ * the underlying queue).
+ * In other words, if we want to kill I/Os or queue them inside us
+ * due to map unavailability, we don't return "busy". Otherwise,
+ * dm core won't give us the I/Os and we can't do what we want.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+ int busy = 0, has_active = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ struct priority_group *pg;
+ struct pgpath *pgpath;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ /* Guess which priority_group will be used at next mapping time */
+ if (unlikely(!m->current_pgpath && m->next_pg))
+ pg = m->next_pg;
+ else if (likely(m->current_pg))
+ pg = m->current_pg;
+ else
+ /*
+ * We don't know which pg will be used at next mapping time.
+ * We don't call __choose_pgpath() here to avoid to trigger
+ * pg_init just by congestion checking.
+ * So we don't know whether underlying devices we will be using
+ * at next mapping time are congested or not. Just try mapping.
+ */
+ goto out;
+
+ /*
+ * If there is one uncongested active path at least, the path selector
+ * will be able to select it. So we consider such a pg as uncongested.
+ */
+ busy = 1;
+ list_for_each_entry(pgpath, &pg->pgpaths, list)
+ if (pgpath->is_active) {
+ has_active = 1;
+
+ if (!__pgpath_congested(pgpath)) {
+ busy = 0;
+ break;
+ }
+ }
+
+ if (!has_active)
+ /*
+ * No active path in this pg, so this pg won't be used and
+ * the current_pg will be changed at next mapping time.
+ * We need to try mapping to determine it.
+ */
+ busy = 0;
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return busy;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1412,13 +1476,14 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io = multipath_end_io,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .busy = multipath_busy,
};
static int __init dm_multipath_init(void)
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
10-03-2008, 03:19 PM
Kiyoshi Ueda
dm-mpath: convert to request-based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;
map_context->ptr = mpio;
- bio->bi_rw |= (1 << BIO_RW_FAILFAST);
- r = map_io(m, bio, mpio, 0);
+ clone->cmd_flags |= REQ_FAILFAST;
+ r = map_io(m, clone, mpio, 0);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);
@@ -1119,53 +1124,41 @@ static void activate_path(struct work_st
/*
* end_io handling
*/
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
int error, struct dm_mpath_io *mpio)
{
+ /*
+ * We don't queue any clone request inside the multipath target
+ * during end I/O handling, since those clone requests don't have
+ * bio clones. If we queue them inside the multipath target,
+ * we need to make bio clones, that requires memory allocation.
+ * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+ * don't have bio clones.)
+ * Instead of queueing the clone request here, we queue the original
+ * request into dm core, which will remake a clone request and
+ * clone bios for it and resubmit it later.
+ */
+ int r = DM_ENDIO_REQUEUE;
unsigned long flags;
- if (!error)
+ if (!error && !clone->errors)
return 0; /* I/O complete */
- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
- return error;
-
if (error == -EOPNOTSUPP)
return error;
+static int __pgpath_busy(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ return dm_underlying_device_busy(q);
+}
+
+/*
+ * We return "busy", only when we can map I/Os but underlying devices
+ * are busy (so even if we map I/Os now, the I/Os will wait on
+ * the underlying queue).
+ * In other words, if we want to kill I/Os or queue them inside us
+ * due to map unavailability, we don't return "busy". Otherwise,
+ * dm core won't give us the I/Os and we can't do what we want.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+ int busy = 0, has_active = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ struct priority_group *pg;
+ struct pgpath *pgpath;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ /* Guess which priority_group will be used at next mapping time */
+ if (unlikely(!m->current_pgpath && m->next_pg))
+ pg = m->next_pg;
+ else if (likely(m->current_pg))
+ pg = m->current_pg;
+ else
+ /*
+ * We don't know which pg will be used at next mapping time.
+ * We don't call __choose_pgpath() here to avoid to trigger
+ * pg_init just by busy checking.
+ * So we don't know whether underlying devices we will be using
+ * at next mapping time are busy or not. Just try mapping.
+ */
+ goto out;
+
+ /*
+ * If there is one non-busy active path at least, the path selector
+ * will be able to select it. So we consider such a pg as not busy.
+ */
+ busy = 1;
+ list_for_each_entry(pgpath, &pg->pgpaths, list)
+ if (pgpath->is_active) {
+ has_active = 1;
+
+ if (!__pgpath_busy(pgpath)) {
+ busy = 0;
+ break;
+ }
+ }
+
+ if (!has_active)
+ /*
+ * No active path in this pg, so this pg won't be used and
+ * the current_pg will be changed at next mapping time.
+ * We need to try mapping to determine it.
+ */
+ busy = 0;
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return busy;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1426,13 +1487,14 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io = multipath_end_io,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .busy = multipath_busy,
};
static int __init dm_multipath_init(void)
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
10-24-2008, 07:55 AM
"Nikanth K"
dm-mpath: convert to request-based
On Fri, Sep 12, 2008 at 8:17 PM, Kiyoshi Ueda <k-ueda@ct.jp.nec.com> wrote:
> This patch converts dm-multipath target to request-based from bio-based.
>
Can we keep both the bio-based as well as request-based dm-multipath?
Thanks
Nikanth Karthikesan
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
10-28-2008, 03:03 PM
Kiyoshi Ueda
dm-mpath: convert to request-based
Hi Nikanth,
On Fri, 24 Oct 2008 13:25:33 +0530, "Nikanth K" wrote:
> On Fri, Sep 12, 2008 at 8:17 PM, Kiyoshi Ueda wrote:
> > This patch converts dm-multipath target to request-based from bio-based.
>
> Can we keep both the bio-based as well as request-based dm-multipath?
Why do you want to keep bio-based dm-multipath?
In the realistic environment, dm-multipath is usually the bottom
of the stacking devices. So I think existing working configurations
are still possible with request-based dm-multipath.
If we keep both types of multipath targets, maintenance cost would
be doubled and additional user interface might be needed to specify
which type is used. I can't see any major benefit worth paying
the cost of keeping bio-based dm-multipath.
I know dm developers sometimes use linear/error maps under multipath
just for testing purpose. If that is your requirement, I can make
request-based targets for linear and error (e.g. named like linear_rq
and error_rq).
Thanks,
Kiyoshi Ueda
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
03-18-2009, 07:55 AM
Kiyoshi Ueda
dm-mpath: convert to request-based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
/*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
*/
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
union map_info *map_context)
{
int r;
struct dm_mpath_io *mpio;
struct multipath *m = (struct multipath *) ti->private;
map_context->ptr = mpio;
- bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
- r = map_io(m, bio, mpio, 0);
+ clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+ r = map_io(m, clone, mpio, 0);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);
@@ -1145,53 +1150,41 @@ static void activate_path(struct work_st
/*
* end_io handling
*/
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
int error, struct dm_mpath_io *mpio)
{
+ /*
+ * We don't queue any clone request inside the multipath target
+ * during end I/O handling, since those clone requests don't have
+ * bio clones. If we queue them inside the multipath target,
+ * we need to make bio clones, that requires memory allocation.
+ * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+ * don't have bio clones.)
+ * Instead of queueing the clone request here, we queue the original
+ * request into dm core, which will remake a clone request and
+ * clone bios for it and resubmit it later.
+ */
+ int r = DM_ENDIO_REQUEUE;
unsigned long flags;
- if (!error)
+ if (!error && !clone->errors)
return 0; /* I/O complete */
- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
- return error;
-
if (error == -EOPNOTSUPP)
return error;
-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+static int multipath_end_io(struct dm_target *ti, struct request *clone,
int error, union map_info *map_context)
{
struct multipath *m = ti->private;
@@ -1200,14 +1193,13 @@ static int multipath_end_io(struct dm_ta
struct path_selector *ps;
int r;
- r = do_end_io(m, bio, error, mpio);
+ r = do_end_io(m, clone, error, mpio);
if (pgpath) {
ps = &pgpath->pg->ps;
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
- if (r != DM_ENDIO_INCOMPLETE)
- mempool_free(mpio, m->mpio_pool);
+ mempool_free(mpio, m->mpio_pool);
return r;
}
@@ -1437,6 +1429,75 @@ static int multipath_ioctl(struct dm_tar
return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}
+static int __pgpath_busy(struct pgpath *pgpath)
+{
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+ return dm_underlying_device_busy(q);
+}
+
+/*
+ * We return "busy", only when we can map I/Os but underlying devices
+ * are busy (so even if we map I/Os now, the I/Os will wait on
+ * the underlying queue).
+ * In other words, if we want to kill I/Os or queue them inside us
+ * due to map unavailability, we don't return "busy". Otherwise,
+ * dm core won't give us the I/Os and we can't do what we want.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+ int busy = 0, has_active = 0;
+ struct multipath *m = (struct multipath *) ti->private;
+ struct priority_group *pg;
+ struct pgpath *pgpath;
+ unsigned long flags;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ /* Guess which priority_group will be used at next mapping time */
+ if (unlikely(!m->current_pgpath && m->next_pg))
+ pg = m->next_pg;
+ else if (likely(m->current_pg))
+ pg = m->current_pg;
+ else
+ /*
+ * We don't know which pg will be used at next mapping time.
+ * We don't call __choose_pgpath() here to avoid to trigger
+ * pg_init just by busy checking.
+ * So we don't know whether underlying devices we will be using
+ * at next mapping time are busy or not. Just try mapping.
+ */
+ goto out;
+
+ /*
+ * If there is one non-busy active path at least, the path selector
+ * will be able to select it. So we consider such a pg as not busy.
+ */
+ busy = 1;
+ list_for_each_entry(pgpath, &pg->pgpaths, list)
+ if (pgpath->is_active) {
+ has_active = 1;
+
+ if (!__pgpath_busy(pgpath)) {
+ busy = 0;
+ break;
+ }
+ }
+
+ if (!has_active)
+ /*
+ * No active path in this pg, so this pg won't be used and
+ * the current_pg will be changed at next mapping time.
+ * We need to try mapping to determine it.
+ */
+ busy = 0;
+
+out:
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return busy;
+}
+
/*-----------------------------------------------------------------
* Module setup
*---------------------------------------------------------------*/
@@ -1446,13 +1507,14 @@ static struct target_type multipath_targ
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,
- .map = multipath_map,
- .end_io = multipath_end_io,
+ .map_rq = multipath_map,
+ .rq_end_io = multipath_end_io,
.presuspend = multipath_presuspend,
.resume = multipath_resume,
.status = multipath_status,
.message = multipath_message,
.ioctl = multipath_ioctl,
+ .busy = multipath_busy,
};
static int __init dm_multipath_init(void)
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel