So, here's my shot at it. After this patch, barrier no longer
dictates the ordering of other requests. The block layer sequences
the barrier request without interfering with other requests (not even
elevator draining). Multiple pending barriers are handled by saving
those in a separate queue and servicing them one by one. Basically,
barrier sequences form a separate FIFO command stream independent of
other requests and all the ordering between the two streams is
filesystem's responsibility.
Ordered tag support is dropped as no one seems to be making any
meaningful use of it. I'm fairly skeptical about its usefulness
anyway. The only thing ordered tag saves is latencies between command
completions and issues in barrier sequences, which isn't much to begin
with and puts additional ordering restrictions compared to ordering in
software (ordered tag commands will unnecessary affect processing of
simple tag commands).
Lightly tested for all three BAR (!WC), FLUSH and FUA cases. The
multiple pending barrier code path isn't tested yet.
Christoph, does this look like something the filesystems can use or
have I misunderstood something?
- if (!q->ordseq) {
- if (!is_barrier)
- return true;
-
- if (q->next_ordered != QUEUE_ORDERED_NONE)
- return start_ordered(q, rqp);
- else {
- /*
- * Queue ordering not supported. Terminate
- * with prejudice.
- */
- blk_dequeue_request(rq);
- __blk_end_request_all(rq, -EOPNOTSUPP);
- *rqp = NULL;
- return false;
- }
+ if (!blk_barrier_rq(rq))
+ return rq;
+
+ if (q->ordseq) {
+ /*
+ * Barrier is already in progress and they can't be
+ * processed in parallel. Queue for later processing.
+ */
+ list_move_tail(&rq->queuelist, &q->pending_barriers);
+ return NULL;
+ }
+
+ if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
+ /*
+ * Queue ordering not supported. Terminate
+ * with prejudice.
+ */
+ blk_dequeue_request(rq);
+ __blk_end_request_all(rq, -EOPNOTSUPP);
+ return NULL;
}
/*
- * Ordered sequence in progress
+ * Start a new ordered sequence
*/
+ q->orderr = 0;
+ q->ordered = q->next_ordered;
+ q->ordseq |= QUEUE_ORDSEQ_STARTED;
- /* Special requests are not subject to ordering rules. */
- if (!blk_fs_request(rq) &&
- rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
- return true;
-
- if (q->ordered & QUEUE_ORDERED_BY_TAG) {
- /* Ordered by tag. Blocking the next barrier is enough. */
- if (is_barrier && rq != &q->bar_rq)
- *rqp = NULL;
- } else {
- /* Ordered by draining. Wait for turn. */
- WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
- if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
- *rqp = NULL;
- }
+ /*
+ * For an empty barrier, there's no actual BAR request, which
+ * in turn makes POSTFLUSH unnecessary. Mask them off.
+ */
+ if (!blk_rq_sectors(rq))
+ q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+ QUEUE_ORDERED_DO_POSTFLUSH);
+
+ /* stash away the original request */
+ blk_dequeue_request(rq);
+ q->orig_bar_rq = rq;
+
+ if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
+ skip |= QUEUE_ORDSEQ_PREFLUSH;
+
+ if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
+ skip |= QUEUE_ORDSEQ_BAR;
+
+ if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
+ skip |= QUEUE_ORDSEQ_POSTFLUSH;
- return true;
+ /* complete skipped sequences and return the first sequence */
+ return blk_ordered_complete_seq(q, skip, 0);
}
static void bio_end_empty_barrier(struct bio *bio, int err)
Index: work/include/linux/blkdev.h
================================================== =================
--- work.orig/include/linux/blkdev.h
+++ work/include/linux/blkdev.h
@@ -106,7 +106,6 @@ enum rq_flag_bits {
__REQ_FAILED, /* set if the request failed */
__REQ_QUIET, /* don't worry about errors */
__REQ_PREEMPT, /* set for "ide_preempt" requests */
- __REQ_ORDERED_COLOR, /* is before or after barrier */
__REQ_RW_SYNC, /* request is sync (sync write or read) */
__REQ_ALLOCED, /* request came from our alloc pool */
__REQ_RW_META, /* metadata io request */
@@ -135,7 +134,6 @@ enum rq_flag_bits {
#define REQ_FAILED (1 << __REQ_FAILED)
#define REQ_QUIET (1 << __REQ_QUIET)
#define REQ_PREEMPT (1 << __REQ_PREEMPT)
-#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
#define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
#define REQ_ALLOCED (1 << __REQ_ALLOCED)
#define REQ_RW_META (1 << __REQ_RW_META)
@@ -437,9 +435,10 @@ struct request_queue
* reserved for flush operations
*/
unsigned int ordered, next_ordered, ordseq;
- int orderr, ordcolor;
- struct request pre_flush_rq, bar_rq, post_flush_rq;
- struct request *orig_bar_rq;
+ int orderr;
+ struct request bar_rq;
+ struct request *orig_bar_rq;
+ struct list_head pending_barriers;
struct mutex sysfs_lock;
@@ -543,47 +542,33 @@ enum {
* Hardbarrier is supported with one of the following methods.
*
* NONE : hardbarrier unsupported
- * DRAIN : ordering by draining is enough
- * DRAIN_FLUSH : ordering by draining w/ pre and post flushes
- * DRAIN_FUA : ordering by draining w/ pre flush and FUA write
- * TAG : ordering by tag is enough
- * TAG_FLUSH : ordering by tag w/ pre and post flushes
- * TAG_FUA : ordering by tag w/ pre flush and FUA write
- */
- QUEUE_ORDERED_BY_TAG = 0x02,
- QUEUE_ORDERED_DO_PREFLUSH = 0x10,
- QUEUE_ORDERED_DO_BAR = 0x20,
- QUEUE_ORDERED_DO_POSTFLUSH = 0x40,
- QUEUE_ORDERED_DO_FUA = 0x80,
+ * BAR : writing out barrier is enough
+ * FLUSH : barrier and surrounding pre and post flushes
+ * FUA : FUA barrier w/ pre flush
+ */
+ QUEUE_ORDERED_DO_PREFLUSH = 1 << 0,
+ QUEUE_ORDERED_DO_BAR = 1 << 1,
+ QUEUE_ORDERED_DO_POSTFLUSH = 1 << 2,
+ QUEUE_ORDERED_DO_FUA = 1 << 3,
/* If barriers are supported, tell block layer that queue is ordered */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
- blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
+ blk_queue_ordered(q, QUEUE_ORDERED_FLUSH,
virtblk_prepare_flush);
else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
- blk_queue_ordered(q, QUEUE_ORDERED_TAG, NULL);
+ blk_queue_ordered(q, QUEUE_ORDERED_BAR, NULL);
/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
Index: work/drivers/scsi/sd.c
================================================== =================
--- work.orig/drivers/scsi/sd.c
+++ work/drivers/scsi/sd.c
@@ -2103,15 +2103,13 @@ static int sd_revalidate_disk(struct gen
/*
* We now have all cache related info, determine how we deal
- * with ordered requests. Note that as the current SCSI
- * dispatch function can alter request order, we cannot use
- * QUEUE_ORDERED_TAG_* even when ordered tag is supported.
+ * with ordered requests.
*/
if (sdkp->WCE)
ordered = sdkp->DPOFUA
- ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH;
+ ? QUEUE_ORDERED_FUA : QUEUE_ORDERED_FLUSH;
else
- ordered = QUEUE_ORDERED_DRAIN;
+ ordered = QUEUE_ORDERED_BAR;
-/*
- * add-request adds a request to the linked list.
- * queue lock is held and interrupts disabled, as we muck with the
- * request queue list.
- */
-static inline void add_request(struct request_queue *q, struct request *req)
-{
- drive_stat_acct(req, 1);
-
- /*
- * elevator indicated where it wants this request to be
- * inserted at elevator_merge time
- */
- __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
-}
-
static void part_round_stats_single(int cpu, struct hd_struct *part,
unsigned long now)
{
@@ -1184,6 +1169,7 @@ static int __make_request(struct request
const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+ int where = ELEVATOR_INSERT_SORT;
int rw_flags;
if (bio_rw_flagged(bio, BIO_RW_BARRIER) &&
@@ -1191,6 +1177,7 @@ static int __make_request(struct request
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
+
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -1200,7 +1187,12 @@ static int __make_request(struct request
spin_lock_irq(q->queue_lock);
- if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
+ if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+ where = ELEVATOR_INSERT_ORDERED;
+ goto get_rq;
+ }
+
+ if (elv_queue_empty(q))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
@@ -1297,7 +1289,10 @@ get_rq:
req->cpu = blk_cpu_to_group(smp_processor_id());
if (queue_should_plug(q) && elv_queue_empty(q))
blk_plug_device(q);
- add_request(q, req);
+
+ /* insert the request into the elevator */
+ drive_stat_acct(req, 1);
+ __elv_add_request(q, req, where, 0);
out:
if (unplug || !queue_should_plug(q))
__generic_unplug_device(q);
Index: work/block/elevator.c
================================================== =================
--- work.orig/block/elevator.c
+++ work/block/elevator.c
@@ -564,7 +564,7 @@ void elv_requeue_request(struct request_
/* If barriers are supported, tell block layer that queue is ordered */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
- blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
+ blk_queue_ordered(q, QUEUE_ORDERED_FLUSH,
virtblk_prepare_flush);
else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
- blk_queue_ordered(q, QUEUE_ORDERED_TAG, NULL);
+ blk_queue_ordered(q, QUEUE_ORDERED_BAR, NULL);
/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
Index: work/drivers/scsi/sd.c
================================================== =================
--- work.orig/drivers/scsi/sd.c
+++ work/drivers/scsi/sd.c
@@ -2103,15 +2103,13 @@ static int sd_revalidate_disk(struct gen
/*
* We now have all cache related info, determine how we deal
- * with ordered requests. Note that as the current SCSI
- * dispatch function can alter request order, we cannot use
- * QUEUE_ORDERED_TAG_* even when ordered tag is supported.
+ * with ordered requests.
*/
if (sdkp->WCE)
ordered = sdkp->DPOFUA
- ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH;
+ ? QUEUE_ORDERED_FUA : QUEUE_ORDERED_FLUSH;
else
- ordered = QUEUE_ORDERED_DRAIN;
+ ordered = QUEUE_ORDERED_BAR;
-/*
- * add-request adds a request to the linked list.
- * queue lock is held and interrupts disabled, as we muck with the
- * request queue list.
- */
-static inline void add_request(struct request_queue *q, struct request *req)
-{
- drive_stat_acct(req, 1);
-
- /*
- * elevator indicated where it wants this request to be
- * inserted at elevator_merge time
- */
- __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
-}
-
static void part_round_stats_single(int cpu, struct hd_struct *part,
unsigned long now)
{
@@ -1184,6 +1169,7 @@ static int __make_request(struct request
const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+ int where = ELEVATOR_INSERT_SORT;
int rw_flags;
if (bio_rw_flagged(bio, BIO_RW_BARRIER) &&
@@ -1191,6 +1177,7 @@ static int __make_request(struct request
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
+
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -1200,7 +1187,12 @@ static int __make_request(struct request
spin_lock_irq(q->queue_lock);
- if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
+ if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+ where = ELEVATOR_INSERT_ORDERED;
+ goto get_rq;
+ }
+
+ if (elv_queue_empty(q))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
@@ -1297,7 +1289,10 @@ get_rq:
req->cpu = blk_cpu_to_group(smp_processor_id());
if (queue_should_plug(q) && elv_queue_empty(q))
blk_plug_device(q);
- add_request(q, req);
+
+ /* insert the request into the elevator */
+ drive_stat_acct(req, 1);
+ __elv_add_request(q, req, where, 0);
out:
if (unplug || !queue_should_plug(q))
__generic_unplug_device(q);
Index: work/block/elevator.c
================================================== =================
--- work.orig/block/elevator.c
+++ work/block/elevator.c
@@ -564,7 +564,7 @@ void elv_requeue_request(struct request_
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
08-08-2010, 02:31 PM
Christoph Hellwig
relaxed barriers
On Sat, Aug 07, 2010 at 12:13:06PM +0200, Tejun Heo wrote:
> The patch was on top of v2.6.35 but was generated against dirty tree
> and wouldn't apply cleanly. Here's the proper one.
Here's an updated version:
(a) ported to Jens' current block tree
(b) optimize barriers on devices not requiring flushes to be no-ops
(b) redo the blk_queue_ordered interface to just set QUEUE_HAS_FLUSH
and QUEUE_HAS_FUA flags.
- /*
- * For an empty barrier, there's no actual BAR request, which
- * in turn makes POSTFLUSH unnecessary. Mask them off.
- */
- if (!blk_rq_sectors(rq)) {
- q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
- QUEUE_ORDERED_DO_POSTFLUSH);
+ if (!(q->cache_features & QUEUE_HAS_FLUSH)) {
/*
- * Empty barrier on a write-through device w/ ordered
- * tag has no command to issue and without any command
- * to issue, ordering by tag can't be used. Drain
- * instead.
+ * No flush required. We can just send on write requests
+ * and complete cache flush requests ASAP.
*/
- if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
- !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
- q->ordered &= ~QUEUE_ORDERED_BY_TAG;
- q->ordered |= QUEUE_ORDERED_BY_DRAIN;
+ if (blk_rq_sectors(rq)) {
+ rq->cmd_flags &= ~REQ_HARDBARRIER;
+ return rq;
}
+ blk_dequeue_request(rq);
+ __blk_end_request_all(rq, 0);
+ return NULL;
}
- /* stash away the original request */
- blk_dequeue_request(rq);
- q->orig_bar_rq = rq;
- rq = NULL;
-
- /*
- * Queue ordered sequence. As we stack them at the head, we
- * need to queue in reverse order. Note that we rely on that
- * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
- * request gets inbetween ordered sequence.
- */
- if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
- queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
- rq = &q->post_flush_rq;
- } else
- skip |= QUEUE_ORDSEQ_POSTFLUSH;
-
- if (q->ordered & QUEUE_ORDERED_DO_BAR) {
- rq = &q->bar_rq;
-
- /* initialize proxy request and queue it */
- blk_rq_init(q, rq);
- if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
- rq->cmd_flags |= REQ_WRITE;
- if (q->ordered & QUEUE_ORDERED_DO_FUA)
- rq->cmd_flags |= REQ_FUA;
- init_request_from_bio(rq, q->orig_bar_rq->bio);
- rq->end_io = bar_end_io;
-
- elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
- } else
- skip |= QUEUE_ORDSEQ_BAR;
-
- if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
- queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
- rq = &q->pre_flush_rq;
- } else
- skip |= QUEUE_ORDSEQ_PREFLUSH;
-
- if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
- rq = NULL;
- else
- skip |= QUEUE_ORDSEQ_DRAIN;
+ if (q->ordseq) {
+ /*
+ * Barrier is already in progress and they can't be
+ * processed in parallel. Queue for later processing.
+ */
+ list_move_tail(&rq->queuelist, &q->pending_barriers);
+ return NULL;
+ }
- *rqp = rq;
/*
- * Complete skipped sequences. If whole sequence is complete,
- * return false to tell elevator that this request is gone.
+ * Start a new ordered sequence
*/
- return !blk_ordered_complete_seq(q, skip, 0);
-}
-
-bool blk_do_ordered(struct request_queue *q, struct request **rqp)
-{
- struct request *rq = *rqp;
- const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
- (rq->cmd_flags & REQ_HARDBARRIER);
-
- if (!q->ordseq) {
- if (!is_barrier)
- return true;
-
- if (q->next_ordered != QUEUE_ORDERED_NONE)
- return start_ordered(q, rqp);
- else {
- /*
- * Queue ordering not supported. Terminate
- * with prejudice.
- */
- blk_dequeue_request(rq);
- __blk_end_request_all(rq, -EOPNOTSUPP);
- *rqp = NULL;
- return false;
- }
- }
+ q->orderr = 0;
+ q->ordseq |= QUEUE_ORDSEQ_STARTED;
/*
- * Ordered sequence in progress
+ * For an empty barrier, there's no actual BAR request, which
+ * in turn makes POSTFLUSH unnecessary. Mask them off.
*/
+ if (!blk_rq_sectors(rq))
+ skip |= (QUEUE_ORDSEQ_BAR|QUEUE_ORDSEQ_POSTFLUSH);
+ else if (q->cache_features & QUEUE_HAS_FUA)
+ skip |= QUEUE_ORDSEQ_POSTFLUSH;
- /* Special requests are not subject to ordering rules. */
- if (rq->cmd_type != REQ_TYPE_FS &&
- rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
- return true;
-
- if (q->ordered & QUEUE_ORDERED_BY_TAG) {
- /* Ordered by tag. Blocking the next barrier is enough. */
- if (is_barrier && rq != &q->bar_rq)
- *rqp = NULL;
- } else {
- /* Ordered by draining. Wait for turn. */
- WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
- if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
- *rqp = NULL;
- }
+ /* stash away the original request */
+ blk_dequeue_request(rq);
+ q->orig_bar_rq = rq;
- return true;
+ /* complete skipped sequences and return the first sequence */
+ return blk_ordered_complete_seq(q, skip, 0);
}
static void bio_end_empty_barrier(struct bio *bio, int err)
Index: linux-2.6/include/linux/blkdev.h
================================================== =================
--- linux-2.6.orig/include/linux/blkdev.h 2010-08-07 12:53:23.774479189 -0400
+++ linux-2.6/include/linux/blkdev.h 2010-08-07 14:51:42.751479190 -0400
@@ -354,13 +354,20 @@ struct request_queue
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace *blk_trace;
#endif
+
+ /*
+ * Features this queue understands.
+ */
+ unsigned int cache_features;
+
/*
* reserved for flush operations
*/
- unsigned int ordered, next_ordered, ordseq;
- int orderr, ordcolor;
- struct request pre_flush_rq, bar_rq, post_flush_rq;
- struct request *orig_bar_rq;
+ unsigned int ordseq;
+ int orderr;
+ struct request bar_rq;
+ struct request *orig_bar_rq;
+ struct list_head pending_barriers;
- if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) {
- /*
- * If the FLUSH feature is supported we do have support for
- * flushing a volatile write cache on the host. Use that
- * to implement write barrier support.
- */
- blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
- } else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) {
- /*
- * If the BARRIER feature is supported the host expects us
- * to order request by tags. This implies there is not
- * volatile write cache on the host, and that the host
- * never re-orders outstanding I/O. This feature is not
- * useful for real life scenarious and deprecated.
- */
- blk_queue_ordered(q, QUEUE_ORDERED_TAG);
- } else {
- /*
- * If the FLUSH feature is not supported we must assume that
- * the host does not perform any kind of volatile write
- * caching. We still need to drain the queue to provider
- * proper barrier semantics.
- */
- blk_queue_ordered(q, QUEUE_ORDERED_DRAIN);
- }
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
+ blk_queue_cache_features(q, QUEUE_HAS_FLUSH);
/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
Index: linux-2.6/drivers/scsi/sd.c
================================================== =================
--- linux-2.6.orig/drivers/scsi/sd.c 2010-08-07 12:53:23.872479189 -0400
+++ linux-2.6/drivers/scsi/sd.c 2010-08-07 14:54:47.812479189 -0400
@@ -2109,7 +2109,7 @@ static int sd_revalidate_disk(struct gen
struct scsi_disk *sdkp = scsi_disk(disk);
struct scsi_device *sdp = sdkp->device;
unsigned char *buffer;
- unsigned ordered;
+ unsigned ordered = 0;
SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
"sd_revalidate_disk
"));
@@ -2151,17 +2151,14 @@ static int sd_revalidate_disk(struct gen
/*
* We now have all cache related info, determine how we deal
- * with ordered requests. Note that as the current SCSI
- * dispatch function can alter request order, we cannot use
- * QUEUE_ORDERED_TAG_* even when ordered tag is supported.
+ * with barriers.
*/
- if (sdkp->WCE)
- ordered = sdkp->DPOFUA
- ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH;
- else
- ordered = QUEUE_ORDERED_DRAIN;
-
- blk_queue_ordered(sdkp->disk->queue, ordered);
+ if (sdkp->WCE) {
+ ordered |= QUEUE_HAS_FLUSH;
+ if (sdkp->DPOFUA)
+ ordered |= QUEUE_HAS_FUA;
+ }
+ blk_queue_cache_features(sdkp->disk->queue, ordered);
-/*
- * add-request adds a request to the linked list.
- * queue lock is held and interrupts disabled, as we muck with the
- * request queue list.
- */
-static inline void add_request(struct request_queue *q, struct request *req)
-{
- drive_stat_acct(req, 1);
-
- /*
- * elevator indicated where it wants this request to be
- * inserted at elevator_merge time
- */
- __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
-}
-
static void part_round_stats_single(int cpu, struct hd_struct *part,
unsigned long now)
{
@@ -1201,13 +1186,9 @@ static int __make_request(struct request
const bool sync = (bio->bi_rw & REQ_SYNC);
const bool unplug = (bio->bi_rw & REQ_UNPLUG);
const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+ int where = ELEVATOR_INSERT_SORT;
int rw_flags;
- if ((bio->bi_rw & REQ_HARDBARRIER) &&
- (q->next_ordered == QUEUE_ORDERED_NONE)) {
- bio_endio(bio, -EOPNOTSUPP);
- return 0;
- }
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -1217,7 +1198,12 @@ static int __make_request(struct request
spin_lock_irq(q->queue_lock);
- if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
+ if (bio->bi_rw & REQ_HARDBARRIER) {
+ where = ELEVATOR_INSERT_ORDERED;
+ goto get_rq;
+ }
+
+ if (elv_queue_empty(q))
goto get_rq;
- xlvbd_barrier(info);
-
if (vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
@@ -662,8 +636,6 @@ static irqreturn_t blkif_interrupt(int i
printk(KERN_WARNING "blkfront: %s: write barrier op failed
",
info->gd->disk_name);
error = -EOPNOTSUPP;
- info->feature_barrier = QUEUE_ORDERED_NONE;
- xlvbd_barrier(info);
}
/* fall through */
case BLKIF_OP_READ:
@@ -1073,24 +1045,6 @@ static void blkfront_connect(struct blkf
"feature-barrier", "%lu", &barrier,
NULL);
- /*
- * If there's no "feature-barrier" defined, then it means
- * we're dealing with a very old backend which writes
- * synchronously; draining will do what needs to get done.
- *
- * If there are barriers, then we can do full queued writes
- * with tagged barriers.
- *
- * If barriers are not supported, then there's no much we can
- * do, so just set ordering to NONE.
- */
- if (err)
- info->feature_barrier = QUEUE_ORDERED_DRAIN;
- else if (barrier)
- info->feature_barrier = QUEUE_ORDERED_TAG;
- else
- info->feature_barrier = QUEUE_ORDERED_NONE;
-
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
Index: linux-2.6/drivers/ide/ide-disk.c
================================================== =================
--- linux-2.6.orig/drivers/ide/ide-disk.c 2010-08-07 12:53:23.889479189 -0400
+++ linux-2.6/drivers/ide/ide-disk.c 2010-08-07 15:00:30.215479189 -0400
@@ -518,12 +518,13 @@ static int ide_do_setfeature(ide_drive_t
if (drive->dev_flags & IDE_DFLAG_WCACHE) {
+ u16 *id = drive->id;
unsigned long long capacity;
int barrier;
+
/*
* We must avoid issuing commands a drive does not
* understand or we may crash it. We check flush cache
@@ -543,13 +544,18 @@ static void update_ordered(ide_drive_t *
drive->name, barrier ? "" : "not ");
blk_queue_max_segments(queue, -1);
blk_queue_max_segment_size(queue, dev->bounce_size);
Index: linux-2.6/include/linux/blk_types.h
================================================== =================
--- linux-2.6.orig/include/linux/blk_types.h 2010-08-07 12:53:23.793479189 -0400
+++ linux-2.6/include/linux/blk_types.h 2010-08-07 12:53:53.243479190 -0400
@@ -141,7 +141,6 @@ enum rq_flag_bits {
__REQ_FAILED, /* set if the request failed */
__REQ_QUIET, /* don't worry about errors */
__REQ_PREEMPT, /* set for "ide_preempt" requests */
- __REQ_ORDERED_COLOR, /* is before or after barrier */
__REQ_ALLOCED, /* request came from our alloc pool */
__REQ_COPY_USER, /* contains copies of user pages */
__REQ_INTEGRITY, /* integrity metadata has been remapped */
@@ -181,7 +180,6 @@ enum rq_flag_bits {
#define REQ_FAILED (1 << __REQ_FAILED)
#define REQ_QUIET (1 << __REQ_QUIET)
#define REQ_PREEMPT (1 << __REQ_PREEMPT)
-#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
#define REQ_ALLOCED (1 << __REQ_ALLOCED)
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
08-09-2010, 02:50 PM
Tejun Heo
relaxed barriers
On 08/08/2010 04:31 PM, Christoph Hellwig wrote:
> On Sat, Aug 07, 2010 at 12:13:06PM +0200, Tejun Heo wrote:
>> The patch was on top of v2.6.35 but was generated against dirty tree
>> and wouldn't apply cleanly. Here's the proper one.
>
> Here's an updated version:
>
> (a) ported to Jens' current block tree
> (b) optimize barriers on devices not requiring flushes to be no-ops
> (b) redo the blk_queue_ordered interface to just set QUEUE_HAS_FLUSH
> and QUEUE_HAS_FUA flags.
Nice. I'm working on a properly split patchset implementing
REQ_FLUSH/FUA based interface, which replaces REQ_HARDBARRIER. Empty
request w/ REQ_FLUSH just flushes cache but has no other ordering
restrictions. REQ_FLUSH + data means preflush + data write. REQ_FUA
+ data means data would be committed to NV media on completion.
REQ_FLUSH + FUA + data means preflush + NV data write. All FLUSH/FUA
requests w/ data are ordered only against each other. I think I'll be
able to post in several days.
Thanks.
--
tejun
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel