FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website that aims to archive Linux email lists and make them easily accessible to Linux users and developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Cluster Development

 
 
LinkBack Thread Tools
 
Old 02-05-2010, 04:45 AM
Dave Chinner
 
Default gfs2: ordered buffer writes are not sync

Currently gfs2 ordered buffer writes use WRITE_SYNC_PLUG as the IO
type being dispatched. They aren't sync writes; we issue all the IO
pending, then wait for it all. IOWs, this is async IO with a bulk
wait on the end.

We should use normal WRITE tagging for this, and before we start
waiting make sure that all the IO is issued by unplugging the
device. The use of normal WRITEs for these buffers should
significantly reduce the overhead of processing in the cfq elevator
and enable the disk subsystem to get much closer to disk bandwidth
for large sequential writes.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/gfs2/aops.c | 3 +++
fs/gfs2/log.c | 11 +++++++----
fs/gfs2/lops.c | 18 ++++++++++--------
3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 7b8da94..b75784c 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,6 +20,7 @@
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>
+#include <linux/blkdev.h>

#include "gfs2.h"
#include "incore.h"
@@ -34,6 +35,7 @@
#include "super.h"
#include "util.h"
#include "glops.h"
+#include "trace_gfs2.h"


static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
@@ -52,6 +54,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
if (gfs2_is_jdata(ip))
set_buffer_uptodate(bh);
gfs2_trans_add_bh(ip->i_gl, bh, 0);
+ trace_gfs2_submit_bh(bh, WRITE, __func__);
}
}

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index bd26dff..a9797be 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/bio.h>
+#include <linux/blkdev.h>

#include "gfs2.h"
#include "incore.h"
@@ -121,8 +122,8 @@ __acquires(&sdp->sd_log_lock)
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
- trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
- submit_bh(WRITE_SYNC_PLUG, bh);
+ trace_gfs2_submit_bh(bh, WRITE, __func__);
+ submit_bh(WRITE, bh);
} else {
unlock_buffer(bh);
brelse(bh);
@@ -675,8 +676,8 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
lock_buffer(bh);
if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
- trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
- submit_bh(WRITE_SYNC_PLUG, bh);
+ trace_gfs2_submit_bh(bh, WRITE, __func__);
+ submit_bh(WRITE, bh);
} else {
unlock_buffer(bh);
brelse(bh);
@@ -692,6 +693,8 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
struct gfs2_bufdata *bd;
struct buffer_head *bh;

+ blk_run_backing_dev(blk_get_backing_dev_info(sdp->sd_vfs->s_bdev), NULL);
+
gfs2_log_lock(sdp);
while (!list_empty(&sdp->sd_log_le_ordered)) {
bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 7278cf0..0fe2f3c 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -15,6 +15,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/fs.h>
+#include <linux/blkdev.h>

#include "gfs2.h"
#include "incore.h"
@@ -198,8 +199,8 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
}

gfs2_log_unlock(sdp);
- trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
- submit_bh(WRITE_SYNC_PLUG, bh);
+ trace_gfs2_submit_bh(bh, WRITE, __func__);
+ submit_bh(WRITE, bh);
gfs2_log_lock(sdp);

n = 0;
@@ -209,8 +210,8 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
gfs2_log_unlock(sdp);
lock_buffer(bd2->bd_bh);
bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
- trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
- submit_bh(WRITE_SYNC_PLUG, bh);
+ trace_gfs2_submit_bh(bh, WRITE, __func__);
+ submit_bh(WRITE, bh);
gfs2_log_lock(sdp);
if (++n >= num)
break;
@@ -220,6 +221,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
total -= num;
}
gfs2_log_unlock(sdp);
+ blk_run_backing_dev(blk_get_backing_dev_info(sdp->sd_vfs->s_bdev), NULL);
}

static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
@@ -573,8 +575,8 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
ptr = bh_log_ptr(bh);

get_bh(bh);
- trace_gfs2_submit_bh(bh, WRITE_SYNC_PLUG, __func__);
- submit_bh(WRITE_SYNC_PLUG, bh);
+ trace_gfs2_submit_bh(bh, WRITE, __func__);
+ submit_bh(WRITE, bh);
gfs2_log_lock(sdp);
while(!list_empty(list)) {
bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
@@ -600,8 +602,8 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
} else {
bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
}
- trace_gfs2_submit_bh(bh1, WRITE_SYNC_PLUG, __func__);
- submit_bh(WRITE_SYNC_PLUG, bh1);
+ trace_gfs2_submit_bh(bh1, WRITE, __func__);
+ submit_bh(WRITE, bh1);
gfs2_log_lock(sdp);
ptr += 2;
}
--
1.6.5
 
Old 02-05-2010, 09:58 AM
Steven Whitehouse
 
Default gfs2: ordered buffer writes are not sync

Hi,

On Fri, 2010-02-05 at 16:45 +1100, Dave Chinner wrote:
> Currently gfs2 ordered buffer writes use WRITE_SYNC_PLUG as the IO
> type being dispatched. They aren't sync writes; we issue all the IO
> pending, then wait for it all. IOWs, this is async IO with a bulk
> wait on the end.
>
> We should use normal WRITE tagging for this, and before we start
> waiting make sure that all the Io is issued by unplugging the
> device. The use of normal WRITEs for these buffers should
> significantly reduce the overhead of processing in the cfq elevator
> and enable the disk subsystem to get much closer to disk bandwidth
> for large sequential writes.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>

That sounds reasonable. With respect to the new trace point, I'd raise
the same question as per the initial patch in the series. Also I'm
wondering about the calls to blk_run_backing_dev() as I'd thought that
this would happen automatically when we get to wait for the I/O.

Bearing in mind that your tests show no particular increase in
performance for this change, I'm tempted to be a bit more cautious about
applying it for now,

Steve.
 

Thread Tools




All times are GMT. The time now is 07:47 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org