Prior to this patch, we have two ways of sending i/o to the log.
One of those is used when we need to allocate both the data
to be written itself and also a buffer head to submit it. This
is done via sb_getblk and friends. This is used mostly for writing
log headers.
The other method is used when writing blocks which have some
in-place counterpart. This is the case for all the metadata
blocks which are journalled, and when journaled data is in use,
for unescaped journalled data blocks.
This patch replaces both of those two methods, and about half
a dozen separate i/o submission points with a single i/o
submission function. We also go direct to bio rather than
using buffer heads, since this allows us to build i/o
requests of the maximum size for the block device in
question. It also reduces the memory required for flushing
the log, which can be very useful in low memory situations.
There are some further simplifications which can be made
and I'll probably leave those for a later patch.
This has survived some tests with postmark and appears
to be merging the log i/o nicely. It needs some further
testing to ensure it is solid, but it seems to be heading
in the right direction, anyway.
-static inline __be64 *bh_ptr_end(struct buffer_head *bh)
+/**
+ * gfs2_end_log_write_bh - end log write of pagecache data with buffers
+ * @sdp: The superblock
+ * @bvec: The bio_vec
+ * @error: The i/o status
+ *
+ * This finds the relavent buffers and unlocks then and sets the
+ * error flag according to the status of the i/o request. This is
+ * used when the log is writing data which has an in-place version
+ * that is pinned in the pagecache.
+ */
+
+static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
+ int error)
{
- return (__force __be64 *)(bh->b_data + bh->b_size);
+ struct buffer_head *bh, *next;
+ struct page *page = bvec->bv_page;
+ unsigned size;
+
+ bh = page_buffers(page);
+ size = bvec->bv_len;
+ while (bh_offset(bh) < bvec->bv_offset)
+ bh = bh->b_this_page;
+ do {
+ if (error)
+ set_buffer_write_io_error(bh);
+ unlock_buffer(bh);
+ next = bh->b_this_page;
+ size -= bh->b_size;
+ brelse(bh);
+ bh = next;
+ } while(bh && size);
}
/**
- * gfs2_log_write_endio - End of I/O for a log buffer
- * @bh: The buffer head
- * @uptodate: I/O Status
+ * gfs2_end_log_write - end of i/o to the log
+ * @bio: The bio
+ * @error: Status of i/o request
+ *
+ * Each bio_vec contains either data from the pagecache or data
+ * relating to the log itself. Here we iterate over the bio_vec
+ * array, processing both kinds of data.
*
*/
- end_buffer_write_sync(bh, uptodate);
+ bio_put(bio);
if (atomic_dec_and_test(&sdp->sd_log_in_flight))
wake_up(&sdp->sd_log_flush_wait);
}
/**
- * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
- * @sdp: The GFS2 superblock
+ * gfs2_log_flush_bio - Submit any pending log bio
+ * @sdp: The superblock
+ * @rw: The rw flags
*
- * tReturns: the buffer_head
+ * Submit any pending part-built or full bio to the block device. If
+ * there is no pending bio, then this is a no-op.
*/
- bh = sb_getblk(sdp->sd_vfs, blkno);
- lock_buffer(bh);
- memset(bh->b_data, 0, bh->b_size);
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
- gfs2_log_incr_head(sdp);
- atomic_inc(&sdp->sd_log_in_flight);
- bh->b_private = sdp;
- bh->b_end_io = gfs2_log_write_endio;
+/**
+ * gfs2_log_alloc_bio - Allocate a new bio for log writing
+ * @sdp: The superblock
+ * @blkno: The next device block number we want to write to
+ *
+ * This should never be called when there is a cached bio in the
+ * super block. When it returns, there will be a cached bio in the
+ * super block which will have as many bio_vecs as the device is
+ * happy to handle.
+ *
+ * Returns: Newly allocated bio
+ */
/**
- * gfs2_fake_write_endio -
- * @bh: The buffer head
- * @uptodate: The I/O Status
+ * gfs2_log_get_bio - Get cached log bio, or allocate a new one
+ * @sdp: The superblock
+ * @blkno: The device block number we want to write to
+ *
+ * If there is a cached bio, then if the next block number is sequential
+ * with the previous one, return it, otherwise flush the bio to the
+ * device. If there is not a cached bio, or we just flushed it, then
+ * allocate a new one.
*
+ * Returns: The bio to use for log writes
*/
+
/**
- * gfs2_log_write_buf - write metadata buffer to log
+ * gfs2_log_write - write to log
* @sdp: the filesystem
- * @real: the in-place buffer head
+ * @page: the page to write
+ * @size: the size of the data to write
+ * @offset: the offset within the page
*
+ * Try and add the page segment to the current bio. If that fails,
+ * submit the current bio to the device and create a new one, and
+ * then add the page segment to that.
*/
- bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS);
- atomic_set(&bh->b_count, 1);
- bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
- set_bh_page(bh, real->b_page, bh_offset(real));
- bh->b_blocknr = blkno;
- bh->b_size = sdp->sd_sb.sb_bsize;
- bh->b_bdev = sdp->sd_vfs->s_bdev;
- bh->b_private = real;
- bh->b_end_io = gfs2_fake_write_endio;
+/**
+ * gfs2_log_write_bh - write a buffer's content to the log
+ * @sdp: The super block
+ * @bh: The buffer pointing to the in-place location
+ *
+ * This writes the content of the buffer to the next available location
+ * in the log. The buffer will be unlocked once the i/o to the log has
+ * completed.
+ */
+
+static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
+{
+ gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh));
+}
- gfs2_log_incr_head(sdp);
- atomic_inc(&sdp->sd_log_in_flight);
+/**
+ * gfs2_log_write_page - write one block stored in a page, into the log
+ * @sdp: The superblock
+ * @page: The struct page
+ *
+ * This writes the first block-sized part of the page into the log. Note
+ * that the page must have been allocated from the gfs2_page_pool mempool
+ * and that after this has been called, ownership has been transferred and
+ * the page may be freed at any time.
+ */