FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive Linux email lists and to make them easily accessible for Linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Cluster Development

 
 
LinkBack Thread Tools
 
Old 11-21-2011, 09:21 AM
Steven Whitehouse
 
Default GFS2: move toward a generic multi-block allocator

Hi,

On Fri, 2011-11-18 at 10:58 -0500, Bob Peterson wrote:
> Hi,
>
> This patch is a revision of the one I previously posted.
> I tried to integrate all the suggestions Steve gave.
> The purpose of the patch is to change function gfs2_alloc_block
> (allocate either a dinode block or an extent of data blocks)
> to a more generic gfs2_alloc_blocks function that can
> allocate both a dinode _and_ an extent of data blocks in the
> same call. This will ultimately help us create a multi-block
> reservation scheme to reduce file fragmentation.
>
> Regards,
>
> Bob Peterson
> Red Hat File Systems
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> --
> GFS2: move toward a generic multi-block allocator
>
> This patch moves more toward a generic multi-block allocator that
> takes a pointer to the number of data blocks to allocate, plus whether
> or not to allocate a dinode. In theory, it could be called to allocate
> (1) a single dinode block, (2) a group of one or more data blocks, or
> (3) a dinode plus several data blocks.
>
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index b69235b..cb74312 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -133,7 +133,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
> and write it out to disk */
>
> unsigned int n = 1;
> - error = gfs2_alloc_block(ip, &block, &n, 0, NULL);
> + error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
> if (error)
> goto out_brelse;
> if (isdir) {
> @@ -503,7 +503,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
> do {
> int error;
> n = blks - alloced;
> - error = gfs2_alloc_block(ip, &bn, &n, 0, NULL);
> + error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
> if (error)
> return error;
> alloced += n;
> diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
> index ae75319..f8485da 100644
> --- a/fs/gfs2/dir.c
> +++ b/fs/gfs2/dir.c
> @@ -823,7 +823,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
> struct gfs2_dirent *dent;
> struct qstr name = { .name = "", .len = 0, .hash = 0 };
>
> - error = gfs2_alloc_block(ip, &bn, &n, 0, NULL);
> + error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
> if (error)
> return NULL;
> bh = gfs2_meta_new(ip->i_gl, bn);
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index de2668f..3ab192b 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -389,6 +389,7 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
> {
> struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
> int error;
> + int dblocks = 0;
>
> if (gfs2_alloc_get(dip) == NULL)
> return -ENOMEM;
> @@ -402,7 +403,7 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
> if (error)
> goto out_ipreserv;
>
> - error = gfs2_alloc_block(dip, no_addr, NULL, 1, generation);
> + error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation);
>
> gfs2_trans_end(sdp);
>
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index 855597a..b8935af 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -65,8 +65,8 @@ static const char valid_change[16] = {
> };
>
> static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
> - unsigned char old_state, unsigned char new_state,
> - unsigned int *n);
> + unsigned char old_state, bool dinode,
> + unsigned int *ndata);
>
> /**
> * gfs2_setbit - Set a bit in the bitmaps
> @@ -921,8 +921,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
> while (goal < rgd->rd_data) {
> down_write(&sdp->sd_log_flush_lock);
> n = 1;
> - block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
> - GFS2_BLKST_UNLINKED, &n);
> + block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, 0, &n);
> up_write(&sdp->sd_log_flush_lock);
> if (block == BFITNOENT)
> break;
> @@ -1115,7 +1114,7 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
> * @rgd: the resource group descriptor
> * @goal: the goal block within the RG (start here to search for avail block)
> * @old_state: GFS2_BLKST_XXX the before-allocation state to find
> - * @new_state: GFS2_BLKST_XXX the after-allocation block state
> + * @dinode: TRUE if the first block we allocate is for a dinode
> * @n: The extent length
> *
> * Walk rgrp's bitmap to find bits that represent a block in @old_state.
> @@ -1132,8 +1131,7 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
> */
>
> static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
> - unsigned char old_state, unsigned char new_state,
> - unsigned int *n)
> + unsigned char old_state, bool dinode, unsigned int *n)
> {
> struct gfs2_bitmap *bi = NULL;
> const u32 length = rgd->rd_length;
> @@ -1192,13 +1190,14 @@ skip:
> if (blk == BFITNOENT)
> return blk;
>
> - *n = 1;
> - if (old_state == new_state)
> + if (old_state == GFS2_BLKST_UNLINKED)
> goto out;
>
> gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
> gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
> - bi, blk, new_state);
> + bi, blk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
> + if (!dinode)
> + (*n)++;
> goal = blk;
> while (*n < elen) {
> goal++;
> @@ -1208,7 +1207,7 @@ skip:
> GFS2_BLKST_FREE)
> break;
> gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
> - bi, goal, new_state);
> + bi, goal, GFS2_BLKST_USED);
> (*n)++;
> }
> out:
> @@ -1300,28 +1299,26 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
> }
>
> /**
> - * gfs2_alloc_block - Allocate one or more blocks
> + * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
> * @ip: the inode to allocate the block for
> * @bn: Used to return the starting block number
> - * @n: requested number of blocks/extent length (value/result)
> - * dinode: 1 if we're allocating a dinode, 0 if it's a data block
> + * @ndata: requested number of data blocks/extent length (value/result)
> + * @dinode: 1 if we're allocating a dinode block, else 0
> * @generation: the generation number of the inode
> *
> * Returns: 0 or error
> */
>
> -int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
> - int dinode, u64 *generation)
> +int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *ndata,
> + bool dinode, u64 *generation)
> {
> struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
> struct buffer_head *dibh;
> struct gfs2_alloc *al = ip->i_alloc;
> struct gfs2_rgrpd *rgd;
> - u32 goal, blk; /* block, within the rgrp scope */
> + u32 goal, extlen, blk; /* block, within the rgrp scope */
> u64 block; /* block, within the file system scope */
> - unsigned int extn = 1;
> int error;
> - unsigned char blk_type = dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED;
>
> /* Only happens if there is a bug in gfs2, return something distinctive
> * to ensure that it is noticed.
> @@ -1329,8 +1326,6 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
> if (al == NULL)
> return -ECANCELED;
>
> - if (n == NULL)
> - n = &extn;
> rgd = ip->i_rgd;
>
> if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
> @@ -1338,7 +1333,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
> else
> goal = rgd->rd_last_alloc;
>
> - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, blk_type, n);
> + blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, dinode, ndata);
>
> /* Since all blocks are reserved in advance, this shouldn't happen */
> if (blk == BFITNOENT)
> @@ -1347,7 +1342,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
> rgd->rd_last_alloc = blk;
> block = rgd->rd_data0 + blk;
> if (!dinode) {
> - ip->i_goal = block + *n - 1;
> + ip->i_goal = block + *ndata - 1;
> error = gfs2_meta_inode_buffer(ip, &dibh);
> if (error == 0) {
> struct gfs2_dinode *di =
> @@ -1358,10 +1353,13 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
> brelse(dibh);
> }
> }
> - if (rgd->rd_free < *n)
> + extlen = *ndata;
> + if (dinode)
> + extlen++;
> + if (rgd->rd_free < extlen)
> goto rgrp_error;
>
> - rgd->rd_free -= *n;
> + rgd->rd_free -= extlen;
> if (dinode) {
> rgd->rd_dinodes++;
> *generation = rgd->rd_igeneration++;
> @@ -1372,15 +1370,16 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
> gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
> gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
>
> - gfs2_statfs_change(sdp, 0, -(s64)*n, dinode ? 1 : 0);
> + gfs2_statfs_change(sdp, 0, -(s64)extlen, dinode ? 1 : 0);
> if (dinode)
> gfs2_trans_add_unrevoke(sdp, block, 1);
> - else
> - gfs2_quota_change(ip, *n, ip->i_inode.i_uid,
> + if (*ndata)
> + gfs2_quota_change(ip, *ndata, ip->i_inode.i_uid,
> ip->i_inode.i_gid);
>
> - rgd->rd_free_clone -= *n;
> - trace_gfs2_block_alloc(ip, block, *n, blk_type);
> + rgd->rd_free_clone -= extlen;
> + trace_gfs2_block_alloc(ip, block, *ndata,
> + dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);

This is still going to give us the wrong tracing data here. In the
dinode case, this will give us a trace entry that says that starting at
the first block of the extent, that block and all following blocks are
dinodes up to *ndata, which is one block less than the extent size. So
the size is wrong (off by one) for the complete extent, and the type is
also wrong for all except the first block.

In the non-dinode case, it should give the correct answer though as
extlen and *ndata are the same.

What we could do, to save having to call the tracepoint twice in the
dinode case, would be to say that for an extent size greater than 1, we
"know" that all subsequent blocks will be "used". So while that's ok,
provided we document it, the *ndata in the tracepoint call still needs
to be changed to extlen to correct the size, so I'll fix that up when I
apply this.

Steve.



> *bn = block;
> return 0;
>
> diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
> index 4cb5608..b3b61b8 100644
> --- a/fs/gfs2/rgrp.h
> +++ b/fs/gfs2/rgrp.h
> @@ -39,8 +39,8 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip)
> extern int gfs2_inplace_reserve(struct gfs2_inode *ip);
> extern void gfs2_inplace_release(struct gfs2_inode *ip);
>
> -extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
> - int dinode, u64 *generation);
> +extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
> + bool dinode, u64 *generation);
>
> extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
> extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
> diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
> index e4794a5..ef74e159 100644
> --- a/fs/gfs2/xattr.c
> +++ b/fs/gfs2/xattr.c
> @@ -610,7 +610,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
> u64 block;
> int error;
>
> - error = gfs2_alloc_block(ip, &block, &n, 0, NULL);
> + error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
> if (error)
> return error;
> gfs2_trans_add_unrevoke(sdp, block, 1);
> @@ -672,7 +672,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
> int mh_size = sizeof(struct gfs2_meta_header);
> unsigned int n = 1;
>
> - error = gfs2_alloc_block(ip, &block, &n, 0, NULL);
> + error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
> if (error)
> return error;
> gfs2_trans_add_unrevoke(sdp, block, 1);
> @@ -992,7 +992,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
> } else {
> u64 blk;
> unsigned int n = 1;
> - error = gfs2_alloc_block(ip, &blk, &n, 0, NULL);
> + error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL);
> if (error)
> return error;
> gfs2_trans_add_unrevoke(sdp, blk, 1);
 

Thread Tools




All times are GMT. The time now is 08:07 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org