FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Cluster Development

 
 
LinkBack Thread Tools
 
Old 04-12-2012, 12:43 PM
Bob Peterson
 
Default GFS2: Add rgrp information to block_alloc trace point

Hi,

This patch adds rgrp information to the block allocation trace point.

Regards,

Bob Peterson
Red Hat GFS

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
--
Author: Bob Peterson <rpeterso@redhat.com>
Date: Thu Apr 12 08:32:45 2012 -0500

GFS2: Add rgrp information to block_alloc trace point

This patch adds resource group information to the block allocation
trace point for GFS2. This makes it easier to debug problems with
resource groups, such as management of the number of free blocks.

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7a1cf67..146c3d2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1566,7 +1566,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
ip->i_inode.i_gid);

rgd->rd_free_clone -= *nblocks;
- trace_gfs2_block_alloc(ip, block, *nblocks,
+ trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
*bn = block;
return 0;
@@ -1593,7 +1593,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
- trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
+ trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1631,7 +1631,7 @@ void gfs2_unlink_di(struct inode *inode)
rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
if (!rgd)
return;
- trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
+ trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
}
@@ -1661,7 +1661,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
gfs2_free_uninit_di(rgd, ip->i_no_addr);
- trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
+ trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index dfa89cd..981b360 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -457,13 +457,15 @@ TRACE_EVENT(gfs2_bmap,
/* Keep track of blocks as they are allocated/freed */
TRACE_EVENT(gfs2_block_alloc,

- TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
- u8 block_state),
+ TP_PROTO(const struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+ u64 block, unsigned len, u8 block_state),

- TP_ARGS(ip, block, len, block_state),
+ TP_ARGS(ip, rgd, block, len, block_state),

TP_STRUCT__entry(
__field( dev_t, dev )
+ __field( u64, rd_addr )
+ __field( u32, rd_free_clone )
__field( u64, start )
__field( u64, inum )
__field( u32, len )
@@ -472,14 +474,18 @@ TRACE_EVENT(gfs2_block_alloc,

TP_fast_assign(
__entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->rd_addr = rgd->rd_addr;
+ __entry->rd_free_clone = rgd->rd_free_clone;
__entry->start = block;
__entry->inum = ip->i_no_addr;
__entry->len = len;
__entry->block_state = block_state;
),

- TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+ TP_printk("%u,%u rg:%llu rf:%u bmap %llu alloc %llu/%lu %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->rd_addr,
+ __entry->rd_free_clone,
(unsigned long long)__entry->inum,
(unsigned long long)__entry->start,
(unsigned long)__entry->len,
 
Old 04-12-2012, 01:16 PM
Bob Peterson
 
Default GFS2: Add rgrp information to block_alloc trace point

----- Original Message -----
| All the bmap group tracepoints start with the device number, followed
| by
| the string bmap, the inode number and then the start/length of the
| blocks, so I'd rather not change that, without good reason.
|
| If we are going to add the rgrp information here, then it should be
| done
| later in the structure/string. I'm also wondering whether we
| shouldn't
| add some of the other fields as well... rd_free and rd_dinodes spring
| to
| mind as obvious candidates.
|
| Since there is quite a lot of information in each rgrp, it almost
| warrants its own tracepoint rather than trying to add it into an
| existing one...
|
| So I think that this probably needs some more thought,
|
| Steve.

Hi,

I can reformat it to put the rgrp data later in the string.

The main reason I added that particular rgrp information was for
the purposes of debugging the (future) block reservations code
for file defragmentation. For that (future) patch I add a new
trace point for block reservations. Adding the rgrp address and
rd_free_clone to this trace point allow us to see a correlation
between the decisions made by the reservations code and the
actual blocks that are allocated as a result. While I agree that
another trace point may be warranted for rgrp information, I'd
rather keep the rgrp address and rd_free_clone in this trace point
for that reason.

I'll see if I can rearrange the format to be more suitable.

Regards,

Bob Peterson
Red Hat File Systems
 
Old 04-12-2012, 01:34 PM
Bob Peterson
 
Default GFS2: Add rgrp information to block_alloc trace point

----- Original Message -----
| Hi,
|
| On Thu, 2012-04-12 at 09:16 -0400, Bob Peterson wrote:
| > ----- Original Message -----
| > | All the bmap group tracepoints start with the device number,
| > | followed
| > | by
| > | the string bmap, the inode number and then the start/length of
| > | the
| > | blocks, so I'd rather not change that, without good reason.
| > |
| > | If we are going to add the rgrp information here, then it should
| > | be
| > | done
| > | later in the structure/string. I'm also wondering whether we
| > | shouldn't
| > | add some of the other fields as well... rd_free and rd_dinodes
| > | spring
| > | to
| > | mind as obvious candidates.
| > |
| > | Since there is quite a lot of information in each rgrp, it almost
| > | warrants its own tracepoint rather than trying to add it into an
| > | existing one...
| > |
| > | So I think that this probably needs some more thought,
| > |
| > | Steve.
| >
| > Hi,
| >
| > I can reformat it to put the rgrp data later in the string.
| >
| > The main reason I added that particular rgrp information was for
| > the purposes of debugging the (future) block reservations code
| > for file defragmentation. For that (future) patch I add a new
| > trace point for block reservations. Adding the rgrp address and
| > rd_free_clone to this trace point allow us to see a correlation
| > between the decisions made by the reservations code and the
| > actual blocks that are allocated as a result. While I agree that
| > another trace point may be warranted for rgrp information, I'd
| > rather keep the rgrp address and rd_free_clone in this trace point
| > for that reason.
| >
| > I'll see if I can rearrange the format to be more suitable.
| >
| > Regards,
| >
| > Bob Peterson
| > Red Hat File Systems
|
| I'm still confused though... the reservation is just a start/length
| pair, so why do we need the resource group in order to match it up
| here?
|
| The rgrp information is also available via the rgrp specific entry in
| the glocks file too, so we can always get it that way if required,
|
| Steve.

Hi,

The forthcoming block reservations code is complex, but basically,
some writes are allocating spans of blocks as you say. Others are
not, for technical reasons. Later, the blocks within the reserved
span are claimed. These reserved spans of blocks become "no-fly
zones" for other processes which may also be reserving blocks and/or
allocating blocks _without_ a reservation.

In debugging this new code, I ran into cases where a process was
improperly allocating a block that had been reserved by another process.
I also ran into cases where the block allocation code failed because
of mismanagement of the free space numbers for rgrps. By adding the
rgrp address and free space info to the allocation trace point, I
was able to make a correlation between what was reserved and what
was actually allocated. Without the information, I couldn't tell
when blocks were being allocated (and/or free space number problems)
by processes that didn't hold a reservation.

Regards,

Bob Peterson
Red Hat File Systems
 
Old 05-09-2012, 04:11 PM
Bob Peterson
 
Default GFS2: Add rgrp information to block_alloc trace point

Hi,

This is a second attempt at a patch that adds rgrp information to the
block allocation trace point for GFS2. As suggested, the patch was
modified to list the rgrp information _after_ the fields that exist today.

Again, the reason for this patch is to allow us to trace and debug
problems with the block reservations patch, which is still in the works.
We can debug problems with reservations if we can see what block allocations
result from the block reservations. It may also be handy in figuring out
if there are problems in rgrp free space accounting. In other words,
we can use it to track the rgrp and its free space along side the allocations
that are taking place.

Regards,

Bob Peterson
Red Hat File Systems

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
Author: Bob Peterson <rpeterso@redhat.com>
Date: Thu Apr 12 08:32:45 2012 -0500

GFS2: Add rgrp information to block_alloc trace point

This patch adds resource group information to the block allocation
trace point for GFS2. This makes it easier to debug problems with
resource groups, such as management of the number of free blocks.

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 645c16f..f74fb9b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1556,7 +1556,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
ip->i_inode.i_gid);

rgd->rd_free_clone -= *nblocks;
- trace_gfs2_block_alloc(ip, block, *nblocks,
+ trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
*bn = block;
return 0;
@@ -1583,7 +1583,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
- trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
+ trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1621,7 +1621,7 @@ void gfs2_unlink_di(struct inode *inode)
rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
if (!rgd)
return;
- trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
+ trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
}
@@ -1651,7 +1651,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
gfs2_free_uninit_di(rgd, ip->i_no_addr);
- trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
+ trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index dfa89cd..1b8b815 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -457,10 +457,10 @@ TRACE_EVENT(gfs2_bmap,
/* Keep track of blocks as they are allocated/freed */
TRACE_EVENT(gfs2_block_alloc,

- TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
- u8 block_state),
+ TP_PROTO(const struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+ u64 block, unsigned len, u8 block_state),

- TP_ARGS(ip, block, len, block_state),
+ TP_ARGS(ip, rgd, block, len, block_state),

TP_STRUCT__entry(
__field( dev_t, dev )
@@ -468,6 +468,8 @@ TRACE_EVENT(gfs2_block_alloc,
__field( u64, inum )
__field( u32, len )
__field( u8, block_state )
+ __field( u64, rd_addr )
+ __field( u32, rd_free_clone )
),

TP_fast_assign(
@@ -476,14 +478,18 @@ TRACE_EVENT(gfs2_block_alloc,
__entry->inum = ip->i_no_addr;
__entry->len = len;
__entry->block_state = block_state;
+ __entry->rd_addr = rgd->rd_addr;
+ __entry->rd_free_clone = rgd->rd_free_clone;
),

- TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+ TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->start,
(unsigned long)__entry->len,
- block_state_name(__entry->block_state))
+ block_state_name(__entry->block_state),
+ (unsigned long long)__entry->rd_addr,
+ __entry->rd_free_clone)
);

#endif /* _TRACE_GFS2_H */
 
Old 05-17-2012, 12:23 PM
Steven Whitehouse
 
Default GFS2: Add rgrp information to block_alloc trace point

From: Bob Peterson <rpeterso@redhat.com>

This is a second attempt at a patch that adds rgrp information to the
block allocation trace point for GFS2. As suggested, the patch was
modified to list the rgrp information _after_ the fields that exist today.

Again, the reason for this patch is to allow us to trace and debug
problems with the block reservations patch, which is still in the works.
We can debug problems with reservations if we can see what block allocations
result from the block reservations. It may also be handy in figuring out
if there are problems in rgrp free space accounting. In other words,
we can use it to track the rgrp and its free space along side the allocations
that are taking place.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 645c16f..f74fb9b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1556,7 +1556,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
ip->i_inode.i_gid);

rgd->rd_free_clone -= *nblocks;
- trace_gfs2_block_alloc(ip, block, *nblocks,
+ trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
*bn = block;
return 0;
@@ -1583,7 +1583,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
- trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
+ trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1621,7 +1621,7 @@ void gfs2_unlink_di(struct inode *inode)
rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
if (!rgd)
return;
- trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
+ trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
}
@@ -1651,7 +1651,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
gfs2_free_uninit_di(rgd, ip->i_no_addr);
- trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
+ trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index dfa89cd..1b8b815 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -457,10 +457,10 @@ TRACE_EVENT(gfs2_bmap,
/* Keep track of blocks as they are allocated/freed */
TRACE_EVENT(gfs2_block_alloc,

- TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
- u8 block_state),
+ TP_PROTO(const struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+ u64 block, unsigned len, u8 block_state),

- TP_ARGS(ip, block, len, block_state),
+ TP_ARGS(ip, rgd, block, len, block_state),

TP_STRUCT__entry(
__field( dev_t, dev )
@@ -468,6 +468,8 @@ TRACE_EVENT(gfs2_block_alloc,
__field( u64, inum )
__field( u32, len )
__field( u8, block_state )
+ __field( u64, rd_addr )
+ __field( u32, rd_free_clone )
),

TP_fast_assign(
@@ -476,14 +478,18 @@ TRACE_EVENT(gfs2_block_alloc,
__entry->inum = ip->i_no_addr;
__entry->len = len;
__entry->block_state = block_state;
+ __entry->rd_addr = rgd->rd_addr;
+ __entry->rd_free_clone = rgd->rd_free_clone;
),

- TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+ TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->start,
(unsigned long)__entry->len,
- block_state_name(__entry->block_state))
+ block_state_name(__entry->block_state),
+ (unsigned long long)__entry->rd_addr,
+ __entry->rd_free_clone)
);

#endif /* _TRACE_GFS2_H */
--
1.7.4
 

Thread Tools




All times are GMT. The time now is 09:40 PM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org