03-02-2012, 06:28 PM
Bob Peterson

GFS2: Eliminate sd_rindex_mutex

Hi,

This patch eliminates the sd_rindex_mutex altogether.
See comments below:

Regards,

Bob Peterson
Red Hat File Systems

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
--
GFS2: Eliminate sd_rindex_mutex

Over time, we've slowly eliminated the use of sd_rindex_mutex.
Up to this point, it was only used in two places: function
gfs2_ri_total (which totals the file system size by reading
and parsing the rindex file) and function gfs2_rindex_update,
which updates the rgrps in memory. Both of these functions have
the rindex glock to protect them, so the rindex mutex is unnecessary.
--
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4d546df..47d0bda 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -644,7 +644,6 @@ struct gfs2_sbd {

int sd_rindex_uptodate;
spinlock_t sd_rindex_spin;
- struct mutex sd_rindex_mutex;
struct rb_root sd_rindex_tree;
unsigned int sd_rgrps;
unsigned int sd_max_rg_data;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a55baa7..ae5e0a4 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -83,7 +83,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
spin_lock_init(&sdp->sd_statfs_spin);

spin_lock_init(&sdp->sd_rindex_spin);
- mutex_init(&sdp->sd_rindex_mutex);
sdp->sd_rindex_tree.rb_node = NULL;

INIT_LIST_HEAD(&sdp->sd_jindex_list);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e09370e..f9bf429 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -540,7 +540,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
struct file_ra_state ra_state;
int error, rgrps;

- mutex_lock(&sdp->sd_rindex_mutex);
file_ra_state_init(&ra_state, inode->i_mapping);
for (rgrps = 0;; rgrps++) {
loff_t pos = rgrps * sizeof(struct gfs2_rindex);
@@ -553,7 +552,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
break;
total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
}
- mutex_unlock(&sdp->sd_rindex_mutex);
return total_data;
}

@@ -695,22 +693,18 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)

/* Read new copy from disk if we don't have the latest */
if (!sdp->sd_rindex_uptodate) {
- mutex_lock(&sdp->sd_rindex_mutex);
if (!gfs2_glock_is_locked_by_me(gl)) {
error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
if (error)
- goto out_unlock;
+ return error;
unlock_required = 1;
}
if (!sdp->sd_rindex_uptodate)
error = gfs2_ri_update(ip);
if (unlock_required)
gfs2_glock_dq_uninit(&ri_gh);
-out_unlock:
- mutex_unlock(&sdp->sd_rindex_mutex);
}

-
return error;
}
 
03-05-2012, 11:20 AM
Steven Whitehouse

GFS2: Eliminate sd_rindex_mutex

Hi,

On Fri, 2012-03-02 at 14:28 -0500, Bob Peterson wrote:
> [Bob's patch quoted in full; trimmed here, see the post above]


Bearing in mind that the mutex is an exclusive lock and the glock is
only a shared lock, do we have any other protection against the rgrp
tree being updated simultaneously?

Steve.
 
03-05-2012, 12:33 PM
Bob Peterson

GFS2: Eliminate sd_rindex_mutex

----- Original Message -----
| Bearing in mind that the mutex is an exclusive lock and the glock is
| only a shared lock, do we have any other protection against the rgrp
| tree being updated simultaneously?
|
| Steve.

Hi,

Yes, I think you're right. The existing code should work most of the
time, but it has the potential to leak rgrp memory if the timing is right.
We could approach the solution in two ways:

(1) We could change the shared lock to an exclusive lock.
(2) We could change function rgd_insert so that it returns an error if
the rgrp is already in the rb_tree. That way, whoever gets the
sd_rindex_spin spinlock first will call rgd_insert to insert the new
rgrp into the rgrp tree; when the second caller tries to insert
its new rgrp into the rb_tree, it will find the entry already there
(inserted by the first caller), take the error path, and exit,
freeing the rgrp it kmalloced.
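
In sketch form, option (2) turns the rb_tree walk into an insert-or-fail
operation. The following condenses what the replacement patch later in
this thread does in rgd_insert; the comments are editorial additions:

static int rgd_insert(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;

	/* Standard rbtree descent to find the insertion point. */
	while (*newn) {
		struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
						  rd_node);
		parent = *newn;
		if (rgd->rd_addr < cur->rd_addr)
			newn = &((*newn)->rb_left);
		else if (rgd->rd_addr > cur->rd_addr)
			newn = &((*newn)->rb_right);
		else
			return -EEXIST;	/* a racing reader inserted it first */
	}

	rb_link_node(&rgd->rd_node, parent, newn);
	rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
	sdp->sd_rgrps++;
	return 0;
}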

Regards,

Bob Peterson
Red Hat File Systems
 
03-05-2012, 12:51 PM
Steven Whitehouse

GFS2: Eliminate sd_rindex_mutex

Hi,

On Mon, 2012-03-05 at 08:33 -0500, Bob Peterson wrote:
> ----- Original Message -----
> | Bearing in mind that the mutex is an exclusive lock and the glock is
> | only a shared lock, do we have any other protection against the rgrp
> | tree being updated simultaneously?
> |
> | Steve.
>
> Hi,
>
> Yes, I think you're right. The existing code should work most of the
> time, but it has the potential to leak rgrp memory if the timing is right.
> We could approach the solution in two ways:
>
> (1) We could change the shared lock to an exclusive lock.
That is not a good solution. Think about what happens when we have
multiple nodes, and grow has just finished on one... we want them all to
be able to update at the same time.

> (2) We could change function rgd_insert so that it returns an error if
> the rgrp is already in the rb_tree. That way, whoever gets the
> sd_rindex_spin spinlock first will call rgd_insert to insert the new
> rgrp into the rgrp tree; when the second caller tries to insert
> its new rgrp into the rb_tree, it will find the entry already there
> (inserted by the first caller), take the error path, and exit,
> freeing the rgrp it kmalloced.
>
> Regards,
>
> Bob Peterson
> Red Hat File Systems

Yes, that's ok, since we know that the rgrps will only grow and never
shrink; if we find one already inserted, we know that we raced and
can free the new entry rather than adding it.

Steve.
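
The caller-side half of that scheme, condensed from the committed patch
below: read_rindex_entry() performs the insert under sd_rindex_spin and,
on -EEXIST, clears the error and falls through to free the rgrp it
allocated:

	spin_lock(&sdp->sd_rindex_spin);
	error = rgd_insert(rgd);
	spin_unlock(&sdp->sd_rindex_spin);
	if (!error)
		return 0;

	/* Someone else read in this rgrp first; free ours and ignore it. */
	error = 0;

fail:
	kfree(rgd->rd_bits);
	/* ...remainder of the existing error path... */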
 
03-05-2012, 03:24 PM
Steven Whitehouse

GFS2: Eliminate sd_rindex_mutex

Hi,

Now in the -nmw tree. Thanks,

Steve.

On Mon, 2012-03-05 at 09:20 -0500, Bob Peterson wrote:
> ----- Original Message -----
> | Hi,
> |
> | Here is a replacement patch that implements solution #2 as
> | described above:
> |
> | Regards,
> |
> | Bob Peterson
> | Red Hat File Systems
> |
> | Signed-off-by: Bob Peterson <rpeterso@redhat.com>
>
> Hi,
>
> Ignore that last patch (try #2); I forgot to unlock the spin_lock in
> the error case. This replacement patch simplifies the code a bit.
>
> Regards,
>
> Bob Peterson
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> --
> GFS2: Eliminate sd_rindex_mutex
>
> Over time, we've slowly eliminated the use of sd_rindex_mutex.
> Up to this point, it was only used in two places: function
> gfs2_ri_total (which totals the file system size by reading
> and parsing the rindex file) and function gfs2_rindex_update,
> which updates the rgrps in memory. Both of these functions have
> the rindex glock to protect them, so the rindex mutex is unnecessary.
> Since gfs2_grow writes to the rindex via the meta_fs, the mutex
> is taken in the wrong order according to the normal locking rules. This
> patch eliminates the mutex entirely to avoid the problem.
>
> rhbz#798763
> --
> [diff trimmed; it is identical to the committed patch in the next post]
 
03-19-2012, 09:25 AM
Steven Whitehouse

GFS2: Eliminate sd_rindex_mutex

From: Bob Peterson <rpeterso@redhat.com>

Over time, we've slowly eliminated the use of sd_rindex_mutex.
Up to this point, it was only used in two places: function
gfs2_ri_total (which totals the file system size by reading
and parsing the rindex file) and function gfs2_rindex_update,
which updates the rgrps in memory. Both of these functions have
the rindex glock to protect them, so the rindex mutex is unnecessary.
Since gfs2_grow writes to the rindex via the meta_fs, the mutex
is taken in the wrong order according to the normal locking rules. This
patch eliminates the mutex entirely to avoid the problem.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4d546df..47d0bda 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -644,7 +644,6 @@ struct gfs2_sbd {

int sd_rindex_uptodate;
spinlock_t sd_rindex_spin;
- struct mutex sd_rindex_mutex;
struct rb_root sd_rindex_tree;
unsigned int sd_rgrps;
unsigned int sd_max_rg_data;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a55baa7..ae5e0a4 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -83,7 +83,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
spin_lock_init(&sdp->sd_statfs_spin);

spin_lock_init(&sdp->sd_rindex_spin);
- mutex_init(&sdp->sd_rindex_mutex);
sdp->sd_rindex_tree.rb_node = NULL;

INIT_LIST_HEAD(&sdp->sd_jindex_list);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e09370e..6ff9f17 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -540,7 +540,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
struct file_ra_state ra_state;
int error, rgrps;

- mutex_lock(&sdp->sd_rindex_mutex);
file_ra_state_init(&ra_state, inode->i_mapping);
for (rgrps = 0;; rgrps++) {
loff_t pos = rgrps * sizeof(struct gfs2_rindex);
@@ -553,11 +552,10 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
break;
total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
}
- mutex_unlock(&sdp->sd_rindex_mutex);
return total_data;
}

-static void rgd_insert(struct gfs2_rgrpd *rgd)
+static int rgd_insert(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
@@ -573,11 +571,13 @@ static void rgd_insert(struct gfs2_rgrpd *rgd)
else if (rgd->rd_addr > cur->rd_addr)
newn = &((*newn)->rb_right);
else
- return;
+ return -EEXIST;
}

rb_link_node(&rgd->rd_node, parent, newn);
rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
+ sdp->sd_rgrps++;
+ return 0;
}

/**
@@ -631,10 +631,12 @@ static int read_rindex_entry(struct gfs2_inode *ip,
if (rgd->rd_data > sdp->sd_max_rg_data)
sdp->sd_max_rg_data = rgd->rd_data;
spin_lock(&sdp->sd_rindex_spin);
- rgd_insert(rgd);
- sdp->sd_rgrps++;
+ error = rgd_insert(rgd);
spin_unlock(&sdp->sd_rindex_spin);
- return error;
+ if (!error)
+ return 0;
+
+ error = 0; /* someone else read in the rgrp; free it and ignore it */

fail:
kfree(rgd->rd_bits);
@@ -695,22 +697,18 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)

/* Read new copy from disk if we don't have the latest */
if (!sdp->sd_rindex_uptodate) {
- mutex_lock(&sdp->sd_rindex_mutex);
if (!gfs2_glock_is_locked_by_me(gl)) {
error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
if (error)
- goto out_unlock;
+ return error;
unlock_required = 1;
}
if (!sdp->sd_rindex_uptodate)
error = gfs2_ri_update(ip);
if (unlock_required)
gfs2_glock_dq_uninit(&ri_gh);
-out_unlock:
- mutex_unlock(&sdp->sd_rindex_mutex);
}

-
return error;
}

--
1.7.4
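
The ordering problem named in the commit message is the classic AB-BA
inversion: the two locks could end up being acquired in opposite orders
on different paths. As a standalone illustration (a userspace pthreads
analogy, not kernel code; all names here are invented for readability),
the hazard looks like this:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Stand-ins for the two locks: lock_a plays sd_rindex_mutex and
 * lock_b plays the rindex glock. */
static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

/* Path 1 (like the old gfs2_rindex_update): mutex first, then glock. */
static void *path1(void *unused)
{
	pthread_mutex_lock(&lock_a);
	usleep(1000);			/* widen the race window */
	pthread_mutex_lock(&lock_b);
	puts("path 1 acquired both locks");
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return NULL;
}

/* Path 2 (like a writer coming in via the meta_fs): glock first. */
static void *path2(void *unused)
{
	pthread_mutex_lock(&lock_b);
	usleep(1000);
	pthread_mutex_lock(&lock_a);	/* opposite order: AB-BA */
	puts("path 2 acquired both locks");
	pthread_mutex_unlock(&lock_a);
	pthread_mutex_unlock(&lock_b);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, path1, NULL);
	pthread_create(&t2, NULL, path2, NULL);
	/* With unlucky timing this never returns: path 1 holds lock_a and
	 * waits for lock_b while path 2 holds lock_b and waits for lock_a.
	 * Removing one of the two locks, as the patch does, removes the
	 * ordering hazard entirely. */
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}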
 
