Linux Archive

Linux Archive (http://www.linux-archive.org/)
-   Cluster Development (http://www.linux-archive.org/cluster-development/)
-   -   GFS2: Eliminate sd_rindex_mutex (http://www.linux-archive.org/cluster-development/640984-gfs2-eliminate-sd_rindex_mutex.html)

Bob Peterson 03-05-2012 12:49 PM

GFS2: Eliminate sd_rindex_mutex
 
----- Original Message -----
| ----- Original Message -----
| | Bearing in mind that the mutex is an exclusive lock and the glock
| | is
| | only a shared lock, do we have any other protection against the
| | rgrp
| | tree being updated simultaneously?
| |
| | Steve.
|
| Hi,
|
| Yes, I think you're right. The existing code should work most of the
| time but has the potential to leak rgrp memory if the timing is
| right.
| We could approach the solution two ways:
|
| (1) We could change the shared lock to an exclusive lock.
| (2) We could change function rgd_insert so that it returns an error
| if
| the rgrp was already in the rb_tree. That way, whoever gets the
| sd_rindex_spin spinlock first will call rgd_insert to insert the
| new
| rgrp into the rgrp tree, and when the second caller tries to
| insert
| its new rgrp into the rb_tree, it will find the entry already
| there,
| (inserted by the first caller), then take the error path and
| exit,
| freeing the rgrp it kmalloced.
|
| Regards,
|
| Bob Peterson
| Red Hat File Systems
Hi,

Here is a replacement patch that implements solution #2 as
described above:

Regards,

Bob Peterson
Red Hat File Systems

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
--
GFS2: Eliminate sd_rindex_mutex

Over time, we've slowly eliminated the use of sd_rindex_mutex.
Up to this point, it was only used in two places: function
gfs2_ri_total (which totals the file system size by reading
and parsing the rindex file) and function gfs2_rindex_update
which updates the rgrps in memory. Both of these functions have
the rindex glock to protect them, so the rindex is unnecessary.
Since gfs2_grow writes to the rindex via the meta_fs, the mutex
is in the wrong order according to the normal rules. This patch
eliminates the mutex entirely to avoid the problem.

rhbz#798763
--
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4d546df..47d0bda 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -644,7 +644,6 @@ struct gfs2_sbd {

int sd_rindex_uptodate;
spinlock_t sd_rindex_spin;
- struct mutex sd_rindex_mutex;
struct rb_root sd_rindex_tree;
unsigned int sd_rgrps;
unsigned int sd_max_rg_data;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a55baa7..ae5e0a4 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -83,7 +83,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
spin_lock_init(&sdp->sd_statfs_spin);

spin_lock_init(&sdp->sd_rindex_spin);
- mutex_init(&sdp->sd_rindex_mutex);
sdp->sd_rindex_tree.rb_node = NULL;

INIT_LIST_HEAD(&sdp->sd_jindex_list);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e09370e..0b844c8 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -540,7 +540,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
struct file_ra_state ra_state;
int error, rgrps;

- mutex_lock(&sdp->sd_rindex_mutex);
file_ra_state_init(&ra_state, inode->i_mapping);
for (rgrps = 0;; rgrps++) {
loff_t pos = rgrps * sizeof(struct gfs2_rindex);
@@ -553,11 +552,10 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
break;
total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
}
- mutex_unlock(&sdp->sd_rindex_mutex);
return total_data;
}

-static void rgd_insert(struct gfs2_rgrpd *rgd)
+static int rgd_insert(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
@@ -573,11 +571,12 @@ static void rgd_insert(struct gfs2_rgrpd *rgd)
else if (rgd->rd_addr > cur->rd_addr)
newn = &((*newn)->rb_right);
else
- return;
+ return -EEXIST;
}

rb_link_node(&rgd->rd_node, parent, newn);
rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
+ return 0;
}

/**
@@ -631,7 +630,11 @@ static int read_rindex_entry(struct gfs2_inode *ip,
if (rgd->rd_data > sdp->sd_max_rg_data)
sdp->sd_max_rg_data = rgd->rd_data;
spin_lock(&sdp->sd_rindex_spin);
- rgd_insert(rgd);
+ error = rgd_insert(rgd);
+ if (error == -EEXIST) { /* someone else read the rgrp in; ignore it */
+ error = 0;
+ goto fail;
+ }
sdp->sd_rgrps++;
spin_unlock(&sdp->sd_rindex_spin);
return error;
@@ -695,22 +698,18 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)

/* Read new copy from disk if we don't have the latest */
if (!sdp->sd_rindex_uptodate) {
- mutex_lock(&sdp->sd_rindex_mutex);
if (!gfs2_glock_is_locked_by_me(gl)) {
error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
if (error)
- goto out_unlock;
+ return error;
unlock_required = 1;
}
if (!sdp->sd_rindex_uptodate)
error = gfs2_ri_update(ip);
if (unlock_required)
gfs2_glock_dq_uninit(&ri_gh);
-out_unlock:
- mutex_unlock(&sdp->sd_rindex_mutex);
}

-
return error;
}

Bob Peterson 03-05-2012 01:20 PM

GFS2: Eliminate sd_rindex_mutex
 
----- Original Message -----
| Hi,
|
| Here is a replacement patch that implements solution #2 as
| described above:
|
| Regards,
|
| Bob Peterson
| Red Hat File Systems
|
| Signed-off-by: Bob Peterson <rpeterso@redhat.com>

Hi,

Ignore that last patch (try #2); I forgot to unlock the spin_lock in
the error case. This replacement patch simplifies the code a bit.

Regards,

Bob Peterson
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
--
GFS2: Eliminate sd_rindex_mutex

Over time, we've slowly eliminated the use of sd_rindex_mutex.
Up to this point, it was only used in two places: function
gfs2_ri_total (which totals the file system size by reading
and parsing the rindex file) and function gfs2_rindex_update
which updates the rgrps in memory. Both of these functions have
the rindex glock to protect them, so the rindex is unnecessary.
Since gfs2_grow writes to the rindex via the meta_fs, the mutex
is in the wrong order according to the normal rules. This patch
eliminates the mutex entirely to avoid the problem.

rhbz#798763
--
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4d546df..47d0bda 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -644,7 +644,6 @@ struct gfs2_sbd {

int sd_rindex_uptodate;
spinlock_t sd_rindex_spin;
- struct mutex sd_rindex_mutex;
struct rb_root sd_rindex_tree;
unsigned int sd_rgrps;
unsigned int sd_max_rg_data;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a55baa7..ae5e0a4 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -83,7 +83,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
spin_lock_init(&sdp->sd_statfs_spin);

spin_lock_init(&sdp->sd_rindex_spin);
- mutex_init(&sdp->sd_rindex_mutex);
sdp->sd_rindex_tree.rb_node = NULL;

INIT_LIST_HEAD(&sdp->sd_jindex_list);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e09370e..6ff9f17 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -540,7 +540,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
struct file_ra_state ra_state;
int error, rgrps;

- mutex_lock(&sdp->sd_rindex_mutex);
file_ra_state_init(&ra_state, inode->i_mapping);
for (rgrps = 0;; rgrps++) {
loff_t pos = rgrps * sizeof(struct gfs2_rindex);
@@ -553,11 +552,10 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
break;
total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
}
- mutex_unlock(&sdp->sd_rindex_mutex);
return total_data;
}

-static void rgd_insert(struct gfs2_rgrpd *rgd)
+static int rgd_insert(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
@@ -573,11 +571,13 @@ static void rgd_insert(struct gfs2_rgrpd *rgd)
else if (rgd->rd_addr > cur->rd_addr)
newn = &((*newn)->rb_right);
else
- return;
+ return -EEXIST;
}

rb_link_node(&rgd->rd_node, parent, newn);
rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
+ sdp->sd_rgrps++;
+ return 0;
}

/**
@@ -631,10 +631,12 @@ static int read_rindex_entry(struct gfs2_inode *ip,
if (rgd->rd_data > sdp->sd_max_rg_data)
sdp->sd_max_rg_data = rgd->rd_data;
spin_lock(&sdp->sd_rindex_spin);
- rgd_insert(rgd);
- sdp->sd_rgrps++;
+ error = rgd_insert(rgd);
spin_unlock(&sdp->sd_rindex_spin);
- return error;
+ if (!error)
+ return 0;
+
+ error = 0; /* someone else read in the rgrp; free it and ignore it */

fail:
kfree(rgd->rd_bits);
@@ -695,22 +697,18 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)

/* Read new copy from disk if we don't have the latest */
if (!sdp->sd_rindex_uptodate) {
- mutex_lock(&sdp->sd_rindex_mutex);
if (!gfs2_glock_is_locked_by_me(gl)) {
error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
if (error)
- goto out_unlock;
+ return error;
unlock_required = 1;
}
if (!sdp->sd_rindex_uptodate)
error = gfs2_ri_update(ip);
if (unlock_required)
gfs2_glock_dq_uninit(&ri_gh);
-out_unlock:
- mutex_unlock(&sdp->sd_rindex_mutex);
}

-
return error;
}


All times are GMT. The time now is 12:14 PM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.