FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Device-mapper Development

 
 
LinkBack Thread Tools
 
Old 04-28-2012, 04:44 AM
Mike Snitzer
 
Default dm thin: support for non power of 2 pool blocksize

Non power of 2 blocksize support is needed to properly align thinp IO
on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).

Use do_div wrappers to support non power of 2 blocksize for the pool's
data device. do_div provides comparable performance to the power of 2
math that was performed until now (as tested on modern x86_64 hardware).

Verify that the pool's blocksize is a multiple of 64K and that the
pool's data device is a multiple of blocksize.

Eliminate pool structure's 'sectors_per_block', 'block_shift' and
remaining 4 byte holes.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm-thin.c | 56 +++++++++++++++++++++++++++++++++----------------
1 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ce7dd80..91644b4 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -507,10 +507,8 @@ struct pool {
struct block_device *md_dev;
struct dm_pool_metadata *pmd;

- uint32_t sectors_per_block;
- unsigned block_shift;
- dm_block_t offset_mask;
dm_block_t low_water_blocks;
+ uint32_t sectors_per_block;

struct pool_features pf;
unsigned low_water_triggered:1; /* A dm event has been sent */
@@ -523,8 +521,8 @@ struct pool {
struct work_struct worker;
struct delayed_work waker;

- unsigned ref_count;
unsigned long last_commit_jiffies;
+ unsigned ref_count;

spinlock_t lock;
struct bio_list deferred_bios;
@@ -673,9 +671,27 @@ static void requeue_io(struct thin_c *tc)
* target.
*/

+/*
+ * do_div wrappers that don't modify the dividend
+ */
+static inline sector_t dm_thin_do_div(sector_t a, __u32 b)
+{
+ sector_t r = a;
+
+ do_div(r, b);
+ return r;
+}
+
+static inline sector_t dm_thin_do_mod(sector_t a, __u32 b)
+{
+ sector_t tmp = a;
+
+ return do_div(tmp, b);
+}
+
static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
- return bio->bi_sector >> tc->pool->block_shift;
+ return dm_thin_do_div(bio->bi_sector, tc->pool->sectors_per_block);
}

static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
@@ -683,8 +699,8 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
struct pool *pool = tc->pool;

bio->bi_bdev = tc->pool_dev->bdev;
- bio->bi_sector = (block << pool->block_shift) +
- (bio->bi_sector & pool->offset_mask);
+ bio->bi_sector = (block * pool->sectors_per_block) +
+ dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block);
}

static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -929,9 +945,8 @@ static void process_prepared(struct pool *pool, struct list_head *head,
*/
static int io_overlaps_block(struct pool *pool, struct bio *bio)
{
- return !(bio->bi_sector & pool->offset_mask) &&
+ return !dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block) &&
(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
-
}

static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1234,8 +1249,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
* part of the discard that is in a subsequent
* block.
*/
- sector_t offset = bio->bi_sector - (block << pool->block_shift);
- unsigned remaining = (pool->sectors_per_block - offset) << 9;
+ sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
+ unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
bio->bi_size = min(bio->bi_size, remaining);

cell_release_singleton(cell, bio);
@@ -1696,8 +1711,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,

pool->pmd = pmd;
pool->sectors_per_block = block_size;
- pool->block_shift = ffs(block_size) - 1;
- pool->offset_mask = block_size - 1;
pool->low_water_blocks = 0;
pool_features_init(&pool->pf);
pool->prison = prison_create(PRISON_CELLS);
@@ -1941,12 +1954,18 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
- !is_power_of_2(block_size)) {
+ dm_thin_do_mod(block_size, DATA_DEV_BLOCK_SIZE_MIN_SECTORS)) {
ti->error = "Invalid block size";
r = -EINVAL;
goto out;
}

+ if (dm_thin_do_mod(ti->len, block_size)) {
+ ti->error = "Data device is not a multiple of block size";
+ r = -EINVAL;
+ goto out;
+ }
+
if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) {
ti->error = "Invalid low water mark";
r = -EINVAL;
@@ -2089,7 +2108,7 @@ static int pool_preresume(struct dm_target *ti)
if (r)
return r;

- data_size = ti->len >> pool->block_shift;
+ data_size = dm_thin_do_div(ti->len, pool->sectors_per_block);
r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
if (r) {
DMERR("failed to retrieve data device size");
@@ -2709,17 +2728,18 @@ static int thin_iterate_devices(struct dm_target *ti,
{
dm_block_t blocks;
struct thin_c *tc = ti->private;
+ struct pool *pool = tc->pool;

/*
* We can't call dm_pool_get_data_dev_size() since that blocks. So
* we follow a more convoluted path through to the pool's target.
*/
- if (!tc->pool->ti)
+ if (!pool->ti)
return 0; /* nothing is bound */

- blocks = tc->pool->ti->len >> tc->pool->block_shift;
+ blocks = dm_thin_do_div(pool->ti->len, pool->sectors_per_block);
if (blocks)
- return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+ return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);

return 0;
}
--
1.7.1

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 04-30-2012, 09:55 AM
Joe Thornber
 
Default dm thin: support for non power of 2 pool blocksize

Hi Mike,

In general this looks good. A lot cleaner now you've dropped the
specialisation of the division. A few nit-picks below.

- Joe

On Sat, Apr 28, 2012 at 12:44:29AM -0400, Mike Snitzer wrote:
> +/*
> + * do_div wrappers that don't modify the dividend
> + */
> +static inline sector_t dm_thin_do_div(sector_t a, __u32 b)
> +{
> + sector_t r = a;
> +
> + do_div(r, b);
> + return r;
> +}
> +
> +static inline sector_t dm_thin_do_mod(sector_t a, __u32 b)
> +{
> + sector_t tmp = a;
> +
> + return do_div(tmp, b);
> +}

Please don't inline static functions. Let the compiler make the
decision.

Also those sector_t's are passed by value, so you don't need to
declare r or tmp. eg, it's enough to do this:

static sector_t dm_thin_do_div(sector_t a, __u32 b)
{
do_div(a, b);
return a;
}

static sector_t dm_thin_do_mod(sector_t a, __u32 b)
{
return do_div(a, b);
}

> @@ -1941,12 +1954,18 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)

...

> + if (dm_thin_do_mod(ti->len, block_size)) {
> + ti->error = "Data device is not a multiple of block size";
> + r = -EINVAL;
> + goto out;
> + }

I don't see the need for this check. If I have a disk that isn't a
multiple of the block size why should I have to layer a linear mapping
on it to truncate it before I can use it as a data volume? Any
partial block at the end of the device is already ignored (see the
data_size calculation in pool_preresume). Is this restriction causing
some of the changes you made to the test-suite?

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 04-30-2012, 05:33 PM
Mike Snitzer
 
Default dm thin: support for non power of 2 pool blocksize

On Mon, Apr 30 2012 at 5:55am -0400,
Joe Thornber <thornber@redhat.com> wrote:

> Hi Mike,
>
> In general this looks good. A lot cleaner now you've dropped the
> specialisation of the division. A few nit-picks below.
>
> - Joe
>
> On Sat, Apr 28, 2012 at 12:44:29AM -0400, Mike Snitzer wrote:
> > +/*
> > + * do_div wrappers that don't modify the dividend
> > + */
> > +static inline sector_t dm_thin_do_div(sector_t a, __u32 b)
> > +{
> > + sector_t r = a;
> > +
> > + do_div(r, b);
> > + return r;
> > +}
> > +
> > +static inline sector_t dm_thin_do_mod(sector_t a, __u32 b)
> > +{
> > + sector_t tmp = a;
> > +
> > + return do_div(tmp, b);
> > +}
>
> Please don't inline static functions. Let the compiler make the
> decision.

But in this instance we certainly want it inlined; regardless of whether
the compiler might do it anyway. Why would we ever want to allow the
compiler to not inline it?

> Also those sector_t's are passed by value, so you don't need to
> declare r or tmp. eg, it's enough to do this:
>
> static sector_t dm_thin_do_div(sector_t a, __u32 b)
> {
> do_div(a, b);
> return a;
> }
>
> static sector_t dm_thin_do_mod(sector_t a, __u32 b)
> {
> return do_div(a, b);
> }

OK, makes sense.

> > @@ -1941,12 +1954,18 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
>
> ...
>
> > + if (dm_thin_do_mod(ti->len, block_size)) {
> > + ti->error = "Data device is not a multiple of block size";
> > + r = -EINVAL;
> > + goto out;
> > + }
>
> I don't see the need for this check. If I have a disk that isn't a
> multiple of the block size why should I have to layer a linear mapping
> on it to truncate it before I can use it as a data volume? Any
> partial block at the end of the device is already ignored (see the
> data_size calculation in pool_preresume). Is this restriction causing
> some of the changes you made to the test-suite?

Yes. But imposing this restriction was motivated by and the "attempt to
access beyond end of device" failures I saw with test_origin_unchanged
(I shared more details about this in private mail to you and kabi on
4/25 -- making the pool size a multiple of the non power of 2 blocksize
resolved that failure).

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 05-01-2012, 09:41 AM
Joe Thornber
 
Default dm thin: support for non power of 2 pool blocksize

On Mon, Apr 30, 2012 at 01:33:47PM -0400, Mike Snitzer wrote:
> Yes. But imposing this restriction was motivated by and the "attempt to
> access beyond end of device" failures I saw with test_origin_unchanged
> (I shared more details about this in private mail to you and kabi on
> 4/25 -- making the pool size a multiple of the non power of 2 blocksize
> resolved that failure).

This is the wrong fix; you're just hiding the real issue.

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 05-05-2012, 03:57 AM
Mike Snitzer
 
Default dm thin: support for non power of 2 pool blocksize

Non power of 2 blocksize support is needed to properly align thinp IO
on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).

Use do_div wrappers to support non power of 2 blocksize for the pool's
data device. do_div provides comparable performance to the power of 2
math that was performed until now (as tested on modern x86_64 hardware).

Eliminate pool structure's 'sectors_per_block', 'block_shift' and
remaining 4 byte holes.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm-thin.c | 32 ++++++++++++++------------------
1 file changed, 14 insertions(+), 18 deletions(-)

Index: linux/drivers/md/dm-thin.c
================================================== =================
--- linux.orig/drivers/md/dm-thin.c
+++ linux/drivers/md/dm-thin.c
@@ -507,10 +507,8 @@ struct pool {
struct block_device *md_dev;
struct dm_pool_metadata *pmd;

- uint32_t sectors_per_block;
- unsigned block_shift;
- dm_block_t offset_mask;
dm_block_t low_water_blocks;
+ uint32_t sectors_per_block;

struct pool_features pf;
unsigned low_water_triggered:1; /* A dm event has been sent */
@@ -523,8 +521,8 @@ struct pool {
struct work_struct worker;
struct delayed_work waker;

- unsigned ref_count;
unsigned long last_commit_jiffies;
+ unsigned ref_count;

spinlock_t lock;
struct bio_list deferred_bios;
@@ -675,7 +673,7 @@ static void requeue_io(struct thin_c *tc

static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
- return bio->bi_sector >> tc->pool->block_shift;
+ return dm_do_div(bio->bi_sector, tc->pool->sectors_per_block);
}

static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
@@ -683,8 +681,8 @@ static void remap(struct thin_c *tc, str
struct pool *pool = tc->pool;

bio->bi_bdev = tc->pool_dev->bdev;
- bio->bi_sector = (block << pool->block_shift) +
- (bio->bi_sector & pool->offset_mask);
+ bio->bi_sector = (block * pool->sectors_per_block) +
+ dm_do_mod(bio->bi_sector, pool->sectors_per_block);
}

static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -929,9 +927,8 @@ static void process_prepared(struct pool
*/
static int io_overlaps_block(struct pool *pool, struct bio *bio)
{
- return !(bio->bi_sector & pool->offset_mask) &&
+ return !dm_do_mod(bio->bi_sector, pool->sectors_per_block) &&
(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
-
}

static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1234,8 +1231,8 @@ static void process_discard(struct thin_
* part of the discard that is in a subsequent
* block.
*/
- sector_t offset = bio->bi_sector - (block << pool->block_shift);
- unsigned remaining = (pool->sectors_per_block - offset) << 9;
+ sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
+ unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
bio->bi_size = min(bio->bi_size, remaining);

cell_release_singleton(cell, bio);
@@ -1696,8 +1693,6 @@ static struct pool *pool_create(struct m

pool->pmd = pmd;
pool->sectors_per_block = block_size;
- pool->block_shift = ffs(block_size) - 1;
- pool->offset_mask = block_size - 1;
pool->low_water_blocks = 0;
pool_features_init(&pool->pf);
pool->prison = prison_create(PRISON_CELLS);
@@ -1941,7 +1936,7 @@ static int pool_ctr(struct dm_target *ti
if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
- !is_power_of_2(block_size)) {
+ dm_do_mod(block_size, DATA_DEV_BLOCK_SIZE_MIN_SECTORS)) {
ti->error = "Invalid block size";
r = -EINVAL;
goto out;
@@ -2089,7 +2084,7 @@ static int pool_preresume(struct dm_targ
if (r)
return r;

- data_size = ti->len >> pool->block_shift;
+ data_size = dm_do_div(ti->len, pool->sectors_per_block);
r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
if (r) {
DMERR("failed to retrieve data device size");
@@ -2711,17 +2706,18 @@ static int thin_iterate_devices(struct d
{
dm_block_t blocks;
struct thin_c *tc = ti->private;
+ struct pool *pool = tc->pool;

/*
* We can't call dm_pool_get_data_dev_size() since that blocks. So
* we follow a more convoluted path through to the pool's target.
*/
- if (!tc->pool->ti)
+ if (!pool->ti)
return 0; /* nothing is bound */

- blocks = tc->pool->ti->len >> tc->pool->block_shift;
+ blocks = dm_do_div(pool->ti->len, pool->sectors_per_block);
if (blocks)
- return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+ return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);

return 0;
}

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 

Thread Tools




All times are GMT. The time now is 12:06 PM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org