Linux Archive

Linux Archive (http://www.linux-archive.org/)
-   Device-mapper Development (http://www.linux-archive.org/device-mapper-development/)
-   -   block: Rework bio splitting (http://www.linux-archive.org/device-mapper-development/668065-block-rework-bio-splitting.html)

05-18-2012 02:59 AM

block: Rework bio splitting
 
From: Kent Overstreet <koverstreet@google.com>

Break out bio_split() and bio_pair_split(), and get rid of the
limitations of bio_split()

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
drivers/block/drbd/drbd_req.c | 18 +----
drivers/block/pktcdvd.c | 6 +-
drivers/block/rbd.c | 4 +-
drivers/md/linear.c | 6 +-
drivers/md/raid0.c | 8 +-
drivers/md/raid10.c | 23 ++-----
fs/bio-integrity.c | 44 ------------
fs/bio.c | 149 ++++++++++++++++++++++++++---------------
include/linux/bio.h | 27 +++-----
9 files changed, 125 insertions(+), 160 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 4a0f314..68fa494 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1101,18 +1101,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
if (likely(s_enr == e_enr)) {
inc_ap_bio(mdev, 1);
drbd_make_request_common(mdev, bio, start_time);
- return;
- }
-
- /* can this bio be split generically?
- * Maybe add our own split-arbitrary-bios function. */
- if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
- /* rather error out here than BUG in bio_split */
- dev_err(DEV, "bio would need to, but cannot, be split: "
- "(vcnt=%u,idx=%u,size=%u,sector=%llu)
",
- bio->bi_vcnt, bio->bi_idx, bio->bi_size,
- (unsigned long long)bio->bi_sector);
- bio_endio(bio, -EINVAL);
} else {
/* This bio crosses some boundary, so we have to split it. */
struct bio_pair *bp;
@@ -1128,7 +1116,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
const int sps = 1 << HT_SHIFT; /* sectors per slot */
const int mask = sps - 1;
const sector_t first_sectors = sps - (sect & mask);
- bp = bio_split(bio, first_sectors);
+ bp = bio_pair_split(bio, first_sectors);

/* we need to get a "reference count" (ap_bio_cnt)
* to avoid races with the disconnect/reconnect/suspend code.
@@ -1139,10 +1127,10 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)

D_ASSERT(e_enr == s_enr + 1);

- while (drbd_make_request_common(mdev, &bp->bio1, start_time))
+ while (drbd_make_request_common(mdev, &bp->split, start_time))
inc_ap_bio(mdev, 1);

- while (drbd_make_request_common(mdev, &bp->bio2, start_time))
+ while (drbd_make_request_common(mdev, bio, start_time))
inc_ap_bio(mdev, 1);

dec_ap_bio(mdev);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 6fe693a..1465155 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2467,10 +2467,10 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
if (last_zone != zone) {
BUG_ON(last_zone != zone + pd->settings.size);
first_sectors = last_zone - bio->bi_sector;
- bp = bio_split(bio, first_sectors);
+ bp = bio_pair_split(bio, first_sectors);
BUG_ON(!bp);
- pkt_make_request(q, &bp->bio1);
- pkt_make_request(q, &bp->bio2);
+ pkt_make_request(q, &bp->split);
+ pkt_make_request(q, bio);
bio_pair_release(bp);
return;
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5a953c6..dd19311 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -747,11 +747,11 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,

/* split the bio. We'll release it either in the next
call, or it will have to be released outside */
- bp = bio_split(old_chain, (len - total) / SECTOR_SIZE);
+ bp = bio_pair_split(old_chain, (len - total) / SECTOR_SIZE);
if (!bp)
goto err_out;

- *next = &bp->bio2;
+ *next = &bp->split;
} else
*next = old_chain->bi_next;

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index fa211d8..7c6cafd 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -314,10 +314,10 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)

rcu_read_unlock();

- bp = bio_split(bio, end_sector - bio->bi_sector);
+ bp = bio_pair_split(bio, end_sector - bio->bi_sector);

- linear_make_request(mddev, &bp->bio1);
- linear_make_request(mddev, &bp->bio2);
+ linear_make_request(mddev, &bp->split);
+ linear_make_request(mddev, bio);
bio_pair_release(bp);
return;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index de63a1f..3469adf 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -516,13 +516,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
* refuse to split for us, so we need to split it.
*/
if (likely(is_power_of_2(chunk_sects)))
- bp = bio_split(bio, chunk_sects - (sector &
+ bp = bio_pair_split(bio, chunk_sects - (sector &
(chunk_sects-1)));
else
- bp = bio_split(bio, chunk_sects -
+ bp = bio_pair_split(bio, chunk_sects -
sector_div(sector, chunk_sects));
- raid0_make_request(mddev, &bp->bio1);
- raid0_make_request(mddev, &bp->bio2);
+ raid0_make_request(mddev, &bp->split);
+ raid0_make_request(mddev, bio);
bio_pair_release(bp);
return;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3e7b154..0062326 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1001,15 +1001,9 @@ static void make_request(struct mddev *mddev, struct bio * bio)
> chunk_sects &&
conf->near_copies < conf->raid_disks)) {
struct bio_pair *bp;
- /* Sanity check -- queue functions should prevent this happening */
- if (bio->bi_vcnt != 1 ||
- bio->bi_idx != 0)
- goto bad_map;
- /* This is a one page bio that upper layers
- * refuse to split for us, so we need to split it.
- */
- bp = bio_split(bio,
- chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
+
+ bp = bio_pair_split(bio,
+ chunk_sects - (bio->bi_sector & (chunk_sects - 1)));

/* Each of these 'make_request' calls will call 'wait_barrier'.
* If the first succeeds but the second blocks due to the resync
@@ -1023,8 +1017,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
conf->nr_waiting++;
spin_unlock_irq(&conf->resync_lock);

- make_request(mddev, &bp->bio1);
- make_request(mddev, &bp->bio2);
+ make_request(mddev, &bp->split);
+ make_request(mddev, bio);

spin_lock_irq(&conf->resync_lock);
conf->nr_waiting--;
@@ -1033,13 +1027,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)

bio_pair_release(bp);
return;
- bad_map:
- printk("md/raid10:%s: make_request bug: can't convert block across chunks"
- " or bigger than %dk %llu %d
", mdname(mddev), chunk_sects/2,
- (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
-
- bio_io_error(bio);
- return;
}

md_write_start(mddev, bio);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index e85c04b..9ed2c44 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -682,50 +682,6 @@ void bio_integrity_trim(struct bio *bio, unsigned int offset,
EXPORT_SYMBOL(bio_integrity_trim);

/**
- * bio_integrity_split - Split integrity metadata
- * @bio: Protected bio
- * @bp: Resulting bio_pair
- * @sectors: Offset
- *
- * Description: Splits an integrity page into a bio_pair.
- */
-void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
-{
- struct blk_integrity *bi;
- struct bio_integrity_payload *bip = bio->bi_integrity;
- unsigned int nr_sectors;
-
- if (bio_integrity(bio) == 0)
- return;
-
- bi = bdev_get_integrity(bio->bi_bdev);
- BUG_ON(bi == NULL);
- BUG_ON(bip->bip_vcnt != 1);
-
- nr_sectors = bio_integrity_hw_sectors(bi, sectors);
-
- bp->bio1.bi_integrity = &bp->bip1;
- bp->bio2.bi_integrity = &bp->bip2;
-
- bp->iv1 = bip->bip_vec[0];
- bp->iv2 = bip->bip_vec[0];
-
- bp->bip1.bip_vec[0] = bp->iv1;
- bp->bip2.bip_vec[0] = bp->iv2;
-
- bp->iv1.bv_len = sectors * bi->tuple_size;
- bp->iv2.bv_offset += sectors * bi->tuple_size;
- bp->iv2.bv_len -= sectors * bi->tuple_size;
-
- bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
- bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
-
- bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
- bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
-}
-EXPORT_SYMBOL(bio_integrity_split);
-
-/**
* bio_integrity_clone - Callback for cloning bios with integrity metadata
* @bio: New bio
* @bio_src: Original bio
diff --git a/fs/bio.c b/fs/bio.c
index 3332800..781a8cf 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -35,7 +35,7 @@
*/
#define BIO_INLINE_VECS 4

-static mempool_t *bio_split_pool __read_mostly;
+static struct bio_set *bio_split_pool __read_mostly;

/*
* if you change this list, also change bvec_alloc or things will
@@ -1464,84 +1464,124 @@ void bio_endio(struct bio *bio, int error)
}
EXPORT_SYMBOL(bio_endio);

-void bio_pair_release(struct bio_pair *bp)
+struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs)
{
- if (atomic_dec_and_test(&bp->cnt)) {
- struct bio *master = bp->bio1.bi_private;
+ unsigned idx, vcnt = 0, nbytes = sectors << 9;
+ struct bio_vec *bv;
+ struct bio *ret = NULL;
+
+ BUG_ON(sectors <= 0);
+
+ if (current->bio_list)
+ gfp &= ~__GFP_WAIT;
+
+ if (nbytes >= bio->bi_size)
+ return bio;
+
+ trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
+ bio->bi_sector + sectors);
+
+ bio_for_each_segment(bv, bio, idx) {
+ vcnt = idx - bio->bi_idx;
+
+ if (!nbytes) {
+ ret = bio_alloc_bioset(gfp, 0, bs);
+ if (!ret)
+ return NULL;
+
+ ret->bi_io_vec = bio_iovec(bio);
+ ret->bi_flags |= 1 << BIO_CLONED;
+ break;
+ } else if (nbytes < bv->bv_len) {
+ ret = bio_alloc_bioset(gfp, ++vcnt, bs);
+ if (!ret)
+ return NULL;
+
+ memcpy(ret->bi_io_vec, bio_iovec(bio),
+ sizeof(struct bio_vec) * vcnt);
+
+ ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
+ bv->bv_offset += nbytes;
+ bv->bv_len -= nbytes;
+ break;
+ }
+
+ nbytes -= bv->bv_len;
+ }
+
+ ret->bi_bdev = bio->bi_bdev;
+ ret->bi_sector = bio->bi_sector;
+ ret->bi_size = sectors << 9;
+ ret->bi_rw = bio->bi_rw;
+ ret->bi_vcnt = vcnt;
+ ret->bi_max_vecs = vcnt;
+ ret->bi_end_io = bio->bi_end_io;
+ ret->bi_private = bio->bi_private;

- bio_endio(master, bp->error);
- mempool_free(bp, bp->bio2.bi_private);
+ bio->bi_sector += sectors;
+ bio->bi_size -= sectors << 9;
+ bio->bi_idx = idx;
+
+ if (bio_integrity(bio)) {
+ bio_integrity_clone(ret, bio, gfp, bs);
+ bio_integrity_trim(ret, 0, bio_sectors(ret));
+ bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
}
+
+ return ret;
}
-EXPORT_SYMBOL(bio_pair_release);
+EXPORT_SYMBOL_GPL(bio_split);

-static void bio_pair_end_1(struct bio *bi, int err)
+void bio_pair_release(struct bio_pair *bp)
{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
-
- if (err)
- bp->error = err;
+ if (atomic_dec_and_test(&bp->cnt)) {
+ bp->orig->bi_end_io = bp->bi_end_io;
+ bp->orig->bi_private = bp->bi_private;

- bio_pair_release(bp);
+ bio_endio(bp->orig, 0);
+ bio_put(&bp->split);
+ }
}
+EXPORT_SYMBOL(bio_pair_release);

-static void bio_pair_end_2(struct bio *bi, int err)
+static void bio_pair_end(struct bio *bio, int error)
{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
+ struct bio_pair *bp = bio->bi_private;

- if (err)
- bp->error = err;
+ if (error)
+ clear_bit(BIO_UPTODATE, &bp->orig->bi_flags);

bio_pair_release(bp);
}

-/*
- * split a bio - only worry about a bio with a single page in its iovec
- */
-struct bio_pair *bio_split(struct bio *bi, int first_sectors)
+struct bio_pair *bio_pair_split(struct bio *bio, int first_sectors)
{
- struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
-
- if (!bp)
- return bp;
+ struct bio_pair *bp;
+ struct bio *split = bio_split(bio, first_sectors,
+ GFP_NOIO, bio_split_pool);

- trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
- bi->bi_sector + first_sectors);
-
- BUG_ON(bi->bi_vcnt != 1);
- BUG_ON(bi->bi_idx != 0);
- atomic_set(&bp->cnt, 3);
- bp->error = 0;
- bp->bio1 = *bi;
- bp->bio2 = *bi;
- bp->bio2.bi_sector += first_sectors;
- bp->bio2.bi_size -= first_sectors << 9;
- bp->bio1.bi_size = first_sectors << 9;
+ if (!split)
+ return NULL;

- bp->bv1 = bi->bi_io_vec[0];
- bp->bv2 = bi->bi_io_vec[0];
- bp->bv2.bv_offset += first_sectors << 9;
- bp->bv2.bv_len -= first_sectors << 9;
- bp->bv1.bv_len = first_sectors << 9;
+ BUG_ON(split == bio);

- bp->bio1.bi_io_vec = &bp->bv1;
- bp->bio2.bi_io_vec = &bp->bv2;
+ bp = container_of(split, struct bio_pair, split);

- bp->bio1.bi_max_vecs = 1;
- bp->bio2.bi_max_vecs = 1;
+ atomic_set(&bp->cnt, 3);

- bp->bio1.bi_end_io = bio_pair_end_1;
- bp->bio2.bi_end_io = bio_pair_end_2;
+ bp->bi_end_io = bio->bi_end_io;
+ bp->bi_private = bio->bi_private;

- bp->bio1.bi_private = bi;
- bp->bio2.bi_private = bio_split_pool;
+ bio->bi_private = bp;
+ bio->bi_end_io = bio_pair_end;

- if (bio_integrity(bi))
- bio_integrity_split(bi, bp, first_sectors);
+ split->bi_private = bp;
+ split->bi_end_io = bio_pair_end;

return bp;
}
-EXPORT_SYMBOL(bio_split);
+EXPORT_SYMBOL(bio_pair_split);

/**
* bio_sector_offset - Find hardware sector offset in bio
@@ -1694,8 +1734,7 @@ static int __init init_bio(void)
if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
panic("bio: can't create integrity pool
");

- bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
- sizeof(struct bio_pair));
+ bio_split_pool = bioset_create(BIO_POOL_SIZE, offsetof(struct bio_pair, split));
if (!bio_split_pool)
panic("bio: can't create split pool
");

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 35f7c4d..db38175 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -197,16 +197,18 @@ struct bio_integrity_payload {
* in bio2.bi_private
*/
struct bio_pair {
- struct bio bio1, bio2;
- struct bio_vec bv1, bv2;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
- struct bio_integrity_payload bip1, bip2;
- struct bio_vec iv1, iv2;
-#endif
- atomic_t cnt;
- int error;
+ atomic_t cnt;
+
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+
+ struct bio *orig;
+ struct bio split;
};
-extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
+
+extern struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs);
+extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors);
extern void bio_pair_release(struct bio_pair *dbio);

extern struct bio_set *bioset_create(unsigned int, unsigned int);
@@ -511,7 +513,6 @@ extern int bio_integrity_prep(struct bio *);
extern void bio_integrity_endio(struct bio *, int);
extern void bio_integrity_advance(struct bio *, unsigned int);
extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
-extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
extern int bioset_integrity_create(struct bio_set *, int);
extern void bioset_integrity_free(struct bio_set *);
@@ -555,12 +556,6 @@ static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
return 0;
}

-static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
- int sectors)
-{
- return;
-}
-
static inline void bio_integrity_advance(struct bio *bio,
unsigned int bytes_done)
{
--
1.7.9.rc2

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Tejun Heo 05-18-2012 05:07 PM

block: Rework bio splitting
 
Hello,

On Thu, May 17, 2012 at 10:59:57PM -0400, koverstreet@google.com wrote:
> From: Kent Overstreet <koverstreet@google.com>
>
> Break out bio_split() and bio_pair_split(), and get rid of the
> limitations of bio_split()

Again, what are those limitations being removed and why and at what
cost?

> diff --git a/fs/bio.c b/fs/bio.c
> index 3332800..781a8cf 100644
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -35,7 +35,7 @@
> */
> #define BIO_INLINE_VECS 4
>
> -static mempool_t *bio_split_pool __read_mostly;
> +static struct bio_set *bio_split_pool __read_mostly;
>
> /*
> * if you change this list, also change bvec_alloc or things will
> @@ -1464,84 +1464,124 @@ void bio_endio(struct bio *bio, int error)
> }
> EXPORT_SYMBOL(bio_endio);
>

Please add /** comment documenting the function.

> -void bio_pair_release(struct bio_pair *bp)
> +struct bio *bio_split(struct bio *bio, int sectors,
> + gfp_t gfp, struct bio_set *bs)
> {
> - if (atomic_dec_and_test(&bp->cnt)) {
> - struct bio *master = bp->bio1.bi_private;
> + unsigned idx, vcnt = 0, nbytes = sectors << 9;
> + struct bio_vec *bv;
> + struct bio *ret = NULL;

Maybe naming it @split is better?

> +
> + BUG_ON(sectors <= 0);
> +
> + if (current->bio_list)
> + gfp &= ~__GFP_WAIT;

Please explain what this means.

> + if (nbytes >= bio->bi_size)
> + return bio;
> +
> + trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
> + bio->bi_sector + sectors);
> +
> + bio_for_each_segment(bv, bio, idx) {
> + vcnt = idx - bio->bi_idx;
> +
> + if (!nbytes) {
> + ret = bio_alloc_bioset(gfp, 0, bs);
> + if (!ret)
> + return NULL;
> +
> + ret->bi_io_vec = bio_iovec(bio);
> + ret->bi_flags |= 1 << BIO_CLONED;
> + break;
> + } else if (nbytes < bv->bv_len) {
> + ret = bio_alloc_bioset(gfp, ++vcnt, bs);
> + if (!ret)
> + return NULL;
> +
> + memcpy(ret->bi_io_vec, bio_iovec(bio),
> + sizeof(struct bio_vec) * vcnt);
> +
> + ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
> + bv->bv_offset += nbytes;
> + bv->bv_len -= nbytes;

Please don't indent assignments.

> + break;
> + }
> +
> + nbytes -= bv->bv_len;
> + }

I find the code a bit confusing. Wouldn't it be better to structure
it as

bio_for_each_segment() {
find splitting point;
}

Do all of the splitting.

> + ret->bi_bdev = bio->bi_bdev;
> + ret->bi_sector = bio->bi_sector;
> + ret->bi_size = sectors << 9;
> + ret->bi_rw = bio->bi_rw;
> + ret->bi_vcnt = vcnt;
> + ret->bi_max_vecs = vcnt;
> + ret->bi_end_io = bio->bi_end_io;
> + ret->bi_private = bio->bi_private;
>
> - bio_endio(master, bp->error);
> - mempool_free(bp, bp->bio2.bi_private);
> + bio->bi_sector += sectors;
> + bio->bi_size -= sectors << 9;
> + bio->bi_idx = idx;

I personally would prefer not having indentations here either.

> + if (bio_integrity(bio)) {
> + bio_integrity_clone(ret, bio, gfp, bs);
> + bio_integrity_trim(ret, 0, bio_sectors(ret));
> + bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
> }
> +
> + return ret;
> }
> -EXPORT_SYMBOL(bio_pair_release);
> +EXPORT_SYMBOL_GPL(bio_split);

/** comment would be nice.

> -static void bio_pair_end_1(struct bio *bi, int err)
> +void bio_pair_release(struct bio_pair *bp)
> {
> - struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
> -
> - if (err)
> - bp->error = err;
> + if (atomic_dec_and_test(&bp->cnt)) {
> + bp->orig->bi_end_io = bp->bi_end_io;
> + bp->orig->bi_private = bp->bi_private;

So, before, split wouldn't override orig->bi_private. Now, it does so
while the bio is in flight. I don't know. If the only benefit of
temporary override is avoiding have a separate end_io call, I think
not doing so is better. Also, behavior changes as subtle as this
*must* be noted in the patch description.

> - bio_pair_release(bp);
> + bio_endio(bp->orig, 0);
> + bio_put(&bp->split);
> + }
> }
> +EXPORT_SYMBOL(bio_pair_release);

And it would be super-nice if you can add /** comment here too.

> -/*
> - * split a bio - only worry about a bio with a single page in its iovec
> - */
> -struct bio_pair *bio_split(struct bio *bi, int first_sectors)
> +struct bio_pair *bio_pair_split(struct bio *bio, int first_sectors)
> {
> - struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
> -
> - if (!bp)
> - return bp;
> + struct bio_pair *bp;
> + struct bio *split = bio_split(bio, first_sectors,
> + GFP_NOIO, bio_split_pool);

I know fs/bio.c isn't a bastion of good style but let's try improve it
bit by bit. It's generally considered a bad style to put a statement
which may fail in local var declaration. IOW, please do,

struct bio *split;

split = bio_split();
if (!split)
return NULL;

> @@ -1694,8 +1734,7 @@ static int __init init_bio(void)
> if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
> panic("bio: can't create integrity pool
");
>
> - bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
> - sizeof(struct bio_pair));
> + bio_split_pool = bioset_create(BIO_POOL_SIZE, offsetof(struct bio_pair, split));
> if (!bio_split_pool)
> panic("bio: can't create split pool
");

BIO_SPLIT_ENTRIES is probably something dependent on how splits can
stack, so I don't think we want to change that to BIO_POOL_SIZE.

Thanks.

--
tejun

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Boaz Harrosh 05-18-2012 05:46 PM

block: Rework bio splitting
 
On 05/18/2012 05:59 AM, koverstreet@google.com wrote:

> From: Kent Overstreet <koverstreet@google.com>
>
> Break out bio_split() and bio_pair_split(), and get rid of the
> limitations of bio_split()
>
> Signed-off-by: Kent Overstreet <koverstreet@google.com>
> ---
> drivers/block/drbd/drbd_req.c | 18 +----
> drivers/block/pktcdvd.c | 6 +-
> drivers/block/rbd.c | 4 +-
> drivers/md/linear.c | 6 +-
> drivers/md/raid0.c | 8 +-
> drivers/md/raid10.c | 23 ++-----
> fs/bio-integrity.c | 44 ------------
> fs/bio.c | 149 ++++++++++++++++++++++++++---------------
> include/linux/bio.h | 27 +++-----
> 9 files changed, 125 insertions(+), 160 deletions(-)
>
> diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
> index 4a0f314..68fa494 100644
> --- a/drivers/block/drbd/drbd_req.c
> +++ b/drivers/block/drbd/drbd_req.c
> @@ -1101,18 +1101,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
> if (likely(s_enr == e_enr)) {
> inc_ap_bio(mdev, 1);
> drbd_make_request_common(mdev, bio, start_time);
> - return;
> - }
> -
> - /* can this bio be split generically?
> - * Maybe add our own split-arbitrary-bios function. */
> - if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
> - /* rather error out here than BUG in bio_split */
> - dev_err(DEV, "bio would need to, but cannot, be split: "
> - "(vcnt=%u,idx=%u,size=%u,sector=%llu)
",
> - bio->bi_vcnt, bio->bi_idx, bio->bi_size,
> - (unsigned long long)bio->bi_sector);
> - bio_endio(bio, -EINVAL);
> } else {
> /* This bio crosses some boundary, so we have to split it. */
> struct bio_pair *bp;
> @@ -1128,7 +1116,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
> const int sps = 1 << HT_SHIFT; /* sectors per slot */
> const int mask = sps - 1;
> const sector_t first_sectors = sps - (sect & mask);
> - bp = bio_split(bio, first_sectors);
> + bp = bio_pair_split(bio, first_sectors);


I do not see the point of this rename. See below

>
> /* we need to get a "reference count" (ap_bio_cnt)
> * to avoid races with the disconnect/reconnect/suspend code.
> @@ -1139,10 +1127,10 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
>
> D_ASSERT(e_enr == s_enr + 1);
>
> - while (drbd_make_request_common(mdev, &bp->bio1, start_time))
> + while (drbd_make_request_common(mdev, &bp->split, start_time))
> inc_ap_bio(mdev, 1);
>
> - while (drbd_make_request_common(mdev, &bp->bio2, start_time))
> + while (drbd_make_request_common(mdev, bio, start_time))
> inc_ap_bio(mdev, 1);
>
> dec_ap_bio(mdev);
> diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
> index 6fe693a..1465155 100644
> --- a/drivers/block/pktcdvd.c
> +++ b/drivers/block/pktcdvd.c
> @@ -2467,10 +2467,10 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
> if (last_zone != zone) {
> BUG_ON(last_zone != zone + pd->settings.size);
> first_sectors = last_zone - bio->bi_sector;
> - bp = bio_split(bio, first_sectors);
> + bp = bio_pair_split(bio, first_sectors);


just a rename. See below


> BUG_ON(!bp);
> - pkt_make_request(q, &bp->bio1);
> - pkt_make_request(q, &bp->bio2);
> + pkt_make_request(q, &bp->split);
> + pkt_make_request(q, bio);
> bio_pair_release(bp);
> return;
> }
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 5a953c6..dd19311 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -747,11 +747,11 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
>
> /* split the bio. We'll release it either in the next
> call, or it will have to be released outside */
> - bp = bio_split(old_chain, (len - total) / SECTOR_SIZE);
> + bp = bio_pair_split(old_chain, (len - total) / SECTOR_SIZE);


just a rename. See below

> if (!bp)
> goto err_out;
>
> - *next = &bp->bio2;
> + *next = &bp->split;
> } else
> *next = old_chain->bi_next;
>
> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
> index fa211d8..7c6cafd 100644
> --- a/drivers/md/linear.c
> +++ b/drivers/md/linear.c
> @@ -314,10 +314,10 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
>
> rcu_read_unlock();
>
> - bp = bio_split(bio, end_sector - bio->bi_sector);
> + bp = bio_pair_split(bio, end_sector - bio->bi_sector);
>


just a rename. See below

> - linear_make_request(mddev, &bp->bio1);
> - linear_make_request(mddev, &bp->bio2);
> + linear_make_request(mddev, &bp->split);
> + linear_make_request(mddev, bio);
> bio_pair_release(bp);
> return;
> }
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index de63a1f..3469adf 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -516,13 +516,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
> * refuse to split for us, so we need to split it.
> */
> if (likely(is_power_of_2(chunk_sects)))
> - bp = bio_split(bio, chunk_sects - (sector &
> + bp = bio_pair_split(bio, chunk_sects - (sector &
> (chunk_sects-1)));
> else
> - bp = bio_split(bio, chunk_sects -
> + bp = bio_pair_split(bio, chunk_sects -
> sector_div(sector, chunk_sects));


just a rename. See below


> - raid0_make_request(mddev, &bp->bio1);
> - raid0_make_request(mddev, &bp->bio2);
> + raid0_make_request(mddev, &bp->split);
> + raid0_make_request(mddev, bio);
> bio_pair_release(bp);
> return;
> }
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 3e7b154..0062326 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1001,15 +1001,9 @@ static void make_request(struct mddev *mddev, struct bio * bio)
> > chunk_sects &&
> conf->near_copies < conf->raid_disks)) {
> struct bio_pair *bp;
> - /* Sanity check -- queue functions should prevent this happening */
> - if (bio->bi_vcnt != 1 ||
> - bio->bi_idx != 0)
> - goto bad_map;
> - /* This is a one page bio that upper layers
> - * refuse to split for us, so we need to split it.
> - */
> - bp = bio_split(bio,
> - chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
> +
> + bp = bio_pair_split(bio,
> + chunk_sects - (bio->bi_sector & (chunk_sects - 1)));


just a rename. See below

>
> /* Each of these 'make_request' calls will call 'wait_barrier'.
> * If the first succeeds but the second blocks due to the resync
> @@ -1023,8 +1017,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
> conf->nr_waiting++;
> spin_unlock_irq(&conf->resync_lock);
>
> - make_request(mddev, &bp->bio1);
> - make_request(mddev, &bp->bio2);
> + make_request(mddev, &bp->split);
> + make_request(mddev, bio);
>
> spin_lock_irq(&conf->resync_lock);
> conf->nr_waiting--;
> @@ -1033,13 +1027,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
>
> bio_pair_release(bp);
> return;
> - bad_map:
> - printk("md/raid10:%s: make_request bug: can't convert block across chunks"
> - " or bigger than %dk %llu %d
", mdname(mddev), chunk_sects/2,
> - (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
> -
> - bio_io_error(bio);
> - return;
> }
>
> md_write_start(mddev, bio);
> diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
> index e85c04b..9ed2c44 100644
> --- a/fs/bio-integrity.c
> +++ b/fs/bio-integrity.c
> @@ -682,50 +682,6 @@ void bio_integrity_trim(struct bio *bio, unsigned int offset,
> EXPORT_SYMBOL(bio_integrity_trim);
>
> /**
> - * bio_integrity_split - Split integrity metadata
> - * @bio: Protected bio
> - * @bp: Resulting bio_pair
> - * @sectors: Offset
> - *
> - * Description: Splits an integrity page into a bio_pair.
> - */
> -void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
> -{
> - struct blk_integrity *bi;
> - struct bio_integrity_payload *bip = bio->bi_integrity;
> - unsigned int nr_sectors;
> -
> - if (bio_integrity(bio) == 0)
> - return;
> -
> - bi = bdev_get_integrity(bio->bi_bdev);
> - BUG_ON(bi == NULL);
> - BUG_ON(bip->bip_vcnt != 1);
> -
> - nr_sectors = bio_integrity_hw_sectors(bi, sectors);
> -
> - bp->bio1.bi_integrity = &bp->bip1;
> - bp->bio2.bi_integrity = &bp->bip2;
> -
> - bp->iv1 = bip->bip_vec[0];
> - bp->iv2 = bip->bip_vec[0];
> -
> - bp->bip1.bip_vec[0] = bp->iv1;
> - bp->bip2.bip_vec[0] = bp->iv2;
> -
> - bp->iv1.bv_len = sectors * bi->tuple_size;
> - bp->iv2.bv_offset += sectors * bi->tuple_size;
> - bp->iv2.bv_len -= sectors * bi->tuple_size;
> -
> - bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
> - bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
> -
> - bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
> - bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
> -}
> -EXPORT_SYMBOL(bio_integrity_split);
> -
> -/**
> * bio_integrity_clone - Callback for cloning bios with integrity metadata
> * @bio: New bio
> * @bio_src: Original bio
> diff --git a/fs/bio.c b/fs/bio.c
> index 3332800..781a8cf 100644
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -35,7 +35,7 @@
> */
> #define BIO_INLINE_VECS 4
>
> -static mempool_t *bio_split_pool __read_mostly;
> +static struct bio_set *bio_split_pool __read_mostly;
>
> /*
> * if you change this list, also change bvec_alloc or things will
> @@ -1464,84 +1464,124 @@ void bio_endio(struct bio *bio, int error)
> }
> EXPORT_SYMBOL(bio_endio);
>
> -void bio_pair_release(struct bio_pair *bp)


bio_pair_release() was just moved to down there. Which makes
a complete messy patch, unless I patch it on real code
Its hard to see what's written.

For review sake if I *must* move+change code, I split
it up by doing the pure move in a separate patch, or
sometime commenting the old code and remove it in
a next patch.

But here I don't see the point of why you switched
them?

> +struct bio *bio_split(struct bio *bio, int sectors,
> + gfp_t gfp, struct bio_set *bs)
> {
> - if (atomic_dec_and_test(&bp->cnt)) {
> - struct bio *master = bp->bio1.bi_private;
> + unsigned idx, vcnt = 0, nbytes = sectors << 9;
> + struct bio_vec *bv;
> + struct bio *ret = NULL;
> +
> + BUG_ON(sectors <= 0);
> +
> + if (current->bio_list)
> + gfp &= ~__GFP_WAIT;
> +
> + if (nbytes >= bio->bi_size)
> + return bio;
> +
> + trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
> + bio->bi_sector + sectors);
> +
> + bio_for_each_segment(bv, bio, idx) {
> + vcnt = idx - bio->bi_idx;
> +
> + if (!nbytes) {
> + ret = bio_alloc_bioset(gfp, 0, bs);
> + if (!ret)
> + return NULL;
> +
> + ret->bi_io_vec = bio_iovec(bio);
> + ret->bi_flags |= 1 << BIO_CLONED;
> + break;
> + } else if (nbytes < bv->bv_len) {
> + ret = bio_alloc_bioset(gfp, ++vcnt, bs);
> + if (!ret)
> + return NULL;
> +
> + memcpy(ret->bi_io_vec, bio_iovec(bio),
> + sizeof(struct bio_vec) * vcnt);
> +
> + ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
> + bv->bv_offset += nbytes;
> + bv->bv_len -= nbytes;
> + break;
> + }
> +
> + nbytes -= bv->bv_len;
> + }
> +
> + ret->bi_bdev = bio->bi_bdev;
> + ret->bi_sector = bio->bi_sector;
> + ret->bi_size = sectors << 9;
> + ret->bi_rw = bio->bi_rw;
> + ret->bi_vcnt = vcnt;
> + ret->bi_max_vecs = vcnt;
> + ret->bi_end_io = bio->bi_end_io;
> + ret->bi_private = bio->bi_private;
>
> - bio_endio(master, bp->error);
> - mempool_free(bp, bp->bio2.bi_private);
> + bio->bi_sector += sectors;
> + bio->bi_size -= sectors << 9;
> + bio->bi_idx = idx;
> +
> + if (bio_integrity(bio)) {
> + bio_integrity_clone(ret, bio, gfp, bs);
> + bio_integrity_trim(ret, 0, bio_sectors(ret));
> + bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
> }
> +
> + return ret;
> }
> -EXPORT_SYMBOL(bio_pair_release);
> +EXPORT_SYMBOL_GPL(bio_split);
>
> -static void bio_pair_end_1(struct bio *bi, int err)
> +void bio_pair_release(struct bio_pair *bp)
> {
> - struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
> -
> - if (err)
> - bp->error = err;
> + if (atomic_dec_and_test(&bp->cnt)) {
> + bp->orig->bi_end_io = bp->bi_end_io;
> + bp->orig->bi_private = bp->bi_private;
>
> - bio_pair_release(bp);
> + bio_endio(bp->orig, 0);
> + bio_put(&bp->split);
> + }
> }
> +EXPORT_SYMBOL(bio_pair_release);
>
> -static void bio_pair_end_2(struct bio *bi, int err)
> +static void bio_pair_end(struct bio *bio, int error)
> {
> - struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
> + struct bio_pair *bp = bio->bi_private;
>
> - if (err)
> - bp->error = err;
> + if (error)
> + clear_bit(BIO_UPTODATE, &bp->orig->bi_flags);
>
> bio_pair_release(bp);
> }
>
> -/*
> - * split a bio - only worry about a bio with a single page in its iovec
> - */
> -struct bio_pair *bio_split(struct bio *bi, int first_sectors)
> +struct bio_pair *bio_pair_split(struct bio *bio, int first_sectors)
> {


Here too if you would have staged the function order properly that
new-split above would better merge with the old-split and the
new-old-split that uses the new-split would be a new hunk by itself.

I do this not only for reviewers, but also for myself to prove
I have not changed something I didn't mean to change.

To summarize. I think undoing code movement and better staging
of the code could make this patch much cleaner.

> - struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
> -
> - if (!bp)
> - return bp;
> + struct bio_pair *bp;
> + struct bio *split = bio_split(bio, first_sectors,
> + GFP_NOIO, bio_split_pool);
>
> - trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
> - bi->bi_sector + first_sectors);
> -
> - BUG_ON(bi->bi_vcnt != 1);
> - BUG_ON(bi->bi_idx != 0);
> - atomic_set(&bp->cnt, 3);
> - bp->error = 0;
> - bp->bio1 = *bi;
> - bp->bio2 = *bi;
> - bp->bio2.bi_sector += first_sectors;
> - bp->bio2.bi_size -= first_sectors << 9;
> - bp->bio1.bi_size = first_sectors << 9;
> + if (!split)
> + return NULL;
>
> - bp->bv1 = bi->bi_io_vec[0];
> - bp->bv2 = bi->bi_io_vec[0];
> - bp->bv2.bv_offset += first_sectors << 9;
> - bp->bv2.bv_len -= first_sectors << 9;
> - bp->bv1.bv_len = first_sectors << 9;
> + BUG_ON(split == bio);
>
> - bp->bio1.bi_io_vec = &bp->bv1;
> - bp->bio2.bi_io_vec = &bp->bv2;
> + bp = container_of(split, struct bio_pair, split);
>
> - bp->bio1.bi_max_vecs = 1;
> - bp->bio2.bi_max_vecs = 1;
> + atomic_set(&bp->cnt, 3);
>
> - bp->bio1.bi_end_io = bio_pair_end_1;
> - bp->bio2.bi_end_io = bio_pair_end_2;
> + bp->bi_end_io = bio->bi_end_io;
> + bp->bi_private = bio->bi_private;
>
> - bp->bio1.bi_private = bi;
> - bp->bio2.bi_private = bio_split_pool;
> + bio->bi_private = bp;
> + bio->bi_end_io = bio_pair_end;
>
> - if (bio_integrity(bi))
> - bio_integrity_split(bi, bp, first_sectors);
> + split->bi_private = bp;
> + split->bi_end_io = bio_pair_end;
>
> return bp;
> }
> -EXPORT_SYMBOL(bio_split);
> +EXPORT_SYMBOL(bio_pair_split);
>
> /**
> * bio_sector_offset - Find hardware sector offset in bio
> @@ -1694,8 +1734,7 @@ static int __init init_bio(void)
> if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
> panic("bio: can't create integrity pool
");
>
> - bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
> - sizeof(struct bio_pair));
> + bio_split_pool = bioset_create(BIO_POOL_SIZE, offsetof(struct bio_pair, split));
> if (!bio_split_pool)
> panic("bio: can't create split pool
");
>
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 35f7c4d..db38175 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -197,16 +197,18 @@ struct bio_integrity_payload {
> * in bio2.bi_private
> */
> struct bio_pair {
> - struct bio bio1, bio2;
> - struct bio_vec bv1, bv2;
> -#if defined(CONFIG_BLK_DEV_INTEGRITY)
> - struct bio_integrity_payload bip1, bip2;
> - struct bio_vec iv1, iv2;
> -#endif
> - atomic_t cnt;
> - int error;
> + atomic_t cnt;
> +
> + bio_end_io_t *bi_end_io;
> + void *bi_private;
> +
> + struct bio *orig;
> + struct bio split;
> };
> -extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
> +
> +extern struct bio *bio_split(struct bio *bio, int sectors,
> + gfp_t gfp, struct bio_set *bs);
> +extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors);


I do not get this. Please explain. We have two different topics here.
1. Introduction of a new/better split function.
2. Rename of an old one.

Now if you must, making the rename a patch of it's own is preferred.
But why must you? can't you think of a creative name for the new
one?

Though I admit your names do make sense. The first belongs to the
bio domain bio_DO, and the second belongs to the bio_pair domain
bio_pair_DO

Hey this patch and the rest are amazing. It gives one the incentive
to do some more complicating things, just because it is easier now.
Thanks!

Boaz

> extern void bio_pair_release(struct bio_pair *dbio);
>
> extern struct bio_set *bioset_create(unsigned int, unsigned int);
> @@ -511,7 +513,6 @@ extern int bio_integrity_prep(struct bio *);
> extern void bio_integrity_endio(struct bio *, int);
> extern void bio_integrity_advance(struct bio *, unsigned int);
> extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
> -extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
> extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
> extern int bioset_integrity_create(struct bio_set *, int);
> extern void bioset_integrity_free(struct bio_set *);
> @@ -555,12 +556,6 @@ static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
> return 0;
> }
>
> -static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
> - int sectors)
> -{
> - return;
> -}
> -
> static inline void bio_integrity_advance(struct bio *bio,
> unsigned int bytes_done)
> {


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Kent Overstreet 05-24-2012 12:02 AM

block: Rework bio splitting
 
Break out bio_split() and bio_pair_split(), and get rid of the
limitations of bio_split()

Signed-off-by: Kent Overstreet <koverstreet@google.com>
Change-Id: Ic4733be7af6a4957974b42b09233b2209a779cbf
---
drivers/block/drbd/drbd_req.c | 16 +----
drivers/block/pktcdvd.c | 4 +-
drivers/block/rbd.c | 5 +-
drivers/md/linear.c | 4 +-
drivers/md/raid0.c | 6 +-
drivers/md/raid10.c | 21 ++----
fs/bio-integrity.c | 44 ------------
fs/bio.c | 153 ++++++++++++++++++++++++++---------------
include/linux/bio.h | 25 +++----
9 files changed, 124 insertions(+), 154 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 1c7e3c4..68fa494 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1101,18 +1101,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
if (likely(s_enr == e_enr)) {
inc_ap_bio(mdev, 1);
drbd_make_request_common(mdev, bio, start_time);
- return;
- }
-
- /* can this bio be split generically?
- * Maybe add our own split-arbitrary-bios function. */
- if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
- /* rather error out here than BUG in bio_split */
- dev_err(DEV, "bio would need to, but cannot, be split: "
- "(vcnt=%u,idx=%u,size=%u,sector=%llu)
",
- bio->bi_vcnt, bio->bi_idx, bio->bi_size,
- (unsigned long long)bio->bi_sector);
- bio_endio(bio, -EINVAL);
} else {
/* This bio crosses some boundary, so we have to split it. */
struct bio_pair *bp;
@@ -1139,10 +1127,10 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)

D_ASSERT(e_enr == s_enr + 1);

- while (drbd_make_request_common(mdev, &bp->bio1, start_time))
+ while (drbd_make_request_common(mdev, &bp->split, start_time))
inc_ap_bio(mdev, 1);

- while (drbd_make_request_common(mdev, &bp->bio2, start_time))
+ while (drbd_make_request_common(mdev, bio, start_time))
inc_ap_bio(mdev, 1);

dec_ap_bio(mdev);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 12a14c0..1465155 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2469,8 +2469,8 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
first_sectors = last_zone - bio->bi_sector;
bp = bio_pair_split(bio, first_sectors);
BUG_ON(!bp);
- pkt_make_request(q, &bp->bio1);
- pkt_make_request(q, &bp->bio2);
+ pkt_make_request(q, &bp->split);
+ pkt_make_request(q, bio);
bio_pair_release(bp);
return;
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 60dd556..dd19311 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -747,12 +747,11 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,

/* split the bio. We'll release it either in the next
call, or it will have to be released outside */
- bp = bio_pair_split(old_chain,
- (len - total) / SECTOR_SIZE);
+ bp = bio_pair_split(old_chain, (len - total) / SECTOR_SIZE);
if (!bp)
goto err_out;

- *next = &bp->bio2;
+ *next = &bp->split;
} else
*next = old_chain->bi_next;

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index e860cb9..7c6cafd 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -316,8 +316,8 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)

bp = bio_pair_split(bio, end_sector - bio->bi_sector);

- linear_make_request(mddev, &bp->bio1);
- linear_make_request(mddev, &bp->bio2);
+ linear_make_request(mddev, &bp->split);
+ linear_make_request(mddev, bio);
bio_pair_release(bp);
return;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c89c8aa..3469adf 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -520,9 +520,9 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
(chunk_sects-1)));
else
bp = bio_pair_split(bio, chunk_sects -
- sector_div(sector, chunk_sects));
- raid0_make_request(mddev, &bp->bio1);
- raid0_make_request(mddev, &bp->bio2);
+ sector_div(sector, chunk_sects));
+ raid0_make_request(mddev, &bp->split);
+ raid0_make_request(mddev, bio);
bio_pair_release(bp);
return;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f8b6f14..0062326 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1001,15 +1001,9 @@ static void make_request(struct mddev *mddev, struct bio * bio)
> chunk_sects &&
conf->near_copies < conf->raid_disks)) {
struct bio_pair *bp;
- /* Sanity check -- queue functions should prevent this happening */
- if (bio->bi_vcnt != 1 ||
- bio->bi_idx != 0)
- goto bad_map;
- /* This is a one page bio that upper layers
- * refuse to split for us, so we need to split it.
- */
+
bp = bio_pair_split(bio,
- chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
+ chunk_sects - (bio->bi_sector & (chunk_sects - 1)));

/* Each of these 'make_request' calls will call 'wait_barrier'.
* If the first succeeds but the second blocks due to the resync
@@ -1023,8 +1017,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
conf->nr_waiting++;
spin_unlock_irq(&conf->resync_lock);

- make_request(mddev, &bp->bio1);
- make_request(mddev, &bp->bio2);
+ make_request(mddev, &bp->split);
+ make_request(mddev, bio);

spin_lock_irq(&conf->resync_lock);
conf->nr_waiting--;
@@ -1033,13 +1027,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)

bio_pair_release(bp);
return;
- bad_map:
- printk("md/raid10:%s: make_request bug: can't convert block across chunks"
- " or bigger than %dk %llu %d
", mdname(mddev), chunk_sects/2,
- (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
-
- bio_io_error(bio);
- return;
}

md_write_start(mddev, bio);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index e85c04b..9ed2c44 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -682,50 +682,6 @@ void bio_integrity_trim(struct bio *bio, unsigned int offset,
EXPORT_SYMBOL(bio_integrity_trim);

/**
- * bio_integrity_split - Split integrity metadata
- * @bio: Protected bio
- * @bp: Resulting bio_pair
- * @sectors: Offset
- *
- * Description: Splits an integrity page into a bio_pair.
- */
-void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
-{
- struct blk_integrity *bi;
- struct bio_integrity_payload *bip = bio->bi_integrity;
- unsigned int nr_sectors;
-
- if (bio_integrity(bio) == 0)
- return;
-
- bi = bdev_get_integrity(bio->bi_bdev);
- BUG_ON(bi == NULL);
- BUG_ON(bip->bip_vcnt != 1);
-
- nr_sectors = bio_integrity_hw_sectors(bi, sectors);
-
- bp->bio1.bi_integrity = &bp->bip1;
- bp->bio2.bi_integrity = &bp->bip2;
-
- bp->iv1 = bip->bip_vec[0];
- bp->iv2 = bip->bip_vec[0];
-
- bp->bip1.bip_vec[0] = bp->iv1;
- bp->bip2.bip_vec[0] = bp->iv2;
-
- bp->iv1.bv_len = sectors * bi->tuple_size;
- bp->iv2.bv_offset += sectors * bi->tuple_size;
- bp->iv2.bv_len -= sectors * bi->tuple_size;
-
- bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
- bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
-
- bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
- bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
-}
-EXPORT_SYMBOL(bio_integrity_split);
-
-/**
* bio_integrity_clone - Callback for cloning bios with integrity metadata
* @bio: New bio
* @bio_src: Original bio
diff --git a/fs/bio.c b/fs/bio.c
index 3e7d6cd..b73c570 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -35,7 +35,7 @@
*/
#define BIO_INLINE_VECS 4

-static mempool_t *bio_split_pool __read_mostly;
+static struct bio_set *bio_split_pool __read_mostly;

/*
* if you change this list, also change bvec_alloc or things will
@@ -1462,80 +1462,126 @@ void bio_endio(struct bio *bio, int error)
}
EXPORT_SYMBOL(bio_endio);

-void bio_pair_release(struct bio_pair *bp)
+struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs)
{
- if (atomic_dec_and_test(&bp->cnt)) {
- struct bio *master = bp->bio1.bi_private;
+ unsigned idx, vcnt = 0, nbytes = sectors << 9;
+ struct bio_vec *bv;
+ struct bio *ret = NULL;
+
+ BUG_ON(sectors <= 0);
+
+ /*
+ * If we're being called from underneath generic_make_request() and we
+ * already allocated any bios from this bio set, we risk deadlock if we
+ * use the mempool. So instead, we possibly fail and let the caller punt
+ * to workqueue or somesuch and retry in a safe context.
+ */
+ if (current->bio_list)
+ gfp &= ~__GFP_WAIT;
+
+ if (nbytes >= bio->bi_size)
+ return bio;
+
+ trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
+ bio->bi_sector + sectors);
+
+ bio_for_each_segment(bv, bio, idx) {
+ vcnt = idx - bio->bi_idx;
+
+ if (!nbytes) {
+ ret = bio_alloc_bioset(gfp, 0, bs);
+ if (!ret)
+ return NULL;
+
+ ret->bi_io_vec = bio_iovec(bio);
+ ret->bi_flags |= 1 << BIO_CLONED;
+ break;
+ } else if (nbytes < bv->bv_len) {
+ ret = bio_alloc_bioset(gfp, ++vcnt, bs);
+ if (!ret)
+ return NULL;
+
+ memcpy(ret->bi_io_vec, bio_iovec(bio),
+ sizeof(struct bio_vec) * vcnt);
+
+ ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
+ bv->bv_offset += nbytes;
+ bv->bv_len -= nbytes;
+ break;
+ }
+
+ nbytes -= bv->bv_len;
+ }

- bio_endio(master, bp->error);
- mempool_free(bp, bp->bio2.bi_private);
+ ret->bi_bdev = bio->bi_bdev;
+ ret->bi_sector = bio->bi_sector;
+ ret->bi_size = sectors << 9;
+ ret->bi_rw = bio->bi_rw;
+ ret->bi_vcnt = vcnt;
+ ret->bi_max_vecs = vcnt;
+ ret->bi_end_io = bio->bi_end_io;
+ ret->bi_private = bio->bi_private;
+
+ bio->bi_sector += sectors;
+ bio->bi_size -= sectors << 9;
+ bio->bi_idx = idx;
+
+ if (bio_integrity(bio)) {
+ bio_integrity_clone(ret, bio, gfp, bs);
+ bio_integrity_trim(ret, 0, bio_sectors(ret));
+ bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
}
+
+ return ret;
}
-EXPORT_SYMBOL(bio_pair_release);
+EXPORT_SYMBOL_GPL(bio_split);

-static void bio_pair_end_1(struct bio *bi, int err)
+void bio_pair_release(struct bio_pair *bp)
{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
-
- if (err)
- bp->error = err;
+ if (atomic_dec_and_test(&bp->cnt)) {
+ bp->orig->bi_end_io = bp->bi_end_io;
+ bp->orig->bi_private = bp->bi_private;

- bio_pair_release(bp);
+ bio_endio(bp->orig, 0);
+ bio_put(&bp->split);
+ }
}
+EXPORT_SYMBOL(bio_pair_release);

-static void bio_pair_end_2(struct bio *bi, int err)
+static void bio_pair_end(struct bio *bio, int error)
{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
+ struct bio_pair *bp = bio->bi_private;

- if (err)
- bp->error = err;
+ if (error)
+ clear_bit(BIO_UPTODATE, &bp->orig->bi_flags);

bio_pair_release(bp);
}

-/*
- * split a bio - only worry about a bio with a single page in its iovec
- */
-struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors)
+struct bio_pair *bio_pair_split(struct bio *bio, int first_sectors)
{
- struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
+ struct bio_pair *bp;
+ struct bio *split = bio_split(bio, first_sectors,
+ GFP_NOIO, bio_split_pool);

- if (!bp)
- return bp;
-
- trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
- bi->bi_sector + first_sectors);
-
- BUG_ON(bi->bi_vcnt != 1);
- BUG_ON(bi->bi_idx != 0);
- atomic_set(&bp->cnt, 3);
- bp->error = 0;
- bp->bio1 = *bi;
- bp->bio2 = *bi;
- bp->bio2.bi_sector += first_sectors;
- bp->bio2.bi_size -= first_sectors << 9;
- bp->bio1.bi_size = first_sectors << 9;
+ if (!split)
+ return NULL;

- bp->bv1 = bi->bi_io_vec[0];
- bp->bv2 = bi->bi_io_vec[0];
- bp->bv2.bv_offset += first_sectors << 9;
- bp->bv2.bv_len -= first_sectors << 9;
- bp->bv1.bv_len = first_sectors << 9;
+ BUG_ON(split == bio);

- bp->bio1.bi_io_vec = &bp->bv1;
- bp->bio2.bi_io_vec = &bp->bv2;
+ bp = container_of(split, struct bio_pair, split);

- bp->bio1.bi_max_vecs = 1;
- bp->bio2.bi_max_vecs = 1;
+ atomic_set(&bp->cnt, 3);

- bp->bio1.bi_end_io = bio_pair_end_1;
- bp->bio2.bi_end_io = bio_pair_end_2;
+ bp->bi_end_io = bio->bi_end_io;
+ bp->bi_private = bio->bi_private;

- bp->bio1.bi_private = bi;
- bp->bio2.bi_private = bio_split_pool;
+ bio->bi_private = bp;
+ bio->bi_end_io = bio_pair_end;

- if (bio_integrity(bi))
- bio_integrity_split(bi, bp, first_sectors);
+ split->bi_private = bp;
+ split->bi_end_io = bio_pair_end;

return bp;
}
@@ -1692,8 +1738,7 @@ static int __init init_bio(void)
if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
panic("bio: can't create integrity pool
");

- bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
- sizeof(struct bio_pair));
+ bio_split_pool = bioset_create(BIO_POOL_SIZE, offsetof(struct bio_pair, split));
if (!bio_split_pool)
panic("bio: can't create split pool
");

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8468f3b..64fdcc8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -192,15 +192,17 @@ struct bio_integrity_payload {
* in bio2.bi_private
*/
struct bio_pair {
- struct bio bio1, bio2;
- struct bio_vec bv1, bv2;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
- struct bio_integrity_payload bip1, bip2;
- struct bio_vec iv1, iv2;
-#endif
- atomic_t cnt;
- int error;
+ atomic_t cnt;
+
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+
+ struct bio *orig;
+ struct bio split;
};
+
+extern struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs);
extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors);
extern void bio_pair_release(struct bio_pair *dbio);

@@ -506,7 +508,6 @@ extern int bio_integrity_prep(struct bio *);
extern void bio_integrity_endio(struct bio *, int);
extern void bio_integrity_advance(struct bio *, unsigned int);
extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
-extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
extern int bioset_integrity_create(struct bio_set *, int);
extern void bioset_integrity_free(struct bio_set *);
@@ -550,12 +551,6 @@ static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
return 0;
}

-static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
- int sectors)
-{
- return;
-}
-
static inline void bio_integrity_advance(struct bio *bio,
unsigned int bytes_done)
{
--
1.7.9.3.327.g2980b

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Boaz Harrosh 05-24-2012 04:56 PM

block: Rework bio splitting
 
On 05/24/2012 03:02 AM, Kent Overstreet wrote:

> Break out bio_split() and bio_pair_split(), and get rid of the
> limitations of bio_split()
>
> Signed-off-by: Kent Overstreet <koverstreet@google.com>
> Change-Id: Ic4733be7af6a4957974b42b09233b2209a779cbf
> ---
> drivers/block/drbd/drbd_req.c | 16 +----
> drivers/block/pktcdvd.c | 4 +-
> drivers/block/rbd.c | 5 +-
> drivers/md/linear.c | 4 +-
> drivers/md/raid0.c | 6 +-
> drivers/md/raid10.c | 21 ++----
> fs/bio-integrity.c | 44 ------------
> fs/bio.c | 153 ++++++++++++++++++++++++++---------------
> include/linux/bio.h | 25 +++----
> 9 files changed, 124 insertions(+), 154 deletions(-)
>
> diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
> index 1c7e3c4..68fa494 100644
> --- a/drivers/block/drbd/drbd_req.c
> +++ b/drivers/block/drbd/drbd_req.c
> @@ -1101,18 +1101,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
> if (likely(s_enr == e_enr)) {
> inc_ap_bio(mdev, 1);
> drbd_make_request_common(mdev, bio, start_time);
> - return;
> - }
> -
> - /* can this bio be split generically?
> - * Maybe add our own split-arbitrary-bios function. */
> - if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
> - /* rather error out here than BUG in bio_split */
> - dev_err(DEV, "bio would need to, but cannot, be split: "
> - "(vcnt=%u,idx=%u,size=%u,sector=%llu)
",
> - bio->bi_vcnt, bio->bi_idx, bio->bi_size,
> - (unsigned long long)bio->bi_sector);
> - bio_endio(bio, -EINVAL);
> } else {
> /* This bio crosses some boundary, so we have to split it. */
> struct bio_pair *bp;
> @@ -1139,10 +1127,10 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
>
> D_ASSERT(e_enr == s_enr + 1);
>
> - while (drbd_make_request_common(mdev, &bp->bio1, start_time))
> + while (drbd_make_request_common(mdev, &bp->split, start_time))
> inc_ap_bio(mdev, 1);
>
> - while (drbd_make_request_common(mdev, &bp->bio2, start_time))
> + while (drbd_make_request_common(mdev, bio, start_time))
> inc_ap_bio(mdev, 1);
>
> dec_ap_bio(mdev);
> diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
> index 12a14c0..1465155 100644
> --- a/drivers/block/pktcdvd.c
> +++ b/drivers/block/pktcdvd.c
> @@ -2469,8 +2469,8 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
> first_sectors = last_zone - bio->bi_sector;
> bp = bio_pair_split(bio, first_sectors);
> BUG_ON(!bp);
> - pkt_make_request(q, &bp->bio1);
> - pkt_make_request(q, &bp->bio2);
> + pkt_make_request(q, &bp->split);
> + pkt_make_request(q, bio);
> bio_pair_release(bp);
> return;
> }
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 60dd556..dd19311 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -747,12 +747,11 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
>
> /* split the bio. We'll release it either in the next
> call, or it will have to be released outside */
> - bp = bio_pair_split(old_chain,
> - (len - total) / SECTOR_SIZE);
> + bp = bio_pair_split(old_chain, (len - total) / SECTOR_SIZE);
> if (!bp)
> goto err_out;
>
> - *next = &bp->bio2;
> + *next = &bp->split;
> } else
> *next = old_chain->bi_next;
>
> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
> index e860cb9..7c6cafd 100644
> --- a/drivers/md/linear.c
> +++ b/drivers/md/linear.c
> @@ -316,8 +316,8 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
>
> bp = bio_pair_split(bio, end_sector - bio->bi_sector);
>
> - linear_make_request(mddev, &bp->bio1);
> - linear_make_request(mddev, &bp->bio2);
> + linear_make_request(mddev, &bp->split);
> + linear_make_request(mddev, bio);
> bio_pair_release(bp);
> return;
> }
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index c89c8aa..3469adf 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -520,9 +520,9 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
> (chunk_sects-1)));
> else
> bp = bio_pair_split(bio, chunk_sects -
> - sector_div(sector, chunk_sects));
> - raid0_make_request(mddev, &bp->bio1);
> - raid0_make_request(mddev, &bp->bio2);
> + sector_div(sector, chunk_sects));
> + raid0_make_request(mddev, &bp->split);
> + raid0_make_request(mddev, bio);
> bio_pair_release(bp);
> return;
> }
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index f8b6f14..0062326 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1001,15 +1001,9 @@ static void make_request(struct mddev *mddev, struct bio * bio)
> > chunk_sects &&
> conf->near_copies < conf->raid_disks)) {
> struct bio_pair *bp;
> - /* Sanity check -- queue functions should prevent this happening */
> - if (bio->bi_vcnt != 1 ||
> - bio->bi_idx != 0)
> - goto bad_map;
> - /* This is a one page bio that upper layers
> - * refuse to split for us, so we need to split it.
> - */
> +
> bp = bio_pair_split(bio,
> - chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
> + chunk_sects - (bio->bi_sector & (chunk_sects - 1)));
>
> /* Each of these 'make_request' calls will call 'wait_barrier'.
> * If the first succeeds but the second blocks due to the resync
> @@ -1023,8 +1017,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
> conf->nr_waiting++;
> spin_unlock_irq(&conf->resync_lock);
>
> - make_request(mddev, &bp->bio1);
> - make_request(mddev, &bp->bio2);
> + make_request(mddev, &bp->split);
> + make_request(mddev, bio);
>
> spin_lock_irq(&conf->resync_lock);
> conf->nr_waiting--;
> @@ -1033,13 +1027,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
>
> bio_pair_release(bp);
> return;
> - bad_map:
> - printk("md/raid10:%s: make_request bug: can't convert block across chunks"
> - " or bigger than %dk %llu %d
", mdname(mddev), chunk_sects/2,
> - (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
> -
> - bio_io_error(bio);
> - return;
> }
>
> md_write_start(mddev, bio);
> diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
> index e85c04b..9ed2c44 100644
> --- a/fs/bio-integrity.c
> +++ b/fs/bio-integrity.c
> @@ -682,50 +682,6 @@ void bio_integrity_trim(struct bio *bio, unsigned int offset,
> EXPORT_SYMBOL(bio_integrity_trim);
>
> /**
> - * bio_integrity_split - Split integrity metadata
> - * @bio: Protected bio
> - * @bp: Resulting bio_pair
> - * @sectors: Offset
> - *
> - * Description: Splits an integrity page into a bio_pair.
> - */
> -void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
> -{
> - struct blk_integrity *bi;
> - struct bio_integrity_payload *bip = bio->bi_integrity;
> - unsigned int nr_sectors;
> -
> - if (bio_integrity(bio) == 0)
> - return;
> -
> - bi = bdev_get_integrity(bio->bi_bdev);
> - BUG_ON(bi == NULL);
> - BUG_ON(bip->bip_vcnt != 1);
> -
> - nr_sectors = bio_integrity_hw_sectors(bi, sectors);
> -
> - bp->bio1.bi_integrity = &bp->bip1;
> - bp->bio2.bi_integrity = &bp->bip2;
> -
> - bp->iv1 = bip->bip_vec[0];
> - bp->iv2 = bip->bip_vec[0];
> -
> - bp->bip1.bip_vec[0] = bp->iv1;
> - bp->bip2.bip_vec[0] = bp->iv2;
> -
> - bp->iv1.bv_len = sectors * bi->tuple_size;
> - bp->iv2.bv_offset += sectors * bi->tuple_size;
> - bp->iv2.bv_len -= sectors * bi->tuple_size;
> -
> - bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
> - bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
> -
> - bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
> - bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
> -}
> -EXPORT_SYMBOL(bio_integrity_split);
> -
> -/**
> * bio_integrity_clone - Callback for cloning bios with integrity metadata
> * @bio: New bio
> * @bio_src: Original bio
> diff --git a/fs/bio.c b/fs/bio.c
> index 3e7d6cd..b73c570 100644
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -35,7 +35,7 @@
> */
> #define BIO_INLINE_VECS 4
>
> -static mempool_t *bio_split_pool __read_mostly;
> +static struct bio_set *bio_split_pool __read_mostly;
>
> /*
> * if you change this list, also change bvec_alloc or things will
> @@ -1462,80 +1462,126 @@ void bio_endio(struct bio *bio, int error)
> }
> EXPORT_SYMBOL(bio_endio);
>
> -void bio_pair_release(struct bio_pair *bp)




Again could you please take this bio_split and just put
it down below the implementation of bio_pair_split. This way
we can better review what the changes actually are. Now it
is a complete mess in the diff, where the deleted lines of the
bio_pair_release are in between the new lines of bio_split().
Please ???

> +struct bio *bio_split(struct bio *bio, int sectors,
> + gfp_t gfp, struct bio_set *bs)
> {




This is a freaking important and capable exported function.
Could you please put some comment on what it does and what are
it's limitation.

For example the returned bio is the beginning of the chain
and the original is the reminder, right?

> - if (atomic_dec_and_test(&bp->cnt)) {
> - struct bio *master = bp->bio1.bi_private;
> + unsigned idx, vcnt = 0, nbytes = sectors << 9;
> + struct bio_vec *bv;
> + struct bio *ret = NULL;
> +
> + BUG_ON(sectors <= 0);
> +
> + /*
> + * If we're being called from underneath generic_make_request() and we
> + * already allocated any bios from this bio set, we risk deadlock if we
> + * use the mempool. So instead, we possibly fail and let the caller punt
> + * to workqueue or somesuch and retry in a safe context.
> + */
> + if (current->bio_list)
> + gfp &= ~__GFP_WAIT;




Is this true also when @bio is not from @bs ?

Is it at all supported when they are not the same?

Are kmalloc bios not split-able?

Please put answer to these in above comment.

In the split you have a single bio with or without bvects allocation
should you not let the caller make sure not to set __GFP_WAIT.

For me, inspecting current->bio_list is out of context and a complete
hack. The caller should take care of it, which has more context.

For example I might want to use split from OSD code where I do
not use an elevator at all, and current->bio_list could belong
to a completely different device. (Maybe)

> +
> + if (nbytes >= bio->bi_size)
> + return bio;
> +
> + trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
> + bio->bi_sector + sectors);
> +
> + bio_for_each_segment(bv, bio, idx) {
> + vcnt = idx - bio->bi_idx;
> +
> + if (!nbytes) {
> + ret = bio_alloc_bioset(gfp, 0, bs);
> + if (!ret)
> + return NULL;
> +
> + ret->bi_io_vec = bio_iovec(bio);
> + ret->bi_flags |= 1 << BIO_CLONED;
> + break;




I don't see below any references taken by "ret" on @bio.
What protects us from @bio not been freed before "ret" ?

If it's the caller responsibility please say so in
comment above

> + } else if (nbytes < bv->bv_len) {
> + ret = bio_alloc_bioset(gfp, ++vcnt, bs);
> + if (!ret)
> + return NULL;




We could in this case save and only allocate one more bio with a single
bio_vec and chain it to the end.

And if you change it around, where the reminder is "ret" and the
beginning of the chain is left @bio. you wouldn't even need the
extra bio. (Trim the last and a single-bvec bio holds the reminder + remainder-chain)

Thanks
Boaz

> +
> + memcpy(ret->bi_io_vec, bio_iovec(bio),
> + sizeof(struct bio_vec) * vcnt);
> +
> + ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
> + bv->bv_offset += nbytes;
> + bv->bv_len -= nbytes;
> + break;
> + }
> +
> + nbytes -= bv->bv_len;
> + }
>
> - bio_endio(master, bp->error);
> - mempool_free(bp, bp->bio2.bi_private);
> + ret->bi_bdev = bio->bi_bdev;
> + ret->bi_sector = bio->bi_sector;
> + ret->bi_size = sectors << 9;
> + ret->bi_rw = bio->bi_rw;
> + ret->bi_vcnt = vcnt;
> + ret->bi_max_vecs = vcnt;
> + ret->bi_end_io = bio->bi_end_io;
> + ret->bi_private = bio->bi_private;
> +
> + bio->bi_sector += sectors;
> + bio->bi_size -= sectors << 9;
> + bio->bi_idx = idx;
> +
> + if (bio_integrity(bio)) {
> + bio_integrity_clone(ret, bio, gfp, bs);
> + bio_integrity_trim(ret, 0, bio_sectors(ret));
> + bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
> }
> +
> + return ret;
> }
> -EXPORT_SYMBOL(bio_pair_release);
> +EXPORT_SYMBOL_GPL(bio_split);
>
> -static void bio_pair_end_1(struct bio *bi, int err)
> +void bio_pair_release(struct bio_pair *bp)
> {
> - struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
> -
> - if (err)
> - bp->error = err;
> + if (atomic_dec_and_test(&bp->cnt)) {
> + bp->orig->bi_end_io = bp->bi_end_io;
> + bp->orig->bi_private = bp->bi_private;
>
> - bio_pair_release(bp);
> + bio_endio(bp->orig, 0);
> + bio_put(&bp->split);
> + }
> }
> +EXPORT_SYMBOL(bio_pair_release);
>
> -static void bio_pair_end_2(struct bio *bi, int err)
> +static void bio_pair_end(struct bio *bio, int error)
> {
> - struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
> + struct bio_pair *bp = bio->bi_private;
>
> - if (err)
> - bp->error = err;
> + if (error)
> + clear_bit(BIO_UPTODATE, &bp->orig->bi_flags);
>
> bio_pair_release(bp);
> }
>
> -/*
> - * split a bio - only worry about a bio with a single page in its iovec
> - */
> -struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors)
> +struct bio_pair *bio_pair_split(struct bio *bio, int first_sectors)
> {
> - struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
> + struct bio_pair *bp;
> + struct bio *split = bio_split(bio, first_sectors,
> + GFP_NOIO, bio_split_pool);
>
> - if (!bp)
> - return bp;
> -
> - trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
> - bi->bi_sector + first_sectors);
> -
> - BUG_ON(bi->bi_vcnt != 1);
> - BUG_ON(bi->bi_idx != 0);
> - atomic_set(&bp->cnt, 3);
> - bp->error = 0;
> - bp->bio1 = *bi;
> - bp->bio2 = *bi;
> - bp->bio2.bi_sector += first_sectors;
> - bp->bio2.bi_size -= first_sectors << 9;
> - bp->bio1.bi_size = first_sectors << 9;
> + if (!split)
> + return NULL;
>
> - bp->bv1 = bi->bi_io_vec[0];
> - bp->bv2 = bi->bi_io_vec[0];
> - bp->bv2.bv_offset += first_sectors << 9;
> - bp->bv2.bv_len -= first_sectors << 9;
> - bp->bv1.bv_len = first_sectors << 9;
> + BUG_ON(split == bio);
>
> - bp->bio1.bi_io_vec = &bp->bv1;
> - bp->bio2.bi_io_vec = &bp->bv2;
> + bp = container_of(split, struct bio_pair, split);
>
> - bp->bio1.bi_max_vecs = 1;
> - bp->bio2.bi_max_vecs = 1;
> + atomic_set(&bp->cnt, 3);
>
> - bp->bio1.bi_end_io = bio_pair_end_1;
> - bp->bio2.bi_end_io = bio_pair_end_2;
> + bp->bi_end_io = bio->bi_end_io;
> + bp->bi_private = bio->bi_private;
>
> - bp->bio1.bi_private = bi;
> - bp->bio2.bi_private = bio_split_pool;
> + bio->bi_private = bp;
> + bio->bi_end_io = bio_pair_end;
>
> - if (bio_integrity(bi))
> - bio_integrity_split(bi, bp, first_sectors);
> + split->bi_private = bp;
> + split->bi_end_io = bio_pair_end;
>
> return bp;
> }
> @@ -1692,8 +1738,7 @@ static int __init init_bio(void)
> if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
> panic("bio: can't create integrity pool
");
>
> - bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
> - sizeof(struct bio_pair));
> + bio_split_pool = bioset_create(BIO_POOL_SIZE, offsetof(struct bio_pair, split));
> if (!bio_split_pool)
> panic("bio: can't create split pool
");
>
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 8468f3b..64fdcc8 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -192,15 +192,17 @@ struct bio_integrity_payload {
> * in bio2.bi_private
> */
> struct bio_pair {
> - struct bio bio1, bio2;
> - struct bio_vec bv1, bv2;
> -#if defined(CONFIG_BLK_DEV_INTEGRITY)
> - struct bio_integrity_payload bip1, bip2;
> - struct bio_vec iv1, iv2;
> -#endif
> - atomic_t cnt;
> - int error;
> + atomic_t cnt;
> +
> + bio_end_io_t *bi_end_io;
> + void *bi_private;
> +
> + struct bio *orig;
> + struct bio split;
> };
> +
> +extern struct bio *bio_split(struct bio *bio, int sectors,
> + gfp_t gfp, struct bio_set *bs);
> extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors);
> extern void bio_pair_release(struct bio_pair *dbio);
>
> @@ -506,7 +508,6 @@ extern int bio_integrity_prep(struct bio *);
> extern void bio_integrity_endio(struct bio *, int);
> extern void bio_integrity_advance(struct bio *, unsigned int);
> extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
> -extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
> extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
> extern int bioset_integrity_create(struct bio_set *, int);
> extern void bioset_integrity_free(struct bio_set *);
> @@ -550,12 +551,6 @@ static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
> return 0;
> }
>
> -static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
> - int sectors)
> -{
> - return;
> -}
> -
> static inline void bio_integrity_advance(struct bio *bio,
> unsigned int bytes_done)
> {




--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Kent Overstreet 05-24-2012 09:27 PM

block: Rework bio splitting
 
On Thu, May 24, 2012 at 07:56:03PM +0300, Boaz Harrosh wrote:
> Again could you please take this bio_split and just put
> it down below the implementation of bio_pair_split. This way
> we can better review what the changes actually are. Now it
> is a complete mess in the diff, where the deleted lines of the
> bio_pair_release are in between the new lines of bio_split().
> Please ???

Ahh, I saw that comment but I missed what it was that you wanted me to
reorder. Will do.

>
> > +struct bio *bio_split(struct bio *bio, int sectors,
> > + gfp_t gfp, struct bio_set *bs)
> > {
>
>
>
>
> This is a freaking important and capable exported function.
> Could you please put some comment on what it does and what are
> it's limitation.

Yeah, I'll do that.

> For example the returned bio is the beginning of the chain
> and the original is the reminder, right?

Yes.

>
> > - if (atomic_dec_and_test(&bp->cnt)) {
> > - struct bio *master = bp->bio1.bi_private;
> > + unsigned idx, vcnt = 0, nbytes = sectors << 9;
> > + struct bio_vec *bv;
> > + struct bio *ret = NULL;
> > +
> > + BUG_ON(sectors <= 0);
> > +
> > + /*
> > + * If we're being called from underneath generic_make_request() and we
> > + * already allocated any bios from this bio set, we risk deadlock if we
> > + * use the mempool. So instead, we possibly fail and let the caller punt
> > + * to workqueue or somesuch and retry in a safe context.
> > + */
> > + if (current->bio_list)
> > + gfp &= ~__GFP_WAIT;
>
>
>
>
> Is this true also when @bio is not from @bs ?

Yes. Not masking out __GFP_WAIT would only be safe if you could
guarantee that you never tried to split a bio more than once (from the
same bio set), and IMO that'd be a terrible thing to rely on.

>
> Is it at all supported when they are not the same?

When what's not the same?

>
> Are kmalloc bios not split-able?

They are.

>
> Please put answer to these in above comment.
>
> In the split you have a single bio with or without bvects allocation
> should you not let the caller make sure not to set __GFP_WAIT.
>
> For me, inspecting current->bio_list is out of context and a complete
> hack. The caller should take care of it, which has more context.

I agree that it is hacky, but shifting the responsibility onto the
caller would IMO be much more likely to lead to buggy code in the
future (and those deadlocks are not going to be easy to track down).

It might be better to mask out __GFP_WAIT in bio_alloc_bioset(), I'm not
sure.

> For example I might want to use split from OSD code where I do
> not use an elevator at all, and current->bio_list could belong
> to a completely different device. (Maybe)

Doesn't matter. If you allocate a split, it won't free itself until the
IO is submitted and completes; current->bio_list != NULL the bio cannot
complete until you return.

> I don't see below any references taken by "ret" on @bio.
> What protects us from @bio not been freed before "ret" ?
>
> If it's the caller responsibility please say so in
> comment above

Yeah, caller responsibility. Will do.

>
> > + } else if (nbytes < bv->bv_len) {
> > + ret = bio_alloc_bioset(gfp, ++vcnt, bs);
> > + if (!ret)
> > + return NULL;
>
>
>
>
> We could in this case save and only allocate one more bio with a single
> bio_vec and chain it to the end.

I thought about that, but this works for now - my preferred solution is
to make bi_io_vec immutable (that'll require a bi_bvec_offset field in
struct bio, and the end result is we'd be able to split mid bvec and
share the bvec with both bios).

>
> And if you change it around, where the reminder is "ret" and the
> beginning of the chain is left @bio. you wouldn't even need the
> extra bio. (Trim the last and a single-bvec bio holds the reminder + remainder-chain)

Don't follow... If you're processing a bio starting from the smallest
sector though, you want the split to be the front of the bio otherwise
if you split multiple times you'll try to split a split, and deadlock.

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Vivek Goyal 05-25-2012 06:48 PM

block: Rework bio splitting
 
On Thu, May 24, 2012 at 07:56:03PM +0300, Boaz Harrosh wrote:

[..]
> In the split you have a single bio with or without bvects allocation
> should you not let the caller make sure not to set __GFP_WAIT.
>
> For me, inspecting current->bio_list is out of context and a complete
> hack. The caller should take care of it, which has more context.
>
> For example I might want to use split from OSD code where I do
> not use an elevator at all, and current->bio_list could belong
> to a completely different device. (Maybe)

FWIW, I too think that checking for current->bio_list in bio_split() sounds
hackish and it should be left to caller to set right gfp flags. And it should
be commented well.

Thanks
Vivek

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Kent Overstreet 05-25-2012 08:25 PM

block: Rework bio splitting
 
Break out bio_split() and bio_pair_split(), and get rid of the
limitations of bio_split()

Signed-off-by: Kent Overstreet <koverstreet@google.com>
Change-Id: Ic4733be7af6a4957974b42b09233b2209a779cbf
---
drivers/block/drbd/drbd_req.c | 16 +---
drivers/block/pktcdvd.c | 4 +-
drivers/block/rbd.c | 7 +-
drivers/md/linear.c | 4 +-
drivers/md/raid0.c | 6 +-
drivers/md/raid10.c | 21 +-----
fs/bio-integrity.c | 44 -----------
fs/bio.c | 167 +++++++++++++++++++++++++++++------------
include/linux/bio.h | 25 +++---
9 files changed, 144 insertions(+), 150 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 1c7e3c4..68fa494 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1101,18 +1101,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
if (likely(s_enr == e_enr)) {
inc_ap_bio(mdev, 1);
drbd_make_request_common(mdev, bio, start_time);
- return;
- }
-
- /* can this bio be split generically?
- * Maybe add our own split-arbitrary-bios function. */
- if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
- /* rather error out here than BUG in bio_split */
- dev_err(DEV, "bio would need to, but cannot, be split: "
- "(vcnt=%u,idx=%u,size=%u,sector=%llu)
",
- bio->bi_vcnt, bio->bi_idx, bio->bi_size,
- (unsigned long long)bio->bi_sector);
- bio_endio(bio, -EINVAL);
} else {
/* This bio crosses some boundary, so we have to split it. */
struct bio_pair *bp;
@@ -1139,10 +1127,10 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)

D_ASSERT(e_enr == s_enr + 1);

- while (drbd_make_request_common(mdev, &bp->bio1, start_time))
+ while (drbd_make_request_common(mdev, &bp->split, start_time))
inc_ap_bio(mdev, 1);

- while (drbd_make_request_common(mdev, &bp->bio2, start_time))
+ while (drbd_make_request_common(mdev, bio, start_time))
inc_ap_bio(mdev, 1);

dec_ap_bio(mdev);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 12a14c0..1465155 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2469,8 +2469,8 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
first_sectors = last_zone - bio->bi_sector;
bp = bio_pair_split(bio, first_sectors);
BUG_ON(!bp);
- pkt_make_request(q, &bp->bio1);
- pkt_make_request(q, &bp->bio2);
+ pkt_make_request(q, &bp->split);
+ pkt_make_request(q, bio);
bio_pair_release(bp);
return;
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index a0b76c6..11777b4 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -747,14 +747,13 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,

/* split the bio. We'll release it either in the next
call, or it will have to be released outside */
- bp = bio_pair_split(old_chain,
- (len - total) / SECTOR_SIZE);
+ bp = bio_pair_split(old_chain, (len - total) / SECTOR_SIZE);
if (!bp)
goto err_out;

- __bio_clone(tmp, &bp->bio1);
+ __bio_clone(tmp, &bp->split);

- *next = &bp->bio2;
+ *next = bp->orig;
} else {
__bio_clone(tmp, old_chain);
*next = old_chain->bi_next;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index e860cb9..7c6cafd 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -316,8 +316,8 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)

bp = bio_pair_split(bio, end_sector - bio->bi_sector);

- linear_make_request(mddev, &bp->bio1);
- linear_make_request(mddev, &bp->bio2);
+ linear_make_request(mddev, &bp->split);
+ linear_make_request(mddev, bio);
bio_pair_release(bp);
return;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c89c8aa..3469adf 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -520,9 +520,9 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
(chunk_sects-1)));
else
bp = bio_pair_split(bio, chunk_sects -
- sector_div(sector, chunk_sects));
- raid0_make_request(mddev, &bp->bio1);
- raid0_make_request(mddev, &bp->bio2);
+ sector_div(sector, chunk_sects));
+ raid0_make_request(mddev, &bp->split);
+ raid0_make_request(mddev, bio);
bio_pair_release(bp);
return;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f8b6f14..0062326 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1001,15 +1001,9 @@ static void make_request(struct mddev *mddev, struct bio * bio)
> chunk_sects &&
conf->near_copies < conf->raid_disks)) {
struct bio_pair *bp;
- /* Sanity check -- queue functions should prevent this happening */
- if (bio->bi_vcnt != 1 ||
- bio->bi_idx != 0)
- goto bad_map;
- /* This is a one page bio that upper layers
- * refuse to split for us, so we need to split it.
- */
+
bp = bio_pair_split(bio,
- chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
+ chunk_sects - (bio->bi_sector & (chunk_sects - 1)));

/* Each of these 'make_request' calls will call 'wait_barrier'.
* If the first succeeds but the second blocks due to the resync
@@ -1023,8 +1017,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
conf->nr_waiting++;
spin_unlock_irq(&conf->resync_lock);

- make_request(mddev, &bp->bio1);
- make_request(mddev, &bp->bio2);
+ make_request(mddev, &bp->split);
+ make_request(mddev, bio);

spin_lock_irq(&conf->resync_lock);
conf->nr_waiting--;
@@ -1033,13 +1027,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)

bio_pair_release(bp);
return;
- bad_map:
- printk("md/raid10:%s: make_request bug: can't convert block across chunks"
- " or bigger than %dk %llu %d
", mdname(mddev), chunk_sects/2,
- (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
-
- bio_io_error(bio);
- return;
}

md_write_start(mddev, bio);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index e85c04b..9ed2c44 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -682,50 +682,6 @@ void bio_integrity_trim(struct bio *bio, unsigned int offset,
EXPORT_SYMBOL(bio_integrity_trim);

/**
- * bio_integrity_split - Split integrity metadata
- * @bio: Protected bio
- * @bp: Resulting bio_pair
- * @sectors: Offset
- *
- * Description: Splits an integrity page into a bio_pair.
- */
-void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
-{
- struct blk_integrity *bi;
- struct bio_integrity_payload *bip = bio->bi_integrity;
- unsigned int nr_sectors;
-
- if (bio_integrity(bio) == 0)
- return;
-
- bi = bdev_get_integrity(bio->bi_bdev);
- BUG_ON(bi == NULL);
- BUG_ON(bip->bip_vcnt != 1);
-
- nr_sectors = bio_integrity_hw_sectors(bi, sectors);
-
- bp->bio1.bi_integrity = &bp->bip1;
- bp->bio2.bi_integrity = &bp->bip2;
-
- bp->iv1 = bip->bip_vec[0];
- bp->iv2 = bip->bip_vec[0];
-
- bp->bip1.bip_vec[0] = bp->iv1;
- bp->bip2.bip_vec[0] = bp->iv2;
-
- bp->iv1.bv_len = sectors * bi->tuple_size;
- bp->iv2.bv_offset += sectors * bi->tuple_size;
- bp->iv2.bv_len -= sectors * bi->tuple_size;
-
- bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
- bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
-
- bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
- bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
-}
-EXPORT_SYMBOL(bio_integrity_split);
-
-/**
* bio_integrity_clone - Callback for cloning bios with integrity metadata
* @bio: New bio
* @bio_src: Original bio
diff --git a/fs/bio.c b/fs/bio.c
index fb7607b..62c4af81 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -35,7 +35,7 @@
*/
#define BIO_INLINE_VECS 4

-static mempool_t *bio_split_pool __read_mostly;
+static struct bio_set *bio_split_pool __read_mostly;

/*
* if you change this list, also change bvec_alloc or things will
@@ -1447,81 +1447,151 @@ EXPORT_SYMBOL(bio_endio);
void bio_pair_release(struct bio_pair *bp)
{
if (atomic_dec_and_test(&bp->cnt)) {
- struct bio *master = bp->bio1.bi_private;
+ bp->orig->bi_end_io = bp->bi_end_io;
+ bp->orig->bi_private = bp->bi_private;

- bio_endio(master, bp->error);
- mempool_free(bp, bp->bio2.bi_private);
+ bio_endio(bp->orig, 0);
+ bio_put(&bp->split);
}
}
EXPORT_SYMBOL(bio_pair_release);

-static void bio_pair_end_1(struct bio *bi, int err)
+static void bio_pair_end(struct bio *bio, int error)
{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
+ struct bio_pair *bp = bio->bi_private;

- if (err)
- bp->error = err;
+ if (error)
+ clear_bit(BIO_UPTODATE, &bp->orig->bi_flags);

bio_pair_release(bp);
}

-static void bio_pair_end_2(struct bio *bi, int err)
+struct bio_pair *bio_pair_split(struct bio *bio, int first_sectors)
{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
+ struct bio_pair *bp;
+ struct bio *split = bio_split(bio, first_sectors,
+ GFP_NOIO, bio_split_pool);

- if (err)
- bp->error = err;
+ if (!split)
+ return NULL;

- bio_pair_release(bp);
+ BUG_ON(split == bio);
+
+ bp = container_of(split, struct bio_pair, split);
+
+ atomic_set(&bp->cnt, 3);
+
+ bp->bi_end_io = bio->bi_end_io;
+ bp->bi_private = bio->bi_private;
+
+ bio->bi_private = bp;
+ bio->bi_end_io = bio_pair_end;
+
+ split->bi_private = bp;
+ split->bi_end_io = bio_pair_end;
+
+ return bp;
}
+EXPORT_SYMBOL(bio_pair_split);

-/*
- * split a bio - only worry about a bio with a single page in its iovec
+/**
+ * bio_split - split a bio
+ * @bio: bio to split
+ * @sectors: number of sectors to split from the front of @bio
+ * @gfp: gfp mask
+ * @bs: bio set to allocate from
+ *
+ * Allocates and returns a new bio which represents @sectors from the start of
+ * @bio, and updates @bio to represent the remaining sectors.
+ *
+ * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
+ * unchanged.
+ *
+ * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
+ * bvec boundry; it is the caller's responsibility to ensure that @bio is not
+ * freed before the split.
+ *
+ * If bio_split() is running under generic_make_request(), it's not safe to
+ * allocate more than one bio from the same bio set. Therefore, if it is running
+ * under generic_make_request() it masks out __GFP_WAIT when doing the
+ * allocation. The caller must check for failure if there's any possibility of
+ * it being called from under generic_make_request(); it is then the caller's
+ * responsibility to retry from a safe context (by e.g. punting to workqueue).
*/
-struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors)
+struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs)
{
- struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
+ unsigned idx, vcnt = 0, nbytes = sectors << 9;
+ struct bio_vec *bv;
+ struct bio *ret = NULL;

- if (!bp)
- return bp;
+ BUG_ON(sectors <= 0);

- trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
- bi->bi_sector + first_sectors);
+ /*
+ * If we're being called from underneath generic_make_request() and we
+ * already allocated any bios from this bio set, we risk deadlock if we
+ * use the mempool. So instead, we possibly fail and let the caller punt
+ * to workqueue or somesuch and retry in a safe context.
+ */
+ if (current->bio_list)
+ gfp &= ~__GFP_WAIT;

- BUG_ON(bi->bi_vcnt != 1);
- BUG_ON(bi->bi_idx != 0);
- atomic_set(&bp->cnt, 3);
- bp->error = 0;
- bp->bio1 = *bi;
- bp->bio2 = *bi;
- bp->bio2.bi_sector += first_sectors;
- bp->bio2.bi_size -= first_sectors << 9;
- bp->bio1.bi_size = first_sectors << 9;
+ if (nbytes >= bio->bi_size)
+ return bio;

- bp->bv1 = bi->bi_io_vec[0];
- bp->bv2 = bi->bi_io_vec[0];
- bp->bv2.bv_offset += first_sectors << 9;
- bp->bv2.bv_len -= first_sectors << 9;
- bp->bv1.bv_len = first_sectors << 9;
+ trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
+ bio->bi_sector + sectors);

- bp->bio1.bi_io_vec = &bp->bv1;
- bp->bio2.bi_io_vec = &bp->bv2;
+ bio_for_each_segment(bv, bio, idx) {
+ vcnt = idx - bio->bi_idx;

- bp->bio1.bi_max_vecs = 1;
- bp->bio2.bi_max_vecs = 1;
+ if (!nbytes) {
+ ret = bio_alloc_bioset(gfp, 0, bs);
+ if (!ret)
+ return NULL;

- bp->bio1.bi_end_io = bio_pair_end_1;
- bp->bio2.bi_end_io = bio_pair_end_2;
+ ret->bi_io_vec = bio_iovec(bio);
+ ret->bi_flags |= 1 << BIO_CLONED;
+ break;
+ } else if (nbytes < bv->bv_len) {
+ ret = bio_alloc_bioset(gfp, ++vcnt, bs);
+ if (!ret)
+ return NULL;

- bp->bio1.bi_private = bi;
- bp->bio2.bi_private = bio_split_pool;
+ memcpy(ret->bi_io_vec, bio_iovec(bio),
+ sizeof(struct bio_vec) * vcnt);

- if (bio_integrity(bi))
- bio_integrity_split(bi, bp, first_sectors);
+ ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
+ bv->bv_offset += nbytes;
+ bv->bv_len -= nbytes;
+ break;
+ }

- return bp;
+ nbytes -= bv->bv_len;
+ }
+
+ ret->bi_bdev = bio->bi_bdev;
+ ret->bi_sector = bio->bi_sector;
+ ret->bi_size = sectors << 9;
+ ret->bi_rw = bio->bi_rw;
+ ret->bi_vcnt = vcnt;
+ ret->bi_max_vecs = vcnt;
+ ret->bi_end_io = bio->bi_end_io;
+ ret->bi_private = bio->bi_private;
+
+ bio->bi_sector += sectors;
+ bio->bi_size -= sectors << 9;
+ bio->bi_idx = idx;
+
+ if (bio_integrity(bio)) {
+ bio_integrity_clone(ret, bio, gfp, bs);
+ bio_integrity_trim(ret, 0, bio_sectors(ret));
+ bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
+ }
+
+ return ret;
}
-EXPORT_SYMBOL(bio_pair_split);
+EXPORT_SYMBOL_GPL(bio_split);

/**
* bio_sector_offset - Find hardware sector offset in bio
@@ -1674,8 +1744,7 @@ static int __init init_bio(void)
if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
panic("bio: can't create integrity pool
");

- bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
- sizeof(struct bio_pair));
+ bio_split_pool = bioset_create(BIO_POOL_SIZE, offsetof(struct bio_pair, split));
if (!bio_split_pool)
panic("bio: can't create split pool
");

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 18ab020..486233c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -192,15 +192,17 @@ struct bio_integrity_payload {
* in bio2.bi_private
*/
struct bio_pair {
- struct bio bio1, bio2;
- struct bio_vec bv1, bv2;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
- struct bio_integrity_payload bip1, bip2;
- struct bio_vec iv1, iv2;
-#endif
- atomic_t cnt;
- int error;
+ atomic_t cnt;
+
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+
+ struct bio *orig;
+ struct bio split;
};
+
+extern struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs);
extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors);
extern void bio_pair_release(struct bio_pair *dbio);

@@ -503,7 +505,6 @@ extern int bio_integrity_prep(struct bio *);
extern void bio_integrity_endio(struct bio *, int);
extern void bio_integrity_advance(struct bio *, unsigned int);
extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
-extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
extern int bioset_integrity_create(struct bio_set *, int);
extern void bioset_integrity_free(struct bio_set *);
@@ -547,12 +548,6 @@ static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
return 0;
}

-static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
- int sectors)
-{
- return;
-}
-
static inline void bio_integrity_advance(struct bio *bio,
unsigned int bytes_done)
{
--
1.7.9.3.327.g2980b

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Mikulas Patocka 05-28-2012 04:12 PM

block: Rework bio splitting
 
On Fri, 25 May 2012, Kent Overstreet wrote:

> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index f8b6f14..0062326 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1001,15 +1001,9 @@ static void make_request(struct mddev *mddev, struct bio * bio)
> > chunk_sects &&
> conf->near_copies < conf->raid_disks)) {
> struct bio_pair *bp;
> - /* Sanity check -- queue functions should prevent this happening */
> - if (bio->bi_vcnt != 1 ||
> - bio->bi_idx != 0)
> - goto bad_map;
> - /* This is a one page bio that upper layers
> - * refuse to split for us, so we need to split it.
> - */
> +

That "Sanity check" should be removed from drivers/md/raid0.c too. Your
patch only removes it from raid10.

Mikulas

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


All times are GMT. The time now is 06:00 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.