FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive Linux mailing lists and to make them easily accessible to Linux users and developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Device-mapper Development

 
 
LinkBack Thread Tools
 
Old 08-22-2012, 05:04 PM
Kent Overstreet
 
Default block: Avoid deadlocks with bio allocation by stacking drivers

Previously, if we ever try to allocate more than once from the same bio
set while running under generic_make_request(), we risk deadlock.

This could happen if, e.g., a bio ever needed to be split more than
once, and it is difficult to handle correctly in the drivers - so in
practice it is not handled.

This patch fixes this issue by allocating a rescuer workqueue for each
bio_set, and punting queued bios to said rescuer when necessary:

Signed-off-by: Kent Overstreet <koverstreet@google.com>
CC: Jens Axboe <axboe@kernel.dk>
---
fs/bio.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++---
include/linux/bio.h | 75 ++++++++++++++++++++++++++++++-----------------------
2 files changed, 112 insertions(+), 37 deletions(-)

diff --git a/fs/bio.c b/fs/bio.c
index aa67bf3..a82a3c7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -279,6 +279,23 @@ void bio_reset(struct bio *bio)
}
EXPORT_SYMBOL(bio_reset);

+static void bio_alloc_rescue(struct work_struct *work)
+{
+ struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
+ struct bio *bio;
+
+ while (1) {
+ spin_lock(&bs->rescue_lock);
+ bio = bio_list_pop(&bs->rescue_list);
+ spin_unlock(&bs->rescue_lock);
+
+ if (!bio)
+ break;
+
+ generic_make_request(bio);
+ }
+}
+
/**
* bio_alloc_bioset - allocate a bio for I/O
* @gfp_mask: the GFP_ mask given to the slab allocator
@@ -292,6 +309,7 @@ EXPORT_SYMBOL(bio_reset);
**/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
+ gfp_t saved_gfp = gfp_mask;
unsigned front_pad;
unsigned inline_vecs;
unsigned long idx = BIO_POOL_NONE;
@@ -306,16 +324,39 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
p = kmalloc(sizeof(struct bio) +
nr_iovecs * sizeof(struct bio_vec),
gfp_mask);
+
front_pad = 0;
inline_vecs = nr_iovecs;
} else {
- p = mempool_alloc(bs->bio_pool, gfp_mask);
+ /*
+ * If we're running under generic_make_request()
+ * (current->bio_list != NULL), we risk deadlock if we sleep on
+ * allocation and there's already bios on current->bio_list that
+ * were allocated from the same bio_set; they won't be submitted
+ * (and thus freed) as long as we're blocked here.
+ *
+ * To deal with this, we first try the allocation without using
+ * the mempool; if that fails, we punt all the bios on
+ * current->bio_list to a different thread and then retry the
+ * allocation with the original gfp mask.
+ */
+
+ if (current->bio_list &&
+ !bio_list_empty(current->bio_list) &&
+ (gfp_mask & __GFP_WAIT))
+ gfp_mask &= GFP_ATOMIC;
+retry:
+ if (gfp_mask & __GFP_WAIT)
+ p = mempool_alloc(bs->bio_pool, gfp_mask);
+ else
+ p = kmem_cache_alloc(bs->bio_slab, gfp_mask);
+
front_pad = bs->front_pad;
inline_vecs = BIO_INLINE_VECS;
}

if (unlikely(!p))
- return NULL;
+ goto err;

bio = p + front_pad;
bio_init(bio);
@@ -336,6 +377,19 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)

err_free:
mempool_free(p, bs->bio_pool);
+err:
+ if (gfp_mask != saved_gfp) {
+ gfp_mask = saved_gfp;
+
+ spin_lock(&bs->rescue_lock);
+ bio_list_merge(&bs->rescue_list, current->bio_list);
+ bio_list_init(current->bio_list);
+ spin_unlock(&bs->rescue_lock);
+
+ queue_work(bs->rescue_workqueue, &bs->rescue_work);
+ goto retry;
+ }
+
return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);
@@ -1544,6 +1598,9 @@ static void biovec_free_pools(struct bio_set *bs)

void bioset_free(struct bio_set *bs)
{
+ if (bs->rescue_workqueue)
+ destroy_workqueue(bs->rescue_workqueue);
+
if (bs->bio_pool)
mempool_destroy(bs->bio_pool);

@@ -1579,6 +1636,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)

bs->front_pad = front_pad;

+ spin_lock_init(&bs->rescue_lock);
+ bio_list_init(&bs->rescue_list);
+ INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
+
bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
if (!bs->bio_slab) {
kfree(bs);
@@ -1589,9 +1650,14 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
if (!bs->bio_pool)
goto bad;

- if (!biovec_create_pools(bs, pool_size))
- return bs;
+ if (biovec_create_pools(bs, pool_size))
+ goto bad;
+
+ bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
+ if (!bs->rescue_workqueue)
+ goto bad;

+ return bs;
bad:
bioset_free(bs);
return NULL;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b22c22b..ba5b52e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -290,39 +290,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
static inline void bio_disassociate_task(struct bio *bio) { }
#endif /* CONFIG_BLK_CGROUP */

-/*
- * bio_set is used to allow other portions of the IO system to
- * allocate their own private memory pools for bio and iovec structures.
- * These memory pools in turn all allocate from the bio_slab
- * and the bvec_slabs[].
- */
-#define BIO_POOL_SIZE 2
-#define BIOVEC_NR_POOLS 6
-#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
-
-struct bio_set {
- struct kmem_cache *bio_slab;
- unsigned int front_pad;
-
- mempool_t *bio_pool;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
- mempool_t *bio_integrity_pool;
-#endif
- mempool_t *bvec_pool;
-};
-
-struct biovec_slab {
- int nr_vecs;
- char *name;
- struct kmem_cache *slab;
-};
-
-/*
- * a small number of entries is fine, not going to be performance critical.
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
-
#ifdef CONFIG_HIGHMEM
/*
* remember never ever reenable interrupts between a bvec_kmap_irq and
@@ -497,6 +464,48 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
return bio;
}

+/*
+ * bio_set is used to allow other portions of the IO system to
+ * allocate their own private memory pools for bio and iovec structures.
+ * These memory pools in turn all allocate from the bio_slab
+ * and the bvec_slabs[].
+ */
+#define BIO_POOL_SIZE 2
+#define BIOVEC_NR_POOLS 6
+#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
+
+struct bio_set {
+ struct kmem_cache *bio_slab;
+ unsigned int front_pad;
+
+ mempool_t *bio_pool;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ mempool_t *bio_integrity_pool;
+#endif
+ mempool_t *bvec_pool;
+
+ /*
+ * Deadlock avoidance for stacking block drivers: see comments in
+ * bio_alloc_bioset() for details
+ */
+ spinlock_t rescue_lock;
+ struct bio_list rescue_list;
+ struct work_struct rescue_work;
+ struct workqueue_struct *rescue_workqueue;
+};
+
+struct biovec_slab {
+ int nr_vecs;
+ char *name;
+ struct kmem_cache *slab;
+};
+
+/*
+ * a small number of entries is fine, not going to be performance critical.
+ * basically we just need to survive
+ */
+#define BIO_SPLIT_ENTRIES 2
+
#if defined(CONFIG_BLK_DEV_INTEGRITY)

#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
--
1.7.12

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 08-28-2012, 05:37 PM
Kent Overstreet
 
Default block: Avoid deadlocks with bio allocation by stacking drivers

Previously, if we ever try to allocate more than once from the same bio
set while running under generic_make_request() (i.e. a stacking block
driver), we risk deadlock.

This is because of the code in generic_make_request() that converts
recursion to iteration; any bios we submit won't actually be submitted
(so they can complete and eventually be freed) until after we return -
this means if we allocate a second bio, we're blocking the first one
from ever being freed.

Thus if enough threads call into a stacking block driver at the same
time with bios that need multiple splits, and the bio_set's reserve gets
used up, we deadlock.

This can be worked around in the driver code - we could check if we're
running under generic_make_request(), then mask out __GFP_WAIT when we
go to allocate a bio, and if the allocation fails punt to workqueue and
retry the allocation.

But this is tricky and not a generic solution. This patch solves it for
all users by inverting the previously described technique. We allocate a
rescuer workqueue for each bio_set, and then in the allocation code if
there are bios on current->bio_list we would be blocking, we punt them
to the rescuer workqueue to be submitted.

Tested it by forcing the rescue codepath to be taken (by disabling the
first GFP_NOWAIT attempt), and then ran it with bcache (which does a lot
of arbitrary bio splitting) and verified that the rescuer was being
invoked.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
CC: Jens Axboe <axboe@kernel.dk>
---
fs/bio.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++---
include/linux/bio.h | 9 +++++++
2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/fs/bio.c b/fs/bio.c
index 31e637a..5d46318 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -285,6 +285,23 @@ void bio_reset(struct bio *bio)
}
EXPORT_SYMBOL(bio_reset);

+static void bio_alloc_rescue(struct work_struct *work)
+{
+ struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
+ struct bio *bio;
+
+ while (1) {
+ spin_lock(&bs->rescue_lock);
+ bio = bio_list_pop(&bs->rescue_list);
+ spin_unlock(&bs->rescue_lock);
+
+ if (!bio)
+ break;
+
+ generic_make_request(bio);
+ }
+}
+
/**
* bio_alloc_bioset - allocate a bio for I/O
* @gfp_mask: the GFP_ mask given to the slab allocator
@@ -307,6 +324,7 @@ EXPORT_SYMBOL(bio_reset);
*/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
+ gfp_t saved_gfp = gfp_mask;
unsigned front_pad;
unsigned inline_vecs;
unsigned long idx = BIO_POOL_NONE;
@@ -324,13 +342,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
front_pad = 0;
inline_vecs = nr_iovecs;
} else {
+ /*
+ * generic_make_request() converts recursion to iteration; this
+ * means if we're running beneath it, any bios we allocate and
+ * submit will not be submitted (and thus freed) until after we
+ * return.
+ *
+ * This exposes us to a potential deadlock if we allocate
+ * multiple bios from the same bio_set() while running
+ * underneath generic_make_request(). If we were to allocate
+ * multiple bios (say a stacking block driver that was splitting
+ * bios), we would deadlock if we exhausted the mempool's
+ * reserve.
+ *
+ * We solve this, and guarantee forward progress, with a rescuer
+ * workqueue per bio_set. If we go to allocate and there are
+ * bios on current->bio_list, we first try the allocation
+ * without __GFP_WAIT; if that fails, we punt those bios we
+ * would be blocking to the rescuer workqueue before we retry
+ * with the original gfp_flags.
+ */
+
+ if (current->bio_list && !bio_list_empty(current->bio_list))
+ gfp_mask &= ~__GFP_WAIT;
+retry:
p = mempool_alloc(bs->bio_pool, gfp_mask);
front_pad = bs->front_pad;
inline_vecs = BIO_INLINE_VECS;
}

if (unlikely(!p))
- return NULL;
+ goto err;

bio = p + front_pad;
bio_init(bio);
@@ -351,6 +393,19 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)

err_free:
mempool_free(p, bs->bio_pool);
+err:
+ if (gfp_mask != saved_gfp) {
+ gfp_mask = saved_gfp;
+
+ spin_lock(&bs->rescue_lock);
+ bio_list_merge(&bs->rescue_list, current->bio_list);
+ bio_list_init(current->bio_list);
+ spin_unlock(&bs->rescue_lock);
+
+ queue_work(bs->rescue_workqueue, &bs->rescue_work);
+ goto retry;
+ }
+
return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);
@@ -1562,6 +1617,9 @@ static void biovec_free_pools(struct bio_set *bs)

void bioset_free(struct bio_set *bs)
{
+ if (bs->rescue_workqueue)
+ destroy_workqueue(bs->rescue_workqueue);
+
if (bs->bio_pool)
mempool_destroy(bs->bio_pool);

@@ -1597,6 +1655,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)

bs->front_pad = front_pad;

+ spin_lock_init(&bs->rescue_lock);
+ bio_list_init(&bs->rescue_list);
+ INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
+
bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
if (!bs->bio_slab) {
kfree(bs);
@@ -1607,9 +1669,14 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
if (!bs->bio_pool)
goto bad;

- if (!biovec_create_pools(bs, pool_size))
- return bs;
+ if (biovec_create_pools(bs, pool_size))
+ goto bad;
+
+ bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
+ if (!bs->rescue_workqueue)
+ goto bad;

+ return bs;
bad:
bioset_free(bs);
return NULL;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 3a8345e..84fdaac 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -492,6 +492,15 @@ struct bio_set {
mempool_t *bio_integrity_pool;
#endif
mempool_t *bvec_pool;
+
+ /*
+ * Deadlock avoidance for stacking block drivers: see comments in
+ * bio_alloc_bioset() for details
+ */
+ spinlock_t rescue_lock;
+ struct bio_list rescue_list;
+ struct work_struct rescue_work;
+ struct workqueue_struct *rescue_workqueue;
};

struct biovec_slab {
--
1.7.12

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 

Thread Tools




All times are GMT. The time now is 07:40 PM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org