UBUNTU: SAUCE: Improve Amazon EBS performance for EC2
OriginalAuthor: Amazona from Ben Howard <behoward@amazon.com>
BugLink: http://bugs.launchpad.net/bugs/634316 The pv-ops kernel suffers from poor performance when using Amazon's Elastic block storage (EBS). This patch from Amazon improves pv-ops kernel performance, and has not exhibited any regressions. Signed-off-by: John Johansen <john.johansen@canonical.com> --- drivers/block/xen-blkfront.c | 125 +++++++++++++++++++++++++++----------- include/xen/interface/io/blkif.h | 12 ++++ 2 files changed, 101 insertions(+), 36 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index cda9b5a..221028a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -69,7 +69,8 @@ struct blk_shadow { static const struct block_device_operations xlvbd_block_fops; -#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) +#define BLK_MAX_RING_AREA_SIZE (BLKIF_MAX_NUM_RING_PAGES * PAGE_SIZE) +#define BLK_MAX_RING_SIZE __RING_SIZE((struct blkif_sring *)0, BLK_MAX_RING_AREA_SIZE) /* * We have one of these per vbd, whether ide, scsi or 'other'. They @@ -83,14 +84,15 @@ struct blkfront_info int vdevice; blkif_vdev_t handle; enum blkif_state connected; - int ring_ref; + int num_ring_pages; + int ring_ref[BLKIF_MAX_NUM_RING_PAGES]; struct blkif_front_ring ring; struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_RING_SIZE]; + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; unsigned long shadow_free; int feature_barrier; int is_ready; @@ -104,8 +106,6 @@ struct blkfront_info static DEFINE_SPINLOCK(blkif_io_lock); -#define MAXIMUM_OUTSTANDING_BLOCK_REQS - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) #define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 @@ -124,7 +124,8 @@ static DEFINE_SPINLOCK(blkif_io_lock); static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; - BUG_ON(free >= BLK_RING_SIZE); + int ring_size = __RING_SIZE((struct blkif_sring *)0, info->num_ring_pages * PAGE_SIZE); + BUG_ON(free >= ring_size); info->shadow_free = info->shadow[free].req.id; info->shadow[free].req.id = 0x0fffffee; /* debug */ return free; @@ -496,6 +497,8 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { + int i; + /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&blkif_io_lock); info->connected = suspend ? @@ -511,10 +514,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) flush_scheduled_work(); /* Free resources associated with old device channel. */ - if (info->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - info->ring_ref = GRANT_INVALID_REF; + for (i=0;i<info->num_ring_pages;i++) { + /* Free resources associated with old device channel. */ + if (info->ring_ref[i] != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref[i], 0, 0L); + info->ring_ref[i] = GRANT_INVALID_REF; + } + } + if (info->ring.sring) { + int ring_area_size = info->num_ring_pages * PAGE_SIZE; + free_pages((unsigned long)info->ring.sring, + get_order(ring_area_size )); info->ring.sring = NULL; } if (info->irq) @@ -607,27 +617,32 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { struct blkif_sring *sring; - int err; + int i, order, err; + int ring_area_size = info->num_ring_pages * PAGE_SIZE; - info->ring_ref = GRANT_INVALID_REF; + for (i=0;i<info->num_ring_pages; i++) { + info->ring_ref[i] = GRANT_INVALID_REF; + } - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + order = get_order(ring_area_size); + sring = (struct blkif_sring *)__get_free_pages(GFP_KERNEL, order); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - - sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); - - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); - if (err < 0) { - free_page((unsigned long)sring); - info->ring.sring = NULL; - goto fail; + FRONT_RING_INIT(&info->ring, sring, ring_area_size); + + for (i=0;i<info->num_ring_pages; i++) { + unsigned long addr = (unsigned long)info->ring.sring + i * PAGE_SIZE; + err = xenbus_grant_ring(dev, virt_to_mfn(addr)); + if (err < 0) { + free_pages((unsigned long)sring, order); + info->ring.sring = NULL; + goto fail; + } + info->ring_ref[i] = err; } - info->ring_ref = err; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) @@ -656,8 +671,14 @@ static int talk_to_backend(struct xenbus_device *dev, { const char *message = NULL; struct xenbus_transaction xbt; - int err; - + int err, i; + + BUILD_BUG_ON(BLKIF_MAX_NUM_RING_PAGES != 1 && + BLKIF_MAX_NUM_RING_PAGES != 2 && + BLKIF_MAX_NUM_RING_PAGES != 4 && + BLKIF_MAX_NUM_RING_PAGES != 8 && + BLKIF_MAX_NUM_RING_PAGES != 16); + /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, info); if (err) @@ -670,12 +691,31 @@ again: goto destroy_blkring; } - err = xenbus_printf(xbt, dev->nodename, - "ring-ref", "%u", info->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; - } + if (info->num_ring_pages == 1) { + err = xenbus_printf(xbt, dev->nodename, + "ring-ref","%u", info->ring_ref[0]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } else { + err = xenbus_printf(xbt, dev->nodename, "num-ring-pages", "%u", + info->num_ring_pages); + if (err) { + message = "writing num-ring-pages"; + goto abort_transaction; + } + for (i=0;i<info->num_ring_pages;i++) { + char buf[16]; + snprintf(buf, sizeof(buf), "ring-ref%d", i); + err = xenbus_printf(xbt, dev->nodename, buf, "%u", + info->ring_ref[i]); + if (err) { + message = "writing ring-refs"; + goto abort_transaction; + } + } + } err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", info->evtchn); if (err) { @@ -723,6 +763,7 @@ static int blkfront_probe(struct xenbus_device *dev, { int err, vdevice, i; struct blkfront_info *info; + int ring_size, max_ring_pages; /* FIXME: Use dynamic device id if this is not set. */ err = xenbus_scanf(XBT_NIL, dev->nodename, @@ -736,6 +777,10 @@ static int blkfront_probe(struct xenbus_device *dev, return err; } } + err = xenbus_scanf(XBT_NIL, dev->otherend, + "max-ring-pages", "%u", &max_ring_pages ); + if (err != 1) + max_ring_pages = 1; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { @@ -748,9 +793,13 @@ static int blkfront_probe(struct xenbus_device *dev, info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); - for (i = 0; i < BLK_RING_SIZE; i++) + info->num_ring_pages = min(max_ring_pages, BLKIF_MAX_NUM_RING_PAGES); + + ring_size = __RING_SIZE((struct blkif_sring *)0, + info->num_ring_pages * PAGE_SIZE); + for (i = 0; i < ring_size; i++) info->shadow[i].req.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + info->shadow[ring_size-1].req.id = 0x0fffffff; /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); @@ -763,6 +812,9 @@ static int blkfront_probe(struct xenbus_device *dev, return err; } + printk(KERN_INFO "blkfront %s num-ring-pages %d nr_ents %d. ", + dev->nodename, info->num_ring_pages, ring_size); + return 0; } @@ -773,6 +825,7 @@ static int blkif_recover(struct blkfront_info *info) struct blkif_request *req; struct blk_shadow *copy; int j; + int ring_size = __RING_SIZE((struct blkif_sring *)0, info->num_ring_pages * PAGE_SIZE); /* Stage 1: Make a safe copy of the shadow state. */ copy = kmalloc(sizeof(info->shadow), @@ -783,13 +836,13 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < ring_size; i++) info->shadow[i].req.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + info->shadow[ring_size-1].req.id = 0x0fffffff; /* Stage 3: Find pending requests and requeue them. */ - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < ring_size; i++) { /* Not in use? */ if (copy[i].request == 0) continue; diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index c2d1fa4..f7837ca 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -87,6 +87,18 @@ struct blkif_response { DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); +/* + * Maximum number of pages used for a blkif ring + * max-ring-pages advertised by blkback to blkfront may be lowered at blkback + * mod load time. Load time param set to default. + */ +#define BLKIF_MAX_NUM_RING_PAGES 16 +#define BLKIF_MAX_NUM_RING_PAGES_DFLT 4 +#if BLKIF_MAX_NUM_RING_PAGES < BLKIF_MAX_NUM_RING_PAGES_DFLT +#undef BLKIF_MAX_NUM_RING_PAGES_DFLT +#define BLKIF_MAX_NUM_RING_PAGES_DFLT BLKIF_MAX_NUM_RING_PAGES +#endif + #define VDISK_CDROM 0x1 #define VDISK_REMOVABLE 0x2 #define VDISK_READONLY 0x4 -- 1.7.1 -- kernel-team mailing list kernel-team@lists.ubuntu.com https://lists.ubuntu.com/mailman/listinfo/kernel-team |
| All times are GMT. The time now is 04:25 AM. |
VBulletin, Copyright ©2000 - 2013, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.