Linux Archive

Linux Archive (http://www.linux-archive.org/)
-   Device-mapper Development (http://www.linux-archive.org/device-mapper-development/)
-   -   block: add partition resize function to blkpg ioctl (http://www.linux-archive.org/device-mapper-development/632486-block-add-partition-resize-function-blkpg-ioctl.html)

Vivek Goyal 02-13-2012 06:30 PM

block: add partition resize function to blkpg ioctl
 
Add a new operation code ( BLKPG_RESIZE_PARTITION ) to the
BLKPG ioctl that allows altering the size of an existing
partition, even if it is currently in use.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
block/genhd.c | 20 ++++++++++++----
block/ioctl.c | 51 +++++++++++++++++++++++++++++++++++++--
block/partition-generic.c | 4 ++-
include/linux/blkpg.h | 1 +
include/linux/genhd.h | 57 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 23b4f70..935e09b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -153,7 +153,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
part = rcu_dereference(ptbl->part[piter->idx]);
if (!part)
continue;
- if (!part->nr_sects &&
+ if (!part_nr_sects_read(part) &&
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
piter->idx == 0))
@@ -190,7 +190,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
return part->start_sect <= sector &&
- sector < part->start_sect + part->nr_sects;
+ sector < part->start_sect + part_nr_sects_read(part);
}

/**
@@ -765,8 +765,8 @@ void __init printk_all_partitions(void)

printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
bdevt_str(part_devt(part), devt_buf),
- (unsigned long long)part->nr_sects >> 1,
- disk_name(disk, part->partno, name_buf), uuid);
+ (unsigned long long)part_nr_sects_read(part) >> 1
+ , disk_name(disk, part->partno, name_buf), uuid);
if (is_part0) {
if (disk->driverfs_dev != NULL &&
disk->driverfs_dev->driver != NULL)
@@ -857,7 +857,7 @@ static int show_partition(struct seq_file *seqf, void *v)
while ((part = disk_part_iter_next(&piter)))
seq_printf(seqf, "%4d %7d %10llu %s\n",
MAJOR(part_devt(part)), MINOR(part_devt(part)),
- (unsigned long long)part->nr_sects >> 1,
+ (unsigned long long)part_nr_sects_read(part) >> 1,
disk_name(sgp, part->partno, buf));
disk_part_iter_exit(&piter);

@@ -1263,6 +1263,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
}
disk->part_tbl->part[0] = &disk->part0;

+ /*
+ * set_capacity() and get_capacity() currently don't use
+ * seqcounter to read/update the part0->nr_sects. Still init
+ * the counter as we can read the sectors in IO submission
+ * path using sequence counters.
+ *
+ * TODO: Ideally set_capacity() and get_capacity() should be
+ * converted to make use of bd_mutex and sequence counters.
+ */
+ seqcount_init(&disk->part0.nr_sects_seq);
hd_ref_init(&disk->part0);

disk->minors = minors;
diff --git a/block/ioctl.c b/block/ioctl.c
index ba15b2d..57d99b2 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
{
struct block_device *bdevp;
struct gendisk *disk;
- struct hd_struct *part;
+ struct hd_struct *part, *lpart;
struct blkpg_ioctl_arg a;
struct blkpg_partition p;
struct disk_part_iter piter;
@@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
case BLKPG_ADD_PARTITION:
start = p.start >> 9;
length = p.length >> 9;
- /* check for fit in a hd_struct */
- if (sizeof(sector_t) == sizeof(long) &&
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) == sizeof(long) &&
sizeof(long long) > sizeof(long)) {
long pstart = start, plength = length;
if (pstart != start || plength != length
@@ -92,6 +92,51 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
bdput(bdevp);

return 0;
+ case BLKPG_RESIZE_PARTITION:
+ /* new length of partition in bytes */
+ length = p.length >> 9;
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) == sizeof(long) &&
+ sizeof(long long) > sizeof(long)) {
+ long plength = length;
+ if (plength != length || plength < 0)
+ return -EINVAL;
+ }
+ part = disk_get_part(disk, partno);
+ if (!part)
+ return -ENXIO;
+ bdevp = bdget(part_devt(part));
+ if (!bdevp) {
+ disk_put_part(part);
+ return -ENOMEM;
+ }
+ mutex_lock(&bdevp->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, 1);
+ start = part->start_sect;
+
+ /* overlap? */
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY);
+ while ((lpart = disk_part_iter_next(&piter))) {
+ if (lpart->partno != partno &&
+ !(start + length <= lpart->start_sect ||
+ start >= lpart->start_sect + lpart->nr_sects)
+ ) {
+ disk_part_iter_exit(&piter);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ disk_put_part(part);
+ return -EBUSY;
+ }
+ }
+ disk_part_iter_exit(&piter);
+ part_nr_sects_write(part, (sector_t)length);
+ i_size_write(bdevp->bd_inode, p.length);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+ disk_put_part(part);
+ return 0;
default:
return -EINVAL;
}
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d06ec1c..363a6f6 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
+ return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
}

static ssize_t part_ro_show(struct device *dev,
@@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
err = -ENOMEM;
goto out_free;
}
+
+ seqcount_init(&p->nr_sects_seq);
pdev = part_to_dev(p);

p->start_sect = start;
diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h
index faf8a45..a851944 100644
--- a/include/linux/blkpg.h
+++ b/include/linux/blkpg.h
@@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
/* The subfunctions (for the op field) */
#define BLKPG_ADD_PARTITION 1
#define BLKPG_DEL_PARTITION 2
+#define BLKPG_RESIZE_PARTITION 3

/* Sizes of name fields. Unused at present. */
#define BLKPG_DEVNAMELTH 64
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index fe23ee7..0def3ef 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -98,7 +98,13 @@ struct partition_meta_info {

struct hd_struct {
sector_t start_sect;
+ /*
+ * nr_sects is protected by sequence counter. One might extend a
+ * partition while IO is happening to it and update of nr_sects
+ * can be non-atomic on 32bit machines with 64bit sector_t.
+ */
sector_t nr_sects;
+ seqcount_t nr_sects_seq;
sector_t alignment_offset;
unsigned int discard_alignment;
struct device __dev;
@@ -653,6 +659,57 @@ static inline void hd_struct_put(struct hd_struct *part)
__delete_partition(part);
}

+/*
+ * Any access of part->nr_sects which is not protected by partition
+ * bd_mutex or gendisk bdev bd_mutex, should be done using this
+ * accessor function.
+ *
+ * Code written along the lines of i_size_read() and i_size_write().
+ * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
+ * on.
+ */
+static inline sector_t part_nr_sects_read(struct hd_struct *part)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+ sector_t nr_sects;
+ unsigned seq;
+ do {
+ seq = read_seqcount_begin(&part->nr_sects_seq);
+ nr_sects = part->nr_sects;
+ } while (read_seqcount_retry(&part->nr_sects_seq, seq));
+ return nr_sects;
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+ sector_t nr_sects;
+
+ preempt_disable();
+ nr_sects = part->nr_sects;
+ preempt_enable();
+ return nr_sects;
+#else
+ return part->nr_sects;
+#endif
+}
+
+/*
+ * Should be called with mutex lock held (typically bd_mutex) of partition
+ * to provide mutual exclusion among writers, otherwise seqcount might be
+ * left in wrong state leaving the readers spinning infinitely.
+ */
+static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+ write_seqcount_begin(&part->nr_sects_seq);
+ part->nr_sects = size;
+ write_seqcount_end(&part->nr_sects_seq);
+#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ part->nr_sects = size;
+ preempt_enable();
+#else
+ part->nr_sects = size;
+#endif
+}
+
#else /* CONFIG_BLOCK */

static inline void printk_all_partitions(void) { }
--
1.7.6.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Vivek Goyal 02-14-2012 07:39 PM

block: add partition resize function to blkpg ioctl
 
Add a new operation code ( BLKPG_RESIZE_PARTITION ) to the
BLKPG ioctl that allows altering the size of an existing
partition, even if it is currently in use.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
block/genhd.c | 20 ++++++++++++----
block/ioctl.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
block/partition-generic.c | 4 ++-
include/linux/blkpg.h | 1 +
include/linux/genhd.h | 57 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 130 insertions(+), 9 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 23b4f70..935e09b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -153,7 +153,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
part = rcu_dereference(ptbl->part[piter->idx]);
if (!part)
continue;
- if (!part->nr_sects &&
+ if (!part_nr_sects_read(part) &&
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
piter->idx == 0))
@@ -190,7 +190,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
return part->start_sect <= sector &&
- sector < part->start_sect + part->nr_sects;
+ sector < part->start_sect + part_nr_sects_read(part);
}

/**
@@ -765,8 +765,8 @@ void __init printk_all_partitions(void)

printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
bdevt_str(part_devt(part), devt_buf),
- (unsigned long long)part->nr_sects >> 1,
- disk_name(disk, part->partno, name_buf), uuid);
+ (unsigned long long)part_nr_sects_read(part) >> 1
+ , disk_name(disk, part->partno, name_buf), uuid);
if (is_part0) {
if (disk->driverfs_dev != NULL &&
disk->driverfs_dev->driver != NULL)
@@ -857,7 +857,7 @@ static int show_partition(struct seq_file *seqf, void *v)
while ((part = disk_part_iter_next(&piter)))
seq_printf(seqf, "%4d %7d %10llu %s\n",
MAJOR(part_devt(part)), MINOR(part_devt(part)),
- (unsigned long long)part->nr_sects >> 1,
+ (unsigned long long)part_nr_sects_read(part) >> 1,
disk_name(sgp, part->partno, buf));
disk_part_iter_exit(&piter);

@@ -1263,6 +1263,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
}
disk->part_tbl->part[0] = &disk->part0;

+ /*
+ * set_capacity() and get_capacity() currently don't use
+ * seqcounter to read/update the part0->nr_sects. Still init
+ * the counter as we can read the sectors in IO submission
+ * path using sequence counters.
+ *
+ * TODO: Ideally set_capacity() and get_capacity() should be
+ * converted to make use of bd_mutex and sequence counters.
+ */
+ seqcount_init(&disk->part0.nr_sects_seq);
hd_ref_init(&disk->part0);

disk->minors = minors;
diff --git a/block/ioctl.c b/block/ioctl.c
index ba15b2d..ddbc649 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
{
struct block_device *bdevp;
struct gendisk *disk;
- struct hd_struct *part;
+ struct hd_struct *part, *lpart;
struct blkpg_ioctl_arg a;
struct blkpg_partition p;
struct disk_part_iter piter;
@@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
case BLKPG_ADD_PARTITION:
start = p.start >> 9;
length = p.length >> 9;
- /* check for fit in a hd_struct */
- if (sizeof(sector_t) == sizeof(long) &&
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) == sizeof(long) &&
sizeof(long long) > sizeof(long)) {
long pstart = start, plength = length;
if (pstart != start || plength != length
@@ -92,6 +92,57 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
bdput(bdevp);

return 0;
+ case BLKPG_RESIZE_PARTITION:
+ start = p.start >> 9;
+ /* new length of partition in bytes */
+ length = p.length >> 9;
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) == sizeof(long) &&
+ sizeof(long long) > sizeof(long)) {
+ long pstart = start, plength = length;
+ if (pstart != start || plength != length
+ || pstart < 0 || plength < 0)
+ return -EINVAL;
+ }
+ part = disk_get_part(disk, partno);
+ if (!part)
+ return -ENXIO;
+ bdevp = bdget(part_devt(part));
+ if (!bdevp) {
+ disk_put_part(part);
+ return -ENOMEM;
+ }
+ mutex_lock(&bdevp->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, 1);
+ if (start != part->start_sect) {
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ disk_put_part(part);
+ return -EINVAL;
+ }
+ /* overlap? */
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY);
+ while ((lpart = disk_part_iter_next(&piter))) {
+ if (lpart->partno != partno &&
+ !(start + length <= lpart->start_sect ||
+ start >= lpart->start_sect + lpart->nr_sects)
+ ) {
+ disk_part_iter_exit(&piter);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ disk_put_part(part);
+ return -EBUSY;
+ }
+ }
+ disk_part_iter_exit(&piter);
+ part_nr_sects_write(part, (sector_t)length);
+ i_size_write(bdevp->bd_inode, p.length);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+ disk_put_part(part);
+ return 0;
default:
return -EINVAL;
}
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d06ec1c..363a6f6 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
+ return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
}

static ssize_t part_ro_show(struct device *dev,
@@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
err = -ENOMEM;
goto out_free;
}
+
+ seqcount_init(&p->nr_sects_seq);
pdev = part_to_dev(p);

p->start_sect = start;
diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h
index faf8a45..a851944 100644
--- a/include/linux/blkpg.h
+++ b/include/linux/blkpg.h
@@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
/* The subfunctions (for the op field) */
#define BLKPG_ADD_PARTITION 1
#define BLKPG_DEL_PARTITION 2
+#define BLKPG_RESIZE_PARTITION 3

/* Sizes of name fields. Unused at present. */
#define BLKPG_DEVNAMELTH 64
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index fe23ee7..0def3ef 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -98,7 +98,13 @@ struct partition_meta_info {

struct hd_struct {
sector_t start_sect;
+ /*
+ * nr_sects is protected by sequence counter. One might extend a
+ * partition while IO is happening to it and update of nr_sects
+ * can be non-atomic on 32bit machines with 64bit sector_t.
+ */
sector_t nr_sects;
+ seqcount_t nr_sects_seq;
sector_t alignment_offset;
unsigned int discard_alignment;
struct device __dev;
@@ -653,6 +659,57 @@ static inline void hd_struct_put(struct hd_struct *part)
__delete_partition(part);
}

+/*
+ * Any access of part->nr_sects which is not protected by partition
+ * bd_mutex or gendisk bdev bd_mutex, should be done using this
+ * accessor function.
+ *
+ * Code written along the lines of i_size_read() and i_size_write().
+ * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
+ * on.
+ */
+static inline sector_t part_nr_sects_read(struct hd_struct *part)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+ sector_t nr_sects;
+ unsigned seq;
+ do {
+ seq = read_seqcount_begin(&part->nr_sects_seq);
+ nr_sects = part->nr_sects;
+ } while (read_seqcount_retry(&part->nr_sects_seq, seq));
+ return nr_sects;
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+ sector_t nr_sects;
+
+ preempt_disable();
+ nr_sects = part->nr_sects;
+ preempt_enable();
+ return nr_sects;
+#else
+ return part->nr_sects;
+#endif
+}
+
+/*
+ * Should be called with mutex lock held (typically bd_mutex) of partition
+ * to provide mutual exclusion among writers, otherwise seqcount might be
+ * left in wrong state leaving the readers spinning infinitely.
+ */
+static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+ write_seqcount_begin(&part->nr_sects_seq);
+ part->nr_sects = size;
+ write_seqcount_end(&part->nr_sects_seq);
+#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ part->nr_sects = size;
+ preempt_enable();
+#else
+ part->nr_sects = size;
+#endif
+}
+
#else /* CONFIG_BLOCK */

static inline void printk_all_partitions(void) { }
--
1.7.6.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Vivek Goyal 02-20-2012 01:42 PM

block: add partition resize function to blkpg ioctl
 
On Tue, Feb 14, 2012 at 03:39:50PM -0500, Vivek Goyal wrote:
> Add a new operation code ( BLKPG_RESIZE_PARTITION ) to the
> BLKPG ioctl that allows altering the size of an existing
> partition, even if it is currently in use.

Hi Phillip,

Are you ok with the change?

Thanks
Vivek

>
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
> block/genhd.c | 20 ++++++++++++----
> block/ioctl.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
> block/partition-generic.c | 4 ++-
> include/linux/blkpg.h | 1 +
> include/linux/genhd.h | 57 +++++++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 130 insertions(+), 9 deletions(-)
>
> diff --git a/block/genhd.c b/block/genhd.c
> index 23b4f70..935e09b 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -153,7 +153,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
> part = rcu_dereference(ptbl->part[piter->idx]);
> if (!part)
> continue;
> - if (!part->nr_sects &&
> + if (!part_nr_sects_read(part) &&
> !(piter->flags & DISK_PITER_INCL_EMPTY) &&
> !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
> piter->idx == 0))
> @@ -190,7 +190,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
> static inline int sector_in_part(struct hd_struct *part, sector_t sector)
> {
> return part->start_sect <= sector &&
> - sector < part->start_sect + part->nr_sects;
> + sector < part->start_sect + part_nr_sects_read(part);
> }
>
> /**
> @@ -765,8 +765,8 @@ void __init printk_all_partitions(void)
>
> printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
> bdevt_str(part_devt(part), devt_buf),
> - (unsigned long long)part->nr_sects >> 1,
> - disk_name(disk, part->partno, name_buf), uuid);
> + (unsigned long long)part_nr_sects_read(part) >> 1
> + , disk_name(disk, part->partno, name_buf), uuid);
> if (is_part0) {
> if (disk->driverfs_dev != NULL &&
> disk->driverfs_dev->driver != NULL)
> @@ -857,7 +857,7 @@ static int show_partition(struct seq_file *seqf, void *v)
> while ((part = disk_part_iter_next(&piter)))
> seq_printf(seqf, "%4d %7d %10llu %s\n",
> MAJOR(part_devt(part)), MINOR(part_devt(part)),
> - (unsigned long long)part->nr_sects >> 1,
> + (unsigned long long)part_nr_sects_read(part) >> 1,
> disk_name(sgp, part->partno, buf));
> disk_part_iter_exit(&piter);
>
> @@ -1263,6 +1263,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
> }
> disk->part_tbl->part[0] = &disk->part0;
>
> + /*
> + * set_capacity() and get_capacity() currently don't use
> + * seqcounter to read/update the part0->nr_sects. Still init
> + * the counter as we can read the sectors in IO submission
> + * path using sequence counters.
> + *
> + * TODO: Ideally set_capacity() and get_capacity() should be
> + * converted to make use of bd_mutex and sequence counters.
> + */
> + seqcount_init(&disk->part0.nr_sects_seq);
> hd_ref_init(&disk->part0);
>
> disk->minors = minors;
> diff --git a/block/ioctl.c b/block/ioctl.c
> index ba15b2d..ddbc649 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
> {
> struct block_device *bdevp;
> struct gendisk *disk;
> - struct hd_struct *part;
> + struct hd_struct *part, *lpart;
> struct blkpg_ioctl_arg a;
> struct blkpg_partition p;
> struct disk_part_iter piter;
> @@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
> case BLKPG_ADD_PARTITION:
> start = p.start >> 9;
> length = p.length >> 9;
> - /* check for fit in a hd_struct */
> - if (sizeof(sector_t) == sizeof(long) &&
> + /* check for fit in a hd_struct */
> + if (sizeof(sector_t) == sizeof(long) &&
> sizeof(long long) > sizeof(long)) {
> long pstart = start, plength = length;
> if (pstart != start || plength != length
> @@ -92,6 +92,57 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
> bdput(bdevp);
>
> return 0;
> + case BLKPG_RESIZE_PARTITION:
> + start = p.start >> 9;
> + /* new length of partition in bytes */
> + length = p.length >> 9;
> + /* check for fit in a hd_struct */
> + if (sizeof(sector_t) == sizeof(long) &&
> + sizeof(long long) > sizeof(long)) {
> + long pstart = start, plength = length;
> + if (pstart != start || plength != length
> + || pstart < 0 || plength < 0)
> + return -EINVAL;
> + }
> + part = disk_get_part(disk, partno);
> + if (!part)
> + return -ENXIO;
> + bdevp = bdget(part_devt(part));
> + if (!bdevp) {
> + disk_put_part(part);
> + return -ENOMEM;
> + }
> + mutex_lock(&bdevp->bd_mutex);
> + mutex_lock_nested(&bdev->bd_mutex, 1);
> + if (start != part->start_sect) {
> + mutex_unlock(&bdevp->bd_mutex);
> + mutex_unlock(&bdev->bd_mutex);
> + disk_put_part(part);
> + return -EINVAL;
> + }
> + /* overlap? */
> + disk_part_iter_init(&piter, disk,
> + DISK_PITER_INCL_EMPTY);
> + while ((lpart = disk_part_iter_next(&piter))) {
> + if (lpart->partno != partno &&
> + !(start + length <= lpart->start_sect ||
> + start >= lpart->start_sect + lpart->nr_sects)
> + ) {
> + disk_part_iter_exit(&piter);
> + mutex_unlock(&bdevp->bd_mutex);
> + mutex_unlock(&bdev->bd_mutex);
> + disk_put_part(part);
> + return -EBUSY;
> + }
> + }
> + disk_part_iter_exit(&piter);
> + part_nr_sects_write(part, (sector_t)length);
> + i_size_write(bdevp->bd_inode, p.length);
> + mutex_unlock(&bdevp->bd_mutex);
> + mutex_unlock(&bdev->bd_mutex);
> + bdput(bdevp);
> + disk_put_part(part);
> + return 0;
> default:
> return -EINVAL;
> }
> diff --git a/block/partition-generic.c b/block/partition-generic.c
> index d06ec1c..363a6f6 100644
> --- a/block/partition-generic.c
> +++ b/block/partition-generic.c
> @@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> {
> struct hd_struct *p = dev_to_part(dev);
> - return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
> + return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
> }
>
> static ssize_t part_ro_show(struct device *dev,
> @@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
> err = -ENOMEM;
> goto out_free;
> }
> +
> + seqcount_init(&p->nr_sects_seq);
> pdev = part_to_dev(p);
>
> p->start_sect = start;
> diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h
> index faf8a45..a851944 100644
> --- a/include/linux/blkpg.h
> +++ b/include/linux/blkpg.h
> @@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
> /* The subfunctions (for the op field) */
> #define BLKPG_ADD_PARTITION 1
> #define BLKPG_DEL_PARTITION 2
> +#define BLKPG_RESIZE_PARTITION 3
>
> /* Sizes of name fields. Unused at present. */
> #define BLKPG_DEVNAMELTH 64
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index fe23ee7..0def3ef 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -98,7 +98,13 @@ struct partition_meta_info {
>
> struct hd_struct {
> sector_t start_sect;
> + /*
> + * nr_sects is protected by sequence counter. One might extend a
> + * partition while IO is happening to it and update of nr_sects
> + * can be non-atomic on 32bit machines with 64bit sector_t.
> + */
> sector_t nr_sects;
> + seqcount_t nr_sects_seq;
> sector_t alignment_offset;
> unsigned int discard_alignment;
> struct device __dev;
> @@ -653,6 +659,57 @@ static inline void hd_struct_put(struct hd_struct *part)
> __delete_partition(part);
> }
>
> +/*
> + * Any access of part->nr_sects which is not protected by partition
> + * bd_mutex or gendisk bdev bd_mutex, should be done using this
> + * accessor function.
> + *
> + * Code written along the lines of i_size_read() and i_size_write().
> + * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
> + * on.
> + */
> +static inline sector_t part_nr_sects_read(struct hd_struct *part)
> +{
> +#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
> + sector_t nr_sects;
> + unsigned seq;
> + do {
> + seq = read_seqcount_begin(&part->nr_sects_seq);
> + nr_sects = part->nr_sects;
> + } while (read_seqcount_retry(&part->nr_sects_seq, seq));
> + return nr_sects;
> +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
> + sector_t nr_sects;
> +
> + preempt_disable();
> + nr_sects = part->nr_sects;
> + preempt_enable();
> + return nr_sects;
> +#else
> + return part->nr_sects;
> +#endif
> +}
> +
> +/*
> + * Should be called with mutex lock held (typically bd_mutex) of partition
> + * to provide mutual exclusion among writers, otherwise seqcount might be
> + * left in wrong state leaving the readers spinning infinitely.
> + */
> +static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
> +{
> +#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
> + write_seqcount_begin(&part->nr_sects_seq);
> + part->nr_sects = size;
> + write_seqcount_end(&part->nr_sects_seq);
> +#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
> + preempt_disable();
> + part->nr_sects = size;
> + preempt_enable();
> +#else
> + part->nr_sects = size;
> +#endif
> +}
> +
> #else /* CONFIG_BLOCK */
>
> static inline void printk_all_partitions(void) { }
> --
> 1.7.6.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Phillip Susi 02-20-2012 02:17 PM

block: add partition resize function to blkpg ioctl
 
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 2/20/2012 9:42 AM, Vivek Goyal wrote:
> On Tue, Feb 14, 2012 at 03:39:50PM -0500, Vivek Goyal wrote:
>> Add a new operation code ( BLKPG_RESIZE_PARTITION ) to the BLKPG
>> ioctl that allows altering the size of an existing partition,
>> even if it is currently in use.
>
> Hi Phillip,
>
> Are you ok with the change?

Yes.
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.17 (MingW32)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJPQmQWAAoJEJrBOlT6nu75V8YH/0khEQy1sCiDmjqaVAqJGn3R
xjS7eCU6ndgGj3CHE+j4AJCTJ6cdrcaheEF/QWd3f2GxI0kQ6bBiWzxMeEXaSA57
VQOyKqoGalaWA78a3xFR1oax8ZwOAQi7LdyttdvzXUTKDXrO57 cAAIHGQcLTPFiv
a926d27kMulNQzXvjhBj/h8LAOeUVFIEbrGk5QxOw28gdStEv/RtMKeSvuq3e4qu
/TxNT78K49HBaWuhTZJB4Mg7ttyBTQJrQr5c23oo9KLSUgd+3Zv aHF53vUsP7IMm
Kbor7u648P0Xo2gaWGXPF5z4hmyhO08/1SNFKAKw0CSycpOlwjsbel/Ys57gIAY=
=oTDA
-----END PGP SIGNATURE-----

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Vivek Goyal 02-20-2012 02:28 PM

block: add partition resize function to blkpg ioctl
 
On Tue, Feb 14, 2012 at 03:39:50PM -0500, Vivek Goyal wrote:
> Add a new operation code ( BLKPG_RESIZE_PARTITION ) to the
> BLKPG ioctl that allows altering the size of an existing
> partition, even if it is currently in use.
>
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>

Hi Jens,

Could you please consider this patch for inclusion? One of our customers
wants to be able to grow partitions without having to reboot
the system.

Thanks
Vivek

> ---
> block/genhd.c | 20 ++++++++++++----
> block/ioctl.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
> block/partition-generic.c | 4 ++-
> include/linux/blkpg.h | 1 +
> include/linux/genhd.h | 57 +++++++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 130 insertions(+), 9 deletions(-)
>
> diff --git a/block/genhd.c b/block/genhd.c
> index 23b4f70..935e09b 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -153,7 +153,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
> part = rcu_dereference(ptbl->part[piter->idx]);
> if (!part)
> continue;
> - if (!part->nr_sects &&
> + if (!part_nr_sects_read(part) &&
> !(piter->flags & DISK_PITER_INCL_EMPTY) &&
> !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
> piter->idx == 0))
> @@ -190,7 +190,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
> static inline int sector_in_part(struct hd_struct *part, sector_t sector)
> {
> return part->start_sect <= sector &&
> - sector < part->start_sect + part->nr_sects;
> + sector < part->start_sect + part_nr_sects_read(part);
> }
>
> /**
> @@ -765,8 +765,8 @@ void __init printk_all_partitions(void)
>
> printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
> bdevt_str(part_devt(part), devt_buf),
> - (unsigned long long)part->nr_sects >> 1,
> - disk_name(disk, part->partno, name_buf), uuid);
> + (unsigned long long)part_nr_sects_read(part) >> 1
> + , disk_name(disk, part->partno, name_buf), uuid);
> if (is_part0) {
> if (disk->driverfs_dev != NULL &&
> disk->driverfs_dev->driver != NULL)
> @@ -857,7 +857,7 @@ static int show_partition(struct seq_file *seqf, void *v)
> while ((part = disk_part_iter_next(&piter)))
> seq_printf(seqf, "%4d %7d %10llu %s\n",
> MAJOR(part_devt(part)), MINOR(part_devt(part)),
> - (unsigned long long)part->nr_sects >> 1,
> + (unsigned long long)part_nr_sects_read(part) >> 1,
> disk_name(sgp, part->partno, buf));
> disk_part_iter_exit(&piter);
>
> @@ -1263,6 +1263,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
> }
> disk->part_tbl->part[0] = &disk->part0;
>
> + /*
> + * set_capacity() and get_capacity() currently don't use
> + * seqcounter to read/update the part0->nr_sects. Still init
> + * the counter as we can read the sectors in IO submission
> + * path using sequence counters.
> + *
> + * TODO: Ideally set_capacity() and get_capacity() should be
> + * converted to make use of bd_mutex and sequence counters.
> + */
> + seqcount_init(&disk->part0.nr_sects_seq);
> hd_ref_init(&disk->part0);
>
> disk->minors = minors;
> diff --git a/block/ioctl.c b/block/ioctl.c
> index ba15b2d..ddbc649 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
> {
> struct block_device *bdevp;
> struct gendisk *disk;
> - struct hd_struct *part;
> + struct hd_struct *part, *lpart;
> struct blkpg_ioctl_arg a;
> struct blkpg_partition p;
> struct disk_part_iter piter;
> @@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
> case BLKPG_ADD_PARTITION:
> start = p.start >> 9;
> length = p.length >> 9;
> - /* check for fit in a hd_struct */
> - if (sizeof(sector_t) == sizeof(long) &&
> + /* check for fit in a hd_struct */
> + if (sizeof(sector_t) == sizeof(long) &&
> sizeof(long long) > sizeof(long)) {
> long pstart = start, plength = length;
> if (pstart != start || plength != length
> @@ -92,6 +92,57 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
> bdput(bdevp);
>
> return 0;
> + case BLKPG_RESIZE_PARTITION:
> + start = p.start >> 9;
> + /* new length of partition in bytes */
> + length = p.length >> 9;
> + /* check for fit in a hd_struct */
> + if (sizeof(sector_t) == sizeof(long) &&
> + sizeof(long long) > sizeof(long)) {
> + long pstart = start, plength = length;
> + if (pstart != start || plength != length
> + || pstart < 0 || plength < 0)
> + return -EINVAL;
> + }
> + part = disk_get_part(disk, partno);
> + if (!part)
> + return -ENXIO;
> + bdevp = bdget(part_devt(part));
> + if (!bdevp) {
> + disk_put_part(part);
> + return -ENOMEM;
> + }
> + mutex_lock(&bdevp->bd_mutex);
> + mutex_lock_nested(&bdev->bd_mutex, 1);
> + if (start != part->start_sect) {
> + mutex_unlock(&bdevp->bd_mutex);
> + mutex_unlock(&bdev->bd_mutex);
> + disk_put_part(part);
> + return -EINVAL;
> + }
> + /* overlap? */
> + disk_part_iter_init(&piter, disk,
> + DISK_PITER_INCL_EMPTY);
> + while ((lpart = disk_part_iter_next(&piter))) {
> + if (lpart->partno != partno &&
> + !(start + length <= lpart->start_sect ||
> + start >= lpart->start_sect + lpart->nr_sects)
> + ) {
> + disk_part_iter_exit(&piter);
> + mutex_unlock(&bdevp->bd_mutex);
> + mutex_unlock(&bdev->bd_mutex);
> + disk_put_part(part);
> + return -EBUSY;
> + }
> + }
> + disk_part_iter_exit(&piter);
> + part_nr_sects_write(part, (sector_t)length);
> + i_size_write(bdevp->bd_inode, p.length);
> + mutex_unlock(&bdevp->bd_mutex);
> + mutex_unlock(&bdev->bd_mutex);
> + bdput(bdevp);
> + disk_put_part(part);
> + return 0;
> default:
> return -EINVAL;
> }
> diff --git a/block/partition-generic.c b/block/partition-generic.c
> index d06ec1c..363a6f6 100644
> --- a/block/partition-generic.c
> +++ b/block/partition-generic.c
> @@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> {
> struct hd_struct *p = dev_to_part(dev);
> - return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
> + return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
> }
>
> static ssize_t part_ro_show(struct device *dev,
> @@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
> err = -ENOMEM;
> goto out_free;
> }
> +
> + seqcount_init(&p->nr_sects_seq);
> pdev = part_to_dev(p);
>
> p->start_sect = start;
> diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h
> index faf8a45..a851944 100644
> --- a/include/linux/blkpg.h
> +++ b/include/linux/blkpg.h
> @@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
> /* The subfunctions (for the op field) */
> #define BLKPG_ADD_PARTITION 1
> #define BLKPG_DEL_PARTITION 2
> +#define BLKPG_RESIZE_PARTITION 3
>
> /* Sizes of name fields. Unused at present. */
> #define BLKPG_DEVNAMELTH 64
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index fe23ee7..0def3ef 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -98,7 +98,13 @@ struct partition_meta_info {
>
> struct hd_struct {
> sector_t start_sect;
> + /*
> + * nr_sects is protected by sequence counter. One might extend a
> + * partition while IO is happening to it and update of nr_sects
> + * can be non-atomic on 32bit machines with 64bit sector_t.
> + */
> sector_t nr_sects;
> + seqcount_t nr_sects_seq;
> sector_t alignment_offset;
> unsigned int discard_alignment;
> struct device __dev;
> @@ -653,6 +659,57 @@ static inline void hd_struct_put(struct hd_struct *part)
> __delete_partition(part);
> }
>
> +/*
> + * Any access of part->nr_sects which is not protected by partition
> + * bd_mutex or gendisk bdev bd_mutex, should be done using this
> + * accessor function.
> + *
> + * Code written along the lines of i_size_read() and i_size_write().
> + * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
> + * on.
> + */
> +static inline sector_t part_nr_sects_read(struct hd_struct *part)
> +{
> +#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
> + sector_t nr_sects;
> + unsigned seq;
> + do {
> + seq = read_seqcount_begin(&part->nr_sects_seq);
> + nr_sects = part->nr_sects;
> + } while (read_seqcount_retry(&part->nr_sects_seq, seq));
> + return nr_sects;
> +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
> + sector_t nr_sects;
> +
> + preempt_disable();
> + nr_sects = part->nr_sects;
> + preempt_enable();
> + return nr_sects;
> +#else
> + return part->nr_sects;
> +#endif
> +}
> +
> +/*
> + * Should be called with mutex lock held (typically bd_mutex) of partition
> + * to provide mutual exlusion among writers otherwise seqcount might be
> + * left in wrong state leaving the readers spinning infinitely.
> + */
> +static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
> +{
> +#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
> + write_seqcount_begin(&part->nr_sects_seq);
> + part->nr_sects = size;
> + write_seqcount_end(&part->nr_sects_seq);
> +#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
> + preempt_disable();
> + part->nr_sects = size;
> + preempt_enable();
> +#else
> + part->nr_sects = size;
> +#endif
> +}
> +
> #else /* CONFIG_BLOCK */
>
> static inline void printk_all_partitions(void) { }
> --
> 1.7.6.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Vivek Goyal 03-02-2012 05:54 PM

block: add partition resize function to blkpg ioctl
 
On Mon, Feb 20, 2012 at 10:28:49AM -0500, Vivek Goyal wrote:
> On Tue, Feb 14, 2012 at 03:39:50PM -0500, Vivek Goyal wrote:
> > Add a new operation code ( BLKPG_RESIZE_PARTITION ) to the
> > BLKPG ioctl that allows altering the size of an existing
> > partition, even if it is currently in use.
> >
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
>
> Hi Jens,
>
> Can you please consider this patch for inclusion? One of our customers
> does want to be able to grow partitions without having to reboot
> the system.

Hi Jens,

Do you have concerns about this patch? If no, can you please consider
merging it.

Thanks
Vivek

>
> > ---
> > block/genhd.c | 20 ++++++++++++----
> > block/ioctl.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
> > block/partition-generic.c | 4 ++-
> > include/linux/blkpg.h | 1 +
> > include/linux/genhd.h | 57 +++++++++++++++++++++++++++++++++++++++++++++
> > 5 files changed, 130 insertions(+), 9 deletions(-)
> >
> > diff --git a/block/genhd.c b/block/genhd.c
> > index 23b4f70..935e09b 100644
> > --- a/block/genhd.c
> > +++ b/block/genhd.c
> > @@ -153,7 +153,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
> > part = rcu_dereference(ptbl->part[piter->idx]);
> > if (!part)
> > continue;
> > - if (!part->nr_sects &&
> > + if (!part_nr_sects_read(part) &&
> > !(piter->flags & DISK_PITER_INCL_EMPTY) &&
> > !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
> > piter->idx == 0))
> > @@ -190,7 +190,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
> > static inline int sector_in_part(struct hd_struct *part, sector_t sector)
> > {
> > return part->start_sect <= sector &&
> > - sector < part->start_sect + part->nr_sects;
> > + sector < part->start_sect + part_nr_sects_read(part);
> > }
> >
> > /**
> > @@ -765,8 +765,8 @@ void __init printk_all_partitions(void)
> >
> > printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
> > bdevt_str(part_devt(part), devt_buf),
> > - (unsigned long long)part->nr_sects >> 1,
> > - disk_name(disk, part->partno, name_buf), uuid);
> > + (unsigned long long)part_nr_sects_read(part) >> 1
> > + , disk_name(disk, part->partno, name_buf), uuid);
> > if (is_part0) {
> > if (disk->driverfs_dev != NULL &&
> > disk->driverfs_dev->driver != NULL)
> > @@ -857,7 +857,7 @@ static int show_partition(struct seq_file *seqf, void *v)
> > while ((part = disk_part_iter_next(&piter)))
> > seq_printf(seqf, "%4d %7d %10llu %s\n",
> > MAJOR(part_devt(part)), MINOR(part_devt(part)),
> > - (unsigned long long)part->nr_sects >> 1,
> > + (unsigned long long)part_nr_sects_read(part) >> 1,
> > disk_name(sgp, part->partno, buf));
> > disk_part_iter_exit(&piter);
> >
> > @@ -1263,6 +1263,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
> > }
> > disk->part_tbl->part[0] = &disk->part0;
> >
> > + /*
> > + * set_capacity() and get_capacity() currently don't use
> > + * seqcounter to read/update the part0->nr_sects. Still init
> > + * the counter as we can read the sectors in IO submission
> > + * patch using seqence counters.
> > + *
> > + * TODO: Ideally set_capacity() and get_capacity() should be
> > + * converted to make use of bd_mutex and sequence counters.
> > + */
> > + seqcount_init(&disk->part0.nr_sects_seq);
> > hd_ref_init(&disk->part0);
> >
> > disk->minors = minors;
> > diff --git a/block/ioctl.c b/block/ioctl.c
> > index ba15b2d..ddbc649 100644
> > --- a/block/ioctl.c
> > +++ b/block/ioctl.c
> > @@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
> > {
> > struct block_device *bdevp;
> > struct gendisk *disk;
> > - struct hd_struct *part;
> > + struct hd_struct *part, *lpart;
> > struct blkpg_ioctl_arg a;
> > struct blkpg_partition p;
> > struct disk_part_iter piter;
> > @@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
> > case BLKPG_ADD_PARTITION:
> > start = p.start >> 9;
> > length = p.length >> 9;
> > - /* check for fit in a hd_struct */
> > - if (sizeof(sector_t) == sizeof(long) &&
> > + /* check for fit in a hd_struct */
> > + if (sizeof(sector_t) == sizeof(long) &&
> > sizeof(long long) > sizeof(long)) {
> > long pstart = start, plength = length;
> > if (pstart != start || plength != length
> > @@ -92,6 +92,57 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
> > bdput(bdevp);
> >
> > return 0;
> > + case BLKPG_RESIZE_PARTITION:
> > + start = p.start >> 9;
> > + /* new length of partition in bytes */
> > + length = p.length >> 9;
> > + /* check for fit in a hd_struct */
> > + if (sizeof(sector_t) == sizeof(long) &&
> > + sizeof(long long) > sizeof(long)) {
> > + long pstart = start, plength = length;
> > + if (pstart != start || plength != length
> > + || pstart < 0 || plength < 0)
> > + return -EINVAL;
> > + }
> > + part = disk_get_part(disk, partno);
> > + if (!part)
> > + return -ENXIO;
> > + bdevp = bdget(part_devt(part));
> > + if (!bdevp) {
> > + disk_put_part(part);
> > + return -ENOMEM;
> > + }
> > + mutex_lock(&bdevp->bd_mutex);
> > + mutex_lock_nested(&bdev->bd_mutex, 1);
> > + if (start != part->start_sect) {
> > + mutex_unlock(&bdevp->bd_mutex);
> > + mutex_unlock(&bdev->bd_mutex);
> > + disk_put_part(part);
> > + return -EINVAL;
> > + }
> > + /* overlap? */
> > + disk_part_iter_init(&piter, disk,
> > + DISK_PITER_INCL_EMPTY);
> > + while ((lpart = disk_part_iter_next(&piter))) {
> > + if (lpart->partno != partno &&
> > + !(start + length <= lpart->start_sect ||
> > + start >= lpart->start_sect + lpart->nr_sects)
> > + ) {
> > + disk_part_iter_exit(&piter);
> > + mutex_unlock(&bdevp->bd_mutex);
> > + mutex_unlock(&bdev->bd_mutex);
> > + disk_put_part(part);
> > + return -EBUSY;
> > + }
> > + }
> > + disk_part_iter_exit(&piter);
> > + part_nr_sects_write(part, (sector_t)length);
> > + i_size_write(bdevp->bd_inode, p.length);
> > + mutex_unlock(&bdevp->bd_mutex);
> > + mutex_unlock(&bdev->bd_mutex);
> > + bdput(bdevp);
> > + disk_put_part(part);
> > + return 0;
> > default:
> > return -EINVAL;
> > }
> > diff --git a/block/partition-generic.c b/block/partition-generic.c
> > index d06ec1c..363a6f6 100644
> > --- a/block/partition-generic.c
> > +++ b/block/partition-generic.c
> > @@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
> > struct device_attribute *attr, char *buf)
> > {
> > struct hd_struct *p = dev_to_part(dev);
> > - return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
> > + return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
> > }
> >
> > static ssize_t part_ro_show(struct device *dev,
> > @@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
> > err = -ENOMEM;
> > goto out_free;
> > }
> > +
> > + seqcount_init(&p->nr_sects_seq);
> > pdev = part_to_dev(p);
> >
> > p->start_sect = start;
> > diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h
> > index faf8a45..a851944 100644
> > --- a/include/linux/blkpg.h
> > +++ b/include/linux/blkpg.h
> > @@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
> > /* The subfunctions (for the op field) */
> > #define BLKPG_ADD_PARTITION 1
> > #define BLKPG_DEL_PARTITION 2
> > +#define BLKPG_RESIZE_PARTITION 3
> >
> > /* Sizes of name fields. Unused at present. */
> > #define BLKPG_DEVNAMELTH 64
> > diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> > index fe23ee7..0def3ef 100644
> > --- a/include/linux/genhd.h
> > +++ b/include/linux/genhd.h
> > @@ -98,7 +98,13 @@ struct partition_meta_info {
> >
> > struct hd_struct {
> > sector_t start_sect;
> > + /*
> > + * nr_sects is protected by sequence counter. One might extend a
> > + * partition while IO is happening to it and update of nr_sects
> > + * can be non-atomic on 32bit machines with 64bit sector_t.
> > + */
> > sector_t nr_sects;
> > + seqcount_t nr_sects_seq;
> > sector_t alignment_offset;
> > unsigned int discard_alignment;
> > struct device __dev;
> > @@ -653,6 +659,57 @@ static inline void hd_struct_put(struct hd_struct *part)
> > __delete_partition(part);
> > }
> >
> > +/*
> > + * Any access of part->nr_sects which is not protected by partition
> > + * bd_mutex or gendisk bdev bd_mutex, should be done using this
> > + * accessor function.
> > + *
> > + * Code written along the lines of i_size_read() and i_size_write().
> > + * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
> > + * on.
> > + */
> > +static inline sector_t part_nr_sects_read(struct hd_struct *part)
> > +{
> > +#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
> > + sector_t nr_sects;
> > + unsigned seq;
> > + do {
> > + seq = read_seqcount_begin(&part->nr_sects_seq);
> > + nr_sects = part->nr_sects;
> > + } while (read_seqcount_retry(&part->nr_sects_seq, seq));
> > + return nr_sects;
> > +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
> > + sector_t nr_sects;
> > +
> > + preempt_disable();
> > + nr_sects = part->nr_sects;
> > + preempt_enable();
> > + return nr_sects;
> > +#else
> > + return part->nr_sects;
> > +#endif
> > +}
> > +
> > +/*
> > + * Should be called with mutex lock held (typically bd_mutex) of partition
> > + * to provide mutual exlusion among writers otherwise seqcount might be
> > + * left in wrong state leaving the readers spinning infinitely.
> > + */
> > +static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
> > +{
> > +#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
> > + write_seqcount_begin(&part->nr_sects_seq);
> > + part->nr_sects = size;
> > + write_seqcount_end(&part->nr_sects_seq);
> > +#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
> > + preempt_disable();
> > + part->nr_sects = size;
> > + preempt_enable();
> > +#else
> > + part->nr_sects = size;
> > +#endif
> > +}
> > +
> > #else /* CONFIG_BLOCK */
> >
> > static inline void printk_all_partitions(void) { }
> > --
> > 1.7.6.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

"Maxim V. Patlasov" 04-09-2012 04:40 PM

block: add partition resize function to blkpg ioctl
 
Hi Vivek,

Please see the inline comments below...

On 02/15/2012 12:39 AM, Vivek Goyal wrote:

...
@@ -765,8 +765,8 @@ void __init printk_all_partitions(void)

printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
bdevt_str(part_devt(part), devt_buf),
- (unsigned long long)part->nr_sects>> 1,
- disk_name(disk, part->partno, name_buf), uuid);
+ (unsigned long long)part_nr_sects_read(part)>> 1
+ , disk_name(disk, part->partno, name_buf), uuid);


A line starting with a comma looks unusual. Is that what you intended?


diff --git a/block/ioctl.c b/block/ioctl.c
index ba15b2d..ddbc649 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
...
@@ -92,6 +92,57 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
bdput(bdevp);

return 0;
+ case BLKPG_RESIZE_PARTITION:
+ start = p.start>> 9;
+ /* new length of partition in bytes */
+ length = p.length>> 9;
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) == sizeof(long)&&
+ sizeof(long long)> sizeof(long)) {
+ long pstart = start, plength = length;
+ if (pstart != start || plength != length
+ || pstart< 0 || plength< 0)
+ return -EINVAL;
+ }
+ part = disk_get_part(disk, partno);
+ if (!part)
+ return -ENXIO;
+ bdevp = bdget(part_devt(part));
+ if (!bdevp) {
+ disk_put_part(part);
+ return -ENOMEM;
+ }
+ mutex_lock(&bdevp->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, 1);
+ if (start != part->start_sect) {
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ disk_put_part(part);
+ return -EINVAL;


bdput(bdevp) missed?



+ }
+ /* overlap? */
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY);
+ while ((lpart = disk_part_iter_next(&piter))) {
+ if (lpart->partno != partno&&
+ !(start + length<= lpart->start_sect ||
+ start>= lpart->start_sect + lpart->nr_sects)
+ ) {
+ disk_part_iter_exit(&piter);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ disk_put_part(part);
+ return -EBUSY;


bdput(bdevp) missed?


+ }
+ }
+ disk_part_iter_exit(&piter);
+ part_nr_sects_write(part, (sector_t)length);
+ i_size_write(bdevp->bd_inode, p.length);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+ disk_put_part(part);
+ return 0;
default:
return -EINVAL;
}


Thanks,
Maxim

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Phillip Susi 07-07-2012 01:51 AM

block: add partition resize function to blkpg ioctl
 
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

What's the status of this patch? Forgotten, or are there still any outstanding concerns?

On 03/02/2012 01:54 PM, Vivek Goyal wrote:
> Hi Jens,
>
> Do you have concerns about this patch? If no, can you please consider
> merging it.
>
> Thanks
> Vivek
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJP95YUAAoJEJrBOlT6nu75A/kIAIEWs+MlA8Me05jjBGpSFQsn
VigiYTF4UdWjA3bG0CNB41eqpzOKVl/B4vTBAy1YezuUXMamBRp1OD6hatEL/blO
ps/M2S2NNPgFOzDmZBgfWIib6tnbCJvTowLdt4n4NnP0DoQRn+5bX opL/jcm4lwU
XWheiqFFX1xSB5YgP+GMl4zVWZhyrHYcynqK/25EimbEXtjgTyR3Cy4wMfGgMdnI
HkY7D0Kn420n+X6uRLXZW8hV3apATZCz3PGsxg7FI83gFi7Tc9 rneOhwgRkAXHxq
FcJ2NABK83dACAYOU0fhVTmxoumxuHNCmp7iRGiavnbNCBJWxL V2x1WhceX23lc=
=1FUQ
-----END PGP SIGNATURE-----

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Vivek Goyal 07-09-2012 03:13 PM

block: add partition resize function to blkpg ioctl
 
On Fri, Jul 06, 2012 at 09:51:16PM -0400, Phillip Susi wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> What's the status of this patch? Forgotten, or are there still any outstanding concerns?

There was one outstanding concern from Maxim about the missing "bdput(bdevp)".
Will see if I can find some time to brush it up and test the patches again. If
somebody can beat me to it, that would be great.

Thanks
Vivek

>
> On 03/02/2012 01:54 PM, Vivek Goyal wrote:
> > Hi Jens,
> >
> > Do you have concerns about this patch? If no, can you please consider
> > merging it.
> >
> > Thanks
> > Vivek
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v1.4.11 (GNU/Linux)
> Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/
>
> iQEcBAEBAgAGBQJP95YUAAoJEJrBOlT6nu75A/kIAIEWs+MlA8Me05jjBGpSFQsn
> VigiYTF4UdWjA3bG0CNB41eqpzOKVl/B4vTBAy1YezuUXMamBRp1OD6hatEL/blO
> ps/M2S2NNPgFOzDmZBgfWIib6tnbCJvTowLdt4n4NnP0DoQRn+5bX opL/jcm4lwU
> XWheiqFFX1xSB5YgP+GMl4zVWZhyrHYcynqK/25EimbEXtjgTyR3Cy4wMfGgMdnI
> HkY7D0Kn420n+X6uRLXZW8hV3apATZCz3PGsxg7FI83gFi7Tc9 rneOhwgRkAXHxq
> FcJ2NABK83dACAYOU0fhVTmxoumxuHNCmp7iRGiavnbNCBJWxL V2x1WhceX23lc=
> =1FUQ
> -----END PGP SIGNATURE-----

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

07-09-2012 09:34 PM

block: add partition resize function to blkpg ioctl
 
Add a new operation code (BLKPG_RESIZE_PARTITION) to the BLKPG ioctl that
allows altering the size of an existing partition, even if it is currently
in use.

This patch protects hd_struct->nr_sects with a sequence counter, because
one might extend a partition while IO is happening to it, and an update of
nr_sects can be non-atomic on 32bit machines with a 64bit sector_t. This
can lead to issues like reading an inconsistent size of a partition. A sequence
counter is used so that readers don't have to take the bdev mutex lock,
as we call sector_in_part() very frequently.

Now all accesses to hd_struct->nr_sects should happen using the sequence
counter read/update helper functions part_nr_sects_read/part_nr_sects_write.
There is one exception though, set_capacity()/get_capacity(). I think
theoretically a race should exist there too, but this patch does not
modify set_capacity()/get_capacity() due to the sheer number of call sites,
and I am afraid that change might break something. I have left that as a
TODO item. We can handle it later if need be. This patch does not introduce
any new races as such w.r.t. set_capacity()/get_capacity().

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
block/genhd.c | 20 +++++++++++----
block/ioctl.c | 59 ++++++++++++++++++++++++++++++++++++++++++--
block/partition-generic.c | 4 ++-
include/linux/blkpg.h | 1 +
include/linux/genhd.h | 57 +++++++++++++++++++++++++++++++++++++++++++
5 files changed, 132 insertions(+), 9 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 9cf5583..cac7366 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -154,7 +154,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
part = rcu_dereference(ptbl->part[piter->idx]);
if (!part)
continue;
- if (!part->nr_sects &&
+ if (!part_nr_sects_read(part) &&
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
piter->idx == 0))
@@ -191,7 +191,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
return part->start_sect <= sector &&
- sector < part->start_sect + part->nr_sects;
+ sector < part->start_sect + part_nr_sects_read(part);
}

/**
@@ -769,8 +769,8 @@ void __init printk_all_partitions(void)

printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
bdevt_str(part_devt(part), devt_buf),
- (unsigned long long)part->nr_sects >> 1,
- disk_name(disk, part->partno, name_buf),
+ (unsigned long long)part_nr_sects_read(part) >> 1
+ , disk_name(disk, part->partno, name_buf),
uuid_buf);
if (is_part0) {
if (disk->driverfs_dev != NULL &&
@@ -862,7 +862,7 @@ static int show_partition(struct seq_file *seqf, void *v)
while ((part = disk_part_iter_next(&piter)))
seq_printf(seqf, "%4d %7d %10llu %s\n",
MAJOR(part_devt(part)), MINOR(part_devt(part)),
- (unsigned long long)part->nr_sects >> 1,
+ (unsigned long long)part_nr_sects_read(part) >> 1,
disk_name(sgp, part->partno, buf));
disk_part_iter_exit(&piter);

@@ -1268,6 +1268,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
}
disk->part_tbl->part[0] = &disk->part0;

+ /*
+ * set_capacity() and get_capacity() currently don't use
+ * seqcounter to read/update the part0->nr_sects. Still init
+ * the counter as we can read the sectors in IO submission
+ * path using sequence counters.
+ *
+ * TODO: Ideally set_capacity() and get_capacity() should be
+ * converted to make use of bd_mutex and sequence counters.
+ */
+ seqcount_init(&disk->part0.nr_sects_seq);
hd_ref_init(&disk->part0);

disk->minors = minors;
diff --git a/block/ioctl.c b/block/ioctl.c
index ba15b2d..4476e0e8 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
{
struct block_device *bdevp;
struct gendisk *disk;
- struct hd_struct *part;
+ struct hd_struct *part, *lpart;
struct blkpg_ioctl_arg a;
struct blkpg_partition p;
struct disk_part_iter piter;
@@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
case BLKPG_ADD_PARTITION:
start = p.start >> 9;
length = p.length >> 9;
- /* check for fit in a hd_struct */
- if (sizeof(sector_t) == sizeof(long) &&
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) == sizeof(long) &&
sizeof(long long) > sizeof(long)) {
long pstart = start, plength = length;
if (pstart != start || plength != length
@@ -92,6 +92,59 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
bdput(bdevp);

return 0;
+ case BLKPG_RESIZE_PARTITION:
+ start = p.start >> 9;
+ /* new length of partition in bytes */
+ length = p.length >> 9;
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) == sizeof(long) &&
+ sizeof(long long) > sizeof(long)) {
+ long pstart = start, plength = length;
+ if (pstart != start || plength != length
+ || pstart < 0 || plength < 0)
+ return -EINVAL;
+ }
+ part = disk_get_part(disk, partno);
+ if (!part)
+ return -ENXIO;
+ bdevp = bdget(part_devt(part));
+ if (!bdevp) {
+ disk_put_part(part);
+ return -ENOMEM;
+ }
+ mutex_lock(&bdevp->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, 1);
+ if (start != part->start_sect) {
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+ disk_put_part(part);
+ return -EINVAL;
+ }
+ /* overlap? */
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY);
+ while ((lpart = disk_part_iter_next(&piter))) {
+ if (lpart->partno != partno &&
+ !(start + length <= lpart->start_sect ||
+ start >= lpart->start_sect + lpart->nr_sects)
+ ) {
+ disk_part_iter_exit(&piter);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+ disk_put_part(part);
+ return -EBUSY;
+ }
+ }
+ disk_part_iter_exit(&piter);
+ part_nr_sects_write(part, (sector_t)length);
+ i_size_write(bdevp->bd_inode, p.length);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+ disk_put_part(part);
+ return 0;
default:
return -EINVAL;
}
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 6df5d69..f1d1451 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
+ return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
}

static ssize_t part_ro_show(struct device *dev,
@@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
err = -ENOMEM;
goto out_free;
}
+
+ seqcount_init(&p->nr_sects_seq);
pdev = part_to_dev(p);

p->start_sect = start;
diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h
index faf8a45..a851944 100644
--- a/include/linux/blkpg.h
+++ b/include/linux/blkpg.h
@@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
/* The subfunctions (for the op field) */
#define BLKPG_ADD_PARTITION 1
#define BLKPG_DEL_PARTITION 2
+#define BLKPG_RESIZE_PARTITION 3

/* Sizes of name fields. Unused at present. */
#define BLKPG_DEVNAMELTH 64
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 017a7fb..ee8e688 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -98,7 +98,13 @@ struct partition_meta_info {

struct hd_struct {
sector_t start_sect;
+ /*
+ * nr_sects is protected by sequence counter. One might extend a
+ * partition while IO is happening to it and update of nr_sects
+ * can be non-atomic on 32bit machines with 64bit sector_t.
+ */
sector_t nr_sects;
+ seqcount_t nr_sects_seq;
sector_t alignment_offset;
unsigned int discard_alignment;
struct device __dev;
@@ -648,6 +654,57 @@ static inline void hd_struct_put(struct hd_struct *part)
__delete_partition(part);
}

+/*
+ * Any access of part->nr_sects which is not protected by partition
+ * bd_mutex or gendisk bdev bd_mutex, should be done using this
+ * accessor function.
+ *
+ * Code written along the lines of i_size_read() and i_size_write().
+ * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
+ * on.
+ */
+static inline sector_t part_nr_sects_read(struct hd_struct *part)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+ sector_t nr_sects;
+ unsigned seq;
+ do {
+ seq = read_seqcount_begin(&part->nr_sects_seq);
+ nr_sects = part->nr_sects;
+ } while (read_seqcount_retry(&part->nr_sects_seq, seq));
+ return nr_sects;
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+ sector_t nr_sects;
+
+ preempt_disable();
+ nr_sects = part->nr_sects;
+ preempt_enable();
+ return nr_sects;
+#else
+ return part->nr_sects;
+#endif
+}
+
+/*
+ * Should be called with mutex lock held (typically bd_mutex) of partition
+ * to provide mutual exclusion among writers otherwise seqcount might be
+ * left in wrong state leaving the readers spinning infinitely.
+ */
+static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+ write_seqcount_begin(&part->nr_sects_seq);
+ part->nr_sects = size;
+ write_seqcount_end(&part->nr_sects_seq);
+#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ part->nr_sects = size;
+ preempt_enable();
+#else
+ part->nr_sects = size;
+#endif
+}
+
#else /* CONFIG_BLOCK */

static inline void printk_all_partitions(void) { }
--
1.7.7.6


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


All times are GMT. The time now is 07:37 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.