FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Redhat > Device-mapper Development

 
 
LinkBack Thread Tools
 
Old 08-04-2008, 09:00 AM
Ryo Tsuruta
 
Default bio-cgroup: Implement the bio-cgroup

This patch implements the bio cgroup on the memory cgroup.

Based on 2.6.27-rc1-mm1
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>

diff -Ndupr linux-2.6.27-rc1-mm1.cg1/block/blk-ioc.c linux-2.6.27-rc1-mm1.cg2/block/blk-ioc.c
--- linux-2.6.27-rc1-mm1.cg1/block/blk-ioc.c 2008-07-29 11:40:31.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/block/blk-ioc.c 2008-08-01 19:18:38.000000000 +0900
@@ -84,24 +84,28 @@ void exit_io_context(void)
}
}

+void init_io_context(struct io_context *ioc)
+{
+ atomic_set(&ioc->refcount, 1);
+ atomic_set(&ioc->nr_tasks, 1);
+ spin_lock_init(&ioc->lock);
+ ioc->ioprio_changed = 0;
+ ioc->ioprio = 0;
+ ioc->last_waited = jiffies; /* doesn't matter... */
+ ioc->nr_batch_requests = 0; /* because this is 0 */
+ ioc->aic = NULL;
+ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+ INIT_HLIST_HEAD(&ioc->cic_list);
+ ioc->ioc_data = NULL;
+}
+
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
struct io_context *ret;

ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
- if (ret) {
- atomic_set(&ret->refcount, 1);
- atomic_set(&ret->nr_tasks, 1);
- spin_lock_init(&ret->lock);
- ret->ioprio_changed = 0;
- ret->ioprio = 0;
- ret->last_waited = jiffies; /* doesn't matter... */
- ret->nr_batch_requests = 0; /* because this is 0 */
- ret->aic = NULL;
- INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
- INIT_HLIST_HEAD(&ret->cic_list);
- ret->ioc_data = NULL;
- }
+ if (ret)
+ init_io_context(ret);

return ret;
}
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/biocontrol.h linux-2.6.27-rc1-mm1.cg2/include/linux/biocontrol.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/biocontrol.h 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/biocontrol.h 2008-08-01 19:21:56.000000000 +0900
@@ -0,0 +1,159 @@
+#include <linux/cgroup.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+
+#ifndef _LINUX_BIOCONTROL_H
+#define _LINUX_BIOCONTROL_H
+
+#ifdef CONFIG_CGROUP_BIO
+
+struct io_context;
+struct block_device;
+
+struct bio_cgroup {
+ struct cgroup_subsys_state css;
+ int id;
+ struct io_context *io_context; /* default io_context */
+/* struct radix_tree_root io_context_root; per device io_context */
+ spinlock_t page_list_lock;
+ struct list_head page_list;
+};
+
+static inline int bio_cgroup_disabled(void)
+{
+ return bio_cgroup_subsys.disabled;
+}
+
+static inline struct bio_cgroup *bio_cgroup_from_task(struct task_struct *p)
+{
+ return container_of(task_subsys_state(p, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+static inline void __bio_cgroup_add_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ list_add(&pc->blist, &biog->page_list);
+}
+
+static inline void bio_cgroup_add_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ unsigned long flags;
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ __bio_cgroup_add_page(pc);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+}
+
+static inline void __bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+ list_del_init(&pc->blist);
+}
+
+static inline void bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ unsigned long flags;
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ __bio_cgroup_remove_page(pc);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+}
+
+static inline void get_bio_cgroup(struct bio_cgroup *biog)
+{
+ css_get(&biog->css);
+}
+
+static inline void put_bio_cgroup(struct bio_cgroup *biog)
+{
+ css_put(&biog->css);
+}
+
+static inline void set_bio_cgroup(struct page_cgroup *pc,
+ struct bio_cgroup *biog)
+{
+ pc->bio_cgroup = biog;
+}
+
+static inline void clear_bio_cgroup(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ pc->bio_cgroup = NULL;
+ put_bio_cgroup(biog);
+}
+
+static inline struct bio_cgroup *get_bio_page_cgroup(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ css_get(&biog->css);
+ return biog;
+}
+
+/* This should be called in an RCU-protected section. */
+static inline struct bio_cgroup *mm_get_bio_cgroup(struct mm_struct *mm)
+{
+ struct bio_cgroup *biog;
+ biog = bio_cgroup_from_task(rcu_dereference(mm->owner));
+ get_bio_cgroup(biog);
+ return biog;
+}
+
+extern struct io_context *get_bio_cgroup_iocontext(struct bio *bio);
+
+#else /* CONFIG_CGROUP_BIO */
+
+struct bio_cgroup;
+
+static inline int bio_cgroup_disabled(void)
+{
+ return 1;
+}
+
+static inline void bio_cgroup_add_page(struct page_cgroup *pc)
+{
+}
+
+static inline void bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+}
+
+static inline void get_bio_cgroup(struct bio_cgroup *biog)
+{
+}
+
+static inline void put_bio_cgroup(struct bio_cgroup *biog)
+{
+}
+
+static inline void set_bio_cgroup(struct page_cgroup *pc,
+ struct bio_cgroup *biog)
+{
+}
+
+static inline void clear_bio_cgroup(struct page_cgroup *pc)
+{
+}
+
+static inline struct bio_cgroup *get_bio_page_cgroup(struct page_cgroup *pc)
+{
+ return NULL;
+}
+
+static inline struct bio_cgroup *mm_get_bio_cgroup(struct mm_struct *mm)
+{
+ return NULL;
+}
+
+static inline int get_bio_cgroup_id(struct page *page)
+{
+ return 0;
+}
+
+static inline struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_CGROUP_BIO */
+
+#endif /* _LINUX_BIOCONTROL_H */
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/cgroup_subsys.h linux-2.6.27-rc1-mm1.cg2/include/linux/cgroup_subsys.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/cgroup_subsys.h 2008-08-01 12:18:28.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/cgroup_subsys.h 2008-08-01 19:18:38.000000000 +0900
@@ -43,6 +43,12 @@ SUBSYS(mem_cgroup)

/* */

+#ifdef CONFIG_CGROUP_BIO
+SUBSYS(bio_cgroup)
+#endif
+
+/* */
+
#ifdef CONFIG_CGROUP_DEVICE
SUBSYS(devices)
#endif
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/iocontext.h linux-2.6.27-rc1-mm1.cg2/include/linux/iocontext.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/iocontext.h 2008-07-29 11:40:31.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/iocontext.h 2008-08-01 19:18:38.000000000 +0900
@@ -83,6 +83,8 @@ struct io_context {
struct radix_tree_root radix_root;
struct hlist_head cic_list;
void *ioc_data;
+
+ int id; /* cgroup ID */
};

static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -104,6 +106,7 @@ int put_io_context(struct io_context *io
void exit_io_context(void);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+void init_io_context(struct io_context *ioc);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
#else
static inline void exit_io_context(void)
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/memcontrol.h linux-2.6.27-rc1-mm1.cg2/include/linux/memcontrol.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/memcontrol.h 2008-08-01 19:03:21.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/memcontrol.h 2008-08-01 19:22:10.000000000 +0900
@@ -54,6 +54,10 @@ struct page_cgroup {
struct list_head lru; /* per cgroup LRU list */
struct mem_cgroup *mem_cgroup;
#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
+#ifdef CONFIG_CGROUP_BIO
+ struct list_head blist; /* for bio_cgroup page list */
+ struct bio_cgroup *bio_cgroup;
+#endif
struct page *page;
int flags;
};
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/init/Kconfig linux-2.6.27-rc1-mm1.cg2/init/Kconfig
--- linux-2.6.27-rc1-mm1.cg1/init/Kconfig 2008-08-01 19:03:21.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/init/Kconfig 2008-08-01 19:18:38.000000000 +0900
@@ -418,9 +418,20 @@ config CGROUP_MEMRLIMIT_CTLR
memory RSS and Page Cache control. Virtual address space control
is provided by this controller.

+config CGROUP_BIO
+ bool "Block I/O cgroup subsystem"
+ depends on CGROUPS
+ select MM_OWNER
+ help
+ Provides a Resource Controller which makes it possible to track the owner
+ of every Block I/O.
+ The information this subsystem provides can be used from any
+ kind of module such as dm-ioband device mapper modules or
+ the cfq-scheduler.
+
config CGROUP_PAGE
def_bool y
- depends on CGROUP_MEM_RES_CTLR
+ depends on CGROUP_MEM_RES_CTLR || CGROUP_BIO

config SYSFS_DEPRECATED
bool
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/mm/Makefile linux-2.6.27-rc1-mm1.cg2/mm/Makefile
--- linux-2.6.27-rc1-mm1.cg1/mm/Makefile 2008-08-01 19:03:21.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/mm/Makefile 2008-08-01 19:18:38.000000000 +0900
@@ -35,4 +35,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_PAGE) += memcontrol.o
+obj-$(CONFIG_CGROUP_BIO) += biocontrol.o
obj-$(CONFIG_CGROUP_MEMRLIMIT_CTLR) += memrlimitcgroup.o
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/mm/biocontrol.c linux-2.6.27-rc1-mm1.cg2/mm/biocontrol.c
--- linux-2.6.27-rc1-mm1.cg1/mm/biocontrol.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/mm/biocontrol.c 2008-08-01 19:35:51.000000000 +0900
@@ -0,0 +1,233 @@
+/* biocontrol.c - Block I/O Controller
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Balbir Singh <balbir@linux.vnet.ibm.com>
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ * Copyright VA Linux Systems Japan, 2008
+ * Author Hirokazu Takahashi <taka@valinux.co.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/mm.h>
+#include <linux/blkdev.h>
+#include <linux/smp.h>
+#include <linux/bit_spinlock.h>
+#include <linux/idr.h>
+#include <linux/err.h>
+#include <linux/biocontrol.h>
+
+/* return corresponding bio_cgroup object of a cgroup */
+static inline struct bio_cgroup *cgroup_bio(struct cgroup *cgrp)
+{
+ return container_of(cgroup_subsys_state(cgrp, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+static struct idr bio_cgroup_id;
+static DEFINE_SPINLOCK(bio_cgroup_idr_lock);
+
+static struct cgroup_subsys_state *
+bio_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog;
+ struct io_context *ioc;
+ int error;
+
+ if (!cgrp->parent) {
+ static struct bio_cgroup default_bio_cgroup;
+ static struct io_context default_bio_io_context;
+
+ biog = &default_bio_cgroup;
+ ioc = &default_bio_io_context;
+ init_io_context(ioc);
+
+ idr_init(&bio_cgroup_id);
+ biog->id = 0;
+
+ page_cgroup_init();
+ } else {
+ biog = kzalloc(sizeof(*biog), GFP_KERNEL);
+ ioc = alloc_io_context(GFP_KERNEL, -1);
+ if (!ioc || !biog) {
+ error = -ENOMEM;
+ goto out;
+ }
+retry:
+ if (unlikely(!idr_pre_get(&bio_cgroup_id, GFP_KERNEL))) {
+ error = -EAGAIN;
+ goto out;
+ }
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ error = idr_get_new_above(&bio_cgroup_id,
+ (void *)biog, 1, &biog->id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+ if (error == -EAGAIN)
+ goto retry;
+ else if (error)
+ goto out;
+ }
+
+ ioc->id = biog->id;
+ biog->io_context = ioc;
+
+ INIT_LIST_HEAD(&biog->page_list);
+ spin_lock_init(&biog->page_list_lock);
+
+ /* Bind the cgroup to bio_cgroup object we just created */
+ biog->css.cgroup = cgrp;
+
+ return &biog->css;
+out:
+ if (ioc)
+ put_io_context(ioc);
+ kfree(biog);
+ return ERR_PTR(error);
+}
+
+#define FORCE_UNCHARGE_BATCH (128)
+static void bio_cgroup_force_empty(struct bio_cgroup *biog)
+{
+ struct page_cgroup *pc;
+ struct page *page;
+ int count = FORCE_UNCHARGE_BATCH;
+ struct list_head *list = &biog->page_list;
+ unsigned long flags;
+
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ while (!list_empty(list)) {
+ pc = list_entry(list->prev, struct page_cgroup, blist);
+ page = pc->page;
+ get_page(page);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+ mem_cgroup_uncharge_page(page);
+ put_page(page);
+ if (--count <= 0) {
+ count = FORCE_UNCHARGE_BATCH;
+ cond_resched();
+ }
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ }
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+ return;
+}
+
+static void bio_cgroup_pre_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+ bio_cgroup_force_empty(biog);
+}
+
+static void bio_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+
+ put_io_context(biog->io_context);
+
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ idr_remove(&bio_cgroup_id, biog->id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+
+ kfree(biog);
+}
+
+struct bio_cgroup *find_bio_cgroup(int id)
+{
+ struct bio_cgroup *biog;
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ biog = (struct bio_cgroup *)
+ idr_find(&bio_cgroup_id, id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+ get_bio_cgroup(biog);
+ return biog;
+}
+
+struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+ struct io_context *ioc;
+ struct page_cgroup *pc;
+ struct bio_cgroup *biog;
+ struct page *page = bio_iovec_idx(bio, 0)->bv_page;
+
+ lock_page_cgroup(page);
+ pc = page_get_page_cgroup(page);
+ if (pc)
+ biog = pc->bio_cgroup;
+ else
+ biog = bio_cgroup_from_task(rcu_dereference(init_mm.owner ));
+ ioc = biog->io_context; /* default io_context for this cgroup */
+ atomic_inc(&ioc->refcount);
+ unlock_page_cgroup(page);
+ return ioc;
+}
+EXPORT_SYMBOL(get_bio_cgroup_iocontext);
+
+static u64 bio_id_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+
+ return (u64) biog->id;
+}
+
+
+static struct cftype bio_files[] = {
+ {
+ .name = "id",
+ .read_u64 = bio_id_read,
+ },
+};
+
+static int bio_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+ if (bio_cgroup_disabled())
+ return 0;
+ return cgroup_add_files(cont, ss, bio_files, ARRAY_SIZE(bio_files));
+}
+
+static void bio_cgroup_move_task(struct cgroup_subsys *ss,
+ struct cgroup *cont,
+ struct cgroup *old_cont,
+ struct task_struct *p)
+{
+ struct mm_struct *mm;
+ struct bio_cgroup *biog, *old_biog;
+
+ if (bio_cgroup_disabled())
+ return;
+
+ mm = get_task_mm(p);
+ if (mm == NULL)
+ return;
+
+ biog = cgroup_bio(cont);
+ old_biog = cgroup_bio(old_cont);
+
+ mmput(mm);
+ return;
+}
+
+
+struct cgroup_subsys bio_cgroup_subsys = {
+ .name = "bio",
+ .subsys_id = bio_cgroup_subsys_id,
+ .create = bio_cgroup_create,
+ .destroy = bio_cgroup_destroy,
+ .pre_destroy = bio_cgroup_pre_destroy,
+ .populate = bio_cgroup_populate,
+ .attach = bio_cgroup_move_task,
+ .early_init = 0,
+};
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/mm/memcontrol.c linux-2.6.27-rc1-mm1.cg2/mm/memcontrol.c
--- linux-2.6.27-rc1-mm1.cg1/mm/memcontrol.c 2008-08-01 19:49:38.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/mm/memcontrol.c 2008-08-01 19:49:53.000000000 +0900
@@ -20,6 +20,7 @@
#include <linux/res_counter.h>
#include <linux/memcontrol.h>
#include <linux/cgroup.h>
+#include <linux/biocontrol.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/page-flags.h>
@@ -1019,11 +1020,12 @@ struct page_cgroup *page_get_page_cgroup
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask, enum charge_type ctype,
- struct mem_cgroup *memcg)
+ gfp_t gfp_mask, enum charge_type ctype,
+ struct mem_cgroup *memcg, struct bio_cgroup *biocg)
{
struct page_cgroup *pc;
struct mem_cgroup *mem;
+ struct bio_cgroup *biog;

pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
if (unlikely(pc == NULL))
@@ -1035,18 +1037,15 @@ static int mem_cgroup_charge_common(stru
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
- if (likely(!memcg)) {
- rcu_read_lock();
- mem = mm_get_mem_cgroup(mm);
- rcu_read_unlock();
- } else {
- mem = memcg;
- get_mem_cgroup(mem);
- }
+ rcu_read_lock();
+ mem = memcg ? memcg : mm_get_mem_cgroup(mm);
+ biog = biocg ? biocg : mm_get_bio_cgroup(mm);
+ rcu_read_unlock();

if (mem_cgroup_try_to_allocate(mem, gfp_mask) < 0)
goto out;
set_mem_cgroup(pc, mem);
+ set_bio_cgroup(pc, biog);
pc->page = page;
/*
* If a page is accounted as a page cache, insert to inactive list.
@@ -1065,18 +1064,21 @@ static int mem_cgroup_charge_common(stru
if (unlikely(page_get_page_cgroup(page))) {
unlock_page_cgroup(page);
clear_mem_cgroup(pc);
+ clear_bio_cgroup(pc);
kmem_cache_free(page_cgroup_cache, pc);
goto done;
}
page_assign_page_cgroup(page, pc);

mem_cgroup_add_page(pc);
+ bio_cgroup_add_page(pc);

unlock_page_cgroup(page);
done:
return 0;
out:
put_mem_cgroup(mem);
+ put_bio_cgroup(biog);
kmem_cache_free(page_cgroup_cache, pc);
err:
return -ENOMEM;
@@ -1099,7 +1101,7 @@ int mem_cgroup_charge(struct page *page,
if (unlikely(!mm))
mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
+ MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL, NULL);
}

int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -1135,7 +1137,7 @@ int mem_cgroup_cache_charge(struct page
mm = &init_mm;

return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+ MEM_CGROUP_CHARGE_TYPE_CACHE, NULL, NULL);
}

/*
@@ -1146,7 +1148,7 @@ __mem_cgroup_uncharge_common(struct page
{
struct page_cgroup *pc;

- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return;

/*
@@ -1166,11 +1168,13 @@ __mem_cgroup_uncharge_common(struct page
goto unlock;

mem_cgroup_remove_page(pc);
+ bio_cgroup_remove_page(pc);

page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);

clear_mem_cgroup(pc);
+ clear_bio_cgroup(pc);

kmem_cache_free(page_cgroup_cache, pc);
return;
@@ -1196,24 +1200,29 @@ int mem_cgroup_prepare_migration(struct
{
struct page_cgroup *pc;
struct mem_cgroup *mem = NULL;
+ struct bio_cgroup *biog = NULL;
enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
int ret = 0;

- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return 0;

lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
if (pc) {
mem = get_mem_page_cgroup(pc);
+ biog = get_bio_page_cgroup(pc);
if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
}
unlock_page_cgroup(page);
- if (mem) {
+ if (pc) {
ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
- ctype, mem);
- put_mem_cgroup(mem);
+ ctype, mem, biog);
+ if (mem)
+ put_mem_cgroup(mem);
+ if (biog)
+ put_bio_cgroup(biog);
}
return ret;
}

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 08-08-2008, 07:10 AM
Takuya Yoshikawa
 
Default bio-cgroup: Implement the bio-cgroup

Ryo Tsuruta wrote:

+static void bio_cgroup_move_task(struct cgroup_subsys *ss,
+ struct cgroup *cont,
+ struct cgroup *old_cont,
+ struct task_struct *p)
+{
+ struct mm_struct *mm;
+ struct bio_cgroup *biog, *old_biog;
+
+ if (bio_cgroup_disabled())
+ return;
+
+ mm = get_task_mm(p);
+ if (mm == NULL)
+ return;
+
+ biog = cgroup_bio(cont);
+ old_biog = cgroup_bio(old_cont);
+
+ mmput(mm);
+ return;
+}


Is this function fully implemented?
I tried to put a process into a group by writing to
"/cgroup/bio/BGROUP/tasks" but failed.


I think this function is not enough to be used as "attach."


+
+
+struct cgroup_subsys bio_cgroup_subsys = {
+ .name = "bio",
+ .subsys_id = bio_cgroup_subsys_id,
+ .create = bio_cgroup_create,
+ .destroy = bio_cgroup_destroy,
+ .pre_destroy = bio_cgroup_pre_destroy,
+ .populate = bio_cgroup_populate,
+ .attach = bio_cgroup_move_task,
+ .early_init = 0,
+};


Without "attach" function, it is difficult to check
the effectiveness of block I/O tracking.

Thanks,
- Takuya Yoshikawa

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 08-08-2008, 08:30 AM
Ryo Tsuruta
 
Default bio-cgroup: Implement the bio-cgroup

Hi Yoshikawa-san,

> > +static void bio_cgroup_move_task(struct cgroup_subsys *ss,
> > + struct cgroup *cont,
> > + struct cgroup *old_cont,
> > + struct task_struct *p)
> > +{
> > + struct mm_struct *mm;
> > + struct bio_cgroup *biog, *old_biog;
> > +
> > + if (bio_cgroup_disabled())
> > + return;
> > +
> > + mm = get_task_mm(p);
> > + if (mm == NULL)
> > + return;
> > +
> > + biog = cgroup_bio(cont);
> > + old_biog = cgroup_bio(old_cont);
> > +
> > + mmput(mm);
> > + return;
> > +}
>
> Is this function fully implemented?

This function can be more simplified, there is some unnecessary code
from old version.

> I tried to put a process into a group by writing to
> "/cgroup/bio/BGROUP/tasks" but failed.

Could you tell me what you actually did? I will try the same thing.

--
Ryo Tsuruta <ryov@valinux.co.jp>

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 08-08-2008, 09:42 AM
Takuya Yoshikawa
 
Default bio-cgroup: Implement the bio-cgroup

Hi Tsuruta-san,

Ryo Tsuruta wrote:

Hi Yoshikawa-san,


+static void bio_cgroup_move_task(struct cgroup_subsys *ss,
+ struct cgroup *cont,
+ struct cgroup *old_cont,
+ struct task_struct *p)
+{
+ struct mm_struct *mm;
+ struct bio_cgroup *biog, *old_biog;
+
+ if (bio_cgroup_disabled())
+ return;
+
+ mm = get_task_mm(p);
+ if (mm == NULL)
+ return;
+
+ biog = cgroup_bio(cont);
+ old_biog = cgroup_bio(old_cont);
+
+ mmput(mm);
+ return;
+}

Is this function fully implemented?


This function can be more simplified, there is some unnecessary code
from old version.




I think it is necessary to attach the task p to the new biog.


I tried to put a process into a group by writing to
"/cgroup/bio/BGROUP/tasks" but failed.


Could you tell me what you actually did? I will try the same thing.

--
Ryo Tsuruta <ryov@valinux.co.jp>



I wanted to test my own scheduler which uses bio tracking information.
SO I tried your patch, especially, get_bio_cgroup_iocontext(), to get
the io_context from bio.

In my test, I made some threads with certain iopriorities run
concurrently. To schedule these threads based on their iopriorities,

I made BGROUP directories for each iopriorities.
e.g. /cgroup/bio/be0 ... /cgroup/bio/be7
Then, I tried to attach the processes to the appropriate groups.

But the processes stayed in the original group(id=0).
...

I am sorry but I have to leave now and I cannot come here next week.
--> I will take summer holidays.

I will reply to you later.

Thanks,
- Takuya Yoshikawa

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 08-08-2008, 11:41 AM
Ryo Tsuruta
 
Default bio-cgroup: Implement the bio-cgroup

Hi Yoshikawa-san,

> I wanted to test my own scheduler which uses bio tracking information.
> SO I tried your patch, especially, get_bio_cgroup_iocontext(), to get
> the io_context from bio.
>
> In my test, I made some threads with certain iopriorities run
> concurrently. To schedule these threads based on their iopriorities,
> I made BGROUP directories for each iopriorities.
> e.g. /cgroup/bio/be0 ... /cgroup/bio/be7
> Then, I tried to attach the processes to the appropriate groups.
>
> But the processes stayed in the original group(id=0).

In the current implementation, when a process moves to another cgroup:
- Already allocated memory does not move to the cgroup, still remains.
- Only allocated memory after move belongs to the cgroup.
This behavior follows the memory controller.

Memory does not move between cgroups since it is such a heavy operation,
but it might be worthwhile under certain conditions.

Could you try to move a process between cgroups in the following way?

# echo $$ > /cgroup/bio/be0
# run_your_program
# echo $$ > /cgroup/bio/be1
# run_your_program
...

> I am sorry but I have to leave now and I cannot come here next week.
> --> I will take summer holidays.

Have a nice vacation!

Thanks,
Ryo Tsuruta

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 08-12-2008, 12:36 PM
Ryo Tsuruta
 
Default bio-cgroup: Implement the bio-cgroup

This patch implements the bio cgroup on the memory cgroup.

Based on 2.6.27-rc1-mm1
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>

diff -Ndupr linux-2.6.27-rc1-mm1.cg1/block/blk-ioc.c linux-2.6.27-rc1-mm1.cg2/block/blk-ioc.c
--- linux-2.6.27-rc1-mm1.cg1/block/blk-ioc.c 2008-07-29 11:40:31.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/block/blk-ioc.c 2008-08-12 14:47:59.000000000 +0900
@@ -84,24 +84,28 @@ void exit_io_context(void)
}
}

+void init_io_context(struct io_context *ioc)
+{
+ atomic_set(&ioc->refcount, 1);
+ atomic_set(&ioc->nr_tasks, 1);
+ spin_lock_init(&ioc->lock);
+ ioc->ioprio_changed = 0;
+ ioc->ioprio = 0;
+ ioc->last_waited = jiffies; /* doesn't matter... */
+ ioc->nr_batch_requests = 0; /* because this is 0 */
+ ioc->aic = NULL;
+ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+ INIT_HLIST_HEAD(&ioc->cic_list);
+ ioc->ioc_data = NULL;
+}
+
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
struct io_context *ret;

ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
- if (ret) {
- atomic_set(&ret->refcount, 1);
- atomic_set(&ret->nr_tasks, 1);
- spin_lock_init(&ret->lock);
- ret->ioprio_changed = 0;
- ret->ioprio = 0;
- ret->last_waited = jiffies; /* doesn't matter... */
- ret->nr_batch_requests = 0; /* because this is 0 */
- ret->aic = NULL;
- INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
- INIT_HLIST_HEAD(&ret->cic_list);
- ret->ioc_data = NULL;
- }
+ if (ret)
+ init_io_context(ret);

return ret;
}
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/biocontrol.h linux-2.6.27-rc1-mm1.cg2/include/linux/biocontrol.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/biocontrol.h 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/biocontrol.h 2008-08-12 14:48:00.000000000 +0900
@@ -0,0 +1,159 @@
+#include <linux/cgroup.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+
+#ifndef _LINUX_BIOCONTROL_H
+#define _LINUX_BIOCONTROL_H
+
+#ifdef CONFIG_CGROUP_BIO
+
+struct io_context;
+struct block_device;
+
+struct bio_cgroup {
+ struct cgroup_subsys_state css;
+ int id;
+ struct io_context *io_context; /* default io_context */
+/* struct radix_tree_root io_context_root; per device io_context */
+ spinlock_t page_list_lock;
+ struct list_head page_list;
+};
+
+static inline int bio_cgroup_disabled(void)
+{
+ return bio_cgroup_subsys.disabled;
+}
+
+static inline struct bio_cgroup *bio_cgroup_from_task(struct task_struct *p)
+{
+ return container_of(task_subsys_state(p, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+static inline void __bio_cgroup_add_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ list_add(&pc->blist, &biog->page_list);
+}
+
+static inline void bio_cgroup_add_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ unsigned long flags;
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ __bio_cgroup_add_page(pc);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+}
+
+static inline void __bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+ list_del_init(&pc->blist);
+}
+
+static inline void bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ unsigned long flags;
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ __bio_cgroup_remove_page(pc);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+}
+
+static inline void get_bio_cgroup(struct bio_cgroup *biog)
+{
+ css_get(&biog->css);
+}
+
+static inline void put_bio_cgroup(struct bio_cgroup *biog)
+{
+ css_put(&biog->css);
+}
+
+static inline void set_bio_cgroup(struct page_cgroup *pc,
+ struct bio_cgroup *biog)
+{
+ pc->bio_cgroup = biog;
+}
+
+static inline void clear_bio_cgroup(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ pc->bio_cgroup = NULL;
+ put_bio_cgroup(biog);
+}
+
+static inline struct bio_cgroup *get_bio_page_cgroup(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ css_get(&biog->css);
+ return biog;
+}
+
+/* This should be called in an RCU-protected section. */
+static inline struct bio_cgroup *mm_get_bio_cgroup(struct mm_struct *mm)
+{
+ struct bio_cgroup *biog;
+ biog = bio_cgroup_from_task(rcu_dereference(mm->owner));
+ get_bio_cgroup(biog);
+ return biog;
+}
+
+extern struct io_context *get_bio_cgroup_iocontext(struct bio *bio);
+
+#else /* CONFIG_CGROUP_BIO */
+
+struct bio_cgroup;
+
+static inline int bio_cgroup_disabled(void)
+{
+ return 1;
+}
+
+static inline void bio_cgroup_add_page(struct page_cgroup *pc)
+{
+}
+
+static inline void bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+}
+
+static inline void get_bio_cgroup(struct bio_cgroup *biog)
+{
+}
+
+static inline void put_bio_cgroup(struct bio_cgroup *biog)
+{
+}
+
+static inline void set_bio_cgroup(struct page_cgroup *pc,
+ struct bio_cgroup *biog)
+{
+}
+
+static inline void clear_bio_cgroup(struct page_cgroup *pc)
+{
+}
+
+static inline struct bio_cgroup *get_bio_page_cgroup(struct page_cgroup *pc)
+{
+ return NULL;
+}
+
+static inline struct bio_cgroup *mm_get_bio_cgroup(struct mm_struct *mm)
+{
+ return NULL;
+}
+
+static inline int get_bio_cgroup_id(struct page *page)
+{
+ return 0;
+}
+
+static inline struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_CGROUP_BIO */
+
+#endif /* _LINUX_BIOCONTROL_H */
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/cgroup_subsys.h linux-2.6.27-rc1-mm1.cg2/include/linux/cgroup_subsys.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/cgroup_subsys.h 2008-08-12 14:30:19.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/cgroup_subsys.h 2008-08-12 14:48:00.000000000 +0900
@@ -43,6 +43,12 @@ SUBSYS(mem_cgroup)

/* */

+#ifdef CONFIG_CGROUP_BIO
+SUBSYS(bio_cgroup)
+#endif
+
+/* */
+
#ifdef CONFIG_CGROUP_DEVICE
SUBSYS(devices)
#endif
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/iocontext.h linux-2.6.27-rc1-mm1.cg2/include/linux/iocontext.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/iocontext.h 2008-07-29 11:40:31.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/iocontext.h 2008-08-12 14:48:00.000000000 +0900
@@ -83,6 +83,8 @@ struct io_context {
struct radix_tree_root radix_root;
struct hlist_head cic_list;
void *ioc_data;
+
+ int id; /* cgroup ID */
};

static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -104,6 +106,7 @@ int put_io_context(struct io_context *io
void exit_io_context(void);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+void init_io_context(struct io_context *ioc);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
#else
static inline void exit_io_context(void)
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/memcontrol.h linux-2.6.27-rc1-mm1.cg2/include/linux/memcontrol.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/memcontrol.h 2008-08-12 14:47:22.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/memcontrol.h 2008-08-12 14:48:00.000000000 +0900
@@ -54,6 +54,10 @@ struct page_cgroup {
struct list_head lru; /* per cgroup LRU list */
struct mem_cgroup *mem_cgroup;
#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
+#ifdef CONFIG_CGROUP_BIO
+ struct list_head blist; /* for bio_cgroup page list */
+ struct bio_cgroup *bio_cgroup;
+#endif
struct page *page;
int flags;
};
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/init/Kconfig linux-2.6.27-rc1-mm1.cg2/init/Kconfig
--- linux-2.6.27-rc1-mm1.cg1/init/Kconfig 2008-08-12 14:47:22.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/init/Kconfig 2008-08-12 14:48:00.000000000 +0900
@@ -418,9 +418,20 @@ config CGROUP_MEMRLIMIT_CTLR
memory RSS and Page Cache control. Virtual address space control
is provided by this controller.

+config CGROUP_BIO
+ bool "Block I/O cgroup subsystem"
+ depends on CGROUPS
+ select MM_OWNER
+ help
+ Provides a Resource Controller which enables tracking the owner
+ of every Block I/O.
+ The information this subsystem provides can be used from any
+ kind of module such as dm-ioband device mapper modules or
+ the cfq-scheduler.
+
config CGROUP_PAGE
def_bool y
- depends on CGROUP_MEM_RES_CTLR
+ depends on CGROUP_MEM_RES_CTLR || CGROUP_BIO

config SYSFS_DEPRECATED
bool
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/mm/Makefile linux-2.6.27-rc1-mm1.cg2/mm/Makefile
--- linux-2.6.27-rc1-mm1.cg1/mm/Makefile 2008-08-12 14:47:22.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/mm/Makefile 2008-08-12 14:48:00.000000000 +0900
@@ -35,4 +35,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_PAGE) += memcontrol.o
+obj-$(CONFIG_CGROUP_BIO) += biocontrol.o
obj-$(CONFIG_CGROUP_MEMRLIMIT_CTLR) += memrlimitcgroup.o
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/mm/biocontrol.c linux-2.6.27-rc1-mm1.cg2/mm/biocontrol.c
--- linux-2.6.27-rc1-mm1.cg1/mm/biocontrol.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/mm/biocontrol.c 2008-08-12 15:06:12.000000000 +0900
@@ -0,0 +1,219 @@
+/* biocontrol.c - Block I/O Controller
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Balbir Singh <balbir@linux.vnet.ibm.com>
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ * Copyright VA Linux Systems Japan, 2008
+ * Author Hirokazu Takahashi <taka@valinux.co.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/mm.h>
+#include <linux/blkdev.h>
+#include <linux/smp.h>
+#include <linux/bit_spinlock.h>
+#include <linux/idr.h>
+#include <linux/err.h>
+#include <linux/biocontrol.h>
+
+/* return corresponding bio_cgroup object of a cgroup */
+static inline struct bio_cgroup *cgroup_bio(struct cgroup *cgrp)
+{
+ return container_of(cgroup_subsys_state(cgrp, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+static struct idr bio_cgroup_id;
+static DEFINE_SPINLOCK(bio_cgroup_idr_lock);
+
+static struct cgroup_subsys_state *
+bio_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog;
+ struct io_context *ioc;
+ int error;
+
+ if (!cgrp->parent) {
+ static struct bio_cgroup default_bio_cgroup;
+ static struct io_context default_bio_io_context;
+
+ biog = &default_bio_cgroup;
+ ioc = &default_bio_io_context;
+ init_io_context(ioc);
+
+ idr_init(&bio_cgroup_id);
+ biog->id = 0;
+
+ page_cgroup_init();
+ } else {
+ biog = kzalloc(sizeof(*biog), GFP_KERNEL);
+ ioc = alloc_io_context(GFP_KERNEL, -1);
+ if (!ioc || !biog) {
+ error = -ENOMEM;
+ goto out;
+ }
+retry:
+ if (unlikely(!idr_pre_get(&bio_cgroup_id, GFP_KERNEL))) {
+ error = -EAGAIN;
+ goto out;
+ }
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ error = idr_get_new_above(&bio_cgroup_id,
+ (void *)biog, 1, &biog->id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+ if (error == -EAGAIN)
+ goto retry;
+ else if (error)
+ goto out;
+ }
+
+ ioc->id = biog->id;
+ biog->io_context = ioc;
+
+ INIT_LIST_HEAD(&biog->page_list);
+ spin_lock_init(&biog->page_list_lock);
+
+ /* Bind the cgroup to bio_cgroup object we just created */
+ biog->css.cgroup = cgrp;
+
+ return &biog->css;
+out:
+ if (ioc)
+ put_io_context(ioc);
+ kfree(biog);
+ return ERR_PTR(error);
+}
+
+#define FORCE_UNCHARGE_BATCH (128)
+static void bio_cgroup_force_empty(struct bio_cgroup *biog)
+{
+ struct page_cgroup *pc;
+ struct page *page;
+ int count = FORCE_UNCHARGE_BATCH;
+ struct list_head *list = &biog->page_list;
+ unsigned long flags;
+
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ while (!list_empty(list)) {
+ pc = list_entry(list->prev, struct page_cgroup, blist);
+ page = pc->page;
+ get_page(page);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+ mem_cgroup_uncharge_page(page);
+ put_page(page);
+ if (--count <= 0) {
+ count = FORCE_UNCHARGE_BATCH;
+ cond_resched();
+ }
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ }
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+ return;
+}
+
+static void bio_cgroup_pre_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+ bio_cgroup_force_empty(biog);
+}
+
+static void bio_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+
+ put_io_context(biog->io_context);
+
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ idr_remove(&bio_cgroup_id, biog->id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+
+ kfree(biog);
+}
+
+struct bio_cgroup *find_bio_cgroup(int id)
+{
+ struct bio_cgroup *biog;
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ biog = (struct bio_cgroup *)
+ idr_find(&bio_cgroup_id, id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+ get_bio_cgroup(biog);
+ return biog;
+}
+
+struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+ struct io_context *ioc;
+ struct page_cgroup *pc;
+ struct bio_cgroup *biog;
+ struct page *page = bio_iovec_idx(bio, 0)->bv_page;
+
+ lock_page_cgroup(page);
+ pc = page_get_page_cgroup(page);
+ if (pc)
+ biog = pc->bio_cgroup;
+ else
+ biog = bio_cgroup_from_task(rcu_dereference(init_mm.owner ));
+ ioc = biog->io_context; /* default io_context for this cgroup */
+ atomic_inc(&ioc->refcount);
+ unlock_page_cgroup(page);
+ return ioc;
+}
+EXPORT_SYMBOL(get_bio_cgroup_iocontext);
+
+static u64 bio_id_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+
+ return (u64) biog->id;
+}
+
+
+static struct cftype bio_files[] = {
+ {
+ .name = "id",
+ .read_u64 = bio_id_read,
+ },
+};
+
+static int bio_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+ if (bio_cgroup_disabled())
+ return 0;
+ return cgroup_add_files(cont, ss, bio_files, ARRAY_SIZE(bio_files));
+}
+
+static void bio_cgroup_move_task(struct cgroup_subsys *ss,
+ struct cgroup *cont,
+ struct cgroup *old_cont,
+ struct task_struct *p)
+{
+ /* do nothing */
+}
+
+
+struct cgroup_subsys bio_cgroup_subsys = {
+ .name = "bio",
+ .subsys_id = bio_cgroup_subsys_id,
+ .create = bio_cgroup_create,
+ .destroy = bio_cgroup_destroy,
+ .pre_destroy = bio_cgroup_pre_destroy,
+ .populate = bio_cgroup_populate,
+ .attach = bio_cgroup_move_task,
+ .early_init = 0,
+};
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/mm/memcontrol.c linux-2.6.27-rc1-mm1.cg2/mm/memcontrol.c
--- linux-2.6.27-rc1-mm1.cg1/mm/memcontrol.c 2008-08-12 14:47:40.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/mm/memcontrol.c 2008-08-12 14:48:00.000000000 +0900
@@ -20,6 +20,7 @@
#include <linux/res_counter.h>
#include <linux/memcontrol.h>
#include <linux/cgroup.h>
+#include <linux/biocontrol.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/page-flags.h>
@@ -1019,11 +1020,12 @@ struct page_cgroup *page_get_page_cgroup
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask, enum charge_type ctype,
- struct mem_cgroup *memcg)
+ gfp_t gfp_mask, enum charge_type ctype,
+ struct mem_cgroup *memcg, struct bio_cgroup *biocg)
{
struct page_cgroup *pc;
struct mem_cgroup *mem;
+ struct bio_cgroup *biog;

pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
if (unlikely(pc == NULL))
@@ -1035,18 +1037,15 @@ static int mem_cgroup_charge_common(stru
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
- if (likely(!memcg)) {
- rcu_read_lock();
- mem = mm_get_mem_cgroup(mm);
- rcu_read_unlock();
- } else {
- mem = memcg;
- get_mem_cgroup(mem);
- }
+ rcu_read_lock();
+ mem = memcg ? memcg : mm_get_mem_cgroup(mm);
+ biog = biocg ? biocg : mm_get_bio_cgroup(mm);
+ rcu_read_unlock();

if (mem_cgroup_try_to_allocate(mem, gfp_mask) < 0)
goto out;
set_mem_cgroup(pc, mem);
+ set_bio_cgroup(pc, biog);
pc->page = page;
/*
* If a page is accounted as a page cache, insert to inactive list.
@@ -1065,18 +1064,21 @@ static int mem_cgroup_charge_common(stru
if (unlikely(page_get_page_cgroup(page))) {
unlock_page_cgroup(page);
clear_mem_cgroup(pc);
+ clear_bio_cgroup(pc);
kmem_cache_free(page_cgroup_cache, pc);
goto done;
}
page_assign_page_cgroup(page, pc);

mem_cgroup_add_page(pc);
+ bio_cgroup_add_page(pc);

unlock_page_cgroup(page);
done:
return 0;
out:
put_mem_cgroup(mem);
+ put_bio_cgroup(biog);
kmem_cache_free(page_cgroup_cache, pc);
err:
return -ENOMEM;
@@ -1099,7 +1101,7 @@ int mem_cgroup_charge(struct page *page,
if (unlikely(!mm))
mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
+ MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL, NULL);
}

int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -1135,7 +1137,7 @@ int mem_cgroup_cache_charge(struct page
mm = &init_mm;

return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+ MEM_CGROUP_CHARGE_TYPE_CACHE, NULL, NULL);
}

/*
@@ -1146,7 +1148,7 @@ __mem_cgroup_uncharge_common(struct page
{
struct page_cgroup *pc;

- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return;

/*
@@ -1166,11 +1168,13 @@ __mem_cgroup_uncharge_common(struct page
goto unlock;

mem_cgroup_remove_page(pc);
+ bio_cgroup_remove_page(pc);

page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);

clear_mem_cgroup(pc);
+ clear_bio_cgroup(pc);

kmem_cache_free(page_cgroup_cache, pc);
return;
@@ -1196,24 +1200,29 @@ int mem_cgroup_prepare_migration(struct
{
struct page_cgroup *pc;
struct mem_cgroup *mem = NULL;
+ struct bio_cgroup *biog = NULL;
enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
int ret = 0;

- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return 0;

lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
if (pc) {
mem = get_mem_page_cgroup(pc);
+ biog = get_bio_page_cgroup(pc);
if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
}
unlock_page_cgroup(page);
- if (mem) {
+ if (pc) {
ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
- ctype, mem);
- put_mem_cgroup(mem);
+ ctype, mem, biog);
+ if (mem)
+ put_mem_cgroup(mem);
+ if (biog)
+ put_bio_cgroup(biog);
}
return ret;
}

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 09-19-2008, 11:03 AM
Ryo Tsuruta
 
Default bio-cgroup: Implement the bio-cgroup

This patch implements the bio cgroup on the memory cgroup.

Based on 2.6.27-rc1-mm1
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>

diff -Ndupr linux-2.6.27-rc1-mm1.cg1/block/blk-ioc.c linux-2.6.27-rc1-mm1.cg2/block/blk-ioc.c
--- linux-2.6.27-rc1-mm1.cg1/block/blk-ioc.c 2008-07-29 11:40:31.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/block/blk-ioc.c 2008-09-19 18:50:59.000000000 +0900
@@ -84,24 +84,28 @@ void exit_io_context(void)
}
}

+void init_io_context(struct io_context *ioc)
+{
+ atomic_set(&ioc->refcount, 1);
+ atomic_set(&ioc->nr_tasks, 1);
+ spin_lock_init(&ioc->lock);
+ ioc->ioprio_changed = 0;
+ ioc->ioprio = 0;
+ ioc->last_waited = jiffies; /* doesn't matter... */
+ ioc->nr_batch_requests = 0; /* because this is 0 */
+ ioc->aic = NULL;
+ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+ INIT_HLIST_HEAD(&ioc->cic_list);
+ ioc->ioc_data = NULL;
+}
+
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
struct io_context *ret;

ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
- if (ret) {
- atomic_set(&ret->refcount, 1);
- atomic_set(&ret->nr_tasks, 1);
- spin_lock_init(&ret->lock);
- ret->ioprio_changed = 0;
- ret->ioprio = 0;
- ret->last_waited = jiffies; /* doesn't matter... */
- ret->nr_batch_requests = 0; /* because this is 0 */
- ret->aic = NULL;
- INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
- INIT_HLIST_HEAD(&ret->cic_list);
- ret->ioc_data = NULL;
- }
+ if (ret)
+ init_io_context(ret);

return ret;
}
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/biocontrol.h linux-2.6.27-rc1-mm1.cg2/include/linux/biocontrol.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/biocontrol.h 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/biocontrol.h 2008-09-19 18:50:59.000000000 +0900
@@ -0,0 +1,159 @@
+#include <linux/cgroup.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+
+#ifndef _LINUX_BIOCONTROL_H
+#define _LINUX_BIOCONTROL_H
+
+#ifdef CONFIG_CGROUP_BIO
+
+struct io_context;
+struct block_device;
+
+struct bio_cgroup {
+ struct cgroup_subsys_state css;
+ int id;
+ struct io_context *io_context; /* default io_context */
+/* struct radix_tree_root io_context_root; per device io_context */
+ spinlock_t page_list_lock;
+ struct list_head page_list;
+};
+
+static inline int bio_cgroup_disabled(void)
+{
+ return bio_cgroup_subsys.disabled;
+}
+
+static inline struct bio_cgroup *bio_cgroup_from_task(struct task_struct *p)
+{
+ return container_of(task_subsys_state(p, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+static inline void __bio_cgroup_add_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ list_add(&pc->blist, &biog->page_list);
+}
+
+static inline void bio_cgroup_add_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ unsigned long flags;
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ __bio_cgroup_add_page(pc);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+}
+
+static inline void __bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+ list_del_init(&pc->blist);
+}
+
+static inline void bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ unsigned long flags;
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ __bio_cgroup_remove_page(pc);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+}
+
+static inline void get_bio_cgroup(struct bio_cgroup *biog)
+{
+ css_get(&biog->css);
+}
+
+static inline void put_bio_cgroup(struct bio_cgroup *biog)
+{
+ css_put(&biog->css);
+}
+
+static inline void set_bio_cgroup(struct page_cgroup *pc,
+ struct bio_cgroup *biog)
+{
+ pc->bio_cgroup = biog;
+}
+
+static inline void clear_bio_cgroup(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ pc->bio_cgroup = NULL;
+ put_bio_cgroup(biog);
+}
+
+static inline struct bio_cgroup *get_bio_page_cgroup(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ css_get(&biog->css);
+ return biog;
+}
+
+/* This should be called in an RCU-protected section. */
+static inline struct bio_cgroup *mm_get_bio_cgroup(struct mm_struct *mm)
+{
+ struct bio_cgroup *biog;
+ biog = bio_cgroup_from_task(rcu_dereference(mm->owner));
+ get_bio_cgroup(biog);
+ return biog;
+}
+
+extern struct io_context *get_bio_cgroup_iocontext(struct bio *bio);
+
+#else /* CONFIG_CGROUP_BIO */
+
+struct bio_cgroup;
+
+static inline int bio_cgroup_disabled(void)
+{
+ return 1;
+}
+
+static inline void bio_cgroup_add_page(struct page_cgroup *pc)
+{
+}
+
+static inline void bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+}
+
+static inline void get_bio_cgroup(struct bio_cgroup *biog)
+{
+}
+
+static inline void put_bio_cgroup(struct bio_cgroup *biog)
+{
+}
+
+static inline void set_bio_cgroup(struct page_cgroup *pc,
+ struct bio_cgroup *biog)
+{
+}
+
+static inline void clear_bio_cgroup(struct page_cgroup *pc)
+{
+}
+
+static inline struct bio_cgroup *get_bio_page_cgroup(struct page_cgroup *pc)
+{
+ return NULL;
+}
+
+static inline struct bio_cgroup *mm_get_bio_cgroup(struct mm_struct *mm)
+{
+ return NULL;
+}
+
+static inline int get_bio_cgroup_id(struct page *page)
+{
+ return 0;
+}
+
+static inline struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_CGROUP_BIO */
+
+#endif /* _LINUX_BIOCONTROL_H */
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/cgroup_subsys.h linux-2.6.27-rc1-mm1.cg2/include/linux/cgroup_subsys.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/cgroup_subsys.h 2008-09-19 10:54:43.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/cgroup_subsys.h 2008-09-19 18:50:59.000000000 +0900
@@ -43,6 +43,12 @@ SUBSYS(mem_cgroup)

/* */

+#ifdef CONFIG_CGROUP_BIO
+SUBSYS(bio_cgroup)
+#endif
+
+/* */
+
#ifdef CONFIG_CGROUP_DEVICE
SUBSYS(devices)
#endif
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/iocontext.h linux-2.6.27-rc1-mm1.cg2/include/linux/iocontext.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/iocontext.h 2008-07-29 11:40:31.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/iocontext.h 2008-09-19 18:50:59.000000000 +0900
@@ -83,6 +83,8 @@ struct io_context {
struct radix_tree_root radix_root;
struct hlist_head cic_list;
void *ioc_data;
+
+ int id; /* cgroup ID */
};

static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -104,6 +106,7 @@ int put_io_context(struct io_context *io
void exit_io_context(void);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+void init_io_context(struct io_context *ioc);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
#else
static inline void exit_io_context(void)
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/include/linux/memcontrol.h linux-2.6.27-rc1-mm1.cg2/include/linux/memcontrol.h
--- linux-2.6.27-rc1-mm1.cg1/include/linux/memcontrol.h 2008-09-19 18:50:59.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/include/linux/memcontrol.h 2008-09-19 18:50:59.000000000 +0900
@@ -54,6 +54,10 @@ struct page_cgroup {
struct list_head lru; /* per cgroup LRU list */
struct mem_cgroup *mem_cgroup;
#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
+#ifdef CONFIG_CGROUP_BIO
+ struct list_head blist; /* for bio_cgroup page list */
+ struct bio_cgroup *bio_cgroup;
+#endif
struct page *page;
int flags;
};
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/init/Kconfig linux-2.6.27-rc1-mm1.cg2/init/Kconfig
--- linux-2.6.27-rc1-mm1.cg1/init/Kconfig 2008-09-19 18:50:59.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/init/Kconfig 2008-09-19 18:50:59.000000000 +0900
@@ -418,9 +418,20 @@ config CGROUP_MEMRLIMIT_CTLR
memory RSS and Page Cache control. Virtual address space control
is provided by this controller.

+config CGROUP_BIO
+ bool "Block I/O cgroup subsystem"
+ depends on CGROUPS
+ select MM_OWNER
+ help
+ Provides a Resource Controller which enables tracking the owner
+ of every Block I/O.
+ The information this subsystem provides can be used from any
+ kind of module such as dm-ioband device mapper modules or
+ the cfq-scheduler.
+
config CGROUP_PAGE
def_bool y
- depends on CGROUP_MEM_RES_CTLR
+ depends on CGROUP_MEM_RES_CTLR || CGROUP_BIO

config SYSFS_DEPRECATED
bool
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/mm/biocontrol.c linux-2.6.27-rc1-mm1.cg2/mm/biocontrol.c
--- linux-2.6.27-rc1-mm1.cg1/mm/biocontrol.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/mm/biocontrol.c 2008-09-19 18:50:59.000000000 +0900
@@ -0,0 +1,219 @@
+/* biocontrol.c - Block I/O Controller
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Balbir Singh <balbir@linux.vnet.ibm.com>
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ * Copyright VA Linux Systems Japan, 2008
+ * Author Hirokazu Takahashi <taka@valinux.co.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/mm.h>
+#include <linux/blkdev.h>
+#include <linux/smp.h>
+#include <linux/bit_spinlock.h>
+#include <linux/idr.h>
+#include <linux/err.h>
+#include <linux/biocontrol.h>
+
+/* return corresponding bio_cgroup object of a cgroup */
+static inline struct bio_cgroup *cgroup_bio(struct cgroup *cgrp)
+{
+ return container_of(cgroup_subsys_state(cgrp, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+static struct idr bio_cgroup_id;
+static DEFINE_SPINLOCK(bio_cgroup_idr_lock);
+
+static struct cgroup_subsys_state *
+bio_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog;
+ struct io_context *ioc;
+ int error;
+
+ if (!cgrp->parent) {
+ static struct bio_cgroup default_bio_cgroup;
+ static struct io_context default_bio_io_context;
+
+ biog = &default_bio_cgroup;
+ ioc = &default_bio_io_context;
+ init_io_context(ioc);
+
+ idr_init(&bio_cgroup_id);
+ biog->id = 0;
+
+ page_cgroup_init();
+ } else {
+ biog = kzalloc(sizeof(*biog), GFP_KERNEL);
+ ioc = alloc_io_context(GFP_KERNEL, -1);
+ if (!ioc || !biog) {
+ error = -ENOMEM;
+ goto out;
+ }
+retry:
+ if (unlikely(!idr_pre_get(&bio_cgroup_id, GFP_KERNEL))) {
+ error = -EAGAIN;
+ goto out;
+ }
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ error = idr_get_new_above(&bio_cgroup_id,
+ (void *)biog, 1, &biog->id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+ if (error == -EAGAIN)
+ goto retry;
+ else if (error)
+ goto out;
+ }
+
+ ioc->id = biog->id;
+ biog->io_context = ioc;
+
+ INIT_LIST_HEAD(&biog->page_list);
+ spin_lock_init(&biog->page_list_lock);
+
+ /* Bind the cgroup to bio_cgroup object we just created */
+ biog->css.cgroup = cgrp;
+
+ return &biog->css;
+out:
+ if (ioc)
+ put_io_context(ioc);
+ kfree(biog);
+ return ERR_PTR(error);
+}
+
+#define FORCE_UNCHARGE_BATCH (128)
+static void bio_cgroup_force_empty(struct bio_cgroup *biog)
+{
+ struct page_cgroup *pc;
+ struct page *page;
+ int count = FORCE_UNCHARGE_BATCH;
+ struct list_head *list = &biog->page_list;
+ unsigned long flags;
+
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ while (!list_empty(list)) {
+ pc = list_entry(list->prev, struct page_cgroup, blist);
+ page = pc->page;
+ get_page(page);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+ mem_cgroup_uncharge_page(page);
+ put_page(page);
+ if (--count <= 0) {
+ count = FORCE_UNCHARGE_BATCH;
+ cond_resched();
+ }
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ }
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+ return;
+}
+
+static void bio_cgroup_pre_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+ bio_cgroup_force_empty(biog);
+}
+
+static void bio_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+
+ put_io_context(biog->io_context);
+
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ idr_remove(&bio_cgroup_id, biog->id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+
+ kfree(biog);
+}
+
+struct bio_cgroup *find_bio_cgroup(int id)
+{
+ struct bio_cgroup *biog;
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ biog = (struct bio_cgroup *)
+ idr_find(&bio_cgroup_id, id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+ get_bio_cgroup(biog);
+ return biog;
+}
+
+struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+ struct io_context *ioc;
+ struct page_cgroup *pc;
+ struct bio_cgroup *biog;
+ struct page *page = bio_iovec_idx(bio, 0)->bv_page;
+
+ lock_page_cgroup(page);
+ pc = page_get_page_cgroup(page);
+ if (pc)
+ biog = pc->bio_cgroup;
+ else
+ biog = bio_cgroup_from_task(rcu_dereference(init_mm.owner ));
+ ioc = biog->io_context; /* default io_context for this cgroup */
+ atomic_inc(&ioc->refcount);
+ unlock_page_cgroup(page);
+ return ioc;
+}
+EXPORT_SYMBOL(get_bio_cgroup_iocontext);
+
+static u64 bio_id_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+
+ return (u64) biog->id;
+}
+
+
+static struct cftype bio_files[] = {
+ {
+ .name = "id",
+ .read_u64 = bio_id_read,
+ },
+};
+
+static int bio_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+ if (bio_cgroup_disabled())
+ return 0;
+ return cgroup_add_files(cont, ss, bio_files, ARRAY_SIZE(bio_files));
+}
+
+static void bio_cgroup_move_task(struct cgroup_subsys *ss,
+ struct cgroup *cont,
+ struct cgroup *old_cont,
+ struct task_struct *p)
+{
+ /* do nothing */
+}
+
+
+struct cgroup_subsys bio_cgroup_subsys = {
+ .name = "bio",
+ .subsys_id = bio_cgroup_subsys_id,
+ .create = bio_cgroup_create,
+ .destroy = bio_cgroup_destroy,
+ .pre_destroy = bio_cgroup_pre_destroy,
+ .populate = bio_cgroup_populate,
+ .attach = bio_cgroup_move_task,
+ .early_init = 0,
+};
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/mm/Makefile linux-2.6.27-rc1-mm1.cg2/mm/Makefile
--- linux-2.6.27-rc1-mm1.cg1/mm/Makefile 2008-09-19 18:50:59.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/mm/Makefile 2008-09-19 18:50:59.000000000 +0900
@@ -35,4 +35,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_PAGE) += memcontrol.o
+obj-$(CONFIG_CGROUP_BIO) += biocontrol.o
obj-$(CONFIG_CGROUP_MEMRLIMIT_CTLR) += memrlimitcgroup.o
diff -Ndupr linux-2.6.27-rc1-mm1.cg1/mm/memcontrol.c linux-2.6.27-rc1-mm1.cg2/mm/memcontrol.c
--- linux-2.6.27-rc1-mm1.cg1/mm/memcontrol.c 2008-09-19 18:50:59.000000000 +0900
+++ linux-2.6.27-rc1-mm1.cg2/mm/memcontrol.c 2008-09-19 18:50:59.000000000 +0900
@@ -20,6 +20,7 @@
#include <linux/res_counter.h>
#include <linux/memcontrol.h>
#include <linux/cgroup.h>
+#include <linux/biocontrol.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/page-flags.h>
@@ -1019,11 +1020,12 @@ struct page_cgroup *page_get_page_cgroup
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask, enum charge_type ctype,
- struct mem_cgroup *memcg)
+ gfp_t gfp_mask, enum charge_type ctype,
+ struct mem_cgroup *memcg, struct bio_cgroup *biocg)
{
struct page_cgroup *pc;
struct mem_cgroup *mem;
+ struct bio_cgroup *biog;

pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
if (unlikely(pc == NULL))
@@ -1035,18 +1037,15 @@ static int mem_cgroup_charge_common(stru
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
- if (likely(!memcg)) {
- rcu_read_lock();
- mem = mm_get_mem_cgroup(mm);
- rcu_read_unlock();
- } else {
- mem = memcg;
- get_mem_cgroup(mem);
- }
+ rcu_read_lock();
+ mem = memcg ? memcg : mm_get_mem_cgroup(mm);
+ biog = biocg ? biocg : mm_get_bio_cgroup(mm);
+ rcu_read_unlock();

if (mem_cgroup_try_to_allocate(mem, gfp_mask) < 0)
goto out;
set_mem_cgroup(pc, mem);
+ set_bio_cgroup(pc, biog);
pc->page = page;
/*
* If a page is accounted as a page cache, insert to inactive list.
@@ -1065,18 +1064,21 @@ static int mem_cgroup_charge_common(stru
if (unlikely(page_get_page_cgroup(page))) {
unlock_page_cgroup(page);
clear_mem_cgroup(pc);
+ clear_bio_cgroup(pc);
kmem_cache_free(page_cgroup_cache, pc);
goto done;
}
page_assign_page_cgroup(page, pc);

mem_cgroup_add_page(pc);
+ bio_cgroup_add_page(pc);

unlock_page_cgroup(page);
done:
return 0;
out:
put_mem_cgroup(mem);
+ put_bio_cgroup(biog);
kmem_cache_free(page_cgroup_cache, pc);
err:
return -ENOMEM;
@@ -1099,7 +1101,7 @@ int mem_cgroup_charge(struct page *page,
if (unlikely(!mm))
mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
+ MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL, NULL);
}

int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -1135,7 +1137,7 @@ int mem_cgroup_cache_charge(struct page
mm = &init_mm;

return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+ MEM_CGROUP_CHARGE_TYPE_CACHE, NULL, NULL);
}

/*
@@ -1146,7 +1148,7 @@ __mem_cgroup_uncharge_common(struct page
{
struct page_cgroup *pc;

- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return;

/*
@@ -1165,11 +1167,13 @@ __mem_cgroup_uncharge_common(struct page
goto unlock;

mem_cgroup_remove_page(pc);
+ bio_cgroup_remove_page(pc);

page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);

clear_mem_cgroup(pc);
+ clear_bio_cgroup(pc);

kmem_cache_free(page_cgroup_cache, pc);
return;
@@ -1195,24 +1199,29 @@ int mem_cgroup_prepare_migration(struct
{
struct page_cgroup *pc;
struct mem_cgroup *mem = NULL;
+ struct bio_cgroup *biog = NULL;
enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
int ret = 0;

- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return 0;

lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
if (pc) {
mem = get_mem_page_cgroup(pc);
+ biog = get_bio_page_cgroup(pc);
if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
}
unlock_page_cgroup(page);
- if (mem) {
+ if (pc) {
ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
- ctype, mem);
- put_mem_cgroup(mem);
+ ctype, mem, biog);
+ if (mem)
+ put_mem_cgroup(mem);
+ if (biog)
+ put_bio_cgroup(biog);
}
return ret;
}

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 
Old 09-24-2008, 10:14 AM
Ryo Tsuruta
 
Default bio-cgroup: Implement the bio-cgroup

This patch implements the bio cgroup on the memory cgroup.

Based on 2.6.27-rc5-mm1
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>

diff -Ndupr linux-2.6.27-rc5-mm1.cg1/block/blk-ioc.c linux-2.6.27-rc5-mm1.cg2/block/blk-ioc.c
--- linux-2.6.27-rc5-mm1.cg1/block/blk-ioc.c 2008-08-29 07:52:02.000000000 +0900
+++ linux-2.6.27-rc5-mm1.cg2/block/blk-ioc.c 2008-09-24 16:34:51.000000000 +0900
@@ -84,24 +84,28 @@ void exit_io_context(void)
}
}

+void init_io_context(struct io_context *ioc)
+{
+ atomic_set(&ioc->refcount, 1);
+ atomic_set(&ioc->nr_tasks, 1);
+ spin_lock_init(&ioc->lock);
+ ioc->ioprio_changed = 0;
+ ioc->ioprio = 0;
+ ioc->last_waited = jiffies; /* doesn't matter... */
+ ioc->nr_batch_requests = 0; /* because this is 0 */
+ ioc->aic = NULL;
+ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+ INIT_HLIST_HEAD(&ioc->cic_list);
+ ioc->ioc_data = NULL;
+}
+
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
struct io_context *ret;

ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
- if (ret) {
- atomic_set(&ret->refcount, 1);
- atomic_set(&ret->nr_tasks, 1);
- spin_lock_init(&ret->lock);
- ret->ioprio_changed = 0;
- ret->ioprio = 0;
- ret->last_waited = jiffies; /* doesn't matter... */
- ret->nr_batch_requests = 0; /* because this is 0 */
- ret->aic = NULL;
- INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
- INIT_HLIST_HEAD(&ret->cic_list);
- ret->ioc_data = NULL;
- }
+ if (ret)
+ init_io_context(ret);

return ret;
}
diff -Ndupr linux-2.6.27-rc5-mm1.cg1/include/linux/biocontrol.h linux-2.6.27-rc5-mm1.cg2/include/linux/biocontrol.h
--- linux-2.6.27-rc5-mm1.cg1/include/linux/biocontrol.h 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.27-rc5-mm1.cg2/include/linux/biocontrol.h 2008-09-24 16:34:51.000000000 +0900
@@ -0,0 +1,159 @@
+#include <linux/cgroup.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+
+#ifndef _LINUX_BIOCONTROL_H
+#define _LINUX_BIOCONTROL_H
+
+#ifdef CONFIG_CGROUP_BIO
+
+struct io_context;
+struct block_device;
+
+struct bio_cgroup {
+ struct cgroup_subsys_state css;
+ int id;
+ struct io_context *io_context; /* default io_context */
+/* struct radix_tree_root io_context_root; per device io_context */
+ spinlock_t page_list_lock;
+ struct list_head page_list;
+};
+
+static inline int bio_cgroup_disabled(void)
+{
+ return bio_cgroup_subsys.disabled;
+}
+
+static inline struct bio_cgroup *bio_cgroup_from_task(struct task_struct *p)
+{
+ return container_of(task_subsys_state(p, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+static inline void __bio_cgroup_add_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ list_add(&pc->blist, &biog->page_list);
+}
+
+static inline void bio_cgroup_add_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ unsigned long flags;
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ __bio_cgroup_add_page(pc);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+}
+
+static inline void __bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+ list_del_init(&pc->blist);
+}
+
+static inline void bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ unsigned long flags;
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ __bio_cgroup_remove_page(pc);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+}
+
+static inline void get_bio_cgroup(struct bio_cgroup *biog)
+{
+ css_get(&biog->css);
+}
+
+static inline void put_bio_cgroup(struct bio_cgroup *biog)
+{
+ css_put(&biog->css);
+}
+
+static inline void set_bio_cgroup(struct page_cgroup *pc,
+ struct bio_cgroup *biog)
+{
+ pc->bio_cgroup = biog;
+}
+
+static inline void clear_bio_cgroup(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ pc->bio_cgroup = NULL;
+ put_bio_cgroup(biog);
+}
+
+static inline struct bio_cgroup *get_bio_page_cgroup(struct page_cgroup *pc)
+{
+ struct bio_cgroup *biog = pc->bio_cgroup;
+ css_get(&biog->css);
+ return biog;
+}
+
+/* This should be called in an RCU-protected section. */
+static inline struct bio_cgroup *mm_get_bio_cgroup(struct mm_struct *mm)
+{
+ struct bio_cgroup *biog;
+ biog = bio_cgroup_from_task(rcu_dereference(mm->owner));
+ get_bio_cgroup(biog);
+ return biog;
+}
+
+extern struct io_context *get_bio_cgroup_iocontext(struct bio *bio);
+
+#else /* CONFIG_CGROUP_BIO */
+
+struct bio_cgroup;
+
+static inline int bio_cgroup_disabled(void)
+{
+ return 1;
+}
+
+static inline void bio_cgroup_add_page(struct page_cgroup *pc)
+{
+}
+
+static inline void bio_cgroup_remove_page(struct page_cgroup *pc)
+{
+}
+
+static inline void get_bio_cgroup(struct bio_cgroup *biog)
+{
+}
+
+static inline void put_bio_cgroup(struct bio_cgroup *biog)
+{
+}
+
+static inline void set_bio_cgroup(struct page_cgroup *pc,
+ struct bio_cgroup *biog)
+{
+}
+
+static inline void clear_bio_cgroup(struct page_cgroup *pc)
+{
+}
+
+static inline struct bio_cgroup *get_bio_page_cgroup(struct page_cgroup *pc)
+{
+ return NULL;
+}
+
+static inline struct bio_cgroup *mm_get_bio_cgroup(struct mm_struct *mm)
+{
+ return NULL;
+}
+
+static inline int get_bio_cgroup_id(struct page *page)
+{
+ return 0;
+}
+
+static inline struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_CGROUP_BIO */
+
+#endif /* _LINUX_BIOCONTROL_H */
diff -Ndupr linux-2.6.27-rc5-mm1.cg1/include/linux/cgroup_subsys.h linux-2.6.27-rc5-mm1.cg2/include/linux/cgroup_subsys.h
--- linux-2.6.27-rc5-mm1.cg1/include/linux/cgroup_subsys.h 2008-09-24 16:23:03.000000000 +0900
+++ linux-2.6.27-rc5-mm1.cg2/include/linux/cgroup_subsys.h 2008-09-24 16:34:51.000000000 +0900
@@ -43,6 +43,12 @@ SUBSYS(mem_cgroup)

/* */

+#ifdef CONFIG_CGROUP_BIO
+SUBSYS(bio_cgroup)
+#endif
+
+/* */
+
#ifdef CONFIG_CGROUP_DEVICE
SUBSYS(devices)
#endif
diff -Ndupr linux-2.6.27-rc5-mm1.cg1/include/linux/iocontext.h linux-2.6.27-rc5-mm1.cg2/include/linux/iocontext.h
--- linux-2.6.27-rc5-mm1.cg1/include/linux/iocontext.h 2008-08-29 07:52:02.000000000 +0900
+++ linux-2.6.27-rc5-mm1.cg2/include/linux/iocontext.h 2008-09-24 16:34:51.000000000 +0900
@@ -83,6 +83,8 @@ struct io_context {
struct radix_tree_root radix_root;
struct hlist_head cic_list;
void *ioc_data;
+
+ int id; /* cgroup ID */
};

static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -104,6 +106,7 @@ int put_io_context(struct io_context *io
void exit_io_context(void);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+void init_io_context(struct io_context *ioc);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
#else
static inline void exit_io_context(void)
diff -Ndupr linux-2.6.27-rc5-mm1.cg1/include/linux/memcontrol.h linux-2.6.27-rc5-mm1.cg2/include/linux/memcontrol.h
--- linux-2.6.27-rc5-mm1.cg1/include/linux/memcontrol.h 2008-09-24 16:34:48.000000000 +0900
+++ linux-2.6.27-rc5-mm1.cg2/include/linux/memcontrol.h 2008-09-24 16:34:51.000000000 +0900
@@ -54,6 +54,10 @@ struct page_cgroup {
struct list_head lru; /* per cgroup LRU list */
struct mem_cgroup *mem_cgroup;
#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
+#ifdef CONFIG_CGROUP_BIO
+ struct list_head blist; /* for bio_cgroup page list */
+ struct bio_cgroup *bio_cgroup;
+#endif
struct page *page;
int flags;
};
diff -Ndupr linux-2.6.27-rc5-mm1.cg1/init/Kconfig linux-2.6.27-rc5-mm1.cg2/init/Kconfig
--- linux-2.6.27-rc5-mm1.cg1/init/Kconfig 2008-09-24 16:34:48.000000000 +0900
+++ linux-2.6.27-rc5-mm1.cg2/init/Kconfig 2008-09-24 16:34:51.000000000 +0900
@@ -425,9 +425,20 @@ config CGROUP_MEMRLIMIT_CTLR
memory RSS and Page Cache control. Virtual address space control
is provided by this controller.

+config CGROUP_BIO
+ bool "Block I/O cgroup subsystem"
+ depends on CGROUPS
+ select MM_OWNER
+ help
+ Provides a Resource Controller which makes it possible to track the
+ owner of every Block I/O.
+ The information this subsystem provides can be used from any
+ kind of module such as dm-ioband device mapper modules or
+ the cfq-scheduler.
+
config CGROUP_PAGE
def_bool y
- depends on CGROUP_MEM_RES_CTLR
+ depends on CGROUP_MEM_RES_CTLR || CGROUP_BIO

config SYSFS_DEPRECATED
bool
diff -Ndupr linux-2.6.27-rc5-mm1.cg1/mm/biocontrol.c linux-2.6.27-rc5-mm1.cg2/mm/biocontrol.c
--- linux-2.6.27-rc5-mm1.cg1/mm/biocontrol.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.27-rc5-mm1.cg2/mm/biocontrol.c 2008-09-24 16:34:51.000000000 +0900
@@ -0,0 +1,219 @@
+/* biocontrol.c - Block I/O Controller
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Balbir Singh <balbir@linux.vnet.ibm.com>
+ *
+ * Copyright 2007 OpenVZ SWsoft Inc
+ * Author: Pavel Emelianov <xemul@openvz.org>
+ *
+ * Copyright VA Linux Systems Japan, 2008
+ * Author Hirokazu Takahashi <taka@valinux.co.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/mm.h>
+#include <linux/blkdev.h>
+#include <linux/smp.h>
+#include <linux/bit_spinlock.h>
+#include <linux/idr.h>
+#include <linux/err.h>
+#include <linux/biocontrol.h>
+
+/* return corresponding bio_cgroup object of a cgroup */
+static inline struct bio_cgroup *cgroup_bio(struct cgroup *cgrp)
+{
+ return container_of(cgroup_subsys_state(cgrp, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+static struct idr bio_cgroup_id;
+static DEFINE_SPINLOCK(bio_cgroup_idr_lock);
+
+static struct cgroup_subsys_state *
+bio_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog;
+ struct io_context *ioc;
+ int error;
+
+ if (!cgrp->parent) {
+ static struct bio_cgroup default_bio_cgroup;
+ static struct io_context default_bio_io_context;
+
+ biog = &default_bio_cgroup;
+ ioc = &default_bio_io_context;
+ init_io_context(ioc);
+
+ idr_init(&bio_cgroup_id);
+ biog->id = 0;
+
+ page_cgroup_init();
+ } else {
+ biog = kzalloc(sizeof(*biog), GFP_KERNEL);
+ ioc = alloc_io_context(GFP_KERNEL, -1);
+ if (!ioc || !biog) {
+ error = -ENOMEM;
+ goto out;
+ }
+retry:
+ if (unlikely(!idr_pre_get(&bio_cgroup_id, GFP_KERNEL))) {
+ error = -EAGAIN;
+ goto out;
+ }
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ error = idr_get_new_above(&bio_cgroup_id,
+ (void *)biog, 1, &biog->id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+ if (error == -EAGAIN)
+ goto retry;
+ else if (error)
+ goto out;
+ }
+
+ ioc->id = biog->id;
+ biog->io_context = ioc;
+
+ INIT_LIST_HEAD(&biog->page_list);
+ spin_lock_init(&biog->page_list_lock);
+
+ /* Bind the cgroup to bio_cgroup object we just created */
+ biog->css.cgroup = cgrp;
+
+ return &biog->css;
+out:
+ if (ioc)
+ put_io_context(ioc);
+ kfree(biog);
+ return ERR_PTR(error);
+}
+
+#define FORCE_UNCHARGE_BATCH (128)
+static void bio_cgroup_force_empty(struct bio_cgroup *biog)
+{
+ struct page_cgroup *pc;
+ struct page *page;
+ int count = FORCE_UNCHARGE_BATCH;
+ struct list_head *list = &biog->page_list;
+ unsigned long flags;
+
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ while (!list_empty(list)) {
+ pc = list_entry(list->prev, struct page_cgroup, blist);
+ page = pc->page;
+ get_page(page);
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+ mem_cgroup_uncharge_page(page);
+ put_page(page);
+ if (--count <= 0) {
+ count = FORCE_UNCHARGE_BATCH;
+ cond_resched();
+ }
+ spin_lock_irqsave(&biog->page_list_lock, flags);
+ }
+ spin_unlock_irqrestore(&biog->page_list_lock, flags);
+ return;
+}
+
+static void bio_cgroup_pre_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+ bio_cgroup_force_empty(biog);
+}
+
+static void bio_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+
+ put_io_context(biog->io_context);
+
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ idr_remove(&bio_cgroup_id, biog->id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+
+ kfree(biog);
+}
+
+struct bio_cgroup *find_bio_cgroup(int id)
+{
+ struct bio_cgroup *biog;
+ spin_lock_irq(&bio_cgroup_idr_lock);
+ biog = (struct bio_cgroup *)
+ idr_find(&bio_cgroup_id, id);
+ spin_unlock_irq(&bio_cgroup_idr_lock);
+ get_bio_cgroup(biog);
+ return biog;
+}
+
+struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+ struct io_context *ioc;
+ struct page_cgroup *pc;
+ struct bio_cgroup *biog;
+ struct page *page = bio_iovec_idx(bio, 0)->bv_page;
+
+ lock_page_cgroup(page);
+ pc = page_get_page_cgroup(page);
+ if (pc)
+ biog = pc->bio_cgroup;
+ else
+ biog = bio_cgroup_from_task(rcu_dereference(init_mm.owner ));
+ ioc = biog->io_context; /* default io_context for this cgroup */
+ atomic_inc(&ioc->refcount);
+ unlock_page_cgroup(page);
+ return ioc;
+}
+EXPORT_SYMBOL(get_bio_cgroup_iocontext);
+
+static u64 bio_id_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ struct bio_cgroup *biog = cgroup_bio(cgrp);
+
+ return (u64) biog->id;
+}
+
+
+static struct cftype bio_files[] = {
+ {
+ .name = "id",
+ .read_u64 = bio_id_read,
+ },
+};
+
+static int bio_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+ if (bio_cgroup_disabled())
+ return 0;
+ return cgroup_add_files(cont, ss, bio_files, ARRAY_SIZE(bio_files));
+}
+
+static void bio_cgroup_move_task(struct cgroup_subsys *ss,
+ struct cgroup *cont,
+ struct cgroup *old_cont,
+ struct task_struct *p)
+{
+ /* do nothing */
+}
+
+
+struct cgroup_subsys bio_cgroup_subsys = {
+ .name = "bio",
+ .subsys_id = bio_cgroup_subsys_id,
+ .create = bio_cgroup_create,
+ .destroy = bio_cgroup_destroy,
+ .pre_destroy = bio_cgroup_pre_destroy,
+ .populate = bio_cgroup_populate,
+ .attach = bio_cgroup_move_task,
+ .early_init = 0,
+};
diff -Ndupr linux-2.6.27-rc5-mm1.cg1/mm/Makefile linux-2.6.27-rc5-mm1.cg2/mm/Makefile
--- linux-2.6.27-rc5-mm1.cg1/mm/Makefile 2008-09-24 16:34:48.000000000 +0900
+++ linux-2.6.27-rc5-mm1.cg2/mm/Makefile 2008-09-24 16:34:51.000000000 +0900
@@ -35,5 +35,6 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_PAGE) += memcontrol.o
+obj-$(CONFIG_CGROUP_BIO) += biocontrol.o
obj-$(CONFIG_CGROUP_MEMRLIMIT_CTLR) += memrlimitcgroup.o
obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
diff -Ndupr linux-2.6.27-rc5-mm1.cg1/mm/memcontrol.c linux-2.6.27-rc5-mm1.cg2/mm/memcontrol.c
--- linux-2.6.27-rc5-mm1.cg1/mm/memcontrol.c 2008-09-24 16:34:48.000000000 +0900
+++ linux-2.6.27-rc5-mm1.cg2/mm/memcontrol.c 2008-09-24 16:34:51.000000000 +0900
@@ -20,6 +20,7 @@
#include <linux/res_counter.h>
#include <linux/memcontrol.h>
#include <linux/cgroup.h>
+#include <linux/biocontrol.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/page-flags.h>
@@ -1021,11 +1022,12 @@ struct page_cgroup *page_get_page_cgroup
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask, enum charge_type ctype,
- struct mem_cgroup *memcg)
+ gfp_t gfp_mask, enum charge_type ctype,
+ struct mem_cgroup *memcg, struct bio_cgroup *biocg)
{
struct page_cgroup *pc;
struct mem_cgroup *mem;
+ struct bio_cgroup *biog;

pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
if (unlikely(pc == NULL))
@@ -1037,18 +1039,15 @@ static int mem_cgroup_charge_common(stru
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
- if (likely(!memcg)) {
- rcu_read_lock();
- mem = mm_get_mem_cgroup(mm);
- rcu_read_unlock();
- } else {
- mem = memcg;
- get_mem_cgroup(mem);
- }
+ rcu_read_lock();
+ mem = memcg ? memcg : mm_get_mem_cgroup(mm);
+ biog = biocg ? biocg : mm_get_bio_cgroup(mm);
+ rcu_read_unlock();

if (mem_cgroup_try_to_allocate(mem, gfp_mask) < 0)
goto out;
set_mem_cgroup(pc, mem);
+ set_bio_cgroup(pc, biog);
pc->page = page;
/*
* If a page is accounted as a page cache, insert to inactive list.
@@ -1067,18 +1066,21 @@ static int mem_cgroup_charge_common(stru
if (unlikely(page_get_page_cgroup(page))) {
unlock_page_cgroup(page);
clear_mem_cgroup(pc);
+ clear_bio_cgroup(pc);
kmem_cache_free(page_cgroup_cache, pc);
goto done;
}
page_assign_page_cgroup(page, pc);

mem_cgroup_add_page(pc);
+ bio_cgroup_add_page(pc);

unlock_page_cgroup(page);
done:
return 0;
out:
put_mem_cgroup(mem);
+ put_bio_cgroup(biog);
kmem_cache_free(page_cgroup_cache, pc);
err:
return -ENOMEM;
@@ -1101,7 +1103,7 @@ int mem_cgroup_charge(struct page *page,
if (unlikely(!mm))
mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
+ MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL, NULL);
}

int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -1137,7 +1139,7 @@ int mem_cgroup_cache_charge(struct page
mm = &init_mm;

return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+ MEM_CGROUP_CHARGE_TYPE_CACHE, NULL, NULL);
}

/*
@@ -1148,7 +1150,7 @@ __mem_cgroup_uncharge_common(struct page
{
struct page_cgroup *pc;

- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return;

/*
@@ -1167,11 +1169,13 @@ __mem_cgroup_uncharge_common(struct page
goto unlock;

mem_cgroup_remove_page(pc);
+ bio_cgroup_remove_page(pc);

page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);

clear_mem_cgroup(pc);
+ clear_bio_cgroup(pc);

kmem_cache_free(page_cgroup_cache, pc);
return;
@@ -1197,24 +1201,29 @@ int mem_cgroup_prepare_migration(struct
{
struct page_cgroup *pc;
struct mem_cgroup *mem = NULL;
+ struct bio_cgroup *biog = NULL;
enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
int ret = 0;

- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return 0;

lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
if (pc) {
mem = get_mem_page_cgroup(pc);
+ biog = get_bio_page_cgroup(pc);
if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
}
unlock_page_cgroup(page);
- if (mem) {
+ if (pc) {
ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
- ctype, mem);
- put_mem_cgroup(mem);
+ ctype, mem, biog);
+ if (mem)
+ put_mem_cgroup(mem);
+ if (biog)
+ put_bio_cgroup(biog);
}
return ret;
}

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
 

Thread Tools




All times are GMT. The time now is 05:01 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org