FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > Ubuntu > Ubuntu Kernel Team

 
 
LinkBack Thread Tools
 
Old 06-21-2011, 11:08 AM
Stefan Bader
 
Default xen: Avoid double list adds/frees of pgds

This patch also moves around some code to make the flow more
obvious and actually look like the current code in the xen repo.

The major change is that pgd_ctor will not add the pgd to the pgd_list
when compiled with PAE. Similar pgd_dtor will not remove a pgd from
that list. This was not done already for shared KERNEL_PMDs and in the
other case it is already done by pgd_alloc and pgd_free.

Also the call to xen_create_continous_region is only done for PAE and
not having SHARED_KERNEL_PMD set. So the call to destroy it should
only be done in that case, too.

BugLink: http://bugs.launchpad.net/bugs/705562

Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
---
arch/x86/mm/pgtable_32-xen.c | 162 ++++++++++++++++++++++-------------------
1 files changed, 87 insertions(+), 75 deletions(-)

diff --git a/arch/x86/mm/pgtable_32-xen.c b/arch/x86/mm/pgtable_32-xen.c
index 021e29e..03dd6ed 100644
--- a/arch/x86/mm/pgtable_32-xen.c
+++ b/arch/x86/mm/pgtable_32-xen.c
@@ -254,65 +254,62 @@ static inline void pgd_list_del(pgd_t *pgd)
}


-
-#if (PTRS_PER_PMD == 1)
-/* Non-PAE pgd constructor */
+/*
+ * The constructors for the PAE and non-PAE case have been combined now.
+ * In case of PAE with non-shared kernel PMD it was wrong to add the pgd
+ * to the pgd_list (already done in pgd_alloc) and setting the pointers
+ * to NULL is also unnecessary (but not fatal) as pgd_alloc will either
+ * freshly set all of them or be aware of partially set pointers in the
+ * OOM case.
+ */
static void pgd_ctor(void *pgd)
{
- unsigned long flags;
-
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-
- spin_lock_irqsave(&pgd_lock, flags);
-
- /* must happen under lock */
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
+ if (PTRS_PER_PMD > 1) {
+ /* PAE, kernel PMD may be shared */
+ if (SHARED_KERNEL_PMD)
+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir + USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
+ } else { /* NON-PAE case */
+ unsigned long flags;

- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
- __pa(swapper_pg_dir) >> PAGE_SHIFT,
- USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
- pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-}
-#else /* PTRS_PER_PMD > 1 */
-/* PAE pgd constructor */
-static void pgd_ctor(void *pgd)
-{
- /* PAE, kernel PMD may be shared */
+ spin_lock_irqsave(&pgd_lock, flags);
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));

- if (SHARED_KERNEL_PMD) {
+ /* must happen under lock */
clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
-#ifndef CONFIG_XEN
- } else {
- unsigned long flags;

- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- spin_lock_irqsave(&pgd_lock, flags);
+ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+ __pa(swapper_pg_dir) >> PAGE_SHIFT,
+ USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
-#endif
}
}
-#endif /* PTRS_PER_PMD */

static void pgd_dtor(void *pgd)
{
- unsigned long flags; /* can be called from interrupt context */
-
- if (SHARED_KERNEL_PMD)
- return;
+ /*
+ * In the upstream code it is claimed to be only called for NON-PAE.
+ * The call for pgd_test_and_unpin was probably duplicate already
+ * as it is called in pgd_free. Unlinking from the pgd_list is
+ * definitely wrong here for NON-PAE. Either it was never added
+ * (if SHARED_KERNEL_PMD is defined) or has already been taken off
+ * the list.
+ */
+ if (PTRS_PER_PMD == 1) {
+ unsigned long flags; /* can be called from interrupt context */

- paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
- spin_lock_irqsave(&pgd_lock, flags);
- pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);

- pgd_test_and_unpin(pgd);
+ pgd_test_and_unpin(pgd);
+ }
}

#define UNSHARED_PTRS_PER_PGD
@@ -354,7 +351,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
- pmd_t **pmds = NULL;
+ pmd_t **pmds;
unsigned long flags;

if (!pgd)
@@ -366,39 +363,46 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
return pgd;

#ifdef CONFIG_XEN
- if (!SHARED_KERNEL_PMD) {
- /*
- * We can race save/restore (if we sleep during a GFP_KERNEL memory
- * allocation). We therefore store virtual addresses of pmds as they
- * do not change across save/restore, and poke the machine addresses
- * into the pgdir under the pgd_lock.
- */
- pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
- if (!pmds) {
- quicklist_free(0, pgd_dtor, pgd);
- return NULL;
+ /*
+ * Take care of setting the non-shared pointers here. This duplicates
+ * a bit of code but allows to early exit for the shared-case.
+ */
+ if (SHARED_KERNEL_PMD) {
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+ pmd_t *pmd = pmd_cache_alloc(i);
+ if (!pmd)
+ goto out_oom;
+
+ paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
}
+ return pgd;
+ }
+
+ /*
+ * We can race save/restore (if we sleep during a GFP_KERNEL memory
+ * allocation). We therefore store virtual addresses of pmds as they
+ * do not change across save/restore, and poke the machine addresses
+ * into the pgdir under the pgd_lock.
+ */
+ pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
+ if (!pmds) {
+ quicklist_free(0, pgd_dtor, pgd);
+ return NULL;
}
#endif

/* Allocate pmds, remember virtual addresses. */
- for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = pmd_cache_alloc(i);
+ for (i = 0; i < PTRS_PER_PGD; ++i) {
+ pmds[i] = pmd_cache_alloc(i);

- if (!pmd)
+ if (!pmds[i])
goto out_oom;

- paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
- if (pmds)
- pmds[i] = pmd;
- else
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+ paravirt_alloc_pd(__pa(pmds[i]) >> PAGE_SHIFT);
}

#ifdef CONFIG_XEN
- if (SHARED_KERNEL_PMD)
- return pgd;
-
spin_lock_irqsave(&pgd_lock, flags);

/* Protect against save/restore: move below 4GB under pgd_lock. */
@@ -437,7 +441,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
return pgd;

out_oom:
- if (!pmds) {
+ if (SHARED_KERNEL_PMD) {
for (i--; i >= 0; i--) {
pgd_t pgdent = pgd[i];
void* pmd = (void *)__va(pgd_val(pgdent)-1);
@@ -471,22 +475,30 @@ void pgd_free(pgd_t *pgd)

/* in the PAE case user pgd entries are overwritten before usage */
if (PTRS_PER_PMD > 1) {
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+ pgd_t pgdent = pgd[i];
+ void* pmd = (void *)__va(pgd_val(pgdent)-1);
+ paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+ pmd_cache_free(pmd, i);
+ }
+
if (!SHARED_KERNEL_PMD) {
unsigned long flags;
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
- }

- for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
- pgd_t pgdent = pgd[i];
- void* pmd = (void *)__va(pgd_val(pgdent)-1);
- paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
- pmd_cache_free(pmd, i);
- }
+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; ++i) {
+ pgd_t pgdent = pgd[i];
+ void* pmd = (void *)__va(pgd_val(pgdent)-1);
+ paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+ pmd_cache_free(pmd, i);
+ }

- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
- xen_destroy_contiguous_region((unsigned long)pgd, 0);
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
+ xen_destroy_contiguous_region(
+ (unsigned long)pgd, 0);
+ }
}

/* in the non-PAE case, free_pgtables() clears user pgd entries */
--
1.7.4.1


--------------080807080104060808040303
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

--
kernel-team mailing list
kernel-team@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/kernel-team

--------------080807080104060808040303--
 

Thread Tools




All times are GMT. The time now is 06:44 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org