This patch also moves around some code to make the flow more
obvious and actually look like the current code in the xen repo.
The major change is that pgd_ctor will not add the pgd to the pgd_list
when compiled with PAE. Similarly, pgd_dtor will not remove a pgd from
that list. This was already not done for shared kernel PMDs, and in the
other case it is already done by pgd_alloc and pgd_free.
Also, the call to xen_create_contiguous_region is only made for PAE
without SHARED_KERNEL_PMD set, so the call to destroy it should
only be made in that case, too.
-
-#if (PTRS_PER_PMD == 1)
-/* Non-PAE pgd constructor */
+/*
+ * The constructors for the PAE and non-PAE case have been combined now.
+ * In case of PAE with non-shared kernel PMD it was wrong to add the pgd
+ * to the pgd_list (already done in pgd_alloc) and setting the pointers
+ * to NULL is also unnecessary (but not fatal) as pgd_alloc will either
+ * freshly set all of them or be aware of partially set pointers in the
+ * OOM case.
+ */
static void pgd_ctor(void *pgd)
{
- unsigned long flags;
-
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-
- spin_lock_irqsave(&pgd_lock, flags);
-
- /* must happen under lock */
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
+ if (PTRS_PER_PMD > 1) {
+ /* PAE, kernel PMD may be shared */
+ if (SHARED_KERNEL_PMD)
+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir + USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
+ } else { /* NON-PAE case */
+ unsigned long flags;
static void pgd_dtor(void *pgd)
{
- unsigned long flags; /* can be called from interrupt context */
-
- if (SHARED_KERNEL_PMD)
- return;
+ /*
+ * In the upstream code it is claimed to be only called for NON-PAE.
+ * The call for pgd_test_and_unpin was probably duplicate already
+ * as it is called in pgd_free. Unlinking from the pgd_list is
+ * definitely wrong here for PAE. Either it was never added
+ * (if SHARED_KERNEL_PMD is defined) or has already been taken off
+ * the list.
+ */
+ if (PTRS_PER_PMD == 1) {
+ unsigned long flags; /* can be called from interrupt context */
#ifdef CONFIG_XEN
- if (!SHARED_KERNEL_PMD) {
- /*
- * We can race save/restore (if we sleep during a GFP_KERNEL memory
- * allocation). We therefore store virtual addresses of pmds as they
- * do not change across save/restore, and poke the machine addresses
- * into the pgdir under the pgd_lock.
- */
- pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
- if (!pmds) {
- quicklist_free(0, pgd_dtor, pgd);
- return NULL;
+ /*
+ * Take care of setting the non-shared pointers here. This duplicates
+ * a bit of code but allows an early exit for the shared case.
+ */
+ if (SHARED_KERNEL_PMD) {
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+ pmd_t *pmd = pmd_cache_alloc(i);
+ if (!pmd)
+ goto out_oom;
+
+ paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
}
+ return pgd;
+ }
+
+ /*
+ * We can race save/restore (if we sleep during a GFP_KERNEL memory
+ * allocation). We therefore store virtual addresses of pmds as they
+ * do not change across save/restore, and poke the machine addresses
+ * into the pgdir under the pgd_lock.
+ */
+ pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
+ if (!pmds) {
+ quicklist_free(0, pgd_dtor, pgd);
+ return NULL;
}
#endif
/* Allocate pmds, remember virtual addresses. */
- for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = pmd_cache_alloc(i);
+ for (i = 0; i < PTRS_PER_PGD; ++i) {
+ pmds[i] = pmd_cache_alloc(i);