From 33b7f521facc760c3a8139bd956330cda1a389af Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 11 Dec 2007 16:14:09 -0200 Subject: [PATCH] Lots of unrelated changes Debugging + changes to make pagetable initialization code work. This must be split into smaller changesets and changed to avoid adding a new .c file just for the xen_init_pt() function. Signed-off-by: Eduardo Habkost --- arch/x86/xen/Makefile | 6 ++ arch/x86/xen/enlighten.c | 41 +++++++++- arch/x86/xen/init.h | 10 +++ arch/x86/xen/init_32.c | 3 + arch/x86/xen/init_64.c | 188 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/mmu.c | 86 +++++++++++++++----- arch/x86/xen/mmu.h | 8 ++ arch/x86/xen/multicalls.c | 13 ++- arch/x86/xen/xen-ops.h | 7 ++ include/xen/page.h | 9 ++- 10 files changed, 342 insertions(+), 29 deletions(-) create mode 100644 arch/x86/xen/init.h create mode 100644 arch/x86/xen/init_32.c create mode 100644 arch/x86/xen/init_64.c diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 343df24..e2dc9c1 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,10 @@ obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ events.o time.o manage.o xen-asm.o +ifeq ($(CONFIG_X86_32),y) +obj-y += init_32.o +else +obj-y += init_64.o +endif + obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a676a23..1828284 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -51,6 +51,7 @@ #include "xen-ops.h" #include "mmu.h" #include "multicalls.h" +#include "init.h" EXPORT_SYMBOL_GPL(hypercall_page); @@ -702,6 +703,25 @@ static void xen_write_cr3(unsigned long cr3) xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } +static void xen_new_user_baseptr(unsigned long pfn) +{ + struct mmuext_op *op; + struct multicall_space mcs; + unsigned long mfn = pfn_to_mfn(PFN_DOWN(pfn)); + + xprintk("%s: %lx\n", __func__, pfn); + + mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */ + + op = 
mcs.args; + op->cmd = MMUEXT_NEW_USER_BASEPTR; + op->arg1.mfn = mfn; + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ +} + /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) @@ -961,10 +981,12 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, goto patch_site switch (type) { +#ifdef CONFIG_X86_32 SITE(pv_irq_ops, irq_enable); SITE(pv_irq_ops, irq_disable); SITE(pv_irq_ops, save_fl); SITE(pv_irq_ops, restore_fl); +#endif #undef SITE patch_site: @@ -1161,7 +1183,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pte = xen_make_pte, .make_pgd = xen_make_pgd, -#ifdef CONFIG_X86_PAE +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, .set_pte_present = xen_set_pte_at, .set_pud = xen_set_pud, @@ -1170,7 +1193,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pmd = xen_make_pmd, .pmd_val = xen_pmd_val, -#endif /* PAE */ +# endif /* PAE */ +#else + .set_pgd = xen_set_pgd, + .make_pud = xen_make_pud, + + .make_pmd = xen_make_pmd, + .pmd_val = xen_pmd_val, +#endif .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, @@ -1262,6 +1292,8 @@ asmlinkage void __init xen_start_kernel(void) BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); + xprintk("hello, world!\n"); + #ifdef CONFIG_X86_64 { /*FIXME: move this to common code @@ -1302,6 +1334,7 @@ asmlinkage void __init xen_start_kernel(void) pgd = (pgd_t *)xen_start_info->pt_base; #ifdef CONFIG_X86_32 + /*FIXME: x86_64 equivalent */ init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; #endif @@ -1316,6 +1349,10 @@ asmlinkage void __init xen_start_kernel(void) possible map and a non-dummy shared_info. 
*/ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + xprintk("xen_init_pt:\n"); + xen_init_pt(); + xprintk("xen_init_pt returned.\n"); + pv_info.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) pv_info.kernel_rpl = 0; diff --git a/arch/x86/xen/init.h b/arch/x86/xen/init.h new file mode 100644 index 0000000..9cd3954 --- /dev/null +++ b/arch/x86/xen/init.h @@ -0,0 +1,10 @@ +#ifndef __X86_XEN_INIT_H +#define __X86_XEN_INIT_H + +void xen_init_pt(void); + +extern pud_t level3_user_pgt[512]; +extern pgd_t init_level4_user_pgt[]; + + +#endif /* __X86_XEN_INIT_H */ diff --git a/arch/x86/xen/init_32.c b/arch/x86/xen/init_32.c new file mode 100644 index 0000000..6b49e40 --- /dev/null +++ b/arch/x86/xen/init_32.c @@ -0,0 +1,3 @@ +void xen_init_pt(void) +{ +} diff --git a/arch/x86/xen/init_64.c b/arch/x86/xen/init_64.c new file mode 100644 index 0000000..f025c21 --- /dev/null +++ b/arch/x86/xen/init_64.c @@ -0,0 +1,188 @@ +#include +#include + +#include + +#include +#include +#include + +#include "xen-ops.h" +#include "init.h" +#include "mmu.h" + +#define addr_to_page(addr, page) \ + (addr) &= PHYSICAL_PAGE_MASK; \ + (page) = ((unsigned long *) ((unsigned long) \ + (((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + \ + __START_KERNEL_map))) + + +static void __meminit early_make_page_readonly(void *va, unsigned int feature) +{ + unsigned long addr, _va = (unsigned long)va; + int r; + unsigned long idx; + pte_t pte, *ptep; + unsigned long *page = (unsigned long *) init_level4_pgt; + + if (xen_feature(feature)) + return; + + idx = pgd_index(_va); + + addr = (unsigned long) page[idx]; + addr_to_page(addr, page); + + idx = pud_index(_va); + addr = page[idx]; + + addr_to_page(addr, page); + + idx = pmd_index(_va); + addr = page[idx]; + addr_to_page(addr, page); + + idx =pte_index(_va); + ptep = (pte_t *) &page[idx]; + + pte.pte = ptep->pte & ~_PAGE_RW; + + if ( (r = HYPERVISOR_update_va_mapping(_va, pte, 0)) ) { + xprintk("%s: failure: %d :(\n", 
__func__, r); + BUG(); + } +} + + + +void __init xen_init_pt(void) +{ + unsigned long addr, *page; + + xprintk("%s starting\n", __func__); + + xprintk("xen_cr3: %lx. xen_cur_cr3: %lx\n", x86_read_percpu(xen_cr3), x86_read_percpu(xen_current_cr3)); + + /* Find the initial pte page that was built for us. */ + page = (unsigned long *)xen_start_info->pt_base; + xprintk("page1: %p\n", page); + addr = page[pgd_index(__START_KERNEL_map)]; + xprintk("addr1: %lx\n", addr); + addr_to_page(addr, page); + xprintk("addr1, page2: %lx, %p\n", addr, page); + + addr = page[pud_index(__START_KERNEL_map)]; + xprintk("addr2: %lx\n", addr); + + addr_to_page(addr, page); + xprintk("addr2,page2: %lx, %p\n", addr, page); + + +#if 0 /* CONFIG_XEN_COMPAT <= 0x030002 */ + /* On Xen 3.0.2 and older we may need to explicitly specify _PAGE_USER + in kernel PTEs. We check that here. */ + if (HYPERVISOR_xen_version(XENVER_version, NULL) <= 0x30000) { + unsigned long *pg; + pte_t pte; + + /* Mess with the initial mapping of page 0. It's not needed. */ + BUILD_BUG_ON(__START_KERNEL <= __START_KERNEL_map); + addr = page[pmd_index(__START_KERNEL_map)]; + addr_to_page(addr, pg); + pte.pte = pg[pte_index(__START_KERNEL_map)]; + BUG_ON(!(pte.pte & _PAGE_PRESENT)); + + /* If _PAGE_USER isn't set, we obviously do not need it. */ + if (pte.pte & _PAGE_USER) { + /* _PAGE_USER is needed, but is it set implicitly? */ + pte.pte &= ~_PAGE_USER; + if ((HYPERVISOR_update_va_mapping(__START_KERNEL_map, + pte, 0) != 0) || + !(pg[pte_index(__START_KERNEL_map)] & _PAGE_USER)) + /* We need to explicitly specify _PAGE_USER. */ + __kernel_page_user = _PAGE_USER; + } + } +#endif + + xprintk("l4pgt: %p\n", init_level4_pgt); + + /* The page tables are pre-initialized with values that are not + * usable by Xen. Zero them out. 
+ */ + memset(init_level4_pgt, 0, PAGE_SIZE); + memset(level3_kernel_pgt, 0, PAGE_SIZE); + memset(level2_kernel_pgt, 0, PAGE_SIZE); + + xprintk("%s going change init_level4_pgt\n", __func__); + + /* Construct mapping of initial pte page in our own directories. */ + init_level4_pgt[pgd_index(__START_KERNEL_map)] = + mk_kernel_pgd(__pa_symbol(level3_kernel_pgt)); + + xprintk("pointed l4 to l3kpgt\n"); + + xprintk("l3pgt: %p\n", level3_kernel_pgt); + + + { + unsigned long pi = pud_index(__START_KERNEL_map); + pud_t newp; + /*int i; + for (i = 0; i < PTRS_PER_PUD; i++) + xprintk("l3[%d] (%p)= %lx\n", i, &level3_kernel_pgt[i], (unsigned long)level3_kernel_pgt[i].pud);*/ + + xprintk("pud index: %lx\n", pi); + xprintk("writing to %p\n", &level3_kernel_pgt[pi]); + newp = __pud(__pa_symbol(level2_kernel_pgt) | + _KERNPG_TABLE); + xprintk("new pud: %lx\n", (unsigned long)newp.pud); + level3_kernel_pgt[pi] = newp; + + } + xprintk("pointed l3 to l2kpgt\n"); + + xprintk("l2pgt: %p\n", level2_kernel_pgt); + memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE); + xprintk("copied l2kpgt from page\n"); + + + xprintk("%s: going to pin\n", __func__); + + early_make_page_readonly(init_level4_user_pgt, + XENFEAT_writable_page_tables); + early_make_page_readonly(level3_user_pgt, + XENFEAT_writable_page_tables); + + xprintk("%s going to make pages readonly\n", __func__); + early_make_page_readonly(init_level4_pgt, + XENFEAT_writable_page_tables); + early_make_page_readonly(level3_kernel_pgt, + XENFEAT_writable_page_tables); + early_make_page_readonly(level2_kernel_pgt, + XENFEAT_writable_page_tables); + + xprintk("%s going to pin pgds\n", __func__); + if (!xen_feature(XENFEAT_writable_page_tables)) { + xprintk("kernel:\n"); + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(init_level4_pgt))); + xprintk("user:\n"); + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(init_level4_user_pgt))); + } + + + /*FIXME: check why this isn't set before xen_pgd_pin() on + * x86_64 XenSource code 
+ */ + xprintk("%s: set_pgd:\n", __func__); + set_pgd((pgd_t *)(init_level4_user_pgt + 511), + mk_kernel_pgd(__pa_symbol(level3_user_pgt))); + xprintk("%s: returning.\n", __func__); + + xprintk("going to load new pagetable:\n"); + write_cr3(__pa(init_level4_pgt)); + xprintk("loaded new pagetable\n"); +} + + diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d4742df..89aaa58 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -53,6 +53,8 @@ #include #include +#include + #include "multicalls.h" #include "mmu.h" @@ -157,7 +159,21 @@ pteval_t xen_pte_val(pte_t pte) return ret; } -#ifdef CONFIG_X86_PAE +pte_t xen_make_pte(pteval_t pte) +{ + if (pte & _PAGE_PRESENT) { + pte = phys_to_machine(XPADDR(pte)).maddr; + pte &= ~(_PAGE_PCD | _PAGE_PWT); + } + + return (pte_t){ .pte = pte }; +} + +#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) + +/*FIXME: merge functions where possible */ + + void xen_set_pud(pud_t *ptr, pud_t val) { struct multicall_space mcs; @@ -176,6 +192,46 @@ void xen_set_pud(pud_t *ptr, pud_t val) preempt_enable(); } + +#ifdef CONFIG_X86_64 + +void xen_set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte = pte.pte; + //smp_wmb(); +} + +void xen_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + set_64bit((unsigned long *)ptep, pte_val_ma(pte)); +} + +void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + ptep->pte = 0; +} + +void xen_set_pgd(pgd_t *ptr, pgd_t val) +{ + struct multicall_space mcs; + struct mmu_update *u; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*u)); + u = mcs.args; + + u->ptr = virt_to_machine(ptr).maddr; + u->val = pgd_val_ma(val); + + MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} +#else + void xen_set_pte(pte_t *ptep, pte_t pte) { ptep->pte_high = pte.pte_high; @@ -195,6 +251,8 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) ptep->pte_high = 0; } +#endif + void xen_pmd_clear(pmd_t *pmdp) { 
xen_set_pmd(pmdp, __pmd(0)); @@ -203,6 +261,7 @@ void xen_pmd_clear(pmd_t *pmdp) pmdval_t xen_pmd_val(pmd_t pmd) { unsigned long long ret = pmd.pmd; + if (ret) ret = machine_to_phys(XMADDR(ret)).paddr | 1; return ret; @@ -216,16 +275,6 @@ pgdval_t xen_pgd_val(pgd_t pgd) return ret; } -pte_t xen_make_pte(pteval_t pte) -{ - if (pte & _PAGE_PRESENT) { - pte = phys_to_machine(XPADDR(pte)).maddr; - pte &= ~(_PAGE_PCD | _PAGE_PWT); - } - - return (pte_t){ .pte = pte }; -} - pmd_t xen_make_pmd(pmdval_t pmd) { if (pmd & 1) @@ -241,7 +290,9 @@ pgd_t xen_make_pgd(pgdval_t pgd) return (pgd_t){ pgd }; } + #else /* !PAE */ + void xen_set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; @@ -255,16 +306,6 @@ pgdval_t xen_pgd_val(pgd_t pgd) return ret; } -pte_t xen_make_pte(pteval_t pte) -{ - if (pte & _PAGE_PRESENT) { - pte = phys_to_machine(XPADDR(pte)).maddr; - pte &= ~(_PAGE_PCD | _PAGE_PWT); - } - - return (pte_t){ pte }; -} - pgd_t xen_make_pgd(pgdval_t pgd) { if (pgd & _PAGE_PRESENT) @@ -272,6 +313,7 @@ pgd_t xen_make_pgd(pgdval_t pgd) return (pgd_t){ pgd }; } + #endif /* CONFIG_X86_PAE */ /* @@ -368,7 +410,7 @@ static void do_unlock(void *v) spin_unlock(ptl); } -static void xen_do_pin(unsigned level, unsigned long pfn) +void xen_do_pin(unsigned level, unsigned long pfn) { struct mmuext_op *op; struct multicall_space mcs; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index ce425a4..62a325c 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -36,13 +36,17 @@ void xen_exit_mmap(struct mm_struct *mm); void xen_pgd_pin(pgd_t *pgd); //void xen_pgd_unpin(pgd_t *pgd); +// +void xen_do_pin(unsigned level, unsigned long pfn); pteval_t xen_pte_val(pte_t); pmdval_t xen_pmd_val(pmd_t); +pudval_t xen_pud_val(pud_t); pgdval_t xen_pgd_val(pgd_t); pte_t xen_make_pte(pteval_t); pmd_t xen_make_pmd(pmdval_t); +pud_t xen_make_pud(pudval_t); pgd_t xen_make_pgd(pgdval_t); #ifdef CONFIG_X86_PAE @@ -55,4 +59,8 @@ void xen_pmd_clear(pmd_t *pmdp); #endif +#ifdef CONFIG_X86_64 +void 
xen_set_pgd(pgd_t *pgdp, pgd_t pgdval); +#endif + #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 5e6f36f..c96a8b0 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -24,6 +24,8 @@ #include +#include + #include "multicalls.h" #define MC_DEBUG 1 @@ -60,6 +62,8 @@ void xen_mc_flush(void) something in the middle */ local_irq_save(flags); + xprintk("xen_mc_flush called. mcidx: %u\n", b->mcidx); + if (b->mcidx) { #if MC_DEBUG memcpy(b->debug, b->entries, @@ -72,19 +76,20 @@ void xen_mc_flush(void) if (b->entries[i].result < 0) ret++; -#if MC_DEBUG + xprintk("multicall ret: %d\n", ret); +//#if MC_DEBUG if (ret) { - printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", + xprintk("%d multicall(s) failed: cpu %d\n", ret, smp_processor_id()); for(i = 0; i < b->mcidx; i++) { - printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", + xprintk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", i+1, b->mcidx, b->debug[i].op, b->debug[i].args[0], b->entries[i].result); } } -#endif +//#endif b->mcidx = 0; b->argidx = 0; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index b02a909..f062eab 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -2,6 +2,13 @@ #define XEN_OPS_H #include +#include + +#include + +#include + +#include /* These are code, but not functions. Defined in entry.S */ extern const char xen_hypervisor_callback[]; diff --git a/include/xen/page.h b/include/xen/page.h index 7bf5b56..8008734 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -8,6 +8,10 @@ #include +#include + +#include + #ifdef CONFIG_X86_PAE /* Xen machine address */ typedef struct xmaddr { @@ -70,6 +74,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) #endif pfn = 0; + xprintk("mfn_to_pfn(%lx):\n", mfn); + /* * The array access can fail (e.g., device space beyond end of RAM). 
* In such cases it doesn't matter what we return (we return garbage), @@ -77,6 +83,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) */ __get_user(pfn, &machine_to_phys_mapping[mfn]); + xprintk("mfn_to_pfn(%lx) = %lx\n", mfn, pfn); return pfn; } @@ -154,7 +161,6 @@ static inline pteval_t pte_val_ma(pte_t x) { return native_pte_val(x); } -#define pud_val_ma(v) ((v).pgd.pgd) #define __pte_ma(x) (native_make_pte(x)) #define __pmd_ma(x) ((pmd_t) { (x) } ) #else /* !X86_PAE */ @@ -166,6 +172,7 @@ static inline pteval_t pte_val_ma(pte_t x) #define pgd_val_ma(x) ((x).pgd) #define pmd_val_ma(x) (native_pmd_val((x))) +#define pud_val_ma(x) (native_pud_val((x))) xmaddr_t arbitrary_virt_to_machine(unsigned long address); -- 1.5.4.1