Contents of /trunk/kernel26-xen/patches-2.6.25-r1/1023-2.6.25-xen-patch-2.6.22.patch
Parent Directory | Revision Log
Revision 609 -
(show annotations)
(download)
Fri May 23 17:35:37 2008 UTC (16 years, 4 months ago) by niro
File size: 212197 byte(s)
-using opensuse xen patchset, updated kernel configs
1 | From: www.kernel.org |
2 | Subject: Update to 2.6.22 |
3 | Patch-mainline: 2.6.22 |
4 | |
5 | Automatically created from "patches.kernel.org/patch-2.6.22" by xen-port-patches.py |
6 | |
7 | Acked-by: jbeulich@novell.com |
8 | |
9 | --- |
10 | arch/x86/Kconfig | 5 |
11 | arch/x86/ia32/ia32entry-xen.S | 18 - |
12 | arch/x86/kernel/Makefile | 2 |
13 | arch/x86/kernel/acpi/sleep_64-xen.c | 26 - |
14 | arch/x86/kernel/apic_32-xen.c | 1 |
15 | arch/x86/kernel/apic_64-xen.c | 1 |
16 | arch/x86/kernel/cpu/common-xen.c | 224 ++++--------- |
17 | arch/x86/kernel/cpu/mtrr/main-xen.c | 2 |
18 | arch/x86/kernel/e820_32-xen.c | 46 +- |
19 | arch/x86/kernel/e820_64-xen.c | 28 - |
20 | arch/x86/kernel/early_printk-xen.c | 27 - |
21 | arch/x86/kernel/entry_32-xen.S | 30 - |
22 | arch/x86/kernel/entry_64-xen.S | 7 |
23 | arch/x86/kernel/genapic_64-xen.c | 106 +----- |
24 | arch/x86/kernel/genapic_xen_64.c | 3 |
25 | arch/x86/kernel/head64-xen.c | 32 + |
26 | arch/x86/kernel/head_32-xen.S | 101 ------ |
27 | arch/x86/kernel/head_64-xen.S | 37 -- |
28 | arch/x86/kernel/io_apic_32-xen.c | 43 -- |
29 | arch/x86/kernel/io_apic_64-xen.c | 39 -- |
30 | arch/x86/kernel/ioport_32-xen.c | 2 |
31 | arch/x86/kernel/ioport_64-xen.c | 2 |
32 | arch/x86/kernel/irq_32-xen.c | 3 |
33 | arch/x86/kernel/irq_64-xen.c | 34 +- |
34 | arch/x86/kernel/ldt_32-xen.c | 1 |
35 | arch/x86/kernel/ldt_64-xen.c | 1 |
36 | arch/x86/kernel/microcode-xen.c | 2 |
37 | arch/x86/kernel/mpparse_32-xen.c | 3 |
38 | arch/x86/kernel/mpparse_64-xen.c | 3 |
39 | arch/x86/kernel/pci-dma_32-xen.c | 29 + |
40 | arch/x86/kernel/pci-swiotlb_64-xen.c | 2 |
41 | arch/x86/kernel/process_32-xen.c | 27 + |
42 | arch/x86/kernel/process_64-xen.c | 16 |
43 | arch/x86/kernel/quirks-xen.c | 63 --- |
44 | arch/x86/kernel/setup64-xen.c | 17 - |
45 | arch/x86/kernel/setup_64-xen.c | 30 - |
46 | arch/x86/kernel/smp_32-xen.c | 191 ++++------- |
47 | arch/x86/kernel/smp_64-xen.c | 29 - |
48 | arch/x86/kernel/time_32-xen.c | 62 +-- |
49 | arch/x86/kernel/traps_32-xen.c | 46 +- |
50 | arch/x86/kernel/traps_64-xen.c | 55 +-- |
51 | arch/x86/kernel/vsyscall_64-xen.c | 73 +++- |
52 | arch/x86/mm/fault_32-xen.c | 42 +- |
53 | arch/x86/mm/fault_64-xen.c | 15 |
54 | arch/x86/mm/highmem_32-xen.c | 14 |
55 | arch/x86/mm/init_32-xen.c | 157 ++++++--- |
56 | arch/x86/mm/init_64-xen.c | 132 ++++--- |
57 | arch/x86/mm/ioremap_32-xen.c | 1 |
58 | arch/x86/mm/pageattr_64-xen.c | 27 + |
59 | arch/x86/mm/pgtable_32-xen.c | 210 +++++++----- |
60 | drivers/char/tpm/tpm_xen.c | 2 |
61 | drivers/xen/blkfront/blkfront.c | 2 |
62 | drivers/xen/char/mem.c | 1 |
63 | drivers/xen/core/hypervisor_sysfs.c | 2 |
64 | drivers/xen/core/smpboot.c | 49 +- |
65 | drivers/xen/core/xen_sysfs.c | 20 - |
66 | drivers/xen/netback/netback.c | 14 |
67 | drivers/xen/netfront/netfront.c | 2 |
68 | drivers/xen/pciback/xenbus.c | 2 |
69 | drivers/xen/pcifront/xenbus.c | 4 |
70 | drivers/xen/sfc_netback/accel_fwd.c | 7 |
71 | drivers/xen/sfc_netback/accel_solarflare.c | 2 |
72 | drivers/xen/sfc_netfront/accel_tso.c | 28 - |
73 | drivers/xen/sfc_netfront/accel_vi.c | 4 |
74 | drivers/xen/sfc_netfront/accel_xenbus.c | 4 |
75 | drivers/xen/xenoprof/xenoprofile.c | 2 |
76 | fs/aio.c | 7 |
77 | include/asm-x86/mach-xen/asm/desc_32.h | 119 ++++--- |
78 | include/asm-x86/mach-xen/asm/desc_64.h | 30 - |
79 | include/asm-x86/mach-xen/asm/dma-mapping_64.h | 2 |
80 | include/asm-x86/mach-xen/asm/fixmap_32.h | 9 |
81 | include/asm-x86/mach-xen/asm/fixmap_64.h | 1 |
82 | include/asm-x86/mach-xen/asm/highmem.h | 6 |
83 | include/asm-x86/mach-xen/asm/io_32.h | 13 |
84 | include/asm-x86/mach-xen/asm/irqflags_32.h | 78 ++-- |
85 | include/asm-x86/mach-xen/asm/irqflags_64.h | 19 - |
86 | include/asm-x86/mach-xen/asm/mmu.h | 8 |
87 | include/asm-x86/mach-xen/asm/mmu_64.h | 8 |
88 | include/asm-x86/mach-xen/asm/mmu_context_32.h | 29 + |
89 | include/asm-x86/mach-xen/asm/mmu_context_64.h | 3 |
90 | include/asm-x86/mach-xen/asm/page_64.h | 61 +-- |
91 | include/asm-x86/mach-xen/asm/pgalloc_32.h | 3 |
92 | include/asm-x86/mach-xen/asm/pgalloc_64.h | 15 |
93 | include/asm-x86/mach-xen/asm/pgtable-2level.h | 43 +- |
94 | include/asm-x86/mach-xen/asm/pgtable-3level-defs.h | 2 |
95 | include/asm-x86/mach-xen/asm/pgtable-3level.h | 61 ++- |
96 | include/asm-x86/mach-xen/asm/pgtable_32.h | 80 ++-- |
97 | include/asm-x86/mach-xen/asm/pgtable_64.h | 83 ++--- |
98 | include/asm-x86/mach-xen/asm/processor_32.h | 141 +++----- |
99 | include/asm-x86/mach-xen/asm/processor_64.h | 55 --- |
100 | include/asm-x86/mach-xen/asm/scatterlist_32.h | 2 |
101 | include/asm-x86/mach-xen/asm/segment_32.h | 10 |
102 | include/asm-x86/mach-xen/asm/smp_32.h | 117 +++++-- |
103 | include/asm-x86/mach-xen/asm/smp_64.h | 20 - |
104 | include/asm-x86/mach-xen/asm/system_32.h | 348 ++++----------------- |
105 | include/asm-x86/mach-xen/asm/system_64.h | 106 ------ |
106 | include/asm-x86/mach-xen/asm/tlbflush_32.h | 11 |
107 | include/asm-x86/mach-xen/asm/tlbflush_64.h | 2 |
108 | lib/swiotlb-xen.c | 1 |
109 | net/core/dev.c | 15 |
110 | scripts/Makefile.xen.awk | 2 |
111 | 101 files changed, 1642 insertions(+), 2080 deletions(-) |
112 | |
113 | --- a/arch/x86/Kconfig |
114 | +++ b/arch/x86/Kconfig |
115 | @@ -1222,7 +1222,7 @@ |
116 | |
117 | config RELOCATABLE |
118 | bool "Build a relocatable kernel (EXPERIMENTAL)" |
119 | - depends on EXPERIMENTAL && !X86_XEN |
120 | + depends on EXPERIMENTAL && !X86_XEN && !X86_64_XEN |
121 | help |
122 | This builds a kernel image that retains relocation information |
123 | so it can be loaded someplace besides the default 1MB. |
124 | @@ -1276,7 +1276,6 @@ |
125 | def_bool y |
126 | prompt "Compat VDSO support" |
127 | depends on X86_32 || IA32_EMULATION |
128 | - depends on !X86_XEN |
129 | help |
130 | Map the 32-bit VDSO to the predictable old-style address too. |
131 | ---help--- |
132 | @@ -1453,7 +1452,7 @@ |
133 | bool "PCI support" if !X86_VISWS |
134 | depends on !X86_VOYAGER |
135 | default y |
136 | - select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) |
137 | + select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC && !X86_XEN && !X86_64_XEN) |
138 | help |
139 | Find out whether you have a PCI motherboard. PCI is the name of a |
140 | bus system, i.e. the way the CPU talks to the other stuff inside |
141 | --- a/arch/x86/ia32/ia32entry-xen.S |
142 | +++ b/arch/x86/ia32/ia32entry-xen.S |
143 | @@ -431,11 +431,7 @@ |
144 | .quad sys_symlink |
145 | .quad sys_lstat |
146 | .quad sys_readlink /* 85 */ |
147 | -#ifdef CONFIG_IA32_AOUT |
148 | .quad sys_uselib |
149 | -#else |
150 | - .quad quiet_ni_syscall |
151 | -#endif |
152 | .quad sys_swapon |
153 | .quad sys_reboot |
154 | .quad compat_sys_old_readdir |
155 | @@ -574,7 +570,7 @@ |
156 | .quad quiet_ni_syscall /* tux */ |
157 | .quad quiet_ni_syscall /* security */ |
158 | .quad sys_gettid |
159 | - .quad sys_readahead /* 225 */ |
160 | + .quad sys32_readahead /* 225 */ |
161 | .quad sys_setxattr |
162 | .quad sys_lsetxattr |
163 | .quad sys_fsetxattr |
164 | @@ -599,7 +595,7 @@ |
165 | .quad compat_sys_io_getevents |
166 | .quad compat_sys_io_submit |
167 | .quad sys_io_cancel |
168 | - .quad sys_fadvise64 /* 250 */ |
169 | + .quad sys32_fadvise64 /* 250 */ |
170 | .quad quiet_ni_syscall /* free_huge_pages */ |
171 | .quad sys_exit_group |
172 | .quad sys32_lookup_dcookie |
173 | @@ -663,10 +659,14 @@ |
174 | .quad compat_sys_set_robust_list |
175 | .quad compat_sys_get_robust_list |
176 | .quad sys_splice |
177 | - .quad sys_sync_file_range |
178 | - .quad sys_tee |
179 | + .quad sys32_sync_file_range |
180 | + .quad sys_tee /* 315 */ |
181 | .quad compat_sys_vmsplice |
182 | .quad compat_sys_move_pages |
183 | .quad sys_getcpu |
184 | .quad sys_epoll_pwait |
185 | -ia32_syscall_end: |
186 | + .quad compat_sys_utimensat /* 320 */ |
187 | + .quad compat_sys_signalfd |
188 | + .quad compat_sys_timerfd |
189 | + .quad sys_eventfd |
190 | +ia32_syscall_end: |
191 | --- a/arch/x86/kernel/Makefile |
192 | +++ b/arch/x86/kernel/Makefile |
193 | @@ -106,4 +106,4 @@ |
194 | |
195 | disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ |
196 | smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o |
197 | -%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := |
198 | +%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := |
199 | --- a/arch/x86/kernel/acpi/sleep_64-xen.c |
200 | +++ b/arch/x86/kernel/acpi/sleep_64-xen.c |
201 | @@ -60,19 +60,6 @@ |
202 | extern char wakeup_start, wakeup_end; |
203 | |
204 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); |
205 | - |
206 | -static pgd_t low_ptr; |
207 | - |
208 | -static void init_low_mapping(void) |
209 | -{ |
210 | - pgd_t *slot0 = pgd_offset(current->mm, 0UL); |
211 | - low_ptr = *slot0; |
212 | - /* FIXME: We're playing with the current task's page tables here, which |
213 | - * is potentially dangerous on SMP systems. |
214 | - */ |
215 | - set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); |
216 | - local_flush_tlb(); |
217 | -} |
218 | #endif |
219 | |
220 | /** |
221 | @@ -84,8 +71,6 @@ |
222 | int acpi_save_state_mem(void) |
223 | { |
224 | #ifndef CONFIG_ACPI_PV_SLEEP |
225 | - init_low_mapping(); |
226 | - |
227 | memcpy((void *)acpi_wakeup_address, &wakeup_start, |
228 | &wakeup_end - &wakeup_start); |
229 | acpi_copy_wakeup_routine(acpi_wakeup_address); |
230 | @@ -98,10 +83,6 @@ |
231 | */ |
232 | void acpi_restore_state_mem(void) |
233 | { |
234 | -#ifndef CONFIG_ACPI_PV_SLEEP |
235 | - set_pgd(pgd_offset(current->mm, 0UL), low_ptr); |
236 | - local_flush_tlb(); |
237 | -#endif |
238 | } |
239 | |
240 | /** |
241 | @@ -115,10 +96,11 @@ |
242 | void __init acpi_reserve_bootmem(void) |
243 | { |
244 | #ifndef CONFIG_ACPI_PV_SLEEP |
245 | - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); |
246 | - if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) |
247 | + acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); |
248 | + if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2)) |
249 | printk(KERN_CRIT |
250 | - "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); |
251 | + "ACPI: Wakeup code way too big, will crash on attempt" |
252 | + " to suspend\n"); |
253 | #endif |
254 | } |
255 | |
256 | --- a/arch/x86/kernel/apic_32-xen.c |
257 | +++ b/arch/x86/kernel/apic_32-xen.c |
258 | @@ -19,7 +19,6 @@ |
259 | #include <linux/mm.h> |
260 | #include <linux/delay.h> |
261 | #include <linux/bootmem.h> |
262 | -#include <linux/smp_lock.h> |
263 | #include <linux/interrupt.h> |
264 | #include <linux/mc146818rtc.h> |
265 | #include <linux/kernel_stat.h> |
266 | --- a/arch/x86/kernel/apic_64-xen.c |
267 | +++ b/arch/x86/kernel/apic_64-xen.c |
268 | @@ -19,7 +19,6 @@ |
269 | #include <linux/mm.h> |
270 | #include <linux/delay.h> |
271 | #include <linux/bootmem.h> |
272 | -#include <linux/smp_lock.h> |
273 | #include <linux/interrupt.h> |
274 | #include <linux/mc146818rtc.h> |
275 | #include <linux/kernel_stat.h> |
276 | --- a/arch/x86/kernel/cpu/common-xen.c |
277 | +++ b/arch/x86/kernel/cpu/common-xen.c |
278 | @@ -22,16 +22,40 @@ |
279 | #define phys_pkg_id(a,b) a |
280 | #endif |
281 | #endif |
282 | -#include <asm/pda.h> |
283 | #include <asm/hypervisor.h> |
284 | |
285 | #include "cpu.h" |
286 | |
287 | -DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); |
288 | -EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); |
289 | +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { |
290 | + [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, |
291 | + [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, |
292 | + [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, |
293 | + [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, |
294 | +#ifndef CONFIG_XEN |
295 | + /* |
296 | + * Segments used for calling PnP BIOS have byte granularity. |
297 | + * They code segments and data segments have fixed 64k limits, |
298 | + * the transfer segment sizes are set at run time. |
299 | + */ |
300 | + [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ |
301 | + [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ |
302 | + [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ |
303 | + [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ |
304 | + [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ |
305 | + /* |
306 | + * The APM segments have byte granularity and their bases |
307 | + * are set at run time. All have 64k limits. |
308 | + */ |
309 | + [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ |
310 | + /* 16-bit code */ |
311 | + [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, |
312 | + [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ |
313 | |
314 | -struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; |
315 | -EXPORT_SYMBOL(_cpu_pda); |
316 | + [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, |
317 | +#endif |
318 | + [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, |
319 | +} }; |
320 | +EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
321 | |
322 | static int cachesize_override __cpuinitdata = -1; |
323 | static int disable_x86_fxsr __cpuinitdata; |
324 | @@ -373,7 +397,7 @@ |
325 | /* |
326 | * This does the hard work of actually picking apart the CPU stuff... |
327 | */ |
328 | -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) |
329 | +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) |
330 | { |
331 | int i; |
332 | |
333 | @@ -484,15 +508,22 @@ |
334 | |
335 | /* Init Machine Check Exception if available. */ |
336 | mcheck_init(c); |
337 | +} |
338 | |
339 | - if (c == &boot_cpu_data) |
340 | - sysenter_setup(); |
341 | +void __init identify_boot_cpu(void) |
342 | +{ |
343 | + identify_cpu(&boot_cpu_data); |
344 | + sysenter_setup(); |
345 | enable_sep_cpu(); |
346 | + mtrr_bp_init(); |
347 | +} |
348 | |
349 | - if (c == &boot_cpu_data) |
350 | - mtrr_bp_init(); |
351 | - else |
352 | - mtrr_ap_init(); |
353 | +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
354 | +{ |
355 | + BUG_ON(c == &boot_cpu_data); |
356 | + identify_cpu(c); |
357 | + enable_sep_cpu(); |
358 | + mtrr_ap_init(); |
359 | } |
360 | |
361 | #ifdef CONFIG_X86_HT |
362 | @@ -606,136 +637,47 @@ |
363 | #endif |
364 | } |
365 | |
366 | -/* Make sure %gs is initialized properly in idle threads */ |
367 | +/* Make sure %fs is initialized properly in idle threads */ |
368 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) |
369 | { |
370 | memset(regs, 0, sizeof(struct pt_regs)); |
371 | - regs->xfs = __KERNEL_PDA; |
372 | + regs->xfs = __KERNEL_PERCPU; |
373 | return regs; |
374 | } |
375 | |
376 | -static __cpuinit int alloc_gdt(int cpu) |
377 | +/* Current gdt points %fs at the "master" per-cpu area: after this, |
378 | + * it's on the real one. */ |
379 | +void switch_to_new_gdt(void) |
380 | { |
381 | - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
382 | - struct desc_struct *gdt; |
383 | - struct i386_pda *pda; |
384 | - |
385 | - gdt = (struct desc_struct *)cpu_gdt_descr->address; |
386 | - pda = cpu_pda(cpu); |
387 | - |
388 | - /* |
389 | - * This is a horrible hack to allocate the GDT. The problem |
390 | - * is that cpu_init() is called really early for the boot CPU |
391 | - * (and hence needs bootmem) but much later for the secondary |
392 | - * CPUs, when bootmem will have gone away |
393 | - */ |
394 | - if (NODE_DATA(0)->bdata->node_bootmem_map) { |
395 | - BUG_ON(gdt != NULL || pda != NULL); |
396 | - |
397 | - gdt = alloc_bootmem_pages(PAGE_SIZE); |
398 | - pda = alloc_bootmem(sizeof(*pda)); |
399 | - /* alloc_bootmem(_pages) panics on failure, so no check */ |
400 | - |
401 | - memset(gdt, 0, PAGE_SIZE); |
402 | - memset(pda, 0, sizeof(*pda)); |
403 | - } else { |
404 | - /* GDT and PDA might already have been allocated if |
405 | - this is a CPU hotplug re-insertion. */ |
406 | - if (gdt == NULL) |
407 | - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); |
408 | - |
409 | - if (pda == NULL) |
410 | - pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); |
411 | - |
412 | - if (unlikely(!gdt || !pda)) { |
413 | - free_pages((unsigned long)gdt, 0); |
414 | - kfree(pda); |
415 | - return 0; |
416 | - } |
417 | - } |
418 | - |
419 | - cpu_gdt_descr->address = (unsigned long)gdt; |
420 | - cpu_pda(cpu) = pda; |
421 | - |
422 | - return 1; |
423 | -} |
424 | - |
425 | -/* Initial PDA used by boot CPU */ |
426 | -struct i386_pda boot_pda = { |
427 | - ._pda = &boot_pda, |
428 | - .cpu_number = 0, |
429 | - .pcurrent = &init_task, |
430 | -}; |
431 | - |
432 | -static inline void set_kernel_fs(void) |
433 | -{ |
434 | - /* Set %fs for this CPU's PDA. Memory clobber is to create a |
435 | - barrier with respect to any PDA operations, so the compiler |
436 | - doesn't move any before here. */ |
437 | - asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); |
438 | -} |
439 | - |
440 | -/* Initialize the CPU's GDT and PDA. The boot CPU does this for |
441 | - itself, but secondaries find this done for them. */ |
442 | -__cpuinit int init_gdt(int cpu, struct task_struct *idle) |
443 | -{ |
444 | - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
445 | - struct desc_struct *gdt; |
446 | - struct i386_pda *pda; |
447 | - |
448 | - /* For non-boot CPUs, the GDT and PDA should already have been |
449 | - allocated. */ |
450 | - if (!alloc_gdt(cpu)) { |
451 | - printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); |
452 | - return 0; |
453 | - } |
454 | - |
455 | - gdt = (struct desc_struct *)cpu_gdt_descr->address; |
456 | - pda = cpu_pda(cpu); |
457 | - |
458 | - BUG_ON(gdt == NULL || pda == NULL); |
459 | - |
460 | - /* |
461 | - * Initialize the per-CPU GDT with the boot GDT, |
462 | - * and set up the GDT descriptor: |
463 | - */ |
464 | - memcpy(gdt, cpu_gdt_table, GDT_SIZE); |
465 | - cpu_gdt_descr->size = GDT_SIZE - 1; |
466 | - |
467 | - pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, |
468 | - (u32 *)&gdt[GDT_ENTRY_PDA].b, |
469 | - (unsigned long)pda, sizeof(*pda) - 1, |
470 | - 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ |
471 | - |
472 | - memset(pda, 0, sizeof(*pda)); |
473 | - pda->_pda = pda; |
474 | - pda->cpu_number = cpu; |
475 | - pda->pcurrent = idle; |
476 | - |
477 | - return 1; |
478 | -} |
479 | - |
480 | -void __cpuinit cpu_set_gdt(int cpu) |
481 | -{ |
482 | - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
483 | + struct Xgt_desc_struct gdt_descr; |
484 | unsigned long va, frames[16]; |
485 | int f; |
486 | |
487 | - for (va = cpu_gdt_descr->address, f = 0; |
488 | - va < cpu_gdt_descr->address + cpu_gdt_descr->size; |
489 | + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); |
490 | + gdt_descr.size = GDT_SIZE - 1; |
491 | + |
492 | + for (va = gdt_descr.address, f = 0; |
493 | + va < gdt_descr.address + gdt_descr.size; |
494 | va += PAGE_SIZE, f++) { |
495 | frames[f] = virt_to_mfn(va); |
496 | make_lowmem_page_readonly( |
497 | (void *)va, XENFEAT_writable_descriptor_tables); |
498 | } |
499 | - BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8)); |
500 | - |
501 | - set_kernel_fs(); |
502 | + if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8)) |
503 | + BUG(); |
504 | + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); |
505 | } |
506 | |
507 | -/* Common CPU init for both boot and secondary CPUs */ |
508 | -static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) |
509 | +/* |
510 | + * cpu_init() initializes state that is per-CPU. Some data is already |
511 | + * initialized (naturally) in the bootstrap process, such as the GDT |
512 | + * and IDT. We reload them nevertheless, this function acts as a |
513 | + * 'CPU state barrier', nothing should get across. |
514 | + */ |
515 | +void __cpuinit cpu_init(void) |
516 | { |
517 | + int cpu = smp_processor_id(); |
518 | + struct task_struct *curr = current; |
519 | #ifndef CONFIG_X86_NO_TSS |
520 | struct tss_struct * t = &per_cpu(init_tss, cpu); |
521 | #endif |
522 | @@ -757,6 +699,8 @@ |
523 | set_in_cr4(X86_CR4_TSD); |
524 | } |
525 | |
526 | + switch_to_new_gdt(); |
527 | + |
528 | /* |
529 | * Set up and load the per-CPU TSS and LDT |
530 | */ |
531 | @@ -794,38 +738,6 @@ |
532 | mxcsr_feature_mask_init(); |
533 | } |
534 | |
535 | -/* Entrypoint to initialize secondary CPU */ |
536 | -void __cpuinit secondary_cpu_init(void) |
537 | -{ |
538 | - int cpu = smp_processor_id(); |
539 | - struct task_struct *curr = current; |
540 | - |
541 | - _cpu_init(cpu, curr); |
542 | -} |
543 | - |
544 | -/* |
545 | - * cpu_init() initializes state that is per-CPU. Some data is already |
546 | - * initialized (naturally) in the bootstrap process, such as the GDT |
547 | - * and IDT. We reload them nevertheless, this function acts as a |
548 | - * 'CPU state barrier', nothing should get across. |
549 | - */ |
550 | -void __cpuinit cpu_init(void) |
551 | -{ |
552 | - int cpu = smp_processor_id(); |
553 | - struct task_struct *curr = current; |
554 | - |
555 | - /* Set up the real GDT and PDA, so we can transition from the |
556 | - boot versions. */ |
557 | - if (!init_gdt(cpu, curr)) { |
558 | - /* failed to allocate something; not much we can do... */ |
559 | - for (;;) |
560 | - local_irq_enable(); |
561 | - } |
562 | - |
563 | - cpu_set_gdt(cpu); |
564 | - _cpu_init(cpu, curr); |
565 | -} |
566 | - |
567 | #ifdef CONFIG_HOTPLUG_CPU |
568 | void __cpuinit cpu_uninit(void) |
569 | { |
570 | --- a/arch/x86/kernel/cpu/mtrr/main-xen.c |
571 | +++ b/arch/x86/kernel/cpu/mtrr/main-xen.c |
572 | @@ -167,7 +167,7 @@ |
573 | EXPORT_SYMBOL(mtrr_add); |
574 | EXPORT_SYMBOL(mtrr_del); |
575 | |
576 | -void __init mtrr_bp_init(void) |
577 | +__init void mtrr_bp_init(void) |
578 | { |
579 | } |
580 | |
581 | --- a/arch/x86/kernel/e820_32-xen.c |
582 | +++ b/arch/x86/kernel/e820_32-xen.c |
583 | @@ -162,26 +162,27 @@ |
584 | |
585 | static int __init romsignature(const unsigned char *rom) |
586 | { |
587 | + const unsigned short * const ptr = (const unsigned short *)rom; |
588 | unsigned short sig; |
589 | |
590 | - return probe_kernel_address((const unsigned short *)rom, sig) == 0 && |
591 | - sig == ROMSIGNATURE; |
592 | + return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; |
593 | } |
594 | |
595 | -static int __init romchecksum(unsigned char *rom, unsigned long length) |
596 | +static int __init romchecksum(const unsigned char *rom, unsigned long length) |
597 | { |
598 | - unsigned char sum; |
599 | + unsigned char sum, c; |
600 | |
601 | - for (sum = 0; length; length--) |
602 | - sum += *rom++; |
603 | - return sum == 0; |
604 | + for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) |
605 | + sum += c; |
606 | + return !length && !sum; |
607 | } |
608 | |
609 | static void __init probe_roms(void) |
610 | { |
611 | + const unsigned char *rom; |
612 | unsigned long start, length, upper; |
613 | - unsigned char *rom; |
614 | - int i; |
615 | + unsigned char c; |
616 | + int i; |
617 | |
618 | #ifdef CONFIG_XEN |
619 | /* Nothing to do if not running in dom0. */ |
620 | @@ -198,8 +199,11 @@ |
621 | |
622 | video_rom_resource.start = start; |
623 | |
624 | + if (probe_kernel_address(rom + 2, c) != 0) |
625 | + continue; |
626 | + |
627 | /* 0 < length <= 0x7f * 512, historically */ |
628 | - length = rom[2] * 512; |
629 | + length = c * 512; |
630 | |
631 | /* if checksum okay, trust length byte */ |
632 | if (length && romchecksum(rom, length)) |
633 | @@ -233,8 +237,11 @@ |
634 | if (!romsignature(rom)) |
635 | continue; |
636 | |
637 | + if (probe_kernel_address(rom + 2, c) != 0) |
638 | + continue; |
639 | + |
640 | /* 0 < length <= 0x7f * 512, historically */ |
641 | - length = rom[2] * 512; |
642 | + length = c * 512; |
643 | |
644 | /* but accept any length that fits if checksum okay */ |
645 | if (!length || start + length > upper || !romchecksum(rom, length)) |
646 | @@ -249,7 +256,7 @@ |
647 | } |
648 | |
649 | #ifdef CONFIG_XEN |
650 | -static struct e820map machine_e820 __initdata; |
651 | +static struct e820map machine_e820; |
652 | #define e820 machine_e820 |
653 | #endif |
654 | |
655 | @@ -409,10 +416,8 @@ |
656 | ____________________33__ |
657 | ______________________4_ |
658 | */ |
659 | - printk("sanitize start\n"); |
660 | /* if there's only one memory region, don't bother */ |
661 | if (*pnr_map < 2) { |
662 | - printk("sanitize bail 0\n"); |
663 | return -1; |
664 | } |
665 | |
666 | @@ -421,7 +426,6 @@ |
667 | /* bail out if we find any unreasonable addresses in bios map */ |
668 | for (i=0; i<old_nr; i++) |
669 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { |
670 | - printk("sanitize bail 1\n"); |
671 | return -1; |
672 | } |
673 | |
674 | @@ -517,7 +521,6 @@ |
675 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); |
676 | *pnr_map = new_nr; |
677 | |
678 | - printk("sanitize end\n"); |
679 | return 0; |
680 | } |
681 | |
682 | @@ -552,7 +555,6 @@ |
683 | unsigned long long size = biosmap->size; |
684 | unsigned long long end = start + size; |
685 | unsigned long type = biosmap->type; |
686 | - printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); |
687 | |
688 | /* Overflow in 64 bits? Ignore the memory map. */ |
689 | if (start > end) |
690 | @@ -564,17 +566,11 @@ |
691 | * Not right. Fix it up. |
692 | */ |
693 | if (type == E820_RAM) { |
694 | - printk("copy_e820_map() type is E820_RAM\n"); |
695 | if (start < 0x100000ULL && end > 0xA0000ULL) { |
696 | - printk("copy_e820_map() lies in range...\n"); |
697 | - if (start < 0xA0000ULL) { |
698 | - printk("copy_e820_map() start < 0xA0000ULL\n"); |
699 | + if (start < 0xA0000ULL) |
700 | add_memory_region(start, 0xA0000ULL-start, type); |
701 | - } |
702 | - if (end <= 0x100000ULL) { |
703 | - printk("copy_e820_map() end <= 0x100000ULL\n"); |
704 | + if (end <= 0x100000ULL) |
705 | continue; |
706 | - } |
707 | start = 0x100000ULL; |
708 | size = end - start; |
709 | } |
710 | --- a/arch/x86/kernel/e820_64-xen.c |
711 | +++ b/arch/x86/kernel/e820_64-xen.c |
712 | @@ -17,6 +17,8 @@ |
713 | #include <linux/kexec.h> |
714 | #include <linux/module.h> |
715 | #include <linux/mm.h> |
716 | +#include <linux/suspend.h> |
717 | +#include <linux/pfn.h> |
718 | |
719 | #include <asm/pgtable.h> |
720 | #include <asm/page.h> |
721 | @@ -28,7 +30,7 @@ |
722 | |
723 | struct e820map e820 __initdata; |
724 | #ifdef CONFIG_XEN |
725 | -struct e820map machine_e820 __initdata; |
726 | +struct e820map machine_e820; |
727 | #endif |
728 | |
729 | /* |
730 | @@ -293,22 +295,6 @@ |
731 | } |
732 | |
733 | #ifndef CONFIG_XEN |
734 | -/* Mark pages corresponding to given address range as nosave */ |
735 | -static void __init |
736 | -e820_mark_nosave_range(unsigned long start, unsigned long end) |
737 | -{ |
738 | - unsigned long pfn, max_pfn; |
739 | - |
740 | - if (start >= end) |
741 | - return; |
742 | - |
743 | - printk("Nosave address range: %016lx - %016lx\n", start, end); |
744 | - max_pfn = end >> PAGE_SHIFT; |
745 | - for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) |
746 | - if (pfn_valid(pfn)) |
747 | - SetPageNosave(pfn_to_page(pfn)); |
748 | -} |
749 | - |
750 | /* |
751 | * Find the ranges of physical addresses that do not correspond to |
752 | * e820 RAM areas and mark the corresponding pages as nosave for software |
753 | @@ -327,13 +313,13 @@ |
754 | struct e820entry *ei = &e820.map[i]; |
755 | |
756 | if (paddr < ei->addr) |
757 | - e820_mark_nosave_range(paddr, |
758 | - round_up(ei->addr, PAGE_SIZE)); |
759 | + register_nosave_region(PFN_DOWN(paddr), |
760 | + PFN_UP(ei->addr)); |
761 | |
762 | paddr = round_down(ei->addr + ei->size, PAGE_SIZE); |
763 | if (ei->type != E820_RAM) |
764 | - e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), |
765 | - paddr); |
766 | + register_nosave_region(PFN_UP(ei->addr), |
767 | + PFN_DOWN(paddr)); |
768 | |
769 | if (paddr >= (end_pfn << PAGE_SHIFT)) |
770 | break; |
771 | --- a/arch/x86/kernel/early_printk-xen.c |
772 | +++ b/arch/x86/kernel/early_printk-xen.c |
773 | @@ -11,11 +11,10 @@ |
774 | |
775 | #ifdef __i386__ |
776 | #include <asm/setup.h> |
777 | -#define VGABASE (__ISA_IO_base + 0xb8000) |
778 | #else |
779 | #include <asm/bootsetup.h> |
780 | -#define VGABASE ((void __iomem *)0xffffffff800b8000UL) |
781 | #endif |
782 | +#define VGABASE (__ISA_IO_base + 0xb8000) |
783 | |
784 | #ifndef CONFIG_XEN |
785 | static int max_ypos = 25, max_xpos = 80; |
786 | @@ -93,9 +92,9 @@ |
787 | static void early_serial_write(struct console *con, const char *s, unsigned n) |
788 | { |
789 | while (*s && n-- > 0) { |
790 | - early_serial_putc(*s); |
791 | if (*s == '\n') |
792 | early_serial_putc('\r'); |
793 | + early_serial_putc(*s); |
794 | s++; |
795 | } |
796 | } |
797 | @@ -205,7 +204,7 @@ |
798 | return ret; |
799 | } |
800 | |
801 | -void __init simnow_init(char *str) |
802 | +static void __init simnow_init(char *str) |
803 | { |
804 | char *fn = "klog"; |
805 | if (*str == '=') |
806 | @@ -277,22 +276,12 @@ |
807 | early_console = &simnow_console; |
808 | keep_early = 1; |
809 | } |
810 | + |
811 | + if (keep_early) |
812 | + early_console->flags &= ~CON_BOOT; |
813 | + else |
814 | + early_console->flags |= CON_BOOT; |
815 | register_console(early_console); |
816 | return 0; |
817 | } |
818 | - |
819 | early_param("earlyprintk", setup_early_printk); |
820 | - |
821 | -void __init disable_early_printk(void) |
822 | -{ |
823 | - if (!early_console_initialized || !early_console) |
824 | - return; |
825 | - if (!keep_early) { |
826 | - printk("disabling early console\n"); |
827 | - unregister_console(early_console); |
828 | - early_console_initialized = 0; |
829 | - } else { |
830 | - printk("keeping early console\n"); |
831 | - } |
832 | -} |
833 | - |
834 | --- a/arch/x86/kernel/entry_32-xen.S |
835 | +++ b/arch/x86/kernel/entry_32-xen.S |
836 | @@ -15,7 +15,7 @@ |
837 | * I changed all the .align's to 4 (16 byte alignment), as that's faster |
838 | * on a 486. |
839 | * |
840 | - * Stack layout in 'ret_from_system_call': |
841 | + * Stack layout in 'syscall_exit': |
842 | * ptrace needs to have all regs on the stack. |
843 | * if the order here is changed, it needs to be |
844 | * updated in fork.c:copy_process, signal.c:do_signal, |
845 | @@ -135,7 +135,7 @@ |
846 | movl $(__USER_DS), %edx; \ |
847 | movl %edx, %ds; \ |
848 | movl %edx, %es; \ |
849 | - movl $(__KERNEL_PDA), %edx; \ |
850 | + movl $(__KERNEL_PERCPU), %edx; \ |
851 | movl %edx, %fs |
852 | |
853 | #define RESTORE_INT_REGS \ |
854 | @@ -308,16 +308,12 @@ |
855 | pushl $(__USER_CS) |
856 | CFI_ADJUST_CFA_OFFSET 4 |
857 | /*CFI_REL_OFFSET cs, 0*/ |
858 | -#ifndef CONFIG_COMPAT_VDSO |
859 | /* |
860 | * Push current_thread_info()->sysenter_return to the stack. |
861 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
862 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
863 | */ |
864 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) |
865 | -#else |
866 | - pushl $SYSENTER_RETURN |
867 | -#endif |
868 | CFI_ADJUST_CFA_OFFSET 4 |
869 | CFI_REL_OFFSET eip, 0 |
870 | |
871 | @@ -345,7 +341,7 @@ |
872 | jae syscall_badsys |
873 | call *sys_call_table(,%eax,4) |
874 | movl %eax,PT_EAX(%esp) |
875 | - DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) |
876 | + DISABLE_INTERRUPTS(CLBR_ANY) |
877 | TRACE_IRQS_OFF |
878 | movl TI_flags(%ebp), %ecx |
879 | testw $_TIF_ALLWORK_MASK, %cx |
880 | @@ -400,10 +396,6 @@ |
881 | CFI_ADJUST_CFA_OFFSET 4 |
882 | SAVE_ALL |
883 | GET_THREAD_INFO(%ebp) |
884 | - testl $TF_MASK,PT_EFLAGS(%esp) |
885 | - jz no_singlestep |
886 | - orl $_TIF_SINGLESTEP,TI_flags(%ebp) |
887 | -no_singlestep: |
888 | # system call tracing in operation / emulation |
889 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
890 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) |
891 | @@ -418,6 +410,10 @@ |
892 | # setting need_resched or sigpending |
893 | # between sampling and the iret |
894 | TRACE_IRQS_OFF |
895 | + testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit |
896 | + jz no_singlestep |
897 | + orl $_TIF_SINGLESTEP,TI_flags(%ebp) |
898 | +no_singlestep: |
899 | movl TI_flags(%ebp), %ecx |
900 | testw $_TIF_ALLWORK_MASK, %cx # current->work |
901 | jne syscall_exit_work |
902 | @@ -635,9 +631,7 @@ |
903 | #ifndef CONFIG_XEN |
904 | #define FIXUP_ESPFIX_STACK \ |
905 | /* since we are on a wrong stack, we cant make it a C code :( */ \ |
906 | - movl %fs:PDA_cpu, %ebx; \ |
907 | - PER_CPU(cpu_gdt_descr, %ebx); \ |
908 | - movl GDS_address(%ebx), %ebx; \ |
909 | + PER_CPU(gdt_page, %ebx); \ |
910 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ |
911 | addl %esp, %eax; \ |
912 | pushl $__KERNEL_DS; \ |
913 | @@ -710,7 +704,7 @@ |
914 | SAVE_ALL; \ |
915 | TRACE_IRQS_OFF \ |
916 | movl %esp,%eax; \ |
917 | - call smp_/**/name; \ |
918 | + call smp_##name; \ |
919 | jmp ret_from_intr; \ |
920 | CFI_ENDPROC; \ |
921 | ENDPROC(name) |
922 | @@ -718,10 +712,6 @@ |
923 | /* The include is where all of the SMP etc. interrupts come from */ |
924 | #include "entry_arch.h" |
925 | |
926 | -/* This alternate entry is needed because we hijack the apic LVTT */ |
927 | -#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) |
928 | -BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) |
929 | -#endif |
930 | #else |
931 | #define UNWIND_ESPFIX_STACK |
932 | #endif |
933 | @@ -764,7 +754,7 @@ |
934 | pushl %fs |
935 | CFI_ADJUST_CFA_OFFSET 4 |
936 | /*CFI_REL_OFFSET fs, 0*/ |
937 | - movl $(__KERNEL_PDA), %ecx |
938 | + movl $(__KERNEL_PERCPU), %ecx |
939 | movl %ecx, %fs |
940 | UNWIND_ESPFIX_STACK |
941 | popl %ecx |
942 | --- a/arch/x86/kernel/entry_64-xen.S |
943 | +++ b/arch/x86/kernel/entry_64-xen.S |
944 | @@ -1254,3 +1254,10 @@ |
945 | ret |
946 | CFI_ENDPROC |
947 | ENDPROC(call_softirq) |
948 | + |
949 | +KPROBE_ENTRY(ignore_sysret) |
950 | + CFI_STARTPROC |
951 | + mov $-ENOSYS,%eax |
952 | + HYPERVISOR_IRET 0 |
953 | + CFI_ENDPROC |
954 | +ENDPROC(ignore_sysret) |
955 | --- a/arch/x86/kernel/genapic_64-xen.c |
956 | +++ b/arch/x86/kernel/genapic_64-xen.c |
957 | @@ -11,123 +11,57 @@ |
958 | #include <linux/threads.h> |
959 | #include <linux/cpumask.h> |
960 | #include <linux/string.h> |
961 | +#include <linux/module.h> |
962 | #include <linux/kernel.h> |
963 | #include <linux/ctype.h> |
964 | #include <linux/init.h> |
965 | -#include <linux/module.h> |
966 | |
967 | #include <asm/smp.h> |
968 | #include <asm/ipi.h> |
969 | +#include <asm/genapic.h> |
970 | |
971 | -#if defined(CONFIG_ACPI) |
972 | +#ifdef CONFIG_ACPI |
973 | #include <acpi/acpi_bus.h> |
974 | #endif |
975 | |
976 | /* which logical CPU number maps to which CPU (physical APIC ID) */ |
977 | -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
978 | +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly |
979 | + = { [0 ... NR_CPUS-1] = BAD_APICID }; |
980 | EXPORT_SYMBOL(x86_cpu_to_apicid); |
981 | -u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; |
982 | |
983 | -extern struct genapic apic_cluster; |
984 | -extern struct genapic apic_flat; |
985 | -extern struct genapic apic_physflat; |
986 | +u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; |
987 | |
988 | #ifndef CONFIG_XEN |
989 | -struct genapic *genapic = &apic_flat; |
990 | -struct genapic *genapic_force; |
991 | +struct genapic __read_mostly *genapic = &apic_flat; |
992 | #else |
993 | extern struct genapic apic_xen; |
994 | -struct genapic *genapic = &apic_xen; |
995 | +struct genapic __read_mostly *genapic = &apic_xen; |
996 | #endif |
997 | |
998 | |
999 | /* |
1000 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
1001 | */ |
1002 | -void __init clustered_apic_check(void) |
1003 | +void __init setup_apic_routing(void) |
1004 | { |
1005 | #ifndef CONFIG_XEN |
1006 | - long i; |
1007 | - u8 clusters, max_cluster; |
1008 | - u8 id; |
1009 | - u8 cluster_cnt[NUM_APIC_CLUSTERS]; |
1010 | - int max_apic = 0; |
1011 | - |
1012 | - /* genapic selection can be forced because of certain quirks. |
1013 | - */ |
1014 | - if (genapic_force) { |
1015 | - genapic = genapic_force; |
1016 | - goto print; |
1017 | - } |
1018 | - |
1019 | -#if defined(CONFIG_ACPI) |
1020 | +#ifdef CONFIG_ACPI |
1021 | /* |
1022 | - * Some x86_64 machines use physical APIC mode regardless of how many |
1023 | - * procs/clusters are present (x86_64 ES7000 is an example). |
1024 | + * Quirk: some x86_64 machines can only use physical APIC mode |
1025 | + * regardless of how many processors are present (x86_64 ES7000 |
1026 | + * is an example). |
1027 | */ |
1028 | - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) |
1029 | - if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { |
1030 | - genapic = &apic_cluster; |
1031 | - goto print; |
1032 | - } |
1033 | -#endif |
1034 | - |
1035 | - memset(cluster_cnt, 0, sizeof(cluster_cnt)); |
1036 | - for (i = 0; i < NR_CPUS; i++) { |
1037 | - id = bios_cpu_apicid[i]; |
1038 | - if (id == BAD_APICID) |
1039 | - continue; |
1040 | - if (id > max_apic) |
1041 | - max_apic = id; |
1042 | - cluster_cnt[APIC_CLUSTERID(id)]++; |
1043 | - } |
1044 | - |
1045 | - /* Don't use clustered mode on AMD platforms. */ |
1046 | - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
1047 | + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && |
1048 | + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) |
1049 | genapic = &apic_physflat; |
1050 | -#ifndef CONFIG_HOTPLUG_CPU |
1051 | - /* In the CPU hotplug case we cannot use broadcast mode |
1052 | - because that opens a race when a CPU is removed. |
1053 | - Stay at physflat mode in this case. |
1054 | - It is bad to do this unconditionally though. Once |
1055 | - we have ACPI platform support for CPU hotplug |
1056 | - we should detect hotplug capablity from ACPI tables and |
1057 | - only do this when really needed. -AK */ |
1058 | - if (max_apic <= 8) |
1059 | - genapic = &apic_flat; |
1060 | + else |
1061 | #endif |
1062 | - goto print; |
1063 | - } |
1064 | |
1065 | - clusters = 0; |
1066 | - max_cluster = 0; |
1067 | - |
1068 | - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { |
1069 | - if (cluster_cnt[i] > 0) { |
1070 | - ++clusters; |
1071 | - if (cluster_cnt[i] > max_cluster) |
1072 | - max_cluster = cluster_cnt[i]; |
1073 | - } |
1074 | - } |
1075 | - |
1076 | - /* |
1077 | - * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode, |
1078 | - * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical |
1079 | - * else physical mode. |
1080 | - * (We don't use lowest priority delivery + HW APIC IRQ steering, so |
1081 | - * can ignore the clustered logical case and go straight to physical.) |
1082 | - */ |
1083 | - if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) { |
1084 | -#ifdef CONFIG_HOTPLUG_CPU |
1085 | - /* Don't use APIC shortcuts in CPU hotplug to avoid races */ |
1086 | - genapic = &apic_physflat; |
1087 | -#else |
1088 | + if (cpus_weight(cpu_possible_map) <= 8) |
1089 | genapic = &apic_flat; |
1090 | -#endif |
1091 | - } else |
1092 | - genapic = &apic_cluster; |
1093 | + else |
1094 | + genapic = &apic_physflat; |
1095 | |
1096 | -print: |
1097 | #else |
1098 | /* hardcode to xen apic functions */ |
1099 | genapic = &apic_xen; |
1100 | @@ -135,7 +69,7 @@ |
1101 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); |
1102 | } |
1103 | |
1104 | -/* Same for both flat and clustered. */ |
1105 | +/* Same for both flat and physical. */ |
1106 | |
1107 | #ifdef CONFIG_XEN |
1108 | extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); |
1109 | --- a/arch/x86/kernel/genapic_xen_64.c |
1110 | +++ b/arch/x86/kernel/genapic_xen_64.c |
1111 | @@ -21,9 +21,8 @@ |
1112 | #include <asm/ipi.h> |
1113 | #else |
1114 | #include <asm/apic.h> |
1115 | -#include <asm/apicdef.h> |
1116 | -#include <asm/genapic.h> |
1117 | #endif |
1118 | +#include <asm/genapic.h> |
1119 | #include <xen/evtchn.h> |
1120 | |
1121 | DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); |
1122 | --- a/arch/x86/kernel/head64-xen.c |
1123 | +++ b/arch/x86/kernel/head64-xen.c |
1124 | @@ -22,13 +22,21 @@ |
1125 | #include <asm/setup.h> |
1126 | #include <asm/desc.h> |
1127 | #include <asm/pgtable.h> |
1128 | +#include <asm/tlbflush.h> |
1129 | #include <asm/sections.h> |
1130 | |
1131 | unsigned long start_pfn; |
1132 | |
1133 | +#ifndef CONFIG_XEN |
1134 | +static void __init zap_identity_mappings(void) |
1135 | +{ |
1136 | + pgd_t *pgd = pgd_offset_k(0UL); |
1137 | + pgd_clear(pgd); |
1138 | + __flush_tlb(); |
1139 | +} |
1140 | + |
1141 | /* Don't add a printk in there. printk relies on the PDA which is not initialized |
1142 | yet. */ |
1143 | -#if 0 |
1144 | static void __init clear_bss(void) |
1145 | { |
1146 | memset(__bss_start, 0, |
1147 | @@ -37,26 +45,25 @@ |
1148 | #endif |
1149 | |
1150 | #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ |
1151 | -#define OLD_CL_MAGIC_ADDR 0x90020 |
1152 | +#define OLD_CL_MAGIC_ADDR 0x20 |
1153 | #define OLD_CL_MAGIC 0xA33F |
1154 | -#define OLD_CL_BASE_ADDR 0x90000 |
1155 | -#define OLD_CL_OFFSET 0x90022 |
1156 | +#define OLD_CL_OFFSET 0x22 |
1157 | |
1158 | static void __init copy_bootdata(char *real_mode_data) |
1159 | { |
1160 | #ifndef CONFIG_XEN |
1161 | - int new_data; |
1162 | + unsigned long new_data; |
1163 | char * command_line; |
1164 | |
1165 | memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); |
1166 | - new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); |
1167 | + new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER); |
1168 | if (!new_data) { |
1169 | - if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { |
1170 | + if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) { |
1171 | return; |
1172 | } |
1173 | - new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; |
1174 | + new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET); |
1175 | } |
1176 | - command_line = (char *) ((u64)(new_data)); |
1177 | + command_line = __va(new_data); |
1178 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); |
1179 | #else |
1180 | int max_cmdline; |
1181 | @@ -98,10 +105,13 @@ |
1182 | while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents ) |
1183 | machine_to_phys_order++; |
1184 | |
1185 | -#if 0 |
1186 | +#ifndef CONFIG_XEN |
1187 | /* clear bss before set_intr_gate with early_idt_handler */ |
1188 | clear_bss(); |
1189 | |
1190 | + /* Make NULL pointers segfault */ |
1191 | + zap_identity_mappings(); |
1192 | + |
1193 | for (i = 0; i < IDT_ENTRIES; i++) |
1194 | set_intr_gate(i, early_idt_handler); |
1195 | asm volatile("lidt %0" :: "m" (idt_descr)); |
1196 | @@ -113,7 +123,7 @@ |
1197 | cpu_pda(i) = &boot_cpu_pda[i]; |
1198 | |
1199 | pda_init(0); |
1200 | - copy_bootdata(real_mode_data); |
1201 | + copy_bootdata(__va(real_mode_data)); |
1202 | #ifdef CONFIG_SMP |
1203 | cpu_set(0, cpu_online_map); |
1204 | #endif |
1205 | --- a/arch/x86/kernel/head_32-xen.S |
1206 | +++ b/arch/x86/kernel/head_32-xen.S |
1207 | @@ -37,7 +37,8 @@ |
1208 | /* Set up the stack pointer */ |
1209 | movl $(init_thread_union+THREAD_SIZE),%esp |
1210 | |
1211 | - call setup_pda |
1212 | + movl %ss,%eax |
1213 | + movl %eax,%fs # gets reset once there's real percpu |
1214 | |
1215 | /* get vendor info */ |
1216 | xorl %eax,%eax # call CPUID with 0 -> return vendor ID |
1217 | @@ -64,55 +65,11 @@ |
1218 | xorl %eax,%eax # Clear GS |
1219 | movl %eax,%gs |
1220 | |
1221 | - movl $(__KERNEL_PDA),%eax |
1222 | - mov %eax,%fs |
1223 | - |
1224 | cld # gcc2 wants the direction flag cleared at all times |
1225 | |
1226 | pushl $0 # fake return address for unwinder |
1227 | jmp start_kernel |
1228 | |
1229 | -/* |
1230 | - * Point the GDT at this CPU's PDA. This will be |
1231 | - * cpu_gdt_table and boot_pda. |
1232 | - */ |
1233 | -ENTRY(setup_pda) |
1234 | - /* get the PDA pointer */ |
1235 | - movl $boot_pda, %eax |
1236 | - |
1237 | - /* slot the PDA address into the GDT */ |
1238 | - mov $cpu_gdt_table, %ecx |
1239 | - mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ |
1240 | - shr $16, %eax |
1241 | - mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ |
1242 | - mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ |
1243 | - |
1244 | - # %esi still points to start_info, and no registers |
1245 | - # need to be preserved. |
1246 | - |
1247 | - movl XEN_START_mfn_list(%esi), %ebx |
1248 | - movl $(cpu_gdt_table - __PAGE_OFFSET), %eax |
1249 | - shrl $PAGE_SHIFT, %eax |
1250 | - movl (%ebx,%eax,4), %ecx |
1251 | - pushl %ecx # frame number for set_gdt below |
1252 | - |
1253 | - xorl %esi, %esi |
1254 | - xorl %edx, %edx |
1255 | - shldl $PAGE_SHIFT, %ecx, %edx |
1256 | - shll $PAGE_SHIFT, %ecx |
1257 | - orl $0x61, %ecx |
1258 | - movl $cpu_gdt_table, %ebx |
1259 | - movl $__HYPERVISOR_update_va_mapping, %eax |
1260 | - int $0x82 |
1261 | - |
1262 | - movl $(PAGE_SIZE_asm / 8), %ecx |
1263 | - movl %esp, %ebx |
1264 | - movl $__HYPERVISOR_set_gdt, %eax |
1265 | - int $0x82 |
1266 | - |
1267 | - popl %ecx |
1268 | - ret |
1269 | - |
1270 | #define HYPERCALL_PAGE_OFFSET 0x1000 |
1271 | .org HYPERCALL_PAGE_OFFSET |
1272 | ENTRY(hypercall_page) |
1273 | @@ -138,60 +95,6 @@ |
1274 | */ |
1275 | .data |
1276 | |
1277 | -/* |
1278 | - * The Global Descriptor Table contains 28 quadwords, per-CPU. |
1279 | - */ |
1280 | - .section .data.page_aligned, "aw" |
1281 | - .align PAGE_SIZE_asm |
1282 | -ENTRY(cpu_gdt_table) |
1283 | - .quad 0x0000000000000000 /* NULL descriptor */ |
1284 | - .quad 0x0000000000000000 /* 0x0b reserved */ |
1285 | - .quad 0x0000000000000000 /* 0x13 reserved */ |
1286 | - .quad 0x0000000000000000 /* 0x1b reserved */ |
1287 | - .quad 0x0000000000000000 /* 0x20 unused */ |
1288 | - .quad 0x0000000000000000 /* 0x28 unused */ |
1289 | - .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ |
1290 | - .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ |
1291 | - .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ |
1292 | - .quad 0x0000000000000000 /* 0x4b reserved */ |
1293 | - .quad 0x0000000000000000 /* 0x53 reserved */ |
1294 | - .quad 0x0000000000000000 /* 0x5b reserved */ |
1295 | - |
1296 | - .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ |
1297 | - .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ |
1298 | - .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ |
1299 | - .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ |
1300 | - |
1301 | - .quad 0x0000000000000000 /* 0x80 TSS descriptor */ |
1302 | - .quad 0x0000000000000000 /* 0x88 LDT descriptor */ |
1303 | - |
1304 | - /* |
1305 | - * Segments used for calling PnP BIOS have byte granularity. |
1306 | - * They code segments and data segments have fixed 64k limits, |
1307 | - * the transfer segment sizes are set at run time. |
1308 | - */ |
1309 | - .quad 0x0000000000000000 /* 0x90 32-bit code */ |
1310 | - .quad 0x0000000000000000 /* 0x98 16-bit code */ |
1311 | - .quad 0x0000000000000000 /* 0xa0 16-bit data */ |
1312 | - .quad 0x0000000000000000 /* 0xa8 16-bit data */ |
1313 | - .quad 0x0000000000000000 /* 0xb0 16-bit data */ |
1314 | - |
1315 | - /* |
1316 | - * The APM segments have byte granularity and their bases |
1317 | - * are set at run time. All have 64k limits. |
1318 | - */ |
1319 | - .quad 0x0000000000000000 /* 0xb8 APM CS code */ |
1320 | - .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ |
1321 | - .quad 0x0000000000000000 /* 0xc8 APM DS data */ |
1322 | - |
1323 | - .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */ |
1324 | - .quad 0x00cf92000000ffff /* 0xd8 - PDA */ |
1325 | - .quad 0x0000000000000000 /* 0xe0 - unused */ |
1326 | - .quad 0x0000000000000000 /* 0xe8 - unused */ |
1327 | - .quad 0x0000000000000000 /* 0xf0 - unused */ |
1328 | - .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ |
1329 | - .align PAGE_SIZE_asm |
1330 | - |
1331 | #if CONFIG_XEN_COMPAT <= 0x030002 |
1332 | /* |
1333 | * __xen_guest information |
1334 | --- a/arch/x86/kernel/head_64-xen.S |
1335 | +++ b/arch/x86/kernel/head_64-xen.S |
1336 | @@ -5,6 +5,7 @@ |
1337 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> |
1338 | * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> |
1339 | * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> |
1340 | + * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com> |
1341 | * Jun Nakajima <jun.nakajima@intel.com> |
1342 | * Modified for Xen |
1343 | */ |
1344 | @@ -41,18 +42,15 @@ |
1345 | .word gdt_end-cpu_gdt_table-1 |
1346 | .long cpu_gdt_table-__START_KERNEL_map |
1347 | #endif |
1348 | -ENTRY(stext) |
1349 | -ENTRY(_stext) |
1350 | |
1351 | - $page = 0 |
1352 | +.balign PAGE_SIZE |
1353 | + |
1354 | #define NEXT_PAGE(name) \ |
1355 | - $page = $page + 1; \ |
1356 | - .org $page * 0x1000; \ |
1357 | - phys_##name = $page * 0x1000 + __PHYSICAL_START; \ |
1358 | + .balign PAGE_SIZE; \ |
1359 | + phys_##name = . - .bootstrap.text; \ |
1360 | ENTRY(name) |
1361 | |
1362 | NEXT_PAGE(init_level4_pgt) |
1363 | - /* This gets initialized in x86_64_start_kernel */ |
1364 | .fill 512,8,0 |
1365 | NEXT_PAGE(init_level4_user_pgt) |
1366 | /* |
1367 | @@ -136,13 +134,13 @@ |
1368 | |
1369 | ENTRY(cpu_gdt_table) |
1370 | .quad 0x0000000000000000 /* NULL descriptor */ |
1371 | + .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ |
1372 | + .quad 0x00af9b000000ffff /* __KERNEL_CS */ |
1373 | + .quad 0x00cf93000000ffff /* __KERNEL_DS */ |
1374 | + .quad 0x00cffb000000ffff /* __USER32_CS */ |
1375 | + .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ |
1376 | + .quad 0x00affb000000ffff /* __USER_CS */ |
1377 | .quad 0x0 /* unused */ |
1378 | - .quad 0x00af9a000000ffff /* __KERNEL_CS */ |
1379 | - .quad 0x00cf92000000ffff /* __KERNEL_DS */ |
1380 | - .quad 0x00cffa000000ffff /* __USER32_CS */ |
1381 | - .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */ |
1382 | - .quad 0x00affa000000ffff /* __USER_CS */ |
1383 | - .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ |
1384 | .quad 0,0 /* TSS */ |
1385 | .quad 0,0 /* LDT */ |
1386 | .quad 0,0,0 /* three TLS descriptors */ |
1387 | @@ -165,14 +163,11 @@ |
1388 | * __xen_guest information |
1389 | */ |
1390 | .macro utoh value |
1391 | - .if (\value) < 0 || (\value) >= 0x10 |
1392 | - utoh (((\value)>>4)&0x0fffffffffffffff) |
1393 | - .endif |
1394 | - .if ((\value) & 0xf) < 10 |
1395 | - .byte '0' + ((\value) & 0xf) |
1396 | - .else |
1397 | - .byte 'A' + ((\value) & 0xf) - 10 |
1398 | - .endif |
1399 | + i = 64 |
1400 | + .rept 16 |
1401 | + i = i - 4 |
1402 | + .byte '0' + ((((\value) >> i) & 0xf) > 9) * ('0' - 'A' + 10) + (((\value) >> i) & 0xf) |
1403 | + .endr |
1404 | .endm |
1405 | |
1406 | .section __xen_guest |
1407 | --- a/arch/x86/kernel/io_apic_32-xen.c |
1408 | +++ b/arch/x86/kernel/io_apic_32-xen.c |
1409 | @@ -25,7 +25,6 @@ |
1410 | #include <linux/init.h> |
1411 | #include <linux/delay.h> |
1412 | #include <linux/sched.h> |
1413 | -#include <linux/smp_lock.h> |
1414 | #include <linux/mc146818rtc.h> |
1415 | #include <linux/compiler.h> |
1416 | #include <linux/acpi.h> |
1417 | @@ -35,6 +34,7 @@ |
1418 | #include <linux/msi.h> |
1419 | #include <linux/htirq.h> |
1420 | #include <linux/freezer.h> |
1421 | +#include <linux/kthread.h> |
1422 | |
1423 | #include <asm/io.h> |
1424 | #include <asm/smp.h> |
1425 | @@ -705,8 +705,6 @@ |
1426 | unsigned long prev_balance_time = jiffies; |
1427 | long time_remaining = balanced_irq_interval; |
1428 | |
1429 | - daemonize("kirqd"); |
1430 | - |
1431 | /* push everything to CPU 0 to give us a starting point. */ |
1432 | for (i = 0 ; i < NR_IRQS ; i++) { |
1433 | irq_desc[i].pending_mask = cpumask_of_cpu(0); |
1434 | @@ -766,10 +764,9 @@ |
1435 | } |
1436 | |
1437 | printk(KERN_INFO "Starting balanced_irq\n"); |
1438 | - if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) |
1439 | + if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) |
1440 | return 0; |
1441 | - else |
1442 | - printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); |
1443 | + printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); |
1444 | failed: |
1445 | for_each_possible_cpu(i) { |
1446 | kfree(irq_cpu_data[i].irq_delta); |
1447 | @@ -1445,10 +1442,6 @@ |
1448 | enable_8259A_irq(0); |
1449 | } |
1450 | |
1451 | -static inline void UNEXPECTED_IO_APIC(void) |
1452 | -{ |
1453 | -} |
1454 | - |
1455 | void __init print_IO_APIC(void) |
1456 | { |
1457 | int apic, i; |
1458 | @@ -1488,34 +1481,12 @@ |
1459 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
1460 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); |
1461 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); |
1462 | - if (reg_00.bits.ID >= get_physical_broadcast()) |
1463 | - UNEXPECTED_IO_APIC(); |
1464 | - if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) |
1465 | - UNEXPECTED_IO_APIC(); |
1466 | |
1467 | printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); |
1468 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); |
1469 | - if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ |
1470 | - (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ |
1471 | - (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ |
1472 | - (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ |
1473 | - (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ |
1474 | - (reg_01.bits.entries != 0x2E) && |
1475 | - (reg_01.bits.entries != 0x3F) |
1476 | - ) |
1477 | - UNEXPECTED_IO_APIC(); |
1478 | |
1479 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); |
1480 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); |
1481 | - if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ |
1482 | - (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ |
1483 | - (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ |
1484 | - (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ |
1485 | - (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ |
1486 | - ) |
1487 | - UNEXPECTED_IO_APIC(); |
1488 | - if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) |
1489 | - UNEXPECTED_IO_APIC(); |
1490 | |
1491 | /* |
1492 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, |
1493 | @@ -1525,8 +1496,6 @@ |
1494 | if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { |
1495 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); |
1496 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); |
1497 | - if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) |
1498 | - UNEXPECTED_IO_APIC(); |
1499 | } |
1500 | |
1501 | /* |
1502 | @@ -1538,8 +1507,6 @@ |
1503 | reg_03.raw != reg_01.raw) { |
1504 | printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); |
1505 | printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); |
1506 | - if (reg_03.bits.__reserved_1) |
1507 | - UNEXPECTED_IO_APIC(); |
1508 | } |
1509 | |
1510 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
1511 | @@ -2670,19 +2637,19 @@ |
1512 | if (irq < 0) |
1513 | return irq; |
1514 | |
1515 | - set_irq_msi(irq, desc); |
1516 | ret = msi_compose_msg(dev, irq, &msg); |
1517 | if (ret < 0) { |
1518 | destroy_irq(irq); |
1519 | return ret; |
1520 | } |
1521 | |
1522 | + set_irq_msi(irq, desc); |
1523 | write_msi_msg(irq, &msg); |
1524 | |
1525 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, |
1526 | "edge"); |
1527 | |
1528 | - return irq; |
1529 | + return 0; |
1530 | } |
1531 | |
1532 | void arch_teardown_msi_irq(unsigned int irq) |
1533 | --- a/arch/x86/kernel/io_apic_64-xen.c |
1534 | +++ b/arch/x86/kernel/io_apic_64-xen.c |
1535 | @@ -25,7 +25,6 @@ |
1536 | #include <linux/init.h> |
1537 | #include <linux/delay.h> |
1538 | #include <linux/sched.h> |
1539 | -#include <linux/smp_lock.h> |
1540 | #include <linux/pci.h> |
1541 | #include <linux/mc146818rtc.h> |
1542 | #include <linux/acpi.h> |
1543 | @@ -897,10 +896,6 @@ |
1544 | enable_8259A_irq(0); |
1545 | } |
1546 | |
1547 | -void __init UNEXPECTED_IO_APIC(void) |
1548 | -{ |
1549 | -} |
1550 | - |
1551 | void __apicdebuginit print_IO_APIC(void) |
1552 | { |
1553 | int apic, i; |
1554 | @@ -936,40 +931,16 @@ |
1555 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); |
1556 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
1557 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
1558 | - if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) |
1559 | - UNEXPECTED_IO_APIC(); |
1560 | |
1561 | printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); |
1562 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); |
1563 | - if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ |
1564 | - (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ |
1565 | - (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ |
1566 | - (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ |
1567 | - (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ |
1568 | - (reg_01.bits.entries != 0x2E) && |
1569 | - (reg_01.bits.entries != 0x3F) && |
1570 | - (reg_01.bits.entries != 0x03) |
1571 | - ) |
1572 | - UNEXPECTED_IO_APIC(); |
1573 | |
1574 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); |
1575 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); |
1576 | - if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ |
1577 | - (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */ |
1578 | - (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ |
1579 | - (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ |
1580 | - (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ |
1581 | - (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ |
1582 | - ) |
1583 | - UNEXPECTED_IO_APIC(); |
1584 | - if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) |
1585 | - UNEXPECTED_IO_APIC(); |
1586 | |
1587 | if (reg_01.bits.version >= 0x10) { |
1588 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); |
1589 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); |
1590 | - if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) |
1591 | - UNEXPECTED_IO_APIC(); |
1592 | } |
1593 | |
1594 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
1595 | @@ -1401,8 +1372,7 @@ |
1596 | |
1597 | vector = ~get_irq_regs()->orig_rax; |
1598 | me = smp_processor_id(); |
1599 | - if ((vector == cfg->vector) && |
1600 | - cpu_isset(smp_processor_id(), cfg->domain)) { |
1601 | + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { |
1602 | cpumask_t cleanup_mask; |
1603 | |
1604 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); |
1605 | @@ -1437,7 +1407,7 @@ |
1606 | |
1607 | /* |
1608 | * We must acknowledge the irq before we move it or the acknowledge will |
1609 | - * not propogate properly. |
1610 | + * not propagate properly. |
1611 | */ |
1612 | ack_APIC_irq(); |
1613 | |
1614 | @@ -1520,6 +1490,7 @@ |
1615 | static void end_lapic_irq (unsigned int i) { /* nothing */ } |
1616 | |
1617 | static struct hw_interrupt_type lapic_irq_type __read_mostly = { |
1618 | + .name = "local-APIC", |
1619 | .typename = "local-APIC-edge", |
1620 | .startup = NULL, /* startup_irq() not used for IRQ0 */ |
1621 | .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ |
1622 | @@ -1989,18 +1960,18 @@ |
1623 | if (irq < 0) |
1624 | return irq; |
1625 | |
1626 | - set_irq_msi(irq, desc); |
1627 | ret = msi_compose_msg(dev, irq, &msg); |
1628 | if (ret < 0) { |
1629 | destroy_irq(irq); |
1630 | return ret; |
1631 | } |
1632 | |
1633 | + set_irq_msi(irq, desc); |
1634 | write_msi_msg(irq, &msg); |
1635 | |
1636 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); |
1637 | |
1638 | - return irq; |
1639 | + return 0; |
1640 | } |
1641 | |
1642 | void arch_teardown_msi_irq(unsigned int irq) |
1643 | --- a/arch/x86/kernel/ioport_32-xen.c |
1644 | +++ b/arch/x86/kernel/ioport_32-xen.c |
1645 | @@ -12,10 +12,10 @@ |
1646 | #include <linux/types.h> |
1647 | #include <linux/ioport.h> |
1648 | #include <linux/smp.h> |
1649 | -#include <linux/smp_lock.h> |
1650 | #include <linux/stddef.h> |
1651 | #include <linux/slab.h> |
1652 | #include <linux/thread_info.h> |
1653 | +#include <linux/syscalls.h> |
1654 | #include <xen/interface/physdev.h> |
1655 | |
1656 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ |
1657 | --- a/arch/x86/kernel/ioport_64-xen.c |
1658 | +++ b/arch/x86/kernel/ioport_64-xen.c |
1659 | @@ -13,10 +13,10 @@ |
1660 | #include <linux/ioport.h> |
1661 | #include <linux/mm.h> |
1662 | #include <linux/smp.h> |
1663 | -#include <linux/smp_lock.h> |
1664 | #include <linux/stddef.h> |
1665 | #include <linux/slab.h> |
1666 | #include <linux/thread_info.h> |
1667 | +#include <linux/syscalls.h> |
1668 | #include <xen/interface/physdev.h> |
1669 | |
1670 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ |
1671 | --- a/arch/x86/kernel/irq_32-xen.c |
1672 | +++ b/arch/x86/kernel/irq_32-xen.c |
1673 | @@ -24,6 +24,9 @@ |
1674 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; |
1675 | EXPORT_PER_CPU_SYMBOL(irq_stat); |
1676 | |
1677 | +DEFINE_PER_CPU(struct pt_regs *, irq_regs); |
1678 | +EXPORT_PER_CPU_SYMBOL(irq_regs); |
1679 | + |
1680 | /* |
1681 | * 'what should we do if we get a hw irq event on an illegal vector'. |
1682 | * each architecture has to answer this themselves. |
1683 | --- a/arch/x86/kernel/irq_64-xen.c |
1684 | +++ b/arch/x86/kernel/irq_64-xen.c |
1685 | @@ -32,7 +32,7 @@ |
1686 | */ |
1687 | static inline void stack_overflow_check(struct pt_regs *regs) |
1688 | { |
1689 | - u64 curbase = (u64) current->thread_info; |
1690 | + u64 curbase = (u64)task_stack_page(current); |
1691 | static unsigned long warned = -60*HZ; |
1692 | |
1693 | if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && |
1694 | @@ -145,17 +145,43 @@ |
1695 | |
1696 | for (irq = 0; irq < NR_IRQS; irq++) { |
1697 | cpumask_t mask; |
1698 | + int break_affinity = 0; |
1699 | + int set_affinity = 1; |
1700 | + |
1701 | if (irq == 2) |
1702 | continue; |
1703 | |
1704 | + /* interrupt's are disabled at this point */ |
1705 | + spin_lock(&irq_desc[irq].lock); |
1706 | + |
1707 | + if (!irq_has_action(irq) || |
1708 | + cpus_equal(irq_desc[irq].affinity, map)) { |
1709 | + spin_unlock(&irq_desc[irq].lock); |
1710 | + continue; |
1711 | + } |
1712 | + |
1713 | cpus_and(mask, irq_desc[irq].affinity, map); |
1714 | - if (any_online_cpu(mask) == NR_CPUS) { |
1715 | - printk("Breaking affinity for irq %i\n", irq); |
1716 | + if (cpus_empty(mask)) { |
1717 | + break_affinity = 1; |
1718 | mask = map; |
1719 | } |
1720 | + |
1721 | + if (irq_desc[irq].chip->mask) |
1722 | + irq_desc[irq].chip->mask(irq); |
1723 | + |
1724 | if (irq_desc[irq].chip->set_affinity) |
1725 | irq_desc[irq].chip->set_affinity(irq, mask); |
1726 | - else if (irq_desc[irq].action && !(warned++)) |
1727 | + else if (!(warned++)) |
1728 | + set_affinity = 0; |
1729 | + |
1730 | + if (irq_desc[irq].chip->unmask) |
1731 | + irq_desc[irq].chip->unmask(irq); |
1732 | + |
1733 | + spin_unlock(&irq_desc[irq].lock); |
1734 | + |
1735 | + if (break_affinity && set_affinity) |
1736 | + printk("Broke affinity for irq %i\n", irq); |
1737 | + else if (!set_affinity) |
1738 | printk("Cannot set affinity for irq %i\n", irq); |
1739 | } |
1740 | |
1741 | --- a/arch/x86/kernel/ldt_32-xen.c |
1742 | +++ b/arch/x86/kernel/ldt_32-xen.c |
1743 | @@ -10,7 +10,6 @@ |
1744 | #include <linux/string.h> |
1745 | #include <linux/mm.h> |
1746 | #include <linux/smp.h> |
1747 | -#include <linux/smp_lock.h> |
1748 | #include <linux/vmalloc.h> |
1749 | #include <linux/slab.h> |
1750 | |
1751 | --- a/arch/x86/kernel/ldt_64-xen.c |
1752 | +++ b/arch/x86/kernel/ldt_64-xen.c |
1753 | @@ -13,7 +13,6 @@ |
1754 | #include <linux/string.h> |
1755 | #include <linux/mm.h> |
1756 | #include <linux/smp.h> |
1757 | -#include <linux/smp_lock.h> |
1758 | #include <linux/vmalloc.h> |
1759 | #include <linux/slab.h> |
1760 | |
1761 | --- a/arch/x86/kernel/microcode-xen.c |
1762 | +++ b/arch/x86/kernel/microcode-xen.c |
1763 | @@ -135,7 +135,7 @@ |
1764 | return 0; |
1765 | } |
1766 | |
1767 | -static void __exit microcode_dev_exit (void) |
1768 | +static void microcode_dev_exit (void) |
1769 | { |
1770 | misc_deregister(&microcode_dev); |
1771 | } |
1772 | --- a/arch/x86/kernel/mpparse_32-xen.c |
1773 | +++ b/arch/x86/kernel/mpparse_32-xen.c |
1774 | @@ -18,7 +18,6 @@ |
1775 | #include <linux/acpi.h> |
1776 | #include <linux/delay.h> |
1777 | #include <linux/bootmem.h> |
1778 | -#include <linux/smp_lock.h> |
1779 | #include <linux/kernel_stat.h> |
1780 | #include <linux/mc146818rtc.h> |
1781 | #include <linux/bitops.h> |
1782 | @@ -484,7 +483,7 @@ |
1783 | } |
1784 | ++mpc_record; |
1785 | } |
1786 | - clustered_apic_check(); |
1787 | + setup_apic_routing(); |
1788 | if (!num_processors) |
1789 | printk(KERN_ERR "SMP mptable: no processors registered!\n"); |
1790 | return num_processors; |
1791 | --- a/arch/x86/kernel/mpparse_64-xen.c |
1792 | +++ b/arch/x86/kernel/mpparse_64-xen.c |
1793 | @@ -17,7 +17,6 @@ |
1794 | #include <linux/init.h> |
1795 | #include <linux/delay.h> |
1796 | #include <linux/bootmem.h> |
1797 | -#include <linux/smp_lock.h> |
1798 | #include <linux/kernel_stat.h> |
1799 | #include <linux/mc146818rtc.h> |
1800 | #include <linux/acpi.h> |
1801 | @@ -307,7 +306,7 @@ |
1802 | } |
1803 | } |
1804 | } |
1805 | - clustered_apic_check(); |
1806 | + setup_apic_routing(); |
1807 | if (!num_processors) |
1808 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
1809 | return num_processors; |
1810 | --- a/arch/x86/kernel/pci-dma_32-xen.c |
1811 | +++ b/arch/x86/kernel/pci-dma_32-xen.c |
1812 | @@ -13,6 +13,7 @@ |
1813 | #include <linux/pci.h> |
1814 | #include <linux/module.h> |
1815 | #include <linux/version.h> |
1816 | +#include <linux/pci.h> |
1817 | #include <asm/io.h> |
1818 | #include <xen/balloon.h> |
1819 | #include <xen/gnttab.h> |
1820 | @@ -284,7 +285,7 @@ |
1821 | { |
1822 | void __iomem *mem_base = NULL; |
1823 | int pages = size >> PAGE_SHIFT; |
1824 | - int bitmap_size = (pages + 31)/32; |
1825 | + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); |
1826 | |
1827 | if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) |
1828 | goto out; |
1829 | @@ -357,6 +358,32 @@ |
1830 | EXPORT_SYMBOL(dma_mark_declared_memory_occupied); |
1831 | #endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ |
1832 | |
1833 | +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN) |
1834 | +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ |
1835 | + |
1836 | +int forbid_dac; |
1837 | +EXPORT_SYMBOL(forbid_dac); |
1838 | + |
1839 | +static __devinit void via_no_dac(struct pci_dev *dev) |
1840 | +{ |
1841 | + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { |
1842 | + printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); |
1843 | + forbid_dac = 1; |
1844 | + } |
1845 | +} |
1846 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); |
1847 | + |
1848 | +static int check_iommu(char *s) |
1849 | +{ |
1850 | + if (!strcmp(s, "usedac")) { |
1851 | + forbid_dac = -1; |
1852 | + return 1; |
1853 | + } |
1854 | + return 0; |
1855 | +} |
1856 | +__setup("iommu=", check_iommu); |
1857 | +#endif |
1858 | + |
1859 | dma_addr_t |
1860 | dma_map_single(struct device *dev, void *ptr, size_t size, |
1861 | enum dma_data_direction direction) |
1862 | --- a/arch/x86/kernel/pci-swiotlb_64-xen.c |
1863 | +++ b/arch/x86/kernel/pci-swiotlb_64-xen.c |
1864 | @@ -16,7 +16,7 @@ |
1865 | |
1866 | void swiotlb_init(void); |
1867 | |
1868 | -struct dma_mapping_ops swiotlb_dma_ops = { |
1869 | +const struct dma_mapping_ops swiotlb_dma_ops = { |
1870 | #if 0 |
1871 | .mapping_error = swiotlb_dma_mapping_error, |
1872 | .alloc_coherent = swiotlb_alloc_coherent, |
1873 | --- a/arch/x86/kernel/process_32-xen.c |
1874 | +++ b/arch/x86/kernel/process_32-xen.c |
1875 | @@ -21,7 +21,6 @@ |
1876 | #include <linux/mm.h> |
1877 | #include <linux/elfcore.h> |
1878 | #include <linux/smp.h> |
1879 | -#include <linux/smp_lock.h> |
1880 | #include <linux/stddef.h> |
1881 | #include <linux/slab.h> |
1882 | #include <linux/vmalloc.h> |
1883 | @@ -39,6 +38,7 @@ |
1884 | #include <linux/random.h> |
1885 | #include <linux/personality.h> |
1886 | #include <linux/tick.h> |
1887 | +#include <linux/percpu.h> |
1888 | |
1889 | #include <asm/uaccess.h> |
1890 | #include <asm/pgtable.h> |
1891 | @@ -61,7 +61,6 @@ |
1892 | |
1893 | #include <asm/tlbflush.h> |
1894 | #include <asm/cpu.h> |
1895 | -#include <asm/pda.h> |
1896 | |
1897 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
1898 | |
1899 | @@ -70,6 +69,12 @@ |
1900 | unsigned long boot_option_idle_override = 0; |
1901 | EXPORT_SYMBOL(boot_option_idle_override); |
1902 | |
1903 | +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; |
1904 | +EXPORT_PER_CPU_SYMBOL(current_task); |
1905 | + |
1906 | +DEFINE_PER_CPU(int, cpu_number); |
1907 | +EXPORT_PER_CPU_SYMBOL(cpu_number); |
1908 | + |
1909 | /* |
1910 | * Return saved PC of a blocked thread. |
1911 | */ |
1912 | @@ -168,6 +173,7 @@ |
1913 | if (__get_cpu_var(cpu_idle_state)) |
1914 | __get_cpu_var(cpu_idle_state) = 0; |
1915 | |
1916 | + check_pgt_cache(); |
1917 | rmb(); |
1918 | idle = xen_idle; /* no alternatives */ |
1919 | |
1920 | @@ -218,18 +224,19 @@ |
1921 | { |
1922 | } |
1923 | |
1924 | -static int __init idle_setup (char *str) |
1925 | +static int __init idle_setup(char *str) |
1926 | { |
1927 | - if (!strncmp(str, "poll", 4)) { |
1928 | + if (!strcmp(str, "poll")) { |
1929 | printk("using polling idle threads.\n"); |
1930 | pm_idle = poll_idle; |
1931 | } |
1932 | + else |
1933 | + return -1; |
1934 | |
1935 | boot_option_idle_override = 1; |
1936 | - return 1; |
1937 | + return 0; |
1938 | } |
1939 | - |
1940 | -__setup("idle=", idle_setup); |
1941 | +early_param("idle", idle_setup); |
1942 | |
1943 | void show_regs(struct pt_regs * regs) |
1944 | { |
1945 | @@ -282,7 +289,7 @@ |
1946 | |
1947 | regs.xds = __USER_DS; |
1948 | regs.xes = __USER_DS; |
1949 | - regs.xfs = __KERNEL_PDA; |
1950 | + regs.xfs = __KERNEL_PERCPU; |
1951 | regs.orig_eax = -1; |
1952 | regs.eip = (unsigned long) kernel_thread_helper; |
1953 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
1954 | @@ -556,7 +563,7 @@ |
1955 | * multicall to indicate FPU task switch, rather than |
1956 | * synchronously trapping to Xen. |
1957 | */ |
1958 | - if (prev_p->thread_info->status & TS_USEDFPU) { |
1959 | + if (task_thread_info(prev_p)->status & TS_USEDFPU) { |
1960 | __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ |
1961 | mcl->op = __HYPERVISOR_fpu_taskswitch; |
1962 | mcl->args[0] = 1; |
1963 | @@ -648,7 +655,7 @@ |
1964 | if (prev->gs | next->gs) |
1965 | loadsegment(gs, next->gs); |
1966 | |
1967 | - write_pda(pcurrent, next_p); |
1968 | + x86_write_percpu(current_task, next_p); |
1969 | |
1970 | return prev_p; |
1971 | } |
1972 | --- a/arch/x86/kernel/process_64-xen.c |
1973 | +++ b/arch/x86/kernel/process_64-xen.c |
1974 | @@ -39,6 +39,7 @@ |
1975 | #include <linux/random.h> |
1976 | #include <linux/notifier.h> |
1977 | #include <linux/kprobes.h> |
1978 | +#include <linux/kdebug.h> |
1979 | |
1980 | #include <asm/uaccess.h> |
1981 | #include <asm/pgtable.h> |
1982 | @@ -49,7 +50,6 @@ |
1983 | #include <asm/mmu_context.h> |
1984 | #include <asm/pda.h> |
1985 | #include <asm/prctl.h> |
1986 | -#include <asm/kdebug.h> |
1987 | #include <xen/interface/platform.h> |
1988 | #include <xen/interface/physdev.h> |
1989 | #include <xen/interface/vcpu.h> |
1990 | @@ -232,16 +232,18 @@ |
1991 | |
1992 | static int __init idle_setup (char *str) |
1993 | { |
1994 | - if (!strncmp(str, "poll", 4)) { |
1995 | + if (!strcmp(str, "poll")) { |
1996 | printk("using polling idle threads.\n"); |
1997 | pm_idle = poll_idle; |
1998 | - } |
1999 | + } else if (!strcmp(str, "mwait")) |
2000 | + force_mwait = 1; |
2001 | + else |
2002 | + return -1; |
2003 | |
2004 | boot_option_idle_override = 1; |
2005 | - return 1; |
2006 | + return 0; |
2007 | } |
2008 | - |
2009 | -__setup("idle=", idle_setup); |
2010 | +early_param("idle", idle_setup); |
2011 | |
2012 | /* Prints also some state that isn't saved in the pt_regs */ |
2013 | void __show_regs(struct pt_regs * regs) |
2014 | @@ -540,7 +542,7 @@ |
2015 | * The AMD workaround requires it to be after DS reload, or |
2016 | * after DS has been cleared, which we do in __prepare_arch_switch. |
2017 | */ |
2018 | - if (prev_p->thread_info->status & TS_USEDFPU) { |
2019 | + if (task_thread_info(prev_p)->status & TS_USEDFPU) { |
2020 | __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ |
2021 | mcl->op = __HYPERVISOR_fpu_taskswitch; |
2022 | mcl->args[0] = 1; |
2023 | --- a/arch/x86/kernel/quirks-xen.c |
2024 | +++ b/arch/x86/kernel/quirks-xen.c |
2025 | @@ -3,12 +3,10 @@ |
2026 | */ |
2027 | #include <linux/pci.h> |
2028 | #include <linux/irq.h> |
2029 | -#include <asm/pci-direct.h> |
2030 | -#include <asm/genapic.h> |
2031 | -#include <asm/cpu.h> |
2032 | |
2033 | #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) |
2034 | -static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) |
2035 | + |
2036 | +static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) |
2037 | { |
2038 | u8 config, rev; |
2039 | u32 word; |
2040 | @@ -16,7 +14,7 @@ |
2041 | /* BIOS may enable hardware IRQ balancing for |
2042 | * E7520/E7320/E7525(revision ID 0x9 and below) |
2043 | * based platforms. |
2044 | - * For those platforms, make sure that the genapic is set to 'flat' |
2045 | + * Disable SW irqbalance/affinity on those platforms. |
2046 | */ |
2047 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); |
2048 | if (rev > 0x9) |
2049 | @@ -30,59 +28,20 @@ |
2050 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); |
2051 | |
2052 | if (!(word & (1 << 13))) { |
2053 | -#ifndef CONFIG_XEN |
2054 | -#ifdef CONFIG_X86_64 |
2055 | - if (genapic != &apic_flat) |
2056 | - panic("APIC mode must be flat on this system\n"); |
2057 | -#elif defined(CONFIG_X86_GENERICARCH) |
2058 | - if (genapic != &apic_default) |
2059 | - panic("APIC mode must be default(flat) on this system. Use apic=default\n"); |
2060 | -#endif |
2061 | -#endif |
2062 | - } |
2063 | - |
2064 | - /* put back the original value for config space*/ |
2065 | - if (!(config & 0x2)) |
2066 | - pci_write_config_byte(dev, 0xf4, config); |
2067 | -} |
2068 | - |
2069 | -void __init quirk_intel_irqbalance(void) |
2070 | -{ |
2071 | - u8 config, rev; |
2072 | - u32 word; |
2073 | - |
2074 | - /* BIOS may enable hardware IRQ balancing for |
2075 | - * E7520/E7320/E7525(revision ID 0x9 and below) |
2076 | - * based platforms. |
2077 | - * Disable SW irqbalance/affinity on those platforms. |
2078 | - */ |
2079 | - rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); |
2080 | - if (rev > 0x9) |
2081 | - return; |
2082 | - |
2083 | - printk(KERN_INFO "Intel E7520/7320/7525 detected."); |
2084 | - |
2085 | - /* enable access to config space */ |
2086 | - config = read_pci_config_byte(0, 0, 0, 0xf4); |
2087 | - write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); |
2088 | - |
2089 | - /* read xTPR register */ |
2090 | - word = read_pci_config_16(0, 0, 0x40, 0x4c); |
2091 | - |
2092 | - if (!(word & (1 << 13))) { |
2093 | struct xen_platform_op op; |
2094 | - printk(KERN_INFO "Disabling irq balancing and affinity\n"); |
2095 | + |
2096 | + printk(KERN_INFO "Intel E7520/7320/7525 detected. " |
2097 | + "Disabling irq balancing and affinity\n"); |
2098 | op.cmd = XENPF_platform_quirk; |
2099 | op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING; |
2100 | WARN_ON(HYPERVISOR_platform_op(&op)); |
2101 | } |
2102 | |
2103 | - /* put back the original value for config space */ |
2104 | + /* put back the original value for config space*/ |
2105 | if (!(config & 0x2)) |
2106 | - write_pci_config_byte(0, 0, 0, 0xf4, config); |
2107 | + pci_write_config_byte(dev, 0xf4, config); |
2108 | } |
2109 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); |
2110 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); |
2111 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); |
2112 | - |
2113 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); |
2114 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); |
2115 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); |
2116 | #endif |
2117 | --- a/arch/x86/kernel/setup64-xen.c |
2118 | +++ b/arch/x86/kernel/setup64-xen.c |
2119 | @@ -113,9 +113,9 @@ |
2120 | if (!NODE_DATA(cpu_to_node(i))) { |
2121 | printk("cpu with no node %d, num_online_nodes %d\n", |
2122 | i, num_online_nodes()); |
2123 | - ptr = alloc_bootmem(size); |
2124 | + ptr = alloc_bootmem_pages(size); |
2125 | } else { |
2126 | - ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); |
2127 | + ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); |
2128 | } |
2129 | if (!ptr) |
2130 | panic("Cannot allocate cpu data for CPU %d\n", i); |
2131 | @@ -208,6 +208,8 @@ |
2132 | __attribute__((section(".bss.page_aligned"))); |
2133 | #endif |
2134 | |
2135 | +extern asmlinkage void ignore_sysret(void); |
2136 | + |
2137 | /* May not be marked __init: used by software suspend */ |
2138 | void syscall_init(void) |
2139 | { |
2140 | @@ -219,12 +221,22 @@ |
2141 | */ |
2142 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); |
2143 | wrmsrl(MSR_LSTAR, system_call); |
2144 | + wrmsrl(MSR_CSTAR, ignore_sysret); |
2145 | |
2146 | /* Flags to clear on syscall */ |
2147 | wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); |
2148 | #endif |
2149 | #ifdef CONFIG_IA32_EMULATION |
2150 | syscall32_cpu_init (); |
2151 | +#else |
2152 | + { |
2153 | + static const struct callback_register cstar = { |
2154 | + .type = CALLBACKTYPE_syscall32, |
2155 | + .address = (unsigned long)ignore_sysret |
2156 | + }; |
2157 | + if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar)) |
2158 | + printk(KERN_WARNING "Unable to register CSTAR callback\n"); |
2159 | + } |
2160 | #endif |
2161 | } |
2162 | |
2163 | @@ -262,7 +274,6 @@ |
2164 | /* CPU 0 is initialised in head64.c */ |
2165 | if (cpu != 0) { |
2166 | pda_init(cpu); |
2167 | - zap_low_mappings(cpu); |
2168 | } |
2169 | #ifndef CONFIG_X86_NO_TSS |
2170 | else |
2171 | --- a/arch/x86/kernel/setup_64-xen.c |
2172 | +++ b/arch/x86/kernel/setup_64-xen.c |
2173 | @@ -123,6 +123,8 @@ |
2174 | |
2175 | unsigned long saved_video_mode; |
2176 | |
2177 | +int force_mwait __cpuinitdata; |
2178 | + |
2179 | /* |
2180 | * Early DMI memory |
2181 | */ |
2182 | @@ -256,10 +258,10 @@ |
2183 | * there is a real-mode segmented pointer pointing to the |
2184 | * 4K EBDA area at 0x40E |
2185 | */ |
2186 | - ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; |
2187 | + ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); |
2188 | ebda_addr <<= 4; |
2189 | |
2190 | - ebda_size = *(unsigned short *)(unsigned long)ebda_addr; |
2191 | + ebda_size = *(unsigned short *)__va(ebda_addr); |
2192 | |
2193 | /* Round EBDA up to pages */ |
2194 | if (ebda_size == 0) |
2195 | @@ -413,15 +415,8 @@ |
2196 | #endif |
2197 | |
2198 | #ifdef CONFIG_SMP |
2199 | - /* |
2200 | - * But first pinch a few for the stack/trampoline stuff |
2201 | - * FIXME: Don't need the extra page at 4K, but need to fix |
2202 | - * trampoline before removing it. (see the GDT stuff) |
2203 | - */ |
2204 | - reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE); |
2205 | - |
2206 | /* Reserve SMP trampoline */ |
2207 | - reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE); |
2208 | + reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE); |
2209 | #endif |
2210 | #endif |
2211 | |
2212 | @@ -573,8 +568,6 @@ |
2213 | early_quirks(); |
2214 | #endif |
2215 | |
2216 | - zap_low_mappings(0); |
2217 | - |
2218 | /* |
2219 | * set this early, so we dont allocate cpu0 |
2220 | * if MADT list doesnt list BSP first |
2221 | @@ -877,6 +870,10 @@ |
2222 | |
2223 | /* RDTSC can be speculated around */ |
2224 | clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
2225 | + |
2226 | + /* Family 10 doesn't support C states in MWAIT so don't use it */ |
2227 | + if (c->x86 == 0x10 && !force_mwait) |
2228 | + clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); |
2229 | } |
2230 | |
2231 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
2232 | @@ -1159,9 +1156,7 @@ |
2233 | #ifdef CONFIG_X86_MCE |
2234 | mcheck_init(c); |
2235 | #endif |
2236 | - if (c == &boot_cpu_data) |
2237 | - mtrr_bp_init(); |
2238 | - else |
2239 | + if (c != &boot_cpu_data) |
2240 | mtrr_ap_init(); |
2241 | #ifdef CONFIG_NUMA |
2242 | numa_add_cpu(smp_processor_id()); |
2243 | @@ -1252,9 +1247,8 @@ |
2244 | "stc", |
2245 | "100mhzsteps", |
2246 | "hwpstate", |
2247 | - NULL, /* tsc invariant mapped to constant_tsc */ |
2248 | - NULL, |
2249 | - /* nothing */ /* constant_tsc - moved to flags */ |
2250 | + "", /* tsc invariant mapped to constant_tsc */ |
2251 | + /* nothing */ |
2252 | }; |
2253 | |
2254 | |
2255 | --- a/arch/x86/kernel/smp_32-xen.c |
2256 | +++ b/arch/x86/kernel/smp_32-xen.c |
2257 | @@ -13,7 +13,6 @@ |
2258 | #include <linux/mm.h> |
2259 | #include <linux/delay.h> |
2260 | #include <linux/spinlock.h> |
2261 | -#include <linux/smp_lock.h> |
2262 | #include <linux/kernel_stat.h> |
2263 | #include <linux/mc146818rtc.h> |
2264 | #include <linux/cache.h> |
2265 | @@ -216,7 +215,6 @@ |
2266 | static struct mm_struct * flush_mm; |
2267 | static unsigned long flush_va; |
2268 | static DEFINE_SPINLOCK(tlbstate_lock); |
2269 | -#define FLUSH_ALL 0xffffffff |
2270 | |
2271 | /* |
2272 | * We cannot call mmdrop() because we are in interrupt context, |
2273 | @@ -298,7 +296,7 @@ |
2274 | |
2275 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { |
2276 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { |
2277 | - if (flush_va == FLUSH_ALL) |
2278 | + if (flush_va == TLB_FLUSH_ALL) |
2279 | local_flush_tlb(); |
2280 | else |
2281 | __flush_tlb_one(flush_va); |
2282 | @@ -314,9 +312,11 @@ |
2283 | return IRQ_HANDLED; |
2284 | } |
2285 | |
2286 | -static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, |
2287 | - unsigned long va) |
2288 | +void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
2289 | + unsigned long va) |
2290 | { |
2291 | + cpumask_t cpumask = *cpumaskp; |
2292 | + |
2293 | /* |
2294 | * A couple of (to be removed) sanity checks: |
2295 | * |
2296 | @@ -327,10 +327,12 @@ |
2297 | BUG_ON(cpu_isset(smp_processor_id(), cpumask)); |
2298 | BUG_ON(!mm); |
2299 | |
2300 | +#ifdef CONFIG_HOTPLUG_CPU |
2301 | /* If a CPU which we ran on has gone down, OK. */ |
2302 | cpus_and(cpumask, cpumask, cpu_online_map); |
2303 | - if (cpus_empty(cpumask)) |
2304 | + if (unlikely(cpus_empty(cpumask))) |
2305 | return; |
2306 | +#endif |
2307 | |
2308 | /* |
2309 | * i'm not happy about this global shared spinlock in the |
2310 | @@ -341,17 +343,7 @@ |
2311 | |
2312 | flush_mm = mm; |
2313 | flush_va = va; |
2314 | -#if NR_CPUS <= BITS_PER_LONG |
2315 | - atomic_set_mask(cpumask, &flush_cpumask); |
2316 | -#else |
2317 | - { |
2318 | - int k; |
2319 | - unsigned long *flush_mask = (unsigned long *)&flush_cpumask; |
2320 | - unsigned long *cpu_mask = (unsigned long *)&cpumask; |
2321 | - for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) |
2322 | - atomic_set_mask(cpu_mask[k], &flush_mask[k]); |
2323 | - } |
2324 | -#endif |
2325 | + cpus_or(flush_cpumask, cpumask, flush_cpumask); |
2326 | /* |
2327 | * We have to send the IPI only to |
2328 | * CPUs affected. |
2329 | @@ -378,7 +370,7 @@ |
2330 | |
2331 | local_flush_tlb(); |
2332 | if (!cpus_empty(cpu_mask)) |
2333 | - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); |
2334 | + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
2335 | preempt_enable(); |
2336 | } |
2337 | |
2338 | @@ -397,7 +389,7 @@ |
2339 | leave_mm(smp_processor_id()); |
2340 | } |
2341 | if (!cpus_empty(cpu_mask)) |
2342 | - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); |
2343 | + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
2344 | |
2345 | preempt_enable(); |
2346 | } |
2347 | @@ -446,7 +438,7 @@ |
2348 | * it goes straight through and wastes no time serializing |
2349 | * anything. Worst case is that we lose a reschedule ... |
2350 | */ |
2351 | -void smp_send_reschedule(int cpu) |
2352 | +void xen_smp_send_reschedule(int cpu) |
2353 | { |
2354 | WARN_ON(cpu_is_offline(cpu)); |
2355 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); |
2356 | @@ -478,36 +470,79 @@ |
2357 | |
2358 | static struct call_data_struct *call_data; |
2359 | |
2360 | +static void __smp_call_function(void (*func) (void *info), void *info, |
2361 | + int nonatomic, int wait) |
2362 | +{ |
2363 | + struct call_data_struct data; |
2364 | + int cpus = num_online_cpus() - 1; |
2365 | + |
2366 | + if (!cpus) |
2367 | + return; |
2368 | + |
2369 | + data.func = func; |
2370 | + data.info = info; |
2371 | + atomic_set(&data.started, 0); |
2372 | + data.wait = wait; |
2373 | + if (wait) |
2374 | + atomic_set(&data.finished, 0); |
2375 | + |
2376 | + call_data = &data; |
2377 | + mb(); |
2378 | + |
2379 | + /* Send a message to all other CPUs and wait for them to respond */ |
2380 | + send_IPI_allbutself(CALL_FUNCTION_VECTOR); |
2381 | + |
2382 | + /* Wait for response */ |
2383 | + while (atomic_read(&data.started) != cpus) |
2384 | + cpu_relax(); |
2385 | + |
2386 | + if (wait) |
2387 | + while (atomic_read(&data.finished) != cpus) |
2388 | + cpu_relax(); |
2389 | +} |
2390 | + |
2391 | + |
2392 | /** |
2393 | - * smp_call_function(): Run a function on all other CPUs. |
2394 | + * smp_call_function_mask(): Run a function on a set of other CPUs. |
2395 | + * @mask: The set of cpus to run on. Must not include the current cpu. |
2396 | * @func: The function to run. This must be fast and non-blocking. |
2397 | * @info: An arbitrary pointer to pass to the function. |
2398 | - * @nonatomic: currently unused. |
2399 | * @wait: If true, wait (atomically) until function has completed on other CPUs. |
2400 | * |
2401 | - * Returns 0 on success, else a negative status code. Does not return until |
2402 | - * remote CPUs are nearly ready to execute <<func>> or are or have executed. |
2403 | + * Returns 0 on success, else a negative status code. |
2404 | + * |
2405 | + * If @wait is true, then returns once @func has returned; otherwise |
2406 | + * it returns just before the target cpu calls @func. |
2407 | * |
2408 | * You must not call this function with disabled interrupts or from a |
2409 | * hardware interrupt handler or from a bottom half handler. |
2410 | */ |
2411 | -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, |
2412 | - int wait) |
2413 | +int |
2414 | +xen_smp_call_function_mask(cpumask_t mask, |
2415 | + void (*func)(void *), void *info, |
2416 | + int wait) |
2417 | { |
2418 | struct call_data_struct data; |
2419 | + cpumask_t allbutself; |
2420 | int cpus; |
2421 | |
2422 | + /* Can deadlock when called with interrupts disabled */ |
2423 | + WARN_ON(irqs_disabled()); |
2424 | + |
2425 | /* Holding any lock stops cpus from going down. */ |
2426 | spin_lock(&call_lock); |
2427 | - cpus = num_online_cpus() - 1; |
2428 | + |
2429 | + allbutself = cpu_online_map; |
2430 | + cpu_clear(smp_processor_id(), allbutself); |
2431 | + |
2432 | + cpus_and(mask, mask, allbutself); |
2433 | + cpus = cpus_weight(mask); |
2434 | + |
2435 | if (!cpus) { |
2436 | spin_unlock(&call_lock); |
2437 | return 0; |
2438 | } |
2439 | |
2440 | - /* Can deadlock when called with interrupts disabled */ |
2441 | - WARN_ON(irqs_disabled()); |
2442 | - |
2443 | data.func = func; |
2444 | data.info = info; |
2445 | atomic_set(&data.started, 0); |
2446 | @@ -517,9 +552,12 @@ |
2447 | |
2448 | call_data = &data; |
2449 | mb(); |
2450 | - |
2451 | - /* Send a message to all other CPUs and wait for them to respond */ |
2452 | - send_IPI_allbutself(CALL_FUNCTION_VECTOR); |
2453 | + |
2454 | + /* Send a message to other CPUs */ |
2455 | + if (cpus_equal(mask, allbutself)) |
2456 | + send_IPI_allbutself(CALL_FUNCTION_VECTOR); |
2457 | + else |
2458 | + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
2459 | |
2460 | /* Wait for response */ |
2461 | while (atomic_read(&data.started) != cpus) |
2462 | @@ -532,15 +570,14 @@ |
2463 | |
2464 | return 0; |
2465 | } |
2466 | -EXPORT_SYMBOL(smp_call_function); |
2467 | |
2468 | static void stop_this_cpu (void * dummy) |
2469 | { |
2470 | + local_irq_disable(); |
2471 | /* |
2472 | * Remove this CPU: |
2473 | */ |
2474 | cpu_clear(smp_processor_id(), cpu_online_map); |
2475 | - local_irq_disable(); |
2476 | disable_all_local_evtchn(); |
2477 | if (cpu_data[smp_processor_id()].hlt_works_ok) |
2478 | for(;;) halt(); |
2479 | @@ -551,13 +588,18 @@ |
2480 | * this function calls the 'stop' function on all other CPUs in the system. |
2481 | */ |
2482 | |
2483 | -void smp_send_stop(void) |
2484 | +void xen_smp_send_stop(void) |
2485 | { |
2486 | - smp_call_function(stop_this_cpu, NULL, 1, 0); |
2487 | + /* Don't deadlock on the call lock in panic */ |
2488 | + int nolock = !spin_trylock(&call_lock); |
2489 | + unsigned long flags; |
2490 | |
2491 | - local_irq_disable(); |
2492 | + local_irq_save(flags); |
2493 | + __smp_call_function(stop_this_cpu, NULL, 0, 0); |
2494 | + if (!nolock) |
2495 | + spin_unlock(&call_lock); |
2496 | disable_all_local_evtchn(); |
2497 | - local_irq_enable(); |
2498 | + local_irq_restore(flags); |
2499 | } |
2500 | |
2501 | /* |
2502 | @@ -598,74 +640,3 @@ |
2503 | |
2504 | return IRQ_HANDLED; |
2505 | } |
2506 | - |
2507 | -/* |
2508 | - * this function sends a 'generic call function' IPI to one other CPU |
2509 | - * in the system. |
2510 | - * |
2511 | - * cpu is a standard Linux logical CPU number. |
2512 | - */ |
2513 | -static void |
2514 | -__smp_call_function_single(int cpu, void (*func) (void *info), void *info, |
2515 | - int nonatomic, int wait) |
2516 | -{ |
2517 | - struct call_data_struct data; |
2518 | - int cpus = 1; |
2519 | - |
2520 | - data.func = func; |
2521 | - data.info = info; |
2522 | - atomic_set(&data.started, 0); |
2523 | - data.wait = wait; |
2524 | - if (wait) |
2525 | - atomic_set(&data.finished, 0); |
2526 | - |
2527 | - call_data = &data; |
2528 | - wmb(); |
2529 | - /* Send a message to all other CPUs and wait for them to respond */ |
2530 | - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); |
2531 | - |
2532 | - /* Wait for response */ |
2533 | - while (atomic_read(&data.started) != cpus) |
2534 | - cpu_relax(); |
2535 | - |
2536 | - if (!wait) |
2537 | - return; |
2538 | - |
2539 | - while (atomic_read(&data.finished) != cpus) |
2540 | - cpu_relax(); |
2541 | -} |
2542 | - |
2543 | -/* |
2544 | - * smp_call_function_single - Run a function on another CPU |
2545 | - * @func: The function to run. This must be fast and non-blocking. |
2546 | - * @info: An arbitrary pointer to pass to the function. |
2547 | - * @nonatomic: Currently unused. |
2548 | - * @wait: If true, wait until function has completed on other CPUs. |
2549 | - * |
2550 | - * Retrurns 0 on success, else a negative status code. |
2551 | - * |
2552 | - * Does not return until the remote CPU is nearly ready to execute <func> |
2553 | - * or is or has executed. |
2554 | - */ |
2555 | - |
2556 | -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, |
2557 | - int nonatomic, int wait) |
2558 | -{ |
2559 | - /* prevent preemption and reschedule on another processor */ |
2560 | - int me = get_cpu(); |
2561 | - if (cpu == me) { |
2562 | - WARN_ON(1); |
2563 | - put_cpu(); |
2564 | - return -EBUSY; |
2565 | - } |
2566 | - |
2567 | - /* Can deadlock when called with interrupts disabled */ |
2568 | - WARN_ON(irqs_disabled()); |
2569 | - |
2570 | - spin_lock_bh(&call_lock); |
2571 | - __smp_call_function_single(cpu, func, info, nonatomic, wait); |
2572 | - spin_unlock_bh(&call_lock); |
2573 | - put_cpu(); |
2574 | - return 0; |
2575 | -} |
2576 | -EXPORT_SYMBOL(smp_call_function_single); |
2577 | --- a/arch/x86/kernel/smp_64-xen.c |
2578 | +++ b/arch/x86/kernel/smp_64-xen.c |
2579 | @@ -14,7 +14,6 @@ |
2580 | #include <linux/mm.h> |
2581 | #include <linux/delay.h> |
2582 | #include <linux/spinlock.h> |
2583 | -#include <linux/smp_lock.h> |
2584 | #include <linux/smp.h> |
2585 | #include <linux/kernel_stat.h> |
2586 | #include <linux/mc146818rtc.h> |
2587 | @@ -457,44 +456,36 @@ |
2588 | } |
2589 | EXPORT_SYMBOL(smp_call_function); |
2590 | |
2591 | -void smp_stop_cpu(void) |
2592 | +static void stop_this_cpu(void *dummy) |
2593 | { |
2594 | - unsigned long flags; |
2595 | + local_irq_disable(); |
2596 | /* |
2597 | * Remove this CPU: |
2598 | */ |
2599 | cpu_clear(smp_processor_id(), cpu_online_map); |
2600 | - local_irq_save(flags); |
2601 | disable_all_local_evtchn(); |
2602 | - local_irq_restore(flags); |
2603 | -} |
2604 | - |
2605 | -static void smp_really_stop_cpu(void *dummy) |
2606 | -{ |
2607 | - smp_stop_cpu(); |
2608 | for (;;) |
2609 | halt(); |
2610 | } |
2611 | |
2612 | void smp_send_stop(void) |
2613 | { |
2614 | - int nolock = 0; |
2615 | + int nolock; |
2616 | + unsigned long flags; |
2617 | + |
2618 | #ifndef CONFIG_XEN |
2619 | if (reboot_force) |
2620 | return; |
2621 | #endif |
2622 | + |
2623 | /* Don't deadlock on the call lock in panic */ |
2624 | - if (!spin_trylock(&call_lock)) { |
2625 | - /* ignore locking because we have panicked anyways */ |
2626 | - nolock = 1; |
2627 | - } |
2628 | - __smp_call_function(smp_really_stop_cpu, NULL, 0, 0); |
2629 | + nolock = !spin_trylock(&call_lock); |
2630 | + local_irq_save(flags); |
2631 | + __smp_call_function(stop_this_cpu, NULL, 0, 0); |
2632 | if (!nolock) |
2633 | spin_unlock(&call_lock); |
2634 | - |
2635 | - local_irq_disable(); |
2636 | disable_all_local_evtchn(); |
2637 | - local_irq_enable(); |
2638 | + local_irq_restore(flags); |
2639 | } |
2640 | |
2641 | /* |
2642 | --- a/arch/x86/kernel/time_32-xen.c |
2643 | +++ b/arch/x86/kernel/time_32-xen.c |
2644 | @@ -80,7 +80,6 @@ |
2645 | #include <asm/i8253.h> |
2646 | DEFINE_SPINLOCK(i8253_lock); |
2647 | EXPORT_SYMBOL(i8253_lock); |
2648 | -int pit_latch_buggy; /* extern */ |
2649 | #else |
2650 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; |
2651 | #endif |
2652 | @@ -589,7 +588,7 @@ |
2653 | return IRQ_HANDLED; |
2654 | } |
2655 | |
2656 | -void mark_tsc_unstable(void) |
2657 | +void mark_tsc_unstable(char *reason) |
2658 | { |
2659 | #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */ |
2660 | tsc_unstable = 1; |
2661 | @@ -597,17 +596,18 @@ |
2662 | } |
2663 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); |
2664 | |
2665 | +static cycle_t cs_last; |
2666 | + |
2667 | static cycle_t xen_clocksource_read(void) |
2668 | { |
2669 | cycle_t ret = sched_clock(); |
2670 | |
2671 | #ifdef CONFIG_SMP |
2672 | for (;;) { |
2673 | - static cycle_t last_ret; |
2674 | #ifndef CONFIG_64BIT |
2675 | - cycle_t last = cmpxchg64(&last_ret, 0, 0); |
2676 | + cycle_t last = cmpxchg64(&cs_last, 0, 0); |
2677 | #else |
2678 | - cycle_t last = last_ret; |
2679 | + cycle_t last = cs_last; |
2680 | #define cmpxchg64 cmpxchg |
2681 | #endif |
2682 | |
2683 | @@ -627,7 +627,7 @@ |
2684 | } |
2685 | ret = last; |
2686 | } |
2687 | - if (cmpxchg64(&last_ret, last, ret) == last) |
2688 | + if (cmpxchg64(&cs_last, last, ret) == last) |
2689 | break; |
2690 | } |
2691 | #endif |
2692 | @@ -635,6 +635,14 @@ |
2693 | return ret; |
2694 | } |
2695 | |
2696 | +static void xen_clocksource_resume(void) |
2697 | +{ |
2698 | + extern void time_resume(void); |
2699 | + |
2700 | + time_resume(); |
2701 | + cs_last = sched_clock(); |
2702 | +} |
2703 | + |
2704 | static struct clocksource clocksource_xen = { |
2705 | .name = "xen", |
2706 | .rating = 400, |
2707 | @@ -643,6 +651,7 @@ |
2708 | .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */ |
2709 | .shift = XEN_SHIFT, |
2710 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
2711 | + .resume = xen_clocksource_resume, |
2712 | }; |
2713 | |
2714 | static void init_missing_ticks_accounting(unsigned int cpu) |
2715 | @@ -731,35 +740,6 @@ |
2716 | mod_timer(&sync_xen_wallclock_timer, jiffies + 1); |
2717 | } |
2718 | |
2719 | -static int timer_resume(struct sys_device *dev) |
2720 | -{ |
2721 | - extern void time_resume(void); |
2722 | - time_resume(); |
2723 | - return 0; |
2724 | -} |
2725 | - |
2726 | -static struct sysdev_class timer_sysclass = { |
2727 | - .resume = timer_resume, |
2728 | - set_kset_name("timer"), |
2729 | -}; |
2730 | - |
2731 | - |
2732 | -/* XXX this driverfs stuff should probably go elsewhere later -john */ |
2733 | -static struct sys_device device_timer = { |
2734 | - .id = 0, |
2735 | - .cls = &timer_sysclass, |
2736 | -}; |
2737 | - |
2738 | -static int time_init_device(void) |
2739 | -{ |
2740 | - int error = sysdev_class_register(&timer_sysclass); |
2741 | - if (!error) |
2742 | - error = sysdev_register(&device_timer); |
2743 | - return error; |
2744 | -} |
2745 | - |
2746 | -device_initcall(time_init_device); |
2747 | - |
2748 | extern void (*late_time_init)(void); |
2749 | |
2750 | /* Dynamically-mapped IRQ. */ |
2751 | @@ -772,7 +752,7 @@ |
2752 | VIRQ_TIMER, |
2753 | 0, |
2754 | timer_interrupt, |
2755 | - SA_INTERRUPT, |
2756 | + IRQF_DISABLED, |
2757 | "timer0", |
2758 | NULL); |
2759 | BUG_ON(per_cpu(timer_irq, 0) < 0); |
2760 | @@ -890,21 +870,21 @@ |
2761 | cpu_clear(smp_processor_id(), nohz_cpu_mask); |
2762 | } |
2763 | |
2764 | -void raw_safe_halt(void) |
2765 | +void xen_safe_halt(void) |
2766 | { |
2767 | stop_hz_timer(); |
2768 | /* Blocking includes an implicit local_irq_enable(). */ |
2769 | HYPERVISOR_block(); |
2770 | start_hz_timer(); |
2771 | } |
2772 | -EXPORT_SYMBOL(raw_safe_halt); |
2773 | +EXPORT_SYMBOL(xen_safe_halt); |
2774 | |
2775 | -void halt(void) |
2776 | +void xen_halt(void) |
2777 | { |
2778 | if (irqs_disabled()) |
2779 | VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL)); |
2780 | } |
2781 | -EXPORT_SYMBOL(halt); |
2782 | +EXPORT_SYMBOL(xen_halt); |
2783 | |
2784 | /* No locking required. Interrupts are disabled on all CPUs. */ |
2785 | void time_resume(void) |
2786 | @@ -967,7 +947,7 @@ |
2787 | irq = bind_virq_to_irqhandler(VIRQ_TIMER, |
2788 | cpu, |
2789 | timer_interrupt, |
2790 | - SA_INTERRUPT, |
2791 | + IRQF_DISABLED, |
2792 | timer_name[cpu], |
2793 | NULL); |
2794 | if (irq < 0) |
2795 | --- a/arch/x86/kernel/traps_32-xen.c |
2796 | +++ b/arch/x86/kernel/traps_32-xen.c |
2797 | @@ -52,7 +52,7 @@ |
2798 | #include <asm/unwind.h> |
2799 | #include <asm/smp.h> |
2800 | #include <asm/arch_hooks.h> |
2801 | -#include <asm/kdebug.h> |
2802 | +#include <linux/kdebug.h> |
2803 | #include <asm/stacktrace.h> |
2804 | |
2805 | #include <linux/module.h> |
2806 | @@ -101,20 +101,6 @@ |
2807 | |
2808 | int kstack_depth_to_print = 24; |
2809 | static unsigned int code_bytes = 64; |
2810 | -ATOMIC_NOTIFIER_HEAD(i386die_chain); |
2811 | - |
2812 | -int register_die_notifier(struct notifier_block *nb) |
2813 | -{ |
2814 | - vmalloc_sync_all(); |
2815 | - return atomic_notifier_chain_register(&i386die_chain, nb); |
2816 | -} |
2817 | -EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */ |
2818 | - |
2819 | -int unregister_die_notifier(struct notifier_block *nb) |
2820 | -{ |
2821 | - return atomic_notifier_chain_unregister(&i386die_chain, nb); |
2822 | -} |
2823 | -EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */ |
2824 | |
2825 | static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) |
2826 | { |
2827 | @@ -325,7 +311,7 @@ |
2828 | regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); |
2829 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", |
2830 | TASK_COMM_LEN, current->comm, current->pid, |
2831 | - current_thread_info(), current, current->thread_info); |
2832 | + current_thread_info(), current, task_thread_info(current)); |
2833 | /* |
2834 | * When in-kernel, we also print out the stack and code at the |
2835 | * time of the fault.. |
2836 | @@ -482,8 +468,6 @@ |
2837 | siginfo_t *info) |
2838 | { |
2839 | struct task_struct *tsk = current; |
2840 | - tsk->thread.error_code = error_code; |
2841 | - tsk->thread.trap_no = trapnr; |
2842 | |
2843 | if (regs->eflags & VM_MASK) { |
2844 | if (vm86) |
2845 | @@ -495,6 +479,18 @@ |
2846 | goto kernel_trap; |
2847 | |
2848 | trap_signal: { |
2849 | + /* |
2850 | + * We want error_code and trap_no set for userspace faults and |
2851 | + * kernelspace faults which result in die(), but not |
2852 | + * kernelspace faults which are fixed up. die() gives the |
2853 | + * process no chance to handle the signal and notice the |
2854 | + * kernel fault information, so that won't result in polluting |
2855 | + * the information about previously queued, but not yet |
2856 | + * delivered, faults. See also do_general_protection below. |
2857 | + */ |
2858 | + tsk->thread.error_code = error_code; |
2859 | + tsk->thread.trap_no = trapnr; |
2860 | + |
2861 | if (info) |
2862 | force_sig_info(signr, info, tsk); |
2863 | else |
2864 | @@ -503,8 +499,11 @@ |
2865 | } |
2866 | |
2867 | kernel_trap: { |
2868 | - if (!fixup_exception(regs)) |
2869 | + if (!fixup_exception(regs)) { |
2870 | + tsk->thread.error_code = error_code; |
2871 | + tsk->thread.trap_no = trapnr; |
2872 | die(str, regs, error_code); |
2873 | + } |
2874 | return; |
2875 | } |
2876 | |
2877 | @@ -578,9 +577,6 @@ |
2878 | fastcall void __kprobes do_general_protection(struct pt_regs * regs, |
2879 | long error_code) |
2880 | { |
2881 | - current->thread.error_code = error_code; |
2882 | - current->thread.trap_no = 13; |
2883 | - |
2884 | if (regs->eflags & VM_MASK) |
2885 | goto gp_in_vm86; |
2886 | |
2887 | @@ -599,6 +595,8 @@ |
2888 | |
2889 | gp_in_kernel: |
2890 | if (!fixup_exception(regs)) { |
2891 | + current->thread.error_code = error_code; |
2892 | + current->thread.trap_no = 13; |
2893 | if (notify_die(DIE_GPF, "general protection fault", regs, |
2894 | error_code, 13, SIGSEGV) == NOTIFY_STOP) |
2895 | return; |
2896 | @@ -987,9 +985,7 @@ |
2897 | fastcall unsigned long patch_espfix_desc(unsigned long uesp, |
2898 | unsigned long kesp) |
2899 | { |
2900 | - int cpu = smp_processor_id(); |
2901 | - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
2902 | - struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; |
2903 | + struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; |
2904 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; |
2905 | unsigned long new_kesp = kesp - base; |
2906 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; |
2907 | --- a/arch/x86/kernel/traps_64-xen.c |
2908 | +++ b/arch/x86/kernel/traps_64-xen.c |
2909 | @@ -32,6 +32,7 @@ |
2910 | #include <linux/unwind.h> |
2911 | #include <linux/uaccess.h> |
2912 | #include <linux/bug.h> |
2913 | +#include <linux/kdebug.h> |
2914 | |
2915 | #include <asm/system.h> |
2916 | #include <asm/io.h> |
2917 | @@ -39,7 +40,6 @@ |
2918 | #include <asm/debugreg.h> |
2919 | #include <asm/desc.h> |
2920 | #include <asm/i387.h> |
2921 | -#include <asm/kdebug.h> |
2922 | #include <asm/processor.h> |
2923 | #include <asm/unwind.h> |
2924 | #include <asm/smp.h> |
2925 | @@ -71,22 +71,6 @@ |
2926 | asmlinkage void machine_check(void); |
2927 | asmlinkage void spurious_interrupt_bug(void); |
2928 | |
2929 | -ATOMIC_NOTIFIER_HEAD(die_chain); |
2930 | -EXPORT_SYMBOL(die_chain); |
2931 | - |
2932 | -int register_die_notifier(struct notifier_block *nb) |
2933 | -{ |
2934 | - vmalloc_sync_all(); |
2935 | - return atomic_notifier_chain_register(&die_chain, nb); |
2936 | -} |
2937 | -EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */ |
2938 | - |
2939 | -int unregister_die_notifier(struct notifier_block *nb) |
2940 | -{ |
2941 | - return atomic_notifier_chain_unregister(&die_chain, nb); |
2942 | -} |
2943 | -EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */ |
2944 | - |
2945 | static inline void conditional_sti(struct pt_regs *regs) |
2946 | { |
2947 | if (regs->eflags & X86_EFLAGS_IF) |
2948 | @@ -428,8 +412,7 @@ |
2949 | const int cpu = smp_processor_id(); |
2950 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; |
2951 | |
2952 | - rsp = regs->rsp; |
2953 | - |
2954 | + rsp = regs->rsp; |
2955 | printk("CPU %d ", cpu); |
2956 | __show_regs(regs); |
2957 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", |
2958 | @@ -440,7 +423,6 @@ |
2959 | * time of the fault.. |
2960 | */ |
2961 | if (in_kernel) { |
2962 | - |
2963 | printk("Stack: "); |
2964 | _show_stack(NULL, regs, (unsigned long*)rsp); |
2965 | |
2966 | @@ -485,13 +467,14 @@ |
2967 | |
2968 | unsigned __kprobes long oops_begin(void) |
2969 | { |
2970 | - int cpu = smp_processor_id(); |
2971 | + int cpu; |
2972 | unsigned long flags; |
2973 | |
2974 | oops_enter(); |
2975 | |
2976 | /* racy, but better than risking deadlock. */ |
2977 | local_irq_save(flags); |
2978 | + cpu = smp_processor_id(); |
2979 | if (!spin_trylock(&die_lock)) { |
2980 | if (cpu == die_owner) |
2981 | /* nested oops. should stop eventually */; |
2982 | @@ -585,10 +568,20 @@ |
2983 | { |
2984 | struct task_struct *tsk = current; |
2985 | |
2986 | - tsk->thread.error_code = error_code; |
2987 | - tsk->thread.trap_no = trapnr; |
2988 | - |
2989 | if (user_mode(regs)) { |
2990 | + /* |
2991 | + * We want error_code and trap_no set for userspace |
2992 | + * faults and kernelspace faults which result in |
2993 | + * die(), but not kernelspace faults which are fixed |
2994 | + * up. die() gives the process no chance to handle |
2995 | + * the signal and notice the kernel fault information, |
2996 | + * so that won't result in polluting the information |
2997 | + * about previously queued, but not yet delivered, |
2998 | + * faults. See also do_general_protection below. |
2999 | + */ |
3000 | + tsk->thread.error_code = error_code; |
3001 | + tsk->thread.trap_no = trapnr; |
3002 | + |
3003 | if (exception_trace && unhandled_signal(tsk, signr)) |
3004 | printk(KERN_INFO |
3005 | "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", |
3006 | @@ -609,8 +602,11 @@ |
3007 | fixup = search_exception_tables(regs->rip); |
3008 | if (fixup) |
3009 | regs->rip = fixup->fixup; |
3010 | - else |
3011 | + else { |
3012 | + tsk->thread.error_code = error_code; |
3013 | + tsk->thread.trap_no = trapnr; |
3014 | die(str, regs, error_code); |
3015 | + } |
3016 | return; |
3017 | } |
3018 | } |
3019 | @@ -686,10 +682,10 @@ |
3020 | |
3021 | conditional_sti(regs); |
3022 | |
3023 | - tsk->thread.error_code = error_code; |
3024 | - tsk->thread.trap_no = 13; |
3025 | - |
3026 | if (user_mode(regs)) { |
3027 | + tsk->thread.error_code = error_code; |
3028 | + tsk->thread.trap_no = 13; |
3029 | + |
3030 | if (exception_trace && unhandled_signal(tsk, SIGSEGV)) |
3031 | printk(KERN_INFO |
3032 | "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", |
3033 | @@ -708,6 +704,9 @@ |
3034 | regs->rip = fixup->fixup; |
3035 | return; |
3036 | } |
3037 | + |
3038 | + tsk->thread.error_code = error_code; |
3039 | + tsk->thread.trap_no = 13; |
3040 | if (notify_die(DIE_GPF, "general protection fault", regs, |
3041 | error_code, 13, SIGSEGV) == NOTIFY_STOP) |
3042 | return; |
3043 | --- a/arch/x86/kernel/vsyscall_64-xen.c |
3044 | +++ b/arch/x86/kernel/vsyscall_64-xen.c |
3045 | @@ -45,14 +45,34 @@ |
3046 | |
3047 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
3048 | #define __syscall_clobber "r11","rcx","memory" |
3049 | +#define __pa_vsymbol(x) \ |
3050 | + ({unsigned long v; \ |
3051 | + extern char __vsyscall_0; \ |
3052 | + asm("" : "=r" (v) : "0" (x)); \ |
3053 | + ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) |
3054 | |
3055 | +/* |
3056 | + * vsyscall_gtod_data contains data that is : |
3057 | + * - readonly from vsyscalls |
3058 | + * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) |
3059 | + * Try to keep this structure as small as possible to avoid cache line ping pongs |
3060 | + */ |
3061 | struct vsyscall_gtod_data_t { |
3062 | - seqlock_t lock; |
3063 | - int sysctl_enabled; |
3064 | - struct timeval wall_time_tv; |
3065 | + seqlock_t lock; |
3066 | + |
3067 | + /* open coded 'struct timespec' */ |
3068 | + time_t wall_time_sec; |
3069 | + u32 wall_time_nsec; |
3070 | + |
3071 | + int sysctl_enabled; |
3072 | struct timezone sys_tz; |
3073 | - cycle_t offset_base; |
3074 | - struct clocksource clock; |
3075 | + struct { /* extract of a clocksource struct */ |
3076 | + cycle_t (*vread)(void); |
3077 | + cycle_t cycle_last; |
3078 | + cycle_t mask; |
3079 | + u32 mult; |
3080 | + u32 shift; |
3081 | + } clock; |
3082 | }; |
3083 | int __vgetcpu_mode __section_vgetcpu_mode; |
3084 | |
3085 | @@ -68,9 +88,13 @@ |
3086 | |
3087 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); |
3088 | /* copy vsyscall data */ |
3089 | - vsyscall_gtod_data.clock = *clock; |
3090 | - vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; |
3091 | - vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; |
3092 | + vsyscall_gtod_data.clock.vread = clock->vread; |
3093 | + vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; |
3094 | + vsyscall_gtod_data.clock.mask = clock->mask; |
3095 | + vsyscall_gtod_data.clock.mult = clock->mult; |
3096 | + vsyscall_gtod_data.clock.shift = clock->shift; |
3097 | + vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
3098 | + vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
3099 | vsyscall_gtod_data.sys_tz = sys_tz; |
3100 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
3101 | } |
3102 | @@ -105,7 +129,8 @@ |
3103 | static __always_inline void do_vgettimeofday(struct timeval * tv) |
3104 | { |
3105 | cycle_t now, base, mask, cycle_delta; |
3106 | - unsigned long seq, mult, shift, nsec_delta; |
3107 | + unsigned seq; |
3108 | + unsigned long mult, shift, nsec; |
3109 | cycle_t (*vread)(void); |
3110 | do { |
3111 | seq = read_seqbegin(&__vsyscall_gtod_data.lock); |
3112 | @@ -121,21 +146,20 @@ |
3113 | mult = __vsyscall_gtod_data.clock.mult; |
3114 | shift = __vsyscall_gtod_data.clock.shift; |
3115 | |
3116 | - *tv = __vsyscall_gtod_data.wall_time_tv; |
3117 | - |
3118 | + tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; |
3119 | + nsec = __vsyscall_gtod_data.wall_time_nsec; |
3120 | } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); |
3121 | |
3122 | /* calculate interval: */ |
3123 | cycle_delta = (now - base) & mask; |
3124 | /* convert to nsecs: */ |
3125 | - nsec_delta = (cycle_delta * mult) >> shift; |
3126 | + nsec += (cycle_delta * mult) >> shift; |
3127 | |
3128 | - /* convert to usecs and add to timespec: */ |
3129 | - tv->tv_usec += nsec_delta / NSEC_PER_USEC; |
3130 | - while (tv->tv_usec > USEC_PER_SEC) { |
3131 | + while (nsec >= NSEC_PER_SEC) { |
3132 | tv->tv_sec += 1; |
3133 | - tv->tv_usec -= USEC_PER_SEC; |
3134 | + nsec -= NSEC_PER_SEC; |
3135 | } |
3136 | + tv->tv_usec = nsec / NSEC_PER_USEC; |
3137 | } |
3138 | |
3139 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) |
3140 | @@ -151,11 +175,16 @@ |
3141 | * unlikely */ |
3142 | time_t __vsyscall(1) vtime(time_t *t) |
3143 | { |
3144 | + struct timeval tv; |
3145 | + time_t result; |
3146 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) |
3147 | return time_syscall(t); |
3148 | - else if (t) |
3149 | - *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; |
3150 | - return __vsyscall_gtod_data.wall_time_tv.tv_sec; |
3151 | + |
3152 | + vgettimeofday(&tv, 0); |
3153 | + result = tv.tv_sec; |
3154 | + if (t) |
3155 | + *t = result; |
3156 | + return result; |
3157 | } |
3158 | |
3159 | /* Fast way to get current CPU and node. |
3160 | @@ -224,10 +253,10 @@ |
3161 | return ret; |
3162 | /* gcc has some trouble with __va(__pa()), so just do it this |
3163 | way. */ |
3164 | - map1 = ioremap(__pa_symbol(&vsysc1), 2); |
3165 | + map1 = ioremap(__pa_vsymbol(&vsysc1), 2); |
3166 | if (!map1) |
3167 | return -ENOMEM; |
3168 | - map2 = ioremap(__pa_symbol(&vsysc2), 2); |
3169 | + map2 = ioremap(__pa_vsymbol(&vsysc2), 2); |
3170 | if (!map2) { |
3171 | ret = -ENOMEM; |
3172 | goto out; |
3173 | @@ -304,7 +333,7 @@ |
3174 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) |
3175 | { |
3176 | long cpu = (long)arg; |
3177 | - if (action == CPU_ONLINE) |
3178 | + if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) |
3179 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); |
3180 | return NOTIFY_DONE; |
3181 | } |
3182 | --- a/arch/x86/mm/fault_32-xen.c |
3183 | +++ b/arch/x86/mm/fault_32-xen.c |
3184 | @@ -14,19 +14,20 @@ |
3185 | #include <linux/mman.h> |
3186 | #include <linux/mm.h> |
3187 | #include <linux/smp.h> |
3188 | -#include <linux/smp_lock.h> |
3189 | #include <linux/interrupt.h> |
3190 | #include <linux/init.h> |
3191 | #include <linux/tty.h> |
3192 | #include <linux/vt_kern.h> /* For unblank_screen() */ |
3193 | #include <linux/highmem.h> |
3194 | +#include <linux/bootmem.h> /* for max_low_pfn */ |
3195 | +#include <linux/vmalloc.h> |
3196 | #include <linux/module.h> |
3197 | #include <linux/kprobes.h> |
3198 | #include <linux/uaccess.h> |
3199 | +#include <linux/kdebug.h> |
3200 | |
3201 | #include <asm/system.h> |
3202 | #include <asm/desc.h> |
3203 | -#include <asm/kdebug.h> |
3204 | #include <asm/segment.h> |
3205 | |
3206 | extern void die(const char *,struct pt_regs *,long); |
3207 | @@ -259,25 +260,20 @@ |
3208 | unsigned long page; |
3209 | |
3210 | page = read_cr3(); |
3211 | - page = ((unsigned long *) __va(page))[address >> 22]; |
3212 | - if (oops_may_print()) |
3213 | - printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page, |
3214 | - machine_to_phys(page)); |
3215 | + page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT]; |
3216 | + printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page, |
3217 | + machine_to_phys(page)); |
3218 | /* |
3219 | * We must not directly access the pte in the highpte |
3220 | * case if the page table is located in highmem. |
3221 | * And lets rather not kmap-atomic the pte, just in case |
3222 | * it's allocated already. |
3223 | */ |
3224 | -#ifdef CONFIG_HIGHPTE |
3225 | - if ((page >> PAGE_SHIFT) >= highstart_pfn) |
3226 | - return; |
3227 | -#endif |
3228 | - if ((page & 1) && oops_may_print()) { |
3229 | - page &= PAGE_MASK; |
3230 | - address &= 0x003ff000; |
3231 | - page = machine_to_phys(page); |
3232 | - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; |
3233 | + if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn |
3234 | + && (page & _PAGE_PRESENT)) { |
3235 | + page = machine_to_phys(page & PAGE_MASK); |
3236 | + page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT) |
3237 | + & (PTRS_PER_PTE - 1)]; |
3238 | printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page, |
3239 | machine_to_phys(page)); |
3240 | } |
3241 | @@ -581,6 +577,11 @@ |
3242 | bad_area_nosemaphore: |
3243 | /* User mode accesses just cause a SIGSEGV */ |
3244 | if (error_code & 4) { |
3245 | + /* |
3246 | + * It's possible to have interrupts off here. |
3247 | + */ |
3248 | + local_irq_enable(); |
3249 | + |
3250 | /* |
3251 | * Valid to do another page fault here because this one came |
3252 | * from user space. |
3253 | @@ -633,7 +634,7 @@ |
3254 | bust_spinlocks(1); |
3255 | |
3256 | if (oops_may_print()) { |
3257 | - #ifdef CONFIG_X86_PAE |
3258 | +#ifdef CONFIG_X86_PAE |
3259 | if (error_code & 16) { |
3260 | pte_t *pte = lookup_address(address); |
3261 | |
3262 | @@ -642,7 +643,7 @@ |
3263 | "NX-protected page - exploit attempt? " |
3264 | "(uid: %d)\n", current->uid); |
3265 | } |
3266 | - #endif |
3267 | +#endif |
3268 | if (address < PAGE_SIZE) |
3269 | printk(KERN_ALERT "BUG: unable to handle kernel NULL " |
3270 | "pointer dereference"); |
3271 | @@ -652,8 +653,8 @@ |
3272 | printk(" at virtual address %08lx\n",address); |
3273 | printk(KERN_ALERT " printing eip:\n"); |
3274 | printk("%08lx\n", regs->eip); |
3275 | + dump_fault_path(address); |
3276 | } |
3277 | - dump_fault_path(address); |
3278 | tsk->thread.cr2 = address; |
3279 | tsk->thread.trap_no = 14; |
3280 | tsk->thread.error_code = error_code; |
3281 | @@ -694,7 +695,6 @@ |
3282 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); |
3283 | } |
3284 | |
3285 | -#if !HAVE_SHARED_KERNEL_PMD |
3286 | void vmalloc_sync_all(void) |
3287 | { |
3288 | /* |
3289 | @@ -710,6 +710,9 @@ |
3290 | static unsigned long start = TASK_SIZE; |
3291 | unsigned long address; |
3292 | |
3293 | + if (SHARED_KERNEL_PMD) |
3294 | + return; |
3295 | + |
3296 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); |
3297 | for (address = start; |
3298 | address >= TASK_SIZE && address < hypervisor_virt_start; |
3299 | @@ -739,4 +742,3 @@ |
3300 | start = address + (1UL << PMD_SHIFT); |
3301 | } |
3302 | } |
3303 | -#endif |
3304 | --- a/arch/x86/mm/fault_64-xen.c |
3305 | +++ b/arch/x86/mm/fault_64-xen.c |
3306 | @@ -15,22 +15,22 @@ |
3307 | #include <linux/mman.h> |
3308 | #include <linux/mm.h> |
3309 | #include <linux/smp.h> |
3310 | -#include <linux/smp_lock.h> |
3311 | #include <linux/interrupt.h> |
3312 | #include <linux/init.h> |
3313 | #include <linux/tty.h> |
3314 | #include <linux/vt_kern.h> /* For unblank_screen() */ |
3315 | #include <linux/compiler.h> |
3316 | +#include <linux/vmalloc.h> |
3317 | #include <linux/module.h> |
3318 | #include <linux/kprobes.h> |
3319 | #include <linux/uaccess.h> |
3320 | +#include <linux/kdebug.h> |
3321 | |
3322 | #include <asm/system.h> |
3323 | #include <asm/pgalloc.h> |
3324 | #include <asm/smp.h> |
3325 | #include <asm/tlbflush.h> |
3326 | #include <asm/proto.h> |
3327 | -#include <asm/kdebug.h> |
3328 | #include <asm-generic/sections.h> |
3329 | |
3330 | /* Page fault error code bits */ |
3331 | @@ -537,6 +537,12 @@ |
3332 | bad_area_nosemaphore: |
3333 | /* User mode accesses just cause a SIGSEGV */ |
3334 | if (error_code & PF_USER) { |
3335 | + |
3336 | + /* |
3337 | + * It's possible to have interrupts off here. |
3338 | + */ |
3339 | + local_irq_enable(); |
3340 | + |
3341 | if (is_prefetch(regs, address, error_code)) |
3342 | return; |
3343 | |
3344 | @@ -646,7 +652,7 @@ |
3345 | } |
3346 | |
3347 | DEFINE_SPINLOCK(pgd_lock); |
3348 | -struct page *pgd_list; |
3349 | +LIST_HEAD(pgd_list); |
3350 | |
3351 | void vmalloc_sync_all(void) |
3352 | { |
3353 | @@ -666,8 +672,7 @@ |
3354 | if (pgd_none(*pgd_ref)) |
3355 | continue; |
3356 | spin_lock(&pgd_lock); |
3357 | - for (page = pgd_list; page; |
3358 | - page = (struct page *)page->index) { |
3359 | + list_for_each_entry(page, &pgd_list, lru) { |
3360 | pgd_t *pgd; |
3361 | pgd = (pgd_t *)page_address(page) + pgd_index(address); |
3362 | if (pgd_none(*pgd)) |
3363 | --- a/arch/x86/mm/highmem_32-xen.c |
3364 | +++ b/arch/x86/mm/highmem_32-xen.c |
3365 | @@ -26,7 +26,7 @@ |
3366 | * However when holding an atomic kmap is is not legal to sleep, so atomic |
3367 | * kmaps are appropriate for short, tight code paths only. |
3368 | */ |
3369 | -static void *__kmap_atomic(struct page *page, enum km_type type, pgprot_t prot) |
3370 | +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) |
3371 | { |
3372 | enum fixed_addresses idx; |
3373 | unsigned long vaddr; |
3374 | @@ -49,15 +49,7 @@ |
3375 | |
3376 | void *kmap_atomic(struct page *page, enum km_type type) |
3377 | { |
3378 | - return __kmap_atomic(page, type, kmap_prot); |
3379 | -} |
3380 | - |
3381 | -/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */ |
3382 | -void *kmap_atomic_pte(struct page *page, enum km_type type) |
3383 | -{ |
3384 | - return __kmap_atomic(page, type, |
3385 | - test_bit(PG_pinned, &page->flags) |
3386 | - ? PAGE_KERNEL_RO : kmap_prot); |
3387 | + return kmap_atomic_prot(page, type, kmap_prot); |
3388 | } |
3389 | |
3390 | void kunmap_atomic(void *kvaddr, enum km_type type) |
3391 | @@ -80,6 +72,7 @@ |
3392 | #endif |
3393 | } |
3394 | |
3395 | + arch_flush_lazy_mmu_mode(); |
3396 | pagefault_enable(); |
3397 | } |
3398 | |
3399 | @@ -117,6 +110,5 @@ |
3400 | EXPORT_SYMBOL(kmap); |
3401 | EXPORT_SYMBOL(kunmap); |
3402 | EXPORT_SYMBOL(kmap_atomic); |
3403 | -EXPORT_SYMBOL(kmap_atomic_pte); |
3404 | EXPORT_SYMBOL(kunmap_atomic); |
3405 | EXPORT_SYMBOL(kmap_atomic_to_page); |
3406 | --- a/arch/x86/mm/init_32-xen.c |
3407 | +++ b/arch/x86/mm/init_32-xen.c |
3408 | @@ -22,6 +22,7 @@ |
3409 | #include <linux/init.h> |
3410 | #include <linux/highmem.h> |
3411 | #include <linux/pagemap.h> |
3412 | +#include <linux/pfn.h> |
3413 | #include <linux/poison.h> |
3414 | #include <linux/bootmem.h> |
3415 | #include <linux/slab.h> |
3416 | @@ -67,17 +68,19 @@ |
3417 | pmd_t *pmd_table; |
3418 | |
3419 | #ifdef CONFIG_X86_PAE |
3420 | - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
3421 | - paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); |
3422 | - make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); |
3423 | - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
3424 | - pud = pud_offset(pgd, 0); |
3425 | - if (pmd_table != pmd_offset(pud, 0)) |
3426 | - BUG(); |
3427 | -#else |
3428 | + if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) { |
3429 | + pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
3430 | + |
3431 | + paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); |
3432 | + make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); |
3433 | + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
3434 | + pud = pud_offset(pgd, 0); |
3435 | + if (pmd_table != pmd_offset(pud, 0)) |
3436 | + BUG(); |
3437 | + } |
3438 | +#endif |
3439 | pud = pud_offset(pgd, 0); |
3440 | pmd_table = pmd_offset(pud, 0); |
3441 | -#endif |
3442 | |
3443 | return pmd_table; |
3444 | } |
3445 | @@ -88,16 +91,18 @@ |
3446 | */ |
3447 | static pte_t * __init one_page_table_init(pmd_t *pmd) |
3448 | { |
3449 | +#if CONFIG_XEN_COMPAT <= 0x030002 |
3450 | if (pmd_none(*pmd)) { |
3451 | +#else |
3452 | + if (!(__pmd_val(*pmd) & _PAGE_PRESENT)) { |
3453 | +#endif |
3454 | pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
3455 | + |
3456 | paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); |
3457 | make_lowmem_page_readonly(page_table, |
3458 | XENFEAT_writable_page_tables); |
3459 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
3460 | - if (page_table != pte_offset_kernel(pmd, 0)) |
3461 | - BUG(); |
3462 | - |
3463 | - return page_table; |
3464 | + BUG_ON(page_table != pte_offset_kernel(pmd, 0)); |
3465 | } |
3466 | |
3467 | return pte_offset_kernel(pmd, 0); |
3468 | @@ -117,7 +122,6 @@ |
3469 | static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) |
3470 | { |
3471 | pgd_t *pgd; |
3472 | - pud_t *pud; |
3473 | pmd_t *pmd; |
3474 | int pgd_idx, pmd_idx; |
3475 | unsigned long vaddr; |
3476 | @@ -128,12 +132,10 @@ |
3477 | pgd = pgd_base + pgd_idx; |
3478 | |
3479 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { |
3480 | - if (pgd_none(*pgd)) |
3481 | - one_md_table_init(pgd); |
3482 | - pud = pud_offset(pgd, vaddr); |
3483 | - pmd = pmd_offset(pud, vaddr); |
3484 | + pmd = one_md_table_init(pgd); |
3485 | + pmd = pmd + pmd_index(vaddr); |
3486 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { |
3487 | - if (vaddr < hypervisor_virt_start && pmd_none(*pmd)) |
3488 | + if (vaddr < hypervisor_virt_start) |
3489 | one_page_table_init(pmd); |
3490 | |
3491 | vaddr += PMD_SIZE; |
3492 | @@ -196,24 +198,25 @@ |
3493 | /* Map with big pages if possible, otherwise create normal page tables. */ |
3494 | if (cpu_has_pse) { |
3495 | unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; |
3496 | - |
3497 | if (is_kernel_text(address) || is_kernel_text(address2)) |
3498 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); |
3499 | else |
3500 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); |
3501 | + |
3502 | pfn += PTRS_PER_PTE; |
3503 | } else { |
3504 | pte = one_page_table_init(pmd); |
3505 | |
3506 | - pte += pte_ofs; |
3507 | - for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { |
3508 | - /* XEN: Only map initial RAM allocation. */ |
3509 | - if ((pfn >= max_ram_pfn) || pte_present(*pte)) |
3510 | - continue; |
3511 | - if (is_kernel_text(address)) |
3512 | - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); |
3513 | - else |
3514 | - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); |
3515 | + for (pte += pte_ofs; |
3516 | + pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; |
3517 | + pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { |
3518 | + /* XEN: Only map initial RAM allocation. */ |
3519 | + if ((pfn >= max_ram_pfn) || pte_present(*pte)) |
3520 | + continue; |
3521 | + if (is_kernel_text(address)) |
3522 | + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); |
3523 | + else |
3524 | + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); |
3525 | } |
3526 | pte_ofs = 0; |
3527 | } |
3528 | @@ -383,15 +386,44 @@ |
3529 | |
3530 | pgd_t *swapper_pg_dir; |
3531 | |
3532 | +static void __init xen_pagetable_setup_start(pgd_t *base) |
3533 | +{ |
3534 | +} |
3535 | + |
3536 | +static void __init xen_pagetable_setup_done(pgd_t *base) |
3537 | +{ |
3538 | +} |
3539 | + |
3540 | +/* |
3541 | + * Build a proper pagetable for the kernel mappings. Up until this |
3542 | + * point, we've been running on some set of pagetables constructed by |
3543 | + * the boot process. |
3544 | + * |
3545 | + * If we're booting on native hardware, this will be a pagetable |
3546 | + * constructed in arch/i386/kernel/head.S, and not running in PAE mode |
3547 | + * (even if we'll end up running in PAE). The root of the pagetable |
3548 | + * will be swapper_pg_dir. |
3549 | + * |
3550 | + * If we're booting paravirtualized under a hypervisor, then there are |
3551 | + * more options: we may already be running PAE, and the pagetable may |
3552 | + * or may not be based in swapper_pg_dir. In any case, |
3553 | + * paravirt_pagetable_setup_start() will set up swapper_pg_dir |
3554 | + * appropriately for the rest of the initialization to work. |
3555 | + * |
3556 | + * In general, pagetable_init() assumes that the pagetable may already |
3557 | + * be partially populated, and so it avoids stomping on any existing |
3558 | + * mappings. |
3559 | + */ |
3560 | static void __init pagetable_init (void) |
3561 | { |
3562 | - unsigned long vaddr; |
3563 | + unsigned long vaddr, end; |
3564 | pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base; |
3565 | |
3566 | + xen_pagetable_setup_start(pgd_base); |
3567 | + |
3568 | /* Enable PSE if available */ |
3569 | - if (cpu_has_pse) { |
3570 | + if (cpu_has_pse) |
3571 | set_in_cr4(X86_CR4_PSE); |
3572 | - } |
3573 | |
3574 | /* Enable PGE if available */ |
3575 | if (cpu_has_pge) { |
3576 | @@ -408,9 +440,12 @@ |
3577 | * created - mappings will be set by set_fixmap(): |
3578 | */ |
3579 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; |
3580 | - page_table_range_init(vaddr, hypervisor_virt_start, pgd_base); |
3581 | + end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; |
3582 | + page_table_range_init(vaddr, end, pgd_base); |
3583 | |
3584 | permanent_kmaps_init(pgd_base); |
3585 | + |
3586 | + xen_pagetable_setup_done(pgd_base); |
3587 | } |
3588 | |
3589 | #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) |
3590 | @@ -757,34 +792,29 @@ |
3591 | EXPORT_SYMBOL_GPL(remove_memory); |
3592 | #endif |
3593 | |
3594 | -struct kmem_cache *pgd_cache; |
3595 | struct kmem_cache *pmd_cache; |
3596 | |
3597 | void __init pgtable_cache_init(void) |
3598 | { |
3599 | + size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); |
3600 | + |
3601 | if (PTRS_PER_PMD > 1) { |
3602 | pmd_cache = kmem_cache_create("pmd", |
3603 | PTRS_PER_PMD*sizeof(pmd_t), |
3604 | PTRS_PER_PMD*sizeof(pmd_t), |
3605 | - 0, |
3606 | + SLAB_PANIC, |
3607 | pmd_ctor, |
3608 | NULL); |
3609 | - if (!pmd_cache) |
3610 | - panic("pgtable_cache_init(): cannot create pmd cache"); |
3611 | + if (!SHARED_KERNEL_PMD) { |
3612 | + /* If we're in PAE mode and have a non-shared |
3613 | + kernel pmd, then the pgd size must be a |
3614 | + page size. This is because the pgd_list |
3615 | + links through the page structure, so there |
3616 | + can only be one pgd per page for this to |
3617 | + work. */ |
3618 | + pgd_size = PAGE_SIZE; |
3619 | + } |
3620 | } |
3621 | - pgd_cache = kmem_cache_create("pgd", |
3622 | -#ifndef CONFIG_XEN |
3623 | - PTRS_PER_PGD*sizeof(pgd_t), |
3624 | - PTRS_PER_PGD*sizeof(pgd_t), |
3625 | -#else |
3626 | - PAGE_SIZE, |
3627 | - PAGE_SIZE, |
3628 | -#endif |
3629 | - 0, |
3630 | - pgd_ctor, |
3631 | - PTRS_PER_PMD == 1 ? pgd_dtor : NULL); |
3632 | - if (!pgd_cache) |
3633 | - panic("pgtable_cache_init(): Cannot create pgd cache"); |
3634 | } |
3635 | |
3636 | /* |
3637 | @@ -818,13 +848,26 @@ |
3638 | |
3639 | void mark_rodata_ro(void) |
3640 | { |
3641 | - unsigned long addr = (unsigned long)__start_rodata; |
3642 | - |
3643 | - for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) |
3644 | - change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); |
3645 | + unsigned long start = PFN_ALIGN(_text); |
3646 | + unsigned long size = PFN_ALIGN(_etext) - start; |
3647 | |
3648 | - printk("Write protecting the kernel read-only data: %uk\n", |
3649 | - (__end_rodata - __start_rodata) >> 10); |
3650 | +#ifndef CONFIG_KPROBES |
3651 | +#ifdef CONFIG_HOTPLUG_CPU |
3652 | + /* It must still be possible to apply SMP alternatives. */ |
3653 | + if (num_possible_cpus() <= 1) |
3654 | +#endif |
3655 | + { |
3656 | + change_page_attr(virt_to_page(start), |
3657 | + size >> PAGE_SHIFT, PAGE_KERNEL_RX); |
3658 | + printk("Write protecting the kernel text: %luk\n", size >> 10); |
3659 | + } |
3660 | +#endif |
3661 | + start += size; |
3662 | + size = (unsigned long)__end_rodata - start; |
3663 | + change_page_attr(virt_to_page(start), |
3664 | + size >> PAGE_SHIFT, PAGE_KERNEL_RO); |
3665 | + printk("Write protecting the kernel read-only data: %luk\n", |
3666 | + size >> 10); |
3667 | |
3668 | /* |
3669 | * change_page_attr() requires a global_flush_tlb() call after it. |
3670 | @@ -847,7 +890,7 @@ |
3671 | free_page(addr); |
3672 | totalram_pages++; |
3673 | } |
3674 | - printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); |
3675 | + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
3676 | } |
3677 | |
3678 | void free_initmem(void) |
3679 | --- a/arch/x86/mm/init_64-xen.c |
3680 | +++ b/arch/x86/mm/init_64-xen.c |
3681 | @@ -25,10 +25,12 @@ |
3682 | #include <linux/bootmem.h> |
3683 | #include <linux/proc_fs.h> |
3684 | #include <linux/pci.h> |
3685 | +#include <linux/pfn.h> |
3686 | #include <linux/poison.h> |
3687 | #include <linux/dma-mapping.h> |
3688 | #include <linux/module.h> |
3689 | #include <linux/memory_hotplug.h> |
3690 | +#include <linux/nmi.h> |
3691 | |
3692 | #include <asm/processor.h> |
3693 | #include <asm/system.h> |
3694 | @@ -51,7 +53,7 @@ |
3695 | #define Dprintk(x...) |
3696 | #endif |
3697 | |
3698 | -struct dma_mapping_ops* dma_ops; |
3699 | +const struct dma_mapping_ops* dma_ops; |
3700 | EXPORT_SYMBOL(dma_ops); |
3701 | |
3702 | #if CONFIG_XEN_COMPAT <= 0x030002 |
3703 | @@ -189,6 +191,13 @@ |
3704 | |
3705 | for_each_online_pgdat(pgdat) { |
3706 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { |
3707 | + /* this loop can take a while with 256 GB and 4k pages |
3708 | + so update the NMI watchdog */ |
3709 | + if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { |
3710 | + touch_nmi_watchdog(); |
3711 | + } |
3712 | + if (!pfn_valid(pgdat->node_start_pfn + i)) |
3713 | + continue; |
3714 | page = pfn_to_page(pgdat->node_start_pfn + i); |
3715 | total++; |
3716 | if (PageReserved(page)) |
3717 | @@ -350,7 +359,7 @@ |
3718 | } |
3719 | } |
3720 | |
3721 | -unsigned long __initdata table_start, table_end; |
3722 | +unsigned long __meminitdata table_start, table_end; |
3723 | |
3724 | static __meminit void *alloc_static_page(unsigned long *phys) |
3725 | { |
3726 | @@ -367,7 +376,7 @@ |
3727 | start_pfn++; |
3728 | memset((void *)va, 0, PAGE_SIZE); |
3729 | return (void *)va; |
3730 | -} |
3731 | +} |
3732 | |
3733 | #define PTE_SIZE PAGE_SIZE |
3734 | |
3735 | @@ -408,28 +417,46 @@ |
3736 | |
3737 | #ifndef CONFIG_XEN |
3738 | /* Must run before zap_low_mappings */ |
3739 | -__init void *early_ioremap(unsigned long addr, unsigned long size) |
3740 | +__meminit void *early_ioremap(unsigned long addr, unsigned long size) |
3741 | { |
3742 | - unsigned long map = round_down(addr, LARGE_PAGE_SIZE); |
3743 | - |
3744 | - /* actually usually some more */ |
3745 | - if (size >= LARGE_PAGE_SIZE) { |
3746 | - return NULL; |
3747 | + unsigned long vaddr; |
3748 | + pmd_t *pmd, *last_pmd; |
3749 | + int i, pmds; |
3750 | + |
3751 | + pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; |
3752 | + vaddr = __START_KERNEL_map; |
3753 | + pmd = level2_kernel_pgt; |
3754 | + last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; |
3755 | + for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { |
3756 | + for (i = 0; i < pmds; i++) { |
3757 | + if (pmd_present(pmd[i])) |
3758 | + goto next; |
3759 | + } |
3760 | + vaddr += addr & ~PMD_MASK; |
3761 | + addr &= PMD_MASK; |
3762 | + for (i = 0; i < pmds; i++, addr += PMD_SIZE) |
3763 | + set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE)); |
3764 | + __flush_tlb(); |
3765 | + return (void *)vaddr; |
3766 | + next: |
3767 | + ; |
3768 | } |
3769 | - set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); |
3770 | - map += LARGE_PAGE_SIZE; |
3771 | - set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); |
3772 | - __flush_tlb(); |
3773 | - return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); |
3774 | + printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); |
3775 | + return NULL; |
3776 | } |
3777 | |
3778 | /* To avoid virtual aliases later */ |
3779 | -__init void early_iounmap(void *addr, unsigned long size) |
3780 | +__meminit void early_iounmap(void *addr, unsigned long size) |
3781 | { |
3782 | - if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) |
3783 | - printk("early_iounmap: bad address %p\n", addr); |
3784 | - set_pmd(temp_mappings[0].pmd, __pmd(0)); |
3785 | - set_pmd(temp_mappings[1].pmd, __pmd(0)); |
3786 | + unsigned long vaddr; |
3787 | + pmd_t *pmd; |
3788 | + int i, pmds; |
3789 | + |
3790 | + vaddr = (unsigned long)addr; |
3791 | + pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; |
3792 | + pmd = level2_kernel_pgt + pmd_index(vaddr); |
3793 | + for (i = 0; i < pmds; i++) |
3794 | + pmd_clear(pmd + i); |
3795 | __flush_tlb(); |
3796 | } |
3797 | #endif |
3798 | @@ -763,14 +790,6 @@ |
3799 | __flush_tlb_all(); |
3800 | } |
3801 | |
3802 | -void __cpuinit zap_low_mappings(int cpu) |
3803 | -{ |
3804 | - /* this is not required for Xen */ |
3805 | -#if 0 |
3806 | - swap_low_mappings(); |
3807 | -#endif |
3808 | -} |
3809 | - |
3810 | #ifndef CONFIG_NUMA |
3811 | void __init paging_init(void) |
3812 | { |
3813 | @@ -961,17 +980,6 @@ |
3814 | reservedpages << (PAGE_SHIFT-10), |
3815 | datasize >> 10, |
3816 | initsize >> 10); |
3817 | - |
3818 | -#ifndef CONFIG_XEN |
3819 | -#ifdef CONFIG_SMP |
3820 | - /* |
3821 | - * Sync boot_level4_pgt mappings with the init_level4_pgt |
3822 | - * except for the low identity mappings which are already zapped |
3823 | - * in init_level4_pgt. This sync-up is essential for AP's bringup |
3824 | - */ |
3825 | - memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); |
3826 | -#endif |
3827 | -#endif |
3828 | } |
3829 | |
3830 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
3831 | @@ -981,7 +989,7 @@ |
3832 | if (begin >= end) |
3833 | return; |
3834 | |
3835 | - printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); |
3836 | + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
3837 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
3838 | ClearPageReserved(virt_to_page(addr)); |
3839 | init_page_count(virt_to_page(addr)); |
3840 | @@ -990,24 +998,17 @@ |
3841 | if (addr >= __START_KERNEL_map) { |
3842 | /* make_readonly() reports all kernel addresses. */ |
3843 | __make_page_writable(__va(__pa(addr))); |
3844 | - if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { |
3845 | - pgd_t *pgd = pgd_offset_k(addr); |
3846 | - pud_t *pud = pud_offset(pgd, addr); |
3847 | - pmd_t *pmd = pmd_offset(pud, addr); |
3848 | - pte_t *pte = pte_offset_kernel(pmd, addr); |
3849 | - |
3850 | - xen_l1_entry_update(pte, __pte(0)); /* fallback */ |
3851 | - } |
3852 | + change_page_attr_addr(addr, 1, __pgprot(0)); |
3853 | } |
3854 | free_page(addr); |
3855 | totalram_pages++; |
3856 | } |
3857 | + if (addr > __START_KERNEL_map) |
3858 | + global_flush_tlb(); |
3859 | } |
3860 | |
3861 | void free_initmem(void) |
3862 | { |
3863 | - memset(__initdata_begin, POISON_FREE_INITDATA, |
3864 | - __initdata_end - __initdata_begin); |
3865 | free_init_pages("unused kernel memory", |
3866 | (unsigned long)(&__init_begin), |
3867 | (unsigned long)(&__init_end)); |
3868 | @@ -1017,13 +1018,28 @@ |
3869 | |
3870 | void mark_rodata_ro(void) |
3871 | { |
3872 | - unsigned long addr = (unsigned long)__start_rodata; |
3873 | + unsigned long start = (unsigned long)_stext, end; |
3874 | + |
3875 | +#ifdef CONFIG_HOTPLUG_CPU |
3876 | + /* It must still be possible to apply SMP alternatives. */ |
3877 | + if (num_possible_cpus() > 1) |
3878 | + start = (unsigned long)_etext; |
3879 | +#endif |
3880 | + |
3881 | +#ifdef CONFIG_KPROBES |
3882 | + start = (unsigned long)__start_rodata; |
3883 | +#endif |
3884 | + |
3885 | + end = (unsigned long)__end_rodata; |
3886 | + start = (start + PAGE_SIZE - 1) & PAGE_MASK; |
3887 | + end &= PAGE_MASK; |
3888 | + if (end <= start) |
3889 | + return; |
3890 | |
3891 | - for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) |
3892 | - change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); |
3893 | + change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO); |
3894 | |
3895 | - printk ("Write protecting the kernel read-only data: %luk\n", |
3896 | - (__end_rodata - __start_rodata) >> 10); |
3897 | + printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
3898 | + (end - start) >> 10); |
3899 | |
3900 | /* |
3901 | * change_page_attr_addr() requires a global_flush_tlb() call after it. |
3902 | @@ -1176,3 +1192,11 @@ |
3903 | { |
3904 | return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); |
3905 | } |
3906 | + |
3907 | +#ifndef CONFIG_XEN |
3908 | +void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) |
3909 | +{ |
3910 | + return __alloc_bootmem_core(pgdat->bdata, size, |
3911 | + SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); |
3912 | +} |
3913 | +#endif |
3914 | --- a/arch/x86/mm/ioremap_32-xen.c |
3915 | +++ b/arch/x86/mm/ioremap_32-xen.c |
3916 | @@ -13,6 +13,7 @@ |
3917 | #include <linux/slab.h> |
3918 | #include <linux/module.h> |
3919 | #include <linux/io.h> |
3920 | +#include <linux/sched.h> |
3921 | #include <asm/fixmap.h> |
3922 | #include <asm/cacheflush.h> |
3923 | #include <asm/tlbflush.h> |
3924 | --- a/arch/x86/mm/pageattr_64-xen.c |
3925 | +++ b/arch/x86/mm/pageattr_64-xen.c |
3926 | @@ -215,13 +215,13 @@ |
3927 | preempt_enable(); |
3928 | } |
3929 | |
3930 | -void _arch_dup_mmap(struct mm_struct *mm) |
3931 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) |
3932 | { |
3933 | if (!mm->context.pinned) |
3934 | mm_pin(mm); |
3935 | } |
3936 | |
3937 | -void _arch_exit_mmap(struct mm_struct *mm) |
3938 | +void arch_exit_mmap(struct mm_struct *mm) |
3939 | { |
3940 | struct task_struct *tsk = current; |
3941 | |
3942 | @@ -337,10 +337,11 @@ |
3943 | struct page *pg; |
3944 | |
3945 | /* When clflush is available always use it because it is |
3946 | - much cheaper than WBINVD */ |
3947 | - if (!cpu_has_clflush) |
3948 | + much cheaper than WBINVD. Disable clflush for now because |
3949 | + the high level code is not ready yet */ |
3950 | + if (1 || !cpu_has_clflush) |
3951 | asm volatile("wbinvd" ::: "memory"); |
3952 | - list_for_each_entry(pg, l, lru) { |
3953 | + else list_for_each_entry(pg, l, lru) { |
3954 | void *adr = page_address(pg); |
3955 | if (cpu_has_clflush) |
3956 | cache_flush_page(adr); |
3957 | @@ -454,16 +455,24 @@ |
3958 | */ |
3959 | int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) |
3960 | { |
3961 | - int err = 0; |
3962 | + int err = 0, kernel_map = 0; |
3963 | int i; |
3964 | |
3965 | + if (address >= __START_KERNEL_map |
3966 | + && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) { |
3967 | + address = (unsigned long)__va(__pa(address)); |
3968 | + kernel_map = 1; |
3969 | + } |
3970 | + |
3971 | down_write(&init_mm.mmap_sem); |
3972 | for (i = 0; i < numpages; i++, address += PAGE_SIZE) { |
3973 | unsigned long pfn = __pa(address) >> PAGE_SHIFT; |
3974 | |
3975 | - err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); |
3976 | - if (err) |
3977 | - break; |
3978 | + if (!kernel_map || pte_present(pfn_pte(0, prot))) { |
3979 | + err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); |
3980 | + if (err) |
3981 | + break; |
3982 | + } |
3983 | /* Handle kernel mapping too which aliases part of the |
3984 | * lowmem */ |
3985 | if (__pa(address) < KERNEL_TEXT_SIZE) { |
3986 | --- a/arch/x86/mm/pgtable_32-xen.c |
3987 | +++ b/arch/x86/mm/pgtable_32-xen.c |
3988 | @@ -13,6 +13,7 @@ |
3989 | #include <linux/pagemap.h> |
3990 | #include <linux/spinlock.h> |
3991 | #include <linux/module.h> |
3992 | +#include <linux/quicklist.h> |
3993 | |
3994 | #include <asm/system.h> |
3995 | #include <asm/pgtable.h> |
3996 | @@ -212,8 +213,6 @@ |
3997 | * against pageattr.c; it is the unique case in which a valid change |
3998 | * of kernel pagetables can't be lazily synchronized by vmalloc faults. |
3999 | * vmalloc faults work because attached pagetables are never freed. |
4000 | - * The locking scheme was chosen on the basis of manfred's |
4001 | - * recommendations and having no core impact whatsoever. |
4002 | * -- wli |
4003 | */ |
4004 | DEFINE_SPINLOCK(pgd_lock); |
4005 | @@ -239,37 +238,59 @@ |
4006 | set_page_private(next, (unsigned long)pprev); |
4007 | } |
4008 | |
4009 | -void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) |
4010 | + |
4011 | + |
4012 | +#if (PTRS_PER_PMD == 1) |
4013 | +/* Non-PAE pgd constructor */ |
4014 | +void pgd_ctor(void *pgd) |
4015 | { |
4016 | unsigned long flags; |
4017 | |
4018 | - if (PTRS_PER_PMD > 1) { |
4019 | - if (HAVE_SHARED_KERNEL_PMD) |
4020 | - clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, |
4021 | - swapper_pg_dir + USER_PTRS_PER_PGD, |
4022 | - KERNEL_PGD_PTRS); |
4023 | - } else { |
4024 | - spin_lock_irqsave(&pgd_lock, flags); |
4025 | + /* !PAE, no pagetable sharing */ |
4026 | + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); |
4027 | + |
4028 | + spin_lock_irqsave(&pgd_lock, flags); |
4029 | + |
4030 | + /* must happen under lock */ |
4031 | + clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, |
4032 | + swapper_pg_dir + USER_PTRS_PER_PGD, |
4033 | + KERNEL_PGD_PTRS); |
4034 | + |
4035 | + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, |
4036 | + __pa(swapper_pg_dir) >> PAGE_SHIFT, |
4037 | + USER_PTRS_PER_PGD, |
4038 | + KERNEL_PGD_PTRS); |
4039 | + pgd_list_add(pgd); |
4040 | + spin_unlock_irqrestore(&pgd_lock, flags); |
4041 | +} |
4042 | +#else /* PTRS_PER_PMD > 1 */ |
4043 | +/* PAE pgd constructor */ |
4044 | +void pgd_ctor(void *pgd) |
4045 | +{ |
4046 | + /* PAE, kernel PMD may be shared */ |
4047 | + |
4048 | + if (SHARED_KERNEL_PMD) { |
4049 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, |
4050 | swapper_pg_dir + USER_PTRS_PER_PGD, |
4051 | KERNEL_PGD_PTRS); |
4052 | - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); |
4053 | - |
4054 | - /* must happen under lock */ |
4055 | - paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, |
4056 | - __pa(swapper_pg_dir) >> PAGE_SHIFT, |
4057 | - USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); |
4058 | + } else { |
4059 | + unsigned long flags; |
4060 | |
4061 | + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); |
4062 | + spin_lock_irqsave(&pgd_lock, flags); |
4063 | pgd_list_add(pgd); |
4064 | spin_unlock_irqrestore(&pgd_lock, flags); |
4065 | } |
4066 | } |
4067 | +#endif /* PTRS_PER_PMD */ |
4068 | |
4069 | -/* never called when PTRS_PER_PMD > 1 */ |
4070 | -void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) |
4071 | +void pgd_dtor(void *pgd) |
4072 | { |
4073 | unsigned long flags; /* can be called from interrupt context */ |
4074 | |
4075 | + if (SHARED_KERNEL_PMD) |
4076 | + return; |
4077 | + |
4078 | paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); |
4079 | spin_lock_irqsave(&pgd_lock, flags); |
4080 | pgd_list_del(pgd); |
4081 | @@ -278,11 +299,46 @@ |
4082 | pgd_test_and_unpin(pgd); |
4083 | } |
4084 | |
4085 | +#define UNSHARED_PTRS_PER_PGD \ |
4086 | + (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) |
4087 | + |
4088 | +/* If we allocate a pmd for part of the kernel address space, then |
4089 | + make sure its initialized with the appropriate kernel mappings. |
4090 | + Otherwise use a cached zeroed pmd. */ |
4091 | +static pmd_t *pmd_cache_alloc(int idx) |
4092 | +{ |
4093 | + pmd_t *pmd; |
4094 | + |
4095 | + if (idx >= USER_PTRS_PER_PGD) { |
4096 | + pmd = (pmd_t *)__get_free_page(GFP_KERNEL); |
4097 | + |
4098 | +#ifndef CONFIG_XEN |
4099 | + if (pmd) |
4100 | + memcpy(pmd, |
4101 | + (void *)pgd_page_vaddr(swapper_pg_dir[idx]), |
4102 | + sizeof(pmd_t) * PTRS_PER_PMD); |
4103 | +#endif |
4104 | + } else |
4105 | + pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); |
4106 | + |
4107 | + return pmd; |
4108 | +} |
4109 | + |
4110 | +static void pmd_cache_free(pmd_t *pmd, int idx) |
4111 | +{ |
4112 | + if (idx >= USER_PTRS_PER_PGD) { |
4113 | + make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables); |
4114 | + memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); |
4115 | + free_page((unsigned long)pmd); |
4116 | + } else |
4117 | + kmem_cache_free(pmd_cache, pmd); |
4118 | +} |
4119 | + |
4120 | pgd_t *pgd_alloc(struct mm_struct *mm) |
4121 | { |
4122 | int i; |
4123 | - pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); |
4124 | - pmd_t **pmd; |
4125 | + pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); |
4126 | + pmd_t **pmds = NULL; |
4127 | unsigned long flags; |
4128 | |
4129 | pgd_test_and_unpin(pgd); |
4130 | @@ -290,37 +346,40 @@ |
4131 | if (PTRS_PER_PMD == 1 || !pgd) |
4132 | return pgd; |
4133 | |
4134 | - if (HAVE_SHARED_KERNEL_PMD) { |
4135 | - for (i = 0; i < USER_PTRS_PER_PGD; ++i) { |
4136 | - pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); |
4137 | - if (!pmd) |
4138 | - goto out_oom; |
4139 | - paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); |
4140 | - set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); |
4141 | +#ifdef CONFIG_XEN |
4142 | + if (!SHARED_KERNEL_PMD) { |
4143 | + /* |
4144 | + * We can race save/restore (if we sleep during a GFP_KERNEL memory |
4145 | + * allocation). We therefore store virtual addresses of pmds as they |
4146 | + * do not change across save/restore, and poke the machine addresses |
4147 | + * into the pgdir under the pgd_lock. |
4148 | + */ |
4149 | + pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); |
4150 | + if (!pmds) { |
4151 | + quicklist_free(0, pgd_dtor, pgd); |
4152 | + return NULL; |
4153 | } |
4154 | - return pgd; |
4155 | - } |
4156 | - |
4157 | - /* |
4158 | - * We can race save/restore (if we sleep during a GFP_KERNEL memory |
4159 | - * allocation). We therefore store virtual addresses of pmds as they |
4160 | - * do not change across save/restore, and poke the machine addresses |
4161 | - * into the pgdir under the pgd_lock. |
4162 | - */ |
4163 | - pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); |
4164 | - if (!pmd) { |
4165 | - kmem_cache_free(pgd_cache, pgd); |
4166 | - return NULL; |
4167 | } |
4168 | +#endif |
4169 | |
4170 | /* Allocate pmds, remember virtual addresses. */ |
4171 | - for (i = 0; i < PTRS_PER_PGD; ++i) { |
4172 | - pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL); |
4173 | - if (!pmd[i]) |
4174 | + for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { |
4175 | + pmd_t *pmd = pmd_cache_alloc(i); |
4176 | + |
4177 | + if (!pmd) |
4178 | goto out_oom; |
4179 | + |
4180 | paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); |
4181 | + if (pmds) |
4182 | + pmds[i] = pmd; |
4183 | + else |
4184 | + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); |
4185 | } |
4186 | |
4187 | +#ifdef CONFIG_XEN |
4188 | + if (SHARED_KERNEL_PMD) |
4189 | + return pgd; |
4190 | + |
4191 | spin_lock_irqsave(&pgd_lock, flags); |
4192 | |
4193 | /* Protect against save/restore: move below 4GB under pgd_lock. */ |
4194 | @@ -335,44 +394,40 @@ |
4195 | |
4196 | /* Copy kernel pmd contents and write-protect the new pmds. */ |
4197 | for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { |
4198 | - unsigned long v = (unsigned long)i << PGDIR_SHIFT; |
4199 | - pgd_t *kpgd = pgd_offset_k(v); |
4200 | - pud_t *kpud = pud_offset(kpgd, v); |
4201 | - pmd_t *kpmd = pmd_offset(kpud, v); |
4202 | - memcpy(pmd[i], kpmd, PAGE_SIZE); |
4203 | + memcpy(pmds[i], |
4204 | + (void *)pgd_page_vaddr(swapper_pg_dir[i]), |
4205 | + sizeof(pmd_t) * PTRS_PER_PMD); |
4206 | make_lowmem_page_readonly( |
4207 | - pmd[i], XENFEAT_writable_page_tables); |
4208 | + pmds[i], XENFEAT_writable_page_tables); |
4209 | } |
4210 | |
4211 | /* It is safe to poke machine addresses of pmds under the pmd_lock. */ |
4212 | for (i = 0; i < PTRS_PER_PGD; i++) |
4213 | - set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i]))); |
4214 | - |
4215 | - /* Ensure this pgd gets picked up and pinned on save/restore. */ |
4216 | - pgd_list_add(pgd); |
4217 | + set_pgd(&pgd[i], __pgd(1 + __pa(pmds[i]))); |
4218 | |
4219 | spin_unlock_irqrestore(&pgd_lock, flags); |
4220 | |
4221 | - kfree(pmd); |
4222 | + kfree(pmds); |
4223 | +#endif |
4224 | |
4225 | return pgd; |
4226 | |
4227 | out_oom: |
4228 | - if (HAVE_SHARED_KERNEL_PMD) { |
4229 | + if (!pmds) { |
4230 | for (i--; i >= 0; i--) { |
4231 | pgd_t pgdent = pgd[i]; |
4232 | void* pmd = (void *)__va(pgd_val(pgdent)-1); |
4233 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); |
4234 | - kmem_cache_free(pmd_cache, pmd); |
4235 | + pmd_cache_free(pmd, i); |
4236 | } |
4237 | } else { |
4238 | for (i--; i >= 0; i--) { |
4239 | - paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT); |
4240 | - kmem_cache_free(pmd_cache, pmd[i]); |
4241 | + paravirt_release_pd(__pa(pmds[i]) >> PAGE_SHIFT); |
4242 | + pmd_cache_free(pmds[i], i); |
4243 | } |
4244 | - kfree(pmd); |
4245 | + kfree(pmds); |
4246 | } |
4247 | - kmem_cache_free(pgd_cache, pgd); |
4248 | + quicklist_free(0, pgd_dtor, pgd); |
4249 | return NULL; |
4250 | } |
4251 | |
4252 | @@ -392,35 +447,24 @@ |
4253 | |
4254 | /* in the PAE case user pgd entries are overwritten before usage */ |
4255 | if (PTRS_PER_PMD > 1) { |
4256 | - for (i = 0; i < USER_PTRS_PER_PGD; ++i) { |
4257 | + for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { |
4258 | pgd_t pgdent = pgd[i]; |
4259 | void* pmd = (void *)__va(pgd_val(pgdent)-1); |
4260 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); |
4261 | - kmem_cache_free(pmd_cache, pmd); |
4262 | + pmd_cache_free(pmd, i); |
4263 | } |
4264 | |
4265 | - if (!HAVE_SHARED_KERNEL_PMD) { |
4266 | - unsigned long flags; |
4267 | - spin_lock_irqsave(&pgd_lock, flags); |
4268 | - pgd_list_del(pgd); |
4269 | - spin_unlock_irqrestore(&pgd_lock, flags); |
4270 | - |
4271 | - for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { |
4272 | - pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); |
4273 | - make_lowmem_page_writable( |
4274 | - pmd, XENFEAT_writable_page_tables); |
4275 | - memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); |
4276 | - kmem_cache_free(pmd_cache, pmd); |
4277 | - } |
4278 | - |
4279 | - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) |
4280 | - xen_destroy_contiguous_region( |
4281 | - (unsigned long)pgd, 0); |
4282 | - } |
4283 | + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) |
4284 | + xen_destroy_contiguous_region((unsigned long)pgd, 0); |
4285 | } |
4286 | |
4287 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ |
4288 | - kmem_cache_free(pgd_cache, pgd); |
4289 | + quicklist_free(0, pgd_dtor, pgd); |
4290 | +} |
4291 | + |
4292 | +void check_pgt_cache(void) |
4293 | +{ |
4294 | + quicklist_trim(0, pgd_dtor, 25, 16); |
4295 | } |
4296 | |
4297 | void make_lowmem_page_readonly(void *va, unsigned int feature) |
4298 | @@ -717,13 +761,13 @@ |
4299 | spin_unlock_irqrestore(&pgd_lock, flags); |
4300 | } |
4301 | |
4302 | -void _arch_dup_mmap(struct mm_struct *mm) |
4303 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) |
4304 | { |
4305 | if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags)) |
4306 | mm_pin(mm); |
4307 | } |
4308 | |
4309 | -void _arch_exit_mmap(struct mm_struct *mm) |
4310 | +void arch_exit_mmap(struct mm_struct *mm) |
4311 | { |
4312 | struct task_struct *tsk = current; |
4313 | |
4314 | --- a/drivers/char/tpm/tpm_xen.c |
4315 | +++ b/drivers/char/tpm/tpm_xen.c |
4316 | @@ -463,7 +463,7 @@ |
4317 | tp->backend_id = domid; |
4318 | |
4319 | err = bind_listening_port_to_irqhandler( |
4320 | - domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); |
4321 | + domid, tpmif_int, IRQF_SAMPLE_RANDOM, "tpmif", tp); |
4322 | if (err <= 0) { |
4323 | WPRINTK("bind_listening_port_to_irqhandler failed " |
4324 | "(err=%d)\n", err); |
4325 | --- a/drivers/xen/blkfront/blkfront.c |
4326 | +++ b/drivers/xen/blkfront/blkfront.c |
4327 | @@ -236,7 +236,7 @@ |
4328 | info->ring_ref = err; |
4329 | |
4330 | err = bind_listening_port_to_irqhandler( |
4331 | - dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); |
4332 | + dev->otherend_id, blkif_int, IRQF_SAMPLE_RANDOM, "blkif", info); |
4333 | if (err <= 0) { |
4334 | xenbus_dev_fatal(dev, err, |
4335 | "bind_listening_port_to_irqhandler"); |
4336 | --- a/drivers/xen/char/mem.c |
4337 | +++ b/drivers/xen/char/mem.c |
4338 | @@ -18,7 +18,6 @@ |
4339 | #include <linux/raw.h> |
4340 | #include <linux/tty.h> |
4341 | #include <linux/capability.h> |
4342 | -#include <linux/smp_lock.h> |
4343 | #include <linux/ptrace.h> |
4344 | #include <linux/device.h> |
4345 | #include <asm/pgalloc.h> |
4346 | --- a/drivers/xen/core/hypervisor_sysfs.c |
4347 | +++ b/drivers/xen/core/hypervisor_sysfs.c |
4348 | @@ -50,7 +50,7 @@ |
4349 | if (!is_running_on_xen()) |
4350 | return -ENODEV; |
4351 | |
4352 | - hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type; |
4353 | + hypervisor_subsys.kobj.ktype = &hyp_sysfs_kobj_type; |
4354 | return 0; |
4355 | } |
4356 | |
4357 | --- a/drivers/xen/core/smpboot.c |
4358 | +++ b/drivers/xen/core/smpboot.c |
4359 | @@ -121,7 +121,7 @@ |
4360 | rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, |
4361 | cpu, |
4362 | smp_reschedule_interrupt, |
4363 | - SA_INTERRUPT, |
4364 | + IRQF_DISABLED, |
4365 | resched_name[cpu], |
4366 | NULL); |
4367 | if (rc < 0) |
4368 | @@ -132,7 +132,7 @@ |
4369 | rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, |
4370 | cpu, |
4371 | smp_call_function_interrupt, |
4372 | - SA_INTERRUPT, |
4373 | + IRQF_DISABLED, |
4374 | callfunc_name[cpu], |
4375 | NULL); |
4376 | if (rc < 0) |
4377 | @@ -165,13 +165,12 @@ |
4378 | |
4379 | void __cpuinit cpu_bringup(void) |
4380 | { |
4381 | + cpu_init(); |
4382 | #ifdef __i386__ |
4383 | - cpu_set_gdt(current_thread_info()->cpu); |
4384 | - secondary_cpu_init(); |
4385 | + identify_secondary_cpu(cpu_data + smp_processor_id()); |
4386 | #else |
4387 | - cpu_init(); |
4388 | -#endif |
4389 | identify_cpu(cpu_data + smp_processor_id()); |
4390 | +#endif |
4391 | touch_softlockup_watchdog(); |
4392 | preempt_disable(); |
4393 | local_irq_enable(); |
4394 | @@ -191,11 +190,6 @@ |
4395 | static DEFINE_SPINLOCK(ctxt_lock); |
4396 | |
4397 | struct task_struct *idle = idle_task(cpu); |
4398 | -#ifdef __x86_64__ |
4399 | - struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu]; |
4400 | -#else |
4401 | - struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
4402 | -#endif |
4403 | |
4404 | if (cpu_test_and_set(cpu, cpu_initialized_map)) |
4405 | return; |
4406 | @@ -218,11 +212,11 @@ |
4407 | smp_trap_init(ctxt.trap_ctxt); |
4408 | |
4409 | ctxt.ldt_ents = 0; |
4410 | - |
4411 | - ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address); |
4412 | - ctxt.gdt_ents = gdt_descr->size / 8; |
4413 | + ctxt.gdt_ents = GDT_SIZE / 8; |
4414 | |
4415 | #ifdef __i386__ |
4416 | + ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu)); |
4417 | + |
4418 | ctxt.user_regs.cs = __KERNEL_CS; |
4419 | ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); |
4420 | |
4421 | @@ -235,7 +229,11 @@ |
4422 | ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; |
4423 | |
4424 | ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); |
4425 | + |
4426 | + ctxt.user_regs.fs = __KERNEL_PERCPU; |
4427 | #else /* __x86_64__ */ |
4428 | + ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address); |
4429 | + |
4430 | ctxt.user_regs.cs = __KERNEL_CS; |
4431 | ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); |
4432 | |
4433 | @@ -265,9 +263,8 @@ |
4434 | struct vcpu_get_physid cpu_id; |
4435 | #ifdef __x86_64__ |
4436 | struct desc_ptr *gdt_descr; |
4437 | -#else |
4438 | - struct Xgt_desc_struct *gdt_descr; |
4439 | #endif |
4440 | + void *gdt_addr; |
4441 | |
4442 | apicid = 0; |
4443 | if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) |
4444 | @@ -317,14 +314,12 @@ |
4445 | } |
4446 | gdt_descr->size = GDT_SIZE; |
4447 | memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); |
4448 | + gdt_addr = (void *)gdt_descr->address; |
4449 | #else |
4450 | - if (unlikely(!init_gdt(cpu, idle))) |
4451 | - continue; |
4452 | - gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
4453 | + init_gdt(cpu); |
4454 | + gdt_addr = get_cpu_gdt_table(cpu); |
4455 | #endif |
4456 | - make_page_readonly( |
4457 | - (void *)gdt_descr->address, |
4458 | - XENFEAT_writable_descriptor_tables); |
4459 | + make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables); |
4460 | |
4461 | apicid = cpu; |
4462 | if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) |
4463 | @@ -338,7 +333,9 @@ |
4464 | #ifdef __x86_64__ |
4465 | cpu_pda(cpu)->pcurrent = idle; |
4466 | cpu_pda(cpu)->cpunumber = cpu; |
4467 | - clear_ti_thread_flag(idle->thread_info, TIF_FORK); |
4468 | + clear_ti_thread_flag(task_thread_info(idle), TIF_FORK); |
4469 | +#else |
4470 | + per_cpu(current_task, cpu) = idle; |
4471 | #endif |
4472 | |
4473 | irq_ctx_init(cpu); |
4474 | @@ -363,8 +360,12 @@ |
4475 | #endif |
4476 | } |
4477 | |
4478 | -void __devinit smp_prepare_boot_cpu(void) |
4479 | +void __init smp_prepare_boot_cpu(void) |
4480 | { |
4481 | +#ifdef __i386__ |
4482 | + init_gdt(smp_processor_id()); |
4483 | + switch_to_new_gdt(); |
4484 | +#endif |
4485 | prefill_possible_map(); |
4486 | } |
4487 | |
4488 | --- a/drivers/xen/core/xen_sysfs.c |
4489 | +++ b/drivers/xen/core/xen_sysfs.c |
4490 | @@ -28,12 +28,12 @@ |
4491 | |
4492 | static int __init xen_sysfs_type_init(void) |
4493 | { |
4494 | - return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr); |
4495 | + return sysfs_create_file(&hypervisor_subsys.kobj, &type_attr.attr); |
4496 | } |
4497 | |
4498 | static void xen_sysfs_type_destroy(void) |
4499 | { |
4500 | - sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr); |
4501 | + sysfs_remove_file(&hypervisor_subsys.kobj, &type_attr.attr); |
4502 | } |
4503 | |
4504 | /* xen version attributes */ |
4505 | @@ -89,13 +89,13 @@ |
4506 | |
4507 | static int __init xen_sysfs_version_init(void) |
4508 | { |
4509 | - return sysfs_create_group(&hypervisor_subsys.kset.kobj, |
4510 | + return sysfs_create_group(&hypervisor_subsys.kobj, |
4511 | &version_group); |
4512 | } |
4513 | |
4514 | static void xen_sysfs_version_destroy(void) |
4515 | { |
4516 | - sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group); |
4517 | + sysfs_remove_group(&hypervisor_subsys.kobj, &version_group); |
4518 | } |
4519 | |
4520 | /* UUID */ |
4521 | @@ -125,12 +125,12 @@ |
4522 | |
4523 | static int __init xen_sysfs_uuid_init(void) |
4524 | { |
4525 | - return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr); |
4526 | + return sysfs_create_file(&hypervisor_subsys.kobj, &uuid_attr.attr); |
4527 | } |
4528 | |
4529 | static void xen_sysfs_uuid_destroy(void) |
4530 | { |
4531 | - sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr); |
4532 | + sysfs_remove_file(&hypervisor_subsys.kobj, &uuid_attr.attr); |
4533 | } |
4534 | |
4535 | /* xen compilation attributes */ |
4536 | @@ -203,13 +203,13 @@ |
4537 | |
4538 | int __init static xen_compilation_init(void) |
4539 | { |
4540 | - return sysfs_create_group(&hypervisor_subsys.kset.kobj, |
4541 | + return sysfs_create_group(&hypervisor_subsys.kobj, |
4542 | &xen_compilation_group); |
4543 | } |
4544 | |
4545 | static void xen_compilation_destroy(void) |
4546 | { |
4547 | - sysfs_remove_group(&hypervisor_subsys.kset.kobj, |
4548 | + sysfs_remove_group(&hypervisor_subsys.kobj, |
4549 | &xen_compilation_group); |
4550 | } |
4551 | |
4552 | @@ -324,13 +324,13 @@ |
4553 | |
4554 | static int __init xen_properties_init(void) |
4555 | { |
4556 | - return sysfs_create_group(&hypervisor_subsys.kset.kobj, |
4557 | + return sysfs_create_group(&hypervisor_subsys.kobj, |
4558 | &xen_properties_group); |
4559 | } |
4560 | |
4561 | static void xen_properties_destroy(void) |
4562 | { |
4563 | - sysfs_remove_group(&hypervisor_subsys.kset.kobj, |
4564 | + sysfs_remove_group(&hypervisor_subsys.kobj, |
4565 | &xen_properties_group); |
4566 | } |
4567 | |
4568 | --- a/drivers/xen/netback/netback.c |
4569 | +++ b/drivers/xen/netback/netback.c |
4570 | @@ -180,7 +180,7 @@ |
4571 | goto err; |
4572 | |
4573 | skb_reserve(nskb, 16 + NET_IP_ALIGN); |
4574 | - headlen = nskb->end - nskb->data; |
4575 | + headlen = skb_end_pointer(nskb) - nskb->data; |
4576 | if (headlen > skb_headlen(skb)) |
4577 | headlen = skb_headlen(skb); |
4578 | ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); |
4579 | @@ -226,11 +226,15 @@ |
4580 | len -= copy; |
4581 | } |
4582 | |
4583 | +#ifdef NET_SKBUFF_DATA_USES_OFFSET |
4584 | + offset = 0; |
4585 | +#else |
4586 | offset = nskb->data - skb->data; |
4587 | +#endif |
4588 | |
4589 | - nskb->h.raw = skb->h.raw + offset; |
4590 | - nskb->nh.raw = skb->nh.raw + offset; |
4591 | - nskb->mac.raw = skb->mac.raw + offset; |
4592 | + nskb->transport_header = skb->transport_header + offset; |
4593 | + nskb->network_header = skb->network_header + offset; |
4594 | + nskb->mac_header = skb->mac_header + offset; |
4595 | |
4596 | return nskb; |
4597 | |
4598 | @@ -1601,7 +1605,7 @@ |
4599 | (void)bind_virq_to_irqhandler(VIRQ_DEBUG, |
4600 | 0, |
4601 | netif_be_dbg, |
4602 | - SA_SHIRQ, |
4603 | + IRQF_SHARED, |
4604 | "net-be-dbg", |
4605 | &netif_be_dbg); |
4606 | #endif |
4607 | --- a/drivers/xen/netfront/netfront.c |
4608 | +++ b/drivers/xen/netfront/netfront.c |
4609 | @@ -513,7 +513,7 @@ |
4610 | memcpy(netdev->dev_addr, info->mac, ETH_ALEN); |
4611 | |
4612 | err = bind_listening_port_to_irqhandler( |
4613 | - dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name, |
4614 | + dev->otherend_id, netif_int, IRQF_SAMPLE_RANDOM, netdev->name, |
4615 | netdev); |
4616 | if (err < 0) |
4617 | goto fail; |
4618 | --- a/drivers/xen/pciback/xenbus.c |
4619 | +++ b/drivers/xen/pciback/xenbus.c |
4620 | @@ -86,7 +86,7 @@ |
4621 | |
4622 | err = bind_interdomain_evtchn_to_irqhandler( |
4623 | pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, |
4624 | - SA_SAMPLE_RANDOM, "pciback", pdev); |
4625 | + IRQF_SAMPLE_RANDOM, "pciback", pdev); |
4626 | if (err < 0) { |
4627 | xenbus_dev_fatal(pdev->xdev, err, |
4628 | "Error binding event channel to IRQ"); |
4629 | --- a/drivers/xen/pcifront/xenbus.c |
4630 | +++ b/drivers/xen/pcifront/xenbus.c |
4631 | @@ -10,10 +10,6 @@ |
4632 | #include <xen/gnttab.h> |
4633 | #include "pcifront.h" |
4634 | |
4635 | -#ifndef __init_refok |
4636 | -#define __init_refok |
4637 | -#endif |
4638 | - |
4639 | #define INVALID_GRANT_REF (0) |
4640 | #define INVALID_EVTCHN (-1) |
4641 | |
4642 | --- a/drivers/xen/sfc_netback/accel_fwd.c |
4643 | +++ b/drivers/xen/sfc_netback/accel_fwd.c |
4644 | @@ -308,7 +308,7 @@ |
4645 | static inline int packet_is_arp_reply(struct sk_buff *skb) |
4646 | { |
4647 | return skb->protocol == ntohs(ETH_P_ARP) |
4648 | - && skb->nh.arph->ar_op == ntohs(ARPOP_REPLY); |
4649 | + && arp_hdr(skb)->ar_op == ntohs(ARPOP_REPLY); |
4650 | } |
4651 | |
4652 | |
4653 | @@ -392,12 +392,13 @@ |
4654 | |
4655 | BUG_ON(fwd_priv == NULL); |
4656 | |
4657 | - if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) { |
4658 | + if (is_broadcast_ether_addr(skb_mac_header(skb)) |
4659 | + && packet_is_arp_reply(skb)) { |
4660 | /* |
4661 | * update our fast path forwarding to reflect this |
4662 | * gratuitous ARP |
4663 | */ |
4664 | - mac = skb->mac.raw+ETH_ALEN; |
4665 | + mac = skb_mac_header(skb)+ETH_ALEN; |
4666 | |
4667 | DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n", |
4668 | __FUNCTION__, MAC_ARG(mac)); |
4669 | --- a/drivers/xen/sfc_netback/accel_solarflare.c |
4670 | +++ b/drivers/xen/sfc_netback/accel_solarflare.c |
4671 | @@ -114,7 +114,7 @@ |
4672 | BUG_ON(port == NULL); |
4673 | |
4674 | NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++); |
4675 | - if (skb->mac.raw != NULL) |
4676 | + if (skb_mac_header_was_set(skb)) |
4677 | netback_accel_tx_packet(skb, port->fwd_priv); |
4678 | else { |
4679 | DPRINTK("Ignoring packet with missing mac address\n"); |
4680 | --- a/drivers/xen/sfc_netfront/accel_tso.c |
4681 | +++ b/drivers/xen/sfc_netfront/accel_tso.c |
4682 | @@ -33,10 +33,9 @@ |
4683 | |
4684 | #include "accel_tso.h" |
4685 | |
4686 | -#define PTR_DIFF(p1, p2) ((u8*)(p1) - (u8*)(p2)) |
4687 | -#define ETH_HDR_LEN(skb) ((skb)->nh.raw - (skb)->data) |
4688 | -#define SKB_TCP_OFF(skb) PTR_DIFF ((skb)->h.th, (skb)->data) |
4689 | -#define SKB_IP_OFF(skb) PTR_DIFF ((skb)->nh.iph, (skb)->data) |
4690 | +#define ETH_HDR_LEN(skb) skb_network_offset(skb) |
4691 | +#define SKB_TCP_OFF(skb) skb_transport_offset(skb) |
4692 | +#define SKB_IP_OFF(skb) skb_network_offset(skb) |
4693 | |
4694 | /* |
4695 | * Set a maximum number of buffers in each output packet to make life |
4696 | @@ -114,9 +113,8 @@ |
4697 | static inline void tso_check_safe(struct sk_buff *skb) { |
4698 | EPRINTK_ON(skb->protocol != htons (ETH_P_IP)); |
4699 | EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP)); |
4700 | - EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP); |
4701 | - EPRINTK_ON((SKB_TCP_OFF(skb) |
4702 | - + (skb->h.th->doff << 2u)) > skb_headlen(skb)); |
4703 | + EPRINTK_ON(ip_hdr(skb)->protocol != IPPROTO_TCP); |
4704 | + EPRINTK_ON((SKB_TCP_OFF(skb) + tcp_hdrlen(skb)) > skb_headlen(skb)); |
4705 | } |
4706 | |
4707 | |
4708 | @@ -129,17 +127,17 @@ |
4709 | * All ethernet/IP/TCP headers combined size is TCP header size |
4710 | * plus offset of TCP header relative to start of packet. |
4711 | */ |
4712 | - st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb); |
4713 | + st->p.header_length = tcp_hdrlen(skb) + SKB_TCP_OFF(skb); |
4714 | st->p.full_packet_size = (st->p.header_length |
4715 | + skb_shinfo(skb)->gso_size); |
4716 | st->p.gso_size = skb_shinfo(skb)->gso_size; |
4717 | |
4718 | - st->p.ip_id = htons(skb->nh.iph->id); |
4719 | - st->seqnum = ntohl(skb->h.th->seq); |
4720 | + st->p.ip_id = htons(ip_hdr(skb)->id); |
4721 | + st->seqnum = ntohl(tcp_hdr(skb)->seq); |
4722 | |
4723 | - EPRINTK_ON(skb->h.th->urg); |
4724 | - EPRINTK_ON(skb->h.th->syn); |
4725 | - EPRINTK_ON(skb->h.th->rst); |
4726 | + EPRINTK_ON(tcp_hdr(skb)->urg); |
4727 | + EPRINTK_ON(tcp_hdr(skb)->syn); |
4728 | + EPRINTK_ON(tcp_hdr(skb)->rst); |
4729 | |
4730 | st->remaining_len = skb->len - st->p.header_length; |
4731 | |
4732 | @@ -258,8 +256,8 @@ |
4733 | /* This packet will be the last in the TSO burst. */ |
4734 | ip_length = (st->p.header_length - ETH_HDR_LEN(skb) |
4735 | + st->remaining_len); |
4736 | - tsoh_th->fin = skb->h.th->fin; |
4737 | - tsoh_th->psh = skb->h.th->psh; |
4738 | + tsoh_th->fin = tcp_hdr(skb)->fin; |
4739 | + tsoh_th->psh = tcp_hdr(skb)->psh; |
4740 | } |
4741 | |
4742 | tsoh_iph->tot_len = htons(ip_length); |
4743 | --- a/drivers/xen/sfc_netfront/accel_vi.c |
4744 | +++ b/drivers/xen/sfc_netfront/accel_vi.c |
4745 | @@ -463,7 +463,7 @@ |
4746 | |
4747 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
4748 | /* Set to zero to encourage falcon to work it out for us */ |
4749 | - *(u16*)(skb->h.raw + skb->csum_offset) = 0; |
4750 | + *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0; |
4751 | } |
4752 | |
4753 | if (multi_post_start_new_buffer(vnic, &state)) { |
4754 | @@ -582,7 +582,7 @@ |
4755 | |
4756 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
4757 | /* Set to zero to encourage falcon to work it out for us */ |
4758 | - *(u16*)(skb->h.raw + skb->csum_offset) = 0; |
4759 | + *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0; |
4760 | } |
4761 | NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT |
4762 | (skb, idx, frag_data, frag_len, { |
4763 | --- a/drivers/xen/sfc_netfront/accel_xenbus.c |
4764 | +++ b/drivers/xen/sfc_netfront/accel_xenbus.c |
4765 | @@ -356,7 +356,7 @@ |
4766 | /* Create xenbus msg event channel */ |
4767 | err = bind_listening_port_to_irqhandler |
4768 | (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend, |
4769 | - SA_SAMPLE_RANDOM, "vnicctrl", vnic); |
4770 | + IRQF_SAMPLE_RANDOM, "vnicctrl", vnic); |
4771 | if (err < 0) { |
4772 | EPRINTK("Couldn't bind msg event channel\n"); |
4773 | goto fail_msg_irq; |
4774 | @@ -367,7 +367,7 @@ |
4775 | /* Create xenbus net event channel */ |
4776 | err = bind_listening_port_to_irqhandler |
4777 | (dev->otherend_id, netfront_accel_net_channel_irq_from_bend, |
4778 | - SA_SAMPLE_RANDOM, "vnicfront", vnic); |
4779 | + IRQF_SAMPLE_RANDOM, "vnicfront", vnic); |
4780 | if (err < 0) { |
4781 | EPRINTK("Couldn't bind net event channel\n"); |
4782 | goto fail_net_irq; |
4783 | --- a/drivers/xen/xenoprof/xenoprofile.c |
4784 | +++ b/drivers/xen/xenoprof/xenoprofile.c |
4785 | @@ -236,7 +236,7 @@ |
4786 | result = bind_virq_to_irqhandler(VIRQ_XENOPROF, |
4787 | i, |
4788 | xenoprof_ovf_interrupt, |
4789 | - SA_INTERRUPT, |
4790 | + IRQF_DISABLED, |
4791 | "xenoprof", |
4792 | NULL); |
4793 | |
4794 | --- a/fs/aio.c |
4795 | +++ b/fs/aio.c |
4796 | @@ -38,7 +38,7 @@ |
4797 | |
4798 | #ifdef CONFIG_EPOLL |
4799 | #include <linux/poll.h> |
4800 | -#include <linux/eventpoll.h> |
4801 | +#include <linux/anon_inodes.h> |
4802 | #endif |
4803 | |
4804 | #if DEBUG > 1 |
4805 | @@ -1308,7 +1308,7 @@ |
4806 | |
4807 | /* make_aio_fd: |
4808 | * Create a file descriptor that can be used to poll the event queue. |
4809 | - * Based and piggybacked on the excellent epoll code. |
4810 | + * Based on the excellent epoll code. |
4811 | */ |
4812 | |
4813 | static int make_aio_fd(struct kioctx *ioctx) |
4814 | @@ -1317,7 +1317,8 @@ |
4815 | struct inode *inode; |
4816 | struct file *file; |
4817 | |
4818 | - error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); |
4819 | + error = anon_inode_getfd(&fd, &inode, &file, "[aioq]", |
4820 | + &aioq_fops, ioctx); |
4821 | if (error) |
4822 | return error; |
4823 | |
4824 | --- a/include/asm-x86/mach-xen/asm/desc_32.h |
4825 | +++ b/include/asm-x86/mach-xen/asm/desc_32.h |
4826 | @@ -11,23 +11,24 @@ |
4827 | |
4828 | #include <asm/mmu.h> |
4829 | |
4830 | -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; |
4831 | - |
4832 | struct Xgt_desc_struct { |
4833 | unsigned short size; |
4834 | unsigned long address __attribute__((packed)); |
4835 | unsigned short pad; |
4836 | } __attribute__ ((packed)); |
4837 | |
4838 | -extern struct Xgt_desc_struct idt_descr; |
4839 | -DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); |
4840 | -extern struct Xgt_desc_struct early_gdt_descr; |
4841 | +struct gdt_page |
4842 | +{ |
4843 | + struct desc_struct gdt[GDT_ENTRIES]; |
4844 | +} __attribute__((aligned(PAGE_SIZE))); |
4845 | +DECLARE_PER_CPU(struct gdt_page, gdt_page); |
4846 | |
4847 | static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) |
4848 | { |
4849 | - return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; |
4850 | + return per_cpu(gdt_page, cpu).gdt; |
4851 | } |
4852 | |
4853 | +extern struct Xgt_desc_struct idt_descr; |
4854 | extern struct desc_struct idt_table[]; |
4855 | extern void set_intr_gate(unsigned int irq, void * addr); |
4856 | |
4857 | @@ -55,53 +56,32 @@ |
4858 | #define DESCTYPE_S 0x10 /* !system */ |
4859 | |
4860 | #ifndef CONFIG_XEN |
4861 | -#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) |
4862 | - |
4863 | -#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) |
4864 | -#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) |
4865 | +#define load_TR_desc() native_load_tr_desc() |
4866 | +#define load_gdt(dtr) native_load_gdt(dtr) |
4867 | +#define load_idt(dtr) native_load_idt(dtr) |
4868 | #define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) |
4869 | #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) |
4870 | |
4871 | -#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) |
4872 | -#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) |
4873 | -#define store_tr(tr) __asm__ ("str %0":"=m" (tr)) |
4874 | +#define store_gdt(dtr) native_store_gdt(dtr) |
4875 | +#define store_idt(dtr) native_store_idt(dtr) |
4876 | +#define store_tr(tr) (tr = native_store_tr()) |
4877 | #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) |
4878 | -#endif |
4879 | |
4880 | -#if TLS_SIZE != 24 |
4881 | -# error update this code. |
4882 | -#endif |
4883 | - |
4884 | -static inline void load_TLS(struct thread_struct *t, unsigned int cpu) |
4885 | -{ |
4886 | -#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \ |
4887 | - *(u64 *)&t->tls_array[i]) \ |
4888 | - BUG() |
4889 | - C(0); C(1); C(2); |
4890 | -#undef C |
4891 | -} |
4892 | +#define load_TLS(t, cpu) native_load_tls(t, cpu) |
4893 | +#define set_ldt native_set_ldt |
4894 | |
4895 | -#ifndef CONFIG_XEN |
4896 | #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
4897 | #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
4898 | #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
4899 | |
4900 | -static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) |
4901 | +static inline void write_dt_entry(struct desc_struct *dt, |
4902 | + int entry, u32 entry_low, u32 entry_high) |
4903 | { |
4904 | - __u32 *lp = (__u32 *)((char *)dt + entry*8); |
4905 | - *lp = entry_a; |
4906 | - *(lp+1) = entry_b; |
4907 | + dt[entry].a = entry_low; |
4908 | + dt[entry].b = entry_high; |
4909 | } |
4910 | -#define set_ldt native_set_ldt |
4911 | -#else |
4912 | -extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); |
4913 | -extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); |
4914 | -#define set_ldt xen_set_ldt |
4915 | -#endif |
4916 | |
4917 | -#ifndef CONFIG_XEN |
4918 | -static inline fastcall void native_set_ldt(const void *addr, |
4919 | - unsigned int entries) |
4920 | +static inline void native_set_ldt(const void *addr, unsigned int entries) |
4921 | { |
4922 | if (likely(entries == 0)) |
4923 | __asm__ __volatile__("lldt %w0"::"q" (0)); |
4924 | @@ -116,6 +96,65 @@ |
4925 | __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); |
4926 | } |
4927 | } |
4928 | + |
4929 | + |
4930 | +static inline void native_load_tr_desc(void) |
4931 | +{ |
4932 | + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); |
4933 | +} |
4934 | + |
4935 | +static inline void native_load_gdt(const struct Xgt_desc_struct *dtr) |
4936 | +{ |
4937 | + asm volatile("lgdt %0"::"m" (*dtr)); |
4938 | +} |
4939 | + |
4940 | +static inline void native_load_idt(const struct Xgt_desc_struct *dtr) |
4941 | +{ |
4942 | + asm volatile("lidt %0"::"m" (*dtr)); |
4943 | +} |
4944 | + |
4945 | +static inline void native_store_gdt(struct Xgt_desc_struct *dtr) |
4946 | +{ |
4947 | + asm ("sgdt %0":"=m" (*dtr)); |
4948 | +} |
4949 | + |
4950 | +static inline void native_store_idt(struct Xgt_desc_struct *dtr) |
4951 | +{ |
4952 | + asm ("sidt %0":"=m" (*dtr)); |
4953 | +} |
4954 | + |
4955 | +static inline unsigned long native_store_tr(void) |
4956 | +{ |
4957 | + unsigned long tr; |
4958 | + asm ("str %0":"=r" (tr)); |
4959 | + return tr; |
4960 | +} |
4961 | + |
4962 | +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) |
4963 | +{ |
4964 | + unsigned int i; |
4965 | + struct desc_struct *gdt = get_cpu_gdt_table(cpu); |
4966 | + |
4967 | + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) |
4968 | + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; |
4969 | +} |
4970 | +#else |
4971 | +#define load_TLS(t, cpu) xen_load_tls(t, cpu) |
4972 | +#define set_ldt xen_set_ldt |
4973 | + |
4974 | +extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); |
4975 | +extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); |
4976 | + |
4977 | +static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu) |
4978 | +{ |
4979 | + unsigned int i; |
4980 | + struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN; |
4981 | + |
4982 | + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) |
4983 | + if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), |
4984 | + *(u64 *)&t->tls_array[i])) |
4985 | + BUG(); |
4986 | +} |
4987 | #endif |
4988 | |
4989 | #ifndef CONFIG_X86_NO_IDT |
4990 | --- a/include/asm-x86/mach-xen/asm/desc_64.h |
4991 | +++ b/include/asm-x86/mach-xen/asm/desc_64.h |
4992 | @@ -127,16 +127,6 @@ |
4993 | DESC_LDT, size * 8 - 1); |
4994 | } |
4995 | |
4996 | -static inline void set_seg_base(unsigned cpu, int entry, void *base) |
4997 | -{ |
4998 | - struct desc_struct *d = &cpu_gdt(cpu)[entry]; |
4999 | - u32 addr = (u32)(u64)base; |
5000 | - BUG_ON((u64)base >> 32); |
5001 | - d->base0 = addr & 0xffff; |
5002 | - d->base1 = (addr >> 16) & 0xff; |
5003 | - d->base2 = (addr >> 24) & 0xff; |
5004 | -} |
5005 | - |
5006 | #define LDT_entry_a(info) \ |
5007 | ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) |
5008 | /* Don't allow setting of the lm bit. It is useless anyways because |
5009 | @@ -165,25 +155,15 @@ |
5010 | (info)->useable == 0 && \ |
5011 | (info)->lm == 0) |
5012 | |
5013 | -#if TLS_SIZE != 24 |
5014 | -# error update this code. |
5015 | -#endif |
5016 | - |
5017 | static inline void load_TLS(struct thread_struct *t, unsigned int cpu) |
5018 | { |
5019 | -#if 0 |
5020 | + unsigned int i; |
5021 | u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); |
5022 | - gdt[0] = t->tls_array[0]; |
5023 | - gdt[1] = t->tls_array[1]; |
5024 | - gdt[2] = t->tls_array[2]; |
5025 | -#endif |
5026 | -#define C(i) \ |
5027 | - if (HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]), \ |
5028 | - t->tls_array[i])) \ |
5029 | - BUG(); |
5030 | |
5031 | - C(0); C(1); C(2); |
5032 | -#undef C |
5033 | + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) |
5034 | + if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), |
5035 | + t->tls_array[i])) |
5036 | + BUG(); |
5037 | } |
5038 | |
5039 | /* |
5040 | --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h |
5041 | +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h |
5042 | @@ -51,7 +51,7 @@ |
5043 | }; |
5044 | |
5045 | extern dma_addr_t bad_dma_address; |
5046 | -extern struct dma_mapping_ops* dma_ops; |
5047 | +extern const struct dma_mapping_ops* dma_ops; |
5048 | extern int iommu_merge; |
5049 | |
5050 | #if 0 |
5051 | --- a/include/asm-x86/mach-xen/asm/fixmap_32.h |
5052 | +++ b/include/asm-x86/mach-xen/asm/fixmap_32.h |
5053 | @@ -19,10 +19,8 @@ |
5054 | * the start of the fixmap. |
5055 | */ |
5056 | extern unsigned long __FIXADDR_TOP; |
5057 | -#ifdef CONFIG_COMPAT_VDSO |
5058 | -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) |
5059 | -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) |
5060 | -#endif |
5061 | +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) |
5062 | +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) |
5063 | |
5064 | #ifndef __ASSEMBLY__ |
5065 | #include <linux/kernel.h> |
5066 | @@ -85,6 +83,9 @@ |
5067 | #ifdef CONFIG_PCI_MMCONFIG |
5068 | FIX_PCIE_MCFG, |
5069 | #endif |
5070 | +#ifdef CONFIG_PARAVIRT |
5071 | + FIX_PARAVIRT_BOOTMAP, |
5072 | +#endif |
5073 | FIX_SHARED_INFO, |
5074 | #define NR_FIX_ISAMAPS 256 |
5075 | FIX_ISAMAP_END, |
5076 | --- a/include/asm-x86/mach-xen/asm/fixmap_64.h |
5077 | +++ b/include/asm-x86/mach-xen/asm/fixmap_64.h |
5078 | @@ -15,7 +15,6 @@ |
5079 | #include <asm/apicdef.h> |
5080 | #include <asm/page.h> |
5081 | #include <asm/vsyscall.h> |
5082 | -#include <asm/vsyscall32.h> |
5083 | #include <asm/acpi.h> |
5084 | |
5085 | /* |
5086 | --- a/include/asm-x86/mach-xen/asm/highmem.h |
5087 | +++ b/include/asm-x86/mach-xen/asm/highmem.h |
5088 | @@ -67,12 +67,18 @@ |
5089 | |
5090 | void *kmap(struct page *page); |
5091 | void kunmap(struct page *page); |
5092 | +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); |
5093 | void *kmap_atomic(struct page *page, enum km_type type); |
5094 | void *kmap_atomic_pte(struct page *page, enum km_type type); |
5095 | void kunmap_atomic(void *kvaddr, enum km_type type); |
5096 | void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); |
5097 | struct page *kmap_atomic_to_page(void *ptr); |
5098 | |
5099 | +#define kmap_atomic_pte(page, type) \ |
5100 | + kmap_atomic_prot(page, type, \ |
5101 | + test_bit(PG_pinned, &(page)->flags) \ |
5102 | + ? PAGE_KERNEL_RO : kmap_prot) |
5103 | + |
5104 | #define flush_cache_kmaps() do { } while (0) |
5105 | |
5106 | #endif /* __KERNEL__ */ |
5107 | --- a/include/asm-x86/mach-xen/asm/io_32.h |
5108 | +++ b/include/asm-x86/mach-xen/asm/io_32.h |
5109 | @@ -263,15 +263,18 @@ |
5110 | |
5111 | #endif /* __KERNEL__ */ |
5112 | |
5113 | -#define __SLOW_DOWN_IO "outb %%al,$0x80;" |
5114 | +static inline void xen_io_delay(void) |
5115 | +{ |
5116 | + asm volatile("outb %%al,$0x80" : : : "memory"); |
5117 | +} |
5118 | |
5119 | static inline void slow_down_io(void) { |
5120 | - __asm__ __volatile__( |
5121 | - __SLOW_DOWN_IO |
5122 | + xen_io_delay(); |
5123 | #ifdef REALLY_SLOW_IO |
5124 | - __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO |
5125 | + xen_io_delay(); |
5126 | + xen_io_delay(); |
5127 | + xen_io_delay(); |
5128 | #endif |
5129 | - : : ); |
5130 | } |
5131 | |
5132 | #ifdef CONFIG_X86_NUMAQ |
5133 | --- a/include/asm-x86/mach-xen/asm/irqflags_32.h |
5134 | +++ b/include/asm-x86/mach-xen/asm/irqflags_32.h |
5135 | @@ -11,6 +11,43 @@ |
5136 | #define _ASM_IRQFLAGS_H |
5137 | |
5138 | #ifndef __ASSEMBLY__ |
5139 | +#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask) |
5140 | + |
5141 | +#define xen_restore_fl(f) \ |
5142 | +do { \ |
5143 | + vcpu_info_t *_vcpu; \ |
5144 | + barrier(); \ |
5145 | + _vcpu = current_vcpu_info(); \ |
5146 | + if ((_vcpu->evtchn_upcall_mask = (f)) == 0) { \ |
5147 | + barrier(); /* unmask then check (avoid races) */\ |
5148 | + if (unlikely(_vcpu->evtchn_upcall_pending)) \ |
5149 | + force_evtchn_callback(); \ |
5150 | + } \ |
5151 | +} while (0) |
5152 | + |
5153 | +#define xen_irq_disable() \ |
5154 | +do { \ |
5155 | + current_vcpu_info()->evtchn_upcall_mask = 1; \ |
5156 | + barrier(); \ |
5157 | +} while (0) |
5158 | + |
5159 | +#define xen_irq_enable() \ |
5160 | +do { \ |
5161 | + vcpu_info_t *_vcpu; \ |
5162 | + barrier(); \ |
5163 | + _vcpu = current_vcpu_info(); \ |
5164 | + _vcpu->evtchn_upcall_mask = 0; \ |
5165 | + barrier(); /* unmask then check (avoid races) */ \ |
5166 | + if (unlikely(_vcpu->evtchn_upcall_pending)) \ |
5167 | + force_evtchn_callback(); \ |
5168 | +} while (0) |
5169 | + |
5170 | +void xen_safe_halt(void); |
5171 | + |
5172 | +void xen_halt(void); |
5173 | +#endif /* __ASSEMBLY__ */ |
5174 | + |
5175 | +#ifndef __ASSEMBLY__ |
5176 | |
5177 | /* |
5178 | * The use of 'barrier' in the following reflects their use as local-lock |
5179 | @@ -20,48 +57,31 @@ |
5180 | * includes these barriers, for example. |
5181 | */ |
5182 | |
5183 | -#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask) |
5184 | +#define __raw_local_save_flags(void) xen_save_fl() |
5185 | |
5186 | -#define raw_local_irq_restore(x) \ |
5187 | -do { \ |
5188 | - vcpu_info_t *_vcpu; \ |
5189 | - barrier(); \ |
5190 | - _vcpu = current_vcpu_info(); \ |
5191 | - if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ |
5192 | - barrier(); /* unmask then check (avoid races) */ \ |
5193 | - if (unlikely(_vcpu->evtchn_upcall_pending)) \ |
5194 | - force_evtchn_callback(); \ |
5195 | - } \ |
5196 | -} while (0) |
5197 | +#define raw_local_irq_restore(flags) xen_restore_fl(flags) |
5198 | |
5199 | -#define raw_local_irq_disable() \ |
5200 | -do { \ |
5201 | - current_vcpu_info()->evtchn_upcall_mask = 1; \ |
5202 | - barrier(); \ |
5203 | -} while (0) |
5204 | +#define raw_local_irq_disable() xen_irq_disable() |
5205 | |
5206 | -#define raw_local_irq_enable() \ |
5207 | -do { \ |
5208 | - vcpu_info_t *_vcpu; \ |
5209 | - barrier(); \ |
5210 | - _vcpu = current_vcpu_info(); \ |
5211 | - _vcpu->evtchn_upcall_mask = 0; \ |
5212 | - barrier(); /* unmask then check (avoid races) */ \ |
5213 | - if (unlikely(_vcpu->evtchn_upcall_pending)) \ |
5214 | - force_evtchn_callback(); \ |
5215 | -} while (0) |
5216 | +#define raw_local_irq_enable() xen_irq_enable() |
5217 | |
5218 | /* |
5219 | * Used in the idle loop; sti takes one instruction cycle |
5220 | * to complete: |
5221 | */ |
5222 | -void raw_safe_halt(void); |
5223 | +static inline void raw_safe_halt(void) |
5224 | +{ |
5225 | + xen_safe_halt(); |
5226 | +} |
5227 | |
5228 | /* |
5229 | * Used when interrupts are already enabled or to |
5230 | * shutdown the processor: |
5231 | */ |
5232 | -void halt(void); |
5233 | +static inline void halt(void) |
5234 | +{ |
5235 | + xen_halt(); |
5236 | +} |
5237 | |
5238 | /* |
5239 | * For spinlocks, etc: |
5240 | --- a/include/asm-x86/mach-xen/asm/irqflags_64.h |
5241 | +++ b/include/asm-x86/mach-xen/asm/irqflags_64.h |
5242 | @@ -9,6 +9,7 @@ |
5243 | */ |
5244 | #ifndef _ASM_IRQFLAGS_H |
5245 | #define _ASM_IRQFLAGS_H |
5246 | +#include <asm/processor-flags.h> |
5247 | |
5248 | #ifndef __ASSEMBLY__ |
5249 | /* |
5250 | @@ -50,19 +51,19 @@ |
5251 | { |
5252 | unsigned long flags = __raw_local_save_flags(); |
5253 | |
5254 | - raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); |
5255 | + raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); |
5256 | } |
5257 | |
5258 | static inline void raw_local_irq_enable(void) |
5259 | { |
5260 | unsigned long flags = __raw_local_save_flags(); |
5261 | |
5262 | - raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); |
5263 | + raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); |
5264 | } |
5265 | |
5266 | static inline int raw_irqs_disabled_flags(unsigned long flags) |
5267 | { |
5268 | - return !(flags & (1<<9)) || (flags & (1 << 18)); |
5269 | + return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); |
5270 | } |
5271 | |
5272 | #else /* CONFIG_X86_VSMP */ |
5273 | @@ -118,13 +119,21 @@ |
5274 | * Used in the idle loop; sti takes one instruction cycle |
5275 | * to complete: |
5276 | */ |
5277 | -void raw_safe_halt(void); |
5278 | +void xen_safe_halt(void); |
5279 | +static inline void raw_safe_halt(void) |
5280 | +{ |
5281 | + xen_safe_halt(); |
5282 | +} |
5283 | |
5284 | /* |
5285 | * Used when interrupts are already enabled or to |
5286 | * shutdown the processor: |
5287 | */ |
5288 | -void halt(void); |
5289 | +void xen_halt(void); |
5290 | +static inline void halt(void) |
5291 | +{ |
5292 | + xen_halt(); |
5293 | +} |
5294 | |
5295 | #else /* __ASSEMBLY__: */ |
5296 | # ifdef CONFIG_TRACE_IRQFLAGS |
5297 | --- a/include/asm-x86/mach-xen/asm/mmu.h |
5298 | +++ b/include/asm-x86/mach-xen/asm/mmu.h |
5299 | @@ -18,12 +18,4 @@ |
5300 | #endif |
5301 | } mm_context_t; |
5302 | |
5303 | -/* mm/memory.c:exit_mmap hook */ |
5304 | -extern void _arch_exit_mmap(struct mm_struct *mm); |
5305 | -#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm) |
5306 | - |
5307 | -/* kernel/fork.c:dup_mmap hook */ |
5308 | -extern void _arch_dup_mmap(struct mm_struct *mm); |
5309 | -#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm)) |
5310 | - |
5311 | #endif |
5312 | --- a/include/asm-x86/mach-xen/asm/mmu_64.h |
5313 | +++ b/include/asm-x86/mach-xen/asm/mmu_64.h |
5314 | @@ -25,14 +25,6 @@ |
5315 | #ifdef CONFIG_XEN |
5316 | extern struct list_head mm_unpinned; |
5317 | extern spinlock_t mm_unpinned_lock; |
5318 | - |
5319 | -/* mm/memory.c:exit_mmap hook */ |
5320 | -extern void _arch_exit_mmap(struct mm_struct *mm); |
5321 | -#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm) |
5322 | - |
5323 | -/* kernel/fork.c:dup_mmap hook */ |
5324 | -extern void _arch_dup_mmap(struct mm_struct *mm); |
5325 | -#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm)) |
5326 | #endif |
5327 | |
5328 | #endif |
5329 | --- a/include/asm-x86/mach-xen/asm/mmu_context_32.h |
5330 | +++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h |
5331 | @@ -6,6 +6,20 @@ |
5332 | #include <asm/pgalloc.h> |
5333 | #include <asm/tlbflush.h> |
5334 | |
5335 | +void arch_exit_mmap(struct mm_struct *mm); |
5336 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); |
5337 | + |
5338 | +void mm_pin(struct mm_struct *mm); |
5339 | +void mm_unpin(struct mm_struct *mm); |
5340 | +void mm_pin_all(void); |
5341 | + |
5342 | +static inline void xen_activate_mm(struct mm_struct *prev, |
5343 | + struct mm_struct *next) |
5344 | +{ |
5345 | + if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)) |
5346 | + mm_pin(next); |
5347 | +} |
5348 | + |
5349 | /* |
5350 | * Used for LDT copy/destruction. |
5351 | */ |
5352 | @@ -37,10 +51,6 @@ |
5353 | : : "r" (0) ); |
5354 | } |
5355 | |
5356 | -extern void mm_pin(struct mm_struct *mm); |
5357 | -extern void mm_unpin(struct mm_struct *mm); |
5358 | -void mm_pin_all(void); |
5359 | - |
5360 | static inline void switch_mm(struct mm_struct *prev, |
5361 | struct mm_struct *next, |
5362 | struct task_struct *tsk) |
5363 | @@ -97,11 +107,10 @@ |
5364 | #define deactivate_mm(tsk, mm) \ |
5365 | asm("movl %0,%%gs": :"r" (0)); |
5366 | |
5367 | -static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) |
5368 | -{ |
5369 | - if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)) |
5370 | - mm_pin(next); |
5371 | - switch_mm(prev, next, NULL); |
5372 | -} |
5373 | +#define activate_mm(prev, next) \ |
5374 | + do { \ |
5375 | + xen_activate_mm(prev, next); \ |
5376 | + switch_mm((prev),(next),NULL); \ |
5377 | + } while(0) |
5378 | |
5379 | #endif |
5380 | --- a/include/asm-x86/mach-xen/asm/mmu_context_64.h |
5381 | +++ b/include/asm-x86/mach-xen/asm/mmu_context_64.h |
5382 | @@ -9,6 +9,9 @@ |
5383 | #include <asm/pgtable.h> |
5384 | #include <asm/tlbflush.h> |
5385 | |
5386 | +void arch_exit_mmap(struct mm_struct *mm); |
5387 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); |
5388 | + |
5389 | /* |
5390 | * possibly do the LDT unload here? |
5391 | */ |
5392 | --- a/include/asm-x86/mach-xen/asm/page_64.h |
5393 | +++ b/include/asm-x86/mach-xen/asm/page_64.h |
5394 | @@ -7,6 +7,7 @@ |
5395 | #include <linux/types.h> |
5396 | #include <asm/bug.h> |
5397 | #endif |
5398 | +#include <linux/const.h> |
5399 | #include <xen/interface/xen.h> |
5400 | |
5401 | /* |
5402 | @@ -19,18 +20,14 @@ |
5403 | |
5404 | /* PAGE_SHIFT determines the page size */ |
5405 | #define PAGE_SHIFT 12 |
5406 | -#ifdef __ASSEMBLY__ |
5407 | -#define PAGE_SIZE (0x1 << PAGE_SHIFT) |
5408 | -#else |
5409 | -#define PAGE_SIZE (1UL << PAGE_SHIFT) |
5410 | -#endif |
5411 | +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) |
5412 | #define PAGE_MASK (~(PAGE_SIZE-1)) |
5413 | |
5414 | /* See Documentation/x86_64/mm.txt for a description of the memory map. */ |
5415 | #define __PHYSICAL_MASK_SHIFT 46 |
5416 | -#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1) |
5417 | +#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1) |
5418 | #define __VIRTUAL_MASK_SHIFT 48 |
5419 | -#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) |
5420 | +#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1) |
5421 | |
5422 | #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) |
5423 | |
5424 | @@ -55,10 +52,10 @@ |
5425 | #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ |
5426 | |
5427 | #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) |
5428 | -#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) |
5429 | +#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT) |
5430 | |
5431 | #define HPAGE_SHIFT PMD_SHIFT |
5432 | -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) |
5433 | +#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) |
5434 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) |
5435 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) |
5436 | |
5437 | @@ -152,17 +149,23 @@ |
5438 | |
5439 | #define __pgprot(x) ((pgprot_t) { (x) } ) |
5440 | |
5441 | -#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START) |
5442 | -#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) |
5443 | -#define __START_KERNEL_map 0xffffffff80000000UL |
5444 | -#define __PAGE_OFFSET 0xffff880000000000UL |
5445 | +#endif /* !__ASSEMBLY__ */ |
5446 | |
5447 | -#else |
5448 | #define __PHYSICAL_START CONFIG_PHYSICAL_START |
5449 | +#define __KERNEL_ALIGN 0x200000 |
5450 | + |
5451 | +/* |
5452 | + * Make sure kernel is aligned to 2MB address. Catching it at compile |
5453 | + * time is better. Change your config file and compile the kernel |
5454 | + * for a 2MB aligned address (CONFIG_PHYSICAL_START) |
5455 | + */ |
5456 | +#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 |
5457 | +#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" |
5458 | +#endif |
5459 | + |
5460 | #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) |
5461 | -#define __START_KERNEL_map 0xffffffff80000000 |
5462 | -#define __PAGE_OFFSET 0xffff880000000000 |
5463 | -#endif /* !__ASSEMBLY__ */ |
5464 | +#define __START_KERNEL_map _AC(0xffffffff80000000, UL) |
5465 | +#define __PAGE_OFFSET _AC(0xffff880000000000, UL) |
5466 | |
5467 | #if CONFIG_XEN_COMPAT <= 0x030002 |
5468 | #undef LOAD_OFFSET |
5469 | @@ -172,20 +175,20 @@ |
5470 | /* to align the pointer to the (next) page boundary */ |
5471 | #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) |
5472 | |
5473 | -#define KERNEL_TEXT_SIZE (40UL*1024*1024) |
5474 | -#define KERNEL_TEXT_START 0xffffffff80000000UL |
5475 | +#define KERNEL_TEXT_SIZE (40*1024*1024) |
5476 | +#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL) |
5477 | + |
5478 | +#define PAGE_OFFSET __PAGE_OFFSET |
5479 | |
5480 | -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) |
5481 | +#ifndef __ASSEMBLY__ |
5482 | +static inline unsigned long __phys_addr(unsigned long x) |
5483 | +{ |
5484 | + return x - (x >= __START_KERNEL_map ? __START_KERNEL_map : PAGE_OFFSET); |
5485 | +} |
5486 | +#endif |
5487 | |
5488 | -/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. |
5489 | - Otherwise you risk miscompilation. */ |
5490 | -#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET) |
5491 | -/* __pa_symbol should be used for C visible symbols. |
5492 | - This seems to be the official gcc blessed way to do such arithmetic. */ |
5493 | -#define __pa_symbol(x) \ |
5494 | - ({unsigned long v; \ |
5495 | - asm("" : "=r" (v) : "0" (x)); \ |
5496 | - __pa(v); }) |
5497 | +#define __pa(x) __phys_addr((unsigned long)(x)) |
5498 | +#define __pa_symbol(x) __phys_addr((unsigned long)(x)) |
5499 | |
5500 | #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) |
5501 | #define __boot_va(x) __va(x) |
5502 | --- a/include/asm-x86/mach-xen/asm/pgalloc_32.h |
5503 | +++ b/include/asm-x86/mach-xen/asm/pgalloc_32.h |
5504 | @@ -1,7 +1,6 @@ |
5505 | #ifndef _I386_PGALLOC_H |
5506 | #define _I386_PGALLOC_H |
5507 | |
5508 | -#include <asm/fixmap.h> |
5509 | #include <linux/threads.h> |
5510 | #include <linux/mm.h> /* for struct page */ |
5511 | #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */ |
5512 | @@ -69,6 +68,4 @@ |
5513 | #define pud_populate(mm, pmd, pte) BUG() |
5514 | #endif |
5515 | |
5516 | -#define check_pgt_cache() do { } while (0) |
5517 | - |
5518 | #endif /* _I386_PGALLOC_H */ |
5519 | --- a/include/asm-x86/mach-xen/asm/pgalloc_64.h |
5520 | +++ b/include/asm-x86/mach-xen/asm/pgalloc_64.h |
5521 | @@ -1,7 +1,6 @@ |
5522 | #ifndef _X86_64_PGALLOC_H |
5523 | #define _X86_64_PGALLOC_H |
5524 | |
5525 | -#include <asm/fixmap.h> |
5526 | #include <asm/pda.h> |
5527 | #include <linux/threads.h> |
5528 | #include <linux/mm.h> |
5529 | @@ -100,24 +99,16 @@ |
5530 | struct page *page = virt_to_page(pgd); |
5531 | |
5532 | spin_lock(&pgd_lock); |
5533 | - page->index = (pgoff_t)pgd_list; |
5534 | - if (pgd_list) |
5535 | - pgd_list->private = (unsigned long)&page->index; |
5536 | - pgd_list = page; |
5537 | - page->private = (unsigned long)&pgd_list; |
5538 | + list_add(&page->lru, &pgd_list); |
5539 | spin_unlock(&pgd_lock); |
5540 | } |
5541 | |
5542 | static inline void pgd_list_del(pgd_t *pgd) |
5543 | { |
5544 | - struct page *next, **pprev, *page = virt_to_page(pgd); |
5545 | + struct page *page = virt_to_page(pgd); |
5546 | |
5547 | spin_lock(&pgd_lock); |
5548 | - next = (struct page *)page->index; |
5549 | - pprev = (struct page **)page->private; |
5550 | - *pprev = next; |
5551 | - if (next) |
5552 | - next->private = (unsigned long)pprev; |
5553 | + list_del(&page->lru); |
5554 | spin_unlock(&pgd_lock); |
5555 | } |
5556 | |
5557 | --- a/include/asm-x86/mach-xen/asm/pgtable-2level.h |
5558 | +++ b/include/asm-x86/mach-xen/asm/pgtable-2level.h |
5559 | @@ -13,22 +13,43 @@ |
5560 | * within a page table are directly modified. Thus, the following |
5561 | * hook is made available. |
5562 | */ |
5563 | -#define set_pte(pteptr, pteval) (*(pteptr) = pteval) |
5564 | - |
5565 | -#define set_pte_at(_mm,addr,ptep,pteval) do { \ |
5566 | - if (((_mm) != current->mm && (_mm) != &init_mm) || \ |
5567 | - HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \ |
5568 | - set_pte((ptep), (pteval)); \ |
5569 | -} while (0) |
5570 | - |
5571 | -#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval)) |
5572 | +static inline void xen_set_pte(pte_t *ptep , pte_t pte) |
5573 | +{ |
5574 | + *ptep = pte; |
5575 | +} |
5576 | +static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
5577 | + pte_t *ptep , pte_t pte) |
5578 | +{ |
5579 | + if ((mm != current->mm && mm != &init_mm) || |
5580 | + HYPERVISOR_update_va_mapping(addr, pte, 0)) |
5581 | + xen_set_pte(ptep, pte); |
5582 | +} |
5583 | +static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd) |
5584 | +{ |
5585 | + xen_l2_entry_update(pmdp, pmd); |
5586 | +} |
5587 | +#define set_pte(pteptr, pteval) xen_set_pte(pteptr, pteval) |
5588 | +#define set_pte_at(mm,addr,ptep,pteval) xen_set_pte_at(mm, addr, ptep, pteval) |
5589 | +#define set_pmd(pmdptr, pmdval) xen_set_pmd(pmdptr, pmdval) |
5590 | |
5591 | #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) |
5592 | |
5593 | #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) |
5594 | #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) |
5595 | |
5596 | -#define raw_ptep_get_and_clear(xp, pte) __pte_ma(xchg(&(xp)->pte_low, 0)) |
5597 | +static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) |
5598 | +{ |
5599 | + xen_set_pte_at(mm, addr, xp, __pte(0)); |
5600 | +} |
5601 | + |
5602 | +#ifdef CONFIG_SMP |
5603 | +static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t res) |
5604 | +{ |
5605 | + return __pte_ma(xchg(&xp->pte_low, 0)); |
5606 | +} |
5607 | +#else |
5608 | +#define xen_ptep_get_and_clear(xp, res) xen_local_ptep_get_and_clear(xp, res) |
5609 | +#endif |
5610 | |
5611 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
5612 | #define ptep_clear_flush(vma, addr, ptep) \ |
5613 | @@ -95,6 +116,4 @@ |
5614 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) |
5615 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) |
5616 | |
5617 | -void vmalloc_sync_all(void); |
5618 | - |
5619 | #endif /* _I386_PGTABLE_2LEVEL_H */ |
5620 | --- a/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h |
5621 | +++ b/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h |
5622 | @@ -1,7 +1,7 @@ |
5623 | #ifndef _I386_PGTABLE_3LEVEL_DEFS_H |
5624 | #define _I386_PGTABLE_3LEVEL_DEFS_H |
5625 | |
5626 | -#define HAVE_SHARED_KERNEL_PMD 0 |
5627 | +#define SHARED_KERNEL_PMD 0 |
5628 | |
5629 | /* |
5630 | * PGDIR_SHIFT determines what a top-level page table entry can map |
5631 | --- a/include/asm-x86/mach-xen/asm/pgtable-3level.h |
5632 | +++ b/include/asm-x86/mach-xen/asm/pgtable-3level.h |
5633 | @@ -52,32 +52,40 @@ |
5634 | * value and then use set_pte to update it. -ben |
5635 | */ |
5636 | |
5637 | -static inline void set_pte(pte_t *ptep, pte_t pte) |
5638 | +static inline void xen_set_pte(pte_t *ptep, pte_t pte) |
5639 | { |
5640 | ptep->pte_high = pte.pte_high; |
5641 | smp_wmb(); |
5642 | ptep->pte_low = pte.pte_low; |
5643 | } |
5644 | -#define set_pte_atomic(pteptr,pteval) \ |
5645 | - set_64bit((unsigned long long *)(pteptr),__pte_val(pteval)) |
5646 | |
5647 | -#define set_pte_at(_mm,addr,ptep,pteval) do { \ |
5648 | - if (((_mm) != current->mm && (_mm) != &init_mm) || \ |
5649 | - HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \ |
5650 | - set_pte((ptep), (pteval)); \ |
5651 | -} while (0) |
5652 | - |
5653 | -#define set_pmd(pmdptr,pmdval) \ |
5654 | - xen_l2_entry_update((pmdptr), (pmdval)) |
5655 | -#define set_pud(pudptr,pudval) \ |
5656 | - xen_l3_entry_update((pudptr), (pudval)) |
5657 | +static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
5658 | + pte_t *ptep , pte_t pte) |
5659 | +{ |
5660 | + if ((mm != current->mm && mm != &init_mm) || |
5661 | + HYPERVISOR_update_va_mapping(addr, pte, 0)) |
5662 | + xen_set_pte(ptep, pte); |
5663 | +} |
5664 | + |
5665 | +static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte) |
5666 | +{ |
5667 | + set_64bit((unsigned long long *)(ptep),__pte_val(pte)); |
5668 | +} |
5669 | +static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd) |
5670 | +{ |
5671 | + xen_l2_entry_update(pmdp, pmd); |
5672 | +} |
5673 | +static inline void xen_set_pud(pud_t *pudp, pud_t pud) |
5674 | +{ |
5675 | + xen_l3_entry_update(pudp, pud); |
5676 | +} |
5677 | |
5678 | /* |
5679 | * For PTEs and PDEs, we must clear the P-bit first when clearing a page table |
5680 | * entry, so clear the bottom half first and enforce ordering with a compiler |
5681 | * barrier. |
5682 | */ |
5683 | -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
5684 | +static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
5685 | { |
5686 | if ((mm != current->mm && mm != &init_mm) |
5687 | || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { |
5688 | @@ -87,7 +95,18 @@ |
5689 | } |
5690 | } |
5691 | |
5692 | -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) |
5693 | +static inline void xen_pmd_clear(pmd_t *pmd) |
5694 | +{ |
5695 | + xen_l2_entry_update(pmd, __pmd(0)); |
5696 | +} |
5697 | + |
5698 | +#define set_pte(ptep, pte) xen_set_pte(ptep, pte) |
5699 | +#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) |
5700 | +#define set_pte_atomic(ptep, pte) xen_set_pte_atomic(ptep, pte) |
5701 | +#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd) |
5702 | +#define set_pud(pudp, pud) xen_set_pud(pudp, pud) |
5703 | +#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep) |
5704 | +#define pmd_clear(pmd) xen_pmd_clear(pmd) |
5705 | |
5706 | /* |
5707 | * Pentium-II erratum A13: in PAE mode we explicitly have to flush |
5708 | @@ -108,7 +127,8 @@ |
5709 | #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ |
5710 | pmd_index(address)) |
5711 | |
5712 | -static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res) |
5713 | +#ifdef CONFIG_SMP |
5714 | +static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res) |
5715 | { |
5716 | uint64_t val = __pte_val(res); |
5717 | if (__cmpxchg64(ptep, val, 0) != val) { |
5718 | @@ -119,6 +139,9 @@ |
5719 | } |
5720 | return res; |
5721 | } |
5722 | +#else |
5723 | +#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte) |
5724 | +#endif |
5725 | |
5726 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
5727 | #define ptep_clear_flush(vma, addr, ptep) \ |
5728 | @@ -165,13 +188,13 @@ |
5729 | static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) |
5730 | { |
5731 | return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | |
5732 | - pgprot_val(pgprot)) & __supported_pte_mask); |
5733 | + pgprot_val(pgprot)) & __supported_pte_mask); |
5734 | } |
5735 | |
5736 | static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) |
5737 | { |
5738 | return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | |
5739 | - pgprot_val(pgprot)) & __supported_pte_mask); |
5740 | + pgprot_val(pgprot)) & __supported_pte_mask); |
5741 | } |
5742 | |
5743 | /* |
5744 | @@ -191,6 +214,4 @@ |
5745 | |
5746 | #define __pmd_free_tlb(tlb, x) do { } while (0) |
5747 | |
5748 | -void vmalloc_sync_all(void); |
5749 | - |
5750 | #endif /* _I386_PGTABLE_3LEVEL_H */ |
5751 | --- a/include/asm-x86/mach-xen/asm/pgtable_32.h |
5752 | +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h |
5753 | @@ -24,11 +24,11 @@ |
5754 | #include <linux/slab.h> |
5755 | #include <linux/list.h> |
5756 | #include <linux/spinlock.h> |
5757 | +#include <linux/sched.h> |
5758 | |
5759 | /* Is this pagetable pinned? */ |
5760 | #define PG_pinned PG_arch_1 |
5761 | |
5762 | -struct mm_struct; |
5763 | struct vm_area_struct; |
5764 | |
5765 | /* |
5766 | @@ -38,17 +38,16 @@ |
5767 | #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) |
5768 | extern unsigned long empty_zero_page[1024]; |
5769 | extern pgd_t *swapper_pg_dir; |
5770 | -extern struct kmem_cache *pgd_cache; |
5771 | extern struct kmem_cache *pmd_cache; |
5772 | extern spinlock_t pgd_lock; |
5773 | extern struct page *pgd_list; |
5774 | +void check_pgt_cache(void); |
5775 | |
5776 | void pmd_ctor(void *, struct kmem_cache *, unsigned long); |
5777 | -void pgd_ctor(void *, struct kmem_cache *, unsigned long); |
5778 | -void pgd_dtor(void *, struct kmem_cache *, unsigned long); |
5779 | void pgtable_cache_init(void); |
5780 | void paging_init(void); |
5781 | |
5782 | + |
5783 | /* |
5784 | * The Linux x86 paging architecture is 'compile-time dual-mode', it |
5785 | * implements both the traditional 2-level x86 page tables and the |
5786 | @@ -165,6 +164,7 @@ |
5787 | |
5788 | extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; |
5789 | #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) |
5790 | +#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) |
5791 | #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) |
5792 | #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) |
5793 | #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) |
5794 | @@ -172,6 +172,7 @@ |
5795 | #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) |
5796 | #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) |
5797 | #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) |
5798 | +#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) |
5799 | #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) |
5800 | #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) |
5801 | #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) |
5802 | @@ -275,7 +276,13 @@ |
5803 | */ |
5804 | #define pte_update(mm, addr, ptep) do { } while (0) |
5805 | #define pte_update_defer(mm, addr, ptep) do { } while (0) |
5806 | -#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) |
5807 | + |
5808 | +/* local pte updates need not use xchg for locking */ |
5809 | +static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res) |
5810 | +{ |
5811 | + xen_set_pte(ptep, __pte(0)); |
5812 | + return res; |
5813 | +} |
5814 | |
5815 | /* |
5816 | * We only update the dirty/accessed state if we set |
5817 | @@ -286,17 +293,34 @@ |
5818 | */ |
5819 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
5820 | #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ |
5821 | -do { \ |
5822 | - if (dirty) \ |
5823 | +({ \ |
5824 | + int __changed = !pte_same(*(ptep), entry); \ |
5825 | + if (__changed && (dirty)) \ |
5826 | ptep_establish(vma, address, ptep, entry); \ |
5827 | -} while (0) |
5828 | + __changed; \ |
5829 | +}) |
5830 | |
5831 | -/* |
5832 | - * We don't actually have these, but we want to advertise them so that |
5833 | - * we can encompass the flush here. |
5834 | - */ |
5835 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY |
5836 | +#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \ |
5837 | + int __ret = 0; \ |
5838 | + if (pte_dirty(*(ptep))) \ |
5839 | + __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \ |
5840 | + &(ptep)->pte_low); \ |
5841 | + if (__ret) \ |
5842 | + pte_update((vma)->vm_mm, addr, ptep); \ |
5843 | + __ret; \ |
5844 | +}) |
5845 | + |
5846 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
5847 | +#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ |
5848 | + int __ret = 0; \ |
5849 | + if (pte_young(*(ptep))) \ |
5850 | + __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ |
5851 | + &(ptep)->pte_low); \ |
5852 | + if (__ret) \ |
5853 | + pte_update((vma)->vm_mm, addr, ptep); \ |
5854 | + __ret; \ |
5855 | +}) |
5856 | |
5857 | /* |
5858 | * Rules for using ptep_establish: the pte MUST be a user pte, and |
5859 | @@ -323,7 +347,7 @@ |
5860 | int __dirty = pte_dirty(__pte); \ |
5861 | __pte = pte_mkclean(__pte); \ |
5862 | if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ |
5863 | - ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ |
5864 | + (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ |
5865 | else if (__dirty) \ |
5866 | (ptep)->pte_low = __pte.pte_low; \ |
5867 | __dirty; \ |
5868 | @@ -336,7 +360,7 @@ |
5869 | int __young = pte_young(__pte); \ |
5870 | __pte = pte_mkold(__pte); \ |
5871 | if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ |
5872 | - ptep_set_access_flags(vma, address, ptep, __pte, __young); \ |
5873 | + (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ |
5874 | else if (__young) \ |
5875 | (ptep)->pte_low = __pte.pte_low; \ |
5876 | __young; \ |
5877 | @@ -349,7 +373,7 @@ |
5878 | if (!pte_none(pte) |
5879 | && (mm != &init_mm |
5880 | || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { |
5881 | - pte = raw_ptep_get_and_clear(ptep, pte); |
5882 | + pte = xen_ptep_get_and_clear(ptep, pte); |
5883 | pte_update(mm, addr, ptep); |
5884 | } |
5885 | return pte; |
5886 | @@ -491,24 +515,10 @@ |
5887 | #endif |
5888 | |
5889 | #if defined(CONFIG_HIGHPTE) |
5890 | -#define pte_offset_map(dir, address) \ |
5891 | -({ \ |
5892 | - pte_t *__ptep; \ |
5893 | - unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ |
5894 | - __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \ |
5895 | - paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ |
5896 | - __ptep = __ptep + pte_index(address); \ |
5897 | - __ptep; \ |
5898 | -}) |
5899 | -#define pte_offset_map_nested(dir, address) \ |
5900 | -({ \ |
5901 | - pte_t *__ptep; \ |
5902 | - unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ |
5903 | - __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \ |
5904 | - paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ |
5905 | - __ptep = __ptep + pte_index(address); \ |
5906 | - __ptep; \ |
5907 | -}) |
5908 | +#define pte_offset_map(dir, address) \ |
5909 | + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) |
5910 | +#define pte_offset_map_nested(dir, address) \ |
5911 | + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) |
5912 | #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) |
5913 | #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) |
5914 | #else |
5915 | @@ -587,10 +597,6 @@ |
5916 | #define io_remap_pfn_range(vma,from,pfn,size,prot) \ |
5917 | direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO) |
5918 | |
5919 | -#define MK_IOSPACE_PFN(space, pfn) (pfn) |
5920 | -#define GET_IOSPACE(pfn) 0 |
5921 | -#define GET_PFN(pfn) (pfn) |
5922 | - |
5923 | #include <asm-generic/pgtable.h> |
5924 | |
5925 | #endif /* _I386_PGTABLE_H */ |
5926 | --- a/include/asm-x86/mach-xen/asm/pgtable_64.h |
5927 | +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h |
5928 | @@ -1,12 +1,14 @@ |
5929 | #ifndef _X86_64_PGTABLE_H |
5930 | #define _X86_64_PGTABLE_H |
5931 | |
5932 | +#include <linux/const.h> |
5933 | +#ifndef __ASSEMBLY__ |
5934 | + |
5935 | /* |
5936 | * This file contains the functions and defines necessary to modify and use |
5937 | * the x86-64 page table tree. |
5938 | */ |
5939 | #include <asm/processor.h> |
5940 | -#include <asm/fixmap.h> |
5941 | #include <asm/bitops.h> |
5942 | #include <linux/threads.h> |
5943 | #include <linux/sched.h> |
5944 | @@ -34,11 +36,9 @@ |
5945 | #endif |
5946 | |
5947 | extern pud_t level3_kernel_pgt[512]; |
5948 | -extern pud_t level3_physmem_pgt[512]; |
5949 | extern pud_t level3_ident_pgt[512]; |
5950 | extern pmd_t level2_kernel_pgt[512]; |
5951 | extern pgd_t init_level4_pgt[]; |
5952 | -extern pgd_t boot_level4_pgt[]; |
5953 | extern unsigned long __supported_pte_mask; |
5954 | |
5955 | #define swapper_pg_dir init_level4_pgt |
5956 | @@ -53,6 +53,8 @@ |
5957 | extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; |
5958 | #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) |
5959 | |
5960 | +#endif /* !__ASSEMBLY__ */ |
5961 | + |
5962 | /* |
5963 | * PGDIR_SHIFT determines what a top-level page table entry can map |
5964 | */ |
5965 | @@ -77,6 +79,8 @@ |
5966 | */ |
5967 | #define PTRS_PER_PTE 512 |
5968 | |
5969 | +#ifndef __ASSEMBLY__ |
5970 | + |
5971 | #define pte_ERROR(e) \ |
5972 | printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \ |
5973 | &(e), __pte_val(e), pte_pfn(e)) |
5974 | @@ -119,22 +123,23 @@ |
5975 | |
5976 | #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK)) |
5977 | |
5978 | -#define PMD_SIZE (1UL << PMD_SHIFT) |
5979 | +#endif /* !__ASSEMBLY__ */ |
5980 | + |
5981 | +#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) |
5982 | #define PMD_MASK (~(PMD_SIZE-1)) |
5983 | -#define PUD_SIZE (1UL << PUD_SHIFT) |
5984 | +#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT) |
5985 | #define PUD_MASK (~(PUD_SIZE-1)) |
5986 | -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) |
5987 | +#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) |
5988 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) |
5989 | |
5990 | #define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) |
5991 | #define FIRST_USER_ADDRESS 0 |
5992 | |
5993 | -#ifndef __ASSEMBLY__ |
5994 | -#define MAXMEM 0x3fffffffffffUL |
5995 | -#define VMALLOC_START 0xffffc20000000000UL |
5996 | -#define VMALLOC_END 0xffffe1ffffffffffUL |
5997 | -#define MODULES_VADDR 0xffffffff88000000UL |
5998 | -#define MODULES_END 0xfffffffffff00000UL |
5999 | +#define MAXMEM _AC(0x3fffffffffff, UL) |
6000 | +#define VMALLOC_START _AC(0xffffc20000000000, UL) |
6001 | +#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) |
6002 | +#define MODULES_VADDR _AC(0xffffffff88000000, UL) |
6003 | +#define MODULES_END _AC(0xfffffffffff00000, UL) |
6004 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) |
6005 | |
6006 | #define _PAGE_BIT_PRESENT 0 |
6007 | @@ -160,16 +165,18 @@ |
6008 | #define _PAGE_GLOBAL 0x100 /* Global TLB entry */ |
6009 | |
6010 | #define _PAGE_PROTNONE 0x080 /* If not present */ |
6011 | -#define _PAGE_NX (1UL<<_PAGE_BIT_NX) |
6012 | +#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX) |
6013 | |
6014 | /* Mapped page is I/O or foreign and has no associated page struct. */ |
6015 | #define _PAGE_IO 0x200 |
6016 | |
6017 | +#ifndef __ASSEMBLY__ |
6018 | #if CONFIG_XEN_COMPAT <= 0x030002 |
6019 | extern unsigned int __kernel_page_user; |
6020 | #else |
6021 | #define __kernel_page_user 0 |
6022 | #endif |
6023 | +#endif |
6024 | |
6025 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) |
6026 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user) |
6027 | @@ -234,6 +241,8 @@ |
6028 | #define __S110 PAGE_SHARED_EXEC |
6029 | #define __S111 PAGE_SHARED_EXEC |
6030 | |
6031 | +#ifndef __ASSEMBLY__ |
6032 | + |
6033 | static inline unsigned long pgd_bad(pgd_t pgd) |
6034 | { |
6035 | return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); |
6036 | @@ -345,6 +354,20 @@ |
6037 | static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; } |
6038 | static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; } |
6039 | |
6040 | +static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) |
6041 | +{ |
6042 | + if (!pte_dirty(*ptep)) |
6043 | + return 0; |
6044 | + return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte); |
6045 | +} |
6046 | + |
6047 | +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) |
6048 | +{ |
6049 | + if (!pte_young(*ptep)) |
6050 | + return 0; |
6051 | + return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte); |
6052 | +} |
6053 | + |
6054 | static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
6055 | { |
6056 | pte_t pte = *ptep; |
6057 | @@ -470,18 +493,12 @@ |
6058 | * bit at the same time. */ |
6059 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
6060 | #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ |
6061 | - do { \ |
6062 | - if (dirty) \ |
6063 | - ptep_establish(vma, address, ptep, entry); \ |
6064 | - } while (0) |
6065 | - |
6066 | - |
6067 | -/* |
6068 | - * i386 says: We don't actually have these, but we want to advertise |
6069 | - * them so that we can encompass the flush here. |
6070 | - */ |
6071 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY |
6072 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
6073 | +({ \ |
6074 | + int __changed = !pte_same(*(ptep), entry); \ |
6075 | + if (__changed && (dirty)) \ |
6076 | + ptep_establish(vma, address, ptep, entry); \ |
6077 | + __changed; \ |
6078 | +}) |
6079 | |
6080 | #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH |
6081 | #define ptep_clear_flush_dirty(vma, address, ptep) \ |
6082 | @@ -490,7 +507,7 @@ |
6083 | int __dirty = pte_dirty(__pte); \ |
6084 | __pte = pte_mkclean(__pte); \ |
6085 | if ((vma)->vm_mm->context.pinned) \ |
6086 | - ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ |
6087 | + (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ |
6088 | else if (__dirty) \ |
6089 | set_pte(ptep, __pte); \ |
6090 | __dirty; \ |
6091 | @@ -503,7 +520,7 @@ |
6092 | int __young = pte_young(__pte); \ |
6093 | __pte = pte_mkold(__pte); \ |
6094 | if ((vma)->vm_mm->context.pinned) \ |
6095 | - ptep_set_access_flags(vma, address, ptep, __pte, __young); \ |
6096 | + (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ |
6097 | else if (__young) \ |
6098 | set_pte(ptep, __pte); \ |
6099 | __young; \ |
6100 | @@ -517,10 +534,7 @@ |
6101 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) |
6102 | |
6103 | extern spinlock_t pgd_lock; |
6104 | -extern struct page *pgd_list; |
6105 | -void vmalloc_sync_all(void); |
6106 | - |
6107 | -#endif /* !__ASSEMBLY__ */ |
6108 | +extern struct list_head pgd_list; |
6109 | |
6110 | extern int kern_addr_valid(unsigned long addr); |
6111 | |
6112 | @@ -559,10 +573,6 @@ |
6113 | #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ |
6114 | direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO) |
6115 | |
6116 | -#define MK_IOSPACE_PFN(space, pfn) (pfn) |
6117 | -#define GET_IOSPACE(pfn) 0 |
6118 | -#define GET_PFN(pfn) (pfn) |
6119 | - |
6120 | #define HAVE_ARCH_UNMAPPED_AREA |
6121 | |
6122 | #define pgtable_cache_init() do { } while (0) |
6123 | @@ -576,11 +586,14 @@ |
6124 | #define kc_offset_to_vaddr(o) \ |
6125 | (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o)) |
6126 | |
6127 | +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
6128 | +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY |
6129 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
6130 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL |
6131 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
6132 | #define __HAVE_ARCH_PTEP_SET_WRPROTECT |
6133 | #define __HAVE_ARCH_PTE_SAME |
6134 | #include <asm-generic/pgtable.h> |
6135 | +#endif /* !__ASSEMBLY__ */ |
6136 | |
6137 | #endif /* _X86_64_PGTABLE_H */ |
6138 | --- a/include/asm-x86/mach-xen/asm/processor_32.h |
6139 | +++ b/include/asm-x86/mach-xen/asm/processor_32.h |
6140 | @@ -21,6 +21,7 @@ |
6141 | #include <asm/percpu.h> |
6142 | #include <linux/cpumask.h> |
6143 | #include <linux/init.h> |
6144 | +#include <asm/processor-flags.h> |
6145 | #include <xen/interface/physdev.h> |
6146 | |
6147 | /* flag for disabling the tsc */ |
6148 | @@ -118,7 +119,8 @@ |
6149 | |
6150 | void __init cpu_detect(struct cpuinfo_x86 *c); |
6151 | |
6152 | -extern void identify_cpu(struct cpuinfo_x86 *); |
6153 | +extern void identify_boot_cpu(void); |
6154 | +extern void identify_secondary_cpu(struct cpuinfo_x86 *); |
6155 | extern void print_cpu_info(struct cpuinfo_x86 *); |
6156 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); |
6157 | extern unsigned short num_cache_leaves; |
6158 | @@ -129,29 +131,8 @@ |
6159 | static inline void detect_ht(struct cpuinfo_x86 *c) {} |
6160 | #endif |
6161 | |
6162 | -/* |
6163 | - * EFLAGS bits |
6164 | - */ |
6165 | -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ |
6166 | -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ |
6167 | -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ |
6168 | -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ |
6169 | -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ |
6170 | -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ |
6171 | -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ |
6172 | -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ |
6173 | -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ |
6174 | -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ |
6175 | -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ |
6176 | -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ |
6177 | -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ |
6178 | -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ |
6179 | -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ |
6180 | -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ |
6181 | -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ |
6182 | - |
6183 | -static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx, |
6184 | - unsigned int *ecx, unsigned int *edx) |
6185 | +static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx, |
6186 | + unsigned int *ecx, unsigned int *edx) |
6187 | { |
6188 | /* ecx is often an input as well as an output. */ |
6189 | __asm__(XEN_CPUID |
6190 | @@ -165,21 +146,6 @@ |
6191 | #define load_cr3(pgdir) write_cr3(__pa(pgdir)) |
6192 | |
6193 | /* |
6194 | - * Intel CPU features in CR4 |
6195 | - */ |
6196 | -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ |
6197 | -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ |
6198 | -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ |
6199 | -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ |
6200 | -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ |
6201 | -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ |
6202 | -#define X86_CR4_MCE 0x0040 /* Machine check enable */ |
6203 | -#define X86_CR4_PGE 0x0080 /* enable global pages */ |
6204 | -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ |
6205 | -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ |
6206 | -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ |
6207 | - |
6208 | -/* |
6209 | * Save the cr4 feature set we're using (ie |
6210 | * Pentium 4MB enable and PPro Global page |
6211 | * enable), so that any CPU's that boot up |
6212 | @@ -206,26 +172,6 @@ |
6213 | } |
6214 | |
6215 | /* |
6216 | - * NSC/Cyrix CPU configuration register indexes |
6217 | - */ |
6218 | - |
6219 | -#define CX86_PCR0 0x20 |
6220 | -#define CX86_GCR 0xb8 |
6221 | -#define CX86_CCR0 0xc0 |
6222 | -#define CX86_CCR1 0xc1 |
6223 | -#define CX86_CCR2 0xc2 |
6224 | -#define CX86_CCR3 0xc3 |
6225 | -#define CX86_CCR4 0xe8 |
6226 | -#define CX86_CCR5 0xe9 |
6227 | -#define CX86_CCR6 0xea |
6228 | -#define CX86_CCR7 0xeb |
6229 | -#define CX86_PCR1 0xf0 |
6230 | -#define CX86_DIR0 0xfe |
6231 | -#define CX86_DIR1 0xff |
6232 | -#define CX86_ARR_BASE 0xc4 |
6233 | -#define CX86_RCR_BASE 0xdc |
6234 | - |
6235 | -/* |
6236 | * NSC/Cyrix CPU indexed register access macros |
6237 | */ |
6238 | |
6239 | @@ -351,7 +297,8 @@ |
6240 | struct thread_struct; |
6241 | |
6242 | #ifndef CONFIG_X86_NO_TSS |
6243 | -struct tss_struct { |
6244 | +/* This is the TSS defined by the hardware. */ |
6245 | +struct i386_hw_tss { |
6246 | unsigned short back_link,__blh; |
6247 | unsigned long esp0; |
6248 | unsigned short ss0,__ss0h; |
6249 | @@ -375,6 +322,11 @@ |
6250 | unsigned short gs, __gsh; |
6251 | unsigned short ldt, __ldth; |
6252 | unsigned short trace, io_bitmap_base; |
6253 | +} __attribute__((packed)); |
6254 | + |
6255 | +struct tss_struct { |
6256 | + struct i386_hw_tss x86_tss; |
6257 | + |
6258 | /* |
6259 | * The extra 1 is there because the CPU will access an |
6260 | * additional byte beyond the end of the IO permission |
6261 | @@ -428,10 +380,11 @@ |
6262 | }; |
6263 | |
6264 | #define INIT_THREAD { \ |
6265 | + .esp0 = sizeof(init_stack) + (long)&init_stack, \ |
6266 | .vm86_info = NULL, \ |
6267 | .sysenter_cs = __KERNEL_CS, \ |
6268 | .io_bitmap_ptr = NULL, \ |
6269 | - .fs = __KERNEL_PDA, \ |
6270 | + .fs = __KERNEL_PERCPU, \ |
6271 | } |
6272 | |
6273 | /* |
6274 | @@ -441,10 +394,12 @@ |
6275 | * be within the limit. |
6276 | */ |
6277 | #define INIT_TSS { \ |
6278 | - .esp0 = sizeof(init_stack) + (long)&init_stack, \ |
6279 | - .ss0 = __KERNEL_DS, \ |
6280 | - .ss1 = __KERNEL_CS, \ |
6281 | - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ |
6282 | + .x86_tss = { \ |
6283 | + .esp0 = sizeof(init_stack) + (long)&init_stack, \ |
6284 | + .ss0 = __KERNEL_DS, \ |
6285 | + .ss1 = __KERNEL_CS, \ |
6286 | + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ |
6287 | + }, \ |
6288 | .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ |
6289 | } |
6290 | |
6291 | @@ -551,38 +506,33 @@ |
6292 | |
6293 | #define cpu_relax() rep_nop() |
6294 | |
6295 | -#define paravirt_enabled() 0 |
6296 | -#define __cpuid xen_cpuid |
6297 | - |
6298 | #ifndef CONFIG_X86_NO_TSS |
6299 | -static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) |
6300 | +static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) |
6301 | { |
6302 | - tss->esp0 = thread->esp0; |
6303 | + tss->x86_tss.esp0 = thread->esp0; |
6304 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ |
6305 | - if (unlikely(tss->ss1 != thread->sysenter_cs)) { |
6306 | - tss->ss1 = thread->sysenter_cs; |
6307 | + if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { |
6308 | + tss->x86_tss.ss1 = thread->sysenter_cs; |
6309 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); |
6310 | } |
6311 | } |
6312 | -#define load_esp0(tss, thread) \ |
6313 | - __load_esp0(tss, thread) |
6314 | #else |
6315 | -#define load_esp0(tss, thread) do { \ |
6316 | +#define xen_load_esp0(tss, thread) do { \ |
6317 | if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ |
6318 | BUG(); \ |
6319 | } while (0) |
6320 | #endif |
6321 | |
6322 | |
6323 | -/* |
6324 | - * These special macros can be used to get or set a debugging register |
6325 | - */ |
6326 | -#define get_debugreg(var, register) \ |
6327 | - (var) = HYPERVISOR_get_debugreg(register) |
6328 | -#define set_debugreg(value, register) \ |
6329 | - WARN_ON(HYPERVISOR_set_debugreg(register, value)) |
6330 | +static inline unsigned long xen_get_debugreg(int regno) |
6331 | +{ |
6332 | + return HYPERVISOR_get_debugreg(regno); |
6333 | +} |
6334 | |
6335 | -#define set_iopl_mask xen_set_iopl_mask |
6336 | +static inline void xen_set_debugreg(int regno, unsigned long value) |
6337 | +{ |
6338 | + WARN_ON(HYPERVISOR_set_debugreg(regno, value)); |
6339 | +} |
6340 | |
6341 | /* |
6342 | * Set IOPL bits in EFLAGS from given mask |
6343 | @@ -597,6 +547,21 @@ |
6344 | } |
6345 | |
6346 | |
6347 | +#define paravirt_enabled() 0 |
6348 | +#define __cpuid xen_cpuid |
6349 | + |
6350 | +#define load_esp0 xen_load_esp0 |
6351 | + |
6352 | +/* |
6353 | + * These special macros can be used to get or set a debugging register |
6354 | + */ |
6355 | +#define get_debugreg(var, register) \ |
6356 | + (var) = xen_get_debugreg(register) |
6357 | +#define set_debugreg(value, register) \ |
6358 | + xen_set_debugreg(register, value) |
6359 | + |
6360 | +#define set_iopl_mask xen_set_iopl_mask |
6361 | + |
6362 | /* |
6363 | * Generic CPUID function |
6364 | * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx |
6365 | @@ -749,8 +714,14 @@ |
6366 | extern void enable_sep_cpu(void); |
6367 | extern int sysenter_setup(void); |
6368 | |
6369 | -extern int init_gdt(int cpu, struct task_struct *idle); |
6370 | +/* Defined in head.S */ |
6371 | +extern struct Xgt_desc_struct early_gdt_descr; |
6372 | + |
6373 | extern void cpu_set_gdt(int); |
6374 | -extern void secondary_cpu_init(void); |
6375 | +extern void switch_to_new_gdt(void); |
6376 | +extern void cpu_init(void); |
6377 | +extern void init_gdt(int cpu); |
6378 | + |
6379 | +extern int force_mwait; |
6380 | |
6381 | #endif /* __ASM_I386_PROCESSOR_H */ |
6382 | --- a/include/asm-x86/mach-xen/asm/processor_64.h |
6383 | +++ b/include/asm-x86/mach-xen/asm/processor_64.h |
6384 | @@ -20,6 +20,7 @@ |
6385 | #include <asm/percpu.h> |
6386 | #include <linux/personality.h> |
6387 | #include <linux/cpumask.h> |
6388 | +#include <asm/processor-flags.h> |
6389 | |
6390 | #define TF_MASK 0x00000100 |
6391 | #define IF_MASK 0x00000200 |
6392 | @@ -103,42 +104,6 @@ |
6393 | extern unsigned short num_cache_leaves; |
6394 | |
6395 | /* |
6396 | - * EFLAGS bits |
6397 | - */ |
6398 | -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ |
6399 | -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ |
6400 | -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ |
6401 | -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ |
6402 | -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ |
6403 | -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ |
6404 | -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ |
6405 | -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ |
6406 | -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ |
6407 | -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ |
6408 | -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ |
6409 | -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ |
6410 | -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ |
6411 | -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ |
6412 | -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ |
6413 | -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ |
6414 | -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ |
6415 | - |
6416 | -/* |
6417 | - * Intel CPU features in CR4 |
6418 | - */ |
6419 | -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ |
6420 | -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ |
6421 | -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ |
6422 | -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ |
6423 | -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ |
6424 | -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ |
6425 | -#define X86_CR4_MCE 0x0040 /* Machine check enable */ |
6426 | -#define X86_CR4_PGE 0x0080 /* enable global pages */ |
6427 | -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ |
6428 | -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ |
6429 | -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ |
6430 | - |
6431 | -/* |
6432 | * Save the cr4 feature set we're using (ie |
6433 | * Pentium 4MB enable and PPro Global page |
6434 | * enable), so that any CPU's that boot up |
6435 | @@ -203,7 +168,7 @@ |
6436 | u32 mxcsr; |
6437 | u32 mxcsr_mask; |
6438 | u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ |
6439 | - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */ |
6440 | + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ |
6441 | u32 padding[24]; |
6442 | } __attribute__ ((aligned (16))); |
6443 | |
6444 | @@ -436,22 +401,6 @@ |
6445 | #define cpu_relax() rep_nop() |
6446 | |
6447 | /* |
6448 | - * NSC/Cyrix CPU configuration register indexes |
6449 | - */ |
6450 | -#define CX86_CCR0 0xc0 |
6451 | -#define CX86_CCR1 0xc1 |
6452 | -#define CX86_CCR2 0xc2 |
6453 | -#define CX86_CCR3 0xc3 |
6454 | -#define CX86_CCR4 0xe8 |
6455 | -#define CX86_CCR5 0xe9 |
6456 | -#define CX86_CCR6 0xea |
6457 | -#define CX86_CCR7 0xeb |
6458 | -#define CX86_DIR0 0xfe |
6459 | -#define CX86_DIR1 0xff |
6460 | -#define CX86_ARR_BASE 0xc4 |
6461 | -#define CX86_RCR_BASE 0xdc |
6462 | - |
6463 | -/* |
6464 | * NSC/Cyrix CPU indexed register access macros |
6465 | */ |
6466 | |
6467 | --- a/include/asm-x86/mach-xen/asm/scatterlist_32.h |
6468 | +++ b/include/asm-x86/mach-xen/asm/scatterlist_32.h |
6469 | @@ -1,6 +1,8 @@ |
6470 | #ifndef _I386_SCATTERLIST_H |
6471 | #define _I386_SCATTERLIST_H |
6472 | |
6473 | +#include <asm/types.h> |
6474 | + |
6475 | struct scatterlist { |
6476 | struct page *page; |
6477 | unsigned int offset; |
6478 | --- a/include/asm-x86/mach-xen/asm/segment_32.h |
6479 | +++ b/include/asm-x86/mach-xen/asm/segment_32.h |
6480 | @@ -39,7 +39,7 @@ |
6481 | * 25 - APM BIOS support |
6482 | * |
6483 | * 26 - ESPFIX small SS |
6484 | - * 27 - PDA [ per-cpu private data area ] |
6485 | + * 27 - per-cpu [ offset to per-cpu data area ] |
6486 | * 28 - unused |
6487 | * 29 - unused |
6488 | * 30 - unused |
6489 | @@ -74,8 +74,12 @@ |
6490 | #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) |
6491 | #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) |
6492 | |
6493 | -#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) |
6494 | -#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) |
6495 | +#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) |
6496 | +#ifdef CONFIG_SMP |
6497 | +#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) |
6498 | +#else |
6499 | +#define __KERNEL_PERCPU 0 |
6500 | +#endif |
6501 | |
6502 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 |
6503 | |
6504 | --- a/include/asm-x86/mach-xen/asm/smp_32.h |
6505 | +++ b/include/asm-x86/mach-xen/asm/smp_32.h |
6506 | @@ -8,19 +8,15 @@ |
6507 | #include <linux/kernel.h> |
6508 | #include <linux/threads.h> |
6509 | #include <linux/cpumask.h> |
6510 | -#include <asm/pda.h> |
6511 | #endif |
6512 | |
6513 | -#ifdef CONFIG_X86_LOCAL_APIC |
6514 | -#ifndef __ASSEMBLY__ |
6515 | -#include <asm/fixmap.h> |
6516 | +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) |
6517 | #include <asm/bitops.h> |
6518 | #include <asm/mpspec.h> |
6519 | +#include <asm/apic.h> |
6520 | #ifdef CONFIG_X86_IO_APIC |
6521 | #include <asm/io_apic.h> |
6522 | #endif |
6523 | -#include <asm/apic.h> |
6524 | -#endif |
6525 | #endif |
6526 | |
6527 | #define BAD_APICID 0xFFu |
6528 | @@ -52,9 +48,76 @@ |
6529 | extern void cpu_uninit(void); |
6530 | #endif |
6531 | |
6532 | -#ifndef CONFIG_PARAVIRT |
6533 | +#ifndef CONFIG_XEN |
6534 | +struct smp_ops |
6535 | +{ |
6536 | + void (*smp_prepare_boot_cpu)(void); |
6537 | + void (*smp_prepare_cpus)(unsigned max_cpus); |
6538 | + int (*cpu_up)(unsigned cpu); |
6539 | + void (*smp_cpus_done)(unsigned max_cpus); |
6540 | + |
6541 | + void (*smp_send_stop)(void); |
6542 | + void (*smp_send_reschedule)(int cpu); |
6543 | + int (*smp_call_function_mask)(cpumask_t mask, |
6544 | + void (*func)(void *info), void *info, |
6545 | + int wait); |
6546 | +}; |
6547 | + |
6548 | +extern struct smp_ops smp_ops; |
6549 | + |
6550 | +static inline void smp_prepare_boot_cpu(void) |
6551 | +{ |
6552 | + smp_ops.smp_prepare_boot_cpu(); |
6553 | +} |
6554 | +static inline void smp_prepare_cpus(unsigned int max_cpus) |
6555 | +{ |
6556 | + smp_ops.smp_prepare_cpus(max_cpus); |
6557 | +} |
6558 | +static inline int __cpu_up(unsigned int cpu) |
6559 | +{ |
6560 | + return smp_ops.cpu_up(cpu); |
6561 | +} |
6562 | +static inline void smp_cpus_done(unsigned int max_cpus) |
6563 | +{ |
6564 | + smp_ops.smp_cpus_done(max_cpus); |
6565 | +} |
6566 | + |
6567 | +static inline void smp_send_stop(void) |
6568 | +{ |
6569 | + smp_ops.smp_send_stop(); |
6570 | +} |
6571 | +static inline void smp_send_reschedule(int cpu) |
6572 | +{ |
6573 | + smp_ops.smp_send_reschedule(cpu); |
6574 | +} |
6575 | +static inline int smp_call_function_mask(cpumask_t mask, |
6576 | + void (*func) (void *info), void *info, |
6577 | + int wait) |
6578 | +{ |
6579 | + return smp_ops.smp_call_function_mask(mask, func, info, wait); |
6580 | +} |
6581 | + |
6582 | +void native_smp_prepare_boot_cpu(void); |
6583 | +void native_smp_prepare_cpus(unsigned int max_cpus); |
6584 | +int native_cpu_up(unsigned int cpunum); |
6585 | +void native_smp_cpus_done(unsigned int max_cpus); |
6586 | + |
6587 | #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ |
6588 | do { } while (0) |
6589 | + |
6590 | +#else |
6591 | + |
6592 | + |
6593 | +void xen_smp_send_stop(void); |
6594 | +void xen_smp_send_reschedule(int cpu); |
6595 | +int xen_smp_call_function_mask(cpumask_t mask, |
6596 | + void (*func) (void *info), void *info, |
6597 | + int wait); |
6598 | + |
6599 | +#define smp_send_stop xen_smp_send_stop |
6600 | +#define smp_send_reschedule xen_smp_send_reschedule |
6601 | +#define smp_call_function_mask xen_smp_call_function_mask |
6602 | + |
6603 | #endif |
6604 | |
6605 | /* |
6606 | @@ -62,7 +125,8 @@ |
6607 | * from the initial startup. We map APIC_BASE very early in page_setup(), |
6608 | * so this is correct in the x86 case. |
6609 | */ |
6610 | -#define raw_smp_processor_id() (read_pda(cpu_number)) |
6611 | +DECLARE_PER_CPU(int, cpu_number); |
6612 | +#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) |
6613 | |
6614 | extern cpumask_t cpu_possible_map; |
6615 | #define cpu_callin_map cpu_possible_map |
6616 | @@ -73,20 +137,6 @@ |
6617 | return cpus_weight(cpu_possible_map); |
6618 | } |
6619 | |
6620 | -#ifdef CONFIG_X86_LOCAL_APIC |
6621 | - |
6622 | -#ifdef APIC_DEFINITION |
6623 | -extern int hard_smp_processor_id(void); |
6624 | -#else |
6625 | -#include <mach_apicdef.h> |
6626 | -static inline int hard_smp_processor_id(void) |
6627 | -{ |
6628 | - /* we don't want to mark this access volatile - bad code generation */ |
6629 | - return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); |
6630 | -} |
6631 | -#endif |
6632 | -#endif |
6633 | - |
6634 | #define safe_smp_processor_id() smp_processor_id() |
6635 | extern int __cpu_disable(void); |
6636 | extern void __cpu_die(unsigned int cpu); |
6637 | @@ -102,10 +152,31 @@ |
6638 | |
6639 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
6640 | |
6641 | -#endif |
6642 | +#endif /* CONFIG_SMP */ |
6643 | |
6644 | #ifndef __ASSEMBLY__ |
6645 | |
6646 | +#ifdef CONFIG_X86_LOCAL_APIC |
6647 | + |
6648 | +#ifdef APIC_DEFINITION |
6649 | +extern int hard_smp_processor_id(void); |
6650 | +#else |
6651 | +#include <mach_apicdef.h> |
6652 | +static inline int hard_smp_processor_id(void) |
6653 | +{ |
6654 | + /* we don't want to mark this access volatile - bad code generation */ |
6655 | + return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); |
6656 | +} |
6657 | +#endif /* APIC_DEFINITION */ |
6658 | + |
6659 | +#else /* CONFIG_X86_LOCAL_APIC */ |
6660 | + |
6661 | +#ifndef CONFIG_SMP |
6662 | +#define hard_smp_processor_id() 0 |
6663 | +#endif |
6664 | + |
6665 | +#endif /* CONFIG_X86_LOCAL_APIC */ |
6666 | + |
6667 | extern u8 apicid_2_node[]; |
6668 | |
6669 | #ifdef CONFIG_X86_LOCAL_APIC |
6670 | --- a/include/asm-x86/mach-xen/asm/smp_64.h |
6671 | +++ b/include/asm-x86/mach-xen/asm/smp_64.h |
6672 | @@ -11,12 +11,11 @@ |
6673 | extern int disable_apic; |
6674 | |
6675 | #ifdef CONFIG_X86_LOCAL_APIC |
6676 | -#include <asm/fixmap.h> |
6677 | #include <asm/mpspec.h> |
6678 | +#include <asm/apic.h> |
6679 | #ifdef CONFIG_X86_IO_APIC |
6680 | #include <asm/io_apic.h> |
6681 | #endif |
6682 | -#include <asm/apic.h> |
6683 | #include <asm/thread_info.h> |
6684 | #endif |
6685 | |
6686 | @@ -41,7 +40,6 @@ |
6687 | extern void unlock_ipi_call_lock(void); |
6688 | extern int smp_num_siblings; |
6689 | extern void smp_send_reschedule(int cpu); |
6690 | -void smp_stop_cpu(void); |
6691 | |
6692 | extern cpumask_t cpu_sibling_map[NR_CPUS]; |
6693 | extern cpumask_t cpu_core_map[NR_CPUS]; |
6694 | @@ -62,14 +60,6 @@ |
6695 | |
6696 | #define raw_smp_processor_id() read_pda(cpunumber) |
6697 | |
6698 | -#ifdef CONFIG_X86_LOCAL_APIC |
6699 | -static inline int hard_smp_processor_id(void) |
6700 | -{ |
6701 | - /* we don't want to mark this access volatile - bad code generation */ |
6702 | - return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); |
6703 | -} |
6704 | -#endif |
6705 | - |
6706 | extern int __cpu_disable(void); |
6707 | extern void __cpu_die(unsigned int cpu); |
6708 | extern void prefill_possible_map(void); |
6709 | @@ -78,6 +68,14 @@ |
6710 | |
6711 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
6712 | |
6713 | +#endif /* CONFIG_SMP */ |
6714 | + |
6715 | +#ifdef CONFIG_X86_LOCAL_APIC |
6716 | +static inline int hard_smp_processor_id(void) |
6717 | +{ |
6718 | + /* we don't want to mark this access volatile - bad code generation */ |
6719 | + return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); |
6720 | +} |
6721 | #endif |
6722 | |
6723 | /* |
6724 | --- a/include/asm-x86/mach-xen/asm/system_32.h |
6725 | +++ b/include/asm-x86/mach-xen/asm/system_32.h |
6726 | @@ -4,7 +4,7 @@ |
6727 | #include <linux/kernel.h> |
6728 | #include <asm/segment.h> |
6729 | #include <asm/cpufeature.h> |
6730 | -#include <linux/bitops.h> /* for LOCK_PREFIX */ |
6731 | +#include <asm/cmpxchg.h> |
6732 | #include <asm/synch_bitops.h> |
6733 | #include <asm/hypervisor.h> |
6734 | |
6735 | @@ -90,308 +90,102 @@ |
6736 | #define savesegment(seg, value) \ |
6737 | asm volatile("mov %%" #seg ",%0":"=rm" (value)) |
6738 | |
6739 | -#define read_cr0() ({ \ |
6740 | - unsigned int __dummy; \ |
6741 | - __asm__ __volatile__( \ |
6742 | - "movl %%cr0,%0\n\t" \ |
6743 | - :"=r" (__dummy)); \ |
6744 | - __dummy; \ |
6745 | -}) |
6746 | -#define write_cr0(x) \ |
6747 | - __asm__ __volatile__("movl %0,%%cr0": :"r" (x)) |
6748 | - |
6749 | -#define read_cr2() (current_vcpu_info()->arch.cr2) |
6750 | -#define write_cr2(x) \ |
6751 | - __asm__ __volatile__("movl %0,%%cr2": :"r" (x)) |
6752 | - |
6753 | -#define read_cr3() ({ \ |
6754 | - unsigned int __dummy; \ |
6755 | - __asm__ ( \ |
6756 | - "movl %%cr3,%0\n\t" \ |
6757 | - :"=r" (__dummy)); \ |
6758 | - __dummy = xen_cr3_to_pfn(__dummy); \ |
6759 | - mfn_to_pfn(__dummy) << PAGE_SHIFT; \ |
6760 | -}) |
6761 | -#define write_cr3(x) ({ \ |
6762 | - unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT); \ |
6763 | - __dummy = xen_pfn_to_cr3(__dummy); \ |
6764 | - __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \ |
6765 | -}) |
6766 | -#define read_cr4() ({ \ |
6767 | - unsigned int __dummy; \ |
6768 | - __asm__( \ |
6769 | - "movl %%cr4,%0\n\t" \ |
6770 | - :"=r" (__dummy)); \ |
6771 | - __dummy; \ |
6772 | -}) |
6773 | -#define read_cr4_safe() ({ \ |
6774 | - unsigned int __dummy; \ |
6775 | - /* This could fault if %cr4 does not exist */ \ |
6776 | - __asm__("1: movl %%cr4, %0 \n" \ |
6777 | - "2: \n" \ |
6778 | - ".section __ex_table,\"a\" \n" \ |
6779 | - ".long 1b,2b \n" \ |
6780 | - ".previous \n" \ |
6781 | - : "=r" (__dummy): "0" (0)); \ |
6782 | - __dummy; \ |
6783 | -}) |
6784 | - |
6785 | -#define write_cr4(x) \ |
6786 | - __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) |
6787 | - |
6788 | -#define wbinvd() \ |
6789 | - __asm__ __volatile__ ("wbinvd": : :"memory") |
6790 | - |
6791 | -/* Clear the 'TS' bit */ |
6792 | -#define clts() (HYPERVISOR_fpu_taskswitch(0)) |
6793 | - |
6794 | -/* Set the 'TS' bit */ |
6795 | -#define stts() (HYPERVISOR_fpu_taskswitch(1)) |
6796 | - |
6797 | -#endif /* __KERNEL__ */ |
6798 | - |
6799 | -static inline unsigned long get_limit(unsigned long segment) |
6800 | +static inline void xen_clts(void) |
6801 | { |
6802 | - unsigned long __limit; |
6803 | - __asm__("lsll %1,%0" |
6804 | - :"=r" (__limit):"r" (segment)); |
6805 | - return __limit+1; |
6806 | + HYPERVISOR_fpu_taskswitch(0); |
6807 | } |
6808 | |
6809 | -#define nop() __asm__ __volatile__ ("nop") |
6810 | - |
6811 | -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) |
6812 | - |
6813 | -#define tas(ptr) (xchg((ptr),1)) |
6814 | - |
6815 | -struct __xchg_dummy { unsigned long a[100]; }; |
6816 | -#define __xg(x) ((struct __xchg_dummy *)(x)) |
6817 | +static inline unsigned long xen_read_cr0(void) |
6818 | +{ |
6819 | + unsigned long val; |
6820 | + asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); |
6821 | + return val; |
6822 | +} |
6823 | |
6824 | +static inline void xen_write_cr0(unsigned long val) |
6825 | +{ |
6826 | + asm volatile("movl %0,%%cr0": :"r" (val)); |
6827 | +} |
6828 | |
6829 | -#ifdef CONFIG_X86_CMPXCHG64 |
6830 | +#define xen_read_cr2() (current_vcpu_info()->arch.cr2) |
6831 | |
6832 | -/* |
6833 | - * The semantics of XCHGCMP8B are a bit strange, this is why |
6834 | - * there is a loop and the loading of %%eax and %%edx has to |
6835 | - * be inside. This inlines well in most cases, the cached |
6836 | - * cost is around ~38 cycles. (in the future we might want |
6837 | - * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that |
6838 | - * might have an implicit FPU-save as a cost, so it's not |
6839 | - * clear which path to go.) |
6840 | - * |
6841 | - * cmpxchg8b must be used with the lock prefix here to allow |
6842 | - * the instruction to be executed atomically, see page 3-102 |
6843 | - * of the instruction set reference 24319102.pdf. We need |
6844 | - * the reader side to see the coherent 64bit value. |
6845 | - */ |
6846 | -static inline void __set_64bit (unsigned long long * ptr, |
6847 | - unsigned int low, unsigned int high) |
6848 | +static inline void xen_write_cr2(unsigned long val) |
6849 | { |
6850 | - __asm__ __volatile__ ( |
6851 | - "\n1:\t" |
6852 | - "movl (%0), %%eax\n\t" |
6853 | - "movl 4(%0), %%edx\n\t" |
6854 | - "lock cmpxchg8b (%0)\n\t" |
6855 | - "jnz 1b" |
6856 | - : /* no outputs */ |
6857 | - : "D"(ptr), |
6858 | - "b"(low), |
6859 | - "c"(high) |
6860 | - : "ax","dx","memory"); |
6861 | + asm volatile("movl %0,%%cr2": :"r" (val)); |
6862 | } |
6863 | |
6864 | -static inline void __set_64bit_constant (unsigned long long *ptr, |
6865 | - unsigned long long value) |
6866 | +static inline unsigned long xen_read_cr3(void) |
6867 | { |
6868 | - __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); |
6869 | + unsigned long val; |
6870 | + asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); |
6871 | + return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT; |
6872 | } |
6873 | -#define ll_low(x) *(((unsigned int*)&(x))+0) |
6874 | -#define ll_high(x) *(((unsigned int*)&(x))+1) |
6875 | |
6876 | -static inline void __set_64bit_var (unsigned long long *ptr, |
6877 | - unsigned long long value) |
6878 | +static inline void xen_write_cr3(unsigned long val) |
6879 | { |
6880 | - __set_64bit(ptr,ll_low(value), ll_high(value)); |
6881 | + val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT)); |
6882 | + asm volatile("movl %0,%%cr3": :"r" (val)); |
6883 | } |
6884 | |
6885 | -#define set_64bit(ptr,value) \ |
6886 | -(__builtin_constant_p(value) ? \ |
6887 | - __set_64bit_constant(ptr, value) : \ |
6888 | - __set_64bit_var(ptr, value) ) |
6889 | +static inline unsigned long xen_read_cr4(void) |
6890 | +{ |
6891 | + unsigned long val; |
6892 | + asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); |
6893 | + return val; |
6894 | +} |
6895 | |
6896 | -#define _set_64bit(ptr,value) \ |
6897 | -(__builtin_constant_p(value) ? \ |
6898 | - __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ |
6899 | - __set_64bit(ptr, ll_low(value), ll_high(value)) ) |
6900 | +static inline unsigned long xen_read_cr4_safe(void) |
6901 | +{ |
6902 | + unsigned long val; |
6903 | + /* This could fault if %cr4 does not exist */ |
6904 | + asm("1: movl %%cr4, %0 \n" |
6905 | + "2: \n" |
6906 | + ".section __ex_table,\"a\" \n" |
6907 | + ".long 1b,2b \n" |
6908 | + ".previous \n" |
6909 | + : "=r" (val): "0" (0)); |
6910 | + return val; |
6911 | +} |
6912 | |
6913 | -#endif |
6914 | +static inline void xen_write_cr4(unsigned long val) |
6915 | +{ |
6916 | + asm volatile("movl %0,%%cr4": :"r" (val)); |
6917 | +} |
6918 | |
6919 | -/* |
6920 | - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway |
6921 | - * Note 2: xchg has side effect, so that attribute volatile is necessary, |
6922 | - * but generally the primitive is invalid, *ptr is output argument. --ANK |
6923 | - */ |
6924 | -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) |
6925 | +static inline void xen_wbinvd(void) |
6926 | { |
6927 | - switch (size) { |
6928 | - case 1: |
6929 | - __asm__ __volatile__("xchgb %b0,%1" |
6930 | - :"=q" (x) |
6931 | - :"m" (*__xg(ptr)), "0" (x) |
6932 | - :"memory"); |
6933 | - break; |
6934 | - case 2: |
6935 | - __asm__ __volatile__("xchgw %w0,%1" |
6936 | - :"=r" (x) |
6937 | - :"m" (*__xg(ptr)), "0" (x) |
6938 | - :"memory"); |
6939 | - break; |
6940 | - case 4: |
6941 | - __asm__ __volatile__("xchgl %0,%1" |
6942 | - :"=r" (x) |
6943 | - :"m" (*__xg(ptr)), "0" (x) |
6944 | - :"memory"); |
6945 | - break; |
6946 | - } |
6947 | - return x; |
6948 | + asm volatile("wbinvd": : :"memory"); |
6949 | } |
6950 | |
6951 | -/* |
6952 | - * Atomic compare and exchange. Compare OLD with MEM, if identical, |
6953 | - * store NEW in MEM. Return the initial value in MEM. Success is |
6954 | - * indicated by comparing RETURN with OLD. |
6955 | - */ |
6956 | +#define read_cr0() (xen_read_cr0()) |
6957 | +#define write_cr0(x) (xen_write_cr0(x)) |
6958 | +#define read_cr2() (xen_read_cr2()) |
6959 | +#define write_cr2(x) (xen_write_cr2(x)) |
6960 | +#define read_cr3() (xen_read_cr3()) |
6961 | +#define write_cr3(x) (xen_write_cr3(x)) |
6962 | +#define read_cr4() (xen_read_cr4()) |
6963 | +#define read_cr4_safe() (xen_read_cr4_safe()) |
6964 | +#define write_cr4(x) (xen_write_cr4(x)) |
6965 | +#define wbinvd() (xen_wbinvd()) |
6966 | |
6967 | -#ifdef CONFIG_X86_CMPXCHG |
6968 | -#define __HAVE_ARCH_CMPXCHG 1 |
6969 | -#define cmpxchg(ptr,o,n)\ |
6970 | - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ |
6971 | - (unsigned long)(n),sizeof(*(ptr)))) |
6972 | -#define sync_cmpxchg(ptr,o,n)\ |
6973 | - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\ |
6974 | - (unsigned long)(n),sizeof(*(ptr)))) |
6975 | -#endif |
6976 | - |
6977 | -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, |
6978 | - unsigned long new, int size) |
6979 | -{ |
6980 | - unsigned long prev; |
6981 | - switch (size) { |
6982 | - case 1: |
6983 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" |
6984 | - : "=a"(prev) |
6985 | - : "q"(new), "m"(*__xg(ptr)), "0"(old) |
6986 | - : "memory"); |
6987 | - return prev; |
6988 | - case 2: |
6989 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" |
6990 | - : "=a"(prev) |
6991 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) |
6992 | - : "memory"); |
6993 | - return prev; |
6994 | - case 4: |
6995 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" |
6996 | - : "=a"(prev) |
6997 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) |
6998 | - : "memory"); |
6999 | - return prev; |
7000 | - } |
7001 | - return old; |
7002 | -} |
7003 | +/* Clear the 'TS' bit */ |
7004 | +#define clts() (xen_clts()) |
7005 | |
7006 | -/* |
7007 | - * Always use locked operations when touching memory shared with a |
7008 | - * hypervisor, since the system may be SMP even if the guest kernel |
7009 | - * isn't. |
7010 | - */ |
7011 | -static inline unsigned long __sync_cmpxchg(volatile void *ptr, |
7012 | - unsigned long old, |
7013 | - unsigned long new, int size) |
7014 | -{ |
7015 | - unsigned long prev; |
7016 | - switch (size) { |
7017 | - case 1: |
7018 | - __asm__ __volatile__("lock; cmpxchgb %b1,%2" |
7019 | - : "=a"(prev) |
7020 | - : "q"(new), "m"(*__xg(ptr)), "0"(old) |
7021 | - : "memory"); |
7022 | - return prev; |
7023 | - case 2: |
7024 | - __asm__ __volatile__("lock; cmpxchgw %w1,%2" |
7025 | - : "=a"(prev) |
7026 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) |
7027 | - : "memory"); |
7028 | - return prev; |
7029 | - case 4: |
7030 | - __asm__ __volatile__("lock; cmpxchgl %1,%2" |
7031 | - : "=a"(prev) |
7032 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) |
7033 | - : "memory"); |
7034 | - return prev; |
7035 | - } |
7036 | - return old; |
7037 | -} |
7038 | +/* Set the 'TS' bit */ |
7039 | +#define stts() (HYPERVISOR_fpu_taskswitch(1)) |
7040 | |
7041 | -#ifndef CONFIG_X86_CMPXCHG |
7042 | -/* |
7043 | - * Building a kernel capable running on 80386. It may be necessary to |
7044 | - * simulate the cmpxchg on the 80386 CPU. For that purpose we define |
7045 | - * a function for each of the sizes we support. |
7046 | - */ |
7047 | +#endif /* __KERNEL__ */ |
7048 | |
7049 | -extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8); |
7050 | -extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16); |
7051 | -extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32); |
7052 | - |
7053 | -static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, |
7054 | - unsigned long new, int size) |
7055 | -{ |
7056 | - switch (size) { |
7057 | - case 1: |
7058 | - return cmpxchg_386_u8(ptr, old, new); |
7059 | - case 2: |
7060 | - return cmpxchg_386_u16(ptr, old, new); |
7061 | - case 4: |
7062 | - return cmpxchg_386_u32(ptr, old, new); |
7063 | - } |
7064 | - return old; |
7065 | -} |
7066 | - |
7067 | -#define cmpxchg(ptr,o,n) \ |
7068 | -({ \ |
7069 | - __typeof__(*(ptr)) __ret; \ |
7070 | - if (likely(boot_cpu_data.x86 > 3)) \ |
7071 | - __ret = __cmpxchg((ptr), (unsigned long)(o), \ |
7072 | - (unsigned long)(n), sizeof(*(ptr))); \ |
7073 | - else \ |
7074 | - __ret = cmpxchg_386((ptr), (unsigned long)(o), \ |
7075 | - (unsigned long)(n), sizeof(*(ptr))); \ |
7076 | - __ret; \ |
7077 | -}) |
7078 | -#endif |
7079 | - |
7080 | -#ifdef CONFIG_X86_CMPXCHG64 |
7081 | - |
7082 | -static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, |
7083 | - unsigned long long new) |
7084 | -{ |
7085 | - unsigned long long prev; |
7086 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" |
7087 | - : "=A"(prev) |
7088 | - : "b"((unsigned long)new), |
7089 | - "c"((unsigned long)(new >> 32)), |
7090 | - "m"(*__xg(ptr)), |
7091 | - "0"(old) |
7092 | - : "memory"); |
7093 | - return prev; |
7094 | -} |
7095 | - |
7096 | -#define cmpxchg64(ptr,o,n)\ |
7097 | - ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ |
7098 | - (unsigned long long)(n))) |
7099 | +static inline unsigned long get_limit(unsigned long segment) |
7100 | +{ |
7101 | + unsigned long __limit; |
7102 | + __asm__("lsll %1,%0" |
7103 | + :"=r" (__limit):"r" (segment)); |
7104 | + return __limit+1; |
7105 | +} |
7106 | + |
7107 | +#define nop() __asm__ __volatile__ ("nop") |
7108 | |
7109 | -#endif |
7110 | - |
7111 | /* |
7112 | * Force strict CPU ordering. |
7113 | * And yes, this is required on UP too when we're talking |
7114 | --- a/include/asm-x86/mach-xen/asm/system_64.h |
7115 | +++ b/include/asm-x86/mach-xen/asm/system_64.h |
7116 | @@ -3,7 +3,7 @@ |
7117 | |
7118 | #include <linux/kernel.h> |
7119 | #include <asm/segment.h> |
7120 | -#include <asm/alternative.h> |
7121 | +#include <asm/cmpxchg.h> |
7122 | |
7123 | #include <asm/synch_bitops.h> |
7124 | #include <asm/hypervisor.h> |
7125 | @@ -43,7 +43,7 @@ |
7126 | [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ |
7127 | [ti_flags] "i" (offsetof(struct thread_info, flags)),\ |
7128 | [tif_fork] "i" (TIF_FORK), \ |
7129 | - [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ |
7130 | + [thread_info] "i" (offsetof(struct task_struct, stack)), \ |
7131 | [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ |
7132 | : "memory", "cc" __EXTRA_CLOBBER) |
7133 | |
7134 | @@ -92,6 +92,12 @@ |
7135 | machine_to_phys(__dummy); \ |
7136 | }) |
7137 | |
7138 | +static inline void write_cr3(unsigned long val) |
7139 | +{ |
7140 | + val = phys_to_machine(val); |
7141 | + asm volatile("movq %0,%%cr3" :: "r" (val) : "memory"); |
7142 | +} |
7143 | + |
7144 | static inline unsigned long read_cr4(void) |
7145 | { |
7146 | unsigned long cr4; |
7147 | @@ -101,7 +107,7 @@ |
7148 | |
7149 | static inline void write_cr4(unsigned long val) |
7150 | { |
7151 | - asm volatile("movq %0,%%cr4" :: "r" (val)); |
7152 | + asm volatile("movq %0,%%cr4" :: "r" (val) : "memory"); |
7153 | } |
7154 | |
7155 | #define stts() (HYPERVISOR_fpu_taskswitch(1)) |
7156 | @@ -122,100 +128,6 @@ |
7157 | |
7158 | #define nop() __asm__ __volatile__ ("nop") |
7159 | |
7160 | -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) |
7161 | - |
7162 | -#define tas(ptr) (xchg((ptr),1)) |
7163 | - |
7164 | -#define __xg(x) ((volatile long *)(x)) |
7165 | - |
7166 | -static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) |
7167 | -{ |
7168 | - *ptr = val; |
7169 | -} |
7170 | - |
7171 | -#define _set_64bit set_64bit |
7172 | - |
7173 | -/* |
7174 | - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway |
7175 | - * Note 2: xchg has side effect, so that attribute volatile is necessary, |
7176 | - * but generally the primitive is invalid, *ptr is output argument. --ANK |
7177 | - */ |
7178 | -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) |
7179 | -{ |
7180 | - switch (size) { |
7181 | - case 1: |
7182 | - __asm__ __volatile__("xchgb %b0,%1" |
7183 | - :"=q" (x) |
7184 | - :"m" (*__xg(ptr)), "0" (x) |
7185 | - :"memory"); |
7186 | - break; |
7187 | - case 2: |
7188 | - __asm__ __volatile__("xchgw %w0,%1" |
7189 | - :"=r" (x) |
7190 | - :"m" (*__xg(ptr)), "0" (x) |
7191 | - :"memory"); |
7192 | - break; |
7193 | - case 4: |
7194 | - __asm__ __volatile__("xchgl %k0,%1" |
7195 | - :"=r" (x) |
7196 | - :"m" (*__xg(ptr)), "0" (x) |
7197 | - :"memory"); |
7198 | - break; |
7199 | - case 8: |
7200 | - __asm__ __volatile__("xchgq %0,%1" |
7201 | - :"=r" (x) |
7202 | - :"m" (*__xg(ptr)), "0" (x) |
7203 | - :"memory"); |
7204 | - break; |
7205 | - } |
7206 | - return x; |
7207 | -} |
7208 | - |
7209 | -/* |
7210 | - * Atomic compare and exchange. Compare OLD with MEM, if identical, |
7211 | - * store NEW in MEM. Return the initial value in MEM. Success is |
7212 | - * indicated by comparing RETURN with OLD. |
7213 | - */ |
7214 | - |
7215 | -#define __HAVE_ARCH_CMPXCHG 1 |
7216 | - |
7217 | -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, |
7218 | - unsigned long new, int size) |
7219 | -{ |
7220 | - unsigned long prev; |
7221 | - switch (size) { |
7222 | - case 1: |
7223 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" |
7224 | - : "=a"(prev) |
7225 | - : "q"(new), "m"(*__xg(ptr)), "0"(old) |
7226 | - : "memory"); |
7227 | - return prev; |
7228 | - case 2: |
7229 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" |
7230 | - : "=a"(prev) |
7231 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) |
7232 | - : "memory"); |
7233 | - return prev; |
7234 | - case 4: |
7235 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2" |
7236 | - : "=a"(prev) |
7237 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) |
7238 | - : "memory"); |
7239 | - return prev; |
7240 | - case 8: |
7241 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2" |
7242 | - : "=a"(prev) |
7243 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) |
7244 | - : "memory"); |
7245 | - return prev; |
7246 | - } |
7247 | - return old; |
7248 | -} |
7249 | - |
7250 | -#define cmpxchg(ptr,o,n)\ |
7251 | - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ |
7252 | - (unsigned long)(n),sizeof(*(ptr)))) |
7253 | - |
7254 | #ifdef CONFIG_SMP |
7255 | #define smp_mb() mb() |
7256 | #define smp_rmb() rmb() |
7257 | --- a/include/asm-x86/mach-xen/asm/tlbflush_32.h |
7258 | +++ b/include/asm-x86/mach-xen/asm/tlbflush_32.h |
7259 | @@ -29,8 +29,13 @@ |
7260 | * and page-granular flushes are available only on i486 and up. |
7261 | */ |
7262 | |
7263 | +#define TLB_FLUSH_ALL 0xffffffff |
7264 | + |
7265 | + |
7266 | #ifndef CONFIG_SMP |
7267 | |
7268 | +#include <linux/sched.h> |
7269 | + |
7270 | #define flush_tlb() __flush_tlb() |
7271 | #define flush_tlb_all() __flush_tlb_all() |
7272 | #define local_flush_tlb() __flush_tlb() |
7273 | @@ -55,7 +60,7 @@ |
7274 | __flush_tlb(); |
7275 | } |
7276 | |
7277 | -#else |
7278 | +#else /* SMP */ |
7279 | |
7280 | #include <asm/smp.h> |
7281 | |
7282 | @@ -84,9 +89,7 @@ |
7283 | char __cacheline_padding[L1_CACHE_BYTES-8]; |
7284 | }; |
7285 | DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); |
7286 | - |
7287 | - |
7288 | -#endif |
7289 | +#endif /* SMP */ |
7290 | |
7291 | #define flush_tlb_kernel_range(start, end) flush_tlb_all() |
7292 | |
7293 | --- a/include/asm-x86/mach-xen/asm/tlbflush_64.h |
7294 | +++ b/include/asm-x86/mach-xen/asm/tlbflush_64.h |
7295 | @@ -2,7 +2,9 @@ |
7296 | #define _X8664_TLBFLUSH_H |
7297 | |
7298 | #include <linux/mm.h> |
7299 | +#include <linux/sched.h> |
7300 | #include <asm/processor.h> |
7301 | +#include <asm/system.h> |
7302 | |
7303 | #define __flush_tlb() xen_tlb_flush() |
7304 | |
7305 | --- a/lib/swiotlb-xen.c |
7306 | +++ b/lib/swiotlb-xen.c |
7307 | @@ -729,7 +729,6 @@ |
7308 | return (mask >= ((1UL << dma_bits) - 1)); |
7309 | } |
7310 | |
7311 | -EXPORT_SYMBOL(swiotlb_init); |
7312 | EXPORT_SYMBOL(swiotlb_map_single); |
7313 | EXPORT_SYMBOL(swiotlb_unmap_single); |
7314 | EXPORT_SYMBOL(swiotlb_map_sg); |
7315 | --- a/net/core/dev.c |
7316 | +++ b/net/core/dev.c |
7317 | @@ -1590,12 +1590,17 @@ |
7318 | inline int skb_checksum_setup(struct sk_buff *skb) |
7319 | { |
7320 | if (skb->proto_csum_blank) { |
7321 | + struct iphdr *iph; |
7322 | + unsigned char *th; |
7323 | + |
7324 | if (skb->protocol != htons(ETH_P_IP)) |
7325 | goto out; |
7326 | - skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl; |
7327 | - if (skb->h.raw >= skb->tail) |
7328 | + iph = ip_hdr(skb); |
7329 | + th = skb_network_header(skb) + 4 * iph->ihl; |
7330 | + if (th >= skb_tail_pointer(skb)) |
7331 | goto out; |
7332 | - switch (skb->nh.iph->protocol) { |
7333 | + skb->csum_start = th - skb->head; |
7334 | + switch (iph->protocol) { |
7335 | case IPPROTO_TCP: |
7336 | skb->csum_offset = offsetof(struct tcphdr, check); |
7337 | break; |
7338 | @@ -1606,10 +1611,10 @@ |
7339 | if (net_ratelimit()) |
7340 | printk(KERN_ERR "Attempting to checksum a non-" |
7341 | "TCP/UDP packet, dropping a protocol" |
7342 | - " %d packet", skb->nh.iph->protocol); |
7343 | + " %d packet", iph->protocol); |
7344 | goto out; |
7345 | } |
7346 | - if ((skb->h.raw + skb->csum_offset + 2) > skb->tail) |
7347 | + if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) |
7348 | goto out; |
7349 | skb->ip_summed = CHECKSUM_PARTIAL; |
7350 | skb->proto_csum_blank = 0; |
7351 | --- a/scripts/Makefile.xen.awk |
7352 | +++ b/scripts/Makefile.xen.awk |
7353 | @@ -13,7 +13,7 @@ |
7354 | next |
7355 | } |
7356 | |
7357 | -/:[[:space:]]*%\.[cS][[:space:]]/ { |
7358 | +/:[[:space:]]*\$\(src\)\/%\.[cS][[:space:]]/ { |
7359 | line = gensub(/%.([cS])/, "%-xen.\\1", "g", $0) |
7360 | line = gensub(/(single-used-m)/, "xen-\\1", "g", line) |
7361 | print line |