Annotation of /trunk/kernel26-xen/patches-2.6.25-r1/1023-2.6.25-xen-patch-2.6.22.patch
Revision 613
Sat May 24 01:13:37 2008 UTC by niro
File size: 211855 byte(s)
-fixed patch
1 | niro | 609 | From: www.kernel.org |
2 | Subject: Update to 2.6.22 | ||
3 | Patch-mainline: 2.6.22 | ||
4 | |||
5 | Automatically created from "patches.kernel.org/patch-2.6.22" by xen-port-patches.py | ||
6 | |||
7 | Acked-by: jbeulich@novell.com | ||
8 | |||
9 | --- | ||
10 | arch/x86/Kconfig | 5 | ||
11 | arch/x86/ia32/ia32entry-xen.S | 18 - | ||
12 | arch/x86/kernel/Makefile | 2 | ||
13 | arch/x86/kernel/acpi/sleep_64-xen.c | 26 - | ||
14 | arch/x86/kernel/apic_32-xen.c | 1 | ||
15 | arch/x86/kernel/apic_64-xen.c | 1 | ||
16 | arch/x86/kernel/cpu/common-xen.c | 224 ++++--------- | ||
17 | arch/x86/kernel/cpu/mtrr/main-xen.c | 2 | ||
18 | arch/x86/kernel/e820_32-xen.c | 46 +- | ||
19 | arch/x86/kernel/e820_64-xen.c | 28 - | ||
20 | arch/x86/kernel/early_printk-xen.c | 27 - | ||
21 | arch/x86/kernel/entry_32-xen.S | 30 - | ||
22 | arch/x86/kernel/entry_64-xen.S | 7 | ||
23 | arch/x86/kernel/genapic_64-xen.c | 106 +----- | ||
24 | arch/x86/kernel/genapic_xen_64.c | 3 | ||
25 | arch/x86/kernel/head64-xen.c | 32 + | ||
26 | arch/x86/kernel/head_32-xen.S | 101 ------ | ||
27 | arch/x86/kernel/head_64-xen.S | 37 -- | ||
28 | arch/x86/kernel/io_apic_32-xen.c | 43 -- | ||
29 | arch/x86/kernel/io_apic_64-xen.c | 39 -- | ||
30 | arch/x86/kernel/ioport_32-xen.c | 2 | ||
31 | arch/x86/kernel/ioport_64-xen.c | 2 | ||
32 | arch/x86/kernel/irq_32-xen.c | 3 | ||
33 | arch/x86/kernel/irq_64-xen.c | 34 +- | ||
34 | arch/x86/kernel/ldt_32-xen.c | 1 | ||
35 | arch/x86/kernel/ldt_64-xen.c | 1 | ||
36 | arch/x86/kernel/microcode-xen.c | 2 | ||
37 | arch/x86/kernel/mpparse_32-xen.c | 3 | ||
38 | arch/x86/kernel/mpparse_64-xen.c | 3 | ||
39 | arch/x86/kernel/pci-dma_32-xen.c | 29 + | ||
40 | arch/x86/kernel/pci-swiotlb_64-xen.c | 2 | ||
41 | arch/x86/kernel/process_32-xen.c | 27 + | ||
42 | arch/x86/kernel/process_64-xen.c | 16 | ||
43 | arch/x86/kernel/quirks-xen.c | 63 --- | ||
44 | arch/x86/kernel/setup64-xen.c | 17 - | ||
45 | arch/x86/kernel/setup_64-xen.c | 30 - | ||
46 | arch/x86/kernel/smp_32-xen.c | 191 ++++------- | ||
47 | arch/x86/kernel/smp_64-xen.c | 29 - | ||
48 | arch/x86/kernel/time_32-xen.c | 62 +-- | ||
49 | arch/x86/kernel/traps_32-xen.c | 46 +- | ||
50 | arch/x86/kernel/traps_64-xen.c | 55 +-- | ||
51 | arch/x86/kernel/vsyscall_64-xen.c | 73 +++- | ||
52 | arch/x86/mm/fault_32-xen.c | 42 +- | ||
53 | arch/x86/mm/fault_64-xen.c | 15 | ||
54 | arch/x86/mm/highmem_32-xen.c | 14 | ||
55 | arch/x86/mm/init_32-xen.c | 157 ++++++--- | ||
56 | arch/x86/mm/init_64-xen.c | 132 ++++--- | ||
57 | arch/x86/mm/ioremap_32-xen.c | 1 | ||
58 | arch/x86/mm/pageattr_64-xen.c | 27 + | ||
59 | arch/x86/mm/pgtable_32-xen.c | 210 +++++++----- | ||
60 | drivers/char/tpm/tpm_xen.c | 2 | ||
61 | drivers/xen/blkfront/blkfront.c | 2 | ||
62 | drivers/xen/char/mem.c | 1 | ||
63 | drivers/xen/core/hypervisor_sysfs.c | 2 | ||
64 | drivers/xen/core/smpboot.c | 49 +- | ||
65 | drivers/xen/core/xen_sysfs.c | 20 - | ||
66 | drivers/xen/netback/netback.c | 14 | ||
67 | drivers/xen/netfront/netfront.c | 2 | ||
68 | drivers/xen/pciback/xenbus.c | 2 | ||
69 | drivers/xen/pcifront/xenbus.c | 4 | ||
70 | drivers/xen/sfc_netback/accel_fwd.c | 7 | ||
71 | drivers/xen/sfc_netback/accel_solarflare.c | 2 | ||
72 | drivers/xen/sfc_netfront/accel_tso.c | 28 - | ||
73 | drivers/xen/sfc_netfront/accel_vi.c | 4 | ||
74 | drivers/xen/sfc_netfront/accel_xenbus.c | 4 | ||
75 | drivers/xen/xenoprof/xenoprofile.c | 2 | ||
76 | fs/aio.c | 7 | ||
77 | include/asm-x86/mach-xen/asm/desc_32.h | 119 ++++--- | ||
78 | include/asm-x86/mach-xen/asm/desc_64.h | 30 - | ||
79 | include/asm-x86/mach-xen/asm/dma-mapping_64.h | 2 | ||
80 | include/asm-x86/mach-xen/asm/fixmap_32.h | 9 | ||
81 | include/asm-x86/mach-xen/asm/fixmap_64.h | 1 | ||
82 | include/asm-x86/mach-xen/asm/highmem.h | 6 | ||
83 | include/asm-x86/mach-xen/asm/io_32.h | 13 | ||
84 | include/asm-x86/mach-xen/asm/irqflags_32.h | 78 ++-- | ||
85 | include/asm-x86/mach-xen/asm/irqflags_64.h | 19 - | ||
86 | include/asm-x86/mach-xen/asm/mmu.h | 8 | ||
87 | include/asm-x86/mach-xen/asm/mmu_64.h | 8 | ||
88 | include/asm-x86/mach-xen/asm/mmu_context_32.h | 29 + | ||
89 | include/asm-x86/mach-xen/asm/mmu_context_64.h | 3 | ||
90 | include/asm-x86/mach-xen/asm/page_64.h | 61 +-- | ||
91 | include/asm-x86/mach-xen/asm/pgalloc_32.h | 3 | ||
92 | include/asm-x86/mach-xen/asm/pgalloc_64.h | 15 | ||
93 | include/asm-x86/mach-xen/asm/pgtable-2level.h | 43 +- | ||
94 | include/asm-x86/mach-xen/asm/pgtable-3level-defs.h | 2 | ||
95 | include/asm-x86/mach-xen/asm/pgtable-3level.h | 61 ++- | ||
96 | include/asm-x86/mach-xen/asm/pgtable_32.h | 80 ++-- | ||
97 | include/asm-x86/mach-xen/asm/pgtable_64.h | 83 ++--- | ||
98 | include/asm-x86/mach-xen/asm/processor_32.h | 141 +++----- | ||
99 | include/asm-x86/mach-xen/asm/processor_64.h | 55 --- | ||
100 | include/asm-x86/mach-xen/asm/scatterlist_32.h | 2 | ||
101 | include/asm-x86/mach-xen/asm/segment_32.h | 10 | ||
102 | include/asm-x86/mach-xen/asm/smp_32.h | 117 +++++-- | ||
103 | include/asm-x86/mach-xen/asm/smp_64.h | 20 - | ||
104 | include/asm-x86/mach-xen/asm/system_32.h | 348 ++++----------------- | ||
105 | include/asm-x86/mach-xen/asm/system_64.h | 106 ------ | ||
106 | include/asm-x86/mach-xen/asm/tlbflush_32.h | 11 | ||
107 | include/asm-x86/mach-xen/asm/tlbflush_64.h | 2 | ||
108 | lib/swiotlb-xen.c | 1 | ||
109 | net/core/dev.c | 15 | ||
110 | scripts/Makefile.xen.awk | 2 | ||
111 | 101 files changed, 1642 insertions(+), 2080 deletions(-) | ||
112 | |||
113 | --- a/arch/x86/Kconfig | ||
114 | +++ b/arch/x86/Kconfig | ||
115 | @@ -1222,7 +1222,7 @@ | ||
116 | |||
117 | config RELOCATABLE | ||
118 | bool "Build a relocatable kernel (EXPERIMENTAL)" | ||
119 | - depends on EXPERIMENTAL && !X86_XEN | ||
120 | + depends on EXPERIMENTAL && !X86_XEN && !X86_64_XEN | ||
121 | help | ||
122 | This builds a kernel image that retains relocation information | ||
123 | so it can be loaded someplace besides the default 1MB. | ||
124 | @@ -1276,7 +1276,6 @@ | ||
125 | def_bool y | ||
126 | prompt "Compat VDSO support" | ||
127 | depends on X86_32 || IA32_EMULATION | ||
128 | - depends on !X86_XEN | ||
129 | help | ||
130 | Map the 32-bit VDSO to the predictable old-style address too. | ||
131 | ---help--- | ||
132 | @@ -1453,7 +1452,7 @@ | ||
133 | bool "PCI support" if !X86_VISWS | ||
134 | depends on !X86_VOYAGER | ||
135 | default y | ||
136 | - select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) | ||
137 | + select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC && !X86_XEN && !X86_64_XEN) | ||
138 | help | ||
139 | Find out whether you have a PCI motherboard. PCI is the name of a | ||
140 | bus system, i.e. the way the CPU talks to the other stuff inside | ||
141 | --- a/arch/x86/ia32/ia32entry-xen.S | ||
142 | +++ b/arch/x86/ia32/ia32entry-xen.S | ||
143 | @@ -431,11 +431,7 @@ | ||
144 | .quad sys_symlink | ||
145 | .quad sys_lstat | ||
146 | .quad sys_readlink /* 85 */ | ||
147 | -#ifdef CONFIG_IA32_AOUT | ||
148 | .quad sys_uselib | ||
149 | -#else | ||
150 | - .quad quiet_ni_syscall | ||
151 | -#endif | ||
152 | .quad sys_swapon | ||
153 | .quad sys_reboot | ||
154 | .quad compat_sys_old_readdir | ||
155 | @@ -574,7 +570,7 @@ | ||
156 | .quad quiet_ni_syscall /* tux */ | ||
157 | .quad quiet_ni_syscall /* security */ | ||
158 | .quad sys_gettid | ||
159 | - .quad sys_readahead /* 225 */ | ||
160 | + .quad sys32_readahead /* 225 */ | ||
161 | .quad sys_setxattr | ||
162 | .quad sys_lsetxattr | ||
163 | .quad sys_fsetxattr | ||
164 | @@ -599,7 +595,7 @@ | ||
165 | .quad compat_sys_io_getevents | ||
166 | .quad compat_sys_io_submit | ||
167 | .quad sys_io_cancel | ||
168 | - .quad sys_fadvise64 /* 250 */ | ||
169 | + .quad sys32_fadvise64 /* 250 */ | ||
170 | .quad quiet_ni_syscall /* free_huge_pages */ | ||
171 | .quad sys_exit_group | ||
172 | .quad sys32_lookup_dcookie | ||
173 | @@ -663,10 +659,14 @@ | ||
174 | .quad compat_sys_set_robust_list | ||
175 | .quad compat_sys_get_robust_list | ||
176 | .quad sys_splice | ||
177 | - .quad sys_sync_file_range | ||
178 | - .quad sys_tee | ||
179 | + .quad sys32_sync_file_range | ||
180 | + .quad sys_tee /* 315 */ | ||
181 | .quad compat_sys_vmsplice | ||
182 | .quad compat_sys_move_pages | ||
183 | .quad sys_getcpu | ||
184 | .quad sys_epoll_pwait | ||
185 | -ia32_syscall_end: | ||
186 | + .quad compat_sys_utimensat /* 320 */ | ||
187 | + .quad compat_sys_signalfd | ||
188 | + .quad compat_sys_timerfd | ||
189 | + .quad sys_eventfd | ||
190 | +ia32_syscall_end: | ||
191 | --- a/arch/x86/kernel/Makefile | ||
192 | +++ b/arch/x86/kernel/Makefile | ||
193 | @@ -106,4 +106,4 @@ | ||
194 | |||
195 | disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ | ||
196 | smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o | ||
197 | -%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := | ||
198 | +%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := | ||
199 | --- a/arch/x86/kernel/acpi/sleep_64-xen.c | ||
200 | +++ b/arch/x86/kernel/acpi/sleep_64-xen.c | ||
201 | @@ -60,19 +60,6 @@ | ||
202 | extern char wakeup_start, wakeup_end; | ||
203 | |||
204 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); | ||
205 | - | ||
206 | -static pgd_t low_ptr; | ||
207 | - | ||
208 | -static void init_low_mapping(void) | ||
209 | -{ | ||
210 | - pgd_t *slot0 = pgd_offset(current->mm, 0UL); | ||
211 | - low_ptr = *slot0; | ||
212 | - /* FIXME: We're playing with the current task's page tables here, which | ||
213 | - * is potentially dangerous on SMP systems. | ||
214 | - */ | ||
215 | - set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); | ||
216 | - local_flush_tlb(); | ||
217 | -} | ||
218 | #endif | ||
219 | |||
220 | /** | ||
221 | @@ -84,8 +71,6 @@ | ||
222 | int acpi_save_state_mem(void) | ||
223 | { | ||
224 | #ifndef CONFIG_ACPI_PV_SLEEP | ||
225 | - init_low_mapping(); | ||
226 | - | ||
227 | memcpy((void *)acpi_wakeup_address, &wakeup_start, | ||
228 | &wakeup_end - &wakeup_start); | ||
229 | acpi_copy_wakeup_routine(acpi_wakeup_address); | ||
230 | @@ -98,10 +83,6 @@ | ||
231 | */ | ||
232 | void acpi_restore_state_mem(void) | ||
233 | { | ||
234 | -#ifndef CONFIG_ACPI_PV_SLEEP | ||
235 | - set_pgd(pgd_offset(current->mm, 0UL), low_ptr); | ||
236 | - local_flush_tlb(); | ||
237 | -#endif | ||
238 | } | ||
239 | |||
240 | /** | ||
241 | @@ -115,10 +96,11 @@ | ||
242 | void __init acpi_reserve_bootmem(void) | ||
243 | { | ||
244 | #ifndef CONFIG_ACPI_PV_SLEEP | ||
245 | - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); | ||
246 | - if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) | ||
247 | + acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); | ||
248 | + if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2)) | ||
249 | printk(KERN_CRIT | ||
250 | - "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); | ||
251 | + "ACPI: Wakeup code way too big, will crash on attempt" | ||
252 | + " to suspend\n"); | ||
253 | #endif | ||
254 | } | ||
255 | |||
256 | --- a/arch/x86/kernel/apic_32-xen.c | ||
257 | +++ b/arch/x86/kernel/apic_32-xen.c | ||
258 | @@ -19,7 +19,6 @@ | ||
259 | #include <linux/mm.h> | ||
260 | #include <linux/delay.h> | ||
261 | #include <linux/bootmem.h> | ||
262 | -#include <linux/smp_lock.h> | ||
263 | #include <linux/interrupt.h> | ||
264 | #include <linux/mc146818rtc.h> | ||
265 | #include <linux/kernel_stat.h> | ||
266 | --- a/arch/x86/kernel/apic_64-xen.c | ||
267 | +++ b/arch/x86/kernel/apic_64-xen.c | ||
268 | @@ -19,7 +19,6 @@ | ||
269 | #include <linux/mm.h> | ||
270 | #include <linux/delay.h> | ||
271 | #include <linux/bootmem.h> | ||
272 | -#include <linux/smp_lock.h> | ||
273 | #include <linux/interrupt.h> | ||
274 | #include <linux/mc146818rtc.h> | ||
275 | #include <linux/kernel_stat.h> | ||
276 | --- a/arch/x86/kernel/cpu/common-xen.c | ||
277 | +++ b/arch/x86/kernel/cpu/common-xen.c | ||
278 | @@ -22,16 +22,40 @@ | ||
279 | #define phys_pkg_id(a,b) a | ||
280 | #endif | ||
281 | #endif | ||
282 | -#include <asm/pda.h> | ||
283 | #include <asm/hypervisor.h> | ||
284 | |||
285 | #include "cpu.h" | ||
286 | |||
287 | -DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); | ||
288 | -EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); | ||
289 | +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | ||
290 | + [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, | ||
291 | + [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, | ||
292 | + [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, | ||
293 | + [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, | ||
294 | +#ifndef CONFIG_XEN | ||
295 | + /* | ||
296 | + * Segments used for calling PnP BIOS have byte granularity. | ||
297 | + * They code segments and data segments have fixed 64k limits, | ||
298 | + * the transfer segment sizes are set at run time. | ||
299 | + */ | ||
300 | + [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | ||
301 | + [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ | ||
302 | + [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ | ||
303 | + [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ | ||
304 | + [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ | ||
305 | + /* | ||
306 | + * The APM segments have byte granularity and their bases | ||
307 | + * are set at run time. All have 64k limits. | ||
308 | + */ | ||
309 | + [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | ||
310 | + /* 16-bit code */ | ||
311 | + [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, | ||
312 | + [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ | ||
313 | |||
314 | -struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; | ||
315 | -EXPORT_SYMBOL(_cpu_pda); | ||
316 | + [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, | ||
317 | +#endif | ||
318 | + [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, | ||
319 | +} }; | ||
320 | +EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | ||
321 | |||
322 | static int cachesize_override __cpuinitdata = -1; | ||
323 | static int disable_x86_fxsr __cpuinitdata; | ||
324 | @@ -373,7 +397,7 @@ | ||
325 | /* | ||
326 | * This does the hard work of actually picking apart the CPU stuff... | ||
327 | */ | ||
328 | -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | ||
329 | +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | ||
330 | { | ||
331 | int i; | ||
332 | |||
333 | @@ -484,15 +508,22 @@ | ||
334 | |||
335 | /* Init Machine Check Exception if available. */ | ||
336 | mcheck_init(c); | ||
337 | +} | ||
338 | |||
339 | - if (c == &boot_cpu_data) | ||
340 | - sysenter_setup(); | ||
341 | +void __init identify_boot_cpu(void) | ||
342 | +{ | ||
343 | + identify_cpu(&boot_cpu_data); | ||
344 | + sysenter_setup(); | ||
345 | enable_sep_cpu(); | ||
346 | + mtrr_bp_init(); | ||
347 | +} | ||
348 | |||
349 | - if (c == &boot_cpu_data) | ||
350 | - mtrr_bp_init(); | ||
351 | - else | ||
352 | - mtrr_ap_init(); | ||
353 | +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | ||
354 | +{ | ||
355 | + BUG_ON(c == &boot_cpu_data); | ||
356 | + identify_cpu(c); | ||
357 | + enable_sep_cpu(); | ||
358 | + mtrr_ap_init(); | ||
359 | } | ||
360 | |||
361 | #ifdef CONFIG_X86_HT | ||
362 | @@ -606,136 +637,47 @@ | ||
363 | #endif | ||
364 | } | ||
365 | |||
366 | -/* Make sure %gs is initialized properly in idle threads */ | ||
367 | +/* Make sure %fs is initialized properly in idle threads */ | ||
368 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | ||
369 | { | ||
370 | memset(regs, 0, sizeof(struct pt_regs)); | ||
371 | - regs->xfs = __KERNEL_PDA; | ||
372 | + regs->xfs = __KERNEL_PERCPU; | ||
373 | return regs; | ||
374 | } | ||
375 | |||
376 | -static __cpuinit int alloc_gdt(int cpu) | ||
377 | +/* Current gdt points %fs at the "master" per-cpu area: after this, | ||
378 | + * it's on the real one. */ | ||
379 | +void switch_to_new_gdt(void) | ||
380 | { | ||
381 | - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
382 | - struct desc_struct *gdt; | ||
383 | - struct i386_pda *pda; | ||
384 | - | ||
385 | - gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
386 | - pda = cpu_pda(cpu); | ||
387 | - | ||
388 | - /* | ||
389 | - * This is a horrible hack to allocate the GDT. The problem | ||
390 | - * is that cpu_init() is called really early for the boot CPU | ||
391 | - * (and hence needs bootmem) but much later for the secondary | ||
392 | - * CPUs, when bootmem will have gone away | ||
393 | - */ | ||
394 | - if (NODE_DATA(0)->bdata->node_bootmem_map) { | ||
395 | - BUG_ON(gdt != NULL || pda != NULL); | ||
396 | - | ||
397 | - gdt = alloc_bootmem_pages(PAGE_SIZE); | ||
398 | - pda = alloc_bootmem(sizeof(*pda)); | ||
399 | - /* alloc_bootmem(_pages) panics on failure, so no check */ | ||
400 | - | ||
401 | - memset(gdt, 0, PAGE_SIZE); | ||
402 | - memset(pda, 0, sizeof(*pda)); | ||
403 | - } else { | ||
404 | - /* GDT and PDA might already have been allocated if | ||
405 | - this is a CPU hotplug re-insertion. */ | ||
406 | - if (gdt == NULL) | ||
407 | - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); | ||
408 | - | ||
409 | - if (pda == NULL) | ||
410 | - pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); | ||
411 | - | ||
412 | - if (unlikely(!gdt || !pda)) { | ||
413 | - free_pages((unsigned long)gdt, 0); | ||
414 | - kfree(pda); | ||
415 | - return 0; | ||
416 | - } | ||
417 | - } | ||
418 | - | ||
419 | - cpu_gdt_descr->address = (unsigned long)gdt; | ||
420 | - cpu_pda(cpu) = pda; | ||
421 | - | ||
422 | - return 1; | ||
423 | -} | ||
424 | - | ||
425 | -/* Initial PDA used by boot CPU */ | ||
426 | -struct i386_pda boot_pda = { | ||
427 | - ._pda = &boot_pda, | ||
428 | - .cpu_number = 0, | ||
429 | - .pcurrent = &init_task, | ||
430 | -}; | ||
431 | - | ||
432 | -static inline void set_kernel_fs(void) | ||
433 | -{ | ||
434 | - /* Set %fs for this CPU's PDA. Memory clobber is to create a | ||
435 | - barrier with respect to any PDA operations, so the compiler | ||
436 | - doesn't move any before here. */ | ||
437 | - asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); | ||
438 | -} | ||
439 | - | ||
440 | -/* Initialize the CPU's GDT and PDA. The boot CPU does this for | ||
441 | - itself, but secondaries find this done for them. */ | ||
442 | -__cpuinit int init_gdt(int cpu, struct task_struct *idle) | ||
443 | -{ | ||
444 | - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
445 | - struct desc_struct *gdt; | ||
446 | - struct i386_pda *pda; | ||
447 | - | ||
448 | - /* For non-boot CPUs, the GDT and PDA should already have been | ||
449 | - allocated. */ | ||
450 | - if (!alloc_gdt(cpu)) { | ||
451 | - printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); | ||
452 | - return 0; | ||
453 | - } | ||
454 | - | ||
455 | - gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
456 | - pda = cpu_pda(cpu); | ||
457 | - | ||
458 | - BUG_ON(gdt == NULL || pda == NULL); | ||
459 | - | ||
460 | - /* | ||
461 | - * Initialize the per-CPU GDT with the boot GDT, | ||
462 | - * and set up the GDT descriptor: | ||
463 | - */ | ||
464 | - memcpy(gdt, cpu_gdt_table, GDT_SIZE); | ||
465 | - cpu_gdt_descr->size = GDT_SIZE - 1; | ||
466 | - | ||
467 | - pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, | ||
468 | - (u32 *)&gdt[GDT_ENTRY_PDA].b, | ||
469 | - (unsigned long)pda, sizeof(*pda) - 1, | ||
470 | - 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ | ||
471 | - | ||
472 | - memset(pda, 0, sizeof(*pda)); | ||
473 | - pda->_pda = pda; | ||
474 | - pda->cpu_number = cpu; | ||
475 | - pda->pcurrent = idle; | ||
476 | - | ||
477 | - return 1; | ||
478 | -} | ||
479 | - | ||
480 | -void __cpuinit cpu_set_gdt(int cpu) | ||
481 | -{ | ||
482 | - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
483 | + struct Xgt_desc_struct gdt_descr; | ||
484 | unsigned long va, frames[16]; | ||
485 | int f; | ||
486 | |||
487 | - for (va = cpu_gdt_descr->address, f = 0; | ||
488 | - va < cpu_gdt_descr->address + cpu_gdt_descr->size; | ||
489 | + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | ||
490 | + gdt_descr.size = GDT_SIZE - 1; | ||
491 | + | ||
492 | + for (va = gdt_descr.address, f = 0; | ||
493 | + va < gdt_descr.address + gdt_descr.size; | ||
494 | va += PAGE_SIZE, f++) { | ||
495 | frames[f] = virt_to_mfn(va); | ||
496 | make_lowmem_page_readonly( | ||
497 | (void *)va, XENFEAT_writable_descriptor_tables); | ||
498 | } | ||
499 | - BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8)); | ||
500 | - | ||
501 | - set_kernel_fs(); | ||
502 | + if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8)) | ||
503 | + BUG(); | ||
504 | + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); | ||
505 | } | ||
506 | |||
507 | -/* Common CPU init for both boot and secondary CPUs */ | ||
508 | -static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | ||
509 | +/* | ||
510 | + * cpu_init() initializes state that is per-CPU. Some data is already | ||
511 | + * initialized (naturally) in the bootstrap process, such as the GDT | ||
512 | + * and IDT. We reload them nevertheless, this function acts as a | ||
513 | + * 'CPU state barrier', nothing should get across. | ||
514 | + */ | ||
515 | +void __cpuinit cpu_init(void) | ||
516 | { | ||
517 | + int cpu = smp_processor_id(); | ||
518 | + struct task_struct *curr = current; | ||
519 | #ifndef CONFIG_X86_NO_TSS | ||
520 | struct tss_struct * t = &per_cpu(init_tss, cpu); | ||
521 | #endif | ||
522 | @@ -757,6 +699,8 @@ | ||
523 | set_in_cr4(X86_CR4_TSD); | ||
524 | } | ||
525 | |||
526 | + switch_to_new_gdt(); | ||
527 | + | ||
528 | /* | ||
529 | * Set up and load the per-CPU TSS and LDT | ||
530 | */ | ||
531 | @@ -794,38 +738,6 @@ | ||
532 | mxcsr_feature_mask_init(); | ||
533 | } | ||
534 | |||
535 | -/* Entrypoint to initialize secondary CPU */ | ||
536 | -void __cpuinit secondary_cpu_init(void) | ||
537 | -{ | ||
538 | - int cpu = smp_processor_id(); | ||
539 | - struct task_struct *curr = current; | ||
540 | - | ||
541 | - _cpu_init(cpu, curr); | ||
542 | -} | ||
543 | - | ||
544 | -/* | ||
545 | - * cpu_init() initializes state that is per-CPU. Some data is already | ||
546 | - * initialized (naturally) in the bootstrap process, such as the GDT | ||
547 | - * and IDT. We reload them nevertheless, this function acts as a | ||
548 | - * 'CPU state barrier', nothing should get across. | ||
549 | - */ | ||
550 | -void __cpuinit cpu_init(void) | ||
551 | -{ | ||
552 | - int cpu = smp_processor_id(); | ||
553 | - struct task_struct *curr = current; | ||
554 | - | ||
555 | - /* Set up the real GDT and PDA, so we can transition from the | ||
556 | - boot versions. */ | ||
557 | - if (!init_gdt(cpu, curr)) { | ||
558 | - /* failed to allocate something; not much we can do... */ | ||
559 | - for (;;) | ||
560 | - local_irq_enable(); | ||
561 | - } | ||
562 | - | ||
563 | - cpu_set_gdt(cpu); | ||
564 | - _cpu_init(cpu, curr); | ||
565 | -} | ||
566 | - | ||
567 | #ifdef CONFIG_HOTPLUG_CPU | ||
568 | void __cpuinit cpu_uninit(void) | ||
569 | { | ||
570 | --- a/arch/x86/kernel/cpu/mtrr/main-xen.c | ||
571 | +++ b/arch/x86/kernel/cpu/mtrr/main-xen.c | ||
572 | @@ -167,7 +167,7 @@ | ||
573 | EXPORT_SYMBOL(mtrr_add); | ||
574 | EXPORT_SYMBOL(mtrr_del); | ||
575 | |||
576 | -void __init mtrr_bp_init(void) | ||
577 | +__init void mtrr_bp_init(void) | ||
578 | { | ||
579 | } | ||
580 | |||
581 | --- a/arch/x86/kernel/e820_32-xen.c | ||
582 | +++ b/arch/x86/kernel/e820_32-xen.c | ||
583 | @@ -162,26 +162,27 @@ | ||
584 | |||
585 | static int __init romsignature(const unsigned char *rom) | ||
586 | { | ||
587 | + const unsigned short * const ptr = (const unsigned short *)rom; | ||
588 | unsigned short sig; | ||
589 | |||
590 | - return probe_kernel_address((const unsigned short *)rom, sig) == 0 && | ||
591 | - sig == ROMSIGNATURE; | ||
592 | + return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; | ||
593 | } | ||
594 | |||
595 | -static int __init romchecksum(unsigned char *rom, unsigned long length) | ||
596 | +static int __init romchecksum(const unsigned char *rom, unsigned long length) | ||
597 | { | ||
598 | - unsigned char sum; | ||
599 | + unsigned char sum, c; | ||
600 | |||
601 | - for (sum = 0; length; length--) | ||
602 | - sum += *rom++; | ||
603 | - return sum == 0; | ||
604 | + for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) | ||
605 | + sum += c; | ||
606 | + return !length && !sum; | ||
607 | } | ||
608 | |||
609 | static void __init probe_roms(void) | ||
610 | { | ||
611 | + const unsigned char *rom; | ||
612 | unsigned long start, length, upper; | ||
613 | - unsigned char *rom; | ||
614 | - int i; | ||
615 | + unsigned char c; | ||
616 | + int i; | ||
617 | |||
618 | #ifdef CONFIG_XEN | ||
619 | /* Nothing to do if not running in dom0. */ | ||
620 | @@ -198,8 +199,11 @@ | ||
621 | |||
622 | video_rom_resource.start = start; | ||
623 | |||
624 | + if (probe_kernel_address(rom + 2, c) != 0) | ||
625 | + continue; | ||
626 | + | ||
627 | /* 0 < length <= 0x7f * 512, historically */ | ||
628 | - length = rom[2] * 512; | ||
629 | + length = c * 512; | ||
630 | |||
631 | /* if checksum okay, trust length byte */ | ||
632 | if (length && romchecksum(rom, length)) | ||
633 | @@ -233,8 +237,11 @@ | ||
634 | if (!romsignature(rom)) | ||
635 | continue; | ||
636 | |||
637 | + if (probe_kernel_address(rom + 2, c) != 0) | ||
638 | + continue; | ||
639 | + | ||
640 | /* 0 < length <= 0x7f * 512, historically */ | ||
641 | - length = rom[2] * 512; | ||
642 | + length = c * 512; | ||
643 | |||
644 | /* but accept any length that fits if checksum okay */ | ||
645 | if (!length || start + length > upper || !romchecksum(rom, length)) | ||
646 | @@ -249,7 +256,7 @@ | ||
647 | } | ||
648 | |||
649 | #ifdef CONFIG_XEN | ||
650 | -static struct e820map machine_e820 __initdata; | ||
651 | +static struct e820map machine_e820; | ||
652 | #define e820 machine_e820 | ||
653 | #endif | ||
654 | |||
655 | @@ -409,10 +416,8 @@ | ||
656 | ____________________33__ | ||
657 | ______________________4_ | ||
658 | */ | ||
659 | - printk("sanitize start\n"); | ||
660 | /* if there's only one memory region, don't bother */ | ||
661 | if (*pnr_map < 2) { | ||
662 | - printk("sanitize bail 0\n"); | ||
663 | return -1; | ||
664 | } | ||
665 | |||
666 | @@ -421,7 +426,6 @@ | ||
667 | /* bail out if we find any unreasonable addresses in bios map */ | ||
668 | for (i=0; i<old_nr; i++) | ||
669 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { | ||
670 | - printk("sanitize bail 1\n"); | ||
671 | return -1; | ||
672 | } | ||
673 | |||
674 | @@ -517,7 +521,6 @@ | ||
675 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | ||
676 | *pnr_map = new_nr; | ||
677 | |||
678 | - printk("sanitize end\n"); | ||
679 | return 0; | ||
680 | } | ||
681 | |||
682 | @@ -552,7 +555,6 @@ | ||
683 | unsigned long long size = biosmap->size; | ||
684 | unsigned long long end = start + size; | ||
685 | unsigned long type = biosmap->type; | ||
686 | - printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); | ||
687 | |||
688 | /* Overflow in 64 bits? Ignore the memory map. */ | ||
689 | if (start > end) | ||
690 | @@ -564,17 +566,11 @@ | ||
691 | * Not right. Fix it up. | ||
692 | */ | ||
693 | if (type == E820_RAM) { | ||
694 | - printk("copy_e820_map() type is E820_RAM\n"); | ||
695 | if (start < 0x100000ULL && end > 0xA0000ULL) { | ||
696 | - printk("copy_e820_map() lies in range...\n"); | ||
697 | - if (start < 0xA0000ULL) { | ||
698 | - printk("copy_e820_map() start < 0xA0000ULL\n"); | ||
699 | + if (start < 0xA0000ULL) | ||
700 | add_memory_region(start, 0xA0000ULL-start, type); | ||
701 | - } | ||
702 | - if (end <= 0x100000ULL) { | ||
703 | - printk("copy_e820_map() end <= 0x100000ULL\n"); | ||
704 | + if (end <= 0x100000ULL) | ||
705 | continue; | ||
706 | - } | ||
707 | start = 0x100000ULL; | ||
708 | size = end - start; | ||
709 | } | ||
710 | --- a/arch/x86/kernel/e820_64-xen.c | ||
711 | +++ b/arch/x86/kernel/e820_64-xen.c | ||
712 | @@ -17,6 +17,8 @@ | ||
713 | #include <linux/kexec.h> | ||
714 | #include <linux/module.h> | ||
715 | #include <linux/mm.h> | ||
716 | +#include <linux/suspend.h> | ||
717 | +#include <linux/pfn.h> | ||
718 | |||
719 | #include <asm/pgtable.h> | ||
720 | #include <asm/page.h> | ||
721 | @@ -28,7 +30,7 @@ | ||
722 | |||
723 | struct e820map e820 __initdata; | ||
724 | #ifdef CONFIG_XEN | ||
725 | -struct e820map machine_e820 __initdata; | ||
726 | +struct e820map machine_e820; | ||
727 | #endif | ||
728 | |||
729 | /* | ||
730 | @@ -293,22 +295,6 @@ | ||
731 | } | ||
732 | |||
733 | #ifndef CONFIG_XEN | ||
734 | -/* Mark pages corresponding to given address range as nosave */ | ||
735 | -static void __init | ||
736 | -e820_mark_nosave_range(unsigned long start, unsigned long end) | ||
737 | -{ | ||
738 | - unsigned long pfn, max_pfn; | ||
739 | - | ||
740 | - if (start >= end) | ||
741 | - return; | ||
742 | - | ||
743 | - printk("Nosave address range: %016lx - %016lx\n", start, end); | ||
744 | - max_pfn = end >> PAGE_SHIFT; | ||
745 | - for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) | ||
746 | - if (pfn_valid(pfn)) | ||
747 | - SetPageNosave(pfn_to_page(pfn)); | ||
748 | -} | ||
749 | - | ||
750 | /* | ||
751 | * Find the ranges of physical addresses that do not correspond to | ||
752 | * e820 RAM areas and mark the corresponding pages as nosave for software | ||
753 | @@ -327,13 +313,13 @@ | ||
754 | struct e820entry *ei = &e820.map[i]; | ||
755 | |||
756 | if (paddr < ei->addr) | ||
757 | - e820_mark_nosave_range(paddr, | ||
758 | - round_up(ei->addr, PAGE_SIZE)); | ||
759 | + register_nosave_region(PFN_DOWN(paddr), | ||
760 | + PFN_UP(ei->addr)); | ||
761 | |||
762 | paddr = round_down(ei->addr + ei->size, PAGE_SIZE); | ||
763 | if (ei->type != E820_RAM) | ||
764 | - e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), | ||
765 | - paddr); | ||
766 | + register_nosave_region(PFN_UP(ei->addr), | ||
767 | + PFN_DOWN(paddr)); | ||
768 | |||
769 | if (paddr >= (end_pfn << PAGE_SHIFT)) | ||
770 | break; | ||
771 | --- a/arch/x86/kernel/early_printk-xen.c | ||
772 | +++ b/arch/x86/kernel/early_printk-xen.c | ||
773 | @@ -11,11 +11,10 @@ | ||
774 | |||
775 | #ifdef __i386__ | ||
776 | #include <asm/setup.h> | ||
777 | -#define VGABASE (__ISA_IO_base + 0xb8000) | ||
778 | #else | ||
779 | #include <asm/bootsetup.h> | ||
780 | -#define VGABASE ((void __iomem *)0xffffffff800b8000UL) | ||
781 | #endif | ||
782 | +#define VGABASE (__ISA_IO_base + 0xb8000) | ||
783 | |||
784 | #ifndef CONFIG_XEN | ||
785 | static int max_ypos = 25, max_xpos = 80; | ||
786 | @@ -93,9 +92,9 @@ | ||
787 | static void early_serial_write(struct console *con, const char *s, unsigned n) | ||
788 | { | ||
789 | while (*s && n-- > 0) { | ||
790 | - early_serial_putc(*s); | ||
791 | if (*s == '\n') | ||
792 | early_serial_putc('\r'); | ||
793 | + early_serial_putc(*s); | ||
794 | s++; | ||
795 | } | ||
796 | } | ||
797 | @@ -205,7 +204,7 @@ | ||
798 | return ret; | ||
799 | } | ||
800 | |||
801 | -void __init simnow_init(char *str) | ||
802 | +static void __init simnow_init(char *str) | ||
803 | { | ||
804 | char *fn = "klog"; | ||
805 | if (*str == '=') | ||
806 | @@ -277,22 +276,12 @@ | ||
807 | early_console = &simnow_console; | ||
808 | keep_early = 1; | ||
809 | } | ||
810 | + | ||
811 | + if (keep_early) | ||
812 | + early_console->flags &= ~CON_BOOT; | ||
813 | + else | ||
814 | + early_console->flags |= CON_BOOT; | ||
815 | register_console(early_console); | ||
816 | return 0; | ||
817 | } | ||
818 | - | ||
819 | early_param("earlyprintk", setup_early_printk); | ||
820 | - | ||
821 | -void __init disable_early_printk(void) | ||
822 | -{ | ||
823 | - if (!early_console_initialized || !early_console) | ||
824 | - return; | ||
825 | - if (!keep_early) { | ||
826 | - printk("disabling early console\n"); | ||
827 | - unregister_console(early_console); | ||
828 | - early_console_initialized = 0; | ||
829 | - } else { | ||
830 | - printk("keeping early console\n"); | ||
831 | - } | ||
832 | -} | ||
833 | - | ||
834 | --- a/arch/x86/kernel/entry_32-xen.S | ||
835 | +++ b/arch/x86/kernel/entry_32-xen.S | ||
836 | @@ -15,7 +15,7 @@ | ||
837 | * I changed all the .align's to 4 (16 byte alignment), as that's faster | ||
838 | * on a 486. | ||
839 | * | ||
840 | - * Stack layout in 'ret_from_system_call': | ||
841 | + * Stack layout in 'syscall_exit': | ||
842 | * ptrace needs to have all regs on the stack. | ||
843 | * if the order here is changed, it needs to be | ||
844 | * updated in fork.c:copy_process, signal.c:do_signal, | ||
845 | @@ -135,7 +135,7 @@ | ||
846 | movl $(__USER_DS), %edx; \ | ||
847 | movl %edx, %ds; \ | ||
848 | movl %edx, %es; \ | ||
849 | - movl $(__KERNEL_PDA), %edx; \ | ||
850 | + movl $(__KERNEL_PERCPU), %edx; \ | ||
851 | movl %edx, %fs | ||
852 | |||
853 | #define RESTORE_INT_REGS \ | ||
854 | @@ -308,16 +308,12 @@ | ||
855 | pushl $(__USER_CS) | ||
856 | CFI_ADJUST_CFA_OFFSET 4 | ||
857 | /*CFI_REL_OFFSET cs, 0*/ | ||
858 | -#ifndef CONFIG_COMPAT_VDSO | ||
859 | /* | ||
860 | * Push current_thread_info()->sysenter_return to the stack. | ||
861 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | ||
862 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | ||
863 | */ | ||
864 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | ||
865 | -#else | ||
866 | - pushl $SYSENTER_RETURN | ||
867 | -#endif | ||
868 | CFI_ADJUST_CFA_OFFSET 4 | ||
869 | CFI_REL_OFFSET eip, 0 | ||
870 | |||
871 | @@ -345,7 +341,7 @@ | ||
872 | jae syscall_badsys | ||
873 | call *sys_call_table(,%eax,4) | ||
874 | movl %eax,PT_EAX(%esp) | ||
875 | - DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) | ||
876 | + DISABLE_INTERRUPTS(CLBR_ANY) | ||
877 | TRACE_IRQS_OFF | ||
878 | movl TI_flags(%ebp), %ecx | ||
879 | testw $_TIF_ALLWORK_MASK, %cx | ||
880 | @@ -400,10 +396,6 @@ | ||
881 | CFI_ADJUST_CFA_OFFSET 4 | ||
882 | SAVE_ALL | ||
883 | GET_THREAD_INFO(%ebp) | ||
884 | - testl $TF_MASK,PT_EFLAGS(%esp) | ||
885 | - jz no_singlestep | ||
886 | - orl $_TIF_SINGLESTEP,TI_flags(%ebp) | ||
887 | -no_singlestep: | ||
888 | # system call tracing in operation / emulation | ||
889 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | ||
890 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | ||
891 | @@ -418,6 +410,10 @@ | ||
892 | # setting need_resched or sigpending | ||
893 | # between sampling and the iret | ||
894 | TRACE_IRQS_OFF | ||
895 | + testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit | ||
896 | + jz no_singlestep | ||
897 | + orl $_TIF_SINGLESTEP,TI_flags(%ebp) | ||
898 | +no_singlestep: | ||
899 | movl TI_flags(%ebp), %ecx | ||
900 | testw $_TIF_ALLWORK_MASK, %cx # current->work | ||
901 | jne syscall_exit_work | ||
902 | @@ -635,9 +631,7 @@ | ||
903 | #ifndef CONFIG_XEN | ||
904 | #define FIXUP_ESPFIX_STACK \ | ||
905 | /* since we are on a wrong stack, we cant make it a C code :( */ \ | ||
906 | - movl %fs:PDA_cpu, %ebx; \ | ||
907 | - PER_CPU(cpu_gdt_descr, %ebx); \ | ||
908 | - movl GDS_address(%ebx), %ebx; \ | ||
909 | + PER_CPU(gdt_page, %ebx); \ | ||
910 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | ||
911 | addl %esp, %eax; \ | ||
912 | pushl $__KERNEL_DS; \ | ||
913 | @@ -710,7 +704,7 @@ | ||
914 | SAVE_ALL; \ | ||
915 | TRACE_IRQS_OFF \ | ||
916 | movl %esp,%eax; \ | ||
917 | - call smp_/**/name; \ | ||
918 | + call smp_##name; \ | ||
919 | jmp ret_from_intr; \ | ||
920 | CFI_ENDPROC; \ | ||
921 | ENDPROC(name) | ||
922 | @@ -718,10 +712,6 @@ | ||
923 | /* The include is where all of the SMP etc. interrupts come from */ | ||
924 | #include "entry_arch.h" | ||
925 | |||
926 | -/* This alternate entry is needed because we hijack the apic LVTT */ | ||
927 | -#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) | ||
928 | -BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) | ||
929 | -#endif | ||
930 | #else | ||
931 | #define UNWIND_ESPFIX_STACK | ||
932 | #endif | ||
933 | @@ -764,7 +754,7 @@ | ||
934 | pushl %fs | ||
935 | CFI_ADJUST_CFA_OFFSET 4 | ||
936 | /*CFI_REL_OFFSET fs, 0*/ | ||
937 | - movl $(__KERNEL_PDA), %ecx | ||
938 | + movl $(__KERNEL_PERCPU), %ecx | ||
939 | movl %ecx, %fs | ||
940 | UNWIND_ESPFIX_STACK | ||
941 | popl %ecx | ||
942 | --- a/arch/x86/kernel/entry_64-xen.S | ||
943 | +++ b/arch/x86/kernel/entry_64-xen.S | ||
944 | @@ -1254,3 +1254,10 @@ | ||
945 | ret | ||
946 | CFI_ENDPROC | ||
947 | ENDPROC(call_softirq) | ||
948 | + | ||
949 | +KPROBE_ENTRY(ignore_sysret) | ||
950 | + CFI_STARTPROC | ||
951 | + mov $-ENOSYS,%eax | ||
952 | + HYPERVISOR_IRET 0 | ||
953 | + CFI_ENDPROC | ||
954 | +ENDPROC(ignore_sysret) | ||
955 | --- a/arch/x86/kernel/genapic_64-xen.c | ||
956 | +++ b/arch/x86/kernel/genapic_64-xen.c | ||
957 | @@ -11,123 +11,57 @@ | ||
958 | #include <linux/threads.h> | ||
959 | #include <linux/cpumask.h> | ||
960 | #include <linux/string.h> | ||
961 | +#include <linux/module.h> | ||
962 | #include <linux/kernel.h> | ||
963 | #include <linux/ctype.h> | ||
964 | #include <linux/init.h> | ||
965 | -#include <linux/module.h> | ||
966 | |||
967 | #include <asm/smp.h> | ||
968 | #include <asm/ipi.h> | ||
969 | +#include <asm/genapic.h> | ||
970 | |||
971 | -#if defined(CONFIG_ACPI) | ||
972 | +#ifdef CONFIG_ACPI | ||
973 | #include <acpi/acpi_bus.h> | ||
974 | #endif | ||
975 | |||
976 | /* which logical CPU number maps to which CPU (physical APIC ID) */ | ||
977 | -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
978 | +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly | ||
979 | + = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
980 | EXPORT_SYMBOL(x86_cpu_to_apicid); | ||
981 | -u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
982 | |||
983 | -extern struct genapic apic_cluster; | ||
984 | -extern struct genapic apic_flat; | ||
985 | -extern struct genapic apic_physflat; | ||
986 | +u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
987 | |||
988 | #ifndef CONFIG_XEN | ||
989 | -struct genapic *genapic = &apic_flat; | ||
990 | -struct genapic *genapic_force; | ||
991 | +struct genapic __read_mostly *genapic = &apic_flat; | ||
992 | #else | ||
993 | extern struct genapic apic_xen; | ||
994 | -struct genapic *genapic = &apic_xen; | ||
995 | +struct genapic __read_mostly *genapic = &apic_xen; | ||
996 | #endif | ||
997 | |||
998 | |||
999 | /* | ||
1000 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | ||
1001 | */ | ||
1002 | -void __init clustered_apic_check(void) | ||
1003 | +void __init setup_apic_routing(void) | ||
1004 | { | ||
1005 | #ifndef CONFIG_XEN | ||
1006 | - long i; | ||
1007 | - u8 clusters, max_cluster; | ||
1008 | - u8 id; | ||
1009 | - u8 cluster_cnt[NUM_APIC_CLUSTERS]; | ||
1010 | - int max_apic = 0; | ||
1011 | - | ||
1012 | - /* genapic selection can be forced because of certain quirks. | ||
1013 | - */ | ||
1014 | - if (genapic_force) { | ||
1015 | - genapic = genapic_force; | ||
1016 | - goto print; | ||
1017 | - } | ||
1018 | - | ||
1019 | -#if defined(CONFIG_ACPI) | ||
1020 | +#ifdef CONFIG_ACPI | ||
1021 | /* | ||
1022 | - * Some x86_64 machines use physical APIC mode regardless of how many | ||
1023 | - * procs/clusters are present (x86_64 ES7000 is an example). | ||
1024 | + * Quirk: some x86_64 machines can only use physical APIC mode | ||
1025 | + * regardless of how many processors are present (x86_64 ES7000 | ||
1026 | + * is an example). | ||
1027 | */ | ||
1028 | - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) | ||
1029 | - if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { | ||
1030 | - genapic = &apic_cluster; | ||
1031 | - goto print; | ||
1032 | - } | ||
1033 | -#endif | ||
1034 | - | ||
1035 | - memset(cluster_cnt, 0, sizeof(cluster_cnt)); | ||
1036 | - for (i = 0; i < NR_CPUS; i++) { | ||
1037 | - id = bios_cpu_apicid[i]; | ||
1038 | - if (id == BAD_APICID) | ||
1039 | - continue; | ||
1040 | - if (id > max_apic) | ||
1041 | - max_apic = id; | ||
1042 | - cluster_cnt[APIC_CLUSTERID(id)]++; | ||
1043 | - } | ||
1044 | - | ||
1045 | - /* Don't use clustered mode on AMD platforms. */ | ||
1046 | - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | ||
1047 | + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && | ||
1048 | + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) | ||
1049 | genapic = &apic_physflat; | ||
1050 | -#ifndef CONFIG_HOTPLUG_CPU | ||
1051 | - /* In the CPU hotplug case we cannot use broadcast mode | ||
1052 | - because that opens a race when a CPU is removed. | ||
1053 | - Stay at physflat mode in this case. | ||
1054 | - It is bad to do this unconditionally though. Once | ||
1055 | - we have ACPI platform support for CPU hotplug | ||
1056 | - we should detect hotplug capablity from ACPI tables and | ||
1057 | - only do this when really needed. -AK */ | ||
1058 | - if (max_apic <= 8) | ||
1059 | - genapic = &apic_flat; | ||
1060 | + else | ||
1061 | #endif | ||
1062 | - goto print; | ||
1063 | - } | ||
1064 | |||
1065 | - clusters = 0; | ||
1066 | - max_cluster = 0; | ||
1067 | - | ||
1068 | - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { | ||
1069 | - if (cluster_cnt[i] > 0) { | ||
1070 | - ++clusters; | ||
1071 | - if (cluster_cnt[i] > max_cluster) | ||
1072 | - max_cluster = cluster_cnt[i]; | ||
1073 | - } | ||
1074 | - } | ||
1075 | - | ||
1076 | - /* | ||
1077 | - * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode, | ||
1078 | - * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical | ||
1079 | - * else physical mode. | ||
1080 | - * (We don't use lowest priority delivery + HW APIC IRQ steering, so | ||
1081 | - * can ignore the clustered logical case and go straight to physical.) | ||
1082 | - */ | ||
1083 | - if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) { | ||
1084 | -#ifdef CONFIG_HOTPLUG_CPU | ||
1085 | - /* Don't use APIC shortcuts in CPU hotplug to avoid races */ | ||
1086 | - genapic = &apic_physflat; | ||
1087 | -#else | ||
1088 | + if (cpus_weight(cpu_possible_map) <= 8) | ||
1089 | genapic = &apic_flat; | ||
1090 | -#endif | ||
1091 | - } else | ||
1092 | - genapic = &apic_cluster; | ||
1093 | + else | ||
1094 | + genapic = &apic_physflat; | ||
1095 | |||
1096 | -print: | ||
1097 | #else | ||
1098 | /* hardcode to xen apic functions */ | ||
1099 | genapic = &apic_xen; | ||
1100 | @@ -135,7 +69,7 @@ | ||
1101 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); | ||
1102 | } | ||
1103 | |||
1104 | -/* Same for both flat and clustered. */ | ||
1105 | +/* Same for both flat and physical. */ | ||
1106 | |||
1107 | #ifdef CONFIG_XEN | ||
1108 | extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); | ||
1109 | --- a/arch/x86/kernel/genapic_xen_64.c | ||
1110 | +++ b/arch/x86/kernel/genapic_xen_64.c | ||
1111 | @@ -21,9 +21,8 @@ | ||
1112 | #include <asm/ipi.h> | ||
1113 | #else | ||
1114 | #include <asm/apic.h> | ||
1115 | -#include <asm/apicdef.h> | ||
1116 | -#include <asm/genapic.h> | ||
1117 | #endif | ||
1118 | +#include <asm/genapic.h> | ||
1119 | #include <xen/evtchn.h> | ||
1120 | |||
1121 | DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); | ||
1122 | --- a/arch/x86/kernel/head64-xen.c | ||
1123 | +++ b/arch/x86/kernel/head64-xen.c | ||
1124 | @@ -22,13 +22,21 @@ | ||
1125 | #include <asm/setup.h> | ||
1126 | #include <asm/desc.h> | ||
1127 | #include <asm/pgtable.h> | ||
1128 | +#include <asm/tlbflush.h> | ||
1129 | #include <asm/sections.h> | ||
1130 | |||
1131 | unsigned long start_pfn; | ||
1132 | |||
1133 | +#ifndef CONFIG_XEN | ||
1134 | +static void __init zap_identity_mappings(void) | ||
1135 | +{ | ||
1136 | + pgd_t *pgd = pgd_offset_k(0UL); | ||
1137 | + pgd_clear(pgd); | ||
1138 | + __flush_tlb(); | ||
1139 | +} | ||
1140 | + | ||
1141 | /* Don't add a printk in there. printk relies on the PDA which is not initialized | ||
1142 | yet. */ | ||
1143 | -#if 0 | ||
1144 | static void __init clear_bss(void) | ||
1145 | { | ||
1146 | memset(__bss_start, 0, | ||
1147 | @@ -37,26 +45,25 @@ | ||
1148 | #endif | ||
1149 | |||
1150 | #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ | ||
1151 | -#define OLD_CL_MAGIC_ADDR 0x90020 | ||
1152 | +#define OLD_CL_MAGIC_ADDR 0x20 | ||
1153 | #define OLD_CL_MAGIC 0xA33F | ||
1154 | -#define OLD_CL_BASE_ADDR 0x90000 | ||
1155 | -#define OLD_CL_OFFSET 0x90022 | ||
1156 | +#define OLD_CL_OFFSET 0x22 | ||
1157 | |||
1158 | static void __init copy_bootdata(char *real_mode_data) | ||
1159 | { | ||
1160 | #ifndef CONFIG_XEN | ||
1161 | - int new_data; | ||
1162 | + unsigned long new_data; | ||
1163 | char * command_line; | ||
1164 | |||
1165 | memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); | ||
1166 | - new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); | ||
1167 | + new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER); | ||
1168 | if (!new_data) { | ||
1169 | - if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { | ||
1170 | + if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) { | ||
1171 | return; | ||
1172 | } | ||
1173 | - new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; | ||
1174 | + new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET); | ||
1175 | } | ||
1176 | - command_line = (char *) ((u64)(new_data)); | ||
1177 | + command_line = __va(new_data); | ||
1178 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); | ||
1179 | #else | ||
1180 | int max_cmdline; | ||
1181 | @@ -98,10 +105,13 @@ | ||
1182 | while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents ) | ||
1183 | machine_to_phys_order++; | ||
1184 | |||
1185 | -#if 0 | ||
1186 | +#ifndef CONFIG_XEN | ||
1187 | /* clear bss before set_intr_gate with early_idt_handler */ | ||
1188 | clear_bss(); | ||
1189 | |||
1190 | + /* Make NULL pointers segfault */ | ||
1191 | + zap_identity_mappings(); | ||
1192 | + | ||
1193 | for (i = 0; i < IDT_ENTRIES; i++) | ||
1194 | set_intr_gate(i, early_idt_handler); | ||
1195 | asm volatile("lidt %0" :: "m" (idt_descr)); | ||
1196 | @@ -113,7 +123,7 @@ | ||
1197 | cpu_pda(i) = &boot_cpu_pda[i]; | ||
1198 | |||
1199 | pda_init(0); | ||
1200 | - copy_bootdata(real_mode_data); | ||
1201 | + copy_bootdata(__va(real_mode_data)); | ||
1202 | #ifdef CONFIG_SMP | ||
1203 | cpu_set(0, cpu_online_map); | ||
1204 | #endif | ||
1205 | --- a/arch/x86/kernel/head_32-xen.S | ||
1206 | +++ b/arch/x86/kernel/head_32-xen.S | ||
1207 | @@ -37,7 +37,8 @@ | ||
1208 | /* Set up the stack pointer */ | ||
1209 | movl $(init_thread_union+THREAD_SIZE),%esp | ||
1210 | |||
1211 | - call setup_pda | ||
1212 | + movl %ss,%eax | ||
1213 | + movl %eax,%fs # gets reset once there's real percpu | ||
1214 | |||
1215 | /* get vendor info */ | ||
1216 | xorl %eax,%eax # call CPUID with 0 -> return vendor ID | ||
1217 | @@ -64,55 +65,11 @@ | ||
1218 | xorl %eax,%eax # Clear GS | ||
1219 | movl %eax,%gs | ||
1220 | |||
1221 | - movl $(__KERNEL_PDA),%eax | ||
1222 | - mov %eax,%fs | ||
1223 | - | ||
1224 | cld # gcc2 wants the direction flag cleared at all times | ||
1225 | |||
1226 | pushl $0 # fake return address for unwinder | ||
1227 | jmp start_kernel | ||
1228 | |||
1229 | -/* | ||
1230 | - * Point the GDT at this CPU's PDA. This will be | ||
1231 | - * cpu_gdt_table and boot_pda. | ||
1232 | - */ | ||
1233 | -ENTRY(setup_pda) | ||
1234 | - /* get the PDA pointer */ | ||
1235 | - movl $boot_pda, %eax | ||
1236 | - | ||
1237 | - /* slot the PDA address into the GDT */ | ||
1238 | - mov $cpu_gdt_table, %ecx | ||
1239 | - mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ | ||
1240 | - shr $16, %eax | ||
1241 | - mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ | ||
1242 | - mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ | ||
1243 | - | ||
1244 | - # %esi still points to start_info, and no registers | ||
1245 | - # need to be preserved. | ||
1246 | - | ||
1247 | - movl XEN_START_mfn_list(%esi), %ebx | ||
1248 | - movl $(cpu_gdt_table - __PAGE_OFFSET), %eax | ||
1249 | - shrl $PAGE_SHIFT, %eax | ||
1250 | - movl (%ebx,%eax,4), %ecx | ||
1251 | - pushl %ecx # frame number for set_gdt below | ||
1252 | - | ||
1253 | - xorl %esi, %esi | ||
1254 | - xorl %edx, %edx | ||
1255 | - shldl $PAGE_SHIFT, %ecx, %edx | ||
1256 | - shll $PAGE_SHIFT, %ecx | ||
1257 | - orl $0x61, %ecx | ||
1258 | - movl $cpu_gdt_table, %ebx | ||
1259 | - movl $__HYPERVISOR_update_va_mapping, %eax | ||
1260 | - int $0x82 | ||
1261 | - | ||
1262 | - movl $(PAGE_SIZE_asm / 8), %ecx | ||
1263 | - movl %esp, %ebx | ||
1264 | - movl $__HYPERVISOR_set_gdt, %eax | ||
1265 | - int $0x82 | ||
1266 | - | ||
1267 | - popl %ecx | ||
1268 | - ret | ||
1269 | - | ||
1270 | #define HYPERCALL_PAGE_OFFSET 0x1000 | ||
1271 | .org HYPERCALL_PAGE_OFFSET | ||
1272 | ENTRY(hypercall_page) | ||
1273 | @@ -138,60 +95,6 @@ | ||
1274 | */ | ||
1275 | .data | ||
1276 | |||
1277 | -/* | ||
1278 | - * The Global Descriptor Table contains 28 quadwords, per-CPU. | ||
1279 | - */ | ||
1280 | - .section .data.page_aligned, "aw" | ||
1281 | - .align PAGE_SIZE_asm | ||
1282 | -ENTRY(cpu_gdt_table) | ||
1283 | - .quad 0x0000000000000000 /* NULL descriptor */ | ||
1284 | - .quad 0x0000000000000000 /* 0x0b reserved */ | ||
1285 | - .quad 0x0000000000000000 /* 0x13 reserved */ | ||
1286 | - .quad 0x0000000000000000 /* 0x1b reserved */ | ||
1287 | - .quad 0x0000000000000000 /* 0x20 unused */ | ||
1288 | - .quad 0x0000000000000000 /* 0x28 unused */ | ||
1289 | - .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ | ||
1290 | - .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ | ||
1291 | - .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ | ||
1292 | - .quad 0x0000000000000000 /* 0x4b reserved */ | ||
1293 | - .quad 0x0000000000000000 /* 0x53 reserved */ | ||
1294 | - .quad 0x0000000000000000 /* 0x5b reserved */ | ||
1295 | - | ||
1296 | - .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ | ||
1297 | - .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ | ||
1298 | - .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ | ||
1299 | - .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ | ||
1300 | - | ||
1301 | - .quad 0x0000000000000000 /* 0x80 TSS descriptor */ | ||
1302 | - .quad 0x0000000000000000 /* 0x88 LDT descriptor */ | ||
1303 | - | ||
1304 | - /* | ||
1305 | - * Segments used for calling PnP BIOS have byte granularity. | ||
1306 | - * They code segments and data segments have fixed 64k limits, | ||
1307 | - * the transfer segment sizes are set at run time. | ||
1308 | - */ | ||
1309 | - .quad 0x0000000000000000 /* 0x90 32-bit code */ | ||
1310 | - .quad 0x0000000000000000 /* 0x98 16-bit code */ | ||
1311 | - .quad 0x0000000000000000 /* 0xa0 16-bit data */ | ||
1312 | - .quad 0x0000000000000000 /* 0xa8 16-bit data */ | ||
1313 | - .quad 0x0000000000000000 /* 0xb0 16-bit data */ | ||
1314 | - | ||
1315 | - /* | ||
1316 | - * The APM segments have byte granularity and their bases | ||
1317 | - * are set at run time. All have 64k limits. | ||
1318 | - */ | ||
1319 | - .quad 0x0000000000000000 /* 0xb8 APM CS code */ | ||
1320 | - .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ | ||
1321 | - .quad 0x0000000000000000 /* 0xc8 APM DS data */ | ||
1322 | - | ||
1323 | - .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */ | ||
1324 | - .quad 0x00cf92000000ffff /* 0xd8 - PDA */ | ||
1325 | - .quad 0x0000000000000000 /* 0xe0 - unused */ | ||
1326 | - .quad 0x0000000000000000 /* 0xe8 - unused */ | ||
1327 | - .quad 0x0000000000000000 /* 0xf0 - unused */ | ||
1328 | - .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ | ||
1329 | - .align PAGE_SIZE_asm | ||
1330 | - | ||
1331 | #if CONFIG_XEN_COMPAT <= 0x030002 | ||
1332 | /* | ||
1333 | * __xen_guest information | ||
1334 | --- a/arch/x86/kernel/head_64-xen.S | ||
1335 | +++ b/arch/x86/kernel/head_64-xen.S | ||
1336 | @@ -41,18 +42,15 @@ | ||
1337 | .word gdt_end-cpu_gdt_table-1 | ||
1338 | .long cpu_gdt_table-__START_KERNEL_map | ||
1339 | #endif | ||
1340 | -ENTRY(stext) | ||
1341 | -ENTRY(_stext) | ||
1342 | |||
1343 | - $page = 0 | ||
1344 | +.balign PAGE_SIZE | ||
1345 | + | ||
1346 | #define NEXT_PAGE(name) \ | ||
1347 | - $page = $page + 1; \ | ||
1348 | - .org $page * 0x1000; \ | ||
1349 | - phys_##name = $page * 0x1000 + __PHYSICAL_START; \ | ||
1350 | + .balign PAGE_SIZE; \ | ||
1351 | + phys_##name = . - .bootstrap.text; \ | ||
1352 | ENTRY(name) | ||
1353 | |||
1354 | NEXT_PAGE(init_level4_pgt) | ||
1355 | - /* This gets initialized in x86_64_start_kernel */ | ||
1356 | .fill 512,8,0 | ||
1357 | NEXT_PAGE(init_level4_user_pgt) | ||
1358 | /* | ||
1359 | @@ -136,13 +134,13 @@ | ||
1360 | |||
1361 | ENTRY(cpu_gdt_table) | ||
1362 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
1363 | + .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ | ||
1364 | + .quad 0x00af9b000000ffff /* __KERNEL_CS */ | ||
1365 | + .quad 0x00cf93000000ffff /* __KERNEL_DS */ | ||
1366 | + .quad 0x00cffb000000ffff /* __USER32_CS */ | ||
1367 | + .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ | ||
1368 | + .quad 0x00affb000000ffff /* __USER_CS */ | ||
1369 | .quad 0x0 /* unused */ | ||
1370 | - .quad 0x00af9a000000ffff /* __KERNEL_CS */ | ||
1371 | - .quad 0x00cf92000000ffff /* __KERNEL_DS */ | ||
1372 | - .quad 0x00cffa000000ffff /* __USER32_CS */ | ||
1373 | - .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */ | ||
1374 | - .quad 0x00affa000000ffff /* __USER_CS */ | ||
1375 | - .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ | ||
1376 | .quad 0,0 /* TSS */ | ||
1377 | .quad 0,0 /* LDT */ | ||
1378 | .quad 0,0,0 /* three TLS descriptors */ | ||
1379 | @@ -165,14 +163,11 @@ | ||
1380 | * __xen_guest information | ||
1381 | */ | ||
1382 | .macro utoh value | ||
1383 | - .if (\value) < 0 || (\value) >= 0x10 | ||
1384 | - utoh (((\value)>>4)&0x0fffffffffffffff) | ||
1385 | - .endif | ||
1386 | - .if ((\value) & 0xf) < 10 | ||
1387 | - .byte '0' + ((\value) & 0xf) | ||
1388 | - .else | ||
1389 | - .byte 'A' + ((\value) & 0xf) - 10 | ||
1390 | - .endif | ||
1391 | + i = 64 | ||
1392 | + .rept 16 | ||
1393 | + i = i - 4 | ||
1394 | + .byte '0' + ((((\value) >> i) & 0xf) > 9) * ('0' - 'A' + 10) + (((\value) >> i) & 0xf) | ||
1395 | + .endr | ||
1396 | .endm | ||
1397 | |||
1398 | .section __xen_guest | ||
1399 | --- a/arch/x86/kernel/io_apic_32-xen.c | ||
1400 | +++ b/arch/x86/kernel/io_apic_32-xen.c | ||
1401 | @@ -25,7 +25,6 @@ | ||
1402 | #include <linux/init.h> | ||
1403 | #include <linux/delay.h> | ||
1404 | #include <linux/sched.h> | ||
1405 | -#include <linux/smp_lock.h> | ||
1406 | #include <linux/mc146818rtc.h> | ||
1407 | #include <linux/compiler.h> | ||
1408 | #include <linux/acpi.h> | ||
1409 | @@ -35,6 +34,7 @@ | ||
1410 | #include <linux/msi.h> | ||
1411 | #include <linux/htirq.h> | ||
1412 | #include <linux/freezer.h> | ||
1413 | +#include <linux/kthread.h> | ||
1414 | |||
1415 | #include <asm/io.h> | ||
1416 | #include <asm/smp.h> | ||
1417 | @@ -705,8 +705,6 @@ | ||
1418 | unsigned long prev_balance_time = jiffies; | ||
1419 | long time_remaining = balanced_irq_interval; | ||
1420 | |||
1421 | - daemonize("kirqd"); | ||
1422 | - | ||
1423 | /* push everything to CPU 0 to give us a starting point. */ | ||
1424 | for (i = 0 ; i < NR_IRQS ; i++) { | ||
1425 | irq_desc[i].pending_mask = cpumask_of_cpu(0); | ||
1426 | @@ -766,10 +764,9 @@ | ||
1427 | } | ||
1428 | |||
1429 | printk(KERN_INFO "Starting balanced_irq\n"); | ||
1430 | - if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) | ||
1431 | + if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) | ||
1432 | return 0; | ||
1433 | - else | ||
1434 | - printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); | ||
1435 | + printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); | ||
1436 | failed: | ||
1437 | for_each_possible_cpu(i) { | ||
1438 | kfree(irq_cpu_data[i].irq_delta); | ||
1439 | @@ -1445,10 +1442,6 @@ | ||
1440 | enable_8259A_irq(0); | ||
1441 | } | ||
1442 | |||
1443 | -static inline void UNEXPECTED_IO_APIC(void) | ||
1444 | -{ | ||
1445 | -} | ||
1446 | - | ||
1447 | void __init print_IO_APIC(void) | ||
1448 | { | ||
1449 | int apic, i; | ||
1450 | @@ -1488,34 +1481,12 @@ | ||
1451 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | ||
1452 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | ||
1453 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); | ||
1454 | - if (reg_00.bits.ID >= get_physical_broadcast()) | ||
1455 | - UNEXPECTED_IO_APIC(); | ||
1456 | - if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) | ||
1457 | - UNEXPECTED_IO_APIC(); | ||
1458 | |||
1459 | printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); | ||
1460 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); | ||
1461 | - if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ | ||
1462 | - (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ | ||
1463 | - (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ | ||
1464 | - (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ | ||
1465 | - (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ | ||
1466 | - (reg_01.bits.entries != 0x2E) && | ||
1467 | - (reg_01.bits.entries != 0x3F) | ||
1468 | - ) | ||
1469 | - UNEXPECTED_IO_APIC(); | ||
1470 | |||
1471 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); | ||
1472 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); | ||
1473 | - if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ | ||
1474 | - (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ | ||
1475 | - (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ | ||
1476 | - (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ | ||
1477 | - (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ | ||
1478 | - ) | ||
1479 | - UNEXPECTED_IO_APIC(); | ||
1480 | - if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) | ||
1481 | - UNEXPECTED_IO_APIC(); | ||
1482 | |||
1483 | /* | ||
1484 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, | ||
1485 | @@ -1525,8 +1496,6 @@ | ||
1486 | if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { | ||
1487 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); | ||
1488 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); | ||
1489 | - if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) | ||
1490 | - UNEXPECTED_IO_APIC(); | ||
1491 | } | ||
1492 | |||
1493 | /* | ||
1494 | @@ -1538,8 +1507,6 @@ | ||
1495 | reg_03.raw != reg_01.raw) { | ||
1496 | printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); | ||
1497 | printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); | ||
1498 | - if (reg_03.bits.__reserved_1) | ||
1499 | - UNEXPECTED_IO_APIC(); | ||
1500 | } | ||
1501 | |||
1502 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | ||
1503 | @@ -2670,19 +2637,19 @@ | ||
1504 | if (irq < 0) | ||
1505 | return irq; | ||
1506 | |||
1507 | - set_irq_msi(irq, desc); | ||
1508 | ret = msi_compose_msg(dev, irq, &msg); | ||
1509 | if (ret < 0) { | ||
1510 | destroy_irq(irq); | ||
1511 | return ret; | ||
1512 | } | ||
1513 | |||
1514 | + set_irq_msi(irq, desc); | ||
1515 | write_msi_msg(irq, &msg); | ||
1516 | |||
1517 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, | ||
1518 | "edge"); | ||
1519 | |||
1520 | - return irq; | ||
1521 | + return 0; | ||
1522 | } | ||
1523 | |||
1524 | void arch_teardown_msi_irq(unsigned int irq) | ||
1525 | --- a/arch/x86/kernel/io_apic_64-xen.c | ||
1526 | +++ b/arch/x86/kernel/io_apic_64-xen.c | ||
1527 | @@ -25,7 +25,6 @@ | ||
1528 | #include <linux/init.h> | ||
1529 | #include <linux/delay.h> | ||
1530 | #include <linux/sched.h> | ||
1531 | -#include <linux/smp_lock.h> | ||
1532 | #include <linux/pci.h> | ||
1533 | #include <linux/mc146818rtc.h> | ||
1534 | #include <linux/acpi.h> | ||
1535 | @@ -897,10 +896,6 @@ | ||
1536 | enable_8259A_irq(0); | ||
1537 | } | ||
1538 | |||
1539 | -void __init UNEXPECTED_IO_APIC(void) | ||
1540 | -{ | ||
1541 | -} | ||
1542 | - | ||
1543 | void __apicdebuginit print_IO_APIC(void) | ||
1544 | { | ||
1545 | int apic, i; | ||
1546 | @@ -936,40 +931,16 @@ | ||
1547 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); | ||
1548 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | ||
1549 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | ||
1550 | - if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) | ||
1551 | - UNEXPECTED_IO_APIC(); | ||
1552 | |||
1553 | 	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); | ||
1554 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); | ||
1555 | - if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ | ||
1556 | - (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ | ||
1557 | - (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ | ||
1558 | - (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ | ||
1559 | - (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ | ||
1560 | - (reg_01.bits.entries != 0x2E) && | ||
1561 | - (reg_01.bits.entries != 0x3F) && | ||
1562 | - (reg_01.bits.entries != 0x03) | ||
1563 | - ) | ||
1564 | - UNEXPECTED_IO_APIC(); | ||
1565 | |||
1566 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); | ||
1567 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); | ||
1568 | - if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ | ||
1569 | - (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */ | ||
1570 | - (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ | ||
1571 | - (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ | ||
1572 | - (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ | ||
1573 | - (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ | ||
1574 | - ) | ||
1575 | - UNEXPECTED_IO_APIC(); | ||
1576 | - if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) | ||
1577 | - UNEXPECTED_IO_APIC(); | ||
1578 | |||
1579 | if (reg_01.bits.version >= 0x10) { | ||
1580 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); | ||
1581 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); | ||
1582 | - if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) | ||
1583 | - UNEXPECTED_IO_APIC(); | ||
1584 | } | ||
1585 | |||
1586 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | ||
1587 | @@ -1401,8 +1372,7 @@ | ||
1588 | |||
1589 | vector = ~get_irq_regs()->orig_rax; | ||
1590 | me = smp_processor_id(); | ||
1591 | - if ((vector == cfg->vector) && | ||
1592 | - cpu_isset(smp_processor_id(), cfg->domain)) { | ||
1593 | + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { | ||
1594 | cpumask_t cleanup_mask; | ||
1595 | |||
1596 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | ||
1597 | @@ -1437,7 +1407,7 @@ | ||
1598 | |||
1599 | /* | ||
1600 | * We must acknowledge the irq before we move it or the acknowledge will | ||
1601 | - * not propogate properly. | ||
1602 | + * not propagate properly. | ||
1603 | */ | ||
1604 | ack_APIC_irq(); | ||
1605 | |||
1606 | @@ -1520,6 +1490,7 @@ | ||
1607 | static void end_lapic_irq (unsigned int i) { /* nothing */ } | ||
1608 | |||
1609 | static struct hw_interrupt_type lapic_irq_type __read_mostly = { | ||
1610 | + .name = "local-APIC", | ||
1611 | .typename = "local-APIC-edge", | ||
1612 | .startup = NULL, /* startup_irq() not used for IRQ0 */ | ||
1613 | .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ | ||
1614 | @@ -1989,18 +1960,18 @@ | ||
1615 | if (irq < 0) | ||
1616 | return irq; | ||
1617 | |||
1618 | - set_irq_msi(irq, desc); | ||
1619 | ret = msi_compose_msg(dev, irq, &msg); | ||
1620 | if (ret < 0) { | ||
1621 | destroy_irq(irq); | ||
1622 | return ret; | ||
1623 | } | ||
1624 | |||
1625 | + set_irq_msi(irq, desc); | ||
1626 | write_msi_msg(irq, &msg); | ||
1627 | |||
1628 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | ||
1629 | |||
1630 | - return irq; | ||
1631 | + return 0; | ||
1632 | } | ||
1633 | |||
1634 | void arch_teardown_msi_irq(unsigned int irq) | ||
1635 | --- a/arch/x86/kernel/ioport_32-xen.c | ||
1636 | +++ b/arch/x86/kernel/ioport_32-xen.c | ||
1637 | @@ -12,10 +12,10 @@ | ||
1638 | #include <linux/types.h> | ||
1639 | #include <linux/ioport.h> | ||
1640 | #include <linux/smp.h> | ||
1641 | -#include <linux/smp_lock.h> | ||
1642 | #include <linux/stddef.h> | ||
1643 | #include <linux/slab.h> | ||
1644 | #include <linux/thread_info.h> | ||
1645 | +#include <linux/syscalls.h> | ||
1646 | #include <xen/interface/physdev.h> | ||
1647 | |||
1648 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | ||
1649 | --- a/arch/x86/kernel/ioport_64-xen.c | ||
1650 | +++ b/arch/x86/kernel/ioport_64-xen.c | ||
1651 | @@ -13,10 +13,10 @@ | ||
1652 | #include <linux/ioport.h> | ||
1653 | #include <linux/mm.h> | ||
1654 | #include <linux/smp.h> | ||
1655 | -#include <linux/smp_lock.h> | ||
1656 | #include <linux/stddef.h> | ||
1657 | #include <linux/slab.h> | ||
1658 | #include <linux/thread_info.h> | ||
1659 | +#include <linux/syscalls.h> | ||
1660 | #include <xen/interface/physdev.h> | ||
1661 | |||
1662 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | ||
1663 | --- a/arch/x86/kernel/irq_32-xen.c | ||
1664 | +++ b/arch/x86/kernel/irq_32-xen.c | ||
1665 | @@ -24,6 +24,9 @@ | ||
1666 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; | ||
1667 | EXPORT_PER_CPU_SYMBOL(irq_stat); | ||
1668 | |||
1669 | +DEFINE_PER_CPU(struct pt_regs *, irq_regs); | ||
1670 | +EXPORT_PER_CPU_SYMBOL(irq_regs); | ||
1671 | + | ||
1672 | /* | ||
1673 | * 'what should we do if we get a hw irq event on an illegal vector'. | ||
1674 | * each architecture has to answer this themselves. | ||
1675 | --- a/arch/x86/kernel/irq_64-xen.c | ||
1676 | +++ b/arch/x86/kernel/irq_64-xen.c | ||
1677 | @@ -32,7 +32,7 @@ | ||
1678 | */ | ||
1679 | static inline void stack_overflow_check(struct pt_regs *regs) | ||
1680 | { | ||
1681 | - u64 curbase = (u64) current->thread_info; | ||
1682 | + u64 curbase = (u64)task_stack_page(current); | ||
1683 | static unsigned long warned = -60*HZ; | ||
1684 | |||
1685 | if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && | ||
1686 | @@ -145,17 +145,43 @@ | ||
1687 | |||
1688 | for (irq = 0; irq < NR_IRQS; irq++) { | ||
1689 | cpumask_t mask; | ||
1690 | + int break_affinity = 0; | ||
1691 | + int set_affinity = 1; | ||
1692 | + | ||
1693 | if (irq == 2) | ||
1694 | continue; | ||
1695 | |||
1696 | + /* interrupts are disabled at this point */ | ||
1697 | + spin_lock(&irq_desc[irq].lock); | ||
1698 | + | ||
1699 | + if (!irq_has_action(irq) || | ||
1700 | + cpus_equal(irq_desc[irq].affinity, map)) { | ||
1701 | + spin_unlock(&irq_desc[irq].lock); | ||
1702 | + continue; | ||
1703 | + } | ||
1704 | + | ||
1705 | cpus_and(mask, irq_desc[irq].affinity, map); | ||
1706 | - if (any_online_cpu(mask) == NR_CPUS) { | ||
1707 | - printk("Breaking affinity for irq %i\n", irq); | ||
1708 | + if (cpus_empty(mask)) { | ||
1709 | + break_affinity = 1; | ||
1710 | mask = map; | ||
1711 | } | ||
1712 | + | ||
1713 | + if (irq_desc[irq].chip->mask) | ||
1714 | + irq_desc[irq].chip->mask(irq); | ||
1715 | + | ||
1716 | if (irq_desc[irq].chip->set_affinity) | ||
1717 | irq_desc[irq].chip->set_affinity(irq, mask); | ||
1718 | - else if (irq_desc[irq].action && !(warned++)) | ||
1719 | + else if (!(warned++)) | ||
1720 | + set_affinity = 0; | ||
1721 | + | ||
1722 | + if (irq_desc[irq].chip->unmask) | ||
1723 | + irq_desc[irq].chip->unmask(irq); | ||
1724 | + | ||
1725 | + spin_unlock(&irq_desc[irq].lock); | ||
1726 | + | ||
1727 | + if (break_affinity && set_affinity) | ||
1728 | + printk("Broke affinity for irq %i\n", irq); | ||
1729 | + else if (!set_affinity) | ||
1730 | printk("Cannot set affinity for irq %i\n", irq); | ||
1731 | } | ||
1732 | |||
1733 | --- a/arch/x86/kernel/ldt_32-xen.c | ||
1734 | +++ b/arch/x86/kernel/ldt_32-xen.c | ||
1735 | @@ -10,7 +10,6 @@ | ||
1736 | #include <linux/string.h> | ||
1737 | #include <linux/mm.h> | ||
1738 | #include <linux/smp.h> | ||
1739 | -#include <linux/smp_lock.h> | ||
1740 | #include <linux/vmalloc.h> | ||
1741 | #include <linux/slab.h> | ||
1742 | |||
1743 | --- a/arch/x86/kernel/ldt_64-xen.c | ||
1744 | +++ b/arch/x86/kernel/ldt_64-xen.c | ||
1745 | @@ -13,7 +13,6 @@ | ||
1746 | #include <linux/string.h> | ||
1747 | #include <linux/mm.h> | ||
1748 | #include <linux/smp.h> | ||
1749 | -#include <linux/smp_lock.h> | ||
1750 | #include <linux/vmalloc.h> | ||
1751 | #include <linux/slab.h> | ||
1752 | |||
1753 | --- a/arch/x86/kernel/microcode-xen.c | ||
1754 | +++ b/arch/x86/kernel/microcode-xen.c | ||
1755 | @@ -135,7 +135,7 @@ | ||
1756 | return 0; | ||
1757 | } | ||
1758 | |||
1759 | -static void __exit microcode_dev_exit (void) | ||
1760 | +static void microcode_dev_exit (void) | ||
1761 | { | ||
1762 | 	misc_deregister(&microcode_dev); | ||
1763 | } | ||
1764 | --- a/arch/x86/kernel/mpparse_32-xen.c | ||
1765 | +++ b/arch/x86/kernel/mpparse_32-xen.c | ||
1766 | @@ -18,7 +18,6 @@ | ||
1767 | #include <linux/acpi.h> | ||
1768 | #include <linux/delay.h> | ||
1769 | #include <linux/bootmem.h> | ||
1770 | -#include <linux/smp_lock.h> | ||
1771 | #include <linux/kernel_stat.h> | ||
1772 | #include <linux/mc146818rtc.h> | ||
1773 | #include <linux/bitops.h> | ||
1774 | @@ -484,7 +483,7 @@ | ||
1775 | } | ||
1776 | ++mpc_record; | ||
1777 | } | ||
1778 | - clustered_apic_check(); | ||
1779 | + setup_apic_routing(); | ||
1780 | if (!num_processors) | ||
1781 | printk(KERN_ERR "SMP mptable: no processors registered!\n"); | ||
1782 | return num_processors; | ||
1783 | --- a/arch/x86/kernel/mpparse_64-xen.c | ||
1784 | +++ b/arch/x86/kernel/mpparse_64-xen.c | ||
1785 | @@ -17,7 +17,6 @@ | ||
1786 | #include <linux/init.h> | ||
1787 | #include <linux/delay.h> | ||
1788 | #include <linux/bootmem.h> | ||
1789 | -#include <linux/smp_lock.h> | ||
1790 | #include <linux/kernel_stat.h> | ||
1791 | #include <linux/mc146818rtc.h> | ||
1792 | #include <linux/acpi.h> | ||
1793 | @@ -307,7 +306,7 @@ | ||
1794 | } | ||
1795 | } | ||
1796 | } | ||
1797 | - clustered_apic_check(); | ||
1798 | + setup_apic_routing(); | ||
1799 | if (!num_processors) | ||
1800 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | ||
1801 | return num_processors; | ||
1802 | --- a/arch/x86/kernel/pci-dma_32-xen.c | ||
1803 | +++ b/arch/x86/kernel/pci-dma_32-xen.c | ||
1804 | @@ -13,6 +13,7 @@ | ||
1805 | #include <linux/pci.h> | ||
1806 | #include <linux/module.h> | ||
1807 | #include <linux/version.h> | ||
1808 | +#include <linux/pci.h> | ||
1809 | #include <asm/io.h> | ||
1810 | #include <xen/balloon.h> | ||
1811 | #include <xen/gnttab.h> | ||
1812 | @@ -284,7 +285,7 @@ | ||
1813 | { | ||
1814 | void __iomem *mem_base = NULL; | ||
1815 | int pages = size >> PAGE_SHIFT; | ||
1816 | - int bitmap_size = (pages + 31)/32; | ||
1817 | + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); | ||
1818 | |||
1819 | if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) | ||
1820 | goto out; | ||
1821 | @@ -357,6 +358,32 @@ | ||
1822 | EXPORT_SYMBOL(dma_mark_declared_memory_occupied); | ||
1823 | #endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ | ||
1824 | |||
1825 | +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN) | ||
1826 | +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ | ||
1827 | + | ||
1828 | +int forbid_dac; | ||
1829 | +EXPORT_SYMBOL(forbid_dac); | ||
1830 | + | ||
1831 | +static __devinit void via_no_dac(struct pci_dev *dev) | ||
1832 | +{ | ||
1833 | + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { | ||
1834 | + printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); | ||
1835 | + forbid_dac = 1; | ||
1836 | + } | ||
1837 | +} | ||
1838 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); | ||
1839 | + | ||
1840 | +static int check_iommu(char *s) | ||
1841 | +{ | ||
1842 | + if (!strcmp(s, "usedac")) { | ||
1843 | + forbid_dac = -1; | ||
1844 | + return 1; | ||
1845 | + } | ||
1846 | + return 0; | ||
1847 | +} | ||
1848 | +__setup("iommu=", check_iommu); | ||
1849 | +#endif | ||
1850 | + | ||
1851 | dma_addr_t | ||
1852 | dma_map_single(struct device *dev, void *ptr, size_t size, | ||
1853 | enum dma_data_direction direction) | ||
1854 | --- a/arch/x86/kernel/pci-swiotlb_64-xen.c | ||
1855 | +++ b/arch/x86/kernel/pci-swiotlb_64-xen.c | ||
1856 | @@ -16,7 +16,7 @@ | ||
1857 | |||
1858 | void swiotlb_init(void); | ||
1859 | |||
1860 | -struct dma_mapping_ops swiotlb_dma_ops = { | ||
1861 | +const struct dma_mapping_ops swiotlb_dma_ops = { | ||
1862 | #if 0 | ||
1863 | .mapping_error = swiotlb_dma_mapping_error, | ||
1864 | .alloc_coherent = swiotlb_alloc_coherent, | ||
1865 | --- a/arch/x86/kernel/process_32-xen.c | ||
1866 | +++ b/arch/x86/kernel/process_32-xen.c | ||
1867 | @@ -21,7 +21,6 @@ | ||
1868 | #include <linux/mm.h> | ||
1869 | #include <linux/elfcore.h> | ||
1870 | #include <linux/smp.h> | ||
1871 | -#include <linux/smp_lock.h> | ||
1872 | #include <linux/stddef.h> | ||
1873 | #include <linux/slab.h> | ||
1874 | #include <linux/vmalloc.h> | ||
1875 | @@ -39,6 +38,7 @@ | ||
1876 | #include <linux/random.h> | ||
1877 | #include <linux/personality.h> | ||
1878 | #include <linux/tick.h> | ||
1879 | +#include <linux/percpu.h> | ||
1880 | |||
1881 | #include <asm/uaccess.h> | ||
1882 | #include <asm/pgtable.h> | ||
1883 | @@ -61,7 +61,6 @@ | ||
1884 | |||
1885 | #include <asm/tlbflush.h> | ||
1886 | #include <asm/cpu.h> | ||
1887 | -#include <asm/pda.h> | ||
1888 | |||
1889 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | ||
1890 | |||
1891 | @@ -70,6 +69,12 @@ | ||
1892 | unsigned long boot_option_idle_override = 0; | ||
1893 | EXPORT_SYMBOL(boot_option_idle_override); | ||
1894 | |||
1895 | +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
1896 | +EXPORT_PER_CPU_SYMBOL(current_task); | ||
1897 | + | ||
1898 | +DEFINE_PER_CPU(int, cpu_number); | ||
1899 | +EXPORT_PER_CPU_SYMBOL(cpu_number); | ||
1900 | + | ||
1901 | /* | ||
1902 | * Return saved PC of a blocked thread. | ||
1903 | */ | ||
1904 | @@ -168,6 +173,7 @@ | ||
1905 | if (__get_cpu_var(cpu_idle_state)) | ||
1906 | __get_cpu_var(cpu_idle_state) = 0; | ||
1907 | |||
1908 | + check_pgt_cache(); | ||
1909 | rmb(); | ||
1910 | idle = xen_idle; /* no alternatives */ | ||
1911 | |||
1912 | @@ -218,18 +224,19 @@ | ||
1913 | { | ||
1914 | } | ||
1915 | |||
1916 | -static int __init idle_setup (char *str) | ||
1917 | +static int __init idle_setup(char *str) | ||
1918 | { | ||
1919 | - if (!strncmp(str, "poll", 4)) { | ||
1920 | + if (!strcmp(str, "poll")) { | ||
1921 | printk("using polling idle threads.\n"); | ||
1922 | pm_idle = poll_idle; | ||
1923 | } | ||
1924 | + else | ||
1925 | + return -1; | ||
1926 | |||
1927 | boot_option_idle_override = 1; | ||
1928 | - return 1; | ||
1929 | + return 0; | ||
1930 | } | ||
1931 | - | ||
1932 | -__setup("idle=", idle_setup); | ||
1933 | +early_param("idle", idle_setup); | ||
1934 | |||
1935 | void show_regs(struct pt_regs * regs) | ||
1936 | { | ||
1937 | @@ -282,7 +289,7 @@ | ||
1938 | |||
1939 | regs.xds = __USER_DS; | ||
1940 | regs.xes = __USER_DS; | ||
1941 | - regs.xfs = __KERNEL_PDA; | ||
1942 | + regs.xfs = __KERNEL_PERCPU; | ||
1943 | regs.orig_eax = -1; | ||
1944 | regs.eip = (unsigned long) kernel_thread_helper; | ||
1945 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | ||
1946 | @@ -556,7 +563,7 @@ | ||
1947 | * multicall to indicate FPU task switch, rather than | ||
1948 | * synchronously trapping to Xen. | ||
1949 | */ | ||
1950 | - if (prev_p->thread_info->status & TS_USEDFPU) { | ||
1951 | + if (task_thread_info(prev_p)->status & TS_USEDFPU) { | ||
1952 | __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ | ||
1953 | mcl->op = __HYPERVISOR_fpu_taskswitch; | ||
1954 | mcl->args[0] = 1; | ||
1955 | @@ -648,7 +655,7 @@ | ||
1956 | if (prev->gs | next->gs) | ||
1957 | loadsegment(gs, next->gs); | ||
1958 | |||
1959 | - write_pda(pcurrent, next_p); | ||
1960 | + x86_write_percpu(current_task, next_p); | ||
1961 | |||
1962 | return prev_p; | ||
1963 | } | ||
1964 | --- a/arch/x86/kernel/process_64-xen.c | ||
1965 | +++ b/arch/x86/kernel/process_64-xen.c | ||
1966 | @@ -39,6 +39,7 @@ | ||
1967 | #include <linux/random.h> | ||
1968 | #include <linux/notifier.h> | ||
1969 | #include <linux/kprobes.h> | ||
1970 | +#include <linux/kdebug.h> | ||
1971 | |||
1972 | #include <asm/uaccess.h> | ||
1973 | #include <asm/pgtable.h> | ||
1974 | @@ -49,7 +50,6 @@ | ||
1975 | #include <asm/mmu_context.h> | ||
1976 | #include <asm/pda.h> | ||
1977 | #include <asm/prctl.h> | ||
1978 | -#include <asm/kdebug.h> | ||
1979 | #include <xen/interface/platform.h> | ||
1980 | #include <xen/interface/physdev.h> | ||
1981 | #include <xen/interface/vcpu.h> | ||
1982 | @@ -232,16 +232,18 @@ | ||
1983 | |||
1984 | static int __init idle_setup (char *str) | ||
1985 | { | ||
1986 | - if (!strncmp(str, "poll", 4)) { | ||
1987 | + if (!strcmp(str, "poll")) { | ||
1988 | printk("using polling idle threads.\n"); | ||
1989 | pm_idle = poll_idle; | ||
1990 | - } | ||
1991 | + } else if (!strcmp(str, "mwait")) | ||
1992 | + force_mwait = 1; | ||
1993 | + else | ||
1994 | + return -1; | ||
1995 | |||
1996 | boot_option_idle_override = 1; | ||
1997 | - return 1; | ||
1998 | + return 0; | ||
1999 | } | ||
2000 | - | ||
2001 | -__setup("idle=", idle_setup); | ||
2002 | +early_param("idle", idle_setup); | ||
2003 | |||
2004 | /* Prints also some state that isn't saved in the pt_regs */ | ||
2005 | void __show_regs(struct pt_regs * regs) | ||
2006 | @@ -540,7 +542,7 @@ | ||
2007 | * The AMD workaround requires it to be after DS reload, or | ||
2008 | * after DS has been cleared, which we do in __prepare_arch_switch. | ||
2009 | */ | ||
2010 | - if (prev_p->thread_info->status & TS_USEDFPU) { | ||
2011 | + if (task_thread_info(prev_p)->status & TS_USEDFPU) { | ||
2012 | __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ | ||
2013 | mcl->op = __HYPERVISOR_fpu_taskswitch; | ||
2014 | mcl->args[0] = 1; | ||
2015 | --- a/arch/x86/kernel/quirks-xen.c | ||
2016 | +++ b/arch/x86/kernel/quirks-xen.c | ||
2017 | @@ -3,12 +3,10 @@ | ||
2018 | */ | ||
2019 | #include <linux/pci.h> | ||
2020 | #include <linux/irq.h> | ||
2021 | -#include <asm/pci-direct.h> | ||
2022 | -#include <asm/genapic.h> | ||
2023 | -#include <asm/cpu.h> | ||
2024 | |||
2025 | #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) | ||
2026 | -static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) | ||
2027 | + | ||
2028 | +static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | ||
2029 | { | ||
2030 | u8 config, rev; | ||
2031 | u32 word; | ||
2032 | @@ -16,7 +14,7 @@ | ||
2033 | /* BIOS may enable hardware IRQ balancing for | ||
2034 | * E7520/E7320/E7525(revision ID 0x9 and below) | ||
2035 | * based platforms. | ||
2036 | - * For those platforms, make sure that the genapic is set to 'flat' | ||
2037 | + * Disable SW irqbalance/affinity on those platforms. | ||
2038 | */ | ||
2039 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | ||
2040 | if (rev > 0x9) | ||
2041 | @@ -30,59 +28,20 @@ | ||
2042 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); | ||
2043 | |||
2044 | if (!(word & (1 << 13))) { | ||
2045 | -#ifndef CONFIG_XEN | ||
2046 | -#ifdef CONFIG_X86_64 | ||
2047 | - if (genapic != &apic_flat) | ||
2048 | - panic("APIC mode must be flat on this system\n"); | ||
2049 | -#elif defined(CONFIG_X86_GENERICARCH) | ||
2050 | - if (genapic != &apic_default) | ||
2051 | - panic("APIC mode must be default(flat) on this system. Use apic=default\n"); | ||
2052 | -#endif | ||
2053 | -#endif | ||
2054 | - } | ||
2055 | - | ||
2056 | - /* put back the original value for config space*/ | ||
2057 | - if (!(config & 0x2)) | ||
2058 | - pci_write_config_byte(dev, 0xf4, config); | ||
2059 | -} | ||
2060 | - | ||
2061 | -void __init quirk_intel_irqbalance(void) | ||
2062 | -{ | ||
2063 | - u8 config, rev; | ||
2064 | - u32 word; | ||
2065 | - | ||
2066 | - /* BIOS may enable hardware IRQ balancing for | ||
2067 | - * E7520/E7320/E7525(revision ID 0x9 and below) | ||
2068 | - * based platforms. | ||
2069 | - * Disable SW irqbalance/affinity on those platforms. | ||
2070 | - */ | ||
2071 | - rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); | ||
2072 | - if (rev > 0x9) | ||
2073 | - return; | ||
2074 | - | ||
2075 | - printk(KERN_INFO "Intel E7520/7320/7525 detected."); | ||
2076 | - | ||
2077 | - /* enable access to config space */ | ||
2078 | - config = read_pci_config_byte(0, 0, 0, 0xf4); | ||
2079 | - write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); | ||
2080 | - | ||
2081 | - /* read xTPR register */ | ||
2082 | - word = read_pci_config_16(0, 0, 0x40, 0x4c); | ||
2083 | - | ||
2084 | - if (!(word & (1 << 13))) { | ||
2085 | struct xen_platform_op op; | ||
2086 | - printk(KERN_INFO "Disabling irq balancing and affinity\n"); | ||
2087 | + | ||
2088 | + printk(KERN_INFO "Intel E7520/7320/7525 detected. " | ||
2089 | + "Disabling irq balancing and affinity\n"); | ||
2090 | op.cmd = XENPF_platform_quirk; | ||
2091 | op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING; | ||
2092 | WARN_ON(HYPERVISOR_platform_op(&op)); | ||
2093 | } | ||
2094 | |||
2095 | - /* put back the original value for config space */ | ||
2096 | + /* put back the original value for config space*/ | ||
2097 | if (!(config & 0x2)) | ||
2098 | - write_pci_config_byte(0, 0, 0, 0xf4, config); | ||
2099 | + pci_write_config_byte(dev, 0xf4, config); | ||
2100 | } | ||
2101 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); | ||
2102 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); | ||
2103 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); | ||
2104 | - | ||
2105 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); | ||
2106 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); | ||
2107 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); | ||
2108 | #endif | ||
2109 | --- a/arch/x86/kernel/setup64-xen.c | ||
2110 | +++ b/arch/x86/kernel/setup64-xen.c | ||
2111 | @@ -113,9 +113,9 @@ | ||
2112 | if (!NODE_DATA(cpu_to_node(i))) { | ||
2113 | printk("cpu with no node %d, num_online_nodes %d\n", | ||
2114 | i, num_online_nodes()); | ||
2115 | - ptr = alloc_bootmem(size); | ||
2116 | + ptr = alloc_bootmem_pages(size); | ||
2117 | } else { | ||
2118 | - ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); | ||
2119 | + ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); | ||
2120 | } | ||
2121 | if (!ptr) | ||
2122 | panic("Cannot allocate cpu data for CPU %d\n", i); | ||
2123 | @@ -208,6 +208,8 @@ | ||
2124 | __attribute__((section(".bss.page_aligned"))); | ||
2125 | #endif | ||
2126 | |||
2127 | +extern asmlinkage void ignore_sysret(void); | ||
2128 | + | ||
2129 | /* May not be marked __init: used by software suspend */ | ||
2130 | void syscall_init(void) | ||
2131 | { | ||
2132 | @@ -219,12 +221,22 @@ | ||
2133 | */ | ||
2134 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | ||
2135 | wrmsrl(MSR_LSTAR, system_call); | ||
2136 | + wrmsrl(MSR_CSTAR, ignore_sysret); | ||
2137 | |||
2138 | /* Flags to clear on syscall */ | ||
2139 | wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); | ||
2140 | #endif | ||
2141 | #ifdef CONFIG_IA32_EMULATION | ||
2142 | syscall32_cpu_init (); | ||
2143 | +#else | ||
2144 | + { | ||
2145 | + static const struct callback_register cstar = { | ||
2146 | + .type = CALLBACKTYPE_syscall32, | ||
2147 | + .address = (unsigned long)ignore_sysret | ||
2148 | + }; | ||
2149 | + if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar)) | ||
2150 | + printk(KERN_WARNING "Unable to register CSTAR callback\n"); | ||
2151 | + } | ||
2152 | #endif | ||
2153 | } | ||
2154 | |||
2155 | @@ -262,7 +274,6 @@ | ||
2156 | /* CPU 0 is initialised in head64.c */ | ||
2157 | if (cpu != 0) { | ||
2158 | pda_init(cpu); | ||
2159 | - zap_low_mappings(cpu); | ||
2160 | } | ||
2161 | #ifndef CONFIG_X86_NO_TSS | ||
2162 | else | ||
2163 | --- a/arch/x86/kernel/setup_64-xen.c | ||
2164 | +++ b/arch/x86/kernel/setup_64-xen.c | ||
2165 | @@ -123,6 +123,8 @@ | ||
2166 | |||
2167 | unsigned long saved_video_mode; | ||
2168 | |||
2169 | +int force_mwait __cpuinitdata; | ||
2170 | + | ||
2171 | /* | ||
2172 | * Early DMI memory | ||
2173 | */ | ||
2174 | @@ -256,10 +258,10 @@ | ||
2175 | * there is a real-mode segmented pointer pointing to the | ||
2176 | * 4K EBDA area at 0x40E | ||
2177 | */ | ||
2178 | - ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; | ||
2179 | + ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); | ||
2180 | ebda_addr <<= 4; | ||
2181 | |||
2182 | - ebda_size = *(unsigned short *)(unsigned long)ebda_addr; | ||
2183 | + ebda_size = *(unsigned short *)__va(ebda_addr); | ||
2184 | |||
2185 | /* Round EBDA up to pages */ | ||
2186 | if (ebda_size == 0) | ||
2187 | @@ -413,15 +415,8 @@ | ||
2188 | #endif | ||
2189 | |||
2190 | #ifdef CONFIG_SMP | ||
2191 | - /* | ||
2192 | - * But first pinch a few for the stack/trampoline stuff | ||
2193 | - * FIXME: Don't need the extra page at 4K, but need to fix | ||
2194 | - * trampoline before removing it. (see the GDT stuff) | ||
2195 | - */ | ||
2196 | - reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE); | ||
2197 | - | ||
2198 | /* Reserve SMP trampoline */ | ||
2199 | - reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE); | ||
2200 | + reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE); | ||
2201 | #endif | ||
2202 | #endif | ||
2203 | |||
2204 | @@ -573,8 +568,6 @@ | ||
2205 | early_quirks(); | ||
2206 | #endif | ||
2207 | |||
2208 | - zap_low_mappings(0); | ||
2209 | - | ||
2210 | /* | ||
2211 | * set this early, so we dont allocate cpu0 | ||
2212 | * if MADT list doesnt list BSP first | ||
2213 | @@ -877,6 +870,10 @@ | ||
2214 | |||
2215 | /* RDTSC can be speculated around */ | ||
2216 | clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
2217 | + | ||
2218 | + /* Family 10 doesn't support C states in MWAIT so don't use it */ | ||
2219 | + if (c->x86 == 0x10 && !force_mwait) | ||
2220 | + clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); | ||
2221 | } | ||
2222 | |||
2223 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | ||
2224 | @@ -1159,9 +1156,7 @@ | ||
2225 | #ifdef CONFIG_X86_MCE | ||
2226 | mcheck_init(c); | ||
2227 | #endif | ||
2228 | - if (c == &boot_cpu_data) | ||
2229 | - mtrr_bp_init(); | ||
2230 | - else | ||
2231 | + if (c != &boot_cpu_data) | ||
2232 | mtrr_ap_init(); | ||
2233 | #ifdef CONFIG_NUMA | ||
2234 | numa_add_cpu(smp_processor_id()); | ||
2235 | @@ -1252,9 +1247,8 @@ | ||
2236 | "stc", | ||
2237 | "100mhzsteps", | ||
2238 | "hwpstate", | ||
2239 | - NULL, /* tsc invariant mapped to constant_tsc */ | ||
2240 | - NULL, | ||
2241 | - /* nothing */ /* constant_tsc - moved to flags */ | ||
2242 | + "", /* tsc invariant mapped to constant_tsc */ | ||
2243 | + /* nothing */ | ||
2244 | }; | ||
2245 | |||
2246 | |||
2247 | --- a/arch/x86/kernel/smp_32-xen.c | ||
2248 | +++ b/arch/x86/kernel/smp_32-xen.c | ||
2249 | @@ -13,7 +13,6 @@ | ||
2250 | #include <linux/mm.h> | ||
2251 | #include <linux/delay.h> | ||
2252 | #include <linux/spinlock.h> | ||
2253 | -#include <linux/smp_lock.h> | ||
2254 | #include <linux/kernel_stat.h> | ||
2255 | #include <linux/mc146818rtc.h> | ||
2256 | #include <linux/cache.h> | ||
2257 | @@ -216,7 +215,6 @@ | ||
2258 | static struct mm_struct * flush_mm; | ||
2259 | static unsigned long flush_va; | ||
2260 | static DEFINE_SPINLOCK(tlbstate_lock); | ||
2261 | -#define FLUSH_ALL 0xffffffff | ||
2262 | |||
2263 | /* | ||
2264 | * We cannot call mmdrop() because we are in interrupt context, | ||
2265 | @@ -298,7 +296,7 @@ | ||
2266 | |||
2267 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { | ||
2268 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { | ||
2269 | - if (flush_va == FLUSH_ALL) | ||
2270 | + if (flush_va == TLB_FLUSH_ALL) | ||
2271 | local_flush_tlb(); | ||
2272 | else | ||
2273 | __flush_tlb_one(flush_va); | ||
2274 | @@ -314,9 +312,11 @@ | ||
2275 | return IRQ_HANDLED; | ||
2276 | } | ||
2277 | |||
2278 | -static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | ||
2279 | - unsigned long va) | ||
2280 | +void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | ||
2281 | + unsigned long va) | ||
2282 | { | ||
2283 | + cpumask_t cpumask = *cpumaskp; | ||
2284 | + | ||
2285 | /* | ||
2286 | * A couple of (to be removed) sanity checks: | ||
2287 | * | ||
2288 | @@ -327,10 +327,12 @@ | ||
2289 | BUG_ON(cpu_isset(smp_processor_id(), cpumask)); | ||
2290 | BUG_ON(!mm); | ||
2291 | |||
2292 | +#ifdef CONFIG_HOTPLUG_CPU | ||
2293 | /* If a CPU which we ran on has gone down, OK. */ | ||
2294 | cpus_and(cpumask, cpumask, cpu_online_map); | ||
2295 | - if (cpus_empty(cpumask)) | ||
2296 | + if (unlikely(cpus_empty(cpumask))) | ||
2297 | return; | ||
2298 | +#endif | ||
2299 | |||
2300 | /* | ||
2301 | * i'm not happy about this global shared spinlock in the | ||
2302 | @@ -341,17 +343,7 @@ | ||
2303 | |||
2304 | flush_mm = mm; | ||
2305 | flush_va = va; | ||
2306 | -#if NR_CPUS <= BITS_PER_LONG | ||
2307 | - atomic_set_mask(cpumask, &flush_cpumask); | ||
2308 | -#else | ||
2309 | - { | ||
2310 | - int k; | ||
2311 | - unsigned long *flush_mask = (unsigned long *)&flush_cpumask; | ||
2312 | - unsigned long *cpu_mask = (unsigned long *)&cpumask; | ||
2313 | - for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) | ||
2314 | - atomic_set_mask(cpu_mask[k], &flush_mask[k]); | ||
2315 | - } | ||
2316 | -#endif | ||
2317 | + cpus_or(flush_cpumask, cpumask, flush_cpumask); | ||
2318 | /* | ||
2319 | * We have to send the IPI only to | ||
2320 | * CPUs affected. | ||
2321 | @@ -378,7 +370,7 @@ | ||
2322 | |||
2323 | local_flush_tlb(); | ||
2324 | if (!cpus_empty(cpu_mask)) | ||
2325 | - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | ||
2326 | + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
2327 | preempt_enable(); | ||
2328 | } | ||
2329 | |||
2330 | @@ -397,7 +389,7 @@ | ||
2331 | leave_mm(smp_processor_id()); | ||
2332 | } | ||
2333 | if (!cpus_empty(cpu_mask)) | ||
2334 | - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | ||
2335 | + flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
2336 | |||
2337 | preempt_enable(); | ||
2338 | } | ||
2339 | @@ -446,7 +438,7 @@ | ||
2340 | * it goes straight through and wastes no time serializing | ||
2341 | * anything. Worst case is that we lose a reschedule ... | ||
2342 | */ | ||
2343 | -void smp_send_reschedule(int cpu) | ||
2344 | +void xen_smp_send_reschedule(int cpu) | ||
2345 | { | ||
2346 | WARN_ON(cpu_is_offline(cpu)); | ||
2347 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | ||
2348 | @@ -478,36 +470,79 @@ | ||
2349 | |||
2350 | static struct call_data_struct *call_data; | ||
2351 | |||
2352 | +static void __smp_call_function(void (*func) (void *info), void *info, | ||
2353 | + int nonatomic, int wait) | ||
2354 | +{ | ||
2355 | + struct call_data_struct data; | ||
2356 | + int cpus = num_online_cpus() - 1; | ||
2357 | + | ||
2358 | + if (!cpus) | ||
2359 | + return; | ||
2360 | + | ||
2361 | + data.func = func; | ||
2362 | + data.info = info; | ||
2363 | + atomic_set(&data.started, 0); | ||
2364 | + data.wait = wait; | ||
2365 | + if (wait) | ||
2366 | + atomic_set(&data.finished, 0); | ||
2367 | + | ||
2368 | + call_data = &data; | ||
2369 | + mb(); | ||
2370 | + | ||
2371 | + /* Send a message to all other CPUs and wait for them to respond */ | ||
2372 | + send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
2373 | + | ||
2374 | + /* Wait for response */ | ||
2375 | + while (atomic_read(&data.started) != cpus) | ||
2376 | + cpu_relax(); | ||
2377 | + | ||
2378 | + if (wait) | ||
2379 | + while (atomic_read(&data.finished) != cpus) | ||
2380 | + cpu_relax(); | ||
2381 | +} | ||
2382 | + | ||
2383 | + | ||
2384 | /** | ||
2385 | - * smp_call_function(): Run a function on all other CPUs. | ||
2386 | + * smp_call_function_mask(): Run a function on a set of other CPUs. | ||
2387 | + * @mask: The set of cpus to run on. Must not include the current cpu. | ||
2388 | * @func: The function to run. This must be fast and non-blocking. | ||
2389 | * @info: An arbitrary pointer to pass to the function. | ||
2390 | - * @nonatomic: currently unused. | ||
2391 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | ||
2392 | * | ||
2393 | - * Returns 0 on success, else a negative status code. Does not return until | ||
2394 | - * remote CPUs are nearly ready to execute <<func>> or are or have executed. | ||
2395 | + * Returns 0 on success, else a negative status code. | ||
2396 | + * | ||
2397 | + * If @wait is true, then returns once @func has returned; otherwise | ||
2398 | + * it returns just before the target cpu calls @func. | ||
2399 | * | ||
2400 | * You must not call this function with disabled interrupts or from a | ||
2401 | * hardware interrupt handler or from a bottom half handler. | ||
2402 | */ | ||
2403 | -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | ||
2404 | - int wait) | ||
2405 | +int | ||
2406 | +xen_smp_call_function_mask(cpumask_t mask, | ||
2407 | + void (*func)(void *), void *info, | ||
2408 | + int wait) | ||
2409 | { | ||
2410 | struct call_data_struct data; | ||
2411 | + cpumask_t allbutself; | ||
2412 | int cpus; | ||
2413 | |||
2414 | + /* Can deadlock when called with interrupts disabled */ | ||
2415 | + WARN_ON(irqs_disabled()); | ||
2416 | + | ||
2417 | /* Holding any lock stops cpus from going down. */ | ||
2418 | spin_lock(&call_lock); | ||
2419 | - cpus = num_online_cpus() - 1; | ||
2420 | + | ||
2421 | + allbutself = cpu_online_map; | ||
2422 | + cpu_clear(smp_processor_id(), allbutself); | ||
2423 | + | ||
2424 | + cpus_and(mask, mask, allbutself); | ||
2425 | + cpus = cpus_weight(mask); | ||
2426 | + | ||
2427 | if (!cpus) { | ||
2428 | spin_unlock(&call_lock); | ||
2429 | return 0; | ||
2430 | } | ||
2431 | |||
2432 | - /* Can deadlock when called with interrupts disabled */ | ||
2433 | - WARN_ON(irqs_disabled()); | ||
2434 | - | ||
2435 | data.func = func; | ||
2436 | data.info = info; | ||
2437 | atomic_set(&data.started, 0); | ||
2438 | @@ -517,9 +552,12 @@ | ||
2439 | |||
2440 | call_data = &data; | ||
2441 | mb(); | ||
2442 | - | ||
2443 | - /* Send a message to all other CPUs and wait for them to respond */ | ||
2444 | - send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
2445 | + | ||
2446 | + /* Send a message to other CPUs */ | ||
2447 | + if (cpus_equal(mask, allbutself)) | ||
2448 | + send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
2449 | + else | ||
2450 | + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | ||
2451 | |||
2452 | /* Wait for response */ | ||
2453 | while (atomic_read(&data.started) != cpus) | ||
2454 | @@ -532,15 +570,14 @@ | ||
2455 | |||
2456 | return 0; | ||
2457 | } | ||
2458 | -EXPORT_SYMBOL(smp_call_function); | ||
2459 | |||
2460 | static void stop_this_cpu (void * dummy) | ||
2461 | { | ||
2462 | + local_irq_disable(); | ||
2463 | /* | ||
2464 | * Remove this CPU: | ||
2465 | */ | ||
2466 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
2467 | - local_irq_disable(); | ||
2468 | disable_all_local_evtchn(); | ||
2469 | if (cpu_data[smp_processor_id()].hlt_works_ok) | ||
2470 | for(;;) halt(); | ||
2471 | @@ -551,13 +588,18 @@ | ||
2472 | * this function calls the 'stop' function on all other CPUs in the system. | ||
2473 | */ | ||
2474 | |||
2475 | -void smp_send_stop(void) | ||
2476 | +void xen_smp_send_stop(void) | ||
2477 | { | ||
2478 | - smp_call_function(stop_this_cpu, NULL, 1, 0); | ||
2479 | + /* Don't deadlock on the call lock in panic */ | ||
2480 | + int nolock = !spin_trylock(&call_lock); | ||
2481 | + unsigned long flags; | ||
2482 | |||
2483 | - local_irq_disable(); | ||
2484 | + local_irq_save(flags); | ||
2485 | + __smp_call_function(stop_this_cpu, NULL, 0, 0); | ||
2486 | + if (!nolock) | ||
2487 | + spin_unlock(&call_lock); | ||
2488 | disable_all_local_evtchn(); | ||
2489 | - local_irq_enable(); | ||
2490 | + local_irq_restore(flags); | ||
2491 | } | ||
2492 | |||
2493 | /* | ||
2494 | @@ -598,74 +640,3 @@ | ||
2495 | |||
2496 | return IRQ_HANDLED; | ||
2497 | } | ||
2498 | - | ||
2499 | -/* | ||
2500 | - * this function sends a 'generic call function' IPI to one other CPU | ||
2501 | - * in the system. | ||
2502 | - * | ||
2503 | - * cpu is a standard Linux logical CPU number. | ||
2504 | - */ | ||
2505 | -static void | ||
2506 | -__smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
2507 | - int nonatomic, int wait) | ||
2508 | -{ | ||
2509 | - struct call_data_struct data; | ||
2510 | - int cpus = 1; | ||
2511 | - | ||
2512 | - data.func = func; | ||
2513 | - data.info = info; | ||
2514 | - atomic_set(&data.started, 0); | ||
2515 | - data.wait = wait; | ||
2516 | - if (wait) | ||
2517 | - atomic_set(&data.finished, 0); | ||
2518 | - | ||
2519 | - call_data = &data; | ||
2520 | - wmb(); | ||
2521 | - /* Send a message to all other CPUs and wait for them to respond */ | ||
2522 | - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); | ||
2523 | - | ||
2524 | - /* Wait for response */ | ||
2525 | - while (atomic_read(&data.started) != cpus) | ||
2526 | - cpu_relax(); | ||
2527 | - | ||
2528 | - if (!wait) | ||
2529 | - return; | ||
2530 | - | ||
2531 | - while (atomic_read(&data.finished) != cpus) | ||
2532 | - cpu_relax(); | ||
2533 | -} | ||
2534 | - | ||
2535 | -/* | ||
2536 | - * smp_call_function_single - Run a function on another CPU | ||
2537 | - * @func: The function to run. This must be fast and non-blocking. | ||
2538 | - * @info: An arbitrary pointer to pass to the function. | ||
2539 | - * @nonatomic: Currently unused. | ||
2540 | - * @wait: If true, wait until function has completed on other CPUs. | ||
2541 | - * | ||
2542 | - * Retrurns 0 on success, else a negative status code. | ||
2543 | - * | ||
2544 | - * Does not return until the remote CPU is nearly ready to execute <func> | ||
2545 | - * or is or has executed. | ||
2546 | - */ | ||
2547 | - | ||
2548 | -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
2549 | - int nonatomic, int wait) | ||
2550 | -{ | ||
2551 | - /* prevent preemption and reschedule on another processor */ | ||
2552 | - int me = get_cpu(); | ||
2553 | - if (cpu == me) { | ||
2554 | - WARN_ON(1); | ||
2555 | - put_cpu(); | ||
2556 | - return -EBUSY; | ||
2557 | - } | ||
2558 | - | ||
2559 | - /* Can deadlock when called with interrupts disabled */ | ||
2560 | - WARN_ON(irqs_disabled()); | ||
2561 | - | ||
2562 | - spin_lock_bh(&call_lock); | ||
2563 | - __smp_call_function_single(cpu, func, info, nonatomic, wait); | ||
2564 | - spin_unlock_bh(&call_lock); | ||
2565 | - put_cpu(); | ||
2566 | - return 0; | ||
2567 | -} | ||
2568 | -EXPORT_SYMBOL(smp_call_function_single); | ||
2569 | --- a/arch/x86/kernel/smp_64-xen.c | ||
2570 | +++ b/arch/x86/kernel/smp_64-xen.c | ||
2571 | @@ -14,7 +14,6 @@ | ||
2572 | #include <linux/mm.h> | ||
2573 | #include <linux/delay.h> | ||
2574 | #include <linux/spinlock.h> | ||
2575 | -#include <linux/smp_lock.h> | ||
2576 | #include <linux/smp.h> | ||
2577 | #include <linux/kernel_stat.h> | ||
2578 | #include <linux/mc146818rtc.h> | ||
2579 | @@ -457,44 +456,36 @@ | ||
2580 | } | ||
2581 | EXPORT_SYMBOL(smp_call_function); | ||
2582 | |||
2583 | -void smp_stop_cpu(void) | ||
2584 | +static void stop_this_cpu(void *dummy) | ||
2585 | { | ||
2586 | - unsigned long flags; | ||
2587 | + local_irq_disable(); | ||
2588 | /* | ||
2589 | * Remove this CPU: | ||
2590 | */ | ||
2591 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
2592 | - local_irq_save(flags); | ||
2593 | disable_all_local_evtchn(); | ||
2594 | - local_irq_restore(flags); | ||
2595 | -} | ||
2596 | - | ||
2597 | -static void smp_really_stop_cpu(void *dummy) | ||
2598 | -{ | ||
2599 | - smp_stop_cpu(); | ||
2600 | for (;;) | ||
2601 | halt(); | ||
2602 | } | ||
2603 | |||
2604 | void smp_send_stop(void) | ||
2605 | { | ||
2606 | - int nolock = 0; | ||
2607 | + int nolock; | ||
2608 | + unsigned long flags; | ||
2609 | + | ||
2610 | #ifndef CONFIG_XEN | ||
2611 | if (reboot_force) | ||
2612 | return; | ||
2613 | #endif | ||
2614 | + | ||
2615 | /* Don't deadlock on the call lock in panic */ | ||
2616 | - if (!spin_trylock(&call_lock)) { | ||
2617 | - /* ignore locking because we have panicked anyways */ | ||
2618 | - nolock = 1; | ||
2619 | - } | ||
2620 | - __smp_call_function(smp_really_stop_cpu, NULL, 0, 0); | ||
2621 | + nolock = !spin_trylock(&call_lock); | ||
2622 | + local_irq_save(flags); | ||
2623 | + __smp_call_function(stop_this_cpu, NULL, 0, 0); | ||
2624 | if (!nolock) | ||
2625 | spin_unlock(&call_lock); | ||
2626 | - | ||
2627 | - local_irq_disable(); | ||
2628 | disable_all_local_evtchn(); | ||
2629 | - local_irq_enable(); | ||
2630 | + local_irq_restore(flags); | ||
2631 | } | ||
2632 | |||
2633 | /* | ||
2634 | --- a/arch/x86/kernel/time_32-xen.c | ||
2635 | +++ b/arch/x86/kernel/time_32-xen.c | ||
2636 | @@ -80,7 +80,6 @@ | ||
2637 | #include <asm/i8253.h> | ||
2638 | DEFINE_SPINLOCK(i8253_lock); | ||
2639 | EXPORT_SYMBOL(i8253_lock); | ||
2640 | -int pit_latch_buggy; /* extern */ | ||
2641 | #else | ||
2642 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | ||
2643 | #endif | ||
2644 | @@ -589,7 +588,7 @@ | ||
2645 | return IRQ_HANDLED; | ||
2646 | } | ||
2647 | |||
2648 | -void mark_tsc_unstable(void) | ||
2649 | +void mark_tsc_unstable(char *reason) | ||
2650 | { | ||
2651 | #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */ | ||
2652 | tsc_unstable = 1; | ||
2653 | @@ -597,17 +596,18 @@ | ||
2654 | } | ||
2655 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
2656 | |||
2657 | +static cycle_t cs_last; | ||
2658 | + | ||
2659 | static cycle_t xen_clocksource_read(void) | ||
2660 | { | ||
2661 | cycle_t ret = sched_clock(); | ||
2662 | |||
2663 | #ifdef CONFIG_SMP | ||
2664 | for (;;) { | ||
2665 | - static cycle_t last_ret; | ||
2666 | #ifndef CONFIG_64BIT | ||
2667 | - cycle_t last = cmpxchg64(&last_ret, 0, 0); | ||
2668 | + cycle_t last = cmpxchg64(&cs_last, 0, 0); | ||
2669 | #else | ||
2670 | - cycle_t last = last_ret; | ||
2671 | + cycle_t last = cs_last; | ||
2672 | #define cmpxchg64 cmpxchg | ||
2673 | #endif | ||
2674 | |||
2675 | @@ -627,7 +627,7 @@ | ||
2676 | } | ||
2677 | ret = last; | ||
2678 | } | ||
2679 | - if (cmpxchg64(&last_ret, last, ret) == last) | ||
2680 | + if (cmpxchg64(&cs_last, last, ret) == last) | ||
2681 | break; | ||
2682 | } | ||
2683 | #endif | ||
2684 | @@ -635,6 +635,14 @@ | ||
2685 | return ret; | ||
2686 | } | ||
2687 | |||
2688 | +static void xen_clocksource_resume(void) | ||
2689 | +{ | ||
2690 | + extern void time_resume(void); | ||
2691 | + | ||
2692 | + time_resume(); | ||
2693 | + cs_last = sched_clock(); | ||
2694 | +} | ||
2695 | + | ||
2696 | static struct clocksource clocksource_xen = { | ||
2697 | .name = "xen", | ||
2698 | .rating = 400, | ||
2699 | @@ -643,6 +651,7 @@ | ||
2700 | .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */ | ||
2701 | .shift = XEN_SHIFT, | ||
2702 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
2703 | + .resume = xen_clocksource_resume, | ||
2704 | }; | ||
2705 | |||
2706 | static void init_missing_ticks_accounting(unsigned int cpu) | ||
2707 | @@ -731,35 +740,6 @@ | ||
2708 | mod_timer(&sync_xen_wallclock_timer, jiffies + 1); | ||
2709 | } | ||
2710 | |||
2711 | -static int timer_resume(struct sys_device *dev) | ||
2712 | -{ | ||
2713 | - extern void time_resume(void); | ||
2714 | - time_resume(); | ||
2715 | - return 0; | ||
2716 | -} | ||
2717 | - | ||
2718 | -static struct sysdev_class timer_sysclass = { | ||
2719 | - .resume = timer_resume, | ||
2720 | - set_kset_name("timer"), | ||
2721 | -}; | ||
2722 | - | ||
2723 | - | ||
2724 | -/* XXX this driverfs stuff should probably go elsewhere later -john */ | ||
2725 | -static struct sys_device device_timer = { | ||
2726 | - .id = 0, | ||
2727 | - .cls = &timer_sysclass, | ||
2728 | -}; | ||
2729 | - | ||
2730 | -static int time_init_device(void) | ||
2731 | -{ | ||
2732 | - int error = sysdev_class_register(&timer_sysclass); | ||
2733 | - if (!error) | ||
2734 | - error = sysdev_register(&device_timer); | ||
2735 | - return error; | ||
2736 | -} | ||
2737 | - | ||
2738 | -device_initcall(time_init_device); | ||
2739 | - | ||
2740 | extern void (*late_time_init)(void); | ||
2741 | |||
2742 | /* Dynamically-mapped IRQ. */ | ||
2743 | @@ -772,7 +752,7 @@ | ||
2744 | VIRQ_TIMER, | ||
2745 | 0, | ||
2746 | timer_interrupt, | ||
2747 | - SA_INTERRUPT, | ||
2748 | + IRQF_DISABLED, | ||
2749 | "timer0", | ||
2750 | NULL); | ||
2751 | BUG_ON(per_cpu(timer_irq, 0) < 0); | ||
2752 | @@ -890,21 +870,21 @@ | ||
2753 | cpu_clear(smp_processor_id(), nohz_cpu_mask); | ||
2754 | } | ||
2755 | |||
2756 | -void raw_safe_halt(void) | ||
2757 | +void xen_safe_halt(void) | ||
2758 | { | ||
2759 | stop_hz_timer(); | ||
2760 | /* Blocking includes an implicit local_irq_enable(). */ | ||
2761 | HYPERVISOR_block(); | ||
2762 | start_hz_timer(); | ||
2763 | } | ||
2764 | -EXPORT_SYMBOL(raw_safe_halt); | ||
2765 | +EXPORT_SYMBOL(xen_safe_halt); | ||
2766 | |||
2767 | -void halt(void) | ||
2768 | +void xen_halt(void) | ||
2769 | { | ||
2770 | if (irqs_disabled()) | ||
2771 | VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL)); | ||
2772 | } | ||
2773 | -EXPORT_SYMBOL(halt); | ||
2774 | +EXPORT_SYMBOL(xen_halt); | ||
2775 | |||
2776 | /* No locking required. Interrupts are disabled on all CPUs. */ | ||
2777 | void time_resume(void) | ||
2778 | @@ -967,7 +947,7 @@ | ||
2779 | irq = bind_virq_to_irqhandler(VIRQ_TIMER, | ||
2780 | cpu, | ||
2781 | timer_interrupt, | ||
2782 | - SA_INTERRUPT, | ||
2783 | + IRQF_DISABLED, | ||
2784 | timer_name[cpu], | ||
2785 | NULL); | ||
2786 | if (irq < 0) | ||
2787 | --- a/arch/x86/kernel/traps_32-xen.c | ||
2788 | +++ b/arch/x86/kernel/traps_32-xen.c | ||
2789 | @@ -52,7 +52,7 @@ | ||
2790 | #include <asm/unwind.h> | ||
2791 | #include <asm/smp.h> | ||
2792 | #include <asm/arch_hooks.h> | ||
2793 | -#include <asm/kdebug.h> | ||
2794 | +#include <linux/kdebug.h> | ||
2795 | #include <asm/stacktrace.h> | ||
2796 | |||
2797 | #include <linux/module.h> | ||
2798 | @@ -101,20 +101,6 @@ | ||
2799 | |||
2800 | int kstack_depth_to_print = 24; | ||
2801 | static unsigned int code_bytes = 64; | ||
2802 | -ATOMIC_NOTIFIER_HEAD(i386die_chain); | ||
2803 | - | ||
2804 | -int register_die_notifier(struct notifier_block *nb) | ||
2805 | -{ | ||
2806 | - vmalloc_sync_all(); | ||
2807 | - return atomic_notifier_chain_register(&i386die_chain, nb); | ||
2808 | -} | ||
2809 | -EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */ | ||
2810 | - | ||
2811 | -int unregister_die_notifier(struct notifier_block *nb) | ||
2812 | -{ | ||
2813 | - return atomic_notifier_chain_unregister(&i386die_chain, nb); | ||
2814 | -} | ||
2815 | -EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */ | ||
2816 | |||
2817 | static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) | ||
2818 | { | ||
2819 | @@ -325,7 +311,7 @@ | ||
2820 | regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); | ||
2821 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", | ||
2822 | TASK_COMM_LEN, current->comm, current->pid, | ||
2823 | - current_thread_info(), current, current->thread_info); | ||
2824 | + current_thread_info(), current, task_thread_info(current)); | ||
2825 | /* | ||
2826 | * When in-kernel, we also print out the stack and code at the | ||
2827 | * time of the fault.. | ||
2828 | @@ -482,8 +468,6 @@ | ||
2829 | siginfo_t *info) | ||
2830 | { | ||
2831 | struct task_struct *tsk = current; | ||
2832 | - tsk->thread.error_code = error_code; | ||
2833 | - tsk->thread.trap_no = trapnr; | ||
2834 | |||
2835 | if (regs->eflags & VM_MASK) { | ||
2836 | if (vm86) | ||
2837 | @@ -495,6 +479,18 @@ | ||
2838 | goto kernel_trap; | ||
2839 | |||
2840 | trap_signal: { | ||
2841 | + /* | ||
2842 | + * We want error_code and trap_no set for userspace faults and | ||
2843 | + * kernelspace faults which result in die(), but not | ||
2844 | + * kernelspace faults which are fixed up. die() gives the | ||
2845 | + * process no chance to handle the signal and notice the | ||
2846 | + * kernel fault information, so that won't result in polluting | ||
2847 | + * the information about previously queued, but not yet | ||
2848 | + * delivered, faults. See also do_general_protection below. | ||
2849 | + */ | ||
2850 | + tsk->thread.error_code = error_code; | ||
2851 | + tsk->thread.trap_no = trapnr; | ||
2852 | + | ||
2853 | if (info) | ||
2854 | force_sig_info(signr, info, tsk); | ||
2855 | else | ||
2856 | @@ -503,8 +499,11 @@ | ||
2857 | } | ||
2858 | |||
2859 | kernel_trap: { | ||
2860 | - if (!fixup_exception(regs)) | ||
2861 | + if (!fixup_exception(regs)) { | ||
2862 | + tsk->thread.error_code = error_code; | ||
2863 | + tsk->thread.trap_no = trapnr; | ||
2864 | die(str, regs, error_code); | ||
2865 | + } | ||
2866 | return; | ||
2867 | } | ||
2868 | |||
2869 | @@ -578,9 +577,6 @@ | ||
2870 | fastcall void __kprobes do_general_protection(struct pt_regs * regs, | ||
2871 | long error_code) | ||
2872 | { | ||
2873 | - current->thread.error_code = error_code; | ||
2874 | - current->thread.trap_no = 13; | ||
2875 | - | ||
2876 | if (regs->eflags & VM_MASK) | ||
2877 | goto gp_in_vm86; | ||
2878 | |||
2879 | @@ -599,6 +595,8 @@ | ||
2880 | |||
2881 | gp_in_kernel: | ||
2882 | if (!fixup_exception(regs)) { | ||
2883 | + current->thread.error_code = error_code; | ||
2884 | + current->thread.trap_no = 13; | ||
2885 | if (notify_die(DIE_GPF, "general protection fault", regs, | ||
2886 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | ||
2887 | return; | ||
2888 | @@ -987,9 +985,7 @@ | ||
2889 | fastcall unsigned long patch_espfix_desc(unsigned long uesp, | ||
2890 | unsigned long kesp) | ||
2891 | { | ||
2892 | - int cpu = smp_processor_id(); | ||
2893 | - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
2894 | - struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
2895 | + struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; | ||
2896 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; | ||
2897 | unsigned long new_kesp = kesp - base; | ||
2898 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | ||
2899 | --- a/arch/x86/kernel/traps_64-xen.c | ||
2900 | +++ b/arch/x86/kernel/traps_64-xen.c | ||
2901 | @@ -32,6 +32,7 @@ | ||
2902 | #include <linux/unwind.h> | ||
2903 | #include <linux/uaccess.h> | ||
2904 | #include <linux/bug.h> | ||
2905 | +#include <linux/kdebug.h> | ||
2906 | |||
2907 | #include <asm/system.h> | ||
2908 | #include <asm/io.h> | ||
2909 | @@ -39,7 +40,6 @@ | ||
2910 | #include <asm/debugreg.h> | ||
2911 | #include <asm/desc.h> | ||
2912 | #include <asm/i387.h> | ||
2913 | -#include <asm/kdebug.h> | ||
2914 | #include <asm/processor.h> | ||
2915 | #include <asm/unwind.h> | ||
2916 | #include <asm/smp.h> | ||
2917 | @@ -71,22 +71,6 @@ | ||
2918 | asmlinkage void machine_check(void); | ||
2919 | asmlinkage void spurious_interrupt_bug(void); | ||
2920 | |||
2921 | -ATOMIC_NOTIFIER_HEAD(die_chain); | ||
2922 | -EXPORT_SYMBOL(die_chain); | ||
2923 | - | ||
2924 | -int register_die_notifier(struct notifier_block *nb) | ||
2925 | -{ | ||
2926 | - vmalloc_sync_all(); | ||
2927 | - return atomic_notifier_chain_register(&die_chain, nb); | ||
2928 | -} | ||
2929 | -EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */ | ||
2930 | - | ||
2931 | -int unregister_die_notifier(struct notifier_block *nb) | ||
2932 | -{ | ||
2933 | - return atomic_notifier_chain_unregister(&die_chain, nb); | ||
2934 | -} | ||
2935 | -EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */ | ||
2936 | - | ||
2937 | static inline void conditional_sti(struct pt_regs *regs) | ||
2938 | { | ||
2939 | if (regs->eflags & X86_EFLAGS_IF) | ||
2940 | @@ -428,8 +412,7 @@ | ||
2941 | const int cpu = smp_processor_id(); | ||
2942 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | ||
2943 | |||
2944 | - rsp = regs->rsp; | ||
2945 | - | ||
2946 | + rsp = regs->rsp; | ||
2947 | printk("CPU %d ", cpu); | ||
2948 | __show_regs(regs); | ||
2949 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | ||
2950 | @@ -440,7 +423,6 @@ | ||
2951 | * time of the fault.. | ||
2952 | */ | ||
2953 | if (in_kernel) { | ||
2954 | - | ||
2955 | printk("Stack: "); | ||
2956 | _show_stack(NULL, regs, (unsigned long*)rsp); | ||
2957 | |||
2958 | @@ -485,13 +467,14 @@ | ||
2959 | |||
2960 | unsigned __kprobes long oops_begin(void) | ||
2961 | { | ||
2962 | - int cpu = smp_processor_id(); | ||
2963 | + int cpu; | ||
2964 | unsigned long flags; | ||
2965 | |||
2966 | oops_enter(); | ||
2967 | |||
2968 | /* racy, but better than risking deadlock. */ | ||
2969 | local_irq_save(flags); | ||
2970 | + cpu = smp_processor_id(); | ||
2971 | if (!spin_trylock(&die_lock)) { | ||
2972 | if (cpu == die_owner) | ||
2973 | /* nested oops. should stop eventually */; | ||
2974 | @@ -585,10 +568,20 @@ | ||
2975 | { | ||
2976 | struct task_struct *tsk = current; | ||
2977 | |||
2978 | - tsk->thread.error_code = error_code; | ||
2979 | - tsk->thread.trap_no = trapnr; | ||
2980 | - | ||
2981 | if (user_mode(regs)) { | ||
2982 | + /* | ||
2983 | + * We want error_code and trap_no set for userspace | ||
2984 | + * faults and kernelspace faults which result in | ||
2985 | + * die(), but not kernelspace faults which are fixed | ||
2986 | + * up. die() gives the process no chance to handle | ||
2987 | + * the signal and notice the kernel fault information, | ||
2988 | + * so that won't result in polluting the information | ||
2989 | + * about previously queued, but not yet delivered, | ||
2990 | + * faults. See also do_general_protection below. | ||
2991 | + */ | ||
2992 | + tsk->thread.error_code = error_code; | ||
2993 | + tsk->thread.trap_no = trapnr; | ||
2994 | + | ||
2995 | if (exception_trace && unhandled_signal(tsk, signr)) | ||
2996 | printk(KERN_INFO | ||
2997 | "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", | ||
2998 | @@ -609,8 +602,11 @@ | ||
2999 | fixup = search_exception_tables(regs->rip); | ||
3000 | if (fixup) | ||
3001 | regs->rip = fixup->fixup; | ||
3002 | - else | ||
3003 | + else { | ||
3004 | + tsk->thread.error_code = error_code; | ||
3005 | + tsk->thread.trap_no = trapnr; | ||
3006 | die(str, regs, error_code); | ||
3007 | + } | ||
3008 | return; | ||
3009 | } | ||
3010 | } | ||
3011 | @@ -686,10 +682,10 @@ | ||
3012 | |||
3013 | conditional_sti(regs); | ||
3014 | |||
3015 | - tsk->thread.error_code = error_code; | ||
3016 | - tsk->thread.trap_no = 13; | ||
3017 | - | ||
3018 | if (user_mode(regs)) { | ||
3019 | + tsk->thread.error_code = error_code; | ||
3020 | + tsk->thread.trap_no = 13; | ||
3021 | + | ||
3022 | if (exception_trace && unhandled_signal(tsk, SIGSEGV)) | ||
3023 | printk(KERN_INFO | ||
3024 | "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", | ||
3025 | @@ -708,6 +704,9 @@ | ||
3026 | regs->rip = fixup->fixup; | ||
3027 | return; | ||
3028 | } | ||
3029 | + | ||
3030 | + tsk->thread.error_code = error_code; | ||
3031 | + tsk->thread.trap_no = 13; | ||
3032 | if (notify_die(DIE_GPF, "general protection fault", regs, | ||
3033 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | ||
3034 | return; | ||
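The reordering in both traps files above amounts to: record error_code and trap_no only on the paths that will actually report the fault (user-mode delivery or die()), never on a kernel fault that gets fixed up, so information about a previously queued but not yet delivered signal is preserved. A minimal user-space sketch of that control flow, with invented stand-in names:

#include <stdio.h>
#include <stdbool.h>

struct thread_fault_info { long error_code; int trap_no; };
static struct thread_fault_info tsk;

/* pretend only address 42 has an exception-table fixup entry */
static bool fixup_exception(long ip) { return ip == 42; }

static void kernel_trap(long ip, long error_code, int trapnr)
{
    if (!fixup_exception(ip)) {
        /* only now is it safe to clobber the per-thread fault info */
        tsk.error_code = error_code;
        tsk.trap_no = trapnr;
        printf("die: trap %d, error %ld, ip %ld\n", trapnr, error_code, ip);
    }
}

int main(void)
{
    tsk.trap_no = 14;           /* a queued, not yet delivered, fault */
    kernel_trap(42, 0, 13);     /* fixed up: queued info survives */
    printf("trap_no is still %d\n", tsk.trap_no);
    kernel_trap(7, 0, 13);      /* no fixup: info recorded, then die */
    return 0;
}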
3035 | --- a/arch/x86/kernel/vsyscall_64-xen.c | ||
3036 | +++ b/arch/x86/kernel/vsyscall_64-xen.c | ||
3037 | @@ -45,14 +45,34 @@ | ||
3038 | |||
3039 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | ||
3040 | #define __syscall_clobber "r11","rcx","memory" | ||
3041 | +#define __pa_vsymbol(x) \ | ||
3042 | + ({unsigned long v; \ | ||
3043 | + extern char __vsyscall_0; \ | ||
3044 | + asm("" : "=r" (v) : "0" (x)); \ | ||
3045 | + ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) | ||
3046 | |||
3047 | +/* | ||
3048 | + * vsyscall_gtod_data contains data that is: | ||
3049 | + * - readonly from vsyscalls | ||
3050 | + * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64) | ||
3051 | + * Try to keep this structure as small as possible to avoid cache line ping pongs | ||
3052 | + */ | ||
3053 | struct vsyscall_gtod_data_t { | ||
3054 | - seqlock_t lock; | ||
3055 | - int sysctl_enabled; | ||
3056 | - struct timeval wall_time_tv; | ||
3057 | + seqlock_t lock; | ||
3058 | + | ||
3059 | + /* open coded 'struct timespec' */ | ||
3060 | + time_t wall_time_sec; | ||
3061 | + u32 wall_time_nsec; | ||
3062 | + | ||
3063 | + int sysctl_enabled; | ||
3064 | struct timezone sys_tz; | ||
3065 | - cycle_t offset_base; | ||
3066 | - struct clocksource clock; | ||
3067 | + struct { /* extract of a clocksource struct */ | ||
3068 | + cycle_t (*vread)(void); | ||
3069 | + cycle_t cycle_last; | ||
3070 | + cycle_t mask; | ||
3071 | + u32 mult; | ||
3072 | + u32 shift; | ||
3073 | + } clock; | ||
3074 | }; | ||
3075 | int __vgetcpu_mode __section_vgetcpu_mode; | ||
3076 | |||
3077 | @@ -68,9 +88,13 @@ | ||
3078 | |||
3079 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); | ||
3080 | /* copy vsyscall data */ | ||
3081 | - vsyscall_gtod_data.clock = *clock; | ||
3082 | - vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; | ||
3083 | - vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; | ||
3084 | + vsyscall_gtod_data.clock.vread = clock->vread; | ||
3085 | + vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; | ||
3086 | + vsyscall_gtod_data.clock.mask = clock->mask; | ||
3087 | + vsyscall_gtod_data.clock.mult = clock->mult; | ||
3088 | + vsyscall_gtod_data.clock.shift = clock->shift; | ||
3089 | + vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | ||
3090 | + vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | ||
3091 | vsyscall_gtod_data.sys_tz = sys_tz; | ||
3092 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | ||
3093 | } | ||
3094 | @@ -105,7 +129,8 @@ | ||
3095 | static __always_inline void do_vgettimeofday(struct timeval * tv) | ||
3096 | { | ||
3097 | cycle_t now, base, mask, cycle_delta; | ||
3098 | - unsigned long seq, mult, shift, nsec_delta; | ||
3099 | + unsigned seq; | ||
3100 | + unsigned long mult, shift, nsec; | ||
3101 | cycle_t (*vread)(void); | ||
3102 | do { | ||
3103 | seq = read_seqbegin(&__vsyscall_gtod_data.lock); | ||
3104 | @@ -121,21 +146,20 @@ | ||
3105 | mult = __vsyscall_gtod_data.clock.mult; | ||
3106 | shift = __vsyscall_gtod_data.clock.shift; | ||
3107 | |||
3108 | - *tv = __vsyscall_gtod_data.wall_time_tv; | ||
3109 | - | ||
3110 | + tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; | ||
3111 | + nsec = __vsyscall_gtod_data.wall_time_nsec; | ||
3112 | } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); | ||
3113 | |||
3114 | /* calculate interval: */ | ||
3115 | cycle_delta = (now - base) & mask; | ||
3116 | /* convert to nsecs: */ | ||
3117 | - nsec_delta = (cycle_delta * mult) >> shift; | ||
3118 | + nsec += (cycle_delta * mult) >> shift; | ||
3119 | |||
3120 | - /* convert to usecs and add to timespec: */ | ||
3121 | - tv->tv_usec += nsec_delta / NSEC_PER_USEC; | ||
3122 | - while (tv->tv_usec > USEC_PER_SEC) { | ||
3123 | + while (nsec >= NSEC_PER_SEC) { | ||
3124 | tv->tv_sec += 1; | ||
3125 | - tv->tv_usec -= USEC_PER_SEC; | ||
3126 | + nsec -= NSEC_PER_SEC; | ||
3127 | } | ||
3128 | + tv->tv_usec = nsec / NSEC_PER_USEC; | ||
3129 | } | ||
3130 | |||
3131 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | ||
3132 | @@ -151,11 +175,16 @@ | ||
3133 | * unlikely */ | ||
3134 | time_t __vsyscall(1) vtime(time_t *t) | ||
3135 | { | ||
3136 | + struct timeval tv; | ||
3137 | + time_t result; | ||
3138 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) | ||
3139 | return time_syscall(t); | ||
3140 | - else if (t) | ||
3141 | - *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; | ||
3142 | - return __vsyscall_gtod_data.wall_time_tv.tv_sec; | ||
3143 | + | ||
3144 | + vgettimeofday(&tv, 0); | ||
3145 | + result = tv.tv_sec; | ||
3146 | + if (t) | ||
3147 | + *t = result; | ||
3148 | + return result; | ||
3149 | } | ||
3150 | |||
3151 | /* Fast way to get current CPU and node. | ||
3152 | @@ -224,10 +253,10 @@ | ||
3153 | return ret; | ||
3154 | /* gcc has some trouble with __va(__pa()), so just do it this | ||
3155 | way. */ | ||
3156 | - map1 = ioremap(__pa_symbol(&vsysc1), 2); | ||
3157 | + map1 = ioremap(__pa_vsymbol(&vsysc1), 2); | ||
3158 | if (!map1) | ||
3159 | return -ENOMEM; | ||
3160 | - map2 = ioremap(__pa_symbol(&vsysc2), 2); | ||
3161 | + map2 = ioremap(__pa_vsymbol(&vsysc2), 2); | ||
3162 | if (!map2) { | ||
3163 | ret = -ENOMEM; | ||
3164 | goto out; | ||
3165 | @@ -304,7 +333,7 @@ | ||
3166 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | ||
3167 | { | ||
3168 | long cpu = (long)arg; | ||
3169 | - if (action == CPU_ONLINE) | ||
3170 | + if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | ||
3171 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); | ||
3172 | return NOTIFY_DONE; | ||
3173 | } | ||
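The gettimeofday rework above keeps the wall time as seconds plus nanoseconds, adds the clocksource delta in nanoseconds, and only converts to microseconds once at the very end. A small user-space sketch of just that arithmetic (names and values are made up; this is not the kernel API):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC  1000000000UL
#define NSEC_PER_USEC 1000UL

struct my_timeval { long tv_sec; long tv_usec; };

static void vgettimeofday_sketch(struct my_timeval *tv,
                                 long wall_sec, unsigned long wall_nsec,
                                 uint64_t cycle_delta, uint32_t mult, uint32_t shift)
{
    unsigned long nsec = wall_nsec;

    tv->tv_sec = wall_sec;
    nsec += (cycle_delta * mult) >> shift;   /* cycles -> nanoseconds */

    while (nsec >= NSEC_PER_SEC) {           /* carry whole seconds */
        tv->tv_sec += 1;
        nsec -= NSEC_PER_SEC;
    }
    tv->tv_usec = nsec / NSEC_PER_USEC;      /* convert once, at the end */
}

int main(void)
{
    struct my_timeval tv;
    /* 0.9s into the second plus ~0.2s worth of cycles (mult=1, shift=0) */
    vgettimeofday_sketch(&tv, 100, 900000000UL, 200000000ULL, 1, 0);
    printf("%ld.%06ld\n", tv.tv_sec, tv.tv_usec);   /* prints 101.100000 */
    return 0;
}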
3174 | --- a/arch/x86/mm/fault_32-xen.c | ||
3175 | +++ b/arch/x86/mm/fault_32-xen.c | ||
3176 | @@ -14,19 +14,20 @@ | ||
3177 | #include <linux/mman.h> | ||
3178 | #include <linux/mm.h> | ||
3179 | #include <linux/smp.h> | ||
3180 | -#include <linux/smp_lock.h> | ||
3181 | #include <linux/interrupt.h> | ||
3182 | #include <linux/init.h> | ||
3183 | #include <linux/tty.h> | ||
3184 | #include <linux/vt_kern.h> /* For unblank_screen() */ | ||
3185 | #include <linux/highmem.h> | ||
3186 | +#include <linux/bootmem.h> /* for max_low_pfn */ | ||
3187 | +#include <linux/vmalloc.h> | ||
3188 | #include <linux/module.h> | ||
3189 | #include <linux/kprobes.h> | ||
3190 | #include <linux/uaccess.h> | ||
3191 | +#include <linux/kdebug.h> | ||
3192 | |||
3193 | #include <asm/system.h> | ||
3194 | #include <asm/desc.h> | ||
3195 | -#include <asm/kdebug.h> | ||
3196 | #include <asm/segment.h> | ||
3197 | |||
3198 | extern void die(const char *,struct pt_regs *,long); | ||
3199 | @@ -259,25 +260,20 @@ | ||
3200 | unsigned long page; | ||
3201 | |||
3202 | page = read_cr3(); | ||
3203 | - page = ((unsigned long *) __va(page))[address >> 22]; | ||
3204 | - if (oops_may_print()) | ||
3205 | - printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page, | ||
3206 | - machine_to_phys(page)); | ||
3207 | + page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT]; | ||
3208 | + printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page, | ||
3209 | + machine_to_phys(page)); | ||
3210 | /* | ||
3211 | * We must not directly access the pte in the highpte | ||
3212 | * case if the page table is located in highmem. | ||
3213 | * And lets rather not kmap-atomic the pte, just in case | ||
3214 | * it's allocated already. | ||
3215 | */ | ||
3216 | -#ifdef CONFIG_HIGHPTE | ||
3217 | - if ((page >> PAGE_SHIFT) >= highstart_pfn) | ||
3218 | - return; | ||
3219 | -#endif | ||
3220 | - if ((page & 1) && oops_may_print()) { | ||
3221 | - page &= PAGE_MASK; | ||
3222 | - address &= 0x003ff000; | ||
3223 | - page = machine_to_phys(page); | ||
3224 | - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; | ||
3225 | + if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn | ||
3226 | + && (page & _PAGE_PRESENT)) { | ||
3227 | + page = machine_to_phys(page & PAGE_MASK); | ||
3228 | + page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT) | ||
3229 | + & (PTRS_PER_PTE - 1)]; | ||
3230 | printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page, | ||
3231 | machine_to_phys(page)); | ||
3232 | } | ||
3233 | @@ -581,6 +577,11 @@ | ||
3234 | bad_area_nosemaphore: | ||
3235 | /* User mode accesses just cause a SIGSEGV */ | ||
3236 | if (error_code & 4) { | ||
3237 | + /* | ||
3238 | + * It's possible to have interrupts off here. | ||
3239 | + */ | ||
3240 | + local_irq_enable(); | ||
3241 | + | ||
3242 | /* | ||
3243 | * Valid to do another page fault here because this one came | ||
3244 | * from user space. | ||
3245 | @@ -633,7 +634,7 @@ | ||
3246 | bust_spinlocks(1); | ||
3247 | |||
3248 | if (oops_may_print()) { | ||
3249 | - #ifdef CONFIG_X86_PAE | ||
3250 | +#ifdef CONFIG_X86_PAE | ||
3251 | if (error_code & 16) { | ||
3252 | pte_t *pte = lookup_address(address); | ||
3253 | |||
3254 | @@ -642,7 +643,7 @@ | ||
3255 | "NX-protected page - exploit attempt? " | ||
3256 | "(uid: %d)\n", current->uid); | ||
3257 | } | ||
3258 | - #endif | ||
3259 | +#endif | ||
3260 | if (address < PAGE_SIZE) | ||
3261 | printk(KERN_ALERT "BUG: unable to handle kernel NULL " | ||
3262 | "pointer dereference"); | ||
3263 | @@ -652,8 +653,8 @@ | ||
3264 | printk(" at virtual address %08lx\n",address); | ||
3265 | printk(KERN_ALERT " printing eip:\n"); | ||
3266 | printk("%08lx\n", regs->eip); | ||
3267 | + dump_fault_path(address); | ||
3268 | } | ||
3269 | - dump_fault_path(address); | ||
3270 | tsk->thread.cr2 = address; | ||
3271 | tsk->thread.trap_no = 14; | ||
3272 | tsk->thread.error_code = error_code; | ||
3273 | @@ -694,7 +695,6 @@ | ||
3274 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); | ||
3275 | } | ||
3276 | |||
3277 | -#if !HAVE_SHARED_KERNEL_PMD | ||
3278 | void vmalloc_sync_all(void) | ||
3279 | { | ||
3280 | /* | ||
3281 | @@ -710,6 +710,9 @@ | ||
3282 | static unsigned long start = TASK_SIZE; | ||
3283 | unsigned long address; | ||
3284 | |||
3285 | + if (SHARED_KERNEL_PMD) | ||
3286 | + return; | ||
3287 | + | ||
3288 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); | ||
3289 | for (address = start; | ||
3290 | address >= TASK_SIZE && address < hypervisor_virt_start; | ||
3291 | @@ -739,4 +742,3 @@ | ||
3292 | start = address + (1UL << PMD_SHIFT); | ||
3293 | } | ||
3294 | } | ||
3295 | -#endif | ||
3296 | --- a/arch/x86/mm/fault_64-xen.c | ||
3297 | +++ b/arch/x86/mm/fault_64-xen.c | ||
3298 | @@ -15,22 +15,22 @@ | ||
3299 | #include <linux/mman.h> | ||
3300 | #include <linux/mm.h> | ||
3301 | #include <linux/smp.h> | ||
3302 | -#include <linux/smp_lock.h> | ||
3303 | #include <linux/interrupt.h> | ||
3304 | #include <linux/init.h> | ||
3305 | #include <linux/tty.h> | ||
3306 | #include <linux/vt_kern.h> /* For unblank_screen() */ | ||
3307 | #include <linux/compiler.h> | ||
3308 | +#include <linux/vmalloc.h> | ||
3309 | #include <linux/module.h> | ||
3310 | #include <linux/kprobes.h> | ||
3311 | #include <linux/uaccess.h> | ||
3312 | +#include <linux/kdebug.h> | ||
3313 | |||
3314 | #include <asm/system.h> | ||
3315 | #include <asm/pgalloc.h> | ||
3316 | #include <asm/smp.h> | ||
3317 | #include <asm/tlbflush.h> | ||
3318 | #include <asm/proto.h> | ||
3319 | -#include <asm/kdebug.h> | ||
3320 | #include <asm-generic/sections.h> | ||
3321 | |||
3322 | /* Page fault error code bits */ | ||
3323 | @@ -537,6 +537,12 @@ | ||
3324 | bad_area_nosemaphore: | ||
3325 | /* User mode accesses just cause a SIGSEGV */ | ||
3326 | if (error_code & PF_USER) { | ||
3327 | + | ||
3328 | + /* | ||
3329 | + * It's possible to have interrupts off here. | ||
3330 | + */ | ||
3331 | + local_irq_enable(); | ||
3332 | + | ||
3333 | if (is_prefetch(regs, address, error_code)) | ||
3334 | return; | ||
3335 | |||
3336 | @@ -646,7 +652,7 @@ | ||
3337 | } | ||
3338 | |||
3339 | DEFINE_SPINLOCK(pgd_lock); | ||
3340 | -struct page *pgd_list; | ||
3341 | +LIST_HEAD(pgd_list); | ||
3342 | |||
3343 | void vmalloc_sync_all(void) | ||
3344 | { | ||
3345 | @@ -666,8 +672,7 @@ | ||
3346 | if (pgd_none(*pgd_ref)) | ||
3347 | continue; | ||
3348 | spin_lock(&pgd_lock); | ||
3349 | - for (page = pgd_list; page; | ||
3350 | - page = (struct page *)page->index) { | ||
3351 | + list_for_each_entry(page, &pgd_list, lru) { | ||
3352 | pgd_t *pgd; | ||
3353 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | ||
3354 | if (pgd_none(*pgd)) | ||
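The pgd_list change above replaces a hand-rolled chain threaded through page->index with a standard embedded list_head, so the vmalloc_sync_all() walk becomes a plain list_for_each_entry() loop. A self-contained user-space re-implementation of that idiom (the explicit type argument and all names here are illustrative; the real kernel macro infers the type):

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))
#define list_for_each_entry(pos, head, type, member)                    \
    for (pos = container_of((head)->next, type, member);                \
         &pos->member != (head);                                        \
         pos = container_of(pos->member.next, type, member))

struct page { int id; struct list_head lru; };

static void list_add(struct list_head *new, struct list_head *head)
{
    new->next = head->next; new->prev = head;
    head->next->prev = new; head->next = new;
}

int main(void)
{
    struct list_head pgd_list = LIST_HEAD_INIT(pgd_list);
    struct page a = { .id = 1 }, b = { .id = 2 };
    struct page *p;

    list_add(&a.lru, &pgd_list);
    list_add(&b.lru, &pgd_list);

    list_for_each_entry(p, &pgd_list, struct page, lru)
        printf("page %d\n", p->id);   /* walks b, then a */
    return 0;
}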
3355 | --- a/arch/x86/mm/highmem_32-xen.c | ||
3356 | +++ b/arch/x86/mm/highmem_32-xen.c | ||
3357 | @@ -26,7 +26,7 @@ | ||
3358 | * However when holding an atomic kmap it is not legal to sleep, so atomic | ||
3359 | * kmaps are appropriate for short, tight code paths only. | ||
3360 | */ | ||
3361 | -static void *__kmap_atomic(struct page *page, enum km_type type, pgprot_t prot) | ||
3362 | +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) | ||
3363 | { | ||
3364 | enum fixed_addresses idx; | ||
3365 | unsigned long vaddr; | ||
3366 | @@ -49,15 +49,7 @@ | ||
3367 | |||
3368 | void *kmap_atomic(struct page *page, enum km_type type) | ||
3369 | { | ||
3370 | - return __kmap_atomic(page, type, kmap_prot); | ||
3371 | -} | ||
3372 | - | ||
3373 | -/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */ | ||
3374 | -void *kmap_atomic_pte(struct page *page, enum km_type type) | ||
3375 | -{ | ||
3376 | - return __kmap_atomic(page, type, | ||
3377 | - test_bit(PG_pinned, &page->flags) | ||
3378 | - ? PAGE_KERNEL_RO : kmap_prot); | ||
3379 | + return kmap_atomic_prot(page, type, kmap_prot); | ||
3380 | } | ||
3381 | |||
3382 | void kunmap_atomic(void *kvaddr, enum km_type type) | ||
3383 | @@ -80,6 +72,7 @@ | ||
3384 | #endif | ||
3385 | } | ||
3386 | |||
3387 | + arch_flush_lazy_mmu_mode(); | ||
3388 | pagefault_enable(); | ||
3389 | } | ||
3390 | |||
3391 | @@ -117,6 +110,5 @@ | ||
3392 | EXPORT_SYMBOL(kmap); | ||
3393 | EXPORT_SYMBOL(kunmap); | ||
3394 | EXPORT_SYMBOL(kmap_atomic); | ||
3395 | -EXPORT_SYMBOL(kmap_atomic_pte); | ||
3396 | EXPORT_SYMBOL(kunmap_atomic); | ||
3397 | EXPORT_SYMBOL(kmap_atomic_to_page); | ||
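The highmem change above exports the protection-taking variant and turns kmap_atomic() into a thin wrapper that passes the default protection, instead of keeping a private __kmap_atomic() plus a Xen-only kmap_atomic_pte(). A toy user-space sketch of the wrapper pattern (all names and values are invented):

#include <stdio.h>

typedef unsigned long pgprot_t;
#define KMAP_PROT_DEFAULT 0x063UL   /* stand-in for PAGE_KERNEL */
#define PROT_READONLY     0x061UL   /* stand-in for PAGE_KERNEL_RO */

static void *kmap_atomic_prot(void *page, int type, pgprot_t prot)
{
    printf("map page %p slot %d prot %#lx\n", page, type, prot);
    return page;   /* stand-in for the fixmap virtual address */
}

static void *kmap_atomic(void *page, int type)
{
    /* default-protection convenience wrapper */
    return kmap_atomic_prot(page, type, KMAP_PROT_DEFAULT);
}

int main(void)
{
    int dummy;
    kmap_atomic(&dummy, 0);                      /* default protection */
    kmap_atomic_prot(&dummy, 1, PROT_READONLY);  /* caller chooses RO */
    return 0;
}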
3398 | --- a/arch/x86/mm/init_32-xen.c | ||
3399 | +++ b/arch/x86/mm/init_32-xen.c | ||
3400 | @@ -22,6 +22,7 @@ | ||
3401 | #include <linux/init.h> | ||
3402 | #include <linux/highmem.h> | ||
3403 | #include <linux/pagemap.h> | ||
3404 | +#include <linux/pfn.h> | ||
3405 | #include <linux/poison.h> | ||
3406 | #include <linux/bootmem.h> | ||
3407 | #include <linux/slab.h> | ||
3408 | @@ -67,17 +68,19 @@ | ||
3409 | pmd_t *pmd_table; | ||
3410 | |||
3411 | #ifdef CONFIG_X86_PAE | ||
3412 | - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); | ||
3413 | - paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); | ||
3414 | - make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); | ||
3415 | - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | ||
3416 | - pud = pud_offset(pgd, 0); | ||
3417 | - if (pmd_table != pmd_offset(pud, 0)) | ||
3418 | - BUG(); | ||
3419 | -#else | ||
3420 | + if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) { | ||
3421 | + pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); | ||
3422 | + | ||
3423 | + paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); | ||
3424 | + make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); | ||
3425 | + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | ||
3426 | + pud = pud_offset(pgd, 0); | ||
3427 | + if (pmd_table != pmd_offset(pud, 0)) | ||
3428 | + BUG(); | ||
3429 | + } | ||
3430 | +#endif | ||
3431 | pud = pud_offset(pgd, 0); | ||
3432 | pmd_table = pmd_offset(pud, 0); | ||
3433 | -#endif | ||
3434 | |||
3435 | return pmd_table; | ||
3436 | } | ||
3437 | @@ -88,16 +91,18 @@ | ||
3438 | */ | ||
3439 | static pte_t * __init one_page_table_init(pmd_t *pmd) | ||
3440 | { | ||
3441 | +#if CONFIG_XEN_COMPAT <= 0x030002 | ||
3442 | if (pmd_none(*pmd)) { | ||
3443 | +#else | ||
3444 | + if (!(__pmd_val(*pmd) & _PAGE_PRESENT)) { | ||
3445 | +#endif | ||
3446 | pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); | ||
3447 | + | ||
3448 | paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); | ||
3449 | make_lowmem_page_readonly(page_table, | ||
3450 | XENFEAT_writable_page_tables); | ||
3451 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | ||
3452 | - if (page_table != pte_offset_kernel(pmd, 0)) | ||
3453 | - BUG(); | ||
3454 | - | ||
3455 | - return page_table; | ||
3456 | + BUG_ON(page_table != pte_offset_kernel(pmd, 0)); | ||
3457 | } | ||
3458 | |||
3459 | return pte_offset_kernel(pmd, 0); | ||
3460 | @@ -117,7 +122,6 @@ | ||
3461 | static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) | ||
3462 | { | ||
3463 | pgd_t *pgd; | ||
3464 | - pud_t *pud; | ||
3465 | pmd_t *pmd; | ||
3466 | int pgd_idx, pmd_idx; | ||
3467 | unsigned long vaddr; | ||
3468 | @@ -128,12 +132,10 @@ | ||
3469 | pgd = pgd_base + pgd_idx; | ||
3470 | |||
3471 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { | ||
3472 | - if (pgd_none(*pgd)) | ||
3473 | - one_md_table_init(pgd); | ||
3474 | - pud = pud_offset(pgd, vaddr); | ||
3475 | - pmd = pmd_offset(pud, vaddr); | ||
3476 | + pmd = one_md_table_init(pgd); | ||
3477 | + pmd = pmd + pmd_index(vaddr); | ||
3478 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { | ||
3479 | - if (vaddr < hypervisor_virt_start && pmd_none(*pmd)) | ||
3480 | + if (vaddr < hypervisor_virt_start) | ||
3481 | one_page_table_init(pmd); | ||
3482 | |||
3483 | vaddr += PMD_SIZE; | ||
3484 | @@ -196,24 +198,25 @@ | ||
3485 | /* Map with big pages if possible, otherwise create normal page tables. */ | ||
3486 | if (cpu_has_pse) { | ||
3487 | unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; | ||
3488 | - | ||
3489 | if (is_kernel_text(address) || is_kernel_text(address2)) | ||
3490 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); | ||
3491 | else | ||
3492 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); | ||
3493 | + | ||
3494 | pfn += PTRS_PER_PTE; | ||
3495 | } else { | ||
3496 | pte = one_page_table_init(pmd); | ||
3497 | |||
3498 | - pte += pte_ofs; | ||
3499 | - for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { | ||
3500 | - /* XEN: Only map initial RAM allocation. */ | ||
3501 | - if ((pfn >= max_ram_pfn) || pte_present(*pte)) | ||
3502 | - continue; | ||
3503 | - if (is_kernel_text(address)) | ||
3504 | - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); | ||
3505 | - else | ||
3506 | - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); | ||
3507 | + for (pte += pte_ofs; | ||
3508 | + pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; | ||
3509 | + pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { | ||
3510 | + /* XEN: Only map initial RAM allocation. */ | ||
3511 | + if ((pfn >= max_ram_pfn) || pte_present(*pte)) | ||
3512 | + continue; | ||
3513 | + if (is_kernel_text(address)) | ||
3514 | + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); | ||
3515 | + else | ||
3516 | + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); | ||
3517 | } | ||
3518 | pte_ofs = 0; | ||
3519 | } | ||
3520 | @@ -383,15 +386,44 @@ | ||
3521 | |||
3522 | pgd_t *swapper_pg_dir; | ||
3523 | |||
3524 | +static void __init xen_pagetable_setup_start(pgd_t *base) | ||
3525 | +{ | ||
3526 | +} | ||
3527 | + | ||
3528 | +static void __init xen_pagetable_setup_done(pgd_t *base) | ||
3529 | +{ | ||
3530 | +} | ||
3531 | + | ||
3532 | +/* | ||
3533 | + * Build a proper pagetable for the kernel mappings. Up until this | ||
3534 | + * point, we've been running on some set of pagetables constructed by | ||
3535 | + * the boot process. | ||
3536 | + * | ||
3537 | + * If we're booting on native hardware, this will be a pagetable | ||
3538 | + * constructed in arch/i386/kernel/head.S, and not running in PAE mode | ||
3539 | + * (even if we'll end up running in PAE). The root of the pagetable | ||
3540 | + * will be swapper_pg_dir. | ||
3541 | + * | ||
3542 | + * If we're booting paravirtualized under a hypervisor, then there are | ||
3543 | + * more options: we may already be running PAE, and the pagetable may | ||
3544 | + * or may not be based in swapper_pg_dir. In any case, | ||
3545 | + * paravirt_pagetable_setup_start() will set up swapper_pg_dir | ||
3546 | + * appropriately for the rest of the initialization to work. | ||
3547 | + * | ||
3548 | + * In general, pagetable_init() assumes that the pagetable may already | ||
3549 | + * be partially populated, and so it avoids stomping on any existing | ||
3550 | + * mappings. | ||
3551 | + */ | ||
3552 | static void __init pagetable_init (void) | ||
3553 | { | ||
3554 | - unsigned long vaddr; | ||
3555 | + unsigned long vaddr, end; | ||
3556 | pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base; | ||
3557 | |||
3558 | + xen_pagetable_setup_start(pgd_base); | ||
3559 | + | ||
3560 | /* Enable PSE if available */ | ||
3561 | - if (cpu_has_pse) { | ||
3562 | + if (cpu_has_pse) | ||
3563 | set_in_cr4(X86_CR4_PSE); | ||
3564 | - } | ||
3565 | |||
3566 | /* Enable PGE if available */ | ||
3567 | if (cpu_has_pge) { | ||
3568 | @@ -408,9 +440,12 @@ | ||
3569 | * created - mappings will be set by set_fixmap(): | ||
3570 | */ | ||
3571 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; | ||
3572 | - page_table_range_init(vaddr, hypervisor_virt_start, pgd_base); | ||
3573 | + end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; | ||
3574 | + page_table_range_init(vaddr, end, pgd_base); | ||
3575 | |||
3576 | permanent_kmaps_init(pgd_base); | ||
3577 | + | ||
3578 | + xen_pagetable_setup_done(pgd_base); | ||
3579 | } | ||
3580 | |||
3581 | #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) | ||
3582 | @@ -757,34 +792,29 @@ | ||
3583 | EXPORT_SYMBOL_GPL(remove_memory); | ||
3584 | #endif | ||
3585 | |||
3586 | -struct kmem_cache *pgd_cache; | ||
3587 | struct kmem_cache *pmd_cache; | ||
3588 | |||
3589 | void __init pgtable_cache_init(void) | ||
3590 | { | ||
3591 | + size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); | ||
3592 | + | ||
3593 | if (PTRS_PER_PMD > 1) { | ||
3594 | pmd_cache = kmem_cache_create("pmd", | ||
3595 | PTRS_PER_PMD*sizeof(pmd_t), | ||
3596 | PTRS_PER_PMD*sizeof(pmd_t), | ||
3597 | - 0, | ||
3598 | + SLAB_PANIC, | ||
3599 | pmd_ctor, | ||
3600 | NULL); | ||
3601 | - if (!pmd_cache) | ||
3602 | - panic("pgtable_cache_init(): cannot create pmd cache"); | ||
3603 | + if (!SHARED_KERNEL_PMD) { | ||
3604 | + /* If we're in PAE mode and have a non-shared | ||
3605 | + kernel pmd, then the pgd size must be a | ||
3606 | + page size. This is because the pgd_list | ||
3607 | + links through the page structure, so there | ||
3608 | + can only be one pgd per page for this to | ||
3609 | + work. */ | ||
3610 | + pgd_size = PAGE_SIZE; | ||
3611 | + } | ||
3612 | } | ||
3613 | - pgd_cache = kmem_cache_create("pgd", | ||
3614 | -#ifndef CONFIG_XEN | ||
3615 | - PTRS_PER_PGD*sizeof(pgd_t), | ||
3616 | - PTRS_PER_PGD*sizeof(pgd_t), | ||
3617 | -#else | ||
3618 | - PAGE_SIZE, | ||
3619 | - PAGE_SIZE, | ||
3620 | -#endif | ||
3621 | - 0, | ||
3622 | - pgd_ctor, | ||
3623 | - PTRS_PER_PMD == 1 ? pgd_dtor : NULL); | ||
3624 | - if (!pgd_cache) | ||
3625 | - panic("pgtable_cache_init(): Cannot create pgd cache"); | ||
3626 | } | ||
3627 | |||
3628 | /* | ||
3629 | @@ -818,13 +848,26 @@ | ||
3630 | |||
3631 | void mark_rodata_ro(void) | ||
3632 | { | ||
3633 | - unsigned long addr = (unsigned long)__start_rodata; | ||
3634 | - | ||
3635 | - for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) | ||
3636 | - change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); | ||
3637 | + unsigned long start = PFN_ALIGN(_text); | ||
3638 | + unsigned long size = PFN_ALIGN(_etext) - start; | ||
3639 | |||
3640 | - printk("Write protecting the kernel read-only data: %uk\n", | ||
3641 | - (__end_rodata - __start_rodata) >> 10); | ||
3642 | +#ifndef CONFIG_KPROBES | ||
3643 | +#ifdef CONFIG_HOTPLUG_CPU | ||
3644 | + /* It must still be possible to apply SMP alternatives. */ | ||
3645 | + if (num_possible_cpus() <= 1) | ||
3646 | +#endif | ||
3647 | + { | ||
3648 | + change_page_attr(virt_to_page(start), | ||
3649 | + size >> PAGE_SHIFT, PAGE_KERNEL_RX); | ||
3650 | + printk("Write protecting the kernel text: %luk\n", size >> 10); | ||
3651 | + } | ||
3652 | +#endif | ||
3653 | + start += size; | ||
3654 | + size = (unsigned long)__end_rodata - start; | ||
3655 | + change_page_attr(virt_to_page(start), | ||
3656 | + size >> PAGE_SHIFT, PAGE_KERNEL_RO); | ||
3657 | + printk("Write protecting the kernel read-only data: %luk\n", | ||
3658 | + size >> 10); | ||
3659 | |||
3660 | /* | ||
3661 | * change_page_attr() requires a global_flush_tlb() call after it. | ||
3662 | @@ -847,7 +890,7 @@ | ||
3663 | free_page(addr); | ||
3664 | totalram_pages++; | ||
3665 | } | ||
3666 | - printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); | ||
3667 | + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); | ||
3668 | } | ||
3669 | |||
3670 | void free_initmem(void) | ||
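The new mark_rodata_ro() above protects whole aligned ranges with one change_page_attr() call per region rather than looping page by page; the bookkeeping is just page-alignment arithmetic. A tiny user-space sketch of that arithmetic, with made-up addresses:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define PFN_ALIGN(x) (((unsigned long)(x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
    unsigned long text_start = 0xc0100123UL;   /* pretend _text */
    unsigned long text_end   = 0xc0345678UL;   /* pretend _etext */

    unsigned long start = PFN_ALIGN(text_start);     /* round up */
    unsigned long size  = PFN_ALIGN(text_end) - start;

    printf("protect %lu pages (%luk) starting at %#lx\n",
           size >> PAGE_SHIFT, size >> 10, start);
    return 0;
}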
3671 | --- a/arch/x86/mm/init_64-xen.c | ||
3672 | +++ b/arch/x86/mm/init_64-xen.c | ||
3673 | @@ -25,10 +25,12 @@ | ||
3674 | #include <linux/bootmem.h> | ||
3675 | #include <linux/proc_fs.h> | ||
3676 | #include <linux/pci.h> | ||
3677 | +#include <linux/pfn.h> | ||
3678 | #include <linux/poison.h> | ||
3679 | #include <linux/dma-mapping.h> | ||
3680 | #include <linux/module.h> | ||
3681 | #include <linux/memory_hotplug.h> | ||
3682 | +#include <linux/nmi.h> | ||
3683 | |||
3684 | #include <asm/processor.h> | ||
3685 | #include <asm/system.h> | ||
3686 | @@ -51,7 +53,7 @@ | ||
3687 | #define Dprintk(x...) | ||
3688 | #endif | ||
3689 | |||
3690 | -struct dma_mapping_ops* dma_ops; | ||
3691 | +const struct dma_mapping_ops* dma_ops; | ||
3692 | EXPORT_SYMBOL(dma_ops); | ||
3693 | |||
3694 | #if CONFIG_XEN_COMPAT <= 0x030002 | ||
3695 | @@ -189,6 +191,13 @@ | ||
3696 | |||
3697 | for_each_online_pgdat(pgdat) { | ||
3698 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | ||
3699 | + /* this loop can take a while with 256 GB and 4k pages | ||
3700 | + so update the NMI watchdog */ | ||
3701 | + if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { | ||
3702 | + touch_nmi_watchdog(); | ||
3703 | + } | ||
3704 | + if (!pfn_valid(pgdat->node_start_pfn + i)) | ||
3705 | + continue; | ||
3706 | page = pfn_to_page(pgdat->node_start_pfn + i); | ||
3707 | total++; | ||
3708 | if (PageReserved(page)) | ||
3709 | @@ -350,7 +359,7 @@ | ||
3710 | } | ||
3711 | } | ||
3712 | |||
3713 | -unsigned long __initdata table_start, table_end; | ||
3714 | +unsigned long __meminitdata table_start, table_end; | ||
3715 | |||
3716 | static __meminit void *alloc_static_page(unsigned long *phys) | ||
3717 | { | ||
3718 | @@ -367,7 +376,7 @@ | ||
3719 | start_pfn++; | ||
3720 | memset((void *)va, 0, PAGE_SIZE); | ||
3721 | return (void *)va; | ||
3722 | -} | ||
3723 | +} | ||
3724 | |||
3725 | #define PTE_SIZE PAGE_SIZE | ||
3726 | |||
3727 | @@ -408,28 +417,46 @@ | ||
3728 | |||
3729 | #ifndef CONFIG_XEN | ||
3730 | /* Must run before zap_low_mappings */ | ||
3731 | -__init void *early_ioremap(unsigned long addr, unsigned long size) | ||
3732 | +__meminit void *early_ioremap(unsigned long addr, unsigned long size) | ||
3733 | { | ||
3734 | - unsigned long map = round_down(addr, LARGE_PAGE_SIZE); | ||
3735 | - | ||
3736 | - /* actually usually some more */ | ||
3737 | - if (size >= LARGE_PAGE_SIZE) { | ||
3738 | - return NULL; | ||
3739 | + unsigned long vaddr; | ||
3740 | + pmd_t *pmd, *last_pmd; | ||
3741 | + int i, pmds; | ||
3742 | + | ||
3743 | + pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; | ||
3744 | + vaddr = __START_KERNEL_map; | ||
3745 | + pmd = level2_kernel_pgt; | ||
3746 | + last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; | ||
3747 | + for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { | ||
3748 | + for (i = 0; i < pmds; i++) { | ||
3749 | + if (pmd_present(pmd[i])) | ||
3750 | + goto next; | ||
3751 | + } | ||
3752 | + vaddr += addr & ~PMD_MASK; | ||
3753 | + addr &= PMD_MASK; | ||
3754 | + for (i = 0; i < pmds; i++, addr += PMD_SIZE) | ||
3755 | + set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE)); | ||
3756 | + __flush_tlb(); | ||
3757 | + return (void *)vaddr; | ||
3758 | + next: | ||
3759 | + ; | ||
3760 | } | ||
3761 | - set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); | ||
3762 | - map += LARGE_PAGE_SIZE; | ||
3763 | - set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); | ||
3764 | - __flush_tlb(); | ||
3765 | - return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); | ||
3766 | + printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); | ||
3767 | + return NULL; | ||
3768 | } | ||
3769 | |||
3770 | /* To avoid virtual aliases later */ | ||
3771 | -__init void early_iounmap(void *addr, unsigned long size) | ||
3772 | +__meminit void early_iounmap(void *addr, unsigned long size) | ||
3773 | { | ||
3774 | - if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) | ||
3775 | - printk("early_iounmap: bad address %p\n", addr); | ||
3776 | - set_pmd(temp_mappings[0].pmd, __pmd(0)); | ||
3777 | - set_pmd(temp_mappings[1].pmd, __pmd(0)); | ||
3778 | + unsigned long vaddr; | ||
3779 | + pmd_t *pmd; | ||
3780 | + int i, pmds; | ||
3781 | + | ||
3782 | + vaddr = (unsigned long)addr; | ||
3783 | + pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; | ||
3784 | + pmd = level2_kernel_pgt + pmd_index(vaddr); | ||
3785 | + for (i = 0; i < pmds; i++) | ||
3786 | + pmd_clear(pmd + i); | ||
3787 | __flush_tlb(); | ||
3788 | } | ||
3789 | #endif | ||
3790 | @@ -763,14 +790,6 @@ | ||
3791 | __flush_tlb_all(); | ||
3792 | } | ||
3793 | |||
3794 | -void __cpuinit zap_low_mappings(int cpu) | ||
3795 | -{ | ||
3796 | - /* this is not required for Xen */ | ||
3797 | -#if 0 | ||
3798 | - swap_low_mappings(); | ||
3799 | -#endif | ||
3800 | -} | ||
3801 | - | ||
3802 | #ifndef CONFIG_NUMA | ||
3803 | void __init paging_init(void) | ||
3804 | { | ||
3805 | @@ -961,17 +980,6 @@ | ||
3806 | reservedpages << (PAGE_SHIFT-10), | ||
3807 | datasize >> 10, | ||
3808 | initsize >> 10); | ||
3809 | - | ||
3810 | -#ifndef CONFIG_XEN | ||
3811 | -#ifdef CONFIG_SMP | ||
3812 | - /* | ||
3813 | - * Sync boot_level4_pgt mappings with the init_level4_pgt | ||
3814 | - * except for the low identity mappings which are already zapped | ||
3815 | - * in init_level4_pgt. This sync-up is essential for AP's bringup | ||
3816 | - */ | ||
3817 | - memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); | ||
3818 | -#endif | ||
3819 | -#endif | ||
3820 | } | ||
3821 | |||
3822 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | ||
3823 | @@ -981,7 +989,7 @@ | ||
3824 | if (begin >= end) | ||
3825 | return; | ||
3826 | |||
3827 | - printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); | ||
3828 | + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); | ||
3829 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | ||
3830 | ClearPageReserved(virt_to_page(addr)); | ||
3831 | init_page_count(virt_to_page(addr)); | ||
3832 | @@ -990,24 +998,17 @@ | ||
3833 | if (addr >= __START_KERNEL_map) { | ||
3834 | /* make_readonly() reports all kernel addresses. */ | ||
3835 | __make_page_writable(__va(__pa(addr))); | ||
3836 | - if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { | ||
3837 | - pgd_t *pgd = pgd_offset_k(addr); | ||
3838 | - pud_t *pud = pud_offset(pgd, addr); | ||
3839 | - pmd_t *pmd = pmd_offset(pud, addr); | ||
3840 | - pte_t *pte = pte_offset_kernel(pmd, addr); | ||
3841 | - | ||
3842 | - xen_l1_entry_update(pte, __pte(0)); /* fallback */ | ||
3843 | - } | ||
3844 | + change_page_attr_addr(addr, 1, __pgprot(0)); | ||
3845 | } | ||
3846 | free_page(addr); | ||
3847 | totalram_pages++; | ||
3848 | } | ||
3849 | + if (addr > __START_KERNEL_map) | ||
3850 | + global_flush_tlb(); | ||
3851 | } | ||
3852 | |||
3853 | void free_initmem(void) | ||
3854 | { | ||
3855 | - memset(__initdata_begin, POISON_FREE_INITDATA, | ||
3856 | - __initdata_end - __initdata_begin); | ||
3857 | free_init_pages("unused kernel memory", | ||
3858 | (unsigned long)(&__init_begin), | ||
3859 | (unsigned long)(&__init_end)); | ||
3860 | @@ -1017,13 +1018,28 @@ | ||
3861 | |||
3862 | void mark_rodata_ro(void) | ||
3863 | { | ||
3864 | - unsigned long addr = (unsigned long)__start_rodata; | ||
3865 | + unsigned long start = (unsigned long)_stext, end; | ||
3866 | + | ||
3867 | +#ifdef CONFIG_HOTPLUG_CPU | ||
3868 | + /* It must still be possible to apply SMP alternatives. */ | ||
3869 | + if (num_possible_cpus() > 1) | ||
3870 | + start = (unsigned long)_etext; | ||
3871 | +#endif | ||
3872 | + | ||
3873 | +#ifdef CONFIG_KPROBES | ||
3874 | + start = (unsigned long)__start_rodata; | ||
3875 | +#endif | ||
3876 | + | ||
3877 | + end = (unsigned long)__end_rodata; | ||
3878 | + start = (start + PAGE_SIZE - 1) & PAGE_MASK; | ||
3879 | + end &= PAGE_MASK; | ||
3880 | + if (end <= start) | ||
3881 | + return; | ||
3882 | |||
3883 | - for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) | ||
3884 | - change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); | ||
3885 | + change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO); | ||
3886 | |||
3887 | - printk ("Write protecting the kernel read-only data: %luk\n", | ||
3888 | - (__end_rodata - __start_rodata) >> 10); | ||
3889 | + printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | ||
3890 | + (end - start) >> 10); | ||
3891 | |||
3892 | /* | ||
3893 | * change_page_attr_addr() requires a global_flush_tlb() call after it. | ||
3894 | @@ -1176,3 +1192,11 @@ | ||
3895 | { | ||
3896 | return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); | ||
3897 | } | ||
3898 | + | ||
3899 | +#ifndef CONFIG_XEN | ||
3900 | +void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) | ||
3901 | +{ | ||
3902 | + return __alloc_bootmem_core(pgdat->bdata, size, | ||
3903 | + SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); | ||
3904 | +} | ||
3905 | +#endif | ||
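The reworked early_ioremap() above stops relying on two fixed temporary mappings and instead searches the kernel pmd array for a run of unused entries large enough for the request. The search itself is a simple first-fit scan; a user-space sketch under those assumptions (array size and names are illustrative):

#include <stdio.h>

#define PTRS_PER_PMD 512

static unsigned long level2_kernel_pgt[PTRS_PER_PMD];  /* 0 == not present */

/* return the first index of a run of 'pmds' free entries, or -1 */
static int find_free_pmd_run(int pmds)
{
    for (int start = 0; start + pmds <= PTRS_PER_PMD; start++) {
        int i;
        for (i = 0; i < pmds; i++)
            if (level2_kernel_pgt[start + i])
                break;          /* slot in use, try the next start */
        if (i == pmds)
            return start;
    }
    return -1;
}

int main(void)
{
    level2_kernel_pgt[0] = level2_kernel_pgt[1] = 1;  /* kernel image */
    level2_kernel_pgt[3] = 1;                         /* a stray mapping */

    printf("3 free slots at index %d\n", find_free_pmd_run(3)); /* 4 */
    return 0;
}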
3906 | --- a/arch/x86/mm/ioremap_32-xen.c | ||
3907 | +++ b/arch/x86/mm/ioremap_32-xen.c | ||
3908 | @@ -13,6 +13,7 @@ | ||
3909 | #include <linux/slab.h> | ||
3910 | #include <linux/module.h> | ||
3911 | #include <linux/io.h> | ||
3912 | +#include <linux/sched.h> | ||
3913 | #include <asm/fixmap.h> | ||
3914 | #include <asm/cacheflush.h> | ||
3915 | #include <asm/tlbflush.h> | ||
3916 | --- a/arch/x86/mm/pageattr_64-xen.c | ||
3917 | +++ b/arch/x86/mm/pageattr_64-xen.c | ||
3918 | @@ -215,13 +215,13 @@ | ||
3919 | preempt_enable(); | ||
3920 | } | ||
3921 | |||
3922 | -void _arch_dup_mmap(struct mm_struct *mm) | ||
3923 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | ||
3924 | { | ||
3925 | if (!mm->context.pinned) | ||
3926 | mm_pin(mm); | ||
3927 | } | ||
3928 | |||
3929 | -void _arch_exit_mmap(struct mm_struct *mm) | ||
3930 | +void arch_exit_mmap(struct mm_struct *mm) | ||
3931 | { | ||
3932 | struct task_struct *tsk = current; | ||
3933 | |||
3934 | @@ -337,10 +337,11 @@ | ||
3935 | struct page *pg; | ||
3936 | |||
3937 | /* When clflush is available always use it because it is | ||
3938 | - much cheaper than WBINVD */ | ||
3939 | - if (!cpu_has_clflush) | ||
3940 | + much cheaper than WBINVD. Disable clflush for now because | ||
3941 | + the high level code is not ready yet */ | ||
3942 | + if (1 || !cpu_has_clflush) | ||
3943 | asm volatile("wbinvd" ::: "memory"); | ||
3944 | - list_for_each_entry(pg, l, lru) { | ||
3945 | + else list_for_each_entry(pg, l, lru) { | ||
3946 | void *adr = page_address(pg); | ||
3947 | if (cpu_has_clflush) | ||
3948 | cache_flush_page(adr); | ||
3949 | @@ -454,16 +455,24 @@ | ||
3950 | */ | ||
3951 | int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) | ||
3952 | { | ||
3953 | - int err = 0; | ||
3954 | + int err = 0, kernel_map = 0; | ||
3955 | int i; | ||
3956 | |||
3957 | + if (address >= __START_KERNEL_map | ||
3958 | + && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) { | ||
3959 | + address = (unsigned long)__va(__pa(address)); | ||
3960 | + kernel_map = 1; | ||
3961 | + } | ||
3962 | + | ||
3963 | down_write(&init_mm.mmap_sem); | ||
3964 | for (i = 0; i < numpages; i++, address += PAGE_SIZE) { | ||
3965 | unsigned long pfn = __pa(address) >> PAGE_SHIFT; | ||
3966 | |||
3967 | - err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); | ||
3968 | - if (err) | ||
3969 | - break; | ||
3970 | + if (!kernel_map || pte_present(pfn_pte(0, prot))) { | ||
3971 | + err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); | ||
3972 | + if (err) | ||
3973 | + break; | ||
3974 | + } | ||
3975 | /* Handle kernel mapping too which aliases part of the | ||
3976 | * lowmem */ | ||
3977 | if (__pa(address) < KERNEL_TEXT_SIZE) { | ||
3978 | --- a/arch/x86/mm/pgtable_32-xen.c | ||
3979 | +++ b/arch/x86/mm/pgtable_32-xen.c | ||
3980 | @@ -13,6 +13,7 @@ | ||
3981 | #include <linux/pagemap.h> | ||
3982 | #include <linux/spinlock.h> | ||
3983 | #include <linux/module.h> | ||
3984 | +#include <linux/quicklist.h> | ||
3985 | |||
3986 | #include <asm/system.h> | ||
3987 | #include <asm/pgtable.h> | ||
3988 | @@ -212,8 +213,6 @@ | ||
3989 | * against pageattr.c; it is the unique case in which a valid change | ||
3990 | * of kernel pagetables can't be lazily synchronized by vmalloc faults. | ||
3991 | * vmalloc faults work because attached pagetables are never freed. | ||
3992 | - * The locking scheme was chosen on the basis of manfred's | ||
3993 | - * recommendations and having no core impact whatsoever. | ||
3994 | * -- wli | ||
3995 | */ | ||
3996 | DEFINE_SPINLOCK(pgd_lock); | ||
3997 | @@ -239,37 +238,59 @@ | ||
3998 | set_page_private(next, (unsigned long)pprev); | ||
3999 | } | ||
4000 | |||
4001 | -void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | ||
4002 | + | ||
4003 | + | ||
4004 | +#if (PTRS_PER_PMD == 1) | ||
4005 | +/* Non-PAE pgd constructor */ | ||
4006 | +void pgd_ctor(void *pgd) | ||
4007 | { | ||
4008 | unsigned long flags; | ||
4009 | |||
4010 | - if (PTRS_PER_PMD > 1) { | ||
4011 | - if (HAVE_SHARED_KERNEL_PMD) | ||
4012 | - clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | ||
4013 | - swapper_pg_dir + USER_PTRS_PER_PGD, | ||
4014 | - KERNEL_PGD_PTRS); | ||
4015 | - } else { | ||
4016 | - spin_lock_irqsave(&pgd_lock, flags); | ||
4017 | + /* !PAE, no pagetable sharing */ | ||
4018 | + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | ||
4019 | + | ||
4020 | + spin_lock_irqsave(&pgd_lock, flags); | ||
4021 | + | ||
4022 | + /* must happen under lock */ | ||
4023 | + clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | ||
4024 | + swapper_pg_dir + USER_PTRS_PER_PGD, | ||
4025 | + KERNEL_PGD_PTRS); | ||
4026 | + | ||
4027 | + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
4028 | + __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
4029 | + USER_PTRS_PER_PGD, | ||
4030 | + KERNEL_PGD_PTRS); | ||
4031 | + pgd_list_add(pgd); | ||
4032 | + spin_unlock_irqrestore(&pgd_lock, flags); | ||
4033 | +} | ||
4034 | +#else /* PTRS_PER_PMD > 1 */ | ||
4035 | +/* PAE pgd constructor */ | ||
4036 | +void pgd_ctor(void *pgd) | ||
4037 | +{ | ||
4038 | + /* PAE, kernel PMD may be shared */ | ||
4039 | + | ||
4040 | + if (SHARED_KERNEL_PMD) { | ||
4041 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | ||
4042 | swapper_pg_dir + USER_PTRS_PER_PGD, | ||
4043 | KERNEL_PGD_PTRS); | ||
4044 | - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | ||
4045 | - | ||
4046 | - /* must happen under lock */ | ||
4047 | - paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
4048 | - __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
4049 | - USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); | ||
4050 | + } else { | ||
4051 | + unsigned long flags; | ||
4052 | |||
4053 | + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | ||
4054 | + spin_lock_irqsave(&pgd_lock, flags); | ||
4055 | pgd_list_add(pgd); | ||
4056 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
4057 | } | ||
4058 | } | ||
4059 | +#endif /* PTRS_PER_PMD */ | ||
4060 | |||
4061 | -/* never called when PTRS_PER_PMD > 1 */ | ||
4062 | -void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) | ||
4063 | +void pgd_dtor(void *pgd) | ||
4064 | { | ||
4065 | unsigned long flags; /* can be called from interrupt context */ | ||
4066 | |||
4067 | + if (SHARED_KERNEL_PMD) | ||
4068 | + return; | ||
4069 | + | ||
4070 | paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); | ||
4071 | spin_lock_irqsave(&pgd_lock, flags); | ||
4072 | pgd_list_del(pgd); | ||
4073 | @@ -278,11 +299,46 @@ | ||
4074 | pgd_test_and_unpin(pgd); | ||
4075 | } | ||
4076 | |||
4077 | +#define UNSHARED_PTRS_PER_PGD \ | ||
4078 | + (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) | ||
4079 | + | ||
4080 | +/* If we allocate a pmd for part of the kernel address space, then | ||
4081 | + make sure its initialized with the appropriate kernel mappings. | ||
4082 | + Otherwise use a cached zeroed pmd. */ | ||
4083 | +static pmd_t *pmd_cache_alloc(int idx) | ||
4084 | +{ | ||
4085 | + pmd_t *pmd; | ||
4086 | + | ||
4087 | + if (idx >= USER_PTRS_PER_PGD) { | ||
4088 | + pmd = (pmd_t *)__get_free_page(GFP_KERNEL); | ||
4089 | + | ||
4090 | +#ifndef CONFIG_XEN | ||
4091 | + if (pmd) | ||
4092 | + memcpy(pmd, | ||
4093 | + (void *)pgd_page_vaddr(swapper_pg_dir[idx]), | ||
4094 | + sizeof(pmd_t) * PTRS_PER_PMD); | ||
4095 | +#endif | ||
4096 | + } else | ||
4097 | + pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | ||
4098 | + | ||
4099 | + return pmd; | ||
4100 | +} | ||
4101 | + | ||
4102 | +static void pmd_cache_free(pmd_t *pmd, int idx) | ||
4103 | +{ | ||
4104 | + if (idx >= USER_PTRS_PER_PGD) { | ||
4105 | + make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables); | ||
4106 | + memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); | ||
4107 | + free_page((unsigned long)pmd); | ||
4108 | + } else | ||
4109 | + kmem_cache_free(pmd_cache, pmd); | ||
4110 | +} | ||
4111 | + | ||
4112 | pgd_t *pgd_alloc(struct mm_struct *mm) | ||
4113 | { | ||
4114 | int i; | ||
4115 | - pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); | ||
4116 | - pmd_t **pmd; | ||
4117 | + pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); | ||
4118 | + pmd_t **pmds = NULL; | ||
4119 | unsigned long flags; | ||
4120 | |||
4121 | pgd_test_and_unpin(pgd); | ||
4122 | @@ -290,37 +346,40 @@ | ||
4123 | if (PTRS_PER_PMD == 1 || !pgd) | ||
4124 | return pgd; | ||
4125 | |||
4126 | - if (HAVE_SHARED_KERNEL_PMD) { | ||
4127 | - for (i = 0; i < USER_PTRS_PER_PGD; ++i) { | ||
4128 | - pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | ||
4129 | - if (!pmd) | ||
4130 | - goto out_oom; | ||
4131 | - paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); | ||
4132 | - set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); | ||
4133 | +#ifdef CONFIG_XEN | ||
4134 | + if (!SHARED_KERNEL_PMD) { | ||
4135 | + /* | ||
4136 | + * We can race save/restore (if we sleep during a GFP_KERNEL memory | ||
4137 | + * allocation). We therefore store virtual addresses of pmds as they | ||
4138 | + * do not change across save/restore, and poke the machine addresses | ||
4139 | + * into the pgdir under the pgd_lock. | ||
4140 | + */ | ||
4141 | + pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); | ||
4142 | + if (!pmds) { | ||
4143 | + quicklist_free(0, pgd_dtor, pgd); | ||
4144 | + return NULL; | ||
4145 | } | ||
4146 | - return pgd; | ||
4147 | - } | ||
4148 | - | ||
4149 | - /* | ||
4150 | - * We can race save/restore (if we sleep during a GFP_KERNEL memory | ||
4151 | - * allocation). We therefore store virtual addresses of pmds as they | ||
4152 | - * do not change across save/restore, and poke the machine addresses | ||
4153 | - * into the pgdir under the pgd_lock. | ||
4154 | - */ | ||
4155 | - pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); | ||
4156 | - if (!pmd) { | ||
4157 | - kmem_cache_free(pgd_cache, pgd); | ||
4158 | - return NULL; | ||
4159 | } | ||
4160 | +#endif | ||
4161 | |||
4162 | /* Allocate pmds, remember virtual addresses. */ | ||
4163 | - for (i = 0; i < PTRS_PER_PGD; ++i) { | ||
4164 | - pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | ||
4165 | - if (!pmd[i]) | ||
4166 | + for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { | ||
4167 | + pmd_t *pmd = pmd_cache_alloc(i); | ||
4168 | + | ||
4169 | + if (!pmd) | ||
4170 | goto out_oom; | ||
4171 | + | ||
4172 | paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); | ||
4173 | + if (pmds) | ||
4174 | + pmds[i] = pmd; | ||
4175 | + else | ||
4176 | + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); | ||
4177 | } | ||
4178 | |||
4179 | +#ifdef CONFIG_XEN | ||
4180 | + if (SHARED_KERNEL_PMD) | ||
4181 | + return pgd; | ||
4182 | + | ||
4183 | spin_lock_irqsave(&pgd_lock, flags); | ||
4184 | |||
4185 | /* Protect against save/restore: move below 4GB under pgd_lock. */ | ||
4186 | @@ -335,44 +394,40 @@ | ||
4187 | |||
4188 | /* Copy kernel pmd contents and write-protect the new pmds. */ | ||
4189 | for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { | ||
4190 | - unsigned long v = (unsigned long)i << PGDIR_SHIFT; | ||
4191 | - pgd_t *kpgd = pgd_offset_k(v); | ||
4192 | - pud_t *kpud = pud_offset(kpgd, v); | ||
4193 | - pmd_t *kpmd = pmd_offset(kpud, v); | ||
4194 | - memcpy(pmd[i], kpmd, PAGE_SIZE); | ||
4195 | + memcpy(pmds[i], | ||
4196 | + (void *)pgd_page_vaddr(swapper_pg_dir[i]), | ||
4197 | + sizeof(pmd_t) * PTRS_PER_PMD); | ||
4198 | make_lowmem_page_readonly( | ||
4199 | - pmd[i], XENFEAT_writable_page_tables); | ||
4200 | + pmds[i], XENFEAT_writable_page_tables); | ||
4201 | } | ||
4202 | |||
4203 | /* It is safe to poke machine addresses of pmds under the pmd_lock. */ | ||
4204 | for (i = 0; i < PTRS_PER_PGD; i++) | ||
4205 | - set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i]))); | ||
4206 | - | ||
4207 | - /* Ensure this pgd gets picked up and pinned on save/restore. */ | ||
4208 | - pgd_list_add(pgd); | ||
4209 | + set_pgd(&pgd[i], __pgd(1 + __pa(pmds[i]))); | ||
4210 | |||
4211 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
4212 | |||
4213 | - kfree(pmd); | ||
4214 | + kfree(pmds); | ||
4215 | +#endif | ||
4216 | |||
4217 | return pgd; | ||
4218 | |||
4219 | out_oom: | ||
4220 | - if (HAVE_SHARED_KERNEL_PMD) { | ||
4221 | + if (!pmds) { | ||
4222 | for (i--; i >= 0; i--) { | ||
4223 | pgd_t pgdent = pgd[i]; | ||
4224 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | ||
4225 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
4226 | - kmem_cache_free(pmd_cache, pmd); | ||
4227 | + pmd_cache_free(pmd, i); | ||
4228 | } | ||
4229 | } else { | ||
4230 | for (i--; i >= 0; i--) { | ||
4231 | - paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT); | ||
4232 | - kmem_cache_free(pmd_cache, pmd[i]); | ||
4233 | + paravirt_release_pd(__pa(pmds[i]) >> PAGE_SHIFT); | ||
4234 | + pmd_cache_free(pmds[i], i); | ||
4235 | } | ||
4236 | - kfree(pmd); | ||
4237 | + kfree(pmds); | ||
4238 | } | ||
4239 | - kmem_cache_free(pgd_cache, pgd); | ||
4240 | + quicklist_free(0, pgd_dtor, pgd); | ||
4241 | return NULL; | ||
4242 | } | ||
4243 | |||
4244 | @@ -392,35 +447,24 @@ | ||
4245 | |||
4246 | /* in the PAE case user pgd entries are overwritten before usage */ | ||
4247 | if (PTRS_PER_PMD > 1) { | ||
4248 | - for (i = 0; i < USER_PTRS_PER_PGD; ++i) { | ||
4249 | + for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { | ||
4250 | pgd_t pgdent = pgd[i]; | ||
4251 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | ||
4252 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
4253 | - kmem_cache_free(pmd_cache, pmd); | ||
4254 | + pmd_cache_free(pmd, i); | ||
4255 | } | ||
4256 | |||
4257 | - if (!HAVE_SHARED_KERNEL_PMD) { | ||
4258 | - unsigned long flags; | ||
4259 | - spin_lock_irqsave(&pgd_lock, flags); | ||
4260 | - pgd_list_del(pgd); | ||
4261 | - spin_unlock_irqrestore(&pgd_lock, flags); | ||
4262 | - | ||
4263 | - for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { | ||
4264 | - pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); | ||
4265 | - make_lowmem_page_writable( | ||
4266 | - pmd, XENFEAT_writable_page_tables); | ||
4267 | - memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); | ||
4268 | - kmem_cache_free(pmd_cache, pmd); | ||
4269 | - } | ||
4270 | - | ||
4271 | - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) | ||
4272 | - xen_destroy_contiguous_region( | ||
4273 | - (unsigned long)pgd, 0); | ||
4274 | - } | ||
4275 | + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) | ||
4276 | + xen_destroy_contiguous_region((unsigned long)pgd, 0); | ||
4277 | } | ||
4278 | |||
4279 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ | ||
4280 | - kmem_cache_free(pgd_cache, pgd); | ||
4281 | + quicklist_free(0, pgd_dtor, pgd); | ||
4282 | +} | ||
4283 | + | ||
4284 | +void check_pgt_cache(void) | ||
4285 | +{ | ||
4286 | + quicklist_trim(0, pgd_dtor, 25, 16); | ||
4287 | } | ||
4288 | |||
4289 | void make_lowmem_page_readonly(void *va, unsigned int feature) | ||
4290 | @@ -717,13 +761,13 @@ | ||
4291 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
4292 | } | ||
4293 | |||
4294 | -void _arch_dup_mmap(struct mm_struct *mm) | ||
4295 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | ||
4296 | { | ||
4297 | if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags)) | ||
4298 | mm_pin(mm); | ||
4299 | } | ||
4300 | |||
4301 | -void _arch_exit_mmap(struct mm_struct *mm) | ||
4302 | +void arch_exit_mmap(struct mm_struct *mm) | ||
4303 | { | ||
4304 | struct task_struct *tsk = current; | ||
4305 | |||
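The pgd allocation above now leans on the quicklist idea: a per-type free list of pages that stay constructed, so the constructor only runs when a fresh page really has to be allocated and the destructor only runs when the cache is trimmed. A user-space sketch of that behaviour (all names and the 4 KB size are stand-ins, not the kernel quicklist API):

#include <stdio.h>
#include <stdlib.h>

struct qnode { struct qnode *next; };
static struct qnode *quicklist;
static int quicklist_len;

static void *quicklist_alloc(void (*ctor)(void *))
{
    if (quicklist) {                        /* cached, already constructed */
        struct qnode *n = quicklist;
        quicklist = n->next;
        quicklist_len--;
        n->next = NULL;                     /* restore the first word */
        return n;
    }
    void *obj = calloc(1, 4096);            /* stand-in for a fresh page */
    if (obj && ctor)
        ctor(obj);                          /* ctor only on new pages */
    return obj;
}

static void quicklist_free(void *obj)
{
    /* keep the object constructed and park it on the free list */
    ((struct qnode *)obj)->next = quicklist;
    quicklist = obj;
    quicklist_len++;
}

static void quicklist_trim(void (*dtor)(void *), int keep)
{
    while (quicklist_len > keep) {
        struct qnode *n = quicklist;
        quicklist = n->next;
        quicklist_len--;
        if (dtor)
            dtor(n);                        /* dtor only when trimming */
        free(n);
    }
}

static void pgd_ctor(void *pgd) { printf("ctor %p\n", pgd); }
static void pgd_dtor(void *pgd) { printf("dtor %p\n", pgd); }

int main(void)
{
    void *a = quicklist_alloc(pgd_ctor);    /* ctor runs */
    quicklist_free(a);                      /* parked, still constructed */
    void *b = quicklist_alloc(pgd_ctor);    /* reused, no ctor */
    quicklist_free(b);
    quicklist_trim(pgd_dtor, 0);            /* dtor runs, page released */
    return 0;
}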
4306 | --- a/drivers/char/tpm/tpm_xen.c | ||
4307 | +++ b/drivers/char/tpm/tpm_xen.c | ||
4308 | @@ -463,7 +463,7 @@ | ||
4309 | tp->backend_id = domid; | ||
4310 | |||
4311 | err = bind_listening_port_to_irqhandler( | ||
4312 | - domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); | ||
4313 | + domid, tpmif_int, IRQF_SAMPLE_RANDOM, "tpmif", tp); | ||
4314 | if (err <= 0) { | ||
4315 | WPRINTK("bind_listening_port_to_irqhandler failed " | ||
4316 | "(err=%d)\n", err); | ||
4317 | --- a/drivers/xen/blkfront/blkfront.c | ||
4318 | +++ b/drivers/xen/blkfront/blkfront.c | ||
4319 | @@ -236,7 +236,7 @@ | ||
4320 | info->ring_ref = err; | ||
4321 | |||
4322 | err = bind_listening_port_to_irqhandler( | ||
4323 | - dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); | ||
4324 | + dev->otherend_id, blkif_int, IRQF_SAMPLE_RANDOM, "blkif", info); | ||
4325 | if (err <= 0) { | ||
4326 | xenbus_dev_fatal(dev, err, | ||
4327 | "bind_listening_port_to_irqhandler"); | ||
4328 | --- a/drivers/xen/char/mem.c | ||
4329 | +++ b/drivers/xen/char/mem.c | ||
4330 | @@ -18,7 +18,6 @@ | ||
4331 | #include <linux/raw.h> | ||
4332 | #include <linux/tty.h> | ||
4333 | #include <linux/capability.h> | ||
4334 | -#include <linux/smp_lock.h> | ||
4335 | #include <linux/ptrace.h> | ||
4336 | #include <linux/device.h> | ||
4337 | #include <asm/pgalloc.h> | ||
4338 | --- a/drivers/xen/core/hypervisor_sysfs.c | ||
4339 | +++ b/drivers/xen/core/hypervisor_sysfs.c | ||
4340 | @@ -50,7 +50,7 @@ | ||
4341 | if (!is_running_on_xen()) | ||
4342 | return -ENODEV; | ||
4343 | |||
4344 | - hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type; | ||
4345 | + hypervisor_subsys.kobj.ktype = &hyp_sysfs_kobj_type; | ||
4346 | return 0; | ||
4347 | } | ||
4348 | |||
4349 | --- a/drivers/xen/core/smpboot.c | ||
4350 | +++ b/drivers/xen/core/smpboot.c | ||
4351 | @@ -121,7 +121,7 @@ | ||
4352 | rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, | ||
4353 | cpu, | ||
4354 | smp_reschedule_interrupt, | ||
4355 | - SA_INTERRUPT, | ||
4356 | + IRQF_DISABLED, | ||
4357 | resched_name[cpu], | ||
4358 | NULL); | ||
4359 | if (rc < 0) | ||
4360 | @@ -132,7 +132,7 @@ | ||
4361 | rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, | ||
4362 | cpu, | ||
4363 | smp_call_function_interrupt, | ||
4364 | - SA_INTERRUPT, | ||
4365 | + IRQF_DISABLED, | ||
4366 | callfunc_name[cpu], | ||
4367 | NULL); | ||
4368 | if (rc < 0) | ||
4369 | @@ -165,13 +165,12 @@ | ||
4370 | |||
4371 | void __cpuinit cpu_bringup(void) | ||
4372 | { | ||
4373 | + cpu_init(); | ||
4374 | #ifdef __i386__ | ||
4375 | - cpu_set_gdt(current_thread_info()->cpu); | ||
4376 | - secondary_cpu_init(); | ||
4377 | + identify_secondary_cpu(cpu_data + smp_processor_id()); | ||
4378 | #else | ||
4379 | - cpu_init(); | ||
4380 | -#endif | ||
4381 | identify_cpu(cpu_data + smp_processor_id()); | ||
4382 | +#endif | ||
4383 | touch_softlockup_watchdog(); | ||
4384 | preempt_disable(); | ||
4385 | local_irq_enable(); | ||
4386 | @@ -191,11 +190,6 @@ | ||
4387 | static DEFINE_SPINLOCK(ctxt_lock); | ||
4388 | |||
4389 | struct task_struct *idle = idle_task(cpu); | ||
4390 | -#ifdef __x86_64__ | ||
4391 | - struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu]; | ||
4392 | -#else | ||
4393 | - struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
4394 | -#endif | ||
4395 | |||
4396 | if (cpu_test_and_set(cpu, cpu_initialized_map)) | ||
4397 | return; | ||
4398 | @@ -218,11 +212,11 @@ | ||
4399 | smp_trap_init(ctxt.trap_ctxt); | ||
4400 | |||
4401 | ctxt.ldt_ents = 0; | ||
4402 | - | ||
4403 | - ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address); | ||
4404 | - ctxt.gdt_ents = gdt_descr->size / 8; | ||
4405 | + ctxt.gdt_ents = GDT_SIZE / 8; | ||
4406 | |||
4407 | #ifdef __i386__ | ||
4408 | + ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu)); | ||
4409 | + | ||
4410 | ctxt.user_regs.cs = __KERNEL_CS; | ||
4411 | ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); | ||
4412 | |||
4413 | @@ -235,7 +229,11 @@ | ||
4414 | ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; | ||
4415 | |||
4416 | ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | ||
4417 | + | ||
4418 | + ctxt.user_regs.fs = __KERNEL_PERCPU; | ||
4419 | #else /* __x86_64__ */ | ||
4420 | + ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address); | ||
4421 | + | ||
4422 | ctxt.user_regs.cs = __KERNEL_CS; | ||
4423 | ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); | ||
4424 | |||
4425 | @@ -265,9 +263,8 @@ | ||
4426 | struct vcpu_get_physid cpu_id; | ||
4427 | #ifdef __x86_64__ | ||
4428 | struct desc_ptr *gdt_descr; | ||
4429 | -#else | ||
4430 | - struct Xgt_desc_struct *gdt_descr; | ||
4431 | #endif | ||
4432 | + void *gdt_addr; | ||
4433 | |||
4434 | apicid = 0; | ||
4435 | if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) | ||
4436 | @@ -317,14 +314,12 @@ | ||
4437 | } | ||
4438 | gdt_descr->size = GDT_SIZE; | ||
4439 | memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); | ||
4440 | + gdt_addr = (void *)gdt_descr->address; | ||
4441 | #else | ||
4442 | - if (unlikely(!init_gdt(cpu, idle))) | ||
4443 | - continue; | ||
4444 | - gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
4445 | + init_gdt(cpu); | ||
4446 | + gdt_addr = get_cpu_gdt_table(cpu); | ||
4447 | #endif | ||
4448 | - make_page_readonly( | ||
4449 | - (void *)gdt_descr->address, | ||
4450 | - XENFEAT_writable_descriptor_tables); | ||
4451 | + make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables); | ||
4452 | |||
4453 | apicid = cpu; | ||
4454 | if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) | ||
4455 | @@ -338,7 +333,9 @@ | ||
4456 | #ifdef __x86_64__ | ||
4457 | cpu_pda(cpu)->pcurrent = idle; | ||
4458 | cpu_pda(cpu)->cpunumber = cpu; | ||
4459 | - clear_ti_thread_flag(idle->thread_info, TIF_FORK); | ||
4460 | + clear_ti_thread_flag(task_thread_info(idle), TIF_FORK); | ||
4461 | +#else | ||
4462 | + per_cpu(current_task, cpu) = idle; | ||
4463 | #endif | ||
4464 | |||
4465 | irq_ctx_init(cpu); | ||
4466 | @@ -363,8 +360,12 @@ | ||
4467 | #endif | ||
4468 | } | ||
4469 | |||
4470 | -void __devinit smp_prepare_boot_cpu(void) | ||
4471 | +void __init smp_prepare_boot_cpu(void) | ||
4472 | { | ||
4473 | +#ifdef __i386__ | ||
4474 | + init_gdt(smp_processor_id()); | ||
4475 | + switch_to_new_gdt(); | ||
4476 | +#endif | ||
4477 | prefill_possible_map(); | ||
4478 | } | ||
4479 | |||
4480 | --- a/drivers/xen/core/xen_sysfs.c | ||
4481 | +++ b/drivers/xen/core/xen_sysfs.c | ||
4482 | @@ -28,12 +28,12 @@ | ||
4483 | |||
4484 | static int __init xen_sysfs_type_init(void) | ||
4485 | { | ||
4486 | - return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr); | ||
4487 | + return sysfs_create_file(&hypervisor_subsys.kobj, &type_attr.attr); | ||
4488 | } | ||
4489 | |||
4490 | static void xen_sysfs_type_destroy(void) | ||
4491 | { | ||
4492 | - sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr); | ||
4493 | + sysfs_remove_file(&hypervisor_subsys.kobj, &type_attr.attr); | ||
4494 | } | ||
4495 | |||
4496 | /* xen version attributes */ | ||
4497 | @@ -89,13 +89,13 @@ | ||
4498 | |||
4499 | static int __init xen_sysfs_version_init(void) | ||
4500 | { | ||
4501 | - return sysfs_create_group(&hypervisor_subsys.kset.kobj, | ||
4502 | + return sysfs_create_group(&hypervisor_subsys.kobj, | ||
4503 | &version_group); | ||
4504 | } | ||
4505 | |||
4506 | static void xen_sysfs_version_destroy(void) | ||
4507 | { | ||
4508 | - sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group); | ||
4509 | + sysfs_remove_group(&hypervisor_subsys.kobj, &version_group); | ||
4510 | } | ||
4511 | |||
4512 | /* UUID */ | ||
4513 | @@ -125,12 +125,12 @@ | ||
4514 | |||
4515 | static int __init xen_sysfs_uuid_init(void) | ||
4516 | { | ||
4517 | - return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr); | ||
4518 | + return sysfs_create_file(&hypervisor_subsys.kobj, &uuid_attr.attr); | ||
4519 | } | ||
4520 | |||
4521 | static void xen_sysfs_uuid_destroy(void) | ||
4522 | { | ||
4523 | - sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr); | ||
4524 | + sysfs_remove_file(&hypervisor_subsys.kobj, &uuid_attr.attr); | ||
4525 | } | ||
4526 | |||
4527 | /* xen compilation attributes */ | ||
4528 | @@ -203,13 +203,13 @@ | ||
4529 | |||
4530 | int __init static xen_compilation_init(void) | ||
4531 | { | ||
4532 | - return sysfs_create_group(&hypervisor_subsys.kset.kobj, | ||
4533 | + return sysfs_create_group(&hypervisor_subsys.kobj, | ||
4534 | &xen_compilation_group); | ||
4535 | } | ||
4536 | |||
4537 | static void xen_compilation_destroy(void) | ||
4538 | { | ||
4539 | - sysfs_remove_group(&hypervisor_subsys.kset.kobj, | ||
4540 | + sysfs_remove_group(&hypervisor_subsys.kobj, | ||
4541 | &xen_compilation_group); | ||
4542 | } | ||
4543 | |||
4544 | @@ -324,13 +324,13 @@ | ||
4545 | |||
4546 | static int __init xen_properties_init(void) | ||
4547 | { | ||
4548 | - return sysfs_create_group(&hypervisor_subsys.kset.kobj, | ||
4549 | + return sysfs_create_group(&hypervisor_subsys.kobj, | ||
4550 | &xen_properties_group); | ||
4551 | } | ||
4552 | |||
4553 | static void xen_properties_destroy(void) | ||
4554 | { | ||
4555 | - sysfs_remove_group(&hypervisor_subsys.kset.kobj, | ||
4556 | + sysfs_remove_group(&hypervisor_subsys.kobj, | ||
4557 | &xen_properties_group); | ||
4558 | } | ||
4559 | |||
4560 | --- a/drivers/xen/netback/netback.c | ||
4561 | +++ b/drivers/xen/netback/netback.c | ||
4562 | @@ -180,7 +180,7 @@ | ||
4563 | goto err; | ||
4564 | |||
4565 | skb_reserve(nskb, 16 + NET_IP_ALIGN); | ||
4566 | - headlen = nskb->end - nskb->data; | ||
4567 | + headlen = skb_end_pointer(nskb) - nskb->data; | ||
4568 | if (headlen > skb_headlen(skb)) | ||
4569 | headlen = skb_headlen(skb); | ||
4570 | ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); | ||
4571 | @@ -226,11 +226,15 @@ | ||
4572 | len -= copy; | ||
4573 | } | ||
4574 | |||
4575 | +#ifdef NET_SKBUFF_DATA_USES_OFFSET | ||
4576 | + offset = 0; | ||
4577 | +#else | ||
4578 | offset = nskb->data - skb->data; | ||
4579 | +#endif | ||
4580 | |||
4581 | - nskb->h.raw = skb->h.raw + offset; | ||
4582 | - nskb->nh.raw = skb->nh.raw + offset; | ||
4583 | - nskb->mac.raw = skb->mac.raw + offset; | ||
4584 | + nskb->transport_header = skb->transport_header + offset; | ||
4585 | + nskb->network_header = skb->network_header + offset; | ||
4586 | + nskb->mac_header = skb->mac_header + offset; | ||
4587 | |||
4588 | return nskb; | ||
4589 | |||
4590 | @@ -1601,7 +1605,7 @@ | ||
4591 | (void)bind_virq_to_irqhandler(VIRQ_DEBUG, | ||
4592 | 0, | ||
4593 | netif_be_dbg, | ||
4594 | - SA_SHIRQ, | ||
4595 | + IRQF_SHARED, | ||
4596 | "net-be-dbg", | ||
4597 | &netif_be_dbg); | ||
4598 | #endif | ||
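The netback copy path adopts the 2.6.22 sk_buff layout: the h/nh/mac unions are gone, header positions live in transport_header/network_header/mac_header (offsets rather than pointers when NET_SKBUFF_DATA_USES_OFFSET is defined, hence the #ifdef above), and the end of the linear area is read via skb_end_pointer(). A self-contained sketch of the accessor style, not taken verbatim from the patch:

    #include <linux/skbuff.h>
    #include <linux/ip.h>
    #include <linux/tcp.h>

    /* Sketch only: the 2.6.22 accessors used throughout the remaining hunks. */
    static unsigned int example_linear_space(struct sk_buff *skb)
    {
        unsigned char *mac = skb_mac_header(skb);   /* was skb->mac.raw */
        struct iphdr  *iph = ip_hdr(skb);           /* was skb->nh.iph  */
        struct tcphdr *th  = tcp_hdr(skb);          /* was skb->h.th    */

        (void)mac; (void)iph; (void)th;
        return skb_end_pointer(skb) - skb->data;    /* was skb->end - skb->data */
    }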
4599 | --- a/drivers/xen/netfront/netfront.c | ||
4600 | +++ b/drivers/xen/netfront/netfront.c | ||
4601 | @@ -513,7 +513,7 @@ | ||
4602 | memcpy(netdev->dev_addr, info->mac, ETH_ALEN); | ||
4603 | |||
4604 | err = bind_listening_port_to_irqhandler( | ||
4605 | - dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name, | ||
4606 | + dev->otherend_id, netif_int, IRQF_SAMPLE_RANDOM, netdev->name, | ||
4607 | netdev); | ||
4608 | if (err < 0) | ||
4609 | goto fail; | ||
4610 | --- a/drivers/xen/pciback/xenbus.c | ||
4611 | +++ b/drivers/xen/pciback/xenbus.c | ||
4612 | @@ -86,7 +86,7 @@ | ||
4613 | |||
4614 | err = bind_interdomain_evtchn_to_irqhandler( | ||
4615 | pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, | ||
4616 | - SA_SAMPLE_RANDOM, "pciback", pdev); | ||
4617 | + IRQF_SAMPLE_RANDOM, "pciback", pdev); | ||
4618 | if (err < 0) { | ||
4619 | xenbus_dev_fatal(pdev->xdev, err, | ||
4620 | "Error binding event channel to IRQ"); | ||
4621 | --- a/drivers/xen/pcifront/xenbus.c | ||
4622 | +++ b/drivers/xen/pcifront/xenbus.c | ||
4623 | @@ -10,10 +10,6 @@ | ||
4624 | #include <xen/gnttab.h> | ||
4625 | #include "pcifront.h" | ||
4626 | |||
4627 | -#ifndef __init_refok | ||
4628 | -#define __init_refok | ||
4629 | -#endif | ||
4630 | - | ||
4631 | #define INVALID_GRANT_REF (0) | ||
4632 | #define INVALID_EVTCHN (-1) | ||
4633 | |||
4634 | --- a/drivers/xen/sfc_netback/accel_fwd.c | ||
4635 | +++ b/drivers/xen/sfc_netback/accel_fwd.c | ||
4636 | @@ -308,7 +308,7 @@ | ||
4637 | static inline int packet_is_arp_reply(struct sk_buff *skb) | ||
4638 | { | ||
4639 | return skb->protocol == ntohs(ETH_P_ARP) | ||
4640 | - && skb->nh.arph->ar_op == ntohs(ARPOP_REPLY); | ||
4641 | + && arp_hdr(skb)->ar_op == ntohs(ARPOP_REPLY); | ||
4642 | } | ||
4643 | |||
4644 | |||
4645 | @@ -392,12 +392,13 @@ | ||
4646 | |||
4647 | BUG_ON(fwd_priv == NULL); | ||
4648 | |||
4649 | - if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) { | ||
4650 | + if (is_broadcast_ether_addr(skb_mac_header(skb)) | ||
4651 | + && packet_is_arp_reply(skb)) { | ||
4652 | /* | ||
4653 | * update our fast path forwarding to reflect this | ||
4654 | * gratuitous ARP | ||
4655 | */ | ||
4656 | - mac = skb->mac.raw+ETH_ALEN; | ||
4657 | + mac = skb_mac_header(skb)+ETH_ALEN; | ||
4658 | |||
4659 | DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n", | ||
4660 | __FUNCTION__, MAC_ARG(mac)); | ||
4661 | --- a/drivers/xen/sfc_netback/accel_solarflare.c | ||
4662 | +++ b/drivers/xen/sfc_netback/accel_solarflare.c | ||
4663 | @@ -114,7 +114,7 @@ | ||
4664 | BUG_ON(port == NULL); | ||
4665 | |||
4666 | NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++); | ||
4667 | - if (skb->mac.raw != NULL) | ||
4668 | + if (skb_mac_header_was_set(skb)) | ||
4669 | netback_accel_tx_packet(skb, port->fwd_priv); | ||
4670 | else { | ||
4671 | DPRINTK("Ignoring packet with missing mac address\n"); | ||
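The accel_fwd.c and accel_solarflare.c hunks apply the same conversion on the backend accelerator: arp_hdr(skb) replaces skb->nh.arph, skb_mac_header(skb) replaces skb->mac.raw, and the presence of a MAC header is tested with skb_mac_header_was_set(), since a NULL check on the raw pointer no longer works once the field is an offset. A sketch of the resulting gratuitous-ARP test, with the forwarding-table update itself elided:

    if (skb_mac_header_was_set(skb) &&
        is_broadcast_ether_addr(skb_mac_header(skb)) &&
        packet_is_arp_reply(skb))
        mac = skb_mac_header(skb) + ETH_ALEN;   /* source MAC of the ARP reply */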
4672 | --- a/drivers/xen/sfc_netfront/accel_tso.c | ||
4673 | +++ b/drivers/xen/sfc_netfront/accel_tso.c | ||
4674 | @@ -33,10 +33,9 @@ | ||
4675 | |||
4676 | #include "accel_tso.h" | ||
4677 | |||
4678 | -#define PTR_DIFF(p1, p2) ((u8*)(p1) - (u8*)(p2)) | ||
4679 | -#define ETH_HDR_LEN(skb) ((skb)->nh.raw - (skb)->data) | ||
4680 | -#define SKB_TCP_OFF(skb) PTR_DIFF ((skb)->h.th, (skb)->data) | ||
4681 | -#define SKB_IP_OFF(skb) PTR_DIFF ((skb)->nh.iph, (skb)->data) | ||
4682 | +#define ETH_HDR_LEN(skb) skb_network_offset(skb) | ||
4683 | +#define SKB_TCP_OFF(skb) skb_transport_offset(skb) | ||
4684 | +#define SKB_IP_OFF(skb) skb_network_offset(skb) | ||
4685 | |||
4686 | /* | ||
4687 | * Set a maximum number of buffers in each output packet to make life | ||
4688 | @@ -114,9 +113,8 @@ | ||
4689 | static inline void tso_check_safe(struct sk_buff *skb) { | ||
4690 | EPRINTK_ON(skb->protocol != htons (ETH_P_IP)); | ||
4691 | EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP)); | ||
4692 | - EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP); | ||
4693 | - EPRINTK_ON((SKB_TCP_OFF(skb) | ||
4694 | - + (skb->h.th->doff << 2u)) > skb_headlen(skb)); | ||
4695 | + EPRINTK_ON(ip_hdr(skb)->protocol != IPPROTO_TCP); | ||
4696 | + EPRINTK_ON((SKB_TCP_OFF(skb) + tcp_hdrlen(skb)) > skb_headlen(skb)); | ||
4697 | } | ||
4698 | |||
4699 | |||
4700 | @@ -129,17 +127,17 @@ | ||
4701 | * All ethernet/IP/TCP headers combined size is TCP header size | ||
4702 | * plus offset of TCP header relative to start of packet. | ||
4703 | */ | ||
4704 | - st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb); | ||
4705 | + st->p.header_length = tcp_hdrlen(skb) + SKB_TCP_OFF(skb); | ||
4706 | st->p.full_packet_size = (st->p.header_length | ||
4707 | + skb_shinfo(skb)->gso_size); | ||
4708 | st->p.gso_size = skb_shinfo(skb)->gso_size; | ||
4709 | |||
4710 | - st->p.ip_id = htons(skb->nh.iph->id); | ||
4711 | - st->seqnum = ntohl(skb->h.th->seq); | ||
4712 | + st->p.ip_id = htons(ip_hdr(skb)->id); | ||
4713 | + st->seqnum = ntohl(tcp_hdr(skb)->seq); | ||
4714 | |||
4715 | - EPRINTK_ON(skb->h.th->urg); | ||
4716 | - EPRINTK_ON(skb->h.th->syn); | ||
4717 | - EPRINTK_ON(skb->h.th->rst); | ||
4718 | + EPRINTK_ON(tcp_hdr(skb)->urg); | ||
4719 | + EPRINTK_ON(tcp_hdr(skb)->syn); | ||
4720 | + EPRINTK_ON(tcp_hdr(skb)->rst); | ||
4721 | |||
4722 | st->remaining_len = skb->len - st->p.header_length; | ||
4723 | |||
4724 | @@ -258,8 +256,8 @@ | ||
4725 | /* This packet will be the last in the TSO burst. */ | ||
4726 | ip_length = (st->p.header_length - ETH_HDR_LEN(skb) | ||
4727 | + st->remaining_len); | ||
4728 | - tsoh_th->fin = skb->h.th->fin; | ||
4729 | - tsoh_th->psh = skb->h.th->psh; | ||
4730 | + tsoh_th->fin = tcp_hdr(skb)->fin; | ||
4731 | + tsoh_th->psh = tcp_hdr(skb)->psh; | ||
4732 | } | ||
4733 | |||
4734 | tsoh_iph->tot_len = htons(ip_length); | ||
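accel_tso.c now derives its offsets from the generic helpers instead of pointer arithmetic on the removed unions: skb_network_offset() and skb_transport_offset() give header offsets relative to skb->data, and tcp_hdrlen() replaces the open-coded "doff << 2". A minimal sketch of the combined header length the TSO state records, assuming a TCP/IPv4 skb (which tso_check_safe() asserts):

    #include <linux/skbuff.h>
    #include <linux/tcp.h>

    /* eth + IP + TCP header bytes; was SKB_TCP_OFF(skb) + (skb->h.th->doff << 2) */
    static unsigned int example_header_len(const struct sk_buff *skb)
    {
        return skb_transport_offset(skb) + tcp_hdrlen(skb);
    }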
4735 | --- a/drivers/xen/sfc_netfront/accel_vi.c | ||
4736 | +++ b/drivers/xen/sfc_netfront/accel_vi.c | ||
4737 | @@ -463,7 +463,7 @@ | ||
4738 | |||
4739 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
4740 | /* Set to zero to encourage falcon to work it out for us */ | ||
4741 | - *(u16*)(skb->h.raw + skb->csum_offset) = 0; | ||
4742 | + *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0; | ||
4743 | } | ||
4744 | |||
4745 | if (multi_post_start_new_buffer(vnic, &state)) { | ||
4746 | @@ -582,7 +582,7 @@ | ||
4747 | |||
4748 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
4749 | /* Set to zero to encourage falcon to work it out for us */ | ||
4750 | - *(u16*)(skb->h.raw + skb->csum_offset) = 0; | ||
4751 | + *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0; | ||
4752 | } | ||
4753 | NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT | ||
4754 | (skb, idx, frag_data, frag_len, { | ||
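For CHECKSUM_PARTIAL packets, 2.6.22 records where the checksum must be written as skb->csum_start (an offset from skb->head) plus skb->csum_offset, rather than implying the start from h.raw; that is all the two accel_vi.c hunks recompute:

    if (skb->ip_summed == CHECKSUM_PARTIAL) {
        u16 *csum = (u16 *)(skb->head + skb->csum_start + skb->csum_offset);
        *csum = 0;      /* zeroed so the NIC computes it */
    }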
4755 | --- a/drivers/xen/sfc_netfront/accel_xenbus.c | ||
4756 | +++ b/drivers/xen/sfc_netfront/accel_xenbus.c | ||
4757 | @@ -356,7 +356,7 @@ | ||
4758 | /* Create xenbus msg event channel */ | ||
4759 | err = bind_listening_port_to_irqhandler | ||
4760 | (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend, | ||
4761 | - SA_SAMPLE_RANDOM, "vnicctrl", vnic); | ||
4762 | + IRQF_SAMPLE_RANDOM, "vnicctrl", vnic); | ||
4763 | if (err < 0) { | ||
4764 | EPRINTK("Couldn't bind msg event channel\n"); | ||
4765 | goto fail_msg_irq; | ||
4766 | @@ -367,7 +367,7 @@ | ||
4767 | /* Create xenbus net event channel */ | ||
4768 | err = bind_listening_port_to_irqhandler | ||
4769 | (dev->otherend_id, netfront_accel_net_channel_irq_from_bend, | ||
4770 | - SA_SAMPLE_RANDOM, "vnicfront", vnic); | ||
4771 | + IRQF_SAMPLE_RANDOM, "vnicfront", vnic); | ||
4772 | if (err < 0) { | ||
4773 | EPRINTK("Couldn't bind net event channel\n"); | ||
4774 | goto fail_net_irq; | ||
4775 | --- a/drivers/xen/xenoprof/xenoprofile.c | ||
4776 | +++ b/drivers/xen/xenoprof/xenoprofile.c | ||
4777 | @@ -236,7 +236,7 @@ | ||
4778 | result = bind_virq_to_irqhandler(VIRQ_XENOPROF, | ||
4779 | i, | ||
4780 | xenoprof_ovf_interrupt, | ||
4781 | - SA_INTERRUPT, | ||
4782 | + IRQF_DISABLED, | ||
4783 | "xenoprof", | ||
4784 | NULL); | ||
4785 | |||
4786 | --- a/fs/aio.c | ||
4787 | +++ b/fs/aio.c | ||
4788 | @@ -38,7 +38,7 @@ | ||
4789 | |||
4790 | #ifdef CONFIG_EPOLL | ||
4791 | #include <linux/poll.h> | ||
4792 | -#include <linux/eventpoll.h> | ||
4793 | +#include <linux/anon_inodes.h> | ||
4794 | #endif | ||
4795 | |||
4796 | #if DEBUG > 1 | ||
4797 | @@ -1308,7 +1308,7 @@ | ||
4798 | |||
4799 | /* make_aio_fd: | ||
4800 | * Create a file descriptor that can be used to poll the event queue. | ||
4801 | - * Based and piggybacked on the excellent epoll code. | ||
4802 | + * Based on the excellent epoll code. | ||
4803 | */ | ||
4804 | |||
4805 | static int make_aio_fd(struct kioctx *ioctx) | ||
4806 | @@ -1317,7 +1317,8 @@ | ||
4807 | struct inode *inode; | ||
4808 | struct file *file; | ||
4809 | |||
4810 | - error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); | ||
4811 | + error = anon_inode_getfd(&fd, &inode, &file, "[aioq]", | ||
4812 | + &aioq_fops, ioctx); | ||
4813 | if (error) | ||
4814 | return error; | ||
4815 | |||
4816 | --- a/include/asm-x86/mach-xen/asm/desc_32.h | ||
4817 | +++ b/include/asm-x86/mach-xen/asm/desc_32.h | ||
4818 | @@ -11,23 +11,24 @@ | ||
4819 | |||
4820 | #include <asm/mmu.h> | ||
4821 | |||
4822 | -extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; | ||
4823 | - | ||
4824 | struct Xgt_desc_struct { | ||
4825 | unsigned short size; | ||
4826 | unsigned long address __attribute__((packed)); | ||
4827 | unsigned short pad; | ||
4828 | } __attribute__ ((packed)); | ||
4829 | |||
4830 | -extern struct Xgt_desc_struct idt_descr; | ||
4831 | -DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); | ||
4832 | -extern struct Xgt_desc_struct early_gdt_descr; | ||
4833 | +struct gdt_page | ||
4834 | +{ | ||
4835 | + struct desc_struct gdt[GDT_ENTRIES]; | ||
4836 | +} __attribute__((aligned(PAGE_SIZE))); | ||
4837 | +DECLARE_PER_CPU(struct gdt_page, gdt_page); | ||
4838 | |||
4839 | static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) | ||
4840 | { | ||
4841 | - return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; | ||
4842 | + return per_cpu(gdt_page, cpu).gdt; | ||
4843 | } | ||
4844 | |||
4845 | +extern struct Xgt_desc_struct idt_descr; | ||
4846 | extern struct desc_struct idt_table[]; | ||
4847 | extern void set_intr_gate(unsigned int irq, void * addr); | ||
4848 | |||
4849 | @@ -55,53 +56,32 @@ | ||
4850 | #define DESCTYPE_S 0x10 /* !system */ | ||
4851 | |||
4852 | #ifndef CONFIG_XEN | ||
4853 | -#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) | ||
4854 | - | ||
4855 | -#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) | ||
4856 | -#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) | ||
4857 | +#define load_TR_desc() native_load_tr_desc() | ||
4858 | +#define load_gdt(dtr) native_load_gdt(dtr) | ||
4859 | +#define load_idt(dtr) native_load_idt(dtr) | ||
4860 | #define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) | ||
4861 | #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) | ||
4862 | |||
4863 | -#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) | ||
4864 | -#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) | ||
4865 | -#define store_tr(tr) __asm__ ("str %0":"=m" (tr)) | ||
4866 | +#define store_gdt(dtr) native_store_gdt(dtr) | ||
4867 | +#define store_idt(dtr) native_store_idt(dtr) | ||
4868 | +#define store_tr(tr) (tr = native_store_tr()) | ||
4869 | #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) | ||
4870 | -#endif | ||
4871 | |||
4872 | -#if TLS_SIZE != 24 | ||
4873 | -# error update this code. | ||
4874 | -#endif | ||
4875 | - | ||
4876 | -static inline void load_TLS(struct thread_struct *t, unsigned int cpu) | ||
4877 | -{ | ||
4878 | -#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \ | ||
4879 | - *(u64 *)&t->tls_array[i]) \ | ||
4880 | - BUG() | ||
4881 | - C(0); C(1); C(2); | ||
4882 | -#undef C | ||
4883 | -} | ||
4884 | +#define load_TLS(t, cpu) native_load_tls(t, cpu) | ||
4885 | +#define set_ldt native_set_ldt | ||
4886 | |||
4887 | -#ifndef CONFIG_XEN | ||
4888 | #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | ||
4889 | #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | ||
4890 | #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | ||
4891 | |||
4892 | -static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) | ||
4893 | +static inline void write_dt_entry(struct desc_struct *dt, | ||
4894 | + int entry, u32 entry_low, u32 entry_high) | ||
4895 | { | ||
4896 | - __u32 *lp = (__u32 *)((char *)dt + entry*8); | ||
4897 | - *lp = entry_a; | ||
4898 | - *(lp+1) = entry_b; | ||
4899 | + dt[entry].a = entry_low; | ||
4900 | + dt[entry].b = entry_high; | ||
4901 | } | ||
4902 | -#define set_ldt native_set_ldt | ||
4903 | -#else | ||
4904 | -extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); | ||
4905 | -extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); | ||
4906 | -#define set_ldt xen_set_ldt | ||
4907 | -#endif | ||
4908 | |||
4909 | -#ifndef CONFIG_XEN | ||
4910 | -static inline fastcall void native_set_ldt(const void *addr, | ||
4911 | - unsigned int entries) | ||
4912 | +static inline void native_set_ldt(const void *addr, unsigned int entries) | ||
4913 | { | ||
4914 | if (likely(entries == 0)) | ||
4915 | __asm__ __volatile__("lldt %w0"::"q" (0)); | ||
4916 | @@ -116,6 +96,65 @@ | ||
4917 | __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); | ||
4918 | } | ||
4919 | } | ||
4920 | + | ||
4921 | + | ||
4922 | +static inline void native_load_tr_desc(void) | ||
4923 | +{ | ||
4924 | + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); | ||
4925 | +} | ||
4926 | + | ||
4927 | +static inline void native_load_gdt(const struct Xgt_desc_struct *dtr) | ||
4928 | +{ | ||
4929 | + asm volatile("lgdt %0"::"m" (*dtr)); | ||
4930 | +} | ||
4931 | + | ||
4932 | +static inline void native_load_idt(const struct Xgt_desc_struct *dtr) | ||
4933 | +{ | ||
4934 | + asm volatile("lidt %0"::"m" (*dtr)); | ||
4935 | +} | ||
4936 | + | ||
4937 | +static inline void native_store_gdt(struct Xgt_desc_struct *dtr) | ||
4938 | +{ | ||
4939 | + asm ("sgdt %0":"=m" (*dtr)); | ||
4940 | +} | ||
4941 | + | ||
4942 | +static inline void native_store_idt(struct Xgt_desc_struct *dtr) | ||
4943 | +{ | ||
4944 | + asm ("sidt %0":"=m" (*dtr)); | ||
4945 | +} | ||
4946 | + | ||
4947 | +static inline unsigned long native_store_tr(void) | ||
4948 | +{ | ||
4949 | + unsigned long tr; | ||
4950 | + asm ("str %0":"=r" (tr)); | ||
4951 | + return tr; | ||
4952 | +} | ||
4953 | + | ||
4954 | +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) | ||
4955 | +{ | ||
4956 | + unsigned int i; | ||
4957 | + struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
4958 | + | ||
4959 | + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) | ||
4960 | + gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; | ||
4961 | +} | ||
4962 | +#else | ||
4963 | +#define load_TLS(t, cpu) xen_load_tls(t, cpu) | ||
4964 | +#define set_ldt xen_set_ldt | ||
4965 | + | ||
4966 | +extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); | ||
4967 | +extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); | ||
4968 | + | ||
4969 | +static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu) | ||
4970 | +{ | ||
4971 | + unsigned int i; | ||
4972 | + struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN; | ||
4973 | + | ||
4974 | + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) | ||
4975 | + if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), | ||
4976 | + *(u64 *)&t->tls_array[i])) | ||
4977 | + BUG(); | ||
4978 | +} | ||
4979 | #endif | ||
4980 | |||
4981 | #ifndef CONFIG_X86_NO_IDT | ||
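desc_32.h follows mainline 2.6.22: the GDT becomes a page-aligned per-cpu gdt_page, the native_* descriptor helpers are spelled out, and both TLS loaders loop over the GDT_ENTRY_TLS_ENTRIES slots (three, per the old "TLS_SIZE != 24" guard) instead of the unrolled C(0); C(1); C(2). Callers are unchanged; under CONFIG_XEN the usual context-switch call now expands to xen_load_tls(), e.g.:

    /* __switch_to()-style usage, sketched; next_p stands for the incoming task */
    load_TLS(&next_p->thread, cpu);   /* -> xen_load_tls(): one HYPERVISOR_update_descriptor per slot */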
4982 | --- a/include/asm-x86/mach-xen/asm/desc_64.h | ||
4983 | +++ b/include/asm-x86/mach-xen/asm/desc_64.h | ||
4984 | @@ -127,16 +127,6 @@ | ||
4985 | DESC_LDT, size * 8 - 1); | ||
4986 | } | ||
4987 | |||
4988 | -static inline void set_seg_base(unsigned cpu, int entry, void *base) | ||
4989 | -{ | ||
4990 | - struct desc_struct *d = &cpu_gdt(cpu)[entry]; | ||
4991 | - u32 addr = (u32)(u64)base; | ||
4992 | - BUG_ON((u64)base >> 32); | ||
4993 | - d->base0 = addr & 0xffff; | ||
4994 | - d->base1 = (addr >> 16) & 0xff; | ||
4995 | - d->base2 = (addr >> 24) & 0xff; | ||
4996 | -} | ||
4997 | - | ||
4998 | #define LDT_entry_a(info) \ | ||
4999 | ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) | ||
5000 | /* Don't allow setting of the lm bit. It is useless anyways because | ||
5001 | @@ -165,25 +155,15 @@ | ||
5002 | (info)->useable == 0 && \ | ||
5003 | (info)->lm == 0) | ||
5004 | |||
5005 | -#if TLS_SIZE != 24 | ||
5006 | -# error update this code. | ||
5007 | -#endif | ||
5008 | - | ||
5009 | static inline void load_TLS(struct thread_struct *t, unsigned int cpu) | ||
5010 | { | ||
5011 | -#if 0 | ||
5012 | + unsigned int i; | ||
5013 | u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); | ||
5014 | - gdt[0] = t->tls_array[0]; | ||
5015 | - gdt[1] = t->tls_array[1]; | ||
5016 | - gdt[2] = t->tls_array[2]; | ||
5017 | -#endif | ||
5018 | -#define C(i) \ | ||
5019 | - if (HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]), \ | ||
5020 | - t->tls_array[i])) \ | ||
5021 | - BUG(); | ||
5022 | |||
5023 | - C(0); C(1); C(2); | ||
5024 | -#undef C | ||
5025 | + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) | ||
5026 | + if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), | ||
5027 | + t->tls_array[i])) | ||
5028 | + BUG(); | ||
5029 | } | ||
5030 | |||
5031 | /* | ||
5032 | --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h | ||
5033 | +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h | ||
5034 | @@ -51,7 +51,7 @@ | ||
5035 | }; | ||
5036 | |||
5037 | extern dma_addr_t bad_dma_address; | ||
5038 | -extern struct dma_mapping_ops* dma_ops; | ||
5039 | +extern const struct dma_mapping_ops* dma_ops; | ||
5040 | extern int iommu_merge; | ||
5041 | |||
5042 | #if 0 | ||
5043 | --- a/include/asm-x86/mach-xen/asm/fixmap_32.h | ||
5044 | +++ b/include/asm-x86/mach-xen/asm/fixmap_32.h | ||
5045 | @@ -19,10 +19,8 @@ | ||
5046 | * the start of the fixmap. | ||
5047 | */ | ||
5048 | extern unsigned long __FIXADDR_TOP; | ||
5049 | -#ifdef CONFIG_COMPAT_VDSO | ||
5050 | -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) | ||
5051 | -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) | ||
5052 | -#endif | ||
5053 | +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) | ||
5054 | +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) | ||
5055 | |||
5056 | #ifndef __ASSEMBLY__ | ||
5057 | #include <linux/kernel.h> | ||
5058 | @@ -85,6 +83,9 @@ | ||
5059 | #ifdef CONFIG_PCI_MMCONFIG | ||
5060 | FIX_PCIE_MCFG, | ||
5061 | #endif | ||
5062 | +#ifdef CONFIG_PARAVIRT | ||
5063 | + FIX_PARAVIRT_BOOTMAP, | ||
5064 | +#endif | ||
5065 | FIX_SHARED_INFO, | ||
5066 | #define NR_FIX_ISAMAPS 256 | ||
5067 | FIX_ISAMAP_END, | ||
5068 | --- a/include/asm-x86/mach-xen/asm/fixmap_64.h | ||
5069 | +++ b/include/asm-x86/mach-xen/asm/fixmap_64.h | ||
5070 | @@ -15,7 +15,6 @@ | ||
5071 | #include <asm/apicdef.h> | ||
5072 | #include <asm/page.h> | ||
5073 | #include <asm/vsyscall.h> | ||
5074 | -#include <asm/vsyscall32.h> | ||
5075 | #include <asm/acpi.h> | ||
5076 | |||
5077 | /* | ||
5078 | --- a/include/asm-x86/mach-xen/asm/highmem.h | ||
5079 | +++ b/include/asm-x86/mach-xen/asm/highmem.h | ||
5080 | @@ -67,12 +67,18 @@ | ||
5081 | |||
5082 | void *kmap(struct page *page); | ||
5083 | void kunmap(struct page *page); | ||
5084 | +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); | ||
5085 | void *kmap_atomic(struct page *page, enum km_type type); | ||
5086 | void *kmap_atomic_pte(struct page *page, enum km_type type); | ||
5087 | void kunmap_atomic(void *kvaddr, enum km_type type); | ||
5088 | void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); | ||
5089 | struct page *kmap_atomic_to_page(void *ptr); | ||
5090 | |||
5091 | +#define kmap_atomic_pte(page, type) \ | ||
5092 | + kmap_atomic_prot(page, type, \ | ||
5093 | + test_bit(PG_pinned, &(page)->flags) \ | ||
5094 | + ? PAGE_KERNEL_RO : kmap_prot) | ||
5095 | + | ||
5096 | #define flush_cache_kmaps() do { } while (0) | ||
5097 | |||
5098 | #endif /* __KERNEL__ */ | ||
5099 | --- a/include/asm-x86/mach-xen/asm/io_32.h | ||
5100 | +++ b/include/asm-x86/mach-xen/asm/io_32.h | ||
5101 | @@ -263,15 +263,18 @@ | ||
5102 | |||
5103 | #endif /* __KERNEL__ */ | ||
5104 | |||
5105 | -#define __SLOW_DOWN_IO "outb %%al,$0x80;" | ||
5106 | +static inline void xen_io_delay(void) | ||
5107 | +{ | ||
5108 | + asm volatile("outb %%al,$0x80" : : : "memory"); | ||
5109 | +} | ||
5110 | |||
5111 | static inline void slow_down_io(void) { | ||
5112 | - __asm__ __volatile__( | ||
5113 | - __SLOW_DOWN_IO | ||
5114 | + xen_io_delay(); | ||
5115 | #ifdef REALLY_SLOW_IO | ||
5116 | - __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO | ||
5117 | + xen_io_delay(); | ||
5118 | + xen_io_delay(); | ||
5119 | + xen_io_delay(); | ||
5120 | #endif | ||
5121 | - : : ); | ||
5122 | } | ||
5123 | |||
5124 | #ifdef CONFIG_X86_NUMAQ | ||
5125 | --- a/include/asm-x86/mach-xen/asm/irqflags_32.h | ||
5126 | +++ b/include/asm-x86/mach-xen/asm/irqflags_32.h | ||
5127 | @@ -11,6 +11,43 @@ | ||
5128 | #define _ASM_IRQFLAGS_H | ||
5129 | |||
5130 | #ifndef __ASSEMBLY__ | ||
5131 | +#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask) | ||
5132 | + | ||
5133 | +#define xen_restore_fl(f) \ | ||
5134 | +do { \ | ||
5135 | + vcpu_info_t *_vcpu; \ | ||
5136 | + barrier(); \ | ||
5137 | + _vcpu = current_vcpu_info(); \ | ||
5138 | + if ((_vcpu->evtchn_upcall_mask = (f)) == 0) { \ | ||
5139 | + barrier(); /* unmask then check (avoid races) */\ | ||
5140 | + if (unlikely(_vcpu->evtchn_upcall_pending)) \ | ||
5141 | + force_evtchn_callback(); \ | ||
5142 | + } \ | ||
5143 | +} while (0) | ||
5144 | + | ||
5145 | +#define xen_irq_disable() \ | ||
5146 | +do { \ | ||
5147 | + current_vcpu_info()->evtchn_upcall_mask = 1; \ | ||
5148 | + barrier(); \ | ||
5149 | +} while (0) | ||
5150 | + | ||
5151 | +#define xen_irq_enable() \ | ||
5152 | +do { \ | ||
5153 | + vcpu_info_t *_vcpu; \ | ||
5154 | + barrier(); \ | ||
5155 | + _vcpu = current_vcpu_info(); \ | ||
5156 | + _vcpu->evtchn_upcall_mask = 0; \ | ||
5157 | + barrier(); /* unmask then check (avoid races) */ \ | ||
5158 | + if (unlikely(_vcpu->evtchn_upcall_pending)) \ | ||
5159 | + force_evtchn_callback(); \ | ||
5160 | +} while (0) | ||
5161 | + | ||
5162 | +void xen_safe_halt(void); | ||
5163 | + | ||
5164 | +void xen_halt(void); | ||
5165 | +#endif /* __ASSEMBLY__ */ | ||
5166 | + | ||
5167 | +#ifndef __ASSEMBLY__ | ||
5168 | |||
5169 | /* | ||
5170 | * The use of 'barrier' in the following reflects their use as local-lock | ||
5171 | @@ -20,48 +57,31 @@ | ||
5172 | * includes these barriers, for example. | ||
5173 | */ | ||
5174 | |||
5175 | -#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask) | ||
5176 | +#define __raw_local_save_flags(void) xen_save_fl() | ||
5177 | |||
5178 | -#define raw_local_irq_restore(x) \ | ||
5179 | -do { \ | ||
5180 | - vcpu_info_t *_vcpu; \ | ||
5181 | - barrier(); \ | ||
5182 | - _vcpu = current_vcpu_info(); \ | ||
5183 | - if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ | ||
5184 | - barrier(); /* unmask then check (avoid races) */ \ | ||
5185 | - if (unlikely(_vcpu->evtchn_upcall_pending)) \ | ||
5186 | - force_evtchn_callback(); \ | ||
5187 | - } \ | ||
5188 | -} while (0) | ||
5189 | +#define raw_local_irq_restore(flags) xen_restore_fl(flags) | ||
5190 | |||
5191 | -#define raw_local_irq_disable() \ | ||
5192 | -do { \ | ||
5193 | - current_vcpu_info()->evtchn_upcall_mask = 1; \ | ||
5194 | - barrier(); \ | ||
5195 | -} while (0) | ||
5196 | +#define raw_local_irq_disable() xen_irq_disable() | ||
5197 | |||
5198 | -#define raw_local_irq_enable() \ | ||
5199 | -do { \ | ||
5200 | - vcpu_info_t *_vcpu; \ | ||
5201 | - barrier(); \ | ||
5202 | - _vcpu = current_vcpu_info(); \ | ||
5203 | - _vcpu->evtchn_upcall_mask = 0; \ | ||
5204 | - barrier(); /* unmask then check (avoid races) */ \ | ||
5205 | - if (unlikely(_vcpu->evtchn_upcall_pending)) \ | ||
5206 | - force_evtchn_callback(); \ | ||
5207 | -} while (0) | ||
5208 | +#define raw_local_irq_enable() xen_irq_enable() | ||
5209 | |||
5210 | /* | ||
5211 | * Used in the idle loop; sti takes one instruction cycle | ||
5212 | * to complete: | ||
5213 | */ | ||
5214 | -void raw_safe_halt(void); | ||
5215 | +static inline void raw_safe_halt(void) | ||
5216 | +{ | ||
5217 | + xen_safe_halt(); | ||
5218 | +} | ||
5219 | |||
5220 | /* | ||
5221 | * Used when interrupts are already enabled or to | ||
5222 | * shutdown the processor: | ||
5223 | */ | ||
5224 | -void halt(void); | ||
5225 | +static inline void halt(void) | ||
5226 | +{ | ||
5227 | + xen_halt(); | ||
5228 | +} | ||
5229 | |||
5230 | /* | ||
5231 | * For spinlocks, etc: | ||
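irqflags_32.h keeps the same event-channel logic but names the primitives xen_save_fl, xen_restore_fl, xen_irq_disable, xen_irq_enable, xen_safe_halt and xen_halt, with the raw_* interface reduced to thin wrappers, mirroring how native 2.6.22 names its paravirt hooks. Caller-side use is unchanged; a sketch:

    unsigned long flags;

    flags = __raw_local_save_flags();   /* reads evtchn_upcall_mask */
    raw_local_irq_disable();            /* xen_irq_disable()        */
    /* ... critical section ... */
    raw_local_irq_restore(flags);       /* xen_restore_fl(flags)    */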
5232 | --- a/include/asm-x86/mach-xen/asm/irqflags_64.h | ||
5233 | +++ b/include/asm-x86/mach-xen/asm/irqflags_64.h | ||
5234 | @@ -9,6 +9,7 @@ | ||
5235 | */ | ||
5236 | #ifndef _ASM_IRQFLAGS_H | ||
5237 | #define _ASM_IRQFLAGS_H | ||
5238 | +#include <asm/processor-flags.h> | ||
5239 | |||
5240 | #ifndef __ASSEMBLY__ | ||
5241 | /* | ||
5242 | @@ -50,19 +51,19 @@ | ||
5243 | { | ||
5244 | unsigned long flags = __raw_local_save_flags(); | ||
5245 | |||
5246 | - raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); | ||
5247 | + raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); | ||
5248 | } | ||
5249 | |||
5250 | static inline void raw_local_irq_enable(void) | ||
5251 | { | ||
5252 | unsigned long flags = __raw_local_save_flags(); | ||
5253 | |||
5254 | - raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); | ||
5255 | + raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); | ||
5256 | } | ||
5257 | |||
5258 | static inline int raw_irqs_disabled_flags(unsigned long flags) | ||
5259 | { | ||
5260 | - return !(flags & (1<<9)) || (flags & (1 << 18)); | ||
5261 | + return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); | ||
5262 | } | ||
5263 | |||
5264 | #else /* CONFIG_X86_VSMP */ | ||
5265 | @@ -118,13 +119,21 @@ | ||
5266 | * Used in the idle loop; sti takes one instruction cycle | ||
5267 | * to complete: | ||
5268 | */ | ||
5269 | -void raw_safe_halt(void); | ||
5270 | +void xen_safe_halt(void); | ||
5271 | +static inline void raw_safe_halt(void) | ||
5272 | +{ | ||
5273 | + xen_safe_halt(); | ||
5274 | +} | ||
5275 | |||
5276 | /* | ||
5277 | * Used when interrupts are already enabled or to | ||
5278 | * shutdown the processor: | ||
5279 | */ | ||
5280 | -void halt(void); | ||
5281 | +void xen_halt(void); | ||
5282 | +static inline void halt(void) | ||
5283 | +{ | ||
5284 | + xen_halt(); | ||
5285 | +} | ||
5286 | |||
5287 | #else /* __ASSEMBLY__: */ | ||
5288 | # ifdef CONFIG_TRACE_IRQFLAGS | ||
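The irqflags_64.h VSMP path swaps the magic shifts for the named EFLAGS bits pulled in from <asm/processor-flags.h>; the constants expand to the same bits the old code shifted by hand:

    /* X86_EFLAGS_IF == (1 << 9)  == 0x00000200, Interrupt Flag   */
    /* X86_EFLAGS_AC == (1 << 18) == 0x00040000, Alignment Check  */
    raw_local_irq_restore((flags | X86_EFLAGS_IF) & ~X86_EFLAGS_AC);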
5289 | --- a/include/asm-x86/mach-xen/asm/mmu.h | ||
5290 | +++ b/include/asm-x86/mach-xen/asm/mmu.h | ||
5291 | @@ -18,12 +18,4 @@ | ||
5292 | #endif | ||
5293 | } mm_context_t; | ||
5294 | |||
5295 | -/* mm/memory.c:exit_mmap hook */ | ||
5296 | -extern void _arch_exit_mmap(struct mm_struct *mm); | ||
5297 | -#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm) | ||
5298 | - | ||
5299 | -/* kernel/fork.c:dup_mmap hook */ | ||
5300 | -extern void _arch_dup_mmap(struct mm_struct *mm); | ||
5301 | -#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm)) | ||
5302 | - | ||
5303 | #endif | ||
5304 | --- a/include/asm-x86/mach-xen/asm/mmu_64.h | ||
5305 | +++ b/include/asm-x86/mach-xen/asm/mmu_64.h | ||
5306 | @@ -25,14 +25,6 @@ | ||
5307 | #ifdef CONFIG_XEN | ||
5308 | extern struct list_head mm_unpinned; | ||
5309 | extern spinlock_t mm_unpinned_lock; | ||
5310 | - | ||
5311 | -/* mm/memory.c:exit_mmap hook */ | ||
5312 | -extern void _arch_exit_mmap(struct mm_struct *mm); | ||
5313 | -#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm) | ||
5314 | - | ||
5315 | -/* kernel/fork.c:dup_mmap hook */ | ||
5316 | -extern void _arch_dup_mmap(struct mm_struct *mm); | ||
5317 | -#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm)) | ||
5318 | #endif | ||
5319 | |||
5320 | #endif | ||
5321 | --- a/include/asm-x86/mach-xen/asm/mmu_context_32.h | ||
5322 | +++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h | ||
5323 | @@ -6,6 +6,20 @@ | ||
5324 | #include <asm/pgalloc.h> | ||
5325 | #include <asm/tlbflush.h> | ||
5326 | |||
5327 | +void arch_exit_mmap(struct mm_struct *mm); | ||
5328 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | ||
5329 | + | ||
5330 | +void mm_pin(struct mm_struct *mm); | ||
5331 | +void mm_unpin(struct mm_struct *mm); | ||
5332 | +void mm_pin_all(void); | ||
5333 | + | ||
5334 | +static inline void xen_activate_mm(struct mm_struct *prev, | ||
5335 | + struct mm_struct *next) | ||
5336 | +{ | ||
5337 | + if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)) | ||
5338 | + mm_pin(next); | ||
5339 | +} | ||
5340 | + | ||
5341 | /* | ||
5342 | * Used for LDT copy/destruction. | ||
5343 | */ | ||
5344 | @@ -37,10 +51,6 @@ | ||
5345 | : : "r" (0) ); | ||
5346 | } | ||
5347 | |||
5348 | -extern void mm_pin(struct mm_struct *mm); | ||
5349 | -extern void mm_unpin(struct mm_struct *mm); | ||
5350 | -void mm_pin_all(void); | ||
5351 | - | ||
5352 | static inline void switch_mm(struct mm_struct *prev, | ||
5353 | struct mm_struct *next, | ||
5354 | struct task_struct *tsk) | ||
5355 | @@ -97,11 +107,10 @@ | ||
5356 | #define deactivate_mm(tsk, mm) \ | ||
5357 | asm("movl %0,%%gs": :"r" (0)); | ||
5358 | |||
5359 | -static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) | ||
5360 | -{ | ||
5361 | - if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)) | ||
5362 | - mm_pin(next); | ||
5363 | - switch_mm(prev, next, NULL); | ||
5364 | -} | ||
5365 | +#define activate_mm(prev, next) \ | ||
5366 | + do { \ | ||
5367 | + xen_activate_mm(prev, next); \ | ||
5368 | + switch_mm((prev),(next),NULL); \ | ||
5369 | + } while(0) | ||
5370 | |||
5371 | #endif | ||
5372 | --- a/include/asm-x86/mach-xen/asm/mmu_context_64.h | ||
5373 | +++ b/include/asm-x86/mach-xen/asm/mmu_context_64.h | ||
5374 | @@ -9,6 +9,9 @@ | ||
5375 | #include <asm/pgtable.h> | ||
5376 | #include <asm/tlbflush.h> | ||
5377 | |||
5378 | +void arch_exit_mmap(struct mm_struct *mm); | ||
5379 | +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | ||
5380 | + | ||
5381 | /* | ||
5382 | * possibly do the LDT unload here? | ||
5383 | */ | ||
5384 | --- a/include/asm-x86/mach-xen/asm/page_64.h | ||
5385 | +++ b/include/asm-x86/mach-xen/asm/page_64.h | ||
5386 | @@ -7,6 +7,7 @@ | ||
5387 | #include <linux/types.h> | ||
5388 | #include <asm/bug.h> | ||
5389 | #endif | ||
5390 | +#include <linux/const.h> | ||
5391 | #include <xen/interface/xen.h> | ||
5392 | |||
5393 | /* | ||
5394 | @@ -19,18 +20,14 @@ | ||
5395 | |||
5396 | /* PAGE_SHIFT determines the page size */ | ||
5397 | #define PAGE_SHIFT 12 | ||
5398 | -#ifdef __ASSEMBLY__ | ||
5399 | -#define PAGE_SIZE (0x1 << PAGE_SHIFT) | ||
5400 | -#else | ||
5401 | -#define PAGE_SIZE (1UL << PAGE_SHIFT) | ||
5402 | -#endif | ||
5403 | +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) | ||
5404 | #define PAGE_MASK (~(PAGE_SIZE-1)) | ||
5405 | |||
5406 | /* See Documentation/x86_64/mm.txt for a description of the memory map. */ | ||
5407 | #define __PHYSICAL_MASK_SHIFT 46 | ||
5408 | -#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1) | ||
5409 | +#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1) | ||
5410 | #define __VIRTUAL_MASK_SHIFT 48 | ||
5411 | -#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) | ||
5412 | +#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1) | ||
5413 | |||
5414 | #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) | ||
5415 | |||
5416 | @@ -55,10 +52,10 @@ | ||
5417 | #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ | ||
5418 | |||
5419 | #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) | ||
5420 | -#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) | ||
5421 | +#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT) | ||
5422 | |||
5423 | #define HPAGE_SHIFT PMD_SHIFT | ||
5424 | -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) | ||
5425 | +#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) | ||
5426 | #define HPAGE_MASK (~(HPAGE_SIZE - 1)) | ||
5427 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | ||
5428 | |||
5429 | @@ -152,17 +149,23 @@ | ||
5430 | |||
5431 | #define __pgprot(x) ((pgprot_t) { (x) } ) | ||
5432 | |||
5433 | -#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START) | ||
5434 | -#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) | ||
5435 | -#define __START_KERNEL_map 0xffffffff80000000UL | ||
5436 | -#define __PAGE_OFFSET 0xffff880000000000UL | ||
5437 | +#endif /* !__ASSEMBLY__ */ | ||
5438 | |||
5439 | -#else | ||
5440 | #define __PHYSICAL_START CONFIG_PHYSICAL_START | ||
5441 | +#define __KERNEL_ALIGN 0x200000 | ||
5442 | + | ||
5443 | +/* | ||
5444 | + * Make sure kernel is aligned to 2MB address. Catching it at compile | ||
5445 | + * time is better. Change your config file and compile the kernel | ||
5446 | + * for a 2MB aligned address (CONFIG_PHYSICAL_START) | ||
5447 | + */ | ||
5448 | +#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 | ||
5449 | +#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" | ||
5450 | +#endif | ||
5451 | + | ||
5452 | #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) | ||
5453 | -#define __START_KERNEL_map 0xffffffff80000000 | ||
5454 | -#define __PAGE_OFFSET 0xffff880000000000 | ||
5455 | -#endif /* !__ASSEMBLY__ */ | ||
5456 | +#define __START_KERNEL_map _AC(0xffffffff80000000, UL) | ||
5457 | +#define __PAGE_OFFSET _AC(0xffff880000000000, UL) | ||
5458 | |||
5459 | #if CONFIG_XEN_COMPAT <= 0x030002 | ||
5460 | #undef LOAD_OFFSET | ||
5461 | @@ -172,20 +175,20 @@ | ||
5462 | /* to align the pointer to the (next) page boundary */ | ||
5463 | #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) | ||
5464 | |||
5465 | -#define KERNEL_TEXT_SIZE (40UL*1024*1024) | ||
5466 | -#define KERNEL_TEXT_START 0xffffffff80000000UL | ||
5467 | +#define KERNEL_TEXT_SIZE (40*1024*1024) | ||
5468 | +#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL) | ||
5469 | + | ||
5470 | +#define PAGE_OFFSET __PAGE_OFFSET | ||
5471 | |||
5472 | -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) | ||
5473 | +#ifndef __ASSEMBLY__ | ||
5474 | +static inline unsigned long __phys_addr(unsigned long x) | ||
5475 | +{ | ||
5476 | + return x - (x >= __START_KERNEL_map ? __START_KERNEL_map : PAGE_OFFSET); | ||
5477 | +} | ||
5478 | +#endif | ||
5479 | |||
5480 | -/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. | ||
5481 | - Otherwise you risk miscompilation. */ | ||
5482 | -#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET) | ||
5483 | -/* __pa_symbol should be used for C visible symbols. | ||
5484 | - This seems to be the official gcc blessed way to do such arithmetic. */ | ||
5485 | -#define __pa_symbol(x) \ | ||
5486 | - ({unsigned long v; \ | ||
5487 | - asm("" : "=r" (v) : "0" (x)); \ | ||
5488 | - __pa(v); }) | ||
5489 | +#define __pa(x) __phys_addr((unsigned long)(x)) | ||
5490 | +#define __pa_symbol(x) __phys_addr((unsigned long)(x)) | ||
5491 | |||
5492 | #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) | ||
5493 | #define __boot_va(x) __va(x) | ||
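page_64.h moves its address-space constants onto the _AC() macro from the newly included <linux/const.h>, so one definition serves both assembler and C, and the new __KERNEL_ALIGN check turns a CONFIG_PHYSICAL_START that is not 2MB aligned into a build error. The macro is roughly (paraphrased, not part of this patch):

    #ifdef __ASSEMBLY__
    #define _AC(X, Y)   X               /* assembler: bare constant       */
    #else
    #define __AC(X, Y)  (X##Y)
    #define _AC(X, Y)   __AC(X, Y)      /* C: paste the UL/ULL suffix on  */
    #endif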
5494 | --- a/include/asm-x86/mach-xen/asm/pgalloc_32.h | ||
5495 | +++ b/include/asm-x86/mach-xen/asm/pgalloc_32.h | ||
5496 | @@ -1,7 +1,6 @@ | ||
5497 | #ifndef _I386_PGALLOC_H | ||
5498 | #define _I386_PGALLOC_H | ||
5499 | |||
5500 | -#include <asm/fixmap.h> | ||
5501 | #include <linux/threads.h> | ||
5502 | #include <linux/mm.h> /* for struct page */ | ||
5503 | #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */ | ||
5504 | @@ -69,6 +68,4 @@ | ||
5505 | #define pud_populate(mm, pmd, pte) BUG() | ||
5506 | #endif | ||
5507 | |||
5508 | -#define check_pgt_cache() do { } while (0) | ||
5509 | - | ||
5510 | #endif /* _I386_PGALLOC_H */ | ||
5511 | --- a/include/asm-x86/mach-xen/asm/pgalloc_64.h | ||
5512 | +++ b/include/asm-x86/mach-xen/asm/pgalloc_64.h | ||
5513 | @@ -1,7 +1,6 @@ | ||
5514 | #ifndef _X86_64_PGALLOC_H | ||
5515 | #define _X86_64_PGALLOC_H | ||
5516 | |||
5517 | -#include <asm/fixmap.h> | ||
5518 | #include <asm/pda.h> | ||
5519 | #include <linux/threads.h> | ||
5520 | #include <linux/mm.h> | ||
5521 | @@ -100,24 +99,16 @@ | ||
5522 | struct page *page = virt_to_page(pgd); | ||
5523 | |||
5524 | spin_lock(&pgd_lock); | ||
5525 | - page->index = (pgoff_t)pgd_list; | ||
5526 | - if (pgd_list) | ||
5527 | - pgd_list->private = (unsigned long)&page->index; | ||
5528 | - pgd_list = page; | ||
5529 | - page->private = (unsigned long)&pgd_list; | ||
5530 | + list_add(&page->lru, &pgd_list); | ||
5531 | spin_unlock(&pgd_lock); | ||
5532 | } | ||
5533 | |||
5534 | static inline void pgd_list_del(pgd_t *pgd) | ||
5535 | { | ||
5536 | - struct page *next, **pprev, *page = virt_to_page(pgd); | ||
5537 | + struct page *page = virt_to_page(pgd); | ||
5538 | |||
5539 | spin_lock(&pgd_lock); | ||
5540 | - next = (struct page *)page->index; | ||
5541 | - pprev = (struct page **)page->private; | ||
5542 | - *pprev = next; | ||
5543 | - if (next) | ||
5544 | - next->private = (unsigned long)pprev; | ||
5545 | + list_del(&page->lru); | ||
5546 | spin_unlock(&pgd_lock); | ||
5547 | } | ||
5548 | |||
5549 | --- a/include/asm-x86/mach-xen/asm/pgtable-2level.h | ||
5550 | +++ b/include/asm-x86/mach-xen/asm/pgtable-2level.h | ||
5551 | @@ -13,22 +13,43 @@ | ||
5552 | * within a page table are directly modified. Thus, the following | ||
5553 | * hook is made available. | ||
5554 | */ | ||
5555 | -#define set_pte(pteptr, pteval) (*(pteptr) = pteval) | ||
5556 | - | ||
5557 | -#define set_pte_at(_mm,addr,ptep,pteval) do { \ | ||
5558 | - if (((_mm) != current->mm && (_mm) != &init_mm) || \ | ||
5559 | - HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \ | ||
5560 | - set_pte((ptep), (pteval)); \ | ||
5561 | -} while (0) | ||
5562 | - | ||
5563 | -#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval)) | ||
5564 | +static inline void xen_set_pte(pte_t *ptep , pte_t pte) | ||
5565 | +{ | ||
5566 | + *ptep = pte; | ||
5567 | +} | ||
5568 | +static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
5569 | + pte_t *ptep , pte_t pte) | ||
5570 | +{ | ||
5571 | + if ((mm != current->mm && mm != &init_mm) || | ||
5572 | + HYPERVISOR_update_va_mapping(addr, pte, 0)) | ||
5573 | + xen_set_pte(ptep, pte); | ||
5574 | +} | ||
5575 | +static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
5576 | +{ | ||
5577 | + xen_l2_entry_update(pmdp, pmd); | ||
5578 | +} | ||
5579 | +#define set_pte(pteptr, pteval) xen_set_pte(pteptr, pteval) | ||
5580 | +#define set_pte_at(mm,addr,ptep,pteval) xen_set_pte_at(mm, addr, ptep, pteval) | ||
5581 | +#define set_pmd(pmdptr, pmdval) xen_set_pmd(pmdptr, pmdval) | ||
5582 | |||
5583 | #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) | ||
5584 | |||
5585 | #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) | ||
5586 | #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | ||
5587 | |||
5588 | -#define raw_ptep_get_and_clear(xp, pte) __pte_ma(xchg(&(xp)->pte_low, 0)) | ||
5589 | +static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) | ||
5590 | +{ | ||
5591 | + xen_set_pte_at(mm, addr, xp, __pte(0)); | ||
5592 | +} | ||
5593 | + | ||
5594 | +#ifdef CONFIG_SMP | ||
5595 | +static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t res) | ||
5596 | +{ | ||
5597 | + return __pte_ma(xchg(&xp->pte_low, 0)); | ||
5598 | +} | ||
5599 | +#else | ||
5600 | +#define xen_ptep_get_and_clear(xp, res) xen_local_ptep_get_and_clear(xp, res) | ||
5601 | +#endif | ||
5602 | |||
5603 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH | ||
5604 | #define ptep_clear_flush(vma, addr, ptep) \ | ||
5605 | @@ -95,6 +116,4 @@ | ||
5606 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) | ||
5607 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) | ||
5608 | |||
5609 | -void vmalloc_sync_all(void); | ||
5610 | - | ||
5611 | #endif /* _I386_PGTABLE_2LEVEL_H */ | ||
5612 | --- a/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h | ||
5613 | +++ b/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h | ||
5614 | @@ -1,7 +1,7 @@ | ||
5615 | #ifndef _I386_PGTABLE_3LEVEL_DEFS_H | ||
5616 | #define _I386_PGTABLE_3LEVEL_DEFS_H | ||
5617 | |||
5618 | -#define HAVE_SHARED_KERNEL_PMD 0 | ||
5619 | +#define SHARED_KERNEL_PMD 0 | ||
5620 | |||
5621 | /* | ||
5622 | * PGDIR_SHIFT determines what a top-level page table entry can map | ||
5623 | --- a/include/asm-x86/mach-xen/asm/pgtable-3level.h | ||
5624 | +++ b/include/asm-x86/mach-xen/asm/pgtable-3level.h | ||
5625 | @@ -52,32 +52,40 @@ | ||
5626 | * value and then use set_pte to update it. -ben | ||
5627 | */ | ||
5628 | |||
5629 | -static inline void set_pte(pte_t *ptep, pte_t pte) | ||
5630 | +static inline void xen_set_pte(pte_t *ptep, pte_t pte) | ||
5631 | { | ||
5632 | ptep->pte_high = pte.pte_high; | ||
5633 | smp_wmb(); | ||
5634 | ptep->pte_low = pte.pte_low; | ||
5635 | } | ||
5636 | -#define set_pte_atomic(pteptr,pteval) \ | ||
5637 | - set_64bit((unsigned long long *)(pteptr),__pte_val(pteval)) | ||
5638 | |||
5639 | -#define set_pte_at(_mm,addr,ptep,pteval) do { \ | ||
5640 | - if (((_mm) != current->mm && (_mm) != &init_mm) || \ | ||
5641 | - HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \ | ||
5642 | - set_pte((ptep), (pteval)); \ | ||
5643 | -} while (0) | ||
5644 | - | ||
5645 | -#define set_pmd(pmdptr,pmdval) \ | ||
5646 | - xen_l2_entry_update((pmdptr), (pmdval)) | ||
5647 | -#define set_pud(pudptr,pudval) \ | ||
5648 | - xen_l3_entry_update((pudptr), (pudval)) | ||
5649 | +static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
5650 | + pte_t *ptep , pte_t pte) | ||
5651 | +{ | ||
5652 | + if ((mm != current->mm && mm != &init_mm) || | ||
5653 | + HYPERVISOR_update_va_mapping(addr, pte, 0)) | ||
5654 | + xen_set_pte(ptep, pte); | ||
5655 | +} | ||
5656 | + | ||
5657 | +static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
5658 | +{ | ||
5659 | + set_64bit((unsigned long long *)(ptep),__pte_val(pte)); | ||
5660 | +} | ||
5661 | +static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
5662 | +{ | ||
5663 | + xen_l2_entry_update(pmdp, pmd); | ||
5664 | +} | ||
5665 | +static inline void xen_set_pud(pud_t *pudp, pud_t pud) | ||
5666 | +{ | ||
5667 | + xen_l3_entry_update(pudp, pud); | ||
5668 | +} | ||
5669 | |||
5670 | /* | ||
5671 | * For PTEs and PDEs, we must clear the P-bit first when clearing a page table | ||
5672 | * entry, so clear the bottom half first and enforce ordering with a compiler | ||
5673 | * barrier. | ||
5674 | */ | ||
5675 | -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
5676 | +static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
5677 | { | ||
5678 | if ((mm != current->mm && mm != &init_mm) | ||
5679 | || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { | ||
5680 | @@ -87,7 +95,18 @@ | ||
5681 | } | ||
5682 | } | ||
5683 | |||
5684 | -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | ||
5685 | +static inline void xen_pmd_clear(pmd_t *pmd) | ||
5686 | +{ | ||
5687 | + xen_l2_entry_update(pmd, __pmd(0)); | ||
5688 | +} | ||
5689 | + | ||
5690 | +#define set_pte(ptep, pte) xen_set_pte(ptep, pte) | ||
5691 | +#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) | ||
5692 | +#define set_pte_atomic(ptep, pte) xen_set_pte_atomic(ptep, pte) | ||
5693 | +#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd) | ||
5694 | +#define set_pud(pudp, pud) xen_set_pud(pudp, pud) | ||
5695 | +#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep) | ||
5696 | +#define pmd_clear(pmd) xen_pmd_clear(pmd) | ||
5697 | |||
5698 | /* | ||
5699 | * Pentium-II erratum A13: in PAE mode we explicitly have to flush | ||
5700 | @@ -108,7 +127,8 @@ | ||
5701 | #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ | ||
5702 | pmd_index(address)) | ||
5703 | |||
5704 | -static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res) | ||
5705 | +#ifdef CONFIG_SMP | ||
5706 | +static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res) | ||
5707 | { | ||
5708 | uint64_t val = __pte_val(res); | ||
5709 | if (__cmpxchg64(ptep, val, 0) != val) { | ||
5710 | @@ -119,6 +139,9 @@ | ||
5711 | } | ||
5712 | return res; | ||
5713 | } | ||
5714 | +#else | ||
5715 | +#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte) | ||
5716 | +#endif | ||
5717 | |||
5718 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH | ||
5719 | #define ptep_clear_flush(vma, addr, ptep) \ | ||
5720 | @@ -165,13 +188,13 @@ | ||
5721 | static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) | ||
5722 | { | ||
5723 | return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | | ||
5724 | - pgprot_val(pgprot)) & __supported_pte_mask); | ||
5725 | + pgprot_val(pgprot)) & __supported_pte_mask); | ||
5726 | } | ||
5727 | |||
5728 | static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) | ||
5729 | { | ||
5730 | return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | | ||
5731 | - pgprot_val(pgprot)) & __supported_pte_mask); | ||
5732 | + pgprot_val(pgprot)) & __supported_pte_mask); | ||
5733 | } | ||
5734 | |||
5735 | /* | ||
5736 | @@ -191,6 +214,4 @@ | ||
5737 | |||
5738 | #define __pmd_free_tlb(tlb, x) do { } while (0) | ||
5739 | |||
5740 | -void vmalloc_sync_all(void); | ||
5741 | - | ||
5742 | #endif /* _I386_PGTABLE_3LEVEL_H */ | ||
5743 | --- a/include/asm-x86/mach-xen/asm/pgtable_32.h | ||
5744 | +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h | ||
5745 | @@ -24,11 +24,11 @@ | ||
5746 | #include <linux/slab.h> | ||
5747 | #include <linux/list.h> | ||
5748 | #include <linux/spinlock.h> | ||
5749 | +#include <linux/sched.h> | ||
5750 | |||
5751 | /* Is this pagetable pinned? */ | ||
5752 | #define PG_pinned PG_arch_1 | ||
5753 | |||
5754 | -struct mm_struct; | ||
5755 | struct vm_area_struct; | ||
5756 | |||
5757 | /* | ||
5758 | @@ -38,17 +38,16 @@ | ||
5759 | #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) | ||
5760 | extern unsigned long empty_zero_page[1024]; | ||
5761 | extern pgd_t *swapper_pg_dir; | ||
5762 | -extern struct kmem_cache *pgd_cache; | ||
5763 | extern struct kmem_cache *pmd_cache; | ||
5764 | extern spinlock_t pgd_lock; | ||
5765 | extern struct page *pgd_list; | ||
5766 | +void check_pgt_cache(void); | ||
5767 | |||
5768 | void pmd_ctor(void *, struct kmem_cache *, unsigned long); | ||
5769 | -void pgd_ctor(void *, struct kmem_cache *, unsigned long); | ||
5770 | -void pgd_dtor(void *, struct kmem_cache *, unsigned long); | ||
5771 | void pgtable_cache_init(void); | ||
5772 | void paging_init(void); | ||
5773 | |||
5774 | + | ||
5775 | /* | ||
5776 | * The Linux x86 paging architecture is 'compile-time dual-mode', it | ||
5777 | * implements both the traditional 2-level x86 page tables and the | ||
5778 | @@ -165,6 +164,7 @@ | ||
5779 | |||
5780 | extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; | ||
5781 | #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) | ||
5782 | +#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) | ||
5783 | #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) | ||
5784 | #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) | ||
5785 | #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) | ||
5786 | @@ -172,6 +172,7 @@ | ||
5787 | #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) | ||
5788 | #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) | ||
5789 | #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) | ||
5790 | +#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) | ||
5791 | #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) | ||
5792 | #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) | ||
5793 | #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) | ||
5794 | @@ -275,7 +276,13 @@ | ||
5795 | */ | ||
5796 | #define pte_update(mm, addr, ptep) do { } while (0) | ||
5797 | #define pte_update_defer(mm, addr, ptep) do { } while (0) | ||
5798 | -#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) | ||
5799 | + | ||
5800 | +/* local pte updates need not use xchg for locking */ | ||
5801 | +static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res) | ||
5802 | +{ | ||
5803 | + xen_set_pte(ptep, __pte(0)); | ||
5804 | + return res; | ||
5805 | +} | ||
5806 | |||
5807 | /* | ||
5808 | * We only update the dirty/accessed state if we set | ||
5809 | @@ -286,17 +293,34 @@ | ||
5810 | */ | ||
5811 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | ||
5812 | #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ | ||
5813 | -do { \ | ||
5814 | - if (dirty) \ | ||
5815 | +({ \ | ||
5816 | + int __changed = !pte_same(*(ptep), entry); \ | ||
5817 | + if (__changed && (dirty)) \ | ||
5818 | ptep_establish(vma, address, ptep, entry); \ | ||
5819 | -} while (0) | ||
5820 | + __changed; \ | ||
5821 | +}) | ||
5822 | |||
5823 | -/* | ||
5824 | - * We don't actually have these, but we want to advertise them so that | ||
5825 | - * we can encompass the flush here. | ||
5826 | - */ | ||
5827 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY | ||
5828 | +#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \ | ||
5829 | + int __ret = 0; \ | ||
5830 | + if (pte_dirty(*(ptep))) \ | ||
5831 | + __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \ | ||
5832 | + &(ptep)->pte_low); \ | ||
5833 | + if (__ret) \ | ||
5834 | + pte_update((vma)->vm_mm, addr, ptep); \ | ||
5835 | + __ret; \ | ||
5836 | +}) | ||
5837 | + | ||
5838 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | ||
5839 | +#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ | ||
5840 | + int __ret = 0; \ | ||
5841 | + if (pte_young(*(ptep))) \ | ||
5842 | + __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ | ||
5843 | + &(ptep)->pte_low); \ | ||
5844 | + if (__ret) \ | ||
5845 | + pte_update((vma)->vm_mm, addr, ptep); \ | ||
5846 | + __ret; \ | ||
5847 | +}) | ||
5848 | |||
5849 | /* | ||
5850 | * Rules for using ptep_establish: the pte MUST be a user pte, and | ||
5851 | @@ -323,7 +347,7 @@ | ||
5852 | int __dirty = pte_dirty(__pte); \ | ||
5853 | __pte = pte_mkclean(__pte); \ | ||
5854 | if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ | ||
5855 | - ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ | ||
5856 | + (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ | ||
5857 | else if (__dirty) \ | ||
5858 | (ptep)->pte_low = __pte.pte_low; \ | ||
5859 | __dirty; \ | ||
5860 | @@ -336,7 +360,7 @@ | ||
5861 | int __young = pte_young(__pte); \ | ||
5862 | __pte = pte_mkold(__pte); \ | ||
5863 | if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ | ||
5864 | - ptep_set_access_flags(vma, address, ptep, __pte, __young); \ | ||
5865 | + (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ | ||
5866 | else if (__young) \ | ||
5867 | (ptep)->pte_low = __pte.pte_low; \ | ||
5868 | __young; \ | ||
5869 | @@ -349,7 +373,7 @@ | ||
5870 | if (!pte_none(pte) | ||
5871 | && (mm != &init_mm | ||
5872 | || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { | ||
5873 | - pte = raw_ptep_get_and_clear(ptep, pte); | ||
5874 | + pte = xen_ptep_get_and_clear(ptep, pte); | ||
5875 | pte_update(mm, addr, ptep); | ||
5876 | } | ||
5877 | return pte; | ||
5878 | @@ -491,24 +515,10 @@ | ||
5879 | #endif | ||
5880 | |||
5881 | #if defined(CONFIG_HIGHPTE) | ||
5882 | -#define pte_offset_map(dir, address) \ | ||
5883 | -({ \ | ||
5884 | - pte_t *__ptep; \ | ||
5885 | - unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ | ||
5886 | - __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \ | ||
5887 | - paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ | ||
5888 | - __ptep = __ptep + pte_index(address); \ | ||
5889 | - __ptep; \ | ||
5890 | -}) | ||
5891 | -#define pte_offset_map_nested(dir, address) \ | ||
5892 | -({ \ | ||
5893 | - pte_t *__ptep; \ | ||
5894 | - unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ | ||
5895 | - __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \ | ||
5896 | - paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ | ||
5897 | - __ptep = __ptep + pte_index(address); \ | ||
5898 | - __ptep; \ | ||
5899 | -}) | ||
5900 | +#define pte_offset_map(dir, address) \ | ||
5901 | + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) | ||
5902 | +#define pte_offset_map_nested(dir, address) \ | ||
5903 | + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) | ||
5904 | #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) | ||
5905 | #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) | ||
5906 | #else | ||
5907 | @@ -587,10 +597,6 @@ | ||
5908 | #define io_remap_pfn_range(vma,from,pfn,size,prot) \ | ||
5909 | direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO) | ||
5910 | |||
5911 | -#define MK_IOSPACE_PFN(space, pfn) (pfn) | ||
5912 | -#define GET_IOSPACE(pfn) 0 | ||
5913 | -#define GET_PFN(pfn) (pfn) | ||
5914 | - | ||
5915 | #include <asm-generic/pgtable.h> | ||
5916 | |||
5917 | #endif /* _I386_PGTABLE_H */ | ||
5918 | --- a/include/asm-x86/mach-xen/asm/pgtable_64.h | ||
5919 | +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h | ||
5920 | @@ -1,12 +1,14 @@ | ||
5921 | #ifndef _X86_64_PGTABLE_H | ||
5922 | #define _X86_64_PGTABLE_H | ||
5923 | |||
5924 | +#include <linux/const.h> | ||
5925 | +#ifndef __ASSEMBLY__ | ||
5926 | + | ||
5927 | /* | ||
5928 | * This file contains the functions and defines necessary to modify and use | ||
5929 | * the x86-64 page table tree. | ||
5930 | */ | ||
5931 | #include <asm/processor.h> | ||
5932 | -#include <asm/fixmap.h> | ||
5933 | #include <asm/bitops.h> | ||
5934 | #include <linux/threads.h> | ||
5935 | #include <linux/sched.h> | ||
5936 | @@ -34,11 +36,9 @@ | ||
5937 | #endif | ||
5938 | |||
5939 | extern pud_t level3_kernel_pgt[512]; | ||
5940 | -extern pud_t level3_physmem_pgt[512]; | ||
5941 | extern pud_t level3_ident_pgt[512]; | ||
5942 | extern pmd_t level2_kernel_pgt[512]; | ||
5943 | extern pgd_t init_level4_pgt[]; | ||
5944 | -extern pgd_t boot_level4_pgt[]; | ||
5945 | extern unsigned long __supported_pte_mask; | ||
5946 | |||
5947 | #define swapper_pg_dir init_level4_pgt | ||
5948 | @@ -53,6 +53,8 @@ | ||
5949 | extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; | ||
5950 | #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) | ||
5951 | |||
5952 | +#endif /* !__ASSEMBLY__ */ | ||
5953 | + | ||
5954 | /* | ||
5955 | * PGDIR_SHIFT determines what a top-level page table entry can map | ||
5956 | */ | ||
5957 | @@ -77,6 +79,8 @@ | ||
5958 | */ | ||
5959 | #define PTRS_PER_PTE 512 | ||
5960 | |||
5961 | +#ifndef __ASSEMBLY__ | ||
5962 | + | ||
5963 | #define pte_ERROR(e) \ | ||
5964 | printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \ | ||
5965 | &(e), __pte_val(e), pte_pfn(e)) | ||
5966 | @@ -119,22 +123,23 @@ | ||
5967 | |||
5968 | #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK)) | ||
5969 | |||
5970 | -#define PMD_SIZE (1UL << PMD_SHIFT) | ||
5971 | +#endif /* !__ASSEMBLY__ */ | ||
5972 | + | ||
5973 | +#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) | ||
5974 | #define PMD_MASK (~(PMD_SIZE-1)) | ||
5975 | -#define PUD_SIZE (1UL << PUD_SHIFT) | ||
5976 | +#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT) | ||
5977 | #define PUD_MASK (~(PUD_SIZE-1)) | ||
5978 | -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) | ||
5979 | +#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) | ||
5980 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) | ||
5981 | |||
5982 | #define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) | ||
5983 | #define FIRST_USER_ADDRESS 0 | ||
5984 | |||
5985 | -#ifndef __ASSEMBLY__ | ||
5986 | -#define MAXMEM 0x3fffffffffffUL | ||
5987 | -#define VMALLOC_START 0xffffc20000000000UL | ||
5988 | -#define VMALLOC_END 0xffffe1ffffffffffUL | ||
5989 | -#define MODULES_VADDR 0xffffffff88000000UL | ||
5990 | -#define MODULES_END 0xfffffffffff00000UL | ||
5991 | +#define MAXMEM _AC(0x3fffffffffff, UL) | ||
5992 | +#define VMALLOC_START _AC(0xffffc20000000000, UL) | ||
5993 | +#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) | ||
5994 | +#define MODULES_VADDR _AC(0xffffffff88000000, UL) | ||
5995 | +#define MODULES_END _AC(0xfffffffffff00000, UL) | ||
5996 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) | ||
5997 | |||
5998 | #define _PAGE_BIT_PRESENT 0 | ||
5999 | @@ -160,16 +165,18 @@ | ||
6000 | #define _PAGE_GLOBAL 0x100 /* Global TLB entry */ | ||
6001 | |||
6002 | #define _PAGE_PROTNONE 0x080 /* If not present */ | ||
6003 | -#define _PAGE_NX (1UL<<_PAGE_BIT_NX) | ||
6004 | +#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX) | ||
6005 | |||
6006 | /* Mapped page is I/O or foreign and has no associated page struct. */ | ||
6007 | #define _PAGE_IO 0x200 | ||
6008 | |||
6009 | +#ifndef __ASSEMBLY__ | ||
6010 | #if CONFIG_XEN_COMPAT <= 0x030002 | ||
6011 | extern unsigned int __kernel_page_user; | ||
6012 | #else | ||
6013 | #define __kernel_page_user 0 | ||
6014 | #endif | ||
6015 | +#endif | ||
6016 | |||
6017 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) | ||
6018 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user) | ||
6019 | @@ -234,6 +241,8 @@ | ||
6020 | #define __S110 PAGE_SHARED_EXEC | ||
6021 | #define __S111 PAGE_SHARED_EXEC | ||
6022 | |||
6023 | +#ifndef __ASSEMBLY__ | ||
6024 | + | ||
6025 | static inline unsigned long pgd_bad(pgd_t pgd) | ||
6026 | { | ||
6027 | return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); | ||
6028 | @@ -345,6 +354,20 @@ | ||
6029 | static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; } | ||
6030 | static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; } | ||
6031 | |||
6032 | +static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) | ||
6033 | +{ | ||
6034 | + if (!pte_dirty(*ptep)) | ||
6035 | + return 0; | ||
6036 | + return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte); | ||
6037 | +} | ||
6038 | + | ||
6039 | +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) | ||
6040 | +{ | ||
6041 | + if (!pte_young(*ptep)) | ||
6042 | + return 0; | ||
6043 | + return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte); | ||
6044 | +} | ||
6045 | + | ||
6046 | static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
6047 | { | ||
6048 | pte_t pte = *ptep; | ||
6049 | @@ -470,18 +493,12 @@ | ||
6050 | * bit at the same time. */ | ||
6051 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | ||
6052 | #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ | ||
6053 | - do { \ | ||
6054 | - if (dirty) \ | ||
6055 | - ptep_establish(vma, address, ptep, entry); \ | ||
6056 | - } while (0) | ||
6057 | - | ||
6058 | - | ||
6059 | -/* | ||
6060 | - * i386 says: We don't actually have these, but we want to advertise | ||
6061 | - * them so that we can encompass the flush here. | ||
6062 | - */ | ||
6063 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY | ||
6064 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | ||
6065 | +({ \ | ||
6066 | + int __changed = !pte_same(*(ptep), entry); \ | ||
6067 | + if (__changed && (dirty)) \ | ||
6068 | + ptep_establish(vma, address, ptep, entry); \ | ||
6069 | + __changed; \ | ||
6070 | +}) | ||
6071 | |||
6072 | #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH | ||
6073 | #define ptep_clear_flush_dirty(vma, address, ptep) \ | ||
6074 | @@ -490,7 +507,7 @@ | ||
6075 | int __dirty = pte_dirty(__pte); \ | ||
6076 | __pte = pte_mkclean(__pte); \ | ||
6077 | if ((vma)->vm_mm->context.pinned) \ | ||
6078 | - ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ | ||
6079 | + (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ | ||
6080 | else if (__dirty) \ | ||
6081 | set_pte(ptep, __pte); \ | ||
6082 | __dirty; \ | ||
6083 | @@ -503,7 +520,7 @@ | ||
6084 | int __young = pte_young(__pte); \ | ||
6085 | __pte = pte_mkold(__pte); \ | ||
6086 | if ((vma)->vm_mm->context.pinned) \ | ||
6087 | - ptep_set_access_flags(vma, address, ptep, __pte, __young); \ | ||
6088 | + (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ | ||
6089 | else if (__young) \ | ||
6090 | set_pte(ptep, __pte); \ | ||
6091 | __young; \ | ||
6092 | @@ -517,10 +534,7 @@ | ||
6093 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) | ||
6094 | |||
6095 | extern spinlock_t pgd_lock; | ||
6096 | -extern struct page *pgd_list; | ||
6097 | -void vmalloc_sync_all(void); | ||
6098 | - | ||
6099 | -#endif /* !__ASSEMBLY__ */ | ||
6100 | +extern struct list_head pgd_list; | ||
6101 | |||
6102 | extern int kern_addr_valid(unsigned long addr); | ||
6103 | |||
6104 | @@ -559,10 +573,6 @@ | ||
6105 | #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ | ||
6106 | direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO) | ||
6107 | |||
6108 | -#define MK_IOSPACE_PFN(space, pfn) (pfn) | ||
6109 | -#define GET_IOSPACE(pfn) 0 | ||
6110 | -#define GET_PFN(pfn) (pfn) | ||
6111 | - | ||
6112 | #define HAVE_ARCH_UNMAPPED_AREA | ||
6113 | |||
6114 | #define pgtable_cache_init() do { } while (0) | ||
6115 | @@ -576,11 +586,14 @@ | ||
6116 | #define kc_offset_to_vaddr(o) \ | ||
6117 | (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o)) | ||
6118 | |||
6119 | +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | ||
6120 | +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY | ||
6121 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR | ||
6122 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL | ||
6123 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH | ||
6124 | #define __HAVE_ARCH_PTEP_SET_WRPROTECT | ||
6125 | #define __HAVE_ARCH_PTE_SAME | ||
6126 | #include <asm-generic/pgtable.h> | ||
6127 | +#endif /* !__ASSEMBLY__ */ | ||
6128 | |||
6129 | #endif /* _X86_64_PGTABLE_H */ | ||
6130 | --- a/include/asm-x86/mach-xen/asm/processor_32.h | ||
6131 | +++ b/include/asm-x86/mach-xen/asm/processor_32.h | ||
6132 | @@ -21,6 +21,7 @@ | ||
6133 | #include <asm/percpu.h> | ||
6134 | #include <linux/cpumask.h> | ||
6135 | #include <linux/init.h> | ||
6136 | +#include <asm/processor-flags.h> | ||
6137 | #include <xen/interface/physdev.h> | ||
6138 | |||
6139 | /* flag for disabling the tsc */ | ||
6140 | @@ -118,7 +119,8 @@ | ||
6141 | |||
6142 | void __init cpu_detect(struct cpuinfo_x86 *c); | ||
6143 | |||
6144 | -extern void identify_cpu(struct cpuinfo_x86 *); | ||
6145 | +extern void identify_boot_cpu(void); | ||
6146 | +extern void identify_secondary_cpu(struct cpuinfo_x86 *); | ||
6147 | extern void print_cpu_info(struct cpuinfo_x86 *); | ||
6148 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | ||
6149 | extern unsigned short num_cache_leaves; | ||
6150 | @@ -129,29 +131,8 @@ | ||
6151 | static inline void detect_ht(struct cpuinfo_x86 *c) {} | ||
6152 | #endif | ||
6153 | |||
6154 | -/* | ||
6155 | - * EFLAGS bits | ||
6156 | - */ | ||
6157 | -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ | ||
6158 | -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ | ||
6159 | -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ | ||
6160 | -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ | ||
6161 | -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ | ||
6162 | -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ | ||
6163 | -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ | ||
6164 | -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ | ||
6165 | -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ | ||
6166 | -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ | ||
6167 | -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ | ||
6168 | -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ | ||
6169 | -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ | ||
6170 | -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ | ||
6171 | -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ | ||
6172 | -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ | ||
6173 | -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ | ||
6174 | - | ||
6175 | -static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx, | ||
6176 | - unsigned int *ecx, unsigned int *edx) | ||
6177 | +static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx, | ||
6178 | + unsigned int *ecx, unsigned int *edx) | ||
6179 | { | ||
6180 | /* ecx is often an input as well as an output. */ | ||
6181 | __asm__(XEN_CPUID | ||
6182 | @@ -165,21 +146,6 @@ | ||
6183 | #define load_cr3(pgdir) write_cr3(__pa(pgdir)) | ||
6184 | |||
6185 | /* | ||
6186 | - * Intel CPU features in CR4 | ||
6187 | - */ | ||
6188 | -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ | ||
6189 | -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ | ||
6190 | -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ | ||
6191 | -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ | ||
6192 | -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ | ||
6193 | -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ | ||
6194 | -#define X86_CR4_MCE 0x0040 /* Machine check enable */ | ||
6195 | -#define X86_CR4_PGE 0x0080 /* enable global pages */ | ||
6196 | -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ | ||
6197 | -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ | ||
6198 | -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ | ||
6199 | - | ||
6200 | -/* | ||
6201 | * Save the cr4 feature set we're using (ie | ||
6202 | * Pentium 4MB enable and PPro Global page | ||
6203 | * enable), so that any CPU's that boot up | ||
6204 | @@ -206,26 +172,6 @@ | ||
6205 | } | ||
6206 | |||
6207 | /* | ||
6208 | - * NSC/Cyrix CPU configuration register indexes | ||
6209 | - */ | ||
6210 | - | ||
6211 | -#define CX86_PCR0 0x20 | ||
6212 | -#define CX86_GCR 0xb8 | ||
6213 | -#define CX86_CCR0 0xc0 | ||
6214 | -#define CX86_CCR1 0xc1 | ||
6215 | -#define CX86_CCR2 0xc2 | ||
6216 | -#define CX86_CCR3 0xc3 | ||
6217 | -#define CX86_CCR4 0xe8 | ||
6218 | -#define CX86_CCR5 0xe9 | ||
6219 | -#define CX86_CCR6 0xea | ||
6220 | -#define CX86_CCR7 0xeb | ||
6221 | -#define CX86_PCR1 0xf0 | ||
6222 | -#define CX86_DIR0 0xfe | ||
6223 | -#define CX86_DIR1 0xff | ||
6224 | -#define CX86_ARR_BASE 0xc4 | ||
6225 | -#define CX86_RCR_BASE 0xdc | ||
6226 | - | ||
6227 | -/* | ||
6228 | * NSC/Cyrix CPU indexed register access macros | ||
6229 | */ | ||
6230 | |||
6231 | @@ -351,7 +297,8 @@ | ||
6232 | struct thread_struct; | ||
6233 | |||
6234 | #ifndef CONFIG_X86_NO_TSS | ||
6235 | -struct tss_struct { | ||
6236 | +/* This is the TSS defined by the hardware. */ | ||
6237 | +struct i386_hw_tss { | ||
6238 | unsigned short back_link,__blh; | ||
6239 | unsigned long esp0; | ||
6240 | unsigned short ss0,__ss0h; | ||
6241 | @@ -375,6 +322,11 @@ | ||
6242 | unsigned short gs, __gsh; | ||
6243 | unsigned short ldt, __ldth; | ||
6244 | unsigned short trace, io_bitmap_base; | ||
6245 | +} __attribute__((packed)); | ||
6246 | + | ||
6247 | +struct tss_struct { | ||
6248 | + struct i386_hw_tss x86_tss; | ||
6249 | + | ||
6250 | /* | ||
6251 | * The extra 1 is there because the CPU will access an | ||
6252 | * additional byte beyond the end of the IO permission | ||
6253 | @@ -428,10 +380,11 @@ | ||
6254 | }; | ||
6255 | |||
6256 | #define INIT_THREAD { \ | ||
6257 | + .esp0 = sizeof(init_stack) + (long)&init_stack, \ | ||
6258 | .vm86_info = NULL, \ | ||
6259 | .sysenter_cs = __KERNEL_CS, \ | ||
6260 | .io_bitmap_ptr = NULL, \ | ||
6261 | - .fs = __KERNEL_PDA, \ | ||
6262 | + .fs = __KERNEL_PERCPU, \ | ||
6263 | } | ||
6264 | |||
6265 | /* | ||
6266 | @@ -441,10 +394,12 @@ | ||
6267 | * be within the limit. | ||
6268 | */ | ||
6269 | #define INIT_TSS { \ | ||
6270 | - .esp0 = sizeof(init_stack) + (long)&init_stack, \ | ||
6271 | - .ss0 = __KERNEL_DS, \ | ||
6272 | - .ss1 = __KERNEL_CS, \ | ||
6273 | - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ | ||
6274 | + .x86_tss = { \ | ||
6275 | + .esp0 = sizeof(init_stack) + (long)&init_stack, \ | ||
6276 | + .ss0 = __KERNEL_DS, \ | ||
6277 | + .ss1 = __KERNEL_CS, \ | ||
6278 | + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ | ||
6279 | + }, \ | ||
6280 | .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ | ||
6281 | } | ||
6282 | |||
6283 | @@ -551,38 +506,33 @@ | ||
6284 | |||
6285 | #define cpu_relax() rep_nop() | ||
6286 | |||
6287 | -#define paravirt_enabled() 0 | ||
6288 | -#define __cpuid xen_cpuid | ||
6289 | - | ||
6290 | #ifndef CONFIG_X86_NO_TSS | ||
6291 | -static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) | ||
6292 | +static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) | ||
6293 | { | ||
6294 | - tss->esp0 = thread->esp0; | ||
6295 | + tss->x86_tss.esp0 = thread->esp0; | ||
6296 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
6297 | - if (unlikely(tss->ss1 != thread->sysenter_cs)) { | ||
6298 | - tss->ss1 = thread->sysenter_cs; | ||
6299 | + if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | ||
6300 | + tss->x86_tss.ss1 = thread->sysenter_cs; | ||
6301 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
6302 | } | ||
6303 | } | ||
6304 | -#define load_esp0(tss, thread) \ | ||
6305 | - __load_esp0(tss, thread) | ||
6306 | #else | ||
6307 | -#define load_esp0(tss, thread) do { \ | ||
6308 | +#define xen_load_esp0(tss, thread) do { \ | ||
6309 | if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ | ||
6310 | BUG(); \ | ||
6311 | } while (0) | ||
6312 | #endif | ||
6313 | |||
6314 | |||
6315 | -/* | ||
6316 | - * These special macros can be used to get or set a debugging register | ||
6317 | - */ | ||
6318 | -#define get_debugreg(var, register) \ | ||
6319 | - (var) = HYPERVISOR_get_debugreg(register) | ||
6320 | -#define set_debugreg(value, register) \ | ||
6321 | - WARN_ON(HYPERVISOR_set_debugreg(register, value)) | ||
6322 | +static inline unsigned long xen_get_debugreg(int regno) | ||
6323 | +{ | ||
6324 | + return HYPERVISOR_get_debugreg(regno); | ||
6325 | +} | ||
6326 | |||
6327 | -#define set_iopl_mask xen_set_iopl_mask | ||
6328 | +static inline void xen_set_debugreg(int regno, unsigned long value) | ||
6329 | +{ | ||
6330 | + WARN_ON(HYPERVISOR_set_debugreg(regno, value)); | ||
6331 | +} | ||
6332 | |||
6333 | /* | ||
6334 | * Set IOPL bits in EFLAGS from given mask | ||
6335 | @@ -597,6 +547,21 @@ | ||
6336 | } | ||
6337 | |||
6338 | |||
6339 | +#define paravirt_enabled() 0 | ||
6340 | +#define __cpuid xen_cpuid | ||
6341 | + | ||
6342 | +#define load_esp0 xen_load_esp0 | ||
6343 | + | ||
6344 | +/* | ||
6345 | + * These special macros can be used to get or set a debugging register | ||
6346 | + */ | ||
6347 | +#define get_debugreg(var, register) \ | ||
6348 | + (var) = xen_get_debugreg(register) | ||
6349 | +#define set_debugreg(value, register) \ | ||
6350 | + xen_set_debugreg(register, value) | ||
6351 | + | ||
6352 | +#define set_iopl_mask xen_set_iopl_mask | ||
6353 | + | ||
6354 | /* | ||
6355 | * Generic CPUID function | ||
6356 | * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx | ||
6357 | @@ -749,8 +714,14 @@ | ||
6358 | extern void enable_sep_cpu(void); | ||
6359 | extern int sysenter_setup(void); | ||
6360 | |||
6361 | -extern int init_gdt(int cpu, struct task_struct *idle); | ||
6362 | +/* Defined in head.S */ | ||
6363 | +extern struct Xgt_desc_struct early_gdt_descr; | ||
6364 | + | ||
6365 | extern void cpu_set_gdt(int); | ||
6366 | -extern void secondary_cpu_init(void); | ||
6367 | +extern void switch_to_new_gdt(void); | ||
6368 | +extern void cpu_init(void); | ||
6369 | +extern void init_gdt(int cpu); | ||
6370 | + | ||
6371 | +extern int force_mwait; | ||
6372 | |||
6373 | #endif /* __ASM_I386_PROCESSOR_H */ | ||
6374 | --- a/include/asm-x86/mach-xen/asm/processor_64.h | ||
6375 | +++ b/include/asm-x86/mach-xen/asm/processor_64.h | ||
6376 | @@ -20,6 +20,7 @@ | ||
6377 | #include <asm/percpu.h> | ||
6378 | #include <linux/personality.h> | ||
6379 | #include <linux/cpumask.h> | ||
6380 | +#include <asm/processor-flags.h> | ||
6381 | |||
6382 | #define TF_MASK 0x00000100 | ||
6383 | #define IF_MASK 0x00000200 | ||
6384 | @@ -103,42 +104,6 @@ | ||
6385 | extern unsigned short num_cache_leaves; | ||
6386 | |||
6387 | /* | ||
6388 | - * EFLAGS bits | ||
6389 | - */ | ||
6390 | -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ | ||
6391 | -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ | ||
6392 | -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ | ||
6393 | -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ | ||
6394 | -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ | ||
6395 | -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ | ||
6396 | -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ | ||
6397 | -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ | ||
6398 | -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ | ||
6399 | -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ | ||
6400 | -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ | ||
6401 | -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ | ||
6402 | -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ | ||
6403 | -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ | ||
6404 | -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ | ||
6405 | -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ | ||
6406 | -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ | ||
6407 | - | ||
6408 | -/* | ||
6409 | - * Intel CPU features in CR4 | ||
6410 | - */ | ||
6411 | -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ | ||
6412 | -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ | ||
6413 | -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ | ||
6414 | -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ | ||
6415 | -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ | ||
6416 | -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ | ||
6417 | -#define X86_CR4_MCE 0x0040 /* Machine check enable */ | ||
6418 | -#define X86_CR4_PGE 0x0080 /* enable global pages */ | ||
6419 | -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ | ||
6420 | -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ | ||
6421 | -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ | ||
6422 | - | ||
6423 | -/* | ||
6424 | * Save the cr4 feature set we're using (ie | ||
6425 | * Pentium 4MB enable and PPro Global page | ||
6426 | * enable), so that any CPU's that boot up | ||
6427 | @@ -203,7 +168,7 @@ | ||
6428 | u32 mxcsr; | ||
6429 | u32 mxcsr_mask; | ||
6430 | u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ | ||
6431 | - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */ | ||
6432 | + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ | ||
6433 | u32 padding[24]; | ||
6434 | } __attribute__ ((aligned (16))); | ||
6435 | |||
6436 | @@ -436,22 +401,6 @@ | ||
6437 | #define cpu_relax() rep_nop() | ||
6438 | |||
6439 | /* | ||
6440 | - * NSC/Cyrix CPU configuration register indexes | ||
6441 | - */ | ||
6442 | -#define CX86_CCR0 0xc0 | ||
6443 | -#define CX86_CCR1 0xc1 | ||
6444 | -#define CX86_CCR2 0xc2 | ||
6445 | -#define CX86_CCR3 0xc3 | ||
6446 | -#define CX86_CCR4 0xe8 | ||
6447 | -#define CX86_CCR5 0xe9 | ||
6448 | -#define CX86_CCR6 0xea | ||
6449 | -#define CX86_CCR7 0xeb | ||
6450 | -#define CX86_DIR0 0xfe | ||
6451 | -#define CX86_DIR1 0xff | ||
6452 | -#define CX86_ARR_BASE 0xc4 | ||
6453 | -#define CX86_RCR_BASE 0xdc | ||
6454 | - | ||
6455 | -/* | ||
6456 | * NSC/Cyrix CPU indexed register access macros | ||
6457 | */ | ||
6458 | |||
6459 | --- a/include/asm-x86/mach-xen/asm/scatterlist_32.h | ||
6460 | +++ b/include/asm-x86/mach-xen/asm/scatterlist_32.h | ||
6461 | @@ -1,6 +1,8 @@ | ||
6462 | #ifndef _I386_SCATTERLIST_H | ||
6463 | #define _I386_SCATTERLIST_H | ||
6464 | |||
6465 | +#include <asm/types.h> | ||
6466 | + | ||
6467 | struct scatterlist { | ||
6468 | struct page *page; | ||
6469 | unsigned int offset; | ||
6470 | --- a/include/asm-x86/mach-xen/asm/segment_32.h | ||
6471 | +++ b/include/asm-x86/mach-xen/asm/segment_32.h | ||
6472 | @@ -39,7 +39,7 @@ | ||
6473 | * 25 - APM BIOS support | ||
6474 | * | ||
6475 | * 26 - ESPFIX small SS | ||
6476 | - * 27 - PDA [ per-cpu private data area ] | ||
6477 | + * 27 - per-cpu [ offset to per-cpu data area ] | ||
6478 | * 28 - unused | ||
6479 | * 29 - unused | ||
6480 | * 30 - unused | ||
6481 | @@ -74,8 +74,12 @@ | ||
6482 | #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) | ||
6483 | #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) | ||
6484 | |||
6485 | -#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) | ||
6486 | -#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) | ||
6487 | +#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) | ||
6488 | +#ifdef CONFIG_SMP | ||
6489 | +#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) | ||
6490 | +#else | ||
6491 | +#define __KERNEL_PERCPU 0 | ||
6492 | +#endif | ||
6493 | |||
6494 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 | ||
6495 | |||
6496 | --- a/include/asm-x86/mach-xen/asm/smp_32.h | ||
6497 | +++ b/include/asm-x86/mach-xen/asm/smp_32.h | ||
6498 | @@ -8,19 +8,15 @@ | ||
6499 | #include <linux/kernel.h> | ||
6500 | #include <linux/threads.h> | ||
6501 | #include <linux/cpumask.h> | ||
6502 | -#include <asm/pda.h> | ||
6503 | #endif | ||
6504 | |||
6505 | -#ifdef CONFIG_X86_LOCAL_APIC | ||
6506 | -#ifndef __ASSEMBLY__ | ||
6507 | -#include <asm/fixmap.h> | ||
6508 | +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) | ||
6509 | #include <asm/bitops.h> | ||
6510 | #include <asm/mpspec.h> | ||
6511 | +#include <asm/apic.h> | ||
6512 | #ifdef CONFIG_X86_IO_APIC | ||
6513 | #include <asm/io_apic.h> | ||
6514 | #endif | ||
6515 | -#include <asm/apic.h> | ||
6516 | -#endif | ||
6517 | #endif | ||
6518 | |||
6519 | #define BAD_APICID 0xFFu | ||
6520 | @@ -52,9 +48,76 @@ | ||
6521 | extern void cpu_uninit(void); | ||
6522 | #endif | ||
6523 | |||
6524 | -#ifndef CONFIG_PARAVIRT | ||
6525 | +#ifndef CONFIG_XEN | ||
6526 | +struct smp_ops | ||
6527 | +{ | ||
6528 | + void (*smp_prepare_boot_cpu)(void); | ||
6529 | + void (*smp_prepare_cpus)(unsigned max_cpus); | ||
6530 | + int (*cpu_up)(unsigned cpu); | ||
6531 | + void (*smp_cpus_done)(unsigned max_cpus); | ||
6532 | + | ||
6533 | + void (*smp_send_stop)(void); | ||
6534 | + void (*smp_send_reschedule)(int cpu); | ||
6535 | + int (*smp_call_function_mask)(cpumask_t mask, | ||
6536 | + void (*func)(void *info), void *info, | ||
6537 | + int wait); | ||
6538 | +}; | ||
6539 | + | ||
6540 | +extern struct smp_ops smp_ops; | ||
6541 | + | ||
6542 | +static inline void smp_prepare_boot_cpu(void) | ||
6543 | +{ | ||
6544 | + smp_ops.smp_prepare_boot_cpu(); | ||
6545 | +} | ||
6546 | +static inline void smp_prepare_cpus(unsigned int max_cpus) | ||
6547 | +{ | ||
6548 | + smp_ops.smp_prepare_cpus(max_cpus); | ||
6549 | +} | ||
6550 | +static inline int __cpu_up(unsigned int cpu) | ||
6551 | +{ | ||
6552 | + return smp_ops.cpu_up(cpu); | ||
6553 | +} | ||
6554 | +static inline void smp_cpus_done(unsigned int max_cpus) | ||
6555 | +{ | ||
6556 | + smp_ops.smp_cpus_done(max_cpus); | ||
6557 | +} | ||
6558 | + | ||
6559 | +static inline void smp_send_stop(void) | ||
6560 | +{ | ||
6561 | + smp_ops.smp_send_stop(); | ||
6562 | +} | ||
6563 | +static inline void smp_send_reschedule(int cpu) | ||
6564 | +{ | ||
6565 | + smp_ops.smp_send_reschedule(cpu); | ||
6566 | +} | ||
6567 | +static inline int smp_call_function_mask(cpumask_t mask, | ||
6568 | + void (*func) (void *info), void *info, | ||
6569 | + int wait) | ||
6570 | +{ | ||
6571 | + return smp_ops.smp_call_function_mask(mask, func, info, wait); | ||
6572 | +} | ||
6573 | + | ||
6574 | +void native_smp_prepare_boot_cpu(void); | ||
6575 | +void native_smp_prepare_cpus(unsigned int max_cpus); | ||
6576 | +int native_cpu_up(unsigned int cpunum); | ||
6577 | +void native_smp_cpus_done(unsigned int max_cpus); | ||
6578 | + | ||
6579 | #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ | ||
6580 | do { } while (0) | ||
6581 | + | ||
6582 | +#else | ||
6583 | + | ||
6584 | + | ||
6585 | +void xen_smp_send_stop(void); | ||
6586 | +void xen_smp_send_reschedule(int cpu); | ||
6587 | +int xen_smp_call_function_mask(cpumask_t mask, | ||
6588 | + void (*func) (void *info), void *info, | ||
6589 | + int wait); | ||
6590 | + | ||
6591 | +#define smp_send_stop xen_smp_send_stop | ||
6592 | +#define smp_send_reschedule xen_smp_send_reschedule | ||
6593 | +#define smp_call_function_mask xen_smp_call_function_mask | ||
6594 | + | ||
6595 | #endif | ||
6596 | |||
6597 | /* | ||
6598 | @@ -62,7 +125,8 @@ | ||
6599 | * from the initial startup. We map APIC_BASE very early in page_setup(), | ||
6600 | * so this is correct in the x86 case. | ||
6601 | */ | ||
6602 | -#define raw_smp_processor_id() (read_pda(cpu_number)) | ||
6603 | +DECLARE_PER_CPU(int, cpu_number); | ||
6604 | +#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) | ||
6605 | |||
6606 | extern cpumask_t cpu_possible_map; | ||
6607 | #define cpu_callin_map cpu_possible_map | ||
6608 | @@ -73,20 +137,6 @@ | ||
6609 | return cpus_weight(cpu_possible_map); | ||
6610 | } | ||
6611 | |||
6612 | -#ifdef CONFIG_X86_LOCAL_APIC | ||
6613 | - | ||
6614 | -#ifdef APIC_DEFINITION | ||
6615 | -extern int hard_smp_processor_id(void); | ||
6616 | -#else | ||
6617 | -#include <mach_apicdef.h> | ||
6618 | -static inline int hard_smp_processor_id(void) | ||
6619 | -{ | ||
6620 | - /* we don't want to mark this access volatile - bad code generation */ | ||
6621 | - return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); | ||
6622 | -} | ||
6623 | -#endif | ||
6624 | -#endif | ||
6625 | - | ||
6626 | #define safe_smp_processor_id() smp_processor_id() | ||
6627 | extern int __cpu_disable(void); | ||
6628 | extern void __cpu_die(unsigned int cpu); | ||
6629 | @@ -102,10 +152,31 @@ | ||
6630 | |||
6631 | #define NO_PROC_ID 0xFF /* No processor magic marker */ | ||
6632 | |||
6633 | -#endif | ||
6634 | +#endif /* CONFIG_SMP */ | ||
6635 | |||
6636 | #ifndef __ASSEMBLY__ | ||
6637 | |||
6638 | +#ifdef CONFIG_X86_LOCAL_APIC | ||
6639 | + | ||
6640 | +#ifdef APIC_DEFINITION | ||
6641 | +extern int hard_smp_processor_id(void); | ||
6642 | +#else | ||
6643 | +#include <mach_apicdef.h> | ||
6644 | +static inline int hard_smp_processor_id(void) | ||
6645 | +{ | ||
6646 | + /* we don't want to mark this access volatile - bad code generation */ | ||
6647 | + return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); | ||
6648 | +} | ||
6649 | +#endif /* APIC_DEFINITION */ | ||
6650 | + | ||
6651 | +#else /* CONFIG_X86_LOCAL_APIC */ | ||
6652 | + | ||
6653 | +#ifndef CONFIG_SMP | ||
6654 | +#define hard_smp_processor_id() 0 | ||
6655 | +#endif | ||
6656 | + | ||
6657 | +#endif /* CONFIG_X86_LOCAL_APIC */ | ||
6658 | + | ||
6659 | extern u8 apicid_2_node[]; | ||
6660 | |||
6661 | #ifdef CONFIG_X86_LOCAL_APIC | ||
6662 | --- a/include/asm-x86/mach-xen/asm/smp_64.h | ||
6663 | +++ b/include/asm-x86/mach-xen/asm/smp_64.h | ||
6664 | @@ -11,12 +11,11 @@ | ||
6665 | extern int disable_apic; | ||
6666 | |||
6667 | #ifdef CONFIG_X86_LOCAL_APIC | ||
6668 | -#include <asm/fixmap.h> | ||
6669 | #include <asm/mpspec.h> | ||
6670 | +#include <asm/apic.h> | ||
6671 | #ifdef CONFIG_X86_IO_APIC | ||
6672 | #include <asm/io_apic.h> | ||
6673 | #endif | ||
6674 | -#include <asm/apic.h> | ||
6675 | #include <asm/thread_info.h> | ||
6676 | #endif | ||
6677 | |||
6678 | @@ -41,7 +40,6 @@ | ||
6679 | extern void unlock_ipi_call_lock(void); | ||
6680 | extern int smp_num_siblings; | ||
6681 | extern void smp_send_reschedule(int cpu); | ||
6682 | -void smp_stop_cpu(void); | ||
6683 | |||
6684 | extern cpumask_t cpu_sibling_map[NR_CPUS]; | ||
6685 | extern cpumask_t cpu_core_map[NR_CPUS]; | ||
6686 | @@ -62,14 +60,6 @@ | ||
6687 | |||
6688 | #define raw_smp_processor_id() read_pda(cpunumber) | ||
6689 | |||
6690 | -#ifdef CONFIG_X86_LOCAL_APIC | ||
6691 | -static inline int hard_smp_processor_id(void) | ||
6692 | -{ | ||
6693 | - /* we don't want to mark this access volatile - bad code generation */ | ||
6694 | - return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); | ||
6695 | -} | ||
6696 | -#endif | ||
6697 | - | ||
6698 | extern int __cpu_disable(void); | ||
6699 | extern void __cpu_die(unsigned int cpu); | ||
6700 | extern void prefill_possible_map(void); | ||
6701 | @@ -78,6 +68,14 @@ | ||
6702 | |||
6703 | #define NO_PROC_ID 0xFF /* No processor magic marker */ | ||
6704 | |||
6705 | +#endif /* CONFIG_SMP */ | ||
6706 | + | ||
6707 | +#ifdef CONFIG_X86_LOCAL_APIC | ||
6708 | +static inline int hard_smp_processor_id(void) | ||
6709 | +{ | ||
6710 | + /* we don't want to mark this access volatile - bad code generation */ | ||
6711 | + return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); | ||
6712 | +} | ||
6713 | #endif | ||
6714 | |||
6715 | /* | ||
6716 | --- a/include/asm-x86/mach-xen/asm/system_32.h | ||
6717 | +++ b/include/asm-x86/mach-xen/asm/system_32.h | ||
6718 | @@ -4,7 +4,7 @@ | ||
6719 | #include <linux/kernel.h> | ||
6720 | #include <asm/segment.h> | ||
6721 | #include <asm/cpufeature.h> | ||
6722 | -#include <linux/bitops.h> /* for LOCK_PREFIX */ | ||
6723 | +#include <asm/cmpxchg.h> | ||
6724 | #include <asm/synch_bitops.h> | ||
6725 | #include <asm/hypervisor.h> | ||
6726 | |||
6727 | @@ -90,308 +90,102 @@ | ||
6728 | #define savesegment(seg, value) \ | ||
6729 | asm volatile("mov %%" #seg ",%0":"=rm" (value)) | ||
6730 | |||
6731 | -#define read_cr0() ({ \ | ||
6732 | - unsigned int __dummy; \ | ||
6733 | - __asm__ __volatile__( \ | ||
6734 | - "movl %%cr0,%0\n\t" \ | ||
6735 | - :"=r" (__dummy)); \ | ||
6736 | - __dummy; \ | ||
6737 | -}) | ||
6738 | -#define write_cr0(x) \ | ||
6739 | - __asm__ __volatile__("movl %0,%%cr0": :"r" (x)) | ||
6740 | - | ||
6741 | -#define read_cr2() (current_vcpu_info()->arch.cr2) | ||
6742 | -#define write_cr2(x) \ | ||
6743 | - __asm__ __volatile__("movl %0,%%cr2": :"r" (x)) | ||
6744 | - | ||
6745 | -#define read_cr3() ({ \ | ||
6746 | - unsigned int __dummy; \ | ||
6747 | - __asm__ ( \ | ||
6748 | - "movl %%cr3,%0\n\t" \ | ||
6749 | - :"=r" (__dummy)); \ | ||
6750 | - __dummy = xen_cr3_to_pfn(__dummy); \ | ||
6751 | - mfn_to_pfn(__dummy) << PAGE_SHIFT; \ | ||
6752 | -}) | ||
6753 | -#define write_cr3(x) ({ \ | ||
6754 | - unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT); \ | ||
6755 | - __dummy = xen_pfn_to_cr3(__dummy); \ | ||
6756 | - __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \ | ||
6757 | -}) | ||
6758 | -#define read_cr4() ({ \ | ||
6759 | - unsigned int __dummy; \ | ||
6760 | - __asm__( \ | ||
6761 | - "movl %%cr4,%0\n\t" \ | ||
6762 | - :"=r" (__dummy)); \ | ||
6763 | - __dummy; \ | ||
6764 | -}) | ||
6765 | -#define read_cr4_safe() ({ \ | ||
6766 | - unsigned int __dummy; \ | ||
6767 | - /* This could fault if %cr4 does not exist */ \ | ||
6768 | - __asm__("1: movl %%cr4, %0 \n" \ | ||
6769 | - "2: \n" \ | ||
6770 | - ".section __ex_table,\"a\" \n" \ | ||
6771 | - ".long 1b,2b \n" \ | ||
6772 | - ".previous \n" \ | ||
6773 | - : "=r" (__dummy): "0" (0)); \ | ||
6774 | - __dummy; \ | ||
6775 | -}) | ||
6776 | - | ||
6777 | -#define write_cr4(x) \ | ||
6778 | - __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) | ||
6779 | - | ||
6780 | -#define wbinvd() \ | ||
6781 | - __asm__ __volatile__ ("wbinvd": : :"memory") | ||
6782 | - | ||
6783 | -/* Clear the 'TS' bit */ | ||
6784 | -#define clts() (HYPERVISOR_fpu_taskswitch(0)) | ||
6785 | - | ||
6786 | -/* Set the 'TS' bit */ | ||
6787 | -#define stts() (HYPERVISOR_fpu_taskswitch(1)) | ||
6788 | - | ||
6789 | -#endif /* __KERNEL__ */ | ||
6790 | - | ||
6791 | -static inline unsigned long get_limit(unsigned long segment) | ||
6792 | +static inline void xen_clts(void) | ||
6793 | { | ||
6794 | - unsigned long __limit; | ||
6795 | - __asm__("lsll %1,%0" | ||
6796 | - :"=r" (__limit):"r" (segment)); | ||
6797 | - return __limit+1; | ||
6798 | + HYPERVISOR_fpu_taskswitch(0); | ||
6799 | } | ||
6800 | |||
6801 | -#define nop() __asm__ __volatile__ ("nop") | ||
6802 | - | ||
6803 | -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) | ||
6804 | - | ||
6805 | -#define tas(ptr) (xchg((ptr),1)) | ||
6806 | - | ||
6807 | -struct __xchg_dummy { unsigned long a[100]; }; | ||
6808 | -#define __xg(x) ((struct __xchg_dummy *)(x)) | ||
6809 | +static inline unsigned long xen_read_cr0(void) | ||
6810 | +{ | ||
6811 | + unsigned long val; | ||
6812 | + asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); | ||
6813 | + return val; | ||
6814 | +} | ||
6815 | |||
6816 | +static inline void xen_write_cr0(unsigned long val) | ||
6817 | +{ | ||
6818 | + asm volatile("movl %0,%%cr0": :"r" (val)); | ||
6819 | +} | ||
6820 | |||
6821 | -#ifdef CONFIG_X86_CMPXCHG64 | ||
6822 | +#define xen_read_cr2() (current_vcpu_info()->arch.cr2) | ||
6823 | |||
6824 | -/* | ||
6825 | - * The semantics of XCHGCMP8B are a bit strange, this is why | ||
6826 | - * there is a loop and the loading of %%eax and %%edx has to | ||
6827 | - * be inside. This inlines well in most cases, the cached | ||
6828 | - * cost is around ~38 cycles. (in the future we might want | ||
6829 | - * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that | ||
6830 | - * might have an implicit FPU-save as a cost, so it's not | ||
6831 | - * clear which path to go.) | ||
6832 | - * | ||
6833 | - * cmpxchg8b must be used with the lock prefix here to allow | ||
6834 | - * the instruction to be executed atomically, see page 3-102 | ||
6835 | - * of the instruction set reference 24319102.pdf. We need | ||
6836 | - * the reader side to see the coherent 64bit value. | ||
6837 | - */ | ||
6838 | -static inline void __set_64bit (unsigned long long * ptr, | ||
6839 | - unsigned int low, unsigned int high) | ||
6840 | +static inline void xen_write_cr2(unsigned long val) | ||
6841 | { | ||
6842 | - __asm__ __volatile__ ( | ||
6843 | - "\n1:\t" | ||
6844 | - "movl (%0), %%eax\n\t" | ||
6845 | - "movl 4(%0), %%edx\n\t" | ||
6846 | - "lock cmpxchg8b (%0)\n\t" | ||
6847 | - "jnz 1b" | ||
6848 | - : /* no outputs */ | ||
6849 | - : "D"(ptr), | ||
6850 | - "b"(low), | ||
6851 | - "c"(high) | ||
6852 | - : "ax","dx","memory"); | ||
6853 | + asm volatile("movl %0,%%cr2": :"r" (val)); | ||
6854 | } | ||
6855 | |||
6856 | -static inline void __set_64bit_constant (unsigned long long *ptr, | ||
6857 | - unsigned long long value) | ||
6858 | +static inline unsigned long xen_read_cr3(void) | ||
6859 | { | ||
6860 | - __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); | ||
6861 | + unsigned long val; | ||
6862 | + asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); | ||
6863 | + return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT; | ||
6864 | } | ||
6865 | -#define ll_low(x) *(((unsigned int*)&(x))+0) | ||
6866 | -#define ll_high(x) *(((unsigned int*)&(x))+1) | ||
6867 | |||
6868 | -static inline void __set_64bit_var (unsigned long long *ptr, | ||
6869 | - unsigned long long value) | ||
6870 | +static inline void xen_write_cr3(unsigned long val) | ||
6871 | { | ||
6872 | - __set_64bit(ptr,ll_low(value), ll_high(value)); | ||
6873 | + val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT)); | ||
6874 | + asm volatile("movl %0,%%cr3": :"r" (val)); | ||
6875 | } | ||
6876 | |||
6877 | -#define set_64bit(ptr,value) \ | ||
6878 | -(__builtin_constant_p(value) ? \ | ||
6879 | - __set_64bit_constant(ptr, value) : \ | ||
6880 | - __set_64bit_var(ptr, value) ) | ||
6881 | +static inline unsigned long xen_read_cr4(void) | ||
6882 | +{ | ||
6883 | + unsigned long val; | ||
6884 | + asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); | ||
6885 | + return val; | ||
6886 | +} | ||
6887 | |||
6888 | -#define _set_64bit(ptr,value) \ | ||
6889 | -(__builtin_constant_p(value) ? \ | ||
6890 | - __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ | ||
6891 | - __set_64bit(ptr, ll_low(value), ll_high(value)) ) | ||
6892 | +static inline unsigned long xen_read_cr4_safe(void) | ||
6893 | +{ | ||
6894 | + unsigned long val; | ||
6895 | + /* This could fault if %cr4 does not exist */ | ||
6896 | + asm("1: movl %%cr4, %0 \n" | ||
6897 | + "2: \n" | ||
6898 | + ".section __ex_table,\"a\" \n" | ||
6899 | + ".long 1b,2b \n" | ||
6900 | + ".previous \n" | ||
6901 | + : "=r" (val): "0" (0)); | ||
6902 | + return val; | ||
6903 | +} | ||
6904 | |||
6905 | -#endif | ||
6906 | +static inline void xen_write_cr4(unsigned long val) | ||
6907 | +{ | ||
6908 | + asm volatile("movl %0,%%cr4": :"r" (val)); | ||
6909 | +} | ||
6910 | |||
6911 | -/* | ||
6912 | - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | ||
6913 | - * Note 2: xchg has side effect, so that attribute volatile is necessary, | ||
6914 | - * but generally the primitive is invalid, *ptr is output argument. --ANK | ||
6915 | - */ | ||
6916 | -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) | ||
6917 | +static inline void xen_wbinvd(void) | ||
6918 | { | ||
6919 | - switch (size) { | ||
6920 | - case 1: | ||
6921 | - __asm__ __volatile__("xchgb %b0,%1" | ||
6922 | - :"=q" (x) | ||
6923 | - :"m" (*__xg(ptr)), "0" (x) | ||
6924 | - :"memory"); | ||
6925 | - break; | ||
6926 | - case 2: | ||
6927 | - __asm__ __volatile__("xchgw %w0,%1" | ||
6928 | - :"=r" (x) | ||
6929 | - :"m" (*__xg(ptr)), "0" (x) | ||
6930 | - :"memory"); | ||
6931 | - break; | ||
6932 | - case 4: | ||
6933 | - __asm__ __volatile__("xchgl %0,%1" | ||
6934 | - :"=r" (x) | ||
6935 | - :"m" (*__xg(ptr)), "0" (x) | ||
6936 | - :"memory"); | ||
6937 | - break; | ||
6938 | - } | ||
6939 | - return x; | ||
6940 | + asm volatile("wbinvd": : :"memory"); | ||
6941 | } | ||
6942 | |||
6943 | -/* | ||
6944 | - * Atomic compare and exchange. Compare OLD with MEM, if identical, | ||
6945 | - * store NEW in MEM. Return the initial value in MEM. Success is | ||
6946 | - * indicated by comparing RETURN with OLD. | ||
6947 | - */ | ||
6948 | +#define read_cr0() (xen_read_cr0()) | ||
6949 | +#define write_cr0(x) (xen_write_cr0(x)) | ||
6950 | +#define read_cr2() (xen_read_cr2()) | ||
6951 | +#define write_cr2(x) (xen_write_cr2(x)) | ||
6952 | +#define read_cr3() (xen_read_cr3()) | ||
6953 | +#define write_cr3(x) (xen_write_cr3(x)) | ||
6954 | +#define read_cr4() (xen_read_cr4()) | ||
6955 | +#define read_cr4_safe() (xen_read_cr4_safe()) | ||
6956 | +#define write_cr4(x) (xen_write_cr4(x)) | ||
6957 | +#define wbinvd() (xen_wbinvd()) | ||
6958 | |||
6959 | -#ifdef CONFIG_X86_CMPXCHG | ||
6960 | -#define __HAVE_ARCH_CMPXCHG 1 | ||
6961 | -#define cmpxchg(ptr,o,n)\ | ||
6962 | - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ | ||
6963 | - (unsigned long)(n),sizeof(*(ptr)))) | ||
6964 | -#define sync_cmpxchg(ptr,o,n)\ | ||
6965 | - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\ | ||
6966 | - (unsigned long)(n),sizeof(*(ptr)))) | ||
6967 | -#endif | ||
6968 | - | ||
6969 | -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, | ||
6970 | - unsigned long new, int size) | ||
6971 | -{ | ||
6972 | - unsigned long prev; | ||
6973 | - switch (size) { | ||
6974 | - case 1: | ||
6975 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" | ||
6976 | - : "=a"(prev) | ||
6977 | - : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
6978 | - : "memory"); | ||
6979 | - return prev; | ||
6980 | - case 2: | ||
6981 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" | ||
6982 | - : "=a"(prev) | ||
6983 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
6984 | - : "memory"); | ||
6985 | - return prev; | ||
6986 | - case 4: | ||
6987 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" | ||
6988 | - : "=a"(prev) | ||
6989 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
6990 | - : "memory"); | ||
6991 | - return prev; | ||
6992 | - } | ||
6993 | - return old; | ||
6994 | -} | ||
6995 | +/* Clear the 'TS' bit */ | ||
6996 | +#define clts() (xen_clts()) | ||
6997 | |||
6998 | -/* | ||
6999 | - * Always use locked operations when touching memory shared with a | ||
7000 | - * hypervisor, since the system may be SMP even if the guest kernel | ||
7001 | - * isn't. | ||
7002 | - */ | ||
7003 | -static inline unsigned long __sync_cmpxchg(volatile void *ptr, | ||
7004 | - unsigned long old, | ||
7005 | - unsigned long new, int size) | ||
7006 | -{ | ||
7007 | - unsigned long prev; | ||
7008 | - switch (size) { | ||
7009 | - case 1: | ||
7010 | - __asm__ __volatile__("lock; cmpxchgb %b1,%2" | ||
7011 | - : "=a"(prev) | ||
7012 | - : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
7013 | - : "memory"); | ||
7014 | - return prev; | ||
7015 | - case 2: | ||
7016 | - __asm__ __volatile__("lock; cmpxchgw %w1,%2" | ||
7017 | - : "=a"(prev) | ||
7018 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
7019 | - : "memory"); | ||
7020 | - return prev; | ||
7021 | - case 4: | ||
7022 | - __asm__ __volatile__("lock; cmpxchgl %1,%2" | ||
7023 | - : "=a"(prev) | ||
7024 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
7025 | - : "memory"); | ||
7026 | - return prev; | ||
7027 | - } | ||
7028 | - return old; | ||
7029 | -} | ||
7030 | +/* Set the 'TS' bit */ | ||
7031 | +#define stts() (HYPERVISOR_fpu_taskswitch(1)) | ||
7032 | |||
7033 | -#ifndef CONFIG_X86_CMPXCHG | ||
7034 | -/* | ||
7035 | - * Building a kernel capable running on 80386. It may be necessary to | ||
7036 | - * simulate the cmpxchg on the 80386 CPU. For that purpose we define | ||
7037 | - * a function for each of the sizes we support. | ||
7038 | - */ | ||
7039 | +#endif /* __KERNEL__ */ | ||
7040 | |||
7041 | -extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8); | ||
7042 | -extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16); | ||
7043 | -extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32); | ||
7044 | - | ||
7045 | -static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, | ||
7046 | - unsigned long new, int size) | ||
7047 | -{ | ||
7048 | - switch (size) { | ||
7049 | - case 1: | ||
7050 | - return cmpxchg_386_u8(ptr, old, new); | ||
7051 | - case 2: | ||
7052 | - return cmpxchg_386_u16(ptr, old, new); | ||
7053 | - case 4: | ||
7054 | - return cmpxchg_386_u32(ptr, old, new); | ||
7055 | - } | ||
7056 | - return old; | ||
7057 | -} | ||
7058 | - | ||
7059 | -#define cmpxchg(ptr,o,n) \ | ||
7060 | -({ \ | ||
7061 | - __typeof__(*(ptr)) __ret; \ | ||
7062 | - if (likely(boot_cpu_data.x86 > 3)) \ | ||
7063 | - __ret = __cmpxchg((ptr), (unsigned long)(o), \ | ||
7064 | - (unsigned long)(n), sizeof(*(ptr))); \ | ||
7065 | - else \ | ||
7066 | - __ret = cmpxchg_386((ptr), (unsigned long)(o), \ | ||
7067 | - (unsigned long)(n), sizeof(*(ptr))); \ | ||
7068 | - __ret; \ | ||
7069 | -}) | ||
7070 | -#endif | ||
7071 | - | ||
7072 | -#ifdef CONFIG_X86_CMPXCHG64 | ||
7073 | - | ||
7074 | -static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, | ||
7075 | - unsigned long long new) | ||
7076 | -{ | ||
7077 | - unsigned long long prev; | ||
7078 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" | ||
7079 | - : "=A"(prev) | ||
7080 | - : "b"((unsigned long)new), | ||
7081 | - "c"((unsigned long)(new >> 32)), | ||
7082 | - "m"(*__xg(ptr)), | ||
7083 | - "0"(old) | ||
7084 | - : "memory"); | ||
7085 | - return prev; | ||
7086 | -} | ||
7087 | - | ||
7088 | -#define cmpxchg64(ptr,o,n)\ | ||
7089 | - ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ | ||
7090 | - (unsigned long long)(n))) | ||
7091 | +static inline unsigned long get_limit(unsigned long segment) | ||
7092 | +{ | ||
7093 | + unsigned long __limit; | ||
7094 | + __asm__("lsll %1,%0" | ||
7095 | + :"=r" (__limit):"r" (segment)); | ||
7096 | + return __limit+1; | ||
7097 | +} | ||
7098 | + | ||
7099 | +#define nop() __asm__ __volatile__ ("nop") | ||
7100 | |||
7101 | -#endif | ||
7102 | - | ||
7103 | /* | ||
7104 | * Force strict CPU ordering. | ||
7105 | * And yes, this is required on UP too when we're talking | ||
7106 | --- a/include/asm-x86/mach-xen/asm/system_64.h | ||
7107 | +++ b/include/asm-x86/mach-xen/asm/system_64.h | ||
7108 | @@ -3,7 +3,7 @@ | ||
7109 | |||
7110 | #include <linux/kernel.h> | ||
7111 | #include <asm/segment.h> | ||
7112 | -#include <asm/alternative.h> | ||
7113 | +#include <asm/cmpxchg.h> | ||
7114 | |||
7115 | #include <asm/synch_bitops.h> | ||
7116 | #include <asm/hypervisor.h> | ||
7117 | @@ -43,7 +43,7 @@ | ||
7118 | [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ | ||
7119 | [ti_flags] "i" (offsetof(struct thread_info, flags)),\ | ||
7120 | [tif_fork] "i" (TIF_FORK), \ | ||
7121 | - [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ | ||
7122 | + [thread_info] "i" (offsetof(struct task_struct, stack)), \ | ||
7123 | [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ | ||
7124 | : "memory", "cc" __EXTRA_CLOBBER) | ||
7125 | |||
7126 | @@ -92,6 +92,12 @@ | ||
7127 | machine_to_phys(__dummy); \ | ||
7128 | }) | ||
7129 | |||
7130 | +static inline void write_cr3(unsigned long val) | ||
7131 | +{ | ||
7132 | + val = phys_to_machine(val); | ||
7133 | + asm volatile("movq %0,%%cr3" :: "r" (val) : "memory"); | ||
7134 | +} | ||
7135 | + | ||
7136 | static inline unsigned long read_cr4(void) | ||
7137 | { | ||
7138 | unsigned long cr4; | ||
7139 | @@ -101,7 +107,7 @@ | ||
7140 | |||
7141 | static inline void write_cr4(unsigned long val) | ||
7142 | { | ||
7143 | - asm volatile("movq %0,%%cr4" :: "r" (val)); | ||
7144 | + asm volatile("movq %0,%%cr4" :: "r" (val) : "memory"); | ||
7145 | } | ||
7146 | |||
7147 | #define stts() (HYPERVISOR_fpu_taskswitch(1)) | ||
7148 | @@ -122,100 +128,6 @@ | ||
7149 | |||
7150 | #define nop() __asm__ __volatile__ ("nop") | ||
7151 | |||
7152 | -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) | ||
7153 | - | ||
7154 | -#define tas(ptr) (xchg((ptr),1)) | ||
7155 | - | ||
7156 | -#define __xg(x) ((volatile long *)(x)) | ||
7157 | - | ||
7158 | -static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) | ||
7159 | -{ | ||
7160 | - *ptr = val; | ||
7161 | -} | ||
7162 | - | ||
7163 | -#define _set_64bit set_64bit | ||
7164 | - | ||
7165 | -/* | ||
7166 | - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway | ||
7167 | - * Note 2: xchg has side effect, so that attribute volatile is necessary, | ||
7168 | - * but generally the primitive is invalid, *ptr is output argument. --ANK | ||
7169 | - */ | ||
7170 | -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) | ||
7171 | -{ | ||
7172 | - switch (size) { | ||
7173 | - case 1: | ||
7174 | - __asm__ __volatile__("xchgb %b0,%1" | ||
7175 | - :"=q" (x) | ||
7176 | - :"m" (*__xg(ptr)), "0" (x) | ||
7177 | - :"memory"); | ||
7178 | - break; | ||
7179 | - case 2: | ||
7180 | - __asm__ __volatile__("xchgw %w0,%1" | ||
7181 | - :"=r" (x) | ||
7182 | - :"m" (*__xg(ptr)), "0" (x) | ||
7183 | - :"memory"); | ||
7184 | - break; | ||
7185 | - case 4: | ||
7186 | - __asm__ __volatile__("xchgl %k0,%1" | ||
7187 | - :"=r" (x) | ||
7188 | - :"m" (*__xg(ptr)), "0" (x) | ||
7189 | - :"memory"); | ||
7190 | - break; | ||
7191 | - case 8: | ||
7192 | - __asm__ __volatile__("xchgq %0,%1" | ||
7193 | - :"=r" (x) | ||
7194 | - :"m" (*__xg(ptr)), "0" (x) | ||
7195 | - :"memory"); | ||
7196 | - break; | ||
7197 | - } | ||
7198 | - return x; | ||
7199 | -} | ||
7200 | - | ||
7201 | -/* | ||
7202 | - * Atomic compare and exchange. Compare OLD with MEM, if identical, | ||
7203 | - * store NEW in MEM. Return the initial value in MEM. Success is | ||
7204 | - * indicated by comparing RETURN with OLD. | ||
7205 | - */ | ||
7206 | - | ||
7207 | -#define __HAVE_ARCH_CMPXCHG 1 | ||
7208 | - | ||
7209 | -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, | ||
7210 | - unsigned long new, int size) | ||
7211 | -{ | ||
7212 | - unsigned long prev; | ||
7213 | - switch (size) { | ||
7214 | - case 1: | ||
7215 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" | ||
7216 | - : "=a"(prev) | ||
7217 | - : "q"(new), "m"(*__xg(ptr)), "0"(old) | ||
7218 | - : "memory"); | ||
7219 | - return prev; | ||
7220 | - case 2: | ||
7221 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" | ||
7222 | - : "=a"(prev) | ||
7223 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
7224 | - : "memory"); | ||
7225 | - return prev; | ||
7226 | - case 4: | ||
7227 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2" | ||
7228 | - : "=a"(prev) | ||
7229 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
7230 | - : "memory"); | ||
7231 | - return prev; | ||
7232 | - case 8: | ||
7233 | - __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2" | ||
7234 | - : "=a"(prev) | ||
7235 | - : "r"(new), "m"(*__xg(ptr)), "0"(old) | ||
7236 | - : "memory"); | ||
7237 | - return prev; | ||
7238 | - } | ||
7239 | - return old; | ||
7240 | -} | ||
7241 | - | ||
7242 | -#define cmpxchg(ptr,o,n)\ | ||
7243 | - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ | ||
7244 | - (unsigned long)(n),sizeof(*(ptr)))) | ||
7245 | - | ||
7246 | #ifdef CONFIG_SMP | ||
7247 | #define smp_mb() mb() | ||
7248 | #define smp_rmb() rmb() | ||
7249 | --- a/include/asm-x86/mach-xen/asm/tlbflush_32.h | ||
7250 | +++ b/include/asm-x86/mach-xen/asm/tlbflush_32.h | ||
7251 | @@ -29,8 +29,13 @@ | ||
7252 | * and page-granular flushes are available only on i486 and up. | ||
7253 | */ | ||
7254 | |||
7255 | +#define TLB_FLUSH_ALL 0xffffffff | ||
7256 | + | ||
7257 | + | ||
7258 | #ifndef CONFIG_SMP | ||
7259 | |||
7260 | +#include <linux/sched.h> | ||
7261 | + | ||
7262 | #define flush_tlb() __flush_tlb() | ||
7263 | #define flush_tlb_all() __flush_tlb_all() | ||
7264 | #define local_flush_tlb() __flush_tlb() | ||
7265 | @@ -55,7 +60,7 @@ | ||
7266 | __flush_tlb(); | ||
7267 | } | ||
7268 | |||
7269 | -#else | ||
7270 | +#else /* SMP */ | ||
7271 | |||
7272 | #include <asm/smp.h> | ||
7273 | |||
7274 | @@ -84,9 +89,7 @@ | ||
7275 | char __cacheline_padding[L1_CACHE_BYTES-8]; | ||
7276 | }; | ||
7277 | DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); | ||
7278 | - | ||
7279 | - | ||
7280 | -#endif | ||
7281 | +#endif /* SMP */ | ||
7282 | |||
7283 | #define flush_tlb_kernel_range(start, end) flush_tlb_all() | ||
7284 | |||
7285 | --- a/include/asm-x86/mach-xen/asm/tlbflush_64.h | ||
7286 | +++ b/include/asm-x86/mach-xen/asm/tlbflush_64.h | ||
7287 | @@ -2,7 +2,9 @@ | ||
7288 | #define _X8664_TLBFLUSH_H | ||
7289 | |||
7290 | #include <linux/mm.h> | ||
7291 | +#include <linux/sched.h> | ||
7292 | #include <asm/processor.h> | ||
7293 | +#include <asm/system.h> | ||
7294 | |||
7295 | #define __flush_tlb() xen_tlb_flush() | ||
7296 | |||
7297 | --- a/lib/swiotlb-xen.c | ||
7298 | +++ b/lib/swiotlb-xen.c | ||
7299 | @@ -729,7 +729,6 @@ | ||
7300 | return (mask >= ((1UL << dma_bits) - 1)); | ||
7301 | } | ||
7302 | |||
7303 | -EXPORT_SYMBOL(swiotlb_init); | ||
7304 | EXPORT_SYMBOL(swiotlb_map_single); | ||
7305 | EXPORT_SYMBOL(swiotlb_unmap_single); | ||
7306 | EXPORT_SYMBOL(swiotlb_map_sg); | ||
7307 | --- a/net/core/dev.c | ||
7308 | +++ b/net/core/dev.c | ||
7309 | @@ -1590,12 +1590,17 @@ | ||
7310 | inline int skb_checksum_setup(struct sk_buff *skb) | ||
7311 | { | ||
7312 | if (skb->proto_csum_blank) { | ||
7313 | + struct iphdr *iph; | ||
7314 | + unsigned char *th; | ||
7315 | + | ||
7316 | if (skb->protocol != htons(ETH_P_IP)) | ||
7317 | goto out; | ||
7318 | - skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl; | ||
7319 | - if (skb->h.raw >= skb->tail) | ||
7320 | + iph = ip_hdr(skb); | ||
7321 | + th = skb_network_header(skb) + 4 * iph->ihl; | ||
7322 | + if (th >= skb_tail_pointer(skb)) | ||
7323 | goto out; | ||
7324 | - switch (skb->nh.iph->protocol) { | ||
7325 | + skb->csum_start = th - skb->head; | ||
7326 | + switch (iph->protocol) { | ||
7327 | case IPPROTO_TCP: | ||
7328 | skb->csum_offset = offsetof(struct tcphdr, check); | ||
7329 | break; | ||
7330 | @@ -1606,10 +1611,10 @@ | ||
7331 | if (net_ratelimit()) | ||
7332 | printk(KERN_ERR "Attempting to checksum a non-" | ||
7333 | "TCP/UDP packet, dropping a protocol" | ||
7334 | - " %d packet", skb->nh.iph->protocol); | ||
7335 | + " %d packet", iph->protocol); | ||
7336 | goto out; | ||
7337 | } | ||
7338 | - if ((skb->h.raw + skb->csum_offset + 2) > skb->tail) | ||
7339 | + if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) | ||
7340 | goto out; | ||
7341 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
7342 | skb->proto_csum_blank = 0; | ||
7343 | --- a/scripts/Makefile.xen.awk | ||
7344 | +++ b/scripts/Makefile.xen.awk | ||
7345 | @@ -13,7 +13,7 @@ | ||
7346 | next | ||
7347 | } | ||
7348 | |||
7349 | -/:[[:space:]]*%\.[cS][[:space:]]/ { | ||
7350 | +/:[[:space:]]*\$\(src\)\/%\.[cS][[:space:]]/ { | ||
7351 | line = gensub(/%.([cS])/, "%-xen.\\1", "g", $0) | ||
7352 | line = gensub(/(single-used-m)/, "xen-\\1", "g", line) | ||
7353 | print line |