Contents of /trunk/kernel26-xen/patches-2.6.25-r1/1020-2.6.25-xen-patch-2.6.19.patch
Revision 612
Sat May 24 01:03:50 2008 UTC (16 years, 4 months ago) by niro
File size: 318811 byte(s)
- fixed patch again
1 | From: www.kernel.org |
2 | Subject: Linux 2.6.19 |
3 | Patch-mainline: 2.6.19 |
4 | |
5 | Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py |
6 | |
7 | Acked-by: jbeulich@novell.com |
8 | |
9 | --- |
10 | arch/x86/Kconfig | 1 |
11 | arch/x86/ia32/ia32entry-xen.S | 9 |
12 | arch/x86/kernel/Makefile | 5 |
13 | arch/x86/kernel/apic_32-xen.c | 9 |
14 | arch/x86/kernel/apic_64-xen.c | 20 |
15 | arch/x86/kernel/cpu/common-xen.c | 20 |
16 | arch/x86/kernel/e820_64-xen.c | 320 +++--- |
17 | arch/x86/kernel/early_printk-xen.c | 20 |
18 | arch/x86/kernel/entry_32-xen.S | 139 +- |
19 | arch/x86/kernel/entry_64-xen.S | 106 -- |
20 | arch/x86/kernel/genapic_xen_64.c | 9 |
21 | arch/x86/kernel/head64-xen.c | 44 |
22 | arch/x86/kernel/head_32-xen.S | 2 |
23 | arch/x86/kernel/head_64-xen.S | 5 |
24 | arch/x86/kernel/io_apic_32-xen.c | 750 +++++++++------ |
25 | arch/x86/kernel/io_apic_64-xen.c | 1250 +++++++++++--------------- |
26 | arch/x86/kernel/ioport_64-xen.c | 1 |
27 | arch/x86/kernel/irq_32-xen.c | 19 |
28 | arch/x86/kernel/irq_64-xen.c | 35 |
29 | arch/x86/kernel/ldt_32-xen.c | 2 |
30 | arch/x86/kernel/microcode-xen.c | 85 + |
31 | arch/x86/kernel/mpparse_32-xen.c | 70 - |
32 | arch/x86/kernel/mpparse_64-xen.c | 313 +----- |
33 | arch/x86/kernel/pci-dma_32-xen.c | 16 |
34 | arch/x86/kernel/pci-swiotlb_64-xen.c | 3 |
35 | arch/x86/kernel/process_32-xen.c | 29 |
36 | arch/x86/kernel/process_64-xen.c | 90 + |
37 | arch/x86/kernel/setup64-xen.c | 41 |
38 | arch/x86/kernel/setup_32-xen.c | 430 +++----- |
39 | arch/x86/kernel/setup_64-xen.c | 271 +---- |
40 | arch/x86/kernel/smp_32-xen.c | 75 + |
41 | arch/x86/kernel/smp_64-xen.c | 35 |
42 | arch/x86/kernel/time_32-xen.c | 86 - |
43 | arch/x86/kernel/traps_32-xen.c | 238 +++- |
44 | arch/x86/kernel/traps_64-xen.c | 220 +++- |
45 | arch/x86/kernel/vsyscall_64-xen.c | 117 ++ |
46 | arch/x86/mach-xen/setup.c | 6 |
47 | arch/x86/mm/fault_32-xen.c | 29 |
48 | arch/x86/mm/fault_64-xen.c | 34 |
49 | arch/x86/mm/highmem_32-xen.c | 31 |
50 | arch/x86/mm/hypervisor.c | 9 |
51 | arch/x86/mm/init_32-xen.c | 89 + |
52 | arch/x86/mm/init_64-xen.c | 184 +-- |
53 | arch/x86/mm/ioremap_32-xen.c | 10 |
54 | arch/x86/mm/pageattr_64-xen.c | 24 |
55 | arch/x86/mm/pgtable_32-xen.c | 31 |
56 | arch/x86/pci/irq-xen.c | 38 |
57 | drivers/char/tpm/tpm_xen.c | 5 |
58 | drivers/pci/Kconfig | 2 |
59 | drivers/xen/Kconfig | 3 |
60 | drivers/xen/balloon/balloon.c | 2 |
61 | drivers/xen/blkback/blkback.c | 2 |
62 | drivers/xen/blkback/common.h | 2 |
63 | drivers/xen/blkfront/blkfront.c | 4 |
64 | drivers/xen/blktap/blktap.c | 2 |
65 | drivers/xen/blktap/common.h | 2 |
66 | drivers/xen/console/console.c | 10 |
67 | drivers/xen/console/xencons_ring.c | 4 |
68 | drivers/xen/core/evtchn.c | 50 - |
69 | drivers/xen/core/reboot.c | 3 |
70 | drivers/xen/core/smpboot.c | 6 |
71 | drivers/xen/fbfront/xenfb.c | 3 |
72 | drivers/xen/fbfront/xenkbd.c | 2 |
73 | drivers/xen/gntdev/gntdev.c | 11 |
74 | drivers/xen/netback/accel.c | 2 |
75 | drivers/xen/netback/common.h | 2 |
76 | drivers/xen/netback/loopback.c | 2 |
77 | drivers/xen/netback/netback.c | 6 |
78 | drivers/xen/netfront/netfront.c | 8 |
79 | drivers/xen/pciback/pciback.h | 2 |
80 | drivers/xen/pciback/pciback_ops.c | 2 |
81 | drivers/xen/pcifront/pci_op.c | 8 |
82 | drivers/xen/privcmd/compat_privcmd.c | 1 |
83 | drivers/xen/privcmd/privcmd.c | 2 |
84 | drivers/xen/sfc_netback/accel_xenbus.c | 6 |
85 | drivers/xen/sfc_netfront/accel.h | 6 |
86 | drivers/xen/sfc_netfront/accel_msg.c | 6 |
87 | drivers/xen/sfc_netfront/accel_tso.c | 2 |
88 | drivers/xen/sfc_netfront/accel_vi.c | 4 |
89 | drivers/xen/tpmback/common.h | 2 |
90 | drivers/xen/tpmback/tpmback.c | 4 |
91 | drivers/xen/xenbus/xenbus_comms.c | 2 |
92 | drivers/xen/xenoprof/xenoprofile.c | 2 |
93 | include/asm-generic/pgtable.h | 2 |
94 | include/asm-x86/mach-xen/asm/desc_32.h | 127 +- |
95 | include/asm-x86/mach-xen/asm/dma-mapping_64.h | 7 |
96 | include/asm-x86/mach-xen/asm/e820_64.h | 15 |
97 | include/asm-x86/mach-xen/asm/fixmap_32.h | 5 |
98 | include/asm-x86/mach-xen/asm/fixmap_64.h | 2 |
99 | include/asm-x86/mach-xen/asm/hw_irq_32.h | 8 |
100 | include/asm-x86/mach-xen/asm/hw_irq_64.h | 10 |
101 | include/asm-x86/mach-xen/asm/io_32.h | 27 |
102 | include/asm-x86/mach-xen/asm/io_64.h | 27 |
103 | include/asm-x86/mach-xen/asm/pgtable-2level.h | 12 |
104 | include/asm-x86/mach-xen/asm/pgtable-3level.h | 14 |
105 | include/asm-x86/mach-xen/asm/pgtable_32.h | 143 +- |
106 | include/asm-x86/mach-xen/asm/pgtable_64.h | 86 + |
107 | include/asm-x86/mach-xen/asm/processor_32.h | 62 - |
108 | include/asm-x86/mach-xen/asm/processor_64.h | 2 |
109 | include/asm-x86/mach-xen/asm/segment_32.h | 19 |
110 | include/asm-x86/mach-xen/asm/smp_32.h | 25 |
111 | include/asm-x86/mach-xen/asm/smp_64.h | 27 |
112 | include/asm-x86/mach-xen/asm/system_32.h | 36 |
113 | include/asm-x86/mach-xen/asm/system_64.h | 1 |
114 | include/asm-x86/mach-xen/asm/tlbflush_32.h | 2 |
115 | include/asm-x86/mach-xen/asm/tlbflush_64.h | 3 |
116 | include/asm-x86/thread_info_64.h | 4 |
117 | include/linux/skbuff.h | 7 |
118 | include/xen/evtchn.h | 10 |
119 | include/xen/xencons.h | 2 |
120 | mm/mprotect.c | 2 |
121 | net/core/dev.c | 8 |
122 | 112 files changed, 3102 insertions(+), 3145 deletions(-) |
123 | |
124 | --- a/arch/x86/Kconfig |
125 | +++ b/arch/x86/Kconfig |
126 | @@ -390,6 +390,7 @@ |
127 | |
128 | menuconfig PARAVIRT_GUEST |
129 | bool "Paravirtualized guest support" |
130 | + depends on !X86_XEN && !X86_64_XEN |
131 | help |
132 | Say Y here to get to see options related to running Linux under |
133 | various hypervisors. This option alone does not add any kernel code. |
134 | --- a/arch/x86/ia32/ia32entry-xen.S |
135 | +++ b/arch/x86/ia32/ia32entry-xen.S |
136 | @@ -83,6 +83,7 @@ |
137 | */ |
138 | ENTRY(ia32_sysenter_target) |
139 | CFI_STARTPROC32 simple |
140 | + CFI_SIGNAL_FRAME |
141 | CFI_DEF_CFA rsp,SS+8-RIP+16 |
142 | /*CFI_REL_OFFSET ss,SS-RIP+16*/ |
143 | CFI_REL_OFFSET rsp,RSP-RIP+16 |
144 | @@ -164,6 +165,7 @@ |
145 | */ |
146 | ENTRY(ia32_cstar_target) |
147 | CFI_STARTPROC32 simple |
148 | + CFI_SIGNAL_FRAME |
149 | CFI_DEF_CFA rsp,SS+8-RIP+16 |
150 | /*CFI_REL_OFFSET ss,SS-RIP+16*/ |
151 | CFI_REL_OFFSET rsp,RSP-RIP+16 |
152 | @@ -243,6 +245,7 @@ |
153 | |
154 | ENTRY(ia32_syscall) |
155 | CFI_STARTPROC simple |
156 | + CFI_SIGNAL_FRAME |
157 | CFI_DEF_CFA rsp,SS+8-RIP+16 |
158 | /*CFI_REL_OFFSET ss,SS-RIP+16*/ |
159 | CFI_REL_OFFSET rsp,RSP-RIP+16 |
160 | @@ -320,6 +323,7 @@ |
161 | popq %r11 |
162 | CFI_ENDPROC |
163 | CFI_STARTPROC32 simple |
164 | + CFI_SIGNAL_FRAME |
165 | CFI_DEF_CFA rsp,SS+8-ARGOFFSET |
166 | CFI_REL_OFFSET rax,RAX-ARGOFFSET |
167 | CFI_REL_OFFSET rcx,RCX-ARGOFFSET |
168 | @@ -653,8 +657,8 @@ |
169 | .quad sys_readlinkat /* 305 */ |
170 | .quad sys_fchmodat |
171 | .quad sys_faccessat |
172 | - .quad quiet_ni_syscall /* pselect6 for now */ |
173 | - .quad quiet_ni_syscall /* ppoll for now */ |
174 | + .quad compat_sys_pselect6 |
175 | + .quad compat_sys_ppoll |
176 | .quad sys_unshare /* 310 */ |
177 | .quad compat_sys_set_robust_list |
178 | .quad compat_sys_get_robust_list |
179 | @@ -663,4 +667,5 @@ |
180 | .quad sys_tee |
181 | .quad compat_sys_vmsplice |
182 | .quad compat_sys_move_pages |
183 | + .quad sys_getcpu |
184 | ia32_syscall_end: |
185 | --- a/arch/x86/kernel/Makefile |
186 | +++ b/arch/x86/kernel/Makefile |
187 | @@ -91,7 +91,7 @@ |
188 | ### |
189 | # 64 bit specific files |
190 | ifeq ($(CONFIG_X86_64),y) |
191 | - obj-y += genapic_64.o genapic_flat_64.o |
192 | + obj-$(CONFIG_X86_LOCAL_APIC) += genapic_64.o genapic_flat_64.o |
193 | obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o |
194 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o |
195 | obj-$(CONFIG_AUDIT) += audit_64.o |
196 | @@ -104,5 +104,6 @@ |
197 | pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o |
198 | endif |
199 | |
200 | -disabled-obj-$(CONFIG_XEN) := i8253.o i8259_$(BITS).o reboot.o smpboot_$(BITS).o tsc_$(BITS).o |
201 | +disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \ |
202 | + smpboot_$(BITS).o tsc_$(BITS).o |
203 | %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := |
204 | --- a/arch/x86/kernel/apic_32-xen.c |
205 | +++ b/arch/x86/kernel/apic_32-xen.c |
206 | @@ -54,7 +54,6 @@ |
207 | /* |
208 | * Knob to control our willingness to enable the local APIC. |
209 | */ |
210 | -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ |
211 | |
212 | /* |
213 | * Debug level |
214 | @@ -102,7 +101,7 @@ |
215 | |
216 | #ifndef CONFIG_XEN |
217 | #ifndef CONFIG_SMP |
218 | -static void up_apic_timer_interrupt_call(struct pt_regs *regs) |
219 | +static void up_apic_timer_interrupt_call(void) |
220 | { |
221 | int cpu = smp_processor_id(); |
222 | |
223 | @@ -111,11 +110,11 @@ |
224 | */ |
225 | per_cpu(irq_stat, cpu).apic_timer_irqs++; |
226 | |
227 | - smp_local_timer_interrupt(regs); |
228 | + smp_local_timer_interrupt(); |
229 | } |
230 | #endif |
231 | |
232 | -void smp_send_timer_broadcast_ipi(struct pt_regs *regs) |
233 | +void smp_send_timer_broadcast_ipi(void) |
234 | { |
235 | cpumask_t mask; |
236 | |
237 | @@ -128,7 +127,7 @@ |
238 | * We can directly call the apic timer interrupt handler |
239 | * in UP case. Minus all irq related functions |
240 | */ |
241 | - up_apic_timer_interrupt_call(regs); |
242 | + up_apic_timer_interrupt_call(); |
243 | #endif |
244 | } |
245 | } |
246 | --- a/arch/x86/kernel/apic_64-xen.c |
247 | +++ b/arch/x86/kernel/apic_64-xen.c |
248 | @@ -43,7 +43,7 @@ |
249 | */ |
250 | void ack_bad_irq(unsigned int irq) |
251 | { |
252 | - printk("unexpected IRQ trap at vector %02x\n", irq); |
253 | + printk("unexpected IRQ trap at irq %02x\n", irq); |
254 | /* |
255 | * Currently unexpected vectors happen only on SMP and APIC. |
256 | * We _must_ ack these because every local APIC has only N |
257 | @@ -62,19 +62,19 @@ |
258 | return -EINVAL; |
259 | } |
260 | |
261 | -void smp_local_timer_interrupt(struct pt_regs *regs) |
262 | +void smp_local_timer_interrupt(void) |
263 | { |
264 | - profile_tick(CPU_PROFILING, regs); |
265 | + profile_tick(CPU_PROFILING); |
266 | #ifndef CONFIG_XEN |
267 | #ifdef CONFIG_SMP |
268 | - update_process_times(user_mode(regs)); |
269 | + update_process_times(user_mode(get_irq_regs())); |
270 | #endif |
271 | #endif |
272 | /* |
273 | * We take the 'long' return path, and there every subsystem |
274 | * grabs the appropriate locks (kernel lock/ irq lock). |
275 | * |
276 | - * we might want to decouple profiling from the 'long path', |
277 | + * We might want to decouple profiling from the 'long path', |
278 | * and do the profiling totally in assembly. |
279 | * |
280 | * Currently this isn't too much of an issue (performance wise), |
281 | @@ -92,6 +92,8 @@ |
282 | */ |
283 | void smp_apic_timer_interrupt(struct pt_regs *regs) |
284 | { |
285 | + struct pt_regs *old_regs = set_irq_regs(regs); |
286 | + |
287 | /* |
288 | * the NMI deadlock-detector uses this. |
289 | */ |
290 | @@ -109,8 +111,9 @@ |
291 | */ |
292 | exit_idle(); |
293 | irq_enter(); |
294 | - smp_local_timer_interrupt(regs); |
295 | + smp_local_timer_interrupt(); |
296 | irq_exit(); |
297 | + set_irq_regs(old_regs); |
298 | } |
299 | |
300 | /* |
301 | @@ -188,9 +191,8 @@ |
302 | int __init APIC_init_uniprocessor (void) |
303 | { |
304 | #ifdef CONFIG_X86_IO_APIC |
305 | - if (smp_found_config) |
306 | - if (!skip_ioapic_setup && nr_ioapics) |
307 | - setup_IO_APIC(); |
308 | + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) |
309 | + setup_IO_APIC(); |
310 | #endif |
311 | |
312 | return 1; |
313 | --- a/arch/x86/kernel/cpu/common-xen.c |
314 | +++ b/arch/x86/kernel/cpu/common-xen.c |
315 | @@ -43,7 +43,7 @@ |
316 | |
317 | extern int disable_pse; |
318 | |
319 | -static void default_init(struct cpuinfo_x86 * c) |
320 | +static void __cpuinit default_init(struct cpuinfo_x86 * c) |
321 | { |
322 | /* Not much we can do here... */ |
323 | /* Check if at least it has cpuid */ |
324 | @@ -56,7 +56,7 @@ |
325 | } |
326 | } |
327 | |
328 | -static struct cpu_dev default_cpu = { |
329 | +static struct cpu_dev __cpuinitdata default_cpu = { |
330 | .c_init = default_init, |
331 | .c_vendor = "Unknown", |
332 | }; |
333 | @@ -191,7 +191,16 @@ |
334 | |
335 | static int __init x86_fxsr_setup(char * s) |
336 | { |
337 | + /* Tell all the other CPU's to not use it... */ |
338 | disable_x86_fxsr = 1; |
339 | + |
340 | + /* |
341 | + * ... and clear the bits early in the boot_cpu_data |
342 | + * so that the bootup process doesn't try to do this |
343 | + * either. |
344 | + */ |
345 | + clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); |
346 | + clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); |
347 | return 1; |
348 | } |
349 | __setup("nofxsr", x86_fxsr_setup); |
350 | @@ -272,7 +281,7 @@ |
351 | } |
352 | } |
353 | |
354 | -void __cpuinit generic_identify(struct cpuinfo_x86 * c) |
355 | +static void __cpuinit generic_identify(struct cpuinfo_x86 * c) |
356 | { |
357 | u32 tfms, xlvl; |
358 | int ebx; |
359 | @@ -698,8 +707,7 @@ |
360 | */ |
361 | atomic_inc(&init_mm.mm_count); |
362 | current->active_mm = &init_mm; |
363 | - if (current->mm) |
364 | - BUG(); |
365 | + BUG_ON(current->mm); |
366 | enter_lazy_tlb(&init_mm, current); |
367 | |
368 | load_esp0(t, thread); |
369 | @@ -712,7 +720,7 @@ |
370 | #endif |
371 | |
372 | /* Clear %fs and %gs. */ |
373 | - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); |
374 | + asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); |
375 | |
376 | /* Clear all 6 debug registers: */ |
377 | set_debugreg(0, 0); |
378 | --- a/arch/x86/kernel/e820_64-xen.c |
379 | +++ b/arch/x86/kernel/e820_64-xen.c |
380 | @@ -16,6 +16,7 @@ |
381 | #include <linux/string.h> |
382 | #include <linux/kexec.h> |
383 | #include <linux/module.h> |
384 | +#include <linux/mm.h> |
385 | |
386 | #include <asm/pgtable.h> |
387 | #include <asm/page.h> |
388 | @@ -25,6 +26,11 @@ |
389 | #include <asm/sections.h> |
390 | #include <xen/interface/memory.h> |
391 | |
392 | +struct e820map e820 __initdata; |
393 | +#ifdef CONFIG_XEN |
394 | +struct e820map machine_e820 __initdata; |
395 | +#endif |
396 | + |
397 | /* |
398 | * PFN of last memory page. |
399 | */ |
400 | @@ -41,7 +47,7 @@ |
401 | /* |
402 | * Last pfn which the user wants to use. |
403 | */ |
404 | -unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT; |
405 | +static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; |
406 | |
407 | extern struct resource code_resource, data_resource; |
408 | |
409 | @@ -53,13 +59,13 @@ |
410 | #ifndef CONFIG_XEN |
411 | /* various gunk below that needed for SMP startup */ |
412 | if (addr < 0x8000) { |
413 | - *addrp = 0x8000; |
414 | + *addrp = PAGE_ALIGN(0x8000); |
415 | return 1; |
416 | } |
417 | |
418 | /* direct mapping tables of the kernel */ |
419 | if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) { |
420 | - *addrp = table_end << PAGE_SHIFT; |
421 | + *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT); |
422 | return 1; |
423 | } |
424 | |
425 | @@ -67,23 +73,18 @@ |
426 | #ifdef CONFIG_BLK_DEV_INITRD |
427 | if (LOADER_TYPE && INITRD_START && last >= INITRD_START && |
428 | addr < INITRD_START+INITRD_SIZE) { |
429 | - *addrp = INITRD_START + INITRD_SIZE; |
430 | + *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE); |
431 | return 1; |
432 | } |
433 | #endif |
434 | - /* kernel code + 640k memory hole (later should not be needed, but |
435 | - be paranoid for now) */ |
436 | - if (last >= 640*1024 && addr < 1024*1024) { |
437 | - *addrp = 1024*1024; |
438 | - return 1; |
439 | - } |
440 | - if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) { |
441 | - *addrp = __pa_symbol(&_end); |
442 | + /* kernel code */ |
443 | + if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) { |
444 | + *addrp = PAGE_ALIGN(__pa_symbol(&_end)); |
445 | return 1; |
446 | } |
447 | |
448 | if (last >= ebda_addr && addr < ebda_addr + ebda_size) { |
449 | - *addrp = ebda_addr + ebda_size; |
450 | + *addrp = PAGE_ALIGN(ebda_addr + ebda_size); |
451 | return 1; |
452 | } |
453 | |
454 | @@ -141,8 +142,6 @@ |
455 | for (i = 0; i < e820.nr_map; i++) { |
456 | struct e820entry *ei = &e820.map[i]; |
457 | #else |
458 | - extern struct e820map machine_e820; |
459 | - |
460 | if (!is_initial_xendomain()) |
461 | return 0; |
462 | for (i = 0; i < machine_e820.nr_map; i++) { |
463 | @@ -184,7 +183,7 @@ |
464 | continue; |
465 | while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) |
466 | ; |
467 | - last = addr + size; |
468 | + last = PAGE_ALIGN(addr) + size; |
469 | if (last > ei->addr + ei->size) |
470 | continue; |
471 | if (last > end) |
472 | @@ -194,59 +193,14 @@ |
473 | return -1UL; |
474 | } |
475 | |
476 | -/* |
477 | - * Free bootmem based on the e820 table for a node. |
478 | - */ |
479 | -void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end) |
480 | -{ |
481 | - int i; |
482 | - for (i = 0; i < e820.nr_map; i++) { |
483 | - struct e820entry *ei = &e820.map[i]; |
484 | - unsigned long last, addr; |
485 | - |
486 | - if (ei->type != E820_RAM || |
487 | - ei->addr+ei->size <= start || |
488 | - ei->addr >= end) |
489 | - continue; |
490 | - |
491 | - addr = round_up(ei->addr, PAGE_SIZE); |
492 | - if (addr < start) |
493 | - addr = start; |
494 | - |
495 | - last = round_down(ei->addr + ei->size, PAGE_SIZE); |
496 | - if (last >= end) |
497 | - last = end; |
498 | - |
499 | - if (last > addr && last-addr >= PAGE_SIZE) |
500 | - free_bootmem_node(pgdat, addr, last-addr); |
501 | - } |
502 | -} |
503 | - |
504 | /* |
505 | * Find the highest page frame number we have available |
506 | */ |
507 | unsigned long __init e820_end_of_ram(void) |
508 | { |
509 | - int i; |
510 | unsigned long end_pfn = 0; |
511 | + end_pfn = find_max_pfn_with_active_regions(); |
512 | |
513 | - for (i = 0; i < e820.nr_map; i++) { |
514 | - struct e820entry *ei = &e820.map[i]; |
515 | - unsigned long start, end; |
516 | - |
517 | - start = round_up(ei->addr, PAGE_SIZE); |
518 | - end = round_down(ei->addr + ei->size, PAGE_SIZE); |
519 | - if (start >= end) |
520 | - continue; |
521 | - if (ei->type == E820_RAM) { |
522 | - if (end > end_pfn<<PAGE_SHIFT) |
523 | - end_pfn = end>>PAGE_SHIFT; |
524 | - } else { |
525 | - if (end > end_pfn_map<<PAGE_SHIFT) |
526 | - end_pfn_map = end>>PAGE_SHIFT; |
527 | - } |
528 | - } |
529 | - |
530 | if (end_pfn > end_pfn_map) |
531 | end_pfn_map = end_pfn; |
532 | if (end_pfn_map > MAXMEM>>PAGE_SHIFT) |
533 | @@ -256,43 +210,10 @@ |
534 | if (end_pfn > end_pfn_map) |
535 | end_pfn = end_pfn_map; |
536 | |
537 | + printk("end_pfn_map = %lu\n", end_pfn_map); |
538 | return end_pfn; |
539 | } |
540 | |
541 | -/* |
542 | - * Compute how much memory is missing in a range. |
543 | - * Unlike the other functions in this file the arguments are in page numbers. |
544 | - */ |
545 | -unsigned long __init |
546 | -e820_hole_size(unsigned long start_pfn, unsigned long end_pfn) |
547 | -{ |
548 | - unsigned long ram = 0; |
549 | - unsigned long start = start_pfn << PAGE_SHIFT; |
550 | - unsigned long end = end_pfn << PAGE_SHIFT; |
551 | - int i; |
552 | - for (i = 0; i < e820.nr_map; i++) { |
553 | - struct e820entry *ei = &e820.map[i]; |
554 | - unsigned long last, addr; |
555 | - |
556 | - if (ei->type != E820_RAM || |
557 | - ei->addr+ei->size <= start || |
558 | - ei->addr >= end) |
559 | - continue; |
560 | - |
561 | - addr = round_up(ei->addr, PAGE_SIZE); |
562 | - if (addr < start) |
563 | - addr = start; |
564 | - |
565 | - last = round_down(ei->addr + ei->size, PAGE_SIZE); |
566 | - if (last >= end) |
567 | - last = end; |
568 | - |
569 | - if (last > addr) |
570 | - ram += last - addr; |
571 | - } |
572 | - return ((end - start) - ram) >> PAGE_SHIFT; |
573 | -} |
574 | - |
575 | /* |
576 | * Mark e820 reserved areas as busy for the resource manager. |
577 | */ |
578 | @@ -333,6 +254,98 @@ |
579 | } |
580 | } |
581 | |
582 | +#ifndef CONFIG_XEN |
583 | +/* Mark pages corresponding to given address range as nosave */ |
584 | +static void __init |
585 | +e820_mark_nosave_range(unsigned long start, unsigned long end) |
586 | +{ |
587 | + unsigned long pfn, max_pfn; |
588 | + |
589 | + if (start >= end) |
590 | + return; |
591 | + |
592 | + printk("Nosave address range: %016lx - %016lx\n", start, end); |
593 | + max_pfn = end >> PAGE_SHIFT; |
594 | + for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) |
595 | + if (pfn_valid(pfn)) |
596 | + SetPageNosave(pfn_to_page(pfn)); |
597 | +} |
598 | + |
599 | +/* |
600 | + * Find the ranges of physical addresses that do not correspond to |
601 | + * e820 RAM areas and mark the corresponding pages as nosave for software |
602 | + * suspend and suspend to RAM. |
603 | + * |
604 | + * This function requires the e820 map to be sorted and without any |
605 | + * overlapping entries and assumes the first e820 area to be RAM. |
606 | + */ |
607 | +void __init e820_mark_nosave_regions(void) |
608 | +{ |
609 | + int i; |
610 | + unsigned long paddr; |
611 | + |
612 | + paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); |
613 | + for (i = 1; i < e820.nr_map; i++) { |
614 | + struct e820entry *ei = &e820.map[i]; |
615 | + |
616 | + if (paddr < ei->addr) |
617 | + e820_mark_nosave_range(paddr, |
618 | + round_up(ei->addr, PAGE_SIZE)); |
619 | + |
620 | + paddr = round_down(ei->addr + ei->size, PAGE_SIZE); |
621 | + if (ei->type != E820_RAM) |
622 | + e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), |
623 | + paddr); |
624 | + |
625 | + if (paddr >= (end_pfn << PAGE_SHIFT)) |
626 | + break; |
627 | + } |
628 | +} |
629 | +#endif |
630 | + |
631 | +/* Walk the e820 map and register active regions within a node */ |
632 | +void __init |
633 | +e820_register_active_regions(int nid, unsigned long start_pfn, |
634 | + unsigned long end_pfn) |
635 | +{ |
636 | + int i; |
637 | + unsigned long ei_startpfn, ei_endpfn; |
638 | + for (i = 0; i < e820.nr_map; i++) { |
639 | + struct e820entry *ei = &e820.map[i]; |
640 | + ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; |
641 | + ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) |
642 | + >> PAGE_SHIFT; |
643 | + |
644 | + /* Skip map entries smaller than a page */ |
645 | + if (ei_startpfn >= ei_endpfn) |
646 | + continue; |
647 | + |
648 | + /* Check if end_pfn_map should be updated */ |
649 | + if (ei->type != E820_RAM && ei_endpfn > end_pfn_map) |
650 | + end_pfn_map = ei_endpfn; |
651 | + |
652 | + /* Skip if map is outside the node */ |
653 | + if (ei->type != E820_RAM || |
654 | + ei_endpfn <= start_pfn || |
655 | + ei_startpfn >= end_pfn) |
656 | + continue; |
657 | + |
658 | + /* Check for overlaps */ |
659 | + if (ei_startpfn < start_pfn) |
660 | + ei_startpfn = start_pfn; |
661 | + if (ei_endpfn > end_pfn) |
662 | + ei_endpfn = end_pfn; |
663 | + |
664 | + /* Obey end_user_pfn to save on memmap */ |
665 | + if (ei_startpfn >= end_user_pfn) |
666 | + continue; |
667 | + if (ei_endpfn > end_user_pfn) |
668 | + ei_endpfn = end_user_pfn; |
669 | + |
670 | + add_active_range(nid, ei_startpfn, ei_endpfn); |
671 | + } |
672 | +} |
673 | + |
674 | /* |
675 | * Add a memory region to the kernel e820 map. |
676 | */ |
677 | @@ -553,13 +566,6 @@ |
678 | * If we're lucky and live on a modern system, the setup code |
679 | * will have given us a memory map that we can use to properly |
680 | * set up memory. If we aren't, we'll fake a memory map. |
681 | - * |
682 | - * We check to see that the memory map contains at least 2 elements |
683 | - * before we'll use it, because the detection code in setup.S may |
684 | - * not be perfect and most every PC known to man has two memory |
685 | - * regions: one from 0 to 640k, and one from 1mb up. (The IBM |
686 | - * thinkpad 560x, for example, does not cooperate with the memory |
687 | - * detection code.) |
688 | */ |
689 | static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) |
690 | { |
691 | @@ -581,37 +587,20 @@ |
692 | if (start > end) |
693 | return -1; |
694 | |
695 | -#ifndef CONFIG_XEN |
696 | - /* |
697 | - * Some BIOSes claim RAM in the 640k - 1M region. |
698 | - * Not right. Fix it up. |
699 | - * |
700 | - * This should be removed on Hammer which is supposed to not |
701 | - * have non e820 covered ISA mappings there, but I had some strange |
702 | - * problems so it stays for now. -AK |
703 | - */ |
704 | - if (type == E820_RAM) { |
705 | - if (start < 0x100000ULL && end > 0xA0000ULL) { |
706 | - if (start < 0xA0000ULL) |
707 | - add_memory_region(start, 0xA0000ULL-start, type); |
708 | - if (end <= 0x100000ULL) |
709 | - continue; |
710 | - start = 0x100000ULL; |
711 | - size = end - start; |
712 | - } |
713 | - } |
714 | -#endif |
715 | - |
716 | add_memory_region(start, size, type); |
717 | } while (biosmap++,--nr_map); |
718 | return 0; |
719 | } |
720 | |
721 | +void early_panic(char *msg) |
722 | +{ |
723 | + early_printk(msg); |
724 | + panic(msg); |
725 | +} |
726 | + |
727 | #ifndef CONFIG_XEN |
728 | void __init setup_memory_region(void) |
729 | { |
730 | - char *who = "BIOS-e820"; |
731 | - |
732 | /* |
733 | * Try to copy the BIOS-supplied E820-map. |
734 | * |
735 | @@ -619,24 +608,10 @@ |
736 | * the next section from 1mb->appropriate_mem_k |
737 | */ |
738 | sanitize_e820_map(E820_MAP, &E820_MAP_NR); |
739 | - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { |
740 | - unsigned long mem_size; |
741 | - |
742 | - /* compare results from other methods and take the greater */ |
743 | - if (ALT_MEM_K < EXT_MEM_K) { |
744 | - mem_size = EXT_MEM_K; |
745 | - who = "BIOS-88"; |
746 | - } else { |
747 | - mem_size = ALT_MEM_K; |
748 | - who = "BIOS-e801"; |
749 | - } |
750 | - |
751 | - e820.nr_map = 0; |
752 | - add_memory_region(0, LOWMEMSIZE(), E820_RAM); |
753 | - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); |
754 | - } |
755 | + if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) |
756 | + early_panic("Cannot find a valid memory map"); |
757 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
758 | - e820_print_map(who); |
759 | + e820_print_map("BIOS-e820"); |
760 | } |
761 | |
762 | #else /* CONFIG_XEN */ |
763 | @@ -668,20 +643,23 @@ |
764 | |
765 | sanitize_e820_map(map, (char *)&memmap.nr_entries); |
766 | |
767 | - BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0); |
768 | + if (copy_e820_map(map, (char)memmap.nr_entries) < 0) |
769 | + early_panic("Cannot find a valid memory map"); |
770 | |
771 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
772 | e820_print_map("Xen"); |
773 | } |
774 | #endif |
775 | |
776 | -void __init parse_memopt(char *p, char **from) |
777 | -{ |
778 | +static int __init parse_memopt(char *p) |
779 | +{ |
780 | int i; |
781 | unsigned long current_end; |
782 | unsigned long end; |
783 | |
784 | - end_user_pfn = memparse(p, from); |
785 | + if (!p) |
786 | + return -EINVAL; |
787 | + end_user_pfn = memparse(p, &p); |
788 | end_user_pfn >>= PAGE_SHIFT; |
789 | |
790 | end = end_user_pfn<<PAGE_SHIFT; |
791 | @@ -698,27 +676,61 @@ |
792 | else |
793 | add_memory_region(current_end, end - current_end, E820_RAM); |
794 | } |
795 | + |
796 | + return 0; |
797 | } |
798 | +early_param("mem", parse_memopt); |
799 | + |
800 | +static int userdef __initdata; |
801 | |
802 | -void __init parse_memmapopt(char *p, char **from) |
803 | +static int __init parse_memmap_opt(char *p) |
804 | { |
805 | + char *oldp; |
806 | unsigned long long start_at, mem_size; |
807 | |
808 | - mem_size = memparse(p, from); |
809 | - p = *from; |
810 | + if (!strcmp(p, "exactmap")) { |
811 | +#ifdef CONFIG_CRASH_DUMP |
812 | + /* If we are doing a crash dump, we |
813 | + * still need to know the real mem |
814 | + * size before original memory map is |
815 | + * reset. |
816 | + */ |
817 | + e820_register_active_regions(0, 0, -1UL); |
818 | + saved_max_pfn = e820_end_of_ram(); |
819 | + remove_all_active_ranges(); |
820 | +#endif |
821 | + end_pfn_map = 0; |
822 | + e820.nr_map = 0; |
823 | + userdef = 1; |
824 | + return 0; |
825 | + } |
826 | + |
827 | + oldp = p; |
828 | + mem_size = memparse(p, &p); |
829 | + if (p == oldp) |
830 | + return -EINVAL; |
831 | if (*p == '@') { |
832 | - start_at = memparse(p+1, from); |
833 | + start_at = memparse(p+1, &p); |
834 | add_memory_region(start_at, mem_size, E820_RAM); |
835 | } else if (*p == '#') { |
836 | - start_at = memparse(p+1, from); |
837 | + start_at = memparse(p+1, &p); |
838 | add_memory_region(start_at, mem_size, E820_ACPI); |
839 | } else if (*p == '$') { |
840 | - start_at = memparse(p+1, from); |
841 | + start_at = memparse(p+1, &p); |
842 | add_memory_region(start_at, mem_size, E820_RESERVED); |
843 | } else { |
844 | end_user_pfn = (mem_size >> PAGE_SHIFT); |
845 | } |
846 | - p = *from; |
847 | + return *p == '\0' ? 0 : -EINVAL; |
848 | +} |
849 | +early_param("memmap", parse_memmap_opt); |
850 | + |
851 | +void finish_e820_parsing(void) |
852 | +{ |
853 | + if (userdef) { |
854 | + printk(KERN_INFO "user-defined physical RAM map:\n"); |
855 | + e820_print_map("user"); |
856 | + } |
857 | } |
858 | |
859 | unsigned long pci_mem_start = 0xaeedbabe; |
860 | --- a/arch/x86/kernel/early_printk-xen.c |
861 | +++ b/arch/x86/kernel/early_printk-xen.c |
862 | @@ -244,20 +244,16 @@ |
863 | |
864 | static int __initdata keep_early; |
865 | |
866 | -int __init setup_early_printk(char *opt) |
867 | +static int __init setup_early_printk(char *buf) |
868 | { |
869 | - char *space; |
870 | - char buf[256]; |
871 | + if (!buf) |
872 | + return 0; |
873 | |
874 | if (early_console_initialized) |
875 | - return 1; |
876 | - |
877 | - strlcpy(buf,opt,sizeof(buf)); |
878 | - space = strchr(buf, ' '); |
879 | - if (space) |
880 | - *space = 0; |
881 | + return 0; |
882 | + early_console_initialized = 1; |
883 | |
884 | - if (strstr(buf,"keep")) |
885 | + if (strstr(buf, "keep")) |
886 | keep_early = 1; |
887 | |
888 | if (!strncmp(buf, "serial", 6)) { |
889 | @@ -281,11 +277,12 @@ |
890 | early_console = &simnow_console; |
891 | keep_early = 1; |
892 | } |
893 | - early_console_initialized = 1; |
894 | register_console(early_console); |
895 | return 0; |
896 | } |
897 | |
898 | +early_param("earlyprintk", setup_early_printk); |
899 | + |
900 | void __init disable_early_printk(void) |
901 | { |
902 | if (!early_console_initialized || !early_console) |
903 | @@ -299,4 +296,3 @@ |
904 | } |
905 | } |
906 | |
907 | -__setup("earlyprintk=", setup_early_printk); |
908 | --- a/arch/x86/kernel/entry_32-xen.S |
909 | +++ b/arch/x86/kernel/entry_32-xen.S |
910 | @@ -80,8 +80,12 @@ |
911 | NMI_MASK = 0x80000000 |
912 | |
913 | #ifndef CONFIG_XEN |
914 | -#define DISABLE_INTERRUPTS cli |
915 | -#define ENABLE_INTERRUPTS sti |
916 | +/* These are replaces for paravirtualization */ |
917 | +#define DISABLE_INTERRUPTS cli |
918 | +#define ENABLE_INTERRUPTS sti |
919 | +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit |
920 | +#define INTERRUPT_RETURN iret |
921 | +#define GET_CR0_INTO_EAX movl %cr0, %eax |
922 | #else |
923 | /* Offsets into shared_info_t. */ |
924 | #define evtchn_upcall_pending /* 0 */ |
925 | @@ -99,15 +103,29 @@ |
926 | |
927 | #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) |
928 | #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) |
929 | +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) |
930 | #define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ |
931 | __DISABLE_INTERRUPTS |
932 | #define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ |
933 | __ENABLE_INTERRUPTS |
934 | -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) |
935 | +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ |
936 | +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ |
937 | + __TEST_PENDING ; \ |
938 | + jnz 14f # process more events if necessary... ; \ |
939 | + movl ESI(%esp), %esi ; \ |
940 | + sysexit ; \ |
941 | +14: __DISABLE_INTERRUPTS ; \ |
942 | + TRACE_IRQS_OFF ; \ |
943 | +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ |
944 | + push %esp ; \ |
945 | + call evtchn_do_upcall ; \ |
946 | + add $4,%esp ; \ |
947 | + jmp ret_from_intr |
948 | +#define INTERRUPT_RETURN iret |
949 | #endif |
950 | |
951 | #ifdef CONFIG_PREEMPT |
952 | -#define preempt_stop cli; TRACE_IRQS_OFF |
953 | +#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF |
954 | #else |
955 | #define preempt_stop |
956 | #define resume_kernel restore_nocheck |
957 | @@ -206,18 +224,21 @@ |
958 | |
959 | #define RING0_INT_FRAME \ |
960 | CFI_STARTPROC simple;\ |
961 | + CFI_SIGNAL_FRAME;\ |
962 | CFI_DEF_CFA esp, 3*4;\ |
963 | /*CFI_OFFSET cs, -2*4;*/\ |
964 | CFI_OFFSET eip, -3*4 |
965 | |
966 | #define RING0_EC_FRAME \ |
967 | CFI_STARTPROC simple;\ |
968 | + CFI_SIGNAL_FRAME;\ |
969 | CFI_DEF_CFA esp, 4*4;\ |
970 | /*CFI_OFFSET cs, -2*4;*/\ |
971 | CFI_OFFSET eip, -3*4 |
972 | |
973 | #define RING0_PTREGS_FRAME \ |
974 | CFI_STARTPROC simple;\ |
975 | + CFI_SIGNAL_FRAME;\ |
976 | CFI_DEF_CFA esp, OLDESP-EBX;\ |
977 | /*CFI_OFFSET cs, CS-OLDESP;*/\ |
978 | CFI_OFFSET eip, EIP-OLDESP;\ |
979 | @@ -263,8 +284,9 @@ |
980 | check_userspace: |
981 | movl EFLAGS(%esp), %eax # mix EFLAGS and CS |
982 | movb CS(%esp), %al |
983 | - testl $(VM_MASK | 2), %eax |
984 | - jz resume_kernel |
985 | + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax |
986 | + cmpl $USER_RPL, %eax |
987 | + jb resume_kernel # not returning to v8086 or userspace |
988 | ENTRY(resume_userspace) |
989 | DISABLE_INTERRUPTS # make sure we don't miss an interrupt |
990 | # setting need_resched or sigpending |
991 | @@ -277,7 +299,7 @@ |
992 | |
993 | #ifdef CONFIG_PREEMPT |
994 | ENTRY(resume_kernel) |
995 | - cli |
996 | + DISABLE_INTERRUPTS |
997 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? |
998 | jnz restore_nocheck |
999 | need_resched: |
1000 | @@ -297,6 +319,7 @@ |
1001 | # sysenter call handler stub |
1002 | ENTRY(sysenter_entry) |
1003 | CFI_STARTPROC simple |
1004 | + CFI_SIGNAL_FRAME |
1005 | CFI_DEF_CFA esp, 0 |
1006 | CFI_REGISTER esp, ebp |
1007 | movl SYSENTER_stack_esp0(%esp),%esp |
1008 | @@ -305,7 +328,7 @@ |
1009 | * No need to follow this irqs on/off section: the syscall |
1010 | * disabled irqs and here we enable it straight after entry: |
1011 | */ |
1012 | - sti |
1013 | + ENABLE_INTERRUPTS |
1014 | pushl $(__USER_DS) |
1015 | CFI_ADJUST_CFA_OFFSET 4 |
1016 | /*CFI_REL_OFFSET ss, 0*/ |
1017 | @@ -359,26 +382,8 @@ |
1018 | movl EIP(%esp), %edx |
1019 | movl OLDESP(%esp), %ecx |
1020 | xorl %ebp,%ebp |
1021 | -#ifdef CONFIG_XEN |
1022 | TRACE_IRQS_ON |
1023 | - __ENABLE_INTERRUPTS |
1024 | -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ |
1025 | - __TEST_PENDING |
1026 | - jnz 14f # process more events if necessary... |
1027 | - movl ESI(%esp), %esi |
1028 | - sysexit |
1029 | -14: __DISABLE_INTERRUPTS |
1030 | - TRACE_IRQS_OFF |
1031 | -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ |
1032 | - push %esp |
1033 | - call evtchn_do_upcall |
1034 | - add $4,%esp |
1035 | - jmp ret_from_intr |
1036 | -#else |
1037 | - TRACE_IRQS_ON |
1038 | - sti |
1039 | - sysexit |
1040 | -#endif /* !CONFIG_XEN */ |
1041 | + ENABLE_INTERRUPTS_SYSEXIT |
1042 | CFI_ENDPROC |
1043 | |
1044 | # pv sysenter call handler stub |
1045 | @@ -444,8 +449,8 @@ |
1046 | # See comments in process.c:copy_thread() for details. |
1047 | movb OLDSS(%esp), %ah |
1048 | movb CS(%esp), %al |
1049 | - andl $(VM_MASK | (4 << 8) | 3), %eax |
1050 | - cmpl $((4 << 8) | 3), %eax |
1051 | + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax |
1052 | + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax |
1053 | CFI_REMEMBER_STATE |
1054 | je ldt_ss # returning to user-space with LDT SS |
1055 | restore_nocheck: |
1056 | @@ -467,12 +472,11 @@ |
1057 | RESTORE_REGS |
1058 | addl $4, %esp |
1059 | CFI_ADJUST_CFA_OFFSET -4 |
1060 | -1: iret |
1061 | +1: INTERRUPT_RETURN |
1062 | .section .fixup,"ax" |
1063 | iret_exc: |
1064 | #ifndef CONFIG_XEN |
1065 | - TRACE_IRQS_ON |
1066 | - sti |
1067 | + ENABLE_INTERRUPTS |
1068 | #endif |
1069 | pushl $0 # no error code |
1070 | pushl $do_iret_error |
1071 | @@ -498,7 +502,7 @@ |
1072 | * dosemu and wine happy. */ |
1073 | subl $8, %esp # reserve space for switch16 pointer |
1074 | CFI_ADJUST_CFA_OFFSET 8 |
1075 | - cli |
1076 | + DISABLE_INTERRUPTS |
1077 | TRACE_IRQS_OFF |
1078 | movl %esp, %eax |
1079 | /* Set up the 16bit stack frame with switch32 pointer on top, |
1080 | @@ -508,7 +512,7 @@ |
1081 | TRACE_IRQS_IRET |
1082 | RESTORE_REGS |
1083 | lss 20+4(%esp), %esp # switch to 16bit stack |
1084 | -1: iret |
1085 | +1: INTERRUPT_RETURN |
1086 | .section __ex_table,"a" |
1087 | .align 4 |
1088 | .long 1b,iret_exc |
1089 | @@ -524,7 +528,7 @@ |
1090 | RESTORE_REGS |
1091 | addl $4, %esp |
1092 | CFI_ADJUST_CFA_OFFSET -4 |
1093 | -1: iret |
1094 | +1: INTERRUPT_RETURN |
1095 | .section __ex_table,"a" |
1096 | .align 4 |
1097 | .long 1b,iret_exc |
1098 | @@ -713,11 +717,9 @@ |
1099 | #define UNWIND_ESPFIX_STACK |
1100 | #endif |
1101 | |
1102 | -ENTRY(divide_error) |
1103 | - RING0_INT_FRAME |
1104 | - pushl $0 # no error code |
1105 | - CFI_ADJUST_CFA_OFFSET 4 |
1106 | - pushl $do_divide_error |
1107 | +KPROBE_ENTRY(page_fault) |
1108 | + RING0_EC_FRAME |
1109 | + pushl $do_page_fault |
1110 | CFI_ADJUST_CFA_OFFSET 4 |
1111 | ALIGN |
1112 | error_code: |
1113 | @@ -767,6 +769,7 @@ |
1114 | call *%edi |
1115 | jmp ret_from_exception |
1116 | CFI_ENDPROC |
1117 | +KPROBE_END(page_fault) |
1118 | |
1119 | #ifdef CONFIG_XEN |
1120 | # A note on the "critical region" in our callback handler. |
1121 | @@ -926,7 +929,7 @@ |
1122 | CFI_ADJUST_CFA_OFFSET 4 |
1123 | SAVE_ALL |
1124 | #ifndef CONFIG_XEN |
1125 | - movl %cr0, %eax |
1126 | + GET_CR0_INTO_EAX |
1127 | testl $0x4, %eax # EM (math emulation bit) |
1128 | je device_available_emulate |
1129 | pushl $0 # temporary storage for ORIG_EIP |
1130 | @@ -961,9 +964,15 @@ |
1131 | jne ok; \ |
1132 | label: \ |
1133 | movl SYSENTER_stack_esp0+offset(%esp),%esp; \ |
1134 | + CFI_DEF_CFA esp, 0; \ |
1135 | + CFI_UNDEFINED eip; \ |
1136 | pushfl; \ |
1137 | + CFI_ADJUST_CFA_OFFSET 4; \ |
1138 | pushl $__KERNEL_CS; \ |
1139 | - pushl $sysenter_past_esp |
1140 | + CFI_ADJUST_CFA_OFFSET 4; \ |
1141 | + pushl $sysenter_past_esp; \ |
1142 | + CFI_ADJUST_CFA_OFFSET 4; \ |
1143 | + CFI_REL_OFFSET eip, 0 |
1144 | #endif /* CONFIG_XEN */ |
1145 | |
1146 | KPROBE_ENTRY(debug) |
1147 | @@ -982,7 +991,8 @@ |
1148 | call do_debug |
1149 | jmp ret_from_exception |
1150 | CFI_ENDPROC |
1151 | - .previous .text |
1152 | +KPROBE_END(debug) |
1153 | + |
1154 | #ifndef CONFIG_XEN |
1155 | /* |
1156 | * NMI is doubly nasty. It can happen _while_ we're handling |
1157 | @@ -992,7 +1002,7 @@ |
1158 | * check whether we got an NMI on the debug path where the debug |
1159 | * fault happened on the sysenter path. |
1160 | */ |
1161 | -ENTRY(nmi) |
1162 | +KPROBE_ENTRY(nmi) |
1163 | RING0_INT_FRAME |
1164 | pushl %eax |
1165 | CFI_ADJUST_CFA_OFFSET 4 |
1166 | @@ -1017,6 +1027,7 @@ |
1167 | cmpl $sysenter_entry,12(%esp) |
1168 | je nmi_debug_stack_check |
1169 | nmi_stack_correct: |
1170 | + /* We have a RING0_INT_FRAME here */ |
1171 | pushl %eax |
1172 | CFI_ADJUST_CFA_OFFSET 4 |
1173 | SAVE_ALL |
1174 | @@ -1027,9 +1038,12 @@ |
1175 | CFI_ENDPROC |
1176 | |
1177 | nmi_stack_fixup: |
1178 | + RING0_INT_FRAME |
1179 | FIX_STACK(12,nmi_stack_correct, 1) |
1180 | jmp nmi_stack_correct |
1181 | + |
1182 | nmi_debug_stack_check: |
1183 | + /* We have a RING0_INT_FRAME here */ |
1184 | cmpw $__KERNEL_CS,16(%esp) |
1185 | jne nmi_stack_correct |
1186 | cmpl $debug,(%esp) |
1187 | @@ -1040,8 +1054,10 @@ |
1188 | jmp nmi_stack_correct |
1189 | |
1190 | nmi_16bit_stack: |
1191 | - RING0_INT_FRAME |
1192 | - /* create the pointer to lss back */ |
1193 | + /* We have a RING0_INT_FRAME here. |
1194 | + * |
1195 | + * create the pointer to lss back |
1196 | + */ |
1197 | pushl %ss |
1198 | CFI_ADJUST_CFA_OFFSET 4 |
1199 | pushl %esp |
1200 | @@ -1062,14 +1078,14 @@ |
1201 | call do_nmi |
1202 | RESTORE_REGS |
1203 | lss 12+4(%esp), %esp # back to 16bit stack |
1204 | -1: iret |
1205 | +1: INTERRUPT_RETURN |
1206 | CFI_ENDPROC |
1207 | .section __ex_table,"a" |
1208 | .align 4 |
1209 | .long 1b,iret_exc |
1210 | .previous |
1211 | #else |
1212 | -ENTRY(nmi) |
1213 | +KPROBE_ENTRY(nmi) |
1214 | RING0_INT_FRAME |
1215 | pushl %eax |
1216 | CFI_ADJUST_CFA_OFFSET 4 |
1217 | @@ -1081,6 +1097,7 @@ |
1218 | jmp restore_all |
1219 | CFI_ENDPROC |
1220 | #endif |
1221 | +KPROBE_END(nmi) |
1222 | |
1223 | KPROBE_ENTRY(int3) |
1224 | RING0_INT_FRAME |
1225 | @@ -1092,7 +1109,7 @@ |
1226 | call do_int3 |
1227 | jmp ret_from_exception |
1228 | CFI_ENDPROC |
1229 | - .previous .text |
1230 | +KPROBE_END(int3) |
1231 | |
1232 | ENTRY(overflow) |
1233 | RING0_INT_FRAME |
1234 | @@ -1157,7 +1174,7 @@ |
1235 | CFI_ADJUST_CFA_OFFSET 4 |
1236 | jmp error_code |
1237 | CFI_ENDPROC |
1238 | - .previous .text |
1239 | +KPROBE_END(general_protection) |
1240 | |
1241 | ENTRY(alignment_check) |
1242 | RING0_EC_FRAME |
1243 | @@ -1166,13 +1183,14 @@ |
1244 | jmp error_code |
1245 | CFI_ENDPROC |
1246 | |
1247 | -KPROBE_ENTRY(page_fault) |
1248 | - RING0_EC_FRAME |
1249 | - pushl $do_page_fault |
1250 | +ENTRY(divide_error) |
1251 | + RING0_INT_FRAME |
1252 | + pushl $0 # no error code |
1253 | + CFI_ADJUST_CFA_OFFSET 4 |
1254 | + pushl $do_divide_error |
1255 | CFI_ADJUST_CFA_OFFSET 4 |
1256 | jmp error_code |
1257 | CFI_ENDPROC |
1258 | - .previous .text |
1259 | |
1260 | #ifdef CONFIG_X86_MCE |
1261 | ENTRY(machine_check) |
1262 | @@ -1234,6 +1252,19 @@ |
1263 | jmp error_code |
1264 | CFI_ENDPROC |
1265 | |
1266 | +ENTRY(kernel_thread_helper) |
1267 | + pushl $0 # fake return address for unwinder |
1268 | + CFI_STARTPROC |
1269 | + movl %edx,%eax |
1270 | + push %edx |
1271 | + CFI_ADJUST_CFA_OFFSET 4 |
1272 | + call *%ebx |
1273 | + push %eax |
1274 | + CFI_ADJUST_CFA_OFFSET 4 |
1275 | + call do_exit |
1276 | + CFI_ENDPROC |
1277 | +ENDPROC(kernel_thread_helper) |
1278 | + |
1279 | .section .rodata,"a" |
1280 | #include "syscall_table.S" |
1281 | |
1282 | --- a/arch/x86/kernel/entry_64-xen.S |
1283 | +++ b/arch/x86/kernel/entry_64-xen.S |
1284 | @@ -26,15 +23,25 @@ |
1285 | * at the top of the kernel process stack. |
1286 | * - partial stack frame: partially saved registers upto R11. |
1287 | * - full stack frame: Like partial stack frame, but all register saved. |
1288 | - * |
1289 | - * TODO: |
1290 | - * - schedule it carefully for the final hardware. |
1291 | + * |
1292 | + * Some macro usage: |
1293 | + * - CFI macros are used to generate dwarf2 unwind information for better |
1294 | + * backtraces. They don't change any code. |
1295 | + * - SAVE_ALL/RESTORE_ALL - Save/restore all registers |
1296 | + * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. |
1297 | + * There are unfortunately lots of special cases where some registers |
1298 | + * not touched. The macro is a big mess that should be cleaned up. |
1299 | + * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. |
1300 | + * Gives a full stack frame. |
1301 | + * - ENTRY/END Define functions in the symbol table. |
1302 | + * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack |
1303 | + * frame that is otherwise undefined after a SYSCALL |
1304 | + * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. |
1305 | + * - errorentry/paranoidentry/zeroentry - Define exception entry points. |
1306 | */ |
1307 | |
1308 | -#define ASSEMBLY 1 |
1309 | #include <linux/linkage.h> |
1310 | #include <asm/segment.h> |
1311 | -#include <asm/smp.h> |
1312 | #include <asm/cache.h> |
1313 | #include <asm/errno.h> |
1314 | #include <asm/dwarf2.h> |
1315 | @@ -117,6 +124,7 @@ |
1316 | .macro CFI_DEFAULT_STACK start=1,adj=0 |
1317 | .if \start |
1318 | CFI_STARTPROC simple |
1319 | + CFI_SIGNAL_FRAME |
1320 | CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET) |
1321 | .else |
1322 | CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET) |
1323 | @@ -207,6 +215,7 @@ |
1324 | */ |
1325 | .macro _frame ref |
1326 | CFI_STARTPROC simple |
1327 | + CFI_SIGNAL_FRAME |
1328 | CFI_DEF_CFA rsp,SS+8-\ref |
1329 | /*CFI_REL_OFFSET ss,SS-\ref*/ |
1330 | CFI_REL_OFFSET rsp,RSP-\ref |
1331 | @@ -334,6 +343,8 @@ |
1332 | LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ |
1333 | RESTORE_REST |
1334 | cmpq $__NR_syscall_max,%rax |
1335 | + movq $-ENOSYS,%rcx |
1336 | + cmova %rcx,%rax |
1337 | ja 1f |
1338 | movq %r10,%rcx /* fixup for C */ |
1339 | call *sys_call_table(,%rax,8) |
1340 | @@ -349,6 +360,7 @@ |
1341 | */ |
1342 | ENTRY(int_ret_from_sys_call) |
1343 | CFI_STARTPROC simple |
1344 | + CFI_SIGNAL_FRAME |
1345 | CFI_DEF_CFA rsp,SS+8-ARGOFFSET |
1346 | /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ |
1347 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET |
1348 | @@ -583,8 +595,7 @@ |
1349 | #ifdef CONFIG_PREEMPT |
1350 | /* Returning to kernel space. Check if we need preemption */ |
1351 | /* rcx: threadinfo. interrupts off. */ |
1352 | - .p2align |
1353 | -retint_kernel: |
1354 | +ENTRY(retint_kernel) |
1355 | cmpl $0,threadinfo_preempt_count(%rcx) |
1356 | jnz retint_restore_args |
1357 | bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) |
1358 | @@ -644,7 +655,6 @@ |
1359 | END(call_function_interrupt) |
1360 | #endif |
1361 | |
1362 | -#ifdef CONFIG_X86_LOCAL_APIC |
1363 | ENTRY(apic_timer_interrupt) |
1364 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt |
1365 | END(apic_timer_interrupt) |
1366 | @@ -656,7 +666,6 @@ |
1367 | ENTRY(spurious_interrupt) |
1368 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt |
1369 | END(spurious_interrupt) |
1370 | -#endif |
1371 | #endif /* !CONFIG_XEN */ |
1372 | |
1373 | /* |
1374 | @@ -755,7 +764,9 @@ |
1375 | testl $3,CS(%rsp) |
1376 | jnz paranoid_userspace\trace |
1377 | paranoid_swapgs\trace: |
1378 | + .if \trace |
1379 | TRACE_IRQS_IRETQ 0 |
1380 | + .endif |
1381 | swapgs |
1382 | paranoid_restore\trace: |
1383 | RESTORE_ALL 8 |
1384 | @@ -802,7 +813,7 @@ |
1385 | * Exception entry point. This expects an error code/orig_rax on the stack |
1386 | * and the exception handler in %rax. |
1387 | */ |
1388 | -ENTRY(error_entry) |
1389 | +KPROBE_ENTRY(error_entry) |
1390 | _frame RDI |
1391 | CFI_REL_OFFSET rax,0 |
1392 | /* rdi slot contains rax, oldrax contains error code */ |
1393 | @@ -896,7 +907,7 @@ |
1394 | jmp error_sti |
1395 | #endif |
1396 | CFI_ENDPROC |
1397 | -END(error_entry) |
1398 | +KPROBE_END(error_entry) |
1399 | |
1400 | ENTRY(hypervisor_callback) |
1401 | zeroentry do_hypervisor_callback |
1402 | @@ -936,26 +947,6 @@ |
1403 | CFI_ENDPROC |
1404 | END(do_hypervisor_callback) |
1405 | |
1406 | -#ifdef CONFIG_X86_LOCAL_APIC |
1407 | -KPROBE_ENTRY(nmi) |
1408 | - zeroentry do_nmi_callback |
1409 | -ENTRY(do_nmi_callback) |
1410 | - CFI_STARTPROC |
1411 | - addq $8, %rsp |
1412 | - CFI_ENDPROC |
1413 | - CFI_DEFAULT_STACK |
1414 | - call do_nmi |
1415 | - orl $NMI_MASK,EFLAGS(%rsp) |
1416 | - RESTORE_REST |
1417 | - XEN_BLOCK_EVENTS(%rsi) |
1418 | - TRACE_IRQS_OFF |
1419 | - GET_THREAD_INFO(%rcx) |
1420 | - jmp retint_restore_args |
1421 | - CFI_ENDPROC |
1422 | - .previous .text |
1423 | -END(nmi) |
1424 | -#endif |
1425 | - |
1426 | ALIGN |
1427 | restore_all_enable_events: |
1428 | CFI_DEFAULT_STACK adj=1 |
1429 | @@ -1121,7 +1112,7 @@ |
1430 | * do_sys_execve asm fallback arguments: |
1431 | * rdi: name, rsi: argv, rdx: envp, fake frame on the stack |
1432 | */ |
1433 | -ENTRY(execve) |
1434 | +ENTRY(kernel_execve) |
1435 | CFI_STARTPROC |
1436 | FAKE_STACK_FRAME $0 |
1437 | SAVE_ALL |
1438 | @@ -1135,12 +1126,11 @@ |
1439 | UNFAKE_STACK_FRAME |
1440 | ret |
1441 | CFI_ENDPROC |
1442 | -ENDPROC(execve) |
1443 | +ENDPROC(kernel_execve) |
1444 | |
1445 | KPROBE_ENTRY(page_fault) |
1446 | errorentry do_page_fault |
1447 | -END(page_fault) |
1448 | - .previous .text |
1449 | +KPROBE_END(page_fault) |
1450 | |
1451 | ENTRY(coprocessor_error) |
1452 | zeroentry do_coprocessor_error |
1453 | @@ -1162,25 +1152,25 @@ |
1454 | zeroentry do_debug |
1455 | /* paranoidexit |
1456 | CFI_ENDPROC */ |
1457 | -END(debug) |
1458 | - .previous .text |
1459 | +KPROBE_END(debug) |
1460 | |
1461 | -#if 0 |
1462 | - /* runs on exception stack */ |
1463 | KPROBE_ENTRY(nmi) |
1464 | - INTR_FRAME |
1465 | - pushq $-1 |
1466 | - CFI_ADJUST_CFA_OFFSET 8 |
1467 | - paranoidentry do_nmi, 0, 0 |
1468 | -#ifdef CONFIG_TRACE_IRQFLAGS |
1469 | - paranoidexit 0 |
1470 | -#else |
1471 | - jmp paranoid_exit1 |
1472 | - CFI_ENDPROC |
1473 | -#endif |
1474 | -END(nmi) |
1475 | - .previous .text |
1476 | -#endif |
1477 | + zeroentry do_nmi_callback |
1478 | +KPROBE_END(nmi) |
1479 | +do_nmi_callback: |
1480 | + CFI_STARTPROC |
1481 | + addq $8, %rsp |
1482 | + CFI_ENDPROC |
1483 | + CFI_DEFAULT_STACK |
1484 | + call do_nmi |
1485 | + orl $NMI_MASK,EFLAGS(%rsp) |
1486 | + RESTORE_REST |
1487 | + XEN_BLOCK_EVENTS(%rsi) |
1488 | + TRACE_IRQS_OFF |
1489 | + GET_THREAD_INFO(%rcx) |
1490 | + jmp retint_restore_args |
1491 | + CFI_ENDPROC |
1492 | +END(do_nmi_callback) |
1493 | |
1494 | KPROBE_ENTRY(int3) |
1495 | /* INTR_FRAME |
1496 | @@ -1189,8 +1179,7 @@ |
1497 | zeroentry do_int3 |
1498 | /* jmp paranoid_exit1 |
1499 | CFI_ENDPROC */ |
1500 | -END(int3) |
1501 | - .previous .text |
1502 | +KPROBE_END(int3) |
1503 | |
1504 | ENTRY(overflow) |
1505 | zeroentry do_overflow |
1506 | @@ -1241,8 +1230,7 @@ |
1507 | |
1508 | KPROBE_ENTRY(general_protection) |
1509 | errorentry do_general_protection |
1510 | -END(general_protection) |
1511 | - .previous .text |
1512 | +KPROBE_END(general_protection) |
1513 | |
1514 | ENTRY(alignment_check) |
1515 | errorentry do_alignment_check |
1516 | --- a/arch/x86/kernel/genapic_xen_64.c |
1517 | +++ b/arch/x86/kernel/genapic_xen_64.c |
1518 | @@ -71,6 +71,13 @@ |
1519 | return cpu_online_map; |
1520 | } |
1521 | |
1522 | +static cpumask_t xen_vector_allocation_domain(int cpu) |
1523 | +{ |
1524 | + cpumask_t domain = CPU_MASK_NONE; |
1525 | + cpu_set(cpu, domain); |
1526 | + return domain; |
1527 | +} |
1528 | + |
1529 | /* |
1530 | * Set up the logical destination ID. |
1531 | * Do nothing, not called now. |
1532 | @@ -147,8 +154,8 @@ |
1533 | .int_delivery_mode = dest_LowestPrio, |
1534 | #endif |
1535 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), |
1536 | - .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST, |
1537 | .target_cpus = xen_target_cpus, |
1538 | + .vector_allocation_domain = xen_vector_allocation_domain, |
1539 | #ifdef CONFIG_XEN_PRIVILEGED_GUEST |
1540 | .apic_id_registered = xen_apic_id_registered, |
1541 | #endif |
1542 | --- a/arch/x86/kernel/head64-xen.c |
1543 | +++ b/arch/x86/kernel/head64-xen.c |
1544 | @@ -54,11 +54,9 @@ |
1545 | new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); |
1546 | if (!new_data) { |
1547 | if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { |
1548 | - printk("so old bootloader that it does not support commandline?!\n"); |
1549 | return; |
1550 | } |
1551 | new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; |
1552 | - printk("old bootloader convention, maybe loadlin?\n"); |
1553 | } |
1554 | command_line = (char *) ((u64)(new_data)); |
1555 | memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); |
1556 | @@ -70,25 +68,6 @@ |
1557 | memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); |
1558 | saved_command_line[max_cmdline-1] = '\0'; |
1559 | #endif |
1560 | - printk("Bootdata ok (command line is %s)\n", saved_command_line); |
1561 | -} |
1562 | - |
1563 | -static void __init setup_boot_cpu_data(void) |
1564 | -{ |
1565 | - unsigned int dummy, eax; |
1566 | - |
1567 | - /* get vendor info */ |
1568 | - cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level, |
1569 | - (unsigned int *)&boot_cpu_data.x86_vendor_id[0], |
1570 | - (unsigned int *)&boot_cpu_data.x86_vendor_id[8], |
1571 | - (unsigned int *)&boot_cpu_data.x86_vendor_id[4]); |
1572 | - |
1573 | - /* get cpu type */ |
1574 | - cpuid(1, &eax, &dummy, &dummy, |
1575 | - (unsigned int *) &boot_cpu_data.x86_capability); |
1576 | - boot_cpu_data.x86 = (eax >> 8) & 0xf; |
1577 | - boot_cpu_data.x86_model = (eax >> 4) & 0xf; |
1578 | - boot_cpu_data.x86_mask = eax & 0xf; |
1579 | } |
1580 | |
1581 | #include <xen/interface/memory.h> |
1582 | @@ -101,7 +80,6 @@ |
1583 | { |
1584 | struct xen_machphys_mapping mapping; |
1585 | unsigned long machine_to_phys_nr_ents; |
1586 | - char *s; |
1587 | int i; |
1588 | |
1589 | setup_xen_features(); |
1590 | @@ -128,10 +106,7 @@ |
1591 | asm volatile("lidt %0" :: "m" (idt_descr)); |
1592 | #endif |
1593 | |
1594 | - /* |
1595 | - * This must be called really, really early: |
1596 | - */ |
1597 | - lockdep_init(); |
1598 | + early_printk("Kernel alive\n"); |
1599 | |
1600 | for (i = 0; i < NR_CPUS; i++) |
1601 | cpu_pda(i) = &boot_cpu_pda[i]; |
1602 | @@ -141,22 +116,5 @@ |
1603 | #ifdef CONFIG_SMP |
1604 | cpu_set(0, cpu_online_map); |
1605 | #endif |
1606 | - s = strstr(saved_command_line, "earlyprintk="); |
1607 | - if (s != NULL) |
1608 | - setup_early_printk(strchr(s, '=') + 1); |
1609 | -#ifdef CONFIG_NUMA |
1610 | - s = strstr(saved_command_line, "numa="); |
1611 | - if (s != NULL) |
1612 | - numa_setup(s+5); |
1613 | -#endif |
1614 | -#ifdef CONFIG_X86_IO_APIC |
1615 | - if (strstr(saved_command_line, "disableapic")) |
1616 | - disable_apic = 1; |
1617 | -#endif |
1618 | - /* You need early console to see that */ |
1619 | - if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE) |
1620 | - panic("Kernel too big for kernel mapping\n"); |
1621 | - |
1622 | - setup_boot_cpu_data(); |
1623 | start_kernel(); |
1624 | } |
1625 | --- a/arch/x86/kernel/head_32-xen.S |
1626 | +++ b/arch/x86/kernel/head_32-xen.S |
1627 | @@ -62,7 +62,7 @@ |
1628 | movl %eax,%gs |
1629 | cld # gcc2 wants the direction flag cleared at all times |
1630 | |
1631 | - pushl %eax # fake return address |
1632 | + pushl $0 # fake return address for unwinder |
1633 | jmp start_kernel |
1634 | |
1635 | #define HYPERCALL_PAGE_OFFSET 0x1000 |
1636 | --- a/arch/x86/kernel/head_64-xen.S |
1637 | +++ b/arch/x86/kernel/head_64-xen.S |
1638 | @@ -149,7 +146,7 @@ |
1639 | .quad 0,0 /* TSS */ |
1640 | .quad 0,0 /* LDT */ |
1641 | .quad 0,0,0 /* three TLS descriptors */ |
1642 | - .quad 0 /* unused */ |
1643 | + .quad 0x0000f40000000000 /* node/CPU stored in limit */ |
1644 | gdt_end: |
1645 | /* asm/segment.h:GDT_ENTRIES must match this */ |
1646 | /* This should be a multiple of the cache line size */ |
1647 | --- a/arch/x86/kernel/io_apic_32-xen.c |
1648 | +++ b/arch/x86/kernel/io_apic_32-xen.c |
1649 | @@ -31,6 +31,9 @@ |
1650 | #include <linux/acpi.h> |
1651 | #include <linux/module.h> |
1652 | #include <linux/sysdev.h> |
1653 | +#include <linux/pci.h> |
1654 | +#include <linux/msi.h> |
1655 | +#include <linux/htirq.h> |
1656 | |
1657 | #include <asm/io.h> |
1658 | #include <asm/smp.h> |
1659 | @@ -38,13 +41,15 @@ |
1660 | #include <asm/timer.h> |
1661 | #include <asm/i8259.h> |
1662 | #include <asm/nmi.h> |
1663 | +#include <asm/msidef.h> |
1664 | +#include <asm/hypertransport.h> |
1665 | |
1666 | #include <mach_apic.h> |
1667 | +#include <mach_apicdef.h> |
1668 | |
1669 | #include "io_ports.h" |
1670 | |
1671 | #ifdef CONFIG_XEN |
1672 | - |
1673 | #include <xen/interface/xen.h> |
1674 | #include <xen/interface/physdev.h> |
1675 | |
1676 | @@ -55,32 +60,7 @@ |
1677 | |
1678 | unsigned long io_apic_irqs; |
1679 | |
1680 | -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg) |
1681 | -{ |
1682 | - struct physdev_apic apic_op; |
1683 | - int ret; |
1684 | - |
1685 | - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; |
1686 | - apic_op.reg = reg; |
1687 | - ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); |
1688 | - if (ret) |
1689 | - return ret; |
1690 | - return apic_op.value; |
1691 | -} |
1692 | - |
1693 | -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) |
1694 | -{ |
1695 | - struct physdev_apic apic_op; |
1696 | - |
1697 | - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; |
1698 | - apic_op.reg = reg; |
1699 | - apic_op.value = value; |
1700 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); |
1701 | -} |
1702 | - |
1703 | -#define io_apic_read(a,r) xen_io_apic_read(a,r) |
1704 | -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v) |
1705 | - |
1706 | +#define clear_IO_APIC() ((void)0) |
1707 | #endif /* CONFIG_XEN */ |
1708 | |
1709 | int (*ioapic_renumber_irq)(int ioapic, int irq); |
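The dedicated xen_io_apic_read()/xen_io_apic_write() wrappers removed here are folded into io_apic_read()/io_apic_write() later in this patch. The underlying model is unchanged: under Xen the hypervisor owns the IO-APIC, so the kernel never maps its registers directly and every access is proxied through a PHYSDEVOP hypercall, which is also why clear_IO_APIC collapses to a no-op. A compact sketch of one such proxied read, using only the hypercall interface already visible in this file:

/* Sketch: read one IO-APIC register via the hypervisor instead of MMIO. */
static unsigned int sketch_xen_ioapic_read(unsigned int apic, unsigned int reg)
{
	struct physdev_apic apic_op;

	apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;	/* which IO-APIC */
	apic_op.reg = reg;					/* which register */

	/* Xen performs the actual register access on our behalf. */
	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op))
		return -1;	/* hypercall failed */

	return apic_op.value;
}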
1710 | @@ -105,7 +85,7 @@ |
1711 | */ |
1712 | int nr_ioapic_registers[MAX_IO_APICS]; |
1713 | |
1714 | -int disable_timer_pin_1 __initdata; |
1715 | +static int disable_timer_pin_1 __initdata; |
1716 | |
1717 | /* |
1718 | * Rough estimation of how many shared IRQs there are, can |
1719 | @@ -125,12 +105,122 @@ |
1720 | int apic, pin, next; |
1721 | } irq_2_pin[PIN_MAP_SIZE]; |
1722 | |
1723 | -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; |
1724 | -#ifdef CONFIG_PCI_MSI |
1725 | -#define vector_to_irq(vector) \ |
1726 | - (platform_legacy_irq(vector) ? vector : vector_irq[vector]) |
1727 | +#ifndef CONFIG_XEN |
1728 | +struct io_apic { |
1729 | + unsigned int index; |
1730 | + unsigned int unused[3]; |
1731 | + unsigned int data; |
1732 | +}; |
1733 | + |
1734 | +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) |
1735 | +{ |
1736 | + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) |
1737 | + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); |
1738 | +} |
1739 | +#endif |
1740 | + |
1741 | +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) |
1742 | +{ |
1743 | +#ifndef CONFIG_XEN |
1744 | + struct io_apic __iomem *io_apic = io_apic_base(apic); |
1745 | + writel(reg, &io_apic->index); |
1746 | + return readl(&io_apic->data); |
1747 | +#else |
1748 | + struct physdev_apic apic_op; |
1749 | + int ret; |
1750 | + |
1751 | + apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; |
1752 | + apic_op.reg = reg; |
1753 | + ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); |
1754 | + if (ret) |
1755 | + return ret; |
1756 | + return apic_op.value; |
1757 | +#endif |
1758 | +} |
1759 | + |
1760 | +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) |
1761 | +{ |
1762 | +#ifndef CONFIG_XEN |
1763 | + struct io_apic __iomem *io_apic = io_apic_base(apic); |
1764 | + writel(reg, &io_apic->index); |
1765 | + writel(value, &io_apic->data); |
1766 | +#else |
1767 | + struct physdev_apic apic_op; |
1768 | + |
1769 | + apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; |
1770 | + apic_op.reg = reg; |
1771 | + apic_op.value = value; |
1772 | + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); |
1773 | +#endif |
1774 | +} |
1775 | + |
1776 | +#ifndef CONFIG_XEN |
1777 | +/* |
1778 | + * Re-write a value: to be used for read-modify-write |
1779 | + * cycles where the read already set up the index register. |
1780 | + * |
1781 | + * Older SiS APIC requires we rewrite the index register |
1782 | + */ |
1783 | +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) |
1784 | +{ |
1785 | + volatile struct io_apic *io_apic = io_apic_base(apic); |
1786 | + if (sis_apic_bug) |
1787 | + writel(reg, &io_apic->index); |
1788 | + writel(value, &io_apic->data); |
1789 | +} |
1790 | #else |
1791 | -#define vector_to_irq(vector) (vector) |
1792 | +#define io_apic_modify io_apic_write |
1793 | +#endif |
1794 | + |
1795 | +union entry_union { |
1796 | + struct { u32 w1, w2; }; |
1797 | + struct IO_APIC_route_entry entry; |
1798 | +}; |
1799 | + |
1800 | +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) |
1801 | +{ |
1802 | + union entry_union eu; |
1803 | + unsigned long flags; |
1804 | + spin_lock_irqsave(&ioapic_lock, flags); |
1805 | + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); |
1806 | + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); |
1807 | + spin_unlock_irqrestore(&ioapic_lock, flags); |
1808 | + return eu.entry; |
1809 | +} |
1810 | + |
1811 | +/* |
1812 | + * When we write a new IO APIC routing entry, we need to write the high |
1813 | + * word first! If the mask bit in the low word is clear, we will enable |
1814 | + * the interrupt, and we need to make sure the entry is fully populated |
1815 | + * before that happens. |
1816 | + */ |
1817 | +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
1818 | +{ |
1819 | + unsigned long flags; |
1820 | + union entry_union eu; |
1821 | + eu.entry = e; |
1822 | + spin_lock_irqsave(&ioapic_lock, flags); |
1823 | + io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
1824 | + io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
1825 | + spin_unlock_irqrestore(&ioapic_lock, flags); |
1826 | +} |
1827 | + |
1828 | +#ifndef CONFIG_XEN |
1829 | +/* |
1830 | + * When we mask an IO APIC routing entry, we need to write the low |
1831 | + * word first, in order to set the mask bit before we change the |
1832 | + * high bits! |
1833 | + */ |
1834 | +static void ioapic_mask_entry(int apic, int pin) |
1835 | +{ |
1836 | + unsigned long flags; |
1837 | + union entry_union eu = { .entry.mask = 1 }; |
1838 | + |
1839 | + spin_lock_irqsave(&ioapic_lock, flags); |
1840 | + io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
1841 | + io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
1842 | + spin_unlock_irqrestore(&ioapic_lock, flags); |
1843 | +} |
1844 | #endif |
1845 | |
1846 | /* |
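The union overlay introduced in this hunk lets a routing entry be moved as two register-sized words, and the ordering comments carry the real invariant: ioapic_write_entry() stores the high word first so an unmasked entry never goes live with a stale vector or destination, while ioapic_mask_entry() stores the low word first so the mask bit is set before anything else changes. A short usage sketch built only on the helpers above (the retargeting scenario itself is illustrative):

/*
 * Sketch: move a pin to a new vector using the helpers above.  The
 * helpers take ioapic_lock internally, so no locking appears here.
 */
static void sketch_retarget_pin(int apic, int pin, u8 new_vector)
{
	struct IO_APIC_route_entry e = ioapic_read_entry(apic, pin);

	e.mask = 1;				/* park the pin while editing */
	e.vector = new_vector;
	ioapic_write_entry(apic, pin, e);	/* high word written first */

	e.mask = 0;
	ioapic_write_entry(apic, pin, e);	/* unmask with a valid vector */
}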
1847 | @@ -156,9 +246,7 @@ |
1848 | entry->pin = pin; |
1849 | } |
1850 | |
1851 | -#ifdef CONFIG_XEN |
1852 | -#define clear_IO_APIC() ((void)0) |
1853 | -#else |
1854 | +#ifndef CONFIG_XEN |
1855 | /* |
1856 | * Reroute an IRQ to a different pin. |
1857 | */ |
1858 | @@ -243,25 +331,16 @@ |
1859 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
1860 | { |
1861 | struct IO_APIC_route_entry entry; |
1862 | - unsigned long flags; |
1863 | |
1864 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ |
1865 | - spin_lock_irqsave(&ioapic_lock, flags); |
1866 | - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); |
1867 | - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); |
1868 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
1869 | + entry = ioapic_read_entry(apic, pin); |
1870 | if (entry.delivery_mode == dest_SMI) |
1871 | return; |
1872 | |
1873 | /* |
1874 | * Disable it in the IO-APIC irq-routing table: |
1875 | */ |
1876 | - memset(&entry, 0, sizeof(entry)); |
1877 | - entry.mask = 1; |
1878 | - spin_lock_irqsave(&ioapic_lock, flags); |
1879 | - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); |
1880 | - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); |
1881 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
1882 | + ioapic_mask_entry(apic, pin); |
1883 | } |
1884 | |
1885 | static void clear_IO_APIC (void) |
1886 | @@ -301,7 +380,7 @@ |
1887 | break; |
1888 | entry = irq_2_pin + entry->next; |
1889 | } |
1890 | - set_irq_info(irq, cpumask); |
1891 | + set_native_irq_info(irq, cpumask); |
1892 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1893 | } |
1894 | |
1895 | @@ -1207,40 +1286,40 @@ |
1896 | /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ |
1897 | u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ |
1898 | |
1899 | -int assign_irq_vector(int irq) |
1900 | +static int __assign_irq_vector(int irq) |
1901 | { |
1902 | - unsigned long flags; |
1903 | int vector; |
1904 | struct physdev_irq irq_op; |
1905 | |
1906 | - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); |
1907 | - |
1908 | - spin_lock_irqsave(&vector_lock, flags); |
1909 | + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); |
1910 | |
1911 | - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { |
1912 | - spin_unlock_irqrestore(&vector_lock, flags); |
1913 | - return IO_APIC_VECTOR(irq); |
1914 | - } |
1915 | + if (irq_vector[irq] > 0) |
1916 | + return irq_vector[irq]; |
1917 | |
1918 | irq_op.irq = irq; |
1919 | - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { |
1920 | - spin_unlock_irqrestore(&vector_lock, flags); |
1921 | + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) |
1922 | return -ENOSPC; |
1923 | - } |
1924 | |
1925 | vector = irq_op.vector; |
1926 | - vector_irq[vector] = irq; |
1927 | - if (irq != AUTO_ASSIGN) |
1928 | - IO_APIC_VECTOR(irq) = vector; |
1929 | + irq_vector[irq] = vector; |
1930 | + |
1931 | + return vector; |
1932 | +} |
1933 | |
1934 | +static int assign_irq_vector(int irq) |
1935 | +{ |
1936 | + unsigned long flags; |
1937 | + int vector; |
1938 | + |
1939 | + spin_lock_irqsave(&vector_lock, flags); |
1940 | + vector = __assign_irq_vector(irq); |
1941 | spin_unlock_irqrestore(&vector_lock, flags); |
1942 | |
1943 | return vector; |
1944 | } |
1945 | |
1946 | #ifndef CONFIG_XEN |
1947 | -static struct hw_interrupt_type ioapic_level_type; |
1948 | -static struct hw_interrupt_type ioapic_edge_type; |
1949 | +static struct irq_chip ioapic_chip; |
1950 | |
1951 | #define IOAPIC_AUTO -1 |
1952 | #define IOAPIC_EDGE 0 |
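Splitting the allocator into a bare __assign_irq_vector() plus a small locking wrapper is what lets create_irq(), added near the end of this file, take vector_lock once and call the unlocked variant repeatedly while scanning for a free irq. The same lock-split idiom in isolation, with hypothetical names (slot_table, NR_SLOTS and FIRST_SLOT are illustrative, not from this patch):

static DEFINE_SPINLOCK(table_lock);
static int slot_table[NR_SLOTS];

/* Core allocator: the caller must already hold table_lock. */
static int __alloc_slot(int key)
{
	if (slot_table[key] > 0)
		return slot_table[key];		/* already assigned */
	slot_table[key] = FIRST_SLOT + key;	/* hand out a new slot */
	return slot_table[key];
}

/* Convenience wrapper for callers that do not hold the lock yet. */
static int alloc_slot(int key)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&table_lock, flags);
	ret = __alloc_slot(key);
	spin_unlock_irqrestore(&table_lock, flags);
	return ret;
}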
1953 | @@ -1248,16 +1327,16 @@ |
1954 | |
1955 | static void ioapic_register_intr(int irq, int vector, unsigned long trigger) |
1956 | { |
1957 | - unsigned idx; |
1958 | - |
1959 | - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; |
1960 | - |
1961 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
1962 | trigger == IOAPIC_LEVEL) |
1963 | - irq_desc[idx].chip = &ioapic_level_type; |
1964 | - else |
1965 | - irq_desc[idx].chip = &ioapic_edge_type; |
1966 | - set_intr_gate(vector, interrupt[idx]); |
1967 | + set_irq_chip_and_handler_name(irq, &ioapic_chip, |
1968 | + handle_fasteoi_irq, "fasteoi"); |
1969 | + else { |
1970 | + irq_desc[irq].status |= IRQ_DELAYED_DISABLE; |
1971 | + set_irq_chip_and_handler_name(irq, &ioapic_chip, |
1972 | + handle_edge_irq, "edge"); |
1973 | + } |
1974 | + set_intr_gate(vector, interrupt[irq]); |
1975 | } |
1976 | #else |
1977 | #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) |
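With a single ioapic_chip, the edge/level distinction moves from two hw_interrupt_type structures into the genirq flow handler: handle_edge_irq invokes the chip's .ack early in the handling path, while handle_fasteoi_irq defers to .eoi after the action has run, which is why the chip defined further down supplies both ack_ioapic_irq and ack_ioapic_quirk_irq. A registration sketch of that selection; my_chip, my_irq and level_triggered are illustrative placeholders:

/* Sketch: pick the flow handler that matches the trigger type. */
static void sketch_register(unsigned int my_irq, int level_triggered)
{
	if (level_triggered)
		/* level: acknowledged at end of interrupt via chip->eoi */
		set_irq_chip_and_handler_name(my_irq, &my_chip,
					      handle_fasteoi_irq, "fasteoi");
	else
		/* edge: acknowledged up front via chip->ack */
		set_irq_chip_and_handler_name(my_irq, &my_chip,
					      handle_edge_irq, "edge");
}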
1978 | @@ -1328,9 +1407,8 @@ |
1979 | if (!apic && (irq < 16)) |
1980 | disable_8259A_irq(irq); |
1981 | } |
1982 | + ioapic_write_entry(apic, pin, entry); |
1983 | spin_lock_irqsave(&ioapic_lock, flags); |
1984 | - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); |
1985 | - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); |
1986 | set_native_irq_info(irq, TARGET_CPUS); |
1987 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1988 | } |
1989 | @@ -1347,7 +1425,6 @@ |
1990 | static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) |
1991 | { |
1992 | struct IO_APIC_route_entry entry; |
1993 | - unsigned long flags; |
1994 | |
1995 | memset(&entry,0,sizeof(entry)); |
1996 | |
1997 | @@ -1372,15 +1449,13 @@ |
1998 | * The timer IRQ doesn't have to know that behind the |
1999 | * scene we have a 8259A-master in AEOI mode ... |
2000 | */ |
2001 | - irq_desc[0].chip = &ioapic_edge_type; |
2002 | + irq_desc[0].chip = &ioapic_chip; |
2003 | + set_irq_handler(0, handle_edge_irq); |
2004 | |
2005 | /* |
2006 | * Add it to the IO-APIC irq-routing table: |
2007 | */ |
2008 | - spin_lock_irqsave(&ioapic_lock, flags); |
2009 | - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); |
2010 | - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); |
2011 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
2012 | + ioapic_write_entry(apic, pin, entry); |
2013 | |
2014 | enable_8259A_irq(0); |
2015 | } |
2016 | @@ -1490,10 +1565,7 @@ |
2017 | for (i = 0; i <= reg_01.bits.entries; i++) { |
2018 | struct IO_APIC_route_entry entry; |
2019 | |
2020 | - spin_lock_irqsave(&ioapic_lock, flags); |
2021 | - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); |
2022 | - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); |
2023 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
2024 | + entry = ioapic_read_entry(apic, i); |
2025 | |
2026 | printk(KERN_DEBUG " %02x %03X %02X ", |
2027 | i, |
2028 | @@ -1513,17 +1585,12 @@ |
2029 | ); |
2030 | } |
2031 | } |
2032 | - if (use_pci_vector()) |
2033 | - printk(KERN_INFO "Using vector-based indexing\n"); |
2034 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
2035 | for (i = 0; i < NR_IRQS; i++) { |
2036 | struct irq_pin_list *entry = irq_2_pin + i; |
2037 | if (entry->pin < 0) |
2038 | continue; |
2039 | - if (use_pci_vector() && !platform_legacy_irq(i)) |
2040 | - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); |
2041 | - else |
2042 | - printk(KERN_DEBUG "IRQ%d ", i); |
2043 | + printk(KERN_DEBUG "IRQ%d ", i); |
2044 | for (;;) { |
2045 | printk("-> %d:%d", entry->apic, entry->pin); |
2046 | if (!entry->next) |
2047 | @@ -1709,10 +1776,7 @@ |
2048 | /* See if any of the pins is in ExtINT mode */ |
2049 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
2050 | struct IO_APIC_route_entry entry; |
2051 | - spin_lock_irqsave(&ioapic_lock, flags); |
2052 | - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); |
2053 | - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); |
2054 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
2055 | + entry = ioapic_read_entry(apic, pin); |
2056 | |
2057 | |
2058 | /* If the interrupt line is enabled and in ExtInt mode |
2059 | @@ -1770,7 +1834,6 @@ |
2060 | */ |
2061 | if (ioapic_i8259.pin != -1) { |
2062 | struct IO_APIC_route_entry entry; |
2063 | - unsigned long flags; |
2064 | |
2065 | memset(&entry, 0, sizeof(entry)); |
2066 | entry.mask = 0; /* Enabled */ |
2067 | @@ -1787,12 +1850,7 @@ |
2068 | /* |
2069 | * Add it to the IO-APIC irq-routing table: |
2070 | */ |
2071 | - spin_lock_irqsave(&ioapic_lock, flags); |
2072 | - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, |
2073 | - *(((int *)&entry)+1)); |
2074 | - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, |
2075 | - *(((int *)&entry)+0)); |
2076 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
2077 | + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); |
2078 | } |
2079 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); |
2080 | #endif |
2081 | @@ -1959,6 +2017,8 @@ |
2082 | */ |
2083 | |
2084 | /* |
2085 | + * Startup quirk: |
2086 | + * |
2087 | * Starting up a edge-triggered IO-APIC interrupt is |
2088 | * nasty - we need to make sure that we get the edge. |
2089 | * If it is already asserted for some reason, we need |
2090 | @@ -1966,8 +2026,10 @@ |
2091 | * |
2092 | * This is not complete - we should be able to fake |
2093 | * an edge even if it isn't on the 8259A... |
2094 | + * |
2095 | + * (We do this for level-triggered IRQs too - it cannot hurt.) |
2096 | */ |
2097 | -static unsigned int startup_edge_ioapic_irq(unsigned int irq) |
2098 | +static unsigned int startup_ioapic_irq(unsigned int irq) |
2099 | { |
2100 | int was_pending = 0; |
2101 | unsigned long flags; |
2102 | @@ -1984,47 +2046,18 @@ |
2103 | return was_pending; |
2104 | } |
2105 | |
2106 | -/* |
2107 | - * Once we have recorded IRQ_PENDING already, we can mask the |
2108 | - * interrupt for real. This prevents IRQ storms from unhandled |
2109 | - * devices. |
2110 | - */ |
2111 | -static void ack_edge_ioapic_irq(unsigned int irq) |
2112 | -{ |
2113 | - move_irq(irq); |
2114 | - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) |
2115 | - == (IRQ_PENDING | IRQ_DISABLED)) |
2116 | - mask_IO_APIC_irq(irq); |
2117 | - ack_APIC_irq(); |
2118 | -} |
2119 | - |
2120 | -/* |
2121 | - * Level triggered interrupts can just be masked, |
2122 | - * and shutting down and starting up the interrupt |
2123 | - * is the same as enabling and disabling them -- except |
2124 | - * with a startup need to return a "was pending" value. |
2125 | - * |
2126 | - * Level triggered interrupts are special because we |
2127 | - * do not touch any IO-APIC register while handling |
2128 | - * them. We ack the APIC in the end-IRQ handler, not |
2129 | - * in the start-IRQ-handler. Protection against reentrance |
2130 | - * from the same interrupt is still provided, both by the |
2131 | - * generic IRQ layer and by the fact that an unacked local |
2132 | - * APIC does not accept IRQs. |
2133 | - */ |
2134 | -static unsigned int startup_level_ioapic_irq (unsigned int irq) |
2135 | +static void ack_ioapic_irq(unsigned int irq) |
2136 | { |
2137 | - unmask_IO_APIC_irq(irq); |
2138 | - |
2139 | - return 0; /* don't check for pending */ |
2140 | + move_native_irq(irq); |
2141 | + ack_APIC_irq(); |
2142 | } |
2143 | |
2144 | -static void end_level_ioapic_irq (unsigned int irq) |
2145 | +static void ack_ioapic_quirk_irq(unsigned int irq) |
2146 | { |
2147 | unsigned long v; |
2148 | int i; |
2149 | |
2150 | - move_irq(irq); |
2151 | + move_native_irq(irq); |
2152 | /* |
2153 | * It appears there is an erratum which affects at least version 0x11 |
2154 | * of I/O APIC (that's the 82093AA and cores integrated into various |
2155 | @@ -2044,7 +2077,7 @@ |
2156 | * operation to prevent an edge-triggered interrupt escaping meanwhile. |
2157 | * The idea is from Manfred Spraul. --macro |
2158 | */ |
2159 | - i = IO_APIC_VECTOR(irq); |
2160 | + i = irq_vector[irq]; |
2161 | |
2162 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); |
2163 | |
2164 | @@ -2059,104 +2092,24 @@ |
2165 | } |
2166 | } |
2167 | |
2168 | -#ifdef CONFIG_PCI_MSI |
2169 | -static unsigned int startup_edge_ioapic_vector(unsigned int vector) |
2170 | -{ |
2171 | - int irq = vector_to_irq(vector); |
2172 | - |
2173 | - return startup_edge_ioapic_irq(irq); |
2174 | -} |
2175 | - |
2176 | -static void ack_edge_ioapic_vector(unsigned int vector) |
2177 | -{ |
2178 | - int irq = vector_to_irq(vector); |
2179 | - |
2180 | - move_native_irq(vector); |
2181 | - ack_edge_ioapic_irq(irq); |
2182 | -} |
2183 | - |
2184 | -static unsigned int startup_level_ioapic_vector (unsigned int vector) |
2185 | -{ |
2186 | - int irq = vector_to_irq(vector); |
2187 | - |
2188 | - return startup_level_ioapic_irq (irq); |
2189 | -} |
2190 | - |
2191 | -static void end_level_ioapic_vector (unsigned int vector) |
2192 | -{ |
2193 | - int irq = vector_to_irq(vector); |
2194 | - |
2195 | - move_native_irq(vector); |
2196 | - end_level_ioapic_irq(irq); |
2197 | -} |
2198 | - |
2199 | -static void mask_IO_APIC_vector (unsigned int vector) |
2200 | -{ |
2201 | - int irq = vector_to_irq(vector); |
2202 | - |
2203 | - mask_IO_APIC_irq(irq); |
2204 | -} |
2205 | - |
2206 | -static void unmask_IO_APIC_vector (unsigned int vector) |
2207 | -{ |
2208 | - int irq = vector_to_irq(vector); |
2209 | - |
2210 | - unmask_IO_APIC_irq(irq); |
2211 | -} |
2212 | - |
2213 | -#ifdef CONFIG_SMP |
2214 | -static void set_ioapic_affinity_vector (unsigned int vector, |
2215 | - cpumask_t cpu_mask) |
2216 | -{ |
2217 | - int irq = vector_to_irq(vector); |
2218 | - |
2219 | - set_native_irq_info(vector, cpu_mask); |
2220 | - set_ioapic_affinity_irq(irq, cpu_mask); |
2221 | -} |
2222 | -#endif |
2223 | -#endif |
2224 | - |
2225 | -static int ioapic_retrigger(unsigned int irq) |
2226 | +static int ioapic_retrigger_irq(unsigned int irq) |
2227 | { |
2228 | - send_IPI_self(IO_APIC_VECTOR(irq)); |
2229 | + send_IPI_self(irq_vector[irq]); |
2230 | |
2231 | return 1; |
2232 | } |
2233 | |
2234 | -/* |
2235 | - * Level and edge triggered IO-APIC interrupts need different handling, |
2236 | - * so we use two separate IRQ descriptors. Edge triggered IRQs can be |
2237 | - * handled with the level-triggered descriptor, but that one has slightly |
2238 | - * more overhead. Level-triggered interrupts cannot be handled with the |
2239 | - * edge-triggered handler, without risking IRQ storms and other ugly |
2240 | - * races. |
2241 | - */ |
2242 | -static struct hw_interrupt_type ioapic_edge_type __read_mostly = { |
2243 | - .typename = "IO-APIC-edge", |
2244 | - .startup = startup_edge_ioapic, |
2245 | - .shutdown = shutdown_edge_ioapic, |
2246 | - .enable = enable_edge_ioapic, |
2247 | - .disable = disable_edge_ioapic, |
2248 | - .ack = ack_edge_ioapic, |
2249 | - .end = end_edge_ioapic, |
2250 | -#ifdef CONFIG_SMP |
2251 | - .set_affinity = set_ioapic_affinity, |
2252 | -#endif |
2253 | - .retrigger = ioapic_retrigger, |
2254 | -}; |
2255 | - |
2256 | -static struct hw_interrupt_type ioapic_level_type __read_mostly = { |
2257 | - .typename = "IO-APIC-level", |
2258 | - .startup = startup_level_ioapic, |
2259 | - .shutdown = shutdown_level_ioapic, |
2260 | - .enable = enable_level_ioapic, |
2261 | - .disable = disable_level_ioapic, |
2262 | - .ack = mask_and_ack_level_ioapic, |
2263 | - .end = end_level_ioapic, |
2264 | +static struct irq_chip ioapic_chip __read_mostly = { |
2265 | + .name = "IO-APIC", |
2266 | + .startup = startup_ioapic_irq, |
2267 | + .mask = mask_IO_APIC_irq, |
2268 | + .unmask = unmask_IO_APIC_irq, |
2269 | + .ack = ack_ioapic_irq, |
2270 | + .eoi = ack_ioapic_quirk_irq, |
2271 | #ifdef CONFIG_SMP |
2272 | - .set_affinity = set_ioapic_affinity, |
2273 | + .set_affinity = set_ioapic_affinity_irq, |
2274 | #endif |
2275 | - .retrigger = ioapic_retrigger, |
2276 | + .retrigger = ioapic_retrigger_irq, |
2277 | }; |
2278 | #endif /* !CONFIG_XEN */ |
2279 | |
2280 | @@ -2177,12 +2130,7 @@ |
2281 | */ |
2282 | for (irq = 0; irq < NR_IRQS ; irq++) { |
2283 | int tmp = irq; |
2284 | - if (use_pci_vector()) { |
2285 | - if (!platform_legacy_irq(tmp)) |
2286 | - if ((tmp = vector_to_irq(tmp)) == -1) |
2287 | - continue; |
2288 | - } |
2289 | - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { |
2290 | + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { |
2291 | /* |
2292 | * Hmm.. We don't have an entry for this, |
2293 | * so default to an old-fashioned 8259 |
2294 | @@ -2193,22 +2141,23 @@ |
2295 | #ifndef CONFIG_XEN |
2296 | else |
2297 | /* Strange. Oh, well.. */ |
2298 | - irq_desc[irq].chip = &no_irq_type; |
2299 | + irq_desc[irq].chip = &no_irq_chip; |
2300 | #endif |
2301 | } |
2302 | } |
2303 | } |
2304 | |
2305 | #ifndef CONFIG_XEN |
2306 | -static void enable_lapic_irq (unsigned int irq) |
2307 | -{ |
2308 | - unsigned long v; |
2309 | +/* |
2310 | + * The local APIC irq-chip implementation: |
2311 | + */ |
2312 | |
2313 | - v = apic_read(APIC_LVT0); |
2314 | - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); |
2315 | +static void ack_apic(unsigned int irq) |
2316 | +{ |
2317 | + ack_APIC_irq(); |
2318 | } |
2319 | |
2320 | -static void disable_lapic_irq (unsigned int irq) |
2321 | +static void mask_lapic_irq (unsigned int irq) |
2322 | { |
2323 | unsigned long v; |
2324 | |
2325 | @@ -2216,21 +2165,19 @@ |
2326 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); |
2327 | } |
2328 | |
2329 | -static void ack_lapic_irq (unsigned int irq) |
2330 | +static void unmask_lapic_irq (unsigned int irq) |
2331 | { |
2332 | - ack_APIC_irq(); |
2333 | -} |
2334 | + unsigned long v; |
2335 | |
2336 | -static void end_lapic_irq (unsigned int i) { /* nothing */ } |
2337 | + v = apic_read(APIC_LVT0); |
2338 | + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); |
2339 | +} |
2340 | |
2341 | -static struct hw_interrupt_type lapic_irq_type __read_mostly = { |
2342 | - .typename = "local-APIC-edge", |
2343 | - .startup = NULL, /* startup_irq() not used for IRQ0 */ |
2344 | - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ |
2345 | - .enable = enable_lapic_irq, |
2346 | - .disable = disable_lapic_irq, |
2347 | - .ack = ack_lapic_irq, |
2348 | - .end = end_lapic_irq |
2349 | +static struct irq_chip lapic_chip __read_mostly = { |
2350 | + .name = "local-APIC-edge", |
2351 | + .mask = mask_lapic_irq, |
2352 | + .unmask = unmask_lapic_irq, |
2353 | + .eoi = ack_apic, |
2354 | }; |
2355 | |
2356 | static void setup_nmi (void) |
2357 | @@ -2263,17 +2210,13 @@ |
2358 | int apic, pin, i; |
2359 | struct IO_APIC_route_entry entry0, entry1; |
2360 | unsigned char save_control, save_freq_select; |
2361 | - unsigned long flags; |
2362 | |
2363 | pin = find_isa_irq_pin(8, mp_INT); |
2364 | apic = find_isa_irq_apic(8, mp_INT); |
2365 | if (pin == -1) |
2366 | return; |
2367 | |
2368 | - spin_lock_irqsave(&ioapic_lock, flags); |
2369 | - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); |
2370 | - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); |
2371 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
2372 | + entry0 = ioapic_read_entry(apic, pin); |
2373 | clear_IO_APIC_pin(apic, pin); |
2374 | |
2375 | memset(&entry1, 0, sizeof(entry1)); |
2376 | @@ -2286,10 +2229,7 @@ |
2377 | entry1.trigger = 0; |
2378 | entry1.vector = 0; |
2379 | |
2380 | - spin_lock_irqsave(&ioapic_lock, flags); |
2381 | - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); |
2382 | - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); |
2383 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
2384 | + ioapic_write_entry(apic, pin, entry1); |
2385 | |
2386 | save_control = CMOS_READ(RTC_CONTROL); |
2387 | save_freq_select = CMOS_READ(RTC_FREQ_SELECT); |
2388 | @@ -2308,10 +2248,7 @@ |
2389 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); |
2390 | clear_IO_APIC_pin(apic, pin); |
2391 | |
2392 | - spin_lock_irqsave(&ioapic_lock, flags); |
2393 | - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); |
2394 | - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); |
2395 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
2396 | + ioapic_write_entry(apic, pin, entry0); |
2397 | } |
2398 | |
2399 | int timer_uses_ioapic_pin_0; |
2400 | @@ -2411,7 +2348,8 @@ |
2401 | printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); |
2402 | |
2403 | disable_8259A_irq(0); |
2404 | - irq_desc[0].chip = &lapic_irq_type; |
2405 | + set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, |
2406 | + "fasteoi"); |
2407 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
2408 | enable_8259A_irq(0); |
2409 | |
2410 | @@ -2523,17 +2461,12 @@ |
2411 | { |
2412 | struct IO_APIC_route_entry *entry; |
2413 | struct sysfs_ioapic_data *data; |
2414 | - unsigned long flags; |
2415 | int i; |
2416 | |
2417 | data = container_of(dev, struct sysfs_ioapic_data, dev); |
2418 | entry = data->entry; |
2419 | - spin_lock_irqsave(&ioapic_lock, flags); |
2420 | - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { |
2421 | - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); |
2422 | - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); |
2423 | - } |
2424 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
2425 | + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) |
2426 | + entry[i] = ioapic_read_entry(dev->id, i); |
2427 | |
2428 | return 0; |
2429 | } |
2430 | @@ -2555,11 +2488,9 @@ |
2431 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; |
2432 | io_apic_write(dev->id, 0, reg_00.raw); |
2433 | } |
2434 | - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { |
2435 | - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); |
2436 | - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); |
2437 | - } |
2438 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2439 | + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) |
2440 | + ioapic_write_entry(dev->id, i, entry[i]); |
2441 | |
2442 | return 0; |
2443 | } |
2444 | @@ -2605,6 +2536,240 @@ |
2445 | |
2446 | device_initcall(ioapic_init_sysfs); |
2447 | |
2448 | +#ifndef CONFIG_XEN |
2449 | +/* |
2450 | + * Dynamic irq allocate and deallocation |
2451 | + */ |
2452 | +int create_irq(void) |
2453 | +{ |
2454 | + /* Allocate an unused irq */ |
2455 | + int irq, new, vector; |
2456 | + unsigned long flags; |
2457 | + |
2458 | + irq = -ENOSPC; |
2459 | + spin_lock_irqsave(&vector_lock, flags); |
2460 | + for (new = (NR_IRQS - 1); new >= 0; new--) { |
2461 | + if (platform_legacy_irq(new)) |
2462 | + continue; |
2463 | + if (irq_vector[new] != 0) |
2464 | + continue; |
2465 | + vector = __assign_irq_vector(new); |
2466 | + if (likely(vector > 0)) |
2467 | + irq = new; |
2468 | + break; |
2469 | + } |
2470 | + spin_unlock_irqrestore(&vector_lock, flags); |
2471 | + |
2472 | + if (irq >= 0) { |
2473 | + set_intr_gate(vector, interrupt[irq]); |
2474 | + dynamic_irq_init(irq); |
2475 | + } |
2476 | + return irq; |
2477 | +} |
2478 | + |
2479 | +void destroy_irq(unsigned int irq) |
2480 | +{ |
2481 | + unsigned long flags; |
2482 | + |
2483 | + dynamic_irq_cleanup(irq); |
2484 | + |
2485 | + spin_lock_irqsave(&vector_lock, flags); |
2486 | + irq_vector[irq] = 0; |
2487 | + spin_unlock_irqrestore(&vector_lock, flags); |
2488 | +} |
2489 | +#endif |
2490 | + |
2491 | +/* |
2492 | + * MSI message composition |
2493 | + */ |
2494 | +#ifdef CONFIG_PCI_MSI |
2495 | +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) |
2496 | +{ |
2497 | + int vector; |
2498 | + unsigned dest; |
2499 | + |
2500 | + vector = assign_irq_vector(irq); |
2501 | + if (vector >= 0) { |
2502 | + dest = cpu_mask_to_apicid(TARGET_CPUS); |
2503 | + |
2504 | + msg->address_hi = MSI_ADDR_BASE_HI; |
2505 | + msg->address_lo = |
2506 | + MSI_ADDR_BASE_LO | |
2507 | + ((INT_DEST_MODE == 0) ? |
2508 | + MSI_ADDR_DEST_MODE_PHYSICAL: |
2509 | + MSI_ADDR_DEST_MODE_LOGICAL) | |
2510 | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? |
2511 | + MSI_ADDR_REDIRECTION_CPU: |
2512 | + MSI_ADDR_REDIRECTION_LOWPRI) | |
2513 | + MSI_ADDR_DEST_ID(dest); |
2514 | + |
2515 | + msg->data = |
2516 | + MSI_DATA_TRIGGER_EDGE | |
2517 | + MSI_DATA_LEVEL_ASSERT | |
2518 | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? |
2519 | + MSI_DATA_DELIVERY_FIXED: |
2520 | + MSI_DATA_DELIVERY_LOWPRI) | |
2521 | + MSI_DATA_VECTOR(vector); |
2522 | + } |
2523 | + return vector; |
2524 | +} |
2525 | + |
2526 | +#ifdef CONFIG_SMP |
2527 | +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) |
2528 | +{ |
2529 | + struct msi_msg msg; |
2530 | + unsigned int dest; |
2531 | + cpumask_t tmp; |
2532 | + int vector; |
2533 | + |
2534 | + cpus_and(tmp, mask, cpu_online_map); |
2535 | + if (cpus_empty(tmp)) |
2536 | + tmp = TARGET_CPUS; |
2537 | + |
2538 | + vector = assign_irq_vector(irq); |
2539 | + if (vector < 0) |
2540 | + return; |
2541 | + |
2542 | + dest = cpu_mask_to_apicid(mask); |
2543 | + |
2544 | + read_msi_msg(irq, &msg); |
2545 | + |
2546 | + msg.data &= ~MSI_DATA_VECTOR_MASK; |
2547 | + msg.data |= MSI_DATA_VECTOR(vector); |
2548 | + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
2549 | + msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
2550 | + |
2551 | + write_msi_msg(irq, &msg); |
2552 | + set_native_irq_info(irq, mask); |
2553 | +} |
2554 | +#endif /* CONFIG_SMP */ |
2555 | + |
2556 | +/* |
2557 | + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, |
2558 | + * which implement the MSI or MSI-X Capability Structure. |
2559 | + */ |
2560 | +static struct irq_chip msi_chip = { |
2561 | + .name = "PCI-MSI", |
2562 | + .unmask = unmask_msi_irq, |
2563 | + .mask = mask_msi_irq, |
2564 | + .ack = ack_ioapic_irq, |
2565 | +#ifdef CONFIG_SMP |
2566 | + .set_affinity = set_msi_irq_affinity, |
2567 | +#endif |
2568 | + .retrigger = ioapic_retrigger_irq, |
2569 | +}; |
2570 | + |
2571 | +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) |
2572 | +{ |
2573 | + struct msi_msg msg; |
2574 | + int ret; |
2575 | + ret = msi_compose_msg(dev, irq, &msg); |
2576 | + if (ret < 0) |
2577 | + return ret; |
2578 | + |
2579 | + write_msi_msg(irq, &msg); |
2580 | + |
2581 | + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, |
2582 | + "edge"); |
2583 | + |
2584 | + return 0; |
2585 | +} |
2586 | + |
2587 | +void arch_teardown_msi_irq(unsigned int irq) |
2588 | +{ |
2589 | + return; |
2590 | +} |
2591 | + |
2592 | +#endif /* CONFIG_PCI_MSI */ |
2593 | + |
2594 | +/* |
2595 | + * Hypertransport interrupt support |
2596 | + */ |
2597 | +#ifdef CONFIG_HT_IRQ |
2598 | + |
2599 | +#ifdef CONFIG_SMP |
2600 | + |
2601 | +static void target_ht_irq(unsigned int irq, unsigned int dest) |
2602 | +{ |
2603 | + struct ht_irq_msg msg; |
2604 | + fetch_ht_irq_msg(irq, &msg); |
2605 | + |
2606 | + msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); |
2607 | + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); |
2608 | + |
2609 | + msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); |
2610 | + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); |
2611 | + |
2612 | + write_ht_irq_msg(irq, &msg); |
2613 | +} |
2614 | + |
2615 | +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) |
2616 | +{ |
2617 | + unsigned int dest; |
2618 | + cpumask_t tmp; |
2619 | + |
2620 | + cpus_and(tmp, mask, cpu_online_map); |
2621 | + if (cpus_empty(tmp)) |
2622 | + tmp = TARGET_CPUS; |
2623 | + |
2624 | + cpus_and(mask, tmp, CPU_MASK_ALL); |
2625 | + |
2626 | + dest = cpu_mask_to_apicid(mask); |
2627 | + |
2628 | + target_ht_irq(irq, dest); |
2629 | + set_native_irq_info(irq, mask); |
2630 | +} |
2631 | +#endif |
2632 | + |
2633 | +static struct irq_chip ht_irq_chip = { |
2634 | + .name = "PCI-HT", |
2635 | + .mask = mask_ht_irq, |
2636 | + .unmask = unmask_ht_irq, |
2637 | + .ack = ack_ioapic_irq, |
2638 | +#ifdef CONFIG_SMP |
2639 | + .set_affinity = set_ht_irq_affinity, |
2640 | +#endif |
2641 | + .retrigger = ioapic_retrigger_irq, |
2642 | +}; |
2643 | + |
2644 | +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) |
2645 | +{ |
2646 | + int vector; |
2647 | + |
2648 | + vector = assign_irq_vector(irq); |
2649 | + if (vector >= 0) { |
2650 | + struct ht_irq_msg msg; |
2651 | + unsigned dest; |
2652 | + cpumask_t tmp; |
2653 | + |
2654 | + cpus_clear(tmp); |
2655 | + cpu_set(vector >> 8, tmp); |
2656 | + dest = cpu_mask_to_apicid(tmp); |
2657 | + |
2658 | + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); |
2659 | + |
2660 | + msg.address_lo = |
2661 | + HT_IRQ_LOW_BASE | |
2662 | + HT_IRQ_LOW_DEST_ID(dest) | |
2663 | + HT_IRQ_LOW_VECTOR(vector) | |
2664 | + ((INT_DEST_MODE == 0) ? |
2665 | + HT_IRQ_LOW_DM_PHYSICAL : |
2666 | + HT_IRQ_LOW_DM_LOGICAL) | |
2667 | + HT_IRQ_LOW_RQEOI_EDGE | |
2668 | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? |
2669 | + HT_IRQ_LOW_MT_FIXED : |
2670 | + HT_IRQ_LOW_MT_ARBITRATED) | |
2671 | + HT_IRQ_LOW_IRQ_MASKED; |
2672 | + |
2673 | + write_ht_irq_msg(irq, &msg); |
2674 | + |
2675 | + set_irq_chip_and_handler_name(irq, &ht_irq_chip, |
2676 | + handle_edge_irq, "edge"); |
2677 | + } |
2678 | + return vector; |
2679 | +} |
2680 | +#endif /* CONFIG_HT_IRQ */ |
2681 | + |
2682 | /* -------------------------------------------------------------------------- |
2683 | ACPI-based IOAPIC Configuration |
2684 | -------------------------------------------------------------------------- */ |
2685 | @@ -2758,13 +2923,34 @@ |
2686 | if (!ioapic && (irq < 16)) |
2687 | disable_8259A_irq(irq); |
2688 | |
2689 | + ioapic_write_entry(ioapic, pin, entry); |
2690 | spin_lock_irqsave(&ioapic_lock, flags); |
2691 | - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); |
2692 | - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); |
2693 | - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); |
2694 | + set_native_irq_info(irq, TARGET_CPUS); |
2695 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2696 | |
2697 | return 0; |
2698 | } |
2699 | |
2700 | #endif /* CONFIG_ACPI */ |
2701 | + |
2702 | +static int __init parse_disable_timer_pin_1(char *arg) |
2703 | +{ |
2704 | + disable_timer_pin_1 = 1; |
2705 | + return 0; |
2706 | +} |
2707 | +early_param("disable_timer_pin_1", parse_disable_timer_pin_1); |
2708 | + |
2709 | +static int __init parse_enable_timer_pin_1(char *arg) |
2710 | +{ |
2711 | + disable_timer_pin_1 = -1; |
2712 | + return 0; |
2713 | +} |
2714 | +early_param("enable_timer_pin_1", parse_enable_timer_pin_1); |
2715 | + |
2716 | +static int __init parse_noapic(char *arg) |
2717 | +{ |
2718 | + /* disable IO-APIC */ |
2719 | + disable_ioapic_setup(); |
2720 | + return 0; |
2721 | +} |
2722 | +early_param("noapic", parse_noapic); |
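Taken together, create_irq()/destroy_irq() and arch_setup_msi_irq() cover the native MSI lifecycle: reserve an irq and vector, compose and write the address/data message for the target CPU, and release the vector when the device stops using MSI. A rough sketch of that sequence from a caller's perspective; the driver-side function is hypothetical and the real ordering lives in the generic PCI MSI core rather than in this patch:

/* Sketch of the expected allocate / program / release sequence. */
static int sketch_enable_one_msi(struct pci_dev *dev)
{
	int irq = create_irq();			/* reserve an irq + vector */

	if (irq < 0)
		return irq;

	if (arch_setup_msi_irq(irq, dev) < 0) {	/* compose and write the message */
		destroy_irq(irq);		/* give the vector back */
		return -ENOSPC;
	}

	return irq;				/* the device may now raise MSIs */
}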
2723 | --- a/arch/x86/kernel/io_apic_64-xen.c |
2724 | +++ b/arch/x86/kernel/io_apic_64-xen.c |
2725 | @@ -26,9 +26,12 @@ |
2726 | #include <linux/delay.h> |
2727 | #include <linux/sched.h> |
2728 | #include <linux/smp_lock.h> |
2729 | +#include <linux/pci.h> |
2730 | #include <linux/mc146818rtc.h> |
2731 | #include <linux/acpi.h> |
2732 | #include <linux/sysdev.h> |
2733 | +#include <linux/msi.h> |
2734 | +#include <linux/htirq.h> |
2735 | #ifdef CONFIG_ACPI |
2736 | #include <acpi/acpi_bus.h> |
2737 | #endif |
2738 | @@ -41,6 +44,10 @@ |
2739 | #include <asm/acpi.h> |
2740 | #include <asm/dma.h> |
2741 | #include <asm/nmi.h> |
2742 | +#include <asm/msidef.h> |
2743 | +#include <asm/hypertransport.h> |
2744 | + |
2745 | +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result); |
2746 | |
2747 | #define __apicdebuginit __init |
2748 | |
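Unlike the 32-bit file, the 64-bit port forward-declares a mask-aware allocator: assign_irq_vector(irq, mask, &result) picks a vector for the requested CPU set and reports back, through result, the mask that vector was actually established for, so callers derive the APIC destination from result rather than from their own argument. A condensed caller sketch modelled on how set_ioapic_affinity_irq() uses it later in this file (the helper name is illustrative):

/* Sketch: allocate a vector for a CPU set and compute its destination. */
static int sketch_vector_for_mask(int irq, cpumask_t mask, unsigned int *dest)
{
	cpumask_t tmp;
	int vector = assign_irq_vector(irq, mask, &tmp);

	if (vector < 0)
		return vector;

	*dest = cpu_mask_to_apicid(tmp);	/* destination for the RTE or MSI */
	return vector;
}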
2749 | @@ -48,17 +55,29 @@ |
2750 | |
2751 | static int no_timer_check; |
2752 | |
2753 | -int disable_timer_pin_1 __initdata; |
2754 | +static int disable_timer_pin_1 __initdata; |
2755 | |
2756 | -#ifndef CONFIG_XEN |
2757 | -int timer_over_8254 __initdata = 0; |
2758 | +#ifdef CONFIG_XEN |
2759 | +#include <xen/interface/xen.h> |
2760 | +#include <xen/interface/physdev.h> |
2761 | + |
2762 | +/* Fake i8259 */ |
2763 | +#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) |
2764 | +#define disable_8259A_irq(_irq) ((void)0) |
2765 | +#define i8259A_irq_pending(_irq) (0) |
2766 | + |
2767 | +unsigned long io_apic_irqs; |
2768 | + |
2769 | +#define clear_IO_APIC() ((void)0) |
2770 | +#else |
2771 | +int timer_over_8254 __initdata = 1; |
2772 | |
2773 | /* Where if anywhere is the i8259 connect in external int mode */ |
2774 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; |
2775 | #endif |
2776 | |
2777 | static DEFINE_SPINLOCK(ioapic_lock); |
2778 | -static DEFINE_SPINLOCK(vector_lock); |
2779 | +DEFINE_SPINLOCK(vector_lock); |
2780 | |
2781 | /* |
2782 | * # of IRQ routing registers |
2783 | @@ -83,28 +102,27 @@ |
2784 | short apic, pin, next; |
2785 | } irq_2_pin[PIN_MAP_SIZE]; |
2786 | |
2787 | -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; |
2788 | -#ifdef CONFIG_PCI_MSI |
2789 | -#define vector_to_irq(vector) \ |
2790 | - (platform_legacy_irq(vector) ? vector : vector_irq[vector]) |
2791 | -#else |
2792 | -#define vector_to_irq(vector) (vector) |
2793 | -#endif |
2794 | - |
2795 | -#ifdef CONFIG_XEN |
2796 | - |
2797 | -#include <xen/interface/xen.h> |
2798 | -#include <xen/interface/physdev.h> |
2799 | - |
2800 | -/* Fake i8259 */ |
2801 | -#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) |
2802 | -#define disable_8259A_irq(_irq) ((void)0) |
2803 | -#define i8259A_irq_pending(_irq) (0) |
2804 | +#ifndef CONFIG_XEN |
2805 | +struct io_apic { |
2806 | + unsigned int index; |
2807 | + unsigned int unused[3]; |
2808 | + unsigned int data; |
2809 | +}; |
2810 | |
2811 | -unsigned long io_apic_irqs; |
2812 | +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) |
2813 | +{ |
2814 | + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) |
2815 | + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); |
2816 | +} |
2817 | +#endif |
2818 | |
2819 | -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg) |
2820 | +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) |
2821 | { |
2822 | +#ifndef CONFIG_XEN |
2823 | + struct io_apic __iomem *io_apic = io_apic_base(apic); |
2824 | + writel(reg, &io_apic->index); |
2825 | + return readl(&io_apic->data); |
2826 | +#else |
2827 | struct physdev_apic apic_op; |
2828 | int ret; |
2829 | |
2830 | @@ -114,31 +132,131 @@ |
2831 | if (ret) |
2832 | return ret; |
2833 | return apic_op.value; |
2834 | +#endif |
2835 | } |
2836 | |
2837 | -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) |
2838 | +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) |
2839 | { |
2840 | +#ifndef CONFIG_XEN |
2841 | + struct io_apic __iomem *io_apic = io_apic_base(apic); |
2842 | + writel(reg, &io_apic->index); |
2843 | + writel(value, &io_apic->data); |
2844 | +#else |
2845 | struct physdev_apic apic_op; |
2846 | |
2847 | apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; |
2848 | apic_op.reg = reg; |
2849 | apic_op.value = value; |
2850 | WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op)); |
2851 | +#endif |
2852 | } |
2853 | |
2854 | -#define io_apic_read(a,r) xen_io_apic_read(a,r) |
2855 | -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v) |
2856 | +#ifndef CONFIG_XEN |
2857 | +/* |
2858 | + * Re-write a value: to be used for read-modify-write |
2859 | + * cycles where the read already set up the index register. |
2860 | + */ |
2861 | +static inline void io_apic_modify(unsigned int apic, unsigned int value) |
2862 | +{ |
2863 | + struct io_apic __iomem *io_apic = io_apic_base(apic); |
2864 | + writel(value, &io_apic->data); |
2865 | +} |
2866 | +#else |
2867 | +#define io_apic_modify io_apic_write |
2868 | +#endif |
2869 | |
2870 | -#define clear_IO_APIC() ((void)0) |
2871 | +/* |
2872 | + * Synchronize the IO-APIC and the CPU by doing |
2873 | + * a dummy read from the IO-APIC |
2874 | + */ |
2875 | +static inline void io_apic_sync(unsigned int apic) |
2876 | +{ |
2877 | +#ifndef CONFIG_XEN |
2878 | + struct io_apic __iomem *io_apic = io_apic_base(apic); |
2879 | + readl(&io_apic->data); |
2880 | +#endif |
2881 | +} |
2882 | |
2883 | -#else |
2884 | +union entry_union { |
2885 | + struct { u32 w1, w2; }; |
2886 | + struct IO_APIC_route_entry entry; |
2887 | +}; |
2888 | + |
2889 | +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) |
2890 | +{ |
2891 | + union entry_union eu; |
2892 | + unsigned long flags; |
2893 | + spin_lock_irqsave(&ioapic_lock, flags); |
2894 | + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); |
2895 | + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); |
2896 | + spin_unlock_irqrestore(&ioapic_lock, flags); |
2897 | + return eu.entry; |
2898 | +} |
2899 | + |
2900 | +/* |
2901 | + * When we write a new IO APIC routing entry, we need to write the high |
2902 | + * word first! If the mask bit in the low word is clear, we will enable |
2903 | + * the interrupt, and we need to make sure the entry is fully populated |
2904 | + * before that happens. |
2905 | + */ |
2906 | +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
2907 | +{ |
2908 | + unsigned long flags; |
2909 | + union entry_union eu; |
2910 | + eu.entry = e; |
2911 | + spin_lock_irqsave(&ioapic_lock, flags); |
2912 | + io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
2913 | + io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
2914 | + spin_unlock_irqrestore(&ioapic_lock, flags); |
2915 | +} |
2916 | + |
2917 | +#ifndef CONFIG_XEN |
2918 | +/* |
2919 | + * When we mask an IO APIC routing entry, we need to write the low |
2920 | + * word first, in order to set the mask bit before we change the |
2921 | + * high bits! |
2922 | + */ |
2923 | +static void ioapic_mask_entry(int apic, int pin) |
2924 | +{ |
2925 | + unsigned long flags; |
2926 | + union entry_union eu = { .entry.mask = 1 }; |
2927 | + |
2928 | + spin_lock_irqsave(&ioapic_lock, flags); |
2929 | + io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
2930 | + io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
2931 | + spin_unlock_irqrestore(&ioapic_lock, flags); |
2932 | +} |
2933 | |
2934 | #ifdef CONFIG_SMP |
2935 | +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) |
2936 | +{ |
2937 | + int apic, pin; |
2938 | + struct irq_pin_list *entry = irq_2_pin + irq; |
2939 | + |
2940 | + BUG_ON(irq >= NR_IRQS); |
2941 | + for (;;) { |
2942 | + unsigned int reg; |
2943 | + apic = entry->apic; |
2944 | + pin = entry->pin; |
2945 | + if (pin == -1) |
2946 | + break; |
2947 | + io_apic_write(apic, 0x11 + pin*2, dest); |
2948 | + reg = io_apic_read(apic, 0x10 + pin*2); |
2949 | + reg &= ~0x000000ff; |
2950 | + reg |= vector; |
2951 | + io_apic_modify(apic, reg); |
2952 | + if (!entry->next) |
2953 | + break; |
2954 | + entry = irq_2_pin + entry->next; |
2955 | + } |
2956 | +} |
2957 | + |
2958 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) |
2959 | { |
2960 | unsigned long flags; |
2961 | unsigned int dest; |
2962 | cpumask_t tmp; |
2963 | + int vector; |
2964 | |
2965 | cpus_and(tmp, mask, cpu_online_map); |
2966 | if (cpus_empty(tmp)) |
2967 | @@ -146,7 +264,11 @@ |
2968 | |
2969 | cpus_and(mask, tmp, CPU_MASK_ALL); |
2970 | |
2971 | - dest = cpu_mask_to_apicid(mask); |
2972 | + vector = assign_irq_vector(irq, mask, &tmp); |
2973 | + if (vector < 0) |
2974 | + return; |
2975 | + |
2976 | + dest = cpu_mask_to_apicid(tmp); |
2977 | |
2978 | /* |
2979 | * Only the high 8 bits are valid. |
2980 | @@ -154,13 +276,12 @@ |
2981 | dest = SET_APIC_LOGICAL_ID(dest); |
2982 | |
2983 | spin_lock_irqsave(&ioapic_lock, flags); |
2984 | - __DO_ACTION(1, = dest, ) |
2985 | - set_irq_info(irq, mask); |
2986 | + __target_IO_APIC_irq(irq, dest, vector); |
2987 | + set_native_irq_info(irq, mask); |
2988 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2989 | } |
2990 | #endif |
2991 | - |
2992 | -#endif /* !CONFIG_XEN */ |
2993 | +#endif |
2994 | |
2995 | /* |
2996 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are |
2997 | @@ -240,24 +361,15 @@ |
2998 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
2999 | { |
3000 | struct IO_APIC_route_entry entry; |
3001 | - unsigned long flags; |
3002 | |
3003 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ |
3004 | - spin_lock_irqsave(&ioapic_lock, flags); |
3005 | - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); |
3006 | - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); |
3007 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
3008 | + entry = ioapic_read_entry(apic, pin); |
3009 | if (entry.delivery_mode == dest_SMI) |
3010 | return; |
3011 | /* |
3012 | * Disable it in the IO-APIC irq-routing table: |
3013 | */ |
3014 | - memset(&entry, 0, sizeof(entry)); |
3015 | - entry.mask = 1; |
3016 | - spin_lock_irqsave(&ioapic_lock, flags); |
3017 | - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); |
3018 | - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); |
3019 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
3020 | + ioapic_mask_entry(apic, pin); |
3021 | } |
3022 | |
3023 | static void clear_IO_APIC (void) |
3024 | @@ -271,16 +383,6 @@ |
3025 | |
3026 | #endif /* !CONFIG_XEN */ |
3027 | |
3028 | -static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF }; |
3029 | - |
3030 | -/* |
3031 | - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to |
3032 | - * specific CPU-side IRQs. |
3033 | - */ |
3034 | - |
3035 | -#define MAX_PIRQS 8 |
3036 | -static int pirq_entries [MAX_PIRQS]; |
3037 | -static int pirqs_enabled; |
3038 | int skip_ioapic_setup; |
3039 | int ioapic_force; |
3040 | |
3041 | @@ -289,18 +391,17 @@ |
3042 | static int __init disable_ioapic_setup(char *str) |
3043 | { |
3044 | skip_ioapic_setup = 1; |
3045 | - return 1; |
3046 | + return 0; |
3047 | } |
3048 | +early_param("noapic", disable_ioapic_setup); |
3049 | |
3050 | -static int __init enable_ioapic_setup(char *str) |
3051 | +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ |
3052 | +static int __init disable_timer_pin_setup(char *arg) |
3053 | { |
3054 | - ioapic_force = 1; |
3055 | - skip_ioapic_setup = 0; |
3056 | + disable_timer_pin_1 = 1; |
3057 | return 1; |
3058 | } |
3059 | - |
3060 | -__setup("noapic", disable_ioapic_setup); |
3061 | -__setup("apic", enable_ioapic_setup); |
3062 | +__setup("disable_timer_pin_1", disable_timer_pin_setup); |
3063 | |
3064 | #ifndef CONFIG_XEN |
3065 | static int __init setup_disable_8254_timer(char *s) |
3066 | @@ -318,137 +419,6 @@ |
3067 | __setup("enable_8254_timer", setup_enable_8254_timer); |
3068 | #endif /* !CONFIG_XEN */ |
3069 | |
3070 | -#include <asm/pci-direct.h> |
3071 | -#include <linux/pci_ids.h> |
3072 | -#include <linux/pci.h> |
3073 | - |
3074 | - |
3075 | -#ifdef CONFIG_ACPI |
3076 | - |
3077 | -static int nvidia_hpet_detected __initdata; |
3078 | - |
3079 | -static int __init nvidia_hpet_check(unsigned long phys, unsigned long size) |
3080 | -{ |
3081 | - nvidia_hpet_detected = 1; |
3082 | - return 0; |
3083 | -} |
3084 | -#endif |
3085 | - |
3086 | -/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC |
3087 | - off. Check for an Nvidia or VIA PCI bridge and turn it off. |
3088 | - Use pci direct infrastructure because this runs before the PCI subsystem. |
3089 | - |
3090 | - Can be overwritten with "apic" |
3091 | - |
3092 | - And another hack to disable the IOMMU on VIA chipsets. |
3093 | - |
3094 | - ... and others. Really should move this somewhere else. |
3095 | - |
3096 | - Kludge-O-Rama. */ |
3097 | -void __init check_ioapic(void) |
3098 | -{ |
3099 | - int num,slot,func; |
3100 | - /* Poor man's PCI discovery */ |
3101 | - for (num = 0; num < 32; num++) { |
3102 | - for (slot = 0; slot < 32; slot++) { |
3103 | - for (func = 0; func < 8; func++) { |
3104 | - u32 class; |
3105 | - u32 vendor; |
3106 | - u8 type; |
3107 | - class = read_pci_config(num,slot,func, |
3108 | - PCI_CLASS_REVISION); |
3109 | - if (class == 0xffffffff) |
3110 | - break; |
3111 | - |
3112 | - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) |
3113 | - continue; |
3114 | - |
3115 | - vendor = read_pci_config(num, slot, func, |
3116 | - PCI_VENDOR_ID); |
3117 | - vendor &= 0xffff; |
3118 | - switch (vendor) { |
3119 | - case PCI_VENDOR_ID_VIA: |
3120 | -#ifdef CONFIG_IOMMU |
3121 | - if ((end_pfn > MAX_DMA32_PFN || |
3122 | - force_iommu) && |
3123 | - !iommu_aperture_allowed) { |
3124 | - printk(KERN_INFO |
3125 | - "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n"); |
3126 | - iommu_aperture_disabled = 1; |
3127 | - } |
3128 | -#endif |
3129 | - return; |
3130 | - case PCI_VENDOR_ID_NVIDIA: |
3131 | -#ifdef CONFIG_ACPI |
3132 | - /* |
3133 | - * All timer overrides on Nvidia are |
3134 | - * wrong unless HPET is enabled. |
3135 | - */ |
3136 | - nvidia_hpet_detected = 0; |
3137 | - acpi_table_parse(ACPI_HPET, |
3138 | - nvidia_hpet_check); |
3139 | - if (nvidia_hpet_detected == 0) { |
3140 | - acpi_skip_timer_override = 1; |
3141 | - printk(KERN_INFO "Nvidia board " |
3142 | - "detected. Ignoring ACPI " |
3143 | - "timer override.\n"); |
3144 | - } |
3145 | -#endif |
3146 | - /* RED-PEN skip them on mptables too? */ |
3147 | - return; |
3148 | - case PCI_VENDOR_ID_ATI: |
3149 | - |
3150 | - /* This should be actually default, but |
3151 | - for 2.6.16 let's do it for ATI only where |
3152 | - it's really needed. */ |
3153 | -#ifndef CONFIG_XEN |
3154 | - if (timer_over_8254 == 1) { |
3155 | - timer_over_8254 = 0; |
3156 | - printk(KERN_INFO |
3157 | - "ATI board detected. Disabling timer routing over 8254.\n"); |
3158 | - } |
3159 | -#endif |
3160 | - return; |
3161 | - } |
3162 | - |
3163 | - |
3164 | - /* No multi-function device? */ |
3165 | - type = read_pci_config_byte(num,slot,func, |
3166 | - PCI_HEADER_TYPE); |
3167 | - if (!(type & 0x80)) |
3168 | - break; |
3169 | - } |
3170 | - } |
3171 | - } |
3172 | -} |
3173 | - |
3174 | -static int __init ioapic_pirq_setup(char *str) |
3175 | -{ |
3176 | - int i, max; |
3177 | - int ints[MAX_PIRQS+1]; |
3178 | - |
3179 | - get_options(str, ARRAY_SIZE(ints), ints); |
3180 | - |
3181 | - for (i = 0; i < MAX_PIRQS; i++) |
3182 | - pirq_entries[i] = -1; |
3183 | - |
3184 | - pirqs_enabled = 1; |
3185 | - apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n"); |
3186 | - max = MAX_PIRQS; |
3187 | - if (ints[0] < MAX_PIRQS) |
3188 | - max = ints[0]; |
3189 | - |
3190 | - for (i = 0; i < max; i++) { |
3191 | - apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); |
3192 | - /* |
3193 | - * PIRQs are mapped upside down, usually. |
3194 | - */ |
3195 | - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; |
3196 | - } |
3197 | - return 1; |
3198 | -} |
3199 | - |
3200 | -__setup("pirq=", ioapic_pirq_setup); |
3201 | |
3202 | /* |
3203 | * Find the IRQ entry number of a certain pin. |
3204 | @@ -478,9 +448,7 @@ |
3205 | for (i = 0; i < mp_irq_entries; i++) { |
3206 | int lbus = mp_irqs[i].mpc_srcbus; |
3207 | |
3208 | - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || |
3209 | - mp_bus_id_to_type[lbus] == MP_BUS_EISA || |
3210 | - mp_bus_id_to_type[lbus] == MP_BUS_MCA) && |
3211 | + if (test_bit(lbus, mp_bus_not_pci) && |
3212 | (mp_irqs[i].mpc_irqtype == type) && |
3213 | (mp_irqs[i].mpc_srcbusirq == irq)) |
3214 | |
3215 | @@ -496,9 +464,7 @@ |
3216 | for (i = 0; i < mp_irq_entries; i++) { |
3217 | int lbus = mp_irqs[i].mpc_srcbus; |
3218 | |
3219 | - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || |
3220 | - mp_bus_id_to_type[lbus] == MP_BUS_EISA || |
3221 | - mp_bus_id_to_type[lbus] == MP_BUS_MCA) && |
3222 | + if (test_bit(lbus, mp_bus_not_pci) && |
3223 | (mp_irqs[i].mpc_irqtype == type) && |
3224 | (mp_irqs[i].mpc_srcbusirq == irq)) |
3225 | break; |
3226 | @@ -539,7 +505,7 @@ |
3227 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) |
3228 | break; |
3229 | |
3230 | - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && |
3231 | + if (!test_bit(lbus, mp_bus_not_pci) && |
3232 | !mp_irqs[i].mpc_irqtype && |
3233 | (bus == lbus) && |
3234 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { |
3235 | @@ -562,27 +528,6 @@ |
3236 | return best_guess; |
3237 | } |
3238 | |
3239 | -/* |
3240 | - * EISA Edge/Level control register, ELCR |
3241 | - */ |
3242 | -static int EISA_ELCR(unsigned int irq) |
3243 | -{ |
3244 | - if (irq < 16) { |
3245 | - unsigned int port = 0x4d0 + (irq >> 3); |
3246 | - return (inb(port) >> (irq & 7)) & 1; |
3247 | - } |
3248 | - apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq); |
3249 | - return 0; |
3250 | -} |
3251 | - |
3252 | -/* EISA interrupts are always polarity zero and can be edge or level |
3253 | - * trigger depending on the ELCR value. If an interrupt is listed as |
3254 | - * EISA conforming in the MP table, that means its trigger type must |
3255 | - * be read in from the ELCR */ |
3256 | - |
3257 | -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) |
3258 | -#define default_EISA_polarity(idx) (0) |
3259 | - |
3260 | /* ISA interrupts are always polarity zero edge triggered, |
3261 | * when listed as conforming in the MP table. */ |
3262 | |
3263 | @@ -595,12 +540,6 @@ |
3264 | #define default_PCI_trigger(idx) (1) |
3265 | #define default_PCI_polarity(idx) (1) |
3266 | |
3267 | -/* MCA interrupts are always polarity zero level triggered, |
3268 | - * when listed as conforming in the MP table. */ |
3269 | - |
3270 | -#define default_MCA_trigger(idx) (1) |
3271 | -#define default_MCA_polarity(idx) (0) |
3272 | - |
3273 | static int __init MPBIOS_polarity(int idx) |
3274 | { |
3275 | int bus = mp_irqs[idx].mpc_srcbus; |
3276 | @@ -612,38 +551,11 @@ |
3277 | switch (mp_irqs[idx].mpc_irqflag & 3) |
3278 | { |
3279 | case 0: /* conforms, ie. bus-type dependent polarity */ |
3280 | - { |
3281 | - switch (mp_bus_id_to_type[bus]) |
3282 | - { |
3283 | - case MP_BUS_ISA: /* ISA pin */ |
3284 | - { |
3285 | - polarity = default_ISA_polarity(idx); |
3286 | - break; |
3287 | - } |
3288 | - case MP_BUS_EISA: /* EISA pin */ |
3289 | - { |
3290 | - polarity = default_EISA_polarity(idx); |
3291 | - break; |
3292 | - } |
3293 | - case MP_BUS_PCI: /* PCI pin */ |
3294 | - { |
3295 | - polarity = default_PCI_polarity(idx); |
3296 | - break; |
3297 | - } |
3298 | - case MP_BUS_MCA: /* MCA pin */ |
3299 | - { |
3300 | - polarity = default_MCA_polarity(idx); |
3301 | - break; |
3302 | - } |
3303 | - default: |
3304 | - { |
3305 | - printk(KERN_WARNING "broken BIOS!!\n"); |
3306 | - polarity = 1; |
3307 | - break; |
3308 | - } |
3309 | - } |
3310 | + if (test_bit(bus, mp_bus_not_pci)) |
3311 | + polarity = default_ISA_polarity(idx); |
3312 | + else |
3313 | + polarity = default_PCI_polarity(idx); |
3314 | break; |
3315 | - } |
3316 | case 1: /* high active */ |
3317 | { |
3318 | polarity = 0; |
3319 | @@ -681,38 +593,11 @@ |
3320 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) |
3321 | { |
3322 | case 0: /* conforms, ie. bus-type dependent */ |
3323 | - { |
3324 | - switch (mp_bus_id_to_type[bus]) |
3325 | - { |
3326 | - case MP_BUS_ISA: /* ISA pin */ |
3327 | - { |
3328 | - trigger = default_ISA_trigger(idx); |
3329 | - break; |
3330 | - } |
3331 | - case MP_BUS_EISA: /* EISA pin */ |
3332 | - { |
3333 | - trigger = default_EISA_trigger(idx); |
3334 | - break; |
3335 | - } |
3336 | - case MP_BUS_PCI: /* PCI pin */ |
3337 | - { |
3338 | - trigger = default_PCI_trigger(idx); |
3339 | - break; |
3340 | - } |
3341 | - case MP_BUS_MCA: /* MCA pin */ |
3342 | - { |
3343 | - trigger = default_MCA_trigger(idx); |
3344 | - break; |
3345 | - } |
3346 | - default: |
3347 | - { |
3348 | - printk(KERN_WARNING "broken BIOS!!\n"); |
3349 | - trigger = 1; |
3350 | - break; |
3351 | - } |
3352 | - } |
3353 | + if (test_bit(bus, mp_bus_not_pci)) |
3354 | + trigger = default_ISA_trigger(idx); |
3355 | + else |
3356 | + trigger = default_PCI_trigger(idx); |
3357 | break; |
3358 | - } |
3359 | case 1: /* edge */ |
3360 | { |
3361 | trigger = 0; |
3362 | @@ -749,64 +634,6 @@ |
3363 | return MPBIOS_trigger(idx); |
3364 | } |
3365 | |
3366 | -static int next_irq = 16; |
3367 | - |
3368 | -/* |
3369 | - * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ |
3370 | - * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number |
3371 | - * from ACPI, which can reach 800 in large boxen. |
3372 | - * |
3373 | - * Compact the sparse GSI space into a sequential IRQ series and reuse |
3374 | - * vectors if possible. |
3375 | - */ |
3376 | -int gsi_irq_sharing(int gsi) |
3377 | -{ |
3378 | - int i, tries, vector; |
3379 | - |
3380 | - BUG_ON(gsi >= NR_IRQ_VECTORS); |
3381 | - |
3382 | - if (platform_legacy_irq(gsi)) |
3383 | - return gsi; |
3384 | - |
3385 | - if (gsi_2_irq[gsi] != 0xFF) |
3386 | - return (int)gsi_2_irq[gsi]; |
3387 | - |
3388 | - tries = NR_IRQS; |
3389 | - try_again: |
3390 | - vector = assign_irq_vector(gsi); |
3391 | - |
3392 | - /* |
3393 | - * Sharing vectors means sharing IRQs, so scan irq_vectors for previous |
3394 | - * use of vector and if found, return that IRQ. However, we never want |
3395 | - * to share legacy IRQs, which usually have a different trigger mode |
3396 | - * than PCI. |
3397 | - */ |
3398 | - for (i = 0; i < NR_IRQS; i++) |
3399 | - if (IO_APIC_VECTOR(i) == vector) |
3400 | - break; |
3401 | - if (platform_legacy_irq(i)) { |
3402 | - if (--tries >= 0) { |
3403 | - IO_APIC_VECTOR(i) = 0; |
3404 | - goto try_again; |
3405 | - } |
3406 | - panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi); |
3407 | - } |
3408 | - if (i < NR_IRQS) { |
3409 | - gsi_2_irq[gsi] = i; |
3410 | - printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n", |
3411 | - gsi, vector, i); |
3412 | - return i; |
3413 | - } |
3414 | - |
3415 | - i = next_irq++; |
3416 | - BUG_ON(i >= NR_IRQS); |
3417 | - gsi_2_irq[gsi] = i; |
3418 | - IO_APIC_VECTOR(i) = vector; |
3419 | - printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n", |
3420 | - gsi, vector, i); |
3421 | - return i; |
3422 | -} |
3423 | - |
3424 | static int pin_2_irq(int idx, int apic, int pin) |
3425 | { |
3426 | int irq, i; |
3427 | @@ -818,49 +645,16 @@ |
3428 | if (mp_irqs[idx].mpc_dstirq != pin) |
3429 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); |
3430 | |
3431 | - switch (mp_bus_id_to_type[bus]) |
3432 | - { |
3433 | - case MP_BUS_ISA: /* ISA pin */ |
3434 | - case MP_BUS_EISA: |
3435 | - case MP_BUS_MCA: |
3436 | - { |
3437 | - irq = mp_irqs[idx].mpc_srcbusirq; |
3438 | - break; |
3439 | - } |
3440 | - case MP_BUS_PCI: /* PCI pin */ |
3441 | - { |
3442 | - /* |
3443 | - * PCI IRQs are mapped in order |
3444 | - */ |
3445 | - i = irq = 0; |
3446 | - while (i < apic) |
3447 | - irq += nr_ioapic_registers[i++]; |
3448 | - irq += pin; |
3449 | - irq = gsi_irq_sharing(irq); |
3450 | - break; |
3451 | - } |
3452 | - default: |
3453 | - { |
3454 | - printk(KERN_ERR "unknown bus type %d.\n",bus); |
3455 | - irq = 0; |
3456 | - break; |
3457 | - } |
3458 | - } |
3459 | - BUG_ON(irq >= NR_IRQS); |
3460 | - |
3461 | - /* |
3462 | - * PCI IRQ command line redirection. Yes, limits are hardcoded. |
3463 | - */ |
3464 | - if ((pin >= 16) && (pin <= 23)) { |
3465 | - if (pirq_entries[pin-16] != -1) { |
3466 | - if (!pirq_entries[pin-16]) { |
3467 | - apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16); |
3468 | - } else { |
3469 | - irq = pirq_entries[pin-16]; |
3470 | - apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n", |
3471 | - pin-16, irq); |
3472 | - } |
3473 | - } |
3474 | + if (test_bit(bus, mp_bus_not_pci)) { |
3475 | + irq = mp_irqs[idx].mpc_srcbusirq; |
3476 | + } else { |
3477 | + /* |
3478 | + * PCI IRQs are mapped in order |
3479 | + */ |
3480 | + i = irq = 0; |
3481 | + while (i < apic) |
3482 | + irq += nr_ioapic_registers[i++]; |
3483 | + irq += pin; |
3484 | } |
3485 | BUG_ON(irq >= NR_IRQS); |
3486 | return irq; |
3487 | @@ -884,43 +678,68 @@ |
3488 | } |
3489 | |
3490 | /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ |
3491 | -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; |
3492 | +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; |
3493 | |
3494 | -int assign_irq_vector(int irq) |
3495 | +static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) |
3496 | { |
3497 | - unsigned long flags; |
3498 | int vector; |
3499 | struct physdev_irq irq_op; |
3500 | |
3501 | - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); |
3502 | + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); |
3503 | |
3504 | - spin_lock_irqsave(&vector_lock, flags); |
3505 | + cpus_and(*result, mask, cpu_online_map); |
3506 | |
3507 | - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { |
3508 | - spin_unlock_irqrestore(&vector_lock, flags); |
3509 | - return IO_APIC_VECTOR(irq); |
3510 | - } |
3511 | + if (irq_vector[irq] > 0) |
3512 | + return irq_vector[irq]; |
3513 | |
3514 | irq_op.irq = irq; |
3515 | - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { |
3516 | - spin_unlock_irqrestore(&vector_lock, flags); |
3517 | + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) |
3518 | return -ENOSPC; |
3519 | - } |
3520 | |
3521 | vector = irq_op.vector; |
3522 | - vector_irq[vector] = irq; |
3523 | - if (irq != AUTO_ASSIGN) |
3524 | - IO_APIC_VECTOR(irq) = vector; |
3525 | + irq_vector[irq] = vector; |
3526 | |
3527 | - spin_unlock_irqrestore(&vector_lock, flags); |
3528 | + return vector; |
3529 | +} |
3530 | |
3531 | +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) |
3532 | +{ |
3533 | + int vector; |
3534 | + unsigned long flags; |
3535 | + |
3536 | + spin_lock_irqsave(&vector_lock, flags); |
3537 | + vector = __assign_irq_vector(irq, mask, result); |
3538 | + spin_unlock_irqrestore(&vector_lock, flags); |
3539 | return vector; |
3540 | } |
3541 | |
3542 | -extern void (*interrupt[NR_IRQS])(void); |
3543 | #ifndef CONFIG_XEN |
3544 | -static struct hw_interrupt_type ioapic_level_type; |
3545 | -static struct hw_interrupt_type ioapic_edge_type; |
3546 | +void __setup_vector_irq(int cpu) |
3547 | +{ |
3548 | + /* Initialize vector_irq on a new cpu */ |
3549 | + /* This function must be called with vector_lock held */ |
3550 | + int irq, vector; |
3551 | + |
3552 | + /* Mark the inuse vectors */ |
3553 | + for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) { |
3554 | + if (!cpu_isset(cpu, irq_domain[irq])) |
3555 | + continue; |
3556 | + vector = irq_vector[irq]; |
3557 | + per_cpu(vector_irq, cpu)[vector] = irq; |
3558 | + } |
3559 | + /* Mark the free vectors */ |
3560 | + for (vector = 0; vector < NR_VECTORS; ++vector) { |
3561 | + irq = per_cpu(vector_irq, cpu)[vector]; |
3562 | + if (irq < 0) |
3563 | + continue; |
3564 | + if (!cpu_isset(cpu, irq_domain[irq])) |
3565 | + per_cpu(vector_irq, cpu)[vector] = -1; |
3566 | + } |
3567 | +} |
3568 | + |
3569 | +extern void (*interrupt[NR_IRQS])(void); |
3570 | + |
3571 | +static struct irq_chip ioapic_chip; |
3572 | |
3573 | #define IOAPIC_AUTO -1 |
3574 | #define IOAPIC_EDGE 0 |
3575 | @@ -928,16 +747,15 @@ |
3576 | |
3577 | static void ioapic_register_intr(int irq, int vector, unsigned long trigger) |
3578 | { |
3579 | - unsigned idx; |
3580 | - |
3581 | - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; |
3582 | - |
3583 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
3584 | trigger == IOAPIC_LEVEL) |
3585 | - irq_desc[idx].chip = &ioapic_level_type; |
3586 | - else |
3587 | - irq_desc[idx].chip = &ioapic_edge_type; |
3588 | - set_intr_gate(vector, interrupt[idx]); |
3589 | + set_irq_chip_and_handler_name(irq, &ioapic_chip, |
3590 | + handle_fasteoi_irq, "fasteoi"); |
3591 | + else { |
3592 | + irq_desc[irq].status |= IRQ_DELAYED_DISABLE; |
3593 | + set_irq_chip_and_handler_name(irq, &ioapic_chip, |
3594 | + handle_edge_irq, "edge"); |
3595 | + } |
3596 | } |
3597 | #else |
3598 | #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) |
3599 | @@ -990,16 +808,21 @@ |
3600 | continue; |
3601 | |
3602 | if (IO_APIC_IRQ(irq)) { |
3603 | - vector = assign_irq_vector(irq); |
3604 | + cpumask_t mask; |
3605 | + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); |
3606 | + if (vector < 0) |
3607 | + continue; |
3608 | + |
3609 | + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); |
3610 | entry.vector = vector; |
3611 | |
3612 | ioapic_register_intr(irq, vector, IOAPIC_AUTO); |
3613 | if (!apic && (irq < 16)) |
3614 | disable_8259A_irq(irq); |
3615 | } |
3616 | + ioapic_write_entry(apic, pin, entry); |
3617 | + |
3618 | spin_lock_irqsave(&ioapic_lock, flags); |
3619 | - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); |
3620 | - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); |
3621 | set_native_irq_info(irq, TARGET_CPUS); |
3622 | spin_unlock_irqrestore(&ioapic_lock, flags); |
3623 | } |
3624 | @@ -1042,7 +865,7 @@ |
3625 | * The timer IRQ doesn't have to know that behind the |
3626 | * scene we have a 8259A-master in AEOI mode ... |
3627 | */ |
3628 | - irq_desc[0].chip = &ioapic_edge_type; |
3629 | + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); |
3630 | |
3631 | /* |
3632 | * Add it to the IO-APIC irq-routing table: |
3633 | @@ -1138,10 +961,7 @@ |
3634 | for (i = 0; i <= reg_01.bits.entries; i++) { |
3635 | struct IO_APIC_route_entry entry; |
3636 | |
3637 | - spin_lock_irqsave(&ioapic_lock, flags); |
3638 | - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); |
3639 | - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); |
3640 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
3641 | + entry = ioapic_read_entry(apic, i); |
3642 | |
3643 | printk(KERN_DEBUG " %02x %03X %02X ", |
3644 | i, |
3645 | @@ -1161,17 +981,12 @@ |
3646 | ); |
3647 | } |
3648 | } |
3649 | - if (use_pci_vector()) |
3650 | - printk(KERN_INFO "Using vector-based indexing\n"); |
3651 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
3652 | for (i = 0; i < NR_IRQS; i++) { |
3653 | struct irq_pin_list *entry = irq_2_pin + i; |
3654 | if (entry->pin < 0) |
3655 | continue; |
3656 | - if (use_pci_vector() && !platform_legacy_irq(i)) |
3657 | - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); |
3658 | - else |
3659 | - printk(KERN_DEBUG "IRQ%d ", i); |
3660 | + printk(KERN_DEBUG "IRQ%d ", i); |
3661 | for (;;) { |
3662 | printk("-> %d:%d", entry->apic, entry->pin); |
3663 | if (!entry->next) |
3664 | @@ -1335,9 +1150,6 @@ |
3665 | irq_2_pin[i].pin = -1; |
3666 | irq_2_pin[i].next = 0; |
3667 | } |
3668 | - if (!pirqs_enabled) |
3669 | - for (i = 0; i < MAX_PIRQS; i++) |
3670 | - pirq_entries[i] = -1; |
3671 | |
3672 | /* |
3673 | * The number of IO-APIC IRQ registers (== #pins): |
3674 | @@ -1354,11 +1166,7 @@ |
3675 | /* See if any of the pins is in ExtINT mode */ |
3676 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
3677 | struct IO_APIC_route_entry entry; |
3678 | - spin_lock_irqsave(&ioapic_lock, flags); |
3679 | - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); |
3680 | - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); |
3681 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
3682 | - |
3683 | + entry = ioapic_read_entry(apic, pin); |
3684 | |
3685 | /* If the interrupt line is enabled and in ExtInt mode |
3686 | * I have found the pin where the i8259 is connected. |
3687 | @@ -1412,7 +1220,6 @@ |
3688 | */ |
3689 | if (ioapic_i8259.pin != -1) { |
3690 | struct IO_APIC_route_entry entry; |
3691 | - unsigned long flags; |
3692 | |
3693 | memset(&entry, 0, sizeof(entry)); |
3694 | entry.mask = 0; /* Enabled */ |
3695 | @@ -1429,12 +1236,7 @@ |
3696 | /* |
3697 | * Add it to the IO-APIC irq-routing table: |
3698 | */ |
3699 | - spin_lock_irqsave(&ioapic_lock, flags); |
3700 | - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, |
3701 | - *(((int *)&entry)+1)); |
3702 | - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, |
3703 | - *(((int *)&entry)+0)); |
3704 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
3705 | + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); |
3706 | } |
3707 | |
3708 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); |
3709 | @@ -1442,76 +1244,6 @@ |
3710 | } |
3711 | |
3712 | /* |
3713 | - * function to set the IO-APIC physical IDs based on the |
3714 | - * values stored in the MPC table. |
3715 | - * |
3716 | - * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 |
3717 | - */ |
3718 | - |
3719 | -#ifndef CONFIG_XEN |
3720 | -static void __init setup_ioapic_ids_from_mpc (void) |
3721 | -{ |
3722 | - union IO_APIC_reg_00 reg_00; |
3723 | - int apic; |
3724 | - int i; |
3725 | - unsigned char old_id; |
3726 | - unsigned long flags; |
3727 | - |
3728 | - /* |
3729 | - * Set the IOAPIC ID to the value stored in the MPC table. |
3730 | - */ |
3731 | - for (apic = 0; apic < nr_ioapics; apic++) { |
3732 | - |
3733 | - /* Read the register 0 value */ |
3734 | - spin_lock_irqsave(&ioapic_lock, flags); |
3735 | - reg_00.raw = io_apic_read(apic, 0); |
3736 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
3737 | - |
3738 | - old_id = mp_ioapics[apic].mpc_apicid; |
3739 | - |
3740 | - |
3741 | - printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid); |
3742 | - |
3743 | - |
3744 | - /* |
3745 | - * We need to adjust the IRQ routing table |
3746 | - * if the ID changed. |
3747 | - */ |
3748 | - if (old_id != mp_ioapics[apic].mpc_apicid) |
3749 | - for (i = 0; i < mp_irq_entries; i++) |
3750 | - if (mp_irqs[i].mpc_dstapic == old_id) |
3751 | - mp_irqs[i].mpc_dstapic |
3752 | - = mp_ioapics[apic].mpc_apicid; |
3753 | - |
3754 | - /* |
3755 | - * Read the right value from the MPC table and |
3756 | - * write it into the ID register. |
3757 | - */ |
3758 | - apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", |
3759 | - mp_ioapics[apic].mpc_apicid); |
3760 | - |
3761 | - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; |
3762 | - spin_lock_irqsave(&ioapic_lock, flags); |
3763 | - io_apic_write(apic, 0, reg_00.raw); |
3764 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
3765 | - |
3766 | - /* |
3767 | - * Sanity check |
3768 | - */ |
3769 | - spin_lock_irqsave(&ioapic_lock, flags); |
3770 | - reg_00.raw = io_apic_read(apic, 0); |
3771 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
3772 | - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) |
3773 | - printk("could not set ID!\n"); |
3774 | - else |
3775 | - apic_printk(APIC_VERBOSE," ok.\n"); |
3776 | - } |
3777 | -} |
3778 | -#else |
3779 | -static void __init setup_ioapic_ids_from_mpc(void) { } |
3780 | -#endif |
3781 | - |
3782 | -/* |
3783 | * There is a nasty bug in some older SMP boards, their mptable lies |
3784 | * about the timer IRQ. We do the following to work around the situation: |
3785 | * |
3786 | @@ -1565,7 +1297,7 @@ |
3787 | * an edge even if it isn't on the 8259A... |
3788 | */ |
3789 | |
3790 | -static unsigned int startup_edge_ioapic_irq(unsigned int irq) |
3791 | +static unsigned int startup_ioapic_irq(unsigned int irq) |
3792 | { |
3793 | int was_pending = 0; |
3794 | unsigned long flags; |
3795 | @@ -1582,107 +1314,19 @@ |
3796 | return was_pending; |
3797 | } |
3798 | |
3799 | -/* |
3800 | - * Once we have recorded IRQ_PENDING already, we can mask the |
3801 | - * interrupt for real. This prevents IRQ storms from unhandled |
3802 | - * devices. |
3803 | - */ |
3804 | -static void ack_edge_ioapic_irq(unsigned int irq) |
3805 | -{ |
3806 | - move_irq(irq); |
3807 | - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) |
3808 | - == (IRQ_PENDING | IRQ_DISABLED)) |
3809 | - mask_IO_APIC_irq(irq); |
3810 | - ack_APIC_irq(); |
3811 | -} |
3812 | - |
3813 | -/* |
3814 | - * Level triggered interrupts can just be masked, |
3815 | - * and shutting down and starting up the interrupt |
3816 | - * is the same as enabling and disabling them -- except |
3817 | - * with a startup need to return a "was pending" value. |
3818 | - * |
3819 | - * Level triggered interrupts are special because we |
3820 | - * do not touch any IO-APIC register while handling |
3821 | - * them. We ack the APIC in the end-IRQ handler, not |
3822 | - * in the start-IRQ-handler. Protection against reentrance |
3823 | - * from the same interrupt is still provided, both by the |
3824 | - * generic IRQ layer and by the fact that an unacked local |
3825 | - * APIC does not accept IRQs. |
3826 | - */ |
3827 | -static unsigned int startup_level_ioapic_irq (unsigned int irq) |
3828 | -{ |
3829 | - unmask_IO_APIC_irq(irq); |
3830 | - |
3831 | - return 0; /* don't check for pending */ |
3832 | -} |
3833 | - |
3834 | -static void end_level_ioapic_irq (unsigned int irq) |
3835 | -{ |
3836 | - move_irq(irq); |
3837 | - ack_APIC_irq(); |
3838 | -} |
3839 | - |
3840 | -#ifdef CONFIG_PCI_MSI |
3841 | -static unsigned int startup_edge_ioapic_vector(unsigned int vector) |
3842 | -{ |
3843 | - int irq = vector_to_irq(vector); |
3844 | - |
3845 | - return startup_edge_ioapic_irq(irq); |
3846 | -} |
3847 | - |
3848 | -static void ack_edge_ioapic_vector(unsigned int vector) |
3849 | -{ |
3850 | - int irq = vector_to_irq(vector); |
3851 | - |
3852 | - move_native_irq(vector); |
3853 | - ack_edge_ioapic_irq(irq); |
3854 | -} |
3855 | - |
3856 | -static unsigned int startup_level_ioapic_vector (unsigned int vector) |
3857 | -{ |
3858 | - int irq = vector_to_irq(vector); |
3859 | - |
3860 | - return startup_level_ioapic_irq (irq); |
3861 | -} |
3862 | - |
3863 | -static void end_level_ioapic_vector (unsigned int vector) |
3864 | -{ |
3865 | - int irq = vector_to_irq(vector); |
3866 | - |
3867 | - move_native_irq(vector); |
3868 | - end_level_ioapic_irq(irq); |
3869 | -} |
3870 | - |
3871 | -static void mask_IO_APIC_vector (unsigned int vector) |
3872 | -{ |
3873 | - int irq = vector_to_irq(vector); |
3874 | - |
3875 | - mask_IO_APIC_irq(irq); |
3876 | -} |
3877 | - |
3878 | -static void unmask_IO_APIC_vector (unsigned int vector) |
3879 | -{ |
3880 | - int irq = vector_to_irq(vector); |
3881 | - |
3882 | - unmask_IO_APIC_irq(irq); |
3883 | -} |
3884 | - |
3885 | -#ifdef CONFIG_SMP |
3886 | -static void set_ioapic_affinity_vector (unsigned int vector, |
3887 | - cpumask_t cpu_mask) |
3888 | +static int ioapic_retrigger_irq(unsigned int irq) |
3889 | { |
3890 | - int irq = vector_to_irq(vector); |
3891 | + cpumask_t mask; |
3892 | + unsigned vector; |
3893 | + unsigned long flags; |
3894 | |
3895 | - set_native_irq_info(vector, cpu_mask); |
3896 | - set_ioapic_affinity_irq(irq, cpu_mask); |
3897 | -} |
3898 | -#endif // CONFIG_SMP |
3899 | -#endif // CONFIG_PCI_MSI |
3900 | + spin_lock_irqsave(&vector_lock, flags); |
3901 | + vector = irq_vector[irq]; |
3902 | + cpus_clear(mask); |
3903 | + cpu_set(first_cpu(irq_domain[irq]), mask); |
3904 | |
3905 | -static int ioapic_retrigger(unsigned int irq) |
3906 | -{ |
3907 | - send_IPI_self(IO_APIC_VECTOR(irq)); |
3908 | + send_IPI_mask(mask, vector); |
3909 | + spin_unlock_irqrestore(&vector_lock, flags); |
3910 | |
3911 | return 1; |
3912 | } |
3913 | @@ -1696,32 +1340,47 @@ |
3914 | * races. |
3915 | */ |
3916 | |
3917 | -static struct hw_interrupt_type ioapic_edge_type __read_mostly = { |
3918 | - .typename = "IO-APIC-edge", |
3919 | - .startup = startup_edge_ioapic, |
3920 | - .shutdown = shutdown_edge_ioapic, |
3921 | - .enable = enable_edge_ioapic, |
3922 | - .disable = disable_edge_ioapic, |
3923 | - .ack = ack_edge_ioapic, |
3924 | - .end = end_edge_ioapic, |
3925 | -#ifdef CONFIG_SMP |
3926 | - .set_affinity = set_ioapic_affinity, |
3927 | +static void ack_apic_edge(unsigned int irq) |
3928 | +{ |
3929 | + move_native_irq(irq); |
3930 | + ack_APIC_irq(); |
3931 | +} |
3932 | + |
3933 | +static void ack_apic_level(unsigned int irq) |
3934 | +{ |
3935 | + int do_unmask_irq = 0; |
3936 | + |
3937 | +#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) |
3938 | + /* If we are moving the irq we need to mask it */ |
3939 | + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { |
3940 | + do_unmask_irq = 1; |
3941 | + mask_IO_APIC_irq(irq); |
3942 | + } |
3943 | #endif |
3944 | - .retrigger = ioapic_retrigger, |
3945 | -}; |
3946 | |
3947 | -static struct hw_interrupt_type ioapic_level_type __read_mostly = { |
3948 | - .typename = "IO-APIC-level", |
3949 | - .startup = startup_level_ioapic, |
3950 | - .shutdown = shutdown_level_ioapic, |
3951 | - .enable = enable_level_ioapic, |
3952 | - .disable = disable_level_ioapic, |
3953 | - .ack = mask_and_ack_level_ioapic, |
3954 | - .end = end_level_ioapic, |
3955 | + /* |
3956 | + * We must acknowledge the irq before we move it or the acknowledge will |
3957 | + * not propagate properly. |
3958 | + */ |
3959 | + ack_APIC_irq(); |
3960 | + |
3961 | + /* Now we can move and re-enable the irq */ |
3962 | + move_masked_irq(irq); |
3963 | + if (unlikely(do_unmask_irq)) |
3964 | + unmask_IO_APIC_irq(irq); |
3965 | +} |
3966 | + |
3967 | +static struct irq_chip ioapic_chip __read_mostly = { |
3968 | + .name = "IO-APIC", |
3969 | + .startup = startup_ioapic_irq, |
3970 | + .mask = mask_IO_APIC_irq, |
3971 | + .unmask = unmask_IO_APIC_irq, |
3972 | + .ack = ack_apic_edge, |
3973 | + .eoi = ack_apic_level, |
3974 | #ifdef CONFIG_SMP |
3975 | - .set_affinity = set_ioapic_affinity, |
3976 | + .set_affinity = set_ioapic_affinity_irq, |
3977 | #endif |
3978 | - .retrigger = ioapic_retrigger, |
3979 | + .retrigger = ioapic_retrigger_irq, |
3980 | }; |
3981 | #endif /* !CONFIG_XEN */ |
3982 | |
3983 | @@ -1742,12 +1401,7 @@ |
3984 | */ |
3985 | for (irq = 0; irq < NR_IRQS ; irq++) { |
3986 | int tmp = irq; |
3987 | - if (use_pci_vector()) { |
3988 | - if (!platform_legacy_irq(tmp)) |
3989 | - if ((tmp = vector_to_irq(tmp)) == -1) |
3990 | - continue; |
3991 | - } |
3992 | - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { |
3993 | + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { |
3994 | /* |
3995 | * Hmm.. We don't have an entry for this, |
3996 | * so default to an old-fashioned 8259 |
3997 | @@ -1758,7 +1412,7 @@ |
3998 | #ifndef CONFIG_XEN |
3999 | else |
4000 | /* Strange. Oh, well.. */ |
4001 | - irq_desc[irq].chip = &no_irq_type; |
4002 | + irq_desc[irq].chip = &no_irq_chip; |
4003 | #endif |
4004 | } |
4005 | } |
4006 | @@ -1879,8 +1533,6 @@ |
4007 | spin_unlock_irqrestore(&ioapic_lock, flags); |
4008 | } |
4009 | |
4010 | -int timer_uses_ioapic_pin_0; |
4011 | - |
4012 | /* |
4013 | * This code may look a bit paranoid, but it's supposed to cooperate with |
4014 | * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ |
4015 | @@ -1893,13 +1545,13 @@ |
4016 | { |
4017 | int apic1, pin1, apic2, pin2; |
4018 | int vector; |
4019 | + cpumask_t mask; |
4020 | |
4021 | /* |
4022 | * get/set the timer IRQ vector: |
4023 | */ |
4024 | disable_8259A_irq(0); |
4025 | - vector = assign_irq_vector(0); |
4026 | - set_intr_gate(vector, interrupt[0]); |
4027 | + vector = assign_irq_vector(0, TARGET_CPUS, &mask); |
4028 | |
4029 | /* |
4030 | * Subtle, code in do_timer_interrupt() expects an AEOI |
4031 | @@ -1918,9 +1570,6 @@ |
4032 | pin2 = ioapic_i8259.pin; |
4033 | apic2 = ioapic_i8259.apic; |
4034 | |
4035 | - if (pin1 == 0) |
4036 | - timer_uses_ioapic_pin_0 = 1; |
4037 | - |
4038 | apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", |
4039 | vector, apic1, pin1, apic2, pin2); |
4040 | |
4041 | @@ -2035,11 +1684,6 @@ |
4042 | |
4043 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
4044 | |
4045 | - /* |
4046 | - * Set up the IO-APIC IRQ routing table. |
4047 | - */ |
4048 | - if (!acpi_ioapic) |
4049 | - setup_ioapic_ids_from_mpc(); |
4050 | #ifndef CONFIG_XEN |
4051 | sync_Arb_IDs(); |
4052 | #endif /* !CONFIG_XEN */ |
4053 | @@ -2060,17 +1704,12 @@ |
4054 | { |
4055 | struct IO_APIC_route_entry *entry; |
4056 | struct sysfs_ioapic_data *data; |
4057 | - unsigned long flags; |
4058 | int i; |
4059 | |
4060 | data = container_of(dev, struct sysfs_ioapic_data, dev); |
4061 | entry = data->entry; |
4062 | - spin_lock_irqsave(&ioapic_lock, flags); |
4063 | - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { |
4064 | - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); |
4065 | - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); |
4066 | - } |
4067 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
4068 | + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) |
4069 | + *entry = ioapic_read_entry(dev->id, i); |
4070 | |
4071 | return 0; |
4072 | } |
4073 | @@ -2092,11 +1731,9 @@ |
4074 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; |
4075 | io_apic_write(dev->id, 0, reg_00.raw); |
4076 | } |
4077 | - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { |
4078 | - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); |
4079 | - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); |
4080 | - } |
4081 | spin_unlock_irqrestore(&ioapic_lock, flags); |
4082 | + for (i = 0; i < nr_ioapic_registers[dev->id]; i++) |
4083 | + ioapic_write_entry(dev->id, i, entry[i]); |
4084 | |
4085 | return 0; |
4086 | } |
4087 | @@ -2142,26 +1779,254 @@ |
4088 | |
4089 | device_initcall(ioapic_init_sysfs); |
4090 | |
4091 | -/* -------------------------------------------------------------------------- |
4092 | - ACPI-based IOAPIC Configuration |
4093 | - -------------------------------------------------------------------------- */ |
4094 | +#ifndef CONFIG_XEN |
4095 | +/* |
4096 | + * Dynamic irq allocation and deallocation |
4097 | + */ |
4098 | +int create_irq(void) |
4099 | +{ |
4100 | + /* Allocate an unused irq */ |
4101 | + int irq; |
4102 | + int new; |
4103 | + int vector = 0; |
4104 | + unsigned long flags; |
4105 | + cpumask_t mask; |
4106 | |
4107 | -#ifdef CONFIG_ACPI |
4108 | + irq = -ENOSPC; |
4109 | + spin_lock_irqsave(&vector_lock, flags); |
4110 | + for (new = (NR_IRQS - 1); new >= 0; new--) { |
4111 | + if (platform_legacy_irq(new)) |
4112 | + continue; |
4113 | + if (irq_vector[new] != 0) |
4114 | + continue; |
4115 | + vector = __assign_irq_vector(new, TARGET_CPUS, &mask); |
4116 | + if (likely(vector > 0)) |
4117 | + irq = new; |
4118 | + break; |
4119 | + } |
4120 | + spin_unlock_irqrestore(&vector_lock, flags); |
4121 | |
4122 | -#define IO_APIC_MAX_ID 0xFE |
4123 | + if (irq >= 0) { |
4124 | + dynamic_irq_init(irq); |
4125 | + } |
4126 | + return irq; |
4127 | +} |
4128 | |
4129 | -int __init io_apic_get_version (int ioapic) |
4130 | +void destroy_irq(unsigned int irq) |
4131 | { |
4132 | - union IO_APIC_reg_01 reg_01; |
4133 | unsigned long flags; |
4134 | |
4135 | - spin_lock_irqsave(&ioapic_lock, flags); |
4136 | - reg_01.raw = io_apic_read(ioapic, 1); |
4137 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
4138 | + dynamic_irq_cleanup(irq); |
4139 | + |
4140 | + spin_lock_irqsave(&vector_lock, flags); |
4141 | + irq_vector[irq] = 0; |
4142 | + spin_unlock_irqrestore(&vector_lock, flags); |
4143 | +} |
4144 | +#endif |
4145 | + |
4146 | +/* |
4147 | + * MSI message composition |
4148 | + */ |
4149 | +#ifdef CONFIG_PCI_MSI |
4150 | +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) |
4151 | +{ |
4152 | + int vector; |
4153 | + unsigned dest; |
4154 | + cpumask_t tmp; |
4155 | + |
4156 | + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); |
4157 | + if (vector >= 0) { |
4158 | + dest = cpu_mask_to_apicid(tmp); |
4159 | + |
4160 | + msg->address_hi = MSI_ADDR_BASE_HI; |
4161 | + msg->address_lo = |
4162 | + MSI_ADDR_BASE_LO | |
4163 | + ((INT_DEST_MODE == 0) ? |
4164 | + MSI_ADDR_DEST_MODE_PHYSICAL: |
4165 | + MSI_ADDR_DEST_MODE_LOGICAL) | |
4166 | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? |
4167 | + MSI_ADDR_REDIRECTION_CPU: |
4168 | + MSI_ADDR_REDIRECTION_LOWPRI) | |
4169 | + MSI_ADDR_DEST_ID(dest); |
4170 | + |
4171 | + msg->data = |
4172 | + MSI_DATA_TRIGGER_EDGE | |
4173 | + MSI_DATA_LEVEL_ASSERT | |
4174 | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? |
4175 | + MSI_DATA_DELIVERY_FIXED: |
4176 | + MSI_DATA_DELIVERY_LOWPRI) | |
4177 | + MSI_DATA_VECTOR(vector); |
4178 | + } |
4179 | + return vector; |
4180 | +} |
4181 | + |
4182 | +#ifdef CONFIG_SMP |
4183 | +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) |
4184 | +{ |
4185 | + struct msi_msg msg; |
4186 | + unsigned int dest; |
4187 | + cpumask_t tmp; |
4188 | + int vector; |
4189 | + |
4190 | + cpus_and(tmp, mask, cpu_online_map); |
4191 | + if (cpus_empty(tmp)) |
4192 | + tmp = TARGET_CPUS; |
4193 | + |
4194 | + cpus_and(mask, tmp, CPU_MASK_ALL); |
4195 | + |
4196 | + vector = assign_irq_vector(irq, mask, &tmp); |
4197 | + if (vector < 0) |
4198 | + return; |
4199 | + |
4200 | + dest = cpu_mask_to_apicid(tmp); |
4201 | + |
4202 | + read_msi_msg(irq, &msg); |
4203 | + |
4204 | + msg.data &= ~MSI_DATA_VECTOR_MASK; |
4205 | + msg.data |= MSI_DATA_VECTOR(vector); |
4206 | + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
4207 | + msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
4208 | + |
4209 | + write_msi_msg(irq, &msg); |
4210 | + set_native_irq_info(irq, mask); |
4211 | +} |
4212 | +#endif /* CONFIG_SMP */ |
4213 | + |
4214 | +/* |
4215 | + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, |
4216 | + * which implement the MSI or MSI-X Capability Structure. |
4217 | + */ |
4218 | +static struct irq_chip msi_chip = { |
4219 | + .name = "PCI-MSI", |
4220 | + .unmask = unmask_msi_irq, |
4221 | + .mask = mask_msi_irq, |
4222 | + .ack = ack_apic_edge, |
4223 | +#ifdef CONFIG_SMP |
4224 | + .set_affinity = set_msi_irq_affinity, |
4225 | +#endif |
4226 | + .retrigger = ioapic_retrigger_irq, |
4227 | +}; |
4228 | + |
4229 | +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) |
4230 | +{ |
4231 | + struct msi_msg msg; |
4232 | + int ret; |
4233 | + ret = msi_compose_msg(dev, irq, &msg); |
4234 | + if (ret < 0) |
4235 | + return ret; |
4236 | + |
4237 | + write_msi_msg(irq, &msg); |
4238 | + |
4239 | + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); |
4240 | + |
4241 | + return 0; |
4242 | +} |
4243 | + |
4244 | +void arch_teardown_msi_irq(unsigned int irq) |
4245 | +{ |
4246 | + return; |
4247 | +} |
4248 | + |
4249 | +#endif /* CONFIG_PCI_MSI */ |
4250 | + |
4251 | +/* |
4252 | + * Hypertransport interrupt support |
4253 | + */ |
4254 | +#ifdef CONFIG_HT_IRQ |
4255 | + |
4256 | +#ifdef CONFIG_SMP |
4257 | + |
4258 | +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) |
4259 | +{ |
4260 | + struct ht_irq_msg msg; |
4261 | + fetch_ht_irq_msg(irq, &msg); |
4262 | + |
4263 | + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); |
4264 | + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); |
4265 | |
4266 | - return reg_01.bits.version; |
4267 | + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); |
4268 | + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); |
4269 | + |
4270 | + write_ht_irq_msg(irq, &msg); |
4271 | } |
4272 | |
4273 | +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) |
4274 | +{ |
4275 | + unsigned int dest; |
4276 | + cpumask_t tmp; |
4277 | + int vector; |
4278 | + |
4279 | + cpus_and(tmp, mask, cpu_online_map); |
4280 | + if (cpus_empty(tmp)) |
4281 | + tmp = TARGET_CPUS; |
4282 | + |
4283 | + cpus_and(mask, tmp, CPU_MASK_ALL); |
4284 | + |
4285 | + vector = assign_irq_vector(irq, mask, &tmp); |
4286 | + if (vector < 0) |
4287 | + return; |
4288 | + |
4289 | + dest = cpu_mask_to_apicid(tmp); |
4290 | + |
4291 | + target_ht_irq(irq, dest, vector); |
4292 | + set_native_irq_info(irq, mask); |
4293 | +} |
4294 | +#endif |
4295 | + |
4296 | +static struct irq_chip ht_irq_chip = { |
4297 | + .name = "PCI-HT", |
4298 | + .mask = mask_ht_irq, |
4299 | + .unmask = unmask_ht_irq, |
4300 | + .ack = ack_apic_edge, |
4301 | +#ifdef CONFIG_SMP |
4302 | + .set_affinity = set_ht_irq_affinity, |
4303 | +#endif |
4304 | + .retrigger = ioapic_retrigger_irq, |
4305 | +}; |
4306 | + |
4307 | +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) |
4308 | +{ |
4309 | + int vector; |
4310 | + cpumask_t tmp; |
4311 | + |
4312 | + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); |
4313 | + if (vector >= 0) { |
4314 | + struct ht_irq_msg msg; |
4315 | + unsigned dest; |
4316 | + |
4317 | + dest = cpu_mask_to_apicid(tmp); |
4318 | + |
4319 | + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); |
4320 | + |
4321 | + msg.address_lo = |
4322 | + HT_IRQ_LOW_BASE | |
4323 | + HT_IRQ_LOW_DEST_ID(dest) | |
4324 | + HT_IRQ_LOW_VECTOR(vector) | |
4325 | + ((INT_DEST_MODE == 0) ? |
4326 | + HT_IRQ_LOW_DM_PHYSICAL : |
4327 | + HT_IRQ_LOW_DM_LOGICAL) | |
4328 | + HT_IRQ_LOW_RQEOI_EDGE | |
4329 | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? |
4330 | + HT_IRQ_LOW_MT_FIXED : |
4331 | + HT_IRQ_LOW_MT_ARBITRATED) | |
4332 | + HT_IRQ_LOW_IRQ_MASKED; |
4333 | + |
4334 | + write_ht_irq_msg(irq, &msg); |
4335 | + |
4336 | + set_irq_chip_and_handler_name(irq, &ht_irq_chip, |
4337 | + handle_edge_irq, "edge"); |
4338 | + } |
4339 | + return vector; |
4340 | +} |
4341 | +#endif /* CONFIG_HT_IRQ */ |
4342 | + |
4343 | +/* -------------------------------------------------------------------------- |
4344 | + ACPI-based IOAPIC Configuration |
4345 | + -------------------------------------------------------------------------- */ |
4346 | + |
4347 | +#ifdef CONFIG_ACPI |
4348 | + |
4349 | +#define IO_APIC_MAX_ID 0xFE |
4350 | |
4351 | int __init io_apic_get_redir_entries (int ioapic) |
4352 | { |
4353 | @@ -2180,6 +2045,8 @@ |
4354 | { |
4355 | struct IO_APIC_route_entry entry; |
4356 | unsigned long flags; |
4357 | + int vector; |
4358 | + cpumask_t mask; |
4359 | |
4360 | if (!IO_APIC_IRQ(irq)) { |
4361 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", |
4362 | @@ -2188,6 +2055,17 @@ |
4363 | } |
4364 | |
4365 | /* |
4366 | + * IRQs < 16 are already in the irq_2_pin[] map |
4367 | + */ |
4368 | + if (irq >= 16) |
4369 | + add_pin_to_irq(irq, ioapic, pin); |
4370 | + |
4371 | + |
4372 | + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); |
4373 | + if (vector < 0) |
4374 | + return vector; |
4375 | + |
4376 | + /* |
4377 | * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. |
4378 | * Note that we mask (disable) IRQs now -- these get enabled when the |
4379 | * corresponding device driver registers for this IRQ. |
4380 | @@ -2197,19 +2075,11 @@ |
4381 | |
4382 | entry.delivery_mode = INT_DELIVERY_MODE; |
4383 | entry.dest_mode = INT_DEST_MODE; |
4384 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); |
4385 | + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); |
4386 | entry.trigger = edge_level; |
4387 | entry.polarity = active_high_low; |
4388 | entry.mask = 1; /* Disabled (masked) */ |
4389 | - |
4390 | - irq = gsi_irq_sharing(irq); |
4391 | - /* |
4392 | - * IRQs < 16 are already in the irq_2_pin[] map |
4393 | - */ |
4394 | - if (irq >= 16) |
4395 | - add_pin_to_irq(irq, ioapic, pin); |
4396 | - |
4397 | - entry.vector = assign_irq_vector(irq); |
4398 | + entry.vector = vector & 0xff; |
4399 | |
4400 | apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " |
4401 | "IRQ %d Mode:%i Active:%i)\n", ioapic, |
4402 | @@ -2221,10 +2091,10 @@ |
4403 | if (!ioapic && (irq < 16)) |
4404 | disable_8259A_irq(irq); |
4405 | |
4406 | + ioapic_write_entry(ioapic, pin, entry); |
4407 | + |
4408 | spin_lock_irqsave(&ioapic_lock, flags); |
4409 | - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); |
4410 | - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); |
4411 | - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); |
4412 | + set_native_irq_info(irq, TARGET_CPUS); |
4413 | spin_unlock_irqrestore(&ioapic_lock, flags); |
4414 | |
4415 | return 0; |
4416 | --- a/arch/x86/kernel/ioport_64-xen.c |
4417 | +++ b/arch/x86/kernel/ioport_64-xen.c |
4418 | @@ -58,6 +58,7 @@ |
4419 | |
4420 | memset(bitmap, 0xff, IO_BITMAP_BYTES); |
4421 | t->io_bitmap_ptr = bitmap; |
4422 | + set_thread_flag(TIF_IO_BITMAP); |
4423 | |
4424 | set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap); |
4425 | set_iobitmap.nr_ports = IO_BITMAP_BITS; |
4426 | --- a/arch/x86/kernel/irq_32-xen.c |
4427 | +++ b/arch/x86/kernel/irq_32-xen.c |
4428 | @@ -53,8 +53,10 @@ |
4429 | */ |
4430 | fastcall unsigned int do_IRQ(struct pt_regs *regs) |
4431 | { |
4432 | + struct pt_regs *old_regs; |
4433 | /* high bit used in ret_from_ code */ |
4434 | int irq = ~regs->orig_eax; |
4435 | + struct irq_desc *desc = irq_desc + irq; |
4436 | #ifdef CONFIG_4KSTACKS |
4437 | union irq_ctx *curctx, *irqctx; |
4438 | u32 *isp; |
4439 | @@ -66,6 +68,7 @@ |
4440 | BUG(); |
4441 | } |
4442 | |
4443 | + old_regs = set_irq_regs(regs); |
4444 | irq_enter(); |
4445 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
4446 | /* Debugging check for stack overflow: is there less than 1KB free? */ |
4447 | @@ -110,19 +113,20 @@ |
4448 | (curctx->tinfo.preempt_count & SOFTIRQ_MASK); |
4449 | |
4450 | asm volatile( |
4451 | - " xchgl %%ebx,%%esp \n" |
4452 | - " call __do_IRQ \n" |
4453 | + " xchgl %%ebx,%%esp \n" |
4454 | + " call *%%edi \n" |
4455 | " movl %%ebx,%%esp \n" |
4456 | : "=a" (arg1), "=d" (arg2), "=b" (ebx) |
4457 | - : "0" (irq), "1" (regs), "2" (isp) |
4458 | - : "memory", "cc", "ecx" |
4459 | + : "0" (irq), "1" (desc), "2" (isp), |
4460 | + "D" (desc->handle_irq) |
4461 | + : "memory", "cc" |
4462 | ); |
4463 | } else |
4464 | #endif |
4465 | - __do_IRQ(irq, regs); |
4466 | + desc->handle_irq(irq, desc); |
4467 | |
4468 | irq_exit(); |
4469 | - |
4470 | + set_irq_regs(old_regs); |
4471 | return 1; |
4472 | } |
4473 | |
4474 | @@ -253,7 +257,8 @@ |
4475 | for_each_online_cpu(j) |
4476 | seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); |
4477 | #endif |
4478 | - seq_printf(p, " %14s", irq_desc[i].chip->typename); |
4479 | + seq_printf(p, " %8s", irq_desc[i].chip->name); |
4480 | + seq_printf(p, "-%-8s", irq_desc[i].name); |
4481 | seq_printf(p, " %s", action->name); |
4482 | |
4483 | for (action=action->next; action; action = action->next) |
4484 | --- a/arch/x86/kernel/irq_64-xen.c |
4485 | +++ b/arch/x86/kernel/irq_64-xen.c |
4486 | @@ -20,11 +20,6 @@ |
4487 | #include <asm/idle.h> |
4488 | |
4489 | atomic_t irq_err_count; |
4490 | -#ifdef CONFIG_X86_IO_APIC |
4491 | -#ifdef APIC_MISMATCH_DEBUG |
4492 | -atomic_t irq_mis_count; |
4493 | -#endif |
4494 | -#endif |
4495 | |
4496 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
4497 | /* |
4498 | @@ -79,7 +74,8 @@ |
4499 | for_each_online_cpu(j) |
4500 | seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); |
4501 | #endif |
4502 | - seq_printf(p, " %14s", irq_desc[i].chip->typename); |
4503 | + seq_printf(p, " %8s", irq_desc[i].chip->name); |
4504 | + seq_printf(p, "-%-8s", irq_desc[i].name); |
4505 | |
4506 | seq_printf(p, " %s", action->name); |
4507 | for (action=action->next; action; action = action->next) |
4508 | @@ -99,11 +95,6 @@ |
4509 | seq_putc(p, '\n'); |
4510 | #endif |
4511 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); |
4512 | -#ifdef CONFIG_X86_IO_APIC |
4513 | -#ifdef APIC_MISMATCH_DEBUG |
4514 | - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); |
4515 | -#endif |
4516 | -#endif |
4517 | } |
4518 | return 0; |
4519 | } |
4520 | @@ -114,24 +105,28 @@ |
4521 | * handlers). |
4522 | */ |
4523 | asmlinkage unsigned int do_IRQ(struct pt_regs *regs) |
4524 | -{ |
4525 | +{ |
4526 | + struct pt_regs *old_regs = set_irq_regs(regs); |
4527 | + |
4528 | /* high bit used in ret_from_ code */ |
4529 | unsigned irq = ~regs->orig_rax; |
4530 | |
4531 | - if (unlikely(irq >= NR_IRQS)) { |
4532 | - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", |
4533 | - __FUNCTION__, irq); |
4534 | - BUG(); |
4535 | - } |
4536 | - |
4537 | exit_idle(); |
4538 | irq_enter(); |
4539 | + |
4540 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
4541 | stack_overflow_check(regs); |
4542 | #endif |
4543 | - __do_IRQ(irq, regs); |
4544 | + |
4545 | + if (likely(irq < NR_IRQS)) |
4546 | + generic_handle_irq(irq); |
4547 | + else |
4548 | + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", |
4549 | + __func__, smp_processor_id(), irq); |
4550 | + |
4551 | irq_exit(); |
4552 | |
4553 | + set_irq_regs(old_regs); |
4554 | return 1; |
4555 | } |
4556 | |
4557 | @@ -192,6 +187,6 @@ |
4558 | */ |
4559 | void ack_bad_irq(unsigned int irq) |
4560 | { |
4561 | - printk("unexpected IRQ trap at vector %02x\n", irq); |
4562 | + printk("unexpected IRQ trap at irq %02x\n", irq); |
4563 | } |
4564 | #endif |
4565 | --- a/arch/x86/kernel/ldt_32-xen.c |
4566 | +++ b/arch/x86/kernel/ldt_32-xen.c |
4567 | @@ -1,5 +1,5 @@ |
4568 | /* |
4569 | - * linux/kernel/ldt.c |
4570 | + * linux/arch/i386/kernel/ldt.c |
4571 | * |
4572 | * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds |
4573 | * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> |
4574 | --- a/arch/x86/kernel/microcode-xen.c |
4575 | +++ b/arch/x86/kernel/microcode-xen.c |
4576 | @@ -2,6 +2,7 @@ |
4577 | * Intel CPU Microcode Update Driver for Linux |
4578 | * |
4579 | * Copyright (C) 2000-2004 Tigran Aivazian |
4580 | + * 2006 Shaohua Li <shaohua.li@intel.com> |
4581 | * |
4582 | * This driver allows to upgrade microcode on Intel processors |
4583 | * belonging to IA-32 family - PentiumPro, Pentium II, |
4584 | @@ -33,7 +34,9 @@ |
4585 | #include <linux/spinlock.h> |
4586 | #include <linux/mm.h> |
4587 | #include <linux/mutex.h> |
4588 | -#include <linux/syscalls.h> |
4589 | +#include <linux/cpu.h> |
4590 | +#include <linux/firmware.h> |
4591 | +#include <linux/platform_device.h> |
4592 | |
4593 | #include <asm/msr.h> |
4594 | #include <asm/uaccess.h> |
4595 | @@ -55,12 +58,7 @@ |
4596 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ |
4597 | static DEFINE_MUTEX(microcode_mutex); |
4598 | |
4599 | -static int microcode_open (struct inode *unused1, struct file *unused2) |
4600 | -{ |
4601 | - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; |
4602 | -} |
4603 | - |
4604 | - |
4605 | +#ifdef CONFIG_MICROCODE_OLD_INTERFACE |
4606 | static int do_microcode_update (const void __user *ubuf, size_t len) |
4607 | { |
4608 | int err; |
4609 | @@ -85,6 +83,11 @@ |
4610 | return err; |
4611 | } |
4612 | |
4613 | +static int microcode_open (struct inode *unused1, struct file *unused2) |
4614 | +{ |
4615 | + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; |
4616 | +} |
4617 | + |
4618 | static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) |
4619 | { |
4620 | ssize_t ret; |
4621 | @@ -117,7 +120,7 @@ |
4622 | .fops = &microcode_fops, |
4623 | }; |
4624 | |
4625 | -static int __init microcode_init (void) |
4626 | +static int __init microcode_dev_init (void) |
4627 | { |
4628 | int error; |
4629 | |
4630 | @@ -129,6 +132,68 @@ |
4631 | return error; |
4632 | } |
4633 | |
4634 | + return 0; |
4635 | +} |
4636 | + |
4637 | +static void __exit microcode_dev_exit (void) |
4638 | +{ |
4639 | + misc_deregister(&microcode_dev); |
4640 | +} |
4641 | + |
4642 | +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); |
4643 | +#else |
4644 | +#define microcode_dev_init() 0 |
4645 | +#define microcode_dev_exit() do { } while(0) |
4646 | +#endif |
4647 | + |
4648 | +/* fake device for request_firmware */ |
4649 | +static struct platform_device *microcode_pdev; |
4650 | + |
4651 | +static int request_microcode(void) |
4652 | +{ |
4653 | + char name[30]; |
4654 | + const struct cpuinfo_x86 *c = &boot_cpu_data; |
4655 | + const struct firmware *firmware; |
4656 | + int error; |
4657 | + struct xen_platform_op op; |
4658 | + |
4659 | + sprintf(name,"intel-ucode/%02x-%02x-%02x", |
4660 | + c->x86, c->x86_model, c->x86_mask); |
4661 | + error = request_firmware(&firmware, name, &microcode_pdev->dev); |
4662 | + if (error) { |
4663 | + pr_debug("ucode data file %s load failed\n", name); |
4664 | + return error; |
4665 | + } |
4666 | + |
4667 | + op.cmd = XENPF_microcode_update; |
4668 | + set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data); |
4669 | + op.u.microcode.length = firmware->size; |
4670 | + error = HYPERVISOR_platform_op(&op); |
4671 | + |
4672 | + release_firmware(firmware); |
4673 | + |
4674 | + if (error) |
4675 | + pr_debug("ucode load failed\n"); |
4676 | + |
4677 | + return error; |
4678 | +} |
4679 | + |
4680 | +static int __init microcode_init (void) |
4681 | +{ |
4682 | + int error; |
4683 | + |
4684 | + error = microcode_dev_init(); |
4685 | + if (error) |
4686 | + return error; |
4687 | + microcode_pdev = platform_device_register_simple("microcode", -1, |
4688 | + NULL, 0); |
4689 | + if (IS_ERR(microcode_pdev)) { |
4690 | + microcode_dev_exit(); |
4691 | + return PTR_ERR(microcode_pdev); |
4692 | + } |
4693 | + |
4694 | + request_microcode(); |
4695 | + |
4696 | printk(KERN_INFO |
4697 | "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n"); |
4698 | return 0; |
4699 | @@ -136,9 +201,9 @@ |
4700 | |
4701 | static void __exit microcode_exit (void) |
4702 | { |
4703 | - misc_deregister(&microcode_dev); |
4704 | + microcode_dev_exit(); |
4705 | + platform_device_unregister(microcode_pdev); |
4706 | } |
4707 | |
4708 | module_init(microcode_init) |
4709 | module_exit(microcode_exit) |
4710 | -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); |
4711 | --- a/arch/x86/kernel/mpparse_32-xen.c |
4712 | +++ b/arch/x86/kernel/mpparse_32-xen.c |
4713 | @@ -30,6 +30,7 @@ |
4714 | #include <asm/io_apic.h> |
4715 | |
4716 | #include <mach_apic.h> |
4717 | +#include <mach_apicdef.h> |
4718 | #include <mach_mpparse.h> |
4719 | #include <bios_ebda.h> |
4720 | |
4721 | @@ -68,7 +69,7 @@ |
4722 | /* Processor that is doing the boot up */ |
4723 | unsigned int boot_cpu_physical_apicid = -1U; |
4724 | /* Internal processor count */ |
4725 | -static unsigned int __devinitdata num_processors; |
4726 | +unsigned int __cpuinitdata num_processors; |
4727 | |
4728 | /* Bitmask of physically existing CPUs */ |
4729 | physid_mask_t phys_cpu_present_map; |
4730 | @@ -235,12 +236,14 @@ |
4731 | |
4732 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); |
4733 | |
4734 | +#if MAX_MP_BUSSES < 256 |
4735 | if (m->mpc_busid >= MAX_MP_BUSSES) { |
4736 | printk(KERN_WARNING "MP table busid value (%d) for bustype %s " |
4737 | " is too large, max. supported is %d\n", |
4738 | m->mpc_busid, str, MAX_MP_BUSSES - 1); |
4739 | return; |
4740 | } |
4741 | +#endif |
4742 | |
4743 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { |
4744 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; |
4745 | @@ -300,19 +303,6 @@ |
4746 | m->mpc_irqtype, m->mpc_irqflag & 3, |
4747 | (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, |
4748 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); |
4749 | - /* |
4750 | - * Well it seems all SMP boards in existence |
4751 | - * use ExtINT/LVT1 == LINT0 and |
4752 | - * NMI/LVT2 == LINT1 - the following check |
4753 | - * will show us if this assumptions is false. |
4754 | - * Until then we do not have to add baggage. |
4755 | - */ |
4756 | - if ((m->mpc_irqtype == mp_ExtINT) && |
4757 | - (m->mpc_destapiclint != 0)) |
4758 | - BUG(); |
4759 | - if ((m->mpc_irqtype == mp_NMI) && |
4760 | - (m->mpc_destapiclint != 1)) |
4761 | - BUG(); |
4762 | } |
4763 | |
4764 | #ifdef CONFIG_X86_NUMAQ |
4765 | @@ -838,8 +828,7 @@ |
4766 | |
4767 | #ifdef CONFIG_ACPI |
4768 | |
4769 | -void __init mp_register_lapic_address ( |
4770 | - u64 address) |
4771 | +void __init mp_register_lapic_address(u64 address) |
4772 | { |
4773 | #ifndef CONFIG_XEN |
4774 | mp_lapic_addr = (unsigned long) address; |
4775 | @@ -853,13 +842,10 @@ |
4776 | #endif |
4777 | } |
4778 | |
4779 | - |
4780 | -void __devinit mp_register_lapic ( |
4781 | - u8 id, |
4782 | - u8 enabled) |
4783 | +void __devinit mp_register_lapic (u8 id, u8 enabled) |
4784 | { |
4785 | struct mpc_config_processor processor; |
4786 | - int boot_cpu = 0; |
4787 | + int boot_cpu = 0; |
4788 | |
4789 | if (MAX_APICS - id <= 0) { |
4790 | printk(KERN_WARNING "Processor #%d invalid (max %d)\n", |
4791 | @@ -898,11 +884,9 @@ |
4792 | u32 pin_programmed[4]; |
4793 | } mp_ioapic_routing[MAX_IO_APICS]; |
4794 | |
4795 | - |
4796 | -static int mp_find_ioapic ( |
4797 | - int gsi) |
4798 | +static int mp_find_ioapic (int gsi) |
4799 | { |
4800 | - int i = 0; |
4801 | + int i = 0; |
4802 | |
4803 | /* Find the IOAPIC that manages this GSI. */ |
4804 | for (i = 0; i < nr_ioapics; i++) { |
4805 | @@ -915,15 +899,11 @@ |
4806 | |
4807 | return -1; |
4808 | } |
4809 | - |
4810 | |
4811 | -void __init mp_register_ioapic ( |
4812 | - u8 id, |
4813 | - u32 address, |
4814 | - u32 gsi_base) |
4815 | +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) |
4816 | { |
4817 | - int idx = 0; |
4818 | - int tmpid; |
4819 | + int idx = 0; |
4820 | + int tmpid; |
4821 | |
4822 | if (nr_ioapics >= MAX_IO_APICS) { |
4823 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " |
4824 | @@ -971,16 +951,10 @@ |
4825 | mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, |
4826 | mp_ioapic_routing[idx].gsi_base, |
4827 | mp_ioapic_routing[idx].gsi_end); |
4828 | - |
4829 | - return; |
4830 | } |
4831 | |
4832 | - |
4833 | -void __init mp_override_legacy_irq ( |
4834 | - u8 bus_irq, |
4835 | - u8 polarity, |
4836 | - u8 trigger, |
4837 | - u32 gsi) |
4838 | +void __init |
4839 | +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) |
4840 | { |
4841 | struct mpc_config_intsrc intsrc; |
4842 | int ioapic = -1; |
4843 | @@ -1018,15 +992,13 @@ |
4844 | mp_irqs[mp_irq_entries] = intsrc; |
4845 | if (++mp_irq_entries == MAX_IRQ_SOURCES) |
4846 | panic("Max # of irq sources exceeded!\n"); |
4847 | - |
4848 | - return; |
4849 | } |
4850 | |
4851 | void __init mp_config_acpi_legacy_irqs (void) |
4852 | { |
4853 | struct mpc_config_intsrc intsrc; |
4854 | - int i = 0; |
4855 | - int ioapic = -1; |
4856 | + int i = 0; |
4857 | + int ioapic = -1; |
4858 | |
4859 | /* |
4860 | * Fabricate the legacy ISA bus (bus #31). |
4861 | @@ -1095,12 +1067,12 @@ |
4862 | |
4863 | #define MAX_GSI_NUM 4096 |
4864 | |
4865 | -int mp_register_gsi (u32 gsi, int triggering, int polarity) |
4866 | +int mp_register_gsi(u32 gsi, int triggering, int polarity) |
4867 | { |
4868 | - int ioapic = -1; |
4869 | - int ioapic_pin = 0; |
4870 | - int idx, bit = 0; |
4871 | - static int pci_irq = 16; |
4872 | + int ioapic = -1; |
4873 | + int ioapic_pin = 0; |
4874 | + int idx, bit = 0; |
4875 | + static int pci_irq = 16; |
4876 | /* |
4877 | * Mapping between Global System Interrups, which |
4878 | * represent all possible interrupts, and IRQs |
4879 | --- a/arch/x86/kernel/mpparse_64-xen.c |
4880 | +++ b/arch/x86/kernel/mpparse_64-xen.c |
4881 | @@ -41,8 +41,7 @@ |
4882 | * Various Linux-internal data structures created from the |
4883 | * MP-table. |
4884 | */ |
4885 | -unsigned char apic_version [MAX_APICS]; |
4886 | -unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; |
4887 | +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); |
4888 | int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; |
4889 | |
4890 | static int mp_current_pci_id = 0; |
4891 | @@ -56,7 +55,6 @@ |
4892 | int mp_irq_entries; |
4893 | |
4894 | int nr_ioapics; |
4895 | -int pic_mode; |
4896 | unsigned long mp_lapic_addr = 0; |
4897 | |
4898 | |
4899 | @@ -71,19 +69,6 @@ |
4900 | /* Bitmask of physically existing CPUs */ |
4901 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; |
4902 | |
4903 | -/* ACPI MADT entry parsing functions */ |
4904 | -#ifdef CONFIG_ACPI |
4905 | -extern struct acpi_boot_flags acpi_boot; |
4906 | -#ifdef CONFIG_X86_LOCAL_APIC |
4907 | -extern int acpi_parse_lapic (acpi_table_entry_header *header); |
4908 | -extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header); |
4909 | -extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header); |
4910 | -#endif /*CONFIG_X86_LOCAL_APIC*/ |
4911 | -#ifdef CONFIG_X86_IO_APIC |
4912 | -extern int acpi_parse_ioapic (acpi_table_entry_header *header); |
4913 | -#endif /*CONFIG_X86_IO_APIC*/ |
4914 | -#endif /*CONFIG_ACPI*/ |
4915 | - |
4916 | u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; |
4917 | |
4918 | |
4919 | @@ -109,24 +94,20 @@ |
4920 | static void __cpuinit MP_processor_info (struct mpc_config_processor *m) |
4921 | { |
4922 | int cpu; |
4923 | - unsigned char ver; |
4924 | cpumask_t tmp_map; |
4925 | + char *bootup_cpu = ""; |
4926 | |
4927 | if (!(m->mpc_cpuflag & CPU_ENABLED)) { |
4928 | disabled_cpus++; |
4929 | return; |
4930 | } |
4931 | - |
4932 | - printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", |
4933 | - m->mpc_apicid, |
4934 | - (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8, |
4935 | - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4, |
4936 | - m->mpc_apicver); |
4937 | - |
4938 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { |
4939 | - Dprintk(" Bootup CPU\n"); |
4940 | + bootup_cpu = " (Bootup-CPU)"; |
4941 | boot_cpu_id = m->mpc_apicid; |
4942 | } |
4943 | + |
4944 | + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); |
4945 | + |
4946 | if (num_processors >= NR_CPUS) { |
4947 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." |
4948 | " Processor ignored.\n", NR_CPUS); |
4949 | @@ -137,24 +118,7 @@ |
4950 | cpus_complement(tmp_map, cpu_present_map); |
4951 | cpu = first_cpu(tmp_map); |
4952 | |
4953 | -#if MAX_APICS < 255 |
4954 | - if ((int)m->mpc_apicid > MAX_APICS) { |
4955 | - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", |
4956 | - m->mpc_apicid, MAX_APICS); |
4957 | - return; |
4958 | - } |
4959 | -#endif |
4960 | - ver = m->mpc_apicver; |
4961 | - |
4962 | physid_set(m->mpc_apicid, phys_cpu_present_map); |
4963 | - /* |
4964 | - * Validate version |
4965 | - */ |
4966 | - if (ver == 0x0) { |
4967 | - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); |
4968 | - ver = 0x10; |
4969 | - } |
4970 | - apic_version[m->mpc_apicid] = ver; |
4971 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { |
4972 | /* |
4973 | * bios_cpu_apicid is required to have processors listed |
4974 | @@ -185,37 +149,42 @@ |
4975 | Dprintk("Bus #%d is %s\n", m->mpc_busid, str); |
4976 | |
4977 | if (strncmp(str, "ISA", 3) == 0) { |
4978 | - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; |
4979 | - } else if (strncmp(str, "EISA", 4) == 0) { |
4980 | - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; |
4981 | + set_bit(m->mpc_busid, mp_bus_not_pci); |
4982 | } else if (strncmp(str, "PCI", 3) == 0) { |
4983 | - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; |
4984 | + clear_bit(m->mpc_busid, mp_bus_not_pci); |
4985 | mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; |
4986 | mp_current_pci_id++; |
4987 | - } else if (strncmp(str, "MCA", 3) == 0) { |
4988 | - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; |
4989 | } else { |
4990 | printk(KERN_ERR "Unknown bustype %s\n", str); |
4991 | } |
4992 | } |
4993 | |
4994 | +static int bad_ioapic(unsigned long address) |
4995 | +{ |
4996 | + if (nr_ioapics >= MAX_IO_APICS) { |
4997 | + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " |
4998 | + "(found %d)\n", MAX_IO_APICS, nr_ioapics); |
4999 | + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); |
5000 | + } |
5001 | + if (!address) { |
5002 | + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" |
5003 | + " found in table, skipping!\n"); |
5004 | + return 1; |
5005 | + } |
5006 | + return 0; |
5007 | +} |
5008 | + |
5009 | static void __init MP_ioapic_info (struct mpc_config_ioapic *m) |
5010 | { |
5011 | if (!(m->mpc_flags & MPC_APIC_USABLE)) |
5012 | return; |
5013 | |
5014 | - printk("I/O APIC #%d Version %d at 0x%X.\n", |
5015 | - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); |
5016 | - if (nr_ioapics >= MAX_IO_APICS) { |
5017 | - printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n", |
5018 | - MAX_IO_APICS, nr_ioapics); |
5019 | - panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); |
5020 | - } |
5021 | - if (!m->mpc_apicaddr) { |
5022 | - printk(KERN_ERR "WARNING: bogus zero I/O APIC address" |
5023 | - " found in MP table, skipping!\n"); |
5024 | + printk("I/O APIC #%d at 0x%X.\n", |
5025 | + m->mpc_apicid, m->mpc_apicaddr); |
5026 | + |
5027 | + if (bad_ioapic(m->mpc_apicaddr)) |
5028 | return; |
5029 | - } |
5030 | + |
5031 | mp_ioapics[nr_ioapics] = *m; |
5032 | nr_ioapics++; |
5033 | } |
5034 | @@ -239,19 +208,6 @@ |
5035 | m->mpc_irqtype, m->mpc_irqflag & 3, |
5036 | (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, |
5037 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); |
5038 | - /* |
5039 | - * Well it seems all SMP boards in existence |
5040 | - * use ExtINT/LVT1 == LINT0 and |
5041 | - * NMI/LVT2 == LINT1 - the following check |
5042 | - * will show us if this assumptions is false. |
5043 | - * Until then we do not have to add baggage. |
5044 | - */ |
5045 | - if ((m->mpc_irqtype == mp_ExtINT) && |
5046 | - (m->mpc_destapiclint != 0)) |
5047 | - BUG(); |
5048 | - if ((m->mpc_irqtype == mp_NMI) && |
5049 | - (m->mpc_destapiclint != 1)) |
5050 | - BUG(); |
5051 | } |
5052 | |
5053 | /* |
5054 | @@ -265,7 +221,7 @@ |
5055 | unsigned char *mpt=((unsigned char *)mpc)+count; |
5056 | |
5057 | if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { |
5058 | - printk("SMP mptable: bad signature [%c%c%c%c]!\n", |
5059 | + printk("MPTABLE: bad signature [%c%c%c%c]!\n", |
5060 | mpc->mpc_signature[0], |
5061 | mpc->mpc_signature[1], |
5062 | mpc->mpc_signature[2], |
5063 | @@ -273,31 +229,31 @@ |
5064 | return 0; |
5065 | } |
5066 | if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { |
5067 | - printk("SMP mptable: checksum error!\n"); |
5068 | + printk("MPTABLE: checksum error!\n"); |
5069 | return 0; |
5070 | } |
5071 | if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { |
5072 | - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", |
5073 | + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", |
5074 | mpc->mpc_spec); |
5075 | return 0; |
5076 | } |
5077 | if (!mpc->mpc_lapic) { |
5078 | - printk(KERN_ERR "SMP mptable: null local APIC address!\n"); |
5079 | + printk(KERN_ERR "MPTABLE: null local APIC address!\n"); |
5080 | return 0; |
5081 | } |
5082 | memcpy(str,mpc->mpc_oem,8); |
5083 | - str[8]=0; |
5084 | - printk(KERN_INFO "OEM ID: %s ",str); |
5085 | + str[8] = 0; |
5086 | + printk(KERN_INFO "MPTABLE: OEM ID: %s ",str); |
5087 | |
5088 | memcpy(str,mpc->mpc_productid,12); |
5089 | - str[12]=0; |
5090 | - printk("Product ID: %s ",str); |
5091 | + str[12] = 0; |
5092 | + printk("MPTABLE: Product ID: %s ",str); |
5093 | |
5094 | - printk("APIC at: 0x%X\n",mpc->mpc_lapic); |
5095 | + printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic); |
5096 | |
5097 | /* save the local APIC address, it might be non-default */ |
5098 | if (!acpi_lapic) |
5099 | - mp_lapic_addr = mpc->mpc_lapic; |
5100 | + mp_lapic_addr = mpc->mpc_lapic; |
5101 | |
5102 | /* |
5103 | * Now process the configuration blocks. |
5104 | @@ -309,7 +265,7 @@ |
5105 | struct mpc_config_processor *m= |
5106 | (struct mpc_config_processor *)mpt; |
5107 | if (!acpi_lapic) |
5108 | - MP_processor_info(m); |
5109 | + MP_processor_info(m); |
5110 | mpt += sizeof(*m); |
5111 | count += sizeof(*m); |
5112 | break; |
5113 | @@ -328,8 +284,8 @@ |
5114 | struct mpc_config_ioapic *m= |
5115 | (struct mpc_config_ioapic *)mpt; |
5116 | MP_ioapic_info(m); |
5117 | - mpt+=sizeof(*m); |
5118 | - count+=sizeof(*m); |
5119 | + mpt += sizeof(*m); |
5120 | + count += sizeof(*m); |
5121 | break; |
5122 | } |
5123 | case MP_INTSRC: |
5124 | @@ -338,8 +294,8 @@ |
5125 | (struct mpc_config_intsrc *)mpt; |
5126 | |
5127 | MP_intsrc_info(m); |
5128 | - mpt+=sizeof(*m); |
5129 | - count+=sizeof(*m); |
5130 | + mpt += sizeof(*m); |
5131 | + count += sizeof(*m); |
5132 | break; |
5133 | } |
5134 | case MP_LINTSRC: |
5135 | @@ -347,15 +303,15 @@ |
5136 | struct mpc_config_lintsrc *m= |
5137 | (struct mpc_config_lintsrc *)mpt; |
5138 | MP_lintsrc_info(m); |
5139 | - mpt+=sizeof(*m); |
5140 | - count+=sizeof(*m); |
5141 | + mpt += sizeof(*m); |
5142 | + count += sizeof(*m); |
5143 | break; |
5144 | } |
5145 | } |
5146 | } |
5147 | clustered_apic_check(); |
5148 | if (!num_processors) |
5149 | - printk(KERN_ERR "SMP mptable: no processors registered!\n"); |
5150 | + printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
5151 | return num_processors; |
5152 | } |
5153 | |
5154 | @@ -451,13 +407,10 @@ |
5155 | * 2 CPUs, numbered 0 & 1. |
5156 | */ |
5157 | processor.mpc_type = MP_PROCESSOR; |
5158 | - /* Either an integrated APIC or a discrete 82489DX. */ |
5159 | - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; |
5160 | + processor.mpc_apicver = 0; |
5161 | processor.mpc_cpuflag = CPU_ENABLED; |
5162 | - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | |
5163 | - (boot_cpu_data.x86_model << 4) | |
5164 | - boot_cpu_data.x86_mask; |
5165 | - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; |
5166 | + processor.mpc_cpufeature = 0; |
5167 | + processor.mpc_featureflag = 0; |
5168 | processor.mpc_reserved[0] = 0; |
5169 | processor.mpc_reserved[1] = 0; |
5170 | for (i = 0; i < 2; i++) { |
5171 | @@ -476,14 +429,6 @@ |
5172 | case 5: |
5173 | memcpy(bus.mpc_bustype, "ISA ", 6); |
5174 | break; |
5175 | - case 2: |
5176 | - case 6: |
5177 | - case 3: |
5178 | - memcpy(bus.mpc_bustype, "EISA ", 6); |
5179 | - break; |
5180 | - case 4: |
5181 | - case 7: |
5182 | - memcpy(bus.mpc_bustype, "MCA ", 6); |
5183 | } |
5184 | MP_bus_info(&bus); |
5185 | if (mpc_default_type > 4) { |
5186 | @@ -494,7 +439,7 @@ |
5187 | |
5188 | ioapic.mpc_type = MP_IOAPIC; |
5189 | ioapic.mpc_apicid = 2; |
5190 | - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; |
5191 | + ioapic.mpc_apicver = 0; |
5192 | ioapic.mpc_flags = MPC_APIC_USABLE; |
5193 | ioapic.mpc_apicaddr = 0xFEC00000; |
5194 | MP_ioapic_info(&ioapic); |
5195 | @@ -537,13 +482,6 @@ |
5196 | printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); |
5197 | |
5198 | printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); |
5199 | - if (mpf->mpf_feature2 & (1<<7)) { |
5200 | - printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); |
5201 | - pic_mode = 1; |
5202 | - } else { |
5203 | - printk(KERN_INFO " Virtual Wire compatibility mode.\n"); |
5204 | - pic_mode = 0; |
5205 | - } |
5206 | |
5207 | /* |
5208 | * Now see if we need to read further. |
5209 | @@ -620,7 +558,7 @@ |
5210 | return 0; |
5211 | } |
5212 | |
5213 | -void __init find_intel_smp (void) |
5214 | +void __init find_smp_config(void) |
5215 | { |
5216 | unsigned int address; |
5217 | |
5218 | @@ -637,9 +575,7 @@ |
5219 | smp_scan_config(0xF0000,0x10000)) |
5220 | return; |
5221 | /* |
5222 | - * If it is an SMP machine we should know now, unless the |
5223 | - * configuration is in an EISA/MCA bus machine with an |
5224 | - * extended bios data area. |
5225 | + * If it is an SMP machine we should know now. |
5226 | * |
5227 | * there is a real-mode segmented pointer pointing to the |
5228 | * 4K EBDA area at 0x40E, calculate and scan it here. |
5229 | @@ -660,64 +596,38 @@ |
5230 | printk(KERN_INFO "No mptable found.\n"); |
5231 | } |
5232 | |
5233 | -/* |
5234 | - * - Intel MP Configuration Table |
5235 | - */ |
5236 | -void __init find_smp_config (void) |
5237 | -{ |
5238 | -#ifdef CONFIG_X86_LOCAL_APIC |
5239 | - find_intel_smp(); |
5240 | -#endif |
5241 | -} |
5242 | - |
5243 | - |
5244 | /* -------------------------------------------------------------------------- |
5245 | ACPI-based MP Configuration |
5246 | -------------------------------------------------------------------------- */ |
5247 | |
5248 | #ifdef CONFIG_ACPI |
5249 | |
5250 | -void __init mp_register_lapic_address ( |
5251 | - u64 address) |
5252 | +void __init mp_register_lapic_address(u64 address) |
5253 | { |
5254 | #ifndef CONFIG_XEN |
5255 | mp_lapic_addr = (unsigned long) address; |
5256 | - |
5257 | set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); |
5258 | - |
5259 | if (boot_cpu_id == -1U) |
5260 | boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); |
5261 | - |
5262 | - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); |
5263 | #endif |
5264 | } |
5265 | |
5266 | - |
5267 | -void __cpuinit mp_register_lapic ( |
5268 | - u8 id, |
5269 | - u8 enabled) |
5270 | +void __cpuinit mp_register_lapic (u8 id, u8 enabled) |
5271 | { |
5272 | struct mpc_config_processor processor; |
5273 | int boot_cpu = 0; |
5274 | |
5275 | - if (id >= MAX_APICS) { |
5276 | - printk(KERN_WARNING "Processor #%d invalid (max %d)\n", |
5277 | - id, MAX_APICS); |
5278 | - return; |
5279 | - } |
5280 | - |
5281 | - if (id == boot_cpu_physical_apicid) |
5282 | + if (id == boot_cpu_id) |
5283 | boot_cpu = 1; |
5284 | |
5285 | #ifndef CONFIG_XEN |
5286 | processor.mpc_type = MP_PROCESSOR; |
5287 | processor.mpc_apicid = id; |
5288 | - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); |
5289 | + processor.mpc_apicver = 0; |
5290 | processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); |
5291 | processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); |
5292 | - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | |
5293 | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; |
5294 | - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; |
5295 | + processor.mpc_cpufeature = 0; |
5296 | + processor.mpc_featureflag = 0; |
5297 | processor.mpc_reserved[0] = 0; |
5298 | processor.mpc_reserved[1] = 0; |
5299 | #endif |
5300 | @@ -725,8 +635,6 @@ |
5301 | MP_processor_info(&processor); |
5302 | } |
5303 | |
5304 | -#ifdef CONFIG_X86_IO_APIC |
5305 | - |
5306 | #define MP_ISA_BUS 0 |
5307 | #define MP_MAX_IOAPIC_PIN 127 |
5308 | |
5309 | @@ -737,11 +645,9 @@ |
5310 | u32 pin_programmed[4]; |
5311 | } mp_ioapic_routing[MAX_IO_APICS]; |
5312 | |
5313 | - |
5314 | -static int mp_find_ioapic ( |
5315 | - int gsi) |
5316 | +static int mp_find_ioapic(int gsi) |
5317 | { |
5318 | - int i = 0; |
5319 | + int i = 0; |
5320 | |
5321 | /* Find the IOAPIC that manages this GSI. */ |
5322 | for (i = 0; i < nr_ioapics; i++) { |
5323 | @@ -751,28 +657,15 @@ |
5324 | } |
5325 | |
5326 | printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); |
5327 | - |
5328 | return -1; |
5329 | } |
5330 | - |
5331 | |
5332 | -void __init mp_register_ioapic ( |
5333 | - u8 id, |
5334 | - u32 address, |
5335 | - u32 gsi_base) |
5336 | +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) |
5337 | { |
5338 | - int idx = 0; |
5339 | + int idx = 0; |
5340 | |
5341 | - if (nr_ioapics >= MAX_IO_APICS) { |
5342 | - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " |
5343 | - "(found %d)\n", MAX_IO_APICS, nr_ioapics); |
5344 | - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); |
5345 | - } |
5346 | - if (!address) { |
5347 | - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" |
5348 | - " found in MADT table, skipping!\n"); |
5349 | + if (bad_ioapic(address)) |
5350 | return; |
5351 | - } |
5352 | |
5353 | idx = nr_ioapics++; |
5354 | |
5355 | @@ -784,7 +677,7 @@ |
5356 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); |
5357 | #endif |
5358 | mp_ioapics[idx].mpc_apicid = id; |
5359 | - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); |
5360 | + mp_ioapics[idx].mpc_apicver = 0; |
5361 | |
5362 | /* |
5363 | * Build basic IRQ lookup table to facilitate gsi->io_apic lookups |
5364 | @@ -795,21 +688,15 @@ |
5365 | mp_ioapic_routing[idx].gsi_end = gsi_base + |
5366 | io_apic_get_redir_entries(idx); |
5367 | |
5368 | - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " |
5369 | + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, " |
5370 | "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, |
5371 | - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, |
5372 | + mp_ioapics[idx].mpc_apicaddr, |
5373 | mp_ioapic_routing[idx].gsi_start, |
5374 | mp_ioapic_routing[idx].gsi_end); |
5375 | - |
5376 | - return; |
5377 | } |
5378 | |
5379 | - |
5380 | -void __init mp_override_legacy_irq ( |
5381 | - u8 bus_irq, |
5382 | - u8 polarity, |
5383 | - u8 trigger, |
5384 | - u32 gsi) |
5385 | +void __init |
5386 | +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) |
5387 | { |
5388 | struct mpc_config_intsrc intsrc; |
5389 | int ioapic = -1; |
5390 | @@ -847,22 +734,18 @@ |
5391 | mp_irqs[mp_irq_entries] = intsrc; |
5392 | if (++mp_irq_entries == MAX_IRQ_SOURCES) |
5393 | panic("Max # of irq sources exceeded!\n"); |
5394 | - |
5395 | - return; |
5396 | } |
5397 | |
5398 | - |
5399 | -void __init mp_config_acpi_legacy_irqs (void) |
5400 | +void __init mp_config_acpi_legacy_irqs(void) |
5401 | { |
5402 | struct mpc_config_intsrc intsrc; |
5403 | - int i = 0; |
5404 | - int ioapic = -1; |
5405 | + int i = 0; |
5406 | + int ioapic = -1; |
5407 | |
5408 | /* |
5409 | * Fabricate the legacy ISA bus (bus #31). |
5410 | */ |
5411 | - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; |
5412 | - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); |
5413 | + set_bit(MP_ISA_BUS, mp_bus_not_pci); |
5414 | |
5415 | /* |
5416 | * Locate the IOAPIC that manages the ISA IRQs (0-15). |
5417 | @@ -915,24 +798,13 @@ |
5418 | if (++mp_irq_entries == MAX_IRQ_SOURCES) |
5419 | panic("Max # of irq sources exceeded!\n"); |
5420 | } |
5421 | - |
5422 | - return; |
5423 | } |
5424 | |
5425 | -#define MAX_GSI_NUM 4096 |
5426 | - |
5427 | int mp_register_gsi(u32 gsi, int triggering, int polarity) |
5428 | { |
5429 | - int ioapic = -1; |
5430 | - int ioapic_pin = 0; |
5431 | - int idx, bit = 0; |
5432 | - static int pci_irq = 16; |
5433 | - /* |
5434 | - * Mapping between Global System Interrupts, which |
5435 | - * represent all possible interrupts, to the IRQs |
5436 | - * assigned to actual devices. |
5437 | - */ |
5438 | - static int gsi_to_irq[MAX_GSI_NUM]; |
5439 | + int ioapic = -1; |
5440 | + int ioapic_pin = 0; |
5441 | + int idx, bit = 0; |
5442 | |
5443 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) |
5444 | return gsi; |
5445 | @@ -965,47 +837,14 @@ |
5446 | if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { |
5447 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", |
5448 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); |
5449 | - return gsi_to_irq[gsi]; |
5450 | + return gsi; |
5451 | } |
5452 | |
5453 | mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); |
5454 | |
5455 | - if (triggering == ACPI_LEVEL_SENSITIVE) { |
5456 | - /* |
5457 | - * For PCI devices assign IRQs in order, avoiding gaps |
5458 | - * due to unused I/O APIC pins. |
5459 | - */ |
5460 | - int irq = gsi; |
5461 | - if (gsi < MAX_GSI_NUM) { |
5462 | - /* |
5463 | - * Retain the VIA chipset work-around (gsi > 15), but |
5464 | - * avoid a problem where the 8254 timer (IRQ0) is setup |
5465 | - * via an override (so it's not on pin 0 of the ioapic), |
5466 | - * and at the same time, the pin 0 interrupt is a PCI |
5467 | - * type. The gsi > 15 test could cause these two pins |
5468 | - * to be shared as IRQ0, and they are not shareable. |
5469 | - * So test for this condition, and if necessary, avoid |
5470 | - * the pin collision. |
5471 | - */ |
5472 | - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) |
5473 | - gsi = pci_irq++; |
5474 | - /* |
5475 | - * Don't assign IRQ used by ACPI SCI |
5476 | - */ |
5477 | - if (gsi == acpi_fadt.sci_int) |
5478 | - gsi = pci_irq++; |
5479 | - gsi_to_irq[irq] = gsi; |
5480 | - } else { |
5481 | - printk(KERN_ERR "GSI %u is too high\n", gsi); |
5482 | - return gsi; |
5483 | - } |
5484 | - } |
5485 | - |
5486 | io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, |
5487 | triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, |
5488 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); |
5489 | return gsi; |
5490 | } |
5491 | - |
5492 | -#endif /*CONFIG_X86_IO_APIC*/ |
5493 | #endif /*CONFIG_ACPI*/ |
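Taken together, the mpparse changes also route both I/O APIC registration paths, MP_ioapic_info() for the MP table and mp_register_ioapic() for ACPI/MADT, through the single bad_ioapic() helper introduced above instead of duplicating the limit and zero-address checks. A minimal stand-alone sketch of that factor-out pattern, with hypothetical names standing in for the kernel's:

    #include <stdio.h>

    #define MAX_UNITS 4
    static int nr_units;

    /* One shared sanity check, called by every registration path. */
    static int bad_unit(unsigned long address)
    {
            if (nr_units >= MAX_UNITS) {
                    fprintf(stderr, "max units (%d) exceeded\n", MAX_UNITS);
                    return 1;
            }
            if (!address) {
                    fprintf(stderr, "bogus (zero) address, skipping\n");
                    return 1;
            }
            return 0;
    }

    static void register_from_table(unsigned long address)
    {
            if (bad_unit(address))
                    return;
            nr_units++;
    }

    static void register_from_firmware(unsigned long address)
    {
            if (bad_unit(address))
                    return;
            nr_units++;
    }

    int main(void)
    {
            register_from_table(0xfec00000UL);
            register_from_firmware(0);      /* rejected by the shared check */
            printf("%d unit(s) registered\n", nr_units);
            return 0;
    }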
5494 | --- a/arch/x86/kernel/pci-dma_32-xen.c |
5495 | +++ b/arch/x86/kernel/pci-dma_32-xen.c |
5496 | @@ -116,8 +116,7 @@ |
5497 | { |
5498 | int i, rc; |
5499 | |
5500 | - if (direction == DMA_NONE) |
5501 | - BUG(); |
5502 | + BUG_ON(!valid_dma_direction(direction)); |
5503 | WARN_ON(nents == 0 || sg[0].length == 0); |
5504 | |
5505 | if (swiotlb) { |
5506 | @@ -148,7 +147,7 @@ |
5507 | { |
5508 | int i; |
5509 | |
5510 | - BUG_ON(direction == DMA_NONE); |
5511 | + BUG_ON(!valid_dma_direction(direction)); |
5512 | if (swiotlb) |
5513 | swiotlb_unmap_sg(hwdev, sg, nents, direction); |
5514 | else { |
5515 | @@ -165,8 +164,7 @@ |
5516 | { |
5517 | dma_addr_t dma_addr; |
5518 | |
5519 | - BUG_ON(direction == DMA_NONE); |
5520 | - |
5521 | + BUG_ON(!valid_dma_direction(direction)); |
5522 | if (swiotlb) { |
5523 | dma_addr = swiotlb_map_page( |
5524 | dev, page, offset, size, direction); |
5525 | @@ -183,7 +181,7 @@ |
5526 | dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, |
5527 | enum dma_data_direction direction) |
5528 | { |
5529 | - BUG_ON(direction == DMA_NONE); |
5530 | + BUG_ON(!valid_dma_direction(direction)); |
5531 | if (swiotlb) |
5532 | swiotlb_unmap_page(dev, dma_address, size, direction); |
5533 | else |
5534 | @@ -365,8 +363,7 @@ |
5535 | { |
5536 | dma_addr_t dma; |
5537 | |
5538 | - if (direction == DMA_NONE) |
5539 | - BUG(); |
5540 | + BUG_ON(!valid_dma_direction(direction)); |
5541 | WARN_ON(size == 0); |
5542 | |
5543 | if (swiotlb) { |
5544 | @@ -387,8 +384,7 @@ |
5545 | dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, |
5546 | enum dma_data_direction direction) |
5547 | { |
5548 | - if (direction == DMA_NONE) |
5549 | - BUG(); |
5550 | + BUG_ON(!valid_dma_direction(direction)); |
5551 | if (swiotlb) |
5552 | swiotlb_unmap_single(dev, dma_addr, size, direction); |
5553 | else |
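Throughout pci-dma_32-xen.c the open-coded "if (direction == DMA_NONE) BUG();" tests become BUG_ON(!valid_dma_direction(direction)), which rejects any value outside the three legal transfer directions rather than only DMA_NONE. A minimal user-space analogue, with assert() standing in for BUG_ON() and the enum spelled out for the demo:

    #include <assert.h>
    #include <stdio.h>

    enum dma_data_direction {
            DMA_BIDIRECTIONAL = 0,
            DMA_TO_DEVICE     = 1,
            DMA_FROM_DEVICE   = 2,
            DMA_NONE          = 3,
    };

    /* Same idea as the kernel helper: only the three real directions pass. */
    static int valid_dma_direction(int dir)
    {
            return dir == DMA_BIDIRECTIONAL ||
                   dir == DMA_TO_DEVICE ||
                   dir == DMA_FROM_DEVICE;
    }

    static void map_buffer(int direction)
    {
            assert(valid_dma_direction(direction)); /* BUG_ON(!...) in the kernel */
            printf("mapping with direction %d\n", direction);
    }

    int main(void)
    {
            map_buffer(DMA_TO_DEVICE);
            /* map_buffer(DMA_NONE) or map_buffer(42) would trip the check. */
            return 0;
    }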
5554 | --- a/arch/x86/kernel/pci-swiotlb_64-xen.c |
5555 | +++ b/arch/x86/kernel/pci-swiotlb_64-xen.c |
5556 | @@ -3,7 +3,8 @@ |
5557 | #include <linux/pci.h> |
5558 | #include <linux/cache.h> |
5559 | #include <linux/module.h> |
5560 | -#include <asm/dma-mapping.h> |
5561 | +#include <linux/dma-mapping.h> |
5562 | + |
5563 | #include <asm/proto.h> |
5564 | #include <asm/swiotlb.h> |
5565 | #include <asm/dma.h> |
5566 | --- a/arch/x86/kernel/process_32-xen.c |
5567 | +++ b/arch/x86/kernel/process_32-xen.c |
5568 | @@ -37,6 +37,7 @@ |
5569 | #include <linux/kallsyms.h> |
5570 | #include <linux/ptrace.h> |
5571 | #include <linux/random.h> |
5572 | +#include <linux/personality.h> |
5573 | |
5574 | #include <asm/uaccess.h> |
5575 | #include <asm/pgtable.h> |
5576 | @@ -186,7 +187,7 @@ |
5577 | void cpu_idle_wait(void) |
5578 | { |
5579 | unsigned int cpu, this_cpu = get_cpu(); |
5580 | - cpumask_t map; |
5581 | + cpumask_t map, tmp = current->cpus_allowed; |
5582 | |
5583 | set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); |
5584 | put_cpu(); |
5585 | @@ -208,6 +209,8 @@ |
5586 | } |
5587 | cpus_and(map, map, cpu_online_map); |
5588 | } while (!cpus_empty(map)); |
5589 | + |
5590 | + set_cpus_allowed(current, tmp); |
5591 | } |
5592 | EXPORT_SYMBOL_GPL(cpu_idle_wait); |
5593 | |
5594 | @@ -240,9 +243,9 @@ |
5595 | if (user_mode_vm(regs)) |
5596 | printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); |
5597 | printk(" EFLAGS: %08lx %s (%s %.*s)\n", |
5598 | - regs->eflags, print_tainted(), system_utsname.release, |
5599 | - (int)strcspn(system_utsname.version, " "), |
5600 | - system_utsname.version); |
5601 | + regs->eflags, print_tainted(), init_utsname()->release, |
5602 | + (int)strcspn(init_utsname()->version, " "), |
5603 | + init_utsname()->version); |
5604 | printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", |
5605 | regs->eax,regs->ebx,regs->ecx,regs->edx); |
5606 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", |
5607 | @@ -264,15 +267,6 @@ |
5608 | * the "args". |
5609 | */ |
5610 | extern void kernel_thread_helper(void); |
5611 | -__asm__(".section .text\n" |
5612 | - ".align 4\n" |
5613 | - "kernel_thread_helper:\n\t" |
5614 | - "movl %edx,%eax\n\t" |
5615 | - "pushl %edx\n\t" |
5616 | - "call *%ebx\n\t" |
5617 | - "pushl %eax\n\t" |
5618 | - "call do_exit\n" |
5619 | - ".previous"); |
5620 | |
5621 | /* |
5622 | * Create a kernel thread |
5623 | @@ -290,7 +284,7 @@ |
5624 | regs.xes = __USER_DS; |
5625 | regs.orig_eax = -1; |
5626 | regs.eip = (unsigned long) kernel_thread_helper; |
5627 | - regs.xcs = GET_KERNEL_CS(); |
5628 | + regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
5629 | regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; |
5630 | |
5631 | /* Ok, create the new process.. */ |
5632 | @@ -369,13 +363,12 @@ |
5633 | |
5634 | tsk = current; |
5635 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
5636 | - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
5637 | + p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, |
5638 | + IO_BITMAP_BYTES, GFP_KERNEL); |
5639 | if (!p->thread.io_bitmap_ptr) { |
5640 | p->thread.io_bitmap_max = 0; |
5641 | return -ENOMEM; |
5642 | } |
5643 | - memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr, |
5644 | - IO_BITMAP_BYTES); |
5645 | set_tsk_thread_flag(p, TIF_IO_BITMAP); |
5646 | } |
5647 | |
5648 | @@ -850,7 +843,7 @@ |
5649 | |
5650 | unsigned long arch_align_stack(unsigned long sp) |
5651 | { |
5652 | - if (randomize_va_space) |
5653 | + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) |
5654 | sp -= get_random_int() % 8192; |
5655 | return sp & ~0xf; |
5656 | } |
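Two details in process_32-xen.c are worth noting: copy_thread now duplicates the parent's I/O bitmap with kmemdup() instead of a separate kmalloc() plus memcpy(), and arch_align_stack() skips the random stack offset when the task's personality carries ADDR_NO_RANDOMIZE. A user-space sketch of the first change (the kernel's kmemdup() additionally takes a gfp_t flags argument):

    #include <stdlib.h>
    #include <string.h>

    /* Stand-in for kmemdup(src, len, GFP_KERNEL): allocate and copy in one step. */
    static void *memdup(const void *src, size_t len)
    {
            void *p = malloc(len);

            if (p)
                    memcpy(p, src, len);
            return p;       /* caller still handles NULL, as the patch does */
    }

    int main(void)
    {
            const unsigned char bitmap[16] = { 0xff, 0x0f };
            unsigned char *copy = memdup(bitmap, sizeof(bitmap));

            if (!copy)
                    return 1;       /* the -ENOMEM path in copy_thread */
            free(copy);
            return 0;
    }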
5657 | --- a/arch/x86/kernel/process_64-xen.c |
5658 | +++ b/arch/x86/kernel/process_64-xen.c |
5659 | @@ -89,25 +89,24 @@ |
5660 | } |
5661 | EXPORT_SYMBOL(idle_notifier_unregister); |
5662 | |
5663 | -enum idle_state { CPU_IDLE, CPU_NOT_IDLE }; |
5664 | -static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE; |
5665 | - |
5666 | void enter_idle(void) |
5667 | { |
5668 | - __get_cpu_var(idle_state) = CPU_IDLE; |
5669 | + write_pda(isidle, 1); |
5670 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); |
5671 | } |
5672 | |
5673 | static void __exit_idle(void) |
5674 | { |
5675 | - __get_cpu_var(idle_state) = CPU_NOT_IDLE; |
5676 | + if (test_and_clear_bit_pda(0, isidle) == 0) |
5677 | + return; |
5678 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); |
5679 | } |
5680 | |
5681 | /* Called from interrupts to signify idle end */ |
5682 | void exit_idle(void) |
5683 | { |
5684 | - if (current->pid | read_pda(irqcount)) |
5685 | + /* idle loop has pid 0 */ |
5686 | + if (current->pid) |
5687 | return; |
5688 | __exit_idle(); |
5689 | } |
5690 | @@ -184,6 +183,9 @@ |
5691 | play_dead(); |
5692 | enter_idle(); |
5693 | idle(); |
5694 | + /* In many cases the interrupt that ended idle |
5695 | + has already called exit_idle. But some idle |
5696 | + loops can be woken up without interrupt. */ |
5697 | __exit_idle(); |
5698 | } |
5699 | |
5700 | @@ -196,7 +198,7 @@ |
5701 | void cpu_idle_wait(void) |
5702 | { |
5703 | unsigned int cpu, this_cpu = get_cpu(); |
5704 | - cpumask_t map; |
5705 | + cpumask_t map, tmp = current->cpus_allowed; |
5706 | |
5707 | set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); |
5708 | put_cpu(); |
5709 | @@ -219,6 +221,8 @@ |
5710 | } |
5711 | cpus_and(map, map, cpu_online_map); |
5712 | } while (!cpus_empty(map)); |
5713 | + |
5714 | + set_cpus_allowed(current, tmp); |
5715 | } |
5716 | EXPORT_SYMBOL_GPL(cpu_idle_wait); |
5717 | |
5718 | @@ -250,9 +254,9 @@ |
5719 | print_modules(); |
5720 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", |
5721 | current->pid, current->comm, print_tainted(), |
5722 | - system_utsname.release, |
5723 | - (int)strcspn(system_utsname.version, " "), |
5724 | - system_utsname.version); |
5725 | + init_utsname()->release, |
5726 | + (int)strcspn(init_utsname()->version, " "), |
5727 | + init_utsname()->version); |
5728 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); |
5729 | printk_address(regs->rip); |
5730 | printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, |
5731 | @@ -310,6 +314,7 @@ |
5732 | |
5733 | kfree(t->io_bitmap_ptr); |
5734 | t->io_bitmap_ptr = NULL; |
5735 | + clear_thread_flag(TIF_IO_BITMAP); |
5736 | /* |
5737 | * Careful, clear this in the TSS too: |
5738 | */ |
5739 | @@ -340,6 +345,7 @@ |
5740 | if (t->flags & _TIF_IA32) |
5741 | current_thread_info()->status |= TS_COMPAT; |
5742 | } |
5743 | + t->flags &= ~_TIF_DEBUG; |
5744 | |
5745 | tsk->thread.debugreg0 = 0; |
5746 | tsk->thread.debugreg1 = 0; |
5747 | @@ -432,7 +438,7 @@ |
5748 | asm("mov %%es,%0" : "=m" (p->thread.es)); |
5749 | asm("mov %%ds,%0" : "=m" (p->thread.ds)); |
5750 | |
5751 | - if (unlikely(me->thread.io_bitmap_ptr != NULL)) { |
5752 | + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
5753 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
5754 | if (!p->thread.io_bitmap_ptr) { |
5755 | p->thread.io_bitmap_max = 0; |
5756 | @@ -440,6 +446,7 @@ |
5757 | } |
5758 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, |
5759 | IO_BITMAP_BYTES); |
5760 | + set_tsk_thread_flag(p, TIF_IO_BITMAP); |
5761 | } |
5762 | |
5763 | /* |
5764 | @@ -474,6 +481,30 @@ |
5765 | } |
5766 | |
5767 | /* |
5768 | + * This special macro can be used to load a debugging register |
5769 | + */ |
5770 | +#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) |
5771 | + |
5772 | +static inline void __switch_to_xtra(struct task_struct *prev_p, |
5773 | + struct task_struct *next_p) |
5774 | +{ |
5775 | + struct thread_struct *prev, *next; |
5776 | + |
5777 | + prev = &prev_p->thread, |
5778 | + next = &next_p->thread; |
5779 | + |
5780 | + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
5781 | + loaddebug(next, 0); |
5782 | + loaddebug(next, 1); |
5783 | + loaddebug(next, 2); |
5784 | + loaddebug(next, 3); |
5785 | + /* no 4 and 5 */ |
5786 | + loaddebug(next, 6); |
5787 | + loaddebug(next, 7); |
5788 | + } |
5789 | +} |
5790 | + |
5791 | +/* |
5792 | * switch_to(x,y) should switch tasks from x to y. |
5793 | * |
5794 | * This could still be optimized: |
5795 | @@ -495,6 +526,10 @@ |
5796 | struct physdev_set_iobitmap iobmp_op; |
5797 | multicall_entry_t _mcl[8], *mcl = _mcl; |
5798 | |
5799 | + /* we're going to use this soon, after a few expensive things */ |
5800 | + if (next_p->fpu_counter>5) |
5801 | + prefetch(&next->i387.fxsave); |
5802 | + |
5803 | /* |
5804 | * This is basically '__unlazy_fpu', except that we queue a |
5805 | * multicall to indicate FPU task switch, rather than |
5806 | @@ -507,7 +542,8 @@ |
5807 | mcl->op = __HYPERVISOR_fpu_taskswitch; |
5808 | mcl->args[0] = 1; |
5809 | mcl++; |
5810 | - } |
5811 | + } else |
5812 | + prev_p->fpu_counter = 0; |
5813 | |
5814 | /* |
5815 | * Reload esp0, LDT and the page table pointer: |
5816 | @@ -587,21 +623,29 @@ |
5817 | write_pda(oldrsp, next->userrsp); |
5818 | write_pda(pcurrent, next_p); |
5819 | write_pda(kernelstack, |
5820 | - task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); |
5821 | + (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); |
5822 | +#ifdef CONFIG_CC_STACKPROTECTOR |
5823 | + write_pda(stack_canary, next_p->stack_canary); |
5824 | + |
5825 | + /* |
5826 | + * Build time only check to make sure the stack_canary is at |
5827 | + * offset 40 in the pda; this is a gcc ABI requirement |
5828 | + */ |
5829 | + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); |
5830 | +#endif |
5831 | |
5832 | /* |
5833 | * Now maybe reload the debug registers |
5834 | */ |
5835 | - if (unlikely(next->debugreg7)) { |
5836 | - set_debugreg(next->debugreg0, 0); |
5837 | - set_debugreg(next->debugreg1, 1); |
5838 | - set_debugreg(next->debugreg2, 2); |
5839 | - set_debugreg(next->debugreg3, 3); |
5840 | - /* no 4 and 5 */ |
5841 | - set_debugreg(next->debugreg6, 6); |
5842 | - set_debugreg(next->debugreg7, 7); |
5843 | - } |
5844 | + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) |
5845 | + __switch_to_xtra(prev_p, next_p); |
5846 | |
5847 | + /* If the task has used fpu the last 5 timeslices, just do a full |
5848 | + * restore of the math state immediately to avoid the trap; the |
5849 | + * chances of needing FPU soon are obviously high now |
5850 | + */ |
5851 | + if (next_p->fpu_counter>5) |
5852 | + math_state_restore(); |
5853 | return prev_p; |
5854 | } |
5855 | |
5856 | @@ -821,7 +865,7 @@ |
5857 | |
5858 | unsigned long arch_align_stack(unsigned long sp) |
5859 | { |
5860 | - if (randomize_va_space) |
5861 | + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) |
5862 | sp -= get_random_int() % 8192; |
5863 | return sp & ~0xf; |
5864 | } |
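The 64-bit context-switch path changes several things at once: idle tracking moves into the PDA (write_pda(isidle, ...)), debug-register reloads are gated on the next task's thread-info flags via __switch_to_xtra(), and a task that used the FPU in each of its last five timeslices has its math state restored eagerly rather than through the lazy-FPU trap. The CONFIG_CC_STACKPROTECTOR hunk also adds a build-time assertion that stack_canary sits at offset 40 in the PDA, the offset gcc's stack-protector code expects. A stand-alone illustration of that compile-time offset check, using a made-up structure rather than the real x8664_pda:

    #include <stddef.h>
    #include <stdio.h>

    /* Negative-array-size trick, the same idea BUILD_BUG_ON used at the time. */
    #define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

    struct pda_like {
            char          pad[40];          /* stands in for the earlier PDA fields */
            unsigned long stack_canary;     /* must land at offset 40 */
    };

    int main(void)
    {
            BUILD_BUG_ON(offsetof(struct pda_like, stack_canary) != 40);
            printf("stack_canary offset: %zu\n",
                   offsetof(struct pda_like, stack_canary));
            return 0;
    }

If a later edit moved the member, the sizeof expression would involve a negative array size and the build would fail instead of producing a silently broken canary.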
5865 | --- a/arch/x86/kernel/setup64-xen.c |
5866 | +++ b/arch/x86/kernel/setup64-xen.c |
5867 | @@ -31,7 +31,7 @@ |
5868 | #include <asm/hypervisor.h> |
5869 | #endif |
5870 | |
5871 | -char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; |
5872 | +char x86_boot_params[BOOT_PARAM_SIZE] __initdata; |
5873 | |
5874 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
5875 | |
5876 | @@ -55,8 +55,10 @@ |
5877 | on Enable(default) |
5878 | off Disable |
5879 | */ |
5880 | -int __init nonx_setup(char *str) |
5881 | +static int __init nonx_setup(char *str) |
5882 | { |
5883 | + if (!str) |
5884 | + return -EINVAL; |
5885 | if (!strncmp(str, "on", 2)) { |
5886 | __supported_pte_mask |= _PAGE_NX; |
5887 | do_not_nx = 0; |
5888 | @@ -64,9 +66,9 @@ |
5889 | do_not_nx = 1; |
5890 | __supported_pte_mask &= ~_PAGE_NX; |
5891 | } |
5892 | - return 1; |
5893 | + return 0; |
5894 | } |
5895 | -__setup("noexec=", nonx_setup); /* parsed early actually */ |
5896 | +early_param("noexec", nonx_setup); |
5897 | |
5898 | int force_personality32 = 0; |
5899 | |
5900 | @@ -102,12 +104,9 @@ |
5901 | #endif |
5902 | |
5903 | /* Copy section for each CPU (we discard the original) */ |
5904 | - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); |
5905 | -#ifdef CONFIG_MODULES |
5906 | - if (size < PERCPU_ENOUGH_ROOM) |
5907 | - size = PERCPU_ENOUGH_ROOM; |
5908 | -#endif |
5909 | + size = PERCPU_ENOUGH_ROOM; |
5910 | |
5911 | + printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size); |
5912 | for_each_cpu_mask (i, cpu_possible_map) { |
5913 | char *ptr; |
5914 | |
5915 | @@ -169,7 +168,10 @@ |
5916 | /* Setup up data that may be needed in __get_free_pages early */ |
5917 | asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); |
5918 | #ifndef CONFIG_XEN |
5919 | + /* Memory clobbers used to order PDA accessed */ |
5920 | + mb(); |
5921 | wrmsrl(MSR_GS_BASE, pda); |
5922 | + mb(); |
5923 | #else |
5924 | if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, |
5925 | (unsigned long)pda)) |
5926 | @@ -302,28 +304,17 @@ |
5927 | * set up and load the per-CPU TSS |
5928 | */ |
5929 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
5930 | + static const unsigned int order[N_EXCEPTION_STACKS] = { |
5931 | + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, |
5932 | + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER |
5933 | + }; |
5934 | if (cpu) { |
5935 | - static const unsigned int order[N_EXCEPTION_STACKS] = { |
5936 | - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, |
5937 | - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER |
5938 | - }; |
5939 | - |
5940 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); |
5941 | if (!estacks) |
5942 | panic("Cannot allocate exception stack %ld %d\n", |
5943 | v, cpu); |
5944 | } |
5945 | - switch (v + 1) { |
5946 | -#if DEBUG_STKSZ > EXCEPTION_STKSZ |
5947 | - case DEBUG_STACK: |
5948 | - cpu_pda(cpu)->debugstack = (unsigned long)estacks; |
5949 | - estacks += DEBUG_STKSZ; |
5950 | - break; |
5951 | -#endif |
5952 | - default: |
5953 | - estacks += EXCEPTION_STKSZ; |
5954 | - break; |
5955 | - } |
5956 | + estacks += PAGE_SIZE << order[v]; |
5957 | orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks; |
5958 | } |
5959 | |
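Two idioms in the setup64-xen.c hunks deserve a note. The exception-stack setup now sizes every stack from a single order[] table built with a GCC designated range initializer, so only the debug stack overrides the default, and the "noexec=" handler is converted from __setup() to early_param(), which is why it now checks for a NULL argument and returns 0 on success. A stand-alone illustration of the range-initializer idiom, with arbitrary counts rather than the kernel's real constants:

    #include <stdio.h>

    enum { N_STACKS = 5, DEBUG_STACK = 4 };     /* made-up values for the demo */

    /* GCC extension: "[a ... b] =" fills a whole range, and a later
     * designator overrides individual entries. */
    static const unsigned int order[N_STACKS] = {
            [0 ... N_STACKS - 1] = 0,           /* default: order-0, one page */
            [DEBUG_STACK - 1]    = 2,           /* debug stack: four pages */
    };

    int main(void)
    {
            int i;

            for (i = 0; i < N_STACKS; i++)
                    printf("stack %d: order %u\n", i, order[i]);
            return 0;
    }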
5960 | --- a/arch/x86/kernel/setup_32-xen.c |
5961 | +++ b/arch/x86/kernel/setup_32-xen.c |
5962 | @@ -56,6 +56,7 @@ |
5963 | #include <asm/apic.h> |
5964 | #include <asm/e820.h> |
5965 | #include <asm/mpspec.h> |
5966 | +#include <asm/mmzone.h> |
5967 | #include <asm/setup.h> |
5968 | #include <asm/arch_hooks.h> |
5969 | #include <asm/sections.h> |
5970 | @@ -105,18 +106,6 @@ |
5971 | |
5972 | unsigned long mmu_cr4_features; |
5973 | |
5974 | -#ifdef CONFIG_ACPI |
5975 | - int acpi_disabled = 0; |
5976 | -#else |
5977 | - int acpi_disabled = 1; |
5978 | -#endif |
5979 | -EXPORT_SYMBOL(acpi_disabled); |
5980 | - |
5981 | -#ifdef CONFIG_ACPI |
5982 | -int __initdata acpi_force = 0; |
5983 | -extern acpi_interrupt_flags acpi_sci_flags; |
5984 | -#endif |
5985 | - |
5986 | /* for MCA, but anyone else can use it if they want */ |
5987 | unsigned int machine_id; |
5988 | #ifdef CONFIG_MCA |
5989 | @@ -170,7 +159,6 @@ |
5990 | #endif |
5991 | |
5992 | extern void early_cpu_init(void); |
5993 | -extern void generic_apic_probe(char *); |
5994 | extern int root_mountflags; |
5995 | |
5996 | unsigned long saved_videomode; |
5997 | @@ -243,9 +231,6 @@ |
5998 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
5999 | } }; |
6000 | |
6001 | -#define ADAPTER_ROM_RESOURCES \ |
6002 | - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) |
6003 | - |
6004 | static struct resource video_rom_resource = { |
6005 | .name = "Video ROM", |
6006 | .start = 0xc0000, |
6007 | @@ -307,9 +292,6 @@ |
6008 | .flags = IORESOURCE_BUSY | IORESOURCE_IO |
6009 | } }; |
6010 | |
6011 | -#define STANDARD_IO_RESOURCES \ |
6012 | - (sizeof standard_io_resources / sizeof standard_io_resources[0]) |
6013 | - |
6014 | #define romsignature(x) (*(unsigned short *)(x) == 0xaa55) |
6015 | |
6016 | static int __init romchecksum(unsigned char *rom, unsigned long length) |
6017 | @@ -372,7 +354,7 @@ |
6018 | } |
6019 | |
6020 | /* check for adapter roms on 2k boundaries */ |
6021 | - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { |
6022 | + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { |
6023 | rom = isa_bus_to_virt(start); |
6024 | if (!romsignature(rom)) |
6025 | continue; |
6026 | @@ -764,246 +746,152 @@ |
6027 | } |
6028 | #endif |
6029 | |
6030 | -static void __init parse_cmdline_early (char ** cmdline_p) |
6031 | +static int __initdata user_defined_memmap = 0; |
6032 | + |
6033 | +/* |
6034 | + * "mem=nopentium" disables the 4MB page tables. |
6035 | + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM |
6036 | + * to <mem>, overriding the bios size. |
6037 | + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from |
6038 | + * <start> to <start>+<mem>, overriding the bios size. |
6039 | + * |
6040 | + * HPA tells me bootloaders need to parse mem=, so no new |
6041 | + * option should be mem= [also see Documentation/i386/boot.txt] |
6042 | + */ |
6043 | +static int __init parse_mem(char *arg) |
6044 | { |
6045 | - char c = ' ', *to = command_line, *from = saved_command_line; |
6046 | - int len = 0, max_cmdline; |
6047 | - int userdef = 0; |
6048 | - |
6049 | - if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) |
6050 | - max_cmdline = COMMAND_LINE_SIZE; |
6051 | - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); |
6052 | - /* Save unparsed command line copy for /proc/cmdline */ |
6053 | - saved_command_line[max_cmdline-1] = '\0'; |
6054 | - |
6055 | - for (;;) { |
6056 | - if (c != ' ') |
6057 | - goto next_char; |
6058 | - /* |
6059 | - * "mem=nopentium" disables the 4MB page tables. |
6060 | - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM |
6061 | - * to <mem>, overriding the bios size. |
6062 | - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from |
6063 | - * <start> to <start>+<mem>, overriding the bios size. |
6064 | - * |
6065 | - * HPA tells me bootloaders need to parse mem=, so no new |
6066 | - * option should be mem= [also see Documentation/i386/boot.txt] |
6067 | - */ |
6068 | - if (!memcmp(from, "mem=", 4)) { |
6069 | - if (to != command_line) |
6070 | - to--; |
6071 | - if (!memcmp(from+4, "nopentium", 9)) { |
6072 | - from += 9+4; |
6073 | - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); |
6074 | - disable_pse = 1; |
6075 | - } else { |
6076 | - /* If the user specifies memory size, we |
6077 | - * limit the BIOS-provided memory map to |
6078 | - * that size. exactmap can be used to specify |
6079 | - * the exact map. mem=number can be used to |
6080 | - * trim the existing memory map. |
6081 | - */ |
6082 | - unsigned long long mem_size; |
6083 | - |
6084 | - mem_size = memparse(from+4, &from); |
6085 | - limit_regions(mem_size); |
6086 | - userdef=1; |
6087 | - } |
6088 | - } |
6089 | + if (!arg) |
6090 | + return -EINVAL; |
6091 | |
6092 | - else if (!memcmp(from, "memmap=", 7)) { |
6093 | - if (to != command_line) |
6094 | - to--; |
6095 | - if (!memcmp(from+7, "exactmap", 8)) { |
6096 | -#ifdef CONFIG_CRASH_DUMP |
6097 | - /* If we are doing a crash dump, we |
6098 | - * still need to know the real mem |
6099 | - * size before original memory map is |
6100 | - * reset. |
6101 | - */ |
6102 | - find_max_pfn(); |
6103 | - saved_max_pfn = max_pfn; |
6104 | -#endif |
6105 | - from += 8+7; |
6106 | - e820.nr_map = 0; |
6107 | - userdef = 1; |
6108 | - } else { |
6109 | - /* If the user specifies memory size, we |
6110 | - * limit the BIOS-provided memory map to |
6111 | - * that size. exactmap can be used to specify |
6112 | - * the exact map. mem=number can be used to |
6113 | - * trim the existing memory map. |
6114 | - */ |
6115 | - unsigned long long start_at, mem_size; |
6116 | + if (strcmp(arg, "nopentium") == 0) { |
6117 | + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); |
6118 | + disable_pse = 1; |
6119 | + } else { |
6120 | + /* If the user specifies memory size, we |
6121 | + * limit the BIOS-provided memory map to |
6122 | + * that size. exactmap can be used to specify |
6123 | + * the exact map. mem=number can be used to |
6124 | + * trim the existing memory map. |
6125 | + */ |
6126 | + unsigned long long mem_size; |
6127 | |
6128 | - mem_size = memparse(from+7, &from); |
6129 | - if (*from == '@') { |
6130 | - start_at = memparse(from+1, &from); |
6131 | - add_memory_region(start_at, mem_size, E820_RAM); |
6132 | - } else if (*from == '#') { |
6133 | - start_at = memparse(from+1, &from); |
6134 | - add_memory_region(start_at, mem_size, E820_ACPI); |
6135 | - } else if (*from == '$') { |
6136 | - start_at = memparse(from+1, &from); |
6137 | - add_memory_region(start_at, mem_size, E820_RESERVED); |
6138 | - } else { |
6139 | - limit_regions(mem_size); |
6140 | - userdef=1; |
6141 | - } |
6142 | - } |
6143 | - } |
6144 | - |
6145 | - else if (!memcmp(from, "noexec=", 7)) |
6146 | - noexec_setup(from + 7); |
6147 | + mem_size = memparse(arg, &arg); |
6148 | + limit_regions(mem_size); |
6149 | + user_defined_memmap = 1; |
6150 | + } |
6151 | + return 0; |
6152 | +} |
6153 | +early_param("mem", parse_mem); |
6154 | |
6155 | +static int __init parse_memmap(char *arg) |
6156 | +{ |
6157 | + if (!arg) |
6158 | + return -EINVAL; |
6159 | |
6160 | -#ifdef CONFIG_X86_MPPARSE |
6161 | - /* |
6162 | - * If the BIOS enumerates physical processors before logical, |
6163 | - * maxcpus=N at enumeration-time can be used to disable HT. |
6164 | + if (strcmp(arg, "exactmap") == 0) { |
6165 | +#ifdef CONFIG_CRASH_DUMP |
6166 | + /* If we are doing a crash dump, we |
6167 | + * still need to know the real mem |
6168 | + * size before original memory map is |
6169 | + * reset. |
6170 | */ |
6171 | - else if (!memcmp(from, "maxcpus=", 8)) { |
6172 | - extern unsigned int maxcpus; |
6173 | - |
6174 | - maxcpus = simple_strtoul(from + 8, NULL, 0); |
6175 | - } |
6176 | + find_max_pfn(); |
6177 | + saved_max_pfn = max_pfn; |
6178 | #endif |
6179 | + e820.nr_map = 0; |
6180 | + user_defined_memmap = 1; |
6181 | + } else { |
6182 | + /* If the user specifies memory size, we |
6183 | + * limit the BIOS-provided memory map to |
6184 | + * that size. exactmap can be used to specify |
6185 | + * the exact map. mem=number can be used to |
6186 | + * trim the existing memory map. |
6187 | + */ |
6188 | + unsigned long long start_at, mem_size; |
6189 | |
6190 | -#ifdef CONFIG_ACPI |
6191 | - /* "acpi=off" disables both ACPI table parsing and interpreter */ |
6192 | - else if (!memcmp(from, "acpi=off", 8)) { |
6193 | - disable_acpi(); |
6194 | - } |
6195 | - |
6196 | - /* acpi=force to over-ride black-list */ |
6197 | - else if (!memcmp(from, "acpi=force", 10)) { |
6198 | - acpi_force = 1; |
6199 | - acpi_ht = 1; |
6200 | - acpi_disabled = 0; |
6201 | - } |
6202 | - |
6203 | - /* acpi=strict disables out-of-spec workarounds */ |
6204 | - else if (!memcmp(from, "acpi=strict", 11)) { |
6205 | - acpi_strict = 1; |
6206 | - } |
6207 | - |
6208 | - /* Limit ACPI just to boot-time to enable HT */ |
6209 | - else if (!memcmp(from, "acpi=ht", 7)) { |
6210 | - if (!acpi_force) |
6211 | - disable_acpi(); |
6212 | - acpi_ht = 1; |
6213 | - } |
6214 | - |
6215 | - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */ |
6216 | - else if (!memcmp(from, "pci=noacpi", 10)) { |
6217 | - acpi_disable_pci(); |
6218 | - } |
6219 | - /* "acpi=noirq" disables ACPI interrupt routing */ |
6220 | - else if (!memcmp(from, "acpi=noirq", 10)) { |
6221 | - acpi_noirq_set(); |
6222 | + mem_size = memparse(arg, &arg); |
6223 | + if (*arg == '@') { |
6224 | + start_at = memparse(arg+1, &arg); |
6225 | + add_memory_region(start_at, mem_size, E820_RAM); |
6226 | + } else if (*arg == '#') { |
6227 | + start_at = memparse(arg+1, &arg); |
6228 | + add_memory_region(start_at, mem_size, E820_ACPI); |
6229 | + } else if (*arg == '$') { |
6230 | + start_at = memparse(arg+1, &arg); |
6231 | + add_memory_region(start_at, mem_size, E820_RESERVED); |
6232 | + } else { |
6233 | + limit_regions(mem_size); |
6234 | + user_defined_memmap = 1; |
6235 | } |
6236 | + } |
6237 | + return 0; |
6238 | +} |
6239 | +early_param("memmap", parse_memmap); |
6240 | |
6241 | - else if (!memcmp(from, "acpi_sci=edge", 13)) |
6242 | - acpi_sci_flags.trigger = 1; |
6243 | +#ifdef CONFIG_PROC_VMCORE |
6244 | +/* elfcorehdr= specifies the location of elf core header |
6245 | + * stored by the crashed kernel. |
6246 | + */ |
6247 | +static int __init parse_elfcorehdr(char *arg) |
6248 | +{ |
6249 | + if (!arg) |
6250 | + return -EINVAL; |
6251 | |
6252 | - else if (!memcmp(from, "acpi_sci=level", 14)) |
6253 | - acpi_sci_flags.trigger = 3; |
6254 | + elfcorehdr_addr = memparse(arg, &arg); |
6255 | + return 0; |
6256 | +} |
6257 | +early_param("elfcorehdr", parse_elfcorehdr); |
6258 | +#endif /* CONFIG_PROC_VMCORE */ |
6259 | |
6260 | - else if (!memcmp(from, "acpi_sci=high", 13)) |
6261 | - acpi_sci_flags.polarity = 1; |
6262 | +/* |
6263 | + * highmem=size forces highmem to be exactly 'size' bytes. |
6264 | + * This works even on boxes that have no highmem otherwise. |
6265 | + * This also works to reduce highmem size on bigger boxes. |
6266 | + */ |
6267 | +static int __init parse_highmem(char *arg) |
6268 | +{ |
6269 | + if (!arg) |
6270 | + return -EINVAL; |
6271 | |
6272 | - else if (!memcmp(from, "acpi_sci=low", 12)) |
6273 | - acpi_sci_flags.polarity = 3; |
6274 | + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; |
6275 | + return 0; |
6276 | +} |
6277 | +early_param("highmem", parse_highmem); |
6278 | |
6279 | -#ifdef CONFIG_X86_IO_APIC |
6280 | - else if (!memcmp(from, "acpi_skip_timer_override", 24)) |
6281 | - acpi_skip_timer_override = 1; |
6282 | +/* |
6283 | + * vmalloc=size forces the vmalloc area to be exactly 'size' |
6284 | + * bytes. This can be used to increase (or decrease) the |
6285 | + * vmalloc area - the default is 128m. |
6286 | + */ |
6287 | +static int __init parse_vmalloc(char *arg) |
6288 | +{ |
6289 | + if (!arg) |
6290 | + return -EINVAL; |
6291 | |
6292 | - if (!memcmp(from, "disable_timer_pin_1", 19)) |
6293 | - disable_timer_pin_1 = 1; |
6294 | - if (!memcmp(from, "enable_timer_pin_1", 18)) |
6295 | - disable_timer_pin_1 = -1; |
6296 | - |
6297 | - /* disable IO-APIC */ |
6298 | - else if (!memcmp(from, "noapic", 6)) |
6299 | - disable_ioapic_setup(); |
6300 | -#endif /* CONFIG_X86_IO_APIC */ |
6301 | -#endif /* CONFIG_ACPI */ |
6302 | - |
6303 | -#ifdef CONFIG_X86_LOCAL_APIC |
6304 | - /* enable local APIC */ |
6305 | - else if (!memcmp(from, "lapic", 5)) |
6306 | - lapic_enable(); |
6307 | - |
6308 | - /* disable local APIC */ |
6309 | - else if (!memcmp(from, "nolapic", 6)) |
6310 | - lapic_disable(); |
6311 | -#endif /* CONFIG_X86_LOCAL_APIC */ |
6312 | + __VMALLOC_RESERVE = memparse(arg, &arg); |
6313 | + return 0; |
6314 | +} |
6315 | +early_param("vmalloc", parse_vmalloc); |
6316 | |
6317 | -#ifdef CONFIG_KEXEC |
6318 | - /* crashkernel=size@addr specifies the location to reserve for |
6319 | - * a crash kernel. By reserving this memory we guarantee |
6320 | - * that linux never set's it up as a DMA target. |
6321 | - * Useful for holding code to do something appropriate |
6322 | - * after a kernel panic. |
6323 | - */ |
6324 | - else if (!memcmp(from, "crashkernel=", 12)) { |
6325 | #ifndef CONFIG_XEN |
6326 | - unsigned long size, base; |
6327 | - size = memparse(from+12, &from); |
6328 | - if (*from == '@') { |
6329 | - base = memparse(from+1, &from); |
6330 | - /* FIXME: Do I want a sanity check |
6331 | - * to validate the memory range? |
6332 | - */ |
6333 | - crashk_res.start = base; |
6334 | - crashk_res.end = base + size - 1; |
6335 | - } |
6336 | -#else |
6337 | - printk("Ignoring crashkernel command line, " |
6338 | - "parameter will be supplied by xen\n"); |
6339 | -#endif |
6340 | - } |
6341 | -#endif |
6342 | -#ifdef CONFIG_PROC_VMCORE |
6343 | - /* elfcorehdr= specifies the location of elf core header |
6344 | - * stored by the crashed kernel. |
6345 | - */ |
6346 | - else if (!memcmp(from, "elfcorehdr=", 11)) |
6347 | - elfcorehdr_addr = memparse(from+11, &from); |
6348 | -#endif |
6349 | +/* |
6350 | + * reservetop=size reserves a hole at the top of the kernel address space which |
6351 | + * a hypervisor can load into later. Needed for dynamically loaded hypervisors, |
6352 | + * so relocating the fixmap can be done before paging initialization. |
6353 | + */ |
6354 | +static int __init parse_reservetop(char *arg) |
6355 | +{ |
6356 | + unsigned long address; |
6357 | |
6358 | - /* |
6359 | - * highmem=size forces highmem to be exactly 'size' bytes. |
6360 | - * This works even on boxes that have no highmem otherwise. |
6361 | - * This also works to reduce highmem size on bigger boxes. |
6362 | - */ |
6363 | - else if (!memcmp(from, "highmem=", 8)) |
6364 | - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; |
6365 | - |
6366 | - /* |
6367 | - * vmalloc=size forces the vmalloc area to be exactly 'size' |
6368 | - * bytes. This can be used to increase (or decrease) the |
6369 | - * vmalloc area - the default is 128m. |
6370 | - */ |
6371 | - else if (!memcmp(from, "vmalloc=", 8)) |
6372 | - __VMALLOC_RESERVE = memparse(from+8, &from); |
6373 | + if (!arg) |
6374 | + return -EINVAL; |
6375 | |
6376 | - next_char: |
6377 | - c = *(from++); |
6378 | - if (!c) |
6379 | - break; |
6380 | - if (COMMAND_LINE_SIZE <= ++len) |
6381 | - break; |
6382 | - *(to++) = c; |
6383 | - } |
6384 | - *to = '\0'; |
6385 | - *cmdline_p = command_line; |
6386 | - if (userdef) { |
6387 | - printk(KERN_INFO "user-defined physical RAM map:\n"); |
6388 | - print_memory_map("user"); |
6389 | - } |
6390 | + address = memparse(arg, &arg); |
6391 | + reserve_top_address(address); |
6392 | + return 0; |
6393 | } |
6394 | +early_param("reservetop", parse_reservetop); |
6395 | +#endif |
6396 | |
6397 | /* |
6398 | * Callback for efi_memory_walk. |
6399 | @@ -1024,7 +912,7 @@ |
6400 | static int __init |
6401 | efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) |
6402 | { |
6403 | - memory_present(0, start, end); |
6404 | + memory_present(0, PFN_UP(start), PFN_DOWN(end)); |
6405 | return 0; |
6406 | } |
6407 | |
6408 | @@ -1291,6 +1179,14 @@ |
6409 | } |
6410 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
6411 | pages_to_mb(highend_pfn - highstart_pfn)); |
6412 | + num_physpages = highend_pfn; |
6413 | + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
6414 | +#else |
6415 | + num_physpages = max_low_pfn; |
6416 | + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
6417 | +#endif |
6418 | +#ifdef CONFIG_FLATMEM |
6419 | + max_mapnr = num_physpages; |
6420 | #endif |
6421 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", |
6422 | pages_to_mb(max_low_pfn)); |
6423 | @@ -1302,22 +1198,19 @@ |
6424 | |
6425 | void __init zone_sizes_init(void) |
6426 | { |
6427 | - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; |
6428 | - unsigned int max_dma, low; |
6429 | - |
6430 | - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; |
6431 | - low = max_low_pfn; |
6432 | - |
6433 | - if (low < max_dma) |
6434 | - zones_size[ZONE_DMA] = low; |
6435 | - else { |
6436 | - zones_size[ZONE_DMA] = max_dma; |
6437 | - zones_size[ZONE_NORMAL] = low - max_dma; |
6438 | + unsigned long max_zone_pfns[MAX_NR_ZONES]; |
6439 | + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
6440 | + max_zone_pfns[ZONE_DMA] = |
6441 | + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; |
6442 | + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
6443 | #ifdef CONFIG_HIGHMEM |
6444 | - zones_size[ZONE_HIGHMEM] = highend_pfn - low; |
6445 | + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; |
6446 | + add_active_range(0, 0, highend_pfn); |
6447 | +#else |
6448 | + add_active_range(0, 0, max_low_pfn); |
6449 | #endif |
6450 | - } |
6451 | - free_area_init(zones_size); |
6452 | + |
6453 | + free_area_init_nodes(max_zone_pfns); |
6454 | } |
6455 | #else |
6456 | extern unsigned long __init setup_memory(void); |
6457 | @@ -1374,6 +1267,7 @@ |
6458 | */ |
6459 | acpi_reserve_bootmem(); |
6460 | #endif |
6461 | + numa_kva_reserve(); |
6462 | #endif /* !CONFIG_XEN */ |
6463 | |
6464 | #ifdef CONFIG_BLK_DEV_INITRD |
6465 | @@ -1559,7 +1453,7 @@ |
6466 | request_resource(&iomem_resource, &video_ram_resource); |
6467 | |
6468 | /* request I/O space for devices used on all i[345]86 PCs */ |
6469 | - for (i = 0; i < STANDARD_IO_RESOURCES; i++) |
6470 | + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) |
6471 | request_resource(&ioport_resource, &standard_io_resources[i]); |
6472 | return 0; |
6473 | } |
6474 | @@ -1700,17 +1594,19 @@ |
6475 | data_resource.start = virt_to_phys(_etext); |
6476 | data_resource.end = virt_to_phys(_edata)-1; |
6477 | |
6478 | - parse_cmdline_early(cmdline_p); |
6479 | + if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) |
6480 | + i = COMMAND_LINE_SIZE; |
6481 | + memcpy(saved_command_line, xen_start_info->cmd_line, i); |
6482 | + saved_command_line[i - 1] = '\0'; |
6483 | + parse_early_param(); |
6484 | |
6485 | -#ifdef CONFIG_EARLY_PRINTK |
6486 | - { |
6487 | - char *s = strstr(*cmdline_p, "earlyprintk="); |
6488 | - if (s) { |
6489 | - setup_early_printk(strchr(s, '=') + 1); |
6490 | - printk("early console enabled\n"); |
6491 | - } |
6492 | + if (user_defined_memmap) { |
6493 | + printk(KERN_INFO "user-defined physical RAM map:\n"); |
6494 | + print_memory_map("user"); |
6495 | } |
6496 | -#endif |
6497 | + |
6498 | + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); |
6499 | + *cmdline_p = command_line; |
6500 | |
6501 | max_low_pfn = setup_memory(); |
6502 | |
6503 | @@ -1817,7 +1713,7 @@ |
6504 | dmi_scan_machine(); |
6505 | |
6506 | #ifdef CONFIG_X86_GENERICARCH |
6507 | - generic_apic_probe(*cmdline_p); |
6508 | + generic_apic_probe(); |
6509 | #endif |
6510 | if (efi_enabled) |
6511 | efi_map_memmap(); |
6512 | @@ -1838,9 +1734,11 @@ |
6513 | acpi_boot_table_init(); |
6514 | #endif |
6515 | |
6516 | +#ifdef CONFIG_PCI |
6517 | #ifdef CONFIG_X86_IO_APIC |
6518 | check_acpi_pci(); /* Checks more than just ACPI actually */ |
6519 | #endif |
6520 | +#endif |
6521 | |
6522 | #ifdef CONFIG_ACPI |
6523 | acpi_boot_init(); |
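The bulk of the setup_32-xen.c change retires the hand-rolled parse_cmdline_early() scanner: each option (mem=, memmap=, highmem=, vmalloc=, reservetop=, and elfcorehdr=) becomes its own early_param() handler, setup_arch() copies the Xen-provided command line and calls parse_early_param(), and the open-coded resource-count macros give way to ARRAY_SIZE(). A condensed, kernel-style sketch of the handler contract these conversions follow (the names here are hypothetical, not a literal excerpt from the patch):

    /* early_param() handlers run before most of setup_arch(). They must
     * tolerate a NULL argument (option given without "=value") and return
     * 0 on success or a negative errno, unlike __setup() handlers, which
     * return 1 once they have consumed the option. */
    static int __init parse_example(char *arg)
    {
            if (!arg)
                    return -EINVAL;
            example_bytes = memparse(arg, &arg);    /* accepts K/M/G suffixes */
            return 0;
    }
    early_param("example", parse_example);

The user_defined_memmap flag plays the role of the old userdef variable, so the "user-defined physical RAM map" banner is still printed once parsing has finished.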
6524 | --- a/arch/x86/kernel/setup_64-xen.c |
6525 | +++ b/arch/x86/kernel/setup_64-xen.c |
6526 | @@ -118,16 +118,6 @@ |
6527 | |
6528 | unsigned long mmu_cr4_features; |
6529 | |
6530 | -int acpi_disabled; |
6531 | -EXPORT_SYMBOL(acpi_disabled); |
6532 | -#ifdef CONFIG_ACPI |
6533 | -extern int __initdata acpi_ht; |
6534 | -extern acpi_interrupt_flags acpi_sci_flags; |
6535 | -int __initdata acpi_force = 0; |
6536 | -#endif |
6537 | - |
6538 | -int acpi_numa __initdata; |
6539 | - |
6540 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ |
6541 | int bootloader_type; |
6542 | |
6543 | @@ -151,10 +141,6 @@ |
6544 | |
6545 | struct edid_info edid_info; |
6546 | EXPORT_SYMBOL_GPL(edid_info); |
6547 | -struct e820map e820; |
6548 | -#ifdef CONFIG_XEN |
6549 | -struct e820map machine_e820; |
6550 | -#endif |
6551 | |
6552 | extern int root_mountflags; |
6553 | |
6554 | @@ -181,9 +167,6 @@ |
6555 | .flags = IORESOURCE_BUSY | IORESOURCE_IO } |
6556 | }; |
6557 | |
6558 | -#define STANDARD_IO_RESOURCES \ |
6559 | - (sizeof standard_io_resources / sizeof standard_io_resources[0]) |
6560 | - |
6561 | #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) |
6562 | |
6563 | struct resource data_resource = { |
6564 | @@ -230,9 +213,6 @@ |
6565 | .flags = IORESOURCE_ROM } |
6566 | }; |
6567 | |
6568 | -#define ADAPTER_ROM_RESOURCES \ |
6569 | - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) |
6570 | - |
6571 | static struct resource video_rom_resource = { |
6572 | .name = "Video ROM", |
6573 | .start = 0xc0000, |
6574 | @@ -309,7 +289,8 @@ |
6575 | } |
6576 | |
6577 | /* check for adapter roms on 2k boundaries */ |
6578 | - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { |
6579 | + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; |
6580 | + start += 2048) { |
6581 | rom = isa_bus_to_virt(start); |
6582 | if (!romsignature(rom)) |
6583 | continue; |
6584 | @@ -329,187 +310,22 @@ |
6585 | } |
6586 | } |
6587 | |
6588 | -/* Check for full argument with no trailing characters */ |
6589 | -static int fullarg(char *p, char *arg) |
6590 | +#ifdef CONFIG_PROC_VMCORE |
6591 | +/* elfcorehdr= specifies the location of elf core header |
6592 | + * stored by the crashed kernel. This option will be passed |
6593 | + * by kexec loader to the capture kernel. |
6594 | + */ |
6595 | +static int __init setup_elfcorehdr(char *arg) |
6596 | { |
6597 | - int l = strlen(arg); |
6598 | - return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l])); |
6599 | + char *end; |
6600 | + if (!arg) |
6601 | + return -EINVAL; |
6602 | + elfcorehdr_addr = memparse(arg, &end); |
6603 | + return end > arg ? 0 : -EINVAL; |
6604 | } |
6605 | - |
6606 | -static __init void parse_cmdline_early (char ** cmdline_p) |
6607 | -{ |
6608 | - char c = ' ', *to = command_line, *from = COMMAND_LINE; |
6609 | - int len = 0; |
6610 | - int userdef = 0; |
6611 | - |
6612 | - for (;;) { |
6613 | - if (c != ' ') |
6614 | - goto next_char; |
6615 | - |
6616 | -#ifdef CONFIG_SMP |
6617 | - /* |
6618 | - * If the BIOS enumerates physical processors before logical, |
6619 | - * maxcpus=N at enumeration-time can be used to disable HT. |
6620 | - */ |
6621 | - else if (!memcmp(from, "maxcpus=", 8)) { |
6622 | - extern unsigned int maxcpus; |
6623 | - |
6624 | - maxcpus = simple_strtoul(from + 8, NULL, 0); |
6625 | - } |
6626 | -#endif |
6627 | -#ifdef CONFIG_ACPI |
6628 | - /* "acpi=off" disables both ACPI table parsing and interpreter init */ |
6629 | - if (fullarg(from,"acpi=off")) |
6630 | - disable_acpi(); |
6631 | - |
6632 | - if (fullarg(from, "acpi=force")) { |
6633 | - /* add later when we do DMI horrors: */ |
6634 | - acpi_force = 1; |
6635 | - acpi_disabled = 0; |
6636 | - } |
6637 | - |
6638 | - /* acpi=ht just means: do ACPI MADT parsing |
6639 | - at bootup, but don't enable the full ACPI interpreter */ |
6640 | - if (fullarg(from, "acpi=ht")) { |
6641 | - if (!acpi_force) |
6642 | - disable_acpi(); |
6643 | - acpi_ht = 1; |
6644 | - } |
6645 | - else if (fullarg(from, "pci=noacpi")) |
6646 | - acpi_disable_pci(); |
6647 | - else if (fullarg(from, "acpi=noirq")) |
6648 | - acpi_noirq_set(); |
6649 | - |
6650 | - else if (fullarg(from, "acpi_sci=edge")) |
6651 | - acpi_sci_flags.trigger = 1; |
6652 | - else if (fullarg(from, "acpi_sci=level")) |
6653 | - acpi_sci_flags.trigger = 3; |
6654 | - else if (fullarg(from, "acpi_sci=high")) |
6655 | - acpi_sci_flags.polarity = 1; |
6656 | - else if (fullarg(from, "acpi_sci=low")) |
6657 | - acpi_sci_flags.polarity = 3; |
6658 | - |
6659 | - /* acpi=strict disables out-of-spec workarounds */ |
6660 | - else if (fullarg(from, "acpi=strict")) { |
6661 | - acpi_strict = 1; |
6662 | - } |
6663 | -#ifdef CONFIG_X86_IO_APIC |
6664 | - else if (fullarg(from, "acpi_skip_timer_override")) |
6665 | - acpi_skip_timer_override = 1; |
6666 | -#endif |
6667 | -#endif |
6668 | - |
6669 | -#ifndef CONFIG_XEN |
6670 | - if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) { |
6671 | - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); |
6672 | - disable_apic = 1; |
6673 | - } |
6674 | - |
6675 | - if (fullarg(from, "noapic")) |
6676 | - skip_ioapic_setup = 1; |
6677 | - |
6678 | - if (fullarg(from,"apic")) { |
6679 | - skip_ioapic_setup = 0; |
6680 | - ioapic_force = 1; |
6681 | - } |
6682 | -#endif |
6683 | - |
6684 | - if (!memcmp(from, "mem=", 4)) |
6685 | - parse_memopt(from+4, &from); |
6686 | - |
6687 | - if (!memcmp(from, "memmap=", 7)) { |
6688 | - /* exactmap option is for used defined memory */ |
6689 | - if (!memcmp(from+7, "exactmap", 8)) { |
6690 | -#ifdef CONFIG_CRASH_DUMP |
6691 | - /* If we are doing a crash dump, we |
6692 | - * still need to know the real mem |
6693 | - * size before original memory map is |
6694 | - * reset. |
6695 | - */ |
6696 | - saved_max_pfn = e820_end_of_ram(); |
6697 | -#endif |
6698 | - from += 8+7; |
6699 | - end_pfn_map = 0; |
6700 | - e820.nr_map = 0; |
6701 | - userdef = 1; |
6702 | - } |
6703 | - else { |
6704 | - parse_memmapopt(from+7, &from); |
6705 | - userdef = 1; |
6706 | - } |
6707 | - } |
6708 | - |
6709 | -#ifdef CONFIG_NUMA |
6710 | - if (!memcmp(from, "numa=", 5)) |
6711 | - numa_setup(from+5); |
6712 | +early_param("elfcorehdr", setup_elfcorehdr); |
6713 | #endif |
6714 | |
6715 | - if (!memcmp(from,"iommu=",6)) { |
6716 | - iommu_setup(from+6); |
6717 | - } |
6718 | - |
6719 | - if (fullarg(from,"oops=panic")) |
6720 | - panic_on_oops = 1; |
6721 | - |
6722 | - if (!memcmp(from, "noexec=", 7)) |
6723 | - nonx_setup(from + 7); |
6724 | - |
6725 | -#ifdef CONFIG_KEXEC |
6726 | - /* crashkernel=size@addr specifies the location to reserve for |
6727 | - * a crash kernel. By reserving this memory we guarantee |
6728 | - * that linux never set's it up as a DMA target. |
6729 | - * Useful for holding code to do something appropriate |
6730 | - * after a kernel panic. |
6731 | - */ |
6732 | - else if (!memcmp(from, "crashkernel=", 12)) { |
6733 | -#ifndef CONFIG_XEN |
6734 | - unsigned long size, base; |
6735 | - size = memparse(from+12, &from); |
6736 | - if (*from == '@') { |
6737 | - base = memparse(from+1, &from); |
6738 | - /* FIXME: Do I want a sanity check |
6739 | - * to validate the memory range? |
6740 | - */ |
6741 | - crashk_res.start = base; |
6742 | - crashk_res.end = base + size - 1; |
6743 | - } |
6744 | -#else |
6745 | - printk("Ignoring crashkernel command line, " |
6746 | - "parameter will be supplied by xen\n"); |
6747 | -#endif |
6748 | - } |
6749 | -#endif |
6750 | - |
6751 | -#ifdef CONFIG_PROC_VMCORE |
6752 | - /* elfcorehdr= specifies the location of elf core header |
6753 | - * stored by the crashed kernel. This option will be passed |
6754 | - * by kexec loader to the capture kernel. |
6755 | - */ |
6756 | - else if(!memcmp(from, "elfcorehdr=", 11)) |
6757 | - elfcorehdr_addr = memparse(from+11, &from); |
6758 | -#endif |
6759 | - |
6760 | -#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN) |
6761 | - else if (!memcmp(from, "additional_cpus=", 16)) |
6762 | - setup_additional_cpus(from+16); |
6763 | -#endif |
6764 | - |
6765 | - next_char: |
6766 | - c = *(from++); |
6767 | - if (!c) |
6768 | - break; |
6769 | - if (COMMAND_LINE_SIZE <= ++len) |
6770 | - break; |
6771 | - *(to++) = c; |
6772 | - } |
6773 | - if (userdef) { |
6774 | - printk(KERN_INFO "user-defined physical RAM map:\n"); |
6775 | - e820_print_map("user"); |
6776 | - } |
6777 | - *to = '\0'; |
6778 | - *cmdline_p = command_line; |
6779 | -} |
6780 | - |
6781 | #ifndef CONFIG_NUMA |
6782 | static void __init |
6783 | contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) |
6784 | @@ -521,10 +337,11 @@ |
6785 | if (bootmap == -1L) |
6786 | panic("Cannot find bootmem map of size %ld\n",bootmap_size); |
6787 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); |
6788 | + e820_register_active_regions(0, start_pfn, end_pfn); |
6789 | #ifdef CONFIG_XEN |
6790 | - e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT); |
6791 | + free_bootmem_with_active_regions(0, xen_start_info->nr_pages); |
6792 | #else |
6793 | - e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT); |
6794 | + free_bootmem_with_active_regions(0, end_pfn); |
6795 | #endif |
6796 | reserve_bootmem(bootmap, bootmap_size); |
6797 | } |
6798 | @@ -587,6 +404,10 @@ |
6799 | void __init setup_arch(char **cmdline_p) |
6800 | { |
6801 | #ifdef CONFIG_XEN |
6802 | + extern struct e820map machine_e820; |
6803 | + |
6804 | + printk(KERN_INFO "Command line: %s\n", saved_command_line); |
6805 | + |
6806 | /* Register a call for panic conditions. */ |
6807 | atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); |
6808 | |
6809 | @@ -612,6 +433,8 @@ |
6810 | |
6811 | ARCH_SETUP |
6812 | #else |
6813 | + printk(KERN_INFO "Command line: %s\n", saved_command_line); |
6814 | + |
6815 | ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); |
6816 | screen_info = SCREEN_INFO; |
6817 | edid_info = EDID_INFO; |
6818 | @@ -639,16 +462,22 @@ |
6819 | data_resource.start = virt_to_phys(&_etext); |
6820 | data_resource.end = virt_to_phys(&_edata)-1; |
6821 | |
6822 | - parse_cmdline_early(cmdline_p); |
6823 | - |
6824 | early_identify_cpu(&boot_cpu_data); |
6825 | |
6826 | + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); |
6827 | + *cmdline_p = command_line; |
6828 | + |
6829 | + parse_early_param(); |
6830 | + |
6831 | + finish_e820_parsing(); |
6832 | + |
6833 | + e820_register_active_regions(0, 0, -1UL); |
6834 | /* |
6835 | * partially used pages are not usable - thus |
6836 | * we are rounding upwards: |
6837 | */ |
6838 | end_pfn = e820_end_of_ram(); |
6839 | - num_physpages = end_pfn; /* for pfn_valid */ |
6840 | + num_physpages = end_pfn; |
6841 | |
6842 | check_efer(); |
6843 | |
6844 | @@ -659,6 +488,14 @@ |
6845 | if (is_initial_xendomain()) |
6846 | dmi_scan_machine(); |
6847 | |
6848 | + /* How many end-of-memory variables you have, grandma! */ |
6849 | + max_low_pfn = end_pfn; |
6850 | + max_pfn = end_pfn; |
6851 | + high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1; |
6852 | + |
6853 | + /* Remove active ranges so rediscovery with NUMA-awareness happens */ |
6854 | + remove_all_active_ranges(); |
6855 | + |
6856 | #ifdef CONFIG_ACPI_NUMA |
6857 | /* |
6858 | * Parse SRAT to discover nodes. |
6859 | @@ -848,16 +685,16 @@ |
6860 | BUG(); |
6861 | } |
6862 | |
6863 | +#ifdef CONFIG_ACPI |
6864 | if (!is_initial_xendomain()) { |
6865 | acpi_disabled = 1; |
6866 | -#ifdef CONFIG_ACPI |
6867 | acpi_ht = 0; |
6868 | -#endif |
6869 | } |
6870 | #endif |
6871 | +#endif |
6872 | |
6873 | -#ifndef CONFIG_XEN |
6874 | - check_ioapic(); |
6875 | +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN) |
6876 | + early_quirks(); |
6877 | #endif |
6878 | |
6879 | zap_low_mappings(0); |
6880 | @@ -917,6 +754,7 @@ |
6881 | } |
6882 | #else |
6883 | e820_reserve_resources(e820.map, e820.nr_map); |
6884 | + e820_mark_nosave_regions(); |
6885 | #endif |
6886 | |
6887 | request_resource(&iomem_resource, &video_ram_resource); |
6888 | @@ -924,7 +762,7 @@ |
6889 | { |
6890 | unsigned i; |
6891 | /* request I/O space for devices used on all i[345]86 PCs */ |
6892 | - for (i = 0; i < STANDARD_IO_RESOURCES; i++) |
6893 | + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) |
6894 | request_resource(&ioport_resource, &standard_io_resources[i]); |
6895 | } |
6896 | |
6897 | @@ -1108,7 +946,7 @@ |
6898 | #endif |
6899 | } |
6900 | |
6901 | -static void __init init_amd(struct cpuinfo_x86 *c) |
6902 | +static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
6903 | { |
6904 | unsigned level; |
6905 | |
6906 | @@ -1164,6 +1002,12 @@ |
6907 | |
6908 | /* Fix cpuid4 emulation for more */ |
6909 | num_cache_leaves = 3; |
6910 | + |
6911 | + /* When there is only one core no need to synchronize RDTSC */ |
6912 | + if (num_possible_cpus() == 1) |
6913 | + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
6914 | + else |
6915 | + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
6916 | } |
6917 | |
6918 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
6919 | @@ -1245,8 +1089,7 @@ |
6920 | node = first_node(node_online_map); |
6921 | numa_set_node(cpu, node); |
6922 | |
6923 | - if (acpi_numa > 0) |
6924 | - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); |
6925 | + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); |
6926 | #endif |
6927 | } |
6928 | |
6929 | @@ -1280,6 +1123,8 @@ |
6930 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
6931 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
6932 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); |
6933 | + if (c->x86 == 6) |
6934 | + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); |
6935 | set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
6936 | c->x86_max_cores = intel_num_cpu_cores(c); |
6937 | |
6938 | @@ -1498,8 +1343,8 @@ |
6939 | |
6940 | /* Intel-defined (#2) */ |
6941 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", |
6942 | - "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, |
6943 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
6944 | + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, |
6945 | + NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, |
6946 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
6947 | |
6948 | /* VIA/Cyrix/Centaur-defined */ |
6949 | --- a/arch/x86/kernel/smp_32-xen.c |
6950 | +++ b/arch/x86/kernel/smp_32-xen.c |
6951 | @@ -279,8 +279,7 @@ |
6952 | * 2) Leave the mm if we are in the lazy tlb mode. |
6953 | */ |
6954 | |
6955 | -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id, |
6956 | - struct pt_regs *regs) |
6957 | +irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id) |
6958 | { |
6959 | unsigned long cpu; |
6960 | |
6961 | @@ -567,16 +566,14 @@ |
6962 | * all the work is done automatically when |
6963 | * we return from the interrupt. |
6964 | */ |
6965 | -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id, |
6966 | - struct pt_regs *regs) |
6967 | +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) |
6968 | { |
6969 | |
6970 | return IRQ_HANDLED; |
6971 | } |
6972 | |
6973 | #include <linux/kallsyms.h> |
6974 | -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id, |
6975 | - struct pt_regs *regs) |
6976 | +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id) |
6977 | { |
6978 | void (*func) (void *info) = call_data->func; |
6979 | void *info = call_data->info; |
6980 | @@ -603,3 +600,69 @@ |
6981 | return IRQ_HANDLED; |
6982 | } |
6983 | |
6984 | +/* |
6985 | + * this function sends a 'generic call function' IPI to one other CPU |
6986 | + * in the system. |
6987 | + * |
6988 | + * cpu is a standard Linux logical CPU number. |
6989 | + */ |
6990 | +static void |
6991 | +__smp_call_function_single(int cpu, void (*func) (void *info), void *info, |
6992 | + int nonatomic, int wait) |
6993 | +{ |
6994 | + struct call_data_struct data; |
6995 | + int cpus = 1; |
6996 | + |
6997 | + data.func = func; |
6998 | + data.info = info; |
6999 | + atomic_set(&data.started, 0); |
7000 | + data.wait = wait; |
7001 | + if (wait) |
7002 | + atomic_set(&data.finished, 0); |
7003 | + |
7004 | + call_data = &data; |
7005 | + wmb(); |
7006 | +	/* Send a message to the target CPU and wait for it to respond */ |
7007 | + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); |
7008 | + |
7009 | + /* Wait for response */ |
7010 | + while (atomic_read(&data.started) != cpus) |
7011 | + cpu_relax(); |
7012 | + |
7013 | + if (!wait) |
7014 | + return; |
7015 | + |
7016 | + while (atomic_read(&data.finished) != cpus) |
7017 | + cpu_relax(); |
7018 | +} |
7019 | + |
7020 | +/* |
7021 | + * smp_call_function_single - Run a function on another CPU |
7022 | + * @func: The function to run. This must be fast and non-blocking. |
7023 | + * @info: An arbitrary pointer to pass to the function. |
7024 | + * @nonatomic: Currently unused. |
7025 | + * @wait: If true, wait until function has completed on other CPUs. |
7026 | + * |
7027 | + * Returns 0 on success, else a negative status code. |
7028 | + * |
7029 | + * Does not return until the remote CPU is nearly ready to execute <func>, |
7030 | + * or has already executed it. |
7031 | + */ |
7032 | + |
7033 | +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, |
7034 | + int nonatomic, int wait) |
7035 | +{ |
7036 | + /* prevent preemption and reschedule on another processor */ |
7037 | + int me = get_cpu(); |
7038 | + if (cpu == me) { |
7039 | + WARN_ON(1); |
7040 | + put_cpu(); |
7041 | + return -EBUSY; |
7042 | + } |
7043 | + spin_lock_bh(&call_lock); |
7044 | + __smp_call_function_single(cpu, func, info, nonatomic, wait); |
7045 | + spin_unlock_bh(&call_lock); |
7046 | + put_cpu(); |
7047 | + return 0; |
7048 | +} |
7049 | +EXPORT_SYMBOL(smp_call_function_single); |
7050 | --- a/arch/x86/kernel/smp_64-xen.c |
7051 | +++ b/arch/x86/kernel/smp_64-xen.c |
7052 | @@ -381,9 +381,8 @@ |
7053 | /* prevent preemption and reschedule on another processor */ |
7054 | int me = get_cpu(); |
7055 | if (cpu == me) { |
7056 | - WARN_ON(1); |
7057 | put_cpu(); |
7058 | - return -EBUSY; |
7059 | + return 0; |
7060 | } |
7061 | spin_lock_bh(&call_lock); |
7062 | __smp_call_function_single(cpu, func, info, nonatomic, wait); |
7063 | @@ -501,7 +500,7 @@ |
7064 | #ifndef CONFIG_XEN |
7065 | asmlinkage void smp_reschedule_interrupt(void) |
7066 | #else |
7067 | -asmlinkage irqreturn_t smp_reschedule_interrupt(void) |
7068 | +asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx) |
7069 | #endif |
7070 | { |
7071 | #ifndef CONFIG_XEN |
7072 | @@ -514,7 +513,7 @@ |
7073 | #ifndef CONFIG_XEN |
7074 | asmlinkage void smp_call_function_interrupt(void) |
7075 | #else |
7076 | -asmlinkage irqreturn_t smp_call_function_interrupt(void) |
7077 | +asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx) |
7078 | #endif |
7079 | { |
7080 | void (*func) (void *info) = call_data->func; |
7081 | @@ -545,31 +544,3 @@ |
7082 | return IRQ_HANDLED; |
7083 | #endif |
7084 | } |
7085 | - |
7086 | -int safe_smp_processor_id(void) |
7087 | -{ |
7088 | -#ifdef CONFIG_XEN |
7089 | - return smp_processor_id(); |
7090 | -#else |
7091 | - unsigned apicid, i; |
7092 | - |
7093 | - if (disable_apic) |
7094 | - return 0; |
7095 | - |
7096 | - apicid = hard_smp_processor_id(); |
7097 | - if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid) |
7098 | - return apicid; |
7099 | - |
7100 | - for (i = 0; i < NR_CPUS; ++i) { |
7101 | - if (x86_cpu_to_apicid[i] == apicid) |
7102 | - return i; |
7103 | - } |
7104 | - |
7105 | - /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI, |
7106 | - * or called too early. Either way, we must be CPU 0. */ |
7107 | - if (x86_cpu_to_apicid[0] == BAD_APICID) |
7108 | - return 0; |
7109 | - |
7110 | - return 0; /* Should not happen */ |
7111 | -#endif |
7112 | -} |
7113 | --- a/arch/x86/kernel/time_32-xen.c |
7114 | +++ b/arch/x86/kernel/time_32-xen.c |
7115 | @@ -89,7 +89,6 @@ |
7116 | unsigned long vxtime_hz = PIT_TICK_RATE; |
7117 | struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ |
7118 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; |
7119 | -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; |
7120 | struct timespec __xtime __section_xtime; |
7121 | struct timezone __sys_tz __section_sys_tz; |
7122 | #endif |
7123 | @@ -97,8 +96,6 @@ |
7124 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ |
7125 | EXPORT_SYMBOL(cpu_khz); |
7126 | |
7127 | -extern unsigned long wall_jiffies; |
7128 | - |
7129 | DEFINE_SPINLOCK(rtc_lock); |
7130 | EXPORT_SYMBOL(rtc_lock); |
7131 | |
7132 | @@ -265,11 +262,10 @@ |
7133 | time_t wtm_sec, xtime_sec; |
7134 | u64 tmp, wc_nsec; |
7135 | |
7136 | - /* Adjust wall-clock time base based on wall_jiffies ticks. */ |
7137 | + /* Adjust wall-clock time base. */ |
7138 | wc_nsec = processed_system_time; |
7139 | wc_nsec += sec * (u64)NSEC_PER_SEC; |
7140 | wc_nsec += nsec; |
7141 | - wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK; |
7142 | |
7143 | /* Split wallclock base into seconds and nanoseconds. */ |
7144 | tmp = wc_nsec; |
7145 | @@ -387,16 +383,10 @@ |
7146 | shadow = &per_cpu(shadow_time, cpu); |
7147 | |
7148 | do { |
7149 | - unsigned long lost; |
7150 | - |
7151 | local_time_version = shadow->version; |
7152 | seq = read_seqbegin(&xtime_lock); |
7153 | |
7154 | usec = get_usec_offset(shadow); |
7155 | - lost = jiffies - wall_jiffies; |
7156 | - |
7157 | - if (unlikely(lost)) |
7158 | - usec += lost * (USEC_PER_SEC / HZ); |
7159 | |
7160 | sec = xtime.tv_sec; |
7161 | usec += (xtime.tv_nsec / NSEC_PER_USEC); |
7162 | @@ -519,7 +509,7 @@ |
7163 | write_seqlock_irq(&xtime_lock); |
7164 | |
7165 | sec = xtime.tv_sec; |
7166 | - nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK); |
7167 | + nsec = xtime.tv_nsec; |
7168 | __normalize_time(&sec, &nsec); |
7169 | |
7170 | op.cmd = XENPF_settime; |
7171 | @@ -593,42 +583,49 @@ |
7172 | } |
7173 | #endif |
7174 | |
7175 | -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) |
7176 | unsigned long profile_pc(struct pt_regs *regs) |
7177 | { |
7178 | unsigned long pc = instruction_pointer(regs); |
7179 | |
7180 | -#ifdef __x86_64__ |
7181 | - /* Assume the lock function has either no stack frame or only a single word. |
7182 | - This checks if the address on the stack looks like a kernel text address. |
7183 | - There is a small window for false hits, but in that case the tick |
7184 | - is just accounted to the spinlock function. |
7185 | - Better would be to write these functions in assembler again |
7186 | - and check exactly. */ |
7187 | +#if defined(CONFIG_SMP) || defined(__x86_64__) |
7188 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { |
7189 | - char *v = *(char **)regs->rsp; |
7190 | - if ((v >= _stext && v <= _etext) || |
7191 | - (v >= _sinittext && v <= _einittext) || |
7192 | - (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END)) |
7193 | - return (unsigned long)v; |
7194 | - return ((unsigned long *)regs->rsp)[1]; |
7195 | +# ifdef CONFIG_FRAME_POINTER |
7196 | +# ifdef __i386__ |
7197 | + return ((unsigned long *)regs->ebp)[1]; |
7198 | +# else |
7199 | + return ((unsigned long *)regs->rbp)[1]; |
7200 | +# endif |
7201 | +# else |
7202 | +# ifdef __i386__ |
7203 | + unsigned long *sp; |
7204 | + if ((regs->xcs & 2) == 0) |
7205 | +			sp = (unsigned long *)&regs->esp; |
7206 | + else |
7207 | + sp = (unsigned long *)regs->esp; |
7208 | +# else |
7209 | + unsigned long *sp = (unsigned long *)regs->rsp; |
7210 | +# endif |
7211 | + /* Return address is either directly at stack pointer |
7212 | + or above a saved eflags. Eflags has bits 22-31 zero, |
7213 | + kernel addresses don't. */ |
7214 | + if (sp[0] >> 22) |
7215 | + return sp[0]; |
7216 | + if (sp[1] >> 22) |
7217 | + return sp[1]; |
7218 | +# endif |
7219 | } |
7220 | -#else |
7221 | - if (!user_mode_vm(regs) && in_lock_functions(pc)) |
7222 | - return *(unsigned long *)(regs->ebp + 4); |
7223 | #endif |
7224 | |
7225 | return pc; |
7226 | } |
7227 | EXPORT_SYMBOL(profile_pc); |
7228 | -#endif |
7229 | |
7230 | /* |
7231 | * This is the same as the above, except we _also_ save the current |
7232 | * Time Stamp Counter value at the time of the timer interrupt, so that |
7233 | * we later on can estimate the time of day more exactly. |
7234 | */ |
7235 | -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) |
7236 | +irqreturn_t timer_interrupt(int irq, void *dev_id) |
7237 | { |
7238 | s64 delta, delta_cpu, stolen, blocked; |
7239 | u64 sched_time; |
7240 | @@ -686,10 +683,14 @@ |
7241 | } |
7242 | |
7243 | /* System-wide jiffy work. */ |
7244 | - while (delta >= NS_PER_TICK) { |
7245 | - delta -= NS_PER_TICK; |
7246 | - processed_system_time += NS_PER_TICK; |
7247 | - do_timer(regs); |
7248 | + if (delta >= NS_PER_TICK) { |
7249 | + do_div(delta, NS_PER_TICK); |
7250 | + processed_system_time += delta * NS_PER_TICK; |
7251 | + while (delta > HZ) { |
7252 | + do_timer(HZ); |
7253 | + delta -= HZ; |
7254 | + } |
7255 | + do_timer(delta); |
7256 | } |
7257 | |
7258 | if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) { |
7259 | @@ -734,7 +735,7 @@ |
7260 | if (delta_cpu > 0) { |
7261 | do_div(delta_cpu, NS_PER_TICK); |
7262 | per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK; |
7263 | - if (user_mode_vm(regs)) |
7264 | + if (user_mode_vm(get_irq_regs())) |
7265 | account_user_time(current, (cputime_t)delta_cpu); |
7266 | else |
7267 | account_system_time(current, HARDIRQ_OFFSET, |
7268 | @@ -748,10 +749,10 @@ |
7269 | /* Local timer processing (see update_process_times()). */ |
7270 | run_local_timers(); |
7271 | if (rcu_pending(cpu)) |
7272 | - rcu_check_callbacks(cpu, user_mode_vm(regs)); |
7273 | + rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs())); |
7274 | scheduler_tick(); |
7275 | run_posix_cpu_timers(current); |
7276 | - profile_tick(CPU_PROFILING, regs); |
7277 | + profile_tick(CPU_PROFILING); |
7278 | |
7279 | return IRQ_HANDLED; |
7280 | } |
7281 | @@ -959,10 +960,11 @@ |
7282 | /* Duplicate of time_init() below, with hpet_enable part added */ |
7283 | static void __init hpet_time_init(void) |
7284 | { |
7285 | - xtime.tv_sec = get_cmos_time(); |
7286 | - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); |
7287 | - set_normalized_timespec(&wall_to_monotonic, |
7288 | - -xtime.tv_sec, -xtime.tv_nsec); |
7289 | + struct timespec ts; |
7290 | + ts.tv_sec = get_cmos_time(); |
7291 | + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); |
7292 | + |
7293 | + do_settimeofday(&ts); |
7294 | |
7295 | if ((hpet_enable() >= 0) && hpet_use_timer) { |
7296 | printk("Using HPET for base-timer\n"); |
7297 | --- a/arch/x86/kernel/traps_32-xen.c |
7298 | +++ b/arch/x86/kernel/traps_32-xen.c |
7299 | @@ -28,6 +28,7 @@ |
7300 | #include <linux/kprobes.h> |
7301 | #include <linux/kexec.h> |
7302 | #include <linux/unwind.h> |
7303 | +#include <linux/uaccess.h> |
7304 | |
7305 | #ifdef CONFIG_EISA |
7306 | #include <linux/ioport.h> |
7307 | @@ -40,7 +41,6 @@ |
7308 | |
7309 | #include <asm/processor.h> |
7310 | #include <asm/system.h> |
7311 | -#include <asm/uaccess.h> |
7312 | #include <asm/io.h> |
7313 | #include <asm/atomic.h> |
7314 | #include <asm/debugreg.h> |
7315 | @@ -51,11 +51,14 @@ |
7316 | #include <asm/smp.h> |
7317 | #include <asm/arch_hooks.h> |
7318 | #include <asm/kdebug.h> |
7319 | +#include <asm/stacktrace.h> |
7320 | |
7321 | #include <linux/module.h> |
7322 | |
7323 | #include "mach_traps.h" |
7324 | |
7325 | +int panic_on_unrecovered_nmi; |
7326 | + |
7327 | asmlinkage int system_call(void); |
7328 | |
7329 | struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, |
7330 | @@ -124,62 +127,63 @@ |
7331 | p < (void *)tinfo + THREAD_SIZE - 3; |
7332 | } |
7333 | |
7334 | -/* |
7335 | - * Print one address/symbol entries per line. |
7336 | - */ |
7337 | -static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl) |
7338 | -{ |
7339 | - printk(" [<%08lx>] ", addr); |
7340 | - |
7341 | - print_symbol("%s\n", addr); |
7342 | -} |
7343 | - |
7344 | static inline unsigned long print_context_stack(struct thread_info *tinfo, |
7345 | unsigned long *stack, unsigned long ebp, |
7346 | - char *log_lvl) |
7347 | + struct stacktrace_ops *ops, void *data) |
7348 | { |
7349 | unsigned long addr; |
7350 | |
7351 | #ifdef CONFIG_FRAME_POINTER |
7352 | while (valid_stack_ptr(tinfo, (void *)ebp)) { |
7353 | + unsigned long new_ebp; |
7354 | addr = *(unsigned long *)(ebp + 4); |
7355 | - print_addr_and_symbol(addr, log_lvl); |
7356 | + ops->address(data, addr); |
7357 | /* |
7358 | * break out of recursive entries (such as |
7359 | - * end_of_stack_stop_unwind_function): |
7360 | + * end_of_stack_stop_unwind_function). Also, |
7361 | + * we can never allow a frame pointer to |
7362 | + * move downwards! |
7363 | */ |
7364 | - if (ebp == *(unsigned long *)ebp) |
7365 | + new_ebp = *(unsigned long *)ebp; |
7366 | + if (new_ebp <= ebp) |
7367 | break; |
7368 | - ebp = *(unsigned long *)ebp; |
7369 | + ebp = new_ebp; |
7370 | } |
7371 | #else |
7372 | while (valid_stack_ptr(tinfo, stack)) { |
7373 | addr = *stack++; |
7374 | if (__kernel_text_address(addr)) |
7375 | - print_addr_and_symbol(addr, log_lvl); |
7376 | + ops->address(data, addr); |
7377 | } |
7378 | #endif |
7379 | return ebp; |
7380 | } |
7381 | |
7382 | +struct ops_and_data { |
7383 | + struct stacktrace_ops *ops; |
7384 | + void *data; |
7385 | +}; |
7386 | + |
7387 | static asmlinkage int |
7388 | -show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) |
7389 | +dump_trace_unwind(struct unwind_frame_info *info, void *data) |
7390 | { |
7391 | + struct ops_and_data *oad = (struct ops_and_data *)data; |
7392 | int n = 0; |
7393 | |
7394 | while (unwind(info) == 0 && UNW_PC(info)) { |
7395 | n++; |
7396 | - print_addr_and_symbol(UNW_PC(info), log_lvl); |
7397 | + oad->ops->address(oad->data, UNW_PC(info)); |
7398 | if (arch_unw_user_mode(info)) |
7399 | break; |
7400 | } |
7401 | return n; |
7402 | } |
7403 | |
7404 | -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
7405 | - unsigned long *stack, char *log_lvl) |
7406 | +void dump_trace(struct task_struct *task, struct pt_regs *regs, |
7407 | + unsigned long *stack, |
7408 | + struct stacktrace_ops *ops, void *data) |
7409 | { |
7410 | - unsigned long ebp; |
7411 | + unsigned long ebp = 0; |
7412 | |
7413 | if (!task) |
7414 | task = current; |
7415 | @@ -187,54 +191,116 @@ |
7416 | if (call_trace >= 0) { |
7417 | int unw_ret = 0; |
7418 | struct unwind_frame_info info; |
7419 | + struct ops_and_data oad = { .ops = ops, .data = data }; |
7420 | |
7421 | if (regs) { |
7422 | if (unwind_init_frame_info(&info, task, regs) == 0) |
7423 | - unw_ret = show_trace_unwind(&info, log_lvl); |
7424 | + unw_ret = dump_trace_unwind(&info, &oad); |
7425 | } else if (task == current) |
7426 | - unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl); |
7427 | + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); |
7428 | else { |
7429 | if (unwind_init_blocked(&info, task) == 0) |
7430 | - unw_ret = show_trace_unwind(&info, log_lvl); |
7431 | + unw_ret = dump_trace_unwind(&info, &oad); |
7432 | } |
7433 | if (unw_ret > 0) { |
7434 | if (call_trace == 1 && !arch_unw_user_mode(&info)) { |
7435 | - print_symbol("DWARF2 unwinder stuck at %s\n", |
7436 | + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", |
7437 | UNW_PC(&info)); |
7438 | if (UNW_SP(&info) >= PAGE_OFFSET) { |
7439 | - printk("Leftover inexact backtrace:\n"); |
7440 | + ops->warning(data, "Leftover inexact backtrace:\n"); |
7441 | stack = (void *)UNW_SP(&info); |
7442 | + if (!stack) |
7443 | + return; |
7444 | + ebp = UNW_FP(&info); |
7445 | } else |
7446 | - printk("Full inexact backtrace again:\n"); |
7447 | + ops->warning(data, "Full inexact backtrace again:\n"); |
7448 | } else if (call_trace >= 1) |
7449 | return; |
7450 | else |
7451 | - printk("Full inexact backtrace again:\n"); |
7452 | + ops->warning(data, "Full inexact backtrace again:\n"); |
7453 | } else |
7454 | - printk("Inexact backtrace:\n"); |
7455 | + ops->warning(data, "Inexact backtrace:\n"); |
7456 | } |
7457 | - |
7458 | - if (task == current) { |
7459 | - /* Grab ebp right from our regs */ |
7460 | - asm ("movl %%ebp, %0" : "=r" (ebp) : ); |
7461 | - } else { |
7462 | - /* ebp is the last reg pushed by switch_to */ |
7463 | - ebp = *(unsigned long *) task->thread.esp; |
7464 | + if (!stack) { |
7465 | + unsigned long dummy; |
7466 | + stack = &dummy; |
7467 | + if (task && task != current) |
7468 | + stack = (unsigned long *)task->thread.esp; |
7469 | + } |
7470 | + |
7471 | +#ifdef CONFIG_FRAME_POINTER |
7472 | + if (!ebp) { |
7473 | + if (task == current) { |
7474 | + /* Grab ebp right from our regs */ |
7475 | + asm ("movl %%ebp, %0" : "=r" (ebp) : ); |
7476 | + } else { |
7477 | + /* ebp is the last reg pushed by switch_to */ |
7478 | + ebp = *(unsigned long *) task->thread.esp; |
7479 | + } |
7480 | } |
7481 | +#endif |
7482 | |
7483 | while (1) { |
7484 | struct thread_info *context; |
7485 | context = (struct thread_info *) |
7486 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
7487 | - ebp = print_context_stack(context, stack, ebp, log_lvl); |
7488 | + ebp = print_context_stack(context, stack, ebp, ops, data); |
7489 | + /* Should be after the line below, but somewhere |
7490 | + in early boot context comes out corrupted and we |
7491 | + can't reference it -AK */ |
7492 | + if (ops->stack(data, "IRQ") < 0) |
7493 | + break; |
7494 | stack = (unsigned long*)context->previous_esp; |
7495 | if (!stack) |
7496 | break; |
7497 | - printk("%s =======================\n", log_lvl); |
7498 | } |
7499 | } |
7500 | +EXPORT_SYMBOL(dump_trace); |
7501 | |
7502 | -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) |
7503 | +static void |
7504 | +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) |
7505 | +{ |
7506 | + printk(data); |
7507 | + print_symbol(msg, symbol); |
7508 | + printk("\n"); |
7509 | +} |
7510 | + |
7511 | +static void print_trace_warning(void *data, char *msg) |
7512 | +{ |
7513 | + printk("%s%s\n", (char *)data, msg); |
7514 | +} |
7515 | + |
7516 | +static int print_trace_stack(void *data, char *name) |
7517 | +{ |
7518 | + return 0; |
7519 | +} |
7520 | + |
7521 | +/* |
7522 | + * Print one address/symbol entry per line. |
7523 | + */ |
7524 | +static void print_trace_address(void *data, unsigned long addr) |
7525 | +{ |
7526 | + printk("%s [<%08lx>] ", (char *)data, addr); |
7527 | + print_symbol("%s\n", addr); |
7528 | +} |
7529 | + |
7530 | +static struct stacktrace_ops print_trace_ops = { |
7531 | + .warning = print_trace_warning, |
7532 | + .warning_symbol = print_trace_warning_symbol, |
7533 | + .stack = print_trace_stack, |
7534 | + .address = print_trace_address, |
7535 | +}; |
7536 | + |
7537 | +static void |
7538 | +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
7539 | + unsigned long * stack, char *log_lvl) |
7540 | +{ |
7541 | + dump_trace(task, regs, stack, &print_trace_ops, log_lvl); |
7542 | + printk("%s =======================\n", log_lvl); |
7543 | +} |
7544 | + |
7545 | +void show_trace(struct task_struct *task, struct pt_regs *regs, |
7546 | + unsigned long * stack) |
7547 | { |
7548 | show_trace_log_lvl(task, regs, stack, ""); |
7549 | } |
7550 | @@ -297,12 +363,13 @@ |
7551 | ss = regs->xss & 0xffff; |
7552 | } |
7553 | print_modules(); |
7554 | - printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" |
7555 | - "EFLAGS: %08lx (%s %.*s) \n", |
7556 | + printk(KERN_EMERG "CPU: %d\n" |
7557 | + KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" |
7558 | + KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", |
7559 | smp_processor_id(), 0xffff & regs->xcs, regs->eip, |
7560 | - print_tainted(), regs->eflags, system_utsname.release, |
7561 | - (int)strcspn(system_utsname.version, " "), |
7562 | - system_utsname.version); |
7563 | + print_tainted(), regs->eflags, init_utsname()->release, |
7564 | + (int)strcspn(init_utsname()->version, " "), |
7565 | + init_utsname()->version); |
7566 | print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); |
7567 | printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", |
7568 | regs->eax, regs->ebx, regs->ecx, regs->edx); |
7569 | @@ -319,6 +386,8 @@ |
7570 | */ |
7571 | if (in_kernel) { |
7572 | u8 __user *eip; |
7573 | + int code_bytes = 64; |
7574 | + unsigned char c; |
7575 | |
7576 | printk("\n" KERN_EMERG "Stack: "); |
7577 | show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); |
7578 | @@ -326,9 +395,12 @@ |
7579 | printk(KERN_EMERG "Code: "); |
7580 | |
7581 | eip = (u8 __user *)regs->eip - 43; |
7582 | - for (i = 0; i < 64; i++, eip++) { |
7583 | - unsigned char c; |
7584 | - |
7585 | + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { |
7586 | + /* try starting at EIP */ |
7587 | + eip = (u8 __user *)regs->eip; |
7588 | + code_bytes = 32; |
7589 | + } |
7590 | + for (i = 0; i < code_bytes; i++, eip++) { |
7591 | if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { |
7592 | printk(" Bad EIP value."); |
7593 | break; |
7594 | @@ -349,7 +421,7 @@ |
7595 | |
7596 | if (eip < PAGE_OFFSET) |
7597 | return; |
7598 | - if (__get_user(ud2, (unsigned short __user *)eip)) |
7599 | + if (probe_kernel_address((unsigned short __user *)eip, ud2)) |
7600 | return; |
7601 | if (ud2 != 0x0b0f) |
7602 | return; |
7603 | @@ -362,7 +434,8 @@ |
7604 | char *file; |
7605 | char c; |
7606 | |
7607 | - if (__get_user(line, (unsigned short __user *)(eip + 2))) |
7608 | + if (probe_kernel_address((unsigned short __user *)(eip + 2), |
7609 | + line)) |
7610 | break; |
7611 | if (__get_user(file, (char * __user *)(eip + 4)) || |
7612 | (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) |
7613 | @@ -604,18 +677,24 @@ |
7614 | } |
7615 | } |
7616 | |
7617 | -static void mem_parity_error(unsigned char reason, struct pt_regs * regs) |
7618 | +static __kprobes void |
7619 | +mem_parity_error(unsigned char reason, struct pt_regs * regs) |
7620 | { |
7621 | - printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying " |
7622 | - "to continue\n"); |
7623 | + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " |
7624 | + "CPU %d.\n", reason, smp_processor_id()); |
7625 | printk(KERN_EMERG "You probably have a hardware problem with your RAM " |
7626 | "chips\n"); |
7627 | + if (panic_on_unrecovered_nmi) |
7628 | + panic("NMI: Not continuing"); |
7629 | + |
7630 | + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
7631 | |
7632 | /* Clear and disable the memory parity error line. */ |
7633 | clear_mem_error(reason); |
7634 | } |
7635 | |
7636 | -static void io_check_error(unsigned char reason, struct pt_regs * regs) |
7637 | +static __kprobes void |
7638 | +io_check_error(unsigned char reason, struct pt_regs * regs) |
7639 | { |
7640 | printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); |
7641 | show_registers(regs); |
7642 | @@ -624,7 +703,8 @@ |
7643 | clear_io_check_error(reason); |
7644 | } |
7645 | |
7646 | -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) |
7647 | +static __kprobes void |
7648 | +unknown_nmi_error(unsigned char reason, struct pt_regs * regs) |
7649 | { |
7650 | #ifdef CONFIG_MCA |
7651 | /* Might actually be able to figure out what the guilty party |
7652 | @@ -634,15 +714,18 @@ |
7653 | return; |
7654 | } |
7655 | #endif |
7656 | - printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", |
7657 | - reason, smp_processor_id()); |
7658 | - printk("Dazed and confused, but trying to continue\n"); |
7659 | - printk("Do you have a strange power saving mode enabled?\n"); |
7660 | + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " |
7661 | + "CPU %d.\n", reason, smp_processor_id()); |
7662 | + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); |
7663 | + if (panic_on_unrecovered_nmi) |
7664 | + panic("NMI: Not continuing"); |
7665 | + |
7666 | + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
7667 | } |
7668 | |
7669 | static DEFINE_SPINLOCK(nmi_print_lock); |
7670 | |
7671 | -void die_nmi (struct pt_regs *regs, const char *msg) |
7672 | +void __kprobes die_nmi(struct pt_regs *regs, const char *msg) |
7673 | { |
7674 | if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == |
7675 | NOTIFY_STOP) |
7676 | @@ -674,7 +757,7 @@ |
7677 | do_exit(SIGSEGV); |
7678 | } |
7679 | |
7680 | -static void default_do_nmi(struct pt_regs * regs) |
7681 | +static __kprobes void default_do_nmi(struct pt_regs * regs) |
7682 | { |
7683 | unsigned char reason = 0; |
7684 | |
7685 | @@ -691,12 +774,12 @@ |
7686 | * Ok, so this is none of the documented NMI sources, |
7687 | * so it must be the NMI watchdog. |
7688 | */ |
7689 | - if (nmi_watchdog) { |
7690 | - nmi_watchdog_tick(regs); |
7691 | + if (nmi_watchdog_tick(regs, reason)) |
7692 | return; |
7693 | - } |
7694 | + if (!do_nmi_callback(regs, smp_processor_id())) |
7695 | #endif |
7696 | - unknown_nmi_error(reason, regs); |
7697 | + unknown_nmi_error(reason, regs); |
7698 | + |
7699 | return; |
7700 | } |
7701 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) |
7702 | @@ -712,14 +795,7 @@ |
7703 | reassert_nmi(); |
7704 | } |
7705 | |
7706 | -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) |
7707 | -{ |
7708 | - return 0; |
7709 | -} |
7710 | - |
7711 | -static nmi_callback_t nmi_callback = dummy_nmi_callback; |
7712 | - |
7713 | -fastcall void do_nmi(struct pt_regs * regs, long error_code) |
7714 | +fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) |
7715 | { |
7716 | int cpu; |
7717 | |
7718 | @@ -729,25 +805,11 @@ |
7719 | |
7720 | ++nmi_count(cpu); |
7721 | |
7722 | - if (!rcu_dereference(nmi_callback)(regs, cpu)) |
7723 | - default_do_nmi(regs); |
7724 | + default_do_nmi(regs); |
7725 | |
7726 | nmi_exit(); |
7727 | } |
7728 | |
7729 | -void set_nmi_callback(nmi_callback_t callback) |
7730 | -{ |
7731 | - vmalloc_sync_all(); |
7732 | - rcu_assign_pointer(nmi_callback, callback); |
7733 | -} |
7734 | -EXPORT_SYMBOL_GPL(set_nmi_callback); |
7735 | - |
7736 | -void unset_nmi_callback(void) |
7737 | -{ |
7738 | - nmi_callback = dummy_nmi_callback; |
7739 | -} |
7740 | -EXPORT_SYMBOL_GPL(unset_nmi_callback); |
7741 | - |
7742 | #ifdef CONFIG_KPROBES |
7743 | fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) |
7744 | { |
7745 | --- a/arch/x86/kernel/traps_64-xen.c |
7746 | +++ b/arch/x86/kernel/traps_64-xen.c |
7747 | @@ -23,6 +23,7 @@ |
7748 | #include <linux/delay.h> |
7749 | #include <linux/spinlock.h> |
7750 | #include <linux/interrupt.h> |
7751 | +#include <linux/kallsyms.h> |
7752 | #include <linux/module.h> |
7753 | #include <linux/moduleparam.h> |
7754 | #include <linux/nmi.h> |
7755 | @@ -45,6 +46,7 @@ |
7756 | #include <asm/pda.h> |
7757 | #include <asm/proto.h> |
7758 | #include <asm/nmi.h> |
7759 | +#include <asm/stacktrace.h> |
7760 | |
7761 | asmlinkage void divide_error(void); |
7762 | asmlinkage void debug(void); |
7763 | @@ -114,7 +116,6 @@ |
7764 | #endif |
7765 | |
7766 | #ifdef CONFIG_KALLSYMS |
7767 | -# include <linux/kallsyms.h> |
7768 | void printk_address(unsigned long address) |
7769 | { |
7770 | unsigned long offset = 0, symsize; |
7771 | @@ -142,7 +143,7 @@ |
7772 | #endif |
7773 | |
7774 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
7775 | - unsigned *usedp, const char **idp) |
7776 | + unsigned *usedp, char **idp) |
7777 | { |
7778 | #ifndef CONFIG_X86_NO_TSS |
7779 | static char ids[][8] = { |
7780 | @@ -162,26 +163,7 @@ |
7781 | * 'stack' is in one of them: |
7782 | */ |
7783 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { |
7784 | - unsigned long end; |
7785 | - |
7786 | - /* |
7787 | - * set 'end' to the end of the exception stack. |
7788 | - */ |
7789 | - switch (k + 1) { |
7790 | - /* |
7791 | - * TODO: this block is not needed i think, because |
7792 | - * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK] |
7793 | - * properly too. |
7794 | - */ |
7795 | -#if DEBUG_STKSZ > EXCEPTION_STKSZ |
7796 | - case DEBUG_STACK: |
7797 | - end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; |
7798 | - break; |
7799 | -#endif |
7800 | - default: |
7801 | - end = per_cpu(orig_ist, cpu).ist[k]; |
7802 | - break; |
7803 | - } |
7804 | + unsigned long end = per_cpu(orig_ist, cpu).ist[k]; |
7805 | /* |
7806 | * Is 'stack' above this exception frame's end? |
7807 | * If yes then skip to the next frame. |
7808 | @@ -236,13 +218,19 @@ |
7809 | return NULL; |
7810 | } |
7811 | |
7812 | -static int show_trace_unwind(struct unwind_frame_info *info, void *context) |
7813 | +struct ops_and_data { |
7814 | + struct stacktrace_ops *ops; |
7815 | + void *data; |
7816 | +}; |
7817 | + |
7818 | +static int dump_trace_unwind(struct unwind_frame_info *info, void *context) |
7819 | { |
7820 | + struct ops_and_data *oad = (struct ops_and_data *)context; |
7821 | int n = 0; |
7822 | |
7823 | while (unwind(info) == 0 && UNW_PC(info)) { |
7824 | n++; |
7825 | - printk_address(UNW_PC(info)); |
7826 | + oad->ops->address(oad->data, UNW_PC(info)); |
7827 | if (arch_unw_user_mode(info)) |
7828 | break; |
7829 | } |
7830 | @@ -256,13 +244,19 @@ |
7831 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
7832 | */ |
7833 | |
7834 | -void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack) |
7835 | +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) |
7836 | { |
7837 | - const unsigned cpu = safe_smp_processor_id(); |
7838 | + void *t = (void *)tinfo; |
7839 | + return p > t && p < t + THREAD_SIZE - 3; |
7840 | +} |
7841 | + |
7842 | +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, |
7843 | + struct stacktrace_ops *ops, void *data) |
7844 | +{ |
7845 | + const unsigned cpu = smp_processor_id(); |
7846 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
7847 | unsigned used = 0; |
7848 | - |
7849 | - printk("\nCall Trace:\n"); |
7850 | + struct thread_info *tinfo; |
7851 | |
7852 | if (!tsk) |
7853 | tsk = current; |
7854 | @@ -270,32 +264,47 @@ |
7855 | if (call_trace >= 0) { |
7856 | int unw_ret = 0; |
7857 | struct unwind_frame_info info; |
7858 | + struct ops_and_data oad = { .ops = ops, .data = data }; |
7859 | |
7860 | if (regs) { |
7861 | if (unwind_init_frame_info(&info, tsk, regs) == 0) |
7862 | - unw_ret = show_trace_unwind(&info, NULL); |
7863 | + unw_ret = dump_trace_unwind(&info, &oad); |
7864 | } else if (tsk == current) |
7865 | - unw_ret = unwind_init_running(&info, show_trace_unwind, NULL); |
7866 | + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); |
7867 | else { |
7868 | if (unwind_init_blocked(&info, tsk) == 0) |
7869 | - unw_ret = show_trace_unwind(&info, NULL); |
7870 | + unw_ret = dump_trace_unwind(&info, &oad); |
7871 | } |
7872 | if (unw_ret > 0) { |
7873 | if (call_trace == 1 && !arch_unw_user_mode(&info)) { |
7874 | - print_symbol("DWARF2 unwinder stuck at %s\n", |
7875 | + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", |
7876 | UNW_PC(&info)); |
7877 | if ((long)UNW_SP(&info) < 0) { |
7878 | - printk("Leftover inexact backtrace:\n"); |
7879 | + ops->warning(data, "Leftover inexact backtrace:\n"); |
7880 | stack = (unsigned long *)UNW_SP(&info); |
7881 | + if (!stack) |
7882 | + return; |
7883 | } else |
7884 | - printk("Full inexact backtrace again:\n"); |
7885 | + ops->warning(data, "Full inexact backtrace again:\n"); |
7886 | } else if (call_trace >= 1) |
7887 | return; |
7888 | else |
7889 | - printk("Full inexact backtrace again:\n"); |
7890 | + ops->warning(data, "Full inexact backtrace again:\n"); |
7891 | } else |
7892 | - printk("Inexact backtrace:\n"); |
7893 | + ops->warning(data, "Inexact backtrace:\n"); |
7894 | + } |
7895 | + if (!stack) { |
7896 | + unsigned long dummy; |
7897 | + stack = &dummy; |
7898 | + if (tsk && tsk != current) |
7899 | + stack = (unsigned long *)tsk->thread.rsp; |
7900 | } |
7901 | + /* |
7902 | + * Align the stack pointer on word boundary, later loops |
7903 | + * rely on that (and corruption / debug info bugs can cause |
7904 | + * unaligned values here): |
7905 | + */ |
7906 | + stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); |
7907 | |
7908 | /* |
7909 | * Print function call entries within a stack. 'cond' is the |
7910 | @@ -305,7 +314,9 @@ |
7911 | #define HANDLE_STACK(cond) \ |
7912 | do while (cond) { \ |
7913 | unsigned long addr = *stack++; \ |
7914 | - if (kernel_text_address(addr)) { \ |
7915 | + if (oops_in_progress ? \ |
7916 | + __kernel_text_address(addr) : \ |
7917 | + kernel_text_address(addr)) { \ |
7918 | /* \ |
7919 | * If the address is either in the text segment of the \ |
7920 | * kernel, or in the region which contains vmalloc'ed \ |
7921 | @@ -314,7 +325,7 @@ |
7922 | * down the cause of the crash will be able to figure \ |
7923 | * out the call path that was taken. \ |
7924 | */ \ |
7925 | - printk_address(addr); \ |
7926 | + ops->address(data, addr); \ |
7927 | } \ |
7928 | } while (0) |
7929 | |
7930 | @@ -323,16 +334,17 @@ |
7931 | * current stack address. If the stacks consist of nested |
7932 | * exceptions |
7933 | */ |
7934 | - for ( ; ; ) { |
7935 | - const char *id; |
7936 | + for (;;) { |
7937 | + char *id; |
7938 | unsigned long *estack_end; |
7939 | estack_end = in_exception_stack(cpu, (unsigned long)stack, |
7940 | &used, &id); |
7941 | |
7942 | if (estack_end) { |
7943 | - printk(" <%s>", id); |
7944 | + if (ops->stack(data, id) < 0) |
7945 | + break; |
7946 | HANDLE_STACK (stack < estack_end); |
7947 | - printk(" <EOE>"); |
7948 | + ops->stack(data, "<EOE>"); |
7949 | /* |
7950 | * We link to the next stack via the |
7951 | * second-to-last pointer (index -2 to end) in the |
7952 | @@ -347,7 +359,8 @@ |
7953 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); |
7954 | |
7955 | if (stack >= irqstack && stack < irqstack_end) { |
7956 | - printk(" <IRQ>"); |
7957 | + if (ops->stack(data, "IRQ") < 0) |
7958 | + break; |
7959 | HANDLE_STACK (stack < irqstack_end); |
7960 | /* |
7961 | * We link to the next stack (which would be |
7962 | @@ -356,7 +369,7 @@ |
7963 | */ |
7964 | stack = (unsigned long *) (irqstack_end[-1]); |
7965 | irqstack_end = NULL; |
7966 | - printk(" <EOI>"); |
7967 | + ops->stack(data, "EOI"); |
7968 | continue; |
7969 | } |
7970 | } |
7971 | @@ -364,19 +377,58 @@ |
7972 | } |
7973 | |
7974 | /* |
7975 | - * This prints the process stack: |
7976 | + * This handles the process stack: |
7977 | */ |
7978 | - HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); |
7979 | + tinfo = current_thread_info(); |
7980 | + HANDLE_STACK (valid_stack_ptr(tinfo, stack)); |
7981 | #undef HANDLE_STACK |
7982 | +} |
7983 | +EXPORT_SYMBOL(dump_trace); |
7984 | + |
7985 | +static void |
7986 | +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) |
7987 | +{ |
7988 | + print_symbol(msg, symbol); |
7989 | + printk("\n"); |
7990 | +} |
7991 | + |
7992 | +static void print_trace_warning(void *data, char *msg) |
7993 | +{ |
7994 | + printk("%s\n", msg); |
7995 | +} |
7996 | + |
7997 | +static int print_trace_stack(void *data, char *name) |
7998 | +{ |
7999 | + printk(" <%s> ", name); |
8000 | + return 0; |
8001 | +} |
8002 | + |
8003 | +static void print_trace_address(void *data, unsigned long addr) |
8004 | +{ |
8005 | + printk_address(addr); |
8006 | +} |
8007 | + |
8008 | +static struct stacktrace_ops print_trace_ops = { |
8009 | + .warning = print_trace_warning, |
8010 | + .warning_symbol = print_trace_warning_symbol, |
8011 | + .stack = print_trace_stack, |
8012 | + .address = print_trace_address, |
8013 | +}; |
8014 | |
8015 | +void |
8016 | +show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack) |
8017 | +{ |
8018 | + printk("\nCall Trace:\n"); |
8019 | + dump_trace(tsk, regs, stack, &print_trace_ops, NULL); |
8020 | printk("\n"); |
8021 | } |
8022 | |
8023 | -static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp) |
8024 | +static void |
8025 | +_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp) |
8026 | { |
8027 | unsigned long *stack; |
8028 | int i; |
8029 | - const int cpu = safe_smp_processor_id(); |
8030 | + const int cpu = smp_processor_id(); |
8031 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); |
8032 | unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); |
8033 | |
8034 | @@ -430,7 +482,7 @@ |
8035 | int i; |
8036 | int in_kernel = !user_mode(regs); |
8037 | unsigned long rsp; |
8038 | - const int cpu = safe_smp_processor_id(); |
8039 | + const int cpu = smp_processor_id(); |
8040 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; |
8041 | |
8042 | rsp = regs->rsp; |
8043 | @@ -505,9 +557,11 @@ |
8044 | |
8045 | unsigned __kprobes long oops_begin(void) |
8046 | { |
8047 | - int cpu = safe_smp_processor_id(); |
8048 | + int cpu = smp_processor_id(); |
8049 | unsigned long flags; |
8050 | |
8051 | + oops_enter(); |
8052 | + |
8053 | /* racy, but better than risking deadlock. */ |
8054 | local_irq_save(flags); |
8055 | if (!spin_trylock(&die_lock)) { |
8056 | @@ -536,6 +590,7 @@ |
8057 | spin_unlock_irqrestore(&die_lock, flags); |
8058 | if (panic_on_oops) |
8059 | panic("Fatal exception"); |
8060 | + oops_exit(); |
8061 | } |
8062 | |
8063 | void __kprobes __die(const char * str, struct pt_regs * regs, long err) |
8064 | @@ -573,7 +628,7 @@ |
8065 | } |
8066 | |
8067 | #ifdef CONFIG_X86_LOCAL_APIC |
8068 | -void __kprobes die_nmi(char *str, struct pt_regs *regs) |
8069 | +void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) |
8070 | { |
8071 | unsigned long flags = oops_begin(); |
8072 | |
8073 | @@ -581,13 +636,12 @@ |
8074 | * We are in trouble anyway, lets at least try |
8075 | * to get a message out. |
8076 | */ |
8077 | - printk(str, safe_smp_processor_id()); |
8078 | + printk(str, smp_processor_id()); |
8079 | show_registers(regs); |
8080 | if (kexec_should_crash(current)) |
8081 | crash_kexec(regs); |
8082 | - if (panic_on_timeout || panic_on_oops) |
8083 | - panic("nmi watchdog"); |
8084 | - printk("console shuts up ...\n"); |
8085 | + if (do_panic || panic_on_oops) |
8086 | + panic("Non maskable interrupt"); |
8087 | oops_end(flags); |
8088 | nmi_exit(); |
8089 | local_irq_enable(); |
8090 | @@ -734,8 +788,15 @@ |
8091 | static __kprobes void |
8092 | mem_parity_error(unsigned char reason, struct pt_regs * regs) |
8093 | { |
8094 | - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); |
8095 | - printk("You probably have a hardware problem with your RAM chips\n"); |
8096 | + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", |
8097 | + reason); |
8098 | + printk(KERN_EMERG "You probably have a hardware problem with your " |
8099 | + "RAM chips\n"); |
8100 | + |
8101 | + if (panic_on_unrecovered_nmi) |
8102 | + panic("NMI: Not continuing"); |
8103 | + |
8104 | + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
8105 | |
8106 | #if 0 /* XEN */ |
8107 | /* Clear and disable the memory parity error line. */ |
8108 | @@ -762,9 +823,15 @@ |
8109 | |
8110 | static __kprobes void |
8111 | unknown_nmi_error(unsigned char reason, struct pt_regs * regs) |
8112 | -{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); |
8113 | - printk("Dazed and confused, but trying to continue\n"); |
8114 | - printk("Do you have a strange power saving mode enabled?\n"); |
8115 | +{ |
8116 | + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", |
8117 | + reason); |
8118 | + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); |
8119 | + |
8120 | + if (panic_on_unrecovered_nmi) |
8121 | + panic("NMI: Not continuing"); |
8122 | + |
8123 | + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
8124 | } |
8125 | |
8126 | /* Runs on IST stack. This code must keep interrupts off all the time. |
8127 | @@ -789,12 +856,12 @@ |
8128 | * Ok, so this is none of the documented NMI sources, |
8129 | * so it must be the NMI watchdog. |
8130 | */ |
8131 | - if (nmi_watchdog > 0) { |
8132 | - nmi_watchdog_tick(regs,reason); |
8133 | + if (nmi_watchdog_tick(regs,reason)) |
8134 | return; |
8135 | - } |
8136 | #endif |
8137 | - unknown_nmi_error(reason, regs); |
8138 | + if (!do_nmi_callback(regs,cpu)) |
8139 | + unknown_nmi_error(reason, regs); |
8140 | + |
8141 | return; |
8142 | } |
8143 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) |
8144 | @@ -1081,6 +1148,7 @@ |
8145 | init_fpu(me); |
8146 | restore_fpu_checking(&me->thread.i387.fxsave); |
8147 | task_thread_info(me)->status |= TS_USEDFPU; |
8148 | + me->fpu_counter++; |
8149 | } |
8150 | |
8151 | |
8152 | @@ -1141,24 +1209,30 @@ |
8153 | } |
8154 | |
8155 | |
8156 | -/* Actual parsing is done early in setup.c. */ |
8157 | -static int __init oops_dummy(char *s) |
8158 | +static int __init oops_setup(char *s) |
8159 | { |
8160 | - panic_on_oops = 1; |
8161 | - return 1; |
8162 | + if (!s) |
8163 | + return -EINVAL; |
8164 | + if (!strcmp(s, "panic")) |
8165 | + panic_on_oops = 1; |
8166 | + return 0; |
8167 | } |
8168 | -__setup("oops=", oops_dummy); |
8169 | +early_param("oops", oops_setup); |
8170 | |
8171 | static int __init kstack_setup(char *s) |
8172 | { |
8173 | + if (!s) |
8174 | + return -EINVAL; |
8175 | kstack_depth_to_print = simple_strtoul(s,NULL,0); |
8176 | - return 1; |
8177 | + return 0; |
8178 | } |
8179 | -__setup("kstack=", kstack_setup); |
8180 | +early_param("kstack", kstack_setup); |
8181 | |
8182 | #ifdef CONFIG_STACK_UNWIND |
8183 | static int __init call_trace_setup(char *s) |
8184 | { |
8185 | + if (!s) |
8186 | + return -EINVAL; |
8187 | if (strcmp(s, "old") == 0) |
8188 | call_trace = -1; |
8189 | else if (strcmp(s, "both") == 0) |
8190 | @@ -1167,7 +1241,7 @@ |
8191 | call_trace = 1; |
8192 | else if (strcmp(s, "new") == 0) |
8193 | call_trace = 2; |
8194 | - return 1; |
8195 | + return 0; |
8196 | } |
8197 | -__setup("call_trace=", call_trace_setup); |
8198 | +early_param("call_trace", call_trace_setup); |
8199 | #endif |
8200 | --- a/arch/x86/kernel/vsyscall_64-xen.c |
8201 | +++ b/arch/x86/kernel/vsyscall_64-xen.c |
8202 | @@ -26,6 +26,10 @@ |
8203 | #include <linux/seqlock.h> |
8204 | #include <linux/jiffies.h> |
8205 | #include <linux/sysctl.h> |
8206 | +#include <linux/getcpu.h> |
8207 | +#include <linux/cpu.h> |
8208 | +#include <linux/smp.h> |
8209 | +#include <linux/notifier.h> |
8210 | |
8211 | #include <asm/vsyscall.h> |
8212 | #include <asm/pgtable.h> |
8213 | @@ -33,11 +37,15 @@ |
8214 | #include <asm/fixmap.h> |
8215 | #include <asm/errno.h> |
8216 | #include <asm/io.h> |
8217 | +#include <asm/segment.h> |
8218 | +#include <asm/desc.h> |
8219 | +#include <asm/topology.h> |
8220 | |
8221 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
8222 | |
8223 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; |
8224 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; |
8225 | +int __vgetcpu_mode __section_vgetcpu_mode; |
8226 | |
8227 | #include <asm/unistd.h> |
8228 | |
8229 | @@ -61,8 +69,7 @@ |
8230 | sequence = read_seqbegin(&__xtime_lock); |
8231 | |
8232 | sec = __xtime.tv_sec; |
8233 | - usec = (__xtime.tv_nsec / 1000) + |
8234 | - (__jiffies - __wall_jiffies) * (1000000 / HZ); |
8235 | + usec = __xtime.tv_nsec / 1000; |
8236 | |
8237 | if (__vxtime.mode != VXTIME_HPET) { |
8238 | t = get_cycles_sync(); |
8239 | @@ -72,7 +79,8 @@ |
8240 | __vxtime.tsc_quot) >> 32; |
8241 | /* See comment in x86_64 do_gettimeofday. */ |
8242 | } else { |
8243 | - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) - |
8244 | + usec += ((readl((void __iomem *) |
8245 | + fix_to_virt(VSYSCALL_HPET) + 0xf0) - |
8246 | __vxtime.last) * __vxtime.quot) >> 32; |
8247 | } |
8248 | } while (read_seqretry(&__xtime_lock, sequence)); |
8249 | @@ -127,9 +135,46 @@ |
8250 | return __xtime.tv_sec; |
8251 | } |
8252 | |
8253 | -long __vsyscall(2) venosys_0(void) |
8254 | -{ |
8255 | - return -ENOSYS; |
8256 | +/* Fast way to get current CPU and node. |
8257 | + This helps to do per node and per CPU caches in user space. |
8258 | + The result is not guaranteed without CPU affinity, but usually |
8259 | + works out because the scheduler tries to keep a thread on the same |
8260 | + CPU. |
8261 | + |
8262 | + tcache must point to a two element sized long array. |
8263 | + All arguments can be NULL. */ |
8264 | +long __vsyscall(2) |
8265 | +vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) |
8266 | +{ |
8267 | + unsigned int dummy, p; |
8268 | + unsigned long j = 0; |
8269 | + |
8270 | + /* Fast cache - only recompute value once per jiffies and avoid |
8271 | + relatively costly rdtscp/cpuid otherwise. |
8272 | + This works because the scheduler usually keeps the process |
8273 | + on the same CPU and this syscall doesn't guarantee its |
8274 | + results anyways. |
8275 | + We do this here because otherwise user space would do it on |
8276 | + its own in a likely inferior way (no access to jiffies). |
8277 | + If you don't like it pass NULL. */ |
8278 | + if (tcache && tcache->blob[0] == (j = __jiffies)) { |
8279 | + p = tcache->blob[1]; |
8280 | + } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { |
8281 | + /* Load per CPU data from RDTSCP */ |
8282 | + rdtscp(dummy, dummy, p); |
8283 | + } else { |
8284 | + /* Load per CPU data from GDT */ |
8285 | + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); |
8286 | + } |
8287 | + if (tcache) { |
8288 | + tcache->blob[0] = j; |
8289 | + tcache->blob[1] = p; |
8290 | + } |
8291 | + if (cpu) |
8292 | + *cpu = p & 0xfff; |
8293 | + if (node) |
8294 | + *node = p >> 12; |
8295 | + return 0; |
8296 | } |
8297 | |
8298 | long __vsyscall(3) venosys_1(void) |
8299 | @@ -149,7 +194,8 @@ |
8300 | void __user *buffer, size_t *lenp, loff_t *ppos) |
8301 | { |
8302 | extern u16 vsysc1, vsysc2; |
8303 | - u16 *map1, *map2; |
8304 | + u16 __iomem *map1; |
8305 | + u16 __iomem *map2; |
8306 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); |
8307 | if (!write) |
8308 | return ret; |
8309 | @@ -164,11 +210,11 @@ |
8310 | goto out; |
8311 | } |
8312 | if (!sysctl_vsyscall) { |
8313 | - *map1 = SYSCALL; |
8314 | - *map2 = SYSCALL; |
8315 | + writew(SYSCALL, map1); |
8316 | + writew(SYSCALL, map2); |
8317 | } else { |
8318 | - *map1 = NOP2; |
8319 | - *map2 = NOP2; |
8320 | + writew(NOP2, map1); |
8321 | + writew(NOP2, map2); |
8322 | } |
8323 | iounmap(map2); |
8324 | out: |
8325 | @@ -200,6 +246,48 @@ |
8326 | |
8327 | #endif |
8328 | |
8329 | +/* Assume __initcall executes before all user space. Hopefully kmod |
8330 | + doesn't violate that. We'll find out if it does. */ |
8331 | +static void __cpuinit vsyscall_set_cpu(int cpu) |
8332 | +{ |
8333 | + unsigned long d; |
8334 | + unsigned long node = 0; |
8335 | +#ifdef CONFIG_NUMA |
8336 | + node = cpu_to_node[cpu]; |
8337 | +#endif |
8338 | + if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) |
8339 | + write_rdtscp_aux((node << 12) | cpu); |
8340 | + |
8341 | + /* Store cpu number in limit so that it can be loaded quickly |
8342 | + in user space in vgetcpu. |
8343 | + 12 bits for the CPU and 8 bits for the node. */ |
8344 | + d = 0x0f40000000000ULL; |
8345 | + d |= cpu; |
8346 | + d |= (node & 0xf) << 12; |
8347 | + d |= (node >> 4) << 48; |
8348 | + if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu) |
8349 | + + GDT_ENTRY_PER_CPU), |
8350 | + d)) |
8351 | + BUG(); |
8352 | +} |
8353 | + |
8354 | +static void __cpuinit cpu_vsyscall_init(void *arg) |
8355 | +{ |
8356 | + /* preemption should be already off */ |
8357 | + vsyscall_set_cpu(raw_smp_processor_id()); |
8358 | +} |
8359 | + |
8360 | +#ifdef CONFIG_HOTPLUG_CPU |
8361 | +static int __cpuinit |
8362 | +cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) |
8363 | +{ |
8364 | + long cpu = (long)arg; |
8365 | + if (action == CPU_ONLINE) |
8366 | + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); |
8367 | + return NOTIFY_DONE; |
8368 | +} |
8369 | +#endif |
8370 | + |
8371 | static void __init map_vsyscall(void) |
8372 | { |
8373 | extern char __vsyscall_0; |
8374 | @@ -214,13 +302,20 @@ |
8375 | VSYSCALL_ADDR(__NR_vgettimeofday))); |
8376 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
8377 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
8378 | + BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); |
8379 | map_vsyscall(); |
8380 | #ifdef CONFIG_XEN |
8381 | 	sysctl_vsyscall = 0; /* disable vgettimeofday() */ |
8382 | + if (boot_cpu_has(X86_FEATURE_RDTSCP)) |
8383 | + vgetcpu_mode = VGETCPU_RDTSCP; |
8384 | + else |
8385 | + vgetcpu_mode = VGETCPU_LSL; |
8386 | #endif |
8387 | #ifdef CONFIG_SYSCTL |
8388 | register_sysctl_table(kernel_root_table2, 0); |
8389 | #endif |
8390 | + on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); |
8391 | + hotcpu_notifier(cpu_vsyscall_notifier, 0); |
8392 | return 0; |
8393 | } |
8394 | |
8395 | --- a/arch/x86/mach-xen/setup.c |
8396 | +++ b/arch/x86/mach-xen/setup.c |
8397 | @@ -103,8 +103,10 @@ |
8398 | |
8399 | setup_xen_features(); |
8400 | |
8401 | - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) |
8402 | - set_fixaddr_top(pp.virt_start); |
8403 | + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) { |
8404 | + hypervisor_virt_start = pp.virt_start; |
8405 | + reserve_top_address(0UL - pp.virt_start); |
8406 | + } |
8407 | |
8408 | if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { |
8409 | machine_to_phys_mapping = (unsigned long *)mapping.v_start; |
8410 | --- a/arch/x86/mm/fault_32-xen.c |
8411 | +++ b/arch/x86/mm/fault_32-xen.c |
8412 | @@ -27,21 +27,24 @@ |
8413 | #include <asm/uaccess.h> |
8414 | #include <asm/desc.h> |
8415 | #include <asm/kdebug.h> |
8416 | +#include <asm/segment.h> |
8417 | |
8418 | extern void die(const char *,struct pt_regs *,long); |
8419 | |
8420 | -#ifdef CONFIG_KPROBES |
8421 | -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); |
8422 | +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); |
8423 | + |
8424 | int register_page_fault_notifier(struct notifier_block *nb) |
8425 | { |
8426 | vmalloc_sync_all(); |
8427 | 	return atomic_notifier_chain_register(&notify_page_fault_chain, nb); |
8428 | } |
8429 | +EXPORT_SYMBOL_GPL(register_page_fault_notifier); |
8430 | |
8431 | int unregister_page_fault_notifier(struct notifier_block *nb) |
8432 | { |
8433 | 	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); |
8434 | } |
8435 | +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); |
8436 | |
8437 | static inline int notify_page_fault(enum die_val val, const char *str, |
8438 | struct pt_regs *regs, long err, int trap, int sig) |
8439 | @@ -55,14 +58,6 @@ |
8440 | }; |
8441 | 	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); |
8442 | } |
8443 | -#else |
8444 | -static inline int notify_page_fault(enum die_val val, const char *str, |
8445 | - struct pt_regs *regs, long err, int trap, int sig) |
8446 | -{ |
8447 | - return NOTIFY_DONE; |
8448 | -} |
8449 | -#endif |
8450 | - |
8451 | |
8452 | /* |
8453 | * Unlock any spinlocks which will prevent us from getting the |
8454 | @@ -119,10 +114,10 @@ |
8455 | } |
8456 | |
8457 | /* The standard kernel/user address space limit. */ |
8458 | - *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg; |
8459 | + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; |
8460 | |
8461 | /* By far the most common cases. */ |
8462 | - if (likely(seg == __USER_CS || seg == GET_KERNEL_CS())) |
8463 | + if (likely(SEGMENT_IS_FLAT_CODE(seg))) |
8464 | return eip; |
8465 | |
8466 | /* Check the segment exists, is within the current LDT/GDT size, |
8467 | @@ -559,11 +554,7 @@ |
8468 | write = 0; |
8469 | switch (error_code & 3) { |
8470 | default: /* 3: write, present */ |
8471 | -#ifdef TEST_VERIFY_AREA |
8472 | - if (regs->cs == GET_KERNEL_CS()) |
8473 | - printk("WP fault at %08lx\n", regs->eip); |
8474 | -#endif |
8475 | - /* fall through */ |
8476 | + /* fall through */ |
8477 | case 2: /* write, not present */ |
8478 | if (!(vma->vm_flags & VM_WRITE)) |
8479 | goto bad_area; |
8480 | @@ -572,7 +563,7 @@ |
8481 | case 1: /* read, present */ |
8482 | goto bad_area; |
8483 | case 0: /* read, not present */ |
8484 | - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) |
8485 | + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) |
8486 | goto bad_area; |
8487 | } |
8488 | |
8489 | @@ -704,7 +695,7 @@ |
8490 | */ |
8491 | out_of_memory: |
8492 | up_read(&mm->mmap_sem); |
8493 | - if (tsk->pid == 1) { |
8494 | + if (is_init(tsk)) { |
8495 | yield(); |
8496 | down_read(&mm->mmap_sem); |
8497 | goto survive; |
8498 | --- a/arch/x86/mm/fault_64-xen.c |
8499 | +++ b/arch/x86/mm/fault_64-xen.c |
8500 | @@ -40,8 +40,7 @@ |
8501 | #define PF_RSVD (1<<3) |
8502 | #define PF_INSTR (1<<4) |
8503 | |
8504 | -#ifdef CONFIG_KPROBES |
8505 | -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); |
8506 | +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); |
8507 | |
8508 | /* Hook to register for page fault notifications */ |
8509 | int register_page_fault_notifier(struct notifier_block *nb) |
8510 | @@ -49,11 +48,13 @@ |
8511 | vmalloc_sync_all(); |
8512 | 	return atomic_notifier_chain_register(&notify_page_fault_chain, nb); |
8513 | } |
8514 | +EXPORT_SYMBOL_GPL(register_page_fault_notifier); |
8515 | |
8516 | int unregister_page_fault_notifier(struct notifier_block *nb) |
8517 | { |
8518 | 	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); |
8519 | } |
8520 | +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); |
8521 | |
8522 | static inline int notify_page_fault(enum die_val val, const char *str, |
8523 | struct pt_regs *regs, long err, int trap, int sig) |
8524 | @@ -67,13 +68,6 @@ |
8525 | }; |
8526 | 	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); |
8527 | } |
8528 | -#else |
8529 | -static inline int notify_page_fault(enum die_val val, const char *str, |
8530 | - struct pt_regs *regs, long err, int trap, int sig) |
8531 | -{ |
8532 | - return NOTIFY_DONE; |
8533 | -} |
8534 | -#endif |
8535 | |
8536 | void bust_spinlocks(int yes) |
8537 | { |
8538 | @@ -102,7 +96,7 @@ |
8539 | static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, |
8540 | unsigned long error_code) |
8541 | { |
8542 | - unsigned char *instr; |
8543 | + unsigned char __user *instr; |
8544 | int scan_more = 1; |
8545 | int prefetch = 0; |
8546 | unsigned char *max_instr; |
8547 | @@ -111,7 +105,7 @@ |
8548 | if (error_code & PF_INSTR) |
8549 | return 0; |
8550 | |
8551 | - instr = (unsigned char *)convert_rip_to_linear(current, regs); |
8552 | + instr = (unsigned char __user *)convert_rip_to_linear(current, regs); |
8553 | max_instr = instr + 15; |
8554 | |
8555 | if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) |
8556 | @@ -122,7 +116,7 @@ |
8557 | unsigned char instr_hi; |
8558 | unsigned char instr_lo; |
8559 | |
8560 | - if (__get_user(opcode, instr)) |
8561 | + if (__get_user(opcode, (char __user *)instr)) |
8562 | break; |
8563 | |
8564 | instr_hi = opcode & 0xf0; |
8565 | @@ -160,7 +154,7 @@ |
8566 | case 0x00: |
8567 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ |
8568 | scan_more = 0; |
8569 | - if (__get_user(opcode, instr)) |
8570 | + if (__get_user(opcode, (char __user *)instr)) |
8571 | break; |
8572 | prefetch = (instr_lo == 0xF) && |
8573 | (opcode == 0x0D || opcode == 0x18); |
8574 | @@ -176,7 +170,7 @@ |
8575 | static int bad_address(void *p) |
8576 | { |
8577 | unsigned long dummy; |
8578 | - return __get_user(dummy, (unsigned long *)p); |
8579 | + return __get_user(dummy, (unsigned long __user *)p); |
8580 | } |
8581 | |
8582 | void dump_pagetable(unsigned long address) |
8583 | @@ -248,7 +242,7 @@ |
8584 | |
8585 | int unhandled_signal(struct task_struct *tsk, int sig) |
8586 | { |
8587 | - if (tsk->pid == 1) |
8588 | + if (is_init(tsk)) |
8589 | return 1; |
8590 | if (tsk->ptrace & PT_PTRACED) |
8591 | return 0; |
8592 | @@ -300,7 +294,7 @@ |
8593 | if (pgd_none(*pgd)) |
8594 | set_pgd(pgd, *pgd_ref); |
8595 | else |
8596 | - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); |
8597 | + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); |
8598 | |
8599 | /* Below here mismatches are bugs because these lower tables |
8600 | are shared */ |
8601 | @@ -309,7 +303,7 @@ |
8602 | pud_ref = pud_offset(pgd_ref, address); |
8603 | if (pud_none(*pud_ref)) |
8604 | return -1; |
8605 | - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref)) |
8606 | + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) |
8607 | BUG(); |
8608 | pmd = pmd_offset(pud, address); |
8609 | pmd_ref = pmd_offset(pud_ref, address); |
8610 | @@ -531,7 +525,7 @@ |
8611 | case PF_PROT: /* read, present */ |
8612 | goto bad_area; |
8613 | case 0: /* read, not present */ |
8614 | - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) |
8615 | + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) |
8616 | goto bad_area; |
8617 | } |
8618 | |
8619 | @@ -647,7 +641,7 @@ |
8620 | */ |
8621 | out_of_memory: |
8622 | up_read(&mm->mmap_sem); |
8623 | - if (current->pid == 1) { |
8624 | + if (is_init(current)) { |
8625 | yield(); |
8626 | goto again; |
8627 | } |
8628 | @@ -702,7 +696,7 @@ |
8629 | if (pgd_none(*pgd)) |
8630 | set_pgd(pgd, *pgd_ref); |
8631 | else |
8632 | - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); |
8633 | + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); |
8634 | } |
8635 | spin_unlock(&pgd_lock); |
8636 | set_bit(pgd_index(address), insync); |
8637 | --- a/arch/x86/mm/highmem_32-xen.c |
8638 | +++ b/arch/x86/mm/highmem_32-xen.c |
8639 | @@ -38,11 +38,9 @@ |
8640 | |
8641 | idx = type + KM_TYPE_NR*smp_processor_id(); |
8642 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
8643 | -#ifdef CONFIG_DEBUG_HIGHMEM |
8644 | if (!pte_none(*(kmap_pte-idx))) |
8645 | BUG(); |
8646 | -#endif |
8647 | - set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); |
8648 | + set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); |
8649 | |
8650 | return (void*) vaddr; |
8651 | } |
8652 | @@ -62,36 +60,26 @@ |
8653 | |
8654 | void kunmap_atomic(void *kvaddr, enum km_type type) |
8655 | { |
8656 | -#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN) |
8657 | unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; |
8658 | enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); |
8659 | |
8660 | - if (vaddr < FIXADDR_START) { // FIXME |
8661 | +#ifdef CONFIG_DEBUG_HIGHMEM |
8662 | + if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { |
8663 | dec_preempt_count(); |
8664 | preempt_check_resched(); |
8665 | return; |
8666 | } |
8667 | -#endif |
8668 | |
8669 | -#if defined(CONFIG_DEBUG_HIGHMEM) |
8670 | if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) |
8671 | BUG(); |
8672 | - |
8673 | - /* |
8674 | - * force other mappings to Oops if they'll try to access |
8675 | - * this pte without first remap it |
8676 | - */ |
8677 | - pte_clear(&init_mm, vaddr, kmap_pte-idx); |
8678 | - __flush_tlb_one(vaddr); |
8679 | -#elif defined(CONFIG_XEN) |
8680 | +#endif |
8681 | /* |
8682 | - * We must ensure there are no dangling pagetable references when |
8683 | - * returning memory to Xen (decrease_reservation). |
8684 | - * XXX TODO: We could make this faster by only zapping when |
8685 | - * kmap_flush_unused is called but that is trickier and more invasive. |
8686 | + * Force other mappings to Oops if they'll try to access this pte |
8687 | + * without first remap it. Keeping stale mappings around is a bad idea |
8688 | + * also, in case the page changes cacheability attributes or becomes |
8689 | + * a protected page in a hypervisor. |
8690 | */ |
8691 | - pte_clear(&init_mm, vaddr, kmap_pte-idx); |
8692 | -#endif |
8693 | + kpte_clear_flush(kmap_pte-idx, vaddr); |
8694 | |
8695 | dec_preempt_count(); |
8696 | preempt_check_resched(); |
8697 | @@ -110,7 +98,6 @@ |
8698 | idx = type + KM_TYPE_NR*smp_processor_id(); |
8699 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
8700 | set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); |
8701 | - __flush_tlb_one(vaddr); |
8702 | |
8703 | return (void*) vaddr; |
8704 | } |
8705 | --- a/arch/x86/mm/hypervisor.c |
8706 | +++ b/arch/x86/mm/hypervisor.c |
8707 | @@ -569,7 +569,8 @@ |
8708 | #define MAX_BATCHED_FULL_PTES 32 |
8709 | |
8710 | int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, |
8711 | - unsigned long addr, unsigned long end, pgprot_t newprot) |
8712 | + unsigned long addr, unsigned long end, pgprot_t newprot, |
8713 | + int dirty_accountable) |
8714 | { |
8715 | int rc = 0, i = 0; |
8716 | mmu_update_t u[MAX_BATCHED_FULL_PTES]; |
8717 | @@ -582,10 +583,14 @@ |
8718 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); |
8719 | do { |
8720 | if (pte_present(*pte)) { |
8721 | + pte_t ptent = pte_modify(*pte, newprot); |
8722 | + |
8723 | + if (dirty_accountable && pte_dirty(ptent)) |
8724 | + ptent = pte_mkwrite(ptent); |
8725 | u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK) |
8726 | | ((unsigned long)pte & ~PAGE_MASK) |
8727 | | MMU_PT_UPDATE_PRESERVE_AD; |
8728 | - u[i].val = __pte_val(pte_modify(*pte, newprot)); |
8729 | + u[i].val = __pte_val(ptent); |
8730 | if (++i == MAX_BATCHED_FULL_PTES) { |
8731 | if ((rc = HYPERVISOR_mmu_update( |
8732 | &u[0], i, NULL, DOMID_SELF)) != 0) |
8733 | --- a/arch/x86/mm/init_32-xen.c |
8734 | +++ b/arch/x86/mm/init_32-xen.c |
8735 | @@ -464,16 +464,22 @@ |
8736 | * on Enable |
8737 | * off Disable |
8738 | */ |
8739 | -void __init noexec_setup(const char *str) |
8740 | +static int __init noexec_setup(char *str) |
8741 | { |
8742 | - if (!strncmp(str, "on",2) && cpu_has_nx) { |
8743 | - __supported_pte_mask |= _PAGE_NX; |
8744 | - disable_nx = 0; |
8745 | - } else if (!strncmp(str,"off",3)) { |
8746 | + if (!str || !strcmp(str, "on")) { |
8747 | + if (cpu_has_nx) { |
8748 | + __supported_pte_mask |= _PAGE_NX; |
8749 | + disable_nx = 0; |
8750 | + } |
8751 | + } else if (!strcmp(str,"off")) { |
8752 | disable_nx = 1; |
8753 | __supported_pte_mask &= ~_PAGE_NX; |
8754 | - } |
8755 | + } else |
8756 | + return -EINVAL; |
8757 | + |
8758 | + return 0; |
8759 | } |
8760 | +early_param("noexec", noexec_setup); |
8761 | |
8762 | int nx_enabled = 0; |
8763 | #ifdef CONFIG_X86_PAE |
8764 | @@ -516,6 +522,7 @@ |
8765 | pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); |
8766 | else |
8767 | pte->pte_high |= 1 << (_PAGE_BIT_NX - 32); |
8768 | + pte_update_defer(&init_mm, vaddr, pte); |
8769 | __flush_tlb_all(); |
8770 | out: |
8771 | return ret; |
8772 | @@ -598,18 +605,6 @@ |
8773 | } |
8774 | } |
8775 | |
8776 | -static void __init set_max_mapnr_init(void) |
8777 | -{ |
8778 | -#ifdef CONFIG_HIGHMEM |
8779 | - num_physpages = highend_pfn; |
8780 | -#else |
8781 | - num_physpages = max_low_pfn; |
8782 | -#endif |
8783 | -#ifdef CONFIG_FLATMEM |
8784 | - max_mapnr = num_physpages; |
8785 | -#endif |
8786 | -} |
8787 | - |
8788 | static struct kcore_list kcore_mem, kcore_vmalloc; |
8789 | |
8790 | void __init mem_init(void) |
8791 | @@ -630,8 +625,7 @@ |
8792 | #endif |
8793 | |
8794 | #ifdef CONFIG_FLATMEM |
8795 | - if (!mem_map) |
8796 | - BUG(); |
8797 | + BUG_ON(!mem_map); |
8798 | #endif |
8799 | |
8800 | bad_ppro = ppro_with_ram_bug(); |
8801 | @@ -646,17 +640,6 @@ |
8802 | } |
8803 | #endif |
8804 | |
8805 | - set_max_mapnr_init(); |
8806 | - |
8807 | -#ifdef CONFIG_HIGHMEM |
8808 | - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
8809 | -#else |
8810 | - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
8811 | -#endif |
8812 | - printk("vmalloc area: %lx-%lx, maxmem %lx\n", |
8813 | - VMALLOC_START,VMALLOC_END,MAXMEM); |
8814 | - BUG_ON(VMALLOC_START > VMALLOC_END); |
8815 | - |
8816 | /* this will put all low memory onto the freelists */ |
8817 | totalram_pages += free_all_bootmem(); |
8818 | /* XEN: init and count low-mem pages outside initial allocation. */ |
8819 | @@ -694,6 +677,48 @@ |
8820 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) |
8821 | ); |
8822 | |
8823 | +#if 1 /* double-sanity-check paranoia */ |
8824 | + printk("virtual kernel memory layout:\n" |
8825 | + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" |
8826 | +#ifdef CONFIG_HIGHMEM |
8827 | + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" |
8828 | +#endif |
8829 | + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" |
8830 | + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" |
8831 | + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" |
8832 | + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" |
8833 | + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", |
8834 | + FIXADDR_START, FIXADDR_TOP, |
8835 | + (FIXADDR_TOP - FIXADDR_START) >> 10, |
8836 | + |
8837 | +#ifdef CONFIG_HIGHMEM |
8838 | + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, |
8839 | + (LAST_PKMAP*PAGE_SIZE) >> 10, |
8840 | +#endif |
8841 | + |
8842 | + VMALLOC_START, VMALLOC_END, |
8843 | + (VMALLOC_END - VMALLOC_START) >> 20, |
8844 | + |
8845 | + (unsigned long)__va(0), (unsigned long)high_memory, |
8846 | + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, |
8847 | + |
8848 | + (unsigned long)&__init_begin, (unsigned long)&__init_end, |
8849 | + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, |
8850 | + |
8851 | + (unsigned long)&_etext, (unsigned long)&_edata, |
8852 | + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, |
8853 | + |
8854 | + (unsigned long)&_text, (unsigned long)&_etext, |
8855 | + ((unsigned long)&_etext - (unsigned long)&_text) >> 10); |
8856 | + |
8857 | +#ifdef CONFIG_HIGHMEM |
8858 | + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); |
8859 | + BUG_ON(VMALLOC_END > PKMAP_BASE); |
8860 | +#endif |
8861 | + BUG_ON(VMALLOC_START > VMALLOC_END); |
8862 | + BUG_ON((unsigned long)high_memory > VMALLOC_START); |
8863 | +#endif /* double-sanity-check paranoia */ |
8864 | + |
8865 | #ifdef CONFIG_X86_PAE |
8866 | if (!cpu_has_pae) |
8867 | panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); |
8868 | @@ -724,7 +749,7 @@ |
8869 | int arch_add_memory(int nid, u64 start, u64 size) |
8870 | { |
8871 | struct pglist_data *pgdata = &contig_page_data; |
8872 | - struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; |
8873 | + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; |
8874 | unsigned long start_pfn = start >> PAGE_SHIFT; |
8875 | unsigned long nr_pages = size >> PAGE_SHIFT; |
8876 | |
8877 | --- a/arch/x86/mm/init_64-xen.c |
8878 | +++ b/arch/x86/mm/init_64-xen.c |
8879 | @@ -61,8 +61,6 @@ |
8880 | |
8881 | extern unsigned long *contiguous_bitmap; |
8882 | |
8883 | -static unsigned long dma_reserve __initdata; |
8884 | - |
8885 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
8886 | extern unsigned long start_pfn; |
8887 | |
8888 | @@ -416,7 +414,6 @@ |
8889 | |
8890 | /* actually usually some more */ |
8891 | if (size >= LARGE_PAGE_SIZE) { |
8892 | - printk("SMBIOS area too long %lu\n", size); |
8893 | return NULL; |
8894 | } |
8895 | set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); |
8896 | @@ -438,13 +435,15 @@ |
8897 | #endif |
8898 | |
8899 | static void __meminit |
8900 | -phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) |
8901 | +phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) |
8902 | { |
8903 | - int i, k; |
8904 | + int i = pmd_index(address); |
8905 | |
8906 | - for (i = 0; i < PTRS_PER_PMD; pmd++, i++) { |
8907 | + for (; i < PTRS_PER_PMD; i++) { |
8908 | unsigned long pte_phys; |
8909 | + pmd_t *pmd = pmd_page + i; |
8910 | pte_t *pte, *pte_save; |
8911 | + int k; |
8912 | |
8913 | if (address >= end) { |
8914 | if (!after_bootmem) |
8915 | @@ -452,6 +451,12 @@ |
8916 | set_pmd(pmd, __pmd(0)); |
8917 | break; |
8918 | } |
8919 | + |
8920 | + if (__pmd_val(*pmd)) { |
8921 | + address += PMD_SIZE; |
8922 | + continue; |
8923 | + } |
8924 | + |
8925 | pte = alloc_static_page(&pte_phys); |
8926 | pte_save = pte; |
8927 | for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) { |
8928 | @@ -474,40 +479,35 @@ |
8929 | static void __meminit |
8930 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) |
8931 | { |
8932 | - pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address)); |
8933 | - |
8934 | - if (pmd_none(*pmd)) { |
8935 | - spin_lock(&init_mm.page_table_lock); |
8936 | - phys_pmd_init(pmd, address, end); |
8937 | - spin_unlock(&init_mm.page_table_lock); |
8938 | - __flush_tlb_all(); |
8939 | - } |
8940 | + pmd_t *pmd = pmd_offset(pud,0); |
8941 | + spin_lock(&init_mm.page_table_lock); |
8942 | + phys_pmd_init(pmd, address, end); |
8943 | + spin_unlock(&init_mm.page_table_lock); |
8944 | + __flush_tlb_all(); |
8945 | } |
8946 | |
8947 | -static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) |
8948 | +static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) |
8949 | { |
8950 | - long i = pud_index(address); |
8951 | - |
8952 | - pud = pud + i; |
8953 | - |
8954 | - if (after_bootmem && pud_val(*pud)) { |
8955 | - phys_pmd_update(pud, address, end); |
8956 | - return; |
8957 | - } |
8958 | + int i = pud_index(addr); |
8959 | |
8960 | - for (; i < PTRS_PER_PUD; pud++, i++) { |
8961 | - unsigned long paddr, pmd_phys; |
8962 | + for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { |
8963 | + unsigned long pmd_phys; |
8964 | + pud_t *pud = pud_page + pud_index(addr); |
8965 | pmd_t *pmd; |
8966 | |
8967 | - paddr = (address & PGDIR_MASK) + i*PUD_SIZE; |
8968 | - if (paddr >= end) |
8969 | + if (addr >= end) |
8970 | break; |
8971 | |
8972 | + if (__pud_val(*pud)) { |
8973 | + phys_pmd_update(pud, addr, end); |
8974 | + continue; |
8975 | + } |
8976 | + |
8977 | pmd = alloc_static_page(&pmd_phys); |
8978 | early_make_page_readonly(pmd, XENFEAT_writable_page_tables); |
8979 | spin_lock(&init_mm.page_table_lock); |
8980 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); |
8981 | - phys_pmd_init(pmd, paddr, end); |
8982 | + phys_pmd_init(pmd, addr, end); |
8983 | spin_unlock(&init_mm.page_table_lock); |
8984 | } |
8985 | __flush_tlb(); |
8986 | @@ -771,69 +771,18 @@ |
8987 | #endif |
8988 | } |
8989 | |
8990 | -/* Compute zone sizes for the DMA and DMA32 zones in a node. */ |
8991 | -__init void |
8992 | -size_zones(unsigned long *z, unsigned long *h, |
8993 | - unsigned long start_pfn, unsigned long end_pfn) |
8994 | -{ |
8995 | - int i; |
8996 | - unsigned long w; |
8997 | - |
8998 | - for (i = 0; i < MAX_NR_ZONES; i++) |
8999 | - z[i] = 0; |
9000 | - |
9001 | - if (start_pfn < MAX_DMA_PFN) |
9002 | - z[ZONE_DMA] = MAX_DMA_PFN - start_pfn; |
9003 | - if (start_pfn < MAX_DMA32_PFN) { |
9004 | - unsigned long dma32_pfn = MAX_DMA32_PFN; |
9005 | - if (dma32_pfn > end_pfn) |
9006 | - dma32_pfn = end_pfn; |
9007 | - z[ZONE_DMA32] = dma32_pfn - start_pfn; |
9008 | - } |
9009 | - z[ZONE_NORMAL] = end_pfn - start_pfn; |
9010 | - |
9011 | - /* Remove lower zones from higher ones. */ |
9012 | - w = 0; |
9013 | - for (i = 0; i < MAX_NR_ZONES; i++) { |
9014 | - if (z[i]) |
9015 | - z[i] -= w; |
9016 | - w += z[i]; |
9017 | - } |
9018 | - |
9019 | - /* Compute holes */ |
9020 | - w = start_pfn; |
9021 | - for (i = 0; i < MAX_NR_ZONES; i++) { |
9022 | - unsigned long s = w; |
9023 | - w += z[i]; |
9024 | - h[i] = e820_hole_size(s, w); |
9025 | - } |
9026 | - |
9027 | - /* Add the space pace needed for mem_map to the holes too. */ |
9028 | - for (i = 0; i < MAX_NR_ZONES; i++) |
9029 | - h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE; |
9030 | - |
9031 | - /* The 16MB DMA zone has the kernel and other misc mappings. |
9032 | - Account them too */ |
9033 | - if (h[ZONE_DMA]) { |
9034 | - h[ZONE_DMA] += dma_reserve; |
9035 | - if (h[ZONE_DMA] >= z[ZONE_DMA]) { |
9036 | - printk(KERN_WARNING |
9037 | - "Kernel too large and filling up ZONE_DMA?\n"); |
9038 | - h[ZONE_DMA] = z[ZONE_DMA]; |
9039 | - } |
9040 | - } |
9041 | -} |
9042 | - |
9043 | #ifndef CONFIG_NUMA |
9044 | void __init paging_init(void) |
9045 | { |
9046 | - unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; |
9047 | + unsigned long max_zone_pfns[MAX_NR_ZONES]; |
9048 | + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
9049 | + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; |
9050 | + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; |
9051 | + max_zone_pfns[ZONE_NORMAL] = end_pfn; |
9052 | |
9053 | memory_present(0, 0, end_pfn); |
9054 | sparse_init(); |
9055 | - size_zones(zones, holes, 0, end_pfn); |
9056 | - free_area_init_node(0, NODE_DATA(0), zones, |
9057 | - __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); |
9058 | + free_area_init_nodes(max_zone_pfns); |
9059 | |
9060 | init_mm.context.pinned = 1; |
9061 | } |
9062 | @@ -887,36 +836,23 @@ |
9063 | |
9064 | #ifdef CONFIG_MEMORY_HOTPLUG |
9065 | /* |
9066 | - * XXX: memory_add_physaddr_to_nid() is to find node id from physical address |
9067 | - * via probe interface of sysfs. If acpi notifies hot-add event, then it |
9068 | - * can tell node id by searching dsdt. But, probe interface doesn't have |
9069 | - * node id. So, return 0 as node id at this time. |
9070 | - */ |
9071 | -#ifdef CONFIG_NUMA |
9072 | -int memory_add_physaddr_to_nid(u64 start) |
9073 | -{ |
9074 | - return 0; |
9075 | -} |
9076 | -#endif |
9077 | - |
9078 | -/* |
9079 | * Memory is added always to NORMAL zone. This means you will never get |
9080 | * additional DMA/DMA32 memory. |
9081 | */ |
9082 | int arch_add_memory(int nid, u64 start, u64 size) |
9083 | { |
9084 | struct pglist_data *pgdat = NODE_DATA(nid); |
9085 | - struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; |
9086 | + struct zone *zone = pgdat->node_zones + ZONE_NORMAL; |
9087 | unsigned long start_pfn = start >> PAGE_SHIFT; |
9088 | unsigned long nr_pages = size >> PAGE_SHIFT; |
9089 | int ret; |
9090 | |
9091 | + init_memory_mapping(start, (start + size -1)); |
9092 | + |
9093 | ret = __add_pages(zone, start_pfn, nr_pages); |
9094 | if (ret) |
9095 | goto error; |
9096 | |
9097 | - init_memory_mapping(start, (start + size -1)); |
9098 | - |
9099 | return ret; |
9100 | error: |
9101 | printk("%s: Problem encountered in __add_pages!\n", __func__); |
9102 | @@ -930,7 +866,17 @@ |
9103 | } |
9104 | EXPORT_SYMBOL_GPL(remove_memory); |
9105 | |
9106 | -#else /* CONFIG_MEMORY_HOTPLUG */ |
9107 | +#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) |
9108 | +int memory_add_physaddr_to_nid(u64 start) |
9109 | +{ |
9110 | + return 0; |
9111 | +} |
9112 | +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); |
9113 | +#endif |
9114 | + |
9115 | +#endif /* CONFIG_MEMORY_HOTPLUG */ |
9116 | + |
9117 | +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE |
9118 | /* |
9119 | * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance, |
9120 | * just online the pages. |
9121 | @@ -956,7 +902,7 @@ |
9122 | } |
9123 | return err; |
9124 | } |
9125 | -#endif /* CONFIG_MEMORY_HOTPLUG */ |
9126 | +#endif |
9127 | |
9128 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, |
9129 | kcore_vsyscall; |
9130 | @@ -973,12 +919,6 @@ |
9131 | |
9132 | pci_iommu_alloc(); |
9133 | |
9134 | - /* How many end-of-memory variables you have, grandma! */ |
9135 | - max_low_pfn = end_pfn; |
9136 | - max_pfn = end_pfn; |
9137 | - num_physpages = end_pfn; |
9138 | - high_memory = (void *) __va(end_pfn * PAGE_SIZE); |
9139 | - |
9140 | /* clear the zero-page */ |
9141 | memset(empty_zero_page, 0, PAGE_SIZE); |
9142 | |
9143 | @@ -996,7 +936,8 @@ |
9144 | init_page_count(pfn_to_page(pfn)); |
9145 | totalram_pages++; |
9146 | } |
9147 | - reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn); |
9148 | + reservedpages = end_pfn - totalram_pages - |
9149 | + absent_pages_in_range(0, end_pfn); |
9150 | |
9151 | after_bootmem = 1; |
9152 | |
9153 | @@ -1103,15 +1044,34 @@ |
9154 | |
9155 | void __init reserve_bootmem_generic(unsigned long phys, unsigned len) |
9156 | { |
9157 | - /* Should check here against the e820 map to avoid double free */ |
9158 | #ifdef CONFIG_NUMA |
9159 | int nid = phys_to_nid(phys); |
9160 | +#endif |
9161 | + unsigned long pfn = phys >> PAGE_SHIFT; |
9162 | + if (pfn >= end_pfn) { |
9163 | + /* This can happen with kdump kernels when accessing firmware |
9164 | + tables. */ |
9165 | + if (pfn < end_pfn_map) |
9166 | + return; |
9167 | + printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", |
9168 | + phys, len); |
9169 | + return; |
9170 | + } |
9171 | + |
9172 | + /* Should check here against the e820 map to avoid double free */ |
9173 | +#ifdef CONFIG_NUMA |
9174 | reserve_bootmem_node(NODE_DATA(nid), phys, len); |
9175 | #else |
9176 | reserve_bootmem(phys, len); |
9177 | #endif |
9178 | - if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) |
9179 | +#ifndef CONFIG_XEN |
9180 | + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { |
9181 | + static unsigned long dma_reserve __initdata; |
9182 | + |
9183 | dma_reserve += len / PAGE_SIZE; |
9184 | + set_dma_reserve(dma_reserve); |
9185 | + } |
9186 | +#endif |
9187 | } |
9188 | |
9189 | int kern_addr_valid(unsigned long addr) |
9190 | --- a/arch/x86/mm/ioremap_32-xen.c |
9191 | +++ b/arch/x86/mm/ioremap_32-xen.c |
9192 | @@ -12,7 +12,7 @@ |
9193 | #include <linux/init.h> |
9194 | #include <linux/slab.h> |
9195 | #include <linux/module.h> |
9196 | -#include <asm/io.h> |
9197 | +#include <linux/io.h> |
9198 | #include <asm/fixmap.h> |
9199 | #include <asm/cacheflush.h> |
9200 | #include <asm/tlbflush.h> |
9201 | @@ -118,7 +118,7 @@ |
9202 | if (domid == DOMID_SELF) |
9203 | return -EINVAL; |
9204 | |
9205 | - vma->vm_flags |= VM_IO | VM_RESERVED; |
9206 | + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; |
9207 | |
9208 | vma->vm_mm->context.has_foreign_mappings = 1; |
9209 | |
9210 | @@ -203,6 +203,7 @@ |
9211 | void __iomem * addr; |
9212 | struct vm_struct * area; |
9213 | unsigned long offset, last_addr; |
9214 | + pgprot_t prot; |
9215 | domid_t domid = DOMID_IO; |
9216 | |
9217 | /* Don't allow wraparound or zero size */ |
9218 | @@ -234,6 +235,8 @@ |
9219 | domid = DOMID_SELF; |
9220 | } |
9221 | |
9222 | + prot = __pgprot(_KERNPG_TABLE | flags); |
9223 | + |
9224 | /* |
9225 | * Mappings have to be page-aligned |
9226 | */ |
9227 | @@ -249,10 +252,9 @@ |
9228 | return NULL; |
9229 | area->phys_addr = phys_addr; |
9230 | addr = (void __iomem *) area->addr; |
9231 | - flags |= _KERNPG_TABLE; |
9232 | if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr, |
9233 | phys_addr>>PAGE_SHIFT, |
9234 | - size, __pgprot(flags), domid)) { |
9235 | + size, prot, domid)) { |
9236 | vunmap((void __force *) addr); |
9237 | return NULL; |
9238 | } |
9239 | --- a/arch/x86/mm/pageattr_64-xen.c |
9240 | +++ b/arch/x86/mm/pageattr_64-xen.c |
9241 | @@ -371,8 +371,8 @@ |
9242 | BUG_ON(pud_none(*pud)); |
9243 | pmd = pmd_offset(pud, address); |
9244 | BUG_ON(__pmd_val(*pmd) & _PAGE_PSE); |
9245 | - pgprot_val(ref_prot) |= _PAGE_PSE; |
9246 | large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); |
9247 | + large_pte = pte_mkhuge(large_pte); |
9248 | set_pte((pte_t *)pmd, large_pte); |
9249 | } |
9250 | |
9251 | @@ -382,32 +382,28 @@ |
9252 | { |
9253 | pte_t *kpte; |
9254 | struct page *kpte_page; |
9255 | - unsigned kpte_flags; |
9256 | pgprot_t ref_prot2; |
9257 | kpte = lookup_address(address); |
9258 | if (!kpte) return 0; |
9259 | kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); |
9260 | - kpte_flags = pte_val(*kpte); |
9261 | if (pgprot_val(prot) != pgprot_val(ref_prot)) { |
9262 | - if ((kpte_flags & _PAGE_PSE) == 0) { |
9263 | + if (!pte_huge(*kpte)) { |
9264 | set_pte(kpte, pfn_pte(pfn, prot)); |
9265 | } else { |
9266 | /* |
9267 | * split_large_page will take the reference for this |
9268 | * change_page_attr on the split page. |
9269 | */ |
9270 | - |
9271 | struct page *split; |
9272 | - ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE)); |
9273 | - |
9274 | + ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); |
9275 | split = split_large_page(address, prot, ref_prot2); |
9276 | if (!split) |
9277 | return -ENOMEM; |
9278 | - set_pte(kpte,mk_pte(split, ref_prot2)); |
9279 | + set_pte(kpte, mk_pte(split, ref_prot2)); |
9280 | kpte_page = split; |
9281 | - } |
9282 | + } |
9283 | page_private(kpte_page)++; |
9284 | - } else if ((kpte_flags & _PAGE_PSE) == 0) { |
9285 | + } else if (!pte_huge(*kpte)) { |
9286 | set_pte(kpte, pfn_pte(pfn, ref_prot)); |
9287 | BUG_ON(page_private(kpte_page) == 0); |
9288 | page_private(kpte_page)--; |
9289 | @@ -464,10 +460,12 @@ |
9290 | * lowmem */ |
9291 | if (__pa(address) < KERNEL_TEXT_SIZE) { |
9292 | unsigned long addr2; |
9293 | - pgprot_t prot2 = prot; |
9294 | + pgprot_t prot2; |
9295 | addr2 = __START_KERNEL_map + __pa(address); |
9296 | - pgprot_val(prot2) &= ~_PAGE_NX; |
9297 | - err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC); |
9298 | + /* Make sure the kernel mappings stay executable */ |
9299 | + prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); |
9300 | + err = __change_page_attr(addr2, pfn, prot2, |
9301 | + PAGE_KERNEL_EXEC); |
9302 | } |
9303 | } |
9304 | up_write(&init_mm.mmap_sem); |
9305 | --- a/arch/x86/mm/pgtable_32-xen.c |
9306 | +++ b/arch/x86/mm/pgtable_32-xen.c |
9307 | @@ -68,7 +68,9 @@ |
9308 | printk(KERN_INFO "%lu pages writeback\n", |
9309 | global_page_state(NR_WRITEBACK)); |
9310 | printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); |
9311 | - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB)); |
9312 | + printk(KERN_INFO "%lu pages slab\n", |
9313 | + global_page_state(NR_SLAB_RECLAIMABLE) + |
9314 | + global_page_state(NR_SLAB_UNRECLAIMABLE)); |
9315 | printk(KERN_INFO "%lu pages pagetables\n", |
9316 | global_page_state(NR_PAGETABLE)); |
9317 | } |
9318 | @@ -108,18 +110,11 @@ |
9319 | __flush_tlb_one(vaddr); |
9320 | } |
9321 | |
9322 | -static int nr_fixmaps = 0; |
9323 | +static int fixmaps; |
9324 | unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START; |
9325 | -unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE); |
9326 | +unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE); |
9327 | EXPORT_SYMBOL(__FIXADDR_TOP); |
9328 | |
9329 | -void __init set_fixaddr_top(unsigned long top) |
9330 | -{ |
9331 | - BUG_ON(nr_fixmaps > 0); |
9332 | - hypervisor_virt_start = top; |
9333 | - __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE; |
9334 | -} |
9335 | - |
9336 | void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags) |
9337 | { |
9338 | unsigned long address = __fix_to_virt(idx); |
9339 | @@ -141,7 +136,21 @@ |
9340 | if (HYPERVISOR_update_va_mapping(address, pte, |
9341 | UVMF_INVLPG|UVMF_ALL)) |
9342 | BUG(); |
9343 | - nr_fixmaps++; |
9344 | + fixmaps++; |
9345 | +} |
9346 | + |
9347 | +/** |
9348 | + * reserve_top_address - reserves a hole in the top of kernel address space |
9349 | + * @reserve - size of hole to reserve |
9350 | + * |
9351 | + * Can be used to relocate the fixmap area and poke a hole in the top |
9352 | + * of kernel address space to make room for a hypervisor. |
9353 | + */ |
9354 | +void __init reserve_top_address(unsigned long reserve) |
9355 | +{ |
9356 | + BUG_ON(fixmaps > 0); |
9357 | + __FIXADDR_TOP = -reserve - PAGE_SIZE; |
9358 | + __VMALLOC_RESERVE += reserve; |
9359 | } |
9360 | |
9361 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
9362 | --- a/arch/x86/pci/irq-xen.c |
9363 | +++ b/arch/x86/pci/irq-xen.c |
9364 | @@ -991,10 +991,6 @@ |
9365 | pci_name(bridge), 'A' + pin, irq); |
9366 | } |
9367 | if (irq >= 0) { |
9368 | - if (use_pci_vector() && |
9369 | - !platform_legacy_irq(irq)) |
9370 | - irq = IO_APIC_VECTOR(irq); |
9371 | - |
9372 | printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", |
9373 | pci_name(dev), 'A' + pin, irq); |
9374 | dev->irq = irq; |
9375 | @@ -1155,10 +1151,6 @@ |
9376 | } |
9377 | dev = temp_dev; |
9378 | if (irq >= 0) { |
9379 | -#ifdef CONFIG_PCI_MSI |
9380 | - if (!platform_legacy_irq(irq)) |
9381 | - irq = IO_APIC_VECTOR(irq); |
9382 | -#endif |
9383 | printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", |
9384 | pci_name(dev), 'A' + pin, irq); |
9385 | dev->irq = irq; |
9386 | @@ -1179,33 +1171,3 @@ |
9387 | } |
9388 | return 0; |
9389 | } |
9390 | - |
9391 | -int pci_vector_resources(int last, int nr_released) |
9392 | -{ |
9393 | - int count = nr_released; |
9394 | - |
9395 | - int next = last; |
9396 | - int offset = (last % 8); |
9397 | - |
9398 | - while (next < FIRST_SYSTEM_VECTOR) { |
9399 | - next += 8; |
9400 | -#ifdef CONFIG_X86_64 |
9401 | - if (next == IA32_SYSCALL_VECTOR) |
9402 | - continue; |
9403 | -#else |
9404 | - if (next == SYSCALL_VECTOR) |
9405 | - continue; |
9406 | -#endif |
9407 | - count++; |
9408 | - if (next >= FIRST_SYSTEM_VECTOR) { |
9409 | - if (offset%8) { |
9410 | - next = FIRST_DEVICE_VECTOR + offset; |
9411 | - offset++; |
9412 | - continue; |
9413 | - } |
9414 | - count--; |
9415 | - } |
9416 | - } |
9417 | - |
9418 | - return count; |
9419 | -} |
9420 | --- a/drivers/char/tpm/tpm_xen.c |
9421 | +++ b/drivers/char/tpm/tpm_xen.c |
9422 | @@ -85,8 +85,7 @@ |
9423 | |
9424 | /* local function prototypes */ |
9425 | static irqreturn_t tpmif_int(int irq, |
9426 | - void *tpm_priv, |
9427 | - struct pt_regs *ptregs); |
9428 | + void *tpm_priv); |
9429 | static void tpmif_rx_action(unsigned long unused); |
9430 | static int tpmif_connect(struct xenbus_device *dev, |
9431 | struct tpm_private *tp, |
9432 | @@ -559,7 +558,7 @@ |
9433 | } |
9434 | |
9435 | |
9436 | -static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs) |
9437 | +static irqreturn_t tpmif_int(int irq, void *tpm_priv) |
9438 | { |
9439 | struct tpm_private *tp = tpm_priv; |
9440 | unsigned long flags; |
9441 | --- a/drivers/pci/Kconfig |
9442 | +++ b/drivers/pci/Kconfig |
9443 | @@ -45,7 +45,7 @@ |
9444 | config HT_IRQ |
9445 | bool "Interrupts on hypertransport devices" |
9446 | default y |
9447 | - depends on PCI && X86_LOCAL_APIC && X86_IO_APIC |
9448 | + depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN |
9449 | help |
9450 | This allows native hypertransport devices to use interrupts. |
9451 | |
9452 | --- a/drivers/xen/Kconfig |
9453 | +++ b/drivers/xen/Kconfig |
9454 | @@ -278,6 +278,9 @@ |
9455 | config HAVE_IRQ_IGNORE_UNHANDLED |
9456 | def_bool y |
9457 | |
9458 | +config GENERIC_HARDIRQS_NO__DO_IRQ |
9459 | + def_bool y |
9460 | + |
9461 | config NO_IDLE_HZ |
9462 | def_bool y |
9463 | |
9464 | --- a/drivers/xen/balloon/balloon.c |
9465 | +++ b/drivers/xen/balloon/balloon.c |
9466 | @@ -84,7 +84,7 @@ |
9467 | /* VM /proc information for memory */ |
9468 | extern unsigned long totalram_pages; |
9469 | |
9470 | -#ifndef MODULE |
9471 | +#if !defined(MODULE) && defined(CONFIG_HIGHMEM) |
9472 | extern unsigned long totalhigh_pages; |
9473 | #define inc_totalhigh_pages() (totalhigh_pages++) |
9474 | #define dec_totalhigh_pages() (totalhigh_pages--) |
9475 | --- a/drivers/xen/blkback/blkback.c |
9476 | +++ b/drivers/xen/blkback/blkback.c |
9477 | @@ -288,7 +288,7 @@ |
9478 | wake_up(&blkif->wq); |
9479 | } |
9480 | |
9481 | -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) |
9482 | +irqreturn_t blkif_be_int(int irq, void *dev_id) |
9483 | { |
9484 | blkif_notify_work(dev_id); |
9485 | return IRQ_HANDLED; |
9486 | --- a/drivers/xen/blkback/common.h |
9487 | +++ b/drivers/xen/blkback/common.h |
9488 | @@ -130,7 +130,7 @@ |
9489 | |
9490 | void blkif_xenbus_init(void); |
9491 | |
9492 | -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); |
9493 | +irqreturn_t blkif_be_int(int irq, void *dev_id); |
9494 | int blkif_schedule(void *arg); |
9495 | |
9496 | int blkback_barrier(struct xenbus_transaction xbt, |
9497 | --- a/drivers/xen/blkfront/blkfront.c |
9498 | +++ b/drivers/xen/blkfront/blkfront.c |
9499 | @@ -69,7 +69,7 @@ |
9500 | |
9501 | static void kick_pending_request_queues(struct blkfront_info *); |
9502 | |
9503 | -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs); |
9504 | +static irqreturn_t blkif_int(int irq, void *dev_id); |
9505 | static void blkif_restart_queue(void *arg); |
9506 | static void blkif_recover(struct blkfront_info *); |
9507 | static void blkif_completion(struct blk_shadow *); |
9508 | @@ -698,7 +698,7 @@ |
9509 | } |
9510 | |
9511 | |
9512 | -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) |
9513 | +static irqreturn_t blkif_int(int irq, void *dev_id) |
9514 | { |
9515 | struct request *req; |
9516 | blkif_response_t *bret; |
9517 | --- a/drivers/xen/blktap/blktap.c |
9518 | +++ b/drivers/xen/blktap/blktap.c |
9519 | @@ -1175,7 +1175,7 @@ |
9520 | wake_up(&blkif->wq); |
9521 | } |
9522 | |
9523 | -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) |
9524 | +irqreturn_t tap_blkif_be_int(int irq, void *dev_id) |
9525 | { |
9526 | blkif_notify_work(dev_id); |
9527 | return IRQ_HANDLED; |
9528 | --- a/drivers/xen/blktap/common.h |
9529 | +++ b/drivers/xen/blktap/common.h |
9530 | @@ -112,7 +112,7 @@ |
9531 | |
9532 | void tap_blkif_xenbus_init(void); |
9533 | |
9534 | -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); |
9535 | +irqreturn_t tap_blkif_be_int(int irq, void *dev_id); |
9536 | int tap_blkif_schedule(void *arg); |
9537 | |
9538 | int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif); |
9539 | --- a/drivers/xen/console/console.c |
9540 | +++ b/drivers/xen/console/console.c |
9541 | @@ -345,7 +345,7 @@ |
9542 | static int xencons_priv_irq; |
9543 | static char x_char; |
9544 | |
9545 | -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs) |
9546 | +void xencons_rx(char *buf, unsigned len) |
9547 | { |
9548 | int i; |
9549 | unsigned long flags; |
9550 | @@ -370,8 +370,7 @@ |
9551 | if (time_before(jiffies, sysrq_timeout)) { |
9552 | spin_unlock_irqrestore( |
9553 | &xencons_lock, flags); |
9554 | - handle_sysrq( |
9555 | - buf[i], regs, xencons_tty); |
9556 | + handle_sysrq(buf[i], xencons_tty); |
9557 | spin_lock_irqsave( |
9558 | &xencons_lock, flags); |
9559 | continue; |
9560 | @@ -436,14 +435,13 @@ |
9561 | } |
9562 | |
9563 | /* Privileged receive callback and transmit kicker. */ |
9564 | -static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id, |
9565 | - struct pt_regs *regs) |
9566 | +static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id) |
9567 | { |
9568 | static char rbuf[16]; |
9569 | int l; |
9570 | |
9571 | while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0) |
9572 | - xencons_rx(rbuf, l, regs); |
9573 | + xencons_rx(rbuf, l); |
9574 | |
9575 | xencons_tx(); |
9576 | |
9577 | --- a/drivers/xen/console/xencons_ring.c |
9578 | +++ b/drivers/xen/console/xencons_ring.c |
9579 | @@ -83,7 +83,7 @@ |
9580 | return sent; |
9581 | } |
9582 | |
9583 | -static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs) |
9584 | +static irqreturn_t handle_input(int irq, void *unused) |
9585 | { |
9586 | struct xencons_interface *intf = xencons_interface(); |
9587 | XENCONS_RING_IDX cons, prod; |
9588 | @@ -94,7 +94,7 @@ |
9589 | BUG_ON((prod - cons) > sizeof(intf->in)); |
9590 | |
9591 | while (cons != prod) { |
9592 | - xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs); |
9593 | + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1); |
9594 | cons++; |
9595 | } |
9596 | |
9597 | --- a/drivers/xen/core/evtchn.c |
9598 | +++ b/drivers/xen/core/evtchn.c |
9599 | @@ -507,7 +507,7 @@ |
9600 | |
9601 | int bind_caller_port_to_irqhandler( |
9602 | unsigned int caller_port, |
9603 | - irqreturn_t (*handler)(int, void *, struct pt_regs *), |
9604 | + irq_handler_t handler, |
9605 | unsigned long irqflags, |
9606 | const char *devname, |
9607 | void *dev_id) |
9608 | @@ -530,7 +530,7 @@ |
9609 | |
9610 | int bind_listening_port_to_irqhandler( |
9611 | unsigned int remote_domain, |
9612 | - irqreturn_t (*handler)(int, void *, struct pt_regs *), |
9613 | + irq_handler_t handler, |
9614 | unsigned long irqflags, |
9615 | const char *devname, |
9616 | void *dev_id) |
9617 | @@ -554,7 +554,7 @@ |
9618 | int bind_interdomain_evtchn_to_irqhandler( |
9619 | unsigned int remote_domain, |
9620 | unsigned int remote_port, |
9621 | - irqreturn_t (*handler)(int, void *, struct pt_regs *), |
9622 | + irq_handler_t handler, |
9623 | unsigned long irqflags, |
9624 | const char *devname, |
9625 | void *dev_id) |
9626 | @@ -578,7 +578,7 @@ |
9627 | int bind_virq_to_irqhandler( |
9628 | unsigned int virq, |
9629 | unsigned int cpu, |
9630 | - irqreturn_t (*handler)(int, void *, struct pt_regs *), |
9631 | + irq_handler_t handler, |
9632 | unsigned long irqflags, |
9633 | const char *devname, |
9634 | void *dev_id) |
9635 | @@ -602,7 +602,7 @@ |
9636 | int bind_ipi_to_irqhandler( |
9637 | unsigned int ipi, |
9638 | unsigned int cpu, |
9639 | - irqreturn_t (*handler)(int, void *, struct pt_regs *), |
9640 | + irq_handler_t handler, |
9641 | unsigned long irqflags, |
9642 | const char *devname, |
9643 | void *dev_id) |
9644 | @@ -687,15 +687,7 @@ |
9645 | return 0; |
9646 | } |
9647 | |
9648 | -static void shutdown_dynirq(unsigned int irq) |
9649 | -{ |
9650 | - int evtchn = evtchn_from_irq(irq); |
9651 | - |
9652 | - if (VALID_EVTCHN(evtchn)) |
9653 | - mask_evtchn(evtchn); |
9654 | -} |
9655 | - |
9656 | -static void enable_dynirq(unsigned int irq) |
9657 | +static void unmask_dynirq(unsigned int irq) |
9658 | { |
9659 | int evtchn = evtchn_from_irq(irq); |
9660 | |
9661 | @@ -703,7 +695,7 @@ |
9662 | unmask_evtchn(evtchn); |
9663 | } |
9664 | |
9665 | -static void disable_dynirq(unsigned int irq) |
9666 | +static void mask_dynirq(unsigned int irq) |
9667 | { |
9668 | int evtchn = evtchn_from_irq(irq); |
9669 | |
9670 | @@ -731,12 +723,12 @@ |
9671 | unmask_evtchn(evtchn); |
9672 | } |
9673 | |
9674 | -static struct hw_interrupt_type dynirq_type = { |
9675 | - .typename = "Dynamic-irq", |
9676 | +static struct irq_chip dynirq_chip = { |
9677 | + .name = "Dynamic-irq", |
9678 | .startup = startup_dynirq, |
9679 | - .shutdown = shutdown_dynirq, |
9680 | - .enable = enable_dynirq, |
9681 | - .disable = disable_dynirq, |
9682 | + .mask = mask_dynirq, |
9683 | + .unmask = unmask_dynirq, |
9684 | + .mask_ack = ack_dynirq, |
9685 | .ack = ack_dynirq, |
9686 | .end = end_dynirq, |
9687 | #ifdef CONFIG_SMP |
9688 | @@ -820,12 +812,12 @@ |
9689 | irq_info[irq] = IRQ_UNBOUND; |
9690 | } |
9691 | |
9692 | -static void enable_pirq(unsigned int irq) |
9693 | +static void unmask_pirq(unsigned int irq) |
9694 | { |
9695 | startup_pirq(irq); |
9696 | } |
9697 | |
9698 | -static void disable_pirq(unsigned int irq) |
9699 | +static void mask_pirq(unsigned int irq) |
9700 | { |
9701 | } |
9702 | |
9703 | @@ -854,12 +846,14 @@ |
9704 | } |
9705 | } |
9706 | |
9707 | -static struct hw_interrupt_type pirq_type = { |
9708 | +static struct irq_chip pirq_chip = { |
9709 | + .name = "Phys-irq", |
9710 | .typename = "Phys-irq", |
9711 | .startup = startup_pirq, |
9712 | .shutdown = shutdown_pirq, |
9713 | - .enable = enable_pirq, |
9714 | - .disable = disable_pirq, |
9715 | + .mask = mask_pirq, |
9716 | + .unmask = unmask_pirq, |
9717 | + .mask_ack = ack_pirq, |
9718 | .ack = ack_pirq, |
9719 | .end = end_pirq, |
9720 | #ifdef CONFIG_SMP |
9721 | @@ -1043,7 +1037,8 @@ |
9722 | irq_desc[dynirq_to_irq(i)].status = IRQ_DISABLED; |
9723 | irq_desc[dynirq_to_irq(i)].action = NULL; |
9724 | irq_desc[dynirq_to_irq(i)].depth = 1; |
9725 | - irq_desc[dynirq_to_irq(i)].chip = &dynirq_type; |
9726 | + set_irq_chip_and_handler_name(dynirq_to_irq(i), &dynirq_chip, |
9727 | + handle_level_irq, "level"); |
9728 | } |
9729 | |
9730 | /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */ |
9731 | @@ -1059,6 +1054,7 @@ |
9732 | irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED; |
9733 | irq_desc[pirq_to_irq(i)].action = NULL; |
9734 | irq_desc[pirq_to_irq(i)].depth = 1; |
9735 | - irq_desc[pirq_to_irq(i)].chip = &pirq_type; |
9736 | + set_irq_chip_and_handler_name(pirq_to_irq(i), &pirq_chip, |
9737 | + handle_level_irq, "level"); |
9738 | } |
9739 | } |
9740 | --- a/drivers/xen/core/reboot.c |
9741 | +++ b/drivers/xen/core/reboot.c |
9742 | @@ -13,6 +13,7 @@ |
9743 | |
9744 | #ifdef HAVE_XEN_PLATFORM_COMPAT_H |
9745 | #include <xen/platform-compat.h> |
9746 | +#undef handle_sysrq |
9747 | #endif |
9748 | |
9749 | MODULE_LICENSE("Dual BSD/GPL"); |
9750 | @@ -203,7 +204,7 @@ |
9751 | |
9752 | #ifdef CONFIG_MAGIC_SYSRQ |
9753 | if (sysrq_key != '\0') |
9754 | - handle_sysrq(sysrq_key, NULL, NULL); |
9755 | + handle_sysrq(sysrq_key, NULL); |
9756 | #endif |
9757 | } |
9758 | |
9759 | --- a/drivers/xen/core/smpboot.c |
9760 | +++ b/drivers/xen/core/smpboot.c |
9761 | @@ -25,8 +25,8 @@ |
9762 | #include <xen/cpu_hotplug.h> |
9763 | #include <xen/xenbus.h> |
9764 | |
9765 | -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); |
9766 | -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); |
9767 | +extern irqreturn_t smp_reschedule_interrupt(int, void *); |
9768 | +extern irqreturn_t smp_call_function_interrupt(int, void *); |
9769 | |
9770 | extern int local_setup_timer(unsigned int cpu); |
9771 | extern void local_teardown_timer(unsigned int cpu); |
9772 | @@ -66,8 +66,6 @@ |
9773 | #if defined(__i386__) |
9774 | u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff }; |
9775 | EXPORT_SYMBOL(x86_cpu_to_apicid); |
9776 | -#elif !defined(CONFIG_X86_IO_APIC) |
9777 | -unsigned int maxcpus = NR_CPUS; |
9778 | #endif |
9779 | |
9780 | void __init prefill_possible_map(void) |
9781 | --- a/drivers/xen/fbfront/xenfb.c |
9782 | +++ b/drivers/xen/fbfront/xenfb.c |
9783 | @@ -523,8 +523,7 @@ |
9784 | .fb_set_par = xenfb_set_par, |
9785 | }; |
9786 | |
9787 | -static irqreturn_t xenfb_event_handler(int rq, void *dev_id, |
9788 | - struct pt_regs *regs) |
9789 | +static irqreturn_t xenfb_event_handler(int rq, void *dev_id) |
9790 | { |
9791 | /* |
9792 | * No in events recognized, simply ignore them all. |
9793 | --- a/drivers/xen/fbfront/xenkbd.c |
9794 | +++ b/drivers/xen/fbfront/xenkbd.c |
9795 | @@ -46,7 +46,7 @@ |
9796 | * to do that. |
9797 | */ |
9798 | |
9799 | -static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs) |
9800 | +static irqreturn_t input_handler(int rq, void *dev_id) |
9801 | { |
9802 | struct xenkbd_info *info = dev_id; |
9803 | struct xenkbd_page *page = info->page; |
9804 | --- a/drivers/xen/gntdev/gntdev.c |
9805 | +++ b/drivers/xen/gntdev/gntdev.c |
9806 | @@ -755,9 +755,6 @@ |
9807 | BUG(); |
9808 | } |
9809 | |
9810 | - /* Copy the existing value of the PTE for returning. */ |
9811 | - copy = *ptep; |
9812 | - |
9813 | /* Calculate the grant relating to this PTE. */ |
9814 | slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); |
9815 | |
9816 | @@ -772,6 +769,10 @@ |
9817 | GNTDEV_INVALID_HANDLE && |
9818 | !xen_feature(XENFEAT_auto_translated_physmap)) { |
9819 | /* NOT USING SHADOW PAGE TABLES. */ |
9820 | + |
9821 | + /* Copy the existing value of the PTE for returning. */ |
9822 | + copy = *ptep; |
9823 | + |
9824 | gnttab_set_unmap_op(&op, virt_to_machine(ptep), |
9825 | GNTMAP_contains_pte, |
9826 | private_data->grants[slot_index] |
9827 | @@ -784,7 +785,7 @@ |
9828 | op.status); |
9829 | } else { |
9830 | /* USING SHADOW PAGE TABLES. */ |
9831 | - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm); |
9832 | + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm); |
9833 | } |
9834 | |
9835 | /* Finally, we unmap the grant from kernel space. */ |
9836 | @@ -812,7 +813,7 @@ |
9837 | >> PAGE_SHIFT, INVALID_P2M_ENTRY); |
9838 | |
9839 | } else { |
9840 | - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm); |
9841 | + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm); |
9842 | } |
9843 | |
9844 | return copy; |
9845 | --- a/drivers/xen/netback/accel.c |
9846 | +++ b/drivers/xen/netback/accel.c |
9847 | @@ -65,7 +65,7 @@ |
9848 | |
9849 | if (IS_ERR(eth_name)) { |
9850 | /* Probably means not present */ |
9851 | - DPRINTK("%s: no match due to xenbus_read accel error %d\n", |
9852 | + DPRINTK("%s: no match due to xenbus_read accel error %ld\n", |
9853 | __FUNCTION__, PTR_ERR(eth_name)); |
9854 | return 0; |
9855 | } else { |
9856 | --- a/drivers/xen/netback/common.h |
9857 | +++ b/drivers/xen/netback/common.h |
9858 | @@ -200,7 +200,7 @@ |
9859 | |
9860 | int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); |
9861 | struct net_device_stats *netif_be_get_stats(struct net_device *dev); |
9862 | -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs); |
9863 | +irqreturn_t netif_be_int(int irq, void *dev_id); |
9864 | |
9865 | static inline int netbk_can_queue(struct net_device *dev) |
9866 | { |
9867 | --- a/drivers/xen/netback/loopback.c |
9868 | +++ b/drivers/xen/netback/loopback.c |
9869 | @@ -151,7 +151,7 @@ |
9870 | np->stats.rx_bytes += skb->len; |
9871 | np->stats.rx_packets++; |
9872 | |
9873 | - if (skb->ip_summed == CHECKSUM_HW) { |
9874 | + if (skb->ip_summed == CHECKSUM_PARTIAL) { |
9875 | /* Defer checksum calculation. */ |
9876 | skb->proto_csum_blank = 1; |
9877 | /* Must be a local packet: assert its integrity. */ |
9878 | --- a/drivers/xen/netback/netback.c |
9879 | +++ b/drivers/xen/netback/netback.c |
9880 | @@ -677,7 +677,7 @@ |
9881 | id = meta[npo.meta_cons].id; |
9882 | flags = nr_frags ? NETRXF_more_data : 0; |
9883 | |
9884 | - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ |
9885 | + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ |
9886 | flags |= NETRXF_csum_blank | NETRXF_data_validated; |
9887 | else if (skb->proto_data_valid) /* remote but checksummed? */ |
9888 | flags |= NETRXF_data_validated; |
9889 | @@ -1441,7 +1441,7 @@ |
9890 | netif_idx_release(netif_page_index(page)); |
9891 | } |
9892 | |
9893 | -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) |
9894 | +irqreturn_t netif_be_int(int irq, void *dev_id) |
9895 | { |
9896 | netif_t *netif = dev_id; |
9897 | |
9898 | @@ -1508,7 +1508,7 @@ |
9899 | } |
9900 | |
9901 | #ifdef NETBE_DEBUG_INTERRUPT |
9902 | -static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) |
9903 | +static irqreturn_t netif_be_dbg(int irq, void *dev_id) |
9904 | { |
9905 | struct list_head *ent; |
9906 | netif_t *netif; |
9907 | --- a/drivers/xen/netfront/netfront.c |
9908 | +++ b/drivers/xen/netfront/netfront.c |
9909 | @@ -136,7 +136,7 @@ |
9910 | { |
9911 | return skb_is_gso(skb) && |
9912 | (!skb_gso_ok(skb, dev->features) || |
9913 | - unlikely(skb->ip_summed != CHECKSUM_HW)); |
9914 | + unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); |
9915 | } |
9916 | #else |
9917 | #define HAVE_GSO 0 |
9918 | @@ -222,7 +222,7 @@ |
9919 | static void network_alloc_rx_buffers(struct net_device *); |
9920 | static void send_fake_arp(struct net_device *); |
9921 | |
9922 | -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs); |
9923 | +static irqreturn_t netif_int(int irq, void *dev_id); |
9924 | |
9925 | #ifdef CONFIG_SYSFS |
9926 | static int xennet_sysfs_addif(struct net_device *netdev); |
9927 | @@ -992,7 +992,7 @@ |
9928 | tx->flags = 0; |
9929 | extra = NULL; |
9930 | |
9931 | - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ |
9932 | + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ |
9933 | tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; |
9934 | #ifdef CONFIG_XEN |
9935 | if (skb->proto_data_valid) /* remote but checksummed? */ |
9936 | @@ -1049,7 +1049,7 @@ |
9937 | return 0; |
9938 | } |
9939 | |
9940 | -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs) |
9941 | +static irqreturn_t netif_int(int irq, void *dev_id) |
9942 | { |
9943 | struct net_device *dev = dev_id; |
9944 | struct netfront_info *np = netdev_priv(dev); |
9945 | --- a/drivers/xen/pciback/pciback.h |
9946 | +++ b/drivers/xen/pciback/pciback.h |
9947 | @@ -87,7 +87,7 @@ |
9948 | void pciback_release_devices(struct pciback_device *pdev); |
9949 | |
9950 | /* Handles events from front-end */ |
9951 | -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs); |
9952 | +irqreturn_t pciback_handle_event(int irq, void *dev_id); |
9953 | void pciback_do_op(void *data); |
9954 | |
9955 | int pciback_xenbus_register(void); |
9956 | --- a/drivers/xen/pciback/pciback_ops.c |
9957 | +++ b/drivers/xen/pciback/pciback_ops.c |
9958 | @@ -85,7 +85,7 @@ |
9959 | test_and_schedule_op(pdev); |
9960 | } |
9961 | |
9962 | -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs) |
9963 | +irqreturn_t pciback_handle_event(int irq, void *dev_id) |
9964 | { |
9965 | struct pciback_device *pdev = dev_id; |
9966 | |
9967 | --- a/drivers/xen/pcifront/pci_op.c |
9968 | +++ b/drivers/xen/pcifront/pci_op.c |
9969 | @@ -392,10 +392,16 @@ |
9970 | |
9971 | d = pci_scan_single_device(b, devfn); |
9972 | if (d) { |
9973 | + int err; |
9974 | + |
9975 | dev_info(&pdev->xdev->dev, "New device on " |
9976 | "%04x:%02x:%02x.%02x found.\n", domain, bus, |
9977 | PCI_SLOT(devfn), PCI_FUNC(devfn)); |
9978 | - pci_bus_add_device(d); |
9979 | + err = pci_bus_add_device(d); |
9980 | + if (err) |
9981 | + dev_err(&pdev->xdev->dev, |
9982 | + "error %d adding device, continuing.\n", |
9983 | + err); |
9984 | } |
9985 | } |
9986 | |
9987 | --- a/drivers/xen/privcmd/compat_privcmd.c |
9988 | +++ b/drivers/xen/privcmd/compat_privcmd.c |
9989 | @@ -18,7 +18,6 @@ |
9990 | * Authors: Jimi Xenidis <jimix@watson.ibm.com> |
9991 | */ |
9992 | |
9993 | -#include <linux/config.h> |
9994 | #include <linux/compat.h> |
9995 | #include <linux/ioctl.h> |
9996 | #include <linux/syscalls.h> |
9997 | --- a/drivers/xen/privcmd/privcmd.c |
9998 | +++ b/drivers/xen/privcmd/privcmd.c |
9999 | @@ -236,7 +236,7 @@ |
10000 | #endif |
10001 | |
10002 | /* DONTCOPY is essential for Xen as copy_page_range is broken. */ |
10003 | - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; |
10004 | + vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY; |
10005 | vma->vm_ops = &privcmd_vm_ops; |
10006 | vma->vm_private_data = NULL; |
10007 | |
10008 | --- a/drivers/xen/sfc_netback/accel_xenbus.c |
10009 | +++ b/drivers/xen/sfc_netback/accel_xenbus.c |
10010 | @@ -68,8 +68,7 @@ |
10011 | |
10012 | |
10013 | /* Demultiplex a message IRQ from the frontend driver. */ |
10014 | -static irqreturn_t msgirq_from_frontend(int irq, void *context, |
10015 | - struct pt_regs *unused) |
10016 | +static irqreturn_t msgirq_from_frontend(int irq, void *context) |
10017 | { |
10018 | struct xenbus_device *dev = context; |
10019 | struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev); |
10020 | @@ -84,8 +83,7 @@ |
10021 | * functionally, but we need it to pass to the bind function, and may |
10022 | * get called spuriously |
10023 | */ |
10024 | -static irqreturn_t netirq_from_frontend(int irq, void *context, |
10025 | - struct pt_regs *unused) |
10026 | +static irqreturn_t netirq_from_frontend(int irq, void *context) |
10027 | { |
10028 | VPRINTK("netirq %d from device %s\n", irq, |
10029 | ((struct xenbus_device *)context)->nodename); |
10030 | --- a/drivers/xen/sfc_netfront/accel.h |
10031 | +++ b/drivers/xen/sfc_netfront/accel.h |
10032 | @@ -449,10 +449,8 @@ |
10033 | u32 ip, u16 port, u8 protocol); |
10034 | |
10035 | /* Process an IRQ received from back end driver */ |
10036 | -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, |
10037 | - struct pt_regs *unused); |
10038 | -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, |
10039 | - struct pt_regs *unused); |
10040 | +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context); |
10041 | +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context); |
10042 | |
10043 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) |
10044 | extern void netfront_accel_msg_from_bend(struct work_struct *context); |
10045 | --- a/drivers/xen/sfc_netfront/accel_msg.c |
10046 | +++ b/drivers/xen/sfc_netfront/accel_msg.c |
10047 | @@ -490,8 +490,7 @@ |
10048 | } |
10049 | |
10050 | |
10051 | -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, |
10052 | - struct pt_regs *unused) |
10053 | +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context) |
10054 | { |
10055 | netfront_accel_vnic *vnic = (netfront_accel_vnic *)context; |
10056 | VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename); |
10057 | @@ -502,8 +501,7 @@ |
10058 | } |
10059 | |
10060 | /* Process an interrupt received from the NIC via backend */ |
10061 | -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, |
10062 | - struct pt_regs *unused) |
10063 | +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context) |
10064 | { |
10065 | netfront_accel_vnic *vnic = (netfront_accel_vnic *)context; |
10066 | struct net_device *net_dev = vnic->net_dev; |
10067 | --- a/drivers/xen/sfc_netfront/accel_tso.c |
10068 | +++ b/drivers/xen/sfc_netfront/accel_tso.c |
10069 | @@ -363,7 +363,7 @@ |
10070 | |
10071 | tso_check_safe(skb); |
10072 | |
10073 | - if (skb->ip_summed != CHECKSUM_HW) |
10074 | + if (skb->ip_summed != CHECKSUM_PARTIAL) |
10075 | EPRINTK("Trying to TSO send a packet without HW checksum\n"); |
10076 | |
10077 | tso_start(&state, skb); |
10078 | --- a/drivers/xen/sfc_netfront/accel_vi.c |
10079 | +++ b/drivers/xen/sfc_netfront/accel_vi.c |
10080 | @@ -461,7 +461,7 @@ |
10081 | |
10082 | frag_i = -1; |
10083 | |
10084 | - if (skb->ip_summed == CHECKSUM_HW) { |
10085 | + if (skb->ip_summed == CHECKSUM_PARTIAL) { |
10086 | /* Set to zero to encourage falcon to work it out for us */ |
10087 | *(u16*)(skb->h.raw + skb->csum) = 0; |
10088 | } |
10089 | @@ -580,7 +580,7 @@ |
10090 | |
10091 | kva = buf->pkt_kva; |
10092 | |
10093 | - if (skb->ip_summed == CHECKSUM_HW) { |
10094 | + if (skb->ip_summed == CHECKSUM_PARTIAL) { |
10095 | /* Set to zero to encourage falcon to work it out for us */ |
10096 | *(u16*)(skb->h.raw + skb->csum) = 0; |
10097 | } |
10098 | --- a/drivers/xen/tpmback/common.h |
10099 | +++ b/drivers/xen/tpmback/common.h |
10100 | @@ -61,7 +61,7 @@ |
10101 | void tpmif_xenbus_init(void); |
10102 | void tpmif_xenbus_exit(void); |
10103 | int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn); |
10104 | -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs); |
10105 | +irqreturn_t tpmif_be_int(int irq, void *dev_id); |
10106 | |
10107 | long int tpmback_get_instance(struct backend_info *bi); |
10108 | |
10109 | --- a/drivers/xen/tpmback/tpmback.c |
10110 | +++ b/drivers/xen/tpmback/tpmback.c |
10111 | @@ -502,7 +502,7 @@ |
10112 | list_del(&pak->next); |
10113 | write_unlock_irqrestore(&dataex.pak_lock, flags); |
10114 | |
10115 | - DPRINTK("size given by app: %d, available: %d\n", size, left); |
10116 | + DPRINTK("size given by app: %zu, available: %u\n", size, left); |
10117 | |
10118 | ret_size = min_t(size_t, size, left); |
10119 | |
10120 | @@ -899,7 +899,7 @@ |
10121 | } |
10122 | } |
10123 | |
10124 | -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs) |
10125 | +irqreturn_t tpmif_be_int(int irq, void *dev_id) |
10126 | { |
10127 | tpmif_t *tpmif = (tpmif_t *) dev_id; |
10128 | |
10129 | --- a/drivers/xen/xenbus/xenbus_comms.c |
10130 | +++ b/drivers/xen/xenbus/xenbus_comms.c |
10131 | @@ -55,7 +55,7 @@ |
10132 | |
10133 | static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); |
10134 | |
10135 | -static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs) |
10136 | +static irqreturn_t wake_waiting(int irq, void *unused) |
10137 | { |
10138 | if (unlikely(xenstored_ready == 0)) { |
10139 | xenstored_ready = 1; |
10140 | --- a/drivers/xen/xenoprof/xenoprofile.c |
10141 | +++ b/drivers/xen/xenoprof/xenoprofile.c |
10142 | @@ -195,7 +195,7 @@ |
10143 | } |
10144 | |
10145 | static irqreturn_t |
10146 | -xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs) |
10147 | +xenoprof_ovf_interrupt(int irq, void * dev_id) |
10148 | { |
10149 | struct xenoprof_buf * buf; |
10150 | static unsigned long flag; |
10151 | --- a/include/asm-generic/pgtable.h |
10152 | +++ b/include/asm-generic/pgtable.h |
10153 | @@ -100,7 +100,7 @@ |
10154 | #endif |
10155 | |
10156 | #ifndef arch_change_pte_range |
10157 | -#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0 |
10158 | +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0 |
10159 | #endif |
10160 | |
10161 | #ifndef __HAVE_ARCH_PTE_SAME |
10162 | --- a/include/asm-x86/mach-xen/asm/desc_32.h |
10163 | +++ b/include/asm-x86/mach-xen/asm/desc_32.h |
10164 | @@ -32,52 +32,110 @@ |
10165 | return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; |
10166 | } |
10167 | |
10168 | +/* |
10169 | + * This is the ldt that every process will get unless we need |
10170 | + * something other than this. |
10171 | + */ |
10172 | +extern struct desc_struct default_ldt[]; |
10173 | +extern struct desc_struct idt_table[]; |
10174 | +extern void set_intr_gate(unsigned int irq, void * addr); |
10175 | + |
10176 | +static inline void pack_descriptor(__u32 *a, __u32 *b, |
10177 | + unsigned long base, unsigned long limit, unsigned char type, unsigned char flags) |
10178 | +{ |
10179 | + *a = ((base & 0xffff) << 16) | (limit & 0xffff); |
10180 | + *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) | |
10181 | + (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20); |
10182 | +} |
10183 | + |
10184 | +static inline void pack_gate(__u32 *a, __u32 *b, |
10185 | + unsigned long base, unsigned short seg, unsigned char type, unsigned char flags) |
10186 | +{ |
10187 | + *a = (seg << 16) | (base & 0xffff); |
10188 | + *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff); |
10189 | +} |
10190 | + |
10191 | +#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */ |
10192 | +#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */ |
10193 | +#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */ |
10194 | +#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */ |
10195 | +#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */ |
10196 | +#define DESCTYPE_DPL3 0x60 /* DPL-3 */ |
10197 | +#define DESCTYPE_S 0x10 /* !system */ |
10198 | + |
10199 | #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) |
10200 | #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) |
10201 | |
10202 | #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) |
10203 | #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) |
10204 | -#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr)) |
10205 | -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt)) |
10206 | +#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) |
10207 | +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) |
10208 | |
10209 | #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) |
10210 | #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) |
10211 | -#define store_tr(tr) __asm__ ("str %0":"=mr" (tr)) |
10212 | -#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt)) |
10213 | +#define store_tr(tr) __asm__ ("str %0":"=m" (tr)) |
10214 | +#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) |
10215 | |
10216 | -/* |
10217 | - * This is the ldt that every process will get unless we need |
10218 | - * something other than this. |
10219 | - */ |
10220 | -extern struct desc_struct default_ldt[]; |
10221 | -extern void set_intr_gate(unsigned int irq, void * addr); |
10222 | +#if TLS_SIZE != 24 |
10223 | +# error update this code. |
10224 | +#endif |
10225 | + |
10226 | +static inline void load_TLS(struct thread_struct *t, unsigned int cpu) |
10227 | +{ |
10228 | +#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \ |
10229 | + *(u64 *)&t->tls_array[i])) \ |
10230 | + BUG(); |
10231 | + C(0); C(1); C(2); |
10232 | +#undef C |
10233 | +} |
10234 | |
10235 | -#define _set_tssldt_desc(n,addr,limit,type) \ |
10236 | -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ |
10237 | - "movw %w1,2(%2)\n\t" \ |
10238 | - "rorl $16,%1\n\t" \ |
10239 | - "movb %b1,4(%2)\n\t" \ |
10240 | - "movb %4,5(%2)\n\t" \ |
10241 | - "movb $0,6(%2)\n\t" \ |
10242 | - "movb %h1,7(%2)\n\t" \ |
10243 | - "rorl $16,%1" \ |
10244 | - : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type)) |
10245 | +#ifndef CONFIG_XEN |
10246 | +static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) |
10247 | +{ |
10248 | + __u32 *lp = (__u32 *)((char *)dt + entry*8); |
10249 | + *lp = entry_a; |
10250 | + *(lp+1) = entry_b; |
10251 | +} |
10252 | |
10253 | -#ifndef CONFIG_X86_NO_TSS |
10254 | -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr) |
10255 | +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
10256 | +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
10257 | +#else |
10258 | +extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); |
10259 | +extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); |
10260 | +#endif |
10261 | +#ifndef CONFIG_X86_NO_IDT |
10262 | +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
10263 | + |
10264 | +static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) |
10265 | { |
10266 | - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr, |
10267 | - offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89); |
10268 | + __u32 a, b; |
10269 | + pack_gate(&a, &b, (unsigned long)addr, seg, type, 0); |
10270 | + write_idt_entry(idt_table, gate, a, b); |
10271 | } |
10272 | +#endif |
10273 | |
10274 | -#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) |
10275 | +#ifndef CONFIG_X86_NO_TSS |
10276 | +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr) |
10277 | +{ |
10278 | + __u32 a, b; |
10279 | + pack_descriptor(&a, &b, (unsigned long)addr, |
10280 | + offsetof(struct tss_struct, __cacheline_filler) - 1, |
10281 | + DESCTYPE_TSS, 0); |
10282 | + write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); |
10283 | +} |
10284 | #endif |
10285 | |
10286 | -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) |
10287 | +static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries) |
10288 | { |
10289 | - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); |
10290 | + __u32 a, b; |
10291 | + pack_descriptor(&a, &b, (unsigned long)addr, |
10292 | + entries * sizeof(struct desc_struct) - 1, |
10293 | + DESCTYPE_LDT, 0); |
10294 | + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); |
10295 | } |
10296 | |
10297 | +#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) |
10298 | + |
10299 | #define LDT_entry_a(info) \ |
10300 | ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) |
10301 | |
10302 | @@ -103,21 +161,6 @@ |
10303 | (info)->seg_not_present == 1 && \ |
10304 | (info)->useable == 0 ) |
10305 | |
10306 | -extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); |
10307 | - |
10308 | -#if TLS_SIZE != 24 |
10309 | -# error update this code. |
10310 | -#endif |
10311 | - |
10312 | -static inline void load_TLS(struct thread_struct *t, unsigned int cpu) |
10313 | -{ |
10314 | -#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \ |
10315 | - *(u64 *)&t->tls_array[i])) \ |
10316 | - BUG(); |
10317 | - C(0); C(1); C(2); |
10318 | -#undef C |
10319 | -} |
10320 | - |
10321 | static inline void clear_LDT(void) |
10322 | { |
10323 | int cpu = get_cpu(); |
10324 | --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h |
10325 | +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h |
10326 | @@ -55,13 +55,6 @@ |
10327 | extern struct dma_mapping_ops* dma_ops; |
10328 | extern int iommu_merge; |
10329 | |
10330 | -static inline int valid_dma_direction(int dma_direction) |
10331 | -{ |
10332 | - return ((dma_direction == DMA_BIDIRECTIONAL) || |
10333 | - (dma_direction == DMA_TO_DEVICE) || |
10334 | - (dma_direction == DMA_FROM_DEVICE)); |
10335 | -} |
10336 | - |
10337 | #if 0 |
10338 | static inline int dma_mapping_error(dma_addr_t dma_addr) |
10339 | { |
10340 | --- a/include/asm-x86/mach-xen/asm/e820_64.h |
10341 | +++ b/include/asm-x86/mach-xen/asm/e820_64.h |
10342 | @@ -19,13 +19,9 @@ |
10343 | |
10344 | #define E820_RAM 1 |
10345 | #define E820_RESERVED 2 |
10346 | -#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */ |
10347 | +#define E820_ACPI 3 |
10348 | #define E820_NVS 4 |
10349 | |
10350 | -#define HIGH_MEMORY (1024*1024) |
10351 | - |
10352 | -#define LOWMEMSIZE() (0x9f000) |
10353 | - |
10354 | #ifndef __ASSEMBLY__ |
10355 | struct e820entry { |
10356 | u64 addr; /* start of memory segment */ |
10357 | @@ -46,17 +42,16 @@ |
10358 | extern void contig_e820_setup(void); |
10359 | extern unsigned long e820_end_of_ram(void); |
10360 | extern void e820_reserve_resources(struct e820entry *e820, int nr_map); |
10361 | +extern void e820_mark_nosave_regions(void); |
10362 | extern void e820_print_map(char *who); |
10363 | extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); |
10364 | extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); |
10365 | |
10366 | -extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end); |
10367 | extern void e820_setup_gap(struct e820entry *e820, int nr_map); |
10368 | -extern unsigned long e820_hole_size(unsigned long start_pfn, |
10369 | - unsigned long end_pfn); |
10370 | +extern void e820_register_active_regions(int nid, |
10371 | + unsigned long start_pfn, unsigned long end_pfn); |
10372 | |
10373 | -extern void __init parse_memopt(char *p, char **end); |
10374 | -extern void __init parse_memmapopt(char *p, char **end); |
10375 | +extern void finish_e820_parsing(void); |
10376 | |
10377 | extern struct e820map e820; |
10378 | |
10379 | --- a/include/asm-x86/mach-xen/asm/fixmap_32.h |
10380 | +++ b/include/asm-x86/mach-xen/asm/fixmap_32.h |
10381 | @@ -55,7 +55,7 @@ |
10382 | #ifdef CONFIG_X86_LOCAL_APIC |
10383 | FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ |
10384 | #endif |
10385 | -#ifdef CONFIG_X86_IO_APIC |
10386 | +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN) |
10387 | FIX_IO_APIC_BASE_0, |
10388 | FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, |
10389 | #endif |
10390 | @@ -95,10 +95,9 @@ |
10391 | __end_of_fixed_addresses |
10392 | }; |
10393 | |
10394 | -extern void set_fixaddr_top(unsigned long top); |
10395 | - |
10396 | extern void __set_fixmap(enum fixed_addresses idx, |
10397 | maddr_t phys, pgprot_t flags); |
10398 | +extern void reserve_top_address(unsigned long reserve); |
10399 | |
10400 | #define set_fixmap(idx, phys) \ |
10401 | __set_fixmap(idx, phys, PAGE_KERNEL) |
10402 | --- a/include/asm-x86/mach-xen/asm/fixmap_64.h |
10403 | +++ b/include/asm-x86/mach-xen/asm/fixmap_64.h |
10404 | @@ -41,7 +41,7 @@ |
10405 | #ifdef CONFIG_X86_LOCAL_APIC |
10406 | FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ |
10407 | #endif |
10408 | -#ifdef CONFIG_X86_IO_APIC |
10409 | +#ifndef CONFIG_XEN |
10410 | FIX_IO_APIC_BASE_0, |
10411 | FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, |
10412 | #endif |
10413 | --- a/include/asm-x86/mach-xen/asm/hw_irq_32.h |
10414 | +++ b/include/asm-x86/mach-xen/asm/hw_irq_32.h |
10415 | @@ -17,8 +17,6 @@ |
10416 | #include <asm/irq.h> |
10417 | #include <asm/sections.h> |
10418 | |
10419 | -struct hw_interrupt_type; |
10420 | - |
10421 | #define NMI_VECTOR 0x02 |
10422 | |
10423 | /* |
10424 | @@ -28,10 +26,6 @@ |
10425 | * Interrupt entry/exit code at both C and assembly level |
10426 | */ |
10427 | |
10428 | -extern u8 irq_vector[NR_IRQ_VECTORS]; |
10429 | -#define IO_APIC_VECTOR(irq) (irq_vector[irq]) |
10430 | -#define AUTO_ASSIGN -1 |
10431 | - |
10432 | extern void (*interrupt[NR_IRQS])(void); |
10433 | |
10434 | #ifdef CONFIG_SMP |
10435 | @@ -44,7 +38,7 @@ |
10436 | fastcall void apic_timer_interrupt(void); |
10437 | fastcall void error_interrupt(void); |
10438 | fastcall void spurious_interrupt(void); |
10439 | -fastcall void thermal_interrupt(struct pt_regs *); |
10440 | +fastcall void thermal_interrupt(void); |
10441 | #define platform_legacy_irq(irq) ((irq) < 16) |
10442 | #endif |
10443 | |
10444 | --- a/include/asm-x86/mach-xen/asm/hw_irq_64.h |
10445 | +++ b/include/asm-x86/mach-xen/asm/hw_irq_64.h |
10446 | @@ -19,8 +19,7 @@ |
10447 | #include <asm/irq.h> |
10448 | #include <linux/profile.h> |
10449 | #include <linux/smp.h> |
10450 | - |
10451 | -struct hw_interrupt_type; |
10452 | +#include <linux/percpu.h> |
10453 | #endif |
10454 | |
10455 | #define NMI_VECTOR 0x02 |
10456 | @@ -77,9 +76,10 @@ |
10457 | |
10458 | |
10459 | #ifndef __ASSEMBLY__ |
10460 | -extern u8 irq_vector[NR_IRQ_VECTORS]; |
10461 | -#define IO_APIC_VECTOR(irq) (irq_vector[irq]) |
10462 | -#define AUTO_ASSIGN -1 |
10463 | +typedef int vector_irq_t[NR_VECTORS]; |
10464 | +DECLARE_PER_CPU(vector_irq_t, vector_irq); |
10465 | +extern void __setup_vector_irq(int cpu); |
10466 | +extern spinlock_t vector_lock; |
10467 | |
10468 | /* |
10469 | * Various low-level irq details needed by irq.c, process.c, |
10470 | --- a/include/asm-x86/mach-xen/asm/io_32.h |
10471 | +++ b/include/asm-x86/mach-xen/asm/io_32.h |
10472 | @@ -237,33 +237,6 @@ |
10473 | |
10474 | #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d)) |
10475 | |
10476 | -/** |
10477 | - * check_signature - find BIOS signatures |
10478 | - * @io_addr: mmio address to check |
10479 | - * @signature: signature block |
10480 | - * @length: length of signature |
10481 | - * |
10482 | - * Perform a signature comparison with the mmio address io_addr. This |
10483 | - * address should have been obtained by ioremap. |
10484 | - * Returns 1 on a match. |
10485 | - */ |
10486 | - |
10487 | -static inline int check_signature(volatile void __iomem * io_addr, |
10488 | - const unsigned char *signature, int length) |
10489 | -{ |
10490 | - int retval = 0; |
10491 | - do { |
10492 | - if (readb(io_addr) != *signature) |
10493 | - goto out; |
10494 | - io_addr++; |
10495 | - signature++; |
10496 | - length--; |
10497 | - } while (length); |
10498 | - retval = 1; |
10499 | -out: |
10500 | - return retval; |
10501 | -} |
10502 | - |
10503 | /* |
10504 | * Cache management |
10505 | * |
10506 | --- a/include/asm-x86/mach-xen/asm/io_64.h |
10507 | +++ b/include/asm-x86/mach-xen/asm/io_64.h |
10508 | @@ -273,33 +273,6 @@ |
10509 | |
10510 | #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d)) |
10511 | |
10512 | -/** |
10513 | - * check_signature - find BIOS signatures |
10514 | - * @io_addr: mmio address to check |
10515 | - * @signature: signature block |
10516 | - * @length: length of signature |
10517 | - * |
10518 | - * Perform a signature comparison with the mmio address io_addr. This |
10519 | - * address should have been obtained by ioremap. |
10520 | - * Returns 1 on a match. |
10521 | - */ |
10522 | - |
10523 | -static inline int check_signature(void __iomem *io_addr, |
10524 | - const unsigned char *signature, int length) |
10525 | -{ |
10526 | - int retval = 0; |
10527 | - do { |
10528 | - if (readb(io_addr) != *signature) |
10529 | - goto out; |
10530 | - io_addr++; |
10531 | - signature++; |
10532 | - length--; |
10533 | - } while (length); |
10534 | - retval = 1; |
10535 | -out: |
10536 | - return retval; |
10537 | -} |
10538 | - |
10539 | /* Nothing to do */ |
10540 | |
10541 | #define dma_cache_inv(_start,_size) do { } while (0) |
10542 | --- a/include/asm-x86/mach-xen/asm/pgtable-2level.h |
10543 | +++ b/include/asm-x86/mach-xen/asm/pgtable-2level.h |
10544 | @@ -23,14 +23,6 @@ |
10545 | set_pte((ptep), (pteval)); \ |
10546 | } while (0) |
10547 | |
10548 | -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \ |
10549 | - if (((_mm) != current->mm && (_mm) != &init_mm) || \ |
10550 | - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \ |
10551 | - set_pte((ptep), (pteval)); \ |
10552 | - xen_invlpg((addr)); \ |
10553 | - } \ |
10554 | -} while (0) |
10555 | - |
10556 | #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) |
10557 | |
10558 | #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval)) |
10559 | @@ -40,6 +32,7 @@ |
10560 | |
10561 | #define pte_none(x) (!(x).pte_low) |
10562 | |
10563 | +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
10564 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
10565 | { |
10566 | pte_t pte = *ptep; |
10567 | @@ -51,6 +44,7 @@ |
10568 | return pte; |
10569 | } |
10570 | |
10571 | +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
10572 | #define ptep_clear_flush(vma, addr, ptep) \ |
10573 | ({ \ |
10574 | pte_t *__ptep = (ptep); \ |
10575 | @@ -66,8 +60,6 @@ |
10576 | __res; \ |
10577 | }) |
10578 | |
10579 | -#define pte_same(a, b) ((a).pte_low == (b).pte_low) |
10580 | - |
10581 | #define __pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) |
10582 | #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ |
10583 | __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) |
10584 | --- a/include/asm-x86/mach-xen/asm/pgtable-3level.h |
10585 | +++ b/include/asm-x86/mach-xen/asm/pgtable-3level.h |
10586 | @@ -53,7 +53,6 @@ |
10587 | * not possible, use pte_get_and_clear to obtain the old pte |
10588 | * value and then use set_pte to update it. -ben |
10589 | */ |
10590 | -#define __HAVE_ARCH_SET_PTE_ATOMIC |
10591 | |
10592 | static inline void set_pte(pte_t *ptep, pte_t pte) |
10593 | { |
10594 | @@ -70,14 +69,6 @@ |
10595 | set_pte((ptep), (pteval)); \ |
10596 | } while (0) |
10597 | |
10598 | -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \ |
10599 | - if (((_mm) != current->mm && (_mm) != &init_mm) || \ |
10600 | - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \ |
10601 | - set_pte((ptep), (pteval)); \ |
10602 | - xen_invlpg((addr)); \ |
10603 | - } \ |
10604 | -} while (0) |
10605 | - |
10606 | #define set_pmd(pmdptr,pmdval) \ |
10607 | xen_l2_entry_update((pmdptr), (pmdval)) |
10608 | #define set_pud(pudptr,pudval) \ |
10609 | @@ -94,7 +85,7 @@ |
10610 | #define pud_page(pud) \ |
10611 | ((struct page *) __va(pud_val(pud) & PAGE_MASK)) |
10612 | |
10613 | -#define pud_page_kernel(pud) \ |
10614 | +#define pud_page_vaddr(pud) \ |
10615 | ((unsigned long) __va(pud_val(pud) & PAGE_MASK)) |
10616 | |
10617 | |
10618 | @@ -124,6 +115,7 @@ |
10619 | |
10620 | #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) |
10621 | |
10622 | +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
10623 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
10624 | { |
10625 | pte_t pte = *ptep; |
10626 | @@ -142,6 +134,7 @@ |
10627 | return pte; |
10628 | } |
10629 | |
10630 | +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
10631 | #define ptep_clear_flush(vma, addr, ptep) \ |
10632 | ({ \ |
10633 | pte_t *__ptep = (ptep); \ |
10634 | @@ -159,6 +152,7 @@ |
10635 | __res; \ |
10636 | }) |
10637 | |
10638 | +#define __HAVE_ARCH_PTE_SAME |
10639 | static inline int pte_same(pte_t a, pte_t b) |
10640 | { |
10641 | return a.pte_low == b.pte_low && a.pte_high == b.pte_high; |
10642 | --- a/include/asm-x86/mach-xen/asm/pgtable_32.h |
10643 | +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h |
10644 | @@ -260,31 +260,89 @@ |
10645 | # include <asm/pgtable-2level.h> |
10646 | #endif |
10647 | |
10648 | -#define ptep_test_and_clear_dirty(vma, addr, ptep) \ |
10649 | +/* |
10650 | + * Rules for using pte_update - it must be called after any PTE update which |
10651 | + * has not been done using the set_pte / clear_pte interfaces. It is used by |
10652 | + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE |
10653 | + * updates should either be sets, clears, or set_pte_atomic for P->P |
10654 | + * transitions, which means this hook should only be called for user PTEs. |
10655 | + * This hook implies a P->P protection or access change has taken place, which |
10656 | + * requires a subsequent TLB flush. The notification can optionally be delayed |
10657 | + * until the TLB flush event by using the pte_update_defer form of the |
10658 | + * interface, but care must be taken to assure that the flush happens while |
10659 | + * still holding the same page table lock so that the shadow and primary pages |
10660 | + * do not become out of sync on SMP. |
10661 | + */ |
10662 | +#define pte_update(mm, addr, ptep) do { } while (0) |
10663 | +#define pte_update_defer(mm, addr, ptep) do { } while (0) |
10664 | + |
10665 | + |
10666 | +/* |
10667 | + * We only update the dirty/accessed state if we set |
10668 | + * the dirty bit by hand in the kernel, since the hardware |
10669 | + * will do the accessed bit for us, and we don't want to |
10670 | + * race with other CPU's that might be updating the dirty |
10671 | + * bit at the same time. |
10672 | + */ |
10673 | +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
10674 | +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ |
10675 | +do { \ |
10676 | + if (dirty) \ |
10677 | + ptep_establish(vma, address, ptep, entry); \ |
10678 | +} while (0) |
10679 | + |
10680 | +/* |
10681 | + * We don't actually have these, but we want to advertise them so that |
10682 | + * we can encompass the flush here. |
10683 | + */ |
10684 | +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY |
10685 | +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
10686 | + |
10687 | +/* |
10688 | + * Rules for using ptep_establish: the pte MUST be a user pte, and |
10689 | + * must be a present->present transition. |
10690 | + */ |
10691 | +#define __HAVE_ARCH_PTEP_ESTABLISH |
10692 | +#define ptep_establish(vma, address, ptep, pteval) \ |
10693 | +do { \ |
10694 | + if ( likely((vma)->vm_mm == current->mm) ) { \ |
10695 | + BUG_ON(HYPERVISOR_update_va_mapping(address, \ |
10696 | + pteval, \ |
10697 | + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ |
10698 | + UVMF_INVLPG|UVMF_MULTI)); \ |
10699 | + } else { \ |
10700 | + xen_l1_entry_update(ptep, pteval); \ |
10701 | + flush_tlb_page(vma, address); \ |
10702 | + } \ |
10703 | +} while (0) |
10704 | + |
10705 | +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH |
10706 | +#define ptep_clear_flush_dirty(vma, address, ptep) \ |
10707 | ({ \ |
10708 | pte_t __pte = *(ptep); \ |
10709 | - int __ret = pte_dirty(__pte); \ |
10710 | - if (__ret) { \ |
10711 | - __pte = pte_mkclean(__pte); \ |
10712 | - if ((vma)->vm_mm != current->mm || \ |
10713 | - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \ |
10714 | - (ptep)->pte_low = __pte.pte_low; \ |
10715 | - } \ |
10716 | - __ret; \ |
10717 | + int __dirty = pte_dirty(__pte); \ |
10718 | + __pte = pte_mkclean(__pte); \ |
10719 | + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ |
10720 | + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ |
10721 | + else if (__dirty) \ |
10722 | + (ptep)->pte_low = __pte.pte_low; \ |
10723 | + __dirty; \ |
10724 | }) |
10725 | |
10726 | -#define ptep_test_and_clear_young(vma, addr, ptep) \ |
10727 | +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH |
10728 | +#define ptep_clear_flush_young(vma, address, ptep) \ |
10729 | ({ \ |
10730 | pte_t __pte = *(ptep); \ |
10731 | - int __ret = pte_young(__pte); \ |
10732 | - if (__ret) \ |
10733 | - __pte = pte_mkold(__pte); \ |
10734 | - if ((vma)->vm_mm != current->mm || \ |
10735 | - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \ |
10736 | - (ptep)->pte_low = __pte.pte_low; \ |
10737 | - __ret; \ |
10738 | + int __young = pte_young(__pte); \ |
10739 | + __pte = pte_mkold(__pte); \ |
10740 | + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ |
10741 | + ptep_set_access_flags(vma, address, ptep, __pte, __young); \ |
10742 | + else if (__young) \ |
10743 | + (ptep)->pte_low = __pte.pte_low; \ |
10744 | + __young; \ |
10745 | }) |
10746 | |
10747 | +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL |
10748 | #define ptep_get_and_clear_full(mm, addr, ptep, full) \ |
10749 | ((full) ? ({ \ |
10750 | pte_t __res = *(ptep); \ |
10751 | @@ -296,6 +354,7 @@ |
10752 | }) : \ |
10753 | ptep_get_and_clear(mm, addr, ptep)) |
10754 | |
10755 | +#define __HAVE_ARCH_PTEP_SET_WRPROTECT |
10756 | static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
10757 | { |
10758 | pte_t pte = *ptep; |
10759 | @@ -391,11 +450,11 @@ |
10760 | #define pte_index(address) \ |
10761 | (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) |
10762 | #define pte_offset_kernel(dir, address) \ |
10763 | - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) |
10764 | + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) |
10765 | |
10766 | #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) |
10767 | |
10768 | -#define pmd_page_kernel(pmd) \ |
10769 | +#define pmd_page_vaddr(pmd) \ |
10770 | ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) |
10771 | |
10772 | /* |
10773 | @@ -418,8 +477,6 @@ |
10774 | static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;} |
10775 | #endif |
10776 | |
10777 | -extern void noexec_setup(const char *str); |
10778 | - |
10779 | #if defined(CONFIG_HIGHPTE) |
10780 | #define pte_offset_map(dir, address) \ |
10781 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \ |
10782 | @@ -437,37 +494,17 @@ |
10783 | #define pte_unmap_nested(pte) do { } while (0) |
10784 | #endif |
10785 | |
10786 | -#define __HAVE_ARCH_PTEP_ESTABLISH |
10787 | -#define ptep_establish(vma, address, ptep, pteval) \ |
10788 | - do { \ |
10789 | - if ( likely((vma)->vm_mm == current->mm) ) { \ |
10790 | - BUG_ON(HYPERVISOR_update_va_mapping(address, \ |
10791 | - pteval, \ |
10792 | - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ |
10793 | - UVMF_INVLPG|UVMF_MULTI)); \ |
10794 | - } else { \ |
10795 | - xen_l1_entry_update(ptep, pteval); \ |
10796 | - flush_tlb_page(vma, address); \ |
10797 | - } \ |
10798 | - } while (0) |
10799 | +/* Clear a kernel PTE and flush it from the TLB */ |
10800 | +#define kpte_clear_flush(ptep, vaddr) do { \ |
10801 | + if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \ |
10802 | + BUG(); \ |
10803 | +} while (0) |
10804 | |
10805 | /* |
10806 | * The i386 doesn't have any external MMU info: the kernel page |
10807 | * tables contain all the necessary information. |
10808 | - * |
10809 | - * Also, we only update the dirty/accessed state if we set |
10810 | - * the dirty bit by hand in the kernel, since the hardware |
10811 | - * will do the accessed bit for us, and we don't want to |
10812 | - * race with other CPU's that might be updating the dirty |
10813 | - * bit at the same time. |
10814 | */ |
10815 | #define update_mmu_cache(vma,address,pte) do { } while (0) |
10816 | -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
10817 | -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ |
10818 | - do { \ |
10819 | - if (dirty) \ |
10820 | - ptep_establish(vma, address, ptep, entry); \ |
10821 | - } while (0) |
10822 | |
10823 | #include <xen/features.h> |
10824 | void make_lowmem_page_readonly(void *va, unsigned int feature); |
10825 | @@ -516,10 +553,11 @@ |
10826 | unsigned long size); |
10827 | |
10828 | int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, |
10829 | - unsigned long addr, unsigned long end, pgprot_t newprot); |
10830 | + unsigned long addr, unsigned long end, pgprot_t newprot, |
10831 | + int dirty_accountable); |
10832 | |
10833 | -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \ |
10834 | - xen_change_pte_range(mm, pmd, addr, end, newprot) |
10835 | +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \ |
10836 | + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) |
10837 | |
10838 | #define io_remap_pfn_range(vma,from,pfn,size,prot) \ |
10839 | direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO) |
10840 | @@ -528,13 +566,6 @@ |
10841 | #define GET_IOSPACE(pfn) 0 |
10842 | #define GET_PFN(pfn) (pfn) |
10843 | |
10844 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
10845 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY |
10846 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
10847 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL |
10848 | -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
10849 | -#define __HAVE_ARCH_PTEP_SET_WRPROTECT |
10850 | -#define __HAVE_ARCH_PTE_SAME |
10851 | #include <asm-generic/pgtable.h> |
10852 | |
10853 | #endif /* _I386_PGTABLE_H */ |
10854 | --- a/include/asm-x86/mach-xen/asm/pgtable_64.h |
10855 | +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h |
10856 | @@ -43,12 +43,9 @@ |
10857 | |
10858 | #define swapper_pg_dir init_level4_pgt |
10859 | |
10860 | -extern int nonx_setup(char *str); |
10861 | extern void paging_init(void); |
10862 | extern void clear_kernel_mapping(unsigned long addr, unsigned long size); |
10863 | |
10864 | -extern unsigned long pgkern_mask; |
10865 | - |
10866 | /* |
10867 | * ZERO_PAGE is a global shared page that is always zero: used |
10868 | * for zero-mapped memory areas etc.. |
10869 | @@ -118,9 +115,6 @@ |
10870 | set_pgd(__user_pgd(pgd), __pgd(0)); |
10871 | } |
10872 | |
10873 | -#define pud_page(pud) \ |
10874 | - ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) |
10875 | - |
10876 | #define pte_same(a, b) ((a).pte == (b).pte) |
10877 | |
10878 | #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK)) |
10879 | @@ -332,7 +326,7 @@ |
10880 | #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT) |
10881 | static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; } |
10882 | static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; } |
10883 | -static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; } |
10884 | +static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); } |
10885 | static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; } |
10886 | static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; } |
10887 | static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; } |
10888 | @@ -345,29 +339,12 @@ |
10889 | static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } |
10890 | static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; } |
10891 | static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; } |
10892 | -static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; } |
10893 | +static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; } |
10894 | static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; } |
10895 | static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; } |
10896 | static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; } |
10897 | static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; } |
10898 | - |
10899 | -#define ptep_test_and_clear_dirty(vma, addr, ptep) \ |
10900 | -({ \ |
10901 | - pte_t __pte = *(ptep); \ |
10902 | - int __ret = pte_dirty(__pte); \ |
10903 | - if (__ret) \ |
10904 | - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \ |
10905 | - __ret; \ |
10906 | -}) |
10907 | - |
10908 | -#define ptep_test_and_clear_young(vma, addr, ptep) \ |
10909 | -({ \ |
10910 | - pte_t __pte = *(ptep); \ |
10911 | - int __ret = pte_young(__pte); \ |
10912 | - if (__ret) \ |
10913 | - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \ |
10914 | - __ret; \ |
10915 | -}) |
10916 | +static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; } |
10917 | |
10918 | static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
10919 | { |
10920 | @@ -395,7 +372,8 @@ |
10921 | * Level 4 access. |
10922 | * Never use these in the common code. |
10923 | */ |
10924 | -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK)) |
10925 | +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK)) |
10926 | +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)) |
10927 | #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) |
10928 | #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) |
10929 | #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address)) |
10930 | @@ -404,16 +382,18 @@ |
10931 | |
10932 | /* PUD - Level3 access */ |
10933 | /* to find an entry in a page-table-directory. */ |
10934 | +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) |
10935 | +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT)) |
10936 | #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) |
10937 | -#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address)) |
10938 | +#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address)) |
10939 | #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT) |
10940 | |
10941 | /* PMD - Level 2 access */ |
10942 | -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) |
10943 | +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) |
10944 | #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) |
10945 | |
10946 | #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) |
10947 | -#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \ |
10948 | +#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \ |
10949 | pmd_index(address)) |
10950 | #define pmd_none(x) (!__pmd_val(x)) |
10951 | #if CONFIG_XEN_COMPAT <= 0x030002 |
10952 | @@ -444,6 +424,7 @@ |
10953 | { |
10954 | unsigned long pteval; |
10955 | pteval = physpage | pgprot_val(pgprot); |
10956 | + pteval &= __supported_pte_mask; |
10957 | return __pte(pteval); |
10958 | } |
10959 | |
10960 | @@ -465,7 +446,7 @@ |
10961 | |
10962 | #define pte_index(address) \ |
10963 | (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) |
10964 | -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ |
10965 | +#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ |
10966 | pte_index(address)) |
10967 | |
10968 | /* x86-64 always has all page tables mapped. */ |
10969 | @@ -506,6 +487,40 @@ |
10970 | ptep_establish(vma, address, ptep, entry); \ |
10971 | } while (0) |
10972 | |
10973 | + |
10974 | +/* |
10975 | + * i386 says: We don't actually have these, but we want to advertise |
10976 | + * them so that we can encompass the flush here. |
10977 | + */ |
10978 | +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY |
10979 | +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
10980 | + |
10981 | +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH |
10982 | +#define ptep_clear_flush_dirty(vma, address, ptep) \ |
10983 | +({ \ |
10984 | + pte_t __pte = *(ptep); \ |
10985 | + int __dirty = pte_dirty(__pte); \ |
10986 | + __pte = pte_mkclean(__pte); \ |
10987 | + if ((vma)->vm_mm->context.pinned) \ |
10988 | + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ |
10989 | + else if (__dirty) \ |
10990 | + set_pte(ptep, __pte); \ |
10991 | + __dirty; \ |
10992 | +}) |
10993 | + |
10994 | +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH |
10995 | +#define ptep_clear_flush_young(vma, address, ptep) \ |
10996 | +({ \ |
10997 | + pte_t __pte = *(ptep); \ |
10998 | + int __young = pte_young(__pte); \ |
10999 | + __pte = pte_mkold(__pte); \ |
11000 | + if ((vma)->vm_mm->context.pinned) \ |
11001 | + ptep_set_access_flags(vma, address, ptep, __pte, __young); \ |
11002 | + else if (__young) \ |
11003 | + set_pte(ptep, __pte); \ |
11004 | + __young; \ |
11005 | +}) |
11006 | + |
11007 | /* Encode and de-code a swap entry */ |
11008 | #define __swp_type(x) (((x).val >> 1) & 0x3f) |
11009 | #define __swp_offset(x) ((x).val >> 8) |
11010 | @@ -547,10 +562,11 @@ |
11011 | unsigned long size); |
11012 | |
11013 | int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, |
11014 | - unsigned long addr, unsigned long end, pgprot_t newprot); |
11015 | + unsigned long addr, unsigned long end, pgprot_t newprot, |
11016 | + int dirty_accountable); |
11017 | |
11018 | -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \ |
11019 | - xen_change_pte_range(mm, pmd, addr, end, newprot) |
11020 | +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \ |
11021 | + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) |
11022 | |
11023 | #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ |
11024 | direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO) |
11025 | @@ -572,8 +588,6 @@ |
11026 | #define kc_offset_to_vaddr(o) \ |
11027 | (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o)) |
11028 | |
11029 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
11030 | -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY |
11031 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
11032 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL |
11033 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
11034 | --- a/include/asm-x86/mach-xen/asm/processor_32.h |
11035 | +++ b/include/asm-x86/mach-xen/asm/processor_32.h |
11036 | @@ -146,6 +146,18 @@ |
11037 | #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ |
11038 | #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ |
11039 | |
11040 | +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, |
11041 | + unsigned int *ecx, unsigned int *edx) |
11042 | +{ |
11043 | + /* ecx is often an input as well as an output. */ |
11044 | + __asm__(XEN_CPUID |
11045 | + : "=a" (*eax), |
11046 | + "=b" (*ebx), |
11047 | + "=c" (*ecx), |
11048 | + "=d" (*edx) |
11049 | + : "0" (*eax), "2" (*ecx)); |
11050 | +} |
11051 | + |
11052 | /* |
11053 | * Generic CPUID function |
11054 | * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx |
11055 | @@ -153,24 +165,18 @@ |
11056 | */ |
11057 | static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) |
11058 | { |
11059 | - __asm__(XEN_CPUID |
11060 | - : "=a" (*eax), |
11061 | - "=b" (*ebx), |
11062 | - "=c" (*ecx), |
11063 | - "=d" (*edx) |
11064 | - : "0" (op), "c"(0)); |
11065 | + *eax = op; |
11066 | + *ecx = 0; |
11067 | + __cpuid(eax, ebx, ecx, edx); |
11068 | } |
11069 | |
11070 | /* Some CPUID calls want 'count' to be placed in ecx */ |
11071 | static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, |
11072 | - int *edx) |
11073 | + int *edx) |
11074 | { |
11075 | - __asm__(XEN_CPUID |
11076 | - : "=a" (*eax), |
11077 | - "=b" (*ebx), |
11078 | - "=c" (*ecx), |
11079 | - "=d" (*edx) |
11080 | - : "0" (op), "c" (count)); |
11081 | + *eax = op; |
11082 | + *ecx = count; |
11083 | + __cpuid(eax, ebx, ecx, edx); |
11084 | } |
11085 | |
11086 | /* |
11087 | @@ -178,42 +184,30 @@ |
11088 | */ |
11089 | static inline unsigned int cpuid_eax(unsigned int op) |
11090 | { |
11091 | - unsigned int eax; |
11092 | + unsigned int eax, ebx, ecx, edx; |
11093 | |
11094 | - __asm__(XEN_CPUID |
11095 | - : "=a" (eax) |
11096 | - : "0" (op) |
11097 | - : "bx", "cx", "dx"); |
11098 | + cpuid(op, &eax, &ebx, &ecx, &edx); |
11099 | return eax; |
11100 | } |
11101 | static inline unsigned int cpuid_ebx(unsigned int op) |
11102 | { |
11103 | - unsigned int eax, ebx; |
11104 | + unsigned int eax, ebx, ecx, edx; |
11105 | |
11106 | - __asm__(XEN_CPUID |
11107 | - : "=a" (eax), "=b" (ebx) |
11108 | - : "0" (op) |
11109 | - : "cx", "dx" ); |
11110 | + cpuid(op, &eax, &ebx, &ecx, &edx); |
11111 | return ebx; |
11112 | } |
11113 | static inline unsigned int cpuid_ecx(unsigned int op) |
11114 | { |
11115 | - unsigned int eax, ecx; |
11116 | + unsigned int eax, ebx, ecx, edx; |
11117 | |
11118 | - __asm__(XEN_CPUID |
11119 | - : "=a" (eax), "=c" (ecx) |
11120 | - : "0" (op) |
11121 | - : "bx", "dx" ); |
11122 | + cpuid(op, &eax, &ebx, &ecx, &edx); |
11123 | return ecx; |
11124 | } |
11125 | static inline unsigned int cpuid_edx(unsigned int op) |
11126 | { |
11127 | - unsigned int eax, edx; |
11128 | + unsigned int eax, ebx, ecx, edx; |
11129 | |
11130 | - __asm__(XEN_CPUID |
11131 | - : "=a" (eax), "=d" (edx) |
11132 | - : "0" (op) |
11133 | - : "bx", "cx"); |
11134 | + cpuid(op, &eax, &ebx, &ecx, &edx); |
11135 | return edx; |
11136 | } |
11137 | |
11138 | @@ -315,6 +309,8 @@ |
11139 | : :"a" (eax), "c" (ecx)); |
11140 | } |
11141 | |
11142 | +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); |
11143 | + |
11144 | /* from system description table in BIOS. Mostly for MCA use, but |
11145 | others may find it useful. */ |
11146 | extern unsigned int machine_id; |
11147 | --- a/include/asm-x86/mach-xen/asm/processor_64.h |
11148 | +++ b/include/asm-x86/mach-xen/asm/processor_64.h |
11149 | @@ -484,6 +484,8 @@ |
11150 | : :"a" (eax), "c" (ecx)); |
11151 | } |
11152 | |
11153 | +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); |
11154 | + |
11155 | #define stack_current() \ |
11156 | ({ \ |
11157 | struct thread_info *ti; \ |
11158 | --- a/include/asm-x86/mach-xen/asm/segment_32.h |
11159 | +++ b/include/asm-x86/mach-xen/asm/segment_32.h |
11160 | @@ -61,11 +61,9 @@ |
11161 | |
11162 | #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) |
11163 | #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) |
11164 | -#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) ) |
11165 | |
11166 | #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) |
11167 | #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) |
11168 | -#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) ) |
11169 | |
11170 | #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) |
11171 | #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) |
11172 | @@ -85,6 +83,11 @@ |
11173 | |
11174 | #define GDT_SIZE (GDT_ENTRIES * 8) |
11175 | |
11176 | +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ |
11177 | +#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) |
11178 | +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ |
11179 | +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) |
11180 | + |
11181 | /* Simple and small GDT entries for booting only */ |
11182 | |
11183 | #define GDT_ENTRY_BOOT_CS 2 |
11184 | @@ -114,4 +117,16 @@ |
11185 | */ |
11186 | #define IDT_ENTRIES 256 |
11187 | |
11188 | +/* Bottom two bits of selector give the ring privilege level */ |
11189 | +#define SEGMENT_RPL_MASK 0x3 |
11190 | +/* Bit 2 is table indicator (LDT/GDT) */ |
11191 | +#define SEGMENT_TI_MASK 0x4 |
11192 | + |
11193 | +/* User mode is privilege level 3 */ |
11194 | +#define USER_RPL 0x3 |
11195 | +/* LDT segment has TI set, GDT has it cleared */ |
11196 | +#define SEGMENT_LDT 0x4 |
11197 | +#define SEGMENT_GDT 0x0 |
11198 | + |
11199 | +#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) |
11200 | #endif |
11201 | --- a/include/asm-x86/mach-xen/asm/smp_32.h |
11202 | +++ b/include/asm-x86/mach-xen/asm/smp_32.h |
11203 | @@ -79,25 +79,36 @@ |
11204 | return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); |
11205 | } |
11206 | #endif |
11207 | - |
11208 | -static __inline int logical_smp_processor_id(void) |
11209 | -{ |
11210 | - /* we don't want to mark this access volatile - bad code generation */ |
11211 | - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); |
11212 | -} |
11213 | - |
11214 | #endif |
11215 | |
11216 | +#define safe_smp_processor_id() smp_processor_id() |
11217 | extern int __cpu_disable(void); |
11218 | extern void __cpu_die(unsigned int cpu); |
11219 | extern void prefill_possible_map(void); |
11220 | +extern unsigned int num_processors; |
11221 | + |
11222 | #endif /* !__ASSEMBLY__ */ |
11223 | |
11224 | #else /* CONFIG_SMP */ |
11225 | |
11226 | +#define safe_smp_processor_id() 0 |
11227 | #define cpu_physical_id(cpu) boot_cpu_physical_apicid |
11228 | |
11229 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
11230 | |
11231 | #endif |
11232 | + |
11233 | +#ifndef __ASSEMBLY__ |
11234 | + |
11235 | +extern u8 apicid_2_node[]; |
11236 | + |
11237 | +#ifdef CONFIG_X86_LOCAL_APIC |
11238 | +static __inline int logical_smp_processor_id(void) |
11239 | +{ |
11240 | + /* we don't want to mark this access volatile - bad code generation */ |
11241 | + return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); |
11242 | +} |
11243 | +#endif |
11244 | +#endif |
11245 | + |
11246 | #endif |
--- a/include/asm-x86/mach-xen/asm/smp_64.h
+++ b/include/asm-x86/mach-xen/asm/smp_64.h
@@ -4,15 +4,12 @@
 /*
  * We need the APIC definitions automatically as part of 'smp.h'
  */
-#ifndef __ASSEMBLY__
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/bitops.h>
 extern int disable_apic;
-#endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-#ifndef __ASSEMBLY__
 #include <asm/fixmap.h>
 #include <asm/mpspec.h>
 #ifdef CONFIG_X86_IO_APIC
@@ -21,10 +18,8 @@
 #include <asm/apic.h>
 #include <asm/thread_info.h>
 #endif
-#endif
 
 #ifdef CONFIG_SMP
-#ifndef ASSEMBLY
 
 #include <asm/pda.h>
 
@@ -41,14 +36,11 @@
 
 extern void smp_alloc_memory(void);
 extern volatile unsigned long smp_invalidate_needed;
-extern int pic_mode;
 extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
 void smp_stop_cpu(void);
-extern int smp_call_function_single(int cpuid, void (*func) (void *info),
- void *info, int retry, int wait);
 
 extern cpumask_t cpu_sibling_map[NR_CPUS];
 extern cpumask_t cpu_core_map[NR_CPUS];
@@ -77,20 +69,16 @@
 }
 #endif
 
-extern int safe_smp_processor_id(void);
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 extern void prefill_possible_map(void);
 extern unsigned num_processors;
 extern unsigned disabled_cpus;
 
-#endif /* !ASSEMBLY */
-
 #define NO_PROC_ID 0xFF /* No processor magic marker */
 
 #endif
 
-#ifndef ASSEMBLY
 /*
  * Some lowlevel functions might want to know about
  * the real APIC ID <-> CPU # mapping.
@@ -114,11 +102,8 @@
 }
 #endif
 
-#endif /* !ASSEMBLY */
-
 #ifndef CONFIG_SMP
 #define stack_smp_processor_id() 0
-#define safe_smp_processor_id() 0
 #define cpu_logical_map(x) (x)
 #else
 #include <asm/thread_info.h>
@@ -130,7 +115,6 @@
 })
 #endif
 
-#ifndef __ASSEMBLY__
 #ifdef CONFIG_X86_LOCAL_APIC
 static __inline int logical_smp_processor_id(void)
 {
@@ -138,13 +122,18 @@
 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
 }
 #endif
-#endif
 
 #ifdef CONFIG_SMP
 #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
 #else
 #define cpu_physical_id(cpu) boot_cpu_id
-#endif
-
+static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
+ void *info, int retry, int wait)
+{
+ /* Disable interrupts here? */
+ func(info);
+ return 0;
+}
+#endif /* !CONFIG_SMP */
 #endif
 
--- a/include/asm-x86/mach-xen/asm/system_32.h
+++ b/include/asm-x86/mach-xen/asm/system_32.h
@@ -267,6 +267,9 @@
 #define cmpxchg(ptr,o,n)\
 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
 (unsigned long)(n),sizeof(*(ptr))))
+#define sync_cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
 #endif
 
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -291,6 +294,39 @@
 : "=a"(prev)
 : "r"(new), "m"(*__xg(ptr)), "0"(old)
 : "memory");
+ return prev;
+ }
+ return old;
+}
+
+/*
+ * Always use locked operations when touching memory shared with a
+ * hypervisor, since the system may be SMP even if the guest kernel
+ * isn't.
+ */
+static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+ unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("lock; cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__("lock; cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ __volatile__("lock; cmpxchgl %1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
 return prev;
 }
 return old;
--- a/include/asm-x86/mach-xen/asm/system_64.h
+++ b/include/asm-x86/mach-xen/asm/system_64.h
@@ -24,6 +24,7 @@
 #define __EXTRA_CLOBBER \
 ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
 
+/* Save restore flags to clear handle leaking NT */
 #define switch_to(prev,next,last) \
 asm volatile(SAVE_CONTEXT \
 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
--- a/include/asm-x86/mach-xen/asm/tlbflush_32.h
+++ b/include/asm-x86/mach-xen/asm/tlbflush_32.h
@@ -8,8 +8,6 @@
 #define __flush_tlb_global() xen_tlb_flush()
 #define __flush_tlb_all() xen_tlb_flush()
 
-extern unsigned long pgkern_mask;
-
 #define cpu_has_invlpg (boot_cpu_data.x86 > 3)
 
 #define __flush_tlb_single(addr) xen_invlpg(addr)
--- a/include/asm-x86/mach-xen/asm/tlbflush_64.h
+++ b/include/asm-x86/mach-xen/asm/tlbflush_64.h
@@ -12,9 +12,6 @@
  */
 #define __flush_tlb_global() xen_tlb_flush()
 
-
-extern unsigned long pgkern_mask;
-
 #define __flush_tlb_all() __flush_tlb_global()
 
 #define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -157,10 +157,14 @@
 (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
 
 /* flags to check in __switch_to() */
+#ifndef CONFIG_XEN
 #define _TIF_WORK_CTXSW \
 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS)
 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+#else
+#define _TIF_WORK_CTXSW _TIF_DEBUG
+#endif
 
 #define PREEMPT_ACTIVE 0x10000000
 
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1821,5 +1821,12 @@
 }
 
 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
+
+#ifdef CONFIG_XEN
+int skb_checksum_setup(struct sk_buff *skb);
+#else
+static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SKBUFF_H */
--- a/include/xen/evtchn.h
+++ b/include/xen/evtchn.h
@@ -54,34 +54,34 @@
  */
 int bind_caller_port_to_irqhandler(
 unsigned int caller_port,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
 int bind_listening_port_to_irqhandler(
 unsigned int remote_domain,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
 int bind_interdomain_evtchn_to_irqhandler(
 unsigned int remote_domain,
 unsigned int remote_port,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
 int bind_virq_to_irqhandler(
 unsigned int virq,
 unsigned int cpu,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
 int bind_ipi_to_irqhandler(
 unsigned int ipi,
 unsigned int cpu,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
--- a/include/xen/xencons.h
+++ b/include/xen/xencons.h
@@ -8,7 +8,7 @@
 void xencons_resume(void);
 
 /* Interrupt work hooks. Receive data, or kick data out. */
-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
+void xencons_rx(char *buf, unsigned len);
 void xencons_tx(void);
 
 int xencons_ring_init(void);
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -86,7 +86,7 @@
 next = pmd_addr_end(addr, end);
 if (pmd_none_or_clear_bad(pmd))
 continue;
- if (arch_change_pte_range(mm, pmd, addr, next, newprot))
+ if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
 continue;
 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
 } while (pmd++, addr = next, addr != end);
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1611,15 +1611,14 @@
 }
 if ((skb->h.raw + skb->csum + 2) > skb->tail)
 goto out;
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
 skb->proto_csum_blank = 0;
 }
 return 0;
 out:
 return -EPROTO;
 }
-#else
-inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
+EXPORT_SYMBOL(skb_checksum_setup);
 #endif
 
 /**
@@ -2115,7 +2114,7 @@
 case CHECKSUM_UNNECESSARY:
 skb->proto_data_valid = 1;
 break;
- case CHECKSUM_HW:
+ case CHECKSUM_PARTIAL:
 /* XXX Implement me. */
 default:
 skb->proto_data_valid = 0;
@@ -4648,7 +4647,6 @@
 EXPORT_SYMBOL(net_enable_timestamp);
 EXPORT_SYMBOL(net_disable_timestamp);
 EXPORT_SYMBOL(dev_get_flags);
-EXPORT_SYMBOL(skb_checksum_setup);
 
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);