Contents of /trunk/kernel26-xen/patches-2.6.25-r1/1022-2.6.25-xen-patch-2.6.21.patch
Revision 609
Fri May 23 17:35:37 2008 UTC by niro
File size: 123925 bytes
-using opensuse xen patchset, updated kernel configs
1 | From: www.kernel.org |
2 | Subject: Linux 2.6.21 |
3 | Patch-mainline: 2.6.21 |
4 | |
5 | Automatically created from "patches.kernel.org/patch-2.6.21" by xen-port-patches.py |
6 | |
7 | Acked-by: jbeulich@novell.com |
8 | |
9 | --- |
10 | arch/x86/Kconfig | 4 |
11 | arch/x86/ia32/ia32entry-xen.S | 5 |
12 | arch/x86/kernel/Makefile | 4 |
13 | arch/x86/kernel/acpi/sleep_64-xen.c | 6 |
14 | arch/x86/kernel/apic_32-xen.c | 65 ---- |
15 | arch/x86/kernel/cpu/common-xen.c | 14 |
16 | arch/x86/kernel/e820_32-xen.c | 18 - |
17 | arch/x86/kernel/e820_64-xen.c | 40 ++ |
18 | arch/x86/kernel/entry_32-xen.S | 80 +++-- |
19 | arch/x86/kernel/entry_64-xen.S | 3 |
20 | arch/x86/kernel/genapic_64-xen.c | 4 |
21 | arch/x86/kernel/head64-xen.c | 8 |
22 | arch/x86/kernel/head_32-xen.S | 9 |
23 | arch/x86/kernel/io_apic_32-xen.c | 43 +- |
24 | arch/x86/kernel/io_apic_64-xen.c | 413 +++++++++++++------------- |
25 | arch/x86/kernel/irq_32-xen.c | 22 + |
26 | arch/x86/kernel/irq_64-xen.c | 13 |
27 | arch/x86/kernel/microcode-xen.c | 2 |
28 | arch/x86/kernel/mpparse_32-xen.c | 4 |
29 | arch/x86/kernel/mpparse_64-xen.c | 6 |
30 | arch/x86/kernel/pci-dma_32-xen.c | 2 |
31 | arch/x86/kernel/pci-swiotlb_64-xen.c | 2 |
32 | arch/x86/kernel/pcspeaker.c | 5 |
33 | arch/x86/kernel/process_32-xen.c | 42 +- |
34 | arch/x86/kernel/process_64-xen.c | 13 |
35 | arch/x86/kernel/setup_32-xen.c | 46 -- |
36 | arch/x86/kernel/setup_64-xen.c | 184 +---------- |
37 | arch/x86/kernel/smp_32-xen.c | 5 |
38 | arch/x86/kernel/time_32-xen.c | 275 +---------------- |
39 | arch/x86/kernel/traps_32-xen.c | 27 + |
40 | arch/x86/kernel/vsyscall_64-xen.c | 127 ++++--- |
41 | arch/x86/mm/fault_32-xen.c | 44 -- |
42 | arch/x86/mm/fault_64-xen.c | 39 -- |
43 | arch/x86/mm/highmem_32-xen.c | 9 |
44 | arch/x86/mm/init_32-xen.c | 2 |
45 | arch/x86/mm/init_64-xen.c | 24 + |
46 | arch/x86/mm/pageattr_64-xen.c | 6 |
47 | arch/x86/mm/pgtable_32-xen.c | 28 + |
48 | drivers/char/tpm/tpm_xen.c | 5 |
49 | drivers/xen/balloon/sysfs.c | 1 |
50 | drivers/xen/blkback/xenbus.c | 4 |
51 | drivers/xen/blkfront/blkfront.c | 1 |
52 | drivers/xen/blktap/xenbus.c | 4 |
53 | drivers/xen/core/evtchn.c | 4 |
54 | drivers/xen/core/smpboot.c | 18 - |
55 | drivers/xen/fbfront/xenfb.c | 1 |
56 | drivers/xen/fbfront/xenkbd.c | 1 |
57 | drivers/xen/netback/xenbus.c | 4 |
58 | drivers/xen/netfront/netfront.c | 49 +-- |
59 | drivers/xen/pciback/xenbus.c | 1 |
60 | drivers/xen/pcifront/xenbus.c | 1 |
61 | drivers/xen/tpmback/common.h | 4 |
62 | drivers/xen/tpmback/interface.c | 5 |
63 | drivers/xen/tpmback/tpmback.c | 16 - |
64 | drivers/xen/tpmback/xenbus.c | 5 |
65 | drivers/xen/xenbus/xenbus_probe.c | 17 - |
66 | drivers/xen/xenbus/xenbus_probe.h | 4 |
67 | drivers/xen/xenbus/xenbus_probe_backend.c | 8 |
68 | include/asm-x86/i8253.h | 4 |
69 | include/asm-x86/mach-xen/asm/desc_32.h | 2 |
70 | include/asm-x86/mach-xen/asm/dma-mapping_64.h | 4 |
71 | include/asm-x86/mach-xen/asm/e820_64.h | 2 |
72 | include/asm-x86/mach-xen/asm/hw_irq_64.h | 33 +- |
73 | include/asm-x86/mach-xen/asm/hypervisor.h | 2 |
74 | include/asm-x86/mach-xen/asm/io_32.h | 6 |
75 | include/asm-x86/mach-xen/asm/io_64.h | 8 |
76 | include/asm-x86/mach-xen/asm/mmu_context_32.h | 10 |
77 | include/asm-x86/mach-xen/asm/pgalloc_32.h | 21 + |
78 | include/asm-x86/mach-xen/asm/pgtable_32.h | 25 + |
79 | include/asm-x86/mach-xen/asm/pgtable_64.h | 9 |
80 | include/asm-x86/mach-xen/asm/processor_32.h | 6 |
81 | include/asm-x86/mach-xen/asm/segment_32.h | 23 + |
82 | include/asm-x86/mach-xen/asm/smp_32.h | 5 |
83 | include/asm-x86/mach-xen/asm/smp_64.h | 3 |
84 | include/xen/xenbus.h | 24 + |
85 | lib/swiotlb-xen.c | 19 - |
86 | 76 files changed, 889 insertions(+), 1113 deletions(-) |
87 | |
88 | --- a/arch/x86/Kconfig |
89 | +++ b/arch/x86/Kconfig |
90 | @@ -48,13 +48,15 @@ |
91 | |
92 | config CLOCKSOURCE_WATCHDOG |
93 | def_bool y |
94 | + depends on !X86_XEN |
95 | |
96 | config GENERIC_CLOCKEVENTS |
97 | def_bool y |
98 | + depends on !X86_XEN |
99 | |
100 | config GENERIC_CLOCKEVENTS_BROADCAST |
101 | def_bool y |
102 | - depends on X86_64 || (X86_32 && X86_LOCAL_APIC) |
103 | + depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN) |
104 | |
105 | config LOCKDEP_SUPPORT |
106 | def_bool y |
107 | --- a/arch/x86/ia32/ia32entry-xen.S |
108 | +++ b/arch/x86/ia32/ia32entry-xen.S |
109 | @@ -465,7 +465,7 @@ |
110 | .quad sys32_vm86_warning /* vm86old */ |
111 | .quad compat_sys_wait4 |
112 | .quad sys_swapoff /* 115 */ |
113 | - .quad sys32_sysinfo |
114 | + .quad compat_sys_sysinfo |
115 | .quad sys32_ipc |
116 | .quad sys_fsync |
117 | .quad stub32_sigreturn |
118 | @@ -510,7 +510,7 @@ |
119 | .quad sys_sched_yield |
120 | .quad sys_sched_get_priority_max |
121 | .quad sys_sched_get_priority_min /* 160 */ |
122 | - .quad sys_sched_rr_get_interval |
123 | + .quad sys32_sched_rr_get_interval |
124 | .quad compat_sys_nanosleep |
125 | .quad sys_mremap |
126 | .quad sys_setresuid16 |
127 | @@ -668,4 +668,5 @@ |
128 | .quad compat_sys_vmsplice |
129 | .quad compat_sys_move_pages |
130 | .quad sys_getcpu |
131 | + .quad sys_epoll_pwait |
132 | ia32_syscall_end: |
133 | --- a/arch/x86/kernel/Makefile |
134 | +++ b/arch/x86/kernel/Makefile |
135 | @@ -104,6 +104,6 @@ |
136 | pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o |
137 | endif |
138 | |
139 | -disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \ |
140 | - smpboot_$(BITS).o tsc_$(BITS).o |
141 | +disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ |
142 | + smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o |
143 | %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := |
144 | --- a/arch/x86/kernel/acpi/sleep_64-xen.c |
145 | +++ b/arch/x86/kernel/acpi/sleep_64-xen.c |
146 | @@ -59,7 +59,7 @@ |
147 | unsigned long acpi_video_flags; |
148 | extern char wakeup_start, wakeup_end; |
149 | |
150 | -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); |
151 | +extern unsigned long acpi_copy_wakeup_routine(unsigned long); |
152 | |
153 | static pgd_t low_ptr; |
154 | |
155 | @@ -67,8 +67,10 @@ |
156 | { |
157 | pgd_t *slot0 = pgd_offset(current->mm, 0UL); |
158 | low_ptr = *slot0; |
159 | + /* FIXME: We're playing with the current task's page tables here, which |
160 | + * is potentially dangerous on SMP systems. |
161 | + */ |
162 | set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); |
163 | - WARN_ON(num_online_cpus() != 1); |
164 | local_flush_tlb(); |
165 | } |
166 | #endif |
167 | --- a/arch/x86/kernel/apic_32-xen.c |
168 | +++ b/arch/x86/kernel/apic_32-xen.c |
169 | @@ -25,6 +25,8 @@ |
170 | #include <linux/kernel_stat.h> |
171 | #include <linux/sysdev.h> |
172 | #include <linux/cpu.h> |
173 | +#include <linux/clockchips.h> |
174 | +#include <linux/acpi_pmtmr.h> |
175 | #include <linux/module.h> |
176 | |
177 | #include <asm/atomic.h> |
178 | @@ -56,83 +58,26 @@ |
179 | */ |
180 | |
181 | /* |
182 | - * Debug level |
183 | + * Debug level, exported for io_apic.c |
184 | */ |
185 | int apic_verbosity; |
186 | |
187 | #ifndef CONFIG_XEN |
188 | static int modern_apic(void) |
189 | { |
190 | - unsigned int lvr, version; |
191 | /* AMD systems use old APIC versions, so check the CPU */ |
192 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && |
193 | - boot_cpu_data.x86 >= 0xf) |
194 | + boot_cpu_data.x86 >= 0xf) |
195 | return 1; |
196 | - lvr = apic_read(APIC_LVR); |
197 | - version = GET_APIC_VERSION(lvr); |
198 | - return version >= 0x14; |
199 | + return lapic_get_version() >= 0x14; |
200 | } |
201 | #endif /* !CONFIG_XEN */ |
202 | |
203 | -/* |
204 | - * 'what should we do if we get a hw irq event on an illegal vector'. |
205 | - * each architecture has to answer this themselves. |
206 | - */ |
207 | -void ack_bad_irq(unsigned int irq) |
208 | -{ |
209 | - printk("unexpected IRQ trap at vector %02x\n", irq); |
210 | - /* |
211 | - * Currently unexpected vectors happen only on SMP and APIC. |
212 | - * We _must_ ack these because every local APIC has only N |
213 | - * irq slots per priority level, and a 'hanging, unacked' IRQ |
214 | - * holds up an irq slot - in excessive cases (when multiple |
215 | - * unexpected vectors occur) that might lock up the APIC |
216 | - * completely. |
217 | - * But only ack when the APIC is enabled -AK |
218 | - */ |
219 | - if (cpu_has_apic) |
220 | - ack_APIC_irq(); |
221 | -} |
222 | - |
223 | int get_physical_broadcast(void) |
224 | { |
225 | return 0xff; |
226 | } |
227 | |
228 | -#ifndef CONFIG_XEN |
229 | -#ifndef CONFIG_SMP |
230 | -static void up_apic_timer_interrupt_call(void) |
231 | -{ |
232 | - int cpu = smp_processor_id(); |
233 | - |
234 | - /* |
235 | - * the NMI deadlock-detector uses this. |
236 | - */ |
237 | - per_cpu(irq_stat, cpu).apic_timer_irqs++; |
238 | - |
239 | - smp_local_timer_interrupt(); |
240 | -} |
241 | -#endif |
242 | - |
243 | -void smp_send_timer_broadcast_ipi(void) |
244 | -{ |
245 | - cpumask_t mask; |
246 | - |
247 | - cpus_and(mask, cpu_online_map, timer_bcast_ipi); |
248 | - if (!cpus_empty(mask)) { |
249 | -#ifdef CONFIG_SMP |
250 | - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); |
251 | -#else |
252 | - /* |
253 | - * We can directly call the apic timer interrupt handler |
254 | - * in UP case. Minus all irq related functions |
255 | - */ |
256 | - up_apic_timer_interrupt_call(); |
257 | -#endif |
258 | - } |
259 | -} |
260 | -#endif |
261 | - |
262 | int setup_profiling_timer(unsigned int multiplier) |
263 | { |
264 | return -EINVAL; |
265 | --- a/arch/x86/kernel/cpu/common-xen.c |
266 | +++ b/arch/x86/kernel/cpu/common-xen.c |
267 | @@ -610,7 +610,7 @@ |
268 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) |
269 | { |
270 | memset(regs, 0, sizeof(struct pt_regs)); |
271 | - regs->xgs = __KERNEL_PDA; |
272 | + regs->xfs = __KERNEL_PDA; |
273 | return regs; |
274 | } |
275 | |
276 | @@ -667,12 +667,12 @@ |
277 | .pcurrent = &init_task, |
278 | }; |
279 | |
280 | -static inline void set_kernel_gs(void) |
281 | +static inline void set_kernel_fs(void) |
282 | { |
283 | - /* Set %gs for this CPU's PDA. Memory clobber is to create a |
284 | + /* Set %fs for this CPU's PDA. Memory clobber is to create a |
285 | barrier with respect to any PDA operations, so the compiler |
286 | doesn't move any before here. */ |
287 | - asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); |
288 | + asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); |
289 | } |
290 | |
291 | /* Initialize the CPU's GDT and PDA. The boot CPU does this for |
292 | @@ -730,7 +730,7 @@ |
293 | } |
294 | BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8)); |
295 | |
296 | - set_kernel_gs(); |
297 | + set_kernel_fs(); |
298 | } |
299 | |
300 | /* Common CPU init for both boot and secondary CPUs */ |
301 | @@ -775,8 +775,8 @@ |
302 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); |
303 | #endif |
304 | |
305 | - /* Clear %fs. */ |
306 | - asm volatile ("mov %0, %%fs" : : "r" (0)); |
307 | + /* Clear %gs. */ |
308 | + asm volatile ("mov %0, %%gs" : : "r" (0)); |
309 | |
310 | /* Clear all 6 debug registers: */ |
311 | set_debugreg(0, 0); |
312 | --- a/arch/x86/kernel/e820_32-xen.c |
313 | +++ b/arch/x86/kernel/e820_32-xen.c |
314 | @@ -14,6 +14,7 @@ |
315 | #include <asm/pgtable.h> |
316 | #include <asm/page.h> |
317 | #include <asm/e820.h> |
318 | +#include <asm/setup.h> |
319 | #include <xen/interface/memory.h> |
320 | |
321 | #ifdef CONFIG_EFI |
322 | @@ -157,21 +158,22 @@ |
323 | .flags = IORESOURCE_BUSY | IORESOURCE_IO |
324 | } }; |
325 | |
326 | -static int romsignature(const unsigned char *x) |
327 | +#define ROMSIGNATURE 0xaa55 |
328 | + |
329 | +static int __init romsignature(const unsigned char *rom) |
330 | { |
331 | unsigned short sig; |
332 | - int ret = 0; |
333 | - if (probe_kernel_address((const unsigned short *)x, sig) == 0) |
334 | - ret = (sig == 0xaa55); |
335 | - return ret; |
336 | + |
337 | + return probe_kernel_address((const unsigned short *)rom, sig) == 0 && |
338 | + sig == ROMSIGNATURE; |
339 | } |
340 | |
341 | static int __init romchecksum(unsigned char *rom, unsigned long length) |
342 | { |
343 | - unsigned char *p, sum = 0; |
344 | + unsigned char sum; |
345 | |
346 | - for (p = rom; p < rom + length; p++) |
347 | - sum += *p; |
348 | + for (sum = 0; length; length--) |
349 | + sum += *rom++; |
350 | return sum == 0; |
351 | } |
352 | |
353 | --- a/arch/x86/kernel/e820_64-xen.c |
354 | +++ b/arch/x86/kernel/e820_64-xen.c |
355 | @@ -88,6 +88,13 @@ |
356 | return 1; |
357 | } |
358 | |
359 | +#ifdef CONFIG_NUMA |
360 | + /* NUMA memory to node map */ |
361 | + if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) { |
362 | + *addrp = nodemap_addr + nodemap_size; |
363 | + return 1; |
364 | + } |
365 | +#endif |
366 | /* XXX ramdisk image here? */ |
367 | #else |
368 | if (last < (table_end<<PAGE_SHIFT)) { |
369 | @@ -215,6 +222,37 @@ |
370 | } |
371 | |
372 | /* |
373 | + * Find the hole size in the range. |
374 | + */ |
375 | +unsigned long __init e820_hole_size(unsigned long start, unsigned long end) |
376 | +{ |
377 | + unsigned long ram = 0; |
378 | + int i; |
379 | + |
380 | + for (i = 0; i < e820.nr_map; i++) { |
381 | + struct e820entry *ei = &e820.map[i]; |
382 | + unsigned long last, addr; |
383 | + |
384 | + if (ei->type != E820_RAM || |
385 | + ei->addr+ei->size <= start || |
386 | + ei->addr >= end) |
387 | + continue; |
388 | + |
389 | + addr = round_up(ei->addr, PAGE_SIZE); |
390 | + if (addr < start) |
391 | + addr = start; |
392 | + |
393 | + last = round_down(ei->addr + ei->size, PAGE_SIZE); |
394 | + if (last >= end) |
395 | + last = end; |
396 | + |
397 | + if (last > addr) |
398 | + ram += last - addr; |
399 | + } |
400 | + return ((end - start) - ram); |
401 | +} |
402 | + |
403 | +/* |
404 | * Mark e820 reserved areas as busy for the resource manager. |
405 | */ |
406 | void __init e820_reserve_resources(struct e820entry *e820, int nr_map) |
407 | @@ -725,7 +763,7 @@ |
408 | } |
409 | early_param("memmap", parse_memmap_opt); |
410 | |
411 | -void finish_e820_parsing(void) |
412 | +void __init finish_e820_parsing(void) |
413 | { |
414 | if (userdef) { |
415 | printk(KERN_INFO "user-defined physical RAM map:\n"); |
416 | --- a/arch/x86/kernel/entry_32-xen.S |
417 | +++ b/arch/x86/kernel/entry_32-xen.S |
418 | @@ -30,7 +30,7 @@ |
419 | * 18(%esp) - %eax |
420 | * 1C(%esp) - %ds |
421 | * 20(%esp) - %es |
422 | - * 24(%esp) - %gs |
423 | + * 24(%esp) - %fs |
424 | * 28(%esp) - orig_eax |
425 | * 2C(%esp) - %eip |
426 | * 30(%esp) - %cs |
427 | @@ -102,9 +102,9 @@ |
428 | |
429 | #define SAVE_ALL \ |
430 | cld; \ |
431 | - pushl %gs; \ |
432 | + pushl %fs; \ |
433 | CFI_ADJUST_CFA_OFFSET 4;\ |
434 | - /*CFI_REL_OFFSET gs, 0;*/\ |
435 | + /*CFI_REL_OFFSET fs, 0;*/\ |
436 | pushl %es; \ |
437 | CFI_ADJUST_CFA_OFFSET 4;\ |
438 | /*CFI_REL_OFFSET es, 0;*/\ |
439 | @@ -136,7 +136,7 @@ |
440 | movl %edx, %ds; \ |
441 | movl %edx, %es; \ |
442 | movl $(__KERNEL_PDA), %edx; \ |
443 | - movl %edx, %gs |
444 | + movl %edx, %fs |
445 | |
446 | #define RESTORE_INT_REGS \ |
447 | popl %ebx; \ |
448 | @@ -169,9 +169,9 @@ |
449 | 2: popl %es; \ |
450 | CFI_ADJUST_CFA_OFFSET -4;\ |
451 | /*CFI_RESTORE es;*/\ |
452 | -3: popl %gs; \ |
453 | +3: popl %fs; \ |
454 | CFI_ADJUST_CFA_OFFSET -4;\ |
455 | - /*CFI_RESTORE gs;*/\ |
456 | + /*CFI_RESTORE fs;*/\ |
457 | .pushsection .fixup,"ax"; \ |
458 | 4: movl $0,(%esp); \ |
459 | jmp 1b; \ |
460 | @@ -230,6 +230,7 @@ |
461 | CFI_ADJUST_CFA_OFFSET -4 |
462 | jmp syscall_exit |
463 | CFI_ENDPROC |
464 | +END(ret_from_fork) |
465 | |
466 | /* |
467 | * Return to user mode is not as complex as all this looks, |
468 | @@ -261,6 +262,7 @@ |
469 | # int/exception return? |
470 | jne work_pending |
471 | jmp restore_all |
472 | +END(ret_from_exception) |
473 | |
474 | #ifdef CONFIG_PREEMPT |
475 | ENTRY(resume_kernel) |
476 | @@ -275,6 +277,7 @@ |
477 | jz restore_all |
478 | call preempt_schedule_irq |
479 | jmp need_resched |
480 | +END(resume_kernel) |
481 | #endif |
482 | CFI_ENDPROC |
483 | |
484 | @@ -352,16 +355,17 @@ |
485 | movl PT_OLDESP(%esp), %ecx |
486 | xorl %ebp,%ebp |
487 | TRACE_IRQS_ON |
488 | -1: mov PT_GS(%esp), %gs |
489 | +1: mov PT_FS(%esp), %fs |
490 | ENABLE_INTERRUPTS_SYSEXIT |
491 | CFI_ENDPROC |
492 | .pushsection .fixup,"ax" |
493 | -2: movl $0,PT_GS(%esp) |
494 | +2: movl $0,PT_FS(%esp) |
495 | jmp 1b |
496 | .section __ex_table,"a" |
497 | .align 4 |
498 | .long 1b,2b |
499 | .popsection |
500 | +ENDPROC(sysenter_entry) |
501 | |
502 | # pv sysenter call handler stub |
503 | ENTRY(sysenter_entry_pv) |
504 | @@ -533,6 +537,7 @@ |
505 | jmp hypercall_page + (__HYPERVISOR_iret * 32) |
506 | #endif |
507 | CFI_ENDPROC |
508 | +ENDPROC(system_call) |
509 | |
510 | # perform work that needs to be done immediately before resumption |
511 | ALIGN |
512 | @@ -578,6 +583,7 @@ |
513 | xorl %edx, %edx |
514 | call do_notify_resume |
515 | jmp resume_userspace_sig |
516 | +END(work_pending) |
517 | |
518 | # perform syscall exit tracing |
519 | ALIGN |
520 | @@ -593,6 +599,7 @@ |
521 | cmpl $(nr_syscalls), %eax |
522 | jnae syscall_call |
523 | jmp syscall_exit |
524 | +END(syscall_trace_entry) |
525 | |
526 | # perform syscall exit tracing |
527 | ALIGN |
528 | @@ -606,6 +613,7 @@ |
529 | movl $1, %edx |
530 | call do_syscall_trace |
531 | jmp resume_userspace |
532 | +END(syscall_exit_work) |
533 | CFI_ENDPROC |
534 | |
535 | RING0_INT_FRAME # can't unwind into user space anyway |
536 | @@ -616,16 +624,18 @@ |
537 | GET_THREAD_INFO(%ebp) |
538 | movl $-EFAULT,PT_EAX(%esp) |
539 | jmp resume_userspace |
540 | +END(syscall_fault) |
541 | |
542 | syscall_badsys: |
543 | movl $-ENOSYS,PT_EAX(%esp) |
544 | jmp resume_userspace |
545 | +END(syscall_badsys) |
546 | CFI_ENDPROC |
547 | |
548 | #ifndef CONFIG_XEN |
549 | #define FIXUP_ESPFIX_STACK \ |
550 | /* since we are on a wrong stack, we cant make it a C code :( */ \ |
551 | - movl %gs:PDA_cpu, %ebx; \ |
552 | + movl %fs:PDA_cpu, %ebx; \ |
553 | PER_CPU(cpu_gdt_descr, %ebx); \ |
554 | movl GDS_address(%ebx), %ebx; \ |
555 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ |
556 | @@ -656,9 +666,9 @@ |
557 | ENTRY(interrupt) |
558 | .text |
559 | |
560 | -vector=0 |
561 | ENTRY(irq_entries_start) |
562 | RING0_INT_FRAME |
563 | +vector=0 |
564 | .rept NR_IRQS |
565 | ALIGN |
566 | .if vector |
567 | @@ -667,11 +677,16 @@ |
568 | 1: pushl $~(vector) |
569 | CFI_ADJUST_CFA_OFFSET 4 |
570 | jmp common_interrupt |
571 | -.data |
572 | + .previous |
573 | .long 1b |
574 | -.text |
575 | + .text |
576 | vector=vector+1 |
577 | .endr |
578 | +END(irq_entries_start) |
579 | + |
580 | +.previous |
581 | +END(interrupt) |
582 | +.previous |
583 | |
584 | /* |
585 | * the CPU automatically disables interrupts when executing an IRQ vector, |
586 | @@ -684,6 +699,7 @@ |
587 | movl %esp,%eax |
588 | call do_IRQ |
589 | jmp ret_from_intr |
590 | +ENDPROC(common_interrupt) |
591 | CFI_ENDPROC |
592 | |
593 | #define BUILD_INTERRUPT(name, nr) \ |
594 | @@ -696,10 +712,16 @@ |
595 | movl %esp,%eax; \ |
596 | call smp_/**/name; \ |
597 | jmp ret_from_intr; \ |
598 | - CFI_ENDPROC |
599 | + CFI_ENDPROC; \ |
600 | +ENDPROC(name) |
601 | |
602 | /* The include is where all of the SMP etc. interrupts come from */ |
603 | #include "entry_arch.h" |
604 | + |
605 | +/* This alternate entry is needed because we hijack the apic LVTT */ |
606 | +#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) |
607 | +BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) |
608 | +#endif |
609 | #else |
610 | #define UNWIND_ESPFIX_STACK |
611 | #endif |
612 | @@ -710,7 +732,7 @@ |
613 | CFI_ADJUST_CFA_OFFSET 4 |
614 | ALIGN |
615 | error_code: |
616 | - /* the function address is in %gs's slot on the stack */ |
617 | + /* the function address is in %fs's slot on the stack */ |
618 | pushl %es |
619 | CFI_ADJUST_CFA_OFFSET 4 |
620 | /*CFI_REL_OFFSET es, 0*/ |
621 | @@ -739,20 +761,20 @@ |
622 | CFI_ADJUST_CFA_OFFSET 4 |
623 | CFI_REL_OFFSET ebx, 0 |
624 | cld |
625 | - pushl %gs |
626 | + pushl %fs |
627 | CFI_ADJUST_CFA_OFFSET 4 |
628 | - /*CFI_REL_OFFSET gs, 0*/ |
629 | + /*CFI_REL_OFFSET fs, 0*/ |
630 | movl $(__KERNEL_PDA), %ecx |
631 | - movl %ecx, %gs |
632 | + movl %ecx, %fs |
633 | UNWIND_ESPFIX_STACK |
634 | popl %ecx |
635 | CFI_ADJUST_CFA_OFFSET -4 |
636 | /*CFI_REGISTER es, ecx*/ |
637 | - movl PT_GS(%esp), %edi # get the function address |
638 | + movl PT_FS(%esp), %edi # get the function address |
639 | movl PT_ORIG_EAX(%esp), %edx # get the error code |
640 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
641 | - mov %ecx, PT_GS(%esp) |
642 | - /*CFI_REL_OFFSET gs, ES*/ |
643 | + mov %ecx, PT_FS(%esp) |
644 | + /*CFI_REL_OFFSET fs, ES*/ |
645 | movl $(__USER_DS), %ecx |
646 | movl %ecx, %ds |
647 | movl %ecx, %es |
648 | @@ -839,7 +861,7 @@ |
649 | .byte 0x18 # pop %eax |
650 | .byte 0x1c # pop %ds |
651 | .byte 0x20 # pop %es |
652 | - .byte 0x24,0x24 # pop %gs |
653 | + .byte 0x24,0x24 # pop %fs |
654 | .byte 0x28,0x28,0x28 # add $4,%esp |
655 | .byte 0x2c # iret |
656 | .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) |
657 | @@ -905,6 +927,7 @@ |
658 | CFI_ADJUST_CFA_OFFSET 4 |
659 | jmp error_code |
660 | CFI_ENDPROC |
661 | +END(coprocessor_error) |
662 | |
663 | ENTRY(simd_coprocessor_error) |
664 | RING0_INT_FRAME |
665 | @@ -914,6 +937,7 @@ |
666 | CFI_ADJUST_CFA_OFFSET 4 |
667 | jmp error_code |
668 | CFI_ENDPROC |
669 | +END(simd_coprocessor_error) |
670 | |
671 | ENTRY(device_not_available) |
672 | RING0_INT_FRAME |
673 | @@ -936,6 +960,7 @@ |
674 | call math_state_restore |
675 | jmp ret_from_exception |
676 | CFI_ENDPROC |
677 | +END(device_not_available) |
678 | |
679 | #ifndef CONFIG_XEN |
680 | /* |
681 | @@ -1097,10 +1122,12 @@ |
682 | .align 4 |
683 | .long 1b,iret_exc |
684 | .previous |
685 | +END(native_iret) |
686 | |
687 | ENTRY(native_irq_enable_sysexit) |
688 | sti |
689 | sysexit |
690 | +END(native_irq_enable_sysexit) |
691 | #endif |
692 | |
693 | KPROBE_ENTRY(int3) |
694 | @@ -1123,6 +1150,7 @@ |
695 | CFI_ADJUST_CFA_OFFSET 4 |
696 | jmp error_code |
697 | CFI_ENDPROC |
698 | +END(overflow) |
699 | |
700 | ENTRY(bounds) |
701 | RING0_INT_FRAME |
702 | @@ -1132,6 +1160,7 @@ |
703 | CFI_ADJUST_CFA_OFFSET 4 |
704 | jmp error_code |
705 | CFI_ENDPROC |
706 | +END(bounds) |
707 | |
708 | ENTRY(invalid_op) |
709 | RING0_INT_FRAME |
710 | @@ -1141,6 +1170,7 @@ |
711 | CFI_ADJUST_CFA_OFFSET 4 |
712 | jmp error_code |
713 | CFI_ENDPROC |
714 | +END(invalid_op) |
715 | |
716 | ENTRY(coprocessor_segment_overrun) |
717 | RING0_INT_FRAME |
718 | @@ -1150,6 +1180,7 @@ |
719 | CFI_ADJUST_CFA_OFFSET 4 |
720 | jmp error_code |
721 | CFI_ENDPROC |
722 | +END(coprocessor_segment_overrun) |
723 | |
724 | ENTRY(invalid_TSS) |
725 | RING0_EC_FRAME |
726 | @@ -1157,6 +1188,7 @@ |
727 | CFI_ADJUST_CFA_OFFSET 4 |
728 | jmp error_code |
729 | CFI_ENDPROC |
730 | +END(invalid_TSS) |
731 | |
732 | ENTRY(segment_not_present) |
733 | RING0_EC_FRAME |
734 | @@ -1164,6 +1196,7 @@ |
735 | CFI_ADJUST_CFA_OFFSET 4 |
736 | jmp error_code |
737 | CFI_ENDPROC |
738 | +END(segment_not_present) |
739 | |
740 | ENTRY(stack_segment) |
741 | RING0_EC_FRAME |
742 | @@ -1171,6 +1204,7 @@ |
743 | CFI_ADJUST_CFA_OFFSET 4 |
744 | jmp error_code |
745 | CFI_ENDPROC |
746 | +END(stack_segment) |
747 | |
748 | KPROBE_ENTRY(general_protection) |
749 | RING0_EC_FRAME |
750 | @@ -1186,6 +1220,7 @@ |
751 | CFI_ADJUST_CFA_OFFSET 4 |
752 | jmp error_code |
753 | CFI_ENDPROC |
754 | +END(alignment_check) |
755 | |
756 | ENTRY(divide_error) |
757 | RING0_INT_FRAME |
758 | @@ -1195,6 +1230,7 @@ |
759 | CFI_ADJUST_CFA_OFFSET 4 |
760 | jmp error_code |
761 | CFI_ENDPROC |
762 | +END(divide_error) |
763 | |
764 | #ifdef CONFIG_X86_MCE |
765 | ENTRY(machine_check) |
766 | @@ -1205,6 +1241,7 @@ |
767 | CFI_ADJUST_CFA_OFFSET 4 |
768 | jmp error_code |
769 | CFI_ENDPROC |
770 | +END(machine_check) |
771 | #endif |
772 | |
773 | #ifndef CONFIG_XEN |
774 | @@ -1224,6 +1261,7 @@ |
775 | CFI_ADJUST_CFA_OFFSET 4 |
776 | jmp error_code |
777 | CFI_ENDPROC |
778 | +END(spurious_interrupt_bug) |
779 | |
780 | ENTRY(kernel_thread_helper) |
781 | pushl $0 # fake return address for unwinder |
782 | --- a/arch/x86/kernel/entry_64-xen.S |
783 | +++ b/arch/x86/kernel/entry_64-xen.S |
784 | @@ -629,6 +629,9 @@ |
785 | ENTRY(call_function_interrupt) |
786 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt |
787 | END(call_function_interrupt) |
788 | +ENTRY(irq_move_cleanup_interrupt) |
789 | + apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt |
790 | +END(irq_move_cleanup_interrupt) |
791 | #endif |
792 | |
793 | ENTRY(apic_timer_interrupt) |
794 | --- a/arch/x86/kernel/genapic_64-xen.c |
795 | +++ b/arch/x86/kernel/genapic_64-xen.c |
796 | @@ -65,8 +65,8 @@ |
797 | * Some x86_64 machines use physical APIC mode regardless of how many |
798 | * procs/clusters are present (x86_64 ES7000 is an example). |
799 | */ |
800 | - if (acpi_fadt.revision > FADT2_REVISION_ID) |
801 | - if (acpi_fadt.force_apic_physical_destination_mode) { |
802 | + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) |
803 | + if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { |
804 | genapic = &apic_cluster; |
805 | goto print; |
806 | } |
807 | --- a/arch/x86/kernel/head64-xen.c |
808 | +++ b/arch/x86/kernel/head64-xen.c |
809 | @@ -42,8 +42,6 @@ |
810 | #define OLD_CL_BASE_ADDR 0x90000 |
811 | #define OLD_CL_OFFSET 0x90022 |
812 | |
813 | -extern char saved_command_line[]; |
814 | - |
815 | static void __init copy_bootdata(char *real_mode_data) |
816 | { |
817 | #ifndef CONFIG_XEN |
818 | @@ -59,14 +57,14 @@ |
819 | new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; |
820 | } |
821 | command_line = (char *) ((u64)(new_data)); |
822 | - memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); |
823 | + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); |
824 | #else |
825 | int max_cmdline; |
826 | |
827 | if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) |
828 | max_cmdline = COMMAND_LINE_SIZE; |
829 | - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); |
830 | - saved_command_line[max_cmdline-1] = '\0'; |
831 | + memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline); |
832 | + boot_command_line[max_cmdline-1] = '\0'; |
833 | #endif |
834 | } |
835 | |
836 | --- a/arch/x86/kernel/head_32-xen.S |
837 | +++ b/arch/x86/kernel/head_32-xen.S |
838 | @@ -27,6 +27,7 @@ |
839 | #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability |
840 | #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id |
841 | |
842 | +.section .text.head,"ax",@progbits |
843 | #define VIRT_ENTRY_OFFSET 0x0 |
844 | .org VIRT_ENTRY_OFFSET |
845 | ENTRY(startup_32) |
846 | @@ -60,11 +61,11 @@ |
847 | |
848 | movb $1,X86_HARD_MATH |
849 | |
850 | - xorl %eax,%eax # Clear FS |
851 | - movl %eax,%fs |
852 | + xorl %eax,%eax # Clear GS |
853 | + movl %eax,%gs |
854 | |
855 | movl $(__KERNEL_PDA),%eax |
856 | - mov %eax,%gs |
857 | + mov %eax,%fs |
858 | |
859 | cld # gcc2 wants the direction flag cleared at all times |
860 | |
861 | @@ -75,7 +76,7 @@ |
862 | * Point the GDT at this CPU's PDA. This will be |
863 | * cpu_gdt_table and boot_pda. |
864 | */ |
865 | -setup_pda: |
866 | +ENTRY(setup_pda) |
867 | /* get the PDA pointer */ |
868 | movl $boot_pda, %eax |
869 | |
870 | --- a/arch/x86/kernel/io_apic_32-xen.c |
871 | +++ b/arch/x86/kernel/io_apic_32-xen.c |
872 | @@ -164,7 +164,7 @@ |
873 | */ |
874 | static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) |
875 | { |
876 | - volatile struct io_apic *io_apic = io_apic_base(apic); |
877 | + volatile struct io_apic __iomem *io_apic = io_apic_base(apic); |
878 | if (sis_apic_bug) |
879 | writel(reg, &io_apic->index); |
880 | writel(value, &io_apic->data); |
881 | @@ -387,7 +387,7 @@ |
882 | break; |
883 | entry = irq_2_pin + entry->next; |
884 | } |
885 | - set_native_irq_info(irq, cpumask); |
886 | + irq_desc[irq].affinity = cpumask; |
887 | spin_unlock_irqrestore(&ioapic_lock, flags); |
888 | } |
889 | |
890 | @@ -526,8 +526,8 @@ |
891 | package_index = CPU_TO_PACKAGEINDEX(i); |
892 | for (j = 0; j < NR_IRQS; j++) { |
893 | unsigned long value_now, delta; |
894 | - /* Is this an active IRQ? */ |
895 | - if (!irq_desc[j].action) |
896 | + /* Is this an active IRQ or balancing disabled ? */ |
897 | + if (!irq_desc[j].action || irq_balancing_disabled(j)) |
898 | continue; |
899 | if ( package_index == i ) |
900 | IRQ_DELTA(package_index,j) = 0; |
901 | @@ -780,7 +780,7 @@ |
902 | return 0; |
903 | } |
904 | |
905 | -int __init irqbalance_disable(char *str) |
906 | +int __devinit irqbalance_disable(char *str) |
907 | { |
908 | irqbalance_disabled = 1; |
909 | return 1; |
910 | @@ -1319,11 +1319,9 @@ |
911 | trigger == IOAPIC_LEVEL) |
912 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
913 | handle_fasteoi_irq, "fasteoi"); |
914 | - else { |
915 | - irq_desc[irq].status |= IRQ_DELAYED_DISABLE; |
916 | + else |
917 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
918 | handle_edge_irq, "edge"); |
919 | - } |
920 | set_intr_gate(vector, interrupt[irq]); |
921 | } |
922 | #else |
923 | @@ -1397,7 +1395,6 @@ |
924 | } |
925 | spin_lock_irqsave(&ioapic_lock, flags); |
926 | __ioapic_write_entry(apic, pin, entry); |
927 | - set_native_irq_info(irq, TARGET_CPUS); |
928 | spin_unlock_irqrestore(&ioapic_lock, flags); |
929 | } |
930 | } |
931 | @@ -1628,7 +1625,7 @@ |
932 | v = apic_read(APIC_LVR); |
933 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); |
934 | ver = GET_APIC_VERSION(v); |
935 | - maxlvt = get_maxlvt(); |
936 | + maxlvt = lapic_get_maxlvt(); |
937 | |
938 | v = apic_read(APIC_TASKPRI); |
939 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); |
940 | @@ -1962,7 +1959,7 @@ |
941 | #endif |
942 | |
943 | #ifndef CONFIG_XEN |
944 | -static int no_timer_check __initdata; |
945 | +int no_timer_check __initdata; |
946 | |
947 | static int __init notimercheck(char *s) |
948 | { |
949 | @@ -2355,7 +2352,7 @@ |
950 | |
951 | disable_8259A_irq(0); |
952 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, |
953 | - "fasteio"); |
954 | + "fasteoi"); |
955 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
956 | enable_8259A_irq(0); |
957 | |
958 | @@ -2646,7 +2643,7 @@ |
959 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
960 | |
961 | write_msi_msg(irq, &msg); |
962 | - set_native_irq_info(irq, mask); |
963 | + irq_desc[irq].affinity = mask; |
964 | } |
965 | #endif /* CONFIG_SMP */ |
966 | |
967 | @@ -2665,25 +2662,32 @@ |
968 | .retrigger = ioapic_retrigger_irq, |
969 | }; |
970 | |
971 | -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) |
972 | +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) |
973 | { |
974 | struct msi_msg msg; |
975 | - int ret; |
976 | + int irq, ret; |
977 | + irq = create_irq(); |
978 | + if (irq < 0) |
979 | + return irq; |
980 | + |
981 | + set_irq_msi(irq, desc); |
982 | ret = msi_compose_msg(dev, irq, &msg); |
983 | - if (ret < 0) |
984 | + if (ret < 0) { |
985 | + destroy_irq(irq); |
986 | return ret; |
987 | + } |
988 | |
989 | write_msi_msg(irq, &msg); |
990 | |
991 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, |
992 | "edge"); |
993 | |
994 | - return 0; |
995 | + return irq; |
996 | } |
997 | |
998 | void arch_teardown_msi_irq(unsigned int irq) |
999 | { |
1000 | - return; |
1001 | + destroy_irq(irq); |
1002 | } |
1003 | |
1004 | #endif /* CONFIG_PCI_MSI */ |
1005 | @@ -2723,7 +2727,7 @@ |
1006 | dest = cpu_mask_to_apicid(mask); |
1007 | |
1008 | target_ht_irq(irq, dest); |
1009 | - set_native_irq_info(irq, mask); |
1010 | + irq_desc[irq].affinity = mask; |
1011 | } |
1012 | #endif |
1013 | |
1014 | @@ -2931,7 +2935,6 @@ |
1015 | |
1016 | spin_lock_irqsave(&ioapic_lock, flags); |
1017 | __ioapic_write_entry(ioapic, pin, entry); |
1018 | - set_native_irq_info(irq, TARGET_CPUS); |
1019 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1020 | |
1021 | return 0; |
1022 | --- a/arch/x86/kernel/io_apic_64-xen.c |
1023 | +++ b/arch/x86/kernel/io_apic_64-xen.c |
1024 | @@ -36,6 +36,7 @@ |
1025 | #include <acpi/acpi_bus.h> |
1026 | #endif |
1027 | |
1028 | +#include <asm/idle.h> |
1029 | #include <asm/io.h> |
1030 | #include <asm/smp.h> |
1031 | #include <asm/desc.h> |
1032 | @@ -47,7 +48,20 @@ |
1033 | #include <asm/msidef.h> |
1034 | #include <asm/hypertransport.h> |
1035 | |
1036 | -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result); |
1037 | +struct irq_cfg { |
1038 | +#ifndef CONFIG_XEN |
1039 | + cpumask_t domain; |
1040 | + cpumask_t old_domain; |
1041 | +#endif |
1042 | + unsigned move_cleanup_count; |
1043 | + u8 vector; |
1044 | + u8 move_in_progress : 1; |
1045 | +}; |
1046 | + |
1047 | +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
1048 | +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly; |
1049 | + |
1050 | +static int assign_irq_vector(int irq, cpumask_t mask); |
1051 | |
1052 | #define __apicdebuginit __init |
1053 | |
1054 | @@ -88,7 +102,7 @@ |
1055 | * Rough estimation of how many shared IRQs there are, can |
1056 | * be changed anytime. |
1057 | */ |
1058 | -#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS |
1059 | +#define MAX_PLUS_SHARED_IRQS NR_IRQS |
1060 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) |
1061 | |
1062 | /* |
1063 | @@ -259,21 +273,19 @@ |
1064 | |
1065 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) |
1066 | { |
1067 | + struct irq_cfg *cfg = irq_cfg + irq; |
1068 | unsigned long flags; |
1069 | unsigned int dest; |
1070 | cpumask_t tmp; |
1071 | - int vector; |
1072 | |
1073 | cpus_and(tmp, mask, cpu_online_map); |
1074 | if (cpus_empty(tmp)) |
1075 | - tmp = TARGET_CPUS; |
1076 | - |
1077 | - cpus_and(mask, tmp, CPU_MASK_ALL); |
1078 | + return; |
1079 | |
1080 | - vector = assign_irq_vector(irq, mask, &tmp); |
1081 | - if (vector < 0) |
1082 | + if (assign_irq_vector(irq, mask)) |
1083 | return; |
1084 | |
1085 | + cpus_and(tmp, cfg->domain, mask); |
1086 | dest = cpu_mask_to_apicid(tmp); |
1087 | |
1088 | /* |
1089 | @@ -282,8 +294,8 @@ |
1090 | dest = SET_APIC_LOGICAL_ID(dest); |
1091 | |
1092 | spin_lock_irqsave(&ioapic_lock, flags); |
1093 | - __target_IO_APIC_irq(irq, dest, vector); |
1094 | - set_native_irq_info(irq, mask); |
1095 | + __target_IO_APIC_irq(irq, dest, cfg->vector); |
1096 | + irq_desc[irq].affinity = mask; |
1097 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1098 | } |
1099 | #endif |
1100 | @@ -329,11 +341,11 @@ |
1101 | reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ |
1102 | reg ACTION; \ |
1103 | io_apic_modify(entry->apic, reg); \ |
1104 | + FINAL; \ |
1105 | if (!entry->next) \ |
1106 | break; \ |
1107 | entry = irq_2_pin + entry->next; \ |
1108 | } \ |
1109 | - FINAL; \ |
1110 | } |
1111 | |
1112 | #define DO_ACTION(name,R,ACTION, FINAL) \ |
1113 | @@ -666,74 +678,58 @@ |
1114 | return irq; |
1115 | } |
1116 | |
1117 | -static inline int IO_APIC_irq_trigger(int irq) |
1118 | -{ |
1119 | - int apic, idx, pin; |
1120 | - |
1121 | - for (apic = 0; apic < nr_ioapics; apic++) { |
1122 | - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
1123 | - idx = find_irq_entry(apic,pin,mp_INT); |
1124 | - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) |
1125 | - return irq_trigger(idx); |
1126 | - } |
1127 | - } |
1128 | - /* |
1129 | - * nonexistent IRQs are edge default |
1130 | - */ |
1131 | - return 0; |
1132 | -} |
1133 | - |
1134 | -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ |
1135 | -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; |
1136 | - |
1137 | -static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) |
1138 | +static int __assign_irq_vector(int irq, cpumask_t mask) |
1139 | { |
1140 | - int vector; |
1141 | struct physdev_irq irq_op; |
1142 | + struct irq_cfg *cfg; |
1143 | |
1144 | - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); |
1145 | + BUG_ON((unsigned)irq >= NR_IRQS); |
1146 | + cfg = &irq_cfg[irq]; |
1147 | |
1148 | - cpus_and(*result, mask, cpu_online_map); |
1149 | + if ((cfg->move_in_progress) || cfg->move_cleanup_count) |
1150 | + return -EBUSY; |
1151 | |
1152 | - if (irq_vector[irq] > 0) |
1153 | - return irq_vector[irq]; |
1154 | + if (cfg->vector) |
1155 | + return 0; |
1156 | |
1157 | irq_op.irq = irq; |
1158 | if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) |
1159 | return -ENOSPC; |
1160 | |
1161 | - vector = irq_op.vector; |
1162 | - irq_vector[irq] = vector; |
1163 | + cfg->vector = irq_op.vector; |
1164 | |
1165 | - return vector; |
1166 | + return 0; |
1167 | } |
1168 | |
1169 | -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) |
1170 | +static int assign_irq_vector(int irq, cpumask_t mask) |
1171 | { |
1172 | - int vector; |
1173 | + int err; |
1174 | unsigned long flags; |
1175 | |
1176 | spin_lock_irqsave(&vector_lock, flags); |
1177 | - vector = __assign_irq_vector(irq, mask, result); |
1178 | + err = __assign_irq_vector(irq, mask); |
1179 | spin_unlock_irqrestore(&vector_lock, flags); |
1180 | - return vector; |
1181 | + return err; |
1182 | } |
1183 | |
1184 | #ifndef CONFIG_XEN |
1185 | static void __clear_irq_vector(int irq) |
1186 | { |
1187 | + struct irq_cfg *cfg; |
1188 | cpumask_t mask; |
1189 | int cpu, vector; |
1190 | |
1191 | - BUG_ON(!irq_vector[irq]); |
1192 | + BUG_ON((unsigned)irq >= NR_IRQS); |
1193 | + cfg = &irq_cfg[irq]; |
1194 | + BUG_ON(!cfg->vector); |
1195 | |
1196 | - vector = irq_vector[irq]; |
1197 | - cpus_and(mask, irq_domain[irq], cpu_online_map); |
1198 | + vector = cfg->vector; |
1199 | + cpus_and(mask, cfg->domain, cpu_online_map); |
1200 | for_each_cpu_mask(cpu, mask) |
1201 | per_cpu(vector_irq, cpu)[vector] = -1; |
1202 | |
1203 | - irq_vector[irq] = 0; |
1204 | - irq_domain[irq] = CPU_MASK_NONE; |
1205 | + cfg->vector = 0; |
1206 | + cfg->domain = CPU_MASK_NONE; |
1207 | } |
1208 | |
1209 | void __setup_vector_irq(int cpu) |
1210 | @@ -743,10 +739,10 @@ |
1211 | int irq, vector; |
1212 | |
1213 | /* Mark the inuse vectors */ |
1214 | - for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) { |
1215 | - if (!cpu_isset(cpu, irq_domain[irq])) |
1216 | + for (irq = 0; irq < NR_IRQS; ++irq) { |
1217 | + if (!cpu_isset(cpu, irq_cfg[irq].domain)) |
1218 | continue; |
1219 | - vector = irq_vector[irq]; |
1220 | + vector = irq_cfg[irq].vector; |
1221 | per_cpu(vector_irq, cpu)[vector] = irq; |
1222 | } |
1223 | /* Mark the free vectors */ |
1224 | @@ -754,41 +750,49 @@ |
1225 | irq = per_cpu(vector_irq, cpu)[vector]; |
1226 | if (irq < 0) |
1227 | continue; |
1228 | - if (!cpu_isset(cpu, irq_domain[irq])) |
1229 | + if (!cpu_isset(cpu, irq_cfg[irq].domain)) |
1230 | per_cpu(vector_irq, cpu)[vector] = -1; |
1231 | } |
1232 | } |
1233 | |
1234 | -extern void (*interrupt[NR_IRQS])(void); |
1235 | - |
1236 | static struct irq_chip ioapic_chip; |
1237 | |
1238 | -#define IOAPIC_AUTO -1 |
1239 | -#define IOAPIC_EDGE 0 |
1240 | -#define IOAPIC_LEVEL 1 |
1241 | - |
1242 | -static void ioapic_register_intr(int irq, int vector, unsigned long trigger) |
1243 | +static void ioapic_register_intr(int irq, unsigned long trigger) |
1244 | { |
1245 | - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
1246 | - trigger == IOAPIC_LEVEL) |
1247 | + if (trigger) |
1248 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
1249 | handle_fasteoi_irq, "fasteoi"); |
1250 | - else { |
1251 | - irq_desc[irq].status |= IRQ_DELAYED_DISABLE; |
1252 | + else |
1253 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
1254 | handle_edge_irq, "edge"); |
1255 | - } |
1256 | } |
1257 | #else |
1258 | -#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) |
1259 | +#define ioapic_register_intr(irq,trigger) ((void)0) |
1260 | #endif /* !CONFIG_XEN */ |
1261 | |
1262 | -static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) |
1263 | +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, |
1264 | + int trigger, int polarity) |
1265 | { |
1266 | + struct irq_cfg *cfg = irq_cfg + irq; |
1267 | struct IO_APIC_route_entry entry; |
1268 | - int vector; |
1269 | - unsigned long flags; |
1270 | + cpumask_t mask; |
1271 | |
1272 | + if (!IO_APIC_IRQ(irq)) |
1273 | + return; |
1274 | + |
1275 | + mask = TARGET_CPUS; |
1276 | + if (assign_irq_vector(irq, mask)) |
1277 | + return; |
1278 | + |
1279 | +#ifndef CONFIG_XEN |
1280 | + cpus_and(mask, cfg->domain, mask); |
1281 | +#endif |
1282 | + |
1283 | + apic_printk(APIC_VERBOSE,KERN_DEBUG |
1284 | + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
1285 | + "IRQ %d Mode:%i Active:%i)\n", |
1286 | + apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, |
1287 | + irq, trigger, polarity); |
1288 | |
1289 | /* |
1290 | * add it to the IO-APIC irq-routing table: |
1291 | @@ -797,41 +801,23 @@ |
1292 | |
1293 | entry.delivery_mode = INT_DELIVERY_MODE; |
1294 | entry.dest_mode = INT_DEST_MODE; |
1295 | + entry.dest = cpu_mask_to_apicid(mask); |
1296 | entry.mask = 0; /* enable IRQ */ |
1297 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); |
1298 | + entry.trigger = trigger; |
1299 | + entry.polarity = polarity; |
1300 | + entry.vector = cfg->vector; |
1301 | |
1302 | - entry.trigger = irq_trigger(idx); |
1303 | - entry.polarity = irq_polarity(idx); |
1304 | - |
1305 | - if (irq_trigger(idx)) { |
1306 | - entry.trigger = 1; |
1307 | + /* Mask level triggered irqs. |
1308 | + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. |
1309 | + */ |
1310 | + if (trigger) |
1311 | entry.mask = 1; |
1312 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); |
1313 | - } |
1314 | |
1315 | - if (/* !apic && */ !IO_APIC_IRQ(irq)) |
1316 | - return; |
1317 | - |
1318 | - if (IO_APIC_IRQ(irq)) { |
1319 | - cpumask_t mask; |
1320 | - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); |
1321 | - if (vector < 0) |
1322 | - return; |
1323 | - |
1324 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); |
1325 | - entry.vector = vector; |
1326 | - |
1327 | - ioapic_register_intr(irq, vector, IOAPIC_AUTO); |
1328 | - if (!apic && (irq < 16)) |
1329 | - disable_8259A_irq(irq); |
1330 | - } |
1331 | + ioapic_register_intr(irq, trigger); |
1332 | + if (irq < 16) |
1333 | + disable_8259A_irq(irq); |
1334 | |
1335 | ioapic_write_entry(apic, pin, entry); |
1336 | - |
1337 | - spin_lock_irqsave(&ioapic_lock, flags); |
1338 | - set_native_irq_info(irq, TARGET_CPUS); |
1339 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
1340 | - |
1341 | } |
1342 | |
1343 | static void __init setup_IO_APIC_irqs(void) |
1344 | @@ -856,8 +842,8 @@ |
1345 | irq = pin_2_irq(idx, apic, pin); |
1346 | add_pin_to_irq(irq, apic, pin); |
1347 | |
1348 | - setup_IO_APIC_irq(apic, pin, idx, irq); |
1349 | - |
1350 | + setup_IO_APIC_irq(apic, pin, irq, |
1351 | + irq_trigger(idx), irq_polarity(idx)); |
1352 | } |
1353 | } |
1354 | |
1355 | @@ -888,7 +874,7 @@ |
1356 | */ |
1357 | entry.dest_mode = INT_DEST_MODE; |
1358 | entry.mask = 0; /* unmask IRQ now */ |
1359 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); |
1360 | + entry.dest = cpu_mask_to_apicid(TARGET_CPUS); |
1361 | entry.delivery_mode = INT_DELIVERY_MODE; |
1362 | entry.polarity = 0; |
1363 | entry.trigger = 0; |
1364 | @@ -988,18 +974,17 @@ |
1365 | |
1366 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
1367 | |
1368 | - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" |
1369 | - " Stat Dest Deli Vect: \n"); |
1370 | + printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" |
1371 | + " Stat Dmod Deli Vect: \n"); |
1372 | |
1373 | for (i = 0; i <= reg_01.bits.entries; i++) { |
1374 | struct IO_APIC_route_entry entry; |
1375 | |
1376 | entry = ioapic_read_entry(apic, i); |
1377 | |
1378 | - printk(KERN_DEBUG " %02x %03X %02X ", |
1379 | + printk(KERN_DEBUG " %02x %03X ", |
1380 | i, |
1381 | - entry.dest.logical.logical_dest, |
1382 | - entry.dest.physical.physical_dest |
1383 | + entry.dest |
1384 | ); |
1385 | |
1386 | printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", |
1387 | @@ -1263,8 +1248,7 @@ |
1388 | entry.dest_mode = 0; /* Physical */ |
1389 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ |
1390 | entry.vector = 0; |
1391 | - entry.dest.physical.physical_dest = |
1392 | - GET_APIC_ID(apic_read(APIC_ID)); |
1393 | + entry.dest = GET_APIC_ID(apic_read(APIC_ID)); |
1394 | |
1395 | /* |
1396 | * Add it to the IO-APIC irq-routing table: |
1397 | @@ -1349,16 +1333,15 @@ |
1398 | |
1399 | static int ioapic_retrigger_irq(unsigned int irq) |
1400 | { |
1401 | + struct irq_cfg *cfg = &irq_cfg[irq]; |
1402 | cpumask_t mask; |
1403 | - unsigned vector; |
1404 | unsigned long flags; |
1405 | |
1406 | spin_lock_irqsave(&vector_lock, flags); |
1407 | - vector = irq_vector[irq]; |
1408 | cpus_clear(mask); |
1409 | - cpu_set(first_cpu(irq_domain[irq]), mask); |
1410 | + cpu_set(first_cpu(cfg->domain), mask); |
1411 | |
1412 | - send_IPI_mask(mask, vector); |
1413 | + send_IPI_mask(mask, cfg->vector); |
1414 | spin_unlock_irqrestore(&vector_lock, flags); |
1415 | |
1416 | return 1; |
1417 | @@ -1373,8 +1356,68 @@ |
1418 | * races. |
1419 | */ |
1420 | |
1421 | +#ifdef CONFIG_SMP |
1422 | +asmlinkage void smp_irq_move_cleanup_interrupt(void) |
1423 | +{ |
1424 | + unsigned vector, me; |
1425 | + ack_APIC_irq(); |
1426 | + exit_idle(); |
1427 | + irq_enter(); |
1428 | + |
1429 | + me = smp_processor_id(); |
1430 | + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
1431 | + unsigned int irq; |
1432 | + struct irq_desc *desc; |
1433 | + struct irq_cfg *cfg; |
1434 | + irq = __get_cpu_var(vector_irq)[vector]; |
1435 | + if (irq >= NR_IRQS) |
1436 | + continue; |
1437 | + |
1438 | + desc = irq_desc + irq; |
1439 | + cfg = irq_cfg + irq; |
1440 | + spin_lock(&desc->lock); |
1441 | + if (!cfg->move_cleanup_count) |
1442 | + goto unlock; |
1443 | + |
1444 | + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) |
1445 | + goto unlock; |
1446 | + |
1447 | + __get_cpu_var(vector_irq)[vector] = -1; |
1448 | + cfg->move_cleanup_count--; |
1449 | +unlock: |
1450 | + spin_unlock(&desc->lock); |
1451 | + } |
1452 | + |
1453 | + irq_exit(); |
1454 | +} |
1455 | + |
1456 | +static void irq_complete_move(unsigned int irq) |
1457 | +{ |
1458 | + struct irq_cfg *cfg = irq_cfg + irq; |
1459 | + unsigned vector, me; |
1460 | + |
1461 | + if (likely(!cfg->move_in_progress)) |
1462 | + return; |
1463 | + |
1464 | + vector = ~get_irq_regs()->orig_rax; |
1465 | + me = smp_processor_id(); |
1466 | + if ((vector == cfg->vector) && |
1467 | + cpu_isset(smp_processor_id(), cfg->domain)) { |
1468 | + cpumask_t cleanup_mask; |
1469 | + |
1470 | + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); |
1471 | + cfg->move_cleanup_count = cpus_weight(cleanup_mask); |
1472 | + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); |
1473 | + cfg->move_in_progress = 0; |
1474 | + } |
1475 | +} |
1476 | +#else |
1477 | +static inline void irq_complete_move(unsigned int irq) {} |
1478 | +#endif |
1479 | + |
1480 | static void ack_apic_edge(unsigned int irq) |
1481 | { |
1482 | + irq_complete_move(irq); |
1483 | move_native_irq(irq); |
1484 | ack_APIC_irq(); |
1485 | } |
1486 | @@ -1383,6 +1426,7 @@ |
1487 | { |
1488 | int do_unmask_irq = 0; |
1489 | |
1490 | + irq_complete_move(irq); |
1491 | #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) |
1492 | /* If we are moving the irq we need to mask it */ |
1493 | if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { |
1494 | @@ -1434,7 +1478,7 @@ |
1495 | */ |
1496 | for (irq = 0; irq < NR_IRQS ; irq++) { |
1497 | int tmp = irq; |
1498 | - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { |
1499 | + if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) { |
1500 | /* |
1501 | * Hmm.. We don't have an entry for this, |
1502 | * so default to an old-fashioned 8259 |
1503 | @@ -1532,7 +1576,7 @@ |
1504 | |
1505 | entry1.dest_mode = 0; /* physical delivery */ |
1506 | entry1.mask = 0; /* unmask IRQ now */ |
1507 | - entry1.dest.physical.physical_dest = hard_smp_processor_id(); |
1508 | + entry1.dest = hard_smp_processor_id(); |
1509 | entry1.delivery_mode = dest_ExtINT; |
1510 | entry1.polarity = entry0.polarity; |
1511 | entry1.trigger = 0; |
1512 | @@ -1576,15 +1620,14 @@ |
1513 | */ |
1514 | static inline void check_timer(void) |
1515 | { |
1516 | + struct irq_cfg *cfg = irq_cfg + 0; |
1517 | int apic1, pin1, apic2, pin2; |
1518 | - int vector; |
1519 | - cpumask_t mask; |
1520 | |
1521 | /* |
1522 | * get/set the timer IRQ vector: |
1523 | */ |
1524 | disable_8259A_irq(0); |
1525 | - vector = assign_irq_vector(0, TARGET_CPUS, &mask); |
1526 | + assign_irq_vector(0, TARGET_CPUS); |
1527 | |
1528 | /* |
1529 | * Subtle, code in do_timer_interrupt() expects an AEOI |
1530 | @@ -1604,7 +1647,7 @@ |
1531 | apic2 = ioapic_i8259.apic; |
1532 | |
1533 | apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", |
1534 | - vector, apic1, pin1, apic2, pin2); |
1535 | + cfg->vector, apic1, pin1, apic2, pin2); |
1536 | |
1537 | if (pin1 != -1) { |
1538 | /* |
1539 | @@ -1635,7 +1678,7 @@ |
1540 | /* |
1541 | * legacy devices should be connected to IO APIC #0 |
1542 | */ |
1543 | - setup_ExtINT_IRQ0_pin(apic2, pin2, vector); |
1544 | + setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); |
1545 | if (timer_irq_works()) { |
1546 | apic_printk(APIC_VERBOSE," works.\n"); |
1547 | nmi_watchdog_default(); |
1548 | @@ -1660,14 +1703,14 @@ |
1549 | |
1550 | disable_8259A_irq(0); |
1551 | irq_desc[0].chip = &lapic_irq_type; |
1552 | - apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
1553 | + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
1554 | enable_8259A_irq(0); |
1555 | |
1556 | if (timer_irq_works()) { |
1557 | apic_printk(APIC_VERBOSE," works.\n"); |
1558 | return; |
1559 | } |
1560 | - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); |
1561 | + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
1562 | apic_printk(APIC_VERBOSE," failed.\n"); |
1563 | |
1564 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); |
1565 | @@ -1821,19 +1864,16 @@ |
1566 | /* Allocate an unused irq */ |
1567 | int irq; |
1568 | int new; |
1569 | - int vector = 0; |
1570 | unsigned long flags; |
1571 | - cpumask_t mask; |
1572 | |
1573 | irq = -ENOSPC; |
1574 | spin_lock_irqsave(&vector_lock, flags); |
1575 | for (new = (NR_IRQS - 1); new >= 0; new--) { |
1576 | if (platform_legacy_irq(new)) |
1577 | continue; |
1578 | - if (irq_vector[new] != 0) |
1579 | + if (irq_cfg[new].vector != 0) |
1580 | continue; |
1581 | - vector = __assign_irq_vector(new, TARGET_CPUS, &mask); |
1582 | - if (likely(vector > 0)) |
1583 | + if (__assign_irq_vector(new, TARGET_CPUS) == 0) |
1584 | irq = new; |
1585 | break; |
1586 | } |
1587 | @@ -1863,12 +1903,15 @@ |
1588 | #ifdef CONFIG_PCI_MSI |
1589 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) |
1590 | { |
1591 | - int vector; |
1592 | + struct irq_cfg *cfg = irq_cfg + irq; |
1593 | + int err; |
1594 | unsigned dest; |
1595 | cpumask_t tmp; |
1596 | |
1597 | - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); |
1598 | - if (vector >= 0) { |
1599 | + tmp = TARGET_CPUS; |
1600 | + err = assign_irq_vector(irq, tmp); |
1601 | + if (!err) { |
1602 | + cpus_and(tmp, cfg->domain, tmp); |
1603 | dest = cpu_mask_to_apicid(tmp); |
1604 | |
1605 | msg->address_hi = MSI_ADDR_BASE_HI; |
1606 | @@ -1888,40 +1931,38 @@ |
1607 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? |
1608 | MSI_DATA_DELIVERY_FIXED: |
1609 | MSI_DATA_DELIVERY_LOWPRI) | |
1610 | - MSI_DATA_VECTOR(vector); |
1611 | + MSI_DATA_VECTOR(cfg->vector); |
1612 | } |
1613 | - return vector; |
1614 | + return err; |
1615 | } |
1616 | |
1617 | #ifdef CONFIG_SMP |
1618 | static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) |
1619 | { |
1620 | + struct irq_cfg *cfg = irq_cfg + irq; |
1621 | struct msi_msg msg; |
1622 | unsigned int dest; |
1623 | cpumask_t tmp; |
1624 | - int vector; |
1625 | |
1626 | cpus_and(tmp, mask, cpu_online_map); |
1627 | if (cpus_empty(tmp)) |
1628 | - tmp = TARGET_CPUS; |
1629 | - |
1630 | - cpus_and(mask, tmp, CPU_MASK_ALL); |
1631 | + return; |
1632 | |
1633 | - vector = assign_irq_vector(irq, mask, &tmp); |
1634 | - if (vector < 0) |
1635 | + if (assign_irq_vector(irq, mask)) |
1636 | return; |
1637 | |
1638 | + cpus_and(tmp, cfg->domain, mask); |
1639 | dest = cpu_mask_to_apicid(tmp); |
1640 | |
1641 | read_msi_msg(irq, &msg); |
1642 | |
1643 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
1644 | - msg.data |= MSI_DATA_VECTOR(vector); |
1645 | + msg.data |= MSI_DATA_VECTOR(cfg->vector); |
1646 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
1647 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
1648 | |
1649 | write_msi_msg(irq, &msg); |
1650 | - set_native_irq_info(irq, mask); |
1651 | + irq_desc[irq].affinity = mask; |
1652 | } |
1653 | #endif /* CONFIG_SMP */ |
1654 | |
1655 | @@ -1940,24 +1981,31 @@ |
1656 | .retrigger = ioapic_retrigger_irq, |
1657 | }; |
1658 | |
1659 | -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) |
1660 | +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) |
1661 | { |
1662 | struct msi_msg msg; |
1663 | - int ret; |
1664 | + int irq, ret; |
1665 | + irq = create_irq(); |
1666 | + if (irq < 0) |
1667 | + return irq; |
1668 | + |
1669 | + set_irq_msi(irq, desc); |
1670 | ret = msi_compose_msg(dev, irq, &msg); |
1671 | - if (ret < 0) |
1672 | + if (ret < 0) { |
1673 | + destroy_irq(irq); |
1674 | return ret; |
1675 | + } |
1676 | |
1677 | write_msi_msg(irq, &msg); |
1678 | |
1679 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); |
1680 | |
1681 | - return 0; |
1682 | + return irq; |
1683 | } |
1684 | |
1685 | void arch_teardown_msi_irq(unsigned int irq) |
1686 | { |
1687 | - return; |
1688 | + destroy_irq(irq); |
1689 | } |
1690 | |
1691 | #endif /* CONFIG_PCI_MSI */ |
1692 | @@ -1985,24 +2033,22 @@ |
1693 | |
1694 | static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) |
1695 | { |
1696 | + struct irq_cfg *cfg = irq_cfg + irq; |
1697 | unsigned int dest; |
1698 | cpumask_t tmp; |
1699 | - int vector; |
1700 | |
1701 | cpus_and(tmp, mask, cpu_online_map); |
1702 | if (cpus_empty(tmp)) |
1703 | - tmp = TARGET_CPUS; |
1704 | - |
1705 | - cpus_and(mask, tmp, CPU_MASK_ALL); |
1706 | + return; |
1707 | |
1708 | - vector = assign_irq_vector(irq, mask, &tmp); |
1709 | - if (vector < 0) |
1710 | + if (assign_irq_vector(irq, mask)) |
1711 | return; |
1712 | |
1713 | + cpus_and(tmp, cfg->domain, mask); |
1714 | dest = cpu_mask_to_apicid(tmp); |
1715 | |
1716 | - target_ht_irq(irq, dest, vector); |
1717 | - set_native_irq_info(irq, mask); |
1718 | + target_ht_irq(irq, dest, cfg->vector); |
1719 | + irq_desc[irq].affinity = mask; |
1720 | } |
1721 | #endif |
1722 | |
1723 | @@ -2019,14 +2065,17 @@ |
1724 | |
1725 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) |
1726 | { |
1727 | - int vector; |
1728 | + struct irq_cfg *cfg = irq_cfg + irq; |
1729 | + int err; |
1730 | cpumask_t tmp; |
1731 | |
1732 | - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); |
1733 | - if (vector >= 0) { |
1734 | + tmp = TARGET_CPUS; |
1735 | + err = assign_irq_vector(irq, tmp); |
1736 | + if (!err) { |
1737 | struct ht_irq_msg msg; |
1738 | unsigned dest; |
1739 | |
1740 | + cpus_and(tmp, cfg->domain, tmp); |
1741 | dest = cpu_mask_to_apicid(tmp); |
1742 | |
1743 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); |
1744 | @@ -2034,7 +2083,7 @@ |
1745 | msg.address_lo = |
1746 | HT_IRQ_LOW_BASE | |
1747 | HT_IRQ_LOW_DEST_ID(dest) | |
1748 | - HT_IRQ_LOW_VECTOR(vector) | |
1749 | + HT_IRQ_LOW_VECTOR(cfg->vector) | |
1750 | ((INT_DEST_MODE == 0) ? |
1751 | HT_IRQ_LOW_DM_PHYSICAL : |
1752 | HT_IRQ_LOW_DM_LOGICAL) | |
1753 | @@ -2049,7 +2098,7 @@ |
1754 | set_irq_chip_and_handler_name(irq, &ht_irq_chip, |
1755 | handle_edge_irq, "edge"); |
1756 | } |
1757 | - return vector; |
1758 | + return err; |
1759 | } |
1760 | #endif /* CONFIG_HT_IRQ */ |
1761 | |
1762 | @@ -2074,13 +2123,8 @@ |
1763 | } |
1764 | |
1765 | |
1766 | -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) |
1767 | +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) |
1768 | { |
1769 | - struct IO_APIC_route_entry entry; |
1770 | - unsigned long flags; |
1771 | - int vector; |
1772 | - cpumask_t mask; |
1773 | - |
1774 | if (!IO_APIC_IRQ(irq)) { |
1775 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", |
1776 | ioapic); |
1777 | @@ -2093,42 +2137,7 @@ |
1778 | if (irq >= 16) |
1779 | add_pin_to_irq(irq, ioapic, pin); |
1780 | |
1781 | - |
1782 | - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); |
1783 | - if (vector < 0) |
1784 | - return vector; |
1785 | - |
1786 | - /* |
1787 | - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. |
1788 | - * Note that we mask (disable) IRQs now -- these get enabled when the |
1789 | - * corresponding device driver registers for this IRQ. |
1790 | - */ |
1791 | - |
1792 | - memset(&entry,0,sizeof(entry)); |
1793 | - |
1794 | - entry.delivery_mode = INT_DELIVERY_MODE; |
1795 | - entry.dest_mode = INT_DEST_MODE; |
1796 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); |
1797 | - entry.trigger = edge_level; |
1798 | - entry.polarity = active_high_low; |
1799 | - entry.mask = 1; /* Disabled (masked) */ |
1800 | - entry.vector = vector & 0xff; |
1801 | - |
1802 | - apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " |
1803 | - "IRQ %d Mode:%i Active:%i)\n", ioapic, |
1804 | - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, |
1805 | - edge_level, active_high_low); |
1806 | - |
1807 | - ioapic_register_intr(irq, entry.vector, edge_level); |
1808 | - |
1809 | - if (!ioapic && (irq < 16)) |
1810 | - disable_8259A_irq(irq); |
1811 | - |
1812 | - ioapic_write_entry(ioapic, pin, entry); |
1813 | - |
1814 | - spin_lock_irqsave(&ioapic_lock, flags); |
1815 | - set_native_irq_info(irq, TARGET_CPUS); |
1816 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
1817 | + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); |
1818 | |
1819 | return 0; |
1820 | } |
1821 | @@ -2161,8 +2170,10 @@ |
1822 | * when you have too many devices, because at that time only boot |
1823 | * cpu is online. |
1824 | */ |
1825 | - if(!irq_vector[irq]) |
1826 | - setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); |
1827 | + if (!irq_cfg[irq].vector) |
1828 | + setup_IO_APIC_irq(ioapic, pin, irq, |
1829 | + irq_trigger(irq_entry), |
1830 | + irq_polarity(irq_entry)); |
1831 | else |
1832 | set_ioapic_affinity_irq(irq, TARGET_CPUS); |
1833 | } |
1834 | --- a/arch/x86/kernel/irq_32-xen.c |
1835 | +++ b/arch/x86/kernel/irq_32-xen.c |
1836 | @@ -10,7 +10,6 @@ |
1837 | * io_apic.c.) |
1838 | */ |
1839 | |
1840 | -#include <asm/uaccess.h> |
1841 | #include <linux/module.h> |
1842 | #include <linux/seq_file.h> |
1843 | #include <linux/interrupt.h> |
1844 | @@ -19,19 +18,34 @@ |
1845 | #include <linux/cpu.h> |
1846 | #include <linux/delay.h> |
1847 | |
1848 | +#include <asm/apic.h> |
1849 | +#include <asm/uaccess.h> |
1850 | + |
1851 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; |
1852 | EXPORT_PER_CPU_SYMBOL(irq_stat); |
1853 | |
1854 | -#ifndef CONFIG_X86_LOCAL_APIC |
1855 | /* |
1856 | * 'what should we do if we get a hw irq event on an illegal vector'. |
1857 | * each architecture has to answer this themselves. |
1858 | */ |
1859 | void ack_bad_irq(unsigned int irq) |
1860 | { |
1861 | - printk("unexpected IRQ trap at vector %02x\n", irq); |
1862 | -} |
1863 | + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); |
1864 | + |
1865 | +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) |
1866 | + /* |
1867 | + * Currently unexpected vectors happen only on SMP and APIC. |
1868 | + * We _must_ ack these because every local APIC has only N |
1869 | + * irq slots per priority level, and a 'hanging, unacked' IRQ |
1870 | + * holds up an irq slot - in excessive cases (when multiple |
1871 | + * unexpected vectors occur) that might lock up the APIC |
1872 | + * completely. |
1873 | + * But only ack when the APIC is enabled -AK |
1874 | + */ |
1875 | + if (cpu_has_apic) |
1876 | + ack_APIC_irq(); |
1877 | #endif |
1878 | +} |
1879 | |
1880 | #ifdef CONFIG_4KSTACKS |
1881 | /* |
1882 | --- a/arch/x86/kernel/irq_64-xen.c |
1883 | +++ b/arch/x86/kernel/irq_64-xen.c |
1884 | @@ -18,6 +18,7 @@ |
1885 | #include <asm/uaccess.h> |
1886 | #include <asm/io_apic.h> |
1887 | #include <asm/idle.h> |
1888 | +#include <asm/smp.h> |
1889 | |
1890 | atomic_t irq_err_count; |
1891 | |
1892 | @@ -120,9 +121,15 @@ |
1893 | |
1894 | if (likely(irq < NR_IRQS)) |
1895 | generic_handle_irq(irq); |
1896 | - else if (printk_ratelimit()) |
1897 | - printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", |
1898 | - __func__, smp_processor_id(), irq); |
1899 | + else { |
1900 | +#ifndef CONFIG_XEN |
1901 | + if (!disable_apic) |
1902 | + ack_APIC_irq(); |
1903 | +#endif |
1904 | + if (printk_ratelimit()) |
1905 | + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", |
1906 | + __func__, smp_processor_id(), irq); |
1907 | + } |
1908 | |
1909 | irq_exit(); |
1910 | |
1911 | --- a/arch/x86/kernel/microcode-xen.c |
1912 | +++ b/arch/x86/kernel/microcode-xen.c |
1913 | @@ -108,7 +108,7 @@ |
1914 | return ret; |
1915 | } |
1916 | |
1917 | -static struct file_operations microcode_fops = { |
1918 | +static const struct file_operations microcode_fops = { |
1919 | .owner = THIS_MODULE, |
1920 | .write = microcode_write, |
1921 | .open = microcode_open, |
1922 | --- a/arch/x86/kernel/mpparse_32-xen.c |
1923 | +++ b/arch/x86/kernel/mpparse_32-xen.c |
1924 | @@ -1079,7 +1079,7 @@ |
1925 | static int gsi_to_irq[MAX_GSI_NUM]; |
1926 | |
1927 | /* Don't set up the ACPI SCI because it's already set up */ |
1928 | - if (acpi_fadt.sci_int == gsi) |
1929 | + if (acpi_gbl_FADT.sci_interrupt == gsi) |
1930 | return gsi; |
1931 | |
1932 | ioapic = mp_find_ioapic(gsi); |
1933 | @@ -1136,7 +1136,7 @@ |
1934 | /* |
1935 | * Don't assign IRQ used by ACPI SCI |
1936 | */ |
1937 | - if (gsi == acpi_fadt.sci_int) |
1938 | + if (gsi == acpi_gbl_FADT.sci_interrupt) |
1939 | gsi = pci_irq++; |
1940 | gsi_to_irq[irq] = gsi; |
1941 | } else { |
1942 | --- a/arch/x86/kernel/mpparse_64-xen.c |
1943 | +++ b/arch/x86/kernel/mpparse_64-xen.c |
1944 | @@ -60,9 +60,9 @@ |
1945 | /* Processor that is doing the boot up */ |
1946 | unsigned int boot_cpu_id = -1U; |
1947 | /* Internal processor count */ |
1948 | -unsigned int num_processors __initdata = 0; |
1949 | +unsigned int num_processors __cpuinitdata = 0; |
1950 | |
1951 | -unsigned disabled_cpus __initdata; |
1952 | +unsigned disabled_cpus __cpuinitdata; |
1953 | |
1954 | /* Bitmask of physically existing CPUs */ |
1955 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; |
1956 | @@ -808,7 +808,7 @@ |
1957 | return gsi; |
1958 | |
1959 | /* Don't set up the ACPI SCI because it's already set up */ |
1960 | - if (acpi_fadt.sci_int == gsi) |
1961 | + if (acpi_gbl_FADT.sci_interrupt == gsi) |
1962 | return gsi; |
1963 | |
1964 | ioapic = mp_find_ioapic(gsi); |
1965 | --- a/arch/x86/kernel/pci-dma_32-xen.c |
1966 | +++ b/arch/x86/kernel/pci-dma_32-xen.c |
1967 | @@ -317,7 +317,7 @@ |
1968 | return DMA_MEMORY_IO; |
1969 | |
1970 | free1_out: |
1971 | - kfree(dev->dma_mem->bitmap); |
1972 | + kfree(dev->dma_mem); |
1973 | out: |
1974 | if (mem_base) |
1975 | iounmap(mem_base); |
1976 | --- a/arch/x86/kernel/pci-swiotlb_64-xen.c |
1977 | +++ b/arch/x86/kernel/pci-swiotlb_64-xen.c |
1978 | @@ -35,7 +35,7 @@ |
1979 | #endif |
1980 | }; |
1981 | |
1982 | -void pci_swiotlb_init(void) |
1983 | +void __init pci_swiotlb_init(void) |
1984 | { |
1985 | #if 0 |
1986 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
1987 | --- a/arch/x86/kernel/pcspeaker.c |
1988 | +++ b/arch/x86/kernel/pcspeaker.c |
1989 | @@ -7,6 +7,11 @@ |
1990 | struct platform_device *pd; |
1991 | int ret; |
1992 | |
1993 | +#ifdef CONFIG_XEN |
1994 | + if (!is_initial_xendomain()) |
1995 | + return 0; |
1996 | +#endif |
1997 | + |
1998 | pd = platform_device_alloc("pcspkr", -1); |
1999 | if (!pd) |
2000 | return -ENOMEM; |
2001 | --- a/arch/x86/kernel/process_32-xen.c |
2002 | +++ b/arch/x86/kernel/process_32-xen.c |
2003 | @@ -38,6 +38,7 @@ |
2004 | #include <linux/ptrace.h> |
2005 | #include <linux/random.h> |
2006 | #include <linux/personality.h> |
2007 | +#include <linux/tick.h> |
2008 | |
2009 | #include <asm/uaccess.h> |
2010 | #include <asm/pgtable.h> |
2011 | @@ -160,6 +161,7 @@ |
2012 | |
2013 | /* endless idle loop with no priority at all */ |
2014 | while (1) { |
2015 | + tick_nohz_stop_sched_tick(); |
2016 | while (!need_resched()) { |
2017 | void (*idle)(void); |
2018 | |
2019 | @@ -175,6 +177,7 @@ |
2020 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; |
2021 | idle(); |
2022 | } |
2023 | + tick_nohz_restart_sched_tick(); |
2024 | preempt_enable_no_resched(); |
2025 | schedule(); |
2026 | preempt_disable(); |
2027 | @@ -247,8 +250,8 @@ |
2028 | regs->eax,regs->ebx,regs->ecx,regs->edx); |
2029 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", |
2030 | regs->esi, regs->edi, regs->ebp); |
2031 | - printk(" DS: %04x ES: %04x GS: %04x\n", |
2032 | - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); |
2033 | + printk(" DS: %04x ES: %04x FS: %04x\n", |
2034 | + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); |
2035 | |
2036 | cr0 = read_cr0(); |
2037 | cr2 = read_cr2(); |
2038 | @@ -279,7 +282,7 @@ |
2039 | |
2040 | regs.xds = __USER_DS; |
2041 | regs.xes = __USER_DS; |
2042 | - regs.xgs = __KERNEL_PDA; |
2043 | + regs.xfs = __KERNEL_PDA; |
2044 | regs.orig_eax = -1; |
2045 | regs.eip = (unsigned long) kernel_thread_helper; |
2046 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
2047 | @@ -356,7 +359,7 @@ |
2048 | |
2049 | p->thread.eip = (unsigned long) ret_from_fork; |
2050 | |
2051 | - savesegment(fs,p->thread.fs); |
2052 | + savesegment(gs,p->thread.gs); |
2053 | |
2054 | tsk = current; |
2055 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
2056 | @@ -434,8 +437,8 @@ |
2057 | dump->regs.eax = regs->eax; |
2058 | dump->regs.ds = regs->xds; |
2059 | dump->regs.es = regs->xes; |
2060 | - savesegment(fs,dump->regs.fs); |
2061 | - dump->regs.gs = regs->xgs; |
2062 | + dump->regs.fs = regs->xfs; |
2063 | + savesegment(gs,dump->regs.gs); |
2064 | dump->regs.orig_eax = regs->orig_eax; |
2065 | dump->regs.eip = regs->eip; |
2066 | dump->regs.cs = regs->xcs; |
2067 | @@ -616,16 +619,6 @@ |
2068 | prefetch(&next->i387.fxsave); |
2069 | |
2070 | /* |
2071 | - * Restore %fs if needed. |
2072 | - * |
2073 | - * Glibc normally makes %fs be zero. |
2074 | - */ |
2075 | - if (unlikely(next->fs)) |
2076 | - loadsegment(fs, next->fs); |
2077 | - |
2078 | - write_pda(pcurrent, next_p); |
2079 | - |
2080 | - /* |
2081 | * Now maybe handle debug registers |
2082 | */ |
2083 | if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) |
2084 | @@ -633,6 +626,15 @@ |
2085 | |
2086 | disable_tsc(prev_p, next_p); |
2087 | |
2088 | + /* |
2089 | + * Leave lazy mode, flushing any hypercalls made here. |
2090 | + * This must be done before restoring TLS segments so |
2091 | + * the GDT and LDT are properly updated, and must be |
2092 | + * done before math_state_restore, so the TS bit is up |
2093 | + * to date. |
2094 | + */ |
2095 | + arch_leave_lazy_cpu_mode(); |
2096 | + |
2097 | /* If the task has used fpu the last 5 timeslices, just do a full |
2098 | * restore of the math state immediately to avoid the trap; the |
2099 | * chances of needing FPU soon are obviously high now |
2100 | @@ -640,6 +642,14 @@ |
2101 | if (next_p->fpu_counter > 5) |
2102 | math_state_restore(); |
2103 | |
2104 | + /* |
2105 | + * Restore %gs if needed (which is common) |
2106 | + */ |
2107 | + if (prev->gs | next->gs) |
2108 | + loadsegment(gs, next->gs); |
2109 | + |
2110 | + write_pda(pcurrent, next_p); |
2111 | + |
2112 | return prev_p; |
2113 | } |
2114 | |
2115 | --- a/arch/x86/kernel/process_64-xen.c |
2116 | +++ b/arch/x86/kernel/process_64-xen.c |
2117 | @@ -338,14 +338,17 @@ |
2118 | void flush_thread(void) |
2119 | { |
2120 | struct task_struct *tsk = current; |
2121 | - struct thread_info *t = current_thread_info(); |
2122 | |
2123 | - if (t->flags & _TIF_ABI_PENDING) { |
2124 | - t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); |
2125 | - if (t->flags & _TIF_IA32) |
2126 | + if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { |
2127 | + clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); |
2128 | + if (test_tsk_thread_flag(tsk, TIF_IA32)) { |
2129 | + clear_tsk_thread_flag(tsk, TIF_IA32); |
2130 | + } else { |
2131 | + set_tsk_thread_flag(tsk, TIF_IA32); |
2132 | current_thread_info()->status |= TS_COMPAT; |
2133 | + } |
2134 | } |
2135 | - t->flags &= ~_TIF_DEBUG; |
2136 | + clear_tsk_thread_flag(tsk, TIF_DEBUG); |
2137 | |
2138 | tsk->thread.debugreg0 = 0; |
2139 | tsk->thread.debugreg1 = 0; |
2140 | --- a/arch/x86/kernel/setup_32-xen.c |
2141 | +++ b/arch/x86/kernel/setup_32-xen.c |
2142 | @@ -33,7 +33,6 @@ |
2143 | #include <linux/initrd.h> |
2144 | #include <linux/bootmem.h> |
2145 | #include <linux/seq_file.h> |
2146 | -#include <linux/platform_device.h> |
2147 | #include <linux/console.h> |
2148 | #include <linux/mca.h> |
2149 | #include <linux/root_dev.h> |
2150 | @@ -151,7 +150,7 @@ |
2151 | #define RAMDISK_PROMPT_FLAG 0x8000 |
2152 | #define RAMDISK_LOAD_FLAG 0x4000 |
2153 | |
2154 | -static char command_line[COMMAND_LINE_SIZE]; |
2155 | +static char __initdata command_line[COMMAND_LINE_SIZE]; |
2156 | |
2157 | unsigned char __initdata boot_params[PARAM_SIZE]; |
2158 | |
2159 | @@ -650,8 +649,8 @@ |
2160 | |
2161 | if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) |
2162 | i = COMMAND_LINE_SIZE; |
2163 | - memcpy(saved_command_line, xen_start_info->cmd_line, i); |
2164 | - saved_command_line[i - 1] = '\0'; |
2165 | + memcpy(boot_command_line, xen_start_info->cmd_line, i); |
2166 | + boot_command_line[i - 1] = '\0'; |
2167 | parse_early_param(); |
2168 | |
2169 | if (user_defined_memmap) { |
2170 | @@ -659,11 +658,19 @@ |
2171 | print_memory_map("user"); |
2172 | } |
2173 | |
2174 | - strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); |
2175 | + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
2176 | *cmdline_p = command_line; |
2177 | |
2178 | max_low_pfn = setup_memory(); |
2179 | |
2180 | +#ifdef CONFIG_VMI |
2181 | + /* |
2182 | + * Must be after max_low_pfn is determined, and before kernel |
2183 | + * pagetables are setup. |
2184 | + */ |
2185 | + vmi_init(); |
2186 | +#endif |
2187 | + |
2188 | /* |
2189 | * NOTE: before this point _nobody_ is allowed to allocate |
2190 | * any memory using the bootmem allocator. Although the |
2191 | @@ -826,7 +833,6 @@ |
2192 | conswitchp = &dummy_con; |
2193 | #endif |
2194 | } |
2195 | - tsc_init(); |
2196 | } |
2197 | |
2198 | static int |
2199 | @@ -836,31 +842,3 @@ |
2200 | /* we're never actually going to get here... */ |
2201 | return NOTIFY_DONE; |
2202 | } |
2203 | - |
2204 | -static __init int add_pcspkr(void) |
2205 | -{ |
2206 | - struct platform_device *pd; |
2207 | - int ret; |
2208 | - |
2209 | - if (!is_initial_xendomain()) |
2210 | - return 0; |
2211 | - |
2212 | - pd = platform_device_alloc("pcspkr", -1); |
2213 | - if (!pd) |
2214 | - return -ENOMEM; |
2215 | - |
2216 | - ret = platform_device_add(pd); |
2217 | - if (ret) |
2218 | - platform_device_put(pd); |
2219 | - |
2220 | - return ret; |
2221 | -} |
2222 | -device_initcall(add_pcspkr); |
2223 | - |
2224 | -/* |
2225 | - * Local Variables: |
2226 | - * mode:c |
2227 | - * c-file-style:"k&r" |
2228 | - * c-basic-offset:8 |
2229 | - * End: |
2230 | - */ |
2231 | --- a/arch/x86/kernel/setup_64-xen.c |
2232 | +++ b/arch/x86/kernel/setup_64-xen.c |
2233 | @@ -144,7 +144,7 @@ |
2234 | |
2235 | extern int root_mountflags; |
2236 | |
2237 | -char command_line[COMMAND_LINE_SIZE]; |
2238 | +char __initdata command_line[COMMAND_LINE_SIZE]; |
2239 | |
2240 | struct resource standard_io_resources[] = { |
2241 | { .name = "dma1", .start = 0x00, .end = 0x1f, |
2242 | @@ -182,134 +182,6 @@ |
2243 | .flags = IORESOURCE_RAM, |
2244 | }; |
2245 | |
2246 | -#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM) |
2247 | - |
2248 | -static struct resource system_rom_resource = { |
2249 | - .name = "System ROM", |
2250 | - .start = 0xf0000, |
2251 | - .end = 0xfffff, |
2252 | - .flags = IORESOURCE_ROM, |
2253 | -}; |
2254 | - |
2255 | -static struct resource extension_rom_resource = { |
2256 | - .name = "Extension ROM", |
2257 | - .start = 0xe0000, |
2258 | - .end = 0xeffff, |
2259 | - .flags = IORESOURCE_ROM, |
2260 | -}; |
2261 | - |
2262 | -static struct resource adapter_rom_resources[] = { |
2263 | - { .name = "Adapter ROM", .start = 0xc8000, .end = 0, |
2264 | - .flags = IORESOURCE_ROM }, |
2265 | - { .name = "Adapter ROM", .start = 0, .end = 0, |
2266 | - .flags = IORESOURCE_ROM }, |
2267 | - { .name = "Adapter ROM", .start = 0, .end = 0, |
2268 | - .flags = IORESOURCE_ROM }, |
2269 | - { .name = "Adapter ROM", .start = 0, .end = 0, |
2270 | - .flags = IORESOURCE_ROM }, |
2271 | - { .name = "Adapter ROM", .start = 0, .end = 0, |
2272 | - .flags = IORESOURCE_ROM }, |
2273 | - { .name = "Adapter ROM", .start = 0, .end = 0, |
2274 | - .flags = IORESOURCE_ROM } |
2275 | -}; |
2276 | - |
2277 | -static struct resource video_rom_resource = { |
2278 | - .name = "Video ROM", |
2279 | - .start = 0xc0000, |
2280 | - .end = 0xc7fff, |
2281 | - .flags = IORESOURCE_ROM, |
2282 | -}; |
2283 | - |
2284 | -static struct resource video_ram_resource = { |
2285 | - .name = "Video RAM area", |
2286 | - .start = 0xa0000, |
2287 | - .end = 0xbffff, |
2288 | - .flags = IORESOURCE_RAM, |
2289 | -}; |
2290 | - |
2291 | -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) |
2292 | - |
2293 | -static int __init romchecksum(unsigned char *rom, unsigned long length) |
2294 | -{ |
2295 | - unsigned char *p, sum = 0; |
2296 | - |
2297 | - for (p = rom; p < rom + length; p++) |
2298 | - sum += *p; |
2299 | - return sum == 0; |
2300 | -} |
2301 | - |
2302 | -static void __init probe_roms(void) |
2303 | -{ |
2304 | - unsigned long start, length, upper; |
2305 | - unsigned char *rom; |
2306 | - int i; |
2307 | - |
2308 | -#ifdef CONFIG_XEN |
2309 | - /* Nothing to do if not running in dom0. */ |
2310 | - if (!is_initial_xendomain()) |
2311 | - return; |
2312 | -#endif |
2313 | - |
2314 | - /* video rom */ |
2315 | - upper = adapter_rom_resources[0].start; |
2316 | - for (start = video_rom_resource.start; start < upper; start += 2048) { |
2317 | - rom = isa_bus_to_virt(start); |
2318 | - if (!romsignature(rom)) |
2319 | - continue; |
2320 | - |
2321 | - video_rom_resource.start = start; |
2322 | - |
2323 | - /* 0 < length <= 0x7f * 512, historically */ |
2324 | - length = rom[2] * 512; |
2325 | - |
2326 | - /* if checksum okay, trust length byte */ |
2327 | - if (length && romchecksum(rom, length)) |
2328 | - video_rom_resource.end = start + length - 1; |
2329 | - |
2330 | - request_resource(&iomem_resource, &video_rom_resource); |
2331 | - break; |
2332 | - } |
2333 | - |
2334 | - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; |
2335 | - if (start < upper) |
2336 | - start = upper; |
2337 | - |
2338 | - /* system rom */ |
2339 | - request_resource(&iomem_resource, &system_rom_resource); |
2340 | - upper = system_rom_resource.start; |
2341 | - |
2342 | - /* check for extension rom (ignore length byte!) */ |
2343 | - rom = isa_bus_to_virt(extension_rom_resource.start); |
2344 | - if (romsignature(rom)) { |
2345 | - length = extension_rom_resource.end - extension_rom_resource.start + 1; |
2346 | - if (romchecksum(rom, length)) { |
2347 | - request_resource(&iomem_resource, &extension_rom_resource); |
2348 | - upper = extension_rom_resource.start; |
2349 | - } |
2350 | - } |
2351 | - |
2352 | - /* check for adapter roms on 2k boundaries */ |
2353 | - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; |
2354 | - start += 2048) { |
2355 | - rom = isa_bus_to_virt(start); |
2356 | - if (!romsignature(rom)) |
2357 | - continue; |
2358 | - |
2359 | - /* 0 < length <= 0x7f * 512, historically */ |
2360 | - length = rom[2] * 512; |
2361 | - |
2362 | - /* but accept any length that fits if checksum okay */ |
2363 | - if (!length || start + length > upper || !romchecksum(rom, length)) |
2364 | - continue; |
2365 | - |
2366 | - adapter_rom_resources[i].start = start; |
2367 | - adapter_rom_resources[i].end = start + length - 1; |
2368 | - request_resource(&iomem_resource, &adapter_rom_resources[i]); |
2369 | - |
2370 | - start = adapter_rom_resources[i++].end & ~2047UL; |
2371 | - } |
2372 | -} |
2373 | - |
2374 | #ifdef CONFIG_PROC_VMCORE |
2375 | /* elfcorehdr= specifies the location of elf core header |
2376 | * stored by the crashed kernel. This option will be passed |
2377 | @@ -406,7 +278,7 @@ |
2378 | #ifdef CONFIG_XEN |
2379 | extern struct e820map machine_e820; |
2380 | |
2381 | - printk(KERN_INFO "Command line: %s\n", saved_command_line); |
2382 | + printk(KERN_INFO "Command line: %s\n", boot_command_line); |
2383 | |
2384 | /* Register a call for panic conditions. */ |
2385 | atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); |
2386 | @@ -433,7 +305,7 @@ |
2387 | |
2388 | ARCH_SETUP |
2389 | #else |
2390 | - printk(KERN_INFO "Command line: %s\n", saved_command_line); |
2391 | + printk(KERN_INFO "Command line: %s\n", boot_command_line); |
2392 | |
2393 | ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); |
2394 | screen_info = SCREEN_INFO; |
2395 | @@ -464,7 +336,7 @@ |
2396 | |
2397 | early_identify_cpu(&boot_cpu_data); |
2398 | |
2399 | - strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); |
2400 | + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
2401 | *cmdline_p = command_line; |
2402 | |
2403 | parse_early_param(); |
2404 | @@ -534,6 +406,11 @@ |
2405 | /* reserve ebda region */ |
2406 | if (ebda_addr) |
2407 | reserve_bootmem_generic(ebda_addr, ebda_size); |
2408 | +#ifdef CONFIG_NUMA |
2409 | + /* reserve nodemap region */ |
2410 | + if (nodemap_addr) |
2411 | + reserve_bootmem_generic(nodemap_addr, nodemap_size); |
2412 | +#endif |
2413 | |
2414 | #ifdef CONFIG_SMP |
2415 | /* |
2416 | @@ -734,10 +611,8 @@ |
2417 | #endif |
2418 | |
2419 | /* |
2420 | - * Request address space for all standard RAM and ROM resources |
2421 | - * and also for regions reported as reserved by the e820. |
2422 | + * We trust e820 completely. No explicit ROM probing in memory. |
2423 | */ |
2424 | - probe_roms(); |
2425 | #ifdef CONFIG_XEN |
2426 | if (is_initial_xendomain()) { |
2427 | struct xen_memory_map memmap; |
2428 | @@ -756,8 +631,6 @@ |
2429 | e820_mark_nosave_regions(); |
2430 | #endif |
2431 | |
2432 | - request_resource(&iomem_resource, &video_ram_resource); |
2433 | - |
2434 | { |
2435 | unsigned i; |
2436 | /* request I/O space for devices used on all i[345]86 PCs */ |
2437 | @@ -1334,7 +1207,8 @@ |
2438 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2439 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, |
2440 | NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, |
2441 | - NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow", |
2442 | + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", |
2443 | + "3dnowext", "3dnow", |
2444 | |
2445 | /* Transmeta-defined */ |
2446 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, |
2447 | @@ -1352,7 +1226,7 @@ |
2448 | /* Intel-defined (#2) */ |
2449 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", |
2450 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, |
2451 | - NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, |
2452 | + NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", |
2453 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2454 | |
2455 | /* VIA/Cyrix/Centaur-defined */ |
2456 | @@ -1362,8 +1236,10 @@ |
2457 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2458 | |
2459 | /* AMD-defined (#2) */ |
2460 | - "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL, |
2461 | - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2462 | + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", |
2463 | + "altmovcr8", "abm", "sse4a", |
2464 | + "misalignsse", "3dnowprefetch", |
2465 | + "osvw", "ibs", NULL, NULL, NULL, NULL, |
2466 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2467 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2468 | }; |
2469 | @@ -1374,6 +1250,9 @@ |
2470 | "ttp", /* thermal trip */ |
2471 | "tm", |
2472 | "stc", |
2473 | + "100mhzsteps", |
2474 | + "hwpstate", |
2475 | + NULL, /* tsc invariant mapped to constant_tsc */ |
2476 | NULL, |
2477 | /* nothing */ /* constant_tsc - moved to flags */ |
2478 | }; |
2479 | @@ -1490,26 +1369,3 @@ |
2480 | .stop = c_stop, |
2481 | .show = show_cpuinfo, |
2482 | }; |
2483 | - |
2484 | -#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE) |
2485 | -#include <linux/platform_device.h> |
2486 | -static __init int add_pcspkr(void) |
2487 | -{ |
2488 | - struct platform_device *pd; |
2489 | - int ret; |
2490 | - |
2491 | - if (!is_initial_xendomain()) |
2492 | - return 0; |
2493 | - |
2494 | - pd = platform_device_alloc("pcspkr", -1); |
2495 | - if (!pd) |
2496 | - return -ENOMEM; |
2497 | - |
2498 | - ret = platform_device_add(pd); |
2499 | - if (ret) |
2500 | - platform_device_put(pd); |
2501 | - |
2502 | - return ret; |
2503 | -} |
2504 | -device_initcall(add_pcspkr); |
2505 | -#endif |
2506 | --- a/arch/x86/kernel/smp_32-xen.c |
2507 | +++ b/arch/x86/kernel/smp_32-xen.c |
2508 | @@ -335,8 +335,7 @@ |
2509 | /* |
2510 | * i'm not happy about this global shared spinlock in the |
2511 | * MM hot path, but we'll see how contended it is. |
2512 | - * Temporarily this turns IRQs off, so that lockups are |
2513 | - * detected by the NMI watchdog. |
2514 | + * AK: x86-64 has a faster method that could be ported. |
2515 | */ |
2516 | spin_lock(&tlbstate_lock); |
2517 | |
2518 | @@ -361,7 +360,7 @@ |
2519 | |
2520 | while (!cpus_empty(flush_cpumask)) |
2521 | /* nothing. lockup detection does not belong here */ |
2522 | - mb(); |
2523 | + cpu_relax(); |
2524 | |
2525 | flush_mm = NULL; |
2526 | flush_va = 0; |
2527 | --- a/arch/x86/kernel/time_32-xen.c |
2528 | +++ b/arch/x86/kernel/time_32-xen.c |
2529 | @@ -51,6 +51,7 @@ |
2530 | #include <linux/kernel_stat.h> |
2531 | #include <linux/posix-timers.h> |
2532 | #include <linux/cpufreq.h> |
2533 | +#include <linux/clocksource.h> |
2534 | |
2535 | #include <asm/io.h> |
2536 | #include <asm/smp.h> |
2537 | @@ -75,25 +76,17 @@ |
2538 | #include <xen/evtchn.h> |
2539 | #include <xen/interface/vcpu.h> |
2540 | |
2541 | -#if defined (__i386__) |
2542 | -#include <asm/i8259.h> |
2543 | +#ifdef CONFIG_X86_32 |
2544 | #include <asm/i8253.h> |
2545 | DEFINE_SPINLOCK(i8253_lock); |
2546 | EXPORT_SYMBOL(i8253_lock); |
2547 | -#endif |
2548 | - |
2549 | -#define XEN_SHIFT 22 |
2550 | - |
2551 | int pit_latch_buggy; /* extern */ |
2552 | - |
2553 | -#if defined(__x86_64__) |
2554 | -unsigned long vxtime_hz = PIT_TICK_RATE; |
2555 | -struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ |
2556 | +#else |
2557 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; |
2558 | -struct timespec __xtime __section_xtime; |
2559 | -struct timezone __sys_tz __section_sys_tz; |
2560 | #endif |
2561 | |
2562 | +#define XEN_SHIFT 22 |
2563 | + |
2564 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ |
2565 | EXPORT_SYMBOL(cpu_khz); |
2566 | |
2567 | @@ -113,9 +106,6 @@ |
2568 | static struct timespec shadow_tv; |
2569 | static u32 shadow_tv_version; |
2570 | |
2571 | -static struct timeval monotonic_tv; |
2572 | -static spinlock_t monotonic_lock = SPIN_LOCK_UNLOCKED; |
2573 | - |
2574 | /* Keep track of last time we did processing/updating of jiffies and xtime. */ |
2575 | static u64 processed_system_time; /* System time (ns) at last processing. */ |
2576 | static DEFINE_PER_CPU(u64, processed_system_time); |
2577 | @@ -228,7 +218,7 @@ |
2578 | } |
2579 | #endif |
2580 | |
2581 | -void init_cpu_khz(void) |
2582 | +static void init_cpu_khz(void) |
2583 | { |
2584 | u64 __cpu_khz = 1000000ULL << 32; |
2585 | struct vcpu_time_info *info = &vcpu_info(0)->time; |
2586 | @@ -247,16 +237,6 @@ |
2587 | return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); |
2588 | } |
2589 | |
2590 | -#ifdef CONFIG_X86_64 |
2591 | -static unsigned long get_usec_offset(struct shadow_time_info *shadow) |
2592 | -{ |
2593 | - u64 now, delta; |
2594 | - rdtscll(now); |
2595 | - delta = now - shadow->tsc_timestamp; |
2596 | - return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift); |
2597 | -} |
2598 | -#endif |
2599 | - |
2600 | static void __update_wallclock(time_t sec, long nsec) |
2601 | { |
2602 | long wtm_nsec, xtime_nsec; |
2603 | @@ -364,138 +344,6 @@ |
2604 | } |
2605 | EXPORT_SYMBOL(rtc_cmos_write); |
2606 | |
2607 | -#ifdef CONFIG_X86_64 |
2608 | - |
2609 | -/* |
2610 | - * This version of gettimeofday has microsecond resolution |
2611 | - * and better than microsecond precision on fast x86 machines with TSC. |
2612 | - */ |
2613 | -void do_gettimeofday(struct timeval *tv) |
2614 | -{ |
2615 | - unsigned long seq; |
2616 | - unsigned long usec, sec; |
2617 | - unsigned long flags; |
2618 | - s64 nsec; |
2619 | - unsigned int cpu; |
2620 | - struct shadow_time_info *shadow; |
2621 | - u32 local_time_version; |
2622 | - |
2623 | - cpu = get_cpu(); |
2624 | - shadow = &per_cpu(shadow_time, cpu); |
2625 | - |
2626 | - do { |
2627 | - local_time_version = shadow->version; |
2628 | - seq = read_seqbegin(&xtime_lock); |
2629 | - |
2630 | - usec = get_usec_offset(shadow); |
2631 | - |
2632 | - sec = xtime.tv_sec; |
2633 | - usec += (xtime.tv_nsec / NSEC_PER_USEC); |
2634 | - |
2635 | - nsec = shadow->system_timestamp - processed_system_time; |
2636 | - __normalize_time(&sec, &nsec); |
2637 | - usec += (long)nsec / NSEC_PER_USEC; |
2638 | - |
2639 | - if (unlikely(!time_values_up_to_date(cpu))) { |
2640 | - /* |
2641 | - * We may have blocked for a long time, |
2642 | - * rendering our calculations invalid |
2643 | - * (e.g. the time delta may have |
2644 | - * overflowed). Detect that and recalculate |
2645 | - * with fresh values. |
2646 | - */ |
2647 | - get_time_values_from_xen(cpu); |
2648 | - continue; |
2649 | - } |
2650 | - } while (read_seqretry(&xtime_lock, seq) || |
2651 | - (local_time_version != shadow->version)); |
2652 | - |
2653 | - put_cpu(); |
2654 | - |
2655 | - while (usec >= USEC_PER_SEC) { |
2656 | - usec -= USEC_PER_SEC; |
2657 | - sec++; |
2658 | - } |
2659 | - |
2660 | - spin_lock_irqsave(&monotonic_lock, flags); |
2661 | - if ((sec > monotonic_tv.tv_sec) || |
2662 | - ((sec == monotonic_tv.tv_sec) && (usec > monotonic_tv.tv_usec))) |
2663 | - { |
2664 | - monotonic_tv.tv_sec = sec; |
2665 | - monotonic_tv.tv_usec = usec; |
2666 | - } else { |
2667 | - sec = monotonic_tv.tv_sec; |
2668 | - usec = monotonic_tv.tv_usec; |
2669 | - } |
2670 | - spin_unlock_irqrestore(&monotonic_lock, flags); |
2671 | - |
2672 | - tv->tv_sec = sec; |
2673 | - tv->tv_usec = usec; |
2674 | -} |
2675 | - |
2676 | -EXPORT_SYMBOL(do_gettimeofday); |
2677 | - |
2678 | -int do_settimeofday(struct timespec *tv) |
2679 | -{ |
2680 | - time_t sec; |
2681 | - s64 nsec; |
2682 | - unsigned int cpu; |
2683 | - struct shadow_time_info *shadow; |
2684 | - struct xen_platform_op op; |
2685 | - |
2686 | - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) |
2687 | - return -EINVAL; |
2688 | - |
2689 | - cpu = get_cpu(); |
2690 | - shadow = &per_cpu(shadow_time, cpu); |
2691 | - |
2692 | - write_seqlock_irq(&xtime_lock); |
2693 | - |
2694 | - /* |
2695 | - * Ensure we don't get blocked for a long time so that our time delta |
2696 | - * overflows. If that were to happen then our shadow time values would |
2697 | - * be stale, so we can retry with fresh ones. |
2698 | - */ |
2699 | - for (;;) { |
2700 | - nsec = tv->tv_nsec - get_nsec_offset(shadow); |
2701 | - if (time_values_up_to_date(cpu)) |
2702 | - break; |
2703 | - get_time_values_from_xen(cpu); |
2704 | - } |
2705 | - sec = tv->tv_sec; |
2706 | - __normalize_time(&sec, &nsec); |
2707 | - |
2708 | - if (is_initial_xendomain() && !independent_wallclock) { |
2709 | - op.cmd = XENPF_settime; |
2710 | - op.u.settime.secs = sec; |
2711 | - op.u.settime.nsecs = nsec; |
2712 | - op.u.settime.system_time = shadow->system_timestamp; |
2713 | - WARN_ON(HYPERVISOR_platform_op(&op)); |
2714 | - update_wallclock(); |
2715 | - } else if (independent_wallclock) { |
2716 | - nsec -= shadow->system_timestamp; |
2717 | - __normalize_time(&sec, &nsec); |
2718 | - __update_wallclock(sec, nsec); |
2719 | - } |
2720 | - |
2721 | - /* Reset monotonic gettimeofday() timeval. */ |
2722 | - spin_lock(&monotonic_lock); |
2723 | - monotonic_tv.tv_sec = 0; |
2724 | - monotonic_tv.tv_usec = 0; |
2725 | - spin_unlock(&monotonic_lock); |
2726 | - |
2727 | - write_sequnlock_irq(&xtime_lock); |
2728 | - |
2729 | - put_cpu(); |
2730 | - |
2731 | - clock_was_set(); |
2732 | - return 0; |
2733 | -} |
2734 | - |
2735 | -EXPORT_SYMBOL(do_settimeofday); |
2736 | - |
2737 | -#endif |
2738 | - |
2739 | static void sync_xen_wallclock(unsigned long dummy); |
2740 | static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0); |
2741 | static void sync_xen_wallclock(unsigned long dummy) |
2742 | @@ -544,15 +392,7 @@ |
2743 | return retval; |
2744 | } |
2745 | |
2746 | -#ifdef CONFIG_X86_64 |
2747 | -/* monotonic_clock(): returns # of nanoseconds passed since time_init() |
2748 | - * Note: This function is required to return accurate |
2749 | - * time even in the absence of multiple timer ticks. |
2750 | - */ |
2751 | -unsigned long long monotonic_clock(void) |
2752 | -#else |
2753 | unsigned long long sched_clock(void) |
2754 | -#endif |
2755 | { |
2756 | unsigned int cpu = get_cpu(); |
2757 | struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); |
2758 | @@ -572,21 +412,18 @@ |
2759 | |
2760 | return time; |
2761 | } |
2762 | -#ifdef CONFIG_X86_64 |
2763 | -EXPORT_SYMBOL(monotonic_clock); |
2764 | - |
2765 | -unsigned long long sched_clock(void) |
2766 | -{ |
2767 | - return monotonic_clock(); |
2768 | -} |
2769 | -#endif |
2770 | |
2771 | unsigned long profile_pc(struct pt_regs *regs) |
2772 | { |
2773 | unsigned long pc = instruction_pointer(regs); |
2774 | |
2775 | #if defined(CONFIG_SMP) || defined(__x86_64__) |
2776 | - if (!user_mode_vm(regs) && in_lock_functions(pc)) { |
2777 | +# ifdef __i386__ |
2778 | + if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) |
2779 | +# else |
2780 | + if (!user_mode(regs) |
2781 | +# endif |
2782 | + && in_lock_functions(pc)) { |
2783 | # ifdef CONFIG_FRAME_POINTER |
2784 | # ifdef __i386__ |
2785 | return ((unsigned long *)regs->ebp)[1]; |
2786 | @@ -595,14 +432,11 @@ |
2787 | # endif |
2788 | # else |
2789 | # ifdef __i386__ |
2790 | - unsigned long *sp; |
2791 | - if ((regs->xcs & 2) == 0) |
2792 | -			sp = (unsigned long *)&regs->esp;
2793 | - else |
2794 | - sp = (unsigned long *)regs->esp; |
2795 | +	unsigned long *sp = (unsigned long *)&regs->esp;
2796 | # else |
2797 | unsigned long *sp = (unsigned long *)regs->rsp; |
2798 | # endif |
2799 | + |
2800 | /* Return address is either directly at stack pointer |
2801 | or above a saved eflags. Eflags has bits 22-31 zero, |
2802 | kernel addresses don't. */ |
2803 | @@ -755,19 +589,6 @@ |
2804 | return IRQ_HANDLED; |
2805 | } |
2806 | |
2807 | -#ifndef CONFIG_X86_64 |
2808 | - |
2809 | -void tsc_init(void) |
2810 | -{ |
2811 | - init_cpu_khz(); |
2812 | - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", |
2813 | - cpu_khz / 1000, cpu_khz % 1000); |
2814 | - |
2815 | - use_tsc_delay(); |
2816 | -} |
2817 | - |
2818 | -#include <linux/clocksource.h> |
2819 | - |
2820 | void mark_tsc_unstable(void) |
2821 | { |
2822 | #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */ |
2823 | @@ -821,21 +642,9 @@ |
2824 | .mask = CLOCKSOURCE_MASK(64), |
2825 | .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */ |
2826 | .shift = XEN_SHIFT, |
2827 | - .is_continuous = 1, |
2828 | + .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
2829 | }; |
2830 | |
2831 | -static int __init init_xen_clocksource(void) |
2832 | -{ |
2833 | - clocksource_xen.mult = clocksource_khz2mult(cpu_khz, |
2834 | - clocksource_xen.shift); |
2835 | - |
2836 | - return clocksource_register(&clocksource_xen); |
2837 | -} |
2838 | - |
2839 | -module_init(init_xen_clocksource); |
2840 | - |
2841 | -#endif |
2842 | - |
2843 | static void init_missing_ticks_accounting(unsigned int cpu) |
2844 | { |
2845 | struct vcpu_register_runstate_memory_area area; |
2846 | @@ -856,7 +665,7 @@ |
2847 | } |
2848 | |
2849 | /* not static: needed by APM */ |
2850 | -unsigned long get_cmos_time(void) |
2851 | +unsigned long read_persistent_clock(void) |
2852 | { |
2853 | unsigned long retval; |
2854 | unsigned long flags; |
2855 | @@ -869,11 +678,11 @@ |
2856 | |
2857 | return retval; |
2858 | } |
2859 | -EXPORT_SYMBOL(get_cmos_time); |
2860 | |
2861 | static void sync_cmos_clock(unsigned long dummy); |
2862 | |
2863 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); |
2864 | +int no_sync_cmos_clock; |
2865 | |
2866 | static void sync_cmos_clock(unsigned long dummy) |
2867 | { |
2868 | @@ -917,7 +726,8 @@ |
2869 | |
2870 | void notify_arch_cmos_timer(void) |
2871 | { |
2872 | - mod_timer(&sync_cmos_timer, jiffies + 1); |
2873 | + if (!no_sync_cmos_clock) |
2874 | + mod_timer(&sync_cmos_timer, jiffies + 1); |
2875 | mod_timer(&sync_xen_wallclock_timer, jiffies + 1); |
2876 | } |
2877 | |
2878 | @@ -950,29 +760,11 @@ |
2879 | |
2880 | device_initcall(time_init_device); |
2881 | |
2882 | -#ifdef CONFIG_HPET_TIMER |
2883 | extern void (*late_time_init)(void); |
2884 | -/* Duplicate of time_init() below, with hpet_enable part added */ |
2885 | -static void __init hpet_time_init(void) |
2886 | -{ |
2887 | - struct timespec ts; |
2888 | - ts.tv_sec = get_cmos_time(); |
2889 | - ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); |
2890 | - |
2891 | - do_settimeofday(&ts); |
2892 | - |
2893 | - if ((hpet_enable() >= 0) && hpet_use_timer) { |
2894 | - printk("Using HPET for base-timer\n"); |
2895 | - } |
2896 | - |
2897 | - do_time_init(); |
2898 | -} |
2899 | -#endif |
2900 | |
2901 | /* Dynamically-mapped IRQ. */ |
2902 | DEFINE_PER_CPU(int, timer_irq); |
2903 | |
2904 | -extern void (*late_time_init)(void); |
2905 | static void setup_cpu0_timer_irq(void) |
2906 | { |
2907 | per_cpu(timer_irq, 0) = |
2908 | @@ -992,16 +784,9 @@ |
2909 | |
2910 | void __init time_init(void) |
2911 | { |
2912 | -#ifdef CONFIG_HPET_TIMER |
2913 | - if (is_hpet_capable()) { |
2914 | - /* |
2915 | - * HPET initialization needs to do memory-mapped io. So, let |
2916 | - * us do a late initialization after mem_init(). |
2917 | - */ |
2918 | - late_time_init = hpet_time_init; |
2919 | - return; |
2920 | - } |
2921 | -#endif |
2922 | + init_cpu_khz(); |
2923 | + printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", |
2924 | + cpu_khz / 1000, cpu_khz % 1000); |
2925 | |
2926 | switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0, |
2927 | &xen_set_periodic_tick)) { |
2928 | @@ -1020,18 +805,12 @@ |
2929 | per_cpu(processed_system_time, 0) = processed_system_time; |
2930 | init_missing_ticks_accounting(0); |
2931 | |
2932 | - update_wallclock(); |
2933 | + clocksource_register(&clocksource_xen); |
2934 | |
2935 | -#ifdef CONFIG_X86_64 |
2936 | - init_cpu_khz(); |
2937 | - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", |
2938 | - cpu_khz / 1000, cpu_khz % 1000); |
2939 | + update_wallclock(); |
2940 | |
2941 | - vxtime.mode = VXTIME_TSC; |
2942 | - vxtime.quot = (1000000L << 32) / vxtime_hz; |
2943 | - vxtime.tsc_quot = (1000L << 32) / cpu_khz; |
2944 | - sync_core(); |
2945 | - rdtscll(vxtime.last_tsc); |
2946 | +#ifndef CONFIG_X86_64 |
2947 | + use_tsc_delay(); |
2948 | #endif |
2949 | |
2950 | /* Cannot request_irq() until kmem is initialised. */ |
2951 | @@ -1277,7 +1056,7 @@ |
2952 | }; |
2953 | static int __init xen_sysctl_init(void) |
2954 | { |
2955 | - (void)register_sysctl_table(xen_table, 0); |
2956 | + (void)register_sysctl_table(xen_table); |
2957 | return 0; |
2958 | } |
2959 | __initcall(xen_sysctl_init); |
2960 | --- a/arch/x86/kernel/traps_32-xen.c |
2961 | +++ b/arch/x86/kernel/traps_32-xen.c |
2962 | @@ -100,6 +100,7 @@ |
2963 | asmlinkage void machine_check(void); |
2964 | |
2965 | int kstack_depth_to_print = 24; |
2966 | +static unsigned int code_bytes = 64; |
2967 | ATOMIC_NOTIFIER_HEAD(i386die_chain); |
2968 | |
2969 | int register_die_notifier(struct notifier_block *nb) |
2970 | @@ -297,10 +298,11 @@ |
2971 | int i; |
2972 | int in_kernel = 1; |
2973 | unsigned long esp; |
2974 | - unsigned short ss; |
2975 | + unsigned short ss, gs; |
2976 | |
2977 | 	esp = (unsigned long) (&regs->esp);
2978 | savesegment(ss, ss); |
2979 | + savesegment(gs, gs); |
2980 | if (user_mode_vm(regs)) { |
2981 | in_kernel = 0; |
2982 | esp = regs->esp; |
2983 | @@ -319,8 +321,8 @@ |
2984 | regs->eax, regs->ebx, regs->ecx, regs->edx); |
2985 | printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", |
2986 | regs->esi, regs->edi, regs->ebp, esp); |
2987 | - printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", |
2988 | - regs->xds & 0xffff, regs->xes & 0xffff, ss); |
2989 | + printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", |
2990 | + regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); |
2991 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", |
2992 | TASK_COMM_LEN, current->comm, current->pid, |
2993 | current_thread_info(), current, current->thread_info); |
2994 | @@ -330,7 +332,8 @@ |
2995 | */ |
2996 | if (in_kernel) { |
2997 | u8 *eip; |
2998 | - int code_bytes = 64; |
2999 | + unsigned int code_prologue = code_bytes * 43 / 64; |
3000 | + unsigned int code_len = code_bytes; |
3001 | unsigned char c; |
3002 | |
3003 | printk("\n" KERN_EMERG "Stack: "); |
3004 | @@ -338,14 +341,14 @@ |
3005 | |
3006 | printk(KERN_EMERG "Code: "); |
3007 | |
3008 | - eip = (u8 *)regs->eip - 43; |
3009 | + eip = (u8 *)regs->eip - code_prologue; |
3010 | if (eip < (u8 *)PAGE_OFFSET || |
3011 | probe_kernel_address(eip, c)) { |
3012 | /* try starting at EIP */ |
3013 | eip = (u8 *)regs->eip; |
3014 | - code_bytes = 32; |
3015 | + code_len = code_len - code_prologue + 1; |
3016 | } |
3017 | - for (i = 0; i < code_bytes; i++, eip++) { |
3018 | + for (i = 0; i < code_len; i++, eip++) { |
3019 | if (eip < (u8 *)PAGE_OFFSET || |
3020 | probe_kernel_address(eip, c)) { |
3021 | printk(" Bad EIP value."); |
3022 | @@ -1134,3 +1137,13 @@ |
3023 | return 1; |
3024 | } |
3025 | __setup("kstack=", kstack_setup); |
3026 | + |
3027 | +static int __init code_bytes_setup(char *s) |
3028 | +{ |
3029 | + code_bytes = simple_strtoul(s, NULL, 0); |
3030 | + if (code_bytes > 8192) |
3031 | + code_bytes = 8192; |
3032 | + |
3033 | + return 1; |
3034 | +} |
3035 | +__setup("code_bytes=", code_bytes_setup); |
3036 | --- a/arch/x86/kernel/vsyscall_64-xen.c |
3037 | +++ b/arch/x86/kernel/vsyscall_64-xen.c |
3038 | @@ -26,6 +26,7 @@ |
3039 | #include <linux/seqlock.h> |
3040 | #include <linux/jiffies.h> |
3041 | #include <linux/sysctl.h> |
3042 | +#include <linux/clocksource.h> |
3043 | #include <linux/getcpu.h> |
3044 | #include <linux/cpu.h> |
3045 | #include <linux/smp.h> |
3046 | @@ -34,6 +35,7 @@ |
3047 | #include <asm/vsyscall.h> |
3048 | #include <asm/pgtable.h> |
3049 | #include <asm/page.h> |
3050 | +#include <asm/unistd.h> |
3051 | #include <asm/fixmap.h> |
3052 | #include <asm/errno.h> |
3053 | #include <asm/io.h> |
3054 | @@ -44,56 +46,41 @@ |
3055 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
3056 | #define __syscall_clobber "r11","rcx","memory" |
3057 | |
3058 | -int __sysctl_vsyscall __section_sysctl_vsyscall = 1; |
3059 | -seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; |
3060 | +struct vsyscall_gtod_data_t { |
3061 | + seqlock_t lock; |
3062 | + int sysctl_enabled; |
3063 | + struct timeval wall_time_tv; |
3064 | + struct timezone sys_tz; |
3065 | + cycle_t offset_base; |
3066 | + struct clocksource clock; |
3067 | +}; |
3068 | int __vgetcpu_mode __section_vgetcpu_mode; |
3069 | |
3070 | -#include <asm/unistd.h> |
3071 | - |
3072 | -static __always_inline void timeval_normalize(struct timeval * tv) |
3073 | +struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = |
3074 | { |
3075 | - time_t __sec; |
3076 | - |
3077 | - __sec = tv->tv_usec / 1000000; |
3078 | - if (__sec) { |
3079 | - tv->tv_usec %= 1000000; |
3080 | - tv->tv_sec += __sec; |
3081 | - } |
3082 | -} |
3083 | + .lock = SEQLOCK_UNLOCKED, |
3084 | + .sysctl_enabled = 1, |
3085 | +}; |
3086 | |
3087 | -static __always_inline void do_vgettimeofday(struct timeval * tv) |
3088 | +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) |
3089 | { |
3090 | - long sequence, t; |
3091 | - unsigned long sec, usec; |
3092 | + unsigned long flags; |
3093 | |
3094 | - do { |
3095 | - sequence = read_seqbegin(&__xtime_lock); |
3096 | - |
3097 | - sec = __xtime.tv_sec; |
3098 | - usec = __xtime.tv_nsec / 1000; |
3099 | - |
3100 | - if (__vxtime.mode != VXTIME_HPET) { |
3101 | - t = get_cycles_sync(); |
3102 | - if (t < __vxtime.last_tsc) |
3103 | - t = __vxtime.last_tsc; |
3104 | - usec += ((t - __vxtime.last_tsc) * |
3105 | - __vxtime.tsc_quot) >> 32; |
3106 | - /* See comment in x86_64 do_gettimeofday. */ |
3107 | - } else { |
3108 | - usec += ((readl((void __iomem *) |
3109 | - fix_to_virt(VSYSCALL_HPET) + 0xf0) - |
3110 | - __vxtime.last) * __vxtime.quot) >> 32; |
3111 | - } |
3112 | - } while (read_seqretry(&__xtime_lock, sequence)); |
3113 | - |
3114 | - tv->tv_sec = sec + usec / 1000000; |
3115 | - tv->tv_usec = usec % 1000000; |
3116 | + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); |
3117 | + /* copy vsyscall data */ |
3118 | + vsyscall_gtod_data.clock = *clock; |
3119 | + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; |
3120 | + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; |
3121 | + vsyscall_gtod_data.sys_tz = sys_tz; |
3122 | + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
3123 | } |
3124 | |
3125 | -/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */ |
3126 | +/* RED-PEN may want to readd seq locking, but then the variable should be |
3127 | + * write-once. |
3128 | + */ |
3129 | static __always_inline void do_get_tz(struct timezone * tz) |
3130 | { |
3131 | - *tz = __sys_tz; |
3132 | + *tz = __vsyscall_gtod_data.sys_tz; |
3133 | } |
3134 | |
3135 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) |
3136 | @@ -101,7 +88,8 @@ |
3137 | int ret; |
3138 | asm volatile("vsysc2: syscall" |
3139 | : "=a" (ret) |
3140 | - : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber ); |
3141 | + : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) |
3142 | + : __syscall_clobber ); |
3143 | return ret; |
3144 | } |
3145 | |
3146 | @@ -114,10 +102,44 @@ |
3147 | return secs; |
3148 | } |
3149 | |
3150 | +static __always_inline void do_vgettimeofday(struct timeval * tv) |
3151 | +{ |
3152 | + cycle_t now, base, mask, cycle_delta; |
3153 | + unsigned long seq, mult, shift, nsec_delta; |
3154 | + cycle_t (*vread)(void); |
3155 | + do { |
3156 | + seq = read_seqbegin(&__vsyscall_gtod_data.lock); |
3157 | + |
3158 | + vread = __vsyscall_gtod_data.clock.vread; |
3159 | + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) { |
3160 | + gettimeofday(tv,NULL); |
3161 | + return; |
3162 | + } |
3163 | + now = vread(); |
3164 | + base = __vsyscall_gtod_data.clock.cycle_last; |
3165 | + mask = __vsyscall_gtod_data.clock.mask; |
3166 | + mult = __vsyscall_gtod_data.clock.mult; |
3167 | + shift = __vsyscall_gtod_data.clock.shift; |
3168 | + |
3169 | + *tv = __vsyscall_gtod_data.wall_time_tv; |
3170 | + |
3171 | + } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); |
3172 | + |
3173 | + /* calculate interval: */ |
3174 | + cycle_delta = (now - base) & mask; |
3175 | + /* convert to nsecs: */ |
3176 | + nsec_delta = (cycle_delta * mult) >> shift; |
3177 | + |
3178 | + /* convert to usecs and add to timespec: */ |
3179 | + tv->tv_usec += nsec_delta / NSEC_PER_USEC; |
3180 | + while (tv->tv_usec > USEC_PER_SEC) { |
3181 | + tv->tv_sec += 1; |
3182 | + tv->tv_usec -= USEC_PER_SEC; |
3183 | + } |
3184 | +} |
3185 | + |
3186 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) |
3187 | { |
3188 | - if (!__sysctl_vsyscall) |
3189 | - return gettimeofday(tv,tz); |
3190 | if (tv) |
3191 | do_vgettimeofday(tv); |
3192 | if (tz) |
3193 | @@ -129,11 +151,11 @@ |
3194 | * unlikely */ |
3195 | time_t __vsyscall(1) vtime(time_t *t) |
3196 | { |
3197 | - if (!__sysctl_vsyscall) |
3198 | + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) |
3199 | return time_syscall(t); |
3200 | else if (t) |
3201 | - *t = __xtime.tv_sec; |
3202 | - return __xtime.tv_sec; |
3203 | + *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; |
3204 | + return __vsyscall_gtod_data.wall_time_tv.tv_sec; |
3205 | } |
3206 | |
3207 | /* Fast way to get current CPU and node. |
3208 | @@ -210,7 +232,7 @@ |
3209 | ret = -ENOMEM; |
3210 | goto out; |
3211 | } |
3212 | - if (!sysctl_vsyscall) { |
3213 | + if (!vsyscall_gtod_data.sysctl_enabled) { |
3214 | writew(SYSCALL, map1); |
3215 | writew(SYSCALL, map2); |
3216 | } else { |
3217 | @@ -232,16 +254,17 @@ |
3218 | |
3219 | static ctl_table kernel_table2[] = { |
3220 | { .ctl_name = 99, .procname = "vsyscall64", |
3221 | - .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644, |
3222 | + .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), |
3223 | + .mode = 0644, |
3224 | .strategy = vsyscall_sysctl_nostrat, |
3225 | .proc_handler = vsyscall_sysctl_change }, |
3226 | - { 0, } |
3227 | + {} |
3228 | }; |
3229 | |
3230 | static ctl_table kernel_root_table2[] = { |
3231 | { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, |
3232 | .child = kernel_table2 }, |
3233 | - { 0 }, |
3234 | + {} |
3235 | }; |
3236 | |
3237 | #endif |
3238 | @@ -304,14 +327,14 @@ |
3239 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); |
3240 | map_vsyscall(); |
3241 | #ifdef CONFIG_XEN |
3242 | - sysctl_vsyscall = 0; /* disable vgettimeofay() */ |
3243 | + vsyscall_gtod_data.sysctl_enabled = 0; /* disable vgettimeofay() */ |
3244 | if (boot_cpu_has(X86_FEATURE_RDTSCP)) |
3245 | vgetcpu_mode = VGETCPU_RDTSCP; |
3246 | else |
3247 | vgetcpu_mode = VGETCPU_LSL; |
3248 | #endif |
3249 | #ifdef CONFIG_SYSCTL |
3250 | - register_sysctl_table(kernel_root_table2, 0); |
3251 | + register_sysctl_table(kernel_root_table2); |
3252 | #endif |
3253 | on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); |
3254 | hotcpu_notifier(cpu_vsyscall_notifier, 0); |
3255 | --- a/arch/x86/mm/fault_32-xen.c |
3256 | +++ b/arch/x86/mm/fault_32-xen.c |
3257 | @@ -46,43 +46,17 @@ |
3258 | } |
3259 | EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); |
3260 | |
3261 | -static inline int notify_page_fault(enum die_val val, const char *str, |
3262 | - struct pt_regs *regs, long err, int trap, int sig) |
3263 | +static inline int notify_page_fault(struct pt_regs *regs, long err) |
3264 | { |
3265 | struct die_args args = { |
3266 | .regs = regs, |
3267 | - .str = str, |
3268 | + .str = "page fault", |
3269 | .err = err, |
3270 | - .trapnr = trap, |
3271 | - .signr = sig |
3272 | + .trapnr = 14, |
3273 | + .signr = SIGSEGV |
3274 | }; |
3275 | -	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3276 | -} |
3277 | - |
3278 | -/* |
3279 | - * Unlock any spinlocks which will prevent us from getting the |
3280 | - * message out |
3281 | - */ |
3282 | -void bust_spinlocks(int yes) |
3283 | -{ |
3284 | - int loglevel_save = console_loglevel; |
3285 | - |
3286 | - if (yes) { |
3287 | - oops_in_progress = 1; |
3288 | - return; |
3289 | - } |
3290 | -#ifdef CONFIG_VT |
3291 | - unblank_screen(); |
3292 | -#endif |
3293 | - oops_in_progress = 0; |
3294 | - /* |
3295 | - * OK, the message is on the console. Now we call printk() |
3296 | - * without oops_in_progress set so that printk will give klogd |
3297 | - * a poke. Hold onto your hats... |
3298 | - */ |
3299 | - console_loglevel = 15; /* NMI oopser may have shut the console up */ |
3300 | - printk(" "); |
3301 | - console_loglevel = loglevel_save; |
3302 | +	return atomic_notifier_call_chain(&notify_page_fault_chain,
3303 | + DIE_PAGE_FAULT, &args); |
3304 | } |
3305 | |
3306 | /* |
3307 | @@ -476,8 +450,7 @@ |
3308 | /* Can take a spurious fault if mapping changes R/O -> R/W. */ |
3309 | if (spurious_fault(regs, address, error_code)) |
3310 | return; |
3311 | - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, |
3312 | - SIGSEGV) == NOTIFY_STOP) |
3313 | + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) |
3314 | return; |
3315 | /* |
3316 | * Don't take the mm semaphore here. If we fixup a prefetch |
3317 | @@ -486,8 +459,7 @@ |
3318 | goto bad_area_nosemaphore; |
3319 | } |
3320 | |
3321 | - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, |
3322 | - SIGSEGV) == NOTIFY_STOP) |
3323 | + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) |
3324 | return; |
3325 | |
3326 | /* It's safe to allow irq's after cr2 has been saved and the vmalloc |
3327 | --- a/arch/x86/mm/fault_64-xen.c |
3328 | +++ b/arch/x86/mm/fault_64-xen.c |
3329 | @@ -56,38 +56,17 @@ |
3330 | } |
3331 | EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); |
3332 | |
3333 | -static inline int notify_page_fault(enum die_val val, const char *str, |
3334 | - struct pt_regs *regs, long err, int trap, int sig) |
3335 | +static inline int notify_page_fault(struct pt_regs *regs, long err) |
3336 | { |
3337 | struct die_args args = { |
3338 | .regs = regs, |
3339 | - .str = str, |
3340 | + .str = "page fault", |
3341 | .err = err, |
3342 | - .trapnr = trap, |
3343 | - .signr = sig |
3344 | + .trapnr = 14, |
3345 | + .signr = SIGSEGV |
3346 | }; |
3347 | -	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3348 | -} |
3349 | - |
3350 | -void bust_spinlocks(int yes) |
3351 | -{ |
3352 | - int loglevel_save = console_loglevel; |
3353 | - if (yes) { |
3354 | - oops_in_progress = 1; |
3355 | - } else { |
3356 | -#ifdef CONFIG_VT |
3357 | - unblank_screen(); |
3358 | -#endif |
3359 | - oops_in_progress = 0; |
3360 | - /* |
3361 | - * OK, the message is on the console. Now we call printk() |
3362 | - * without oops_in_progress set so that printk will give klogd |
3363 | - * a poke. Hold onto your hats... |
3364 | - */ |
3365 | - console_loglevel = 15; /* NMI oopser may have shut the console up */ |
3366 | - printk(" "); |
3367 | - console_loglevel = loglevel_save; |
3368 | - } |
3369 | +	return atomic_notifier_call_chain(&notify_page_fault_chain,
3370 | + DIE_PAGE_FAULT, &args); |
3371 | } |
3372 | |
3373 | /* Sometimes the CPU reports invalid exceptions on prefetch. |
3374 | @@ -437,8 +416,7 @@ |
3375 | /* Can take a spurious fault if mapping changes R/O -> R/W. */ |
3376 | if (spurious_fault(regs, address, error_code)) |
3377 | return; |
3378 | - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, |
3379 | - SIGSEGV) == NOTIFY_STOP) |
3380 | + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) |
3381 | return; |
3382 | /* |
3383 | * Don't take the mm semaphore here. If we fixup a prefetch |
3384 | @@ -447,8 +425,7 @@ |
3385 | goto bad_area_nosemaphore; |
3386 | } |
3387 | |
3388 | - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, |
3389 | - SIGSEGV) == NOTIFY_STOP) |
3390 | + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) |
3391 | return; |
3392 | |
3393 | if (likely(regs->eflags & X86_EFLAGS_IF)) |
3394 | --- a/arch/x86/mm/highmem_32-xen.c |
3395 | +++ b/arch/x86/mm/highmem_32-xen.c |
3396 | @@ -33,14 +33,16 @@ |
3397 | |
3398 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ |
3399 | pagefault_disable(); |
3400 | + |
3401 | + idx = type + KM_TYPE_NR*smp_processor_id(); |
3402 | + BUG_ON(!pte_none(*(kmap_pte-idx))); |
3403 | + |
3404 | if (!PageHighMem(page)) |
3405 | return page_address(page); |
3406 | |
3407 | - idx = type + KM_TYPE_NR*smp_processor_id(); |
3408 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
3409 | - if (!pte_none(*(kmap_pte-idx))) |
3410 | - BUG(); |
3411 | set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); |
3412 | + arch_flush_lazy_mmu_mode(); |
3413 | |
3414 | return (void*) vaddr; |
3415 | } |
3416 | @@ -94,6 +96,7 @@ |
3417 | idx = type + KM_TYPE_NR*smp_processor_id(); |
3418 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
3419 | set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); |
3420 | + arch_flush_lazy_mmu_mode(); |
3421 | |
3422 | return (void*) vaddr; |
3423 | } |
3424 | --- a/arch/x86/mm/init_32-xen.c |
3425 | +++ b/arch/x86/mm/init_32-xen.c |
3426 | @@ -68,6 +68,7 @@ |
3427 | |
3428 | #ifdef CONFIG_X86_PAE |
3429 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
3430 | + paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); |
3431 | make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); |
3432 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
3433 | pud = pud_offset(pgd, 0); |
3434 | @@ -89,6 +90,7 @@ |
3435 | { |
3436 | if (pmd_none(*pmd)) { |
3437 | pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
3438 | + paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); |
3439 | make_lowmem_page_readonly(page_table, |
3440 | XENFEAT_writable_page_tables); |
3441 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
3442 | --- a/arch/x86/mm/init_64-xen.c |
3443 | +++ b/arch/x86/mm/init_64-xen.c |
3444 | @@ -1111,20 +1111,30 @@ |
3445 | extern int exception_trace, page_fault_trace; |
3446 | |
3447 | static ctl_table debug_table2[] = { |
3448 | - { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, |
3449 | - proc_dointvec }, |
3450 | - { 0, } |
3451 | + { |
3452 | + .ctl_name = 99, |
3453 | + .procname = "exception-trace", |
3454 | + .data = &exception_trace, |
3455 | + .maxlen = sizeof(int), |
3456 | + .mode = 0644, |
3457 | + .proc_handler = proc_dointvec |
3458 | + }, |
3459 | + {} |
3460 | }; |
3461 | |
3462 | static ctl_table debug_root_table2[] = { |
3463 | - { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555, |
3464 | - .child = debug_table2 }, |
3465 | - { 0 }, |
3466 | + { |
3467 | + .ctl_name = CTL_DEBUG, |
3468 | + .procname = "debug", |
3469 | + .mode = 0555, |
3470 | + .child = debug_table2 |
3471 | + }, |
3472 | + {} |
3473 | }; |
3474 | |
3475 | static __init int x8664_sysctl_init(void) |
3476 | { |
3477 | - register_sysctl_table(debug_root_table2, 1); |
3478 | + register_sysctl_table(debug_root_table2); |
3479 | return 0; |
3480 | } |
3481 | __initcall(x8664_sysctl_init); |
3482 | --- a/arch/x86/mm/pageattr_64-xen.c |
3483 | +++ b/arch/x86/mm/pageattr_64-xen.c |
3484 | @@ -344,8 +344,8 @@ |
3485 | void *adr = page_address(pg); |
3486 | if (cpu_has_clflush) |
3487 | cache_flush_page(adr); |
3488 | - __flush_tlb_one(adr); |
3489 | } |
3490 | + __flush_tlb_all(); |
3491 | } |
3492 | |
3493 | static inline void flush_map(struct list_head *l) |
3494 | @@ -370,6 +370,7 @@ |
3495 | pud_t *pud; |
3496 | pmd_t *pmd; |
3497 | pte_t large_pte; |
3498 | + unsigned long pfn; |
3499 | |
3500 | pgd = pgd_offset_k(address); |
3501 | BUG_ON(pgd_none(*pgd)); |
3502 | @@ -377,7 +378,8 @@ |
3503 | BUG_ON(pud_none(*pud)); |
3504 | pmd = pmd_offset(pud, address); |
3505 | BUG_ON(__pmd_val(*pmd) & _PAGE_PSE); |
3506 | - large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); |
3507 | + pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; |
3508 | + large_pte = pfn_pte(pfn, ref_prot); |
3509 | large_pte = pte_mkhuge(large_pte); |
3510 | set_pte((pte_t *)pmd, large_pte); |
3511 | } |
3512 | --- a/arch/x86/mm/pgtable_32-xen.c |
3513 | +++ b/arch/x86/mm/pgtable_32-xen.c |
3514 | @@ -149,6 +149,8 @@ |
3515 | void __init reserve_top_address(unsigned long reserve) |
3516 | { |
3517 | BUG_ON(fixmaps > 0); |
3518 | + printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", |
3519 | + (int)-reserve); |
3520 | __FIXADDR_TOP = -reserve - PAGE_SIZE; |
3521 | __VMALLOC_RESERVE += reserve; |
3522 | } |
3523 | @@ -252,6 +254,12 @@ |
3524 | swapper_pg_dir + USER_PTRS_PER_PGD, |
3525 | KERNEL_PGD_PTRS); |
3526 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); |
3527 | + |
3528 | + /* must happen under lock */ |
3529 | + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, |
3530 | + __pa(swapper_pg_dir) >> PAGE_SHIFT, |
3531 | + USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); |
3532 | + |
3533 | pgd_list_add(pgd); |
3534 | spin_unlock_irqrestore(&pgd_lock, flags); |
3535 | } |
3536 | @@ -262,6 +270,7 @@ |
3537 | { |
3538 | unsigned long flags; /* can be called from interrupt context */ |
3539 | |
3540 | + paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); |
3541 | spin_lock_irqsave(&pgd_lock, flags); |
3542 | pgd_list_del(pgd); |
3543 | spin_unlock_irqrestore(&pgd_lock, flags); |
3544 | @@ -286,6 +295,7 @@ |
3545 | pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); |
3546 | if (!pmd) |
3547 | goto out_oom; |
3548 | + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); |
3549 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); |
3550 | } |
3551 | return pgd; |
3552 | @@ -308,6 +318,7 @@ |
3553 | pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL); |
3554 | if (!pmd[i]) |
3555 | goto out_oom; |
3556 | + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); |
3557 | } |
3558 | |
3559 | spin_lock_irqsave(&pgd_lock, flags); |
3560 | @@ -348,12 +359,17 @@ |
3561 | |
3562 | out_oom: |
3563 | if (HAVE_SHARED_KERNEL_PMD) { |
3564 | - for (i--; i >= 0; i--) |
3565 | - kmem_cache_free(pmd_cache, |
3566 | - (void *)__va(pgd_val(pgd[i])-1)); |
3567 | + for (i--; i >= 0; i--) { |
3568 | + pgd_t pgdent = pgd[i]; |
3569 | + void* pmd = (void *)__va(pgd_val(pgdent)-1); |
3570 | + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); |
3571 | + kmem_cache_free(pmd_cache, pmd); |
3572 | + } |
3573 | } else { |
3574 | - for (i--; i >= 0; i--) |
3575 | + for (i--; i >= 0; i--) { |
3576 | + paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT); |
3577 | kmem_cache_free(pmd_cache, pmd[i]); |
3578 | + } |
3579 | kfree(pmd); |
3580 | } |
3581 | kmem_cache_free(pgd_cache, pgd); |
3582 | @@ -377,7 +393,9 @@ |
3583 | /* in the PAE case user pgd entries are overwritten before usage */ |
3584 | if (PTRS_PER_PMD > 1) { |
3585 | for (i = 0; i < USER_PTRS_PER_PGD; ++i) { |
3586 | - pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); |
3587 | + pgd_t pgdent = pgd[i]; |
3588 | + void* pmd = (void *)__va(pgd_val(pgdent)-1); |
3589 | + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); |
3590 | kmem_cache_free(pmd_cache, pmd); |
3591 | } |
3592 | |
3593 | --- a/drivers/char/tpm/tpm_xen.c |
3594 | +++ b/drivers/char/tpm/tpm_xen.c |
3595 | @@ -481,7 +481,6 @@ |
3596 | |
3597 | static struct xenbus_driver tpmfront = { |
3598 | .name = "vtpm", |
3599 | - .owner = THIS_MODULE, |
3600 | .ids = tpmfront_ids, |
3601 | .probe = tpmfront_probe, |
3602 | .remove = tpmfront_remove, |
3603 | @@ -491,9 +490,9 @@ |
3604 | .suspend_cancel = tpmfront_suspend_cancel, |
3605 | }; |
3606 | |
3607 | -static void __init init_tpm_xenbus(void) |
3608 | +static int __init init_tpm_xenbus(void) |
3609 | { |
3610 | - xenbus_register_frontend(&tpmfront); |
3611 | + return xenbus_register_frontend(&tpmfront); |
3612 | } |
3613 | |
3614 | static int tpmif_allocate_tx_buffers(struct tpm_private *tp) |
3615 | --- a/drivers/xen/balloon/sysfs.c |
3616 | +++ b/drivers/xen/balloon/sysfs.c |
3617 | @@ -33,6 +33,7 @@ |
3618 | #include <linux/stat.h> |
3619 | #include <linux/string.h> |
3620 | #include <linux/sysdev.h> |
3621 | +#include <linux/module.h> |
3622 | #include "common.h" |
3623 | |
3624 | #ifdef HAVE_XEN_PLATFORM_COMPAT_H |
3625 | --- a/drivers/xen/blkback/xenbus.c |
3626 | +++ b/drivers/xen/blkback/xenbus.c |
3627 | @@ -519,7 +519,6 @@ |
3628 | |
3629 | static struct xenbus_driver blkback = { |
3630 | .name = "vbd", |
3631 | - .owner = THIS_MODULE, |
3632 | .ids = blkback_ids, |
3633 | .probe = blkback_probe, |
3634 | .remove = blkback_remove, |
3635 | @@ -529,5 +528,6 @@ |
3636 | |
3637 | void blkif_xenbus_init(void) |
3638 | { |
3639 | - xenbus_register_backend(&blkback); |
3640 | + if (xenbus_register_backend(&blkback)) |
3641 | + BUG(); |
3642 | } |
3643 | --- a/drivers/xen/blkfront/blkfront.c |
3644 | +++ b/drivers/xen/blkfront/blkfront.c |
3645 | @@ -893,7 +893,6 @@ |
3646 | |
3647 | static struct xenbus_driver blkfront = { |
3648 | .name = "vbd", |
3649 | - .owner = THIS_MODULE, |
3650 | .ids = blkfront_ids, |
3651 | .probe = blkfront_probe, |
3652 | .remove = blkfront_remove, |
3653 | --- a/drivers/xen/blktap/xenbus.c |
3654 | +++ b/drivers/xen/blktap/xenbus.c |
3655 | @@ -463,7 +463,6 @@ |
3656 | |
3657 | static struct xenbus_driver blktap = { |
3658 | .name = "tap", |
3659 | - .owner = THIS_MODULE, |
3660 | .ids = blktap_ids, |
3661 | .probe = blktap_probe, |
3662 | .remove = blktap_remove, |
3663 | @@ -473,5 +472,6 @@ |
3664 | |
3665 | void tap_blkif_xenbus_init(void) |
3666 | { |
3667 | - xenbus_register_backend(&blktap); |
3668 | + if (xenbus_register_backend(&blktap)) |
3669 | + BUG(); |
3670 | } |
3671 | --- a/drivers/xen/core/evtchn.c |
3672 | +++ b/drivers/xen/core/evtchn.c |
3673 | @@ -133,7 +133,7 @@ |
3674 | BUG_ON(!test_bit(chn, s->evtchn_mask)); |
3675 | |
3676 | if (irq != -1) |
3677 | - set_native_irq_info(irq, cpumask_of_cpu(cpu)); |
3678 | + irq_desc[irq].affinity = cpumask_of_cpu(cpu); |
3679 | |
3680 | clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); |
3681 | set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); |
3682 | @@ -146,7 +146,7 @@ |
3683 | |
3684 | /* By default all event channels notify CPU#0. */ |
3685 | for (i = 0; i < NR_IRQS; i++) |
3686 | - set_native_irq_info(i, cpumask_of_cpu(0)); |
3687 | + irq_desc[i].affinity = cpumask_of_cpu(0); |
3688 | |
3689 | memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); |
3690 | memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); |
3691 | --- a/drivers/xen/core/smpboot.c |
3692 | +++ b/drivers/xen/core/smpboot.c |
3693 | @@ -261,7 +261,7 @@ |
3694 | { |
3695 | unsigned int cpu; |
3696 | struct task_struct *idle; |
3697 | - int apicid, acpiid; |
3698 | + int apicid; |
3699 | struct vcpu_get_physid cpu_id; |
3700 | #ifdef __x86_64__ |
3701 | struct desc_ptr *gdt_descr; |
3702 | @@ -270,14 +270,8 @@ |
3703 | #endif |
3704 | |
3705 | apicid = 0; |
3706 | - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) { |
3707 | + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) |
3708 | apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); |
3709 | - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); |
3710 | -#ifdef CONFIG_ACPI |
3711 | - if (acpiid != 0xff) |
3712 | - x86_acpiid_to_apicid[acpiid] = apicid; |
3713 | -#endif |
3714 | - } |
3715 | boot_cpu_data.apicid = apicid; |
3716 | cpu_data[0] = boot_cpu_data; |
3717 | |
3718 | @@ -333,14 +327,8 @@ |
3719 | XENFEAT_writable_descriptor_tables); |
3720 | |
3721 | apicid = cpu; |
3722 | - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { |
3723 | + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) |
3724 | apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); |
3725 | - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); |
3726 | -#ifdef CONFIG_ACPI |
3727 | - if (acpiid != 0xff) |
3728 | - x86_acpiid_to_apicid[acpiid] = apicid; |
3729 | -#endif |
3730 | - } |
3731 | cpu_data[cpu] = boot_cpu_data; |
3732 | cpu_data[cpu].apicid = apicid; |
3733 | |
3734 | --- a/drivers/xen/fbfront/xenfb.c |
3735 | +++ b/drivers/xen/fbfront/xenfb.c |
3736 | @@ -856,7 +856,6 @@ |
3737 | |
3738 | static struct xenbus_driver xenfb_driver = { |
3739 | .name = "vfb", |
3740 | - .owner = THIS_MODULE, |
3741 | .ids = xenfb_ids, |
3742 | .probe = xenfb_probe, |
3743 | .remove = xenfb_remove, |
3744 | --- a/drivers/xen/fbfront/xenkbd.c |
3745 | +++ b/drivers/xen/fbfront/xenkbd.c |
3746 | @@ -323,7 +323,6 @@ |
3747 | |
3748 | static struct xenbus_driver xenkbd_driver = { |
3749 | .name = "vkbd", |
3750 | - .owner = THIS_MODULE, |
3751 | .ids = xenkbd_ids, |
3752 | .probe = xenkbd_probe, |
3753 | .remove = xenkbd_remove, |
3754 | --- a/drivers/xen/netback/xenbus.c |
3755 | +++ b/drivers/xen/netback/xenbus.c |
3756 | @@ -437,7 +437,6 @@ |
3757 | |
3758 | static struct xenbus_driver netback = { |
3759 | .name = "vif", |
3760 | - .owner = THIS_MODULE, |
3761 | .ids = netback_ids, |
3762 | .probe = netback_probe, |
3763 | .remove = netback_remove, |
3764 | @@ -448,5 +447,6 @@ |
3765 | |
3766 | void netif_xenbus_init(void) |
3767 | { |
3768 | - xenbus_register_backend(&netback); |
3769 | + if (xenbus_register_backend(&netback)) |
3770 | + BUG(); |
3771 | } |
3772 | --- a/drivers/xen/netfront/netfront.c |
3773 | +++ b/drivers/xen/netfront/netfront.c |
3774 | @@ -1893,20 +1893,19 @@ |
3775 | }; |
3776 | |
3777 | #ifdef CONFIG_SYSFS |
3778 | -static ssize_t show_rxbuf_min(struct class_device *cd, char *buf) |
3779 | +static ssize_t show_rxbuf_min(struct device *dev, |
3780 | + struct device_attribute *attr, char *buf) |
3781 | { |
3782 | - struct net_device *netdev = container_of(cd, struct net_device, |
3783 | - class_dev); |
3784 | - struct netfront_info *info = netdev_priv(netdev); |
3785 | + struct netfront_info *info = netdev_priv(to_net_dev(dev)); |
3786 | |
3787 | return sprintf(buf, "%u\n", info->rx_min_target); |
3788 | } |
3789 | |
3790 | -static ssize_t store_rxbuf_min(struct class_device *cd, |
3791 | +static ssize_t store_rxbuf_min(struct device *dev, |
3792 | + struct device_attribute *attr, |
3793 | const char *buf, size_t len) |
3794 | { |
3795 | - struct net_device *netdev = container_of(cd, struct net_device, |
3796 | - class_dev); |
3797 | + struct net_device *netdev = to_net_dev(dev); |
3798 | struct netfront_info *np = netdev_priv(netdev); |
3799 | char *endp; |
3800 | unsigned long target; |
3801 | @@ -1936,20 +1935,19 @@ |
3802 | return len; |
3803 | } |
3804 | |
3805 | -static ssize_t show_rxbuf_max(struct class_device *cd, char *buf) |
3806 | +static ssize_t show_rxbuf_max(struct device *dev, |
3807 | + struct device_attribute *attr, char *buf) |
3808 | { |
3809 | - struct net_device *netdev = container_of(cd, struct net_device, |
3810 | - class_dev); |
3811 | - struct netfront_info *info = netdev_priv(netdev); |
3812 | + struct netfront_info *info = netdev_priv(to_net_dev(dev)); |
3813 | |
3814 | return sprintf(buf, "%u\n", info->rx_max_target); |
3815 | } |
3816 | |
3817 | -static ssize_t store_rxbuf_max(struct class_device *cd, |
3818 | +static ssize_t store_rxbuf_max(struct device *dev, |
3819 | + struct device_attribute *attr, |
3820 | const char *buf, size_t len) |
3821 | { |
3822 | - struct net_device *netdev = container_of(cd, struct net_device, |
3823 | - class_dev); |
3824 | + struct net_device *netdev = to_net_dev(dev); |
3825 | struct netfront_info *np = netdev_priv(netdev); |
3826 | char *endp; |
3827 | unsigned long target; |
3828 | @@ -1979,16 +1977,15 @@ |
3829 | return len; |
3830 | } |
3831 | |
3832 | -static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf) |
3833 | +static ssize_t show_rxbuf_cur(struct device *dev, |
3834 | + struct device_attribute *attr, char *buf) |
3835 | { |
3836 | - struct net_device *netdev = container_of(cd, struct net_device, |
3837 | - class_dev); |
3838 | - struct netfront_info *info = netdev_priv(netdev); |
3839 | + struct netfront_info *info = netdev_priv(to_net_dev(dev)); |
3840 | |
3841 | return sprintf(buf, "%u\n", info->rx_target); |
3842 | } |
3843 | |
3844 | -static const struct class_device_attribute xennet_attrs[] = { |
3845 | +static struct device_attribute xennet_attrs[] = { |
3846 | __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min), |
3847 | __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max), |
3848 | __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), |
3849 | @@ -2000,8 +1997,8 @@ |
3850 | int error = 0; |
3851 | |
3852 | for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { |
3853 | - error = class_device_create_file(&netdev->class_dev, |
3854 | - &xennet_attrs[i]); |
3855 | + error = device_create_file(&netdev->dev, |
3856 | + &xennet_attrs[i]); |
3857 | if (error) |
3858 | goto fail; |
3859 | } |
3860 | @@ -2009,8 +2006,7 @@ |
3861 | |
3862 | fail: |
3863 | while (--i >= 0) |
3864 | - class_device_remove_file(&netdev->class_dev, |
3865 | - &xennet_attrs[i]); |
3866 | + device_remove_file(&netdev->dev, &xennet_attrs[i]); |
3867 | return error; |
3868 | } |
3869 | |
3870 | @@ -2018,10 +2014,8 @@ |
3871 | { |
3872 | int i; |
3873 | |
3874 | - for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { |
3875 | - class_device_remove_file(&netdev->class_dev, |
3876 | - &xennet_attrs[i]); |
3877 | - } |
3878 | + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) |
3879 | + device_remove_file(&netdev->dev, &xennet_attrs[i]); |
3880 | } |
3881 | |
3882 | #endif /* CONFIG_SYSFS */ |
3883 | @@ -2187,7 +2181,6 @@ |
3884 | |
3885 | static struct xenbus_driver netfront_driver = { |
3886 | .name = "vif", |
3887 | - .owner = THIS_MODULE, |
3888 | .ids = netfront_ids, |
3889 | .probe = netfront_probe, |
3890 | .remove = __devexit_p(netfront_remove), |
3891 | --- a/drivers/xen/pciback/xenbus.c |
3892 | +++ b/drivers/xen/pciback/xenbus.c |
3893 | @@ -663,7 +663,6 @@ |
3894 | |
3895 | static struct xenbus_driver xenbus_pciback_driver = { |
3896 | .name = "pciback", |
3897 | - .owner = THIS_MODULE, |
3898 | .ids = xenpci_ids, |
3899 | .probe = pciback_xenbus_probe, |
3900 | .remove = pciback_xenbus_remove, |
3901 | --- a/drivers/xen/pcifront/xenbus.c |
3902 | +++ b/drivers/xen/pcifront/xenbus.c |
3903 | @@ -435,7 +435,6 @@ |
3904 | |
3905 | static struct xenbus_driver xenbus_pcifront_driver = { |
3906 | .name = "pcifront", |
3907 | - .owner = THIS_MODULE, |
3908 | .ids = xenpci_ids, |
3909 | .probe = pcifront_xenbus_probe, |
3910 | .remove = pcifront_xenbus_remove, |
3911 | --- a/drivers/xen/tpmback/common.h |
3912 | +++ b/drivers/xen/tpmback/common.h |
3913 | @@ -54,11 +54,11 @@ |
3914 | |
3915 | void tpmif_disconnect_complete(tpmif_t * tpmif); |
3916 | tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi); |
3917 | -void tpmif_interface_init(void); |
3918 | +int tpmif_interface_init(void); |
3919 | void tpmif_interface_exit(void); |
3920 | void tpmif_schedule_work(tpmif_t * tpmif); |
3921 | void tpmif_deschedule_work(tpmif_t * tpmif); |
3922 | -void tpmif_xenbus_init(void); |
3923 | +int tpmif_xenbus_init(void); |
3924 | void tpmif_xenbus_exit(void); |
3925 | int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn); |
3926 | irqreturn_t tpmif_be_int(int irq, void *dev_id); |
3927 | --- a/drivers/xen/tpmback/interface.c |
3928 | +++ b/drivers/xen/tpmback/interface.c |
3929 | @@ -156,13 +156,14 @@ |
3930 | free_tpmif(tpmif); |
3931 | } |
3932 | |
3933 | -void __init tpmif_interface_init(void) |
3934 | +int __init tpmif_interface_init(void) |
3935 | { |
3936 | tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t), |
3937 | 0, 0, NULL, NULL); |
3938 | + return tpmif_cachep ? 0 : -ENOMEM; |
3939 | } |
3940 | |
3941 | -void __exit tpmif_interface_exit(void) |
3942 | +void tpmif_interface_exit(void) |
3943 | { |
3944 | kmem_cache_destroy(tpmif_cachep); |
3945 | } |
3946 | --- a/drivers/xen/tpmback/tpmback.c |
3947 | +++ b/drivers/xen/tpmback/tpmback.c |
3948 | @@ -923,22 +923,30 @@ |
3949 | spin_lock_init(&tpm_schedule_list_lock); |
3950 | INIT_LIST_HEAD(&tpm_schedule_list); |
3951 | |
3952 | - tpmif_interface_init(); |
3953 | - tpmif_xenbus_init(); |
3954 | + rc = tpmif_interface_init(); |
3955 | + if (!rc) { |
3956 | + rc = tpmif_xenbus_init(); |
3957 | + if (rc) |
3958 | + tpmif_interface_exit(); |
3959 | + } |
3960 | + if (rc) { |
3961 | + misc_deregister(&vtpms_miscdevice); |
3962 | + return rc; |
3963 | + } |
3964 | |
3965 | printk(KERN_ALERT "Successfully initialized TPM backend driver.\n"); |
3966 | |
3967 | return 0; |
3968 | } |
3969 | - |
3970 | module_init(tpmback_init); |
3971 | |
3972 | -void __exit tpmback_exit(void) |
3973 | +static void __exit tpmback_exit(void) |
3974 | { |
3975 | vtpm_release_packets(NULL, 0); |
3976 | tpmif_xenbus_exit(); |
3977 | tpmif_interface_exit(); |
3978 | misc_deregister(&vtpms_miscdevice); |
3979 | } |
3980 | +module_exit(tpmback_exit) |
3981 | |
3982 | MODULE_LICENSE("Dual BSD/GPL"); |
3983 | --- a/drivers/xen/tpmback/xenbus.c |
3984 | +++ b/drivers/xen/tpmback/xenbus.c |
3985 | @@ -270,7 +270,6 @@ |
3986 | |
3987 | static struct xenbus_driver tpmback = { |
3988 | .name = "vtpm", |
3989 | - .owner = THIS_MODULE, |
3990 | .ids = tpmback_ids, |
3991 | .probe = tpmback_probe, |
3992 | .remove = tpmback_remove, |
3993 | @@ -278,9 +277,9 @@ |
3994 | }; |
3995 | |
3996 | |
3997 | -void tpmif_xenbus_init(void) |
3998 | +int tpmif_xenbus_init(void) |
3999 | { |
4000 | - xenbus_register_backend(&tpmback); |
4001 | + return xenbus_register_backend(&tpmback); |
4002 | } |
4003 | |
4004 | void tpmif_xenbus_exit(void) |
4005 | --- a/drivers/xen/xenbus/xenbus_probe.c |
4006 | +++ b/drivers/xen/xenbus/xenbus_probe.c |
4007 | @@ -362,7 +362,9 @@ |
4008 | } |
4009 | |
4010 | int xenbus_register_driver_common(struct xenbus_driver *drv, |
4011 | - struct xen_bus_type *bus) |
4012 | + struct xen_bus_type *bus, |
4013 | + struct module *owner, |
4014 | + const char *mod_name) |
4015 | { |
4016 | int ret; |
4017 | |
4018 | @@ -372,7 +374,10 @@ |
4019 | drv->driver.name = drv->name; |
4020 | drv->driver.bus = &bus->bus; |
4021 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) |
4022 | - drv->driver.owner = drv->owner; |
4023 | + drv->driver.owner = owner; |
4024 | +#endif |
4025 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21) |
4026 | + drv->driver.mod_name = mod_name; |
4027 | #endif |
4028 | #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) |
4029 | drv->driver.probe = xenbus_dev_probe; |
4030 | @@ -386,13 +391,15 @@ |
4031 | return ret; |
4032 | } |
4033 | |
4034 | -int xenbus_register_frontend(struct xenbus_driver *drv) |
4035 | +int __xenbus_register_frontend(struct xenbus_driver *drv, |
4036 | + struct module *owner, const char *mod_name) |
4037 | { |
4038 | int ret; |
4039 | |
4040 | drv->read_otherend_details = read_backend_details; |
4041 | |
4042 | - ret = xenbus_register_driver_common(drv, &xenbus_frontend); |
4043 | + ret = xenbus_register_driver_common(drv, &xenbus_frontend, |
4044 | + owner, mod_name); |
4045 | if (ret) |
4046 | return ret; |
4047 | |
4048 | @@ -401,7 +408,7 @@ |
4049 | |
4050 | return 0; |
4051 | } |
4052 | -EXPORT_SYMBOL_GPL(xenbus_register_frontend); |
4053 | +EXPORT_SYMBOL_GPL(__xenbus_register_frontend); |
4054 | |
4055 | void xenbus_unregister_driver(struct xenbus_driver *drv) |
4056 | { |
4057 | --- a/drivers/xen/xenbus/xenbus_probe.h |
4058 | +++ b/drivers/xen/xenbus/xenbus_probe.h |
4059 | @@ -63,7 +63,9 @@ |
4060 | extern int xenbus_dev_probe(struct device *_dev); |
4061 | extern int xenbus_dev_remove(struct device *_dev); |
4062 | extern int xenbus_register_driver_common(struct xenbus_driver *drv, |
4063 | - struct xen_bus_type *bus); |
4064 | + struct xen_bus_type *bus, |
4065 | + struct module *owner, |
4066 | + const char *mod_name); |
4067 | extern int xenbus_probe_node(struct xen_bus_type *bus, |
4068 | const char *type, |
4069 | const char *nodename); |
4070 | --- a/drivers/xen/xenbus/xenbus_probe_backend.c |
4071 | +++ b/drivers/xen/xenbus/xenbus_probe_backend.c |
4072 | @@ -172,13 +172,15 @@ |
4073 | return 0; |
4074 | } |
4075 | |
4076 | -int xenbus_register_backend(struct xenbus_driver *drv) |
4077 | +int __xenbus_register_backend(struct xenbus_driver *drv, |
4078 | + struct module *owner, const char *mod_name) |
4079 | { |
4080 | drv->read_otherend_details = read_frontend_details; |
4081 | |
4082 | - return xenbus_register_driver_common(drv, &xenbus_backend); |
4083 | + return xenbus_register_driver_common(drv, &xenbus_backend, |
4084 | + owner, mod_name); |
4085 | } |
4086 | -EXPORT_SYMBOL_GPL(xenbus_register_backend); |
4087 | +EXPORT_SYMBOL_GPL(__xenbus_register_backend); |
4088 | |
4089 | /* backend/<typename>/<frontend-uuid>/<name> */ |
4090 | static int xenbus_probe_backend_unit(const char *dir, |
4091 | --- a/include/asm-x86/i8253.h |
4092 | +++ b/include/asm-x86/i8253.h |
4093 | @@ -8,10 +8,14 @@ |
4094 | |
4095 | extern spinlock_t i8253_lock; |
4096 | |
4097 | +#ifdef CONFIG_GENERIC_CLOCKEVENTS |
4098 | + |
4099 | extern struct clock_event_device *global_clock_event; |
4100 | |
4101 | extern void setup_pit_timer(void); |
4102 | |
4103 | +#endif |
4104 | + |
4105 | #define inb_pit inb_p |
4106 | #define outb_pit outb_p |
4107 | |
4108 | --- a/include/asm-x86/mach-xen/asm/desc_32.h |
4109 | +++ b/include/asm-x86/mach-xen/asm/desc_32.h |
4110 | @@ -21,7 +21,7 @@ |
4111 | |
4112 | extern struct Xgt_desc_struct idt_descr; |
4113 | DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); |
4114 | - |
4115 | +extern struct Xgt_desc_struct early_gdt_descr; |
4116 | |
4117 | static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) |
4118 | { |
4119 | --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h |
4120 | +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h |
4121 | @@ -9,7 +9,6 @@ |
4122 | |
4123 | #include <asm/scatterlist.h> |
4124 | #include <asm/io.h> |
4125 | -#include <asm/swiotlb.h> |
4126 | |
4127 | struct dma_mapping_ops { |
4128 | int (*mapping_error)(dma_addr_t dma_addr); |
4129 | @@ -66,6 +65,9 @@ |
4130 | |
4131 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) |
4132 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) |
4133 | + |
4134 | +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) |
4135 | +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) |
4136 | |
4137 | extern void *dma_alloc_coherent(struct device *dev, size_t size, |
4138 | dma_addr_t *dma_handle, gfp_t gfp); |
4139 | --- a/include/asm-x86/mach-xen/asm/e820_64.h |
4140 | +++ b/include/asm-x86/mach-xen/asm/e820_64.h |
4141 | @@ -46,6 +46,7 @@ |
4142 | extern void e820_print_map(char *who); |
4143 | extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); |
4144 | extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); |
4145 | +extern unsigned long e820_hole_size(unsigned long start, unsigned long end); |
4146 | |
4147 | extern void e820_setup_gap(struct e820entry *e820, int nr_map); |
4148 | extern void e820_register_active_regions(int nid, |
4149 | @@ -56,6 +57,7 @@ |
4150 | extern struct e820map e820; |
4151 | |
4152 | extern unsigned ebda_addr, ebda_size; |
4153 | +extern unsigned long nodemap_addr, nodemap_size; |
4154 | #endif/*!__ASSEMBLY__*/ |
4155 | |
4156 | #endif/*__E820_HEADER*/ |
4157 | --- a/include/asm-x86/mach-xen/asm/hw_irq_64.h |
4158 | +++ b/include/asm-x86/mach-xen/asm/hw_irq_64.h |
4159 | @@ -31,10 +31,32 @@ |
4160 | |
4161 | #define IA32_SYSCALL_VECTOR 0x80 |
4162 | |
4163 | +#ifndef CONFIG_XEN |
4164 | + |
4165 | +/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering |
4166 | + * cleanup after irq migration. |
4167 | + */ |
4168 | +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR |
4169 | |
4170 | /* |
4171 | - * Vectors 0x20-0x2f are used for ISA interrupts. |
4172 | + * Vectors 0x30-0x3f are used for ISA interrupts. |
4173 | */ |
4174 | +#define IRQ0_VECTOR FIRST_EXTERNAL_VECTOR + 0x10 |
4175 | +#define IRQ1_VECTOR IRQ0_VECTOR + 1 |
4176 | +#define IRQ2_VECTOR IRQ0_VECTOR + 2 |
4177 | +#define IRQ3_VECTOR IRQ0_VECTOR + 3 |
4178 | +#define IRQ4_VECTOR IRQ0_VECTOR + 4 |
4179 | +#define IRQ5_VECTOR IRQ0_VECTOR + 5 |
4180 | +#define IRQ6_VECTOR IRQ0_VECTOR + 6 |
4181 | +#define IRQ7_VECTOR IRQ0_VECTOR + 7 |
4182 | +#define IRQ8_VECTOR IRQ0_VECTOR + 8 |
4183 | +#define IRQ9_VECTOR IRQ0_VECTOR + 9 |
4184 | +#define IRQ10_VECTOR IRQ0_VECTOR + 10 |
4185 | +#define IRQ11_VECTOR IRQ0_VECTOR + 11 |
4186 | +#define IRQ12_VECTOR IRQ0_VECTOR + 12 |
4187 | +#define IRQ13_VECTOR IRQ0_VECTOR + 13 |
4188 | +#define IRQ14_VECTOR IRQ0_VECTOR + 14 |
4189 | +#define IRQ15_VECTOR IRQ0_VECTOR + 15 |
4190 | |
4191 | /* |
4192 | * Special IRQ vectors used by the SMP architecture, 0xf0-0xff |
4193 | @@ -43,7 +65,6 @@ |
4194 | * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. |
4195 | * TLB, reschedule and local APIC vectors are performance-critical. |
4196 | */ |
4197 | -#ifndef CONFIG_XEN |
4198 | #define SPURIOUS_APIC_VECTOR 0xff |
4199 | #define ERROR_APIC_VECTOR 0xfe |
4200 | #define RESCHEDULE_VECTOR 0xfd |
4201 | @@ -57,7 +78,6 @@ |
4202 | #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ |
4203 | |
4204 | #define NUM_INVALIDATE_TLB_VECTORS 8 |
4205 | -#endif |
4206 | |
4207 | /* |
4208 | * Local APIC timer IRQ vector is on a different priority level, |
4209 | @@ -68,12 +88,13 @@ |
4210 | |
4211 | /* |
4212 | * First APIC vector available to drivers: (vectors 0x30-0xee) |
4213 | - * we start at 0x31 to spread out vectors evenly between priority |
4214 | + * we start at 0x41 to spread out vectors evenly between priority |
4215 | * levels. (0x80 is the syscall vector) |
4216 | */ |
4217 | -#define FIRST_DEVICE_VECTOR 0x31 |
4218 | +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) |
4219 | #define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ |
4220 | |
4221 | +#endif |
4222 | |
4223 | #ifndef __ASSEMBLY__ |
4224 | typedef int vector_irq_t[NR_VECTORS]; |
4225 | @@ -93,7 +114,7 @@ |
4226 | extern int i8259A_irq_pending(unsigned int irq); |
4227 | extern void make_8259A_irq(unsigned int irq); |
4228 | extern void init_8259A(int aeoi); |
4229 | -extern void FASTCALL(send_IPI_self(int vector)); |
4230 | +extern void send_IPI_self(int vector); |
4231 | extern void init_VISWS_APIC_irqs(void); |
4232 | extern void setup_IO_APIC(void); |
4233 | extern void disable_IO_APIC(void); |
4234 | --- a/include/asm-x86/mach-xen/asm/hypervisor.h |
4235 | +++ b/include/asm-x86/mach-xen/asm/hypervisor.h |
4236 | @@ -171,7 +171,7 @@ |
4237 | return rc; |
4238 | } |
4239 | |
4240 | -static inline void /*__noreturn*/ |
4241 | +static inline void __noreturn |
4242 | HYPERVISOR_shutdown( |
4243 | unsigned int reason) |
4244 | { |
4245 | --- a/include/asm-x86/mach-xen/asm/io_32.h |
4246 | +++ b/include/asm-x86/mach-xen/asm/io_32.h |
4247 | @@ -232,12 +232,6 @@ |
4248 | #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN))) |
4249 | |
4250 | /* |
4251 | - * Again, i386 does not require mem IO specific function. |
4252 | - */ |
4253 | - |
4254 | -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d)) |
4255 | - |
4256 | -/* |
4257 | * Cache management |
4258 | * |
4259 | * This needed for two cases |
4260 | --- a/include/asm-x86/mach-xen/asm/io_64.h |
4261 | +++ b/include/asm-x86/mach-xen/asm/io_64.h |
4262 | @@ -101,7 +101,7 @@ |
4263 | |
4264 | #define IO_SPACE_LIMIT 0xffff |
4265 | |
4266 | -#if defined(__KERNEL__) && __x86_64__ |
4267 | +#if defined(__KERNEL__) && defined(__x86_64__) |
4268 | |
4269 | #include <linux/vmalloc.h> |
4270 | |
4271 | @@ -267,12 +267,6 @@ |
4272 | */ |
4273 | #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN))) |
4274 | |
4275 | -/* |
4276 | - * Again, x86-64 does not require mem IO specific function. |
4277 | - */ |
4278 | - |
4279 | -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d)) |
4280 | - |
4281 | /* Nothing to do */ |
4282 | |
4283 | #define dma_cache_inv(_start,_size) do { } while (0) |
4284 | --- a/include/asm-x86/mach-xen/asm/mmu_context_32.h |
4285 | +++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h |
4286 | @@ -27,13 +27,13 @@ |
4287 | static inline void __prepare_arch_switch(void) |
4288 | { |
4289 | /* |
4290 | - * Save away %fs. No need to save %gs, as it was saved on the |
4291 | + * Save away %gs. No need to save %fs, as it was saved on the |
4292 | * stack on entry. No need to save %es and %ds, as those are |
4293 | * always kernel segments while inside the kernel. |
4294 | */ |
4295 | - asm volatile ( "mov %%fs,%0" |
4296 | - : "=m" (current->thread.fs)); |
4297 | - asm volatile ( "movl %0,%%fs" |
4298 | + asm volatile ( "mov %%gs,%0" |
4299 | + : "=m" (current->thread.gs)); |
4300 | + asm volatile ( "movl %0,%%gs" |
4301 | : : "r" (0) ); |
4302 | } |
4303 | |
4304 | @@ -95,7 +95,7 @@ |
4305 | } |
4306 | |
4307 | #define deactivate_mm(tsk, mm) \ |
4308 | - asm("movl %0,%%fs": :"r" (0)); |
4309 | + asm("movl %0,%%gs": :"r" (0)); |
4310 | |
4311 | static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) |
4312 | { |
4313 | --- a/include/asm-x86/mach-xen/asm/pgalloc_32.h |
4314 | +++ b/include/asm-x86/mach-xen/asm/pgalloc_32.h |
4315 | @@ -6,12 +6,23 @@ |
4316 | #include <linux/mm.h> /* for struct page */ |
4317 | #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */ |
4318 | |
4319 | -#define pmd_populate_kernel(mm, pmd, pte) \ |
4320 | - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) |
4321 | +#define paravirt_alloc_pt(pfn) do { } while (0) |
4322 | +#define paravirt_alloc_pd(pfn) do { } while (0) |
4323 | +#define paravirt_alloc_pd(pfn) do { } while (0) |
4324 | +#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) |
4325 | +#define paravirt_release_pt(pfn) do { } while (0) |
4326 | +#define paravirt_release_pd(pfn) do { } while (0) |
4327 | + |
4328 | +#define pmd_populate_kernel(mm, pmd, pte) \ |
4329 | +do { \ |
4330 | + paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \ |
4331 | + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ |
4332 | +} while (0) |
4333 | |
4334 | #define pmd_populate(mm, pmd, pte) \ |
4335 | do { \ |
4336 | unsigned long pfn = page_to_pfn(pte); \ |
4337 | + paravirt_alloc_pt(pfn); \ |
4338 | if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) { \ |
4339 | if (!PageHighMem(pte)) \ |
4340 | BUG_ON(HYPERVISOR_update_va_mapping( \ |
4341 | @@ -42,7 +53,11 @@ |
4342 | |
4343 | extern void pte_free(struct page *pte); |
4344 | |
4345 | -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) |
4346 | +#define __pte_free_tlb(tlb,pte) \ |
4347 | +do { \ |
4348 | + paravirt_release_pt(page_to_pfn(pte)); \ |
4349 | + tlb_remove_page((tlb),(pte)); \ |
4350 | +} while (0) |
4351 | |
4352 | #ifdef CONFIG_X86_PAE |
4353 | /* |
4354 | --- a/include/asm-x86/mach-xen/asm/pgtable_32.h |
4355 | +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h |
4356 | @@ -275,6 +275,7 @@ |
4357 | */ |
4358 | #define pte_update(mm, addr, ptep) do { } while (0) |
4359 | #define pte_update_defer(mm, addr, ptep) do { } while (0) |
4360 | +#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) |
4361 | |
4362 | /* |
4363 | * We only update the dirty/accessed state if we set |
4364 | @@ -490,12 +491,24 @@ |
4365 | #endif |
4366 | |
4367 | #if defined(CONFIG_HIGHPTE) |
4368 | -#define pte_offset_map(dir, address) \ |
4369 | - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \ |
4370 | - pte_index(address)) |
4371 | -#define pte_offset_map_nested(dir, address) \ |
4372 | - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \ |
4373 | - pte_index(address)) |
4374 | +#define pte_offset_map(dir, address) \ |
4375 | +({ \ |
4376 | + pte_t *__ptep; \ |
4377 | + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ |
4378 | + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \ |
4379 | + paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ |
4380 | + __ptep = __ptep + pte_index(address); \ |
4381 | + __ptep; \ |
4382 | +}) |
4383 | +#define pte_offset_map_nested(dir, address) \ |
4384 | +({ \ |
4385 | + pte_t *__ptep; \ |
4386 | + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ |
4387 | + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \ |
4388 | + paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ |
4389 | + __ptep = __ptep + pte_index(address); \ |
4390 | + __ptep; \ |
4391 | +}) |
4392 | #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) |
4393 | #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) |
4394 | #else |
4395 | --- a/include/asm-x86/mach-xen/asm/pgtable_64.h |
4396 | +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h |
4397 | @@ -416,15 +416,6 @@ |
4398 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) |
4399 | #define mk_pte_huge(entry) (__pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) |
4400 | |
4401 | -/* physical address -> PTE */ |
4402 | -static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) |
4403 | -{ |
4404 | - unsigned long pteval; |
4405 | - pteval = physpage | pgprot_val(pgprot); |
4406 | - pteval &= __supported_pte_mask; |
4407 | - return __pte(pteval); |
4408 | -} |
4409 | - |
4410 | /* Change flags of a PTE */ |
4411 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) |
4412 | { |
4413 | --- a/include/asm-x86/mach-xen/asm/processor_32.h |
4414 | +++ b/include/asm-x86/mach-xen/asm/processor_32.h |
4415 | @@ -431,7 +431,7 @@ |
4416 | .vm86_info = NULL, \ |
4417 | .sysenter_cs = __KERNEL_CS, \ |
4418 | .io_bitmap_ptr = NULL, \ |
4419 | - .gs = __KERNEL_PDA, \ |
4420 | + .fs = __KERNEL_PDA, \ |
4421 | } |
4422 | |
4423 | /* |
4424 | @@ -449,8 +449,8 @@ |
4425 | } |
4426 | |
4427 | #define start_thread(regs, new_eip, new_esp) do { \ |
4428 | - __asm__("movl %0,%%fs": :"r" (0)); \ |
4429 | - regs->xgs = 0; \ |
4430 | + __asm__("movl %0,%%gs": :"r" (0)); \ |
4431 | + regs->xfs = 0; \ |
4432 | set_fs(USER_DS); \ |
4433 | regs->xds = __USER_DS; \ |
4434 | regs->xes = __USER_DS; \ |
4435 | --- a/include/asm-x86/mach-xen/asm/segment_32.h |
4436 | +++ b/include/asm-x86/mach-xen/asm/segment_32.h |
4437 | @@ -83,14 +83,8 @@ |
4438 | * The GDT has 32 entries |
4439 | */ |
4440 | #define GDT_ENTRIES 32 |
4441 | - |
4442 | #define GDT_SIZE (GDT_ENTRIES * 8) |
4443 | |
4444 | -/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ |
4445 | -#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) |
4446 | -/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ |
4447 | -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) |
4448 | - |
4449 | /* Simple and small GDT entries for booting only */ |
4450 | |
4451 | #define GDT_ENTRY_BOOT_CS 2 |
4452 | @@ -132,4 +126,21 @@ |
4453 | #define SEGMENT_GDT 0x0 |
4454 | |
4455 | #define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) |
4456 | + |
4457 | +/* |
4458 | + * Matching rules for certain types of segments. |
4459 | + */ |
4460 | + |
4461 | +/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */ |
4462 | +#define SEGMENT_IS_KERNEL_CODE(x) (((x) & ~3) == GDT_ENTRY_KERNEL_CS * 8 \ |
4463 | + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3)) |
4464 | + |
4465 | +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ |
4466 | +#define SEGMENT_IS_FLAT_CODE(x) (((x) & ~0x13) == GDT_ENTRY_KERNEL_CS * 8 \ |
4467 | + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3) \ |
4468 | + || ((x) & ~3) == (FLAT_USER_CS & ~3)) |
4469 | + |
4470 | +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ |
4471 | +#define SEGMENT_IS_PNP_CODE(x) (((x) & ~0x0b) == GDT_ENTRY_PNPBIOS_BASE * 8) |
4472 | + |
4473 | #endif |
4474 | --- a/include/asm-x86/mach-xen/asm/smp_32.h |
4475 | +++ b/include/asm-x86/mach-xen/asm/smp_32.h |
4476 | @@ -52,6 +52,11 @@ |
4477 | extern void cpu_uninit(void); |
4478 | #endif |
4479 | |
4480 | +#ifndef CONFIG_PARAVIRT |
4481 | +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ |
4482 | +do { } while (0) |
4483 | +#endif |
4484 | + |
4485 | /* |
4486 | * This function is needed by all SMP systems. It must _always_ be valid |
4487 | * from the initial startup. We map APIC_BASE very early in page_setup(), |
4488 | --- a/include/asm-x86/mach-xen/asm/smp_64.h |
4489 | +++ b/include/asm-x86/mach-xen/asm/smp_64.h |
4490 | @@ -7,6 +7,7 @@ |
4491 | #include <linux/threads.h> |
4492 | #include <linux/cpumask.h> |
4493 | #include <linux/bitops.h> |
4494 | +#include <linux/init.h> |
4495 | extern int disable_apic; |
4496 | |
4497 | #ifdef CONFIG_X86_LOCAL_APIC |
4498 | @@ -73,7 +74,7 @@ |
4499 | extern void __cpu_die(unsigned int cpu); |
4500 | extern void prefill_possible_map(void); |
4501 | extern unsigned num_processors; |
4502 | -extern unsigned disabled_cpus; |
4503 | +extern unsigned __cpuinitdata disabled_cpus; |
4504 | |
4505 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
4506 | |
4507 | --- a/include/xen/xenbus.h |
4508 | +++ b/include/xen/xenbus.h |
4509 | @@ -93,8 +93,7 @@ |
4510 | |
4511 | /* A xenbus driver. */ |
4512 | struct xenbus_driver { |
4513 | - char *name; |
4514 | - struct module *owner; |
4515 | + const char *name; |
4516 | const struct xenbus_device_id *ids; |
4517 | int (*probe)(struct xenbus_device *dev, |
4518 | const struct xenbus_device_id *id); |
4519 | @@ -115,8 +114,25 @@ |
4520 | return container_of(drv, struct xenbus_driver, driver); |
4521 | } |
4522 | |
4523 | -int xenbus_register_frontend(struct xenbus_driver *drv); |
4524 | -int xenbus_register_backend(struct xenbus_driver *drv); |
4525 | +int __must_check __xenbus_register_frontend(struct xenbus_driver *drv, |
4526 | + struct module *owner, |
4527 | + const char *mod_name); |
4528 | + |
4529 | +static inline int __must_check |
4530 | +xenbus_register_frontend(struct xenbus_driver *drv) |
4531 | +{ |
4532 | + return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME); |
4533 | +} |
4534 | + |
4535 | +int __must_check __xenbus_register_backend(struct xenbus_driver *drv, |
4536 | + struct module *owner, |
4537 | + const char *mod_name); |
4538 | +static inline int __must_check |
4539 | +xenbus_register_backend(struct xenbus_driver *drv) |
4540 | +{ |
4541 | + return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME); |
4542 | +} |
4543 | + |
4544 | void xenbus_unregister_driver(struct xenbus_driver *drv); |
4545 | |
4546 | struct xenbus_transaction |
4547 | --- a/lib/swiotlb-xen.c |
4548 | +++ b/lib/swiotlb-xen.c |
4549 | @@ -138,8 +138,8 @@ |
4550 | * Statically reserve bounce buffer space and initialize bounce buffer data |
4551 | * structures for the software IO TLB used to implement the PCI DMA API. |
4552 | */ |
4553 | -void |
4554 | -swiotlb_init_with_default_size (size_t default_size) |
4555 | +void __init |
4556 | +swiotlb_init_with_default_size(size_t default_size) |
4557 | { |
4558 | unsigned long i, bytes; |
4559 | int rc; |
4560 | @@ -227,7 +227,7 @@ |
4561 | dma_bits); |
4562 | } |
4563 | |
4564 | -void |
4565 | +void __init |
4566 | swiotlb_init(void) |
4567 | { |
4568 | long ram_end; |
4569 | @@ -463,7 +463,7 @@ |
4570 | * When the mapping is small enough return a static buffer to limit |
4571 | * the damage, or panic when the transfer is too big. |
4572 | */ |
4573 | - printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " |
4574 | + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at " |
4575 | "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?"); |
4576 | |
4577 | if (size > io_tlb_overflow && do_panic) { |
4578 | @@ -608,7 +608,7 @@ |
4579 | sg[0].dma_length = 0; |
4580 | return 0; |
4581 | } |
4582 | - sg->dma_address = (dma_addr_t)virt_to_bus(map); |
4583 | + sg->dma_address = virt_to_bus(map); |
4584 | } else |
4585 | sg->dma_address = dev_addr; |
4586 | sg->dma_length = sg->length; |
4587 | @@ -630,8 +630,7 @@ |
4588 | |
4589 | for (i = 0; i < nelems; i++, sg++) |
4590 | if (in_swiotlb_aperture(sg->dma_address)) |
4591 | - unmap_single(hwdev, |
4592 | - (void *)bus_to_virt(sg->dma_address), |
4593 | + unmap_single(hwdev, bus_to_virt(sg->dma_address), |
4594 | sg->dma_length, dir); |
4595 | else |
4596 | gnttab_dma_unmap_page(sg->dma_address); |
4597 | @@ -654,8 +653,7 @@ |
4598 | |
4599 | for (i = 0; i < nelems; i++, sg++) |
4600 | if (in_swiotlb_aperture(sg->dma_address)) |
4601 | - sync_single(hwdev, |
4602 | - (void *)bus_to_virt(sg->dma_address), |
4603 | + sync_single(hwdev, bus_to_virt(sg->dma_address), |
4604 | sg->dma_length, dir); |
4605 | } |
4606 | |
4607 | @@ -669,8 +667,7 @@ |
4608 | |
4609 | for (i = 0; i < nelems; i++, sg++) |
4610 | if (in_swiotlb_aperture(sg->dma_address)) |
4611 | - sync_single(hwdev, |
4612 | - (void *)bus_to_virt(sg->dma_address), |
4613 | + sync_single(hwdev, bus_to_virt(sg->dma_address), |
4614 | sg->dma_length, dir); |
4615 | } |
4616 |